Skip to content

Commit

Permalink
add UTF-8 validator
Browse files Browse the repository at this point in the history
  • Loading branch information
SebastiaanKlippert committed May 29, 2019
1 parent 95cb971 commit b4b9c72
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 0 deletions.
14 changes: 14 additions & 0 deletions decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@ package dbf

import (
"bytes"
"errors"
"io/ioutil"
"unicode/utf8"

"golang.org/x/text/encoding/charmap"
"golang.org/x/text/transform"
)

// ErrInvalidUTF8 is returned by UTF8Validator.Decode when the input is not valid UTF-8.
var ErrInvalidUTF8 = errors.New("invalid UTF-8 data")

// The charset decoding is all done in this file so you could use a different decoder

// Decoder is the interface as passed to OpenFile
Expand Down Expand Up @@ -39,3 +42,14 @@ type UTF8Decoder struct{}
func (d *UTF8Decoder) Decode(in []byte) ([]byte, error) {
// Pass-through: the input slice is returned as-is, without copying or validating it.
return in, nil
}

// UTF8Validator is a decoder that performs no conversion; it only verifies
// that the bytes it is given are valid UTF-8.
type UTF8Validator struct{}

// Decode returns in unchanged when it is valid UTF-8, and a nil slice together
// with ErrInvalidUTF8 otherwise.
func (d *UTF8Validator) Decode(in []byte) ([]byte, error) {
	// Reject malformed input first so the success path stays unindented.
	if !utf8.Valid(in) {
		return nil, ErrInvalidUTF8
	}
	return in, nil
}
27 changes: 27 additions & 0 deletions decoder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,30 @@ func TestWin1250Decoder_Decode(t *testing.T) {
t.Errorf("Want %s, have %s", want, string(b))
}
}

// TestUTF8UTF8Validator_Decode exercises UTF8Validator.Decode with one valid
// and one invalid input.
func TestUTF8UTF8Validator_Decode(t *testing.T) {
	validator := &UTF8Validator{}

	// Well-formed input: expect no error and the same bytes back.
	valid := []byte("Tésting ㇹ Д")
	got, err := validator.Decode(valid)
	if err != nil {
		t.Fatalf("error in decode: %s", err)
	}
	if !bytes.Equal(valid, got) {
		t.Errorf("Want %s, have %s", string(valid), string(got))
	}

	// A lone 0xff byte is not valid UTF-8, so Decode must fail with the sentinel error.
	_, err = validator.Decode([]byte{0xff})
	switch {
	case err == nil:
		t.Fatalf("wanted an error in Decode, but have no error")
	case err != ErrInvalidUTF8:
		t.Fatalf("wanted error %s, have %s", ErrInvalidUTF8, err)
	}
}

0 comments on commit b4b9c72

Please sign in to comment.