From b4b9c724e4a756e49abd8555bb549f3a0bdd4e96 Mon Sep 17 00:00:00 2001
From: Sebastiaan Klippert
Date: Wed, 29 May 2019 21:54:44 +0200
Subject: [PATCH] add UTF-8 validator

---
Review notes (text between the "---" line and the first "diff --git" line is
not part of the commit message):

* What the patch does: adds the sentinel error ErrInvalidUTF8 and a
  UTF8Validator type whose Decode method returns the input slice unchanged
  when utf8.Valid(in) is true and (nil, ErrInvalidUTF8) otherwise, plus a
  test covering one valid and one invalid input.
* ErrInvalidUTF8 is exported but is added without a doc comment.
* The doc comment on UTF8Validator.Decode says it "decodes a UTF8 byte slice
  to a UTF8 byte slice"; the method performs no conversion, it only validates.
* The test is named TestUTF8UTF8Validator_Decode ("UTF8" is repeated).
* The test compares with "bytes.Equal(in, b) == false" rather than
  "!bytes.Equal(in, b)".
* decoder_test.go gains three blank lines before the closing brace of the
  test function and ends without a trailing newline.

 decoder.go      | 14 ++++++++++++++
 decoder_test.go | 27 +++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

diff --git a/decoder.go b/decoder.go
index 79ee301..3b8ffec 100644
--- a/decoder.go
+++ b/decoder.go
@@ -2,6 +2,7 @@ package dbf
 
 import (
 	"bytes"
+	"errors"
 	"io/ioutil"
 	"unicode/utf8"
 
@@ -9,6 +10,8 @@ import (
 	"golang.org/x/text/transform"
 )
 
+var ErrInvalidUTF8 = errors.New("invalid UTF-8 data")
+
 // The charset decoding is all done in this file so you could use an different decoder
 
 // Decoder is the interface as passed to OpenFile
@@ -39,3 +42,14 @@ type UTF8Decoder struct{}
 func (d *UTF8Decoder) Decode(in []byte) ([]byte, error) {
 	return in, nil
 }
+
+// UTF8Validator checks if valid UTF8 is read
+type UTF8Validator struct{}
+
+// Decode decodes a UTF8 byte slice to a UTF8 byte slice
+func (d *UTF8Validator) Decode(in []byte) ([]byte, error) {
+	if utf8.Valid(in) {
+		return in, nil
+	}
+	return nil, ErrInvalidUTF8
+}
diff --git a/decoder_test.go b/decoder_test.go
index 2109719..e9349e8 100644
--- a/decoder_test.go
+++ b/decoder_test.go
@@ -29,3 +29,30 @@ func TestWin1250Decoder_Decode(t *testing.T) {
 		t.Errorf("Want %s, have %s", want, string(b))
 	}
 }
+
+func TestUTF8UTF8Validator_Decode(t *testing.T) {
+	dec := new(UTF8Validator)
+
+	// Test valid UTF-8 data
+	in := []byte("Tésting ㇹ Д")
+	b, err := dec.Decode(in)
+	if err != nil {
+		t.Fatalf("error in decode: %s", err)
+	}
+	if bytes.Equal(in, b) == false {
+		t.Errorf("Want %s, have %s", string(in), string(b))
+	}
+
+	// Test invalid UTF-8
+	in = []byte{0xff}
+	_, err = dec.Decode(in)
+	if err == nil {
+		t.Fatalf("wanted an error in Decode, but have no error")
+	}
+	if err != ErrInvalidUTF8 {
+		t.Fatalf("wanted error %s, have %s", ErrInvalidUTF8, err)
+	}
+
+
+
+}
\ No newline at end of file