Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
module github.com/kshedden/datareader

go 1.22.3

require (
github.com/pkg/errors v0.9.1
golang.org/x/text v0.16.0
)
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
90 changes: 75 additions & 15 deletions stata_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ const (
)

var (
supportedDtaVersions = []int{114, 115, 117, 118}
rowCountLength = map[int]int{114: 4, 115: 4, 117: 4, 118: 8}
nvarLength = map[int]int{114: 2, 115: 2, 117: 2, 118: 2}
supportedDtaVersions = []int{108, 113, 114, 115, 117, 118}
rowCountLength = map[int]int{108: 4, 113: 4, 114: 4, 115: 4, 117: 4, 118: 8}
nvarLength = map[int]int{108: 2, 113: 2, 114: 2, 115: 2, 117: 2, 118: 2}
datasetLabelLength = map[int]int{117: 1, 118: 2}
valueLabelLength = map[int]int{117: 33, 118: 129}
voLength = map[int]int{117: 8, 118: 12}
Expand Down Expand Up @@ -223,9 +223,17 @@ func (rdr *StataReader) init() error {
}

if rdr.FormatVersion < 117 {
if err := rdr.readExpansionFields(); err != nil {
logerr(err)
return err
if rdr.FormatVersion <= 108 {
// dta 108 uses a 16 bit int for length.
if err := rdr.readExpansionFieldsInt16(); err != nil {
logerr(err)
return err
}
} else {
if err := rdr.readExpansionFields(); err != nil {
logerr(err)
return err
}
}
}

Expand Down Expand Up @@ -273,6 +281,34 @@ func (rdr *StataReader) readExpansionFields() error {
return nil
}

// readExpansionFieldsInt16 skips over the expansion-field section of a
// dta file in which each field's length is stored as a 16 bit integer
// (init calls this for format versions <= 108).
//
// The section is read as a sequence of records, each consisting of a
// one byte type, a two byte length (decoded using rdr.ByteOrder), and
// then that many bytes of payload.  The payload is not interpreted; the
// reader is simply advanced past it.  A record whose type and length are
// both zero terminates the section.
//
// Any read or seek error is logged and returned.  A negative length is
// treated as a corrupt file and reported as an error: seeking by a
// negative offset would move the reader backwards and could cause the
// same bytes to be parsed again indefinitely.
func (rdr *StataReader) readExpansionFieldsInt16() error {
	var (
		kind   byte
		length int16
	)

	for {
		if err := binary.Read(rdr.reader, rdr.ByteOrder, &kind); err != nil {
			logerr(err)
			return err
		}
		if err := binary.Read(rdr.reader, rdr.ByteOrder, &length); err != nil {
			logerr(err)
			return err
		}

		// An all-zero header marks the end of the expansion fields.
		if kind == 0 && length == 0 {
			return nil
		}

		// Never seek backwards on a malformed length.
		if length < 0 {
			err := fmt.Errorf("invalid expansion field length %d", length)
			logerr(err)
			return err
		}

		// Skip the payload; whence 1 means "relative to the current
		// offset" (io.SeekCurrent), assuming rdr.reader follows the
		// io.Seeker convention.
		if _, err := rdr.reader.Seek(int64(length), 1); err != nil {
			logerr(err)
			return err
		}
	}
}

// readInt reads a 1, 2, 4 or 8 byte signed integer.
func (rdr *StataReader) readInt(width int) (int, error) {

Expand Down Expand Up @@ -612,6 +648,10 @@ func (rdr *StataReader) readVartypes() error {
err = rdr.readVartypes8()
case rdr.FormatVersion == 114:
err = rdr.readVartypes8()
case rdr.FormatVersion == 113:
err = rdr.readVartypes8()
case rdr.FormatVersion == 108:
err = rdr.readVartypes8()
default:
err = fmt.Errorf("unknown format version %v", rdr.FormatVersion)
}
Expand Down Expand Up @@ -658,19 +698,21 @@ func (rdr *StataReader) translateVartypes() error {

for k := 0; k < int(rdr.Nvar); k++ {
switch {
case rdr.varTypes[k] <= 244:
// strf
continue
case rdr.varTypes[k] == 251:
case rdr.varTypes[k] == 251 || rdr.varTypes[k] == 98:
rdr.varTypes[k] = StataInt8Type
case rdr.varTypes[k] == 252:
case rdr.varTypes[k] == 252 || rdr.varTypes[k] == 105:
rdr.varTypes[k] = StataInt16Type
case rdr.varTypes[k] == 253:
case rdr.varTypes[k] == 253 || rdr.varTypes[k] == 108:
rdr.varTypes[k] = StataInt32Type
case rdr.varTypes[k] == 254:
case rdr.varTypes[k] == 254 || rdr.varTypes[k] == 102:
rdr.varTypes[k] = StataFloat32Type
case rdr.varTypes[k] == 255:
case rdr.varTypes[k] == 255 || rdr.varTypes[k] == 100:
rdr.varTypes[k] = StataFloat64Type
case rdr.varTypes[k] <= 244:
// strf
if rdr.FormatVersion <= 108 {
rdr.varTypes[k] -= 127
}
default:
return fmt.Errorf("unknown variable type")
}
Expand All @@ -692,6 +734,10 @@ func (rdr *StataReader) readFormats() error {
err = rdr.doReadFormats(49, false)
case rdr.FormatVersion == 114:
err = rdr.doReadFormats(49, false)
case rdr.FormatVersion == 113:
err = rdr.doReadFormats(12, false)
case rdr.FormatVersion == 108:
err = rdr.doReadFormats(12, false)
default:
err = fmt.Errorf("unknown format version %v", rdr.FormatVersion)
}
Expand Down Expand Up @@ -758,6 +804,10 @@ func (rdr *StataReader) readVarnames() error {
err = rdr.doReadVarnames(33, false)
case 114:
err = rdr.doReadVarnames(33, false)
case 113:
err = rdr.doReadVarnames(33, false)
case 108:
err = rdr.doReadVarnames(9, false)
default:
err = fmt.Errorf("unknown format version %d", rdr.FormatVersion)
}
Expand Down Expand Up @@ -806,6 +856,12 @@ func (rdr *StataReader) readValueLabelNames() error {
err = rdr.doReadValueLabelNames(33, false)
case 115:
err = rdr.doReadValueLabelNames(33, false)
case 114:
err = rdr.doReadValueLabelNames(33, false)
case 113:
err = rdr.doReadValueLabelNames(33, false)
case 108:
err = rdr.doReadValueLabelNames(9, false)
default:
return fmt.Errorf("unknown format version %v", rdr.FormatVersion)
}
Expand Down Expand Up @@ -849,6 +905,10 @@ func (rdr *StataReader) readVariableLabels() error {
err = rdr.doReadVariableLabels(81, false)
case 114:
err = rdr.doReadVariableLabels(81, false)
case 113:
err = rdr.doReadVariableLabels(81, false)
case 108:
err = rdr.doReadVariableLabels(81, false)
default:
err = fmt.Errorf("Unknown format version %d", rdr.FormatVersion)
}
Expand Down Expand Up @@ -1161,6 +1221,7 @@ func (rdr *StataReader) readRow(i int, buf, buf8 []byte, data []interface{}, mis
case t == StataInt8Type:
var x int8
if err := binary.Read(rdr.reader, rdr.ByteOrder, &x); err != nil {
fmt.Println(t)
panic(err)
}
if x < -127 || x > 100 {
Expand Down Expand Up @@ -1208,7 +1269,6 @@ func (rdr *StataReader) Read(rows int) ([]*Series, error) {
if rdr.rowsRead > int(rdr.rowCount) {
break
}

rdr.readRow(i, buf, buf8, data, missing)
}

Expand Down