Skip to content

Commit

Permalink
Row states
Browse files Browse the repository at this point in the history
  • Loading branch information
jaakkor2 committed Mar 3, 2024
1 parent c02d89c commit c9caff3
Show file tree
Hide file tree
Showing 9 changed files with 77 additions and 10 deletions.
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ version = "0.1.10-DEV"

[deps]
CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
Colors = "5ae59095-9a9b-59fe-a467-6f913c188581"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
LibDeflate = "9255714d-24a7-4b30-8ea3-d46a97f7e13b"
Expand All @@ -13,6 +14,7 @@ WeakRefStrings = "ea10d353-3f73-51f8-a26c-33c1cb351aa5"

[compat]
CodecZlib = "0.7"
Colors = "0.12"
DataFrames = "1"
Dates = "1.6"
LibDeflate = "0.4"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# JMPReader.jl

Reader for JMP data tables.
Reader for JMP data tables. [JMP](https://en.wikipedia.org/wiki/JMP_(statistical_software)) is proprietary statistical software.

## Example
```
Expand Down
11 changes: 6 additions & 5 deletions src/JMPReader.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@ module JMPReader

export readjmp

using Dates: unix2datetime, DateTime, Date, Time
using DataFrames: DataFrame, select!, insertcols!
using CodecZlib: transcode, GzipDecompressor
using LibDeflate: gzip_decompress!, Decompressor, LibDeflateErrors, LibDeflateErrors.deflate_insufficient_space
using WeakRefStrings: StringVector
using Base.Threads: nthreads, @spawn, threadid
using Base.Iterators: partition
using CodecZlib: transcode, GzipDecompressor
using Colors: RGB, Colorant, FixedPointNumbers.N0f8
using DataFrames: DataFrame, select!, insertcols!
using Dates: unix2datetime, DateTime, Date, Time
using LibDeflate: gzip_decompress!, Decompressor, LibDeflateErrors, LibDeflateErrors.deflate_insufficient_space
using Mmap: mmap
using WeakRefStrings: StringVector

include("types.jl")
include("constants.jl")
Expand Down
26 changes: 22 additions & 4 deletions src/column.jl
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,29 @@ function column_data(io, info, i::Int, deflatebuffer::Vector{UInt8})
end

# row states
if dt2 == 0x03
@warn("row state not implemented")
if dt1 == 0x09 && dt2 == 0x03
    # Decode one `Rowstate` (marker + color) per row from fixed-width records in `a`.
    width = dt5  # bytes per row record
    rs = Rowstate[]
    sizehint!(rs, info.nrows)
    # NOTE(review): records are addressed from the start of `a` here, whereas the
    # Int64 branch below reads the trailing `width * nrows` bytes — confirm that `a`
    # carries no leading bytes for this column type.
    for row in 1:info.nrows
        offset = width * (row - 1)
        # 16-bit marker code: byte 7 is the high byte, byte 6 the low byte.
        markeridx = bitcat(a[offset + 7], a[offset + 6])
        # Codes 0x0000-0x001f index the 32-entry marker table (1-based, hence +1);
        # any larger code is used directly as a Unicode code point.
        marker = markeridx <= 0x001f ? rowstatemarkers[markeridx + 1] : Char(markeridx)
        if a[offset + 4] == 0xff
            # Custom color. The bytes look like a little-endian two's-complement
            # negative number -(r<<16 | g<<8 | b) — TODO confirm against the format.
            # Negating the three low bytes as one 24-bit value propagates the borrow;
            # the per-byte form (255-x, 255-x, 256-x) gives the same result whenever
            # the low byte is non-zero, but yields blue = 256 when it is 0x00.
            packed = (UInt32(a[offset + 3]) << 16) | (UInt32(a[offset + 2]) << 8) |
                     a[offset + 1]
            rgb = (0x01000000 - packed) & 0x00ffffff
            r = (rgb >> 16) % UInt8
            g = (rgb >> 8) % UInt8
            b = rgb % UInt8
            # Build the color from raw 8-bit channels; no string round trip needed.
            color = RGB{N0f8}(
                reinterpret(N0f8, r), reinterpret(N0f8, g), reinterpret(N0f8, b)
            )
        else
            # Palette color: byte 1 is a zero-based index into `rowstatecolors`.
            color = parse(Colorant, rowstatecolors[a[offset + 1] + 1])
        end
        push!(rs, Rowstate(marker, color))
    end
    return rs
end
if dt1 == 0x03 && dt2 == 0x03
    # View the last `dt5 * info.nrows` bytes of `a` as Int64 values (no copy).
    nbytes = dt5 * info.nrows
    return reinterpret(Int64, @view a[(end - nbytes + 1):end])
end


# character
if dt1 in [0x02, 0x09] && dt2 in [0x01, 0x02]
Expand Down Expand Up @@ -162,7 +181,6 @@ function column_data(io, info, i::Int, deflatebuffer::Vector{UInt8})
end
end


@error("Data type combination `(dt1,dt2,dt3,dt4,dt5,dt6)=$dt1,$dt2,$dt3,$dt4,$dt5,$dt6` not implemented, found in column `$(info.column.names[i])` (i=$i), returning a vector of NaN's")
return fill(NaN, info.nrows)
end
30 changes: 30 additions & 0 deletions src/constants.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,33 @@ const JMP_STARTDATE = DateTime(1904, 1, 1)

# offset for number of rows
const OFFSET_NROWS = 368

# row state
# Marker lookup table: 32 entries, read as `rowstatemarkers[markeridx + 1]` for
# zero-based marker codes 0x0000-0x001f in the row-state branch of `column_data`.
# NOTE(review): many entries below are empty character literals (`''`), which is not
# valid Julia. The original glyphs were presumably non-ASCII marker symbols lost in
# copy/extraction — restore them from the upstream source before using this table.
const rowstatemarkers = [
'', '+', 'X', '',
'', '', 'Y', 'Z',
'', '', '', '*',
'', '', '', '',
'', '', '', '',
'', '', '', '',
'', '', '<', '>',
'', '', '/', '\\',
]

# Row-state color palette as "#RRGGBB" hex strings: 65 entries, five per line.
# Looked up as `rowstatecolors[byte + 1]` (zero-based color byte) and parsed with
# `parse(Colorant, ...)` in the row-state branch of `column_data`.
# NOTE(review): each line looks like one hue in five shades from dark to light, with
# the first line a gray ramp — TODO confirm against JMP's color palette.
const rowstatecolors = [
"#000000", "#555555", "#787878", "#C0C0C0", "#FFFFFF",
"#A00922", "#C91629", "#F03246", "#FF5C76", "#FF98A6",
"#904700", "#BC5B03", "#E57406", "#FF9138", "#FFB17D",
"#706F00", "#AFA502", "#DAD109", "#F0E521", "#FFF977",
"#516A00", "#729400", "#90BF04", "#A2DC06", "#C1FF3D",
"#00670C", "#11981B", "#21BC2D", "#23E72E", "#6AFF6B",

"#007254", "#019970", "#04C791", "#06E3AA", "#0FFFBC",
"#006D71", "#01989C", "#0CBCBC", "#06E2E3", "#67FFF7",
"#00638F", "#0380B4", "#05A1D2", "#08C5F7", "#75E4FF",
"#034AB0", "#0557D6", "#2867FD", "#4E9CFF", "#7E89FF",

"#6A02A7", "#8C05CF", "#AB08FC", "#C170FF", "#D29AFF",
"#930195", "#B803B9", "#DC06E1", "#FA50FF", "#FD96FF",
"#9D0170", "#C5048D", "#E906A4", "#FF49BC", "#FF8CCD",
]
5 changes: 5 additions & 0 deletions src/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,8 @@ struct Info
ncols::Int32
column::Column
end

# State of a single row as decoded from a row-state column: the marker character
# and the color, stored as an RGB value with N0f8 (8-bit) channels.
struct Rowstate
# marker symbol for the row
marker::Char
# color for the row
color::RGB{N0f8}
end
3 changes: 3 additions & 0 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,6 @@ function filter_names(names, rules)
end
return idx
end

# https://discourse.julialang.org/t/newbie-question-convert-two-8-byte-values-into-a-single-16-byte-value/7662/4?u=jaakkor2
# Join two bytes into one UInt16: `a` becomes the high byte, `b` the low byte.
function bitcat(a::UInt8, b::UInt8)
    high = UInt16(a) << 8
    return high | b
end
Binary file added test/rowstate.jmp
Binary file not shown.
8 changes: 8 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ using JMPReader
using Dates: Date, DateTime, Second
using Printf: @sprintf
using DataFrames: names
using Colors: RGB, FixedPointNumbers.N0f8

@testset "example1.jmp" begin
df = readjmp(joinpath(@__DIR__, "example1.jmp"))
Expand Down Expand Up @@ -113,4 +114,11 @@ end
df = readjmp("currencies.jmp")
@test all(isapprox.(df.AUD, [1.0,2.0,2.0]))
@test all(isapprox.(df.COP, [3.14,2.78,1.41]))
end

# Spot-check markers and one color decoded from the row-state columns of rowstate.jmp.
@testset "row states" begin
df = readjmp("rowstate.jmp")
# NOTE(review): the expected marker literals below are empty (`''`), which is not valid
# Julia; the original glyphs were presumably non-ASCII symbols lost in extraction —
# restore them from the upstream test file.
@test df.rowstate3[2].marker == ''
@test df.rowstate3[3].marker == ''
# 0.753 rounds to 192/255 in N0f8, i.e. 0xC0 per channel (the "#C0C0C0" gray).
@test df.rowstate2[3].color == RGB{N0f8}(0.753,0.753,0.753)
end

0 comments on commit c9caff3

Please sign in to comment.