diff --git a/DESCRIPTION b/DESCRIPTION index 81c2b68..0fba467 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: yyjsonr Type: Package Title: Fast JSON Parser and Generator -Version: 0.1.18.9000 +Version: 0.1.18.9001 Authors@R: c( person("Mike", "Cheng", role = c("aut", "cre", 'cph'), email = "mikefc@coolbutuseless.com"), diff --git a/NEWS.md b/NEWS.md index e440a71..6e1fed7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,7 @@ +# yyjsonr 0.1.18.9001 2024-02-01 + +* Read JSON from '.gz' files in `read_json_file()` # yyjsonr 0.1.18.9000 2024-01-25 diff --git a/src/R-yyjson-parse.c b/src/R-yyjson-parse.c index e92c91f..933d0c2 100644 --- a/src/R-yyjson-parse.c +++ b/src/R-yyjson-parse.c @@ -9,6 +9,7 @@ #include #include +#include "zlib.h" #include "yyjson.h" #include "R-yyjson-parse.h" @@ -1938,6 +1939,108 @@ SEXP parse_from_raw_(SEXP raw_, SEXP parse_opts_) { return parse_json_from_str(str, (size_t)length(raw_), &opt); }
+
+//===========================================================================
+// Parse from file given as a filename - ending in ".gz"
+//
+// Inflates the whole file into an R_alloc() buffer and passes it to
+// parse_json_from_str().
+//
+// @param filename_ character vector; element 0 is the file path
+// @param parse_opts_ parse options, forwarded to create_parse_options()
+// @return result of parse_json_from_str() on the decompressed bytes
+//
+// Raises an R error if the file cannot be opened or decompressed, has no
+// readable 4-byte trailer, or holds more data than its trailer declares.
+//===========================================================================
+SEXP parse_from_gzfile_(SEXP filename_, SEXP parse_opts_) {
+
+  const char *filename = (const char *)CHAR( STRING_ELT(filename_, 0) );
+  filename = R_ExpandFileName(filename);
+  parse_options opt = create_parse_options(parse_opts_);
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  // Read tail end of .gz file to get length.
+  // The final 4 bytes (ISIZE) are the uncompressed length modulo 2^32 in
+  // little-endian order, so assemble them byte-by-byte into an unsigned
+  // value instead of reading them straight into a host integer.
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  FILE *fp = fopen(filename, "rb");
+  if (fp == NULL) {
+    error("couldn't open file: %s", filename);
+  }
+
+  unsigned char isize[4];
+  int have_trailer = fseek(fp, -4, SEEK_END) == 0 &&
+                     fread(isize, 1, sizeof(isize), fp) == sizeof(isize);
+  fclose(fp); // close first: error() does not return to this function
+  if (!have_trailer) {
+    error("couldn't read gzip trailer of file: %s", filename);
+  }
+
+  uint32_t uncompressed_len = ((uint32_t)isize[0])       |
+                              ((uint32_t)isize[1] <<  8) |
+                              ((uint32_t)isize[2] << 16) |
+                              ((uint32_t)isize[3] << 24);
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  // Allocate a buffer to hold the uncompressed file.
+  // R_alloc() raises an R error itself on failure, and R reclaims the
+  // memory when the enclosing call ends or an R error unwinds, so the
+  // buffer is not leaked if parsing fails.
+  // Note: this approach will change if/when yyjson implements streaming
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  size_t buf_size = (size_t)uncompressed_len + 1;
+  if (buf_size <= uncompressed_len) {
+    error("json.gz file too large for this platform: %s", filename);
+  }
+  char *buf = R_alloc(buf_size, 1);
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  // Uncompress file to buffer
+  // gzread() returns int, so large files are read in bounded chunks.
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  gzFile gzfp = gzopen(filename, "rb");
+  if (gzfp == NULL) {
+    error("couldn't open gzip file: %s", filename);
+  }
+
+  const size_t max_chunk = (size_t)1 << 30;
+  size_t n_read = 0;
+  int read_failed = 0;
+  while (n_read < uncompressed_len) {
+    size_t remaining = (size_t)uncompressed_len - n_read;
+    unsigned chunk = (unsigned)(remaining < max_chunk ? remaining : max_chunk);
+    int got = gzread(gzfp, buf + n_read, chunk);
+    if (got < 0) {
+      read_failed = 1;
+      break;
+    }
+    if (got == 0) break; // end of stream before the declared length
+    n_read += (size_t)got;
+  }
+
+  // Try to read one byte past the declared length: success means the
+  // trailer under-reports the size (4GB+ of data, or a multi-member gzip
+  // file); a negative result is a zlib read error.
+  char extra;
+  int probe = read_failed ? -1 : gzread(gzfp, &extra, 1);
+  gzclose(gzfp);
+
+  if (probe < 0) {
+    error("couldn't decompress file: %s", filename);
+  }
+  if (probe > 0) {
+    error("json.gz file holds more data than its trailer declares: %s", filename);
+  }
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  // Parse buffer as string
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  buf[n_read] = '\0';
+  return parse_json_from_str(buf, n_read, &opt);
+}
+
 //=========================================================================== // Parse from file given as a filename //=========================================================================== @@ -1945,8 +2048,13 @@ SEXP parse_from_file_(SEXP filename_, SEXP parse_opts_) { const char *filename = (const char *)CHAR( STRING_ELT(filename_, 0) ); filename = R_ExpandFileName(filename); - parse_options opt = create_parse_options(parse_opts_); + size_t len = strlen(filename); + if (len >= 3 && strncmp(filename + len - 3, ".gz", 3) == 0) { + return parse_from_gzfile_(filename_, parse_opts_); + } + + parse_options opt = create_parse_options(parse_opts_);
return parse_json_from_file(filename, &opt); } diff --git a/tests/testthat/examples/mtcars.json b/tests/testthat/examples/mtcars.json new file mode 100644 index 0000000..6548c2e --- /dev/null +++ b/tests/testthat/examples/mtcars.json @@ -0,0 +1 @@ +[{"mpg":21.0,"cyl":6.0,"disp":160.0,"hp":110.0,"drat":3.9,"wt":2.62,"qsec":16.46,"vs":0.0,"am":1.0,"gear":4.0,"carb":4.0},{"mpg":21.0,"cyl":6.0,"disp":160.0,"hp":110.0,"drat":3.9,"wt":2.875,"qsec":17.02,"vs":0.0,"am":1.0,"gear":4.0,"carb":4.0},{"mpg":22.8,"cyl":4.0,"disp":108.0,"hp":93.0,"drat":3.85,"wt":2.32,"qsec":18.61,"vs":1.0,"am":1.0,"gear":4.0,"carb":1.0},{"mpg":21.4,"cyl":6.0,"disp":258.0,"hp":110.0,"drat":3.08,"wt":3.215,"qsec":19.44,"vs":1.0,"am":0.0,"gear":3.0,"carb":1.0},{"mpg":18.7,"cyl":8.0,"disp":360.0,"hp":175.0,"drat":3.15,"wt":3.44,"qsec":17.02,"vs":0.0,"am":0.0,"gear":3.0,"carb":2.0},{"mpg":18.1,"cyl":6.0,"disp":225.0,"hp":105.0,"drat":2.76,"wt":3.46,"qsec":20.22,"vs":1.0,"am":0.0,"gear":3.0,"carb":1.0},{"mpg":14.3,"cyl":8.0,"disp":360.0,"hp":245.0,"drat":3.21,"wt":3.57,"qsec":15.84,"vs":0.0,"am":0.0,"gear":3.0,"carb":4.0},{"mpg":24.4,"cyl":4.0,"disp":146.7,"hp":62.0,"drat":3.69,"wt":3.19,"qsec":20.0,"vs":1.0,"am":0.0,"gear":4.0,"carb":2.0},{"mpg":22.8,"cyl":4.0,"disp":140.8,"hp":95.0,"drat":3.92,"wt":3.15,"qsec":22.9,"vs":1.0,"am":0.0,"gear":4.0,"carb":2.0},{"mpg":19.2,"cyl":6.0,"disp":167.6,"hp":123.0,"drat":3.92,"wt":3.44,"qsec":18.3,"vs":1.0,"am":0.0,"gear":4.0,"carb":4.0},{"mpg":17.8,"cyl":6.0,"disp":167.6,"hp":123.0,"drat":3.92,"wt":3.44,"qsec":18.9,"vs":1.0,"am":0.0,"gear":4.0,"carb":4.0},{"mpg":16.4,"cyl":8.0,"disp":275.8,"hp":180.0,"drat":3.07,"wt":4.07,"qsec":17.4,"vs":0.0,"am":0.0,"gear":3.0,"carb":3.0},{"mpg":17.3,"cyl":8.0,"disp":275.8,"hp":180.0,"drat":3.07,"wt":3.73,"qsec":17.6,"vs":0.0,"am":0.0,"gear":3.0,"carb":3.0},{"mpg":15.2,"cyl":8.0,"disp":275.8,"hp":180.0,"drat":3.07,"wt":3.78,"qsec":18.0,"vs":0.0,"am":0.0,"gear":3.0,"carb":3.0},{"mpg":10.4,"cyl":8.0,"disp":472.0,"hp":205.0,"drat"
:2.93,"wt":5.25,"qsec":17.98,"vs":0.0,"am":0.0,"gear":3.0,"carb":4.0},{"mpg":10.4,"cyl":8.0,"disp":460.0,"hp":215.0,"drat":3.0,"wt":5.424,"qsec":17.82,"vs":0.0,"am":0.0,"gear":3.0,"carb":4.0},{"mpg":14.7,"cyl":8.0,"disp":440.0,"hp":230.0,"drat":3.23,"wt":5.345,"qsec":17.42,"vs":0.0,"am":0.0,"gear":3.0,"carb":4.0},{"mpg":32.4,"cyl":4.0,"disp":78.7,"hp":66.0,"drat":4.08,"wt":2.2,"qsec":19.47,"vs":1.0,"am":1.0,"gear":4.0,"carb":1.0},{"mpg":30.4,"cyl":4.0,"disp":75.7,"hp":52.0,"drat":4.93,"wt":1.615,"qsec":18.52,"vs":1.0,"am":1.0,"gear":4.0,"carb":2.0},{"mpg":33.9,"cyl":4.0,"disp":71.1,"hp":65.0,"drat":4.22,"wt":1.835,"qsec":19.9,"vs":1.0,"am":1.0,"gear":4.0,"carb":1.0},{"mpg":21.5,"cyl":4.0,"disp":120.1,"hp":97.0,"drat":3.7,"wt":2.465,"qsec":20.01,"vs":1.0,"am":0.0,"gear":3.0,"carb":1.0},{"mpg":15.5,"cyl":8.0,"disp":318.0,"hp":150.0,"drat":2.76,"wt":3.52,"qsec":16.87,"vs":0.0,"am":0.0,"gear":3.0,"carb":2.0},{"mpg":15.2,"cyl":8.0,"disp":304.0,"hp":150.0,"drat":3.15,"wt":3.435,"qsec":17.3,"vs":0.0,"am":0.0,"gear":3.0,"carb":2.0},{"mpg":13.3,"cyl":8.0,"disp":350.0,"hp":245.0,"drat":3.73,"wt":3.84,"qsec":15.41,"vs":0.0,"am":0.0,"gear":3.0,"carb":4.0},{"mpg":19.2,"cyl":8.0,"disp":400.0,"hp":175.0,"drat":3.08,"wt":3.845,"qsec":17.05,"vs":0.0,"am":0.0,"gear":3.0,"carb":2.0},{"mpg":27.3,"cyl":4.0,"disp":79.0,"hp":66.0,"drat":4.08,"wt":1.935,"qsec":18.9,"vs":1.0,"am":1.0,"gear":4.0,"carb":1.0},{"mpg":26.0,"cyl":4.0,"disp":120.3,"hp":91.0,"drat":4.43,"wt":2.14,"qsec":16.7,"vs":0.0,"am":1.0,"gear":5.0,"carb":2.0},{"mpg":30.4,"cyl":4.0,"disp":95.1,"hp":113.0,"drat":3.77,"wt":1.513,"qsec":16.9,"vs":1.0,"am":1.0,"gear":5.0,"carb":2.0},{"mpg":15.8,"cyl":8.0,"disp":351.0,"hp":264.0,"drat":4.22,"wt":3.17,"qsec":14.5,"vs":0.0,"am":1.0,"gear":5.0,"carb":4.0},{"mpg":19.7,"cyl":6.0,"disp":145.0,"hp":175.0,"drat":3.62,"wt":2.77,"qsec":15.5,"vs":0.0,"am":1.0,"gear":5.0,"carb":6.0},{"mpg":15.0,"cyl":8.0,"disp":301.0,"hp":335.0,"drat":3.54,"wt":3.57,"qsec":14.6,"vs":0.0,"am":1.0,"gear":5.0,"ca
rb":8.0},{"mpg":21.4,"cyl":4.0,"disp":121.0,"hp":109.0,"drat":4.11,"wt":2.78,"qsec":18.6,"vs":1.0,"am":1.0,"gear":4.0,"carb":2.0}] \ No newline at end of file diff --git a/tests/testthat/examples/mtcars.json.gz b/tests/testthat/examples/mtcars.json.gz new file mode 100644 index 0000000..5c6b536 Binary files /dev/null and b/tests/testthat/examples/mtcars.json.gz differ diff --git a/tests/testthat/test-read-json-file-gz.R b/tests/testthat/test-read-json-file-gz.R new file mode 100644 index 0000000..e75574e --- /dev/null +++ b/tests/testthat/test-read-json-file-gz.R @@ -0,0 +1,8 @@
+# The gzip-compressed fixture must parse to exactly the same value as the
+# plain-text fixture.
+test_that("reading from gz compressed files works", {
+  plain_json <- testthat::test_path("examples/mtcars.json")
+  gzip_json  <- testthat::test_path("examples/mtcars.json.gz")
+
+  expect_identical(read_json_file(plain_json), read_json_file(gzip_json))
+})