diff --git a/DESCRIPTION b/DESCRIPTION index e7ddbd6..81c2b68 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: yyjsonr Type: Package Title: Fast JSON Parser and Generator -Version: 0.1.18 +Version: 0.1.18.9000 Authors@R: c( person("Mike", "Cheng", role = c("aut", "cre", 'cph'), email = "mikefc@coolbutuseless.com"), diff --git a/NEWS.md b/NEWS.md index 3e3774a..e440a71 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,15 @@ +# yyjsonr 0.1.18.9000 2024-01-25 + +* New `fast_numerics` flag when writing. + * Default `FALSE` + * If `TRUE` the user is guaranteeing that there are no NA, NaN or Inf values + in the numeric and integer vectors, and thus a faster method for writing + these vectors to JSON can be used. +* Changed writing of `raw` vectors to always use the `fast_numerics` method, + as raw R vectors (by definition) will not have NA, NaN or Inf values. + # yyjsonr 0.1.18 2024-01-22 * Fixes for CRAN diff --git a/R/json-opts.R b/R/json-opts.R index ebc4f05..120a1d7 100644 --- a/R/json-opts.R +++ b/R/json-opts.R @@ -256,6 +256,15 @@ opts_read_json <- function( #' @param num_specials Should special numeric values (i.e. NA, NaN, Inf) be #' converted to a JSON \code{null} value or converted to a string #' representation e.g. "NA"/"NaN" etc. Default: 'null' +#' @param fast_numerics Does the user guarantee that there are no NA, NaN or Inf +#' values in the numeric vectors? Default: FALSE. If \code{TRUE} then +#' numeric and integer vectors will be written to JSON using a faster method. +#' Note: if there are NA, NaN or Inf values, an error will be thrown. +#' Expert users are invited to also consider the +#' \code{YYJSON_WRITE_ALLOW_INF_AND_NAN} and +#' \code{YYJSON_WRITE_INF_AND_NAN_AS_NULL} options for \code{yyjson_write_flag} +#' and should consult the \code{yyjson} API documentation for +#' further details. #' @param yyjson_write_flag integer vector corresponding to internal \code{yyjson} #' options. 
See \code{yyjson_write_flag} in this package, and read #' the yyjson API documentation for more information. This is considered @@ -277,6 +286,7 @@ opts_write_json <- function( name_repair = c('none', 'minimal'), num_specials = c('null', 'string'), str_specials = c('null', 'string'), + fast_numerics = FALSE, yyjson_write_flag = 0L) { structure( @@ -289,6 +299,7 @@ opts_write_json <- function( name_repair = match.arg(name_repair), str_specials = match.arg(str_specials), num_specials = match.arg(num_specials), + fast_numerics = isTRUE(fast_numerics), yyjson_write_flag = as.integer(yyjson_write_flag) ), class = "opts_write_json" diff --git a/man/opts_write_json.Rd b/man/opts_write_json.Rd index 2533b90..ec15ed5 100644 --- a/man/opts_write_json.Rd +++ b/man/opts_write_json.Rd @@ -13,6 +13,7 @@ opts_write_json( name_repair = c("none", "minimal"), num_specials = c("null", "string"), str_specials = c("null", "string"), + fast_numerics = FALSE, yyjson_write_flag = 0L ) } @@ -48,6 +49,16 @@ representation e.g. "NA"/"NaN" etc. Default: 'null'} be converted to a JSON \code{null} value, or converted to a string "NA"? Default: 'null'} +\item{fast_numerics}{Does the user guarantee that there are no NA, NaN or Inf +values in the numeric vectors? Default: FALSE. If \code{TRUE} then +numeric and integer vectors will be written to JSON using a faster method. +Note: if there are NA, NaN or Inf values, an error will be thrown. +Expert users are invited to also consider the +\code{YYJSON_WRITE_ALLOW_INF_AND_NAN} and +\code{YYJSON_WRITE_INF_AND_NAN_AS_NULL} options for \code{yyjson_write_flag} +and should consult the \code{yyjson} API documentation for +further details.} + \item{yyjson_write_flag}{integer vector corresponding to internal \code{yyjson} options. See \code{yyjson_write_flag} in this package, and read the yyjson API documentation for more information. 
This is considered diff --git a/src/R-yyjson-serialize.c b/src/R-yyjson-serialize.c index c4c6acd..fbdc831 100644 --- a/src/R-yyjson-serialize.c +++ b/src/R-yyjson-serialize.c @@ -36,6 +36,7 @@ serialize_options parse_serialize_options(SEXP serialize_opts_) { .name_repair = NAME_REPAIR_NONE, .num_specials = NUM_SPECIALS_AS_NULL, .str_specials = STR_SPECIALS_AS_NULL, + .fast_numerics = FALSE, .yyjson_write_flag = 0, }; @@ -85,6 +86,8 @@ serialize_options parse_serialize_options(SEXP serialize_opts_) { } else if (strcmp(opt_name, "num_specials") == 0) { const char *val = CHAR(STRING_ELT(val_, 0)); opt.num_specials = strcmp(val, "string") == 0 ? NUM_SPECIALS_AS_STRING : NUM_SPECIALS_AS_NULL; + } else if (strcmp(opt_name, "fast_numerics") == 0) { + opt.fast_numerics = asLogical(val_); } else { warning("Unknown option ignored: '%s'\n", opt_name); } @@ -390,14 +393,9 @@ yyjson_mut_val *vector_factor_to_json_array(SEXP vec_, yyjson_mut_doc *doc, seri //=========================================================================== yyjson_mut_val *vector_rawsxp_to_json_array(SEXP vec_, yyjson_mut_doc *doc, serialize_options *opt) { - yyjson_mut_val *arr = yyjson_mut_arr(doc); - - unsigned char *ptr = RAW(vec_); - for (int i = 0; i < length(vec_); i++) { - yyjson_mut_arr_append(arr, yyjson_mut_uint(doc, *ptr++)); - } - return arr; + // Raw vectors can't have NA, so can use the fast method + return yyjson_mut_arr_with_uint8(doc, RAW(vec_), (size_t)length(vec_)); } @@ -455,16 +453,19 @@ yyjson_mut_val *vector_intsxp_to_json_array(SEXP vec_, yyjson_mut_doc *doc, seri return vector_date_to_json_array(vec_, doc, opt); } else if (inherits(vec_, "POSIXct")) { return vector_posixct_to_json_array(vec_, doc, opt); - } - - yyjson_mut_val *arr = yyjson_mut_arr(doc); - - int32_t *ptr = INTEGER(vec_); - for (int i = 0; i < length(vec_); i++) { - yyjson_mut_arr_append(arr, scalar_integer_to_json_val(*ptr++, doc, opt)); + } else if (opt->fast_numerics) { + return 
yyjson_mut_arr_with_sint32(doc, INTEGER(vec_), (size_t)length(vec_)); + } else { + + yyjson_mut_val *arr = yyjson_mut_arr(doc); + + int32_t *ptr = INTEGER(vec_); + for (int i = 0; i < length(vec_); i++) { + yyjson_mut_arr_append(arr, scalar_integer_to_json_val(*ptr++, doc, opt)); + } + + return arr; } - - return arr; } @@ -480,17 +481,19 @@ yyjson_mut_val *vector_realsxp_to_json_array(SEXP vec_, yyjson_mut_doc *doc, ser return vector_posixct_to_json_array(vec_, doc, opt); } else if (inherits(vec_, "integer64")) { return vector_integer64_to_json_array(vec_, doc, opt); + } else if (opt->fast_numerics) { + return yyjson_mut_arr_with_double(doc, REAL(vec_), (size_t)length(vec_)); + } else { + + yyjson_mut_val *arr = yyjson_mut_arr(doc); + + double *ptr = REAL(vec_); + for (int i = 0; i < length(vec_); i++) { + yyjson_mut_arr_append(arr, scalar_double_to_json_val(*ptr++, doc, opt)); + } + + return arr; } - - - yyjson_mut_val *arr = yyjson_mut_arr(doc); - - double *ptr = REAL(vec_); - for (int i = 0; i < length(vec_); i++) { - yyjson_mut_arr_append(arr, scalar_double_to_json_val(*ptr++, doc, opt)); - } - - return arr; } diff --git a/src/R-yyjson-serialize.h b/src/R-yyjson-serialize.h index 262d6e7..7c6beef 100644 --- a/src/R-yyjson-serialize.h +++ b/src/R-yyjson-serialize.h @@ -75,6 +75,7 @@ typedef struct { unsigned int str_specials; unsigned int num_specials; unsigned int yyjson_write_flag; + bool fast_numerics; } serialize_options; //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/tests/testthat/test-fast-numerics.R b/tests/testthat/test-fast-numerics.R new file mode 100644 index 0000000..328903e --- /dev/null +++ b/tests/testthat/test-fast-numerics.R @@ -0,0 +1,12 @@ + +test_that("fast-numerics works", { + expect_equal( + write_json_str(mtcars, dataframe = 'columns', fast_numerics = FALSE), + write_json_str(mtcars, dataframe = 'columns', fast_numerics = TRUE) + ) + + expect_equal( + write_json_str(iris, dataframe = 'columns', 
fast_numerics = FALSE), + write_json_str(iris, dataframe = 'columns', fast_numerics = TRUE) + ) +})