From f6e29baf59d0a95c75dd109ecda5e4df346221aa Mon Sep 17 00:00:00 2001
From: mike <mikefc@coolbutuseless.com>
Date: Sat, 16 Mar 2024 11:33:54 +1000
Subject: [PATCH] Added read_ndjson_str()

---
 NAMESPACE                    |   1 +
 NEWS.md                      |   1 +
 R/ndjson.R                   |  53 +++++
 man/read_json_conn.Rd        |   3 +-
 man/read_json_file.Rd        |   3 +-
 man/read_json_raw.Rd         |   3 +-
 man/read_json_str.Rd         |   3 +-
 man/read_ndjson_file.Rd      |   3 +-
 man/read_ndjson_str.Rd       |  68 ++++++
 src/Makevars                 |   2 +-
 src/init.c                   |   6 +
 src/ndjson-parse.c           | 422 +++++++++++++++++++++++++++++++++++
 src/utils.c                  |   2 +-
 tests/testthat/test-ndjson.R |  42 +++-
 14 files changed, 601 insertions(+), 11 deletions(-)
 create mode 100644 man/read_ndjson_str.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 4ec2d9c..ef2ea92 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -7,6 +7,7 @@ export(read_json_file)
 export(read_json_raw)
 export(read_json_str)
 export(read_ndjson_file)
+export(read_ndjson_str)
 export(validate_json_file)
 export(validate_json_str)
 export(write_json_file)
diff --git a/NEWS.md b/NEWS.md
index 7b01bf1..b36a90c 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -3,6 +3,7 @@
 # yyjsonr 0.1.18.9004 2024-03-15
 
 * Re-introduce NDJSON support
+* Add `read_ndjson_str()`
 
 # yyjsonr 0.1.18.9003 2024-03-13
 
diff --git a/R/ndjson.R b/R/ndjson.R
index 81e2aba..ff3ef9c 100644
--- a/R/ndjson.R
+++ b/R/ndjson.R
@@ -63,6 +63,59 @@ read_ndjson_file <- function(filename, type = c('df', 'list'), nread = -1, nskip
 }
 
 
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#' Parse an NDJSON string to a data.frame or list
+#' 
+#' If reading as data.frame, each row of NDJSON becomes a row in the data.frame.  
+#' If reading as a list, then each row becomes an element in the list.
+#' 
+#' If parsing NDJSON to a data.frame it is usually better if the json objects
+#' are consistent from line-to-line.  Type inference for the data.frame is done
+#' during initialisation by reading through \code{nprobe} lines.  Warning: if
+#' there is a type-mismatch further into the string than it is probed, then you 
+#' will get missing values in the data.frame, or JSON values not captured in 
+#' the R data.
+#' 
+#' No flattening of the namespace is done i.e. nested objects remain nested.
+#' 
+#' @inheritParams read_ndjson_file
+#' @param x string containing NDJSON
+#'
+#' @examples
+#' json <- write_ndjson_str(head(mtcars))
+#' read_ndjson_str(json, type = 'list')
+#' read_ndjson_str(json, type = 'df')
+#' 
+#' @family JSON Parsers
+#' @return NDJSON data read into R as list or data.frame depending 
+#'         on \code{'type'} argument
+#' @export
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+read_ndjson_str <- function(x, type = c('df', 'list'), nread = -1, nskip = 0, nprobe = 100, opts = list(), ...) {
+  
+  type <- match.arg(type)
+  
+  # The C parsers read the first element of 'x' without checking it, so
+  # reject anything which is not a non-empty character vector up front.
+  if (!is.character(x) || length(x) < 1L) {
+    stop("'x' must be a character string containing NDJSON", call. = FALSE)
+  }
+  
+  # Merge the '...' overrides into 'opts' once; both parsers take the
+  # merged result as their final argument.
+  parse_opts <- modify_list(opts, list(...))
+  
+  # Dispatch to the C parser matching the requested return type.
+  # Only the data.frame parser probes for column types, so only it
+  # receives 'nprobe'.
+  if (type == 'list') {
+    .Call(parse_ndjson_str_as_list_, x, nread, nskip, parse_opts)
+  } else {
+    .Call(parse_ndjson_str_as_df_, x, nread, nskip, nprobe, parse_opts)
+  }
+}
+
+
 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #' Write list or data.frame object to NDJSON in a file
 #' 
diff --git a/man/read_json_conn.Rd b/man/read_json_conn.Rd
index 10a56b8..bdd3d5d 100644
--- a/man/read_json_conn.Rd
+++ b/man/read_json_conn.Rd
@@ -37,6 +37,7 @@ Other JSON Parsers:
 \code{\link{read_json_file}()},
 \code{\link{read_json_raw}()},
 \code{\link{read_json_str}()},
-\code{\link{read_ndjson_file}()}
+\code{\link{read_ndjson_file}()},
+\code{\link{read_ndjson_str}()}
 }
 \concept{JSON Parsers}
diff --git a/man/read_json_file.Rd b/man/read_json_file.Rd
index aac53cf..82edf0a 100644
--- a/man/read_json_file.Rd
+++ b/man/read_json_file.Rd
@@ -30,6 +30,7 @@ Other JSON Parsers:
 \code{\link{read_json_conn}()},
 \code{\link{read_json_raw}()},
 \code{\link{read_json_str}()},
-\code{\link{read_ndjson_file}()}
+\code{\link{read_ndjson_file}()},
+\code{\link{read_ndjson_str}()}
 }
 \concept{JSON Parsers}
diff --git a/man/read_json_raw.Rd b/man/read_json_raw.Rd
index 55a7f63..185b424 100644
--- a/man/read_json_raw.Rd
+++ b/man/read_json_raw.Rd
@@ -29,6 +29,7 @@ Other JSON Parsers:
 \code{\link{read_json_conn}()},
 \code{\link{read_json_file}()},
 \code{\link{read_json_str}()},
-\code{\link{read_ndjson_file}()}
+\code{\link{read_ndjson_file}()},
+\code{\link{read_ndjson_str}()}
 }
 \concept{JSON Parsers}
diff --git a/man/read_json_str.Rd b/man/read_json_str.Rd
index 6891972..f11bac0 100644
--- a/man/read_json_str.Rd
+++ b/man/read_json_str.Rd
@@ -28,6 +28,7 @@ Other JSON Parsers:
 \code{\link{read_json_conn}()},
 \code{\link{read_json_file}()},
 \code{\link{read_json_raw}()},
-\code{\link{read_ndjson_file}()}
+\code{\link{read_ndjson_file}()},
+\code{\link{read_ndjson_str}()}
 }
 \concept{JSON Parsers}
diff --git a/man/read_ndjson_file.Rd b/man/read_ndjson_file.Rd
index 49d9512..f9119a4 100644
--- a/man/read_ndjson_file.Rd
+++ b/man/read_ndjson_file.Rd
@@ -63,6 +63,7 @@ Other JSON Parsers:
 \code{\link{read_json_conn}()},
 \code{\link{read_json_file}()},
 \code{\link{read_json_raw}()},
-\code{\link{read_json_str}()}
+\code{\link{read_json_str}()},
+\code{\link{read_ndjson_str}()}
 }
 \concept{JSON Parsers}
diff --git a/man/read_ndjson_str.Rd b/man/read_ndjson_str.Rd
new file mode 100644
index 0000000..5d736e3
--- /dev/null
+++ b/man/read_ndjson_str.Rd
@@ -0,0 +1,68 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ndjson.R
+\name{read_ndjson_str}
+\alias{read_ndjson_str}
+\title{Parse an NDJSON string to a data.frame or list}
+\usage{
+read_ndjson_str(
+  x,
+  type = c("df", "list"),
+  nread = -1,
+  nskip = 0,
+  nprobe = 100,
+  opts = list(),
+  ...
+)
+}
+\arguments{
+\item{x}{string containing NDJSON}
+
+\item{type}{The type of R object the JSON should be parsed into. Valid
+values are 'df' or 'list'.  Default: 'df' (data.frame)}
+
+\item{nread}{Number of records to read. Default: -1 (reads all JSON strings)}
+
+\item{nskip}{Number of records to skip before starting to read. Default: 0
+(skip no data)}
+
+\item{nprobe}{Number of lines to read to determine types for data.frame
+columns.  Default: 100.   Use \code{-1} to probe entire file.}
+
+\item{opts}{Named list of options for parsing. Usually created by \code{opts_read_json()}}
+
+\item{...}{Other named options can be used to override any options in \code{opts}.
+The valid named options are identical to arguments to \code{\link[=opts_read_json]{opts_read_json()}}}
+}
+\value{
+NDJSON data read into R as list or data.frame depending
+on \code{'type'} argument
+}
+\description{
+If reading as data.frame, each row of NDJSON becomes a row in the data.frame.
+If reading as a list, then each row becomes an element in the list.
+}
+\details{
+If parsing NDJSON to a data.frame it is usually better if the json objects
+are consistent from line-to-line.  Type inference for the data.frame is done
+during initialisation by reading through \code{nprobe} lines.  Warning: if
+there is a type-mismatch further into the string than it is probed, then you
+will get missing values in the data.frame, or JSON values not captured in
+the R data.
+
+No flattening of the namespace is done i.e. nested objects remain nested.
+}
+\examples{
+json <- write_ndjson_str(head(mtcars))
+read_ndjson_str(json, type = 'list')
+read_ndjson_str(json, type = 'df')
+
+}
+\seealso{
+Other JSON Parsers: 
+\code{\link{read_json_conn}()},
+\code{\link{read_json_file}()},
+\code{\link{read_json_raw}()},
+\code{\link{read_json_str}()},
+\code{\link{read_ndjson_file}()}
+}
+\concept{JSON Parsers}
diff --git a/src/Makevars b/src/Makevars
index 10fb6a8..cffea85 100644
--- a/src/Makevars
+++ b/src/Makevars
@@ -1,2 +1,2 @@
 PKG_LIBS=-lz
-#PKG_CFLAGS += -Wconversion
\ No newline at end of file
+#PKG_CFLAGS += -Wconversion
diff --git a/src/init.c b/src/init.c
index 94810cf..fa05a26 100644
--- a/src/init.c
+++ b/src/init.c
@@ -26,6 +26,9 @@ extern SEXP validate_json_str_ (SEXP str_     , SEXP verbose_, SEXP parse_opts_)
 extern SEXP parse_ndjson_file_as_df_  (SEXP filename_, SEXP nread_, SEXP nskip_, SEXP nprobe_, SEXP parse_opts_);
 extern SEXP parse_ndjson_file_as_list_(SEXP filename_, SEXP nread_, SEXP nskip_,               SEXP parse_opts_);
 
+extern SEXP parse_ndjson_str_as_df_  (SEXP str_, SEXP nread_, SEXP nskip_, SEXP nprobe_, SEXP parse_opts_);
+extern SEXP parse_ndjson_str_as_list_(SEXP str_, SEXP nread_, SEXP nskip_,               SEXP parse_opts_);
+
 extern SEXP serialize_df_to_ndjson_str_ (SEXP robj_,                 SEXP serialize_opts_);
 extern SEXP serialize_df_to_ndjson_file_(SEXP robj_, SEXP filename_, SEXP serialize_opts_);
 
@@ -58,6 +61,9 @@ static const R_CallMethodDef CEntries[] = {
   {"parse_ndjson_file_as_df_"  , (DL_FUNC) &parse_ndjson_file_as_df_  , 5},
   {"parse_ndjson_file_as_list_", (DL_FUNC) &parse_ndjson_file_as_list_, 4},
   
+  {"parse_ndjson_str_as_df_"  , (DL_FUNC) &parse_ndjson_str_as_df_  , 5},
+  {"parse_ndjson_str_as_list_", (DL_FUNC) &parse_ndjson_str_as_list_, 4},
+  
   {"serialize_df_to_ndjson_str_" , (DL_FUNC) &serialize_df_to_ndjson_str_ , 2},
   {"serialize_df_to_ndjson_file_", (DL_FUNC) &serialize_df_to_ndjson_file_, 3},
   
diff --git a/src/ndjson-parse.c b/src/ndjson-parse.c
index 736a191..3bb1ca2 100644
--- a/src/ndjson-parse.c
+++ b/src/ndjson-parse.c
@@ -206,6 +206,130 @@ SEXP parse_ndjson_file_as_list_(SEXP filename_, SEXP nread_, SEXP nskip_, SEXP p
 
 
 
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+// Parse an NDJSON string as a list of R objects: one-r-object-per-line-of-input
+// 
+// Compared to parsing to data.frame
+//   PRO: Simple
+//   PRO: Can handle any type without worrying about data.frame column types
+//        being consistent across multiple input lines
+//   CON: Slower: Every object on every line gets allocated into an R object
+//        Compared to data.frame which allocates all its space at once and
+//        just slots values into this memory.
+//
+// str_        : character vector; only its first element is parsed
+// nread_      : maximum number of records to return (negative = no limit)
+// nskip_      : number of leading records to skip before reading
+// parse_opts_ : option list handed to create_parse_options()
+//
+// Returns a list with one element per record read.  A record which cannot
+// be parsed raises a warning and is stored as NULL.
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+SEXP parse_ndjson_str_as_list_(SEXP str_, SEXP nread_, SEXP nskip_, SEXP parse_opts_) {
+  parse_options opt = create_parse_options(parse_opts_);
+  opt.yyjson_read_flag |= YYJSON_READ_STOP_WHEN_DONE;
+
+  int nread = asInteger(nread_);
+  int nskip = asInteger(nskip_);
+
+  // Negative 'nread' means "no limit"; say so explicitly (was an implicit cast)
+  if (nread < 0) { nread = INT32_MAX; }
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  // Allocating a list with a default starting size to grow into.
+  // Protected by index so that it can be re-protected when it is regrown.
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  PROTECT_INDEX list_idx;
+  SEXP list_ = allocVector(VECSXP, 64);
+  PROTECT_WITH_INDEX(list_, &list_idx);
+  R_xlen_t list_size = XLENGTH(list_);
+
+  // 'str'/'str_size' are the unread remainder; 'total_read' the bytes consumed
+  char *str = (char *)CHAR( STRING_ELT(str_, 0) );
+  size_t str_size = strlen(str);
+  size_t orig_str_size = str_size;
+  size_t total_read = 0;
+
+  // Skip lines if requested
+  while (nskip > 0 && total_read < orig_str_size) {
+    yyjson_read_err err;
+    yyjson_doc *doc = yyjson_read_opts(str, str_size, opt.yyjson_read_flag, NULL, &err);
+    size_t pos = yyjson_doc_get_read_size(doc);
+    if (doc == NULL) {
+      // A failed parse reports a read size of 0: skip to the end of the line
+      // so that a malformed record is consumed as exactly one record.
+      char *eol = memchr(str, '\n', str_size);
+      pos = (eol == NULL) ? str_size : (size_t)(eol - str);
+    }
+    yyjson_doc_free(doc);
+
+    // Advance past the record and its newline, but never beyond the end
+    size_t step = (pos < str_size) ? pos + 1 : str_size;
+    total_read += step;
+    str        += step;
+    str_size   -= step;
+
+    nskip--;
+  }
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  // Read records until the input or the 'nread' budget is exhausted.
+  //   - grow the list if the next record would overflow it
+  //   - create a yyjson doc from the current line
+  //   - if the document is NULL insert a NULL into the list,
+  //     otherwise insert the resulting robject
+  //   - free the doc
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  int i = 0;
+  while (total_read < orig_str_size && i < nread) {
+
+    // Grow list if we need more room.  REPROTECT keeps the old list
+    // protected while grow_list() allocates its replacement.
+    if (i >= list_size) {
+      REPROTECT(list_ = grow_list(list_), list_idx);
+      list_size = XLENGTH(list_);
+    }
+
+    yyjson_read_err err;
+    yyjson_doc *doc = yyjson_read_opts(str, str_size, opt.yyjson_read_flag, NULL, &err);
+    size_t pos = yyjson_doc_get_read_size(doc);
+
+    if (doc == NULL) {
+      warning("Couldn't parse NDJSON row %i. Inserting 'NULL'\n", i + 1);
+      SET_VECTOR_ELT(list_, i, R_NilValue);
+      // Read size is 0 on failure: resynchronise on the next newline
+      char *eol = memchr(str, '\n', str_size);
+      pos = (eol == NULL) ? str_size : (size_t)(eol - str);
+    } else {
+      SET_VECTOR_ELT(list_, i, parse_json_from_str(str, str_size, &opt));
+    }
+    i++;
+
+    yyjson_doc_free(doc);
+
+    // Advance past the record and its newline, but never beyond the end
+    size_t step = (pos < str_size) ? pos + 1 : str_size;
+    total_read += step;
+    str        += step;
+    str_size   -= step;
+  }
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  // In-situ faux truncation of a VECSXP object.
+  // This just hides the trailing elements from R
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  SETLENGTH(list_, i);
+  SET_TRUELENGTH(list_, list_size);
+  SET_GROWABLE_BIT(list_);
+
+  // Tidy memory and return
+  UNPROTECT(1);
+  return list_;
+}
+
+
+
+
 
 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 // Parse ndjson as a data.frame one-rorw-per-line-of-input
@@ -508,3 +632,301 @@ SEXP parse_ndjson_file_as_df_(SEXP filename_, SEXP nread_, SEXP nskip_, SEXP npr
   return df_;
 }
 
+
+
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+// Parse an NDJSON string into a data.frame: one row per line of input
+//
+// str_        : character vector; only its first element is parsed
+// nread_      : maximum number of rows to read  (<= 0 means no limit)
+// nskip_      : number of leading records to skip before reading
+// nprobe_     : number of records inspected to determine the column names
+//               and types  (<= 0 means probe every record)
+// parse_opts_ : option list handed to create_parse_options()
+//
+// Raises an R error if a probed or parsed record is not valid JSON, or if a
+// parsed record is not a JSON object.
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+SEXP parse_ndjson_str_as_df_(SEXP str_, SEXP nread_, SEXP nskip_, SEXP nprobe_, SEXP parse_opts_) {
+
+  int nprotect = 0;
+  parse_options opt = create_parse_options(parse_opts_);
+  opt.yyjson_read_flag |= YYJSON_READ_STOP_WHEN_DONE;
+
+  int nread  = asInteger(nread_);
+  int nskip  = asInteger(nskip_);
+  int nprobe = asInteger(nprobe_);
+
+  if (nread  <= 0) { nread  = INT32_MAX; }
+  if (nprobe <= 0) { nprobe = INT32_MAX; }
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  // 'str' and 'str_size' describe the part of the input not yet consumed;
+  // 'total_read' counts the bytes consumed so far.
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  char *str = (char *)CHAR( STRING_ELT(str_, 0) );
+  size_t str_size = strlen(str);
+  size_t orig_str_size = str_size;
+  size_t total_read = 0;
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  // Skip lines if requested
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  while (nskip > 0 && total_read < orig_str_size) {
+    yyjson_read_err err;
+    yyjson_doc *doc = yyjson_read_opts(str, str_size, opt.yyjson_read_flag, NULL, &err);
+    size_t pos = yyjson_doc_get_read_size(doc);
+    if (doc == NULL) {
+      // A failed parse reports a read size of 0: skip to the end of the line
+      // so that a malformed record is consumed as exactly one record.
+      char *eol = memchr(str, '\n', str_size);
+      pos = (eol == NULL) ? str_size : (size_t)(eol - str);
+    }
+    yyjson_doc_free(doc);
+
+    // Advance past the record and its newline, but never beyond the end
+    size_t step = (pos < str_size) ? pos + 1 : str_size;
+    total_read += step;
+    str        += step;
+    str_size   -= step;
+
+    nskip--;
+  }
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  // Accumulation of unique key-names in the objects
+  // These will become the column names of the data.frame.
+  // Each column also has a 'type_bitset' to keep track of the type of each
+  // value across the different {}-objects
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  char *colname[MAX_DF_COLS];
+  unsigned int type_bitset[MAX_DF_COLS] = {0};
+  unsigned int sexp_type[MAX_DF_COLS] = {0};
+  int ncols = 0;
+  int nrows = 0;
+
+  // Remember where the data starts so it can be re-read after probing
+  char *mark_str = str;
+  size_t mark_str_size = str_size;
+  size_t mark_total_read = total_read;
+
+  while (nprobe > 0 && total_read < orig_str_size) {
+    yyjson_read_err err;
+    yyjson_doc *doc = yyjson_read_opts(str, str_size, opt.yyjson_read_flag, NULL, &err);
+    size_t pos = yyjson_doc_get_read_size(doc);
+    if (doc == NULL) {
+      // output_verbose_error(buf, err);
+      error("Couldn't parse JSON during probe line %i\n", nrows + 1);
+    }
+
+    yyjson_val *obj = yyjson_doc_get_root(doc);
+    yyjson_val *key;
+    yyjson_obj_iter obj_iter = yyjson_obj_iter_with(obj); // MUST be an object
+
+    while ((key = yyjson_obj_iter_next(&obj_iter))) {
+      yyjson_val *val = yyjson_obj_iter_get_val(key);
+
+      int name_idx = -1;
+      for (int i = 0; i < ncols; i++) {
+        if (yyjson_equals_str(key, colname[i])) {
+          name_idx = i;
+          break;
+        }
+      }
+      if (name_idx < 0) {
+        // Name has not been seen yet.  Keep a private copy of it (released
+        // by R when this call returns) so it outlives the yyjson document.
+        const char *key_str = yyjson_get_str(key);
+        size_t key_size = strlen(key_str) + 1;
+        name_idx = ncols;
+        colname[ncols] = R_alloc(key_size, 1);
+        memcpy(colname[ncols], key_str, key_size);
+        ncols++;
+        if (ncols == MAX_DF_COLS) {
+          error("Maximum columns for data.frame exceeded: %i", MAX_DF_COLS);
+        }
+      }
+
+      type_bitset[name_idx] = update_type_bitset(type_bitset[name_idx], val, &opt);
+    }
+
+    // All information needed from this document has been copied out
+    yyjson_doc_free(doc);
+
+    // Advance past the record and its newline, but never beyond the end
+    size_t step = (pos < str_size) ? pos + 1 : str_size;
+    total_read += step;
+    str        += step;
+    str_size   -= step;
+
+    nrows++;
+    nprobe--;
+  }
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  // Count the records left unprobed.  Each newline ends a record, and a final
+  // record without a trailing newline has to be counted separately.
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  if (total_read < orig_str_size) {
+    for (size_t sp = 0; sp < str_size; sp++) {
+      if (str[sp] == '\n') {
+        nrows++;
+      }
+    }
+    if (str[str_size - 1] != '\n') {
+      // String does not end in newline, so need to manually count the last row
+      nrows++;
+    }
+  }
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  // How many rows does the user want to read vs how many do we have
+  // and how many they want to skip.
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  nrows = nrows > nread ? nread : nrows;
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  // Create a data.frame.
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  SEXP df_ = PROTECT(allocVector(VECSXP, ncols)); nprotect++;
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  // For each column name,
+  //   - determine the best SEXP to represent the 'type_bitset'
+  //   - allocate a vector of that type with one element per row
+  //   - place this vector as a column in the data.frame
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  for (unsigned int col = 0; col < ncols; col++) {
+    sexp_type[col] = get_best_sexp_to_represent_type_bitset(type_bitset[col], &opt);
+
+    // INT64SXP is actually contained in a REALSXP
+    unsigned int alloc_type = sexp_type[col] == INT64SXP ? REALSXP : sexp_type[col];
+
+    // Allocate memory for column
+    SEXP vec_ = PROTECT(allocVector(alloc_type, nrows));
+    if (sexp_type[col] == INT64SXP) {
+      setAttrib(vec_, R_ClassSymbol, mkString("integer64"));
+    }
+
+    // place vector into data.frame
+    SET_VECTOR_ELT(df_, col, vec_);
+    UNPROTECT(1); // no longer needs protection once part of data.frame
+  }
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  // Parse string: rewind to the first record after the skipped ones
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  str        = mark_str;
+  str_size   = mark_str_size;
+  total_read = mark_total_read;
+
+  // keep track of actual number of rows parsed.
+  // This might not be the same as 'nrow' as we can skip rows that we 
+  // can't parse.
+  int row = 0;
+
+  for (unsigned int i = 0; i < nrows; i++) {
+    yyjson_read_err err;
+    yyjson_doc *doc = yyjson_read_opts(str, str_size, opt.yyjson_read_flag, NULL, &err);
+    size_t pos = yyjson_doc_get_read_size(doc);
+    if (doc == NULL) {
+      // output_verbose_error(buf, err);
+      error("Couldn't parse JSON on line %i\n", i + 1);
+    }
+
+    yyjson_val *obj = yyjson_doc_get_root(doc);
+    if (yyjson_get_type(obj) != YYJSON_TYPE_OBJ) {
+      yyjson_doc_free(doc);
+      error("parse_ndjson_as_df() only works if all lines represent JSON objects");
+    }
+
+    for (unsigned int col = 0; col < ncols; col++) {
+      SEXP column_ = VECTOR_ELT(df_, col);
+
+      yyjson_val *val = yyjson_obj_get(obj, colname[col]);
+
+      switch(sexp_type[col]) {
+      case LGLSXP:
+        LOGICAL(column_)[row] = json_val_to_logical(val, &opt);
+        break;
+      case INTSXP:
+        INTEGER(column_)[row] = json_val_to_integer(val, &opt);
+        break;
+      case INT64SXP: {
+        long long tmp = json_val_to_integer64(val, &opt);
+        ((long long *)(REAL(column_)))[row] = tmp;
+      }
+        break;
+      case REALSXP:
+        REAL(column_)[row] = json_val_to_double(val, &opt);
+        break;
+      case STRSXP:
+        if (val == NULL) {
+          SET_STRING_ELT(column_, row, NA_STRING);
+        } else {
+          SET_STRING_ELT(column_, row, json_val_to_charsxp(val, &opt));
+        }
+        break;
+      case VECSXP:
+        if (val == NULL) {
+          SET_VECTOR_ELT(column_, row, opt.df_missing_list_elem);
+        } else {
+          SET_VECTOR_ELT(column_, row, json_as_robj(val, &opt));
+        }
+        break;
+      default:
+        error("parse_ndjson_str_as_df_(): Unknown type");
+      } 
+
+    }
+
+    // Every value has been copied into R objects: release the document
+    yyjson_doc_free(doc);
+
+    // Advance past the record and its newline, but never beyond the end
+    size_t step = (pos < str_size) ? pos + 1 : str_size;
+    total_read += step;
+    str        += step;
+    str_size   -= step;
+
+    row++;
+  }
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  // Set colnames on data.frame
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  SEXP nms_ = PROTECT(allocVector(STRSXP, ncols)); nprotect++;
+  for (unsigned int i = 0; i < ncols; i++) {
+    SET_STRING_ELT(nms_, i, mkChar(colname[i]));
+  }
+  Rf_setAttrib(df_, R_NamesSymbol, nms_);
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  // Resize each data.frame column vector to match the actual data length
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  if (nrows != row) {
+    int allocated_length = nrows;
+    int data_length      = row;
+    for (int i=0; i < length(df_); i++) {
+      SETLENGTH(VECTOR_ELT(df_, i), data_length);
+      SET_TRUELENGTH(VECTOR_ELT(df_, i), allocated_length);
+      SET_GROWABLE_BIT(VECTOR_ELT(df_, i));
+    }
+  }
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  // Set empty rownames on data.frame
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  SEXP rownames = PROTECT(allocVector(INTSXP, 2)); nprotect++;
+  SET_INTEGER_ELT(rownames, 0, NA_INTEGER);
+  SET_INTEGER_ELT(rownames, 1, -row);
+  setAttrib(df_, R_RowNamesSymbol, rownames);
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  // Set 'data.frame' class
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  SET_CLASS(df_, mkString("data.frame"));
+
+  UNPROTECT(nprotect);
+  return df_;
+}
diff --git a/src/utils.c b/src/utils.c
index c8606de..4a87ce3 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -20,4 +20,4 @@
 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 SEXP yyjson_version_(void) {
   return mkString(YYJSON_VERSION_STRING);
-}
\ No newline at end of file
+}
diff --git a/tests/testthat/test-ndjson.R b/tests/testthat/test-ndjson.R
index d099142..6138914 100644
--- a/tests/testthat/test-ndjson.R
+++ b/tests/testthat/test-ndjson.R
@@ -59,9 +59,9 @@ test_that("write_ndjson_file df works", {
 test_that("write_ndjson_str df works", {
   file <- tempfile()
   write_ndjson_file(iris, file)
-  ref <- write_ndjson_str(iris)
+  ref2 <- write_ndjson_str(iris)
   res <- paste(readLines(file), collapse = "\n")  
-  expect_identical(res, ref)
+  expect_identical(res, ref2)
 })
 
 test_that("write_ndjson_file list works", {
@@ -74,11 +74,45 @@ test_that("write_ndjson_file list works", {
 
 
 test_that("write_ndjson_str list works", {
+  
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Check write_ndjson_file() and write_ndjson_str() agree
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   file <- tempfile()
   write_ndjson_file(tref, file)
-  ref <- write_ndjson_str(tref)
+  ref2 <- write_ndjson_str(tref)
   res <- paste(readLines(file), collapse = "\n")  
-  expect_identical(res, ref)
+  expect_identical(res, ref2)
+  
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Read NDJSON string as list
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  json <- write_ndjson_str(tref)
+  ref2 <- read_ndjson_str(json, type = 'list')  
+  expect_identical(ref2, tref)
+  
+  json <- write_ndjson_str(tref)
+  ref2 <- read_ndjson_str(json, type = 'list', nskip = 1)  
+  expect_identical(ref2, tref[-1])
+  
+  json <- write_ndjson_str(tref)
+  ref2 <- read_ndjson_str(json, type = 'list', nskip = 2, nread = 3)  
+  expect_identical(ref2, tref[3:5])
+  
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Read NDJSON string as data.frame
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  json <- write_ndjson_str(ref)
+  ref2 <- read_ndjson_str(json, type = 'df')  
+  expect_identical(ref2, ref)
+  
+  json <- write_ndjson_str(ref)
+  ref2 <- read_ndjson_str(json, type = 'df', nskip  = 1)  
+  expect_identical(ref2, ref[-1, ], ignore_attr = TRUE)
+  
+  json <- write_ndjson_str(ref)
+  ref2 <- read_ndjson_str(json, type = 'df', nskip  = 2, nread = 3)  
+  expect_identical(ref2, ref[3:5, ], ignore_attr = TRUE)
 })