@@ -96,11 +96,29 @@ impl<'a> Parser<'a> {
9696 /// Parses the List type
9797 fn parse_list ( & mut self ) -> ArrowResult < DataType > {
9898 self . expect_token ( Token :: LParen ) ?;
99+ let nullable = self . nullable ( ) ;
99100 let data_type = self . parse_next_type ( ) ?;
100- self . expect_token ( Token :: RParen ) ?;
101- Ok ( DataType :: List ( Arc :: new ( Field :: new_list_field (
102- data_type, true ,
103- ) ) ) )
101+
102+ match self . next_token ( ) ? {
103+ // default field name
104+ Token :: RParen => Ok ( DataType :: List ( Arc :: new ( Field :: new_list_field (
105+ data_type, nullable,
106+ ) ) ) ) ,
107+ // expects: field: 'field_name'
108+ Token :: Comma => {
109+ self . expect_token ( Token :: Field ) ?;
110+ self . expect_token ( Token :: Colon ) ?;
111+ let field_name = self . parse_single_quoted_string ( "List's field" ) ?;
112+ self . expect_token ( Token :: RParen ) ?;
113+ Ok ( DataType :: List ( Arc :: new ( Field :: new (
114+ field_name, data_type, nullable,
115+ ) ) ) )
116+ }
117+ tok => Err ( make_error (
118+ self . val ,
119+ & format ! ( "Expected a single string for a field name; got {tok:?}" ) ,
120+ ) ) ,
121+ }
104122 }
105123
106124 /// Parses the LargeList type
@@ -150,6 +168,19 @@ impl<'a> Parser<'a> {
150168 }
151169 }
152170
171+ /// Parses the next single quoted string
172+ fn parse_single_quoted_string ( & mut self , context : & str ) -> ArrowResult < String > {
173+ let token = self . next_token ( ) ?;
174+ if let Token :: SingleQuotedString ( string) = token {
175+ Ok ( string)
176+ } else {
177+ Err ( make_error (
178+ self . val ,
179+ & format ! ( "expected single quoted string for {context}, got '{token}'" ) ,
180+ ) )
181+ }
182+ }
183+
153184 /// Parses the next integer value
154185 fn parse_i64 ( & mut self , context : & str ) -> ArrowResult < i64 > {
155186 match self . next_token ( ) ? {
@@ -354,16 +385,13 @@ impl<'a> Parser<'a> {
354385 tok => {
355386 return Err ( make_error (
356387 self . val ,
357- & format ! ( "Expected a quoted string for a field name; got {tok:?}" ) ,
388+ & format ! ( "Expected a double quoted string for a field name; got {tok:?}" ) ,
358389 ) ) ;
359390 }
360391 } ;
361392 self . expect_token ( Token :: Colon ) ?;
362393
363- let nullable = self
364- . tokenizer
365- . next_if ( |next| matches ! ( next, Ok ( Token :: Nullable ) ) )
366- . is_some ( ) ;
394+ let nullable = self . nullable ( ) ;
367395 let field_type = self . parse_next_type ( ) ?;
368396 fields. push ( Arc :: new ( Field :: new ( field_name, field_type, nullable) ) ) ;
369397 match self . next_token ( ) ? {
@@ -382,6 +410,12 @@ impl<'a> Parser<'a> {
382410 Ok ( DataType :: Struct ( Fields :: from ( fields) ) )
383411 }
384412
413+ fn nullable ( & mut self ) -> bool {
414+ self . tokenizer
415+ . next_if ( |next| matches ! ( next, Ok ( Token :: Nullable ) ) )
416+ . is_some ( )
417+ }
418+
385419 /// return the next token, or an error if there are none left
386420 fn next_token ( & mut self ) -> ArrowResult < Token > {
387421 match self . tokenizer . next ( ) {
@@ -406,6 +440,11 @@ fn is_separator(c: char) -> bool {
406440 c == '(' || c == ')' || c == ',' || c == ':' || c == ' '
407441}
408442
/// The kind of quote character that delimits a quoted string token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum QuoteType {
    /// Delimited by `"`
    Double,
    /// Delimited by `'`
    Single,
}
447+
409448#[ derive( Debug ) ]
410449/// Splits a string like Dictionary(Int32, Int64) into tokens suitable for parsing
411450///
@@ -527,6 +566,7 @@ impl<'a> Tokenizer<'a> {
527566 "None" => Token :: None ,
528567
529568 "nullable" => Token :: Nullable ,
569+ "field" => Token :: Field ,
530570
531571 "Struct" => Token :: Struct ,
532572
@@ -537,9 +577,14 @@ impl<'a> Tokenizer<'a> {
537577 Ok ( token)
538578 }
539579
540- /// Parses e.g. `"foo bar"`
541- fn parse_quoted_string ( & mut self ) -> ArrowResult < Token > {
542- if self . next_char ( ) != Some ( '\"' ) {
580+ /// Parses e.g. `"foo bar"`, `'foo bar'`
581+ fn parse_quoted_string ( & mut self , quote_type : QuoteType ) -> ArrowResult < Token > {
582+ let quote = match quote_type {
583+ QuoteType :: Double => '\"' ,
584+ QuoteType :: Single => '\'' ,
585+ } ;
586+
587+ if self . next_char ( ) != Some ( quote) {
543588 return Err ( make_error ( self . val , "Expected \" " ) ) ;
544589 }
545590
@@ -561,7 +606,7 @@ impl<'a> Tokenizer<'a> {
561606 is_escaped = true ;
562607 self . word . push ( c) ;
563608 }
564- '"' => {
609+ c if c == quote => {
565610 if is_escaped {
566611 self . word . push ( c) ;
567612 is_escaped = false ;
@@ -585,7 +630,10 @@ impl<'a> Tokenizer<'a> {
585630 return Err ( make_error ( self . val , "empty strings aren't allowed" ) ) ;
586631 }
587632
588- Ok ( Token :: DoubleQuotedString ( val) )
633+ match quote_type {
634+ QuoteType :: Double => Ok ( Token :: DoubleQuotedString ( val) ) ,
635+ QuoteType :: Single => Ok ( Token :: SingleQuotedString ( val) ) ,
636+ }
589637 }
590638}
591639
@@ -601,7 +649,10 @@ impl Iterator for Tokenizer<'_> {
601649 continue ;
602650 }
603651 '"' => {
604- return Some ( self . parse_quoted_string ( ) ) ;
652+ return Some ( self . parse_quoted_string ( QuoteType :: Double ) ) ;
653+ }
654+ '\'' => {
655+ return Some ( self . parse_quoted_string ( QuoteType :: Single ) ) ;
605656 }
606657 '(' => {
607658 self . next_char ( ) ;
@@ -652,11 +703,13 @@ enum Token {
652703 None ,
653704 Integer ( i64 ) ,
654705 DoubleQuotedString ( String ) ,
706+ SingleQuotedString ( String ) ,
655707 List ,
656708 LargeList ,
657709 FixedSizeList ,
658710 Struct ,
659711 Nullable ,
712+ Field ,
660713}
661714
662715impl Display for Token {
@@ -687,8 +740,10 @@ impl Display for Token {
687740 Token :: Dictionary => write ! ( f, "Dictionary" ) ,
688741 Token :: Integer ( v) => write ! ( f, "Integer({v})" ) ,
689742 Token :: DoubleQuotedString ( s) => write ! ( f, "DoubleQuotedString({s})" ) ,
743+ Token :: SingleQuotedString ( s) => write ! ( f, "SingleQuotedString({s})" ) ,
690744 Token :: Struct => write ! ( f, "Struct" ) ,
691745 Token :: Nullable => write ! ( f, "nullable" ) ,
746+ Token :: Field => write ! ( f, "field" ) ,
692747 }
693748 }
694749}
@@ -828,7 +883,16 @@ mod test {
828883 ) ,
829884 ] ) ) ,
830885 DataType :: Struct ( Fields :: empty( ) ) ,
831- // TODO support more structured types (List, LargeList, Union, Map, RunEndEncoded, etc)
886+ DataType :: List ( Arc :: new( Field :: new_list_field( DataType :: Int64 , true ) ) ) ,
887+ DataType :: List ( Arc :: new( Field :: new_list_field( DataType :: Int64 , false ) ) ) ,
888+ DataType :: List ( Arc :: new( Field :: new( "Int64" , DataType :: Int64 , true ) ) ) ,
889+ DataType :: List ( Arc :: new( Field :: new( "Int64" , DataType :: Int64 , false ) ) ) ,
890+ DataType :: List ( Arc :: new( Field :: new(
891+ "nested_list" ,
892+ DataType :: List ( Arc :: new( Field :: new( "Int64" , DataType :: Int64 , true ) ) ) ,
893+ true ,
894+ ) ) ) ,
895+ // TODO support more structured types (LargeList, Union, Map, RunEndEncoded, etc)
832896 ]
833897 }
834898
0 commit comments