Skip to content

Commit 79b6f7e

Browse files
committed
support parse list
1 parent d49f017 commit 79b6f7e

File tree

1 file changed

+80
-16
lines changed

1 file changed

+80
-16
lines changed

arrow-schema/src/datatype_parse.rs

Lines changed: 80 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,29 @@ impl<'a> Parser<'a> {
9696
/// Parses the List type
9797
fn parse_list(&mut self) -> ArrowResult<DataType> {
9898
self.expect_token(Token::LParen)?;
99+
let nullable = self.nullable();
99100
let data_type = self.parse_next_type()?;
100-
self.expect_token(Token::RParen)?;
101-
Ok(DataType::List(Arc::new(Field::new_list_field(
102-
data_type, true,
103-
))))
101+
102+
match self.next_token()? {
103+
// default field name
104+
Token::RParen => Ok(DataType::List(Arc::new(Field::new_list_field(
105+
data_type, nullable,
106+
)))),
107+
// expects: field: 'field_name'
108+
Token::Comma => {
109+
self.expect_token(Token::Field)?;
110+
self.expect_token(Token::Colon)?;
111+
let field_name = self.parse_single_quoted_string("List's field")?;
112+
self.expect_token(Token::RParen)?;
113+
Ok(DataType::List(Arc::new(Field::new(
114+
field_name, data_type, nullable,
115+
))))
116+
}
117+
tok => Err(make_error(
118+
self.val,
119+
&format!("Expected a single string for a field name; got {tok:?}"),
120+
)),
121+
}
104122
}
105123

106124
/// Parses the LargeList type
@@ -150,6 +168,19 @@ impl<'a> Parser<'a> {
150168
}
151169
}
152170

171+
/// Parses the next single quoted string
172+
fn parse_single_quoted_string(&mut self, context: &str) -> ArrowResult<String> {
173+
let token = self.next_token()?;
174+
if let Token::SingleQuotedString(string) = token {
175+
Ok(string)
176+
} else {
177+
Err(make_error(
178+
self.val,
179+
&format!("expected single quoted string for {context}, got '{token}'"),
180+
))
181+
}
182+
}
183+
153184
/// Parses the next integer value
154185
fn parse_i64(&mut self, context: &str) -> ArrowResult<i64> {
155186
match self.next_token()? {
@@ -354,16 +385,13 @@ impl<'a> Parser<'a> {
354385
tok => {
355386
return Err(make_error(
356387
self.val,
357-
&format!("Expected a quoted string for a field name; got {tok:?}"),
388+
&format!("Expected a double quoted string for a field name; got {tok:?}"),
358389
));
359390
}
360391
};
361392
self.expect_token(Token::Colon)?;
362393

363-
let nullable = self
364-
.tokenizer
365-
.next_if(|next| matches!(next, Ok(Token::Nullable)))
366-
.is_some();
394+
let nullable = self.nullable();
367395
let field_type = self.parse_next_type()?;
368396
fields.push(Arc::new(Field::new(field_name, field_type, nullable)));
369397
match self.next_token()? {
@@ -382,6 +410,12 @@ impl<'a> Parser<'a> {
382410
Ok(DataType::Struct(Fields::from(fields)))
383411
}
384412

413+
fn nullable(&mut self) -> bool {
414+
self.tokenizer
415+
.next_if(|next| matches!(next, Ok(Token::Nullable)))
416+
.is_some()
417+
}
418+
385419
/// return the next token, or an error if there are none left
386420
fn next_token(&mut self) -> ArrowResult<Token> {
387421
match self.tokenizer.next() {
@@ -406,6 +440,11 @@ fn is_separator(c: char) -> bool {
406440
c == '(' || c == ')' || c == ',' || c == ':' || c == ' '
407441
}
408442

443+
/// The quote character that delimits a quoted string in the input.
enum QuoteType {
444+
/// String delimited by double quotes, e.g. `"foo bar"`
Double,
445+
/// String delimited by single quotes, e.g. `'foo bar'`
Single,
446+
}
447+
409448
#[derive(Debug)]
410449
/// Splits a string like Dictionary(Int32, Int64) into tokens suitable for parsing
411450
///
@@ -527,6 +566,7 @@ impl<'a> Tokenizer<'a> {
527566
"None" => Token::None,
528567

529568
"nullable" => Token::Nullable,
569+
"field" => Token::Field,
530570

531571
"Struct" => Token::Struct,
532572

@@ -537,9 +577,14 @@ impl<'a> Tokenizer<'a> {
537577
Ok(token)
538578
}
539579

540-
/// Parses e.g. `"foo bar"`
541-
fn parse_quoted_string(&mut self) -> ArrowResult<Token> {
542-
if self.next_char() != Some('\"') {
580+
/// Parses e.g. `"foo bar"`, `'foo bar'`
581+
fn parse_quoted_string(&mut self, quote_type: QuoteType) -> ArrowResult<Token> {
582+
let quote = match quote_type {
583+
QuoteType::Double => '\"',
584+
QuoteType::Single => '\'',
585+
};
586+
587+
if self.next_char() != Some(quote) {
543588
return Err(make_error(self.val, "Expected \""));
544589
}
545590

@@ -561,7 +606,7 @@ impl<'a> Tokenizer<'a> {
561606
is_escaped = true;
562607
self.word.push(c);
563608
}
564-
'"' => {
609+
c if c == quote => {
565610
if is_escaped {
566611
self.word.push(c);
567612
is_escaped = false;
@@ -585,7 +630,10 @@ impl<'a> Tokenizer<'a> {
585630
return Err(make_error(self.val, "empty strings aren't allowed"));
586631
}
587632

588-
Ok(Token::DoubleQuotedString(val))
633+
match quote_type {
634+
QuoteType::Double => Ok(Token::DoubleQuotedString(val)),
635+
QuoteType::Single => Ok(Token::SingleQuotedString(val)),
636+
}
589637
}
590638
}
591639

@@ -601,7 +649,10 @@ impl Iterator for Tokenizer<'_> {
601649
continue;
602650
}
603651
'"' => {
604-
return Some(self.parse_quoted_string());
652+
return Some(self.parse_quoted_string(QuoteType::Double));
653+
}
654+
'\'' => {
655+
return Some(self.parse_quoted_string(QuoteType::Single));
605656
}
606657
'(' => {
607658
self.next_char();
@@ -652,11 +703,13 @@ enum Token {
652703
None,
653704
Integer(i64),
654705
DoubleQuotedString(String),
706+
SingleQuotedString(String),
655707
List,
656708
LargeList,
657709
FixedSizeList,
658710
Struct,
659711
Nullable,
712+
Field,
660713
}
661714

662715
impl Display for Token {
@@ -687,8 +740,10 @@ impl Display for Token {
687740
Token::Dictionary => write!(f, "Dictionary"),
688741
Token::Integer(v) => write!(f, "Integer({v})"),
689742
Token::DoubleQuotedString(s) => write!(f, "DoubleQuotedString({s})"),
743+
Token::SingleQuotedString(s) => write!(f, "SingleQuotedString({s})"),
690744
Token::Struct => write!(f, "Struct"),
691745
Token::Nullable => write!(f, "nullable"),
746+
Token::Field => write!(f, "field"),
692747
}
693748
}
694749
}
@@ -828,7 +883,16 @@ mod test {
828883
),
829884
])),
830885
DataType::Struct(Fields::empty()),
831-
// TODO support more structured types (List, LargeList, Union, Map, RunEndEncoded, etc)
886+
DataType::List(Arc::new(Field::new_list_field(DataType::Int64, true))),
887+
DataType::List(Arc::new(Field::new_list_field(DataType::Int64, false))),
888+
DataType::List(Arc::new(Field::new("Int64", DataType::Int64, true))),
889+
DataType::List(Arc::new(Field::new("Int64", DataType::Int64, false))),
890+
DataType::List(Arc::new(Field::new(
891+
"nested_list",
892+
DataType::List(Arc::new(Field::new("Int64", DataType::Int64, true))),
893+
true,
894+
))),
895+
// TODO support more structured types (LargeList, Union, Map, RunEndEncoded, etc)
832896
]
833897
}
834898

0 commit comments

Comments
 (0)