diff --git a/ext/re2/re2.cc b/ext/re2/re2.cc
index 90d7b35..96f7218 100644
--- a/ext/re2/re2.cc
+++ b/ext/re2/re2.cc
@@ -687,6 +687,55 @@ static VALUE re2_matchdata_inspect(VALUE self) {
   return result;
 }
 
+/*
+ * Returns the array of submatches for pattern matching.
+ *
+ * The overall match is not included, only the capturing groups. A group
+ * that did not take part in the match is returned as +nil+; a group that
+ * matched the empty string is returned as an empty string.
+ *
+ * @return [Array<String, nil>] the array of submatches
+ * @example
+ *   m = RE2::Regexp.new('(\d+)').match("bob 123")
+ *   m.deconstruct #=> ["123"]
+ *
+ *   case RE2::Regexp.new('(\d+) (\d+)').match("bob 123 456")
+ *   in x, y
+ *     puts "Matched #{x} #{y}"
+ *   else
+ *     puts "Unrecognised match"
+ *   end
+ */
+static VALUE re2_matchdata_deconstruct(VALUE self) {
+  int i;
+  re2_matchdata *m;
+  re2_pattern *p;
+  re2::StringPiece *match;
+  VALUE array;
+
+  Data_Get_Struct(self, re2_matchdata, m);
+  Data_Get_Struct(m->regexp, re2_pattern, p);
+
+  /* Index 0 is skipped below, hence the capacity of number_of_matches - 1. */
+  array = rb_ary_new2(m->number_of_matches - 1);
+  for (i = 1; i < m->number_of_matches; i++) {
+    match = &m->matches[i];
+
+    /*
+     * A null data pointer marks a group that did not participate in the
+     * match; empty() alone would also be true for a group that matched "".
+     */
+    if (match->data() == NULL) {
+      rb_ary_push(array, Qnil);
+    } else {
+      rb_ary_push(array, ENCODED_STR_NEW(match->data(), match->size(),
+            p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"));
+    }
+  }
+
+  return array;
+}
+
 /*
  * Returns a new RE2 object with a compiled version of
  * +pattern+ stored inside. Equivalent to +RE2.new+.
diff --git a/spec/re2/match_data_spec.rb b/spec/re2/match_data_spec.rb
index 245dd8b..2c77a25 100644
--- a/spec/re2/match_data_spec.rb
+++ b/spec/re2/match_data_spec.rb
@@ -241,4 +241,24 @@
       expect(md.end(:foo)).to be_nil
     end
   end
+
+  describe "#deconstruct" do
+    it "returns all capturing groups" do
+      md = RE2::Regexp.new('w(o)(o)').match('woo')
+
+      expect(md.deconstruct).to eq(['o', 'o'])
+    end
+
+    it "includes optional capturing groups as nil" do
+      md = RE2::Regexp.new('w(.)(.)(.)?').match('woo')
+
+      expect(md.deconstruct).to eq(['o', 'o', nil])
+    end
+
+    it "includes groups matching the empty string as empty strings" do
+      md = RE2::Regexp.new('w(o*)').match('w')
+
+      expect(md.deconstruct).to eq([''])
+    end
+  end
 end