Skip to content

Commit

Permalink
Add RE2::MatchData#deconstruct_keys
Browse files Browse the repository at this point in the history
GitHub: #57

In order to support pattern matching in Ruby, add a deconstruct_keys
method to RE2::MatchData that will return all named matches if given nil
or a subset of matches if given an array of capturing group names as
symbols.

To match [Ruby's implementation of
this](https://github.com/ruby/ruby/pull/6216) there are a few edge
cases:

1. If the number of keys given exceeds the number of named capturing
   groups, immediately return an empty hash.
2. Each key is added to the hash in turn and the first invalid key will
   cause the hash to be returned so it is possible to get a partial set
   of named captures.
  • Loading branch information
mudge committed Oct 22, 2022
1 parent 6cf188d commit 5c9c125
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 0 deletions.
68 changes: 68 additions & 0 deletions ext/re2/re2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -695,6 +695,7 @@ static VALUE re2_matchdata_inspect(VALUE self) {
* m = RE2::Regexp.new('(\d+)').match("bob 123")
* m.deconstruct #=> ["123"]
*
* @example pattern matching
* case RE2::Regexp.new('(\d+) (\d+)').match("bob 123 456")
* in x, y
* puts "Matched #{x} #{y}"
Expand Down Expand Up @@ -727,6 +728,71 @@ static VALUE re2_matchdata_deconstruct(VALUE self) {
return array;
}

/*
 * Returns a hash of capturing group names to submatches for pattern matching.
 *
 * As this is used by Ruby's pattern matching, it will return an empty hash if
 * given more keys than the pattern has capturing groups (as reported by
 * RE2's NumberOfCapturingGroups). Given keys will populate the hash in order
 * but an invalid name will cause the hash to be immediately returned, so a
 * partial set of named captures is possible.
 *
 * @return [Hash] a hash of capturing group names to submatches
 * @param [Array<Symbol>, nil] keys an array of Symbol capturing group names or nil to return all names
 * @raise [TypeError] if keys is neither nil nor an Array, or if a key that is
 *   examined is not a Symbol
 * @example
 *   m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
 *   m.deconstruct_keys(nil)                #=> {:numbers => "123", :letters => "abc"}
 *   m.deconstruct_keys([:numbers])         #=> {:numbers => "123"}
 *   m.deconstruct_keys([:fruit])           #=> {}
 *   m.deconstruct_keys([:letters, :fruit]) #=> {:letters => "abc"}
 *
 * @example pattern matching
 *   case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
 *   in numbers:, letters:
 *     puts "Numbers: #{numbers}, letters: #{letters}"
 *   else
 *     puts "Unrecognised match"
 *   end
 */
static VALUE re2_matchdata_deconstruct_keys(VALUE self, VALUE keys) {
  long i;
  VALUE capturing_groups, key;
  re2_matchdata *m;
  re2_pattern *p;

  Data_Get_Struct(self, re2_matchdata, m);
  Data_Get_Struct(m->regexp, re2_pattern, p);

  /*
   * Bind the name-to-index map by const reference instead of copying it.
   * Ruby raises exceptions with longjmp, which skips C++ destructors, so no
   * locally owned C++ object may be alive across a call that can raise
   * (Check_Type, rb_hash_aset, ...); otherwise its memory would leak.
   */
  const map<string, int> &groups = p->pattern->NamedCapturingGroups();
  map<string, int>::const_iterator group;

  capturing_groups = rb_hash_new();

  if (NIL_P(keys)) {
    /* No filter given: expose every named capturing group. */
    for (group = groups.begin(); group != groups.end(); ++group) {
      rb_hash_aset(capturing_groups,
                   ID2SYM(rb_intern(group->first.c_str())),
                   re2_matchdata_nth_match(group->second, self));
    }
  } else {
    Check_Type(keys, T_ARRAY);

    /* Asking for more keys than there are capturing groups can never match. */
    if (p->pattern->NumberOfCapturingGroups() >= RARRAY_LEN(keys)) {
      for (i = 0; i < RARRAY_LEN(keys); i++) {
        key = RARRAY_AREF(keys, i);
        Check_Type(key, T_SYMBOL);

        /*
         * Single lookup; the temporary string built from the symbol name is
         * destroyed at the end of this statement, before any further Ruby
         * API call that could raise.
         */
        group = groups.find(rb_id2name(SYM2ID(key)));

        /* Stop at the first unknown name, returning what was collected. */
        if (group == groups.end()) {
          break;
        }

        rb_hash_aset(capturing_groups, key,
                     re2_matchdata_nth_match(group->second, self));
      }
    }
  }

  return capturing_groups;
}

/*
* Returns a new RE2 object with a compiled version of
* +pattern+ stored inside. Equivalent to +RE2.new+.
Expand Down Expand Up @@ -1708,6 +1774,8 @@ void Init_re2(void) {
RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
rb_define_method(re2_cMatchData, "deconstruct",
RUBY_METHOD_FUNC(re2_matchdata_deconstruct), 0);
rb_define_method(re2_cMatchData, "deconstruct_keys",
RUBY_METHOD_FUNC(re2_matchdata_deconstruct_keys), 1);

rb_define_method(re2_cScanner, "string",
RUBY_METHOD_FUNC(re2_scanner_string), 0);
Expand Down
44 changes: 44 additions & 0 deletions spec/re2/match_data_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -255,4 +255,48 @@
expect(md.deconstruct).to eq(['o', 'o', nil])
end
end

describe "#deconstruct_keys" do
  # Match with two named capturing groups, shared by the examples that
  # exercise named-capture lookup.
  let(:md) do
    RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
  end

  it "returns all named captures if given nil" do
    expect(md.deconstruct_keys(nil)).to eq(:numbers => '123', :letters => 'abc')
  end

  it "returns only named captures if given names" do
    expect(md.deconstruct_keys([:numbers])).to eq(:numbers => '123')
  end

  it "returns named captures up until an invalid name is given" do
    requested = [:numbers, :punctuation]

    expect(md.deconstruct_keys(requested)).to eq(:numbers => '123')
  end

  it "returns an empty hash if given more capture names than exist" do
    requested = [:numbers, :letters, :punctuation]

    expect(md.deconstruct_keys(requested)).to eq({})
  end

  it "returns an empty hash if there are no named capturing groups" do
    # This example needs a pattern with only unnamed groups, so it builds
    # its own match instead of using the shared one.
    unnamed = RE2::Regexp.new('(\d+) ([a-zA-Z]+)').match('123 abc')

    expect(unnamed.deconstruct_keys(nil)).to eq({})
  end

  it "raises an error if given a non-array of keys" do
    expect { md.deconstruct_keys(0) }.to raise_error(TypeError)
  end

  it "raises an error if given keys as non-symbols" do
    expect { md.deconstruct_keys([0]) }.to raise_error(TypeError)
  end
end
end

0 comments on commit 5c9c125

Please sign in to comment.