Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support pattern matching #58

Merged
merged 2 commits into from
Oct 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 110 additions & 0 deletions ext/re2/re2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -687,6 +687,112 @@ static VALUE re2_matchdata_inspect(VALUE self) {
return result;
}

/*
 * Returns the submatches of this match as an array, which is the hook Ruby's
 * array pattern matching calls on the matched object.
 *
 * A submatch whose text is empty is reported as nil.
 *
 * @return [Array<String, nil>] the array of submatches
 * @example
 *   m = RE2::Regexp.new('(\d+)').match("bob 123")
 *   m.deconstruct #=> ["123"]
 *
 * @example pattern matching
 *   case RE2::Regexp.new('(\d+) (\d+)').match("bob 123 456")
 *   in x, y
 *     puts "Matched #{x} #{y}"
 *   else
 *     puts "Unrecognised match"
 *   end
 */
static VALUE re2_matchdata_deconstruct(VALUE self) {
  re2_matchdata *matchdata;
  re2_pattern *pattern;
  VALUE submatches;
  int index;

  Data_Get_Struct(self, re2_matchdata, matchdata);
  Data_Get_Struct(matchdata->regexp, re2_pattern, pattern);

  /* Entry 0 is skipped (presumably the overall match), hence the - 1. */
  submatches = rb_ary_new2(matchdata->number_of_matches - 1);

  for (index = 1; index < matchdata->number_of_matches; index++) {
    re2::StringPiece *submatch = &matchdata->matches[index];
    VALUE value = Qnil;

    /* Only non-empty submatches become Strings; the rest stay nil. */
    if (!submatch->empty()) {
      value = ENCODED_STR_NEW(submatch->data(), submatch->size(),
          pattern->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
    }

    rb_ary_push(submatches, value);
  }

  return submatches;
}

/*
 * Returns a hash of capturing group names to submatches for pattern matching.
 *
 * As this is used by Ruby's pattern matching, it will return an empty hash if given
 * more keys than there are capturing groups. Given keys will populate the hash in
 * order but an invalid name will cause the hash to be immediately returned.
 *
 * @return [Hash] a hash of capturing group names to submatches
 * @param [Array<Symbol>, nil] keys an array of Symbol capturing group names or nil to return all names
 * @raise [TypeError] if keys is neither nil nor an Array, or if a key reached
 *   before the first unknown name is not a Symbol
 * @example
 *   m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
 *   m.deconstruct_keys(nil)                #=> {:numbers => "123", :letters => "abc"}
 *   m.deconstruct_keys([:numbers])         #=> {:numbers => "123"}
 *   m.deconstruct_keys([:fruit])           #=> {}
 *   m.deconstruct_keys([:letters, :fruit]) #=> {:letters => "abc"}
 *
 * @example pattern matching
 *   case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
 *   in numbers:, letters:
 *     puts "Numbers: #{numbers}, letters: #{letters}"
 *   else
 *     puts "Unrecognised match"
 *   end
 */
static VALUE re2_matchdata_deconstruct_keys(VALUE self, VALUE keys) {
  long i;
  VALUE capturing_groups, key;
  re2_matchdata *m;
  re2_pattern *p;

  Data_Get_Struct(self, re2_matchdata, m);
  Data_Get_Struct(m->regexp, re2_pattern, p);

  /*
   * Bind a reference to the pattern's own map rather than copying it: this
   * avoids a copy per call and means no owning C++ container is live in this
   * frame when Check_Type below raises (a Ruby raise leaves the function
   * without running C++ destructors).
   */
  const map<string, int>& groups = p->pattern->NamedCapturingGroups();
  capturing_groups = rb_hash_new();

  if (NIL_P(keys)) {
    /* nil means "every named group". */
    for (map<string, int>::const_iterator iterator = groups.begin();
         iterator != groups.end(); ++iterator) {
      rb_hash_aset(capturing_groups,
          ID2SYM(rb_intern(iterator->first.data())),
          re2_matchdata_nth_match(iterator->second, self));
    }
  } else {
    Check_Type(keys, T_ARRAY);

    /* Asking for more keys than there are groups can never fully match. */
    if (p->pattern->NumberOfCapturingGroups() >= RARRAY_LEN(keys)) {
      for (i = 0; i < RARRAY_LEN(keys); i++) {
        key = rb_ary_entry(keys, i);
        Check_Type(key, T_SYMBOL);
        string name(rb_id2name(SYM2ID(key)));

        /* A single lookup both tests for the name and yields its index. */
        map<string, int>::const_iterator group = groups.find(name);

        /* Stop at the first unknown name, keeping what was collected. */
        if (group == groups.end()) {
          break;
        }

        rb_hash_aset(capturing_groups, key,
            re2_matchdata_nth_match(group->second, self));
      }
    }
  }

  return capturing_groups;
}

/*
* Returns a new RE2 object with a compiled version of
* +pattern+ stored inside. Equivalent to +RE2.new+.
Expand Down Expand Up @@ -1666,6 +1772,10 @@ void Init_re2(void) {
RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
rb_define_method(re2_cMatchData, "inspect",
RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
rb_define_method(re2_cMatchData, "deconstruct",
RUBY_METHOD_FUNC(re2_matchdata_deconstruct), 0);
rb_define_method(re2_cMatchData, "deconstruct_keys",
RUBY_METHOD_FUNC(re2_matchdata_deconstruct_keys), 1);

rb_define_method(re2_cScanner, "string",
RUBY_METHOD_FUNC(re2_scanner_string), 0);
Expand Down
58 changes: 58 additions & 0 deletions spec/re2/match_data_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -241,4 +241,62 @@
expect(md.end(:foo)).to be_nil
end
end

# Covers the array form returned for `case ... in a, b` style matching.
describe "#deconstruct" do
  it "returns all capturing groups" do
    regexp = RE2::Regexp.new('w(o)(o)')
    submatches = regexp.match('woo').deconstruct

    expect(submatches).to eq(%w[o o])
  end

  it "includes optional capturing groups as nil" do
    regexp = RE2::Regexp.new('w(.)(.)(.)?')
    submatches = regexp.match('woo').deconstruct

    # The trailing optional group does not participate, so it is nil.
    expect(submatches).to eq(['o', 'o', nil])
  end
end

# Covers the hash form returned for `case ... in name:` style matching.
describe "#deconstruct_keys" do
  # Built eagerly before each example so no example's setup runs inside an
  # `expect { }` block.
  let!(:named_md) do
    RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
  end

  it "returns all named captures if given nil" do
    captures = named_md.deconstruct_keys(nil)

    expect(captures).to eq(:numbers => '123', :letters => 'abc')
  end

  it "returns only named captures if given names" do
    captures = named_md.deconstruct_keys([:numbers])

    expect(captures).to eq(:numbers => '123')
  end

  it "returns named captures up until an invalid name is given" do
    captures = named_md.deconstruct_keys([:numbers, :punctuation])

    expect(captures).to eq(:numbers => '123')
  end

  it "returns an empty hash if given more capture names than exist" do
    captures = named_md.deconstruct_keys([:numbers, :letters, :punctuation])

    expect(captures).to eq({})
  end

  it "returns an empty hash if there are no named capturing groups" do
    unnamed_md = RE2::Regexp.new('(\d+) ([a-zA-Z]+)').match('123 abc')

    expect(unnamed_md.deconstruct_keys(nil)).to eq({})
  end

  it "raises an error if given a non-array of keys" do
    expect { named_md.deconstruct_keys(0) }.to raise_error(TypeError)
  end

  it "raises an error if given keys as non-symbols" do
    expect { named_md.deconstruct_keys([0]) }.to raise_error(TypeError)
  end
end
end