Skip to content

Commit

Permalink
Increase test coverage and coerce inputs to string
Browse files Browse the repository at this point in the history
Where possible, coerce inputs to strings with StringValue rather than
raising a TypeError. This particularly impacts the relatively recent
RE2::Set API which was excessively strict about its arguments.

Add test coverage to all parts of the API, better covering edge cases
including how encoding is handled based on the encoding of the given
pattern.
  • Loading branch information
mudge committed Sep 15, 2023
1 parent 7e499c2 commit 029e019
Show file tree
Hide file tree
Showing 8 changed files with 259 additions and 44 deletions.
15 changes: 10 additions & 5 deletions ext/re2/re2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,6 @@ static VALUE re2_scanner_scan(VALUE self) {
original_input_size = c->input->size();

for (i = 0; i < c->number_of_capturing_groups; i++) {
matches[i] = "";
argv[i] = &matches[i];
args[i] = &argv[i];
}
Expand Down Expand Up @@ -1404,7 +1403,9 @@ static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern,
UNUSED(self);
re2_pattern *p;

/* Convert all the inputs to be pumped into RE2::Replace. */
/* Take a copy of str so it can be modified in-place by
* RE2::Replace.
*/
string str_as_string(StringValuePtr(str));

/* Do the replacement. */
Expand Down Expand Up @@ -1440,7 +1441,9 @@ static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
VALUE rewrite) {
UNUSED(self);

/* Convert all the inputs to be pumped into RE2::GlobalReplace. */
/* Take a copy of str so it can be modified in-place by
* RE2::GlobalReplace.
*/
re2_pattern *p;
string str_as_string(StringValuePtr(str));

Expand Down Expand Up @@ -1579,11 +1582,12 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
* set.add("def") #=> 1
*/
static VALUE re2_set_add(VALUE self, VALUE pattern) {
Check_Type(pattern, T_STRING);
StringValue(pattern);
re2::StringPiece regex(RSTRING_PTR(pattern), RSTRING_LEN(pattern));
std::string err;
re2_set *s;
Data_Get_Struct(self, re2_set, s);

int index = s->set->Add(regex, &err);
if (index < 0) {
rb_raise(rb_eArgError, "str rejected by RE2::Set->Add(): %s", err.c_str());
Expand Down Expand Up @@ -1669,7 +1673,8 @@ static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
VALUE str, options, exception_option;
bool raise_exception = true;
rb_scan_args(argc, argv, "11", &str, &options);
Check_Type(str, T_STRING);

StringValue(str);
re2::StringPiece data(RSTRING_PTR(str), RSTRING_LEN(str));
std::vector<int> v;
re2_set *s;
Expand Down
6 changes: 3 additions & 3 deletions spec/kernel_spec.rb
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
RSpec.describe Kernel do
describe "#RE2" do
describe ".RE2" do
it "returns an RE2::Regexp instance given a pattern" do
expect(RE2('w(o)(o)')).to be_a(RE2::Regexp)
end

it "returns an RE2::Regexp instance given a pattern and options" do
re = RE2('w(o)(o)', :case_sensitive => false)
expect(re).to be_a(RE2::Regexp)
expect(re).to_not be_case_sensitive

expect(re).not_to be_case_sensitive
end
end
end
24 changes: 24 additions & 0 deletions spec/re2/match_data_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,18 @@
a = RE2::Regexp.new('(\d?)(a)(b)').match('ab').to_a
expect(a).to eq(["ab", nil, "a", "b"])
end

it "returns UTF-8 strings if the pattern is UTF-8" do
a = RE2::Regexp.new('w(o)(o)').match('woo').to_a

expect(a.map(&:encoding)).to all eq(Encoding::UTF_8)
end

it "returns ISO-8859-1 strings if the pattern is not UTF-8" do
a = RE2::Regexp.new('w(o)(o)', :utf8 => false).match('woo').to_a

expect(a.map(&:encoding)).to all eq(Encoding::ISO_8859_1)
end
end

describe "#[]" do
Expand All @@ -19,6 +31,18 @@
expect(md[2]).to eq("23")
end

it "returns a UTF-8 string by numerical index if the pattern is UTF-8" do
md = RE2::Regexp.new('(\d)(\d{2})').match("123")

expect(md[1].encoding).to eq(Encoding::UTF_8)
end

it "returns a ISO-8859-1 string by numerical index if the pattern is not UTF-8" do
md = RE2::Regexp.new('(\d)(\d{2})', :utf8 => false).match("123")

expect(md[1].encoding).to eq(Encoding::ISO_8859_1)
end

it "has the whole match as the 0th item" do
md = RE2::Regexp.new('(\d)(\d{2})').match("123")
expect(md[0]).to eq("123")
Expand Down
82 changes: 60 additions & 22 deletions spec/re2/scanner_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@
end

describe "#scan" do
it "returns the next array of matches" do
it "returns the next array of matches", :aggregate_failures do
r = RE2::Regexp.new('(\w+)')
scanner = r.scan("It is a truth universally acknowledged")

expect(scanner.scan).to eq(["It"])
expect(scanner.scan).to eq(["is"])
expect(scanner.scan).to eq(["a"])
Expand All @@ -33,92 +34,123 @@
expect(scanner.scan).to be_nil
end

it "returns multiple capturing groups at a time", :aggregate_failures do
r = RE2::Regexp.new('(\w+) (\w+)')
scanner = r.scan("It is a truth universally acknowledged")

expect(scanner.scan).to eq(["It", "is"])
expect(scanner.scan).to eq(["a", "truth"])
expect(scanner.scan).to eq(["universally", "acknowledged"])
expect(scanner.scan).to be_nil
end

it "returns an empty array if there are no capturing groups" do
r = RE2::Regexp.new('\w+')
scanner = r.scan("Foo bar")

expect(scanner.scan).to eq([])
end

it "returns nil if there is no match" do
r = RE2::Regexp.new('\d+')
scanner = r.scan("Foo bar")

expect(scanner.scan).to be_nil
end

it "returns nil if the regexp is invalid" do
r = RE2::Regexp.new('???', :log_errors => false)
scanner = r.scan("Foo bar")

expect(scanner.scan).to be_nil
end

it "returns an empty array if the input is empty" do
it "returns an empty array if the input is empty", :aggregate_failures do
r = RE2::Regexp.new("")
scanner = r.scan("")

expect(scanner.scan).to eq([])
expect(scanner.scan).to be_nil
end

it "returns an array of nil with an empty input and capture" do
it "returns an array of nil with an empty input and capture", :aggregate_failures do
r = RE2::Regexp.new("()")
scanner = r.scan("")

expect(scanner.scan).to eq([nil])
expect(scanner.scan).to be_nil
end

it "returns an empty array for every match if the pattern is empty" do
it "returns an empty array for every match if the pattern is empty", :aggregate_failures do
r = RE2::Regexp.new("")
scanner = r.scan("Foo")

expect(scanner.scan).to eq([])
expect(scanner.scan).to eq([])
expect(scanner.scan).to eq([])
expect(scanner.scan).to eq([])
expect(scanner.scan).to be_nil
end

it "returns an array of nil if the pattern is an empty capturing group" do
it "returns an array of nil if the pattern is an empty capturing group", :aggregate_failures do
r = RE2::Regexp.new("()")
scanner = r.scan("Foo")

expect(scanner.scan).to eq([nil])
expect(scanner.scan).to eq([nil])
expect(scanner.scan).to eq([nil])
expect(scanner.scan).to eq([nil])
expect(scanner.scan).to be_nil
end

it "returns array of nils with multiple empty capturing groups" do
it "returns array of nils with multiple empty capturing groups", :aggregate_failures do
r = RE2::Regexp.new("()()()")
scanner = r.scan("Foo")

expect(scanner.scan).to eq([nil, nil, nil])
expect(scanner.scan).to eq([nil, nil, nil])
expect(scanner.scan).to eq([nil, nil, nil])
expect(scanner.scan).to eq([nil, nil, nil])
expect(scanner.scan).to be_nil
end

it "supports empty groups with multibyte characters" do
it "supports empty groups with multibyte characters", :aggregate_failures do
r = RE2::Regexp.new("()€")
scanner = r.scan("€")

expect(scanner.scan).to eq([nil])
expect(scanner.scan).to be_nil
end

it "raises a type error if given input that can't be coerced to a String" do
r = RE2::Regexp.new('(\w+)')

expect { r.scan(0) }.to raise_error(TypeError)
end

it "accepts input that can be coerced to a String", :aggregate_failures do
r = RE2::Regexp.new('(\w+)')
scanner = r.scan(StringLike.new("Hello world"))

expect(scanner.scan).to eq(["Hello"])
expect(scanner.scan).to eq(["world"])
expect(scanner.scan).to be_nil
end
end

it "is enumerable" do
r = RE2::Regexp.new('(\d)')
scanner = r.scan("There are 1 some 2 numbers 3")

expect(scanner).to be_a(Enumerable)
end

describe "#each" do
it "yields each match" do
r = RE2::Regexp.new('(\d)')
scanner = r.scan("There are 1 some 2 numbers 3")
matches = []
scanner.each do |match|
matches << match
end

expect(matches).to eq([["1"], ["2"], ["3"]])
expect { |b| scanner.each(&b) }.to yield_successive_args(["1"], ["2"], ["3"])
end

it "returns an enumerator when not given a block" do
Expand All @@ -135,22 +167,28 @@
end

describe "#rewind" do
it "resets any consumption" do
it "resets any consumption", :aggregate_failures do
r = RE2::Regexp.new('(\d)')
scanner = r.scan("There are 1 some 2 numbers 3")

expect(scanner.to_enum.first).to eq(["1"])
expect(scanner.to_enum.first).to eq(["2"])

scanner.rewind

expect(scanner.to_enum.first).to eq(["1"])
end

it "resets the eof? check" do
it "resets the eof? check", :aggregate_failures do
r = RE2::Regexp.new('(\d)')
scanner = r.scan("1")
scanner.scan
expect(scanner.eof?).to be_truthy

expect(scanner).to be_eof

scanner.rewind
expect(scanner.eof?).to be_falsey

expect(scanner).not_to be_eof
end
end

Expand All @@ -159,46 +197,46 @@
r = RE2::Regexp.new('(\d)')
scanner = r.scan("1 2 3")

expect(scanner.eof?).to be_falsey
expect(scanner).not_to be_eof
end

it "returns true if the input has been consumed" do
r = RE2::Regexp.new('(\d)')
scanner = r.scan("1")
scanner.scan

expect(scanner.eof?).to be_truthy
expect(scanner).to be_eof
end

it "returns false if no match is made" do
r = RE2::Regexp.new('(\d)')
scanner = r.scan("a")
scanner.scan

expect(scanner.eof?).to be_falsey
expect(scanner).not_to be_eof
end

it "returns false with an empty input that has not been scanned" do
r = RE2::Regexp.new("")
scanner = r.scan("")

expect(scanner.eof?).to be_falsey
expect(scanner).not_to be_eof
end

it "returns false with an empty input that has not been matched" do
r = RE2::Regexp.new('(\d)')
scanner = r.scan("")
scanner.scan

expect(scanner.eof?).to be_falsey
expect(scanner).not_to be_eof
end

it "returns true with an empty input that has been matched" do
r = RE2::Regexp.new("")
scanner = r.scan("")
scanner.scan

expect(scanner.eof?).to be_truthy
expect(scanner).to be_eof
end
end
end
Loading

0 comments on commit 029e019

Please sign in to comment.