Skip to content

Commit

Permalink
Increase test coverage and coerce inputs to string
Browse files Browse the repository at this point in the history
Where possible, coerce inputs to strings with StringValue rather than
raising a TypeError. This particularly impacts the relatively recent
RE2::Set API which was excessively strict about its arguments.

Add test coverage to all parts of the API, better covering edge cases
including how encoding is handled based on the encoding of the RE2
pattern.
  • Loading branch information
mudge committed Sep 15, 2023
1 parent 7e499c2 commit 279621d
Show file tree
Hide file tree
Showing 7 changed files with 223 additions and 44 deletions.
15 changes: 10 additions & 5 deletions ext/re2/re2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,6 @@ static VALUE re2_scanner_scan(VALUE self) {
original_input_size = c->input->size();

for (i = 0; i < c->number_of_capturing_groups; i++) {
matches[i] = "";
argv[i] = &matches[i];
args[i] = &argv[i];
}
Expand Down Expand Up @@ -1404,7 +1403,9 @@ static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern,
UNUSED(self);
re2_pattern *p;

/* Convert all the inputs to be pumped into RE2::Replace. */
/* Take a copy of str so it can be modified in-place by
* RE2::Replace.
*/
string str_as_string(StringValuePtr(str));

/* Do the replacement. */
Expand Down Expand Up @@ -1440,7 +1441,9 @@ static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
VALUE rewrite) {
UNUSED(self);

/* Convert all the inputs to be pumped into RE2::GlobalReplace. */
/* Take a copy of str so it can be modified in-place by
* RE2::GlobalReplace.
*/
re2_pattern *p;
string str_as_string(StringValuePtr(str));

Expand Down Expand Up @@ -1579,11 +1582,12 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
* set.add("def") #=> 1
*/
static VALUE re2_set_add(VALUE self, VALUE pattern) {
Check_Type(pattern, T_STRING);
StringValue(pattern);
re2::StringPiece regex(RSTRING_PTR(pattern), RSTRING_LEN(pattern));
std::string err;
re2_set *s;
Data_Get_Struct(self, re2_set, s);

int index = s->set->Add(regex, &err);
if (index < 0) {
rb_raise(rb_eArgError, "str rejected by RE2::Set->Add(): %s", err.c_str());
Expand Down Expand Up @@ -1669,7 +1673,8 @@ static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
VALUE str, options, exception_option;
bool raise_exception = true;
rb_scan_args(argc, argv, "11", &str, &options);
Check_Type(str, T_STRING);

StringValue(str);
re2::StringPiece data(RSTRING_PTR(str), RSTRING_LEN(str));
std::vector<int> v;
re2_set *s;
Expand Down
6 changes: 3 additions & 3 deletions spec/kernel_spec.rb
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
RSpec.describe Kernel do
describe "#RE2" do
describe ".RE2" do
it "returns an RE2::Regexp instance given a pattern" do
expect(RE2('w(o)(o)')).to be_a(RE2::Regexp)
end

it "returns an RE2::Regexp instance given a pattern and options" do
re = RE2('w(o)(o)', :case_sensitive => false)
expect(re).to be_a(RE2::Regexp)
expect(re).to_not be_case_sensitive

expect(re).not_to be_case_sensitive
end
end
end
82 changes: 60 additions & 22 deletions spec/re2/scanner_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@
end

describe "#scan" do
it "returns the next array of matches" do
it "returns the next array of matches", :aggregate_failures do
r = RE2::Regexp.new('(\w+)')
scanner = r.scan("It is a truth universally acknowledged")

expect(scanner.scan).to eq(["It"])
expect(scanner.scan).to eq(["is"])
expect(scanner.scan).to eq(["a"])
Expand All @@ -33,92 +34,123 @@
expect(scanner.scan).to be_nil
end

it "returns multiple capturing groups at a time", :aggregate_failures do
r = RE2::Regexp.new('(\w+) (\w+)')
scanner = r.scan("It is a truth universally acknowledged")

expect(scanner.scan).to eq(["It", "is"])
expect(scanner.scan).to eq(["a", "truth"])
expect(scanner.scan).to eq(["universally", "acknowledged"])
expect(scanner.scan).to be_nil
end

it "returns an empty array if there are no capturing groups" do
r = RE2::Regexp.new('\w+')
scanner = r.scan("Foo bar")

expect(scanner.scan).to eq([])
end

it "returns nil if there is no match" do
r = RE2::Regexp.new('\d+')
scanner = r.scan("Foo bar")

expect(scanner.scan).to be_nil
end

it "returns nil if the regexp is invalid" do
r = RE2::Regexp.new('???', :log_errors => false)
scanner = r.scan("Foo bar")

expect(scanner.scan).to be_nil
end

it "returns an empty array if the input is empty" do
it "returns an empty array if the input is empty", :aggregate_failures do
r = RE2::Regexp.new("")
scanner = r.scan("")

expect(scanner.scan).to eq([])
expect(scanner.scan).to be_nil
end

it "returns an array of nil with an empty input and capture" do
it "returns an array of nil with an empty input and capture", :aggregate_failures do
r = RE2::Regexp.new("()")
scanner = r.scan("")

expect(scanner.scan).to eq([nil])
expect(scanner.scan).to be_nil
end

it "returns an empty array for every match if the pattern is empty" do
it "returns an empty array for every match if the pattern is empty", :aggregate_failures do
r = RE2::Regexp.new("")
scanner = r.scan("Foo")

expect(scanner.scan).to eq([])
expect(scanner.scan).to eq([])
expect(scanner.scan).to eq([])
expect(scanner.scan).to eq([])
expect(scanner.scan).to be_nil
end

it "returns an array of nil if the pattern is an empty capturing group" do
it "returns an array of nil if the pattern is an empty capturing group", :aggregate_failures do
r = RE2::Regexp.new("()")
scanner = r.scan("Foo")

expect(scanner.scan).to eq([nil])
expect(scanner.scan).to eq([nil])
expect(scanner.scan).to eq([nil])
expect(scanner.scan).to eq([nil])
expect(scanner.scan).to be_nil
end

it "returns array of nils with multiple empty capturing groups" do
it "returns array of nils with multiple empty capturing groups", :aggregate_failures do
r = RE2::Regexp.new("()()()")
scanner = r.scan("Foo")

expect(scanner.scan).to eq([nil, nil, nil])
expect(scanner.scan).to eq([nil, nil, nil])
expect(scanner.scan).to eq([nil, nil, nil])
expect(scanner.scan).to eq([nil, nil, nil])
expect(scanner.scan).to be_nil
end

it "supports empty groups with multibyte characters" do
it "supports empty groups with multibyte characters", :aggregate_failures do
r = RE2::Regexp.new("()€")
scanner = r.scan("€")

expect(scanner.scan).to eq([nil])
expect(scanner.scan).to be_nil
end

it "raises a type error if given input that can't be coerced to a String" do
r = RE2::Regexp.new('(\w+)')

expect { r.scan(0) }.to raise_error(TypeError)
end

it "accepts input that can be coerced to a String", :aggregate_failures do
r = RE2::Regexp.new('(\w+)')
scanner = r.scan(StringLike.new("Hello world"))

expect(scanner.scan).to eq(["Hello"])
expect(scanner.scan).to eq(["world"])
expect(scanner.scan).to be_nil
end
end

it "is enumerable" do
r = RE2::Regexp.new('(\d)')
scanner = r.scan("There are 1 some 2 numbers 3")

expect(scanner).to be_a(Enumerable)
end

describe "#each" do
it "yields each match" do
r = RE2::Regexp.new('(\d)')
scanner = r.scan("There are 1 some 2 numbers 3")
matches = []
scanner.each do |match|
matches << match
end

expect(matches).to eq([["1"], ["2"], ["3"]])
expect { |b| scanner.each(&b) }.to yield_successive_args(["1"], ["2"], ["3"])
end

it "returns an enumerator when not given a block" do
Expand All @@ -135,22 +167,28 @@
end

describe "#rewind" do
it "resets any consumption" do
it "resets any consumption", :aggregate_failures do
r = RE2::Regexp.new('(\d)')
scanner = r.scan("There are 1 some 2 numbers 3")

expect(scanner.to_enum.first).to eq(["1"])
expect(scanner.to_enum.first).to eq(["2"])

scanner.rewind

expect(scanner.to_enum.first).to eq(["1"])
end

it "resets the eof? check" do
it "resets the eof? check", :aggregate_failures do
r = RE2::Regexp.new('(\d)')
scanner = r.scan("1")
scanner.scan
expect(scanner.eof?).to be_truthy

expect(scanner).to be_eof

scanner.rewind
expect(scanner.eof?).to be_falsey

expect(scanner).not_to be_eof
end
end

Expand All @@ -159,46 +197,46 @@
r = RE2::Regexp.new('(\d)')
scanner = r.scan("1 2 3")

expect(scanner.eof?).to be_falsey
expect(scanner).not_to be_eof
end

it "returns true if the input has been consumed" do
r = RE2::Regexp.new('(\d)')
scanner = r.scan("1")
scanner.scan

expect(scanner.eof?).to be_truthy
expect(scanner).to be_eof
end

it "returns false if no match is made" do
r = RE2::Regexp.new('(\d)')
scanner = r.scan("a")
scanner.scan

expect(scanner.eof?).to be_falsey
expect(scanner).not_to be_eof
end

it "returns false with an empty input that has not been scanned" do
r = RE2::Regexp.new("")
scanner = r.scan("")

expect(scanner.eof?).to be_falsey
expect(scanner).not_to be_eof
end

it "returns false with an empty input that has not been matched" do
r = RE2::Regexp.new('(\d)')
scanner = r.scan("")
scanner.scan

expect(scanner.eof?).to be_falsey
expect(scanner).not_to be_eof
end

it "returns true with an empty input that has been matched" do
r = RE2::Regexp.new("")
scanner = r.scan("")
scanner.scan

expect(scanner.eof?).to be_truthy
expect(scanner).to be_eof
end
end
end
42 changes: 41 additions & 1 deletion spec/re2/set_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,17 @@
end
end

it "raises an error if given a non-string pattern" do
it "raises an error if given a pattern that can't be coerced to a String" do
set = RE2::Set.new(:unanchored, :log_errors => false)

expect { set.add(0) }.to raise_error(TypeError)
end

it "accepts a pattern that can be coerced to a String" do
set = RE2::Set.new

expect(set.add(StringLike.new("abc"))).to eq(0)
end
end

describe "#compile" do
Expand All @@ -96,6 +102,24 @@
expect(set.match("abcdefghi", :exception => false)).to eq([0, 1, 2])
end

it "returns an empty array if there is no match" do
set = RE2::Set.new
set.add("abc")
set.compile

expect(set.match("def", :exception => false)).to be_empty
end

it "returns an empty array if there is no match when :exception is true" do
skip "Underlying RE2::Set::Match does not output error information" unless RE2::Set.match_raises_errors?

set = RE2::Set.new
set.add("abc")
set.compile

expect(set.match("def")).to be_empty
end

it "raises an error if called before #compile by default" do
skip "Underlying RE2::Set::Match does not output error information" unless RE2::Set.match_raises_errors?

Expand Down Expand Up @@ -139,6 +163,22 @@

expect { set.match("", 0) }.to raise_error(TypeError)
end

it "raises a Type Error if given input that can't be coerced to a String" do
set = RE2::Set.new
set.add("abc")
set.compile

expect { set.match(0, :exception => false) }.to raise_error(TypeError)
end

it "accepts input if it can be coerced to a String" do
set = RE2::Set.new
set.add("abc")
set.compile

expect(set.match(StringLike.new("abcdef"), :exception => false)).to contain_exactly(0)
end
end

def silence_stderr
Expand Down
Loading

0 comments on commit 279621d

Please sign in to comment.