Skip to content

Commit 1447771

Browse files
authored
Merge pull request #103 from ammar/handle-custom-encoding-opts
Respect encoding in custom options arguments
2 parents 05e2472 + ecbd803 commit 1447771

File tree

3 files changed

+26
-2
lines changed

3 files changed

+26
-2
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1111

1212
- enable frozen string literals (#98)
1313

14+
### Fixed
15+
16+
- scan with correct encoding when passing regopt individually (#102)
17+
- thanks to [Earlopain](https://github.com/Earlopain) for the report
18+
1419
## [2.11.1] - 2025-08-08 - Janosch Müller
1520

1621
### Fixed

lib/regexp_parser/scanner/scanner.rl

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -686,7 +686,7 @@ class Regexp::Scanner
686686

687687
input = input_object.is_a?(Regexp) ? input_object.source : input_object
688688
self.free_spacing = free_spacing?(input_object, options)
689-
self.regexp_encoding = input_object.encoding if input_object.is_a?(::Regexp)
689+
self.regexp_encoding = extract_encoding(input_object, options)
690690
self.spacing_stack = [{:free_spacing => free_spacing, :depth => 0}]
691691

692692
data = input.unpack("c*")
@@ -780,6 +780,14 @@ class Regexp::Scanner
780780
:group_depth, :set_depth, :conditional_stack,
781781
:char_pos
782782

783+
# Determines which encoding the scanner should assume for the input.
#
# input_object - the object being scanned; when it is a ::Regexp its own
#                encoding is used.
# options      - Integer bitmask of Regexp option flags (e.g.
#                Regexp::NOENCODING | Regexp::EXTENDED), or nil.
#
# Returns the Regexp's encoding for Regexp input, Encoding::BINARY when the
# NOENCODING flag is set in options, and nil otherwise.
def extract_encoding(input_object, options)
  return input_object.encoding if input_object.is_a?(::Regexp)

  # Compare the masked value against zero explicitly: 0 is truthy in Ruby,
  # so the bare result of `&` would select BINARY for every non-nil options
  # value, even when the NOENCODING flag is absent.
  Encoding::BINARY if options && (options & Regexp::NOENCODING) != 0
end
790+
783791
def free_spacing?(input_object, options)
784792
if options && !input_object.is_a?(String)
785793
raise ArgumentError, 'options cannot be supplied unless scanning a String'

spec/scanner/options_spec.rb

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ def expect_type_tokens(tokens, type_tokens)
77
expect(tokens.map { |type, token, *| [type, token] }).to eq(type_tokens)
88
end
99

10-
it 'raises if if scanning from a Regexp and options are passed' do
10+
it 'raises if scanning from a Regexp and options are passed' do
1111
expect { RS.scan(/a+/, options: ::Regexp::EXTENDED) }.to raise_error(
1212
ArgumentError,
1313
'options cannot be supplied unless scanning a String'
@@ -25,6 +25,17 @@ def expect_type_tokens(tokens, type_tokens)
2525
)
2626
end
2727

28+
it 'sets encoding based on options if scanning from a String' do
29+
expect_type_tokens(
30+
RS.scan('\x94\x95', options: ::Regexp::NOENCODING),
31+
[
32+
# in non-binary encodings, these would be seen as a single utf8 escape
33+
%i[escape hex],
34+
%i[escape hex],
35+
]
36+
)
37+
end
38+
2839
it 'does not set free_spacing if scanning from a String and passing no options' do
2940
expect_type_tokens(
3041
RS.scan('a+#c'),

0 commit comments

Comments
 (0)