Skip to content

Commit 07c0073

Browse files
committed
Fix scanning of octal sequences in sets
[#86]
1 parent b3ea02a commit 07c0073

File tree

4 files changed

+29
-1
lines changed

4 files changed

+29
-1
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1717
- minor improvements to `Regexp::Scanner` performance
1818
- overall improvement of parse performance: about 10% for large Regexps
1919

20+
### Fixed
21+
22+
- parsing of octal escape sequences in sets, e.g. `[\141]`
23+
* thanks to [Randy Stauner](https://github.com/rwstauner) for the report
24+
2025
## [2.6.2] - 2023-01-19 - [Janosch Müller](mailto:[email protected])
2126

2227
### Fixed

lib/regexp_parser/scanner/scanner.rl

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,8 @@
126126
keep_mark | sequence_char;
127127

128128
# escapes that also work within a character set
129-
set_escape = backslash | brackets | escaped_ascii | property_char |
129+
set_escape = backslash | brackets | escaped_ascii |
130+
octal_sequence | property_char |
130131
sequence_char | single_codepoint_char_type;
131132

132133

@@ -247,12 +248,22 @@
247248
# set escapes scanner
248249
# --------------------------------------------------------------------------
249250
set_escape_sequence := |*
251+
# Special case: in sets, octal sequences have higher priority than backrefs
252+
octal_sequence {
253+
emit(:escape, :octal, copy(data, ts-1, te))
254+
fret;
255+
};
256+
257+
# Scan all other escapes that work in sets with the generic escape scanner
250258
set_escape > (escaped_set_alpha, 2) {
251259
fhold;
252260
fnext character_set;
253261
fcall escape_sequence;
254262
};
255263

264+
# Treat all remaining escapes - those not supported in sets - as literal.
265+
# (This currently includes \^, \-, \&, \:, although these could potentially
266+
# be meta chars when not escaped, depending on their position in the set.)
256267
any > (escaped_set_alpha, 1) {
257268
emit(:escape, :literal, copy(data, ts-1, te))
258269
fret;

spec/parser/sets_spec.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,18 @@
1414
[0] => [:set, :character, CharacterSet, text: '[', count: 3],
1515
[0, 1] => [:escape, :backspace, EscapeSequence::Backspace, text: '\b']
1616

17+
include_examples 'parse', '[a\xFz]',
18+
[0] => [:set, :character, CharacterSet, text: '[', count: 3],
19+
[0, 1] => [:escape, :hex, EscapeSequence::Hex, text: '\xF']
20+
1721
include_examples 'parse', '[a\x20c]',
1822
[0] => [:set, :character, CharacterSet, text: '[', count: 3],
1923
[0, 1] => [:escape, :hex, EscapeSequence::Hex, text: '\x20']
2024

25+
include_examples 'parse', '[a\77c]',
26+
[0] => [:set, :character, CharacterSet, text: '[', count: 3],
27+
[0, 1] => [:escape, :octal, EscapeSequence::Octal, text: '\77']
28+
2129
include_examples 'parse', '[a\u0640c]',
2230
[0] => [:set, :character, CharacterSet, text: '[', count: 3],
2331
[0, 1] => [:escape, :codepoint, EscapeSequence::Codepoint, text: '\u0640']

spec/scanner/sets_spec.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,13 @@
3333
include_examples 'scan', '[\u{40}]', 1 => [:escape, :codepoint_list, '\u{40}', 1, 7]
3434
include_examples 'scan', '[\c2]', 1 => [:escape, :control, '\c2', 1, 4]
3535
include_examples 'scan', '[\C-C]', 1 => [:escape, :control, '\C-C', 1, 5]
36+
include_examples 'scan', '[\xF]', 1 => [:escape, :hex, '\xF', 1, 4]
3637
include_examples 'scan', '[\x20]', 1 => [:escape, :hex, '\x20', 1, 5]
3738
include_examples 'scan', '[\M-Z]', 1 => [:escape, :meta_sequence, '\M-Z', 1, 5]
3839
include_examples 'scan', '[\M-\C-X]', 1 => [:escape, :meta_sequence, '\M-\C-X', 1, 8]
40+
include_examples 'scan', '[\7]', 1 => [:escape, :octal, '\7', 1, 3]
41+
include_examples 'scan', '[\77]', 1 => [:escape, :octal, '\77', 1, 4]
42+
include_examples 'scan', '[\777]', 1 => [:escape, :octal, '\777', 1, 5]
3943
include_examples 'scan', '[\\[]', 1 => [:escape, :set_open, '\[', 1, 3]
4044
include_examples 'scan', '[\\]]', 1 => [:escape, :set_close, '\]', 1, 3]
4145
include_examples 'scan', '[a\-]', 2 => [:escape, :literal, '\-', 2, 4]

0 commit comments

Comments
 (0)