Skip to content

Commit be48522

Browse files
committed
Improved JavaScript regex recognition for extracting JS messages
1 parent 4f8c7f6 commit be48522

File tree

2 files changed

+60
-1
lines changed

2 files changed

+60
-1
lines changed

CHANGES.rst

+9
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
11
Babel Changelog
22
===============
33

4+
Next version
5+
--------------
6+
7+
Bugfixes
8+
~~~~~~~~
9+
10+
* Regex for parsing JavaScript regexes improved. Before this, the lexer couldn't recognize certain regexes,
11+
breaking the parsing of JS files.
12+
413
Version 2.9.1
514
-------------
615

babel/messages/jslexer.py

+51-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,57 @@
2424
name_re = re.compile(r'[\w$_][\w\d$_]*', re.UNICODE)
2525
dotted_name_re = re.compile(r'[\w$_][\w\d$_.]*[\w\d$_.]', re.UNICODE)
2626
division_re = re.compile(r'/=?')
27-
regex_re = re.compile(r'/(?:[^/\\]*(?:\\.[^/\\]*)*)/[a-zA-Z]*', re.DOTALL)
27+
28+
regex_re = re.compile(
29+
r'''
30+
31+
# Opening slash of the regex
32+
/
33+
34+
(?:
35+
36+
# 1) Backslashed character
37+
#
38+
# Match a backslash `\` and then its following character, allowing
39+
# to backslash the `/` for example.
40+
(?:\\.)?
41+
42+
|
43+
44+
# 2) Regex character class `[a-z]`
45+
#
46+
# Match regex character class, like `[a-z]`. Inside a character
47+
# class, a `/` character may appear, which does not close the
48+
# regex. Therefore we allow it here inside a character class.
49+
\[
50+
(?:
51+
[^\]]*
52+
|
53+
\\\]
54+
)*
55+
\]
56+
57+
|
58+
59+
# 3) Other characters
60+
#
61+
# Match anything except a closing slash `/`, a backslash `\`, or an
62+
# opening bracket `[`. Those last two will be handled by the other
63+
# matchers.
64+
[^/\\\[]*
65+
66+
)*
67+
68+
# Closing slash of the regex
69+
/
70+
71+
# regex flags
72+
[a-zA-Z]*
73+
74+
''',
75+
re.DOTALL + re.VERBOSE
76+
)
77+
2878
line_re = re.compile(r'(\r\n|\n|\r)')
2979
line_join_re = re.compile(r'\\' + line_re.pattern)
3080
uni_escape_re = re.compile(r'[a-fA-F0-9]{1,4}')

0 commit comments

Comments
 (0)