Skip to content

Commit 8216761

Browse files
committed
[MERGE chakra-core#5592 @rhuanjl] Implement RegExp dotAll flag (/s)
Merge pull request chakra-core#5592 from rhuanjl:dotall. This PR implements the ES2018 RegExp /s (dotAll) flag. See the proposal spec here: https://tc39.github.io/proposal-regexp-dotall-flag/ Notes: 1. I've given this the flag ES2018RegExDotAll, and I've made it default to enabled. 2. When run with the ES6RegExPrototypeProperties flag, this passes all relevant test262 tests; without that flag, several test262 tests fail (hopefully that can be enabled soon). Fixes: chakra-core#2787 cc: @mathiasbynens
2 parents 0b13df8 + 4bcc8d1 commit 8216761

36 files changed

+4346
-4046
lines changed

lib/Common/ConfigFlagsList.h

+2
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,7 @@ PHASE(All)
640640
#define DEFAULT_CONFIG_ES6UnicodeVerbose (true)
641641
#define DEFAULT_CONFIG_ES6Unscopables (true)
642642
#define DEFAULT_CONFIG_ES6RegExSticky (true)
643+
#define DEFAULT_CONFIG_ES2018RegExDotAll (true)
643644
#ifdef COMPILE_DISABLE_ES6RegExPrototypeProperties
644645
// If ES6RegExPrototypeProperties needs to be disabled by compile flag, DEFAULT_CONFIG_ES6RegExPrototypeProperties should be false
645646
#define DEFAULT_CONFIG_ES6RegExPrototypeProperties (false)
@@ -1135,6 +1136,7 @@ FLAGPR (Boolean, ES6, ES6Unicode , "Enable ES6 Unicode 6.0
11351136
FLAGPR (Boolean, ES6, ES6UnicodeVerbose , "Enable ES6 Unicode 6.0 verbose failure output" , DEFAULT_CONFIG_ES6UnicodeVerbose)
11361137
FLAGPR (Boolean, ES6, ES6Unscopables , "Enable ES6 With Statement Unscopables" , DEFAULT_CONFIG_ES6Unscopables)
11371138
FLAGPR (Boolean, ES6, ES6RegExSticky , "Enable ES6 RegEx sticky flag" , DEFAULT_CONFIG_ES6RegExSticky)
1139+
FLAGPR (Boolean, ES6, ES2018RegExDotAll , "Enable ES2018 RegEx dotAll flag" , DEFAULT_CONFIG_ES2018RegExDotAll)
11381140

11391141
#ifndef COMPILE_DISABLE_ES6RegExPrototypeProperties
11401142
#define COMPILE_DISABLE_ES6RegExPrototypeProperties 0

lib/Parser/RegexFlags.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ namespace UnifiedRegex
1414
MultilineRegexFlag = 1 << 2,
1515
UnicodeRegexFlag = 1 << 3,
1616
StickyRegexFlag = 1 << 4,
17-
AllRegexFlags = (1 << 5) - 1
17+
DotAllRegexFlag = 1 << 5,
18+
AllRegexFlags = (1 << 6) - 1
1819
};
1920
}

lib/Parser/RegexParser.cpp

+32-2
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ namespace UnifiedRegex
145145
, tempLocationOfRange(nullptr)
146146
, codePointAtTempLocation(0)
147147
, unicodeFlagPresent(false)
148+
, dotAllFlagPresent(false)
148149
, caseInsensitiveFlagPresent(false)
149150
, positionAfterLastSurrogate(nullptr)
150151
, valueOfLastSurrogate(INVALID_CODEPOINT)
@@ -2758,6 +2759,16 @@ namespace UnifiedRegex
27582759
}
27592760
flags = (RegexFlags)(flags | MultilineRegexFlag);
27602761
break;
2762+
case 's':
2763+
if (scriptContext->GetConfig()->IsES2018RegExDotAllEnabled())
2764+
{
2765+
if ((flags & DotAllRegexFlag) != 0)
2766+
{
2767+
Fail(JSERR_RegExpSyntax);
2768+
}
2769+
flags = (RegexFlags)(flags | DotAllRegexFlag);
2770+
break;
2771+
}
27612772
case 'u':
27622773
// If we don't have unicode enabled, fall through to default
27632774
if (scriptContext->GetConfig()->IsES6UnicodeExtensionsEnabled())
@@ -2832,12 +2843,15 @@ namespace UnifiedRegex
28322843
Fail(JSERR_RegExpSyntax);
28332844
this->unicodeFlagPresent = (flags & UnifiedRegex::UnicodeRegexFlag) == UnifiedRegex::UnicodeRegexFlag;
28342845
this->caseInsensitiveFlagPresent = (flags & UnifiedRegex::IgnoreCaseRegexFlag) == UnifiedRegex::IgnoreCaseRegexFlag;
2846+
this->dotAllFlagPresent = (flags & UnifiedRegex::DotAllRegexFlag) == UnifiedRegex::DotAllRegexFlag;
28352847
Assert(!this->unicodeFlagPresent || scriptContext->GetConfig()->IsES6UnicodeExtensionsEnabled());
2848+
Assert(!this->dotAllFlagPresent || scriptContext->GetConfig()->IsES2018RegExDotAllEnabled());
28362849
}
28372850
else
28382851
{
28392852
this->unicodeFlagPresent = false;
28402853
this->caseInsensitiveFlagPresent = false;
2854+
this->dotAllFlagPresent = false;
28412855
}
28422856

28432857
// If this HR has been set, that means we have an earlier failure than the one caught above.
@@ -2891,6 +2905,7 @@ namespace UnifiedRegex
28912905
Options(flags);
28922906
this->unicodeFlagPresent = (flags & UnifiedRegex::UnicodeRegexFlag) == UnifiedRegex::UnicodeRegexFlag;
28932907
this->caseInsensitiveFlagPresent = (flags & UnifiedRegex::IgnoreCaseRegexFlag) == UnifiedRegex::IgnoreCaseRegexFlag;
2908+
this->dotAllFlagPresent = (flags & UnifiedRegex::DotAllRegexFlag) == UnifiedRegex::DotAllRegexFlag;
28942909
Assert(!this->unicodeFlagPresent || scriptContext->GetConfig()->IsES6UnicodeExtensionsEnabled());
28952910

28962911
// If this HR has been set, that means we have an earlier failure than the one caught above.
@@ -2946,6 +2961,7 @@ namespace UnifiedRegex
29462961
Options(dummyFlags);
29472962
this->unicodeFlagPresent = (dummyFlags & UnifiedRegex::UnicodeRegexFlag) == UnifiedRegex::UnicodeRegexFlag;
29482963
this->caseInsensitiveFlagPresent = (dummyFlags & UnifiedRegex::IgnoreCaseRegexFlag) == UnifiedRegex::IgnoreCaseRegexFlag;
2964+
this->dotAllFlagPresent = (dummyFlags & UnifiedRegex::DotAllRegexFlag) == UnifiedRegex::DotAllRegexFlag;
29492965
outTotalEncodedChars = Chars<EncodedChar>::OSB(next, input);
29502966
outTotalChars = Pos();
29512967

@@ -3101,7 +3117,14 @@ namespace UnifiedRegex
31013117
switch (cc)
31023118
{
31033119
case '.':
3104-
standardChars->SetNonNewline(ctAllocator, partialPrefixSetNode->set);
3120+
if (this->dotAllFlagPresent)
3121+
{
3122+
standardChars->SetFullSet(ctAllocator, partialPrefixSetNode->set);
3123+
}
3124+
else
3125+
{
3126+
standardChars->SetNonNewline(ctAllocator, partialPrefixSetNode->set);
3127+
}
31053128
break;
31063129
case 'S':
31073130
standardChars->SetNonWhitespace(ctAllocator, partialPrefixSetNode->set);
@@ -3137,7 +3160,14 @@ namespace UnifiedRegex
31373160
switch (cc)
31383161
{
31393162
case '.':
3140-
standardChars->SetNonNewline(ctAllocator, setNode->set);
3163+
if (this->dotAllFlagPresent)
3164+
{
3165+
standardChars->SetFullSet(ctAllocator, setNode->set);
3166+
}
3167+
else
3168+
{
3169+
standardChars->SetNonNewline(ctAllocator, setNode->set);
3170+
}
31413171
break;
31423172
case 'S':
31433173
standardChars->SetNonWhitespace(ctAllocator, setNode->set);

lib/Parser/RegexParser.h

+1
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ namespace UnifiedRegex
107107
SurrogatePairTracker* currentSurrogatePairNode;
108108
bool unicodeFlagPresent;
109109
bool caseInsensitiveFlagPresent;
110+
bool dotAllFlagPresent;
110111

111112
// The following two variables are used to determine if the the surrogate pair has been encountered
112113
// First holds the temporary location, second holds the value of the codepoint

lib/Parser/RegexPattern.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,11 @@ namespace UnifiedRegex
9393
return (rep.unified.program->flags & IgnoreCaseRegexFlag) != 0;
9494
}
9595

96+
bool RegexPattern::IsDotAll() const
97+
{
98+
return GetScriptContext()->GetConfig()->IsES2018RegExDotAllEnabled() && (rep.unified.program->flags & DotAllRegexFlag) != 0;
99+
}
100+
96101
bool RegexPattern::IsGlobal() const
97102
{
98103
return (rep.unified.program->flags & GlobalRegexFlag) != 0;
@@ -195,6 +200,8 @@ namespace UnifiedRegex
195200
w->Print(_u("g"));
196201
if (IsMultiline())
197202
w->Print(_u("m"));
203+
if (IsDotAll())
204+
w->Print(_u("s"));
198205
if (IsUnicode())
199206
w->Print(_u("u"));
200207
if (IsSticky())

lib/Parser/RegexPattern.h

+1
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ namespace UnifiedRegex
6262
bool IsIgnoreCase() const;
6363
bool IsGlobal() const;
6464
bool IsMultiline() const;
65+
bool IsDotAll() const;
6566
bool IsUnicode() const;
6667
bool IsSticky() const;
6768
bool WasLastMatchSuccessful() const;

lib/Parser/RegexRuntime.cpp

+4-3
Original file line numberDiff line numberDiff line change
@@ -5755,9 +5755,10 @@ namespace UnifiedRegex
57555755
w->Print(_u("flags: "));
57565756
if ((flags & GlobalRegexFlag) != 0) w->Print(_u("global "));
57575757
if ((flags & MultilineRegexFlag) != 0) w->Print(_u("multiline "));
5758-
if ((flags & IgnoreCaseRegexFlag) != 0) w->Print(_u("ignorecase"));
5759-
if ((flags & UnicodeRegexFlag) != 0) w->Print(_u("unicode"));
5760-
if ((flags & StickyRegexFlag) != 0) w->Print(_u("sticky"));
5758+
if ((flags & IgnoreCaseRegexFlag) != 0) w->Print(_u("ignorecase "));
5759+
if ((flags & DotAllRegexFlag) != 0) w->Print(_u("dotAll "));
5760+
if ((flags & UnicodeRegexFlag) != 0) w->Print(_u("unicode "));
5761+
if ((flags & StickyRegexFlag) != 0) w->Print(_u("sticky "));
57615762
w->EOL();
57625763
w->PrintEOL(_u("numGroups: %d"), numGroups);
57635764
w->PrintEOL(_u("numLoops: %d"), numLoops);

lib/Parser/StandardChars.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,11 @@ END {
261261
set.SetNotRanges(setAllocator, numNewlinePairs, newlineStr);
262262
}
263263

264+
// Populates `set` via SetNotRanges with zero range pairs and a null range
// string (presumably yielding the set of all characters, per the name).
//   setAllocator - arena supplied by the caller, handed to the CharSet
//   set          - character set to populate
void StandardChars<char16>::SetFullSet(ArenaAllocator* setAllocator, CharSet<Char> &set)
{
    // Pass the caller-supplied allocator, matching the neighbouring
    // newline-set code; the previous body passed `allocator`, which is not a
    // parameter of this function and left `setAllocator` unused.
    set.SetNotRanges(setAllocator, 0, nullptr);
}
268+
264269
CharSet<char16>* StandardChars<char16>::GetFullSet()
265270
{
266271
if (fullSet == 0)

lib/Parser/StandardChars.h

+1
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,7 @@ namespace UnifiedRegex
323323
void SetNonWordIUChars(ArenaAllocator* setAllocator, CharSet<Char> &set);
324324
void SetNewline(ArenaAllocator* setAllocator, CharSet<Char> &set);
325325
void SetNonNewline(ArenaAllocator* setAllocator, CharSet<Char> &set);
326+
void SetFullSet(ArenaAllocator* setAllocator, CharSet<Char> &set);
326327

327328
CharSet<Char>* GetFullSet();
328329
CharSet<Char>* GetEmptySet();

lib/Runtime/Base/JnDirectFields.h

+1
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,7 @@ ENTRY(compile)
422422
ENTRY(global)
423423
ENTRY(lastIndex)
424424
ENTRY(multiline)
425+
ENTRY(dotAll)
425426
ENTRY(ignoreCase)
426427
ENTRY(unicode)
427428
ENTRY(sticky)

lib/Runtime/Base/ThreadConfigFlagsList.h

+1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ FLAG_RELEASE(IsES6ToStringTagEnabled, ES6ToStringTag)
3838
FLAG_RELEASE(IsES6UnicodeExtensionsEnabled, ES6Unicode)
3939
FLAG_RELEASE(IsES6UnscopablesEnabled, ES6Unscopables)
4040
FLAG_RELEASE(IsES6RegExStickyEnabled, ES6RegExSticky)
41+
FLAG_RELEASE(IsES2018RegExDotAllEnabled, ES2018RegExDotAll)
4142
FLAG_RELEASE(IsES6RegExPrototypePropertiesEnabled, ES6RegExPrototypeProperties)
4243
FLAG_RELEASE(IsES6RegExSymbolsEnabled, ES6RegExSymbols)
4344
FLAG_RELEASE(IsES6HasInstanceEnabled, ES6HasInstance)

0 commit comments

Comments
 (0)