Skip to content

Commit 4528396

Browse files
committed
feat: tab support for indentation stripping
1 parent 8d4890c commit 4528396

File tree

3 files changed

+37
-6
lines changed

3 files changed

+37
-6
lines changed

src/libexpr/parser-state.hh

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,11 @@ inline Formals * ParserState::validateFormals(Formals * formals, PosIdx pos, Sym
167167
return formals;
168168
}
169169

170+
/**
 * The whitespace character used to indent an indented string.
 *
 * Each enumerator's value is the character itself, so an IndentChar can be
 * built from a raw character and compared against one directly. It is kept
 * as an unscoped enum because those comparisons rely on the implicit
 * conversion to an integer type.
 */
enum IndentChar {
171+
Tab = '\t', /* horizontal tab */
172+
Space = ' ', /* single space */
173+
};
174+
170175
inline Expr * ParserState::stripIndentation(const PosIdx pos,
171176
std::vector<std::pair<PosIdx, std::variant<Expr *, StringToken>>> && es)
172177
{
@@ -175,6 +180,7 @@ inline Expr * ParserState::stripIndentation(const PosIdx pos,
175180
/* Figure out the minimum indentation. Note that by design
176181
whitespace-only final lines are not taken into account. (So
177182
the " " in "\n ''" is ignored, but the " " in "\n foo''" is.) */
183+
std::optional<IndentChar> indentChar = std::nullopt;
178184
bool atStartOfLine = true; /* = seen only whitespace in the current line */
179185
size_t minIndent = 1000000;
180186
size_t curIndent = 0;
@@ -189,18 +195,25 @@ inline Expr * ParserState::stripIndentation(const PosIdx pos,
189195
continue;
190196
}
191197
for (size_t j = 0; j < str->l; ++j) {
198+
auto cur = str->p[j];
192199
if (atStartOfLine) {
193-
if (str->p[j] == ' ')
200+
if (
201+
indentChar == cur
202+
|| (!indentChar && (cur == ' ' || cur == '\t'))
203+
) {
204+
if (!indentChar) {
205+
indentChar = IndentChar(cur);
206+
}
194207
curIndent++;
195-
else if (str->p[j] == '\n') {
208+
} else if (cur == '\n') {
196209
/* Empty line, doesn't influence minimum
197210
indentation. */
198211
curIndent = 0;
199212
} else {
200213
atStartOfLine = false;
201214
if (curIndent < minIndent) minIndent = curIndent;
202215
}
203-
} else if (str->p[j] == '\n') {
216+
} else if (cur == '\n') {
204217
atStartOfLine = true;
205218
curIndent = 0;
206219
}
@@ -222,7 +235,7 @@ inline Expr * ParserState::stripIndentation(const PosIdx pos,
222235
std::string s2;
223236
for (size_t j = 0; j < t.l; ++j) {
224237
if (atStartOfLine) {
225-
if (t.p[j] == ' ') {
238+
if (t.p[j] == indentChar) {
226239
if (curDropped++ >= minIndent)
227240
s2 += t.p[j];
228241
}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
"This is an indented multi-line string\nliteral. An amount of whitespace at\nthe start of each line matching the minimum\nindentation of all lines in the string\nliteral together will be removed. Thus,\nin this case four spaces will be\nstripped from each line, even though\n THIS LINE is indented six spaces.\n\nAlso, empty lines don't count in the\ndetermination of the indentation level (the\nprevious empty line has indentation 0, but\nit doesn't matter).\nIf the string starts with whitespace\n followed by a newline, it's stripped, but\n that's not the case here. Two spaces are\n stripped because of the \" \" at the start. \nThis line is indented\na bit further.\nAnti-quotations, like so, are\nalso allowed.\n The \\ is not special here.\n' can be followed by any character except another ', e.g. 'x'.\nLikewise for $, e.g. $$ or $varName.\nBut ' followed by ' is special, as is $ followed by {.\nIf you want them, use anti-quotations: '', \${.\n Tabs are not interpreted as whitespace (since we can't guess\n what tab settings are intended), so don't use them.\n\tThis line starts with a space and a tab, so only one\n space will be stripped from each line.\nAlso note that if the last line (just before the closing ' ')\nconsists only of whitespace, it's ignored. But here there is\nsome non-whitespace stuff, so the line isn't removed. \nThis shows a hacky way to preserve an empty line after the start.\nBut there's no reason to do so: you could just repeat the empty\nline.\n Similarly you can force an indentation level,\n in this case to 2 spaces. This works because the anti-quote\n is significant (not whitespace).\nstart on network-interfaces\n\nstart script\n\n rm -f /var/run/opengl-driver\n ln -sf 123 /var/run/opengl-driver\n\n rm -f /var/log/slim.log\n \nend script\n\nenv SLIM_CFGFILE=abc\nenv SLIM_THEMESDIR=def\nenv FONTCONFIG_FILE=/etc/fonts/fonts.conf \t\t\t\t# !!! 
cleanup\nenv XKB_BINDIR=foo/bin \t\t\t\t# Needed for the Xkb extension.\nenv LD_LIBRARY_PATH=libX11/lib:libXext/lib:/usr/lib/ # related to xorg-sys-opengl - needed to load libglx for (AI)GLX support (for compiz)\n\nenv XORG_DRI_DRIVER_PATH=nvidiaDrivers/X11R6/lib/modules/drivers/ \n\nexec slim/bin/slim\nEscaping of ' followed by ': ''\nEscaping of $ followed by {: \${\nAnd finally to interpret \\n etc. as in a string: \n, \r, \t.\nfoo\n'bla'\nbar\ncut -d $'\\t' -f 1\nending dollar $$\n"
1+
"This is an indented multi-line string\nliteral. An amount of whitespace at\nthe start of each line matching the minimum\nindentation of all lines in the string\nliteral together will be removed. Thus,\nin this case four spaces will be\nstripped from each line, even though\n THIS LINE is indented six spaces.\n\nAlso, empty lines don't count in the\ndetermination of the indentation level (the\nprevious empty line has indentation 0, but\nit doesn't matter).\nIf the string starts with whitespace\n followed by a newline, it's stripped, but\n that's not the case here. Two spaces are\n stripped because of the \" \" at the start. \nThis line is indented\na bit further.\nAnti-quotations, like so, are\nalso allowed.\n The \\ is not special here.\n' can be followed by any character except another ', e.g. 'x'.\nLikewise for $, e.g. $$ or $varName.\nBut ' followed by ' is special, as is $ followed by {.\nIf you want them, use anti-quotations: '', \${.\n Tabs are not interpreted as whitespace (since we can't guess\n what tab settings are intended), so don't use them.\n\tThis line starts with a space and a tab, so only one\n space will be stripped from each line.\nAlso note that if the last line (just before the closing ' ')\nconsists only of whitespace, it's ignored. But here there is\nsome non-whitespace stuff, so the line isn't removed. \nThis shows a hacky way to preserve an empty line after the start.\nBut there's no reason to do so: you could just repeat the empty\nline.\n Similarly you can force an indentation level,\n in this case to 2 spaces. This works because the anti-quote\n is significant (not whitespace).\nstart on network-interfaces\n\nstart script\n\n rm -f /var/run/opengl-driver\n ln -sf 123 /var/run/opengl-driver\n\n rm -f /var/log/slim.log\n \nend script\n\nenv SLIM_CFGFILE=abc\nenv SLIM_THEMESDIR=def\nenv FONTCONFIG_FILE=/etc/fonts/fonts.conf \t\t\t\t# !!! 
cleanup\nenv XKB_BINDIR=foo/bin \t\t\t\t# Needed for the Xkb extension.\nenv LD_LIBRARY_PATH=libX11/lib:libXext/lib:/usr/lib/ # related to xorg-sys-opengl - needed to load libglx for (AI)GLX support (for compiz)\n\nenv XORG_DRI_DRIVER_PATH=nvidiaDrivers/X11R6/lib/modules/drivers/ \n\nexec slim/bin/slim\nEscaping of ' followed by ': ''\nEscaping of $ followed by {: \${\nAnd finally to interpret \\n etc. as in a string: \n, \r, \t.\nfoo\n'bla'\nbar\ncut -d $'\\t' -f 1\nending dollar $$\nThis text uses\n\ttabs\nfor indentation\nAnd this text uses\n spaces\nbut is indented with tabs\n \tThis text uses\n\t both spaces and tabs\n\t\tso nothing is stripped\n\t"

tests/functional/lang/eval-okay-ind-string.nix

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,4 +125,22 @@ let
125125
# Accept dollars at end of strings
126126
s17 = ''ending dollar $'' + ''$'' + "\n";
127127

128-
in s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10 + s11 + s12 + s13 + s14 + s15 + s16 + s17
128+
# Indented string whose lines are (per its own text) indented with tabs;
# exercises indentation stripping on tab-indented content.
s18 = ''
129+
This text uses
130+
tabs
131+
for indentation
132+
'';
133+
134+
# Indented string that (per its own text) contains spaces in its body while
# the string as a whole is indented with tabs.
s19 = ''
135+
And this text uses
136+
spaces
137+
but is indented with tabs
138+
'';
139+
140+
# Indented string that (per its own text) mixes spaces and tabs in its
# indentation; the expected result is that nothing is stripped.
s20 = ''
141+
This text uses
142+
both spaces and tabs
143+
so nothing is stripped
144+
'';
145+
146+
in s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10 + s11 + s12 + s13 + s14 + s15 + s16 + s17 + s18 + s19 + s20

0 commit comments

Comments
 (0)