@@ -16,19 +16,23 @@ list[str] newLineCharacters = [
1616 "\u2029" // PS
1717];
1818
19+ @synopsis {Comparator to sort strings by length (ascending).}
1920private bool bySize (str a , str b ) = size (a ) < size (b );
21+
22+ @synopsis {Comparator to sort strings by relative position in a reference list.}
2023private bool (str , str ) byIndex (list [str ] indices ) {
2124 return bool (str a , str b ) {
2225 return indexOf (indices , a ) < indexOf (indices , b );
2326 };
2427}
2528
29+ @synopsis {Determine the most-used newline character in a string.}
2630str mostUsedNewline (str input , list [str ] lineseps = newLineCharacters , str (list [str ]) tieBreaker = getFirstFrom ) {
2731 linesepCounts = (nl : 0 | nl <- lineseps );
2832 for (nl <- sort (lineseps , bySize )) {
2933 int count = size (findAll (input , nl ));
3034 linesepCounts [nl ] = count ;
31- // subtract all occurrences of substrings that we counted before
35+ // every occurrence of nl was also counted as an occurrence of each shorter separator contained in nl; subtract it from those earlier counts
3236 for (str snl <- substrings (nl ), linesepCounts [snl ]?) {
3337 linesepCounts [snl ] = linesepCounts [snl ] - count ;
3438 }
@@ -38,6 +42,7 @@ str mostUsedNewline(str input, list[str] lineseps = newLineCharacters, str(list[
3842 return tieBreaker (sort (byCount [max (domain (byCount ))], byIndex (lineseps )));
3943}
4044
45+ @synopsis {Split a string into its indentation prefix (leading whitespace) and the remainder of the string.}
4146tuple [str indentation , str rest ] splitIndentation (/^<indentation :\s *><rest :.*> /)
4247 = <indentation , rest > ;
4348
@@ -57,6 +62,7 @@ str(str) indentTabsAsSpaces(int tabSize) {
5762 };
5863}
5964
65+ @synopsis {Compute all non-empty proper substrings of a string (every substring except the string itself).}
6066set [str ] substrings (str input )
6167 = {input [i ..i +l ] | int i <- [0 ..size (input )], int l <- [1 ..size (input )], i + l <= size (input )};
6268
@@ -69,10 +75,11 @@ test bool mostUsedNewlineTestTie()
6975test bool mostUsedNewlineTestGreedy ()
7076 = mostUsedNewline ("\r\n\r\n\n " ) == "\r\n " ;
7177
78+ @synopsis {If a string does not end with a newline character, append its most-used newline character.}
7279str insertFinalNewline (str input , list [str ] lineseps = newLineCharacters )
7380 = any (nl <- lineseps , endsWith (input , nl ))
7481 ? input
75- : input + mostUsedNewline (input )
82+ : input + mostUsedNewline (input , lineseps = lineseps )
7683 ;
7784
7885test bool insertFinalNewlineTestSimple ()
@@ -87,7 +94,8 @@ test bool insertFinalNewlineTestMixed()
8794 = insertFinalNewline ("a\n b\r\n " )
8895 == "a\n b\r\n " ;
8996
90- str trimFinalNewline (str input , list [str ] lineseps = newLineCharacters ) {
97+ @synopsis {Remove all newlines from the end of a string.}
98+ str trimFinalNewlines (str input , list [str ] lineseps = newLineCharacters ) {
9199 orderedSeps = reverse (sort (lineseps , bySize ));
92100 while (nl <- orderedSeps , endsWith (input , nl )) {
93101 input = input [0 ..-size (nl )];
@@ -96,14 +104,15 @@ str trimFinalNewline(str input, list[str] lineseps = newLineCharacters) {
96104}
97105
98106test bool trimFinalNewlineTestSimple ()
99- = trimFinalNewline ("a\n\n\n " ) == "a" ;
107+ = trimFinalNewlines ("a\n\n\n " ) == "a" ;
100108
101109test bool trimFinalNewlineTestEndOnly ()
102- = trimFinalNewline ("a\n\n\n b\n\n " ) == "a\n\n\n b" ;
110+ = trimFinalNewlines ("a\n\n\n b\n\n " ) == "a\n\n\n b" ;
103111
104112test bool trimFinalNewlineTestWhiteSpace ()
105- = trimFinalNewline ("a\n\n\n b\n\n " ) == "a\n\n\n b\n\n " ;
113+ = trimFinalNewlines ("a\n\n\n b\n\n " ) == "a\n\n\n b\n\n " ;
106114
115+ @synopsis {Split a string into a list of <text, newline> pairs, one for each line.}
107116list [tuple [str , str ]] separateLines (str input , list [str ] lineseps = newLineCharacters ) {
108117 orderedSeps = reverse (sort (lineseps , bySize ));
109118
@@ -125,15 +134,18 @@ list[tuple[str, str]] separateLines(str input, list[str] lineseps = newLineChara
125134 return lines ;
126135}
127136
137+ @synopsis {Concatenate a list of <line, newline> pairs to form a single string.}
128138str mergeLines (list [tuple [str , str ]] lines )
129139 = ("" | it + line + sep | <line , sep > <- lines );
130140
141+ @synopsis {Process the text of a string per line, maintaining the original newline characters.}
131142str perLine (str input , str (str ) lineFunc , list [str ] lineseps = newLineCharacters )
132143 = mergeLines ([<lineFunc (l ), nl > | <l , nl > <- separateLines (input , lineseps =lineseps )]);
133144
134145test bool perLineTest ()
135146 = perLine ("a\n b\r\n c\n\r\n " , str (str line ) { return line + "x" ; }) == "ax\n bx\r\n cx\n x\r\n x" ;
136147
148+ @synopsis {Trim trailing non-newline whitespace from each line in a multi-line string.}
137149str trimTrailingWhitespace (str input ) {
138150 str trimLineTrailingWs (/^<nonWhiteSpace :.*\S > \s *$/) = nonWhiteSpace ;
139151 default str trimLineTrailingWs (/^\s *$/) = "" ;
0 commit comments