Document & fix string/formatting utils.

toinehartman · toinehartman · commit 4b58c9871af9 · 2025-08-18T16:47:44.000+02:00
diff --git a/rascal-lsp/src/main/rascal/library/util/Format.rsc b/rascal-lsp/src/main/rascal/library/util/Format.rsc
@@ -16,19 +16,23 @@ list[str] newLineCharacters = [
     "\u2029" // PS
 ];
 
+@synopsis{Comparator to sort strings by length (ascending).}
 private bool bySize(str a, str b) = size(a) < size(b);
+
+@synopsis{Comparator to sort strings by relative position in a reference list.}
 private bool(str, str) byIndex(list[str] indices) {
     return bool(str a, str b) {
         return indexOf(indices, a) < indexOf(indices, b);
     };
 }
 
+@synopsis{Determine the most-used newline character in a string.}
 str mostUsedNewline(str input, list[str] lineseps = newLineCharacters, str(list[str]) tieBreaker = getFirstFrom) {
     linesepCounts = (nl: 0 | nl <- lineseps);
     for (nl <- sort(lineseps, bySize)) {
         int count = size(findAll(input, nl));
         linesepCounts[nl] = count;
-        // subtract all occurrences of substrings that we counted before
+        // each occurrence of nl was also counted for every shorter newline sequence contained in nl; subtract it there
         for (str snl <- substrings(nl), linesepCounts[snl]?) {
             linesepCounts[snl] = linesepCounts[snl] - count;
         }
@@ -38,6 +42,7 @@ str mostUsedNewline(str input, list[str] lineseps = newLineCharacters, str(list[
     return tieBreaker(sort(byCount[max(domain(byCount))], byIndex(lineseps)));
 }
 
+@synopsis{Split a string into an indentation prefix (leading whitespace) and the remainder of the string.}
 tuple[str indentation, str rest] splitIndentation(/^<indentation:\s*><rest:.*>/)
     = <indentation, rest>;
 
@@ -57,6 +62,7 @@ str(str) indentTabsAsSpaces(int tabSize) {
     };
 }
 
+@synopsis{Compute all non-empty proper substrings of a string (the string itself is excluded).}
 set[str] substrings(str input)
     = {input[i..i+l] | int i <- [0..size(input)], int l <- [1..size(input)], i + l <= size(input)};
 
@@ -69,10 +75,11 @@ test bool mostUsedNewlineTestTie()
 test bool mostUsedNewlineTestGreedy()
     = mostUsedNewline("\r\n\r\n\n") == "\r\n";
 
+@synopsis{If a string does not end with a newline character, append its most-used newline character.}
 str insertFinalNewline(str input, list[str] lineseps = newLineCharacters)
     = any(nl <- lineseps, endsWith(input, nl))
     ? input
-    : input + mostUsedNewline(input)
+    : input + mostUsedNewline(input, lineseps=lineseps)
     ;
 
 test bool insertFinalNewlineTestSimple()
@@ -87,7 +94,8 @@ test bool insertFinalNewlineTestMixed()
     = insertFinalNewline("a\nb\r\n")
     == "a\nb\r\n";
 
-str trimFinalNewline(str input, list[str] lineseps = newLineCharacters) {
+@synopsis{Remove all newlines from the end of a string.}
+str trimFinalNewlines(str input, list[str] lineseps = newLineCharacters) {
     orderedSeps = reverse(sort(lineseps, bySize));
     while (nl <- orderedSeps, endsWith(input, nl)) {
         input = input[0..-size(nl)];
@@ -96,14 +104,15 @@ str trimFinalNewline(str input, list[str] lineseps = newLineCharacters) {
 }
 
 test bool trimFinalNewlineTestSimple()
-    = trimFinalNewline("a\n\n\n") == "a";
+    = trimFinalNewlines("a\n\n\n") == "a";
 
 test bool trimFinalNewlineTestEndOnly()
-    = trimFinalNewline("a\n\n\nb\n\n") == "a\n\n\nb";
+    = trimFinalNewlines("a\n\n\nb\n\n") == "a\n\n\nb";
 
 test bool trimFinalNewlineTestWhiteSpace()
-    = trimFinalNewline("a\n\n\nb\n\n ") == "a\n\n\nb\n\n ";
+    = trimFinalNewlines("a\n\n\nb\n\n ") == "a\n\n\nb\n\n ";
 
+@synopsis{Split a string into <text, newline> pairs, one for each line.}
 list[tuple[str, str]] separateLines(str input, list[str] lineseps = newLineCharacters) {
     orderedSeps = reverse(sort(lineseps, bySize));
 
@@ -125,15 +134,18 @@ list[tuple[str, str]] separateLines(str input, list[str] lineseps = newLineChara
     return lines;
 }
 
+@synopsis{Concatenate a list of <line, newline> pairs to form a single string.}
 str mergeLines(list[tuple[str, str]] lines)
     = ("" | it + line + sep | <line, sep> <- lines);
 
+@synopsis{Process the text of a string per line, maintaining the original newline characters.}
 str perLine(str input, str(str) lineFunc, list[str] lineseps = newLineCharacters)
     = mergeLines([<lineFunc(l), nl> | <l, nl> <- separateLines(input, lineseps=lineseps)]);
 
 test bool perLineTest()
     = perLine("a\nb\r\nc\n\r\n", str(str line) { return line + "x"; }) == "ax\nbx\r\ncx\nx\r\nx";
 
+@synopsis{Trim trailing non-newline whitespace from each line in a multi-line string.}
 str trimTrailingWhitespace(str input) {
     str trimLineTrailingWs(/^<nonWhiteSpace:.*\S>\s*$/) = nonWhiteSpace;
     default str trimLineTrailingWs(/^\s*$/) = "";