Skip to content

Commit 7b46ba1

Browse files
committed
Merge branch 'lines-to-lists'
2 parents 5ec4d6e + 1a3ae6f commit 7b46ba1

File tree

3 files changed

+337
-68
lines changed

3 files changed

+337
-68
lines changed

src/Utility/Convert.php

Lines changed: 107 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1016,8 +1016,13 @@ public static function queryToData(array $query): array
10161016
* are grouped with any subsequent 'child' lines ("list items")
10171017
*
10181018
* Lines that match `$regex` are regarded as list items. Other lines are
1019-
* used as the section name for subsequent list items. Blank lines clear the
1020-
* current section name and are not included in the return value.
1019+
* used as the section name for subsequent list items. Blank lines between
1020+
* list items clear the current section name.
1021+
*
1022+
* If a named subpattern in `$regex` called `indent` matches a non-empty
1023+
* string, subsequent lines with the same number of spaces for indentation
1024+
* as there are characters in the match are treated as part of the item,
1025+
* including any blank lines between them.
10211026
*
10221027
* @param string $separator Used between top-level lines and sections.
10231028
* @param string|null $marker Added before each section name. The equivalent
@@ -1031,56 +1036,136 @@ public static function linesToLists(
10311036
string $text,
10321037
string $separator = "\n",
10331038
?string $marker = null,
1034-
string $regex = '/^\h*[-*] /',
1039+
string $regex = '/^(?P<indent>\h*[-*] )/',
10351040
bool $clean = false
10361041
): string {
1037-
$marker = $marker ? $marker . ' ' : null;
1038-
$indent = $marker ? str_repeat(' ', mb_strlen($marker)) : '';
1039-
$markerIsItem = $marker && Pcre::match($regex, $marker);
1042+
$marker = ($marker ?? '') !== '' ? $marker . ' ' : null;
1043+
$indent = $marker !== null ? str_repeat(' ', mb_strlen($marker)) : '';
1044+
$markerIsItem = $marker !== null && Pcre::match($regex, $marker);
10401045

10411046
/** @var array<string,string[]> */
10421047
$sections = [];
1043-
foreach (preg_split('/\r\n|\n|\r/', $text) as $line) {
1048+
$lastWasItem = false;
1049+
$lines = preg_split('/\r\n|\n|\r/', $text);
1050+
for ($i = 0; $i < count($lines); $i++) {
1051+
$line = $lines[$i];
1052+
10441053
// Remove pre-existing markers early to ensure sections with the
10451054
// same name are combined
1046-
if ($marker && !$markerIsItem && strpos($line, $marker) === 0) {
1055+
if ($marker !== null && !$markerIsItem && strpos($line, $marker) === 0) {
10471056
$line = substr($line, strlen($marker));
10481057
}
1049-
if (!trim($line)) {
1050-
unset($section);
1058+
1059+
// Treat blank lines between items as section breaks
1060+
if (trim($line) === '') {
1061+
if ($lastWasItem) {
1062+
unset($section);
1063+
}
10511064
continue;
10521065
}
1053-
if (!Pcre::match($regex, $line)) {
1066+
1067+
// Collect any subsequent indented lines
1068+
if (Pcre::match($regex, $line, $matches)) {
1069+
$matchIndent = $matches['indent'] ?? '';
1070+
if ($matchIndent !== '') {
1071+
$matchIndent = str_repeat(' ', mb_strlen($matchIndent));
1072+
$pendingWhitespace = '';
1073+
$backtrack = 0;
1074+
while ($i < count($lines) - 1) {
1075+
$nextLine = $lines[$i + 1];
1076+
if (trim($nextLine) === '') {
1077+
$pendingWhitespace .= $nextLine . "\n";
1078+
$backtrack++;
1079+
} elseif (substr($nextLine, 0, strlen($matchIndent)) === $matchIndent) {
1080+
$line .= "\n" . $pendingWhitespace . $nextLine;
1081+
$pendingWhitespace = '';
1082+
$backtrack = 0;
1083+
} else {
1084+
$i -= $backtrack;
1085+
break;
1086+
}
1087+
$i++;
1088+
};
1089+
}
1090+
} else {
10541091
$section = $line;
10551092
}
1093+
10561094
$key = $section ?? $line;
1095+
10571096
if (!array_key_exists($key, $sections)) {
10581097
$sections[$key] = [];
10591098
}
1060-
if ($key != $line && !in_array($line, $sections[$key])) {
1061-
$sections[$key][] = $line;
1099+
1100+
if ($key !== $line) {
1101+
if (!in_array($line, $sections[$key])) {
1102+
$sections[$key][] = $line;
1103+
}
1104+
$lastWasItem = true;
1105+
} else {
1106+
$lastWasItem = false;
10621107
}
10631108
}
1109+
10641110
// Move lines with no associated list to the top
10651111
/** @var array<string,string[]> */
1066-
$sections = array_merge(
1067-
array_filter($sections, fn($lines) => !count($lines)),
1068-
array_filter($sections, fn($lines) => count($lines))
1069-
);
1112+
$top = [];
1113+
$last = null;
1114+
foreach ($sections as $section => $lines) {
1115+
if (count($lines)) {
1116+
continue;
1117+
}
1118+
1119+
unset($sections[$section]);
1120+
1121+
if ($clean) {
1122+
$top[$section] = [];
1123+
continue;
1124+
}
1125+
1126+
// Collect second and subsequent consecutive top-level list items
1127+
// under the first so they don't form a loose list
1128+
if (Pcre::match($regex, $section)) {
1129+
if ($last !== null) {
1130+
$top[$last][] = $section;
1131+
continue;
1132+
}
1133+
$last = $section;
1134+
} else {
1135+
$last = null;
1136+
}
1137+
$top[$section] = [];
1138+
}
1139+
/** @var array<string,string[]> */
1140+
$sections = array_merge($top, $sections);
1141+
10701142
$groups = [];
1071-
foreach ($sections as $section => $sectionLines) {
1143+
foreach ($sections as $section => $lines) {
10721144
if ($clean) {
10731145
$section = Pcre::replace($regex, '', $section, 1);
10741146
}
1075-
if ($marker &&
1147+
1148+
$marked = false;
1149+
if ($marker !== null &&
10761150
!($markerIsItem && strpos($section, $marker) === 0) &&
10771151
!Pcre::match($regex, $section)) {
10781152
$section = $marker . $section;
1153+
$marked = true;
10791154
}
1080-
$groups[] = $section;
1081-
if ($sectionLines) {
1082-
$groups[] = $indent . implode("\n" . $indent, $sectionLines);
1155+
1156+
if (!$lines) {
1157+
$groups[] = $section;
1158+
continue;
10831159
}
1160+
1161+
// Don't separate or indent top-level list items collected above
1162+
if (!$marked && Pcre::match($regex, $section)) {
1163+
$groups[] = implode("\n", [$section, ...$lines]);
1164+
continue;
1165+
}
1166+
1167+
$groups[] = $section;
1168+
$groups[] = $indent . implode("\n" . $indent, $lines);
10841169
}
10851170

10861171
return implode($separator, $groups);

tests/convert

Lines changed: 0 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -135,49 +135,3 @@ print_r([
135135
'PT48M' => Convert::intervalToSeconds('PT48M'),
136136
'P1W2D' => Convert::intervalToSeconds('P1W2D'),
137137
]);
138-
139-
$text = <<<EOF
140-
- Before lists
141-
142-
Section:
143-
- d
144-
Other section:
145-
- <not a letter>
146-
Without a subsequent list
147-
Section:
148-
- a
149-
- b
150-
Section:
151-
- c
152-
- b
153-
- d
154-
EOF;
155-
156-
$mbText = <<<EOF
157-
- Before lists
158-
📍 Section:
159-
- list item
160-
- another
161-
162-
Other section:
163-
- item i
164-
- item ii
165-
166-
- Standalone
167-
168-
Also standalone
169-
170-
Section:
171-
- another
172-
- and another
173-
EOF;
174-
175-
print_r([
176-
'from' => $text,
177-
'to' => Convert::linesToLists($text),
178-
'markdown' => Convert::linesToLists($text, "\n\n"),
179-
'nested' => Convert::linesToLists($text, "\n\n", '-'),
180-
'mb_from' => $mbText,
181-
'mb_to' => Convert::linesToLists($mbText, "\n", '📍'),
182-
'mb_markdown' => Convert::linesToLists($mbText, "\n\n", '📍'),
183-
]);

0 commit comments

Comments
 (0)