@@ -1016,8 +1016,13 @@ public static function queryToData(array $query): array
10161016 * are grouped with any subsequent 'child' lines ("list items")
10171017 *
10181018 * Lines that match `$regex` are regarded as list items. Other lines are
1019- * used as the section name for subsequent list items. Blank lines clear the
1020- * current section name and are not included in the return value.
1019+ * used as the section name for subsequent list items. Blank lines between
1020+ * list items clear the current section name.
1021+ *
1022+ * If a named subpattern in `$regex` called `indent` matches a non-empty
1023+ * string, subsequent lines with the same number of spaces for indentation
1024+ * as there are characters in the match are treated as part of the item,
1025+ * including any blank lines.
10211026 *
10221027 * @param string $separator Used between top-level lines and sections.
10231028 * @param string|null $marker Added before each section name. The equivalent
@@ -1031,56 +1036,136 @@ public static function linesToLists(
10311036 string $ text ,
10321037 string $ separator = "\n" ,
10331038 ?string $ marker = null ,
1034- string $ regex = '/^\h*[-*] / ' ,
1039+ string $ regex = '/^(?P<indent> \h*[-*] ) / ' ,
10351040 bool $ clean = false
10361041 ): string {
1037- $ marker = $ marker ? $ marker . ' ' : null ;
1038- $ indent = $ marker ? str_repeat (' ' , mb_strlen ($ marker )) : '' ;
1039- $ markerIsItem = $ marker && Pcre::match ($ regex , $ marker );
1042+ $ marker = ( $ marker ?? '' ) !== '' ? $ marker . ' ' : null ;
1043+ $ indent = $ marker !== null ? str_repeat (' ' , mb_strlen ($ marker )) : '' ;
1044+ $ markerIsItem = $ marker !== null && Pcre::match ($ regex , $ marker );
10401045
10411046 /** @var array<string,string[]> */
10421047 $ sections = [];
1043- foreach (preg_split ('/\r\n|\n|\r/ ' , $ text ) as $ line ) {
1048+ $ lastWasItem = false ;
1049+ $ lines = preg_split ('/\r\n|\n|\r/ ' , $ text );
1050+ for ($ i = 0 ; $ i < count ($ lines ); $ i ++) {
1051+ $ line = $ lines [$ i ];
1052+
10441053 // Remove pre-existing markers early to ensure sections with the
10451054 // same name are combined
1046- if ($ marker && !$ markerIsItem && strpos ($ line , $ marker ) === 0 ) {
1055+ if ($ marker !== null && !$ markerIsItem && strpos ($ line , $ marker ) === 0 ) {
10471056 $ line = substr ($ line , strlen ($ marker ));
10481057 }
1049- if (!trim ($ line )) {
1050- unset($ section );
1058+
1059+ // Treat blank lines between items as section breaks
1060+ if (trim ($ line ) === '' ) {
1061+ if ($ lastWasItem ) {
1062+ unset($ section );
1063+ }
10511064 continue ;
10521065 }
1053- if (!Pcre::match ($ regex , $ line )) {
1066+
1067+ // Collect any subsequent indented lines
1068+ if (Pcre::match ($ regex , $ line , $ matches )) {
1069+ $ matchIndent = $ matches ['indent ' ] ?? '' ;
1070+ if ($ matchIndent !== '' ) {
1071+ $ matchIndent = str_repeat (' ' , mb_strlen ($ matchIndent ));
1072+ $ pendingWhitespace = '' ;
1073+ $ backtrack = 0 ;
1074+ while ($ i < count ($ lines ) - 1 ) {
1075+ $ nextLine = $ lines [$ i + 1 ];
1076+ if (trim ($ nextLine ) === '' ) {
1077+ $ pendingWhitespace .= $ nextLine . "\n" ;
1078+ $ backtrack ++;
1079+ } elseif (substr ($ nextLine , 0 , strlen ($ matchIndent )) === $ matchIndent ) {
1080+ $ line .= "\n" . $ pendingWhitespace . $ nextLine ;
1081+ $ pendingWhitespace = '' ;
1082+ $ backtrack = 0 ;
1083+ } else {
1084+ $ i -= $ backtrack ;
1085+ break ;
1086+ }
1087+ $ i ++;
1088+ };
1089+ }
1090+ } else {
10541091 $ section = $ line ;
10551092 }
1093+
10561094 $ key = $ section ?? $ line ;
1095+
10571096 if (!array_key_exists ($ key , $ sections )) {
10581097 $ sections [$ key ] = [];
10591098 }
1060- if ($ key != $ line && !in_array ($ line , $ sections [$ key ])) {
1061- $ sections [$ key ][] = $ line ;
1099+
1100+ if ($ key !== $ line ) {
1101+ if (!in_array ($ line , $ sections [$ key ])) {
1102+ $ sections [$ key ][] = $ line ;
1103+ }
1104+ $ lastWasItem = true ;
1105+ } else {
1106+ $ lastWasItem = false ;
10621107 }
10631108 }
1109+
10641110 // Move lines with no associated list to the top
10651111 /** @var array<string,string[]> */
1066- $ sections = array_merge (
1067- array_filter ($ sections , fn ($ lines ) => !count ($ lines )),
1068- array_filter ($ sections , fn ($ lines ) => count ($ lines ))
1069- );
1112+ $ top = [];
1113+ $ last = null ;
1114+ foreach ($ sections as $ section => $ lines ) {
1115+ if (count ($ lines )) {
1116+ continue ;
1117+ }
1118+
1119+ unset($ sections [$ section ]);
1120+
1121+ if ($ clean ) {
1122+ $ top [$ section ] = [];
1123+ continue ;
1124+ }
1125+
1126+ // Collect second and subsequent consecutive top-level list items
1127+ // under the first so they don't form a loose list
1128+ if (Pcre::match ($ regex , $ section )) {
1129+ if ($ last !== null ) {
1130+ $ top [$ last ][] = $ section ;
1131+ continue ;
1132+ }
1133+ $ last = $ section ;
1134+ } else {
1135+ $ last = null ;
1136+ }
1137+ $ top [$ section ] = [];
1138+ }
1139+ /** @var array<string,string[]> */
1140+ $ sections = array_merge ($ top , $ sections );
1141+
10701142 $ groups = [];
1071- foreach ($ sections as $ section => $ sectionLines ) {
1143+ foreach ($ sections as $ section => $ lines ) {
10721144 if ($ clean ) {
10731145 $ section = Pcre::replace ($ regex , '' , $ section , 1 );
10741146 }
1075- if ($ marker &&
1147+
1148+ $ marked = false ;
1149+ if ($ marker !== null &&
10761150 !($ markerIsItem && strpos ($ section , $ marker ) === 0 ) &&
10771151 !Pcre::match ($ regex , $ section )) {
10781152 $ section = $ marker . $ section ;
1153+ $ marked = true ;
10791154 }
1080- $ groups [] = $ section ;
1081- if ($ sectionLines ) {
1082- $ groups [] = $ indent . implode ("\n" . $ indent , $ sectionLines );
1155+
1156+ if (!$ lines ) {
1157+ $ groups [] = $ section ;
1158+ continue ;
10831159 }
1160+
1161+ // Don't separate or indent top-level list items collected above
1162+ if (!$ marked && Pcre::match ($ regex , $ section )) {
1163+ $ groups [] = implode ("\n" , [$ section , ...$ lines ]);
1164+ continue ;
1165+ }
1166+
1167+ $ groups [] = $ section ;
1168+ $ groups [] = $ indent . implode ("\n" . $ indent , $ lines );
10841169 }
10851170
10861171 return implode ($ separator , $ groups );
0 commit comments