From 54790b41c46694dcc4f8c80b4859f48b269f90c9 Mon Sep 17 00:00:00 2001 From: Philipp Kitzberger Date: Wed, 29 Jan 2025 11:34:36 +0100 Subject: [PATCH] [FEATURE] Improve parsing/rendering of score debug output --- .../Search/Score/ScoreCalculationService.php | 166 ++++++++++++------ 1 file changed, 114 insertions(+), 52 deletions(-) diff --git a/Classes/Domain/Search/Score/ScoreCalculationService.php b/Classes/Domain/Search/Score/ScoreCalculationService.php index 9a517a57a..8b0734156 100644 --- a/Classes/Domain/Search/Score/ScoreCalculationService.php +++ b/Classes/Domain/Search/Score/ScoreCalculationService.php @@ -15,20 +15,31 @@ namespace ApacheSolrForTypo3\Solr\Domain\Search\Score; +use TYPO3\CMS\Core\Utility\GeneralUtility; + /** * Provides the functionality to calculate scores and renders them in a minimalistic template. */ class ScoreCalculationService { + private array $fieldBoostMapping; + /** * Renders an overview in HTML of how the score for a certain document has been calculated by Apache Solr using debug data. * * @param string $solrDebugData debug data from the solr response + * @param string $queryFields * @return string The HTML showing the score analysis */ public function getRenderedScores(string $solrDebugData, string $queryFields): string { - $highScores = $this->parseScores($solrDebugData, $queryFields); + foreach (GeneralUtility::trimExplode(',', $queryFields, true) as $queryField) { + list($field, $boost) = explode('^', $queryField); + $this->fieldBoostMapping[$field] = (float)$boost; + } + + $solrDebugArray = explode(PHP_EOL, trim($solrDebugData)); + $highScores = $this->parseScores($solrDebugArray); return $this->render($highScores); } @@ -38,71 +49,122 @@ public function getRenderedScores(string $solrDebugData, string $queryFields): s public function render(array $highScores): string { $scores = []; - $totalScore = 0; + + $content = '' + . '' + . ''; foreach ($highScores as $highScore) { - /** @var Score $highScore */ - $scores[] = - '' - . '' - . ''; - $totalScore += $highScore->getScore(); + $content .= $this->renderRow($highScore['node'], $level = 0, null); + foreach ($highScore['children'] ?? [] as $child) { + $content .= $this->renderRow($child['node'], $level = 1, $highScore['node']); + foreach ($child['children'] ?? [] as $grandchild) { + $content .= $this->renderRow($grandchild['node'], $level = 2, $child['node']); + foreach ($grandchild['children'] ?? [] as $greatgrandchild) { + $content .= $this->renderRow($greatgrandchild['node'], $level = 3, $grandchild['node']); + } + } + } } - return '
ScoreFieldBoostSearch term
+ ' . htmlspecialchars(number_format($highScore->getScore(), 9)) . '' . htmlspecialchars($highScore->getFieldName()) . '' . htmlspecialchars(number_format($highScore->getBoost(), 9)) . '
' - . '' - . '' . implode('', $scores) . '' - . '' + $content .= '' . '
ScoreFieldBoost
= ' . $totalScore . ' (Inaccurate analysis! Not all parts of the score have been taken into account.)
'; + + return $content; + } + + private function renderRow($node, $level, $parent) + { + $style = ''; + if ($parent?->getFieldName() === 'max of') { + if ($parent->getScore() != $node->getScore()) { + $style = 'color:gray'; + } + } + $pad = str_repeat(' ', $level * 7); + return '' + . '' . $pad . '+ ' . number_format($node->getScore(), 2) . '' + . '' . htmlspecialchars($node->getFieldName()) . '' + . '' . htmlspecialchars($node->getBoost()) . '' + . '' . htmlspecialchars($node->getSearchTerm()) . '' + .''; } /** - * Parses the debugData and the queryFields into an array of score objects. - * - * @return Score[] array of Score + * Recursively turns an array of indented lines into a hierarchical array. */ - public function parseScores(string $debugData, string $queryFields): array + private function parseScores(array &$lines = [], int $depth = 0, int $failsafe = 0): array { - $highScores = []; - - /* TODO Provide better parsing - * - * parsing could be done line by line, - * * recording indentation level - * * replacing abbreviations - * * replacing phrases like "product of" by mathematical symbols (* or x) - * * ... - */ - - // matches search term weights, ex: 0.42218783 = (MATCH) weight(content:iPod^40.0 in 43), product of: - $pattern = '/(.*) = weight\(([^ \)]*)/'; - $scoreMatches = []; - preg_match_all($pattern, $debugData, $scoreMatches); - - foreach ($scoreMatches[0] as $key => $value) { - // split field from search term - [$field, $searchTerm] = explode(':', $scoreMatches[2][$key]); - - $currentScoreValue = (float)$scoreMatches[1][$key]; - - $scoreWasSetForFieldBefore = isset($highScores[$field]); - $scoreIsHigher = false; - if ($scoreWasSetForFieldBefore) { - /** @var Score $previousScore */ - $previousScore = $highScores[$field]; - $scoreIsHigher = $previousScore->getScore() < $currentScoreValue; + if ($failsafe >= 1000) { + die('failsafe'); + } + + $result = []; + while ($line = current($lines)) { + $indentation = strlen($line) - strlen(ltrim($line)); + $currentDepth = (int)($indentation / 2); + + if ($currentDepth < $depth) { + // that's the next parent already! + break; } - // keep track of the highest score per search term - if (!$scoreWasSetForFieldBefore || $scoreIsHigher) { - $pattern = '/' . preg_quote($field, '/') . '\^([\d.]*)/'; - $boostMatches = []; - preg_match_all($pattern, $queryFields, $boostMatches); - $boost = (float)($boostMatches[1][0] ?? 0); - $highScores[$field] = new Score($boost, $field, $currentScoreValue, $searchTerm); + if ($currentDepth == $depth) { + // that's a sibling + array_shift($lines); + } + + if ($currentDepth >= $depth) { + // that's the first kid + $result[] = [ + 'node' => $this->parseLine(trim($line)), + 'children' => $this->parseScores($lines, $depth+1, $failsafe++), + ]; + } + } + + return $result; + } + + /** + * Parses a single line of score debugging output and + * transforms it into a Score object. + */ + private function parseLine(string $line): ?Score + { + if (preg_match('/(\d+\.\d+) = weight\((.*)\)/', $line, $weightMatch)) { + $score = (float)$weightMatch[1]; + $field = ''; + $boost = 0.0; + $searchTerm = '??'; + if (preg_match('/(\w+):(\w+)/', $weightMatch[2], $match)) { + $field = $match[1]; + $boost = $this->fieldBoostMapping[$field] ?? 0.0; + $searchTerm = $match[2]; + } elseif (preg_match('/(\w+):"([\w\ ]+)"/', $weightMatch[2], $match)) { + $field = $match[1]; + $boost = $this->fieldBoostMapping[$field] ?? 0.0; + $searchTerm = $match[2]; } + $score = new Score($boost, $field, $score, $searchTerm); + } elseif (preg_match('/(\d+\.\d+) = sum of:/', $line, $match)) { + $score = (float)$match[1]; + $score = new Score(0.0, 'sum of', $score, ''); + } elseif (preg_match('/(\d+\.\d+) = max of:/', $line, $match)) { + $score = (float)$match[1]; + $score = new Score(0.0, 'max of', $score, ''); + } elseif (preg_match('/(\d+\.\d+) = FunctionQuery\((.*)\),/', $line, $match)) { + $score = (float)$match[1]; + $function = $match[2]; + $score = new Score(0.0, 'boostFunction', $score, $function); + } elseif (preg_match('/(\d+\.\d+) = (.*)/', $line, $match)) { + $score = (float)$match[1]; + $misc = $match[2]; + $score = new Score(0.0, '', $score, $misc); + } else { + $score = null; } - return $highScores; + return $score; } }