diff --git a/Classes/Domain/Search/Score/ScoreCalculationService.php b/Classes/Domain/Search/Score/ScoreCalculationService.php
index 9a517a57a..8b0734156 100644
--- a/Classes/Domain/Search/Score/ScoreCalculationService.php
+++ b/Classes/Domain/Search/Score/ScoreCalculationService.php
@@ -15,20 +15,31 @@
namespace ApacheSolrForTypo3\Solr\Domain\Search\Score;
+use TYPO3\CMS\Core\Utility\GeneralUtility;
+
/**
* Provides the functionality to calculate scores and renders them in a minimalistic template.
*/
class ScoreCalculationService
{
+ private array $fieldBoostMapping;
+
/**
* Renders an overview in HTML of how the score for a certain document has been calculated by Apache Solr using debug data.
*
* @param string $solrDebugData debug data from the solr response
+ * @param string $queryFields
* @return string The HTML showing the score analysis
*/
public function getRenderedScores(string $solrDebugData, string $queryFields): string
{
- $highScores = $this->parseScores($solrDebugData, $queryFields);
+ foreach (GeneralUtility::trimExplode(',', $queryFields, true) as $queryField) {
+ list($field, $boost) = explode('^', $queryField);
+ $this->fieldBoostMapping[$field] = (float)$boost;
+ }
+
+ $solrDebugArray = explode(PHP_EOL, trim($solrDebugData));
+ $highScores = $this->parseScores($solrDebugArray);
return $this->render($highScores);
}
@@ -38,71 +49,122 @@ public function getRenderedScores(string $solrDebugData, string $queryFields): s
public function render(array $highScores): string
{
$scores = [];
- $totalScore = 0;
+
+ $content = '
'
+ . '| Score | Field | Boost | Search term |
'
+ . '';
foreach ($highScores as $highScore) {
- /** @var Score $highScore */
- $scores[] =
- '+ ' . htmlspecialchars(number_format($highScore->getScore(), 9)) . ' | '
- . '' . htmlspecialchars($highScore->getFieldName()) . ' | '
- . '' . htmlspecialchars(number_format($highScore->getBoost(), 9)) . ' | ';
- $totalScore += $highScore->getScore();
+ $content .= $this->renderRow($highScore['node'], $level = 0, null);
+ foreach ($highScore['children'] ?? [] as $child) {
+ $content .= $this->renderRow($child['node'], $level = 1, $highScore['node']);
+ foreach ($child['children'] ?? [] as $grandchild) {
+ $content .= $this->renderRow($grandchild['node'], $level = 2, $child['node']);
+ foreach ($grandchild['children'] ?? [] as $greatgrandchild) {
+ $content .= $this->renderRow($greatgrandchild['node'], $level = 3, $grandchild['node']);
+ }
+ }
+ }
}
- return ''
- . '| Score | Field | Boost |
'
- . '' . implode('
', $scores) . '
'
- . '| = ' . $totalScore . ' (Inaccurate analysis! Not all parts of the score have been taken into account.) |
'
+ $content .= ''
. '
';
+
+ return $content;
+ }
+
+ private function renderRow($node, $level, $parent)
+ {
+ $style = '';
+ if ($parent?->getFieldName() === 'max of') {
+ if ($parent->getScore() != $node->getScore()) {
+ $style = 'color:gray';
+ }
+ }
+ $pad = str_repeat(' ', $level * 7);
+ return ''
+ . '| ' . $pad . '+ ' . number_format($node->getScore(), 2) . ' | '
+ . '' . htmlspecialchars($node->getFieldName()) . ' | '
+ . '' . htmlspecialchars($node->getBoost()) . ' | '
+ . '' . htmlspecialchars($node->getSearchTerm()) . ' | '
+ .'
';
}
/**
- * Parses the debugData and the queryFields into an array of score objects.
- *
- * @return Score[] array of Score
+ * Recursively turns an array of indented lines into a hierarchical array.
*/
- public function parseScores(string $debugData, string $queryFields): array
+ private function parseScores(array &$lines = [], int $depth = 0, int $failsafe = 0): array
{
- $highScores = [];
-
- /* TODO Provide better parsing
- *
- * parsing could be done line by line,
- * * recording indentation level
- * * replacing abbreviations
- * * replacing phrases like "product of" by mathematical symbols (* or x)
- * * ...
- */
-
- // matches search term weights, ex: 0.42218783 = (MATCH) weight(content:iPod^40.0 in 43), product of:
- $pattern = '/(.*) = weight\(([^ \)]*)/';
- $scoreMatches = [];
- preg_match_all($pattern, $debugData, $scoreMatches);
-
- foreach ($scoreMatches[0] as $key => $value) {
- // split field from search term
- [$field, $searchTerm] = explode(':', $scoreMatches[2][$key]);
-
- $currentScoreValue = (float)$scoreMatches[1][$key];
-
- $scoreWasSetForFieldBefore = isset($highScores[$field]);
- $scoreIsHigher = false;
- if ($scoreWasSetForFieldBefore) {
- /** @var Score $previousScore */
- $previousScore = $highScores[$field];
- $scoreIsHigher = $previousScore->getScore() < $currentScoreValue;
+ if ($failsafe >= 1000) {
+ die('failsafe');
+ }
+
+ $result = [];
+ while ($line = current($lines)) {
+ $indentation = strlen($line) - strlen(ltrim($line));
+ $currentDepth = (int)($indentation / 2);
+
+ if ($currentDepth < $depth) {
+ // that's the next parent already!
+ break;
}
- // keep track of the highest score per search term
- if (!$scoreWasSetForFieldBefore || $scoreIsHigher) {
- $pattern = '/' . preg_quote($field, '/') . '\^([\d.]*)/';
- $boostMatches = [];
- preg_match_all($pattern, $queryFields, $boostMatches);
- $boost = (float)($boostMatches[1][0] ?? 0);
- $highScores[$field] = new Score($boost, $field, $currentScoreValue, $searchTerm);
+ if ($currentDepth == $depth) {
+ // that's a sibling
+ array_shift($lines);
+ }
+
+ if ($currentDepth >= $depth) {
+ // that's the first kid
+ $result[] = [
+ 'node' => $this->parseLine(trim($line)),
+ 'children' => $this->parseScores($lines, $depth+1, $failsafe++),
+ ];
+ }
+ }
+
+ return $result;
+ }
+
+ /**
+ * Parses a single line of score debugging output and
+ * transforms it into a Score object.
+ */
+ private function parseLine(string $line): ?Score
+ {
+ if (preg_match('/(\d+\.\d+) = weight\((.*)\)/', $line, $weightMatch)) {
+ $score = (float)$weightMatch[1];
+ $field = '';
+ $boost = 0.0;
+ $searchTerm = '??';
+ if (preg_match('/(\w+):(\w+)/', $weightMatch[2], $match)) {
+ $field = $match[1];
+ $boost = $this->fieldBoostMapping[$field] ?? 0.0;
+ $searchTerm = $match[2];
+ } elseif (preg_match('/(\w+):"([\w\ ]+)"/', $weightMatch[2], $match)) {
+ $field = $match[1];
+ $boost = $this->fieldBoostMapping[$field] ?? 0.0;
+ $searchTerm = $match[2];
}
+ $score = new Score($boost, $field, $score, $searchTerm);
+ } elseif (preg_match('/(\d+\.\d+) = sum of:/', $line, $match)) {
+ $score = (float)$match[1];
+ $score = new Score(0.0, 'sum of', $score, '');
+ } elseif (preg_match('/(\d+\.\d+) = max of:/', $line, $match)) {
+ $score = (float)$match[1];
+ $score = new Score(0.0, 'max of', $score, '');
+ } elseif (preg_match('/(\d+\.\d+) = FunctionQuery\((.*)\),/', $line, $match)) {
+ $score = (float)$match[1];
+ $function = $match[2];
+ $score = new Score(0.0, 'boostFunction', $score, $function);
+ } elseif (preg_match('/(\d+\.\d+) = (.*)/', $line, $match)) {
+ $score = (float)$match[1];
+ $misc = $match[2];
+ $score = new Score(0.0, '', $score, $misc);
+ } else {
+ $score = null;
}
- return $highScores;
+ return $score;
}
}