Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 111 additions & 53 deletions Classes/Domain/Search/Score/ScoreCalculationService.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

namespace ApacheSolrForTypo3\Solr\Domain\Search\Score;

use TYPO3\CMS\Core\Utility\GeneralUtility;

/**
* Provides the functionality to calculate scores and renders them in a minimalistic template.
*
Expand All @@ -23,6 +25,8 @@
*/
class ScoreCalculationService
{
/**
 * Boost per query field, keyed by field name.
 *
 * Filled in getRenderedScores() from the "field^boost" entries of the query
 * fields string and read in parseLine() to annotate weight lines.
 */
private array $fieldBoostMapping;

/**
 * Renders an overview of how the score for a certain document has been
 * calculated.
 *
 * @param string $solrDebugData score explanation text, one (indented) entry per line
 * @param string $queryFields comma separated query fields, each optionally followed
 *                            by "^boost" (e.g. "title^5.0, content^1.0")
 * @return string the rendered overview as returned by render()
 */
public function getRenderedScores($solrDebugData, $queryFields)
{
    // Start from a clean mapping so boosts of a previous call cannot leak into this one.
    $this->fieldBoostMapping = [];
    foreach (GeneralUtility::trimExplode(',', $queryFields, true) as $queryField) {
        // A field may be configured without a boost; pad so the destructuring never
        // hits a missing offset.
        [$field, $boost] = array_pad(explode('^', $queryField, 2), 2, '');
        $this->fieldBoostMapping[$field] = $boost;
    }

    // \R matches any line break sequence, so the split does not depend on the
    // platform's PHP_EOL. Empty lines carry no score information and are dropped.
    $solrDebugArray = preg_split('/\R/', trim((string)$solrDebugData), -1, PREG_SPLIT_NO_EMPTY) ?: [];
    $highScores = $this->parseScores($solrDebugArray);

    return $this->render($highScores);
}

Expand All @@ -46,75 +56,123 @@ public function getRenderedScores($solrDebugData, $queryFields)
/**
 * Renders the parsed score tree as an HTML table.
 *
 * Each entry of $highScores is an array with a 'node' key (a Score object or
 * null) and an optional 'children' key holding further entries of the same
 * shape, i.e. the structure produced by parseScores().
 *
 * @param array $highScores hierarchical list of score entries
 * @param int $maxDepth deepest nesting level that is still rendered
 *                      (0 = top level rows only); the default of 3 renders
 *                      four levels
 * @return string the HTML table
 */
public function render(array $highScores, int $maxDepth = 3)
{
    return '<table class="table">'
        . '<thead><tr><th>Score</th><th>Field</th><th>Boost</th><th>Search term</th></tr></thead>'
        . '<tbody>'
        . $this->renderEntries($highScores, 0, null, $maxDepth)
        . '</tbody>'
        . '</table>';
}

/**
 * Renders one level of the score tree followed, depth first, by the levels below it.
 *
 * @param array $entries entries of the current level
 * @param int $level nesting level of $entries, starting at 0
 * @param Score|null $parent node the entries belong to, null at the top level
 * @param int $maxDepth deepest level that is still rendered
 * @return string the concatenated table rows
 */
private function renderEntries(array $entries, int $level, $parent, int $maxDepth): string
{
    if ($level > $maxDepth) {
        return '';
    }

    $rows = '';
    foreach ($entries as $entry) {
        $node = $entry['node'] ?? null;
        // Lines that could not be parsed have no node; skip the row but keep
        // descending so their children are not lost.
        if ($node !== null) {
            $rows .= $this->renderRow($node, $level, $parent);
        }
        $rows .= $this->renderEntries($entry['children'] ?? [], $level + 1, $node, $maxDepth);
    }

    return $rows;
}

/**
 * Renders a single score node as one table row.
 *
 * @param Score|null $node the node to render
 * @param int $level nesting level; each level indents the score column by seven
 *                   non-breaking spaces
 * @param Score|null $parent the node's parent, null for top level nodes
 * @return string the table row markup, or an empty string when $node is null
 */
public function renderRow($node, $level, $parent)
{
    // parseLine() yields null for lines it cannot interpret; there is nothing to show then.
    if ($node === null) {
        return '';
    }

    $score = (float)$node->getScore();

    // Children of a "max of" node whose score differs from the parent's score
    // are grayed out.
    $isGrayedOut = $parent?->getFieldName() === 'max of'
        && (float)$parent->getScore() !== $score;
    $style = $isGrayedOut ? 'color:gray' : '';

    // The entity needs its terminating semicolon to be well-formed HTML.
    $pad = str_repeat('&nbsp;', max(0, (int)$level) * 7);

    $cells = [
        $pad . '+&nbsp;' . number_format($score, 2),
        htmlspecialchars((string)$node->getFieldName()),
        htmlspecialchars((string)$node->getBoost()),
        htmlspecialchars((string)$node->getSearchTerm()),
    ];

    $row = '<tr>';
    foreach ($cells as $cell) {
        $row .= '<td style="' . $style . '">' . $cell . '</td>';
    }

    return $row . '</tr>';
}

/**
 * Recursively turns an array of indented lines into a hierarchical array.
 *
 * Two characters of leading whitespace make up one nesting level. Lines are
 * consumed from the front of $lines as they are assigned to a level, so the
 * array passed in is emptied while parsing. Blank lines are skipped.
 *
 * A line that is indented deeper than expected is not consumed at the current
 * level: it is recorded here and again at each deeper level until its own
 * level is reached.
 *
 * @param array $lines remaining lines, modified in place
 * @param int $depth nesting level this call collects entries for
 * @param int $failsafe recursion guard, incremented with every nested call
 * @return array list of ['node' => ?Score, 'children' => array] entries
 * @throws \RuntimeException when the nesting exceeds 1000 levels
 */
public function parseScores(array &$lines = [], int $depth = 0, int $failsafe = 0): array
{
    if ($failsafe >= 1000) {
        // Abort with an exception the caller can handle instead of terminating the request.
        throw new \RuntimeException('Score explanation is nested too deeply to be parsed.', 1700000001);
    }

    $result = [];
    // Compare against false explicitly: a blank line or the string "0" must not end the loop.
    while (($line = reset($lines)) !== false) {
        $line = (string)$line;
        $content = trim($line);
        if ($content === '') {
            array_shift($lines);
            continue;
        }

        $indentation = strlen($line) - strlen(ltrim($line));
        $currentDepth = intdiv($indentation, 2);

        if ($currentDepth < $depth) {
            // The line belongs to an ancestor level; let the caller deal with it.
            break;
        }

        if ($currentDepth === $depth) {
            // The line sits on this level: consume it.
            array_shift($lines);
        }

        $result[] = [
            'node' => $this->parseLine($content),
            'children' => $this->parseScores($lines, $depth + 1, $failsafe + 1),
        ];
    }

    return $result;
}
/**
 * Parses a single line of score debugging output and
 * transforms it into a Score object.
 *
 * Recognised shapes, checked in this order:
 *  - "<score> = weight(<field>:<term> ...)"  field, its configured boost and the search term
 *  - "<score> = sum of:" / "<score> = max of:"  field name "sum of" / "max of"
 *  - "<score> = FunctionQuery(<function>),"  field name "boostFunction"
 *  - "<score> = <anything>"  the remainder is stored as search term
 *
 * @param string $line one line of the explanation, without indentation
 * @return Score|null the parsed score, or null when the line matches none of the shapes
 */
public function parseLine(string $line): ?Score
{
    // Besides plain decimals this accepts scientific notation (e.g. 1.0E-4) and
    // integers, which presumably occur for very small scores and for counters.
    $number = '(-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)';

    if (preg_match('/' . $number . ' = weight\((.*)\)/', $line, $weightMatch)) {
        $field = '';
        $boost = '';
        $searchTerm = '??';
        // Plain terms first, then quoted phrases; the u modifier keeps multibyte terms intact.
        $termPatterns = [
            '/(\w+):(\w+)/u',
            '/(\w+):"([\w\ ]+)"/u',
        ];
        foreach ($termPatterns as $termPattern) {
            if (preg_match($termPattern, $weightMatch[2], $match)) {
                $field = $match[1];
                $boost = $this->fieldBoostMapping[$field] ?? '';
                $searchTerm = $match[2];
                break;
            }
        }

        return new Score($boost, $field, $weightMatch[1], $searchTerm);
    }

    if (preg_match('/' . $number . ' = (sum|max) of:/', $line, $match)) {
        return new Score('', $match[2] . ' of', $match[1], '');
    }

    if (preg_match('/' . $number . ' = FunctionQuery\((.*)\),/', $line, $match)) {
        return new Score('', 'boostFunction', $match[1], $match[2]);
    }

    if (preg_match('/' . $number . ' = (.*)/', $line, $match)) {
        return new Score('', '', $match[1], $match[2]);
    }

    return null;
}
}
Loading