-
Notifications
You must be signed in to change notification settings - Fork 311
Bug #3846: Release search version sorting is lexicographic instead of natural numeric order #3847
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,7 +24,10 @@ | |
| import java.io.IOException; | ||
| import java.util.Collections; | ||
| import java.util.List; | ||
| import java.util.Locale; | ||
| import java.util.Map; | ||
| import java.util.regex.Matcher; | ||
| import java.util.regex.Pattern; | ||
|
|
||
| import static org.eclipse.sw360.datahandler.couchdb.lucene.NouveauLuceneAwareDatabaseConnector.prepareWildcardQuery; | ||
| import static org.eclipse.sw360.nouveau.LuceneAwareCouchDbConnector.DEFAULT_DESIGN_PREFIX; | ||
|
|
@@ -37,19 +40,30 @@ | |
| public class ReleaseSearchHandler { | ||
|
|
||
| private static final String DDOC_NAME = DEFAULT_DESIGN_PREFIX + "lucene"; | ||
| private static final Pattern DIGIT_SEQUENCE_PATTERN = Pattern.compile("\\d+"); | ||
|
|
||
| private static final NouveauIndexDesignDocument luceneSearchView | ||
| = new NouveauIndexDesignDocument("releases", | ||
| new NouveauIndexFunction( | ||
| "function(doc) {" + | ||
| " function normalizeVersionForSort(version) {" + | ||
| " if (!version || typeof(version) !== 'string') { return ''; }" + | ||
| " var lower = version.toLowerCase();" + | ||
| " return lower.replace(/\\d+/g, function(match) {" + | ||
| " var normalized = match.replace(/^0+(?!$)/, '');" + | ||
| " var length = normalized.length.toString();" + | ||
| " while (length.length < 6) { length = '0' + length; }" + | ||
| " return '{' + length + normalized + '}';" + | ||
| " });" + | ||
|
Comment on lines
+52
to
+57
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would you mind explaining the magic here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This builds a natural sort key for version-like strings. It lowercases the input, then rewrites every numeric chunk into a sortable token: first its length (zero-padded), then the normalized number itself. That way lexicographic sorting compares numbers by numeric magnitude instead of plain string order, so for example 1.2.10 sorts after 1.2.3.
The purpose of this is to make plain string sorting behave like numeric sorting for embedded numbers. For example:
Because the comparison looks at the length first, and then the value, this ensures that:
|
||
| " }" + | ||
| " if(doc.type == 'release') {" + | ||
| " if (doc.name && typeof(doc.name) == 'string' && doc.name.length > 0) {" + | ||
| " index('text', 'name', doc.name, {'store': true});" + | ||
| " index('string', 'name_sort', doc.name);" + | ||
| " }" + | ||
| " if (doc.version && typeof(doc.version) == 'string' && doc.version.length > 0) {" + | ||
| " index('text', 'version', doc.version, {'store': true});" + | ||
| " index('string', 'version_sort', doc.version);" + | ||
| " index('string', 'version_sort', normalizeVersionForSort(doc.version));" + | ||
| " }" + | ||
| " if(doc.createdOn && doc.createdOn.length) {"+ | ||
| " var dt = new Date(doc.createdOn);"+ | ||
|
|
@@ -102,4 +116,40 @@ public Map<PaginationData, List<Release>> search(String searchText, PaginationDa | |
| default -> "createdOn"; | ||
| }; | ||
| } | ||
|
|
||
| static String normalizeVersionForSort(String version) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unused function declared??
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| if (version == null || version.isEmpty()) { | ||
| return ""; | ||
| } | ||
| String lower = version.toLowerCase(Locale.ROOT); | ||
| Matcher matcher = DIGIT_SEQUENCE_PATTERN.matcher(lower); | ||
| StringBuilder normalized = new StringBuilder(lower.length() + 16); | ||
| int cursor = 0; | ||
| while (matcher.find()) { | ||
| normalized.append(lower, cursor, matcher.start()); | ||
| appendNumericToken(normalized, matcher.group()); | ||
| cursor = matcher.end(); | ||
| } | ||
| normalized.append(lower, cursor, lower.length()); | ||
| return normalized.toString(); | ||
| } | ||
|
|
||
| private static void appendNumericToken(StringBuilder output, String rawNumber) { | ||
| int significantStart = 0; | ||
| while (significantStart < rawNumber.length() - 1 && rawNumber.charAt(significantStart) == '0') { | ||
| significantStart++; | ||
| } | ||
| String significant = rawNumber.substring(significantStart); | ||
| output.append('{'); | ||
| appendZeroPaddedLength(output, significant.length()); | ||
| output.append(significant).append('}'); | ||
| } | ||
|
|
||
| private static void appendZeroPaddedLength(StringBuilder output, int length) { | ||
| String lengthAsString = Integer.toString(length); | ||
| for (int i = lengthAsString.length(); i < 6; i++) { | ||
| output.append('0'); | ||
| } | ||
| output.append(lengthAsString); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,55 @@ | ||
| /* | ||
| * Copyright Siemens AG, 2026. Part of the SW360 Portal Project. | ||
| * | ||
| * This program and the accompanying materials are made | ||
| * available under the terms of the Eclipse Public License 2.0 | ||
| * which is available at https://www.eclipse.org/legal/epl-2.0/ | ||
| * | ||
| * SPDX-License-Identifier: EPL-2.0 | ||
| */ | ||
| package org.eclipse.sw360.datahandler.db; | ||
|
|
||
| import org.junit.Test; | ||
|
|
||
| import java.util.ArrayList; | ||
| import java.util.Arrays; | ||
| import java.util.Comparator; | ||
| import java.util.List; | ||
|
|
||
| import static org.junit.Assert.assertEquals; | ||
| import static org.junit.Assert.assertTrue; | ||
|
|
||
| public class ReleaseSearchHandlerTest { | ||
|
|
||
| @Test | ||
| public void should_sort_versions_naturally_for_numeric_segments() { | ||
| List<String> versions = new ArrayList<>(Arrays.asList("1.10", "1.2", "1.0", "2.0", "1.9")); | ||
|
|
||
| versions.sort(Comparator.comparing(ReleaseSearchHandler::normalizeVersionForSort)); | ||
|
|
||
| assertEquals(Arrays.asList("1.0", "1.2", "1.9", "1.10", "2.0"), versions); | ||
| } | ||
|
|
||
| @Test | ||
| public void should_treat_leading_zero_numeric_segments_as_equal_value() { | ||
| String v1 = ReleaseSearchHandler.normalizeVersionForSort("1.02"); | ||
| String v2 = ReleaseSearchHandler.normalizeVersionForSort("1.2"); | ||
|
|
||
| assertEquals(v1, v2); | ||
| } | ||
|
|
||
| @Test | ||
| public void should_sort_numeric_suffixes_naturally() { | ||
| String alpha2 = ReleaseSearchHandler.normalizeVersionForSort("1.0.0-alpha2"); | ||
| String alpha10 = ReleaseSearchHandler.normalizeVersionForSort("1.0.0-alpha10"); | ||
|
|
||
| assertTrue(alpha2.compareTo(alpha10) < 0); | ||
| } | ||
|
|
||
| @Test | ||
| public void should_pad_numeric_length_prefix_to_six_digits() { | ||
| String normalized = ReleaseSearchHandler.normalizeVersionForSort("v123"); | ||
|
|
||
| assertEquals("v{000003123}", normalized); | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why the magic number
6?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank for asking,
6is just a fixed padding width for the length prefix, so all rewritten numeric tokens have a comparable shape.For example:
3-> length1->00000110-> length2->000002123-> length3->000003I chose
6simply as a sufficiently large constant for expected version segments, not because it has special meaning. The goal is only to keep the length field fixed-width so lexicographic comparison works reliably. If we want, I can replace it with a named constant or add a short comment to make that clearer.