Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.eclipse.sw360.datahandler.couchdb.lucene.NouveauLuceneAwareDatabaseConnector.prepareWildcardQuery;
import static org.eclipse.sw360.nouveau.LuceneAwareCouchDbConnector.DEFAULT_DESIGN_PREFIX;
Expand All @@ -37,19 +40,30 @@
public class ReleaseSearchHandler {

private static final String DDOC_NAME = DEFAULT_DESIGN_PREFIX + "lucene";
private static final Pattern DIGIT_SEQUENCE_PATTERN = Pattern.compile("\\d+");

private static final NouveauIndexDesignDocument luceneSearchView
= new NouveauIndexDesignDocument("releases",
new NouveauIndexFunction(
"function(doc) {" +
" function normalizeVersionForSort(version) {" +
" if (!version || typeof(version) !== 'string') { return ''; }" +
" var lower = version.toLowerCase();" +
" return lower.replace(/\\d+/g, function(match) {" +
" var normalized = match.replace(/^0+(?!$)/, '');" +
" var length = normalized.length.toString();" +
" while (length.length < 6) { length = '0' + length; }" +
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the magic number 6?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank for asking, 6 is just a fixed padding width for the length prefix, so all rewritten numeric tokens have a comparable shape.

For example:

  • 3 -> length 1 -> 000001
  • 10 -> length 2 -> 000002
  • 123 -> length 3 -> 000003

I chose 6 simply as a sufficiently large constant for expected version segments, not because it has special meaning. The goal is only to keep the length field fixed-width so lexicographic comparison works reliably. If we want, I can replace it with a named constant or add a short comment to make that clearer.

" return '{' + length + normalized + '}';" +
" });" +
Comment on lines +52 to +57
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you mind explaining the magic here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This builds a natural sort key for version-like strings. It lowercases the input, then rewrites every numeric chunk into a sortable token: first its length (zero-padded), then the normalized number itself. That way lexicographic sorting compares numbers by numeric magnitude instead of plain string order, so for example 1.2.10 sorts after 1.2.3.

  • lower: first convert everything to lowercase, so case differences do not affect sorting.
  • replace(/\d+/g, ...): finds each contiguous numeric segment in the string.
  • match.replace(/^0+(?!$)/, ''): removes leading zeros, while still preserving a single 0.
    • for example, 0012 -> 12
    • and 000 -> 0
  • normalized.length: gets the length of that normalized numeric string.
  • Then the length is left-padded to a fixed width of 6 digits.
    • 2 -> 000002
    • 10 -> 000010
  • Finally it returns '{'+ length + normalized + '}'.

The purpose of this is to make plain string sorting behave like numeric sorting for embedded numbers.

For example:

  • 1.2.3 → the numeric segment 3 becomes something like {0000013}
  • 1.2.10 → the numeric segment 10 becomes something like {00000210}

Because the comparison looks at the length first, and then the value, this ensures that:

  • 3 is smaller than 10
  • and you do not get the usual lexicographic problem where "10" < "3"

" }" +
" if(doc.type == 'release') {" +
" if (doc.name && typeof(doc.name) == 'string' && doc.name.length > 0) {" +
" index('text', 'name', doc.name, {'store': true});" +
" index('string', 'name_sort', doc.name);" +
" }" +
" if (doc.version && typeof(doc.version) == 'string' && doc.version.length > 0) {" +
" index('text', 'version', doc.version, {'store': true});" +
" index('string', 'version_sort', doc.version);" +
" index('string', 'version_sort', normalizeVersionForSort(doc.version));" +
" }" +
" if(doc.createdOn && doc.createdOn.length) {"+
" var dt = new Date(doc.createdOn);"+
Expand Down Expand Up @@ -102,4 +116,40 @@ public Map<PaginationData, List<Release>> search(String searchText, PaginationDa
default -> "createdOn";
};
}

static String normalizeVersionForSort(String version) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unused function declared??

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

normalizeVersionForSort(String) is currently used by unit tests as a Java mirror of the JS index normalization logic, to ensure both implementations stay consistent.

if (version == null || version.isEmpty()) {
return "";
}
String lower = version.toLowerCase(Locale.ROOT);
Matcher matcher = DIGIT_SEQUENCE_PATTERN.matcher(lower);
StringBuilder normalized = new StringBuilder(lower.length() + 16);
int cursor = 0;
while (matcher.find()) {
normalized.append(lower, cursor, matcher.start());
appendNumericToken(normalized, matcher.group());
cursor = matcher.end();
}
normalized.append(lower, cursor, lower.length());
return normalized.toString();
}

private static void appendNumericToken(StringBuilder output, String rawNumber) {
int significantStart = 0;
while (significantStart < rawNumber.length() - 1 && rawNumber.charAt(significantStart) == '0') {
significantStart++;
}
String significant = rawNumber.substring(significantStart);
output.append('{');
appendZeroPaddedLength(output, significant.length());
output.append(significant).append('}');
}

private static void appendZeroPaddedLength(StringBuilder output, int length) {
String lengthAsString = Integer.toString(length);
for (int i = lengthAsString.length(); i < 6; i++) {
output.append('0');
}
output.append(lengthAsString);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright Siemens AG, 2026. Part of the SW360 Portal Project.
*
* This program and the accompanying materials are made
* available under the terms of the Eclipse Public License 2.0
* which is available at https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*/
package org.eclipse.sw360.datahandler.db;

import org.junit.Test;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

public class ReleaseSearchHandlerTest {

@Test
public void should_sort_versions_naturally_for_numeric_segments() {
List<String> versions = new ArrayList<>(Arrays.asList("1.10", "1.2", "1.0", "2.0", "1.9"));

versions.sort(Comparator.comparing(ReleaseSearchHandler::normalizeVersionForSort));

assertEquals(Arrays.asList("1.0", "1.2", "1.9", "1.10", "2.0"), versions);
}

@Test
public void should_treat_leading_zero_numeric_segments_as_equal_value() {
String v1 = ReleaseSearchHandler.normalizeVersionForSort("1.02");
String v2 = ReleaseSearchHandler.normalizeVersionForSort("1.2");

assertEquals(v1, v2);
}

@Test
public void should_sort_numeric_suffixes_naturally() {
String alpha2 = ReleaseSearchHandler.normalizeVersionForSort("1.0.0-alpha2");
String alpha10 = ReleaseSearchHandler.normalizeVersionForSort("1.0.0-alpha10");

assertTrue(alpha2.compareTo(alpha10) < 0);
}

@Test
public void should_pad_numeric_length_prefix_to_six_digits() {
String normalized = ReleaseSearchHandler.normalizeVersionForSort("v123");

assertEquals("v{000003123}", normalized);
}
}
Loading