diff --git a/src/main/java/org/spdx/utility/compare/LicenseCompareHelper.java b/src/main/java/org/spdx/utility/compare/LicenseCompareHelper.java index 08f75f6b..75a30e15 100644 --- a/src/main/java/org/spdx/utility/compare/LicenseCompareHelper.java +++ b/src/main/java/org/spdx/utility/compare/LicenseCompareHelper.java @@ -27,6 +27,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.Collection; +import javax.annotation.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -122,12 +123,15 @@ public static String removeCommentChars(String s) { } /** - * Locate the original text starting with the start token and ending with the end token + * Locate the original text starting with the start token and ending with the + * end token + * * @param fullLicenseText entire license text - * @param startToken starting token - * @param endToken ending token + * @param startToken starting token + * @param endToken ending token * @param tokenToLocation token location - * @return original text starting with the start token and ending with the end token + * @return original text starting with the start token and ending with the end + * token */ public static String locateOriginalText(String fullLicenseText, int startToken, int endToken, Map tokenToLocation, String[] tokens) { @@ -190,12 +194,25 @@ public static String locateOriginalText(String fullLicenseText, int startToken, } // ignore } - - /* - * @param text text to test - * @return the first token in the license text + + /** + * Return the first license token found in the given text + *

<p> + * The method normalizes the input text, removes comment characters, + * and splits it into tokens + * using {@link LicenseTextHelper#TOKEN_SPLIT_PATTERN}. + * It returns the first non-empty token found, + * or {@code null} if no such token exists. + * </p>

+ * + * @param text The license text to extract the first token from. + * @return The first non-empty token as a {@link String}, + * or {@code null} if none is found. */ - public static String getFirstLicenseToken(String text) { + public static @Nullable String getFirstLicenseToken(@Nullable String text) { + if (text == null || text.isEmpty()) { + return null; + } String textToTokenize = LicenseTextHelper.normalizeText(LicenseTextHelper.replaceMultWord(LicenseTextHelper.replaceSpaceComma( LicenseTextHelper.removeLineSeparators(removeCommentChars(text))))).toLowerCase(); Matcher m = LicenseTextHelper.TOKEN_SPLIT_PATTERN.matcher(textToTokenize); @@ -206,32 +223,42 @@ public static String getFirstLicenseToken(String text) { } return null; } - + /** - * @param text text to test - * @return true if the text contains a single token + * Check whether the given text contains only a single token + *

<p> + * A single token string is a string that contains zero or one token, + * as identified by the {@link LicenseTextHelper#TOKEN_SPLIT_PATTERN}. + * Whitespace and punctuation such as dots, commas, question marks, + * and quotation marks are ignored. + * </p>

+ * + * @param text The text to test. + * @return {@code true} if the text contains zero or one token, + * {@code false} otherwise. */ - public static boolean isSingleTokenString(String text) { - if (text.contains("\n")) { - return false; + public static boolean isSingleTokenString(@Nullable String text) { + if (text == null || text.isEmpty()) { + return true; // Zero tokens is considered a single token string } Matcher m = LicenseTextHelper.TOKEN_SPLIT_PATTERN.matcher(text); boolean found = false; while (m.find()) { if (!m.group(1).trim().isEmpty()) { if (found) { - return false; + return false; // More than one eligible token found } else { - found = true; + found = true; // First eligible token found } } } - return true; + return true; // Zero or one eligible token found } /** * Compares two licenses from potentially two different documents which may have * different license ID's for the same license + * * @param license1 first license to compare * @param license2 second license to compare * @param xlationMap Mapping the license ID's from license 1 to license 2 @@ -342,11 +369,15 @@ public static List getNonOptionalLicenseText(String licenseTemplate, } /** - * @param template Template in the standard template format used for comparison + * Compare the provided text against a license template using SPDX matching + * guidelines + * + * @param template Template in the standard template format used for + * comparison * @param compareText Text to compare using the template - * @return any differences found + * @return Any differences found * @throws SpdxCompareException on comparison errors - */ + */ public static DifferenceDescription isTextMatchingTemplate(String template, String compareText) throws SpdxCompareException { CompareTemplateOutputHandler compareTemplateOutputHandler; try { diff --git a/src/test/java/org/spdx/utility/compare/LicenseCompareHelperTest.java b/src/test/java/org/spdx/utility/compare/LicenseCompareHelperTest.java index 4761cc2b..a593a6d5 
100644 --- a/src/test/java/org/spdx/utility/compare/LicenseCompareHelperTest.java +++ b/src/test/java/org/spdx/utility/compare/LicenseCompareHelperTest.java @@ -539,18 +539,33 @@ public void testLicenseEqualsNoneLicense() throws InvalidSPDXAnalysisException, Map xlationMap = new HashMap<>();; assertTrue(LicenseCompareHelper.isLicenseEqual(lic3, lic4, xlationMap)); assertFalse(LicenseCompareHelper.isLicenseEqual(lic4, lic2, xlationMap)); - } - - + } + public void testisSingleTokenString() { - assertTrue(LicenseCompareHelper.isSingleTokenString(" token ")); + assertTrue(LicenseCompareHelper.isSingleTokenString(null)); + assertTrue(LicenseCompareHelper.isSingleTokenString("")); + assertTrue(LicenseCompareHelper.isSingleTokenString(" ")); + assertTrue(LicenseCompareHelper.isSingleTokenString("\n")); assertTrue(LicenseCompareHelper.isSingleTokenString("'")); assertTrue(LicenseCompareHelper.isSingleTokenString(" '")); assertTrue(LicenseCompareHelper.isSingleTokenString("' ")); + assertTrue(LicenseCompareHelper.isSingleTokenString("''")); + assertTrue(LicenseCompareHelper.isSingleTokenString("token")); + assertTrue(LicenseCompareHelper.isSingleTokenString(" token")); + assertTrue(LicenseCompareHelper.isSingleTokenString(" token ")); + assertTrue(LicenseCompareHelper.isSingleTokenString("token\n")); + assertTrue(LicenseCompareHelper.isSingleTokenString("\ntoken")); + assertTrue(LicenseCompareHelper.isSingleTokenString(" \n token ")); + assertTrue(LicenseCompareHelper.isSingleTokenString(":;token?")); + assertTrue(LicenseCompareHelper.isSingleTokenString("'''token")); + assertTrue(LicenseCompareHelper.isSingleTokenString("token'''")); assertFalse(LicenseCompareHelper.isSingleTokenString("a and")); assertFalse(LicenseCompareHelper.isSingleTokenString("a\nand")); + assertFalse(LicenseCompareHelper.isSingleTokenString("a and ")); + assertFalse(LicenseCompareHelper.isSingleTokenString(" a and")); + assertFalse(LicenseCompareHelper.isSingleTokenString(" a.and")); } - + public 
void regressionTestMatchingGpl20Only() throws IOException, InvalidSPDXAnalysisException, SpdxCompareException { String compareText = UnitTestHelper.fileToText(GPL_2_TEXT); DifferenceDescription result = LicenseCompareHelper.isTextStandardLicense(LicenseInfoFactory.getListedLicenseById("GPL-2.0-only"), compareText); @@ -568,11 +583,16 @@ public void testMatchingStandardLicenseIds() throws IOException, InvalidSPDXAnal assertTrue(result[3].startsWith("GPL-2")); } } - + public void testFirstLicenseToken() { assertEquals("first", LicenseCompareHelper.getFirstLicenseToken(" first,token that is needed\nnext")); + assertEquals("first", LicenseCompareHelper.getFirstLicenseToken("// first,second")); + assertNull(LicenseCompareHelper.getFirstLicenseToken(null)); + assertNull(LicenseCompareHelper.getFirstLicenseToken("")); + assertNull(LicenseCompareHelper.getFirstLicenseToken(" ")); + assertNull(LicenseCompareHelper.getFirstLicenseToken("# ")); } - + @SuppressWarnings("unused") private String stringCharToUnicode(String s, int location) { return "\\u" + Integer.toHexString(s.charAt(location) | 0x10000).substring(1);