-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Co-authored-by: Alejandro Perez <[email protected]>
- Loading branch information
1 parent
db510fb
commit cbbb141
Showing
9 changed files
with
213 additions
and
32 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
package com.scanoss.utils; | ||
|
||
import java.nio.charset.StandardCharsets; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
/** | ||
* SCANOSS Hpsm Class | ||
* <p> | ||
* The Hpsm class provides all the necessary implementations to generate HPSM fingerprint for a given file or contents. | ||
* </p> | ||
*/ | ||
public class Hpsm { | ||
|
||
// CRC8 table, Polynomial, initial CRC and post CRC XOR value. | ||
private static final int CRC8_MAXIM_DOW_TABLE_SIZE = 0x100; | ||
private static final int CRC8_MAXIM_DOW_POLYNOMIAL = 0x8C; // 0x31 reflected | ||
private static final int CRC8_MAXIM_DOW_INITIAL = 0x00; // 0x00 reflected | ||
private static final int CRC8_MAXIM_DOW_FINAL = 0x00; // 0x00 reflected | ||
private static int[] crc8MaximDowTable = new int[CRC8_MAXIM_DOW_TABLE_SIZE]; | ||
|
||
private static final byte[] HEX_ARRAY = "0123456789ABCDEF".getBytes(StandardCharsets.US_ASCII); | ||
|
||
/** | ||
* Calculates the HPSM value for the given content, represented as an array of bytes. | ||
* This method performs normalization on the content, calculates CRC8 for each line, | ||
* and returns the hexadecimal representation of the CRC8 values. | ||
* | ||
* @param content the content as an array of bytes | ||
* @return the HPSM value in hexadecimal format | ||
*/ | ||
public static String calcHpsm(byte[] content) { | ||
List<Integer> listNormalized = new ArrayList<>(); | ||
List<Integer> crcLines = new ArrayList<>(); | ||
|
||
int lastLine = 0; | ||
crc8MaximDowGenerateTable(); | ||
|
||
for (int i = 0; i < content.length ; i++) { | ||
char c = (char) content[i]; | ||
if (c == '\n') { // When there is a new line | ||
if (!listNormalized.isEmpty()) { | ||
crcLines.add(crc8MaximDowBuffer(convertListToByteArray(listNormalized))); | ||
listNormalized.clear(); | ||
} else if (lastLine + 1 == i) { | ||
crcLines.add(0xFF); | ||
} else if (i - lastLine > 1) { | ||
crcLines.add(0x00); | ||
} | ||
lastLine = i; | ||
} else { | ||
int cNormalized = WinnowingUtils.normalize(c); | ||
if (cNormalized != 0) listNormalized.add(cNormalized); | ||
} | ||
} | ||
|
||
return convertToHex(convertListToByteArray(crcLines)); | ||
} | ||
|
||
/** | ||
* Calculates CRC-8 using the Maxim/Dallas polynomial without using a lookup table. | ||
* This method is suitable for applications where memory constraints are critical | ||
* and a lookup table cannot be afforded. | ||
* | ||
* @param crc The current CRC value. | ||
* @param b The byte to be processed. | ||
* @return The updated CRC value after processing the byte. | ||
*/ | ||
private static int crc8MaximDowByteNoTable(int crc, int b) { | ||
crc ^= b; | ||
for (int count = 0; count < 8; count++) { | ||
boolean isSet = (crc & 0x01) != 0; | ||
crc >>= 1; | ||
if (isSet) crc ^= CRC8_MAXIM_DOW_POLYNOMIAL; | ||
} | ||
return crc; | ||
} | ||
|
||
/** | ||
* Generates a lookup table for CRC-8 using the Maxim/Dallas polynomial. | ||
* The generated table is used for faster CRC calculations. | ||
*/ | ||
private static void crc8MaximDowGenerateTable() { | ||
for (int i = 0; i < CRC8_MAXIM_DOW_TABLE_SIZE; i++) { | ||
crc8MaximDowTable[i] = crc8MaximDowByteNoTable(0, i); | ||
} | ||
} | ||
|
||
/** | ||
* Calculates CRC-8 using the Maxim/Dow polynomial with a lookup table. | ||
* This method utilizes a pre-generated lookup table for faster CRC calculations. | ||
* | ||
* @param crc The current CRC value. | ||
* @param b The byte to be processed. | ||
* @return The updated CRC value after processing the byte. | ||
*/ | ||
private static int crc8MaximDowByte(int crc, int b) { | ||
int index = b ^ crc; | ||
return crc8MaximDowTable[index] ^ (crc >> 8); | ||
} | ||
|
||
/** | ||
* Calculates CRC-8 for a buffer of bytes using the Maxim/Dallas polynomial. | ||
* | ||
* @param buffer The buffer containing bytes for CRC calculation. | ||
* @return The CRC-8 value for the given buffer. | ||
*/ | ||
private static int crc8MaximDowBuffer(byte[] buffer) { | ||
int crc = CRC8_MAXIM_DOW_INITIAL; | ||
for (byte b : buffer) { | ||
crc = crc8MaximDowByte(crc, b & 0xFF); // Convert byte to unsigned integer | ||
} | ||
crc ^= CRC8_MAXIM_DOW_FINAL; | ||
return crc; | ||
} | ||
|
||
/** | ||
* Converts a list of integers to a byte array. | ||
* | ||
* @param integerList The list of integers to be converted. | ||
* @return The byte array representing the converted integers. | ||
*/ | ||
private static byte[] convertListToByteArray(List<Integer> integerList) { | ||
byte[] byteArray = new byte[integerList.size()]; | ||
for (int i = 0; i < integerList.size(); i++) { | ||
byteArray[i] = integerList.get(i).byteValue(); | ||
} | ||
return byteArray; | ||
} | ||
|
||
/** | ||
* Converts an array of bytes to its hexadecimal representation. | ||
* | ||
* @param bytes the array of bytes to be converted | ||
* @return the hexadecimal representation of the input byte array | ||
*/ | ||
private static String convertToHex(byte [] bytes) { | ||
byte[] hexChars = new byte[bytes.length * 2]; | ||
for (int j = 0; j < bytes.length; j++) { | ||
int v = bytes[j] & 0xFF; | ||
hexChars[j * 2] = HEX_ARRAY[v >>> 4]; | ||
hexChars[j * 2 + 1] = HEX_ARRAY[v & 0x0F]; | ||
} | ||
return new String(hexChars, StandardCharsets.UTF_8).toLowerCase(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
package com.scanoss.utils; | ||
|
||
/**
 * Helper routines shared by the fingerprinting code.
 */
public class WinnowingUtils {

    /**
     * Normalise the given character.
     * <p>
     * Digits and lowercase ASCII letters are returned unchanged, uppercase ASCII letters are
     * converted to lowercase, and every other character maps to {@code 0}.
     * </p>
     *
     * @param c character to normalise
     * @return normalised character, or {@code 0} if the character is not an ASCII letter or digit
     */
    public static char normalize(char c) {
        boolean digit = c >= '0' && c <= '9';
        boolean lowercase = c >= 'a' && c <= 'z';
        if (digit || lowercase) {
            return c;
        }

        boolean uppercase = c >= 'A' && c <= 'Z';
        if (uppercase) {
            // Shift into the lowercase range ('a' - 'A' == 32).
            return (char) (c + ('a' - 'A'));
        }

        return 0;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters