diff --git a/.mvn/wrapper/MavenWrapperDownloader.java b/.mvn/wrapper/MavenWrapperDownloader.java deleted file mode 100644 index d28cd86..0000000 --- a/.mvn/wrapper/MavenWrapperDownloader.java +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright 2007-present the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.net.*; -import java.io.*; -import java.nio.channels.*; -import java.util.Properties; - -public class MavenWrapperDownloader { - - private static final String WRAPPER_VERSION = "0.5.6"; - /** - * Default URL to download the maven-wrapper.jar from, if no 'downloadUrl' is provided. - */ - private static final String DEFAULT_DOWNLOAD_URL = "https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/" - + WRAPPER_VERSION + "/maven-wrapper-" + WRAPPER_VERSION + ".jar"; - - /** - * Path to the maven-wrapper.properties file, which might contain a downloadUrl property to - * use instead of the default one. - */ - private static final String MAVEN_WRAPPER_PROPERTIES_PATH = - ".mvn/wrapper/maven-wrapper.properties"; - - /** - * Path where the maven-wrapper.jar will be saved to. - */ - private static final String MAVEN_WRAPPER_JAR_PATH = - ".mvn/wrapper/maven-wrapper.jar"; - - /** - * Name of the property which should be used to override the default download url for the wrapper. 
- */ - private static final String PROPERTY_NAME_WRAPPER_URL = "wrapperUrl"; - - public static void main(String args[]) { - System.out.println("- Downloader started"); - File baseDirectory = new File(args[0]); - System.out.println("- Using base directory: " + baseDirectory.getAbsolutePath()); - - // If the maven-wrapper.properties exists, read it and check if it contains a custom - // wrapperUrl parameter. - File mavenWrapperPropertyFile = new File(baseDirectory, MAVEN_WRAPPER_PROPERTIES_PATH); - String url = DEFAULT_DOWNLOAD_URL; - if (mavenWrapperPropertyFile.exists()) { - FileInputStream mavenWrapperPropertyFileInputStream = null; - try { - mavenWrapperPropertyFileInputStream = new FileInputStream(mavenWrapperPropertyFile); - Properties mavenWrapperProperties = new Properties(); - mavenWrapperProperties.load(mavenWrapperPropertyFileInputStream); - url = mavenWrapperProperties.getProperty(PROPERTY_NAME_WRAPPER_URL, url); - } catch (IOException e) { - System.out.println("- ERROR loading '" + MAVEN_WRAPPER_PROPERTIES_PATH + "'"); - } finally { - try { - if (mavenWrapperPropertyFileInputStream != null) { - mavenWrapperPropertyFileInputStream.close(); - } - } catch (IOException e) { - // Ignore ... 
- } - } - } - System.out.println("- Downloading from: " + url); - - File outputFile = new File(baseDirectory.getAbsolutePath(), MAVEN_WRAPPER_JAR_PATH); - if (!outputFile.getParentFile().exists()) { - if (!outputFile.getParentFile().mkdirs()) { - System.out.println( - "- ERROR creating output directory '" + outputFile.getParentFile().getAbsolutePath() + "'"); - } - } - System.out.println("- Downloading to: " + outputFile.getAbsolutePath()); - try { - downloadFileFromURL(url, outputFile); - System.out.println("Done"); - System.exit(0); - } catch (Throwable e) { - System.out.println("- Error downloading"); - e.printStackTrace(); - System.exit(1); - } - } - - private static void downloadFileFromURL(String urlString, File destination) throws Exception { - if (System.getenv("MVNW_USERNAME") != null && System.getenv("MVNW_PASSWORD") != null) { - String username = System.getenv("MVNW_USERNAME"); - char[] password = System.getenv("MVNW_PASSWORD").toCharArray(); - Authenticator.setDefault(new Authenticator() { - @Override - protected PasswordAuthentication getPasswordAuthentication() { - return new PasswordAuthentication(username, password); - } - }); - } - URL website = new URL(urlString); - ReadableByteChannel rbc; - rbc = Channels.newChannel(website.openStream()); - FileOutputStream fos = new FileOutputStream(destination); - fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE); - fos.close(); - rbc.close(); - } -} diff --git a/.mvn/wrapper/maven-wrapper.properties b/.mvn/wrapper/maven-wrapper.properties deleted file mode 100644 index 642d572..0000000 --- a/.mvn/wrapper/maven-wrapper.properties +++ /dev/null @@ -1,2 +0,0 @@ -distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.6.3/apache-maven-3.6.3-bin.zip -wrapperUrl=https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..ae1e74b --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,175 @@ +# 
Guidance for AI agents, bots, and humans contributing to Chronicle Software's OpenHFT projects. + +LLM-based agents can accelerate development only if they respect our house rules. This file tells you: + +* how to run and verify the build; +* what *not* to comment; +* when to open pull requests. + +## Language & character-set policy + +| Requirement | Rationale | +|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------| +| **British English** spelling (`organisation`, `licence`, *not* `organization`, `license`) except technical US spellings like `synchronized` | Keeps wording consistent with Chronicle's London HQ and existing docs. See the University of Oxford style guide for reference. | +| **ISO-8859-1** (code-points 0-255). Avoid smart quotes, non-breaking spaces and accented characters. | ISO-8859-1 survives every toolchain Chronicle uses, incl. low-latency binary wire formats that expect the 8th bit to be 0. | +| If a symbol is not available in ISO-8859-1, use a textual form such as `micro-second`, `>=`, `:alpha:`, `:yes:`. This is the preferred approach and Unicode must not be inserted. | Extended or '8-bit ASCII' variants are *not* portable and are therefore disallowed. | + +## Javadoc guidelines + +**Goal:** Every Javadoc block should add information you cannot glean from the method signature alone. Anything else is +noise and slows readers down. + +| Do | Don't | +|---------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------| +| State *behavioural contracts*, edge-cases, thread-safety guarantees, units, performance characteristics and checked exceptions. 
| Restate the obvious ("Gets the value", "Sets the name"). | +| Keep the first sentence short; it becomes the summary line in aggregated docs. | Duplicate parameter names/ types unless more explanation is needed. | +| Prefer `@param` for *constraints* and `@throws` for *conditions*, following Oracle's style guide. | Pad comments to reach a line-length target. | +| Remove or rewrite autogenerated Javadoc for trivial getters/setters. | Leave stale comments that now contradict the code. | + +The principle that Javadoc should only explain what is *not* manifest from the signature is well-established in the +wider Java community. + +## Build & test commands + +Agents must verify that the project still compiles and all unit tests pass before opening a PR: + +```bash +# From repo root +mvn -q verify +``` + +## Commit-message & PR etiquette + +1. **Subject line <= 72 chars**, imperative mood: "Fix roll-cycle offset in `ExcerptAppender`". +2. Reference the JIRA/GitHub issue if it exists. +3. In *body*: *root cause -> fix -> measurable impact* (latency, allocation, etc.). Use ASCII bullet points. +4. **Run `mvn verify`** again after rebasing. + +## What to ask the reviewers + +* *Is this AsciiDoc documentation precise enough for a clean-room re-implementation?* +* Does the Javadoc explain the code's *why* and *how* that a junior developer would not be expected to work out? +* Are the documentation, tests and code updated together so the change is clear? +* Does the commit point back to the relevant requirement or decision tag? +* Would an example or small diagram help future maintainers? + +## Project requirements + +See the [Decision Log](src/main/docs/decision-log.adoc) for the latest project decisions. +See the [Project Requirements](src/main/docs/project-requirements.adoc) for details on project requirements. 
+ +## Elevating the Workflow with Real-Time Documentation + +Building upon our existing Iterative Workflow, the newest recommendation is to emphasise *real-time updates* to +documentation. +Ensure the relevant `.adoc` files are updated when features, requirements, implementation details, or tests change. +This tight loop informs the AI accurately and creates immediate clarity for all team members. + +### Benefits of Real-Time Documentation + +* **Confidence in documentation**: Accurate docs prevent miscommunications that derail real-world outcomes. +* **Reduced drift**: Real-time updates keep requirements, tests and code aligned. +* **Faster feedback**: AI can quickly highlight inconsistencies when everything is in sync. +* **Better quality**: Frequent checks align the implementation with the specified behaviour. +* **Smoother onboarding**: Up-to-date AsciiDoc clarifies the system for new developers. +* **Incremental changes**: AIDE flags newly updated files so you can keep the documentation synchronised. + +### Best Practices + +* **Maintain Sync**: Keep documentation (AsciiDoc), tests, and code synchronised in version control. Changes in one area + should prompt reviews and potential updates in the others. +* **Doc-First for New Work**: For *new* features or requirements, aim to update documentation first, then use AI to help + produce or refine corresponding code and tests. For refactoring or initial bootstrapping, updates might flow from + code/tests back to documentation, which should then be reviewed and finalised. +* **Small Commits**: Each commit should ideally relate to a single requirement or coherent change, making reviews easier + for humans and AI analysis tools. + +- **Team Buy-In**: Encourage everyone to review AI outputs critically and contribute to maintaining the synchronicity of + all artefacts. 
+ +## AI Agent Guidelines + +When using AI agents to assist with development, please adhere to the following guidelines: + +* **Respect the Language & Character-set Policy**: Ensure all AI-generated content follows the British English and + ISO-8859-1 guidelines outlined above. + Focus on Clarity: AI-generated documentation should be clear and concise and add value beyond what is already present + in the code or existing documentation. +* **Avoid Redundancy**: Do not generate content that duplicates existing documentation or code comments unless it + provides additional context or clarification. +* **Review AI Outputs**: Always review AI-generated content for accuracy, relevance, and adherence to the project's + documentation standards before committing it to the repository. + +## Company-Wide Tagging + +This section records **company-wide** decisions that apply to *all* Chronicle projects. All identifiers use +the --xxx prefix. The `xxx` are unique across in the same Scope even if the tags are different. +Component-specific decisions live in their xxx-decision-log.adoc files. + +### Tag Taxonomy (Nine-Box Framework) + +To improve traceability, we adopt the Nine-Box taxonomy for requirement and decision identifiers. These tags are used in +addition to the existing ALL prefix, which remains reserved for global decisions across every project. 
+ +.Adopt a Nine-Box Requirement Taxonomy + +| Tag | Scope | Typical examples | +|------|-----------------------------------|-------------------------------------| +| FN | Functional user-visible behaviour | Message routing, business rules | +| NF-P | Non-functional - Performance | Latency budgets, throughput targets | +| NF-S | Non-functional - Security | Authentication method, TLS version | +| NF-O | Non-functional - Operability | Logging, monitoring, health checks | +| TEST | Test / QA obligations | Chaos scenarios, benchmarking rigs | +| DOC | Documentation obligations | Sequence diagrams, user guides | +| OPS | Operational / DevOps concerns | Helm values, deployment checklist | +| UX | Operator or end-user experience | CLI ergonomics, dashboard layouts | +| RISK | Compliance / risk controls | GDPR retention, audit trail | + +`ALL-*` stays global, case-exact tags. Pick one primary tag if multiple apply. + +### Decision Record Template + +```asciidoc +=== [Identifier] Title of Decision + +Date:: YYYY-MM-DD +Context:: +* What is the issue that this decision addresses? +* What are the driving forces, constraints, and requirements? +Decision Statement:: +* What is the change that is being proposed or was decided? +Alternatives Considered:: +* [Alternative 1 Name/Type]: +** *Description:* Brief description of the alternative. +** *Pros:* ... +** *Cons:* ... +* [Alternative 2 Name/Type]: +** *Description:* Brief description of the alternative. +** *Pros:* ... +** *Cons:* ... +Rationale for Decision:: +* Why was the chosen decision selected? +* How does it address the context and outweigh the cons of alternatives? +Impact & Consequences:: +* What are the positive and negative consequences of this decision? +* How does this decision affect the system, developers, users, or operations? +- What are the trade-offs made? 
+Notes/Links:: +** (Optional: Links to relevant issues, discussions, documentation, proof-of-concepts) +``` + +## Asciidoc formatting guidelines + +### List Indentation + +Do not rely on indentation for list items in AsciiDoc documents. Use the following pattern instead: + +```asciidoc +section:: Top Level Section +* first level + ** nested level +``` + +### Emphasis and Bold Text + +In AsciiDoc, an underscore `_` is _emphasis_; `*text*` is *bold*. diff --git a/LICENSE.adoc b/LICENSE.adoc new file mode 100644 index 0000000..f450566 --- /dev/null +++ b/LICENSE.adoc @@ -0,0 +1,9 @@ +== Copyright 2016-2025 chronicle.software + +Licensed under the *Apache License, Version 2.0* (the "License"); you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. 
diff --git a/README.adoc b/README.adoc index 525bf0b..3dcd576 100644 --- a/README.adoc +++ b/README.adoc @@ -1,10 +1,11 @@ == Zero-Allocation Hashing +:pp: ++ Chronicle Software image:https://maven-badges.herokuapp.com/maven-central/net.openhft/zero-allocation-hashing/badge.svg[caption="",link=https://maven-badges.herokuapp.com/maven-central/net.openhft/zero-allocation-hashing] image:https://javadoc.io/badge2/net.openhft/zero-allocation-hashing/javadoc.svg[link="https://www.javadoc.io/doc/net.openhft/zero-allocation-hashing/latest/index.html"] -//image:https://javadoc-badge.appspot.com/net.openhft/zero-allocation-hashing.svg?label=javadoc[JavaDoc, link=https://www.javadoc.io/doc/net.openhft/zero-allocation-hashing] +// image:https://javadoc-badge.appspot.com/net.openhft/zero-allocation-hashing.svg?label=javadoc[JavaDoc, link=https://www.javadoc.io/doc/net.openhft/zero-allocation-hashing] image:https://img.shields.io/github/license/OpenHFT/Zero-Allocation-Hashing[GitHub] image:https://img.shields.io/badge/release%20notes-subscribe-brightgreen[link="https://chronicle.software/release-notes/"] image:https://sonarcloud.io/api/project_badges/measure?project=OpenHFT_Zero-Allocation-Hashing&metric=alert_status[link="https://sonarcloud.io/dashboard?id=OpenHFT_Zero-Allocation-Hashing"] @@ -15,7 +16,7 @@ toc::[] This project provides a Java API for hashing any sequence of bytes in Java, including all kinds of primitive arrays, buffers, `CharSequence` and more. -Written for Java 7+ under Apache 2.0 license. +Written for Java 8+ under Apache 2.0 licence. The key difference compared to other similar projects, e.g. https://guava.dev/releases/28.1-jre/api/docs/com/google/common/hash/package-summary.html[Guava hashing], is that this has no object allocation during the hash computation and does not use `ThreadLocal`. 
@@ -25,24 +26,24 @@ This provides consistent results whatever the byte order, while only moderately Currently `long`-valued hash function interface is defined for 64-bit hash, and `long[]`-valued hash function interface for more than 64-bit hash, with the following implementations (in alphabetical order): -- *https://github.com/google/cityhash[CityHash], version 1.1* (latest; 1.1.1 is a C++ language-specific maintenance release). +* *https://github.com/google/cityhash[CityHash], version 1.1* (latest; 1.1.1 is a C{pp} language-specific maintenance release). -- Two algorithms from *https://github.com/google/farmhash[FarmHash]*: `farmhashna` (introduced in FarmHash 1.0) and `farmhashuo` (introduced in FarmHash 1.1). +* Two algorithms from *https://github.com/google/farmhash[FarmHash]*: `farmhashna` (introduced in FarmHash 1.0) and `farmhashuo` (introduced in FarmHash 1.1). -- *https://github.com/jandrewrogers/MetroHash[MetroHash]* (using the metrohash64_2 initialization vector). +* *https://github.com/jandrewrogers/MetroHash[MetroHash]* (using the metrohash64_2 initialization vector). -- *https://github.com/aappleby/smhasher/wiki/MurmurHash3[MurmurHash3]* 128-bit and low 64-bit. +* *https://github.com/aappleby/smhasher/wiki/MurmurHash3[MurmurHash3]* 128-bit and low 64-bit. -- *https://github.com/wangyi-fudan/wyhash[wyHash]*, version 3. +* *https://github.com/wangyi-fudan/wyhash[wyHash]*, version 3. -- *https://github.com/Cyan4973/xxHash[xxHash]*. +* *https://github.com/Cyan4973/xxHash[xxHash]*. -- *https://github.com/Cyan4973/xxHash[xxh3, xxh128]*, 128-bit and 64 bit. +* *https://github.com/Cyan4973/xxHash[xxh3, xxh128]*, 128-bit and 64 bit. These are thoroughly tested with *https://www.oracle.com/java/technologies/java-se-support-roadmap.html[LTS JDKs]* -7, 8, and 11, the latest non-LTS JDKs 16 on both little- and big- endian platforms. -Other non-LTS JDKs from 9 should also work, but they will not be tested from half year after EOL. 
+8, 11, 17, and 21, plus the latest non-LTS JDKs on both little- and big-endian platforms. +Other non-LTS JDKs from 9 should also work, but they will not be tested from half a year after EOL. ==== Performance @@ -56,8 +57,8 @@ Tested on Intel Core i7-4870HQ CPU @ 2.50GHz |FarmHash `uo` |7.2 |7 |CityHash |7.0 |7 |MurmurHash |5.3 |12 -|MetroHash |https://github.com/OpenHFT/Zero-Allocation-Hashing/issues/28[??] | https://github.com/OpenHFT/Zero-Allocation-Hashing/issues/28[??] -|WyHash |https://github.com/OpenHFT/Zero-Allocation-Hashing/issues/28[??] |https://github.com/OpenHFT/Zero-Allocation-Hashing/issues/28[??] +|MetroHash |TODO (see https://github.com/OpenHFT/Zero-Allocation-Hashing/issues/28[]) | TODO (see https://github.com/OpenHFT/Zero-Allocation-Hashing/issues/28[]) +|WyHash |TODO (see https://github.com/OpenHFT/Zero-Allocation-Hashing/issues/28[]) |TODO (see https://github.com/OpenHFT/Zero-Allocation-Hashing/issues/28[]) |=== @@ -82,15 +83,26 @@ There is no simple way to hash these using this project, for example, classes su } ---- -* You need to hash byte sequences of unknown length, for the simpliest example, +* You need to hash byte sequences of unknown length, for the simplest example, `Iterator`. * You need to transform the byte sequence (e.g. encode or decode it with a specific coding), and hash the resulting byte sequence on the way without dumping it to memory. 
-==== Java Doc +==== Javadoc See http://javadoc.io/doc/net.openhft/zero-allocation-hashing/latest +==== Internal documentation + +* link:src/main/docs/specifications.adoc[Requirements overview] +* link:src/main/docs/architecture-overview.adoc[Architecture overview] +* link:src/main/docs/invariants-and-contracts.adoc[Invariants and contracts] +* link:src/main/docs/algorithm-profiles.adoc[Algorithm profiles] +* link:src/main/docs/testing-strategy.adoc[Testing strategy] +* link:src/main/docs/performance-benchmarks.adoc[Performance benchmarks] +* link:src/main/docs/unsafe-and-platform-notes.adoc[Unsafe and platform notes] +* link:src/main/docs/change-log-template.adoc[Change-log template] + == Quick start Gradle: @@ -98,7 +110,7 @@ Gradle: [source,groovy] ---- dependencies { - implementation 'net.openhft:zero-allocation-hashing:0.16' + implementation 'net.openhft:zero-allocation-hashing:0.27ea1' } ---- @@ -109,7 +121,7 @@ Or Maven: net.openhft zero-allocation-hashing - 0.16 + 0.27ea1 ---- @@ -120,7 +132,7 @@ In Java: long hash = LongHashFunction.wy_3().hashChars("hello"); ---- -See *http://javadoc.io/doc/net.openhft/zero-allocation-hashing/0.15[JavaDocs]* for more information. +See *http://javadoc.io/doc/net.openhft/zero-allocation-hashing/latest[JavaDocs]* for more information. == Contributions are most welcome! diff --git a/pom.xml b/pom.xml index 79abf8a..1857092 100644 --- a/pom.xml +++ b/pom.xml @@ -1,6 +1,6 @@ 8 - ${project.target.release} - 3.5.0 - ${project.target.release} ${project.target.release} @@ -52,6 +49,15 @@ all,-missing openhft https://sonarcloud.io + + 3.6.0 + 8.45.1 + 4.8.6.6 + 1.14.0 + 3.28.0 + 0.8.14 + 0.918 + 0.854 @@ -109,33 +115,6 @@ - - maven-enforcer-plugin - - - enforce-versions - - enforce - - - - - [${project.maven.min-version},) - - - [${project.jdk.min-version},) - - - project.target.release - Target Java SE release must be at least 7! 
- ^([7-9]|[1-9]\d+)$ - - - - - - - maven-compiler-plugin 3.8.1 @@ -404,7 +383,7 @@ org.jacoco jacoco-maven-plugin - 0.8.6 + ${jacoco-maven-plugin.version} @@ -423,6 +402,142 @@ + + code-review + + false + + + + + org.apache.maven.plugins + maven-checkstyle-plugin + ${checkstyle.version} + + + com.puppycrawl.tools + checkstyle + ${puppycrawl.version} + + + + + checkstyle + + check + + verify + + + + src/main/config/checkstyle.xml + true + true + warning + + + + + com.github.spotbugs + spotbugs-maven-plugin + ${spotbugs.version} + + + com.h3xstream.findsecbugs + findsecbugs-plugin + ${findsecbugs.version} + + + + + spotbugs + + check + + verify + + + + Max + Low + true + src/main/config/spotbugs-exclude.xml + + + + + org.apache.maven.plugins + maven-pmd-plugin + ${maven-pmd-plugin.version} + + + pmd + + check + + verify + + + + true + true + + src/main/config/pmd-ruleset.xml + + src/main/config/pmd-exclude.properties + + + + + org.jacoco + jacoco-maven-plugin + ${jacoco-maven-plugin.version} + + + prepare-agent + + prepare-agent + + + + report + + report + + verify + + + check + + check + + verify + + + + BUNDLE + + + LINE + COVEREDRATIO + ${jacoco.line.coverage} + + + BRANCH + COVEREDRATIO + ${jacoco.branch.coverage} + + + + + + + + + + + + diff --git a/src/main/config/checkstyle.xml b/src/main/config/checkstyle.xml new file mode 100644 index 0000000..5dc41c1 --- /dev/null +++ b/src/main/config/checkstyle.xml @@ -0,0 +1,63 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/main/config/pmd-exclude.properties b/src/main/config/pmd-exclude.properties new file mode 100644 index 0000000..98eeb06 --- /dev/null +++ b/src/main/config/pmd-exclude.properties @@ -0,0 +1,5 @@ +# PMD exclusions with justifications +# Format: filepath=rule1,rule2 +# +# Example: +# net/openhft/hashing/LegacyParser.java=AvoidReassigningParameters,TooManyFields diff --git a/src/main/config/pmd-ruleset.xml 
b/src/main/config/pmd-ruleset.xml new file mode 100644 index 0000000..b635cde --- /dev/null +++ b/src/main/config/pmd-ruleset.xml @@ -0,0 +1,15 @@ + + + + Baseline Chronicle rule selections used during the code-review profile. + + + + + + + + diff --git a/src/main/config/spotbugs-exclude.xml b/src/main/config/spotbugs-exclude.xml new file mode 100644 index 0000000..c437e3d --- /dev/null +++ b/src/main/config/spotbugs-exclude.xml @@ -0,0 +1,156 @@ + + + + + + + HASH-SER-101: Wrapper relies on readResolve to keep singleton reference for cache reuse; revisit + after HASH-127 factory refactor. + + + + + + + + HASH-SER-102: Seed value regenerated by constructor; serialization compatibility required with 0.26 + clients. + + + + + + + + HASH-SER-103: readResolve maintains static NA instance contract documented in invariants.adoc §3; + removal tracked under HASH-128. + + + + + + + + HASH-SER-104: Seed restored via custom constructor to avoid leaking internal mixing array; verify + once HASH-128 migrates to records. + + + + + + + + HASH-SER-105: Dual hash seeds rehydrate lazily to keep compatibility with legacy hashing; + constructor delegates to shared initializer by design. + + + + + + + + HASH-SER-106: Anonymous inner class wraps functional interface for backwards serialization shape; + revisit when converting to lambda in HASH-204. + + + + + + + + HASH-SER-107: Serialization keeps static singleton to protect direct byte buffer backing; tracked + in HASH-205 Metro clean-up. + + + + + + + + HASH-SER-108: Seeded wrapper recreates transient metro state during deserialization; compatibility + retained for on-disk caches. + + + + + + + + HASH-FN-109: Switch fallthrough implements Murmur reference mixing step; modifying would change + published hash outputs. + + + + + + + + HASH-SER-110: Tuple adapter remains singleton for performance; remove once tuple hashing migrates + to service loader (HASH-210). 
+ + + + + + + + HASH-SER-111: Seeded tuple variant rebuilds transient state after deserialization; constructor + delegation retains reference constants from spec. + + + + + + + + HASH-PLAT-112: Privileged block required for Unsafe acquisition on Java 8; catch clause records + platform availability per testing strategy doc. + + + + + + + + HASH-SER-113: Singleton ensures stateless wyhash adapter remains cache-friendly; revisit after + wyhash v5 migration HASH-211. + + + + + + + + HASH-FN-114: Local variables kept for JIT-bound inlining per upstream implementation notes; integer + multiply cast is intentional widening. + + + + + + + + HASH-SEC-201: Secret bytes are defined by the xxHash reference implementation; Chronicle exposes + the same constant for interoperability. + + + + + + + + HASH-SER-115: Singleton maintains compatibility with native xxHash wrappers; change deferred until + XXHash dropping in HASH-215. + + + + + + + + HASH-SER-116: Seed field recomputed post-deserialization to avoid leaking seeds from serialized + stream; tracked for redesign in HASH-215. + + + + diff --git a/src/main/docs/algorithm-profiles.adoc b/src/main/docs/algorithm-profiles.adoc new file mode 100644 index 0000000..f62b785 --- /dev/null +++ b/src/main/docs/algorithm-profiles.adoc @@ -0,0 +1,94 @@ +== Algorithm Profiles +:pp: ++ + +Chronicle Software + +toc::[] + +=== CityHash 1.1 + +Factories :: +`LongHashFunction.city_1_1()`, `.city_1_1(long)`, `.city_1_1(long, long)` (`LongHashFunction.java:53-115`). +Implementation :: +`net.openhft.hashing.CityAndFarmHash_1_1` ports Google’s CityHash64 v1.1 (`CityAndFarmHash_1_1.java`). +Key traits :: +* Normalises inputs to little-endian and forwards short-length cases to specialised mix routines (1–3, 4–7, 8–16 byte fast paths). +* Produces identical output across host endianness; big-endian incurs the expected byte swapping cost. +* Provides seedless, single-seed, and dual-seed variants mirroring the upstream API. 
+ +=== FarmHash NA (1.0) + +Factories :: +`LongHashFunction.farmNa()`, `.farmNa(long)`, `.farmNa(long, long)` (`LongHashFunction.java:117-179`). +Implementation :: +Shares `CityAndFarmHash_1_1` with CityHash; the class carries the `farmhashna` logic, including the <32 byte shortcut to CityHash output. +Key traits :: +* Deterministic across endianness; the mixing rounds assume little-endian inputs and convert when necessary. +* Seeds map directly onto the upstream `farmhashna` parameters. + +=== FarmHash UO (1.1) + +Factories :: +`LongHashFunction.farmUo()`, `.farmUo(long)`, `.farmUo(long, long)` (`LongHashFunction.java:181-243`). +Implementation :: +Also hosted in `CityAndFarmHash_1_1`, which covers the 1.1 update’s longer pipelines. +Key traits :: +* Maintains parity with Google’s C{pp} release for test vectors. +* Endianness neutral: always routes through an `Access` view that matches the algorithm’s little-endian assumptions. + +=== MurmurHash3 + +Factories :: +`LongHashFunction.murmur_3()`, `.murmur_3(long)` for 64-bit (`LongHashFunction.java:245-268`); `LongTupleHashFunction.murmur_3()`, `.murmur_3(long)` for 128-bit (`LongTupleHashFunction.java:35-69`). +Implementation :: +`net.openhft.hashing.MurmurHash_3` adapts Austin Appleby’s x64 variants. +It extends `DualHashFunction` so the 128-bit engine also exposes the low 64 bits through `LongHashFunction`. +Key traits :: +* Little-endian canonicalisation via `Access.byteOrder`. +* Supports zero-length hashing through pre-computed constants to keep `hashVoid()` stable. + +=== xxHash (XXH64) + +Factories :: +`LongHashFunction.xx()`, `.xx(long)` (`LongHashFunction.java:270-298`). +Implementation :: +`net.openhft.hashing.XxHash` ports the official XXH64 reference and keeps the unsigned prime constants as signed Java longs. +Key traits :: +* Uses four-lane accumulation for ≥32 byte inputs, matching upstream behaviour bit-for-bit. +* Applies the canonical avalanche round in `XxHash.finalize` for all lengths. 
+* Seeded and seedless instances differ only by the stored `seed()` override; serialisation preserves both forms. + +=== XXH3 / XXH128 + +Factories :: +`LongHashFunction.xx3()`, `.xx3(long)` for 64-bit, plus `.xx128low()` / `.xx128low(long)` for the low 64 bits of XXH128 (`LongHashFunction.java:300-341`). +Full 128-bit results live behind `LongTupleHashFunction.xx128()` and `.xx128(long)` (`LongTupleHashFunction.java:71-104`). +Implementation :: +`net.openhft.hashing.XXH3` keeps the FARSH-derived 192 byte secret and streaming logic. +It defines distinct entry points for 64-bit, 128-bit, and low-64-bit projections. +Key traits :: +* Optimises for short messages with dedicated 1–3, 4–8, 9–16, 17–128, and 129–240 byte paths. +* Uses `UnsafeAccess.INSTANCE.byteOrder(null, LITTLE_ENDIAN)` once to avoid per-call adapter allocation. +* The 128-bit variant reuses the same mixing core; exposing the low 64 bits avoids extra copies for callers that only need a single `long`. + +=== wyHash v3 + +Factories :: +`LongHashFunction.wy_3()`, `.wy_3(long)` (`LongHashFunction.java:343-369`). +Implementation :: +`net.openhft.hashing.WyHash` mirrors Wang Yi’s version 3 reference, including the `_wymum` 128-bit multiply-fold helper built on `Maths.unsignedLongMulXorFold`. +Key traits :: +* Supports streaming chunks up to 256 bytes per loop iteration; beyond that it accumulates in 32 byte strides. +* Handles ≤3, ≤8, ≤16, ≤24, ≤32 byte inputs with the same branching as the C code. +* Maintains deterministic output across architectures while acknowledging the performance hit on big-endian systems. + +=== MetroHash (metrohash64_2) + +Factories :: +`LongHashFunction.metro()`, `.metro(long)` (`LongHashFunction.java:371-389`). +Implementation :: +`net.openhft.hashing.MetroHash` implements the 64-bit metrohash variant with the `_2` initialisation vector, matching the original author’s reference. 
+Key traits :: +* Performs four-lane unrolled mixing for ≥32 byte inputs and cascades down to 16, 8, 4, 2, and 1 byte tails. +* Uses deterministic finalisation (`MetroHash.finalize`) shared by scalar and streaming paths. +* Seeded instances override `seed()` and cache the pre-hashed `hashVoid()` constant to avoid re-computation. diff --git a/src/main/docs/architecture-overview.adoc b/src/main/docs/architecture-overview.adoc new file mode 100644 index 0000000..72775ef --- /dev/null +++ b/src/main/docs/architecture-overview.adoc @@ -0,0 +1,44 @@ +== Zero-Allocation Hashing Architecture Overview +:pp: ++ + +Chronicle Software + +toc::[] + +=== Entry Points + +* `net.openhft.hashing.LongHashFunction` is the primary façade for 64-bit hashes. +It exposes factory methods for CityHash 1.1, FarmHash (NA and UO variants), MurmurHash3, xxHash, XXH3 (64-bit), wyHash v3, and MetroHash (`LongHashFunction.java`). +* `net.openhft.hashing.LongTupleHashFunction` provides multi-word hash results. +It currently delivers 128-bit MurmurHash3 and XXH3 outputs and mirrors the single-word API with reusable `long[]` buffers (`LongTupleHashFunction.java`). +* `net.openhft.hashing.DualHashFunction` bridges tuple implementations back into the `LongHashFunction` contract, ensuring seeded XXH128 and similar algorithms can expose both 64-bit and 128-bit variants without duplicating logic (`DualHashFunction.java`). + +=== Memory Access Abstractions + +* All hashing flows rely on `net.openhft.hashing.Access` to read primitive values from arrays, direct buffers, off-heap memory, or custom structures. `Access.byteOrder(input, desiredOrder)` returns a view that matches the algorithm’s expected endianness (`Access.java:273-308`). +* Concrete strategies cover heap arrays (`UnsafeAccess.INSTANCE`), `ByteBuffer` (`ByteBufferAccess`), `CharSequence` in native or explicit byte order (`CharSequenceAccess`), and compact Latin-1 backed strings (`CompactLatin1CharSequenceAccess`). 
+* `UnsafeAccess` wraps `sun.misc.Unsafe` for zero-copy reads, falling back to legacy helpers when `getByte` or `getShort` are absent (e.g., pre-Nougat Android) (`UnsafeAccess.java:40-118`). +* Reverse-order wrappers are generated automatically through `Access.newDefaultReverseAccess`, allowing algorithms to treat every source as little-endian while still accepting big-endian buffers (`Access.java:295-344`). + +=== Algorithm Implementations + +* Each upstream hash family lives in its own package-private class and exposes seed-aware factories back to the public façade. +** `CityAndFarmHash_1_1` adapts CityHash64 1.1 plus FarmHash NA/UO variants, including the short-input specialisations from the original C{pp} sources. +** `MurmurHash_3` contains both 64-bit and 128-bit variants, reusing `DualHashFunction` to provide `LongHashFunction` and `LongTupleHashFunction` accessors. +** `XxHash` implements XXH64 with the upstream prime constants and treats all inputs as little-endian via `Access.byteOrder` (`XxHash.java`). +** `XXH3` delivers XXH3 64-bit and 128-bit functions, including the FARSH-derived secret and block-stripe accumulation strategy (`XXH3.java`). +** `WyHash` ports wyHash v3, including the 256-byte streaming loop and `_wymum` mixing helper built on `Maths.unsignedLongMulXorFold` (`WyHash.java`). +** `MetroHash` implements the metrohash64_2 variant using four-lane accumulation and deterministic finalisation (`MetroHash.java`). + +=== Runtime Adaptation + +* `net.openhft.hashing.Util.VALID_STRING_HASH` selects the correct `StringHash` strategy at JVM initialisation time by inspecting `java.vm.name` and `java.version`, covering HotSpot, OpenJ9, Zing, and unknown VMs (`Util.java:29-63`). +* `ModernHotSpotStringHash`, `ModernCompactStringHash`, and `HotSpotPrior7u6StringHash` encode the memory layout differences between pre-compact, compact-string, and legacy HotSpot builds. +When the VM cannot be recognised, `UnknownJvmStringHash` provides a defensive fallback. 
+* Direct buffer hashing uses `sun.nio.ch.DirectBuffer` addresses pulled via `LongHashFunction.hashBytes(ByteBuffer)` and `LongHashFunction.hashMemory(long, long)`; `Util.getDirectBufferAddress` centralises the address extraction (`Util.java:65-68`). + +=== Supporting Utilities + +* `net.openhft.hashing.Primitives` houses byte-order normalisation helpers and unsigned conversions so algorithms can expect canonical little-endian operands even on big-endian hardware (`Primitives.java`). +* `net.openhft.hashing.Maths` provides low-level arithmetic helpers such as `unsignedLongMulXorFold` used by wyHash and XXH3 for 128-bit cross-products (`Maths.java`). +* Tests under `src/test/java/net/openhft/hashing` validate the API contract across arrays, primitives, buffers, and custom access strategies, and serve as reference snippets for typical `Access` usage. diff --git a/src/main/docs/change-log-template.adoc b/src/main/docs/change-log-template.adoc new file mode 100644 index 0000000..da67691 --- /dev/null +++ b/src/main/docs/change-log-template.adoc @@ -0,0 +1,29 @@ +== Change Log Template + +Chronicle Software + +Use this skeleton when documenting repository changes, whether in commit messages, pull-request descriptions, or the decision log. +Keep the tone factual, favour British English, and ensure every item links back to a requirement tag where possible. + +---- +Subject: + +Root cause +- Describe the trigger (bug, requirement, optimisation target). +- Reference relevant Nine-Box identifiers (for example `FN-xxx`, `NF-P-xxx`). + +Fix +- Enumerate the concrete code or documentation changes. +- Highlight any new invariants or assumptions introduced. + +Impact +- Capture measurable effects (latency, throughput, allocation, API surface). +- Note any compatibility considerations or migration steps. + +Verification +- List commands executed locally (e.g., `mvn -q verify`, targeted benchmarks). +- Mention additional artefacts provided (logs, flame graphs, benchmark tables). 
+ +References +- Link to upstream issues, decision-log entries, or external design docs. +---- diff --git a/src/main/docs/invariants-and-contracts.adoc b/src/main/docs/invariants-and-contracts.adoc new file mode 100644 index 0000000..57697e9 --- /dev/null +++ b/src/main/docs/invariants-and-contracts.adoc @@ -0,0 +1,60 @@ +== Invariants and Contracts + +Chronicle Software + +toc::[] + +=== Hash Interface Guarantees + +* Every `LongHashFunction` and `LongTupleHashFunction` implementation treats primitives as if they were written to memory using the platform’s native byte order; the API therefore guarantees that `hashLong(v)` equals `hashLongs(new long[] {v})` and similar array forms (`LongHashFunction.java`, `LongTupleHashFunction.java`). +* All bundled algorithms normalise multi-byte reads to little-endian before mixing, so the same input bytes produce identical hashes on big- and little-endian machines. +Performance may differ, but results must not (`CityAndFarmHash_1_1.java`, `XxHash.java`, `XXH3.java`, `WyHash.java`, `MetroHash.java`, `MurmurHash_3.java`). +* `hash(Object, Access, long off, long len)` assumes the addressed region is contiguous and valid for the requested byte count. +Implementations do not insert bounds checks beyond those provided by the chosen `Access` strategy, so callers must uphold the contract (`LongHashFunction.java:548-612`). +* `hashMemory(long address, long length)` treats the `address` as an absolute memory pointer. +Passing invalid or unmapped addresses is undefined behaviour and will surface as JVM crashes or segmentation faults rather than managed exceptions (`LongHashFunction.java:619-643`). +* `hashVoid()` returns the deterministic hash for an empty byte sequence. +Tests rely on this to confirm consistency, so new algorithms must define a stable zero-length value (`LongHashFunction.java`, `LongHashFunctionTest.java:36-45`). 
+ +=== Access Strategy Requirements + +* Custom `Access` implementations must keep `getLong`, `getUnsignedInt`, `getUnsignedShort`, and related methods mutually consistent under the reported `byteOrder`. +If the order is wrong or inconsistent, the algorithm will observe incoherent data (`Access.java:21-71`). +* `Access.byteOrder(input, desiredOrder)` may return the same instance or a cached reverse-order delegate. +Implementations should avoid allocating per call; use `reverseAccess()` to supply a singleton for the opposite endianness (`Access.java:273-344`). +* `UnsafeAccess` exposes heap arrays, off-heap memory, and even raw addresses when passed a `null` base plus an absolute offset. +Alternative `Access` implementations should document whether they permit null bases or require range checks (`UnsafeAccess.java:50-111`). + +=== Result Buffer Handling + +* `LongTupleHashFunction.hash*(…, long[] result)` requires a pre-sized buffer created via `newResultArray()`. +The method throws `NullPointerException` for null buffers and `IllegalArgumentException` for undersized buffers; the helper checks are centralised in `DualHashFunction` (`DualHashFunction.java:12-74`). +* The allocation-free path is only honoured when callers reuse buffers. +The overloads that return `long[]` will always allocate exactly one new array per call by design (`LongTupleHashFunction.java:70-118`). + +=== Seed and Determinism Rules + +* Seeded factory methods (for example `LongHashFunction.xx(long seed)`) embed the seed inside immutable instances. +The seed influences the entire mixing pipeline, so equality tests must compare the resulting hash values rather than object identity (`LongHashFunction.java:199-282`, `XxHash.java:117-196`). +* Serialisation preserves the chosen algorithm and seed. 
+Several implementations expose singleton seedless instances via `readResolve`, ensuring deserialisation maintains canonical objects where applicable (`XxHash.java:104-116`, `MetroHash.java:95-114`, `WyHash.java:116-138`). + +=== String Handling + +* `hashChars` and `hash(CharSequence…)` delegate to `Util.VALID_STRING_HASH`, which inspects the running JVM to choose the correct memory layout strategy. +Altering char sequence hashing must preserve this runtime detection, or mixed HotSpot/OpenJ9 estates will diverge (`Util.java:29-63`, `ModernCompactStringHash.java`, `ModernHotSpotStringHash.java`, `HotSpotPrior7u6StringHash.java`). +* Latin-1 compact strings are read through `CompactLatin1CharSequenceAccess`, which reinterprets the backing `byte[]` without allocating. +Any change to string support must maintain zero-allocation access for both UTF-16 and compact encodings (`CompactLatin1CharSequenceAccess.java`). + +=== Array and Buffer Offsets + +* Methods that accept `byte[]` plus `off` and `len` use `Util.checkArrayOffs` for bounds validation. +Negative lengths or offsets, or slices that extend past the array end, raise `IndexOutOfBoundsException` immediately (`Util.java:70-77`, `LongHashFunction.java:480-547`). +* ByteBuffer hashing honours the buffer’s position, limit, and order. +The implementation temporarily adjusts `Buffer` state to satisfy IBM JDK 7 quirks, then restores the original markers (`LongHashFunction.java:392-470`, `LongHashFunctionTest.java:120-176`). + +=== Thread Safety + +* All public hash function instances are effectively stateless after construction. +Concurrent calls share read-only tables (for example, the XXH3 secret) and do not mutate internal fields, so the objects are safe to reuse across threads. +Callers remain responsible for protecting the input they provide (`XXH3.java`, `XxHash.java`, `CityAndFarmHash_1_1.java`). 
diff --git a/src/main/docs/performance-benchmarks.adoc b/src/main/docs/performance-benchmarks.adoc new file mode 100644 index 0000000..a3121a4 --- /dev/null +++ b/src/main/docs/performance-benchmarks.adoc @@ -0,0 +1,30 @@ +== Performance Benchmarks + +Chronicle Software + +toc::[] + +=== Current Baseline + +* The published README table reports throughput (GB/s) and bootstrap latency (ns) per algorithm. +MetroHash and wyHash entries are currently marked TODO pending refreshed measurements (`README.adoc:53-66`). +* No automated benchmarking harness ships with the repository today. +Bench figures are captured offline and copied into the documentation. + +=== Measurement Protocol + +* Build the library in release mode (`mvn -q verify`) before running any benchmarks to ensure all classes are compiled and tests have passed. +* Pin tests to an isolated core and disable extraneous system load; past measurements targeted Intel Core i7-4870HQ @ 2.50 GHz, and new numbers should quote the exact CPU model and frequency. +* Use a harness that exercises each `LongHashFunction` via the public API (for example, `hashBytes(byte[])`) over representative buffer sizes. +Warm up the JVM until results stabilise before sampling throughput. +* Record: +** *Speed (GB/s):* sustained throughput while hashing large buffers in steady state. +** *Bootstrap (ns):* per-call overhead measured on very small inputs (e.g., empty or sub-cache-line data). +* Capture both native and non-native endianness where practical; conversion overhead on big-endian systems should be noted if it materially deviates from little-endian results. + +=== Reporting Guidelines + +* Update `README.adoc` with the latest benchmark table, noting the hardware, JVM version, and any JVM flags used during the run. +* Flag missing data as TODO (as done for MetroHash and wyHash) rather than leaving stale values in place. 
+* When an optimisation changes algorithm behaviour or memory access, include before/after data in the pull request description so reviewers can confirm the impact. +* Archive raw benchmark logs alongside the pull request or link to them from the decision log to ease future regression analysis. diff --git a/src/main/docs/specifications.adoc b/src/main/docs/specifications.adoc new file mode 100644 index 0000000..2b00814 --- /dev/null +++ b/src/main/docs/specifications.adoc @@ -0,0 +1,49 @@ +== Zero-Allocation Hashing Specification + +Chronicle Software + +toc::[] + +=== DOC-001 Scope + +* Provides zero-allocation hashing utilities for byte-oriented inputs in Java. +* Focuses on deterministic, cross-platform hash outputs across little- and big-endian architectures. +* Excludes POJO graph hashing and streaming transforms that require reallocating data. + +=== FN-101 Functional Requirements + +* Expose hashing entry points for `long` and `long[]` return types covering 64-bit and 128-bit output families. +* Support primitive arrays, `ByteBuffer`, direct memory regions, and `CharSequence` inputs without intermediate object creation. +* Offer algorithm selectors that remain stable across releases to preserve backward compatibility for stored digests. +* Provide predictable hashing irrespective of JVM byte order; the API must normalise endianness internally. + +=== NF-P-201 Performance Characteristics + +* Hashing calls must avoid heap allocations during steady-state use; initial static initialisation may allocate. +* Hot-path hashing should remain branch-light to minimise CPU misprediction on modern x86 and ARM cores. +* Benchmark coverage to include `xxHash`, `FarmHash na`, `FarmHash uo`, `CityHash`, `MurmurHash3`, `MetroHash`, and `wyHash`. +* TODO Gather refreshed MetroHash and wyHash throughput and bootstrap metrics (tracked in https://github.com/OpenHFT/Zero-Allocation-Hashing/issues/28[]). 
+ +=== NF-O-301 Operability Expectations + +* Library compiles with Maven using `mvn -q verify` on JDK 8 through JDK 21. +* Maintain Apache 2.0 licensing headers across source and documentation artefacts. +* Provide published Javadoc via https://javadoc.io/doc/net.openhft/zero-allocation-hashing/latest[] that reflects the released API surface. + +=== TEST-401 Quality Assurance + +* Unit tests must validate hashing consistency against known-good vectors for each algorithm. +* Cross-endian verification to compare outputs between little-endian and big-endian environments. +* Regression suites should cover null handling, bounds checking, and alignment-sensitive code paths. + +=== DOC-501 Documentation Obligations + +* Keep `README.adoc` aligned with the latest release version and supported JDK matrix. +* Update this specification whenever algorithms, performance guarantees, or platform support change. +* Record notable decisions in the project decision log with appropriate Nine-Box tags. + +=== OPS-601 Release Checklist + +* Confirm `mvn -q verify` passes on the targeted JDK matrix prior to tagging a release. +* Ensure SonarCloud analysis is green before publishing artefacts. +* Publish release notes summarising changes, performance impacts, and compatibility considerations. diff --git a/src/main/docs/testing-strategy.adoc b/src/main/docs/testing-strategy.adoc new file mode 100644 index 0000000..8f277bc --- /dev/null +++ b/src/main/docs/testing-strategy.adoc @@ -0,0 +1,37 @@ +== Testing Strategy +:pp: ++ + +Chronicle Software + +toc::[] + +=== Regression Coverage + +* `LongHashFunctionTest.test` is the canonical harness for verifying that an algorithm produces identical values across the entire API surface (primitives, arrays, buffers, `Access`-backed inputs, and direct memory). +All algorithm-specific tests delegate to this helper when checking new vectors (`LongHashFunctionTest.java:23-189`). 
+* Tuple variants reuse the same assertions through `LongTupleHashFunctionTest` (invoked transitively), ensuring the 128-bit paths remain aligned with their 64-bit projections. + +=== Reference Vectors + +* CityHash and FarmHash tests (`City64_1_1_Test`, `FarmHashTest`, `OriginalFarmHashTest`) replay the official C{pp} outputs for inputs up to 1024 bytes with deterministic seeds. +The source programs used to generate those vectors are embedded in each test for reproducibility. +* XXH64 (`XxHashTest`), XXH3 (`XXH3Test`), and XXH128 (`XXH128Test`) load tables generated by the upstream xxHash project, covering lengths up to two full XXH3 blocks. +* MetroHash (`MetroHashTest`) and wyHash (`WyHashTest`) follow the same pattern with public reference implementations, ensuring the adapted Java code remains bit-for-bit compatible. +* Collision and edge case tests (`XxHashCollisionTest`, `OriginalFarmHashTest`) target previously reported defects and must continue to pass to guard against regressions. + +=== Endianness Validation + +* Every algorithm test is executed under both native and non-native byte orders by virtue of the `LongHashFunctionTest` helper, which rewrites `ByteBuffer` order and exercises the `Access.byteOrder` adaptation code paths. +* Dedicated tests such as `LongHashFunctionTest.testNegativePrimitives` confirm that signed primitive hashes match the equivalent unsigned byte sequences, catching regressions in `Primitives.nativeToLittleEndian`. + +=== Adding or Modifying Algorithms + +. Generate authoritative reference hashes using the upstream implementation (the existing tests include example C or C{pp} snippets). +. Create or extend a parameterised JUnit test that iterates over message lengths (the standard pattern covers lengths `[0, N]` with `N` at least 1024). +. Feed the new vectors through `LongHashFunctionTest.test` (or the tuple equivalent) to validate all API entry points. +. 
Augment `LongHashFunctionTest` if the algorithm exposes new surface area (for example, additional primitive types or new `Access` strategies). + +=== Build and Verification + +* Run `mvn -q verify` from the repository root before publishing changes; the goal invokes unit tests under the appropriate JVM profiles and ensures multi-version compatibility. +* Include new regression cases alongside the code change so future automated runs catch behavioural drift without manual intervention. diff --git a/src/main/docs/unsafe-and-platform-notes.adoc b/src/main/docs/unsafe-and-platform-notes.adoc new file mode 100644 index 0000000..f14fbfc --- /dev/null +++ b/src/main/docs/unsafe-and-platform-notes.adoc @@ -0,0 +1,33 @@ +== Unsafe and Platform Notes + +Chronicle Software + +toc::[] + +=== Internal API Usage + +* `UnsafeAccess` reflects on `sun.misc.Unsafe.theUnsafe` to obtain the singleton and uses it for raw array access, field offsets, and direct memory loads (`UnsafeAccess.java:40-118`). +This keeps hashing allocation-free but depends on the `jdk.unsupported` module. +* Direct buffer handling casts to `sun.nio.ch.DirectBuffer` to retrieve native addresses for `hashBytes(ByteBuffer)` and `hashMemory` (`Util.java:65-68`, `LongHashFunction.java:430-470`). +The code path assumes the buffer is direct; heap buffers are copied through array access instead. +* String hashing inspects `java.lang.String.value` and, on compact-string VMs, treats the backing `byte[]` as Latin-1 using `CompactLatin1CharSequenceAccess` (`ModernCompactStringHash.java:10-62`). +Older HotSpot builds and other JVMs fall back to UTF-16 behaviour through `ModernHotSpotStringHash` or `HotSpotPrior7u6StringHash`. + +=== JVM Configuration + +* On Java 9 and newer, strong encapsulation may block reflective access to the JDK internals above. +Add the following opens/exports when running with a strict module layer: +** `--add-opens jdk.unsupported/sun.misc=ALL-UNNAMED` (access to `Unsafe.theUnsafe`; `sun.misc` resides in the `jdk.unsupported` module, not `java.base`). 
+** `--add-exports java.base/sun.nio.ch=ALL-UNNAMED` (casts to `DirectBuffer`). +** `--add-opens java.base/java.lang=ALL-UNNAMED` (reading `String.value` for compact strings). +* The library does not automatically request these permissions. +Applications embedding Zero-Allocation Hashing must configure the JVM command line or module descriptors accordingly. + +=== Platform Behaviour + +* All algorithms canonicalise multi-byte reads to little-endian using `Access.byteOrder` and `Primitives.nativeToLittleEndian`. +Hash outputs therefore match across architectures, but big-endian CPUs incur additional byte swap overhead (see `XxHash.java:14-19`, `XXH3.java:23-28`, `WyHash.java:9-15`). +* When targeting non-HotSpot JVMs, `Util.VALID_STRING_HASH` falls back to a conservative implementation (`UnknownJvmStringHash`) if the VM name or version is unrecognised. +This ensures correctness at the cost of potential performance; review the decision log before altering the detection logic (`Util.java:29-63`). +* Passing invalid pointers to `hashMemory` or misreporting `Access` byte order is undefined and can crash the JVM. +Always validate foreign memory addresses and keep access strategies consistent with the underlying storage layout. diff --git a/src/main/java/net/openhft/hashing/Access.java b/src/main/java/net/openhft/hashing/Access.java index a158f81..7fcc5d9 100644 --- a/src/main/java/net/openhft/hashing/Access.java +++ b/src/main/java/net/openhft/hashing/Access.java @@ -1,5 +1,5 @@ /* - * Copyright 2014 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2014-2025 chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/src/main/java/net/openhft/hashing/ByteBufferAccess.java b/src/main/java/net/openhft/hashing/ByteBufferAccess.java index 05dd0fa..4e726d6 100644 --- a/src/main/java/net/openhft/hashing/ByteBufferAccess.java +++ b/src/main/java/net/openhft/hashing/ByteBufferAccess.java @@ -1,5 +1,5 @@ /* - * Copyright 2014 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2014-2025 chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/main/java/net/openhft/hashing/CharSequenceAccess.java b/src/main/java/net/openhft/hashing/CharSequenceAccess.java index c13c215..81660a7 100644 --- a/src/main/java/net/openhft/hashing/CharSequenceAccess.java +++ b/src/main/java/net/openhft/hashing/CharSequenceAccess.java @@ -1,5 +1,5 @@ /* - * Copyright 2014 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2014-2025 chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/main/java/net/openhft/hashing/CityAndFarmHash_1_1.java b/src/main/java/net/openhft/hashing/CityAndFarmHash_1_1.java index cb94df8..1d16b5a 100644 --- a/src/main/java/net/openhft/hashing/CityAndFarmHash_1_1.java +++ b/src/main/java/net/openhft/hashing/CityAndFarmHash_1_1.java @@ -1,5 +1,5 @@ /* - * Copyright 2014 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2014-2025 chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/src/main/java/net/openhft/hashing/HotSpotPrior7u6StringHash.java b/src/main/java/net/openhft/hashing/HotSpotPrior7u6StringHash.java index 67b6043..3473a3b 100644 --- a/src/main/java/net/openhft/hashing/HotSpotPrior7u6StringHash.java +++ b/src/main/java/net/openhft/hashing/HotSpotPrior7u6StringHash.java @@ -1,5 +1,5 @@ /* - * Copyright 2014 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2014-2025 chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/main/java/net/openhft/hashing/LongHashFunction.java b/src/main/java/net/openhft/hashing/LongHashFunction.java index 570d5e3..2ffe73b 100644 --- a/src/main/java/net/openhft/hashing/LongHashFunction.java +++ b/src/main/java/net/openhft/hashing/LongHashFunction.java @@ -1,5 +1,5 @@ /* - * Copyright 2014 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2014-2025 chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -493,7 +493,7 @@ protected LongHashFunction() { public abstract long hash(T input, Access access, long off, long len); private long unsafeHash(Object input, long off, long len) { - return hash(input, UnsafeAccess.INSTANCE, off, len); + return hash(input, INSTANCE, off, len); } /** diff --git a/src/main/java/net/openhft/hashing/LongTupleHashFunction.java b/src/main/java/net/openhft/hashing/LongTupleHashFunction.java index e33e3c4..6b1b6f3 100644 --- a/src/main/java/net/openhft/hashing/LongTupleHashFunction.java +++ b/src/main/java/net/openhft/hashing/LongTupleHashFunction.java @@ -974,7 +974,7 @@ public long[] hashLongs(final long[] input, final int off, final int len) { // Internal helper // @NotNull - private static final Access OBJECT_ACCESS = UnsafeAccess.INSTANCE; + private static final Access OBJECT_ACCESS = INSTANCE; @NotNull private static final Access CHAR_SEQ_ACCESS = nativeCharSequenceAccess(); @NotNull diff --git a/src/main/java/net/openhft/hashing/MetroHash.java b/src/main/java/net/openhft/hashing/MetroHash.java index aa4e7f3..f3f07ed 100644 --- a/src/main/java/net/openhft/hashing/MetroHash.java +++ b/src/main/java/net/openhft/hashing/MetroHash.java @@ -34,10 +34,10 @@ static long metroHash64(long seed, T input, Access access, long off, long remaining -= 32; } while (remaining >= 32); - v2 ^= Long.rotateRight(((v0 + v3) * k0) + v1, 37) * k1; - v3 ^= Long.rotateRight(((v1 + v2) * k1) + v0, 37) * k0; - v0 ^= Long.rotateRight(((v0 + v2) * k0) + v3, 37) * k1; - v1 ^= Long.rotateRight(((v1 + v3) * k1) + v2, 37) * k0; + v2 ^= Long.rotateRight((v0 + v3) * k0 + v1, 37) * k1; + v3 ^= Long.rotateRight((v1 + v2) * k1 + v0, 37) * k0; + v0 ^= Long.rotateRight((v0 + v2) * k0 + v3, 37) * k1; + v1 ^= Long.rotateRight((v1 + v3) * k1 + v2, 37) * k0; h += v0 ^ v1; } diff --git a/src/main/java/net/openhft/hashing/ModernCompactStringHash.java b/src/main/java/net/openhft/hashing/ModernCompactStringHash.java index ac30884..de613b7 100644 --- 
a/src/main/java/net/openhft/hashing/ModernCompactStringHash.java +++ b/src/main/java/net/openhft/hashing/ModernCompactStringHash.java @@ -20,7 +20,7 @@ enum ModernCompactStringHash implements StringHash { valueOffset = UnsafeAccess.UNSAFE.objectFieldOffset(valueField); final byte[] value = (byte[]) UnsafeAccess.UNSAFE.getObject("A", valueOffset); - enableCompactStrings = (1 == value.length); + enableCompactStrings = value.length == 1; } catch (final NoSuchFieldException e) { throw new AssertionError(e); } diff --git a/src/main/java/net/openhft/hashing/ModernHotSpotStringHash.java b/src/main/java/net/openhft/hashing/ModernHotSpotStringHash.java index b5d71fa..250431c 100644 --- a/src/main/java/net/openhft/hashing/ModernHotSpotStringHash.java +++ b/src/main/java/net/openhft/hashing/ModernHotSpotStringHash.java @@ -1,5 +1,5 @@ /* - * Copyright 2014 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2014-2025 chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/main/java/net/openhft/hashing/MurmurHash_3.java b/src/main/java/net/openhft/hashing/MurmurHash_3.java index 65a6b37..359923a 100644 --- a/src/main/java/net/openhft/hashing/MurmurHash_3.java +++ b/src/main/java/net/openhft/hashing/MurmurHash_3.java @@ -1,5 +1,5 @@ /* - * Copyright 2014 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2014-2025 chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -61,37 +61,37 @@ private static long hash(long seed, @Nullable T input, Access access, lon long k2 = 0L; switch ((int) remaining) { case 15: - k2 ^= ((long) access.u8(input, offset + 14L)) << 48;// fall through + k2 ^= (long) access.u8(input, offset + 14L) << 48;// fall through case 14: - k2 ^= ((long) access.u8(input, offset + 13L)) << 40;// fall through + k2 ^= (long) access.u8(input, offset + 13L) << 40;// fall through case 13: - k2 ^= ((long) access.u8(input, offset + 12L)) << 32;// fall through + k2 ^= (long) access.u8(input, offset + 12L) << 32;// fall through case 12: - k2 ^= ((long) access.u8(input, offset + 11L)) << 24;// fall through + k2 ^= (long) access.u8(input, offset + 11L) << 24;// fall through case 11: - k2 ^= ((long) access.u8(input, offset + 10L)) << 16;// fall through + k2 ^= (long) access.u8(input, offset + 10L) << 16;// fall through case 10: - k2 ^= ((long) access.u8(input, offset + 9L)) << 8; // fall through + k2 ^= (long) access.u8(input, offset + 9L) << 8; // fall through case 9: - k2 ^= ((long) access.u8(input, offset + 8L)); // fall through + k2 ^= (long) access.u8(input, offset + 8L); // fall through case 8: k1 ^= access.i64(input, offset); break; case 7: - k1 ^= ((long) access.u8(input, offset + 6L)) << 48; // fall through + k1 ^= (long) access.u8(input, offset + 6L) << 48; // fall through case 6: - k1 ^= ((long) access.u8(input, offset + 5L)) << 40; // fall through + k1 ^= (long) access.u8(input, offset + 5L) << 40; // fall through case 5: - k1 ^= ((long) access.u8(input, offset + 4L)) << 32; // fall through + k1 ^= (long) access.u8(input, offset + 4L) << 32; // fall through case 4: k1 ^= access.u32(input, offset); break; case 3: - k1 ^= ((long) access.u8(input, offset + 2L)) << 16; // fall through + k1 ^= (long) access.u8(input, offset + 2L) << 16; // fall through case 2: - k1 ^= ((long) access.u8(input, offset + 1L)) << 8; // fall through + k1 ^= (long) access.u8(input, offset + 1L) << 8; // fall through case 1: - k1 ^= ((long) 
access.u8(input, offset)); + k1 ^= (long) access.u8(input, offset); case 0: break; default: @@ -116,43 +116,43 @@ private static long hash(long seed, @Nullable T input, Access access, lon // { // switch ((int) remaining) { // case 15: -// k2 ^= ((long) access.u8(input, offset + 14L)) << 48; +// k2 ^= (long) access.u8(input, offset + 14L) << 48; // case 14: // k2 ^= ((long) Primitives.nativeToLittleEndian( -// access.u16(input, offset + 12L))) << 32; +// access.u16(input, offset + 12L)) << 32; // break fetch8_11; // case 13: -// k2 ^= ((long) access.u8(input, offset + 12L)) << 32; +// k2 ^= (long) access.u8(input, offset + 12L) << 32; // case 12: // break fetch8_11; // case 11: -// k2 ^= ((long) access.u8(input, offset + 10L)) << 16; +// k2 ^= (long) access.u8(input, offset + 10L) << 16; // case 10: // k2 ^= (long) Primitives.nativeToLittleEndian( -// access.u16(input, offset + 8L)); +// access.u16(input, offset + 8L); // break fetch0_7; // case 9: -// k2 ^= ((long) access.u8(input, offset + 8L)); +// k2 ^= (long) access.u8(input, offset + 8L); // case 8: // break fetch0_7; // case 7: -// k1 ^= ((long) access.u8(input, offset + 6L)) << 48; +// k1 ^= (long) access.u8(input, offset + 6L) << 48; // case 6: // k1 ^= ((long) Primitives.nativeToLittleEndian( -// access.u16(input, offset + 4L))) << 32; +// access.u16(input, offset + 4L)) << 32; // break fetch0_3; // case 5: -// k1 ^= ((long) access.u8(input, offset + 4L)) << 32; +// k1 ^= (long) access.u8(input, offset + 4L) << 32; // case 4: // break fetch0_3; // case 3: -// k1 ^= ((long) access.u8(input, offset + 2L)) << 16; +// k1 ^= (long) access.u8(input, offset + 2L) << 16; // case 2: // k1 ^= (long) Primitives.nativeToLittleEndian( -// access.u16(input, offset)); +// access.u16(input, offset); // break megaSwitch; // case 1: -// k1 ^= ((long) access.u8(input, offset)); +// k1 ^= (long) access.u8(input, offset); // break megaSwitch; // default: // throw new AssertionError(); diff --git 
a/src/main/java/net/openhft/hashing/Primitives.java b/src/main/java/net/openhft/hashing/Primitives.java index 3ef96a7..6d0146c 100644 --- a/src/main/java/net/openhft/hashing/Primitives.java +++ b/src/main/java/net/openhft/hashing/Primitives.java @@ -1,5 +1,5 @@ /* - * Copyright 2014 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2014-2025 chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/main/java/net/openhft/hashing/StringHash.java b/src/main/java/net/openhft/hashing/StringHash.java index 0137eac..12e8e54 100644 --- a/src/main/java/net/openhft/hashing/StringHash.java +++ b/src/main/java/net/openhft/hashing/StringHash.java @@ -1,5 +1,5 @@ /* - * Copyright 2014 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2014-2025 chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/main/java/net/openhft/hashing/UnknownJvmStringHash.java b/src/main/java/net/openhft/hashing/UnknownJvmStringHash.java index 2e19f68..86b9762 100644 --- a/src/main/java/net/openhft/hashing/UnknownJvmStringHash.java +++ b/src/main/java/net/openhft/hashing/UnknownJvmStringHash.java @@ -1,5 +1,5 @@ /* - * Copyright 2014 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2014-2025 chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/src/main/java/net/openhft/hashing/UnsafeAccess.java b/src/main/java/net/openhft/hashing/UnsafeAccess.java index 3197cce..b0a9566 100644 --- a/src/main/java/net/openhft/hashing/UnsafeAccess.java +++ b/src/main/java/net/openhft/hashing/UnsafeAccess.java @@ -1,5 +1,5 @@ /* - * Copyright 2014 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2014-2025 chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/main/java/net/openhft/hashing/Util.java b/src/main/java/net/openhft/hashing/Util.java index 8d55607..7dc5ea9 100644 --- a/src/main/java/net/openhft/hashing/Util.java +++ b/src/main/java/net/openhft/hashing/Util.java @@ -55,6 +55,7 @@ static private boolean isZing(@NotNull final String name) { stringHash = HotSpotPrior7u6StringHash.INSTANCE; } } catch (final Throwable ignore) { + // Fallback handled in finally } finally { if (null == stringHash) { VALID_STRING_HASH = UnknownJvmStringHash.INSTANCE; diff --git a/src/main/java/net/openhft/hashing/WyHash.java b/src/main/java/net/openhft/hashing/WyHash.java index eaefb66..c99850b 100644 --- a/src/main/java/net/openhft/hashing/WyHash.java +++ b/src/main/java/net/openhft/hashing/WyHash.java @@ -158,7 +158,7 @@ public long hashLong(long input) { @Override public long hashInt(int input) { input = Primitives.nativeToLittleEndian(input); - long longInput = (input & 0xFFFFFFFFL); + long longInput = input & 0xFFFFFFFFL; return _wymum(_wymum(longInput ^ seed() ^ _wyp0, longInput ^ seed() ^ _wyp1) ^ seed(), 4 ^ _wyp4); diff --git a/src/main/java/net/openhft/hashing/XXH3.java b/src/main/java/net/openhft/hashing/XXH3.java index 88d9851..d1dab87 100644 --- a/src/main/java/net/openhft/hashing/XXH3.java +++ b/src/main/java/net/openhft/hashing/XXH3.java @@ -1,5 +1,5 @@ /* - * Copyright 2015 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2015-2025 
chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/main/java/net/openhft/hashing/XxHash.java b/src/main/java/net/openhft/hashing/XxHash.java index 6c3972f..60f9399 100644 --- a/src/main/java/net/openhft/hashing/XxHash.java +++ b/src/main/java/net/openhft/hashing/XxHash.java @@ -1,5 +1,5 @@ /* - * Copyright 2015 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2015-2025 chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/main/java/net/openhft/hashing/package-info.java b/src/main/java/net/openhft/hashing/package-info.java index 16d964c..f74b7e6 100644 --- a/src/main/java/net/openhft/hashing/package-info.java +++ b/src/main/java/net/openhft/hashing/package-info.java @@ -1,5 +1,5 @@ /* - * Copyright 2014 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2014-2025 chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/src/test/java/net/openhft/hashing/ByteBufferAccessTest.java b/src/test/java/net/openhft/hashing/ByteBufferAccessTest.java new file mode 100644 index 0000000..0ffb673 --- /dev/null +++ b/src/test/java/net/openhft/hashing/ByteBufferAccessTest.java @@ -0,0 +1,66 @@ +/* + * Copyright 2014-2025 chronicle.software + */ +package net.openhft.hashing; + +import org.junit.Test; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import static java.nio.ByteOrder.BIG_ENDIAN; +import static java.nio.ByteOrder.LITTLE_ENDIAN; +import static org.junit.Assert.assertEquals; + +public class ByteBufferAccessTest { + + private static final byte[] SAMPLE = { + (byte) 0x12, (byte) 0x34, (byte) 0x56, (byte) 0x78, + (byte) 0x9A, (byte) 0xBC, (byte) 0xDE, (byte) 0xF0 + }; + + @Test + public void littleEndianAccessReadsExpectedValues() { + ByteBuffer buffer = ByteBuffer.wrap(SAMPLE).order(LITTLE_ENDIAN); + ByteBufferAccess access = ByteBufferAccess.INSTANCE; + + assertEquals(ByteOrder.LITTLE_ENDIAN, access.byteOrder(buffer)); + assertEquals(0xF0DEBC9A78563412L, access.getLong(buffer, 0)); + assertEquals(0xF0DEBC9AL, access.getUnsignedInt(buffer, 4)); + assertEquals(0x78563412, access.getInt(buffer, 0)); + assertEquals(0xBC9A, access.getUnsignedShort(buffer, 4)); + assertEquals(0x5634, access.getShort(buffer, 1)); + assertEquals(0xDE, access.getUnsignedByte(buffer, 6)); + assertEquals(-68, access.getByte(buffer, 5)); + } + + @Test + public void bigEndianAccessReadsExpectedValues() { + ByteBuffer buffer = ByteBuffer.wrap(SAMPLE).order(BIG_ENDIAN); + ByteBufferAccess access = ByteBufferAccess.INSTANCE; + + assertEquals(ByteOrder.BIG_ENDIAN, access.byteOrder(buffer)); + assertEquals(0x123456789ABCDEF0L, access.getLong(buffer, 0)); + assertEquals(0x12345678L, access.getUnsignedInt(buffer, 0)); + assertEquals((int) 0x9ABCDEF0L, access.getInt(buffer, 4)); + assertEquals(0x5678, access.getUnsignedShort(buffer, 2)); + assertEquals(0x789A, access.getShort(buffer, 3)); + 
assertEquals(0x34, access.getUnsignedByte(buffer, 1)); + assertEquals(-102, access.getByte(buffer, 4)); + } + + @Test + public void reverseAccessFlipsByteOrder() { + ByteBuffer buffer = ByteBuffer.wrap(SAMPLE).order(LITTLE_ENDIAN); + Access reverse = ByteBufferAccess.INSTANCE.reverseAccess(); + + assertEquals(ByteOrder.BIG_ENDIAN, reverse.byteOrder(buffer)); + assertEquals(0x123456789ABCDEF0L, reverse.getLong(buffer, 0)); + assertEquals(0x12345678L, reverse.getUnsignedInt(buffer, 0)); + assertEquals((int) 0x9ABCDEF0L, reverse.getInt(buffer, 4)); + assertEquals(0x5678, reverse.getUnsignedShort(buffer, 2)); + assertEquals(0x789A, reverse.getShort(buffer, 3)); + assertEquals(0x34, reverse.getUnsignedByte(buffer, 1)); + assertEquals(-102, reverse.getByte(buffer, 4)); + } +} diff --git a/src/test/java/net/openhft/hashing/City64_1_1_Test.java b/src/test/java/net/openhft/hashing/City64_1_1_Test.java index 32b8416..d13e751 100644 --- a/src/test/java/net/openhft/hashing/City64_1_1_Test.java +++ b/src/test/java/net/openhft/hashing/City64_1_1_Test.java @@ -1,5 +1,5 @@ /* - * Copyright 2014 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2014-2025 chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/src/test/java/net/openhft/hashing/DualHashFunctionTest.java b/src/test/java/net/openhft/hashing/DualHashFunctionTest.java new file mode 100644 index 0000000..c22e04a --- /dev/null +++ b/src/test/java/net/openhft/hashing/DualHashFunctionTest.java @@ -0,0 +1,32 @@ +/* + * Copyright 2014-2025 chronicle.software + */ +package net.openhft.hashing; + +import org.junit.Test; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +public class DualHashFunctionTest { + + @Test(expected = IllegalArgumentException.class) + public void hashLongRejectsTooSmallResultArray() { + LongTupleHashFunction tuple = XXH3.asLongTupleHashFunctionWithoutSeed(); + tuple.hashLong(17L, new long[0]); + } + + @Test + public void longHashViewMatchesTupleFirstWord() { + LongTupleHashFunction tuple = XXH3.asLongTupleHashFunctionWithoutSeed(); + long value = 123456789L; + long[] viaAllocation = tuple.hashLong(value); + + long[] reuse = new long[viaAllocation.length]; + tuple.hashLong(value, reuse); + + assertArrayEquals(viaAllocation, reuse); + long asLong = ((DualHashFunction) tuple).asLongHashFunction().hashLong(value); + assertEquals(viaAllocation[0], asLong); + } +} diff --git a/src/test/java/net/openhft/hashing/FarmHashTest.java b/src/test/java/net/openhft/hashing/FarmHashTest.java index e4c4eee..1103db9 100644 --- a/src/test/java/net/openhft/hashing/FarmHashTest.java +++ b/src/test/java/net/openhft/hashing/FarmHashTest.java @@ -1,5 +1,5 @@ /* - * Copyright 2015 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2015-2025 chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/src/test/java/net/openhft/hashing/LongHashFunctionTest.java b/src/test/java/net/openhft/hashing/LongHashFunctionTest.java index 70f2664..45d1bce 100644 --- a/src/test/java/net/openhft/hashing/LongHashFunctionTest.java +++ b/src/test/java/net/openhft/hashing/LongHashFunctionTest.java @@ -1,5 +1,5 @@ /* - * Copyright 2014 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2014-2025 chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,8 @@ package net.openhft.hashing; +import org.junit.Test; + import java.nio.Buffer; import java.nio.ByteBuffer; import java.nio.ByteOrder; @@ -222,4 +224,51 @@ private static void testLatin1String(LongHashFunction f, byte[] data) { fail(e.toString()); } } + + @Test + public void hashBooleansSliceMatchesUnsafeAccess() { + LongHashFunction f = LongHashFunction.city_1_1(); + boolean[] data = {true, false, true, true, false}; + long expected = f.hash(data, UnsafeAccess.unsafe(), UnsafeAccess.BOOLEAN_BASE + 1, 3); + long actual = f.hashBooleans(data, 1, 3); + assertEquals(expected, actual); + try { + f.hashBooleans(data, 3, 5); + fail("Expected IndexOutOfBoundsException for invalid boolean slice"); + } catch (IndexOutOfBoundsException expectedException) { + // expected + } + } + + @Test + public void hashBytesSupportsDirectAndReadOnlyBuffers() { + LongHashFunction f = LongHashFunction.city_1_1(); + byte[] source = {0, 1, 2, 3, 4, 5}; + long expected = f.hashBytes(source, 1, 4); + + ByteBuffer direct = ByteBuffer.allocateDirect(source.length); + direct.put(source); + ((Buffer) direct).flip(); + long directHash = f.hashBytes(direct, 1, 4); + assertEquals(expected, directHash); + + ByteBuffer readOnly = ByteBuffer.wrap(source).asReadOnlyBuffer(); + long readOnlyHash = f.hashBytes(readOnly, 1, 4); + assertEquals(expected, readOnlyHash); + } + + @Test + public void hashCharsSliceMatchesCharArray() 
{ + LongHashFunction f = LongHashFunction.city_1_1(); + String sample = "abcdef"; + long expected = f.hashChars(sample.toCharArray(), 1, 3); + long actual = f.hashChars(sample, 1, 3); + assertEquals(expected, actual); + } + + @Test(expected = IndexOutOfBoundsException.class) + public void hashCharsRejectsInvalidSlice() { + LongHashFunction f = LongHashFunction.city_1_1(); + f.hashChars("abc", 2, 2); + } } diff --git a/src/test/java/net/openhft/hashing/ModernCompactStringHashTest.java b/src/test/java/net/openhft/hashing/ModernCompactStringHashTest.java new file mode 100644 index 0000000..ec516f1 --- /dev/null +++ b/src/test/java/net/openhft/hashing/ModernCompactStringHashTest.java @@ -0,0 +1,238 @@ +/* + * Copyright 2014-2025 chronicle.software + */ +package net.openhft.hashing; + +import org.junit.Assume; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class ModernCompactStringHashTest { + + private static boolean isCompactStringsVm() { + return System.getProperty("java.version").compareTo("1.9") >= 0; + } + + private static boolean compactStringsEnabled() { + if (!isCompactStringsVm()) { + return false; + } + try { + java.lang.reflect.Field field = ModernCompactStringHash.class.getDeclaredField("enableCompactStrings"); + field.setAccessible(true); + return field.getBoolean(null); + } catch (ReflectiveOperationException e) { + throw new AssertionError(e); + } + } + + private static boolean isCompactLatin1(String s) { + if (!compactStringsEnabled()) { + return false; + } + try { + java.lang.reflect.Field offsetField = ModernCompactStringHash.class.getDeclaredField("valueOffset"); + offsetField.setAccessible(true); + long valueOffset = offsetField.getLong(null); + Object value = UnsafeAccess.UNSAFE.getObject(s, valueOffset); + return value instanceof byte[] && ((byte[]) value).length == s.length(); + } catch (ReflectiveOperationException e) { + throw new AssertionError(e); + } + } + + @Test + public void longHashReturnsVoidWhenLenIsZero() { + 
Assume.assumeTrue(isCompactLatin1("Cafe")); + RecordingLongHashFunction hash = new RecordingLongHashFunction(); + + long actual = ModernCompactStringHash.INSTANCE.longHash("abc", hash, 0, 0); + + assertTrue(hash.hashVoidCalled); + assertEquals(RecordingLongHashFunction.HASH_VOID_RESULT, actual); + assertEquals(0L, hash.lastLength); + } + + @Test + public void longHashUsesCompactAccessForLatin1() { + Assume.assumeTrue(isCompactLatin1("Cafe")); + RecordingLongHashFunction hash = new RecordingLongHashFunction(); + + long actual = ModernCompactStringHash.INSTANCE.longHash("Cafe", hash, 1, 2); + + Assume.assumeTrue("Compact string path not active", hash.lastAccess == CompactLatin1CharSequenceAccess.INSTANCE); + assertEquals(RecordingLongHashFunction.HASH_RESULT, actual); + assertEquals(2L, hash.lastOffset); + assertEquals(4L, hash.lastLength); + } + + @Test + public void longHashFallsBackToUnsafeForNonLatin1() { + Assume.assumeTrue(isCompactStringsVm()); + RecordingLongHashFunction hash = new RecordingLongHashFunction(); + + ModernCompactStringHash.INSTANCE.longHash("ab\u0100c", hash, 0, 4); + + assertSame(UnsafeAccess.INSTANCE, hash.lastAccess); + assertEquals(UnsafeAccess.BYTE_BASE, hash.lastOffset); + assertEquals(8L, hash.lastLength); + } + + @Test(expected = IndexOutOfBoundsException.class) + public void longHashValidatesOffsets() { + Assume.assumeTrue(isCompactStringsVm()); + ModernCompactStringHash.INSTANCE.longHash("abc", new RecordingLongHashFunction(), 3, 1); + } + + @Test + public void tupleHashUsesCompactAccessForLatin1() { + Assume.assumeTrue(isCompactStringsVm()); + RecordingLongTupleHashFunction hash = new RecordingLongTupleHashFunction(); + long[] out = new long[hash.newResultArray().length]; + + ModernCompactStringHash.INSTANCE.hash("Cafe", hash, 1, 2, out); + + Assume.assumeTrue("Compact string path not active", hash.lastAccess == CompactLatin1CharSequenceAccess.INSTANCE); + assertEquals(2L, hash.lastOffset); + assertEquals(4L, hash.lastLength); + 
assertEquals(RecordingLongTupleHashFunction.RESULT_VALUE, out[0]); + } + + @Test + public void tupleHashReturnsVoidWhenLenIsZero() { + Assume.assumeTrue(isCompactStringsVm()); + RecordingLongTupleHashFunction hash = new RecordingLongTupleHashFunction(); + long[] out = new long[hash.newResultArray().length]; + + ModernCompactStringHash.INSTANCE.hash("abc", hash, 0, 0, out); + + assertTrue(hash.hashVoidCalled); + assertEquals(RecordingLongTupleHashFunction.VOID_RESULT_VALUE, out[0]); + } + + @Test + public void tupleHashFallsBackToUnsafeForNonLatin1() { + Assume.assumeTrue(isCompactStringsVm()); + RecordingLongTupleHashFunction hash = new RecordingLongTupleHashFunction(); + long[] out = new long[hash.newResultArray().length]; + + ModernCompactStringHash.INSTANCE.hash("ab\u0100c", hash, 0, 4, out); + + assertSame(UnsafeAccess.INSTANCE, hash.lastAccess); + assertEquals(UnsafeAccess.BYTE_BASE, hash.lastOffset); + assertEquals(8L, hash.lastLength); + } + + private static final class RecordingLongHashFunction extends LongHashFunction { + static final long HASH_RESULT = 0x1234L; + static final long HASH_VOID_RESULT = 0x5678L; + + Access lastAccess; + long lastOffset; + long lastLength; + boolean hashVoidCalled; + + @Override + public long hashLong(long input) { + throw new UnsupportedOperationException(); + } + + @Override + public long hashInt(int input) { + throw new UnsupportedOperationException(); + } + + @Override + public long hashShort(short input) { + throw new UnsupportedOperationException(); + } + + @Override + public long hashChar(char input) { + throw new UnsupportedOperationException(); + } + + @Override + public long hashByte(byte input) { + throw new UnsupportedOperationException(); + } + + @Override + public long hashVoid() { + hashVoidCalled = true; + lastAccess = null; + lastOffset = 0L; + lastLength = 0L; + return HASH_VOID_RESULT; + } + + @Override + public long hash(T input, Access access, long off, long len) { + lastAccess = access; + lastOffset = 
off; + lastLength = len; + return HASH_RESULT; + } + } + + private static final class RecordingLongTupleHashFunction extends LongTupleHashFunction { + static final long RESULT_VALUE = 0x2233L; + static final long VOID_RESULT_VALUE = 0x3344L; + + Access lastAccess; + long lastOffset; + long lastLength; + boolean hashVoidCalled; + + @Override + public int bitsLength() { + return 128; + } + + @Override + public void hashLong(long input, long[] result) { + throw new UnsupportedOperationException(); + } + + @Override + public void hashInt(int input, long[] result) { + throw new UnsupportedOperationException(); + } + + @Override + public void hashShort(short input, long[] result) { + throw new UnsupportedOperationException(); + } + + @Override + public void hashChar(char input, long[] result) { + throw new UnsupportedOperationException(); + } + + @Override + public void hashByte(byte input, long[] result) { + throw new UnsupportedOperationException(); + } + + @Override + public void hashVoid(long[] result) { + hashVoidCalled = true; + result[0] = VOID_RESULT_VALUE; + if (result.length > 1) { + result[1] = 0L; + } + } + + @Override + public void hash(T input, Access access, long off, long len, long[] result) { + lastAccess = access; + lastOffset = off; + lastLength = len; + result[0] = RESULT_VALUE; + if (result.length > 1) { + result[1] = 0L; + } + } + } +} diff --git a/src/test/java/net/openhft/hashing/MurmurHash3Test.java b/src/test/java/net/openhft/hashing/MurmurHash3Test.java index ff26bcd..f91d34e 100644 --- a/src/test/java/net/openhft/hashing/MurmurHash3Test.java +++ b/src/test/java/net/openhft/hashing/MurmurHash3Test.java @@ -1,5 +1,5 @@ /* - * Copyright 2014 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2014-2025 chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/src/test/java/net/openhft/hashing/OriginalFarmHashTest.java b/src/test/java/net/openhft/hashing/OriginalFarmHashTest.java index 3ae50f2..2f09c8d 100644 --- a/src/test/java/net/openhft/hashing/OriginalFarmHashTest.java +++ b/src/test/java/net/openhft/hashing/OriginalFarmHashTest.java @@ -1,5 +1,5 @@ /* - * Copyright 2015 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2015-2025 chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/test/java/net/openhft/hashing/XXH128Test.java b/src/test/java/net/openhft/hashing/XXH128Test.java index dbc4088..cdaec4b 100644 --- a/src/test/java/net/openhft/hashing/XXH128Test.java +++ b/src/test/java/net/openhft/hashing/XXH128Test.java @@ -1,5 +1,5 @@ /* - * Copyright 2015 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2015-2025 chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/test/java/net/openhft/hashing/XXH3Test.java b/src/test/java/net/openhft/hashing/XXH3Test.java index 97b5eef..a1196fc 100644 --- a/src/test/java/net/openhft/hashing/XXH3Test.java +++ b/src/test/java/net/openhft/hashing/XXH3Test.java @@ -1,5 +1,5 @@ /* - * Copyright 2015 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2015-2025 chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/src/test/java/net/openhft/hashing/XxHashCollisionTest.java b/src/test/java/net/openhft/hashing/XxHashCollisionTest.java index a98b34f..4e22d1e 100644 --- a/src/test/java/net/openhft/hashing/XxHashCollisionTest.java +++ b/src/test/java/net/openhft/hashing/XxHashCollisionTest.java @@ -1,5 +1,5 @@ /* - * Copyright 2015 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2015-2025 chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/test/java/net/openhft/hashing/XxHashTest.java b/src/test/java/net/openhft/hashing/XxHashTest.java index 720ee28..2c5bd22 100644 --- a/src/test/java/net/openhft/hashing/XxHashTest.java +++ b/src/test/java/net/openhft/hashing/XxHashTest.java @@ -1,5 +1,5 @@ /* - * Copyright 2015 Higher Frequency Trading http://www.higherfrequencytrading.com + * Copyright 2015-2025 chronicle.software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.