diff --git a/.bazeliskrc b/.bazeliskrc new file mode 100644 index 000000000..f4c188428 --- /dev/null +++ b/.bazeliskrc @@ -0,0 +1 @@ +USE_BAZEL_VERSION=7.4.1 \ No newline at end of file diff --git a/.bazelrc b/.bazelrc index 7ba093994..6792b5cd2 100644 --- a/.bazelrc +++ b/.bazelrc @@ -15,6 +15,10 @@ common --experimental_repo_remote_exec common --experimental_cc_shared_library +common --registry=https://raw.githubusercontent.com/secretflow/bazel-registry/main +common --registry=https://bcr.bazel.build +common --registry=https://baidu.github.io/babylon/registry + # Required by OpenXLA build --nocheck_visibility @@ -64,3 +68,6 @@ build:macos --action_env MACOSX_DEPLOYMENT_TARGET=13.0 build:linux --copt=-fopenmp build:linux --linkopt=-fopenmp + +# NOTE: fix build on linux_aarch64, since `build:linux_aarch64` works only if `--config linux_aarch64` option included. +build:linux --cxxopt=-Wno-mismatched-new-delete diff --git a/.bazelversion b/.bazelversion deleted file mode 100644 index f22d756da..000000000 --- a/.bazelversion +++ /dev/null @@ -1 +0,0 @@ -6.5.0 diff --git a/.circleci/asan-config.yml b/.circleci/asan-config.yml index 901cad5de..fb1237402 100644 --- a/.circleci/asan-config.yml +++ b/.circleci/asan-config.yml @@ -46,10 +46,6 @@ jobs: - run: name: Checkout devtools command: git clone https://github.com/secretflow/devtools.git ../devtools - - run: - name: "Install dependencies" - command: | - python3 -m pip install numpy - run: name: "test" command: | diff --git a/.circleci/benchmark-config.yml b/.circleci/benchmark-config.yml index 8a4ddad37..82eab8c37 100644 --- a/.circleci/benchmark-config.yml +++ b/.circleci/benchmark-config.yml @@ -47,8 +47,6 @@ jobs: "python3 -m pip install -U pip && \ python3 -m pip install spu && \ cd /home/admin/dev/ && \ - python3 -m pip install -r requirements-dev.txt && \ - python3 -m pip install -r examples/python/ml/requirements.txt && \ export PYTHONPATH="${PWD}:$PYTHONPATH" && \ bash .circleci/run-nn.sh" | tee 
benchmark_results.log - run: diff --git a/.circleci/continue-config.yml b/.circleci/continue-config.yml index 50ff328c9..5b0669ff8 100644 --- a/.circleci/continue-config.yml +++ b/.circleci/continue-config.yml @@ -52,11 +52,6 @@ commands: - run: name: Checkout devtools command: git clone https://github.com/secretflow/devtools.git ../devtools - - run: - name: "Install dependencies" - command: | - python3 -m pip install -r requirements.txt - python3 -m pip install -r requirements-dev.txt - run: name: Setup GCS command: | @@ -64,7 +59,10 @@ commands: ../devtools/bazel_cache_setup.py --in_file=../gcs.data --out_file=../gcs.json --min_download - run: name: "build" - command: bazel build <> -c opt --ui_event_filters=-info,-debug,-warning + command: | + bazel --version && python3 --version + if [ ! -e "/usr/bin/python3" ]; then ln -s `which python3` /usr/bin/python3; fi + bazel run //:requirements-dev.update && bazel build <> -c opt --ui_event_filters=-info,-debug,-warning - run: name: "test" command: | @@ -114,7 +112,6 @@ jobs: name: Install extra deps command: | python3 -m pip install tzdata - python3 -m pip install -r examples/python/ml/requirements.txt - build_and_test: targets: //examples/python/ml:ml_test extra_bazel_test_args: --test_env LD_LIBRARY_PATH=/root/miniconda3/lib/ @@ -134,8 +131,6 @@ jobs: bash ~/miniconda.sh -b -p $HOME/miniconda source $HOME/miniconda/bin/activate conda init bash zsh - pip install -r requirements.txt - pip install -r requirements-dev.txt - build_and_test # Invoke jobs via workflows # See: https://circleci.com/docs/2.0/configuration-reference/#workflows diff --git a/.circleci/release-config.yml b/.circleci/release-config.yml index 762fa5999..fdc689b6a 100644 --- a/.circleci/release-config.yml +++ b/.circleci/release-config.yml @@ -52,11 +52,11 @@ commands: conda create -n build python=<< parameters.python_ver >> -y conda activate build - sh ./build_wheel_entrypoint.sh + bazel build //:spu_wheel -c opt 
--@rules_python//python/config_settings:python_version=<< parameters.python_ver >> python3 -m pip install twine - ls dist/*.whl + ls bazel-bin/spu*.whl - python3 -m twine upload -r pypi -u __token__ -p ${PYPI_TWINE_TOKEN} dist/*.whl + python3 -m twine upload -r pypi -u __token__ -p ${PYPI_TWINE_TOKEN} bazel-bin/spu*.whl # Define a job to be invoked later in a workflow. # See: https://circleci.com/docs/2.0/configuration-reference/#jobs @@ -103,7 +103,7 @@ workflows: - linux_publish: matrix: parameters: - python_ver: ["3.9", "3.10", "3.11"] + python_ver: ["3.10", "3.11"] executor: ["linux_x64_executor", "linux_aarch64_executor"] filters: tags: @@ -111,7 +111,7 @@ workflows: - macOS_publish: matrix: parameters: - python_ver: ["3.9", "3.10", "3.11"] + python_ver: ["3.10", "3.11"] filters: tags: only: /.*(? - 'libspu/compiler/tests/interpret/template/**.template' - 'LICENSE' - 'NOTICE' - - '.bazelversion' + - 'MODULE.bazel.lock' + - '.bazeliskrc' - '.clang-format' - '.clang-tidy' - '.gitattributes' - '.gitignore' - '.gitmodules' - 'pyproject.toml' - - 'setup.cfg' - 'libspu/core/half.h' # MIT comment: never # <9> diff --git a/BUILD.bazel b/BUILD.bazel index 7a517cec5..6a5b98642 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -11,3 +11,114 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ +load("@python_versions//3.10:defs.bzl", compile_pip_requirements_3_10 = "compile_pip_requirements") +load("@python_versions//3.11:defs.bzl", compile_pip_requirements_3_11 = "compile_pip_requirements") + +# load("@python_versions//3.9:defs.bzl", compile_pip_requirements_3_9 = "compile_pip_requirements") +load("@rules_python//python:packaging.bzl", "py_package", "py_wheel") +load("@rules_python//python:pip.bzl", "compile_pip_requirements") +load("//:version.bzl", "SPU_VERSION") + +# compile_pip_requirements_3_9( +# name = "requirements_3_9", +# src = "requirements_3_9.txt", +# requirements_txt = "requirements_lock_3_9.txt", +# tags = ["manual"], +# ) + +compile_pip_requirements_3_10( + name = "requirements_3_10", + src = "requirements.txt", + requirements_txt = "requirements_lock_3_10.txt", + tags = ["manual"], +) + +compile_pip_requirements_3_11( + name = "requirements_3_11", + src = "requirements.txt", + requirements_txt = "requirements_lock_3_11.txt", + tags = ["manual"], +) + +compile_pip_requirements( + name = "requirements-dev", + src = "requirements-dev.txt", + requirements_txt = "requirements_dev_lock.txt", + tags = ["manual"], +) + +exports_files([ + "README.md", +]) + +# https://rules-python.readthedocs.io/en/latest/api/rules_python/python/packaging.html#py_wheel_rule +py_wheel( + name = "spu_wheel", + abi = select( + { + "@rules_python//python/config_settings:is_python_3.10": "cp310", + "@rules_python//python/config_settings:is_python_3.11": "cp311", + # "@rules_python//python/config_settings:is_python_3.9": "cp39", + "//conditions:default": "none", + }, + ), + author = "SecretFlow Team", + author_email = "secretflow-contact@service.alipay.com", + classifiers = [ + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + ], + description_content_type = "text/markdown", + description_file = "README.md", + distribution = "spu", + extra_distinfo_files = { + "//:LICENSE": "LICENSE", + 
}, + homepage = "https://github.com/secretflow/spu", + license = "Apache License 2.0", + # TODO: add other fields. + platform = select( + { + "@bazel_tools//src/conditions:linux_x86_64": "manylinux2014_x86_64", + "@bazel_tools//src/conditions:darwin_arm64": "macosx_13_0_arm64", + "@bazel_tools//src/conditions:linux_aarch64": "manylinux_2_28_aarch64", + "//conditions:default": "any", + }, + ), + python_requires = ">=3.9", + python_tag = select( + { + "@rules_python//python/config_settings:is_python_3.10": "cp310", + "@rules_python//python/config_settings:is_python_3.11": "cp311", + # "@rules_python//python/config_settings:is_python_3.9": "cp39", + "//conditions:default": "py3", + }, + ), + requires_file = "requirements.txt", + summary = "SPU aims to be a 'provable', 'measurable' secure computation device.", + tags = ["manual"], + twine = None, + version = SPU_VERSION, + deps = [ + ":spu_pkg", + ], +) + +py_package( + name = "spu_pkg", + packages = [ + "libspu", + "spu", + ], + visibility = ["//visibility:private"], + deps = [ + "//spu:api", + "//spu:init", + "//spu:libpsi", + "//spu:libspu", + "//spu/ops/groupby", + "//spu/utils:distributed", + ], +) diff --git a/CHANGELOG.md b/CHANGELOG.md index 97ff36b09..428fda2f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,12 @@ > > please add your unreleased change here. +- [SPU] Migrating to Bazel Modules and update minimum Python version to 3.10. 
+- [Feature] Add soPRF (LowMC cipher) for SEMI2K +- [API] Add Permute/InvPermute support in HLO +- [Feature] Add SSL configuration to the TTP server +- [Feature] Support quick sort for semi2k and aby3 + ## 20241219 - [SPU] 0.9.3b0 release diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9701c407b..ef2743e3d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -62,13 +62,6 @@ docker exec -it spu-dev-$(whoami) bash Install gcc>=11.2, cmake>=3.26, ninja, nasm>=2.15, python>=3.9, bazelisk, xxd, lld ``` -About the commands used to install the above dependencies, you can follow [Ubuntu docker file](https://github.com/secretflow/devtools/blob/main/dockerfiles/ubuntu-base-ci.DockerFile). - -```sh -python3 -m pip install -r requirements.txt -python3 -m pip install -r requirements-dev.txt -``` - #### macOS ```sh @@ -90,10 +83,6 @@ brew install bazelisk cmake ninja libomp wget # For Intel mac only brew install nasm - -# Install python dependencies -pip install -r requirements.txt -pip install -r requirements-dev.txt ``` ### Build & UnitTest @@ -117,6 +106,7 @@ bazel test //... --features=ubsan - `--define gperf=on` enable gperf - `--define tracelog=on` enable link trace log. 
+- `--@rules_python//python/config_settings:python_version=3.10` set the Python version as 3.10, the default version is 3.11 ### Build docs diff --git a/INSTALLATION.md b/INSTALLATION.md index 660baa8f1..de416aaa0 100644 --- a/INSTALLATION.md +++ b/INSTALLATION.md @@ -38,8 +38,8 @@ pip install spu - At the root of repo, run ```bash -python setup.py bdist_wheel -pip install dist/*.whl --force-reinstall +bazel build //:spu_wheel -c opt +pip install bazel-bin/spu-*.whl --force-reinstall ``` - Once GCC/bazel/python/Xcode version or other environment settings have changed, please run the following command to ensure a clean build @@ -51,5 +51,15 @@ bazel clean --expunge #### Build with GPU support ```bash -export ENABLE_GPU_BUILD=1 && python setup.py bdist_wheel +bazel build //:spu_wheel -c opt --config=gpu +``` + +#### Build with specified python version + +```bash +# build with python 3.10 +bazel build //:spu_wheel -c opt --@rules_python//python/config_settings:python_version=3.10 + +# build with python 3.11 +bazel build //:spu_wheel -c opt --@rules_python//python/config_settings:python_version=3.11 ``` diff --git a/MODULE.bazel b/MODULE.bazel new file mode 100644 index 000000000..2552234e8 --- /dev/null +++ b/MODULE.bazel @@ -0,0 +1,164 @@ +# Copyright 2024 Ant Group Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +############################################################################### +# Bazel now uses Bzlmod by default to manage external dependencies. +# Please consider migrating your external dependencies from WORKSPACE to MODULE.bazel. +# +# For more details, please check https://github.com/bazelbuild/bazel/issues/18958 +############################################################################### + +module( + name = "spulib", + version = "0.9.4", + compatibility_level = 1, +) + +bazel_dep(name = "grpc") +single_version_override( + module_name = "grpc", + patch_strip = 1, + patches = [ + "//bazel/patches:grpc-1.66.patch", + "//bazel/patches:grpc-module-file.patch", + ], + version = "1.66.0.bcr.3", +) + +bazel_dep(name = "protobuf", version = "27.3") +single_version_override( + module_name = "protobuf", + patch_strip = 1, + patches = [ + "//bazel/patches:protobuf-xla.patch", + ], + version = "27.3", +) + +bazel_dep(name = "bazel_skylib", version = "1.7.1") +bazel_dep(name = "apple_support", version = "1.17.1") +bazel_dep(name = "rules_cc", version = "0.0.12") +bazel_dep(name = "rules_cuda", version = "0.2.3") +bazel_dep(name = "rules_foreign_cc", version = "0.12.0") +bazel_dep(name = "bazel_features", version = "1.20.0") +bazel_dep(name = "platforms", version = "0.0.8") +bazel_dep(name = "pybind11_bazel", version = "2.13.6") +bazel_dep(name = "rules_python", version = "0.29.0") +bazel_dep(name = "rules_proto", version = "6.0.0-rc1") +bazel_dep(name = "spdlog", version = "1.14.1") +bazel_dep(name = "fmt", version = "11.0.2") +bazel_dep(name = "abseil-cpp", version = "20240722.0") + +DEFAULT_PYTHON_VERSION = "3.11" + +SUPPORTED_PYTHON_VERSIONS = [ + # "3.9", + "3.10", + "3.11", +] + +python = use_extension("@rules_python//python/extensions:python.bzl", "python") + +[ + python.toolchain( + ignore_root_user_error = True, + is_default = python_version == DEFAULT_PYTHON_VERSION, + python_version = python_version, + ) + for python_version in SUPPORTED_PYTHON_VERSIONS 
+] + +use_repo(python, "python_versions") + +pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip") + +[ + pip.parse( + hub_name = "spu_pip", + python_version = python_version, + requirements_lock = "//:requirements_lock_{}.txt".format(python_version.replace(".", "_")), + ) + for python_version in SUPPORTED_PYTHON_VERSIONS +] + +use_repo(pip, "spu_pip") +pip.parse( + hub_name = "spu_pip_dev", + python_version = DEFAULT_PYTHON_VERSION, + requirements_lock = "//:requirements_dev_lock.txt", +) +pip.override( + file = "torch-2.3.0-cp311-cp311-manylinux1_x86_64.whl", + patch_strip = 1, + patches = [ + # FIXME: https://github.com/pytorch/pytorch/issues/117350 + "//bazel/patches:pytorch.patch", + "//bazel/patches:pytorch_record.patch", + ], +) +use_repo(pip, "spu_pip_dev") + +# --registry=https://baidu.github.io/babylon/registry +bazel_dep(name = "leveldb", version = "1.23") + +# self-hosted registry +bazel_dep(name = "eigen", version = "3.4.90-20230801-66e8f3") +bazel_dep(name = "emp-tool", version = "0.2.5") +bazel_dep(name = "emp-ot", version = "0.2.4") +bazel_dep(name = "brpc", version = "1.11.0-20241212-282bc90") +bazel_dep(name = "seal", version = "4.1.1") +bazel_dep(name = "cutlass", version = "3.5.1") +bazel_dep(name = "llvm-raw", version = "20240809.0-35f55f5") +bazel_dep(name = "sse2neon", version = "1.7.0-20240330-8df2f48") + +llvm = use_extension("@llvm-raw//utils/bazel:extension.bzl", "llvm") +llvm.configure( + targets = [ + "AArch64", + "X86", + "ARM", + ], +) +use_repo(llvm, "llvm-project") + +bazel_dep(name = "stablehlo", version = "20240808.0-24d1807") +bazel_dep(name = "xla", version = "20240814.0-64bdcc5") +bazel_dep(name = "yacl", version = "20241212.0-871832a") +bazel_dep(name = "psi") +git_override( + module_name = "psi", + commit = "8ead92f1bb10329c7e7e56d541fecb3dcd47ee03", + remote = "https://github.com/secretflow/psi.git", +) + +spu_dependencies = use_extension("//bazel:defs.bzl", "non_module_dependencies") 
+use_repo(spu_dependencies, "xtensor") + +new_local_repository = use_repo_rule("@bazel_tools//tools/build_defs/repo:local.bzl", "new_local_repository") + +new_local_repository( + name = "macos_omp_x64", + build_file = "@yacl//bazel:local_openmp_macos.BUILD", + path = "/usr/local/opt/libomp", +) + +new_local_repository( + name = "macos_omp_arm64", + build_file = "@yacl//bazel:local_openmp_macos.BUILD", + path = "/opt/homebrew/opt/libomp/", +) + +# test +bazel_dep(name = "googletest", version = "1.15.2", dev_dependency = True) +bazel_dep(name = "google_benchmark", version = "1.8.5", dev_dependency = True) diff --git a/WORKSPACE b/WORKSPACE deleted file mode 100644 index 8a40fb588..000000000 --- a/WORKSPACE +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright 2021 Ant Group Co., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -workspace(name = "spulib") - -load("//bazel:repositories.bzl", "spu_deps") - -spu_deps() - -# -# yacl -# Warning: SPU relies on yacl to bring in common 3p libraries. -# Please make sure yacl_deps are called right after spu_deps. 
-# -load("@yacl//bazel:repositories.bzl", "yacl_deps") - -yacl_deps() - -load("@psi//bazel:repositories.bzl", "psi_deps") - -psi_deps() - -load("@rules_python//python:repositories.bzl", "py_repositories") - -py_repositories() - -load("@pybind11_bazel//:python_configure.bzl", "python_configure") - -python_configure( - name = "local_config_python", - python_version = "3", -) - -load( - "@rules_foreign_cc//foreign_cc:repositories.bzl", - "rules_foreign_cc_dependencies", -) - -rules_foreign_cc_dependencies( - register_built_tools = False, - register_default_tools = False, - register_preinstalled_tools = True, -) - -load("@bazel_features//:deps.bzl", "bazel_features_deps") - -bazel_features_deps() - -load("@rules_cuda//cuda:repositories.bzl", "register_detected_cuda_toolchains", "rules_cuda_dependencies") - -rules_cuda_dependencies() - -register_detected_cuda_toolchains() - -load("@xla//:workspace4.bzl", "xla_workspace4") - -xla_workspace4() - -load("@xla//:workspace3.bzl", "xla_workspace3") - -xla_workspace3() - -load("@xla//:workspace2.bzl", "xla_workspace2") - -xla_workspace2() - -load("@xla//:workspace1.bzl", "xla_workspace1") - -xla_workspace1() - -load("@xla//:workspace0.bzl", "xla_workspace0") - -xla_workspace0() - -load("@rules_proto_grpc//:repositories.bzl", "rules_proto_grpc_repos", "rules_proto_grpc_toolchains") - -rules_proto_grpc_toolchains() - -rules_proto_grpc_repos() - -# -# boost -# -load("@com_github_nelhage_rules_boost//:boost/boost.bzl", "boost_deps") - -boost_deps() diff --git a/bazel/local_openmp_macos.BUILD b/bazel/defs.bzl similarity index 66% rename from bazel/local_openmp_macos.BUILD rename to bazel/defs.bzl index 82d976b8b..d5037417e 100644 --- a/bazel/local_openmp_macos.BUILD +++ b/bazel/defs.bzl @@ -1,4 +1,4 @@ -# Copyright 2022 Ant Group Co., Ltd. +# Copyright 2024 Ant Group Co., Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -12,16 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -load("@rules_cc//cc:defs.bzl", "cc_library") +load("//bazel:repositories.bzl", "spu_deps") -cc_library( - name = "openmp", - srcs = [ - "lib/libomp.a", - ], - hdrs = ["include/omp.h"], - includes = [ - "include/", - ], - visibility = ["//visibility:public"], +def _non_module_dependencies_impl(_ctx): + spu_deps() + +non_module_dependencies = module_extension( + implementation = _non_module_dependencies_impl, ) diff --git a/bazel/eigen.BUILD b/bazel/eigen.BUILD deleted file mode 100644 index 80ccf3ca6..000000000 --- a/bazel/eigen.BUILD +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright 2022 Ant Group Co., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# copy from tf: -# https://raw.githubusercontent.com/tensorflow/tensorflow/master/third_party/eigen.BUILD -# -# Description: -# Eigen is a C++ template library for linear algebra: vectors, -# matrices, and related algorithms. - -load("@rules_cc//cc:defs.bzl", "cc_library") -load("@yacl//bazel:yacl.bzl", "OMP_DEPS") - -licenses([ - # Note: Eigen is an MPL2 library that includes GPL v3 and LGPL v2.1+ code. - # We've taken special care to not reference any restricted code. 
- "reciprocal", # MPL2 - "notice", # Portions BSD -]) - -exports_files(["COPYING.MPL2"]) - -EIGEN_FILES = [ - "Eigen/**", - "unsupported/Eigen/CXX11/**", - "unsupported/Eigen/FFT", - "unsupported/Eigen/KroneckerProduct", - "unsupported/Eigen/src/FFT/**", - "unsupported/Eigen/src/KroneckerProduct/**", - "unsupported/Eigen/MatrixFunctions", - "unsupported/Eigen/SpecialFunctions", - "unsupported/Eigen/src/MatrixFunctions/**", - "unsupported/Eigen/src/SpecialFunctions/**", -] - -# Files known to be under MPL2 license. -EIGEN_MPL2_HEADER_FILES = glob( - EIGEN_FILES, - exclude = [ - # Guarantees that any non-MPL2 file added to the list above will fail to - # compile. - "Eigen/src/Core/util/NonMPL2.h", - "Eigen/**/CMakeLists.txt", - ], -) - -cc_library( - name = "eigen3", - hdrs = EIGEN_MPL2_HEADER_FILES, - defines = [ - # This define (mostly) guarantees we don't link any problematic - # code. We use it, but we do not rely on it, as evidenced above. - "EIGEN_MPL2_ONLY", - ], - includes = ["."], - visibility = ["//visibility:public"], - deps = OMP_DEPS, -) - -filegroup( - name = "eigen_header_files", - srcs = EIGEN_MPL2_HEADER_FILES, - visibility = ["//visibility:public"], -) diff --git a/bazel/emp-ot.BUILD b/bazel/emp-ot.BUILD deleted file mode 100644 index 4e3814729..000000000 --- a/bazel/emp-ot.BUILD +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright 2022 Ant Group Co., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -load("@spulib//bazel:spu.bzl", "spu_cmake_external") - -package(default_visibility = ["//visibility:public"]) - -filegroup( - name = "all_srcs", - srcs = glob(["**"]), -) - -spu_cmake_external( - name = "emp-ot", - cache_entries = { - "CMAKE_FOLDER": "$EXT_BUILD_DEPS/emp-tool", - "EMP-TOOL_INCLUDE_DIR": "$EXT_BUILD_DEPS/emp-tool/include", - "EMP-TOOL_LIBRARY": "$EXT_BUILD_DEPS/emp-tool/lib", - "OPENSSL_ROOT_DIR": "$EXT_BUILD_DEPS/openssl", - "BUILD_TESTING": "OFF", - }, - lib_source = ":all_srcs", - out_headers_only = True, - deps = [ - "@com_github_emptoolkit_emp_tool//:emp-tool", - "@com_github_openssl_openssl//:openssl", - ], -) diff --git a/bazel/emp-tool.BUILD b/bazel/emp-tool.BUILD deleted file mode 100644 index 034df99dc..000000000 --- a/bazel/emp-tool.BUILD +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright 2022 Ant Group Co., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -load("@spulib//bazel:spu.bzl", "spu_cmake_external") - -package(default_visibility = ["//visibility:public"]) - -filegroup( - name = "all_srcs", - srcs = glob(["**"]), -) - -spu_cmake_external( - name = "emp-tool", - cache_entries = { - "OPENSSL_ROOT_DIR": "$EXT_BUILD_DEPS/openssl", - "BUILD_TESTING": "OFF", - }, - lib_source = ":all_srcs", - out_data_dirs = ["cmake"], - out_static_libs = [ - "libemp-tool.a", - ], - deps = [ - "@com_github_openssl_openssl//:openssl", - ], -) diff --git a/bazel/hexl.BUILD b/bazel/hexl.BUILD deleted file mode 100644 index 40adfc30f..000000000 --- a/bazel/hexl.BUILD +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright 2022 Ant Group Co., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -load("@spulib//bazel:spu.bzl", "spu_cmake_external") - -package(default_visibility = ["//visibility:public"]) - -filegroup( - name = "all_srcs", - srcs = glob(["**"]), -) - -spu_cmake_external( - name = "hexl", - cache_entries = { - "CMAKE_BUILD_TYPE": "Release", - "CpuFeatures_DIR": "$EXT_BUILD_DEPS/cpu_features/lib/cmake/CpuFeatures/", - "HEXL_BENCHMARK": "OFF", - "HEXL_TESTING": "OFF", - "CMAKE_INSTALL_LIBDIR": "lib", - }, - lib_source = ":all_srcs", - out_data_dirs = ["lib/cmake"], - out_static_libs = ["libhexl.a"], - deps = [ - "@com_github_google_cpu_features//:cpu_features", - ], -) diff --git a/bazel/nvidia_cutlass.BUILD b/bazel/patches/BUILD.bazel similarity index 55% rename from bazel/nvidia_cutlass.BUILD rename to bazel/patches/BUILD.bazel index dab20b76d..8289c83ed 100644 --- a/bazel/nvidia_cutlass.BUILD +++ b/bazel/patches/BUILD.bazel @@ -4,30 +4,10 @@ # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- -load("@spulib//bazel:spu.bzl", "spu_cc_library") - -package(default_visibility = ["//visibility:public"]) - -filegroup( - name = "all", - srcs = glob(["**"]), -) - -spu_cc_library( - name = "cutlass", - srcs = [], - hdrs = glob([ - "include/**/*.h", - "include/**/*.hpp", - ]), - strip_include_prefix = "include", - visibility = ["//visibility:public"], -) diff --git a/bazel/patches/emp-ot.patch b/bazel/patches/emp-ot.patch deleted file mode 100644 index 31cb353a4..000000000 --- a/bazel/patches/emp-ot.patch +++ /dev/null @@ -1,99 +0,0 @@ -diff --git a/emp-ot/ferret/ferret_cot.hpp b/emp-ot/ferret/ferret_cot.hpp -index 9dc8222..fbd6170 100644 ---- a/emp-ot/ferret/ferret_cot.hpp -+++ b/emp-ot/ferret/ferret_cot.hpp -@@ -28,8 +28,8 @@ FerretCOT::FerretCOT(int party, int threads, T **ios, - template - FerretCOT::~FerretCOT() { - if (ot_pre_data != nullptr) { -- if(party == ALICE) write_pre_data128_to_file((void*)ot_pre_data, (__uint128_t)Delta, pre_ot_filename); -- else write_pre_data128_to_file((void*)ot_pre_data, (__uint128_t)0, pre_ot_filename); -+ // if(party == ALICE) write_pre_data128_to_file((void*)ot_pre_data, (__uint128_t)Delta, pre_ot_filename); -+ // else write_pre_data128_to_file((void*)ot_pre_data, (__uint128_t)0, pre_ot_filename); - delete[] ot_pre_data; - } - if (ot_data != nullptr) delete[] ot_data; -@@ -100,7 +100,9 @@ void FerretCOT::setup(std::string pre_file) { - }); - - ot_pre_data = new block[param.n_pre]; -- bool hasfile = file_exists(pre_ot_filename), hasfile2; -+ //bool hasfile = file_exists(pre_ot_filename), hasfile2; -+ bool hasfile = false; -+ bool hasfile2 = false; - if(party == ALICE) { - io->send_data(&hasfile, sizeof(bool)); - io->flush(); - -diff --git a/emp-ot/ferret/mpcot_reg.h b/emp-ot/ferret/mpcot_reg.h -index 6659aa7..6b01601 100644 ---- a/emp-ot/ferret/mpcot_reg.h -+++ b/emp-ot/ferret/mpcot_reg.h -@@ -123,6 +123,10 @@ public: - for(int i = start; i < end; ++i) - exec_f2k_sender(senders[i], ot, sparse_vector+i*leave_n, - ios[threads 
- 1], i); -+ -+ for (int i = 0; i < threads; i++) -+ ios[i]->flush(); -+ - for (auto & f : fut) f.get(); - } - -@@ -152,7 +156,7 @@ public: - block *ggm_tree_mem, IO *io, int i) { - sender->compute(ggm_tree_mem, Delta_f2k); - sender->template send_f2k>(ot, io, i); -- io->flush(); -+ //io->flush(); - if(is_malicious) - sender->consistency_check_msg_gen(consist_check_VW+i); - } - -diff --git a/emp-ot/ferret/preot.h b/emp-ot/ferret/preot.h -index 0ac7641..a0ae2d3 100644 ---- a/emp-ot/ferret/preot.h -+++ b/emp-ot/ferret/preot.h -@@ -10,10 +10,6 @@ class OTPre { public: - block * pre_data = nullptr; - bool * bits = nullptr; - int n; -- vector pointers; -- vector choices; -- vector pointers0; -- vector pointers1; - - CCRH ccrh; - int length, count; - -diff --git a/emp-ot/ferret/twokeyprp.h b/emp-ot/ferret/twokeyprp.h -index fd6236d..c2361a3 100644 ---- a/emp-ot/ferret/twokeyprp.h -+++ b/emp-ot/ferret/twokeyprp.h -@@ -9,8 +9,8 @@ class TwoKeyPRP { public: - emp::AES_KEY aes_key[2]; - - TwoKeyPRP(block seed0, block seed1) { -- AES_set_encrypt_key((const block)seed0, aes_key); -- AES_set_encrypt_key((const block)seed1, &aes_key[1]); -+ AES_set_encrypt_key(seed0, aes_key); -+ AES_set_encrypt_key(seed1, &aes_key[1]); - } - - void node_expand_1to2(block *children, block parent) { - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index fa06fd7..faf9802 100755 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -12,5 +12,8 @@ include_directories(${EMP-TOOL_INCLUDE_DIRS}) - install(FILES cmake/emp-ot-config.cmake DESTINATION cmake/) - install(DIRECTORY emp-ot DESTINATION include/) - --ENABLE_TESTING() --ADD_SUBDIRECTORY(test) -+option(ENABLE_TESTS "Enable tests" OFF) -+if (${ENABLE_TESTS}) -+ ENABLE_TESTING() -+ ADD_SUBDIRECTORY(test) -+endif() diff --git a/bazel/patches/emp-tool-cmake.patch b/bazel/patches/emp-tool-cmake.patch deleted file mode 100644 index 01aa13dbe..000000000 --- a/bazel/patches/emp-tool-cmake.patch +++ /dev/null @@ -1,22 +0,0 @@ -diff --git a/CMakeLists.txt 
b/CMakeLists.txt -index d9abb31..4c2c171 100755 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -56,11 +56,14 @@ find_package(OpenSSL REQUIRED) - include_directories(${OPENSSL_INCLUDE_DIR}) - - --add_library(${NAME} SHARED ${sources}) -+add_library(${NAME} STATIC ${sources}) - - install(DIRECTORY emp-tool DESTINATION include/) - install(DIRECTORY cmake/ DESTINATION cmake/) - install(TARGETS ${NAME} DESTINATION lib) - --ENABLE_TESTING() --ADD_SUBDIRECTORY(test) -+option(ENABLE_TESTS "Enable tests" OFF) -+if (${ENABLE_TESTS}) -+ ENABLE_TESTING() -+ ADD_SUBDIRECTORY(test) -+endif() diff --git a/bazel/patches/emp-tool-sse2neon.patch b/bazel/patches/emp-tool-sse2neon.patch deleted file mode 100644 index e94b22e07..000000000 --- a/bazel/patches/emp-tool-sse2neon.patch +++ /dev/null @@ -1,6507 +0,0 @@ -diff --git a/emp-tool/utils/sse2neon.h b/emp-tool/utils/sse2neon.h -index d09b9c7..efa63a4 100644 ---- a/emp-tool/utils/sse2neon.h -+++ b/emp-tool/utils/sse2neon.h -@@ -113,7 +113,7 @@ - #ifdef _MSC_VER - #include - #if (defined(_M_AMD64) || defined(__x86_64__)) || \ -- (defined(_M_ARM) || defined(__arm__)) -+ (defined(_M_ARM64) || defined(__arm64__)) - #define SSE2NEON_HAS_BITSCAN64 - #endif - #endif -@@ -441,7 +441,7 @@ typedef int64x2_t __m128i; /* 128-bit vector containing integers */ - // by applications which attempt to access the contents of an __m128 struct - // directly. It is important to note that accessing the __m128 struct directly - // is bad coding practice by Microsoft: @see: --// https://docs.microsoft.com/en-us/cpp/cpp/m128 -+// https://learn.microsoft.com/en-us/cpp/cpp/m128 - // - // However, some legacy source code may try to access the contents of an __m128 - // struct directly so the developer can use the SIMDVec as an alias for it. 
Any -@@ -621,47 +621,6 @@ FORCE_INLINE uint16_t _sse2neon_vaddvq_u16(uint16x8_t a) - * 4, 5, 12, 13, 6, 7, 14, 15); - * // Shuffle packed 8-bit integers - * __m128i v_out = _mm_shuffle_epi8(v_in, v_perm); // pshufb -- * -- * Data (Number, Binary, Byte Index): -- +------+------+-------------+------+------+-------------+ -- | 1 | 2 | 3 | 4 | Number -- +------+------+------+------+------+------+------+------+ -- | 0000 | 0001 | 0000 | 0010 | 0000 | 0011 | 0000 | 0100 | Binary -- +------+------+------+------+------+------+------+------+ -- | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | Index -- +------+------+------+------+------+------+------+------+ -- -- +------+------+------+------+------+------+------+------+ -- | 5 | 6 | 7 | 8 | Number -- +------+------+------+------+------+------+------+------+ -- | 0000 | 0101 | 0000 | 0110 | 0000 | 0111 | 0000 | 1000 | Binary -- +------+------+------+------+------+------+------+------+ -- | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | Index -- +------+------+------+------+------+------+------+------+ -- * Index (Byte Index): -- +------+------+------+------+------+------+------+------+ -- | 1 | 0 | 2 | 3 | 8 | 9 | 10 | 11 | -- +------+------+------+------+------+------+------+------+ -- -- +------+------+------+------+------+------+------+------+ -- | 4 | 5 | 12 | 13 | 6 | 7 | 14 | 15 | -- +------+------+------+------+------+------+------+------+ -- * Result: -- +------+------+------+------+------+------+------+------+ -- | 1 | 0 | 2 | 3 | 8 | 9 | 10 | 11 | Index -- +------+------+------+------+------+------+------+------+ -- | 0001 | 0000 | 0000 | 0010 | 0000 | 0101 | 0000 | 0110 | Binary -- +------+------+------+------+------+------+------+------+ -- | 256 | 2 | 5 | 6 | Number -- +------+------+------+------+------+------+------+------+ -- -- +------+------+------+------+------+------+------+------+ -- | 4 | 5 | 12 | 13 | 6 | 7 | 14 | 15 | Index -- +------+------+------+------+------+------+------+------+ -- | 0000 | 0011 | 0000 | 0111 | 0000 | 
0100 | 0000 | 1000 | Binary -- +------+------+------+------+------+------+------+------+ -- | 3 | 7 | 4 | 8 | Number -- +------+------+------+------+------+------+-------------+ - */ - - /* Constants for use with _mm_prefetch. */ -@@ -1069,9 +1028,9 @@ FORCE_INLINE __m128i _mm_shuffle_epi_3332(__m128i a) - }) - #endif - --// NEON does not support a general purpose permute intrinsic --// Selects four specific single-precision, floating-point values from a and b, --// based on the mask i. -+// NEON does not support a general purpose permute intrinsic. -+// Shuffle single-precision (32-bit) floating-point elements in a using the -+// control in imm8, and store the results in dst. - // - // C equivalent: - // __m128 _mm_shuffle_ps_default(__m128 a, __m128 b, -@@ -1082,7 +1041,7 @@ FORCE_INLINE __m128i _mm_shuffle_epi_3332(__m128i a) - // return ret; - // } - // --// https://msdn.microsoft.com/en-us/library/vstudio/5f0858x0(v=vs.100).aspx -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_ps - #define _mm_shuffle_ps_default(a, b, imm) \ - __extension__({ \ - float32x4_t ret; \ -@@ -1100,12 +1059,10 @@ FORCE_INLINE __m128i _mm_shuffle_epi_3332(__m128i a) - vreinterpretq_m128_f32(ret); \ - }) - --// Shuffles the lower 4 signed or unsigned 16-bit integers in a as specified --// by imm. --// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/y41dkk37(v=vs.100) --// FORCE_INLINE __m128i _mm_shufflelo_epi16_function(__m128i a, --// __constrange(0,255) int --// imm) -+// Shuffle 16-bit integers in the low 64 bits of a using the control in imm8. -+// Store the results in the low 64 bits of dst, with the high 64 bits being -+// copied from from a to dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflelo_epi16 - #define _mm_shufflelo_epi16_function(a, imm) \ - __extension__({ \ - int16x8_t ret = vreinterpretq_s16_m128i(a); \ -@@ -1120,12 +1077,10 @@ FORCE_INLINE __m128i _mm_shuffle_epi_3332(__m128i a) - vreinterpretq_m128i_s16(ret); \ - }) - --// Shuffles the upper 4 signed or unsigned 16-bit integers in a as specified --// by imm. --// https://msdn.microsoft.com/en-us/library/13ywktbs(v=vs.100).aspx --// FORCE_INLINE __m128i _mm_shufflehi_epi16_function(__m128i a, --// __constrange(0,255) int --// imm) -+// Shuffle 16-bit integers in the high 64 bits of a using the control in imm8. -+// Store the results in the high 64 bits of dst, with the low 64 bits being -+// copied from from a to dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflehi_epi16 - #define _mm_shufflehi_epi16_function(a, imm) \ - __extension__({ \ - int16x8_t ret = vreinterpretq_s16_m128i(a); \ -@@ -1147,22 +1102,19 @@ FORCE_INLINE void _mm_empty(void) {} - - /* SSE */ - --// Adds the four single-precision, floating-point values of a and b. --// --// r0 := a0 + b0 --// r1 := a1 + b1 --// r2 := a2 + b2 --// r3 := a3 + b3 --// --// https://msdn.microsoft.com/en-us/library/vstudio/c9848chc(v=vs.100).aspx -+// Add packed single-precision (32-bit) floating-point elements in a and b, and -+// store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_ps - FORCE_INLINE __m128 _mm_add_ps(__m128 a, __m128 b) - { - return vreinterpretq_m128_f32( - vaddq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); - } - --// adds the scalar single-precision floating point values of a and b. 
--// https://msdn.microsoft.com/en-us/library/be94x2y6(v=vs.100).aspx -+// Add the lower single-precision (32-bit) floating-point element in a and b, -+// store the result in the lower element of dst, and copy the upper 3 packed -+// elements from a to the upper elements of dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_ss - FORCE_INLINE __m128 _mm_add_ss(__m128 a, __m128 b) - { - float32_t b0 = vgetq_lane_f32(vreinterpretq_f32_m128(b), 0); -@@ -1171,30 +1123,18 @@ FORCE_INLINE __m128 _mm_add_ss(__m128 a, __m128 b) - return vreinterpretq_m128_f32(vaddq_f32(a, value)); - } - --// Computes the bitwise AND of the four single-precision, floating-point values --// of a and b. --// --// r0 := a0 & b0 --// r1 := a1 & b1 --// r2 := a2 & b2 --// r3 := a3 & b3 --// --// https://msdn.microsoft.com/en-us/library/vstudio/73ck1xc5(v=vs.100).aspx -+// Compute the bitwise AND of packed single-precision (32-bit) floating-point -+// elements in a and b, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_ps - FORCE_INLINE __m128 _mm_and_ps(__m128 a, __m128 b) - { - return vreinterpretq_m128_s32( - vandq_s32(vreinterpretq_s32_m128(a), vreinterpretq_s32_m128(b))); - } - --// Computes the bitwise AND-NOT of the four single-precision, floating-point --// values of a and b. --// --// r0 := ~a0 & b0 --// r1 := ~a1 & b1 --// r2 := ~a2 & b2 --// r3 := ~a3 & b3 --// --// https://msdn.microsoft.com/en-us/library/vstudio/68h7wd02(v=vs.100).aspx -+// Compute the bitwise NOT of packed single-precision (32-bit) floating-point -+// elements in a and then AND with b, and store the results in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_ps - FORCE_INLINE __m128 _mm_andnot_ps(__m128 a, __m128 b) - { - return vreinterpretq_m128_s32( -@@ -1204,13 +1144,7 @@ FORCE_INLINE __m128 _mm_andnot_ps(__m128 a, __m128 b) - - // Average packed unsigned 16-bit integers in a and b, and store the results in - // dst. --// --// FOR j := 0 to 3 --// i := j*16 --// dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_pu16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_pu16 - FORCE_INLINE __m64 _mm_avg_pu16(__m64 a, __m64 b) - { - return vreinterpret_m64_u16( -@@ -1219,186 +1153,199 @@ FORCE_INLINE __m64 _mm_avg_pu16(__m64 a, __m64 b) - - // Average packed unsigned 8-bit integers in a and b, and store the results in - // dst. --// --// FOR j := 0 to 7 --// i := j*8 --// dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_pu8 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_pu8 - FORCE_INLINE __m64 _mm_avg_pu8(__m64 a, __m64 b) - { - return vreinterpret_m64_u8( - vrhadd_u8(vreinterpret_u8_m64(a), vreinterpret_u8_m64(b))); - } - --// Compares for equality. --// https://msdn.microsoft.com/en-us/library/vstudio/36aectz5(v=vs.100).aspx -+// Compare packed single-precision (32-bit) floating-point elements in a and b -+// for equality, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_ps - FORCE_INLINE __m128 _mm_cmpeq_ps(__m128 a, __m128 b) - { - return vreinterpretq_m128_u32( - vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); - } - --// Compares for equality. 
--// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/k423z28e(v=vs.100) -+// Compare the lower single-precision (32-bit) floating-point elements in a and -+// b for equality, store the result in the lower element of dst, and copy the -+// upper 3 packed elements from a to the upper elements of dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_ss - FORCE_INLINE __m128 _mm_cmpeq_ss(__m128 a, __m128 b) - { - return _mm_move_ss(a, _mm_cmpeq_ps(a, b)); - } - --// Compares for greater than or equal. --// https://msdn.microsoft.com/en-us/library/vstudio/fs813y2t(v=vs.100).aspx -+// Compare packed single-precision (32-bit) floating-point elements in a and b -+// for greater-than-or-equal, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_ps - FORCE_INLINE __m128 _mm_cmpge_ps(__m128 a, __m128 b) - { - return vreinterpretq_m128_u32( - vcgeq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); - } - --// Compares for greater than or equal. --// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/kesh3ddc(v=vs.100) -+// Compare the lower single-precision (32-bit) floating-point elements in a and -+// b for greater-than-or-equal, store the result in the lower element of dst, -+// and copy the upper 3 packed elements from a to the upper elements of dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_ss - FORCE_INLINE __m128 _mm_cmpge_ss(__m128 a, __m128 b) - { - return _mm_move_ss(a, _mm_cmpge_ps(a, b)); - } - --// Compares for greater than. --// --// r0 := (a0 > b0) ? 0xffffffff : 0x0 --// r1 := (a1 > b1) ? 0xffffffff : 0x0 --// r2 := (a2 > b2) ? 0xffffffff : 0x0 --// r3 := (a3 > b3) ? 
0xffffffff : 0x0 --// --// https://msdn.microsoft.com/en-us/library/vstudio/11dy102s(v=vs.100).aspx -+// Compare packed single-precision (32-bit) floating-point elements in a and b -+// for greater-than, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_ps - FORCE_INLINE __m128 _mm_cmpgt_ps(__m128 a, __m128 b) - { - return vreinterpretq_m128_u32( - vcgtq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); - } - --// Compares for greater than. --// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/1xyyyy9e(v=vs.100) -+// Compare the lower single-precision (32-bit) floating-point elements in a and -+// b for greater-than, store the result in the lower element of dst, and copy -+// the upper 3 packed elements from a to the upper elements of dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_ss - FORCE_INLINE __m128 _mm_cmpgt_ss(__m128 a, __m128 b) - { - return _mm_move_ss(a, _mm_cmpgt_ps(a, b)); - } - --// Compares for less than or equal. --// --// r0 := (a0 <= b0) ? 0xffffffff : 0x0 --// r1 := (a1 <= b1) ? 0xffffffff : 0x0 --// r2 := (a2 <= b2) ? 0xffffffff : 0x0 --// r3 := (a3 <= b3) ? 0xffffffff : 0x0 --// --// https://msdn.microsoft.com/en-us/library/vstudio/1s75w83z(v=vs.100).aspx -+// Compare packed single-precision (32-bit) floating-point elements in a and b -+// for less-than-or-equal, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_ps - FORCE_INLINE __m128 _mm_cmple_ps(__m128 a, __m128 b) - { - return vreinterpretq_m128_u32( - vcleq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); - } - --// Compares for less than or equal. 
--// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/a7x0hbhw(v=vs.100) -+// Compare the lower single-precision (32-bit) floating-point elements in a and -+// b for less-than-or-equal, store the result in the lower element of dst, and -+// copy the upper 3 packed elements from a to the upper elements of dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_ss - FORCE_INLINE __m128 _mm_cmple_ss(__m128 a, __m128 b) - { - return _mm_move_ss(a, _mm_cmple_ps(a, b)); - } - --// Compares for less than --// https://msdn.microsoft.com/en-us/library/vstudio/f330yhc8(v=vs.100).aspx -+// Compare packed single-precision (32-bit) floating-point elements in a and b -+// for less-than, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_ps - FORCE_INLINE __m128 _mm_cmplt_ps(__m128 a, __m128 b) - { - return vreinterpretq_m128_u32( - vcltq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); - } - --// Compares for less than --// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/fy94wye7(v=vs.100) -+// Compare the lower single-precision (32-bit) floating-point elements in a and -+// b for less-than, store the result in the lower element of dst, and copy the -+// upper 3 packed elements from a to the upper elements of dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_ss - FORCE_INLINE __m128 _mm_cmplt_ss(__m128 a, __m128 b) - { - return _mm_move_ss(a, _mm_cmplt_ps(a, b)); - } - --// Compares for inequality. --// https://msdn.microsoft.com/en-us/library/sf44thbx(v=vs.100).aspx -+// Compare packed single-precision (32-bit) floating-point elements in a and b -+// for not-equal, and store the results in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_ps - FORCE_INLINE __m128 _mm_cmpneq_ps(__m128 a, __m128 b) - { - return vreinterpretq_m128_u32(vmvnq_u32( - vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)))); - } - --// Compares for inequality. --// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/ekya8fh4(v=vs.100) -+// Compare the lower single-precision (32-bit) floating-point elements in a and -+// b for not-equal, store the result in the lower element of dst, and copy the -+// upper 3 packed elements from a to the upper elements of dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_ss - FORCE_INLINE __m128 _mm_cmpneq_ss(__m128 a, __m128 b) - { - return _mm_move_ss(a, _mm_cmpneq_ps(a, b)); - } - --// Compares for not greater than or equal. --// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/wsexys62(v=vs.100) -+// Compare packed single-precision (32-bit) floating-point elements in a and b -+// for not-greater-than-or-equal, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_ps - FORCE_INLINE __m128 _mm_cmpnge_ps(__m128 a, __m128 b) - { - return vreinterpretq_m128_u32(vmvnq_u32( - vcgeq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)))); - } - --// Compares for not greater than or equal. --// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/fk2y80s8(v=vs.100) -+// Compare the lower single-precision (32-bit) floating-point elements in a and -+// b for not-greater-than-or-equal, store the result in the lower element of -+// dst, and copy the upper 3 packed elements from a to the upper elements of -+// dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_ss - FORCE_INLINE __m128 _mm_cmpnge_ss(__m128 a, __m128 b) - { - return _mm_move_ss(a, _mm_cmpnge_ps(a, b)); - } - --// Compares for not greater than. --// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/d0xh7w0s(v=vs.100) -+// Compare packed single-precision (32-bit) floating-point elements in a and b -+// for not-greater-than, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_ps - FORCE_INLINE __m128 _mm_cmpngt_ps(__m128 a, __m128 b) - { - return vreinterpretq_m128_u32(vmvnq_u32( - vcgtq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)))); - } - --// Compares for not greater than. --// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/z7x9ydwh(v=vs.100) -+// Compare the lower single-precision (32-bit) floating-point elements in a and -+// b for not-greater-than, store the result in the lower element of dst, and -+// copy the upper 3 packed elements from a to the upper elements of dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_ss - FORCE_INLINE __m128 _mm_cmpngt_ss(__m128 a, __m128 b) - { - return _mm_move_ss(a, _mm_cmpngt_ps(a, b)); - } - --// Compares for not less than or equal. --// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/6a330kxw(v=vs.100) -+// Compare packed single-precision (32-bit) floating-point elements in a and b -+// for not-less-than-or-equal, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_ps - FORCE_INLINE __m128 _mm_cmpnle_ps(__m128 a, __m128 b) - { - return vreinterpretq_m128_u32(vmvnq_u32( - vcleq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)))); - } - --// Compares for not less than or equal. 
--// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/z7x9ydwh(v=vs.100) -+// Compare the lower single-precision (32-bit) floating-point elements in a and -+// b for not-less-than-or-equal, store the result in the lower element of dst, -+// and copy the upper 3 packed elements from a to the upper elements of dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_ss - FORCE_INLINE __m128 _mm_cmpnle_ss(__m128 a, __m128 b) - { - return _mm_move_ss(a, _mm_cmpnle_ps(a, b)); - } - --// Compares for not less than. --// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/4686bbdw(v=vs.100) -+// Compare packed single-precision (32-bit) floating-point elements in a and b -+// for not-less-than, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_ps - FORCE_INLINE __m128 _mm_cmpnlt_ps(__m128 a, __m128 b) - { - return vreinterpretq_m128_u32(vmvnq_u32( - vcltq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)))); - } - --// Compares for not less than. --// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/56b9z2wf(v=vs.100) -+// Compare the lower single-precision (32-bit) floating-point elements in a and -+// b for not-less-than, store the result in the lower element of dst, and copy -+// the upper 3 packed elements from a to the upper elements of dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_ss - FORCE_INLINE __m128 _mm_cmpnlt_ss(__m128 a, __m128 b) - { - return _mm_move_ss(a, _mm_cmpnlt_ps(a, b)); - } - --// Compares the four 32-bit floats in a and b to check if any values are NaN. --// Ordered compare between each value returns true for "orderable" and false for --// "not orderable" (NaN). 
--// https://msdn.microsoft.com/en-us/library/vstudio/0h9w00fx(v=vs.100).aspx see --// also: -+// Compare packed single-precision (32-bit) floating-point elements in a and b -+// to see if neither is NaN, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_ps -+// -+// See also: - // http://stackoverflow.com/questions/8627331/what-does-ordered-unordered-comparison-mean - // http://stackoverflow.com/questions/29349621/neon-isnanval-intrinsics - FORCE_INLINE __m128 _mm_cmpord_ps(__m128 a, __m128 b) -@@ -1413,15 +1360,18 @@ FORCE_INLINE __m128 _mm_cmpord_ps(__m128 a, __m128 b) - return vreinterpretq_m128_u32(vandq_u32(ceqaa, ceqbb)); - } - --// Compares for ordered. --// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/343t62da(v=vs.100) -+// Compare the lower single-precision (32-bit) floating-point elements in a and -+// b to see if neither is NaN, store the result in the lower element of dst, and -+// copy the upper 3 packed elements from a to the upper elements of dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_ss - FORCE_INLINE __m128 _mm_cmpord_ss(__m128 a, __m128 b) - { - return _mm_move_ss(a, _mm_cmpord_ps(a, b)); - } - --// Compares for unordered. --// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/khy6fk1t(v=vs.100) -+// Compare packed single-precision (32-bit) floating-point elements in a and b -+// to see if either is NaN, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_ps - FORCE_INLINE __m128 _mm_cmpunord_ps(__m128 a, __m128 b) - { - uint32x4_t f32a = -@@ -1431,16 +1381,18 @@ FORCE_INLINE __m128 _mm_cmpunord_ps(__m128 a, __m128 b) - return vreinterpretq_m128_u32(vmvnq_u32(vandq_u32(f32a, f32b))); - } - --// Compares for unordered. 
--// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/2as2387b(v=vs.100) -+// Compare the lower single-precision (32-bit) floating-point elements in a and -+// b to see if either is NaN, store the result in the lower element of dst, and -+// copy the upper 3 packed elements from a to the upper elements of dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_ss - FORCE_INLINE __m128 _mm_cmpunord_ss(__m128 a, __m128 b) - { - return _mm_move_ss(a, _mm_cmpunord_ps(a, b)); - } - --// Compares the lower single-precision floating point scalar values of a and b --// using an equality operation. : --// https://msdn.microsoft.com/en-us/library/93yx2h2b(v=vs.100).aspx -+// Compare the lower single-precision (32-bit) floating-point element in a and b -+// for equality, and return the boolean result (0 or 1). -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comieq_ss - FORCE_INLINE int _mm_comieq_ss(__m128 a, __m128 b) - { - uint32x4_t a_eq_b = -@@ -1448,9 +1400,9 @@ FORCE_INLINE int _mm_comieq_ss(__m128 a, __m128 b) - return vgetq_lane_u32(a_eq_b, 0) & 0x1; - } - --// Compares the lower single-precision floating point scalar values of a and b --// using a greater than or equal operation. : --// https://msdn.microsoft.com/en-us/library/8t80des6(v=vs.100).aspx -+// Compare the lower single-precision (32-bit) floating-point element in a and b -+// for greater-than-or-equal, and return the boolean result (0 or 1). -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comige_ss - FORCE_INLINE int _mm_comige_ss(__m128 a, __m128 b) - { - uint32x4_t a_ge_b = -@@ -1458,9 +1410,9 @@ FORCE_INLINE int _mm_comige_ss(__m128 a, __m128 b) - return vgetq_lane_u32(a_ge_b, 0) & 0x1; - } - --// Compares the lower single-precision floating point scalar values of a and b --// using a greater than operation. 
: --// https://msdn.microsoft.com/en-us/library/b0738e0t(v=vs.100).aspx -+// Compare the lower single-precision (32-bit) floating-point element in a and b -+// for greater-than, and return the boolean result (0 or 1). -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comigt_ss - FORCE_INLINE int _mm_comigt_ss(__m128 a, __m128 b) - { - uint32x4_t a_gt_b = -@@ -1468,9 +1420,9 @@ FORCE_INLINE int _mm_comigt_ss(__m128 a, __m128 b) - return vgetq_lane_u32(a_gt_b, 0) & 0x1; - } - --// Compares the lower single-precision floating point scalar values of a and b --// using a less than or equal operation. : --// https://msdn.microsoft.com/en-us/library/1w4t7c57(v=vs.90).aspx -+// Compare the lower single-precision (32-bit) floating-point element in a and b -+// for less-than-or-equal, and return the boolean result (0 or 1). -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comile_ss - FORCE_INLINE int _mm_comile_ss(__m128 a, __m128 b) - { - uint32x4_t a_le_b = -@@ -1478,11 +1430,9 @@ FORCE_INLINE int _mm_comile_ss(__m128 a, __m128 b) - return vgetq_lane_u32(a_le_b, 0) & 0x1; - } - --// Compares the lower single-precision floating point scalar values of a and b --// using a less than operation. : --// https://msdn.microsoft.com/en-us/library/2kwe606b(v=vs.90).aspx Important --// note!! The documentation on MSDN is incorrect! If either of the values is a --// NAN the docs say you will get a one, but in fact, it will return a zero!! -+// Compare the lower single-precision (32-bit) floating-point element in a and b -+// for less-than, and return the boolean result (0 or 1). 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comilt_ss - FORCE_INLINE int _mm_comilt_ss(__m128 a, __m128 b) - { - uint32x4_t a_lt_b = -@@ -1490,9 +1440,9 @@ FORCE_INLINE int _mm_comilt_ss(__m128 a, __m128 b) - return vgetq_lane_u32(a_lt_b, 0) & 0x1; - } - --// Compares the lower single-precision floating point scalar values of a and b --// using an inequality operation. : --// https://msdn.microsoft.com/en-us/library/bafh5e0a(v=vs.90).aspx -+// Compare the lower single-precision (32-bit) floating-point element in a and b -+// for not-equal, and return the boolean result (0 or 1). -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comineq_ss - FORCE_INLINE int _mm_comineq_ss(__m128 a, __m128 b) - { - return !_mm_comieq_ss(a, b); -@@ -1502,13 +1452,7 @@ FORCE_INLINE int _mm_comineq_ss(__m128 a, __m128 b) - // (32-bit) floating-point elements, store the results in the lower 2 elements - // of dst, and copy the upper 2 packed elements from a to the upper elements of - // dst. --// --// dst[31:0] := Convert_Int32_To_FP32(b[31:0]) --// dst[63:32] := Convert_Int32_To_FP32(b[63:32]) --// dst[95:64] := a[95:64] --// dst[127:96] := a[127:96] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_pi2ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_pi2ps - FORCE_INLINE __m128 _mm_cvt_pi2ps(__m128 a, __m64 b) - { - return vreinterpretq_m128_f32( -@@ -1518,13 +1462,7 @@ FORCE_INLINE __m128 _mm_cvt_pi2ps(__m128 a, __m64 b) - - // Convert packed single-precision (32-bit) floating-point elements in a to - // packed 32-bit integers, and store the results in dst. 
--// --// FOR j := 0 to 1 --// i := 32*j --// dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_ps2pi -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_ps2pi - FORCE_INLINE __m64 _mm_cvt_ps2pi(__m128 a) - { - #if defined(__aarch64__) || defined(__ARM_FEATURE_DIRECTED_ROUNDING) -@@ -1539,11 +1477,7 @@ FORCE_INLINE __m64 _mm_cvt_ps2pi(__m128 a) - // Convert the signed 32-bit integer b to a single-precision (32-bit) - // floating-point element, store the result in the lower element of dst, and - // copy the upper 3 packed elements from a to the upper elements of dst. --// --// dst[31:0] := Convert_Int32_To_FP32(b[31:0]) --// dst[127:32] := a[127:32] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_si2ss -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_si2ss - FORCE_INLINE __m128 _mm_cvt_si2ss(__m128 a, int b) - { - return vreinterpretq_m128_f32( -@@ -1552,7 +1486,7 @@ FORCE_INLINE __m128 _mm_cvt_si2ss(__m128 a, int b) - - // Convert the lower single-precision (32-bit) floating-point element in a to a - // 32-bit integer, and store the result in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_ss2si -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_ss2si - FORCE_INLINE int _mm_cvt_ss2si(__m128 a) - { - #if defined(__aarch64__) || defined(__ARM_FEATURE_DIRECTED_ROUNDING) -@@ -1567,14 +1501,7 @@ FORCE_INLINE int _mm_cvt_ss2si(__m128 a) - - // Convert packed 16-bit integers in a to packed single-precision (32-bit) - // floating-point elements, and store the results in dst. 
--// --// FOR j := 0 to 3 --// i := j*16 --// m := j*32 --// dst[m+31:m] := Convert_Int16_To_FP32(a[i+15:i]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi16_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpi16_ps - FORCE_INLINE __m128 _mm_cvtpi16_ps(__m64 a) - { - return vreinterpretq_m128_f32( -@@ -1584,13 +1511,7 @@ FORCE_INLINE __m128 _mm_cvtpi16_ps(__m64 a) - // Convert packed 32-bit integers in b to packed single-precision (32-bit) - // floating-point elements, store the results in the lower 2 elements of dst, - // and copy the upper 2 packed elements from a to the upper elements of dst. --// --// dst[31:0] := Convert_Int32_To_FP32(b[31:0]) --// dst[63:32] := Convert_Int32_To_FP32(b[63:32]) --// dst[95:64] := a[95:64] --// dst[127:96] := a[127:96] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi32_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpi32_ps - FORCE_INLINE __m128 _mm_cvtpi32_ps(__m128 a, __m64 b) - { - return vreinterpretq_m128_f32( -@@ -1603,13 +1524,7 @@ FORCE_INLINE __m128 _mm_cvtpi32_ps(__m128 a, __m64 b) - // of dst, then convert the packed signed 32-bit integers in b to - // single-precision (32-bit) floating-point element, and store the results in - // the upper 2 elements of dst. 
--// --// dst[31:0] := Convert_Int32_To_FP32(a[31:0]) --// dst[63:32] := Convert_Int32_To_FP32(a[63:32]) --// dst[95:64] := Convert_Int32_To_FP32(b[31:0]) --// dst[127:96] := Convert_Int32_To_FP32(b[63:32]) --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi32x2_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpi32x2_ps - FORCE_INLINE __m128 _mm_cvtpi32x2_ps(__m64 a, __m64 b) - { - return vreinterpretq_m128_f32(vcvtq_f32_s32( -@@ -1618,14 +1533,7 @@ FORCE_INLINE __m128 _mm_cvtpi32x2_ps(__m64 a, __m64 b) - - // Convert the lower packed 8-bit integers in a to packed single-precision - // (32-bit) floating-point elements, and store the results in dst. --// --// FOR j := 0 to 3 --// i := j*8 --// m := j*32 --// dst[m+31:m] := Convert_Int8_To_FP32(a[i+7:i]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi8_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpi8_ps - FORCE_INLINE __m128 _mm_cvtpi8_ps(__m64 a) - { - return vreinterpretq_m128_f32(vcvtq_f32_s32( -@@ -1636,18 +1544,7 @@ FORCE_INLINE __m128 _mm_cvtpi8_ps(__m64 a) - // packed 16-bit integers, and store the results in dst. Note: this intrinsic - // will generate 0x7FFF, rather than 0x8000, for input values between 0x7FFF and - // 0x7FFFFFFF. 
--// --// FOR j := 0 to 3 --// i := 16*j --// k := 32*j --// IF a[k+31:k] >= FP32(0x7FFF) && a[k+31:k] <= FP32(0x7FFFFFFF) --// dst[i+15:i] := 0x7FFF --// ELSE --// dst[i+15:i] := Convert_FP32_To_Int16(a[k+31:k]) --// FI --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_pi16 - FORCE_INLINE __m64 _mm_cvtps_pi16(__m128 a) - { - return vreinterpret_m64_s16( -@@ -1656,31 +1553,14 @@ FORCE_INLINE __m64 _mm_cvtps_pi16(__m128 a) - - // Convert packed single-precision (32-bit) floating-point elements in a to - // packed 32-bit integers, and store the results in dst. --// --// FOR j := 0 to 1 --// i := 32*j --// dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pi32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_pi32 - #define _mm_cvtps_pi32(a) _mm_cvt_ps2pi(a) - - // Convert packed single-precision (32-bit) floating-point elements in a to - // packed 8-bit integers, and store the results in lower 4 elements of dst. - // Note: this intrinsic will generate 0x7F, rather than 0x80, for input values - // between 0x7F and 0x7FFFFFFF. 
--// --// FOR j := 0 to 3 --// i := 8*j --// k := 32*j --// IF a[k+31:k] >= FP32(0x7F) && a[k+31:k] <= FP32(0x7FFFFFFF) --// dst[i+7:i] := 0x7F --// ELSE --// dst[i+7:i] := Convert_FP32_To_Int8(a[k+31:k]) --// FI --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pi8 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_pi8 - FORCE_INLINE __m64 _mm_cvtps_pi8(__m128 a) - { - return vreinterpret_m64_s8(vqmovn_s16( -@@ -1689,14 +1569,7 @@ FORCE_INLINE __m64 _mm_cvtps_pi8(__m128 a) - - // Convert packed unsigned 16-bit integers in a to packed single-precision - // (32-bit) floating-point elements, and store the results in dst. --// --// FOR j := 0 to 3 --// i := j*16 --// m := j*32 --// dst[m+31:m] := Convert_UInt16_To_FP32(a[i+15:i]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpu16_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpu16_ps - FORCE_INLINE __m128 _mm_cvtpu16_ps(__m64 a) - { - return vreinterpretq_m128_f32( -@@ -1706,14 +1579,7 @@ FORCE_INLINE __m128 _mm_cvtpu16_ps(__m64 a) - // Convert the lower packed unsigned 8-bit integers in a to packed - // single-precision (32-bit) floating-point elements, and store the results in - // dst. 
--// --// FOR j := 0 to 3 --// i := j*8 --// m := j*32 --// dst[m+31:m] := Convert_UInt8_To_FP32(a[i+7:i]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpu8_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpu8_ps - FORCE_INLINE __m128 _mm_cvtpu8_ps(__m64 a) - { - return vreinterpretq_m128_f32(vcvtq_f32_u32( -@@ -1723,21 +1589,13 @@ FORCE_INLINE __m128 _mm_cvtpu8_ps(__m64 a) - // Convert the signed 32-bit integer b to a single-precision (32-bit) - // floating-point element, store the result in the lower element of dst, and - // copy the upper 3 packed elements from a to the upper elements of dst. --// --// dst[31:0] := Convert_Int32_To_FP32(b[31:0]) --// dst[127:32] := a[127:32] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_ss -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_ss - #define _mm_cvtsi32_ss(a, b) _mm_cvt_si2ss(a, b) - - // Convert the signed 64-bit integer b to a single-precision (32-bit) - // floating-point element, store the result in the lower element of dst, and - // copy the upper 3 packed elements from a to the upper elements of dst. --// --// dst[31:0] := Convert_Int64_To_FP32(b[63:0]) --// dst[127:32] := a[127:32] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_ss -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64_ss - FORCE_INLINE __m128 _mm_cvtsi64_ss(__m128 a, int64_t b) - { - return vreinterpretq_m128_f32( -@@ -1745,10 +1603,7 @@ FORCE_INLINE __m128 _mm_cvtsi64_ss(__m128 a, int64_t b) - } - - // Copy the lower single-precision (32-bit) floating-point element of a to dst. 
--// --// dst[31:0] := a[31:0] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_f32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_f32 - FORCE_INLINE float _mm_cvtss_f32(__m128 a) - { - return vgetq_lane_f32(vreinterpretq_f32_m128(a), 0); -@@ -1756,18 +1611,12 @@ FORCE_INLINE float _mm_cvtss_f32(__m128 a) - - // Convert the lower single-precision (32-bit) floating-point element in a to a - // 32-bit integer, and store the result in dst. --// --// dst[31:0] := Convert_FP32_To_Int32(a[31:0]) --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_si32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_si32 - #define _mm_cvtss_si32(a) _mm_cvt_ss2si(a) - - // Convert the lower single-precision (32-bit) floating-point element in a to a - // 64-bit integer, and store the result in dst. --// --// dst[63:0] := Convert_FP32_To_Int64(a[31:0]) --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_si64 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_si64 - FORCE_INLINE int64_t _mm_cvtss_si64(__m128 a) - { - #if defined(__aarch64__) || defined(__ARM_FEATURE_DIRECTED_ROUNDING) -@@ -1781,13 +1630,7 @@ FORCE_INLINE int64_t _mm_cvtss_si64(__m128 a) - - // Convert packed single-precision (32-bit) floating-point elements in a to - // packed 32-bit integers with truncation, and store the results in dst. 
--// --// FOR j := 0 to 1 --// i := 32*j --// dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_ps2pi -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_ps2pi - FORCE_INLINE __m64 _mm_cvtt_ps2pi(__m128 a) - { - return vreinterpret_m64_s32( -@@ -1796,10 +1639,7 @@ FORCE_INLINE __m64 _mm_cvtt_ps2pi(__m128 a) - - // Convert the lower single-precision (32-bit) floating-point element in a to a - // 32-bit integer with truncation, and store the result in dst. --// --// dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0]) --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_ss2si -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_ss2si - FORCE_INLINE int _mm_cvtt_ss2si(__m128 a) - { - return vgetq_lane_s32(vcvtq_s32_f32(vreinterpretq_f32_m128(a)), 0); -@@ -1807,60 +1647,49 @@ FORCE_INLINE int _mm_cvtt_ss2si(__m128 a) - - // Convert packed single-precision (32-bit) floating-point elements in a to - // packed 32-bit integers with truncation, and store the results in dst. --// --// FOR j := 0 to 1 --// i := 32*j --// dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttps_pi32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_pi32 - #define _mm_cvttps_pi32(a) _mm_cvtt_ps2pi(a) - - // Convert the lower single-precision (32-bit) floating-point element in a to a - // 32-bit integer with truncation, and store the result in dst. 
--// --// dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0]) --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_si32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_si32 - #define _mm_cvttss_si32(a) _mm_cvtt_ss2si(a) - - // Convert the lower single-precision (32-bit) floating-point element in a to a - // 64-bit integer with truncation, and store the result in dst. --// --// dst[63:0] := Convert_FP32_To_Int64_Truncate(a[31:0]) --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_si64 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_si64 - FORCE_INLINE int64_t _mm_cvttss_si64(__m128 a) - { - return (int64_t) vgetq_lane_f32(vreinterpretq_f32_m128(a), 0); - } - --// Divides the four single-precision, floating-point values of a and b. --// --// r0 := a0 / b0 --// r1 := a1 / b1 --// r2 := a2 / b2 --// r3 := a3 / b3 --// --// https://msdn.microsoft.com/en-us/library/edaw8147(v=vs.100).aspx -+// Divide packed single-precision (32-bit) floating-point elements in a by -+// packed elements in b, and store the results in dst. -+// Due to ARMv7-A NEON's lack of a precise division intrinsic, we implement -+// division by multiplying a by b's reciprocal before using the Newton-Raphson -+// method to approximate the results. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_ps - FORCE_INLINE __m128 _mm_div_ps(__m128 a, __m128 b) - { --#if defined(__aarch64__) && !SSE2NEON_PRECISE_DIV -+#if defined(__aarch64__) - return vreinterpretq_m128_f32( - vdivq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); - #else - float32x4_t recip = vrecpeq_f32(vreinterpretq_f32_m128(b)); - recip = vmulq_f32(recip, vrecpsq_f32(recip, vreinterpretq_f32_m128(b))); --#if SSE2NEON_PRECISE_DIV - // Additional Netwon-Raphson iteration for accuracy - recip = vmulq_f32(recip, vrecpsq_f32(recip, vreinterpretq_f32_m128(b))); --#endif - return vreinterpretq_m128_f32(vmulq_f32(vreinterpretq_f32_m128(a), recip)); - #endif - } - --// Divides the scalar single-precision floating point value of a by b. --// https://msdn.microsoft.com/en-us/library/4y73xa49(v=vs.100).aspx -+// Divide the lower single-precision (32-bit) floating-point element in a by the -+// lower single-precision (32-bit) floating-point element in b, store the result -+// in the lower element of dst, and copy the upper 3 packed elements from a to -+// the upper elements of dst. -+// Warning: ARMv7-A does not produce the same result compared to Intel and not -+// IEEE-compliant. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_ss - FORCE_INLINE __m128 _mm_div_ss(__m128 a, __m128 b) - { - float32_t value = -@@ -1871,12 +1700,12 @@ FORCE_INLINE __m128 _mm_div_ss(__m128 a, __m128 b) - - // Extract a 16-bit integer from a, selected with imm8, and store the result in - // the lower element of dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_pi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_pi16 - #define _mm_extract_pi16(a, imm) \ - (int32_t) vget_lane_u16(vreinterpret_u16_m64(a), (imm)) - - // Free aligned memory that was allocated with _mm_malloc. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_free -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_free - #if !defined(SSE2NEON_ALLOC_DEFINED) - FORCE_INLINE void _mm_free(void *addr) - { -@@ -1887,7 +1716,7 @@ FORCE_INLINE void _mm_free(void *addr) - // Macro: Get the flush zero bits from the MXCSR control and status register. - // The flush zero may contain any of the following flags: _MM_FLUSH_ZERO_ON or - // _MM_FLUSH_ZERO_OFF --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_GET_FLUSH_ZERO_MODE -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_GET_FLUSH_ZERO_MODE - FORCE_INLINE unsigned int _sse2neon_mm_get_flush_zero_mode() - { - union { -@@ -1911,7 +1740,7 @@ FORCE_INLINE unsigned int _sse2neon_mm_get_flush_zero_mode() - // Macro: Get the rounding mode bits from the MXCSR control and status register. - // The rounding mode may contain any of the following flags: _MM_ROUND_NEAREST, - // _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_GET_ROUNDING_MODE -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_GET_ROUNDING_MODE - FORCE_INLINE unsigned int _MM_GET_ROUNDING_MODE() - { - union { -@@ -1938,15 +1767,17 @@ FORCE_INLINE unsigned int _MM_GET_ROUNDING_MODE() - - // Copy a to dst, and insert the 16-bit integer i into dst at the location - // specified by imm8. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_pi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_pi16 - #define _mm_insert_pi16(a, b, imm) \ - __extension__({ \ - vreinterpret_m64_s16( \ - vset_lane_s16((b), vreinterpret_s16_m64(a), (imm))); \ - }) - --// Loads four single-precision, floating-point values. 
--// https://msdn.microsoft.com/en-us/library/vstudio/zzd50xxt(v=vs.100).aspx -+// Load 128-bits (composed of 4 packed single-precision (32-bit) floating-point -+// elements) from memory into dst. mem_addr must be aligned on a 16-byte -+// boundary or a general-protection exception may be generated. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_ps - FORCE_INLINE __m128 _mm_load_ps(const float *p) - { - return vreinterpretq_m128_f32(vld1q_f32(p)); -@@ -1960,52 +1791,40 @@ FORCE_INLINE __m128 _mm_load_ps(const float *p) - // dst[95:64] := MEM[mem_addr+31:mem_addr] - // dst[127:96] := MEM[mem_addr+31:mem_addr] - // --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_ps1 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_ps1 - #define _mm_load_ps1 _mm_load1_ps - --// Loads an single - precision, floating - point value into the low word and --// clears the upper three words. --// https://msdn.microsoft.com/en-us/library/548bb9h4%28v=vs.90%29.aspx -+// Load a single-precision (32-bit) floating-point element from memory into the -+// lower of dst, and zero the upper 3 elements. mem_addr does not need to be -+// aligned on any particular boundary. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_ss - FORCE_INLINE __m128 _mm_load_ss(const float *p) - { - return vreinterpretq_m128_f32(vsetq_lane_f32(*p, vdupq_n_f32(0), 0)); - } - --// Loads a single single-precision, floating-point value, copying it into all --// four words --// https://msdn.microsoft.com/en-us/library/vstudio/5cdkf716(v=vs.100).aspx -+// Load a single-precision (32-bit) floating-point element from memory into all -+// elements of dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load1_ps - FORCE_INLINE __m128 _mm_load1_ps(const float *p) - { - return vreinterpretq_m128_f32(vld1q_dup_f32(p)); - } - --// Sets the upper two single-precision, floating-point values with 64 --// bits of data loaded from the address p; the lower two values are passed --// through from a. --// --// r0 := a0 --// r1 := a1 --// r2 := *p0 --// r3 := *p1 --// --// https://msdn.microsoft.com/en-us/library/w92wta0x(v%3dvs.100).aspx -+// Load 2 single-precision (32-bit) floating-point elements from memory into the -+// upper 2 elements of dst, and copy the lower 2 elements from a to dst. -+// mem_addr does not need to be aligned on any particular boundary. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadh_pi - FORCE_INLINE __m128 _mm_loadh_pi(__m128 a, __m64 const *p) - { - return vreinterpretq_m128_f32( - vcombine_f32(vget_low_f32(a), vld1_f32((const float32_t *) p))); - } - --// Sets the lower two single-precision, floating-point values with 64 --// bits of data loaded from the address p; the upper two values are passed --// through from a. --// --// Return Value --// r0 := *p0 --// r1 := *p1 --// r2 := a2 --// r3 := a3 --// --// https://msdn.microsoft.com/en-us/library/s57cyak2(v=vs.100).aspx -+// Load 2 single-precision (32-bit) floating-point elements from memory into the -+// lower 2 elements of dst, and copy the upper 2 elements from a to dst. -+// mem_addr does not need to be aligned on any particular boundary. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_pi - FORCE_INLINE __m128 _mm_loadl_pi(__m128 a, __m64 const *p) - { - return vreinterpretq_m128_f32( -@@ -2015,21 +1834,17 @@ FORCE_INLINE __m128 _mm_loadl_pi(__m128 a, __m64 const *p) - // Load 4 single-precision (32-bit) floating-point elements from memory into dst - // in reverse order. 
mem_addr must be aligned on a 16-byte boundary or a - // general-protection exception may be generated. --// --// dst[31:0] := MEM[mem_addr+127:mem_addr+96] --// dst[63:32] := MEM[mem_addr+95:mem_addr+64] --// dst[95:64] := MEM[mem_addr+63:mem_addr+32] --// dst[127:96] := MEM[mem_addr+31:mem_addr] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadr_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadr_ps - FORCE_INLINE __m128 _mm_loadr_ps(const float *p) - { - float32x4_t v = vrev64q_f32(vld1q_f32(p)); - return vreinterpretq_m128_f32(vextq_f32(v, v, 2)); - } - --// Loads four single-precision, floating-point values. --// https://msdn.microsoft.com/en-us/library/x1b16s7z%28v=vs.90%29.aspx -+// Load 128-bits (composed of 4 packed single-precision (32-bit) floating-point -+// elements) from memory into dst. mem_addr does not need to be aligned on any -+// particular boundary. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_ps - FORCE_INLINE __m128 _mm_loadu_ps(const float *p) - { - // for neon, alignment doesn't matter, so _mm_load_ps and _mm_loadu_ps are -@@ -2038,11 +1853,7 @@ FORCE_INLINE __m128 _mm_loadu_ps(const float *p) - } - - // Load unaligned 16-bit integer from memory into the first element of dst. --// --// dst[15:0] := MEM[mem_addr+15:mem_addr] --// dst[MAX:16] := 0 --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si16 - FORCE_INLINE __m128i _mm_loadu_si16(const void *p) - { - return vreinterpretq_m128i_s16( -@@ -2050,20 +1861,17 @@ FORCE_INLINE __m128i _mm_loadu_si16(const void *p) - } - - // Load unaligned 64-bit integer from memory into the first element of dst. 
--// --// dst[63:0] := MEM[mem_addr+63:mem_addr] --// dst[MAX:64] := 0 --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si64 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si64 - FORCE_INLINE __m128i _mm_loadu_si64(const void *p) - { - return vreinterpretq_m128i_s64( - vcombine_s64(vld1_s64((const int64_t *) p), vdup_n_s64(0))); - } - --// Allocate aligned blocks of memory. --// https://software.intel.com/en-us/ --// cpp-compiler-developer-guide-and-reference-allocating-and-freeing-aligned-memory-blocks -+// Allocate size bytes of memory, aligned to the alignment specified in align, -+// and return a pointer to the allocated memory. _mm_free should be used to free -+// memory that is allocated with _mm_malloc. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_malloc - #if !defined(SSE2NEON_ALLOC_DEFINED) - FORCE_INLINE void *_mm_malloc(size_t size, size_t align) - { -@@ -2081,7 +1889,7 @@ FORCE_INLINE void *_mm_malloc(size_t size, size_t align) - // Conditionally store 8-bit integer elements from a into memory using mask - // (elements are not stored when the highest bit is not set in the corresponding - // element) and a non-temporal memory hint. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskmove_si64 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskmove_si64 - FORCE_INLINE void _mm_maskmove_si64(__m64 a, __m64 mask, char *mem_addr) - { - int8x8_t shr_mask = vshr_n_s8(vreinterpret_s8_m64(mask), 7); -@@ -2095,27 +1903,23 @@ FORCE_INLINE void _mm_maskmove_si64(__m64 a, __m64 mask, char *mem_addr) - // Conditionally store 8-bit integer elements from a into memory using mask - // (elements are not stored when the highest bit is not set in the corresponding - // element) and a non-temporal memory hint. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_maskmovq -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_m_maskmovq - #define _m_maskmovq(a, mask, mem_addr) _mm_maskmove_si64(a, mask, mem_addr) - - // Compare packed signed 16-bit integers in a and b, and store packed maximum - // values in dst. --// --// FOR j := 0 to 3 --// i := j*16 --// dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_pi16 - FORCE_INLINE __m64 _mm_max_pi16(__m64 a, __m64 b) - { - return vreinterpret_m64_s16( - vmax_s16(vreinterpret_s16_m64(a), vreinterpret_s16_m64(b))); - } - --// Computes the maximums of the four single-precision, floating-point values of --// a and b. --// https://msdn.microsoft.com/en-us/library/vstudio/ff5d607a(v=vs.100).aspx -+// Compare packed single-precision (32-bit) floating-point elements in a and b, -+// and store packed maximum values in dst. dst does not follow the IEEE Standard -+// for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or -+// signed-zero values. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_ps - FORCE_INLINE __m128 _mm_max_ps(__m128 a, __m128 b) - { - #if SSE2NEON_PRECISE_MINMAX -@@ -2130,22 +1934,19 @@ FORCE_INLINE __m128 _mm_max_ps(__m128 a, __m128 b) - - // Compare packed unsigned 8-bit integers in a and b, and store packed maximum - // values in dst. 
--// --// FOR j := 0 to 7 --// i := j*8 --// dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pu8 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_pu8 - FORCE_INLINE __m64 _mm_max_pu8(__m64 a, __m64 b) - { - return vreinterpret_m64_u8( - vmax_u8(vreinterpret_u8_m64(a), vreinterpret_u8_m64(b))); - } - --// Computes the maximum of the two lower scalar single-precision floating point --// values of a and b. --// https://msdn.microsoft.com/en-us/library/s6db5esz(v=vs.100).aspx -+// Compare the lower single-precision (32-bit) floating-point elements in a and -+// b, store the maximum value in the lower element of dst, and copy the upper 3 -+// packed elements from a to the upper element of dst. dst does not follow the -+// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when -+// inputs are NaN or signed-zero values. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_ss - FORCE_INLINE __m128 _mm_max_ss(__m128 a, __m128 b) - { - float32_t value = vgetq_lane_f32(_mm_max_ps(a, b), 0); -@@ -2155,22 +1956,18 @@ FORCE_INLINE __m128 _mm_max_ss(__m128 a, __m128 b) - - // Compare packed signed 16-bit integers in a and b, and store packed minimum - // values in dst. --// --// FOR j := 0 to 3 --// i := j*16 --// dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_pi16 - FORCE_INLINE __m64 _mm_min_pi16(__m64 a, __m64 b) - { - return vreinterpret_m64_s16( - vmin_s16(vreinterpret_s16_m64(a), vreinterpret_s16_m64(b))); - } - --// Computes the minima of the four single-precision, floating-point values of a --// and b. 
--// https://msdn.microsoft.com/en-us/library/vstudio/wh13kadz(v=vs.100).aspx -+// Compare packed single-precision (32-bit) floating-point elements in a and b, -+// and store packed minimum values in dst. dst does not follow the IEEE Standard -+// for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or -+// signed-zero values. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_ps - FORCE_INLINE __m128 _mm_min_ps(__m128 a, __m128 b) - { - #if SSE2NEON_PRECISE_MINMAX -@@ -2185,22 +1982,19 @@ FORCE_INLINE __m128 _mm_min_ps(__m128 a, __m128 b) - - // Compare packed unsigned 8-bit integers in a and b, and store packed minimum - // values in dst. --// --// FOR j := 0 to 7 --// i := j*8 --// dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pu8 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_pu8 - FORCE_INLINE __m64 _mm_min_pu8(__m64 a, __m64 b) - { - return vreinterpret_m64_u8( - vmin_u8(vreinterpret_u8_m64(a), vreinterpret_u8_m64(b))); - } - --// Computes the minimum of the two lower scalar single-precision floating point --// values of a and b. --// https://msdn.microsoft.com/en-us/library/0a9y7xaa(v=vs.100).aspx -+// Compare the lower single-precision (32-bit) floating-point elements in a and -+// b, store the minimum value in the lower element of dst, and copy the upper 3 -+// packed elements from a to the upper element of dst. dst does not follow the -+// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when -+// inputs are NaN or signed-zero values. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_ss - FORCE_INLINE __m128 _mm_min_ss(__m128 a, __m128 b) - { - float32_t value = vgetq_lane_f32(_mm_min_ps(a, b), 0); -@@ -2208,8 +2002,10 @@ FORCE_INLINE __m128 _mm_min_ss(__m128 a, __m128 b) - vsetq_lane_f32(value, vreinterpretq_f32_m128(a), 0)); - } - --// Sets the low word to the single-precision, floating-point value of b --// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/35hdzazd(v=vs.100) -+// Move the lower single-precision (32-bit) floating-point element from b to the -+// lower element of dst, and copy the upper 3 packed elements from a to the -+// upper elements of dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_ss - FORCE_INLINE __m128 _mm_move_ss(__m128 a, __m128 b) - { - return vreinterpretq_m128_f32( -@@ -2217,25 +2013,26 @@ FORCE_INLINE __m128 _mm_move_ss(__m128 a, __m128 b) - vreinterpretq_f32_m128(a), 0)); - } - --// Moves the upper two values of B into the lower two values of A. --// --// r3 := a3 --// r2 := a2 --// r1 := b3 --// r0 := b2 --FORCE_INLINE __m128 _mm_movehl_ps(__m128 __A, __m128 __B) --{ -- float32x2_t a32 = vget_high_f32(vreinterpretq_f32_m128(__A)); -- float32x2_t b32 = vget_high_f32(vreinterpretq_f32_m128(__B)); -+// Move the upper 2 single-precision (32-bit) floating-point elements from b to -+// the lower 2 elements of dst, and copy the upper 2 elements from a to the -+// upper 2 elements of dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movehl_ps -+FORCE_INLINE __m128 _mm_movehl_ps(__m128 a, __m128 b) -+{ -+#if defined(aarch64__) -+ return vreinterpretq_m128_u64( -+ vzip2q_u64(vreinterpretq_u64_m128(b), vreinterpretq_u64_m128(a))); -+#else -+ float32x2_t a32 = vget_high_f32(vreinterpretq_f32_m128(a)); -+ float32x2_t b32 = vget_high_f32(vreinterpretq_f32_m128(b)); - return vreinterpretq_m128_f32(vcombine_f32(b32, a32)); -+#endif - } - --// Moves the lower two values of B into the upper two values of A. --// --// r3 := b1 --// r2 := b0 --// r1 := a1 --// r0 := a0 -+// Move the lower 2 single-precision (32-bit) floating-point elements from b to -+// the upper 2 elements of dst, and copy the lower 2 elements from a to the -+// lower 2 elements of dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movelh_ps - FORCE_INLINE __m128 _mm_movelh_ps(__m128 __A, __m128 __B) - { - float32x2_t a10 = vget_low_f32(vreinterpretq_f32_m128(__A)); -@@ -2245,7 +2042,7 @@ FORCE_INLINE __m128 _mm_movelh_ps(__m128 __A, __m128 __B) - - // Create mask from the most significant bit of each 8-bit element in a, and - // store the result in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_pi8 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_pi8 - FORCE_INLINE int _mm_movemask_pi8(__m64 a) - { - uint8x8_t input = vreinterpret_u8_m64(a); -@@ -2264,10 +2061,9 @@ FORCE_INLINE int _mm_movemask_pi8(__m64 a) - #endif - } - --// NEON does not provide this method --// Creates a 4-bit mask from the most significant bits of the four --// single-precision, floating-point values. --// https://msdn.microsoft.com/en-us/library/vstudio/4490ys29(v=vs.100).aspx -+// Set each bit of mask dst based on the most significant bit of the -+// corresponding packed single-precision (32-bit) floating-point element in a. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_ps - FORCE_INLINE int _mm_movemask_ps(__m128 a) - { - uint32x4_t input = vreinterpretq_u32_m128(a); -@@ -2288,14 +2084,9 @@ FORCE_INLINE int _mm_movemask_ps(__m128 a) - #endif - } - --// Multiplies the four single-precision, floating-point values of a and b. --// --// r0 := a0 * b0 --// r1 := a1 * b1 --// r2 := a2 * b2 --// r3 := a3 * b3 --// --// https://msdn.microsoft.com/en-us/library/vstudio/22kbk6t9(v=vs.100).aspx -+// Multiply packed single-precision (32-bit) floating-point elements in a and b, -+// and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_ps - FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b) - { - return vreinterpretq_m128_f32( -@@ -2305,11 +2096,7 @@ FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b) - // Multiply the lower single-precision (32-bit) floating-point element in a and - // b, store the result in the lower element of dst, and copy the upper 3 packed - // elements from a to the upper elements of dst. --// --// dst[31:0] := a[31:0] * b[31:0] --// dst[127:32] := a[127:32] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_ss -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_ss - FORCE_INLINE __m128 _mm_mul_ss(__m128 a, __m128 b) - { - return _mm_move_ss(a, _mm_mul_ps(a, b)); -@@ -2318,16 +2105,16 @@ FORCE_INLINE __m128 _mm_mul_ss(__m128 a, __m128 b) - // Multiply the packed unsigned 16-bit integers in a and b, producing - // intermediate 32-bit integers, and store the high 16 bits of the intermediate - // integers in dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_pu16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_pu16 - FORCE_INLINE __m64 _mm_mulhi_pu16(__m64 a, __m64 b) - { - return vreinterpret_m64_u16(vshrn_n_u32( - vmull_u16(vreinterpret_u16_m64(a), vreinterpret_u16_m64(b)), 16)); - } - --// Computes the bitwise OR of the four single-precision, floating-point values --// of a and b. --// https://msdn.microsoft.com/en-us/library/vstudio/7ctdsyy0(v=vs.100).aspx -+// Compute the bitwise OR of packed single-precision (32-bit) floating-point -+// elements in a and b, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_ps - FORCE_INLINE __m128 _mm_or_ps(__m128 a, __m128 b) - { - return vreinterpretq_m128_s32( -@@ -2336,65 +2123,53 @@ FORCE_INLINE __m128 _mm_or_ps(__m128 a, __m128 b) - - // Average packed unsigned 8-bit integers in a and b, and store the results in - // dst. --// --// FOR j := 0 to 7 --// i := j*8 --// dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pavgb -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_m_pavgb - #define _m_pavgb(a, b) _mm_avg_pu8(a, b) - - // Average packed unsigned 16-bit integers in a and b, and store the results in - // dst. --// --// FOR j := 0 to 3 --// i := j*16 --// dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pavgw -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_m_pavgw - #define _m_pavgw(a, b) _mm_avg_pu16(a, b) - - // Extract a 16-bit integer from a, selected with imm8, and store the result in - // the lower element of dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pextrw -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_m_pextrw - #define _m_pextrw(a, imm) _mm_extract_pi16(a, imm) - - // Copy a to dst, and insert the 16-bit integer i into dst at the location - // specified by imm8. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=m_pinsrw -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=m_pinsrw - #define _m_pinsrw(a, i, imm) _mm_insert_pi16(a, i, imm) - - // Compare packed signed 16-bit integers in a and b, and store packed maximum - // values in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pmaxsw -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_m_pmaxsw - #define _m_pmaxsw(a, b) _mm_max_pi16(a, b) - - // Compare packed unsigned 8-bit integers in a and b, and store packed maximum - // values in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pmaxub -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_m_pmaxub - #define _m_pmaxub(a, b) _mm_max_pu8(a, b) - - // Compare packed signed 16-bit integers in a and b, and store packed minimum - // values in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pminsw -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_m_pminsw - #define _m_pminsw(a, b) _mm_min_pi16(a, b) - - // Compare packed unsigned 8-bit integers in a and b, and store packed minimum - // values in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pminub -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_m_pminub - #define _m_pminub(a, b) _mm_min_pu8(a, b) - - // Create mask from the most significant bit of each 8-bit element in a, and - // store the result in dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pmovmskb -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_m_pmovmskb - #define _m_pmovmskb(a) _mm_movemask_pi8(a) - - // Multiply the packed unsigned 16-bit integers in a and b, producing - // intermediate 32-bit integers, and store the high 16 bits of the intermediate - // integers in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pmulhuw -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_m_pmulhuw - #define _m_pmulhuw(a, b) _mm_mulhi_pu16(a, b) - - // Fetch the line of data from memory that contains address p to a location in -@@ -2422,26 +2197,22 @@ FORCE_INLINE void _mm_prefetch(char const *p, int i) - // b, then horizontally sum each consecutive 8 differences to produce four - // unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low - // 16 bits of dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=m_psadbw -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=m_psadbw - #define _m_psadbw(a, b) _mm_sad_pu8(a, b) - - // Shuffle 16-bit integers in a using the control in imm8, and store the results - // in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pshufw -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_m_pshufw - #define _m_pshufw(a, imm) _mm_shuffle_pi16(a, imm) - - // Compute the approximate reciprocal of packed single-precision (32-bit) - // floating-point elements in a, and store the results in dst. The maximum - // relative error for this approximation is less than 1.5*2^-12. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp_ps - FORCE_INLINE __m128 _mm_rcp_ps(__m128 in) - { - float32x4_t recip = vrecpeq_f32(vreinterpretq_f32_m128(in)); - recip = vmulq_f32(recip, vrecpsq_f32(recip, vreinterpretq_f32_m128(in))); --#if SSE2NEON_PRECISE_DIV -- // Additional Netwon-Raphson iteration for accuracy -- recip = vmulq_f32(recip, vrecpsq_f32(recip, vreinterpretq_f32_m128(in))); --#endif - return vreinterpretq_m128_f32(recip); - } - -@@ -2449,30 +2220,21 @@ FORCE_INLINE __m128 _mm_rcp_ps(__m128 in) - // floating-point element in a, store the result in the lower element of dst, - // and copy the upper 3 packed elements from a to the upper elements of dst. The - // maximum relative error for this approximation is less than 1.5*2^-12. --// --// dst[31:0] := (1.0 / a[31:0]) --// dst[127:32] := a[127:32] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ss -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp_ss - FORCE_INLINE __m128 _mm_rcp_ss(__m128 a) - { - return _mm_move_ss(a, _mm_rcp_ps(a)); - } - --// Computes the approximations of the reciprocal square roots of the four --// single-precision floating point values of in. --// The current precision is 1% error. --// https://msdn.microsoft.com/en-us/library/22hfsh53(v=vs.100).aspx -+// Compute the approximate reciprocal square root of packed single-precision -+// (32-bit) floating-point elements in a, and store the results in dst. The -+// maximum relative error for this approximation is less than 1.5*2^-12. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt_ps - FORCE_INLINE __m128 _mm_rsqrt_ps(__m128 in) - { - float32x4_t out = vrsqrteq_f32(vreinterpretq_f32_m128(in)); --#if SSE2NEON_PRECISE_SQRT -- // Additional Netwon-Raphson iteration for accuracy - out = vmulq_f32( - out, vrsqrtsq_f32(vmulq_f32(vreinterpretq_f32_m128(in), out), out)); -- out = vmulq_f32( -- out, vrsqrtsq_f32(vmulq_f32(vreinterpretq_f32_m128(in), out), out)); --#endif - return vreinterpretq_m128_f32(out); - } - -@@ -2480,7 +2242,7 @@ FORCE_INLINE __m128 _mm_rsqrt_ps(__m128 in) - // (32-bit) floating-point element in a, store the result in the lower element - // of dst, and copy the upper 3 packed elements from a to the upper elements of - // dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ss -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt_ss - FORCE_INLINE __m128 _mm_rsqrt_ss(__m128 in) - { - return vsetq_lane_f32(vgetq_lane_f32(_mm_rsqrt_ps(in), 0), in, 0); -@@ -2490,7 +2252,7 @@ FORCE_INLINE __m128 _mm_rsqrt_ss(__m128 in) - // b, then horizontally sum each consecutive 8 differences to produce four - // unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low - // 16 bits of dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sad_pu8 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sad_pu8 - FORCE_INLINE __m64 _mm_sad_pu8(__m64 a, __m64 b) - { - uint64x1_t t = vpaddl_u32(vpaddl_u16( -@@ -2502,7 +2264,7 @@ FORCE_INLINE __m64 _mm_sad_pu8(__m64 a, __m64 b) - // Macro: Set the flush zero bits of the MXCSR control and status register to - // the value in unsigned 32-bit integer a. 
The flush zero may contain any of the - // following flags: _MM_FLUSH_ZERO_ON or _MM_FLUSH_ZERO_OFF --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_SET_FLUSH_ZERO_MODE -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_SET_FLUSH_ZERO_MODE - FORCE_INLINE void _sse2neon_mm_set_flush_zero_mode(unsigned int flag) - { - // AArch32 Advanced SIMD arithmetic always uses the Flush-to-zero setting, -@@ -2531,16 +2293,18 @@ FORCE_INLINE void _sse2neon_mm_set_flush_zero_mode(unsigned int flag) - #endif - } - --// Sets the four single-precision, floating-point values to the four inputs. --// https://msdn.microsoft.com/en-us/library/vstudio/afh0zf75(v=vs.100).aspx -+// Set packed single-precision (32-bit) floating-point elements in dst with the -+// supplied values. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_ps - FORCE_INLINE __m128 _mm_set_ps(float w, float z, float y, float x) - { - float ALIGN_STRUCT(16) data[4] = {x, y, z, w}; - return vreinterpretq_m128_f32(vld1q_f32(data)); - } - --// Sets the four single-precision, floating-point values to w. --// https://msdn.microsoft.com/en-us/library/vstudio/2x1se8ha(v=vs.100).aspx -+// Broadcast single-precision (32-bit) floating-point value a to all elements of -+// dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_ps1 - FORCE_INLINE __m128 _mm_set_ps1(float _w) - { - return vreinterpretq_m128_f32(vdupq_n_f32(_w)); -@@ -2550,7 +2314,7 @@ FORCE_INLINE __m128 _mm_set_ps1(float _w) - // the value in unsigned 32-bit integer a. 
The rounding mode may contain any of - // the following flags: _MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, - // _MM_ROUND_TOWARD_ZERO --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_SET_ROUNDING_MODE -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_SET_ROUNDING_MODE - FORCE_INLINE void _MM_SET_ROUNDING_MODE(int rounding) - { - union { -@@ -2595,45 +2359,48 @@ FORCE_INLINE void _MM_SET_ROUNDING_MODE(int rounding) - - // Copy single-precision (32-bit) floating-point element a to the lower element - // of dst, and zero the upper 3 elements. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ss -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_ss - FORCE_INLINE __m128 _mm_set_ss(float a) - { - return vreinterpretq_m128_f32(vsetq_lane_f32(a, vdupq_n_f32(0), 0)); - } - --// Sets the four single-precision, floating-point values to w. --// --// r0 := r1 := r2 := r3 := w --// --// https://msdn.microsoft.com/en-us/library/vstudio/2x1se8ha(v=vs.100).aspx -+// Broadcast single-precision (32-bit) floating-point value a to all elements of -+// dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_ps - FORCE_INLINE __m128 _mm_set1_ps(float _w) - { - return vreinterpretq_m128_f32(vdupq_n_f32(_w)); - } - -+// Set the MXCSR control and status register with the value in unsigned 32-bit -+// integer a. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setcsr - // FIXME: _mm_setcsr() implementation supports changing the rounding mode only. - FORCE_INLINE void _mm_setcsr(unsigned int a) - { - _MM_SET_ROUNDING_MODE(a); - } - -+// Get the unsigned 32-bit value of the MXCSR control and status register. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getcsr - // FIXME: _mm_getcsr() implementation supports reading the rounding mode only. - FORCE_INLINE unsigned int _mm_getcsr() - { - return _MM_GET_ROUNDING_MODE(); - } - --// Sets the four single-precision, floating-point values to the four inputs in --// reverse order. --// https://msdn.microsoft.com/en-us/library/vstudio/d2172ct3(v=vs.100).aspx -+// Set packed single-precision (32-bit) floating-point elements in dst with the -+// supplied values in reverse order. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_ps - FORCE_INLINE __m128 _mm_setr_ps(float w, float z, float y, float x) - { - float ALIGN_STRUCT(16) data[4] = {w, z, y, x}; - return vreinterpretq_m128_f32(vld1q_f32(data)); - } - --// Clears the four single-precision, floating-point values. --// https://msdn.microsoft.com/en-us/library/vstudio/tk1t2tbz(v=vs.100).aspx -+// Return vector of type __m128 with all elements set to zero. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_ps - FORCE_INLINE __m128 _mm_setzero_ps(void) - { - return vreinterpretq_m128_f32(vdupq_n_f32(0)); -@@ -2641,7 +2408,7 @@ FORCE_INLINE __m128 _mm_setzero_ps(void) - - // Shuffle 16-bit integers in a using the control in imm8, and store the results - // in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_pi16 - #ifdef _sse2neon_shuffle - #define _mm_shuffle_pi16(a, imm) \ - __extension__({ \ -@@ -2775,19 +2542,17 @@ FORCE_INLINE void _mm_lfence(void) - }) - #endif - --// Computes the approximations of square roots of the four single-precision, --// floating-point values of a. First computes reciprocal square roots and then --// reciprocals of the four values. 
--// --// r0 := sqrt(a0) --// r1 := sqrt(a1) --// r2 := sqrt(a2) --// r3 := sqrt(a3) --// --// https://msdn.microsoft.com/en-us/library/vstudio/8z67bwwk(v=vs.100).aspx -+// Compute the square root of packed single-precision (32-bit) floating-point -+// elements in a, and store the results in dst. -+// Due to ARMv7-A NEON's lack of a precise square root intrinsic, we implement -+// square root by multiplying input in with its reciprocal square root before -+// using the Newton-Raphson method to approximate the results. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_ps - FORCE_INLINE __m128 _mm_sqrt_ps(__m128 in) - { --#if SSE2NEON_PRECISE_SQRT -+#if defined(__aarch64__) -+ return vreinterpretq_m128_f32(vsqrtq_f32(vreinterpretq_f32_m128(in))); -+#else - float32x4_t recip = vrsqrteq_f32(vreinterpretq_f32_m128(in)); - - // Test for vrsqrteq_f32(0) -> positive infinity case. -@@ -2798,28 +2563,23 @@ FORCE_INLINE __m128 _mm_sqrt_ps(__m128 in) - recip = vreinterpretq_f32_u32( - vandq_u32(vmvnq_u32(div_by_zero), vreinterpretq_u32_f32(recip))); - -- // Additional Netwon-Raphson iteration for accuracy - recip = vmulq_f32( - vrsqrtsq_f32(vmulq_f32(recip, recip), vreinterpretq_f32_m128(in)), - recip); -+ // Additional Netwon-Raphson iteration for accuracy - recip = vmulq_f32( - vrsqrtsq_f32(vmulq_f32(recip, recip), vreinterpretq_f32_m128(in)), - recip); - - // sqrt(s) = s * 1/sqrt(s) - return vreinterpretq_m128_f32(vmulq_f32(vreinterpretq_f32_m128(in), recip)); --#elif defined(__aarch64__) -- return vreinterpretq_m128_f32(vsqrtq_f32(vreinterpretq_f32_m128(in))); --#else -- float32x4_t recipsq = vrsqrteq_f32(vreinterpretq_f32_m128(in)); -- float32x4_t sq = vrecpeq_f32(recipsq); -- return vreinterpretq_m128_f32(sq); - #endif - } - --// Computes the approximation of the square root of the scalar single-precision --// floating point value of in. 
--// https://msdn.microsoft.com/en-us/library/ahfsc22d(v=vs.100).aspx -+// Compute the square root of the lower single-precision (32-bit) floating-point -+// element in a, store the result in the lower element of dst, and copy the -+// upper 3 packed elements from a to the upper elements of dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_ss - FORCE_INLINE __m128 _mm_sqrt_ss(__m128 in) - { - float32_t value = -@@ -2828,8 +2588,10 @@ FORCE_INLINE __m128 _mm_sqrt_ss(__m128 in) - vsetq_lane_f32(value, vreinterpretq_f32_m128(in), 0)); - } - --// Stores four single-precision, floating-point values. --// https://msdn.microsoft.com/en-us/library/vstudio/s3h4ay6y(v=vs.100).aspx -+// Store 128-bits (composed of 4 packed single-precision (32-bit) floating-point -+// elements) from a into memory. mem_addr must be aligned on a 16-byte boundary -+// or a general-protection exception may be generated. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_ps - FORCE_INLINE void _mm_store_ps(float *p, __m128 a) - { - vst1q_f32(p, vreinterpretq_f32_m128(a)); -@@ -2838,21 +2600,16 @@ FORCE_INLINE void _mm_store_ps(float *p, __m128 a) - // Store the lower single-precision (32-bit) floating-point element from a into - // 4 contiguous elements in memory. mem_addr must be aligned on a 16-byte - // boundary or a general-protection exception may be generated. 
--// --// MEM[mem_addr+31:mem_addr] := a[31:0] --// MEM[mem_addr+63:mem_addr+32] := a[31:0] --// MEM[mem_addr+95:mem_addr+64] := a[31:0] --// MEM[mem_addr+127:mem_addr+96] := a[31:0] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_ps1 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_ps1 - FORCE_INLINE void _mm_store_ps1(float *p, __m128 a) - { - float32_t a0 = vgetq_lane_f32(vreinterpretq_f32_m128(a), 0); - vst1q_f32(p, vdupq_n_f32(a0)); - } - --// Stores the lower single - precision, floating - point value. --// https://msdn.microsoft.com/en-us/library/tzz10fbx(v=vs.100).aspx -+// Store the lower single-precision (32-bit) floating-point element from a into -+// memory. mem_addr does not need to be aligned on any particular boundary. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_ss - FORCE_INLINE void _mm_store_ss(float *p, __m128 a) - { - vst1q_lane_f32(p, vreinterpretq_f32_m128(a), 0); -@@ -2861,34 +2618,20 @@ FORCE_INLINE void _mm_store_ss(float *p, __m128 a) - // Store the lower single-precision (32-bit) floating-point element from a into - // 4 contiguous elements in memory. mem_addr must be aligned on a 16-byte - // boundary or a general-protection exception may be generated. --// --// MEM[mem_addr+31:mem_addr] := a[31:0] --// MEM[mem_addr+63:mem_addr+32] := a[31:0] --// MEM[mem_addr+95:mem_addr+64] := a[31:0] --// MEM[mem_addr+127:mem_addr+96] := a[31:0] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store1_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store1_ps - #define _mm_store1_ps _mm_store_ps1 - --// Stores the upper two single-precision, floating-point values of a to the --// address p. 
--// --// *p0 := a2 --// *p1 := a3 --// --// https://msdn.microsoft.com/en-us/library/a7525fs8(v%3dvs.90).aspx -+// Store the upper 2 single-precision (32-bit) floating-point elements from a -+// into memory. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeh_pi - FORCE_INLINE void _mm_storeh_pi(__m64 *p, __m128 a) - { - *p = vreinterpret_m64_f32(vget_high_f32(a)); - } - --// Stores the lower two single-precision floating point values of a to the --// address p. --// --// *p0 := a0 --// *p1 := a1 --// --// https://msdn.microsoft.com/en-us/library/h54t98ks(v=vs.90).aspx -+// Store the lower 2 single-precision (32-bit) floating-point elements from a -+// into memory. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_pi - FORCE_INLINE void _mm_storel_pi(__m64 *p, __m128 a) - { - *p = vreinterpret_m64_f32(vget_low_f32(a)); -@@ -2897,13 +2640,7 @@ FORCE_INLINE void _mm_storel_pi(__m64 *p, __m128 a) - // Store 4 single-precision (32-bit) floating-point elements from a into memory - // in reverse order. mem_addr must be aligned on a 16-byte boundary or a - // general-protection exception may be generated. --// --// MEM[mem_addr+31:mem_addr] := a[127:96] --// MEM[mem_addr+63:mem_addr+32] := a[95:64] --// MEM[mem_addr+95:mem_addr+64] := a[63:32] --// MEM[mem_addr+127:mem_addr+96] := a[31:0] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storer_ps - FORCE_INLINE void _mm_storer_ps(float *p, __m128 a) - { - float32x4_t tmp = vrev64q_f32(vreinterpretq_f32_m128(a)); -@@ -2911,22 +2648,24 @@ FORCE_INLINE void _mm_storer_ps(float *p, __m128 a) - vst1q_f32(p, rev); - } - --// Stores four single-precision, floating-point values. 
--// https://msdn.microsoft.com/en-us/library/44e30x22(v=vs.100).aspx -+// Store 128-bits (composed of 4 packed single-precision (32-bit) floating-point -+// elements) from a into memory. mem_addr does not need to be aligned on any -+// particular boundary. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_ps - FORCE_INLINE void _mm_storeu_ps(float *p, __m128 a) - { - vst1q_f32(p, vreinterpretq_f32_m128(a)); - } - - // Stores 16-bits of integer data a at the address p. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si16 - FORCE_INLINE void _mm_storeu_si16(void *p, __m128i a) - { - vst1q_lane_s16((int16_t *) p, vreinterpretq_s16_m128i(a), 0); - } - - // Stores 64-bits of integer data a at the address p. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si64 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si64 - FORCE_INLINE void _mm_storeu_si64(void *p, __m128i a) - { - vst1q_lane_s64((int64_t *) p, vreinterpretq_s64_m128i(a), 0); -@@ -2934,7 +2673,7 @@ FORCE_INLINE void _mm_storeu_si64(void *p, __m128i a) - - // Store 64-bits of integer data from a into memory using a non-temporal memory - // hint. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_pi -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_pi - FORCE_INLINE void _mm_stream_pi(__m64 *p, __m64 a) - { - vst1_s64((int64_t *) p, vreinterpret_s64_m64(a)); -@@ -2942,7 +2681,7 @@ FORCE_INLINE void _mm_stream_pi(__m64 *p, __m64 a) - - // Store 128-bits (composed of 4 packed single-precision (32-bit) floating- - // point elements) from a into memory using a non-temporal memory hint. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_ps - FORCE_INLINE void _mm_stream_ps(float *p, __m128 a) - { - #if __has_builtin(__builtin_nontemporal_store) -@@ -2952,14 +2691,10 @@ FORCE_INLINE void _mm_stream_ps(float *p, __m128 a) - #endif - } - --// Subtracts the four single-precision, floating-point values of a and b. --// --// r0 := a0 - b0 --// r1 := a1 - b1 --// r2 := a2 - b2 --// r3 := a3 - b3 --// --// https://msdn.microsoft.com/en-us/library/vstudio/1zad2k61(v=vs.100).aspx -+// Subtract packed single-precision (32-bit) floating-point elements in b from -+// packed single-precision (32-bit) floating-point elements in a, and store the -+// results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_ps - FORCE_INLINE __m128 _mm_sub_ps(__m128 a, __m128 b) - { - return vreinterpretq_m128_f32( -@@ -2970,11 +2705,7 @@ FORCE_INLINE __m128 _mm_sub_ps(__m128 a, __m128 b) - // the lower single-precision (32-bit) floating-point element in a, store the - // result in the lower element of dst, and copy the upper 3 packed elements from - // a to the upper elements of dst. --// --// dst[31:0] := a[31:0] - b[31:0] --// dst[127:32] := a[127:32] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_ss -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_ss - FORCE_INLINE __m128 _mm_sub_ss(__m128 a, __m128 b) - { - return _mm_move_ss(a, _mm_sub_ps(a, b)); -@@ -2983,7 +2714,7 @@ FORCE_INLINE __m128 _mm_sub_ss(__m128 a, __m128 b) - // Macro: Transpose the 4x4 matrix formed by the 4 rows of single-precision - // (32-bit) floating-point elements in row0, row1, row2, and row3, and store the - // transposed matrix in these vectors (row0 now contains column 0, etc.). 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=MM_TRANSPOSE4_PS -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=MM_TRANSPOSE4_PS - #define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - float32x4x2_t ROW01 = vtrnq_f32(row0, row1); \ -@@ -3008,7 +2739,7 @@ FORCE_INLINE __m128 _mm_sub_ss(__m128 a, __m128 b) - #define _mm_ucomineq_ss _mm_comineq_ss - - // Return vector of type __m128i with undefined elements. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_undefined_si128 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_undefined_si128 - FORCE_INLINE __m128i _mm_undefined_si128(void) - { - #if defined(__GNUC__) || defined(__clang__) -@@ -3023,7 +2754,7 @@ FORCE_INLINE __m128i _mm_undefined_si128(void) - } - - // Return vector of type __m128 with undefined elements. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_ps - FORCE_INLINE __m128 _mm_undefined_ps(void) - { - #if defined(__GNUC__) || defined(__clang__) -@@ -3037,15 +2768,9 @@ FORCE_INLINE __m128 _mm_undefined_ps(void) - #endif - } - --// Selects and interleaves the upper two single-precision, floating-point values --// from a and b. --// --// r0 := a2 --// r1 := b2 --// r2 := a3 --// r3 := b3 --// --// https://msdn.microsoft.com/en-us/library/skccxx7d%28v=vs.90%29.aspx -+// Unpack and interleave single-precision (32-bit) floating-point elements from -+// the high half a and b, and store the results in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_ps - FORCE_INLINE __m128 _mm_unpackhi_ps(__m128 a, __m128 b) - { - #if defined(__aarch64__) -@@ -3059,15 +2784,9 @@ FORCE_INLINE __m128 _mm_unpackhi_ps(__m128 a, __m128 b) - #endif - } - --// Selects and interleaves the lower two single-precision, floating-point values --// from a and b. --// --// r0 := a0 --// r1 := b0 --// r2 := a1 --// r3 := b1 --// --// https://msdn.microsoft.com/en-us/library/25st103b%28v=vs.90%29.aspx -+// Unpack and interleave single-precision (32-bit) floating-point elements from -+// the low half of a and b, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_ps - FORCE_INLINE __m128 _mm_unpacklo_ps(__m128 a, __m128 b) - { - #if defined(__aarch64__) -@@ -3081,9 +2800,9 @@ FORCE_INLINE __m128 _mm_unpacklo_ps(__m128 a, __m128 b) - #endif - } - --// Computes bitwise EXOR (exclusive-or) of the four single-precision, --// floating-point values of a and b. --// https://msdn.microsoft.com/en-us/library/ss6k3wk8(v=vs.100).aspx -+// Compute the bitwise XOR of packed single-precision (32-bit) floating-point -+// elements in a and b, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_ps - FORCE_INLINE __m128 _mm_xor_ps(__m128 a, __m128 b) - { - return vreinterpretq_m128_s32( -@@ -3092,42 +2811,32 @@ FORCE_INLINE __m128 _mm_xor_ps(__m128 a, __m128 b) - - /* SSE2 */ - --// Adds the 8 signed or unsigned 16-bit integers in a to the 8 signed or --// unsigned 16-bit integers in b. --// https://msdn.microsoft.com/en-us/library/fceha5k4(v=vs.100).aspx -+// Add packed 16-bit integers in a and b, and store the results in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi16 - FORCE_INLINE __m128i _mm_add_epi16(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s16( - vaddq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); - } - --// Adds the 4 signed or unsigned 32-bit integers in a to the 4 signed or --// unsigned 32-bit integers in b. --// --// r0 := a0 + b0 --// r1 := a1 + b1 --// r2 := a2 + b2 --// r3 := a3 + b3 --// --// https://msdn.microsoft.com/en-us/library/vstudio/09xs4fkk(v=vs.100).aspx -+// Add packed 32-bit integers in a and b, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi32 - FORCE_INLINE __m128i _mm_add_epi32(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s32( - vaddq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); - } - --// Adds the 4 signed or unsigned 64-bit integers in a to the 4 signed or --// unsigned 32-bit integers in b. --// https://msdn.microsoft.com/en-us/library/vstudio/09xs4fkk(v=vs.100).aspx -+// Add packed 64-bit integers in a and b, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi64 - FORCE_INLINE __m128i _mm_add_epi64(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s64( - vaddq_s64(vreinterpretq_s64_m128i(a), vreinterpretq_s64_m128i(b))); - } - --// Adds the 16 signed or unsigned 8-bit integers in a to the 16 signed or --// unsigned 8-bit integers in b. --// https://technet.microsoft.com/en-us/subscriptions/yc7tcyzs(v=vs.90) -+// Add packed 8-bit integers in a and b, and store the results in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi8 - FORCE_INLINE __m128i _mm_add_epi8(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s8( -@@ -3136,7 +2845,7 @@ FORCE_INLINE __m128i _mm_add_epi8(__m128i a, __m128i b) - - // Add packed double-precision (64-bit) floating-point elements in a and b, and - // store the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_pd - FORCE_INLINE __m128d _mm_add_pd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -3155,11 +2864,7 @@ FORCE_INLINE __m128d _mm_add_pd(__m128d a, __m128d b) - // Add the lower double-precision (64-bit) floating-point element in a and b, - // store the result in the lower element of dst, and copy the upper element from - // a to the upper element of dst. --// --// dst[63:0] := a[63:0] + b[63:0] --// dst[127:64] := a[127:64] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_sd - FORCE_INLINE __m128d _mm_add_sd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -3175,25 +2880,16 @@ FORCE_INLINE __m128d _mm_add_sd(__m128d a, __m128d b) - } - - // Add 64-bit integers a and b, and store the result in dst. --// --// dst[63:0] := a[63:0] + b[63:0] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_si64 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_si64 - FORCE_INLINE __m64 _mm_add_si64(__m64 a, __m64 b) - { - return vreinterpret_m64_s64( - vadd_s64(vreinterpret_s64_m64(a), vreinterpret_s64_m64(b))); - } - --// Adds the 8 signed 16-bit integers in a to the 8 signed 16-bit integers in b --// and saturates. --// --// r0 := SignedSaturate(a0 + b0) --// r1 := SignedSaturate(a1 + b1) --// ... 
--// r7 := SignedSaturate(a7 + b7) --// --// https://msdn.microsoft.com/en-us/library/1a306ef8(v=vs.100).aspx -+// Add packed signed 16-bit integers in a and b using saturation, and store the -+// results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi16 - FORCE_INLINE __m128i _mm_adds_epi16(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s16( -@@ -3202,13 +2898,7 @@ FORCE_INLINE __m128i _mm_adds_epi16(__m128i a, __m128i b) - - // Add packed signed 8-bit integers in a and b using saturation, and store the - // results in dst. --// --// FOR j := 0 to 15 --// i := j*8 --// dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi8 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi8 - FORCE_INLINE __m128i _mm_adds_epi8(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s8( -@@ -3217,16 +2907,16 @@ FORCE_INLINE __m128i _mm_adds_epi8(__m128i a, __m128i b) - - // Add packed unsigned 16-bit integers in a and b using saturation, and store - // the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu16 - FORCE_INLINE __m128i _mm_adds_epu16(__m128i a, __m128i b) - { - return vreinterpretq_m128i_u16( - vqaddq_u16(vreinterpretq_u16_m128i(a), vreinterpretq_u16_m128i(b))); - } - --// Adds the 16 unsigned 8-bit integers in a to the 16 unsigned 8-bit integers in --// b and saturates.. --// https://msdn.microsoft.com/en-us/library/9hahyddy(v=vs.100).aspx -+// Add packed unsigned 8-bit integers in a and b using saturation, and store the -+// results in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu8 - FORCE_INLINE __m128i _mm_adds_epu8(__m128i a, __m128i b) - { - return vreinterpretq_m128i_u8( -@@ -3235,25 +2925,16 @@ FORCE_INLINE __m128i _mm_adds_epu8(__m128i a, __m128i b) - - // Compute the bitwise AND of packed double-precision (64-bit) floating-point - // elements in a and b, and store the results in dst. --// --// FOR j := 0 to 1 --// i := j*64 --// dst[i+63:i] := a[i+63:i] AND b[i+63:i] --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_pd - FORCE_INLINE __m128d _mm_and_pd(__m128d a, __m128d b) - { - return vreinterpretq_m128d_s64( - vandq_s64(vreinterpretq_s64_m128d(a), vreinterpretq_s64_m128d(b))); - } - --// Computes the bitwise AND of the 128-bit value in a and the 128-bit value in --// b. --// --// r := a & b --// --// https://msdn.microsoft.com/en-us/library/vstudio/6d1txsa8(v=vs.100).aspx -+// Compute the bitwise AND of 128 bits (representing integer data) in a and b, -+// and store the result in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_si128 - FORCE_INLINE __m128i _mm_and_si128(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s32( -@@ -3262,13 +2943,7 @@ FORCE_INLINE __m128i _mm_and_si128(__m128i a, __m128i b) - - // Compute the bitwise NOT of packed double-precision (64-bit) floating-point - // elements in a and then AND with b, and store the results in dst. 
--// --// FOR j := 0 to 1 --// i := j*64 --// dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_pd - FORCE_INLINE __m128d _mm_andnot_pd(__m128d a, __m128d b) - { - // *NOTE* argument swap -@@ -3276,12 +2951,9 @@ FORCE_INLINE __m128d _mm_andnot_pd(__m128d a, __m128d b) - vbicq_s64(vreinterpretq_s64_m128d(b), vreinterpretq_s64_m128d(a))); - } - --// Computes the bitwise AND of the 128-bit value in b and the bitwise NOT of the --// 128-bit value in a. --// --// r := (~a) & b --// --// https://msdn.microsoft.com/en-us/library/vstudio/1beaceh8(v=vs.100).aspx -+// Compute the bitwise NOT of 128 bits (representing integer data) in a and then -+// AND with b, and store the result in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_si128 - FORCE_INLINE __m128i _mm_andnot_si128(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s32( -@@ -3289,30 +2961,18 @@ FORCE_INLINE __m128i _mm_andnot_si128(__m128i a, __m128i b) - vreinterpretq_s32_m128i(a))); // *NOTE* argument swap - } - --// Computes the average of the 8 unsigned 16-bit integers in a and the 8 --// unsigned 16-bit integers in b and rounds. --// --// r0 := (a0 + b0) / 2 --// r1 := (a1 + b1) / 2 --// ... --// r7 := (a7 + b7) / 2 --// --// https://msdn.microsoft.com/en-us/library/vstudio/y13ca3c8(v=vs.90).aspx -+// Average packed unsigned 16-bit integers in a and b, and store the results in -+// dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu16 - FORCE_INLINE __m128i _mm_avg_epu16(__m128i a, __m128i b) - { - return (__m128i) vrhaddq_u16(vreinterpretq_u16_m128i(a), - vreinterpretq_u16_m128i(b)); - } - --// Computes the average of the 16 unsigned 8-bit integers in a and the 16 --// unsigned 8-bit integers in b and rounds. 
--// --// r0 := (a0 + b0) / 2 --// r1 := (a1 + b1) / 2 --// ... --// r15 := (a15 + b15) / 2 --// --// https://msdn.microsoft.com/en-us/library/vstudio/8zwh554a(v%3dvs.90).aspx -+// Average packed unsigned 8-bit integers in a and b, and store the results in -+// dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu8 - FORCE_INLINE __m128i _mm_avg_epu8(__m128i a, __m128i b) - { - return vreinterpretq_m128i_u8( -@@ -3321,17 +2981,17 @@ FORCE_INLINE __m128i _mm_avg_epu8(__m128i a, __m128i b) - - // Shift a left by imm8 bytes while shifting in zeros, and store the results in - // dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bslli_si128 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bslli_si128 - #define _mm_bslli_si128(a, imm) _mm_slli_si128(a, imm) - - // Shift a right by imm8 bytes while shifting in zeros, and store the results in - // dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bsrli_si128 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bsrli_si128 - #define _mm_bsrli_si128(a, imm) _mm_srli_si128(a, imm) - - // Cast vector of type __m128d to type __m128. This intrinsic is only used for - // compilation and does not generate any instructions, thus it has zero latency. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_ps - FORCE_INLINE __m128 _mm_castpd_ps(__m128d a) - { - return vreinterpretq_m128_s64(vreinterpretq_s64_m128d(a)); -@@ -3339,7 +2999,7 @@ FORCE_INLINE __m128 _mm_castpd_ps(__m128d a) - - // Cast vector of type __m128d to type __m128i. This intrinsic is only used for - // compilation and does not generate any instructions, thus it has zero latency. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_si128 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_si128 - FORCE_INLINE __m128i _mm_castpd_si128(__m128d a) - { - return vreinterpretq_m128i_s64(vreinterpretq_s64_m128d(a)); -@@ -3347,15 +3007,15 @@ FORCE_INLINE __m128i _mm_castpd_si128(__m128d a) - - // Cast vector of type __m128 to type __m128d. This intrinsic is only used for - // compilation and does not generate any instructions, thus it has zero latency. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_pd - FORCE_INLINE __m128d _mm_castps_pd(__m128 a) - { - return vreinterpretq_m128d_s32(vreinterpretq_s32_m128(a)); - } - --// Applies a type cast to reinterpret four 32-bit floating point values passed --// in as a 128-bit parameter as packed 32-bit integers. --// https://msdn.microsoft.com/en-us/library/bb514099.aspx -+// Cast vector of type __m128 to type __m128i. This intrinsic is only used for -+// compilation and does not generate any instructions, thus it has zero latency. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_si128 - FORCE_INLINE __m128i _mm_castps_si128(__m128 a) - { - return vreinterpretq_m128i_s32(vreinterpretq_s32_m128(a)); -@@ -3363,7 +3023,7 @@ FORCE_INLINE __m128i _mm_castps_si128(__m128 a) - - // Cast vector of type __m128i to type __m128d. This intrinsic is only used for - // compilation and does not generate any instructions, thus it has zero latency. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_pd - FORCE_INLINE __m128d _mm_castsi128_pd(__m128i a) - { - #if defined(__aarch64__) -@@ -3373,9 +3033,9 @@ FORCE_INLINE __m128d _mm_castsi128_pd(__m128i a) - #endif - } - --// Applies a type cast to reinterpret four 32-bit integers passed in as a --// 128-bit parameter as packed 32-bit floating point values. --// https://msdn.microsoft.com/en-us/library/bb514029.aspx -+// Cast vector of type __m128i to type __m128. This intrinsic is only used for -+// compilation and does not generate any instructions, thus it has zero latency. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_ps - FORCE_INLINE __m128 _mm_castsi128_ps(__m128i a) - { - return vreinterpretq_m128_s32(vreinterpretq_s32_m128i(a)); -@@ -3406,9 +3066,9 @@ FORCE_INLINE void _mm_clflush(void const *p) - #endif - } - --// Compares the 8 signed or unsigned 16-bit integers in a and the 8 signed or --// unsigned 16-bit integers in b for equality. --// https://msdn.microsoft.com/en-us/library/2ay060te(v=vs.100).aspx -+// Compare packed 16-bit integers in a and b for equality, and store the results -+// in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16 - FORCE_INLINE __m128i _mm_cmpeq_epi16(__m128i a, __m128i b) - { - return vreinterpretq_m128i_u16( -@@ -3416,16 +3076,17 @@ FORCE_INLINE __m128i _mm_cmpeq_epi16(__m128i a, __m128i b) - } - - // Compare packed 32-bit integers in a and b for equality, and store the results --// in dst -+// in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32 - FORCE_INLINE __m128i _mm_cmpeq_epi32(__m128i a, __m128i b) - { - return vreinterpretq_m128i_u32( - vceqq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); - } - --// Compares the 16 signed or unsigned 8-bit integers in a and the 16 signed or --// unsigned 8-bit integers in b for equality. --// https://msdn.microsoft.com/en-us/library/windows/desktop/bz5xk21a(v=vs.90).aspx -+// Compare packed 8-bit integers in a and b for equality, and store the results -+// in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8 - FORCE_INLINE __m128i _mm_cmpeq_epi8(__m128i a, __m128i b) - { - return vreinterpretq_m128i_u8( -@@ -3434,7 +3095,7 @@ FORCE_INLINE __m128i _mm_cmpeq_epi8(__m128i a, __m128i b) - - // Compare packed double-precision (64-bit) floating-point elements in a and b - // for equality, and store the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_pd - FORCE_INLINE __m128d _mm_cmpeq_pd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -3452,7 +3113,7 @@ FORCE_INLINE __m128d _mm_cmpeq_pd(__m128d a, __m128d b) - // Compare the lower double-precision (64-bit) floating-point elements in a and - // b for equality, store the result in the lower element of dst, and copy the - // upper element from a to the upper element of dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_sd - FORCE_INLINE __m128d _mm_cmpeq_sd(__m128d a, __m128d b) - { - return _mm_move_sd(a, _mm_cmpeq_pd(a, b)); -@@ -3460,7 +3121,7 @@ FORCE_INLINE __m128d _mm_cmpeq_sd(__m128d a, __m128d b) - - // Compare packed double-precision (64-bit) floating-point elements in a and b - // for greater-than-or-equal, and store the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_pd - FORCE_INLINE __m128d _mm_cmpge_pd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -3482,7 +3143,7 @@ FORCE_INLINE __m128d _mm_cmpge_pd(__m128d a, __m128d b) - // Compare the lower double-precision (64-bit) floating-point elements in a and - // b for greater-than-or-equal, store the result in the lower element of dst, - // and copy the upper element from a to the upper element of dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_sd - FORCE_INLINE __m128d _mm_cmpge_sd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -3500,39 +3161,27 @@ FORCE_INLINE __m128d _mm_cmpge_sd(__m128d a, __m128d b) - #endif - } - --// Compares the 8 signed 16-bit integers in a and the 8 signed 16-bit integers --// in b for greater than. --// --// r0 := (a0 > b0) ? 0xffff : 0x0 --// r1 := (a1 > b1) ? 0xffff : 0x0 --// ... --// r7 := (a7 > b7) ? 0xffff : 0x0 --// --// https://technet.microsoft.com/en-us/library/xd43yfsa(v=vs.100).aspx -+// Compare packed signed 16-bit integers in a and b for greater-than, and store -+// the results in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16 - FORCE_INLINE __m128i _mm_cmpgt_epi16(__m128i a, __m128i b) - { - return vreinterpretq_m128i_u16( - vcgtq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); - } - --// Compares the 4 signed 32-bit integers in a and the 4 signed 32-bit integers --// in b for greater than. --// https://msdn.microsoft.com/en-us/library/vstudio/1s9f2z0y(v=vs.100).aspx -+// Compare packed signed 32-bit integers in a and b for greater-than, and store -+// the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32 - FORCE_INLINE __m128i _mm_cmpgt_epi32(__m128i a, __m128i b) - { - return vreinterpretq_m128i_u32( - vcgtq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); - } - --// Compares the 16 signed 8-bit integers in a and the 16 signed 8-bit integers --// in b for greater than. --// --// r0 := (a0 > b0) ? 0xff : 0x0 --// r1 := (a1 > b1) ? 0xff : 0x0 --// ... --// r15 := (a15 > b15) ? 0xff : 0x0 --// --// https://msdn.microsoft.com/zh-tw/library/wf45zt2b(v=vs.100).aspx -+// Compare packed signed 8-bit integers in a and b for greater-than, and store -+// the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8 - FORCE_INLINE __m128i _mm_cmpgt_epi8(__m128i a, __m128i b) - { - return vreinterpretq_m128i_u8( -@@ -3541,7 +3190,7 @@ FORCE_INLINE __m128i _mm_cmpgt_epi8(__m128i a, __m128i b) - - // Compare packed double-precision (64-bit) floating-point elements in a and b - // for greater-than, and store the results in dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_pd - FORCE_INLINE __m128d _mm_cmpgt_pd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -3563,7 +3212,7 @@ FORCE_INLINE __m128d _mm_cmpgt_pd(__m128d a, __m128d b) - // Compare the lower double-precision (64-bit) floating-point elements in a and - // b for greater-than, store the result in the lower element of dst, and copy - // the upper element from a to the upper element of dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_sd - FORCE_INLINE __m128d _mm_cmpgt_sd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -3583,7 +3232,7 @@ FORCE_INLINE __m128d _mm_cmpgt_sd(__m128d a, __m128d b) - - // Compare packed double-precision (64-bit) floating-point elements in a and b - // for less-than-or-equal, and store the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_pd - FORCE_INLINE __m128d _mm_cmple_pd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -3605,7 +3254,7 @@ FORCE_INLINE __m128d _mm_cmple_pd(__m128d a, __m128d b) - // Compare the lower double-precision (64-bit) floating-point elements in a and - // b for less-than-or-equal, store the result in the lower element of dst, and - // copy the upper element from a to the upper element of dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_sd - FORCE_INLINE __m128d _mm_cmple_sd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -3623,34 +3272,30 @@ FORCE_INLINE __m128d _mm_cmple_sd(__m128d a, __m128d b) - #endif - } - --// Compares the 8 signed 16-bit integers in a and the 8 signed 16-bit integers --// in b for less than. --// --// r0 := (a0 < b0) ? 0xffff : 0x0 --// r1 := (a1 < b1) ? 0xffff : 0x0 --// ... --// r7 := (a7 < b7) ? 0xffff : 0x0 --// --// https://technet.microsoft.com/en-us/library/t863edb2(v=vs.100).aspx -+// Compare packed signed 16-bit integers in a and b for less-than, and store the -+// results in dst. Note: This intrinsic emits the pcmpgtw instruction with the -+// order of the operands switched. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16 - FORCE_INLINE __m128i _mm_cmplt_epi16(__m128i a, __m128i b) - { - return vreinterpretq_m128i_u16( - vcltq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); - } - -- --// Compares the 4 signed 32-bit integers in a and the 4 signed 32-bit integers --// in b for less than. --// https://msdn.microsoft.com/en-us/library/vstudio/4ak0bf5d(v=vs.100).aspx -+// Compare packed signed 32-bit integers in a and b for less-than, and store the -+// results in dst. Note: This intrinsic emits the pcmpgtd instruction with the -+// order of the operands switched. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32 - FORCE_INLINE __m128i _mm_cmplt_epi32(__m128i a, __m128i b) - { - return vreinterpretq_m128i_u32( - vcltq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); - } - --// Compares the 16 signed 8-bit integers in a and the 16 signed 8-bit integers --// in b for lesser than. 
--// https://msdn.microsoft.com/en-us/library/windows/desktop/9s46csht(v=vs.90).aspx -+// Compare packed signed 8-bit integers in a and b for less-than, and store the -+// results in dst. Note: This intrinsic emits the pcmpgtb instruction with the -+// order of the operands switched. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8 - FORCE_INLINE __m128i _mm_cmplt_epi8(__m128i a, __m128i b) - { - return vreinterpretq_m128i_u8( -@@ -3659,7 +3304,7 @@ FORCE_INLINE __m128i _mm_cmplt_epi8(__m128i a, __m128i b) - - // Compare packed double-precision (64-bit) floating-point elements in a and b - // for less-than, and store the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_pd - FORCE_INLINE __m128d _mm_cmplt_pd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -3681,7 +3326,7 @@ FORCE_INLINE __m128d _mm_cmplt_pd(__m128d a, __m128d b) - // Compare the lower double-precision (64-bit) floating-point elements in a and - // b for less-than, store the result in the lower element of dst, and copy the - // upper element from a to the upper element of dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_sd - FORCE_INLINE __m128d _mm_cmplt_sd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -3700,7 +3345,7 @@ FORCE_INLINE __m128d _mm_cmplt_sd(__m128d a, __m128d b) - - // Compare packed double-precision (64-bit) floating-point elements in a and b - // for not-equal, and store the results in dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_pd - FORCE_INLINE __m128d _mm_cmpneq_pd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -3718,7 +3363,7 @@ FORCE_INLINE __m128d _mm_cmpneq_pd(__m128d a, __m128d b) - // Compare the lower double-precision (64-bit) floating-point elements in a and - // b for not-equal, store the result in the lower element of dst, and copy the - // upper element from a to the upper element of dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_sd - FORCE_INLINE __m128d _mm_cmpneq_sd(__m128d a, __m128d b) - { - return _mm_move_sd(a, _mm_cmpneq_pd(a, b)); -@@ -3726,7 +3371,7 @@ FORCE_INLINE __m128d _mm_cmpneq_sd(__m128d a, __m128d b) - - // Compare packed double-precision (64-bit) floating-point elements in a and b - // for not-greater-than-or-equal, and store the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_pd - FORCE_INLINE __m128d _mm_cmpnge_pd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -3751,7 +3396,7 @@ FORCE_INLINE __m128d _mm_cmpnge_pd(__m128d a, __m128d b) - // Compare the lower double-precision (64-bit) floating-point elements in a and - // b for not-greater-than-or-equal, store the result in the lower element of - // dst, and copy the upper element from a to the upper element of dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_sd - FORCE_INLINE __m128d _mm_cmpnge_sd(__m128d a, __m128d b) - { - return _mm_move_sd(a, _mm_cmpnge_pd(a, b)); -@@ -3759,7 +3404,7 @@ FORCE_INLINE __m128d _mm_cmpnge_sd(__m128d a, __m128d b) - - // Compare packed double-precision (64-bit) floating-point elements in a and b - // for not-greater-than, and store the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_cmpngt_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cmpngt_pd - FORCE_INLINE __m128d _mm_cmpngt_pd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -3784,7 +3429,7 @@ FORCE_INLINE __m128d _mm_cmpngt_pd(__m128d a, __m128d b) - // Compare the lower double-precision (64-bit) floating-point elements in a and - // b for not-greater-than, store the result in the lower element of dst, and - // copy the upper element from a to the upper element of dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_sd - FORCE_INLINE __m128d _mm_cmpngt_sd(__m128d a, __m128d b) - { - return _mm_move_sd(a, _mm_cmpngt_pd(a, b)); -@@ -3792,7 +3437,7 @@ FORCE_INLINE __m128d _mm_cmpngt_sd(__m128d a, __m128d b) - - // Compare packed double-precision (64-bit) floating-point elements in a and b - // for not-less-than-or-equal, and store the results in dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_pd - FORCE_INLINE __m128d _mm_cmpnle_pd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -3817,7 +3462,7 @@ FORCE_INLINE __m128d _mm_cmpnle_pd(__m128d a, __m128d b) - // Compare the lower double-precision (64-bit) floating-point elements in a and - // b for not-less-than-or-equal, store the result in the lower element of dst, - // and copy the upper element from a to the upper element of dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_sd - FORCE_INLINE __m128d _mm_cmpnle_sd(__m128d a, __m128d b) - { - return _mm_move_sd(a, _mm_cmpnle_pd(a, b)); -@@ -3825,7 +3470,7 @@ FORCE_INLINE __m128d _mm_cmpnle_sd(__m128d a, __m128d b) - - // Compare packed double-precision (64-bit) floating-point elements in a and b - // for not-less-than, and store the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_pd - FORCE_INLINE __m128d _mm_cmpnlt_pd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -3850,7 +3495,7 @@ FORCE_INLINE __m128d _mm_cmpnlt_pd(__m128d a, __m128d b) - // Compare the lower double-precision (64-bit) floating-point elements in a and - // b for not-less-than, store the result in the lower element of dst, and copy - // the upper element from a to the upper element of dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_sd - FORCE_INLINE __m128d _mm_cmpnlt_sd(__m128d a, __m128d b) - { - return _mm_move_sd(a, _mm_cmpnlt_pd(a, b)); -@@ -3858,7 +3503,7 @@ FORCE_INLINE __m128d _mm_cmpnlt_sd(__m128d a, __m128d b) - - // Compare packed double-precision (64-bit) floating-point elements in a and b - // to see if neither is NaN, and store the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_pd - FORCE_INLINE __m128d _mm_cmpord_pd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -3890,7 +3535,7 @@ FORCE_INLINE __m128d _mm_cmpord_pd(__m128d a, __m128d b) - // Compare the lower double-precision (64-bit) floating-point elements in a and - // b to see if neither is NaN, store the result in the lower element of dst, and - // copy the upper element from a to the upper element of dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_sd - FORCE_INLINE __m128d _mm_cmpord_sd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -3912,7 +3557,7 @@ FORCE_INLINE __m128d _mm_cmpord_sd(__m128d a, __m128d b) - - // Compare packed double-precision (64-bit) floating-point elements in a and b - // to see if either is NaN, and store the results in dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_pd - FORCE_INLINE __m128d _mm_cmpunord_pd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -3945,7 +3590,7 @@ FORCE_INLINE __m128d _mm_cmpunord_pd(__m128d a, __m128d b) - // Compare the lower double-precision (64-bit) floating-point elements in a and - // b to see if either is NaN, store the result in the lower element of dst, and - // copy the upper element from a to the upper element of dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_sd - FORCE_INLINE __m128d _mm_cmpunord_sd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -3967,7 +3612,7 @@ FORCE_INLINE __m128d _mm_cmpunord_sd(__m128d a, __m128d b) - - // Compare the lower double-precision (64-bit) floating-point element in a and b - // for greater-than-or-equal, and return the boolean result (0 or 1). --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comige_sd - FORCE_INLINE int _mm_comige_sd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -3982,7 +3627,7 @@ FORCE_INLINE int _mm_comige_sd(__m128d a, __m128d b) - - // Compare the lower double-precision (64-bit) floating-point element in a and b - // for greater-than, and return the boolean result (0 or 1). 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comigt_sd - FORCE_INLINE int _mm_comigt_sd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -3997,7 +3642,7 @@ FORCE_INLINE int _mm_comigt_sd(__m128d a, __m128d b) - - // Compare the lower double-precision (64-bit) floating-point element in a and b - // for less-than-or-equal, and return the boolean result (0 or 1). --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comile_sd - FORCE_INLINE int _mm_comile_sd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -4012,7 +3657,7 @@ FORCE_INLINE int _mm_comile_sd(__m128d a, __m128d b) - - // Compare the lower double-precision (64-bit) floating-point element in a and b - // for less-than, and return the boolean result (0 or 1). --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comilt_sd - FORCE_INLINE int _mm_comilt_sd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -4027,7 +3672,7 @@ FORCE_INLINE int _mm_comilt_sd(__m128d a, __m128d b) - - // Compare the lower double-precision (64-bit) floating-point element in a and b - // for equality, and return the boolean result (0 or 1). --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comieq_sd - FORCE_INLINE int _mm_comieq_sd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -4048,7 +3693,7 @@ FORCE_INLINE int _mm_comieq_sd(__m128d a, __m128d b) - - // Compare the lower double-precision (64-bit) floating-point element in a and b - // for not-equal, and return the boolean result (0 or 1). 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comineq_sd - FORCE_INLINE int _mm_comineq_sd(__m128d a, __m128d b) - { - return !_mm_comieq_sd(a, b); -@@ -4056,14 +3701,7 @@ FORCE_INLINE int _mm_comineq_sd(__m128d a, __m128d b) - - // Convert packed signed 32-bit integers in a to packed double-precision - // (64-bit) floating-point elements, and store the results in dst. --// --// FOR j := 0 to 1 --// i := j*32 --// m := j*64 --// dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_pd - FORCE_INLINE __m128d _mm_cvtepi32_pd(__m128i a) - { - #if defined(__aarch64__) -@@ -4076,9 +3714,9 @@ FORCE_INLINE __m128d _mm_cvtepi32_pd(__m128i a) - #endif - } - --// Converts the four signed 32-bit integer values of a to single-precision, --// floating-point values --// https://msdn.microsoft.com/en-us/library/vstudio/36bwxcx5(v=vs.100).aspx -+// Convert packed signed 32-bit integers in a to packed single-precision -+// (32-bit) floating-point elements, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_ps - FORCE_INLINE __m128 _mm_cvtepi32_ps(__m128i a) - { - return vreinterpretq_m128_f32(vcvtq_f32_s32(vreinterpretq_s32_m128i(a))); -@@ -4086,14 +3724,7 @@ FORCE_INLINE __m128 _mm_cvtepi32_ps(__m128i a) - - // Convert packed double-precision (64-bit) floating-point elements in a to - // packed 32-bit integers, and store the results in dst. 
--// --// FOR j := 0 to 1 --// i := 32*j --// k := 64*j --// dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_epi32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epi32 - FORCE_INLINE __m128i _mm_cvtpd_epi32(__m128d a) - { - // vrnd32xq_f64 not supported on clang -@@ -4112,14 +3743,7 @@ FORCE_INLINE __m128i _mm_cvtpd_epi32(__m128d a) - - // Convert packed double-precision (64-bit) floating-point elements in a to - // packed 32-bit integers, and store the results in dst. --// --// FOR j := 0 to 1 --// i := 32*j --// k := 64*j --// dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_pi32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_pi32 - FORCE_INLINE __m64 _mm_cvtpd_pi32(__m128d a) - { - __m128d rnd = _mm_round_pd(a, _MM_FROUND_CUR_DIRECTION); -@@ -4132,15 +3756,7 @@ FORCE_INLINE __m64 _mm_cvtpd_pi32(__m128d a) - // Convert packed double-precision (64-bit) floating-point elements in a to - // packed single-precision (32-bit) floating-point elements, and store the - // results in dst. --// --// FOR j := 0 to 1 --// i := 32*j --// k := 64*j --// dst[i+31:i] := Convert_FP64_To_FP32(a[k+64:k]) --// ENDFOR --// dst[127:64] := 0 --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_ps - FORCE_INLINE __m128 _mm_cvtpd_ps(__m128d a) - { - #if defined(__aarch64__) -@@ -4155,14 +3771,7 @@ FORCE_INLINE __m128 _mm_cvtpd_ps(__m128d a) - - // Convert packed signed 32-bit integers in a to packed double-precision - // (64-bit) floating-point elements, and store the results in dst. 
--// --// FOR j := 0 to 1 --// i := j*32 --// m := j*64 --// dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi32_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpi32_pd - FORCE_INLINE __m128d _mm_cvtpi32_pd(__m64 a) - { - #if defined(__aarch64__) -@@ -4175,15 +3784,9 @@ FORCE_INLINE __m128d _mm_cvtpi32_pd(__m64 a) - #endif - } - --// Converts the four single-precision, floating-point values of a to signed --// 32-bit integer values. --// --// r0 := (int) a0 --// r1 := (int) a1 --// r2 := (int) a2 --// r3 := (int) a3 --// --// https://msdn.microsoft.com/en-us/library/vstudio/xdc42k5e(v=vs.100).aspx -+// Convert packed single-precision (32-bit) floating-point elements in a to -+// packed 32-bit integers, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi32 - // *NOTE*. The default rounding mode on SSE is 'round to even', which ARMv7-A - // does not support! It is supported on ARMv8-A however. - FORCE_INLINE __m128i _mm_cvtps_epi32(__m128 a) -@@ -4240,14 +3843,7 @@ FORCE_INLINE __m128i _mm_cvtps_epi32(__m128 a) - // Convert packed single-precision (32-bit) floating-point elements in a to - // packed double-precision (64-bit) floating-point elements, and store the - // results in dst. --// --// FOR j := 0 to 1 --// i := 64*j --// k := 32*j --// dst[i+63:i] := Convert_FP32_To_FP64(a[k+31:k]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_pd - FORCE_INLINE __m128d _mm_cvtps_pd(__m128 a) - { - #if defined(__aarch64__) -@@ -4261,10 +3857,7 @@ FORCE_INLINE __m128d _mm_cvtps_pd(__m128 a) - } - - // Copy the lower double-precision (64-bit) floating-point element of a to dst. 
--// --// dst[63:0] := a[63:0] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_f64 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_f64 - FORCE_INLINE double _mm_cvtsd_f64(__m128d a) - { - #if defined(__aarch64__) -@@ -4276,10 +3869,7 @@ FORCE_INLINE double _mm_cvtsd_f64(__m128d a) - - // Convert the lower double-precision (64-bit) floating-point element in a to a - // 32-bit integer, and store the result in dst. --// --// dst[31:0] := Convert_FP64_To_Int32(a[63:0]) --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_si32 - FORCE_INLINE int32_t _mm_cvtsd_si32(__m128d a) - { - #if defined(__aarch64__) -@@ -4293,10 +3883,7 @@ FORCE_INLINE int32_t _mm_cvtsd_si32(__m128d a) - - // Convert the lower double-precision (64-bit) floating-point element in a to a - // 64-bit integer, and store the result in dst. --// --// dst[63:0] := Convert_FP64_To_Int64(a[63:0]) --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si64 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_si64 - FORCE_INLINE int64_t _mm_cvtsd_si64(__m128d a) - { - #if defined(__aarch64__) -@@ -4310,17 +3897,14 @@ FORCE_INLINE int64_t _mm_cvtsd_si64(__m128d a) - - // Convert the lower double-precision (64-bit) floating-point element in a to a - // 64-bit integer, and store the result in dst. 
--// --// dst[63:0] := Convert_FP64_To_Int64(a[63:0]) --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si64x -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_si64x - #define _mm_cvtsd_si64x _mm_cvtsd_si64 - - // Convert the lower double-precision (64-bit) floating-point element in b to a - // single-precision (32-bit) floating-point element, store the result in the - // lower element of dst, and copy the upper 3 packed elements from a to the - // upper elements of dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_ss -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_ss - FORCE_INLINE __m128 _mm_cvtsd_ss(__m128 a, __m128d b) - { - #if defined(__aarch64__) -@@ -4334,33 +3918,27 @@ FORCE_INLINE __m128 _mm_cvtsd_ss(__m128 a, __m128d b) - } - - // Copy the lower 32-bit integer in a to dst. --// --// dst[31:0] := a[31:0] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si32 - FORCE_INLINE int _mm_cvtsi128_si32(__m128i a) - { - return vgetq_lane_s32(vreinterpretq_s32_m128i(a), 0); - } - - // Copy the lower 64-bit integer in a to dst. --// --// dst[63:0] := a[63:0] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si64 - FORCE_INLINE int64_t _mm_cvtsi128_si64(__m128i a) - { - return vgetq_lane_s64(vreinterpretq_s64_m128i(a), 0); - } - - // Copy the lower 64-bit integer in a to dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64x -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si64x - #define _mm_cvtsi128_si64x(a) _mm_cvtsi128_si64(a) - - // Convert the signed 32-bit integer b to a double-precision (64-bit) - // floating-point element, store the result in the lower element of dst, and - // copy the upper element from a to the upper element of dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_sd - FORCE_INLINE __m128d _mm_cvtsi32_sd(__m128d a, int32_t b) - { - #if defined(__aarch64__) -@@ -4374,21 +3952,12 @@ FORCE_INLINE __m128d _mm_cvtsi32_sd(__m128d a, int32_t b) - } - - // Copy the lower 64-bit integer in a to dst. --// --// dst[63:0] := a[63:0] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64x -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si64x - #define _mm_cvtsi128_si64x(a) _mm_cvtsi128_si64(a) - --// Moves 32-bit integer a to the least significant 32 bits of an __m128 object, --// zero extending the upper bits. --// --// r0 := a --// r1 := 0x0 --// r2 := 0x0 --// r3 := 0x0 --// --// https://msdn.microsoft.com/en-us/library/ct3539ha%28v=vs.90%29.aspx -+// Copy 32-bit integer a to the lower elements of dst, and zero the upper -+// elements of dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_si128 - FORCE_INLINE __m128i _mm_cvtsi32_si128(int a) - { - return vreinterpretq_m128i_s32(vsetq_lane_s32(a, vdupq_n_s32(0), 0)); -@@ -4397,7 +3966,7 @@ FORCE_INLINE __m128i _mm_cvtsi32_si128(int a) - // Convert the signed 64-bit integer b to a double-precision (64-bit) - // floating-point element, store the result in the lower element of dst, and - // copy the upper element from a to the upper element of dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64_sd - FORCE_INLINE __m128d _mm_cvtsi64_sd(__m128d a, int64_t b) - { - #if defined(__aarch64__) -@@ -4410,11 +3979,9 @@ FORCE_INLINE __m128d _mm_cvtsi64_sd(__m128d a, int64_t b) - #endif - } - --// Moves 64-bit integer a to the least significant 64 bits of an __m128 object, --// zero extending the upper bits. --// --// r0 := a --// r1 := 0x0 -+// Copy 64-bit integer a to the lower element of dst, and zero the upper -+// element. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64_si128 - FORCE_INLINE __m128i _mm_cvtsi64_si128(int64_t a) - { - return vreinterpretq_m128i_s64(vsetq_lane_s64(a, vdupq_n_s64(0), 0)); -@@ -4422,24 +3989,20 @@ FORCE_INLINE __m128i _mm_cvtsi64_si128(int64_t a) - - // Copy 64-bit integer a to the lower element of dst, and zero the upper - // element. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64x_si128 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64x_si128 - #define _mm_cvtsi64x_si128(a) _mm_cvtsi64_si128(a) - - // Convert the signed 64-bit integer b to a double-precision (64-bit) - // floating-point element, store the result in the lower element of dst, and - // copy the upper element from a to the upper element of dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64x_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi64x_sd - #define _mm_cvtsi64x_sd(a, b) _mm_cvtsi64_sd(a, b) - - // Convert the lower single-precision (32-bit) floating-point element in b to a - // double-precision (64-bit) floating-point element, store the result in the - // lower element of dst, and copy the upper element from a to the upper element - // of dst. --// --// dst[63:0] := Convert_FP32_To_FP64(b[31:0]) --// dst[127:64] := a[127:64] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_sd - FORCE_INLINE __m128d _mm_cvtss_sd(__m128d a, __m128 b) - { - double d = (double) vgetq_lane_f32(vreinterpretq_f32_m128(b), 0); -@@ -4454,7 +4017,7 @@ FORCE_INLINE __m128d _mm_cvtss_sd(__m128d a, __m128 b) - - // Convert packed double-precision (64-bit) floating-point elements in a to - // packed 32-bit integers with truncation, and store the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttpd_epi32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epi32 - FORCE_INLINE __m128i _mm_cvttpd_epi32(__m128d a) - { - double a0 = ((double *) &a)[0]; -@@ -4464,7 +4027,7 @@ FORCE_INLINE __m128i _mm_cvttpd_epi32(__m128d a) - - // Convert packed double-precision (64-bit) floating-point elements in a to - // packed 32-bit integers with truncation, and store the results in dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttpd_pi32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_pi32 - FORCE_INLINE __m64 _mm_cvttpd_pi32(__m128d a) - { - double a0 = ((double *) &a)[0]; -@@ -4473,9 +4036,9 @@ FORCE_INLINE __m64 _mm_cvttpd_pi32(__m128d a) - return vreinterpret_m64_s32(vld1_s32(data)); - } - --// Converts the four single-precision, floating-point values of a to signed --// 32-bit integer values using truncate. --// https://msdn.microsoft.com/en-us/library/vstudio/1h005y6x(v=vs.100).aspx -+// Convert packed single-precision (32-bit) floating-point elements in a to -+// packed 32-bit integers with truncation, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi32 - FORCE_INLINE __m128i _mm_cvttps_epi32(__m128 a) - { - return vreinterpretq_m128i_s32(vcvtq_s32_f32(vreinterpretq_f32_m128(a))); -@@ -4483,10 +4046,7 @@ FORCE_INLINE __m128i _mm_cvttps_epi32(__m128 a) - - // Convert the lower double-precision (64-bit) floating-point element in a to a - // 32-bit integer with truncation, and store the result in dst. --// --// dst[63:0] := Convert_FP64_To_Int32_Truncate(a[63:0]) --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_si32 - FORCE_INLINE int32_t _mm_cvttsd_si32(__m128d a) - { - double ret = *((double *) &a); -@@ -4495,10 +4055,7 @@ FORCE_INLINE int32_t _mm_cvttsd_si32(__m128d a) - - // Convert the lower double-precision (64-bit) floating-point element in a to a - // 64-bit integer with truncation, and store the result in dst. 
--// --// dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0]) --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si64 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_si64 - FORCE_INLINE int64_t _mm_cvttsd_si64(__m128d a) - { - #if defined(__aarch64__) -@@ -4511,21 +4068,12 @@ FORCE_INLINE int64_t _mm_cvttsd_si64(__m128d a) - - // Convert the lower double-precision (64-bit) floating-point element in a to a - // 64-bit integer with truncation, and store the result in dst. --// --// dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0]) --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si64x -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_si64x - #define _mm_cvttsd_si64x(a) _mm_cvttsd_si64(a) - - // Divide packed double-precision (64-bit) floating-point elements in a by - // packed elements in b, and store the results in dst. --// --// FOR j := 0 to 1 --// i := 64*j --// dst[i+63:i] := a[i+63:i] / b[i+63:i] --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_pd - FORCE_INLINE __m128d _mm_div_pd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -4545,7 +4093,7 @@ FORCE_INLINE __m128d _mm_div_pd(__m128d a, __m128d b) - // lower double-precision (64-bit) floating-point element in b, store the result - // in the lower element of dst, and copy the upper element from a to the upper - // element of dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_sd - FORCE_INLINE __m128d _mm_div_sd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -4558,16 +4106,16 @@ FORCE_INLINE __m128d _mm_div_sd(__m128d a, __m128d b) - #endif - } - --// Extracts the selected signed or unsigned 16-bit integer from a and zero --// extends. --// https://msdn.microsoft.com/en-us/library/6dceta0c(v=vs.100).aspx -+// Extract a 16-bit integer from a, selected with imm8, and store the result in -+// the lower element of dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi16 - // FORCE_INLINE int _mm_extract_epi16(__m128i a, __constrange(0,8) int imm) - #define _mm_extract_epi16(a, imm) \ - vgetq_lane_u16(vreinterpretq_u16_m128i(a), (imm)) - --// Inserts the least significant 16 bits of b into the selected 16-bit integer --// of a. --// https://msdn.microsoft.com/en-us/library/kaze8hz1%28v=vs.100%29.aspx -+// Copy a to dst, and insert the 16-bit integer i into dst at the location -+// specified by imm8. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi16 - // FORCE_INLINE __m128i _mm_insert_epi16(__m128i a, int b, - // __constrange(0,8) int imm) - #define _mm_insert_epi16(a, b, imm) \ -@@ -4576,12 +4124,10 @@ FORCE_INLINE __m128d _mm_div_sd(__m128d a, __m128d b) - vsetq_lane_s16((b), vreinterpretq_s16_m128i(a), (imm))); \ - }) - --// Loads two double-precision from 16-byte aligned memory, floating-point --// values. --// --// dst[127:0] := MEM[mem_addr+127:mem_addr] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd -+// Load 128-bits (composed of 2 packed double-precision (64-bit) floating-point -+// elements) from memory into dst. 
mem_addr must be aligned on a 16-byte -+// boundary or a general-protection exception may be generated. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd - FORCE_INLINE __m128d _mm_load_pd(const double *p) - { - #if defined(__aarch64__) -@@ -4595,21 +4141,13 @@ FORCE_INLINE __m128d _mm_load_pd(const double *p) - - // Load a double-precision (64-bit) floating-point element from memory into both - // elements of dst. --// --// dst[63:0] := MEM[mem_addr+63:mem_addr] --// dst[127:64] := MEM[mem_addr+63:mem_addr] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd1 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd1 - #define _mm_load_pd1 _mm_load1_pd - - // Load a double-precision (64-bit) floating-point element from memory into the - // lower of dst, and zero the upper element. mem_addr does not need to be - // aligned on any particular boundary. --// --// dst[63:0] := MEM[mem_addr+63:mem_addr] --// dst[127:64] := 0 --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_sd - FORCE_INLINE __m128d _mm_load_sd(const double *p) - { - #if defined(__aarch64__) -@@ -4621,8 +4159,9 @@ FORCE_INLINE __m128d _mm_load_sd(const double *p) - #endif - } - --// Loads 128-bit value. : --// https://msdn.microsoft.com/en-us/library/atzzad1h(v=vs.80).aspx -+// Load 128-bits of integer data from memory into dst. mem_addr must be aligned -+// on a 16-byte boundary or a general-protection exception may be generated. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_si128 - FORCE_INLINE __m128i _mm_load_si128(const __m128i *p) - { - return vreinterpretq_m128i_s32(vld1q_s32((const int32_t *) p)); -@@ -4630,11 +4169,7 @@ FORCE_INLINE __m128i _mm_load_si128(const __m128i *p) - - // Load a double-precision (64-bit) floating-point element from memory into both - // elements of dst. --// --// dst[63:0] := MEM[mem_addr+63:mem_addr] --// dst[127:64] := MEM[mem_addr+63:mem_addr] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load1_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load1_pd - FORCE_INLINE __m128d _mm_load1_pd(const double *p) - { - #if defined(__aarch64__) -@@ -4647,11 +4182,7 @@ FORCE_INLINE __m128d _mm_load1_pd(const double *p) - // Load a double-precision (64-bit) floating-point element from memory into the - // upper element of dst, and copy the lower element from a to dst. mem_addr does - // not need to be aligned on any particular boundary. --// --// dst[63:0] := a[63:0] --// dst[127:64] := MEM[mem_addr+63:mem_addr] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadh_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadh_pd - FORCE_INLINE __m128d _mm_loadh_pd(__m128d a, const double *p) - { - #if defined(__aarch64__) -@@ -4664,7 +4195,7 @@ FORCE_INLINE __m128d _mm_loadh_pd(__m128d a, const double *p) - } - - // Load 64-bit integer from memory into the first element of dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_epi64 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_epi64 - FORCE_INLINE __m128i _mm_loadl_epi64(__m128i const *p) - { - /* Load the lower 64 bits of the value pointed to by p into the -@@ -4677,11 +4208,7 @@ FORCE_INLINE __m128i _mm_loadl_epi64(__m128i const *p) - // Load a double-precision (64-bit) floating-point element from memory into the - // lower element of dst, and copy the upper element from a to dst. mem_addr does - // not need to be aligned on any particular boundary. --// --// dst[63:0] := MEM[mem_addr+63:mem_addr] --// dst[127:64] := a[127:64] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_pd - FORCE_INLINE __m128d _mm_loadl_pd(__m128d a, const double *p) - { - #if defined(__aarch64__) -@@ -4697,11 +4224,7 @@ FORCE_INLINE __m128d _mm_loadl_pd(__m128d a, const double *p) - // Load 2 double-precision (64-bit) floating-point elements from memory into dst - // in reverse order. mem_addr must be aligned on a 16-byte boundary or a - // general-protection exception may be generated. --// --// dst[63:0] := MEM[mem_addr+127:mem_addr+64] --// dst[127:64] := MEM[mem_addr+63:mem_addr] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadr_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadr_pd - FORCE_INLINE __m128d _mm_loadr_pd(const double *p) - { - #if defined(__aarch64__) -@@ -4714,39 +4237,32 @@ FORCE_INLINE __m128d _mm_loadr_pd(const double *p) - } - - // Loads two double-precision from unaligned memory, floating-point values. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_pd - FORCE_INLINE __m128d _mm_loadu_pd(const double *p) - { - return _mm_load_pd(p); - } - --// Loads 128-bit value. : --// https://msdn.microsoft.com/zh-cn/library/f4k12ae8(v=vs.90).aspx -+// Load 128-bits of integer data from memory into dst. mem_addr does not need to -+// be aligned on any particular boundary. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si128 - FORCE_INLINE __m128i _mm_loadu_si128(const __m128i *p) - { - return vreinterpretq_m128i_s32(vld1q_s32((const int32_t *) p)); - } - - // Load unaligned 32-bit integer from memory into the first element of dst. --// --// dst[31:0] := MEM[mem_addr+31:mem_addr] --// dst[MAX:32] := 0 --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si32 - FORCE_INLINE __m128i _mm_loadu_si32(const void *p) - { - return vreinterpretq_m128i_s32( - vsetq_lane_s32(*(const int32_t *) p, vdupq_n_s32(0), 0)); - } - --// Multiplies the 8 signed 16-bit integers from a by the 8 signed 16-bit --// integers from b. --// --// r0 := (a0 * b0) + (a1 * b1) --// r1 := (a2 * b2) + (a3 * b3) --// r2 := (a4 * b4) + (a5 * b5) --// r3 := (a6 * b6) + (a7 * b7) --// https://msdn.microsoft.com/en-us/library/yht36sa6(v=vs.90).aspx -+// Multiply packed signed 16-bit integers in a and b, producing intermediate -+// signed 32-bit integers. Horizontally add adjacent pairs of intermediate -+// 32-bit integers, and pack the results in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd_epi16 - FORCE_INLINE __m128i _mm_madd_epi16(__m128i a, __m128i b) - { - int32x4_t low = vmull_s16(vget_low_s16(vreinterpretq_s16_m128i(a)), -@@ -4771,7 +4287,7 @@ FORCE_INLINE __m128i _mm_madd_epi16(__m128i a, __m128i b) - // (elements are not stored when the highest bit is not set in the corresponding - // element) and a non-temporal memory hint. mem_addr does not need to be aligned - // on any particular boundary. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskmoveu_si128 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskmoveu_si128 - FORCE_INLINE void _mm_maskmoveu_si128(__m128i a, __m128i mask, char *mem_addr) - { - int8x16_t shr_mask = vshrq_n_s8(vreinterpretq_s8_m128i(mask), 7); -@@ -4782,18 +4298,18 @@ FORCE_INLINE void _mm_maskmoveu_si128(__m128i a, __m128i mask, char *mem_addr) - vst1q_s8((int8_t *) mem_addr, masked); - } - --// Computes the pairwise maxima of the 8 signed 16-bit integers from a and the 8 --// signed 16-bit integers from b. --// https://msdn.microsoft.com/en-us/LIBRary/3x060h7c(v=vs.100).aspx -+// Compare packed signed 16-bit integers in a and b, and store packed maximum -+// values in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi16 - FORCE_INLINE __m128i _mm_max_epi16(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s16( - vmaxq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); - } - --// Computes the pairwise maxima of the 16 unsigned 8-bit integers from a and the --// 16 unsigned 8-bit integers from b. --// https://msdn.microsoft.com/en-us/library/st6634za(v=vs.100).aspx -+// Compare packed unsigned 8-bit integers in a and b, and store packed maximum -+// values in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu8 - FORCE_INLINE __m128i _mm_max_epu8(__m128i a, __m128i b) - { - return vreinterpretq_m128i_u8( -@@ -4802,7 +4318,7 @@ FORCE_INLINE __m128i _mm_max_epu8(__m128i a, __m128i b) - - // Compare packed double-precision (64-bit) floating-point elements in a and b, - // and store packed maximum values in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_pd - FORCE_INLINE __m128d _mm_max_pd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -4830,7 +4346,7 @@ FORCE_INLINE __m128d _mm_max_pd(__m128d a, __m128d b) - // Compare the lower double-precision (64-bit) floating-point elements in a and - // b, store the maximum value in the lower element of dst, and copy the upper - // element from a to the upper element of dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_sd - FORCE_INLINE __m128d _mm_max_sd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -4843,18 +4359,18 @@ FORCE_INLINE __m128d _mm_max_sd(__m128d a, __m128d b) - #endif - } - --// Computes the pairwise minima of the 8 signed 16-bit integers from a and the 8 --// signed 16-bit integers from b. --// https://msdn.microsoft.com/en-us/library/vstudio/6te997ew(v=vs.100).aspx -+// Compare packed signed 16-bit integers in a and b, and store packed minimum -+// values in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi16 - FORCE_INLINE __m128i _mm_min_epi16(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s16( - vminq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); - } - --// Computes the pairwise minima of the 16 unsigned 8-bit integers from a and the --// 16 unsigned 8-bit integers from b. 
--// https://msdn.microsoft.com/ko-kr/library/17k8cf58(v=vs.100).aspxx -+// Compare packed unsigned 8-bit integers in a and b, and store packed minimum -+// values in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu8 - FORCE_INLINE __m128i _mm_min_epu8(__m128i a, __m128i b) - { - return vreinterpretq_m128i_u8( -@@ -4863,7 +4379,7 @@ FORCE_INLINE __m128i _mm_min_epu8(__m128i a, __m128i b) - - // Compare packed double-precision (64-bit) floating-point elements in a and b, - // and store packed minimum values in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_pd - FORCE_INLINE __m128d _mm_min_pd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -4890,7 +4406,7 @@ FORCE_INLINE __m128d _mm_min_pd(__m128d a, __m128d b) - // Compare the lower double-precision (64-bit) floating-point elements in a and - // b, store the minimum value in the lower element of dst, and copy the upper - // element from a to the upper element of dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_sd - FORCE_INLINE __m128d _mm_min_sd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -4905,11 +4421,7 @@ FORCE_INLINE __m128d _mm_min_sd(__m128d a, __m128d b) - - // Copy the lower 64-bit integer in a to the lower element of dst, and zero the - // upper element. 
--// --// dst[63:0] := a[63:0] --// dst[127:64] := 0 --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_epi64 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_epi64 - FORCE_INLINE __m128i _mm_move_epi64(__m128i a) - { - return vreinterpretq_m128i_s64( -@@ -4919,11 +4431,7 @@ FORCE_INLINE __m128i _mm_move_epi64(__m128i a) - // Move the lower double-precision (64-bit) floating-point element from b to the - // lower element of dst, and copy the upper element from a to the upper element - // of dst. --// --// dst[63:0] := b[63:0] --// dst[127:64] := a[127:64] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_sd - FORCE_INLINE __m128d _mm_move_sd(__m128d a, __m128d b) - { - return vreinterpretq_m128d_f32( -@@ -4931,10 +4439,9 @@ FORCE_INLINE __m128d _mm_move_sd(__m128d a, __m128d b) - vget_high_f32(vreinterpretq_f32_m128d(a)))); - } - --// NEON does not provide a version of this function. --// Creates a 16-bit mask from the most significant bits of the 16 signed or --// unsigned 8-bit integers in a and zero extends the upper bits. --// https://msdn.microsoft.com/en-us/library/vstudio/s090c8fk(v=vs.100).aspx -+// Create mask from the most significant bit of each 8-bit element in a, and -+// store the result in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_epi8 - FORCE_INLINE int _mm_movemask_epi8(__m128i a) - { - // Use increasingly wide shifts+adds to collect the sign bits -@@ -5017,7 +4524,7 @@ FORCE_INLINE int _mm_movemask_epi8(__m128i a) - - // Set each bit of mask dst based on the most significant bit of the - // corresponding packed double-precision (64-bit) floating-point element in a. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_pd - FORCE_INLINE int _mm_movemask_pd(__m128d a) - { - uint64x2_t input = vreinterpretq_u64_m128d(a); -@@ -5026,10 +4533,7 @@ FORCE_INLINE int _mm_movemask_pd(__m128d a) - } - - // Copy the lower 64-bit integer in a to dst. --// --// dst[63:0] := a[63:0] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movepi64_pi64 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi64_pi64 - FORCE_INLINE __m64 _mm_movepi64_pi64(__m128i a) - { - return vreinterpret_m64_s64(vget_low_s64(vreinterpretq_s64_m128i(a))); -@@ -5037,11 +4541,7 @@ FORCE_INLINE __m64 _mm_movepi64_pi64(__m128i a) - - // Copy the 64-bit integer a to the lower element of dst, and zero the upper - // element. --// --// dst[63:0] := a[63:0] --// dst[127:64] := 0 --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movpi64_epi64 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movpi64_epi64 - FORCE_INLINE __m128i _mm_movpi64_epi64(__m64 a) - { - return vreinterpretq_m128i_s64( -@@ -5050,9 +4550,7 @@ FORCE_INLINE __m128i _mm_movpi64_epi64(__m64 a) - - // Multiply the low unsigned 32-bit integers from each packed 64-bit element in - // a and b, and store the unsigned 64-bit results in dst. --// --// r0 := (a0 & 0xFFFFFFFF) * (b0 & 0xFFFFFFFF) --// r1 := (a2 & 0xFFFFFFFF) * (b2 & 0xFFFFFFFF) -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_epu32 - FORCE_INLINE __m128i _mm_mul_epu32(__m128i a, __m128i b) - { - // vmull_u32 upcasts instead of masking, so we downcast. 
-@@ -5063,7 +4561,7 @@ FORCE_INLINE __m128i _mm_mul_epu32(__m128i a, __m128i b) - - // Multiply packed double-precision (64-bit) floating-point elements in a and b, - // and store the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_pd - FORCE_INLINE __m128d _mm_mul_pd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -5082,7 +4580,7 @@ FORCE_INLINE __m128d _mm_mul_pd(__m128d a, __m128d b) - // Multiply the lower double-precision (64-bit) floating-point element in a and - // b, store the result in the lower element of dst, and copy the upper element - // from a to the upper element of dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mul_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_sd - FORCE_INLINE __m128d _mm_mul_sd(__m128d a, __m128d b) - { - return _mm_move_sd(a, _mm_mul_pd(a, b)); -@@ -5090,25 +4588,17 @@ FORCE_INLINE __m128d _mm_mul_sd(__m128d a, __m128d b) - - // Multiply the low unsigned 32-bit integers from a and b, and store the - // unsigned 64-bit result in dst. --// --// dst[63:0] := a[31:0] * b[31:0] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_su32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_su32 - FORCE_INLINE __m64 _mm_mul_su32(__m64 a, __m64 b) - { - return vreinterpret_m64_u64(vget_low_u64( - vmull_u32(vreinterpret_u32_m64(a), vreinterpret_u32_m64(b)))); - } - --// Multiplies the 8 signed 16-bit integers from a by the 8 signed 16-bit --// integers from b. --// --// r0 := (a0 * b0)[31:16] --// r1 := (a1 * b1)[31:16] --// ... 
--// r7 := (a7 * b7)[31:16] --// --// https://msdn.microsoft.com/en-us/library/vstudio/59hddw1d(v=vs.100).aspx -+// Multiply the packed signed 16-bit integers in a and b, producing intermediate -+// 32-bit integers, and store the high 16 bits of the intermediate integers in -+// dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epi16 - FORCE_INLINE __m128i _mm_mulhi_epi16(__m128i a, __m128i b) - { - /* FIXME: issue with large values because of result saturation */ -@@ -5129,7 +4619,7 @@ FORCE_INLINE __m128i _mm_mulhi_epi16(__m128i a, __m128i b) - // Multiply the packed unsigned 16-bit integers in a and b, producing - // intermediate 32-bit integers, and store the high 16 bits of the intermediate - // integers in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epu16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epu16 - FORCE_INLINE __m128i _mm_mulhi_epu16(__m128i a, __m128i b) - { - uint16x4_t a3210 = vget_low_u16(vreinterpretq_u16_m128i(a)); -@@ -5151,15 +4641,9 @@ FORCE_INLINE __m128i _mm_mulhi_epu16(__m128i a, __m128i b) - #endif - } - --// Multiplies the 8 signed or unsigned 16-bit integers from a by the 8 signed or --// unsigned 16-bit integers from b. --// --// r0 := (a0 * b0)[15:0] --// r1 := (a1 * b1)[15:0] --// ... --// r7 := (a7 * b7)[15:0] --// --// https://msdn.microsoft.com/en-us/library/vstudio/9ks1472s(v=vs.100).aspx -+// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit -+// integers, and store the low 16 bits of the intermediate integers in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi16 - FORCE_INLINE __m128i _mm_mullo_epi16(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s16( -@@ -5168,27 +4652,25 @@ FORCE_INLINE __m128i _mm_mullo_epi16(__m128i a, __m128i b) - - // Compute the bitwise OR of packed double-precision (64-bit) floating-point - // elements in a and b, and store the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_or_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_or_pd - FORCE_INLINE __m128d _mm_or_pd(__m128d a, __m128d b) - { - return vreinterpretq_m128d_s64( - vorrq_s64(vreinterpretq_s64_m128d(a), vreinterpretq_s64_m128d(b))); - } - --// Computes the bitwise OR of the 128-bit value in a and the 128-bit value in b. --// --// r := a | b --// --// https://msdn.microsoft.com/en-us/library/vstudio/ew8ty0db(v=vs.100).aspx -+// Compute the bitwise OR of 128 bits (representing integer data) in a and b, -+// and store the result in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_si128 - FORCE_INLINE __m128i _mm_or_si128(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s32( - vorrq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); - } - --// Packs the 16 signed 16-bit integers from a and b into 8-bit integers and --// saturates. --// https://msdn.microsoft.com/en-us/library/k4y4f7w5%28v=vs.90%29.aspx -+// Convert packed signed 16-bit integers from a and b to packed 8-bit integers -+// using signed saturation, and store the results in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi16 - FORCE_INLINE __m128i _mm_packs_epi16(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s8( -@@ -5196,19 +4678,9 @@ FORCE_INLINE __m128i _mm_packs_epi16(__m128i a, __m128i b) - vqmovn_s16(vreinterpretq_s16_m128i(b)))); - } - --// Packs the 8 signed 32-bit integers from a and b into signed 16-bit integers --// and saturates. --// --// r0 := SignedSaturate(a0) --// r1 := SignedSaturate(a1) --// r2 := SignedSaturate(a2) --// r3 := SignedSaturate(a3) --// r4 := SignedSaturate(b0) --// r5 := SignedSaturate(b1) --// r6 := SignedSaturate(b2) --// r7 := SignedSaturate(b3) --// --// https://msdn.microsoft.com/en-us/library/393t56f9%28v=vs.90%29.aspx -+// Convert packed signed 32-bit integers from a and b to packed 16-bit integers -+// using signed saturation, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32 - FORCE_INLINE __m128i _mm_packs_epi32(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s16( -@@ -5216,19 +4688,9 @@ FORCE_INLINE __m128i _mm_packs_epi32(__m128i a, __m128i b) - vqmovn_s32(vreinterpretq_s32_m128i(b)))); - } - --// Packs the 16 signed 16 - bit integers from a and b into 8 - bit unsigned --// integers and saturates. --// --// r0 := UnsignedSaturate(a0) --// r1 := UnsignedSaturate(a1) --// ... --// r7 := UnsignedSaturate(a7) --// r8 := UnsignedSaturate(b0) --// r9 := UnsignedSaturate(b1) --// ... --// r15 := UnsignedSaturate(b7) --// --// https://msdn.microsoft.com/en-us/library/07ad1wx4(v=vs.100).aspx -+// Convert packed signed 16-bit integers from a and b to packed 8-bit integers -+// using unsigned saturation, and store the results in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi16 - FORCE_INLINE __m128i _mm_packus_epi16(const __m128i a, const __m128i b) - { - return vreinterpretq_m128i_u8( -@@ -5241,6 +4703,7 @@ FORCE_INLINE __m128i _mm_packus_epi16(const __m128i a, const __m128i b) - // 'yield' instruction isn't a good fit because it's effectively a nop on most - // Arm cores. Experience with several databases has shown has shown an 'isb' is - // a reasonable approximation. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_pause - FORCE_INLINE void _mm_pause() - { - __asm__ __volatile__("isb\n"); -@@ -5250,15 +4713,15 @@ FORCE_INLINE void _mm_pause() - // b, then horizontally sum each consecutive 8 differences to produce two - // unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low - // 16 bits of 64-bit elements in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sad_epu8 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sad_epu8 - FORCE_INLINE __m128i _mm_sad_epu8(__m128i a, __m128i b) - { - uint16x8_t t = vpaddlq_u8(vabdq_u8((uint8x16_t) a, (uint8x16_t) b)); - return vreinterpretq_m128i_u64(vpaddlq_u32(vpaddlq_u16(t))); - } - --// Sets the 8 signed 16-bit integer values. --// https://msdn.microsoft.com/en-au/library/3e0fek84(v=vs.90).aspx -+// Set packed 16-bit integers in dst with the supplied values. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi16 - FORCE_INLINE __m128i _mm_set_epi16(short i7, - short i6, - short i5, -@@ -5272,33 +4735,31 @@ FORCE_INLINE __m128i _mm_set_epi16(short i7, - return vreinterpretq_m128i_s16(vld1q_s16(data)); - } - --// Sets the 4 signed 32-bit integer values. --// https://msdn.microsoft.com/en-us/library/vstudio/019beekt(v=vs.100).aspx -+// Set packed 32-bit integers in dst with the supplied values. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi32 - FORCE_INLINE __m128i _mm_set_epi32(int i3, int i2, int i1, int i0) - { - int32_t ALIGN_STRUCT(16) data[4] = {i0, i1, i2, i3}; - return vreinterpretq_m128i_s32(vld1q_s32(data)); - } - --// Returns the __m128i structure with its two 64-bit integer values --// initialized to the values of the two 64-bit integers passed in. --// https://msdn.microsoft.com/en-us/library/dk2sdw0h(v=vs.120).aspx -+// Set packed 64-bit integers in dst with the supplied values. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi64 - FORCE_INLINE __m128i _mm_set_epi64(__m64 i1, __m64 i2) - { - return _mm_set_epi64x((int64_t) i1, (int64_t) i2); - } - --// Returns the __m128i structure with its two 64-bit integer values --// initialized to the values of the two 64-bit integers passed in. --// https://msdn.microsoft.com/en-us/library/dk2sdw0h(v=vs.120).aspx -+// Set packed 64-bit integers in dst with the supplied values. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi64x - FORCE_INLINE __m128i _mm_set_epi64x(int64_t i1, int64_t i2) - { - return vreinterpretq_m128i_s64( - vcombine_s64(vcreate_s64(i2), vcreate_s64(i1))); - } - --// Sets the 16 signed 8-bit integer values. --// https://msdn.microsoft.com/en-us/library/x0cx8zd3(v=vs.90).aspx -+// Set packed 8-bit integers in dst with the supplied values. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi8 - FORCE_INLINE __m128i _mm_set_epi8(signed char b15, - signed char b14, - signed char b13, -@@ -5326,7 +4787,7 @@ FORCE_INLINE __m128i _mm_set_epi8(signed char b15, - - // Set packed double-precision (64-bit) floating-point elements in dst with the - // supplied values. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd - FORCE_INLINE __m128d _mm_set_pd(double e1, double e0) - { - double ALIGN_STRUCT(16) data[2] = {e0, e1}; -@@ -5339,12 +4800,12 @@ FORCE_INLINE __m128d _mm_set_pd(double e1, double e0) - - // Broadcast double-precision (64-bit) floating-point value a to all elements of - // dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd1 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd1 - #define _mm_set_pd1 _mm_set1_pd - - // Copy double-precision (64-bit) floating-point element a to the lower element - // of dst, and zero the upper element. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_sd - FORCE_INLINE __m128d _mm_set_sd(double a) - { - #if defined(__aarch64__) -@@ -5354,54 +4815,36 @@ FORCE_INLINE __m128d _mm_set_sd(double a) - #endif - } - --// Sets the 8 signed 16-bit integer values to w. --// --// r0 := w --// r1 := w --// ... --// r7 := w --// --// https://msdn.microsoft.com/en-us/library/k0ya3x0e(v=vs.90).aspx -+// Broadcast 16-bit integer a to all all elements of dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi16 - FORCE_INLINE __m128i _mm_set1_epi16(short w) - { - return vreinterpretq_m128i_s16(vdupq_n_s16(w)); - } - --// Sets the 4 signed 32-bit integer values to i. --// --// r0 := i --// r1 := i --// r2 := i --// r3 := I --// --// https://msdn.microsoft.com/en-us/library/vstudio/h4xscxat(v=vs.100).aspx -+// Broadcast 32-bit integer a to all elements of dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi32 - FORCE_INLINE __m128i _mm_set1_epi32(int _i) - { - return vreinterpretq_m128i_s32(vdupq_n_s32(_i)); - } - --// Sets the 2 signed 64-bit integer values to i. --// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/whtfzhzk(v=vs.100) -+// Broadcast 64-bit integer a to all elements of dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi64 - FORCE_INLINE __m128i _mm_set1_epi64(__m64 _i) - { - return vreinterpretq_m128i_s64(vdupq_n_s64((int64_t) _i)); - } - --// Sets the 2 signed 64-bit integer values to i. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi64x -+// Broadcast 64-bit integer a to all elements of dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi64x - FORCE_INLINE __m128i _mm_set1_epi64x(int64_t _i) - { - return vreinterpretq_m128i_s64(vdupq_n_s64(_i)); - } - --// Sets the 16 signed 8-bit integer values to b. --// --// r0 := b --// r1 := b --// ... --// r15 := b --// --// https://msdn.microsoft.com/en-us/library/6e14xhyf(v=vs.100).aspx -+// Broadcast 8-bit integer a to all elements of dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi8 - FORCE_INLINE __m128i _mm_set1_epi8(signed char w) - { - return vreinterpretq_m128i_s8(vdupq_n_s8(w)); -@@ -5409,7 +4852,7 @@ FORCE_INLINE __m128i _mm_set1_epi8(signed char w) - - // Broadcast double-precision (64-bit) floating-point value a to all elements of - // dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_pd - FORCE_INLINE __m128d _mm_set1_pd(double d) - { - #if defined(__aarch64__) -@@ -5419,13 +4862,8 @@ FORCE_INLINE __m128d _mm_set1_pd(double d) - #endif - } - --// Sets the 8 signed 16-bit integer values in reverse order. --// --// Return Value --// r0 := w0 --// r1 := w1 --// ... --// r7 := w7 -+// Set packed 16-bit integers in dst with the supplied values in reverse order. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi16 - FORCE_INLINE __m128i _mm_setr_epi16(short w0, - short w1, - short w2, -@@ -5439,8 +4877,8 @@ FORCE_INLINE __m128i _mm_setr_epi16(short w0, - return vreinterpretq_m128i_s16(vld1q_s16((int16_t *) data)); - } - --// Sets the 4 signed 32-bit integer values in reverse order --// https://technet.microsoft.com/en-us/library/security/27yb3ee5(v=vs.90).aspx -+// Set packed 32-bit integers in dst with the supplied values in reverse order. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi32 - FORCE_INLINE __m128i _mm_setr_epi32(int i3, int i2, int i1, int i0) - { - int32_t ALIGN_STRUCT(16) data[4] = {i3, i2, i1, i0}; -@@ -5448,14 +4886,14 @@ FORCE_INLINE __m128i _mm_setr_epi32(int i3, int i2, int i1, int i0) - } - - // Set packed 64-bit integers in dst with the supplied values in reverse order. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi64 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi64 - FORCE_INLINE __m128i _mm_setr_epi64(__m64 e1, __m64 e0) - { - return vreinterpretq_m128i_s64(vcombine_s64(e1, e0)); - } - --// Sets the 16 signed 8-bit integer values in reverse order. 
--// https://msdn.microsoft.com/en-us/library/2khb9c7k(v=vs.90).aspx -+// Set packed 8-bit integers in dst with the supplied values in reverse order. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi8 - FORCE_INLINE __m128i _mm_setr_epi8(signed char b0, - signed char b1, - signed char b2, -@@ -5483,14 +4921,14 @@ FORCE_INLINE __m128i _mm_setr_epi8(signed char b0, - - // Set packed double-precision (64-bit) floating-point elements in dst with the - // supplied values in reverse order. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_pd - FORCE_INLINE __m128d _mm_setr_pd(double e1, double e0) - { - return _mm_set_pd(e0, e1); - } - - // Return vector of type __m128d with all elements set to zero. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_pd - FORCE_INLINE __m128d _mm_setzero_pd(void) - { - #if defined(__aarch64__) -@@ -5500,15 +4938,16 @@ FORCE_INLINE __m128d _mm_setzero_pd(void) - #endif - } - --// Sets the 128-bit value to zero --// https://msdn.microsoft.com/en-us/library/vstudio/ys7dw0kh(v=vs.100).aspx -+// Return vector of type __m128i with all elements set to zero. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_si128 - FORCE_INLINE __m128i _mm_setzero_si128(void) - { - return vreinterpretq_m128i_s32(vdupq_n_s32(0)); - } - --// Shuffles the 4 signed or unsigned 32-bit integers in a as specified by imm. --// https://msdn.microsoft.com/en-us/library/56f67xbk%28v=vs.90%29.aspx -+// Shuffle 32-bit integers in a using the control in imm8, and store the results -+// in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi32 - // FORCE_INLINE __m128i _mm_shuffle_epi32(__m128i a, - // __constrange(0,255) int imm) - #ifdef _sse2neon_shuffle -@@ -5577,11 +5016,7 @@ FORCE_INLINE __m128i _mm_setzero_si128(void) - - // Shuffle double-precision (64-bit) floating-point elements using the control - // in imm8, and store the results in dst. --// --// dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] --// dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_pd - #ifdef _sse2neon_shuffle - #define _mm_shuffle_pd(a, b, imm8) \ - vreinterpretq_m128d_s64( \ -@@ -5627,17 +5062,7 @@ FORCE_INLINE __m128i _mm_setzero_si128(void) - - // Shift packed 16-bit integers in a left by count while shifting in zeros, and - // store the results in dst. --// --// FOR j := 0 to 7 --// i := j*16 --// IF count[63:0] > 15 --// dst[i+15:i] := 0 --// ELSE --// dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) --// FI --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi16 - FORCE_INLINE __m128i _mm_sll_epi16(__m128i a, __m128i count) - { - uint64_t c = vreinterpretq_nth_u64_m128i(count, 0); -@@ -5650,17 +5075,7 @@ FORCE_INLINE __m128i _mm_sll_epi16(__m128i a, __m128i count) - - // Shift packed 32-bit integers in a left by count while shifting in zeros, and - // store the results in dst. 
--// --// FOR j := 0 to 3 --// i := j*32 --// IF count[63:0] > 31 --// dst[i+31:i] := 0 --// ELSE --// dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) --// FI --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi32 - FORCE_INLINE __m128i _mm_sll_epi32(__m128i a, __m128i count) - { - uint64_t c = vreinterpretq_nth_u64_m128i(count, 0); -@@ -5673,17 +5088,7 @@ FORCE_INLINE __m128i _mm_sll_epi32(__m128i a, __m128i count) - - // Shift packed 64-bit integers in a left by count while shifting in zeros, and - // store the results in dst. --// --// FOR j := 0 to 1 --// i := j*64 --// IF count[63:0] > 63 --// dst[i+63:i] := 0 --// ELSE --// dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) --// FI --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi64 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi64 - FORCE_INLINE __m128i _mm_sll_epi64(__m128i a, __m128i count) - { - uint64_t c = vreinterpretq_nth_u64_m128i(count, 0); -@@ -5696,17 +5101,7 @@ FORCE_INLINE __m128i _mm_sll_epi64(__m128i a, __m128i count) - - // Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and - // store the results in dst. 
--// --// FOR j := 0 to 7 --// i := j*16 --// IF imm8[7:0] > 15 --// dst[i+15:i] := 0 --// ELSE --// dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) --// FI --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi16 - FORCE_INLINE __m128i _mm_slli_epi16(__m128i a, int imm) - { - if (_sse2neon_unlikely(imm & ~15)) -@@ -5717,17 +5112,7 @@ FORCE_INLINE __m128i _mm_slli_epi16(__m128i a, int imm) - - // Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and - // store the results in dst. --// --// FOR j := 0 to 3 --// i := j*32 --// IF imm8[7:0] > 31 --// dst[i+31:i] := 0 --// ELSE --// dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) --// FI --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi32 - FORCE_INLINE __m128i _mm_slli_epi32(__m128i a, int imm) - { - if (_sse2neon_unlikely(imm & ~31)) -@@ -5738,17 +5123,7 @@ FORCE_INLINE __m128i _mm_slli_epi32(__m128i a, int imm) - - // Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and - // store the results in dst. --// --// FOR j := 0 to 1 --// i := j*64 --// IF imm8[7:0] > 63 --// dst[i+63:i] := 0 --// ELSE --// dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) --// FI --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi64 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi64 - FORCE_INLINE __m128i _mm_slli_epi64(__m128i a, int imm) - { - if (_sse2neon_unlikely(imm & ~63)) -@@ -5759,14 +5134,7 @@ FORCE_INLINE __m128i _mm_slli_epi64(__m128i a, int imm) - - // Shift a left by imm8 bytes while shifting in zeros, and store the results in - // dst. 
--// --// tmp := imm8[7:0] --// IF tmp > 15 --// tmp := 16 --// FI --// dst[127:0] := a[127:0] << (tmp*8) --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_si128 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_si128 - #define _mm_slli_si128(a, imm) \ - __extension__({ \ - int8x16_t ret; \ -@@ -5782,7 +5150,7 @@ FORCE_INLINE __m128i _mm_slli_epi64(__m128i a, int imm) - - // Compute the square root of packed double-precision (64-bit) floating-point - // elements in a, and store the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_pd - FORCE_INLINE __m128d _mm_sqrt_pd(__m128d a) - { - #if defined(__aarch64__) -@@ -5797,7 +5165,7 @@ FORCE_INLINE __m128d _mm_sqrt_pd(__m128d a) - // Compute the square root of the lower double-precision (64-bit) floating-point - // element in b, store the result in the lower element of dst, and copy the - // upper element from a to the upper element of dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_sd - FORCE_INLINE __m128d _mm_sqrt_sd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -5809,17 +5177,7 @@ FORCE_INLINE __m128d _mm_sqrt_sd(__m128d a, __m128d b) - - // Shift packed 16-bit integers in a right by count while shifting in sign bits, - // and store the results in dst. --// --// FOR j := 0 to 7 --// i := j*16 --// IF count[63:0] > 15 --// dst[i+15:i] := (a[i+15] ? 
0xFFFF : 0x0) --// ELSE --// dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) --// FI --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi16 - FORCE_INLINE __m128i _mm_sra_epi16(__m128i a, __m128i count) - { - int64_t c = (int64_t) vget_low_s64((int64x2_t) count); -@@ -5830,17 +5188,7 @@ FORCE_INLINE __m128i _mm_sra_epi16(__m128i a, __m128i count) - - // Shift packed 32-bit integers in a right by count while shifting in sign bits, - // and store the results in dst. --// --// FOR j := 0 to 3 --// i := j*32 --// IF count[63:0] > 31 --// dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) --// ELSE --// dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) --// FI --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi32 - FORCE_INLINE __m128i _mm_sra_epi32(__m128i a, __m128i count) - { - int64_t c = (int64_t) vget_low_s64((int64x2_t) count); -@@ -5851,17 +5199,7 @@ FORCE_INLINE __m128i _mm_sra_epi32(__m128i a, __m128i count) - - // Shift packed 16-bit integers in a right by imm8 while shifting in sign - // bits, and store the results in dst. --// --// FOR j := 0 to 7 --// i := j*16 --// IF imm8[7:0] > 15 --// dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) --// ELSE --// dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) --// FI --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi16 - FORCE_INLINE __m128i _mm_srai_epi16(__m128i a, int imm) - { - const int count = (imm & ~15) ? 
15 : imm; -@@ -5870,17 +5208,7 @@ FORCE_INLINE __m128i _mm_srai_epi16(__m128i a, int imm) - - // Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, - // and store the results in dst. --// --// FOR j := 0 to 3 --// i := j*32 --// IF imm8[7:0] > 31 --// dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) --// ELSE --// dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) --// FI --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi32 - // FORCE_INLINE __m128i _mm_srai_epi32(__m128i a, __constrange(0,255) int imm) - #define _mm_srai_epi32(a, imm) \ - __extension__({ \ -@@ -5899,17 +5227,7 @@ FORCE_INLINE __m128i _mm_srai_epi16(__m128i a, int imm) - - // Shift packed 16-bit integers in a right by count while shifting in zeros, and - // store the results in dst. --// --// FOR j := 0 to 7 --// i := j*16 --// IF count[63:0] > 15 --// dst[i+15:i] := 0 --// ELSE --// dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) --// FI --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi16 - FORCE_INLINE __m128i _mm_srl_epi16(__m128i a, __m128i count) - { - uint64_t c = vreinterpretq_nth_u64_m128i(count, 0); -@@ -5922,17 +5240,7 @@ FORCE_INLINE __m128i _mm_srl_epi16(__m128i a, __m128i count) - - // Shift packed 32-bit integers in a right by count while shifting in zeros, and - // store the results in dst. 
--// --// FOR j := 0 to 3 --// i := j*32 --// IF count[63:0] > 31 --// dst[i+31:i] := 0 --// ELSE --// dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) --// FI --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi32 - FORCE_INLINE __m128i _mm_srl_epi32(__m128i a, __m128i count) - { - uint64_t c = vreinterpretq_nth_u64_m128i(count, 0); -@@ -5945,17 +5253,7 @@ FORCE_INLINE __m128i _mm_srl_epi32(__m128i a, __m128i count) - - // Shift packed 64-bit integers in a right by count while shifting in zeros, and - // store the results in dst. --// --// FOR j := 0 to 1 --// i := j*64 --// IF count[63:0] > 63 --// dst[i+63:i] := 0 --// ELSE --// dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) --// FI --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi64 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi64 - FORCE_INLINE __m128i _mm_srl_epi64(__m128i a, __m128i count) - { - uint64_t c = vreinterpretq_nth_u64_m128i(count, 0); -@@ -5968,17 +5266,7 @@ FORCE_INLINE __m128i _mm_srl_epi64(__m128i a, __m128i count) - - // Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and - // store the results in dst. 
--// --// FOR j := 0 to 7 --// i := j*16 --// IF imm8[7:0] > 15 --// dst[i+15:i] := 0 --// ELSE --// dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) --// FI --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi16 - #define _mm_srli_epi16(a, imm) \ - __extension__({ \ - __m128i ret; \ -@@ -5993,17 +5281,7 @@ FORCE_INLINE __m128i _mm_srl_epi64(__m128i a, __m128i count) - - // Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and - // store the results in dst. --// --// FOR j := 0 to 3 --// i := j*32 --// IF imm8[7:0] > 31 --// dst[i+31:i] := 0 --// ELSE --// dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) --// FI --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi32 - // FORCE_INLINE __m128i _mm_srli_epi32(__m128i a, __constrange(0,255) int imm) - #define _mm_srli_epi32(a, imm) \ - __extension__({ \ -@@ -6019,17 +5297,7 @@ FORCE_INLINE __m128i _mm_srl_epi64(__m128i a, __m128i count) - - // Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and - // store the results in dst. --// --// FOR j := 0 to 1 --// i := j*64 --// IF imm8[7:0] > 63 --// dst[i+63:i] := 0 --// ELSE --// dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) --// FI --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi64 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi64 - #define _mm_srli_epi64(a, imm) \ - __extension__({ \ - __m128i ret; \ -@@ -6044,14 +5312,7 @@ FORCE_INLINE __m128i _mm_srl_epi64(__m128i a, __m128i count) - - // Shift a right by imm8 bytes while shifting in zeros, and store the results in - // dst. 
--// --// tmp := imm8[7:0] --// IF tmp > 15 --// tmp := 16 --// FI --// dst[127:0] := a[127:0] >> (tmp*8) --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_si128 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_si128 - #define _mm_srli_si128(a, imm) \ - __extension__({ \ - int8x16_t ret; \ -@@ -6066,7 +5327,7 @@ FORCE_INLINE __m128i _mm_srl_epi64(__m128i a, __m128i count) - // Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point - // elements) from a into memory. mem_addr must be aligned on a 16-byte boundary - // or a general-protection exception may be generated. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd - FORCE_INLINE void _mm_store_pd(double *mem_addr, __m128d a) - { - #if defined(__aarch64__) -@@ -6079,7 +5340,7 @@ FORCE_INLINE void _mm_store_pd(double *mem_addr, __m128d a) - // Store the lower double-precision (64-bit) floating-point element from a into - // 2 contiguous elements in memory. mem_addr must be aligned on a 16-byte - // boundary or a general-protection exception may be generated. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd1 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd1 - FORCE_INLINE void _mm_store_pd1(double *mem_addr, __m128d a) - { - #if defined(__aarch64__) -@@ -6095,7 +5356,7 @@ FORCE_INLINE void _mm_store_pd1(double *mem_addr, __m128d a) - - // Store the lower double-precision (64-bit) floating-point element from a into - // memory. mem_addr does not need to be aligned on any particular boundary. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_store_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_store_sd - FORCE_INLINE void _mm_store_sd(double *mem_addr, __m128d a) - { - #if defined(__aarch64__) -@@ -6105,8 +5366,9 @@ FORCE_INLINE void _mm_store_sd(double *mem_addr, __m128d a) - #endif - } - --// Stores four 32-bit integer values as (as a __m128i value) at the address p. --// https://msdn.microsoft.com/en-us/library/vstudio/edk11s13(v=vs.100).aspx -+// Store 128-bits of integer data from a into memory. mem_addr must be aligned -+// on a 16-byte boundary or a general-protection exception may be generated. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_si128 - FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a) - { - vst1q_s32((int32_t *) p, vreinterpretq_s32_m128i(a)); -@@ -6115,15 +5377,12 @@ FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a) - // Store the lower double-precision (64-bit) floating-point element from a into - // 2 contiguous elements in memory. mem_addr must be aligned on a 16-byte - // boundary or a general-protection exception may be generated. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=9,526,5601&text=_mm_store1_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#expand=9,526,5601&text=_mm_store1_pd - #define _mm_store1_pd _mm_store_pd1 - - // Store the upper double-precision (64-bit) floating-point element from a into - // memory. 
--// --// MEM[mem_addr+63:mem_addr] := a[127:64] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeh_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeh_pd - FORCE_INLINE void _mm_storeh_pd(double *mem_addr, __m128d a) - { - #if defined(__aarch64__) -@@ -6133,8 +5392,8 @@ FORCE_INLINE void _mm_storeh_pd(double *mem_addr, __m128d a) - #endif - } - --// Reads the lower 64 bits of b and stores them into the lower 64 bits of a. --// https://msdn.microsoft.com/en-us/library/hhwf428f%28v=vs.90%29.aspx -+// Store 64-bit integer from the first element of a into memory. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_epi64 - FORCE_INLINE void _mm_storel_epi64(__m128i *a, __m128i b) - { - vst1_u64((uint64_t *) a, vget_low_u64(vreinterpretq_u64_m128i(b))); -@@ -6142,10 +5401,7 @@ FORCE_INLINE void _mm_storel_epi64(__m128i *a, __m128i b) - - // Store the lower double-precision (64-bit) floating-point element from a into - // memory. --// --// MEM[mem_addr+63:mem_addr] := a[63:0] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_pd - FORCE_INLINE void _mm_storel_pd(double *mem_addr, __m128d a) - { - #if defined(__aarch64__) -@@ -6158,11 +5414,7 @@ FORCE_INLINE void _mm_storel_pd(double *mem_addr, __m128d a) - // Store 2 double-precision (64-bit) floating-point elements from a into memory - // in reverse order. mem_addr must be aligned on a 16-byte boundary or a - // general-protection exception may be generated. 
--// --// MEM[mem_addr+63:mem_addr] := a[127:64] --// MEM[mem_addr+127:mem_addr+64] := a[63:0] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storer_pd - FORCE_INLINE void _mm_storer_pd(double *mem_addr, __m128d a) - { - float32x4_t f = vreinterpretq_f32_m128d(a); -@@ -6172,21 +5424,23 @@ FORCE_INLINE void _mm_storer_pd(double *mem_addr, __m128d a) - // Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point - // elements) from a into memory. mem_addr does not need to be aligned on any - // particular boundary. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_pd - FORCE_INLINE void _mm_storeu_pd(double *mem_addr, __m128d a) - { - _mm_store_pd(mem_addr, a); - } - --// Stores 128-bits of integer data a at the address p. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si128 -+// Store 128-bits of integer data from a into memory. mem_addr does not need to -+// be aligned on any particular boundary. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si128 - FORCE_INLINE void _mm_storeu_si128(__m128i *p, __m128i a) - { - vst1q_s32((int32_t *) p, vreinterpretq_s32_m128i(a)); - } - --// Stores 32-bits of integer data a at the address p. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si32 -+// Store 32-bit integer from the first element of a into memory. mem_addr does -+// not need to be aligned on any particular boundary. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si32 - FORCE_INLINE void _mm_storeu_si32(void *p, __m128i a) - { - vst1q_lane_s32((int32_t *) p, vreinterpretq_s32_m128i(a), 0); -@@ -6196,7 +5450,7 @@ FORCE_INLINE void _mm_storeu_si32(void *p, __m128i a) - // elements) from a into memory using a non-temporal memory hint. mem_addr must - // be aligned on a 16-byte boundary or a general-protection exception may be - // generated. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_pd - FORCE_INLINE void _mm_stream_pd(double *p, __m128d a) - { - #if __has_builtin(__builtin_nontemporal_store) -@@ -6208,10 +5462,10 @@ FORCE_INLINE void _mm_stream_pd(double *p, __m128d a) - #endif - } - --// Stores the data in a to the address p without polluting the caches. If the --// cache line containing address p is already in the cache, the cache will be --// updated. --// https://msdn.microsoft.com/en-us/library/ba08y07y%28v=vs.90%29.aspx -+// Store 128-bits of integer data from a into memory using a non-temporal memory -+// hint. mem_addr must be aligned on a 16-byte boundary or a general-protection -+// exception may be generated. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si128 - FORCE_INLINE void _mm_stream_si128(__m128i *p, __m128i a) - { - #if __has_builtin(__builtin_nontemporal_store) -@@ -6224,7 +5478,7 @@ FORCE_INLINE void _mm_stream_si128(__m128i *p, __m128i a) - // Store 32-bit integer a into memory using a non-temporal hint to minimize - // cache pollution. If the cache line containing address mem_addr is already in - // the cache, the cache will be updated. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si32 - FORCE_INLINE void _mm_stream_si32(int *p, int a) - { - vst1q_lane_s32((int32_t *) p, vdupq_n_s32(a), 0); -@@ -6233,7 +5487,7 @@ FORCE_INLINE void _mm_stream_si32(int *p, int a) - // Store 64-bit integer a into memory using a non-temporal hint to minimize - // cache pollution. If the cache line containing address mem_addr is already in - // the cache, the cache will be updated. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si64 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si64 - FORCE_INLINE void _mm_stream_si64(__int64 *p, __int64 a) - { - vst1_s64((int64_t *) p, vdup_n_s64((int64_t) a)); -@@ -6241,32 +5495,25 @@ FORCE_INLINE void _mm_stream_si64(__int64 *p, __int64 a) - - // Subtract packed 16-bit integers in b from packed 16-bit integers in a, and - // store the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi16 - FORCE_INLINE __m128i _mm_sub_epi16(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s16( - vsubq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); - } - --// Subtracts the 4 signed or unsigned 32-bit integers of b from the 4 signed or --// unsigned 32-bit integers of a. --// --// r0 := a0 - b0 --// r1 := a1 - b1 --// r2 := a2 - b2 --// r3 := a3 - b3 --// --// https://msdn.microsoft.com/en-us/library/vstudio/fhh866h0(v=vs.100).aspx -+// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and -+// store the results in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi32 - FORCE_INLINE __m128i _mm_sub_epi32(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s32( - vsubq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); - } - --// Subtract 2 packed 64-bit integers in b from 2 packed 64-bit integers in a, --// and store the results in dst. --// r0 := a0 - b0 --// r1 := a1 - b1 -+// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and -+// store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi64 - FORCE_INLINE __m128i _mm_sub_epi64(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s64( -@@ -6275,7 +5522,7 @@ FORCE_INLINE __m128i _mm_sub_epi64(__m128i a, __m128i b) - - // Subtract packed 8-bit integers in b from packed 8-bit integers in a, and - // store the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi8 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi8 - FORCE_INLINE __m128i _mm_sub_epi8(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s8( -@@ -6285,13 +5532,7 @@ FORCE_INLINE __m128i _mm_sub_epi8(__m128i a, __m128i b) - // Subtract packed double-precision (64-bit) floating-point elements in b from - // packed double-precision (64-bit) floating-point elements in a, and store the - // results in dst. 
--// --// FOR j := 0 to 1 --// i := j*64 --// dst[i+63:i] := a[i+63:i] - b[i+63:i] --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_sub_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_pd - FORCE_INLINE __m128d _mm_sub_pd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -6311,71 +5552,50 @@ FORCE_INLINE __m128d _mm_sub_pd(__m128d a, __m128d b) - // the lower double-precision (64-bit) floating-point element in a, store the - // result in the lower element of dst, and copy the upper element from a to the - // upper element of dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_sd - FORCE_INLINE __m128d _mm_sub_sd(__m128d a, __m128d b) - { - return _mm_move_sd(a, _mm_sub_pd(a, b)); - } - - // Subtract 64-bit integer b from 64-bit integer a, and store the result in dst. --// --// dst[63:0] := a[63:0] - b[63:0] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_si64 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_si64 - FORCE_INLINE __m64 _mm_sub_si64(__m64 a, __m64 b) - { - return vreinterpret_m64_s64( - vsub_s64(vreinterpret_s64_m64(a), vreinterpret_s64_m64(b))); - } - --// Subtracts the 8 signed 16-bit integers of b from the 8 signed 16-bit integers --// of a and saturates. --// --// r0 := SignedSaturate(a0 - b0) --// r1 := SignedSaturate(a1 - b1) --// ... --// r7 := SignedSaturate(a7 - b7) --// --// https://technet.microsoft.com/en-us/subscriptions/3247z5b8(v=vs.90) -+// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a -+// using saturation, and store the results in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi16 - FORCE_INLINE __m128i _mm_subs_epi16(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s16( - vqsubq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); - } - --// Subtracts the 16 signed 8-bit integers of b from the 16 signed 8-bit integers --// of a and saturates. --// --// r0 := SignedSaturate(a0 - b0) --// r1 := SignedSaturate(a1 - b1) --// ... --// r15 := SignedSaturate(a15 - b15) --// --// https://technet.microsoft.com/en-us/subscriptions/by7kzks1(v=vs.90) -+// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a -+// using saturation, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi8 - FORCE_INLINE __m128i _mm_subs_epi8(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s8( - vqsubq_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b))); - } - --// Subtracts the 8 unsigned 16-bit integers of bfrom the 8 unsigned 16-bit --// integers of a and saturates.. --// https://technet.microsoft.com/en-us/subscriptions/index/f44y0s19(v=vs.90).aspx -+// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit -+// integers in a using saturation, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu16 - FORCE_INLINE __m128i _mm_subs_epu16(__m128i a, __m128i b) - { - return vreinterpretq_m128i_u16( - vqsubq_u16(vreinterpretq_u16_m128i(a), vreinterpretq_u16_m128i(b))); - } - --// Subtracts the 16 unsigned 8-bit integers of b from the 16 unsigned 8-bit --// integers of a and saturates. --// --// r0 := UnsignedSaturate(a0 - b0) --// r1 := UnsignedSaturate(a1 - b1) --// ... 
--// r15 := UnsignedSaturate(a15 - b15) --// --// https://technet.microsoft.com/en-us/subscriptions/yadkxc18(v=vs.90) -+// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit -+// integers in a using saturation, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu8 - FORCE_INLINE __m128i _mm_subs_epu8(__m128i a, __m128i b) - { - return vreinterpretq_m128i_u8( -@@ -6390,7 +5610,7 @@ FORCE_INLINE __m128i _mm_subs_epu8(__m128i a, __m128i b) - #define _mm_ucomineq_sd _mm_comineq_sd - - // Return vector of type __m128d with undefined elements. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_pd - FORCE_INLINE __m128d _mm_undefined_pd(void) - { - #if defined(__GNUC__) || defined(__clang__) -@@ -6404,19 +5624,9 @@ FORCE_INLINE __m128d _mm_undefined_pd(void) - #endif - } - --// Interleaves the upper 4 signed or unsigned 16-bit integers in a with the --// upper 4 signed or unsigned 16-bit integers in b. --// --// r0 := a4 --// r1 := b4 --// r2 := a5 --// r3 := b5 --// r4 := a6 --// r5 := b6 --// r6 := a7 --// r7 := b7 --// --// https://msdn.microsoft.com/en-us/library/03196cz7(v=vs.100).aspx -+// Unpack and interleave 16-bit integers from the high half of a and b, and -+// store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi16 - FORCE_INLINE __m128i _mm_unpackhi_epi16(__m128i a, __m128i b) - { - #if defined(__aarch64__) -@@ -6430,9 +5640,9 @@ FORCE_INLINE __m128i _mm_unpackhi_epi16(__m128i a, __m128i b) - #endif - } - --// Interleaves the upper 2 signed or unsigned 32-bit integers in a with the --// upper 2 signed or unsigned 32-bit integers in b. 
--// https://msdn.microsoft.com/en-us/library/65sa7cbs(v=vs.100).aspx -+// Unpack and interleave 32-bit integers from the high half of a and b, and -+// store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi32 - FORCE_INLINE __m128i _mm_unpackhi_epi32(__m128i a, __m128i b) - { - #if defined(__aarch64__) -@@ -6446,30 +5656,24 @@ FORCE_INLINE __m128i _mm_unpackhi_epi32(__m128i a, __m128i b) - #endif - } - --// Interleaves the upper signed or unsigned 64-bit integer in a with the --// upper signed or unsigned 64-bit integer in b. --// --// r0 := a1 --// r1 := b1 -+// Unpack and interleave 64-bit integers from the high half of a and b, and -+// store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi64 - FORCE_INLINE __m128i _mm_unpackhi_epi64(__m128i a, __m128i b) - { -+#if defined(__aarch64__) -+ return vreinterpretq_m128i_s64( -+ vzip2q_s64(vreinterpretq_s64_m128i(a), vreinterpretq_s64_m128i(b))); -+#else - int64x1_t a_h = vget_high_s64(vreinterpretq_s64_m128i(a)); - int64x1_t b_h = vget_high_s64(vreinterpretq_s64_m128i(b)); - return vreinterpretq_m128i_s64(vcombine_s64(a_h, b_h)); -+#endif - } - --// Interleaves the upper 8 signed or unsigned 8-bit integers in a with the upper --// 8 signed or unsigned 8-bit integers in b. --// --// r0 := a8 --// r1 := b8 --// r2 := a9 --// r3 := b9 --// ... --// r14 := a15 --// r15 := b15 --// --// https://msdn.microsoft.com/en-us/library/t5h7783k(v=vs.100).aspx -+// Unpack and interleave 8-bit integers from the high half of a and b, and store -+// the results in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi8 - FORCE_INLINE __m128i _mm_unpackhi_epi8(__m128i a, __m128i b) - { - #if defined(__aarch64__) -@@ -6487,15 +5691,7 @@ FORCE_INLINE __m128i _mm_unpackhi_epi8(__m128i a, __m128i b) - - // Unpack and interleave double-precision (64-bit) floating-point elements from - // the high half of a and b, and store the results in dst. --// --// DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { --// dst[63:0] := src1[127:64] --// dst[127:64] := src2[127:64] --// RETURN dst[127:0] --// } --// dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_pd - FORCE_INLINE __m128d _mm_unpackhi_pd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -6508,19 +5704,9 @@ FORCE_INLINE __m128d _mm_unpackhi_pd(__m128d a, __m128d b) - #endif - } - --// Interleaves the lower 4 signed or unsigned 16-bit integers in a with the --// lower 4 signed or unsigned 16-bit integers in b. --// --// r0 := a0 --// r1 := b0 --// r2 := a1 --// r3 := b1 --// r4 := a2 --// r5 := b2 --// r6 := a3 --// r7 := b3 --// --// https://msdn.microsoft.com/en-us/library/btxb17bw%28v=vs.90%29.aspx -+// Unpack and interleave 16-bit integers from the low half of a and b, and store -+// the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi16 - FORCE_INLINE __m128i _mm_unpacklo_epi16(__m128i a, __m128i b) - { - #if defined(__aarch64__) -@@ -6534,15 +5720,9 @@ FORCE_INLINE __m128i _mm_unpacklo_epi16(__m128i a, __m128i b) - #endif - } - --// Interleaves the lower 2 signed or unsigned 32 - bit integers in a with the --// lower 2 signed or unsigned 32 - bit integers in b. 
--// --// r0 := a0 --// r1 := b0 --// r2 := a1 --// r3 := b1 --// --// https://msdn.microsoft.com/en-us/library/x8atst9d(v=vs.100).aspx -+// Unpack and interleave 32-bit integers from the low half of a and b, and store -+// the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi32 - FORCE_INLINE __m128i _mm_unpacklo_epi32(__m128i a, __m128i b) - { - #if defined(__aarch64__) -@@ -6556,25 +5736,24 @@ FORCE_INLINE __m128i _mm_unpacklo_epi32(__m128i a, __m128i b) - #endif - } - -+// Unpack and interleave 64-bit integers from the low half of a and b, and store -+// the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi64 - FORCE_INLINE __m128i _mm_unpacklo_epi64(__m128i a, __m128i b) - { -+#if defined(__aarch64__) -+ return vreinterpretq_m128i_s64( -+ vzip1q_s64(vreinterpretq_s64_m128i(a), vreinterpretq_s64_m128i(b))); -+#else - int64x1_t a_l = vget_low_s64(vreinterpretq_s64_m128i(a)); - int64x1_t b_l = vget_low_s64(vreinterpretq_s64_m128i(b)); - return vreinterpretq_m128i_s64(vcombine_s64(a_l, b_l)); -+#endif - } - --// Interleaves the lower 8 signed or unsigned 8-bit integers in a with the lower --// 8 signed or unsigned 8-bit integers in b. --// --// r0 := a0 --// r1 := b0 --// r2 := a1 --// r3 := b1 --// ... --// r14 := a7 --// r15 := b7 --// --// https://msdn.microsoft.com/en-us/library/xf7k860c%28v=vs.90%29.aspx -+// Unpack and interleave 8-bit integers from the low half of a and b, and store -+// the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi8 - FORCE_INLINE __m128i _mm_unpacklo_epi8(__m128i a, __m128i b) - { - #if defined(__aarch64__) -@@ -6590,15 +5769,7 @@ FORCE_INLINE __m128i _mm_unpacklo_epi8(__m128i a, __m128i b) - - // Unpack and interleave double-precision (64-bit) floating-point elements from - // the low half of a and b, and store the results in dst. 
--// --// DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { --// dst[63:0] := src1[63:0] --// dst[127:64] := src2[63:0] --// RETURN dst[127:0] --// } --// dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_pd - FORCE_INLINE __m128d _mm_unpacklo_pd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -6613,21 +5784,16 @@ FORCE_INLINE __m128d _mm_unpacklo_pd(__m128d a, __m128d b) - - // Compute the bitwise XOR of packed double-precision (64-bit) floating-point - // elements in a and b, and store the results in dst. --// --// FOR j := 0 to 1 --// i := j*64 --// dst[i+63:i] := a[i+63:i] XOR b[i+63:i] --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_pd - FORCE_INLINE __m128d _mm_xor_pd(__m128d a, __m128d b) - { - return vreinterpretq_m128d_s64( - veorq_s64(vreinterpretq_s64_m128d(a), vreinterpretq_s64_m128d(b))); - } - --// Computes the bitwise XOR of the 128-bit value in a and the 128-bit value in --// b. https://msdn.microsoft.com/en-us/library/fzt08www(v=vs.100).aspx -+// Compute the bitwise XOR of 128 bits (representing integer data) in a and b, -+// and store the result in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_si128 - FORCE_INLINE __m128i _mm_xor_si128(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s32( -@@ -6639,17 +5805,7 @@ FORCE_INLINE __m128i _mm_xor_si128(__m128i a, __m128i b) - // Alternatively add and subtract packed double-precision (64-bit) - // floating-point elements in a to/from packed elements in b, and store the - // results in dst. 
--// --// FOR j := 0 to 1 --// i := j*64 --// IF ((j & 1) == 0) --// dst[i+63:i] := a[i+63:i] - b[i+63:i] --// ELSE --// dst[i+63:i] := a[i+63:i] + b[i+63:i] --// FI --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_addsub_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_addsub_pd - FORCE_INLINE __m128d _mm_addsub_pd(__m128d a, __m128d b) - { - _sse2neon_const __m128d mask = _mm_set_pd(1.0f, -1.0f); -@@ -6665,7 +5821,7 @@ FORCE_INLINE __m128d _mm_addsub_pd(__m128d a, __m128d b) - // Alternatively add and subtract packed single-precision (32-bit) - // floating-point elements in a to/from packed elements in b, and store the - // results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=addsub_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=addsub_ps - FORCE_INLINE __m128 _mm_addsub_ps(__m128 a, __m128 b) - { - _sse2neon_const __m128 mask = _mm_setr_ps(-1.0f, 1.0f, -1.0f, 1.0f); -@@ -6680,7 +5836,7 @@ FORCE_INLINE __m128 _mm_addsub_ps(__m128 a, __m128 b) - - // Horizontally add adjacent pairs of double-precision (64-bit) floating-point - // elements in a and b, and pack the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_pd - FORCE_INLINE __m128d _mm_hadd_pd(__m128d a, __m128d b) - { - #if defined(__aarch64__) -@@ -6694,9 +5850,9 @@ FORCE_INLINE __m128d _mm_hadd_pd(__m128d a, __m128d b) - #endif - } - --// Computes pairwise add of each argument as single-precision, floating-point --// values a and b. --// https://msdn.microsoft.com/en-us/library/yd9wecaa.aspx -+// Horizontally add adjacent pairs of single-precision (32-bit) floating-point -+// elements in a and b, and pack the results in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_ps - FORCE_INLINE __m128 _mm_hadd_ps(__m128 a, __m128 b) - { - #if defined(__aarch64__) -@@ -6714,7 +5870,7 @@ FORCE_INLINE __m128 _mm_hadd_ps(__m128 a, __m128 b) - - // Horizontally subtract adjacent pairs of double-precision (64-bit) - // floating-point elements in a and b, and pack the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_pd - FORCE_INLINE __m128d _mm_hsub_pd(__m128d _a, __m128d _b) - { - #if defined(__aarch64__) -@@ -6732,7 +5888,7 @@ FORCE_INLINE __m128d _mm_hsub_pd(__m128d _a, __m128d _b) - - // Horizontally subtract adjacent pairs of single-precision (32-bit) - // floating-point elements in a and b, and pack the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_ps - FORCE_INLINE __m128 _mm_hsub_ps(__m128 _a, __m128 _b) - { - float32x4_t a = vreinterpretq_f32_m128(_a); -@@ -6749,24 +5905,17 @@ FORCE_INLINE __m128 _mm_hsub_ps(__m128 _a, __m128 _b) - // Load 128-bits of integer data from unaligned memory into dst. This intrinsic - // may perform better than _mm_loadu_si128 when the data crosses a cache line - // boundary. --// --// dst[127:0] := MEM[mem_addr+127:mem_addr] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lddqu_si128 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lddqu_si128 - #define _mm_lddqu_si128 _mm_loadu_si128 - - // Load a double-precision (64-bit) floating-point element from memory into both - // elements of dst. 
--// --// dst[63:0] := MEM[mem_addr+63:mem_addr] --// dst[127:64] := MEM[mem_addr+63:mem_addr] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loaddup_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loaddup_pd - #define _mm_loaddup_pd _mm_load1_pd - - // Duplicate the low double-precision (64-bit) floating-point element from a, - // and store the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movedup_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movedup_pd - FORCE_INLINE __m128d _mm_movedup_pd(__m128d a) - { - #if defined(__aarch64__) -@@ -6780,7 +5929,7 @@ FORCE_INLINE __m128d _mm_movedup_pd(__m128d a) - - // Duplicate odd-indexed single-precision (32-bit) floating-point elements - // from a, and store the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movehdup_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movehdup_ps - FORCE_INLINE __m128 _mm_movehdup_ps(__m128 a) - { - #if defined(__aarch64__) -@@ -6799,7 +5948,7 @@ FORCE_INLINE __m128 _mm_movehdup_ps(__m128 a) - - // Duplicate even-indexed single-precision (32-bit) floating-point elements - // from a, and store the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_moveldup_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_moveldup_ps - FORCE_INLINE __m128 _mm_moveldup_ps(__m128 a) - { - #if defined(__aarch64__) -@@ -6820,13 +5969,7 @@ FORCE_INLINE __m128 _mm_moveldup_ps(__m128 a) - - // Compute the absolute value of packed signed 16-bit integers in a, and store - // the unsigned results in dst. 
--// --// FOR j := 0 to 7 --// i := j*16 --// dst[i+15:i] := ABS(a[i+15:i]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi16 - FORCE_INLINE __m128i _mm_abs_epi16(__m128i a) - { - return vreinterpretq_m128i_s16(vabsq_s16(vreinterpretq_s16_m128i(a))); -@@ -6834,13 +5977,7 @@ FORCE_INLINE __m128i _mm_abs_epi16(__m128i a) - - // Compute the absolute value of packed signed 32-bit integers in a, and store - // the unsigned results in dst. --// --// FOR j := 0 to 3 --// i := j*32 --// dst[i+31:i] := ABS(a[i+31:i]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi32 - FORCE_INLINE __m128i _mm_abs_epi32(__m128i a) - { - return vreinterpretq_m128i_s32(vabsq_s32(vreinterpretq_s32_m128i(a))); -@@ -6848,13 +5985,7 @@ FORCE_INLINE __m128i _mm_abs_epi32(__m128i a) - - // Compute the absolute value of packed signed 8-bit integers in a, and store - // the unsigned results in dst. --// --// FOR j := 0 to 15 --// i := j*8 --// dst[i+7:i] := ABS(a[i+7:i]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi8 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi8 - FORCE_INLINE __m128i _mm_abs_epi8(__m128i a) - { - return vreinterpretq_m128i_s8(vabsq_s8(vreinterpretq_s8_m128i(a))); -@@ -6862,13 +5993,7 @@ FORCE_INLINE __m128i _mm_abs_epi8(__m128i a) - - // Compute the absolute value of packed signed 16-bit integers in a, and store - // the unsigned results in dst. 
--// --// FOR j := 0 to 3 --// i := j*16 --// dst[i+15:i] := ABS(a[i+15:i]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_pi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_pi16 - FORCE_INLINE __m64 _mm_abs_pi16(__m64 a) - { - return vreinterpret_m64_s16(vabs_s16(vreinterpret_s16_m64(a))); -@@ -6876,13 +6001,7 @@ FORCE_INLINE __m64 _mm_abs_pi16(__m64 a) - - // Compute the absolute value of packed signed 32-bit integers in a, and store - // the unsigned results in dst. --// --// FOR j := 0 to 1 --// i := j*32 --// dst[i+31:i] := ABS(a[i+31:i]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_pi32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_pi32 - FORCE_INLINE __m64 _mm_abs_pi32(__m64 a) - { - return vreinterpret_m64_s32(vabs_s32(vreinterpret_s32_m64(a))); -@@ -6890,13 +6009,7 @@ FORCE_INLINE __m64 _mm_abs_pi32(__m64 a) - - // Compute the absolute value of packed signed 8-bit integers in a, and store - // the unsigned results in dst. --// --// FOR j := 0 to 7 --// i := j*8 --// dst[i+7:i] := ABS(a[i+7:i]) --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_pi8 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_pi8 - FORCE_INLINE __m64 _mm_abs_pi8(__m64 a) - { - return vreinterpret_m64_s8(vabs_s8(vreinterpret_s8_m64(a))); -@@ -6904,11 +6017,7 @@ FORCE_INLINE __m64 _mm_abs_pi8(__m64 a) - - // Concatenate 16-byte blocks in a and b into a 32-byte temporary result, shift - // the result right by imm8 bytes, and store the low 16 bytes in dst. 
--// --// tmp[255:0] := ((a[127:0] << 128)[255:0] OR b[127:0]) >> (imm8*8) --// dst[127:0] := tmp[127:0] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_alignr_epi8 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi8 - #define _mm_alignr_epi8(a, b, imm) \ - __extension__({ \ - uint8x16_t _a = vreinterpretq_u8_m128i(a); \ -@@ -6926,11 +6035,7 @@ FORCE_INLINE __m64 _mm_abs_pi8(__m64 a) - - // Concatenate 8-byte blocks in a and b into a 16-byte temporary result, shift - // the result right by imm8 bytes, and store the low 8 bytes in dst. --// --// tmp[127:0] := ((a[63:0] << 64)[127:0] OR b[63:0]) >> (imm8*8) --// dst[63:0] := tmp[63:0] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_alignr_pi8 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_pi8 - #define _mm_alignr_pi8(a, b, imm) \ - __extension__({ \ - __m64 ret; \ -@@ -6953,8 +6058,9 @@ FORCE_INLINE __m64 _mm_abs_pi8(__m64 a) - ret; \ - }) - --// Computes pairwise add of each argument as a 16-bit signed or unsigned integer --// values a and b. -+// Horizontally add adjacent pairs of 16-bit integers in a and b, and pack the -+// signed 16-bit results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi16 - FORCE_INLINE __m128i _mm_hadd_epi16(__m128i _a, __m128i _b) - { - int16x8_t a = vreinterpretq_s16_m128i(_a); -@@ -6968,8 +6074,9 @@ FORCE_INLINE __m128i _mm_hadd_epi16(__m128i _a, __m128i _b) - #endif - } - --// Computes pairwise add of each argument as a 32-bit signed or unsigned integer --// values a and b. -+// Horizontally add adjacent pairs of 32-bit integers in a and b, and pack the -+// signed 32-bit results in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi32 - FORCE_INLINE __m128i _mm_hadd_epi32(__m128i _a, __m128i _b) - { - int32x4_t a = vreinterpretq_s32_m128i(_a); -@@ -6985,7 +6092,7 @@ FORCE_INLINE __m128i _mm_hadd_epi32(__m128i _a, __m128i _b) - - // Horizontally add adjacent pairs of 16-bit integers in a and b, and pack the - // signed 16-bit results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_pi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_pi16 - FORCE_INLINE __m64 _mm_hadd_pi16(__m64 a, __m64 b) - { - return vreinterpret_m64_s16( -@@ -6994,15 +6101,16 @@ FORCE_INLINE __m64 _mm_hadd_pi16(__m64 a, __m64 b) - - // Horizontally add adjacent pairs of 32-bit integers in a and b, and pack the - // signed 32-bit results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_pi32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_pi32 - FORCE_INLINE __m64 _mm_hadd_pi32(__m64 a, __m64 b) - { - return vreinterpret_m64_s32( - vpadd_s32(vreinterpret_s32_m64(a), vreinterpret_s32_m64(b))); - } - --// Computes saturated pairwise sub of each argument as a 16-bit signed --// integer values a and b. -+// Horizontally add adjacent pairs of signed 16-bit integers in a and b using -+// saturation, and pack the signed 16-bit results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadds_epi16 - FORCE_INLINE __m128i _mm_hadds_epi16(__m128i _a, __m128i _b) - { - #if defined(__aarch64__) -@@ -7025,7 +6133,7 @@ FORCE_INLINE __m128i _mm_hadds_epi16(__m128i _a, __m128i _b) - - // Horizontally add adjacent pairs of signed 16-bit integers in a and b using - // saturation, and pack the signed 16-bit results in dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadds_pi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadds_pi16 - FORCE_INLINE __m64 _mm_hadds_pi16(__m64 _a, __m64 _b) - { - int16x4_t a = vreinterpret_s16_m64(_a); -@@ -7040,7 +6148,7 @@ FORCE_INLINE __m64 _mm_hadds_pi16(__m64 _a, __m64 _b) - - // Horizontally subtract adjacent pairs of 16-bit integers in a and b, and pack - // the signed 16-bit results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi16 - FORCE_INLINE __m128i _mm_hsub_epi16(__m128i _a, __m128i _b) - { - int16x8_t a = vreinterpretq_s16_m128i(_a); -@@ -7056,7 +6164,7 @@ FORCE_INLINE __m128i _mm_hsub_epi16(__m128i _a, __m128i _b) - - // Horizontally subtract adjacent pairs of 32-bit integers in a and b, and pack - // the signed 32-bit results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi32 - FORCE_INLINE __m128i _mm_hsub_epi32(__m128i _a, __m128i _b) - { - int32x4_t a = vreinterpretq_s32_m128i(_a); -@@ -7072,7 +6180,7 @@ FORCE_INLINE __m128i _mm_hsub_epi32(__m128i _a, __m128i _b) - - // Horizontally subtract adjacent pairs of 16-bit integers in a and b, and pack - // the signed 16-bit results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_pi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_pi16 - FORCE_INLINE __m64 _mm_hsub_pi16(__m64 _a, __m64 _b) - { - int16x4_t a = vreinterpret_s16_m64(_a); -@@ -7087,7 +6195,7 @@ FORCE_INLINE __m64 _mm_hsub_pi16(__m64 _a, __m64 _b) - - // Horizontally subtract adjacent pairs of 32-bit integers in a and b, and pack - // the signed 32-bit results in dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_hsub_pi32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_hsub_pi32 - FORCE_INLINE __m64 _mm_hsub_pi32(__m64 _a, __m64 _b) - { - int32x2_t a = vreinterpret_s32_m64(_a); -@@ -7100,9 +6208,9 @@ FORCE_INLINE __m64 _mm_hsub_pi32(__m64 _a, __m64 _b) - #endif - } - --// Computes saturated pairwise difference of each argument as a 16-bit signed --// integer values a and b. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsubs_epi16 -+// Horizontally subtract adjacent pairs of signed 16-bit integers in a and b -+// using saturation, and pack the signed 16-bit results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsubs_epi16 - FORCE_INLINE __m128i _mm_hsubs_epi16(__m128i _a, __m128i _b) - { - int16x8_t a = vreinterpretq_s16_m128i(_a); -@@ -7118,7 +6226,7 @@ FORCE_INLINE __m128i _mm_hsubs_epi16(__m128i _a, __m128i _b) - - // Horizontally subtract adjacent pairs of signed 16-bit integers in a and b - // using saturation, and pack the signed 16-bit results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsubs_pi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsubs_pi16 - FORCE_INLINE __m64 _mm_hsubs_pi16(__m64 _a, __m64 _b) - { - int16x4_t a = vreinterpret_s16_m64(_a); -@@ -7135,12 +6243,7 @@ FORCE_INLINE __m64 _mm_hsubs_pi16(__m64 _a, __m64 _b) - // signed 8-bit integer from b, producing intermediate signed 16-bit integers. - // Horizontally add adjacent pairs of intermediate signed 16-bit integers, - // and pack the saturated results in dst. 
--// --// FOR j := 0 to 7 --// i := j*16 --// dst[i+15:i] := Saturate_To_Int16( a[i+15:i+8]*b[i+15:i+8] + --// a[i+7:i]*b[i+7:i] ) --// ENDFOR -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maddubs_epi16 - FORCE_INLINE __m128i _mm_maddubs_epi16(__m128i _a, __m128i _b) - { - #if defined(__aarch64__) -@@ -7179,7 +6282,7 @@ FORCE_INLINE __m128i _mm_maddubs_epi16(__m128i _a, __m128i _b) - // signed 8-bit integer from b, producing intermediate signed 16-bit integers. - // Horizontally add adjacent pairs of intermediate signed 16-bit integers, and - // pack the saturated results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maddubs_pi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maddubs_pi16 - FORCE_INLINE __m64 _mm_maddubs_pi16(__m64 _a, __m64 _b) - { - uint16x4_t a = vreinterpret_u16_m64(_a); -@@ -7204,12 +6307,7 @@ FORCE_INLINE __m64 _mm_maddubs_pi16(__m64 _a, __m64 _b) - // Multiply packed signed 16-bit integers in a and b, producing intermediate - // signed 32-bit integers. Shift right by 15 bits while rounding up, and store - // the packed 16-bit integers in dst. --// --// r0 := Round(((int32_t)a0 * (int32_t)b0) >> 15) --// r1 := Round(((int32_t)a1 * (int32_t)b1) >> 15) --// r2 := Round(((int32_t)a2 * (int32_t)b2) >> 15) --// ... --// r7 := Round(((int32_t)a7 * (int32_t)b7) >> 15) -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhrs_epi16 - FORCE_INLINE __m128i _mm_mulhrs_epi16(__m128i a, __m128i b) - { - // Has issues due to saturation -@@ -7233,7 +6331,7 @@ FORCE_INLINE __m128i _mm_mulhrs_epi16(__m128i a, __m128i b) - // Multiply packed signed 16-bit integers in a and b, producing intermediate - // signed 32-bit integers. Truncate each intermediate integer to the 18 most - // significant bits, round by adding 1, and store bits [16:1] to dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhrs_pi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhrs_pi16 - FORCE_INLINE __m64 _mm_mulhrs_pi16(__m64 a, __m64 b) - { - int32x4_t mul_extend = -@@ -7245,7 +6343,7 @@ FORCE_INLINE __m64 _mm_mulhrs_pi16(__m64 a, __m64 b) - - // Shuffle packed 8-bit integers in a according to shuffle control mask in the - // corresponding 8-bit element of b, and store the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi8 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi8 - FORCE_INLINE __m128i _mm_shuffle_epi8(__m128i a, __m128i b) - { - int8x16_t tbl = vreinterpretq_s8_m128i(a); // input a -@@ -7275,18 +6373,7 @@ FORCE_INLINE __m128i _mm_shuffle_epi8(__m128i a, __m128i b) - - // Shuffle packed 8-bit integers in a according to shuffle control mask in the - // corresponding 8-bit element of b, and store the results in dst. --// --// FOR j := 0 to 7 --// i := j*8 --// IF b[i+7] == 1 --// dst[i+7:i] := 0 --// ELSE --// index[2:0] := b[i+2:i] --// dst[i+7:i] := a[index*8+7:index*8] --// FI --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pi8 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_pi8 - FORCE_INLINE __m64 _mm_shuffle_pi8(__m64 a, __m64 b) - { - const int8x8_t controlMask = -@@ -7299,16 +6386,7 @@ FORCE_INLINE __m64 _mm_shuffle_pi8(__m64 a, __m64 b) - // 16-bit integer in b is negative, and store the results in dst. - // Element in dst are zeroed out when the corresponding element - // in b is zero. 
--// --// for i in 0..7 --// if b[i] < 0 --// r[i] := -a[i] --// else if b[i] == 0 --// r[i] := 0 --// else --// r[i] := a[i] --// fi --// done -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi16 - FORCE_INLINE __m128i _mm_sign_epi16(__m128i _a, __m128i _b) - { - int16x8_t a = vreinterpretq_s16_m128i(_a); -@@ -7336,16 +6414,7 @@ FORCE_INLINE __m128i _mm_sign_epi16(__m128i _a, __m128i _b) - // 32-bit integer in b is negative, and store the results in dst. - // Element in dst are zeroed out when the corresponding element - // in b is zero. --// --// for i in 0..3 --// if b[i] < 0 --// r[i] := -a[i] --// else if b[i] == 0 --// r[i] := 0 --// else --// r[i] := a[i] --// fi --// done -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi32 - FORCE_INLINE __m128i _mm_sign_epi32(__m128i _a, __m128i _b) - { - int32x4_t a = vreinterpretq_s32_m128i(_a); -@@ -7374,16 +6443,7 @@ FORCE_INLINE __m128i _mm_sign_epi32(__m128i _a, __m128i _b) - // 8-bit integer in b is negative, and store the results in dst. - // Element in dst are zeroed out when the corresponding element - // in b is zero. --// --// for i in 0..15 --// if b[i] < 0 --// r[i] := -a[i] --// else if b[i] == 0 --// r[i] := 0 --// else --// r[i] := a[i] --// fi --// done -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi8 - FORCE_INLINE __m128i _mm_sign_epi8(__m128i _a, __m128i _b) - { - int8x16_t a = vreinterpretq_s8_m128i(_a); -@@ -7412,19 +6472,7 @@ FORCE_INLINE __m128i _mm_sign_epi8(__m128i _a, __m128i _b) - // Negate packed 16-bit integers in a when the corresponding signed 16-bit - // integer in b is negative, and store the results in dst. Element in dst are - // zeroed out when the corresponding element in b is zero. 
--// --// FOR j := 0 to 3 --// i := j*16 --// IF b[i+15:i] < 0 --// dst[i+15:i] := -(a[i+15:i]) --// ELSE IF b[i+15:i] == 0 --// dst[i+15:i] := 0 --// ELSE --// dst[i+15:i] := a[i+15:i] --// FI --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_pi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_pi16 - FORCE_INLINE __m64 _mm_sign_pi16(__m64 _a, __m64 _b) - { - int16x4_t a = vreinterpret_s16_m64(_a); -@@ -7453,19 +6501,7 @@ FORCE_INLINE __m64 _mm_sign_pi16(__m64 _a, __m64 _b) - // Negate packed 32-bit integers in a when the corresponding signed 32-bit - // integer in b is negative, and store the results in dst. Element in dst are - // zeroed out when the corresponding element in b is zero. --// --// FOR j := 0 to 1 --// i := j*32 --// IF b[i+31:i] < 0 --// dst[i+31:i] := -(a[i+31:i]) --// ELSE IF b[i+31:i] == 0 --// dst[i+31:i] := 0 --// ELSE --// dst[i+31:i] := a[i+31:i] --// FI --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_pi32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_pi32 - FORCE_INLINE __m64 _mm_sign_pi32(__m64 _a, __m64 _b) - { - int32x2_t a = vreinterpret_s32_m64(_a); -@@ -7494,19 +6530,7 @@ FORCE_INLINE __m64 _mm_sign_pi32(__m64 _a, __m64 _b) - // Negate packed 8-bit integers in a when the corresponding signed 8-bit integer - // in b is negative, and store the results in dst. Element in dst are zeroed out - // when the corresponding element in b is zero. 
--// --// FOR j := 0 to 7 --// i := j*8 --// IF b[i+7:i] < 0 --// dst[i+7:i] := -(a[i+7:i]) --// ELSE IF b[i+7:i] == 0 --// dst[i+7:i] := 0 --// ELSE --// dst[i+7:i] := a[i+7:i] --// FI --// ENDFOR --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_pi8 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_pi8 - FORCE_INLINE __m64 _mm_sign_pi8(__m64 _a, __m64 _b) - { - int8x8_t a = vreinterpret_s8_m64(_a); -@@ -7536,15 +6560,7 @@ FORCE_INLINE __m64 _mm_sign_pi8(__m64 _a, __m64 _b) - - // Blend packed 16-bit integers from a and b using control mask imm8, and store - // the results in dst. --// --// FOR j := 0 to 7 --// i := j*16 --// IF imm8[j] --// dst[i+15:i] := b[i+15:i] --// ELSE --// dst[i+15:i] := a[i+15:i] --// FI --// ENDFOR -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_epi16 - // FORCE_INLINE __m128i _mm_blend_epi16(__m128i a, __m128i b, - // __constrange(0,255) int imm) - #define _mm_blend_epi16(a, b, imm) \ -@@ -7565,7 +6581,7 @@ FORCE_INLINE __m64 _mm_sign_pi8(__m64 _a, __m64 _b) - - // Blend packed double-precision (64-bit) floating-point elements from a and b - // using control mask imm8, and store the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_pd - #define _mm_blend_pd(a, b, imm) \ - __extension__({ \ - const uint64_t _mask[2] = { \ -@@ -7579,7 +6595,7 @@ FORCE_INLINE __m64 _mm_sign_pi8(__m64 _a, __m64 _b) - - // Blend packed single-precision (32-bit) floating-point elements from a and b - // using mask, and store the results in dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_ps - FORCE_INLINE __m128 _mm_blend_ps(__m128 _a, __m128 _b, const char imm8) - { - const uint32_t ALIGN_STRUCT(16) -@@ -7595,15 +6611,7 @@ FORCE_INLINE __m128 _mm_blend_ps(__m128 _a, __m128 _b, const char imm8) - - // Blend packed 8-bit integers from a and b using mask, and store the results in - // dst. --// --// FOR j := 0 to 15 --// i := j*8 --// IF mask[i+7] --// dst[i+7:i] := b[i+7:i] --// ELSE --// dst[i+7:i] := a[i+7:i] --// FI --// ENDFOR -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_epi8 - FORCE_INLINE __m128i _mm_blendv_epi8(__m128i _a, __m128i _b, __m128i _mask) - { - // Use a signed shift right to create a mask with the sign bit -@@ -7616,7 +6624,7 @@ FORCE_INLINE __m128i _mm_blendv_epi8(__m128i _a, __m128i _b, __m128i _mask) - - // Blend packed double-precision (64-bit) floating-point elements from a and b - // using mask, and store the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_pd - FORCE_INLINE __m128d _mm_blendv_pd(__m128d _a, __m128d _b, __m128d _mask) - { - uint64x2_t mask = -@@ -7634,7 +6642,7 @@ FORCE_INLINE __m128d _mm_blendv_pd(__m128d _a, __m128d _b, __m128d _mask) - - // Blend packed single-precision (32-bit) floating-point elements from a and b - // using mask, and store the results in dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_ps - FORCE_INLINE __m128 _mm_blendv_ps(__m128 _a, __m128 _b, __m128 _mask) - { - // Use a signed shift right to create a mask with the sign bit -@@ -7648,7 +6656,7 @@ FORCE_INLINE __m128 _mm_blendv_ps(__m128 _a, __m128 _b, __m128 _mask) - // Round the packed double-precision (64-bit) floating-point elements in a up - // to an integer value, and store the results as packed double-precision - // floating-point elements in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_pd - FORCE_INLINE __m128d _mm_ceil_pd(__m128d a) - { - #if defined(__aarch64__) -@@ -7662,7 +6670,7 @@ FORCE_INLINE __m128d _mm_ceil_pd(__m128d a) - // Round the packed single-precision (32-bit) floating-point elements in a up to - // an integer value, and store the results as packed single-precision - // floating-point elements in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_ps - FORCE_INLINE __m128 _mm_ceil_ps(__m128 a) - { - #if defined(__aarch64__) || defined(__ARM_FEATURE_DIRECTED_ROUNDING) -@@ -7677,7 +6685,7 @@ FORCE_INLINE __m128 _mm_ceil_ps(__m128 a) - // an integer value, store the result as a double-precision floating-point - // element in the lower element of dst, and copy the upper element from a to the - // upper element of dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_sd - FORCE_INLINE __m128d _mm_ceil_sd(__m128d a, __m128d b) - { - return _mm_move_sd(a, _mm_ceil_pd(b)); -@@ -7687,11 +6695,7 @@ FORCE_INLINE __m128d _mm_ceil_sd(__m128d a, __m128d b) - // an integer value, store the result as a single-precision floating-point - // element in the lower element of dst, and copy the upper 3 packed elements - // from a to the upper elements of dst. --// --// dst[31:0] := CEIL(b[31:0]) --// dst[127:32] := a[127:32] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ss -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_ss - FORCE_INLINE __m128 _mm_ceil_ss(__m128 a, __m128 b) - { - return _mm_move_ss(a, _mm_ceil_ps(b)); -@@ -7714,16 +6718,18 @@ FORCE_INLINE __m128i _mm_cmpeq_epi64(__m128i a, __m128i b) - #endif - } - --// Converts the four signed 16-bit integers in the lower 64 bits to four signed --// 32-bit integers. -+// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store -+// the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi32 - FORCE_INLINE __m128i _mm_cvtepi16_epi32(__m128i a) - { - return vreinterpretq_m128i_s32( - vmovl_s16(vget_low_s16(vreinterpretq_s16_m128i(a)))); - } - --// Converts the two signed 16-bit integers in the lower 32 bits two signed --// 32-bit integers. -+// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store -+// the results in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi64 - FORCE_INLINE __m128i _mm_cvtepi16_epi64(__m128i a) - { - int16x8_t s16x8 = vreinterpretq_s16_m128i(a); /* xxxx xxxx xxxx 0B0A */ -@@ -7732,16 +6738,18 @@ FORCE_INLINE __m128i _mm_cvtepi16_epi64(__m128i a) - return vreinterpretq_m128i_s64(s64x2); - } - --// Converts the two signed 32-bit integers in the lower 64 bits to two signed --// 64-bit integers. -+// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store -+// the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi64 - FORCE_INLINE __m128i _mm_cvtepi32_epi64(__m128i a) - { - return vreinterpretq_m128i_s64( - vmovl_s32(vget_low_s32(vreinterpretq_s32_m128i(a)))); - } - --// Converts the four unsigned 8-bit integers in the lower 16 bits to four --// unsigned 32-bit integers. -+// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store -+// the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi8_epi16 - FORCE_INLINE __m128i _mm_cvtepi8_epi16(__m128i a) - { - int8x16_t s8x16 = vreinterpretq_s8_m128i(a); /* xxxx xxxx xxxx DCBA */ -@@ -7749,8 +6757,9 @@ FORCE_INLINE __m128i _mm_cvtepi8_epi16(__m128i a) - return vreinterpretq_m128i_s16(s16x8); - } - --// Converts the four unsigned 8-bit integers in the lower 32 bits to four --// unsigned 32-bit integers. -+// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store -+// the results in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi8_epi32 - FORCE_INLINE __m128i _mm_cvtepi8_epi32(__m128i a) - { - int8x16_t s8x16 = vreinterpretq_s8_m128i(a); /* xxxx xxxx xxxx DCBA */ -@@ -7759,8 +6768,9 @@ FORCE_INLINE __m128i _mm_cvtepi8_epi32(__m128i a) - return vreinterpretq_m128i_s32(s32x4); - } - --// Converts the two signed 8-bit integers in the lower 32 bits to four --// signed 64-bit integers. -+// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit -+// integers, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi8_epi64 - FORCE_INLINE __m128i _mm_cvtepi8_epi64(__m128i a) - { - int8x16_t s8x16 = vreinterpretq_s8_m128i(a); /* xxxx xxxx xxxx xxBA */ -@@ -7770,16 +6780,18 @@ FORCE_INLINE __m128i _mm_cvtepi8_epi64(__m128i a) - return vreinterpretq_m128i_s64(s64x2); - } - --// Converts the four unsigned 16-bit integers in the lower 64 bits to four --// unsigned 32-bit integers. -+// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, -+// and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu16_epi32 - FORCE_INLINE __m128i _mm_cvtepu16_epi32(__m128i a) - { - return vreinterpretq_m128i_u32( - vmovl_u16(vget_low_u16(vreinterpretq_u16_m128i(a)))); - } - --// Converts the two unsigned 16-bit integers in the lower 32 bits to two --// unsigned 64-bit integers. -+// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, -+// and store the results in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu16_epi64 - FORCE_INLINE __m128i _mm_cvtepu16_epi64(__m128i a) - { - uint16x8_t u16x8 = vreinterpretq_u16_m128i(a); /* xxxx xxxx xxxx 0B0A */ -@@ -7788,8 +6800,9 @@ FORCE_INLINE __m128i _mm_cvtepu16_epi64(__m128i a) - return vreinterpretq_m128i_u64(u64x2); - } - --// Converts the two unsigned 32-bit integers in the lower 64 bits to two --// unsigned 64-bit integers. -+// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, -+// and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_epi64 - FORCE_INLINE __m128i _mm_cvtepu32_epi64(__m128i a) - { - return vreinterpretq_m128i_u64( -@@ -7798,7 +6811,7 @@ FORCE_INLINE __m128i _mm_cvtepu32_epi64(__m128i a) - - // Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, - // and store the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu8_epi16 - FORCE_INLINE __m128i _mm_cvtepu8_epi16(__m128i a) - { - uint8x16_t u8x16 = vreinterpretq_u8_m128i(a); /* xxxx xxxx HGFE DCBA */ -@@ -7806,9 +6819,9 @@ FORCE_INLINE __m128i _mm_cvtepu8_epi16(__m128i a) - return vreinterpretq_m128i_u16(u16x8); - } - --// Converts the four unsigned 8-bit integers in the lower 32 bits to four --// unsigned 32-bit integers. --// https://msdn.microsoft.com/en-us/library/bb531467%28v=vs.100%29.aspx -+// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, -+// and store the results in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu8_epi32 - FORCE_INLINE __m128i _mm_cvtepu8_epi32(__m128i a) - { - uint8x16_t u8x16 = vreinterpretq_u8_m128i(a); /* xxxx xxxx xxxx DCBA */ -@@ -7817,8 +6830,9 @@ FORCE_INLINE __m128i _mm_cvtepu8_epi32(__m128i a) - return vreinterpretq_m128i_u32(u32x4); - } - --// Converts the two unsigned 8-bit integers in the lower 16 bits to two --// unsigned 64-bit integers. -+// Zero extend packed unsigned 8-bit integers in the low 8 byte sof a to packed -+// 64-bit integers, and store the results in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu8_epi64 - FORCE_INLINE __m128i _mm_cvtepu8_epi64(__m128i a) - { - uint8x16_t u8x16 = vreinterpretq_u8_m128i(a); /* xxxx xxxx xxxx xxBA */ -@@ -7831,7 +6845,7 @@ FORCE_INLINE __m128i _mm_cvtepu8_epi64(__m128i a) - // Conditionally multiply the packed double-precision (64-bit) floating-point - // elements in a and b using the high 4 bits in imm8, sum the four products, and - // conditionally store the sum in dst using the low 4 bits of imm8. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dp_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dp_pd - FORCE_INLINE __m128d _mm_dp_pd(__m128d a, __m128d b, const int imm) - { - // Generate mask value from constant immediate bit value -@@ -7877,7 +6891,7 @@ FORCE_INLINE __m128d _mm_dp_pd(__m128d a, __m128d b, const int imm) - // Conditionally multiply the packed single-precision (32-bit) floating-point - // elements in a and b using the high 4 bits in imm8, sum the four products, - // and conditionally store the sum in dst using the low 4 bits of imm. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dp_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dp_ps - FORCE_INLINE __m128 _mm_dp_ps(__m128 a, __m128 b, const int imm) - { - #if defined(__aarch64__) -@@ -7918,22 +6932,24 @@ FORCE_INLINE __m128 _mm_dp_ps(__m128 a, __m128 b, const int imm) - return vreinterpretq_m128_f32(res); - } - --// Extracts the selected signed or unsigned 32-bit integer from a and zero --// extends. -+// Extract a 32-bit integer from a, selected with imm8, and store the result in -+// dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi32 - // FORCE_INLINE int _mm_extract_epi32(__m128i a, __constrange(0,4) int imm) - #define _mm_extract_epi32(a, imm) \ - vgetq_lane_s32(vreinterpretq_s32_m128i(a), (imm)) - --// Extracts the selected signed or unsigned 64-bit integer from a and zero --// extends. -+// Extract a 64-bit integer from a, selected with imm8, and store the result in -+// dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi64 - // FORCE_INLINE __int64 _mm_extract_epi64(__m128i a, __constrange(0,2) int imm) - #define _mm_extract_epi64(a, imm) \ - vgetq_lane_s64(vreinterpretq_s64_m128i(a), (imm)) - --// Extracts the selected signed or unsigned 8-bit integer from a and zero --// extends. --// FORCE_INLINE int _mm_extract_epi8(__m128i a, __constrange(0,16) int imm) --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi8 -+// Extract an 8-bit integer from a, selected with imm8, and store the result in -+// the lower element of dst. 
FORCE_INLINE int _mm_extract_epi8(__m128i a, -+// __constrange(0,16) int imm) -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi8 - #define _mm_extract_epi8(a, imm) vgetq_lane_u8(vreinterpretq_u8_m128i(a), (imm)) - - // Extracts the selected single-precision (32-bit) floating-point from a. -@@ -7943,7 +6959,7 @@ FORCE_INLINE __m128 _mm_dp_ps(__m128 a, __m128 b, const int imm) - // Round the packed double-precision (64-bit) floating-point elements in a down - // to an integer value, and store the results as packed double-precision - // floating-point elements in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_pd - FORCE_INLINE __m128d _mm_floor_pd(__m128d a) - { - #if defined(__aarch64__) -@@ -7957,7 +6973,7 @@ FORCE_INLINE __m128d _mm_floor_pd(__m128d a) - // Round the packed single-precision (32-bit) floating-point elements in a down - // to an integer value, and store the results as packed single-precision - // floating-point elements in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_ps - FORCE_INLINE __m128 _mm_floor_ps(__m128 a) - { - #if defined(__aarch64__) || defined(__ARM_FEATURE_DIRECTED_ROUNDING) -@@ -7972,7 +6988,7 @@ FORCE_INLINE __m128 _mm_floor_ps(__m128 a) - // an integer value, store the result as a double-precision floating-point - // element in the lower element of dst, and copy the upper element from a to the - // upper element of dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_sd - FORCE_INLINE __m128d _mm_floor_sd(__m128d a, __m128d b) - { - return _mm_move_sd(a, _mm_floor_pd(b)); -@@ -7982,18 +6998,15 @@ FORCE_INLINE __m128d _mm_floor_sd(__m128d a, __m128d b) - // an integer value, store the result as a single-precision floating-point - // element in the lower element of dst, and copy the upper 3 packed elements - // from a to the upper elements of dst. --// --// dst[31:0] := FLOOR(b[31:0]) --// dst[127:32] := a[127:32] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ss -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_ss - FORCE_INLINE __m128 _mm_floor_ss(__m128 a, __m128 b) - { - return _mm_move_ss(a, _mm_floor_ps(b)); - } - --// Inserts the least significant 32 bits of b into the selected 32-bit integer --// of a. -+// Copy a to dst, and insert the 32-bit integer i into dst at the location -+// specified by imm8. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi32 - // FORCE_INLINE __m128i _mm_insert_epi32(__m128i a, int b, - // __constrange(0,4) int imm) - #define _mm_insert_epi32(a, b, imm) \ -@@ -8002,8 +7015,9 @@ FORCE_INLINE __m128 _mm_floor_ss(__m128 a, __m128 b) - vsetq_lane_s32((b), vreinterpretq_s32_m128i(a), (imm))); \ - }) - --// Inserts the least significant 64 bits of b into the selected 64-bit integer --// of a. -+// Copy a to dst, and insert the 64-bit integer i into dst at the location -+// specified by imm8. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi64 - // FORCE_INLINE __m128i _mm_insert_epi64(__m128i a, __int64 b, - // __constrange(0,2) int imm) - #define _mm_insert_epi64(a, b, imm) \ -@@ -8012,8 +7026,9 @@ FORCE_INLINE __m128 _mm_floor_ss(__m128 a, __m128 b) - vsetq_lane_s64((b), vreinterpretq_s64_m128i(a), (imm))); \ - }) - --// Inserts the least significant 8 bits of b into the selected 8-bit integer --// of a. -+// Copy a to dst, and insert the lower 8-bit integer from i into dst at the -+// location specified by imm8. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi8 - // FORCE_INLINE __m128i _mm_insert_epi8(__m128i a, int b, - // __constrange(0,16) int imm) - #define _mm_insert_epi8(a, b, imm) \ -@@ -8025,7 +7040,7 @@ FORCE_INLINE __m128 _mm_floor_ss(__m128 a, __m128 b) - // Copy a to tmp, then insert a single-precision (32-bit) floating-point - // element from b into tmp using the control in imm8. Store tmp to dst using - // the mask in imm8 (elements are zeroed out when the corresponding bit is set). --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=insert_ps -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=insert_ps - #define _mm_insert_ps(a, b, imm8) \ - __extension__({ \ - float32x4_t tmp1 = \ -@@ -8045,17 +7060,9 @@ FORCE_INLINE __m128 _mm_floor_ss(__m128 a, __m128 b) - vbslq_f32(mask, all_zeros, vreinterpretq_f32_m128(tmp2))); \ - }) - --// epi versions of min/max --// Computes the pariwise maximums of the four signed 32-bit integer values of a --// and b. --// --// A 128-bit parameter that can be defined with the following equations: --// r0 := (a0 > b0) ? a0 : b0 --// r1 := (a1 > b1) ? a1 : b1 --// r2 := (a2 > b2) ? a2 : b2 --// r3 := (a3 > b3) ? 
a3 : b3 --// --// https://msdn.microsoft.com/en-us/library/vstudio/bb514055(v=vs.100).aspx -+// Compare packed signed 32-bit integers in a and b, and store packed maximum -+// values in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi32 - FORCE_INLINE __m128i _mm_max_epi32(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s32( -@@ -8064,7 +7071,7 @@ FORCE_INLINE __m128i _mm_max_epi32(__m128i a, __m128i b) - - // Compare packed signed 8-bit integers in a and b, and store packed maximum - // values in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi8 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi8 - FORCE_INLINE __m128i _mm_max_epi8(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s8( -@@ -8073,7 +7080,7 @@ FORCE_INLINE __m128i _mm_max_epi8(__m128i a, __m128i b) - - // Compare packed unsigned 16-bit integers in a and b, and store packed maximum - // values in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu16 - FORCE_INLINE __m128i _mm_max_epu16(__m128i a, __m128i b) - { - return vreinterpretq_m128i_u16( -@@ -8082,23 +7089,16 @@ FORCE_INLINE __m128i _mm_max_epu16(__m128i a, __m128i b) - - // Compare packed unsigned 32-bit integers in a and b, and store packed maximum - // values in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu32 - FORCE_INLINE __m128i _mm_max_epu32(__m128i a, __m128i b) - { - return vreinterpretq_m128i_u32( - vmaxq_u32(vreinterpretq_u32_m128i(a), vreinterpretq_u32_m128i(b))); - } - --// Computes the pariwise minima of the four signed 32-bit integer values of a --// and b. 
--// --// A 128-bit parameter that can be defined with the following equations: --// r0 := (a0 < b0) ? a0 : b0 --// r1 := (a1 < b1) ? a1 : b1 --// r2 := (a2 < b2) ? a2 : b2 --// r3 := (a3 < b3) ? a3 : b3 --// --// https://msdn.microsoft.com/en-us/library/vstudio/bb531476(v=vs.100).aspx -+// Compare packed signed 32-bit integers in a and b, and store packed minimum -+// values in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi32 - FORCE_INLINE __m128i _mm_min_epi32(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s32( -@@ -8107,7 +7107,7 @@ FORCE_INLINE __m128i _mm_min_epi32(__m128i a, __m128i b) - - // Compare packed signed 8-bit integers in a and b, and store packed minimum - // values in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi8 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi8 - FORCE_INLINE __m128i _mm_min_epi8(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s8( -@@ -8116,7 +7116,7 @@ FORCE_INLINE __m128i _mm_min_epi8(__m128i a, __m128i b) - - // Compare packed unsigned 16-bit integers in a and b, and store packed minimum - // values in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu16 - FORCE_INLINE __m128i _mm_min_epu16(__m128i a, __m128i b) - { - return vreinterpretq_m128i_u16( -@@ -8125,7 +7125,7 @@ FORCE_INLINE __m128i _mm_min_epu16(__m128i a, __m128i b) - - // Compare packed unsigned 32-bit integers in a and b, and store packed minimum - // values in dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu32 - FORCE_INLINE __m128i _mm_min_epu32(__m128i a, __m128i b) - { - return vreinterpretq_m128i_u32( -@@ -8134,21 +7134,7 @@ FORCE_INLINE __m128i _mm_min_epu32(__m128i a, __m128i b) - - // Horizontally compute the minimum amongst the packed unsigned 16-bit integers - // in a, store the minimum and index in dst, and zero the remaining bits in dst. --// --// index[2:0] := 0 --// min[15:0] := a[15:0] --// FOR j := 0 to 7 --// i := j*16 --// IF a[i+15:i] < min[15:0] --// index[2:0] := j --// min[15:0] := a[i+15:i] --// FI --// ENDFOR --// dst[15:0] := min[15:0] --// dst[18:16] := index[2:0] --// dst[127:19] := 0 --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_minpos_epu16 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_minpos_epu16 - FORCE_INLINE __m128i _mm_minpos_epu16(__m128i a) - { - __m128i dst; -@@ -8198,7 +7184,7 @@ FORCE_INLINE __m128i _mm_minpos_epu16(__m128i a) - // quadruplets from a. One quadruplet is selected from b starting at on the - // offset specified in imm8. Eight quadruplets are formed from sequential 8-bit - // integers selected from a starting at the offset specified in imm8. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mpsadbw_epu8 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mpsadbw_epu8 - FORCE_INLINE __m128i _mm_mpsadbw_epu8(__m128i a, __m128i b, const int imm) - { - uint8x16_t _a, _b; -@@ -8278,9 +7264,7 @@ FORCE_INLINE __m128i _mm_mpsadbw_epu8(__m128i a, __m128i b, const int imm) - - // Multiply the low signed 32-bit integers from each packed 64-bit element in - // a and b, and store the signed 64-bit results in dst. 
--// --// r0 := (int64_t)(int32_t)a0 * (int64_t)(int32_t)b0 --// r1 := (int64_t)(int32_t)a2 * (int64_t)(int32_t)b2 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_epi32 - FORCE_INLINE __m128i _mm_mul_epi32(__m128i a, __m128i b) - { - // vmull_s32 upcasts instead of masking, so we downcast. -@@ -8289,26 +7273,18 @@ FORCE_INLINE __m128i _mm_mul_epi32(__m128i a, __m128i b) - return vreinterpretq_m128i_s64(vmull_s32(a_lo, b_lo)); - } - --// Multiplies the 4 signed or unsigned 32-bit integers from a by the 4 signed or --// unsigned 32-bit integers from b. --// https://msdn.microsoft.com/en-us/library/vstudio/bb531409(v=vs.100).aspx -+// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit -+// integers, and store the low 32 bits of the intermediate integers in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi32 - FORCE_INLINE __m128i _mm_mullo_epi32(__m128i a, __m128i b) - { - return vreinterpretq_m128i_s32( - vmulq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); - } - --// Packs the 8 unsigned 32-bit integers from a and b into unsigned 16-bit --// integers and saturates. --// --// r0 := UnsignedSaturate(a0) --// r1 := UnsignedSaturate(a1) --// r2 := UnsignedSaturate(a2) --// r3 := UnsignedSaturate(a3) --// r4 := UnsignedSaturate(b0) --// r5 := UnsignedSaturate(b1) --// r6 := UnsignedSaturate(b2) --// r7 := UnsignedSaturate(b3) -+// Convert packed signed 32-bit integers from a and b to packed 16-bit integers -+// using unsigned saturation, and store the results in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi32 - FORCE_INLINE __m128i _mm_packus_epi32(__m128i a, __m128i b) - { - return vreinterpretq_m128i_u16( -@@ -8319,7 +7295,7 @@ FORCE_INLINE __m128i _mm_packus_epi32(__m128i a, __m128i b) - // Round the packed double-precision (64-bit) floating-point elements in a using - // the rounding parameter, and store the results as packed double-precision - // floating-point elements in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_pd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_pd - FORCE_INLINE __m128d _mm_round_pd(__m128d a, int rounding) - { - #if defined(__aarch64__) -@@ -8448,7 +7424,7 @@ FORCE_INLINE __m128 _mm_round_ps(__m128 a, int rounding) - // the rounding parameter, store the result as a double-precision floating-point - // element in the lower element of dst, and copy the upper element from a to the - // upper element of dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_sd -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_sd - FORCE_INLINE __m128d _mm_round_sd(__m128d a, __m128d b, int rounding) - { - return _mm_move_sd(a, _mm_round_pd(b, rounding)); -@@ -8468,7 +7444,7 @@ FORCE_INLINE __m128d _mm_round_sd(__m128d a, __m128d b, int rounding) - // (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress - // exceptions _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see - // _MM_SET_ROUNDING_MODE --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_ss -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_ss - FORCE_INLINE __m128 _mm_round_ss(__m128 a, __m128 b, int rounding) - { - return _mm_move_ss(a, _mm_round_ps(b, rounding)); -@@ -8477,10 +7453,7 @@ FORCE_INLINE __m128 _mm_round_ss(__m128 a, __m128 b, int rounding) - // Load 128-bits of integer data from memory into dst using a non-temporal - // memory hint. mem_addr must be aligned on a 16-byte boundary or a - // general-protection exception may be generated. --// --// dst[127:0] := MEM[mem_addr+127:mem_addr] --// --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_load_si128 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_load_si128 - FORCE_INLINE __m128i _mm_stream_load_si128(__m128i *p) - { - #if __has_builtin(__builtin_nontemporal_store) -@@ -8492,7 +7465,7 @@ FORCE_INLINE __m128i _mm_stream_load_si128(__m128i *p) - - // Compute the bitwise NOT of a and then AND with a 128-bit vector containing - // all 1's, and return 1 if the result is zero, otherwise return 0. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_all_ones -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_all_ones - FORCE_INLINE int _mm_test_all_ones(__m128i a) - { - return (uint64_t) (vgetq_lane_s64(a, 0) & vgetq_lane_s64(a, 1)) == -@@ -8501,7 +7474,7 @@ FORCE_INLINE int _mm_test_all_ones(__m128i a) - - // Compute the bitwise AND of 128 bits (representing integer data) in a and - // mask, and return 1 if the result is zero, otherwise return 0. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_all_zeros -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_all_zeros - FORCE_INLINE int _mm_test_all_zeros(__m128i a, __m128i mask) - { - int64x2_t a_and_mask = -@@ -8514,7 +7487,7 @@ FORCE_INLINE int _mm_test_all_zeros(__m128i a, __m128i mask) - // the bitwise NOT of a and then AND with mask, and set CF to 1 if the result is - // zero, otherwise set CF to 0. Return 1 if both the ZF and CF values are zero, - // otherwise return 0. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_test_mix_ones_zero -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_test_mix_ones_zero - FORCE_INLINE int _mm_test_mix_ones_zeros(__m128i a, __m128i mask) - { - uint64x2_t zf = -@@ -8529,7 +7502,7 @@ FORCE_INLINE int _mm_test_mix_ones_zeros(__m128i a, __m128i mask) - // and set ZF to 1 if the result is zero, otherwise set ZF to 0. Compute the - // bitwise NOT of a and then AND with b, and set CF to 1 if the result is zero, - // otherwise set CF to 0. Return the CF value. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testc_si128 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testc_si128 - FORCE_INLINE int _mm_testc_si128(__m128i a, __m128i b) - { - int64x2_t s64 = -@@ -8542,14 +7515,14 @@ FORCE_INLINE int _mm_testc_si128(__m128i a, __m128i b) - // bitwise NOT of a and then AND with b, and set CF to 1 if the result is zero, - // otherwise set CF to 0. Return 1 if both the ZF and CF values are zero, - // otherwise return 0. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testnzc_si128 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testnzc_si128 - #define _mm_testnzc_si128(a, b) _mm_test_mix_ones_zeros(a, b) - - // Compute the bitwise AND of 128 bits (representing integer data) in a and b, - // and set ZF to 1 if the result is zero, otherwise set ZF to 0. Compute the - // bitwise NOT of a and then AND with b, and set CF to 1 if the result is zero, - // otherwise set CF to 0. Return the ZF value. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testz_si128 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testz_si128 - FORCE_INLINE int _mm_testz_si128(__m128i a, __m128i b) - { - int64x2_t s64 = -@@ -9028,7 +8001,7 @@ FORCE_INLINE int _sse2neon_sido_negative(int res, int lb, int imm8, int bound) - FORCE_INLINE int _sse2neon_clz(unsigned int x) - { - #if _MSC_VER -- DWORD cnt = 0; -+ unsigned long cnt = 0; - if (_BitScanForward(&cnt, x)) - return cnt; - return 32; -@@ -9040,7 +8013,7 @@ FORCE_INLINE int _sse2neon_clz(unsigned int x) - FORCE_INLINE int _sse2neon_ctz(unsigned int x) - { - #if _MSC_VER -- DWORD cnt = 0; -+ unsigned long cnt = 0; - if (_BitScanReverse(&cnt, x)) - return 31 - cnt; - return 32; -@@ -9053,18 +8026,16 @@ FORCE_INLINE int _sse2neon_ctzll(unsigned long long x) - { - #if _MSC_VER - unsigned long cnt; --#ifdef defined(SSE2NEON_HAS_BITSCAN64) -- (defined(_M_AMD64) || defined(__x86_64__)) -- if((_BitScanForward64(&cnt, x)) -- return (int)(cnt); -+#if defined(SSE2NEON_HAS_BITSCAN64) -+ if ((_BitScanForward64(&cnt, x)) -+ return (int)(cnt); - #else - if (_BitScanForward(&cnt, (unsigned long) (x))) - return (int) cnt; - if (_BitScanForward(&cnt, (unsigned long) (x >> 32))) - return (int) (cnt + 32); --#endif -- return 64; --#else -+#endif /* SSE2NEON_HAS_BITSCAN64 */ -+#else /* assume GNU compatible compilers */ - return x != 0 ? __builtin_ctzll(x) : 64; - #endif - } -@@ -9155,7 +8126,7 @@ FORCE_INLINE int _mm_cmpestrc(__m128i a, - - // Compare packed strings in a and b with lengths la and lb using the control - // in imm8, and store the generated index in dst. 
--// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestri -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestri - FORCE_INLINE int _mm_cmpestri(__m128i a, - int la, - __m128i b, -@@ -9168,7 +8139,7 @@ FORCE_INLINE int _mm_cmpestri(__m128i a, - - // Compare packed strings in a and b with lengths la and lb using the control - // in imm8, and store the generated mask in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrm -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrm - FORCE_INLINE __m128i - _mm_cmpestrm(__m128i a, int la, __m128i b, int lb, const int imm8) - { -@@ -9324,8 +8295,8 @@ FORCE_INLINE __m128i _mm_cmpgt_epi64(__m128i a, __m128i b) - } - - // Starting with the initial value in crc, accumulates a CRC32 value for --// unsigned 16-bit integer v. --// https://msdn.microsoft.com/en-us/library/bb531411(v=vs.100) -+// unsigned 16-bit integer v, and stores the result in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_crc32_u16 - FORCE_INLINE uint32_t _mm_crc32_u16(uint32_t crc, uint16_t v) - { - #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) -@@ -9342,8 +8313,8 @@ FORCE_INLINE uint32_t _mm_crc32_u16(uint32_t crc, uint16_t v) - } - - // Starting with the initial value in crc, accumulates a CRC32 value for --// unsigned 32-bit integer v. --// https://msdn.microsoft.com/en-us/library/bb531394(v=vs.100) -+// unsigned 32-bit integer v, and stores the result in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_crc32_u32 - FORCE_INLINE uint32_t _mm_crc32_u32(uint32_t crc, uint32_t v) - { - #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) -@@ -9360,8 +8331,8 @@ FORCE_INLINE uint32_t _mm_crc32_u32(uint32_t crc, uint32_t v) - } - - // Starting with the initial value in crc, accumulates a CRC32 value for --// unsigned 64-bit integer v. --// https://msdn.microsoft.com/en-us/library/bb514033(v=vs.100) -+// unsigned 64-bit integer v, and stores the result in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_crc32_u64 - FORCE_INLINE uint64_t _mm_crc32_u64(uint64_t crc, uint64_t v) - { - #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) -@@ -9376,8 +8347,8 @@ FORCE_INLINE uint64_t _mm_crc32_u64(uint64_t crc, uint64_t v) - } - - // Starting with the initial value in crc, accumulates a CRC32 value for --// unsigned 8-bit integer v. --// https://msdn.microsoft.com/en-us/library/bb514036(v=vs.100) -+// unsigned 8-bit integer v, and stores the result in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_crc32_u8 - FORCE_INLINE uint32_t _mm_crc32_u8(uint32_t crc, uint8_t v) - { - #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) -@@ -9486,43 +8457,61 @@ FORCE_INLINE uint32_t _mm_crc32_u8(uint32_t crc, uint8_t v) - - /* X Macro trick. 
See https://en.wikipedia.org/wiki/X_Macro */ - #define SSE2NEON_AES_H0(x) (x) --static const uint8_t SSE2NEON_sbox[256] = SSE2NEON_AES_SBOX(SSE2NEON_AES_H0); --static const uint8_t SSE2NEON_rsbox[256] = SSE2NEON_AES_RSBOX(SSE2NEON_AES_H0); -+static const uint8_t _sse2neon_sbox[256] = SSE2NEON_AES_SBOX(SSE2NEON_AES_H0); -+static const uint8_t _sse2neon_rsbox[256] = SSE2NEON_AES_RSBOX(SSE2NEON_AES_H0); - #undef SSE2NEON_AES_H0 - --// In the absence of crypto extensions, implement aesenc using regular neon -+/* x_time function and matrix multiply function */ -+#if !defined(__aarch64__) -+#define SSE2NEON_XT(x) (((x) << 1) ^ ((((x) >> 7) & 1) * 0x1b)) -+#define SSE2NEON_MULTIPLY(x, y) \ -+ (((y & 1) * x) ^ ((y >> 1 & 1) * SSE2NEON_XT(x)) ^ \ -+ ((y >> 2 & 1) * SSE2NEON_XT(SSE2NEON_XT(x))) ^ \ -+ ((y >> 3 & 1) * SSE2NEON_XT(SSE2NEON_XT(SSE2NEON_XT(x)))) ^ \ -+ ((y >> 4 & 1) * SSE2NEON_XT(SSE2NEON_XT(SSE2NEON_XT(SSE2NEON_XT(x)))))) -+#endif -+ -+// In the absence of crypto extensions, implement aesenc using regular NEON - // intrinsics instead. See: - // https://www.workofard.com/2017/01/accelerated-aes-for-the-arm64-linux-kernel/ - // https://www.workofard.com/2017/07/ghash-for-low-end-cores/ and --// https://github.com/ColinIanKing/linux-next-mirror/blob/b5f466091e130caaf0735976648f72bd5e09aa84/crypto/aegis128-neon-inner.c#L52 --// for more information Reproduced with permission of the author. -+// for more information. 
- FORCE_INLINE __m128i _mm_aesenc_si128(__m128i a, __m128i RoundKey) - { - #if defined(__aarch64__) -- static const uint8_t shift_rows[] = {0x0, 0x5, 0xa, 0xf, 0x4, 0x9, -- 0xe, 0x3, 0x8, 0xd, 0x2, 0x7, -- 0xc, 0x1, 0x6, 0xb}; -- static const uint8_t ror32by8[] = {0x1, 0x2, 0x3, 0x0, 0x5, 0x6, 0x7, 0x4, -- 0x9, 0xa, 0xb, 0x8, 0xd, 0xe, 0xf, 0xc}; -+ static const uint8_t shift_rows[] = { -+ 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3, -+ 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb, -+ }; -+ static const uint8_t ror32by8[] = { -+ 0x1, 0x2, 0x3, 0x0, 0x5, 0x6, 0x7, 0x4, -+ 0x9, 0xa, 0xb, 0x8, 0xd, 0xe, 0xf, 0xc, -+ }; - - uint8x16_t v; - uint8x16_t w = vreinterpretq_u8_m128i(a); - -- // shift rows -+ /* shift rows */ - w = vqtbl1q_u8(w, vld1q_u8(shift_rows)); - -- // sub bytes -- v = vqtbl4q_u8(_sse2neon_vld1q_u8_x4(SSE2NEON_sbox), w); -- v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(SSE2NEON_sbox + 0x40), w - 0x40); -- v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(SSE2NEON_sbox + 0x80), w - 0x80); -- v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(SSE2NEON_sbox + 0xc0), w - 0xc0); -+ /* sub bytes */ -+ // Here, we separate the whole 256-bytes table into 4 64-bytes tables, and -+ // look up each of the table. After each lookup, we load the next table -+ // which locates at the next 64-bytes. In the meantime, the index in the -+ // table would be smaller than it was, so the index parameters of -+ // `vqtbx4q_u8()` need to be added the same constant as the loaded tables. 
-+ v = vqtbl4q_u8(_sse2neon_vld1q_u8_x4(_sse2neon_sbox), w); -+ // 'w-0x40' equals to 'vsubq_u8(w, vdupq_n_u8(0x40))' -+ v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(_sse2neon_sbox + 0x40), w - 0x40); -+ v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(_sse2neon_sbox + 0x80), w - 0x80); -+ v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(_sse2neon_sbox + 0xc0), w - 0xc0); - -- // mix columns -+ /* mix columns */ - w = (v << 1) ^ (uint8x16_t) (((int8x16_t) v >> 7) & 0x1b); - w ^= (uint8x16_t) vrev32q_u16((uint16x8_t) v); - w ^= vqtbl1q_u8(v ^ w, vld1q_u8(ror32by8)); - -- // add round key -+ /* add round key */ - return vreinterpretq_m128i_u8(w) ^ RoundKey; - - #else /* ARMv7-A implementation for a table-based AES */ -@@ -9587,31 +8576,34 @@ FORCE_INLINE __m128i _mm_aesenc_si128(__m128i a, __m128i RoundKey) - FORCE_INLINE __m128i _mm_aesdec_si128(__m128i a, __m128i RoundKey) - { - #if defined(__aarch64__) -- static const uint8_t inv_shift_rows[] = {0x0, 0xd, 0xa, 0x7, 0x4, 0x1, -- 0xe, 0xb, 0x8, 0x5, 0x2, 0xf, -- 0xc, 0x9, 0x6, 0x3}; -- static const uint8_t ror32by8[] = {0x1, 0x2, 0x3, 0x0, 0x5, 0x6, 0x7, 0x4, -- 0x9, 0xa, 0xb, 0x8, 0xd, 0xe, 0xf, 0xc}; -+ static const uint8_t inv_shift_rows[] = { -+ 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb, -+ 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3, -+ }; -+ static const uint8_t ror32by8[] = { -+ 0x1, 0x2, 0x3, 0x0, 0x5, 0x6, 0x7, 0x4, -+ 0x9, 0xa, 0xb, 0x8, 0xd, 0xe, 0xf, 0xc, -+ }; - - uint8x16_t v; - uint8x16_t w = vreinterpretq_u8_m128i(a); - -- // shift rows -+ // inverse shift rows - w = vqtbl1q_u8(w, vld1q_u8(inv_shift_rows)); - -- // sub bytes -- v = vqtbl4q_u8(_sse2neon_vld1q_u8_x4(SSE2NEON_rsbox), w); -- v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(SSE2NEON_rsbox + 0x40), w - 0x40); -- v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(SSE2NEON_rsbox + 0x80), w - 0x80); -- v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(SSE2NEON_rsbox + 0xc0), w - 0xc0); -+ // inverse sub bytes -+ v = vqtbl4q_u8(_sse2neon_vld1q_u8_x4(_sse2neon_rsbox), w); -+ v = vqtbx4q_u8(v, 
_sse2neon_vld1q_u8_x4(_sse2neon_rsbox + 0x40), w - 0x40); -+ v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(_sse2neon_rsbox + 0x80), w - 0x80); -+ v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(_sse2neon_rsbox + 0xc0), w - 0xc0); - -+ // inverse mix columns - // muliplying 'v' by 4 in GF(2^8) - w = (v << 1) ^ (uint8x16_t) (((int8x16_t) v >> 7) & 0x1b); - w = (w << 1) ^ (uint8x16_t) (((int8x16_t) w >> 7) & 0x1b); - v ^= w; - v ^= (uint8x16_t) vrev32q_u16((uint16x8_t) w); - -- // mix columns - w = (v << 1) ^ (uint8x16_t) (((int8x16_t) v >> 7) & - 0x1b); // muliplying 'v' by 2 in GF(2^8) - w ^= (uint8x16_t) vrev32q_u16((uint16x8_t) v); -@@ -9621,35 +8613,29 @@ FORCE_INLINE __m128i _mm_aesdec_si128(__m128i a, __m128i RoundKey) - return vreinterpretq_m128i_u8(w) ^ RoundKey; - - #else /* ARMv7-A NEON implementation */ --/* FIXME: optimized for NEON */ --#define XT(x) (((x) << 1) ^ ((((x) >> 7) & 1) * 0x1b)) --#define MULTIPLY(x, y) \ -- (((y & 1) * x) ^ ((y >> 1 & 1) * XT(x)) ^ ((y >> 2 & 1) * XT(XT(x))) ^ \ -- ((y >> 3 & 1) * XT(XT(XT(x)))) ^ ((y >> 4 & 1) * XT(XT(XT(XT(x)))))) -- -+ /* FIXME: optimized for NEON */ - uint8_t i, e, f, g, h, v[4][4]; - uint8_t *_a = (uint8_t *) &a; - for (i = 0; i < 16; ++i) { -- v[((i / 4) + (i % 4)) % 4][i % 4] = SSE2NEON_rsbox[_a[i]]; -+ v[((i / 4) + (i % 4)) % 4][i % 4] = _sse2neon_rsbox[_a[i]]; - } - -+ // inverse mix columns - for (i = 0; i < 4; ++i) { - e = v[i][0]; - f = v[i][1]; - g = v[i][2]; - h = v[i][3]; - -- v[i][0] = MULTIPLY(e, 0x0e) ^ MULTIPLY(f, 0x0b) ^ MULTIPLY(g, 0x0d) ^ -- MULTIPLY(h, 0x09); -- v[i][1] = MULTIPLY(e, 0x09) ^ MULTIPLY(f, 0x0e) ^ MULTIPLY(g, 0x0b) ^ -- MULTIPLY(h, 0x0d); -- v[i][2] = MULTIPLY(e, 0x0d) ^ MULTIPLY(f, 0x09) ^ MULTIPLY(g, 0x0e) ^ -- MULTIPLY(h, 0x0b); -- v[i][3] = MULTIPLY(e, 0x0b) ^ MULTIPLY(f, 0x0d) ^ MULTIPLY(g, 0x09) ^ -- MULTIPLY(h, 0x0e); -+ v[i][0] = SSE2NEON_MULTIPLY(e, 0x0e) ^ SSE2NEON_MULTIPLY(f, 0x0b) ^ -+ SSE2NEON_MULTIPLY(g, 0x0d) ^ SSE2NEON_MULTIPLY(h, 0x09); -+ v[i][1] = 
SSE2NEON_MULTIPLY(e, 0x09) ^ SSE2NEON_MULTIPLY(f, 0x0e) ^ -+ SSE2NEON_MULTIPLY(g, 0x0b) ^ SSE2NEON_MULTIPLY(h, 0x0d); -+ v[i][2] = SSE2NEON_MULTIPLY(e, 0x0d) ^ SSE2NEON_MULTIPLY(f, 0x09) ^ -+ SSE2NEON_MULTIPLY(g, 0x0e) ^ SSE2NEON_MULTIPLY(h, 0x0b); -+ v[i][3] = SSE2NEON_MULTIPLY(e, 0x0b) ^ SSE2NEON_MULTIPLY(f, 0x0d) ^ -+ SSE2NEON_MULTIPLY(g, 0x09) ^ SSE2NEON_MULTIPLY(h, 0x0e); - } --#undef XT --#undef MULTIPLY - - return vreinterpretq_m128i_u8(vld1q_u8((uint8_t *) v)) ^ RoundKey; - #endif -@@ -9657,7 +8643,7 @@ FORCE_INLINE __m128i _mm_aesdec_si128(__m128i a, __m128i RoundKey) - - // Perform the last round of an AES encryption flow on data (state) in a using - // the round key in RoundKey, and store the result in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesenclast_si128 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenclast_si128 - FORCE_INLINE __m128i _mm_aesenclast_si128(__m128i a, __m128i RoundKey) - { - #if defined(__aarch64__) -@@ -9673,59 +8659,166 @@ FORCE_INLINE __m128i _mm_aesenclast_si128(__m128i a, __m128i RoundKey) - w = vqtbl1q_u8(w, vld1q_u8(shift_rows)); - - // sub bytes -- v = vqtbl4q_u8(_sse2neon_vld1q_u8_x4(SSE2NEON_sbox), w); -- // 'w-0x40' equals to 'vsubq_u8(w, vdupq_n_u8(0x40))' -- v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(SSE2NEON_sbox + 0x40), w - 0x40); -- v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(SSE2NEON_sbox + 0x80), w - 0x80); -- v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(SSE2NEON_sbox + 0xc0), w - 0xc0); -+ v = vqtbl4q_u8(_sse2neon_vld1q_u8_x4(_sse2neon_sbox), w); -+ v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(_sse2neon_sbox + 0x40), w - 0x40); -+ v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(_sse2neon_sbox + 0x80), w - 0x80); -+ v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(_sse2neon_sbox + 0xc0), w - 0xc0); - -- // add round key -+ // add round key - return vreinterpretq_m128i_u8(v) ^ RoundKey; - - #else /* ARMv7-A implementation */ - uint8_t v[16] = { -- 
SSE2NEON_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 0)], -- SSE2NEON_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 5)], -- SSE2NEON_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 10)], -- SSE2NEON_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 15)], -- SSE2NEON_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 4)], -- SSE2NEON_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 9)], -- SSE2NEON_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 14)], -- SSE2NEON_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 3)], -- SSE2NEON_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 8)], -- SSE2NEON_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 13)], -- SSE2NEON_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 2)], -- SSE2NEON_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 7)], -- SSE2NEON_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 12)], -- SSE2NEON_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 1)], -- SSE2NEON_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 6)], -- SSE2NEON_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 11)], -+ _sse2neon_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 0)], -+ _sse2neon_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 5)], -+ _sse2neon_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 10)], -+ _sse2neon_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 15)], -+ _sse2neon_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 4)], -+ _sse2neon_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 9)], -+ _sse2neon_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 14)], -+ _sse2neon_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 3)], -+ _sse2neon_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 8)], -+ _sse2neon_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 13)], -+ _sse2neon_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 2)], -+ _sse2neon_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 7)], -+ _sse2neon_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 12)], -+ _sse2neon_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 1)], -+ _sse2neon_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 6)], -+ 
_sse2neon_sbox[vgetq_lane_u8(vreinterpretq_u8_m128i(a), 11)], - }; - - return vreinterpretq_m128i_u8(vld1q_u8(v)) ^ RoundKey; - #endif - } - -+// Perform the last round of an AES decryption flow on data (state) in a using -+// the round key in RoundKey, and store the result in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdeclast_si128 -+FORCE_INLINE __m128i _mm_aesdeclast_si128(__m128i a, __m128i RoundKey) -+{ -+#if defined(__aarch64__) -+ static const uint8_t inv_shift_rows[] = { -+ 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb, -+ 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3, -+ }; -+ -+ uint8x16_t v; -+ uint8x16_t w = vreinterpretq_u8_m128i(a); -+ -+ // inverse shift rows -+ w = vqtbl1q_u8(w, vld1q_u8(inv_shift_rows)); -+ -+ // inverse sub bytes -+ v = vqtbl4q_u8(_sse2neon_vld1q_u8_x4(_sse2neon_rsbox), w); -+ v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(_sse2neon_rsbox + 0x40), w - 0x40); -+ v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(_sse2neon_rsbox + 0x80), w - 0x80); -+ v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(_sse2neon_rsbox + 0xc0), w - 0xc0); -+ -+ // add round key -+ return vreinterpretq_m128i_u8(v) ^ RoundKey; -+ -+#else /* ARMv7-A NEON implementation */ -+ /* FIXME: optimized for NEON */ -+ uint8_t v[4][4]; -+ uint8_t *_a = (uint8_t *) &a; -+ for (int i = 0; i < 16; ++i) { -+ v[((i / 4) + (i % 4)) % 4][i % 4] = _sse2neon_rsbox[_a[i]]; -+ } -+ -+ return vreinterpretq_m128i_u8(vld1q_u8((uint8_t *) v)) ^ RoundKey; -+#endif -+} -+ -+// Perform the InvMixColumns transformation on a and store the result in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesimc_si128 -+FORCE_INLINE __m128i _mm_aesimc_si128(__m128i a) -+{ -+#if defined(__aarch64__) -+ static const uint8_t ror32by8[] = { -+ 0x1, 0x2, 0x3, 0x0, 0x5, 0x6, 0x7, 0x4, -+ 0x9, 0xa, 0xb, 0x8, 0xd, 0xe, 0xf, 0xc, -+ }; -+ uint8x16_t v = vreinterpretq_u8_m128i(a); -+ uint8x16_t w; -+ -+ // multiplying 'v' by 4 in GF(2^8) -+ w = (v << 1) ^ (uint8x16_t) (((int8x16_t) v >> 7) & 0x1b); -+ w = (w << 1) ^ (uint8x16_t) (((int8x16_t) w >> 7) & 0x1b); -+ v ^= w; -+ v ^= (uint8x16_t) vrev32q_u16((uint16x8_t) w); -+ -+ // multiplying 'v' by 2 in GF(2^8) -+ w = (v << 1) ^ (uint8x16_t) (((int8x16_t) v >> 7) & 0x1b); -+ w ^= (uint8x16_t) vrev32q_u16((uint16x8_t) v); -+ w ^= vqtbl1q_u8(v ^ w, vld1q_u8(ror32by8)); -+ return vreinterpretq_m128i_u8(w); -+ -+#else /* ARMv7-A NEON implementation */ -+ uint8_t i, e, f, g, h, v[4][4]; -+ vst1q_u8((uint8_t *) v, vreinterpretq_u8_m128i(a)); -+ for (i = 0; i < 4; ++i) { -+ e = v[i][0]; -+ f = v[i][1]; -+ g = v[i][2]; -+ h = v[i][3]; -+ -+ v[i][0] = SSE2NEON_MULTIPLY(e, 0x0e) ^ SSE2NEON_MULTIPLY(f, 0x0b) ^ -+ SSE2NEON_MULTIPLY(g, 0x0d) ^ SSE2NEON_MULTIPLY(h, 0x09); -+ v[i][1] = SSE2NEON_MULTIPLY(e, 0x09) ^ SSE2NEON_MULTIPLY(f, 0x0e) ^ -+ SSE2NEON_MULTIPLY(g, 0x0b) ^ SSE2NEON_MULTIPLY(h, 0x0d); -+ v[i][2] = SSE2NEON_MULTIPLY(e, 0x0d) ^ SSE2NEON_MULTIPLY(f, 0x09) ^ -+ SSE2NEON_MULTIPLY(g, 0x0e) ^ SSE2NEON_MULTIPLY(h, 0x0b); -+ v[i][3] = SSE2NEON_MULTIPLY(e, 0x0b) ^ SSE2NEON_MULTIPLY(f, 0x0d) ^ -+ SSE2NEON_MULTIPLY(g, 0x09) ^ SSE2NEON_MULTIPLY(h, 0x0e); -+ } -+ -+ return vreinterpretq_m128i_u8(vld1q_u8((uint8_t *) v)); -+#endif -+} -+ -+// Assist in expanding the AES cipher key by computing steps towards generating -+// a round key for encryption cipher using data from a and an 8-bit round -+// constant specified in imm8, and store the result in dst. 
-+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aeskeygenassist_si128 -+// - // Emits the Advanced Encryption Standard (AES) instruction aeskeygenassist. - // This instruction generates a round key for AES encryption. See - // https://kazakov.life/2017/11/01/cryptocurrency-mining-on-ios-devices/ - // for details. --// --// https://msdn.microsoft.com/en-us/library/cc714138(v=vs.120).aspx --FORCE_INLINE __m128i _mm_aeskeygenassist_si128(__m128i key, const int rcon) -+FORCE_INLINE __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon) - { -- uint32_t X1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0x55)); -- uint32_t X3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0xFF)); -+#if defined(__aarch64__) -+ uint8x16_t _a = vreinterpretq_u8_m128i(a); -+ uint8x16_t v = vqtbl4q_u8(_sse2neon_vld1q_u8_x4(_sse2neon_sbox), _a); -+ v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(_sse2neon_sbox + 0x40), _a - 0x40); -+ v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(_sse2neon_sbox + 0x80), _a - 0x80); -+ v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(_sse2neon_sbox + 0xc0), _a - 0xc0); -+ -+ uint32x4_t v_u32 = vreinterpretq_u32_u8(v); -+ uint32x4_t ror_v = vorrq_u32(vshrq_n_u32(v_u32, 8), vshlq_n_u32(v_u32, 24)); -+ uint32x4_t ror_xor_v = veorq_u32(ror_v, vdupq_n_u32(rcon)); -+ -+ return vreinterpretq_m128i_u32(vtrn2q_u32(v_u32, ror_xor_v)); -+ -+#else /* ARMv7-A NEON implementation */ -+ uint32_t X1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0x55)); -+ uint32_t X3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0xFF)); - for (int i = 0; i < 4; ++i) { -- ((uint8_t *) &X1)[i] = SSE2NEON_sbox[((uint8_t *) &X1)[i]]; -- ((uint8_t *) &X3)[i] = SSE2NEON_sbox[((uint8_t *) &X3)[i]]; -+ ((uint8_t *) &X1)[i] = _sse2neon_sbox[((uint8_t *) &X1)[i]]; -+ ((uint8_t *) &X3)[i] = _sse2neon_sbox[((uint8_t *) &X3)[i]]; - } - return _mm_set_epi32(((X3 >> 8) | (X3 << 24)) ^ rcon, X3, - ((X1 >> 8) | (X1 << 24)) ^ rcon, X1); -+#endif - } - #undef SSE2NEON_AES_SBOX - #undef SSE2NEON_AES_RSBOX - 
-+#if defined(__aarch64__) -+#undef SSE2NEON_XT -+#undef SSE2NEON_MULTIPLY -+#endif -+ - #else /* __ARM_FEATURE_CRYPTO */ - // Implements equivalent of 'aesenc' by combining AESE (with an empty key) and - // AESMC and then manually applying the real key as an xor operation. This -@@ -9750,7 +8843,9 @@ FORCE_INLINE __m128i _mm_aesdec_si128(__m128i a, __m128i RoundKey) - vreinterpretq_u8_m128i(RoundKey))); - } - --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesenclast_si128 -+// Perform the last round of an AES encryption flow on data (state) in a using -+// the round key in RoundKey, and store the result in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenclast_si128 - FORCE_INLINE __m128i _mm_aesenclast_si128(__m128i a, __m128i RoundKey) - { - return _mm_xor_si128(vreinterpretq_m128i_u8(vaeseq_u8( -@@ -9758,6 +8853,23 @@ FORCE_INLINE __m128i _mm_aesenclast_si128(__m128i a, __m128i RoundKey) - RoundKey); - } - -+// Perform the last round of an AES decryption flow on data (state) in a using -+// the round key in RoundKey, and store the result in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdeclast_si128 -+FORCE_INLINE __m128i _mm_aesdeclast_si128(__m128i a, __m128i RoundKey) -+{ -+ return vreinterpretq_m128i_u8( -+ vaesdq_u8(vreinterpretq_u8_m128i(a), vdupq_n_u8(0)) ^ -+ vreinterpretq_u8_m128i(RoundKey)); -+} -+ -+// Perform the InvMixColumns transformation on a and store the result in dst. -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesimc_si128 -+FORCE_INLINE __m128i _mm_aesimc_si128(__m128i a) -+{ -+ return vreinterpretq_m128i_u8(vaesimcq_u8(vreinterpretq_u8_m128i(a))); -+} -+ - // Assist in expanding the AES cipher key by computing steps towards generating - // a round key for encryption cipher using data from a and an 8-bit round - // constant specified in imm8, and store the result in dst." 
-@@ -9783,7 +8895,7 @@ FORCE_INLINE __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon) - - // Perform a carry-less multiplication of two 64-bit integers, selected from a - // and b according to imm8, and store the results in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clmulepi64_si128 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128 - FORCE_INLINE __m128i _mm_clmulepi64_si128(__m128i _a, __m128i _b, const int imm) - { - uint64x2_t a = vreinterpretq_u64_m128i(_a); -@@ -9828,7 +8940,7 @@ FORCE_INLINE unsigned int _sse2neon_mm_get_denormals_zero_mode() - - // Count the number of bits set to 1 in unsigned 32-bit integer a, and - // return that count in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_u32 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_u32 - FORCE_INLINE int _mm_popcnt_u32(unsigned int a) - { - #if defined(__aarch64__) -@@ -9855,7 +8967,7 @@ FORCE_INLINE int _mm_popcnt_u32(unsigned int a) - - // Count the number of bits set to 1 in unsigned 64-bit integer a, and - // return that count in dst. --// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_u64 -+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_u64 - FORCE_INLINE int64_t _mm_popcnt_u64(uint64_t a) - { - #if defined(__aarch64__) -@@ -9911,7 +9023,6 @@ FORCE_INLINE void _sse2neon_mm_set_denormals_zero_mode(unsigned int flag) - - // Return the current 64-bit value of the processor's time-stamp counter. 
- // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=rdtsc -- - FORCE_INLINE uint64_t _rdtsc(void) - { - #if defined(__aarch64__) diff --git a/bazel/patches/emp-tool.patch b/bazel/patches/emp-tool.patch deleted file mode 100644 index 48220f890..000000000 --- a/bazel/patches/emp-tool.patch +++ /dev/null @@ -1,163 +0,0 @@ -diff --git a/emp-tool/utils/aes.h b/emp-tool/utils/aes.h -index 0235544..75a8486 100644 ---- a/emp-tool/utils/aes.h -+++ b/emp-tool/utils/aes.h -@@ -54,6 +54,10 @@ - - #include "emp-tool/utils/block.h" - -+#ifdef __aarch64__ -+#include "emp-tool/utils/sse2neon.h" -+#endif -+ - namespace emp { - - typedef struct { block rd_key[11]; unsigned int rounds; } AES_KEY; -@@ -103,6 +107,7 @@ AES_set_encrypt_key(const block userkey, AES_KEY *key) { - - #ifdef __x86_64__ - __attribute__((target("aes,sse2"))) -+#endif - inline void AES_ecb_encrypt_blks(block *blks, unsigned int nblks, const AES_KEY *key) { - for (unsigned int i = 0; i < nblks; ++i) - blks[i] = _mm_xor_si128(blks[i], key->rd_key[0]); -@@ -112,22 +117,6 @@ inline void AES_ecb_encrypt_blks(block *blks, unsigned int nblks, const AES_KEY - for (unsigned int i = 0; i < nblks; ++i) - blks[i] = _mm_aesenclast_si128(blks[i], key->rd_key[key->rounds]); - } --#elif __aarch64__ --inline void AES_ecb_encrypt_blks(block *_blks, unsigned int nblks, const AES_KEY *key) { -- uint8x16_t * blks = (uint8x16_t*)(_blks); -- uint8x16_t * keys = (uint8x16_t*)(key->rd_key); -- auto * first = blks; -- for (unsigned int j = 0; j < key->rounds-1; ++j) { -- uint8x16_t key_j = (uint8x16_t)keys[j]; -- blks = first; -- for (unsigned int i = 0; i < nblks; ++i, ++blks) -- *blks = vaesmcq_u8(vaeseq_u8(*blks, key_j)); -- } -- uint8x16_t last_key = (uint8x16_t)keys[key->rounds-1]; -- for (unsigned int i = 0; i < nblks; ++i, ++first) -- *first = vaeseq_u8(*first, last_key) ^ (uint8x16_t)keys[key->rounds]; --} --#endif - - #ifdef __GNUC__ - #ifndef __clang__ -diff --git a/emp-tool/utils/aes_opt.h 
b/emp-tool/utils/aes_opt.h -index 2594e32..6a78b75 100644 ---- a/emp-tool/utils/aes_opt.h -+++ b/emp-tool/utils/aes_opt.h -@@ -58,7 +58,6 @@ static inline void AES_opt_key_schedule(block* user_key, AES_KEY *keys) { - /* - * With numKeys keys, use each key to encrypt numEncs blocks. - */ --#ifdef __x86_64__ - template - static inline void ParaEnc(block *blks, AES_KEY *keys) { - block * first = blks; -@@ -90,29 +89,6 @@ static inline void ParaEnc(block *blks, AES_KEY *keys) { - } - } - } --#elif __aarch64__ --template --static inline void ParaEnc(block *_blks, AES_KEY *keys) { -- uint8x16_t * first = (uint8x16_t*)(_blks); -- -- for (unsigned int r = 0; r < 9; ++r) { -- auto blks = first; -- for(size_t i = 0; i < numKeys; ++i) { -- uint8x16_t K = vreinterpretq_u8_m128i(keys[i].rd_key[r]); -- for(size_t j = 0; j < numEncs; ++j, ++blks) -- *blks = vaesmcq_u8(vaeseq_u8(*blks, K)); -- } -- } -- -- auto blks = first; -- for(size_t i = 0; i < numKeys; ++i) { -- uint8x16_t K = vreinterpretq_u8_m128i(keys[i].rd_key[9]); -- uint8x16_t K2 = vreinterpretq_u8_m128i(keys[i].rd_key[10]); -- for(size_t j = 0; j < numEncs; ++j, ++blks) -- *blks = vaeseq_u8(*blks, K) ^ K2; -- } --} --#endif - - } - #endif -diff --git a/emp-tool/utils/block.h b/emp-tool/utils/block.h -index f7d3d34..fcc21c1 100644 ---- a/emp-tool/utils/block.h -+++ b/emp-tool/utils/block.h -@@ -5,16 +5,7 @@ - #include - #elif __aarch64__ - #include "sse2neon.h" --inline __m128i _mm_aesimc_si128(__m128i a) { -- return vreinterpretq_m128i_u8(vaesimcq_u8(vreinterpretq_u8_m128i(a))); --} -- --inline __m128i _mm_aesdeclast_si128 (__m128i a, __m128i RoundKey) --{ -- return vreinterpretq_m128i_u8(vaesdq_u8(vreinterpretq_u8_m128i(a), vdupq_n_u8(0)) ^ vreinterpretq_u8_m128i(RoundKey)); --} - #endif -- - #include - #include - #include -diff --git a/emp-tool/utils/f2k.h b/emp-tool/utils/f2k.h -index 7fe1b1b..f6186a1 100644 ---- a/emp-tool/utils/f2k.h -+++ b/emp-tool/utils/f2k.h -@@ -6,6 +6,7 @@ namespace emp { - /* multiplication 
in galois field without reduction */ - #ifdef __x86_64__ - __attribute__((target("sse2,pclmul"))) -+ #endif - inline void mul128(__m128i a, __m128i b, __m128i *res1, __m128i *res2) { - __m128i tmp3, tmp4, tmp5, tmp6; - tmp3 = _mm_clmulepi64_si128(a, b, 0x00); -@@ -22,28 +23,6 @@ namespace emp { - *res1 = tmp3; - *res2 = tmp6; - } -- #elif __aarch64__ -- inline void mul128(__m128i a, __m128i b, __m128i *res1, __m128i *res2) { -- __m128i tmp3, tmp4, tmp5, tmp6; -- poly64_t a_lo = (poly64_t)vget_low_u64(vreinterpretq_u64_m128i(a)); -- poly64_t a_hi = (poly64_t)vget_high_u64(vreinterpretq_u64_m128i(a)); -- poly64_t b_lo = (poly64_t)vget_low_u64(vreinterpretq_u64_m128i(b)); -- poly64_t b_hi = (poly64_t)vget_high_u64(vreinterpretq_u64_m128i(b)); -- tmp3 = (__m128i)vmull_p64(a_lo, b_lo); -- tmp4 = (__m128i)vmull_p64(a_hi, b_lo); -- tmp5 = (__m128i)vmull_p64(a_lo, b_hi); -- tmp6 = (__m128i)vmull_p64(a_hi, b_hi); -- -- tmp4 = _mm_xor_si128(tmp4, tmp5); -- tmp5 = _mm_slli_si128(tmp4, 8); -- tmp4 = _mm_srli_si128(tmp4, 8); -- tmp3 = _mm_xor_si128(tmp3, tmp5); -- tmp6 = _mm_xor_si128(tmp6, tmp4); -- // initial mul now in tmp3, tmp6 -- *res1 = tmp3; -- *res2 = tmp6; -- } -- #endif - - /* multiplication in galois field with reduction */ - #ifdef __x86_64__ -diff --git a/emp-tool/utils/prg.h b/emp-tool/utils/prg.h -index 23bbf42..5101d7e 100644 ---- a/emp-tool/utils/prg.h -+++ b/emp-tool/utils/prg.h -@@ -82,7 +82,7 @@ class PRG { public: - } else { - block tmp[2]; - random_block(tmp, 2); -- memcpy(data, tmp, nbytes); -+ memcpy(data, tmp, nbytes <= 32? 
nbytes : 32); - } - } - diff --git a/bazel/patches/grpc-1.66.patch b/bazel/patches/grpc-1.66.patch new file mode 100644 index 000000000..b6f82e587 --- /dev/null +++ b/bazel/patches/grpc-1.66.patch @@ -0,0 +1,20 @@ +diff --git a/third_party/BUILD b/third_party/BUILD +index 77cb52d0fc..c4b647f5c9 100644 +--- a/third_party/BUILD ++++ b/third_party/BUILD +@@ -18,13 +18,13 @@ package(default_visibility = ["//:__subpackages__"]) + + alias( + name = "libssl", +- actual = "@boringssl//:ssl", ++ actual = "@openssl//:ssl", + tags = ["manual"], + ) + + alias( + name = "libcrypto", +- actual = "@boringssl//:crypto", ++ actual = "@openssl//:crypto", + tags = ["manual"], + ) + diff --git a/bazel/patches/grpc-module-file.patch b/bazel/patches/grpc-module-file.patch new file mode 100644 index 000000000..29dc393c1 --- /dev/null +++ b/bazel/patches/grpc-module-file.patch @@ -0,0 +1,13 @@ +diff --git a/MODULE.bazel b/MODULE.bazel +index 4a8fbe83..8650f678 100644 +--- a/MODULE.bazel ++++ b/MODULE.bazel +@@ -8,7 +8,7 @@ module( + bazel_dep(name = "abseil-cpp", version = "20240116.0", repo_name = "com_google_absl") + bazel_dep(name = "apple_support", version = "1.15.1", repo_name = "build_bazel_apple_support") + bazel_dep(name = "bazel_skylib", version = "1.5.0") +-bazel_dep(name = "boringssl", version = "0.0.0-20230215-5c22014") ++bazel_dep(name = "openssl", version = "3.3.2") + bazel_dep(name = "c-ares", version = "1.15.0", repo_name = "com_github_cares_cares") + bazel_dep(name = "gazelle", version = "0.36.0", repo_name = "bazel_gazelle") + bazel_dep(name = "google_benchmark", version = "1.8.4", repo_name = "com_github_google_benchmark") \ No newline at end of file diff --git a/bazel/patches/hexl.patch b/bazel/patches/hexl.patch deleted file mode 100644 index 4d498ca8f..000000000 --- a/bazel/patches/hexl.patch +++ /dev/null @@ -1,32 +0,0 @@ -diff --git a/CMakeLists.txt b/CMakeLists.txt -index b0da96f..61bfdd8 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -113,10 +113,10 @@ 
message(STATUS "CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}") - #------------------------------------------------------------------------------ - # Set AVX flags - #------------------------------------------------------------------------------ --hexl_check_compile_flag("${HEXL_CMAKE_PATH}/test-avx512dq.cpp" HEXL_HAS_AVX512DQ) --hexl_check_compile_flag("${HEXL_CMAKE_PATH}/test-avx512ifma.cpp" HEXL_HAS_AVX512IFMA) --hexl_check_compile_flag("${HEXL_CMAKE_PATH}/test-avx512vbmi2.cpp" HEXL_HAS_AVX512VBMI2) --hexl_check_compile_flag("${HEXL_CMAKE_PATH}/test-avx256.cpp" HEXL_HAS_AVX256) -+# hexl_check_compile_flag("${HEXL_CMAKE_PATH}/test-avx512dq.cpp" HEXL_HAS_AVX512DQ) -+# hexl_check_compile_flag("${HEXL_CMAKE_PATH}/test-avx512ifma.cpp" HEXL_HAS_AVX512IFMA) -+# hexl_check_compile_flag("${HEXL_CMAKE_PATH}/test-avx512vbmi2.cpp" HEXL_HAS_AVX512VBMI2) -+# hexl_check_compile_flag("${HEXL_CMAKE_PATH}/test-avx256.cpp" HEXL_HAS_AVX256) - - # ------------------------------------------------------------------------------ - # Installation logic... 
-diff --git a/hexl/CMakeLists.txt b/hexl/CMakeLists.txt -index 7c660a0..7e2e1c9 100644 ---- a/hexl/CMakeLists.txt -+++ b/hexl/CMakeLists.txt -@@ -93,7 +93,7 @@ endif() - - if (CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") - target_compile_options(hexl PRIVATE -Wall -Wconversion -Wshadow -pedantic -Wextra -- -Wno-unknown-pragmas -march=native -O3 -fomit-frame-pointer -+ -Wno-unknown-pragmas -mavx -O3 -fomit-frame-pointer - -Wno-sign-conversion - -Wno-implicit-int-conversion - ) diff --git a/bazel/patches/protobuf-xla.patch b/bazel/patches/protobuf-xla.patch new file mode 100644 index 000000000..b09b9c49a --- /dev/null +++ b/bazel/patches/protobuf-xla.patch @@ -0,0 +1,347 @@ +diff --git a/BUILD.bazel b/BUILD.bazel +index 301a04656..b4d953fd2 100644 +--- a/BUILD.bazel ++++ b/BUILD.bazel +@@ -8,7 +8,7 @@ load("//bazel:java_proto_library.bzl", "java_proto_library") + load("//bazel:proto_library.bzl", "proto_library") + load("//bazel/toolchains:proto_lang_toolchain.bzl", "proto_lang_toolchain") + load("//build_defs:cpp_opts.bzl", "COPTS", "LINK_OPTS") +-load(":protobuf.bzl", "internal_objc_proto_library", "internal_php_proto_library", "internal_py_proto_library") ++load(":protobuf.bzl", "adapt_proto_library", "internal_objc_proto_library", "internal_php_proto_library", "internal_py_proto_library") + + licenses(["notice"]) + +@@ -192,6 +192,25 @@ cc_library( + visibility = ["//visibility:public"], + ) + ++adapt_proto_library( ++ name = "cc_wkt_protos_genproto", ++ visibility = ["//visibility:public"], ++ deps = [ ++ "//:any_proto", ++ "//:api_proto", ++ "//:compiler_plugin_proto", ++ "//:descriptor_proto", ++ "//:duration_proto", ++ "//:empty_proto", ++ "//:field_mask_proto", ++ "//:source_context_proto", ++ "//:struct_proto", ++ "//:timestamp_proto", ++ "//:type_proto", ++ "//:wrappers_proto", ++ ], ++) ++ + # Source protos that are typically part of the protobuf runtime. 
+ # + # DEPRECATED: Prefer :well_known_type_protos for the Well-Known Types +diff --git a/protobuf.bzl b/protobuf.bzl +index 7db5146a0..a0e05d4d2 100644 +--- a/protobuf.bzl ++++ b/protobuf.bzl +@@ -88,17 +88,17 @@ def _proto_gen_impl(ctx): + if source_dir: + has_sources = any([src.is_source for src in srcs]) + if has_sources: +- import_flags += ["-I" + source_dir] ++ import_flags.append("-I" + source_dir) + else: +- import_flags += ["-I."] ++ import_flags.append("-I.") + + has_generated = any([not src.is_source for src in srcs]) + if has_generated: +- import_flags += ["-I" + gen_dir] ++ import_flags.append("-I" + gen_dir) + + if ctx.attr.includes: + for include in ctx.attr.includes: +- import_flags += ["-I" + _GetPath(ctx, include)] ++ import_flags.append("-I" + _GetPath(ctx, include)) + + import_flags = depset(direct = import_flags) + +@@ -153,7 +153,7 @@ def _proto_gen_impl(ctx): + outs.extend(_RubyOuts([src.basename])) + + # Otherwise, rely on user-supplied outs. +- args += [("--%s_out=" + path_tpl) % (lang, gen_dir)] ++ args.append(("--%s_out=" + path_tpl) % (lang, gen_dir)) + + if ctx.attr.outs: + outs.extend(ctx.attr.outs) +@@ -174,8 +174,8 @@ def _proto_gen_impl(ctx): + + if ctx.attr.plugin_options: + outdir = ",".join(ctx.attr.plugin_options) + ":" + outdir +- args += [("--plugin=protoc-gen-%s=" + path_tpl) % (lang, plugin.path)] +- args += ["--%s_out=%s" % (lang, outdir)] ++ args.append(("--plugin=protoc-gen-%s=" + path_tpl) % (lang, plugin.path)) ++ args.append("--%s_out=%s" % (lang, outdir)) + tools.append(plugin) + + if not in_gen_dir: +@@ -765,3 +765,261 @@ def check_protobuf_required_bazel_version(): + copied filegroup. 
(Fixed in bazel 0.5.4) + """ + versions.check(minimum_bazel_version = "0.5.4") ++ ++def _CcHdrs(srcs, use_grpc_plugin = False): ++ ret = [s[:-len(".proto")] + ".pb.h" for s in srcs] ++ if use_grpc_plugin: ++ ret += [s[:-len(".proto")] + ".grpc.pb.h" for s in srcs] ++ return ret ++ ++def _CcSrcs(srcs, use_grpc_plugin = False): ++ ret = [s[:-len(".proto")] + ".pb.cc" for s in srcs] ++ if use_grpc_plugin: ++ ret += [s[:-len(".proto")] + ".grpc.pb.cc" for s in srcs] ++ return ret ++ ++def __proto_gen_impl(ctx): ++ """General implementation for generating protos""" ++ srcs = ctx.files.srcs ++ deps = [] ++ deps += ctx.files.srcs ++ source_dir = _SourceDir(ctx) ++ gen_dir = _GenDir(ctx) ++ if source_dir: ++ import_flags = ["-I" + source_dir, "-I" + gen_dir] ++ else: ++ import_flags = ["-I."] ++ ++ for dep in ctx.attr.deps: ++ import_flags += dep.proto.import_flags ++ deps += dep.proto.deps ++ import_flags = depset(import_flags).to_list() ++ deps = depset(deps).to_list() ++ ++ args = [] ++ if ctx.attr.gen_cc: ++ args.append("--cpp_out=" + gen_dir) ++ if ctx.attr.gen_py: ++ args.append("--python_out=" + gen_dir) ++ ++ inputs = srcs + deps ++ tools = [ctx.executable.protoc] ++ if ctx.executable.plugin: ++ plugin = ctx.executable.plugin ++ lang = ctx.attr.plugin_language ++ if not lang and plugin.basename.startswith("protoc-gen-"): ++ lang = plugin.basename[len("protoc-gen-"):] ++ if not lang: ++ fail("cannot infer the target language of plugin", "plugin_language") ++ ++ outdir = gen_dir ++ if ctx.attr.plugin_options: ++ outdir = ",".join(ctx.attr.plugin_options) + ":" + outdir ++ args.append("--plugin=protoc-gen-%s=%s" % (lang, plugin.path)) ++ args.append("--%s_out=%s" % (lang, outdir)) ++ tools.append(plugin) ++ ++ if args: ++ ctx.actions.run( ++ inputs = inputs, ++ outputs = ctx.outputs.outs, ++ arguments = args + import_flags + [s.path for s in srcs], ++ executable = ctx.executable.protoc, ++ mnemonic = "ProtoCompile", ++ tools = tools, ++ use_default_shell_env = True, 
++ ) ++ ++ return struct( ++ proto = struct( ++ srcs = srcs, ++ import_flags = import_flags, ++ deps = deps, ++ ), ++ ) ++ ++proto_gen = rule( ++ attrs = { ++ "srcs": attr.label_list(allow_files = True), ++ "deps": attr.label_list(providers = ["proto"]), ++ "includes": attr.string_list(), ++ "protoc": attr.label( ++ cfg = "exec", ++ executable = True, ++ allow_single_file = True, ++ mandatory = True, ++ ), ++ "plugin": attr.label( ++ cfg = "exec", ++ allow_files = True, ++ executable = True, ++ ), ++ "plugin_language": attr.string(), ++ "plugin_options": attr.string_list(), ++ "gen_cc": attr.bool(), ++ "gen_py": attr.bool(), ++ "outs": attr.output_list(), ++ }, ++ implementation = __proto_gen_impl, ++) ++ ++"""Generates codes from Protocol Buffers definitions. ++ ++This rule helps you to implement Skylark macros specific to the target ++language. You should prefer more specific `cc_proto_library `, ++`py_proto_library` and others unless you are adding such wrapper macros. ++ ++Args: ++ srcs: Protocol Buffers definition files (.proto) to run the protocol compiler ++ against. ++ deps: a list of dependency labels; must be other proto libraries. ++ includes: a list of include paths to .proto files. ++ protoc: the label of the protocol compiler to generate the sources. ++ plugin: the label of the protocol compiler plugin to be passed to the protocol ++ compiler. ++ plugin_language: the language of the generated sources ++ plugin_options: a list of options to be passed to the plugin ++ gen_cc: generates C++ sources in addition to the ones from the plugin. ++ gen_py: generates Python sources in addition to the ones from the plugin. ++ outs: a list of labels of the expected outputs from the protocol compiler. 
++""" ++ ++def cc_proto_library( ++ name, ++ srcs = [], ++ deps = [], ++ cc_libs = [], ++ include = None, ++ protoc = "@com_google_protobuf//:protoc", ++ internal_bootstrap_hack = False, ++ use_grpc_plugin = False, ++ default_runtime = "@com_google_protobuf//:protobuf", ++ **kwargs): ++ """Bazel rule to create a C++ protobuf library from proto source files ++ ++ NOTE: the rule is only an internal workaround to generate protos. The ++ interface may change and the rule may be removed when bazel has introduced ++ the native rule. ++ ++ Args: ++ name: the name of the cc_proto_library. ++ srcs: the .proto files of the cc_proto_library. ++ deps: a list of dependency labels; must be cc_proto_library. ++ cc_libs: a list of other cc_library targets depended by the generated ++ cc_library. ++ include: a string indicating the include path of the .proto files. ++ protoc: the label of the protocol compiler to generate the sources. ++ internal_bootstrap_hack: a flag indicating if the cc_proto_library is used only ++ for bootstrapping. When it is set to True, no files will be generated. ++ The rule will simply be a provider for .proto files, so that other ++ cc_proto_library can depend on it. ++ use_grpc_plugin: a flag to indicate whether to call the grpc C++ plugin ++ when processing the proto files. ++ default_runtime: the implicitly default runtime which will be depended on by ++ the generated cc_library target. ++ **kwargs: other keyword arguments that are passed to cc_library. ++ ++ """ ++ ++ includes = [] ++ if include != None: ++ includes = [include] ++ ++ if internal_bootstrap_hack: ++ # For pre-checked-in generated files, we add the internal_bootstrap_hack ++ # which will skip the codegen action. ++ proto_gen( ++ name = name + "_genproto", ++ srcs = srcs, ++ deps = [s + "_genproto" for s in deps], ++ includes = includes, ++ protoc = protoc, ++ visibility = ["//visibility:public"], ++ ) ++ ++ # An empty cc_library to make rule dependency consistent. 
++ native.cc_library( ++ name = name, ++ **kwargs ++ ) ++ return ++ ++ grpc_cpp_plugin = None ++ if use_grpc_plugin: ++ grpc_cpp_plugin = "//external:grpc_cpp_plugin" ++ ++ gen_srcs = _CcSrcs(srcs, use_grpc_plugin) ++ gen_hdrs = _CcHdrs(srcs, use_grpc_plugin) ++ outs = gen_srcs + gen_hdrs ++ ++ proto_gen( ++ name = name + "_genproto", ++ srcs = srcs, ++ deps = [s + "_genproto" for s in deps], ++ includes = includes, ++ protoc = protoc, ++ plugin = grpc_cpp_plugin, ++ plugin_language = "grpc", ++ gen_cc = 1, ++ outs = outs, ++ visibility = ["//visibility:public"], ++ ) ++ ++ if default_runtime and not default_runtime in cc_libs: ++ cc_libs = cc_libs + [default_runtime] ++ if use_grpc_plugin: ++ cc_libs = cc_libs + ["//external:grpc_lib"] ++ ++ native.cc_library( ++ name = name, ++ srcs = gen_srcs, ++ hdrs = gen_hdrs, ++ deps = cc_libs + deps, ++ includes = includes, ++ alwayslink = 1, ++ **kwargs ++ ) ++ ++"""Generates codes from Protocol Buffers definitions. ++ ++This rule helps you to implement Skylark macros specific to the target ++language. You should prefer more specific `cc_proto_library `, ++`py_proto_library` and others unless you are adding such wrapper macros. ++ ++Args: ++ srcs: Protocol Buffers definition files (.proto) to run the protocol compiler ++ against. ++ deps: a list of dependency labels; must be other proto libraries. ++ includes: a list of include paths to .proto files. ++ protoc: the label of the protocol compiler to generate the sources. ++ plugin: the label of the protocol compiler plugin to be passed to the protocol ++ compiler. ++ plugin_language: the language of the generated sources ++ plugin_options: a list of options to be passed to the plugin ++ gen_cc: generates C++ sources in addition to the ones from the plugin. ++ gen_py: generates Python sources in addition to the ones from the plugin. ++ outs: a list of labels of the expected outputs from the protocol compiler. 
++""" ++ ++def _adapt_proto_library_impl(ctx): ++ deps = [dep[ProtoInfo] for dep in ctx.attr.deps] ++ ++ srcs = [src for dep in deps for src in dep.direct_sources] ++ return struct( ++ proto = struct( ++ srcs = srcs, ++ import_flags = ["-I{}".format(path) for dep in deps for path in dep.transitive_proto_path.to_list()], ++ deps = srcs, ++ ), ++ ) ++ ++adapt_proto_library = rule( ++ implementation = _adapt_proto_library_impl, ++ attrs = { ++ "deps": attr.label_list( ++ mandatory = True, ++ providers = [ProtoInfo], ++ ), ++ }, ++ doc = "Adapts `proto_library` from `@rules_proto` to be used with `{cc,py}_proto_library` from this file.", ++) diff --git a/bazel/patches/pytorch.patch b/bazel/patches/pytorch.patch new file mode 100644 index 000000000..df63024e8 --- /dev/null +++ b/bazel/patches/pytorch.patch @@ -0,0 +1,44 @@ +--- a/torch/__init__.py ++++ b/torch/__init__.py +@@ -172,6 +172,41 @@ + here = os.path.abspath(__file__) + lib_path = os.path.join(os.path.dirname(here), 'lib', lib_name) + ++ import pathlib ++ torch_root = pathlib.Path(here).parent.parent ++ packages = [ ++ 'cublas', ++ 'cudnn', ++ 'cuda_nvrtc', ++ 'cuda_runtime', ++ 'cuda_cupti', ++ 'cufft', ++ 'curand', ++ 'cusolver', ++ 'cusparse', ++ 'nccl', ++ 'nvjitlink', ++ 'nvtx', ++ ] ++ rules_python_prefix = 'rules_python~~pip~spu_pip_dev_311_nvidia' ++ cuda_version = 'cu12' ++ nvidia_symlink_dir = torch_root / 'nvidia' ++ nvidia_symlink_dir.mkdir(exist_ok=True) ++ for pkg in packages: ++ pkg_dirname = f'../../../{rules_python_prefix}_{pkg}_{cuda_version}' ++ dest_dir = pathlib.Path(pkg_dirname) / f"site-packages/nvidia/{pkg}" ++ symlink_loc = nvidia_symlink_dir / pkg ++ if symlink_loc.exists(): ++ assert symlink_loc.is_symlink() ++ if symlink_loc.readlink() != dest_dir: ++ symlink_loc.unlink() ++ if not symlink_loc.exists(): ++ symlink_loc.symlink_to(dest_dir) ++ ++ # Preload the correct libnvJitLink library. The other libraries don't need ++ # this because they're loaded via relative paths. 
++ ctypes.CDLL(torch_root / 'nvidia/nvjitlink/lib/libnvJitLink.so.12', mode=ctypes.RTLD_GLOBAL) ++ + try: + ctypes.CDLL(lib_path, mode=ctypes.RTLD_GLOBAL) + except OSError as err: diff --git a/bazel/patches/pytorch_record.patch b/bazel/patches/pytorch_record.patch new file mode 100644 index 000000000..9a0f41efa --- /dev/null +++ b/bazel/patches/pytorch_record.patch @@ -0,0 +1,34 @@ +--- a/torch-2.3.0.dist-info/RECORD ++++ b/torch-2.3.0.dist-info/RECORD +@@ -10386,19 +10386,12 @@ + torch/_compile.py,sha256=a2g6zXCXwnirEFu-VSiSzRfk23_-E0MgVbodLOHfjr0,1001 + torch/_classes.py,sha256=zez2IGbpzN3f1P7Tg8s-fg3pz_ATN6hAVTxKsSqtV9o,1686 + torch/_appdirs.py,sha256=GjuBh72l3BhGE4vJSdqGj-8QHjGbkhuMYaOLchLcqOQ,26167 +-torch/__init__.py,sha256=kCWXnQYOawq9ORqPg-5cLOnMwoAboxlI8PQcgqwHJzA,79997 ++torch/__init__.py,sha256=2KMPP6IBfFqD8r02xTXzVtFPnQQz9Yn-fBfO9QBs7Qk,81211 + torch/__future__.py,sha256=yk9l_KWsfVIzUBx9cGr-OdtWmb-pI8ZhcROAm3a_FQw,3185 + torch/__config__.py,sha256=kv8yDflHiu3B2rxjOe48upLeB2VXnTxdXKeAYcxnJ5c,553 + torch/_VF.pyi,sha256=kmuEzpodTqrBJhZhx054yhGBqizo80StmroZOGZz-dI,1137705 + torch/_VF.py,sha256=6gWebiEvyG5GFiNTcMuigU7UAPEesYJmWKQTL_1GTrM,643 + torch/_C.cpython-311-x86_64-linux-gnu.so,sha256=DgQrQZ1UWG8CqyFAOIrkFeOBbdRsL8uRH7R6LYN1GR4,37857 +-torch-2.3.0.dist-info/LICENSE,sha256=nCkW1Dsriv9qPGZi7VhLay19bc-E3XJxgd7q3FRyIU4,351851 +-torch-2.3.0.dist-info/METADATA,sha256=h6XMrDq7R7q01HNfqNNgyDKXDn2p4vfbUB2IxCL7gdY,26120 +-torch-2.3.0.dist-info/NOTICE,sha256=wsx78MrsdlLCtGCopHC-oWd_JB5KuOQx3zTPF_Wp_sA,23632 +-torch-2.3.0.dist-info/WHEEL,sha256=heXqORHgAzVHsSEcHHvvTohM_YB7MmN0HUQQME76CM0,105 +-torch-2.3.0.dist-info/entry_points.txt,sha256=SRhyGhohzXtwg-GPZHrgubQLqk1v5i-kOQiV-mj5fms,296 +-torch-2.3.0.dist-info/top_level.txt,sha256=MsBcfJyMU15lW1efu5w7Tzd4MenrYHiuaixbHMfAoco,25 +-torch-2.3.0.dist-info/RECORD,, + torchgen/static_runtime/generator.py,sha256=Sfe8TR6inZv-FHhJAG6gXBFXutPVMO8I4Uzw3xnGdd4,26374 + 
torchgen/static_runtime/gen_static_runtime_ops.py,sha256=Esr32XW78YkLpGAGhIw6ZsxmkwgI_Pd3j0rh585ijow,7347 + torchgen/static_runtime/config.py,sha256=fEMB4EdO8aX47aW13s-nVSy-yM5qIzfXrOYaQkmv3-A,14493 +@@ -10538,3 +10531,10 @@ + torchgen/context.py,sha256=798e45g0zoR69Xn_4HjTuBRXbuNnyyK-j5_vWOnrs_s,3974 + torchgen/code_template.py,sha256=z3N3FvXHfvO2aLIu2LoFqe7XGpzbFfzXEyzhGez2KME,2903 + torchgen/__init__.py,sha256=iirTpG38WcCsNMhEbi1dg7_jad6ptk_uzZ-BzaGBFyU,348 ++torch-2.3.0.dist-info/LICENSE,sha256=nCkW1Dsriv9qPGZi7VhLay19bc-E3XJxgd7q3FRyIU4,351851 ++torch-2.3.0.dist-info/METADATA,sha256=h6XMrDq7R7q01HNfqNNgyDKXDn2p4vfbUB2IxCL7gdY,26120 ++torch-2.3.0.dist-info/NOTICE,sha256=wsx78MrsdlLCtGCopHC-oWd_JB5KuOQx3zTPF_Wp_sA,23632 ++torch-2.3.0.dist-info/WHEEL,sha256=heXqORHgAzVHsSEcHHvvTohM_YB7MmN0HUQQME76CM0,105 ++torch-2.3.0.dist-info/entry_points.txt,sha256=SRhyGhohzXtwg-GPZHrgubQLqk1v5i-kOQiV-mj5fms,296 ++torch-2.3.0.dist-info/top_level.txt,sha256=MsBcfJyMU15lW1efu5w7Tzd4MenrYHiuaixbHMfAoco,25 ++torch-2.3.0.dist-info/RECORD,, diff --git a/bazel/patches/xla-non-hermetic-python.patch b/bazel/patches/xla-non-hermetic-python.patch deleted file mode 100644 index ac1b0cc00..000000000 --- a/bazel/patches/xla-non-hermetic-python.patch +++ /dev/null @@ -1,786 +0,0 @@ -diff --git a/third_party/py/BUILD.tpl b/third_party/py/BUILD.tpl -index 7cc1e08568..45480bd4a3 100644 ---- a/third_party/py/BUILD.tpl -+++ b/third_party/py/BUILD.tpl -@@ -5,17 +5,16 @@ package(default_visibility = ["//visibility:public"]) - # Point both runtimes to the same python binary to ensure we always - # use the python binary specified by ./configure.py script. 
- load("@bazel_tools//tools/python:toolchain.bzl", "py_runtime_pair") --load("@python//:defs.bzl", "interpreter") - - py_runtime( - name = "py2_runtime", -- interpreter_path = interpreter, -+ interpreter_path = "%{PYTHON_BIN_PATH}", - python_version = "PY2", - ) - - py_runtime( - name = "py3_runtime", -- interpreter_path = interpreter, -+ interpreter_path = "%{PYTHON_BIN_PATH}", - python_version = "PY3", - ) - -@@ -33,8 +32,27 @@ toolchain( - exec_compatible_with = [%{PLATFORM_CONSTRAINT}], - ) - --alias(name = "python_headers", -- actual = "@python//:python_headers") -+# To build Python C/C++ extension on Windows, we need to link to python import library pythonXY.lib -+# See https://docs.python.org/3/extending/windows.html -+cc_import( -+ name = "python_lib", -+ interface_library = select({ -+ ":windows": ":python_import_lib", -+ # A placeholder for Unix platforms which makes --no_build happy. -+ "//conditions:default": "not-existing.lib", -+ }), -+ system_provided = 1, -+) -+ -+cc_library( -+ name = "python_headers", -+ hdrs = [":python_include"], -+ deps = select({ -+ ":windows": [":python_lib"], -+ "//conditions:default": [], -+ }), -+ includes = ["python_include"], -+) - - # This alias is exists for the use of targets in the @llvm-project dependency, - # which expect a python_headers target called @python_runtime//:headers. 
We use -@@ -45,9 +63,18 @@ alias( - actual = ":python_headers", - ) - -+cc_library( -+ name = "numpy_headers", -+ hdrs = [":numpy_include"], -+ includes = ["numpy_include"], -+) - - config_setting( - name = "windows", - values = {"cpu": "x64_windows"}, - visibility = ["//visibility:public"], --) -\ No newline at end of file -+) -+ -+%{PYTHON_INCLUDE_GENRULE} -+%{NUMPY_INCLUDE_GENRULE} -+%{PYTHON_IMPORT_LIB_GENRULE} -\ No newline at end of file -diff --git a/third_party/py/numpy/BUILD b/third_party/py/numpy/BUILD -index 97c7907fc3..c80cc5287b 100644 ---- a/third_party/py/numpy/BUILD -+++ b/third_party/py/numpy/BUILD -@@ -2,14 +2,15 @@ licenses(["restricted"]) - - package(default_visibility = ["//visibility:public"]) - --alias( -+py_library( - name = "numpy", -- actual = "@pypi_numpy//:pkg", -+ srcs = ["tf_numpy_dummy.py"], -+ srcs_version = "PY3", - ) - - alias( - name = "headers", -- actual = "@pypi_numpy//:numpy_headers", -+ actual = "@local_config_python//:numpy_headers", - ) - - genrule( -diff --git a/third_party/py/python_configure.bzl b/third_party/py/python_configure.bzl -index 3728a91b93..89732c3e33 100644 ---- a/third_party/py/python_configure.bzl -+++ b/third_party/py/python_configure.bzl -@@ -1,4 +1,9 @@ - """Repository rule for Python autoconfiguration. -+ -+`python_configure` depends on the following environment variables: -+ -+ * `PYTHON_BIN_PATH`: location of python binary. -+ * `PYTHON_LIB_PATH`: Location of python libraries. - """ - - load( -@@ -6,8 +11,192 @@ load( - "BAZEL_SH", - "PYTHON_BIN_PATH", - "PYTHON_LIB_PATH", -+ "TF_PYTHON_CONFIG_REPO", -+ "auto_config_fail", -+ "config_repo_label", -+ "execute", -+ "get_bash_bin", -+ "get_host_environ", -+ "get_python_bin", -+ "is_windows", -+ "raw_exec", -+ "read_dir", - ) - -+def _genrule(src_dir, genrule_name, command, outs): -+ """Returns a string with a genrule. -+ -+ Genrule executes the given command and produces the given outputs. 
-+ """ -+ return ( -+ "genrule(\n" + -+ ' name = "' + -+ genrule_name + '",\n' + -+ " outs = [\n" + -+ outs + -+ "\n ],\n" + -+ ' cmd = """\n' + -+ command + -+ '\n """,\n' + -+ ")\n" -+ ) -+ -+def _norm_path(path): -+ """Returns a path with '/' and remove the trailing slash.""" -+ path = path.replace("\\", "/") -+ if path[-1] == "/": -+ path = path[:-1] -+ return path -+ -+def _symlink_genrule_for_dir( -+ repository_ctx, -+ src_dir, -+ dest_dir, -+ genrule_name, -+ src_files = [], -+ dest_files = []): -+ """Returns a genrule to symlink(or copy if on Windows) a set of files. -+ -+ If src_dir is passed, files will be read from the given directory; otherwise -+ we assume files are in src_files and dest_files -+ """ -+ if src_dir != None: -+ src_dir = _norm_path(src_dir) -+ dest_dir = _norm_path(dest_dir) -+ files = "\n".join(read_dir(repository_ctx, src_dir)) -+ -+ # Create a list with the src_dir stripped to use for outputs. -+ dest_files = files.replace(src_dir, "").splitlines() -+ src_files = files.splitlines() -+ command = [] -+ outs = [] -+ for i in range(len(dest_files)): -+ if dest_files[i] != "": -+ # If we have only one file to link we do not want to use the dest_dir, as -+ # $(@D) will include the full path to the file. -+ dest = "$(@D)/" + dest_dir + dest_files[i] if len(dest_files) != 1 else "$(@D)/" + dest_files[i] -+ -+ # Copy the headers to create a sandboxable setup. -+ cmd = "cp -f" -+ command.append(cmd + ' "%s" "%s"' % (src_files[i], dest)) -+ outs.append(' "' + dest_dir + dest_files[i] + '",') -+ genrule = _genrule( -+ src_dir, -+ genrule_name, -+ " && ".join(command), -+ "\n".join(outs), -+ ) -+ return genrule -+ -+def _get_python_lib(repository_ctx, python_bin): -+ """Gets the python lib path.""" -+ python_lib = get_host_environ(repository_ctx, PYTHON_LIB_PATH) -+ if python_lib != None: -+ return python_lib -+ -+ # The interesting program to execute. 
-+ print_lib = [ -+ "from __future__ import print_function", -+ "import site", -+ "import os", -+ "python_paths = []", -+ "if os.getenv('PYTHONPATH') is not None:", -+ " python_paths = os.getenv('PYTHONPATH').split(':')", -+ "try:", -+ " library_paths = site.getsitepackages()", -+ "except AttributeError:", -+ " from distutils.sysconfig import get_python_lib", -+ " library_paths = [get_python_lib()]", -+ "all_paths = set(python_paths + library_paths)", -+ "paths = []", -+ "for path in all_paths:", -+ " if os.path.isdir(path):", -+ " paths.append(path)", -+ "if len(paths) >=1:", -+ " print(paths[0])", -+ ] -+ -+ # The below script writes the above program to a file -+ # and executes it. This is to work around the limitation -+ # of not being able to upload files as part of execute. -+ cmd = "from os import linesep;" -+ cmd += "f = open('script.py', 'w');" -+ for line in print_lib: -+ cmd += "f.write(\"%s\" + linesep);" % line -+ cmd += "f.close();" -+ cmd += "from subprocess import call;" -+ cmd += "call([\"%s\", \"script.py\"]);" % python_bin -+ -+ result = execute(repository_ctx, [python_bin, "-c", cmd]) -+ return result.stdout.strip() -+ -+def _check_python_lib(repository_ctx, python_lib): -+ """Checks the python lib path.""" -+ cmd = 'test -d "%s" -a -x "%s"' % (python_lib, python_lib) -+ result = raw_exec(repository_ctx, [get_bash_bin(repository_ctx), "-c", cmd]) -+ if result.return_code == 1: -+ auto_config_fail("Invalid python library path: %s" % python_lib) -+ -+def _check_python_bin(repository_ctx, python_bin): -+ """Checks the python bin path.""" -+ cmd = '[[ -x "%s" ]] && [[ ! -d "%s" ]]' % (python_bin, python_bin) -+ result = raw_exec(repository_ctx, [get_bash_bin(repository_ctx), "-c", cmd]) -+ if result.return_code == 1: -+ auto_config_fail("--define %s='%s' is not executable. Is it the python binary?" 
% ( -+ PYTHON_BIN_PATH, -+ python_bin, -+ )) -+ -+def _get_python_include(repository_ctx, python_bin): -+ """Gets the python include path.""" -+ result = execute( -+ repository_ctx, -+ [ -+ python_bin, -+ "-Wignore", -+ "-c", -+ "import sysconfig; " + -+ "print(sysconfig.get_path('include'))", -+ ], -+ error_msg = "Problem getting python include path.", -+ error_details = ("Is the Python binary path set up right? " + -+ "(See ./configure or " + PYTHON_BIN_PATH + ".) " + -+ "Is distutils installed?"), -+ ) -+ return result.stdout.splitlines()[0] -+ -+def _get_python_import_lib_name(repository_ctx, python_bin): -+ """Get Python import library name (pythonXY.lib) on Windows.""" -+ result = execute( -+ repository_ctx, -+ [ -+ python_bin, -+ "-c", -+ "import sys;" + -+ 'print("python" + str(sys.version_info[0]) + ' + -+ ' str(sys.version_info[1]) + ".lib")', -+ ], -+ error_msg = "Problem getting python import library.", -+ error_details = ("Is the Python binary path set up right? " + -+ "(See ./configure or " + PYTHON_BIN_PATH + ".) "), -+ ) -+ return result.stdout.splitlines()[0] -+ -+def _get_numpy_include(repository_ctx, python_bin): -+ """Gets the numpy include path.""" -+ return execute( -+ repository_ctx, -+ [ -+ python_bin, -+ "-c", -+ "from __future__ import print_function;" + -+ "import numpy;" + -+ " print(numpy.get_include());", -+ ], -+ error_msg = "Problem getting numpy include path.", -+ error_details = "Is numpy installed?", -+ ).stdout.splitlines()[0] -+ - def _create_local_python_repository(repository_ctx): - """Creates the repository containing files set up to build with Python.""" - -@@ -15,14 +204,68 @@ def _create_local_python_repository(repository_ctx): - # function to be restarted with all previous state being lost. This - # can easily lead to a O(n^2) runtime in the number of labels. 
- build_tpl = repository_ctx.path(Label("//third_party/py:BUILD.tpl")) -+ -+ python_bin = get_python_bin(repository_ctx) -+ _check_python_bin(repository_ctx, python_bin) -+ python_lib = _get_python_lib(repository_ctx, python_bin) -+ _check_python_lib(repository_ctx, python_lib) -+ python_include = _get_python_include(repository_ctx, python_bin) -+ numpy_include = _get_numpy_include(repository_ctx, python_bin) + "/numpy" -+ python_include_rule = _symlink_genrule_for_dir( -+ repository_ctx, -+ python_include, -+ "python_include", -+ "python_include", -+ ) -+ python_import_lib_genrule = "" -+ -+ # To build Python C/C++ extension on Windows, we need to link to python import library pythonXY.lib -+ # See https://docs.python.org/3/extending/windows.html -+ if is_windows(repository_ctx): -+ python_bin = python_bin.replace("\\", "/") -+ python_include = _norm_path(python_include) -+ python_import_lib_name = _get_python_import_lib_name(repository_ctx, python_bin) -+ python_import_lib_src = python_include.rsplit("/", 1)[0] + "/libs/" + python_import_lib_name -+ python_import_lib_genrule = _symlink_genrule_for_dir( -+ repository_ctx, -+ None, -+ "", -+ "python_import_lib", -+ [python_import_lib_src], -+ [python_import_lib_name], -+ ) -+ numpy_include_rule = _symlink_genrule_for_dir( -+ repository_ctx, -+ numpy_include, -+ "numpy_include/numpy", -+ "numpy_include", -+ ) -+ - platform_constraint = "" - if repository_ctx.attr.platform_constraint: - platform_constraint = "\"%s\"" % repository_ctx.attr.platform_constraint -- repository_ctx.template("BUILD", build_tpl, {"%{PLATFORM_CONSTRAINT}": platform_constraint}) -+ repository_ctx.template("BUILD", build_tpl, { -+ "%{PYTHON_BIN_PATH}": python_bin, -+ "%{PYTHON_INCLUDE_GENRULE}": python_include_rule, -+ "%{PYTHON_IMPORT_LIB_GENRULE}": python_import_lib_genrule, -+ "%{NUMPY_INCLUDE_GENRULE}": numpy_include_rule, -+ "%{PLATFORM_CONSTRAINT}": platform_constraint, -+ }) -+ -+def _create_remote_python_repository(repository_ctx, 
remote_config_repo): -+ """Creates pointers to a remotely configured repo set up to build with Python. -+ """ -+ repository_ctx.template("BUILD", config_repo_label(remote_config_repo, ":BUILD"), {}) - - def _python_autoconf_impl(repository_ctx): - """Implementation of the python_autoconf repository rule.""" -- _create_local_python_repository(repository_ctx) -+ if get_host_environ(repository_ctx, TF_PYTHON_CONFIG_REPO) != None: -+ _create_remote_python_repository( -+ repository_ctx, -+ get_host_environ(repository_ctx, TF_PYTHON_CONFIG_REPO), -+ ) -+ else: -+ _create_local_python_repository(repository_ctx) - - _ENVIRONS = [ - BAZEL_SH, -@@ -32,6 +275,7 @@ _ENVIRONS = [ - - local_python_configure = repository_rule( - implementation = _create_local_python_repository, -+ environ = _ENVIRONS, - attrs = { - "environ": attr.string_dict(), - "platform_constraint": attr.string(), -@@ -50,6 +294,7 @@ remote_python_configure = repository_rule( - - python_configure = repository_rule( - implementation = _python_autoconf_impl, -+ environ = _ENVIRONS + [TF_PYTHON_CONFIG_REPO], - attrs = { - "platform_constraint": attr.string(), - }, -diff --git a/third_party/tsl/third_party/py/BUILD.tpl b/third_party/tsl/third_party/py/BUILD.tpl -index 7cc1e08568..45480bd4a3 100644 ---- a/third_party/tsl/third_party/py/BUILD.tpl -+++ b/third_party/tsl/third_party/py/BUILD.tpl -@@ -5,17 +5,16 @@ package(default_visibility = ["//visibility:public"]) - # Point both runtimes to the same python binary to ensure we always - # use the python binary specified by ./configure.py script. 
- load("@bazel_tools//tools/python:toolchain.bzl", "py_runtime_pair") --load("@python//:defs.bzl", "interpreter") - - py_runtime( - name = "py2_runtime", -- interpreter_path = interpreter, -+ interpreter_path = "%{PYTHON_BIN_PATH}", - python_version = "PY2", - ) - - py_runtime( - name = "py3_runtime", -- interpreter_path = interpreter, -+ interpreter_path = "%{PYTHON_BIN_PATH}", - python_version = "PY3", - ) - -@@ -33,8 +32,27 @@ toolchain( - exec_compatible_with = [%{PLATFORM_CONSTRAINT}], - ) - --alias(name = "python_headers", -- actual = "@python//:python_headers") -+# To build Python C/C++ extension on Windows, we need to link to python import library pythonXY.lib -+# See https://docs.python.org/3/extending/windows.html -+cc_import( -+ name = "python_lib", -+ interface_library = select({ -+ ":windows": ":python_import_lib", -+ # A placeholder for Unix platforms which makes --no_build happy. -+ "//conditions:default": "not-existing.lib", -+ }), -+ system_provided = 1, -+) -+ -+cc_library( -+ name = "python_headers", -+ hdrs = [":python_include"], -+ deps = select({ -+ ":windows": [":python_lib"], -+ "//conditions:default": [], -+ }), -+ includes = ["python_include"], -+) - - # This alias is exists for the use of targets in the @llvm-project dependency, - # which expect a python_headers target called @python_runtime//:headers. 
We use -@@ -45,9 +63,18 @@ alias( - actual = ":python_headers", - ) - -+cc_library( -+ name = "numpy_headers", -+ hdrs = [":numpy_include"], -+ includes = ["numpy_include"], -+) - - config_setting( - name = "windows", - values = {"cpu": "x64_windows"}, - visibility = ["//visibility:public"], --) -\ No newline at end of file -+) -+ -+%{PYTHON_INCLUDE_GENRULE} -+%{NUMPY_INCLUDE_GENRULE} -+%{PYTHON_IMPORT_LIB_GENRULE} -\ No newline at end of file -diff --git a/third_party/tsl/third_party/py/numpy/BUILD b/third_party/tsl/third_party/py/numpy/BUILD -index 97c7907fc3..c80cc5287b 100644 ---- a/third_party/tsl/third_party/py/numpy/BUILD -+++ b/third_party/tsl/third_party/py/numpy/BUILD -@@ -2,14 +2,15 @@ licenses(["restricted"]) - - package(default_visibility = ["//visibility:public"]) - --alias( -+py_library( - name = "numpy", -- actual = "@pypi_numpy//:pkg", -+ srcs = ["tf_numpy_dummy.py"], -+ srcs_version = "PY3", - ) - - alias( - name = "headers", -- actual = "@pypi_numpy//:numpy_headers", -+ actual = "@local_config_python//:numpy_headers", - ) - - genrule( -diff --git a/third_party/tsl/third_party/py/python_configure.bzl b/third_party/tsl/third_party/py/python_configure.bzl -index 3728a91b93..89732c3e33 100644 ---- a/third_party/tsl/third_party/py/python_configure.bzl -+++ b/third_party/tsl/third_party/py/python_configure.bzl -@@ -1,4 +1,9 @@ - """Repository rule for Python autoconfiguration. -+ -+`python_configure` depends on the following environment variables: -+ -+ * `PYTHON_BIN_PATH`: location of python binary. -+ * `PYTHON_LIB_PATH`: Location of python libraries. - """ - - load( -@@ -6,8 +11,192 @@ load( - "BAZEL_SH", - "PYTHON_BIN_PATH", - "PYTHON_LIB_PATH", -+ "TF_PYTHON_CONFIG_REPO", -+ "auto_config_fail", -+ "config_repo_label", -+ "execute", -+ "get_bash_bin", -+ "get_host_environ", -+ "get_python_bin", -+ "is_windows", -+ "raw_exec", -+ "read_dir", - ) - -+def _genrule(src_dir, genrule_name, command, outs): -+ """Returns a string with a genrule. 
-+ -+ Genrule executes the given command and produces the given outputs. -+ """ -+ return ( -+ "genrule(\n" + -+ ' name = "' + -+ genrule_name + '",\n' + -+ " outs = [\n" + -+ outs + -+ "\n ],\n" + -+ ' cmd = """\n' + -+ command + -+ '\n """,\n' + -+ ")\n" -+ ) -+ -+def _norm_path(path): -+ """Returns a path with '/' and remove the trailing slash.""" -+ path = path.replace("\\", "/") -+ if path[-1] == "/": -+ path = path[:-1] -+ return path -+ -+def _symlink_genrule_for_dir( -+ repository_ctx, -+ src_dir, -+ dest_dir, -+ genrule_name, -+ src_files = [], -+ dest_files = []): -+ """Returns a genrule to symlink(or copy if on Windows) a set of files. -+ -+ If src_dir is passed, files will be read from the given directory; otherwise -+ we assume files are in src_files and dest_files -+ """ -+ if src_dir != None: -+ src_dir = _norm_path(src_dir) -+ dest_dir = _norm_path(dest_dir) -+ files = "\n".join(read_dir(repository_ctx, src_dir)) -+ -+ # Create a list with the src_dir stripped to use for outputs. -+ dest_files = files.replace(src_dir, "").splitlines() -+ src_files = files.splitlines() -+ command = [] -+ outs = [] -+ for i in range(len(dest_files)): -+ if dest_files[i] != "": -+ # If we have only one file to link we do not want to use the dest_dir, as -+ # $(@D) will include the full path to the file. -+ dest = "$(@D)/" + dest_dir + dest_files[i] if len(dest_files) != 1 else "$(@D)/" + dest_files[i] -+ -+ # Copy the headers to create a sandboxable setup. -+ cmd = "cp -f" -+ command.append(cmd + ' "%s" "%s"' % (src_files[i], dest)) -+ outs.append(' "' + dest_dir + dest_files[i] + '",') -+ genrule = _genrule( -+ src_dir, -+ genrule_name, -+ " && ".join(command), -+ "\n".join(outs), -+ ) -+ return genrule -+ -+def _get_python_lib(repository_ctx, python_bin): -+ """Gets the python lib path.""" -+ python_lib = get_host_environ(repository_ctx, PYTHON_LIB_PATH) -+ if python_lib != None: -+ return python_lib -+ -+ # The interesting program to execute. 
-+ print_lib = [ -+ "from __future__ import print_function", -+ "import site", -+ "import os", -+ "python_paths = []", -+ "if os.getenv('PYTHONPATH') is not None:", -+ " python_paths = os.getenv('PYTHONPATH').split(':')", -+ "try:", -+ " library_paths = site.getsitepackages()", -+ "except AttributeError:", -+ " from distutils.sysconfig import get_python_lib", -+ " library_paths = [get_python_lib()]", -+ "all_paths = set(python_paths + library_paths)", -+ "paths = []", -+ "for path in all_paths:", -+ " if os.path.isdir(path):", -+ " paths.append(path)", -+ "if len(paths) >=1:", -+ " print(paths[0])", -+ ] -+ -+ # The below script writes the above program to a file -+ # and executes it. This is to work around the limitation -+ # of not being able to upload files as part of execute. -+ cmd = "from os import linesep;" -+ cmd += "f = open('script.py', 'w');" -+ for line in print_lib: -+ cmd += "f.write(\"%s\" + linesep);" % line -+ cmd += "f.close();" -+ cmd += "from subprocess import call;" -+ cmd += "call([\"%s\", \"script.py\"]);" % python_bin -+ -+ result = execute(repository_ctx, [python_bin, "-c", cmd]) -+ return result.stdout.strip() -+ -+def _check_python_lib(repository_ctx, python_lib): -+ """Checks the python lib path.""" -+ cmd = 'test -d "%s" -a -x "%s"' % (python_lib, python_lib) -+ result = raw_exec(repository_ctx, [get_bash_bin(repository_ctx), "-c", cmd]) -+ if result.return_code == 1: -+ auto_config_fail("Invalid python library path: %s" % python_lib) -+ -+def _check_python_bin(repository_ctx, python_bin): -+ """Checks the python bin path.""" -+ cmd = '[[ -x "%s" ]] && [[ ! -d "%s" ]]' % (python_bin, python_bin) -+ result = raw_exec(repository_ctx, [get_bash_bin(repository_ctx), "-c", cmd]) -+ if result.return_code == 1: -+ auto_config_fail("--define %s='%s' is not executable. Is it the python binary?" 
% ( -+ PYTHON_BIN_PATH, -+ python_bin, -+ )) -+ -+def _get_python_include(repository_ctx, python_bin): -+ """Gets the python include path.""" -+ result = execute( -+ repository_ctx, -+ [ -+ python_bin, -+ "-Wignore", -+ "-c", -+ "import sysconfig; " + -+ "print(sysconfig.get_path('include'))", -+ ], -+ error_msg = "Problem getting python include path.", -+ error_details = ("Is the Python binary path set up right? " + -+ "(See ./configure or " + PYTHON_BIN_PATH + ".) " + -+ "Is distutils installed?"), -+ ) -+ return result.stdout.splitlines()[0] -+ -+def _get_python_import_lib_name(repository_ctx, python_bin): -+ """Get Python import library name (pythonXY.lib) on Windows.""" -+ result = execute( -+ repository_ctx, -+ [ -+ python_bin, -+ "-c", -+ "import sys;" + -+ 'print("python" + str(sys.version_info[0]) + ' + -+ ' str(sys.version_info[1]) + ".lib")', -+ ], -+ error_msg = "Problem getting python import library.", -+ error_details = ("Is the Python binary path set up right? " + -+ "(See ./configure or " + PYTHON_BIN_PATH + ".) "), -+ ) -+ return result.stdout.splitlines()[0] -+ -+def _get_numpy_include(repository_ctx, python_bin): -+ """Gets the numpy include path.""" -+ return execute( -+ repository_ctx, -+ [ -+ python_bin, -+ "-c", -+ "from __future__ import print_function;" + -+ "import numpy;" + -+ " print(numpy.get_include());", -+ ], -+ error_msg = "Problem getting numpy include path.", -+ error_details = "Is numpy installed?", -+ ).stdout.splitlines()[0] -+ - def _create_local_python_repository(repository_ctx): - """Creates the repository containing files set up to build with Python.""" - -@@ -15,14 +204,68 @@ def _create_local_python_repository(repository_ctx): - # function to be restarted with all previous state being lost. This - # can easily lead to a O(n^2) runtime in the number of labels. 
- build_tpl = repository_ctx.path(Label("//third_party/py:BUILD.tpl")) -+ -+ python_bin = get_python_bin(repository_ctx) -+ _check_python_bin(repository_ctx, python_bin) -+ python_lib = _get_python_lib(repository_ctx, python_bin) -+ _check_python_lib(repository_ctx, python_lib) -+ python_include = _get_python_include(repository_ctx, python_bin) -+ numpy_include = _get_numpy_include(repository_ctx, python_bin) + "/numpy" -+ python_include_rule = _symlink_genrule_for_dir( -+ repository_ctx, -+ python_include, -+ "python_include", -+ "python_include", -+ ) -+ python_import_lib_genrule = "" -+ -+ # To build Python C/C++ extension on Windows, we need to link to python import library pythonXY.lib -+ # See https://docs.python.org/3/extending/windows.html -+ if is_windows(repository_ctx): -+ python_bin = python_bin.replace("\\", "/") -+ python_include = _norm_path(python_include) -+ python_import_lib_name = _get_python_import_lib_name(repository_ctx, python_bin) -+ python_import_lib_src = python_include.rsplit("/", 1)[0] + "/libs/" + python_import_lib_name -+ python_import_lib_genrule = _symlink_genrule_for_dir( -+ repository_ctx, -+ None, -+ "", -+ "python_import_lib", -+ [python_import_lib_src], -+ [python_import_lib_name], -+ ) -+ numpy_include_rule = _symlink_genrule_for_dir( -+ repository_ctx, -+ numpy_include, -+ "numpy_include/numpy", -+ "numpy_include", -+ ) -+ - platform_constraint = "" - if repository_ctx.attr.platform_constraint: - platform_constraint = "\"%s\"" % repository_ctx.attr.platform_constraint -- repository_ctx.template("BUILD", build_tpl, {"%{PLATFORM_CONSTRAINT}": platform_constraint}) -+ repository_ctx.template("BUILD", build_tpl, { -+ "%{PYTHON_BIN_PATH}": python_bin, -+ "%{PYTHON_INCLUDE_GENRULE}": python_include_rule, -+ "%{PYTHON_IMPORT_LIB_GENRULE}": python_import_lib_genrule, -+ "%{NUMPY_INCLUDE_GENRULE}": numpy_include_rule, -+ "%{PLATFORM_CONSTRAINT}": platform_constraint, -+ }) -+ -+def _create_remote_python_repository(repository_ctx, 
remote_config_repo): -+ """Creates pointers to a remotely configured repo set up to build with Python. -+ """ -+ repository_ctx.template("BUILD", config_repo_label(remote_config_repo, ":BUILD"), {}) - - def _python_autoconf_impl(repository_ctx): - """Implementation of the python_autoconf repository rule.""" -- _create_local_python_repository(repository_ctx) -+ if get_host_environ(repository_ctx, TF_PYTHON_CONFIG_REPO) != None: -+ _create_remote_python_repository( -+ repository_ctx, -+ get_host_environ(repository_ctx, TF_PYTHON_CONFIG_REPO), -+ ) -+ else: -+ _create_local_python_repository(repository_ctx) - - _ENVIRONS = [ - BAZEL_SH, -@@ -32,6 +275,7 @@ _ENVIRONS = [ - - local_python_configure = repository_rule( - implementation = _create_local_python_repository, -+ environ = _ENVIRONS, - attrs = { - "environ": attr.string_dict(), - "platform_constraint": attr.string(), -@@ -50,6 +294,7 @@ remote_python_configure = repository_rule( - - python_configure = repository_rule( - implementation = _python_autoconf_impl, -+ environ = _ENVIRONS + [TF_PYTHON_CONFIG_REPO], - attrs = { - "platform_constraint": attr.string(), - }, diff --git a/bazel/repositories.bzl b/bazel/repositories.bzl index c032474c9..e33ea1526 100644 --- a/bazel/repositories.bzl +++ b/bazel/repositories.bzl @@ -16,91 +16,13 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") def spu_deps(): - _bazel_skylib() - _rules_cuda() - _rules_proto_grpc() - _bazel_platform() _com_github_xtensor_xtensor() _com_github_xtensor_xtl() - _com_github_openxla_xla() - _com_github_pybind11_bazel() - _com_github_pybind11() - _com_intel_hexl() - _com_github_emptoolkit_emp_tool() - _com_github_emptoolkit_emp_ot() - _com_github_facebook_zstd() - _com_github_eigenteam_eigen() - _com_github_nvidia_cutlass() - _yacl() - _libpsi() - -def _yacl(): - maybe( - http_archive, - name = "yacl", - urls = [ - 
"https://github.com/secretflow/yacl/archive/refs/tags/0.4.5b8_nightly_20241014.tar.gz", - ], - strip_prefix = "yacl-0.4.5b8_nightly_20241014", - sha256 = "9141792f07eba507ffd21c57ec3df2ad5fdf90ce605ffb7bc1b7b4e84a9c34fa", - ) - -def _libpsi(): - maybe( - http_archive, - name = "psi", - urls = [ - "https://github.com/secretflow/psi/archive/refs/tags/v0.5.0.dev241115.tar.gz", - ], - strip_prefix = "psi-0.5.0.dev241115", - sha256 = "4d5ccc61282c4f887cee2c12fe3f414dfd7e916952849e92ffb1f6835d657a35", - ) - -def _rules_proto_grpc(): - http_archive( - name = "rules_proto_grpc", - sha256 = "2a0860a336ae836b54671cbbe0710eec17c64ef70c4c5a88ccfd47ea6e3739bd", - strip_prefix = "rules_proto_grpc-4.6.0", - urls = [ - "https://github.com/rules-proto-grpc/rules_proto_grpc/releases/download/4.6.0/rules_proto_grpc-4.6.0.tar.gz", - ], - ) - -def _rules_cuda(): - http_archive( - name = "rules_cuda", - sha256 = "c92b334d769a07cd991b7675b2f6076b8b95cd3b28b14268a2f379f8baae58e0", - strip_prefix = "rules_cuda-v0.2.3", - urls = ["https://github.com/bazel-contrib/rules_cuda/releases/download/v0.2.3/rules_cuda-v0.2.3.tar.gz"], - ) - -def _bazel_platform(): - http_archive( - name = "platforms", - urls = [ - "https://mirror.bazel.build/github.com/bazelbuild/platforms/releases/download/0.0.10/platforms-0.0.10.tar.gz", - "https://github.com/bazelbuild/platforms/releases/download/0.0.10/platforms-0.0.10.tar.gz", - ], - sha256 = "218efe8ee736d26a3572663b374a253c012b716d8af0c07e842e82f238a0a7ee", - ) - -def _com_github_facebook_zstd(): - maybe( - http_archive, - name = "com_github_facebook_zstd", - build_file = "@spulib//bazel:zstd.BUILD", - strip_prefix = "zstd-1.5.6", - sha256 = "30f35f71c1203369dc979ecde0400ffea93c27391bfd2ac5a9715d2173d92ff7", - type = ".tar.gz", - urls = [ - "https://github.com/facebook/zstd/archive/refs/tags/v1.5.6.tar.gz", - ], - ) def _com_github_xtensor_xtensor(): maybe( http_archive, - name = "com_github_xtensor_xtensor", + name = "xtensor", sha256 = 
"32d5d9fd23998c57e746c375a544edf544b74f0a18ad6bc3c38cbba968d5e6c7", strip_prefix = "xtensor-0.25.0", build_file = "@spulib//bazel:xtensor.BUILD", @@ -113,7 +35,7 @@ def _com_github_xtensor_xtensor(): def _com_github_xtensor_xtl(): maybe( http_archive, - name = "com_github_xtensor_xtl", + name = "xtl", sha256 = "44fb99fbf5e56af5c43619fc8c29aa58e5fad18f3ba6e7d9c55c111b62df1fbb", strip_prefix = "xtl-0.7.7", build_file = "@spulib//bazel:xtl.BUILD", @@ -122,130 +44,3 @@ def _com_github_xtensor_xtl(): "https://github.com/xtensor-stack/xtl/archive/refs/tags/0.7.7.tar.gz", ], ) - -def _bazel_skylib(): - maybe( - http_archive, - name = "bazel_skylib", - sha256 = "bc283cdfcd526a52c3201279cda4bc298652efa898b10b4db0837dc51652756f", - urls = [ - "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.7.1/bazel-skylib-1.7.1.tar.gz", - "https://github.com/bazelbuild/bazel-skylib/releases/download/1.7.1/bazel-skylib-1.7.1.tar.gz", - ], - ) - -def _com_github_openxla_xla(): - OPENXLA_COMMIT = "64bdcc53a1b24abf19b1fe598e6f9b0fe6454470" - OPENXLA_SHA256 = "60918b3a0391fe9e0bd506c9b90170b7b5fa64d06de7ec1f4f0e351a303a88fa" - - # We need openxla to handle xla/mhlo/stablehlo - maybe( - http_archive, - name = "xla", - sha256 = OPENXLA_SHA256, - strip_prefix = "xla-" + OPENXLA_COMMIT, - type = ".tar.gz", - urls = [ - "https://github.com/openxla/xla/archive/{commit}.tar.gz".format(commit = OPENXLA_COMMIT), - ], - patch_args = ["-p1", "-l"], - patches = ["@spulib//bazel:patches/xla-non-hermetic-python.patch"], - ) - -def _com_github_pybind11_bazel(): - maybe( - http_archive, - name = "pybind11_bazel", - sha256 = "dc4882b23a617575d0fd822aba88aa4a14133c3d428b5a8fb83d81d03444a475", - strip_prefix = "pybind11_bazel-8889d39b2b925b2a47519ae09402a96f00ccf2b4", - urls = [ - "https://github.com/pybind/pybind11_bazel/archive/8889d39b2b925b2a47519ae09402a96f00ccf2b4.zip", - ], - ) - -def _com_github_pybind11(): - maybe( - http_archive, - name = "pybind11", - build_file = 
"@pybind11_bazel//:pybind11.BUILD", - sha256 = "e08cb87f4773da97fa7b5f035de8763abc656d87d5773e62f6da0587d1f0ec20", - strip_prefix = "pybind11-2.13.6", - urls = [ - "https://github.com/pybind/pybind11/archive/refs/tags/v2.13.6.tar.gz", - ], - ) - -def _com_intel_hexl(): - maybe( - http_archive, - name = "com_intel_hexl", - type = "tar.gz", - strip_prefix = "hexl-1.2.5", - sha256 = "3692e6e6183dbc49253e51e86c3e52e7affcac925f57db0949dbb4d34b558a9a", - build_file = "@spulib//bazel:hexl.BUILD", - urls = [ - "https://github.com/intel/hexl/archive/refs/tags/v1.2.5.tar.gz", - ], - patch_args = ["-p1"], - patches = ["@spulib//bazel:patches/hexl.patch"], - ) - -def _com_github_emptoolkit_emp_tool(): - maybe( - http_archive, - name = "com_github_emptoolkit_emp_tool", - sha256 = "b9ab2380312e78020346b5d2db3d0244c7bd8098cb50f8b3620532ef491808d0", - strip_prefix = "emp-tool-0.2.5", - type = "tar.gz", - patch_args = ["-p1"], - patches = [ - "@spulib//bazel:patches/emp-tool.patch", - "@spulib//bazel:patches/emp-tool-cmake.patch", - "@spulib//bazel:patches/emp-tool-sse2neon.patch", - ], - urls = [ - "https://github.com/emp-toolkit/emp-tool/archive/refs/tags/0.2.5.tar.gz", - ], - build_file = "@spulib//bazel:emp-tool.BUILD", - ) - -def _com_github_emptoolkit_emp_ot(): - maybe( - http_archive, - name = "com_github_emptoolkit_emp_ot", - sha256 = "358036e5d18143720ee17103f8172447de23014bcfc1f8e7d5849c525ca928ac", - strip_prefix = "emp-ot-0.2.4", - type = "tar.gz", - patch_args = ["-p1"], - patches = ["@spulib//bazel:patches/emp-ot.patch"], - urls = [ - "https://github.com/emp-toolkit/emp-ot/archive/refs/tags/0.2.4.tar.gz", - ], - build_file = "@spulib//bazel:emp-ot.BUILD", - ) - -def _com_github_eigenteam_eigen(): - EIGEN_COMMIT = "66e8f38891841bf88ee976a316c0c78a52f0cee5" - EIGEN_SHA256 = "01fcd68409c038bbcfd16394274c2bf71e2bb6dda89a2319e23fc59a2da17210" - maybe( - http_archive, - name = "eigen_archive", - sha256 = EIGEN_SHA256, - build_file = "@spulib//bazel:eigen.BUILD", - 
strip_prefix = "eigen-{commit}".format(commit = EIGEN_COMMIT), - urls = [ - "https://gitlab.com/libeigen/eigen/-/archive/{commit}/eigen-{commit}.tar.gz".format(commit = EIGEN_COMMIT), - ], - ) - -def _com_github_nvidia_cutlass(): - maybe( - http_archive, - name = "cutlass_archive", - strip_prefix = "cutlass-3.6.0", - urls = [ - "https://github.com/NVIDIA/cutlass/archive/refs/tags/v3.6.0.tar.gz", - ], - sha256 = "7576f3437b90d0de5923560ccecebaa1357e5d72f36c0a59ad77c959c9790010", - build_file = "@spulib//bazel:nvidia_cutlass.BUILD", - ) diff --git a/bazel/seal.BUILD b/bazel/seal.BUILD deleted file mode 100644 index 75b22136a..000000000 --- a/bazel/seal.BUILD +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright 2022 Ant Group Co., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -load("@spulib//bazel:spu.bzl", "spu_cmake_external") - -package(default_visibility = ["//visibility:public"]) - -filegroup( - name = "all", - srcs = glob(["**"]), -) - -config_setting( - name = "can_use_hexl", - constraint_values = [ - "@platforms//cpu:x86_64", - ], - values = {"compilation_mode": "opt"}, -) - -default_config = { - "SEAL_USE_MSGSL": "OFF", - "SEAL_BUILD_DEPS": "OFF", - "SEAL_USE_ZLIB": "OFF", - "SEAL_USE_INTEL_HEXL": "OFF", - "SEAL_THROW_ON_TRANSPARENT_CIPHERTEXT": "OFF", #NOTE(juhou) required by apsi - "SEAL_USE_ZSTD": "ON", - "CMAKE_INSTALL_LIBDIR": "lib", -} - -x64_hexl_config = { - "SEAL_USE_MSGSL": "OFF", - "SEAL_BUILD_DEPS": "OFF", - "SEAL_USE_ZLIB": "OFF", - "SEAL_THROW_ON_TRANSPARENT_CIPHERTEXT": "OFF", #NOTE(juhou) required by apsi - "CMAKE_INSTALL_LIBDIR": "lib", - "CpuFeatures_DIR": "$EXT_BUILD_DEPS/cpu_features/lib/cmake/CpuFeatures/", - "EXT_BUILD_DEPS": "$EXT_BUILD_DEPS", - "SEAL_USE_ZSTD": "ON", - "SEAL_USE_INTEL_HEXL": "ON", -} - -spu_cmake_external( - name = "seal", - cache_entries = select({ - ":can_use_hexl": x64_hexl_config, - "//conditions:default": default_config, - }), - lib_source = "@com_github_microsoft_seal//:all", - out_include_dir = "include/SEAL-4.1", - out_static_libs = ["libseal-4.1.a"], - deps = [ - "@com_github_facebook_zstd//:zstd", - ] + select({ - "@platforms//cpu:x86_64": ["@com_intel_hexl//:hexl"], - "//conditions:default": [], - }), -) diff --git a/bazel/spu.bzl b/bazel/spu.bzl index 0ac3fb45c..f0bff2435 100644 --- a/bazel/spu.bzl +++ b/bazel/spu.bzl @@ -17,6 +17,8 @@ warpper bazel cc_xx to modify flags. 
""" load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test") +load("@rules_python//python:defs.bzl", "py_binary", "py_library", "py_test") +load("@spu_pip//:requirements.bzl", pip_dep = "all_requirements") load("@yacl//bazel:yacl.bzl", "yacl_cmake_external") WARNING_FLAGS = [ @@ -59,7 +61,7 @@ def spu_cc_library( linkopts = linkopts, copts = _spu_copts() + copts, deps = deps + [ - "@com_github_gabime_spdlog//:spdlog", + "@spdlog//:spdlog", ], local_defines = local_defines + [ "SPU_BUILD", @@ -96,12 +98,36 @@ def spu_cc_test( # -lm for tcmalloc linkopts = linkopts + ["-lm"], copts = _spu_copts() + copts, - deps = deps + [ - "@com_google_googletest//:gtest_main", - ], + deps = [ + "@googletest//:gtest_main", + ] + deps, local_defines = local_defines + [ "SPU_BUILD", ], linkstatic = True, **kwargs ) + +def spu_py_binary( + deps = [], + **kwargs): + py_binary( + deps = deps + pip_dep, + **kwargs + ) + +def spu_py_library( + deps = [], + **kwargs): + py_library( + deps = deps + pip_dep, + **kwargs + ) + +def spu_py_test( + deps = [], + **kwargs): + py_test( + deps = deps + pip_dep, + **kwargs + ) diff --git a/bazel/xtensor.BUILD b/bazel/xtensor.BUILD index 7789669d2..f4bf88a25 100644 --- a/bazel/xtensor.BUILD +++ b/bazel/xtensor.BUILD @@ -1,4 +1,4 @@ -# Copyright 2022 Ant Group Co., Ltd. +# Copyright 2024 Ant Group Co., Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,18 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-load("@spulib//bazel:spu.bzl", "spu_cmake_external") - -package(default_visibility = ["//visibility:public"]) +load("@rules_foreign_cc//foreign_cc:defs.bzl", "cmake") filegroup( name = "all_srcs", srcs = glob(["**"]), ) -spu_cmake_external( +cmake( name = "xtensor", + generate_args = ["-GNinja"], lib_source = ":all_srcs", out_headers_only = True, - deps = ["@com_github_xtensor_xtl//:xtl"], + visibility = ["//visibility:public"], + deps = [ + "@xtl", + ], ) diff --git a/bazel/zstd.BUILD b/bazel/zstd.BUILD deleted file mode 100644 index 9a9a09242..000000000 --- a/bazel/zstd.BUILD +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright 2022 Ant Group Co., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -load("@spulib//bazel:spu.bzl", "spu_cmake_external") - -package(default_visibility = ["//visibility:public"]) - -filegroup( - name = "all", - srcs = glob(["**"]), -) - -spu_cmake_external( - name = "zstd", - cache_entries = { - "ZSTD_BUILD_PROGRAMS": "OFF", - "ZSTD_BUILD_SHARED": "OFF", - "ZLIB_BUILD_STATIC": "ON", - "ZSTD_BUILD_TESTS": "OFF", - "ZSTD_MULTITHREAD_SUPPORT": "OFF", - "CMAKE_INSTALL_LIBDIR": "lib", - }, - lib_source = "@com_github_facebook_zstd//:all", - out_include_dir = "include/", - out_static_libs = ["libzstd.a"], - working_directory = "build/cmake", -) diff --git a/benchmark/setup_dockers_and_run.sh b/benchmark/setup_dockers_and_run.sh index e1c8c45e2..bfb9a2f5f 100644 --- a/benchmark/setup_dockers_and_run.sh +++ b/benchmark/setup_dockers_and_run.sh @@ -31,7 +31,6 @@ echo -e "${COLOR_GREEN}Build spu-build${COLOR_END}" docker run --name spu-build --mount type=bind,source="$(pwd)",target=/home/admin/dev/ secretflow/ubuntu-base-ci:latest \ sh -c "cd /home/admin/dev && \ python3 -m pip install -U pip && \ - python3 -m pip install -r requirements.txt && \ bazel build //benchmark/... //examples/python/... -c opt --ui_event_filters=-info,-debug,-warning" docker commit spu-build spu-build:v1 diff --git a/build_wheel_entrypoint.sh b/build_wheel_entrypoint.sh index 17d70cf44..d133a782b 100755 --- a/build_wheel_entrypoint.sh +++ b/build_wheel_entrypoint.sh @@ -15,9 +15,9 @@ # limitations under the License. 
# -pip install numpy -python setup.py bdist_wheel +# FIXME: add build option `--config=avx` if building on x86_64 platform +bazel build //:spu_wheel -c opt # Ensure binary safety if [[ "$OSTYPE" == "linux-gnu"* ]]; then diff --git a/examples/cpp/BUILD.bazel b/examples/cpp/BUILD.bazel index b2d27cc9d..8431250e3 100644 --- a/examples/cpp/BUILD.bazel +++ b/examples/cpp/BUILD.bazel @@ -31,7 +31,7 @@ spu_cc_binary( "//libspu/kernel/hlo:casting", "//libspu/kernel/hlo:const", "//libspu/kernel/hlo:geometrical", - "@com_google_absl//absl/strings", + "@abseil-cpp//absl/strings", "@llvm-project//llvm:Support", "@yacl//yacl/link:factory", ], @@ -43,7 +43,7 @@ spu_cc_library( hdrs = ["utils.h"], deps = [ "//libspu/core:context", - "@com_google_absl//absl/strings", + "@abseil-cpp//absl/strings", "@llvm-project//llvm:Support", "@yacl//yacl/link:factory", ], diff --git a/examples/python/ml/BUILD.bazel b/examples/python/ml/BUILD.bazel index 9fcf1b3e7..c06da27d1 100644 --- a/examples/python/ml/BUILD.bazel +++ b/examples/python/ml/BUILD.bazel @@ -13,6 +13,7 @@ # limitations under the License. load("@rules_python//python:defs.bzl", "py_test") +load("@spu_pip_dev//:requirements.bzl", "all_requirements") package(default_visibility = ["//visibility:public"]) @@ -43,5 +44,5 @@ py_test( "//examples/python/ml/torch_lr_experiment", "//examples/python/ml/torch_resnet_experiment", "//spu/utils:distributed", - ], + ] + all_requirements, ) diff --git a/examples/python/ml/flax_gpt2/BUILD.bazel b/examples/python/ml/flax_gpt2/BUILD.bazel index b965e871d..168ea6b3f 100644 --- a/examples/python/ml/flax_gpt2/BUILD.bazel +++ b/examples/python/ml/flax_gpt2/BUILD.bazel @@ -13,6 +13,7 @@ # limitations under the License. 
load("@rules_python//python:defs.bzl", "py_binary") +load("@spu_pip_dev//:requirements.bzl", "all_requirements") package(default_visibility = ["//visibility:public"]) @@ -24,5 +25,5 @@ py_binary( ], deps = [ "//spu/utils:distributed", - ], + ] + all_requirements, ) diff --git a/examples/python/ml/flax_gpt2/README.md b/examples/python/ml/flax_gpt2/README.md index 7eb25fc4d..652b17b30 100644 --- a/examples/python/ml/flax_gpt2/README.md +++ b/examples/python/ml/flax_gpt2/README.md @@ -3,19 +3,13 @@ This example demonstrates how to use SPU to run private inference on a pre-trained [GPT2](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf) model. -1. Install huggingface transformers library - - ```sh - pip install 'transformers[flax]' - ``` - -2. Launch SPU backend runtime +1. Launch SPU backend runtime ```sh bazel run -c opt //examples/python/utils:nodectl -- --config `pwd`/examples/python/ml/flax_gpt2/3pc.json up ``` -3. Run `flax_gpt2` example +2. Run `flax_gpt2` example ```sh bazel run -c opt //examples/python/ml/flax_gpt2 -- --config `pwd`/examples/python/ml/flax_gpt2/3pc.json diff --git a/examples/python/ml/flax_mlp/BUILD.bazel b/examples/python/ml/flax_mlp/BUILD.bazel index 56274bd2b..7093ad69c 100644 --- a/examples/python/ml/flax_mlp/BUILD.bazel +++ b/examples/python/ml/flax_mlp/BUILD.bazel @@ -13,6 +13,7 @@ # limitations under the License. 
load("@rules_python//python:defs.bzl", "py_binary") +load("@spu_pip_dev//:requirements.bzl", "all_requirements") package(default_visibility = ["//visibility:public"]) @@ -25,5 +26,5 @@ py_binary( deps = [ "//examples/python/utils:dataset_utils", "//spu/utils:distributed", - ], + ] + all_requirements, ) diff --git a/examples/python/ml/flax_resnet/BUILD.bazel b/examples/python/ml/flax_resnet/BUILD.bazel index 0f6cfa6e8..70105069b 100644 --- a/examples/python/ml/flax_resnet/BUILD.bazel +++ b/examples/python/ml/flax_resnet/BUILD.bazel @@ -12,28 +12,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -load("@rules_python//python:defs.bzl", "py_binary", "py_library") +load("@spu_pip_dev//:requirements.bzl", "all_requirements") +load("//bazel:spu.bzl", "spu_py_binary") package(default_visibility = ["//visibility:public"]) -py_library( - name = "models", - srcs = ["models.py"], -) - -py_binary( +spu_py_binary( name = "flax_resnet_training", - srcs = ["flax_resnet_training.py"], + srcs = [ + "flax_resnet_training.py", + "models.py", + ], data = [ "//examples/python/ml/flax_resnet:3pc.json", ], deps = [ - ":models", "//spu/utils:distributed", - ], + ] + all_requirements, ) -py_binary( +spu_py_binary( name = "flax_resnet_inference", srcs = ["flax_resnet_inference.py"], data = [ @@ -41,5 +39,5 @@ py_binary( ], deps = [ "//spu/utils:distributed", - ], + ] + all_requirements, ) diff --git a/examples/python/ml/flax_resnet/flax_resnet_training.py b/examples/python/ml/flax_resnet/flax_resnet_training.py index 2946ade5d..df0fcd846 100644 --- a/examples/python/ml/flax_resnet/flax_resnet_training.py +++ b/examples/python/ml/flax_resnet/flax_resnet_training.py @@ -28,7 +28,7 @@ import tensorflow_datasets as tfds from flax.training import train_state from jax import random -from models import ResNet18 +from examples.python.ml.flax_resnet.models import ResNet18 NUM_CLASSES = 10 IMAGE_SIZE = 32 diff --git 
a/examples/python/ml/flax_t5/BUILD.bazel b/examples/python/ml/flax_t5/BUILD.bazel index 2855a08a4..8b842c69f 100644 --- a/examples/python/ml/flax_t5/BUILD.bazel +++ b/examples/python/ml/flax_t5/BUILD.bazel @@ -13,6 +13,7 @@ # limitations under the License. load("@rules_python//python:defs.bzl", "py_binary") +load("@spu_pip_dev//:requirements.bzl", "all_requirements") package(default_visibility = ["//visibility:public"]) @@ -24,5 +25,5 @@ py_binary( ], deps = [ "//spu/utils:distributed", - ], + ] + all_requirements, ) diff --git a/examples/python/ml/flax_t5/README.md b/examples/python/ml/flax_t5/README.md index 8399b787d..b20292512 100644 --- a/examples/python/ml/flax_t5/README.md +++ b/examples/python/ml/flax_t5/README.md @@ -3,23 +3,17 @@ This example demonstrates how to use SPU to run private inference on a pre-trained [T5](https://huggingface.co/docs/transformers/model_doc/t5#transformers.FlaxT5ForConditionalGeneration) model. -1. Install huggingface transformers library - - ```sh - pip install 'transformers[flax]' - ``` - -2. Enable While with secret value +1. Enable While with secret value Edit libspu/kernel/hlo/control_flow.cc, change `ENABLE_DEBUG_ONLY_REVEAL_SECRET_CONDITION` to `true`. -3. Launch SPU backend runtime +2. Launch SPU backend runtime ```sh bazel run -c opt //examples/python/utils:nodectl -- --config `pwd`/examples/python/ml/flax_t5/3pc.json up ``` -4. Run `flax_t5` example +3. Run `flax_t5` example ```sh bazel run -c opt //examples/python/ml/flax_t5 -- --config `pwd`/examples/python/ml/flax_t5/3pc.json diff --git a/examples/python/ml/flax_vae/BUILD.bazel b/examples/python/ml/flax_vae/BUILD.bazel index 5585fd246..d269c0416 100644 --- a/examples/python/ml/flax_vae/BUILD.bazel +++ b/examples/python/ml/flax_vae/BUILD.bazel @@ -12,16 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-load("@rules_python//python:defs.bzl", "py_binary", "py_library") +load("@spu_pip_dev//:requirements.bzl", "all_requirements") +load("//bazel:spu.bzl", "spu_py_binary", "spu_py_library") package(default_visibility = ["//visibility:public"]) -py_library( +spu_py_library( name = "utils", srcs = ["utils.py"], ) -py_binary( +spu_py_binary( name = "flax_vae", srcs = ["flax_vae.py"], data = [ @@ -30,5 +31,5 @@ py_binary( deps = [ ":utils", "//spu/utils:distributed", - ], + ] + all_requirements, ) diff --git a/examples/python/ml/flax_whisper/BUILD.bazel b/examples/python/ml/flax_whisper/BUILD.bazel index 30bc8d3c0..4c97b86b7 100644 --- a/examples/python/ml/flax_whisper/BUILD.bazel +++ b/examples/python/ml/flax_whisper/BUILD.bazel @@ -13,6 +13,7 @@ # limitations under the License. load("@rules_python//python:defs.bzl", "py_binary") +load("@spu_pip_dev//:requirements.bzl", "all_requirements") package(default_visibility = ["//visibility:public"]) @@ -24,5 +25,5 @@ py_binary( ], deps = [ "//spu/utils:distributed", - ], + ] + all_requirements, ) diff --git a/examples/python/ml/flax_whisper/README.md b/examples/python/ml/flax_whisper/README.md index 1a331f4e7..99fb07c78 100644 --- a/examples/python/ml/flax_whisper/README.md +++ b/examples/python/ml/flax_whisper/README.md @@ -3,23 +3,17 @@ This example demonstrates how to use SPU to run private inference on a pre-trained [Whisper](https://huggingface.co/docs/transformers/model_doc/whisper#transformers.FlaxWhisperForConditionalGeneration) model. -1. Install huggingface transformers library - - ```sh - pip install 'transformers[flax]' soundfile librosa - ``` - -2. Enable While with secret value +1. Enable While with secret value Edit libspu/kernel/hlo/control_flow.cc, change `ENABLE_DEBUG_ONLY_REVEAL_SECRET_CONDITION` to `true`. -3. Launch SPU backend runtime +2. Launch SPU backend runtime ```sh bazel run -c opt //examples/python/utils:nodectl -- --config `pwd`/examples/python/ml/flax_whisper/3pc.json up ``` -4. 
Run `flax_whisper` example +3. Run `flax_whisper` example ```sh bazel run -c opt //examples/python/ml/flax_whisper -- --config `pwd`/examples/python/ml/flax_whisper/3pc.json diff --git a/examples/python/ml/haiku_lstm/BUILD.bazel b/examples/python/ml/haiku_lstm/BUILD.bazel index fc7a922bb..d6591e01f 100644 --- a/examples/python/ml/haiku_lstm/BUILD.bazel +++ b/examples/python/ml/haiku_lstm/BUILD.bazel @@ -13,6 +13,7 @@ # limitations under the License. load("@rules_python//python:defs.bzl", "py_binary") +load("@spu_pip_dev//:requirements.bzl", "all_requirements") package(default_visibility = ["//visibility:public"]) @@ -24,5 +25,5 @@ py_binary( ], deps = [ "//spu/utils:distributed", - ], + ] + all_requirements, ) diff --git a/examples/python/ml/haiku_lstm/README.md b/examples/python/ml/haiku_lstm/README.md index 97f0990c1..e04cfe6e4 100644 --- a/examples/python/ml/haiku_lstm/README.md +++ b/examples/python/ml/haiku_lstm/README.md @@ -6,23 +6,17 @@ This example comes from Haiku official github repo: -1. Install dependencies - - ```sh - pip install -r ../requirements.txt - ``` - -2. Launch SPU backend runtime +1. Launch SPU backend runtime ```sh bazel run -c opt //examples/python/utils:nodectl -- up ``` -3. Run `haiku_lstm` example +2. Run `haiku_lstm` example ```sh bazel run -c opt //examples/python/ml/haiku_lstm -- --output_dir `pwd` ``` -4. Check results +3. Check results When training is finished, you can check the generated images in the specified `output_dir` and compare the results to CPU versions. diff --git a/examples/python/ml/jax_kmeans/BUILD.bazel b/examples/python/ml/jax_kmeans/BUILD.bazel index d59803c12..0c348b9a0 100644 --- a/examples/python/ml/jax_kmeans/BUILD.bazel +++ b/examples/python/ml/jax_kmeans/BUILD.bazel @@ -13,6 +13,7 @@ # limitations under the License. 
load("@rules_python//python:defs.bzl", "py_binary") +load("@spu_pip_dev//:requirements.bzl", "all_requirements") package(default_visibility = ["//visibility:public"]) @@ -24,5 +25,5 @@ py_binary( ], deps = [ "//spu/utils:distributed", - ], + ] + all_requirements, ) diff --git a/examples/python/ml/jax_lr/BUILD.bazel b/examples/python/ml/jax_lr/BUILD.bazel index 5c880c45d..591e209c7 100644 --- a/examples/python/ml/jax_lr/BUILD.bazel +++ b/examples/python/ml/jax_lr/BUILD.bazel @@ -13,6 +13,7 @@ # limitations under the License. load("@rules_python//python:defs.bzl", "py_binary") +load("@spu_pip_dev//:requirements.bzl", "all_requirements") package(default_visibility = ["//visibility:public"]) @@ -26,5 +27,5 @@ py_binary( "//examples/python/utils:dataset_utils", "//spu:init", "//spu/utils:distributed", - ], + ] + all_requirements, ) diff --git a/examples/python/ml/jax_svm/BUILD.bazel b/examples/python/ml/jax_svm/BUILD.bazel index 9fdaba473..4bfca8ace 100644 --- a/examples/python/ml/jax_svm/BUILD.bazel +++ b/examples/python/ml/jax_svm/BUILD.bazel @@ -13,6 +13,7 @@ # limitations under the License. load("@rules_python//python:defs.bzl", "py_binary") +load("@spu_pip_dev//:requirements.bzl", "all_requirements") package(default_visibility = ["//visibility:public"]) @@ -25,5 +26,5 @@ py_binary( deps = [ "//examples/python/utils:dataset_utils", "//spu/utils:distributed", - ], + ] + all_requirements, ) diff --git a/examples/python/ml/jraph_gnn/BUILD.bazel b/examples/python/ml/jraph_gnn/BUILD.bazel index 6db9601ed..dc155a183 100644 --- a/examples/python/ml/jraph_gnn/BUILD.bazel +++ b/examples/python/ml/jraph_gnn/BUILD.bazel @@ -13,6 +13,7 @@ # limitations under the License. 
load("@rules_python//python:defs.bzl", "py_binary") +load("@spu_pip_dev//:requirements.bzl", "all_requirements") package(default_visibility = ["//visibility:public"]) @@ -24,5 +25,5 @@ py_binary( ], deps = [ "//spu/utils:distributed", - ], + ] + all_requirements, ) diff --git a/examples/python/ml/jraph_gnn/README.md b/examples/python/ml/jraph_gnn/README.md index 328838880..cc074372f 100644 --- a/examples/python/ml/jraph_gnn/README.md +++ b/examples/python/ml/jraph_gnn/README.md @@ -6,13 +6,7 @@ This example comes from Jraph official github repo: -1. Install dependencies - - ```sh - pip install -r ../requirements.txt - ``` - -2. Set runtime configuration +1. Set runtime configuration This example requires a higher precision setting than the default. @@ -20,13 +14,13 @@ This example comes from Jraph official github repo: The default configuration file locates at [examples/python/conf/3pc.json](../../conf/3pc.json). -3. Launch SPU backend runtime +2. Launch SPU backend runtime ```sh bazel run -c opt //examples/python/utils:nodectl -- up ``` -4. Run `jraph_gnn` example +3. Run `jraph_gnn` example ```sh bazel run -c opt //examples/python/ml/jraph_gnn diff --git a/examples/python/ml/requirements.txt b/examples/python/ml/requirements.txt deleted file mode 100644 index 60befe345..000000000 --- a/examples/python/ml/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -dm-haiku -plotnine -jraph -optax -torch==2.3.0 -torch_xla==2.3.0 -torchvision -jax[cpu] -tensorflow_datasets -keras diff --git a/examples/python/ml/ss_lr/BUILD.bazel b/examples/python/ml/ss_lr/BUILD.bazel index caa50155b..413710f13 100644 --- a/examples/python/ml/ss_lr/BUILD.bazel +++ b/examples/python/ml/ss_lr/BUILD.bazel @@ -13,6 +13,7 @@ # limitations under the License. 
load("@rules_python//python:defs.bzl", "py_binary") +load("@spu_pip_dev//:requirements.bzl", "all_requirements") package(default_visibility = ["//visibility:public"]) @@ -26,5 +27,5 @@ py_binary( "//examples/python/utils:appr_sigmoid", "//examples/python/utils:dataset_utils", "//spu/utils:distributed", - ], + ] + all_requirements, ) diff --git a/examples/python/ml/ss_xgb/BUILD.bazel b/examples/python/ml/ss_xgb/BUILD.bazel index 34168304b..3e9c0f8df 100644 --- a/examples/python/ml/ss_xgb/BUILD.bazel +++ b/examples/python/ml/ss_xgb/BUILD.bazel @@ -13,6 +13,7 @@ # limitations under the License. load("@rules_python//python:defs.bzl", "py_binary") +load("@spu_pip_dev//:requirements.bzl", "all_requirements") package(default_visibility = ["//visibility:public"]) @@ -26,5 +27,5 @@ py_binary( "//examples/python/utils:appr_sigmoid", "//examples/python/utils:dataset_utils", "//spu/utils:distributed", - ], + ] + all_requirements, ) diff --git a/examples/python/ml/stax_mnist_classifier/BUILD.bazel b/examples/python/ml/stax_mnist_classifier/BUILD.bazel index 2c7f6da86..e0e5d5bc1 100644 --- a/examples/python/ml/stax_mnist_classifier/BUILD.bazel +++ b/examples/python/ml/stax_mnist_classifier/BUILD.bazel @@ -13,6 +13,7 @@ # limitations under the License. load("@rules_python//python:defs.bzl", "py_binary") +load("@spu_pip_dev//:requirements.bzl", "all_requirements") package(default_visibility = ["//visibility:public"]) @@ -25,5 +26,5 @@ py_binary( deps = [ "//examples/python/utils:dataset_utils", "//spu/utils:distributed", - ], + ] + all_requirements, ) diff --git a/examples/python/ml/stax_nn/BUILD.bazel b/examples/python/ml/stax_nn/BUILD.bazel index 2318515fd..0a7484b0b 100644 --- a/examples/python/ml/stax_nn/BUILD.bazel +++ b/examples/python/ml/stax_nn/BUILD.bazel @@ -12,16 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-load("@rules_python//python:defs.bzl", "py_binary") +load("@spu_pip_dev//:requirements.bzl", "all_requirements") +load("//bazel:spu.bzl", "spu_py_binary", "spu_py_library") package(default_visibility = ["//visibility:public"]) -py_library( +spu_py_library( name = "models", srcs = ["models.py"], ) -py_binary( +spu_py_binary( name = "stax_nn", srcs = ["stax_nn.py"], data = [ @@ -32,5 +33,5 @@ py_binary( "//examples/python/utils:dataset_utils", "//examples/python/utils:optimizers", "//spu/utils:distributed", - ], + ] + all_requirements, ) diff --git a/examples/python/ml/tf_experiment/BUILD.bazel b/examples/python/ml/tf_experiment/BUILD.bazel index 8bcce8c09..cfe836b6f 100644 --- a/examples/python/ml/tf_experiment/BUILD.bazel +++ b/examples/python/ml/tf_experiment/BUILD.bazel @@ -13,6 +13,7 @@ # limitations under the License. load("@rules_python//python:defs.bzl", "py_binary") +load("@spu_pip_dev//:requirements.bzl", "all_requirements") package(default_visibility = ["//visibility:public"]) @@ -24,5 +25,5 @@ py_binary( ], deps = [ "//spu/utils:distributed", - ], + ] + all_requirements, ) diff --git a/examples/python/ml/torch_lr_experiment/BUILD.bazel b/examples/python/ml/torch_lr_experiment/BUILD.bazel index 36cdcce16..f85eb654e 100644 --- a/examples/python/ml/torch_lr_experiment/BUILD.bazel +++ b/examples/python/ml/torch_lr_experiment/BUILD.bazel @@ -13,6 +13,7 @@ # limitations under the License. 
load("@rules_python//python:defs.bzl", "py_binary") +load("@spu_pip_dev//:requirements.bzl", "all_requirements") package(default_visibility = ["//visibility:public"]) @@ -24,5 +25,5 @@ py_binary( ], deps = [ "//spu/utils:distributed", - ], + ] + all_requirements, ) diff --git a/examples/python/ml/torch_lr_experiment/README.md b/examples/python/ml/torch_lr_experiment/README.md index 57754ed97..04d5f5d1f 100644 --- a/examples/python/ml/torch_lr_experiment/README.md +++ b/examples/python/ml/torch_lr_experiment/README.md @@ -4,19 +4,13 @@ This example demonstrates how to use SPU to make private inferences on PyTorch m **Note**: Currently, SPU's support of PyTorch is **experimental**. -1. Install a third-party dependency [PyTorch/XLA](https://github.com/pytorch/xla). - - ```sh - pip install torch==2.3.0 torch_xla==2.3.0 - ``` - -2. Launch SPU backend runtime +1. Launch SPU backend runtime ```sh bazel run -c opt //examples/python/utils:nodectl -- up ``` -3. Run `torch_lr_experiment` example +2. Run `torch_lr_experiment` example ```sh bazel run -c opt //examples/python/ml/torch_lr_experiment diff --git a/examples/python/ml/torch_resnet_experiment/BUILD.bazel b/examples/python/ml/torch_resnet_experiment/BUILD.bazel index 91d89e456..87a6033e1 100644 --- a/examples/python/ml/torch_resnet_experiment/BUILD.bazel +++ b/examples/python/ml/torch_resnet_experiment/BUILD.bazel @@ -13,6 +13,7 @@ # limitations under the License. 
load("@rules_python//python:defs.bzl", "py_binary") +load("@spu_pip_dev//:requirements.bzl", "all_requirements") package(default_visibility = ["//visibility:public"]) @@ -24,5 +25,5 @@ py_binary( ], deps = [ "//spu/utils:distributed", - ], + ] + all_requirements, ) diff --git a/examples/python/ml/torch_resnet_experiment/README.md b/examples/python/ml/torch_resnet_experiment/README.md index af2a04aa5..df902d047 100644 --- a/examples/python/ml/torch_resnet_experiment/README.md +++ b/examples/python/ml/torch_resnet_experiment/README.md @@ -4,20 +4,13 @@ This example demonstrates how to use SPU to make private inferences on PyTorch m **Note**: Currently, SPU's support of PyTorch is **experimental**. -1. Install a third-party dependency [PyTorch/XLA](https://github.com/pytorch/xla). - - ```sh - pip install torch==2.3.0 torch_xla==2.3.0 - pip install torchvision - ``` - -2. Launch SPU backend runtime +1. Launch SPU backend runtime ```sh bazel run -c opt //examples/python/utils:nodectl -- up ``` -3. Run `torch_resnet_experiment` example +2. Run `torch_resnet_experiment` example ```sh bazel run -c opt //examples/python/ml/torch_resnet_experiment diff --git a/examples/python/utils/BUILD.bazel b/examples/python/utils/BUILD.bazel index 9e5fa2d58..aea58934d 100644 --- a/examples/python/utils/BUILD.bazel +++ b/examples/python/utils/BUILD.bazel @@ -12,33 +12,34 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-load("@rules_python//python:defs.bzl", "py_library") +load("@spu_pip_dev//:requirements.bzl", "all_requirements") +load("//bazel:spu.bzl", "spu_py_binary", "spu_py_library") package(default_visibility = ["//visibility:public"]) -py_library( +spu_py_library( name = "appr_sigmoid", srcs = ["appr_sigmoid.py"], ) -py_library( +spu_py_library( name = "dataset_utils", srcs = ["dataset_utils.py"], ) -py_library( +spu_py_library( name = "stax_utils", srcs = ["stax_utils.py"], deps = [ ], ) -py_library( +spu_py_library( name = "optimizers", srcs = ["optimizers.py"], ) -py_binary( +spu_py_binary( name = "nodectl", srcs = ["nodectl.py"], data = [ @@ -47,5 +48,5 @@ py_binary( deps = [ ":dataset_utils", # server dependent. "//spu/utils:distributed", - ], + ] + all_requirements, ) diff --git a/experimental/squirrel/BUILD.bazel b/experimental/squirrel/BUILD.bazel index 64c794a5c..518fc4ef9 100644 --- a/experimental/squirrel/BUILD.bazel +++ b/experimental/squirrel/BUILD.bazel @@ -60,8 +60,8 @@ spu_cc_library( "//libspu/mpc/cheetah/rlwe:cheetah_rlwe", "//libspu/mpc/cheetah/rlwe:lwe", "//libspu/mpc/cheetah/rlwe:packlwes", - "@com_github_microsoft_seal//:seal", - "@eigen_archive//:eigen3", + "@eigen", + "@seal", "@yacl//yacl/utils:elapsed_timer", ], ) @@ -139,7 +139,7 @@ spu_cc_binary( ":tree_build_worker", ":tree_builder", "//libspu/device:io", - "@com_google_absl//absl/strings", + "@abseil-cpp//absl/strings", "@llvm-project//llvm:Support", "@yacl//yacl/link:factory", ], diff --git a/libspu/BUILD.bazel b/libspu/BUILD.bazel index 2b0fcf3f8..8d413c454 100644 --- a/libspu/BUILD.bazel +++ b/libspu/BUILD.bazel @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+load("@protobuf//bazel:py_proto_library.bzl", "py_proto_library") load("@rules_cc//cc:defs.bzl", "cc_proto_library") load("@rules_proto//proto:defs.bzl", "proto_library") -load("@rules_proto_grpc//python:defs.bzl", "python_proto_compile") +load("//:version.bzl", "spu_version_gen") load("//bazel:spu.bzl", "spu_cc_library") package(default_visibility = ["//visibility:public"]) @@ -29,14 +30,18 @@ cc_proto_library( deps = [":spu_proto"], ) -python_proto_compile( +py_proto_library( name = "spu_py_proto", - output_mode = "NO_PREFIX", - prefix_path = "..", - protos = ["//libspu:spu_proto"], + deps = ["//libspu:spu_proto"], ) spu_cc_library( name = "version", - hdrs = ["version.h"], + hdrs = [":spu_version"], +) + +spu_version_gen( + name = "spu_version", + out = "version.h", + template = "version.h.in", ) diff --git a/libspu/compiler/common/BUILD.bazel b/libspu/compiler/common/BUILD.bazel index eef7efaf2..8d64134c3 100644 --- a/libspu/compiler/common/BUILD.bazel +++ b/libspu/compiler/common/BUILD.bazel @@ -22,7 +22,7 @@ spu_cc_library( hdrs = ["ir_printer_config.h"], visibility = ["//visibility:private"], deps = [ - "@com_github_fmtlib_fmt//:fmtlib", + "@fmt", "@llvm-project//mlir:Pass", ], ) diff --git a/libspu/compiler/tests/BUILD.bazel b/libspu/compiler/tests/BUILD.bazel index 2dd061272..28c3224e2 100644 --- a/libspu/compiler/tests/BUILD.bazel +++ b/libspu/compiler/tests/BUILD.bazel @@ -23,8 +23,8 @@ expand_template( substitutions = { "@LIT_SITE_CFG_IN_HEADER@": "# Autogenerated, do not edit.", "@LLVM_TOOLS_DIR@": package_path("@llvm-project//llvm:BUILD"), - "\"@PPHLO_TOOLS_DIR@\"": "os.path.join(os.environ['TEST_SRCDIR'], 'spulib', 'libspu', 'compiler', 'tools')", - "\"@PPHLO_SOURCE_DIR@\"": "os.path.join(os.environ['TEST_SRCDIR'], 'spulib')", + "\"@PPHLO_TOOLS_DIR@\"": "os.path.join(os.environ['TEST_SRCDIR'], '_main', 'libspu', 'compiler', 'tools')", + "\"@PPHLO_SOURCE_DIR@\"": "os.path.join(os.environ['TEST_SRCDIR'], '_main')", }, template = "lit.site.cfg.py.in", 
) diff --git a/libspu/compiler/tests/passes/optimizations/sort_lowering.mlir b/libspu/compiler/tests/passes/optimizations/sort_lowering.mlir index 37d98fd1b..ef9e8b790 100644 --- a/libspu/compiler/tests/passes/optimizations/sort_lowering.mlir +++ b/libspu/compiler/tests/passes/optimizations/sort_lowering.mlir @@ -21,3 +21,19 @@ func.func @main(%arg0: tensor<10x!pphlo.secret>, %arg1: tensor<10x!pphlo.se }) {dimension = 0 : i64, is_stable = false} : (tensor<10x!pphlo.secret>, tensor<10x!pphlo.secret>) -> (tensor<10x!pphlo.secret>, tensor<10x!pphlo.secret>) return %0#0, %0#1 : tensor<10x!pphlo.secret>, tensor<10x!pphlo.secret> } + +// ----- + +func.func @main(%arg0: tensor<3x4x!pphlo.secret>, %arg1: tensor<3x4x!pphlo.secret>, %arg2: tensor<3x4x!pphlo.secret>) -> (tensor<3x4x!pphlo.secret>, tensor<3x4x!pphlo.secret>, tensor<3x4x!pphlo.secret>) { + // CHECK: %0:3 = pphlo.simple_sort %arg0, %arg1, %arg2 ASC, dim = 1, num_keys = 2 : (tensor<3x4x!pphlo.secret>, tensor<3x4x!pphlo.secret>, tensor<3x4x!pphlo.secret>) -> (tensor<3x4x!pphlo.secret>, tensor<3x4x!pphlo.secret>, tensor<3x4x!pphlo.secret>) + %0:3 = "pphlo.sort"(%arg0, %arg1, %arg2) ({ + ^bb0(%arg3: tensor>, %arg4: tensor>, %arg5: tensor>, %arg6: tensor>, %arg7: tensor>, %arg8: tensor>): + %1 = pphlo.less %arg3, %arg4 : (tensor>, tensor>) -> tensor> + %2 = pphlo.equal %arg3, %arg4 : (tensor>, tensor>) -> tensor> + %3 = pphlo.less %arg5, %arg6 : (tensor>, tensor>) -> tensor> + %4 = pphlo.and %2, %3 : tensor> + %5 = pphlo.or %1, %4 : tensor> + pphlo.return %5 : tensor> + }) {dimension = 1 : i64, is_stable = true} : (tensor<3x4x!pphlo.secret>, tensor<3x4x!pphlo.secret>, tensor<3x4x!pphlo.secret>) -> (tensor<3x4x!pphlo.secret>, tensor<3x4x!pphlo.secret>, tensor<3x4x!pphlo.secret>) + return %0#0, %0#1, %0#2 : tensor<3x4x!pphlo.secret>, tensor<3x4x!pphlo.secret>, tensor<3x4x!pphlo.secret> + } diff --git a/libspu/core/BUILD.bazel b/libspu/core/BUILD.bazel index 1f4278493..b5c95acfd 100644 --- a/libspu/core/BUILD.bazel +++ 
b/libspu/core/BUILD.bazel @@ -175,7 +175,7 @@ spu_cc_library( ":ndarray_ref", ":pt_buffer_view", ":shape", - "@com_github_xtensor_xtensor//:xtensor", + "@xtensor", ], ) @@ -218,9 +218,9 @@ spu_cc_library( hdrs = ["logging.h"], deps = [ "//libspu/core:prelude", - "@com_github_brpc_brpc//:butil", - "@com_github_fmtlib_fmt//:fmtlib", - "@com_google_absl//absl/strings", + "@abseil-cpp//absl/strings", + "@brpc//:butil", + "@fmt", "@yacl//yacl/link:trace", ], ) @@ -230,7 +230,7 @@ spu_cc_library( srcs = ["bit_utils.cc"], hdrs = ["bit_utils.h"], deps = [ - "@com_google_absl//absl/numeric:bits", + "@abseil-cpp//absl/numeric:bits", "@yacl//yacl/base:int128", "@yacl//yacl/utils:platform_utils", ], @@ -251,7 +251,7 @@ spu_cc_binary( linkopts = ["-lm"], deps = [ ":bit_utils", - "@com_github_google_benchmark//:benchmark_main", + "@google_benchmark//:benchmark_main", ], ) diff --git a/libspu/core/config.cc b/libspu/core/config.cc index 81ca2f9d4..c40c4c3e1 100644 --- a/libspu/core/config.cc +++ b/libspu/core/config.cc @@ -57,6 +57,11 @@ void populateRuntimeConfig(RuntimeConfig& cfg) { cfg.set_fxp_div_goldschmidt_iters(2); } + // sort + if (cfg.quick_sort_threshold() == 0) { + cfg.set_quick_sort_threshold(32); + } + // fxp exponent config { if (cfg.fxp_exp_mode() == RuntimeConfig::EXP_DEFAULT) { diff --git a/libspu/core/encoding.cc b/libspu/core/encoding.cc index 98a17a1ac..a92c181d0 100644 --- a/libspu/core/encoding.cc +++ b/libspu/core/encoding.cc @@ -127,13 +127,14 @@ NdArrayRef encodeToRing(const PtBufferView& bv, FieldType field, void decodeFromRing(const NdArrayRef& src, DataType in_dtype, size_t fxp_bits, PtBufferView* out_bv, PtType* out_pt_type) { const Type& src_type = src.eltype(); + + SPU_ENFORCE(src_type.isa(), "source must be ring2k, got={}", + src_type); + const FieldType field = src_type.as()->field(); const PtType pt_type = getDecodeType(in_dtype); const size_t numel = src.numel(); - SPU_ENFORCE(src_type.isa(), "source must be ring_type, got={}", - src_type); - 
if (out_pt_type != nullptr) { *out_pt_type = pt_type; } diff --git a/libspu/core/prelude.h b/libspu/core/prelude.h index d9726cbed..cfb6ce1ce 100644 --- a/libspu/core/prelude.h +++ b/libspu/core/prelude.h @@ -92,4 +92,7 @@ struct formatter : ostream_formatter {}; template <> struct formatter : ostream_formatter {}; +template <> +struct formatter : ostream_formatter {}; + } // namespace fmt diff --git a/libspu/core/value.h b/libspu/core/value.h index 75c10f78d..1eccd384d 100644 --- a/libspu/core/value.h +++ b/libspu/core/value.h @@ -67,6 +67,13 @@ class Value final { bool isPublic() const { return vtype() == VIS_PUBLIC; } bool isSecret() const { return vtype() == VIS_SECRET; } bool isPrivate() const { return vtype() == VIS_PRIVATE; } + int64_t owner() const { + if (isPrivate()) { + return storage_type().as()->owner(); + } else { + return -1; + } + } // Get dtype. DataType dtype() const { return dtype_; } diff --git a/libspu/cuda_support/BUILD.bazel b/libspu/cuda_support/BUILD.bazel index f85ccc36c..eb8430943 100644 --- a/libspu/cuda_support/BUILD.bazel +++ b/libspu/cuda_support/BUILD.bazel @@ -25,7 +25,7 @@ cuda_library( "manual", # Exclude this target from :all expansion ], deps = [ - "@com_github_nvidia_cutlass//:cutlass", + "@cutlass", ], ) diff --git a/libspu/dialect/pphlo/transforms/sort_lowering.cc b/libspu/dialect/pphlo/transforms/sort_lowering.cc index 3640e8ad5..6660a8b40 100644 --- a/libspu/dialect/pphlo/transforms/sort_lowering.cc +++ b/libspu/dialect/pphlo/transforms/sort_lowering.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "mlir/IR/Matchers.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -69,6 +70,99 @@ struct SortConversion : public OpRewritePattern { return success(); } } + + // pattern for jax.lax.sort lowering + if (comp.hasOneBlock()) { + auto &first_inst = comp.front().front(); + bool match_less = matchPattern(&first_inst, m_Op()); + bool match_greater = matchPattern(&first_inst, m_Op()); + if (match_less || match_greater) { + SortDirectionAttr direction; + + if (match_greater) { + // descent + direction = + SortDirectionAttr::get(op->getContext(), SortDirection::DES); + } else { + // ascent + direction = + SortDirectionAttr::get(op->getContext(), SortDirection::ASC); + } + + size_t key_nums = 0; + const auto comp_name = first_inst.getName().getStringRef(); + // save the result for each instruction for following check. + std::vector results; + for (auto &instr : comp.front().without_terminator()) { + if (matchPattern(&instr, m_Op(comp_name))) { + key_nums++; + } + results.push_back(instr.getResult(0)); + } + + // idx of and/or blocks + size_t lhs_idx = 2 * key_nums - 3; + size_t rhs_idx = 2 * key_nums - 2; + for (auto [i, instr] : + llvm::enumerate(comp.front().without_terminator())) { + if (i <= 2 * key_nums - 2) { + auto lhs_arg = + mlir::dyn_cast(instr.getOperand(0)); + auto rhs_arg = + mlir::dyn_cast(instr.getOperand(1)); + + if (lhs_arg == nullptr || rhs_arg == nullptr) { + return failure(); + } + + auto lhs_idx = lhs_arg.getArgNumber(); + auto rhs_idx = rhs_arg.getArgNumber(); + + // less + equal blocks + if ((i & 1) == 0 && matchPattern(&instr, m_Op(comp_name))) { + if (lhs_idx != i || rhs_idx != (i + 1)) { + return failure(); + } + } + // equal op + if ((i & 1) == 1 && matchPattern(&instr, m_Op())) { + if (lhs_idx != (i - 1) || rhs_idx != i) { + return failure(); + } + } + } else { + // check the operands of and/or + auto lhs = instr.getOperand(0); + auto rhs = instr.getOperand(1); + 
bool pass = (lhs == results[lhs_idx] && rhs == results[rhs_idx]) || + (lhs == results[rhs_idx] && rhs == results[lhs_idx]); + + // and blocks + if ((i & 1) == 1 && matchPattern(&instr, m_Op())) { + if (!pass) { + return failure(); + } + } + + // or blocks + if ((i & 1) == 0 && matchPattern(&instr, m_Op())) { + if (!pass) { + return failure(); + } + } + + lhs_idx--; + rhs_idx++; + } + } + + rewriter.replaceOpWithNewOp( + op, op.getResultTypes(), op.getOperands(), op.getDimensionAttr(), + rewriter.getI64IntegerAttr(key_nums), direction); + return success(); + } + } + return failure(); } }; diff --git a/libspu/kernel/BUILD.bazel b/libspu/kernel/BUILD.bazel index d588fa53a..f48355aee 100644 --- a/libspu/kernel/BUILD.bazel +++ b/libspu/kernel/BUILD.bazel @@ -27,6 +27,6 @@ spu_cc_library( "//libspu/kernel/hal:prot_wrapper", # BAD "//libspu/kernel/hal:public_helper", # BAD "//libspu/mpc:factory", - "@com_google_googletest//:gtest", + "@googletest//:gtest", ], ) diff --git a/libspu/kernel/hal/BUILD.bazel b/libspu/kernel/hal/BUILD.bazel index b5aeef415..258bfe80c 100644 --- a/libspu/kernel/hal/BUILD.bazel +++ b/libspu/kernel/hal/BUILD.bazel @@ -305,3 +305,12 @@ spu_cc_test( "//libspu/kernel:test_util", ], ) + +spu_cc_library( + name = "soprf", + srcs = ["soprf.cc"], + hdrs = ["soprf.h"], + deps = [ + ":ring", + ], +) diff --git a/libspu/kernel/hal/permute.cc b/libspu/kernel/hal/permute.cc index b7c34ff20..d40115766 100644 --- a/libspu/kernel/hal/permute.cc +++ b/libspu/kernel/hal/permute.cc @@ -44,6 +44,129 @@ inline bool _has_same_owner(const Value &x, const Value &y) { return _get_owner(x) == _get_owner(y); } +hal::CompFn _get_cmp_func(SPUContext *ctx, int64_t num_keys, + SortDirection direction, bool append_rand = false) { + hal::CompFn comp_fn = [ctx, num_keys, direction, append_rand]( + absl::Span values) -> spu::Value { + auto scalar_cmp = [direction](spu::SPUContext *ctx, const spu::Value &lhs, + const spu::Value &rhs) { + if (direction == SortDirection::Ascending) { 
+ return hal::less(ctx, lhs, rhs); + } + return hal::greater(ctx, lhs, rhs); + }; + + spu::Value k1 = hal::constant(ctx, true, DT_I1, values[0].shape()); + spu::Value pre_equal = k1; + spu::Value result = scalar_cmp(ctx, values[0], values[1]); + // the idea here is that if the two values of the last key are equal, + // then we compare the two values of the current key, and iteratively + // update the result which indicates whether to swap values + int64_t idx; + for (idx = 2; idx < num_keys * 2; idx += 2) { + pre_equal = hal::bitwise_and( + ctx, pre_equal, hal::equal(ctx, values[idx - 2], values[idx - 1])); + auto current = scalar_cmp(ctx, values[idx], values[idx + 1]); + current = hal::bitwise_and(ctx, pre_equal, current); + result = hal::bitwise_or(ctx, result, current); + } + + // append rand value to avoid the same key "pitfall" in partition-based + // algorithms (e.g. quick-sort, quick-select). + if (append_rand) { + // must use secret bits here, otherwise some information will leak + auto rand_bits = hal::random(ctx, VIS_SECRET, DT_I1, values[0].shape()); + + // equal has better performance for aby3 + // cmp+andbb has better performance for semi2k now + pre_equal = hal::bitwise_and( + ctx, pre_equal, hal::equal(ctx, values[idx - 2], values[idx - 1])); + auto current = hal::bitwise_and(ctx, pre_equal, rand_bits); + result = hal::bitwise_or(ctx, result, current); + } + + return result; + }; + + return comp_fn; +} + +bool _check_method_require(SPUContext *ctx, RuntimeConfig::SortMethod method) { + bool pass = false; + switch (method) { + case RuntimeConfig::SORT_RADIX: + pass = ctx->hasKernel("rand_perm_m") && ctx->hasKernel("perm_am") && + ctx->hasKernel("perm_ap") && ctx->hasKernel("inv_perm_am") && + ctx->hasKernel("inv_perm_ap"); + break; + case RuntimeConfig::SORT_QUICK: + // quick sort only requires small subsets of shuffle kernels, but need + // rand_b kernel to avoid calling of a2b. 
+ pass = ctx->hasKernel("rand_perm_m") && ctx->hasKernel("perm_am") && + ctx->hasKernel("rand_b"); + break; + case RuntimeConfig::SORT_NETWORK: + // sort network is a general method which can be used for all MPC + // protocols. + pass = true; + break; + default: + SPU_THROW("Should not reach here"); + } + + return pass; +} + +RuntimeConfig::SortMethod select_sort_method( + SPUContext *ctx, RuntimeConfig::SortMethod preferred_method) { + SPU_ENFORCE(preferred_method != RuntimeConfig::SORT_DEFAULT); + + // if the preferred method is not supported, fall back to sorting network now. + const RuntimeConfig::SortMethod fallback_method = RuntimeConfig::SORT_NETWORK; + + switch (preferred_method) { + case RuntimeConfig::SORT_RADIX: + if (internal::_check_method_require(ctx, RuntimeConfig::SORT_RADIX)) { + return preferred_method; + } + break; + + case RuntimeConfig::SORT_QUICK: + if (internal::_check_method_require(ctx, RuntimeConfig::SORT_QUICK)) { + return preferred_method; + } + break; + + case RuntimeConfig::SORT_NETWORK: + // always true now. + if (internal::_check_method_require(ctx, RuntimeConfig::SORT_NETWORK)) { + return preferred_method; + } + SPU_THROW("should not reach here"); + break; + + default: + SPU_THROW("should not reach here"); + } + + return fallback_method; +} + +std::vector fallback_sort1d(SPUContext *ctx, + absl::Span inputs, + int64_t num_keys, + SortDirection direction) { + auto comp_fn = _get_cmp_func(ctx, num_keys, direction); + Visibility vis = std::all_of(inputs.begin(), inputs.begin() + num_keys, + [](const spu::Value &v) { return v.isPublic(); }) + ? VIS_PUBLIC + : VIS_SECRET; + // currently, general sort1d only supports odd-even sorting network which is + // an unstable sort method. 
+ auto ret = sort1d(ctx, inputs, comp_fn, vis, false); + return ret; +} + void _hint_nbits(const Value &a, size_t nbits) { if (a.storage_type().isa()) { const_cast(a.storage_type()).as()->setNbits(nbits); @@ -218,6 +341,290 @@ void HandleSmallArray(SPUContext *ctx, const CompFn &comparator_body, } } +std::vector _construct_cmp_values( + SPUContext *ctx, const std::vector> &intervals, + absl::Span arr, const int64_t quick_sort_thres, + const int64_t num_keys) { + int64_t lo; + int64_t hi; + int64_t left; + int64_t right; + + std::vector> cmp_values(2 * num_keys); + for (auto &values : cmp_values) { + values.reserve(intervals.size()); + } + + for (const auto &interval : intervals) { + std::tie(lo, hi) = interval; + + if (hi - lo <= quick_sort_thres) { + continue; + } + + left = lo + 1; + right = hi; + + for (int64_t i = 0; i < num_keys; i++) { + // pivot + cmp_values[2 * i].push_back(broadcast_to( + ctx, slice_scalar_at(ctx, arr[i], {lo}), {right - left + 1})); + // others + cmp_values[2 * i + 1].push_back(slice(ctx, arr[i], {left}, {right + 1})); + } + } + + // no need to quick sort + if (cmp_values[0].empty()) { + return {}; + } + + std::vector ret; + ret.reserve(2 * num_keys); + + for (int64_t i = 0; i < 2 * num_keys; i++) { + ret.push_back(concatenate(ctx, cmp_values[i], 0)); + } + + return ret; +} + +bool Partition(SPUContext *ctx, const int64_t num_keys, + const CompFn &comparator_body, absl::Span arr, + std::vector> &intervals) { + if (intervals.empty()) { + return false; + } + + int64_t quick_sort_thres = ctx->config().quick_sort_threshold(); + + int64_t lo; // left end of current interval + int64_t hi; // right end of current interval + + int64_t left; // location of left pointer + int64_t right; // location of right pointer + int64_t mid; // location of pivot element after partition + + auto values = + _construct_cmp_values(ctx, intervals, arr, quick_sort_thres, num_keys); + + if (values.empty()) { + return false; + } + + auto predicate = 
comparator_body(values); + auto _predicate = dump_public_as(ctx, hal::reveal(ctx, predicate)); + + Index lhs_indices; + Index rhs_indices; + Index pivot_indices; + Index mid_indices; + // save partition output, i.e. (lo, mid, hi), where mid is the location of + // pivot after partition. + std::vector> pos; + // save the intervals that do not need quick sort anymore. + std::vector> pass_vec; + + int64_t length = 0; + for (auto item : intervals) { + std::tie(lo, hi) = item; + + if (hi - lo <= quick_sort_thres) { + pass_vec.emplace_back(lo, hi); + continue; + } + + left = lo + 1; + right = hi; + + auto offset = left; + // use two pointer for partition + for (;;) { + while (right >= left && !_predicate[left - offset + length]) { + left++; + } + while (right >= left && _predicate[right - offset + length]) { + right--; + } + if (right < left) { + break; + } + + lhs_indices.emplace_back(left); + rhs_indices.emplace_back(right); + + left++; + right--; + } + length += (hi - lo); + + pivot_indices.emplace_back(lo); + mid_indices.emplace_back(right); + pos.emplace_back(lo, right, hi); + } + Swap(arr, lhs_indices, rhs_indices); + // swap the pivot + Swap(arr, pivot_indices, mid_indices); + + intervals.swap(pass_vec); + intervals.reserve(2 * intervals.size()); + + while (!pos.empty()) { + std::tie(lo, mid, hi) = pos.back(); + pos.pop_back(); + if (lo < mid) { + intervals.emplace_back(lo, mid - 1); + } + if (mid < hi) { + intervals.emplace_back(mid + 1, hi); + } + } + + return true; +} + +// this algorithm is mainly adopted from odd-even mergesort, but we can reveal +// the comparison because of shuffling +void mergesort(SPUContext *ctx, const CompFn &comparator_body, + absl::Span arr, + std::vector> &intervals) { + const auto N = arr.front().numel(); + int64_t logn = Log2Ceil(N); + // max depth for odd-even merge network + int64_t depth = ((logn + 1) * logn) / 2; + + std::vector lhs_indices(depth); + std::vector rhs_indices(depth); + + int64_t lo; + int64_t hi; + for (auto item 
: intervals) { + std::tie(lo, hi) = item; + if (hi - lo <= 0) { + continue; + } + + int64_t n = hi - lo + 1; + int64_t cnt = 0; + for (int64_t max_gap_in_stage = 1; max_gap_in_stage < n; + max_gap_in_stage += max_gap_in_stage) { + for (int64_t step = max_gap_in_stage; step > 0; step /= 2) { + for (int64_t j = step % max_gap_in_stage; j + step < n; + j += step + step) { + auto range = max_gap_in_stage + max_gap_in_stage; + + for (int64_t i = 0; i < step; i++) { + auto lhs_idx = i + j; + auto rhs_idx = i + j + step; + + if (rhs_idx >= n) { + break; + } + + if (lhs_idx / range == rhs_idx / range) { + lhs_indices[cnt].emplace_back(lhs_idx + lo); + rhs_indices[cnt].emplace_back(rhs_idx + lo); + } + } + } + cnt += 1; + } + } + } + + size_t num_operands = arr.size(); + for (size_t i = 0; i < lhs_indices.size(); i++) { + if (lhs_indices[i].empty()) { + continue; + } + + Index lhs_indice; + Index rhs_indice; + + std::vector values; + values.reserve(2 * num_operands); + + for (size_t j = 0; j < num_operands; ++j) { + values.emplace_back(arr[j].data().linear_gather(lhs_indices[i]), + arr[j].dtype()); + values.emplace_back(arr[j].data().linear_gather(rhs_indices[i]), + arr[j].dtype()); + } + auto predicate = comparator_body(values); + auto _predicate = dump_public_as(ctx, hal::reveal(ctx, predicate)); + for (size_t k = 0; k < _predicate.size(); k++) { + if (!_predicate[k]) { + lhs_indice.emplace_back(lhs_indices[i][k]); + rhs_indice.emplace_back(rhs_indices[i][k]); + } + } + Swap(arr, lhs_indice, rhs_indice); + } +} + +std::vector QuickMergesort(SPUContext *ctx, const int64_t num_keys, + const CompFn &quick_comp, + const CompFn &merge_comp, + absl::Span inputs) { + // we do not need to copy or _2s here because of the secret shuffling. 
+ std::vector ret(inputs.begin(), inputs.end()); + + const auto n = inputs.front().numel(); + std::vector> intervals; + intervals.emplace_back(0, n - 1); + int64_t quicksort_num = 0; + // set max depth to avoid infinite loop + int64_t depth = 1000; + bool need_quick_sort = true; + + while (!intervals.empty()) { + need_quick_sort = + Partition(ctx, num_keys, quick_comp, absl::MakeSpan(ret), intervals); + quicksort_num += 1; + + if (!need_quick_sort || (quicksort_num == depth)) { + break; + } + } + + if (intervals.empty()) { + return ret; + } + + mergesort(ctx, merge_comp, absl::MakeSpan(ret), intervals); + + return ret; +} + +std::vector PrepareSort(SPUContext *ctx, + absl::Span inputs) { + std::vector inp; + inp.reserve(inputs.size()); + + auto rand_perm = _rand_perm_s(ctx, inputs.front().shape()); + // use a random permutation to break link of values, such that the following + // comparison can be revealed without loss of information. + for (const auto &input : inputs) { + inp.emplace_back( + std::move(_perm_ss(ctx, input, rand_perm).setDtype(input.dtype()))); + } + + return inp; +} + +std::vector quick_sort(SPUContext *ctx, + absl::Span inputs, + int64_t num_keys, SortDirection direction) { + auto inp = PrepareSort(ctx, inputs); + // quick sort will append extra random key + auto quick_comp = _get_cmp_func(ctx, num_keys, direction, true); + // in merge sort stage, only normal keys are used for comparison + auto merge_comp = _get_cmp_func(ctx, num_keys, direction); + auto ret = QuickMergesort(ctx, num_keys, quick_comp, merge_comp, + absl::MakeSpan(inp)); + return ret; +} + void TwoWayPartition(SPUContext *ctx, const CompFn &comparator_body, absl::Span arr, int64_t lo, int64_t hi, const TopKConfig &config, @@ -684,9 +1091,11 @@ spu::Value _apply_inv_perm_ss(SPUContext *ctx, const spu::Value &x, // Compose is actually a special case of apply_perm where both inputs are // permutations. 
So to be more general, we use the name _apply_perm_ss // rather than _compose_ss here -spu::Value _apply_perm_ss(SPUContext *ctx, const Value &x, const Value &perm) { +std::vector _apply_perm_ss(SPUContext *ctx, + absl::Span x, + const Value &perm) { // 1. = secure shuffle - auto shuffle_perm = hal::_rand_perm_s(ctx, x.shape()); + auto shuffle_perm = hal::_rand_perm_s(ctx, x[0].shape()); auto sp = hal::_perm_ss(ctx, perm, shuffle_perm); // 2. M = reveal() @@ -694,14 +1103,28 @@ spu::Value _apply_perm_ss(SPUContext *ctx, const Value &x, const Value &perm) { SPU_ENFORCE_EQ(m.shape().ndim(), 1U, "perm should be 1-d tensor"); // 3. sx = apply_perm(x,m) - auto sx = hal::_perm_sp(ctx, x, m); + std::vector sx; + sx.reserve(x.size()); + for (const auto &item : x) { + sx.emplace_back(hal::_perm_sp(ctx, item, m)); + } // 4. ret = unshuffle() - auto ret = hal::_inv_perm_ss(ctx, sx, shuffle_perm); + std::vector ret; + ret.reserve(x.size()); + for (const auto &item : sx) { + ret.emplace_back(hal::_inv_perm_ss(ctx, item, shuffle_perm)); + } return ret; } +spu::Value _apply_perm_ss(SPUContext *ctx, const Value &x, const Value &perm) { + std::vector inputs{x}; + auto ret = _apply_perm_ss(ctx, inputs, perm); + return std::move(ret[0]); +} + // Find mergeable keys from keys. Consecutive public/private(belong to one // owner) keys can be merged. Assume there are six keys, i.e., public_key0, // bob_key0, bob_key1, alice_key0, alice_key1, secret_key0. 
We can merge the @@ -768,10 +1191,37 @@ spu::Value _apply_inv_perm_sv(SPUContext *ctx, const Value &in, } } -#define MAP_APPLY_PERM_OP(NAME) \ - spu::Value _apply##NAME(SPUContext *ctx, const Value &in, \ - const Value &perm) { \ - return hal::NAME(ctx, in, perm); \ +std::vector _apply_inv_perm_sv(SPUContext *ctx, + absl::Span inputs, + const Value &perm) { + if (ctx->hasKernel("inv_perm_av")) { + std::vector ret; + ret.reserve(inputs.size()); + for (const auto &input : inputs) { + ret.emplace_back( + _apply_inv_perm_sv(ctx, input, perm).setDtype(input.dtype())); + } + return ret; + } else { + return _apply_inv_perm_ss(ctx, inputs, _2s(ctx, perm)); + } +} + +#define MAP_APPLY_PERM_OP(NAME) \ + spu::Value _apply##NAME(SPUContext *ctx, const Value &in, \ + const Value &perm) { \ + return hal::NAME(ctx, in, perm); \ + } \ + \ + std::vector _apply##NAME( \ + SPUContext *ctx, absl::Span inputs, const Value &perm) { \ + std::vector ret; \ + ret.reserve(inputs.size()); \ + for (const auto &input : inputs) { \ + ret.emplace_back( \ + _apply##NAME(ctx, input, perm).setDtype(input.dtype())); \ + } \ + return ret; \ } MAP_APPLY_PERM_OP(_perm_pp); @@ -781,41 +1231,87 @@ MAP_APPLY_PERM_OP(_inv_perm_pp); MAP_APPLY_PERM_OP(_inv_perm_vv); MAP_APPLY_PERM_OP(_inv_perm_sp); +#define MAP_VEC_CONVERT_OP(NAME) \ + std::vector NAME(SPUContext *ctx, absl::Span inputs) { \ + std::vector ret; \ + ret.reserve(inputs.size()); \ + for (const auto &input : inputs) { \ + ret.emplace_back(hal::NAME(ctx, input).setDtype(input.dtype())); \ + } \ + return ret; \ + } + +MAP_VEC_CONVERT_OP(_p2s); +MAP_VEC_CONVERT_OP(_v2s); + +#undef MAP_VEC_CONVERT_OP + +std::vector _p2v(SPUContext *ctx, absl::Span inputs, + int owner) { + std::vector ret; + ret.reserve(inputs.size()); + for (const auto &input : inputs) { + ret.emplace_back(hal::_p2v(ctx, input, owner).setDtype(input.dtype())); + } + return ret; +} + // Given a permutation, apply (inverse) permutation on a 1-d array input -#define MAP_PERM_OP(NAME) \ 
- spu::Value NAME(SPUContext *ctx, const Value &in, const Value &perm) { \ - SPU_TRACE_HAL_DISP(ctx, in, perm); \ - if (in.isPublic() && perm.isPublic()) { /*PP*/ \ - return NAME##_pp(ctx, in, perm); \ - } else if (in.isPublic() && perm.isSecret()) { /*PS*/ \ - return NAME##_ss(ctx, _p2s(ctx, in), perm); \ - } else if (in.isPublic() && perm.isPrivate()) { /*PV*/ \ - return NAME##_vv(ctx, _p2v(ctx, in, _get_owner(perm)), perm); \ - } else if (in.isPrivate() && perm.isPrivate()) { /*VV*/ \ - if (_has_same_owner(in, perm)) { \ - return NAME##_vv(ctx, in, perm); \ - } else { \ - return NAME##_sv(ctx, _v2s(ctx, in), perm); \ - } \ - } else if (in.isPrivate() && perm.isPublic()) { /*VP*/ \ - return NAME##_vv(ctx, in, _p2v(ctx, perm, _get_owner(in))); \ - } else if (in.isPrivate() && perm.isSecret()) { /*VS*/ \ - return NAME##_ss(ctx, _v2s(ctx, in), perm); \ - } else if (in.isSecret() && perm.isSecret()) { /*SS*/ \ - return NAME##_ss(ctx, in, perm); \ - } else if (in.isSecret() && perm.isPublic()) { /*SP*/ \ - return NAME##_sp(ctx, in, perm); \ - } else if (in.isSecret() && perm.isPrivate()) { /*SV*/ \ - return NAME##_sv(ctx, in, perm); \ - } else { \ - SPU_THROW("should not be here"); \ - } \ +#define MAP_PERM_OP(NAME) \ + std::vector NAME(SPUContext *ctx, absl::Span in, \ + const Value &perm) { \ + SPU_ENFORCE(!in.empty(), "Inputs should not be empty"); \ + SPU_ENFORCE(std::all_of(in.begin(), in.end(), \ + [&in](const spu::Value &v) { \ + return v.vtype() == in[0].vtype(); \ + }), \ + "Inputs visibility mismatched"); \ + if (in[0].isPrivate()) { \ + SPU_ENFORCE(std::all_of(in.begin(), in.end(), \ + [&in](const spu::Value &v) { \ + return internal::_has_same_owner(v, in[0]); \ + }), \ + "Inputs owner mismatched"); \ + } \ + SPU_TRACE_HAL_DISP(ctx, in[0], perm); \ + if (in[0].isPublic() && perm.isPublic()) { /*PP*/ \ + return NAME##_pp(ctx, in, perm); \ + } else if (in[0].isPublic() && perm.isSecret()) { /*PS*/ \ + return NAME##_ss(ctx, _p2s(ctx, in), perm); \ + } else if 
(in[0].isPublic() && perm.isPrivate()) { /*PV*/ \ + return NAME##_vv(ctx, _p2v(ctx, in, _get_owner(perm)), perm); \ + } else if (in[0].isPrivate() && perm.isPrivate()) { /*VV*/ \ + if (_has_same_owner(in[0], perm)) { \ + return NAME##_vv(ctx, in, perm); \ + } else { \ + return NAME##_sv(ctx, _v2s(ctx, in), perm); \ + } \ + } else if (in[0].isPrivate() && perm.isPublic()) { /*VP*/ \ + return NAME##_vv(ctx, in, hal::_p2v(ctx, perm, _get_owner(in[0]))); \ + } else if (in[0].isPrivate() && perm.isSecret()) { /*VS*/ \ + return NAME##_ss(ctx, _v2s(ctx, in), perm); \ + } else if (in[0].isSecret() && perm.isSecret()) { /*SS*/ \ + return NAME##_ss(ctx, in, perm); \ + } else if (in[0].isSecret() && perm.isPublic()) { /*SP*/ \ + return NAME##_sp(ctx, in, perm); \ + } else if (in[0].isSecret() && perm.isPrivate()) { /*SV*/ \ + return NAME##_sv(ctx, in, perm); \ + } else { \ + SPU_THROW("should not be here"); \ + } \ } // Inverse permute 1-D array x with a permutation perm // ret[perm[i]] = x[i] MAP_PERM_OP(_apply_inv_perm) +spu::Value _apply_inv_perm(SPUContext *ctx, const spu::Value &x, + const spu::Value &perm) { + std::vector inputs{x}; + auto ret = _apply_inv_perm(ctx, inputs, perm); + return std::move(ret[0]); +} + // Given a permutation, generate its inverse permutation // ret[perm[i]] = i spu::Value _inverse(SPUContext *ctx, const Value &perm) { @@ -829,14 +1325,27 @@ spu::Value _apply_perm_sv(SPUContext *ctx, const Value &in, const Value &perm) { if (ctx->hasKernel("inv_perm_av")) { return hal::_inv_perm_sv(ctx, in, _inverse(ctx, perm)); } else { - return _apply_inv_perm_ss(ctx, in, _v2s(ctx, _inverse(ctx, perm))); + return _apply_inv_perm_ss(ctx, in, hal::_v2s(ctx, _inverse(ctx, perm))); } } +std::vector _apply_perm_sv(SPUContext *ctx, + absl::Span inputs, + const Value &perm) { + return _apply_inv_perm_sv(ctx, inputs, _inverse(ctx, perm)); +} + // Permute 1-D array x with a permutation perm // ret[i] = x[perm[i]] MAP_PERM_OP(_apply_perm) +spu::Value 
_apply_perm(SPUContext *ctx, const spu::Value &x, + const spu::Value &perm) { + std::vector inputs{x}; + auto ret = _apply_perm(ctx, inputs, perm); + return std::move(ret[0]); +} + // Compose two permutations into one permutation // If we have two permutations x and y, we want to get a permutation z from x // and y that apply_inv_perm(in, z) = apply_inv_perm(apply_inv_perm(in, x), y) @@ -844,6 +1353,8 @@ spu::Value _compose_perm(SPUContext *ctx, const Value &x, const Value &y) { return _apply_perm(ctx, y, x); } +#undef MAP_PERM_OP + spu::Value _merge_keys(SPUContext *ctx, absl::Span inputs, bool is_ascending) { if (inputs[0].isPublic()) { @@ -1065,58 +1576,84 @@ std::vector simple_sort1d(SPUContext *ctx, SPU_ENFORCE(num_keys > 0 && num_keys <= static_cast(inputs.size()), "num_keys {} is not valid", num_keys); - bool fallback = false; - // if all keys are public, fallback to public sort + std::vector ret; + const auto sort_method = ctx->config().sort_method(); + + // There are multiple sort methods supported by SPU, we will try to seek the + // best method in the following order if the user does not specify the method + // manually. + // 1. If all keys are Public, then fallback to the plaintext sort. + // 2. Else, sequentially check if it supports radix sort or quick sort. If a + // match is found, execute the corresponding algorithm; otherwise, the + // default sorting network algorithm will be executed. + // + // Some takeaways about the above algorithm: + // 1. Radix sort is currently the only STABLE sorting algorithm, so we + // choose it as the highest priority algorithm (as long as it is supported + // by the underlying MPC protocol). + // 2. It's worth to know that quick sort is indeed faster than radix + // sort when the field is FM64 or FM128 (When in FM32, radix sort is always + // faster). + // 3. However, radix sort can be significantly accelerated if you set + // the valid_bits when you know exactly the ranges of the keys. + // 4. 
Radix sort and quick sort are more friendly to multiple payloads but + // not to multiple keys. Increasing one payload only adds one secret + // shuffle; however, for n additional keys, the communication/time can be + // roughly considered to multiply by n. + // 5. Quick sort is more adaptable to the expansion of the ring. When the + // ring size doubles, the communication volume of quick sort nearly doubles, + // and the number of rounds increases (poly) logarithmically. In contrast, + // when the ring size doubles in radix sort, the communication (roughly) + // quadruples and the number of rounds doubles. + // + + // if all keys are public, fallback to plaintext sort. if (std::all_of(inputs.begin(), inputs.begin() + num_keys, [](const spu::Value &v) { return v.isPublic(); })) { - fallback = true; - } - // If the protocol supports secret shuffle and unshuffle, we can use radix - // sort for fast 1-D sort. Otherwise, we fallback to generic sort1d - if (!fallback && - !(ctx->hasKernel("rand_perm_m") && ctx->hasKernel("perm_am") && - ctx->hasKernel("perm_ap") && ctx->hasKernel("inv_perm_am") && - ctx->hasKernel("inv_perm_ap"))) { - fallback = true; + return internal::fallback_sort1d(ctx, inputs, num_keys, direction); } - if (!fallback) { - auto ret = - internal::radix_sort(ctx, inputs, direction, num_keys, valid_bits); - return ret; + + // if the default sort method is used, try to find the best available method; + // currently: radix sort -> quick sort -> sorting network + if (sort_method == RuntimeConfig::SORT_DEFAULT) { + if (internal::_check_method_require(ctx, RuntimeConfig::SORT_RADIX)) { + ret = internal::radix_sort(ctx, inputs, direction, num_keys, valid_bits); + } else if (internal::_check_method_require(ctx, + RuntimeConfig::SORT_QUICK)) { + ret = internal::quick_sort(ctx, inputs, num_keys, direction); + } else if (internal::_check_method_require( + ctx, + RuntimeConfig::SORT_NETWORK)) { // always true now. 
+ ret = internal::fallback_sort1d(ctx, inputs, num_keys, direction); + } else { + SPU_THROW("should not reach here"); + } } else { - auto scalar_cmp = [direction](spu::SPUContext *ctx, const spu::Value &lhs, - const spu::Value &rhs) { - if (direction == SortDirection::Ascending) { - return hal::less(ctx, lhs, rhs); - } - return hal::greater(ctx, lhs, rhs); - }; + auto selected_method = internal::select_sort_method(ctx, sort_method); + if (selected_method != sort_method) { + SPDLOG_WARN( + "Manually set method: {}, which is not supported, falling back to " + "{}.", + sort_method, selected_method); + } - hal::CompFn comp_fn = - [ctx, num_keys, - &scalar_cmp](absl::Span values) -> spu::Value { - spu::Value pre_equal = hal::constant(ctx, true, DT_I1, values[0].shape()); - spu::Value result = scalar_cmp(ctx, values[0], values[1]); - // the idea here is that if the two values of the last key is equal, than - // we compare the two values of the current key, and iteratively to update - // the result which indicates whether to swap values - for (int64_t idx = 2; idx < num_keys * 2; idx += 2) { - pre_equal = hal::bitwise_and( - ctx, pre_equal, hal::equal(ctx, values[idx - 2], values[idx - 1])); - auto current = scalar_cmp(ctx, values[idx], values[idx + 1]); - current = hal::bitwise_and(ctx, pre_equal, current); - result = hal::bitwise_or(ctx, result, current); - } - return result; - }; - Visibility vis = - std::all_of(inputs.begin(), inputs.begin() + num_keys, - [](const spu::Value &v) { return v.isPublic(); }) - ? 
VIS_PUBLIC - : VIS_SECRET; - auto ret = sort1d(ctx, inputs, comp_fn, vis, false); - return ret; + switch (selected_method) { + case RuntimeConfig::SORT_RADIX: + ret = + internal::radix_sort(ctx, inputs, direction, num_keys, valid_bits); + break; + case RuntimeConfig::SORT_QUICK: + ret = internal::quick_sort(ctx, inputs, num_keys, direction); + break; + case RuntimeConfig::SORT_NETWORK: + ret = internal::fallback_sort1d(ctx, inputs, num_keys, direction); + break; + default: + SPU_THROW("should not reach here"); + } } + + return ret; } std::vector permute(SPUContext *ctx, @@ -1292,4 +1829,22 @@ std::vector topk_1d(SPUContext *ctx, const spu::Value &input, } } +std::vector apply_inv_permute_1d( + SPUContext *ctx, absl::Span inputs, + const spu::Value &perm) { + // Note: the kernel `inv_perm_am` in MPC layer is exactly the `unshuffle` + // semantics, and we implement `apply_inv_perm_ss` in HAL layer. So we wrap + // the `apply_inv_perm` to deal with all the inv_perm stuff. + return internal::_apply_inv_perm(ctx, inputs, perm); +} + +std::vector apply_permute_1d(SPUContext *ctx, + absl::Span inputs, + const spu::Value &perm) { + // Note: the kernel `perm_am` in MPC layer is exactly the `shuffle` + // semantics, and we implement `apply_perm_ss` in HAL layer. So we wrap the + // `apply_perm` to deal with all the perm stuff. + return internal::_apply_perm(ctx, inputs, perm); +} + } // namespace spu::kernel::hal diff --git a/libspu/kernel/hal/permute.h b/libspu/kernel/hal/permute.h index 9025bbec5..be3cd5395 100644 --- a/libspu/kernel/hal/permute.h +++ b/libspu/kernel/hal/permute.h @@ -79,4 +79,14 @@ std::vector topk_1d(SPUContext *ctx, const spu::Value &input, const SimpleCompFn &scalar_cmp, const TopKConfig &config); +// For each input x, we get y = perm^{-1} (x), i.e. y[i] = x[perm^{-1}(i)] +std::vector apply_inv_permute_1d( + SPUContext *ctx, absl::Span inputs, + const spu::Value &perm); + +// For each input x, we get y = perm(x), i.e. 
y[i] = x[perm(i)] +std::vector apply_permute_1d(SPUContext *ctx, + absl::Span inputs, + const spu::Value &perm); + } // namespace spu::kernel::hal \ No newline at end of file diff --git a/libspu/kernel/hal/prot_wrapper.cc b/libspu/kernel/hal/prot_wrapper.cc index 7e03454d1..e1e060e77 100644 --- a/libspu/kernel/hal/prot_wrapper.cc +++ b/libspu/kernel/hal/prot_wrapper.cc @@ -88,9 +88,9 @@ Value _rand_p(SPUContext* ctx, const Shape& shape) { return rnd; } -Value _rand_s(SPUContext* ctx, const Shape& shape) { +Value _rand_s(SPUContext* ctx, const Shape& shape, DataType dtype) { SPU_TRACE_HAL_DISP(ctx, shape); - auto rnd = mpc::rand_s(ctx, shape); + auto rnd = mpc::rand_s(ctx, shape, dtype); return rnd; } diff --git a/libspu/kernel/hal/prot_wrapper.h b/libspu/kernel/hal/prot_wrapper.h index a3c138cac..6c50fadb6 100644 --- a/libspu/kernel/hal/prot_wrapper.h +++ b/libspu/kernel/hal/prot_wrapper.h @@ -120,7 +120,7 @@ Value _bitrev_v(SPUContext* ctx, const Value& in, size_t start, size_t end); Value _make_p(SPUContext* ctx, uint128_t init, const Shape& shape); Value _rand_p(SPUContext* ctx, const Shape& shape); -Value _rand_s(SPUContext* ctx, const Shape& shape); +Value _rand_s(SPUContext* ctx, const Shape& shape, DataType dtype); // FIXME: temporary API, formalize later Value _rand_perm_s(SPUContext* ctx, const Shape& shape); diff --git a/libspu/kernel/hal/random.cc b/libspu/kernel/hal/random.cc index c614d191b..ddf0b5924 100644 --- a/libspu/kernel/hal/random.cc +++ b/libspu/kernel/hal/random.cc @@ -64,7 +64,7 @@ Value random(SPUContext* ctx, Visibility vis, DataType dtype, if (vis == VIS_PUBLIC) { ret = _rand_p(ctx, shape).setDtype(dtype); } else if (vis == VIS_SECRET) { - ret = _rand_s(ctx, shape).setDtype(dtype); + ret = _rand_s(ctx, shape, dtype).setDtype(dtype); } else { SPU_THROW("Invalid visibility={}", vis); } diff --git a/libspu/kernel/hal/soprf.cc b/libspu/kernel/hal/soprf.cc new file mode 100644 index 000000000..4e0356b9d --- /dev/null +++ 
b/libspu/kernel/hal/soprf.cc @@ -0,0 +1,78 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "libspu/kernel/hal/soprf.h" + +#include "libspu/core/trace.h" +#include "libspu/kernel/hal/prot_wrapper.h" +#include "libspu/kernel/hal/ring.h" + +namespace spu::kernel::hal { + +Value soprf(SPUContext* ctx, const Value& x) { + SPU_TRACE_HAL_LEAF(ctx, x); + + // currently, we only support LowMC block cipher + SPU_ENFORCE(ctx->hasKernel("lowmc_b")); + auto inp = x; + + if (x.isPublic()) { + inp = _p2s(ctx, x); + } else if (x.isPrivate()) { + inp = _v2s(ctx, x); + } + + auto ret = dynDispatch(ctx, "lowmc_b", _prefer_b(ctx, inp)); + + return ret.setDtype(x.dtype()); +} + +namespace { +spu::Value _2s(SPUContext* ctx, const Value& x) { + if (x.isPublic()) { + return _p2s(ctx, x); + } else if (x.isPrivate()) { + return _v2s(ctx, x); + } + return x; +} +} // namespace + +Value soprf(SPUContext* ctx, absl::Span inputs) { + // currently, we only support LowMC block cipher + SPU_ENFORCE(ctx->hasKernel("multi_key_lowmc_b")); + SPU_ENFORCE(!inputs.empty(), "inputs should not be empty"); + SPU_ENFORCE(std::all_of(inputs.begin() + 1, inputs.end(), + [&inputs](const spu::Value& v) { + return v.shape() == inputs.front().shape(); + }), + "shape mismatch"); + SPU_ENFORCE(std::all_of(inputs.begin() + 1, inputs.end(), + [&inputs](const Value& v) { + return v.dtype() == inputs.front().dtype(); + }), + "not all element has same dtype"); 
+ + std::vector inp; + inp.reserve(inputs.size()); + for (const auto& v : inputs) { + inp.push_back(_prefer_b(ctx, _2s(ctx, v))); + } + + auto ret = dynDispatch(ctx, "multi_key_lowmc_b", inp); + + return ret.setDtype(inputs.front().dtype()); +} + +} // namespace spu::kernel::hal diff --git a/libspu/kernel/hal/soprf.h b/libspu/kernel/hal/soprf.h new file mode 100644 index 000000000..02dd11ee1 --- /dev/null +++ b/libspu/kernel/hal/soprf.h @@ -0,0 +1,35 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "libspu/core/context.h" +#include "libspu/core/value.h" + +namespace spu::kernel::hal { + +// Shared Oblivious PRF +// ret = PRF(x, key), but with x, key in secret share. +// now, `key` is generated inside kernel to guarantee the 128-bit security. 
+Value soprf(SPUContext* ctx, const Value& x); + +// Multi-Key version of shared oblivious PRF +// We use the scheme in: +// REF: https://eprint.iacr.org/2019/518 +// +// Warning: There may exist collision if you feed too many keys, although we +// limit the probability to be less than 2^{-40} in almost all situations; +Value soprf(SPUContext* ctx, absl::Span inputs); + +} // namespace spu::kernel::hal diff --git a/libspu/kernel/hal/type_cast.cc b/libspu/kernel/hal/type_cast.cc index 89ad0142c..b432b8641 100644 --- a/libspu/kernel/hal/type_cast.cc +++ b/libspu/kernel/hal/type_cast.cc @@ -80,7 +80,14 @@ Value reveal(SPUContext* ctx, const Value& x) { Value reveal_to(SPUContext* ctx, const Value& x, size_t rank) { SPU_TRACE_HAL_LEAF(ctx, x, rank); - SPU_ENFORCE(x.isSecret()); + SPU_ENFORCE(!x.isPublic()); + if (x.isPrivate()) { + if (x.owner() == static_cast(rank)) { + return x; + } else { + return _s2v(ctx, _v2s(ctx, x), rank).setDtype(x.dtype()); + } + } return _s2v(ctx, x, rank).setDtype(x.dtype()); } diff --git a/libspu/kernel/hlo/BUILD.bazel b/libspu/kernel/hlo/BUILD.bazel index d9ee64ca5..80c1c9bde 100644 --- a/libspu/kernel/hlo/BUILD.bazel +++ b/libspu/kernel/hlo/BUILD.bazel @@ -305,6 +305,7 @@ spu_cc_test( ":sort", "//libspu/kernel:test_util", "//libspu/kernel/hal:polymorphic", + "//libspu/mpc/utils:simulate", ], ) @@ -349,3 +350,46 @@ spu_cc_test( "//libspu/kernel:test_util", ], ) + +spu_cc_library( + name = "soprf", + srcs = ["soprf.cc"], + hdrs = ["soprf.h"], + deps = [ + ":geometrical", + "//libspu/kernel/hal:soprf", + ], +) + +spu_cc_test( + name = "soprf_test", + srcs = ["soprf_test.cc"], + deps = [ + ":casting", + ":const", + ":soprf", + "//libspu/kernel:test_util", + "//libspu/mpc/utils:simulate", + ], +) + +spu_cc_library( + name = "permute", + srcs = ["permute.cc"], + hdrs = ["permute.h"], + deps = [ + "//libspu/kernel/hal:permute", + ], +) + +spu_cc_test( + name = "permute_test", + srcs = ["permute_test.cc"], + deps = [ + ":casting", + ":const", 
":permute", + "//libspu/kernel:test_util", + "//libspu/mpc/utils:simulate", + ], +) diff --git a/libspu/kernel/hlo/casting_test.cc b/libspu/kernel/hlo/casting_test.cc index 6e59b4b30..415055a42 100644 --- a/libspu/kernel/hlo/casting_test.cc +++ b/libspu/kernel/hlo/casting_test.cc @@ -59,7 +59,8 @@ INSTANTIATE_TEST_SUITE_P( CastingTestInstances, CastingTest, testing::Combine(testing::Values(FieldType::FM64, FieldType::FM128), testing::Values(ProtocolKind::REF2K, ProtocolKind::SEMI2K, - ProtocolKind::ABY3)), + ProtocolKind::ABY3, + ProtocolKind::CHEETAH)), [](const testing::TestParamInfo &p) { return fmt::format("{}x{}", std::get<0>(p.param), std::get<1>(p.param)); }); diff --git a/libspu/kernel/hlo/permute.cc b/libspu/kernel/hlo/permute.cc new file mode 100644 index 000000000..3a6dba272 --- /dev/null +++ b/libspu/kernel/hlo/permute.cc @@ -0,0 +1,56 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "libspu/kernel/hlo/permute.h" + +#include "libspu/core/context.h" + +namespace spu::kernel::hlo { + +namespace { + +bool check_permute_kernel(SPUContext* ctx) { + // TODO: Do checks according to visibility of inputs and perm later. 
+ return ctx->hasKernel("rand_perm_m") && ctx->hasKernel("perm_am") && + ctx->hasKernel("perm_ap") && ctx->hasKernel("inv_perm_am") && + ctx->hasKernel("inv_perm_ap"); +} +} // namespace + +std::vector InvPermute(SPUContext* ctx, + absl::Span inputs, + const spu::Value& perm, int64_t perm_dim) { + SPU_ENFORCE(check_permute_kernel(ctx), + "permute related kernel not supported"); + + auto inv_perm_fn = [&](absl::Span input) { + return hal::apply_inv_permute_1d(ctx, input, perm); + }; + + return hal::permute(ctx, inputs, perm_dim, inv_perm_fn); +}; + +std::vector Permute(SPUContext* ctx, + absl::Span inputs, + const spu::Value& perm, int64_t perm_dim) { + SPU_ENFORCE(check_permute_kernel(ctx), + "permute related kernel not supported"); + + auto perm_fn = [&](absl::Span input) { + return hal::apply_permute_1d(ctx, input, perm); + }; + + return hal::permute(ctx, inputs, perm_dim, perm_fn); +} +} // namespace spu::kernel::hlo diff --git a/libspu/kernel/hlo/permute.h b/libspu/kernel/hlo/permute.h new file mode 100644 index 000000000..4f8189a28 --- /dev/null +++ b/libspu/kernel/hlo/permute.h @@ -0,0 +1,48 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "libspu/kernel/hal/permute.h" + +namespace spu::kernel::hlo { + +// Inverse permute vector `inputs` over permutation `perm` +// Let [n] = {0,1,2,...,n-1}, then perm: [n] -> [n] should be an invertible +// permutation, we denote perm^{-1} as its inverse. +// For each input x, we get y = perm^{-1} (x), i.e. y[i] = x[perm^{-1}(i)] +// +// Note: to simplify the implementation, we FORCE the visibility of inputs to be +// the SAME (for Private, the OWNER should also be the SAME). +// IMPORTANT NOTE: when perm is Private (owner i), and inputs include some mix +// of either Secret or Private (with owner j != i), you should Seal the Private +// inputs (with owner j != i) first, and do permute once to improve performance. +std::vector InvPermute(SPUContext* ctx, + absl::Span inputs, + const spu::Value& perm, int64_t perm_dim); + +// Permute vector `inputs` over permutation `perm` +// For each input x, we get y = perm(x), i.e. y[i] = x[perm(i)] +// Note: to simplify the implementation, we force the visibility of inputs to be +// the same (for Private, the owner should also be the same). +// +// Note: to simplify the implementation, we FORCE the visibility of inputs to be +// the SAME (for Private, the OWNER should also be the SAME). +// IMPORTANT NOTE: when perm is Private (owner i), and inputs include some mix +// of either Secret or Private (with owner j != i), you should Seal the Private +// inputs (with owner j != i) first, and do permute once to improve performance. +std::vector Permute(SPUContext* ctx, + absl::Span inputs, + const spu::Value& perm, int64_t perm_dim); +} // namespace spu::kernel::hlo diff --git a/libspu/kernel/hlo/permute_test.cc b/libspu/kernel/hlo/permute_test.cc new file mode 100644 index 000000000..c1b374183 --- /dev/null +++ b/libspu/kernel/hlo/permute_test.cc @@ -0,0 +1,352 @@ +// Copyright 2024 Ant Group Co., Ltd. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "libspu/kernel/hlo/permute.h" + +#include "gtest/gtest.h" +#include "xtensor/xio.hpp" + +#include "libspu/core/encoding.h" +#include "libspu/kernel/hal/constants.h" +#include "libspu/kernel/hlo/casting.h" +#include "libspu/kernel/hlo/const.h" +#include "libspu/kernel/test_util.h" +#include "libspu/mpc/utils/simulate.h" + +namespace spu::kernel::hlo { + +namespace { + +using PermuteFunc = std::function( + SPUContext*, absl::Span inputs, const spu::Value& perm, + int64_t perm_dim)>; + +const FieldType kField = FM64; + +enum class VisType { + VisPriv0 = 0, // private, owned by party 0 + VisPriv1 = 1, // private, owned by party 1 + VisPub = 2, + VisSec = 3, +}; + +const std::vector kVisTypes = {VisType::VisPub, VisType::VisSec, + VisType::VisPriv0, VisType::VisPriv1}; + +inline std::string get_vis_str(VisType type) { + switch (type) { + case VisType::VisPub: + return "VisPub"; + case VisType::VisSec: + return "VisSec"; + case VisType::VisPriv0: + return "VisPriv0"; + case VisType::VisPriv1: + return "VisPriv1"; + default: + return "Unknown"; + } +} + +bool checkCommFree(VisType x_vis, VisType perm_vis) { + // Permutation is comm. free if: + // 1. perm is Public + // 2. 
perm is Private, x is Public or Private with same owner + if (perm_vis == VisType::VisPub) { + return true; + } else if (perm_vis == VisType::VisPriv0 && + (x_vis == VisType::VisPriv0 || x_vis == VisType::VisPub)) { + return true; + } else if (perm_vis == VisType::VisPriv1 && + (x_vis == VisType::VisPriv1 || x_vis == VisType::VisPub)) { + return true; + } + + return false; +} + +bool checkSpPass(VisType x_vis, VisType perm_vis) { + // `inv_perm_av` will hit, when: + // 1. perm is Private and x is Secret + // 2. perm is Private and x is Private with different owner + if (perm_vis == VisType::VisPriv0) { + if (x_vis == VisType::VisSec || x_vis == VisType::VisPriv1) { + return true; + } + } else if (perm_vis == VisType::VisPriv1) { + if (x_vis == VisType::VisSec || x_vis == VisType::VisPriv0) { + return true; + } + } + + return false; +} + +Value makeTestValue(SPUContext* ctx, PtBufferView init, VisType vis) { + DataType dtype = getEncodeType(init.pt_type); + + auto res = hal::constant(ctx, init, dtype, {}); + + switch (vis) { + case VisType::VisPub: + return res; + case VisType::VisSec: { + return Seal(ctx, res); + } + case VisType::VisPriv0: { + res = Seal(ctx, res); + return RevealTo(ctx, res, 0); + } + case VisType::VisPriv1: { + res = Seal(ctx, res); + return RevealTo(ctx, res, 1); + } + default: + SPU_THROW("Unknown vis type"); + } +} + +template +xt::xarray evalSinglePermuteOp(SPUContext* ctx, VisType x_vis, + VisType perm_vis, PtBufferView x, + PtBufferView perm, + const PermuteFunc& perm_func, + int64_t perm_dim = 0) { + auto x_v = makeTestValue(ctx, x, x_vis); + auto perm_v = makeTestValue(ctx, perm, perm_vis); + + size_t send_round = ctx->lctx()->GetStats()->sent_actions; + size_t recv_round = ctx->lctx()->GetStats()->recv_actions; + auto perm_ret = perm_func(ctx, {x_v}, perm_v, perm_dim); + send_round = ctx->lctx()->GetStats()->sent_actions - send_round; + recv_round = ctx->lctx()->GetStats()->recv_actions - recv_round; + + // test whether hit the proper 
kernel. + if (checkCommFree(x_vis, perm_vis)) { + EXPECT_EQ(send_round, 0); + } + if (ctx->hasKernel("inv_perm_av") && checkSpPass(x_vis, perm_vis)) { + auto n_repeat = x_v.shape().numel() / x_v.shape().dim(perm_dim); + // For ss version, at least 3 rounds. + EXPECT_LE(std::min(send_round, recv_round), 2 * n_repeat); + } + EXPECT_EQ(perm_ret.size(), 1); + + auto ret = perm_ret[0]; + if (!ret.isPublic()) { + ret = Reveal(ctx, ret); + } + EXPECT_TRUE(ret.isPublic()); + + return hal::dump_public_as(ctx, ret); +} + +template +std::vector> evalMultiplePermuteOp( + SPUContext* ctx, VisType x_vis, VisType perm_vis, PtBufferView x, + PtBufferView perm, const PermuteFunc& perm_func, int64_t perm_dim = 0) { + std::vector x_vec; + x_vec.reserve(4); + x_vec.push_back(makeTestValue(ctx, x, x_vis)); + x_vec.push_back(makeTestValue(ctx, x, x_vis)); + x_vec.push_back(makeTestValue(ctx, x, x_vis)); + x_vec.push_back(makeTestValue(ctx, x, x_vis)); + + auto perm_v = makeTestValue(ctx, perm, perm_vis); + + auto perm_ret = perm_func(ctx, x_vec, perm_v, perm_dim); + EXPECT_EQ(perm_ret.size(), 4); + + std::vector> ret_vec; + for (auto ret : perm_ret) { + if (!ret.isPublic()) { + ret = Reveal(ctx, ret); + } + EXPECT_TRUE(ret.isPublic()); + ret_vec.push_back(hal::dump_public_as(ctx, ret)); + } + + return ret_vec; +} + +} // namespace + +class PermuteTest : public ::testing::TestWithParam< + std::tuple> {}; + +INSTANTIATE_TEST_SUITE_P( + GeneralPermute, PermuteTest, + testing::Combine(testing::ValuesIn(kVisTypes), // vis of x + testing::ValuesIn(kVisTypes), // vis of perm + testing::Values(SEMI2K, ABY3), // underlying protocol + testing::Values(2, 3) // npc=2 is not valid in ABY3 + ), + [](const testing::TestParamInfo& p) { + return fmt::format("{}x{}x{}x{}", get_vis_str(std::get<0>(p.param)), + get_vis_str(std::get<1>(p.param)), + std::get<2>(p.param), std::get<3>(p.param)); + }); + +TEST_P(PermuteTest, SinglePermuteWork) { + const VisType x_vis = std::get<0>(GetParam()); + const VisType 
perm_vis = std::get<1>(GetParam()); + const ProtocolKind protocol = std::get<2>(GetParam()); + const size_t npc = std::get<3>(GetParam()); + + if (protocol == ABY3 && npc == 2) { + return; + } + + xt::xarray x = {10, 0, 2, 3, 9, 1, 5, 6}; + xt::xarray perm = {2, 7, 1, 6, 0, 4, 3, 5}; + + xt::xarray expected_inv_perm = {9, 2, 10, 5, 1, 6, 3, 0}; + xt::xarray expected_perm = {2, 6, 0, 5, 10, 9, 3, 1}; + + mpc::utils::simulate( + npc, [&](const std::shared_ptr& lctx) { + SPUContext sctx = test::makeSPUContext(protocol, kField, lctx); + + // test of inv_permute + auto inv_perm_ret = evalSinglePermuteOp(&sctx, x_vis, perm_vis, + x, perm, InvPermute); + EXPECT_TRUE(xt::allclose(expected_inv_perm, inv_perm_ret, 0.001, 0.001)) + << expected_inv_perm << std::endl + << inv_perm_ret << std::endl; + + // test of permute + auto perm_ret = evalSinglePermuteOp(&sctx, x_vis, perm_vis, x, + perm, Permute); + EXPECT_TRUE(xt::allclose(expected_perm, perm_ret, 0.001, 0.001)) + << expected_perm << std::endl + << perm_ret << std::endl; + }); +} + +TEST_P(PermuteTest, PermDimWork) { + const VisType x_vis = std::get<0>(GetParam()); + const VisType perm_vis = std::get<1>(GetParam()); + const ProtocolKind protocol = std::get<2>(GetParam()); + const size_t npc = std::get<3>(GetParam()); + + if (protocol == ABY3 && npc == 2) { + return; + } + + xt::xarray x = {{10, 0, 2, 3, 9, 1, 5, 6}, + {-10, 0, -2, -3, -9, -1, -5, -6}}; + xt::xarray perm = {2, 7, 1, 6, 0, 4, 3, 5}; + + xt::xarray expected_inv_perm = {{9, 2, 10, 5, 1, 6, 3, 0}, + {-9, -2, -10, -5, -1, -6, -3, -0}}; + xt::xarray expected_perm = {{2, 6, 0, 5, 10, 9, 3, 1}, + {-2, -6, -0, -5, -10, -9, -3, -1}}; + + mpc::utils::simulate( + npc, [&](const std::shared_ptr& lctx) { + SPUContext sctx = test::makeSPUContext(protocol, kField, lctx); + + // test of inv_permute + auto inv_perm_ret = evalSinglePermuteOp( + &sctx, x_vis, perm_vis, x, perm, InvPermute, /*perm_dim*/ 1); + EXPECT_TRUE(xt::allclose(expected_inv_perm, inv_perm_ret, 0.001, 
0.001)) + << expected_inv_perm << std::endl + << inv_perm_ret << std::endl; + + // test of permute + auto perm_ret = evalSinglePermuteOp( + &sctx, x_vis, perm_vis, x, perm, Permute, /*perm_dim*/ 1); + EXPECT_TRUE(xt::allclose(expected_perm, perm_ret, 0.001, 0.001)) + << expected_perm << std::endl + << perm_ret << std::endl; + }); +} + +TEST_P(PermuteTest, MultiplePermuteWork) { + const VisType x_vis = std::get<0>(GetParam()); + const VisType perm_vis = std::get<1>(GetParam()); + const ProtocolKind protocol = std::get<2>(GetParam()); + const size_t npc = std::get<3>(GetParam()); + + if (protocol == ABY3 && npc == 2) { + return; + } + + xt::xarray x = {10, 0, 2, 3, 9, 1, 5, 6}; + xt::xarray perm = {2, 7, 1, 6, 0, 4, 3, 5}; + + xt::xarray expected_inv_perm = {9, 2, 10, 5, 1, 6, 3, 0}; + xt::xarray expected_perm = {2, 6, 0, 5, 10, 9, 3, 1}; + + mpc::utils::simulate( + npc, [&](const std::shared_ptr& lctx) { + SPUContext sctx = test::makeSPUContext(protocol, kField, lctx); + + // test of inv_permute + auto inv_perm_ret_vec = evalMultiplePermuteOp( + &sctx, x_vis, perm_vis, x, perm, InvPermute); + for (const auto& inv_perm_ret : inv_perm_ret_vec) { + EXPECT_TRUE( + xt::allclose(expected_inv_perm, inv_perm_ret, 0.001, 0.001)) + << expected_inv_perm << std::endl + << inv_perm_ret << std::endl; + } + + // test of permute + auto perm_ret_vec = evalMultiplePermuteOp( + &sctx, x_vis, perm_vis, x, perm, Permute); + for (const auto& perm_ret : perm_ret_vec) { + EXPECT_TRUE(xt::allclose(expected_perm, perm_ret, 0.001, 0.001)) + << expected_perm << std::endl + << perm_ret << std::endl; + } + }); +} + +class PermuteEmptyTest : public ::testing::TestWithParam {}; + +INSTANTIATE_TEST_SUITE_P( + PermuteEmpty, PermuteEmptyTest, + testing::Values(ProtocolKind::SEMI2K, ProtocolKind::ABY3), + [](const testing::TestParamInfo& p) { + return fmt::format("{}", p.param); + }); + +TEST_P(PermuteEmptyTest, Empty) { + ProtocolKind prot = GetParam(); + + mpc::utils::simulate( + 3, [&](const 
std::shared_ptr& lctx) { + SPUContext sctx = test::makeSPUContext(prot, kField, lctx); + + auto empty_x = + Seal(&sctx, Constant(&sctx, static_cast(1), {0})); + auto empty_perm = + Seal(&sctx, Constant(&sctx, static_cast(0), {0})); + + auto empty_inv_perm_x = InvPermute(&sctx, {empty_x}, empty_perm, 0); + EXPECT_EQ(empty_inv_perm_x.size(), 1); + EXPECT_EQ(empty_inv_perm_x[0].numel(), 0); + EXPECT_EQ(empty_inv_perm_x[0].shape().size(), 1); + EXPECT_EQ(empty_inv_perm_x[0].shape()[0], 0); + + auto empty_perm_x = Permute(&sctx, {empty_x}, empty_perm, 0); + EXPECT_EQ(empty_perm_x.size(), 1); + EXPECT_EQ(empty_perm_x[0].numel(), 0); + EXPECT_EQ(empty_perm_x[0].shape().size(), 1); + EXPECT_EQ(empty_perm_x[0].shape()[0], 0); + }); +} + +} // namespace spu::kernel::hlo diff --git a/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/beaver_stream.h b/libspu/kernel/hlo/soprf.cc similarity index 58% rename from libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/beaver_stream.h rename to libspu/kernel/hlo/soprf.cc index 04dcc88e0..953732420 100644 --- a/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/beaver_stream.h +++ b/libspu/kernel/hlo/soprf.cc @@ -12,20 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#pragma once +#include "libspu/kernel/hlo/soprf.h" -#include +#include "libspu/kernel/hlo/geometrical.h" -namespace spu::mpc::semi2k::beaver::ttp_server { +namespace spu::kernel::hlo { -constexpr size_t kReplayChunkSize = 50 * 1024 * 1024; // bytes +Value SoPrf(SPUContext* ctx, const Value& x) { return hal::soprf(ctx, x); } -constexpr size_t kUpStreamChunkSize = 50 * 1024 * 1024; // bytes -constexpr size_t kDownStreamChunkSize = 50 * 1024 * 1024; // bytes +Value SoPrf(SPUContext* ctx, absl::Span inputs) { + return hal::soprf(ctx, inputs); +} -// A list of buffer streams -struct BeaverDownStreamMeta { - int32_t err_code = 0; -}; - -} // namespace spu::mpc::semi2k::beaver::ttp_server \ No newline at end of file +} // namespace spu::kernel::hlo diff --git a/libspu/kernel/hlo/soprf.h b/libspu/kernel/hlo/soprf.h new file mode 100644 index 000000000..1a673b337 --- /dev/null +++ b/libspu/kernel/hlo/soprf.h @@ -0,0 +1,37 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "libspu/kernel/hal/soprf.h" + +namespace spu::kernel::hlo { + +// shared oblivious PRF +// ret = PRF(x, key), but with x, key in secret share. +// However, for safety, key should be 128 bits long, but `x` may be 64 bits or +// even 32 bits, it's hard to pass another `key` param with FM128, so we just +// generate a shared key inside the kernel. 
+// TODO: add `key` as a param +Value SoPrf(SPUContext* ctx, const Value& x); + +// Multi-Key version of shared oblivious PRF +// We use the scheme in: +// REF: https://eprint.iacr.org/2019/518 +// +// Warning: There may exist collision if you feed too many keys, although we +// limit the probability to be less than 2^{-40} in almost all situations; +Value SoPrf(SPUContext* ctx, absl::Span inputs); + +} // namespace spu::kernel::hlo diff --git a/libspu/kernel/hlo/soprf_test.cc b/libspu/kernel/hlo/soprf_test.cc new file mode 100644 index 000000000..6c4534421 --- /dev/null +++ b/libspu/kernel/hlo/soprf_test.cc @@ -0,0 +1,120 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "libspu/kernel/hlo/soprf.h" + +#include "gtest/gtest.h" +#include "xtensor/xio.hpp" + +#include "libspu/kernel/hlo/casting.h" +#include "libspu/kernel/hlo/const.h" +#include "libspu/kernel/test_util.h" +#include "libspu/mpc/utils/ring_ops.h" +#include "libspu/mpc/utils/simulate.h" + +namespace spu::kernel::hlo { + +class SoPrfTest + : public ::testing::TestWithParam> {}; + +INSTANTIATE_TEST_SUITE_P( + SoPrfTestInstances, SoPrfTest, + testing::Combine(testing::Values(FieldType::FM64, FieldType::FM128), + testing::Values(ProtocolKind::SEMI2K)), + [](const testing::TestParamInfo &p) { + return fmt::format("{}x{}", std::get<0>(p.param), std::get<1>(p.param)); + }); + +TEST_P(SoPrfTest, EmptyWork) { + FieldType field = std::get<0>(GetParam()); + ProtocolKind prot = std::get<1>(GetParam()); + + mpc::utils::simulate( + 3, [&](const std::shared_ptr &lctx) { + SPUContext sctx = test::makeSPUContext(prot, field, lctx); + + auto empty_x = Seal(&sctx, Constant(&sctx, 1, {0})); + auto empty_ret = SoPrf(&sctx, empty_x); + + EXPECT_EQ(empty_ret.numel(), 0); + EXPECT_EQ(empty_ret.shape().size(), 1); + EXPECT_EQ(empty_ret.shape()[0], 0); + }); +} + +TEST_P(SoPrfTest, Work) { + FieldType field = std::get<0>(GetParam()); + ProtocolKind prot = std::get<1>(GetParam()); + + mpc::utils::simulate( + 3, [&](const std::shared_ptr &lctx) { + SPUContext sctx = test::makeSPUContext(prot, field, lctx); + + const Shape shape = {20, 17}; + xt::xarray x = xt::random::randint(shape, 0); + xt::xarray y = xt::random::randint(shape, 0); + + auto x_share = Seal(&sctx, Constant(&sctx, x, shape)); + auto y_share = Seal(&sctx, Constant(&sctx, y, shape)); + + auto ret_x = SoPrf(&sctx, x_share); + auto ret_y = SoPrf(&sctx, y_share); + EXPECT_EQ(ret_x.shape(), shape); + EXPECT_EQ(ret_x.shape(), ret_y.shape()); + + auto ret_x_pub = Reveal(&sctx, ret_x); + auto ret_y_pub = Reveal(&sctx, ret_y); + + EXPECT_FALSE(mpc::ring_all_equal(ret_x_pub.data(), ret_y_pub.data())); + }); +} + +class 
MultiKeySoPrfTest : public ::testing::TestWithParam< + std::tuple> {}; + +INSTANTIATE_TEST_SUITE_P( + MultiKeySoPrfTestInstances, MultiKeySoPrfTest, + testing::Combine(testing::Values(FieldType::FM64, FieldType::FM128), + testing::Values(ProtocolKind::SEMI2K), + testing::Values(1, 2, 4)), // num of keys + [](const testing::TestParamInfo &p) { + return fmt::format("{}x{}x{}", std::get<0>(p.param), std::get<1>(p.param), + std::get<2>(p.param)); + }); + +TEST_P(MultiKeySoPrfTest, Work) { + FieldType field = std::get<0>(GetParam()); + ProtocolKind prot = std::get<1>(GetParam()); + size_t num_keys = std::get<2>(GetParam()); + + mpc::utils::simulate( + 3, [&](const std::shared_ptr &lctx) { + SPUContext sctx = test::makeSPUContext(prot, field, lctx); + + const Shape shape = {20, 17}; + std::vector inputs; + inputs.reserve(num_keys); + for (size_t i = 0; i < num_keys; ++i) { + xt::xarray tmp = xt::random::randint(shape, 0); + auto v = Seal(&sctx, Constant(&sctx, tmp, shape)); + inputs.push_back(v); + } + + auto ret = SoPrf(&sctx, absl::MakeSpan(inputs)); + + EXPECT_EQ(ret.shape(), shape); + }); +} + +} // namespace spu::kernel::hlo diff --git a/libspu/kernel/hlo/sort_test.cc b/libspu/kernel/hlo/sort_test.cc index cabcb4fdc..89519b9e6 100644 --- a/libspu/kernel/hlo/sort_test.cc +++ b/libspu/kernel/hlo/sort_test.cc @@ -27,7 +27,14 @@ #include "libspu/kernel/hlo/casting.h" #include "libspu/kernel/hlo/const.h" #include "libspu/kernel/test_util.h" +#include "libspu/mpc/utils/simulate.h" +// to print method name +std::ostream &operator<<(std::ostream &os, + spu::RuntimeConfig_SortMethod method) { + os << spu::RuntimeConfig::SortMethod_Name(method); + return os; +} namespace spu::kernel::hlo { TEST(SortTest, Simple) { @@ -228,34 +235,125 @@ TEST(SortTest, LargeNumel) { } } -TEST(SimpleSortTest, MultiOperands) { - SPUContext ctx = test::makeSPUContext(); - xt::xarray k1 = {7, 6, 5, 5, 4, 4, 4, 1, 3, 3}; - xt::xarray k2 = {1, 2, 3, 6, 7, 6, 5, 2, 1, 2}; +class SimpleSortTest + : 
public ::testing::TestWithParam> {}; - xt::xarray sorted_k1 = {1, 3, 3, 4, 4, 4, 5, 5, 6, 7}; - xt::xarray sorted_k2 = {2, 1, 2, 5, 6, 7, 3, 6, 2, 1}; +TEST_P(SimpleSortTest, MultiOperands) { + size_t npc = std::get<0>(GetParam()); + FieldType field = std::get<1>(GetParam()); + ProtocolKind prot = std::get<2>(GetParam()); + RuntimeConfig::SortMethod method = std::get<3>(GetParam()); - Value k1_v = test::makeValue(&ctx, k1, VIS_SECRET); - Value k2_v = test::makeValue(&ctx, k2, VIS_SECRET); + mpc::utils::simulate( + npc, [&](const std::shared_ptr &lctx) { + RuntimeConfig cfg; + cfg.set_protocol(prot); + cfg.set_field(field); + cfg.set_enable_action_trace(false); + cfg.set_sort_method(method); + SPUContext ctx = test::makeSPUContext(cfg, lctx); - std::vector rets = - SimpleSort(&ctx, {k1_v, k2_v}, 0, hal::SortDirection::Ascending, 2); + xt::xarray k1 = {7, 6, 5, 5, 4, 4, 4, 1, 3, 3}; + xt::xarray k2 = {1, 2, 3, 6, 7, 6, 5, 2, 1, 2}; - EXPECT_EQ(rets.size(), 2); + xt::xarray sorted_k1 = {1, 3, 3, 4, 4, 4, 5, 5, 6, 7}; + xt::xarray sorted_k2 = {2, 1, 2, 5, 6, 7, 3, 6, 2, 1}; - auto sorted_k1_hat = - hal::dump_public_as(&ctx, hal::reveal(&ctx, rets[0])); - auto sorted_k2_hat = - hal::dump_public_as(&ctx, hal::reveal(&ctx, rets[1])); + Value k1_v = test::makeValue(&ctx, k1, VIS_SECRET); + Value k2_v = test::makeValue(&ctx, k2, VIS_SECRET); - EXPECT_TRUE(xt::allclose(sorted_k1, sorted_k1_hat, 0.01, 0.001)) - << sorted_k1 << std::endl - << sorted_k1_hat << std::endl; + std::vector rets = + SimpleSort(&ctx, {k1_v, k2_v}, 0, hal::SortDirection::Ascending, 2); - EXPECT_TRUE(xt::allclose(sorted_k2, sorted_k2_hat, 0.01, 0.001)) - << sorted_k2 << std::endl - << sorted_k2_hat << std::endl; + EXPECT_EQ(rets.size(), 2); + + auto sorted_k1_hat = + hal::dump_public_as(&ctx, hal::reveal(&ctx, rets[0])); + auto sorted_k2_hat = + hal::dump_public_as(&ctx, hal::reveal(&ctx, rets[1])); + + EXPECT_TRUE(xt::allclose(sorted_k1, sorted_k1_hat, 0.01, 0.001)) + << sorted_k1 << std::endl + << 
sorted_k1_hat << std::endl; + + EXPECT_TRUE(xt::allclose(sorted_k2, sorted_k2_hat, 0.01, 0.001)) + << sorted_k2 << std::endl + << sorted_k2_hat << std::endl; + }); } +TEST_P(SimpleSortTest, SingleKeyWithPayload) { + size_t npc = std::get<0>(GetParam()); + FieldType field = std::get<1>(GetParam()); + ProtocolKind prot = std::get<2>(GetParam()); + RuntimeConfig::SortMethod method = std::get<3>(GetParam()); + + mpc::utils::simulate( + npc, [&](const std::shared_ptr &lctx) { + RuntimeConfig cfg; + cfg.set_protocol(prot); + cfg.set_field(field); + cfg.set_enable_action_trace(false); + cfg.set_sort_method(method); + SPUContext ctx = test::makeSPUContext(cfg, lctx); + + xt::xarray k1 = {7, 6, 5, 4, 1, 3, 2}; + xt::xarray k2 = {1, 2, 3, 6, 7, 6, 5}; + + xt::xarray sorted_k1 = {1, 2, 3, 4, 5, 6, 7}; + xt::xarray sorted_k2 = {7, 5, 6, 6, 3, 2, 1}; + + Value k1_v = test::makeValue(&ctx, k1, VIS_SECRET); + Value k2_v = test::makeValue(&ctx, k2, VIS_SECRET); + + std::vector rets = + SimpleSort(&ctx, {k1_v, k2_v}, 0, hal::SortDirection::Ascending, 1); + + EXPECT_EQ(rets.size(), 2); + + auto sorted_k1_hat = + hal::dump_public_as(&ctx, hal::reveal(&ctx, rets[0])); + auto sorted_k2_hat = + hal::dump_public_as(&ctx, hal::reveal(&ctx, rets[1])); + + EXPECT_TRUE(xt::allclose(sorted_k1, sorted_k1_hat, 0.01, 0.001)) + << sorted_k1 << std::endl + << sorted_k1_hat << std::endl; + + EXPECT_TRUE(xt::allclose(sorted_k2, sorted_k2_hat, 0.01, 0.001)) + << sorted_k2 << std::endl + << sorted_k2_hat << std::endl; + }); +} + +INSTANTIATE_TEST_SUITE_P( + SimpleSort2PCTestInstances, SimpleSortTest, + testing::Combine( + testing::Values(2), testing::Values(FieldType::FM32, FieldType::FM64), + testing::Values(ProtocolKind::SEMI2K, ProtocolKind::CHEETAH), + testing::Values(RuntimeConfig::SORT_DEFAULT, RuntimeConfig::SORT_RADIX, + RuntimeConfig::SORT_QUICK, + RuntimeConfig::SORT_NETWORK)), + [](const testing::TestParamInfo &p) { + return fmt::format("{}x{}x{}x{}", std::get<0>(p.param), + 
std::get<1>(p.param), std::get<2>(p.param), + std::get<3>(p.param)); + }); + +INSTANTIATE_TEST_SUITE_P( + SimpleSort3PCTestInstances, SimpleSortTest, + testing::Combine(testing::Values(3), + testing::Values(FieldType::FM32, FieldType::FM64), + testing::Values(ProtocolKind::SEMI2K, ProtocolKind::ABY3), + testing::Values(RuntimeConfig::SORT_DEFAULT, + RuntimeConfig::SORT_RADIX, + RuntimeConfig::SORT_QUICK, + RuntimeConfig::SORT_NETWORK)), + [](const testing::TestParamInfo &p) { + return fmt::format("{}x{}x{}x{}", std::get<0>(p.param), + std::get<1>(p.param), std::get<2>(p.param), + std::get<3>(p.param)); + }); + } // namespace spu::kernel::hlo diff --git a/libspu/mpc/BUILD.bazel b/libspu/mpc/BUILD.bazel index ffced4cb6..712284906 100644 --- a/libspu/mpc/BUILD.bazel +++ b/libspu/mpc/BUILD.bazel @@ -35,7 +35,7 @@ spu_cc_library( ":io_interface", "//libspu/mpc/utils:ring_ops", "//libspu/mpc/utils:simulate", - "@com_google_googletest//:gtest", + "@googletest//:gtest", ], alwayslink = True, ) @@ -96,7 +96,7 @@ spu_cc_library( "//libspu/mpc:kernel", "//libspu/mpc/common:communicator", "//libspu/mpc/utils:simulate", - "@com_google_googletest//:gtest", + "@googletest//:gtest", ], alwayslink = True, ) @@ -122,7 +122,7 @@ spu_cc_library( "//libspu/mpc:api_test_params", "//libspu/mpc/common:communicator", "//libspu/mpc/utils:simulate", - "@com_google_googletest//:gtest", + "@googletest//:gtest", ], alwayslink = True, ) diff --git a/libspu/mpc/aby3/boolean.cc b/libspu/mpc/aby3/boolean.cc index 96033cc56..d83066a33 100644 --- a/libspu/mpc/aby3/boolean.cc +++ b/libspu/mpc/aby3/boolean.cc @@ -64,6 +64,36 @@ NdArrayRef CastTypeB::proc(KernelEvalContext*, const NdArrayRef& in, return out; } +NdArrayRef RandB::proc(KernelEvalContext* ctx, const Shape& shape) const { + auto* prg_state = ctx->getState(); + const auto field = ctx->getState()->getDefaultField(); + + return DISPATCH_ALL_FIELDS(field, [&]() { + auto [r0, r1] = + prg_state->genPrssPair(field, shape, 
PrgState::GenPrssCtrl::Both); + // only rand bit is supported + const size_t nbits = 1; + const PtType btype = calcBShareBacktype(nbits); + + NdArrayView _r0(r0); + NdArrayView _r1(r1); + return DISPATCH_UINT_PT_TYPES(btype, [&]() { + using bshr_el_t = ScalarT; + using bshr_t = std::array; + + NdArrayRef out(makeType(btype, nbits), shape); + NdArrayView _out(out); + + pforeach(0, shape.numel(), [&](int64_t idx) { + _out[idx][0] = static_cast(_r0[idx] & 1); + _out[idx][1] = static_cast(_r1[idx] & 1); + }); + + return out; + }); + }); +} + NdArrayRef B2P::proc(KernelEvalContext* ctx, const NdArrayRef& in) const { auto* comm = ctx->getState(); const PtType btype = in.eltype().as()->getBacktype(); diff --git a/libspu/mpc/aby3/boolean.h b/libspu/mpc/aby3/boolean.h index dac53b108..b5a0d9584 100644 --- a/libspu/mpc/aby3/boolean.h +++ b/libspu/mpc/aby3/boolean.h @@ -43,6 +43,17 @@ class CastTypeB : public CastTypeKernel { const Type& to_type) const override; }; +class RandB : public RandKernel { + public: + static constexpr const char* kBindName() { return "rand_b"; }; + + ce::CExpr latency() const override { return ce::Const(0); } + + ce::CExpr comm() const override { return ce::Const(0); } + + NdArrayRef proc(KernelEvalContext* ctx, const Shape& shape) const override; +}; + class B2P : public UnaryKernel { public: static constexpr const char* kBindName() { return "b2p"; } diff --git a/libspu/mpc/aby3/protocol.cc b/libspu/mpc/aby3/protocol.cc index 9ca2e0199..e36a32732 100644 --- a/libspu/mpc/aby3/protocol.cc +++ b/libspu/mpc/aby3/protocol.cc @@ -67,7 +67,7 @@ void regAby3Protocol(SPUContext* ctx, aby3::XorBP, aby3::XorBB, // Xor aby3::BitrevB, // bitreverse aby3::BitIntlB, aby3::BitDeintlB, // bit(de)interleave - aby3::RandA, // rand + aby3::RandA, aby3::RandB, // rand #ifdef ENABLE_PRECISE_ABY3_TRUNCPR aby3::TruncAPr, // Trunc #else diff --git a/libspu/mpc/api.cc b/libspu/mpc/api.cc index 3404f00f6..b61a5d3ed 100644 --- a/libspu/mpc/api.cc +++ b/libspu/mpc/api.cc @@ 
-270,10 +270,14 @@ Value rand_p(SPUContext* ctx, const Shape& shape) { FORCE_DISPATCH(ctx, shape); } -Value rand_s(SPUContext* ctx, const Shape& shape) { +Value rand_s(SPUContext* ctx, const Shape& shape, DataType dtype) { SPU_TRACE_MPC_DISP(ctx, shape); TRY_DISPATCH(ctx, shape); - // always return random a share + // can only get random bit share now. + if (dtype == DT_I1) { + return rand_b(ctx, shape); + } + // else, return random a share return rand_a(ctx, shape); } diff --git a/libspu/mpc/api.h b/libspu/mpc/api.h index 50656f9ad..882bf1002 100644 --- a/libspu/mpc/api.h +++ b/libspu/mpc/api.h @@ -89,7 +89,7 @@ Value make_p(SPUContext* ctx, uint128_t init, const Shape& shape); // parties random a public together. Value rand_p(SPUContext* ctx, const Shape& shape); -Value rand_s(SPUContext* ctx, const Shape& shape); +Value rand_s(SPUContext* ctx, const Shape& shape, DataType dtype); // Compute bitwise not of a value. Value not_p(SPUContext* ctx, const Value& x); diff --git a/libspu/mpc/cheetah/arith/BUILD.bazel b/libspu/mpc/cheetah/arith/BUILD.bazel index 809f63cd8..a61726a0b 100644 --- a/libspu/mpc/cheetah/arith/BUILD.bazel +++ b/libspu/mpc/cheetah/arith/BUILD.bazel @@ -83,7 +83,7 @@ spu_cc_test( srcs = ["matmat_prot_test.cc"], deps = [ ":matmat_prot", - "@com_github_xtensor_xtensor//:xtensor", + "@xtensor", ], ) @@ -94,7 +94,7 @@ spu_cc_test( ":cheetah_mul", "//libspu/mpc/utils:ring_ops", "//libspu/mpc/utils:simulate", - "@com_github_xtensor_xtensor//:xtensor", + "@xtensor", ], ) @@ -106,7 +106,7 @@ spu_cc_test( ":cheetah_dot", "//libspu/mpc/utils:ring_ops", "//libspu/mpc/utils:simulate", - "@com_github_xtensor_xtensor//:xtensor", + "@xtensor", ], ) diff --git a/libspu/mpc/cheetah/boolean.h b/libspu/mpc/cheetah/boolean.h index c3799708e..26c318116 100644 --- a/libspu/mpc/cheetah/boolean.h +++ b/libspu/mpc/cheetah/boolean.h @@ -39,6 +39,17 @@ class CastTypeB : public CastTypeKernel { const Type& to_type) const override; }; +class RandB : public RandKernel { + 
public: + static constexpr const char* kBindName() { return "rand_b"; }; + + ce::CExpr latency() const override { return ce::Const(0); } + + ce::CExpr comm() const override { return ce::Const(0); } + + NdArrayRef proc(KernelEvalContext* ctx, const Shape& shape) const override; +}; + class B2P : public UnaryKernel { public: static constexpr const char* kBindName() { return "b2p"; } diff --git a/libspu/mpc/cheetah/boolean_semi2k.cc b/libspu/mpc/cheetah/boolean_semi2k.cc index c432e38ab..74c9c6e8a 100644 --- a/libspu/mpc/cheetah/boolean_semi2k.cc +++ b/libspu/mpc/cheetah/boolean_semi2k.cc @@ -61,6 +61,22 @@ NdArrayRef CastTypeB::proc(KernelEvalContext*, const NdArrayRef& in, return in.as(to_type); } +NdArrayRef RandB::proc(KernelEvalContext* ctx, const Shape& shape) const { + auto* prg_state = ctx->getState(); + const auto field = ctx->getState()->getDefaultField(); + + return DISPATCH_ALL_FIELDS(field, [&]() { + auto r = prg_state->genPriv(field, shape); + // only rand bit is supported + const size_t nbits = 1; + NdArrayView _r(r); + + pforeach(0, shape.numel(), [&](int64_t idx) { _r[idx] = _r[idx] & 1; }); + + return makeBShare(r, field, nbits); + }); +} + NdArrayRef B2P::proc(KernelEvalContext* ctx, const NdArrayRef& in) const { const auto field = in.eltype().as()->field(); auto* comm = ctx->getState(); diff --git a/libspu/mpc/cheetah/conversion.cc b/libspu/mpc/cheetah/conversion.cc index c8ec57b37..07ad6a4b8 100644 --- a/libspu/mpc/cheetah/conversion.cc +++ b/libspu/mpc/cheetah/conversion.cc @@ -59,6 +59,10 @@ NdArrayRef A2B::proc(KernelEvalContext* ctx, const NdArrayRef& x) const { NdArrayRef B2A::proc(KernelEvalContext* ctx, const NdArrayRef& x) const { const auto field = ctx->getState()->getDefaultField(); + const auto numel = x.numel(); + if (numel == 0) { // for empty input + return NdArrayRef(makeType(field), x.shape()); + } return TiledDispatchOTFunc( ctx, x, [&](const NdArrayRef& input, diff --git a/libspu/mpc/cheetah/ot/BUILD.bazel 
b/libspu/mpc/cheetah/ot/BUILD.bazel index 00a1dfc75..30af42854 100644 --- a/libspu/mpc/cheetah/ot/BUILD.bazel +++ b/libspu/mpc/cheetah/ot/BUILD.bazel @@ -29,11 +29,11 @@ spu_cc_library( "//libspu/core:ndarray_ref", "//libspu/core:prelude", "//libspu/mpc/common:communicator", - "@com_google_absl//absl/types:span", + "@abseil-cpp//absl/types:span", "@yacl//yacl/base:int128", ] + select({ "@platforms//cpu:aarch64": [ - "@com_github_dltcollab_sse2neon//:sse2neon", + "@sse2neon", ], "//conditions:default": [], }), diff --git a/libspu/mpc/cheetah/ot/emp/BUILD.bazel b/libspu/mpc/cheetah/ot/emp/BUILD.bazel index e4b753d90..05e1b3c7f 100644 --- a/libspu/mpc/cheetah/ot/emp/BUILD.bazel +++ b/libspu/mpc/cheetah/ot/emp/BUILD.bazel @@ -31,8 +31,8 @@ spu_cc_library( "//libspu/mpc/cheetah/ot:ferret_ot_interface", "//libspu/mpc/cheetah/ot:ot_util", "//libspu/mpc/common:communicator", - "@com_github_emptoolkit_emp_ot//:emp-ot", - "@com_github_emptoolkit_emp_tool//:emp-tool", + "@emp-ot//:emp-ot", + "@emp-tool//:emp-tool", "@yacl//yacl/base:int128", "@yacl//yacl/link", ], diff --git a/libspu/mpc/cheetah/protocol.cc b/libspu/mpc/cheetah/protocol.cc index 8b638d57c..407e6ca2d 100644 --- a/libspu/mpc/cheetah/protocol.cc +++ b/libspu/mpc/cheetah/protocol.cc @@ -79,7 +79,7 @@ void regCheetahProtocol(SPUContext* ctx, cheetah::CommonTypeB, cheetah::CommonTypeV, // cheetah::CastTypeB, cheetah::AndBP, cheetah::AndBB, // cheetah::XorBP, cheetah::XorBB, // - cheetah::RandA>(); + cheetah::RandA, cheetah::RandB>(); } std::unique_ptr makeCheetahProtocol( diff --git a/libspu/mpc/cheetah/rlwe/BUILD.bazel b/libspu/mpc/cheetah/rlwe/BUILD.bazel index 5b08672f0..e97d0b932 100644 --- a/libspu/mpc/cheetah/rlwe/BUILD.bazel +++ b/libspu/mpc/cheetah/rlwe/BUILD.bazel @@ -62,7 +62,7 @@ spu_cc_library( ], deps = [ "//libspu/mpc/utils:ring_ops", - "@com_github_microsoft_seal//:seal", + "@seal", ], ) diff --git a/libspu/mpc/common/communicator.cc b/libspu/mpc/common/communicator.cc index b7dc4089f..315649139 
100644 --- a/libspu/mpc/common/communicator.cc +++ b/libspu/mpc/common/communicator.cc @@ -147,17 +147,29 @@ std::vector Communicator::gather(const NdArrayRef& in, size_t root, } NdArrayRef Communicator::broadcast(const NdArrayRef& in, size_t root, + const Type& eltype, const Shape& shape, std::string_view tag) { - const auto array = getOrCreateCompactArray(in); - yacl::ByteContainerView bv(reinterpret_cast(array.data()), - array.elsize() * array.numel()); - auto buf = yacl::link::Broadcast(lctx_, bv, root, tag); - stats_.latency += 1; stats_.comm += in.elsize() * in.numel(); - return NdArrayRef(stealBuffer(std::move(buf)), in.eltype(), in.shape(), - makeCompactStrides(in.shape()), kOffset); + yacl::Buffer buf; + if (lctx_->Rank() == root) { + const auto array = getOrCreateCompactArray(in); + yacl::ByteContainerView bv(reinterpret_cast(array.data()), + array.elsize() * array.numel()); + auto buf = yacl::link::Broadcast(lctx_, bv, root, tag); + return NdArrayRef(stealBuffer(std::move(buf)), in.eltype(), in.shape(), + makeCompactStrides(in.shape()), kOffset); + } else { + // for yacl::link::Broadcast need a legal ByteContainerView + // But the data is not actually used + std::array dummy; + auto buf = yacl::link::Broadcast(lctx_, dummy, root, tag); + SPU_ENFORCE(static_cast(buf.size()) == + shape.numel() * eltype.size()); + return NdArrayRef(stealBuffer(std::move(buf)), eltype, shape, + makeCompactStrides(shape), kOffset); + } } void Communicator::sendAsync(size_t dst_rank, const NdArrayRef& in, diff --git a/libspu/mpc/common/communicator.h b/libspu/mpc/common/communicator.h index f61039376..44a383717 100644 --- a/libspu/mpc/common/communicator.h +++ b/libspu/mpc/common/communicator.h @@ -106,7 +106,8 @@ class Communicator : public State { std::vector gather(const NdArrayRef& in, size_t root, std::string_view tag); - NdArrayRef broadcast(const NdArrayRef& in, size_t root, std::string_view tag); + NdArrayRef broadcast(const NdArrayRef& in, size_t root, const Type& 
eltype, + const Shape& shape, std::string_view tag); NdArrayRef reduce(ReduceOp op, const NdArrayRef& in, size_t root, std::string_view tag); diff --git a/libspu/mpc/kernel.cc b/libspu/mpc/kernel.cc index 56b5c1bb0..628f949c3 100644 --- a/libspu/mpc/kernel.cc +++ b/libspu/mpc/kernel.cc @@ -275,4 +275,16 @@ void OramReadKernel::evaluate(KernelEvalContext* ctx) const { WrapValue(proc(ctx, UnwrapValue(onehot), UnwrapValue(db), offset))); } +void MultiKeyLowMcKernel::evaluate(KernelEvalContext* ctx) const { + const auto& in = ctx->getParam>(0); + std::vector inputs; + inputs.reserve(in.size()); + for (const auto& item : in) { + inputs.push_back(UnwrapValue(item)); + } + auto y = proc(ctx, inputs); + + ctx->pushOutput(WrapValue(y)); +} + } // namespace spu::mpc diff --git a/libspu/mpc/kernel.h b/libspu/mpc/kernel.h index d75b3426a..90c629dde 100644 --- a/libspu/mpc/kernel.h +++ b/libspu/mpc/kernel.h @@ -225,4 +225,12 @@ class DisassembleKernel : public Kernel { const NdArrayRef& in) const = 0; }; +class MultiKeyLowMcKernel : public Kernel { + public: + void evaluate(KernelEvalContext* ctx) const override; + + virtual NdArrayRef proc(KernelEvalContext* ctx, + const std::vector& inputs) const = 0; +}; + } // namespace spu::mpc diff --git a/libspu/mpc/semi2k/BUILD.bazel b/libspu/mpc/semi2k/BUILD.bazel index dfef1ec7d..661b0f329 100644 --- a/libspu/mpc/semi2k/BUILD.bazel +++ b/libspu/mpc/semi2k/BUILD.bazel @@ -112,6 +112,7 @@ spu_cc_library( ":boolean", ":conversion", ":exp", + ":lowmc", ":permute", ":state", "//libspu/mpc/common:prg_state", @@ -130,6 +131,8 @@ spu_cc_test( "//libspu/mpc:ab_api_test", "//libspu/mpc:api_test", "//libspu/mpc/semi2k/beaver/beaver_impl/ttp_server:beaver_server", + "//libspu/mpc/utils:lowmc", + "@yacl//yacl/utils:elapsed_timer", ], ) @@ -184,3 +187,17 @@ spu_cc_library( "//libspu/mpc/utils:ring_ops", ], ) + +spu_cc_library( + name = "lowmc", + srcs = ["lowmc.cc"], + hdrs = ["lowmc.h"], + deps = [ + ":type", + "//libspu/mpc:ab_api", + 
"//libspu/mpc:kernel", + "//libspu/mpc/common:prg_state", + "//libspu/mpc/common:pv2k", + "//libspu/mpc/utils:lowmc", + ], +) diff --git a/libspu/mpc/semi2k/beaver/BUILD.bazel b/libspu/mpc/semi2k/beaver/BUILD.bazel index 8fab06b2b..84eb3112a 100644 --- a/libspu/mpc/semi2k/beaver/BUILD.bazel +++ b/libspu/mpc/semi2k/beaver/BUILD.bazel @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -load("//bazel:spu.bzl", "spu_cc_library", "spu_cc_test") +load("//bazel:spu.bzl", "spu_cc_library") package(default_visibility = ["//visibility:public"]) @@ -25,7 +25,7 @@ spu_cc_library( "//libspu/core:ndarray_ref", "//libspu/core:shape", "//libspu/mpc/common:prg_tensor", - "@com_github_google_leveldb//:leveldb", + "@leveldb", ], ) diff --git a/libspu/mpc/semi2k/beaver/beaver_impl/BUILD.bazel b/libspu/mpc/semi2k/beaver/beaver_impl/BUILD.bazel index 5f0bd1e12..03f1e8702 100644 --- a/libspu/mpc/semi2k/beaver/beaver_impl/BUILD.bazel +++ b/libspu/mpc/semi2k/beaver/beaver_impl/BUILD.bazel @@ -24,10 +24,10 @@ spu_cc_library( "//libspu/mpc/common:prg_tensor", "//libspu/mpc/semi2k/beaver:beaver_interface", "//libspu/mpc/semi2k/beaver/beaver_impl/trusted_party", - "//libspu/mpc/semi2k/beaver/beaver_impl/ttp_server:beaver_stream", "//libspu/mpc/utils:gfmp_ops", + "//libspu/mpc/utils:permute", "//libspu/mpc/utils:ring_ops", - "@com_github_microsoft_seal//:seal", + "@seal", "@yacl//yacl/link", "@yacl//yacl/utils:parallel", ], @@ -44,7 +44,7 @@ spu_cc_test( "//libspu/mpc/utils:gfmp", "//libspu/mpc/utils:permute", "//libspu/mpc/utils:simulate", - "@com_google_googletest//:gtest", + "@googletest//:gtest", ], ) @@ -55,9 +55,9 @@ spu_cc_library( deps = [ "//libspu/mpc/common:prg_tensor", "//libspu/mpc/semi2k/beaver:beaver_interface", - "//libspu/mpc/semi2k/beaver/beaver_impl/ttp_server:beaver_stream", "//libspu/mpc/semi2k/beaver/beaver_impl/ttp_server:service_cc_proto", "//libspu/mpc/utils:gfmp_ops", + "//libspu/mpc/utils:permute", 
"//libspu/mpc/utils:ring_ops", "@yacl//yacl/crypto/pke:sm2_enc", "@yacl//yacl/link", diff --git a/libspu/mpc/semi2k/beaver/beaver_impl/beaver_test.cc b/libspu/mpc/semi2k/beaver/beaver_impl/beaver_test.cc index 300a2f6e7..425fd4fe6 100644 --- a/libspu/mpc/semi2k/beaver/beaver_impl/beaver_test.cc +++ b/libspu/mpc/semi2k/beaver/beaver_impl/beaver_test.cc @@ -1093,22 +1093,20 @@ TEST_P(BeaverTest, PermPair) { const size_t kWorldSize = std::get<1>(GetParam()); const FieldType kField = std::get<2>(GetParam()); const size_t adjust_rank = std::get<4>(GetParam()); - const int64_t kNumel = 10; - std::random_device rd; - uint128_t seed = rd(); - uint64_t ctr = rd(); - const auto r_perm = genRandomPerm(kNumel, seed, &ctr); + const int64_t kNumel = 666 * 1024 + 1; for (size_t r = 0; r < kWorldSize; ++r) { - std::vector pairs(kWorldSize); + std::vector pairs(kWorldSize); + Index perm; utils::simulate( kWorldSize, [&](const std::shared_ptr& lctx) { auto beaver = factory(lctx, ttp_options_, adjust_rank); auto rank = lctx->Rank(); + auto PermPair = beaver->PermPair(kField, kNumel, r); + pairs[lctx->Rank()].first = std::move(std::get<0>(PermPair)); + pairs[lctx->Rank()].second = std::move(std::get<1>(PermPair)); if (rank == r) { - pairs[lctx->Rank()] = beaver->PermPair(kField, kNumel, r, r_perm); - } else { - pairs[lctx->Rank()] = beaver->PermPair(kField, kNumel, r, {}); + perm = std::move(std::get<2>(PermPair)); } yacl::link::Barrier(lctx, "BeaverUT"); }); @@ -1116,7 +1114,7 @@ TEST_P(BeaverTest, PermPair) { EXPECT_EQ(pairs.size(), kWorldSize); auto open = open_buffer(pairs, kField, std::vector(2, {kNumel}), kWorldSize, true); - EXPECT_TRUE(ring_all_equal(applyInvPerm(open[0], r_perm), open[1], 0)); + EXPECT_TRUE(ring_all_equal(applyInvPerm(open[0], perm), open[1], 0)); } } diff --git a/libspu/mpc/semi2k/beaver/beaver_impl/beaver_tfp.cc b/libspu/mpc/semi2k/beaver/beaver_impl/beaver_tfp.cc index f876209d2..a8d19e414 100644 --- a/libspu/mpc/semi2k/beaver/beaver_impl/beaver_tfp.cc +++ 
b/libspu/mpc/semi2k/beaver/beaver_impl/beaver_tfp.cc @@ -18,12 +18,14 @@ #include #include "yacl/crypto/rand/rand.h" +#include "yacl/link/algorithm/broadcast.h" #include "yacl/link/algorithm/gather.h" #include "yacl/utils/serialize.h" #include "libspu/mpc/common/prg_tensor.h" #include "libspu/mpc/semi2k/beaver/beaver_impl/trusted_party/trusted_party.h" #include "libspu/mpc/utils/gfmp_ops.h" +#include "libspu/mpc/utils/permute.h" #include "libspu/mpc/utils/ring_ops.h" namespace spu::mpc::semi2k { @@ -373,40 +375,46 @@ BeaverTfpUnsafe::Array BeaverTfpUnsafe::RandBit(FieldType field, int64_t size) { return std::move(*a.buf()); } -BeaverTfpUnsafe::Pair BeaverTfpUnsafe::PermPair( - FieldType field, int64_t size, size_t perm_rank, - absl::Span perm_vec) { +BeaverTfpUnsafe::PremTriple BeaverTfpUnsafe::PermPair(FieldType field, + int64_t size, + size_t perm_rank) { constexpr char kTag[] = "BEAVER_TFP:PERM"; + SPU_ENFORCE(perm_rank < lctx_->WorldSize(), "TODO"); std::vector ops(2); Shape shape({size}); auto a = prgCreateArray(field, shape, seed_, &counter_, &ops[0].desc); auto b = prgCreateArray(field, shape, seed_, &counter_, &ops[1].desc); + Index pi; + + if (lctx_->Rank() == perm_rank) { + pi = genRandomPerm(size, seed_, &counter_); + } if (lctx_->Rank() == 0) { for (auto& op : ops) { op.seeds = seeds_; } - if (perm_rank != lctx_->Rank()) { - auto pv_buf = lctx_->Recv(perm_rank, kTag); - - ring_add_(b, TrustedParty::adjustPerm( - absl::MakeSpan(ops), - absl::MakeSpan(pv_buf.data(), - pv_buf.size() / sizeof(int64_t)))); + if (perm_rank != 0) { + auto pi = genRandomPerm(size, seeds_[perm_rank], &counter_); + ring_add_(b, TrustedParty::adjustPerm(absl::MakeSpan(ops), pi)); } else { - ring_add_(b, TrustedParty::adjustPerm(absl::MakeSpan(ops), perm_vec)); + ring_add_(b, TrustedParty::adjustPerm(absl::MakeSpan(ops), pi)); } - } else if (perm_rank == lctx_->Rank()) { - lctx_->SendAsync( - 0, yacl::Buffer(perm_vec.data(), perm_vec.size() * sizeof(int64_t)), - kTag); } - Pair 
ret; - ret.first = std::move(*a.buf()); - ret.second = std::move(*b.buf()); + auto new_counter_buf = yacl::link::Broadcast( + lctx_, yacl::SerializeVars(counter_), perm_rank, kTag); + + counter_ = yacl::DeserializeVars(new_counter_buf); + + PremTriple ret; + std::get<0>(ret) = std::move(*a.buf()); + std::get<1>(ret) = std::move(*b.buf()); + if (lctx_->Rank() == perm_rank) { + std::get<2>(ret) = std::move(pi); + } return ret; } diff --git a/libspu/mpc/semi2k/beaver/beaver_impl/beaver_tfp.h b/libspu/mpc/semi2k/beaver/beaver_impl/beaver_tfp.h index 2f26a7168..bd618dd98 100644 --- a/libspu/mpc/semi2k/beaver/beaver_impl/beaver_tfp.h +++ b/libspu/mpc/semi2k/beaver/beaver_impl/beaver_tfp.h @@ -66,8 +66,7 @@ class BeaverTfpUnsafe final : public Beaver { Array RandBit(FieldType field, int64_t size) override; - Pair PermPair(FieldType field, int64_t size, size_t perm_rank, - absl::Span perm_vec) override; + PremTriple PermPair(FieldType field, int64_t size, size_t perm_rank) override; std::unique_ptr Spawn() override; diff --git a/libspu/mpc/semi2k/beaver/beaver_impl/beaver_ttp.cc b/libspu/mpc/semi2k/beaver/beaver_impl/beaver_ttp.cc index be2e9e866..496f272cf 100644 --- a/libspu/mpc/semi2k/beaver/beaver_impl/beaver_ttp.cc +++ b/libspu/mpc/semi2k/beaver/beaver_impl/beaver_ttp.cc @@ -14,19 +14,26 @@ #include "libspu/mpc/semi2k/beaver/beaver_impl/beaver_ttp.h" +#include #include +#include #include #include +#include "brpc/progressive_reader.h" #include "yacl/crypto/pke/sm2_enc.h" #include "yacl/crypto/rand/rand.h" #include "yacl/link/algorithm/allgather.h" +#include "yacl/link/algorithm/broadcast.h" +#include "yacl/utils/serialize.h" #include "libspu/mpc/common/prg_tensor.h" -#include "libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/beaver_stream.h" #include "libspu/mpc/utils/gfmp_ops.h" +#include "libspu/mpc/utils/permute.h" #include "libspu/mpc/utils/ring_ops.h" +#include "libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/service.pb.h" + namespace brpc { 
DECLARE_uint64(max_body_size); @@ -63,7 +70,7 @@ AdjustRequest BuildAdjustRequest( SPU_ENFORCE(!descs.empty()); - uint32_t field_size; + uint32_t field_size = 0; ElementType eltype = ElementType::kRing; for (size_t i = 0; i < descs.size(); i++) { @@ -104,136 +111,205 @@ AdjustRequest BuildAdjustRequest( template struct dependent_false : std::false_type {}; -class StreamReader : public brpc::StreamInputHandler { +// Obtain a tuple containing num_buf and buf_len +template +std::tuple GetBufferLength(const AdjustRequest& req) { + if constexpr (std::is_same_v) { + SPU_ENFORCE_EQ(req.prg_inputs().size(), 3); + return {1, req.prg_inputs()[2].buffer_len()}; + } else if constexpr (std::is_same_v< + AdjustRequest, + beaver::ttp_server::AdjustTruncPrRequest>) { + SPU_ENFORCE_GE(req.prg_inputs().size(), 1); + return {2, req.prg_inputs()[0].buffer_len()}; + } else { + SPU_ENFORCE_GE(req.prg_inputs().size(), 1); + return {1, req.prg_inputs()[0].buffer_len()}; + } +} + +class ProgressiveReader : public brpc::ProgressiveReader { public: - enum class Status : int8_t { - kNotFinished, - kNormalFinished, - kAbnormalFinished, - kStreamFailed, - }; + ProgressiveReader(int32_t num_buf, int64_t buf_len) + : buffer_remain_size_(buf_len * num_buf), - StreamReader(int32_t num_buf, size_t buf_len) { - SPU_ENFORCE(num_buf > 0); - SPU_ENFORCE(buf_len > 0); - buf_vec_.resize(num_buf); - buf_len_ = buf_len; - future_finished_ = promise_finished_.get_future(); - future_closed_ = promise_closed_.get_future(); + receive_buffers_(num_buf) { + for (auto& b : receive_buffers_) { + b.resize(buf_len); + } } - int on_received_messages(brpc::StreamId id, butil::IOBuf* const messages[], - size_t size) override { - SPDLOG_DEBUG("on_received_messages, stream id: {}", id); - for (size_t i = 0; i < size; ++i) { - if (status_ != Status::kNotFinished) { - SPDLOG_ERROR("unexpected messages received"); - return -1; + butil::Status OnReadOnePart(const void* data, size_t length) final { + size_t consumed = 0; + try 
{ + while (consumed < length) { + const auto* consume_data = + reinterpret_cast(data) + consumed; + size_t remain_length = length - consumed; + + if (current_state_ == ReadFlags) { + consumed += copy_to_flags(consume_data, remain_length); + } else if (current_state_ == ReadChunk) { + consumed += copy_to_buffer(consume_data, remain_length); + } else if (current_state_ == ReadError) { + consumed += copy_to_error(consume_data, remain_length); + } else if (current_state_ == End) { + return butil::Status( + -1, "response size mismatch, receive data after end"); + } } - - SPDLOG_DEBUG("receive buf size: {}", messages[i]->size()); - const auto& message = messages[i]; - beaver::ttp_server::BeaverDownStreamMeta meta; - message->copy_to(&meta, sizeof(meta)); - message->pop_front(sizeof(meta)); - if (meta.err_code != 0) { - SPDLOG_ERROR("response error from server, err_code: {}, err_text: {}", - meta.err_code, message->to_string()); - status_ = Status::kAbnormalFinished; - promise_finished_.set_value(status_); - return -2; + if (current_state_ == End && !server_error_msg_.empty()) { + return butil::Status( + -1, + fmt::format("server side error code {}, msg {}", + beaver::ttp_server::ErrorCode_Name(server_error_code_), + server_error_msg_)); } + } catch (const std::exception& e) { + return butil::Status(-1, fmt::format("unexpected error {}", e.what())); + } - SPU_ENFORCE(message->length() % buf_vec_.size() == 0); - size_t msg_len = message->length() / buf_vec_.size(); - for (size_t buf_idx = 0; buf_idx < buf_vec_.size(); ++buf_idx) { - message->append_to(&buf_vec_[buf_idx], msg_len, buf_idx * msg_len); - } + return butil::Status::OK(); + } - SPU_ENFORCE(buf_vec_[0].length() <= buf_len_, - "unexpected bytes received"); - if (buf_vec_[0].length() == buf_len_) { - status_ = Status::kNormalFinished; - promise_finished_.set_value(status_); + void OnEndOfMessage(const butil::Status& status) final { + { + std::lock_guard lk(lock_); + if (current_state_ == End) { + // received all 
data. + read_status_ = status; + } else if (status.ok()) { + // rpc streaming finished, but we expected more data + read_status_ = + butil::Status(-1, "response size mismatch, need more data"); + } else { + // some error happend in network or OnReadOnePart + read_status_ = status; } } - return 0; + cond_.notify_all(); } - void on_idle_timeout(brpc::StreamId id) override { - SPDLOG_WARN("Stream {} idle timeout", id); + void Wait() { + { + std::unique_lock lk(lock_); + cond_.wait(lk, [this] { return read_status_.has_value(); }); + } + SPU_ENFORCE(read_status_->ok(), "Beaver Streaming data err: {}", + read_status_->error_str()); } - void on_closed(brpc::StreamId id) override { - SPDLOG_DEBUG("Stream {} closed", id); - promise_closed_.set_value(); + std::vector PopBuffer() { + { + std::lock_guard lk(lock_); + SPU_ENFORCE(current_state_ == End, "pls wait streaming finished"); + } + return std::move(receive_buffers_); } - void on_failed(brpc::StreamId id, int error_code, - const std::string& error_text) override { - SPDLOG_ERROR("Stream {} failed, error_code: {}, error_text: {}", id, - error_code, error_text); - status_ = Status::kStreamFailed; - promise_finished_.set_value(status_); + private: + size_t copy_to_flags(const void* data, size_t length) { + size_t cp_size = std::min(flags_.size() - flags_pos_, length); + std::memcpy(flags_.data() + flags_pos_, data, cp_size); + flags_pos_ += cp_size; + if (flags_pos_ == flags_.size()) { + flags_pos_ = 0; + int64_t chunk_size = 0; + std::memcpy(&chunk_size, &flags_[1], sizeof(int64_t)); + chunk_remain_size_ = chunk_size; + if (flags_[0] == 0) { + current_state_ = ReadChunk; + } else if (beaver::ttp_server::ErrorCode_IsValid(flags_[0])) { + server_error_code_ = + static_cast(flags_[0]); + current_state_ = ReadError; + } else { + SPU_THROW("unexpected flags[0] {}", flags_[0]); + } + } + + return cp_size; } - const auto& GetBufVecRef() const { - SPU_ENFORCE(status_ == Status::kNormalFinished); - return buf_vec_; + size_t 
copy_to_buffer(const void* data, size_t length) { + length = std::min(length, chunk_remain_size_); + chunk_remain_size_ -= length; + if (chunk_remain_size_ == 0) { + current_state_ = ReadFlags; + } + + if (length > buffer_remain_size_) { + SPU_THROW("response size mismatch, too many data for buffer"); + } + + buffer_remain_size_ -= length; + if (buffer_remain_size_ == 0) { + current_state_ = End; + } + + size_t data_pos = 0; + while (data_pos < length) { + if (current_buffer_idx_ >= receive_buffers_.size()) { + SPU_THROW("response size mismatch, outof index"); + } + auto& buffer = receive_buffers_[current_buffer_idx_]; + auto cp_size = std::min(length, buffer.size() - current_buffer_pos_); + std::memcpy(buffer.data() + current_buffer_pos_, + reinterpret_cast(data) + data_pos, cp_size); + current_buffer_pos_ += cp_size; + if (current_buffer_pos_ == static_cast(buffer.size())) { + current_buffer_pos_ = 0; + current_buffer_idx_ += 1; + } + data_pos += cp_size; + } + + return length; } - Status WaitFinished() { return future_finished_.get(); }; + size_t copy_to_error(const void* data, size_t length) { + length = std::min(length, chunk_remain_size_); + chunk_remain_size_ -= length; + if (chunk_remain_size_ == 0) { + current_state_ = End; + } - void WaitClosed() { future_closed_.wait(); } + server_error_msg_.append(reinterpret_cast(data), length); + return length; + } private: - std::vector buf_vec_; - size_t buf_len_; - Status status_ = Status::kNotFinished; - std::promise promise_finished_; - std::promise promise_closed_; - std::future future_finished_; - std::future future_closed_; + enum State : uint8_t { + ReadFlags = 0, + ReadChunk = 1, + ReadError = 2, + End = 3, + }; + State current_state_{ReadFlags}; + size_t flags_pos_{}; + std::array flags_; + size_t chunk_remain_size_{}; + std::string server_error_msg_; + beaver::ttp_server::ErrorCode server_error_code_; + + size_t buffer_remain_size_; + size_t current_buffer_idx_{}; + size_t current_buffer_pos_{}; + 
std::vector receive_buffers_; + + std::mutex lock_; + std::condition_variable cond_; + std::optional read_status_; }; -// Obtain a tuple containing num_buf and buf_len -template -std::tuple GetBufferLength(const AdjustRequest& req) { - if constexpr (std::is_same_v) { - SPU_ENFORCE_EQ(req.prg_inputs().size(), 3); - return {1, req.prg_inputs()[2].buffer_len()}; - } else if constexpr (std::is_same_v< - AdjustRequest, - beaver::ttp_server::AdjustTruncPrRequest>) { - SPU_ENFORCE_GE(req.prg_inputs().size(), 1); - return {2, req.prg_inputs()[0].buffer_len()}; - } else { - SPU_ENFORCE_GE(req.prg_inputs().size(), 1); - return {1, req.prg_inputs()[0].buffer_len()}; - } -} - template -std::vector RpcCall( - brpc::Channel& channel, AdjustRequest req, FieldType ret_field, - const std::vector* upstream_messages = nullptr) { - brpc::Controller cntl; +std::vector RpcCall(brpc::Channel& channel, + const AdjustRequest& req, FieldType ret_field) { beaver::ttp_server::BeaverService::Stub stub(&channel); beaver::ttp_server::AdjustResponse rsp; - - auto [num_buf, buf_len] = GetBufferLength(req); - StreamReader reader(num_buf, buf_len); - brpc::StreamOptions stream_options; - stream_options.max_buf_size = 2 * beaver::ttp_server::kUpStreamChunkSize; - stream_options.handler = &reader; - brpc::StreamId stream_id; - SPU_ENFORCE_EQ(brpc::StreamCreate(&stream_id, cntl, &stream_options), 0, - "Failed to create stream"); - auto cleanup = absl::MakeCleanup([&stream_id, &reader]() { - SPU_ENFORCE(brpc::StreamClose(stream_id) == 0); - reader.WaitClosed(); - }); + brpc::Controller cntl; + cntl.response_will_be_read_progressively(); if constexpr (std::is_same_v) { @@ -276,35 +352,21 @@ std::vector RpcCall( SPU_ENFORCE(!cntl.Failed(), "Adjust RpcCall failed, code={} error={}", cntl.ErrorCode(), cntl.ErrorText()); - SPU_ENFORCE(rsp.code() == beaver::ttp_server::ErrorCode::OK, - "Adjust server failed code={}, error={}", - ErrorCode_Name(rsp.code()), rsp.message()); - - if (upstream_messages != nullptr) 
{ - for (const auto& message : *upstream_messages) { - int ret = brpc::StreamWrite(stream_id, message); - if (ret == EAGAIN) { - SPU_ENFORCE_EQ(brpc::StreamWait(stream_id, nullptr), 0); - ret = brpc::StreamWrite(stream_id, message); - } - SPU_ENFORCE_EQ(ret, 0, "Write stream failed"); - SPDLOG_DEBUG("write buf size {} to stream id {}", message.length(), - stream_id); - } - } - auto status = reader.WaitFinished(); - SPU_ENFORCE(status == StreamReader::Status::kNormalFinished, - "Stream reader finished abnormally, status: {}", - static_cast(status)); + auto [num_buf, buf_len] = GetBufferLength(req); + ProgressiveReader reader(num_buf, buf_len); + cntl.ReadProgressiveAttachmentBy(&reader); + reader.Wait(); + auto buffers = reader.PopBuffer(); + std::vector ret; - for (const auto& buf : reader.GetBufVecRef()) { - SPU_ENFORCE(buf.length() % SizeOf(ret_field) == 0); - int64_t size = buf.length() / SizeOf(ret_field); + for (auto& buf : buffers) { + SPU_ENFORCE(buf.size() % SizeOf(ret_field) == 0); + int64_t size = buf.size() / SizeOf(ret_field); // FIXME: change beaver interface: change return type to buffer. - NdArrayRef array(makeType(ret_field), {size}); // FIXME: TTP adjuster server and client MUST have same endianness. 
- buf.copy_to(array.data()); + NdArrayRef array(std::make_shared(std::move(buf)), + makeType(ret_field), {size}); ret.push_back(std::move(array)); } @@ -325,15 +387,18 @@ BeaverTtp::BeaverTtp(std::shared_ptr lctx, Options ops) SPU_ENFORCE(lctx_); { brpc::ChannelOptions brc_options; + SPU_ENFORCE(options_.brpc_channel_protocol == "http" || + options_.brpc_channel_protocol == "h2", + "beaver only support http 1.1 or http 2"); brc_options.protocol = options_.brpc_channel_protocol; - brc_options.connection_type = options_.brpc_channel_connection_type; brc_options.timeout_ms = options_.brpc_timeout_ms; brc_options.max_retry = options_.brpc_max_retry; - // TODO TLS - if (channel_.Init(options_.server_host.c_str(), - options_.brpc_load_balancer_name.c_str(), - &brc_options) != 0) { + if (options_.brpc_ssl_options) { + *brc_options.mutable_ssl_options() = options_.brpc_ssl_options.value(); + } + + if (channel_.Init(options_.server_host.c_str(), &brc_options) != 0) { SPU_THROW("Fail to initialize channel for BeaverTtp, server_host {}", options_.server_host); } @@ -654,9 +719,9 @@ BeaverTtp::Array BeaverTtp::RandBit(FieldType field, int64_t size) { return std::move(*a.buf()); } -BeaverTtp::Pair BeaverTtp::PermPair(FieldType field, int64_t size, - size_t perm_rank, - absl::Span perm_vec) { +BeaverTtp::PremTriple BeaverTtp::PermPair(FieldType field, int64_t size, + size_t perm_rank) { + constexpr char kTag[] = "BEAVER_TFP:PERM"; std::vector descs(2); std::vector> descs_seed(1, encrypted_seeds_); Shape shape({size, 1}); @@ -664,30 +729,34 @@ BeaverTtp::Pair BeaverTtp::PermPair(FieldType field, int64_t size, auto a = prgCreateArray(field, shape, seed_, &counter_, descs.data()); auto b = prgCreateArray(field, shape, seed_, &counter_, &descs[1]); - if (lctx_->Rank() == perm_rank) { + if (lctx_->Rank() == options_.adjust_rank) { auto req = BuildAdjustRequest( descs, descs_seed); - std::vector stream_data; - size_t left_buf_size = perm_vec.size() * sizeof(int64_t); - size_t 
chunk_idx = 0; - while (left_buf_size > 0) { - using beaver::ttp_server::kUpStreamChunkSize; - size_t cur_chunk_size = std::min(left_buf_size, kUpStreamChunkSize); - stream_data.emplace_back(); - stream_data.back().append(reinterpret_cast(perm_vec.data()) + - (chunk_idx * kUpStreamChunkSize), - cur_chunk_size); - ++chunk_idx; - left_buf_size -= cur_chunk_size; - } - auto adjusts = RpcCall(channel_, req, field, &stream_data); + auto* perm_meta = req.mutable_perm(); + perm_meta->set_prg_count(counter_); + perm_meta->set_size(size); + auto& perm_seed = encrypted_seeds_[perm_rank]; + perm_meta->set_encrypted_seeds(perm_seed.data(), perm_seed.size()); + auto adjusts = RpcCall(channel_, req, field); SPU_ENFORCE_EQ(adjusts.size(), 1U); ring_add_(b, adjusts[0].reshape(b.shape())); } - Pair ret; - ret.first = std::move(*a.buf()); - ret.second = std::move(*b.buf()); + Index pi; + if (lctx_->Rank() == perm_rank) { + pi = genRandomPerm(size, seed_, &counter_); + } + + auto new_counter_buf = yacl::link::Broadcast( + lctx_, yacl::SerializeVars(counter_), perm_rank, kTag); + + counter_ = yacl::DeserializeVars(new_counter_buf); + + PremTriple ret; + std::get<0>(ret) = std::move(*a.buf()); + std::get<1>(ret) = std::move(*b.buf()); + std::get<2>(ret) = std::move(pi); + return ret; } diff --git a/libspu/mpc/semi2k/beaver/beaver_impl/beaver_ttp.h b/libspu/mpc/semi2k/beaver/beaver_impl/beaver_ttp.h index 501d5eac9..adca54760 100644 --- a/libspu/mpc/semi2k/beaver/beaver_impl/beaver_ttp.h +++ b/libspu/mpc/semi2k/beaver/beaver_impl/beaver_ttp.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include "brpc/channel.h" #include "yacl/base/buffer.h" @@ -34,17 +35,15 @@ class BeaverTtp final : public Beaver { // asym_crypto_schema: support ["SM2"] // Will support 25519 in the future, after yacl supported it. 
std::string asym_crypto_schema; - // TODO: Remote Attestation yacl::Buffer server_public_key; size_t adjust_rank; - std::string brpc_channel_protocol = "baidu_std"; - std::string brpc_channel_connection_type = "single"; - std::string brpc_load_balancer_name; - int32_t brpc_timeout_ms = 10 * 1000; + std::string brpc_channel_protocol = "http"; + int32_t brpc_connect_timeout_ms = 10 * 1000; + int32_t brpc_timeout_ms = 300 * 1000; int32_t brpc_max_retry = 5; - // TODO: TLS ops for client/server two-way authentication + std::optional brpc_ssl_options; }; private: @@ -87,8 +86,7 @@ class BeaverTtp final : public Beaver { Array RandBit(FieldType field, int64_t size) override; - Pair PermPair(FieldType field, int64_t size, size_t perm_rank, - absl::Span perm_vec) override; + PremTriple PermPair(FieldType field, int64_t size, size_t perm_rank) override; std::unique_ptr Spawn() override; diff --git a/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/BUILD.bazel b/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/BUILD.bazel index 8ccd5d276..98cb4fef4 100644 --- a/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/BUILD.bazel +++ b/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/BUILD.bazel @@ -28,9 +28,14 @@ proto_library( srcs = ["service.proto"], ) -spu_cc_library( - name = "beaver_stream", - hdrs = ["beaver_stream.h"], +cc_proto_library( + name = "config_cc_proto", + deps = [":config_proto"], +) + +proto_library( + name = "config_proto", + srcs = ["config.proto"], ) spu_cc_library( @@ -38,10 +43,9 @@ spu_cc_library( srcs = ["beaver_server.cc"], hdrs = ["beaver_server.h"], deps = [ - ":beaver_stream", ":service_cc_proto", "//libspu/mpc/semi2k/beaver/beaver_impl/trusted_party", - "@com_github_brpc_brpc//:brpc", + "@brpc", "@yacl//yacl/crypto/pke:sm2_enc", ], ) @@ -51,6 +55,7 @@ spu_cc_binary( srcs = ["beaver_server_main.cc"], deps = [ ":beaver_server", + ":config_cc_proto", "//libspu/core:logging", ], ) diff --git a/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/beaver_server.cc 
b/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/beaver_server.cc index 2a5136ec2..5676eb2b1 100644 --- a/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/beaver_server.cc +++ b/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/beaver_server.cc @@ -14,7 +14,10 @@ #include "libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/beaver_server.h" +#include + #include +#include #include #include @@ -27,7 +30,7 @@ #include "libspu/core/ndarray_ref.h" #include "libspu/mpc/common/prg_tensor.h" #include "libspu/mpc/semi2k/beaver/beaver_impl/trusted_party/trusted_party.h" -#include "libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/beaver_stream.h" +#include "libspu/mpc/utils/permute.h" #include "libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/service.pb.h" @@ -42,14 +45,22 @@ namespace spu::mpc::semi2k::beaver::ttp_server { namespace { +const int64_t kReplayChunkSize = 32L * 1024 * 1024; + inline size_t CeilDiv(size_t a, size_t b) { return (a + b - 1) / b; } class DecryptError : public yacl::Exception { using yacl::Exception::Exception; }; +struct PermMeta { + uint64_t prg_count; + PrgSeed seed; + int64_t size; +}; + template -std::tuple, +std::tuple, PermMeta, std::vector>, size_t> BuildOperand(const AdjustRequest& req, uint32_t field_size, const std::unique_ptr& decryptor, @@ -150,107 +161,20 @@ BuildOperand(const AdjustRequest& req, uint32_t field_size, } } - return {std::move(ops), std::move(seeds), pad_length}; -} - -std::vector StripNdArray(std::vector& nds, - size_t pad_length) { - std::vector ret; - ret.reserve(nds.size()); - - auto if_pad = [&](NdArrayRef& nd) { - yacl::Buffer buf = std::move(*nd.buf()); - if (pad_length > 0) { - buf.resize(buf.size() - pad_length); - } - return buf; - }; - - for (auto& nd : nds) { - ret.push_back(if_pad(nd)); + PermMeta perm; + if constexpr (std::is_same_v) { + const PrgRandPermMeta& perm_meta = req.perm(); + perm.prg_count = perm_meta.prg_count(); + perm.size = perm_meta.size(); + perm.seed = try_decrypt(perm_meta.encrypted_seeds()); } - 
return ret; + return {std::move(ops), std::move(perm), std::move(seeds), pad_length}; } template struct dependent_false : std::false_type {}; -class StreamReader : public brpc::StreamInputHandler { - public: - enum class Status : int8_t { - kNotFinished, - kNormalFinished, - kAbnormalFinished, - kStreamFailed, - }; - - explicit StreamReader(size_t total_buf_len) { - total_buf_len_ = total_buf_len; - future_finished_ = promise_finished_.get_future(); - future_closed_ = promise_closed_.get_future(); - } - - int on_received_messages(brpc::StreamId id, butil::IOBuf* const messages[], - size_t size) override { - SPDLOG_DEBUG("on_received_messages, stream id: {}", id); - for (size_t i = 0; i < size; ++i) { - if (status_ != Status::kNotFinished) { - SPDLOG_WARN("unexpected messages received"); - return -1; - } - const auto& message = messages[i]; - SPDLOG_DEBUG("receive buf size: {}", message->size()); - buf_.append(message->movable()); - if (buf_.length() == total_buf_len_) { - status_ = Status::kNormalFinished; - promise_finished_.set_value(status_); - } else if (buf_.length() > total_buf_len_) { - SPDLOG_ERROR("buf length ({}) greater than expected buf size ({})", - buf_.length(), total_buf_len_); - status_ = Status::kAbnormalFinished; - promise_finished_.set_value(status_); - } - } - return 0; - } - - void on_idle_timeout(brpc::StreamId id) override { - SPDLOG_INFO("Stream {} idle timeout", id); - } - - void on_closed(brpc::StreamId id) override { - SPDLOG_DEBUG("Stream {} closed", id); - promise_closed_.set_value(); - } - - void on_failed(brpc::StreamId id, int error_code, - const std::string& error_text) override { - SPDLOG_ERROR("Stream {} failed, error_code: {}, error_text: {}", id, - error_code, error_text); - status_ = Status::kStreamFailed; - promise_finished_.set_value(status_); - } - - const auto& GetBufRef() const { - SPU_ENFORCE(status_ == Status::kNormalFinished); - return buf_; - } - - Status WaitFinished() { return future_finished_.get(); }; - - void 
WaitClosed() { future_closed_.wait(); } - - private: - butil::IOBuf buf_; - size_t total_buf_len_; - Status status_ = Status::kNotFinished; - std::promise promise_finished_; - std::promise promise_closed_; - std::future future_finished_; - std::future future_closed_; -}; - template size_t GetBufferLength(const AdjustRequest& req) { if constexpr (std::is_same_v buf_vec) { - SPU_ENFORCE(!buf_vec.empty()); - for (size_t idx = 1; idx < buf_vec.size(); ++idx) { - SPU_ENFORCE_EQ(buf_vec[0].size(), buf_vec[idx].size()); - } +void HandleStreamingError( + butil::intrusive_ptr& pa) { + int errsv = errno; + YACL_THROW_IO_ERROR("streaming Write error, errno {}, strerror {}, client {}", + errsv, strerror(errsv), + butil::endpoint2str(pa->remote_side()).c_str()); +} + +void SendStreamData(const std::vector& adjusts, + butil::intrusive_ptr& pa, + int64_t pad_length = 0) { + SPU_ENFORCE(!adjusts.empty()); - size_t chunk_size = kDownStreamChunkSize / buf_vec.size(); // FIXME: TTP adjuster server and client MUST have same endianness. 
- size_t left_buf_size = buf_vec[0].size(); - int64_t chunk_idx = 0; - while (left_buf_size > 0) { - butil::IOBuf io_buf; - BeaverDownStreamMeta meta; - io_buf.append(&meta, sizeof(meta)); - - size_t cur_chunk_size = std::min(left_buf_size, chunk_size); - for (const auto& buf : buf_vec) { - int ret = io_buf.append(buf.data() + (chunk_idx * chunk_size), - cur_chunk_size); - SPU_ENFORCE_EQ(ret, 0, "Append data to IO buffer failed"); + for (const auto& adjust : adjusts) { + const auto& buf = adjust.buf(); + const auto* data = buf->data(); + const int64_t need_seed = buf->size() - pad_length; + + int64_t pos = 0; + while (pos < need_seed) { + const int64_t send_size = std::min(need_seed - pos, kReplayChunkSize); + std::array flags; + flags[0] = 0; + std::memcpy(&flags[1], &send_size, sizeof(int64_t)); + if (pa->Write(flags.data(), flags.size()) != 0) { + HandleStreamingError(pa); + } + if (pa->Write(data + pos, send_size) != 0) { + HandleStreamingError(pa); + } + pos += send_size; } + } +} - // StreamWrite result cannot be EAGAIN, given that we have not set - // max_buf_size - SPU_ENFORCE_EQ(brpc::StreamWrite(stream_id, io_buf), 0); +void SendError(butil::intrusive_ptr& pa, + ErrorCode code, const std::string& err) { + std::array flags; + int64_t err_size = err.size(); + flags[0] = code; + // FIXME: TTP adjuster server and client MUST have same endianness. 
+ std::memcpy(&flags[1], &err_size, sizeof(int64_t)); - left_buf_size -= cur_chunk_size; - ++chunk_idx; + try { + if (pa->Write(flags.data(), flags.size()) != 0) { + HandleStreamingError(pa); + } + if (pa->Write(err.data(), err.size()) != 0) { + HandleStreamingError(pa); + } + } catch (const std::exception& e) { + // streaming write error, we can do nothing but logging + SPDLOG_ERROR( + "error happend during send error to client, error try to send {}, " + "error happend {}", + err, e.what()); + return; } } template std::vector AdjustImpl(const AdjustRequest& req, absl::Span ops, - StreamReader& stream_reader) { + const PermMeta& perm) { std::vector ret; if constexpr (std::is_same_v) { auto adjust = TrustedParty::adjustMul(ops); @@ -332,14 +285,8 @@ std::vector AdjustImpl(const AdjustRequest& req, auto adjust = TrustedParty::adjustEqz(ops); ret.push_back(std::move(adjust)); } else if constexpr (std::is_same_v) { - auto status = stream_reader.WaitFinished(); - SPU_ENFORCE(status == StreamReader::Status::kNormalFinished, - "Stream reader finished abnormally, status: {}", - static_cast(status)); - const auto& buf = stream_reader.GetBufRef(); - SPU_ENFORCE(buf.length() % sizeof(int64_t) == 0); - std::vector pv(buf.length() / sizeof(int64_t)); - buf.copy_to(pv.data()); + uint64_t prg_count = perm.prg_count; + auto pv = genRandomPerm(perm.size, perm.seed, &prg_count); auto adjust = TrustedParty::adjustPerm(ops, pv); ret.push_back(std::move(adjust)); } else { @@ -352,57 +299,91 @@ std::vector AdjustImpl(const AdjustRequest& req, template void AdjustAndSend( - const AdjustRequest& req, brpc::StreamId stream_id, - StreamReader& stream_reader, + brpc::Controller* cntl, const AdjustRequest* req, + ::google::protobuf::Closure* done, const std::unique_ptr& decryptor) { - size_t field_size; - if constexpr (std::is_same_v) { - field_size = 128 / 8; - } else { - field_size = req.field_size(); - } - ElementType eltype = ElementType::kRing; - // enable eltype for selected requests 
here - // later all requests may support gfmp - if constexpr (std::is_same_v || - std::is_same_v) { - if (req.element_type() == ElType::GFMP) { - eltype = ElementType::kGfmp; + std::string client_side(butil::endpoint2str(cntl->remote_side()).c_str()); + auto pa = cntl->CreateProgressiveAttachment(); + + std::tuple, PermMeta, + std::vector>, size_t> + adjust_params; + + // AdjustAndSend using streaming send, needs call done before starting + // calculation, done will free req, but calculation needs to use req + // so we make a copy here. + const auto request = *req; + { + brpc::ClosureGuard done_guard(done); + try { + size_t field_size; + if constexpr (std::is_same_v) { + field_size = 128 / 8; + } else { + field_size = request.field_size(); + } + ElementType eltype = ElementType::kRing; + // enable eltype for selected requests here + // later all requests may support gfmp + if constexpr (std::is_same_v || + std::is_same_v) { + if (request.element_type() == ElType::GFMP) { + eltype = ElementType::kGfmp; + } + } + adjust_params = BuildOperand(request, field_size, decryptor, eltype); + } catch (const DecryptError& e) { + auto err = fmt::format("Seed Decrypt error {}", e.what()); + SPDLOG_ERROR("{}, client {}", err, client_side); + SendError(pa, ErrorCode::SeedDecryptError, err); + return; + } catch (const std::exception& e) { + auto err = fmt::format("adjust error {}", e.what()); + SPDLOG_ERROR("{}, client {}", err, client_side); + SendError(pa, ErrorCode::OpAdjustError, err); + return; } } - auto [ops, seeds, pad_length] = - BuildOperand(req, field_size, decryptor, eltype); - - if constexpr (std::is_same_v || - std::is_same_v) { - auto adjusts = AdjustImpl(req, absl::MakeSpan(ops), stream_reader); - auto buf_vec = StripNdArray(adjusts, pad_length); - SendStreamData(stream_id, buf_vec); - return; - } - SPU_ENFORCE_EQ(beaver::ttp_server::kReplayChunkSize % 128, 0U); - SPU_ENFORCE(!ops.empty()); - for (size_t idx = 1; idx < ops.size(); idx++) { - 
SPU_ENFORCE(ops[0].desc.shape == ops[idx].desc.shape); - } - int64_t left_elements = ops[0].desc.shape.at(0); - int64_t chunk_elements = - beaver::ttp_server::kReplayChunkSize / SizeOf(ops[0].desc.field); - while (left_elements > 0) { - int64_t cur_elements = std::min(left_elements, chunk_elements); - left_elements -= cur_elements; - for (auto& op : ops) { - op.desc.shape[0] = cur_elements; - } - auto adjusts = AdjustImpl(req, absl::MakeSpan(ops), stream_reader); - if (left_elements > 0) { - auto buf_vec = StripNdArray(adjusts, 0); - SendStreamData(stream_id, buf_vec); + try { + auto& [ops, perm, seeds, pad_length] = adjust_params; + if constexpr (std::is_same_v || + std::is_same_v) { + auto adjusts = AdjustImpl(request, absl::MakeSpan(ops), perm); + SendStreamData(adjusts, pa); } else { - auto buf_vec = StripNdArray(adjusts, pad_length); - SendStreamData(stream_id, buf_vec); + SPU_ENFORCE_EQ(beaver::ttp_server::kReplayChunkSize % 128, 0U); + SPU_ENFORCE(!ops.empty()); + for (size_t idx = 1; idx < ops.size(); idx++) { + SPU_ENFORCE(ops[0].desc.shape == ops[idx].desc.shape); + } + int64_t left_elements = ops[0].desc.shape.at(0); + int64_t chunk_elements = + beaver::ttp_server::kReplayChunkSize / SizeOf(ops[0].desc.field); + while (left_elements > 0) { + int64_t cur_elements = std::min(left_elements, chunk_elements); + left_elements -= cur_elements; + for (auto& op : ops) { + op.desc.shape[0] = cur_elements; + } + auto adjusts = AdjustImpl(request, absl::MakeSpan(ops), perm); + if (left_elements > 0) { + SendStreamData(adjusts, pa); + } else { + SendStreamData(adjusts, pa, pad_length); + } + } } + } catch (const yacl::IoError& e) { + // streaming write error, we can do nothing but logging + SPDLOG_ERROR(e.what()); + return; + } catch (const std::exception& e) { + // some other error happened, try send to client. 
+ auto err = fmt::format("adjust error {}", e.what()); + SPDLOG_ERROR("{}, client {}", err, client_side); + SendError(pa, ErrorCode::OpAdjustError, err); + return; } } @@ -429,58 +410,7 @@ class ServiceImpl final : public BeaverService { const AdjustRequest* req, AdjustResponse* rsp, ::google::protobuf::Closure* done) const { auto* cntl = static_cast(controller); - std::string client_side(butil::endpoint2str(cntl->remote_side()).c_str()); - brpc::StreamId stream_id = brpc::INVALID_STREAM_ID; - auto request = *req; - StreamReader reader(GetBufferLength(*req)); - - // To address the scenario where clients transmit data after an RPC - // response, give precedence to setting up absl::MakeCleanup before invoking - // brpc::ClosureGuard to ensure proper resource management - auto cleanup = absl::MakeCleanup([&]() { - auto cleanup = absl::MakeCleanup([&]() { - if (stream_id != brpc::INVALID_STREAM_ID) { - // To avoid encountering a core dump, it is essential to close the - // process stream prior to the destruction of the StreamReader object - reader.WaitClosed(); - } - }); - try { - AdjustAndSend(request, stream_id, reader, decryptor_); - } catch (const DecryptError& e) { - auto err = fmt::format("Seed Decrypt error {}", e.what()); - SPDLOG_ERROR("{}, client {}", err, - client_side); // TODO: catch the function name - BeaverDownStreamMeta meta; - meta.err_code = ErrorCode::SeedDecryptError; - butil::IOBuf buf; - SPU_ENFORCE_EQ(buf.append(&meta, sizeof(meta)), 0); - SPU_ENFORCE_EQ(buf.append(err.c_str()), 0); - brpc::StreamWrite(stream_id, buf); - return; - } catch (const std::exception& e) { - auto err = fmt::format("adjust error {}", e.what()); - SPDLOG_ERROR("{}, client {}", err, client_side); - BeaverDownStreamMeta meta; - meta.err_code = ErrorCode::OpAdjustError; - butil::IOBuf buf; - SPU_ENFORCE_EQ(buf.append(&meta, sizeof(meta)), 0); - SPU_ENFORCE_EQ(buf.append(err.c_str()), 0); - brpc::StreamWrite(stream_id, buf); - return; - } - }); - - brpc::ClosureGuard 
done_guard(done); - brpc::StreamOptions stream_options; - stream_options.max_buf_size = 0; // there is no flow control for downstream - stream_options.handler = &reader; - if (brpc::StreamAccept(&stream_id, *cntl, &stream_options) != 0) { - SPDLOG_ERROR("Failed to accept stream"); - rsp->set_code(ErrorCode::StreamAcceptError); - return; - } - rsp->set_code(ErrorCode::OK); + AdjustAndSend(cntl, req, done, decryptor_); } void AdjustMul(::google::protobuf::RpcController* controller, @@ -558,9 +488,13 @@ std::unique_ptr RunServer(const ServerOptions& options) { return nullptr; } - // TODO: add TLS options for client/server two-way authentication brpc::ServerOptions brpc_options; - brpc_options.has_builtin_services = true; + + if (options.brpc_ssl_options) { + *brpc_options.mutable_ssl_options() = options.brpc_ssl_options.value(); + } + + brpc_options.has_builtin_services = false; if (server->Start(options.port, &brpc_options) != 0) { SPDLOG_ERROR("Fail to start Server"); return nullptr; diff --git a/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/beaver_server.h b/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/beaver_server.h index 7824057a8..7850d4d35 100644 --- a/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/beaver_server.h +++ b/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/beaver_server.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include "brpc/server.h" #include "yacl/base/buffer.h" @@ -26,8 +27,8 @@ struct ServerOptions { // asym_crypto_schema: support ["SM2"] // Will support 25519 in the future, after yacl supported it. 
std::string asym_crypto_schema; - // TODO: Remote Attestation yacl::Buffer server_private_key; + std::optional brpc_ssl_options; }; std::unique_ptr RunServer(const ServerOptions& options); diff --git a/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/beaver_server_main.cc b/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/beaver_server_main.cc index 0a701cf3a..fa72b8b54 100644 --- a/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/beaver_server_main.cc +++ b/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/beaver_server_main.cc @@ -13,23 +13,34 @@ // limitations under the License. #include +#include #include "absl/strings/ascii.h" -#include "butil/base64.h" +#include "butil/file_util.h" #include "gflags/gflags.h" +#include "google/protobuf/util/json_util.h" #include "yacl/crypto/key_utils.h" #include "libspu/core/logging.h" #include "libspu/core/prelude.h" #include "libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/beaver_server.h" +#include "libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/config.pb.h" + +using spu::mpc::semi2k::beaver::ttp_server::TTPServerConfig; + namespace ttp_server_config { DEFINE_bool( gen_key, false, "if true, gen a pair of asym_crypto_schema key in base64, then exit."); DEFINE_string(asym_crypto_schema, "sm2", "asym_crypto_schema: support [\"SM2\"]"); -DEFINE_string(server_private_key, "", "base64ed server_private_key"); +DEFINE_string(public_key_out, "sm2-key.pub", "file path to save public key"); +DEFINE_string(private_key_out, "sm2-key", "file path to save private key"); +DEFINE_string(config_file, "/home/admin/server-config.json", + "server config file, json format, see config.proto"); +DEFINE_string(private_key_file, "/home/admin/server-private-key", + "private key file path"); DEFINE_int32(port, 9449, "TCP Port of this server"); DEFINE_string(log_dir, "logs", "log directory"); DEFINE_bool(enable_console_logger, true, @@ -37,7 +48,6 @@ DEFINE_bool(enable_console_logger, true, DEFINE_int64(max_log_file_size, 100 * 1024 * 1024, "max file size 
for each log file"); DEFINE_int64(max_log_file_count, 10, "max rotated log files save in dir"); - } // namespace ttp_server_config void SetupLogging() { @@ -55,24 +65,37 @@ void SetupLogging() { void GenKeyPair(const std::string& asym_crypto_schema) { auto lower_schema = absl::AsciiStrToLower(asym_crypto_schema); - std::pair asym_crypto_key; + yacl::crypto::openssl::UniquePkey asym_crypto_key; if (lower_schema == "sm2") { - asym_crypto_key = yacl::crypto::GenSm2KeyPairToPemBuf(); + asym_crypto_key = yacl::crypto::GenSm2KeyPair(); } else { SPU_THROW("not support asym_crypto_schema {}", asym_crypto_schema); } - std::string base64_pk; - std::string base64_sk; - - butil::Base64Encode(std::string(asym_crypto_key.first.data(), - asym_crypto_key.first.size()), - &base64_pk); - butil::Base64Encode(std::string(asym_crypto_key.second.data(), - asym_crypto_key.second.size()), - &base64_sk); - SPDLOG_INFO("\nbase64ed public key:\n{}\n\nbase64ed private key:\n{}\n", - base64_pk, base64_sk); + yacl::crypto::ExportPublicKeyToPemFile( + asym_crypto_key, ttp_server_config::FLAGS_public_key_out); + yacl::crypto::ExportSecretKeyToDerFile( + asym_crypto_key, ttp_server_config::FLAGS_private_key_out); +} + +std::optional ReadServerConfig() { + std::string json; + if (!butil::ReadFileToString( + butil::FilePath(ttp_server_config::FLAGS_config_file), &json)) { + return std::nullopt; + } + + TTPServerConfig config; + auto status = google::protobuf::util::JsonStringToMessage(json, &config); + SPU_ENFORCE(status.ok(), status.ToString()); + + return config; +} + +yacl::Buffer ReadPrivateKey() { + auto private_key = + yacl::crypto::LoadKeyFromFile(ttp_server_config::FLAGS_private_key_file); + return yacl::crypto::ExportSecretKeyToPemBuf(private_key); } int main(int argc, char* argv[]) { @@ -86,19 +109,25 @@ int main(int argc, char* argv[]) { return 0; } - yacl::Buffer decode_private_key; - { - std::string key; - SPU_ENFORCE( - butil::Base64Decode(ttp_server_config::FLAGS_server_private_key, 
&key)); - decode_private_key = yacl::Buffer(key.data(), key.size()); + spu::mpc::semi2k::beaver::ttp_server::ServerOptions ops; + ops.server_private_key = ReadPrivateKey(); + auto config = ReadServerConfig(); + if (config.has_value()) { + ops.port = config.value().server_port(); + ops.asym_crypto_schema = config.value().asym_crypto_schema(); + if (config->has_ssl()) { + brpc::ServerSSLOptions ssl_options; + ssl_options.default_cert.certificate = config.value().ssl().cert_file(); + ssl_options.default_cert.private_key = config.value().ssl().key_file(); + ssl_options.verify.ca_file_path = config.value().ssl().ca_file(); + ssl_options.verify.verify_depth = config.value().ssl().verify_depth(); + ops.brpc_ssl_options = std::move(ssl_options); + } + } else { + SPDLOG_INFO("Failed to read config file, use command line options"); + ops.port = ttp_server_config::FLAGS_port; + ops.asym_crypto_schema = ttp_server_config::FLAGS_asym_crypto_schema; } - spu::mpc::semi2k::beaver::ttp_server::ServerOptions ops{ - .port = ttp_server_config::FLAGS_port, - .asym_crypto_schema = ttp_server_config::FLAGS_asym_crypto_schema, - .server_private_key = std::move(decode_private_key), - }; - return spu::mpc::semi2k::beaver::ttp_server::RunUntilAskedToQuit(ops); } diff --git a/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/config.proto b/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/config.proto new file mode 100644 index 000000000..456b5cd56 --- /dev/null +++ b/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/config.proto @@ -0,0 +1,45 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package spu.mpc.semi2k.beaver.ttp_server; + +option cc_generic_services = true; + +message SSLConfig { + // Certificate file in PEM format + string cert_file = 1; + + // Private key file in PEM format + string key_file = 2; + + // The trusted CA file to verify the peer's certificate + string ca_file = 3; + + // Maximum depth of the certificate chain for verification + // If 0, turn off the verification + int32 verify_depth = 4; +} + +message TTPServerConfig { + // Listening port + int32 server_port = 1; + + // Asymmetric crypto schema, support ["SM2"] + string asym_crypto_schema = 2; + + // Configurations related to SSL + SSLConfig ssl = 3; +} diff --git a/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/service.proto b/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/service.proto index 23fd30253..0549d005c 100644 --- a/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/service.proto +++ b/libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/service.proto @@ -22,7 +22,6 @@ enum ErrorCode { OK = 0; OpAdjustError = 1; SeedDecryptError = 2; - StreamAcceptError = 3; } // The type of element in the field. @@ -44,6 +43,13 @@ message PrgBufferMeta { bool transpose = 4; } +message PrgRandPermMeta { + uint64 prg_count = 1; + // permutation size + int64 size = 2; + bytes encrypted_seeds = 3; +} + // TTP Beaver service for semi2k only. 
service BeaverService { // V1 adjust ops @@ -201,13 +207,12 @@ message AdjustPermRequest { repeated PrgBufferMeta prg_inputs = 1; // What field size should be used to interpret buffer content uint32 field_size = 2; + // Rand permutation + PrgRandPermMeta perm = 3; // output - // adjust_b = (apply inverse permutation perm_vec to ra) - rb + // adjust_b = (apply inverse permutation perm to ra) - rb // make - // (adjust_b + rb) = apply inverse permutation perm_vec to ra + // (adjust_b + rb) = apply inverse permutation perm to ra } -message AdjustResponse { - ErrorCode code = 1; - string message = 2; -} +message AdjustResponse {} diff --git a/libspu/mpc/semi2k/beaver/beaver_interface.h b/libspu/mpc/semi2k/beaver/beaver_interface.h index 89c582671..7a7b7f6a2 100644 --- a/libspu/mpc/semi2k/beaver/beaver_interface.h +++ b/libspu/mpc/semi2k/beaver/beaver_interface.h @@ -18,6 +18,7 @@ #include "yacl/base/buffer.h" +#include "libspu/core/shape.h" #include "libspu/mpc/common/prg_tensor.h" #include "libspu/spu.pb.h" @@ -46,6 +47,7 @@ class Beaver { using Array = yacl::Buffer; using Triple = std::tuple; + using PremTriple = std::tuple; using Pair = std::pair; virtual ~Beaver() = default; @@ -85,20 +87,21 @@ class Beaver { // Generate share permutation pair. /* - ┌───────────────────────┐ - │ │ A i - Perm │ Permutation ├─────► - ───────►│ │ B i - │ Pair Generator ├─────► - │ │ + ┌───────────────────────┐ A i + │ ├─────► + size │ Permutation │ B i + ────────►│ ├─────► + │ Pair Generator │ π + │ ├─────► └───────────────────────┘ - InversePerm(A) = B + InversePermute(A, π) = B - if perm_rank == lctx->Rank(); perm not empty. + if rank == perm_rank ret[2] is π, otherwise, ret[2] is empty. + perm_rank should use ret[2] as a Span(buffer, size) view. 
*/ - virtual Pair PermPair(FieldType field, int64_t size, size_t perm_rank, - absl::Span perm_vec) = 0; + virtual PremTriple PermPair(FieldType field, int64_t size, + size_t perm_rank) = 0; virtual std::unique_ptr Spawn() = 0; diff --git a/libspu/mpc/semi2k/boolean.cc b/libspu/mpc/semi2k/boolean.cc index 707eb91a4..a303f7f2d 100644 --- a/libspu/mpc/semi2k/boolean.cc +++ b/libspu/mpc/semi2k/boolean.cc @@ -87,6 +87,22 @@ NdArrayRef CastTypeB::proc(KernelEvalContext*, const NdArrayRef& in, return in.as(to_type); } +NdArrayRef RandB::proc(KernelEvalContext* ctx, const Shape& shape) const { + auto* prg_state = ctx->getState(); + const auto field = ctx->getState()->getDefaultField(); + + return DISPATCH_ALL_FIELDS(field, [&]() { + auto r = prg_state->genPriv(field, shape); + // only rand bit is supported + const size_t nbits = 1; + NdArrayView _r(r); + + pforeach(0, shape.numel(), [&](int64_t idx) { _r[idx] = _r[idx] & 1; }); + + return makeBShare(r, field, nbits); + }); +} + NdArrayRef B2P::proc(KernelEvalContext* ctx, const NdArrayRef& in) const { const auto field = in.eltype().as()->field(); auto* comm = ctx->getState(); @@ -133,10 +149,12 @@ NdArrayRef AndBP::proc(KernelEvalContext* ctx, const NdArrayRef& lhs, NdArrayRef AndBB::proc(KernelEvalContext* ctx, const NdArrayRef& lhs, const NdArrayRef& rhs) const { SPU_ENFORCE(lhs.shape() == rhs.shape()); + SPU_ENFORCE(lhs.eltype().as()->field() == + rhs.eltype().as()->field()); auto* comm = ctx->getState(); auto* beaver = ctx->getState()->beaver(); - const auto field = ctx->getState()->getDefaultField(); + const auto field = lhs.eltype().as()->field(); const size_t out_nbits = std::min(getNumBits(lhs), getNumBits(rhs)); const PtType backtype = getBacktype(out_nbits); @@ -192,6 +210,8 @@ NdArrayRef AndBB::proc(KernelEvalContext* ctx, const NdArrayRef& lhs, NdArrayRef XorBP::proc(KernelEvalContext* ctx, const NdArrayRef& lhs, const NdArrayRef& rhs) const { SPU_ENFORCE(lhs.numel() == rhs.numel()); + 
SPU_ENFORCE(lhs.eltype().as()->field() == + rhs.eltype().as()->field()); auto* comm = ctx->getState(); @@ -208,8 +228,10 @@ NdArrayRef XorBP::proc(KernelEvalContext* ctx, const NdArrayRef& lhs, NdArrayRef XorBB::proc(KernelEvalContext* ctx, const NdArrayRef& lhs, const NdArrayRef& rhs) const { SPU_ENFORCE(lhs.numel() == rhs.numel()); + SPU_ENFORCE(lhs.eltype().as()->field() == + rhs.eltype().as()->field()); - const auto field = ctx->getState()->getDefaultField(); + const auto field = lhs.eltype().as()->field(); const size_t out_nbits = std::max(getNumBits(lhs), getNumBits(rhs)); return makeBShare(ring_xor(lhs, rhs), field, out_nbits); } diff --git a/libspu/mpc/semi2k/boolean.h b/libspu/mpc/semi2k/boolean.h index 766e39c81..3ec63505f 100644 --- a/libspu/mpc/semi2k/boolean.h +++ b/libspu/mpc/semi2k/boolean.h @@ -39,6 +39,17 @@ class CastTypeB : public CastTypeKernel { const Type& to_type) const override; }; +class RandB : public RandKernel { + public: + static constexpr const char* kBindName() { return "rand_b"; }; + + ce::CExpr latency() const override { return ce::Const(0); } + + ce::CExpr comm() const override { return ce::Const(0); } + + NdArrayRef proc(KernelEvalContext* ctx, const Shape& shape) const override; +}; + class B2P : public UnaryKernel { public: static constexpr const char* kBindName() { return "b2p"; } diff --git a/libspu/mpc/semi2k/lowmc.cc b/libspu/mpc/semi2k/lowmc.cc new file mode 100644 index 000000000..993e563fa --- /dev/null +++ b/libspu/mpc/semi2k/lowmc.cc @@ -0,0 +1,393 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "libspu/mpc/semi2k/lowmc.h" + +#include "libspu/mpc/ab_api.h" +#include "libspu/mpc/common/prg_state.h" +#include "libspu/mpc/common/pv2k.h" +#include "libspu/mpc/semi2k/type.h" +#include "libspu/mpc/utils/lowmc.h" +#include "libspu/mpc/utils/lowmc_utils.h" +#include "libspu/mpc/utils/ring_ops.h" + +namespace spu::mpc::semi2k { + +namespace { + +NdArrayRef wrap_xor_bp(SPUContext* ctx, const NdArrayRef& x, + const NdArrayRef& y) { + SPU_ENFORCE(x.shape() == y.shape()); + return UnwrapValue(xor_bp(ctx, WrapValue(x), WrapValue(y))); +} + +NdArrayRef wrap_xor_bb(SPUContext* ctx, const NdArrayRef& x, + const NdArrayRef& y) { + SPU_ENFORCE(x.shape() == y.shape()); + return UnwrapValue(xor_bb(ctx, WrapValue(x), WrapValue(y))); +} + +NdArrayRef wrap_and_bb(SPUContext* ctx, const NdArrayRef& x, + const NdArrayRef& y) { + SPU_ENFORCE(x.shape() == y.shape()); + return UnwrapValue(and_bb(ctx, WrapValue(x), WrapValue(y))); +} + +/// Some shape utils +NdArrayRef extract_bit_arr(const NdArrayRef& in, int64_t idx) { + const auto field = in.eltype().as()->field(); + SPU_ENFORCE((uint64_t)idx < SizeOf(field) * 8, "bit extract out of range."); + const auto bty = makeType(field, 1); + + NdArrayRef out(bty, in.shape()); + DISPATCH_ALL_FIELDS(field, [&]() { + NdArrayView _in(in); + NdArrayView _out(out); + + pforeach(0, in.numel(), [&](int64_t i) { // + _out[i] = (_in[i] >> idx) & 1; + }); + }); + + return out; +} + +// offset=0 means c, offset=2 means a +NdArrayRef extract_packed_bit_arr(const NdArrayRef& state, int64_t n_boxes, + int64_t offset) { + const 
auto field = state.eltype().as()->field(); + const auto bty = makeType(field, 1); + + const auto& ori_shape = state.shape(); + const auto ori_numel = ori_shape.numel(); + Shape to_shape = ori_shape; + to_shape[0] = ori_shape[0] * n_boxes; + + NdArrayRef ret(bty, to_shape); + DISPATCH_ALL_FIELDS(field, [&]() { + NdArrayView _state(state); + NdArrayView _ret(ret); + + for (int64_t i = 0; i < n_boxes; ++i) { + const auto start_idx = 3 * i; + pforeach(0, ori_shape.numel(), [&](int64_t idx) { // + _ret[idx + i * ori_numel] = (_state[idx] >> (start_idx + offset)) & 1; + }); + } + }); + + return ret; +} + +// do memory copying by hand, get packed (abc, bca) +std::tuple construct_concat_arr(const NdArrayRef& state, + int64_t n_boxes) { + const auto field = state.eltype().as()->field(); + const auto bty = makeType(field, 3); + + const auto& ori_shape = state.shape(); + const auto ori_numel = ori_shape.numel(); + Shape to_shape = ori_shape; + to_shape[0] = ori_shape[0] * n_boxes; + + NdArrayRef abc(bty, to_shape); + NdArrayRef bca(bty, to_shape); + + DISPATCH_ALL_FIELDS(field, [&]() { + NdArrayView _state(state); + NdArrayView _abc(abc); + NdArrayView _bca(bca); + + for (int64_t i = 0; i < n_boxes; ++i) { + const auto start_idx = 3 * i; + pforeach(0, ori_shape.numel(), [&](int64_t idx) { + // xxxx xabc => 0000 0abc + _abc[idx + i * ori_numel] = (_state[idx] >> start_idx) & 7; + // xxxx xabc => 0000 0bca + _bca[idx + i * ori_numel] = (((_state[idx] >> start_idx) & 3) << 1) | + ((_state[idx] >> (start_idx + 2)) & 1); + }); + } + }); + + return std::make_tuple(abc, bca); +} + +// for shape (k * n0, n1, ...), +// get array with shape = (n0, n1, ...) 
+NdArrayRef slice_arr(const NdArrayRef& x, int64_t idx, const Shape& ori_shape) { + const auto& whole_shape = x.shape(); + SPU_ENFORCE(ori_shape.ndim() == whole_shape.ndim(), "axis mismatch."); + SPU_ENFORCE(std::equal(whole_shape.begin() + 1, whole_shape.end(), + ori_shape.begin() + 1), + "mismatch of shape."); + + // compute slice indices + Index start_ind(ori_shape.ndim(), 0); + start_ind[0] = idx * ori_shape[0]; + Index end_ind(ori_shape.begin(), ori_shape.end()); + end_ind[0] = start_ind[0] + ori_shape[0]; + + return x.slice(start_ind, end_ind, {}); +} + +/// Some core operations for LowMC layer +NdArrayRef Sbox(KernelEvalContext* ctx, const NdArrayRef& state, + int64_t n_boxes, size_t n_bits) { + // for SboxLayer, the initial definition is a look-up table, we use some + // logical operations to replace it. + // i.e. Sbox(a, b, c) = (a + b * c, a + b + a * c, a + b + c + a * b), + // where `+` is XOR, `*` is AND + // TODO: Lots of memory copying here to save rounds, use FM8 for temporay + // a,b,c to save memory + NdArrayRef abc_arr; + NdArrayRef bca_arr; + // the origin data: ... 
a2b2c2 a1b1c1 a0b0c0 + // we concat all abc to get [a2b2c2; a1b1c1; a0b0c0] + // we concat all bca to get [b2c2a2; b1c1a1; b0c0a0] + std::tie(abc_arr, bca_arr) = construct_concat_arr(state, n_boxes); + + // doing all expensive secret and op simultaneously + auto abc_and_bca_arr = wrap_and_bb(ctx->sctx(), abc_arr, bca_arr); + auto abc_xor_bca_arr = wrap_xor_bb(ctx->sctx(), abc_arr, bca_arr); + + // extract all ab, bc, ac + auto ab_arr = extract_bit_arr(abc_and_bca_arr, 2); + auto bc_arr = extract_bit_arr(abc_and_bca_arr, 1); + auto ac_arr = extract_bit_arr(abc_and_bca_arr, 0); + + // extract a+b, b+c + auto a_b_arr = extract_bit_arr(abc_xor_bca_arr, 2); + auto b_c_arr = extract_bit_arr(abc_xor_bca_arr, 1); + + // extract a + auto a_arr = extract_packed_bit_arr(state, n_boxes, 2); + + // a + b * c + auto new_a = wrap_xor_bb(ctx->sctx(), a_arr, bc_arr); + // a + b + a * c + auto new_b = wrap_xor_bb(ctx->sctx(), a_b_arr, ac_arr); + // a + b + c + a * b + auto a_b_c_arr = wrap_xor_bb(ctx->sctx(), b_c_arr, a_arr); + auto new_c = wrap_xor_bb(ctx->sctx(), a_b_c_arr, ab_arr); + + std::vector bits_arr; + bits_arr.reserve(n_bits); + const auto& ori_shape = state.shape(); + // collect first 3*n_boxes bits + for (int64_t i = 0; i < n_boxes; ++i) { + bits_arr.push_back(slice_arr(new_c, i, ori_shape)); + bits_arr.push_back(slice_arr(new_b, i, ori_shape)); + bits_arr.push_back(slice_arr(new_a, i, ori_shape)); + } + + // concat all bits + const auto field = state.eltype().as()->field(); + auto ret = ring_zeros(field, state.shape()).as(state.eltype()); + + DISPATCH_ALL_FIELDS(field, [&]() { + NdArrayView _ret(ret); + NdArrayView _state(state); + + for (int64_t i = 0; i < 3 * n_boxes; ++i) { + NdArrayView _tmp(bits_arr[i]); + + pforeach(0, ret.numel(), [&](int64_t idx) { // + _ret[idx] = _ret[idx] | ((_tmp[idx] & 1) << i); + }); + } + + // The rest higher bits stay unchanged in SBoxLayer, so we copy them + pforeach(0, ret.numel(), [&](int64_t idx) { // + _ret[idx] = _ret[idx] | 
((_state[idx] >> (3 * n_boxes)) << (3 * n_boxes)); + }); + }); + + return ret; +} + +NdArrayRef Affine(KernelEvalContext* ctx, const LowMC& cipher, + const NdArrayRef& state, int64_t rounds) { + const auto field = state.eltype().as()->field(); + + const auto L_matrix = cipher.Lmat()[rounds]; + return dot_product_gf2(L_matrix, state, field); +} + +} // namespace + +NdArrayRef LowMcB::proc(KernelEvalContext* ctx, const NdArrayRef& in) const { + auto* prg_state = ctx->getState(); + + // generate the shared key, key0 ^ key1 = key + uint128_t key; + prg_state->fillPriv(absl::MakeSpan(&key, 1)); + + // generate public seed + uint128_t seed; + prg_state->fillPubl(absl::MakeSpan(&seed, 1)); + + return encrypt(ctx, in, key, seed); +} + +NdArrayRef LowMcB::encrypt(KernelEvalContext* ctx, const NdArrayRef& in, + uint128_t key, uint128_t seed) const { + const auto field = in.eltype().as()->field(); + const auto numel = in.numel(); + const auto k = SizeOf(field) * 8; + const auto shape = in.shape(); + const auto pub_ty = makeType(field); + + NdArrayRef out; + DISPATCH_ALL_FIELDS(field, [&]() { + auto d = get_data_complexity(numel); + auto cipher = LowMC(field, seed, d); + SPU_ENFORCE(static_cast(k) == cipher.data_block_size(), + "block size must be equal now."); + + // generate round keys + auto round_keys = + generate_round_keys(cipher.Kmat(), key, cipher.rounds(), field); + + // Following the same steps as in plaintext, with MPC primitives for bit + // operations. + // + // 1. key whiten: state = in ^ roundKeys[0] + auto round_key0 = round_keys[0].broadcast_to(shape, {}).as(pub_ty); + out = wrap_xor_bb(ctx->sctx(), in, round_key0); + + // 2. 
round loop: for i = 1 to r + // state = SboxLayer(state) + // state = GF2Dot(Lmatrix[i-1], state) + // state = state ^ RoundConstants[i-1] + // state = state ^ RoundKeys[i] + const auto n_boxes = cipher.number_of_boxes(); + SPU_ENFORCE((int64_t)k >= 3 * n_boxes, "invalid parameters setting."); + + for (int64_t r = 1; r <= cipher.rounds(); ++r) { + // The only Non Linear Layer in LowMC + out = Sbox(ctx, out, n_boxes, k); + + out = Affine(ctx, cipher, out, /*round idx*/ r - 1).as(in.eltype()); + + auto round_constant = + cipher.RoundConstants()[r - 1].broadcast_to(shape, {}).as(pub_ty); + out = wrap_xor_bp(ctx->sctx(), out, round_constant); + + auto round_key = round_keys[r].broadcast_to(shape, {}).as(pub_ty); + out = wrap_xor_bb(ctx->sctx(), out, round_key); + } + }); + + return out; +} + +namespace { +NdArrayRef wrap_lowmcb(KernelEvalContext* ctx, const NdArrayRef& in) { + return LowMcB().proc(ctx, in); +} + +FieldType get_dst_field(const int64_t k) { + if (k <= 32) { + return FM32; + } else if (k <= 64) { + return FM64; + } else { + // no matther how large k is, we always use FM128. 
+ return FM128; + } +} + +NdArrayRef concate_bits(const std::vector& inputs, + const FieldType dst_field) { + const auto field = inputs[0].eltype().as()->field(); + const auto k = SizeOf(field) * 8; + + SPU_ENFORCE(k * inputs.size() <= SizeOf(dst_field) * 8, + "too much inputs to concat!"); + + auto ret = ring_zeros(dst_field, inputs[0].shape()); + + DISPATCH_ALL_FIELDS(field, [&]() { + using src_el_t = ring2k_t; + + DISPATCH_ALL_FIELDS(dst_field, [&]() { + using dst_el_t = ring2k_t; + NdArrayView _ret(ret); + + for (uint64_t i = 0; i < inputs.size(); ++i) { + NdArrayView _inp(inputs[i]); + const auto shift_bits = k * i; + + pforeach(0, ret.numel(), [&](int64_t idx) { // + _ret[idx] |= (static_cast(_inp[idx]) << shift_bits); + }); + } + }); + }); + + return ret; +} + +} // namespace + +NdArrayRef MultiKeyLowMcB::proc(KernelEvalContext* ctx, + const std::vector& inputs) const { + SPU_ENFORCE(!inputs.empty()); + const auto field = inputs[0].eltype().as()->field(); + SPU_ENFORCE(std::all_of(inputs.begin() + 1, inputs.end(), + [&field](const NdArrayRef& v) { + return v.eltype().as()->field() == field; + }), + "all inputs must have the same field"); + + if (inputs.size() == 1) { + return wrap_lowmcb(ctx, inputs[0]); + } + + // SPU can now only native support FM128. + static constexpr int64_t kMaxBits = 128; + static constexpr FieldType kMaxField = FM128; + + const int64_t k = SizeOf(field) * 8; + const auto total_bits = k * inputs.size(); + + if (total_bits <= kMaxBits) { + // just concat all bits if SPU can handle it. + const auto dst_field = get_dst_field(total_bits); + auto concat_inp = + concate_bits(inputs, dst_field).as(makeType(dst_field)); + return wrap_lowmcb(ctx, concat_inp); + } else { + // re-mapping to FM128 + auto* prg_state = ctx->getState(); + const Shape rand_mat_shape = {kMaxBits}; + auto remapping_inp = ring_zeros(kMaxField, inputs[0].shape()); + // e.g. inputs = [x0, x1, x2, x3], each xi is 64 bits, we want to remap + // these to 128 bits. 
+ // Conceptually, we generate a public random binary matrix M (shape = (128, + // 64*4)), compute gf2dot(M, inputs), which is 128 bits output. + for (const auto& item : inputs) { + // logically, (128, k) binary matrix + const auto rand_mat = prg_state->genPubl(field, rand_mat_shape); + // split the large gf2dot into several small gf2dot and use xor to combine + // them. + auto part_dot = dot_product_gf2(rand_mat, item, kMaxField); + ring_xor_(remapping_inp, part_dot); + } + return wrap_lowmcb(ctx, remapping_inp.as(makeType(kMaxField))); + } +} + +} // namespace spu::mpc::semi2k diff --git a/libspu/mpc/semi2k/lowmc.h b/libspu/mpc/semi2k/lowmc.h new file mode 100644 index 000000000..c64262ffa --- /dev/null +++ b/libspu/mpc/semi2k/lowmc.h @@ -0,0 +1,72 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "libspu/mpc/kernel.h" + +namespace spu::mpc::semi2k { + +// ref: Ciphers for MPC and FHE +// https://eprint.iacr.org/2016/687.pdf +// +// LowMC cipher is a MPC-friendly block cipher which minimizes the depth and +// numbers of And Gates. +// For current implementation, we only support 128-bit key security. But user +// can change the data complexity to achieve higher efficiency. +// +// NOTE: Although LowMC is protocol agnostic (only depends on some boolean ops), +// but we still implement it in each protocol kernel now, for efficiency +// consideration. 
+class LowMcB : public UnaryKernel { + public: + static constexpr const char* kBindName() { return "lowmc_b"; } + + // the concrete cost depends on the data complexity + Kind kind() const override { return Kind::Dynamic; } + + NdArrayRef proc(KernelEvalContext* ctx, const NdArrayRef& in) const override; + + // inner function, mark as public only for testing + NdArrayRef encrypt(KernelEvalContext* ctx, const NdArrayRef& in, + uint128_t key, uint128_t seed // single key now + ) const; +}; + +// For multi-key condition, we use the scheme in: +// REF: https://eprint.iacr.org/2019/518 +// +// If we have m keys, each key has k bits, logically: +// 1. Concat all these keys and get mk-bits single key `X`. +// 2. Each party sample the same random binary matrix `M` with shape (mk, n), +// where n is the bits that SPU can handle (e.g. 128). +// 3. Then we compute `Y = gf2dot(X, M)`, and use `Y` as the input for LowMc +// encryption. +// +// Collision Prob p: about 2^{-n+q}, where q ~= 2 * log2(D), D is the total +// number of encoding. +// i.e. 
when n = 128, D = 2**20 (1M) , p ~= 2^{-88} +// when n = 128, D = 2**30 (1B) , p ~= 2^{-68} +class MultiKeyLowMcB : public MultiKeyLowMcKernel { + public: + static constexpr const char* kBindName() { return "multi_key_lowmc_b"; } + + // the concrete cost depends on the data complexity + Kind kind() const override { return Kind::Dynamic; } + + NdArrayRef proc(KernelEvalContext* ctx, + const std::vector& inputs) const override; +}; + +} // namespace spu::mpc::semi2k diff --git a/libspu/mpc/semi2k/permute.cc b/libspu/mpc/semi2k/permute.cc index 71f68ef42..a11424b5d 100644 --- a/libspu/mpc/semi2k/permute.cc +++ b/libspu/mpc/semi2k/permute.cc @@ -40,51 +40,58 @@ inline int64_t getOwner(const NdArrayRef& x) { return x.eltype().as()->owner(); } -Index ring2pv(const NdArrayRef& x) { - SPU_ENFORCE(x.eltype().isa(), "must be ring2k_type, got={}", - x.eltype()); - const auto field = x.eltype().as()->field(); - Index pv(x.numel()); - DISPATCH_ALL_FIELDS(field, [&]() { - NdArrayView _x(x); - pforeach(0, x.numel(), [&](int64_t idx) { pv[idx] = int64_t(_x[idx]); }); - }); - return pv; -} - // Secure inverse permutation of x by perm_rank's permutation pv -// The idea here is: -// Input permutation pv, beaver generates perm pair {, } that -// InversePermute(A, pv) = B. So we can get = InversePermute(open( - -// ), pv) + that y = InversePermute(x, pv). 
NdArrayRef SecureInvPerm(KernelEvalContext* ctx, const NdArrayRef& x, const NdArrayRef& perm, size_t perm_rank) { + // INPUT: X and private perm owned by perm_rank const auto lctx = ctx->lctx(); const auto field = x.eltype().as()->field(); + auto* comm = ctx->getState(); auto* beaver = ctx->getState()->beaver(); auto numel = x.numel(); - Index pv; - if (perm.eltype().isa() || - (perm.eltype().isa() && isOwner(ctx, perm.eltype()))) { - pv = ring2pv(perm); + if (lctx->Rank() == perm_rank) { + SPU_ENFORCE(perm.numel() == numel); + SPU_ENFORCE(perm.eltype().isa() || + (perm.eltype().isa() && isOwner(ctx, perm.eltype()))); } - auto [a_buf, b_buf] = beaver->PermPair(field, numel, perm_rank, pv); + + // beaver gives ai, bi, pr makes InvPerm(A, pr) = B + // pr is a private random permutation owned by perm_rank. + auto [a_buf, b_buf, pr] = beaver->PermPair(field, numel, perm_rank); + + NdArrayRef po; + if (lctx->Rank() == perm_rank) { + // mask perm by random permutation pr, get po = InvPerm(perm, pr) + auto p = std::move(pr); + po = applyInvPerm(perm, p); + // so: InvPerm(B, po) = InvPerm(InvPerm(A, pr), po) = InvPerm(A, perm) + } + // broadcast po to all rank. 
+ po = comm->broadcast(po, perm_rank, perm.eltype(), perm.shape(), + "perm_open_perm"); NdArrayRef a(std::make_shared(std::move(a_buf)), x.eltype(), x.shape()); NdArrayRef b(std::make_shared(std::move(b_buf)), x.eltype(), x.shape()); - auto t = wrap_a2v(ctx->sctx(), ring_sub(x, a).as(x.eltype()), perm_rank); + // reveal X-A to perm_rank + auto x_a = wrap_a2v(ctx->sctx(), ring_sub(x, a).as(x.eltype()), perm_rank); if (lctx->Rank() == perm_rank) { - SPU_ENFORCE(pv.size()); - ring_add_(b, applyInvPerm(t, pv)); + // perm_rank get InvPerm(X-A, perm) + InvPerm(bi, po) + b = applyInvPerm(b, po); + ring_add_(b, applyInvPerm(x_a, perm)); return b.as(x.eltype()); } else { - return b.as(x.eltype()); + // others rank get InvPerm(bi, po) + return applyInvPerm(b, po).as(x.eltype()); } + // finally get: + // InvPerm(X-A, perm) + ∑InvPerm(bi, po) = + // InvPerm(X, perm) - InvPerm(A, perm) + InvPerm(B, po) = + // InvPerm(X, perm) } } // namespace diff --git a/libspu/mpc/semi2k/protocol.cc b/libspu/mpc/semi2k/protocol.cc index 33d6226b8..cc7c8f0d6 100644 --- a/libspu/mpc/semi2k/protocol.cc +++ b/libspu/mpc/semi2k/protocol.cc @@ -21,6 +21,7 @@ #include "libspu/mpc/semi2k/boolean.h" #include "libspu/mpc/semi2k/conversion.h" #include "libspu/mpc/semi2k/exp.h" +#include "libspu/mpc/semi2k/lowmc.h" #include "libspu/mpc/semi2k/permute.h" #include "libspu/mpc/semi2k/state.h" #include "libspu/mpc/semi2k/type.h" @@ -51,22 +52,24 @@ void regSemi2kProtocol(SPUContext* ctx, ctx->prot()->addState(ctx->config(), lctx); ctx->prot() ->regKernel< - semi2k::P2A, semi2k::A2P, semi2k::A2V, semi2k::V2A, // - semi2k::NegateA, // - semi2k::AddAP, semi2k::AddAA, // - semi2k::MulAP, semi2k::MulAA, semi2k::SquareA, // - semi2k::MatMulAP, semi2k::MatMulAA, // - semi2k::LShiftA, semi2k::LShiftB, semi2k::RShiftB, // - semi2k::ARShiftB, // - semi2k::CommonTypeB, semi2k::CommonTypeV, semi2k::CastTypeB, // - semi2k::B2P, semi2k::P2B, // - semi2k::A2B, semi2k::B2A_Randbit, semi2k::B2A_Disassemble, // - 
semi2k::AndBP, semi2k::AndBB, semi2k::XorBP, semi2k::XorBB, // - semi2k::BitrevB, // - semi2k::BitIntlB, semi2k::BitDeintlB, // - semi2k::RandA, semi2k::RandPermM, semi2k::PermAM, semi2k::PermAP, // - semi2k::InvPermAM, semi2k::InvPermAP, semi2k::InvPermAV, // - semi2k::EqualAA, semi2k::EqualAP, // + semi2k::P2A, semi2k::A2P, semi2k::A2V, + semi2k::V2A, // + semi2k::NegateA, // + semi2k::AddAP, semi2k::AddAA, // + semi2k::MulAP, semi2k::MulAA, semi2k::SquareA, // + semi2k::MatMulAP, semi2k::MatMulAA, // + semi2k::LShiftA, semi2k::LShiftB, semi2k::RShiftB, // + semi2k::ARShiftB, // + semi2k::CommonTypeB, semi2k::CommonTypeV, semi2k::CastTypeB, // + semi2k::B2P, semi2k::P2B, // + semi2k::A2B, semi2k::B2A_Randbit, semi2k::B2A_Disassemble, // + semi2k::AndBP, semi2k::AndBB, semi2k::XorBP, semi2k::XorBB, // + semi2k::BitrevB, // + semi2k::BitIntlB, semi2k::BitDeintlB, // + semi2k::RandA, semi2k::RandB, // + semi2k::RandPermM, semi2k::PermAM, semi2k::PermAP, // + semi2k::InvPermAM, semi2k::InvPermAP, semi2k::InvPermAV, // + semi2k::EqualAA, semi2k::EqualAP, // semi2k::BeaverCacheKernel>(); if (ctx->config().trunc_allow_msb_error()) { @@ -86,6 +89,8 @@ void regSemi2kProtocol(SPUContext* ctx, } } // ctx->prot()->regKernel(); + ctx->prot()->regKernel(); + ctx->prot()->regKernel(); } std::unique_ptr makeSemi2kProtocol( diff --git a/libspu/mpc/semi2k/protocol_test.cc b/libspu/mpc/semi2k/protocol_test.cc index eb1a6c604..abf75fffc 100644 --- a/libspu/mpc/semi2k/protocol_test.cc +++ b/libspu/mpc/semi2k/protocol_test.cc @@ -18,6 +18,8 @@ #include "gtest/gtest.h" #include "yacl/crypto/key_utils.h" +#include "yacl/crypto/rand/rand.h" +#include "yacl/utils/elapsed_timer.h" #include "libspu/mpc/ab_api.h" #include "libspu/mpc/ab_api_test.h" @@ -26,10 +28,13 @@ #include "libspu/mpc/common/communicator.h" #include "libspu/mpc/semi2k/beaver/beaver_impl/ttp_server/beaver_server.h" #include "libspu/mpc/semi2k/exp.h" +#include "libspu/mpc/semi2k/lowmc.h" #include 
"libspu/mpc/semi2k/prime_utils.h" #include "libspu/mpc/semi2k/state.h" #include "libspu/mpc/semi2k/type.h" #include "libspu/mpc/utils/gfmp.h" +#include "libspu/mpc/utils/lowmc.h" +#include "libspu/mpc/utils/lowmc_utils.h" #include "libspu/mpc/utils/ring_ops.h" #include "libspu/mpc/utils/simulate.h" @@ -76,7 +81,8 @@ std::unique_ptr makeTTPSemi2kProtocol( ttp->set_adjust_rank(lctx->WorldSize() - 1); ttp->set_server_host(server_host); ttp->set_asym_crypto_schema("SM2"); - ttp->set_server_public_key(key_pair.first.data(), key_pair.first.size()); + ttp->set_server_public_key(key_pair.first.data(), + key_pair.first.size()); return makeSemi2kProtocol(ttp_rt, lctx); } @@ -554,9 +560,6 @@ TEST_P(BeaverCacheTest, ExpA) { bytes = lctx->GetStats()->sent_bytes - bytes; action = lctx->GetStats()->sent_actions - action; - SPDLOG_INFO("ExpA ({}) for n = {}, sent {} MiB ({} B per), actions {}", - field, numel, bytes * 1. / 1024. / 1024., bytes * 1. / numel, - action); }); assert(outp[0].eltype() == ring2k_shr[0].eltype()); auto got = ring_add(outp[0], outp[1]); @@ -571,16 +574,112 @@ TEST_P(BeaverCacheTest, ExpA) { expected = static_cast(std::round((expected * (1L << fxp)))) / (1L << fxp); double got = static_cast(got_view[i]) / (1L << fxp); - // cout left here for future improvement - std::cout << "expected: " << fmt::format("{0:f}", expected) - << ", got: " << fmt::format("{0:f}", got) << std::endl; - std::cout << "expected: " - << fmt::format("{0:b}", - static_cast(expected * (1L << fxp))) - << ", got: " << fmt::format("{0:b}", got_view[i]) << std::endl; max_err = std::max(max_err, std::abs(expected - got)); } ASSERT_LE(max_err, 1e-0); }); } + +using LowMCTestParams = + std::tuple; + +class LowMCTest : public ::testing::TestWithParam {}; + +INSTANTIATE_TEST_SUITE_P( + Semi2k, LowMCTest, + testing::Combine( + testing::Values(CreateObjectFn(makeSemi2kProtocol, "tfp"), + CreateObjectFn(makeTTPSemi2kProtocol, + "ttp")), // TFP or TTP + testing::Values(makeConfig(FieldType::FM32), 
// Global Field + makeConfig(FieldType::FM64), // + makeConfig(FieldType::FM128)), // + testing::Values(FM32, FM64, FM128), // LowMC runtime Field + testing::Values(2)), // npc + [](const testing::TestParamInfo& p) { + return fmt::format("{}x{}x{}x{}", std::get<0>(p.param).name(), + std::get<1>(p.param).field(), std::get<2>(p.param), + std::get<3>(p.param)); + ; + }); + +TEST_P(LowMCTest, EncryptCorrect) { + const auto factory = std::get<0>(GetParam()); + const RuntimeConfig& conf = std::get<1>(GetParam()); + + // Global Field can be different from LowMC runtime Field + const auto field = std::get<2>(GetParam()); + const size_t npc = std::get<3>(GetParam()); + + const Shape shape = {10, 5}; + // const Shape shape = {1000, 1000}; + + const auto bty = makeType(field); + const auto numel = shape.numel(); + + // sharing of x + NdArrayRef x[2]; + x[0] = ring_rand(field, shape).as(bty); + x[1] = ring_rand(field, shape).as(bty); + auto pub_x = ring_xor(x[0], x[1]); + + // sharing of key + uint128_t key[2]; + key[0] = yacl::crypto::SecureRandSeed(); + key[1] = yacl::crypto::SecureRandSeed(); + auto pub_key = key[0] ^ key[1]; + + uint128_t seed = 0; + + NdArrayRef out[2]; + utils::simulate(npc, [&](const std::shared_ptr& lcxt) { + auto obj = factory(conf, lcxt); + KernelEvalContext kcontext(obj.get()); + + int rank = lcxt->Rank(); + + // test for kernel registration + SPU_ENFORCE(obj->hasKernel("lowmc_b")); + spu::mpc::semi2k::LowMcB cipher; + + size_t b0 = lcxt->GetStats()->sent_bytes; + size_t r0 = lcxt->GetStats()->sent_actions; + yacl::ElapsedTimer pack_timer; + + // To test the correctness, we use the inner api + out[rank] = cipher.encrypt(&kcontext, x[rank], key[rank], seed); + + double pack_time = pack_timer.CountMs() * 1.0; + size_t b1 = lcxt->GetStats()->sent_bytes; + size_t r1 = lcxt->GetStats()->sent_actions; + + SPDLOG_INFO( + "LowMC ({}) for n = {}, elapsed {} ms, sent {} MiB ({} B per), " + "actions {}.", + field, numel, pack_time, (b1 - b0) * 1. / 1024. 
/ 1024., + (b1 - b0) * 1. / numel, r1 - r0); + }); + + SPU_ENFORCE(out[0].eltype().isa()); + SPU_ENFORCE(out[1].eltype().isa()); + + auto got = ring_xor(out[0], out[1]); + DISPATCH_ALL_FIELDS(field, [&]() { // + NdArrayView _got(got); + + auto block_cipher = LowMC(field, seed, get_data_complexity(numel)); + block_cipher.set_key(pub_key); + + auto c = block_cipher.encrypt(pub_x); + NdArrayView _exp(c); + + for (int64_t i = 0; i < numel; ++i) { + auto got_val = _got[i]; + auto exp_val = _exp[i]; + + EXPECT_EQ(got_val, exp_val); + } + }); +} + } // namespace spu::mpc::test diff --git a/libspu/mpc/semi2k/state.h b/libspu/mpc/semi2k/state.h index f9b394ca5..2e88c9f69 100644 --- a/libspu/mpc/semi2k/state.h +++ b/libspu/mpc/semi2k/state.h @@ -50,7 +50,22 @@ class Semi2kState : public State { const auto& key = conf.ttp_beaver_config().server_public_key(); ops.server_public_key = yacl::Buffer(key.data(), key.size()); } - // TODO: TLS & brpc options. + if (!conf.ttp_beaver_config().transport_protocol().empty()) { + ops.brpc_channel_protocol = + conf.ttp_beaver_config().transport_protocol(); + } + if (conf.ttp_beaver_config().has_ssl_config()) { + brpc::ChannelSSLOptions ssl_options; + ssl_options.verify.ca_file_path = + conf.ttp_beaver_config().ssl_config().ca_file_path(); + ssl_options.verify.verify_depth = + conf.ttp_beaver_config().ssl_config().verify_depth(); + ssl_options.client_cert.certificate = + conf.ttp_beaver_config().ssl_config().certificate(); + ssl_options.client_cert.private_key = + conf.ttp_beaver_config().ssl_config().private_key(); + ops.brpc_ssl_options = std::move(ssl_options); + } beaver_ = std::make_unique(lctx, std::move(ops)); } else { SPU_THROW("unsupported beaver type {}", conf.beaver_type()); diff --git a/libspu/mpc/spdz2k/BUILD.bazel b/libspu/mpc/spdz2k/BUILD.bazel index fb1ab48ae..855ece0ad 100644 --- a/libspu/mpc/spdz2k/BUILD.bazel +++ b/libspu/mpc/spdz2k/BUILD.bazel @@ -197,7 +197,7 @@ spu_cc_library( "//libspu/mpc/common:communicator", 
"//libspu/mpc/common:pv2k", "//libspu/mpc/utils:simulate", - "@com_google_googletest//:gtest", + "@googletest//:gtest", ], alwayslink = True, ) diff --git a/libspu/mpc/spdz2k/beaver/BUILD.bazel b/libspu/mpc/spdz2k/beaver/BUILD.bazel index b7f4d80e7..9846a4709 100644 --- a/libspu/mpc/spdz2k/beaver/BUILD.bazel +++ b/libspu/mpc/spdz2k/beaver/BUILD.bazel @@ -35,7 +35,7 @@ spu_cc_library( "//libspu/mpc/common:prg_tensor", "//libspu/mpc/spdz2k:commitment", "//libspu/mpc/utils:ring_ops", - "@com_github_microsoft_seal//:seal", + "@seal", "@yacl//yacl/crypto/block_cipher:symmetric_crypto", "@yacl//yacl/crypto/tools:prg", "@yacl//yacl/link", @@ -51,7 +51,7 @@ spu_cc_test( ":beaver_tfp", ":beaver_tinyot", "//libspu/mpc/utils:simulate", - "@com_google_googletest//:gtest", + "@googletest//:gtest", ], ) diff --git a/libspu/mpc/spdz2k/ot/BUILD.bazel b/libspu/mpc/spdz2k/ot/BUILD.bazel index 124adaf10..937eb3c18 100644 --- a/libspu/mpc/spdz2k/ot/BUILD.bazel +++ b/libspu/mpc/spdz2k/ot/BUILD.bazel @@ -42,7 +42,7 @@ spu_cc_library( copts = AES_COPT_FLAGS + ["-Wno-ignored-attributes"], deps = [ "//libspu/core:prelude", - "@com_github_emptoolkit_emp_tool//:emp-tool", + "@emp-tool//:emp-tool", "@yacl//yacl/crypto/hash:hash_interface", "@yacl//yacl/crypto/hash:hash_utils", "@yacl//yacl/crypto/tools:crhash", @@ -66,7 +66,7 @@ spu_cc_library( "//libspu/mpc/spdz2k:commitment", "//libspu/mpc/spdz2k/ot:kos_ote", "//libspu/mpc/utils:ring_ops", - "@com_github_emptoolkit_emp_tool//:emp-tool", + "@emp-tool//:emp-tool", "@yacl//yacl/crypto/tools:prg", "@yacl//yacl/kernel/type:ot_store", "@yacl//yacl/link", diff --git a/libspu/mpc/tools/BUILD.bazel b/libspu/mpc/tools/BUILD.bazel index 2d2892373..5a430b2e2 100644 --- a/libspu/mpc/tools/BUILD.bazel +++ b/libspu/mpc/tools/BUILD.bazel @@ -58,9 +58,9 @@ spu_cc_binary( "//libspu/mpc/common:communicator", "//libspu/mpc/semi2k", "//libspu/mpc/utils:simulate", - "@com_github_fmtlib_fmt//:fmtlib", - "@com_github_google_benchmark//:benchmark", - 
"@com_google_absl//absl/strings", + "@abseil-cpp//absl/strings", + "@fmt", + "@google_benchmark//:benchmark", "@llvm-project//llvm:Support", "@yacl//yacl/link:context", ], diff --git a/libspu/mpc/utils/BUILD.bazel b/libspu/mpc/utils/BUILD.bazel index 2b494ff1d..3ea28e9b3 100644 --- a/libspu/mpc/utils/BUILD.bazel +++ b/libspu/mpc/utils/BUILD.bazel @@ -126,7 +126,7 @@ spu_cc_binary( srcs = ["ring_ops_bench.cc"], deps = [ ":ring_ops", - "@com_github_google_benchmark//:benchmark", + "@google_benchmark//:benchmark", ], ) @@ -138,7 +138,7 @@ spu_cc_library( linkopts = OMP_LINKFLAGS, deps = [ "//libspu/core:parallel_utils", - "@eigen_archive//:eigen3", + "@eigen", ] + OMP_DEPS, ) @@ -158,3 +158,36 @@ spu_cc_library( "//libspu/core:parallel_utils", ], ) + +spu_cc_library( + name = "lowmc", + srcs = ["lowmc.cc"], + hdrs = ["lowmc.h"], + deps = [ + ":lowmc_utils", + "//libspu/core:ndarray_ref", + "//libspu/mpc/utils:ring_ops", + "@yacl//yacl/crypto/tools:prg", + ], +) + +spu_cc_library( + name = "lowmc_utils", + srcs = ["lowmc_utils.cc"], + hdrs = ["lowmc_utils.h"], + deps = [ + "//libspu/core:ndarray_ref", + "//libspu/core:prelude", + "//libspu/mpc/utils:ring_ops", + ], +) + +spu_cc_test( + name = "lowmc_test", + srcs = ["lowmc_test.cc"], + deps = [ + ":lowmc", + "//libspu/mpc/utils:ring_ops", + "@yacl//yacl/utils:elapsed_timer", + ], +) diff --git a/libspu/mpc/utils/lowmc.cc b/libspu/mpc/utils/lowmc.cc new file mode 100644 index 000000000..a4b37e500 --- /dev/null +++ b/libspu/mpc/utils/lowmc.cc @@ -0,0 +1,372 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "libspu/mpc/utils/lowmc.h" + +#include "libspu/mpc/utils/lowmc_utils.h" +#include "libspu/mpc/utils/ring_ops.h" + +namespace spu::mpc { + +namespace { + +template +bool get_bit(const T x, int i) { + return (x >> i) & (1); +} + +// Some linear algebra helper functions +uint64_t rank_of_matrix(const NdArrayRef& matrix) { + SPU_ENFORCE(matrix.shape().size() == 1, "matrix should be a 1-D array"); + + const auto n_rows = static_cast(matrix.numel()); + auto mat = matrix.clone(); + const auto field = mat.eltype().as()->field(); + + // Do Gaussian elimination, and count the non-zero rows + uint64_t row = 0; + + DISPATCH_ALL_FIELDS(field, [&]() { + using block_type = ring2k_t; + NdArrayView _mat(mat); + + // can be `block_size_` or `key_size_`, column size of matrix + const auto size = sizeof(block_type) * 8; + const auto max_rank = std::min(n_rows, size); + + // we try to transform matrix to its upper triangular form + for (uint64_t col = 1; col <= size; ++col) { + // if the pivot is zero, then find the first non-zero row and swap it + if (!get_bit(_mat[row], size - col)) { + uint64_t r = row; + while (r < n_rows && !get_bit(_mat[r], size - col)) { + ++r; + } + // all rows in this column are zero, skip it + if (r >= n_rows) { + continue; + } else { + auto temp = _mat[row]; + _mat[row] = _mat[r]; + _mat[r] = temp; + } + } + for (uint64_t i = row + 1; i < n_rows; ++i) { + if (get_bit(_mat[i], size - col)) { + _mat[i] ^= _mat[row]; + } + } + ++row; + if (row == max_rank) { + break; + } + } + }); + + return row; +} + +// Computing the inv of matrix 
without checking the rank of matrix by +// Gaussian elimination algorithm: [M | I] -> [I | inv(M)] +NdArrayRef invert_matrix(const NdArrayRef& matrix) { + SPU_ENFORCE(matrix.shape().size() == 1, "matrix should be a 1-D array"); + + const auto n_rows = static_cast(matrix.numel()); + auto mat = matrix.clone(); + const auto field = mat.eltype().as()->field(); + + auto inv_mat = NdArrayRef(matrix.eltype(), matrix.shape()); + + DISPATCH_ALL_FIELDS(field, [&]() { + using block_type = ring2k_t; + const auto size = sizeof(block_type) * 8; + SPU_ENFORCE(n_rows == size, "Not a square matrix."); + + NdArrayView _mat(mat); + NdArrayView _inv_mat(inv_mat); + + // init inv_mat as identity matrix + pforeach(0, n_rows, [&](int64_t idx) { // + _inv_mat[idx] = (static_cast(1) << idx); + }); + + // Transform to upper triangular matrix first + uint64_t row = 0; + for (uint64_t col = 0; col < size; ++col) { + // if the pivot is zero, then find the first non-zero row and swap it + if (!get_bit(_mat[row], col)) { + uint64_t r = row + 1; + while (r < n_rows && !get_bit(_mat[r], col)) { + ++r; + } + if (r >= n_rows) { + continue; + } else { + auto temp = _mat[row]; + _mat[row] = _mat[r]; + _mat[r] = temp; + + temp = _inv_mat[row]; + _inv_mat[row] = _inv_mat[r]; + _inv_mat[r] = temp; + } + } + for (uint64_t i = row + 1; i < n_rows; ++i) { + if (get_bit(_mat[i], col)) { + _mat[i] ^= _mat[row]; + _inv_mat[i] ^= _inv_mat[row]; + } + } + ++row; + } + + // Transform to identity matrix + for (uint64_t col = size; col > 0; --col) { + for (uint64_t r = 0; r < col - 1; ++r) { + if (get_bit(_mat[r], col - 1)) { + _mat[r] ^= _mat[col - 1]; + _inv_mat[r] ^= _inv_mat[col - 1]; + } + } + } + }); + + return inv_mat; +} + +} // namespace + +/// public api implementation + +LowMC::LowMC(FieldType field, uint128_t seed, int64_t d, uint64_t key_size, + bool need_decrypt) { + SPU_ENFORCE(key_size == 128, "key size should always be 128 now"); + + int64_t n_boxes; + int64_t rounds; + if (field == FM32) { + 
SPU_ENFORCE(d < 32, + "Support at most 2^32 blocks to encrypt for 32-bit blocks."); + // d=20 or d=30 has the same parameter setting. + n_boxes = 9; + rounds = 15; + } else if (field == FM64) { + switch (d) { + case 20: + n_boxes = 15; + rounds = 11; + break; + case 30: + n_boxes = 13; + rounds = 12; + break; + case 40: + n_boxes = 13; + rounds = 13; + break; + default: + SPU_THROW("Not supported data complexity."); + } + } else if (field == FM128) { + switch (d) { + case 20: + n_boxes = 25; + rounds = 10; + break; + case 30: + n_boxes = 25; + rounds = 11; + break; + case 40: + n_boxes = 25; + rounds = 12; + break; + default: + SPU_THROW("Not supported data complexity."); + } + } else { + SPU_THROW("Should not be here."); + } + + field_ = field; + seed_ = seed; + number_of_boxes_ = n_boxes; + rounds_ = rounds; + key_size_ = key_size; + need_decrypt_ = need_decrypt; + block_size_ = SizeOf(field) * 8; + SPU_ENFORCE(block_size_ <= 128, + "data size should be no more than 128 bits now."); + + // S-boxes of LowMC has 3 bits + identity_size_ = block_size_ - number_of_boxes_ * kSboxBits; + + // fill some key-irrelevant random matrixes + fill_matrixes(need_decrypt); +} + +void LowMC::set_key(KeyType key) { + if (key_been_set_) { + return; + } + + round_keys_ = generate_round_keys(key_matrices_, key, rounds_, field_); + key_been_set_ = true; +} + +NdArrayRef LowMC::encrypt(const NdArrayRef& plaintext) { + SPU_ENFORCE(key_been_set_, "key not set."); + SPU_ENFORCE(plaintext.eltype().as()->field() == field_, + "field mismatch"); + const auto& shape = plaintext.shape(); + + // 1. key whiten + auto c = ring_xor(plaintext, round_keys_[0].broadcast_to(shape, {})); + + // 2. 
round loop + for (uint64_t r = 1; r <= rounds_; r++) { + // S-boxes + c = Substitution(c, kSBox); + + // affine layer + c = dot_product_gf2(lin_matrices_[r - 1], c, field_); + ring_xor_(c, round_constants_[r - 1].broadcast_to(shape, {})); + + // round key xor + ring_xor_(c, round_keys_[r].broadcast_to(shape, {})); + } + + return c; +} + +NdArrayRef LowMC::decrypt(const NdArrayRef& ciphertext) { + SPU_ENFORCE(key_been_set_, "key not set."); + SPU_ENFORCE(ciphertext.eltype().as()->field() == field_, + "field mismatch"); + const auto& shape = ciphertext.shape(); + + // just the inverse procedure of encrypt + auto c = ciphertext; + for (uint64_t r = rounds_; r > 0; r--) { + ring_xor_(c, round_keys_[r].broadcast_to(shape, {})); + + ring_xor_(c, round_constants_[r - 1].broadcast_to(shape, {})); + c = dot_product_gf2(inv_lin_matrices_[r - 1], c, field_); + c = Substitution(c, kInvSBox); + } + + ring_xor_(c, round_keys_[0].broadcast_to(shape, {})); + + return c; +} + +/// private api implementation + +NdArrayRef LowMC::Substitution(const NdArrayRef& data, + absl::Span sbox) const { + NdArrayRef ret(data.eltype(), data.shape()); + + DISPATCH_ALL_FIELDS(ret.eltype().as()->field(), [&]() { + using block_type = ring2k_t; + NdArrayView _data(data); + NdArrayView _ret(ret); + + pforeach(0, data.numel(), [&](int64_t idx) { + block_type tmp = 0; + + // Get the identity part of the data + tmp ^= (_data[idx] >> (3 * number_of_boxes_)); + + // Get the rest through the Sboxes + for (uint64_t i = 1; i <= number_of_boxes_; ++i) { + tmp <<= 3; + auto ind = ((_data[idx] >> 3 * (number_of_boxes_ - i)) & 0x7); + tmp ^= static_cast(sbox[ind]); + } + + _ret[idx] = tmp; + }); + }); + + return ret; +} + +void LowMC::fill_matrixes(bool need_decrypt) { + // 1. 
create Lmatrixes + lin_matrices_.reserve(rounds_); + // -1 means no rank checking + int64_t desire_rank = -1; + if (need_decrypt) { + inv_lin_matrices_.reserve(rounds_); + // Note: we force block_size_ <= key_size_ = 128, so we can just use the + // same ranks for all Lmatices and key matrices. + desire_rank = block_size_; + } + + for (uint64_t i = 0; i < rounds_; i++) { + auto mat = get_pub_rand_blocks(field_, block_size_, desire_rank); + lin_matrices_.push_back(mat); + + if (need_decrypt) { + inv_lin_matrices_.push_back(invert_matrix(mat)); + } + } + + // 2. create round constants + round_constants_.reserve(rounds_); + for (uint64_t i = 0; i < rounds_; i++) { + round_constants_.push_back(get_pub_rand_blocks(field_, 1)); + } + + // 3. create key matrices + key_matrices_.reserve(rounds_ + 1); // first element is for initial whiten + for (uint64_t i = 0; i < rounds_ + 1; i++) { + // we force the key_size = 128 for safety consideration. + key_matrices_.push_back( + get_pub_rand_blocks(FM128, block_size_, desire_rank)); + } +} + +NdArrayRef LowMC::replay_ring_rand(FieldType field, const Shape& shape) { + NdArrayRef res(makeType(field), shape); + + cnt_ = yacl::crypto::FillPRand( + kCryptoType, seed_, iv_, cnt_, + absl::MakeSpan(res.data(), res.buf()->size())); + + return res; +} + +NdArrayRef LowMC::get_pub_rand_blocks(FieldType field, int64_t n_blocks, + int64_t desire_rank) { + const auto ring_ty = makeType(field); + auto rand = replay_ring_rand(field, {n_blocks}); + + // check the rank for the inverse process (debug only now) + if (desire_rank > 0) { + // The simple constant rounds algorithm to generate invertible or full + // row-rank matrixes: + // e.g. For nxn matrices M, we just fill M + // with random bits, and it's not hard to prove that: P(det(M) != 0) = + // (1-1/2) * (1-1/4) * ... * (1-1/2^n) ~= 0.2888 (when n->inf), so the + // expected repeat times are no more than 4. 
+ while (rank_of_matrix(rand) != static_cast(desire_rank)) { + rand = replay_ring_rand(field, {n_blocks}); + } + } + + return rand.as(ring_ty); +} + +} // namespace spu::mpc diff --git a/libspu/mpc/utils/lowmc.h b/libspu/mpc/utils/lowmc.h new file mode 100644 index 000000000..dfd920bf5 --- /dev/null +++ b/libspu/mpc/utils/lowmc.h @@ -0,0 +1,132 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "yacl/crypto/tools/prg.h" + +#include "libspu/core/ndarray_ref.h" + +namespace spu::mpc { + +// ref: Ciphers for MPC and FHE +// https://eprint.iacr.org/2016/687.pdf +class LowMC { + using KeyType = uint128_t; // key size should always be 128, at least >= 80 + + public: + // To prevent the user to set the wrong parameters, we decide the inner + // parameters of LowMC insides. These parameters are deduced from the 5 + // attacks in the origin LowMC paper. + // Note: currently, we only support encryption functionality. 
+ // + // d: data complexity, the log2 of the numbers of data_blocks + explicit LowMC(FieldType field, uint128_t seed, int64_t d, + uint64_t key_size = 128, bool need_decrypt = false); + + // plaintext set key procedure, debug only + void set_key(KeyType key); + + /// + /// encrypt/decrypt api for plaintext data, debug only now + /// + + NdArrayRef encrypt(const NdArrayRef& plaintext); + + NdArrayRef decrypt(const NdArrayRef& ciphertext); + + std::vector Lmat() const { return lin_matrices_; } + + std::vector RoundConstants() const { return round_constants_; } + + std::vector Kmat() const { return key_matrices_; } + + int64_t rounds() const { return rounds_; } + + int64_t number_of_boxes() const { return number_of_boxes_; } + + int64_t data_block_size() const { return block_size_; } + + private: + // utils functions + + // S-boxes implementation with lookup table + NdArrayRef Substitution(const NdArrayRef& data, + absl::Span sbox) const; + + // key filling functions + void fill_matrixes(bool need_decrypt); + + // random blocks helper functions + // generate public and replay rand array. + NdArrayRef replay_ring_rand(FieldType field, const Shape& shape); + + // Note: To save memory, we compress k bits into a single uint64_t + // or uint128_t number. So for n*k binary matrixes, we store it with an + // shape (n,) NdArrayRef, each element (k bits) is a row of matrix. 
+ NdArrayRef get_pub_rand_blocks(FieldType field, int64_t n_blocks, + int64_t desire_rank = -1); + + // some meta infos of the lowmc + static constexpr int kSboxBits = 3; + uint64_t block_size_; // Data size in bits + FieldType field_; // field of data block + uint64_t number_of_boxes_; // Number of S-boxes in each round + uint64_t identity_size_; // Size of the identity part in the Sbox layer + uint64_t key_size_; // Key size in bits + uint64_t rounds_; + bool need_decrypt_; + bool key_been_set_ = false; + + // random values related + uint128_t seed_; // seed to generate random matrixes and keys + static constexpr yacl::crypto::SymmetricCrypto::CryptoType kCryptoType = + yacl::crypto::SymmetricCrypto::CryptoType::AES128_ECB; + uint128_t iv_ = 0; + uint64_t cnt_ = 0; + + // inner matrixes and keys + // Stores the binary matrices for each round. + // each array, shape: (block_size_,) + // each element is a ROW of matrix, i.e. block_size_ bits + std::vector lin_matrices_; + // Stores the round constants + // each array, shape: (1,) + // each element is block_size_ bits + std::vector round_constants_; + // Stores the matrices that generate the round keys + // each array, shape: (block_size_,) + // each element is a ROW of matrix, i.e. key_size_ bits + std::vector key_matrices_; + // Stores the round keys + // each array, shape: (1,) + // each element is block_size_ bits + std::vector round_keys_; + + // some matrixes for decrypt, valid only for testing + // Stores the inverses of LinMatrices + // each array, shape: (block_size_,) + // each element is a ROW of matrix, i.e. block_size_ bits + std::vector inv_lin_matrices_; + + // The Sbox and its inverse + // The plaintext implementations of the Sbox and its inverse are based on + // Look-Up tables. 
+ static constexpr std::array kSBox = {0x00, 0x01, 0x03, 0x06, + 0x07, 0x04, 0x05, 0x02}; + static constexpr std::array kInvSBox = {0x00, 0x01, 0x07, 0x02, + 0x05, 0x06, 0x03, 0x04}; +}; + +} // namespace spu::mpc diff --git a/libspu/mpc/utils/lowmc_test.cc b/libspu/mpc/utils/lowmc_test.cc new file mode 100644 index 000000000..ae1bdbadb --- /dev/null +++ b/libspu/mpc/utils/lowmc_test.cc @@ -0,0 +1,76 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "libspu/mpc/utils/lowmc.h" + +#include "gtest/gtest.h" +#include "yacl/utils/elapsed_timer.h" + +#include "libspu/mpc/utils/ring_ops.h" + +namespace spu::mpc { + +TEST(LowMC, List) { + uint128_t seed = 107; + uint128_t key = 11; + int64_t d = 20; // data complexity + int64_t n = 100; + const Shape shape = {n, n}; + + // 64-bits block + { + FieldType field = FM64; + + yacl::ElapsedTimer pack_timer; + auto cipher = LowMC(field, seed, d, 128, true); + double init_time = pack_timer.CountMs(); + + cipher.set_key(key); + + auto values = ring_rand(field, shape); + + auto c = cipher.encrypt(values); + + auto p = cipher.decrypt(c); + + SPDLOG_INFO("{} blocks, {}-bits block, fill random {} ms", shape.numel(), + 64, init_time); + + EXPECT_TRUE(ring_all_equal(values, p)); + } + + // 128-bits block + { + FieldType field = FM128; + + yacl::ElapsedTimer pack_timer; + auto cipher = LowMC(field, seed, d, 128, true); + double init_time = pack_timer.CountMs(); + + cipher.set_key(key); + + auto values = ring_rand(field, shape); + + auto c = cipher.encrypt(values); + + auto p = cipher.decrypt(c); + + SPDLOG_INFO("{} blocks, {}-bits block, fill random {} ms", shape.numel(), + 128, init_time); + + EXPECT_TRUE(ring_all_equal(values, p)); + } +} + +} // namespace spu::mpc diff --git a/libspu/mpc/utils/lowmc_utils.cc b/libspu/mpc/utils/lowmc_utils.cc new file mode 100644 index 000000000..2354cf03d --- /dev/null +++ b/libspu/mpc/utils/lowmc_utils.cc @@ -0,0 +1,117 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "libspu/mpc/utils/lowmc_utils.h" + +#include "libspu/core/prelude.h" +#include "libspu/mpc/utils/ring_ops.h" + +namespace spu::mpc { + +namespace { + +template +constexpr T bit_parity(const T x) { + static_assert(std::is_unsigned_v); + + auto k = sizeof(T) * 8; + T ret = x; + while (k > 1) { + ret ^= (ret >> (k / 2)); + k /= 2; + } + + return ret & 1; +} + +} // namespace + +NdArrayRef dot_product_gf2(const NdArrayRef& x, const NdArrayRef& y, + FieldType to_field) { + // conceptually, x is an n*k binary matrix, y is a m*k binary matrix (y can + // be multi-dimension, we take 2-d as an example); + // ret is a m*n binary matrix, ret[i] = dot(x, y[i]); + // IMPORTANT: the field of (x,y) and ret may be different! + SPU_ENFORCE(x.elsize() == y.elsize(), "size mismatch"); + SPU_ENFORCE(x.shape().size() == 1, + "x should be a 1-D array, i.e. n*k binary matrix."); + + const auto field = x.eltype().as()->field(); + const auto n = x.shape().dim(0); + SPU_ENFORCE(SizeOf(to_field) * 8 == (uint64_t)n, + "mismatch of output bit size and type."); + + auto out = ring_zeros(to_field, y.shape()); + + DISPATCH_ALL_FIELDS(field, [&]() { + using src_type = ring2k_t; + + DISPATCH_ALL_FIELDS(to_field, [&]() { + using to_type = ring2k_t; + + NdArrayView _out(out); + + Index ind(1, 0); + for (int64_t i = 0; i < n; ++i) { + ind[0] = i; + const auto row = x.slice_scalar_at(ind).broadcast_to(y.shape(), {}); + auto prod = ring_and(y, row); + NdArrayView _prod(prod); + + pforeach(0, out.numel(), [&](int64_t idx) { // + _out[idx] = + _out[idx] | (static_cast(bit_parity(_prod[idx])) << i); + }); + } + }); + }); + + return out; +} + +std::vector generate_round_keys( + const std::vector& key_matrices, uint128_t key, uint64_t rounds, + FieldType to_field) { + SPU_ENFORCE(key_matrices.size() == (rounds + 1), "key matrix size mismatch"); + + NdArrayRef master_key(makeType(FM128), 
{1}); + NdArrayView _master_key(master_key); + _master_key[0] = key; + + std::vector round_keys; + round_keys.reserve(rounds + 1); + // round keys has rounds + 1 elements, the first one is for initial whiten + for (uint64_t i = 0; i <= rounds; ++i) { + round_keys.push_back( + dot_product_gf2(key_matrices[i], master_key, to_field)); + } + + return round_keys; +} + +int64_t get_data_complexity(int64_t n) { + const auto n_bits = Log2Ceil(n); + + if (n_bits <= 20) { + return 20; + } else if (n_bits <= 30) { + return 30; + } else if (n_bits <= 40) { + return 40; + } + + SPU_THROW("Support at most 2^40 now."); +} + +} // namespace spu::mpc diff --git a/libspu/mpc/utils/lowmc_utils.h b/libspu/mpc/utils/lowmc_utils.h new file mode 100644 index 000000000..c0bd71c21 --- /dev/null +++ b/libspu/mpc/utils/lowmc_utils.h @@ -0,0 +1,43 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "libspu/core/ndarray_ref.h" + +namespace spu::mpc { + +// recap: For n*k binary matrix, we regard it as a (n,) NdArrayRef, each +// element is a row (k bits). +// For n*k binary matrix A, k-bits binary vector B, n-bits C = dot(A, B): +// C[r] = bit_parity(A[r] & B) for r in [0, n) +NdArrayRef dot_product_gf2(const NdArrayRef& x, const NdArrayRef& y, + FieldType to_field); + +// Key is strongly dependent on the sharing semantics, so we leave the key +// setting procedure in kernel layer. 
+// Here we implement the plaintext scheme, which can also be used in n-n +// xor sharing semantics (e.g. SEMI2K, CHEETAH, etc.). +// For ABY3, can call this function twice to get two sharing of the round +// keys. +std::vector generate_round_keys( + const std::vector& key_matrices, uint128_t key, uint64_t rounds, + FieldType to_field); + +// we only support three choices for data complexity now. +// n <= 2^20 (about 1 million); n <= 2^30 (about 1 billion); n <= 2^40 +// (about 1 trillion) +int64_t get_data_complexity(int64_t n); + +} // namespace spu::mpc diff --git a/libspu/mpc/utils/permute.cc b/libspu/mpc/utils/permute.cc index 62a8e85a3..800fa1dfa 100644 --- a/libspu/mpc/utils/permute.cc +++ b/libspu/mpc/utils/permute.cc @@ -20,6 +20,7 @@ #include "yacl/crypto/rand/rand.h" #include "libspu/core/ndarray_ref.h" +#include "libspu/core/parallel_utils.h" #include "libspu/core/type_util.h" namespace spu::mpc { @@ -44,9 +45,7 @@ NdArrayRef applyInvPerm(const NdArrayRef& x, absl::Span pv) { DISPATCH_ALL_FIELDS(field, [&]() { NdArrayView _x(x); NdArrayView _y(y); - for (int64_t i = 0; i < y.numel(); i++) { - _y[pv[i]] = _x[i]; - } + pforeach(0, y.numel(), [&](int64_t i) { _y[pv[i]] = _x[i]; }); }); return y; } @@ -63,9 +62,7 @@ NdArrayRef applyInvPerm(const NdArrayRef& x, const NdArrayRef& pv) { const auto pv_field = pv.eltype().as()->field(); DISPATCH_ALL_FIELDS(pv_field, [&]() { NdArrayView _pv(pv); - for (int64_t i = 0; i < y.numel(); i++) { - _y[_pv[i]] = _x[i]; - } + pforeach(0, y.numel(), [&](int64_t i) { _y[_pv[i]] = _x[i]; }); }); }); return y; @@ -79,9 +76,7 @@ NdArrayRef applyPerm(const NdArrayRef& x, absl::Span pv) { DISPATCH_ALL_FIELDS(field, [&]() { NdArrayView _x(x); NdArrayView _y(y); - for (int64_t i = 0; i < y.numel(); i++) { - _y[i] = _x[pv[i]]; - } + pforeach(0, y.numel(), [&](int64_t i) { _y[i] = _x[pv[i]]; }); }); return y; } @@ -98,9 +93,7 @@ NdArrayRef applyPerm(const NdArrayRef& x, const NdArrayRef& pv) { const auto pv_field = 
pv.eltype().as()->field(); DISPATCH_ALL_FIELDS(pv_field, [&]() { NdArrayView _pv(pv); - for (int64_t i = 0; i < y.numel(); i++) { - _y[i] = _x[_pv[i]]; - } + pforeach(0, y.numel(), [&](int64_t i) { _y[i] = _x[_pv[i]]; }); }); }); return y; @@ -112,9 +105,7 @@ NdArrayRef genInversePerm(const NdArrayRef& perm) { DISPATCH_ALL_FIELDS(field, [&]() { NdArrayView _ret(ret); NdArrayView _perm(perm); - for (int64_t i = 0; i < perm.numel(); ++i) { - _ret[_perm[i]] = ring2k_t(i); - } + pforeach(0, perm.numel(), [&](int64_t i) { _ret[_perm[i]] = ring2k_t(i); }); }); return ret; } diff --git a/libspu/spu.proto b/libspu/spu.proto index a9050c5df..5a4adbe2e 100644 --- a/libspu/spu.proto +++ b/libspu/spu.proto @@ -80,13 +80,13 @@ enum PtType { PT_I128 = 9; // int128_t PT_U128 = 10; // uint128_t PT_I1 = 11; // bool - // - PT_F16 = 30; // half - PT_F32 = 31; // float - PT_F64 = 32; // double - // - PT_CF32 = 50; // complex float - PT_CF64 = 51; // complex double + + PT_F16 = 30; // half + PT_F32 = 31; // float + PT_F64 = 32; // double + + PT_CF32 = 50; // complex float + PT_CF64 = 51; // complex double } // A security parameter type. @@ -228,6 +228,23 @@ message RuntimeConfig { // default: 128 * 1024 * 1024 uint64 share_max_chunk_size = 20; + enum SortMethod { + SORT_DEFAULT = 0; // Implementation defined. + SORT_RADIX = 1; // The radix sort (stable sort, need efficient shuffle). + SORT_QUICK = 2; // The quick sort (unstable, need efficient shuffle). + SORT_NETWORK = 3; // The odd-even sorting network (unstable, most general). + } + + // SPU supports multiple sorting algorithms. + // -for 2pc, only sorting network is supported. + // -for 2.5pc or 3pc, all these algorithms are supported. + // -for stable sort, only radix sort is supported. 
+ SortMethod sort_method = 21; + + // threshold for quick sort, when the size of the array is less than this + // value, use merge sort instead + int64 quick_sort_threshold = 22; + // @exclude // Fixed-point arithmetic related, reserved for [50, 100) @@ -353,6 +370,20 @@ message RuntimeConfig { bool experimental_exp_prime_enable_upper_bound = 109; } +message ClientSSLConfig { + // Certificate in PEM format, supported both file path and raw string + string certificate = 1; + // Private key in PEM format, supported both file path and raw string based on + // prefix + string private_key = 2; + // The trusted CA file to verify the peer's certificate + // If empty, use the system default CA files + string ca_file_path = 3; + // Maximum depth of the certificate chain for verification + // If 0, turn off the verification + int32 verify_depth = 4; +} + message TTPBeaverConfig { // TrustedThirdParty beaver server's remote ip:port or load-balance uri. string server_host = 1; @@ -363,10 +394,14 @@ message TTPBeaverConfig { // asym_crypto_schema: support ["SM2"] // Will support 25519 in the future, after yacl supported it. string asym_crypto_schema = 3; - // server's public key - bytes server_public_key = 4; + // Server's public key in PEM format + string server_public_key = 4; + + // Transport protocol, support ["http", "h2"] + string transport_protocol = 5; - // TODO: TLS & brpc options. + // Configurations related to SSL + ClientSSLConfig ssl_config = 6; } enum CheetahOtKind { diff --git a/libspu/version.h b/libspu/version.h.in similarity index 94% rename from libspu/version.h rename to libspu/version.h.in index 8ab54eea4..25251c30f 100644 --- a/libspu/version.h +++ b/libspu/version.h.in @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#define SPU_VERSION "0.9.4.dev$$DATE$$" +#define SPU_VERSION "@SPU_VERSION@" #include diff --git a/requirements-dev.txt b/requirements-dev.txt index 33bcd4da8..6ba68f151 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,3 +1,12 @@ +# basic deps +grpcio==1.66.0 +numpy>=1.22.0, <2 # FIXME: for SF compatibility +protobuf==5.27.3 +cloudpickle>=2.0.0 +multiprocess>=0.70.12.2 +cachetools>=5.0.0 +jax[cpu]>=0.4.16, <=0.4.34 # FIXME: Jax 0.4.26+ select perf issue +termcolor>=2.0.0 pandas>=1.4.2 flax<0.10.0 scikit-learn<1.6.0 @@ -6,3 +15,17 @@ absl-py>=1.1.0 tensorflow-cpu>=2.12.0; sys_platform == "linux" and platform_machine == 'x86_64' tensorflow>=2.12.0; sys_platform != "linux" or platform_machine != 'x86_64' h5py!=3.11.0; platform_machine == 'aarch64' + +# for examples +dm-haiku +plotnine +jraph +optax +torch==2.3.0 +torch_xla==2.3.0; sys_platform == "linux" and platform_machine == 'x86_64' +torchvision +tensorflow_datasets +keras +setuptools<71.0.0 # https://github.com/pypa/setuptools/issues/4487 +transformers +datasets diff --git a/requirements.txt b/requirements.txt index 2c52e8279..dc76ff0fa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ -grpcio>=1.42.0,!=1.48.0 +grpcio==1.66.0 numpy>=1.22.0, <2 # FIXME: for SF compatibility -protobuf>=4, <5 +protobuf==5.27.3 cloudpickle>=2.0.0 multiprocess>=0.70.12.2 cachetools>=5.0.0 diff --git a/requirements_dev_lock.txt b/requirements_dev_lock.txt new file mode 100644 index 000000000..d0f118830 --- /dev/null +++ b/requirements_dev_lock.txt @@ -0,0 +1,2732 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# bazel run //:requirements-dev.update +# +absl-py==2.1.0 \ + --hash=sha256:526a04eadab8b4ee719ce68f204172ead1027549089702d99b9059f129ff1308 \ + --hash=sha256:7820790efbb316739cde8b4e19357243fc3608a152024288513dd968d7d959ff + # via + # -r requirements-dev.txt + # array-record + # chex + # dm-haiku + # etils + # keras + # optax + # 
orbax-checkpoint + # tensorboard + # tensorflow-cpu + # tensorflow-datasets + # tensorflow-metadata + # torch-xla +aiohappyeyeballs==2.4.4 \ + --hash=sha256:5fdd7d87889c63183afc18ce9271f9b0a7d32c2303e394468dd45d514a757745 \ + --hash=sha256:a980909d50efcd44795c4afeca523296716d50cd756ddca6af8c65b996e27de8 + # via aiohttp +aiohttp==3.11.11 \ + --hash=sha256:0882c2820fd0132240edbb4a51eb8ceb6eef8181db9ad5291ab3332e0d71df5f \ + --hash=sha256:0a6d3fbf2232e3a08c41eca81ae4f1dff3d8f1a30bae415ebe0af2d2458b8a33 \ + --hash=sha256:0b7fb429ab1aafa1f48578eb315ca45bd46e9c37de11fe45c7f5f4138091e2f1 \ + --hash=sha256:0eb98d90b6690827dcc84c246811feeb4e1eea683c0eac6caed7549be9c84665 \ + --hash=sha256:0fd82b8e9c383af11d2b26f27a478640b6b83d669440c0a71481f7c865a51da9 \ + --hash=sha256:10b4ff0ad793d98605958089fabfa350e8e62bd5d40aa65cdc69d6785859f94e \ + --hash=sha256:1642eceeaa5ab6c9b6dfeaaa626ae314d808188ab23ae196a34c9d97efb68350 \ + --hash=sha256:1dac54e8ce2ed83b1f6b1a54005c87dfed139cf3f777fdc8afc76e7841101226 \ + --hash=sha256:1e69966ea6ef0c14ee53ef7a3d68b564cc408121ea56c0caa2dc918c1b2f553d \ + --hash=sha256:1f21bb8d0235fc10c09ce1d11ffbd40fc50d3f08a89e4cf3a0c503dc2562247a \ + --hash=sha256:2170816e34e10f2fd120f603e951630f8a112e1be3b60963a1f159f5699059a6 \ + --hash=sha256:21fef42317cf02e05d3b09c028712e1d73a9606f02467fd803f7c1f39cc59add \ + --hash=sha256:249cc6912405917344192b9f9ea5cd5b139d49e0d2f5c7f70bdfaf6b4dbf3a2e \ + --hash=sha256:3499c7ffbfd9c6a3d8d6a2b01c26639da7e43d47c7b4f788016226b1e711caa8 \ + --hash=sha256:3af41686ccec6a0f2bdc66686dc0f403c41ac2089f80e2214a0f82d001052c03 \ + --hash=sha256:3e23419d832d969f659c208557de4a123e30a10d26e1e14b73431d3c13444c2e \ + --hash=sha256:3ea1b59dc06396b0b424740a10a0a63974c725b1c64736ff788a3689d36c02d2 \ + --hash=sha256:44167fc6a763d534a6908bdb2592269b4bf30a03239bcb1654781adf5e49caf1 \ + --hash=sha256:479b8c6ebd12aedfe64563b85920525d05d394b85f166b7873c8bde6da612f9c \ + --hash=sha256:4af57160800b7a815f3fe0eba9b46bf28aafc195555f1824555fa2cfab6c1538 
\ + --hash=sha256:4b4fa1cb5f270fb3eab079536b764ad740bb749ce69a94d4ec30ceee1b5940d5 \ + --hash=sha256:4eed954b161e6b9b65f6be446ed448ed3921763cc432053ceb606f89d793927e \ + --hash=sha256:541d823548ab69d13d23730a06f97460f4238ad2e5ed966aaf850d7c369782d9 \ + --hash=sha256:568c1236b2fde93b7720f95a890741854c1200fba4a3471ff48b2934d2d93fd3 \ + --hash=sha256:5854be2f3e5a729800bac57a8d76af464e160f19676ab6aea74bde18ad19d438 \ + --hash=sha256:620598717fce1b3bd14dd09947ea53e1ad510317c85dda2c9c65b622edc96b12 \ + --hash=sha256:6526e5fb4e14f4bbf30411216780c9967c20c5a55f2f51d3abd6de68320cc2f3 \ + --hash=sha256:6fba278063559acc730abf49845d0e9a9e1ba74f85f0ee6efd5803f08b285853 \ + --hash=sha256:70d1f9dde0e5dd9e292a6d4d00058737052b01f3532f69c0c65818dac26dc287 \ + --hash=sha256:731468f555656767cda219ab42e033355fe48c85fbe3ba83a349631541715ba2 \ + --hash=sha256:81b8fe282183e4a3c7a1b72f5ade1094ed1c6345a8f153506d114af5bf8accd9 \ + --hash=sha256:84a585799c58b795573c7fa9b84c455adf3e1d72f19a2bf498b54a95ae0d194c \ + --hash=sha256:85992ee30a31835fc482468637b3e5bd085fa8fe9392ba0bdcbdc1ef5e9e3c55 \ + --hash=sha256:8811f3f098a78ffa16e0ea36dffd577eb031aea797cbdba81be039a4169e242c \ + --hash=sha256:88a12ad8ccf325a8a5ed80e6d7c3bdc247d66175afedbe104ee2aaca72960d8e \ + --hash=sha256:8be8508d110d93061197fd2d6a74f7401f73b6d12f8822bbcd6d74f2b55d71b1 \ + --hash=sha256:8e2bf8029dbf0810c7bfbc3e594b51c4cc9101fbffb583a3923aea184724203c \ + --hash=sha256:929f3ed33743a49ab127c58c3e0a827de0664bfcda566108989a14068f820194 \ + --hash=sha256:92cde43018a2e17d48bb09c79e4d4cb0e236de5063ce897a5e40ac7cb4878773 \ + --hash=sha256:92fc484e34b733704ad77210c7957679c5c3877bd1e6b6d74b185e9320cc716e \ + --hash=sha256:943a8b052e54dfd6439fd7989f67fc6a7f2138d0a2cf0a7de5f18aa4fe7eb3b1 \ + --hash=sha256:9d73ee3725b7a737ad86c2eac5c57a4a97793d9f442599bea5ec67ac9f4bdc3d \ + --hash=sha256:9f5b3c1ed63c8fa937a920b6c1bec78b74ee09593b3f5b979ab2ae5ef60d7600 \ + --hash=sha256:9fd46ce0845cfe28f108888b3ab17abff84ff695e01e73657eec3f96d72eef34 \ + 
--hash=sha256:a344d5dc18074e3872777b62f5f7d584ae4344cd6006c17ba12103759d407af3 \ + --hash=sha256:a60804bff28662cbcf340a4d61598891f12eea3a66af48ecfdc975ceec21e3c8 \ + --hash=sha256:a8f5f7515f3552d899c61202d99dcb17d6e3b0de777900405611cd747cecd1b8 \ + --hash=sha256:a9b7371665d4f00deb8f32208c7c5e652059b0fda41cf6dbcac6114a041f1cc2 \ + --hash=sha256:aa54f8ef31d23c506910c21163f22b124facb573bff73930735cf9fe38bf7dff \ + --hash=sha256:aba807f9569455cba566882c8938f1a549f205ee43c27b126e5450dc9f83cc62 \ + --hash=sha256:ae545f31489548c87b0cced5755cfe5a5308d00407000e72c4fa30b19c3220ac \ + --hash=sha256:af01e42ad87ae24932138f154105e88da13ce7d202a6de93fafdafb2883a00ef \ + --hash=sha256:b540bd67cfb54e6f0865ceccd9979687210d7ed1a1cc8c01f8e67e2f1e883d28 \ + --hash=sha256:b6212a60e5c482ef90f2d788835387070a88d52cf6241d3916733c9176d39eab \ + --hash=sha256:b63de12e44935d5aca7ed7ed98a255a11e5cb47f83a9fded7a5e41c40277d104 \ + --hash=sha256:ba74ec819177af1ef7f59063c6d35a214a8fde6f987f7661f4f0eecc468a8f76 \ + --hash=sha256:bb49c7f1e6ebf3821a42d81d494f538107610c3a705987f53068546b0e90303e \ + --hash=sha256:bd176afcf8f5d2aed50c3647d4925d0db0579d96f75a31e77cbaf67d8a87742d \ + --hash=sha256:bd7227b87a355ce1f4bf83bfae4399b1f5bb42e0259cb9405824bd03d2f4336a \ + --hash=sha256:bf8d9bfee991d8acc72d060d53860f356e07a50f0e0d09a8dfedea1c554dd0d5 \ + --hash=sha256:bfde76a8f430cf5c5584553adf9926534352251d379dcb266ad2b93c54a29745 \ + --hash=sha256:c341c7d868750e31961d6d8e60ff040fb9d3d3a46d77fd85e1ab8e76c3e9a5c4 \ + --hash=sha256:c7a06301c2fb096bdb0bd25fe2011531c1453b9f2c163c8031600ec73af1cc99 \ + --hash=sha256:cb23d8bb86282b342481cad4370ea0853a39e4a32a0042bb52ca6bdde132df43 \ + --hash=sha256:d119fafe7b634dbfa25a8c597718e69a930e4847f0b88e172744be24515140da \ + --hash=sha256:d40f9da8cabbf295d3a9dae1295c69975b86d941bc20f0a087f0477fa0a66231 \ + --hash=sha256:d6c9af134da4bc9b3bd3e6a70072509f295d10ee60c697826225b60b9959acdd \ + --hash=sha256:dd7659baae9ccf94ae5fe8bfaa2c7bc2e94d24611528395ce88d009107e00c6d \ + 
--hash=sha256:de8d38f1c2810fa2a4f1d995a2e9c70bb8737b18da04ac2afbf3971f65781d87 \ + --hash=sha256:e595c591a48bbc295ebf47cb91aebf9bd32f3ff76749ecf282ea7f9f6bb73886 \ + --hash=sha256:ec2aa89305006fba9ffb98970db6c8221541be7bee4c1d027421d6f6df7d1ce2 \ + --hash=sha256:ec82bf1fda6cecce7f7b915f9196601a1bd1a3079796b76d16ae4cce6d0ef89b \ + --hash=sha256:ed9ee95614a71e87f1a70bc81603f6c6760128b140bc4030abe6abaa988f1c3d \ + --hash=sha256:f047569d655f81cb70ea5be942ee5d4421b6219c3f05d131f64088c73bb0917f \ + --hash=sha256:ffa336210cf9cd8ed117011085817d00abe4c08f99968deef0013ea283547204 \ + --hash=sha256:ffb3dc385f6bb1568aa974fe65da84723210e5d9707e360e9ecb51f59406cd2e + # via + # datasets + # fsspec +aiosignal==1.3.2 \ + --hash=sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5 \ + --hash=sha256:a8c255c66fafb1e499c9351d0bf32ff2d8a0321595ebac3b93713656d2436f54 + # via aiohttp +array-record==0.6.0 \ + --hash=sha256:035575c271461f26a0684db5e3b65a487233d0921880933f680e7aeb86130a39 \ + --hash=sha256:1ea2596fb8bf19eade5e8c2d0dce9c4dc6a9d14222551863d32238f7e5754afe \ + --hash=sha256:370cf9bdcdaab7537e897aae017ea607f75ac33378991d2fbb1e52b1fedb2bcf \ + --hash=sha256:4c85df128819191a4f85937ab390f59f181ab7b6183626e5d0f5ecab47ecb022 \ + --hash=sha256:5338900974e2f10b3021b874a4f226783ffdbb0be76c931363a557336d33e478 \ + --hash=sha256:af81f6ae5404a42962b96f4efacd9a9b098cb2eeddae068cde9be0b8bfbfc457 \ + --hash=sha256:b28be32f7c81db3ec17d343899a6b5b8ae19f6d6e650448b8044de65774fa3e5 \ + --hash=sha256:c418b2b83410c630e6662d4ce0156e4e5120ee27ea9ed7672dd87c9cda39a060 \ + --hash=sha256:c51b53b90c7d4035ae94e8b265196925e6c5f5673aa35e04874aecca78656de3 + # via tensorflow-datasets +astunparse==1.6.3 \ + --hash=sha256:5ad93a8456f0d084c3456d059fd9a92cce667963232cbf763eac3bc5b7940872 \ + --hash=sha256:c2652417f2c8b5bb325c885ae329bdf3f86424075c4fd1a128674bc6fba4b8e8 + # via tensorflow-cpu +attrs==24.3.0 \ + --hash=sha256:8f5c07333d543103541ba7be0e2ce16eeee8130cb0b3f9238ab904ce1e85baff \ + 
--hash=sha256:ac96cd038792094f438ad1f6ff80837353805ac950cd2aa0e0625ef19850c308 + # via aiohttp +cachetools==5.5.0 \ + --hash=sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292 \ + --hash=sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a + # via + # -r requirements-dev.txt + # google-auth +certifi==2024.12.14 \ + --hash=sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56 \ + --hash=sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db + # via requests +charset-normalizer==3.4.1 \ + --hash=sha256:0167ddc8ab6508fe81860a57dd472b2ef4060e8d378f0cc555707126830f2537 \ + --hash=sha256:01732659ba9b5b873fc117534143e4feefecf3b2078b0a6a2e925271bb6f4cfa \ + --hash=sha256:01ad647cdd609225c5350561d084b42ddf732f4eeefe6e678765636791e78b9a \ + --hash=sha256:04432ad9479fa40ec0f387795ddad4437a2b50417c69fa275e212933519ff294 \ + --hash=sha256:0907f11d019260cdc3f94fbdb23ff9125f6b5d1039b76003b5b0ac9d6a6c9d5b \ + --hash=sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd \ + --hash=sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601 \ + --hash=sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd \ + --hash=sha256:0af291f4fe114be0280cdd29d533696a77b5b49cfde5467176ecab32353395c4 \ + --hash=sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d \ + --hash=sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2 \ + --hash=sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313 \ + --hash=sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd \ + --hash=sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa \ + --hash=sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8 \ + --hash=sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1 \ + --hash=sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2 \ + 
--hash=sha256:2a75d49014d118e4198bcee5ee0a6f25856b29b12dbf7cd012791f8a6cc5c496 \ + --hash=sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d \ + --hash=sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b \ + --hash=sha256:2fb9bd477fdea8684f78791a6de97a953c51831ee2981f8e4f583ff3b9d9687e \ + --hash=sha256:311f30128d7d333eebd7896965bfcfbd0065f1716ec92bd5638d7748eb6f936a \ + --hash=sha256:329ce159e82018d646c7ac45b01a430369d526569ec08516081727a20e9e4af4 \ + --hash=sha256:345b0426edd4e18138d6528aed636de7a9ed169b4aaf9d61a8c19e39d26838ca \ + --hash=sha256:363e2f92b0f0174b2f8238240a1a30142e3db7b957a5dd5689b0e75fb717cc78 \ + --hash=sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408 \ + --hash=sha256:3bed14e9c89dcb10e8f3a29f9ccac4955aebe93c71ae803af79265c9ca5644c5 \ + --hash=sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3 \ + --hash=sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f \ + --hash=sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a \ + --hash=sha256:49402233c892a461407c512a19435d1ce275543138294f7ef013f0b63d5d3765 \ + --hash=sha256:4c0907b1928a36d5a998d72d64d8eaa7244989f7aaaf947500d3a800c83a3fd6 \ + --hash=sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146 \ + --hash=sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6 \ + --hash=sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9 \ + --hash=sha256:619a609aa74ae43d90ed2e89bdd784765de0a25ca761b93e196d938b8fd1dbbd \ + --hash=sha256:6e27f48bcd0957c6d4cb9d6fa6b61d192d0b13d5ef563e5f2ae35feafc0d179c \ + --hash=sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f \ + --hash=sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545 \ + --hash=sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176 \ + --hash=sha256:75832c08354f595c760a804588b9357d34ec00ba1c940c15e31e96d902093770 \ + 
--hash=sha256:7709f51f5f7c853f0fb938bcd3bc59cdfdc5203635ffd18bf354f6967ea0f824 \ + --hash=sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f \ + --hash=sha256:7974a0b5ecd505609e3b19742b60cee7aa2aa2fb3151bc917e6e2646d7667dcf \ + --hash=sha256:7a4f97a081603d2050bfaffdefa5b02a9ec823f8348a572e39032caa8404a487 \ + --hash=sha256:7b1bef6280950ee6c177b326508f86cad7ad4dff12454483b51d8b7d673a2c5d \ + --hash=sha256:7d053096f67cd1241601111b698f5cad775f97ab25d81567d3f59219b5f1adbd \ + --hash=sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b \ + --hash=sha256:807f52c1f798eef6cf26beb819eeb8819b1622ddfeef9d0977a8502d4db6d534 \ + --hash=sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f \ + --hash=sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b \ + --hash=sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9 \ + --hash=sha256:89149166622f4db9b4b6a449256291dc87a99ee53151c74cbd82a53c8c2f6ccd \ + --hash=sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125 \ + --hash=sha256:8c60ca7339acd497a55b0ea5d506b2a2612afb2826560416f6894e8b5770d4a9 \ + --hash=sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de \ + --hash=sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11 \ + --hash=sha256:97f68b8d6831127e4787ad15e6757232e14e12060bec17091b85eb1486b91d8d \ + --hash=sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35 \ + --hash=sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f \ + --hash=sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda \ + --hash=sha256:ab36c8eb7e454e34e60eb55ca5d241a5d18b2c6244f6827a30e451c42410b5f7 \ + --hash=sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a \ + --hash=sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971 \ + --hash=sha256:b7b2d86dd06bfc2ade3312a83a5c364c7ec2e3498f8734282c6c3d4b07b346b8 \ + 
--hash=sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41 \ + --hash=sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d \ + --hash=sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f \ + --hash=sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757 \ + --hash=sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a \ + --hash=sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886 \ + --hash=sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77 \ + --hash=sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76 \ + --hash=sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247 \ + --hash=sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85 \ + --hash=sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb \ + --hash=sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7 \ + --hash=sha256:dccbe65bd2f7f7ec22c4ff99ed56faa1e9f785482b9bbd7c717e26fd723a1d1e \ + --hash=sha256:dd78cfcda14a1ef52584dbb008f7ac81c1328c0f58184bf9a84c49c605002da6 \ + --hash=sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037 \ + --hash=sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1 \ + --hash=sha256:ea0d8d539afa5eb2728aa1932a988a9a7af94f18582ffae4bc10b3fbdad0626e \ + --hash=sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807 \ + --hash=sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407 \ + --hash=sha256:ecddf25bee22fe4fe3737a399d0d177d72bc22be6913acfab364b40bce1ba83c \ + --hash=sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12 \ + --hash=sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3 \ + --hash=sha256:f30bf9fd9be89ecb2360c7d94a711f00c09b976258846efe40db3d05828e8089 \ + --hash=sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd \ + 
--hash=sha256:fc54db6c8593ef7d4b2a331b58653356cf04f67c960f584edb7c3d8c97e8f39e \ + --hash=sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00 \ + --hash=sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616 + # via requests +chex==0.1.88 \ + --hash=sha256:234b61a5baa8132802e4b9c5657167d6c8a911d90a59a0bec47d537567e41b75 \ + --hash=sha256:565de897b1373232cdfca5e699f50fa49403d2c7d23f6c5a75a97ef713d2fe36 + # via optax +click==8.1.8 \ + --hash=sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2 \ + --hash=sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a + # via tensorflow-datasets +cloud-tpu-client==0.10 \ + --hash=sha256:e3ee7a0a69c3fdbfc82826d86762f24e43bfcd6096af047185708fe062c7f849 + # via torch-xla +cloudpickle==3.1.0 \ + --hash=sha256:81a929b6e3c7335c863c771d673d105f02efdb89dfaba0c90495d1c64796601b \ + --hash=sha256:fe11acda67f61aaaec473e3afe030feb131d78a43461b718185363384f1ba12e + # via -r requirements-dev.txt +contourpy==1.3.1 \ + --hash=sha256:041b640d4ec01922083645a94bb3b2e777e6b626788f4095cf21abbe266413c1 \ + --hash=sha256:05e806338bfeaa006acbdeba0ad681a10be63b26e1b17317bfac3c5d98f36cda \ + --hash=sha256:08d9d449a61cf53033612cb368f3a1b26cd7835d9b8cd326647efe43bca7568d \ + --hash=sha256:0ffa84be8e0bd33410b17189f7164c3589c229ce5db85798076a3fa136d0e509 \ + --hash=sha256:113231fe3825ebf6f15eaa8bc1f5b0ddc19d42b733345eae0934cb291beb88b6 \ + --hash=sha256:14c102b0eab282427b662cb590f2e9340a9d91a1c297f48729431f2dcd16e14f \ + --hash=sha256:174e758c66bbc1c8576992cec9599ce8b6672b741b5d336b5c74e35ac382b18e \ + --hash=sha256:19c1555a6801c2f084c7ddc1c6e11f02eb6a6016ca1318dd5452ba3f613a1751 \ + --hash=sha256:19d40d37c1c3a4961b4619dd9d77b12124a453cc3d02bb31a07d58ef684d3d86 \ + --hash=sha256:1bf98051f1045b15c87868dbaea84f92408337d4f81d0e449ee41920ea121d3b \ + --hash=sha256:20914c8c973f41456337652a6eeca26d2148aa96dd7ac323b74516988bea89fc \ + 
--hash=sha256:287ccc248c9e0d0566934e7d606201abd74761b5703d804ff3df8935f523d546 \ + --hash=sha256:2ba94a401342fc0f8b948e57d977557fbf4d515f03c67682dd5c6191cb2d16ec \ + --hash=sha256:31c1b55c1f34f80557d3830d3dd93ba722ce7e33a0b472cba0ec3b6535684d8f \ + --hash=sha256:36987a15e8ace5f58d4d5da9dca82d498c2bbb28dff6e5d04fbfcc35a9cb3a82 \ + --hash=sha256:3a04ecd68acbd77fa2d39723ceca4c3197cb2969633836ced1bea14e219d077c \ + --hash=sha256:3e8b974d8db2c5610fb4e76307e265de0edb655ae8169e8b21f41807ccbeec4b \ + --hash=sha256:3ea9924d28fc5586bf0b42d15f590b10c224117e74409dd7a0be3b62b74a501c \ + --hash=sha256:4318af1c925fb9a4fb190559ef3eec206845f63e80fb603d47f2d6d67683901c \ + --hash=sha256:44a29502ca9c7b5ba389e620d44f2fbe792b1fb5734e8b931ad307071ec58c53 \ + --hash=sha256:47734d7073fb4590b4a40122b35917cd77be5722d80683b249dac1de266aac80 \ + --hash=sha256:4d76d5993a34ef3df5181ba3c92fabb93f1eaa5729504fb03423fcd9f3177242 \ + --hash=sha256:4dbbc03a40f916a8420e420d63e96a1258d3d1b58cbdfd8d1f07b49fcbd38e85 \ + --hash=sha256:500360b77259914f7805af7462e41f9cb7ca92ad38e9f94d6c8641b089338124 \ + --hash=sha256:523a8ee12edfa36f6d2a49407f705a6ef4c5098de4f498619787e272de93f2d5 \ + --hash=sha256:573abb30e0e05bf31ed067d2f82500ecfdaec15627a59d63ea2d95714790f5c2 \ + --hash=sha256:5b75aa69cb4d6f137b36f7eb2ace9280cfb60c55dc5f61c731fdf6f037f958a3 \ + --hash=sha256:61332c87493b00091423e747ea78200659dc09bdf7fd69edd5e98cef5d3e9a8d \ + --hash=sha256:805617228ba7e2cbbfb6c503858e626ab528ac2a32a04a2fe88ffaf6b02c32bc \ + --hash=sha256:841ad858cff65c2c04bf93875e384ccb82b654574a6d7f30453a04f04af71342 \ + --hash=sha256:89785bb2a1980c1bd87f0cb1517a71cde374776a5f150936b82580ae6ead44a1 \ + --hash=sha256:8eb96e79b9f3dcadbad2a3891672f81cdcab7f95b27f28f1c67d75f045b6b4f1 \ + --hash=sha256:974d8145f8ca354498005b5b981165b74a195abfae9a8129df3e56771961d595 \ + --hash=sha256:9ddeb796389dadcd884c7eb07bd14ef12408aaae358f0e2ae24114d797eede30 \ + --hash=sha256:a045f341a77b77e1c5de31e74e966537bba9f3c4099b35bf4c2e3939dd54cdab \ + 
--hash=sha256:a0cffcbede75c059f535725c1680dfb17b6ba8753f0c74b14e6a9c68c29d7ea3 \ + --hash=sha256:a761d9ccfc5e2ecd1bf05534eda382aa14c3e4f9205ba5b1684ecfe400716ef2 \ + --hash=sha256:a7895f46d47671fa7ceec40f31fae721da51ad34bdca0bee83e38870b1f47ffd \ + --hash=sha256:a9fa36448e6a3a1a9a2ba23c02012c43ed88905ec80163f2ffe2421c7192a5d7 \ + --hash=sha256:ab29962927945d89d9b293eabd0d59aea28d887d4f3be6c22deaefbb938a7277 \ + --hash=sha256:abbb49fb7dac584e5abc6636b7b2a7227111c4f771005853e7d25176daaf8453 \ + --hash=sha256:ac4578ac281983f63b400f7fe6c101bedc10651650eef012be1ccffcbacf3697 \ + --hash=sha256:adce39d67c0edf383647a3a007de0a45fd1b08dedaa5318404f1a73059c2512b \ + --hash=sha256:ade08d343436a94e633db932e7e8407fe7de8083967962b46bdfc1b0ced39454 \ + --hash=sha256:b2bdca22a27e35f16794cf585832e542123296b4687f9fd96822db6bae17bfc9 \ + --hash=sha256:b2f926efda994cdf3c8d3fdb40b9962f86edbc4457e739277b961eced3d0b4c1 \ + --hash=sha256:b457d6430833cee8e4b8e9b6f07aa1c161e5e0d52e118dc102c8f9bd7dd060d6 \ + --hash=sha256:c414fc1ed8ee1dbd5da626cf3710c6013d3d27456651d156711fa24f24bd1291 \ + --hash=sha256:cb76c1a154b83991a3cbbf0dfeb26ec2833ad56f95540b442c73950af2013750 \ + --hash=sha256:dfd97abd83335045a913e3bcc4a09c0ceadbe66580cf573fe961f4a825efa699 \ + --hash=sha256:e914a8cb05ce5c809dd0fe350cfbb4e881bde5e2a38dc04e3afe1b3e58bd158e \ + --hash=sha256:ece6df05e2c41bd46776fbc712e0996f7c94e0d0543af1656956d150c4ca7c81 \ + --hash=sha256:efa874e87e4a647fd2e4f514d5e91c7d493697127beb95e77d2f7561f6905bd9 \ + --hash=sha256:f611e628ef06670df83fce17805c344710ca5cde01edfdc72751311da8585375 + # via matplotlib +cycler==0.12.1 \ + --hash=sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30 \ + --hash=sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c + # via matplotlib +datasets==2.2.1 \ + --hash=sha256:1938f3e99599422de50b9b54fe802aca854ed130382dab0b3820c821f7ae6d5e \ + --hash=sha256:d362717c4394589b516c8f397ff20a6fe720454aed877ab61d06f3bc05df9544 + # via -r 
requirements-dev.txt +dill==0.3.9 \ + --hash=sha256:468dff3b89520b474c0397703366b7b95eebe6303f108adf9b19da1f702be87a \ + --hash=sha256:81aa267dddf68cbfe8029c42ca9ec6a4ab3b22371d1c450abc54422577b4512c + # via + # datasets + # multiprocess +dm-haiku==0.0.13 \ + --hash=sha256:029bb91b5b1edb0d3fe23304d3bf12a545ea6e485041f7f5d8c8d85ebcf6e17d \ + --hash=sha256:ee9562c68a059f146ad07f555ca591cb8c11ef751afecc38353863562bd23f43 + # via -r requirements-dev.txt +dm-tree==0.1.8 \ + --hash=sha256:054b461f8176f4bce7a21f7b1870f873a1ced3bdbe1282c816c550bb43c71fa6 \ + --hash=sha256:09964470f76a5201aff2e8f9b26842976de7889300676f927930f6285e256760 \ + --hash=sha256:0d3172394079a86c3a759179c65f64c48d1a42b89495fcf38976d11cc3bb952c \ + --hash=sha256:0e9620ccf06393eb6b613b5e366469304622d4ea96ae6540b28a33840e6c89cf \ + --hash=sha256:0fcaabbb14e7980377439e7140bd05552739ca5e515ecb3119f234acee4b9430 \ + --hash=sha256:1607ce49aa42f010d1e5e616d92ce899d66835d4d8bea49679582435285515de \ + --hash=sha256:181c35521d480d0365f39300542cb6cd7fd2b77351bb43d7acfda15aef63b317 \ + --hash=sha256:1d7c26e431fc93cc7e0cba867eb000db6a05f6f2b25af11ac4e9dada88fc5bca \ + --hash=sha256:1fe962015b2fe1282892b28ebe962faed53c7f98d942da9a4625cbf27baef913 \ + --hash=sha256:250b692fb75f45f02e2f58fbef9ab338904ef334b90557565621fa251df267cf \ + --hash=sha256:2869228d9c619074de501a3c10dc7f07c75422f8fab36ecdcb859b6f1b1ec3ef \ + --hash=sha256:28c52cbf4f8b3dbd0beaedf44f69fa85eec5e9dede612e08035e06ada6ec9426 \ + --hash=sha256:2f7915660f59c09068e428613c480150180df1060561fd0d1470684ae7007bd1 \ + --hash=sha256:343a4a4ebaa127451ff971254a4be4084eb4bdc0b2513c32b46f6f728fd03f9e \ + --hash=sha256:35cc164a79336bfcfafb47e5f297898359123bbd3330c1967f0c4994f9cf9f60 \ + --hash=sha256:378cc8ad93c5fe3590f405a309980721f021c790ca1bdf9b15bb1d59daec57f5 \ + --hash=sha256:39070ba268c0491af9fe7a58644d99e8b4f2cde6e5884ba3380bddc84ed43d5f \ + --hash=sha256:435227cf3c5dc63f4de054cf3d00183790bd9ead4c3623138c74dde7f67f521b \ + 
--hash=sha256:5483dca4d7eb1a0d65fe86d3b6a53ae717face83c1f17e0887b1a4a64ae5c410 \ + --hash=sha256:694c3654cfd2a81552c08ec66bb5c4a3d48fa292b9a181880fb081c36c5b9134 \ + --hash=sha256:75c5d528bb992981c20793b6b453e91560784215dffb8a5440ba999753c14ceb \ + --hash=sha256:803bfc53b4659f447ac694dbd04235f94a73ef7c1fd1e0df7c84ac41e0bc963b \ + --hash=sha256:81fce77f22a302d7a5968aebdf4efafef4def7ce96528719a354e6990dcd49c7 \ + --hash=sha256:83b7764de0d855338abefc6e3ee9fe40d301668310aa3baea3f778ff051f4393 \ + --hash=sha256:8c60a7eadab64c2278861f56bca320b2720f163dca9d7558103c3b77f2416571 \ + --hash=sha256:8ed3564abed97c806db122c2d3e1a2b64c74a63debe9903aad795167cc301368 \ + --hash=sha256:94d3f0826311f45ee19b75f5b48c99466e4218a0489e81c0f0167bda50cacf22 \ + --hash=sha256:96a548a406a6fb15fe58f6a30a57ff2f2aafbf25f05afab00c8f5e5977b6c715 \ + --hash=sha256:a5d819c38c03f0bb5b3b3703c60e4b170355a0fc6b5819325bf3d4ceb3ae7e80 \ + --hash=sha256:ad16ceba90a56ec47cf45b21856d14962ac314787975ef786efb5e6e9ca75ec7 \ + --hash=sha256:af4b3d372f2477dcd89a6e717e4a575ca35ccc20cc4454a8a4b6f8838a00672d \ + --hash=sha256:b095ba4f8ca1ba19350fd53cf1f8f3eb0bd406aa28af64a6dfc86707b32a810a \ + --hash=sha256:b9bd9b9ccb59409d33d51d84b7668010c04c2af7d4a371632874c1ca356cff3d \ + --hash=sha256:b9f89a454e98806b44fe9d40ec9eee61f848388f7e79ac2371a55679bd5a3ac6 \ + --hash=sha256:bb2d109f42190225112da899b9f3d46d0d5f26aef501c61e43529fe9322530b5 \ + --hash=sha256:c0a94aba18a35457a1b5cd716fd7b46c5dafdc4cf7869b4bae665b91c4682a8e \ + --hash=sha256:c5c8c12e3fda754ef6af94161bacdaeda816d941995fac415d6855c6c386af68 \ + --hash=sha256:d1612fcaecd79023dbc6a6ae48d51a80beb5c385d6f3f6d71688e57bc8d07de8 \ + --hash=sha256:d16e1f2a073604cfcc09f7131ae8d534674f43c3aef4c25742eae295bc60d04f \ + --hash=sha256:d20f2faa3672b52e5013f4077117bfb99c4cfc0b445d3bde1584c34032b57436 \ + --hash=sha256:d40fa4106ca6edc66760246a08f500ec0c85ef55c762fb4a363f6ee739ba02ee \ + --hash=sha256:de287fabc464b8734be251e46e06aa9aa1001f34198da2b6ce07bd197172b9cb \ + 
--hash=sha256:e4d714371bb08839e4e5e29024fc95832d9affe129825ef38836b143028bd144 \ + --hash=sha256:ea9e59e0451e7d29aece402d9f908f2e2a80922bcde2ebfd5dcb07750fcbfee8 \ + --hash=sha256:f7ac31b9aecccb2c6e1ab29706f6ded3eba0c2c69c770322c9c685929c3d6afb \ + --hash=sha256:fa42a605d099ee7d41ba2b5fb75e21423951fd26e5d50583a00471238fb3021d + # via tensorflow-datasets +docstring-parser==0.16 \ + --hash=sha256:538beabd0af1e2db0146b6bd3caa526c35a34d61af9fd2887f3a8a27a739aa6e \ + --hash=sha256:bf0a1387354d3691d102edef7ec124f219ef639982d096e26e3b60aeffa90637 + # via simple-parsing +etils[array-types,edc,enp,epath,epy,etqdm,etree]==1.11.0 \ + --hash=sha256:a394cf3476bcec51c221426a70c39cd1006e889456ba41e4d7f12fd6814be7a5 \ + --hash=sha256:aff3278a3be7fddf302dfd80335e9f924244666c71239cd91e836f3d055f1c4a + # via + # array-record + # optax + # orbax-checkpoint + # tensorflow-datasets +filelock==3.16.1 \ + --hash=sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0 \ + --hash=sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435 + # via + # huggingface-hub + # torch + # transformers + # triton +flatbuffers==24.12.23 \ + --hash=sha256:2910b0bc6ae9b6db78dd2b18d0b7a0709ba240fb5585f286a3a2b30785c22dac \ + --hash=sha256:c418e0d48890f4142b92fd3e343e73a48f194e1f80075ddcc5793779b3585444 + # via tensorflow-cpu +flax==0.9.0 \ + --hash=sha256:12cd8f7162165ddd56877fb1cd9a4fcb47a31569e4c5343eeb59a36369fa2cfe \ + --hash=sha256:8b7f361eed0f5324e81f9dc8d02ea53da5f993d7c2e37e7aa5b37d3f6331dd53 + # via -r requirements-dev.txt +fonttools==4.55.3 \ + --hash=sha256:07f8288aacf0a38d174445fc78377a97fb0b83cfe352a90c9d9c1400571963c7 \ + --hash=sha256:11e5de1ee0d95af4ae23c1a138b184b7f06e0b6abacabf1d0db41c90b03d834b \ + --hash=sha256:1bc7ad24ff98846282eef1cbeac05d013c2154f977a79886bb943015d2b1b261 \ + --hash=sha256:1dcc07934a2165ccdc3a5a608db56fb3c24b609658a5b340aee4ecf3ba679dc0 \ + --hash=sha256:22f38464daa6cdb7b6aebd14ab06609328fe1e9705bb0fcc7d1e69de7109ee02 \ + 
--hash=sha256:27e4ae3592e62eba83cd2c4ccd9462dcfa603ff78e09110680a5444c6925d841 \ + --hash=sha256:3983313c2a04d6cc1fe9251f8fc647754cf49a61dac6cb1e7249ae67afaafc45 \ + --hash=sha256:529cef2ce91dc44f8e407cc567fae6e49a1786f2fefefa73a294704c415322a4 \ + --hash=sha256:5323a22eabddf4b24f66d26894f1229261021dacd9d29e89f7872dd8c63f0b8b \ + --hash=sha256:54153c49913f45065c8d9e6d0c101396725c5621c8aee744719300f79771d75a \ + --hash=sha256:546565028e244a701f73df6d8dd6be489d01617863ec0c6a42fa25bf45d43048 \ + --hash=sha256:5480673f599ad410695ca2ddef2dfefe9df779a9a5cda89503881e503c9c7d90 \ + --hash=sha256:5e8d657cd7326eeaba27de2740e847c6b39dde2f8d7cd7cc56f6aad404ddf0bd \ + --hash=sha256:62d65a3022c35e404d19ca14f291c89cc5890032ff04f6c17af0bd1927299674 \ + --hash=sha256:6314bf82c54c53c71805318fcf6786d986461622dd926d92a465199ff54b1b72 \ + --hash=sha256:7a8aa2c5e5b8b3bcb2e4538d929f6589a5c6bdb84fd16e2ed92649fb5454f11c \ + --hash=sha256:827e95fdbbd3e51f8b459af5ea10ecb4e30af50221ca103bea68218e9615de07 \ + --hash=sha256:859c358ebf41db18fb72342d3080bce67c02b39e86b9fbcf1610cca14984841b \ + --hash=sha256:86721fbc389ef5cc1e2f477019e5069e8e4421e8d9576e9c26f840dbb04678de \ + --hash=sha256:89bdc5d88bdeec1b15af790810e267e8332d92561dce4f0748c2b95c9bdf3926 \ + --hash=sha256:8c4491699bad88efe95772543cd49870cf756b019ad56294f6498982408ab03e \ + --hash=sha256:8c5ec45428edaa7022f1c949a632a6f298edc7b481312fc7dc258921e9399628 \ + --hash=sha256:8e75f12c82127486fac2d8bfbf5bf058202f54bf4f158d367e41647b972342ca \ + --hash=sha256:a430178ad3e650e695167cb53242dae3477b35c95bef6525b074d87493c4bf29 \ + --hash=sha256:a8c2794ded89399cc2169c4d0bf7941247b8d5932b2659e09834adfbb01589aa \ + --hash=sha256:aca318b77f23523309eec4475d1fbbb00a6b133eb766a8bdc401faba91261abe \ + --hash=sha256:ae3b6600565b2d80b7c05acb8e24d2b26ac407b27a3f2e078229721ba5698427 \ + --hash=sha256:aedbeb1db64496d098e6be92b2e63b5fac4e53b1b92032dfc6988e1ea9134a4d \ + --hash=sha256:aee3b57643827e237ff6ec6d28d9ff9766bd8b21e08cd13bff479e13d4b14765 \ + 
--hash=sha256:b54baf65c52952db65df39fcd4820668d0ef4766c0ccdf32879b77f7c804d5c5 \ + --hash=sha256:b586ab5b15b6097f2fb71cafa3c98edfd0dba1ad8027229e7b1e204a58b0e09d \ + --hash=sha256:b8d5e8916c0970fbc0f6f1bece0063363bb5857a7f170121a4493e31c3db3314 \ + --hash=sha256:bc5dbb4685e51235ef487e4bd501ddfc49be5aede5e40f4cefcccabc6e60fb4b \ + --hash=sha256:bdcc9f04b36c6c20978d3f060e5323a43f6222accc4e7fcbef3f428e216d96af \ + --hash=sha256:c3ca99e0d460eff46e033cd3992a969658c3169ffcd533e0a39c63a38beb6831 \ + --hash=sha256:caf8230f3e10f8f5d7593eb6d252a37caf58c480b19a17e250a63dad63834cf3 \ + --hash=sha256:cd70de1a52a8ee2d1877b6293af8a2484ac82514f10b1c67c1c5762d38073e56 \ + --hash=sha256:cf4fe7c124aa3f4e4c1940880156e13f2f4d98170d35c749e6b4f119a872551e \ + --hash=sha256:d342e88764fb201286d185093781bf6628bbe380a913c24adf772d901baa8276 \ + --hash=sha256:da9da6d65cd7aa6b0f806556f4985bcbf603bf0c5c590e61b43aa3e5a0f822d0 \ + --hash=sha256:dc5294a3d5c84226e3dbba1b6f61d7ad813a8c0238fceea4e09aa04848c3d851 \ + --hash=sha256:dd68c87a2bfe37c5b33bcda0fba39b65a353876d3b9006fde3adae31f97b3ef5 \ + --hash=sha256:e6e8766eeeb2de759e862004aa11a9ea3d6f6d5ec710551a88b476192b64fd54 \ + --hash=sha256:e894b5bd60d9f473bed7a8f506515549cc194de08064d829464088d23097331b \ + --hash=sha256:eb6ca911c4c17eb51853143624d8dc87cdcdf12a711fc38bf5bd21521e79715f \ + --hash=sha256:ed63959d00b61959b035c7d47f9313c2c1ece090ff63afea702fe86de00dbed4 \ + --hash=sha256:f412604ccbeee81b091b420272841e5ec5ef68967a9790e80bffd0e30b8e2977 \ + --hash=sha256:f7d66c15ba875432a2d2fb419523f5d3d347f91f48f57b8b08a2dfc3c39b8a3f \ + --hash=sha256:f9e736f60f4911061235603a6119e72053073a12c6d7904011df2d8fad2c0e35 \ + --hash=sha256:fb594b5a99943042c702c550d5494bdd7577f6ef19b0bc73877c948a63184a32 + # via matplotlib +frozenlist==1.5.0 \ + --hash=sha256:000a77d6034fbad9b6bb880f7ec073027908f1b40254b5d6f26210d2dab1240e \ + --hash=sha256:03d33c2ddbc1816237a67f66336616416e2bbb6beb306e5f890f2eb22b959cdf \ + 
--hash=sha256:04a5c6babd5e8fb7d3c871dc8b321166b80e41b637c31a995ed844a6139942b6 \ + --hash=sha256:0996c66760924da6e88922756d99b47512a71cfd45215f3570bf1e0b694c206a \ + --hash=sha256:0cc974cc93d32c42e7b0f6cf242a6bd941c57c61b618e78b6c0a96cb72788c1d \ + --hash=sha256:0f253985bb515ecd89629db13cb58d702035ecd8cfbca7d7a7e29a0e6d39af5f \ + --hash=sha256:11aabdd62b8b9c4b84081a3c246506d1cddd2dd93ff0ad53ede5defec7886b28 \ + --hash=sha256:12f78f98c2f1c2429d42e6a485f433722b0061d5c0b0139efa64f396efb5886b \ + --hash=sha256:140228863501b44b809fb39ec56b5d4071f4d0aa6d216c19cbb08b8c5a7eadb9 \ + --hash=sha256:1431d60b36d15cda188ea222033eec8e0eab488f39a272461f2e6d9e1a8e63c2 \ + --hash=sha256:15538c0cbf0e4fa11d1e3a71f823524b0c46299aed6e10ebb4c2089abd8c3bec \ + --hash=sha256:15b731db116ab3aedec558573c1a5eec78822b32292fe4f2f0345b7f697745c2 \ + --hash=sha256:17dcc32fc7bda7ce5875435003220a457bcfa34ab7924a49a1c19f55b6ee185c \ + --hash=sha256:1893f948bf6681733aaccf36c5232c231e3b5166d607c5fa77773611df6dc336 \ + --hash=sha256:189f03b53e64144f90990d29a27ec4f7997d91ed3d01b51fa39d2dbe77540fd4 \ + --hash=sha256:1a8ea951bbb6cacd492e3948b8da8c502a3f814f5d20935aae74b5df2b19cf3d \ + --hash=sha256:1b96af8c582b94d381a1c1f51ffaedeb77c821c690ea5f01da3d70a487dd0a9b \ + --hash=sha256:1e76bfbc72353269c44e0bc2cfe171900fbf7f722ad74c9a7b638052afe6a00c \ + --hash=sha256:2150cc6305a2c2ab33299453e2968611dacb970d2283a14955923062c8d00b10 \ + --hash=sha256:226d72559fa19babe2ccd920273e767c96a49b9d3d38badd7c91a0fdeda8ea08 \ + --hash=sha256:237f6b23ee0f44066219dae14c70ae38a63f0440ce6750f868ee08775073f942 \ + --hash=sha256:29d94c256679247b33a3dc96cce0f93cbc69c23bf75ff715919332fdbb6a32b8 \ + --hash=sha256:2b5e23253bb709ef57a8e95e6ae48daa9ac5f265637529e4ce6b003a37b2621f \ + --hash=sha256:2d0da8bbec082bf6bf18345b180958775363588678f64998c2b7609e34719b10 \ + --hash=sha256:2f3f7a0fbc219fb4455264cae4d9f01ad41ae6ee8524500f381de64ffaa077d5 \ + --hash=sha256:30c72000fbcc35b129cb09956836c7d7abf78ab5416595e4857d1cae8d6251a6 \ + 
--hash=sha256:31115ba75889723431aa9a4e77d5f398f5cf976eea3bdf61749731f62d4a4a21 \ + --hash=sha256:31a9ac2b38ab9b5a8933b693db4939764ad3f299fcaa931a3e605bc3460e693c \ + --hash=sha256:366d8f93e3edfe5a918c874702f78faac300209a4d5bf38352b2c1bdc07a766d \ + --hash=sha256:374ca2dabdccad8e2a76d40b1d037f5bd16824933bf7bcea3e59c891fd4a0923 \ + --hash=sha256:44c49271a937625619e862baacbd037a7ef86dd1ee215afc298a417ff3270608 \ + --hash=sha256:45e0896250900b5aa25180f9aec243e84e92ac84bd4a74d9ad4138ef3f5c97de \ + --hash=sha256:498524025a5b8ba81695761d78c8dd7382ac0b052f34e66939c42df860b8ff17 \ + --hash=sha256:50cf5e7ee9b98f22bdecbabf3800ae78ddcc26e4a435515fc72d97903e8488e0 \ + --hash=sha256:52ef692a4bc60a6dd57f507429636c2af8b6046db8b31b18dac02cbc8f507f7f \ + --hash=sha256:561eb1c9579d495fddb6da8959fd2a1fca2c6d060d4113f5844b433fc02f2641 \ + --hash=sha256:5a3ba5f9a0dfed20337d3e966dc359784c9f96503674c2faf015f7fe8e96798c \ + --hash=sha256:5b6a66c18b5b9dd261ca98dffcb826a525334b2f29e7caa54e182255c5f6a65a \ + --hash=sha256:5c28f4b5dbef8a0d8aad0d4de24d1e9e981728628afaf4ea0792f5d0939372f0 \ + --hash=sha256:5d7f5a50342475962eb18b740f3beecc685a15b52c91f7d975257e13e029eca9 \ + --hash=sha256:6321899477db90bdeb9299ac3627a6a53c7399c8cd58d25da094007402b039ab \ + --hash=sha256:6482a5851f5d72767fbd0e507e80737f9c8646ae7fd303def99bfe813f76cf7f \ + --hash=sha256:666534d15ba8f0fda3f53969117383d5dc021266b3c1a42c9ec4855e4b58b9d3 \ + --hash=sha256:683173d371daad49cffb8309779e886e59c2f369430ad28fe715f66d08d4ab1a \ + --hash=sha256:6e9080bb2fb195a046e5177f10d9d82b8a204c0736a97a153c2466127de87784 \ + --hash=sha256:73f2e31ea8dd7df61a359b731716018c2be196e5bb3b74ddba107f694fbd7604 \ + --hash=sha256:7437601c4d89d070eac8323f121fcf25f88674627505334654fd027b091db09d \ + --hash=sha256:76e4753701248476e6286f2ef492af900ea67d9706a0155335a40ea21bf3b2f5 \ + --hash=sha256:7707a25d6a77f5d27ea7dc7d1fc608aa0a478193823f88511ef5e6b8a48f9d03 \ + --hash=sha256:7948140d9f8ece1745be806f2bfdf390127cf1a763b925c4a805c603df5e697e \ + 
--hash=sha256:7a1a048f9215c90973402e26c01d1cff8a209e1f1b53f72b95c13db61b00f953 \ + --hash=sha256:7d57d8f702221405a9d9b40f9da8ac2e4a1a8b5285aac6100f3393675f0a85ee \ + --hash=sha256:7f3c8c1dacd037df16e85227bac13cca58c30da836c6f936ba1df0c05d046d8d \ + --hash=sha256:81d5af29e61b9c8348e876d442253723928dce6433e0e76cd925cd83f1b4b817 \ + --hash=sha256:828afae9f17e6de596825cf4228ff28fbdf6065974e5ac1410cecc22f699d2b3 \ + --hash=sha256:87f724d055eb4785d9be84e9ebf0f24e392ddfad00b3fe036e43f489fafc9039 \ + --hash=sha256:8969190d709e7c48ea386db202d708eb94bdb29207a1f269bab1196ce0dcca1f \ + --hash=sha256:90646abbc7a5d5c7c19461d2e3eeb76eb0b204919e6ece342feb6032c9325ae9 \ + --hash=sha256:91d6c171862df0a6c61479d9724f22efb6109111017c87567cfeb7b5d1449fdf \ + --hash=sha256:9272fa73ca71266702c4c3e2d4a28553ea03418e591e377a03b8e3659d94fa76 \ + --hash=sha256:92b5278ed9d50fe610185ecd23c55d8b307d75ca18e94c0e7de328089ac5dcba \ + --hash=sha256:97160e245ea33d8609cd2b8fd997c850b56db147a304a262abc2b3be021a9171 \ + --hash=sha256:977701c081c0241d0955c9586ffdd9ce44f7a7795df39b9151cd9a6fd0ce4cfb \ + --hash=sha256:9b7dc0c4338e6b8b091e8faf0db3168a37101943e687f373dce00959583f7439 \ + --hash=sha256:9b93d7aaa36c966fa42efcaf716e6b3900438632a626fb09c049f6a2f09fc631 \ + --hash=sha256:9bbcdfaf4af7ce002694a4e10a0159d5a8d20056a12b05b45cea944a4953f972 \ + --hash=sha256:9c2623347b933fcb9095841f1cc5d4ff0b278addd743e0e966cb3d460278840d \ + --hash=sha256:a2fe128eb4edeabe11896cb6af88fca5346059f6c8d807e3b910069f39157869 \ + --hash=sha256:a72b7a6e3cd2725eff67cd64c8f13335ee18fc3c7befc05aed043d24c7b9ccb9 \ + --hash=sha256:a9fe0f1c29ba24ba6ff6abf688cb0b7cf1efab6b6aa6adc55441773c252f7411 \ + --hash=sha256:b97f7b575ab4a8af9b7bc1d2ef7f29d3afee2226bd03ca3875c16451ad5a7723 \ + --hash=sha256:bdac3c7d9b705d253b2ce370fde941836a5f8b3c5c2b8fd70940a3ea3af7f4f2 \ + --hash=sha256:c03eff4a41bd4e38415cbed054bbaff4a075b093e2394b6915dca34a40d1e38b \ + --hash=sha256:c16d2fa63e0800723139137d667e1056bee1a1cf7965153d2d104b62855e9b99 \ + 
--hash=sha256:c1fac3e2ace2eb1052e9f7c7db480818371134410e1f5c55d65e8f3ac6d1407e \ + --hash=sha256:ce3aa154c452d2467487765e3adc730a8c153af77ad84096bc19ce19a2400840 \ + --hash=sha256:cee6798eaf8b1416ef6909b06f7dc04b60755206bddc599f52232606e18179d3 \ + --hash=sha256:d1b3eb7b05ea246510b43a7e53ed1653e55c2121019a97e60cad7efb881a97bb \ + --hash=sha256:d994863bba198a4a518b467bb971c56e1db3f180a25c6cf7bb1949c267f748c3 \ + --hash=sha256:dd47a5181ce5fcb463b5d9e17ecfdb02b678cca31280639255ce9d0e5aa67af0 \ + --hash=sha256:dd94994fc91a6177bfaafd7d9fd951bc8689b0a98168aa26b5f543868548d3ca \ + --hash=sha256:de537c11e4aa01d37db0d403b57bd6f0546e71a82347a97c6a9f0dcc532b3a45 \ + --hash=sha256:df6e2f325bfee1f49f81aaac97d2aa757c7646534a06f8f577ce184afe2f0a9e \ + --hash=sha256:e66cc454f97053b79c2ab09c17fbe3c825ea6b4de20baf1be28919460dd7877f \ + --hash=sha256:e79225373c317ff1e35f210dd5f1344ff31066ba8067c307ab60254cd3a78ad5 \ + --hash=sha256:f1577515d35ed5649d52ab4319db757bb881ce3b2b796d7283e6634d99ace307 \ + --hash=sha256:f1e6540b7fa044eee0bb5111ada694cf3dc15f2b0347ca125ee9ca984d5e9e6e \ + --hash=sha256:f2ac49a9bedb996086057b75bf93538240538c6d9b38e57c82d51f75a73409d2 \ + --hash=sha256:f47c9c9028f55a04ac254346e92977bf0f166c483c74b4232bee19a6697e4778 \ + --hash=sha256:f5f9da7f5dbc00a604fe74aa02ae7c98bcede8a3b8b9666f9f86fc13993bc71a \ + --hash=sha256:fd74520371c3c4175142d02a976aee0b4cb4a7cc912a60586ffd8d5929979b30 \ + --hash=sha256:feeb64bc9bcc6b45c6311c9e9b99406660a9c05ca8a5b30d14a78555088b0b3a + # via + # aiohttp + # aiosignal +fsspec[http]==2024.12.0 \ + --hash=sha256:670700c977ed2fb51e0d9f9253177ed20cbde4a3e5c0283cc5385b5870c8533f \ + --hash=sha256:b520aed47ad9804237ff878b504267a3b0b441e97508bd6d2d8774e3db85cee2 + # via + # datasets + # etils + # huggingface-hub + # torch +gast==0.6.0 \ + --hash=sha256:52b182313f7330389f72b069ba00f174cfe2a06411099547288839c6cbafbd54 \ + --hash=sha256:88fc5300d32c7ac6ca7b515310862f71e6fdf2c029bbec7c66c0f5dd47b6b1fb + # via tensorflow-cpu 
+google-api-core==1.16.0 \ + --hash=sha256:859f7392676761f2b160c6ee030c3422135ada4458f0948c5690a6a7c8d86294 \ + --hash=sha256:92e962a087f1c4b8d1c5c88ade1c1dfd550047dcffb320c57ef6a534a20403e2 + # via google-api-python-client +google-api-python-client==1.8.0 \ + --hash=sha256:0f5b42a14e2d2f7dee40f2e4514531dbe95ebde9c2173b1c4040a65c427e7900 \ + --hash=sha256:5032ad1af5046889649b3848f2e871889fbb6ae440198a549fe1699581300386 + # via cloud-tpu-client +google-auth==1.6.3 \ + --hash=sha256:0f7c6a64927d34c1a474da92cfc59e552a5d3b940d3266606c6a28b72888b9e4 \ + --hash=sha256:20705f6803fd2c4d1cc2dcb0df09d4dfcb9a7d51fd59e94a3a28231fd93119ed + # via + # google-api-core + # google-api-python-client + # google-auth-httplib2 +google-auth-httplib2==0.2.0 \ + --hash=sha256:38aa7badf48f974f1eb9861794e9c0cb2a0511a4ec0679b1f886d108f5640e05 \ + --hash=sha256:b65a0a2123300dd71281a7bf6e64d65a0759287df52729bdd1ae2e47dc311a3d + # via google-api-python-client +google-pasta==0.2.0 \ + --hash=sha256:4612951da876b1a10fe3960d7226f0c7682cf901e16ac06e473b267a5afa8954 \ + --hash=sha256:b32482794a366b5366a32c92a9a9201b107821889935a02b3e51f6b432ea84ed \ + --hash=sha256:c9f2c8dfc8f96d0d5808299920721be30c9eec37f2389f28904f454565c8a16e + # via tensorflow-cpu +googleapis-common-protos==1.66.0 \ + --hash=sha256:c3e7b33d15fdca5374cc0a7346dd92ffa847425cc4ea941d970f13680052ec8c \ + --hash=sha256:d7abcd75fabb2e0ec9f74466401f6c119a0b498e27370e9be4c94cb7e382b8ed + # via + # google-api-core + # tensorflow-metadata +grpcio==1.66.0 \ + --hash=sha256:0f3010bf46b2a01c9e40644cb9ed91b4b8435e5c500a275da5f9f62580e31e80 \ + --hash=sha256:1c5466222470cb7fbc9cc898af1d48eefd297cb2e2f59af6d4a851c862fa90ac \ + --hash=sha256:1eb03524d0f55b965d6c86aa44e5db9e5eaa15f9ed3b164621e652e5b927f4b8 \ + --hash=sha256:230cdd696751e7eb1395718cd308234749daa217bb8d128f00357dc4df102558 \ + --hash=sha256:245b08f9b3c645a6a623f3ed4fa43dcfcd6ad701eb9c32511c1bb7380e8c3d23 \ + 
--hash=sha256:296a45ea835e12a1cc35ab0c57e455346c272af7b0d178e29c67742167262b4c \ + --hash=sha256:37514b68a42e9cf24536345d3cf9e580ffd29117c158b4eeea34625200256067 \ + --hash=sha256:375b58892301a5fc6ca7d7ff689c9dc9d00895f5d560604ace9f4f0573013c63 \ + --hash=sha256:423ae18637cd99ddcf2e5a6851c61828c49e9b9d022d0442d979b4f230109787 \ + --hash=sha256:49234580a073ce7ac490112f6c67c874cbcb27804c4525978cdb21ba7f3f193c \ + --hash=sha256:508411df1f2b7cfa05d4d7dbf3d576fe4f949cd61c03f3a6f0378c84e3d7b963 \ + --hash=sha256:50cea8ce2552865b87e3dffbb85eb21e6b98d928621600c0feda2f02449cd837 \ + --hash=sha256:516fdbc8e156db71a004bc431a6303bca24cfde186babe96dde7bd01e8f0cc70 \ + --hash=sha256:526d4f6ca19f31b25606d5c470ecba55c0b22707b524e4de8987919e8920437d \ + --hash=sha256:53d4c6706b49e358a2a33345dbe9b6b3bb047cecd7e8c07ba383bd09349bfef8 \ + --hash=sha256:5b15ef1b296c4e78f15f64fc65bf8081f8774480ffcac45642f69d9d753d9c6b \ + --hash=sha256:5e8140b39f10d7be2263afa2838112de29374c5c740eb0afd99146cb5bdbd990 \ + --hash=sha256:5ea27f4ce8c0daccfdd2c7961e6ba404b6599f47c948415c4cca5728739107a3 \ + --hash=sha256:5f4b3357e59dfba9140a51597287297bc638710d6a163f99ee14efc19967a821 \ + --hash=sha256:5f93fc84b72bbc7b84a42f3ca9dc055fa00d2303d9803be011ebf7a10a4eb833 \ + --hash=sha256:643d8d9632a688ae69661e924b862e23c83a3575b24e52917ec5bcc59543d212 \ + --hash=sha256:684a4c07883cbd4ac864f0d08d927267404f5f0c76f31c85f9bbe05f2daae2f2 \ + --hash=sha256:6d586a95c05c82a5354be48bb4537e1accaf2472d8eb7e9086d844cbff934482 \ + --hash=sha256:6ed35bf7da3fb3b1949e32bdf47a8b5ffe0aed11722d948933bd068531cd4682 \ + --hash=sha256:748452dbd5a047475d5413bdef08b0b9ceb2c0c0e249d4ee905a5fb82c6328dc \ + --hash=sha256:7bc9d823e05d63a87511fb456dcc48dc0fced86c282bf60229675e7ee7aac1a1 \ + --hash=sha256:8096a922eb91bc97c839f675c3efa1257c6ef181ae1b25d3fb97f2cae4c57c01 \ + --hash=sha256:832945e64176520520317b50d64ec7d79924429528d5747669b52d0bf2c7bd78 \ + --hash=sha256:8fc5c710ddd51b5a0dc36ef1b6663430aa620e0ce029b87b150dafd313b978c3 \ + 
--hash=sha256:921b8f7f25d5300d7c6837a1e0639ef145fbdbfb728e0a5db2dbccc9fc0fd891 \ + --hash=sha256:9d5251578767fe44602688c851c2373b5513048ac84c21a0fe946590a8e7933d \ + --hash=sha256:a639d3866bfb5a678b5c0b92cd7ab543033ed8988854290fd86145e71731fd4c \ + --hash=sha256:aaf30c75cbaf30e561ca45f21eb1f729f0fab3f15c592c1074795ed43e3ff96f \ + --hash=sha256:ad7256f224437b2c29c2bef98ddd3130454c5b1ab1f0471fc11794cefd4dbd3d \ + --hash=sha256:ba18cfdc09312eb2eea6fa0ce5d2eec3cf345ea78f6528b2eaed6432105e0bd0 \ + --hash=sha256:ba60ae3b465b3e85080ae3bfbc36fd0305ae495ab16fcf8022fc7d7a23aac846 \ + --hash=sha256:bc008c6afa1e7c8df99bd9154abc4f0470d26b7730ca2521122e99e771baa8c7 \ + --hash=sha256:c072f90a1f0409f827ae86266984cba65e89c5831a0726b9fc7f4b5fb940b853 \ + --hash=sha256:c1ea4c528e7db6660718e4165fd1b5ac24b79a70c870a7bc0b7bdb9babab7c1e \ + --hash=sha256:c3084e590e857ba7585ae91078e4c9b6ef55aaf1dc343ce26400ba59a146eada \ + --hash=sha256:c3f6feb0dc8456d025e566709f7dd02885add99bedaac50229013069242a1bfd \ + --hash=sha256:d0439a970d65327de21c299ea0e0c2ad0987cdaf18ba5066621dea5f427f922b \ + --hash=sha256:dd614370e939f9fceeeb2915111a0795271b4c11dfb5fc0f58449bee40c726a5 \ + --hash=sha256:de9e20a0acb709dcfa15a622c91f584f12c9739a79c47999f73435d2b3cc8a3b \ + --hash=sha256:e36fa838ac1d6c87198ca149cbfcc92e1af06bb8c8cd852622f8e58f33ea3324 \ + --hash=sha256:e8d20308eeae15b3e182f47876f05acbdec1eebd9473a9814a44e46ec4a84c04 + # via + # -r requirements-dev.txt + # tensorboard + # tensorflow-cpu +h5py==3.12.1 \ + --hash=sha256:018a4597f35092ae3fb28ee851fdc756d2b88c96336b8480e124ce1ac6fb9166 \ + --hash=sha256:050a4f2c9126054515169c49cb900949814987f0c7ae74c341b0c9f9b5056834 \ + --hash=sha256:06a903a4e4e9e3ebbc8b548959c3c2552ca2d70dac14fcfa650d9261c66939ed \ + --hash=sha256:1473348139b885393125126258ae2d70753ef7e9cec8e7848434f385ae72069e \ + --hash=sha256:2f0f1a382cbf494679c07b4371f90c70391dedb027d517ac94fa2c05299dacda \ + --hash=sha256:326d70b53d31baa61f00b8aa5f95c2fcb9621a3ee8365d770c551a13dbbcbfdf \ + 
--hash=sha256:3b15d8dbd912c97541312c0e07438864d27dbca857c5ad634de68110c6beb1c2 \ + --hash=sha256:3fdf95092d60e8130ba6ae0ef7a9bd4ade8edbe3569c13ebbaf39baefffc5ba4 \ + --hash=sha256:4532c7e97fbef3d029735db8b6f5bf01222d9ece41e309b20d63cfaae2fb5c4d \ + --hash=sha256:513171e90ed92236fc2ca363ce7a2fc6f2827375efcbb0cc7fbdd7fe11fecafc \ + --hash=sha256:52ab036c6c97055b85b2a242cb540ff9590bacfda0c03dd0cf0661b311f522f8 \ + --hash=sha256:577d618d6b6dea3da07d13cc903ef9634cde5596b13e832476dd861aaf651f3e \ + --hash=sha256:59400f88343b79655a242068a9c900001a34b63e3afb040bd7cdf717e440f653 \ + --hash=sha256:59685fe40d8c1fbbee088c88cd4da415a2f8bee5c270337dc5a1c4aa634e3307 \ + --hash=sha256:5c4b41d1019322a5afc5082864dfd6359f8935ecd37c11ac0029be78c5d112c9 \ + --hash=sha256:62be1fc0ef195891949b2c627ec06bc8e837ff62d5b911b6e42e38e0f20a897d \ + --hash=sha256:6fdf6d7936fa824acfa27305fe2d9f39968e539d831c5bae0e0d83ed521ad1ac \ + --hash=sha256:7b3b8f3b48717e46c6a790e3128d39c61ab595ae0a7237f06dfad6a3b51d5351 \ + --hash=sha256:84342bffd1f82d4f036433e7039e241a243531a1d3acd7341b35ae58cdab05bf \ + --hash=sha256:ad8a76557880aed5234cfe7279805f4ab5ce16b17954606cca90d578d3e713ef \ + --hash=sha256:ba51c0c5e029bb5420a343586ff79d56e7455d496d18a30309616fdbeed1068f \ + --hash=sha256:cb65f619dfbdd15e662423e8d257780f9a66677eae5b4b3fc9dca70b5fd2d2a3 \ + --hash=sha256:ccd9006d92232727d23f784795191bfd02294a4f2ba68708825cb1da39511a93 \ + --hash=sha256:d2b8dd64f127d8b324f5d2cd1c0fd6f68af69084e9e47d27efeb9e28e685af3e \ + --hash=sha256:d3e465aee0ec353949f0f46bf6c6f9790a2006af896cee7c178a8c3e5090aa32 \ + --hash=sha256:e4d51919110a030913201422fb07987db4338eba5ec8c5a15d6fab8e03d443fc + # via + # keras + # tensorflow-cpu +httplib2==0.22.0 \ + --hash=sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc \ + --hash=sha256:d7a10bc5ef5ab08322488bde8c726eeee5c8618723fdb399597ec58f3d82df81 + # via + # google-api-python-client + # google-auth-httplib2 + # oauth2client +huggingface-hub==0.27.0 \ + 
--hash=sha256:8f2e834517f1f1ddf1ecc716f91b120d7333011b7485f665a9a412eacb1a2a81 \ + --hash=sha256:902cce1a1be5739f5589e560198a65a8edcfd3b830b1666f36e4b961f0454fac + # via + # datasets + # tokenizers + # transformers +humanize==4.11.0 \ + --hash=sha256:b53caaec8532bcb2fff70c8826f904c35943f8cecaca29d272d9df38092736c0 \ + --hash=sha256:e66f36020a2d5a974c504bd2555cf770621dbdbb6d82f94a6857c0b1ea2608be + # via orbax-checkpoint +idna==3.10 \ + --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \ + --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3 + # via + # requests + # yarl +immutabledict==4.2.1 \ + --hash=sha256:c56a26ced38c236f79e74af3ccce53772827cef5c3bce7cab33ff2060f756373 \ + --hash=sha256:d91017248981c72eb66c8ff9834e99c2f53562346f23e7f51e7a5ebcf66a3bcc + # via tensorflow-datasets +importlib-resources==6.5.2 \ + --hash=sha256:185f87adef5bcc288449d98fb4fba07cea78bc036455dd44c5fc4a2fe78fed2c \ + --hash=sha256:789cfdc3ed28c78b67a06acb8126751ced69a3d5f79c095a98298cd8a760ccec + # via etils +jax[cpu]==0.4.34 \ + --hash=sha256:44196854f40c5f9cea3142824b9f1051f85afc3fcf7593ec5479fc8db01c58db \ + --hash=sha256:b957ca1fc91f7343f91a186af9f19c7f342c946f95a8c11c7f1e5cdfe2e58d9e + # via + # -r requirements-dev.txt + # chex + # flax + # jraph + # optax + # orbax-checkpoint +jaxlib==0.4.34 \ + --hash=sha256:096f0ca309d41fa692a9d1f2f9baab1c5c8ca0749876ebb3f748e738a27c7ff4 \ + --hash=sha256:133070d4fec5525ffea4dc72956398c1cf647a04dcb37f8a935ee82af78d9965 \ + --hash=sha256:1a30771d85fa77f9ab8f18e63240f455ab3a3f87660ed7b8d5eea6ceecbe5c1e \ + --hash=sha256:3bcfa639ca3cfaf86c8ceebd5fc0d47300fd98a078014a1d0cc03133e1523d5f \ + --hash=sha256:3e60bc826933082e99b19b87c21818a8d26fcdb01f418d47cedff554746fd6cc \ + --hash=sha256:45d719a2ce0ebf21255a277b71d756f3609b7b5be70cddc5d88fd58c35219de0 \ + --hash=sha256:48272e9034ff868d4328cf0055a07882fd2be93f59dfb6283af7de491f9d1290 \ + 
--hash=sha256:571ef03259835458111596a71a2f4a6fabf4ec34595df4cea555035362ac5bf0 \ + --hash=sha256:6b43a974c5d91a19912d138f2658dd8dbb7d30dcdff5c961d896c673e872b611 \ + --hash=sha256:72e22e99a5dc890a64443c3fc12f13f20091f578c405a76de077ba42b4c62cd7 \ + --hash=sha256:7be673a876ebd1aef440fb7e3ebaf99a91abeb550c9728c644b7d7c7b5d7c108 \ + --hash=sha256:87f25a477cd279840e53718403f97092eba0e8a945fcab47bcf435b6f9119dda \ + --hash=sha256:8ee3f93836e53c86556ccd9449a4ea43516ee05184d031a71dd692e81259f7d9 \ + --hash=sha256:901cb4040ed24eae40071d8114ea8d10dff436277fa74a1a5b9e7206f641151c \ + --hash=sha256:b0001c8f0e2b1c7bc99e4f314b524a340d25653505c1a1484d4041a9d3617f6f \ + --hash=sha256:b7a212a3cb5c6acc201c32ae4f4b5f5a9ac09457fbb77ba8db5ce7e7d4adc214 \ + --hash=sha256:c303f5acaf6c56ce5ff133a923c9b6247bdebedde15bd2c893c24be4d8f71306 \ + --hash=sha256:c7b3e724a30426a856070aba0192b5d199e95b4411070e7ad96ad8b196877b10 \ + --hash=sha256:c9d3adcae43a33aad4332be9c2aedc5ef751d1e755f917a5afb30c7872eacaa8 \ + --hash=sha256:d840e64b85f8865404d6d225b9bb340e158df1457152a361b05680e24792b232 + # via + # chex + # jax + # jraph + # optax +jinja2==3.1.5 \ + --hash=sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb \ + --hash=sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb + # via torch +jmp==0.0.4 \ + --hash=sha256:5dfeb0fd7c7a9f72a70fff0aab9d0cbfae32a809c02f4037ff3485ceb33e1730 \ + --hash=sha256:6aa7adbddf2bd574b28c7faf6e81a735eb11f53386447896909c6968dc36807d + # via dm-haiku +joblib==1.4.2 \ + --hash=sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6 \ + --hash=sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e + # via scikit-learn +jraph==0.0.6.dev0 \ + --hash=sha256:350fe37bf717f934f1f84fd3370a480b3178bfcb61dfa217c738971308c57625 \ + --hash=sha256:c3ac3a0b224b344eb6d367e8bc312d95ea41bf825d01ea31b80dd8c22c0dd8b8 + # via -r requirements-dev.txt +keras==3.7.0 \ + 
--hash=sha256:546a64f302e4779c129c06d9826fa586de752cdfd43d7dc4010c31b282587969 \ + --hash=sha256:a4451a5591e75dfb414d0b84a3fd2fb9c0240cc87ebe7e397f547ce10b0e67b7 + # via + # -r requirements-dev.txt + # tensorflow-cpu +kiwisolver==1.4.8 \ + --hash=sha256:01c3d31902c7db5fb6182832713d3b4122ad9317c2c5877d0539227d96bb2e50 \ + --hash=sha256:034d2c891f76bd3edbdb3ea11140d8510dca675443da7304205a2eaa45d8334c \ + --hash=sha256:085940635c62697391baafaaeabdf3dd7a6c3643577dde337f4d66eba021b2b8 \ + --hash=sha256:08e77738ed7538f036cd1170cbed942ef749137b1311fa2bbe2a7fda2f6bf3cc \ + --hash=sha256:111793b232842991be367ed828076b03d96202c19221b5ebab421ce8bcad016f \ + --hash=sha256:11e1022b524bd48ae56c9b4f9296bce77e15a2e42a502cceba602f804b32bb79 \ + --hash=sha256:151dffc4865e5fe6dafce5480fab84f950d14566c480c08a53c663a0020504b6 \ + --hash=sha256:16523b40aab60426ffdebe33ac374457cf62863e330a90a0383639ce14bf44b2 \ + --hash=sha256:1732e065704b47c9afca7ffa272f845300a4eb959276bf6970dc07265e73b605 \ + --hash=sha256:1c8ceb754339793c24aee1c9fb2485b5b1f5bb1c2c214ff13368431e51fc9a09 \ + --hash=sha256:23454ff084b07ac54ca8be535f4174170c1094a4cff78fbae4f73a4bcc0d4dab \ + --hash=sha256:23d5f023bdc8c7e54eb65f03ca5d5bb25b601eac4d7f1a042888a1f45237987e \ + --hash=sha256:257af1622860e51b1a9d0ce387bf5c2c4f36a90594cb9514f55b074bcc787cfc \ + --hash=sha256:286b18e86682fd2217a48fc6be6b0f20c1d0ed10958d8dc53453ad58d7be0bf8 \ + --hash=sha256:291331973c64bb9cce50bbe871fb2e675c4331dab4f31abe89f175ad7679a4d7 \ + --hash=sha256:2f0121b07b356a22fb0414cec4666bbe36fd6d0d759db3d37228f496ed67c880 \ + --hash=sha256:3452046c37c7692bd52b0e752b87954ef86ee2224e624ef7ce6cb21e8c41cc1b \ + --hash=sha256:34d142fba9c464bc3bbfeff15c96eab0e7310343d6aefb62a79d51421fcc5f1b \ + --hash=sha256:369b75d40abedc1da2c1f4de13f3482cb99e3237b38726710f4a793432b1c5ff \ + --hash=sha256:36dbbfd34838500a31f52c9786990d00150860e46cd5041386f217101350f0d3 \ + --hash=sha256:370fd2df41660ed4e26b8c9d6bbcad668fbe2560462cba151a721d49e5b6628c \ + 
--hash=sha256:3a96c0e790ee875d65e340ab383700e2b4891677b7fcd30a699146f9384a2bb0 \ + --hash=sha256:3b9b4d2892fefc886f30301cdd80debd8bb01ecdf165a449eb6e78f79f0fabd6 \ + --hash=sha256:3cd3bc628b25f74aedc6d374d5babf0166a92ff1317f46267f12d2ed54bc1d30 \ + --hash=sha256:3ddc373e0eef45b59197de815b1b28ef89ae3955e7722cc9710fb91cd77b7f47 \ + --hash=sha256:4191ee8dfd0be1c3666ccbac178c5a05d5f8d689bbe3fc92f3c4abec817f8fe0 \ + --hash=sha256:54a62808ac74b5e55a04a408cda6156f986cefbcf0ada13572696b507cc92fa1 \ + --hash=sha256:577facaa411c10421314598b50413aa1ebcf5126f704f1e5d72d7e4e9f020d90 \ + --hash=sha256:641f2ddf9358c80faa22e22eb4c9f54bd3f0e442e038728f500e3b978d00aa7d \ + --hash=sha256:65ea09a5a3faadd59c2ce96dc7bf0f364986a315949dc6374f04396b0d60e09b \ + --hash=sha256:68269e60ee4929893aad82666821aaacbd455284124817af45c11e50a4b42e3c \ + --hash=sha256:69b5637c3f316cab1ec1c9a12b8c5f4750a4c4b71af9157645bf32830e39c03a \ + --hash=sha256:7506488470f41169b86d8c9aeff587293f530a23a23a49d6bc64dab66bedc71e \ + --hash=sha256:768cade2c2df13db52475bd28d3a3fac8c9eff04b0e9e2fda0f3760f20b3f7fc \ + --hash=sha256:77e6f57a20b9bd4e1e2cedda4d0b986ebd0216236f0106e55c28aea3d3d69b16 \ + --hash=sha256:782bb86f245ec18009890e7cb8d13a5ef54dcf2ebe18ed65f795e635a96a1c6a \ + --hash=sha256:7a3ad337add5148cf51ce0b55642dc551c0b9d6248458a757f98796ca7348712 \ + --hash=sha256:7cd2785b9391f2873ad46088ed7599a6a71e762e1ea33e87514b1a441ed1da1c \ + --hash=sha256:7e9a60b50fe8b2ec6f448fe8d81b07e40141bfced7f896309df271a0b92f80f3 \ + --hash=sha256:84a2f830d42707de1d191b9490ac186bf7997a9495d4e9072210a1296345f7dc \ + --hash=sha256:856b269c4d28a5c0d5e6c1955ec36ebfd1651ac00e1ce0afa3e28da95293b561 \ + --hash=sha256:858416b7fb777a53f0c59ca08190ce24e9abbd3cffa18886a5781b8e3e26f65d \ + --hash=sha256:87b287251ad6488e95b4f0b4a79a6d04d3ea35fde6340eb38fbd1ca9cd35bbbc \ + --hash=sha256:88c6f252f6816a73b1f8c904f7bbe02fd67c09a69f7cb8a0eecdbf5ce78e63db \ + --hash=sha256:893f5525bb92d3d735878ec00f781b2de998333659507d29ea4466208df37bed \ + 
--hash=sha256:89c107041f7b27844179ea9c85d6da275aa55ecf28413e87624d033cf1f6b751 \ + --hash=sha256:918139571133f366e8362fa4a297aeba86c7816b7ecf0bc79168080e2bd79957 \ + --hash=sha256:99cea8b9dd34ff80c521aef46a1dddb0dcc0283cf18bde6d756f1e6f31772165 \ + --hash=sha256:a17b7c4f5b2c51bb68ed379defd608a03954a1845dfed7cc0117f1cc8a9b7fd2 \ + --hash=sha256:a3c44cb68861de93f0c4a8175fbaa691f0aa22550c331fefef02b618a9dcb476 \ + --hash=sha256:a4d3601908c560bdf880f07d94f31d734afd1bb71e96585cace0e38ef44c6d84 \ + --hash=sha256:a5ce1e481a74b44dd5e92ff03ea0cb371ae7a0268318e202be06c8f04f4f1246 \ + --hash=sha256:a66f60f8d0c87ab7f59b6fb80e642ebb29fec354a4dfad687ca4092ae69d04f4 \ + --hash=sha256:b21dbe165081142b1232a240fc6383fd32cdd877ca6cc89eab93e5f5883e1c25 \ + --hash=sha256:b47a465040146981dc9db8647981b8cb96366fbc8d452b031e4f8fdffec3f26d \ + --hash=sha256:b5773efa2be9eb9fcf5415ea3ab70fc785d598729fd6057bea38d539ead28271 \ + --hash=sha256:b83dc6769ddbc57613280118fb4ce3cd08899cc3369f7d0e0fab518a7cf37fdb \ + --hash=sha256:bade438f86e21d91e0cf5dd7c0ed00cda0f77c8c1616bd83f9fc157fa6760d31 \ + --hash=sha256:bcb1ebc3547619c3b58a39e2448af089ea2ef44b37988caf432447374941574e \ + --hash=sha256:be4816dc51c8a471749d664161b434912eee82f2ea66bd7628bd14583a833e85 \ + --hash=sha256:c07b29089b7ba090b6f1a669f1411f27221c3662b3a1b7010e67b59bb5a6f10b \ + --hash=sha256:c2b9a96e0f326205af81a15718a9073328df1173a2619a68553decb7097fd5d7 \ + --hash=sha256:c5020c83e8553f770cb3b5fc13faac40f17e0b205bd237aebd21d53d733adb03 \ + --hash=sha256:c72941acb7b67138f35b879bbe85be0f6c6a70cab78fe3ef6db9c024d9223e5b \ + --hash=sha256:c8bf637892dc6e6aad2bc6d4d69d08764166e5e3f69d469e55427b6ac001b19d \ + --hash=sha256:cc978a80a0db3a66d25767b03688f1147a69e6237175c0f4ffffaaedf744055a \ + --hash=sha256:ce2cf1e5688edcb727fdf7cd1bbd0b6416758996826a8be1d958f91880d0809d \ + --hash=sha256:d47b28d1dfe0793d5e96bce90835e17edf9a499b53969b03c6c47ea5985844c3 \ + --hash=sha256:d47cfb2650f0e103d4bf68b0b5804c68da97272c84bb12850d877a95c056bd67 \ + 
--hash=sha256:d5536185fce131780ebd809f8e623bf4030ce1b161353166c49a3c74c287897f \ + --hash=sha256:d561d2d8883e0819445cfe58d7ddd673e4015c3c57261d7bdcd3710d0d14005c \ + --hash=sha256:d6af5e8815fd02997cb6ad9bbed0ee1e60014438ee1a5c2444c96f87b8843502 \ + --hash=sha256:d6d6bd87df62c27d4185de7c511c6248040afae67028a8a22012b010bc7ad062 \ + --hash=sha256:dace81d28c787956bfbfbbfd72fdcef014f37d9b48830829e488fdb32b49d954 \ + --hash=sha256:e063ef9f89885a1d68dd8b2e18f5ead48653176d10a0e324e3b0030e3a69adeb \ + --hash=sha256:e7a019419b7b510f0f7c9dceff8c5eae2392037eae483a7f9162625233802b0a \ + --hash=sha256:eaa973f1e05131de5ff3569bbba7f5fd07ea0595d3870ed4a526d486fe57fa1b \ + --hash=sha256:eb158fe28ca0c29f2260cca8c43005329ad58452c36f0edf298204de32a9a3ed \ + --hash=sha256:ed33ca2002a779a2e20eeb06aea7721b6e47f2d4b8a8ece979d8ba9e2a167e34 \ + --hash=sha256:fc2ace710ba7c1dfd1a3b42530b62b9ceed115f19a1656adefce7b1782a37794 + # via matplotlib +libclang==18.1.1 \ + --hash=sha256:0b2e143f0fac830156feb56f9231ff8338c20aecfe72b4ffe96f19e5a1dbb69a \ + --hash=sha256:3f0e1f49f04d3cd198985fea0511576b0aee16f9ff0e0f0cad7f9c57ec3c20e8 \ + --hash=sha256:4dd2d3b82fab35e2bf9ca717d7b63ac990a3519c7e312f19fa8e86dcc712f7fb \ + --hash=sha256:54dda940a4a0491a9d1532bf071ea3ef26e6dbaf03b5000ed94dd7174e8f9592 \ + --hash=sha256:69f8eb8f65c279e765ffd28aaa7e9e364c776c17618af8bff22a8df58677ff4f \ + --hash=sha256:6f14c3f194704e5d09769108f03185fce7acaf1d1ae4bbb2f30a72c2400cb7c5 \ + --hash=sha256:83ce5045d101b669ac38e6da8e58765f12da2d3aafb3b9b98d88b286a60964d8 \ + --hash=sha256:a1214966d08d73d971287fc3ead8dfaf82eb07fb197680d8b3859dbbbbf78250 \ + --hash=sha256:c533091d8a3bbf7460a00cb6c1a71da93bffe148f172c7d03b1c31fbf8aa2a0b \ + --hash=sha256:cf4a99b05376513717ab5d82a0db832c56ccea4fd61a69dbb7bccf2dfb207dbe + # via tensorflow-cpu +markdown==3.7 \ + --hash=sha256:2ae2471477cfd02dbbf038d5d9bc226d40def84b4fe2986e49b59b6b472bbed2 \ + --hash=sha256:7eb6df5690b81a1d7942992c97fad2938e956e79df20cbc6186e9c3a77b1c803 + # via tensorboard 
+markdown-it-py==3.0.0 \ + --hash=sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1 \ + --hash=sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb + # via rich +markupsafe==3.0.2 \ + --hash=sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4 \ + --hash=sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30 \ + --hash=sha256:1225beacc926f536dc82e45f8a4d68502949dc67eea90eab715dea3a21c1b5f0 \ + --hash=sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9 \ + --hash=sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396 \ + --hash=sha256:1a9d3f5f0901fdec14d8d2f66ef7d035f2157240a433441719ac9a3fba440b13 \ + --hash=sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028 \ + --hash=sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca \ + --hash=sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557 \ + --hash=sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832 \ + --hash=sha256:3169b1eefae027567d1ce6ee7cae382c57fe26e82775f460f0b2778beaad66c0 \ + --hash=sha256:3809ede931876f5b2ec92eef964286840ed3540dadf803dd570c3b7e13141a3b \ + --hash=sha256:38a9ef736c01fccdd6600705b09dc574584b89bea478200c5fbf112a6b0d5579 \ + --hash=sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a \ + --hash=sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c \ + --hash=sha256:48032821bbdf20f5799ff537c7ac3d1fba0ba032cfc06194faffa8cda8b560ff \ + --hash=sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c \ + --hash=sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22 \ + --hash=sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094 \ + --hash=sha256:57cb5a3cf367aeb1d316576250f65edec5bb3be939e9247ae594b4bcbc317dfb \ + --hash=sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e \ + 
--hash=sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5 \ + --hash=sha256:6af100e168aa82a50e186c82875a5893c5597a0c1ccdb0d8b40240b1f28b969a \ + --hash=sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d \ + --hash=sha256:6e296a513ca3d94054c2c881cc913116e90fd030ad1c656b3869762b754f5f8a \ + --hash=sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b \ + --hash=sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8 \ + --hash=sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225 \ + --hash=sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c \ + --hash=sha256:88b49a3b9ff31e19998750c38e030fc7bb937398b1f78cfa599aaef92d693144 \ + --hash=sha256:8c4e8c3ce11e1f92f6536ff07154f9d49677ebaaafc32db9db4620bc11ed480f \ + --hash=sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87 \ + --hash=sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d \ + --hash=sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93 \ + --hash=sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf \ + --hash=sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158 \ + --hash=sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84 \ + --hash=sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb \ + --hash=sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48 \ + --hash=sha256:b424c77b206d63d500bcb69fa55ed8d0e6a3774056bdc4839fc9298a7edca171 \ + --hash=sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c \ + --hash=sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6 \ + --hash=sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd \ + --hash=sha256:bbcb445fa71794da8f178f0f6d66789a28d7319071af7a496d4d507ed566270d \ + --hash=sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1 \ + 
--hash=sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d \ + --hash=sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca \ + --hash=sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a \ + --hash=sha256:cfad01eed2c2e0c01fd0ecd2ef42c492f7f93902e39a42fc9ee1692961443a29 \ + --hash=sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe \ + --hash=sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798 \ + --hash=sha256:e07c3764494e3776c602c1e78e298937c3315ccc9043ead7e685b7f2b8d47b3c \ + --hash=sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8 \ + --hash=sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f \ + --hash=sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f \ + --hash=sha256:eaa0a10b7f72326f1372a713e73c3f739b524b3af41feb43e4921cb529f5929a \ + --hash=sha256:eb7972a85c54febfb25b5c4b4f3af4dcc731994c7da0d8a0b4a6eb0640e1d178 \ + --hash=sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0 \ + --hash=sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79 \ + --hash=sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430 \ + --hash=sha256:fcabf5ff6eea076f859677f5f0b6b5c1a51e70a376b0579e0eadef8db48c6b50 + # via + # jinja2 + # werkzeug +matplotlib==3.10.0 \ + --hash=sha256:01d2b19f13aeec2e759414d3bfe19ddfb16b13a1250add08d46d5ff6f9be83c6 \ + --hash=sha256:12eaf48463b472c3c0f8dbacdbf906e573013df81a0ab82f0616ea4b11281908 \ + --hash=sha256:2c5829a5a1dd5a71f0e31e6e8bb449bc0ee9dbfb05ad28fc0c6b55101b3a4be6 \ + --hash=sha256:2fbbabc82fde51391c4da5006f965e36d86d95f6ee83fb594b279564a4c5d0d2 \ + --hash=sha256:3547d153d70233a8496859097ef0312212e2689cdf8d7ed764441c77604095ae \ + --hash=sha256:359f87baedb1f836ce307f0e850d12bb5f1936f70d035561f90d41d305fdacea \ + --hash=sha256:3b427392354d10975c1d0f4ee18aa5844640b512d5311ef32efd4dd7db106ede \ + 
--hash=sha256:4659665bc7c9b58f8c00317c3c2a299f7f258eeae5a5d56b4c64226fca2f7c59 \ + --hash=sha256:4673ff67a36152c48ddeaf1135e74ce0d4bce1bbf836ae40ed39c29edf7e2765 \ + --hash=sha256:503feb23bd8c8acc75541548a1d709c059b7184cde26314896e10a9f14df5f12 \ + --hash=sha256:5439f4c5a3e2e8eab18e2f8c3ef929772fd5641876db71f08127eed95ab64683 \ + --hash=sha256:5cdbaf909887373c3e094b0318d7ff230b2ad9dcb64da7ade654182872ab2593 \ + --hash=sha256:5e6c6461e1fc63df30bf6f80f0b93f5b6784299f721bc28530477acd51bfc3d1 \ + --hash=sha256:5fd41b0ec7ee45cd960a8e71aea7c946a28a0b8a4dcee47d2856b2af051f334c \ + --hash=sha256:607b16c8a73943df110f99ee2e940b8a1cbf9714b65307c040d422558397dac5 \ + --hash=sha256:7e8632baebb058555ac0cde75db885c61f1212e47723d63921879806b40bec6a \ + --hash=sha256:81713dd0d103b379de4516b861d964b1d789a144103277769238c732229d7f03 \ + --hash=sha256:845d96568ec873be63f25fa80e9e7fae4be854a66a7e2f0c8ccc99e94a8bd4ef \ + --hash=sha256:95b710fea129c76d30be72c3b38f330269363fbc6e570a5dd43580487380b5ff \ + --hash=sha256:96f2886f5c1e466f21cc41b70c5a0cd47bfa0015eb2d5793c88ebce658600e25 \ + --hash=sha256:994c07b9d9fe8d25951e3202a68c17900679274dadfc1248738dcfa1bd40d7f3 \ + --hash=sha256:9ade1003376731a971e398cc4ef38bb83ee8caf0aee46ac6daa4b0506db1fd06 \ + --hash=sha256:9b0558bae37f154fffda54d779a592bc97ca8b4701f1c710055b609a3bac44c8 \ + --hash=sha256:a2a43cbefe22d653ab34bb55d42384ed30f611bcbdea1f8d7f431011a2e1c62e \ + --hash=sha256:a994f29e968ca002b50982b27168addfd65f0105610b6be7fa515ca4b5307c95 \ + --hash=sha256:ad2e15300530c1a94c63cfa546e3b7864bd18ea2901317bae8bbf06a5ade6dcf \ + --hash=sha256:ae80dc3a4add4665cf2faa90138384a7ffe2a4e37c58d83e115b54287c4f06ef \ + --hash=sha256:b886d02a581b96704c9d1ffe55709e49b4d2d52709ccebc4be42db856e511278 \ + --hash=sha256:c40ba2eb08b3f5de88152c2333c58cee7edcead0a2a0d60fcafa116b17117adc \ + --hash=sha256:c55b20591ced744aa04e8c3e4b7543ea4d650b6c3c4b208c08a05b4010e8b442 \ + --hash=sha256:c58a9622d5dbeb668f407f35f4e6bfac34bb9ecdcc81680c04d0258169747997 \ + 
--hash=sha256:d44cb942af1693cced2604c33a9abcef6205601c445f6d0dc531d813af8a2f5a \ + --hash=sha256:d907fddb39f923d011875452ff1eca29a9e7f21722b873e90db32e5d8ddff12e \ + --hash=sha256:fd44fc75522f58612ec4a33958a7e5552562b7705b42ef1b4f8c0818e304a363 + # via plotnine +mdurl==0.1.2 \ + --hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \ + --hash=sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba + # via markdown-it-py +mizani==0.13.1 \ + --hash=sha256:7da0dcacd43fbcc01c279ea06a76f1f064ae90dbb387c4a985ba24a92d3c7d7a \ + --hash=sha256:e3247ea12c746c8104767d7e42a2d16473173c7bc314f298d8294a58f4653353 + # via plotnine +ml-dtypes==0.4.1 \ + --hash=sha256:126e7d679b8676d1a958f2651949fbfa182832c3cd08020d8facd94e4114f3e9 \ + --hash=sha256:15fdd922fea57e493844e5abb930b9c0bd0af217d9edd3724479fc3d7ce70e3f \ + --hash=sha256:1fe8b5b5e70cd67211db94b05cfd58dace592f24489b038dc6f9fe347d2e07d5 \ + --hash=sha256:274cc7193dd73b35fb26bef6c5d40ae3eb258359ee71cd82f6e96a8c948bdaa6 \ + --hash=sha256:2d55b588116a7085d6e074cf0cdb1d6fa3875c059dddc4d2c94a4cc81c23e975 \ + --hash=sha256:560be16dc1e3bdf7c087eb727e2cf9c0e6a3d87e9f415079d2491cc419b3ebf5 \ + --hash=sha256:74c6cfb5cf78535b103fde9ea3ded8e9f16f75bc07789054edc7776abfb3d752 \ + --hash=sha256:772426b08a6172a891274d581ce58ea2789cc8abc1c002a27223f314aaf894e7 \ + --hash=sha256:827d3ca2097085cf0355f8fdf092b888890bb1b1455f52801a2d7756f056f54b \ + --hash=sha256:8c09a6d11d8475c2a9fd2bc0695628aec105f97cab3b3a3fb7c9660348ff7d24 \ + --hash=sha256:9f5e8f75fa371020dd30f9196e7d73babae2abd51cf59bdd56cb4f8de7e13354 \ + --hash=sha256:ad0b757d445a20df39035c4cdeed457ec8b60d236020d2560dbc25887533cf50 \ + --hash=sha256:df0fb650d5c582a9e72bb5bd96cfebb2cdb889d89daff621c8fbc60295eba66c \ + --hash=sha256:e138a9b7a48079c900ea969341a5754019a1ad17ae27ee330f7ebf43f23877f9 \ + --hash=sha256:e35e486e97aee577d0890bc3bd9e9f9eece50c08c163304008587ec8cfe7575b \ + 
--hash=sha256:ef0d7e3fece227b49b544fa69e50e607ac20948f0043e9f76b44f35f229ea450 \ + --hash=sha256:fad5f2de464fd09127e49b7fd1252b9006fb43d2edc1ff112d390c324af5ca7a + # via + # jax + # jaxlib + # keras + # tensorflow-cpu + # tensorstore +mpmath==1.3.0 \ + --hash=sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f \ + --hash=sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c + # via sympy +msgpack==1.1.0 \ + --hash=sha256:06f5fd2f6bb2a7914922d935d3b8bb4a7fff3a9a91cfce6d06c13bc42bec975b \ + --hash=sha256:071603e2f0771c45ad9bc65719291c568d4edf120b44eb36324dcb02a13bfddf \ + --hash=sha256:0907e1a7119b337971a689153665764adc34e89175f9a34793307d9def08e6ca \ + --hash=sha256:0f92a83b84e7c0749e3f12821949d79485971f087604178026085f60ce109330 \ + --hash=sha256:115a7af8ee9e8cddc10f87636767857e7e3717b7a2e97379dc2054712693e90f \ + --hash=sha256:13599f8829cfbe0158f6456374e9eea9f44eee08076291771d8ae93eda56607f \ + --hash=sha256:17fb65dd0bec285907f68b15734a993ad3fc94332b5bb21b0435846228de1f39 \ + --hash=sha256:2137773500afa5494a61b1208619e3871f75f27b03bcfca7b3a7023284140247 \ + --hash=sha256:3180065ec2abbe13a4ad37688b61b99d7f9e012a535b930e0e683ad6bc30155b \ + --hash=sha256:398b713459fea610861c8a7b62a6fec1882759f308ae0795b5413ff6a160cf3c \ + --hash=sha256:3d364a55082fb2a7416f6c63ae383fbd903adb5a6cf78c5b96cc6316dc1cedc7 \ + --hash=sha256:3df7e6b05571b3814361e8464f9304c42d2196808e0119f55d0d3e62cd5ea044 \ + --hash=sha256:41c991beebf175faf352fb940bf2af9ad1fb77fd25f38d9142053914947cdbf6 \ + --hash=sha256:42f754515e0f683f9c79210a5d1cad631ec3d06cea5172214d2176a42e67e19b \ + --hash=sha256:452aff037287acb1d70a804ffd022b21fa2bb7c46bee884dbc864cc9024128a0 \ + --hash=sha256:4676e5be1b472909b2ee6356ff425ebedf5142427842aa06b4dfd5117d1ca8a2 \ + --hash=sha256:46c34e99110762a76e3911fc923222472c9d681f1094096ac4102c18319e6468 \ + --hash=sha256:471e27a5787a2e3f974ba023f9e265a8c7cfd373632247deb225617e3100a3c7 \ + 
--hash=sha256:4a1964df7b81285d00a84da4e70cb1383f2e665e0f1f2a7027e683956d04b734 \ + --hash=sha256:4b51405e36e075193bc051315dbf29168d6141ae2500ba8cd80a522964e31434 \ + --hash=sha256:4d1b7ff2d6146e16e8bd665ac726a89c74163ef8cd39fa8c1087d4e52d3a2325 \ + --hash=sha256:53258eeb7a80fc46f62fd59c876957a2d0e15e6449a9e71842b6d24419d88ca1 \ + --hash=sha256:534480ee5690ab3cbed89d4c8971a5c631b69a8c0883ecfea96c19118510c846 \ + --hash=sha256:58638690ebd0a06427c5fe1a227bb6b8b9fdc2bd07701bec13c2335c82131a88 \ + --hash=sha256:58dfc47f8b102da61e8949708b3eafc3504509a5728f8b4ddef84bd9e16ad420 \ + --hash=sha256:59caf6a4ed0d164055ccff8fe31eddc0ebc07cf7326a2aaa0dbf7a4001cd823e \ + --hash=sha256:5dbad74103df937e1325cc4bfeaf57713be0b4f15e1c2da43ccdd836393e2ea2 \ + --hash=sha256:5e1da8f11a3dd397f0a32c76165cf0c4eb95b31013a94f6ecc0b280c05c91b59 \ + --hash=sha256:646afc8102935a388ffc3914b336d22d1c2d6209c773f3eb5dd4d6d3b6f8c1cb \ + --hash=sha256:64fc9068d701233effd61b19efb1485587560b66fe57b3e50d29c5d78e7fef68 \ + --hash=sha256:65553c9b6da8166e819a6aa90ad15288599b340f91d18f60b2061f402b9a4915 \ + --hash=sha256:685ec345eefc757a7c8af44a3032734a739f8c45d1b0ac45efc5d8977aa4720f \ + --hash=sha256:6ad622bf7756d5a497d5b6836e7fc3752e2dd6f4c648e24b1803f6048596f701 \ + --hash=sha256:73322a6cc57fcee3c0c57c4463d828e9428275fb85a27aa2aa1a92fdc42afd7b \ + --hash=sha256:74bed8f63f8f14d75eec75cf3d04ad581da6b914001b474a5d3cd3372c8cc27d \ + --hash=sha256:79ec007767b9b56860e0372085f8504db5d06bd6a327a335449508bbee9648fa \ + --hash=sha256:7a946a8992941fea80ed4beae6bff74ffd7ee129a90b4dd5cf9c476a30e9708d \ + --hash=sha256:7ad442d527a7e358a469faf43fda45aaf4ac3249c8310a82f0ccff9164e5dccd \ + --hash=sha256:7c9a35ce2c2573bada929e0b7b3576de647b0defbd25f5139dcdaba0ae35a4cc \ + --hash=sha256:7e7b853bbc44fb03fbdba34feb4bd414322180135e2cb5164f20ce1c9795ee48 \ + --hash=sha256:879a7b7b0ad82481c52d3c7eb99bf6f0645dbdec5134a4bddbd16f3506947feb \ + --hash=sha256:8a706d1e74dd3dea05cb54580d9bd8b2880e9264856ce5068027eed09680aa74 \ + 
--hash=sha256:8a84efb768fb968381e525eeeb3d92857e4985aacc39f3c47ffd00eb4509315b \ + --hash=sha256:8cf9e8c3a2153934a23ac160cc4cba0ec035f6867c8013cc6077a79823370346 \ + --hash=sha256:8da4bf6d54ceed70e8861f833f83ce0814a2b72102e890cbdfe4b34764cdd66e \ + --hash=sha256:8e59bca908d9ca0de3dc8684f21ebf9a690fe47b6be93236eb40b99af28b6ea6 \ + --hash=sha256:914571a2a5b4e7606997e169f64ce53a8b1e06f2cf2c3a7273aa106236d43dd5 \ + --hash=sha256:a51abd48c6d8ac89e0cfd4fe177c61481aca2d5e7ba42044fd218cfd8ea9899f \ + --hash=sha256:a52a1f3a5af7ba1c9ace055b659189f6c669cf3657095b50f9602af3a3ba0fe5 \ + --hash=sha256:ad33e8400e4ec17ba782f7b9cf868977d867ed784a1f5f2ab46e7ba53b6e1e1b \ + --hash=sha256:b4c01941fd2ff87c2a934ee6055bda4ed353a7846b8d4f341c428109e9fcde8c \ + --hash=sha256:bce7d9e614a04d0883af0b3d4d501171fbfca038f12c77fa838d9f198147a23f \ + --hash=sha256:c40ffa9a15d74e05ba1fe2681ea33b9caffd886675412612d93ab17b58ea2fec \ + --hash=sha256:c5a91481a3cc573ac8c0d9aace09345d989dc4a0202b7fcb312c88c26d4e71a8 \ + --hash=sha256:c921af52214dcbb75e6bdf6a661b23c3e6417f00c603dd2070bccb5c3ef499f5 \ + --hash=sha256:d46cf9e3705ea9485687aa4001a76e44748b609d260af21c4ceea7f2212a501d \ + --hash=sha256:d8ce0b22b890be5d252de90d0e0d119f363012027cf256185fc3d474c44b1b9e \ + --hash=sha256:dd432ccc2c72b914e4cb77afce64aab761c1137cc698be3984eee260bcb2896e \ + --hash=sha256:e0856a2b7e8dcb874be44fea031d22e5b3a19121be92a1e098f46068a11b0870 \ + --hash=sha256:e1f3c3d21f7cf67bcf2da8e494d30a75e4cf60041d98b3f79875afb5b96f3a3f \ + --hash=sha256:f1ba6136e650898082d9d5a5217d5906d1e138024f836ff48691784bbe1adf96 \ + --hash=sha256:f3e9b4936df53b970513eac1758f3882c88658a220b58dcc1e39606dccaaf01c \ + --hash=sha256:f80bc7d47f76089633763f952e67f8214cb7b3ee6bfa489b3cb6a84cfac114cd \ + --hash=sha256:fd2906780f25c8ed5d7b323379f6138524ba793428db5d0e9d226d3fa6aa1788 + # via + # flax + # orbax-checkpoint +multidict==6.1.0 \ + --hash=sha256:052e10d2d37810b99cc170b785945421141bf7bb7d2f8799d431e7db229c385f \ + 
--hash=sha256:06809f4f0f7ab7ea2cabf9caca7d79c22c0758b58a71f9d32943ae13c7ace056 \ + --hash=sha256:071120490b47aa997cca00666923a83f02c7fbb44f71cf7f136df753f7fa8761 \ + --hash=sha256:0c3f390dc53279cbc8ba976e5f8035eab997829066756d811616b652b00a23a3 \ + --hash=sha256:0e2b90b43e696f25c62656389d32236e049568b39320e2735d51f08fd362761b \ + --hash=sha256:0e5f362e895bc5b9e67fe6e4ded2492d8124bdf817827f33c5b46c2fe3ffaca6 \ + --hash=sha256:10524ebd769727ac77ef2278390fb0068d83f3acb7773792a5080f2b0abf7748 \ + --hash=sha256:10a9b09aba0c5b48c53761b7c720aaaf7cf236d5fe394cd399c7ba662d5f9966 \ + --hash=sha256:16e5f4bf4e603eb1fdd5d8180f1a25f30056f22e55ce51fb3d6ad4ab29f7d96f \ + --hash=sha256:188215fc0aafb8e03341995e7c4797860181562380f81ed0a87ff455b70bf1f1 \ + --hash=sha256:189f652a87e876098bbc67b4da1049afb5f5dfbaa310dd67c594b01c10388db6 \ + --hash=sha256:1ca0083e80e791cffc6efce7660ad24af66c8d4079d2a750b29001b53ff59ada \ + --hash=sha256:1e16bf3e5fc9f44632affb159d30a437bfe286ce9e02754759be5536b169b305 \ + --hash=sha256:2090f6a85cafc5b2db085124d752757c9d251548cedabe9bd31afe6363e0aff2 \ + --hash=sha256:20b9b5fbe0b88d0bdef2012ef7dee867f874b72528cf1d08f1d59b0e3850129d \ + --hash=sha256:22ae2ebf9b0c69d206c003e2f6a914ea33f0a932d4aa16f236afc049d9958f4a \ + --hash=sha256:22f3105d4fb15c8f57ff3959a58fcab6ce36814486500cd7485651230ad4d4ef \ + --hash=sha256:23bfd518810af7de1116313ebd9092cb9aa629beb12f6ed631ad53356ed6b86c \ + --hash=sha256:27e5fc84ccef8dfaabb09d82b7d179c7cf1a3fbc8a966f8274fcb4ab2eb4cadb \ + --hash=sha256:3380252550e372e8511d49481bd836264c009adb826b23fefcc5dd3c69692f60 \ + --hash=sha256:3702ea6872c5a2a4eeefa6ffd36b042e9773f05b1f37ae3ef7264b1163c2dcf6 \ + --hash=sha256:37bb93b2178e02b7b618893990941900fd25b6b9ac0fa49931a40aecdf083fe4 \ + --hash=sha256:3914f5aaa0f36d5d60e8ece6a308ee1c9784cd75ec8151062614657a114c4478 \ + --hash=sha256:3a37ffb35399029b45c6cc33640a92bef403c9fd388acce75cdc88f58bd19a81 \ + --hash=sha256:3c8b88a2ccf5493b6c8da9076fb151ba106960a2df90c2633f342f120751a9e7 \ + 
--hash=sha256:3e97b5e938051226dc025ec80980c285b053ffb1e25a3db2a3aa3bc046bf7f56 \ + --hash=sha256:3ec660d19bbc671e3a6443325f07263be452c453ac9e512f5eb935e7d4ac28b3 \ + --hash=sha256:3efe2c2cb5763f2f1b275ad2bf7a287d3f7ebbef35648a9726e3b69284a4f3d6 \ + --hash=sha256:483a6aea59cb89904e1ceabd2b47368b5600fb7de78a6e4a2c2987b2d256cf30 \ + --hash=sha256:4867cafcbc6585e4b678876c489b9273b13e9fff9f6d6d66add5e15d11d926cb \ + --hash=sha256:48e171e52d1c4d33888e529b999e5900356b9ae588c2f09a52dcefb158b27506 \ + --hash=sha256:4a9cb68166a34117d6646c0023c7b759bf197bee5ad4272f420a0141d7eb03a0 \ + --hash=sha256:4b820514bfc0b98a30e3d85462084779900347e4d49267f747ff54060cc33925 \ + --hash=sha256:4e18b656c5e844539d506a0a06432274d7bd52a7487e6828c63a63d69185626c \ + --hash=sha256:4e9f48f58c2c523d5a06faea47866cd35b32655c46b443f163d08c6d0ddb17d6 \ + --hash=sha256:50b3a2710631848991d0bf7de077502e8994c804bb805aeb2925a981de58ec2e \ + --hash=sha256:55b6d90641869892caa9ca42ff913f7ff1c5ece06474fbd32fb2cf6834726c95 \ + --hash=sha256:57feec87371dbb3520da6192213c7d6fc892d5589a93db548331954de8248fd2 \ + --hash=sha256:58130ecf8f7b8112cdb841486404f1282b9c86ccb30d3519faf301b2e5659133 \ + --hash=sha256:5845c1fd4866bb5dd3125d89b90e57ed3138241540897de748cdf19de8a2fca2 \ + --hash=sha256:59bfeae4b25ec05b34f1956eaa1cb38032282cd4dfabc5056d0a1ec4d696d3aa \ + --hash=sha256:5b48204e8d955c47c55b72779802b219a39acc3ee3d0116d5080c388970b76e3 \ + --hash=sha256:5c09fcfdccdd0b57867577b719c69e347a436b86cd83747f179dbf0cc0d4c1f3 \ + --hash=sha256:6180c0ae073bddeb5a97a38c03f30c233e0a4d39cd86166251617d1bbd0af436 \ + --hash=sha256:682b987361e5fd7a139ed565e30d81fd81e9629acc7d925a205366877d8c8657 \ + --hash=sha256:6b5d83030255983181005e6cfbac1617ce9746b219bc2aad52201ad121226581 \ + --hash=sha256:6bb5992037f7a9eff7991ebe4273ea7f51f1c1c511e6a2ce511d0e7bdb754492 \ + --hash=sha256:73eae06aa53af2ea5270cc066dcaf02cc60d2994bbb2c4ef5764949257d10f43 \ + --hash=sha256:76f364861c3bfc98cbbcbd402d83454ed9e01a5224bb3a28bf70002a230f73e2 \ + 
--hash=sha256:820c661588bd01a0aa62a1283f20d2be4281b086f80dad9e955e690c75fb54a2 \ + --hash=sha256:82176036e65644a6cc5bd619f65f6f19781e8ec2e5330f51aa9ada7504cc1926 \ + --hash=sha256:87701f25a2352e5bf7454caa64757642734da9f6b11384c1f9d1a8e699758057 \ + --hash=sha256:9079dfc6a70abe341f521f78405b8949f96db48da98aeb43f9907f342f627cdc \ + --hash=sha256:90f8717cb649eea3504091e640a1b8568faad18bd4b9fcd692853a04475a4b80 \ + --hash=sha256:957cf8e4b6e123a9eea554fa7ebc85674674b713551de587eb318a2df3e00255 \ + --hash=sha256:99f826cbf970077383d7de805c0681799491cb939c25450b9b5b3ced03ca99f1 \ + --hash=sha256:9f636b730f7e8cb19feb87094949ba54ee5357440b9658b2a32a5ce4bce53972 \ + --hash=sha256:a114d03b938376557927ab23f1e950827c3b893ccb94b62fd95d430fd0e5cf53 \ + --hash=sha256:a185f876e69897a6f3325c3f19f26a297fa058c5e456bfcff8015e9a27e83ae1 \ + --hash=sha256:a7a9541cd308eed5e30318430a9c74d2132e9a8cb46b901326272d780bf2d423 \ + --hash=sha256:aa466da5b15ccea564bdab9c89175c762bc12825f4659c11227f515cee76fa4a \ + --hash=sha256:aaed8b0562be4a0876ee3b6946f6869b7bcdb571a5d1496683505944e268b160 \ + --hash=sha256:ab7c4ceb38d91570a650dba194e1ca87c2b543488fe9309b4212694174fd539c \ + --hash=sha256:ac10f4c2b9e770c4e393876e35a7046879d195cd123b4f116d299d442b335bcd \ + --hash=sha256:b04772ed465fa3cc947db808fa306d79b43e896beb677a56fb2347ca1a49c1fa \ + --hash=sha256:b1c416351ee6271b2f49b56ad7f308072f6f44b37118d69c2cad94f3fa8a40d5 \ + --hash=sha256:b225d95519a5bf73860323e633a664b0d85ad3d5bede6d30d95b35d4dfe8805b \ + --hash=sha256:b2f59caeaf7632cc633b5cf6fc449372b83bbdf0da4ae04d5be36118e46cc0aa \ + --hash=sha256:b58c621844d55e71c1b7f7c498ce5aa6985d743a1a59034c57a905b3f153c1ef \ + --hash=sha256:bf6bea52ec97e95560af5ae576bdac3aa3aae0b6758c6efa115236d9e07dae44 \ + --hash=sha256:c08be4f460903e5a9d0f76818db3250f12e9c344e79314d1d570fc69d7f4eae4 \ + --hash=sha256:c7053d3b0353a8b9de430a4f4b4268ac9a4fb3481af37dfe49825bf45ca24156 \ + --hash=sha256:c943a53e9186688b45b323602298ab727d8865d8c9ee0b17f8d62d14b56f0753 \ + 
--hash=sha256:ce2186a7df133a9c895dea3331ddc5ddad42cdd0d1ea2f0a51e5d161e4762f28 \ + --hash=sha256:d093be959277cb7dee84b801eb1af388b6ad3ca6a6b6bf1ed7585895789d027d \ + --hash=sha256:d094ddec350a2fb899fec68d8353c78233debde9b7d8b4beeafa70825f1c281a \ + --hash=sha256:d1a9dd711d0877a1ece3d2e4fea11a8e75741ca21954c919406b44e7cf971304 \ + --hash=sha256:d569388c381b24671589335a3be6e1d45546c2988c2ebe30fdcada8457a31008 \ + --hash=sha256:d618649d4e70ac6efcbba75be98b26ef5078faad23592f9b51ca492953012429 \ + --hash=sha256:d83a047959d38a7ff552ff94be767b7fd79b831ad1cd9920662db05fec24fe72 \ + --hash=sha256:d8fff389528cad1618fb4b26b95550327495462cd745d879a8c7c2115248e399 \ + --hash=sha256:da1758c76f50c39a2efd5e9859ce7d776317eb1dd34317c8152ac9251fc574a3 \ + --hash=sha256:db7457bac39421addd0c8449933ac32d8042aae84a14911a757ae6ca3eef1392 \ + --hash=sha256:e27bbb6d14416713a8bd7aaa1313c0fc8d44ee48d74497a0ff4c3a1b6ccb5167 \ + --hash=sha256:e617fb6b0b6953fffd762669610c1c4ffd05632c138d61ac7e14ad187870669c \ + --hash=sha256:e9aa71e15d9d9beaad2c6b9319edcdc0a49a43ef5c0a4c8265ca9ee7d6c67774 \ + --hash=sha256:ec2abea24d98246b94913b76a125e855eb5c434f7c46546046372fe60f666351 \ + --hash=sha256:f179dee3b863ab1c59580ff60f9d99f632f34ccb38bf67a33ec6b3ecadd0fd76 \ + --hash=sha256:f4c035da3f544b1882bac24115f3e2e8760f10a0107614fc9839fd232200b875 \ + --hash=sha256:f67f217af4b1ff66c68a87318012de788dd95fcfeb24cc889011f4e1c7454dfd \ + --hash=sha256:f90c822a402cb865e396a504f9fc8173ef34212a342d92e362ca498cad308e28 \ + --hash=sha256:ff3827aef427c89a25cc96ded1759271a93603aba9fb977a6d264648ebf989db + # via + # aiohttp + # yarl +multiprocess==0.70.17 \ + --hash=sha256:1d52f068357acd1e5bbc670b273ef8f81d57863235d9fbf9314751886e141968 \ + --hash=sha256:20c28ca19079a6c879258103a6d60b94d4ffe2d9da07dda93fb1c8bc6243f522 \ + --hash=sha256:27b8409c02b5dd89d336107c101dfbd1530a2cd4fd425fc27dcb7adb6e0b47bf \ + --hash=sha256:2818af14c52446b9617d1b0755fa70ca2f77c28b25ed97bdaa2c69a22c47b46c \ + 
--hash=sha256:2884701445d0177aec5bd5f6ee0df296773e4fb65b11903b94c613fb46cfb7d1 \ + --hash=sha256:2b12e081df87ab755190e227341b2c3b17ee6587e9c82fecddcbe6aa812cd7f7 \ + --hash=sha256:2ea0939b0f4760a16a548942c65c76ff5afd81fbf1083c56ae75e21faf92e426 \ + --hash=sha256:349525099a0c9ac5936f0488b5ee73199098dac3ac899d81d326d238f9fd3ccd \ + --hash=sha256:38357ca266b51a2e22841b755d9a91e4bb7b937979a54d411677111716c32744 \ + --hash=sha256:4ae2f11a3416809ebc9a48abfc8b14ecce0652a0944731a1493a3c1ba44ff57a \ + --hash=sha256:7ddb24e5bcdb64e90ec5543a1f05a39463068b6d3b804aa3f2a4e16ec28562d6 \ + --hash=sha256:a0f01cd9d079af7a8296f521dc03859d1a414d14c1e2b6e676ef789333421c95 \ + --hash=sha256:a22a6b1a482b80eab53078418bb0f7025e4f7d93cc8e1f36481477a023884861 \ + --hash=sha256:c2c82d0375baed8d8dd0d8c38eb87c5ae9c471f8e384ad203a36f095ee860f67 \ + --hash=sha256:c3feb874ba574fbccfb335980020c1ac631fbf2a3f7bee4e2042ede62558a021 \ + --hash=sha256:d729f55198a3579f6879766a6d9b72b42d4b320c0dcb7844afb774d75b573c62 + # via + # -r requirements-dev.txt + # datasets +namex==0.0.8 \ + --hash=sha256:32a50f6c565c0bb10aa76298c959507abdc0e850efe085dc38f3440fcb3aa90b \ + --hash=sha256:7ddb6c2bb0e753a311b7590f84f6da659dd0c05e65cb89d519d54c0a250c0487 + # via keras +nest-asyncio==1.6.0 \ + --hash=sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe \ + --hash=sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c + # via orbax-checkpoint +networkx==3.4.2 \ + --hash=sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1 \ + --hash=sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f + # via torch +numpy==1.26.4 \ + --hash=sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b \ + --hash=sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818 \ + --hash=sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20 \ + --hash=sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0 \ + 
--hash=sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010 \ + --hash=sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a \ + --hash=sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea \ + --hash=sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c \ + --hash=sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71 \ + --hash=sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110 \ + --hash=sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be \ + --hash=sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a \ + --hash=sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a \ + --hash=sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5 \ + --hash=sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed \ + --hash=sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd \ + --hash=sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c \ + --hash=sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e \ + --hash=sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0 \ + --hash=sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c \ + --hash=sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a \ + --hash=sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b \ + --hash=sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0 \ + --hash=sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6 \ + --hash=sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2 \ + --hash=sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a \ + --hash=sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30 \ + --hash=sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218 \ + 
--hash=sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5 \ + --hash=sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07 \ + --hash=sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2 \ + --hash=sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4 \ + --hash=sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764 \ + --hash=sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef \ + --hash=sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3 \ + --hash=sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f + # via + # -r requirements-dev.txt + # chex + # contourpy + # datasets + # dm-haiku + # etils + # flax + # h5py + # jax + # jaxlib + # jmp + # jraph + # keras + # matplotlib + # mizani + # ml-dtypes + # optax + # orbax-checkpoint + # pandas + # patsy + # plotnine + # scikit-learn + # scipy + # statsmodels + # tensorboard + # tensorflow-cpu + # tensorflow-datasets + # tensorstore + # torchvision + # transformers +nvidia-cublas-cu12==12.1.3.1 \ + --hash=sha256:2b964d60e8cf11b5e1073d179d85fa340c120e99b3067558f3cf98dd69d02906 \ + --hash=sha256:ee53ccca76a6fc08fb9701aa95b6ceb242cdaab118c3bb152af4e579af792728 + # via + # nvidia-cudnn-cu12 + # nvidia-cusolver-cu12 + # torch +nvidia-cuda-cupti-cu12==12.1.105 \ + --hash=sha256:bea8236d13a0ac7190bd2919c3e8e6ce1e402104276e6f9694479e48bb0eb2a4 \ + --hash=sha256:e54fde3983165c624cb79254ae9818a456eb6e87a7fd4d56a2352c24ee542d7e + # via torch +nvidia-cuda-nvrtc-cu12==12.1.105 \ + --hash=sha256:0a98a522d9ff138b96c010a65e145dc1b4850e9ecb75a0172371793752fd46ed \ + --hash=sha256:339b385f50c309763ca65456ec75e17bbefcbbf2893f462cb8b90584cd27a1c2 + # via torch +nvidia-cuda-runtime-cu12==12.1.105 \ + --hash=sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40 \ + --hash=sha256:dfb46ef84d73fababab44cf03e3b83f80700d27ca300e537f85f636fac474344 + # via torch 
+nvidia-cudnn-cu12==8.9.2.26 \ + --hash=sha256:5ccb288774fdfb07a7e7025ffec286971c06d8d7b4fb162525334616d7629ff9 + # via torch +nvidia-cufft-cu12==11.0.2.54 \ + --hash=sha256:794e3948a1aa71fd817c3775866943936774d1c14e7628c74f6f7417224cdf56 \ + --hash=sha256:d9ac353f78ff89951da4af698f80870b1534ed69993f10a4cf1d96f21357e253 + # via torch +nvidia-curand-cu12==10.3.2.106 \ + --hash=sha256:75b6b0c574c0037839121317e17fd01f8a69fd2ef8e25853d826fec30bdba74a \ + --hash=sha256:9d264c5036dde4e64f1de8c50ae753237c12e0b1348738169cd0f8a536c0e1e0 + # via torch +nvidia-cusolver-cu12==11.4.5.107 \ + --hash=sha256:74e0c3a24c78612192a74fcd90dd117f1cf21dea4822e66d89e8ea80e3cd2da5 \ + --hash=sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd + # via torch +nvidia-cusparse-cu12==12.1.0.106 \ + --hash=sha256:b798237e81b9719373e8fae8d4f091b70a0cf09d9d85c95a557e11df2d8e9a5a \ + --hash=sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c + # via + # nvidia-cusolver-cu12 + # torch +nvidia-nccl-cu12==2.20.5 \ + --hash=sha256:057f6bf9685f75215d0c53bf3ac4a10b3e6578351de307abad9e18a99182af56 \ + --hash=sha256:1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01 + # via torch +nvidia-nvjitlink-cu12==12.6.85 \ + --hash=sha256:cf4eaa7d4b6b543ffd69d6abfb11efdeb2db48270d94dfd3a452c24150829e41 \ + --hash=sha256:e61120e52ed675747825cdd16febc6a0730537451d867ee58bee3853b1b13d1c \ + --hash=sha256:eedc36df9e88b682efe4309aa16b5b4e78c2407eac59e8c10a6a47535164369a + # via + # nvidia-cusolver-cu12 + # nvidia-cusparse-cu12 +nvidia-nvtx-cu12==12.1.105 \ + --hash=sha256:65f4d98982b31b60026e0e6de73fbdfc09d08a96f4656dd3665ca616a11e1e82 \ + --hash=sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5 + # via torch +oauth2client==4.1.3 \ + --hash=sha256:b8a81cc5d60e2d364f0b1b98f958dbd472887acaf1a5b05e21c28c31a2d6d3ac \ + --hash=sha256:d486741e451287f69568a4d26d70d9acd73a2bbfa275746c535b4209891cccc6 + # via cloud-tpu-client +opt-einsum==3.4.0 \ + 
--hash=sha256:69bb92469f86a1565195ece4ac0323943e83477171b91d24c35afe028a90d7cd \ + --hash=sha256:96ca72f1b886d148241348783498194c577fa30a8faac108586b14f1ba4473ac + # via + # jax + # tensorflow-cpu +optax==0.2.4 \ + --hash=sha256:4e05d3d5307e6dde4c319187ae36e6cd3a0c035d4ed25e9e992449a304f47336 \ + --hash=sha256:db35c04e50b52596662efb002334de08c2a0a74971e4da33f467e84fac08886a + # via + # -r requirements-dev.txt + # flax +optree==0.13.1 \ + --hash=sha256:01819c3df950696f32c91faf8d376ae6b695ffdba18f330f1cab6b8e314e4612 \ + --hash=sha256:025d23400b8b579462a251420f0a9ae77d3d3593f84276f3465985731d79d722 \ + --hash=sha256:04252b5f24e5dae716647848b302f5f7849ecb028f8c617666d1b89a42eb988b \ + --hash=sha256:0914ba436d6c0781dc9b04e3b95e06fe5c4fc6a87e94893da971805a3790efe8 \ + --hash=sha256:0adc896018f34b5f37f6c92c35ae639877578725c5281cc9d4a0ac2ab2c46f77 \ + --hash=sha256:0aec6da79a6130b4c76073241c0f31c11b96a38e70c7a00f9ed918d7464394ab \ + --hash=sha256:0f1bde49e41a158af28d99fae1bd425fbd664907c53cf595106fb5b35e5cbe26 \ + --hash=sha256:0f9707547635cfede8d79e4161c066021ffefc401d98bbf8eba452b1355a42c7 \ + --hash=sha256:100d70cc57af5284649f881e6b266fee3a3e86e82024484eaa64ee18d1587e42 \ + --hash=sha256:111172446e8a4f0d3be13a853fa28cb46b5679a1c7ca15b2e6db2b43dbbf9efb \ + --hash=sha256:135e29e0a69149958003443d43f49af0ebb65f03ae52cddf4142e94d5a36b0c8 \ + --hash=sha256:1496f29d5b9633fed4b3f1fd4b7e772d77200eb2370c08ef8e14404309c669b9 \ + --hash=sha256:1891267f9dc76e9ddfed947ff7b755ad438ad483de0537a6b5bcf38478d5a33c \ + --hash=sha256:1935639dd498a42367633e3877797e1330e39d44d48bbca1a136bb4dbe4c1bc9 \ + --hash=sha256:1b291aed475ca5992a0c587ca4b72f074724209e01afca9d015c9a5b2089c68d \ + --hash=sha256:1d74ff3dfe8599935d52b26a2fe5a43242b4d3f47be6fc1c5ce34c25e116d616 \ + --hash=sha256:2063234ef4d58f11277e157d1cf066a8bd07be911da226bff84fc9761b8c1a25 \ + --hash=sha256:22ce30c9d733c2214fa321c8370e4dfc8c7829970364618b2b5cacffbc9e8949 \ + 
--hash=sha256:2521840d6aded4dac62c787f50bcb1cacbfcda86b9319d666b4025fa0ba5545a \ + --hash=sha256:27d81dc43b522ba47ba7d2e7d91dbb486940348b1bf85caeb0afc2815c0aa492 \ + --hash=sha256:28f083ede9be89503357a6b9e5d304826701596abe13d33e8f6fa2cd85b407fc \ + --hash=sha256:2909cb42add6bb1a5a2b0243bdd8c4b861bf072f3741e26239481907ac8ad4e6 \ + --hash=sha256:2cba7ca4cf991270a9fdd080b091d2cbdbcbf27858acebda6af40ff57312d1ea \ + --hash=sha256:3010ae24e994f6e00071098d34e98e78eb995b7454a2ef629a0bf7df17441b24 \ + --hash=sha256:30b02951c48ecca6fbeb6a3cc7a858267c4d82d1c874481a639061e845168da5 \ + --hash=sha256:34b4dd0f5d73170c7740726cadfca973220ccbed9559beb51fab446d9e584d0a \ + --hash=sha256:360f2e8f7eb22ff131bc7e3e241035908e6b47d41372eb3d68d77bc7036ddb30 \ + --hash=sha256:363939b255a9fa0e077d8297a8301857c859592fc581cee19ec9238e0c145c4a \ + --hash=sha256:37948e2d796db23d6ccd07105b709b827eba26549d34dd2149e95887c89fe9b4 \ + --hash=sha256:395ac2eb69528613fd0f2ee8706890b7921b8ff3159df53b6e9f67eaf519c5cb \ + --hash=sha256:3d0161012d80e4865017e10298ac55652cc3ad9a3eae9440229d4bf00b140e01 \ + --hash=sha256:3da76fc43dcc22fe58d11634a04672ca7cc270aed469ac35fd5c78b7b9bc9125 \ + --hash=sha256:4711f5cac5a2a49c3d6c9f0eca7b77c22b452170bb33ea01c3214ebb17931db9 \ + --hash=sha256:48c29d9c6c64c8dc48c8ee97f7c1d5cdb83e37320f0be0857c06ce4b97994aea \ + --hash=sha256:50dd6a9c8ccef267ab4941f07eac53faf6a00666dce4d209da20525570ffaca3 \ + --hash=sha256:536ecf0e555432cc939d958590e33e00e75cc254ab0dd269e84fc9de8352db61 \ + --hash=sha256:5569b95e214d20a1b7acb7d9477fabbd709d334bc34f3257368ea1418b811a44 \ + --hash=sha256:55e82426bef151149cfa41d68ac957730fcd420996c0db8324fca81aa6a810ba \ + --hash=sha256:587fb8de8e75e80fe7c7240e269630876bec3ee2038724893370976207813e4b \ + --hash=sha256:5b5626c38d4a18a144063db5c1dbb558431d83ca10682324f74665a12214801f \ + --hash=sha256:5b6531cd4eb23fadbbf77faf834e1119da06d7af3154f55786b59953cd87bb8a \ + --hash=sha256:5c6aed6c5eabda59a91376aca08ba508a06f1c68850216a98743b5f8f55af841 \ + 
--hash=sha256:5c950c85561c47efb3b1a3771ed1b2b2339bd5e28a0ca42bdcedadccc645eeac \ + --hash=sha256:5d21a8b449e47fdbf118ac1938cf6f97d8a60258bc45c6eba3e61f79feeb1ea8 \ + --hash=sha256:5da0fd26325a07354915cc4e3a9aee797cb75dff07c60d24b3f309457069abd3 \ + --hash=sha256:5dec0785bc4bbcabecd7e82be3f189b21f3ce8a1244b243009736912a6d8f737 \ + --hash=sha256:5f94a627c5a2fb776bbfa8f7558db5b918916d37586ba943e74e5f22789c4301 \ + --hash=sha256:63b2749504fe0b9ac3892e26bf55a040ae2973bcf8da1476afe9266a4624be9d \ + --hash=sha256:64032b77420410c3d315a4b9bcbece15853432c155613bb4261d87809b3ee357 \ + --hash=sha256:652287e43fcbb29b8d1821144987e3bc558be4e5eec0d42fce7007cc3ee8e574 \ + --hash=sha256:6bc9aae5ee17a38e3657c8c5db1a60923cc10debd177f6781f352362a846feeb \ + --hash=sha256:6c4ab1d391b89cb88eb3c63383d5eb0930bc21141de9d5acd277feed9e38eb65 \ + --hash=sha256:7abf1c6fe42cb112f0fb169f80d7b26476fa44226d2caf3727b49d210bdc3343 \ + --hash=sha256:7e1c1da6574d59073b6a6b9a13633217f584ec271ddee4e014c7e422f171e9b4 \ + --hash=sha256:84a6a974aa9dc4119fe502865c8e1755090ac17dbb53a964619a8ece1130831e \ + --hash=sha256:8d89891e11a55ad83ab3e2810f8571774b2117a6198b4044fa44e0f37f72855e \ + --hash=sha256:940c739c9957404a9bbe40ed9289792adaf476cece59eca4fe2f32137fa15a8d \ + --hash=sha256:95298846c057cce2e7d114c03c645e86a5381b72388c8c390986bdefe69a759c \ + --hash=sha256:9824a4258b058282eeaee1b388c8dfc704e49beda957b99177db8bd8249a3abe \ + --hash=sha256:9c8ee1e988c634a451146b87d9ebdbf650a75dc1f52a9cffcd89fabb7289321c \ + --hash=sha256:a3058e2d6a6a7d6362d40f7826258204d9fc2cc4cc8f72eaa3dbff14b6622025 \ + --hash=sha256:a408a43f16840475612c7058eb80b53791bf8b8266c5b3cd07f69697958fd97d \ + --hash=sha256:aee696272eece657c2b9e3cf079d8fc7cbbcc8a5c8199dbcd0960ddf7e672fe9 \ + --hash=sha256:af67856aa8073d237fe67313d84f8aeafac32c1cef7239c628a2768d02679c43 \ + --hash=sha256:b21ac55473476007e317500fd5851d0a0d695a0c51742bd65fe7347d18530da2 \ + --hash=sha256:b5e5f09c85ae558a6bdaea57e63168082e728e777391393e9e2792f0d15b7b59 \ + 
--hash=sha256:b94f9081cd810a59faae4dbac8f0447e59ce0fb2d70cfb388dc123c33a9fd1a8 \ + --hash=sha256:bbc5fa2ff5090389f3a906567446f01d692bd6fe5cfcc5ae2d5861f24e8e0e4d \ + --hash=sha256:bc9c396f64f9aacdf852713bd75f1b9a83f118660fd82e87c937c081b7ddccd1 \ + --hash=sha256:c4d13f55dbd509d27be3af54d53b4ca0751bc518244ced6d0567e518e51452a2 \ + --hash=sha256:c84ecb6977ba7f5d4ba24d0312cbffb74c6860237572701c2716bd811ca9b226 \ + --hash=sha256:c99891c2ea6050738f7e3de5ab4038736cf33555a752b34a06922ebc9bf0488e \ + --hash=sha256:ce962f0dd387137817dcda600bd6cf2e1b65103411807b6cdbbd9ffddf1061f6 \ + --hash=sha256:cf85ba1a7d80b6dc19ef5ca4c17d2ff0290dc9306c5b8b468d51cede287f3c8d \ + --hash=sha256:cfdf7f5cfb5f9b1c0188c667a3dc56551e60a52a918cb8600f84e2f0ad882106 \ + --hash=sha256:d0c5a389c108367007151bcfef494f8c2674e4aa23d80ac9163876f5b213dfb6 \ + --hash=sha256:d1844b966bb5c95b64af5c6f92f99e4037452b92b18d060fbd80097b5b773d86 \ + --hash=sha256:d580f1bf23bb352c4db6b3544f282f1ac08dcb0d9ab537d25e56220353438cf7 \ + --hash=sha256:d866f707b9f3a9f0e670a73fe8feee4993b2dbdbf9eef598e1cf2e5cb2876413 \ + --hash=sha256:de1ae16ea0410497e50fe2b4d48a83c37bfc87da76e1e82f9cc8c800b4fc8be6 \ + --hash=sha256:e40f018f522fcfd244688d1b3a360518e636ba7f636385aae0566eae3e7d29bc \ + --hash=sha256:efbffeec15e4a79ed9921dc2227cbba1b64db353c4b72ce4ce83e62fbce9e652 \ + --hash=sha256:f2a9eadcab78ccc04114a6916e9decdbc886bbe04c1b7a7bb32e723209162998 \ + --hash=sha256:f39c7174a3f3cdc3f5fe6fb4b832f608c40ac174d7567ed6734b2ee952094631 \ + --hash=sha256:f74fb880472572d550d85d2f1563365b6f194e2157a7703790cbd54d9ab5cf29 \ + --hash=sha256:f788b2ad120deb73b4908a74473cd6de79cfb9f33bbe9dcb59cea2e2477d4e28 \ + --hash=sha256:f8e2a546cecc5077ec7d4fe24ec8aede43ca8555b832d115f1ebbb4f3b35bc78 \ + --hash=sha256:fafeda2e35e3270532132e27b471ea3e3aeac18f7966a4d0469137d1f36046ec + # via keras +orbax-checkpoint==0.11.0 \ + --hash=sha256:892a124fce71f3e7c71451a2b2090c0251db1097803a119a00baa377113bc9ba \ + 
--hash=sha256:d4a0dcc81edd29191cf5a4feb9cf2a4edd31fc5da79d7be616a04f11f2a4d484 + # via flax +packaging==24.2 \ + --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \ + --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f + # via + # datasets + # huggingface-hub + # keras + # matplotlib + # statsmodels + # tensorboard + # tensorflow-cpu + # transformers +pandas==2.2.3 \ + --hash=sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a \ + --hash=sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d \ + --hash=sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5 \ + --hash=sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4 \ + --hash=sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0 \ + --hash=sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32 \ + --hash=sha256:31d0ced62d4ea3e231a9f228366919a5ea0b07440d9d4dac345376fd8e1477ea \ + --hash=sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28 \ + --hash=sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f \ + --hash=sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348 \ + --hash=sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18 \ + --hash=sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468 \ + --hash=sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5 \ + --hash=sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e \ + --hash=sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667 \ + --hash=sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645 \ + --hash=sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13 \ + --hash=sha256:5dbca4c1acd72e8eeef4753eeca07de9b1db4f398669d5994086f788a5d7cc30 \ + 
--hash=sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3 \ + --hash=sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d \ + --hash=sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb \ + --hash=sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3 \ + --hash=sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039 \ + --hash=sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8 \ + --hash=sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd \ + --hash=sha256:7eee9e7cea6adf3e3d24e304ac6b8300646e2a5d1cd3a3c2abed9101b0846761 \ + --hash=sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659 \ + --hash=sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57 \ + --hash=sha256:8cd6d7cc958a3910f934ea8dbdf17b2364827bb4dafc38ce6eef6bb3d65ff09c \ + --hash=sha256:99df71520d25fade9db7c1076ac94eb994f4d2673ef2aa2e86ee039b6746d20c \ + --hash=sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4 \ + --hash=sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a \ + --hash=sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9 \ + --hash=sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42 \ + --hash=sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2 \ + --hash=sha256:bc6b93f9b966093cb0fd62ff1a7e4c09e6d546ad7c1de191767baffc57628f39 \ + --hash=sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc \ + --hash=sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698 \ + --hash=sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed \ + --hash=sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015 \ + --hash=sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24 \ + --hash=sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319 + # via + # 
-r requirements-dev.txt + # datasets + # mizani + # plotnine + # statsmodels +patsy==1.0.1 \ + --hash=sha256:751fb38f9e97e62312e921a1954b81e1bb2bcda4f5eeabaf94db251ee791509c \ + --hash=sha256:e786a9391eec818c054e359b737bbce692f051aee4c661f4141cc88fb459c0c4 + # via statsmodels +pillow==11.1.0 \ + --hash=sha256:015c6e863faa4779251436db398ae75051469f7c903b043a48f078e437656f83 \ + --hash=sha256:0a2f91f8a8b367e7a57c6e91cd25af510168091fb89ec5146003e424e1558a96 \ + --hash=sha256:11633d58b6ee5733bde153a8dafd25e505ea3d32e261accd388827ee987baf65 \ + --hash=sha256:2062ffb1d36544d42fcaa277b069c88b01bb7298f4efa06731a7fd6cc290b81a \ + --hash=sha256:31eba6bbdd27dde97b0174ddf0297d7a9c3a507a8a1480e1e60ef914fe23d352 \ + --hash=sha256:3362c6ca227e65c54bf71a5f88b3d4565ff1bcbc63ae72c34b07bbb1cc59a43f \ + --hash=sha256:368da70808b36d73b4b390a8ffac11069f8a5c85f29eff1f1b01bcf3ef5b2a20 \ + --hash=sha256:36ba10b9cb413e7c7dfa3e189aba252deee0602c86c309799da5a74009ac7a1c \ + --hash=sha256:3764d53e09cdedd91bee65c2527815d315c6b90d7b8b79759cc48d7bf5d4f114 \ + --hash=sha256:3a5fe20a7b66e8135d7fd617b13272626a28278d0e578c98720d9ba4b2439d49 \ + --hash=sha256:3cdcdb0b896e981678eee140d882b70092dac83ac1cdf6b3a60e2216a73f2b91 \ + --hash=sha256:4637b88343166249fe8aa94e7c4a62a180c4b3898283bb5d3d2fd5fe10d8e4e0 \ + --hash=sha256:4db853948ce4e718f2fc775b75c37ba2efb6aaea41a1a5fc57f0af59eee774b2 \ + --hash=sha256:4dd43a78897793f60766563969442020e90eb7847463eca901e41ba186a7d4a5 \ + --hash=sha256:54251ef02a2309b5eec99d151ebf5c9904b77976c8abdcbce7891ed22df53884 \ + --hash=sha256:54ce1c9a16a9561b6d6d8cb30089ab1e5eb66918cb47d457bd996ef34182922e \ + --hash=sha256:593c5fd6be85da83656b93ffcccc2312d2d149d251e98588b14fbc288fd8909c \ + --hash=sha256:5bb94705aea800051a743aa4874bb1397d4695fb0583ba5e425ee0328757f196 \ + --hash=sha256:67cd427c68926108778a9005f2a04adbd5e67c442ed21d95389fe1d595458756 \ + --hash=sha256:70ca5ef3b3b1c4a0812b5c63c57c23b63e53bc38e758b37a951e5bc466449861 \ + 
--hash=sha256:73ddde795ee9b06257dac5ad42fcb07f3b9b813f8c1f7f870f402f4dc54b5269 \ + --hash=sha256:758e9d4ef15d3560214cddbc97b8ef3ef86ce04d62ddac17ad39ba87e89bd3b1 \ + --hash=sha256:7d33d2fae0e8b170b6a6c57400e077412240f6f5bb2a342cf1ee512a787942bb \ + --hash=sha256:7fdadc077553621911f27ce206ffcbec7d3f8d7b50e0da39f10997e8e2bb7f6a \ + --hash=sha256:8000376f139d4d38d6851eb149b321a52bb8893a88dae8ee7d95840431977081 \ + --hash=sha256:837060a8599b8f5d402e97197d4924f05a2e0d68756998345c829c33186217b1 \ + --hash=sha256:89dbdb3e6e9594d512780a5a1c42801879628b38e3efc7038094430844e271d8 \ + --hash=sha256:8c730dc3a83e5ac137fbc92dfcfe1511ce3b2b5d7578315b63dbbb76f7f51d90 \ + --hash=sha256:8e275ee4cb11c262bd108ab2081f750db2a1c0b8c12c1897f27b160c8bd57bbc \ + --hash=sha256:9044b5e4f7083f209c4e35aa5dd54b1dd5b112b108648f5c902ad586d4f945c5 \ + --hash=sha256:93a18841d09bcdd774dcdc308e4537e1f867b3dec059c131fde0327899734aa1 \ + --hash=sha256:9409c080586d1f683df3f184f20e36fb647f2e0bc3988094d4fd8c9f4eb1b3b3 \ + --hash=sha256:96f82000e12f23e4f29346e42702b6ed9a2f2fea34a740dd5ffffcc8c539eb35 \ + --hash=sha256:9aa9aeddeed452b2f616ff5507459e7bab436916ccb10961c4a382cd3e03f47f \ + --hash=sha256:9ee85f0696a17dd28fbcfceb59f9510aa71934b483d1f5601d1030c3c8304f3c \ + --hash=sha256:a07dba04c5e22824816b2615ad7a7484432d7f540e6fa86af60d2de57b0fcee2 \ + --hash=sha256:a3cd561ded2cf2bbae44d4605837221b987c216cff94f49dfeed63488bb228d2 \ + --hash=sha256:a697cd8ba0383bba3d2d3ada02b34ed268cb548b369943cd349007730c92bddf \ + --hash=sha256:a76da0a31da6fcae4210aa94fd779c65c75786bc9af06289cd1c184451ef7a65 \ + --hash=sha256:a85b653980faad27e88b141348707ceeef8a1186f75ecc600c395dcac19f385b \ + --hash=sha256:a8d65b38173085f24bc07f8b6c505cbb7418009fa1a1fcb111b1f4961814a442 \ + --hash=sha256:aa8dd43daa836b9a8128dbe7d923423e5ad86f50a7a14dc688194b7be5c0dea2 \ + --hash=sha256:ab8a209b8485d3db694fa97a896d96dd6533d63c22829043fd9de627060beade \ + --hash=sha256:abc56501c3fd148d60659aae0af6ddc149660469082859fa7b066a298bde9482 \ + 
--hash=sha256:ad5db5781c774ab9a9b2c4302bbf0c1014960a0a7be63278d13ae6fdf88126fe \ + --hash=sha256:ae98e14432d458fc3de11a77ccb3ae65ddce70f730e7c76140653048c71bfcbc \ + --hash=sha256:b20be51b37a75cc54c2c55def3fa2c65bb94ba859dde241cd0a4fd302de5ae0a \ + --hash=sha256:b523466b1a31d0dcef7c5be1f20b942919b62fd6e9a9be199d035509cbefc0ec \ + --hash=sha256:b5d658fbd9f0d6eea113aea286b21d3cd4d3fd978157cbf2447a6035916506d3 \ + --hash=sha256:b6123aa4a59d75f06e9dd3dac5bf8bc9aa383121bb3dd9a7a612e05eabc9961a \ + --hash=sha256:bd165131fd51697e22421d0e467997ad31621b74bfc0b75956608cb2906dda07 \ + --hash=sha256:bf902d7413c82a1bfa08b06a070876132a5ae6b2388e2712aab3a7cbc02205c6 \ + --hash=sha256:c12fc111ef090845de2bb15009372175d76ac99969bdf31e2ce9b42e4b8cd88f \ + --hash=sha256:c1eec9d950b6fe688edee07138993e54ee4ae634c51443cfb7c1e7613322718e \ + --hash=sha256:c640e5a06869c75994624551f45e5506e4256562ead981cce820d5ab39ae2192 \ + --hash=sha256:cc1331b6d5a6e144aeb5e626f4375f5b7ae9934ba620c0ac6b3e43d5e683a0f0 \ + --hash=sha256:cfd5cd998c2e36a862d0e27b2df63237e67273f2fc78f47445b14e73a810e7e6 \ + --hash=sha256:d3d8da4a631471dfaf94c10c85f5277b1f8e42ac42bade1ac67da4b4a7359b73 \ + --hash=sha256:d44ff19eea13ae4acdaaab0179fa68c0c6f2f45d66a4d8ec1eda7d6cecbcc15f \ + --hash=sha256:dd0052e9db3474df30433f83a71b9b23bd9e4ef1de13d92df21a52c0303b8ab6 \ + --hash=sha256:dd0e081319328928531df7a0e63621caf67652c8464303fd102141b785ef9547 \ + --hash=sha256:dda60aa465b861324e65a78c9f5cf0f4bc713e4309f83bc387be158b077963d9 \ + --hash=sha256:e06695e0326d05b06833b40b7ef477e475d0b1ba3a6d27da1bb48c23209bf457 \ + --hash=sha256:e1abe69aca89514737465752b4bcaf8016de61b3be1397a8fc260ba33321b3a8 \ + --hash=sha256:e267b0ed063341f3e60acd25c05200df4193e15a4a5807075cd71225a2386e26 \ + --hash=sha256:e5449ca63da169a2e6068dd0e2fcc8d91f9558aba89ff6d02121ca8ab11e79e5 \ + --hash=sha256:e63e4e5081de46517099dc30abe418122f54531a6ae2ebc8680bcd7096860eab \ + --hash=sha256:f189805c8be5ca5add39e6f899e6ce2ed824e65fb45f3c28cb2841911da19070 \ + 
--hash=sha256:f7955ecf5609dee9442cbface754f2c6e541d9e6eda87fad7f7a989b0bdb9d71 \ + --hash=sha256:f86d3a7a9af5d826744fabf4afd15b9dfef44fe69a98541f666f66fbb8d3fef9 \ + --hash=sha256:fbd43429d0d7ed6533b25fc993861b8fd512c42d04514a0dd6337fb3ccf22761 + # via + # matplotlib + # torchvision +plotnine==0.14.5 \ + --hash=sha256:4a8bc4360732dd69a0263def4abab285ed8f0f4386186f1e44c642f2cea79b88 \ + --hash=sha256:9e75969e8e10d8d770a4be36d10e075cc10b88ca6fcc99e36ada53436fb5653f + # via -r requirements-dev.txt +promise==2.3 \ + --hash=sha256:dfd18337c523ba4b6a58801c164c1904a9d4d1b1747c7d5dbf45b693a49d93d0 + # via tensorflow-datasets +propcache==0.2.1 \ + --hash=sha256:03ff9d3f665769b2a85e6157ac8b439644f2d7fd17615a82fa55739bc97863f4 \ + --hash=sha256:049324ee97bb67285b49632132db351b41e77833678432be52bdd0289c0e05e4 \ + --hash=sha256:081a430aa8d5e8876c6909b67bd2d937bfd531b0382d3fdedb82612c618bc41a \ + --hash=sha256:0f022d381747f0dfe27e99d928e31bc51a18b65bb9e481ae0af1380a6725dd1f \ + --hash=sha256:12d1083f001ace206fe34b6bdc2cb94be66d57a850866f0b908972f90996b3e9 \ + --hash=sha256:14d86fe14b7e04fa306e0c43cdbeebe6b2c2156a0c9ce56b815faacc193e320d \ + --hash=sha256:160291c60081f23ee43d44b08a7e5fb76681221a8e10b3139618c5a9a291b84e \ + --hash=sha256:1672137af7c46662a1c2be1e8dc78cb6d224319aaa40271c9257d886be4363a6 \ + --hash=sha256:19a0f89a7bb9d8048d9c4370c9c543c396e894c76be5525f5e1ad287f1750ddf \ + --hash=sha256:1ac2f5fe02fa75f56e1ad473f1175e11f475606ec9bd0be2e78e4734ad575034 \ + --hash=sha256:1cd9a1d071158de1cc1c71a26014dcdfa7dd3d5f4f88c298c7f90ad6f27bb46d \ + --hash=sha256:1ffc3cca89bb438fb9c95c13fc874012f7b9466b89328c3c8b1aa93cdcfadd16 \ + --hash=sha256:297878dc9d0a334358f9b608b56d02e72899f3b8499fc6044133f0d319e2ec30 \ + --hash=sha256:2d3af2e79991102678f53e0dbf4c35de99b6b8b58f29a27ca0325816364caaba \ + --hash=sha256:30b43e74f1359353341a7adb783c8f1b1c676367b011709f466f42fda2045e95 \ + --hash=sha256:3156628250f46a0895f1f36e1d4fbe062a1af8718ec3ebeb746f1d23f0c5dc4d \ + 
--hash=sha256:31f5af773530fd3c658b32b6bdc2d0838543de70eb9a2156c03e410f7b0d3aae \ + --hash=sha256:3935bfa5fede35fb202c4b569bb9c042f337ca4ff7bd540a0aa5e37131659348 \ + --hash=sha256:39d51fbe4285d5db5d92a929e3e21536ea3dd43732c5b177c7ef03f918dff9f2 \ + --hash=sha256:3f77ce728b19cb537714499928fe800c3dda29e8d9428778fc7c186da4c09a64 \ + --hash=sha256:4160d9283bd382fa6c0c2b5e017acc95bc183570cd70968b9202ad6d8fc48dce \ + --hash=sha256:4a571d97dbe66ef38e472703067021b1467025ec85707d57e78711c085984e54 \ + --hash=sha256:4e6281aedfca15301c41f74d7005e6e3f4ca143584ba696ac69df4f02f40d629 \ + --hash=sha256:52277518d6aae65536e9cea52d4e7fd2f7a66f4aa2d30ed3f2fcea620ace3c54 \ + --hash=sha256:556fc6c10989f19a179e4321e5d678db8eb2924131e64652a51fe83e4c3db0e1 \ + --hash=sha256:574faa3b79e8ebac7cb1d7930f51184ba1ccf69adfdec53a12f319a06030a68b \ + --hash=sha256:58791550b27d5488b1bb52bc96328456095d96206a250d28d874fafe11b3dfaf \ + --hash=sha256:5b750a8e5a1262434fb1517ddf64b5de58327f1adc3524a5e44c2ca43305eb0b \ + --hash=sha256:5d97151bc92d2b2578ff7ce779cdb9174337390a535953cbb9452fb65164c587 \ + --hash=sha256:5eee736daafa7af6d0a2dc15cc75e05c64f37fc37bafef2e00d77c14171c2097 \ + --hash=sha256:6445804cf4ec763dc70de65a3b0d9954e868609e83850a47ca4f0cb64bd79fea \ + --hash=sha256:647894f5ae99c4cf6bb82a1bb3a796f6e06af3caa3d32e26d2350d0e3e3faf24 \ + --hash=sha256:66d4cfda1d8ed687daa4bc0274fcfd5267873db9a5bc0418c2da19273040eeb7 \ + --hash=sha256:6a9a8c34fb7bb609419a211e59da8887eeca40d300b5ea8e56af98f6fbbb1541 \ + --hash=sha256:6b3f39a85d671436ee3d12c017f8fdea38509e4f25b28eb25877293c98c243f6 \ + --hash=sha256:6b6fb63ae352e13748289f04f37868099e69dba4c2b3e271c46061e82c745634 \ + --hash=sha256:70693319e0b8fd35dd863e3e29513875eb15c51945bf32519ef52927ca883bc3 \ + --hash=sha256:781e65134efaf88feb447e8c97a51772aa75e48b794352f94cb7ea717dedda0d \ + --hash=sha256:819ce3b883b7576ca28da3861c7e1a88afd08cc8c96908e08a3f4dd64a228034 \ + --hash=sha256:857112b22acd417c40fa4595db2fe28ab900c8c5fe4670c7989b1c0230955465 \ + 
--hash=sha256:887d9b0a65404929641a9fabb6452b07fe4572b269d901d622d8a34a4e9043b2 \ + --hash=sha256:8b3489ff1ed1e8315674d0775dc7d2195fb13ca17b3808721b54dbe9fd020faf \ + --hash=sha256:92fc4500fcb33899b05ba73276dfb684a20d31caa567b7cb5252d48f896a91b1 \ + --hash=sha256:9403db39be1393618dd80c746cb22ccda168efce239c73af13c3763ef56ffc04 \ + --hash=sha256:98110aa363f1bb4c073e8dcfaefd3a5cea0f0834c2aab23dda657e4dab2f53b5 \ + --hash=sha256:999779addc413181912e984b942fbcc951be1f5b3663cd80b2687758f434c583 \ + --hash=sha256:9caac6b54914bdf41bcc91e7eb9147d331d29235a7c967c150ef5df6464fd1bb \ + --hash=sha256:a7a078f5d37bee6690959c813977da5291b24286e7b962e62a94cec31aa5188b \ + --hash=sha256:a7e65eb5c003a303b94aa2c3852ef130230ec79e349632d030e9571b87c4698c \ + --hash=sha256:a96dc1fa45bd8c407a0af03b2d5218392729e1822b0c32e62c5bf7eeb5fb3958 \ + --hash=sha256:aca405706e0b0a44cc6bfd41fbe89919a6a56999157f6de7e182a990c36e37bc \ + --hash=sha256:accb6150ce61c9c4b7738d45550806aa2b71c7668c6942f17b0ac182b6142fd4 \ + --hash=sha256:ad1af54a62ffe39cf34db1aa6ed1a1873bd548f6401db39d8e7cd060b9211f82 \ + --hash=sha256:ae1aa1cd222c6d205853b3013c69cd04515f9d6ab6de4b0603e2e1c33221303e \ + --hash=sha256:b2d0a12018b04f4cb820781ec0dffb5f7c7c1d2a5cd22bff7fb055a2cb19ebce \ + --hash=sha256:b480c6a4e1138e1aa137c0079b9b6305ec6dcc1098a8ca5196283e8a49df95a9 \ + --hash=sha256:b74c261802d3d2b85c9df2dfb2fa81b6f90deeef63c2db9f0e029a3cac50b518 \ + --hash=sha256:ba278acf14471d36316159c94a802933d10b6a1e117b8554fe0d0d9b75c9d536 \ + --hash=sha256:bb6178c241278d5fe853b3de743087be7f5f4c6f7d6d22a3b524d323eecec505 \ + --hash=sha256:bf72af5e0fb40e9babf594308911436c8efde3cb5e75b6f206c34ad18be5c052 \ + --hash=sha256:bfd3223c15bebe26518d58ccf9a39b93948d3dcb3e57a20480dfdd315356baff \ + --hash=sha256:c214999039d4f2a5b2073ac506bba279945233da8c786e490d411dfc30f855c1 \ + --hash=sha256:c2f992c07c0fca81655066705beae35fc95a2fa7366467366db627d9f2ee097f \ + --hash=sha256:cba4cfa1052819d16699e1d55d18c92b6e094d4517c41dd231a8b9f87b6fa681 \ + 
--hash=sha256:cea7daf9fc7ae6687cf1e2c049752f19f146fdc37c2cc376e7d0032cf4f25347 \ + --hash=sha256:cf6c4150f8c0e32d241436526f3c3f9cbd34429492abddbada2ffcff506c51af \ + --hash=sha256:d09c333d36c1409d56a9d29b3a1b800a42c76a57a5a8907eacdbce3f18768246 \ + --hash=sha256:d27b84d5880f6d8aa9ae3edb253c59d9f6642ffbb2c889b78b60361eed449787 \ + --hash=sha256:d2ccec9ac47cf4e04897619c0e0c1a48c54a71bdf045117d3a26f80d38ab1fb0 \ + --hash=sha256:d71264a80f3fcf512eb4f18f59423fe82d6e346ee97b90625f283df56aee103f \ + --hash=sha256:d93f3307ad32a27bda2e88ec81134b823c240aa3abb55821a8da553eed8d9439 \ + --hash=sha256:d9631c5e8b5b3a0fda99cb0d29c18133bca1e18aea9effe55adb3da1adef80d3 \ + --hash=sha256:ddfab44e4489bd79bda09d84c430677fc7f0a4939a73d2bba3073036f487a0a6 \ + --hash=sha256:e7048abd75fe40712005bcfc06bb44b9dfcd8e101dda2ecf2f5aa46115ad07ca \ + --hash=sha256:e73091191e4280403bde6c9a52a6999d69cdfde498f1fdf629105247599b57ec \ + --hash=sha256:e800776a79a5aabdb17dcc2346a7d66d0777e942e4cd251defeb084762ecd17d \ + --hash=sha256:edc9fc7051e3350643ad929df55c451899bb9ae6d24998a949d2e4c87fb596d3 \ + --hash=sha256:f089118d584e859c62b3da0892b88a83d611c2033ac410e929cb6754eec0ed16 \ + --hash=sha256:f174bbd484294ed9fdf09437f889f95807e5f229d5d93588d34e92106fbf6717 \ + --hash=sha256:f508b0491767bb1f2b87fdfacaba5f7eddc2f867740ec69ece6d1946d29029a6 \ + --hash=sha256:f7a31fc1e1bd362874863fdeed71aed92d348f5336fd84f2197ba40c59f061bd \ + --hash=sha256:f9479aa06a793c5aeba49ce5c5692ffb51fcd9a7016e017d555d5e2b0045d212 + # via + # aiohttp + # yarl +protobuf==5.27.3 \ + --hash=sha256:043853dcb55cc262bf2e116215ad43fa0859caab79bb0b2d31b708f128ece035 \ + --hash=sha256:16ddf3f8c6c41e1e803da7abea17b1793a97ef079a912e42351eabb19b2cffe7 \ + --hash=sha256:68248c60d53f6168f565a8c76dc58ba4fa2ade31c2d1ebdae6d80f969cdc2d4f \ + --hash=sha256:82460903e640f2b7e34ee81a947fdaad89de796d324bcbc38ff5430bcdead82c \ + --hash=sha256:8572c6533e544ebf6899c360e91d6bcbbee2549251643d32c52cf8a5de295ba5 \ + 
--hash=sha256:a55c48f2a2092d8e213bd143474df33a6ae751b781dd1d1f4d953c128a415b25 \ + --hash=sha256:af7c0b7cfbbb649ad26132e53faa348580f844d9ca46fd3ec7ca48a1ea5db8a1 \ + --hash=sha256:b8a994fb3d1c11156e7d1e427186662b64694a62b55936b2b9348f0a7c6625ce \ + --hash=sha256:c2a105c24f08b1e53d6c7ffe69cb09d0031512f0b72f812dd4005b8112dbe91e \ + --hash=sha256:c84eee2c71ed83704f1afbf1a85c3171eab0fd1ade3b399b3fad0884cbcca8bf \ + --hash=sha256:dcb307cd4ef8fec0cf52cb9105a03d06fbb5275ce6d84a6ae33bc6cf84e0a07b + # via + # -r requirements-dev.txt + # google-api-core + # googleapis-common-protos + # orbax-checkpoint + # tensorboard + # tensorflow-cpu + # tensorflow-datasets + # tensorflow-metadata +psutil==6.1.1 \ + --hash=sha256:018aeae2af92d943fdf1da6b58665124897cfc94faa2ca92098838f83e1b1bca \ + --hash=sha256:0bdd4eab935276290ad3cb718e9809412895ca6b5b334f5a9111ee6d9aff9377 \ + --hash=sha256:1924e659d6c19c647e763e78670a05dbb7feaf44a0e9c94bf9e14dfc6ba50468 \ + --hash=sha256:33431e84fee02bc84ea36d9e2c4a6d395d479c9dd9bba2376c1f6ee8f3a4e0b3 \ + --hash=sha256:384636b1a64b47814437d1173be1427a7c83681b17a450bfc309a1953e329603 \ + --hash=sha256:6d4281f5bbca041e2292be3380ec56a9413b790579b8e593b1784499d0005dac \ + --hash=sha256:8be07491f6ebe1a693f17d4f11e69d0dc1811fa082736500f649f79df7735303 \ + --hash=sha256:8df0178ba8a9e5bc84fed9cfa61d54601b371fbec5c8eebad27575f1e105c0d4 \ + --hash=sha256:97f7cb9921fbec4904f522d972f0c0e1f4fabbdd4e0287813b21215074a0f160 \ + --hash=sha256:9ccc4316f24409159897799b83004cb1e24f9819b0dcf9c0b68bdcb6cefee6a8 \ + --hash=sha256:b6e06c20c05fe95a3d7302d74e7097756d4ba1247975ad6905441ae1b5b66003 \ + --hash=sha256:c777eb75bb33c47377c9af68f30e9f11bc78e0f07fbf907be4a5d70b2fe5f030 \ + --hash=sha256:ca9609c77ea3b8481ab005da74ed894035936223422dc591d6772b147421f777 \ + --hash=sha256:cf8496728c18f2d0b45198f06895be52f36611711746b7f30c464b422b50e2f5 \ + --hash=sha256:eaa912e0b11848c4d9279a93d7e2783df352b082f40111e078388701fd479e53 \ + 
--hash=sha256:f35cfccb065fff93529d2afb4a2e89e363fe63ca1e4a5da22b603a85833c2649 \ + --hash=sha256:fc0ed7fe2231a444fc219b9c42d0376e0a9a1a72f16c5cfa0f68d19f1a0663e8 + # via tensorflow-datasets +pyarrow==18.1.0 \ + --hash=sha256:01c034b576ce0eef554f7c3d8c341714954be9b3f5d5bc7117006b85fcf302fe \ + --hash=sha256:05a5636ec3eb5cc2a36c6edb534a38ef57b2ab127292a716d00eabb887835f1e \ + --hash=sha256:0743e503c55be0fdb5c08e7d44853da27f19dc854531c0570f9f394ec9671d54 \ + --hash=sha256:0ad4892617e1a6c7a551cfc827e072a633eaff758fa09f21c4ee548c30bcaf99 \ + --hash=sha256:0b331e477e40f07238adc7ba7469c36b908f07c89b95dd4bd3a0ec84a3d1e21e \ + --hash=sha256:11b676cd410cf162d3f6a70b43fb9e1e40affbc542a1e9ed3681895f2962d3d9 \ + --hash=sha256:25dbacab8c5952df0ca6ca0af28f50d45bd31c1ff6fcf79e2d120b4a65ee7181 \ + --hash=sha256:2c4dd0c9010a25ba03e198fe743b1cc03cd33c08190afff371749c52ccbbaf76 \ + --hash=sha256:36ac22d7782554754a3b50201b607d553a8d71b78cdf03b33c1125be4b52397c \ + --hash=sha256:3b2e2239339c538f3464308fd345113f886ad031ef8266c6f004d49769bb074c \ + --hash=sha256:3c35813c11a059056a22a3bef520461310f2f7eea5c8a11ef9de7062a23f8d56 \ + --hash=sha256:4a4813cb8ecf1809871fd2d64a8eff740a1bd3691bbe55f01a3cf6c5ec869754 \ + --hash=sha256:4f443122c8e31f4c9199cb23dca29ab9427cef990f283f80fe15b8e124bcc49b \ + --hash=sha256:4f97b31b4c4e21ff58c6f330235ff893cc81e23da081b1a4b1c982075e0ed4e9 \ + --hash=sha256:543ad8459bc438efc46d29a759e1079436290bd583141384c6f7a1068ed6f992 \ + --hash=sha256:6a276190309aba7bc9d5bd2933230458b3521a4317acfefe69a354f2fe59f2bc \ + --hash=sha256:73eeed32e724ea3568bb06161cad5fa7751e45bc2228e33dcb10c614044165c7 \ + --hash=sha256:74de649d1d2ccb778f7c3afff6085bd5092aed4c23df9feeb45dd6b16f3811aa \ + --hash=sha256:84e314d22231357d473eabec709d0ba285fa706a72377f9cc8e1cb3c8013813b \ + --hash=sha256:9386d3ca9c145b5539a1cfc75df07757dff870168c959b473a0bccbc3abc8c73 \ + --hash=sha256:9736ba3c85129d72aefa21b4f3bd715bc4190fe4426715abfff90481e7d00812 \ + 
--hash=sha256:9f3a76670b263dc41d0ae877f09124ab96ce10e4e48f3e3e4257273cee61ad0d \ + --hash=sha256:a1880dd6772b685e803011a6b43a230c23b566859a6e0c9a276c1e0faf4f4052 \ + --hash=sha256:acb7564204d3c40babf93a05624fc6a8ec1ab1def295c363afc40b0c9e66c191 \ + --hash=sha256:ad514dbfcffe30124ce655d72771ae070f30bf850b48bc4d9d3b25993ee0e386 \ + --hash=sha256:aebc13a11ed3032d8dd6e7171eb6e86d40d67a5639d96c35142bd568b9299324 \ + --hash=sha256:b516dad76f258a702f7ca0250885fc93d1fa5ac13ad51258e39d402bd9e2e1e4 \ + --hash=sha256:b76130d835261b38f14fc41fdfb39ad8d672afb84c447126b84d5472244cfaba \ + --hash=sha256:ba17845efe3aa358ec266cf9cc2800fa73038211fb27968bfa88acd09261a470 \ + --hash=sha256:c0a03da7f2758645d17b7b4f83c8bffeae5bbb7f974523fe901f36288d2eab71 \ + --hash=sha256:c52f81aa6f6575058d8e2c782bf79d4f9fdc89887f16825ec3a66607a5dd8e30 \ + --hash=sha256:d4b3d2a34780645bed6414e22dda55a92e0fcd1b8a637fba86800ad737057e33 \ + --hash=sha256:d4f13eee18433f99adefaeb7e01d83b59f73360c231d4782d9ddfaf1c3fbde0a \ + --hash=sha256:d6cf5c05f3cee251d80e98726b5c7cc9f21bab9e9783673bac58e6dfab57ecc8 \ + --hash=sha256:da31fbca07c435be88a0c321402c4e31a2ba61593ec7473630769de8346b54ee \ + --hash=sha256:e21488d5cfd3d8b500b3238a6c4b075efabc18f0f6d80b29239737ebd69caa6c \ + --hash=sha256:e31e9417ba9c42627574bdbfeada7217ad8a4cbbe45b9d6bdd4b62abbca4c6f6 \ + --hash=sha256:eaeabf638408de2772ce3d7793b2668d4bb93807deed1725413b70e3156a7854 \ + --hash=sha256:f266a2c0fc31995a06ebd30bcfdb7f615d7278035ec5b1cd71c48d56daaf30b0 \ + --hash=sha256:f39a2e0ed32a0970e4e46c262753417a60c43a3246972cfc2d3eb85aedd01b21 \ + --hash=sha256:f591704ac05dfd0477bb8f8e0bd4b5dc52c1cadf50503858dce3a15db6e46ff2 \ + --hash=sha256:f96bd502cb11abb08efea6dab09c003305161cb6c9eafd432e35e76e7fa9b90c + # via + # datasets + # tensorflow-datasets +pyasn1==0.6.1 \ + --hash=sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629 \ + --hash=sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034 + # via + # oauth2client + # 
pyasn1-modules + # rsa +pyasn1-modules==0.4.1 \ + --hash=sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd \ + --hash=sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c + # via + # google-auth + # oauth2client +pygments==2.19.0 \ + --hash=sha256:4755e6e64d22161d5b61432c0600c923c5927214e7c956e31c23923c89251a9b \ + --hash=sha256:afc4146269910d4bdfabcd27c24923137a74d562a23a320a41a55ad303e19783 + # via rich +pyparsing==3.2.1 \ + --hash=sha256:506ff4f4386c4cec0590ec19e6302d3aedb992fdc02c761e90416f158dacf8e1 \ + --hash=sha256:61980854fd66de3a90028d679a954d5f2623e83144b5afe5ee86f43d762e5f0a + # via + # httplib2 + # matplotlib +python-dateutil==2.9.0.post0 \ + --hash=sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3 \ + --hash=sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427 + # via + # matplotlib + # pandas +pytz==2024.2 \ + --hash=sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a \ + --hash=sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725 + # via + # google-api-core + # pandas +pyyaml==6.0.2 \ + --hash=sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff \ + --hash=sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48 \ + --hash=sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086 \ + --hash=sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e \ + --hash=sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133 \ + --hash=sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5 \ + --hash=sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484 \ + --hash=sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee \ + --hash=sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5 \ + --hash=sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68 \ + 
--hash=sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a \ + --hash=sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf \ + --hash=sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99 \ + --hash=sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8 \ + --hash=sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85 \ + --hash=sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19 \ + --hash=sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc \ + --hash=sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a \ + --hash=sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1 \ + --hash=sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317 \ + --hash=sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c \ + --hash=sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631 \ + --hash=sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d \ + --hash=sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652 \ + --hash=sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5 \ + --hash=sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e \ + --hash=sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b \ + --hash=sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8 \ + --hash=sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476 \ + --hash=sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706 \ + --hash=sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563 \ + --hash=sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237 \ + --hash=sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b \ + --hash=sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083 \ + 
--hash=sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180 \ + --hash=sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425 \ + --hash=sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e \ + --hash=sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f \ + --hash=sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725 \ + --hash=sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183 \ + --hash=sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab \ + --hash=sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774 \ + --hash=sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725 \ + --hash=sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e \ + --hash=sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5 \ + --hash=sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d \ + --hash=sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290 \ + --hash=sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44 \ + --hash=sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed \ + --hash=sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4 \ + --hash=sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba \ + --hash=sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12 \ + --hash=sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4 + # via + # flax + # huggingface-hub + # orbax-checkpoint + # torch-xla + # transformers +regex==2024.11.6 \ + --hash=sha256:02a02d2bb04fec86ad61f3ea7f49c015a0681bf76abb9857f945d26159d2968c \ + --hash=sha256:02e28184be537f0e75c1f9b2f8847dc51e08e6e171c6bde130b2687e0c33cf60 \ + --hash=sha256:040df6fe1a5504eb0f04f048e6d09cd7c7110fef851d7c567a6b6e09942feb7d \ + 
--hash=sha256:068376da5a7e4da51968ce4c122a7cd31afaaec4fccc7856c92f63876e57b51d \ + --hash=sha256:06eb1be98df10e81ebaded73fcd51989dcf534e3c753466e4b60c4697a003b67 \ + --hash=sha256:072623554418a9911446278f16ecb398fb3b540147a7828c06e2011fa531e773 \ + --hash=sha256:086a27a0b4ca227941700e0b31425e7a28ef1ae8e5e05a33826e17e47fbfdba0 \ + --hash=sha256:08986dce1339bc932923e7d1232ce9881499a0e02925f7402fb7c982515419ef \ + --hash=sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad \ + --hash=sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe \ + --hash=sha256:0d7f453dca13f40a02b79636a339c5b62b670141e63efd511d3f8f73fba162b3 \ + --hash=sha256:1062b39a0a2b75a9c694f7a08e7183a80c63c0d62b301418ffd9c35f55aaa114 \ + --hash=sha256:13291b39131e2d002a7940fb176e120bec5145f3aeb7621be6534e46251912c4 \ + --hash=sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39 \ + --hash=sha256:164d8b7b3b4bcb2068b97428060b2a53be050085ef94eca7f240e7947f1b080e \ + --hash=sha256:167ed4852351d8a750da48712c3930b031f6efdaa0f22fa1933716bfcd6bf4a3 \ + --hash=sha256:1c4de13f06a0d54fa0d5ab1b7138bfa0d883220965a29616e3ea61b35d5f5fc7 \ + --hash=sha256:202eb32e89f60fc147a41e55cb086db2a3f8cb82f9a9a88440dcfc5d37faae8d \ + --hash=sha256:220902c3c5cc6af55d4fe19ead504de80eb91f786dc102fbd74894b1551f095e \ + --hash=sha256:2b3361af3198667e99927da8b84c1b010752fa4b1115ee30beaa332cabc3ef1a \ + --hash=sha256:2c89a8cc122b25ce6945f0423dc1352cb9593c68abd19223eebbd4e56612c5b7 \ + --hash=sha256:2d548dafee61f06ebdb584080621f3e0c23fff312f0de1afc776e2a2ba99a74f \ + --hash=sha256:2e34b51b650b23ed3354b5a07aab37034d9f923db2a40519139af34f485f77d0 \ + --hash=sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54 \ + --hash=sha256:3a51ccc315653ba012774efca4f23d1d2a8a8f278a6072e29c7147eee7da446b \ + --hash=sha256:3cde6e9f2580eb1665965ce9bf17ff4952f34f5b126beb509fee8f4e994f143c \ + --hash=sha256:40291b1b89ca6ad8d3f2b82782cc33807f1406cf68c8d440861da6304d8ffbbd \ + 
--hash=sha256:41758407fc32d5c3c5de163888068cfee69cb4c2be844e7ac517a52770f9af57 \ + --hash=sha256:4181b814e56078e9b00427ca358ec44333765f5ca1b45597ec7446d3a1ef6e34 \ + --hash=sha256:4f51f88c126370dcec4908576c5a627220da6c09d0bff31cfa89f2523843316d \ + --hash=sha256:50153825ee016b91549962f970d6a4442fa106832e14c918acd1c8e479916c4f \ + --hash=sha256:5056b185ca113c88e18223183aa1a50e66507769c9640a6ff75859619d73957b \ + --hash=sha256:5071b2093e793357c9d8b2929dfc13ac5f0a6c650559503bb81189d0a3814519 \ + --hash=sha256:525eab0b789891ac3be914d36893bdf972d483fe66551f79d3e27146191a37d4 \ + --hash=sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a \ + --hash=sha256:5478c6962ad548b54a591778e93cd7c456a7a29f8eca9c49e4f9a806dcc5d638 \ + --hash=sha256:5670bce7b200273eee1840ef307bfa07cda90b38ae56e9a6ebcc9f50da9c469b \ + --hash=sha256:5704e174f8ccab2026bd2f1ab6c510345ae8eac818b613d7d73e785f1310f839 \ + --hash=sha256:59dfe1ed21aea057a65c6b586afd2a945de04fc7db3de0a6e3ed5397ad491b07 \ + --hash=sha256:5e7e351589da0850c125f1600a4c4ba3c722efefe16b297de54300f08d734fbf \ + --hash=sha256:63b13cfd72e9601125027202cad74995ab26921d8cd935c25f09c630436348ff \ + --hash=sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0 \ + --hash=sha256:684d7a212682996d21ca12ef3c17353c021fe9de6049e19ac8481ec35574a70f \ + --hash=sha256:69ab78f848845569401469da20df3e081e6b5a11cb086de3eed1d48f5ed57c95 \ + --hash=sha256:6f44ec28b1f858c98d3036ad5d7d0bfc568bdd7a74f9c24e25f41ef1ebfd81a4 \ + --hash=sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e \ + --hash=sha256:764e71f22ab3b305e7f4c21f1a97e1526a25ebdd22513e251cf376760213da13 \ + --hash=sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519 \ + --hash=sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2 \ + --hash=sha256:8447d2d39b5abe381419319f942de20b7ecd60ce86f16a23b0698f22e1b70008 \ + --hash=sha256:86fddba590aad9208e2fa8b43b4c098bb0ec74f15718bb6a704e3c63e2cef3e9 \ + 
--hash=sha256:89d75e7293d2b3e674db7d4d9b1bee7f8f3d1609428e293771d1a962617150cc \ + --hash=sha256:93c0b12d3d3bc25af4ebbf38f9ee780a487e8bf6954c115b9f015822d3bb8e48 \ + --hash=sha256:94d87b689cdd831934fa3ce16cc15cd65748e6d689f5d2b8f4f4df2065c9fa20 \ + --hash=sha256:9714398225f299aa85267fd222f7142fcb5c769e73d7733344efc46f2ef5cf89 \ + --hash=sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e \ + --hash=sha256:997d6a487ff00807ba810e0f8332c18b4eb8d29463cfb7c820dc4b6e7562d0cf \ + --hash=sha256:a03e02f48cd1abbd9f3b7e3586d97c8f7a9721c436f51a5245b3b9483044480b \ + --hash=sha256:a36fdf2af13c2b14738f6e973aba563623cb77d753bbbd8d414d18bfaa3105dd \ + --hash=sha256:a6ba92c0bcdf96cbf43a12c717eae4bc98325ca3730f6b130ffa2e3c3c723d84 \ + --hash=sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29 \ + --hash=sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b \ + --hash=sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3 \ + --hash=sha256:ac10f2c4184420d881a3475fb2c6f4d95d53a8d50209a2500723d831036f7c45 \ + --hash=sha256:ad182d02e40de7459b73155deb8996bbd8e96852267879396fb274e8700190e3 \ + --hash=sha256:b2837718570f95dd41675328e111345f9b7095d821bac435aac173ac80b19983 \ + --hash=sha256:b489578720afb782f6ccf2840920f3a32e31ba28a4b162e13900c3e6bd3f930e \ + --hash=sha256:b583904576650166b3d920d2bcce13971f6f9e9a396c673187f49811b2769dc7 \ + --hash=sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4 \ + --hash=sha256:b97c1e0bd37c5cd7902e65f410779d39eeda155800b65fc4d04cc432efa9bc6e \ + --hash=sha256:ba9b72e5643641b7d41fa1f6d5abda2c9a263ae835b917348fc3c928182ad467 \ + --hash=sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577 \ + --hash=sha256:bb8f74f2f10dbf13a0be8de623ba4f9491faf58c24064f32b65679b021ed0001 \ + --hash=sha256:bde01f35767c4a7899b7eb6e823b125a64de314a8ee9791367c9a34d56af18d0 \ + --hash=sha256:bec9931dfb61ddd8ef2ebc05646293812cb6b16b60cf7c9511a832b6f1854b55 \ + 
--hash=sha256:c36f9b6f5f8649bb251a5f3f66564438977b7ef8386a52460ae77e6070d309d9 \ + --hash=sha256:cdf58d0e516ee426a48f7b2c03a332a4114420716d55769ff7108c37a09951bf \ + --hash=sha256:d1cee317bfc014c2419a76bcc87f071405e3966da434e03e13beb45f8aced1a6 \ + --hash=sha256:d22326fcdef5e08c154280b71163ced384b428343ae16a5ab2b3354aed12436e \ + --hash=sha256:d3660c82f209655a06b587d55e723f0b813d3a7db2e32e5e7dc64ac2a9e86fde \ + --hash=sha256:da8f5fc57d1933de22a9e23eec290a0d8a5927a5370d24bda9a6abe50683fe62 \ + --hash=sha256:df951c5f4a1b1910f1a99ff42c473ff60f8225baa1cdd3539fe2819d9543e9df \ + --hash=sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51 \ + --hash=sha256:ea1bfda2f7162605f6e8178223576856b3d791109f15ea99a9f95c16a7636fb5 \ + --hash=sha256:f02f93b92358ee3f78660e43b4b0091229260c5d5c408d17d60bf26b6c900e86 \ + --hash=sha256:f056bf21105c2515c32372bbc057f43eb02aae2fda61052e2f7622c801f0b4e2 \ + --hash=sha256:f1ac758ef6aebfc8943560194e9fd0fa18bcb34d89fd8bd2af18183afd8da3a2 \ + --hash=sha256:f2a19f302cd1ce5dd01a9099aaa19cae6173306d1302a43b627f62e21cf18ac0 \ + --hash=sha256:f654882311409afb1d780b940234208a252322c24a93b442ca714d119e68086c \ + --hash=sha256:f65557897fc977a44ab205ea871b690adaef6b9da6afda4790a2484b04293a5f \ + --hash=sha256:f9d1e379028e0fc2ae3654bac3cbbef81bf3fd571272a42d56c24007979bafb6 \ + --hash=sha256:fdabbfc59f2c6edba2a6622c647b716e34e8e3867e0ab975412c5c2f79b82da2 \ + --hash=sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9 \ + --hash=sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91 + # via transformers +requests==2.32.3 \ + --hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \ + --hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6 + # via + # datasets + # google-api-core + # huggingface-hub + # responses + # tensorflow-cpu + # tensorflow-datasets + # transformers +responses==0.18.0 \ + 
--hash=sha256:15c63ad16de13ee8e7182d99c9334f64fd81f1ee79f90748d527c28f7ca9dd51 \ + --hash=sha256:380cad4c1c1dc942e5e8a8eaae0b4d4edf708f4f010db8b7bcfafad1fcd254ff + # via datasets +rich==13.9.4 \ + --hash=sha256:439594978a49a09530cff7ebc4b5c7103ef57baf48d5ea3184f21d9a2befa098 \ + --hash=sha256:6049d5e6ec054bf2779ab3358186963bac2ea89175919d699e378b99738c2a90 + # via + # flax + # keras +rsa==4.9 \ + --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ + --hash=sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21 + # via + # google-auth + # oauth2client +safetensors==0.5.0 \ + --hash=sha256:0371afd84c200a80eb7103bf715108b0c3846132fb82453ae018609a15551580 \ + --hash=sha256:20067e7a5e63f0cbc88457b2a1161e70ff73af4cc3a24bce90309430cd6f6e7e \ + --hash=sha256:53715e4ea0ef23c08f004baae0f609a7773de7d4148727760417c6760cfd6b76 \ + --hash=sha256:56d936028ac799e18644b08a91fd98b4b62ae3dcd0440b1cfcb56535785589f1 \ + --hash=sha256:5ec7fc8c3d2f32ebf1c7011bc886b362e53ee0a1ec6d828c39d531fed8b325d6 \ + --hash=sha256:6106aa835deb7263f7014f74c05842ab828d6c11d789f2e7e98f26b1a305e72d \ + --hash=sha256:649d6a4aa34d5174ae87289068ccc2fec2a1a998ecf83425aa5a42c3eff69bcf \ + --hash=sha256:a1349611f74f55c5ee1c1c144c536a2743c38f7d8bf60b9fc8267e0efc0591a2 \ + --hash=sha256:a2f26afada2233576ffea6b80042c2c0a8105c164254af56168ec14299ad3122 \ + --hash=sha256:b85565bc2f0456961a788d2f11d9d892eec46603db0e4923aa9512c2355aa727 \ + --hash=sha256:bdf6a3e366ea8ba1a0538db6099229e95811194432c684ea28ea7ae28763b8dc \ + --hash=sha256:c47b34c549fa1e0c655c4644da31332c61332c732c47c8dd9399347e9aac69d1 \ + --hash=sha256:c683b9b485bee43422ba2855f72777c37647190281e03da4c8d2a69fa5336558 \ + --hash=sha256:debff88f41d569a3e93a955469f83864e432af35bb34b16f65a9ddf378daa3ae \ + --hash=sha256:f451941f8aa11e7be5c3fa450e264609a2b1e65fa38ae590a74e55a94d646b76 + # via transformers +scikit-learn==1.5.2 \ + --hash=sha256:03b6158efa3faaf1feea3faa884c840ebd61b6484167c711548fce208ea09445 \ + 
--hash=sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3 \ + --hash=sha256:1ff45e26928d3b4eb767a8f14a9a6efbf1cbff7c05d1fb0f95f211a89fd4f5de \ + --hash=sha256:299406827fb9a4f862626d0fe6c122f5f87f8910b86fe5daa4c32dcd742139b6 \ + --hash=sha256:2d4cad1119c77930b235579ad0dc25e65c917e756fe80cab96aa3b9428bd3fb0 \ + --hash=sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6 \ + --hash=sha256:3a686885a4b3818d9e62904d91b57fa757fc2bed3e465c8b177be652f4dd37c8 \ + --hash=sha256:3b923d119d65b7bd555c73be5423bf06c0105678ce7e1f558cb4b40b0a5502b1 \ + --hash=sha256:3bed4909ba187aca80580fe2ef370d9180dcf18e621a27c4cf2ef10d279a7efe \ + --hash=sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1 \ + --hash=sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1 \ + --hash=sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8 \ + --hash=sha256:6c16d84a0d45e4894832b3c4d0bf73050939e21b99b01b6fd59cbb0cf39163b6 \ + --hash=sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9 \ + --hash=sha256:8c412ccc2ad9bf3755915e3908e677b367ebc8d010acbb3f182814524f2e5540 \ + --hash=sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908 \ + --hash=sha256:b4237ed7b3fdd0a4882792e68ef2545d5baa50aca3bb45aa7df468138ad8f94d \ + --hash=sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f \ + --hash=sha256:c15b1ca23d7c5f33cc2cb0a0d6aaacf893792271cddff0edbd6a40e8319bc113 \ + --hash=sha256:ca64b3089a6d9b9363cd3546f8978229dcbb737aceb2c12144ee3f70f95684b7 \ + --hash=sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5 \ + --hash=sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd \ + --hash=sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12 \ + --hash=sha256:f763897fe92d0e903aa4847b0aec0e68cadfff77e8a0687cabd946c89d17e675 \ + --hash=sha256:f8b0ccd4a902836493e026c03256e8b206656f91fbcc4fde28c57a5b752561f1 \ + 
--hash=sha256:f932a02c3f4956dfb981391ab24bda1dbd90fe3d628e4b42caef3e041c67707a + # via -r requirements-dev.txt +scipy==1.15.0 \ + --hash=sha256:0e5b34f8894f9904cc578008d1a9467829c1817e9f9cb45e6d6eeb61d2ab7731 \ + --hash=sha256:0fcb16eb04d84670722ce8d93b05257df471704c913cb0ff9dc5a1c31d1e9422 \ + --hash=sha256:129f899ed275c0515d553b8d31696924e2ca87d1972421e46c376b9eb87de3d2 \ + --hash=sha256:161f80a98047c219c257bf5ce1777c574bde36b9d962a46b20d0d7e531f86863 \ + --hash=sha256:1b29e4fc02e155a5fd1165f1e6a73edfdd110470736b0f48bcbe48083f0eee37 \ + --hash=sha256:1e2448acd79c6374583581a1ded32ac71a00c2b9c62dfa87a40e1dd2520be111 \ + --hash=sha256:2240e1fd0782e62e1aacdc7234212ee271d810f67e9cd3b8d521003a82603ef8 \ + --hash=sha256:300742e2cc94e36a2880ebe464a1c8b4352a7b0f3e36ec3d2ac006cdbe0219ac \ + --hash=sha256:327163ad73e54541a675240708244644294cb0a65cca420c9c79baeb9648e479 \ + --hash=sha256:351899dd2a801edd3691622172bc8ea01064b1cada794f8641b89a7dc5418db6 \ + --hash=sha256:35c68f7044b4e7ad73a3e68e513dda946989e523df9b062bd3cf401a1a882192 \ + --hash=sha256:36be480e512d38db67f377add5b759fb117edd987f4791cdf58e59b26962bee4 \ + --hash=sha256:37ce9394cdcd7c5f437583fc6ef91bd290014993900643fdfc7af9b052d1613b \ + --hash=sha256:46e91b5b16909ff79224b56e19cbad65ca500b3afda69225820aa3afbf9ec020 \ + --hash=sha256:4e08c6a36f46abaedf765dd2dfcd3698fa4bd7e311a9abb2d80e33d9b2d72c34 \ + --hash=sha256:52475011be29dfcbecc3dfe3060e471ac5155d72e9233e8d5616b84e2b542054 \ + --hash=sha256:5972e3f96f7dda4fd3bb85906a17338e65eaddfe47f750e240f22b331c08858e \ + --hash=sha256:5abbdc6ede5c5fed7910cf406a948e2c0869231c0db091593a6b2fa78be77e5d \ + --hash=sha256:5beb0a2200372b7416ec73fdae94fe81a6e85e44eb49c35a11ac356d2b8eccc6 \ + --hash=sha256:61513b989ee8d5218fbeb178b2d51534ecaddba050db949ae99eeb3d12f6825d \ + --hash=sha256:6d26f17c64abd6c6c2dfb39920f61518cc9e213d034b45b2380e32ba78fde4c0 \ + --hash=sha256:6f376d7c767731477bac25a85d0118efdc94a572c6b60decb1ee48bf2391a73b \ + 
--hash=sha256:767e8cf6562931f8312f4faa7ddea412cb783d8df49e62c44d00d89f41f9bbe8 \ + --hash=sha256:82bff2eb01ccf7cea8b6ee5274c2dbeadfdac97919da308ee6d8e5bcbe846443 \ + --hash=sha256:952d2e9eaa787f0a9e95b6e85da3654791b57a156c3e6609e65cc5176ccfe6f2 \ + --hash=sha256:9c8254fe21dd2c6c8f7757035ec0c31daecf3bb3cffd93bc1ca661b731d28136 \ + --hash=sha256:aeac60d3562a7bf2f35549bdfdb6b1751c50590f55ce7322b4b2fc821dc27fca \ + --hash=sha256:b1432102254b6dc7766d081fa92df87832ac25ff0b3d3a940f37276e63eb74ff \ + --hash=sha256:bdca4c7bb8dc41307e5f39e9e5d19c707d8e20a29845e7533b3bb20a9d4ccba0 \ + --hash=sha256:c9624eeae79b18cab1a31944b5ef87aa14b125d6ab69b71db22f0dbd962caf1e \ + --hash=sha256:ccb6248a9987193fe74363a2d73b93bc2c546e0728bd786050b7aef6e17db03c \ + --hash=sha256:cd9d9198a7fd9a77f0eb5105ea9734df26f41faeb2a88a0e62e5245506f7b6df \ + --hash=sha256:d13bbc0658c11f3d19df4138336e4bce2c4fbd78c2755be4bf7b8e235481557f \ + --hash=sha256:d35aef233b098e4de88b1eac29f0df378278e7e250a915766786b773309137c4 \ + --hash=sha256:de112c2dae53107cfeaf65101419662ac0a54e9a088c17958b51c95dac5de56d \ + --hash=sha256:e9baff912ea4f78a543d183ed6f5b3bea9784509b948227daaf6f10727a0e2e5 \ + --hash=sha256:eb1533c59f0ec6c55871206f15a5c72d1fae7ad3c0a8ca33ca88f7c309bbbf8c \ + --hash=sha256:ec915cd26d76f6fc7ae8522f74f5b2accf39546f341c771bb2297f3871934a52 \ + --hash=sha256:fde0f3104dfa1dfbc1f230f65506532d0558d43188789eaf68f97e106249a913 \ + --hash=sha256:fe00169cf875bed0b3c40e4da45b57037dc21d7c7bf0c85ed75f210c281488f1 + # via + # jax + # jaxlib + # mizani + # plotnine + # scikit-learn + # statsmodels +simple-parsing==0.1.6 \ + --hash=sha256:2a6e74b061fb754cc441559e8dcea9d108286d9e0ffaa9cca4eea6bbe85372e1 \ + --hash=sha256:dad192e9633515a5627e343106636590a39a5ce85f6c47ced43507044ed98956 + # via tensorflow-datasets +simplejson==3.19.3 \ + --hash=sha256:01c6657485393f2e9b8177c77a7634f13ebe70d5e6de150aae1677d91516ce6b \ + --hash=sha256:0552eb06e7234da892e1d02365cd2b7b2b1f8233aa5aabdb2981587b7cc92ea0 \ + 
--hash=sha256:06662392e4913dc8846d6a71a6d5de86db5fba244831abe1dd741d62a4136764 \ + --hash=sha256:0733ecd95ae03ae718ec74aad818f5af5f3155d596f7b242acbc1621e765e5fb \ + --hash=sha256:0766ca6222b410e08e0053a0dda3606cafb3973d5d00538307f631bb59743396 \ + --hash=sha256:0791f64fed7d4abad639491f8a6b1ba56d3c604eb94b50f8697359b92d983f36 \ + --hash=sha256:08f9b443a94e72dd02c87098c96886d35790e79e46b24e67accafbf13b73d43b \ + --hash=sha256:0959e6cb62e3994b5a40e31047ff97ef5c4138875fae31659bead691bed55896 \ + --hash=sha256:0a32859d45d7b85fb803bb68f6bee14526991a1190269116c33399fa0daf9bbf \ + --hash=sha256:0b5ddd2c7d1d3f4d23224bc8a04bbf1430ae9a8149c05b90f8fc610f7f857a23 \ + --hash=sha256:0bc5544e3128891bf613b9f71813ee2ec9c11574806f74dd8bb84e5e95bf64a2 \ + --hash=sha256:101a3c8392028cd704a93c7cba8926594e775ca3c91e0bee82144e34190903f1 \ + --hash=sha256:1069143a8fb3905e1bc0696c62be7e3adf812e9f1976ac9ae15b05112ff57cc9 \ + --hash=sha256:1773cabfba66a6337b547e45dafbd471b09487370bcab75bd28f626520410d29 \ + --hash=sha256:1a53a07320c5ff574d8b1a89c937ce33608832f166f39dff0581ac43dc979abd \ + --hash=sha256:1bd41f2cb1a2c57656ceff67b12d005cb255c728265e222027ad73193a04005a \ + --hash=sha256:1c49eeb94b8f09dc8a5843c156a22b8bde6aa1ddc65ca8ddc62dddcc001e6a2d \ + --hash=sha256:1df0aaf1cb787fdf34484ed4a1f0c545efd8811f6028623290fef1a53694e597 \ + --hash=sha256:1e557712fc79f251673aeb3fad3501d7d4da3a27eff0857af2e1d1afbbcf6685 \ + --hash=sha256:1e662336db50ad665777e6548b5076329a94a0c3d4a0472971c588b3ef27de3a \ + --hash=sha256:212fce86a22188b0c7f53533b0f693ea9605c1a0f02c84c475a30616f55a744d \ + --hash=sha256:23228037dc5d41c36666384062904d74409a62f52283d9858fa12f4c22cffad1 \ + --hash=sha256:23833ee7e791ec968b744dfee2a2d39df7152050051096caf4296506d75608d8 \ + --hash=sha256:256e09d0f94d9c3d177d9e95fd27a68c875a4baa2046633df387b86b652f5747 \ + --hash=sha256:2876027ebdd599d730d36464debe84619b0368e9a642ca6e7c601be55aed439e \ + --hash=sha256:2a6a750d3c7461b1c47cfc6bba8d9e57a455e7c5f80057d2a82f738040dd1129 \ + 
--hash=sha256:2a954b30810988feeabde843e3263bf187697e0eb5037396276db3612434049b \ + --hash=sha256:2b737a5fefedb8333fa50b8db3dcc9b1d18fd6c598f89fa7debff8b46bf4e511 \ + --hash=sha256:2c78293470313aefa9cfc5e3f75ca0635721fb016fb1121c1c5b0cb8cc74712a \ + --hash=sha256:2f56eb03bc9e432bb81adc8ecff2486d39feb371abb442964ffb44f6db23b332 \ + --hash=sha256:32a3ada8f3ea41db35e6d37b86dade03760f804628ec22e4fe775b703d567426 \ + --hash=sha256:37105d1d708365b91165e1a6e505bdecc88637091348cf4b6adcdcb4f5a5fb8b \ + --hash=sha256:3bbcdc438dc1683b35f7a8dc100960c721f922f9ede8127f63bed7dfded4c64c \ + --hash=sha256:3dc5c1a85ff388e98ea877042daec3d157b6db0d85bac6ba5498034689793e7e \ + --hash=sha256:42e5acf80d4d971238d4df97811286a044d720693092b20a56d5e56b7dcc5d09 \ + --hash=sha256:49549e3d81ab4a58424405aa545602674d8c35c20e986b42bb8668e782a94bac \ + --hash=sha256:49cc4c7b940d43bd12bf87ec63f28cbc4964fc4e12c031cc8cd01650f43eb94e \ + --hash=sha256:4a0710d1a5e41c4f829caa1572793dd3130c8d65c2b194c24ff29c4c305c26e0 \ + --hash=sha256:4dfa420bb9225dd33b6efdabde7c6a671b51150b9b1d9c4e5cd74d3b420b3fe1 \ + --hash=sha256:50d8b742d74c449c4dcac570d08ce0f21f6a149d2d9cf7652dbf2ba9a1bc729a \ + --hash=sha256:56134bbafe458a7b21f6fddbf889d36bec6d903718f4430768e3af822f8e27c2 \ + --hash=sha256:5bf6a3b9a7d7191471b464fe38f684df10eb491ec9ea454003edb45a011ab187 \ + --hash=sha256:5d9e8f836688a8fabe6a6b41b334aa550a6823f7b4ac3d3712fc0ad8655be9a8 \ + --hash=sha256:619756f1dd634b5bdf57d9a3914300526c3b348188a765e45b8b08eabef0c94e \ + --hash=sha256:6300680d83a399be2b8f3b0ef7ef90b35d2a29fe6e9c21438097e0938bbc1564 \ + --hash=sha256:637c4d4b81825c1f4d651e56210bd35b5604034b192b02d2d8f17f7ce8c18f42 \ + --hash=sha256:66a0399e21c2112acacfebf3d832ebe2884f823b1c7e6d1363f2944f1db31a99 \ + --hash=sha256:67a20641afebf4cfbcff50061f07daad1eace6e7b31d7622b6fa2c40d43900ba \ + --hash=sha256:6890ff9cf0bd2e1d487e2a8869ebd620a44684c0a9667fa5ee751d099d5d84c8 \ + --hash=sha256:6d43e24b88c80f997081503f693be832fc90854f278df277dd54f8a4c847ab61 \ + 
--hash=sha256:6ef9383c5e05f445be60f1735c1816163c874c0b1ede8bb4390aff2ced34f333 \ + --hash=sha256:6f455672f4738b0f47183c5896e3606cd65c9ddee3805a4d18e8c96aa3f47c84 \ + --hash=sha256:6fea0716c593dabb4392c4996d4e902a83b2428e6da82938cf28a523a11eb277 \ + --hash=sha256:7017329ca8d4dca94ad5e59f496e5fc77630aecfc39df381ffc1d37fb6b25832 \ + --hash=sha256:7137e69c6781ecf23afab064be94a277236c9cba31aa48ff1a0ec3995c69171e \ + --hash=sha256:72e8abbc86fcac83629a030888b45fed3a404d54161118be52cb491cd6975d3e \ + --hash=sha256:7355c7203353c36d46c4e7b6055293b3d2be097bbc5e2874a2b8a7259f0325dd \ + --hash=sha256:76f8c28fe2d426182405b18ddf3001fce47835a557dc15c3d8bdea01c03361da \ + --hash=sha256:7923878b7a0142d39763ec2dbecff3053c1bedd3653585a8474666e420fe83f5 \ + --hash=sha256:7a7bfad839c624e139a4863007233a3f194e7c51551081f9789cba52e4da5167 \ + --hash=sha256:7b5c472099b39b274dcde27f1113db8d818c9aa3ba8f78cbb8ad04a4c1ac2118 \ + --hash=sha256:7c0104b4b7d2c75ccedbf1d9d5a3bd2daa75e51053935a44ba012e2fd4c43752 \ + --hash=sha256:7e062767ac165df9a46963f5735aa4eee0089ec1e48b3f2ec46182754b96f55e \ + --hash=sha256:7e2a098c21ad8924076a12b6c178965d88a0ad75d1de67e1afa0a66878f277a5 \ + --hash=sha256:817abad79241ed4a507b3caf4d3f2be5079f39d35d4c550a061988986bffd2ec \ + --hash=sha256:83c87706265ae3028e8460d08b05f30254c569772e859e5ba61fe8af2c883468 \ + --hash=sha256:89b35433186e977fa86ff1fd179c1fadff39cfa3afa1648dab0b6ca53153acd9 \ + --hash=sha256:8e086896c36210ab6050f2f9f095a5f1e03c83fa0e7f296d6cba425411364680 \ + --hash=sha256:8f41bb5370b34f63171e65fdb00e12be1d83675cecb23e627df26f4c88dfc021 \ + --hash=sha256:934a50a614fb831614db5dbfba35127ee277624dda4d15895c957d2f5d48610c \ + --hash=sha256:93be280fc69a952c76e261036312c20b910e7fa9e234f1d89bdfe3fa34f8a023 \ + --hash=sha256:951095be8d4451a7182403354c22ec2de3e513e0cc40408b689af08d02611588 \ + --hash=sha256:a0782cb9bf827f0c488b6aa0f2819f618308a3caf2973cfd792e45d631bec4db \ + --hash=sha256:ab69f811a660c362651ae395eba8ce84f84c944cea0df5718ea0ba9d1e4e7252 \ + 
--hash=sha256:ad0e0b1ce9bd3edb5cf64b5b5b76eacbfdac8c5367153aeeec8a8b1407f68342 \ + --hash=sha256:add8850db04b98507a8b62d248a326ecc8561e6d24336d1ca5c605bbfaab4cad \ + --hash=sha256:afab2f7f2486a866ff04d6d905e9386ca6a231379181a3838abce1f32fbdcc37 \ + --hash=sha256:b5587feda2b65a79da985ae6d116daf6428bf7489992badc29fc96d16cd27b05 \ + --hash=sha256:b9198c1f1f8910a3b86b60f4fe2556d9d28d3fefe35bffe6be509a27402e694d \ + --hash=sha256:bc164f32dd9691e7082ce5df24b4cf8c6c394bbf9bdeeb5d843127cd07ab8ad2 \ + --hash=sha256:bcde83a553a96dc7533736c547bddaa35414a2566ab0ecf7d3964fc4bdb84c11 \ + --hash=sha256:c40df31a75de98db2cdfead6074d4449cd009e79f54c1ebe5e5f1f153c68ad20 \ + --hash=sha256:c4f614581b61a26fbbba232a1391f6cee82bc26f2abbb6a0b44a9bba25c56a1c \ + --hash=sha256:c9bedebdc5fdad48af8783022bae307746d54006b783007d1d3c38e10872a2c6 \ + --hash=sha256:cb324bb903330cbb35d87cce367a12631cd5720afa06e5b9c906483970946da6 \ + --hash=sha256:d00313681015ac498e1736b304446ee6d1c72c5b287cd196996dad84369998f7 \ + --hash=sha256:d0b0efc7279d768db7c74d3d07f0b5c81280d16ae3fb14e9081dc903e8360771 \ + --hash=sha256:d0d5a63f1768fed7e78cf55712dee81f5a345e34d34224f3507ebf71df2b754d \ + --hash=sha256:d1b8b4d6379fe55f471914345fe6171d81a18649dacf3248abfc9c349b4442eb \ + --hash=sha256:d36608557b4dcd7a62c29ad4cd7c5a1720bbf7dc942eff9dc42d2c542a5f042d \ + --hash=sha256:d43c2d7504eda566c50203cdc9dc043aff6f55f1b7dae0dcd79dfefef9159d1c \ + --hash=sha256:d73efb03c5b39249c82488a994f0998f9e4399e3d085209d2120503305ba77a8 \ + --hash=sha256:d936ae682d5b878af9d9eb4d8bb1fdd5e41275c8eb59ceddb0aeed857bb264a2 \ + --hash=sha256:dd011fc3c1d88b779645495fdb8189fb318a26981eebcce14109460e062f209b \ + --hash=sha256:dd5b9b1783e14803e362a558680d88939e830db2466f3fa22df5c9319f8eea94 \ + --hash=sha256:dd6a7dabcc4c32daf601bc45e01b79175dde4b52548becea4f9545b0a4428169 \ + --hash=sha256:dd7230d061e755d60a4d5445bae854afe33444cdb182f3815cff26ac9fb29a15 \ + --hash=sha256:e0d2b00ecbcd1a3c5ea1abc8bb99a26508f758c1759fd01c3be482a3655a176f \ + 
--hash=sha256:e1a1452ad5723ff129b081e3c8aa4ba56b8734fee4223355ed7b815a7ece69bc \ + --hash=sha256:e88abff510dcff903a18d11c2a75f9964e768d99c8d147839913886144b2065e \ + --hash=sha256:ea7a4a998c87c5674a27089e022110a1a08a7753f21af3baf09efe9915c23c3c \ + --hash=sha256:eb47ee773ce67476a960e2db4a0a906680c54f662521550828c0cc57d0099426 \ + --hash=sha256:eed8cd98a7b24861da9d3d937f5fbfb6657350c547528a117297fe49e3960667 \ + --hash=sha256:ef28c3b328d29b5e2756903aed888960bc5df39b4c2eab157ae212f70ed5bf74 \ + --hash=sha256:ef59a53be400c1fad2c914b8d74c9d42384fed5174f9321dd021b7017fd40270 \ + --hash=sha256:f39caec26007a2d0efab6b8b1d74873ede9351962707afab622cc2285dd26ed0 \ + --hash=sha256:f8efb03ca77bd7725dfacc9254df00d73e6f43013cf39bd37ef1a8ed0ebb5165 \ + --hash=sha256:fa97278ae6614346b5ca41a45a911f37a3261b57dbe4a00602048652c862c28b \ + --hash=sha256:fc3dc9fb413fc34c396f52f4c87de18d0bd5023804afa8ab5cc224deeb6a9900 \ + --hash=sha256:ff7bc1bbdaa3e487c9469128bf39408e91f5573901cb852e03af378d3582c52d + # via orbax-checkpoint +six==1.17.0 \ + --hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \ + --hash=sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81 + # via + # astunparse + # google-api-core + # google-api-python-client + # google-auth + # google-pasta + # oauth2client + # promise + # python-dateutil + # tensorboard + # tensorflow-cpu +statsmodels==0.14.4 \ + --hash=sha256:1322286a7bfdde2790bf72d29698a1b76c20b8423a55bdcd0d457969d0041f72 \ + --hash=sha256:17672b30c6b98afe2b095591e32d1d66d4372f2651428e433f16a3667f19eabb \ + --hash=sha256:2a337b731aa365d09bb0eab6da81446c04fde6c31976b1d8e3d3a911f0f1e07b \ + --hash=sha256:3bb2e580d382545a65f298589809af29daeb15f9da2eb252af8f79693e618abc \ + --hash=sha256:46ac7ddefac0c9b7b607eed1d47d11e26fe92a1bc1f4d9af48aeed4e21e87981 \ + --hash=sha256:4793b01b7a5f5424f5a1dbcefc614c83c7608aa2b035f087538253007c339d5d \ + --hash=sha256:4bbb150620b53133d6cd1c5d14c28a4f85701e6c781d9b689b53681effaa655f \ + 
--hash=sha256:5221dba7424cf4f2561b22e9081de85f5bb871228581124a0d1b572708545199 \ + --hash=sha256:5d69e0f39060dc72c067f9bb6e8033b6dccdb0bae101d76a7ef0bcc94e898b67 \ + --hash=sha256:5ed7e118e6e3e02d6723a079b8c97eaadeed943fa1f7f619f7148dfc7862670f \ + --hash=sha256:631bb52159117c5da42ba94bd94859276b68cab25dc4cac86475bc24671143bc \ + --hash=sha256:6e9ddefba1d4e1107c1f20f601b0581421ea3ad9fd75ce3c2ba6a76b6dc4682c \ + --hash=sha256:6f43da7957e00190104c5dd0f661bfc6dfc68b87313e3f9c4dbd5e7d222e0aeb \ + --hash=sha256:7a62f1fc9086e4b7ee789a6f66b3c0fc82dd8de1edda1522d30901a0aa45e42b \ + --hash=sha256:7f7917a51766b4e074da283c507a25048ad29a18e527207883d73535e0dc6184 \ + --hash=sha256:81030108d27aecc7995cac05aa280cf8c6025f6a6119894eef648997936c2dd0 \ + --hash=sha256:8286f69a5e1d0e0b366ffed5691140c83d3efc75da6dbf34a3d06e88abfaaab6 \ + --hash=sha256:91341cbde9e8bea5fb419a76e09114e221567d03f34ca26e6d67ae2c27d8fe3c \ + --hash=sha256:9729642884147ee9db67b5a06a355890663d21f76ed608a56ac2ad98b94d201a \ + --hash=sha256:a6087ecb0714f7c59eb24c22781491e6f1cfffb660b4740e167625ca4f052056 \ + --hash=sha256:aa74aaa26eaa5012b0a01deeaa8a777595d0835d3d6c7175f2ac65435a7324d2 \ + --hash=sha256:ab5e6312213b8cfb9dca93dd46a0f4dccb856541f91d3306227c3d92f7659245 \ + --hash=sha256:b5a24f5d2c22852d807d2b42daf3a61740820b28d8381daaf59dcb7055bf1a79 \ + --hash=sha256:bb695c2025d122a101c2aca66d2b78813c321b60d3a7c86bb8ec4467bb53b0f9 \ + --hash=sha256:d330da34f59f1653c5193f9fe3a3a258977c880746db7f155fc33713ea858db5 \ + --hash=sha256:d9c8fa28dfd75753d9cf62769ba1fecd7e73a0be187f35cc6f54076f98aa3f3f \ + --hash=sha256:df4f7864606fa843d7e7c0e6af288f034a2160dba14e6ccc09020a3cf67cb092 \ + --hash=sha256:e31b95ac603415887c9f0d344cb523889cf779bc52d68e27e2d23c358958fec7 \ + --hash=sha256:e332c2d9b806083d1797231280602340c5c913f90d4caa0213a6a54679ce9331 \ + --hash=sha256:f5f537f7d000de4a1708c63400755152b862cd4926bb81a86568e347c19c364b + # via plotnine +sympy==1.13.3 \ + 
--hash=sha256:54612cf55a62755ee71824ce692986f23c88ffa77207b30c1368eda4a7060f73 \ + --hash=sha256:b27fd2c6530e0ab39e275fc9b683895367e51d5da91baa8d3d64db2565fec4d9 + # via torch +tabulate==0.9.0 \ + --hash=sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c \ + --hash=sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f + # via dm-haiku +tensorboard==2.18.0 \ + --hash=sha256:107ca4821745f73e2aefa02c50ff70a9b694f39f790b11e6f682f7d326745eab + # via tensorflow-cpu +tensorboard-data-server==0.7.2 \ + --hash=sha256:7e0610d205889588983836ec05dc098e80f97b7e7bbff7e994ebb78f578d0ddb \ + --hash=sha256:9fe5d24221b29625dbc7328b0436ca7fc1c23de4acf4d272f1180856e32f9f60 \ + --hash=sha256:ef687163c24185ae9754ed5650eb5bc4d84ff257aabdc33f0cc6f74d8ba54530 + # via tensorboard +tensorflow-cpu==2.18.0 ; sys_platform == "linux" and platform_machine == "x86_64" \ + --hash=sha256:089e71746960ea581dca53401f84b3b99c8537313e337a9e5dbf97036a936f7e \ + --hash=sha256:0b093b727c2f2a8cf4ee4f2c7352c8e958a2a1d27a452961b8d5f43a0798dcd2 \ + --hash=sha256:107775c86045c7a3042cf9d79046be49cd68a3278f5d9f8c75158c049259307c \ + --hash=sha256:39bd421ad125e4163d6e2d41ab0e158b583fb5c6f9254522fb87635b0e70b891 \ + --hash=sha256:482dc228f513c65bbe34abbb7c456765bd8e1599dbeae0924fc96d97578f9987 \ + --hash=sha256:9f98466fde4a307d39b71309b1a9a8a4f1bb972e70d36b1ead0817405cc518f2 \ + --hash=sha256:c2dc132be3215f4f8d56766bf8b6645fa6d45dea70b46ae092c5cdd3e958c0ac \ + --hash=sha256:c61ef38e24686cd460e31f988ea36f73092ccc628193df95116d6c5f690393f9 + # via -r requirements-dev.txt +tensorflow-datasets==4.9.7 \ + --hash=sha256:948d7a68ee693d9709cbfe955f97344b98647e5512e0e42dd2a61f3e7925d68b \ + --hash=sha256:f6fdfe745b5df2a37bde8bb2ef149188ebbca4af546226b461f3bad26bc42875 + # via -r requirements-dev.txt +tensorflow-io-gcs-filesystem==0.37.1 \ + --hash=sha256:0df00891669390078a003cedbdd3b8e645c718b111917535fa1d7725e95cdb95 \ + 
--hash=sha256:249c12b830165841411ba71e08215d0e94277a49c551e6dd5d72aab54fe5491b \ + --hash=sha256:257aab23470a0796978efc9c2bcf8b0bc80f22e6298612a4c0a50d3f4e88060c \ + --hash=sha256:286389a203a5aee1a4fa2e53718c661091aa5fea797ff4fa6715ab8436b02e6c \ + --hash=sha256:32c50ab4e29a23c1f91cd0f9ab8c381a0ab10f45ef5c5252e94965916041737c \ + --hash=sha256:426de1173cb81fbd62becec2012fc00322a295326d90eb6c737fab636f182aed \ + --hash=sha256:6e1f2796b57e799a8ca1b75bf47c2aaa437c968408cc1a402a9862929e104cda \ + --hash=sha256:8943036bbf84e7a2be3705cb56f9c9df7c48c9e614bb941f0936c58e3ca89d6f \ + --hash=sha256:8febbfcc67c61e542a5ac1a98c7c20a91a5e1afc2e14b1ef0cb7c28bc3b6aa70 \ + --hash=sha256:9679b36e3a80921876f31685ab6f7270f3411a4cc51bc2847e80d0e4b5291e27 \ + --hash=sha256:b02f9c5f94fd62773954a04f69b68c4d576d076fd0db4ca25d5479f0fbfcdbad \ + --hash=sha256:ee5da49019670ed364f3e5fb86b46420841a6c3cb52a300553c63841671b3e6d \ + --hash=sha256:ee7c8ee5fe2fd8cb6392669ef16e71841133041fee8a330eff519ad9b36e4556 \ + --hash=sha256:fbb33f1745f218464a59cecd9a18e32ca927b0f4d77abd8f8671b645cc1a182f \ + --hash=sha256:fe8dcc6d222258a080ac3dfcaaaa347325ce36a7a046277f6b3e19abc1efb3c5 \ + --hash=sha256:ffebb6666a7bfc28005f4fbbb111a455b5e7d6cd3b12752b7050863ecb27d5cc + # via tensorflow-cpu +tensorflow-metadata==1.16.1 \ + --hash=sha256:2ce72ea31d78a00c0c74c6d465482335aa5cb2a3b2a104dedba0b258bc7bb18a + # via tensorflow-datasets +tensorstore==0.1.71 \ + --hash=sha256:0bd87899e1c6049b078e785e8b7871e2579202f5b929e89c3c37340965b922bb \ + --hash=sha256:1a6cdcc52e4b841d23e50a2fa28e016e6d9f61d6ea9188d4555ea189b040a0f6 \ + --hash=sha256:31e39ed7d374f43e45bff52611bad99315c577b44c099b2f6837b801b3467645 \ + --hash=sha256:321d6302e5116b20fda500821240eba7de28477209070728d98edefced97d2b5 \ + --hash=sha256:373558b803d8c2c57fc613b11007ae58139f19a3cddd443a0de5d7b5321e5961 \ + --hash=sha256:46ff0f41ef3b1dbd1a925d62e6475523a587bcd37b277bf4f633f46f5b7e22bd \ + 
--hash=sha256:52b546f076b2c3bf217c60f05de4124cc1197ce92f8e826e7ec73ae324074a5a \ + --hash=sha256:583f0ec143062176ca21fe8dcc3b3b6f94d7f4ea643443b49942d3d1a2fa29b4 \ + --hash=sha256:5c37c7b385517b568282a7aedded446216335d0cb41187c93c80b53596c92c96 \ + --hash=sha256:6276e279b45eb5d9b95c4df3e7956255f414fd4b128d2de16d8aecde86c36357 \ + --hash=sha256:65c3a1a2a35a1b537403f36403d258caab477e564bc0f64109b941cc77b4f203 \ + --hash=sha256:75a9ff1f7b6759094cc210baa4e8135c4898472e08a7036476374433d03c6a34 \ + --hash=sha256:87a97a34b0475ddc7d2afc40e5dd7f8d12522aa81edfbcccb39628cf591454d5 \ + --hash=sha256:95041b55a2ec86d1f6690512d1883581b18f2f4f46c3d97894aeb0ac2db6af7f \ + --hash=sha256:b961bbbb7a1c6a48e4c1406a98caebeb400461e2e75a08b6df0c013294037a15 \ + --hash=sha256:ced5430bcdfa7fcb3a6bdc44733176158cb877b35bdd233cac82e25b4cc94e92 \ + --hash=sha256:d3a24feb6195f1c222162965c0107c9ff56d322cca23e19f0e66636f6eb80f14 \ + --hash=sha256:de8843fb3462899de7bcdeeaccb92303a9d61006bc36364deb4a88df46320ba4 \ + --hash=sha256:ecf4feb574051f40e81572ea2ff8e5895b2980c5dd3b29fe81c70d25e42d3b6a \ + --hash=sha256:f3e62aa7b473c0715706a809da3591763906059e8731a38c0b495337a1dc55ea \ + --hash=sha256:f40e73bcdc333dfb3f7fe0fcf023bcbec41533c9856657718ff76ece1a1902e0 + # via + # flax + # orbax-checkpoint +termcolor==2.5.0 \ + --hash=sha256:37b17b5fc1e604945c2642c872a3764b5d547a48009871aea3edd3afa180afb8 \ + --hash=sha256:998d8d27da6d48442e8e1f016119076b690d962507531df4890fcd2db2ef8a6f + # via + # -r requirements-dev.txt + # tensorflow-cpu + # tensorflow-datasets +threadpoolctl==3.5.0 \ + --hash=sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107 \ + --hash=sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467 + # via scikit-learn +tokenizers==0.21.0 \ + --hash=sha256:089d56db6782a73a27fd8abf3ba21779f5b85d4a9f35e3b493c7bbcbbf0d539b \ + --hash=sha256:3c4c93eae637e7d2aaae3d376f06085164e1660f89304c0ab2b1d08a406636b2 \ + 
--hash=sha256:400832c0904f77ce87c40f1a8a27493071282f785724ae62144324f171377273 \ + --hash=sha256:4145505a973116f91bc3ac45988a92e618a6f83eb458f49ea0790df94ee243ff \ + --hash=sha256:6b177fb54c4702ef611de0c069d9169f0004233890e0c4c5bd5508ae05abf193 \ + --hash=sha256:6b43779a269f4629bebb114e19c3fca0223296ae9fea8bb9a7a6c6fb0657ff8e \ + --hash=sha256:87841da5a25a3a5f70c102de371db120f41873b854ba65e52bccd57df5a3780c \ + --hash=sha256:9aeb255802be90acfd363626753fda0064a8df06031012fe7d52fd9a905eb00e \ + --hash=sha256:c87ca3dc48b9b1222d984b6b7490355a6fdb411a2d810f6f05977258400ddb74 \ + --hash=sha256:d8b09dbeb7a8d73ee204a70f94fc06ea0f17dcf0844f16102b9f414f0b7463ba \ + --hash=sha256:e84ca973b3a96894d1707e189c14a774b701596d579ffc7e69debfc036a61a04 \ + --hash=sha256:eb1702c2f27d25d9dd5b389cc1f2f51813e99f8ca30d9e25348db6585a97e24a \ + --hash=sha256:eb7202d231b273c34ec67767378cd04c767e967fda12d4a9e36208a34e2f137e \ + --hash=sha256:ee0894bf311b75b0c03079f33859ae4b2334d675d4e93f5a4132e1eae2834fe4 \ + --hash=sha256:f53ea537c925422a2e0e92a24cce96f6bc5046bbef24a1652a5edc8ba975f62e + # via transformers +toml==0.10.2 \ + --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ + --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f + # via tensorflow-datasets +toolz==1.0.0 \ + --hash=sha256:292c8f1c4e7516bf9086f8850935c799a874039c8bcf959d47b600e4c44a6236 \ + --hash=sha256:2c86e3d9a04798ac556793bced838816296a2f085017664e4995cb40a1047a02 + # via chex +torch==2.3.0 \ + --hash=sha256:09c81c5859a5b819956c6925a405ef1cdda393c9d8a01ce3851453f699d3358c \ + --hash=sha256:1bf023aa20902586f614f7682fedfa463e773e26c58820b74158a72470259459 \ + --hash=sha256:20572f426965dd8a04e92a473d7e445fa579e09943cc0354f3e6fef6130ce061 \ + --hash=sha256:493d54ee2f9df100b5ce1d18c96dbb8d14908721f76351e908c9d2622773a788 \ + --hash=sha256:4fb27b35dbb32303c2927da86e27b54a92209ddfb7234afb1949ea2b3effffea \ + 
--hash=sha256:5515503a193781fd1b3f5c474e89c9dfa2faaa782b2795cc4a7ab7e67de923f6 \ + --hash=sha256:6ae9f64b09516baa4ef890af0672dc981c20b1f0d829ce115d4420a247e88fba \ + --hash=sha256:729804e97b7cf19ae9ab4181f91f5e612af07956f35c8b2c8e9d9f3596a8e877 \ + --hash=sha256:758ef938de87a2653bba74b91f703458c15569f1562bf4b6c63c62d9c5a0c1f5 \ + --hash=sha256:760f8bedff506ce9e6e103498f9b1e9e15809e008368594c3a66bf74a8a51380 \ + --hash=sha256:a306c87a3eead1ed47457822c01dfbd459fe2920f2d38cbdf90de18f23f72542 \ + --hash=sha256:b0de2bdc0486ea7b14fc47ff805172df44e421a7318b7c4d92ef589a75d27410 \ + --hash=sha256:bce43af735c3da16cc14c7de2be7ad038e2fbf75654c2e274e575c6c05772ace \ + --hash=sha256:cd0dc498b961ab19cb3f8dbf0c6c50e244f2f37dbfa05754ab44ea057c944ef9 \ + --hash=sha256:d24e328226d8e2af7cf80fcb1d2f1d108e0de32777fab4aaa2b37b9765d8be73 \ + --hash=sha256:d8ea5a465dbfd8501f33c937d1f693176c9aef9d1c1b0ca1d44ed7b0a18c52ac \ + --hash=sha256:dca986214267b34065a79000cee54232e62b41dff1ec2cab9abc3fc8b3dee0ad \ + --hash=sha256:e05f836559251e4096f3786ee99f4a8cbe67bc7fbedba8ad5e799681e47c5e80 \ + --hash=sha256:e65ba85ae292909cde0dde6369826d51165a3fc8823dc1854cd9432d7f79b932 \ + --hash=sha256:f9b98bf1a3c8af2d4c41f0bf1433920900896c446d1ddc128290ff146d1eb4bd + # via + # -r requirements-dev.txt + # torchvision +torch-xla==2.3.0 ; sys_platform == "linux" and platform_machine == "x86_64" \ + --hash=sha256:262876ab0e95a4ecd131afa33a89ad7f94544f878a74198ee52fcf723af39e6f \ + --hash=sha256:6678b2bea3baeda916cdb314d5ad190eeb388e71a4de04ccfa948ab74d6d4c72 \ + --hash=sha256:8282e0ff92f42e18e22f65c0ec5a17acd5bc51728b1fdeb6b4ccade3a313c6ac \ + --hash=sha256:e0b2f88baf3373b9c0a4f351488dbb9b4b007b52c1c66f65b65e1984b5f0f227 + # via -r requirements-dev.txt +torchvision==0.18.0 \ + --hash=sha256:2115a1906c015f5da9ceedc40a983313b0fd6e2c8a17108a92991706f51f6987 \ + --hash=sha256:36efd87001c6bee2383e043e46a025affb03179747c8f4777b9918527ffce756 \ + 
--hash=sha256:3d7955398d4ceaad77c487c2c44f6f7813112402c9bab8cd906d346005891048 \ + --hash=sha256:493c45f9937dad37aa1b64b14da17c7a589c72b91adc4837d431009cfe29bd53 \ + --hash=sha256:4c334b3e719ba0a9ba6e15d4aff1178f5e6d029174f346163fed525f0ccfffd3 \ + --hash=sha256:5337f6acfa1fe959d5cb340d01a00614d6b31ce7a4824ccb95435a85c5273b95 \ + --hash=sha256:6323f7e5423ff2594d5891863b919deb9d0de95f01c36bf26fbd879036b6ed08 \ + --hash=sha256:6896a52168befe1105fb3c9335287390ed227e71d1e4ec4d68b62e8a3099fc09 \ + --hash=sha256:6ad70ddfa879bda5ed886b2518fe562640e0059787cbd65cb2bffa7674541410 \ + --hash=sha256:75e22ecf44a13b8f95b8ad421c0261282d859c61816badaca1959e073ccdd691 \ + --hash=sha256:7c770f0f748e0b17f57c0297508d7254f686cdf03fc2e2949f422b20574f4c0f \ + --hash=sha256:925d0a82cccf6f986c18b29b4392a942db65cbdb73c13a129c8493822eb9e36f \ + --hash=sha256:95b42d0dc599b47a01530c7439a5751e67e45b85e3a67113989cf7c7c70f2039 \ + --hash=sha256:a964afbc7ddf50a46b941477f6c35729b416deedd139756befd488245e2e226d \ + --hash=sha256:b657d052d146f24cb3b2a78219bfc82ae70a9706671c50f632528907d10cccec \ + --hash=sha256:bd8e6f3b5beb49965f15c461302488edfa3d8c2d01d3bb79b150d6fb62711e3a \ + --hash=sha256:ccc292e093771d5baacf5535ac4416306b6b5f15676341cd4d010d8542eace25 \ + --hash=sha256:dd61628a3d189c6852a12dc5ed4cd2eece66d2d67f35a866cb16f1dcb06c8c62 \ + --hash=sha256:e5a24d620cea14a4bb89f24aa2b506230c0a16a3ada57fc53ad80cfd256a2128 \ + --hash=sha256:eb9d83c0e1dbb54ecb0fb04c87f786333e3a6fb8b9c400aca7c31081f9aa5707 + # via -r requirements-dev.txt +tqdm==4.67.1 \ + --hash=sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2 \ + --hash=sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2 + # via + # datasets + # etils + # huggingface-hub + # tensorflow-datasets + # transformers +transformers==4.47.1 \ + --hash=sha256:6c29c05a5f595e278481166539202bf8641281536df1c42357ee58a45d0a564a \ + --hash=sha256:d2f5d19bb6283cd66c893ec7e6d931d6370bbf1cc93633326ff1f41a40046c9c + # via -r 
requirements-dev.txt +triton==2.3.0 \ + --hash=sha256:038e06a09c06a164fef9c48de3af1e13a63dc1ba3c792871e61a8e79720ea440 \ + --hash=sha256:218d742e67480d9581bafb73ed598416cc8a56f6316152e5562ee65e33de01c0 \ + --hash=sha256:381ec6b3dac06922d3e4099cfc943ef032893b25415de295e82b1a82b0359d2c \ + --hash=sha256:3c3d9607f85103afdb279938fc1dd2a66e4f5999a58eb48a346bd42738f986dd \ + --hash=sha256:5ce4b8ff70c48e47274c66f269cce8861cf1dc347ceeb7a67414ca151b1822d8 \ + --hash=sha256:6d8f636e0341ac348899a47a057c3daea99ea7db31528a225a3ba4ded28ccc65 + # via torch +typing-extensions==4.12.2 \ + --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ + --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 + # via + # chex + # etils + # flax + # huggingface-hub + # optree + # orbax-checkpoint + # simple-parsing + # tensorflow-cpu + # torch +tzdata==2024.2 \ + --hash=sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc \ + --hash=sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd + # via pandas +uritemplate==3.0.1 \ + --hash=sha256:07620c3f3f8eed1f12600845892b0e036a2420acf513c53f7de0abd911a5894f \ + --hash=sha256:5af8ad10cec94f215e3f48112de2022e1d5a37ed427fbd88652fa908f2ab7cae + # via google-api-python-client +urllib3==2.3.0 \ + --hash=sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df \ + --hash=sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d + # via + # requests + # responses +werkzeug==3.1.3 \ + --hash=sha256:54b78bf3716d19a65be4fceccc0d1d7b89e608834989dfae50ea87564639213e \ + --hash=sha256:60723ce945c19328679790e3282cc758aa4a6040e4bb330f53d30fa546d44746 + # via tensorboard +wheel==0.45.1 \ + --hash=sha256:661e1abd9198507b1409a20c02106d9670b2576e916d58f520316666abca6729 \ + --hash=sha256:708e7481cc80179af0e556bbf0cc00b8444c7321e2700b8d8580231d13017248 + # via astunparse +wrapt==1.17.0 \ + 
--hash=sha256:0229b247b0fc7dee0d36176cbb79dbaf2a9eb7ecc50ec3121f40ef443155fb1d \ + --hash=sha256:0698d3a86f68abc894d537887b9bbf84d29bcfbc759e23f4644be27acf6da301 \ + --hash=sha256:0a0a1a1ec28b641f2a3a2c35cbe86c00051c04fffcfcc577ffcdd707df3f8635 \ + --hash=sha256:0b48554952f0f387984da81ccfa73b62e52817a4386d070c75e4db7d43a28c4a \ + --hash=sha256:0f2a28eb35cf99d5f5bd12f5dd44a0f41d206db226535b37b0c60e9da162c3ed \ + --hash=sha256:140ea00c87fafc42739bd74a94a5a9003f8e72c27c47cd4f61d8e05e6dec8721 \ + --hash=sha256:16187aa2317c731170a88ef35e8937ae0f533c402872c1ee5e6d079fcf320801 \ + --hash=sha256:17fcf043d0b4724858f25b8826c36e08f9fb2e475410bece0ec44a22d533da9b \ + --hash=sha256:18b956061b8db634120b58f668592a772e87e2e78bc1f6a906cfcaa0cc7991c1 \ + --hash=sha256:2399408ac33ffd5b200480ee858baa58d77dd30e0dd0cab6a8a9547135f30a88 \ + --hash=sha256:2a0c23b8319848426f305f9cb0c98a6e32ee68a36264f45948ccf8e7d2b941f8 \ + --hash=sha256:2dfb7cff84e72e7bf975b06b4989477873dcf160b2fd89959c629535df53d4e0 \ + --hash=sha256:2f495b6754358979379f84534f8dd7a43ff8cff2558dcdea4a148a6e713a758f \ + --hash=sha256:33539c6f5b96cf0b1105a0ff4cf5db9332e773bb521cc804a90e58dc49b10578 \ + --hash=sha256:3c34f6896a01b84bab196f7119770fd8466c8ae3dfa73c59c0bb281e7b588ce7 \ + --hash=sha256:498fec8da10e3e62edd1e7368f4b24aa362ac0ad931e678332d1b209aec93045 \ + --hash=sha256:4d63f4d446e10ad19ed01188d6c1e1bb134cde8c18b0aa2acfd973d41fcc5ada \ + --hash=sha256:4e4b4385363de9052dac1a67bfb535c376f3d19c238b5f36bddc95efae15e12d \ + --hash=sha256:4e547b447073fc0dbfcbff15154c1be8823d10dab4ad401bdb1575e3fdedff1b \ + --hash=sha256:4f643df3d4419ea3f856c5c3f40fec1d65ea2e89ec812c83f7767c8730f9827a \ + --hash=sha256:4f763a29ee6a20c529496a20a7bcb16a73de27f5da6a843249c7047daf135977 \ + --hash=sha256:5ae271862b2142f4bc687bdbfcc942e2473a89999a54231aa1c2c676e28f29ea \ + --hash=sha256:5d8fd17635b262448ab8f99230fe4dac991af1dabdbb92f7a70a6afac8a7e346 \ + --hash=sha256:69c40d4655e078ede067a7095544bcec5a963566e17503e75a3a3e0fe2803b13 \ + 
--hash=sha256:69d093792dc34a9c4c8a70e4973a3361c7a7578e9cd86961b2bbf38ca71e4e22 \ + --hash=sha256:6a9653131bda68a1f029c52157fd81e11f07d485df55410401f745007bd6d339 \ + --hash=sha256:6ff02a91c4fc9b6a94e1c9c20f62ea06a7e375f42fe57587f004d1078ac86ca9 \ + --hash=sha256:714c12485aa52efbc0fc0ade1e9ab3a70343db82627f90f2ecbc898fdf0bb181 \ + --hash=sha256:7264cbb4a18dc4acfd73b63e4bcfec9c9802614572025bdd44d0721983fc1d9c \ + --hash=sha256:73a96fd11d2b2e77d623a7f26e004cc31f131a365add1ce1ce9a19e55a1eef90 \ + --hash=sha256:74bf625b1b4caaa7bad51d9003f8b07a468a704e0644a700e936c357c17dd45a \ + --hash=sha256:81b1289e99cf4bad07c23393ab447e5e96db0ab50974a280f7954b071d41b489 \ + --hash=sha256:8425cfce27b8b20c9b89d77fb50e368d8306a90bf2b6eef2cdf5cd5083adf83f \ + --hash=sha256:875d240fdbdbe9e11f9831901fb8719da0bd4e6131f83aa9f69b96d18fae7504 \ + --hash=sha256:879591c2b5ab0a7184258274c42a126b74a2c3d5a329df16d69f9cee07bba6ea \ + --hash=sha256:89fc28495896097622c3fc238915c79365dd0ede02f9a82ce436b13bd0ab7569 \ + --hash=sha256:8a5e7cc39a45fc430af1aefc4d77ee6bad72c5bcdb1322cfde852c15192b8bd4 \ + --hash=sha256:8f8909cdb9f1b237786c09a810e24ee5e15ef17019f7cecb207ce205b9b5fcce \ + --hash=sha256:914f66f3b6fc7b915d46c1cc424bc2441841083de01b90f9e81109c9759e43ab \ + --hash=sha256:92a3d214d5e53cb1db8b015f30d544bc9d3f7179a05feb8f16df713cecc2620a \ + --hash=sha256:948a9bd0fb2c5120457b07e59c8d7210cbc8703243225dbd78f4dfc13c8d2d1f \ + --hash=sha256:9c900108df470060174108012de06d45f514aa4ec21a191e7ab42988ff42a86c \ + --hash=sha256:9f2939cd4a2a52ca32bc0b359015718472d7f6de870760342e7ba295be9ebaf9 \ + --hash=sha256:a4192b45dff127c7d69b3bdfb4d3e47b64179a0b9900b6351859f3001397dabf \ + --hash=sha256:a8fc931382e56627ec4acb01e09ce66e5c03c384ca52606111cee50d931a342d \ + --hash=sha256:ad47b095f0bdc5585bced35bd088cbfe4177236c7df9984b3cc46b391cc60627 \ + --hash=sha256:b1ca5f060e205f72bec57faae5bd817a1560fcfc4af03f414b08fa29106b7e2d \ + --hash=sha256:ba1739fb38441a27a676f4de4123d3e858e494fac05868b7a281c0a383c098f4 \ + 
--hash=sha256:baa7ef4e0886a6f482e00d1d5bcd37c201b383f1d314643dfb0367169f94f04c \ + --hash=sha256:bb90765dd91aed05b53cd7a87bd7f5c188fcd95960914bae0d32c5e7f899719d \ + --hash=sha256:bc7f729a72b16ee21795a943f85c6244971724819819a41ddbaeb691b2dd85ad \ + --hash=sha256:bdf62d25234290db1837875d4dceb2151e4ea7f9fff2ed41c0fde23ed542eb5b \ + --hash=sha256:c30970bdee1cad6a8da2044febd824ef6dc4cc0b19e39af3085c763fdec7de33 \ + --hash=sha256:d2c63b93548eda58abf5188e505ffed0229bf675f7c3090f8e36ad55b8cbc371 \ + --hash=sha256:d751300b94e35b6016d4b1e7d0e7bbc3b5e1751e2405ef908316c2a9024008a1 \ + --hash=sha256:da427d311782324a376cacb47c1a4adc43f99fd9d996ffc1b3e8529c4074d393 \ + --hash=sha256:daba396199399ccabafbfc509037ac635a6bc18510ad1add8fd16d4739cdd106 \ + --hash=sha256:e185ec6060e301a7e5f8461c86fb3640a7beb1a0f0208ffde7a65ec4074931df \ + --hash=sha256:e4a557d97f12813dc5e18dad9fa765ae44ddd56a672bb5de4825527c847d6379 \ + --hash=sha256:e5ed16d95fd142e9c72b6c10b06514ad30e846a0d0917ab406186541fe68b451 \ + --hash=sha256:e711fc1acc7468463bc084d1b68561e40d1eaa135d8c509a65dd534403d83d7b \ + --hash=sha256:f28b29dc158ca5d6ac396c8e0a2ef45c4e97bb7e65522bfc04c989e6fe814575 \ + --hash=sha256:f335579a1b485c834849e9075191c9898e0731af45705c2ebf70e0cd5d58beed \ + --hash=sha256:fce6fee67c318fdfb7f285c29a82d84782ae2579c0e1b385b7f36c6e8074fffb \ + --hash=sha256:fd136bb85f4568fffca995bd3c8d52080b1e5b225dbf1c2b17b66b4c5fa02838 + # via + # tensorflow-cpu + # tensorflow-datasets +xxhash==3.5.0 \ + --hash=sha256:02c2e816896dc6f85922ced60097bcf6f008dedfc5073dcba32f9c8dd786f3c1 \ + --hash=sha256:0691bfcc4f9c656bcb96cc5db94b4d75980b9d5589f2e59de790091028580837 \ + --hash=sha256:07fda5de378626e502b42b311b049848c2ef38784d0d67b6f30bb5008642f8eb \ + --hash=sha256:08424f6648526076e28fae6ea2806c0a7d504b9ef05ae61d196d571e5c879c84 \ + --hash=sha256:0a80ad0ffd78bef9509eee27b4a29e56f5414b87fb01a888353e3d5bda7038bd \ + --hash=sha256:0adfbd36003d9f86c8c97110039f7539b379f28656a04097e7434d3eaf9aa131 \ + 
--hash=sha256:0ec70a89be933ea49222fafc3999987d7899fc676f688dd12252509434636622 \ + --hash=sha256:1030a39ba01b0c519b1a82f80e8802630d16ab95dc3f2b2386a0b5c8ed5cbb10 \ + --hash=sha256:109b436096d0a2dd039c355fa3414160ec4d843dfecc64a14077332a00aeb7da \ + --hash=sha256:1308fa542bbdbf2fa85e9e66b1077eea3a88bef38ee8a06270b4298a7a62a166 \ + --hash=sha256:1328f6d8cca2b86acb14104e381225a3d7b42c92c4b86ceae814e5c400dbb415 \ + --hash=sha256:13de2b76c1835399b2e419a296d5b38dc4855385d9e96916299170085ef72f57 \ + --hash=sha256:14470ace8bd3b5d51318782cd94e6f94431974f16cb3b8dc15d52f3b69df8e00 \ + --hash=sha256:149b7914451eb154b3dfaa721315117ea1dac2cc55a01bfbd4df7c68c5dd683d \ + --hash=sha256:160e0c19ee500482ddfb5d5570a0415f565d8ae2b3fd69c5dcfce8a58107b1c3 \ + --hash=sha256:2014c5b3ff15e64feecb6b713af12093f75b7926049e26a580e94dcad3c73d8c \ + --hash=sha256:2061188a1ba352fc699c82bff722f4baacb4b4b8b2f0c745d2001e56d0dfb514 \ + --hash=sha256:220f3f896c6b8d0316f63f16c077d52c412619e475f9372333474ee15133a558 \ + --hash=sha256:23241ff6423378a731d84864bf923a41649dc67b144debd1077f02e6249a0d54 \ + --hash=sha256:25b5a51dc3dfb20a10833c8eee25903fd2e14059e9afcd329c9da20609a307b2 \ + --hash=sha256:297595fe6138d4da2c8ce9e72a04d73e58725bb60f3a19048bc96ab2ff31c692 \ + --hash=sha256:2b4154c00eb22e4d543f472cfca430e7962a0f1d0f3778334f2e08a7ba59363c \ + --hash=sha256:2e76e83efc7b443052dd1e585a76201e40b3411fe3da7af4fe434ec51b2f163b \ + --hash=sha256:30eb2efe6503c379b7ab99c81ba4a779748e3830241f032ab46bd182bf5873af \ + --hash=sha256:3171f693dbc2cef6477054a665dc255d996646b4023fe56cb4db80e26f4cc520 \ + --hash=sha256:33513d6cc3ed3b559134fb307aae9bdd94d7e7c02907b37896a6c45ff9ce51bd \ + --hash=sha256:33eac61d0796ca0591f94548dcfe37bb193671e0c9bcf065789b5792f2eda644 \ + --hash=sha256:37889a0d13b0b7d739cfc128b1c902f04e32de17b33d74b637ad42f1c55101f6 \ + --hash=sha256:38c384c434021e4f62b8d9ba0bc9467e14d394893077e2c66d826243025e1f81 \ + --hash=sha256:392f52ebbb932db566973693de48f15ce787cabd15cf6334e855ed22ea0be5b3 \ + 
--hash=sha256:3dbbd9892c5ebffeca1ed620cf0ade13eb55a0d8c84e0751a6653adc6ac40d0c \ + --hash=sha256:3e5b5e16c5a480fe5f59f56c30abdeba09ffd75da8d13f6b9b6fd224d0b4d0a2 \ + --hash=sha256:3ff2c0a34eae7df88c868be53a8dd56fbdf592109e21d4bfa092a27b0bf4a7bf \ + --hash=sha256:42eca420c8fa072cc1dd62597635d140e78e384a79bb4944f825fbef8bfeeef6 \ + --hash=sha256:4811336f1ce11cac89dcbd18f3a25c527c16311709a89313c3acaf771def2d4b \ + --hash=sha256:4cc2d67fdb4d057730c75a64c5923abfa17775ae234a71b0200346bfb0a7f482 \ + --hash=sha256:4e28503dccc7d32e0b9817aa0cbfc1f45f563b2c995b7a66c4c8a0d232e840c7 \ + --hash=sha256:4e2febf914ace002132aa09169cc572e0d8959d0f305f93d5828c4836f9bc5a6 \ + --hash=sha256:50ac2184ffb1b999e11e27c7e3e70cc1139047e7ebc1aa95ed12f4269abe98d4 \ + --hash=sha256:531af8845aaadcadf951b7e0c1345c6b9c68a990eeb74ff9acd8501a0ad6a1c9 \ + --hash=sha256:53a068fe70301ec30d868ece566ac90d873e3bb059cf83c32e76012c889b8637 \ + --hash=sha256:586886c7e89cb9828bcd8a5686b12e161368e0064d040e225e72607b43858ba2 \ + --hash=sha256:59aa1203de1cb96dbeab595ded0ad0c0056bb2245ae11fac11c0ceea861382b9 \ + --hash=sha256:5a74f23335b9689b66eb6dbe2a931a88fcd7a4c2cc4b1cb0edba8ce381c7a1da \ + --hash=sha256:5d0d307d27099bb0cbeea7260eb39ed4fdb99c5542e21e94bb6fd29e49c57a23 \ + --hash=sha256:5d2a01dcce81789cf4b12d478b5464632204f4c834dc2d064902ee27d2d1f0ee \ + --hash=sha256:5d3a10609c51da2a1c0ea0293fc3968ca0a18bd73838455b5bca3069d7f8e32b \ + --hash=sha256:5ed9ebc46f24cf91034544b26b131241b699edbfc99ec5e7f8f3d02d6eb7fba4 \ + --hash=sha256:6027dcd885e21581e46d3c7f682cfb2b870942feeed58a21c29583512c3f09f8 \ + --hash=sha256:602d339548d35a8579c6b013339fb34aee2df9b4e105f985443d2860e4d7ffaa \ + --hash=sha256:604253b2143e13218ff1ef0b59ce67f18b8bd1c4205d2ffda22b09b426386898 \ + --hash=sha256:61a1ff00674879725b194695e17f23d3248998b843eb5e933007ca743310f793 \ + --hash=sha256:61c722ed8d49ac9bc26c7071eeaa1f6ff24053d553146d5df031802deffd03da \ + --hash=sha256:63107013578c8a730419adc05608756c3fa640bdc6abe806c3123a49fb829f43 \ + 
--hash=sha256:683b94dbd1ca67557850b86423318a2e323511648f9f3f7b1840408a02b9a48c \ + --hash=sha256:685c4f4e8c59837de103344eb1c8a3851f670309eb5c361f746805c5471b8c88 \ + --hash=sha256:695735deeddfb35da1677dbc16a083445360e37ff46d8ac5c6fcd64917ff9ade \ + --hash=sha256:6e5f70f6dca1d3b09bccb7daf4e087075ff776e3da9ac870f86ca316736bb4aa \ + --hash=sha256:6e93a5ad22f434d7876665444a97e713a8f60b5b1a3521e8df11b98309bff833 \ + --hash=sha256:6fa0b72f2423e2aa53077e54a61c28e181d23effeaafd73fcb9c494e60930c8e \ + --hash=sha256:70dabf941dede727cca579e8c205e61121afc9b28516752fd65724be1355cc90 \ + --hash=sha256:74752ecaa544657d88b1d1c94ae68031e364a4d47005a90288f3bab3da3c970f \ + --hash=sha256:7a46e1d6d2817ba8024de44c4fd79913a90e5f7265434cef97026215b7d30df6 \ + --hash=sha256:7c5d3e570ef46adaf93fc81b44aca6002b5a4d8ca11bd0580c07eac537f36680 \ + --hash=sha256:7cb29a034301e2982df8b1fe6328a84f4b676106a13e9135a0d7e0c3e9f806da \ + --hash=sha256:7ccb800c9418e438b44b060a32adeb8393764da7441eb52aa2aa195448935306 \ + --hash=sha256:7ce379bcaa9fcc00f19affa7773084dd09f5b59947b3fb47a1ceb0179f91aaa1 \ + --hash=sha256:7f85e0108d51092bdda90672476c7d909c04ada6923c14ff9d913c4f7dc8a3bc \ + --hash=sha256:80babcc30e7a1a484eab952d76a4f4673ff601f54d5142c26826502740e70b43 \ + --hash=sha256:82085c2abec437abebf457c1d12fccb30cc8b3774a0814872511f0f0562c768c \ + --hash=sha256:82b833d5563fefd6fceafb1aed2f3f3ebe19f84760fdd289f8b926731c2e6e91 \ + --hash=sha256:84f2caddf951c9cbf8dc2e22a89d4ccf5d86391ac6418fe81e3c67d0cf60b45f \ + --hash=sha256:893074d651cf25c1cc14e3bea4fceefd67f2921b1bb8e40fcfeba56820de80c6 \ + --hash=sha256:89997aa1c4b6a5b1e5b588979d1da048a3c6f15e55c11d117a56b75c84531f5a \ + --hash=sha256:89e66ceed67b213dec5a773e2f7a9e8c58f64daeb38c7859d8815d2c89f39ad7 \ + --hash=sha256:8d47ebd9f5d9607fd039c1fbf4994e3b071ea23eff42f4ecef246ab2b7334198 \ + --hash=sha256:924361811732ddad75ff23e90efd9ccfda4f664132feecb90895bade6a1b4623 \ + --hash=sha256:963be41bcd49f53af6d795f65c0da9b4cc518c0dd9c47145c98f61cb464f4839 \ + 
--hash=sha256:97a662338797c660178e682f3bc180277b9569a59abfb5925e8620fba00b9fc5 \ + --hash=sha256:9bed5144c6923cc902cd14bb8963f2d5e034def4486ab0bbe1f58f03f042f9a9 \ + --hash=sha256:9c770750cc80e8694492244bca7251385188bc5597b6a39d98a9f30e8da984e0 \ + --hash=sha256:9d32a592cac88d18cc09a89172e1c32d7f2a6e516c3dfde1b9adb90ab5df54a6 \ + --hash=sha256:a5bc08f33c4966f4eb6590d6ff3ceae76151ad744576b5fc6c4ba8edd459fdec \ + --hash=sha256:a606c8070ada8aa2a88e181773fa1ef17ba65ce5dd168b9d08038e2a61b33754 \ + --hash=sha256:a6c50017518329ed65a9e4829154626f008916d36295b6a3ba336e2458824c8c \ + --hash=sha256:a7b1d8315d9b5e9f89eb2933b73afae6ec9597a258d52190944437158b49d38e \ + --hash=sha256:a8fb786fb754ef6ff8c120cb96629fb518f8eb5a61a16aac3a979a9dbd40a084 \ + --hash=sha256:a905ad00ad1e1c34fe4e9d7c1d949ab09c6fa90c919860c1534ff479f40fd12d \ + --hash=sha256:a9d360a792cbcce2fe7b66b8d51274ec297c53cbc423401480e53b26161a290d \ + --hash=sha256:b150b8467852e1bd844387459aa6fbe11d7f38b56e901f9f3b3e6aba0d660240 \ + --hash=sha256:b702f806693201ad6c0a05ddbbe4c8f359626d0b3305f766077d51388a6bac58 \ + --hash=sha256:b96d559e0fcddd3343c510a0fe2b127fbff16bf346dd76280b82292567523442 \ + --hash=sha256:bcd51708a633410737111e998ceb3b45d3dbc98c0931f743d9bb0a209033a326 \ + --hash=sha256:bfc8cdd7f33d57f0468b0614ae634cc38ab9202c6957a60e31d285a71ebe0301 \ + --hash=sha256:c0342aafd421795d740e514bc9858ebddfc705a75a8c5046ac56d85fe97bf196 \ + --hash=sha256:c279f0d2b34ef15f922b77966640ade58b4ccdfef1c4d94b20f2a364617a493f \ + --hash=sha256:c28b2fdcee797e1c1961cd3bcd3d545cab22ad202c846235197935e1df2f8ef7 \ + --hash=sha256:c3bc7bf8cb8806f8d1c9bf149c18708cb1c406520097d6b0a73977460ea03602 \ + --hash=sha256:c4dcb4120d0cc3cc448624147dba64e9021b278c63e34a38789b688fd0da9bf3 \ + --hash=sha256:c8aa771ff2c13dd9cda8166d685d7333d389fae30a4d2bb39d63ab5775de8606 \ + --hash=sha256:cc1276d369452040cbb943300dc8abeedab14245ea44056a2943183822513a18 \ + --hash=sha256:cd2fd827b0ba763ac919440042302315c564fdb797294d86e8cdd4578e3bc7f3 \ + 
--hash=sha256:d30bbc1644f726b825b3278764240f449d75f1a8bdda892e641d4a688b1494ae \ + --hash=sha256:d5e9db7ef3ecbfc0b4733579cea45713a76852b002cf605420b12ef3ef1ec148 \ + --hash=sha256:dbd2ecfbfee70bc1a4acb7461fa6af7748ec2ab08ac0fa298f281c51518f982c \ + --hash=sha256:dd86b8e7f703ec6ff4f351cfdb9f428955859537125904aa8c963604f2e9d3e7 \ + --hash=sha256:dee1316133c9b463aa81aca676bc506d3f80d8f65aeb0bba2b78d0b30c51d7bd \ + --hash=sha256:e0c48b6300cd0b0106bf49169c3e0536408dfbeb1ccb53180068a18b03c662ab \ + --hash=sha256:e5d0ddaca65ecca9c10dcf01730165fd858533d0be84c75c327487c37a906a27 \ + --hash=sha256:e6a4dd644d72ab316b580a1c120b375890e4c52ec392d4aef3c63361ec4d77d1 \ + --hash=sha256:eade977f5c96c677035ff39c56ac74d851b1cca7d607ab3d8f23c6b859379cab \ + --hash=sha256:ec28adb204b759306a3d64358a5e5c07d7b1dd0ccbce04aa76cb9377b7b70296 \ + --hash=sha256:ece616532c499ee9afbb83078b1b952beffef121d989841f7f4b3dc5ac0fd212 \ + --hash=sha256:eefc37f6138f522e771ac6db71a6d4838ec7933939676f3753eafd7d3f4c40bc \ + --hash=sha256:f0b48edbebea1b7421a9c687c304f7b44d0677c46498a046079d445454504737 \ + --hash=sha256:f1abffa122452481a61c3551ab3c89d72238e279e517705b8b03847b1d93d738 \ + --hash=sha256:f2f2c61bee5844d41c3eb015ac652a0229e901074951ae48581d58bfb2ba01be \ + --hash=sha256:f7b58d1fd3551b8c80a971199543379be1cee3d0d409e1f6d8b01c1a2eebf1f8 \ + --hash=sha256:fa0cafd3a2af231b4e113fba24a65d7922af91aeb23774a8b78228e6cd785e3e \ + --hash=sha256:fa9f547bd98f5553d03160967866a71056a60960be00356a15ecc44efb40ba8e \ + --hash=sha256:fab81ef75003eda96239a23eda4e4543cedc22e34c373edcaf744e721a163986 \ + --hash=sha256:fd1b2281d01723f076df3c8188f43f2472248a6b63118b036e641243656b1b0f \ + --hash=sha256:fe1a92cfbaa0a1253e339ccec42dbe6db262615e52df591b68726ab10338003f + # via datasets +yarl==1.18.3 \ + --hash=sha256:00e5a1fea0fd4f5bfa7440a47eff01d9822a65b4488f7cff83155a0f31a2ecba \ + --hash=sha256:02ddb6756f8f4517a2d5e99d8b2f272488e18dd0bfbc802f31c16c6c20f22193 \ + 
--hash=sha256:045b8482ce9483ada4f3f23b3774f4e1bf4f23a2d5c912ed5170f68efb053318 \ + --hash=sha256:09c7907c8548bcd6ab860e5f513e727c53b4a714f459b084f6580b49fa1b9cee \ + --hash=sha256:0b0cad37311123211dc91eadcb322ef4d4a66008d3e1bdc404808992260e1a0e \ + --hash=sha256:0b3c92fa08759dbf12b3a59579a4096ba9af8dd344d9a813fc7f5070d86bbab1 \ + --hash=sha256:0fb2171a4486bb075316ee754c6d8382ea6eb8b399d4ec62fde2b591f879778a \ + --hash=sha256:1a74a13a4c857a84a845505fd2d68e54826a2cd01935a96efb1e9d86c728e186 \ + --hash=sha256:1d407181cfa6e70077df3377938c08012d18893f9f20e92f7d2f314a437c30b1 \ + --hash=sha256:1dd4bdd05407ced96fed3d7f25dbbf88d2ffb045a0db60dbc247f5b3c5c25d50 \ + --hash=sha256:25b411eddcfd56a2f0cd6a384e9f4f7aa3efee14b188de13048c25b5e91f1640 \ + --hash=sha256:2d06d3005e668744e11ed80812e61efd77d70bb7f03e33c1598c301eea20efbb \ + --hash=sha256:2ec9bbba33b2d00999af4631a3397d1fd78290c48e2a3e52d8dd72db3a067ac8 \ + --hash=sha256:3236da9272872443f81fedc389bace88408f64f89f75d1bdb2256069a8730ccc \ + --hash=sha256:35098b24e0327fc4ebdc8ffe336cee0a87a700c24ffed13161af80124b7dc8e5 \ + --hash=sha256:41f7ce59d6ee7741af71d82020346af364949314ed3d87553763a2df1829cc58 \ + --hash=sha256:436c4fc0a4d66b2badc6c5fc5ef4e47bb10e4fd9bf0c79524ac719a01f3607c2 \ + --hash=sha256:4891ed92157e5430874dad17b15eb1fda57627710756c27422200c52d8a4e393 \ + --hash=sha256:4ac515b860c36becb81bb84b667466885096b5fc85596948548b667da3bf9f24 \ + --hash=sha256:5094d9206c64181d0f6e76ebd8fb2f8fe274950a63890ee9e0ebfd58bf9d787b \ + --hash=sha256:54d6921f07555713b9300bee9c50fb46e57e2e639027089b1d795ecd9f7fa910 \ + --hash=sha256:578e281c393af575879990861823ef19d66e2b1d0098414855dd367e234f5b3c \ + --hash=sha256:5a3f356548e34a70b0172d8890006c37be92995f62d95a07b4a42e90fba54272 \ + --hash=sha256:602d98f2c2d929f8e697ed274fbadc09902c4025c5a9963bf4e9edfc3ab6f7ed \ + --hash=sha256:61b1a825a13bef4a5f10b1885245377d3cd0bf87cba068e1d9a88c2ae36880e1 \ + --hash=sha256:61e5e68cb65ac8f547f6b5ef933f510134a6bf31bb178be428994b0cb46c2a04 \ + 
--hash=sha256:61ee62ead9b68b9123ec24bc866cbef297dd266175d53296e2db5e7f797f902d \ + --hash=sha256:6333c5a377c8e2f5fae35e7b8f145c617b02c939d04110c76f29ee3676b5f9a5 \ + --hash=sha256:6748dbf9bfa5ba1afcc7556b71cda0d7ce5f24768043a02a58846e4a443d808d \ + --hash=sha256:67a283dd2882ac98cc6318384f565bffc751ab564605959df4752d42483ad889 \ + --hash=sha256:75674776d96d7b851b6498f17824ba17849d790a44d282929c42dbb77d4f17ae \ + --hash=sha256:757e81cae69244257d125ff31663249b3013b5dc0a8520d73694aed497fb195b \ + --hash=sha256:77a6e85b90a7641d2e07184df5557132a337f136250caafc9ccaa4a2a998ca2c \ + --hash=sha256:7c33dd1931a95e5d9a772d0ac5e44cac8957eaf58e3c8da8c1414de7dd27c576 \ + --hash=sha256:7df647e8edd71f000a5208fe6ff8c382a1de8edfbccdbbfe649d263de07d8c34 \ + --hash=sha256:7e2ee16578af3b52ac2f334c3b1f92262f47e02cc6193c598502bd46f5cd1477 \ + --hash=sha256:80316a8bd5109320d38eef8833ccf5f89608c9107d02d2a7f985f98ed6876990 \ + --hash=sha256:82123d0c954dc58db301f5021a01854a85bf1f3bb7d12ae0c01afc414a882ca2 \ + --hash=sha256:84b2deecba4a3f1a398df819151eb72d29bfeb3b69abb145a00ddc8d30094512 \ + --hash=sha256:8503ad47387b8ebd39cbbbdf0bf113e17330ffd339ba1144074da24c545f0069 \ + --hash=sha256:877d209b6aebeb5b16c42cbb377f5f94d9e556626b1bfff66d7b0d115be88d0a \ + --hash=sha256:8874027a53e3aea659a6d62751800cf6e63314c160fd607489ba5c2edd753cf6 \ + --hash=sha256:88a19f62ff30117e706ebc9090b8ecc79aeb77d0b1f5ec10d2d27a12bc9f66d0 \ + --hash=sha256:8d39d351e7faf01483cc7ff7c0213c412e38e5a340238826be7e0e4da450fdc8 \ + --hash=sha256:90adb47ad432332d4f0bc28f83a5963f426ce9a1a8809f5e584e704b82685dcb \ + --hash=sha256:913829534200eb0f789d45349e55203a091f45c37a2674678744ae52fae23efa \ + --hash=sha256:93b2e109287f93db79210f86deb6b9bbb81ac32fc97236b16f7433db7fc437d8 \ + --hash=sha256:9d41beda9dc97ca9ab0b9888cb71f7539124bc05df02c0cff6e5acc5a19dcc6e \ + --hash=sha256:a440a2a624683108a1b454705ecd7afc1c3438a08e890a1513d468671d90a04e \ + --hash=sha256:a4bb030cf46a434ec0225bddbebd4b89e6471814ca851abb8696170adb163985 \ + 
--hash=sha256:a9ca04806f3be0ac6d558fffc2fdf8fcef767e0489d2684a21912cc4ed0cd1b8 \ + --hash=sha256:ac1801c45cbf77b6c99242eeff4fffb5e4e73a800b5c4ad4fc0be5def634d2e1 \ + --hash=sha256:ac36703a585e0929b032fbaab0707b75dc12703766d0b53486eabd5139ebadd5 \ + --hash=sha256:b1771de9944d875f1b98a745bc547e684b863abf8f8287da8466cf470ef52690 \ + --hash=sha256:b464c4ab4bfcb41e3bfd3f1c26600d038376c2de3297760dfe064d2cb7ea8e10 \ + --hash=sha256:b4f6450109834af88cb4cc5ecddfc5380ebb9c228695afc11915a0bf82116789 \ + --hash=sha256:b57f4f58099328dfb26c6a771d09fb20dbbae81d20cfb66141251ea063bd101b \ + --hash=sha256:b643562c12680b01e17239be267bc306bbc6aac1f34f6444d1bded0c5ce438ca \ + --hash=sha256:b958ddd075ddba5b09bb0be8a6d9906d2ce933aee81100db289badbeb966f54e \ + --hash=sha256:b9d60031cf568c627d028239693fd718025719c02c9f55df0a53e587aab951b5 \ + --hash=sha256:ba23302c0c61a9999784e73809427c9dbedd79f66a13d84ad1b1943802eaaf59 \ + --hash=sha256:ba87babd629f8af77f557b61e49e7c7cac36f22f871156b91e10a6e9d4f829e9 \ + --hash=sha256:c017a3b6df3a1bd45b9fa49a0f54005e53fbcad16633870104b66fa1a30a29d8 \ + --hash=sha256:c1e1cc06da1491e6734f0ea1e6294ce00792193c463350626571c287c9a704db \ + --hash=sha256:c654d5207c78e0bd6d749f6dae1dcbbfde3403ad3a4b11f3c5544d9906969dde \ + --hash=sha256:c69697d3adff5aa4f874b19c0e4ed65180ceed6318ec856ebc423aa5850d84f7 \ + --hash=sha256:c7d79f7d9aabd6011004e33b22bc13056a3e3fb54794d138af57f5ee9d9032cb \ + --hash=sha256:ccaa3a4b521b780a7e771cc336a2dba389a0861592bbce09a476190bb0c8b4b3 \ + --hash=sha256:ccd17349166b1bee6e529b4add61727d3f55edb7babbe4069b5764c9587a8cc6 \ + --hash=sha256:ce1af883b94304f493698b00d0f006d56aea98aeb49d75ec7d98cd4a777e9285 \ + --hash=sha256:d0e883008013c0e4aef84dcfe2a0b172c4d23c2669412cf5b3371003941f72bb \ + --hash=sha256:d980e0325b6eddc81331d3f4551e2a333999fb176fd153e075c6d1c2530aa8a8 \ + --hash=sha256:e17c9361d46a4d5addf777c6dd5eab0715a7684c2f11b88c67ac37edfba6c482 \ + --hash=sha256:e2c08cc9b16f4f4bc522771d96734c7901e7ebef70c6c5c35dd0f10845270bcd \ + 
--hash=sha256:e35ef8683211db69ffe129a25d5634319a677570ab6b2eba4afa860f54eeaf75 \ + --hash=sha256:e3b9fd71836999aad54084906f8663dffcd2a7fb5cdafd6c37713b2e72be1760 \ + --hash=sha256:ef9f7768395923c3039055c14334ba4d926f3baf7b776c923c93d80195624782 \ + --hash=sha256:f52a265001d830bc425f82ca9eabda94a64a4d753b07d623a9f2863fde532b53 \ + --hash=sha256:f91c4803173928a25e1a55b943c81f55b8872f0018be83e3ad4938adffb77dd2 \ + --hash=sha256:fbd6748e8ab9b41171bb95c6142faf068f5ef1511935a0aa07025438dd9a9bc1 \ + --hash=sha256:fe57328fbc1bfd0bd0514470ac692630f3901c0ee39052ae47acd1d90a436719 \ + --hash=sha256:fea09ca13323376a2fdfb353a5fa2e59f90cd18d7ca4eaa1fd31f0a8b4f91e62 + # via aiohttp +zipp==3.21.0 \ + --hash=sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4 \ + --hash=sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931 + # via etils + +# The following packages are considered to be unsafe in a requirements file: +setuptools==70.3.0 \ + --hash=sha256:f171bab1dfbc86b132997f26a119f6056a57950d058587841a0082e8830f9dc5 \ + --hash=sha256:fe384da74336c398e0d956d1cae0669bc02eed936cdb1d49b57de1990dc11ffc + # via + # -r requirements-dev.txt + # google-api-core + # tensorboard + # tensorflow-cpu diff --git a/requirements_lock_3_10.txt b/requirements_lock_3_10.txt new file mode 100644 index 000000000..bd93e0ff6 --- /dev/null +++ b/requirements_lock_3_10.txt @@ -0,0 +1,236 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# bazel run //:requirements_3_10.update +# +cachetools==5.5.0 \ + --hash=sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292 \ + --hash=sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a + # via -r requirements.txt +cloudpickle==3.1.0 \ + --hash=sha256:81a929b6e3c7335c863c771d673d105f02efdb89dfaba0c90495d1c64796601b \ + --hash=sha256:fe11acda67f61aaaec473e3afe030feb131d78a43461b718185363384f1ba12e + # via -r requirements.txt +dill==0.3.9 \ + 
--hash=sha256:468dff3b89520b474c0397703366b7b95eebe6303f108adf9b19da1f702be87a \ + --hash=sha256:81aa267dddf68cbfe8029c42ca9ec6a4ab3b22371d1c450abc54422577b4512c + # via multiprocess +grpcio==1.66.0 \ + --hash=sha256:0f3010bf46b2a01c9e40644cb9ed91b4b8435e5c500a275da5f9f62580e31e80 \ + --hash=sha256:1c5466222470cb7fbc9cc898af1d48eefd297cb2e2f59af6d4a851c862fa90ac \ + --hash=sha256:1eb03524d0f55b965d6c86aa44e5db9e5eaa15f9ed3b164621e652e5b927f4b8 \ + --hash=sha256:230cdd696751e7eb1395718cd308234749daa217bb8d128f00357dc4df102558 \ + --hash=sha256:245b08f9b3c645a6a623f3ed4fa43dcfcd6ad701eb9c32511c1bb7380e8c3d23 \ + --hash=sha256:296a45ea835e12a1cc35ab0c57e455346c272af7b0d178e29c67742167262b4c \ + --hash=sha256:37514b68a42e9cf24536345d3cf9e580ffd29117c158b4eeea34625200256067 \ + --hash=sha256:375b58892301a5fc6ca7d7ff689c9dc9d00895f5d560604ace9f4f0573013c63 \ + --hash=sha256:423ae18637cd99ddcf2e5a6851c61828c49e9b9d022d0442d979b4f230109787 \ + --hash=sha256:49234580a073ce7ac490112f6c67c874cbcb27804c4525978cdb21ba7f3f193c \ + --hash=sha256:508411df1f2b7cfa05d4d7dbf3d576fe4f949cd61c03f3a6f0378c84e3d7b963 \ + --hash=sha256:50cea8ce2552865b87e3dffbb85eb21e6b98d928621600c0feda2f02449cd837 \ + --hash=sha256:516fdbc8e156db71a004bc431a6303bca24cfde186babe96dde7bd01e8f0cc70 \ + --hash=sha256:526d4f6ca19f31b25606d5c470ecba55c0b22707b524e4de8987919e8920437d \ + --hash=sha256:53d4c6706b49e358a2a33345dbe9b6b3bb047cecd7e8c07ba383bd09349bfef8 \ + --hash=sha256:5b15ef1b296c4e78f15f64fc65bf8081f8774480ffcac45642f69d9d753d9c6b \ + --hash=sha256:5e8140b39f10d7be2263afa2838112de29374c5c740eb0afd99146cb5bdbd990 \ + --hash=sha256:5ea27f4ce8c0daccfdd2c7961e6ba404b6599f47c948415c4cca5728739107a3 \ + --hash=sha256:5f4b3357e59dfba9140a51597287297bc638710d6a163f99ee14efc19967a821 \ + --hash=sha256:5f93fc84b72bbc7b84a42f3ca9dc055fa00d2303d9803be011ebf7a10a4eb833 \ + --hash=sha256:643d8d9632a688ae69661e924b862e23c83a3575b24e52917ec5bcc59543d212 \ + 
--hash=sha256:684a4c07883cbd4ac864f0d08d927267404f5f0c76f31c85f9bbe05f2daae2f2 \ + --hash=sha256:6d586a95c05c82a5354be48bb4537e1accaf2472d8eb7e9086d844cbff934482 \ + --hash=sha256:6ed35bf7da3fb3b1949e32bdf47a8b5ffe0aed11722d948933bd068531cd4682 \ + --hash=sha256:748452dbd5a047475d5413bdef08b0b9ceb2c0c0e249d4ee905a5fb82c6328dc \ + --hash=sha256:7bc9d823e05d63a87511fb456dcc48dc0fced86c282bf60229675e7ee7aac1a1 \ + --hash=sha256:8096a922eb91bc97c839f675c3efa1257c6ef181ae1b25d3fb97f2cae4c57c01 \ + --hash=sha256:832945e64176520520317b50d64ec7d79924429528d5747669b52d0bf2c7bd78 \ + --hash=sha256:8fc5c710ddd51b5a0dc36ef1b6663430aa620e0ce029b87b150dafd313b978c3 \ + --hash=sha256:921b8f7f25d5300d7c6837a1e0639ef145fbdbfb728e0a5db2dbccc9fc0fd891 \ + --hash=sha256:9d5251578767fe44602688c851c2373b5513048ac84c21a0fe946590a8e7933d \ + --hash=sha256:a639d3866bfb5a678b5c0b92cd7ab543033ed8988854290fd86145e71731fd4c \ + --hash=sha256:aaf30c75cbaf30e561ca45f21eb1f729f0fab3f15c592c1074795ed43e3ff96f \ + --hash=sha256:ad7256f224437b2c29c2bef98ddd3130454c5b1ab1f0471fc11794cefd4dbd3d \ + --hash=sha256:ba18cfdc09312eb2eea6fa0ce5d2eec3cf345ea78f6528b2eaed6432105e0bd0 \ + --hash=sha256:ba60ae3b465b3e85080ae3bfbc36fd0305ae495ab16fcf8022fc7d7a23aac846 \ + --hash=sha256:bc008c6afa1e7c8df99bd9154abc4f0470d26b7730ca2521122e99e771baa8c7 \ + --hash=sha256:c072f90a1f0409f827ae86266984cba65e89c5831a0726b9fc7f4b5fb940b853 \ + --hash=sha256:c1ea4c528e7db6660718e4165fd1b5ac24b79a70c870a7bc0b7bdb9babab7c1e \ + --hash=sha256:c3084e590e857ba7585ae91078e4c9b6ef55aaf1dc343ce26400ba59a146eada \ + --hash=sha256:c3f6feb0dc8456d025e566709f7dd02885add99bedaac50229013069242a1bfd \ + --hash=sha256:d0439a970d65327de21c299ea0e0c2ad0987cdaf18ba5066621dea5f427f922b \ + --hash=sha256:dd614370e939f9fceeeb2915111a0795271b4c11dfb5fc0f58449bee40c726a5 \ + --hash=sha256:de9e20a0acb709dcfa15a622c91f584f12c9739a79c47999f73435d2b3cc8a3b \ + --hash=sha256:e36fa838ac1d6c87198ca149cbfcc92e1af06bb8c8cd852622f8e58f33ea3324 \ + 
--hash=sha256:e8d20308eeae15b3e182f47876f05acbdec1eebd9473a9814a44e46ec4a84c04 + # via -r requirements.txt +jax[cpu]==0.4.34 \ + --hash=sha256:44196854f40c5f9cea3142824b9f1051f85afc3fcf7593ec5479fc8db01c58db \ + --hash=sha256:b957ca1fc91f7343f91a186af9f19c7f342c946f95a8c11c7f1e5cdfe2e58d9e + # via -r requirements.txt +jaxlib==0.4.34 \ + --hash=sha256:096f0ca309d41fa692a9d1f2f9baab1c5c8ca0749876ebb3f748e738a27c7ff4 \ + --hash=sha256:133070d4fec5525ffea4dc72956398c1cf647a04dcb37f8a935ee82af78d9965 \ + --hash=sha256:1a30771d85fa77f9ab8f18e63240f455ab3a3f87660ed7b8d5eea6ceecbe5c1e \ + --hash=sha256:3bcfa639ca3cfaf86c8ceebd5fc0d47300fd98a078014a1d0cc03133e1523d5f \ + --hash=sha256:3e60bc826933082e99b19b87c21818a8d26fcdb01f418d47cedff554746fd6cc \ + --hash=sha256:45d719a2ce0ebf21255a277b71d756f3609b7b5be70cddc5d88fd58c35219de0 \ + --hash=sha256:48272e9034ff868d4328cf0055a07882fd2be93f59dfb6283af7de491f9d1290 \ + --hash=sha256:571ef03259835458111596a71a2f4a6fabf4ec34595df4cea555035362ac5bf0 \ + --hash=sha256:6b43a974c5d91a19912d138f2658dd8dbb7d30dcdff5c961d896c673e872b611 \ + --hash=sha256:72e22e99a5dc890a64443c3fc12f13f20091f578c405a76de077ba42b4c62cd7 \ + --hash=sha256:7be673a876ebd1aef440fb7e3ebaf99a91abeb550c9728c644b7d7c7b5d7c108 \ + --hash=sha256:87f25a477cd279840e53718403f97092eba0e8a945fcab47bcf435b6f9119dda \ + --hash=sha256:8ee3f93836e53c86556ccd9449a4ea43516ee05184d031a71dd692e81259f7d9 \ + --hash=sha256:901cb4040ed24eae40071d8114ea8d10dff436277fa74a1a5b9e7206f641151c \ + --hash=sha256:b0001c8f0e2b1c7bc99e4f314b524a340d25653505c1a1484d4041a9d3617f6f \ + --hash=sha256:b7a212a3cb5c6acc201c32ae4f4b5f5a9ac09457fbb77ba8db5ce7e7d4adc214 \ + --hash=sha256:c303f5acaf6c56ce5ff133a923c9b6247bdebedde15bd2c893c24be4d8f71306 \ + --hash=sha256:c7b3e724a30426a856070aba0192b5d199e95b4411070e7ad96ad8b196877b10 \ + --hash=sha256:c9d3adcae43a33aad4332be9c2aedc5ef751d1e755f917a5afb30c7872eacaa8 \ + --hash=sha256:d840e64b85f8865404d6d225b9bb340e158df1457152a361b05680e24792b232 + # 
via jax +ml-dtypes==0.5.0 \ + --hash=sha256:099e09edd54e676903b4538f3815b5ab96f5b119690514602d96bfdb67172cbe \ + --hash=sha256:2e7534392682c3098bc7341648c650864207169c654aed83143d7a19c67ae06f \ + --hash=sha256:3e7d3a380fe73a63c884f06136f8baa7a5249cc8e9fdec677997dd78549f8128 \ + --hash=sha256:54415257f00eb44fbcc807454efac3356f75644f1cbfc2d4e5522a72ae1dacab \ + --hash=sha256:5f2b59233a0dbb6a560b3137ed6125433289ccba2f8d9c3695a52423a369ed15 \ + --hash=sha256:60275f2b51b56834e840c4809fca840565f9bf8e9a73f6d8c94f5b5935701215 \ + --hash=sha256:76942f6aeb5c40766d5ea62386daa4148e6a54322aaf5b53eae9e7553240222f \ + --hash=sha256:7ee9c320bb0f9ffdf9f6fa6a696ef2e005d1f66438d6f1c1457338e00a02e8cf \ + --hash=sha256:8c32138975797e681eb175996d64356bcfa124bdbb6a70460b9768c2b35a6fa4 \ + --hash=sha256:968fede07d1f9b926a63df97d25ac656cac1a57ebd33701734eaf704bc55d8d8 \ + --hash=sha256:a03fc861b86cc586728e3d093ba37f0cc05e65330c3ebd7688e7bae8290f8859 \ + --hash=sha256:a38df8df61194aeaae1ab7579075779b4ad32cd1cffd012c28be227fa7f2a70a \ + --hash=sha256:a988bac6572630e1e9c2edd9b1277b4eefd1c86209e52b0d061b775ac33902ff \ + --hash=sha256:ab046f2ff789b1f11b2491909682c5d089934835f9a760fafc180e47dcb676b8 \ + --hash=sha256:afa08343069874a30812871d639f9c02b4158ace065601406a493a8511180c02 \ + --hash=sha256:c7a9152f5876fef565516aa5dd1dccd6fc298a5891b2467973905103eb5c7856 \ + --hash=sha256:cb5cc7b25acabd384f75bbd78892d0c724943f3e2e1986254665a1aa10982e07 \ + --hash=sha256:d3b3db9990c3840986a0e70524e122cfa32b91139c3653df76121ba7776e015f \ + --hash=sha256:d4b1a70a3e5219790d6b55b9507606fc4e02911d1497d16c18dd721eb7efe7d0 \ + --hash=sha256:dc74fd9995513d33eac63d64e436240f5494ec74d522a9f0920194942fc3d2d7 \ + --hash=sha256:e04fde367b2fe901b1d47234426fe8819909bd1dd862a5adb630f27789c20599 + # via + # jax + # jaxlib +multiprocess==0.70.17 \ + --hash=sha256:1d52f068357acd1e5bbc670b273ef8f81d57863235d9fbf9314751886e141968 \ + --hash=sha256:20c28ca19079a6c879258103a6d60b94d4ffe2d9da07dda93fb1c8bc6243f522 \ + 
--hash=sha256:27b8409c02b5dd89d336107c101dfbd1530a2cd4fd425fc27dcb7adb6e0b47bf \ + --hash=sha256:2818af14c52446b9617d1b0755fa70ca2f77c28b25ed97bdaa2c69a22c47b46c \ + --hash=sha256:2884701445d0177aec5bd5f6ee0df296773e4fb65b11903b94c613fb46cfb7d1 \ + --hash=sha256:2b12e081df87ab755190e227341b2c3b17ee6587e9c82fecddcbe6aa812cd7f7 \ + --hash=sha256:2ea0939b0f4760a16a548942c65c76ff5afd81fbf1083c56ae75e21faf92e426 \ + --hash=sha256:349525099a0c9ac5936f0488b5ee73199098dac3ac899d81d326d238f9fd3ccd \ + --hash=sha256:38357ca266b51a2e22841b755d9a91e4bb7b937979a54d411677111716c32744 \ + --hash=sha256:4ae2f11a3416809ebc9a48abfc8b14ecce0652a0944731a1493a3c1ba44ff57a \ + --hash=sha256:7ddb24e5bcdb64e90ec5543a1f05a39463068b6d3b804aa3f2a4e16ec28562d6 \ + --hash=sha256:a0f01cd9d079af7a8296f521dc03859d1a414d14c1e2b6e676ef789333421c95 \ + --hash=sha256:a22a6b1a482b80eab53078418bb0f7025e4f7d93cc8e1f36481477a023884861 \ + --hash=sha256:c2c82d0375baed8d8dd0d8c38eb87c5ae9c471f8e384ad203a36f095ee860f67 \ + --hash=sha256:c3feb874ba574fbccfb335980020c1ac631fbf2a3f7bee4e2042ede62558a021 \ + --hash=sha256:d729f55198a3579f6879766a6d9b72b42d4b320c0dcb7844afb774d75b573c62 + # via -r requirements.txt +numpy==1.26.4 \ + --hash=sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b \ + --hash=sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818 \ + --hash=sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20 \ + --hash=sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0 \ + --hash=sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010 \ + --hash=sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a \ + --hash=sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea \ + --hash=sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c \ + --hash=sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71 \ + 
--hash=sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110 \ + --hash=sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be \ + --hash=sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a \ + --hash=sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a \ + --hash=sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5 \ + --hash=sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed \ + --hash=sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd \ + --hash=sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c \ + --hash=sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e \ + --hash=sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0 \ + --hash=sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c \ + --hash=sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a \ + --hash=sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b \ + --hash=sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0 \ + --hash=sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6 \ + --hash=sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2 \ + --hash=sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a \ + --hash=sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30 \ + --hash=sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218 \ + --hash=sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5 \ + --hash=sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07 \ + --hash=sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2 \ + --hash=sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4 \ + --hash=sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764 \ + 
--hash=sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef \ + --hash=sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3 \ + --hash=sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f + # via + # -r requirements.txt + # jax + # jaxlib + # ml-dtypes + # scipy +opt-einsum==3.4.0 \ + --hash=sha256:69bb92469f86a1565195ece4ac0323943e83477171b91d24c35afe028a90d7cd \ + --hash=sha256:96ca72f1b886d148241348783498194c577fa30a8faac108586b14f1ba4473ac + # via jax +protobuf==5.27.3 \ + --hash=sha256:043853dcb55cc262bf2e116215ad43fa0859caab79bb0b2d31b708f128ece035 \ + --hash=sha256:16ddf3f8c6c41e1e803da7abea17b1793a97ef079a912e42351eabb19b2cffe7 \ + --hash=sha256:68248c60d53f6168f565a8c76dc58ba4fa2ade31c2d1ebdae6d80f969cdc2d4f \ + --hash=sha256:82460903e640f2b7e34ee81a947fdaad89de796d324bcbc38ff5430bcdead82c \ + --hash=sha256:8572c6533e544ebf6899c360e91d6bcbbee2549251643d32c52cf8a5de295ba5 \ + --hash=sha256:a55c48f2a2092d8e213bd143474df33a6ae751b781dd1d1f4d953c128a415b25 \ + --hash=sha256:af7c0b7cfbbb649ad26132e53faa348580f844d9ca46fd3ec7ca48a1ea5db8a1 \ + --hash=sha256:b8a994fb3d1c11156e7d1e427186662b64694a62b55936b2b9348f0a7c6625ce \ + --hash=sha256:c2a105c24f08b1e53d6c7ffe69cb09d0031512f0b72f812dd4005b8112dbe91e \ + --hash=sha256:c84eee2c71ed83704f1afbf1a85c3171eab0fd1ade3b399b3fad0884cbcca8bf \ + --hash=sha256:dcb307cd4ef8fec0cf52cb9105a03d06fbb5275ce6d84a6ae33bc6cf84e0a07b + # via -r requirements.txt +scipy==1.14.1 \ + --hash=sha256:0c2f95de3b04e26f5f3ad5bb05e74ba7f68b837133a4492414b3afd79dfe540e \ + --hash=sha256:1729560c906963fc8389f6aac023739ff3983e727b1a4d87696b7bf108316a79 \ + --hash=sha256:278266012eb69f4a720827bdd2dc54b2271c97d84255b2faaa8f161a158c3b37 \ + --hash=sha256:2843f2d527d9eebec9a43e6b406fb7266f3af25a751aa91d62ff416f54170bc5 \ + --hash=sha256:2da0469a4ef0ecd3693761acbdc20f2fdeafb69e6819cc081308cc978153c675 \ + --hash=sha256:2ff0a7e01e422c15739ecd64432743cf7aae2b03f3084288f399affcefe5222d \ + 
--hash=sha256:2ff38e22128e6c03ff73b6bb0f85f897d2362f8c052e3b8ad00532198fbdae3f \ + --hash=sha256:30ac8812c1d2aab7131a79ba62933a2a76f582d5dbbc695192453dae67ad6310 \ + --hash=sha256:3a1b111fac6baec1c1d92f27e76511c9e7218f1695d61b59e05e0fe04dc59617 \ + --hash=sha256:4079b90df244709e675cdc8b93bfd8a395d59af40b72e339c2287c91860deb8e \ + --hash=sha256:5149e3fd2d686e42144a093b206aef01932a0059c2a33ddfa67f5f035bdfe13e \ + --hash=sha256:5a275584e726026a5699459aa72f828a610821006228e841b94275c4a7c08417 \ + --hash=sha256:631f07b3734d34aced009aaf6fedfd0eb3498a97e581c3b1e5f14a04164a456d \ + --hash=sha256:716e389b694c4bb564b4fc0c51bc84d381735e0d39d3f26ec1af2556ec6aad94 \ + --hash=sha256:8426251ad1e4ad903a4514712d2fa8fdd5382c978010d1c6f5f37ef286a713ad \ + --hash=sha256:8475230e55549ab3f207bff11ebfc91c805dc3463ef62eda3ccf593254524ce8 \ + --hash=sha256:8bddf15838ba768bb5f5083c1ea012d64c9a444e16192762bd858f1e126196d0 \ + --hash=sha256:8e32dced201274bf96899e6491d9ba3e9a5f6b336708656466ad0522d8528f69 \ + --hash=sha256:8f9ea80f2e65bdaa0b7627fb00cbeb2daf163caa015e59b7516395fe3bd1e066 \ + --hash=sha256:97c5dddd5932bd2a1a31c927ba5e1463a53b87ca96b5c9bdf5dfd6096e27efc3 \ + --hash=sha256:a49f6ed96f83966f576b33a44257d869756df6cf1ef4934f59dd58b25e0327e5 \ + --hash=sha256:af29a935803cc707ab2ed7791c44288a682f9c8107bc00f0eccc4f92c08d6e07 \ + --hash=sha256:b05d43735bb2f07d689f56f7b474788a13ed8adc484a85aa65c0fd931cf9ccd2 \ + --hash=sha256:b28d2ca4add7ac16ae8bb6632a3c86e4b9e4d52d3e34267f6e1b0c1f8d87e389 \ + --hash=sha256:b99722ea48b7ea25e8e015e8341ae74624f72e5f21fc2abd45f3a93266de4c5d \ + --hash=sha256:baff393942b550823bfce952bb62270ee17504d02a1801d7fd0719534dfb9c84 \ + --hash=sha256:c0ee987efa6737242745f347835da2cc5bb9f1b42996a4d97d5c7ff7928cb6f2 \ + --hash=sha256:d0d2821003174de06b69e58cef2316a6622b60ee613121199cb2852a873f8cf3 \ + --hash=sha256:e0cf28db0f24a38b2a0ca33a85a54852586e43cf6fd876365c86e0657cfe7d73 \ + --hash=sha256:e4f5a7c49323533f9103d4dacf4e4f07078f360743dec7f7596949149efeec06 \ + 
--hash=sha256:eb58ca0abd96911932f688528977858681a59d61a7ce908ffd355957f7025cfc \ + --hash=sha256:edaf02b82cd7639db00dbff629995ef185c8df4c3ffa71a5562a595765a06ce1 \ + --hash=sha256:fef8c87f8abfb884dac04e97824b61299880c43f4ce675dd2cbeadd3c9b466d2 + # via + # jax + # jaxlib +termcolor==2.5.0 \ + --hash=sha256:37b17b5fc1e604945c2642c872a3764b5d547a48009871aea3edd3afa180afb8 \ + --hash=sha256:998d8d27da6d48442e8e1f016119076b690d962507531df4890fcd2db2ef8a6f + # via -r requirements.txt diff --git a/requirements_lock_3_11.txt b/requirements_lock_3_11.txt new file mode 100644 index 000000000..26200fd67 --- /dev/null +++ b/requirements_lock_3_11.txt @@ -0,0 +1,236 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# bazel run //:requirements_3_11.update +# +cachetools==5.5.0 \ + --hash=sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292 \ + --hash=sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a + # via -r requirements.txt +cloudpickle==3.1.0 \ + --hash=sha256:81a929b6e3c7335c863c771d673d105f02efdb89dfaba0c90495d1c64796601b \ + --hash=sha256:fe11acda67f61aaaec473e3afe030feb131d78a43461b718185363384f1ba12e + # via -r requirements.txt +dill==0.3.9 \ + --hash=sha256:468dff3b89520b474c0397703366b7b95eebe6303f108adf9b19da1f702be87a \ + --hash=sha256:81aa267dddf68cbfe8029c42ca9ec6a4ab3b22371d1c450abc54422577b4512c + # via multiprocess +grpcio==1.66.0 \ + --hash=sha256:0f3010bf46b2a01c9e40644cb9ed91b4b8435e5c500a275da5f9f62580e31e80 \ + --hash=sha256:1c5466222470cb7fbc9cc898af1d48eefd297cb2e2f59af6d4a851c862fa90ac \ + --hash=sha256:1eb03524d0f55b965d6c86aa44e5db9e5eaa15f9ed3b164621e652e5b927f4b8 \ + --hash=sha256:230cdd696751e7eb1395718cd308234749daa217bb8d128f00357dc4df102558 \ + --hash=sha256:245b08f9b3c645a6a623f3ed4fa43dcfcd6ad701eb9c32511c1bb7380e8c3d23 \ + --hash=sha256:296a45ea835e12a1cc35ab0c57e455346c272af7b0d178e29c67742167262b4c \ + 
--hash=sha256:37514b68a42e9cf24536345d3cf9e580ffd29117c158b4eeea34625200256067 \ + --hash=sha256:375b58892301a5fc6ca7d7ff689c9dc9d00895f5d560604ace9f4f0573013c63 \ + --hash=sha256:423ae18637cd99ddcf2e5a6851c61828c49e9b9d022d0442d979b4f230109787 \ + --hash=sha256:49234580a073ce7ac490112f6c67c874cbcb27804c4525978cdb21ba7f3f193c \ + --hash=sha256:508411df1f2b7cfa05d4d7dbf3d576fe4f949cd61c03f3a6f0378c84e3d7b963 \ + --hash=sha256:50cea8ce2552865b87e3dffbb85eb21e6b98d928621600c0feda2f02449cd837 \ + --hash=sha256:516fdbc8e156db71a004bc431a6303bca24cfde186babe96dde7bd01e8f0cc70 \ + --hash=sha256:526d4f6ca19f31b25606d5c470ecba55c0b22707b524e4de8987919e8920437d \ + --hash=sha256:53d4c6706b49e358a2a33345dbe9b6b3bb047cecd7e8c07ba383bd09349bfef8 \ + --hash=sha256:5b15ef1b296c4e78f15f64fc65bf8081f8774480ffcac45642f69d9d753d9c6b \ + --hash=sha256:5e8140b39f10d7be2263afa2838112de29374c5c740eb0afd99146cb5bdbd990 \ + --hash=sha256:5ea27f4ce8c0daccfdd2c7961e6ba404b6599f47c948415c4cca5728739107a3 \ + --hash=sha256:5f4b3357e59dfba9140a51597287297bc638710d6a163f99ee14efc19967a821 \ + --hash=sha256:5f93fc84b72bbc7b84a42f3ca9dc055fa00d2303d9803be011ebf7a10a4eb833 \ + --hash=sha256:643d8d9632a688ae69661e924b862e23c83a3575b24e52917ec5bcc59543d212 \ + --hash=sha256:684a4c07883cbd4ac864f0d08d927267404f5f0c76f31c85f9bbe05f2daae2f2 \ + --hash=sha256:6d586a95c05c82a5354be48bb4537e1accaf2472d8eb7e9086d844cbff934482 \ + --hash=sha256:6ed35bf7da3fb3b1949e32bdf47a8b5ffe0aed11722d948933bd068531cd4682 \ + --hash=sha256:748452dbd5a047475d5413bdef08b0b9ceb2c0c0e249d4ee905a5fb82c6328dc \ + --hash=sha256:7bc9d823e05d63a87511fb456dcc48dc0fced86c282bf60229675e7ee7aac1a1 \ + --hash=sha256:8096a922eb91bc97c839f675c3efa1257c6ef181ae1b25d3fb97f2cae4c57c01 \ + --hash=sha256:832945e64176520520317b50d64ec7d79924429528d5747669b52d0bf2c7bd78 \ + --hash=sha256:8fc5c710ddd51b5a0dc36ef1b6663430aa620e0ce029b87b150dafd313b978c3 \ + --hash=sha256:921b8f7f25d5300d7c6837a1e0639ef145fbdbfb728e0a5db2dbccc9fc0fd891 \ + 
--hash=sha256:9d5251578767fe44602688c851c2373b5513048ac84c21a0fe946590a8e7933d \ + --hash=sha256:a639d3866bfb5a678b5c0b92cd7ab543033ed8988854290fd86145e71731fd4c \ + --hash=sha256:aaf30c75cbaf30e561ca45f21eb1f729f0fab3f15c592c1074795ed43e3ff96f \ + --hash=sha256:ad7256f224437b2c29c2bef98ddd3130454c5b1ab1f0471fc11794cefd4dbd3d \ + --hash=sha256:ba18cfdc09312eb2eea6fa0ce5d2eec3cf345ea78f6528b2eaed6432105e0bd0 \ + --hash=sha256:ba60ae3b465b3e85080ae3bfbc36fd0305ae495ab16fcf8022fc7d7a23aac846 \ + --hash=sha256:bc008c6afa1e7c8df99bd9154abc4f0470d26b7730ca2521122e99e771baa8c7 \ + --hash=sha256:c072f90a1f0409f827ae86266984cba65e89c5831a0726b9fc7f4b5fb940b853 \ + --hash=sha256:c1ea4c528e7db6660718e4165fd1b5ac24b79a70c870a7bc0b7bdb9babab7c1e \ + --hash=sha256:c3084e590e857ba7585ae91078e4c9b6ef55aaf1dc343ce26400ba59a146eada \ + --hash=sha256:c3f6feb0dc8456d025e566709f7dd02885add99bedaac50229013069242a1bfd \ + --hash=sha256:d0439a970d65327de21c299ea0e0c2ad0987cdaf18ba5066621dea5f427f922b \ + --hash=sha256:dd614370e939f9fceeeb2915111a0795271b4c11dfb5fc0f58449bee40c726a5 \ + --hash=sha256:de9e20a0acb709dcfa15a622c91f584f12c9739a79c47999f73435d2b3cc8a3b \ + --hash=sha256:e36fa838ac1d6c87198ca149cbfcc92e1af06bb8c8cd852622f8e58f33ea3324 \ + --hash=sha256:e8d20308eeae15b3e182f47876f05acbdec1eebd9473a9814a44e46ec4a84c04 + # via -r requirements.txt +jax[cpu]==0.4.34 \ + --hash=sha256:44196854f40c5f9cea3142824b9f1051f85afc3fcf7593ec5479fc8db01c58db \ + --hash=sha256:b957ca1fc91f7343f91a186af9f19c7f342c946f95a8c11c7f1e5cdfe2e58d9e + # via -r requirements.txt +jaxlib==0.4.34 \ + --hash=sha256:096f0ca309d41fa692a9d1f2f9baab1c5c8ca0749876ebb3f748e738a27c7ff4 \ + --hash=sha256:133070d4fec5525ffea4dc72956398c1cf647a04dcb37f8a935ee82af78d9965 \ + --hash=sha256:1a30771d85fa77f9ab8f18e63240f455ab3a3f87660ed7b8d5eea6ceecbe5c1e \ + --hash=sha256:3bcfa639ca3cfaf86c8ceebd5fc0d47300fd98a078014a1d0cc03133e1523d5f \ + --hash=sha256:3e60bc826933082e99b19b87c21818a8d26fcdb01f418d47cedff554746fd6cc \ + 
--hash=sha256:45d719a2ce0ebf21255a277b71d756f3609b7b5be70cddc5d88fd58c35219de0 \ + --hash=sha256:48272e9034ff868d4328cf0055a07882fd2be93f59dfb6283af7de491f9d1290 \ + --hash=sha256:571ef03259835458111596a71a2f4a6fabf4ec34595df4cea555035362ac5bf0 \ + --hash=sha256:6b43a974c5d91a19912d138f2658dd8dbb7d30dcdff5c961d896c673e872b611 \ + --hash=sha256:72e22e99a5dc890a64443c3fc12f13f20091f578c405a76de077ba42b4c62cd7 \ + --hash=sha256:7be673a876ebd1aef440fb7e3ebaf99a91abeb550c9728c644b7d7c7b5d7c108 \ + --hash=sha256:87f25a477cd279840e53718403f97092eba0e8a945fcab47bcf435b6f9119dda \ + --hash=sha256:8ee3f93836e53c86556ccd9449a4ea43516ee05184d031a71dd692e81259f7d9 \ + --hash=sha256:901cb4040ed24eae40071d8114ea8d10dff436277fa74a1a5b9e7206f641151c \ + --hash=sha256:b0001c8f0e2b1c7bc99e4f314b524a340d25653505c1a1484d4041a9d3617f6f \ + --hash=sha256:b7a212a3cb5c6acc201c32ae4f4b5f5a9ac09457fbb77ba8db5ce7e7d4adc214 \ + --hash=sha256:c303f5acaf6c56ce5ff133a923c9b6247bdebedde15bd2c893c24be4d8f71306 \ + --hash=sha256:c7b3e724a30426a856070aba0192b5d199e95b4411070e7ad96ad8b196877b10 \ + --hash=sha256:c9d3adcae43a33aad4332be9c2aedc5ef751d1e755f917a5afb30c7872eacaa8 \ + --hash=sha256:d840e64b85f8865404d6d225b9bb340e158df1457152a361b05680e24792b232 + # via jax +ml-dtypes==0.5.0 \ + --hash=sha256:099e09edd54e676903b4538f3815b5ab96f5b119690514602d96bfdb67172cbe \ + --hash=sha256:2e7534392682c3098bc7341648c650864207169c654aed83143d7a19c67ae06f \ + --hash=sha256:3e7d3a380fe73a63c884f06136f8baa7a5249cc8e9fdec677997dd78549f8128 \ + --hash=sha256:54415257f00eb44fbcc807454efac3356f75644f1cbfc2d4e5522a72ae1dacab \ + --hash=sha256:5f2b59233a0dbb6a560b3137ed6125433289ccba2f8d9c3695a52423a369ed15 \ + --hash=sha256:60275f2b51b56834e840c4809fca840565f9bf8e9a73f6d8c94f5b5935701215 \ + --hash=sha256:76942f6aeb5c40766d5ea62386daa4148e6a54322aaf5b53eae9e7553240222f \ + --hash=sha256:7ee9c320bb0f9ffdf9f6fa6a696ef2e005d1f66438d6f1c1457338e00a02e8cf \ + 
--hash=sha256:8c32138975797e681eb175996d64356bcfa124bdbb6a70460b9768c2b35a6fa4 \ + --hash=sha256:968fede07d1f9b926a63df97d25ac656cac1a57ebd33701734eaf704bc55d8d8 \ + --hash=sha256:a03fc861b86cc586728e3d093ba37f0cc05e65330c3ebd7688e7bae8290f8859 \ + --hash=sha256:a38df8df61194aeaae1ab7579075779b4ad32cd1cffd012c28be227fa7f2a70a \ + --hash=sha256:a988bac6572630e1e9c2edd9b1277b4eefd1c86209e52b0d061b775ac33902ff \ + --hash=sha256:ab046f2ff789b1f11b2491909682c5d089934835f9a760fafc180e47dcb676b8 \ + --hash=sha256:afa08343069874a30812871d639f9c02b4158ace065601406a493a8511180c02 \ + --hash=sha256:c7a9152f5876fef565516aa5dd1dccd6fc298a5891b2467973905103eb5c7856 \ + --hash=sha256:cb5cc7b25acabd384f75bbd78892d0c724943f3e2e1986254665a1aa10982e07 \ + --hash=sha256:d3b3db9990c3840986a0e70524e122cfa32b91139c3653df76121ba7776e015f \ + --hash=sha256:d4b1a70a3e5219790d6b55b9507606fc4e02911d1497d16c18dd721eb7efe7d0 \ + --hash=sha256:dc74fd9995513d33eac63d64e436240f5494ec74d522a9f0920194942fc3d2d7 \ + --hash=sha256:e04fde367b2fe901b1d47234426fe8819909bd1dd862a5adb630f27789c20599 + # via + # jax + # jaxlib +multiprocess==0.70.17 \ + --hash=sha256:1d52f068357acd1e5bbc670b273ef8f81d57863235d9fbf9314751886e141968 \ + --hash=sha256:20c28ca19079a6c879258103a6d60b94d4ffe2d9da07dda93fb1c8bc6243f522 \ + --hash=sha256:27b8409c02b5dd89d336107c101dfbd1530a2cd4fd425fc27dcb7adb6e0b47bf \ + --hash=sha256:2818af14c52446b9617d1b0755fa70ca2f77c28b25ed97bdaa2c69a22c47b46c \ + --hash=sha256:2884701445d0177aec5bd5f6ee0df296773e4fb65b11903b94c613fb46cfb7d1 \ + --hash=sha256:2b12e081df87ab755190e227341b2c3b17ee6587e9c82fecddcbe6aa812cd7f7 \ + --hash=sha256:2ea0939b0f4760a16a548942c65c76ff5afd81fbf1083c56ae75e21faf92e426 \ + --hash=sha256:349525099a0c9ac5936f0488b5ee73199098dac3ac899d81d326d238f9fd3ccd \ + --hash=sha256:38357ca266b51a2e22841b755d9a91e4bb7b937979a54d411677111716c32744 \ + --hash=sha256:4ae2f11a3416809ebc9a48abfc8b14ecce0652a0944731a1493a3c1ba44ff57a \ + 
--hash=sha256:7ddb24e5bcdb64e90ec5543a1f05a39463068b6d3b804aa3f2a4e16ec28562d6 \ + --hash=sha256:a0f01cd9d079af7a8296f521dc03859d1a414d14c1e2b6e676ef789333421c95 \ + --hash=sha256:a22a6b1a482b80eab53078418bb0f7025e4f7d93cc8e1f36481477a023884861 \ + --hash=sha256:c2c82d0375baed8d8dd0d8c38eb87c5ae9c471f8e384ad203a36f095ee860f67 \ + --hash=sha256:c3feb874ba574fbccfb335980020c1ac631fbf2a3f7bee4e2042ede62558a021 \ + --hash=sha256:d729f55198a3579f6879766a6d9b72b42d4b320c0dcb7844afb774d75b573c62 + # via -r requirements.txt +numpy==1.26.4 \ + --hash=sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b \ + --hash=sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818 \ + --hash=sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20 \ + --hash=sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0 \ + --hash=sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010 \ + --hash=sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a \ + --hash=sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea \ + --hash=sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c \ + --hash=sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71 \ + --hash=sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110 \ + --hash=sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be \ + --hash=sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a \ + --hash=sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a \ + --hash=sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5 \ + --hash=sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed \ + --hash=sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd \ + --hash=sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c \ + 
--hash=sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e \ + --hash=sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0 \ + --hash=sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c \ + --hash=sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a \ + --hash=sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b \ + --hash=sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0 \ + --hash=sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6 \ + --hash=sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2 \ + --hash=sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a \ + --hash=sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30 \ + --hash=sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218 \ + --hash=sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5 \ + --hash=sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07 \ + --hash=sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2 \ + --hash=sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4 \ + --hash=sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764 \ + --hash=sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef \ + --hash=sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3 \ + --hash=sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f + # via + # -r requirements.txt + # jax + # jaxlib + # ml-dtypes + # scipy +opt-einsum==3.4.0 \ + --hash=sha256:69bb92469f86a1565195ece4ac0323943e83477171b91d24c35afe028a90d7cd \ + --hash=sha256:96ca72f1b886d148241348783498194c577fa30a8faac108586b14f1ba4473ac + # via jax +protobuf==5.27.3 \ + --hash=sha256:043853dcb55cc262bf2e116215ad43fa0859caab79bb0b2d31b708f128ece035 \ + 
--hash=sha256:16ddf3f8c6c41e1e803da7abea17b1793a97ef079a912e42351eabb19b2cffe7 \ + --hash=sha256:68248c60d53f6168f565a8c76dc58ba4fa2ade31c2d1ebdae6d80f969cdc2d4f \ + --hash=sha256:82460903e640f2b7e34ee81a947fdaad89de796d324bcbc38ff5430bcdead82c \ + --hash=sha256:8572c6533e544ebf6899c360e91d6bcbbee2549251643d32c52cf8a5de295ba5 \ + --hash=sha256:a55c48f2a2092d8e213bd143474df33a6ae751b781dd1d1f4d953c128a415b25 \ + --hash=sha256:af7c0b7cfbbb649ad26132e53faa348580f844d9ca46fd3ec7ca48a1ea5db8a1 \ + --hash=sha256:b8a994fb3d1c11156e7d1e427186662b64694a62b55936b2b9348f0a7c6625ce \ + --hash=sha256:c2a105c24f08b1e53d6c7ffe69cb09d0031512f0b72f812dd4005b8112dbe91e \ + --hash=sha256:c84eee2c71ed83704f1afbf1a85c3171eab0fd1ade3b399b3fad0884cbcca8bf \ + --hash=sha256:dcb307cd4ef8fec0cf52cb9105a03d06fbb5275ce6d84a6ae33bc6cf84e0a07b + # via -r requirements.txt +scipy==1.14.1 \ + --hash=sha256:0c2f95de3b04e26f5f3ad5bb05e74ba7f68b837133a4492414b3afd79dfe540e \ + --hash=sha256:1729560c906963fc8389f6aac023739ff3983e727b1a4d87696b7bf108316a79 \ + --hash=sha256:278266012eb69f4a720827bdd2dc54b2271c97d84255b2faaa8f161a158c3b37 \ + --hash=sha256:2843f2d527d9eebec9a43e6b406fb7266f3af25a751aa91d62ff416f54170bc5 \ + --hash=sha256:2da0469a4ef0ecd3693761acbdc20f2fdeafb69e6819cc081308cc978153c675 \ + --hash=sha256:2ff0a7e01e422c15739ecd64432743cf7aae2b03f3084288f399affcefe5222d \ + --hash=sha256:2ff38e22128e6c03ff73b6bb0f85f897d2362f8c052e3b8ad00532198fbdae3f \ + --hash=sha256:30ac8812c1d2aab7131a79ba62933a2a76f582d5dbbc695192453dae67ad6310 \ + --hash=sha256:3a1b111fac6baec1c1d92f27e76511c9e7218f1695d61b59e05e0fe04dc59617 \ + --hash=sha256:4079b90df244709e675cdc8b93bfd8a395d59af40b72e339c2287c91860deb8e \ + --hash=sha256:5149e3fd2d686e42144a093b206aef01932a0059c2a33ddfa67f5f035bdfe13e \ + --hash=sha256:5a275584e726026a5699459aa72f828a610821006228e841b94275c4a7c08417 \ + --hash=sha256:631f07b3734d34aced009aaf6fedfd0eb3498a97e581c3b1e5f14a04164a456d \ + 
--hash=sha256:716e389b694c4bb564b4fc0c51bc84d381735e0d39d3f26ec1af2556ec6aad94 \ + --hash=sha256:8426251ad1e4ad903a4514712d2fa8fdd5382c978010d1c6f5f37ef286a713ad \ + --hash=sha256:8475230e55549ab3f207bff11ebfc91c805dc3463ef62eda3ccf593254524ce8 \ + --hash=sha256:8bddf15838ba768bb5f5083c1ea012d64c9a444e16192762bd858f1e126196d0 \ + --hash=sha256:8e32dced201274bf96899e6491d9ba3e9a5f6b336708656466ad0522d8528f69 \ + --hash=sha256:8f9ea80f2e65bdaa0b7627fb00cbeb2daf163caa015e59b7516395fe3bd1e066 \ + --hash=sha256:97c5dddd5932bd2a1a31c927ba5e1463a53b87ca96b5c9bdf5dfd6096e27efc3 \ + --hash=sha256:a49f6ed96f83966f576b33a44257d869756df6cf1ef4934f59dd58b25e0327e5 \ + --hash=sha256:af29a935803cc707ab2ed7791c44288a682f9c8107bc00f0eccc4f92c08d6e07 \ + --hash=sha256:b05d43735bb2f07d689f56f7b474788a13ed8adc484a85aa65c0fd931cf9ccd2 \ + --hash=sha256:b28d2ca4add7ac16ae8bb6632a3c86e4b9e4d52d3e34267f6e1b0c1f8d87e389 \ + --hash=sha256:b99722ea48b7ea25e8e015e8341ae74624f72e5f21fc2abd45f3a93266de4c5d \ + --hash=sha256:baff393942b550823bfce952bb62270ee17504d02a1801d7fd0719534dfb9c84 \ + --hash=sha256:c0ee987efa6737242745f347835da2cc5bb9f1b42996a4d97d5c7ff7928cb6f2 \ + --hash=sha256:d0d2821003174de06b69e58cef2316a6622b60ee613121199cb2852a873f8cf3 \ + --hash=sha256:e0cf28db0f24a38b2a0ca33a85a54852586e43cf6fd876365c86e0657cfe7d73 \ + --hash=sha256:e4f5a7c49323533f9103d4dacf4e4f07078f360743dec7f7596949149efeec06 \ + --hash=sha256:eb58ca0abd96911932f688528977858681a59d61a7ce908ffd355957f7025cfc \ + --hash=sha256:edaf02b82cd7639db00dbff629995ef185c8df4c3ffa71a5562a595765a06ce1 \ + --hash=sha256:fef8c87f8abfb884dac04e97824b61299880c43f4ce675dd2cbeadd3c9b466d2 + # via + # jax + # jaxlib +termcolor==2.5.0 \ + --hash=sha256:37b17b5fc1e604945c2642c872a3764b5d547a48009871aea3edd3afa180afb8 \ + --hash=sha256:998d8d27da6d48442e8e1f016119076b690d962507531df4890fcd2db2ef8a6f + # via -r requirements.txt diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 04e47948b..000000000 --- a/setup.cfg 
+++ /dev/null @@ -1,9 +0,0 @@ -[pep8] -max-line-length = 80 - -[pycodestyle] -max-line-length = 80 - -[yapf] -based_on_style = pep8 -column_limit = 80 diff --git a/setup.py b/setup.py deleted file mode 100644 index cb1cac998..000000000 --- a/setup.py +++ /dev/null @@ -1,292 +0,0 @@ -# Copyright 2023 Ant Group Co., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Ideas borrowed from: https://github.com/ray-project/ray/blob/master/python/setup.py - -import io -import logging -import os -import platform -import re -import shutil -import subprocess -import sys -from datetime import datetime, timedelta - -import setuptools -import setuptools.command.build_ext - -logger = logging.getLogger(__name__) - -# 3.8 is the minimum python version we can support -SUPPORTED_PYTHONS = [(3, 9), (3, 10), (3, 11)] - -BAZEL_MAX_JOBS = os.getenv("BAZEL_MAX_JOBS") -ROOT_DIR = os.path.dirname(__file__) -SKIP_BAZEL_CLEAN = os.getenv("SKIP_BAZEL_CLEAN") -ENABLE_GPU_BUILD = os.getenv("ENABLE_GPU_BUILD") - -pyd_suffix = ".so" - - -def add_date_to_version(*filepath): - local_time = datetime.utcnow() - chn_time = local_time + timedelta(hours=8) - dstr = chn_time.strftime("%Y%m%d") - with open(os.path.join(ROOT_DIR, *filepath), "r") as fp: - content = fp.read() - - content = content.replace("$$DATE$$", dstr) - - with open(os.path.join(ROOT_DIR, *filepath), "w+") as fp: - fp.write(content) - - -def find_version(*filepath): - add_date_to_version(*filepath) - # Extract version information from 
filepath - with open(os.path.join(ROOT_DIR, *filepath)) as fp: - version_match = re.search( - r"^#define SPU_VERSION ['\"]([^'\"]*)['\"]", fp.read(), re.M - ) - if version_match: - return version_match.group(1) - raise RuntimeError("Unable to find version string.") - - -def read_requirements(*filepath): - requirements = [] - with open(os.path.join(ROOT_DIR, *filepath)) as file: - requirements = file.read().splitlines() - return requirements - - -class SetupSpec: - def __init__(self, name: str, description: str): - self.name: str = name - self.version = find_version("libspu", "version.h") - self.description: str = description - self.files_to_include: list = [] - self.install_requires: list = [] - self.extras: dict = {} - - def get_packages(self): - return setuptools.find_packages() - - -setup_spec = SetupSpec( - "spu", - "SPU aims to be a 'provable', 'measurable' secure computation device.", -) - -# Ideally, we could include these files by putting them in a -# MANIFEST.in or using the package_data argument to setup, but the -# MANIFEST.in gets applied at the very beginning when setup.py runs -# before these files have been created, so we have to move the files -# manually. - -# NOTE: The lists below must be kept in sync with spu/BUILD.bazel. -spu_lib_files = [ - "bazel-bin/spu/libspu" + pyd_suffix, - "bazel-bin/spu/libpsi" + pyd_suffix, -] - -# These are the directories where automatically generated Python protobuf -# bindings are created. 
-generated_python_directories = [ - "bazel-bin/spu", - "bazel-bin/libspu", - "bazel-bin/spu/utils", -] - -setup_spec.install_requires = read_requirements('requirements.txt') - -files_to_remove = [ - "spu/intrinsic/add_new_intrinsic.py", -] - - -# Calls Bazel in PATH -def bazel_invoke(invoker, cmdline, *args, **kwargs): - try: - result = invoker(['bazel'] + cmdline, *args, **kwargs) - return result - except IOError: - raise - - -def build(build_python, build_cpp): - if tuple(sys.version_info[:2]) not in SUPPORTED_PYTHONS: - msg = ( - "Detected Python version {}, which is not supported. " - "Only Python {} are supported." - ).format( - ".".join(map(str, sys.version_info[:2])), - ", ".join(".".join(map(str, v)) for v in SUPPORTED_PYTHONS), - ) - raise RuntimeError(msg) - - bazel_env = dict(os.environ, PYTHON3_BIN_PATH=sys.executable) - - bazel_flags = ["--verbose_failures"] - if BAZEL_MAX_JOBS: - n = int(BAZEL_MAX_JOBS) # the value must be an int - bazel_flags.append("--jobs") - bazel_flags.append(f"{n}") - - bazel_precmd_flags = [] - - bazel_targets = [] - bazel_targets += ["//spu:init", "//spu/utils:distributed"] if build_python else [] - bazel_targets += ["//spu:api"] if build_cpp else [] - - bazel_flags.extend(["-c", "opt"]) - - if sys.platform == "linux" and ENABLE_GPU_BUILD: - bazel_flags.extend(["--config=gpu"]) - - if platform.machine() == "x86_64": - bazel_flags.extend(["--config=avx"]) - - print(f"Build with extra flags = {bazel_flags}") - - return bazel_invoke( - subprocess.check_call, - bazel_precmd_flags + ["build"] + bazel_flags + ["--"] + bazel_targets, - env=bazel_env, - ) - - -def remove_prefix(text, prefix): - return text[text.startswith(prefix) and len(prefix) :] - - -def copy_file(target_dir, filename, rootdir): - source = os.path.relpath(filename, rootdir) - destination = os.path.join(target_dir, remove_prefix(source, 'bazel-bin/')) - - # Create the target directory if it doesn't already exist. 
- os.makedirs(os.path.dirname(destination), exist_ok=True) - if not os.path.exists(destination): - print(f"Copy file from {source} to {destination}") - shutil.copy(source, destination, follow_symlinks=True) - return 1 - return 0 - - -def remove_file(target_dir, filename): - file = os.path.join(target_dir, filename) - if os.path.exists(file): - print(f"delete {file}") - os.remove(file) - return 1 - return 0 - - -def fix_pb(file, old_pattern, new_pattern): - os.chmod(file, 0o666) - with open(file, 'r+') as f: - content = f.read() - content = content.replace(old_pattern, new_pattern) - - with open(file, 'w+') as f: - f.write(content) - - -def pip_run(build_ext): - build(True, True) - - # Change __module__ in psi_pb2.py and pir_pb2.py - fix_pb('bazel-bin/spu/psi_pb2.py', 'psi.psi.psi_pb2', 'spu.psi_pb2') - fix_pb('bazel-bin/spu/link_pb2.py', 'yacl.link.link_pb2', 'link.pir_pb2') - fix_pb('bazel-bin/spu/psi_v2_pb2.py', 'psi.proto.psi_v2_pb2', 'spu.psi_pb2') - fix_pb('bazel-bin/spu/pir_pb2.py', 'psi.pir.pir_pb2', 'spu.pir_pb2') - - setup_spec.files_to_include += spu_lib_files - - # Copy over the autogenerated protobuf Python bindings. - for directory in generated_python_directories: - for filename in os.listdir(directory): - if filename[-3:] == ".py": - setup_spec.files_to_include.append(os.path.join(directory, filename)) - - copied_files = 0 - for filename in setup_spec.files_to_include: - copied_files += copy_file(build_ext.build_lib, filename, ROOT_DIR) - print("# of files copied to {}: {}".format(build_ext.build_lib, copied_files)) - - deleted_files = 0 - for filename in files_to_remove: - deleted_files += remove_file(build_ext.build_lib, filename) - print("# of files deleted in {}: {}".format(build_ext.build_lib, deleted_files)) - - -class build_ext(setuptools.command.build_ext.build_ext): - def run(self): - return pip_run(self) - - -class BinaryDistribution(setuptools.Distribution): - def has_ext_modules(self): - return True - - -# Ensure no remaining lib files. 
-build_dir = os.path.join(ROOT_DIR, "build") -if os.path.isdir(build_dir): - shutil.rmtree(build_dir) - -if not SKIP_BAZEL_CLEAN: - bazel_invoke(subprocess.check_call, ['clean']) - -# Default Linux platform tag -plat_name = "manylinux2014_x86_64" - -if sys.platform == "darwin": - # Due to a bug in conda x64 python, platform tag has to be 10_16 for X64 wheel - if platform.machine() == "x86_64": - plat_name = "macosx_13_0_x86_64" - else: - plat_name = "macosx_13_0_arm64" -elif platform.machine() == "aarch64": - # Linux aarch64 - plat_name = "manylinux_2_28_aarch64" - -setuptools.setup( - name=setup_spec.name, - version=setup_spec.version, - author="SecretFlow Team", - author_email='secretflow-contact@service.alipay.com', - description=(setup_spec.description), - long_description=io.open( - os.path.join(ROOT_DIR, "README.md"), "r", encoding="utf-8" - ).read(), - long_description_content_type='text/markdown', - url="https://github.com/secretflow/spu", - keywords=("spu mpc secretflow compiler vm ABY3 secure computation"), - classifiers=[ - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - ], - packages=setup_spec.get_packages(), - cmdclass={"build_ext": build_ext}, - # The BinaryDistribution argument triggers build_ext. - distclass=BinaryDistribution, - install_requires=setup_spec.install_requires, - setup_requires=["wheel"], - extras_require=setup_spec.extras, - license="Apache 2.0", - options={'bdist_wheel': {'plat_name': plat_name}}, -) diff --git a/sml/cluster/BUILD.bazel b/sml/cluster/BUILD.bazel index 641962ee9..5f789eb76 100644 --- a/sml/cluster/BUILD.bazel +++ b/sml/cluster/BUILD.bazel @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-load("@rules_python//python:defs.bzl", "py_library") +load("//bazel:spu.bzl", "spu_py_binary") package(default_visibility = ["//visibility:public"]) -py_library( +spu_py_binary( name = "kmeans", srcs = ["kmeans.py"], ) diff --git a/sml/cluster/tests/BUILD.bazel b/sml/cluster/tests/BUILD.bazel index 9e30b6f68..413069c74 100644 --- a/sml/cluster/tests/BUILD.bazel +++ b/sml/cluster/tests/BUILD.bazel @@ -12,16 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -load("@rules_python//python:defs.bzl", "py_test") +load("//bazel:spu.bzl", "spu_py_test") package(default_visibility = ["//visibility:public"]) -py_test( +spu_py_test( name = "kmeans_test", srcs = ["kmeans_test.py"], deps = [ "//sml/cluster:kmeans", "//spu:init", "//spu/utils:simulation", + "@spu_pip_dev//scikit_learn:pkg", ], ) diff --git a/sml/decomposition/BUILD.bazel b/sml/decomposition/BUILD.bazel index c80a6751d..38e4d92f4 100644 --- a/sml/decomposition/BUILD.bazel +++ b/sml/decomposition/BUILD.bazel @@ -12,17 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-load("@rules_python//python:defs.bzl", "py_library") +load("//bazel:spu.bzl", "spu_py_library") package(default_visibility = ["//visibility:public"]) -py_library( +spu_py_library( name = "pca", srcs = ["pca.py"], deps = ["//sml/utils:extmath"], ) -py_library( +spu_py_library( name = "nmf", srcs = ["nmf.py"], ) diff --git a/sml/decomposition/tests/BUILD.bazel b/sml/decomposition/tests/BUILD.bazel index fff722206..ae5784aa7 100644 --- a/sml/decomposition/tests/BUILD.bazel +++ b/sml/decomposition/tests/BUILD.bazel @@ -23,6 +23,7 @@ py_test( "//sml/decomposition:pca", "//spu:init", "//spu/utils:simulation", + "@spu_pip_dev//scikit_learn:pkg", ], ) @@ -33,5 +34,6 @@ py_test( "//sml/decomposition:nmf", "//spu:init", "//spu/utils:simulation", + "@spu_pip_dev//scikit_learn:pkg", ], ) diff --git a/sml/ensemble/BUILD.bazel b/sml/ensemble/BUILD.bazel index 2572dc683..88ab3d102 100644 --- a/sml/ensemble/BUILD.bazel +++ b/sml/ensemble/BUILD.bazel @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-load("@rules_python//python:defs.bzl", "py_library") +load("//bazel:spu.bzl", "spu_py_library") package(default_visibility = ["//visibility:public"]) -py_library( +spu_py_library( name = "adaboost", srcs = ["adaboost.py"], deps = [ @@ -24,7 +24,7 @@ py_library( ], ) -py_library( +spu_py_library( name = "forest", srcs = ["forest.py"], deps = [ diff --git a/sml/ensemble/tests/BUILD.bazel b/sml/ensemble/tests/BUILD.bazel index 6815cf853..5b068b310 100644 --- a/sml/ensemble/tests/BUILD.bazel +++ b/sml/ensemble/tests/BUILD.bazel @@ -23,6 +23,7 @@ py_test( "//sml/ensemble:adaboost", "//spu:init", "//spu/utils:simulation", + "@spu_pip_dev//scikit_learn:pkg", ], ) @@ -33,5 +34,6 @@ py_test( "//sml/ensemble:forest", "//spu:init", "//spu/utils:simulation", + "@spu_pip_dev//scikit_learn:pkg", ], ) diff --git a/sml/feature_selection/BUILD.bazel b/sml/feature_selection/BUILD.bazel index dc512dcb3..e2acd5f66 100644 --- a/sml/feature_selection/BUILD.bazel +++ b/sml/feature_selection/BUILD.bazel @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-load("@rules_python//python:defs.bzl", "py_library") +load("//bazel:spu.bzl", "spu_py_library") package(default_visibility = ["//visibility:public"]) -py_library( +spu_py_library( name = "univariate_selection", srcs = ["univariate_selection.py"], ) diff --git a/sml/feature_selection/tests/BUILD.bazel b/sml/feature_selection/tests/BUILD.bazel index 1a776e0bb..4a570e135 100644 --- a/sml/feature_selection/tests/BUILD.bazel +++ b/sml/feature_selection/tests/BUILD.bazel @@ -23,5 +23,6 @@ py_test( "//sml/feature_selection:univariate_selection", "//spu:init", "//spu/utils:simulation", + "@spu_pip_dev//scikit_learn:pkg", ], ) diff --git a/sml/gaussian_process/BUILD.bazel b/sml/gaussian_process/BUILD.bazel index 0c8f6d1a4..b0e219ddf 100644 --- a/sml/gaussian_process/BUILD.bazel +++ b/sml/gaussian_process/BUILD.bazel @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -load("@rules_python//python:defs.bzl", "py_library") +load("//bazel:spu.bzl", "spu_py_library") package(default_visibility = ["//visibility:public"]) -py_library( +spu_py_library( name = "_gpc", srcs = ["_gpc.py"], deps = [ @@ -25,12 +25,12 @@ py_library( ], ) -py_library( +spu_py_library( name = "kernels", srcs = ["kernels.py"], ) -py_library( +spu_py_library( name = "ovo_ovr", srcs = ["ovo_ovr.py"], ) diff --git a/sml/gaussian_process/tests/BUILD.bazel b/sml/gaussian_process/tests/BUILD.bazel index 52e9a0a3c..da0c809df 100644 --- a/sml/gaussian_process/tests/BUILD.bazel +++ b/sml/gaussian_process/tests/BUILD.bazel @@ -23,5 +23,6 @@ py_test( "//sml/gaussian_process:_gpc", "//spu:init", "//spu/utils:simulation", + "@spu_pip_dev//scikit_learn:pkg", ], ) diff --git a/sml/linear_model/BUILD.bazel b/sml/linear_model/BUILD.bazel index fa4fdd158..69276ee8b 100644 --- a/sml/linear_model/BUILD.bazel +++ b/sml/linear_model/BUILD.bazel @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the 
License. -load("@rules_python//python:defs.bzl", "py_library") +load("//bazel:spu.bzl", "spu_py_library") package(default_visibility = ["//visibility:public"]) -py_library( +spu_py_library( name = "sgd_classifier", srcs = ["sgd_classifier.py"], deps = [ @@ -24,7 +24,7 @@ py_library( ], ) -py_library( +spu_py_library( name = "logistic", srcs = ["logistic.py"], deps = [ @@ -32,7 +32,7 @@ py_library( ], ) -py_library( +spu_py_library( name = "ridge", srcs = ["ridge.py"], deps = [ @@ -40,12 +40,12 @@ py_library( ], ) -py_library( +spu_py_library( name = "pla", srcs = ["pla.py"], ) -py_binary( +spu_py_library( name = "glm", srcs = ["glm.py"], deps = [ @@ -55,7 +55,7 @@ py_binary( ], ) -py_library( +spu_py_library( name = "quantile", srcs = ["quantile.py"], deps = [ diff --git a/sml/linear_model/tests/BUILD.bazel b/sml/linear_model/tests/BUILD.bazel index f729c2067..327be63d9 100644 --- a/sml/linear_model/tests/BUILD.bazel +++ b/sml/linear_model/tests/BUILD.bazel @@ -27,6 +27,8 @@ py_test( "//sml/linear_model:sgd_classifier", "//spu:init", "//spu/utils:simulation", + "@spu_pip//jax:pkg", + "@spu_pip_dev//scikit_learn:pkg", ], ) @@ -37,6 +39,8 @@ py_test( "//sml/linear_model:logistic", "//spu:init", "//spu/utils:simulation", + "@spu_pip_dev//pandas:pkg", + "@spu_pip_dev//scikit_learn:pkg", ], ) @@ -48,6 +52,7 @@ py_test( "//sml/linear_model:ridge", "//spu:init", "//spu/utils:simulation", + "@spu_pip_dev//scikit_learn:pkg", ], ) @@ -58,6 +63,9 @@ py_test( "//sml/linear_model:pla", "//spu:init", "//spu/utils:simulation", + "@spu_pip//jax:pkg", + "@spu_pip_dev//pandas:pkg", + "@spu_pip_dev//scikit_learn:pkg", ], ) @@ -68,6 +76,7 @@ py_test( "//sml/linear_model:glm", "//spu:init", "//spu/utils:simulation", + "@spu_pip_dev//scikit_learn:pkg", ], ) @@ -78,5 +87,8 @@ py_test( "//sml/linear_model:quantile", "//spu:init", "//spu/utils:simulation", + "@spu_pip//jax:pkg", + "@spu_pip_dev//pandas:pkg", + "@spu_pip_dev//scikit_learn:pkg", ], ) diff --git 
a/sml/linear_model/utils/BUILD.bazel b/sml/linear_model/utils/BUILD.bazel index 273290734..c7ff6fe2f 100644 --- a/sml/linear_model/utils/BUILD.bazel +++ b/sml/linear_model/utils/BUILD.bazel @@ -12,27 +12,27 @@ # See the License for the specific language governing permissions and # limitations under the License. -load("@rules_python//python:defs.bzl", "py_library") +load("//bazel:spu.bzl", "spu_py_library") package(default_visibility = ["//visibility:public"]) -py_library( +spu_py_library( name = "link", srcs = ["link.py"], ) -py_library( +spu_py_library( name = "loss", srcs = ["loss.py"], deps = [":link"], ) -py_library( +spu_py_library( name = "solver", srcs = ["solver.py"], ) -py_library( +spu_py_library( name = "_linprog_simplex", srcs = ["_linprog_simplex.py"], ) diff --git a/sml/metrics/classification/BUILD.bazel b/sml/metrics/classification/BUILD.bazel index 0bcaebe59..446294dbf 100644 --- a/sml/metrics/classification/BUILD.bazel +++ b/sml/metrics/classification/BUILD.bazel @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-load("@rules_python//python:defs.bzl", "py_library", "py_test") +load("//bazel:spu.bzl", "spu_py_library", "spu_py_test") package(default_visibility = ["//visibility:public"]) -py_library( +spu_py_library( name = "classification", srcs = ["classification.py"], deps = [ @@ -26,23 +26,24 @@ py_library( ], ) -py_library( +spu_py_library( name = "auc", srcs = ["auc.py"], deps = ["//spu/ops/groupby"], ) -py_test( +spu_py_test( name = "classification_test", srcs = ["classification_test.py"], deps = [ ":classification", "//spu:init", "//spu/utils:simulation", + "@spu_pip_dev//scikit_learn:pkg", ], ) -py_binary( +spu_py_library( name = "classification_emul", srcs = ["classification_emul.py"], deps = [ diff --git a/sml/metrics/regression/BUILD.bazel b/sml/metrics/regression/BUILD.bazel index 88961748e..7e9ef59c4 100644 --- a/sml/metrics/regression/BUILD.bazel +++ b/sml/metrics/regression/BUILD.bazel @@ -12,26 +12,27 @@ # See the License for the specific language governing permissions and # limitations under the License. -load("@rules_python//python:defs.bzl", "py_library", "py_test") +load("//bazel:spu.bzl", "spu_py_binary", "spu_py_library", "spu_py_test") package(default_visibility = ["//visibility:public"]) -py_library( +spu_py_library( name = "regression", srcs = ["regression.py"], ) -py_test( +spu_py_test( name = "regression_test", srcs = ["regression_test.py"], deps = [ ":regression", "//spu:init", "//spu/utils:simulation", + "@spu_pip_dev//scikit_learn:pkg", ], ) -py_binary( +spu_py_binary( name = "regression_emul", srcs = ["regression_emul.py"], deps = [ diff --git a/sml/naive_bayes/BUILD.bazel b/sml/naive_bayes/BUILD.bazel index 369cd2c5d..7b698db21 100644 --- a/sml/naive_bayes/BUILD.bazel +++ b/sml/naive_bayes/BUILD.bazel @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-load("@rules_python//python:defs.bzl", "py_library") +load("//bazel:spu.bzl", "spu_py_library") package(default_visibility = ["//visibility:public"]) -py_library( +spu_py_library( name = "gnb", srcs = ["gnb.py"], ) diff --git a/sml/naive_bayes/tests/BUILD.bazel b/sml/naive_bayes/tests/BUILD.bazel index ca41e4605..8d4680fc8 100644 --- a/sml/naive_bayes/tests/BUILD.bazel +++ b/sml/naive_bayes/tests/BUILD.bazel @@ -23,5 +23,6 @@ py_test( "//sml/naive_bayes:gnb", "//spu:init", "//spu/utils:simulation", + "@spu_pip_dev//scikit_learn:pkg", ], ) diff --git a/sml/neighbors/BUILD.bazel b/sml/neighbors/BUILD.bazel index ff98f7eb8..beb0cc908 100644 --- a/sml/neighbors/BUILD.bazel +++ b/sml/neighbors/BUILD.bazel @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -load("@rules_python//python:defs.bzl", "py_library") +load("//bazel:spu.bzl", "spu_py_library") package(default_visibility = ["//visibility:public"]) -py_library( +spu_py_library( name = "knn", srcs = ["knn.py"], ) diff --git a/sml/neighbors/tests/BUILD.bazel b/sml/neighbors/tests/BUILD.bazel index 5c7052793..c294ab560 100644 --- a/sml/neighbors/tests/BUILD.bazel +++ b/sml/neighbors/tests/BUILD.bazel @@ -23,5 +23,6 @@ py_test( "//sml/neighbors:knn", "//spu:init", "//spu/utils:simulation", + "@spu_pip_dev//scikit_learn:pkg", ], ) diff --git a/sml/preprocessing/BUILD.bazel b/sml/preprocessing/BUILD.bazel index 958236c84..ddc2ad7ca 100644 --- a/sml/preprocessing/BUILD.bazel +++ b/sml/preprocessing/BUILD.bazel @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-load("@rules_python//python:defs.bzl", "py_library") +load("//bazel:spu.bzl", "spu_py_library") package(default_visibility = ["//visibility:public"]) -py_library( +spu_py_library( name = "preprocessing", srcs = ["preprocessing.py"], deps = [ diff --git a/sml/preprocessing/tests/BUILD.bazel b/sml/preprocessing/tests/BUILD.bazel index 994ed985d..ffb24ef30 100644 --- a/sml/preprocessing/tests/BUILD.bazel +++ b/sml/preprocessing/tests/BUILD.bazel @@ -23,5 +23,6 @@ py_test( "//sml/preprocessing", "//spu:init", "//spu/utils:simulation", + "@spu_pip_dev//scikit_learn:pkg", ], ) diff --git a/sml/svm/BUILD.bazel b/sml/svm/BUILD.bazel index fccd63ff4..514cf7854 100644 --- a/sml/svm/BUILD.bazel +++ b/sml/svm/BUILD.bazel @@ -12,16 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -load("@rules_python//python:defs.bzl", "py_library") +load("//bazel:spu.bzl", "spu_py_library") package(default_visibility = ["//visibility:public"]) -py_library( +spu_py_library( name = "smo", srcs = ["smo.py"], ) -py_library( +spu_py_library( name = "svm", srcs = ["svm.py"], deps = [ diff --git a/sml/svm/tests/BUILD.bazel b/sml/svm/tests/BUILD.bazel index 91f54ba01..011da2277 100644 --- a/sml/svm/tests/BUILD.bazel +++ b/sml/svm/tests/BUILD.bazel @@ -23,5 +23,6 @@ py_test( "//sml/svm", "//spu:init", "//spu/utils:simulation", + "@spu_pip_dev//scikit_learn:pkg", ], ) diff --git a/sml/tree/BUILD.bazel b/sml/tree/BUILD.bazel index 439ac5ea0..db4487c74 100644 --- a/sml/tree/BUILD.bazel +++ b/sml/tree/BUILD.bazel @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-load("@rules_python//python:defs.bzl", "py_library") +load("//bazel:spu.bzl", "spu_py_library") package(default_visibility = ["//visibility:public"]) -py_library( +spu_py_library( name = "tree", srcs = ["tree.py"], ) diff --git a/sml/tree/tests/BUILD.bazel b/sml/tree/tests/BUILD.bazel index 2b60bdb16..e96e9140a 100644 --- a/sml/tree/tests/BUILD.bazel +++ b/sml/tree/tests/BUILD.bazel @@ -23,5 +23,6 @@ py_test( "//sml/tree", "//spu:init", "//spu/utils:simulation", + "@spu_pip_dev//scikit_learn:pkg", ], ) diff --git a/sml/utils/BUILD.bazel b/sml/utils/BUILD.bazel index 26866239c..27bb8f7fa 100644 --- a/sml/utils/BUILD.bazel +++ b/sml/utils/BUILD.bazel @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -load("@rules_python//python:defs.bzl", "py_library") +load("//bazel:spu.bzl", "spu_py_library") package(default_visibility = ["//visibility:public"]) -py_library( +spu_py_library( name = "emulation", srcs = [ "emulation.py", @@ -31,12 +31,12 @@ py_library( ], ) -py_library( +spu_py_library( name = "fxp_approx", srcs = ["fxp_approx.py"], ) -py_library( +spu_py_library( name = "extmath", srcs = ["extmath.py"], ) diff --git a/spu/BUILD.bazel b/spu/BUILD.bazel index 839f5fad4..2fbe08df3 100644 --- a/spu/BUILD.bazel +++ b/spu/BUILD.bazel @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+load("@protobuf//bazel:py_proto_library.bzl", "py_proto_library") load("@pybind11_bazel//:build_defs.bzl", "pybind_extension") -load("@rules_proto_grpc//python:defs.bzl", "python_proto_compile") -load("@rules_python//python:defs.bzl", "py_library") load("@rules_python//python:packaging.bzl", "py_package") +load("//bazel:spu.bzl", "spu_py_library") package(default_visibility = ["//visibility:public"]) @@ -66,13 +66,13 @@ pybind_extension( deps = [ ":exported_symbols.lds", ":version_script.lds", - "@psi//psi:launch", + "@psi//psi/apps/psi_launcher:launch", "@psi//psi/legacy:memory_psi", "@yacl//yacl/link", ], ) -py_library( +spu_py_library( name = "api", srcs = [ "api.py", @@ -84,22 +84,19 @@ py_library( ], ) -python_proto_compile( +py_proto_library( name = "psi_py_proto", - output_mode = "NO_PREFIX_FLAT", - protos = ["@psi//psi/proto:psi_proto"], + deps = ["@psi//psi/proto:psi_proto"], ) -python_proto_compile( +py_proto_library( name = "link_py_proto", - output_mode = "NO_PREFIX_FLAT", - protos = ["@yacl//yacl/link:link_proto"], + deps = ["@yacl//yacl/link:link_proto"], ) -python_proto_compile( +py_proto_library( name = "psi_v2_py_proto", - output_mode = "NO_PREFIX", - protos = ["@psi//psi/proto:psi_v2_proto"], + deps = ["@psi//psi/proto:psi_v2_proto"], ) # Hack generated protobuf due to https://github.com/protocolbuffers/protobuf/issues/1491 @@ -107,16 +104,39 @@ genrule( name = "psi_v2_py_proto_fixed", srcs = [":psi_v2_py_proto"], outs = ["psi_v2_pb2.py"], - cmd = "sed 's#from yacl.link import#from . import#g;s#from psi.proto import#from . import#g' $(SRCS) > $(OUTS)", + cmd = "sed 's#from yacl.link import#from . import#g;s#from psi.proto import#from . 
import#g;s#psi.proto.psi_v2_pb2#spu.psi_pb2#g' $(SRCS) > $(OUTS)", ) -py_library( +genrule( + name = "psi_py_proto_fixed", + srcs = [":psi_py_proto"], + outs = ["psi_pb2.py"], + cmd = "sed 's/psi.proto.psi_pb2/spu.psi_pb2/g' $(SRCS) > $(OUTS)", +) + +genrule( + name = "pir_py_proto_fixed", + srcs = [":pir_py_proto"], + outs = ["pir_pb2.py"], + cmd = """ + sed "s/psi.proto.pir_pb2/spu.pir_pb2/g" $(SRCS) > $(OUTS) + """, +) + +genrule( + name = "link_py_proto_fixed", + srcs = [":link_py_proto"], + outs = ["link_pb2.py"], + cmd = "sed 's/yacl.link.link_pb2/link.pir_pb2/g' $(SRCS) > $(OUTS)", +) + +spu_py_library( name = "psi", srcs = [ "psi.py", - ":link_py_proto", - ":pir_py_proto", - ":psi_py_proto", + ":link_py_proto_fixed", + ":pir_py_proto_fixed", + ":psi_py_proto_fixed", ":psi_v2_py_proto_fixed", ], data = [ @@ -125,13 +145,12 @@ py_library( ], ) -python_proto_compile( +py_proto_library( name = "pir_py_proto", - output_mode = "NO_PREFIX_FLAT", - protos = ["@psi//psi/proto:pir_proto"], + deps = ["@psi//psi/proto:pir_proto"], ) -py_library( +spu_py_library( name = "init", srcs = [ "__init__.py", diff --git a/spu/experimental/BUILD.bazel b/spu/experimental/BUILD.bazel index 09f592d25..82dbfe193 100644 --- a/spu/experimental/BUILD.bazel +++ b/spu/experimental/BUILD.bazel @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-load("@rules_python//python:defs.bzl", "py_library") +load("//bazel:spu.bzl", "spu_py_library") -py_library( +spu_py_library( name = "experimentals", srcs = [ "__init__.py", @@ -28,7 +28,7 @@ py_library( ], ) -py_library( +spu_py_library( name = "make_cached_var", srcs = [ "make_cached_var_impl.py", @@ -38,7 +38,7 @@ py_library( ], ) -py_library( +spu_py_library( name = "drop_cached_var", srcs = [ "drop_cached_var_impl.py", diff --git a/spu/intrinsic/BUILD.bazel b/spu/intrinsic/BUILD.bazel index 691e3fa4a..3af31bf86 100644 --- a/spu/intrinsic/BUILD.bazel +++ b/spu/intrinsic/BUILD.bazel @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -load("@rules_python//python:defs.bzl", "py_library") +load("//bazel:spu.bzl", "spu_py_library") -py_library( +spu_py_library( name = "all_intrinsics", srcs = [ "__init__.py", @@ -29,7 +29,7 @@ py_library( ], ) -py_library( +spu_py_library( name = "example", srcs = [ "example_impl.py", @@ -39,7 +39,7 @@ py_library( ], ) -py_library( +spu_py_library( name = "example_binary", srcs = [ "example_binary_impl.py", diff --git a/spu/intrinsic/add_new_intrinsic.py b/spu/intrinsic/add_new_intrinsic.py index 5c77092c2..0aad6bd8d 100755 --- a/spu/intrinsic/add_new_intrinsic.py +++ b/spu/intrinsic/add_new_intrinsic.py @@ -98,7 +98,7 @@ def adapt_build(module_path, check_name): content = ( content + f""" -py_library( +spu_py_library( name = "{check_name}", srcs = [ "{check_name}_impl.py", diff --git a/spu/libpsi.cc b/spu/libpsi.cc index 0beb06eb9..9c7ca4d1d 100644 --- a/spu/libpsi.cc +++ b/spu/libpsi.cc @@ -18,7 +18,7 @@ #include "yacl/base/exception.h" #include "yacl/link/context.h" -#include "psi/launch.h" +#include "psi/apps/psi_launcher/launch.h" #include "psi/legacy/memory_psi.h" #include "psi/utils/progress.h" diff --git a/spu/ops/groupby/BUILD.bazel b/spu/ops/groupby/BUILD.bazel index 2b6747a04..b38f10a40 100644 --- a/spu/ops/groupby/BUILD.bazel +++ 
b/spu/ops/groupby/BUILD.bazel @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -load("@rules_python//python:defs.bzl", "py_library", "py_test") +load("//bazel:spu.bzl", "spu_py_library", "spu_py_test") package(default_visibility = ["//visibility:public"]) -py_library( +spu_py_library( name = "groupby", srcs = [ "__init__.py", @@ -31,7 +31,7 @@ py_library( ], ) -py_library( +spu_py_library( name = "segmentation", srcs = [ "segmentation.py", @@ -39,7 +39,7 @@ py_library( deps = [":utils"], ) -py_library( +spu_py_library( name = "aggregation", srcs = [ "aggregation.py", @@ -47,14 +47,14 @@ py_library( deps = [":utils"], ) -py_library( +spu_py_library( name = "utils", srcs = [ "utils.py", ], ) -py_library( +spu_py_library( name = "groupby_via_shuffle", srcs = [ "groupby_via_shuffle.py", @@ -65,7 +65,7 @@ py_library( ], ) -py_library( +spu_py_library( name = "shuffle", srcs = [ "shuffle.py", @@ -73,14 +73,14 @@ py_library( deps = [":utils"], ) -py_library( +spu_py_library( name = "postprocess", srcs = [ "postprocess.py", ], ) -py_test( +spu_py_test( name = "groupby_test", srcs = ["groupby_test.py"], deps = [ @@ -90,5 +90,6 @@ py_test( ":segmentation", "//spu:init", "//spu/utils:simulation", + "@spu_pip_dev//pandas:pkg", ], ) diff --git a/spu/tests/BUILD.bazel b/spu/tests/BUILD.bazel index d9e348cbb..3c0289316 100644 --- a/spu/tests/BUILD.bazel +++ b/spu/tests/BUILD.bazel @@ -12,20 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-load("@rules_python//python:defs.bzl", "py_binary", "py_library", "py_test") +load("//bazel:spu.bzl", "spu_py_binary", "spu_py_library", "spu_py_test") package(default_visibility = ["//visibility:public"]) -py_library( +spu_py_library( name = "jnp_testbase", srcs = ["jnp_testbase.py"], deps = [ "//spu:api", + "//spu:init", "//spu/utils:simulation", + "@spu_pip_dev//absl_py:pkg", ], ) -py_library( +spu_py_library( name = "utils", srcs = ["utils.py"], deps = [ @@ -33,7 +35,7 @@ py_library( ], ) -py_binary( +spu_py_binary( name = "np_op_status", srcs = ["np_op_status.py"], deps = [ @@ -41,7 +43,7 @@ py_binary( ], ) -py_test( +spu_py_test( name = "jnp_aby3_r128_test", timeout = "long", srcs = ["jnp_aby3_r128_test.py"], @@ -50,7 +52,7 @@ py_test( ], ) -py_test( +spu_py_test( name = "jnp_aby3_r128_test_x64", timeout = "long", srcs = ["jnp_aby3_r128_test.py"], @@ -63,7 +65,7 @@ py_test( ], ) -py_test( +spu_py_test( name = "jnp_aby3_r64_test", timeout = "long", srcs = ["jnp_aby3_r64_test.py"], @@ -72,7 +74,7 @@ py_test( ], ) -py_test( +spu_py_test( name = "jnp_aby3_r64_test_x64", timeout = "long", srcs = ["jnp_aby3_r64_test.py"], @@ -85,7 +87,7 @@ py_test( ], ) -py_test( +spu_py_test( name = "jnp_cheetah_r64_test", size = "enormous", srcs = ["jnp_cheetah_r64_test.py"], @@ -94,7 +96,7 @@ py_test( ], ) -py_test( +spu_py_test( name = "jnp_cheetah_r64_test_x64", size = "enormous", srcs = ["jnp_cheetah_r64_test.py"], @@ -107,7 +109,7 @@ py_test( ], ) -py_test( +spu_py_test( name = "jnp_semi2k_r128_test", timeout = "long", srcs = ["jnp_semi2k_r128_test.py"], @@ -116,7 +118,7 @@ py_test( ], ) -py_test( +spu_py_test( name = "jnp_semi2k_r128_test_x64", timeout = "long", srcs = ["jnp_semi2k_r128_test.py"], @@ -129,7 +131,7 @@ py_test( ], ) -py_test( +spu_py_test( name = "jnp_semi2k_r64_test", timeout = "long", srcs = ["jnp_semi2k_r64_test.py"], @@ -138,7 +140,7 @@ py_test( ], ) -py_test( +spu_py_test( name = "jnp_semi2k_r64_test_x64", timeout = "long", srcs = 
["jnp_semi2k_r64_test.py"], @@ -151,7 +153,7 @@ py_test( ], ) -py_test( +spu_py_test( name = "jnp_ref2k_r64_test", timeout = "long", srcs = ["jnp_ref2k_r64_test.py"], @@ -160,7 +162,7 @@ py_test( ], ) -py_test( +spu_py_test( name = "jnp_ref2k_r64_test_x64", timeout = "long", srcs = ["jnp_ref2k_r64_test.py"], @@ -173,7 +175,7 @@ py_test( ], ) -py_binary( +spu_py_binary( name = "jnp_debug", srcs = ["jnp_debug.py"], deps = [ @@ -182,52 +184,58 @@ py_binary( ], ) -py_test( +spu_py_test( name = "spu_compiler_test", srcs = ["spu_compiler_test.py"], deps = [ "//spu:api", + "//spu:init", "//spu/utils:frontend", "//spu/utils:simulation", ], ) -py_test( +spu_py_test( name = "jax_sanity_test", srcs = ["jax_sanity_test.py"], deps = [ ":jnp_testbase", + "@spu_pip_dev//scikit_learn:pkg", ], ) -py_test( +spu_py_test( name = "spu_runtime_test", srcs = ["spu_runtime_test.py"], deps = [ "//spu:api", + "//spu:init", "//spu/utils:simulation", ], ) -py_test( +spu_py_test( name = "spu_io_test", srcs = ["spu_io_test.py"], deps = [ "//spu:api", + "//spu:init", "//spu/utils:simulation", + "@spu_pip_dev//absl_py:pkg", ], ) -py_test( +spu_py_test( name = "link_test", srcs = ["link_test.py"], deps = [ ":utils", "//spu:api", + "//spu:init", ], ) -py_test( +spu_py_test( name = "legacy_psi_test", srcs = ["legacy_psi_test.py"], data = [ @@ -239,12 +247,13 @@ py_test( ], deps = [ ":utils", + "//spu:init", "//spu:psi", "//spu/utils:simulation", ], ) -py_test( +spu_py_test( name = "psi_test", srcs = ["psi_test.py"], data = [ @@ -256,11 +265,12 @@ py_test( ], deps = [ ":utils", + "//spu:init", "//spu:psi", ], ) -py_test( +spu_py_test( name = "ub_psi_test", srcs = ["ub_psi_test.py"], data = [ @@ -272,11 +282,12 @@ py_test( ], deps = [ ":utils", + "//spu:init", "//spu:psi", ], ) -py_test( +spu_py_test( name = "pir_test", srcs = ["pir_test.py"], data = [ @@ -288,19 +299,29 @@ py_test( ], deps = [ ":utils", + "//spu:init", "//spu:psi", + "@spu_pip_dev//pandas:pkg", ], ) -py_test( +spu_py_test( name = 
"frontend_test", srcs = ["frontend_test.py"], deps = [ + "//spu:init", "//spu/utils:frontend", - ], + ] + select({ + "@bazel_tools//src/conditions:linux_x86_64": [ + "@spu_pip_dev//tensorflow_cpu:pkg", + ], + "//conditions:default": [ + "@spu_pip_dev//tensorflow:pkg", + ], + }), ) -py_test( +spu_py_test( name = "distributed_test", timeout = "short", srcs = ["distributed_test.py"], @@ -310,14 +331,24 @@ py_test( ], deps = [ ":utils", + "//spu:init", "//spu/utils:distributed", - ], + "@spu_pip_dev//grpcio:pkg", + ] + select({ + "@bazel_tools//src/conditions:linux_x86_64": [ + "@spu_pip_dev//tensorflow_cpu:pkg", + ], + "//conditions:default": [ + "@spu_pip_dev//tensorflow:pkg", + ], + }), ) -py_test( +spu_py_test( name = "jax_compile_test", srcs = ["jax_compile_test.py"], deps = [ ":jnp_testbase", + "@spu_pip_dev//flax:pkg", ], ) diff --git a/spu/tests/legacy_psi_test.py b/spu/tests/legacy_psi_test.py index 477e964c7..e286bf2c7 100644 --- a/spu/tests/legacy_psi_test.py +++ b/spu/tests/legacy_psi_test.py @@ -109,20 +109,6 @@ def prep_data(self): return data, expected - def test_reveal(self): - data, expected = self.prep_data() - expected.sort() - - def fn(lctx): - config = psi.MemoryPsiConfig( - psi_type=psi.PsiType.ECDH_PSI_2PC, broadcast_result=True - ) - joint = psi.mem_psi(lctx, config, data[lctx.rank]) - joint.sort() - return self.assertEqual(joint, expected) - - self.run_psi(fn) - def test_reveal_to(self): data, expected = self.prep_data() expected.sort() @@ -161,28 +147,6 @@ def test_ecdh_3pc(self): 3, inputs, outputs, selected_fields, psi.PsiType.ECDH_PSI_3PC ) - def test_kkrt_2pc(self): - print("----------test_kkrt_2pc-------------") - - inputs = ["spu/tests/data/alice.csv", "spu/tests/data/bob.csv"] - outputs = ["./alice-kkrt.csv", "./bob-kkrt.csv"] - selected_fields = ["id", "idx"] - - self.run_streaming_psi( - 2, inputs, outputs, selected_fields, psi.PsiType.KKRT_PSI_2PC - ) - - def test_ecdh_2pc(self): - print("----------test_ecdh_2pc-------------") - - 
inputs = ["spu/tests/data/alice.csv", "spu/tests/data/bob.csv"] - outputs = ["./alice-ecdh.csv", "./bob-ecdh.csv"] - selected_fields = ["id", "idx"] - - self.run_streaming_psi( - 2, inputs, outputs, selected_fields, psi.PsiType.ECDH_PSI_2PC - ) - def test_dppsi_2pc(self): print("----------test_dppsi_2pc-------------") diff --git a/spu/utils/BUILD.bazel b/spu/utils/BUILD.bazel index 71f9e5448..f537897ab 100644 --- a/spu/utils/BUILD.bazel +++ b/spu/utils/BUILD.bazel @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +load("@grpc//bazel:python_rules.bzl", "py_grpc_library", "py_proto_library") load("@rules_proto//proto:defs.bzl", "proto_library") -load("@rules_proto_grpc//python:defs.bzl", "python_grpc_compile") -load("@rules_python//python:defs.bzl", "py_library") +load("//bazel:spu.bzl", "spu_py_library") package(default_visibility = ["//visibility:public"]) -py_library( +spu_py_library( name = "simulation", srcs = ["simulation.py"], deps = [ @@ -32,14 +32,19 @@ proto_library( srcs = ["distributed.proto"], ) -python_grpc_compile( +py_proto_library( + name = "distributed_py_proto", + deps = [":distributed_proto"], +) + +py_grpc_library( name = "distributed_py_proto_grpc", - output_mode = "NO_PREFIX", - prefix_path = "../..", - protos = ["distributed_proto"], + srcs = [":distributed_proto"], + strip_prefixes = ["../.."], + deps = [":distributed_py_proto"], ) -py_library( +spu_py_library( name = "distributed_impl", srcs = [ "distributed_impl.py", @@ -52,7 +57,7 @@ py_library( ], ) -py_library( +spu_py_library( name = "distributed", srcs = [ "distributed.py", @@ -63,7 +68,7 @@ py_library( ], ) -py_library( +spu_py_library( name = "frontend", srcs = ["frontend.py"], deps = [ @@ -71,7 +76,7 @@ py_library( ], ) -py_library( +spu_py_library( name = "polyfill", srcs = ["polyfill.py"], ) diff --git a/version.bzl b/version.bzl new file mode 100644 index 000000000..04b358354 --- /dev/null +++ b/version.bzl 
@@ -0,0 +1,38 @@ +# Copyright 2024 Ant Group Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +SPU_VERSION = "0.9.4.dev20250103" + +def _spu_version_gen(ctx): + ctx.actions.expand_template( + template = ctx.file.template, + output = ctx.outputs.out, + substitutions = { + "@SPU_VERSION@": SPU_VERSION, + }, + ) + +spu_version_gen = rule( + implementation = _spu_version_gen, + attrs = { + "template": attr.label( + mandatory = True, + allow_single_file = True, + ), + "out": attr.output( + mandatory = True, + ), + }, + output_to_genfiles = True, +)