diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml deleted file mode 100644 index 7aed5e1a..00000000 --- a/.github/workflows/cifuzz.yml +++ /dev/null @@ -1,26 +0,0 @@ -name: CIFuzz -on: [pull_request] -jobs: - Fuzzing: - runs-on: ubuntu-latest - steps: - - name: Build Fuzzers - id: build - uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master - with: - oss-fuzz-project-name: 'sentencepiece' - dry-run: false - language: c++ - - name: Run Fuzzers - uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master - with: - oss-fuzz-project-name: 'sentencepiece' - fuzz-seconds: 300 - dry-run: false - language: c++ - - name: Upload Crash - uses: actions/upload-artifact@v3 - if: failure() && steps.build.outcome == 'success' - with: - name: artifacts - path: ./out/artifacts diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml deleted file mode 100644 index eabb2925..00000000 --- a/.github/workflows/cmake.yml +++ /dev/null @@ -1,77 +0,0 @@ -name: CI for general build - -on: - push: - branches: [ master ] - tags: - - 'v*' - pull_request: - branches: [ master ] - -jobs: - build: - strategy: - matrix: - os: [ ubuntu-latest, ubuntu-20.04, windows-latest, macOS-11 ] - arch: [ x64 ] - include: - - os: windows-latest - arch: x86 - runs-on: ${{ matrix.os }} - - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: '3.x' - architecture: ${{matrix.arch}} - - - name: Config for Windows - if: runner.os == 'Windows' - run: | - if ("${{matrix.arch}}" -eq "x64") { - $msbuildPlatform = "x64" - } else { - $msbuildPlatform = "Win32" - } - cmake -A $msbuildPlatform -B ${{github.workspace}}/build -DSPM_BUILD_TEST=ON -DSPM_ENABLE_SHARED=OFF -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/build/root - - - name: Config for Unix - if: runner.os != 'Windows' - run: cmake -B ${{github.workspace}}/build -DSPM_BUILD_TEST=ON -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/build/root - - - name: Build - run: cmake --build ${{github.workspace}}/build --config Release --target install --parallel 8 - - - name: Test - working-directory: ${{github.workspace}}/build - run: ctest -C Release --output-on-failure - - - name: Package - working-directory: ${{github.workspace}}/build - run: cpack - - - name: Build Python wrapper - working-directory: ${{github.workspace}}/python - run: | - python -m pip install --upgrade pip - pip install setuptools wheel twine - python setup.py test - python setup.py bdist_wheel - - - name: Upload artifcacts - uses: actions/upload-artifact@v3 - with: - path: ./build/*.7z - - - name: Upload Release Assets - if: startsWith(github.ref, 'refs/tags/') - uses: svenstaro/upload-release-action@v2 - with: - repo_token: ${{ secrets.GITHUB_TOKEN }} - file: ./build/*.7z - tag: ${{ github.ref }} - overwrite: true - prerelease: true - file_glob: true - body: "This is my release text" diff --git a/.github/workflows/cross_build.yml b/.github/workflows/cross_build.yml deleted file mode 100644 index 5fc6d3a3..00000000 --- a/.github/workflows/cross_build.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: CrossBuild - -on: - push: - branches: [ master ] - tags: - - 'v*' - pull_request: - branches: [ master ] - -jobs: - build: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - arch: [ i686, arm, aarch64, riscv64, powerpc, powerpc64, powerpc64le, s390x, sparc64, m68k, sh4, alpha ] - - steps: - - uses: actions/checkout@v3 - - - name: Install cross tools - run: | - sudo apt-get install -y sudo qemu-user gdb zstd dwarfdump {gcc,g++}-10-{i686,aarch64,riscv64,powerpc,powerpc64,powerpc64le,s390x,sparc64,m68k,sh4,alpha}-linux-gnu {gcc,g++}-10-arm-linux-gnueabihf - sudo ln -sf /usr/bin/arm-linux-gnueabihf-gcc-10 /usr/bin/arm-linux-gnu-gcc-10 - sudo ln -sf /usr/bin/arm-linux-gnueabihf-g++-10 /usr/bin/arm-linux-gnu-g++-10 - sudo ln -sf /usr/arm-linux-gnueabihf /usr/arm-linux-gnu - - - name: Build - run: | - mkdir -p ${{github.workspace}}/build - cd ${{github.workspace}}/build - env CXX=/usr/bin/${{matrix.arch}}-linux-gnu-g++-10 CC=/usr/bin/${{matrix.arch}}-linux-gnu-gcc-10 cmake .. -DSPM_BUILD_TEST=ON -DSPM_ENABLE_SHARED=OFF -DCMAKE_FIND_ROOT_PATH=/usr/${{matrix.arch}}-linux-gnu -DSPM_CROSS_SYSTEM_PROCESSOR=${{matrix.arch}} - make -j$(nproc) - - - name: Test on QEMU - if: matrix.arch != 'sparc64' && matrix.arch != 'm68k' && matrix.arch != 'sh4' - run: | - cd ${{github.workspace}}/build - qemu_arch=`echo ${{matrix.arch}} | sed -e s/powerpc/ppc/ -e s/686/386/` - qemu-${qemu_arch} -L /usr/${{matrix.arch}}-linux-gnu src/spm_test diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index 48d584d3..ebf1f7a6 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -1,44 +1,50 @@ -name: Build Wheels +name: Build and Publish Wheels on: - push: - branches: [ master ] - tags: - - 'v*' - pull_request: - branches: [ master ] + workflow_dispatch: + +env: + DOMAIN: poolside + REPOSITORY: poolside-dagster + WHEEL_DST: /tmp/sp_wheelhouse jobs: build_wheels: - outputs: - digests-linux: ${{ steps.hash-linux.outputs.digests }} - digests-macos: ${{ steps.hash-macos.outputs.digests }} - digests-windows: ${{ steps.hash-windows.outputs.digests }} strategy: matrix: - os: [ubuntu-latest, windows-latest, macOS-11] + os: [ubuntu-latest, macOS-latest] runs-on: ${{ matrix.os }} name: Build wheels on ${{ matrix.os }} + permissions: + id-token: write # This is required for requesting the JWT + contents: read # This is required for actions/checkout + steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: - python-version: "3.x" + python-version: "3.12" - name: Set up QEMU if: runner.os == 'Linux' - uses: docker/setup-qemu-action@v2 + uses: docker/setup-qemu-action@v3 with: platforms: arm64 - - name: Build for Windows - if: runner.os == 'Windows' + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: arn:aws:iam::939990436136:role/gh-action-publish-artifacts-role + aws-region: us-east-1 + + - name: Make sure destination dir exists run: | - cmake -A Win32 -B ${{github.workspace}}/build_win32 -DSPM_ENABLE_SHARED=OFF -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/build/root_win32 - cmake --build ${{github.workspace}}/build_win32 --config Release --target install --parallel 8 - cmake -A x64 -B ${{github.workspace}}/build_amd64 -DSPM_ENABLE_SHARED=OFF -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/build/root_amd64 - cmake --build ${{github.workspace}}/build_amd64 --config Release --target install --parallel 8 + mkdir -p ${{env.WHEEL_DST}} + if [ ! -d ${{env.WHEEL_DST}} ]; then + echo "wheel dest dir does not exist!" + exit 1 + fi - name: Build for Mac if: runner.os == 'macOS' @@ -47,21 +53,23 @@ jobs: cmake --build ${{github.workspace}}/build --config Release --target install --parallel 8 env: CMAKE_OSX_ARCHITECTURES: arm64;x86_64 + MACOSX_DEPLOYMENT_TARGET: 10.13 - - name: Install cibuildwheel + - name: Install dependencies working-directory: ${{github.workspace}}/python run: | python -m pip install --upgrade pip - pip install setuptools wheel twine - python -m pip install cibuildwheel==2.12.0 + pip install setuptools wheel twine build cibuildwheel - name: Build wheels working-directory: ${{github.workspace}}/python - run: python -m cibuildwheel --output-dir wheelhouse + run: python -m cibuildwheel --output-dir ${{env.WHEEL_DST}} env: + CIBW_BUILD: "cp311-* cp312-*" CIBW_ARCHS_LINUX: auto aarch64 CIBW_ARCHS_MACOS: x86_64 universal2 arm64 - CIBW_SKIP: "pp* *-musllinux_*" + CIBW_ARCHS_WINDOWS: auto ARM64 + CIBW_SKIP: "pp* *-musllinux_* *i686*" CIBW_BUILD_VERBOSITY: 1 - name: Build sdist archive @@ -69,79 +77,15 @@ jobs: run: sh build_sdist.sh - name: Fetch sdist archive - uses: tj-actions/glob@v17 + uses: tj-actions/glob@2944188f585a0ec102a6a82d9eeb3aed69785393 # v22.0.1 id: sdist with: files: ./python/dist/*.tar.gz - + - name: Build wheel from sdist - run: python -m pip wheel "${{ steps.sdist.outputs.paths }}" --verbose - - - name: Copy sdist - working-directory: ${{github.workspace}}/python - if: runner.os == 'macOS' - run: cp -f dist/*.tar.gz wheelhouse/ - - - name: Upload artifact - uses: actions/upload-artifact@v3 - with: - path: | - ./python/wheelhouse/*.whl - ./python/wheelhouse/*.tar.gz - - - name: Upload wheel release - if: startsWith(github.ref, 'refs/tags/') - uses: svenstaro/upload-release-action@v2 - with: - repo_token: ${{ secrets.GITHUB_TOKEN }} - file: ./python/wheelhouse/* - tag: ${{ github.ref }} - overwrite: true - prerelease: true - file_glob: true - - - name: Generate SLSA subjects - Macos - id: hash-macos - if: runner.os == 'macOS' - run: echo "digests=$(shasum -a 256 ./python/wheelhouse/* | base64)" >> $GITHUB_OUTPUT - - - name: Generate SLSA subjects - Linux - id: hash-linux - if: runner.os == 'Linux' - run: echo "digests=$(sha256sum ./python/wheelhouse/* | base64 -w0)" >> $GITHUB_OUTPUT - - - name: Generate SLSA subjects - Windows - id: hash-windows - if: runner.os == 'Windows' - run: echo "digests=$(sha256sum ./python/wheelhouse/* | base64 -w0)" >> $GITHUB_OUTPUT - - gather-disgests: - needs: [build_wheels] - outputs: - digests: ${{ steps.hash.outputs.digests }} - runs-on: ubuntu-latest - steps: - - name: Merge results - id: hash - env: - LINUX_DIGESTS: "${{ needs.build_wheels.outputs.digests-linux }}" - MACOS_DIGESTS: "${{ needs.build_wheels.outputs.digests-macos }}" - WINDOWS_DIGESTS: "${{ needs.build_wheels.outputs.digests-windows }}" + run: python -m pip wheel "${{ steps.sdist.outputs.paths }}" --wheel-dir ${{env.WHEEL_DST}} --verbose + + - name: Publish run: | - set -euo pipefail - echo "$LINUX_DIGESTS" | base64 -d > checksums.txt - echo "$MACOS_DIGESTS" | base64 -d >> checksums.txt - echo "$WINDOWS_DIGESTS" | base64 -d >> checksums.txt - echo "digests=$(cat checksums.txt | base64 -w0)" >> $GITHUB_OUTPUT - - provenance: - if: startsWith(github.ref, 'refs/tags/') - needs: [build_wheels, gather-disgests] - permissions: - actions: read # To read the workflow path. - id-token: write # To sign the provenance. - contents: write # To add assets to a release. - uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v1.2.0 - with: - base64-subjects: "${{ needs.gather-disgests.outputs.digests }}" - upload-assets: true # Optional: Upload to a new release + aws codeartifact login --tool twine --domain ${{env.DOMAIN}} --repository ${{ env.REPOSITORY }} + twine upload --verbose --repository codeartifact ${{env.WHEEL_DST}}/* diff --git a/CMakeLists.txt b/CMakeLists.txt index 4a66696d..6445aa1c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -94,6 +94,7 @@ if (APPLE) set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE) set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) + set(CMAKE_OSX_DEPLOYMENT_TARGET 14.0) list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${CMAKE_INSTALL_PREFIX}/lib" isSystemDir) if ("${isSystemDir}" STREQUAL "-1") set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") diff --git a/python/build_bundled.sh b/python/build_bundled.sh index c870ec6a..052a3d87 100755 --- a/python/build_bundled.sh +++ b/python/build_bundled.sh @@ -13,10 +13,10 @@ elif [ -f ../src/CMakeLists.txt ]; then SRC_DIR=.. else # Try taged version. Othewise, use head. - git clone https://github.com/google/sentencepiece.git -b v"${VERSION}" --depth 1 || \ - git clone https://github.com/google/sentencepiece.git --depth 1 + git clone https://github.com/poolsideai/sentencepiece.git -b v"${VERSION}" --depth 1 || \ + git clone https://github.com/poolsideai/sentencepiece.git --depth 1 SRC_DIR=./sentencepiece fi -cmake ${SRC_DIR} -B ${BUILD_DIR} -DCMAKE_BUILD_TYPE=RelWithDebInfo -DSPM_ENABLE_SHARED=OFF -DSPM_ENABLE_TCMALLOC=OFF -DSPM_ONLY_LIB=on -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} -cmake --build ${BUILD_DIR} --config RelWithDebInfo --target install --parallel $(nproc) +cmake ${SRC_DIR} -B ${BUILD_DIR} -DSPM_ENABLE_SHARED=OFF -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} +cmake --build ${BUILD_DIR} --config Release --target install --parallel $(nproc) diff --git a/python/setup.py b/python/setup.py index 1d11cd11..6885ca94 100755 --- a/python/setup.py +++ b/python/setup.py @@ -22,6 +22,7 @@ import subprocess import sys import os +import platform sys.path.append(os.path.join('.', 'test')) @@ -92,9 +93,10 @@ def build_extension(self, ext): if sys.platform == 'darwin': cflags.append('-mmacosx-version-min=10.9') else: - pass - # cflags.append('-Wl,-strip-all') - # libs.append('-Wl,-strip-all') + cflags.append('-Wl,-strip-all') + libs.append('-Wl,-strip-all') + if sys.platform == 'linux': + libs.append('-Wl,-Bsymbolic') print('## cflags={}'.format(' '.join(cflags))) print('## libs={}'.format(' '.join(libs))) ext.extra_compile_args = cflags @@ -197,6 +199,5 @@ def build_extension(self, ext): 'Programming Language :: Python', 'Topic :: Text Processing :: Linguistic', 'Topic :: Software Development :: Libraries :: Python Modules', - ], - test_suite='sentencepiece_test.suite', + ] )