diff --git a/.github/workflows/GithubActionTests.yml b/.github/workflows/GithubActionTests.yml index bb319d1a28d..7b8e74a1d45 100644 --- a/.github/workflows/GithubActionTests.yml +++ b/.github/workflows/GithubActionTests.yml @@ -33,15 +33,12 @@ jobs: conda activate bioconda python setup.py install - - name: Build docker container - run: | - docker build -t quay.io/bioconda/bioconda-utils-build-env-cos7:latest ./ - docker history quay.io/bioconda/bioconda-utils-build-env-cos7:latest - docker run --rm -t quay.io/bioconda/bioconda-utils-build-env-cos7:latest sh -lec 'type -t conda && conda info -a && conda list' - docker build -t quay.io/bioconda/bioconda-utils-test-env-cos7:latest -f ./Dockerfile.test ./ - - name: Run tests '${{ matrix.py_test_marker }}' run: | + export MULLED_CONDA_IMAGE="quay.io/bioconda/create-env:latest" + export DEFAULT_BASE_IMAGE="quay.io/bioconda/base-glibc-busybox-bash:latest" + export DEFAULT_EXTENDED_BASE_IMAGE="quay.io/bioconda/base-glibc-debian-bash:latest" + export DOCKER_BASE_IMAGE="quay.io/bioconda/bioconda-utils-build-env-cos7:latest" eval "$(conda shell.bash hook)" conda activate bioconda if git diff --name-only origin/master...HEAD | grep -vE ^docs; then diff --git a/.github/workflows/build-image.yml b/.github/workflows/build-image.yml deleted file mode 100644 index 1ae6a9ec7bd..00000000000 --- a/.github/workflows/build-image.yml +++ /dev/null @@ -1,63 +0,0 @@ -name: Build image -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -on: - pull_request: - paths-ignore: - - '.circleci/**' - - 'docs/**' - - 'test/**' - -jobs: - build: - name: Build image - runs-on: ubuntu-20.04 - strategy: - matrix: - include: - - arch: arm64 - image: bioconda-utils-build-env-cos7-aarch64 - base_image: quay.io/condaforge/linux-anvil-aarch64 - - arch: amd64 - image: bioconda-utils-build-env-cos7 - base_image: quay.io/condaforge/linux-anvil-cos7-x86_64 - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 
- - - id: get-tag - run: | - tag=${{ github.event.release && github.event.release.tag_name || github.sha }} - - # https://github.blog/changelog/2022-10-11-github-actions-deprecating-save-state-and-set-output-commands/ - # printf %s "::set-output name=tag::${tag#v}" - printf %s "tag=${tag#v}" >> $GITHUB_OUTPUT - - - name: Install qemu dependency - run: | - sudo apt-get update - sudo apt-get install -y qemu-user-static - - - name: Build image - id: buildah-build - uses: redhat-actions/buildah-build@v2 - with: - image: ${{ matrix.image }} - arch: ${{ matrix.arch }} - build-args: | - BASE_IMAGE=${{ matrix.base_image }} - tags: >- - latest - ${{ steps.get-tag.outputs.tag }} - dockerfiles: | - ./Dockerfile - - - name: Test built image - run: | - image='${{ steps.buildah-build.outputs.image }}' - for tag in ${{ steps.buildah-build.outputs.tags }} ; do - podman run --rm "${image}:${tag}" bioconda-utils --version - done diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml new file mode 100644 index 00000000000..7daf2f80aed --- /dev/null +++ b/.github/workflows/build-images.yml @@ -0,0 +1,382 @@ +# Build all container images. +# +# Most of the work is done in generic_build.bash, so see that file for details. + +name: Build images +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +on: + pull_request: + paths-ignore: + - '.circleci/**' + - 'docs/**' + - 'test/**' + +env: + BIOCONDA_UTILS_FOLDER: bioconda-utils + DEBIAN_VERSION: "12.2" + BUSYBOX_VERSION: "1.36.1" + BASE_TAG: "0.1.6" # "latest" will always be added during the build. + BUILD_ENV_IMAGE_NAME: tmp-build-env + CREATE_ENV_IMAGE_NAME: tmp-create-env + BASE_DEBIAN_IMAGE_NAME: tmp-debian + BASE_BUSYBOX_IMAGE_NAME: tmp-busybox + ARCHS: "amd64 arm64" + +jobs: + + build-base-debian: + # NOTE: base-debian can be a separate job since it is independent of the + # others. 
create-env depends on build-env, and both depend on base-busybox, + # so we can't split that out. + # + # Later steps for other containers are similar, so comments are only added to + # this first job. + name: Build base-debian + outputs: + TAG_EXISTS_base-debian: ${{ steps.base-debian.outputs.TAG_EXISTS_base-debian }} + runs-on: ubuntu-20.04 + steps: + + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + # Required for emulating ARM + - name: Install qemu dependency + run: | + sudo apt-get update + sudo apt-get install -y qemu-user-static + + - name: Build base-debian + id: base-debian + run: | + # See generic_build.bash for expected env vars. The script will exit 64 + # if the tag exists. That's OK, and we don't want the entire Actions + # workflow to fail because of it, so we check the exit code. + IMAGE_NAME=$BASE_DEBIAN_IMAGE_NAME \ + IMAGE_DIR=images/base-glibc-debian-bash \ + TYPE="base-debian" \ + DEBIAN_VERSION=$DEBIAN_VERSION \ + ARCHS=$ARCHS \ + TAG=$BASE_TAG \ + ./generic_build.bash || [ $? == 64 ] + + # generic_build.bash will write key=val lines to the log ($TYPE.log); + # these lines are added to $GITHUB_OUTPUT so that later steps can use + # steps.id.outputs.key to get the value. See generic_build.bash for + # what it's writing to the log (and therefore which keys are available + # via the step's outputs). + cat "base-debian.log" >> $GITHUB_OUTPUT + + - name: push to ghcr + if: '${{ ! steps.base-debian.outputs.TAG_EXISTS_base-debian }}' + run: | + echo '${{ secrets.GITHUB_TOKEN }}' | podman login ghcr.io -u '${{ github.actor }}' --password-stdin + podman push localhost/${BASE_DEBIAN_IMAGE_NAME}:${BASE_TAG} ghcr.io/bioconda/${BASE_DEBIAN_IMAGE_NAME}:${BASE_TAG} + podman push localhost/${BASE_DEBIAN_IMAGE_NAME}:latest ghcr.io/bioconda/${BASE_DEBIAN_IMAGE_NAME}:latest + + build-others: + # Other containers are interdependent, so we build them sequentially. 
+ # The steps are largely similar to base-debian above, so check there for + # comments on common parts. + name: Build base-busybox, build-env, and create-env images + outputs: + TAG_EXISTS_base-busybox: ${{ steps.base-busybox.outputs.TAG_EXISTS_base-busybox }} + TAG_EXISTS_build-env: ${{ steps.build-env.outputs.TAG_EXISTS_build-env }} + TAG_EXISTS_create-env: ${{ steps.create-env.outputs.TAG_EXISTS_create-env }} + BIOCONDA_UTILS_TAG: ${{ steps.get-tag.outputs.tag }} + + runs-on: ubuntu-20.04 + steps: + + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - id: get-tag + # Get an appropriate tag to represent the version of bioconda-utils being + # used, and make it available to other steps as outputs. This will be used + # as BIOCONDA_UTILS_VERSION in later steps. + run: | + tag=${{ github.event.release && github.event.release.tag_name || github.head_ref || github.ref_name }} + printf %s "tag=${tag#v}" >> $GITHUB_OUTPUT + + - name: Install qemu dependency + run: | + sudo apt-get update + sudo apt-get install -y qemu-user-static + + - name: Build base-busybox + id: base-busybox + run: | + IMAGE_NAME=$BASE_BUSYBOX_IMAGE_NAME \ + IMAGE_DIR=images/base-glibc-busybox-bash \ + TYPE="base-busybox" \ + ARCHS=$ARCHS \ + DEBIAN_VERSION=$DEBIAN_VERSION \ + BUSYBOX_VERSION=$BUSYBOX_VERSION \ + TAG=$BASE_TAG \ + ./generic_build.bash || [ $? == 64 ] + cat "base-busybox.log" >> $GITHUB_OUTPUT + + - name: push base-busybox to ghcr + if: '${{ ! 
steps.base-busybox.outputs.TAG_EXISTS_base-busybox }}' + run: | + echo '${{ secrets.GITHUB_TOKEN }}' | podman login ghcr.io -u '${{ github.actor }}' --password-stdin + podman push localhost/${BASE_BUSYBOX_IMAGE_NAME}:${BASE_TAG} ghcr.io/bioconda/${BASE_BUSYBOX_IMAGE_NAME}:${BASE_TAG} + podman push localhost/${BASE_BUSYBOX_IMAGE_NAME}:latest ghcr.io/bioconda/${BASE_BUSYBOX_IMAGE_NAME}:latest + + - name: Build build-env + id: build-env + run: | + # The build-env Dockerfile expects bioconda-utils to be cloned; even + # though this CI is operating in the bioconda-utils repo, the code + # needs to be available in the build context, which is in the + # respective image dir. + if [ ! -e "images/bioconda-utils-build-env-cos7/bioconda-utils" ]; then + git clone https://github.com/bioconda/bioconda-utils images/bioconda-utils-build-env-cos7/bioconda-utils + else + (cd images/bioconda-utils-build-env-cos7/bioconda-utils && git fetch) + fi + + # If the busybox image was not built in this CI run (e.g. if the + # specified tags already exist on quay.io) then we'll get it from + # quay.io. Otherwise use the just-built one. + REGISTRY="localhost" + if [ ${{ steps.base-busybox.outputs.TAG_EXISTS_base-busybox }} ]; then + REGISTRY="quay.io/bioconda" + fi + + BIOCONDA_UTILS_VERSION='${{ steps.get-tag.outputs.tag }}' + + IMAGE_NAME=$BUILD_ENV_IMAGE_NAME \ + IMAGE_DIR=images/bioconda-utils-build-env-cos7 \ + ARCHS=$ARCHS \ + TYPE="build-env" \ + BIOCONDA_UTILS_VERSION=$BIOCONDA_UTILS_VERSION \ + TAG="${BIOCONDA_UTILS_VERSION}-base${BASE_TAG}" \ + BUSYBOX_IMAGE="${REGISTRY}/${BASE_BUSYBOX_IMAGE_NAME}:${BASE_TAG}" \ + ./generic_build.bash || [ $? == 64 ] + cat "build-env.log" >> $GITHUB_OUTPUT + + - name: push build-env to ghcr + if: '${{ ! 
steps.build-env.outputs.TAG_EXISTS_build-env }}' + run: | + echo '${{ secrets.GITHUB_TOKEN }}' | podman login ghcr.io -u '${{ github.actor }}' --password-stdin + BIOCONDA_UTILS_VERSION='${{ steps.get-tag.outputs.tag }}' + podman push "localhost/${BUILD_ENV_IMAGE_NAME}:${BIOCONDA_UTILS_VERSION}-base${BASE_TAG}" "ghcr.io/bioconda/${BUILD_ENV_IMAGE_NAME}:${BIOCONDA_UTILS_VERSION}-base${BASE_TAG}" + podman push "localhost/${BUILD_ENV_IMAGE_NAME}:latest" "ghcr.io/bioconda/${BUILD_ENV_IMAGE_NAME}:latest" + + - name: Build create-env + id: create-env + run: | + # Here we extract the conda and mamba versions from the just-created + # build-env container (or, if it was not created in this CI run because + # it already exists, then pull from quay.io). This ensures that when + # creating environments, we use the exact same conda/mamba versions + # that were used when building the package. + BIOCONDA_UTILS_VERSION='${{ steps.get-tag.outputs.tag }}' + REGISTRY="localhost" + if [ ${{ steps.build-env.outputs.TAG_EXISTS_build-env }} ]; then + REGISTRY="quay.io/bioconda" + fi + CONDA_VERSION=$( + podman run -t "${REGISTRY}/${BUILD_ENV_IMAGE_NAME}:${BIOCONDA_UTILS_VERSION}-base${BASE_TAG}" \ + bash -c "/opt/conda/bin/conda list --export '^conda$'| sed -n 's/=[^=]*$//p'" + ) + MAMBA_VERSION=$( + podman run -t "${REGISTRY}/${BUILD_ENV_IMAGE_NAME}:${BIOCONDA_UTILS_VERSION}-base${BASE_TAG}" \ + bash -c "/opt/conda/bin/conda list --export '^mamba$'| sed -n 's/=[^=]*$//p'" + ) + + # Remove trailing \r with parameter expansion + export CONDA_VERSION=${CONDA_VERSION%$'\r'} + export MAMBA_VERSION=${MAMBA_VERSION%$'\r'} + + # See build-env for explanation + REGISTRY="localhost" + if [ ${{ steps.base-busybox.outputs.TAG_EXISTS_base-busybox }} ]; then + REGISTRY="quay.io/bioconda" + fi + + IMAGE_NAME=$CREATE_ENV_IMAGE_NAME \ + IMAGE_DIR=images/create-env \ + ARCHS=$ARCHS \ + TYPE="create-env" \ + BIOCONDA_UTILS_VERSION=$BIOCONDA_UTILS_VERSION \ + TAG="${BIOCONDA_UTILS_VERSION}-base${BASE_TAG}" 
\ + BUSYBOX_IMAGE="${REGISTRY}/${BASE_BUSYBOX_IMAGE_NAME}:${BASE_TAG}" \ + ./generic_build.bash || [ $? == 64 ] + cat "create-env.log" >> $GITHUB_OUTPUT + + - name: push create-env to ghcr + if: '${{ ! steps.create-env.outputs.TAG_EXISTS_create-env }}' + run: | + echo '${{ secrets.GITHUB_TOKEN }}' | podman login ghcr.io -u '${{ github.actor }}' --password-stdin + BIOCONDA_UTILS_VERSION='${{ steps.get-tag.outputs.tag }}' + podman push "localhost/${CREATE_ENV_IMAGE_NAME}:${BIOCONDA_UTILS_VERSION}-base${BASE_TAG}" "ghcr.io/bioconda/${CREATE_ENV_IMAGE_NAME}:${BIOCONDA_UTILS_VERSION}-base${BASE_TAG}" + podman push "localhost/${CREATE_ENV_IMAGE_NAME}:latest" "ghcr.io/bioconda/${CREATE_ENV_IMAGE_NAME}:latest" + + # END OF BUILDING IMAGES + # ---------------------------------------------------------------------- + # START TESTING + + test: + name: test bioconda-utils with images + runs-on: ubuntu-20.04 + needs: [build-base-debian, build-others] + steps: + + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + # Clone bioconda-recipes to use as part of the tests. + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + repository: bioconda/bioconda-recipes + path: recipes + + - name: set path + run: echo "/opt/mambaforge/bin" >> $GITHUB_PATH + + - name: Install bioconda-utils + run: | + export BIOCONDA_DISABLE_BUILD_PREP=1 + wget https://raw.githubusercontent.com/bioconda/bioconda-common/master/{common,install-and-set-up-conda,configure-conda}.sh + bash install-and-set-up-conda.sh + eval "$(conda shell.bash hook)" + mamba create -n bioconda -y --file test-requirements.txt --file bioconda_utils/bioconda_utils-requirements.txt + conda activate bioconda + python setup.py install + + - name: test + run: | + + BIOCONDA_UTILS_VERSION='${{ needs.build-others.outputs.BIOCONDA_UTILS_TAG }}' + + # bioconda-utils uses docker, so log in to ghcr.io with docker. 
+ echo '${{ secrets.GITHUB_TOKEN }}' | docker login ghcr.io -u '${{ github.actor }}' --password-stdin + + # we also want to use podman to push to quay.io, but we need the images + # locally to this runner to do so, hence also logging in with podman. + echo '${{ secrets.GITHUB_TOKEN }}' | podman login ghcr.io -u '${{ github.actor }}' --password-stdin + + # Decide, for each image, whether it was just built as part of this run + # (in which case it would have been just uploaded to ghcr.io) or + # otherwise pull from quay.io. + # + # If ghcr.io, then also pull the image with podman so it will be + # available to upload to quay.io in subsequent steps. We do this even + # for base-debian, even if it's not used for the test. Outputs are compared against the literal string "true" because a bare `[ false ]` is a non-empty-string test and therefore also succeeds. + if [ "${{ needs.build-base-debian.outputs.TAG_EXISTS_base-debian }}" != "true" ]; then + podman pull "ghcr.io/bioconda/${BASE_DEBIAN_IMAGE_NAME}:${BASE_TAG}" + podman pull "ghcr.io/bioconda/${BASE_DEBIAN_IMAGE_NAME}:latest" + fi + + if [ "${{ needs.build-others.outputs.TAG_EXISTS_base-busybox }}" == "true" ]; then + DEST_BASE_IMAGE_REGISTRY='quay.io/bioconda' + else + DEST_BASE_IMAGE_REGISTRY="ghcr.io/bioconda" + podman pull "${DEST_BASE_IMAGE_REGISTRY}/${BASE_BUSYBOX_IMAGE_NAME}:${BASE_TAG}" + podman pull "${DEST_BASE_IMAGE_REGISTRY}/${BASE_BUSYBOX_IMAGE_NAME}:latest" + fi + + if [ "${{ needs.build-others.outputs.TAG_EXISTS_build-env }}" == "true" ]; then + BUILD_ENV_REGISTRY='quay.io/bioconda' + else + BUILD_ENV_REGISTRY="ghcr.io/bioconda" + podman pull "${BUILD_ENV_REGISTRY}/${BUILD_ENV_IMAGE_NAME}:${BIOCONDA_UTILS_VERSION}-base${BASE_TAG}" + podman pull "${BUILD_ENV_REGISTRY}/${BUILD_ENV_IMAGE_NAME}:latest" + fi + + if [ "${{ needs.build-others.outputs.TAG_EXISTS_create-env }}" == "true" ]; then + CREATE_ENV_REGISTRY='quay.io/bioconda' + else + CREATE_ENV_REGISTRY="ghcr.io/bioconda" + podman pull "${CREATE_ENV_REGISTRY}/${CREATE_ENV_IMAGE_NAME}:${BIOCONDA_UTILS_VERSION}-base${BASE_TAG}" + podman pull "${CREATE_ENV_REGISTRY}/${CREATE_ENV_IMAGE_NAME}:latest" + fi + + cd recipes + + # Run a 
test build, specifying the exact images to use. + eval "$(conda shell.bash hook)" + conda activate bioconda + + # Used to tell mulled-build which image to use + export DEST_BASE_IMAGE="${DEST_BASE_IMAGE_REGISTRY}/${BASE_BUSYBOX_IMAGE_NAME}:${BASE_TAG}" + + # Build a package with containers. + bioconda-utils build \ + --docker-base-image "${BUILD_ENV_REGISTRY}/${BUILD_ENV_IMAGE_NAME}:${BIOCONDA_UTILS_VERSION}-base${BASE_TAG}" \ + --mulled-conda-image "${CREATE_ENV_REGISTRY}/${CREATE_ENV_IMAGE_NAME}:${BIOCONDA_UTILS_VERSION}-base${BASE_TAG}" \ + --packages seqtk \ + --docker \ + --mulled-test \ + --force + + # END TESTING + # ------------------------------------------------------------------------ + # START PUSHING IMAGES + + # For these push steps, a repository must first exist on quay.io/bioconda + # AND that repository must also be configured to allow write access for the + # appropriate service account. This must be done by a user with admin + # access to quay.io/bioconda. + # + # generic_build.bash reported whether the tag exists to the log; that was + # added to GITHUB_OUTPUT, those outputs are exposed to the jobs, and + # those jobs are dependencies of this job. So now we can use those + # outputs to determine if we should upload. The keys under `needs` are the job ids (build-base-debian, build-others). + # + # Note that "latest" is built by generic_build.bash as well, and we're + # including it here in the upload. + + - name: Push base-debian + id: push-base-debian + uses: redhat-actions/push-to-registry@v2 + if: ${{ ! needs.build-base-debian.outputs.TAG_EXISTS_base-debian }} + with: + image: ${{ env.BASE_DEBIAN_IMAGE_NAME }} + tags: latest ${{ env.BASE_TAG }} + registry: quay.io/bioconda + username: ${{ secrets.QUAY_BIOCONDA_USERNAME }} + password: ${{ secrets.QUAY_BIOCONDA_TOKEN }} + + - name: Push base-busybox + id: push-base-busybox + uses: redhat-actions/push-to-registry@v2 + if: ${{ ! 
needs.build-others.outputs.TAG_EXISTS_base-busybox }} + with: + image: ${{ env.BASE_BUSYBOX_IMAGE_NAME }} + tags: latest ${{ env.BASE_TAG }} + registry: quay.io/bioconda + username: ${{ secrets.QUAY_BIOCONDA_USERNAME }} + password: ${{ secrets.QUAY_BIOCONDA_TOKEN }} + + - name: Push build-env + id: push-build-env + uses: redhat-actions/push-to-registry@v2 + if: ${{ ! needs.build-others.outputs.TAG_EXISTS_build-env }} + with: + image: ${{ env.BUILD_ENV_IMAGE_NAME }} + tags: latest ${{ needs.build-others.outputs.BIOCONDA_UTILS_TAG }}-base${{ env.BASE_TAG }} + registry: quay.io/bioconda + username: ${{ secrets.QUAY_BIOCONDA_USERNAME }} + password: ${{ secrets.QUAY_BIOCONDA_TOKEN }} + + - name: Push create-env + id: push-create-env + uses: redhat-actions/push-to-registry@v2 + if: ${{ ! needs.build-others.outputs.TAG_EXISTS_create-env }} + with: + image: ${{ env.CREATE_ENV_IMAGE_NAME }} + tags: latest ${{ needs.build-others.outputs.BIOCONDA_UTILS_TAG }}-base${{ env.BASE_TAG }} + registry: quay.io/bioconda + username: ${{ secrets.QUAY_BIOCONDA_USERNAME }} + password: ${{ secrets.QUAY_BIOCONDA_TOKEN }} diff --git a/.gitignore b/.gitignore index 1b98ca9bb87..8e7c1e872d1 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,5 @@ docs/source/developer/_autosummary # Mac OS Files .DS_Store +env +recipes/ diff --git a/bioconda_utils/pkg_test.py b/bioconda_utils/pkg_test.py index efeb802e67e..5e2b74f21d2 100644 --- a/bioconda_utils/pkg_test.py +++ b/bioconda_utils/pkg_test.py @@ -17,7 +17,7 @@ logger = logging.getLogger(__name__) -MULLED_CONDA_IMAGE = "quay.io/bioconda/create-env:latest" +MULLED_CONDA_IMAGE = os.getenv("MULLED_CONDA_IMAGE", "quay.io/bioconda/create-env:latest") def get_tests(path): diff --git a/build.sh b/build.sh new file mode 100644 index 00000000000..9b5de3a1fe1 --- /dev/null +++ b/build.sh @@ -0,0 +1,124 @@ +#!/bin/bash + +# create-env depends on base-busybox and build-env (which in turn also depends +# on base-busybox). base-debian is independent. 
+# +# This can be run locally for testing, and can be used as a template for CI. +# +# base-busybox base-debian +# | | +# build-env | +# \ | +# \ | +# create-env + +set -euo + +# Used for build-env. +# bioconda-utils will be cloned to this folder inside the image dir (where the +# Dockerfile is) and the version will be checked out. +export BIOCONDA_UTILS_FOLDER=bioconda-utils +export BIOCONDA_UTILS_VERSION=v2.11.1 + +export DEBIAN_VERSION="12.2" +export BUSYBOX_VERSION="1.36.1" + +# Use same tags for base-busybox and base-debian +export BASE_TAG="0.1" + +# If the repository doesn't already exist on quay.io, by default this is +# considered an error. Set to false to avoid this (e.g., when building images +# with new names, or local test ones). +export ERROR_IF_MISSING=false + +# Architectures to build for (under emulation) +export ARCHS="arm64 amd64" + +# Store as separate vars so we can use these for dependencies. +BUILD_ENV_IMAGE_NAME=tmp-build-env +CREATE_ENV_IMAGE_NAME=tmp-create-env +BASE_DEBIAN_IMAGE_NAME=tmp-debian +BASE_BUSYBOX_IMAGE_NAME=tmp-busybox + +BUILD_BUSYBOX=false # build busybox image? +BUILD_DEBIAN=true # build debian image? +BUILD_BUILD_ENV=false # build build-env image? +BUILD_CREATE_ENV=false # build create-env image? 
+ +# # Build base-busybox------------------------------------------------------------ +if [ $BUILD_BUSYBOX == "true" ]; then + + buildah manifest rm "${BASE_BUSYBOX_IMAGE_NAME}:${BASE_TAG}" || true + buildah manifest rm "${BASE_BUSYBOX_IMAGE_NAME}:latest" || true + + IMAGE_NAME=$BASE_BUSYBOX_IMAGE_NAME \ + IMAGE_DIR=images/base-glibc-busybox-bash \ + ARCHS=$ARCHS \ + TYPE="base-busybox" \ + TAG=$BASE_TAG \ + ./generic_build.bash +fi + +# Build base-debian------------------------------------------------------------- +if [ $BUILD_DEBIAN == "true" ]; then + + buildah manifest rm "${BASE_DEBIAN_IMAGE_NAME}:${BASE_TAG}" || true + buildah manifest rm "${BASE_DEBIAN_IMAGE_NAME}:latest" || true + + IMAGE_NAME=$BASE_DEBIAN_IMAGE_NAME \ + IMAGE_DIR=images/base-glibc-debian-bash \ + ARCHS=$ARCHS \ + TYPE="base-debian" \ + TAG=$BASE_TAG \ + ./generic_build.bash +fi + +# Build build-env--------------------------------------------------------------- + +if [ $BUILD_BUILD_ENV == "true" ]; then + # Clone bioconda-utils into same directory as Dockerfile + if [ ! 
-e "images/bioconda-utils-build-env-cos7/bioconda-utils" ]; then + git clone https://github.com/bioconda/bioconda-utils images/bioconda-utils-build-env-cos7/bioconda-utils + else + (cd images/bioconda-utils-build-env-cos7/bioconda-utils && git fetch) + fi + + buildah manifest rm "${BUILD_ENV_IMAGE_NAME}:${BIOCONDA_UTILS_VERSION}-base${BASE_TAG}" || true + buildah manifest rm "${BUILD_ENV_IMAGE_NAME}:latest" || true + + IMAGE_NAME=$BUILD_ENV_IMAGE_NAME \ + IMAGE_DIR=images/bioconda-utils-build-env-cos7 \ + ARCHS=$ARCHS \ + TYPE="build-env" \ + TAG="${BIOCONDA_UTILS_VERSION}-base${BASE_TAG}" \ + BUSYBOX_IMAGE=localhost/$BASE_BUSYBOX_IMAGE_NAME \ + ./generic_build.bash +fi +# # Build create-env-------------------------------------------------------------- + +if [ $BUILD_CREATE_ENV == "true" ]; then + + buildah manifest rm "${CREATE_ENV_IMAGE_NAME}:${BIOCONDA_UTILS_VERSION}-base${BASE_TAG}" || true + buildah manifest rm "${CREATE_ENV_IMAGE_NAME}:latest" || true + + # Get the exact versions of mamba and conda that were installed in build-env. The tag used here must match the TAG that build-env was built with above. 
+ CONDA_VERSION=$( + podman run -t "localhost/${BUILD_ENV_IMAGE_NAME}:${BIOCONDA_UTILS_VERSION}-base${BASE_TAG}" \ + bash -c "/opt/conda/bin/conda list --export '^conda$'| sed -n 's/=[^=]*$//p'" + ) + MAMBA_VERSION=$( + podman run -t "localhost/${BUILD_ENV_IMAGE_NAME}:${BIOCONDA_UTILS_VERSION}-base${BASE_TAG}" \ + bash -c "/opt/conda/bin/conda list --export '^mamba$'| sed -n 's/=[^=]*$//p'" + ) + # Remove trailing \r with parameter expansion + export CONDA_VERSION=${CONDA_VERSION%$'\r'} + export MAMBA_VERSION=${MAMBA_VERSION%$'\r'} + + IMAGE_NAME=$CREATE_ENV_IMAGE_NAME \ + IMAGE_DIR=images/create-env \ + ARCHS=$ARCHS \ + TYPE="create-env" \ + TAG="${BIOCONDA_UTILS_VERSION}-base${BASE_TAG}" \ + BUSYBOX_IMAGE=localhost/$BASE_BUSYBOX_IMAGE_NAME \ + ./generic_build.bash +fi diff --git a/generic_build.bash b/generic_build.bash new file mode 100755 index 00000000000..5ed7b4a381d --- /dev/null +++ b/generic_build.bash @@ -0,0 +1,396 @@ +#!/bin/bash + +# This single script builds the following images depending on the value of the +# env var TYPE: +# +# - build-env: contains conda + conda-build + bioconda-utils, used for building +# package +# - create-env: contains the exact version of conda from build-env (which is +# expected to have been built beforehand). Used for creating env from +# package + dependencies +# - base-busybox: the minimal container into which created conda envs are +# copied. This is the image uploaded to quay.io +# - base-debian: an extended version of the busybox image for special cases +# +# Built images are added to a manifest. If multiple architectures are provided, +# they will all be added to a manifest which can be subsequently uploaded to +# a registry. +# +# After images are built, they are tested. +# +# This script does NOT upload anything, that must be handled separately. + +USAGE=' +Builds various containers. + +Set env vars immediately before running. + +REQUIRED ARGS FOR ALL TYPES +=========================== + TYPE: base-busybox | base-debian | build-env | create-env + IMAGE_DIR: Location of Dockerfile. 
+ IMAGE_NAME: Image name to upload. + ARCHS: Space-separated architectures e.g. "amd64 arm64" + TAG: image tag + +REQUIRED for base-busybox +------------------------- + DEBIAN_VERSION + BUSYBOX_VERSION + +REQUIRED for base-debian +------------------------ + DEBIAN_VERSION + +REQUIRED for build-env +---------------------- + BIOCONDA_UTILS_VERSION + BIOCONDA_UTILS_FOLDER: relative to the Dockerfile + +REQUIRED for create-env +----------------------- + BIOCONDA_UTILS_VERSION + BIOCONDA_UTILS_FOLDER: relative to the Dockerfile + CONDA_VERSION: conda version to install, typically of the form "conda=x.y.z" extracted from build-env + MAMBA_VERSION: mamba version to install, typically of the form "mamba=x.y.z" extracted from build-env + BUSYBOX_IMAGE: the image to use as a base; typically this will be the results + of building base-busybox in a previous run of this script. + +OPTIONAL args +------------- + + WARN_IF_MISSING: true | false + If true (default), will exit if there is no remote repository yet. Set to + false when testing with custom image names. + + LOG: filename + Write info here so other jobs can read from it. Defaults to $TYPE.log + + +EXAMPLE USAGE +============= + + IMAGE_NAME=base-glibc-debian-bash \ + IMAGE_DIR=../../../images/base-glibc-debian-bash \ + TYPE="base-debian" \ + TAGS="0.1.1 0.1" \ + ARCHS="arm64 amd64" \ + DEBIAN_VERSION="12.2" \ + ./generic_build.bash + +' +# ------------------------------------------------------------------------------ +# HANDLE REQUIRED ENV VARS +[ -z "$IMAGE_NAME" ] && echo -e "$USAGE error: please set IMAGE_NAME" && exit 1 +[ -z "$IMAGE_DIR" ] && echo "error: please set IMAGE_DIR, where Dockerfile is found." 
&& exit 1 +[ -z "$TYPE" ] && echo "error: please set TYPE: [ base-debian | base-busybox | build-env | create-env ]" && exit 1 +[ -z "$ARCHS" ] && echo "error: please set ARCHS" && exit 1 +[ -z "$TAG" ] && echo "error: please set TAG" && exit 1 + +if [ "$TYPE" == "build-env" ] || [ "$TYPE" == "create-env" ]; then + [ -z "$BIOCONDA_UTILS_VERSION" ] && echo "error: please set BIOCONDA_UTILS_VERSION for build-env and create-env" && exit 1 + + if [ "$TYPE" == "build-env" ]; then + [ -z "$BIOCONDA_UTILS_FOLDER" ] && echo "error: please set BIOCONDA_UTILS_FOLDER for build-env" && exit 1 + [ -z "$BUSYBOX_IMAGE" ] && echo "error: please set BUSYBOX_IMAGE for create-env" && exit 1 + fi + + if [ "$TYPE" == "create-env" ]; then + [ -z "$BUSYBOX_IMAGE" ] && echo "error: please set BUSYBOX_IMAGE for create-env" && exit 1 + [ -z "$CONDA_VERSION" ] && echo "error: please set CONDA_VERSION for create-env" && exit 1 + [ -z "$MAMBA_VERSION" ] && echo "error: please set MAMBA_VERSION for create-env" && exit 1 + fi +fi + +if [ "$TYPE" == "base-debian" ] || [ "$TYPE" == "base-busybox" ]; then + [ -z "${DEBIAN_VERSION}" ] && echo "error: please set DEBIAN VERSION" && exit 1 +fi + +if [ "$TYPE" == "base-busybox" ]; then + [ -z "$BUSYBOX_VERSION" ] && echo "error: please set BUSYBOX_VERSION" && exit 1 +fi + +LOG=${LOG:="${TYPE}.log"} +touch $LOG + +# Also add "latest" tag. +TAGS="$TAG latest" + +# ------------------------------------------------------------------------------ + + +# ------------------------------------------------------------------------------ +# CHECK FOR EXISTING TAGS. This is because quay.io does not support immutable +# images and we don't want to clobber existing. `latest` will likely always be +# present though, so don't consider that existing. If you know that the +# repository doesn't exist (e.g., you're testing using different names) then +# set ERROR_IF_MISSING=false. 
+response="$(curl -sL "https://quay.io/api/v1/repository/bioconda/${IMAGE_NAME}/tag/")" + +# Images can be set to expire; the jq query selects only non-expired images. +existing_tags="$( + printf %s "${response}" \ + | jq -r '.tags[]|select(.end_ts == null or .end_ts >= now)|.name' + )" \ + || { + if [ ${ERROR_IF_MISSING:-true} == "true" ]; then + printf %s\\n \ + 'Could not get list of image tags.' \ + 'Does the repository exist on Quay.io?' \ + 'Quay.io REST API response was:' \ + "${response}" + exit 1 + fi + } +for tag in $TAGS ; do + case "${tag}" in + "latest" ) ;; + * ) + if printf %s "${existing_tags}" | grep -qxF "${tag}" ; then + printf 'Tag %s already exists for %s on quay.io! Logging, and exiting with code 64\n' "${tag}" "${IMAGE_NAME}" >&2 + echo "TAG_EXISTS_${TYPE}=true" >> $LOG + exit 64 + fi + esac +done + +echo "TAG_EXISTS_${TYPE}=false" + +#------------------------------------------------------------------------------- +# SETUP + +set -xeu + +# Dockerfile lives here +cd $IMAGE_DIR + +# One manifest per tag; multiple archs will go in the same manifest. +for tag in ${TAGS} ; do + buildah manifest create "${IMAGE_NAME}:${tag}" +done + +# Read space-separated archs input string into an array +read -r -a archs_and_images <<<"$ARCHS" + +# ------------------------------------------------------------------------------ +# BUILD_ARGS: Incrementally compose build args array, depending on which inputs +# were provided. This will eventually be provided to buildah bud. 
+# +BUILD_ARGS=() +if [ "$TYPE" == "base-debian" ]; then + BUILD_ARGS+=("--build-arg=debian_version=$DEBIAN_VERSION") # version of debian to use as base +fi + +if [ "$TYPE" == "create-env" ]; then + BUILD_ARGS+=("--build-arg=BUSYBOX_IMAGE=$BUSYBOX_IMAGE") # which image to use as base + BUILD_ARGS+=("--build-arg=CONDA_VERSION=$CONDA_VERSION") # conda version to install + BUILD_ARGS+=("--build-arg=MAMBA_VERSION=$MAMBA_VERSION") # mamba version to install +fi + +if [ "$TYPE" == "build-env" ]; then + BUILD_ARGS+=("--build-arg=BUSYBOX_IMAGE=$BUSYBOX_IMAGE") # which image to use as base + BUILD_ARGS+=("--build-arg=BIOCONDA_UTILS_FOLDER=$BIOCONDA_UTILS_FOLDER") # git clone, relative to Dockerfile + BUILD_ARGS+=("--build-arg=bioconda_utils_version=$BIOCONDA_UTILS_VERSION") # specify version to checkout and install, also used as part of tag +fi + +if [ "$TYPE" == "base-busybox" ]; then + BUILD_ARGS+=("--build-arg=debian_version=$DEBIAN_VERSION") # version of debian to use as base for building busybox + BUILD_ARGS+=("--build-arg=busybox_version=$BUSYBOX_VERSION") # busybox version to build and use + + # Make a busybox image that we'll use further below. As shown in the + # Dockerfile.busybox, this uses the build-busybox script which in turn + # cross-compiles for x86_64 and aarch64, and these executables are later + # copied into an arch-specific container. + # + # Note that --iidfile (used here and in later commands) prints the built + # image ID to the specified file so we can refer to the image later. + iidfile="$( mktemp )" + echo $BUILD_ARGS + buildah bud \ + --iidfile="${iidfile}" \ + --file=Dockerfile.busybox \ + ${BUILD_ARGS[@]} + busybox_image="$( cat "${iidfile}" )" + rm "${iidfile}" + + BUILD_ARGS+=("--build-arg=busybox_image=${busybox_image}") # just-built image from which busybox executable will be copied +fi + +# ------------------------------------------------------------------------------ +# BUILDING: +# - Build each arch's image. 
+# - Extract info +# - Add info as labels +# - Add tags to image +# - Add image to manifest +# +for arch in $ARCHS; do + + # For build-env, need to use different base image from upstream conda-forge + # depending on arch. + BASE_IMAGE_BUILD_ARG="" + if [ "$TYPE" == "build-env" ]; then + if [ "$arch" == "amd64" ]; then + BASE_IMAGE_BUILD_ARG="--build-arg=base_image=quay.io/condaforge/linux-anvil-cos7-x86_64" + fi + if [ "$arch" == "arm64" ]; then + BASE_IMAGE_BUILD_ARG="--build-arg=base_image=quay.io/condaforge/linux-anvil-aarch64" + fi + fi + + # Actual building happens here. + iidfile="$( mktemp )" + buildah bud \ + --arch="${arch}" \ + --iidfile="${iidfile}" \ + --file=Dockerfile \ + ${BUILD_ARGS[@]} \ + $BASE_IMAGE_BUILD_ARG + image_id="$( cat "${iidfile}" )" + rm "${iidfile}" + + # Extract various package info and version info, then store that info + # as labels. Container is removed at the end to avoid e.g. having these + # commands in the history of the container. + container="$( buildah from "${image_id}" )" + run() { buildah run "${container}" "${@}" ; } + LABELS=() + + # See + # https://docs.github.com/en/packages/working-with-a-github-packages-registry/working-with-the-container-registry; + # this allows the container visibility to inherit that of the linked repo + # (public in the case of bioconda-utils) + LABELS+=("--label=org.opencontainers.image.source=https://github.com/bioconda/bioconda-utils") + LABELS+=("--label=deb-list=$( run cat /.deb.lst | tr '\n' '|' | sed 's/|$//' )") + LABELS+=("--label=pkg-list=$( run cat /.pkg.lst | tr '\n' '|' | sed 's/|$//' )") + LABELS+=("--label=glibc=$( run sh -c 'exec "$( find -xdev -name libc.so.6 -print -quit )"' | sed '1!d' )") + LABELS+=("--label=debian=$( run cat /etc/debian_version | sed '1!d' )") + LABELS+=("--label=bash=$( run bash --version | sed '1!d' )") + if [ "$TYPE" == "build-env" ]; then + bioconda_utils="$( + run sh -c '. 
/opt/conda/etc/profile.d/conda.sh && conda activate base && bioconda-utils --version' \ + | rev | cut -f1 -d " " | rev + )" + LABELS+=("--label=bioconda-utils=${bioconda_utils}") + fi + + if [ "$TYPE" == "base-busybox" ]; then + LABELS+=("--label=busybox-version=${BUSYBOX_VERSION}") + fi + buildah rm "${container}" + + # Add labels to a new container... + container="$( buildah from "${image_id}" )" + buildah config "${LABELS[@]}" "${container}" + + # ...then store the container (now with labels) as a new image. + # This is what we'll eventually upload. + image_id="$( buildah commit "${container}" )" + buildah rm "${container}" + + # Add images to manifest. Note that individual **image** tags include arch; + # manifest does not. + for tag in ${TAGS} ; do + buildah tag \ + "${image_id}" \ + "${IMAGE_NAME}:${tag}-${arch}" + buildah manifest add \ + "${IMAGE_NAME}:${tag}" \ + "${image_id}" + + # Inspect image details, but remove the most verbose (like history) and + # redundant (just need one of Docker or OCIv1) fields. + buildah inspect -t image "${IMAGE_NAME}:${tag}-${arch}" \ + | jq 'del( + .History, + .OCIv1.history, + .Config, + .Manifest, + .Docker, + .NamespaceOptions)' + + done # tags +done # archs_and_images + +for tag in ${TAGS}; do + buildah inspect -t manifest ${IMAGE_NAME}:${tag} +done + +# ------------------------------------------------------------------------------ +# TESTING +# +# Args to be used specifically when testing with Dockerfile.test +TEST_BUILD_ARGS=() +if [ "$TYPE" == "create-env" ]; then + TEST_BUILD_ARGS+=("--build-arg=BUSYBOX_IMAGE=$BUSYBOX_IMAGE") +fi + +# Turns out that buildah cannot use --arch and provide an image ID as the +# `base` build-arg at the same time, because we get the error: +# +# "error creating build container: pull policy is always but image has been +# referred to by ID". +# +# This happens even when using --pull-never.
This may be fixed in later +# versions, in which case we can use the code below in the "EXTRA" section. +# +# Since the rest of this script builds a single image and assigns possibly +# multiple tags, we just use the first tag to use as the `base` build-arg. + +tag=$(echo $TAGS | cut -f1 -d " ") +for arch in $ARCHS; do + echo "[LOG] Starting test for ${IMAGE_NAME}:${tag}, $arch." + buildah bud \ + --arch="$arch" \ + --build-arg=base="localhost/${IMAGE_NAME}:${tag}" \ + ${TEST_BUILD_ARGS[@]} \ + --file=Dockerfile.test +done + + +# EXTRA ------------------------------------------------------------------------ +# The following demonstrates how to extract images from corresponding manifest +# digests. This may be a better approach in the future, but as noted above we +# cannot use FROM and --arch and instead use name:tag. +# +# It may be useful in the future but it is disabled for now. +# +if [ "" ] ; then + # Manifests provide a digest; we then need to look up the corresponding image + # name for that digest. + ids="$( + for tag in $TAGS ; do + buildah manifest inspect "${IMAGE_NAME}:${tag}" \ + | jq -r '.manifests[]|.digest' \ + | while read id ; do + buildah images --format '{{.ID}}{{.Digest}}' \ + | sed -n "s/${id}//p" + done + done + )" + + # N.B. need to unique since one image can have multiple tags. In general, + # this should be one image for each arch, no matter how many tags. + ids="$( printf %s "${ids}" | sort -u )" + + # Run the tests; see Dockerfile.test in the relevant image dir for the + # actual tests that are run. + for id in ${ids} ; do + + podman history "${id}" + + # Make sure we're explicit with the arch so that the right image is pulled + # from the respective container. 
+ arch=$(buildah inspect "${id}" | jq -r '.OCIv1.architecture' | sort -u) + + buildah bud \ + --arch="$arch" \ + --build-arg=base="localhost/${IMAGE_NAME}" \ + ${TEST_BUILD_ARGS[@]} \ + --file=Dockerfile.test + done +fi +# ------------------------------------------------------------------------------- + +# Clean up +buildah rmi --prune || true diff --git a/images/base-glibc-busybox-bash/Dockerfile b/images/base-glibc-busybox-bash/Dockerfile new file mode 100644 index 00000000000..e875a2d41ac --- /dev/null +++ b/images/base-glibc-busybox-bash/Dockerfile @@ -0,0 +1,116 @@ +# Don't use Debian's busybox package since it only provides a smaller subset of +# BusyBox's functions (e.g., no administrative tools like adduser etc.). +# Since we create a glibc image anyway, we can also use the slightly smaller +# dynamically linked binary. + +ARG debian_version +FROM "debian:${debian_version}-slim" AS build_base +RUN [ ! -f /etc/apt/sources.list ] || sed --in-place= --regexp-extended \ + '/ stretch/ { s,-updates,-backports, ; s,/(deb|security)\.,/archive., }' \ + /etc/apt/sources.list + + +FROM build_base AS rootfs_builder + +ARG busybox_image +COPY --from="${busybox_image}" /build /build +WORKDIR /busybox-rootfs +RUN arch="$( uname -m )" \ + && \ + mkdir -p ./bin ./sbin ./usr/bin ./usr/sbin \ + && \ + cp -al "/build/busybox.${arch}" ./bin/busybox \ + && \ + ldd ./bin/busybox \ + | grep --only-matching --extended-regexp '/lib\S+' \ + | xargs -n1 sh -xc 'mkdir -p ".${1%/*}" && cp -aL "${1}" ".${1%/*}"' -- \ + && \ + chroot .
/bin/busybox --install \ + && \ + rm -rf ./lib* + +WORKDIR /rootfs + +RUN mkdir -p ./etc ./home ./opt ./root ./run /tmp ./usr ./var/log \ + && \ + for dir in bin lib sbin ; do \ + mkdir "./usr/${dir}" \ + && \ + if [ -L "/bin" ] ; then \ + ln -s "usr/${dir}" "./${dir}" ; \ + else \ + mkdir "./${dir}" ; \ + fi ; \ + done + +RUN find /busybox-rootfs -type f \ + -exec sh -c 'cp -al -- "${1}" "./${1#/busybox-rootfs/}"' -- '{}' ';' + +# Install helper tools used by install-pkgs. +RUN apt-get update -qq \ + && \ + DEBIAN_FRONTEND=noninteractive \ + apt-get install --yes --no-install-recommends \ + patchelf + +COPY install-pkgs /usr/local/bin +RUN install-pkgs "$( pwd )" /tmp/work \ + bash \ + base-passwd \ + libc-bin \ + login \ + ncurses-base \ + && \ + # Remove contents of /usr/local as downstream images overwrite those. + find ./usr/local/ \ + -mindepth 1 -depth \ + -delete + +RUN while IFS=: read _ _ uid gid _ home _ ; do \ + [ -n "${home##/var/run/*}" ] || home="${home#/var}" \ + && \ + [ -d "./${home#/}" ] || [ "${home}" = "/nonexistent" ] && continue ; \ + mkdir -p "./${home#/}" \ + && \ + chown "${uid}:${gid}" "./${home#/}" \ + && \ + chmod 775 "./${home#/}" \ + ; done < ./etc/passwd \ + && \ + pwck --read-only --root "$( pwd )" \ + | { ! grep -v -e 'no changes' -e '/nonexistent' ; } \ + && \ + grpck --read-only --root "$( pwd )" \ + && \ + find \ + -xdev -type f \! -path ./var/\* \! -path ./usr/share/\* \! -name \*.pl \ + | xargs -P0 -n100 sh -c \ + 'chroot . ldd -- "${@}" 2> /dev/null | sed -n "/:/h; /not found/{x;p;x;p}"' -- \ + | { ! grep . ; } + +# env-activate.sh (+ optionally env-execute) should be overwritten downstream. +# - env-activate.sh: +# Is sourced (via symlink in /etc/profile.d/) to activate the /usr/local env. +# - env-execute: +# Is set as the ENTRYPOINT to activate /usr/local before exec'ing CMD. 
+RUN touch ./usr/local/env-activate.sh \ + && \ + touch ./usr/local/env-execute \ + && \ + chmod +x ./usr/local/env-execute \ + && \ + ln -s \ + /usr/local/env-activate.sh \ + ./etc/profile.d/env-activate.sh \ + && \ + printf '%s\n' \ + '#! /bin/bash' \ + ". '/usr/local/env-activate.sh'" \ + 'exec "${@}"' \ + > ./usr/local/env-execute + +FROM scratch +COPY --from=rootfs_builder /rootfs / +ENV LANG=C.UTF-8 +ENTRYPOINT [ "/usr/local/env-execute" ] +CMD [ "bash" ] diff --git a/images/base-glibc-busybox-bash/Dockerfile.busybox b/images/base-glibc-busybox-bash/Dockerfile.busybox new file mode 100644 index 00000000000..fcbd60bd350 --- /dev/null +++ b/images/base-glibc-busybox-bash/Dockerfile.busybox @@ -0,0 +1,23 @@ +# Build busybox ourselves to have more fine-grained control over what we want +# (or not want) to include. +# Use old Debian version to ensure compatible (low glibc requirement) binaries. +FROM debian:9-slim AS busybox_builder +RUN [ ! -f /etc/apt/sources.list ] || sed --in-place= --regexp-extended \ + '/ stretch/ { s,-updates,-backports, ; s,/(deb|security)\.,/archive., }' \ + /etc/apt/sources.list \ + && \ + apt-get update && \ + DEBIAN_FRONTEND=noninteractive \ + apt-get install --yes --no-install-recommends \ + bzip2 curl ca-certificates tar \ + gcc libc6-dev \ + gcc-aarch64-linux-gnu libc6-dev-arm64-cross \ + make patch + +WORKDIR /build +COPY build-busybox ./ +ARG busybox_version +RUN ./build-busybox \ + "${busybox_version}" \ + x86_64 aarch64 + diff --git a/images/base-glibc-busybox-bash/Dockerfile.test b/images/base-glibc-busybox-bash/Dockerfile.test new file mode 100644 index 00000000000..feba4402b8a --- /dev/null +++ b/images/base-glibc-busybox-bash/Dockerfile.test @@ -0,0 +1,27 @@ +ARG base +FROM "${base}" + +# Check if env-activate.sh gets sourced for login shell and in env-execute. 
+RUN [ "$( sh -lc 'printf world' )" = 'world' ] \ + && \ + [ "$( /usr/local/env-execute sh -c 'printf world' )" = 'world' ] \ + && \ + printf '%s\n' \ + 'printf "hello "' \ + > /usr/local/env-activate.sh \ + && \ + [ "$( sh -lc 'printf world' )" = 'hello world' ] \ + && \ + [ "$( /usr/local/env-execute sh -c 'printf world' )" = 'hello world' ] \ + && \ + printf '' \ + > /usr/local/env-activate.sh + +RUN arch=$(uname -m) \ + && \ + wget --quiet \ + "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-${arch}.sh" \ + && \ + sh ./Miniforge3-Linux-${arch}.sh -bp /opt/conda \ + && \ + /opt/conda/bin/conda info --all diff --git a/images/base-glibc-busybox-bash/build-busybox b/images/base-glibc-busybox-bash/build-busybox new file mode 100755 index 00000000000..902b33753d8 --- /dev/null +++ b/images/base-glibc-busybox-bash/build-busybox @@ -0,0 +1,140 @@ +#! /bin/sh +set -xeu + +download() { + curl --location --silent \ + "https://busybox.net/downloads/busybox-${version}.tar.bz2" \ + | tar -xjf- --strip-components=1 +} + +patch() { + case "${version}" in 1.36.* ) + # Small fix to let it build with older glibc versions. + curl --location --silent \ + 'https://git.busybox.net/busybox/patch/miscutils/seedrng.c?id=200a9669fbf6f06894e4243cccc9fc11a1a6073a' \ + 'https://git.busybox.net/busybox/patch/miscutils/seedrng.c?id=cb57abb46f06f4ede8d9ccbdaac67377fdf416cf' \ + | command patch --strip=1 + esac + + # Add support for running busybox wget without OpenSSL under QEMU. + # (NB: If we run into other QEMU+BusyBox problems that needs debugging: That + # vfork issue might affect other BusyBox parts, so check for it first.) 
+ command patch --strip=1 <<'EOP' +From e7b57533ffcd5842fa93f5aa96949b3eaed54b67 Mon Sep 17 00:00:00 2001 +From: Marcel Bargull +Date: Sat, 14 Oct 2023 22:58:42 +0200 +Subject: [PATCH] wget: don't assume vfork blocking for openssl exec + +Under QEMU, busybox wget fails to fallback to busybox ssl_client in case +openssl s_client can't be executed because QEMU's vfork does not block. +Ref.: https://man7.org/linux/man-pages/man2/vfork.2.html#VERSIONS + +Signed-off-by: Marcel Bargull +--- + networking/wget.c | 24 +++++++++++++++++++++--- + 1 file changed, 21 insertions(+), 3 deletions(-) + +diff --git a/networking/wget.c b/networking/wget.c +index 9ec0e67b9..4bcc26e86 100644 +--- a/networking/wget.c ++++ b/networking/wget.c +@@ -683,3 +683,9 @@ static int spawn_https_helper_openssl(const char *host, unsigned port) + int pid; +- IF_FEATURE_WGET_HTTPS(volatile int child_failed = 0;) ++ ++# if ENABLE_FEATURE_WGET_HTTPS ++ struct fd_pair status; ++ int exec_errno = 0; ++ ++ xpiped_pair(status); ++# endif + +@@ -701,2 +707,7 @@ static int spawn_https_helper_openssl(const char *host, unsigned port) + ++# if ENABLE_FEATURE_WGET_HTTPS ++ close(status.rd); ++ if (fcntl(status.wr, F_SETFD, FD_CLOEXEC) != 0) ++ bb_simple_perror_msg_and_die("fcntl"); ++# endif + close(sp[0]); +@@ -743,5 +754,8 @@ static int spawn_https_helper_openssl(const char *host, unsigned port) + BB_EXECVP(argv[0], argv); ++ exec_errno = errno; + xmove_fd(3, 2); + # if ENABLE_FEATURE_WGET_HTTPS +- child_failed = 1; ++ if (write(status.wr, &exec_errno, sizeof(exec_errno)) != sizeof(exec_errno)) ++ bb_simple_perror_msg_and_die("write"); ++ close(status.wr); + xfunc_die(); +@@ -758,3 +772,7 @@ static int spawn_https_helper_openssl(const char *host, unsigned port) + # if ENABLE_FEATURE_WGET_HTTPS +- if (child_failed) { ++ close(status.wr); ++ if (read(status.rd, &exec_errno, sizeof(exec_errno)) == -1) ++ bb_simple_perror_msg_and_die("read"); ++ close(status.rd); ++ if (exec_errno) { + close(sp[0]); +EOP +} + 
+config() { + make defconfig + mv .config .defconfig + # Set CONFIG_SUBST_WCHAR=0 for better Unicode support and remove big components. + printf %s\\n \ + CONFIG_AR=y \ + CONFIG_FEATURE_AR_CREATE=y \ + CONFIG_FEATURE_AR_LONG_FILENAMES=y \ + CONFIG_SUBST_WCHAR=0 \ + CONFIG_RPM=n \ + CONFIG_RPM2CPIO=n \ + CONFIG_FSCK_MINIX=n \ + CONFIG_MKFS_MINIX=n \ + CONFIG_BC=n \ + CONFIG_DC=n \ + CONFIG_HDPARM=n \ + CONFIG_HEXEDIT=n \ + CONFIG_I2CGET=n \ + CONFIG_I2CSET=n \ + CONFIG_I2CDUMP=n \ + CONFIG_I2CDETECT=n \ + CONFIG_I2CTRANSFER=n \ + CONFIG_DNSD=n \ + CONFIG_FTPD=n \ + CONFIG_HTTPD=n \ + CONFIG_TCPSVD=n \ + CONFIG_UDPSVD=n \ + CONFIG_UDHCPD=n \ + CONFIG_SH_IS_ASH=n \ + CONFIG_SH_IS_NONE=y \ + CONFIG_SHELL_ASH=n \ + CONFIG_ASH=n \ + CONFIG_HUSH=n \ + CONFIG_SHELL_HUSH=n \ + | cat - .defconfig \ + > .config + # make still asks which shell to use for sh although CONFIG_SH_IS_NONE=y is set!? + printf \\n | make oldconfig +} + +build() { + make -j "$( nproc )" busybox +} + +main() { + version="${1}" + shift + download + patch + for target ; do + export MAKEFLAGS="ARCH=${target} CROSS_COMPILE=${target}-linux-gnu-" + make clean + config + build + cp -al ./busybox "./busybox.${target}" + done +} + +main "${@}" diff --git a/images/base-glibc-busybox-bash/install-pkgs b/images/base-glibc-busybox-bash/install-pkgs new file mode 100755 index 00000000000..fdb483dd268 --- /dev/null +++ b/images/base-glibc-busybox-bash/install-pkgs @@ -0,0 +1,361 @@ +#! /bin/sh +set -xeu + +arch=$(uname -m) + +prepare_remove_docs() { + # remove lintian and docs (apart from copyright) + rm -rf \ + ./usr/share/lintian \ + ./usr/share/man + find ./usr/share/doc/ -type f ! -name copyright -delete + find ./usr/share/doc/ -type d -empty -delete +} + + +prepare_usrmerge() { + # If we are on Debian >=12, /bin et al. are symlinks to /usr/ counterparts. + # Since we don't do full apt installs, we accommodate it here.
+ if [ -L "${root_fs}/bin" ] ; then + for dir in bin lib* sbin ; do + [ -d "./${dir}" ] || continue + [ -L "./${dir}" ] && continue + mkdir -p ./usr + cp -ral "./${dir}" ./usr/ + rm -rf "./${dir}" + ln -s "usr/${dir}" "${dir}" + done + fi +} + + +add_rpath() { + local binary="${1}" + shift + local new_rpath="${1}" + shift + local rpath + rpath="$( + patchelf \ + --print-rpath \ + "${binary}" + )" + patchelf \ + --set-rpath \ + "${rpath:+${rpath}:}${new_rpath}" \ + "${binary}" +} + + +prepare() { + local pkg="${1}" + shift + local destdir="${1}" + shift + + case "${pkg}" in + libc6 ) + # To reduce image size, remove all charset conversion modules apart + # from smaller ones for some common encodings. + # Update gconv-modules accordingly. + # NOTE: When adding/removing any, check required dyn. linked libs! + + local gconv_path="./usr/lib/${arch}-linux-gnu/gconv" + local gconv_modules_regex + if [ -e "${gconv_path}/gconv-modules.d/gconv-modules-extra.conf" ] ; then + gconv_modules_regex="$( + sed -nE 's/^module\s+\S+\s+\S+\s+(\S+)\s+.*/\1/p' \ + < "${gconv_path}/gconv-modules" \ + | sort -u \ + | tr '\n' '|' \ + | sed 's/|$//' + )" + : > "${gconv_path}/gconv-modules.d/gconv-modules-extra.conf" + else + gconv_modules_regex='UTF-\w+|UNICODE|ISO8859-(1|15)|CP1252|ANSI_X3\.110' + local gconv_modules_file_tmp='./.tmp.gconv-modules' + + mv "${gconv_path}"/gconv-modules "${gconv_modules_file_tmp}" + + grep -E \ + '^\s*$|^#|^(alias\s+.*|module\s+[^\s]+\s+[^\s]+)\s+\<('"${gconv_modules_regex}"')(//|\s)' \ + "${gconv_modules_file_tmp}" \ + | sed -nEe '1N;N;/^(#.*)\n.*\1/{D;D};P;D' | cat -s \ + > "${gconv_path}"/gconv-modules + rm "${gconv_modules_file_tmp}" + fi + + find "${gconv_path}" \ + -mindepth 1 -maxdepth 1 \ + -name '*.so' \ + -type f \ + -regextype posix-extended \ + ! -regex '.*/('"${gconv_modules_regex}"').so' \ + -print -delete + + iconvconfig --prefix ./ + + ;; + bash ) + rm -rf ./usr/share/locale + # Add custom rpath for libtinfo (see below) to bash binaries. 
+ local new_rpath="/lib/${arch}-linux-gnu/terminfo:/usr/lib/${arch}-linux-gnu/terminfo" + add_rpath ./bin/bash "${new_rpath}" + add_rpath ./usr/bin/clear_console "${new_rpath}" + ;; + libtinfo* ) + # Move libtinfo libraries to a custom path to ensure it is not + # unintentionally used in downstream images. + find ./usr/lib/${arch}-linux-gnu -type f \ + | { + while read binary ; do + add_rpath "${binary}" "/lib/${arch}-linux-gnu/terminfo" + done + } + + mv ./lib/${arch}-linux-gnu ./temp + mkdir ./lib/${arch}-linux-gnu + mv ./temp ./lib/${arch}-linux-gnu/terminfo + + mv ./usr/lib/${arch}-linux-gnu ./temp + mkdir ./usr/lib/${arch}-linux-gnu + mv ./temp ./usr/lib/${arch}-linux-gnu/terminfo + ;; + base-passwd ) + # The dependencies libdebconfclient0 (and libselinux1 for Debian>=12) + # are needed for update-passwd, but we ignore them => remove the binary. + rm ./usr/sbin/update-passwd + ;; + login ) + rm -rf ./usr/share/locale + # The following binaries provided by BusyBox or pull in more dependencies + # (PAM, libselinux1, and their dependencies) => remove them. + rm -f \ + ./bin/login \ + ./bin/su \ + ./usr/bin/lastlog \ + ./usr/bin/newgrp \ + ./usr/bin/sg + ;; + libc-bin | \ + libgcc1 | \ + base-files | \ + gcc-*-base | \ + libcrypt1 | \ + libgcc-s1 | \ + libdebconfclient0 | \ + libpcre* | \ + libselinux1 | \ + ncurses-base | \ + zlib1g ) + : + ;; + * ) + # Abort if we get an unexpected package. 
+ printf %s\\n "\`prepare\` not defined for ${pkg}" >&2 + return 1 + ;; + esac + prepare_remove_docs + prepare_usrmerge +} + + +postinst_ldconfig_trigger() { + ldconfig --verbose -r ./ +} + + +postinst() { + local pkg="${1}" + shift + local destdir="${1}" + shift + + case "${pkg}" in + libc-bin ) + cp -p --remove-destination \ + ./usr/share/libc-bin/nsswitch.conf \ + ./etc/nsswitch.conf + postinst_ldconfig_trigger + ;; + base-files ) + cp "${destdir}/DEBIAN/postinst" ./base-files-postinst + chroot ./ sh /base-files-postinst configure + rm ./base-files-postinst + ;; + base-passwd ) + mkdir -p "${destdir}/etc" + cp -p --remove-destination \ + "${destdir}/usr/share/base-passwd/group.master" \ + ./etc/group + cp -p --remove-destination \ + "${destdir}/usr/share/base-passwd/passwd.master" \ + ./etc/passwd + DPKG_ROOT="$( pwd )" \ + shadowconfig on + ;; + login ) + for file in /var/log/faillog /etc/subuid /etc/subgid ; do + [ -f "./${file}" ] || continue + touch "${file}" + chown 0:0 "${file}" + chmod 644 "${file}" + done + ;; + bash ) + # Replace BusyBox's sh by Bash + rm -f ./bin/sh + ln -s /bin/bash ./bin/sh + chroot ./ add-shell /bin/sh + chroot ./ add-shell /bin/bash + chroot ./ add-shell /bin/rbash + # Bash 4.* did not have default key bindings for control-arrow-key key + # combinations. Add some for convenience: + cat >> ./etc/inputrc <<'EOF' + +"\e[5C": forward-word +"\e[5D": backward-word +"\e\e[C": forward-word +"\e\e[D": backward-word +"\e[1;5C": forward-word +"\e[1;5D": backward-word +EOF + ;; + libc6 | \ + libdebconfclient0 | \ + libgcc1 | \ + libcrypt1 | \ + libgcc-s1 | \ + libpcre* | \ + libselinux1 | \ + libtinfo* | \ + zlib1g ) + postinst_ldconfig_trigger + ;; + gcc-*-base | \ + ncurses-base ) + : + ;; + * ) + # Abort if we get an unexpected package. 
+ printf %s\\n "\`postinst\` not defined for ${pkg}" >&2 + return 1 + ;; + esac +} + + +install_pkg() { + local pkg="${1}" + shift + + local work_dir="${work_base}/${pkg}" + mkdir "${work_dir}" + cd "${work_dir}" + + # Download package + apt-get download "${pkg}" + local deb_file + deb_file="$( find "$( pwd )" -maxdepth 1 -name '*.deb' )" + + # Prepare package + local destdir="${work_dir}/destdir" + mkdir "${destdir}" + cd "${destdir}" + dpkg-deb --raw-extract "${deb_file}" ./ + prepare "${pkg}" "${destdir}" + dpkg-deb --build ./ "${deb_file}" + cd "${work_dir}" + + # Extract package + dpkg-deb --vextract "${deb_file}" "${root_fs}" + rm "${deb_file}" + printf %s\\n "$( basename "${deb_file}" )" >> "${root_fs}/.deb.lst" + + # Finalize package installation + cd "${root_fs}" + postinst "${pkg}" "${destdir}" + + cd "${work_base}" + rm -rf "${work_dir}" + printf %s\\n "${pkg}" >> "${root_fs}/.pkg.lst" +} + + +get_deps() { + [ -z "${*}" ] && return 0 + + # Instead of using `apt-cache depends --recurse` or `debfoster -d`, recurse + # manually so that we can exclude some packages that are either already + # installed or would pull in files/packages we don't need. + + local ignore_pkgs + ignore_pkgs="$( + printf %s\\n \ + base-files '' debianutils dash \ + libdebconfclient0 libselinux1 \ + libaudit1 libpam-modules libpam-runtime libpam0g \ + | grep -vFx "$( printf %s\\n "${@}" )" + )" + [ -f "${root_fs}/.pkg.lst" ] && \ + ignore_pkgs=$( printf %s\\n ${ignore_pkgs} $( cat -s "${root_fs}/.pkg.lst" ) ) + + local new_pkgs="${*}" + local old_pkgs='' + while ! 
[ "${new_pkgs}" = "${old_pkgs}" ] ; do + old_pkgs="${new_pkgs}" + new_pkgs="$( + apt-cache depends \ + --no-recommends --no-suggests --no-conflicts \ + --no-breaks --no-replaces --no-enhances \ + ${old_pkgs} \ + | sed -n 's/.*Depends: //p' | cat -s + )" + new_pkgs="$( + printf %s\\n ${old_pkgs} ${new_pkgs} \ + | sort -u \ + | grep -vFx "$( printf %s\\n ${ignore_pkgs} )" + )" + done + printf %s\\n ${new_pkgs} +} + + +install_with_deps() { + get_deps "${@}" | while read -r pkg ; do + install_pkg "${pkg}" + done +} + + +main() { + root_fs="${1}" + shift + work_base="${1}" + shift + + mkdir -p "${work_base}" + cd "${work_base}" + + apt-get update + + # Unconditionally install glibc (package libc6). + # Also install dependencies acc. to `apt-cache depends`: + # - libgcc1 only consists of libgcc_s.so.1 (+ docs, which we remove). + # - gcc-*-base only has empty directories (+ docs, which we remove). + install_with_deps libc6 + + # libc-bin must be in ${@} for Unicode support (C.UTF-8 locale). + install_with_deps "${@}" + + # base-files contains /usr/share/common-licenses/, /etc/profile, etc. + # Install base-files afterwards so we have a working sh for the postinst. + install_with_deps base-files + + cd "${root_fs}" + rm -rf "${work_base}" +} + + +main "${@}" diff --git a/images/base-glibc-debian-bash/Dockerfile b/images/base-glibc-debian-bash/Dockerfile new file mode 100644 index 00000000000..c0adc29222d --- /dev/null +++ b/images/base-glibc-debian-bash/Dockerfile @@ -0,0 +1,131 @@ +ARG debian_version + +FROM "debian:${debian_version}-slim" +RUN [ ! -f /etc/apt/sources.list ] || sed --in-place= --regexp-extended \ + '/ stretch/ { s,-updates,-backports, ; s,/(deb|security)\.,/archive., }' \ + /etc/apt/sources.list \ + && \ + apt-get update -qq \ + && \ + # Add en_US.UTF-8 locale. + printf '%s\n' 'en_US.UTF-8 UTF-8' \ + >> /etc/locale.gen \ + && \ + DEBIAN_FRONTEND=noninteractive \ + apt-get install --yes --no-install-recommends \ + $( \ + . 
/etc/os-release \ + && \ + [ "${VERSION_ID-10}" -lt 10 ] \ + && \ + printf '%s\n' \ + libegl1-mesa \ + libgl1-mesa-glx \ + || \ + printf '%s\n' \ + libegl1 \ + libgl1 \ + libglx-mesa0 \ + ) \ + libglvnd0 \ + libopengl0 \ + locales \ + openssh-client \ + procps \ + && \ + # Remove "locales" package, but keep the generated locale. + sed -i \ + 's/\s*rm .*locale-archive$/: &/' \ + /var/lib/dpkg/info/locales.prerm \ + && \ + DEBIAN_FRONTEND=noninteractive \ + apt-get remove --yes \ + locales \ + && \ + # On Debian 10 (and 11) libgl1-mesa-glx pulls in libgl1-mesa-dri (which in + # turn has more heavy-weight dependencies). We leave these out of the image + # (by manually removing it from "Depends:" list) like we do with Debian 9. + sed -i \ + '/^Depends:/ s/, libgl1-mesa-dri\>//g' \ + /var/lib/dpkg/status \ + && \ + DEBIAN_FRONTEND=noninteractive \ + apt-get autoremove --yes \ + && \ + # Remove apt package lists. + rm -rf /var/lib/apt/lists/* \ + && \ + # Remove contents of /usr/local as downstream images overwrite those. + find ./usr/local/ \ + -mindepth 1 -depth \ + -delete + +RUN dpkg-query --show --showformat \ + '${db:Status-Status} ${Package}\n' \ + | sed -n 's/:/%3a/g ; s/^installed //p' \ + > /.pkg.lst \ + && \ + dpkg-query --show --showformat \ + '${db:Status-Status} ${Package}_${Version}_${Architecture}\n' \ + | sed -n 's/:/%3a/g ; s/$/.deb/ ; s/^installed //p' \ + > /.deb.lst + +RUN while IFS=: read _ _ uid gid _ home _ ; do \ + [ -n "${home##/var/run/*}" ] || home="${home#/var}" \ + && \ + [ -d "./${home#/}" ] || [ "${home}" = "/nonexistent" ] && continue ; \ + mkdir -p "./${home#/}" \ + && \ + chown "${uid}:${gid}" "./${home#/}" \ + && \ + chmod 775 "./${home#/}" \ + ; done < ./etc/passwd \ + && \ + pwck --read-only --root "$( pwd )" \ + | { ! grep -v -e 'no changes' -e '/nonexistent' ; } \ + && \ + grpck --read-only --root "$( pwd )" \ + && \ + find \ + -xdev -type f \! -path ./var/\* \! -path ./usr/share/\* \! 
-name \*.pl \ + | xargs -P0 -n100 sh -c \ + 'chroot . ldd -- "${@}" 2> /dev/null | sed -n "/:/h; /not found/{x;p;x;p}"' -- \ + | { ! grep . ; } + +# Bash 4.* did not have default key bindings for control-arrow-key key +# combinations. Add some for convenience: +RUN >> /etc/inputrc \ + printf '%s\n' \ + '' \ + '"\e[5C": forward-word' \ + '"\e[5D": backward-word' \ + '"\e\e[C": forward-word' \ + '"\e\e[D": backward-word' \ + '"\e[1;5C": forward-word' \ + '"\e[1;5D": backward-word' \ + ; + +# env-activate.sh (+ optionally env-execute) should be overwritten downstream. +# - env-activate.sh: +# Is sourced (via symlink in /etc/profile.d/) to activate the /usr/local env. +# - env-execute: +# Is set as the ENTRYPOINT to activate /usr/local before exec'ing CMD. +RUN touch /usr/local/env-activate.sh \ + && \ + touch /usr/local/env-execute \ + && \ + chmod +x /usr/local/env-execute \ + && \ + ln -s \ + /usr/local/env-activate.sh \ + /etc/profile.d/env-activate.sh \ + && \ + printf '%s\n' \ + '#! /bin/bash' \ + ". '/usr/local/env-activate.sh'" \ + 'exec "${@}"' \ + > /usr/local/env-execute + +ENV LANG=C.UTF-8 +ENTRYPOINT [ "/usr/local/env-execute" ] +CMD [ "bash" ] diff --git a/images/base-glibc-debian-bash/Dockerfile.test b/images/base-glibc-debian-bash/Dockerfile.test new file mode 100644 index 00000000000..f2f0bace3a8 --- /dev/null +++ b/images/base-glibc-debian-bash/Dockerfile.test @@ -0,0 +1,39 @@ +ARG base +FROM "${base}" + +# Check if env-activate.sh gets sourced for login shell and in env-execute. +RUN [ "$( sh -lc 'printf world' )" = 'world' ] \ + && \ + [ "$( /usr/local/env-execute sh -c 'printf world' )" = 'world' ] \ + && \ + printf '%s\n' \ + 'printf "hello "' \ + > /usr/local/env-activate.sh \ + && \ + [ "$( sh -lc 'printf world' )" = 'hello world' ] \ + && \ + [ "$( /usr/local/env-execute sh -c 'printf world' )" = 'hello world' ] \ + && \ + printf '' \ + > /usr/local/env-activate.sh + +# Check if all desired locales are there. 
+RUN locale -a | grep -i 'c\.utf-\?8' \ + && \ + locale -a | grep -i 'en_us\.utf-\?8' + +RUN apt-get update -qq \ + && \ + DEBIAN_FRONTEND=noninteractive \ + apt-get install --yes --no-install-recommends \ + ca-certificates \ + wget \ + && \ + arch=$(uname -m) \ + && \ + wget --quiet \ + "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-${arch}.sh" \ + && \ + sh ./Miniforge3-Linux-${arch}.sh -bp /opt/conda \ + && \ + /opt/conda/bin/conda info --all diff --git a/images/bioconda-recipes-issue-responder/Dockerfile b/images/bioconda-recipes-issue-responder/Dockerfile new file mode 100644 index 00000000000..9b94896414c --- /dev/null +++ b/images/bioconda-recipes-issue-responder/Dockerfile @@ -0,0 +1,40 @@ +ARG base=quay.io/bioconda/base-glibc-busybox-bash:2.0.0 + +FROM quay.io/bioconda/create-env:2.0.0 as build +RUN /opt/create-env/env-execute \ + create-env \ + --conda=mamba \ + --strip-files=\* \ + --remove-paths=\*.a \ + --remove-paths=\*.pyc \ + /usr/local \ + aiohttp \ + anaconda-client \ + ca-certificates \ + git \ + openssh \ + python=3.8 \ + pyyaml \ + skopeo \ + && \ + # Workaround for https://github.com/conda/conda/issues/10490 + export CONDA_REPODATA_THREADS=1 && \ + # We don't need Perl (used by Git for some functionalities). + # => Remove perl package to reduce image size. 
+ /opt/create-env/env-execute \ + conda remove --yes \ + --prefix=/usr/local \ + --force-remove \ + perl + +FROM "${base}" +COPY --from=build /usr/local /usr/local +COPY ./issue-responder /usr/local/bin/ + +# Used environment variables: +# - JOB_CONTEXT +# - BOT_TOKEN +# - GITTER_TOKEN +# - ANACONDA_TOKEN +# - QUAY_OAUTH_TOKEN +# - QUAY_LOGIN diff --git a/images/bioconda-recipes-issue-responder/Dockerfile.test b/images/bioconda-recipes-issue-responder/Dockerfile.test new file mode 100644 index 00000000000..665dc72ed0a --- /dev/null +++ b/images/bioconda-recipes-issue-responder/Dockerfile.test @@ -0,0 +1,7 @@ +ARG base + + +FROM "${base}" +RUN JOB_CONTEXT='{"event": {"issue": {}}}' \ + /usr/local/env-execute \ + issue-responder diff --git a/images/bioconda-recipes-issue-responder/issue-responder b/images/bioconda-recipes-issue-responder/issue-responder new file mode 100755 index 00000000000..9d915f2f528 --- /dev/null +++ b/images/bioconda-recipes-issue-responder/issue-responder @@ -0,0 +1,615 @@ +#! 
/usr/bin/env python + +import logging +import os +import re +import sys +from asyncio import gather, run, sleep +from asyncio.subprocess import create_subprocess_exec +from pathlib import Path +from shutil import which +from subprocess import check_call +from typing import Any, Dict, List, Optional, Set, Tuple +from zipfile import ZipFile + +from aiohttp import ClientSession +from yaml import safe_load + +logger = logging.getLogger(__name__) +log = logger.info + + +async def async_exec( + command: str, *arguments: str, env: Optional[Dict[str, str]] = None +) -> None: + process = await create_subprocess_exec(command, *arguments, env=env) + return_code = await process.wait() + if return_code != 0: + raise RuntimeError( + f"Failed to execute {command} {arguments} (return code: {return_code})" + ) + + +# Post a comment on a given issue/PR with text in message +async def send_comment(session: ClientSession, issue_number: int, message: str) -> None: + token = os.environ["BOT_TOKEN"] + url = ( + f"https://api.github.com/repos/bioconda/bioconda-recipes/issues/{issue_number}/comments" + ) + headers = { + "Authorization": f"token {token}", + "User-Agent": "BiocondaCommentResponder", + } + payload = {"body": message} + log("Sending comment: url=%s", url) + log("Sending comment: payload=%s", payload) + async with session.post(url, headers=headers, json=payload) as response: + status_code = response.status + log("the response code was %d", status_code) + if status_code < 200 or status_code > 202: + sys.exit(1) + + +def list_zip_contents(fname: str) -> List[str]: + with ZipFile(fname) as f: + return [e.filename for e in f.infolist() if e.filename.endswith('.tar.gz') or e.filename.endswith('.tar.bz2')] + + +# Download a zip file from url to zipName.zip and return that path (None if the response is not 200) +# Timeout is 30 minutes to compensate for any network issues +async def download_file(session: ClientSession, zipName: str, url: str) -> Optional[str]: + async with session.get(url, timeout=60*30) as response: + if response.status
== 200: + ofile = f"{zipName}.zip" + with open(ofile, 'wb') as fd: + while True: + chunk = await response.content.read(1024*1024*1024) + if not chunk: + break + fd.write(chunk) + return ofile + return None + + +# Find artifact zip files, download them and return their URLs and contents +async def fetch_azure_zip_files(session: ClientSession, buildId: str) -> List[Tuple[str, str]]: + artifacts = [] + + url = f"https://dev.azure.com/bioconda/bioconda-recipes/_apis/build/builds/{buildId}/artifacts?api-version=4.1" + log("contacting azure %s", url) + async with session.get(url) as response: + # Sometimes we get a 301 error, so there are no longer artifacts available + if response.status == 301: + return artifacts + res = await response.text() + + res_object = safe_load(res) + if res_object['count'] == 0: + return artifacts + + for artifact in res_object['value']: + zipName = artifact['name'] # LinuxArtifacts or OSXArtifacts + zipUrl = artifact['resource']['downloadUrl'] + log(f"zip name is {zipName} url {zipUrl}") + fname = await download_file(session, zipName, zipUrl) + if not fname: + continue + pkgsImages = list_zip_contents(fname) + for pkg in pkgsImages: + artifacts.append((zipUrl, pkg)) + + return artifacts + + +def parse_azure_build_id(url: str) -> str: + return re.search(r"buildId=(\d+)", url).group(1) + + +# Given a PR and commit sha, fetch a list of the artifact zip files URLs and their contents +async def fetch_pr_sha_artifacts(session: ClientSession, pr: int, sha: str) -> List[Tuple[str, str]]: + url = f"https://api.github.com/repos/bioconda/bioconda-recipes/commits/{sha}/check-runs" + + headers = { + "User-Agent": "BiocondaCommentResponder", + "Accept": "application/vnd.github.antiope-preview+json", + } + async with session.get(url, headers=headers) as response: + response.raise_for_status() + res = await response.text() + check_runs = safe_load(res) + log(f"DEBUG url was {url} returned {check_runs}") + + for check_run in check_runs["check_runs"]: + # The names are
"bioconda.bioconda-recipes (test_osx test_osx)" or similar + if check_run["name"].startswith("bioconda.bioconda-recipes (test_"): + # The azure build ID is in the details_url as buildId=\d+ + buildID = parse_azure_build_id(check_run["details_url"]) + log(f"DEBUG buildID is {buildID}") + zipFiles = await fetch_azure_zip_files(session, buildID) + log(f"DEBUG zipFiles are {zipFiles}") + return zipFiles # We've already fetched all possible artifacts + + return [] + + +# Given a PR and commit sha, post a comment with any artifacts +async def make_artifact_comment(session: ClientSession, pr: int, sha: str) -> None: + artifacts = await fetch_pr_sha_artifacts(session, pr, sha) + nPackages = len(artifacts) + log(f"DEBUG the artifacts are {artifacts}") + + if nPackages > 0: + comment = "Package(s) built on Azure are ready for inspection:\n\n" + comment += "Arch | Package | Zip File\n-----|---------|---------\n" + install_noarch = "" + install_linux = "" + install_osx = "" + + # Table of packages and repodata.json + for URL, artifact in artifacts: + if not (package_match := re.match(r"^((.+)\/(.+)\/(.+)\/(.+\.tar\.bz2))$", artifact)): + continue + url, archdir, basedir, subdir, packageName = package_match.groups() + urlBase = URL[:-3] # trim off zip from format= + urlBase += "file&subPath=%2F{}".format("%2F".join([basedir, subdir])) + conda_install_url = urlBase + # N.B., the zip file URL is nearly identical to the URL for the individual member files. 
It's unclear if there's an API for getting the correct URL to the files themselves + #pkgUrl = "%2F".join([urlBase, packageName]) + #repoUrl = "%2F".join([urlBase, "current_repodata.json"]) + #resp = await session.get(repoUrl) + + if subdir == "noarch": + comment += "noarch |" + elif subdir == "linux-64": + comment += "linux-64 |" + else: + comment += "osx-64 |" + comment += f" {packageName} | [{archdir}]({URL})\n" + + # Conda install examples + comment += "***\n\nYou may also use `conda` to install these after downloading and extracting the appropriate zip file. From the LinuxArtifacts or OSXArtifacts directories:\n\n" + comment += "```conda install -c ./packages \n```\n" + + # Table of containers + comment += "***\n\nDocker image(s) built (images are in the LinuxArtifacts zip file above):\n\n" + comment += "Package | Tag | Install with `docker`\n" + comment += "--------|-----|----------------------\n" + + for URL, artifact in artifacts: + if artifact.endswith(".tar.gz"): + image_name = artifact.split("/").pop()[: -len(".tar.gz")] + if ':' in image_name: + package_name, tag = image_name.split(':', 1) + #image_url = URL[:-3] # trim off zip from format= + #image_url += "file&subPath=%2F{}.tar.gz".format("%2F".join(["images", '%3A'.join([package_name, tag])])) + comment += f"[{package_name}] | {tag} | " + comment += f'
show`gzip -dc LinuxArtifacts/images/{image_name}.tar.gz \\| docker load`\n' + comment += "\n\n" + else: + comment = ( + "No artifacts found on the most recent Azure build. " + "Either the build failed, the artifacts have were removed due to age, or the recipe was blacklisted/skipped." + ) + await send_comment(session, pr, comment) + + +# Post a comment on a given PR with its CircleCI artifacts +async def artifact_checker(session: ClientSession, issue_number: int) -> None: + url = f"https://api.github.com/repos/bioconda/bioconda-recipes/pulls/{issue_number}" + headers = { + "User-Agent": "BiocondaCommentResponder", + } + async with session.get(url, headers=headers) as response: + response.raise_for_status() + res = await response.text() + pr_info = safe_load(res) + + await make_artifact_comment(session, issue_number, pr_info["head"]["sha"]) + + +# Return true if a user is a member of bioconda +async def is_bioconda_member(session: ClientSession, user: str) -> bool: + token = os.environ["BOT_TOKEN"] + url = f"https://api.github.com/orgs/bioconda/members/{user}" + headers = { + "Authorization": f"token {token}", + "User-Agent": "BiocondaCommentResponder", + } + rc = 404 + async with session.get(url, headers=headers) as response: + try: + response.raise_for_status() + rc = response.status + except: + # Do nothing, this just prevents things from crashing on 404 + pass + + return rc == 204 + + +# Reposts a quoted message in a given issue/PR if the user isn't a bioconda member +async def comment_reposter(session: ClientSession, user: str, pr: int, message: str) -> None: + if await is_bioconda_member(session, user): + log("Not reposting for %s", user) + return + log("Reposting for %s", user) + await send_comment( + session, + pr, + f"Reposting for @{user} to enable pings (courtesy of the BiocondaBot):\n\n> {message}", + ) + + +# Fetch and return the JSON of a PR +# This can be run to trigger a test merge +async def get_pr_info(session: ClientSession, pr: int) -> Any: + 
token = os.environ["BOT_TOKEN"] + url = f"https://api.github.com/repos/bioconda/bioconda-recipes/pulls/{pr}" + headers = { + "Authorization": f"token {token}", + "User-Agent": "BiocondaCommentResponder", + } + async with session.get(url, headers=headers) as response: + response.raise_for_status() + res = await response.text() + pr_info = safe_load(res) + return pr_info + + +# Update a branch from upstream master, this should be run in a try/catch +async def update_from_master_runner(session: ClientSession, pr: int) -> None: + async def git(*args: str) -> None: + return await async_exec("git", *args) + + # Setup git, otherwise we can't push + await git("config", "--global", "user.email", "biocondabot@gmail.com") + await git("config", "--global", "user.name", "BiocondaBot") + + pr_info = await get_pr_info(session, pr) + remote_branch = pr_info["head"]["ref"] + remote_repo = pr_info["head"]["repo"]["full_name"] + + max_depth = 2000 + # Clone + await git( + "clone", + f"--depth={max_depth}", + f"--branch={remote_branch}", + f"git@github.com:{remote_repo}.git", + "bioconda-recipes", + ) + + async def git_c(*args: str) -> None: + return await git("-C", "bioconda-recipes", *args) + + # Add/pull upstream + await git_c("remote", "add", "upstream", "https://github.com/bioconda/bioconda-recipes") + await git_c("fetch", f"--depth={max_depth}", "upstream", "master") + + # Merge + await git_c("merge", "upstream/master") + + await git_c("push") + + +# Merge the upstream master branch into a PR branch, leave a message on error +async def update_from_master(session: ClientSession, pr: int) -> None: + try: + await update_from_master_runner(session, pr) + except Exception as e: + await send_comment( + session, + pr, + "I encountered an error updating your PR branch. 
You can report this to bioconda/core if you'd like.\n-The Bot", + ) + sys.exit(1) + + +# Ensure there's at least one approval by a member +async def approval_review(session: ClientSession, issue_number: int) -> bool: + token = os.environ["BOT_TOKEN"] + url = f"https://api.github.com/repos/bioconda/bioconda-recipes/pulls/{issue_number}/reviews" + headers = { + "Authorization": f"token {token}", + "User-Agent": "BiocondaCommentResponder", + } + async with session.get(url, headers=headers) as response: + response.raise_for_status() + res = await response.text() + reviews = safe_load(res) + + approved_reviews = [review for review in reviews if review["state"] == "APPROVED"] + if not approved_reviews: + return False + + # Ensure the review author is a member + return any( + gather( + *( + is_bioconda_member(session, review["user"]["login"]) + for review in approved_reviews + ) + ) + ) + + +# Check the mergeable state of a PR +async def check_is_mergeable( + session: ClientSession, issue_number: int, second_try: bool = False +) -> bool: + token = os.environ["BOT_TOKEN"] + # Sleep a couple of seconds to allow the background process to finish + if second_try: + await sleep(3) + + # PR info + url = f"https://api.github.com/repos/bioconda/bioconda-recipes/pulls/{issue_number}" + headers = { + "Authorization": f"token {token}", + "User-Agent": "BiocondaCommentResponder", + } + async with session.get(url, headers=headers) as response: + response.raise_for_status() + res = await response.text() + pr_info = safe_load(res) + + # We need mergeable == true and mergeable_state == clean, an approval by a member and + if pr_info.get("mergeable") is None and not second_try: + return await check_is_mergeable(session, issue_number, True) + elif ( + pr_info.get("mergeable") is None + or not pr_info["mergeable"] + or pr_info["mergeable_state"] != "clean" + ): + return False + + return await approval_review(session, issue_number) + + +# Ensure uploaded containers are in repos that have 
public visibility +async def toggle_visibility(session: ClientSession, container_repo: str) -> None: + url = f"https://quay.io/api/v1/repository/biocontainers/{container_repo}/changevisibility" + QUAY_OAUTH_TOKEN = os.environ["QUAY_OAUTH_TOKEN"] + headers = { + "Authorization": f"Bearer {QUAY_OAUTH_TOKEN}", + "Content-Type": "application/json", + } + body = {"visibility": "public"} + rc = 0 + try: + async with session.post(url, headers=headers, json=body) as response: + rc = response.status + except: + # Do nothing + pass + log("Trying to toggle visibility (%s) returned %d", url, rc) + + +# Download an artifact from CircleCI, rename and upload it +async def download_and_upload(session: ClientSession, x: str) -> None: + basename = x.split("/").pop() + # the tarball needs a regular name without :, the container needs pkg:tag + image_name = basename.replace("%3A", ":").replace("\n", "").replace(".tar.gz", "") + file_name = basename.replace("%3A", "_").replace("\n", "") + + async with session.get(x) as response: + with open(file_name, "wb") as file: + logged = 0 + loaded = 0 + while chunk := await response.content.read(256 * 1024): + file.write(chunk) + loaded += len(chunk) + if loaded - logged >= 50 * 1024 ** 2: + log("Downloaded %.0f MiB: %s", max(1, loaded / 1024 ** 2), x) + logged = loaded + log("Downloaded %.0f MiB: %s", max(1, loaded / 1024 ** 2), x) + + if x.endswith(".gz"): + # Container + log("uploading with skopeo: %s", file_name) + # This can fail, retry with 5 second delays + count = 0 + maxTries = 5 + success = False + QUAY_LOGIN = os.environ["QUAY_LOGIN"] + env = os.environ.copy() + # TODO: Fix skopeo package to find certificates on its own. 
+ skopeo_path = which("skopeo") + if not skopeo_path: + raise RuntimeError("skopeo not found") + env["SSL_CERT_DIR"] = str(Path(skopeo_path).parents[1].joinpath("ssl")) + while count < maxTries: + try: + await async_exec( + "skopeo", + "--command-timeout", + "600s", + "copy", + f"docker-archive:{file_name}", + f"docker://quay.io/biocontainers/{image_name}", + "--dest-creds", + QUAY_LOGIN, + env=env, + ) + success = True + break + except: + count += 1 + if count == maxTries: + raise + await sleep(5) + if success: + await toggle_visibility(session, basename.split("%3A")[0]) + elif x.endswith(".bz2"): + # Package + log("uploading package") + ANACONDA_TOKEN = os.environ["ANACONDA_TOKEN"] + await async_exec("anaconda", "-t", ANACONDA_TOKEN, "upload", file_name, "--force") + + log("cleaning up") + os.remove(file_name) + + +# Upload artifacts to quay.io and anaconda, return the commit sha +# Only call this for mergeable PRs! +async def upload_artifacts(session: ClientSession, pr: int) -> str: + # Get last sha + pr_info = await get_pr_info(session, pr) + sha: str = pr_info["head"]["sha"] + + # Fetch the artifacts + artifacts = await fetch_pr_sha_artifacts(session, pr, sha) + artifacts = [artifact for artifact in artifacts if artifact.endswith((".gz", ".bz2"))] + assert artifacts + + # Download/upload Artifacts + for artifact in artifacts: + await download_and_upload(session, artifact) + + return sha + + +# Assume we have no more than 250 commits in a PR, which is probably reasonable in most cases +async def get_pr_commit_message(session: ClientSession, issue_number: int) -> str: + token = os.environ["BOT_TOKEN"] + url = f"https://api.github.com/repos/bioconda/bioconda-recipes/pulls/{issue_number}/commits" + headers = { + "Authorization": f"token {token}", + "User-Agent": "BiocondaCommentResponder", + } + async with session.get(url, headers=headers) as response: + response.raise_for_status() + res = await response.text() + commits = safe_load(res) + message = "".join(f" * 
{commit['commit']['message']}\n" for commit in reversed(commits)) + return message + + +# Merge a PR +async def merge_pr(session: ClientSession, pr: int) -> None: + token = os.environ["BOT_TOKEN"] + await send_comment( + session, + pr, + "I will attempt to upload artifacts and merge this PR. This may take some time, please have patience.", + ) + + try: + mergeable = await check_is_mergeable(session, pr) + log("mergeable state of %s is %s", pr, mergeable) + if not mergeable: + await send_comment(session, pr, "Sorry, this PR cannot be merged at this time.") + else: + log("uploading artifacts") + sha = await upload_artifacts(session, pr) + log("artifacts uploaded") + + # Carry over last 250 commit messages + msg = await get_pr_commit_message(session, pr) + + # Hit merge + url = f"https://api.github.com/repos/bioconda/bioconda-recipes/pulls/{pr}/merge" + headers = { + "Authorization": f"token {token}", + "User-Agent": "BiocondaCommentResponder", + } + payload = { + "sha": sha, + "commit_title": f"[ci skip] Merge PR {pr}", + "commit_message": f"Merge PR #{pr}, commits were: \n{msg}", + "merge_method": "squash", + } + log("Putting merge commit") + async with session.put(url, headers=headers, json=payload) as response: + rc = response.status + log("body %s", payload) + log("merge_pr the response code was %s", rc) + except: + await send_comment( + session, + pr, + "I received an error uploading the build artifacts or merging the PR!", + ) + logger.exception("Upload failed", exc_info=True) + + +# Add the "Please review and merge" label to a PR +async def add_pr_label(session: ClientSession, pr: int) -> None: + token = os.environ["BOT_TOKEN"] + url = f"https://api.github.com/repos/bioconda/bioconda-recipes/issues/{pr}/labels" + headers = { + "Authorization": f"token {token}", + "User-Agent": "BiocondaCommentResponder", + } + payload = {"labels": ["please review & merge"]} + async with session.post(url, headers=headers, json=payload) as response: + response.raise_for_status() 
+ + +async def gitter_message(session: ClientSession, msg: str) -> None: + token = os.environ["GITTER_TOKEN"] + room_id = "57f3b80cd73408ce4f2bba26" + url = f"https://api.gitter.im/v1/rooms/{room_id}/chatMessages" + headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "application/json", + "Accept": "application/json", + "User-Agent": "BiocondaCommentResponder", + } + payload = {"text": msg} + log("Sending request to %s", url) + async with session.post(url, headers=headers, json=payload) as response: + response.raise_for_status() + + +async def notify_ready(session: ClientSession, pr: int) -> None: + try: + await gitter_message( + session, + f"PR ready for review: https://github.com/bioconda/bioconda-recipes/pull/{pr}", + ) + except Exception: + logger.exception("Posting to Gitter failed", exc_info=True) + # Do not die if we can't post to gitter! + + +# This requires that a JOB_CONTEXT environment variable, which is made with `toJson(github)` +async def main() -> None: + job_context = safe_load(os.environ["JOB_CONTEXT"]) + log("%s", job_context) + if job_context["event"]["issue"].get("pull_request") is None: + return + issue_number = job_context["event"]["issue"]["number"] + + original_comment = job_context["event"]["comment"]["body"] + log("the comment is: %s", original_comment) + + comment = original_comment.lower() + async with ClientSession() as session: + if comment.startswith(("@bioconda-bot", "@biocondabot")): + if "please update" in comment: + await update_from_master(session, issue_number) + elif " hello" in comment: + await send_comment(session, issue_number, "Yes?") + elif " please fetch artifacts" in comment or " please fetch artefacts" in comment: + await artifact_checker(session, issue_number) + elif " please merge" in comment: + await send_comment(session, issue_number, "Sorry, I'm currently disabled") + #await merge_pr(session, issue_number) + elif " please add label" in comment: + await add_pr_label(session, issue_number) + await 
notify_ready(session, issue_number) + # else: + # # Methods in development can go below, flanked by checking who is running them + # if job_context["actor"] != "dpryan79": + # console.log("skipping") + # sys.exit(0) + elif "@bioconda/" in comment: + await comment_reposter( + session, job_context["actor"], issue_number, original_comment + ) + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + run(main()) diff --git a/images/bioconda-utils-build-env-cos7/Dockerfile b/images/bioconda-utils-build-env-cos7/Dockerfile new file mode 100644 index 00000000000..55c2b309d7d --- /dev/null +++ b/images/bioconda-utils-build-env-cos7/Dockerfile @@ -0,0 +1,70 @@ +ARG base_image +FROM ${base_image} as base + +ARG BUSYBOX_IMAGE +COPY --from=${BUSYBOX_IMAGE} /usr/lib/locale/C.utf8 /usr/lib/locale/C.utf8 + +# Provide system deps unconditionally until we are able to offer per-recipe installs. +# (Addresses, e.g., "ImportError: libGL.so.1" in tests directly invoked by conda-build.) +# Also install packages that have been installed historically (openssh-client). +RUN yum install -y mesa-libGL-devel \ + && \ + yum install -y openssh-clients \ + && \ + yum install -y git \ + && \ + yum clean all && \ + rm -rf /var/cache/yum/* + +# This changes root's .condarc which ENTRYPOINT copies to /home/conda/.condarc later. +RUN . /opt/conda/etc/profile.d/conda.sh && \ + conda config \ + --add channels defaults \ + --add channels bioconda \ + --add channels conda-forge \ + && \ + { conda config --remove repodata_fns current_repodata.json 2> /dev/null || true ; } && \ + conda config --prepend repodata_fns repodata.json && \ + conda config --set channel_priority strict && \ + conda config --set auto_update_conda False + +FROM base as build +WORKDIR /tmp/repo +ARG BIOCONDA_UTILS_FOLDER=./bioconda-utils +COPY ${BIOCONDA_UTILS_FOLDER} ./ + +# Make sure we're using the configured version of bioconda-utils for this +# build. +RUN git checkout ${bioconda_utils_version} +RUN . 
/opt/conda/etc/profile.d/conda.sh && conda list +RUN . /opt/conda/etc/profile.d/conda.sh && conda activate base && \ + pip wheel . && \ + mkdir - /opt/bioconda-utils && \ + cp ./bioconda_utils-*.whl \ + ./bioconda_utils/bioconda_utils-requirements.txt \ + /opt/bioconda-utils/ \ + && \ + chgrp -R lucky /opt/bioconda-utils && \ + chmod -R g=u /opt/bioconda-utils + +FROM base +COPY --from=build /opt/bioconda-utils /opt/bioconda-utils +RUN . /opt/conda/etc/profile.d/conda.sh && conda activate base && \ + # Make sure we get the (working) conda we want before installing the rest. + sed -nE \ + '/^conda([>/d' recipe/meta.yaml \ +# && \ +# conda-build -m .ci_support/linux_64_.yaml recipe/ +ARG packages= +ARG python=3.8 +ARG prefix=/usr/local +RUN . /opt/create-env/env-activate.sh && \ + export CONDA_ADD_PIP_AS_PYTHON_DEPENDENCY=0 \ + && \ + create-env \ + --conda=mamba \ + --strip-files=\* \ + --remove-paths=\*.a \ + --remove-paths=\*.c \ + --remove-paths=\*.pyc \ + --remove-paths=\*.pyi \ + --remove-paths=\*.pyx \ + --remove-paths=\*.pyx \ + --remove-paths=include/\* \ + --remove-paths=share/doc/\* \ + --remove-paths=share/man/\* \ + --remove-paths='share/terminfo/[!x]/*' \ + --remove-paths=share/locale/\* \ + --remove-paths=lib/python*/ensurepip/\* \ + "${prefix}" \ + --channel=local \ + --channel=conda-forge \ + --override-channels \ + pip wheel setuptools \ + python="${python}" \ + aiohttp \ + ca-certificates \ + idna\<3 \ + pyyaml \ + ${packages} \ + && \ + # Remove tk since no tkinter & co. are needed. + conda remove \ + --yes \ + --force-remove \ + --prefix="${prefix}" \ + tk \ + && \ + # Get rid of Perl pulled in by Git. + # (Bot only uses non-Perl Git functionality => remove baggage.) + if conda list --prefix="${prefix}" | grep -q '^perl\s' ; then \ + conda remove \ + --yes \ + --force-remove \ + --prefix="${prefix}" \ + perl \ + ; fi +# Install bioconda_bot. +WORKDIR /tmp/bot +COPY . ./ +RUN . "${prefix}/env-activate.sh" && \ + pip wheel --no-deps . 
\ + && \ + pip install --no-deps --find-links . bioconda_bot + +FROM "${base}" +COPY --from=build /usr/local /usr/local diff --git a/images/bot/Dockerfile.test b/images/bot/Dockerfile.test new file mode 100644 index 00000000000..5a6fdcbbd5b --- /dev/null +++ b/images/bot/Dockerfile.test @@ -0,0 +1,9 @@ +ARG base +FROM "${base}" +RUN . /usr/local/env-activate.sh && \ + ls -lA /usr/local/conda-meta/*.json && \ + bioconda-bot --help && \ + bioconda-bot comment --help && \ + bioconda-bot merge --help && \ + bioconda-bot update --help && \ + bioconda-bot change --help diff --git a/images/bot/pyproject.toml b/images/bot/pyproject.toml new file mode 100644 index 00000000000..9787c3bdf00 --- /dev/null +++ b/images/bot/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" diff --git a/images/bot/setup.cfg b/images/bot/setup.cfg new file mode 100644 index 00000000000..749dfc7ed74 --- /dev/null +++ b/images/bot/setup.cfg @@ -0,0 +1,20 @@ +[metadata] +name = bioconda-bot +version = 0.0.1 + +[options] +python_requires = >=3.8 +install_requires = + aiohttp + PyYaml + +packages = find: +package_dir = + = src + +[options.packages.find] +where = src + +[options.entry_points] +console_scripts = + bioconda-bot = bioconda_bot.cli:main diff --git a/images/bot/src/bioconda_bot/__init__.py b/images/bot/src/bioconda_bot/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/images/bot/src/bioconda_bot/automerge.py b/images/bot/src/bioconda_bot/automerge.py new file mode 100644 index 00000000000..a09ee4148d0 --- /dev/null +++ b/images/bot/src/bioconda_bot/automerge.py @@ -0,0 +1,138 @@ +import logging +import os + +from typing import Any, Dict, List, Optional, Set, Tuple + +from aiohttp import ClientSession +from yaml import safe_load + +from .common import ( + get_job_context, + get_prs_for_sha, + get_sha_for_status_check, + get_sha_for_workflow_run, +) +from .merge import MergeState, request_merge 
+ +logger = logging.getLogger(__name__) +log = logger.info + + +async def get_pr_labels(session: ClientSession, pr: int) -> Set[str]: + token = os.environ["BOT_TOKEN"] + url = f"https://api.github.com/repos/bioconda/bioconda-recipes/issues/{pr}/labels" + headers = { + "Authorization": f"token {token}", + "User-Agent": "BiocondaCommentResponder", + } + async with session.get(url, headers=headers) as response: + response.raise_for_status() + res = await response.text() + labels = safe_load(res) + return {label["name"] for label in labels} + + +async def is_automerge_labeled(session: ClientSession, pr: int) -> bool: + labels = await get_pr_labels(session, pr) + return "automerge" in labels + + +async def merge_if_labeled(session: ClientSession, pr: int) -> MergeState: + if not await is_automerge_labeled(session, pr): + return MergeState.UNKNOWN + return await request_merge(session, pr) + + +async def get_check_runs(session: ClientSession, sha: str) -> Any: + url = f"https://api.github.com/repos/bioconda/bioconda-recipes/commits/{sha}/check-runs" + + headers = { + "User-Agent": "BiocondaCommentResponder", + "Accept": "application/vnd.github.antiope-preview+json", + } + async with session.get(url, headers=headers) as response: + response.raise_for_status() + res = await response.text() + check_runs = [ + check_run + for check_run in safe_load(res)["check_runs"] or [] + if check_run["name"] != "bioconda-bot automerge" + ] + log("Got %d check_runs for SHA %s", len(check_runs or []), sha) + return check_runs + + +async def all_checks_completed(session: ClientSession, sha: str) -> bool: + check_runs = await get_check_runs(session, sha) + + is_all_completed = all(check_run["status"] == "completed" for check_run in check_runs) + if not is_all_completed: + log("Some check_runs are not completed yet.") + for i, check_run in enumerate(check_runs, 1): + log("check_run %d / %d: %s", i, len(check_runs), check_run) + return is_all_completed + + +async def all_checks_passed(session: 
ClientSession, sha: str) -> bool: + check_runs = await get_check_runs(session, sha) + + # TODO: "neutral" might be a valid conclusion to consider in the future. + valid_conclusions = {"success", "skipped"} + if any(check_run["conclusion"] not in valid_conclusions for check_run in check_runs): + log(f"Some check_runs are not marked as {'/'.join(valid_conclusions)} yet.") + for i, check_run in enumerate(check_runs, 1): + log("check_run %d / %d: %s", i, len(check_runs), check_run) + return False + return True + + +async def merge_automerge_passed(sha: str) -> None: + async with ClientSession() as session: + if not await all_checks_passed(session, sha): + return + prs = await get_prs_for_sha(session, sha) + if not prs: + log("No PRs found for SHA %s", sha) + for pr in prs: + merge_state = await merge_if_labeled(session, pr) + log("PR %d has merge state %s", pr, merge_state) + if merge_state is MergeState.MERGED: + break + + +async def get_sha_for_review(job_context: Dict[str, Any]) -> Optional[str]: + if job_context["event_name"] != "pull_request_review": + return None + log("Got %s event", "pull_request_review") + event = job_context["event"] + if event["review"]["state"] != "approved": + return None + sha: Optional[str] = event["pull_request"]["head"]["sha"] + log("Use %s event SHA %s", "pull_request_review", sha) + return sha + + +async def get_sha_for_labeled_pr(job_context: Dict[str, Any]) -> Optional[str]: + if job_context["event_name"] != "pull_request": + return None + log("Got %s event", "pull_request") + event = job_context["event"] + if event["action"] != "labeled" or event["label"]["name"] != "automerge": + return None + sha: Optional[str] = event["pull_request"]["head"]["sha"] + log("Use %s event SHA %s", "pull_request", sha) + return sha + + +# This requires that a JOB_CONTEXT environment variable, which is made with `toJson(github)` +async def main() -> None: + job_context = await get_job_context() + + sha = ( + await 
get_sha_for_status_check(job_context) + or await get_sha_for_workflow_run(job_context) + or await get_sha_for_review(job_context) + or await get_sha_for_labeled_pr(job_context) + ) + if sha: + await merge_automerge_passed(sha) diff --git a/images/bot/src/bioconda_bot/changeVisibility.py b/images/bot/src/bioconda_bot/changeVisibility.py new file mode 100644 index 00000000000..ba036f83479 --- /dev/null +++ b/images/bot/src/bioconda_bot/changeVisibility.py @@ -0,0 +1,63 @@ +import logging +import os +import re +import sys +from asyncio import gather, sleep +from asyncio.subprocess import create_subprocess_exec +from enum import Enum, auto +from pathlib import Path +from shutil import which +from typing import Any, Dict, List, Optional, Set, Tuple +from zipfile import ZipFile, ZipInfo + +from aiohttp import ClientSession +from yaml import safe_load + +from .common import ( + async_exec, + fetch_pr_sha_artifacts, + get_job_context, + get_pr_comment, + get_pr_info, + is_bioconda_member, + send_comment, +) + +logger = logging.getLogger(__name__) +log = logger.info + + +# Ensure uploaded containers are in repos that have public visibility +# TODO: This should ping @bioconda/core if it fails +async def toggle_visibility(session: ClientSession, container_repo: str) -> None: + url = f"https://quay.io/api/v1/repository/biocontainers/{container_repo}/changevisibility" + QUAY_OAUTH_TOKEN = os.environ["QUAY_OAUTH_TOKEN"] + headers = { + "Authorization": f"Bearer {QUAY_OAUTH_TOKEN}", + "Content-Type": "application/json", + } + body = {"visibility": "public"} + rc = 0 + try: + async with session.post(url, headers=headers, json=body) as response: + rc = response.status + except: + # Do nothing + pass + log("Trying to toggle visibility (%s) returned %d", url, rc) + + +# This requires that a JOB_CONTEXT environment variable, which is made with `toJson(github)` +async def main() -> None: + job_context = await get_job_context() + issue_number, original_comment = await 
get_pr_comment(job_context) + if issue_number is None or original_comment is None: + return + + comment = original_comment.lower() + if comment.startswith(("@bioconda-bot", "@biocondabot")): + if " please toggle visibility" in comment: + pkg = comment.split("please change visibility")[1].strip().split()[0] + async with ClientSession() as session: + await toggle_visibility(session, pkg) + await send_comment(session, issue_number, "Visibility changed.") diff --git a/images/bot/src/bioconda_bot/cli.py b/images/bot/src/bioconda_bot/cli.py new file mode 100644 index 00000000000..a88601d5370 --- /dev/null +++ b/images/bot/src/bioconda_bot/cli.py @@ -0,0 +1,81 @@ +from logging import INFO, basicConfig + +from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser +from asyncio import run +from typing import List, Optional + + +def build_parser_comment(parser: ArgumentParser) -> None: + def run_command() -> None: + from .comment import main as main_ + + run(main_()) + + parser.set_defaults(run_command=run_command) + + +def build_parser_merge(parser: ArgumentParser) -> None: + def run_command() -> None: + from .merge import main as main_ + + run(main_()) + + parser.set_defaults(run_command=run_command) + + +def build_parser_update(parser: ArgumentParser) -> None: + def run_command() -> None: + from .update import main as main_ + + run(main_()) + + parser.set_defaults(run_command=run_command) + + +def build_parser_automerge(parser: ArgumentParser) -> None: + def run_command() -> None: + from .automerge import main as main_ + + run(main_()) + + parser.set_defaults(run_command=run_command) + + +def build_parser_changeVisibility(parser: ArgumentParser) -> None: + def run_command() -> None: + from .changeVisibility import main as main_ + + run(main_()) + + parser.set_defaults(run_command=run_command) + + +def get_argument_parser() -> ArgumentParser: + parser = ArgumentParser( + prog="bioconda-bot", + formatter_class=ArgumentDefaultsHelpFormatter, + ) + sub_parsers = 
parser.add_subparsers( + dest="command", + required=True, + ) + for command_name, build_parser in ( + ("comment", build_parser_comment), + ("merge", build_parser_merge), + ("update", build_parser_update), + ("automerge", build_parser_automerge), + ("change", build_parser_changeVisibility), + ): + sub_parser = sub_parsers.add_parser( + command_name, + formatter_class=ArgumentDefaultsHelpFormatter, + ) + build_parser(sub_parser) + return parser + + +def main(args: Optional[List[str]] = None) -> None: + basicConfig(level=INFO) + parser = get_argument_parser() + parsed_args = parser.parse_args(args) + parsed_args.run_command() diff --git a/images/bot/src/bioconda_bot/comment.py b/images/bot/src/bioconda_bot/comment.py new file mode 100644 index 00000000000..eb9e13fb7b0 --- /dev/null +++ b/images/bot/src/bioconda_bot/comment.py @@ -0,0 +1,197 @@ +import logging +import os +import re + +from aiohttp import ClientSession +from yaml import safe_load + +from .common import ( + async_exec, + fetch_pr_sha_artifacts, + get_job_context, + get_pr_comment, + get_pr_info, + get_prs_for_sha, + get_sha_for_status_check, + is_bioconda_member, + send_comment, +) + +logger = logging.getLogger(__name__) +log = logger.info + + +# Given a PR and commit sha, post a comment with any artifacts +async def make_artifact_comment(session: ClientSession, pr: int, sha: str) -> None: + artifacts = await fetch_pr_sha_artifacts(session, pr, sha) + nPackages = len(artifacts) + + if nPackages > 0: + comment = "Package(s) built on Azure are ready for inspection:\n\n" + comment += "Arch | Package | Zip File\n-----|---------|---------\n" + install_noarch = "" + install_linux = "" + install_osx = "" + + # Table of packages and repodata.json + for URL, artifact in artifacts: + if not (package_match := re.match(r"^((.+)\/(.+)\/(.+)\/(.+\.tar\.bz2))$", artifact)): + continue + url, archdir, basedir, subdir, packageName = package_match.groups() + urlBase = URL[:-3] # trim off zip from format= + urlBase += 
"file&subPath=%2F{}".format("%2F".join([basedir, subdir])) + conda_install_url = urlBase + # N.B., the zip file URL is nearly identical to the URL for the individual member files. It's unclear if there's an API for getting the correct URL to the files themselves + #pkgUrl = "%2F".join([urlBase, packageName]) + #repoUrl = "%2F".join([urlBase, "current_repodata.json"]) + #resp = await session.get(repoUrl) + + if subdir == "noarch": + comment += "noarch |" + elif subdir == "linux-64": + comment += "linux-64 |" + elif subdir == "linux-aarch64": + comment += "linux-aarch64 |" + else: + comment += "osx-64 |" + comment += f" {packageName} | [{archdir}]({URL})\n" + + # Conda install examples + comment += "***\n\nYou may also use `conda` to install these after downloading and extracting the appropriate zip file. From the LinuxArtifacts or OSXArtifacts directories:\n\n" + comment += "```\nconda install -c ./packages \n```\n" + + # Table of containers + comment += "***\n\nDocker image(s) built (images are in the LinuxArtifacts zip file above):\n\n" + comment += "Package | Tag | Install with `docker`\n" + comment += "--------|-----|----------------------\n" + + for URL, artifact in artifacts: + if artifact.endswith(".tar.gz"): + image_name = artifact.split("/").pop()[: -len(".tar.gz")] + if ':' in image_name: + package_name, tag = image_name.split(':', 1) + #image_url = URL[:-3] # trim off zip from format= + #image_url += "file&subPath=%2F{}.tar.gz".format("%2F".join(["images", '%3A'.join([package_name, tag])])) + comment += f"{package_name} | {tag} | " + comment += f'
show`gzip -dc LinuxArtifacts/images/{image_name}.tar.gz \\| docker load`\n' + comment += "\n\n" + else: + comment = ( + "No artifacts found on the most recent Azure build. " + "Either the build failed, the artifacts have were removed due to age, or the recipe was blacklisted/skipped." + ) + await send_comment(session, pr, comment) + + +# Post a comment on a given PR with its CircleCI artifacts +async def artifact_checker(session: ClientSession, issue_number: int) -> None: + url = f"https://api.github.com/repos/bioconda/bioconda-recipes/pulls/{issue_number}" + headers = { + "User-Agent": "BiocondaCommentResponder", + } + async with session.get(url, headers=headers) as response: + response.raise_for_status() + res = await response.text() + pr_info = safe_load(res) + + await make_artifact_comment(session, issue_number, pr_info["head"]["sha"]) + + +# Reposts a quoted message in a given issue/PR if the user isn't a bioconda member +async def comment_reposter(session: ClientSession, user: str, pr: int, message: str) -> None: + if await is_bioconda_member(session, user): + log("Not reposting for %s", user) + return + log("Reposting for %s", user) + await send_comment( + session, + pr, + f"Reposting for @{user} to enable pings (courtesy of the BiocondaBot):\n\n> {message}", + ) + + +# Add the "Please review and merge" label to a PR +async def add_pr_label(session: ClientSession, pr: int) -> None: + token = os.environ["BOT_TOKEN"] + url = f"https://api.github.com/repos/bioconda/bioconda-recipes/issues/{pr}/labels" + headers = { + "Authorization": f"token {token}", + "User-Agent": "BiocondaCommentResponder", + } + payload = {"labels": ["please review & merge"]} + async with session.post(url, headers=headers, json=payload) as response: + response.raise_for_status() + + +async def gitter_message(session: ClientSession, msg: str) -> None: + token = os.environ["GITTER_TOKEN"] + room_id = "57f3b80cd73408ce4f2bba26" + url = f"https://api.gitter.im/v1/rooms/{room_id}/chatMessages" 
+ headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "application/json", + "Accept": "application/json", + "User-Agent": "BiocondaCommentResponder", + } + payload = {"text": msg} + log("Sending request to %s", url) + async with session.post(url, headers=headers, json=payload) as response: + response.raise_for_status() + + +async def notify_ready(session: ClientSession, pr: int) -> None: + try: + await gitter_message( + session, + f"PR ready for review: https://github.com/bioconda/bioconda-recipes/pull/{pr}", + ) + except Exception: + logger.exception("Posting to Gitter failed", exc_info=True) + # Do not die if we can't post to gitter! + + +# This requires that a JOB_CONTEXT environment variable, which is made with `toJson(github)` +async def main() -> None: + job_context = await get_job_context() + + sha = await get_sha_for_status_check(job_context) + if sha: + # This is a successful status or check_suite event => post artifact lists. + async with ClientSession() as session: + for pr in await get_prs_for_sha(session, sha): + await artifact_checker(session, pr) + return + + issue_number, original_comment = await get_pr_comment(job_context) + if issue_number is None or original_comment is None: + return + + comment = original_comment.lower() + async with ClientSession() as session: + if comment.startswith(("@bioconda-bot", "@biocondabot")): + if "please update" in comment: + log("This should have been directly invoked via bioconda-bot-update") + from .update import update_from_master + + await update_from_master(session, issue_number) + elif " hello" in comment: + await send_comment(session, issue_number, "Yes?") + elif " please fetch artifacts" in comment or " please fetch artefacts" in comment: + await artifact_checker(session, issue_number) + #elif " please merge" in comment: + # await send_comment(session, issue_number, "Sorry, I'm currently disabled") + # #log("This should have been directly invoked via bioconda-bot-merge") + # #from .merge 
import request_merge + # #await request_merge(session, issue_number) + elif " please add label" in comment: + await add_pr_label(session, issue_number) + await notify_ready(session, issue_number) + # else: + # # Methods in development can go below, flanked by checking who is running them + # if job_context["actor"] != "dpryan79": + # console.log("skipping") + # sys.exit(0) + elif "@bioconda/" in comment: + await comment_reposter( + session, job_context["actor"], issue_number, original_comment + ) diff --git a/images/bot/src/bioconda_bot/common.py b/images/bot/src/bioconda_bot/common.py new file mode 100644 index 00000000000..565674fdd00 --- /dev/null +++ b/images/bot/src/bioconda_bot/common.py @@ -0,0 +1,249 @@ +import logging +import os +import re +import sys +from asyncio import gather, sleep +from asyncio.subprocess import create_subprocess_exec +from pathlib import Path +from shutil import which +from typing import Any, Dict, List, Optional, Set, Tuple +from zipfile import ZipFile + +from aiohttp import ClientSession +from yaml import safe_load + +logger = logging.getLogger(__name__) +log = logger.info + + +async def async_exec( + command: str, *arguments: str, env: Optional[Dict[str, str]] = None +) -> None: + process = await create_subprocess_exec(command, *arguments, env=env) + return_code = await process.wait() + if return_code != 0: + raise RuntimeError( + f"Failed to execute {command} {arguments} (return code: {return_code})" + ) + + +# Post a comment on a given issue/PR with text in message +async def send_comment(session: ClientSession, issue_number: int, message: str) -> None: + token = os.environ["BOT_TOKEN"] + url = ( + f"https://api.github.com/repos/bioconda/bioconda-recipes/issues/{issue_number}/comments" + ) + headers = { + "Authorization": f"token {token}", + "User-Agent": "BiocondaCommentResponder", + } + payload = {"body": message} + log("Sending comment: url=%s", url) + log("Sending comment: payload=%s", payload) + async with session.post(url, 
headers=headers, json=payload) as response: + status_code = response.status + log("the response code was %d", status_code) + if status_code < 200 or status_code > 202: + sys.exit(1) + + +# Return true if a user is a member of bioconda +async def is_bioconda_member(session: ClientSession, user: str) -> bool: + token = os.environ["BOT_TOKEN"] + url = f"https://api.github.com/orgs/bioconda/members/{user}" + headers = { + "Authorization": f"token {token}", + "User-Agent": "BiocondaCommentResponder", + } + rc = 404 + async with session.get(url, headers=headers) as response: + try: + response.raise_for_status() + rc = response.status + except: + # Do nothing, this just prevents things from crashing on 404 + pass + + return rc == 204 + + +# Fetch and return the JSON of a PR +# This can be run to trigger a test merge +async def get_pr_info(session: ClientSession, pr: int) -> Any: + token = os.environ["BOT_TOKEN"] + url = f"https://api.github.com/repos/bioconda/bioconda-recipes/pulls/{pr}" + headers = { + "Authorization": f"token {token}", + "User-Agent": "BiocondaCommentResponder", + } + async with session.get(url, headers=headers) as response: + response.raise_for_status() + res = await response.text() + pr_info = safe_load(res) + return pr_info + + +def list_zip_contents(fname: str) -> [str]: + f = ZipFile(fname) + return [e.filename for e in f.infolist() if e.filename.endswith('.tar.gz') or e.filename.endswith('.tar.bz2')] + + +# Download a zip file from url to zipName.zip and return that path +# Timeout is 30 minutes to compensate for any network issues +async def download_file(session: ClientSession, zipName: str, url: str) -> str: + async with session.get(url, timeout=60*30) as response: + if response.status == 200: + ofile = f"{zipName}.zip" + with open(ofile, 'wb') as fd: + while True: + chunk = await response.content.read(1024*1024*1024) + if not chunk: + break + fd.write(chunk) + return ofile + return None + + +# Find artifact zip files, download them and return 
their URLs and contents +async def fetch_azure_zip_files(session: ClientSession, buildId: str) -> [(str, str)]: + artifacts = [] + + url = f"https://dev.azure.com/bioconda/bioconda-recipes/_apis/build/builds/{buildId}/artifacts?api-version=4.1" + log("contacting azure %s", url) + async with session.get(url) as response: + # Sometimes we get a 301 error, so there are no longer artifacts available + if response.status == 301: + return artifacts + res = await response.text() + + res_object = safe_load(res) + if res_object['count'] == 0: + return artifacts + + for artifact in res_object['value']: + zipName = artifact['name'] # LinuxArtifacts or OSXArtifacts + zipUrl = artifact['resource']['downloadUrl'] + log(f"zip name is {zipName} url {zipUrl}") + fname = await download_file(session, zipName, zipUrl) + if not fname: + continue + pkgsImages = list_zip_contents(fname) + for pkg in pkgsImages: + artifacts.append((zipUrl, pkg)) + + return artifacts + + +def parse_azure_build_id(url: str) -> str: + return re.search("buildId=(\d+)", url).group(1) + + +# Given a PR and commit sha, fetch a list of the artifact zip files URLs and their contents +async def fetch_pr_sha_artifacts(session: ClientSession, pr: int, sha: str) -> List[Tuple[str, str]]: + url = f"https://api.github.com/repos/bioconda/bioconda-recipes/commits/{sha}/check-runs" + + headers = { + "User-Agent": "BiocondaCommentResponder", + "Accept": "application/vnd.github.antiope-preview+json", + } + async with session.get(url, headers=headers) as response: + response.raise_for_status() + res = await response.text() + check_runs = safe_load(res) + + for check_run in check_runs["check_runs"]: + # The names are "bioconda.bioconda-recipes (test_osx test_osx)" or similar + if check_run["name"].startswith("bioconda.bioconda-recipes (test_"): + # The azure build ID is in the details_url as buildId=\d+ + buildID = parse_azure_build_id(check_run["details_url"]) + zipFiles = await fetch_azure_zip_files(session, buildID) + 
return zipFiles # We've already fetched all possible artifacts + + return [] + + +async def get_sha_for_status(job_context: Dict[str, Any]) -> Optional[str]: + if job_context["event_name"] != "status": + return None + log("Got %s event", "status") + event = job_context["event"] + if event["state"] != "success": + return None + branches = event.get("branches") + if not branches: + return None + sha: Optional[str] = branches[0]["commit"]["sha"] + log("Use %s event SHA %s", "status", sha) + return sha + + +async def get_sha_for_check_suite_or_workflow( + job_context: Dict[str, Any], event_name: str +) -> Optional[str]: + if job_context["event_name"] != event_name: + return None + log("Got %s event", event_name) + event_source = job_context["event"][event_name] + if event_source["conclusion"] != "success": + return None + sha: Optional[str] = event_source.get("head_sha") + if not sha: + pull_requests = event_source.get("pull_requests") + if pull_requests: + sha = pull_requests[0]["head"]["sha"] + if not sha: + return None + log("Use %s event SHA %s", event_name, sha) + return sha + + +async def get_sha_for_check_suite(job_context: Dict[str, Any]) -> Optional[str]: + return await get_sha_for_check_suite_or_workflow(job_context, "check_suite") + + +async def get_sha_for_workflow_run(job_context: Dict[str, Any]) -> Optional[str]: + return await get_sha_for_check_suite_or_workflow(job_context, "workflow_run") + + +async def get_prs_for_sha(session: ClientSession, sha: str) -> List[int]: + headers = { + "User-Agent": "BiocondaCommentResponder", + "Accept": "application/vnd.github.v3+json", + } + pr_numbers: List[int] = [] + per_page = 100 + for page in range(1, 20): + url = ( + "https://api.github.com/repos/bioconda/bioconda-recipes/pulls" + f"?per_page={per_page}" + f"&page={page}" + ) + async with session.get(url, headers=headers) as response: + response.raise_for_status() + res = await response.text() + prs = safe_load(res) + pr_numbers.extend(pr["number"] for pr in prs 
if pr["head"]["sha"] == sha) + if len(prs) < per_page: + break + return pr_numbers + + +async def get_sha_for_status_check(job_context: Dict[str, Any]) -> Optional[str]: + return await get_sha_for_status(job_context) or await get_sha_for_check_suite(job_context) + + +async def get_job_context() -> Any: + job_context = safe_load(os.environ["JOB_CONTEXT"]) + log("%s", job_context) + return job_context + + +async def get_pr_comment(job_context: Dict[str, Any]) -> Tuple[Optional[int], Optional[str]]: + event = job_context["event"] + if event["issue"].get("pull_request") is None: + return None, None + issue_number = event["issue"]["number"] + + original_comment = event["comment"]["body"] + log("the comment is: %s", original_comment) + return issue_number, original_comment diff --git a/images/bot/src/bioconda_bot/merge.py b/images/bot/src/bioconda_bot/merge.py new file mode 100644 index 00000000000..455c7f31d39 --- /dev/null +++ b/images/bot/src/bioconda_bot/merge.py @@ -0,0 +1,371 @@ +import logging +import os +import re +import sys +from asyncio import gather, sleep +from asyncio.subprocess import create_subprocess_exec +from enum import Enum, auto +from pathlib import Path +from shutil import which +from typing import Any, Dict, List, Optional, Set, Tuple +from zipfile import ZipFile, ZipInfo + +from aiohttp import ClientSession +from yaml import safe_load + +from .common import ( + async_exec, + fetch_pr_sha_artifacts, + get_job_context, + get_pr_comment, + get_pr_info, + is_bioconda_member, + send_comment, +) + +logger = logging.getLogger(__name__) +log = logger.info + + +class MergeState(Enum): + UNKNOWN = auto() + MERGEABLE = auto() + NOT_MERGEABLE = auto() + NEEDS_REVIEW = auto() + MERGED = auto() + + +# Ensure there's at least one approval by a member +async def approval_review(session: ClientSession, issue_number: int) -> bool: + token = os.environ["BOT_TOKEN"] + url = f"https://api.github.com/repos/bioconda/bioconda-recipes/pulls/{issue_number}/reviews" + 
headers = { + "Authorization": f"token {token}", + "User-Agent": "BiocondaCommentResponder", + } + async with session.get(url, headers=headers) as response: + response.raise_for_status() + res = await response.text() + reviews = safe_load(res) + + approved_reviews = [review for review in reviews if review["state"] == "APPROVED"] + if not approved_reviews: + return False + + # Ensure the review author is a member + return any( + gather( + *( + is_bioconda_member(session, review["user"]["login"]) + for review in approved_reviews + ) + ) + ) + + +# Check the mergeable state of a PR +async def check_is_mergeable( + session: ClientSession, issue_number: int, second_try: bool = False +) -> MergeState: + token = os.environ["BOT_TOKEN"] + # Sleep a couple of seconds to allow the background process to finish + if second_try: + await sleep(3) + + # PR info + url = f"https://api.github.com/repos/bioconda/bioconda-recipes/pulls/{issue_number}" + headers = { + "Authorization": f"token {token}", + "User-Agent": "BiocondaCommentResponder", + } + async with session.get(url, headers=headers) as response: + response.raise_for_status() + res = await response.text() + pr_info = safe_load(res) + + if pr_info.get("merged"): + return MergeState.MERGED + + # We need mergeable == true and mergeable_state == clean, an approval by a member and + if pr_info.get("mergeable") is None and not second_try: + return await check_is_mergeable(session, issue_number, True) + + # Check approved reviews beforehand because we (somehow?) get NOT_MERGEABLE otherwise. 
+ if not await approval_review(session, issue_number): + return MergeState.NEEDS_REVIEW + + if ( + pr_info.get("mergeable") is None + or not pr_info["mergeable"] + or pr_info["mergeable_state"] != "clean" + ): + return MergeState.NOT_MERGEABLE + + return MergeState.MERGEABLE + + +# Ensure uploaded containers are in repos that have public visibility +# TODO: This should ping @bioconda/core if it fails +async def toggle_visibility(session: ClientSession, container_repo: str) -> None: + url = f"https://quay.io/api/v1/repository/biocontainers/{container_repo}/changevisibility" + QUAY_OAUTH_TOKEN = os.environ["QUAY_OAUTH_TOKEN"] + headers = { + "Authorization": f"Bearer {QUAY_OAUTH_TOKEN}", + "Content-Type": "application/json", + } + body = {"visibility": "public"} + rc = 0 + try: + async with session.post(url, headers=headers, json=body) as response: + rc = response.status + except: + # Do nothing + pass + log("Trying to toggle visibility (%s) returned %d", url, rc) + + +## Download an artifact from CircleCI, rename and upload it +#async def download_and_upload(session: ClientSession, x: str) -> None: +# basename = x.split("/").pop() +# # the tarball needs a regular name without :, the container needs pkg:tag +# image_name = basename.replace("%3A", ":").replace("\n", "").replace(".tar.gz", "") +# file_name = basename.replace("%3A", "_").replace("\n", "") +# +# async with session.get(x) as response: +# with open(file_name, "wb") as file: +# logged = 0 +# loaded = 0 +# while chunk := await response.content.read(256 * 1024): +# file.write(chunk) +# loaded += len(chunk) +# if loaded - logged >= 50 * 1024 ** 2: +# log("Downloaded %.0f MiB: %s", max(1, loaded / 1024 ** 2), x) +# logged = loaded +# log("Downloaded %.0f MiB: %s", max(1, loaded / 1024 ** 2), x) +# +# if x.endswith(".gz"): +# # Container +# log("uploading with skopeo: %s", file_name) +# # This can fail, retry with 5 second delays +# count = 0 +# maxTries = 5 +# success = False +# QUAY_LOGIN = 
os.environ["QUAY_LOGIN"] +# env = os.environ.copy() +# # TODO: Fix skopeo package to find certificates on its own. +# skopeo_path = which("skopeo") +# if not skopeo_path: +# raise RuntimeError("skopeo not found") +# env["SSL_CERT_DIR"] = str(Path(skopeo_path).parents[1].joinpath("ssl")) +# while count < maxTries: +# try: +# await async_exec( +# "skopeo", +# "--command-timeout", +# "600s", +# "copy", +# f"docker-archive:{file_name}", +# f"docker://quay.io/biocontainers/{image_name}", +# "--dest-creds", +# QUAY_LOGIN, +# env=env, +# ) +# success = True +# break +# except: +# count += 1 +# if count == maxTries: +# raise +# await sleep(5) +# if success: +# await toggle_visibility(session, basename.split("%3A")[0]) +# elif x.endswith(".bz2"): +# # Package +# log("uploading package") +# ANACONDA_TOKEN = os.environ["ANACONDA_TOKEN"] +# await async_exec("anaconda", "-t", ANACONDA_TOKEN, "upload", file_name, "--force") +# +# log("cleaning up") +# os.remove(file_name) + + +async def upload_package(session: ClientSession, zf: ZipFile, e: ZipInfo): + log(f"extracting {e.filename}") + fName = zf.extract(e) + + log(f"uploading {fName}") + ANACONDA_TOKEN = os.environ["ANACONDA_TOKEN"] + await async_exec("anaconda", "-t", ANACONDA_TOKEN, "upload", fName, "--force") + + log("cleaning up") + os.remove(fName) + + +async def upload_image(session: ClientSession, zf: ZipFile, e: ZipInfo): + basename = e.filename.split("/").pop() + image_name = basename.replace("\n", "").replace(".tar.gz", "") + + log(f"extracting {e.filename}") + fName = zf.extract(e) + # Skopeo can't handle a : in the file name, so we need to remove it + newFName = fName.replace(":", "") + os.rename(fName, newFName) + + log(f"uploading with skopeo: {newFName} {image_name}") + # This can fail, retry with 5 second delays + count = 0 + maxTries = 5 + success = False + QUAY_LOGIN = os.environ["QUAY_LOGIN"] + env = os.environ.copy() + # TODO: Fix skopeo package to find certificates on its own. 
+ skopeo_path = which("skopeo") + if not skopeo_path: + raise RuntimeError("skopeo not found") + env["SSL_CERT_DIR"] = str(Path(skopeo_path).parents[1].joinpath("ssl")) + while count < maxTries: + try: + await async_exec( + "skopeo", + "--command-timeout", + "600s", + "copy", + f"docker-archive:{newFName}", + f"docker://quay.io/biocontainers/{image_name}", + "--dest-creds", + QUAY_LOGIN, + env=env, + ) + success = True + break + except: + count += 1 + if count == maxTries: + raise + await sleep(5) + if success: + await toggle_visibility(session, basename.split(":")[0] if ":" in basename else basename.split("%3A")[0]) + + log("cleaning up") + os.remove(newFName) + + +# Given an already downloaded zip file name in the current working directory, upload the contents +async def extract_and_upload(session: ClientSession, fName: str) -> int: + if os.path.exists(fName): + zf = ZipFile(fName) + for e in zf.infolist(): + if e.filename.endswith('.tar.bz2'): + await upload_package(session, zf, e) + elif e.filename.endswith('.tar.gz'): + await upload_image(session, zf, e) + return 0 + return 1 + + +# Upload artifacts to quay.io and anaconda, return the commit sha +# Only call this for mergeable PRs! 
+async def upload_artifacts(session: ClientSession, pr: int) -> str: + # Get last sha + pr_info = await get_pr_info(session, pr) + sha: str = pr_info["head"]["sha"] + + # Fetch the artifacts (a list of (URL, artifact) tuples actually) + artifacts = await fetch_pr_sha_artifacts(session, pr, sha) + artifacts = [artifact for (URL, artifact) in artifacts if artifact.endswith((".gz", ".bz2"))] + assert artifacts + + # Download/upload Artifacts + for zipFileName in ["LinuxArtifacts.zip", "OSXArtifacts.zip"]: + await extract_and_upload(session, zipFileName) + + return sha + + +# Assume we have no more than 250 commits in a PR, which is probably reasonable in most cases +async def get_pr_commit_message(session: ClientSession, issue_number: int) -> str: + token = os.environ["BOT_TOKEN"] + url = f"https://api.github.com/repos/bioconda/bioconda-recipes/pulls/{issue_number}/commits" + headers = { + "Authorization": f"token {token}", + "User-Agent": "BiocondaCommentResponder", + } + async with session.get(url, headers=headers) as response: + response.raise_for_status() + res = await response.text() + commits = safe_load(res) + message = "".join(f" * {commit['commit']['message']}\n" for commit in reversed(commits)) + return message + + +# Merge a PR +async def merge_pr(session: ClientSession, pr: int, init_message: str) -> MergeState: + token = os.environ["BOT_TOKEN"] + mergeable = await check_is_mergeable(session, pr) + log("mergeable state of %s is %s", pr, mergeable) + if mergeable is not MergeState.MERGEABLE: + return mergeable + + if init_message: + await send_comment(session, pr, init_message) + try: + log("uploading artifacts") + sha = await upload_artifacts(session, pr) + log("artifacts uploaded") + + # Carry over last 250 commit messages + msg = await get_pr_commit_message(session, pr) + + # Hit merge + url = f"https://api.github.com/repos/bioconda/bioconda-recipes/pulls/{pr}/merge" + headers = { + "Authorization": f"token {token}", + "User-Agent": 
"BiocondaCommentResponder", + } + payload = { + "sha": sha, + "commit_title": f"[ci skip] Merge PR {pr}", + "commit_message": f"Merge PR #{pr}, commits were: \n{msg}", + "merge_method": "squash", + } + log("Putting merge commit") + async with session.put(url, headers=headers, json=payload) as response: + rc = response.status + log("body %s", payload) + log("merge_pr the response code was %s", rc) + except: + await send_comment( + session, + pr, + "I received an error uploading the build artifacts or merging the PR!", + ) + logger.exception("Upload failed", exc_info=True) + return MergeState.MERGED + + +async def request_merge(session: ClientSession, pr: int) -> MergeState: + init_message = "I will attempt to upload artifacts and merge this PR. This may take some time, please have patience." + merged = await merge_pr(session, pr, init_message) + if merged is MergeState.NEEDS_REVIEW: + await send_comment( + session, + pr, + "Sorry, this PR cannot be merged until it's approved by a Bioconda member.", + ) + elif merged is MergeState.NOT_MERGEABLE: + await send_comment(session, pr, "Sorry, this PR cannot be merged at this time.") + return merged + + +# This requires that a JOB_CONTEXT environment variable, which is made with `toJson(github)` +async def main() -> None: + job_context = await get_job_context() + issue_number, original_comment = await get_pr_comment(job_context) + if issue_number is None or original_comment is None: + return + + comment = original_comment.lower() + if comment.startswith(("@bioconda-bot", "@biocondabot")): + if " please merge" in comment: + async with ClientSession() as session: + await request_merge(session, issue_number) diff --git a/images/bot/src/bioconda_bot/update.py b/images/bot/src/bioconda_bot/update.py new file mode 100644 index 00000000000..0af1f8db09e --- /dev/null +++ b/images/bot/src/bioconda_bot/update.py @@ -0,0 +1,78 @@ +import logging +import sys + +from aiohttp import ClientSession + +from .common import ( + async_exec, + 
get_job_context, + get_pr_comment, + get_pr_info, + send_comment, +) + +logger = logging.getLogger(__name__) +log = logger.info + + +# Update a branch from upstream master, this should be run in a try/catch +async def update_from_master_runner(session: ClientSession, pr: int) -> None: + async def git(*args: str) -> None: + return await async_exec("git", *args) + + # Setup git, otherwise we can't push + await git("config", "--global", "user.email", "biocondabot@gmail.com") + await git("config", "--global", "user.name", "BiocondaBot") + + pr_info = await get_pr_info(session, pr) + remote_branch = pr_info["head"]["ref"] + remote_repo = pr_info["head"]["repo"]["full_name"] + + max_depth = 2000 + # Clone + await git( + "clone", + f"--depth={max_depth}", + f"--branch={remote_branch}", + f"git@github.com:{remote_repo}.git", + "bioconda-recipes", + ) + + async def git_c(*args: str) -> None: + return await git("-C", "bioconda-recipes", *args) + + # Add/pull upstream + await git_c("remote", "add", "upstream", "https://github.com/bioconda/bioconda-recipes") + await git_c("fetch", f"--depth={max_depth}", "upstream", "master") + + # Merge + await git_c("merge", "upstream/master") + + await git_c("push") + + +# Merge the upstream master branch into a PR branch, leave a message on error +async def update_from_master(session: ClientSession, pr: int) -> None: + try: + await update_from_master_runner(session, pr) + except Exception as e: + await send_comment( + session, + pr, + "I encountered an error updating your PR branch. 
You can report this to bioconda/core if you'd like.\n-The Bot", + ) + sys.exit(1) + + +# This requires that a JOB_CONTEXT environment variable, which is made with `toJson(github)` +async def main() -> None: + job_context = await get_job_context() + issue_number, original_comment = await get_pr_comment(job_context) + if issue_number is None or original_comment is None: + return + + comment = original_comment.lower() + if comment.startswith(("@bioconda-bot", "@biocondabot")): + if "please update" in comment: + async with ClientSession() as session: + await update_from_master(session, issue_number) diff --git a/images/create-env/CHANGELOG.md b/images/create-env/CHANGELOG.md new file mode 100644 index 00000000000..cd5a8d32db5 --- /dev/null +++ b/images/create-env/CHANGELOG.md @@ -0,0 +1,152 @@ +# Changelog + + +## bioconda/create-env 3.0 (2023-10-17) + +### Changed + +- Add linux-aarch64 image; bioconda/create-env is now a multiplatform manifest. + +- Change to a simple "major.minor" version scheme and offer mutable "major" tag. + +- Drop defaults channel from included config. + +- Use Miniforge installer to build this image. + +- Rebuilt on the latest base image with Debian 12.2 / BusyBox 1.36.1. + +- Do not install findutils, sed if provided by the base image (as is currently). + + +## bioconda/create-env 2.2.1 (2022-10-14) + +### Changed + +- Limit open fd (ulimit -n) for strip (small number chosen arbitrarily). + + The container image itself had unstripped binaries in 2.2.0. + + +## bioconda/create-env 2.2.0 (2022-10-14) + +### Changed + +- Use the exact conda, mamba versions as used in bioconda-recipes' builds. + + +## bioconda/create-env 2.1.0 (2021-04-14) + +### Changed + +- Copy instead of hardlink licenses, exit on error + + Hardlink fails if copying spans cross devices (e.g., via bound volumes). 
+ + +## bioconda/create-env 2.0.0 (2021-04-13) + +### Changed + +- Rename `--remove-files` to `--remove-paths` + +- Replace `--strip` by `--strip-files=GLOB` + +- Replace `CONDA_ALWAYS_COPY=1` usage by config option + +- Use `/bin/bash` for entrypoints + + `/bin/sh` fails on some Conda packages' activations scripts' Bashisms. + + +## bioconda/create-env 1.2.1 (2021-04-09) + +### Fixed + +- Fail `--strip` if `strip` is not available + +### Changed + +- Delete links/dirs for `--remove-files` + + +## bioconda/create-env 1.2.0 (2021-03-30) + +### Added + +- Add license copying + +- Add status messages + +- Add help texts + +### Changed + +- Suppress `bash -i` ioctl warning + + +## bioconda/create-env 1.1.1 (2021-03-27) + +### Changed + +- Use `CONDA_ALWAYS_COPY=1` + + +## bioconda/create-env 1.1.0 (2021-03-27) + +### Added + +- Add option to change `create --copy` + +### Changed + +- Rebuild with `python` pinned to `3.8` + + To avoid hitting + - https://github.com/conda/conda/issues/10490 + - https://bugs.python.org/issue43517 + + +## bioconda/create-env 1.0.2 (2021-03-22) + +### Changed + +- Rebuild on new Debian 10 base images + + +## bioconda/create-env 1.0.1 (2021-03-22) + +### Fixed + +- Use entrypoint from `/opt/create-env/` + + `/usr/local` gets "overwritten" (=bind-mounted) when building via mulled. 
+ + +## bioconda/create-env 1.0.0 (2021-03-21) + +### Added + +- Initial release + + + diff --git a/images/create-env/Dockerfile b/images/create-env/Dockerfile new file mode 100644 index 00000000000..ea72c88d931 --- /dev/null +++ b/images/create-env/Dockerfile @@ -0,0 +1,39 @@ +ARG BUSYBOX_IMAGE +FROM ${BUSYBOX_IMAGE} as build + +WORKDIR /tmp/work +COPY install-conda print-env-activate create-env ./ +RUN arch="$( uname -m )" \ + && \ + wget --quiet -O ./miniconda.sh \ + "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-${arch}.sh" + +# Install exact versions of conda/mamba +ARG CONDA_VERSION +ARG MAMBA_VERSION +RUN echo $CONDA_VERSION > requirements.txt && echo $MAMBA_VERSION >> requirements.txt +RUN ./install-conda ./requirements.txt /opt/create-env + +ARG BUSYBOX_IMAGE +FROM ${BUSYBOX_IMAGE} +COPY --from=build /opt/create-env /opt/create-env + +# Copy (Bioconda-specific) Conda configuration created by the install-conda script. +COPY --from=build /root/.condarc /root/ + +RUN \ + # Use a per-user config (instead of conda config --sys) for more flexibility. + cp /root/.condarc /etc/skel/ \ + && \ + # Enable conda shell function for login shells. + ln -s /opt/create-env/etc/profile.d/conda.sh /etc/profile.d/ \ + && \ + # Enable conda function in interactive Bash (via .bashrc) and POSIX shells (via ENV). + printf '%s\n' \ + '\. 
/etc/profile.d/conda.sh' \ + | tee -a /root/.bashrc \ + >> /etc/skel/.bashrc +ENV ENV=/etc/profile.d/conda.sh + +ENTRYPOINT [ "/opt/create-env/bin/tini", "--", "/opt/create-env/env-execute" ] +CMD [ "bash" ] diff --git a/images/create-env/Dockerfile.test b/images/create-env/Dockerfile.test new file mode 100644 index 00000000000..5de59c76993 --- /dev/null +++ b/images/create-env/Dockerfile.test @@ -0,0 +1,83 @@ +ARG base +ARG BUSYBOX_IMAGE +FROM "${base}" +RUN set -x && \ + CONDA_PKGS_DIRS="/tmp/pkgs" \ + /opt/create-env/env-execute \ + create-env \ + --conda=mamba \ + /usr/local \ + file findutils grep +RUN set -x && \ + . /usr/local/env-activate.sh && \ + if find /opt/create-env \ + -xdev \ + -type f \ + -exec file {} \+ \ + | grep 'not stripped' \ + ; then \ + >&2 printf 'found unstripped binaries\n' ; exit 1 \ + ; fi +RUN set -x && \ + . /usr/local/env-activate.sh && \ + if find /opt/create-env \ + -xdev \ + -type f \ + -name \*.a \ + | grep . \ + ; then \ + >&2 printf 'found static libraries\n' ; exit 1 \ + ; fi + + +FROM "${base}" as build_bioconda_package +RUN set -x && \ + /opt/create-env/env-execute \ + create-env \ + --conda=mamba \ + --strip-files=\* \ + /usr/local \ + python + +FROM "${BUSYBOX_IMAGE}" +COPY --from=build_bioconda_package /usr/local /usr/local +RUN set -x && \ + /usr/local/env-execute \ + python --version \ + && \ + [ ! "${CONDA_PREFIX}" = /usr/local ] \ + && \ + { set -x && . 
/usr/local/env-activate.sh && set +x ; } \ + && \ + [ "${CONDA_PREFIX}" = /usr/local ] \ + && \ + python --version + + +FROM "${base}" as build_conda +RUN set -x && \ + /opt/create-env/env-execute \ + create-env \ + --conda=mamba \ + --env-activate-args='--prefix-is-base' \ + --strip-files=\* \ + --remove-paths=\*.a \ + --remove-paths=\*.pyc \ + /opt/conda \ + conda + +FROM "${BUSYBOX_IMAGE}" +COPY --from=build_conda /opt/conda /opt/conda +COPY --from=build_conda /opt/conda/env-activate.sh /usr/local/ +RUN set -x && \ + /usr/local/env-execute \ + conda info --all \ + && \ + { set -x && . /usr/local/env-activate.sh && set +x ; } \ + && \ + . "${CONDA_PREFIX}/etc/profile.d/conda.sh" \ + && \ + conda activate \ + && \ + conda info \ + | grep 'base environment.*/opt/conda' diff --git a/images/create-env/README.md b/images/create-env/README.md new file mode 100644 index 00000000000..ca9a7ed9a47 --- /dev/null +++ b/images/create-env/README.md @@ -0,0 +1,99 @@ +# bioconda/create-env + +The `create-env` container image, available as [`quay.io/bioconda/create-env`](https://quay.io/repository/bioconda/create-env?tab=tags), provides [`conda`](https://github.com/conda/conda/) (and [`mamba`](https://github.com/mamba-org/mamba)) alongside a convenience wrapper `create-env` to create small container images based on Conda packages. + + +## Options + +`create-env` runs `conda create` for a given `PREFIX` plus a set of packages and (optionally) runs post-processing steps on the created environment. + +Post-processing steps are triggered by arguments to `create-env`: + +- `--env-activate-script=FILE`: + + Create a shell activation script `FILE` (defaults to `PREFIX/env-activate.sh`) which contains the environment activation instructions as executed per `conda activate PREFIX`. + + Example usage: `sh -c '. PREFIX/env-activate.sh && command-to-run-from-PREFIX'`. 
+ +- `--env-execute-script=FILE`: + + Create an executable `FILE` (defaults to `PREFIX/env-execute`) which runs a given program in the activated `PREFIX` environment. + + Example usage: `PREFIX/env-execute command-to-run-from-PREFIX`. + +- `--remove-paths=GLOB`: + + Remove some paths from `PREFIX` to reduce the target container image size. + +- `--strip-files=GLOB`: + + Run [`strip`](https://sourceware.org/binutils/docs/binutils/strip.html) on files in `PREFIX` whose paths match `GLOB` to reduce the target container image size. + +- `--licenses-path=PATH`: + + Directory in which to copy license files for the installed packages (defaults to `PREFIX/conda-meta`). + + +## Usage example: +```Dockerfile +FROM quay.io/bioconda/create-env:2.1.0 as build +# Create an environment containing python=3.9 at /usr/local using mamba, strip +# files and remove some less important files: +RUN export CONDA_ADD_PIP_AS_PYTHON_DEPENDENCY=0 \ + && \ + /opt/create-env/env-execute \ + create-env \ + --conda=mamba \ + --strip-files='bin/*' \ + --strip-files='lib/*' \ + --remove-paths='*.a' \ + --remove-paths='share/terminfo/[!x]*' \ + /usr/local \ + python=3.9 + +# The base image below (quay.io/bioconda/base-glibc-busybox-bash:2.1.0) defines +# /usr/local/env-execute as the ENTRYPOINT so that created containers always +# start in an activated environment. +FROM quay.io/bioconda/base-glibc-busybox-bash:2.1.0 as target +COPY --from=build /usr/local /usr/local + +FROM target as test +RUN /usr/local/env-execute python -c 'import sys; print(sys.version)' +RUN . /usr/local/env-activate.sh && python -c 'import sys; print(sys.version)' + +# Build and test with, e.g.: +# buildah bud --target=target --tag=localhost/python:3.9 . +# podman run --rm localhost/python:3.9 python -c 'import sys; print(sys.version)' +``` + +## Miscellaneous information: + +- Run `podman run --rm quay.io/bioconda/create-env create-env --help` for usage information. 
+ +- Run `podman run --rm quay.io/bioconda/create-env conda config --show-sources` to see predefined configuration options. + +- The environment in which `create-env` runs has itself been created by `create-env`. + As such, `/opt/create-env/env-activate.sh` and `/opt/create-env/env-execute` scripts can be used to activate/execute in `create-env`'s environment in a `Dockerfile` context. + In other contexts, when a container is run via the image's entrypoint, the environment is activated automatically. + + The separate `/opt/create-env` path is used to avoid collisions with environments created at, e.g., `/usr/local` or `/opt/conda`. + +- By default, package files are copied rather than hard-linked to avoid altering Conda package cache files when running `strip`. + + If the target image should contain multiple environments, it is advisable to set `CONDA_ALWAYS_COPY=0` to allow hardlinks between the environments (to reduce the overall image size) and run `strip` after the environments have been created. + This can be done by invoking `create-env` twice whilst omitting the environment creation during the second invocation (using `--conda=:`). + + E.g.: + ```sh + . /opt/create-env/env-activate.sh + export CONDA_ALWAYS_COPY=0 + create-env --conda=mamba /opt/python-3.8 python=3.8 + create-env --conda=mamba /opt/python-3.9 python=3.9 + create-env --conda=: --strip-files=\* /opt/python-3.8 + create-env --conda=: --strip-files=\* /opt/python-3.9 + ``` + +- Container images created as in the example above are meant to be lightweight and as such do **not** contain `conda`. + Hence, there is no `conda activate PREFIX` available but only the source-able `PREFIX/env-activate.sh` scripts and the `PREFIX/env-execute` launchers. + These scripts are generated at build time and assume no previously activated Conda environment. + Likewise, the environments are not expected to be deactivated, which is why no corresponding deactivate scripts are provided. 
diff --git a/images/create-env/create-env b/images/create-env/create-env new file mode 100755 index 00000000000..fde5bffc334 --- /dev/null +++ b/images/create-env/create-env @@ -0,0 +1,242 @@ +#! /bin/sh -eu + +for arg do + case "${arg}" in + --help ) + cat <<'end-of-help' +Usage: create-env [OPTIONS]... [--] PREFIX [CONDA_CREATE_ARGS]... +Use conda (or mamba via --conda=mamba) to create a Conda environment at PREFIX +according to specifications given by CONDA_CREATE_ARGS. + + --conda=CONDA Conda implementation to run CONDA CREATE for. + E.g.: "conda", "mamba", "conda env", "mamba env". + Use ":" to skip env creation. (default: conda) + --create-command=CREATE Conda command to run. E.g.: "create", "install". + (default: create) + --env-activate-args=ARGS Single string of arguments to pass on to + print-env-activate. (default: --prefix=PREFIX) + --env-activate-script=FILE Destination path of environment activation + script. (default: PREFIX/env-activate.sh) + --env-execute-script=FILE Destination path of environment execution script. + (default: PREFIX/env-execute) + --remove-paths=GLOB Glob of paths to remove from PREFIX after its + creation. Can be passed on multiple times. Will + be passed on to `find -path PREFIX/GLOB`. + (no default) + --strip-files=GLOB Glob of paths in PREFIX to run `strip` on. Will + be passed on to `find -type f -path PREFIX/GLOB`. + Error messages from `strip` are suppressed, i.e., + --strip-files=* may be used to run `strip` on all + files. Can be passed on multiple times. + (no default) + --licenses-path=PATH Destination path to copy package license files + to (relative to PREFIX or absolute). Pass on + empty path (--licenses-path=) to skip copying. 
+ (default: conda-meta) +end-of-help + exit 0 ;; + --conda=* ) + conda_impl="${arg#--conda=}" + shift ;; + --create-command=* ) + create_command="${arg#--create-command=}" + shift ;; + --env-activate-args=* ) + env_activate_args="${arg#--env-activate-args=}" + shift ;; + --env-activate-script=* ) + env_activate_file="${arg#--env-activate-script=}" + shift ;; + --env-execute-script=* ) + env_execute_file="${arg#--env-execute-script=}" + shift ;; + --remove-paths=* ) + remove_paths_globs="$( + printf '%s\n' \ + ${remove_paths_globs+"${remove_paths_globs}"} \ + "${arg#--remove-paths=}" + )" + shift ;; + --strip-files=* ) + strip_files_globs="$( + printf '%s\n' \ + ${strip_files_globs+"${strip_files_globs}"} \ + "${arg#--strip-files=}" + )" + shift ;; + --licenses-path=* ) + licenses_path="${arg#--licenses-path=}" + shift ;; + -- ) + break ;; + -* ) + printf 'unknown option: %s\n' "${arg}" + exit 1 ;; + * ) + break + esac +done + +if [ $# -eq 0 ] ; then + printf 'missing argument: environment path\n' + exit 1 +fi + +prefix="${1%%/}" +shift + +conda_impl="${conda_impl:-conda}" +create_command="${create_command-create}" +env_activate_args="--prefix='${prefix}' ${env_activate_args-}" +env_activate_file="${env_activate_file-"${prefix}/env-activate.sh"}" +env_execute_file="${env_execute_file-"${prefix}/env-execute"}" +remove_paths_globs="$( printf '%s\n' "${remove_paths_globs-}" | sort -u )" +strip_files_globs="$( printf '%s\n' "${strip_files_globs-}" | sort -u )" +licenses_path="${licenses_path-conda-meta}" + + +set +u +eval "$( conda shell.posix activate base )" +set -u + +printf 'creating environment at %s ...\n' "${prefix}" 1>&2 +CONDA_YES=1 \ + ${conda_impl} \ + ${create_command} \ + --prefix="${prefix}" \ + "${@}" + +if [ -n "${env_activate_file}${env_execute_file}" ] ; then + printf 'generating activation script...\n' 1>&2 + activate_script="$( + eval "set -- ${env_activate_args}" + print-env-activate "${@}" + )" + if [ -n "${env_activate_file-}" ] ; then + printf 
'writing activation script to %s ...\n' "${env_activate_file}" 1>&2 + printf '%s\n' \ + "${activate_script}" \ + > "${env_activate_file}" + activate_script=". '${env_activate_file}'" + fi + if [ -n "${env_execute_file-}" ] ; then + printf 'writing execution script to %s ...\n' "${env_execute_file}" 1>&2 + printf '%s\n' \ + '#! /bin/bash' \ + "${activate_script}" \ + 'exec "${@}"' \ + > "${env_execute_file}" + chmod +x "${env_execute_file}" + fi +fi + + +if [ -n "${remove_paths_globs}" ] ; then + printf 'removing paths from %s ...\n' "${prefix}" 1>&2 + ( + eval "set -- $( + printf %s "${remove_paths_globs}" \ + | sed -e "s|.*|-path '${prefix}/&'|" -e '1!s/^/-o /' \ + | tr '\n' ' ' + )" + find "${prefix}" \ + \( "${@}" \) \ + -delete + ) +fi + +if [ -n "${strip_files_globs}" ] ; then + # Ensure "strip" is available beforehand because errors are ignored later on. + strip --version > /dev/null + printf 'stripping binaries in %s ...\n' "${prefix}" 1>&2 + ( + eval "set -- $( + printf %s "${strip_files_globs}" \ + | sed -e "s|.*|-path '${prefix}/&'|" -e '1!s/^/-o /' \ + | tr '\n' ' ' + )" + # Strip binaries. (Run strip on all files; ignore errors for non-ELF files.) + # Limit open fds (ulimit -n) for strip (small number chosen arbitrarily). + # (To avoid "could not create temporary file to hold stripped copy: Too many open files") + + # Filter out the binaries currently in use by the pipeline via sed below. 
+ skip_inode_expressions="$( + command -v -- find xargs sed strip \ + | xargs -- stat -L -c '-e /^%d,%i:/d' -- + )" + find "${prefix}" \ + -type f \ + \( "${@}" \) \ + -print0 \ + | xargs \ + -0 \ + -n 64 \ + -- \ + stat -L -c '%d,%i:%n' -- \ + | sed \ + ${skip_inode_expressions} \ + -e 's/^[^:]*://' \ + | tr \\n \\0 \ + | + xargs \ + -0 \ + -n 64 \ + -- \ + strip -- \ + 2>&1 \ + | sed '/: file format not recognized/d' \ + || true + ) +fi + + +if [ -n "${licenses_path}" ] ; then + abs_licenses_path="$( + cd "${prefix}" + mkdir -p "${licenses_path}" + cd "${licenses_path}" + pwd + )" + printf 'copying license files to %s ...\n' "${abs_licenses_path}" 1>&2 + pkgs_dirs="$( + conda config --show pkgs_dirs \ + | sed -n 's|[^/]*\(/.*\)|"\1"|p' \ + | tr '\n' ' ' + )" + ( + eval "set -- $( + find "${prefix}/conda-meta" \ + -maxdepth 1 \ + -name \*.json \ + | sed 's|.*/\(.*\)\.json|"\1"|' \ + | tr '\n' ' ' + )" + for pkg do + pkg_info="$( + eval "set -- ${pkgs_dirs}" + for pkgs_dir ; do + if [ -d "${pkgs_dir}/${pkg}/info" ] ; then + printf %s "${pkgs_dir}/${pkg}/info" + exit + fi + done + printf 'missing metadata for %s\n' "${pkg}" 1>&2 + exit 1 + )" + find "${pkg_info}" \ + -maxdepth 1 \ + \( -name LICENSE.txt -o -name licenses \) \ + -exec sh -ec ' + dest_dir="${1}" ; shift + mkdir -p "${dest_dir}" + cp -fR "${@}" "${dest_dir}/" + ' -- "${abs_licenses_path}/${pkg}" {} \+ \ + || { + printf 'failed to copy licenses for %s\n' "${pkg}" 1>&2 + exit 1 + } + done + ) +fi + +printf 'finished create-env for %s\n' "${prefix}" 1>&2 diff --git a/images/create-env/install-conda b/images/create-env/install-conda new file mode 100755 index 00000000000..a3b9b33272e --- /dev/null +++ b/images/create-env/install-conda @@ -0,0 +1,124 @@ +#! /bin/bash -eux + +requirements_file="${1}" +conda_install_prefix="${2}" + +# Install a bootstrap Miniconda installation. +miniconda_boostrap_prefix="$( pwd )/miniconda" +# Run the following in a subshell to avoid environment changes from bootstrap. 
+( + + # Use the base image-provided tools if they work for us: + tools='' + find -print0 -maxdepth 0 && xargs -0 true < /dev/null \ + || tools="${tools} findutils" + sed -e '' < /dev/null \ + || tools="${tools} sed" + + sh ./miniconda.sh \ + -b \ + -p "${miniconda_boostrap_prefix}" + + # Install the base Conda installation. + . "${miniconda_boostrap_prefix}/etc/profile.d/conda.sh" + + # Install conda, mamba and some additional tools: + # - tini: init program, + # - binutils, findutils: tools to strip down image/environment size, + + # Only need `strip` executable from binutils. Other binaries from the package + # and especially the "sysroot" dependency is only bloat for this container + # image. (NOTE: The binary needs libgcc-ng which is explicitly added later.) + mamba create --yes \ + --prefix="${conda_install_prefix}" \ + --channel=conda-forge \ + binutils + cp -aL "${conda_install_prefix}/bin/strip" ./strip + conda run --prefix="${conda_install_prefix}" strip -- ./strip + mamba remove --yes --all \ + --prefix="${conda_install_prefix}" + + mamba create --yes \ + --prefix="${conda_install_prefix}" \ + --channel=conda-forge \ + \ + --file="${requirements_file}" \ + \ + tini \ + \ + libgcc-ng \ + ${tools} \ + ; + + mv \ + ./print-env-activate \ + ./create-env \ + ./strip \ + "${conda_install_prefix}/bin/" +) + +# Activate the new base environment. +activate_script="$( + "${conda_install_prefix}/bin/conda" shell.posix activate base +)" +set +u +eval "${activate_script}" +set -u +unset activate_script + +# Strip find/xargs/sed beforehand as they are excluded in the strip pipeline. +for prog in find xargs sed ; do + case "$( command -v "${prog}" )" in + "${conda_install_prefix%%/}"/* ) + strip -- "$( command -v "${prog}" )" + esac +done + +# Use --conda=: to turn the `conda create` into a no-op, but do continue to +# run strip, remove files and output the activate/execute scripts. 
+CONDA_PKGS_DIRS="${miniconda_boostrap_prefix}/pkgs" \ + create-env \ + --conda=: \ + --strip-files=\* \ + --remove-paths=\*.a \ + --remove-paths=\*.pyc \ + --env-activate-args=--prefix-is-base \ + "${conda_install_prefix}" + +# Remove bootstrap Miniconda files. +rm -rf "${miniconda_boostrap_prefix}" + +# Add standard Bioconda config to root's Conda config. +conda config \ + --append channels conda-forge \ + --append channels bioconda \ + ; +conda config \ + --remove channels defaults \ + 2> /dev/null \ + || true +conda config \ + --remove repodata_fns current_repodata.json \ + 2> /dev/null \ + || true +conda config \ + --prepend repodata_fns repodata.json + +# Use `always_copy` to cut links to package cache. +# (Which is esp. important if files are manipulated via --strip-files !) +conda config \ + --set always_copy true \ + --set allow_softlinks false + + +# Log information of the newly created Conda installation. +# NB: Running conda after the .pyc removal will recreate some .pyc files. +# This is intentional as it speeds up conda startup time. +conda list --name=base +conda info --all +mamba --version +# Make sure we have the requested conda, mamba versions installed. +conda list \ + --export '^(conda|mamba)$' \ + | sed -n 's/=[^=]*$//p' \ + | diff "${requirements_file}" - diff --git a/images/create-env/print-env-activate b/images/create-env/print-env-activate new file mode 100755 index 00000000000..fbaa4a405b2 --- /dev/null +++ b/images/create-env/print-env-activate @@ -0,0 +1,95 @@ +#! /bin/bash -eu + +for arg do + case "${arg}" in + --help ) + cat <<'end-of-help' +Usage: print-env-activate [OPTIONS]... [--] [PREFIX] +Print shell activation script contents conda creates for environment at PREFIX. + + --prefix=PREFIX Optionally pass on PREFIX path as option-argument + instead of operand. + --prefix-is-base[=yes|=no] Specify if PREFIX is a base environment and use + `PREFIX/bin/conda` to create a full base + environment activation script. 
(default: no) +end-of-help + exit 0 ;; + --prefix=* ) + prefix="${arg#--prefix=}" + shift ;; + --prefix-is-base=yes | --prefix-is-base ) + prefix_is_base=1 + shift ;; + --prefix-is-base=no ) + prefix_is_base=0 + shift ;; + -- ) + shift ; break ;; # consume the "--" separator so that PREFIX ends up in ${1} + -* ) + printf 'unknown option: %s\n' "${arg}" 1>&2 + exit 1 ;; + * ) + break + esac +done + +if [ -z "${prefix:-}" ] ; then + prefix="${1}" + shift +fi + +if [ $# -ne 0 ] ; then + printf 'excess argument: %s\n' "${@}" 1>&2 + exit 1 # stdout is captured as the activation script, so fail loudly on stderr +fi + +if [ "${prefix_is_base-}" = 1 ] ; then + conda_exe="${prefix}/bin/conda" +else + conda_exe="$( command -v conda )" +fi + +# Deactivate current active env for full `conda shell.posix activate` changes. +deactivate_script="$( + conda shell.posix deactivate +)" +if [ "${prefix_is_base-}" = 1 ] ; then + deactivate_script="$( + printf %s "${deactivate_script}" \ + | sed "s|/[^\"'=:]*/condabin:||g" + )" +fi +set +u +eval "${deactivate_script}" +set -u +unset deactivate_script + +# NOTE: The following gets a proper PS1 value from an interactive Bash which +# `conda shell.posix activate` can reuse. +# NB: Ideally, conda activate should not use the current PS1 but rather write +# out something like PS1="${CONDA_PROMPT_MODIFIER}${PS1}". +# (Also, running this in the build instead of final container might not +# reflect the actual PS1 the target container image would provide.) +PS1="$( + bash -ic 'printf %s "${PS1}"' 2>/dev/null + printf . +)" +PS1="${PS1%.}" + +activate_script="$( + export PS1 + if [ ! "${prefix_is_base-}" = 1 ] ; then + export CONDA_ENV_PROMPT= + fi + "${conda_exe}" shell.posix activate "${prefix}" +)" + +printf '%s\n' "${activate_script}" \ + | { + if [ "${prefix_is_base-}" = 1 ] ; then + cat + else + grep -vE '^export (_CE_M|_CE_CONDA|CONDA_EXE|CONDA_PYTHON_EXE)=' \ + | sed "s|/[^\"'=:]*/condabin:||g" + fi + } diff --git a/test/test_utils.py b/test/test_utils.py index 916fb108943..85b616805d6 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -37,7 +37,7 @@ # docker, once without). 
On OSX, only the non-docker runs. # Docker ref for build container -DOCKER_BASE_IMAGE = "quay.io/bioconda/bioconda-utils-test-env-cos7:latest" +DOCKER_BASE_IMAGE = os.getenv("DOCKER_BASE_IMAGE", "quay.io/bioconda/bioconda-utils-build-env-cos7:latest") SKIP_DOCKER_TESTS = sys.platform.startswith('darwin') SKIP_NOT_OSX = not sys.platform.startswith('darwin')