From c72fccd02902386b3a0a4645abe1d7d2c9217ca3 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Tue, 21 Mar 2023 14:12:33 -0700 Subject: [PATCH 01/15] Prepare for cross-compilation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split the builder stage into two stages, and install xx¹ which provides tools to support cross-compilation. Note that the first stage doesn't need Python so debian:11-slim is used as the base image. There is additional work to be done on individual commands to have proper cross-compilation, but those fixes will come in subsequent commits to keep them atomic. ¹ https://github.com/tonistiigi/xx --- Dockerfile | 159 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 108 insertions(+), 51 deletions(-) diff --git a/Dockerfile b/Dockerfile index 569bfd7c..82a57672 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,24 +1,33 @@ # This is a multi-stage image build. # -# We first create a "builder" image and then create our final image by copying -# things from the builder image. The point is to avoid bloating the final -# image with tools only needed during the image build. +# We first create two builder images (builder-build-platform, +# builder-target-platform). Then we create our final image by copying things +# from the builder images. The point is to avoid bloating the final image with +# tools only needed during the image build. -# First build the temporary image. -FROM python:3.10-slim-bullseye AS builder +# Setup: pull cross-compilation tools. +FROM --platform=$BUILDPLATFORM tonistiigi/xx AS xx + +# ———————————————————————————————————————————————————————————————————— # + +# Define a builder stage that runs on the build platform. +# Even if the target platform is different, instructions will run natively for +# faster compilation. 
+FROM --platform=$BUILDPLATFORM debian:11-slim AS builder-build-platform SHELL ["/bin/bash", "-e", "-u", "-o", "pipefail", "-c"] +# Copy cross-compilation tools. +COPY --from=xx / / + # Add system deps for building # autoconf, automake: for building VCFtools; may be used by package managers to build from source # build-essential: contains gcc, g++, make, etc. for building various tools; may be used by package managers to build from source # ca-certificates: for secure HTTPS connections # curl: for downloading source files -# git: for git pip installs -# jq: used in builder-scripts/latest-augur-release-tag -# libsqlite3-dev: for building pyfastx (for Augur) +# git: used in builder-scripts/download-repo # pkg-config: for building VCFtools; may be used by package managers to build from source -# zlib1g-dev: for building VCFtools and pyfastx; may be used by package managers to build from source +# zlib1g-dev: for building VCFtools; may be used by package managers to build from source # nodejs: for installing Auspice RUN apt-get update && apt-get install -y --no-install-recommends \ autoconf \ @@ -27,8 +36,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ ca-certificates \ curl \ git \ - jq \ - libsqlite3-dev \ pkg-config \ zlib1g-dev @@ -42,6 +49,14 @@ ARG TARGETPLATFORM ARG TARGETOS ARG TARGETARCH +# Install packages that generate binaries for the target architecture. +# https://github.com/tonistiigi/xx#building-on-debian +# binutils, gcc, libc6-dev: for compiling C/C++ programs (TODO: verify) +RUN xx-apt-get install -y \ + binutils \ + gcc \ + libc6-dev + # Add dependencies. All should be pinned to specific versions, except # Nextstrain-maintained software. # This includes pathogen-specific workflow dependencies. Since we only maintain a @@ -145,7 +160,75 @@ RUN curl -fsSL https://github.com/lh3/minimap2/releases/download/v2.24/minimap2- | tar xjvpf - --no-same-owner --strip-components=1 -C /final/bin minimap2-2.24_x64-linux/minimap2 -# 3. 
Install programs via pip +# 3. Add unpinned programs + +# Allow caching to be avoided from here on out in this stage by calling +# docker build --build-arg CACHE_DATE="$(date)" +# NOTE: All versioned software added below should be checked in +# devel/validate-platforms. +ARG CACHE_DATE + +# Add helper scripts +COPY builder-scripts/ /builder-scripts/ + +# Nextclade/Nextalign v2 are downloaded directly but using the latest version, +# so they belong after CACHE_DATE (unlike Nextclade/Nextalign v1). + +# Download Nextalign v2 +# Set default Nextalign version to 2 +RUN curl -fsSL -o /final/bin/nextalign2 https://github.com/nextstrain/nextclade/releases/latest/download/nextalign-$(/builder-scripts/target-triple) \ + && ln -sv nextalign2 /final/bin/nextalign + +# Download Nextclade v2 +# Set default Nextclade version to 2 +RUN curl -fsSL -o /final/bin/nextclade2 https://github.com/nextstrain/nextclade/releases/latest/download/nextclade-$(/builder-scripts/target-triple) \ + && ln -sv nextclade2 /final/bin/nextclade + +# Auspice +# Install Node deps, build Auspice, and link it into the global search path. A +# fresh install is only ~40 seconds, so we're not worrying about caching these +# as we did the Python deps. Building auspice means we can run it without +# hot-reloading, which is time-consuming and generally unnecessary in the +# container image. Linking is equivalent to an editable Python install and +# used for the same reasons described above. +WORKDIR /nextstrain/auspice +RUN /builder-scripts/download-repo https://github.com/nextstrain/auspice release . 
\ + && npm update && npm install && npm run build && npm link + +# Add NCBI Datasets command line tools for access to NCBI Datsets Virus Data Packages +RUN curl -fsSL -o /final/bin/datasets https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/v2/linux-${TARGETARCH}/datasets +RUN curl -fsSL -o /final/bin/dataformat https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/v2/linux-${TARGETARCH}/dataformat + +# ———————————————————————————————————————————————————————————————————— # + +# Define a builder stage that runs on the target platform. +# If the target platform is different from the build platform, instructions will +# run under emulation which can be slower. +# This is in place for Python programs which are not easy to install for a +# different target platform¹. +# ¹ https://github.com/pypa/pip/issues/5453 +FROM --platform=$TARGETPLATFORM python:3.10-slim-bullseye AS builder-target-platform + +SHELL ["/bin/bash", "-e", "-u", "-o", "pipefail", "-c"] + +# Used for platform-specific instructions +ARG TARGETPLATFORM +ARG TARGETOS +ARG TARGETARCH + +# Add system deps for building +# curl, jq: used in builder-scripts/latest-augur-release-tag +# git: for git pip installs +# libsqlite3-dev, zlib1g-dev: for building pyfastx (for Augur) +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl \ + git \ + jq \ + libsqlite3-dev \ + zlib1g-dev + + +# 1. Install programs via pip # Install jaxlib on linux/arm64 # jaxlib, an evofr dependency, does not have official pre-built binaries for @@ -184,32 +267,20 @@ RUN pip3 install pysam==0.19.1 # Install pango_aliasor (for forecasts-ncov) RUN pip3 install pango_aliasor==0.3.0 -# 4. Add unpinned programs -# Allow caching to be avoided from here on out by calling +# 2. Add unpinned programs + +# Allow caching to be avoided from here on out in this stage by calling # docker build --build-arg CACHE_DATE="$(date)" # NOTE: All versioned software added below should be checked in # devel/validate-platforms. 
ARG CACHE_DATE -# Install our own CLI so builds can do things like `nextstrain deploy` -RUN pip3 install nextstrain-cli - # Add helper scripts COPY builder-scripts/ /builder-scripts/ -# Nextclade/Nextalign v2 are downloaded directly but using the latest version, -# so they belong after CACHE_DATE (unlike Nextclade/Nextalign v1). - -# Download Nextalign v2 -# Set default Nextalign version to 2 -RUN curl -fsSL -o /final/bin/nextalign2 https://github.com/nextstrain/nextclade/releases/latest/download/nextalign-$(/builder-scripts/target-triple) \ - && ln -sv nextalign2 /final/bin/nextalign - -# Download Nextclade v2 -# Set default Nextclade version to 2 -RUN curl -fsSL -o /final/bin/nextclade2 https://github.com/nextstrain/nextclade/releases/latest/download/nextclade-$(/builder-scripts/target-triple) \ - && ln -sv nextclade2 /final/bin/nextclade +# Install our own CLI so builds can do things like `nextstrain deploy` +RUN pip3 install nextstrain-cli # Fauna WORKDIR /nextstrain/fauna @@ -252,24 +323,9 @@ WORKDIR /nextstrain/augur RUN /builder-scripts/download-repo https://github.com/nextstrain/augur "$(/builder-scripts/latest-augur-release-tag)" . \ && pip3 install --editable . -# Auspice -# Install Node deps, build Auspice, and link it into the global search path. A -# fresh install is only ~40 seconds, so we're not worrying about caching these -# as we did the Python deps. Building auspice means we can run it without -# hot-reloading, which is time-consuming and generally unnecessary in the -# container image. Linking is equivalent to an editable Python install and -# used for the same reasons described above. -WORKDIR /nextstrain/auspice -RUN /builder-scripts/download-repo https://github.com/nextstrain/auspice release . 
\ - && npm update && npm install && npm run build && npm link - # Add evofr for forecasting RUN pip3 install evofr -# Add NCBI Datasets command line tools for access to NCBI Datsets Virus Data Packages -RUN curl -fsSL -o /final/bin/datasets https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/v2/linux-${TARGETARCH}/datasets -RUN curl -fsSL -o /final/bin/dataformat https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/v2/linux-${TARGETARCH}/dataformat - # ———————————————————————————————————————————————————————————————————— # # Now build the final image. @@ -333,9 +389,9 @@ RUN if [[ "$TARGETPLATFORM" == linux/arm64 ]]; then \ COPY bashrc /etc/bash.bashrc # Copy binaries -COPY --from=builder /final/bin/ /usr/local/bin/ -COPY --from=builder /final/share/ /usr/local/share/ -COPY --from=builder /final/libexec/ /usr/local/libexec/ +COPY --from=builder-build-platform /final/bin/ /usr/local/bin/ +COPY --from=builder-build-platform /final/share/ /usr/local/share/ +COPY --from=builder-build-platform /final/libexec/ /usr/local/libexec/ # Set MAFFT_BINARIES explicitly for MAFFT ENV MAFFT_BINARIES=/usr/local/libexec @@ -344,7 +400,7 @@ ENV MAFFT_BINARIES=/usr/local/libexec RUN chmod a+rx /usr/local/bin/* /usr/local/libexec/* # Add installed Python libs -COPY --from=builder /usr/local/lib/python3.10/site-packages/ /usr/local/lib/python3.10/site-packages/ +COPY --from=builder-target-platform /usr/local/lib/python3.10/site-packages/ /usr/local/lib/python3.10/site-packages/ # Add installed Python scripts that we need. # @@ -355,7 +411,7 @@ COPY --from=builder /usr/local/lib/python3.10/site-packages/ /usr/local/lib/pyth # as the set of things to copy) in the future if the maintenance burden becomes # troublesome or excessive. 
# -trs, 15 June 2018 -COPY --from=builder \ +COPY --from=builder-target-platform \ /usr/local/bin/augur \ /usr/local/bin/aws \ /usr/local/bin/envdir \ @@ -368,7 +424,7 @@ COPY --from=builder \ /usr/local/bin/ # Add installed Node libs -COPY --from=builder /usr/lib/node_modules/ /usr/lib/node_modules/ +COPY --from=builder-build-platform /usr/lib/node_modules/ /usr/lib/node_modules/ # Add globally linked Auspice script. # @@ -379,7 +435,8 @@ COPY --from=builder /usr/lib/node_modules/ /usr/lib/node_modules/ RUN ln -sv /usr/lib/node_modules/auspice/auspice.js /usr/local/bin/auspice # Add Nextstrain components -COPY --from=builder /nextstrain /nextstrain +COPY --from=builder-build-platform /nextstrain /nextstrain +COPY --from=builder-target-platform /nextstrain /nextstrain # Add our entrypoints and helpers COPY entrypoint entrypoint-aws-batch drop-privs create-envd delete-envd /sbin/ From 24d025ffef2b762a67c80d62a79ec77ef9e42e39 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Thu, 1 Dec 2022 13:26:06 -0800 Subject: [PATCH 02/15] Dockerfile: Add gcc to build datrie MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is required since datrie only provides wheels up through Python 3.8¹. 
¹ https://pypi.org/project/datrie/0.8.2/#files --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index 82a57672..ffff5150 100644 --- a/Dockerfile +++ b/Dockerfile @@ -219,9 +219,11 @@ ARG TARGETARCH # Add system deps for building # curl, jq: used in builder-scripts/latest-augur-release-tag # git: for git pip installs +# gcc: for building datrie (for Snakemake) # libsqlite3-dev, zlib1g-dev: for building pyfastx (for Augur) RUN apt-get update && apt-get install -y --no-install-recommends \ curl \ + gcc \ git \ jq \ libsqlite3-dev \ From 0880c09b300a5e45097ed408cd45f4463f2c9094 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Fri, 14 Oct 2022 22:28:24 +0000 Subject: [PATCH 03/15] Replace build-essential with make, gcc, libc6-dev build-essential should not be installed for cross-compilation since it contains native-only compilers. Instead, target-specific compilers provided by xx (e.g. aarch64-linux-gnu-gcc) should be used. Add dependencies of build-essential that are still needed in separate stages. --- Dockerfile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index ffff5150..6c3d7c48 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,20 +22,20 @@ COPY --from=xx / / # Add system deps for building # autoconf, automake: for building VCFtools; may be used by package managers to build from source -# build-essential: contains gcc, g++, make, etc. 
for building various tools; may be used by package managers to build from source # ca-certificates: for secure HTTPS connections # curl: for downloading source files # git: used in builder-scripts/download-repo +# make: used for building from Makefiles (search for usage); may be used by package managers to build from source # pkg-config: for building VCFtools; may be used by package managers to build from source # zlib1g-dev: for building VCFtools; may be used by package managers to build from source # nodejs: for installing Auspice RUN apt-get update && apt-get install -y --no-install-recommends \ autoconf \ automake \ - build-essential \ ca-certificates \ curl \ git \ + make \ pkg-config \ zlib1g-dev @@ -298,6 +298,8 @@ RUN pip3 install phylo-treetime # CVXOPT, an Augur dependency, does not have pre-built binaries for linux/arm64. # # First, add system deps for building¹: +# - gcc: C compiler. +# - libc6-dev: C libraries and header files. # - libopenblas-dev: Contains optimized versions of BLAS and LAPACK. # - SuiteSparse: Download the source code so it can be built alongside CVXOPT. # @@ -309,6 +311,8 @@ RUN pip3 install phylo-treetime WORKDIR /cvxopt RUN if [[ "$TARGETPLATFORM" == linux/arm64 ]]; then \ apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + libc6-dev \ libopenblas-dev \ && mkdir SuiteSparse \ && curl -fsSL https://api.github.com/repos/DrTimothyAldenDavis/SuiteSparse/tarball/v5.8.1 \ From e7ad175d8df891bc2bac783df4e141f71711dcae Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Fri, 14 Oct 2022 22:50:13 +0000 Subject: [PATCH 04/15] Install native clang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xx-clang internally calls the native clang binary with additional configuration for correct cross-compilation¹. This means it should be installed. 
¹ https://github.com/tonistiigi/xx/blob/dad71a2d84fa9f1321ad3b91e1f36e228fb31876/README.md#cc --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index 6c3d7c48..7069e3bd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -29,9 +29,11 @@ COPY --from=xx / / # pkg-config: for building VCFtools; may be used by package managers to build from source # zlib1g-dev: for building VCFtools; may be used by package managers to build from source # nodejs: for installing Auspice +# clang: for compiling C/C++ projects; may be used by package managers to build from source RUN apt-get update && apt-get install -y --no-install-recommends \ autoconf \ automake \ + clang \ ca-certificates \ curl \ git \ From 91e0544e717aa03c89cb9f3294613a4fc12980f4 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Fri, 14 Oct 2022 23:25:16 +0000 Subject: [PATCH 05/15] Install zlib for target architecture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is how it's done in the xx examples¹. Prevents the VCFtools build from breaking. 
¹ https://github.com/tonistiigi/xx/blob/dad71a2d84fa9f1321ad3b91e1f36e228fb31876/README.md#xx-apk-xx-apt-xx-apt-get---installing-packages-for-target-architecture --- Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7069e3bd..466c2211 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,7 +27,6 @@ COPY --from=xx / / # git: used in builder-scripts/download-repo # make: used for building from Makefiles (search for usage); may be used by package managers to build from source # pkg-config: for building VCFtools; may be used by package managers to build from source -# zlib1g-dev: for building VCFtools; may be used by package managers to build from source # nodejs: for installing Auspice # clang: for compiling C/C++ projects; may be used by package managers to build from source RUN apt-get update && apt-get install -y --no-install-recommends \ @@ -38,8 +37,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ curl \ git \ make \ - pkg-config \ - zlib1g-dev + pkg-config # Install a specific Node.js version # https://github.com/nodesource/distributions/blob/0d81da75/README.md#installation-instructions @@ -54,10 +52,12 @@ ARG TARGETARCH # Install packages that generate binaries for the target architecture. # https://github.com/tonistiigi/xx#building-on-debian # binutils, gcc, libc6-dev: for compiling C/C++ programs (TODO: verify) +# zlib1g-dev: for building VCFtools; may be used by package managers to build from source RUN xx-apt-get install -y \ binutils \ gcc \ - libc6-dev + libc6-dev \ + zlib1g-dev # Add dependencies. All should be pinned to specific versions, except # Nextstrain-maintained software. 
From 3b269c31379789927af6c72fe80793219b25290a Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Fri, 14 Oct 2022 22:53:15 +0000 Subject: [PATCH 06/15] Fix RAxML build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RAxML's Makefile, which was patched previously for proper arm64 builds, needs another patch in order to cross-compile. The compiler is hardcoded as gcc, but it should be a target-specific compiler. $(xx-info)-gcc can be used, but xx-clang is recommended¹. ¹ https://github.com/tonistiigi/xx/blob/dad71a2d84fa9f1321ad3b91e1f36e228fb31876/README.md#cc --- Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 466c2211..85fc36f6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -74,13 +74,13 @@ RUN mkdir -p /final/bin /final/share /final/libexec # 1. Build programs from source # Build RAxML -# linux/arm64 does not support -mavx and -msse3 compilation flags which are used in the official repository. -# Make these changes in a fork for now: https://github.com/nextstrain/standard-RAxML/tree/simde +# Some changes are necessary to allow the Makefile to work with cross-compilation. 
+# Make these changes in a fork for now: https://github.com/nextstrain/standard-RAxML/tree/fix-cross-compile # TODO: Use the official repository if this PR is ever merged: https://github.com/stamatak/standard-RAxML/pull/50 WORKDIR /build/RAxML -RUN curl -fsSL https://api.github.com/repos/nextstrain/standard-RAxML/tarball/4621552064304a219ff03810f5f0d91e1063b68f \ +RUN curl -fsSL https://api.github.com/repos/nextstrain/standard-RAxML/tarball/4868de62a62be8901259807cfea26f336c2ca477 \ | tar xzvpf - --no-same-owner --strip-components=1 \ - && make -f Makefile.AVX.PTHREADS.gcc \ + && CC=xx-clang make -f Makefile.AVX.PTHREADS.gcc \ && cp -p raxmlHPC-PTHREADS-AVX /final/bin # Build FastTree From a22625a2f5cecaf00051e73db0c1db55735cb193 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Fri, 14 Oct 2022 23:00:07 +0000 Subject: [PATCH 07/15] Fix FastTree build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FastTree's custom Makefile supports cross-compiling using a target-specific compiler. xx-clang is recommended¹, but it fails with a linker error: xx-clang -O3 -finline-functions -funroll-loops -Wall -DUSE_DOUBLE -DOPENMP -fopenmp -o FastTreeDblMP FastTree.c -lm /usr/bin/aarch64-linux-gnu-ld: cannot find -lomp clang: error: linker command failed with exit code 1 $(xx-info)-gcc is able to compile the program successfully. 
¹ https://github.com/tonistiigi/xx/blob/dad71a2d84fa9f1321ad3b91e1f36e228fb31876/README.md#cc --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 85fc36f6..e09909af 100644 --- a/Dockerfile +++ b/Dockerfile @@ -85,9 +85,9 @@ RUN curl -fsSL https://api.github.com/repos/nextstrain/standard-RAxML/tarball/48 # Build FastTree WORKDIR /build/FastTree -RUN curl -fsSL https://api.github.com/repos/tsibley/FastTree/tarball/50c5b098ea085b46de30bfc29da5e3f113353e6f \ +RUN curl -fsSL https://api.github.com/repos/nextstrain/FastTree/tarball/df4212c8c9991e7e0d432e42d53c21cd8408a181 \ | tar xzvpf - --no-same-owner --strip-components=1 \ - && make FastTreeDblMP \ + && CC=$(xx-info)-gcc make FastTreeDblMP \ && cp -p FastTreeDblMP /final/bin # Build vcftools From adf0cb7aa056428ea029bb288dd2cf71f1e28ccd Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Fri, 14 Oct 2022 23:40:02 +0000 Subject: [PATCH 08/15] Fix VCFtools build: Use --build and --host MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xx guide says --host should be sufficient¹, but Autotools did not detect cross-compilation until --build was added. 
¹ https://github.com/tonistiigi/xx/blob/dad71a2d84fa9f1321ad3b91e1f36e228fb31876/README.md#autotools --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index e09909af..83eae486 100644 --- a/Dockerfile +++ b/Dockerfile @@ -95,6 +95,8 @@ WORKDIR /build/vcftools RUN curl -fsSL https://github.com/vcftools/vcftools/releases/download/v0.1.16/vcftools-0.1.16.tar.gz \ | tar xzvpf - --no-same-owner --strip-components=2 \ && ./configure --prefix=$PWD/built \ + --build=$(TARGETPLATFORM= xx-clang --print-target-triple) \ + --host=$(xx-clang --print-target-triple) \ && make && make install \ && cp -rp built/bin/* /final/bin \ && cp -rp built/share/* /final/share From 04dd2298e80e0ae5b11bc3daa53813a31e49c4db Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Fri, 14 Oct 2022 23:53:52 +0000 Subject: [PATCH 09/15] Fix VCFtools build: Install dpkg-dev Install dpkg-dev to use pkg-config when cross-building. Otherwise, the configure script output shows: checking pkg-config is at least version 0.9.0... Please install dpkg-dev to use pkg-config when cross-building no --- Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 83eae486..e25e8448 100644 --- a/Dockerfile +++ b/Dockerfile @@ -37,7 +37,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ curl \ git \ make \ - pkg-config + pkg-config \ + dpkg-dev # Install a specific Node.js version # https://github.com/nodesource/distributions/blob/0d81da75/README.md#installation-instructions From 6ba9476b2e8224c9311f7723bda1e1f9669d3949 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Fri, 14 Oct 2022 23:34:49 +0000 Subject: [PATCH 10/15] Fix VCFtools build: Install g++ Without this or an equivalent, native c++ is used. That is undesirable when the target platform is different from the build platform. 
--- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index e25e8448..ed6fddae 100644 --- a/Dockerfile +++ b/Dockerfile @@ -53,10 +53,12 @@ ARG TARGETARCH # Install packages that generate binaries for the target architecture. # https://github.com/tonistiigi/xx#building-on-debian # binutils, gcc, libc6-dev: for compiling C/C++ programs (TODO: verify) +# g++: for building VCFtools; may be used by package managers to build from source # zlib1g-dev: for building VCFtools; may be used by package managers to build from source RUN xx-apt-get install -y \ binutils \ gcc \ + g++ \ libc6-dev \ zlib1g-dev From cc6d7df07271bd4dfca3c6cc0fef373114f4cedb Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Sat, 15 Oct 2022 00:26:56 +0000 Subject: [PATCH 11/15] Fix VCFtools build: Build from an unreleased commit See changes for reasoning. --- Dockerfile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index ed6fddae..98ae52bc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -94,10 +94,12 @@ RUN curl -fsSL https://api.github.com/repos/nextstrain/FastTree/tarball/df4212c8 && cp -p FastTreeDblMP /final/bin # Build vcftools +# Some unreleased changes are necessary to allow Autoconf to work with cross-compilation¹. 
+# ¹ https://github.com/vcftools/vcftools/commit/1cab5204eb0ce01664178bafd0ad6104525709d1 WORKDIR /build/vcftools -RUN curl -fsSL https://github.com/vcftools/vcftools/releases/download/v0.1.16/vcftools-0.1.16.tar.gz \ - | tar xzvpf - --no-same-owner --strip-components=2 \ - && ./configure --prefix=$PWD/built \ +RUN curl -fsSL https://api.github.com/repos/vcftools/vcftools/tarball/1cab5204eb0ce01664178bafd0ad6104525709d1 \ + | tar xzvpf - --no-same-owner --strip-components=1 \ + && ./autogen.sh && ./configure --prefix=$PWD/built \ --build=$(TARGETPLATFORM= xx-clang --print-target-triple) \ --host=$(xx-clang --print-target-triple) \ && make && make install \ From b78680c90c96052ec2badb653097ad3dfa195afa Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Tue, 13 Jun 2023 16:18:25 -0700 Subject: [PATCH 12/15] Update comment on Auspice install Remove the comment on build time since it may differ nowadays, and there is another reason for not caching (unpinned status, implied by other context in the Dockerfile). --- Dockerfile | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 98ae52bc..a816e0e4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -194,12 +194,11 @@ RUN curl -fsSL -o /final/bin/nextclade2 https://github.com/nextstrain/nextclade/ && ln -sv nextclade2 /final/bin/nextclade # Auspice -# Install Node deps, build Auspice, and link it into the global search path. A -# fresh install is only ~40 seconds, so we're not worrying about caching these -# as we did the Python deps. Building auspice means we can run it without -# hot-reloading, which is time-consuming and generally unnecessary in the -# container image. Linking is equivalent to an editable Python install and -# used for the same reasons described above. +# Building auspice means we can run it without hot-reloading, which is +# time-consuming and generally unnecessary in the container image. 
+# Linking is used so we can overlay the auspice version in the image with +# --volume=.../auspice:/nextstrain/auspice and still have it globally accessible +# and importable. WORKDIR /nextstrain/auspice RUN /builder-scripts/download-repo https://github.com/nextstrain/auspice release . \ && npm update && npm install && npm run build && npm link From deabcc1a6fa34a75a0b9e4b6b8440ca04e71962d Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Sat, 15 Oct 2022 00:35:43 +0000 Subject: [PATCH 13/15] Add comment on Auspice cross-compilation Pure JavaScript packages (including Auspice) are not platform-specific. Evidence that the arm64 image variant of build-20230621T190343Z (latest tagged build) does not have any platform-specific Auspice runtime dependencies: $ apt-get update && apt-get install file -y $ find /nextstrain/auspice/node_modules -executable -type f -exec file {} + | grep 64 | sed -e 's=^/nextstrain/auspice/node_modules/==' node-notifier/vendor/mac.noindex/terminal-notifier.app/Contents/MacOS/terminal-notifier: Mach-O 64-bit x86_64 executable, flags: node-notifier/vendor/notifu/notifu64.exe: PE32+ executable (GUI) x86-64, for MS Windows puppeteer/.local-chromium/linux-722234/chrome-linux/nacl_irt_x86_64.nexe: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), statically linked, BuildID[sha1]=7aeb4f45ea5cec7d8e4184264ad39f0f77bcaee2, stripped puppeteer/.local-chromium/linux-722234/chrome-linux/nacl_helper_bootstrap: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), statically linked, BuildID[sha1]=5c5f3935c8f8a15ba325f0d73dfa585fa9390cf9, not stripped puppeteer/.local-chromium/linux-722234/chrome-linux/ClearKeyCdm/_platform_specific/linux_x64/libclearkeycdm.so: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, not stripped puppeteer/.local-chromium/linux-722234/chrome-linux/libEGL.so: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, not stripped 
puppeteer/.local-chromium/linux-722234/chrome-linux/crashpad_handler: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, for GNU/Linux 3.2.0, not stripped puppeteer/.local-chromium/linux-722234/chrome-linux/chrome_sandbox: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, for GNU/Linux 3.2.0, not stripped puppeteer/.local-chromium/linux-722234/chrome-linux/chrome: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, for GNU/Linux 3.2.0, not stripped puppeteer/.local-chromium/linux-722234/chrome-linux/libGLESv2.so: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, not stripped puppeteer/.local-chromium/linux-722234/chrome-linux/swiftshader/libEGL.so: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, not stripped puppeteer/.local-chromium/linux-722234/chrome-linux/swiftshader/libGLESv2.so: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, not stripped puppeteer/.local-chromium/linux-722234/chrome-linux/nacl_helper: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, for GNU/Linux 3.2.0, not stripped --- Dockerfile | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Dockerfile b/Dockerfile index a816e0e4..51ea8d9d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -199,6 +199,19 @@ RUN curl -fsSL -o /final/bin/nextclade2 https://github.com/nextstrain/nextclade/ # Linking is used so we can overlay the auspice version in the image with # --volume=.../auspice:/nextstrain/auspice and still have it globally accessible # and importable. +# +# Versions of NPM might differ in platform between where Auspice is installed +# and where it is used (the final image). 
This does not matter since Auspice +# (and its runtime dependencies at the time of writing) are not +# platform-specific. +# This may change in the future, which would call for cross-platform +# installation using npm_config_arch (if using node-gyp¹ or prebuild-install²) +# or npm_config_target_arch (if using node-pre-gyp³⁴). +# +# ¹ https://github.com/nodejs/node-gyp#environment-variables +# ² https://github.com/prebuild/prebuild-install#help +# ³ https://github.com/mapbox/node-pre-gyp#options +# ⁴ https://github.com/mapbox/node-pre-gyp/blob/v1.0.10/lib/node-pre-gyp.js#L186 WORKDIR /nextstrain/auspice RUN /builder-scripts/download-repo https://github.com/nextstrain/auspice release . \ && npm update && npm install && npm run build && npm link From cb555708dc1fc5b5245f67f32e2ede8bb992f250 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Tue, 21 Mar 2023 14:12:44 -0700 Subject: [PATCH 14/15] Update scripts to reflect 3-stage build Apart from this commit, it was necessary to create 2 new repositories in the nextstrain Docker Hub organization. --- devel/build | 42 +++++++++++++++++++++++++++++---------- devel/copy-images | 33 ++++++++++++++++++++---------- devel/delete-from-ghcr.js | 6 +++++- devel/pull-from-registry | 6 ++++-- 4 files changed, 62 insertions(+), 25 deletions(-) diff --git a/devel/build b/devel/build index 72e7d121..656b6176 100755 --- a/devel/build +++ b/devel/build @@ -53,20 +53,36 @@ if ! 
docker buildx inspect "$builder" &>/dev/null; then docker buildx create --name "$builder" --driver docker-container --driver-opt network=host fi -BUILDER_IMAGE=nextstrain/base-builder +BUILDER_BUILD_PLATFORM_IMAGE=nextstrain/base-builder-build-platform +BUILDER_TARGET_PLATFORM_IMAGE=nextstrain/base-builder-target-platform FINAL_IMAGE=nextstrain/base docker buildx build \ - --target builder \ + --target builder-build-platform \ --builder "$builder" \ --platform "$platform" \ --build-arg CACHE_DATE \ - --cache-from "$BUILDER_IMAGE:latest" \ - --cache-from "$BUILDER_IMAGE:$tag" \ - --cache-from "$registry/$BUILDER_IMAGE:latest" \ - --cache-from "$registry/$BUILDER_IMAGE:$tag" \ + --cache-from "$BUILDER_BUILD_PLATFORM_IMAGE:latest" \ + --cache-from "$BUILDER_BUILD_PLATFORM_IMAGE:$tag" \ + --cache-from "$registry/$BUILDER_BUILD_PLATFORM_IMAGE:latest" \ + --cache-from "$registry/$BUILDER_BUILD_PLATFORM_IMAGE:$tag" \ --cache-to type=inline \ - --tag "$registry/$BUILDER_IMAGE:$tag" \ + --tag "$registry/$BUILDER_BUILD_PLATFORM_IMAGE:$tag" \ + --push \ + --provenance false \ + . + +docker buildx build \ + --target builder-target-platform \ + --builder "$builder" \ + --platform "$platform" \ + --build-arg CACHE_DATE \ + --cache-from "$BUILDER_TARGET_PLATFORM_IMAGE:latest" \ + --cache-from "$BUILDER_TARGET_PLATFORM_IMAGE:$tag" \ + --cache-from "$registry/$BUILDER_TARGET_PLATFORM_IMAGE:latest" \ + --cache-from "$registry/$BUILDER_TARGET_PLATFORM_IMAGE:$tag" \ + --cache-to type=inline \ + --tag "$registry/$BUILDER_TARGET_PLATFORM_IMAGE:$tag" \ --push \ --provenance false \ . 
@@ -77,12 +93,16 @@ docker buildx build \ --platform "$platform" \ --build-arg GIT_REVISION \ --build-arg CACHE_DATE \ - --cache-from "$BUILDER_IMAGE:latest" \ - --cache-from "$BUILDER_IMAGE:$tag" \ + --cache-from "$BUILDER_BUILD_PLATFORM_IMAGE:latest" \ + --cache-from "$BUILDER_BUILD_PLATFORM_IMAGE:$tag" \ + --cache-from "$BUILDER_TARGET_PLATFORM_IMAGE:latest" \ + --cache-from "$BUILDER_TARGET_PLATFORM_IMAGE:$tag" \ --cache-from "$FINAL_IMAGE:latest" \ --cache-from "$FINAL_IMAGE:$tag" \ - --cache-from "$registry/$BUILDER_IMAGE:latest" \ - --cache-from "$registry/$BUILDER_IMAGE:$tag" \ + --cache-from "$registry/$BUILDER_BUILD_PLATFORM_IMAGE:latest" \ + --cache-from "$registry/$BUILDER_BUILD_PLATFORM_IMAGE:$tag" \ + --cache-from "$registry/$BUILDER_TARGET_PLATFORM_IMAGE:latest" \ + --cache-from "$registry/$BUILDER_TARGET_PLATFORM_IMAGE:$tag" \ --cache-from "$registry/$FINAL_IMAGE:latest" \ --cache-from "$registry/$FINAL_IMAGE:$tag" \ --cache-to type=inline \ diff --git a/devel/copy-images b/devel/copy-images index a9c372e5..c7a6e8fe 100755 --- a/devel/copy-images +++ b/devel/copy-images @@ -33,7 +33,8 @@ if [[ "$tag" = "" ]]; then exit 1 fi -BUILDER_IMAGE=nextstrain/base-builder +BUILDER_BUILD_PLATFORM_IMAGE=nextstrain/base-builder-build-platform +BUILDER_TARGET_PLATFORM_IMAGE=nextstrain/base-builder-target-platform FINAL_IMAGE=nextstrain/base @@ -77,26 +78,36 @@ copy-image() { # Copy $tag between registries. +echo "Copying $registry_in/$BUILDER_BUILD_PLATFORM_IMAGE:$tag to $registry_out/$BUILDER_BUILD_PLATFORM_IMAGE:$tag." +copy-image \ + "$registry_in/$BUILDER_BUILD_PLATFORM_IMAGE:$tag" \ + "$registry_out/$BUILDER_BUILD_PLATFORM_IMAGE:$tag" + +echo "Copying $registry_in/$BUILDER_TARGET_PLATFORM_IMAGE:$tag to $registry_out/$BUILDER_TARGET_PLATFORM_IMAGE:$tag." +copy-image \ + "$registry_in/$BUILDER_TARGET_PLATFORM_IMAGE:$tag" \ + "$registry_out/$BUILDER_TARGET_PLATFORM_IMAGE:$tag" + echo "Copying $registry_in/$FINAL_IMAGE:$tag to $registry_out/$FINAL_IMAGE:$tag." 
copy-image \ "$registry_in/$FINAL_IMAGE:$tag" \ "$registry_out/$FINAL_IMAGE:$tag" -echo "Copying $registry_in/$BUILDER_IMAGE:$tag to $registry_out/$BUILDER_IMAGE:$tag." -copy-image \ - "$registry_in/$BUILDER_IMAGE:$tag" \ - "$registry_out/$BUILDER_IMAGE:$tag" - if [[ "$push_latest" = true ]]; then # Copy $tag to latest. + echo "Copying $registry_in/$BUILDER_BUILD_PLATFORM_IMAGE:$tag to $registry_out/$BUILDER_BUILD_PLATFORM_IMAGE:latest." + copy-image \ + "$registry_in/$BUILDER_BUILD_PLATFORM_IMAGE:$tag" \ + "$registry_out/$BUILDER_BUILD_PLATFORM_IMAGE:latest" + + echo "Copying $registry_in/$BUILDER_TARGET_PLATFORM_IMAGE:$tag to $registry_out/$BUILDER_TARGET_PLATFORM_IMAGE:latest." + copy-image \ + "$registry_in/$BUILDER_TARGET_PLATFORM_IMAGE:$tag" \ + "$registry_out/$BUILDER_TARGET_PLATFORM_IMAGE:latest" + echo "Copying $registry_in/$FINAL_IMAGE:$tag to $registry_out/$FINAL_IMAGE:latest." copy-image \ "$registry_in/$FINAL_IMAGE:$tag" \ "$registry_out/$FINAL_IMAGE:latest" - - echo "Copying $registry_in/$BUILDER_IMAGE:$tag to $registry_out/$BUILDER_IMAGE:latest." - copy-image \ - "$registry_in/$BUILDER_IMAGE:$tag" \ - "$registry_out/$BUILDER_IMAGE:latest" fi diff --git a/devel/delete-from-ghcr.js b/devel/delete-from-ghcr.js index acb86acb..5a792f6f 100644 --- a/devel/delete-from-ghcr.js +++ b/devel/delete-from-ghcr.js @@ -9,7 +9,11 @@ module.exports = async ({fetch, octokit, tag, token}) => { org = 'nextstrain'; - packages = ['base', 'base-builder']; + packages = [ + 'base', + 'base-builder-build-platform', + 'base-builder-target-platform', + ]; // Try all packages before terminating with any errors. 
let errorEncountered = false; diff --git a/devel/pull-from-registry b/devel/pull-from-registry index 4a6a0fe0..43f4fba1 100755 --- a/devel/pull-from-registry +++ b/devel/pull-from-registry @@ -17,8 +17,10 @@ while getopts "r:t:" opt; do esac done -BUILDER_IMAGE=nextstrain/base-builder +BUILDER_BUILD_PLATFORM_IMAGE=nextstrain/base-builder-build-platform +BUILDER_TARGET_PLATFORM_IMAGE=nextstrain/base-builder-target-platform FINAL_IMAGE=nextstrain/base -docker pull "$registry/$BUILDER_IMAGE:$tag" +docker pull "$registry/$BUILDER_BUILD_PLATFORM_IMAGE:$tag" +docker pull "$registry/$BUILDER_TARGET_PLATFORM_IMAGE:$tag" docker pull "$registry/$FINAL_IMAGE:$tag" From d6f39afd044940d88fbddba21754338b22eab3c7 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Tue, 28 Feb 2023 16:25:57 -0800 Subject: [PATCH 15/15] Update README for cross-compilation --- README.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 88ad6b9d..9a13844d 100644 --- a/README.md +++ b/README.md @@ -109,9 +109,8 @@ To push images you've built locally to Docker Hub, you can run: ./devel/copy-images -t -This will copy the `nextstrain/base:` and `nextstrain/base-builder:` -images from the local Docker registry to Docker Hub. See instructions at the top -of the script for more options. +This will copy the Nextstrain images from the local Docker registry to Docker +Hub. See instructions at the top of the script for more options. ### Adding a new software program @@ -136,12 +135,16 @@ To add a software program to `nextstrain/base`, follow steps in this order: 4. The last resort is to build from source. Look for instructions on the software's website. Add a build command to the section labeled with `Build programs from source`. Note that this can require platform-specific - instructions. + instructions. You should utilize the cross-compilation tools available in the + builder stage that runs on the build platform.
If possible, pin the software to a specific version. Otherwise, add the download/install/build command to the section labeled with `Add unpinned programs` to ensure the latest version is included in every Docker image build. +If possible, add the program to the builder stage that runs on the build +platform to avoid slowness that may arise from emulation. + ### Best practices The smaller the image size, the better. To this end we build upon a ["slim"