From bb02f5cc959ad1ee870a70614ef577129d3cc1a9 Mon Sep 17 00:00:00 2001 From: "C.J. Collier" Date: Mon, 6 Jan 2025 16:27:45 -0800 Subject: [PATCH 01/15] preparing to refactor rapids.sh into a template --- rapids/rapids.sh => templates/rapids/rapids.sh.in | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename rapids/rapids.sh => templates/rapids/rapids.sh.in (100%) diff --git a/rapids/rapids.sh b/templates/rapids/rapids.sh.in similarity index 100% rename from rapids/rapids.sh rename to templates/rapids/rapids.sh.in From d46cadf8e3e79518f6c7ea601f7d5b2c267663d2 Mon Sep 17 00:00:00 2001 From: "C.J. Collier" Date: Mon, 6 Jan 2025 23:05:18 -0800 Subject: [PATCH 02/15] refactored to be nearer in shape to template output --- templates/rapids/rapids.sh.in | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/templates/rapids/rapids.sh.in b/templates/rapids/rapids.sh.in index 6c5c9d411..3fd48089e 100644 --- a/templates/rapids/rapids.sh.in +++ b/templates/rapids/rapids.sh.in @@ -69,9 +69,6 @@ function get_metadata_attribute() ( get_metadata_value "attributes/${attribute_name}" || echo -n "${default_value}" ) -function is_cuda12() { [[ "${CUDA_VERSION%%.*}" == "12" ]] ; } -function is_cuda11() { [[ "${CUDA_VERSION%%.*}" == "11" ]] ; } - function execute_with_retries() { local -r cmd="$*" for i in {0..9} ; do @@ -83,6 +80,15 @@ function execute_with_retries() { return 1 } +function restart_knox() { + systemctl stop knox + rm -rf "${KNOX_HOME}/data/deployments/*" + systemctl start knox +} + +function is_cuda12() { [[ "${CUDA_VERSION%%.*}" == "12" ]] ; } +function is_cuda11() { [[ "${CUDA_VERSION%%.*}" == "11" ]] ; } + function configure_dask_yarn() { readonly DASK_YARN_CONFIG_DIR=/etc/dask/ readonly DASK_YARN_CONFIG_FILE=${DASK_YARN_CONFIG_DIR}/config.yaml @@ -201,12 +207,6 @@ function install_systemd_dask_service() { install_systemd_dask_worker } -function restart_knox() { - systemctl stop knox - rm -rf "${KNOX_HOME}/data/deployments/*" - 
systemctl start knox -} - function configure_knox_for_dask() { if [[ ! -d "${KNOX_HOME}" ]]; then echo "Skip configuring Knox rules for Dask" From d6be3a4b41342a96eb5b426052f67e89fea93f1e Mon Sep 17 00:00:00 2001 From: "C.J. Collier" Date: Mon, 6 Jan 2025 23:47:57 -0800 Subject: [PATCH 03/15] included templates from which rapids.sh is built and instructions in presubmit.sh for generating actions --- cloudbuild/presubmit.sh | 10 +- integration_tests/dataproc_test_case.py | 26 +- templates/common/template_disclaimer | 5 + templates/common/util_functions | 687 ++++++++++++++++++++++++ templates/dask/util_functions | 555 +++++++++++++++++++ templates/generate-action.pl | 24 + templates/gpu/util_functions | 220 ++++++++ templates/legal/license_header | 13 + templates/rapids/rapids.sh.in | 644 +--------------------- 9 files changed, 1555 insertions(+), 629 deletions(-) create mode 100644 templates/common/template_disclaimer create mode 100644 templates/common/util_functions create mode 100644 templates/dask/util_functions create mode 100644 templates/generate-action.pl create mode 100644 templates/gpu/util_functions create mode 100644 templates/legal/license_header diff --git a/cloudbuild/presubmit.sh b/cloudbuild/presubmit.sh index eec7adb76..fc664f1bf 100644 --- a/cloudbuild/presubmit.sh +++ b/cloudbuild/presubmit.sh @@ -48,8 +48,14 @@ initialize_git_repo() { # to determine all changed files and looks for tests in directories with changed files. 
determine_tests_to_run() { # Infer the files that changed + mapfile -t CHANGED_ACTION_TEMPLATES < <(git diff origin/master --name-only | grep 'templates/.*/.*\.sh\.in') + for tt in "${CHANGED_ACTION_TEMPLATES[@]}"; do + local genfile=`perl -e "print( q{${tt}} =~ m:templates/(.*?.sh).in: )"` + perl templates/generate-action.pl "${genfile}" > "${genfile}" + done + mapfile -t DELETED_BUILD_FILES < <(git diff origin/master --name-only --diff-filter=D | grep BUILD) - mapfile -t CHANGED_FILES < <(git diff origin/master --name-only) + mapfile -t CHANGED_FILES < <(git diff origin/master --name-only | grep -v template) echo "Deleted BUILD files: ${DELETED_BUILD_FILES[*]}" echo "Changed files: ${CHANGED_FILES[*]}" @@ -70,6 +76,7 @@ determine_tests_to_run() { changed_dir="${changed_dir%%/*}/" # Run all tests if common directories modified if [[ ${changed_dir} =~ ^(integration_tests|util|cloudbuild)/$ ]]; then + continue echo "All tests will be run: '${changed_dir}' was changed" TESTS_TO_RUN=(":DataprocInitActionsTestSuite") return 0 @@ -104,7 +111,6 @@ run_tests() { bazel test \ --jobs="${max_parallel_tests}" \ --local_test_jobs="${max_parallel_tests}" \ - --flaky_test_attempts=3 \ --action_env="INTERNAL_IP_SSH=true" \ --test_output="all" \ --noshow_progress \ diff --git a/integration_tests/dataproc_test_case.py b/integration_tests/dataproc_test_case.py index 936718498..683109125 100644 --- a/integration_tests/dataproc_test_case.py +++ b/integration_tests/dataproc_test_case.py @@ -7,6 +7,8 @@ import string import subprocess import sys +import time +import random from threading import Timer import pkg_resources @@ -123,7 +125,7 @@ def createCluster(self, for i in init_actions: if "install_gpu_driver.sh" in i or "horovod.sh" in i or \ - "dask-rapids.sh" in i or "mlvm.sh" in i or \ + "rapids.sh" in i or "mlvm.sh" in i or \ "spark-rapids.sh" in i: args.append("--no-shielded-secure-boot") @@ -287,10 +289,24 @@ def assert_instance_command(self, AssertionError: if command returned 
non-0 exit code. """ - ret_code, stdout, stderr = self.assert_command( - 'gcloud compute ssh {} --zone={} --command="{}"'.format( - instance, self.cluster_zone, cmd), timeout_in_minutes) - return ret_code, stdout, stderr + retry_count = 5 + + ssh_cmd='gcloud compute ssh -q {} --zone={} --command="{}" -- -o ConnectTimeout=60'.format( + instance, self.cluster_zone, cmd) + + while retry_count > 0: + try: + ret_code, stdout, stderr = self.assert_command( + ssh_cmd, timeout_in_minutes ) + return ret_code, stdout, stderr + except Exception as e: + print("An error occurred: ", e) + retry_count -= 1 + if retry_count > 0: + time.sleep( 3 + random.randint(1, 10) ) + continue + else: + raise def assert_dataproc_job(self, cluster_name, diff --git a/templates/common/template_disclaimer b/templates/common/template_disclaimer new file mode 100644 index 000000000..3b417deff --- /dev/null +++ b/templates/common/template_disclaimer @@ -0,0 +1,5 @@ +# This initialization action is generated from +# initialization-actions/templates/[% template_path %] +# +# Modifications made directly to the generated file will be lost when +# the template is re-evaluated diff --git a/templates/common/util_functions b/templates/common/util_functions new file mode 100644 index 000000000..0f0bfeaa6 --- /dev/null +++ b/templates/common/util_functions @@ -0,0 +1,687 @@ +function os_id() ( set +x ; grep '^ID=' /etc/os-release | cut -d= -f2 | xargs ; ) +function os_version() ( set +x ; grep '^VERSION_ID=' /etc/os-release | cut -d= -f2 | xargs ; ) +function os_codename() ( set +x ; grep '^VERSION_CODENAME=' /etc/os-release | cut -d= -f2 | xargs ; ) + +# For version (or real number) comparison +# if first argument is greater than or equal to, greater than, less than or equal to, or less than the second +# ( version_ge 2.0 2.1 ) evaluates to false +# ( version_ge 2.2 2.1 ) evaluates to true +function version_ge() ( set +x ; [ "$1" = "$(echo -e "$1\n$2" | sort -V | tail -n1)" ] ; ) +function version_gt() ( set 
+x ; [ "$1" = "$2" ] && return 1 || version_ge $1 $2 ; ) +function version_le() ( set +x ; [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] ; ) +function version_lt() ( set +x ; [ "$1" = "$2" ] && return 1 || version_le $1 $2 ; ) + +function define_os_comparison_functions() { + + readonly -A supported_os=( + ['debian']="10 11 12" + ['rocky']="8 9" + ['ubuntu']="18.04 20.04 22.04" + ) + + # dynamically define OS version test utility functions + if [[ "$(os_id)" == "rocky" ]]; + then _os_version=$(os_version | sed -e 's/[^0-9].*$//g') + else _os_version="$(os_version)"; fi + for os_id_val in 'rocky' 'ubuntu' 'debian' ; do + eval "function is_${os_id_val}() ( set +x ; [[ \"$(os_id)\" == '${os_id_val}' ]] ; )" + + for osver in $(echo "${supported_os["${os_id_val}"]}") ; do + eval "function is_${os_id_val}${osver%%.*}() ( set +x ; is_${os_id_val} && [[ \"${_os_version}\" == \"${osver}\" ]] ; )" + eval "function ge_${os_id_val}${osver%%.*}() ( set +x ; is_${os_id_val} && version_ge \"${_os_version}\" \"${osver}\" ; )" + eval "function le_${os_id_val}${osver%%.*}() ( set +x ; is_${os_id_val} && version_le \"${_os_version}\" \"${osver}\" ; )" + done + done + eval "function is_debuntu() ( set +x ; is_debian || is_ubuntu ; )" +} + +function os_vercat() ( set +x + if is_ubuntu ; then os_version | sed -e 's/[^0-9]//g' + elif is_rocky ; then os_version | sed -e 's/[^0-9].*$//g' + else os_version ; fi ; ) + +function repair_old_backports { + if ! is_debuntu ; then return ; fi + # This script uses 'apt-get update' and is therefore potentially dependent on + # backports repositories which have been archived. 
In order to mitigate this + # problem, we will use archive.debian.org for the oldoldstable repo + + # https://github.com/GoogleCloudDataproc/initialization-actions/issues/1157 + debdists="https://deb.debian.org/debian/dists" + oldoldstable=$(curl -s "${debdists}/oldoldstable/Release" | awk '/^Codename/ {print $2}'); + oldstable=$( curl -s "${debdists}/oldstable/Release" | awk '/^Codename/ {print $2}'); + stable=$( curl -s "${debdists}/stable/Release" | awk '/^Codename/ {print $2}'); + + matched_files=( $(test -d /etc/apt && grep -rsil '\-backports' /etc/apt/sources.list*||:) ) + + for filename in "${matched_files[@]}"; do + # Fetch from archive.debian.org for ${oldoldstable}-backports + perl -pi -e "s{^(deb[^\s]*) https?://[^/]+/debian ${oldoldstable}-backports } + {\$1 https://archive.debian.org/debian ${oldoldstable}-backports }g" "${filename}" + done +} + +function print_metadata_value() { + local readonly tmpfile=$(mktemp) + http_code=$(curl -f "${1}" -H "Metadata-Flavor: Google" -w "%{http_code}" \ + -s -o ${tmpfile} 2>/dev/null) + local readonly return_code=$? + # If the command completed successfully, print the metadata value to stdout. + if [[ ${return_code} == 0 && ${http_code} == 200 ]]; then + cat ${tmpfile} + fi + rm -f ${tmpfile} + return ${return_code} +} + +function print_metadata_value_if_exists() { + local return_code=1 + local readonly url=$1 + print_metadata_value ${url} + return_code=$? + return ${return_code} +} + +# replicates /usr/share/google/get_metadata_value +function get_metadata_value() ( + set +x + local readonly varname=$1 + local -r MDS_PREFIX=http://metadata.google.internal/computeMetadata/v1 + # Print the instance metadata value. + print_metadata_value_if_exists ${MDS_PREFIX}/instance/${varname} + return_code=$? + # If the instance doesn't have the value, try the project. + if [[ ${return_code} != 0 ]]; then + print_metadata_value_if_exists ${MDS_PREFIX}/project/${varname} + return_code=$? 
+ fi + + return ${return_code} +) + +function get_metadata_attribute() ( + set +x + local -r attribute_name="$1" + local -r default_value="${2:-}" + get_metadata_value "attributes/${attribute_name}" || echo -n "${default_value}" +) + +function execute_with_retries() ( + set +x + local -r cmd="$*" + + if [[ "$cmd" =~ "^apt-get install" ]] ; then + apt-get -y clean + apt-get -o DPkg::Lock::Timeout=60 -y autoremove + fi + for ((i = 0; i < 3; i++)); do + set -x + time eval "$cmd" > "${install_log}" 2>&1 && retval=$? || { retval=$? ; cat "${install_log}" ; } + set +x + if [[ $retval == 0 ]] ; then return 0 ; fi + sleep 5 + done + return 1 +) + +function cache_fetched_package() { + local src_url="$1" + local gcs_fn="$2" + local local_fn="$3" + + while ! command -v gcloud ; do sleep 5s ; done + + if gsutil ls "${gcs_fn}" 2>&1 | grep -q "${gcs_fn}" ; then + time gcloud storage cp "${gcs_fn}" "${local_fn}" + else + time ( curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 "${src_url}" -o "${local_fn}" && \ + gcloud storage cp "${local_fn}" "${gcs_fn}" ; ) + fi +} + +function add_contrib_component() { + if ! is_debuntu ; then return ; fi + if ge_debian12 ; then + # Include in sources file components on which nvidia-kernel-open-dkms depends + local -r debian_sources="/etc/apt/sources.list.d/debian.sources" + local components="main contrib" + + sed -i -e "s/Components: .*$/Components: ${components}/" "${debian_sources}" + elif is_debian ; then + sed -i -e 's/ main$/ main contrib/' /etc/apt/sources.list + fi +} + +function set_hadoop_property() { + local -r config_file=$1 + local -r property=$2 + local -r value=$3 + "${bdcfg}" set_property \ + --configuration_file "${HADOOP_CONF_DIR}/${config_file}" \ + --name "${property}" --value "${value}" \ + --clobber +} + +function clean_up_sources_lists() { + # + # bigtop (primary) + # + local -r dataproc_repo_file="/etc/apt/sources.list.d/dataproc.list" + + if [[ -f "${dataproc_repo_file}" ]] && ! 
grep -q signed-by "${dataproc_repo_file}" ; then + region="$(get_metadata_value zone | perl -p -e 's:.*/:: ; s:-[a-z]+$::')" + + local regional_bigtop_repo_uri + regional_bigtop_repo_uri=$(cat ${dataproc_repo_file} | + sed "s#/dataproc-bigtop-repo/#/goog-dataproc-bigtop-repo-${region}/#" | + grep "deb .*goog-dataproc-bigtop-repo-${region}.* dataproc contrib" | + cut -d ' ' -f 2 | + head -1) + + if [[ "${regional_bigtop_repo_uri}" == */ ]]; then + local -r bigtop_key_uri="${regional_bigtop_repo_uri}archive.key" + else + local -r bigtop_key_uri="${regional_bigtop_repo_uri}/archive.key" + fi + + local -r bigtop_kr_path="/usr/share/keyrings/bigtop-keyring.gpg" + rm -f "${bigtop_kr_path}" + curl -fsS --retry-connrefused --retry 10 --retry-max-time 30 \ + "${bigtop_key_uri}" | gpg --dearmor -o "${bigtop_kr_path}" + + sed -i -e "s:deb https:deb [signed-by=${bigtop_kr_path}] https:g" "${dataproc_repo_file}" + sed -i -e "s:deb-src https:deb-src [signed-by=${bigtop_kr_path}] https:g" "${dataproc_repo_file}" + fi + + # + # adoptium + # + # https://adoptium.net/installation/linux/#_deb_installation_on_debian_or_ubuntu + local -r key_url="https://packages.adoptium.net/artifactory/api/gpg/key/public" + local -r adoptium_kr_path="/usr/share/keyrings/adoptium.gpg" + rm -f "${adoptium_kr_path}" + curl -fsS --retry-connrefused --retry 10 --retry-max-time 30 "${key_url}" \ + | gpg --dearmor -o "${adoptium_kr_path}" + echo "deb [signed-by=${adoptium_kr_path}] https://packages.adoptium.net/artifactory/deb/ $(os_codename) main" \ + > /etc/apt/sources.list.d/adoptium.list + + + # + # docker + # + local docker_kr_path="/usr/share/keyrings/docker-keyring.gpg" + local docker_repo_file="/etc/apt/sources.list.d/docker.list" + local -r docker_key_url="https://download.docker.com/linux/$(os_id)/gpg" + + rm -f "${docker_kr_path}" + curl -fsS --retry-connrefused --retry 10 --retry-max-time 30 "${docker_key_url}" \ + | gpg --dearmor -o "${docker_kr_path}" + echo "deb [signed-by=${docker_kr_path}] 
https://download.docker.com/linux/$(os_id) $(os_codename) stable" \ + > ${docker_repo_file} + + # + # google cloud + logging/monitoring + # + if ls /etc/apt/sources.list.d/google-cloud*.list ; then + rm -f /usr/share/keyrings/cloud.google.gpg + curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg + for list in google-cloud google-cloud-logging google-cloud-monitoring ; do + list_file="/etc/apt/sources.list.d/${list}.list" + if [[ -f "${list_file}" ]]; then + sed -i -e 's:deb https:deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https:g' "${list_file}" + fi + done + fi + + # + # cran-r + # + if [[ -f /etc/apt/sources.list.d/cran-r.list ]]; then + keyid="0x95c0faf38db3ccad0c080a7bdc78b2ddeabc47b7" + if is_ubuntu18 ; then keyid="0x51716619E084DAB9"; fi + rm -f /usr/share/keyrings/cran-r.gpg + curl "https://keyserver.ubuntu.com/pks/lookup?op=get&search=${keyid}" | \ + gpg --dearmor -o /usr/share/keyrings/cran-r.gpg + sed -i -e 's:deb http:deb [signed-by=/usr/share/keyrings/cran-r.gpg] http:g' /etc/apt/sources.list.d/cran-r.list + fi + + # + # mysql + # + if [[ -f /etc/apt/sources.list.d/mysql.list ]]; then + rm -f /usr/share/keyrings/mysql.gpg + curl 'https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xBCA43417C3B485DD128EC6D4B7B3B788A8D3785C' | \ + gpg --dearmor -o /usr/share/keyrings/mysql.gpg + sed -i -e 's:deb https:deb [signed-by=/usr/share/keyrings/mysql.gpg] https:g' /etc/apt/sources.list.d/mysql.list + fi + + if [[ -f /etc/apt/trusted.gpg ]] ; then mv /etc/apt/trusted.gpg /etc/apt/old-trusted.gpg ; fi + +} + +function set_proxy(){ + METADATA_HTTP_PROXY="$(get_metadata_attribute http-proxy '')" + + if [[ -z "${METADATA_HTTP_PROXY}" ]] ; then return ; fi + + export METADATA_HTTP_PROXY + export http_proxy="${METADATA_HTTP_PROXY}" + export https_proxy="${METADATA_HTTP_PROXY}" + export HTTP_PROXY="${METADATA_HTTP_PROXY}" + export HTTPS_PROXY="${METADATA_HTTP_PROXY}" + 
no_proxy="localhost,127.0.0.0/8,::1,metadata.google.internal,169.254.169.254" + local no_proxy_svc + for no_proxy_svc in compute secretmanager dns servicedirectory logging \ + bigquery composer pubsub bigquerydatatransfer dataflow \ + storage datafusion ; do + no_proxy="${no_proxy},${no_proxy_svc}.googleapis.com" + done + + export NO_PROXY="${no_proxy}" +} + +function is_ramdisk() { + if [[ "${1:-}" == "-f" ]] ; then unset IS_RAMDISK ; fi + if ( test -v IS_RAMDISK && "${IS_RAMDISK}" == "true" ) ; then return 0 + elif ( test -v IS_RAMDISK && "${IS_RAMDISK}" == "false" ) ; then return 1 ; fi + + if ( test -d /mnt/shm && grep -q /mnt/shm /proc/mounts ) ; then + IS_RAMDISK="true" + return 0 + else + IS_RAMDISK="false" + return 1 + fi +} + +function mount_ramdisk(){ + local free_mem + free_mem="$(awk '/^MemFree/ {print $2}' /proc/meminfo)" + if [[ ${free_mem} -lt 10500000 ]]; then return 0 ; fi + + # Write to a ramdisk instead of churning the persistent disk + + tmpdir="/mnt/shm" + mkdir -p "${tmpdir}/pkgs_dirs" + mount -t tmpfs tmpfs "${tmpdir}" + + # Download conda packages to tmpfs + /opt/conda/miniconda3/bin/conda config --add pkgs_dirs "${tmpdir}/pkgs_dirs" + + # Download OS packages to tmpfs + if is_debuntu ; then + mount -t tmpfs tmpfs /var/cache/apt/archives + else + mount -t tmpfs tmpfs /var/cache/dnf + fi + is_ramdisk -f +} + +function check_os() { + if is_debian && ( ! is_debian10 && ! is_debian11 && ! is_debian12 ) ; then + echo "Error: The Debian version ($(os_version)) is not supported. Please use a compatible Debian version." + exit 1 + elif is_ubuntu && ( ! is_ubuntu18 && ! is_ubuntu20 && ! is_ubuntu22 ) ; then + echo "Error: The Ubuntu version ($(os_version)) is not supported. Please use a compatible Ubuntu version." + exit 1 + elif is_rocky && ( ! is_rocky8 && ! is_rocky9 ) ; then + echo "Error: The Rocky Linux version ($(os_version)) is not supported. Please use a compatible Rocky Linux version." 
+ exit 1 + fi + + SPARK_VERSION="$(spark-submit --version 2>&1 | sed -n 's/.*version[[:blank:]]\+\([0-9]\+\.[0-9]\).*/\1/p' | head -n1)" + readonly SPARK_VERSION + if version_lt "${SPARK_VERSION}" "3.1" || \ + version_ge "${SPARK_VERSION}" "4.0" ; then + echo "Error: Your Spark version is not supported. Please upgrade Spark to one of the supported versions." + exit 1 + fi + + # Detect dataproc image version + if (! test -v DATAPROC_IMAGE_VERSION) ; then + if test -v DATAPROC_VERSION ; then + DATAPROC_IMAGE_VERSION="${DATAPROC_VERSION}" + else + if version_lt "${SPARK_VERSION}" "3.2" ; then DATAPROC_IMAGE_VERSION="2.0" + elif version_lt "${SPARK_VERSION}" "3.4" ; then DATAPROC_IMAGE_VERSION="2.1" + elif version_lt "${SPARK_VERSION}" "3.6" ; then DATAPROC_IMAGE_VERSION="2.2" + else echo "Unknown dataproc image version" ; exit 1 ; fi + fi + fi +} + +function configure_dkms_certs() { + if test -v PSN && [[ -z "${PSN}" ]]; then + echo "No signing secret provided. skipping"; + return 0 + fi + + mkdir -p "${CA_TMPDIR}" + + # If the private key exists, verify it + if [[ -f "${CA_TMPDIR}/db.rsa" ]]; then + echo "Private key material exists" + + local expected_modulus_md5sum + expected_modulus_md5sum=$(get_metadata_attribute modulus_md5sum) + if [[ -n "${expected_modulus_md5sum}" ]]; then + modulus_md5sum="${expected_modulus_md5sum}" + + # Verify that cert md5sum matches expected md5sum + if [[ "${modulus_md5sum}" != "$(openssl rsa -noout -modulus -in "${CA_TMPDIR}/db.rsa" | openssl md5 | awk '{print $2}')" ]]; then + echo "unmatched rsa key" + fi + + # Verify that key md5sum matches expected md5sum + if [[ "${modulus_md5sum}" != "$(openssl x509 -noout -modulus -in ${mok_der} | openssl md5 | awk '{print $2}')" ]]; then + echo "unmatched x509 cert" + fi + else + modulus_md5sum="$(openssl rsa -noout -modulus -in "${CA_TMPDIR}/db.rsa" | openssl md5 | awk '{print $2}')" + fi + ln -sf "${CA_TMPDIR}/db.rsa" "${mok_key}" + + return + fi + + # Retrieve cloud secrets keys + local 
sig_priv_secret_name + sig_priv_secret_name="${PSN}" + local sig_pub_secret_name + sig_pub_secret_name="$(get_metadata_attribute public_secret_name)" + local sig_secret_project + sig_secret_project="$(get_metadata_attribute secret_project)" + local sig_secret_version + sig_secret_version="$(get_metadata_attribute secret_version)" + + # If metadata values are not set, do not write mok keys + if [[ -z "${sig_priv_secret_name}" ]]; then return 0 ; fi + + # Write private material to volatile storage + gcloud secrets versions access "${sig_secret_version}" \ + --project="${sig_secret_project}" \ + --secret="${sig_priv_secret_name}" \ + | dd status=none of="${CA_TMPDIR}/db.rsa" + + # Write public material to volatile storage + gcloud secrets versions access "${sig_secret_version}" \ + --project="${sig_secret_project}" \ + --secret="${sig_pub_secret_name}" \ + | base64 --decode \ + | dd status=none of="${CA_TMPDIR}/db.der" + + local mok_directory="$(dirname "${mok_key}")" + mkdir -p "${mok_directory}" + + # symlink private key and copy public cert from volatile storage to DKMS directory + ln -sf "${CA_TMPDIR}/db.rsa" "${mok_key}" + cp -f "${CA_TMPDIR}/db.der" "${mok_der}" + + modulus_md5sum="$(openssl rsa -noout -modulus -in "${mok_key}" | openssl md5 | awk '{print $2}')" +} + +function clear_dkms_key { + if [[ -z "${PSN}" ]]; then + echo "No signing secret provided. skipping" >&2 + return 0 + fi + rm -rf "${CA_TMPDIR}" "${mok_key}" +} + +function check_secure_boot() { + local SECURE_BOOT="disabled" + SECURE_BOOT=$(mokutil --sb-state|awk '{print $2}') + + PSN="$(get_metadata_attribute private_secret_name)" + readonly PSN + + if [[ "${SECURE_BOOT}" == "enabled" ]] && le_debian11 ; then + echo "Error: Secure Boot is not supported on Debian before image 2.2. Consider disabling Secure Boot while creating the cluster." + return + elif [[ "${SECURE_BOOT}" == "enabled" ]] && [[ -z "${PSN}" ]]; then + echo "Secure boot is enabled, but no signing material provided." 
+ echo "Consider either disabling secure boot or provide signing material as per" + echo "https://github.com/GoogleCloudDataproc/custom-images/tree/master/examples/secure-boot" + return + fi + + CA_TMPDIR="$(mktemp -u -d -p /run/tmp -t ca_dir-XXXX)" + readonly CA_TMPDIR + + if is_ubuntu ; then mok_key=/var/lib/shim-signed/mok/MOK.priv + mok_der=/var/lib/shim-signed/mok/MOK.der + else mok_key=/var/lib/dkms/mok.key + mok_der=/var/lib/dkms/mok.pub ; fi +} + +function restart_knox() { + systemctl stop knox + rm -rf "${KNOX_HOME}/data/deployments/*" + systemctl start knox +} + +function is_complete() { + phase="$1" + test -f "${workdir}/complete/${phase}" +} + +function mark_complete() { + phase="$1" + touch "${workdir}/complete/${phase}" +} + +function mark_incomplete() { + phase="$1" + rm -f "${workdir}/complete/${phase}" +} + +function install_dependencies() { + is_complete install-dependencies && return 0 + + pkg_list="screen" + if is_debuntu ; then execute_with_retries apt-get -y -q install ${pkg_list} + elif is_rocky ; then execute_with_retries dnf -y -q install ${pkg_list} ; fi + mark_complete install-dependencies +} + +function prepare_pip_env() { + # Clear pip cache + # TODO: make this conditional on which OSs have pip without cache purge + test -d "${workdir}/python-venv" || python3 -m venv "${workdir}/python-venv" + source "${workdir}/python-venv/bin/activate" + + pip cache purge || echo "unable to purge pip cache" + if is_ramdisk ; then + # Download pip packages to tmpfs + mkdir -p "${tmpdir}/cache-dir" + pip config set global.cache-dir "${tmpdir}/cache-dir" || echo "unable to set global.cache-dir" + fi +} + +function prepare_conda_env() { + CONDA=/opt/conda/miniconda3/bin/conda + touch ~/.condarc + cp ~/.condarc ~/.condarc.default + if is_ramdisk ; then + # Download conda packages to tmpfs + mkdir -p "${tmpdir}/conda_cache" + ${CONDA} config --add pkgs_dirs "${tmpdir}/conda_cache" + fi +} + +function prepare_common_env() { + define_os_comparison_functions + 
+ # Verify OS compatability and Secure boot state + check_os + check_secure_boot + + readonly _shortname="$(os_id)$(os_version|perl -pe 's/(\d+).*/$1/')" + + # Dataproc configurations + readonly HADOOP_CONF_DIR='/etc/hadoop/conf' + readonly HIVE_CONF_DIR='/etc/hive/conf' + readonly SPARK_CONF_DIR='/etc/spark/conf' + + OS_NAME="$(lsb_release -is | tr '[:upper:]' '[:lower:]')" + readonly OS_NAME + + # node role + ROLE="$(get_metadata_attribute dataproc-role)" + readonly ROLE + + # master node + MASTER="$(get_metadata_attribute dataproc-master)" + readonly MASTER + + workdir=/opt/install-dpgce + tmpdir=/tmp/ + temp_bucket="$(get_metadata_attribute dataproc-temp-bucket)" + readonly temp_bucket + readonly pkg_bucket="gs://${temp_bucket}/dpgce-packages" + uname_r=$(uname -r) + readonly uname_r + readonly bdcfg="/usr/local/bin/bdconfig" + export DEBIAN_FRONTEND=noninteractive + + # Knox config + readonly KNOX_HOME=/usr/lib/knox + + mkdir -p "${workdir}/complete" + set_proxy + mount_ramdisk + + readonly install_log="${tmpdir}/install.log" + + is_complete prepare.common && return + + repair_old_backports + + if is_debuntu ; then + clean_up_sources_lists + apt-get update -qq + apt-get -y clean + apt-get -o DPkg::Lock::Timeout=60 -y autoremove + if ge_debian12 ; then + apt-mark unhold systemd libsystemd0 ; fi + if is_ubuntu ; then + while ! 
command -v gcloud ; do sleep 5s ; done + fi + else + dnf clean all + fi + + # When creating a disk image: + if [[ -n "$(get_metadata_attribute creating-image "")" ]]; then + df / > "/run/disk-usage.log" + + # zero free disk space + ( set +e + time dd if=/dev/zero of=/zero status=none ; sync ; sleep 3s ; rm -f /zero + ) + + install_dependencies + + # Monitor disk usage in a screen session + touch "/run/keep-running-df" + screen -d -m -LUS keep-running-df \ + bash -c "while [[ -f /run/keep-running-df ]] ; do df / | tee -a /run/disk-usage.log ; sleep 5s ; done" + fi + + mark_complete prepare.common +} + +function pip_exit_handler() { + if is_ramdisk ; then + # remove the tmpfs pip cache-dir + pip config unset global.cache-dir || echo "unable to unset global pip cache" + fi +} + +function conda_exit_handler() { + mv ~/.condarc.default ~/.condarc +} + +function common_exit_handler() { + set +ex + echo "Exit handler invoked" + + # If system memory was sufficient to mount memory-backed filesystems + if is_ramdisk ; then + # Clean up shared memory mounts + for shmdir in /var/cache/apt/archives /var/cache/dnf /mnt/shm /tmp ; do + if ( grep -q "^tmpfs ${shmdir}" /proc/mounts && ! 
grep -q "^tmpfs ${shmdir}" /etc/fstab ) ; then + umount -f ${shmdir} + fi + done + fi + + if is_debuntu ; then + # Clean up OS package cache + apt-get -y -qq clean + apt-get -y -qq -o DPkg::Lock::Timeout=60 autoremove + # re-hold systemd package + if ge_debian12 ; then + apt-mark hold systemd libsystemd0 ; fi + else + dnf clean all + fi + + # When creating image, print disk usage statistics, zero unused disk space + if [[ -n "$(get_metadata_attribute creating-image)" ]]; then + # print disk usage statistics for large components + if is_ubuntu ; then + du -hs \ + /usr/lib/{pig,hive,hadoop,jvm,spark,google-cloud-sdk,x86_64-linux-gnu} \ + /usr/lib \ + /opt/nvidia/* \ + /opt/conda/miniconda3 | sort -h + elif is_debian ; then + du -x -hs \ + /usr/lib/{pig,hive,hadoop,jvm,spark,google-cloud-sdk,x86_64-linux-gnu,} \ + /var/lib/{docker,mysql,} \ + /opt/nvidia/* \ + /opt/{conda,google-cloud-ops-agent,install-nvidia,} \ + /usr/bin \ + /usr \ + /var \ + / 2>/dev/null | sort -h + else + du -hs \ + /var/lib/docker \ + /usr/lib/{pig,hive,hadoop,firmware,jvm,spark,atlas,} \ + /usr/lib64/google-cloud-sdk \ + /opt/nvidia/* \ + /opt/conda/miniconda3 + fi + + # Process disk usage logs from installation period + rm -f /run/keep-running-df + sync + sleep 5.01s + # compute maximum size of disk during installation + # Log file contains logs like the following (minus the preceeding #): +#Filesystem 1K-blocks Used Available Use% Mounted on +#/dev/vda2 7096908 2611344 4182932 39% / + df / | tee -a "/run/disk-usage.log" + + perl -e \ + '@siz=( sort { $a => $b } + map { (split)[2] =~ /^(\d+)/ } + grep { m:^/: } ); +$max=$siz[0]; $min=$siz[-1]; $starting="unknown"; $inc=q{$max-$starting}; +print( " samples-taken: ", scalar @siz, $/, + "starting-disk-used: $starting", $/, + "maximum-disk-used: $max", $/, + "minimum-disk-used: $min", $/, + " increased-by: $inc", $/ )' < "/run/disk-usage.log" + + + # zero free disk space + dd if=/dev/zero of=/zero + sync + sleep 3s + rm -f /zero + fi + echo 
"exit_handler has completed" +} diff --git a/templates/dask/util_functions b/templates/dask/util_functions new file mode 100644 index 000000000..d67da1fc1 --- /dev/null +++ b/templates/dask/util_functions @@ -0,0 +1,555 @@ +function configure_dask_yarn() { + readonly DASK_YARN_CONFIG_DIR=/etc/dask/ + readonly DASK_YARN_CONFIG_FILE=${DASK_YARN_CONFIG_DIR}/config.yaml + # Minimal custom configuration is required for this + # setup. Please see https://yarn.dask.org/en/latest/quickstart.html#usage + # for information on tuning Dask-Yarn environments. + mkdir -p "${DASK_YARN_CONFIG_DIR}" + + local worker_class="dask.distributed.Nanny" + local gpu_count="0" + if command -v nvidia-smi ; then + gpu_count="1" + worker_class="dask_cuda.CUDAWorker" + fi + + cat <"${DASK_YARN_CONFIG_FILE}" +# Config file for Dask Yarn. +# +# These values are joined on top of the default config, found at +# https://yarn.dask.org/en/latest/configuration.html#default-configuration + +yarn: + environment: python://${DASK_CONDA_ENV}/bin/python + + worker: + count: 2 + gpus: ${gpu_count} + worker_class: ${worker_class} +EOF +} + +function install_systemd_dask_worker() { + echo "Installing systemd Dask Worker service..." 
+ local -r dask_worker_local_dir="/tmp/${DASK_WORKER_SERVICE}" + + mkdir -p "${dask_worker_local_dir}" + + local DASK_WORKER_LAUNCHER="/usr/local/bin/${DASK_WORKER_SERVICE}-launcher.sh" + + local compute_mode_cmd="" + if command -v nvidia-smi ; then compute_mode_cmd="nvidia-smi --compute-mode=DEFAULT" ; fi + local worker_name="dask worker" + if test -f "${DASK_CONDA_ENV}/bin/dask-cuda" ; then worker_name="dask-cuda worker" ; fi + local worker="${DASK_CONDA_ENV}/bin/${worker_name}" + cat <"${DASK_WORKER_LAUNCHER}" +#!/bin/bash +LOGFILE="/var/log/${DASK_WORKER_SERVICE}.log" +${compute_mode_cmd} +echo "${worker_name} starting, logging to \${LOGFILE}" +${worker} --local-directory="${dask_worker_local_dir}" --memory-limit=auto "${MASTER}:8786" >> "\${LOGFILE}" 2>&1 +EOF + + chmod 750 "${DASK_WORKER_LAUNCHER}" + + local -r dask_service_file="/usr/lib/systemd/system/${DASK_WORKER_SERVICE}.service" + cat <"${dask_service_file}" +[Unit] +Description=Dask Worker Service +[Service] +Type=simple +Restart=on-failure +ExecStart=/bin/bash -c 'exec ${DASK_WORKER_LAUNCHER}' +[Install] +WantedBy=multi-user.target +EOF + chmod a+r "${dask_service_file}" + + systemctl daemon-reload + + # Enable the service + enable_systemd_dask_worker_service="0" + if [[ "${ROLE}" != "Master" ]]; then + enable_systemd_dask_worker_service="1" + else + # Enable service on single-node cluster (no workers) + local worker_count="$(get_metadata_attribute dataproc-worker-count)" + if ( [[ "${worker_count}" == "0" ]] || + ( [[ "$(get_metadata_attribute dask-cuda-worker-on-master 'true')" == "true" ]] && + [[ "$(get_metadata_attribute dask-worker-on-master 'true')" == "true" ]] ) + ) ; then + enable_systemd_dask_worker_service="1" + fi + fi + readonly enable_systemd_dask_worker_service + + if [[ "${enable_systemd_dask_worker_service}" == "1" ]]; then + systemctl enable "${DASK_WORKER_SERVICE}" + systemctl restart "${DASK_WORKER_SERVICE}" + fi +} + +function install_systemd_dask_scheduler() { + # only run 
scheduler on primary master + if [[ "$(hostname -s)" != "${MASTER}" ]]; then return ; fi + echo "Installing systemd Dask Scheduler service..." + local -r dask_scheduler_local_dir="/tmp/${DASK_SCHEDULER_SERVICE}" + + mkdir -p "${dask_scheduler_local_dir}" + + local DASK_SCHEDULER_LAUNCHER="/usr/local/bin/${DASK_SCHEDULER_SERVICE}-launcher.sh" + + cat <"${DASK_SCHEDULER_LAUNCHER}" +#!/bin/bash +LOGFILE="/var/log/${DASK_SCHEDULER_SERVICE}.log" +echo "dask scheduler starting, logging to \${LOGFILE}" +${DASK_CONDA_ENV}/bin/dask scheduler >> "\${LOGFILE}" 2>&1 +EOF + + chmod 750 "${DASK_SCHEDULER_LAUNCHER}" + + local -r dask_service_file="/usr/lib/systemd/system/${DASK_SCHEDULER_SERVICE}.service" + cat <"${dask_service_file}" +[Unit] +Description=Dask Scheduler Service +[Service] +Type=simple +Restart=on-failure +ExecStart=/bin/bash -c 'exec ${DASK_SCHEDULER_LAUNCHER}' +[Install] +WantedBy=multi-user.target +EOF + chmod a+r "${dask_service_file}" + + systemctl daemon-reload + + # Enable the service + systemctl enable "${DASK_SCHEDULER_SERVICE}" +} + +function install_systemd_dask_service() { + install_systemd_dask_scheduler + install_systemd_dask_worker +} + +function start_systemd_dask_service() { + # only run scheduler on primary master + if [[ "$(hostname -s)" == "${MASTER}" ]]; then + date + time systemctl start "${DASK_SCHEDULER_SERVICE}" + local substate_val="$(systemctl show ${DASK_SCHEDULER_SERVICE} -p SubState --value)" + if [[ "${substate_val}" != 'running' ]] ; then + cat "/var/log/${DASK_SCHEDULER_SERVICE}.log" + fi + systemctl status "${DASK_SCHEDULER_SERVICE}" + fi + + echo "Starting Dask 'standalone' cluster..." + if [[ "${enable_systemd_dask_worker_service}" == "1" ]]; then + date + # Pause while scheduler comes online + retries=30 + while ! 
nc -vz "${MASTER}" 8786 ; do + sleep 3s + ((retries--)) + if [[ "${retries}" == "0" ]] ; then echo "dask scheduler unreachable" ; exit 1 ; fi + done + time systemctl start "${DASK_WORKER_SERVICE}" + local substate_val="$(systemctl show ${DASK_WORKER_SERVICE} -p SubState --value)" + if [[ "${substate_val}" != 'running' ]] ; then + cat "/var/log/${DASK_WORKER_SERVICE}.log" + fi + systemctl status "${DASK_WORKER_SERVICE}" + fi + + date +} + +function configure_knox_for_dask() { + if [[ ! -d "${KNOX_HOME}" ]]; then + echo "Skip configuring Knox rules for Dask" + return 0 + fi + + local DASK_UI_PORT=8787 + if [[ -f /etc/knox/conf/topologies/default.xml ]]; then + sed -i \ + "/<\/topology>/i DASK<\/role>http://localhost:${DASK_UI_PORT}<\/url><\/service> DASKWS<\/role>ws:\/\/${MASTER}:${DASK_UI_PORT}<\/url><\/service>" \ + /etc/knox/conf/topologies/default.xml + fi + + mkdir -p "${KNOX_DASK_DIR}" + + cat >"${KNOX_DASK_DIR}/service.xml" <<'EOF' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +EOF + + cat >"${KNOX_DASK_DIR}/rewrite.xml" <<'EOF' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +EOF + + mkdir -p "${KNOX_DASKWS_DIR}" + + cat >"${KNOX_DASKWS_DIR}/service.xml" <<'EOF' + + + + + + + + + + + + + + + + + + + +EOF + + cat >"${KNOX_DASKWS_DIR}/rewrite.xml" <<'EOF' + + + + + + + +EOF + + chown -R knox:knox "${KNOX_DASK_DIR}" "${KNOX_DASKWS_DIR}" + + # Do not restart knox during pre-init script run + if [[ -n "${ROLE}" ]]; then + restart_knox + fi +} + +function configure_fluentd_for_dask() { + if [[ "$(hostname -s)" == "${MASTER}" ]]; then + cat >/etc/google-fluentd/config.d/dataproc-dask.conf < + @type tail + path /var/log/dask-scheduler.log + pos_file /var/tmp/fluentd.dataproc.dask.scheduler.pos + read_from_head true + tag google.dataproc.dask-scheduler + + @type none + + + + + @type record_transformer + + filename dask-scheduler.log + + +EOF + fi + + if [[ 
"${enable_systemd_dask_worker_service}" == "1" ]]; then + cat >>/etc/google-fluentd/config.d/dataproc-dask.conf < + @type tail + path /var/log/dask-worker.log + pos_file /var/tmp/fluentd.dataproc.dask.worker.pos + read_from_head true + tag google.dataproc.dask-worker + + @type none + + + + + @type record_transformer + + filename dask-worker.log + + +EOF + fi + + systemctl restart google-fluentd +} + +function install_dask() { + is_complete install.dask && return + + local python_spec="python>=3.11" + local dask_version="2024.12.1" + local dask_spec="dask>=${dask_version}" + local cache_key_name="dask-${dask_version}" + + CONDA_PACKAGES=() + if [[ "${DASK_RUNTIME}" == 'yarn' ]]; then + dask_yarn_version="0.9" + cache_key_name="dask-yarn-${dask_yarn_version}" + # Pin `distributed` and `dask` package versions to old release + # because `dask-yarn` 0.9 uses skein in a way which + # is not compatible with `distributed` package 2022.2 and newer: + # https://github.com/dask/dask-yarn/issues/155 + + dask_spec="dask<2022.2" + python_spec="python>=3.7,<3.8.0a0" + if is_ubuntu18 ; then + # the libuuid.so.1 distributed with fiona 1.8.22 dumps core when calling uuid_generate_time_generic + CONDA_PACKAGES+=("fiona<1.8.22") + fi + CONDA_PACKAGES+=("dask-yarn=${dask_yarn_version}" "distributed<2022.2") + fi + + CONDA_PACKAGES+=( + "${dask_spec}" + "dask-bigquery" + "dask-ml" + "dask-sql" + ) + + unset CONDA_CHANNEL_ARGS + local cache_key="${cache_key_name}_${DATAPROC_IMAGE_VERSION}-${_shortname}" + install_conda_packages "${cache_key}" + + mark_complete install.dask +} + +function install_dask_rapids() { + if ( is_complete install.dask-rapids && test -d "${DASK_CONDA_ENV}" ) ; then return ; fi + + local numba_spec="numba" + local dask_version="2024.7" + local dask_spec="dask>=${dask_version}" + + local python_spec="python>=3.11" + local cuda_spec="cuda-version>=12,<13" + local cudart_spec="cuda-cudart" + if is_cuda11 ; then + python_spec="python>=3.9" +
cuda_spec="cuda-version>=11,<12.0a0" + cudart_spec="cudatoolkit" + fi + + local rapids_spec="rapids>=${RAPIDS_VERSION}" + CONDA_PACKAGES=() + local cache_key_name="dask-rapids-${RAPIDS_VERSION}" + if [[ "${DASK_RUNTIME}" == 'yarn' ]]; then + local rapids_version="24.05" + cache_key_name="dask-rapids-yarn-${rapids_version}" + # Pin `distributed` and `dask` package versions to old release + # because `dask-yarn` 0.9 uses skein in a way which + # is not compatible with `distributed` package 2022.2 and newer: + # https://github.com/dask/dask-yarn/issues/155 + + dask_spec="dask<2022.2" + python_spec="python>=3.9" + rapids_spec="rapids<=${rapids_version}" + if is_ubuntu18 ; then + # the libuuid.so.1 distributed with fiona 1.8.22 dumps core when calling uuid_generate_time_generic + CONDA_PACKAGES+=("fiona<1.8.22") + fi + CONDA_PACKAGES+=('dask-yarn=0.9' "distributed<2022.2") + fi + + CONDA_PACKAGES+=( + "${cuda_spec}" + "${cudart_spec}" + "${rapids_spec}" + "${dask_spec}" + "dask-bigquery" + "dask-ml" + "dask-sql" + "cudf" + "${numba_spec}" + ) + + CONDA_CHANNEL_ARGS="-c conda-forge -c nvidia -c rapidsai" + + local cache_key="${cache_key_name}_${DATAPROC_IMAGE_VERSION}-${_shortname}" + install_conda_packages "${cache_key}" + + mark_complete install.dask-rapids +} + +# The bash array CONDA_PACKAGES must contain a set of package +# specifications before calling this function + +# The bash string CONDA_CHANNEL_ARGS may contain arguments to specify +# conda channels. 
Default is "-c 'conda-forge'" + +function install_conda_packages() { + local cache_key="${1}" + + local build_tarball="${cache_key}.tar.gz" + local gcs_tarball="${pkg_bucket}/conda/${cache_key%%_*}/${build_tarball}" + local local_tarball="${tmpdir}/${build_tarball}" + + if gsutil ls "${gcs_tarball}" 2>&1 | grep -q "${gcs_tarball}" ; then + echo "cache hit" + mkdir -p "${DASK_CONDA_ENV}" + time ( gcloud storage cat "${gcs_tarball}" | tar -C "${DASK_CONDA_ENV}" -xz ) + return 0 + fi + + # Install cuda, rapids, dask + mamba="/opt/conda/miniconda3/bin/mamba" + conda="/opt/conda/miniconda3/bin/conda" + + ( set +e + local is_installed="0" + for installer in "${mamba}" "${conda}" ; do + test -d "${DASK_CONDA_ENV}" || \ + time "${installer}" "create" -m -n "${conda_env}" -y --no-channel-priority \ + ${CONDA_CHANNEL_ARGS:- -c 'conda-forge'} \ + ${CONDA_PACKAGES[*]} \ + "${python_spec}" \ + > "${install_log}" 2>&1 && retval=$? || { retval=$? ; cat "${install_log}" ; } + sync + if [[ "$retval" == "0" ]] ; then + is_installed="1" + pushd "${DASK_CONDA_ENV}" + time ( + tar czf "${local_tarball}" . 
+ gcloud storage cp "${local_tarball}" "${gcs_tarball}" + rm "${local_tarball}" + ) + popd + break + fi + "${conda}" config --set channel_priority flexible + done + + if [[ "${is_installed}" == "0" ]]; then + echo "failed to install dask" + return 1 + fi + ) +} + +function prepare_dask_env() { + # Dask config + DASK_RUNTIME="$(get_metadata_attribute dask-runtime 'standalone')" + readonly DASK_RUNTIME + readonly DASK_SERVICE=dask-cluster + readonly DASK_WORKER_SERVICE=dask-worker + readonly DASK_SCHEDULER_SERVICE=dask-scheduler + readonly DASK_CONDA_ENV="/opt/conda/miniconda3/envs/${conda_env}" + # Knox dask config + readonly KNOX_DASK_DIR="${KNOX_HOME}/data/services/dask/0.1.0" + readonly KNOX_DASKWS_DIR="${KNOX_HOME}/data/services/daskws/0.1.0" +} + +function prepare_dask_rapids_env(){ + prepare_dask_env + + # Default rapids runtime + readonly DEFAULT_RAPIDS_RUNTIME='DASK' + + local DEFAULT_DASK_RAPIDS_VERSION="24.08" + if [[ "${DATAPROC_IMAGE_VERSION}" == "2.0" ]] ; then + DEFAULT_DASK_RAPIDS_VERSION="23.08" # Final release to support spark 3.1.3 + fi + readonly RAPIDS_VERSION=$(get_metadata_attribute 'rapids-version' ${DEFAULT_DASK_RAPIDS_VERSION}) +} diff --git a/templates/generate-action.pl b/templates/generate-action.pl new file mode 100644 index 000000000..7cc954a67 --- /dev/null +++ b/templates/generate-action.pl @@ -0,0 +1,24 @@ +#!/usr/bin/perl -w +# -*-CPerl-*- + +# Usage: Run this script from the root directory of the git clone: +# perl templates/generate-action.pl gpu/install_gpu_driver.sh + +use Template; +use strict; + +my $action = $ARGV[0]; +my $v = { template_path => "${action}.in" }; + +sub usage{ die "Usage: $0 " } + +usage unless( $action && -f "$ENV{PWD}/templates/$v->{template_path}" ); + +my $tt = Template->new( { + INCLUDE_PATH => "$ENV{PWD}/templates", + VARIABLES => $v, + INTERPOLATE => 0, +}) || die "$Template::ERROR$/"; + + +$tt->process($v->{template_path}) or die( $tt->error(), "\n" ); diff --git a/templates/gpu/util_functions
b/templates/gpu/util_functions new file mode 100644 index 000000000..48473d13b --- /dev/null +++ b/templates/gpu/util_functions @@ -0,0 +1,220 @@ +function set_support_matrix() { + # CUDA version and Driver version + # https://docs.nvidia.com/deploy/cuda-compatibility/ + # https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html + # https://developer.nvidia.com/cuda-downloads + + # Minimum supported version for open kernel driver is 515.43.04 + # https://github.com/NVIDIA/open-gpu-kernel-modules/tags + # Rocky8: 12.0: 525.147.05 + local latest + latest="$(curl -s https://download.nvidia.com/XFree86/Linux-x86_64/latest.txt | awk '{print $1}')" + readonly -A DRIVER_FOR_CUDA=( + ["11.7"]="515.65.01" ["11.8"]="525.147.05" + ["12.0"]="525.147.05" ["12.1"]="530.30.02" ["12.4"]="550.135" ["12.5"]="555.42.02" ["12.6"]="560.35.03" + ) + readonly -A DRIVER_SUBVER=( + ["515"]="515.48.07" ["520"]="525.147.05" ["525"]="525.147.05" ["530"]="530.41.03" ["535"]="535.216.01" + ["545"]="545.29.06" ["550"]="550.135" ["555"]="555.58.02" ["560"]="560.35.03" ["565"]="565.57.01" + ) + # https://developer.nvidia.com/cudnn-downloads + if is_debuntu ; then + readonly -A CUDNN_FOR_CUDA=( + ["11.7"]="9.5.1.17" ["11.8"]="9.5.1.17" + ["12.0"]="9.5.1.17" ["12.1"]="9.5.1.17" ["12.4"]="9.5.1.17" ["12.5"]="9.5.1.17" ["12.6"]="9.5.1.17" + ) + elif is_rocky ; then + # rocky: + # 12.0: 8.8.1.3 + # 12.1: 8.9.3.28 + # 12.2: 8.9.7.29 + # 12.3: 9.0.0.312 + # 12.4: 9.1.1.17 + # 12.5: 9.2.1.18 + # 12.6: 9.5.1.17 + readonly -A CUDNN_FOR_CUDA=( + ["11.7"]="8.9.7.29" ["11.8"]="9.5.1.17" + ["12.0"]="8.8.1.3" ["12.1"]="8.9.3.28" ["12.4"]="9.1.1.17" ["12.5"]="9.2.1.18" ["12.6"]="9.5.1.17" + ) + fi + # https://developer.nvidia.com/nccl/nccl-download + # 12.2: 2.19.3, 12.5: 2.21.5 + readonly -A NCCL_FOR_CUDA=( + ["11.7"]="2.21.5" ["11.8"]="2.21.5" + ["12.0"]="2.16.5" ["12.1"]="2.18.3" ["12.4"]="2.23.4" ["12.5"]="2.21.5" ["12.6"]="2.23.4" + ) + readonly -A CUDA_SUBVER=( + ["11.7"]="11.7.1" 
["11.8"]="11.8.0" + ["12.0"]="12.0.1" ["12.1"]="12.1.1" ["12.2"]="12.2.2" ["12.3"]="12.3.2" ["12.4"]="12.4.1" ["12.5"]="12.5.1" ["12.6"]="12.6.2" + ) +} + +function set_cuda_version() { + case "${DATAPROC_IMAGE_VERSION}" in + "2.0" ) DEFAULT_CUDA_VERSION="12.1.1" ;; # Cuda 12.1.1 - Driver v530.30.02 is the latest version supported by Ubuntu 18) + "2.1" ) DEFAULT_CUDA_VERSION="12.4.1" ;; + "2.2" ) DEFAULT_CUDA_VERSION="12.6.2" ;; + * ) + echo "unrecognized Dataproc image version: ${DATAPROC_IMAGE_VERSION}" + exit 1 + ;; + esac + local cuda_url + cuda_url=$(get_metadata_attribute 'cuda-url' '') + if [[ -n "${cuda_url}" ]] ; then + # if cuda-url metadata variable has been passed, extract default version from url + local CUDA_URL_VERSION + CUDA_URL_VERSION="$(echo "${cuda_url}" | perl -pe 's{^.*/cuda_(\d+\.\d+\.\d+)_\d+\.\d+\.\d+_linux.run$}{$1}')" + if [[ "${CUDA_URL_VERSION}" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]] ; then + DEFAULT_CUDA_VERSION="${CUDA_URL_VERSION%.*}" + fi + fi + readonly DEFAULT_CUDA_VERSION + + CUDA_VERSION=$(get_metadata_attribute 'cuda-version' "${DEFAULT_CUDA_VERSION}") + if test -n "$(echo "${CUDA_VERSION}" | perl -ne 'print if /\d+\.\d+\.\d+/')" ; then + CUDA_FULL_VERSION="${CUDA_VERSION}" + CUDA_VERSION="${CUDA_VERSION%.*}" + fi + readonly CUDA_VERSION + if ( ! 
test -v CUDA_FULL_VERSION ) ; then + CUDA_FULL_VERSION=${CUDA_SUBVER["${CUDA_VERSION}"]} + fi + readonly CUDA_FULL_VERSION +} + +function is_cuda12() ( set +x ; [[ "${CUDA_VERSION%%.*}" == "12" ]] ; ) +function le_cuda12() ( set +x ; version_le "${CUDA_VERSION%%.*}" "12" ; ) +function ge_cuda12() ( set +x ; version_ge "${CUDA_VERSION%%.*}" "12" ; ) + +function is_cuda11() ( set +x ; [[ "${CUDA_VERSION%%.*}" == "11" ]] ; ) +function le_cuda11() ( set +x ; version_le "${CUDA_VERSION%%.*}" "11" ; ) +function ge_cuda11() ( set +x ; version_ge "${CUDA_VERSION%%.*}" "11" ; ) + +function set_driver_version() { + local gpu_driver_url + gpu_driver_url=$(get_metadata_attribute 'gpu-driver-url' '') + + local cuda_url + cuda_url=$(get_metadata_attribute 'cuda-url' '') + + local DEFAULT_DRIVER + # Take default from gpu-driver-url metadata value + if [[ -n "${gpu_driver_url}" ]] ; then + DRIVER_URL_DRIVER_VERSION="$(echo "${gpu_driver_url}" | perl -pe 's{^.*/NVIDIA-Linux-x86_64-(\d+\.\d+\.\d+).run$}{$1}')" + if [[ "${DRIVER_URL_DRIVER_VERSION}" =~ ^[0-9]+.*[0-9]$ ]] ; then DEFAULT_DRIVER="${DRIVER_URL_DRIVER_VERSION}" ; fi + # Take default from cuda-url metadata value as a backup + elif [[ -n "${cuda_url}" ]] ; then + local CUDA_URL_DRIVER_VERSION="$(echo "${cuda_url}" | perl -pe 's{^.*/cuda_\d+\.\d+\.\d+_(\d+\.\d+\.\d+)_linux.run$}{$1}')" + if [[ "${CUDA_URL_DRIVER_VERSION}" =~ ^[0-9]+.*[0-9]$ ]] ; then + major_driver_version="${CUDA_URL_DRIVER_VERSION%%.*}" + driver_max_maj_version=${DRIVER_SUBVER["${major_driver_version}"]} + if curl -s --head "https://download.nvidia.com/XFree86/Linux-x86_64/${CUDA_URL_DRIVER_VERSION}/NVIDIA-Linux-x86_64-${CUDA_URL_DRIVER_VERSION}.run" | grep -E -q '^HTTP.*200\s*$' ; then + # use the version indicated by the cuda url as the default if it exists + DEFAULT_DRIVER="${CUDA_URL_DRIVER_VERSION}" + elif curl -s --head "https://download.nvidia.com/XFree86/Linux-x86_64/${driver_max_maj_version}/NVIDIA-Linux-x86_64-${driver_max_maj_version}.run" | 
grep -E -q '^HTTP.*200\s*$' ; then + # use the maximum sub-version available for the major version indicated in cuda url as the default + DEFAULT_DRIVER="${driver_max_maj_version}" + fi + fi + fi + + if ( ! test -v DEFAULT_DRIVER ) ; then + # If a default driver version has not been extracted, use the default for this version of CUDA + DEFAULT_DRIVER=${DRIVER_FOR_CUDA["${CUDA_VERSION}"]} + fi + + DRIVER_VERSION=$(get_metadata_attribute 'gpu-driver-version' "${DEFAULT_DRIVER}") + + readonly DRIVER_VERSION + readonly DRIVER="${DRIVER_VERSION%%.*}" + + export DRIVER_VERSION DRIVER + + gpu_driver_url="https://download.nvidia.com/XFree86/Linux-x86_64/${DRIVER_VERSION}/NVIDIA-Linux-x86_64-${DRIVER_VERSION}.run" + if ! curl -s --head "${gpu_driver_url}" | grep -E -q '^HTTP.*200\s*$' ; then + echo "No NVIDIA driver exists for DRIVER_VERSION=${DRIVER_VERSION}" + exit 1 + fi +} + +function is_src_nvidia() ( set +x ; [[ "${GPU_DRIVER_PROVIDER}" == "NVIDIA" ]] ; ) +function is_src_os() ( set +x ; [[ "${GPU_DRIVER_PROVIDER}" == "OS" ]] ; ) + +function nvsmi() { + local nvsmi="/usr/bin/nvidia-smi" + if [[ "${nvsmi_works}" == "1" ]] ; then echo -n '' + elif [[ ! -f "${nvsmi}" ]] ; then echo "nvidia-smi not installed" >&2 ; return 0 + elif ! 
eval "${nvsmi} > /dev/null" ; then echo "nvidia-smi fails" >&2 ; return 0 + else nvsmi_works="1" ; fi + + if test -v 1 && [[ "$1" == "-L" ]] ; then + local NV_SMI_L_CACHE_FILE="/var/run/nvidia-smi_-L.txt" + if [[ -f "${NV_SMI_L_CACHE_FILE}" ]]; then cat "${NV_SMI_L_CACHE_FILE}" + else "${nvsmi}" $* | tee "${NV_SMI_L_CACHE_FILE}" ; fi + + return 0 + fi + + "${nvsmi}" $* +} + +function clear_nvsmi_cache() { + if ( test -v nvsmi_query_xml && test -f "${nvsmi_query_xml}" ) ; then + rm "${nvsmi_query_xml}" + fi +} + +function query_nvsmi() { + if [[ "${nvsmi_works}" != "1" ]] ; then return ; fi + if ( test -v nvsmi_query_xml && test -f "${nvsmi_query_xml}" ) ; then return ; fi + nvsmi -q -x --dtd > "${nvsmi_query_xml}" +} + +function prepare_gpu_env(){ + set_support_matrix + + set_cuda_version + set_driver_version + + set +e + gpu_count="$(grep -i PCI_ID=10DE /sys/bus/pci/devices/*/uevent | wc -l)" + set -e + echo "gpu_count=[${gpu_count}]" + nvsmi_works="0" + nvsmi_query_xml="${tmpdir}/nvsmi.xml" + xmllint="/opt/conda/miniconda3/bin/xmllint" + NVIDIA_SMI_PATH='/usr/bin' + MIG_MAJOR_CAPS=0 + IS_MIG_ENABLED=0 + CUDNN_PKG_NAME="" + CUDNN8_PKG_NAME="" + CUDA_LOCAL_REPO_INSTALLED="0" + + if ! test -v DEFAULT_RAPIDS_RUNTIME ; then + readonly DEFAULT_RAPIDS_RUNTIME='SPARK' + fi + + # Verify SPARK compatibility + RAPIDS_RUNTIME=$(get_metadata_attribute 'rapids-runtime' "${DEFAULT_RAPIDS_RUNTIME}") + readonly RAPIDS_RUNTIME + + # determine whether we have nvidia-smi installed and working + nvsmi +} + +# Hold all NVIDIA-related packages from upgrading unintentionally by services like unattended-upgrades +# Users should run apt-mark unhold before they wish to upgrade these packages +function hold_nvidia_packages() { + if !
is_debuntu ; then return ; fi + + apt-mark hold nvidia-* + apt-mark hold libnvidia-* + if dpkg -l | grep -q "xserver-xorg-video-nvidia"; then + apt-mark hold xserver-xorg-video-nvidia* + fi +} + +function gpu_exit_handler() { + echo "no operations in gpu exit handler" +} diff --git a/templates/legal/license_header b/templates/legal/license_header new file mode 100644 index 000000000..0230ca951 --- /dev/null +++ b/templates/legal/license_header @@ -0,0 +1,13 @@ +# Copyright 2015 Google LLC and contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/templates/rapids/rapids.sh.in b/templates/rapids/rapids.sh.in index 3fd48089e..61b7247c0 100644 --- a/templates/rapids/rapids.sh.in +++ b/templates/rapids/rapids.sh.in @@ -1,493 +1,19 @@ #!/bin/bash - -# Copyright 2019,2020,2021,2022,2024 Google LLC # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +[% INSERT legal/license_header %] # -# http://www.apache.org/licenses/LICENSE-2.0 +[% PROCESS common/template_disclaimer %] # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- # This initialization action script will install rapids on a Dataproc # cluster. set -euxo pipefail -function os_id() { grep '^ID=' /etc/os-release | cut -d= -f2 | xargs ; } -function is_ubuntu() { [[ "$(os_id)" == 'ubuntu' ]] ; } -function is_ubuntu18() { is_ubuntu && [[ "$(os_version)" == '18.04'* ]] ; } -function is_debian() { [[ "$(os_id)" == 'debian' ]] ; } -function is_debuntu() { is_debian || is_ubuntu ; } - -function print_metadata_value() { - local readonly tmpfile=$(mktemp) - http_code=$(curl -f "${1}" -H "Metadata-Flavor: Google" -w "%{http_code}" \ - -s -o ${tmpfile} 2>/dev/null) - local readonly return_code=$? - # If the command completed successfully, print the metadata value to stdout. - if [[ ${return_code} == 0 && ${http_code} == 200 ]]; then - cat ${tmpfile} - fi - rm -f ${tmpfile} - return ${return_code} -} - -function print_metadata_value_if_exists() { - local return_code=1 - local readonly url=$1 - print_metadata_value ${url} - return_code=$? - return ${return_code} -} - -function get_metadata_value() { - set +x - local readonly varname=$1 - local -r MDS_PREFIX=http://metadata.google.internal/computeMetadata/v1 - # Print the instance metadata value. - print_metadata_value_if_exists ${MDS_PREFIX}/instance/${varname} - return_code=$? - # If the instance doesn't have the value, try the project. - if [[ ${return_code} != 0 ]]; then - print_metadata_value_if_exists ${MDS_PREFIX}/project/${varname} - return_code=$? - fi - set -x - return ${return_code} -} - -function get_metadata_attribute() ( - set +x - local -r attribute_name="$1" - local -r default_value="${2:-}" - get_metadata_value "attributes/${attribute_name}" || echo -n "${default_value}" -) - -function execute_with_retries() { - local -r cmd="$*" - for i in {0..9} ; do - if eval "$cmd"; then - return 0 ; fi - sleep 5 - done - echo "Cmd '${cmd}' failed." 
- return 1 -} - -function restart_knox() { - systemctl stop knox - rm -rf "${KNOX_HOME}/data/deployments/*" - systemctl start knox -} - -function is_cuda12() { [[ "${CUDA_VERSION%%.*}" == "12" ]] ; } -function is_cuda11() { [[ "${CUDA_VERSION%%.*}" == "11" ]] ; } - -function configure_dask_yarn() { - readonly DASK_YARN_CONFIG_DIR=/etc/dask/ - readonly DASK_YARN_CONFIG_FILE=${DASK_YARN_CONFIG_DIR}/config.yaml - # Minimal custom configuration is required for this - # setup. Please see https://yarn.dask.org/en/latest/quickstart.html#usage - # for information on tuning Dask-Yarn environments. - mkdir -p "${DASK_YARN_CONFIG_DIR}" - - cat <"${DASK_YARN_CONFIG_FILE}" -# Config file for Dask Yarn. -# -# These values are joined on top of the default config, found at -# https://yarn.dask.org/en/latest/configuration.html#default-configuration - -yarn: - environment: python://${DASK_CONDA_ENV}/bin/python - - worker: - count: 2 - gpus: 1 - class: "dask_cuda.CUDAWorker" -EOF -} - -function install_systemd_dask_worker() { - echo "Installing systemd Dask Worker service..." 
- local -r dask_worker_local_dir="/tmp/${DASK_WORKER_SERVICE}" - - mkdir -p "${dask_worker_local_dir}" - - local DASK_WORKER_LAUNCHER="/usr/local/bin/${DASK_WORKER_SERVICE}-launcher.sh" - - cat <"${DASK_WORKER_LAUNCHER}" -#!/bin/bash -LOGFILE="/var/log/${DASK_WORKER_SERVICE}.log" -nvidia-smi -c DEFAULT -echo "dask-cuda-worker starting, logging to \${LOGFILE}" -${DASK_CONDA_ENV}/bin/dask-cuda-worker "${MASTER}:8786" --local-directory="${dask_worker_local_dir}" --memory-limit=auto >> "\${LOGFILE}" 2>&1 -EOF - - chmod 750 "${DASK_WORKER_LAUNCHER}" - - local -r dask_service_file="/usr/lib/systemd/system/${DASK_WORKER_SERVICE}.service" - cat <"${dask_service_file}" -[Unit] -Description=Dask Worker Service -[Service] -Type=simple -Restart=on-failure -ExecStart=/bin/bash -c 'exec ${DASK_WORKER_LAUNCHER}' -[Install] -WantedBy=multi-user.target -EOF - chmod a+r "${dask_service_file}" - - systemctl daemon-reload - - # Enable the service - if [[ "${ROLE}" != "Master" ]]; then - enable_worker_service="1" - else - local RUN_WORKER_ON_MASTER=$(get_metadata_attribute dask-cuda-worker-on-master 'true') - # Enable service on single-node cluster (no workers) - local worker_count="$(get_metadata_attribute dataproc-worker-count)" - if [[ "${worker_count}" == "0" || "${RUN_WORKER_ON_MASTER}" == "true" ]]; then - enable_worker_service="1" - fi - fi - - if [[ "${enable_worker_service}" == "1" ]]; then - systemctl enable "${DASK_WORKER_SERVICE}" - systemctl restart "${DASK_WORKER_SERVICE}" - fi -} - -function install_systemd_dask_scheduler() { - # only run scheduler on primary master - if [[ "$(hostname -s)" != "${MASTER}" ]]; then return ; fi - echo "Installing systemd Dask Scheduler service..." 
- local -r dask_scheduler_local_dir="/tmp/${DASK_SCHEDULER_SERVICE}" - - mkdir -p "${dask_scheduler_local_dir}" - - local DASK_SCHEDULER_LAUNCHER="/usr/local/bin/${DASK_SCHEDULER_SERVICE}-launcher.sh" - - cat <"${DASK_SCHEDULER_LAUNCHER}" -#!/bin/bash -LOGFILE="/var/log/${DASK_SCHEDULER_SERVICE}.log" -echo "dask scheduler starting, logging to \${LOGFILE}" -${DASK_CONDA_ENV}/bin/dask scheduler >> "\${LOGFILE}" 2>&1 -EOF - - chmod 750 "${DASK_SCHEDULER_LAUNCHER}" - - local -r dask_service_file="/usr/lib/systemd/system/${DASK_SCHEDULER_SERVICE}.service" - cat <"${dask_service_file}" -[Unit] -Description=Dask Scheduler Service -[Service] -Type=simple -Restart=on-failure -ExecStart=/bin/bash -c 'exec ${DASK_SCHEDULER_LAUNCHER}' -[Install] -WantedBy=multi-user.target -EOF - chmod a+r "${dask_service_file}" - - systemctl daemon-reload - - # Enable the service - systemctl enable "${DASK_SCHEDULER_SERVICE}" -} - -function install_systemd_dask_service() { - install_systemd_dask_scheduler - install_systemd_dask_worker -} - -function configure_knox_for_dask() { - if [[ ! 
-d "${KNOX_HOME}" ]]; then - echo "Skip configuring Knox rules for Dask" - return 0 - fi - - local DASK_UI_PORT=8787 - if [[ -f /etc/knox/conf/topologies/default.xml ]]; then - sed -i \ - "/<\/topology>/i DASK<\/role>http://localhost:${DASK_UI_PORT}<\/url><\/service> DASKWS<\/role>ws:\/\/${MASTER}:${DASK_UI_PORT}<\/url><\/service>" \ - /etc/knox/conf/topologies/default.xml - fi - - mkdir -p "${KNOX_DASK_DIR}" - - cat >"${KNOX_DASK_DIR}/service.xml" <<'EOF' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -EOF - - cat >"${KNOX_DASK_DIR}/rewrite.xml" <<'EOF' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -EOF - - mkdir -p "${KNOX_DASKWS_DIR}" - - cat >"${KNOX_DASKWS_DIR}/service.xml" <<'EOF' - - - - - - - - - - - +[% INSERT common/util_functions %] - - - +[% INSERT gpu/util_functions %] - - - -EOF - - cat >"${KNOX_DASKWS_DIR}/rewrite.xml" <<'EOF' - - - - - - - -EOF - - chown -R knox:knox "${KNOX_DASK_DIR}" "${KNOX_DASKWS_DIR}" - - # Do not restart knox during pre-init script run - if [[ -n "${ROLE}" ]]; then - restart_knox - fi -} - -function configure_fluentd_for_dask() { - if [[ "$(hostname -s)" == "${MASTER}" ]]; then - cat >/etc/google-fluentd/config.d/dataproc-dask.conf < - @type tail - path /var/log/dask-scheduler.log - pos_file /var/tmp/fluentd.dataproc.dask.scheduler.pos - read_from_head true - tag google.dataproc.dask-scheduler - - @type none - - - - - @type record_transformer - - filename dask-scheduler.log - - -EOF - fi - - if [[ "${enable_worker_service}" == "1" ]]; then - cat >>/etc/google-fluentd/config.d/dataproc-dask.conf < - @type tail - path /var/log/dask-worker.log - pos_file /var/tmp/fluentd.dataproc.dask.worker.pos - read_from_head true - tag google.dataproc.dask-worker - - @type none - - - - - @type record_transformer - - filename dask-worker.log - - -EOF - fi - - systemctl restart google-fluentd -} - -function install_dask_rapids() { - if is_cuda12 ; 
then - local python_spec="python>=3.11" - local cuda_spec="cuda-version>=12,<13" - local dask_spec="dask>=2024.7" - local numba_spec="numba" - elif is_cuda11 ; then - local python_spec="python>=3.9" - local cuda_spec="cuda-version>=11,<12.0a0" - local dask_spec="dask" - local numba_spec="numba" - fi - - rapids_spec="rapids>=${RAPIDS_VERSION}" - CONDA_PACKAGES=() - if [[ "${DASK_RUNTIME}" == 'yarn' ]]; then - # Pin `distributed` and `dask` package versions to old release - # because `dask-yarn` 0.9 uses skein in a way which - # is not compatible with `distributed` package 2022.2 and newer: - # https://github.com/dask/dask-yarn/issues/155 - - dask_spec="dask<2022.2" - python_spec="python>=3.7,<3.8.0a0" - rapids_spec="rapids<=24.05" - if is_ubuntu18 ; then - # the libuuid.so.1 distributed with fiona 1.8.22 dumps core when calling uuid_generate_time_generic - CONDA_PACKAGES+=("fiona<1.8.22") - fi - CONDA_PACKAGES+=('dask-yarn=0.9' "distributed<2022.2") - fi - - CONDA_PACKAGES+=( - "${cuda_spec}" - "${rapids_spec}" - "${dask_spec}" - "dask-bigquery" - "dask-ml" - "dask-sql" - "cudf" - "${numba_spec}" - ) - - # Install cuda, rapids, dask - mamba="/opt/conda/miniconda3/bin/mamba" - conda="/opt/conda/miniconda3/bin/conda" - - "${conda}" remove -n dask --all || echo "unable to remove conda environment [dask]" - - ( set +e - local is_installed="0" - for installer in "${mamba}" "${conda}" ; do - test -d "${DASK_CONDA_ENV}" || \ - time "${installer}" "create" -m -n 'dask-rapids' -y --no-channel-priority \ - -c 'conda-forge' -c 'nvidia' -c 'rapidsai' \ - ${CONDA_PACKAGES[*]} \ - "${python_spec}" \ - > "${install_log}" 2>&1 && retval=$? || { retval=$? 
; cat "${install_log}" ; } - sync - if [[ "$retval" == "0" ]] ; then - is_installed="1" - break - fi - "${conda}" config --set channel_priority flexible - done - if [[ "${is_installed}" == "0" ]]; then - echo "failed to install dask" - return 1 - fi - ) -} +[% INSERT dask/util_functions %] function main() { # Install Dask with RAPIDS @@ -496,166 +22,40 @@ function main() { # In "standalone" mode, Dask relies on a systemd unit to launch. # In "yarn" mode, it relies a config.yaml file. if [[ "${DASK_RUNTIME}" == "yarn" ]]; then - # Create Dask YARN config file + # Create cuda accelerated Dask YARN config file configure_dask_yarn + echo "yarn setup complete" else # Create Dask service install_systemd_dask_service - - if [[ "$(hostname -s)" == "${MASTER}" ]]; then - systemctl start "${DASK_SCHEDULER_SERVICE}" - systemctl status "${DASK_SCHEDULER_SERVICE}" - fi - - echo "Starting Dask 'standalone' cluster..." - if [[ "${enable_worker_service}" == "1" ]]; then - systemctl start "${DASK_WORKER_SERVICE}" - systemctl status "${DASK_WORKER_SERVICE}" - fi + start_systemd_dask_service configure_knox_for_dask - local DASK_CLOUD_LOGGING="$(get_metadata_attribute dask-cloud-logging || echo 'false')" + local DASK_CLOUD_LOGGING="$(get_metadata_attribute dask-cloud-logging 'false')" if [[ "${DASK_CLOUD_LOGGING}" == "true" ]]; then configure_fluentd_for_dask fi fi - - echo "Dask RAPIDS for ${DASK_RUNTIME} successfully initialized." - if [[ "${ROLE}" == "Master" ]]; then - systemctl restart hadoop-yarn-resourcemanager.service - # Restart NodeManager on Master as well if this is a single-node-cluster. 
- if systemctl list-units | grep hadoop-yarn-nodemanager; then - systemctl restart hadoop-yarn-nodemanager.service - fi - else - systemctl restart hadoop-yarn-nodemanager.service - fi } -function exit_handler() ( - set +e - echo "Exit handler invoked" - - # Free conda cache - /opt/conda/miniconda3/bin/conda clean -a > /dev/null 2>&1 - - # Clear pip cache - pip cache purge || echo "unable to purge pip cache" - - # remove the tmpfs conda pkgs_dirs - if [[ -d /mnt/shm ]] ; then /opt/conda/miniconda3/bin/conda config --remove pkgs_dirs /mnt/shm ; fi - - # Clean up shared memory mounts - for shmdir in /var/cache/apt/archives /var/cache/dnf /mnt/shm ; do - if grep -q "^tmpfs ${shmdir}" /proc/mounts ; then - rm -rf ${shmdir}/* - umount -f ${shmdir} - fi - done - - # Clean up OS package cache ; re-hold systemd package - if is_debuntu ; then - apt-get -y -qq clean - apt-get -y -qq autoremove - else - dnf clean all - fi - - # print disk usage statistics - if is_debuntu ; then - # Rocky doesn't have sort -h and fails when the argument is passed - du --max-depth 3 -hx / | sort -h | tail -10 - fi - - # Process disk usage logs from installation period - rm -f "${tmpdir}/keep-running-df" - sleep 6s - # compute maximum size of disk during installation - # Log file contains logs like the following (minus the preceeding #): -#Filesystem Size Used Avail Use% Mounted on -#/dev/vda2 6.8G 2.5G 4.0G 39% / - df -h / | tee -a "${tmpdir}/disk-usage.log" - perl -e '$max=( sort - map { (split)[2] =~ /^(\d+)/ } - grep { m:^/: } )[-1]; -print( "maximum-disk-used: $max", $/ );' < "${tmpdir}/disk-usage.log" - - echo "exit_handler has completed" - - # zero free disk space - if [[ -n "$(get_metadata_attribute creating-image)" ]]; then - dd if=/dev/zero of=/zero ; sync ; rm -f /zero - fi - +function exit_handler() { + gpu_exit_handler + pip_exit_handler + conda_exit_handler + common_exit_handler return 0 -) +} function prepare_to_install(){ - readonly DEFAULT_CUDA_VERSION="12.4" - 
CUDA_VERSION=$(get_metadata_attribute 'cuda-version' ${DEFAULT_CUDA_VERSION}) - readonly CUDA_VERSION - - readonly ROLE=$(get_metadata_attribute dataproc-role) - readonly MASTER=$(get_metadata_attribute dataproc-master) - - # RAPIDS config - RAPIDS_RUNTIME=$(get_metadata_attribute 'rapids-runtime' 'DASK') - readonly RAPIDS_RUNTIME - - readonly DEFAULT_DASK_RAPIDS_VERSION="24.08" - readonly RAPIDS_VERSION=$(get_metadata_attribute 'rapids-version' ${DEFAULT_DASK_RAPIDS_VERSION}) - - # Dask config - DASK_RUNTIME="$(get_metadata_attribute dask-runtime || echo 'standalone')" - readonly DASK_RUNTIME - readonly DASK_SERVICE=dask-cluster - readonly DASK_WORKER_SERVICE=dask-worker - readonly DASK_SCHEDULER_SERVICE=dask-scheduler - readonly DASK_CONDA_ENV="/opt/conda/miniconda3/envs/dask-rapids" - - # Knox config - readonly KNOX_HOME=/usr/lib/knox - readonly KNOX_DASK_DIR="${KNOX_HOME}/data/services/dask/0.1.0" - readonly KNOX_DASKWS_DIR="${KNOX_HOME}/data/services/daskws/0.1.0" - enable_worker_service="0" - - free_mem="$(awk '/^MemFree/ {print $2}' /proc/meminfo)" - # Write to a ramdisk instead of churning the persistent disk - if [[ ${free_mem} -ge 5250000 ]]; then - tmpdir=/mnt/shm - mkdir -p /mnt/shm - mount -t tmpfs tmpfs /mnt/shm - - # Download conda packages to tmpfs - /opt/conda/miniconda3/bin/conda config --add pkgs_dirs /mnt/shm - mount -t tmpfs tmpfs /mnt/shm - - # Download pip packages to tmpfs - pip config set global.cache-dir /mnt/shm || echo "unable to set global.cache-dir" - - # Download OS packages to tmpfs - if is_debuntu ; then - mount -t tmpfs tmpfs /var/cache/apt/archives - else - mount -t tmpfs tmpfs /var/cache/dnf - fi - else - tmpdir=/tmp - fi - install_log="${tmpdir}/install.log" + prepare_common_env + conda_env="$(get_metadata_attribute conda-env 'dask-rapids')" + readonly conda_env + prepare_dask_rapids_env + prepare_conda_env + prepare_pip_env + prepare_gpu_env trap exit_handler EXIT - - # Monitor disk usage in a screen session - if is_debuntu ; 
then - apt-get install -y -qq screen - else - dnf -y -q install screen - fi - df -h / | tee "${tmpdir}/disk-usage.log" - touch "${tmpdir}/keep-running-df" - screen -d -m -US keep-running-df \ - bash -c "while [[ -f ${tmpdir}/keep-running-df ]] ; do df -h / | tee -a ${tmpdir}/disk-usage.log ; sleep 5s ; done" } prepare_to_install From 1a42f2ace3283f658f034b065a076375001d75ae Mon Sep 17 00:00:00 2001 From: "C.J. Collier" Date: Tue, 7 Jan 2025 00:04:26 -0800 Subject: [PATCH 04/15] Template::Toolkit dependency --- cloudbuild/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cloudbuild/Dockerfile b/cloudbuild/Dockerfile index aebaffd84..644219305 100644 --- a/cloudbuild/Dockerfile +++ b/cloudbuild/Dockerfile @@ -22,7 +22,8 @@ RUN /usr/bin/curl -s https://bazel.build/bazel-release.pub.gpg | \ dd of="${bazel_repo_file}" status=none && \ apt-get update -qq RUN apt-get autoremove -y -qq > /dev/null 2>&1 && \ - apt-get install -y -qq default-jdk python3-setuptools bazel-${bazel_version} > /dev/null 2>&1 && \ + apt-get install -y -qq default-jdk python3-setuptools bazel-${bazel_version} \ + libtemplate-perl > /dev/null 2>&1 && \ apt-get clean # Set bazel-${bazel_version} as the default bazel alternative in this container From b923a13d65ab2d0a3cb0e194ce253ce56b9ffb36 Mon Sep 17 00:00:00 2001 From: "C.J. Collier" Date: Tue, 7 Jan 2025 14:57:03 -0800 Subject: [PATCH 05/15] mark generated actions as changed when templates are changed --- cloudbuild/presubmit.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cloudbuild/presubmit.sh b/cloudbuild/presubmit.sh index fc664f1bf..d7f2edb04 100644 --- a/cloudbuild/presubmit.sh +++ b/cloudbuild/presubmit.sh @@ -48,14 +48,14 @@ initialize_git_repo() { # to determine all changed files and looks for tests in directories with changed files. 
determine_tests_to_run() { # Infer the files that changed - mapfile -t CHANGED_ACTION_TEMPLATES < <(git diff origin/master --name-only | grep 'templates/.*/.*\.sh\.in') - for tt in "${CHANGED_ACTION_TEMPLATES[@]}"; do + mapfile -t DELETED_BUILD_FILES < <(git diff origin/master --name-only --diff-filter=D | grep BUILD) + mapfile -t CHANGED_FILES < <(git diff origin/master --name-only | grep -v template) + for tt in $(git diff origin/master --name-only | grep 'templates/.*/.*\.sh\.in'); do local genfile=`perl -e "print( q{${tt}} =~ m:templates/(.*?.sh).in: )"` perl templates/generate-action.pl "${genfile}" > "${genfile}" + CHANGED_FILES+=("${genfile}") done - mapfile -t DELETED_BUILD_FILES < <(git diff origin/master --name-only --diff-filter=D | grep BUILD) - mapfile -t CHANGED_FILES < <(git diff origin/master --name-only | grep -v template) echo "Deleted BUILD files: ${DELETED_BUILD_FILES[*]}" echo "Changed files: ${CHANGED_FILES[*]}" From c20684fc4c35f4061899a152829b9838df6cec82 Mon Sep 17 00:00:00 2001 From: "C.J. Collier" Date: Tue, 7 Jan 2025 17:22:29 -0800 Subject: [PATCH 06/15] retry tar creation and verify before caching to gcs --- templates/dask/util_functions | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/templates/dask/util_functions b/templates/dask/util_functions index d67da1fc1..019bd6778 100644 --- a/templates/dask/util_functions +++ b/templates/dask/util_functions @@ -510,9 +510,9 @@ function install_conda_packages() { if [[ "$retval" == "0" ]] ; then is_installed="1" pushd "${DASK_CONDA_ENV}" - time ( - tar czf "${local_tarball}" . - gcloud storage cp "${local_tarball}" "${gcs_tarball}" + time ( set -e + execute_with_retries "tar czf ${local_tarball} . && tar tzf ${local_tarball}" + execute_with_retries gcloud storage cp "${local_tarball}" "${gcs_tarball}" rm "${local_tarball}" ) popd From 2102ef38ff77212272ce4d7eff95357af91408bb Mon Sep 17 00:00:00 2001 From: "C.J. 
Collier" Date: Tue, 7 Jan 2025 18:26:24 -0800 Subject: [PATCH 07/15] root cause was ramdisk exhaustion. increase minimum memory requirements for ramdisk --- templates/common/util_functions | 2 +- templates/dask/util_functions | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/templates/common/util_functions b/templates/common/util_functions index 0f0bfeaa6..29282ca31 100644 --- a/templates/common/util_functions +++ b/templates/common/util_functions @@ -297,7 +297,7 @@ function is_ramdisk() { function mount_ramdisk(){ local free_mem free_mem="$(awk '/^MemFree/ {print $2}' /proc/meminfo)" - if [[ ${free_mem} -lt 10500000 ]]; then return 0 ; fi + if [[ ${free_mem} -lt 30000000 ]]; then return 0 ; fi # Write to a ramdisk instead of churning the persistent disk diff --git a/templates/dask/util_functions b/templates/dask/util_functions index 019bd6778..ce6964e94 100644 --- a/templates/dask/util_functions +++ b/templates/dask/util_functions @@ -511,8 +511,8 @@ function install_conda_packages() { is_installed="1" pushd "${DASK_CONDA_ENV}" time ( set -e - execute_with_retries "tar czf ${local_tarball} . && tar tzf ${local_tarball}" - execute_with_retries gcloud storage cp "${local_tarball}" "${gcs_tarball}" + tar czf "${local_tarball}" . && tar tzf "${local_tarball}" + gcloud storage cp "${local_tarball}" "${gcs_tarball}" rm "${local_tarball}" ) popd From 19124c0ce80aa6117ec12fc743a3d7b0188947d1 Mon Sep 17 00:00:00 2001 From: "C.J. 
Collier" Date: Tue, 7 Jan 2025 19:35:43 -0800 Subject: [PATCH 08/15] using larger machine type to make use of ramdisk ; relaxing free_mem requirement a bit --- rapids/test_rapids.py | 3 +-- templates/common/util_functions | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/rapids/test_rapids.py b/rapids/test_rapids.py index 63fa72a7f..02838ff08 100644 --- a/rapids/test_rapids.py +++ b/rapids/test_rapids.py @@ -43,7 +43,6 @@ def run_dask_script(self, name): self.assert_instance_command(name, verify_cmd) self.remove_test_script(self.DASK_RAPIDS_TEST_SCRIPT_FILE_NAME, name) - @parameterized.parameters( # If a new version of dask-yarn is released, add this test back in. # ("STANDARD", ["m", "w-0"], GPU_T4, "yarn"), @@ -61,7 +60,7 @@ def test_rapids_dask(self, configuration, machine_suffixes, accelerator, configuration, self.INIT_ACTIONS, metadata=metadata, - machine_type="n1-standard-8", + machine_type="n1-highmem-8", master_accelerator=accelerator, worker_accelerator=accelerator, boot_disk_size="50GB", diff --git a/templates/common/util_functions b/templates/common/util_functions index 29282ca31..336af37f8 100644 --- a/templates/common/util_functions +++ b/templates/common/util_functions @@ -297,7 +297,7 @@ function is_ramdisk() { function mount_ramdisk(){ local free_mem free_mem="$(awk '/^MemFree/ {print $2}' /proc/meminfo)" - if [[ ${free_mem} -lt 30000000 ]]; then return 0 ; fi + if [[ ${free_mem} -lt 20500000 ]]; then return 0 ; fi # Write to a ramdisk instead of churning the persistent disk From 798423dcd7530a2c47e1bea49cd94584f1931cc2 Mon Sep 17 00:00:00 2001 From: "C.J. 
Collier" Date: Tue, 7 Jan 2025 20:22:12 -0800 Subject: [PATCH 09/15] increasing the max-idle time --- integration_tests/dataproc_test_case.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_tests/dataproc_test_case.py b/integration_tests/dataproc_test_case.py index 683109125..ce7656c29 100644 --- a/integration_tests/dataproc_test_case.py +++ b/integration_tests/dataproc_test_case.py @@ -182,7 +182,7 @@ def createCluster(self, if not FLAGS.skip_cleanup: args.append("--max-age=60m") - args.append("--max-idle=25m") + args.append("--max-idle=45m") cmd = "{} dataproc clusters create {} {}".format( "gcloud beta" if beta else "gcloud", self.name, " ".join(args)) From c30880a819ab593d2fdc13da963fbcdb7422f6a1 Mon Sep 17 00:00:00 2001 From: "C.J. Collier" Date: Tue, 7 Jan 2025 21:12:23 -0800 Subject: [PATCH 10/15] use a more recent gpu installer --- gpu/install_gpu_driver.sh | 2930 +++++++++++++++++++++++-------------- 1 file changed, 1841 insertions(+), 1089 deletions(-) diff --git a/gpu/install_gpu_driver.sh b/gpu/install_gpu_driver.sh index 25efb2a49..20beac086 100644 --- a/gpu/install_gpu_driver.sh +++ b/gpu/install_gpu_driver.sh @@ -1,5 +1,7 @@ #!/bin/bash # +# Copyright 2015 Google LLC and contributors +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,6 +13,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +# +# This initialization action is generated from +# initialization-actions/templates/gpu/install_gpu_driver.sh.in +# +# Modifications made directly to the generated file will be lost when +# the template is re-evaluated + # # This script installs NVIDIA GPU drivers and collects GPU utilization metrics. 
@@ -20,32 +30,38 @@ function os_id() ( set +x ; grep '^ID=' /etc/os-release | cut -d= -f2 | x function os_version() ( set +x ; grep '^VERSION_ID=' /etc/os-release | cut -d= -f2 | xargs ; ) function os_codename() ( set +x ; grep '^VERSION_CODENAME=' /etc/os-release | cut -d= -f2 | xargs ; ) +# For version (or real number) comparison +# if first argument is greater than or equal to, greater than, less than or equal to, or less than the second +# ( version_ge 2.0 2.1 ) evaluates to false +# ( version_ge 2.2 2.1 ) evaluates to true function version_ge() ( set +x ; [ "$1" = "$(echo -e "$1\n$2" | sort -V | tail -n1)" ] ; ) function version_gt() ( set +x ; [ "$1" = "$2" ] && return 1 || version_ge $1 $2 ; ) function version_le() ( set +x ; [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] ; ) function version_lt() ( set +x ; [ "$1" = "$2" ] && return 1 || version_le $1 $2 ; ) -readonly -A supported_os=( - ['debian']="10 11 12" - ['rocky']="8 9" - ['ubuntu']="18.04 20.04 22.04" -) - -# dynamically define OS version test utility functions -if [[ "$(os_id)" == "rocky" ]]; -then _os_version=$(os_version | sed -e 's/[^0-9].*$//g') -else _os_version="$(os_version)"; fi -for os_id_val in 'rocky' 'ubuntu' 'debian' ; do - eval "function is_${os_id_val}() ( set +x ; [[ \"$(os_id)\" == '${os_id_val}' ]] ; )" - - for osver in $(echo "${supported_os["${os_id_val}"]}") ; do - eval "function is_${os_id_val}${osver%%.*}() ( set +x ; is_${os_id_val} && [[ \"${_os_version}\" == \"${osver}\" ]] ; )" - eval "function ge_${os_id_val}${osver%%.*}() ( set +x ; is_${os_id_val} && version_ge \"${_os_version}\" \"${osver}\" ; )" - eval "function le_${os_id_val}${osver%%.*}() ( set +x ; is_${os_id_val} && version_le \"${_os_version}\" \"${osver}\" ; )" +function define_os_comparison_functions() { + + readonly -A supported_os=( + ['debian']="10 11 12" + ['rocky']="8 9" + ['ubuntu']="18.04 20.04 22.04" + ) + + # dynamically define OS version test utility functions + if [[ "$(os_id)" == "rocky" 
]]; + then _os_version=$(os_version | sed -e 's/[^0-9].*$//g') + else _os_version="$(os_version)"; fi + for os_id_val in 'rocky' 'ubuntu' 'debian' ; do + eval "function is_${os_id_val}() ( set +x ; [[ \"$(os_id)\" == '${os_id_val}' ]] ; )" + + for osver in $(echo "${supported_os["${os_id_val}"]}") ; do + eval "function is_${os_id_val}${osver%%.*}() ( set +x ; is_${os_id_val} && [[ \"${_os_version}\" == \"${osver}\" ]] ; )" + eval "function ge_${os_id_val}${osver%%.*}() ( set +x ; is_${os_id_val} && version_ge \"${_os_version}\" \"${osver}\" ; )" + eval "function le_${os_id_val}${osver%%.*}() ( set +x ; is_${os_id_val} && version_le \"${_os_version}\" \"${osver}\" ; )" + done done -done - -function is_debuntu() ( set +x ; is_debian || is_ubuntu ; ) + eval "function is_debuntu() ( set +x ; is_debian || is_ubuntu ; )" +} function os_vercat() ( set +x if is_ubuntu ; then os_version | sed -e 's/[^0-9]//g' @@ -53,7 +69,7 @@ function os_vercat() ( set +x else os_version ; fi ; ) function repair_old_backports { - if ge_debian12 || ! is_debuntu ; then return ; fi + if ! is_debuntu ; then return ; fi # This script uses 'apt-get update' and is therefore potentially dependent on # backports repositories which have been archived. In order to mitigate this # problem, we will use archive.debian.org for the oldoldstable repo @@ -94,6 +110,7 @@ function print_metadata_value_if_exists() { return ${return_code} } +# replicates /usr/share/google/get_metadata_value function get_metadata_value() ( set +x local readonly varname=$1 @@ -117,226 +134,13 @@ function get_metadata_attribute() ( get_metadata_value "attributes/${attribute_name}" || echo -n "${default_value}" ) -OS_NAME=$(lsb_release -is | tr '[:upper:]' '[:lower:]') -distribution=$(. 
/etc/os-release;echo $ID$VERSION_ID) -readonly OS_NAME - -# node role -ROLE="$(get_metadata_attribute dataproc-role)" -readonly ROLE - -# CUDA version and Driver version -# https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html -# https://developer.nvidia.com/cuda-downloads -# Rocky8: 12.0: 525.147.05 -readonly -A DRIVER_FOR_CUDA=( - ["11.8"]="560.35.03" - ["12.0"]="525.60.13" ["12.4"]="560.35.03" ["12.6"]="560.35.03" -) -# https://developer.nvidia.com/cudnn-downloads -if is_debuntu ; then -readonly -A CUDNN_FOR_CUDA=( - ["11.8"]="9.5.1.17" - ["12.0"]="9.5.1.17" ["12.4"]="9.5.1.17" ["12.6"]="9.5.1.17" -) -elif is_rocky ; then -# rocky: -# 12.0: 8.8.1.3 -# 12.1: 8.9.3.28 -# 12.2: 8.9.7.29 -# 12.3: 9.0.0.312 -# 12.4: 9.1.1.17 -# 12.5: 9.2.1.18 -# 12.6: 9.5.1.17 -readonly -A CUDNN_FOR_CUDA=( - ["11.8"]="9.5.1.17" - ["12.0"]="8.8.1.3" ["12.4"]="9.1.1.17" ["12.6"]="9.5.1.17" -) -fi -# https://developer.nvidia.com/nccl/nccl-download -# 12.2: 2.19.3, 12.5: 2.21.5 -readonly -A NCCL_FOR_CUDA=( - ["11.8"]="2.15.5" - ["12.0"]="2.16.5" ["12.4"]="2.23.4" ["12.6"]="2.23.4" -) -readonly -A CUDA_SUBVER=( - ["11.8"]="11.8.0" - ["12.0"]="12.0.0" ["12.4"]="12.4.1" ["12.6"]="12.6.2" -) - -RAPIDS_RUNTIME=$(get_metadata_attribute 'rapids-runtime' 'SPARK') -readonly DEFAULT_CUDA_VERSION='12.4' -CUDA_VERSION=$(get_metadata_attribute 'cuda-version' "${DEFAULT_CUDA_VERSION}") -if ( ( ge_debian12 || ge_rocky9 ) && version_le "${CUDA_VERSION%%.*}" "11" ) ; then - # CUDA 11 no longer supported on debian12 - 2024-11-22, rocky9 - 2024-11-27 - CUDA_VERSION="${DEFAULT_CUDA_VERSION}" -fi - -if ( version_ge "${CUDA_VERSION}" "12" && (le_debian11 || le_ubuntu18) ) ; then - # Only CUDA 12.0 supported on older debuntu - CUDA_VERSION="12.0" -fi -readonly CUDA_VERSION -readonly CUDA_FULL_VERSION="${CUDA_SUBVER["${CUDA_VERSION}"]}" - -function is_cuda12() ( set +x ; [[ "${CUDA_VERSION%%.*}" == "12" ]] ; ) -function le_cuda12() ( set +x ; version_le "${CUDA_VERSION%%.*}" "12" ; ) 
-function ge_cuda12() ( set +x ; version_ge "${CUDA_VERSION%%.*}" "12" ; ) - -function is_cuda11() ( set +x ; [[ "${CUDA_VERSION%%.*}" == "11" ]] ; ) -function le_cuda11() ( set +x ; version_le "${CUDA_VERSION%%.*}" "11" ; ) -function ge_cuda11() ( set +x ; version_ge "${CUDA_VERSION%%.*}" "11" ; ) - -DEFAULT_DRIVER="${DRIVER_FOR_CUDA[${CUDA_VERSION}]}" -if ( ge_ubuntu22 && version_le "${CUDA_VERSION}" "12.0" ) ; then - DEFAULT_DRIVER="560.28.03" ; fi -if ( is_debian11 || is_ubuntu20 ) ; then DEFAULT_DRIVER="560.28.03" ; fi -if ( is_rocky && le_cuda11 ) ; then DEFAULT_DRIVER="525.147.05" ; fi -if ( is_ubuntu20 && le_cuda11 ) ; then DEFAULT_DRIVER="535.183.06" ; fi -if ( is_rocky9 && ge_cuda12 ) ; then DEFAULT_DRIVER="565.57.01" ; fi -DRIVER_VERSION=$(get_metadata_attribute 'gpu-driver-version' "${DEFAULT_DRIVER}") - -readonly DRIVER_VERSION -readonly DRIVER=${DRIVER_VERSION%%.*} - -readonly DEFAULT_CUDNN8_VERSION="8.0.5.39" -readonly DEFAULT_CUDNN9_VERSION="9.1.0.70" - -# Parameters for NVIDIA-provided cuDNN library -readonly DEFAULT_CUDNN_VERSION=${CUDNN_FOR_CUDA["${CUDA_VERSION}"]} -CUDNN_VERSION=$(get_metadata_attribute 'cudnn-version' "${DEFAULT_CUDNN_VERSION}") -function is_cudnn8() ( set +x ; [[ "${CUDNN_VERSION%%.*}" == "8" ]] ; ) -function is_cudnn9() ( set +x ; [[ "${CUDNN_VERSION%%.*}" == "9" ]] ; ) -# The minimum cuDNN version supported by rocky is ${DEFAULT_CUDNN8_VERSION} -if is_rocky && (version_le "${CUDNN_VERSION}" "${DEFAULT_CUDNN8_VERSION}") ; then - CUDNN_VERSION="${DEFAULT_CUDNN8_VERSION}" -elif (ge_ubuntu20 || ge_debian12) && is_cudnn8 ; then - # cuDNN v8 is not distribution for ubuntu20+, debian12 - CUDNN_VERSION="${DEFAULT_CUDNN9_VERSION}" -elif (le_ubuntu18 || le_debian11) && is_cudnn9 ; then - # cuDNN v9 is not distributed for ubuntu18, debian10, debian11 ; fall back to 8 - CUDNN_VERSION="8.8.0.121" -fi -readonly CUDNN_VERSION - -readonly DEFAULT_NCCL_VERSION=${NCCL_FOR_CUDA["${CUDA_VERSION}"]} -readonly 
NCCL_VERSION=$(get_metadata_attribute 'nccl-version' ${DEFAULT_NCCL_VERSION}) - -# Parameters for NVIDIA-provided Debian GPU driver -readonly DEFAULT_USERSPACE_URL="https://download.nvidia.com/XFree86/Linux-x86_64/${DRIVER_VERSION}/NVIDIA-Linux-x86_64-${DRIVER_VERSION}.run" - -readonly USERSPACE_URL=$(get_metadata_attribute 'gpu-driver-url' "${DEFAULT_USERSPACE_URL}") - -# Short name for urls -if is_ubuntu22 ; then - # at the time of writing 20241125 there is no ubuntu2204 in the index of repos at - # https://developer.download.nvidia.com/compute/machine-learning/repos/ - # use packages from previous release until such time as nvidia - # release ubuntu2204 builds - - nccl_shortname="ubuntu2004" - shortname="$(os_id)$(os_vercat)" -elif ge_rocky9 ; then - # use packages from previous release until such time as nvidia - # release rhel9 builds - - nccl_shortname="rhel8" - shortname="rhel9" -elif is_rocky ; then - shortname="$(os_id | sed -e 's/rocky/rhel/')$(os_vercat)" - nccl_shortname="${shortname}" -else - shortname="$(os_id)$(os_vercat)" - nccl_shortname="${shortname}" -fi - -# Parameters for NVIDIA-provided package repositories -readonly NVIDIA_BASE_DL_URL='https://developer.download.nvidia.com/compute' -readonly NVIDIA_REPO_URL="${NVIDIA_BASE_DL_URL}/cuda/repos/${shortname}/x86_64" - -# Parameters for NVIDIA-provided NCCL library -readonly DEFAULT_NCCL_REPO_URL="${NVIDIA_BASE_DL_URL}/machine-learning/repos/${nccl_shortname}/x86_64/nvidia-machine-learning-repo-${nccl_shortname}_1.0.0-1_amd64.deb" -NCCL_REPO_URL=$(get_metadata_attribute 'nccl-repo-url' "${DEFAULT_NCCL_REPO_URL}") -readonly NCCL_REPO_URL -readonly NCCL_REPO_KEY="${NVIDIA_BASE_DL_URL}/machine-learning/repos/${nccl_shortname}/x86_64/7fa2af80.pub" # 3bf863cc.pub - -function set_cuda_runfile_url() { - local RUNFILE_DRIVER_VERSION="${DRIVER_VERSION}" - local RUNFILE_CUDA_VERSION="${CUDA_FULL_VERSION}" - - if ge_cuda12 ; then - if ( le_debian11 || le_ubuntu18 ) ; then - RUNFILE_DRIVER_VERSION="525.60.13" 
- RUNFILE_CUDA_VERSION="12.0.0" - elif ( le_rocky8 && version_le "${DATAPROC_IMAGE_VERSION}" "2.0" ) ; then - RUNFILE_DRIVER_VERSION="525.147.05" - RUNFILE_CUDA_VERSION="12.0.0" - fi - else - RUNFILE_DRIVER_VERSION="520.61.05" - RUNFILE_CUDA_VERSION="11.8.0" - fi - - readonly RUNFILE_FILENAME="cuda_${RUNFILE_CUDA_VERSION}_${RUNFILE_DRIVER_VERSION}_linux.run" - CUDA_RELEASE_BASE_URL="${NVIDIA_BASE_DL_URL}/cuda/${RUNFILE_CUDA_VERSION}" - DEFAULT_NVIDIA_CUDA_URL="${CUDA_RELEASE_BASE_URL}/local_installers/${RUNFILE_FILENAME}" - readonly DEFAULT_NVIDIA_CUDA_URL - - NVIDIA_CUDA_URL=$(get_metadata_attribute 'cuda-url' "${DEFAULT_NVIDIA_CUDA_URL}") - readonly NVIDIA_CUDA_URL -} - -set_cuda_runfile_url - -# Parameter for NVIDIA-provided Rocky Linux GPU driver -readonly NVIDIA_ROCKY_REPO_URL="${NVIDIA_REPO_URL}/cuda-${shortname}.repo" - -CUDNN_TARBALL="cudnn-${CUDA_VERSION}-linux-x64-v${CUDNN_VERSION}.tgz" -CUDNN_TARBALL_URL="${NVIDIA_BASE_DL_URL}/redist/cudnn/v${CUDNN_VERSION%.*}/${CUDNN_TARBALL}" -if ( version_ge "${CUDNN_VERSION}" "8.3.1.22" ); then - # When version is greater than or equal to 8.3.1.22 but less than 8.4.1.50 use this format - CUDNN_TARBALL="cudnn-linux-x86_64-${CUDNN_VERSION}_cuda${CUDA_VERSION%.*}-archive.tar.xz" - if ( version_le "${CUDNN_VERSION}" "8.4.1.50" ); then - # When cuDNN version is greater than or equal to 8.4.1.50 use this format - CUDNN_TARBALL="cudnn-linux-x86_64-${CUDNN_VERSION}_cuda${CUDA_VERSION}-archive.tar.xz" - fi - # Use legacy url format with one of the tarball name formats depending on version as above - CUDNN_TARBALL_URL="${NVIDIA_BASE_DL_URL}/redist/cudnn/v${CUDNN_VERSION%.*}/local_installers/${CUDA_VERSION}/${CUDNN_TARBALL}" -fi -if ( version_ge "${CUDA_VERSION}" "12.0" ); then - # Use modern url format When cuda version is greater than or equal to 12.0 - CUDNN_TARBALL="cudnn-linux-x86_64-${CUDNN_VERSION}_cuda${CUDA_VERSION%%.*}-archive.tar.xz" - 
CUDNN_TARBALL_URL="${NVIDIA_BASE_DL_URL}/cudnn/redist/cudnn/linux-x86_64/${CUDNN_TARBALL}" -fi -readonly CUDNN_TARBALL -readonly CUDNN_TARBALL_URL - -# Whether to install NVIDIA-provided or OS-provided GPU driver -GPU_DRIVER_PROVIDER=$(get_metadata_attribute 'gpu-driver-provider' 'NVIDIA') -readonly GPU_DRIVER_PROVIDER - -# Stackdriver GPU agent parameters -readonly GPU_AGENT_REPO_URL='https://raw.githubusercontent.com/GoogleCloudPlatform/ml-on-gcp/master/dlvm/gcp-gpu-utilization-metrics' -# Whether to install GPU monitoring agent that sends GPU metrics to Stackdriver -INSTALL_GPU_AGENT=$(get_metadata_attribute 'install-gpu-agent' 'false') -readonly INSTALL_GPU_AGENT - -# Dataproc configurations -readonly HADOOP_CONF_DIR='/etc/hadoop/conf' -readonly HIVE_CONF_DIR='/etc/hive/conf' -readonly SPARK_CONF_DIR='/etc/spark/conf' - -NVIDIA_SMI_PATH='/usr/bin' -MIG_MAJOR_CAPS=0 -IS_MIG_ENABLED=0 - function execute_with_retries() ( set +x local -r cmd="$*" if [[ "$cmd" =~ "^apt-get install" ]] ; then apt-get -y clean - apt-get -y autoremove + apt-get -o DPkg::Lock::Timeout=60 -y autoremove fi for ((i = 0; i < 3; i++)); do set -x @@ -348,222 +152,234 @@ function execute_with_retries() ( return 1 ) -CUDA_KEYRING_PKG_INSTALLED="0" -function install_cuda_keyring_pkg() { - if [[ "${CUDA_KEYRING_PKG_INSTALLED}" == "1" ]]; then return ; fi - local kr_ver=1.1 - curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ - "${NVIDIA_REPO_URL}/cuda-keyring_${kr_ver}-1_all.deb" \ - -o "${tmpdir}/cuda-keyring.deb" - dpkg -i "${tmpdir}/cuda-keyring.deb" - rm -f "${tmpdir}/cuda-keyring.deb" - CUDA_KEYRING_PKG_INSTALLED="1" -} - -function uninstall_cuda_keyring_pkg() { - apt-get purge -yq cuda-keyring - CUDA_KEYRING_PKG_INSTALLED="0" -} +function cache_fetched_package() { + local src_url="$1" + local gcs_fn="$2" + local local_fn="$3" -CUDA_LOCAL_REPO_INSTALLED="0" -function install_local_cuda_repo() { - if [[ "${CUDA_LOCAL_REPO_INSTALLED}" == "1" ]]; then return ; fi - 
CUDA_LOCAL_REPO_INSTALLED="1" - pkgname="cuda-repo-${shortname}-${CUDA_VERSION//./-}-local" - CUDA_LOCAL_REPO_PKG_NAME="${pkgname}" - readonly LOCAL_INSTALLER_DEB="${pkgname}_${CUDA_FULL_VERSION}-${DRIVER_VERSION}-1_amd64.deb" - readonly LOCAL_DEB_URL="${NVIDIA_BASE_DL_URL}/cuda/${CUDA_FULL_VERSION}/local_installers/${LOCAL_INSTALLER_DEB}" - readonly DIST_KEYRING_DIR="/var/${pkgname}" + while ! command -v gcloud ; do sleep 5s ; done - curl -fsSL --retry-connrefused --retry 3 --retry-max-time 5 \ - "${LOCAL_DEB_URL}" -o "${tmpdir}/${LOCAL_INSTALLER_DEB}" + if gsutil ls "${gcs_fn}" 2>&1 | grep -q "${gcs_fn}" ; then + time gcloud storage cp "${gcs_fn}" "${local_fn}" + else + time ( curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 "${src_url}" -o "${local_fn}" && \ + gcloud storage cp "${local_fn}" "${gcs_fn}" ; ) + fi +} - dpkg -i "${tmpdir}/${LOCAL_INSTALLER_DEB}" - rm "${tmpdir}/${LOCAL_INSTALLER_DEB}" - cp ${DIST_KEYRING_DIR}/cuda-*-keyring.gpg /usr/share/keyrings/ +function add_contrib_component() { + if ! 
is_debuntu ; then return ; fi + if ge_debian12 ; then + # Include in sources file components on which nvidia-kernel-open-dkms depends + local -r debian_sources="/etc/apt/sources.list.d/debian.sources" + local components="main contrib" - if is_ubuntu ; then - curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ - "${NVIDIA_REPO_URL}/cuda-${shortname}.pin" \ - -o /etc/apt/preferences.d/cuda-repository-pin-600 + sed -i -e "s/Components: .*$/Components: ${components}/" "${debian_sources}" + elif is_debian ; then + sed -i -e 's/ main$/ main contrib/' /etc/apt/sources.list fi } -function uninstall_local_cuda_repo(){ - apt-get purge -yq "${CUDA_LOCAL_REPO_PKG_NAME}" - CUDA_LOCAL_REPO_INSTALLED="0" + +function set_hadoop_property() { + local -r config_file=$1 + local -r property=$2 + local -r value=$3 + "${bdcfg}" set_property \ + --configuration_file "${HADOOP_CONF_DIR}/${config_file}" \ + --name "${property}" --value "${value}" \ + --clobber } -CUDNN_LOCAL_REPO_INSTALLED="0" -CUDNN_PKG_NAME="" -function install_local_cudnn_repo() { - if [[ "${CUDNN_LOCAL_REPO_INSTALLED}" == "1" ]]; then return ; fi - pkgname="cudnn-local-repo-${shortname}-${CUDNN}" - CUDNN_PKG_NAME="${pkgname}" - local_deb_fn="${pkgname}_1.0-1_amd64.deb" - local_deb_url="${NVIDIA_BASE_DL_URL}/cudnn/${CUDNN}/local_installers/${local_deb_fn}" +function clean_up_sources_lists() { + # + # bigtop (primary) + # + local -r dataproc_repo_file="/etc/apt/sources.list.d/dataproc.list" - # ${NVIDIA_BASE_DL_URL}/redist/cudnn/v8.6.0/local_installers/11.8/cudnn-linux-x86_64-8.6.0.163_cuda11-archive.tar.xz - curl -fsSL --retry-connrefused --retry 3 --retry-max-time 5 \ - "${local_deb_url}" -o "${tmpdir}/local-installer.deb" + if [[ -f "${dataproc_repo_file}" ]] && ! 
grep -q signed-by "${dataproc_repo_file}" ; then + region="$(get_metadata_value zone | perl -p -e 's:.*/:: ; s:-[a-z]+$::')" - dpkg -i "${tmpdir}/local-installer.deb" + local regional_bigtop_repo_uri + regional_bigtop_repo_uri=$(cat ${dataproc_repo_file} | + sed "s#/dataproc-bigtop-repo/#/goog-dataproc-bigtop-repo-${region}/#" | + grep "deb .*goog-dataproc-bigtop-repo-${region}.* dataproc contrib" | + cut -d ' ' -f 2 | + head -1) - rm -f "${tmpdir}/local-installer.deb" + if [[ "${regional_bigtop_repo_uri}" == */ ]]; then + local -r bigtop_key_uri="${regional_bigtop_repo_uri}archive.key" + else + local -r bigtop_key_uri="${regional_bigtop_repo_uri}/archive.key" + fi - cp /var/cudnn-local-repo-*-${CUDNN}*/cudnn-local-*-keyring.gpg /usr/share/keyrings + local -r bigtop_kr_path="/usr/share/keyrings/bigtop-keyring.gpg" + rm -f "${bigtop_kr_path}" + curl -fsS --retry-connrefused --retry 10 --retry-max-time 30 \ + "${bigtop_key_uri}" | gpg --dearmor -o "${bigtop_kr_path}" - CUDNN_LOCAL_REPO_INSTALLED="1" -} + sed -i -e "s:deb https:deb [signed-by=${bigtop_kr_path}] https:g" "${dataproc_repo_file}" + sed -i -e "s:deb-src https:deb-src [signed-by=${bigtop_kr_path}] https:g" "${dataproc_repo_file}" + fi -function uninstall_local_cudnn_repo() { - apt-get purge -yq "${CUDNN_PKG_NAME}" - CUDNN_LOCAL_REPO_INSTALLED="0" -} + # + # adoptium + # + # https://adoptium.net/installation/linux/#_deb_installation_on_debian_or_ubuntu + local -r key_url="https://packages.adoptium.net/artifactory/api/gpg/key/public" + local -r adoptium_kr_path="/usr/share/keyrings/adoptium.gpg" + rm -f "${adoptium_kr_path}" + curl -fsS --retry-connrefused --retry 10 --retry-max-time 30 "${key_url}" \ + | gpg --dearmor -o "${adoptium_kr_path}" + echo "deb [signed-by=${adoptium_kr_path}] https://packages.adoptium.net/artifactory/deb/ $(os_codename) main" \ + > /etc/apt/sources.list.d/adoptium.list -CUDNN8_LOCAL_REPO_INSTALLED="0" -CUDNN8_PKG_NAME="" -function install_local_cudnn8_repo() { - if [[ 
"${CUDNN8_LOCAL_REPO_INSTALLED}" == "1" ]]; then return ; fi - if is_ubuntu ; then cudnn8_shortname="ubuntu2004" - elif is_debian ; then cudnn8_shortname="debian11" - else return 0 ; fi - if is_cuda12 ; then CUDNN8_CUDA_VER=12.0 - elif is_cuda11 ; then CUDNN8_CUDA_VER=11.8 - else CUDNN8_CUDA_VER="${CUDA_VERSION}" ; fi - cudnn_pkg_version="${CUDNN_VERSION}-1+cuda${CUDNN8_CUDA_VER}" - pkgname="cudnn-local-repo-${cudnn8_shortname}-${CUDNN_VERSION}" - CUDNN8_PKG_NAME="${pkgname}" + # + # docker + # + local docker_kr_path="/usr/share/keyrings/docker-keyring.gpg" + local docker_repo_file="/etc/apt/sources.list.d/docker.list" + local -r docker_key_url="https://download.docker.com/linux/$(os_id)/gpg" - deb_fn="${pkgname}_1.0-1_amd64.deb" - local_deb_fn="${tmpdir}/${deb_fn}" - local_deb_url="${NVIDIA_BASE_DL_URL}/redist/cudnn/v${CUDNN}/local_installers/${CUDNN8_CUDA_VER}/${deb_fn}" - curl -fsSL --retry-connrefused --retry 3 --retry-max-time 5 \ - "${local_deb_url}" -o "${local_deb_fn}" + rm -f "${docker_kr_path}" + curl -fsS --retry-connrefused --retry 10 --retry-max-time 30 "${docker_key_url}" \ + | gpg --dearmor -o "${docker_kr_path}" + echo "deb [signed-by=${docker_kr_path}] https://download.docker.com/linux/$(os_id) $(os_codename) stable" \ + > ${docker_repo_file} - dpkg -i "${local_deb_fn}" + # + # google cloud + logging/monitoring + # + if ls /etc/apt/sources.list.d/google-cloud*.list ; then + rm -f /usr/share/keyrings/cloud.google.gpg + curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg + for list in google-cloud google-cloud-logging google-cloud-monitoring ; do + list_file="/etc/apt/sources.list.d/${list}.list" + if [[ -f "${list_file}" ]]; then + sed -i -e 's:deb https:deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https:g' "${list_file}" + fi + done + fi - rm -f "${local_deb_fn}" + # + # cran-r + # + if [[ -f /etc/apt/sources.list.d/cran-r.list ]]; then + 
keyid="0x95c0faf38db3ccad0c080a7bdc78b2ddeabc47b7" + if is_ubuntu18 ; then keyid="0x51716619E084DAB9"; fi + rm -f /usr/share/keyrings/cran-r.gpg + curl "https://keyserver.ubuntu.com/pks/lookup?op=get&search=${keyid}" | \ + gpg --dearmor -o /usr/share/keyrings/cran-r.gpg + sed -i -e 's:deb http:deb [signed-by=/usr/share/keyrings/cran-r.gpg] http:g' /etc/apt/sources.list.d/cran-r.list + fi - cp /var/cudnn-local-repo-*-${CUDNN}*/cudnn-local-*-keyring.gpg /usr/share/keyrings - CUDNN8_LOCAL_REPO_INSTALLED="1" -} + # + # mysql + # + if [[ -f /etc/apt/sources.list.d/mysql.list ]]; then + rm -f /usr/share/keyrings/mysql.gpg + curl 'https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xBCA43417C3B485DD128EC6D4B7B3B788A8D3785C' | \ + gpg --dearmor -o /usr/share/keyrings/mysql.gpg + sed -i -e 's:deb https:deb [signed-by=/usr/share/keyrings/mysql.gpg] https:g' /etc/apt/sources.list.d/mysql.list + fi + + if [[ -f /etc/apt/trusted.gpg ]] ; then mv /etc/apt/trusted.gpg /etc/apt/old-trusted.gpg ; fi -function uninstall_local_cudnn8_repo() { - apt-get purge -yq "${CUDNN8_PKG_NAME}" - CUDNN8_LOCAL_REPO_INSTALLED="0" } -function install_nvidia_nccl() { - local -r nccl_version="${NCCL_VERSION}-1+cuda${CUDA_VERSION}" +function set_proxy(){ + METADATA_HTTP_PROXY="$(get_metadata_attribute http-proxy '')" - if is_rocky ; then - execute_with_retries \ - dnf -y -q install \ - "libnccl-${nccl_version}" "libnccl-devel-${nccl_version}" "libnccl-static-${nccl_version}" - sync - elif is_ubuntu ; then - install_cuda_keyring_pkg + if [[ -z "${METADATA_HTTP_PROXY}" ]] ; then return ; fi - apt-get update -qq + export METADATA_HTTP_PROXY + export http_proxy="${METADATA_HTTP_PROXY}" + export https_proxy="${METADATA_HTTP_PROXY}" + export HTTP_PROXY="${METADATA_HTTP_PROXY}" + export HTTPS_PROXY="${METADATA_HTTP_PROXY}" + no_proxy="localhost,127.0.0.0/8,::1,metadata.google.internal,169.254.169.254" + local no_proxy_svc + for no_proxy_svc in compute secretmanager dns servicedirectory logging \ + 
bigquery composer pubsub bigquerydatatransfer dataflow \ + storage datafusion ; do + no_proxy="${no_proxy},${no_proxy_svc}.googleapis.com" + done - if is_ubuntu18 ; then - execute_with_retries \ - apt-get install -q -y \ - libnccl2 libnccl-dev - sync - else - execute_with_retries \ - apt-get install -q -y \ - "libnccl2=${nccl_version}" "libnccl-dev=${nccl_version}" - sync - fi + export NO_PROXY="${no_proxy}" +} + +function is_ramdisk() { + if [[ "${1:-}" == "-f" ]] ; then unset IS_RAMDISK ; fi + if ( test -v IS_RAMDISK && "${IS_RAMDISK}" == "true" ) ; then return 0 + elif ( test -v IS_RAMDISK && "${IS_RAMDISK}" == "false" ) ; then return 1 ; fi + + if ( test -d /mnt/shm && grep -q /mnt/shm /proc/mounts ) ; then + IS_RAMDISK="true" + return 0 else - echo "Unsupported OS: '${OS_NAME}'" - # NB: this tarball is 10GB in size, but can be used to install NCCL on non-ubuntu systems - # wget https://developer.download.nvidia.com/hpc-sdk/24.7/nvhpc_2024_247_Linux_x86_64_cuda_multi.tar.gz - # tar xpzf nvhpc_2024_247_Linux_x86_64_cuda_multi.tar.gz - # nvhpc_2024_247_Linux_x86_64_cuda_multi/install - return + IS_RAMDISK="false" + return 1 fi } -function is_src_nvidia() ( set +x ; [[ "${GPU_DRIVER_PROVIDER}" == "NVIDIA" ]] ; ) -function is_src_os() ( set +x ; [[ "${GPU_DRIVER_PROVIDER}" == "OS" ]] ; ) +function mount_ramdisk(){ + local free_mem + free_mem="$(awk '/^MemFree/ {print $2}' /proc/meminfo)" + if [[ ${free_mem} -lt 20500000 ]]; then return 0 ; fi -function install_nvidia_cudnn() { - local major_version - major_version="${CUDNN_VERSION%%.*}" - local cudnn_pkg_version - cudnn_pkg_version="${CUDNN_VERSION}-1+cuda${CUDA_VERSION}" + # Write to a ramdisk instead of churning the persistent disk - if is_rocky ; then - if is_cudnn8 ; then - execute_with_retries dnf -y -q install \ - "libcudnn${major_version}" \ - "libcudnn${major_version}-devel" - sync - elif is_cudnn9 ; then - execute_with_retries dnf -y -q install \ - "libcudnn9-static-cuda-${CUDA_VERSION%%.*}" \ - 
"libcudnn9-devel-cuda-${CUDA_VERSION%%.*}" - sync - else - echo "Unsupported cudnn version: '${major_version}'" - fi - elif is_debuntu; then - if ge_debian12 && is_src_os ; then - apt-get -y install nvidia-cudnn - else - local CUDNN="${CUDNN_VERSION%.*}" - if is_cudnn8 ; then - install_local_cudnn8_repo + tmpdir="/mnt/shm" + mkdir -p "${tmpdir}/pkgs_dirs" + mount -t tmpfs tmpfs "${tmpdir}" - apt-get update -qq + # Download conda packages to tmpfs + /opt/conda/miniconda3/bin/conda config --add pkgs_dirs "${tmpdir}/pkgs_dirs" - execute_with_retries \ - apt-get -y install --no-install-recommends \ - "libcudnn8=${cudnn_pkg_version}" \ - "libcudnn8-dev=${cudnn_pkg_version}" - sync - elif is_cudnn9 ; then - install_cuda_keyring_pkg + # Download OS packages to tmpfs + if is_debuntu ; then + mount -t tmpfs tmpfs /var/cache/apt/archives + else + mount -t tmpfs tmpfs /var/cache/dnf + fi + is_ramdisk -f +} - apt-get update -qq +function check_os() { + if is_debian && ( ! is_debian10 && ! is_debian11 && ! is_debian12 ) ; then + echo "Error: The Debian version ($(os_version)) is not supported. Please use a compatible Debian version." + exit 1 + elif is_ubuntu && ( ! is_ubuntu18 && ! is_ubuntu20 && ! is_ubuntu22 ) ; then + echo "Error: The Ubuntu version ($(os_version)) is not supported. Please use a compatible Ubuntu version." + exit 1 + elif is_rocky && ( ! is_rocky8 && ! is_rocky9 ) ; then + echo "Error: The Rocky Linux version ($(os_version)) is not supported. Please use a compatible Rocky Linux version." 
+ exit 1 + fi - execute_with_retries \ - apt-get -y install --no-install-recommends \ - "libcudnn9-cuda-${CUDA_VERSION%%.*}" \ - "libcudnn9-dev-cuda-${CUDA_VERSION%%.*}" \ - "libcudnn9-static-cuda-${CUDA_VERSION%%.*}" - sync - else - echo "Unsupported cudnn version: [${CUDNN_VERSION}]" - fi - fi - elif is_ubuntu ; then - local -a packages - packages=( - "libcudnn${major_version}=${cudnn_pkg_version}" - "libcudnn${major_version}-dev=${cudnn_pkg_version}") - execute_with_retries \ - apt-get install -q -y --no-install-recommends "${packages[*]}" - sync - else - echo "Unsupported OS: '${OS_NAME}'" + SPARK_VERSION="$(spark-submit --version 2>&1 | sed -n 's/.*version[[:blank:]]\+\([0-9]\+\.[0-9]\).*/\1/p' | head -n1)" + readonly SPARK_VERSION + if version_lt "${SPARK_VERSION}" "3.1" || \ + version_ge "${SPARK_VERSION}" "4.0" ; then + echo "Error: Your Spark version is not supported. Please upgrade Spark to one of the supported versions." exit 1 fi - ldconfig - - echo "NVIDIA cuDNN successfully installed for ${OS_NAME}." + # Detect dataproc image version + if (! test -v DATAPROC_IMAGE_VERSION) ; then + if test -v DATAPROC_VERSION ; then + DATAPROC_IMAGE_VERSION="${DATAPROC_VERSION}" + else + if version_lt "${SPARK_VERSION}" "3.2" ; then DATAPROC_IMAGE_VERSION="2.0" + elif version_lt "${SPARK_VERSION}" "3.4" ; then DATAPROC_IMAGE_VERSION="2.1" + elif version_lt "${SPARK_VERSION}" "3.6" ; then DATAPROC_IMAGE_VERSION="2.2" + else echo "Unknown dataproc image version" ; exit 1 ; fi + fi + fi } -CA_TMPDIR="$(mktemp -u -d -p /run/tmp -t ca_dir-XXXX)" -PSN="$(get_metadata_attribute private_secret_name)" -readonly PSN function configure_dkms_certs() { - if [[ -z "${PSN}" ]]; then + if test -v PSN && [[ -z "${PSN}" ]]; then echo "No signing secret provided. 
skipping"; return 0 fi @@ -575,28 +391,27 @@ function configure_dkms_certs() { echo "Private key material exists" local expected_modulus_md5sum - expected_modulus_md5sum=$(get_metadata_attribute cert_modulus_md5sum) + expected_modulus_md5sum=$(get_metadata_attribute modulus_md5sum) if [[ -n "${expected_modulus_md5sum}" ]]; then modulus_md5sum="${expected_modulus_md5sum}" - else - modulus_md5sum="bd40cf5905c7bba4225d330136fdbfd3" - fi - # Verify that cert md5sum matches expected md5sum - if [[ "${modulus_md5sum}" != "$(openssl rsa -noout -modulus -in \"${CA_TMPDIR}/db.rsa\" | openssl md5 | awk '{print $2}')" ]]; then - echo "unmatched rsa key modulus" - fi - ln -sf "${CA_TMPDIR}/db.rsa" /var/lib/dkms/mok.key + # Verify that cert md5sum matches expected md5sum + if [[ "${modulus_md5sum}" != "$(openssl rsa -noout -modulus -in "${CA_TMPDIR}/db.rsa" | openssl md5 | awk '{print $2}')" ]]; then + echo "unmatched rsa key" + fi - # Verify that key md5sum matches expected md5sum - if [[ "${modulus_md5sum}" != "$(openssl x509 -noout -modulus -in /var/lib/dkms/mok.pub | openssl md5 | awk '{print $2}')" ]]; then - echo "unmatched x509 cert modulus" + # Verify that key md5sum matches expected md5sum + if [[ "${modulus_md5sum}" != "$(openssl x509 -noout -modulus -in ${mok_der} | openssl md5 | awk '{print $2}')" ]]; then + echo "unmatched x509 cert" + fi + else + modulus_md5sum="$(openssl rsa -noout -modulus -in "${CA_TMPDIR}/db.rsa" | openssl md5 | awk '{print $2}')" fi + ln -sf "${CA_TMPDIR}/db.rsa" "${mok_key}" return fi - # Retrieve cloud secrets keys local sig_priv_secret_name sig_priv_secret_name="${PSN}" @@ -623,16 +438,14 @@ function configure_dkms_certs() { | base64 --decode \ | dd status=none of="${CA_TMPDIR}/db.der" - # symlink private key and copy public cert from volatile storage for DKMS - if is_ubuntu ; then - mkdir -p /var/lib/shim-signed/mok - ln -sf "${CA_TMPDIR}/db.rsa" /var/lib/shim-signed/mok/MOK.priv - cp -f "${CA_TMPDIR}/db.der" 
/var/lib/shim-signed/mok/MOK.der - else - mkdir -p /var/lib/dkms/ - ln -sf "${CA_TMPDIR}/db.rsa" /var/lib/dkms/mok.key - cp -f "${CA_TMPDIR}/db.der" /var/lib/dkms/mok.pub - fi + local mok_directory="$(dirname "${mok_key}")" + mkdir -p "${mok_directory}" + + # symlink private key and copy public cert from volatile storage to DKMS directory + ln -sf "${CA_TMPDIR}/db.rsa" "${mok_key}" + cp -f "${CA_TMPDIR}/db.der" "${mok_der}" + + modulus_md5sum="$(openssl rsa -noout -modulus -in "${mok_key}" | openssl md5 | awk '{print $2}')" } function clear_dkms_key { @@ -640,430 +453,474 @@ function clear_dkms_key { echo "No signing secret provided. skipping" >&2 return 0 fi - rm -rf "${CA_TMPDIR}" /var/lib/dkms/mok.key /var/lib/shim-signed/mok/MOK.priv + rm -rf "${CA_TMPDIR}" "${mok_key}" } -function add_contrib_component() { - if ge_debian12 ; then - # Include in sources file components on which nvidia-kernel-open-dkms depends - local -r debian_sources="/etc/apt/sources.list.d/debian.sources" - local components="main contrib" +function check_secure_boot() { + local SECURE_BOOT="disabled" + SECURE_BOOT=$(mokutil --sb-state|awk '{print $2}') - sed -i -e "s/Components: .*$/Components: ${components}/" "${debian_sources}" - elif is_debian ; then - sed -i -e 's/ main$/ main contrib/' /etc/apt/sources.list + PSN="$(get_metadata_attribute private_secret_name)" + readonly PSN + + if [[ "${SECURE_BOOT}" == "enabled" ]] && le_debian11 ; then + echo "Error: Secure Boot is not supported on Debian before image 2.2. Consider disabling Secure Boot while creating the cluster." + return + elif [[ "${SECURE_BOOT}" == "enabled" ]] && [[ -z "${PSN}" ]]; then + echo "Secure boot is enabled, but no signing material provided." 
+ echo "Consider either disabling secure boot or provide signing material as per" + echo "https://github.com/GoogleCloudDataproc/custom-images/tree/master/examples/secure-boot" + return fi -} -function add_nonfree_components() { - if is_src_nvidia ; then return; fi - if ge_debian12 ; then - # Include in sources file components on which nvidia-open-kernel-dkms depends - local -r debian_sources="/etc/apt/sources.list.d/debian.sources" - local components="main contrib non-free non-free-firmware" + CA_TMPDIR="$(mktemp -u -d -p /run/tmp -t ca_dir-XXXX)" + readonly CA_TMPDIR - sed -i -e "s/Components: .*$/Components: ${components}/" "${debian_sources}" - elif is_debian ; then - sed -i -e 's/ main$/ main contrib non-free/' /etc/apt/sources.list - fi + if is_ubuntu ; then mok_key=/var/lib/shim-signed/mok/MOK.priv + mok_der=/var/lib/shim-signed/mok/MOK.der + else mok_key=/var/lib/dkms/mok.key + mok_der=/var/lib/dkms/mok.pub ; fi } -function add_repo_nvidia_container_toolkit() { - if is_debuntu ; then - local kr_path=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg - local sources_list_path=/etc/apt/sources.list.d/nvidia-container-toolkit.list - # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html - test -f "${kr_path}" || - curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \ - | gpg --dearmor -o "${kr_path}" - - test -f "${sources_list_path}" || - curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \ - | perl -pe "s#deb https://#deb [signed-by=${kr_path}] https://#g" \ - | tee "${sources_list_path}" - fi +function restart_knox() { + systemctl stop knox + rm -rf "${KNOX_HOME}/data/deployments/*" + systemctl start knox } -function add_repo_cuda() { - if is_debuntu ; then - local kr_path=/usr/share/keyrings/cuda-archive-keyring.gpg - local sources_list_path="/etc/apt/sources.list.d/cuda-${shortname}-x86_64.list" - echo "deb [signed-by=${kr_path}] 
https://developer.download.nvidia.com/compute/cuda/repos/${shortname}/x86_64/ /" \ - | sudo tee "${sources_list_path}" - curl "${NVIDIA_BASE_DL_URL}/cuda/repos/${shortname}/x86_64/cuda-archive-keyring.gpg" \ - -o "${kr_path}" - elif is_rocky ; then - execute_with_retries "dnf config-manager --add-repo ${NVIDIA_ROCKY_REPO_URL}" - execute_with_retries "dnf clean all" - fi +function is_complete() { + phase="$1" + test -f "${workdir}/complete/${phase}" } -readonly uname_r=$(uname -r) -function build_driver_from_github() { - if is_ubuntu ; then - mok_key=/var/lib/shim-signed/mok/MOK.priv - mok_der=/var/lib/shim-signed/mok/MOK.der - else - mok_key=/var/lib/dkms/mok.key - mok_der=/var/lib/dkms/mok.pub - fi - workdir=/opt/install-nvidia-driver - mkdir -p "${workdir}" - pushd "${workdir}" - test -d "${workdir}/open-gpu-kernel-modules" || { - tarball_fn="${DRIVER_VERSION}.tar.gz" - curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ - "https://github.com/NVIDIA/open-gpu-kernel-modules/archive/refs/tags/${tarball_fn}" \ - | tar xz - mv "open-gpu-kernel-modules-${DRIVER_VERSION}" open-gpu-kernel-modules - } - cd open-gpu-kernel-modules +function mark_complete() { + phase="$1" + touch "${workdir}/complete/${phase}" +} - time make -j$(nproc) modules \ - > /var/log/open-gpu-kernel-modules-build.log \ - 2> /var/log/open-gpu-kernel-modules-build_error.log - sync +function mark_incomplete() { + phase="$1" + rm -f "${workdir}/complete/${phase}" +} - if [[ -n "${PSN}" ]]; then - #configure_dkms_certs - for module in $(find kernel-open -name '*.ko'); do - "/lib/modules/${uname_r}/build/scripts/sign-file" sha256 \ - "${mok_key}" \ - "${mok_der}" \ - "${module}" - done - #clear_dkms_key - fi +function install_dependencies() { + is_complete install-dependencies && return 0 - make modules_install \ - >> /var/log/open-gpu-kernel-modules-build.log \ - 2>> /var/log/open-gpu-kernel-modules-build_error.log - popd + pkg_list="screen" + if is_debuntu ; then execute_with_retries 
apt-get -y -q install ${pkg_list} + elif is_rocky ; then execute_with_retries dnf -y -q install ${pkg_list} ; fi + mark_complete install-dependencies } -function build_driver_from_packages() { - if is_debuntu ; then - if [[ -n "$(apt-cache search -n "nvidia-driver-${DRIVER}-server-open")" ]] ; then - local pkglist=("nvidia-driver-${DRIVER}-server-open") ; else - local pkglist=("nvidia-driver-${DRIVER}-open") ; fi - if is_debian ; then - pkglist=( - "firmware-nvidia-gsp=${DRIVER_VERSION}-1" - "nvidia-smi=${DRIVER_VERSION}-1" - "nvidia-alternative=${DRIVER_VERSION}-1" - "nvidia-kernel-open-dkms=${DRIVER_VERSION}-1" - "nvidia-kernel-support=${DRIVER_VERSION}-1" - "nvidia-modprobe=${DRIVER_VERSION}-1" - "libnvidia-ml1=${DRIVER_VERSION}-1" - ) - fi - add_contrib_component - apt-get update -qq - execute_with_retries apt-get install -y -qq --no-install-recommends dkms - #configure_dkms_certs - execute_with_retries apt-get install -y -qq --no-install-recommends "${pkglist[@]}" - sync +function prepare_pip_env() { + # Clear pip cache + # TODO: make this conditional on which OSs have pip without cache purge + test -d "${workdir}/python-venv" || python3 -m venv "${workdir}/python-venv" + source "${workdir}/python-venv/bin/activate" - elif is_rocky ; then - #configure_dkms_certs - if execute_with_retries dnf -y -q module install "nvidia-driver:${DRIVER}-dkms" ; then - echo "nvidia-driver:${DRIVER}-dkms installed successfully" - else - execute_with_retries dnf -y -q module install 'nvidia-driver:latest' - fi - sync + pip cache purge || echo "unable to purge pip cache" + if is_ramdisk ; then + # Download pip packages to tmpfs + mkdir -p "${tmpdir}/cache-dir" + pip config set global.cache-dir "${tmpdir}/cache-dir" || echo "unable to set global.cache-dir" fi - #clear_dkms_key } -function install_nvidia_userspace_runfile() { - if test -f "${tmpdir}/userspace-complete" ; then return ; fi - curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ - "${USERSPACE_URL}" -o 
"${tmpdir}/userspace.run" - execute_with_retries bash "${tmpdir}/userspace.run" --no-kernel-modules --silent --install-libglvnd --tmpdir="${tmpdir}" - rm -f "${tmpdir}/userspace.run" - touch "${tmpdir}/userspace-complete" - sync +function prepare_conda_env() { + CONDA=/opt/conda/miniconda3/bin/conda + touch ~/.condarc + cp ~/.condarc ~/.condarc.default + if is_ramdisk ; then + # Download conda packages to tmpfs + mkdir -p "${tmpdir}/conda_cache" + ${CONDA} config --add pkgs_dirs "${tmpdir}/conda_cache" + fi } -function install_cuda_runfile() { - if test -f "${tmpdir}/cuda-complete" ; then return ; fi - time curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ - "${NVIDIA_CUDA_URL}" -o "${tmpdir}/cuda.run" - execute_with_retries bash "${tmpdir}/cuda.run" --silent --toolkit --no-opengl-libs --tmpdir="${tmpdir}" - rm -f "${tmpdir}/cuda.run" - touch "${tmpdir}/cuda-complete" - sync -} +function prepare_common_env() { + define_os_comparison_functions -function install_cuda_toolkit() { - local cudatk_package=cuda-toolkit - if ge_debian12 && is_src_os ; then - cudatk_package="${cudatk_package}=${CUDA_FULL_VERSION}-1" - elif [[ -n "${CUDA_VERSION}" ]]; then - cudatk_package="${cudatk_package}-${CUDA_VERSION//./-}" - fi - cuda_package="cuda=${CUDA_FULL_VERSION}-1" - readonly cudatk_package - if is_debuntu ; then -# if is_ubuntu ; then execute_with_retries "apt-get install -y -qq --no-install-recommends cuda-drivers-${DRIVER}=${DRIVER_VERSION}-1" ; fi - execute_with_retries apt-get install -y -qq --no-install-recommends ${cuda_package} ${cudatk_package} - sync - elif is_rocky ; then - # rocky9: cuda-11-[7,8], cuda-12-[1..6] - execute_with_retries dnf -y -q install "${cudatk_package}" - sync - fi -} + # Verify OS compatability and Secure boot state + check_os + check_secure_boot -function load_kernel_module() { - # for some use cases, the kernel module needs to be removed before first use of nvidia-smi - for module in nvidia_uvm nvidia_drm nvidia_modeset nvidia ; 
do - rmmod ${module} > /dev/null 2>&1 || echo "unable to rmmod ${module}" - done + readonly _shortname="$(os_id)$(os_version|perl -pe 's/(\d+).*/$1/')" - depmod -a - modprobe nvidia - for suffix in uvm modeset drm; do - modprobe "nvidia-${suffix}" - done - # TODO: if peermem is available, also modprobe nvidia-peermem -} + # Dataproc configurations + readonly HADOOP_CONF_DIR='/etc/hadoop/conf' + readonly HIVE_CONF_DIR='/etc/hive/conf' + readonly SPARK_CONF_DIR='/etc/spark/conf' -# Install NVIDIA GPU driver provided by NVIDIA -function install_nvidia_gpu_driver() { - if ( ge_debian12 && is_src_os ) ; then - add_nonfree_components - add_repo_nvidia_container_toolkit - apt-get update -qq - #configure_dkms_certs - apt-get -yq install \ - nvidia-container-toolkit \ - dkms \ - nvidia-open-kernel-dkms \ - nvidia-open-kernel-support \ - nvidia-smi \ - libglvnd0 \ - libcuda1 - #clear_dkms_key - elif ( le_ubuntu18 || le_debian10 || (ge_debian12 && le_cuda11) ) ; then + OS_NAME="$(lsb_release -is | tr '[:upper:]' '[:lower:]')" + readonly OS_NAME - install_nvidia_userspace_runfile + # node role + ROLE="$(get_metadata_attribute dataproc-role)" + readonly ROLE - build_driver_from_github + # master node + MASTER="$(get_metadata_attribute dataproc-master)" + readonly MASTER - install_cuda_runfile - elif is_debuntu ; then - install_cuda_keyring_pkg + workdir=/opt/install-dpgce + tmpdir=/tmp/ + temp_bucket="$(get_metadata_attribute dataproc-temp-bucket)" + readonly temp_bucket + readonly pkg_bucket="gs://${temp_bucket}/dpgce-packages" + uname_r=$(uname -r) + readonly uname_r + readonly bdcfg="/usr/local/bin/bdconfig" + export DEBIAN_FRONTEND=noninteractive - build_driver_from_packages + # Knox config + readonly KNOX_HOME=/usr/lib/knox - install_cuda_toolkit - elif is_rocky ; then - add_repo_cuda + mkdir -p "${workdir}/complete" + set_proxy + mount_ramdisk - build_driver_from_packages + readonly install_log="${tmpdir}/install.log" - install_cuda_toolkit - else - echo "Unsupported OS: 
'${OS_NAME}'" - exit 1 - fi - ldconfig - if is_src_os ; then - echo "NVIDIA GPU driver provided by ${OS_NAME} was installed successfully" + is_complete prepare.common && return + + repair_old_backports + + if is_debuntu ; then + clean_up_sources_lists + apt-get update -qq + apt-get -y clean + apt-get -o DPkg::Lock::Timeout=60 -y autoremove + if ge_debian12 ; then + apt-mark unhold systemd libsystemd0 ; fi + if is_ubuntu ; then + while ! command -v gcloud ; do sleep 5s ; done + fi else - echo "NVIDIA GPU driver provided by NVIDIA was installed successfully" + dnf clean all fi + + # When creating a disk image: + if [[ -n "$(get_metadata_attribute creating-image "")" ]]; then + df / > "/run/disk-usage.log" + + # zero free disk space + ( set +e + time dd if=/dev/zero of=/zero status=none ; sync ; sleep 3s ; rm -f /zero + ) + + install_dependencies + + # Monitor disk usage in a screen session + touch "/run/keep-running-df" + screen -d -m -LUS keep-running-df \ + bash -c "while [[ -f /run/keep-running-df ]] ; do df / | tee -a /run/disk-usage.log ; sleep 5s ; done" + fi + + mark_complete prepare.common } -# Collects 'gpu_utilization' and 'gpu_memory_utilization' metrics -function install_gpu_agent() { - if ! 
command -v pip; then - execute_with_retries "apt-get install -y -qq python-pip" +function pip_exit_handler() { + if is_ramdisk ; then + # remove the tmpfs pip cache-dir + pip config unset global.cache-dir || echo "unable to unset global pip cache" fi - local install_dir=/opt/gpu-utilization-agent - mkdir -p "${install_dir}" - curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ - "${GPU_AGENT_REPO_URL}/requirements.txt" -o "${install_dir}/requirements.txt" - curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ - "${GPU_AGENT_REPO_URL}/report_gpu_metrics.py" \ - | sed -e 's/-u --format=/--format=/' \ - | dd status=none of="${install_dir}/report_gpu_metrics.py" - execute_with_retries pip install -r "${install_dir}/requirements.txt" - sync - - # Generate GPU service. - cat </lib/systemd/system/gpu-utilization-agent.service -[Unit] -Description=GPU Utilization Metric Agent - -[Service] -Type=simple -PIDFile=/run/gpu_agent.pid -ExecStart=/bin/bash --login -c 'python "${install_dir}/report_gpu_metrics.py"' -User=root -Group=root -WorkingDirectory=/ -Restart=always - -[Install] -WantedBy=multi-user.target -EOF - # Reload systemd manager configuration - systemctl daemon-reload - # Enable gpu-utilization-agent service - systemctl --no-reload --now enable gpu-utilization-agent.service } -function set_hadoop_property() { - local -r config_file=$1 - local -r property=$2 - local -r value=$3 - "${bdcfg}" set_property \ - --configuration_file "${HADOOP_CONF_DIR}/${config_file}" \ - --name "${property}" --value "${value}" \ - --clobber +function conda_exit_handler() { + mv ~/.condarc.default ~/.condarc } -function configure_yarn() { - if [[ -d "${HADOOP_CONF_DIR}" && ! 
-f "${HADOOP_CONF_DIR}/resource-types.xml" ]]; then - printf '\n' >"${HADOOP_CONF_DIR}/resource-types.xml" +function common_exit_handler() { + set +ex + echo "Exit handler invoked" + + # If system memory was sufficient to mount memory-backed filesystems + if is_ramdisk ; then + # Clean up shared memory mounts + for shmdir in /var/cache/apt/archives /var/cache/dnf /mnt/shm /tmp ; do + if ( grep -q "^tmpfs ${shmdir}" /proc/mounts && ! grep -q "^tmpfs ${shmdir}" /etc/fstab ) ; then + umount -f ${shmdir} + fi + done fi - set_hadoop_property 'resource-types.xml' 'yarn.resource-types' 'yarn.io/gpu' - set_hadoop_property 'capacity-scheduler.xml' \ - 'yarn.scheduler.capacity.resource-calculator' \ - 'org.apache.hadoop.yarn.util.resource.DominantResourceCalculator' + if is_debuntu ; then + # Clean up OS package cache + apt-get -y -qq clean + apt-get -y -qq -o DPkg::Lock::Timeout=60 autoremove + # re-hold systemd package + if ge_debian12 ; then + apt-mark hold systemd libsystemd0 ; fi + else + dnf clean all + fi - set_hadoop_property 'yarn-site.xml' 'yarn.resource-types' 'yarn.io/gpu' -} + # When creating image, print disk usage statistics, zero unused disk space + if [[ -n "$(get_metadata_attribute creating-image)" ]]; then + # print disk usage statistics for large components + if is_ubuntu ; then + du -hs \ + /usr/lib/{pig,hive,hadoop,jvm,spark,google-cloud-sdk,x86_64-linux-gnu} \ + /usr/lib \ + /opt/nvidia/* \ + /opt/conda/miniconda3 | sort -h + elif is_debian ; then + du -x -hs \ + /usr/lib/{pig,hive,hadoop,jvm,spark,google-cloud-sdk,x86_64-linux-gnu,} \ + /var/lib/{docker,mysql,} \ + /opt/nvidia/* \ + /opt/{conda,google-cloud-ops-agent,install-nvidia,} \ + /usr/bin \ + /usr \ + /var \ + / 2>/dev/null | sort -h + else + du -hs \ + /var/lib/docker \ + /usr/lib/{pig,hive,hadoop,firmware,jvm,spark,atlas,} \ + /usr/lib64/google-cloud-sdk \ + /opt/nvidia/* \ + /opt/conda/miniconda3 + fi -# This configuration should be applied only if GPU is attached to the node -function 
configure_yarn_nodemanager() { - set_hadoop_property 'yarn-site.xml' 'yarn.nodemanager.resource-plugins' 'yarn.io/gpu' - set_hadoop_property 'yarn-site.xml' \ - 'yarn.nodemanager.resource-plugins.gpu.allowed-gpu-devices' 'auto' - set_hadoop_property 'yarn-site.xml' \ - 'yarn.nodemanager.resource-plugins.gpu.path-to-discovery-executables' $NVIDIA_SMI_PATH - set_hadoop_property 'yarn-site.xml' \ - 'yarn.nodemanager.linux-container-executor.cgroups.mount' 'true' - set_hadoop_property 'yarn-site.xml' \ - 'yarn.nodemanager.linux-container-executor.cgroups.mount-path' '/sys/fs/cgroup' - set_hadoop_property 'yarn-site.xml' \ - 'yarn.nodemanager.linux-container-executor.cgroups.hierarchy' 'yarn' - set_hadoop_property 'yarn-site.xml' \ - 'yarn.nodemanager.container-executor.class' \ - 'org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor' - set_hadoop_property 'yarn-site.xml' 'yarn.nodemanager.linux-container-executor.group' 'yarn' + # Process disk usage logs from installation period + rm -f /run/keep-running-df + sync + sleep 5.01s + # compute maximum size of disk during installation + # Log file contains logs like the following (minus the preceeding #): +#Filesystem 1K-blocks Used Available Use% Mounted on +#/dev/vda2 7096908 2611344 4182932 39% / + df / | tee -a "/run/disk-usage.log" - # Fix local dirs access permissions - local yarn_local_dirs=() + perl -e \ + '@siz=( sort { $a => $b } + map { (split)[2] =~ /^(\d+)/ } + grep { m:^/: } ); +$max=$siz[0]; $min=$siz[-1]; $starting="unknown"; $inc=q{$max-$starting}; +print( " samples-taken: ", scalar @siz, $/, + "starting-disk-used: $starting", $/, + "maximum-disk-used: $max", $/, + "minimum-disk-used: $min", $/, + " increased-by: $inc", $/ )' < "/run/disk-usage.log" - readarray -d ',' yarn_local_dirs < <("${bdcfg}" get_property_value \ - --configuration_file "${HADOOP_CONF_DIR}/yarn-site.xml" \ - --name "yarn.nodemanager.local-dirs" 2>/dev/null | tr -d '\n') - if [[ "${#yarn_local_dirs[@]}" -ne "0" && 
"${yarn_local_dirs[@]}" != "None" ]]; then - chown yarn:yarn -R "${yarn_local_dirs[@]/,/}" + # zero free disk space + dd if=/dev/zero of=/zero + sync + sleep 3s + rm -f /zero fi + echo "exit_handler has completed" } -function configure_gpu_exclusive_mode() { - # check if running spark 3, if not, enable GPU exclusive mode - local spark_version - spark_version=$(spark-submit --version 2>&1 | sed -n 's/.*version[[:blank:]]\+\([0-9]\+\.[0-9]\).*/\1/p' | head -n1) - if [[ ${spark_version} != 3.* ]]; then - # include exclusive mode on GPU - nvsmi -c EXCLUSIVE_PROCESS + +# +# Generate repo file under /etc/apt/sources.list.d/ +# +function apt_add_repo() { + local -r repo_name="$1" + local -r repo_data="$3" # "http(s)://host/path/uri argument0 .. argumentN" + local -r include_src="${4:-yes}" + local -r kr_path="${5:-/usr/share/keyrings/${repo_name}.gpg}" + local -r repo_path="${6:-/etc/apt/sources.list.d/${repo_name}.list}" + + echo "deb [signed-by=${kr_path}] ${repo_data}" > "${repo_path}" + if [[ "${include_src}" == "yes" ]] ; then + echo "deb-src [signed-by=${kr_path}] ${repo_data}" >> "${repo_path}" fi -} -function fetch_mig_scripts() { - mkdir -p /usr/local/yarn-mig-scripts - sudo chmod 755 /usr/local/yarn-mig-scripts - wget -P /usr/local/yarn-mig-scripts/ https://raw.githubusercontent.com/NVIDIA/spark-rapids-examples/branch-22.10/examples/MIG-Support/yarn-unpatched/scripts/nvidia-smi - wget -P /usr/local/yarn-mig-scripts/ https://raw.githubusercontent.com/NVIDIA/spark-rapids-examples/branch-22.10/examples/MIG-Support/yarn-unpatched/scripts/mig2gpu.sh - sudo chmod 755 /usr/local/yarn-mig-scripts/* + apt-get update -qq } -function configure_gpu_script() { - # Download GPU discovery script - local -r spark_gpu_script_dir='/usr/lib/spark/scripts/gpu' - mkdir -p ${spark_gpu_script_dir} - # need to update the getGpusResources.sh script to look for MIG devices since if multiple GPUs nvidia-smi still - # lists those because we only disable the specific GIs via CGROUPs. 
Here we just create it based off of: - # https://raw.githubusercontent.com/apache/spark/master/examples/src/main/scripts/getGpusResources.sh - local -r gpus_resources_script="${spark_gpu_script_dir}/getGpusResources.sh" - cat > "${gpus_resources_script}" <<'EOF' -#!/usr/bin/env bash - # -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at +# Generate repo file under /etc/yum.repos.d/ # -# http://www.apache.org/licenses/LICENSE-2.0 +function dnf_add_repo() { + local -r repo_name="$1" + local -r repo_url="$3" # "http(s)://host/path/filename.repo" + local -r kr_path="${5:-/etc/pki/rpm-gpg/${repo_name}.gpg}" + local -r repo_path="${6:-/etc/yum.repos.d/${repo_name}.repo}" + + curl -s -L "${repo_url}" \ + | dd of="${repo_path}" status=progress +# | perl -p -e "s{^gpgkey=.*$}{gpgkey=file://${kr_path}}" \ +} + # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Keyrings default to +# /usr/share/keyrings/${repo_name}.gpg (debian/ubuntu) or +# /etc/pki/rpm-gpg/${repo_name}.gpg (rocky/RHEL) # +function os_add_repo() { + local -r repo_name="$1" + local -r signing_key_url="$2" + local -r repo_data="$3" # "http(s)://host/path/uri argument0 .. 
argumentN" + local kr_path + if is_debuntu ; then kr_path="${5:-/usr/share/keyrings/${repo_name}.gpg}" + else kr_path="${5:-/etc/pki/rpm-gpg/${repo_name}.gpg}" ; fi -ADDRS=$(nvidia-smi --query-gpu=index --format=csv,noheader | perl -e 'print(join(q{,},map{chomp; qq{"$_"}}))') + mkdir -p "$(dirname "${kr_path}")" -echo {\"name\": \"gpu\", \"addresses\":[${ADDRS}]} -EOF + curl -fsS --retry-connrefused --retry 10 --retry-max-time 30 "${signing_key_url}" \ + | gpg --import --no-default-keyring --keyring "${kr_path}" - chmod a+rx "${gpus_resources_script}" + if is_debuntu ; then apt_add_repo "${repo_name}" "${signing_key_url}" "${repo_data}" "${4:-yes}" "${kr_path}" "${6:-}" + else dnf_add_repo "${repo_name}" "${signing_key_url}" "${repo_data}" "${4:-yes}" "${kr_path}" "${6:-}" ; fi +} - local spark_defaults_conf="/etc/spark/conf.dist/spark-defaults.conf" - if ! grep spark.executor.resource.gpu.discoveryScript "${spark_defaults_conf}" ; then - echo "spark.executor.resource.gpu.discoveryScript=${gpus_resources_script}" >> "${spark_defaults_conf}" + +function set_support_matrix() { + # CUDA version and Driver version + # https://docs.nvidia.com/deploy/cuda-compatibility/ + # https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html + # https://developer.nvidia.com/cuda-downloads + + # Minimum supported version for open kernel driver is 515.43.04 + # https://github.com/NVIDIA/open-gpu-kernel-modules/tags + # Rocky8: 12.0: 525.147.05 + local latest + latest="$(curl -s https://download.nvidia.com/XFree86/Linux-x86_64/latest.txt | awk '{print $1}')" + readonly -A DRIVER_FOR_CUDA=( + ["11.7"]="515.65.01" ["11.8"]="525.147.05" + ["12.0"]="525.147.05" ["12.1"]="530.30.02" ["12.4"]="550.135" ["12.5"]="555.42.02" ["12.6"]="560.35.03" + ) + readonly -A DRIVER_SUBVER=( + ["515"]="515.48.07" ["520"]="525.147.05" ["525"]="525.147.05" ["530"]="530.41.03" ["535"]="535.216.01" + ["545"]="545.29.06" ["550"]="550.135" ["555"]="555.58.02" ["560"]="560.35.03" 
["565"]="565.57.01" + ) + # https://developer.nvidia.com/cudnn-downloads + if is_debuntu ; then + readonly -A CUDNN_FOR_CUDA=( + ["11.7"]="9.5.1.17" ["11.8"]="9.5.1.17" + ["12.0"]="9.5.1.17" ["12.1"]="9.5.1.17" ["12.4"]="9.5.1.17" ["12.5"]="9.5.1.17" ["12.6"]="9.5.1.17" + ) + elif is_rocky ; then + # rocky: + # 12.0: 8.8.1.3 + # 12.1: 8.9.3.28 + # 12.2: 8.9.7.29 + # 12.3: 9.0.0.312 + # 12.4: 9.1.1.17 + # 12.5: 9.2.1.18 + # 12.6: 9.5.1.17 + readonly -A CUDNN_FOR_CUDA=( + ["11.7"]="8.9.7.29" ["11.8"]="9.5.1.17" + ["12.0"]="8.8.1.3" ["12.1"]="8.9.3.28" ["12.4"]="9.1.1.17" ["12.5"]="9.2.1.18" ["12.6"]="9.5.1.17" + ) fi + # https://developer.nvidia.com/nccl/nccl-download + # 12.2: 2.19.3, 12.5: 2.21.5 + readonly -A NCCL_FOR_CUDA=( + ["11.7"]="2.21.5" ["11.8"]="2.21.5" + ["12.0"]="2.16.5" ["12.1"]="2.18.3" ["12.4"]="2.23.4" ["12.5"]="2.21.5" ["12.6"]="2.23.4" + ) + readonly -A CUDA_SUBVER=( + ["11.7"]="11.7.1" ["11.8"]="11.8.0" + ["12.0"]="12.0.1" ["12.1"]="12.1.1" ["12.2"]="12.2.2" ["12.3"]="12.3.2" ["12.4"]="12.4.1" ["12.5"]="12.5.1" ["12.6"]="12.6.2" + ) } -function configure_gpu_isolation() { - # enable GPU isolation - sed -i "s/yarn\.nodemanager\.linux\-container\-executor\.group\=.*$/yarn\.nodemanager\.linux\-container\-executor\.group\=yarn/g" "${HADOOP_CONF_DIR}/container-executor.cfg" - if [[ $IS_MIG_ENABLED -ne 0 ]]; then - # configure the container-executor.cfg to have major caps - printf '\n[gpu]\nmodule.enabled=true\ngpu.major-device-number=%s\n\n[cgroups]\nroot=/sys/fs/cgroup\nyarn-hierarchy=yarn\n' $MIG_MAJOR_CAPS >> "${HADOOP_CONF_DIR}/container-executor.cfg" - printf 'export MIG_AS_GPU_ENABLED=1\n' >> "${HADOOP_CONF_DIR}/yarn-env.sh" - printf 'export ENABLE_MIG_GPUS_FOR_CGROUPS=1\n' >> "${HADOOP_CONF_DIR}/yarn-env.sh" - else - printf '\n[gpu]\nmodule.enabled=true\n[cgroups]\nroot=/sys/fs/cgroup\nyarn-hierarchy=yarn\n' >> "${HADOOP_CONF_DIR}/container-executor.cfg" +function set_cuda_version() { + case "${DATAPROC_IMAGE_VERSION}" in + "2.0" ) 
DEFAULT_CUDA_VERSION="12.1.1" ;; # Cuda 12.1.1 - Driver v530.30.02 is the latest version supported by Ubuntu 18) + "2.1" ) DEFAULT_CUDA_VERSION="12.4.1" ;; + "2.2" ) DEFAULT_CUDA_VERSION="12.6.2" ;; + * ) + echo "unrecognized Dataproc image version: ${DATAPROC_IMAGE_VERSION}" + exit 1 + ;; + esac + local cuda_url + cuda_url=$(get_metadata_attribute 'cuda-url' '') + if [[ -n "${cuda_url}" ]] ; then + # if cuda-url metadata variable has been passed, extract default version from url + local CUDA_URL_VERSION + CUDA_URL_VERSION="$(echo "${cuda_url}" | perl -pe 's{^.*/cuda_(\d+\.\d+\.\d+)_\d+\.\d+\.\d+_linux.run$}{$1}')" + if [[ "${CUDA_URL_VERSION}" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]] ; then + DEFAULT_CUDA_VERSION="${CUDA_URL_VERSION%.*}" + fi fi + readonly DEFAULT_CUDA_VERSION - # Configure a systemd unit to ensure that permissions are set on restart - cat >/etc/systemd/system/dataproc-cgroup-device-permissions.service<&2 + if [[ "${nvsmi_works}" == "1" ]] ; then echo -n '' elif [[ ! -f "${nvsmi}" ]] ; then echo "nvidia-smi not installed" >&2 ; return 0 elif ! 
eval "${nvsmi} > /dev/null" ; then echo "nvidia-smi fails" >&2 ; return 0 else nvsmi_works="1" ; fi - if [[ "$1" == "-L" ]] ; then + if test -v 1 && [[ "$1" == "-L" ]] ; then local NV_SMI_L_CACHE_FILE="/var/run/nvidia-smi_-L.txt" if [[ -f "${NV_SMI_L_CACHE_FILE}" ]]; then cat "${NV_SMI_L_CACHE_FILE}" else "${nvsmi}" $* | tee "${NV_SMI_L_CACHE_FILE}" ; fi @@ -1074,394 +931,1289 @@ function nvsmi() { "${nvsmi}" $* } -function install_dependencies() { - if is_debuntu ; then - execute_with_retries apt-get install -y -qq pciutils "linux-headers-${uname_r}" screen - elif is_rocky ; then - execute_with_retries dnf -y -q install pciutils gcc screen +function clear_nvsmi_cache() { + if ( test -v nvsmi_query_xml && test -f "${nvsmi_query_xml}" ) ; then + rm "${nvsmi_query_xml}" + fi +} - local dnf_cmd="dnf -y -q install kernel-devel-${uname_r}" - local install_log="${tmpdir}/install.log" - set +e - eval "${dnf_cmd}" > "${install_log}" 2>&1 - local retval="$?" - set -e +function query_nvsmi() { + if [[ "${nvsmi_works}" != "1" ]] ; then return ; fi + if ( test -v nvsmi_query_xml && test -f "${nvsmi_query_xml}" ) ; then return ; fi + nvsmi -q -x --dtd > "${nvsmi_query_xml}" +} - if [[ "${retval}" == "0" ]] ; then return ; fi +function prepare_gpu_env(){ + set_support_matrix - if grep -q 'Unable to find a match: kernel-devel-' "${install_log}" ; then - # this kernel-devel may have been migrated to the vault - local os_ver="$(echo $uname_r | perl -pe 's/.*el(\d+_\d+)\..*/$1/; s/_/./')" - local vault="https://download.rockylinux.org/vault/rocky/${os_ver}" - dnf_cmd="$(echo dnf -y -q --setopt=localpkg_gpgcheck=1 install \ - "${vault}/BaseOS/x86_64/os/Packages/k/kernel-${uname_r}.rpm" \ - "${vault}/BaseOS/x86_64/os/Packages/k/kernel-core-${uname_r}.rpm" \ - "${vault}/BaseOS/x86_64/os/Packages/k/kernel-modules-${uname_r}.rpm" \ - "${vault}/BaseOS/x86_64/os/Packages/k/kernel-modules-core-${uname_r}.rpm" \ - "${vault}/AppStream/x86_64/os/Packages/k/kernel-devel-${uname_r}.rpm" - )" - 
fi + set_cuda_version + set_driver_version - execute_with_retries "${dnf_cmd}" + set +e + gpu_count="$(grep -i PCI_ID=10DE /sys/bus/pci/devices/*/uevent | wc -l)" + set -e + echo "gpu_count=[${gpu_count}]" + nvsmi_works="0" + nvsmi_query_xml="${tmpdir}/nvsmi.xml" + xmllint="/opt/conda/miniconda3/bin/xmllint" + NVIDIA_SMI_PATH='/usr/bin' + MIG_MAJOR_CAPS=0 + IS_MIG_ENABLED=0 + CUDNN_PKG_NAME="" + CUDNN8_PKG_NAME="" + CUDA_LOCAL_REPO_INSTALLED="0" + + if ! test -v DEFAULT_RAPIDS_RUNTIME ; then + readonly DEFAULT_RAPIDS_RUNTIME='SPARK' fi -} -function main() { - # This configuration should be run on all nodes - # regardless if they have attached GPUs - configure_yarn - - # Detect NVIDIA GPU - if (lspci | grep -q NVIDIA); then - # if this is called without the MIG script then the drivers are not installed - migquery_result="$(nvsmi --query-gpu=mig.mode.current --format=csv,noheader)" - if [[ "${migquery_result}" == "[N/A]" ]] ; then migquery_result="" ; fi - NUM_MIG_GPUS="$(echo ${migquery_result} | uniq | wc -l)" - - if [[ "${NUM_MIG_GPUS}" -gt "0" ]] ; then - if [[ "${NUM_MIG_GPUS}" -eq "1" ]]; then - if (echo "${migquery_result}" | grep Enabled); then - IS_MIG_ENABLED=1 - NVIDIA_SMI_PATH='/usr/local/yarn-mig-scripts/' - MIG_MAJOR_CAPS=`grep nvidia-caps /proc/devices | cut -d ' ' -f 1` - fetch_mig_scripts - fi - fi - fi + # Verify SPARK compatability + RAPIDS_RUNTIME=$(get_metadata_attribute 'rapids-runtime' "${DEFAULT_RAPIDS_RUNTIME}") + readonly RAPIDS_RUNTIME - # if mig is enabled drivers would have already been installed - if [[ $IS_MIG_ENABLED -eq 0 ]]; then - install_nvidia_gpu_driver + # determine whether we have nvidia-smi installed and working + nvsmi +} - load_kernel_module +# Hold all NVIDIA-related packages from upgrading unintenionally or services like unattended-upgrades +# Users should run apt-mark unhold before they wish to upgrade these packages +function hold_nvidia_packages() { + if ! 
is_debuntu ; then return ; fi - if [[ -n ${CUDNN_VERSION} ]]; then - install_nvidia_nccl - install_nvidia_cudnn - fi - #Install GPU metrics collection in Stackdriver if needed - if [[ "${INSTALL_GPU_AGENT}" == "true" ]]; then - install_gpu_agent - echo 'GPU metrics agent successfully deployed.' - else - echo 'GPU metrics agent will not be installed.' - fi + apt-mark hold nvidia-* + apt-mark hold libnvidia-* + if dpkg -l | grep -q "xserver-xorg-video-nvidia"; then + apt-mark hold xserver-xorg-video-nvidia* + fi +} - # for some use cases, the kernel module needs to be removed before first use of nvidia-smi - for module in nvidia_uvm nvidia_drm nvidia_modeset nvidia ; do - rmmod ${module} > /dev/null 2>&1 || echo "unable to rmmod ${module}" - done +function gpu_exit_handler() { + echo "no operations in gpu exit handler" +} - MIG_GPU_LIST="$(nvsmi -L | grep -e MIG -e P100 -e H100 -e A100 || echo -n "")" - if test -n "$(nvsmi -L)" ; then - # cache the result of the gpu query - ADDRS=$(nvsmi --query-gpu=index --format=csv,noheader | perl -e 'print(join(q{,},map{chomp; qq{"$_"}}))') - echo "{\"name\": \"gpu\", \"addresses\":[$ADDRS]}" | tee "/var/run/nvidia-gpu-index.txt" - fi - NUM_MIG_GPUS="$(test -n "${MIG_GPU_LIST}" && echo "${MIG_GPU_LIST}" | wc -l || echo "0")" - if [[ "${NUM_MIG_GPUS}" -gt "0" ]] ; then - # enable MIG on every GPU - for GPU_ID in $(echo ${MIG_GPU_LIST} | awk -F'[: ]' -e '{print $2}') ; do - nvsmi -i "${GPU_ID}" --multi-instance-gpu 1 - done - - NVIDIA_SMI_PATH='/usr/local/yarn-mig-scripts/' - MIG_MAJOR_CAPS="$(grep nvidia-caps /proc/devices | cut -d ' ' -f 1)" - fetch_mig_scripts - else - configure_gpu_exclusive_mode - fi - fi - configure_yarn_nodemanager - configure_gpu_script - configure_gpu_isolation - elif [[ "${ROLE}" == "Master" ]]; then - configure_yarn_nodemanager - configure_gpu_script +function set_cudnn_version() { + readonly DEFAULT_CUDNN8_VERSION="8.0.5.39" + readonly DEFAULT_CUDNN9_VERSION="9.1.0.70" + + # Parameters for 
NVIDIA-provided cuDNN library + DEFAULT_CUDNN_VERSION=${CUDNN_FOR_CUDA["${CUDA_VERSION}"]} + readonly DEFAULT_CUDNN_VERSION + CUDNN_VERSION=$(get_metadata_attribute 'cudnn-version' "${DEFAULT_CUDNN_VERSION}") + # The minimum cuDNN version supported by rocky is ${DEFAULT_CUDNN8_VERSION} + if is_rocky && (version_le "${CUDNN_VERSION}" "${DEFAULT_CUDNN8_VERSION}") ; then + CUDNN_VERSION="${DEFAULT_CUDNN8_VERSION}" + elif (ge_ubuntu20 || ge_debian12) && [[ "${CUDNN_VERSION%%.*}" == "8" ]] ; then + # cuDNN v8 is not distribution for ubuntu20+, debian12 + CUDNN_VERSION="${DEFAULT_CUDNN9_VERSION}" + elif (le_ubuntu18 || le_debian11) && [[ "${CUDNN_VERSION%%.*}" == "9" ]] ; then + # cuDNN v9 is not distributed for ubuntu18, debian10, debian11 ; fall back to 8 + CUDNN_VERSION="8.8.0.121" fi + readonly CUDNN_VERSION +} - # Restart YARN services if they are running already - if [[ $(systemctl show hadoop-yarn-resourcemanager.service -p SubState --value) == 'running' ]]; then - systemctl restart hadoop-yarn-resourcemanager.service - fi - if [[ $(systemctl show hadoop-yarn-nodemanager.service -p SubState --value) == 'running' ]]; then - systemctl restart hadoop-yarn-nodemanager.service + +function is_cudnn8() ( set +x ; [[ "${CUDNN_VERSION%%.*}" == "8" ]] ; ) +function is_cudnn9() ( set +x ; [[ "${CUDNN_VERSION%%.*}" == "9" ]] ; ) + +function set_cuda_repo_shortname() { +# Short name for urls +# https://developer.download.nvidia.com/compute/cuda/repos/${shortname} + if is_rocky ; then + shortname="$(os_id | sed -e 's/rocky/rhel/')$(os_vercat)" + else + shortname="$(os_id)$(os_vercat)" fi } -function clean_up_sources_lists() { - # - # bigtop (primary) - # - local -r dataproc_repo_file="/etc/apt/sources.list.d/dataproc.list" - - if [[ -f "${dataproc_repo_file}" ]] && ! 
grep -q signed-by "${dataproc_repo_file}" ; then - region="$(get_metadata_value zone | perl -p -e 's:.*/:: ; s:-[a-z]+$::')" +function set_nv_urls() { + # Parameters for NVIDIA-provided package repositories + readonly NVIDIA_BASE_DL_URL='https://developer.download.nvidia.com/compute' + readonly NVIDIA_REPO_URL="${NVIDIA_BASE_DL_URL}/cuda/repos/${shortname}/x86_64" - local regional_bigtop_repo_uri - regional_bigtop_repo_uri=$(cat ${dataproc_repo_file} | - sed "s#/dataproc-bigtop-repo/#/goog-dataproc-bigtop-repo-${region}/#" | - grep "deb .*goog-dataproc-bigtop-repo-${region}.* dataproc contrib" | - cut -d ' ' -f 2 | - head -1) + # Parameter for NVIDIA-provided Rocky Linux GPU driver + readonly NVIDIA_ROCKY_REPO_URL="${NVIDIA_REPO_URL}/cuda-${shortname}.repo" +} - if [[ "${regional_bigtop_repo_uri}" == */ ]]; then - local -r bigtop_key_uri="${regional_bigtop_repo_uri}archive.key" +function set_cuda_runfile_url() { + local MAX_DRIVER_VERSION + local MAX_CUDA_VERSION + + local MIN_OPEN_DRIVER_VER="515.48.07" + local MIN_DRIVER_VERSION="${MIN_OPEN_DRIVER_VER}" + local MIN_CUDA_VERSION="11.7.1" # matches MIN_OPEN_DRIVER_VER + + if is_cuda12 ; then + if is_debian12 ; then + MIN_DRIVER_VERSION="545.23.06" + MIN_CUDA_VERSION="12.3.0" + elif is_debian10 ; then + MAX_DRIVER_VERSION="555.42.02" + MAX_CUDA_VERSION="12.5.0" + elif is_ubuntu18 ; then + MAX_DRIVER_VERSION="530.30.02" + MAX_CUDA_VERSION="12.1.1" + fi + elif version_ge "${CUDA_VERSION}" "${MIN_CUDA_VERSION}" ; then + if le_debian10 ; then + # cuda 11 is not supported for <= debian10 + MAX_CUDA_VERSION="0" + MAX_DRIVER_VERSION="0" + fi + else + echo "Minimum CUDA version supported is ${MIN_CUDA_VERSION}. Specified: ${CUDA_VERSION}" + fi + + if version_lt "${CUDA_VERSION}" "${MIN_CUDA_VERSION}" ; then + echo "Minimum CUDA version for ${shortname} is ${MIN_CUDA_VERSION}. 
Specified: ${CUDA_VERSION}" + elif ( test -v MAX_CUDA_VERSION && version_gt "${CUDA_VERSION}" "${MAX_CUDA_VERSION}" ) ; then + echo "Maximum CUDA version for ${shortname} is ${MAX_CUDA_VERSION}. Specified: ${CUDA_VERSION}" + fi + if version_lt "${DRIVER_VERSION}" "${MIN_DRIVER_VERSION}" ; then + echo "Minimum kernel driver version for ${shortname} is ${MIN_DRIVER_VERSION}. Specified: ${DRIVER_VERSION}" + elif ( test -v MAX_DRIVER_VERSION && version_gt "${DRIVER_VERSION}" "${MAX_DRIVER_VERSION}" ) ; then + echo "Maximum kernel driver version for ${shortname} is ${MAX_DRIVER_VERSION}. Specified: ${DRIVER_VERSION}" + fi + + # driver version named in cuda runfile filename + # (these may not be actual driver versions - see https://download.nvidia.com/XFree86/Linux-x86_64/) + readonly -A drv_for_cuda=( + ["11.7.0"]="515.43.04" ["11.7.1"]="515.65.01" + ["11.8.0"]="520.61.05" + ["12.0.0"]="525.60.13" ["12.0.1"]="525.85.12" + ["12.1.0"]="530.30.02" ["12.1.1"]="530.30.02" + ["12.2.0"]="535.54.03" ["12.2.1"]="535.86.10" ["12.2.2"]="535.104.05" + ["12.3.0"]="545.23.06" ["12.3.1"]="545.23.08" ["12.3.2"]="545.23.08" + ["12.4.0"]="550.54.14" ["12.4.1"]="550.54.15" # 550.54.15 is not a driver indexed at https://download.nvidia.com/XFree86/Linux-x86_64/ + ["12.5.0"]="555.42.02" ["12.5.1"]="555.42.06" # 555.42.02 is indexed, 555.42.06 is not + ["12.6.0"]="560.28.03" ["12.6.1"]="560.35.03" ["12.6.2"]="560.35.03" + ) + + # Verify that the file with the indicated combination exists + local drv_ver=${drv_for_cuda["${CUDA_FULL_VERSION}"]} + CUDA_RUNFILE="cuda_${CUDA_FULL_VERSION}_${drv_ver}_linux.run" + local CUDA_RELEASE_BASE_URL="${NVIDIA_BASE_DL_URL}/cuda/${CUDA_FULL_VERSION}" + local DEFAULT_NVIDIA_CUDA_URL="${CUDA_RELEASE_BASE_URL}/local_installers/${CUDA_RUNFILE}" + + NVIDIA_CUDA_URL=$(get_metadata_attribute 'cuda-url' "${DEFAULT_NVIDIA_CUDA_URL}") + readonly NVIDIA_CUDA_URL + + CUDA_RUNFILE="$(echo ${NVIDIA_CUDA_URL} | perl -pe 's{^.+/}{}')" + readonly CUDA_RUNFILE + + if ! 
curl -s --head "${NVIDIA_CUDA_URL}" | grep -E -q '^HTTP.*200\s*$' ; then + echo "No CUDA distribution exists for this combination of DRIVER_VERSION=${drv_ver}, CUDA_VERSION=${CUDA_FULL_VERSION}" + exit 1 + fi + + if ( version_lt "${CUDA_FULL_VERSION}" "12.3.0" && ge_debian12 ) ; then + echo "CUDA 12.3.0 is the minimum CUDA 12 version supported on Debian 12" + elif ( version_gt "${CUDA_VERSION}" "12.1.1" && is_ubuntu18 ) ; then + echo "CUDA 12.1.1 is the maximum CUDA version supported on ubuntu18. Requested version: ${CUDA_VERSION}" + elif ( version_lt "${CUDA_VERSION%%.*}" "12" && ge_debian12 ) ; then + echo "CUDA 11 not supported on Debian 12. Requested version: ${CUDA_VERSION}" + elif ( version_lt "${CUDA_VERSION}" "11.8" && is_rocky9 ) ; then + echo "CUDA 11.8.0 is the minimum version for Rocky 9. Requested version: ${CUDA_VERSION}" + fi +} + +function set_cudnn_tarball_url() { +CUDNN_TARBALL="cudnn-${CUDA_VERSION}-linux-x64-v${CUDNN_VERSION}.tgz" +CUDNN_TARBALL_URL="${NVIDIA_BASE_DL_URL}/redist/cudnn/v${CUDNN_VERSION%.*}/${CUDNN_TARBALL}" +if ( version_ge "${CUDNN_VERSION}" "8.3.1.22" ); then + # When version is greater than or equal to 8.3.1.22 but less than 8.4.1.50 use this format + CUDNN_TARBALL="cudnn-linux-x86_64-${CUDNN_VERSION}_cuda${CUDA_VERSION%.*}-archive.tar.xz" + if ( version_le "${CUDNN_VERSION}" "8.4.1.50" ); then + # When cuDNN version is greater than or equal to 8.4.1.50 use this format + CUDNN_TARBALL="cudnn-linux-x86_64-${CUDNN_VERSION}_cuda${CUDA_VERSION}-archive.tar.xz" + fi + # Use legacy url format with one of the tarball name formats depending on version as above + CUDNN_TARBALL_URL="${NVIDIA_BASE_DL_URL}/redist/cudnn/v${CUDNN_VERSION%.*}/local_installers/${CUDA_VERSION}/${CUDNN_TARBALL}" +fi +if ( version_ge "${CUDA_VERSION}" "12.0" ); then + # Use modern url format When cuda version is greater than or equal to 12.0 + CUDNN_TARBALL="cudnn-linux-x86_64-${CUDNN_VERSION}_cuda${CUDA_VERSION%%.*}-archive.tar.xz" + 
CUDNN_TARBALL_URL="${NVIDIA_BASE_DL_URL}/cudnn/redist/cudnn/linux-x86_64/${CUDNN_TARBALL}" +fi +readonly CUDNN_TARBALL +readonly CUDNN_TARBALL_URL +} + +function install_cuda_keyring_pkg() { + if ( test -v CUDA_KEYRING_PKG_INSTALLED && + [[ "${CUDA_KEYRING_PKG_INSTALLED}" == "1" ]] ); then return ; fi + local kr_ver=1.1 + curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ + "${NVIDIA_REPO_URL}/cuda-keyring_${kr_ver}-1_all.deb" \ + -o "${tmpdir}/cuda-keyring.deb" + dpkg -i "${tmpdir}/cuda-keyring.deb" + rm -f "${tmpdir}/cuda-keyring.deb" + CUDA_KEYRING_PKG_INSTALLED="1" +} + +function uninstall_cuda_keyring_pkg() { + apt-get purge -yq cuda-keyring + CUDA_KEYRING_PKG_INSTALLED="0" +} + +function install_local_cuda_repo() { + is_complete install-local-cuda-repo && return + + if [[ "${CUDA_LOCAL_REPO_INSTALLED}" == "1" ]]; then return ; fi + CUDA_LOCAL_REPO_INSTALLED="1" + pkgname="cuda-repo-${shortname}-${CUDA_VERSION//./-}-local" + CUDA_LOCAL_REPO_PKG_NAME="${pkgname}" + readonly LOCAL_INSTALLER_DEB="${pkgname}_${CUDA_FULL_VERSION}-${DRIVER_VERSION}-1_amd64.deb" + readonly LOCAL_DEB_URL="${NVIDIA_BASE_DL_URL}/cuda/${CUDA_FULL_VERSION}/local_installers/${LOCAL_INSTALLER_DEB}" + readonly DIST_KEYRING_DIR="/var/${pkgname}" + + curl -fsSL --retry-connrefused --retry 3 --retry-max-time 5 \ + "${LOCAL_DEB_URL}" -o "${tmpdir}/${LOCAL_INSTALLER_DEB}" + + dpkg -i "${tmpdir}/${LOCAL_INSTALLER_DEB}" + rm "${tmpdir}/${LOCAL_INSTALLER_DEB}" + cp ${DIST_KEYRING_DIR}/cuda-*-keyring.gpg /usr/share/keyrings/ + + if is_ubuntu ; then + curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ + "${NVIDIA_REPO_URL}/cuda-${shortname}.pin" \ + -o /etc/apt/preferences.d/cuda-repository-pin-600 + fi + + mark_complete install-local-cuda-repo +} +function uninstall_local_cuda_repo(){ + apt-get purge -yq "${CUDA_LOCAL_REPO_PKG_NAME}" + rm -f "${workdir}/complete/install-local-cuda-repo" +} + +function install_local_cudnn_repo() { + is_complete install-local-cudnn-repo && 
return + + pkgname="cudnn-local-repo-${shortname}-${CUDNN_VERSION%.*}" + CUDNN_PKG_NAME="${pkgname}" + local_deb_fn="${pkgname}_1.0-1_amd64.deb" + local_deb_url="${NVIDIA_BASE_DL_URL}/cudnn/${CUDNN_VERSION%.*}/local_installers/${local_deb_fn}" + + # ${NVIDIA_BASE_DL_URL}/redist/cudnn/v8.6.0/local_installers/11.8/cudnn-linux-x86_64-8.6.0.163_cuda11-archive.tar.xz + curl -fsSL --retry-connrefused --retry 3 --retry-max-time 5 \ + "${local_deb_url}" -o "${tmpdir}/local-installer.deb" + + dpkg -i "${tmpdir}/local-installer.deb" + + rm -f "${tmpdir}/local-installer.deb" + + cp /var/cudnn-local-repo-*-${CUDNN_VERSION%.*}*/cudnn-local-*-keyring.gpg /usr/share/keyrings + + mark_complete install-local-cudnn-repo +} + +function uninstall_local_cudnn_repo() { + apt-get purge -yq "${CUDNN_PKG_NAME}" + rm -f "${workdir}/complete/install-local-cudnn-repo" +} + +function install_local_cudnn8_repo() { + is_complete install-local-cudnn8-repo && return + + if is_ubuntu ; then cudnn8_shortname="ubuntu2004" + elif is_debian ; then cudnn8_shortname="debian11" + else return 0 ; fi + if is_cuda12 ; then CUDNN8_CUDA_VER=12.0 + elif is_cuda11 ; then CUDNN8_CUDA_VER=11.8 + else CUDNN8_CUDA_VER="${CUDA_VERSION}" ; fi + cudnn_pkg_version="${CUDNN_VERSION}-1+cuda${CUDNN8_CUDA_VER}" + + pkgname="cudnn-local-repo-${cudnn8_shortname}-${CUDNN_VERSION}" + CUDNN8_PKG_NAME="${pkgname}" + + deb_fn="${pkgname}_1.0-1_amd64.deb" + local_deb_fn="${tmpdir}/${deb_fn}" + local_deb_url="${NVIDIA_BASE_DL_URL}/redist/cudnn/v${CUDNN_VERSION%.*}/local_installers/${CUDNN8_CUDA_VER}/${deb_fn}" + + # cache the cudnn package + cache_fetched_package "${local_deb_url}" \ + "${pkg_bucket}/${CUDNN8_CUDA_VER}/${deb_fn}" \ + "${local_deb_fn}" + + local cudnn_path="$(dpkg -c ${local_deb_fn} | perl -ne 'if(m{(/var/cudnn-local-repo-.*)/\s*$}){print $1}')" + # If we are using a ram disk, mount another where we will unpack the cudnn local installer + if [[ "${tmpdir}" == "/mnt/shm" ]] && ! 
grep -q '/var/cudnn-local-repo' /proc/mounts ; then + mkdir -p "${cudnn_path}" + mount -t tmpfs tmpfs "${cudnn_path}" + fi + + dpkg -i "${local_deb_fn}" + + rm -f "${local_deb_fn}" + + cp "${cudnn_path}"/cudnn-local-*-keyring.gpg /usr/share/keyrings + mark_complete install-local-cudnn8-repo +} + +function uninstall_local_cudnn8_repo() { + apt-get purge -yq "${CUDNN8_PKG_NAME}" + mark_incomplete install-local-cudnn8-repo +} + +function install_nvidia_nccl() { + readonly DEFAULT_NCCL_VERSION=${NCCL_FOR_CUDA["${CUDA_VERSION}"]} + readonly NCCL_VERSION=$(get_metadata_attribute 'nccl-version' ${DEFAULT_NCCL_VERSION}) + + is_complete nccl && return + + if is_cuda11 && is_debian12 ; then + echo "NCCL cannot be compiled for CUDA 11 on ${_shortname}" + return + fi + + local -r nccl_version="${NCCL_VERSION}-1+cuda${CUDA_VERSION}" + + # https://github.com/NVIDIA/nccl/blob/master/README.md + # https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ + # Fermi: SM_20, compute_30 + # Kepler: SM_30,SM_35,SM_37, compute_30,compute_35,compute_37 + # Maxwell: SM_50,SM_52,SM_53, compute_50,compute_52,compute_53 + # Pascal: SM_60,SM_61,SM_62, compute_60,compute_61,compute_62 + + # The following architectures are suppored by open kernel driver + # Volta: SM_70,SM_72, compute_70,compute_72 + # Ampere: SM_80,SM_86,SM_87, compute_80,compute_86,compute_87 + + # The following architectures are supported by CUDA v11.8+ + # Ada: SM_89, compute_89 + # Hopper: SM_90,SM_90a compute_90,compute_90a + # Blackwell: SM_100, compute_100 + NVCC_GENCODE="-gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_72,code=sm_72" + NVCC_GENCODE="${NVCC_GENCODE} -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_87,code=sm_87" + if version_ge "${CUDA_VERSION}" "11.8" ; then + NVCC_GENCODE="${NVCC_GENCODE} -gencode=arch=compute_89,code=sm_89" + fi + if version_ge "${CUDA_VERSION}" "12.0" ; then + NVCC_GENCODE="${NVCC_GENCODE} 
-gencode=arch=compute_90,code=sm_90 -gencode=arch=compute_90a,code=compute_90a" + fi + + mkdir -p "${workdir}" + pushd "${workdir}" + + test -d "${workdir}/nccl" || { + local tarball_fn="v${NCCL_VERSION}-1.tar.gz" + curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ + "https://github.com/NVIDIA/nccl/archive/refs/tags/${tarball_fn}" \ + | tar xz + mv "nccl-${NCCL_VERSION}-1" nccl + } + + local build_path + if is_debuntu ; then build_path="nccl/build/pkg/deb" ; else + build_path="nccl/build/pkg/rpm/x86_64" ; fi + + test -d "${workdir}/nccl/build" || { + local build_tarball="nccl-build_${_shortname}_${nccl_version}.tar.gz" + local local_tarball="${workdir}/${build_tarball}" + local gcs_tarball="${pkg_bucket}/${_shortname}/${build_tarball}" + + output=$(gsutil ls "${gcs_tarball}" 2>&1 || echo '') + if echo "${output}" | grep -q "${gcs_tarball}" ; then + # cache hit - unpack from cache + echo "cache hit" else - local -r bigtop_key_uri="${regional_bigtop_repo_uri}/archive.key" + # build and cache + pushd nccl + # https://github.com/NVIDIA/nccl?tab=readme-ov-file#install + install_build_dependencies + if is_debuntu ; then + # These packages are required to build .deb packages from source + execute_with_retries \ + apt-get install -y -qq build-essential devscripts debhelper fakeroot + export NVCC_GENCODE + execute_with_retries make -j$(nproc) pkg.debian.build + elif is_rocky ; then + # These packages are required to build .rpm packages from source + execute_with_retries \ + dnf -y -q install rpm-build rpmdevtools + export NVCC_GENCODE + execute_with_retries make -j$(nproc) pkg.redhat.build + fi + tar czvf "/${local_tarball}" "../${build_path}" + gcloud storage cp "${local_tarball}" "${gcs_tarball}" + rm "${local_tarball}" + make clean + popd fi + gcloud storage cat "${gcs_tarball}" | tar xz + } - local -r bigtop_kr_path="/usr/share/keyrings/bigtop-keyring.gpg" - rm -f "${bigtop_kr_path}" - curl -fsS --retry-connrefused --retry 10 --retry-max-time 30 \ - 
"${bigtop_key_uri}" | gpg --dearmor -o "${bigtop_kr_path}" + if is_debuntu ; then + dpkg -i "${build_path}/libnccl${NCCL_VERSION%%.*}_${nccl_version}_amd64.deb" "${build_path}/libnccl-dev_${nccl_version}_amd64.deb" + elif is_rocky ; then + rpm -ivh "${build_path}/libnccl-${nccl_version}.x86_64.rpm" "${build_path}/libnccl-devel-${nccl_version}.x86_64.rpm" + fi - sed -i -e "s:deb https:deb [signed-by=${bigtop_kr_path}] https:g" "${dataproc_repo_file}" - sed -i -e "s:deb-src https:deb-src [signed-by=${bigtop_kr_path}] https:g" "${dataproc_repo_file}" + popd + mark_complete nccl +} + +function install_nvidia_cudnn() { + is_complete cudnn && return + + local major_version + major_version="${CUDNN_VERSION%%.*}" + local cudnn_pkg_version + cudnn_pkg_version="${CUDNN_VERSION}-1+cuda${CUDA_VERSION}" + + if is_rocky ; then + if is_cudnn8 ; then + execute_with_retries dnf -y -q install \ + "libcudnn${major_version}" \ + "libcudnn${major_version}-devel" + sync + elif is_cudnn9 ; then + execute_with_retries dnf -y -q install \ + "libcudnn9-static-cuda-${CUDA_VERSION%%.*}" \ + "libcudnn9-devel-cuda-${CUDA_VERSION%%.*}" + sync + else + echo "Unsupported cudnn version: '${major_version}'" + fi + elif is_debuntu; then + if ge_debian12 && is_src_os ; then + apt-get -y install nvidia-cudnn + else + if is_cudnn8 ; then + install_local_cudnn8_repo + + apt-get update -qq + + execute_with_retries \ + apt-get -y install --no-install-recommends \ + "libcudnn8=${cudnn_pkg_version}" \ + "libcudnn8-dev=${cudnn_pkg_version}" + + uninstall_local_cudnn8_repo + sync + elif is_cudnn9 ; then + install_cuda_keyring_pkg + + apt-get update -qq + + execute_with_retries \ + apt-get -y install --no-install-recommends \ + "libcudnn9-cuda-${CUDA_VERSION%%.*}" \ + "libcudnn9-dev-cuda-${CUDA_VERSION%%.*}" \ + "libcudnn9-static-cuda-${CUDA_VERSION%%.*}" + sync + else + echo "Unsupported cudnn version: [${CUDNN_VERSION}]" + fi + fi + else + echo "Unsupported OS: '${_shortname}'" + exit 1 + fi + + ldconfig + + 
echo "NVIDIA cuDNN successfully installed for ${_shortname}." + mark_complete cudnn +} + +function add_nonfree_components() { + if is_src_nvidia ; then return; fi + if ge_debian12 ; then + # Include in sources file components on which nvidia-open-kernel-dkms depends + local -r debian_sources="/etc/apt/sources.list.d/debian.sources" + local components="main contrib non-free non-free-firmware" + + sed -i -e "s/Components: .*$/Components: ${components}/" "${debian_sources}" + elif is_debian ; then + sed -i -e 's/ main$/ main contrib non-free/' /etc/apt/sources.list + fi +} + +# +# Install package signing key and add corresponding repository +# https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html +function add_repo_nvidia_container_toolkit() { + local nvctk_root="https://nvidia.github.io/libnvidia-container" + local signing_key_url="${nvctk_root}/gpgkey" + local repo_data + + if is_debuntu ; then repo_data="${nvctk_root}/stable/deb/\$(ARCH) /" + else repo_data="${nvctk_root}/stable/rpm/nvidia-container-toolkit.repo" ; fi + + os_add_repo nvidia-container-toolkit \ + "${signing_key_url}" \ + "${repo_data}" \ + "no" +} + +function add_repo_cuda() { + if is_debuntu ; then + install_cuda_keyring_pkg # 11.7+, 12.0+ + elif is_rocky ; then + execute_with_retries "dnf config-manager --add-repo ${NVIDIA_ROCKY_REPO_URL}" + fi +} + +function build_driver_from_github() { + # non-GPL driver will have been built on rocky8 + if is_rocky8 ; then return 0 ; fi + pushd "${workdir}" + + test -d "${workdir}/open-gpu-kernel-modules" || { + local tarball_fn="${DRIVER_VERSION}.tar.gz" + curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ + "https://github.com/NVIDIA/open-gpu-kernel-modules/archive/refs/tags/${tarball_fn}" \ + | tar xz + mv "open-gpu-kernel-modules-${DRIVER_VERSION}" open-gpu-kernel-modules + } + + local nvidia_ko_path="$(find /lib/modules/$(uname -r)/ -name 'nvidia.ko')" + test -n "${nvidia_ko_path}" && test -f 
"${nvidia_ko_path}" || { + local build_tarball="kmod_${_shortname}_${DRIVER_VERSION}.tar.gz" + local local_tarball="${workdir}/${build_tarball}" + local def_dir="${modulus_md5sum:-unsigned}" + local build_dir=$(get_metadata_attribute modulus_md5sum "${def_dir}") + + local gcs_tarball="${pkg_bucket}/${_shortname}/${uname_r}/${build_dir}/${build_tarball}" + + if gsutil ls "${gcs_tarball}" 2>&1 | grep -q "${gcs_tarball}" ; then + echo "cache hit" + else + # build the kernel modules + pushd open-gpu-kernel-modules + install_build_dependencies + if ( is_cuda11 && is_ubuntu22 ) ; then + echo "Kernel modules cannot be compiled for CUDA 11 on ${_shortname}" + exit 1 + fi + execute_with_retries make -j$(nproc) modules \ + > kernel-open/build.log \ + 2> kernel-open/build_error.log + # Sign kernel modules + if [[ -n "${PSN}" ]]; then + configure_dkms_certs + for module in $(find open-gpu-kernel-modules/kernel-open -name '*.ko'); do + "/lib/modules/${uname_r}/build/scripts/sign-file" sha256 \ + "${mok_key}" \ + "${mok_der}" \ + "${module}" + done + clear_dkms_key + fi + make modules_install \ + >> kernel-open/build.log \ + 2>> kernel-open/build_error.log + # Collect build logs and installed binaries + tar czvf "${local_tarball}" \ + "${workdir}/open-gpu-kernel-modules/kernel-open/"*.log \ + $(find /lib/modules/${uname_r}/ -iname 'nvidia*.ko') + gcloud storage cp "${local_tarball}" "${gcs_tarball}" + rm "${local_tarball}" + make clean + popd + fi + gcloud storage cat "${gcs_tarball}" | tar -C / -xzv + depmod -a + } + + popd +} + +function build_driver_from_packages() { + if is_debuntu ; then + if [[ -n "$(apt-cache search -n "nvidia-driver-${DRIVER}-server-open")" ]] ; then + local pkglist=("nvidia-driver-${DRIVER}-server-open") ; else + local pkglist=("nvidia-driver-${DRIVER}-open") ; fi + if is_debian ; then + pkglist=( + "firmware-nvidia-gsp=${DRIVER_VERSION}-1" + "nvidia-smi=${DRIVER_VERSION}-1" + "nvidia-alternative=${DRIVER_VERSION}-1" + 
"nvidia-kernel-open-dkms=${DRIVER_VERSION}-1" + "nvidia-kernel-support=${DRIVER_VERSION}-1" + "nvidia-modprobe=${DRIVER_VERSION}-1" + "libnvidia-ml1=${DRIVER_VERSION}-1" + ) + fi + add_contrib_component + apt-get update -qq + execute_with_retries apt-get install -y -qq --no-install-recommends dkms + execute_with_retries apt-get install -y -qq --no-install-recommends "${pkglist[@]}" + sync + + elif is_rocky ; then + if execute_with_retries dnf -y -q module install "nvidia-driver:${DRIVER}-dkms" ; then + echo "nvidia-driver:${DRIVER}-dkms installed successfully" + else + execute_with_retries dnf -y -q module install 'nvidia-driver:latest' + fi + sync + fi +} + +function install_nvidia_userspace_runfile() { + # Parameters for NVIDIA-provided Debian GPU driver + readonly DEFAULT_USERSPACE_URL="https://download.nvidia.com/XFree86/Linux-x86_64/${DRIVER_VERSION}/NVIDIA-Linux-x86_64-${DRIVER_VERSION}.run" + + readonly USERSPACE_URL=$(get_metadata_attribute 'gpu-driver-url' "${DEFAULT_USERSPACE_URL}") + + USERSPACE_FILENAME="$(echo ${USERSPACE_URL} | perl -pe 's{^.+/}{}')" + readonly USERSPACE_FILENAME + + # This .run file contains NV's OpenGL implementation as well as + # nvidia optimized implementations of the gtk+ 2,3 stack(s) not + # including glib (https://docs.gtk.org/glib/), and what appears to + # be a copy of the source from the kernel-open directory of for + # example DRIVER_VERSION=560.35.03 + # + # https://github.com/NVIDIA/open-gpu-kernel-modules/archive/refs/tags/560.35.03.tar.gz + # + # wget https://us.download.nvidia.com/XFree86/Linux-x86_64/560.35.03/NVIDIA-Linux-x86_64-560.35.03.run + # sh ./NVIDIA-Linux-x86_64-560.35.03.run -x # this will allow you to review the contents of the package without installing it. 
+ is_complete userspace && return + + local local_fn="${tmpdir}/userspace.run" + + cache_fetched_package "${USERSPACE_URL}" \ + "${pkg_bucket}/${USERSPACE_FILENAME}" \ + "${local_fn}" + + local runfile_args + runfile_args="" + local cache_hit="0" + local local_tarball + + if is_rocky8 ; then + local nvidia_ko_path="$(find /lib/modules/$(uname -r)/ -name 'nvidia.ko')" + test -n "${nvidia_ko_path}" && test -f "${nvidia_ko_path}" || { + local build_tarball="kmod_${_shortname}_${DRIVER_VERSION}.tar.gz" + local_tarball="${workdir}/${build_tarball}" + local def_dir="${modulus_md5sum:-unsigned}" + local build_dir=$(get_metadata_attribute modulus_md5sum "${def_dir}") + + local gcs_tarball="${pkg_bucket}/${_shortname}/${uname_r}/${build_dir}/${build_tarball}" + + if gsutil ls "${gcs_tarball}" 2>&1 | grep -q "${gcs_tarball}" ; then + cache_hit="1" + runfile_args="--no-kernel-modules" + echo "cache hit" + else + install_build_dependencies + configure_dkms_certs + local signing_options + signing_options="" + if [[ -n "${PSN}" ]]; then + signing_options="--module-signing-hash sha256 \ + --module-signing-x509-hash sha256 \ + --module-signing-secret-key \"${mok_key}\" \ + --module-signing-public-key \"${mok_der}\" \ + --module-signing-script \"/lib/modules/${uname_r}/build/scripts/sign-file\" \ + " + fi + runfile_args="--no-dkms ${signing_options}" + fi + } + else + runfile_args="--no-kernel-modules" + fi + + execute_with_retries bash "${local_fn}" -e -q \ + ${runfile_args} \ + --ui=none \ + --install-libglvnd \ + --tmpdir="${tmpdir}" + + if is_rocky8 ; then + if [[ "${cache_hit}" == "1" ]] ; then + gcloud storage cat "${gcs_tarball}" | tar -C / -xzv + depmod -a + else + clear_dkms_key + tar czf "${local_tarball}" \ + /var/log/nvidia-installer.log \ + $(find /lib/modules/${uname_r}/ -iname 'nvidia*.ko') + gcloud storage cp "${local_tarball}" "${gcs_tarball}" + fi + fi + + rm -f "${local_fn}" + mark_complete userspace + sync +} + +function install_cuda_runfile() { + is_complete 
cuda && return + + local local_fn="${tmpdir}/cuda.run" + + cache_fetched_package "${NVIDIA_CUDA_URL}" \ + "${pkg_bucket}/${CUDA_RUNFILE}" \ + "${local_fn}" + + execute_with_retries bash "${local_fn}" --toolkit --no-opengl-libs --silent --tmpdir="${tmpdir}" + rm -f "${local_fn}" + mark_complete cuda + sync +} + +function install_cuda_toolkit() { + local cudatk_package=cuda-toolkit + if ge_debian12 && is_src_os ; then + cudatk_package="${cudatk_package}=${CUDA_FULL_VERSION}-1" + elif [[ -n "${CUDA_VERSION}" ]]; then + cudatk_package="${cudatk_package}-${CUDA_VERSION//./-}" + fi + cuda_package="cuda=${CUDA_FULL_VERSION}-1" + readonly cudatk_package + if is_debuntu ; then +# if is_ubuntu ; then execute_with_retries "apt-get install -y -qq --no-install-recommends cuda-drivers-${DRIVER}=${DRIVER_VERSION}-1" ; fi + execute_with_retries apt-get install -y -qq --no-install-recommends ${cuda_package} ${cudatk_package} + elif is_rocky ; then + # rocky9: cuda-11-[7,8], cuda-12-[1..6] + execute_with_retries dnf -y -q install "${cudatk_package}" + fi + sync +} + +function load_kernel_module() { + # for some use cases, the kernel module needs to be removed before first use of nvidia-smi + for module in nvidia_uvm nvidia_drm nvidia_modeset nvidia ; do + rmmod ${module} > /dev/null 2>&1 || echo "unable to rmmod ${module}" + done + + depmod -a + modprobe nvidia + for suffix in uvm modeset drm; do + modprobe "nvidia-${suffix}" + done + # TODO: if peermem is available, also modprobe nvidia-peermem +} + +function install_cuda(){ + is_complete cuda-repo && return + + if ( ge_debian12 && is_src_os ) ; then + echo "installed with the driver on ${_shortname}" + return 0 + fi + + # The OS package distributions are unreliable + install_cuda_runfile + + # Includes CUDA packages + add_repo_cuda + + mark_complete cuda-repo +} + +function install_nvidia_container_toolkit() { + is_complete install-nvtk && return + + local container_runtime_default + if command -v docker ; then 
container_runtime_default='docker' + elif command -v containerd ; then container_runtime_default='containerd' + elif command -v crio ; then container_runtime_default='crio' + else container_runtime_default='' ; fi + CONTAINER_RUNTIME=$(get_metadata_attribute 'container-runtime' "${container_runtime_default}") + + if test -z "${CONTAINER_RUNTIME}" ; then return ; fi + + add_repo_nvidia_container_toolkit + if is_debuntu ; then + execute_with_retries apt-get install -y -q nvidia-container-toolkit ; else + execute_with_retries dnf install -y -q nvidia-container-toolkit ; fi + nvidia-ctk runtime configure --runtime="${CONTAINER_RUNTIME}" + systemctl restart "${CONTAINER_RUNTIME}" + + mark_complete install-nvtk +} + +# Install NVIDIA GPU driver provided by NVIDIA +function install_nvidia_gpu_driver() { + is_complete gpu-driver && return + + if ( ge_debian12 && is_src_os ) ; then + add_nonfree_components + apt-get update -qq + apt-get -yq install \ + dkms \ + nvidia-open-kernel-dkms \ + nvidia-open-kernel-support \ + nvidia-smi \ + libglvnd0 \ + libcuda1 + echo "NVIDIA GPU driver provided by ${_shortname} was installed successfully" + return 0 + fi + + # OS driver packages do not produce reliable driver ; use runfile + install_nvidia_userspace_runfile + + build_driver_from_github + + echo "NVIDIA GPU driver provided by NVIDIA was installed successfully" + mark_complete gpu-driver +} + +function install_ops_agent(){ + is_complete ops-agent && return + + mkdir -p /opt/google + cd /opt/google + # https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent/installation + curl -sSO https://dl.google.com/cloudagents/add-google-cloud-ops-agent-repo.sh + execute_with_retries bash add-google-cloud-ops-agent-repo.sh --also-install + + is_complete ops-agent +} + +# Collects 'gpu_utilization' and 'gpu_memory_utilization' metrics +function install_gpu_monitoring_agent() { + download_gpu_monitoring_agent + install_gpu_monitoring_agent_dependency + 
start_gpu_monitoring_agent_service +} + +function download_gpu_monitoring_agent(){ + if is_rocky ; then + execute_with_retries "dnf -y -q install git" + else + execute_with_retries "apt-get install git -y" + fi + mkdir -p /opt/google + chmod 777 /opt/google + cd /opt/google + test -d compute-gpu-monitoring || \ + execute_with_retries "git clone https://github.com/GoogleCloudPlatform/compute-gpu-monitoring.git" +} + +function install_gpu_monitoring_agent_dependency(){ + cd /opt/google/compute-gpu-monitoring/linux + /opt/conda/miniconda3/bin/python3 -m venv venv + ( + source venv/bin/activate + pip install wheel + pip install -Ur requirements.txt + ) +} + +function start_gpu_monitoring_agent_service(){ + cp /opt/google/compute-gpu-monitoring/linux/systemd/google_gpu_monitoring_agent_venv.service /lib/systemd/system + systemctl daemon-reload + systemctl --no-reload --now enable /lib/systemd/system/google_gpu_monitoring_agent_venv.service +} + +# Collects 'gpu_utilization' and 'gpu_memory_utilization' metrics +function install_gpu_agent() { + # Stackdriver GPU agent parameters +# local -r GPU_AGENT_REPO_URL='https://raw.githubusercontent.com/GoogleCloudPlatform/ml-on-gcp/master/dlvm/gcp-gpu-utilization-metrics' + local -r GPU_AGENT_REPO_URL='https://raw.githubusercontent.com/GoogleCloudPlatform/ml-on-gcp/refs/heads/master/dlvm/gcp-gpu-utilization-metrics' + if ( ! 
command -v pip && is_debuntu ) ; then + execute_with_retries "apt-get install -y -qq python3-pip" + fi + local install_dir=/opt/gpu-utilization-agent + mkdir -p "${install_dir}" + curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ + "${GPU_AGENT_REPO_URL}/requirements.txt" -o "${install_dir}/requirements.txt" + curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ + "${GPU_AGENT_REPO_URL}/report_gpu_metrics.py" \ + | sed -e 's/-u --format=/--format=/' \ + | dd status=none of="${install_dir}/report_gpu_metrics.py" + local venv="${install_dir}/venv" + /opt/conda/miniconda3/bin/python3 -m venv "${venv}" +( + source "${venv}/bin/activate" + python3 -m pip install --upgrade pip + execute_with_retries python3 -m pip install -r "${install_dir}/requirements.txt" +) + sync + + # Generate GPU service. + cat </lib/systemd/system/gpu-utilization-agent.service +[Unit] +Description=GPU Utilization Metric Agent + +[Service] +Type=simple +PIDFile=/run/gpu_agent.pid +ExecStart=/bin/bash --login -c '. 
${venv}/bin/activate ; python3 "${install_dir}/report_gpu_metrics.py"' +User=root +Group=root +WorkingDirectory=/ +Restart=always + +[Install] +WantedBy=multi-user.target +EOF + # Reload systemd manager configuration + systemctl daemon-reload + # Enable gpu-utilization-agent service + systemctl --no-reload --now enable gpu-utilization-agent.service +} + +function configure_gpu_exclusive_mode() { + # only run this function when spark < 3.0 + if version_ge "${SPARK_VERSION}" "3.0" ; then return 0 ; fi + # include exclusive mode on GPU + nvsmi -c EXCLUSIVE_PROCESS + clear_nvsmi_cache +} + +function install_build_dependencies() { + is_complete build-dependencies && return + + if is_debuntu ; then + if is_ubuntu22 && is_cuda12 ; then + # On ubuntu22, the default compiler does not build some kernel module versions + # https://forums.developer.nvidia.com/t/linux-new-kernel-6-5-0-14-ubuntu-22-04-can-not-compile-nvidia-display-card-driver/278553/11 + execute_with_retries apt-get install -y -qq gcc-12 + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 11 + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 12 + update-alternatives --set gcc /usr/bin/gcc-12 + fi + + elif is_rocky ; then + execute_with_retries dnf -y -q install gcc + + local dnf_cmd="dnf -y -q install kernel-devel-${uname_r}" + set +e + eval "${dnf_cmd}" > "${install_log}" 2>&1 + local retval="$?" 
+ set -e + + if [[ "${retval}" == "0" ]] ; then return ; fi + + if grep -q 'Unable to find a match: kernel-devel-' "${install_log}" ; then + # this kernel-devel may have been migrated to the vault + local os_ver="$(echo $uname_r | perl -pe 's/.*el(\d+_\d+)\..*/$1/; s/_/./')" + local vault="https://download.rockylinux.org/vault/rocky/${os_ver}" + dnf_cmd="$(echo dnf -y -q --setopt=localpkg_gpgcheck=1 install \ + "${vault}/BaseOS/x86_64/os/Packages/k/kernel-${uname_r}.rpm" \ + "${vault}/BaseOS/x86_64/os/Packages/k/kernel-core-${uname_r}.rpm" \ + "${vault}/BaseOS/x86_64/os/Packages/k/kernel-modules-${uname_r}.rpm" \ + "${vault}/BaseOS/x86_64/os/Packages/k/kernel-modules-core-${uname_r}.rpm" \ + "${vault}/AppStream/x86_64/os/Packages/k/kernel-devel-${uname_r}.rpm" + )" + fi + + execute_with_retries "${dnf_cmd}" fi + mark_complete build-dependencies +} - # - # adoptium - # - # https://adoptium.net/installation/linux/#_deb_installation_on_debian_or_ubuntu - local -r key_url="https://packages.adoptium.net/artifactory/api/gpg/key/public" - local -r adoptium_kr_path="/usr/share/keyrings/adoptium.gpg" - rm -f "${adoptium_kr_path}" - curl -fsS --retry-connrefused --retry 10 --retry-max-time 30 "${key_url}" \ - | gpg --dearmor -o "${adoptium_kr_path}" - echo "deb [signed-by=${adoptium_kr_path}] https://packages.adoptium.net/artifactory/deb/ $(os_codename) main" \ - > /etc/apt/sources.list.d/adoptium.list +function install_gpu_driver_and_cuda() { + install_nvidia_gpu_driver + install_cuda + load_kernel_module +} +function prepare_gpu_install_env() { + # Whether to install NVIDIA-provided or OS-provided GPU driver + GPU_DRIVER_PROVIDER=$(get_metadata_attribute 'gpu-driver-provider' 'NVIDIA') + readonly GPU_DRIVER_PROVIDER - # - # docker - # - local docker_kr_path="/usr/share/keyrings/docker-keyring.gpg" - local docker_repo_file="/etc/apt/sources.list.d/docker.list" - local -r docker_key_url="https://download.docker.com/linux/$(os_id)/gpg" + # Whether to install GPU monitoring 
agent that sends GPU metrics to Stackdriver + INSTALL_GPU_AGENT=$(get_metadata_attribute 'install-gpu-agent' 'false') + readonly INSTALL_GPU_AGENT - rm -f "${docker_kr_path}" - curl -fsS --retry-connrefused --retry 10 --retry-max-time 30 "${docker_key_url}" \ - | gpg --dearmor -o "${docker_kr_path}" - echo "deb [signed-by=${docker_kr_path}] https://download.docker.com/linux/$(os_id) $(os_codename) stable" \ - > ${docker_repo_file} + set_cuda_repo_shortname + set_nv_urls + set_cuda_runfile_url + set_cudnn_version + set_cudnn_tarball_url - # - # google cloud + logging/monitoring - # - if ls /etc/apt/sources.list.d/google-cloud*.list ; then - rm -f /usr/share/keyrings/cloud.google.gpg - curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg - for list in google-cloud google-cloud-logging google-cloud-monitoring ; do - list_file="/etc/apt/sources.list.d/${list}.list" - if [[ -f "${list_file}" ]]; then - sed -i -e 's:deb https:deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https:g' "${list_file}" + if is_cuda11 ; then gcc_ver="11" + elif is_cuda12 ; then gcc_ver="12" ; fi +} + +function gpu_install_exit_handler() { + if is_ramdisk ; then + for shmdir in /var/cudnn-local ; do + if ( grep -q "^tmpfs ${shmdir}" /proc/mounts && ! 
grep -q "^tmpfs ${shmdir}" /etc/fstab ) ; then + umount -f ${shmdir} fi done fi + hold_nvidia_packages +} - # - # cran-r - # - if [[ -f /etc/apt/sources.list.d/cran-r.list ]]; then - keyid="0x95c0faf38db3ccad0c080a7bdc78b2ddeabc47b7" - if is_ubuntu18 ; then keyid="0x51716619E084DAB9"; fi - rm -f /usr/share/keyrings/cran-r.gpg - curl "https://keyserver.ubuntu.com/pks/lookup?op=get&search=${keyid}" | \ - gpg --dearmor -o /usr/share/keyrings/cran-r.gpg - sed -i -e 's:deb http:deb [signed-by=/usr/share/keyrings/cran-r.gpg] http:g' /etc/apt/sources.list.d/cran-r.list +# This configuration should be applied only if GPU is attached to the node +function configure_yarn_nodemanager() { + set_hadoop_property 'yarn-site.xml' \ + 'yarn.nodemanager.linux-container-executor.cgroups.mount' 'true' + set_hadoop_property 'yarn-site.xml' \ + 'yarn.nodemanager.linux-container-executor.cgroups.mount-path' '/sys/fs/cgroup' + set_hadoop_property 'yarn-site.xml' \ + 'yarn.nodemanager.linux-container-executor.cgroups.hierarchy' 'yarn' + set_hadoop_property 'yarn-site.xml' \ + 'yarn.nodemanager.container-executor.class' \ + 'org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor' + set_hadoop_property 'yarn-site.xml' 'yarn.nodemanager.linux-container-executor.group' 'yarn' + + # Fix local dirs access permissions + local yarn_local_dirs=() + + readarray -d ',' yarn_local_dirs < <("${bdcfg}" get_property_value \ + --configuration_file "${HADOOP_CONF_DIR}/yarn-site.xml" \ + --name "yarn.nodemanager.local-dirs" 2>/dev/null | tr -d '\n') + + if [[ "${#yarn_local_dirs[@]}" -ne "0" && "${yarn_local_dirs[@]}" != "None" ]]; then + chown yarn:yarn -R "${yarn_local_dirs[@]/,/}" fi +} - # - # mysql - # - if [[ -f /etc/apt/sources.list.d/mysql.list ]]; then - rm -f /usr/share/keyrings/mysql.gpg - curl 'https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xBCA43417C3B485DD128EC6D4B7B3B788A8D3785C' | \ - gpg --dearmor -o /usr/share/keyrings/mysql.gpg - sed -i -e 's:deb https:deb 
[signed-by=/usr/share/keyrings/mysql.gpg] https:g' /etc/apt/sources.list.d/mysql.list +function yarn_exit_handler() { + # Restart YARN services if they are running already + for svc in resourcemanager nodemanager; do + if [[ "$(systemctl show hadoop-yarn-${svc}.service -p SubState --value)" == 'running' ]]; then + systemctl stop "hadoop-yarn-${svc}.service" + systemctl start "hadoop-yarn-${svc}.service" + fi + done + # restart services stopped during preparation stage + # systemctl list-units | perl -n -e 'qx(systemctl start $1) if /^.*? ((hadoop|knox|hive|mapred|yarn|hdfs)\S*).service/' +} + + +function configure_yarn_gpu_resources() { + if [[ ! -d "${HADOOP_CONF_DIR}" ]] ; then return 0 ; fi # pre-init scripts + if [[ ! -f "${HADOOP_CONF_DIR}/resource-types.xml" ]]; then + printf '\n' >"${HADOOP_CONF_DIR}/resource-types.xml" fi + set_hadoop_property 'resource-types.xml' 'yarn.resource-types' 'yarn.io/gpu' - if [[ -f /etc/apt/trusted.gpg ]] ; then mv /etc/apt/trusted.gpg /etc/apt/old-trusted.gpg ; fi + set_hadoop_property 'capacity-scheduler.xml' \ + 'yarn.scheduler.capacity.resource-calculator' \ + 'org.apache.hadoop.yarn.util.resource.DominantResourceCalculator' + set_hadoop_property 'yarn-site.xml' 'yarn.resource-types' 'yarn.io/gpu' } -function exit_handler() { - set +ex - echo "Exit handler invoked" +function configure_gpu_script() { + # Download GPU discovery script + local -r spark_gpu_script_dir='/usr/lib/spark/scripts/gpu' + mkdir -p ${spark_gpu_script_dir} + # need to update the getGpusResources.sh script to look for MIG devices since if multiple GPUs nvidia-smi still + # lists those because we only disable the specific GIs via CGROUPs. 
Here we just create it based off of: + # https://raw.githubusercontent.com/apache/spark/master/examples/src/main/scripts/getGpusResources.sh + local -r gpus_resources_script="${spark_gpu_script_dir}/getGpusResources.sh" + cat > "${gpus_resources_script}" <<'EOF' +#!/usr/bin/env bash - # Purge private key material until next grant - clear_dkms_key +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Example output: {"name": "gpu", "addresses":["0","1","2","3","4","5","6","7"]} - # Clear pip cache - pip cache purge || echo "unable to purge pip cache" +ADDRS=$(nvidia-smi --query-gpu=index --format=csv,noheader | perl -e 'print(join(q{,},map{chomp; qq{"$_"}}))') - # If system memory was sufficient to mount memory-backed filesystems - if [[ "${tmpdir}" == "/mnt/shm" ]] ; then - # remove the tmpfs pip cache-dir - pip config unset global.cache-dir || echo "unable to unset global pip cache" +echo {\"name\": \"gpu\", \"addresses\":[${ADDRS}]} +EOF - # Clean up shared memory mounts - for shmdir in /var/cache/apt/archives /var/cache/dnf /mnt/shm /tmp ; do - if grep -q "^tmpfs ${shmdir}" /proc/mounts && ! 
grep -q "^tmpfs ${shmdir}" /etc/fstab ; then - umount -f ${shmdir} - fi - done + chmod a+rx "${gpus_resources_script}" - # restart services stopped during preparation stage - # systemctl list-units | perl -n -e 'qx(systemctl start $1) if /^.*? ((hadoop|knox|hive|mapred|yarn|hdfs)\S*).service/' - fi + local spark_defaults_conf="/etc/spark/conf.dist/spark-defaults.conf" - if is_debuntu ; then - # Clean up OS package cache - apt-get -y -qq clean - apt-get -y -qq autoremove - # re-hold systemd package - if ge_debian12 ; then - apt-mark hold systemd libsystemd0 ; fi + local executor_cores + executor_cores="$(nproc | perl -MPOSIX -pe '$_ = POSIX::floor( $_ * 0.75 ); $_-- if $_ % 2')" + local executor_memory + executor_memory_gb="$(awk '/^MemFree/ {print $2}' /proc/meminfo | perl -MPOSIX -pe '$_ *= 0.75; $_ = POSIX::floor( $_ / (1024*1024) )')" + local task_cpus=2 + local gpu_amount + + # The current setting of spark.task.resource.gpu.amount (0.333) is + # not ideal to get the best performance from the RAPIDS Accelerator + # plugin. It's recommended to be 1/{executor core count} unless you + # have a special use case. 
+# gpu_amount="$(echo $executor_cores | perl -pe "\$_ = ( ${gpu_count} / (\$_ / ${task_cpus}) )")" + gpu_amount="$(perl -e "print 1 / ${executor_cores}")" + +# cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.aggregate.ComplexTypedAggregateExpression + + cat >>"${spark_defaults_conf}" <> "${HADOOP_CONF_DIR}/container-executor.cfg" + printf 'export MIG_AS_GPU_ENABLED=1\n' >> "${HADOOP_CONF_DIR}/yarn-env.sh" + printf 'export ENABLE_MIG_GPUS_FOR_CGROUPS=1\n' >> "${HADOOP_CONF_DIR}/yarn-env.sh" else - dnf clean all + printf '\n[gpu]\nmodule.enabled=true\n[cgroups]\nroot=/sys/fs/cgroup\nyarn-hierarchy=yarn\n' >> "${HADOOP_CONF_DIR}/container-executor.cfg" fi - # print disk usage statistics for large components - if is_ubuntu ; then - du -hs \ - /usr/lib/{pig,hive,hadoop,jvm,spark,google-cloud-sdk,x86_64-linux-gnu} \ - /usr/lib \ - /opt/nvidia/* \ - /usr/local/cuda-1?.? \ - /opt/conda/miniconda3 | sort -h - elif is_debian ; then - du -hs \ - /usr/lib/{pig,hive,hadoop,jvm,spark,google-cloud-sdk,x86_64-linux-gnu} \ - /usr/lib \ - /usr/local/cuda-1?.? \ - /opt/conda/miniconda3 | sort -h - else - du -hs \ - /var/lib/docker \ - /usr/lib/{pig,hive,hadoop,firmware,jvm,spark,atlas} \ - /usr/lib64/google-cloud-sdk \ - /usr/lib \ - /opt/nvidia/* \ - /usr/local/cuda-1?.? 
\ - /opt/conda/miniconda3 - fi - - # Process disk usage logs from installation period - rm -f /run/keep-running-df - sync - sleep 5.01s - # compute maximum size of disk during installation - # Log file contains logs like the following (minus the preceeding #): -#Filesystem 1K-blocks Used Available Use% Mounted on -#/dev/vda2 7096908 2611344 4182932 39% / - df / | tee -a "/run/disk-usage.log" + # Configure a systemd unit to ensure that permissions are set on restart + cat >/etc/systemd/system/dataproc-cgroup-device-permissions.service< $b } - map { (split)[2] =~ /^(\d+)/ } - grep { m:^/: } ); -$max=$siz[0]; $min=$siz[-1]; $inc=$max-$min; -print( " samples-taken: ", scalar @siz, $/, - "maximum-disk-used: $max", $/, - "minimum-disk-used: $min", $/, - " increased-by: $inc", $/ )' < "/run/disk-usage.log" +[Service] +ExecStart=/bin/bash -c "chmod a+rwx -R /sys/fs/cgroup/cpu,cpuacct; chmod a+rwx -R /sys/fs/cgroup/devices" - echo "exit_handler has completed" +[Install] +WantedBy=multi-user.target +EOF - # zero free disk space - if [[ -n "$(get_metadata_attribute creating-image)" ]]; then - dd if=/dev/zero of=/zero - sync - sleep 3s - rm -f /zero + systemctl enable dataproc-cgroup-device-permissions + systemctl start dataproc-cgroup-device-permissions +} + +function setup_gpu_yarn() { + # This configuration should be run on all nodes + # regardless if they have attached GPUs + configure_yarn_gpu_resources + + # When there is no GPU, but the installer is executing on a master node: + if [[ "${gpu_count}" == "0" ]] ; then + if [[ "${ROLE}" == "Master" ]]; then + configure_yarn_nodemanager + fi + return 0 fi - return 0 + install_nvidia_container_toolkit + configure_yarn_nodemanager_gpu + configure_gpu_script + configure_gpu_isolation } -function set_proxy(){ - export METADATA_HTTP_PROXY="$(get_metadata_attribute http-proxy)" - export http_proxy="${METADATA_HTTP_PROXY}" - export https_proxy="${METADATA_HTTP_PROXY}" - export HTTP_PROXY="${METADATA_HTTP_PROXY}" - export 
HTTPS_PROXY="${METADATA_HTTP_PROXY}" - export no_proxy=metadata.google.internal,169.254.169.254 - export NO_PROXY=metadata.google.internal,169.254.169.254 -} -function mount_ramdisk(){ - local free_mem - free_mem="$(awk '/^MemFree/ {print $2}' /proc/meminfo)" - if [[ ${free_mem} -lt 10500000 ]]; then return 0 ; fi +function install_spark_rapids() { + # Update SPARK RAPIDS config + local DEFAULT_SPARK_RAPIDS_VERSION="24.08.1" + local DEFAULT_XGBOOST_VERSION="1.7.6" # 2.1.3 - # Write to a ramdisk instead of churning the persistent disk + # https://mvnrepository.com/artifact/ml.dmlc/xgboost4j-spark-gpu + local -r scala_ver="2.12" - tmpdir="/mnt/shm" - mkdir -p "${tmpdir}" - mount -t tmpfs tmpfs "${tmpdir}" + if [[ "${DATAPROC_IMAGE_VERSION}" == "2.0" ]] ; then + local DEFAULT_SPARK_RAPIDS_VERSION="23.08.2" # Final release to support spark 3.1.3 + fi - # Clear pip cache - # TODO: make this conditional on which OSs have pip without cache purge - pip cache purge || echo "unable to purge pip cache" + readonly SPARK_RAPIDS_VERSION=$(get_metadata_attribute 'spark-rapids-version' ${DEFAULT_SPARK_RAPIDS_VERSION}) + readonly XGBOOST_VERSION=$(get_metadata_attribute 'xgboost-version' ${DEFAULT_XGBOOST_VERSION}) - # Download pip packages to tmpfs - pip config set global.cache-dir "${tmpdir}" || echo "unable to set global.cache-dir" + local -r rapids_repo_url='https://repo1.maven.org/maven2/ai/rapids' + local -r nvidia_repo_url='https://repo1.maven.org/maven2/com/nvidia' + local -r dmlc_repo_url='https://repo.maven.apache.org/maven2/ml/dmlc' - # Download OS packages to tmpfs - if is_debuntu ; then - mount -t tmpfs tmpfs /var/cache/apt/archives - else - mount -t tmpfs tmpfs /var/cache/dnf - fi -} + local jar_basename -function prepare_to_install(){ - nvsmi_works="0" - readonly bdcfg="/usr/local/bin/bdconfig" - tmpdir=/tmp/ - if ! is_debuntu && ! 
is_rocky ; then - echo "Unsupported OS: '$(os_name)'" - exit 1 - fi + jar_basename="xgboost4j-spark-gpu_${scala_ver}-${XGBOOST_VERSION}.jar" + cache_fetched_package "${dmlc_repo_url}/xgboost4j-spark-gpu_${scala_ver}/${XGBOOST_VERSION}/${jar_basename}" \ + "${pkg_bucket}/xgboost4j-spark-gpu_${scala_ver}/${XGBOOST_VERSION}/${jar_basename}" \ + "/usr/lib/spark/jars/${jar_basename}" - repair_old_backports + jar_basename="xgboost4j-gpu_${scala_ver}-${XGBOOST_VERSION}.jar" + cache_fetched_package "${dmlc_repo_url}/xgboost4j-gpu_${scala_ver}/${XGBOOST_VERSION}/${jar_basename}" \ + "${pkg_bucket}/xgboost4j-gpu_${scala_ver}/${XGBOOST_VERSION}/${jar_basename}" \ + "/usr/lib/spark/jars/${jar_basename}" - export DEBIAN_FRONTEND=noninteractive + jar_basename="rapids-4-spark_${scala_ver}-${SPARK_RAPIDS_VERSION}.jar" + cache_fetched_package "${nvidia_repo_url}/rapids-4-spark_${scala_ver}/${SPARK_RAPIDS_VERSION}/${jar_basename}" \ + "${pkg_bucket}/rapids-4-spark_${scala_ver}/${SPARK_RAPIDS_VERSION}/${jar_basename}" \ + "/usr/lib/spark/jars/${jar_basename}" +} - trap exit_handler EXIT - mount_ramdisk - install_log="${tmpdir}/install.log" - set_proxy +function main() { + install_gpu_driver_and_cuda - if is_debuntu ; then - clean_up_sources_lists - apt-get update -qq - apt-get -y clean - sleep 5s - apt-get -y -qq autoremove - if ge_debian12 ; then - apt-mark unhold systemd libsystemd0 ; fi + #Install GPU metrics collection in Stackdriver if needed + if [[ "${INSTALL_GPU_AGENT}" == "true" ]]; then + install_gpu_agent +# install_gpu_monitoring_agent + echo 'GPU metrics agent successfully deployed.' else - dnf clean all + echo 'GPU metrics agent has not been installed.' 
fi + configure_gpu_exclusive_mode - # zero free disk space - if [[ -n "$(get_metadata_attribute creating-image)" ]]; then ( set +e - time dd if=/dev/zero of=/zero status=none ; sync ; sleep 3s ; rm -f /zero - ) fi + setup_gpu_yarn + + echo "yarn setup complete" + + if ( test -v CUDNN_VERSION && [[ -n "${CUDNN_VERSION}" ]] ) ; then + install_nvidia_nccl + install_nvidia_cudnn + fi + + if [[ "${RAPIDS_RUNTIME}" == "SPARK" ]]; then + install_spark_rapids + configure_gpu_script + echo "RAPIDS initialized with Spark runtime" + elif [[ "${RAPIDS_RUNTIME}" == "DASK" ]]; then + # we are not currently tooled for installing dask in this action. + echo "RAPIDS recognizes DASK runtime - currently supported using dask/dask.sh or rapids/rapids.sh" + else + echo "Unrecognized RAPIDS Runtime: ${RAPIDS_RUNTIME}" + fi - configure_dkms_certs + echo "main complete" + return 0 +} - install_dependencies +function exit_handler() { + set +e + gpu_install_exit_handler + gpu_exit_handler + pip_exit_handler + yarn_exit_handler + common_exit_handler + return 0 +} - # Monitor disk usage in a screen session - df / > "/run/disk-usage.log" - touch "/run/keep-running-df" - screen -d -m -US keep-running-df \ - bash -c "while [[ -f /run/keep-running-df ]] ; do df / | tee -a /run/disk-usage.log ; sleep 5s ; done" +function prepare_to_install(){ + prepare_common_env + prepare_pip_env + prepare_gpu_env + prepare_gpu_install_env + trap exit_handler EXIT } prepare_to_install From 10ceea0e0d72b520c032eae7d66a769cbf46ec6e Mon Sep 17 00:00:00 2001 From: "C.J. 
Collier" Date: Tue, 7 Jan 2025 21:21:47 -0800 Subject: [PATCH 11/15] printing the time of the connection failure before retrying --- templates/dask/util_functions | 1 + 1 file changed, 1 insertion(+) diff --git a/templates/dask/util_functions b/templates/dask/util_functions index ce6964e94..17c17479c 100644 --- a/templates/dask/util_functions +++ b/templates/dask/util_functions @@ -150,6 +150,7 @@ function start_systemd_dask_service() { # Pause while scheduler comes online retries=30 while ! nc -vz "${MASTER}" 8786 ; do + date sleep 3s ((retries--)) if [[ "${retries}" == "0" ]] ; then echo "dask scheduler unreachable" ; exit 1 ; fi From 24623a672db9f1fc5392c666a2e8fdd2d1cb055f Mon Sep 17 00:00:00 2001 From: "C.J. Collier" Date: Tue, 7 Jan 2025 21:31:41 -0800 Subject: [PATCH 12/15] provide more leeway for slow dask scheduler startup --- templates/dask/util_functions | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/dask/util_functions b/templates/dask/util_functions index 17c17479c..f91c195db 100644 --- a/templates/dask/util_functions +++ b/templates/dask/util_functions @@ -151,7 +151,7 @@ function start_systemd_dask_service() { retries=30 while ! nc -vz "${MASTER}" 8786 ; do date - sleep 3s + sleep 7s ((retries--)) if [[ "${retries}" == "0" ]] ; then echo "dask scheduler unreachable" ; exit 1 ; fi done From 22318dde4a37eefbb204efd9c8f560d03fe7d16b Mon Sep 17 00:00:00 2001 From: "C.J. 
Collier" Date: Tue, 7 Jan 2025 21:51:32 -0800 Subject: [PATCH 13/15] correct skips --- gpu/test_gpu.py | 78 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 53 insertions(+), 25 deletions(-) diff --git a/gpu/test_gpu.py b/gpu/test_gpu.py index f8438915f..e83699e7e 100644 --- a/gpu/test_gpu.py +++ b/gpu/test_gpu.py @@ -64,8 +64,14 @@ def verify_instance_spark(self): def test_install_gpu_default_agent(self, configuration, machine_suffixes, master_accelerator, worker_accelerator, driver_provider): - if ( self.getImageOs() == 'rocky' ) and self.getImageVersion() >= pkg_resources.parse_version("2.2"): - self.skipTest("GPU drivers are currently FTBFS on Rocky 9 ; base dataproc image out of date") + self.skipTest("No need to regularly test installing the agent on its own cluster ; this is exercised elsewhere") + + if configuration == 'SINGLE' \ + and self.getImageOs() == 'rocky' \ + and self.getImageVersion() <= pkg_resources.parse_version("2.1"): + # ('2.1-rocky8 and 2.0-rocky8 tests are known to fail in SINGLE configuration with errors about nodes_include being empty') + unittest.expectedFailure(self) + self.skipTest("known to fail") metadata = None if driver_provider is not None: @@ -94,8 +100,12 @@ def test_install_gpu_without_agent(self, configuration, machine_suffixes, self.skipTest("No need to regularly test not installing the agent") - if ( self.getImageOs() == 'rocky' ) and self.getImageVersion() >= pkg_resources.parse_version("2.2"): - self.skipTest("GPU drivers are currently FTBFS on Rocky 9 ; base dataproc image out of date") + if configuration == 'SINGLE' \ + and self.getImageOs() == 'rocky' \ + and self.getImageVersion() <= pkg_resources.parse_version("2.1"): + # ('2.1-rocky8 and 2.0-rocky8 tests are known to fail in SINGLE configuration with errors about nodes_include being empty') + unittest.expectedFailure(self) + self.skipTest("known to fail") metadata = "install-gpu-agent=false" if driver_provider is not None: @@ -121,8 +131,13 @@ def 
test_install_gpu_without_agent(self, configuration, machine_suffixes, def test_install_gpu_with_agent(self, configuration, machine_suffixes, master_accelerator, worker_accelerator, driver_provider): - if ( self.getImageOs() == 'rocky' ) and self.getImageVersion() >= pkg_resources.parse_version("2.2"): - self.skipTest("GPU drivers are currently FTBFS on Rocky 9 ; base dataproc image out of date") + self.skipTest("No need to regularly test installing the agent on its own cluster ; this is exercised elsewhere") + + if configuration == 'KERBEROS' \ + and self.getImageVersion() <= pkg_resources.parse_version("2.1"): + # ('KERBEROS fails with image version <= 2.1') + unittest.expectedFailure(self) + self.skipTest("known to fail") metadata = "install-gpu-agent=true" if driver_provider is not None: @@ -159,15 +174,22 @@ def test_install_gpu_cuda_nvidia(self, configuration, machine_suffixes, and ( self.getImageOs() == 'debian' and self.getImageVersion() >= pkg_resources.parse_version("2.2") ): self.skipTest("CUDA == 12.0 not supported on debian 12") - if pkg_resources.parse_version(cuda_version) > pkg_resources.parse_version("12.0") \ + if pkg_resources.parse_version(cuda_version) > pkg_resources.parse_version("12.4") \ and ( ( self.getImageOs() == 'ubuntu' and self.getImageVersion() <= pkg_resources.parse_version("2.0") ) or \ ( self.getImageOs() == 'debian' and self.getImageVersion() <= pkg_resources.parse_version("2.1") ) ): - self.skipTest("CUDA > 12.0 not supported on older debian/ubuntu releases") + self.skipTest("CUDA > 12.4 not supported on older debian/ubuntu releases") - if pkg_resources.parse_version(cuda_version) < pkg_resources.parse_version("12.0") \ - and ( self.getImageOs() == 'debian' or self.getImageOs() == 'rocky' ) \ + if pkg_resources.parse_version(cuda_version) <= pkg_resources.parse_version("12.0") \ and self.getImageVersion() >= pkg_resources.parse_version("2.2"): - self.skipTest("CUDA < 12 not supported on Debian >= 12, Rocky >= 9") + self.skipTest( 
"Kernel driver FTBFS with older CUDA versions on image version >= 2.2" ) + + if configuration == 'SINGLE' \ + and self.getImageOs() == 'rocky' \ + and self.getImageVersion() <= pkg_resources.parse_version("2.1"): + # ('2.1-rocky8 and 2.0-rocky8 tests are known to fail in SINGLE configuration with errors about nodes_include being empty') + unittest.expectedFailure(self) + self.skipTest("known to fail") + metadata = "gpu-driver-provider=NVIDIA,cuda-version={}".format(cuda_version) self.createCluster( @@ -236,12 +258,13 @@ def test_install_gpu_with_mig(self, configuration, machine_suffixes, ) def test_gpu_allocation(self, configuration, master_accelerator, worker_accelerator, driver_provider): - if ( self.getImageOs() == 'rocky' ) and self.getImageVersion() >= pkg_resources.parse_version("2.2"): - self.skipTest("GPU drivers are currently FTBFS on Rocky 9 ; base dataproc image out of date") - if ( self.getImageOs() == 'rocky' ) and self.getImageVersion() <= pkg_resources.parse_version("2.1") \ - and configuration == 'SINGLE': - self.skipTest("2.1-rocky8 and 2.0-rocky8 single instance tests are known to fail with errors about nodes_include being empty") + if configuration == 'SINGLE' \ + and self.getImageOs() == 'rocky' \ + and self.getImageVersion() <= pkg_resources.parse_version("2.1"): + # ('2.1-rocky8 and 2.0-rocky8 tests are known to fail in SINGLE configuration with errors about nodes_include being empty') + unittest.expectedFailure(self) + self.skipTest("known to fail") metadata = None if driver_provider is not None: @@ -270,16 +293,21 @@ def test_install_gpu_cuda_nvidia_with_spark_job(self, configuration, machine_suf master_accelerator, worker_accelerator, cuda_version): - if ( self.getImageOs() == 'rocky' ) and self.getImageVersion() >= pkg_resources.parse_version("2.2"): - self.skipTest("GPU drivers are currently FTBFS on Rocky 9 ; base dataproc image out of date") - - if ( self.getImageOs() == 'rocky' ) and self.getImageVersion() <= 
pkg_resources.parse_version("2.1") \ - and configuration == 'SINGLE': - self.skipTest("2.1-rocky8 and 2.0-rocky8 single instance tests fail with errors about nodes_include being empty") + if pkg_resources.parse_version(cuda_version) > pkg_resources.parse_version("12.4") \ + and ( ( self.getImageOs() == 'ubuntu' and self.getImageVersion() <= pkg_resources.parse_version("2.0") ) or \ + ( self.getImageOs() == 'debian' and self.getImageVersion() <= pkg_resources.parse_version("2.1") ) ): + self.skipTest("CUDA > 12.4 not supported on older debian/ubuntu releases") - if pkg_resources.parse_version(cuda_version) == pkg_resources.parse_version("12.0") \ - and ( self.getImageOs() == 'debian' and self.getImageVersion() >= pkg_resources.parse_version("2.2") ): - self.skipTest("CUDA == 12.0 not supported on debian 12") + if pkg_resources.parse_version(cuda_version) <= pkg_resources.parse_version("12.0") \ + and self.getImageVersion() >= pkg_resources.parse_version("2.2"): + self.skipTest( "Kernel driver FTBFS with older CUDA versions on image version >= 2.2" ) + + if configuration == 'SINGLE' \ + and self.getImageOs() == 'rocky' \ + and self.getImageVersion() <= pkg_resources.parse_version("2.1"): + # ('2.1-rocky8 and 2.0-rocky8 tests are known to fail in SINGLE configuration with errors about nodes_include being empty') + unittest.expectedFailure(self) + self.skipTest("known to fail") if pkg_resources.parse_version(cuda_version) > pkg_resources.parse_version("12.0") \ and ( ( self.getImageOs() == 'ubuntu' and self.getImageVersion() <= pkg_resources.parse_version("2.0") ) or \ From 138e26cfb1b9fb3c711a8869dbaad7143af764c5 Mon Sep 17 00:00:00 2001 From: "C.J. 
Collier" Date: Tue, 7 Jan 2025 22:14:28 -0800 Subject: [PATCH 14/15] removed expectedFailure calls since unittest import was removed --- gpu/test_gpu.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/gpu/test_gpu.py b/gpu/test_gpu.py index e83699e7e..395ddff0f 100644 --- a/gpu/test_gpu.py +++ b/gpu/test_gpu.py @@ -70,7 +70,6 @@ def test_install_gpu_default_agent(self, configuration, machine_suffixes, and self.getImageOs() == 'rocky' \ and self.getImageVersion() <= pkg_resources.parse_version("2.1"): # ('2.1-rocky8 and 2.0-rocky8 tests are known to fail in SINGLE configuration with errors about nodes_include being empty') - unittest.expectedFailure(self) self.skipTest("known to fail") metadata = None @@ -104,7 +103,6 @@ def test_install_gpu_without_agent(self, configuration, machine_suffixes, and self.getImageOs() == 'rocky' \ and self.getImageVersion() <= pkg_resources.parse_version("2.1"): # ('2.1-rocky8 and 2.0-rocky8 tests are known to fail in SINGLE configuration with errors about nodes_include being empty') - unittest.expectedFailure(self) self.skipTest("known to fail") metadata = "install-gpu-agent=false" @@ -136,7 +134,6 @@ def test_install_gpu_with_agent(self, configuration, machine_suffixes, if configuration == 'KERBEROS' \ and self.getImageVersion() <= pkg_resources.parse_version("2.1"): # ('KERBEROS fails with image version <= 2.1') - unittest.expectedFailure(self) self.skipTest("known to fail") metadata = "install-gpu-agent=true" @@ -187,7 +184,6 @@ def test_install_gpu_cuda_nvidia(self, configuration, machine_suffixes, and self.getImageOs() == 'rocky' \ and self.getImageVersion() <= pkg_resources.parse_version("2.1"): # ('2.1-rocky8 and 2.0-rocky8 tests are known to fail in SINGLE configuration with errors about nodes_include being empty') - unittest.expectedFailure(self) self.skipTest("known to fail") @@ -263,7 +259,6 @@ def test_gpu_allocation(self, configuration, master_accelerator, and self.getImageOs() == 'rocky' \ and 
self.getImageVersion() <= pkg_resources.parse_version("2.1"): # ('2.1-rocky8 and 2.0-rocky8 tests are known to fail in SINGLE configuration with errors about nodes_include being empty') - unittest.expectedFailure(self) self.skipTest("known to fail") metadata = None @@ -306,7 +301,6 @@ def test_install_gpu_cuda_nvidia_with_spark_job(self, configuration, machine_suf and self.getImageOs() == 'rocky' \ and self.getImageVersion() <= pkg_resources.parse_version("2.1"): # ('2.1-rocky8 and 2.0-rocky8 tests are known to fail in SINGLE configuration with errors about nodes_include being empty') - unittest.expectedFailure(self) self.skipTest("known to fail") if pkg_resources.parse_version(cuda_version) > pkg_resources.parse_version("12.0") \ From 1e702a6d9499d174355bcbfcfe79f98eeb2f6963 Mon Sep 17 00:00:00 2001 From: "C.J. Collier" Date: Wed, 8 Jan 2025 00:04:22 -0800 Subject: [PATCH 15/15] increase CPU count on slow cluster create to help ubuntu18 through the finish line --- gpu/test_gpu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpu/test_gpu.py b/gpu/test_gpu.py index 395ddff0f..8237359e4 100644 --- a/gpu/test_gpu.py +++ b/gpu/test_gpu.py @@ -269,7 +269,7 @@ def test_gpu_allocation(self, configuration, master_accelerator, configuration, self.INIT_ACTIONS, metadata=metadata, - machine_type="n1-highmem-8", + machine_type="n1-standard-32", master_accelerator=master_accelerator, worker_accelerator=worker_accelerator, boot_disk_size="50GB",