diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml
index 7c6a3dd04658..77541caed471 100644
--- a/dlc_developer_config.toml
+++ b/dlc_developer_config.toml
@@ -1,182 +1,79 @@
 [dev]
-# Set to "huggingface", for example, if you are a huggingface developer. Default is ""
 partner_developer = ""
-# Please only set it to true if you are preparing an EI related PR
-# Do remember to revert it back to false before merging any PR (including EI dedicated PR)
 ei_mode = false
-# Please only set it to true if you are preparing a NEURON related PR
-# Do remember to revert it back to false before merging any PR (including NEURON dedicated PR)
 neuron_mode = false
-# Please only set it to true if you are preparing a NEURONX related PR
-# Do remember to revert it back to false before merging any PR (including NEURONX dedicated PR)
-neuronx_mode = false
-# Please only set it to true if you are preparing a GRAVITON related PR
-# Do remember to revert it back to false before merging any PR (including GRAVITON dedicated PR)
+neuronx_mode = true
 graviton_mode = false
-# Please only set it to true if you are preparing a ARM64 related PR
-# Do remember to revert it back to false before merging any PR (including ARM64 dedicated PR)
 arm64_mode = false
-# Please only set it to True if you are preparing a HABANA related PR
-# Do remember to revert it back to False before merging any PR (including HABANA dedicated PR)
 habana_mode = false
-# Please only set it to True if you are preparing a HUGGINGFACE TRCOMP related PR
-# Do remember to revert it back to False before merging any PR (including HUGGINGFACE TRCOMP dedicated PR)
-# This mode is used to build TF 2.6 and PT1.11 DLC
 huggingface_trcomp_mode = false
-# Please only set it to True if you are preparing a TRCOMP related PR
-# Do remember to revert it back to False before merging any PR (including TRCOMP dedicated PR)
-# This mode is used to build PT1.12 and above DLC
 trcomp_mode = false
-# Set deep_canary_mode to true to simulate Deep Canary Test conditions on PR for all frameworks in the
-# build_frameworks list below. This will cause all image builds and non-deep-canary tests on the PR to be skipped,
-# regardless of whether they are enabled or disabled below.
-# Set graviton_mode/arm64_mode to true to run Deep Canaries on Graviton/ARM64 images.
-# Do remember to revert it back to false before merging any PR.
 deep_canary_mode = false
 
 [build]
-# Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image.
-# available frameworks - ["base", "vllm", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
-build_frameworks = []
-
-
-# By default we build both training and inference containers. Set true/false values to determine which to build.
-build_training = true
+build_frameworks = [ "huggingface_pytorch",]
+build_training = false
 build_inference = true
-
-# Set do_build to "false" to skip builds and test the latest image built by this PR
-# Note: at least one build is required to set do_build to "false"
 do_build = true
 
 [notify]
-### Notify on test failures
-### Off by default
 notify_test_failures = false
- # Valid values: medium or high
- notification_severity = "medium"
+notification_severity = "medium"
 
 [test]
-### On by default
 sanity_tests = true
 security_tests = true
- safety_check_test = false
- ecr_scan_allowlist_feature = false
+safety_check_test = false
+ecr_scan_allowlist_feature = false
 ecs_tests = true
 eks_tests = true
 ec2_tests = true
-# Set it to true if you are preparing a Benchmark related PR
 ec2_benchmark_tests = false
-
-### Set ec2_tests_on_heavy_instances = true to be able to run any EC2 tests that use large/expensive instance types by
-### default. If false, these types of tests will be skipped while other tests will run as usual.
-### These tests are run in EC2 test jobs, so ec2_tests must be true if ec2_tests_on_heavy_instances is true.
-### Off by default (set to false)
 ec2_tests_on_heavy_instances = false
-### SM specific tests
-### On by default
 sagemaker_local_tests = true
-### Set enable_ipv6 = true to run tests with IPv6-enabled resources
-### Off by default (set to false)
 enable_ipv6 = false
-### Set the VPC name to be used for IPv6 testing, this variable is empty by default
-### To create an IPv6-enabled VPC and its related resources:
-### 1. Follow this AWS doc: https://docs.aws.amazon.com/vpc/latest/userguide/create-vpc.html#create-vpc-and-other-resources
-### 2. After creating the VPC and related resources:
-### a. Set 'Auto-assign IPv6 address' option to 'No' in all public subnets within the VPC
-### b. Configure the default security group to allow SSH traffic using IPv4
-###
-### 3. Create an EFA-enabled security group:
-### a. Follow 'Step 1: Prepare an EFA-enabled security group' in:
-### https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa-start.html#efa-start-security
-### b. Configure this security group to also allow SSH traffic via IPv4
 ipv6_vpc_name = ""
-
-# run standard sagemaker remote tests from test/sagemaker_tests
 sagemaker_remote_tests = true
-# run efa sagemaker tests
 sagemaker_efa_tests = false
-# run release_candidate_integration tests
 sagemaker_rc_tests = false
-# run sagemaker benchmark tests
 sagemaker_benchmark_tests = false
-
-# SM remote EFA test instance type
 sagemaker_remote_efa_instance_type = ""
-
-# Run CI tests for nightly images
-# false by default
 nightly_pr_test_mode = false
-
 use_scheduler = false
 
 [buildspec_override]
-# Assign the path to the required buildspec file from the deep-learning-containers folder
-# For example:
-# dlc-pr-tensorflow-2-habana-training = "habana/tensorflow/training/buildspec-2-10.yml"
-# dlc-pr-pytorch-inference = "pytorch/inference/buildspec-1-12.yml"
-# Setting the buildspec file path to "" allows the image builder to choose the default buildspec file.
-
-### TRAINING PR JOBS ###
-
-# Base
 dlc-pr-base = ""
-
-# Standard Framework Training
 dlc-pr-pytorch-training = ""
 dlc-pr-tensorflow-2-training = ""
 dlc-pr-autogluon-training = ""
-
-# ARM64 Training
 dlc-pr-pytorch-arm64-training = ""
-
-# HuggingFace Training
 dlc-pr-huggingface-tensorflow-training = ""
 dlc-pr-huggingface-pytorch-training = ""
-
-# Training Compiler
 dlc-pr-huggingface-pytorch-trcomp-training = ""
 dlc-pr-huggingface-tensorflow-2-trcomp-training = ""
 dlc-pr-pytorch-trcomp-training = ""
-
-# Neuron Training
 dlc-pr-pytorch-neuron-training = ""
 dlc-pr-tensorflow-2-neuron-training = ""
-
-# Stability AI Training
 dlc-pr-stabilityai-pytorch-training = ""
-
-# Habana Training
 dlc-pr-pytorch-habana-training = ""
 dlc-pr-tensorflow-2-habana-training = ""
-
-### INFERENCE PR JOBS ###
-
-# Standard Framework Inference
 dlc-pr-pytorch-inference = ""
 dlc-pr-tensorflow-2-inference = ""
 dlc-pr-autogluon-inference = ""
-
-# Graviton Inference
 dlc-pr-pytorch-graviton-inference = ""
 dlc-pr-tensorflow-2-graviton-inference = ""
-
-# ARM64 Inference
 dlc-pr-pytorch-arm64-inference = ""
 dlc-pr-tensorflow-2-arm64-inference = ""
-
-# Neuron Inference
 dlc-pr-pytorch-neuron-inference = ""
 dlc-pr-tensorflow-1-neuron-inference = ""
 dlc-pr-tensorflow-2-neuron-inference = ""
-
-# HuggingFace Inference
 dlc-pr-huggingface-tensorflow-inference = ""
 dlc-pr-huggingface-pytorch-inference = ""
 dlc-pr-huggingface-pytorch-neuron-inference = ""
-
-# Stability AI Inference
 dlc-pr-stabilityai-pytorch-inference = ""
-
-# EIA Inference
 dlc-pr-pytorch-eia-inference = ""
-dlc-pr-tensorflow-2-eia-inference = ""
\ No newline at end of file
+dlc-pr-tensorflow-2-eia-inference = ""
+dlc-pr-vllm = ""
+# WARNING: Unrecognized key generated below
+dlc-pr-huggingface-pytorch-neuronx-inference = "huggingface/pytorch/inference/buildspec-neuronx.yml"
+
diff --git a/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx b/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx
index 55ce3ca3587d..3b5d3d3874b0 100644
--- a/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx
+++ b/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx
@@ -196,6 +196,21 @@ RUN HOME_DIR=/root \
  # conda leaves an empty /root/.cache/conda/notices.cache file which is not removed by conda clean -ya
  && rm -rf ${HOME_DIR}/.cache/conda
 
+# Final security upgrade for vulnerable packages after all installations
+RUN apt-get update \
+ && apt-mark showhold \
+ && apt-mark unhold linux-libc-dev libxml2 libsqlite3-0 || true \
+ && echo "Checking available upgrades for vulnerable packages:" \
+ && apt list --upgradable 2>/dev/null | grep -E "(linux-libc-dev|libxml2|libsqlite3-0)" || echo "No upgrades found in apt list" \
+ && apt-cache policy linux-libc-dev libxml2 libsqlite3-0 \
+ && apt-get dist-upgrade -y \
+ && apt-get install -y --only-upgrade linux-libc-dev libxml2 libsqlite3-0 \
+ && echo "Final package versions after upgrade:" \
+ && dpkg -l | grep -E "(linux-libc-dev|libxml2|libsqlite3-0)" \
+ && apt-get autoremove -y \
+ && rm -rf /var/lib/apt/lists/* \
+ && apt-get clean
+
 EXPOSE 8080 8081
 ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
 CMD ["serve"]
diff --git a/huggingface/pytorch/training/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx b/huggingface/pytorch/training/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx
index c98b05d4d510..ce8d4e48a25d 100644
--- a/huggingface/pytorch/training/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx
+++ b/huggingface/pytorch/training/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx
@@ -59,7 +59,7 @@ RUN apt-get update \
     libgstreamer-plugins-base1.0-0 \
     libsoup2.4-1 \
     libsqlite3-0 \
- && apt-get upgrade -y apparmor \
+ && apt-get upgrade -y apparmor linux-libc-dev libxml2 \
  && apt-get clean \
  && rm -rf /var/lib/apt/lists/*
 
diff --git a/huggingface/pytorch/training/docker/2.5/py3/cu124/Dockerfile.gpu b/huggingface/pytorch/training/docker/2.5/py3/cu124/Dockerfile.gpu
index df84e95aa9b8..451a8f145eba 100644
--- a/huggingface/pytorch/training/docker/2.5/py3/cu124/Dockerfile.gpu
+++ b/huggingface/pytorch/training/docker/2.5/py3/cu124/Dockerfile.gpu
@@ -63,7 +63,7 @@ ENV HF_HUB_ENABLE_HF_TRANSFER="1"
 
 RUN apt-get update \
 # TODO: Remove upgrade statements once packages are updated in base image
- && apt-get -y upgrade --only-upgrade systemd openssl cryptsetup libkrb5-3 linux-libc-dev libsqlite3-0 \
+ && apt-get -y upgrade --only-upgrade systemd openssl cryptsetup libkrb5-3 linux-libc-dev libsqlite3-0 libxml2 \
 && apt-get install -y git git-lfs wget tar \
 && wget https://go.dev/dl/go1.24.2.linux-amd64.tar.gz \
 && rm -rf /usr/local/go \
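Note: the new RUN layer in the inference Dockerfile upgrades and then prints the versions of linux-libc-dev, libxml2, and libsqlite3-0. A minimal sketch for spot-checking a built image locally, assuming an already-built image whose tag is a placeholder here (substitute whatever tag your local or PR build actually produced), reuses the same dpkg/grep filter that the new layer runs:

  # Placeholder tag; not produced by this PR -- replace with your locally built image tag
  IMAGE=hf-pytorch-inference-neuronx:local
  # Override the serving entrypoint and list the packages targeted by the final security-upgrade layer
  docker run --rm --entrypoint /bin/bash "$IMAGE" \
    -c 'dpkg -l | grep -E "(linux-libc-dev|libxml2|libsqlite3-0)"'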