Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 38fcd5f

Browse files
committed May 21, 2024
Merge branch 'r2.0.0rc0' of github.com:NVIDIA/NeMo into r2.0.0rc0
2 parents fd36bcc + 2a2d985 commit 38fcd5f

File tree

119 files changed

+1008
-556
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

119 files changed

+1008
-556
lines changed
 

‎.github/workflows/cicd-main.yml

+83-82
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,8 @@ jobs:
132132
apt-get update && apt-get install libsox-fmt-all -y && \
133133
popd
134134
135-
# AMMO installation
136-
pip install nvidia-ammo~=0.9.0 --extra-index-url https://pypi.nvidia.com --no-cache-dir
135+
# ModelOpt installation
136+
pip install nvidia-modelopt[torch]~=0.11.0 --extra-index-url https://pypi.nvidia.com --no-cache-dir
137137
138138
# PyTorch Lightning version
139139
python -c "import pytorch_lightning; print(pytorch_lightning.__version__)"
@@ -394,7 +394,7 @@ jobs:
394394
- name: Checkout repository
395395
uses: actions/checkout@v4
396396
- run: |
397-
python examples/nlp/language_modeling/megatron_llama_quantization.py \
397+
python examples/nlp/language_modeling/megatron_quantization.py \
398398
model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
399399
quantization.algorithm=null \
400400
model_save=/home/TestData/nlp/megatron_llama/ci_baseline
@@ -403,69 +403,70 @@ jobs:
403403
- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
404404
if: "failure()"
405405

406-
# L2_PTQ_Llama2_FP8:
407-
# needs: [cicd-test-container-setup]
408-
# runs-on: self-hosted-azure
409-
# timeout-minutes: 10
410-
# container:
411-
# image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
412-
# options:
413-
# # --user 0:128
414-
# --device=/dev/nvidia0
415-
# --gpus all
416-
# --shm-size=8g
417-
# --env TRANSFORMERS_OFFLINE=0
418-
# --env HYDRA_FULL_ERROR=1
419-
# --volume /mnt/datadrive/TestData:/home/TestData
420-
# steps:
421-
# - name: Checkout repository
422-
# uses: actions/checkout@v4
423-
# - run: |
424-
# python examples/nlp/language_modeling/megatron_llama_quantization.py \
425-
# model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
426-
# tensor_model_parallel_size=2 \
427-
# trainer.devices=2 \
428-
# quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
429-
# quantization.algorithm=fp8 \
430-
# quantization.num_calib_size=8 \
431-
# inference.batch_size=2 \
432-
# export.inference_tensor_parallel=2 \
433-
# model_save=/home/TestData/nlp/megatron_llama/ci_fp8.qnemo
434-
435-
# rm -rf /home/TestData/nlp/megatron_llama/ci_fp8.qnemo
436-
# - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
437-
# if: "failure()"
438-
439-
# L2_PTQ_Llama2_INT8_SQ:
440-
# needs: [cicd-test-container-setup]
441-
# runs-on: self-hosted-azure
442-
# timeout-minutes: 10
443-
# container:
444-
# image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
445-
# options:
446-
# # --user 0:128
447-
# --device=/dev/nvidia0
448-
# --gpus all
449-
# --shm-size=8g
450-
# --env TRANSFORMERS_OFFLINE=0
451-
# --env HYDRA_FULL_ERROR=1
452-
# --volume /mnt/datadrive/TestData:/home/TestData
453-
# steps:
454-
# - name: Checkout repository
455-
# uses: actions/checkout@v4
456-
# - run: |
457-
# python examples/nlp/language_modeling/megatron_llama_quantization.py \
458-
# model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
459-
# quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
460-
# quantization.algorithm=int8_sq \
461-
# quantization.num_calib_size=8 \
462-
# inference.batch_size=2 \
463-
# model_save=/home/TestData/nlp/megatron_llama/ci_int8_sq.qnemo
464-
465-
# rm -rf /home/TestData/nlp/megatron_llama/ci_int8_sq.qnemo
466-
# - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
467-
# if: "failure()"
468-
406+
L2_PTQ_Llama2_FP8:
407+
needs: [cicd-test-container-setup]
408+
runs-on: self-hosted-azure
409+
timeout-minutes: 10
410+
container:
411+
image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
412+
options:
413+
# --user 0:128
414+
--device=/dev/nvidia0
415+
--gpus all
416+
--shm-size=8g
417+
--env TRANSFORMERS_OFFLINE=0
418+
--env HYDRA_FULL_ERROR=1
419+
--volume /mnt/datadrive/TestData:/home/TestData
420+
steps:
421+
- name: Checkout repository
422+
uses: actions/checkout@v4
423+
- run: |
424+
python examples/nlp/language_modeling/megatron_quantization.py \
425+
model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
426+
tensor_model_parallel_size=2 \
427+
trainer.devices=2 \
428+
quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
429+
quantization.algorithm=fp8 \
430+
quantization.num_calib_size=8 \
431+
inference.batch_size=2 \
432+
export.inference_tensor_parallel=2 \
433+
model_save=/home/TestData/nlp/megatron_llama/ci_fp8.qnemo
434+
435+
rm -rf /home/TestData/nlp/megatron_llama/ci_fp8.qnemo
436+
- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
437+
if: "failure()"
438+
439+
L2_PTQ_Llama2_INT8_SQ:
440+
needs: [cicd-test-container-setup]
441+
runs-on: self-hosted-azure
442+
timeout-minutes: 10
443+
container:
444+
image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
445+
options:
446+
# --user 0:128
447+
--device=/dev/nvidia0
448+
--gpus all
449+
--shm-size=8g
450+
--env TRANSFORMERS_OFFLINE=0
451+
--env HYDRA_FULL_ERROR=1
452+
--volume /mnt/datadrive/TestData:/home/TestData
453+
steps:
454+
- name: Checkout repository
455+
uses: actions/checkout@v4
456+
- run: |
457+
python examples/nlp/language_modeling/megatron_quantization.py \
458+
model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
459+
quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
460+
quantization.algorithm=int8_sq \
461+
quantization.num_calib_size=8 \
462+
inference.batch_size=2 \
463+
model_save=/home/TestData/nlp/megatron_llama/ci_int8_sq.qnemo
464+
465+
rm -rf /home/TestData/nlp/megatron_llama/ci_int8_sq.qnemo
466+
- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
467+
if: "failure()"
468+
469+
# TODO: investigate int4_awq stuck issues and restore the test
469470
#L2_PTQ_Llama2_INT4_AWQ:
470471
# needs: [cicd-test-container-setup]
471472
# runs-on: self-hosted-azure
@@ -484,7 +485,7 @@ jobs:
484485
# - name: Checkout repository
485486
# uses: actions/checkout@v4
486487
# - run: |
487-
# python examples/nlp/language_modeling/megatron_llama_quantization.py \
488+
# python examples/nlp/language_modeling/megatron_quantization.py \
488489
# model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
489490
# tensor_model_parallel_size=1 \
490491
# trainer.devices=1 \
@@ -2268,8 +2269,8 @@ jobs:
22682269
# stage('L2: Model Parallel Size 2 Megatron Text Classification') {
22692270
# when {
22702271
# anyOf{
2271-
# branch 'main'
2272-
# changeRequest target: 'main'
2272+
# branch 'r2.0.0rc0'
2273+
# changeRequest target: 'r2.0.0rc0'
22732274
# }
22742275
# }
22752276
# failFast true
@@ -2297,8 +2298,8 @@ jobs:
22972298
# stage('L2: Model Parallel Size 2 Megatron Autoresume') {
22982299
# when {
22992300
# anyOf{
2300-
# branch 'main'
2301-
# changeRequest target: 'main'
2301+
# branch 'r2.0.0rc0'
2302+
# changeRequest target: 'r2.0.0rc0'
23022303
# }
23032304
# }
23042305
# failFast true
@@ -2328,8 +2329,8 @@ jobs:
23282329
# stage('L2: Model Parallel Size 2 Megatron Evaluation from .nemo') {
23292330
# when {
23302331
# anyOf{
2331-
# branch 'main'
2332-
# changeRequest target: 'main'
2332+
# branch 'r2.0.0rc0'
2333+
# changeRequest target: 'r2.0.0rc0'
23332334
# }
23342335
# }
23352336
# failFast true
@@ -2349,8 +2350,8 @@ jobs:
23492350
# stage('L2: Model Parallel Size 2 Megatron Train from .nemo') {
23502351
# when {
23512352
# anyOf{
2352-
# branch 'main'
2353-
# changeRequest target: 'main'
2353+
# branch 'r2.0.0rc0'
2354+
# changeRequest target: 'r2.0.0rc0'
23542355
# }
23552356
# }
23562357
# failFast true
@@ -3464,8 +3465,8 @@ jobs:
34643465
# stage('L2: NMT Bottleneck Fallback') {
34653466
# when {
34663467
# anyOf {
3467-
# branch 'main'
3468-
# changeRequest target: 'main'
3468+
# branch 'r2.0.0rc0'
3469+
# changeRequest target: 'r2.0.0rc0'
34693470
# }
34703471
# }
34713472
# failFast true
@@ -3510,8 +3511,8 @@ jobs:
35103511
# stage('L2: NMT Bottleneck Architecture') {
35113512
# when {
35123513
# anyOf {
3513-
# branch 'main'
3514-
# changeRequest target: 'main'
3514+
# branch 'r2.0.0rc0'
3515+
# changeRequest target: 'r2.0.0rc0'
35153516
# }
35163517
# }
35173518
# failFast true
@@ -3591,8 +3592,8 @@ jobs:
35913592
# stage('L2: NMT Bottleneck LVM') {
35923593
# when {
35933594
# anyOf {
3594-
# branch 'main'
3595-
# changeRequest target: 'main'
3595+
# branch 'r2.0.0rc0'
3596+
# changeRequest target: 'r2.0.0rc0'
35963597
# }
35973598
# }
35983599
# failFast true
@@ -6484,7 +6485,7 @@ jobs:
64846485
Speech_Checkpoints_tests:
64856486
needs: [cicd-test-container-setup]
64866487
runs-on: self-hosted-azure
6487-
timeout-minutes: 10
6488+
timeout-minutes: 20
64886489
container:
64896490
image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
64906491
options:

‎Dockerfile

-2
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,6 @@ RUN for f in $(ls requirements*.txt); do pip3 install --disable-pip-version-chec
133133
RUN pip install flash-attn
134134
# install numba for latest containers
135135
RUN pip install numba>=0.57.1
136-
# install ammo
137-
RUN pip install nvidia-ammo~=0.9.0 --extra-index-url https://pypi.nvidia.com --no-cache-dir
138136

139137
# copy nemo source into a scratch image
140138
FROM scratch as nemo-src

0 commit comments

Comments
 (0)
Please sign in to comment.