@@ -132,8 +132,8 @@ jobs:
           apt-get update && apt-get install libsox-fmt-all -y && \
           popd
 
-          # AMMO installation
-          pip install nvidia-ammo~=0.9.0 --extra-index-url https://pypi.nvidia.com --no-cache-dir
+          # ModelOpt installation
+          pip install nvidia-modelopt[torch]~=0.11.0 --extra-index-url https://pypi.nvidia.com --no-cache-dir
 
           # PyTorch Lightning version
           python -c "import pytorch_lightning; print(pytorch_lightning.__version__)"
@@ -394,7 +394,7 @@ jobs:
       - name: Checkout repository
         uses: actions/checkout@v4
       - run: |
-          python examples/nlp/language_modeling/megatron_llama_quantization.py \
+          python examples/nlp/language_modeling/megatron_quantization.py \
              model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
              quantization.algorithm=null \
              model_save=/home/TestData/nlp/megatron_llama/ci_baseline
@@ -403,69 +403,70 @@ jobs:
       - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
         if: "failure()"
 
-  # L2_PTQ_Llama2_FP8:
-  #   needs: [cicd-test-container-setup]
-  #   runs-on: self-hosted-azure
-  #   timeout-minutes: 10
-  #   container:
-  #     image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
-  #     options:
-  #       # --user 0:128
-  #       --device=/dev/nvidia0
-  #       --gpus all
-  #       --shm-size=8g
-  #       --env TRANSFORMERS_OFFLINE=0
-  #       --env HYDRA_FULL_ERROR=1
-  #       --volume /mnt/datadrive/TestData:/home/TestData
-  #   steps:
-  #     - name: Checkout repository
-  #       uses: actions/checkout@v4
-  #     - run: |
-  #         python examples/nlp/language_modeling/megatron_llama_quantization.py \
-  #             model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
-  #             tensor_model_parallel_size=2 \
-  #             trainer.devices=2 \
-  #             quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
-  #             quantization.algorithm=fp8 \
-  #             quantization.num_calib_size=8 \
-  #             inference.batch_size=2 \
-  #             export.inference_tensor_parallel=2 \
-  #             model_save=/home/TestData/nlp/megatron_llama/ci_fp8.qnemo
-
-  #         rm -rf /home/TestData/nlp/megatron_llama/ci_fp8.qnemo
-  #     - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
-  #       if: "failure()"
-
-  # L2_PTQ_Llama2_INT8_SQ:
-  #   needs: [cicd-test-container-setup]
-  #   runs-on: self-hosted-azure
-  #   timeout-minutes: 10
-  #   container:
-  #     image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
-  #     options:
-  #       # --user 0:128
-  #       --device=/dev/nvidia0
-  #       --gpus all
-  #       --shm-size=8g
-  #       --env TRANSFORMERS_OFFLINE=0
-  #       --env HYDRA_FULL_ERROR=1
-  #       --volume /mnt/datadrive/TestData:/home/TestData
-  #   steps:
-  #     - name: Checkout repository
-  #       uses: actions/checkout@v4
-  #     - run: |
-  #         python examples/nlp/language_modeling/megatron_llama_quantization.py \
-  #             model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
-  #             quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
-  #             quantization.algorithm=int8_sq \
-  #             quantization.num_calib_size=8 \
-  #             inference.batch_size=2 \
-  #             model_save=/home/TestData/nlp/megatron_llama/ci_int8_sq.qnemo
-
-  #         rm -rf /home/TestData/nlp/megatron_llama/ci_int8_sq.qnemo
-  #     - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
-  #       if: "failure()"
-
+  L2_PTQ_Llama2_FP8:
+    needs: [cicd-test-container-setup]
+    runs-on: self-hosted-azure
+    timeout-minutes: 10
+    container:
+      image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
+      options:
+        # --user 0:128
+        --device=/dev/nvidia0
+        --gpus all
+        --shm-size=8g
+        --env TRANSFORMERS_OFFLINE=0
+        --env HYDRA_FULL_ERROR=1
+        --volume /mnt/datadrive/TestData:/home/TestData
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - run: |
+          python examples/nlp/language_modeling/megatron_quantization.py \
+              model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
+              tensor_model_parallel_size=2 \
+              trainer.devices=2 \
+              quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
+              quantization.algorithm=fp8 \
+              quantization.num_calib_size=8 \
+              inference.batch_size=2 \
+              export.inference_tensor_parallel=2 \
+              model_save=/home/TestData/nlp/megatron_llama/ci_fp8.qnemo
+
+          rm -rf /home/TestData/nlp/megatron_llama/ci_fp8.qnemo
+      - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
+        if: "failure()"
+
+  L2_PTQ_Llama2_INT8_SQ:
+    needs: [cicd-test-container-setup]
+    runs-on: self-hosted-azure
+    timeout-minutes: 10
+    container:
+      image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
+      options:
+        # --user 0:128
+        --device=/dev/nvidia0
+        --gpus all
+        --shm-size=8g
+        --env TRANSFORMERS_OFFLINE=0
+        --env HYDRA_FULL_ERROR=1
+        --volume /mnt/datadrive/TestData:/home/TestData
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - run: |
+          python examples/nlp/language_modeling/megatron_quantization.py \
+              model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
+              quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
+              quantization.algorithm=int8_sq \
+              quantization.num_calib_size=8 \
+              inference.batch_size=2 \
+              model_save=/home/TestData/nlp/megatron_llama/ci_int8_sq.qnemo
+
+          rm -rf /home/TestData/nlp/megatron_llama/ci_int8_sq.qnemo
+      - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
+        if: "failure()"
+
+  # TODO: investigate int4_awq stuck issues and restore the test
   # L2_PTQ_Llama2_INT4_AWQ:
   #   needs: [cicd-test-container-setup]
   #   runs-on: self-hosted-azure
@@ -484,7 +485,7 @@ jobs:
   #     - name: Checkout repository
   #       uses: actions/checkout@v4
   #     - run: |
-  #         python examples/nlp/language_modeling/megatron_llama_quantization.py \
+  #         python examples/nlp/language_modeling/megatron_quantization.py \
   #             model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
   #             tensor_model_parallel_size=1 \
   #             trainer.devices=1 \
@@ -2268,8 +2269,8 @@ jobs:
 #  stage('L2: Model Parallel Size 2 Megatron Text Classification') {
 #    when {
 #      anyOf{
-#        branch 'main'
-#        changeRequest target: 'main'
+#        branch 'r2.0.0rc0'
+#        changeRequest target: 'r2.0.0rc0'
 #      }
 #    }
 #    failFast true
@@ -2297,8 +2298,8 @@ jobs:
 #  stage('L2: Model Parallel Size 2 Megatron Autoresume') {
 #    when {
 #      anyOf{
-#        branch 'main'
-#        changeRequest target: 'main'
+#        branch 'r2.0.0rc0'
+#        changeRequest target: 'r2.0.0rc0'
 #      }
 #    }
 #    failFast true
@@ -2328,8 +2329,8 @@ jobs:
 #  stage('L2: Model Parallel Size 2 Megatron Evaluation from .nemo') {
 #    when {
 #      anyOf{
-#        branch 'main'
-#        changeRequest target: 'main'
+#        branch 'r2.0.0rc0'
+#        changeRequest target: 'r2.0.0rc0'
 #      }
 #    }
 #    failFast true
@@ -2349,8 +2350,8 @@ jobs:
 #  stage('L2: Model Parallel Size 2 Megatron Train from .nemo') {
 #    when {
 #      anyOf{
-#        branch 'main'
-#        changeRequest target: 'main'
+#        branch 'r2.0.0rc0'
+#        changeRequest target: 'r2.0.0rc0'
 #      }
 #    }
 #    failFast true
@@ -3464,8 +3465,8 @@ jobs:
 #  stage('L2: NMT Bottleneck Fallback') {
 #    when {
 #      anyOf {
-#        branch 'main'
-#        changeRequest target: 'main'
+#        branch 'r2.0.0rc0'
+#        changeRequest target: 'r2.0.0rc0'
 #      }
 #    }
 #    failFast true
@@ -3510,8 +3511,8 @@ jobs:
 #  stage('L2: NMT Bottleneck Architecture') {
 #    when {
 #      anyOf {
-#        branch 'main'
-#        changeRequest target: 'main'
+#        branch 'r2.0.0rc0'
+#        changeRequest target: 'r2.0.0rc0'
 #      }
 #    }
 #    failFast true
@@ -3591,8 +3592,8 @@ jobs:
 #  stage('L2: NMT Bottleneck LVM') {
 #    when {
 #      anyOf {
-#        branch 'main'
-#        changeRequest target: 'main'
+#        branch 'r2.0.0rc0'
+#        changeRequest target: 'r2.0.0rc0'
 #      }
 #    }
 #    failFast true
@@ -6484,7 +6485,7 @@ jobs:
   Speech_Checkpoints_tests:
     needs: [cicd-test-container-setup]
     runs-on: self-hosted-azure
-    timeout-minutes: 10
+    timeout-minutes: 20
     container:
       image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
       options: