@@ -378,38 +378,39 @@ module load intelmpi && mpirun --help | grep '#{node['cfncluster']['intelmpi']['
378
378
TESTNVIDIA
379
379
end
380
380
381
- bash 'test CUDA install' do
382
- cwd Chef ::Config [ :file_cache_path ]
383
- code <<-TESTCUDA
384
- has_gpu=$(lspci | grep -o "NVIDIA")
385
- if [ -z "$has_gpu" ]; then
386
- echo "No GPU detected, no test needed."
387
- exit 0
388
- fi
389
-
390
- set -e
391
- cuda_ver="#{ node [ 'cfncluster' ] [ 'nvidia' ] [ 'cuda_version' ] } "
392
- # Test CUDA installation
393
- echo "Testing CUDA install with nvcc..."
394
- export PATH=/usr/local/cuda-$cuda_ver/bin:$PATH
395
- export LD_LIBRARY_PATH=/usr/local/cuda-$cuda_ver/lib64:$LD_LIBRARY_PATH
396
- # grep CUDA version from nvcc output. If CUDA is not installed nvcc command will fail
397
- cuda_output=$(nvcc -V | grep -E -o "release [0-9]+.[0-9]+")
398
- if [ "$cuda_output" != "release $cuda_ver" ]; then
399
- echo "CUDA installed incorrectly! Installed $cuda_output but expected $cuda_ver"
400
- exit 1
401
- else
402
- echo "CUDA nvcc test passed, $cuda_output"
403
- fi
404
-
405
- # Test deviceQuery
406
- echo "Testing CUDA install with deviceQuery..."
407
- /usr/local/cuda-$cuda_ver/extras/demo_suite/deviceQuery | grep -o "Result = PASS"
408
- echo "CUDA deviceQuery test passed"
409
- echo "Correctly installed CUDA $cuda_output"
410
- TESTCUDA
381
+ unless node [ 'cfncluster' ] [ 'cfn_base_os' ] == 'alinux' && get_nvswitches > 1
382
+ bash 'test CUDA install' do
383
+ cwd Chef ::Config [ :file_cache_path ]
384
+ code <<-TESTCUDA
385
+ has_gpu=$(lspci | grep -o "NVIDIA")
386
+ if [ -z "$has_gpu" ]; then
387
+ echo "No GPU detected, no test needed."
388
+ exit 0
389
+ fi
390
+
391
+ set -e
392
+ cuda_ver="#{ node [ 'cfncluster' ] [ 'nvidia' ] [ 'cuda_version' ] } "
393
+ # Test CUDA installation
394
+ echo "Testing CUDA install with nvcc..."
395
+ export PATH=/usr/local/cuda-$cuda_ver/bin:$PATH
396
+ export LD_LIBRARY_PATH=/usr/local/cuda-$cuda_ver/lib64:$LD_LIBRARY_PATH
397
+ # grep CUDA version from nvcc output. If CUDA is not installed nvcc command will fail
398
+ cuda_output=$(nvcc -V | grep -E -o "release [0-9]+.[0-9]+")
399
+ if [ "$cuda_output" != "release $cuda_ver" ]; then
400
+ echo "CUDA installed incorrectly! Installed $cuda_output but expected $cuda_ver"
401
+ exit 1
402
+ else
403
+ echo "CUDA nvcc test passed, $cuda_output"
404
+ fi
405
+
406
+ # Test deviceQuery
407
+ echo "Testing CUDA install with deviceQuery..."
408
+ /usr/local/cuda-$cuda_ver/extras/demo_suite/deviceQuery | grep -o "Result = PASS"
409
+ echo "CUDA deviceQuery test passed"
410
+ echo "Correctly installed CUDA $cuda_output"
411
+ TESTCUDA
412
+ end
411
413
end
412
-
413
414
###################
414
415
# FabricManager
415
416
###################
0 commit comments