-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnoctua2_job
executable file
·27 lines (23 loc) · 1.02 KB
/
noctua2_job
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/bin/sh
#
# Slurm batch job that runs ./moe_analysis.py against the shared CulturaX
# dataset path for a given language and model.
#
# Usage: sbatch noctua2_job LANG OUTPUT_SUFFIX MODEL
#   $1  LANG           forwarded as --lang
#   $2  OUTPUT_SUFFIX  results are written to ./moe-analysis-output-<OUTPUT_SUFFIX>
#   $3  MODEL          forwarded as --model
#
# Resource request: one node, one task, 16 CPUs, 32G memory, one A100 GPU,
# 5 hour time limit.
#SBATCH -J "LOLA - MoE analysis"
#SBATCH -t 5:00:00
#SBATCH -N 1
#SBATCH -n 1
#SBATCH --mem 32G
#SBATCH --gres=gpu:a100:1
#SBATCH --cpus-per-task=16

# Fail fast on an incomplete invocation instead of loading modules and then
# handing empty --lang/--model values to the analysis script.
if [ "$#" -lt 3 ]; then
  printf 'usage: sbatch noctua2_job LANG OUTPUT_SUFFIX MODEL\n' >&2
  exit 2
fi
lang=$1
output_suffix=$2
model=$3

# Loading required modules
module load toolchain/foss/2022b
module load lib/libaio/0.3.113-GCCcore-12.2.0
module load lang
module load Anaconda3
module load system/CUDA/12.0.0
module load lib/NCCL/2.16.2-GCCcore-12.2.0-CUDA-12.0.0
module load compiler/GCC/10.3.0

# Hugging Face hub and datasets caches both point at the same scratch directory.
export HF_HOME=/scratch/hpc-prf-lola/nikit/.cache/huggingface
export HF_DATASETS_CACHE=/scratch/hpc-prf-lola/nikit/.cache/huggingface

# Prefix of the Python environment used to run the analysis.
export CONDA_P=/scratch/hpc-prf-lola/nikit/repos/LOLA-Megatron-DeepSpeed/lola_ws/fine-tune/instruction-ft/venv-lola-ft

# Prepend the environment's nvjitlink library directory to the loader path.
# The ${VAR:+...} form appends the previous value only when it is non-empty,
# so no trailing ':' (an empty entry, i.e. the current directory) is added.
export LD_LIBRARY_PATH="$CONDA_P/lib/python3.10/site-packages/nvidia/nvjitlink/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# Offline mode, see https://huggingface.co/docs/datasets/loading#offline
export HF_HUB_OFFLINE=1

# Last command of the script, so its exit status becomes the job's exit status.
"$CONDA_P"/bin/python ./moe_analysis.py \
  --model "$model" \
  --rows-limit 10000 \
  --batch-size 1 \
  --output-dir "moe-analysis-output-$output_suffix" \
  --dataset-path "/scratch/hpc-prf-lola/share/CulturaX" \
  --lang "$lang"