8 files changed: +8 -9 lines changed

@@ -22,7 +22,7 @@ if [[ ! -f ${file} ]]; then
    exit_code=$(( $exit_code + 1 ))
fi

- srun -p ${SLURM_PARTITION} --kill-on-bad-exit=1 --exclusive --job-name=$2 -n 8 --ntasks-per-node=8 --gpus-per-task=1 python internlm/launcher/launch.py --config ${file}
+ srun -p ${SLURM_PARTITION} --kill-on-bad-exit=1 --exclusive --job-name=$2 -n 8 --ntasks-per-node=8 --gpus-per-task=1 python internlm/launch/launcher.py --config ${file}
[[ $? -ne 0 ]] && { echo "test slurm training failed."; exit_code=$(( $exit_code + 1 )); }
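
This script, like the two that follow, tallies failures in exit_code instead of stopping at the first error, so one CI run reports every broken step. A minimal standalone sketch of that pattern, with a purely illustrative step name (some_training_step is not from the repository):

    #!/bin/bash
    exit_code=0

    # Placeholder for any command whose failure should be recorded.
    some_training_step() { false; }

    some_training_step
    [[ $? -ne 0 ]] && { echo "test step failed."; exit_code=$(( $exit_code + 1 )); }

    # Report the accumulated result only after every step has had a chance to run.
    exit ${exit_code}
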

@@ -22,7 +22,7 @@ if [[ -d ${CKPTS20_PATH} ]]; then
    fi
fi

- srun -p ${SLURM_PARTITION} --kill-on-bad-exit=1 --exclusive --job-name=$1 -n 8 --ntasks-per-node=8 --gpus-per-task=1 python internlm/launcher/launch.py --config ./ci_scripts/train/ci_7B_sft.py
+ srun -p ${SLURM_PARTITION} --kill-on-bad-exit=1 --exclusive --job-name=$1 -n 8 --ntasks-per-node=8 --gpus-per-task=1 python internlm/launch/launcher.py --config ./ci_scripts/train/ci_7B_sft.py
[[ $? -ne 0 ]] && { echo "test slurm training failed."; exit_code=$(( $exit_code + 1 )); }

num=$(num_files "${CKPTS20_OUTPUT}")

@@ -22,7 +22,7 @@ if [[ -d ${CKPTS20_PATH} ]]; then
    fi
fi

- srun -p ${SLURM_PARTITION} --kill-on-bad-exit=1 --exclusive --job-name=$1 -N 1 torchrun --nnodes=1 --nproc_per_node=8 --master_port=29501 internlm/launcher/launch.py --config ./ci_scripts/train/ci_7B_sft.py --launcher torch
+ srun -p ${SLURM_PARTITION} --kill-on-bad-exit=1 --exclusive --job-name=$1 -N 1 torchrun --nnodes=1 --nproc_per_node=8 --master_port=29501 internlm/launch/launcher.py --config ./ci_scripts/train/ci_7B_sft.py --launcher torch
[[ $? -ne 0 ]] && { echo "test torch training failed."; exit_code=$(( $exit_code + 1 )); }

num=$(num_files "${CKPTS_OUTPUT}")
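
Unlike the previous two scripts, this one launches training through torchrun with --launcher torch instead of letting srun spawn one task per GPU. Assuming a single 8-GPU node and no Slurm allocation, a direct invocation might look like the sketch below; it simply reuses the flags from the CI line above and has not been verified:

    # Hypothetical single-node run without Slurm, assembled from the flags above.
    torchrun --nnodes=1 --nproc_per_node=8 --master_port=29501 \
        internlm/launch/launcher.py \
        --config ./ci_scripts/train/ci_7B_sft.py \
        --launcher torch
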

@@ -8,8 +8,8 @@

import torch

- from internlm.inference import InferenceParams
from internlm.core.engine import Engine
+ from internlm.inference import InferenceParams


class BaseScheduler(ABC):

@@ -1,5 +1,5 @@
- from .inference_utils import InferenceParams, process_parallel_output
from .inference import SequenceGenerator, batch_tokenize
+ from .inference_utils import InferenceParams, process_parallel_output

__all__ = [
    "InferenceParams",

@@ -4,10 +4,10 @@
import torch
import torch.nn.functional as F

- from internlm.inference import InferenceParams, process_parallel_output
from internlm.core.context import ParallelMode  # noqa: E402
from internlm.core.context import global_context as gpc  # noqa: E402
from internlm.core.trainer import Trainer
+ from internlm.inference import InferenceParams, process_parallel_output


class SequenceGenerator:

@@ -2,12 +2,12 @@
# -*- encoding: utf-8 -*-

from internlm.core.context import global_context as gpc
- from internlm.launch.trainer_builder import TrainerBuilder
from internlm.data import (
    build_train_loader_with_data_type,
    build_valid_loader_with_data_type,
)
from internlm.initialize import initialize_launcher
+ from internlm.launch.trainer_builder import TrainerBuilder
from internlm.model.model_implementations.builder import create_model
from internlm.model.model_implementations.registry import register_model_initializer
from internlm.monitor import internevo_monitor

@@ -1,4 +1,3 @@
- from datetime import datetime
import fcntl
import logging
import os
@@ -8,6 +7,7 @@
import time
import traceback
from contextlib import contextmanager
+ from datetime import datetime
from functools import wraps
from threading import Thread

@@ -16,7 +16,6 @@
from internlm.monitor import send_feishu_msg_with_webhook
from internlm.utils.common import SingletonMeta, set_env_var

-
logger = logging.getLogger(__file__)
internlm_accelerator = get_accelerator()