Commit

add configs
ahmeda14960 committed Feb 3, 2025
1 parent 75bcaee commit 0536c63
Showing 3 changed files with 19 additions and 9 deletions.
10 changes: 7 additions & 3 deletions config/debug_sft.yaml → config/llama3.1_tulu3_sft.yaml
@@ -12,10 +12,10 @@ model: # 7B class model
type: llama
seq_len: 4096
hidden_dim: 4096
intermediate_dim: 11008
intermediate_dim: 14336
num_layers: 32
num_heads: 32
num_kv_heads: 32
num_kv_heads: 8
use_flash_attention: True
flash_attention_block_size: 512
use_bias: false
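
A note on the model-block changes above: intermediate_dim 11008 → 14336 and num_kv_heads 32 → 8 move the config from a Llama-2-7B-style shape to the Llama 3.1 8B shape with grouped-query attention (each of the 8 KV heads is shared by 4 of the 32 query heads), so the untouched "# 7B class model" comment is now slightly stale. A minimal sanity-check sketch of the parameter count this block implies, assuming the Llama 3 vocabulary size of 128,256 (the vocabulary is not part of this config and is an assumption here):

# Rough parameter count implied by the new model block (Python sketch, not part of the commit).
hidden, inter, layers, heads, kv_heads = 4096, 14336, 32, 32, 8
head_dim = hidden // heads                       # 128
vocab = 128_256                                  # assumption: Llama 3 tokenizer size

attn = (hidden * heads * head_dim                # Q projection
        + 2 * hidden * kv_heads * head_dim       # K and V projections
        + heads * head_dim * hidden)             # output projection
mlp = 3 * hidden * inter                         # gate, up, down projections
embed = vocab * hidden                           # input embeddings only

total = layers * (attn + mlp) + embed
print(f"{total / 1e9:.2f}B")                     # ~7.50B; an untied LM head adds ~0.53B more, landing near 8B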
@@ -31,7 +31,8 @@ trainer:
mp: p=f32,c=bfloat16
# same as 606 sft in marin
train_batch_size: 128
num_train_steps: 7335 # 3,000,000,000,000 / 4,000,000 = 750,000
# number of steps until we hit stop iteration
num_train_steps: 1791
steps_per_eval: 1000
tensor_parallel_axes: ["mlp", "heads"]
fsdp_axis: "embed"
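
The num_train_steps change above replaces the pretraining-era step count (7335, whose "3,000,000,000,000 / 4,000,000 = 750,000" comment was already stale) with the number of optimizer steps one pass over the packed SFT data provides, as the new comment says. A small sketch of that arithmetic; the packed-sequence count is inferred from the configured values rather than taken from the commit:

import math

# Sketch (not from the commit): steps for one pass over the packed data.
train_batch_size = 128

def steps_for_one_epoch(num_packed_sequences: int) -> int:
    return math.ceil(num_packed_sequences / train_batch_size)

# Working backwards, num_train_steps = 1791 implies roughly 1791 * 128 = 229,248
# packed 4096-token sequences in this SFT mixture.
print(steps_for_one_epoch(229_248))  # 1791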
@@ -43,4 +44,7 @@ optimizer:
lr_schedule: "linear"
warmup: 0.03

hf_save_steps: 1790
hf_save_path: "gs://levanter-checkpoints/marin/llama_3.1_tulusft/"

epoch: 0
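
The new hf_save_steps / hf_save_path keys export a Hugging Face–format checkpoint to GCS every 1790 steps, i.e. exactly once, just before the 1791-step run stops. A sketch of pulling that export down and loading it; the step-numbered subdirectory name is an assumption about the exporter's layout, so check the bucket contents first:

import subprocess
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumption: the exporter writes a step-numbered subdirectory under hf_save_path.
remote = "gs://levanter-checkpoints/marin/llama_3.1_tulusft/step-1790"

subprocess.run(["gsutil", "-m", "cp", "-r", remote, "/tmp/"], check=True)

model = AutoModelForCausalLM.from_pretrained("/tmp/step-1790")
tokenizer = AutoTokenizer.from_pretrained("/tmp/step-1790")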
11 changes: 8 additions & 3 deletions config/llama3_openthoughts_sft.yaml
@@ -12,10 +12,10 @@ model: # 7B class model
type: llama
seq_len: 4096
hidden_dim: 4096
intermediate_dim: 11008
intermediate_dim: 14336
num_layers: 32
num_heads: 32
num_kv_heads: 32
num_kv_heads: 8
use_flash_attention: True
flash_attention_block_size: 512
use_bias: false
@@ -31,7 +31,8 @@ trainer:
mp: p=f32,c=bfloat16
# same as 606 sft in marin
train_batch_size: 128
num_train_steps: 7335 # 3,000,000,000,000 / 4,000,000 = 750,000
# number of steps until we hit stop iteration
num_train_steps: 802
steps_per_eval: 1000
tensor_parallel_axes: ["mlp", "heads"]
fsdp_axis: "embed"
@@ -43,4 +44,8 @@ optimizer:
lr_schedule: "linear"
warmup: 0.03


hf_save_steps: 801
hf_save_path: "gs://levanter-checkpoints/marin/tulusft_openthoughtsft/"

epoch: 0
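
The OpenThoughts config mirrors the Tulu 3 one: same model block and batch size, with num_train_steps = 802 and a single HF export at step 801, one step before the run ends. A rough upper bound on the token budget this implies, assuming every batch is a fully packed 128 × 4096-token batch:

# Token budget sketch: an upper bound, since the final batches may be partially packed.
train_batch_size, seq_len, num_train_steps = 128, 4096, 802

tokens_per_step = train_batch_size * seq_len       # 524,288
total_tokens = tokens_per_step * num_train_steps
print(f"{total_tokens:,}")                         # 420,478,976 (~0.42B tokens)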
7 changes: 4 additions & 3 deletions
@@ -1,13 +1,14 @@
# Model configuration
model:
type: llama
seq_len: 2048
seq_len: 4096
hidden_dim: 4096
intermediate_dim: 11008
intermediate_dim: 14336
num_layers: 32
num_heads: 32
num_kv_heads: 32
num_kv_heads: 8
use_flash_attention: true
flash_attention_block_size: 512
use_bias: false
use_layer_norm_weight: false
initializer_range: 0.02
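
The third changed file gets the same Llama 3.1 8B shape and additionally doubles seq_len from 2048 to 4096. Grouped-query attention keeps the longer context affordable; a sketch of the per-sequence KV cache at the new settings, assuming bf16 (2 bytes per element):

# Per-sequence KV-cache size sketch (bf16 assumed, not part of the commit).
num_layers, num_kv_heads, head_dim, seq_len = 32, 8, 128, 4096

kv_bytes = 2 * num_layers * num_kv_heads * head_dim * seq_len * 2   # keys + values, 2 bytes each
print(f"{kv_bytes / 2**20:.0f} MiB")   # 512 MiB; with the old 32 KV heads it would be 2048 MiB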
