fix llama3 and parallel_residual (#4)
vince62s authored Jun 5, 2024
1 parent 2afa380 commit 6d1e1be
Showing 2 changed files with 7 additions and 3 deletions.
eole/bin/convert/convert_HF.py: 4 changes (2 additions & 2 deletions)
@@ -334,7 +334,7 @@ def run(cls, args):
             transformer_ff = config["intermediate_size"]
         else:
             transformer_ff = hidden_size * 4
-        pos_ffn_activation_fn = act_table[arch]
+        mlp_activation_fn = act_table[arch]
         layer_norm = ln_table[arch]

         multiquery = False
@@ -850,7 +850,7 @@ def get_weight(checkpoint, tensor_name):
             model_type="text",
             layer_norm=layer_norm,
             norm_eps=norm_eps,
-            pos_ffn_activation_fn=pos_ffn_activation_fn,
+            mlp_activation_fn=mlp_activation_fn,
             self_attn_type="scaled-dot-flash",
             max_relative_positions=-1,
             rotary_interleave=rotary_interleave,
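The converter change is a plain keyword rename: the activation function looked up per architecture is now passed to the model config as mlp_activation_fn instead of pos_ffn_activation_fn, so the lookup site and the config call site must use the same name. A minimal sketch of that flow follows; the act_table entries and the build_model_config() helper are assumptions for illustration, not taken from the eole source.

# Minimal sketch, not the eole converter itself. The act_table entries and
# build_model_config() helper are assumptions for illustration only.
act_table = {
    "LlamaForCausalLM": "silu",    # assumed mapping
    "MistralForCausalLM": "silu",  # assumed mapping
}

def build_model_config(arch, hidden_size, hf_config):
    # Fall back to 4 * hidden_size when the HF config has no intermediate_size,
    # mirroring the logic shown in the hunk above.
    transformer_ff = hf_config.get("intermediate_size", hidden_size * 4)
    mlp_activation_fn = act_table[arch]  # formerly pos_ffn_activation_fn
    return {
        "transformer_ff": transformer_ff,
        "mlp_activation_fn": mlp_activation_fn,  # keyword must match the new name
    }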
eole/decoders/transformer_decoder.py: 6 changes (5 additions & 1 deletion)
@@ -114,7 +114,11 @@ def _forward(
                 mask=src_pad_mask,
                 return_attn=return_attn,
             )
-            ff_in = norm_layer_in
+            if not self.shared_layer_norm:
+                norm_res_layer_in = self.residual_layernorm(layer_in)
+                ff_in = norm_res_layer_in
+            else:
+                ff_in = norm_layer_in
         else:
             norm_query = self.precontext_layernorm(self_attn + layer_in)
             ctx_attn, attns = self.context_attn(
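For context, the decoder hunk fixes the parallel-residual path: when the model does not share a single layer norm across branches, the feed-forward branch must normalize the raw layer input with its own residual_layernorm rather than reuse the attention branch's normalized input. Below is a minimal, self-contained sketch of that pattern; apart from shared_layer_norm and residual_layernorm, the module and attribute names are assumptions, not the actual eole decoder layer.

# Minimal sketch of a parallel-residual block, not the eole decoder layer.
# Only `shared_layer_norm` / `residual_layernorm` come from the diff above;
# everything else (dimensions, attention module, MLP) is assumed.
import torch
import torch.nn as nn

class ParallelResidualBlock(nn.Module):
    def __init__(self, d_model=512, n_heads=8, shared_layer_norm=True):
        super().__init__()
        self.shared_layer_norm = shared_layer_norm
        self.input_layernorm = nn.LayerNorm(d_model)
        if not shared_layer_norm:
            # Separate norm for the feed-forward branch (GPT-NeoX style).
            self.residual_layernorm = nn.LayerNorm(d_model)
        self.self_attn = nn.MultiheadAttention(d_model, n_heads, batch_first=True)
        self.mlp = nn.Sequential(
            nn.Linear(d_model, 4 * d_model),
            nn.SiLU(),
            nn.Linear(4 * d_model, d_model),
        )

    def forward(self, layer_in):
        norm_layer_in = self.input_layernorm(layer_in)
        self_attn, _ = self.self_attn(norm_layer_in, norm_layer_in, norm_layer_in)
        if not self.shared_layer_norm:
            # The fix: the FF branch normalizes the raw residual stream itself
            # instead of reusing the attention branch's norm_layer_in.
            ff_in = self.residual_layernorm(layer_in)
        else:
            ff_in = norm_layer_in
        # Parallel residual: attention and feed-forward read the (normed) layer
        # input independently and their outputs are summed onto the residual.
        return layer_in + self_attn + self.mlp(ff_in)

# Example usage: x has shape (batch, seq_len, d_model).
x = torch.randn(2, 16, 512)
out = ParallelResidualBlock(shared_layer_norm=False)(x)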
