Skip to content

Commit

Permalink
quick fixes (#207)
Browse files Browse the repository at this point in the history
  • Loading branch information
vince62s authored Feb 11, 2025
1 parent e0077ec commit cfce780
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 3 deletions.
4 changes: 2 additions & 2 deletions eole/modules/rope.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,9 +236,9 @@ def update(self, maxseqlen, step=0, prefetch=1024, reset=False, positions=None):
elif hasattr(self, "cos") and self.cos.size(0) >= max(offset + (step or 0), 0) + maxseqlen:
return self.cos, self.sin
if self.mode == "1d":
- cos, sin = self.forward_1d(maxseqlen, step=step, prefetch=prefetch, offset=offset)
+ cos, sin = self.forward_1d(maxseqlen, step=(step or 0), prefetch=prefetch, offset=offset)
elif self.mode == "2d":
- cos, sin = self.forward_2d(maxseqlen, step=step, prefetch=prefetch, positions=positions)
+ cos, sin = self.forward_2d(maxseqlen, step=(step or 0), prefetch=prefetch, positions=positions)
else:
raise NotImplementedError
self.register_buffer("cos", cos, persistent=False)
Expand Down
1 change: 1 addition & 0 deletions eole/transforms/tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,7 @@ def hex_to_char(match):
tokenizer = self.load_models[side]
if self.gpt2_pretok:
sentence = "".join(tokens)
+ sentence = sentence.replace("⦅", "ï½Ł").replace("⦆", "ï½ł")
detokenized = bytearray([self.revtable[c] for c in sentence]).decode("utf-8", errors="replace")
else:
detokenized = tokenizer.detokenize(tokens)
Expand Down
2 changes: 1 addition & 1 deletion eole/transforms/tokenize_id.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def tokenize_string(self, string, side="src", is_train=False):
kwargs = {"max_length": max_length, "truncation": True}
else:
kwargs = {}
- tokens = self.tokenizers[side].encode(string, **kwargs)
+ tokens = self.tokenizers[side].encode(string.replace(DefaultTokens.SEP, "\n"), **kwargs)
return tokens

def apply(self, example, is_train=False, stats=None, **kwargs):
Expand Down

0 comments on commit cfce780

Please sign in to comment.