From 949f97a8b449b187422da982d3591139dea05b6d Mon Sep 17 00:00:00 2001 From: RyanChang Date: Mon, 17 Oct 2022 21:19:45 +0800 Subject: [PATCH] fix missing token issue --- convert-pt-to-ggml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert-pt-to-ggml.py b/convert-pt-to-ggml.py index 22bd12e5c84..9e9b2dcebef 100644 --- a/convert-pt-to-ggml.py +++ b/convert-pt-to-ggml.py @@ -271,7 +271,7 @@ def bytes_to_unicode(): fout.write(struct.pack("i", len(tokens))) for key in tokens: - text = bytearray([byte_decoder[c] for c in key]).decode('utf-8', errors='replace').encode('utf-8') + text = bytearray([byte_decoder[c] for c in key]) fout.write(struct.pack("i", len(text))) fout.write(text)