diff options
author | artnitolog <53991623+artnitolog@users.noreply.github.com> | 2022-06-23 23:50:03 +0300 |
---|---|---|
committer | artnitolog <53991623+artnitolog@users.noreply.github.com> | 2022-06-23 23:50:03 +0300 |
commit | deb045def565ad4d765341c08dbd73583bfab3d3 (patch) | |
tree | 873b6ca016dfb304680ad3e6721b9ddfd4ab1f62 | |
parent | 06e5164d5de93e97d01912e9a388b849ac7b64c6 (diff) |
fix newline
-rw-r--r-- | megatron_lm/megatron/tokenizer/tokenizer.py | 2 |
1 files changed, 1 insertions, 1 deletions
```diff
diff --git a/megatron_lm/megatron/tokenizer/tokenizer.py b/megatron_lm/megatron/tokenizer/tokenizer.py
index 9637723..62bba51 100644
--- a/megatron_lm/megatron/tokenizer/tokenizer.py
+++ b/megatron_lm/megatron/tokenizer/tokenizer.py
@@ -263,7 +263,7 @@ class _SpTokenizer(AbstractTokenizer):
         return token_ids
 
     def detokenize(self, token_ids):
-        return self.tokenizer._tokenizer.detokenize(token_ids)
+        return self.tokenizer._tokenizer.detokenize(token_ids).replace('[NL]', '\n')
 
     @property
     def cls(self):
```