From d5faa74cd6d7de66a058a9b3368e5cbc6dcaf4d6 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Tue, 5 Nov 2019 15:48:00 +0000 Subject: [PATCH] tokenizer white space: revert to previous behavior --- examples/run_pplm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/run_pplm.py b/examples/run_pplm.py index 2d4dee72a3..4b1a6a2b6f 100644 --- a/examples/run_pplm.py +++ b/examples/run_pplm.py @@ -373,7 +373,7 @@ def get_bag_of_words_indices(bag_of_words_ids_or_paths: List[str]) -> List[List[ filepath = id_or_path with open(filepath, "r") as f: words = f.read().split("\n") - bow_indices.append([TOKENIZER.encode(word) for word in words]) + bow_indices.append([TOKENIZER.encode(word, add_prefix_space=True) for word in words]) return bow_indices