Tokenizer whitespace: revert to previous behavior (pass add_prefix_space=True to TOKENIZER.encode)

2019-11-05 15:48:00 +00:00
parent 0b77d66a6d
commit d5faa74cd6
1 changed file with 1 addition and 1 deletion
--- a/examples/run_pplm.py
+++ b/examples/run_pplm.py
@@ -373,7 +373,7 @@ def get_bag_of_words_indices(bag_of_words_ids_or_paths: List[str]) -> List[List[
            filepath = id_or_path
        with open(filepath, "r") as f:
            words = f.read().split("\n")
-        bow_indices.append([TOKENIZER.encode(word) for word in words])
+        bow_indices.append([TOKENIZER.encode(word, add_prefix_space=True) for word in words])
    return bow_indices