Remove

2019-10-22 14:12:33 -04:00
parent 44286b94d3
commit 7d709e55ed
10 changed files with 41 additions and 39 deletions
--- a/examples/benchmarks.py
+++ b/examples/benchmarks.py
@@ -309,7 +309,7 @@ def _compute_pytorch(model_names, dictionary, average_over, device, torchscript)
        model = AutoModel.from_pretrained(model_name, config=config)
        tokenizer = AutoTokenizer.from_pretrained(model_name)

-        tokenized_sequence = tokenizer.encode(input_text)
+        tokenized_sequence = tokenizer.encode(input_text, add_special_tokens=False)

        max_input_size = tokenizer.max_model_input_sizes[model_name]
        batch_sizes = [1, 2, 4, 8]
@@ -353,7 +353,7 @@ def _compute_tensorflow(model_names, dictionary, average_over):
        model = TFAutoModel.from_pretrained(model_name, config=config)
        tokenizer = AutoTokenizer.from_pretrained(model_name)

-        tokenized_sequence = tokenizer.encode(input_text)
+        tokenized_sequence = tokenizer.encode(input_text, add_special_tokens=False)

        max_input_size = tokenizer.max_model_input_sizes[model_name]
        batch_sizes = [1, 2, 4, 8]
--- a/examples/distillation/scripts/binarized_data.py
+++ b/examples/distillation/scripts/binarized_data.py
@@ -68,7 +68,7 @@ def main():
    start = time.time()
    for text in data:
        text = f'{bos} {text.strip()} {sep}'
-        token_ids = tokenizer.encode(text)
+        token_ids = tokenizer.encode(text, add_special_tokens=False)
        rslt.append(token_ids)

        iter += 1
--- a/examples/run_generation.py
+++ b/examples/run_generation.py
@@ -223,7 +223,7 @@ def main():
        if args.model_type in ["transfo-xl", "xlnet"]:
            # Models with memory likes to have a long prompt for short inputs.
            raw_text = (args.padding_text if args.padding_text else PADDING_TEXT) + raw_text
-        context_tokens = tokenizer.encode(raw_text)
+        context_tokens = tokenizer.encode(raw_text, add_special_tokens=False)
        out = sample_sequence(
            model=model,
            context=context_tokens,