This commit is contained in:
Lysandre
2019-10-22 14:12:33 -04:00
parent 44286b94d3
commit 7d709e55ed
10 changed files with 41 additions and 39 deletions

View File

@@ -309,7 +309,7 @@ def _compute_pytorch(model_names, dictionary, average_over, device, torchscript)
model = AutoModel.from_pretrained(model_name, config=config)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenized_sequence = tokenizer.encode(input_text)
tokenized_sequence = tokenizer.encode(input_text, add_special_tokens=False)
max_input_size = tokenizer.max_model_input_sizes[model_name]
batch_sizes = [1, 2, 4, 8]
@@ -353,7 +353,7 @@ def _compute_tensorflow(model_names, dictionary, average_over):
model = TFAutoModel.from_pretrained(model_name, config=config)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenized_sequence = tokenizer.encode(input_text)
tokenized_sequence = tokenizer.encode(input_text, add_special_tokens=False)
max_input_size = tokenizer.max_model_input_sizes[model_name]
batch_sizes = [1, 2, 4, 8]

View File

@@ -68,7 +68,7 @@ def main():
start = time.time()
for text in data:
text = f'{bos} {text.strip()} {sep}'
token_ids = tokenizer.encode(text)
token_ids = tokenizer.encode(text, add_special_tokens=False)
rslt.append(token_ids)
iter += 1

View File

@@ -223,7 +223,7 @@ def main():
if args.model_type in ["transfo-xl", "xlnet"]:
# Models with memory likes to have a long prompt for short inputs.
raw_text = (args.padding_text if args.padding_text else PADDING_TEXT) + raw_text
context_tokens = tokenizer.encode(raw_text)
context_tokens = tokenizer.encode(raw_text, add_special_tokens=False)
out = sample_sequence(
model=model,
context=context_tokens,