Remove
This commit is contained in:
@@ -309,7 +309,7 @@ def _compute_pytorch(model_names, dictionary, average_over, device, torchscript)
|
||||
model = AutoModel.from_pretrained(model_name, config=config)
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||
|
||||
tokenized_sequence = tokenizer.encode(input_text)
|
||||
tokenized_sequence = tokenizer.encode(input_text, add_special_tokens=False)
|
||||
|
||||
max_input_size = tokenizer.max_model_input_sizes[model_name]
|
||||
batch_sizes = [1, 2, 4, 8]
|
||||
@@ -353,7 +353,7 @@ def _compute_tensorflow(model_names, dictionary, average_over):
|
||||
model = TFAutoModel.from_pretrained(model_name, config=config)
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||
|
||||
tokenized_sequence = tokenizer.encode(input_text)
|
||||
tokenized_sequence = tokenizer.encode(input_text, add_special_tokens=False)
|
||||
|
||||
max_input_size = tokenizer.max_model_input_sizes[model_name]
|
||||
batch_sizes = [1, 2, 4, 8]
|
||||
|
||||
@@ -68,7 +68,7 @@ def main():
|
||||
start = time.time()
|
||||
for text in data:
|
||||
text = f'{bos} {text.strip()} {sep}'
|
||||
token_ids = tokenizer.encode(text)
|
||||
token_ids = tokenizer.encode(text, add_special_tokens=False)
|
||||
rslt.append(token_ids)
|
||||
|
||||
iter += 1
|
||||
|
||||
@@ -223,7 +223,7 @@ def main():
|
||||
if args.model_type in ["transfo-xl", "xlnet"]:
|
||||
# Models with memory like to have a long prompt for short inputs.
|
||||
raw_text = (args.padding_text if args.padding_text else PADDING_TEXT) + raw_text
|
||||
context_tokens = tokenizer.encode(raw_text)
|
||||
context_tokens = tokenizer.encode(raw_text, add_special_tokens=False)
|
||||
out = sample_sequence(
|
||||
model=model,
|
||||
context=context_tokens,
|
||||
|
||||
Reference in New Issue
Block a user