More explicit error when failing to tensorize overflowing tokens (#5633)

This commit is contained in:
Lysandre Debut
2020-07-09 13:35:21 -04:00
committed by GitHub
parent b9d8af07e6
commit 3cc23eee06

View File

@@ -511,6 +511,11 @@ class BatchEncoding(UserDict):
self[key] = tensor
except: # noqa E722
if key == "overflowing_tokens":
raise ValueError(
"Unable to create tensor returning overflowing tokens of different lengths. "
"Please see if a fast version of this tokenizer is available to have this feature available."
)
raise ValueError(
"Unable to create tensor, you should probably activate truncation and/or padding "
"with 'padding=True' 'truncation=True' to have batched tensors with the same length."