More explicit error when failing to tensorize overflowing tokens (#5633)
This commit is contained in:
@@ -511,6 +511,11 @@ class BatchEncoding(UserDict):
|
|||||||
|
|
||||||
self[key] = tensor
|
self[key] = tensor
|
||||||
except: # noqa E722
|
except: # noqa E722
|
||||||
|
if key == "overflowing_tokens":
|
||||||
|
raise ValueError(
|
||||||
|
"Unable to create tensor returning overflowing tokens of different lengths. "
|
||||||
|
"Please see if a fast version of this tokenizer is available to have this feature available."
|
||||||
|
)
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Unable to create tensor, you should probably activate truncation and/or padding "
|
"Unable to create tensor, you should probably activate truncation and/or padding "
|
||||||
"with 'padding=True' 'truncation=True' to have batched tensors with the same length."
|
"with 'padding=True' 'truncation=True' to have batched tensors with the same length."
|
||||||
|
|||||||
Reference in New Issue
Block a user