Use None to detect if truncation was unset (#19794)
* Use None to detect if truncation was unset
* Fix repo consistency
This commit is contained in:
@@ -432,7 +432,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
|
|||||||
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
@@ -584,7 +584,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
|
|||||||
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
@@ -767,7 +767,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
|
|||||||
word_labels: Optional[List[int]] = None,
|
word_labels: Optional[List[int]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
@@ -814,7 +814,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
|
|||||||
word_labels: Optional[List[int]] = None,
|
word_labels: Optional[List[int]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
@@ -933,7 +933,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
|
|||||||
word_labels: Optional[List[int]] = None,
|
word_labels: Optional[List[int]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
|
|||||||
@@ -185,7 +185,7 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast):
|
|||||||
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
@@ -337,7 +337,7 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast):
|
|||||||
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
@@ -401,7 +401,7 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast):
|
|||||||
word_labels: Optional[List[int]] = None,
|
word_labels: Optional[List[int]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
|
|||||||
@@ -53,7 +53,7 @@ class LayoutLMv3Processor(ProcessorMixin):
|
|||||||
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
|
|||||||
@@ -554,7 +554,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer):
|
|||||||
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
@@ -707,7 +707,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer):
|
|||||||
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
@@ -893,7 +893,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer):
|
|||||||
word_labels: Optional[List[int]] = None,
|
word_labels: Optional[List[int]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
@@ -941,7 +941,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer):
|
|||||||
word_labels: Optional[List[int]] = None,
|
word_labels: Optional[List[int]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
@@ -1061,7 +1061,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer):
|
|||||||
word_labels: Optional[List[int]] = None,
|
word_labels: Optional[List[int]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
|
|||||||
@@ -231,7 +231,7 @@ class LayoutLMv3TokenizerFast(PreTrainedTokenizerFast):
|
|||||||
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
@@ -384,7 +384,7 @@ class LayoutLMv3TokenizerFast(PreTrainedTokenizerFast):
|
|||||||
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
@@ -450,7 +450,7 @@ class LayoutLMv3TokenizerFast(PreTrainedTokenizerFast):
|
|||||||
word_labels: Optional[List[int]] = None,
|
word_labels: Optional[List[int]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
|
|||||||
@@ -53,7 +53,7 @@ class LayoutXLMProcessor(ProcessorMixin):
|
|||||||
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
|
|||||||
@@ -450,7 +450,7 @@ class LayoutXLMTokenizer(PreTrainedTokenizer):
|
|||||||
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
@@ -777,7 +777,7 @@ class LayoutXLMTokenizer(PreTrainedTokenizer):
|
|||||||
word_labels: Optional[List[int]] = None,
|
word_labels: Optional[List[int]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
|
|||||||
@@ -275,7 +275,7 @@ class LayoutXLMTokenizerFast(PreTrainedTokenizerFast):
|
|||||||
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
|
|||||||
@@ -268,7 +268,7 @@ class LukeTokenizer(RobertaTokenizer):
|
|||||||
entities_pair: Optional[Union[EntityInput, List[EntityInput]]] = None,
|
entities_pair: Optional[Union[EntityInput, List[EntityInput]]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
max_entity_length: Optional[int] = None,
|
max_entity_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
@@ -858,7 +858,7 @@ class LukeTokenizer(RobertaTokenizer):
|
|||||||
pair_entity_token_spans: Optional[List[Tuple[int, int]]] = None,
|
pair_entity_token_spans: Optional[List[Tuple[int, int]]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
max_entity_length: Optional[int] = None,
|
max_entity_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
|
|||||||
@@ -54,7 +54,7 @@ class MarkupLMProcessor(ProcessorMixin):
|
|||||||
questions=None,
|
questions=None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
|
|||||||
@@ -514,7 +514,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer):
|
|||||||
node_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
node_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
@@ -663,7 +663,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer):
|
|||||||
node_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
node_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
@@ -844,7 +844,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer):
|
|||||||
node_labels: Optional[List[int]] = None,
|
node_labels: Optional[List[int]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
@@ -891,7 +891,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer):
|
|||||||
node_labels: Optional[List[int]] = None,
|
node_labels: Optional[List[int]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
@@ -1010,7 +1010,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer):
|
|||||||
node_labels: Optional[List[int]] = None,
|
node_labels: Optional[List[int]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
|
|||||||
@@ -290,7 +290,7 @@ class MarkupLMTokenizerFast(PreTrainedTokenizerFast):
|
|||||||
node_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
node_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
@@ -438,7 +438,7 @@ class MarkupLMTokenizerFast(PreTrainedTokenizerFast):
|
|||||||
node_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
node_labels: Optional[Union[List[int], List[List[int]]]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
@@ -501,7 +501,7 @@ class MarkupLMTokenizerFast(PreTrainedTokenizerFast):
|
|||||||
node_labels: Optional[List[int]] = None,
|
node_labels: Optional[List[int]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
|
|||||||
@@ -374,7 +374,7 @@ class MLukeTokenizer(PreTrainedTokenizer):
|
|||||||
entities_pair: Optional[Union[EntityInput, List[EntityInput]]] = None,
|
entities_pair: Optional[Union[EntityInput, List[EntityInput]]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
max_entity_length: Optional[int] = None,
|
max_entity_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
@@ -970,7 +970,7 @@ class MLukeTokenizer(PreTrainedTokenizer):
|
|||||||
pair_entity_token_spans: Optional[List[Tuple[int, int]]] = None,
|
pair_entity_token_spans: Optional[List[Tuple[int, int]]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
max_entity_length: Optional[int] = None,
|
max_entity_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
|
|||||||
@@ -518,7 +518,7 @@ class TapexTokenizer(PreTrainedTokenizer):
|
|||||||
answer: Union[str, List[str]] = None,
|
answer: Union[str, List[str]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
@@ -595,7 +595,7 @@ class TapexTokenizer(PreTrainedTokenizer):
|
|||||||
answer: Union[str, List[str]] = None,
|
answer: Union[str, List[str]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
@@ -683,7 +683,7 @@ class TapexTokenizer(PreTrainedTokenizer):
|
|||||||
answer: List[str] = None,
|
answer: List[str] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str] = False,
|
truncation: Union[bool, str] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||||
@@ -871,7 +871,7 @@ class TapexTokenizer(PreTrainedTokenizer):
|
|||||||
answer: Optional[str] = None,
|
answer: Optional[str] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy, TapexTruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy, TapexTruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||||
**kwargs
|
**kwargs
|
||||||
@@ -903,7 +903,7 @@ class TapexTokenizer(PreTrainedTokenizer):
|
|||||||
answer: Optional[str] = None,
|
answer: Optional[str] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str] = False,
|
truncation: Union[bool, str] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||||
@@ -1007,7 +1007,7 @@ class TapexTokenizer(PreTrainedTokenizer):
|
|||||||
answer: Union[str, List[str]],
|
answer: Union[str, List[str]],
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
@@ -1072,7 +1072,7 @@ class TapexTokenizer(PreTrainedTokenizer):
|
|||||||
answer: List[str],
|
answer: List[str],
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str] = False,
|
truncation: Union[bool, str] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||||
@@ -1187,7 +1187,7 @@ class TapexTokenizer(PreTrainedTokenizer):
|
|||||||
answer: str,
|
answer: str,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy, TapexTruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy, TapexTruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||||
**kwargs
|
**kwargs
|
||||||
@@ -1218,7 +1218,7 @@ class TapexTokenizer(PreTrainedTokenizer):
|
|||||||
answer: str,
|
answer: str,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str] = False,
|
truncation: Union[bool, str] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||||
|
|||||||
@@ -49,7 +49,7 @@ class ViltProcessor(ProcessorMixin):
|
|||||||
text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None,
|
text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
|
|||||||
@@ -2235,7 +2235,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
|||||||
text_pair: Optional[Union[TextInput, PreTokenizedInput, EncodedInput]] = None,
|
text_pair: Optional[Union[TextInput, PreTokenizedInput, EncodedInput]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||||
@@ -2274,7 +2274,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
|||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def _get_padding_truncation_strategies(
|
def _get_padding_truncation_strategies(
|
||||||
self, padding=False, truncation=False, max_length=None, pad_to_multiple_of=None, verbose=True, **kwargs
|
self, padding=False, truncation=None, max_length=None, pad_to_multiple_of=None, verbose=True, **kwargs
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Find the correct padding/truncation strategy with backward compatibility for old arguments (truncation_strategy
|
Find the correct padding/truncation strategy with backward compatibility for old arguments (truncation_strategy
|
||||||
@@ -2285,7 +2285,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
|||||||
|
|
||||||
# Backward compatibility for previous behavior, maybe we should deprecate it:
|
# Backward compatibility for previous behavior, maybe we should deprecate it:
|
||||||
# If you only set max_length, it activates truncation for max_length
|
# If you only set max_length, it activates truncation for max_length
|
||||||
if max_length is not None and padding is False and truncation is False:
|
if max_length is not None and padding is False and truncation is None:
|
||||||
if verbose:
|
if verbose:
|
||||||
if not self.deprecation_warnings.get("Truncation-not-explicitly-activated", False):
|
if not self.deprecation_warnings.get("Truncation-not-explicitly-activated", False):
|
||||||
logger.warning(
|
logger.warning(
|
||||||
@@ -2316,7 +2316,9 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
|||||||
elif padding is not False:
|
elif padding is not False:
|
||||||
if padding is True:
|
if padding is True:
|
||||||
if verbose:
|
if verbose:
|
||||||
if max_length is not None and (truncation is False or truncation == "do_not_truncate"):
|
if max_length is not None and (
|
||||||
|
truncation is None or truncation is False or truncation == "do_not_truncate"
|
||||||
|
):
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
"`max_length` is ignored when `padding`=`True` and there is no truncation strategy. "
|
"`max_length` is ignored when `padding`=`True` and there is no truncation strategy. "
|
||||||
"To pad to max length, use `padding='max_length'`."
|
"To pad to max length, use `padding='max_length'`."
|
||||||
@@ -2332,7 +2334,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
|||||||
padding_strategy = PaddingStrategy.DO_NOT_PAD
|
padding_strategy = PaddingStrategy.DO_NOT_PAD
|
||||||
|
|
||||||
# Get truncation strategy
|
# Get truncation strategy
|
||||||
if truncation is False and old_truncation_strategy != "do_not_truncate":
|
if truncation is None and old_truncation_strategy != "do_not_truncate":
|
||||||
if verbose:
|
if verbose:
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
"The `truncation_strategy` argument is deprecated and will be removed in a future version, use"
|
"The `truncation_strategy` argument is deprecated and will be removed in a future version, use"
|
||||||
@@ -2346,7 +2348,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
|||||||
FutureWarning,
|
FutureWarning,
|
||||||
)
|
)
|
||||||
truncation_strategy = TruncationStrategy(old_truncation_strategy)
|
truncation_strategy = TruncationStrategy(old_truncation_strategy)
|
||||||
elif truncation is not False:
|
elif truncation is not False and truncation is not None:
|
||||||
if truncation is True:
|
if truncation is True:
|
||||||
truncation_strategy = (
|
truncation_strategy = (
|
||||||
TruncationStrategy.LONGEST_FIRST
|
TruncationStrategy.LONGEST_FIRST
|
||||||
@@ -2420,7 +2422,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
|||||||
] = None,
|
] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
is_split_into_words: bool = False,
|
is_split_into_words: bool = False,
|
||||||
@@ -2504,7 +2506,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
|||||||
text_pair: Optional[Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]]] = None,
|
text_pair: Optional[Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
is_split_into_words: bool = False,
|
is_split_into_words: bool = False,
|
||||||
@@ -2617,7 +2619,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
|||||||
text_pair: Optional[Union[TextInput, PreTokenizedInput, EncodedInput]] = None,
|
text_pair: Optional[Union[TextInput, PreTokenizedInput, EncodedInput]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
is_split_into_words: bool = False,
|
is_split_into_words: bool = False,
|
||||||
@@ -2719,7 +2721,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
|||||||
],
|
],
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
is_split_into_words: bool = False,
|
is_split_into_words: bool = False,
|
||||||
@@ -3029,7 +3031,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
|||||||
pair_ids: Optional[List[int]] = None,
|
pair_ids: Optional[List[int]] = None,
|
||||||
add_special_tokens: bool = True,
|
add_special_tokens: bool = True,
|
||||||
padding: Union[bool, str, PaddingStrategy] = False,
|
padding: Union[bool, str, PaddingStrategy] = False,
|
||||||
truncation: Union[bool, str, TruncationStrategy] = False,
|
truncation: Union[bool, str, TruncationStrategy] = None,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
stride: int = 0,
|
stride: int = 0,
|
||||||
pad_to_multiple_of: Optional[int] = None,
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
|
|||||||
Reference in New Issue
Block a user