diff --git a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py index 5944c98f07..f91c249f21 100644 --- a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py @@ -432,7 +432,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer): word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, @@ -584,7 +584,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer): word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, @@ -767,7 +767,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer): word_labels: Optional[List[int]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, @@ -814,7 +814,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer): word_labels: Optional[List[int]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, @@ -933,7 +933,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer): word_labels: Optional[List[int]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, diff --git a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py index e1d608c339..e164c49ebc 100644 --- a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py +++ b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py @@ -185,7 +185,7 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast): word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, @@ -337,7 +337,7 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast): word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, @@ -401,7 +401,7 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast): word_labels: Optional[List[int]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, diff --git a/src/transformers/models/layoutlmv3/processing_layoutlmv3.py b/src/transformers/models/layoutlmv3/processing_layoutlmv3.py index c80b2bd5f2..f254cd5b55 100644 --- a/src/transformers/models/layoutlmv3/processing_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/processing_layoutlmv3.py @@ -53,7 +53,7 @@ class LayoutLMv3Processor(ProcessorMixin): word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, diff --git a/src/transformers/models/layoutlmv3/tokenization_layoutlmv3.py b/src/transformers/models/layoutlmv3/tokenization_layoutlmv3.py index b01e70ffb0..3521266e31 100644 --- a/src/transformers/models/layoutlmv3/tokenization_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/tokenization_layoutlmv3.py @@ -554,7 +554,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer): word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, @@ -707,7 +707,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer): word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, @@ -893,7 +893,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer): word_labels: Optional[List[int]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, @@ -941,7 +941,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer): word_labels: Optional[List[int]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, @@ -1061,7 +1061,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer): word_labels: Optional[List[int]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, diff --git a/src/transformers/models/layoutlmv3/tokenization_layoutlmv3_fast.py b/src/transformers/models/layoutlmv3/tokenization_layoutlmv3_fast.py index be5f938dbf..121685fc93 100644 --- a/src/transformers/models/layoutlmv3/tokenization_layoutlmv3_fast.py +++ b/src/transformers/models/layoutlmv3/tokenization_layoutlmv3_fast.py @@ -231,7 +231,7 @@ class LayoutLMv3TokenizerFast(PreTrainedTokenizerFast): word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, @@ -384,7 +384,7 @@ class LayoutLMv3TokenizerFast(PreTrainedTokenizerFast): word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, @@ -450,7 +450,7 @@ class LayoutLMv3TokenizerFast(PreTrainedTokenizerFast): word_labels: Optional[List[int]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, diff --git a/src/transformers/models/layoutxlm/processing_layoutxlm.py b/src/transformers/models/layoutxlm/processing_layoutxlm.py index da75398493..b382c03c5f 100644 --- a/src/transformers/models/layoutxlm/processing_layoutxlm.py +++ b/src/transformers/models/layoutxlm/processing_layoutxlm.py @@ -53,7 +53,7 @@ class LayoutXLMProcessor(ProcessorMixin): word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, diff --git a/src/transformers/models/layoutxlm/tokenization_layoutxlm.py b/src/transformers/models/layoutxlm/tokenization_layoutxlm.py index 52d9b3ba80..d2f5e7514c 100644 --- a/src/transformers/models/layoutxlm/tokenization_layoutxlm.py +++ b/src/transformers/models/layoutxlm/tokenization_layoutxlm.py @@ -450,7 +450,7 @@ class LayoutXLMTokenizer(PreTrainedTokenizer): word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, @@ -777,7 +777,7 @@ class LayoutXLMTokenizer(PreTrainedTokenizer): word_labels: Optional[List[int]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, diff --git a/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py b/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py index 71a7661437..387f7cdbda 100644 --- a/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py +++ b/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py @@ -275,7 +275,7 @@ class LayoutXLMTokenizerFast(PreTrainedTokenizerFast): word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, diff --git a/src/transformers/models/luke/tokenization_luke.py b/src/transformers/models/luke/tokenization_luke.py index 98931ddb6f..65d3875d2f 100644 --- a/src/transformers/models/luke/tokenization_luke.py +++ b/src/transformers/models/luke/tokenization_luke.py @@ -268,7 +268,7 @@ class LukeTokenizer(RobertaTokenizer): entities_pair: Optional[Union[EntityInput, List[EntityInput]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, max_entity_length: Optional[int] = None, stride: int = 0, @@ -858,7 +858,7 @@ class LukeTokenizer(RobertaTokenizer): pair_entity_token_spans: Optional[List[Tuple[int, int]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, max_entity_length: Optional[int] = None, stride: int = 0, diff --git a/src/transformers/models/markuplm/processing_markuplm.py b/src/transformers/models/markuplm/processing_markuplm.py index 5740fe43ab..86ed8dc7ee 100644 --- a/src/transformers/models/markuplm/processing_markuplm.py +++ b/src/transformers/models/markuplm/processing_markuplm.py @@ -54,7 +54,7 @@ class MarkupLMProcessor(ProcessorMixin): questions=None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, diff --git a/src/transformers/models/markuplm/tokenization_markuplm.py b/src/transformers/models/markuplm/tokenization_markuplm.py index bf8d4e6dd9..faaebf3f26 100644 --- a/src/transformers/models/markuplm/tokenization_markuplm.py +++ b/src/transformers/models/markuplm/tokenization_markuplm.py @@ -514,7 +514,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer): node_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, @@ -663,7 +663,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer): node_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, @@ -844,7 +844,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer): node_labels: Optional[List[int]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, @@ -891,7 +891,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer): node_labels: Optional[List[int]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, @@ -1010,7 +1010,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer): node_labels: Optional[List[int]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, diff --git a/src/transformers/models/markuplm/tokenization_markuplm_fast.py b/src/transformers/models/markuplm/tokenization_markuplm_fast.py index 5e76f4d0bc..c55cb2b3e1 100644 --- a/src/transformers/models/markuplm/tokenization_markuplm_fast.py +++ b/src/transformers/models/markuplm/tokenization_markuplm_fast.py @@ -290,7 +290,7 @@ class MarkupLMTokenizerFast(PreTrainedTokenizerFast): node_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, @@ -438,7 +438,7 @@ class MarkupLMTokenizerFast(PreTrainedTokenizerFast): node_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, @@ -501,7 +501,7 @@ class MarkupLMTokenizerFast(PreTrainedTokenizerFast): node_labels: Optional[List[int]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, diff --git a/src/transformers/models/mluke/tokenization_mluke.py b/src/transformers/models/mluke/tokenization_mluke.py index cafb0aee01..cc347bca86 100644 --- a/src/transformers/models/mluke/tokenization_mluke.py +++ b/src/transformers/models/mluke/tokenization_mluke.py @@ -374,7 +374,7 @@ class MLukeTokenizer(PreTrainedTokenizer): entities_pair: Optional[Union[EntityInput, List[EntityInput]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, max_entity_length: Optional[int] = None, stride: int = 0, @@ -970,7 +970,7 @@ class MLukeTokenizer(PreTrainedTokenizer): pair_entity_token_spans: Optional[List[Tuple[int, int]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, max_entity_length: Optional[int] = None, stride: int = 0, diff --git a/src/transformers/models/tapex/tokenization_tapex.py b/src/transformers/models/tapex/tokenization_tapex.py index 555bf9fd2c..e9994ff7b5 100644 --- a/src/transformers/models/tapex/tokenization_tapex.py +++ b/src/transformers/models/tapex/tokenization_tapex.py @@ -518,7 +518,7 @@ class TapexTokenizer(PreTrainedTokenizer): answer: Union[str, List[str]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, @@ -595,7 +595,7 @@ class TapexTokenizer(PreTrainedTokenizer): answer: Union[str, List[str]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, @@ -683,7 +683,7 @@ class TapexTokenizer(PreTrainedTokenizer): answer: List[str] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str] = False, + truncation: Union[bool, str] = None, max_length: Optional[int] = None, pad_to_multiple_of: Optional[int] = None, return_tensors: Optional[Union[str, TensorType]] = None, @@ -871,7 +871,7 @@ class TapexTokenizer(PreTrainedTokenizer): answer: Optional[str] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy, TapexTruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy, TapexTruncationStrategy] = None, max_length: Optional[int] = None, return_tensors: Optional[Union[str, TensorType]] = None, **kwargs @@ -903,7 +903,7 @@ class TapexTokenizer(PreTrainedTokenizer): answer: Optional[str] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str] = False, + truncation: Union[bool, str] = None, max_length: Optional[int] = None, pad_to_multiple_of: Optional[int] = None, return_tensors: Optional[Union[str, TensorType]] = None, @@ -1007,7 +1007,7 @@ class TapexTokenizer(PreTrainedTokenizer): answer: Union[str, List[str]], add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, @@ -1072,7 +1072,7 @@ class TapexTokenizer(PreTrainedTokenizer): answer: List[str], add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str] = False, + truncation: Union[bool, str] = None, max_length: Optional[int] = None, pad_to_multiple_of: Optional[int] = None, return_tensors: Optional[Union[str, TensorType]] = None, @@ -1187,7 +1187,7 @@ class TapexTokenizer(PreTrainedTokenizer): answer: str, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy, TapexTruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy, TapexTruncationStrategy] = None, max_length: Optional[int] = None, return_tensors: Optional[Union[str, TensorType]] = None, **kwargs @@ -1218,7 +1218,7 @@ class TapexTokenizer(PreTrainedTokenizer): answer: str, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str] = False, + truncation: Union[bool, str] = None, max_length: Optional[int] = None, pad_to_multiple_of: Optional[int] = None, return_tensors: Optional[Union[str, TensorType]] = None, diff --git a/src/transformers/models/vilt/processing_vilt.py b/src/transformers/models/vilt/processing_vilt.py index f7e7a3eda5..7410666d8c 100644 --- a/src/transformers/models/vilt/processing_vilt.py +++ b/src/transformers/models/vilt/processing_vilt.py @@ -49,7 +49,7 @@ class ViltProcessor(ProcessorMixin): text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index cacb5285ef..19cfb5b88d 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2235,7 +2235,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): text_pair: Optional[Union[TextInput, PreTokenizedInput, EncodedInput]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, return_tensors: Optional[Union[str, TensorType]] = None, @@ -2274,7 +2274,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): raise NotImplementedError def _get_padding_truncation_strategies( - self, padding=False, truncation=False, max_length=None, pad_to_multiple_of=None, verbose=True, **kwargs + self, padding=False, truncation=None, max_length=None, pad_to_multiple_of=None, verbose=True, **kwargs ): """ Find the correct padding/truncation strategy with backward compatibility for old arguments (truncation_strategy @@ -2285,7 +2285,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): # Backward compatibility for previous behavior, maybe we should deprecate it: # If you only set max_length, it activates truncation for max_length - if max_length is not None and padding is False and truncation is False: + if max_length is not None and padding is False and truncation is None: if verbose: if not self.deprecation_warnings.get("Truncation-not-explicitly-activated", False): logger.warning( @@ -2316,7 +2316,9 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): elif padding is not False: if padding is True: if verbose: - if max_length is not None and (truncation is False or truncation == "do_not_truncate"): + if max_length is not None and ( + truncation is None or truncation is False or truncation == "do_not_truncate" + ): warnings.warn( "`max_length` is ignored when `padding`=`True` and there is no truncation strategy. " "To pad to max length, use `padding='max_length'`." @@ -2332,7 +2334,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): padding_strategy = PaddingStrategy.DO_NOT_PAD # Get truncation strategy - if truncation is False and old_truncation_strategy != "do_not_truncate": + if truncation is None and old_truncation_strategy != "do_not_truncate": if verbose: warnings.warn( "The `truncation_strategy` argument is deprecated and will be removed in a future version, use" @@ -2346,7 +2348,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): FutureWarning, ) truncation_strategy = TruncationStrategy(old_truncation_strategy) - elif truncation is not False: + elif truncation is not False and truncation is not None: if truncation is True: truncation_strategy = ( TruncationStrategy.LONGEST_FIRST @@ -2420,7 +2422,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): ] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, is_split_into_words: bool = False, @@ -2504,7 +2506,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): text_pair: Optional[Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, is_split_into_words: bool = False, @@ -2617,7 +2619,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): text_pair: Optional[Union[TextInput, PreTokenizedInput, EncodedInput]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, is_split_into_words: bool = False, @@ -2719,7 +2721,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): ], add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, is_split_into_words: bool = False, @@ -3029,7 +3031,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): pair_ids: Optional[List[int]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None,