diff --git a/pyproject.toml b/pyproject.toml index 83a4bf3ad3..b8f4676a2b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,10 +19,13 @@ line-length = 119 [tool.ruff.lint] # Never enforce `E501` (line length violations). -ignore = ["C901", "E501", "E741", "F402", "F823"] +# SIM300: Yoda condition detected +# SIM212: Checks for if expressions that check against a negated condition. +# SIM905: Consider using a list literal instead of `str.split` +ignore = ["C901", "E501", "E741", "F402", "F823", "SIM1", "SIM300", "SIM212", "SIM905"] # RUF013: Checks for the use of implicit Optional # in type annotations when the default parameter value is None. -select = ["C", "E", "F", "I", "W", "RUF013", "UP006", "PERF102", "PLC1802", "PLC0208"] +select = ["C", "E", "F", "I", "W", "RUF013", "UP006", "PERF102", "PLC1802", "PLC0208","SIM"] extend-safe-fixes = ["UP006"] # Ignore import violations in all `__init__.py` files. diff --git a/src/transformers/commands/serving.py b/src/transformers/commands/serving.py index c3f9bc2838..801901896d 100644 --- a/src/transformers/commands/serving.py +++ b/src/transformers/commands/serving.py @@ -901,7 +901,7 @@ class ServeCommand(BaseTransformersCLICommand): inputs = processor.apply_chat_template( processor_inputs, add_generation_prompt=True, - tools=req.get("tools", None), + tools=req.get("tools"), return_tensors="pt", return_dict=True, tokenize=True, diff --git a/src/transformers/data/data_collator.py b/src/transformers/data/data_collator.py index 18f2980a38..92dbacbe5d 100644 --- a/src/transformers/data/data_collator.py +++ b/src/transformers/data/data_collator.py @@ -183,7 +183,7 @@ def tf_default_data_collator(features: list[InputDataClass]) -> dict[str, Any]: if label_col_name is not None: if isinstance(first[label_col_name], tf.Tensor): dtype = tf.int64 if first[label_col_name].dtype.is_integer else tf.float32 - elif isinstance(first[label_col_name], np.ndarray) or isinstance(first[label_col_name], np.generic): + elif isinstance(first[label_col_name], (np.ndarray, np.generic)): dtype = tf.int64 if np.issubdtype(first[label_col_name].dtype, np.integer) else tf.float32 elif isinstance(first[label_col_name], (tuple, list)): dtype = tf.int64 if isinstance(first[label_col_name][0], int) else tf.float32 diff --git a/src/transformers/generation/utils.py b/src/transformers/generation/utils.py index 8c4d5369c1..177cfeeda6 100644 --- a/src/transformers/generation/utils.py +++ b/src/transformers/generation/utils.py @@ -647,8 +647,8 @@ class GenerationMixin(ContinuousMixin): # If it's not defined, it means the model uses the new general mask API if causal_mask_creation_function is None: # can't be found - token_type_ids = model_inputs.get("token_type_ids", None) - position_ids = model_inputs.get(position_ids_key, None) + token_type_ids = model_inputs.get("token_type_ids") + position_ids = model_inputs.get(position_ids_key) # Some models may overwrite the general one causal_mask_creation_function = getattr(self, "create_masks_for_generate", create_masks_for_generate) attention_mask = causal_mask_creation_function( diff --git a/src/transformers/integrations/flex_attention.py b/src/transformers/integrations/flex_attention.py index 31d3b3b14d..298a115517 100644 --- a/src/transformers/integrations/flex_attention.py +++ b/src/transformers/integrations/flex_attention.py @@ -284,7 +284,7 @@ def flex_attention_forward( num_local_query_heads = query.shape[1] # When running TP this helps: - if not ((num_local_query_heads & (num_local_query_heads - 1)) == 0): + if (num_local_query_heads & (num_local_query_heads - 1)) != 0: key = repeat_kv(key, query.shape[1] // key.shape[1]) value = repeat_kv(value, query.shape[1] // value.shape[1]) enable_gqa = False diff --git a/src/transformers/integrations/vptq.py b/src/transformers/integrations/vptq.py index 643fa91e65..00b55c6bb7 100644 --- a/src/transformers/integrations/vptq.py +++ b/src/transformers/integrations/vptq.py @@ -45,7 +45,7 @@ def replace_with_vptq_linear( should not be passed by the user. """ - modules_to_not_convert = ["lm_head"] if not modules_to_not_convert else modules_to_not_convert + modules_to_not_convert = modules_to_not_convert if modules_to_not_convert else ["lm_head"] for name, module in model.named_children(): if current_key_name is None: diff --git a/src/transformers/keras_callbacks.py b/src/transformers/keras_callbacks.py index b6669a7b45..ab7fc4615b 100644 --- a/src/transformers/keras_callbacks.py +++ b/src/transformers/keras_callbacks.py @@ -167,7 +167,7 @@ class KerasMetricCallback(keras.callbacks.Callback): # If it's a dict with only one key, just return the array if len(outputs) == 1: outputs = list(outputs.values())[0] - elif isinstance(inputs[0], list) or isinstance(inputs[0], tuple): + elif isinstance(inputs[0], (tuple, list)): outputs = [] for input_list in zip(*inputs): outputs.append(self._concatenate_batches(input_list)) diff --git a/src/transformers/models/aya_vision/configuration_aya_vision.py b/src/transformers/models/aya_vision/configuration_aya_vision.py index 1b79c156a9..a8c1965ec4 100644 --- a/src/transformers/models/aya_vision/configuration_aya_vision.py +++ b/src/transformers/models/aya_vision/configuration_aya_vision.py @@ -81,9 +81,7 @@ class AyaVisionConfig(PretrainedConfig): self.vision_feature_layer = vision_feature_layer if isinstance(vision_config, dict): - vision_config["model_type"] = ( - vision_config["model_type"] if "model_type" in vision_config else "siglip_vision_model" - ) + vision_config["model_type"] = vision_config.get("model_type", "siglip_vision_model") vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config) elif vision_config is None: vision_config = CONFIG_MAPPING["siglip_vision_model"]( @@ -99,7 +97,7 @@ class AyaVisionConfig(PretrainedConfig): self.vision_config = vision_config if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "cohere2" + text_config["model_type"] = text_config.get("model_type", "cohere2") text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: text_config = CONFIG_MAPPING["cohere2"]() diff --git a/src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py index 8da189b1b3..616e9ed665 100755 --- a/src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py @@ -148,7 +148,7 @@ def rewrite_dict_keys(d): # (1) remove word breaking symbol, (2) add word ending symbol where the word is not broken up, # e.g.: d = {'le@@': 5, 'tt@@': 6, 'er': 7} => {'le': 5, 'tt': 6, 'er': 7} d2 = dict((re.sub(r"@@$", "", k), v) if k.endswith("@@") else (re.sub(r"$", "", k), v) for k, v in d.items()) - keep_keys = " ".split() + keep_keys = ["", "", "", ""] # restore the special tokens for k in keep_keys: del d2[f"{k}"] diff --git a/src/transformers/models/clvp/modeling_clvp.py b/src/transformers/models/clvp/modeling_clvp.py index 314c18c4d0..c8077eb871 100644 --- a/src/transformers/models/clvp/modeling_clvp.py +++ b/src/transformers/models/clvp/modeling_clvp.py @@ -1303,7 +1303,7 @@ class ClvpForCausalLM(ClvpPreTrainedModel, GenerationMixin): # Check if conditioning_embeds are provided or not, if yes then concatenate the bos_token_id at the end of the conditioning_embeds. # Then we must subtract the positional_ids because during the forward pass it will be added anyways, so we must cancel them out here. - conditioning_embeds = model_kwargs.get("conditioning_embeds", None) + conditioning_embeds = model_kwargs.get("conditioning_embeds") if conditioning_embeds is not None: mel_start_token_embedding = self.model.decoder.input_embeds_layer( diff --git a/src/transformers/models/cohere2_vision/configuration_cohere2_vision.py b/src/transformers/models/cohere2_vision/configuration_cohere2_vision.py index e4e670e4a6..acc40fcf85 100644 --- a/src/transformers/models/cohere2_vision/configuration_cohere2_vision.py +++ b/src/transformers/models/cohere2_vision/configuration_cohere2_vision.py @@ -57,9 +57,7 @@ class Cohere2VisionConfig(PretrainedConfig): self.alignment_intermediate_size = alignment_intermediate_size if isinstance(vision_config, dict): - vision_config["model_type"] = ( - vision_config["model_type"] if "model_type" in vision_config else "siglip_vision_model" - ) + vision_config["model_type"] = vision_config.get("model_type", "siglip_vision_model") vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config) elif vision_config is None: vision_config = CONFIG_MAPPING["siglip_vision_model"]( @@ -73,7 +71,7 @@ class Cohere2VisionConfig(PretrainedConfig): self.vision_config = vision_config if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "cohere2" + text_config["model_type"] = text_config.get("model_type", "cohere2") text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: text_config = CONFIG_MAPPING["cohere2"](tie_word_embeddings=True) diff --git a/src/transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py b/src/transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py index 5bebf43c9b..f82b01de48 100644 --- a/src/transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +++ b/src/transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py @@ -66,7 +66,7 @@ class DeepseekVLImageProcessorFast(BaseImageProcessorFast): def __init__(self, **kwargs: Unpack[DeepseekVLFastImageProcessorKwargs]): super().__init__(**kwargs) - if kwargs.get("image_mean", None) is None: + if kwargs.get("image_mean") is None: background_color = (127, 127, 127) else: background_color = tuple([int(x * 255) for x in kwargs.get("image_mean")]) diff --git a/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py b/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py index 2120a65dd4..d0b22c70dc 100644 --- a/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +++ b/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py @@ -93,11 +93,11 @@ class DeepseekVLHybridImageProcessorFast(BaseImageProcessorFast): high_res_resample = PILImageResampling.BICUBIC def __init__(self, **kwargs: Unpack[DeepseekVLHybridFastImageProcessorKwargs]): - if kwargs.get("image_mean", None) is None: + if kwargs.get("image_mean") is None: background_color = (127, 127, 127) else: background_color = tuple([int(x * 255) for x in kwargs.get("image_mean")]) - if kwargs.get("high_res_image_mean", None) is None: + if kwargs.get("high_res_image_mean") is None: high_res_background_color = (127, 127, 127) else: high_res_background_color = tuple([int(x * 255) for x in kwargs.get("high_res_image_mean")]) diff --git a/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py b/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py index e3149b4202..058bbc4b8a 100644 --- a/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +++ b/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py @@ -749,11 +749,11 @@ class DeepseekVLHybridImageProcessorFast(DeepseekVLImageProcessorFast): high_res_resample = PILImageResampling.BICUBIC def __init__(self, **kwargs: Unpack[DeepseekVLHybridFastImageProcessorKwargs]): - if kwargs.get("image_mean", None) is None: + if kwargs.get("image_mean") is None: background_color = (127, 127, 127) else: background_color = tuple([int(x * 255) for x in kwargs.get("image_mean")]) - if kwargs.get("high_res_image_mean", None) is None: + if kwargs.get("high_res_image_mean") is None: high_res_background_color = (127, 127, 127) else: high_res_background_color = tuple([int(x * 255) for x in kwargs.get("high_res_image_mean")]) diff --git a/src/transformers/models/fsmt/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/fsmt/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py index 35e8265850..07a83a1cb0 100755 --- a/src/transformers/models/fsmt/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/fsmt/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py @@ -79,7 +79,7 @@ def rewrite_dict_keys(d): # (1) remove word breaking symbol, (2) add word ending symbol where the word is not broken up, # e.g.: d = {'le@@': 5, 'tt@@': 6, 'er': 7} => {'le': 5, 'tt': 6, 'er': 7} d2 = dict((re.sub(r"@@$", "", k), v) if k.endswith("@@") else (re.sub(r"$", "", k), v) for k, v in d.items()) - keep_keys = " ".split() + keep_keys = ["", "", "", ""] # restore the special tokens for k in keep_keys: del d2[f"{k}"] diff --git a/src/transformers/models/janus/image_processing_janus_fast.py b/src/transformers/models/janus/image_processing_janus_fast.py index 81f9bafed7..17df6ba9ea 100644 --- a/src/transformers/models/janus/image_processing_janus_fast.py +++ b/src/transformers/models/janus/image_processing_janus_fast.py @@ -71,7 +71,7 @@ class JanusImageProcessorFast(BaseImageProcessorFast): valid_kwargs = JanusFastImageProcessorKwargs def __init__(self, **kwargs: Unpack[JanusFastImageProcessorKwargs]): - if kwargs.get("image_mean", None) is None: + if kwargs.get("image_mean") is None: background_color = (127, 127, 127) else: background_color = tuple([int(x * 255) for x in kwargs.get("image_mean")]) diff --git a/src/transformers/models/jetmoe/modeling_jetmoe.py b/src/transformers/models/jetmoe/modeling_jetmoe.py index 2159761547..615aacdf87 100644 --- a/src/transformers/models/jetmoe/modeling_jetmoe.py +++ b/src/transformers/models/jetmoe/modeling_jetmoe.py @@ -857,9 +857,7 @@ class JetMoePreTrainedModel(PreTrainedModel): module.weight.data.fill_(1.0) elif isinstance(module, JetMoeParallelExperts): module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) - elif isinstance(module, JetMoeMoA): - module.bias.data.zero_() - elif isinstance(module, JetMoeMoE): + elif isinstance(module, (JetMoeMoA, JetMoeMoE)): module.bias.data.zero_() diff --git a/src/transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py b/src/transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py index 193faecf39..e49ccde7e2 100644 --- a/src/transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +++ b/src/transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py @@ -260,7 +260,7 @@ class MMGroundingDinoConfig(PretrainedConfig): self.disable_custom_kernels = disable_custom_kernels # Text backbone if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "bert" + text_config["model_type"] = text_config.get("model_type", "bert") text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: text_config = CONFIG_MAPPING["bert"]() diff --git a/src/transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py b/src/transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py index 6fc13df410..aea644fdd6 100644 --- a/src/transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +++ b/src/transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py @@ -268,7 +268,7 @@ class MMGroundingDinoConfig(GroundingDinoConfig, PretrainedConfig): self.disable_custom_kernels = disable_custom_kernels # Text backbone if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "bert" + text_config["model_type"] = text_config.get("model_type", "bert") text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: text_config = CONFIG_MAPPING["bert"]() diff --git a/src/transformers/models/oneformer/modeling_oneformer.py b/src/transformers/models/oneformer/modeling_oneformer.py index 53160467c7..d6af3e57e5 100644 --- a/src/transformers/models/oneformer/modeling_oneformer.py +++ b/src/transformers/models/oneformer/modeling_oneformer.py @@ -2794,11 +2794,7 @@ class OneFormerPreTrainedModel(PreTrainedModel): nn.init.constant_(module.output_proj.bias.data, 0.0) elif isinstance(module, OneFormerPixelDecoder): nn.init.normal_(module.level_embed, std=0) - elif isinstance(module, OneFormerTransformerDecoderLayer): - for p in module.parameters(): - if p.dim() > 1: - nn.init.xavier_uniform_(p, gain=xavier_std) - elif isinstance(module, OneFormerTransformerDecoderQueryTransformer): + elif isinstance(module, (OneFormerTransformerDecoderLayer, OneFormerTransformerDecoderQueryTransformer)): for p in module.parameters(): if p.dim() > 1: nn.init.xavier_uniform_(p, gain=xavier_std) diff --git a/src/transformers/pipelines/fill_mask.py b/src/transformers/pipelines/fill_mask.py index f5dbe71dad..cc69cf6d27 100644 --- a/src/transformers/pipelines/fill_mask.py +++ b/src/transformers/pipelines/fill_mask.py @@ -197,7 +197,7 @@ class FillMaskPipeline(Pipeline): vocab = {} target_ids = [] for target in targets: - id_ = vocab.get(target, None) + id_ = vocab.get(target) if id_ is None: input_ids = self.tokenizer( target, diff --git a/src/transformers/pipelines/token_classification.py b/src/transformers/pipelines/token_classification.py index 22faadeddd..efa70ca185 100644 --- a/src/transformers/pipelines/token_classification.py +++ b/src/transformers/pipelines/token_classification.py @@ -427,9 +427,11 @@ class TokenClassificationPipeline(ChunkPipeline): if previous_entity["start"] <= entity["start"] < previous_entity["end"]: current_length = entity["end"] - entity["start"] previous_length = previous_entity["end"] - previous_entity["start"] - if current_length > previous_length: - previous_entity = entity - elif current_length == previous_length and entity["score"] > previous_entity["score"]: + if ( + current_length > previous_length + or current_length == previous_length + and entity["score"] > previous_entity["score"] + ): previous_entity = entity else: aggregated_entities.append(previous_entity) diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index 0580fa7287..4934d27fb6 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -2637,9 +2637,7 @@ def nested_simplify(obj, decimals=3): return nested_simplify(obj.tolist()) elif isinstance(obj, Mapping): return {nested_simplify(k, decimals): nested_simplify(v, decimals) for k, v in obj.items()} - elif isinstance(obj, (str, int, np.int64)): - return obj - elif obj is None: + elif isinstance(obj, (str, int, np.int64)) or obj is None: return obj elif is_torch_available() and isinstance(obj, torch.Tensor): return nested_simplify(obj.tolist(), decimals) diff --git a/src/transformers/tokenization_mistral_common.py b/src/transformers/tokenization_mistral_common.py index cd85a641cb..a362a7c8b0 100644 --- a/src/transformers/tokenization_mistral_common.py +++ b/src/transformers/tokenization_mistral_common.py @@ -1784,9 +1784,7 @@ class MistralCommonTokenizer(PushToHubMixin): pathlib_repo_file = Path(path) file_name = pathlib_repo_file.name suffix = "".join(pathlib_repo_file.suffixes) - if file_name == "tekken.json": - valid_tokenizer_files.append(file_name) - elif suffix in sentencepiece_suffixes: + if file_name == "tekken.json" or suffix in sentencepiece_suffixes: valid_tokenizer_files.append(file_name) if len(valid_tokenizer_files) == 0: diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py index 9eaf804fab..1528d085d6 100644 --- a/src/transformers/tokenization_utils.py +++ b/src/transformers/tokenization_utils.py @@ -877,9 +877,11 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase): input_ids = [] for ids_or_pair_ids in batch_text_or_text_pairs: - if not isinstance(ids_or_pair_ids, (list, tuple)): - ids, pair_ids = ids_or_pair_ids, None - elif is_split_into_words and not isinstance(ids_or_pair_ids[0], (list, tuple)): + if ( + not isinstance(ids_or_pair_ids, (list, tuple)) + or is_split_into_words + and not isinstance(ids_or_pair_ids[0], (list, tuple)) + ): ids, pair_ids = ids_or_pair_ids, None else: ids, pair_ids = ids_or_pair_ids diff --git a/src/transformers/trainer_pt_utils.py b/src/transformers/trainer_pt_utils.py index e93e279052..c32516b167 100644 --- a/src/transformers/trainer_pt_utils.py +++ b/src/transformers/trainer_pt_utils.py @@ -153,9 +153,7 @@ def find_batch_size(tensors): result = find_batch_size(value) if result is not None: return result - elif isinstance(tensors, torch.Tensor): - return tensors.shape[0] if len(tensors.shape) >= 1 else None - elif isinstance(tensors, np.ndarray): + elif isinstance(tensors, (torch.Tensor, np.ndarray)): return tensors.shape[0] if len(tensors.shape) >= 1 else None @@ -634,10 +632,7 @@ class LengthGroupedSampler(Sampler): self.batch_size = batch_size if lengths is None: model_input_name = model_input_name if model_input_name is not None else "input_ids" - if ( - not (isinstance(dataset[0], dict) or isinstance(dataset[0], BatchEncoding)) - or model_input_name not in dataset[0] - ): + if not isinstance(dataset[0], (dict, BatchEncoding)) or model_input_name not in dataset[0]: raise ValueError( "Can only automatically infer lengths for datasets whose items are dictionaries with an " f"'{model_input_name}' key." @@ -697,10 +692,7 @@ class DistributedLengthGroupedSampler(DistributedSampler): if lengths is None: model_input_name = model_input_name if model_input_name is not None else "input_ids" - if ( - not (isinstance(dataset[0], dict) or isinstance(dataset[0], BatchEncoding)) - or model_input_name not in dataset[0] - ): + if not isinstance(dataset[0], (dict, BatchEncoding)) or model_input_name not in dataset[0]: raise ValueError( "Can only automatically infer lengths for datasets whose items are dictionaries with an " f"'{model_input_name}' key." diff --git a/src/transformers/utils/auto_docstring.py b/src/transformers/utils/auto_docstring.py index ba03cf9cfe..6f9e099944 100644 --- a/src/transformers/utils/auto_docstring.py +++ b/src/transformers/utils/auto_docstring.py @@ -1404,8 +1404,8 @@ def _process_regular_parameters( param_type = f"[`{class_name}`]" else: param_type = f"[`{param_type.split('.')[-1]}`]" - elif param_type == "" and False: # TODO: Enforce typing for all parameters - print(f"🚨 {param_name} for {func.__qualname__} in file {func.__code__.co_filename} has no type") + # elif param_type == "" and False: # TODO: Enforce typing for all parameters + # print(f"🚨 {param_name} for {func.__qualname__} in file {func.__code__.co_filename} has no type") param_type = param_type if "`" in param_type else f"`{param_type}`" # Format the parameter docstring if additional_info: @@ -1833,7 +1833,7 @@ def auto_class_docstring(cls, custom_intro=None, custom_args=None, checkpoint=No docstring += set_min_indent(f"\n{docstring_init}", indent_level) elif is_dataclass: # No init function, we have a data class - docstring += "\nArgs:\n" if not docstring_args else docstring_args + docstring += docstring_args if docstring_args else "\nArgs:\n" source_args_dict = get_args_doc_from_source(ModelOutputArgs) doc_class = cls.__doc__ if cls.__doc__ else "" documented_kwargs, _ = parse_docstring(doc_class) diff --git a/src/transformers/utils/fx.py b/src/transformers/utils/fx.py index 86bcf00c35..8a48cf9cbb 100755 --- a/src/transformers/utils/fx.py +++ b/src/transformers/utils/fx.py @@ -749,9 +749,7 @@ def create_wrapper( tracer = found_proxies[0].tracer if op_type == "call_function": target = function - elif op_type == "call_method": - target = function.__name__ - elif op_type == "get_attr": + elif op_type == "call_method" or op_type == "get_attr": target = function.__name__ else: raise ValueError(f"op_type {op_type} not supported.") diff --git a/src/transformers/utils/import_utils.py b/src/transformers/utils/import_utils.py index ea54336f14..163ee04109 100644 --- a/src/transformers/utils/import_utils.py +++ b/src/transformers/utils/import_utils.py @@ -451,9 +451,7 @@ def get_torch_major_and_minor_version() -> str: def is_torch_sdpa_available(): - if not is_torch_available(): - return False - elif _torch_version == "N/A": + if not is_torch_available() or _torch_version == "N/A": return False # NOTE: MLU is OK with non-contiguous inputs. @@ -467,9 +465,7 @@ def is_torch_sdpa_available(): def is_torch_flex_attn_available(): - if not is_torch_available(): - return False - elif _torch_version == "N/A": + if not is_torch_available() or _torch_version == "N/A": return False # TODO check if some bugs cause push backs on the exact version @@ -1088,7 +1084,7 @@ def is_ninja_available(): [ninja](https://ninja-build.org/) build system is available on the system, `False` otherwise. """ try: - subprocess.check_output("ninja --version".split()) + subprocess.check_output(["ninja", "--version"]) except Exception: return False else: diff --git a/tests/deepspeed/test_deepspeed.py b/tests/deepspeed/test_deepspeed.py index 1a4966b09e..99b1450a0d 100644 --- a/tests/deepspeed/test_deepspeed.py +++ b/tests/deepspeed/test_deepspeed.py @@ -1222,7 +1222,7 @@ class TestDeepSpeedWithLauncher(TestCasePlus): # this is just inference, so no optimizer should be loaded # it only works for z3 (makes no sense with z1-z2) - fp32 = True if dtype == "fp32" else False + fp32 = dtype == "fp32" self.run_and_check( stage=ZERO3, dtype=dtype, @@ -1337,13 +1337,7 @@ class TestDeepSpeedWithLauncher(TestCasePlus): if do_eval: actions += 1 - args.extend( - """ - --do_eval - --max_eval_samples 16 - --per_device_eval_batch_size 2 - """.split() - ) + args.extend(["--do_eval", "--max_eval_samples", "16", "--per_device_eval_batch_size", "2"]) assert actions > 0, "need at least do_train or do_eval for the test to run" diff --git a/tests/extended/test_trainer_ext.py b/tests/extended/test_trainer_ext.py index a31c23adfe..82d3cdb9c3 100644 --- a/tests/extended/test_trainer_ext.py +++ b/tests/extended/test_trainer_ext.py @@ -315,9 +315,7 @@ class TestTrainerExt(TestCasePlus): --eval_steps {str(eval_steps)} """.split() - args_predict = """ - --do_predict - """.split() + args_predict = ["--do_predict"] args = [] if do_train: @@ -330,11 +328,11 @@ class TestTrainerExt(TestCasePlus): args += args_predict if predict_with_generate: - args += "--predict_with_generate".split() + args += ["--predict_with_generate"] if do_train: if optim == "adafactor": - args += "--adafactor".split() + args += ["--adafactor"] else: args += f"--optim {optim}".split() diff --git a/tests/generation/test_fsdp.py b/tests/generation/test_fsdp.py index 9ecb431573..77e2de37c7 100644 --- a/tests/generation/test_fsdp.py +++ b/tests/generation/test_fsdp.py @@ -122,7 +122,7 @@ class TestFSDPGeneration(TestCasePlus): --master_port={get_torch_dist_unique_port()} {self.test_file_dir}/test_fsdp.py """.split() - args = "--fsdp".split() + args = ["--fsdp"] cmd = ["torchrun"] + distributed_args + args execute_subprocess_async(cmd, env=self.get_env()) # successful return here == success - any errors would have caused an error in the sub-call @@ -135,7 +135,7 @@ class TestFSDPGeneration(TestCasePlus): --master_port={get_torch_dist_unique_port()} {self.test_file_dir}/test_fsdp.py """.split() - args = "--fsdp2".split() + args = ["--fsdp2"] cmd = ["torchrun"] + distributed_args + args execute_subprocess_async(cmd, env=self.get_env()) # successful return here == success - any errors would have caused an error in the sub-call diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index a0e2911789..6f82b14853 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -5017,7 +5017,7 @@ class TrainerIntegrationWithHubTester(unittest.TestCase): def get_commit_history(self, repo): commit_logs = subprocess.run( - "git log".split(), + ["git", "log"], capture_output=True, check=True, encoding="utf-8",