diff --git a/pyproject.toml b/pyproject.toml
index 83a4bf3ad3..b8f4676a2b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,10 +19,13 @@ line-length = 119
[tool.ruff.lint]
# Never enforce `E501` (line length violations).
-ignore = ["C901", "E501", "E741", "F402", "F823"]
+# SIM300: Yoda condition detected
+# SIM212: Checks for if expressions that check against a negated condition.
+# SIM905: Consider using a list literal instead of `str.split`
+ignore = ["C901", "E501", "E741", "F402", "F823", "SIM1", "SIM300", "SIM212", "SIM905"]
# RUF013: Checks for the use of implicit Optional
# in type annotations when the default parameter value is None.
-select = ["C", "E", "F", "I", "W", "RUF013", "UP006", "PERF102", "PLC1802", "PLC0208"]
+select = ["C", "E", "F", "I", "W", "RUF013", "UP006", "PERF102", "PLC1802", "PLC0208", "SIM"]
extend-safe-fixes = ["UP006"]
# Ignore import violations in all `__init__.py` files.
diff --git a/src/transformers/commands/serving.py b/src/transformers/commands/serving.py
index c3f9bc2838..801901896d 100644
--- a/src/transformers/commands/serving.py
+++ b/src/transformers/commands/serving.py
@@ -901,7 +901,7 @@ class ServeCommand(BaseTransformersCLICommand):
inputs = processor.apply_chat_template(
processor_inputs,
add_generation_prompt=True,
- tools=req.get("tools", None),
+ tools=req.get("tools"),
return_tensors="pt",
return_dict=True,
tokenize=True,
diff --git a/src/transformers/data/data_collator.py b/src/transformers/data/data_collator.py
index 18f2980a38..92dbacbe5d 100644
--- a/src/transformers/data/data_collator.py
+++ b/src/transformers/data/data_collator.py
@@ -183,7 +183,7 @@ def tf_default_data_collator(features: list[InputDataClass]) -> dict[str, Any]:
if label_col_name is not None:
if isinstance(first[label_col_name], tf.Tensor):
dtype = tf.int64 if first[label_col_name].dtype.is_integer else tf.float32
- elif isinstance(first[label_col_name], np.ndarray) or isinstance(first[label_col_name], np.generic):
+ elif isinstance(first[label_col_name], (np.ndarray, np.generic)):
dtype = tf.int64 if np.issubdtype(first[label_col_name].dtype, np.integer) else tf.float32
elif isinstance(first[label_col_name], (tuple, list)):
dtype = tf.int64 if isinstance(first[label_col_name][0], int) else tf.float32
diff --git a/src/transformers/generation/utils.py b/src/transformers/generation/utils.py
index 8c4d5369c1..177cfeeda6 100644
--- a/src/transformers/generation/utils.py
+++ b/src/transformers/generation/utils.py
@@ -647,8 +647,8 @@ class GenerationMixin(ContinuousMixin):
# If it's not defined, it means the model uses the new general mask API
if causal_mask_creation_function is None: # can't be found
- token_type_ids = model_inputs.get("token_type_ids", None)
- position_ids = model_inputs.get(position_ids_key, None)
+ token_type_ids = model_inputs.get("token_type_ids")
+ position_ids = model_inputs.get(position_ids_key)
# Some models may overwrite the general one
causal_mask_creation_function = getattr(self, "create_masks_for_generate", create_masks_for_generate)
attention_mask = causal_mask_creation_function(
diff --git a/src/transformers/integrations/flex_attention.py b/src/transformers/integrations/flex_attention.py
index 31d3b3b14d..298a115517 100644
--- a/src/transformers/integrations/flex_attention.py
+++ b/src/transformers/integrations/flex_attention.py
@@ -284,7 +284,7 @@ def flex_attention_forward(
num_local_query_heads = query.shape[1]
# When running TP this helps:
- if not ((num_local_query_heads & (num_local_query_heads - 1)) == 0):
+ if (num_local_query_heads & (num_local_query_heads - 1)) != 0:
key = repeat_kv(key, query.shape[1] // key.shape[1])
value = repeat_kv(value, query.shape[1] // value.shape[1])
enable_gqa = False
diff --git a/src/transformers/integrations/vptq.py b/src/transformers/integrations/vptq.py
index 643fa91e65..00b55c6bb7 100644
--- a/src/transformers/integrations/vptq.py
+++ b/src/transformers/integrations/vptq.py
@@ -45,7 +45,7 @@ def replace_with_vptq_linear(
should not be passed by the user.
"""
- modules_to_not_convert = ["lm_head"] if not modules_to_not_convert else modules_to_not_convert
+ modules_to_not_convert = modules_to_not_convert if modules_to_not_convert else ["lm_head"]
for name, module in model.named_children():
if current_key_name is None:
diff --git a/src/transformers/keras_callbacks.py b/src/transformers/keras_callbacks.py
index b6669a7b45..ab7fc4615b 100644
--- a/src/transformers/keras_callbacks.py
+++ b/src/transformers/keras_callbacks.py
@@ -167,7 +167,7 @@ class KerasMetricCallback(keras.callbacks.Callback):
# If it's a dict with only one key, just return the array
if len(outputs) == 1:
outputs = list(outputs.values())[0]
- elif isinstance(inputs[0], list) or isinstance(inputs[0], tuple):
+ elif isinstance(inputs[0], (tuple, list)):
outputs = []
for input_list in zip(*inputs):
outputs.append(self._concatenate_batches(input_list))
diff --git a/src/transformers/models/aya_vision/configuration_aya_vision.py b/src/transformers/models/aya_vision/configuration_aya_vision.py
index 1b79c156a9..a8c1965ec4 100644
--- a/src/transformers/models/aya_vision/configuration_aya_vision.py
+++ b/src/transformers/models/aya_vision/configuration_aya_vision.py
@@ -81,9 +81,7 @@ class AyaVisionConfig(PretrainedConfig):
self.vision_feature_layer = vision_feature_layer
if isinstance(vision_config, dict):
- vision_config["model_type"] = (
- vision_config["model_type"] if "model_type" in vision_config else "siglip_vision_model"
- )
+ vision_config["model_type"] = vision_config.get("model_type", "siglip_vision_model")
vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config)
elif vision_config is None:
vision_config = CONFIG_MAPPING["siglip_vision_model"](
@@ -99,7 +97,7 @@ class AyaVisionConfig(PretrainedConfig):
self.vision_config = vision_config
if isinstance(text_config, dict):
- text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "cohere2"
+ text_config["model_type"] = text_config.get("model_type", "cohere2")
text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
elif text_config is None:
text_config = CONFIG_MAPPING["cohere2"]()
diff --git a/src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py
index 8da189b1b3..616e9ed665 100755
--- a/src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py
@@ -148,7 +148,7 @@ def rewrite_dict_keys(d):
# (1) remove word breaking symbol, (2) add word ending symbol where the word is not broken up,
# e.g.: d = {'le@@': 5, 'tt@@': 6, 'er': 7} => {'le': 5, 'tt': 6, 'er</w>': 7}
d2 = dict((re.sub(r"@@$", "", k), v) if k.endswith("@@") else (re.sub(r"$", "</w>", k), v) for k, v in d.items())
- keep_keys = "<s> <pad> </s> <unk>".split()
+ keep_keys = ["<s>", "<pad>", "</s>", "<unk>"]
# restore the special tokens
for k in keep_keys:
del d2[f"{k}</w>"]
diff --git a/src/transformers/models/clvp/modeling_clvp.py b/src/transformers/models/clvp/modeling_clvp.py
index 314c18c4d0..c8077eb871 100644
--- a/src/transformers/models/clvp/modeling_clvp.py
+++ b/src/transformers/models/clvp/modeling_clvp.py
@@ -1303,7 +1303,7 @@ class ClvpForCausalLM(ClvpPreTrainedModel, GenerationMixin):
# Check if conditioning_embeds are provided or not, if yes then concatenate the bos_token_id at the end of the conditioning_embeds.
# Then we must subtract the positional_ids because during the forward pass it will be added anyways, so we must cancel them out here.
- conditioning_embeds = model_kwargs.get("conditioning_embeds", None)
+ conditioning_embeds = model_kwargs.get("conditioning_embeds")
if conditioning_embeds is not None:
mel_start_token_embedding = self.model.decoder.input_embeds_layer(
diff --git a/src/transformers/models/cohere2_vision/configuration_cohere2_vision.py b/src/transformers/models/cohere2_vision/configuration_cohere2_vision.py
index e4e670e4a6..acc40fcf85 100644
--- a/src/transformers/models/cohere2_vision/configuration_cohere2_vision.py
+++ b/src/transformers/models/cohere2_vision/configuration_cohere2_vision.py
@@ -57,9 +57,7 @@ class Cohere2VisionConfig(PretrainedConfig):
self.alignment_intermediate_size = alignment_intermediate_size
if isinstance(vision_config, dict):
- vision_config["model_type"] = (
- vision_config["model_type"] if "model_type" in vision_config else "siglip_vision_model"
- )
+ vision_config["model_type"] = vision_config.get("model_type", "siglip_vision_model")
vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config)
elif vision_config is None:
vision_config = CONFIG_MAPPING["siglip_vision_model"](
@@ -73,7 +71,7 @@ class Cohere2VisionConfig(PretrainedConfig):
self.vision_config = vision_config
if isinstance(text_config, dict):
- text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "cohere2"
+ text_config["model_type"] = text_config.get("model_type", "cohere2")
text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
elif text_config is None:
text_config = CONFIG_MAPPING["cohere2"](tie_word_embeddings=True)
diff --git a/src/transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py b/src/transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py
index 5bebf43c9b..f82b01de48 100644
--- a/src/transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py
+++ b/src/transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py
@@ -66,7 +66,7 @@ class DeepseekVLImageProcessorFast(BaseImageProcessorFast):
def __init__(self, **kwargs: Unpack[DeepseekVLFastImageProcessorKwargs]):
super().__init__(**kwargs)
- if kwargs.get("image_mean", None) is None:
+ if kwargs.get("image_mean") is None:
background_color = (127, 127, 127)
else:
background_color = tuple([int(x * 255) for x in kwargs.get("image_mean")])
diff --git a/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py b/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py
index 2120a65dd4..d0b22c70dc 100644
--- a/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py
+++ b/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py
@@ -93,11 +93,11 @@ class DeepseekVLHybridImageProcessorFast(BaseImageProcessorFast):
high_res_resample = PILImageResampling.BICUBIC
def __init__(self, **kwargs: Unpack[DeepseekVLHybridFastImageProcessorKwargs]):
- if kwargs.get("image_mean", None) is None:
+ if kwargs.get("image_mean") is None:
background_color = (127, 127, 127)
else:
background_color = tuple([int(x * 255) for x in kwargs.get("image_mean")])
- if kwargs.get("high_res_image_mean", None) is None:
+ if kwargs.get("high_res_image_mean") is None:
high_res_background_color = (127, 127, 127)
else:
high_res_background_color = tuple([int(x * 255) for x in kwargs.get("high_res_image_mean")])
diff --git a/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py b/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py
index e3149b4202..058bbc4b8a 100644
--- a/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py
+++ b/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py
@@ -749,11 +749,11 @@ class DeepseekVLHybridImageProcessorFast(DeepseekVLImageProcessorFast):
high_res_resample = PILImageResampling.BICUBIC
def __init__(self, **kwargs: Unpack[DeepseekVLHybridFastImageProcessorKwargs]):
- if kwargs.get("image_mean", None) is None:
+ if kwargs.get("image_mean") is None:
background_color = (127, 127, 127)
else:
background_color = tuple([int(x * 255) for x in kwargs.get("image_mean")])
- if kwargs.get("high_res_image_mean", None) is None:
+ if kwargs.get("high_res_image_mean") is None:
high_res_background_color = (127, 127, 127)
else:
high_res_background_color = tuple([int(x * 255) for x in kwargs.get("high_res_image_mean")])
diff --git a/src/transformers/models/fsmt/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/fsmt/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py
index 35e8265850..07a83a1cb0 100755
--- a/src/transformers/models/fsmt/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/fsmt/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py
@@ -79,7 +79,7 @@ def rewrite_dict_keys(d):
# (1) remove word breaking symbol, (2) add word ending symbol where the word is not broken up,
# e.g.: d = {'le@@': 5, 'tt@@': 6, 'er': 7} => {'le': 5, 'tt': 6, 'er</w>': 7}
d2 = dict((re.sub(r"@@$", "", k), v) if k.endswith("@@") else (re.sub(r"$", "</w>", k), v) for k, v in d.items())
- keep_keys = "<s> <pad> </s> <unk>".split()
+ keep_keys = ["<s>", "<pad>", "</s>", "<unk>"]
# restore the special tokens
for k in keep_keys:
del d2[f"{k}</w>"]
diff --git a/src/transformers/models/janus/image_processing_janus_fast.py b/src/transformers/models/janus/image_processing_janus_fast.py
index 81f9bafed7..17df6ba9ea 100644
--- a/src/transformers/models/janus/image_processing_janus_fast.py
+++ b/src/transformers/models/janus/image_processing_janus_fast.py
@@ -71,7 +71,7 @@ class JanusImageProcessorFast(BaseImageProcessorFast):
valid_kwargs = JanusFastImageProcessorKwargs
def __init__(self, **kwargs: Unpack[JanusFastImageProcessorKwargs]):
- if kwargs.get("image_mean", None) is None:
+ if kwargs.get("image_mean") is None:
background_color = (127, 127, 127)
else:
background_color = tuple([int(x * 255) for x in kwargs.get("image_mean")])
diff --git a/src/transformers/models/jetmoe/modeling_jetmoe.py b/src/transformers/models/jetmoe/modeling_jetmoe.py
index 2159761547..615aacdf87 100644
--- a/src/transformers/models/jetmoe/modeling_jetmoe.py
+++ b/src/transformers/models/jetmoe/modeling_jetmoe.py
@@ -857,9 +857,7 @@ class JetMoePreTrainedModel(PreTrainedModel):
module.weight.data.fill_(1.0)
elif isinstance(module, JetMoeParallelExperts):
module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
- elif isinstance(module, JetMoeMoA):
- module.bias.data.zero_()
- elif isinstance(module, JetMoeMoE):
+ elif isinstance(module, (JetMoeMoA, JetMoeMoE)):
module.bias.data.zero_()
diff --git a/src/transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py b/src/transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py
index 193faecf39..e49ccde7e2 100644
--- a/src/transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py
+++ b/src/transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py
@@ -260,7 +260,7 @@ class MMGroundingDinoConfig(PretrainedConfig):
self.disable_custom_kernels = disable_custom_kernels
# Text backbone
if isinstance(text_config, dict):
- text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "bert"
+ text_config["model_type"] = text_config.get("model_type", "bert")
text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
elif text_config is None:
text_config = CONFIG_MAPPING["bert"]()
diff --git a/src/transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py b/src/transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py
index 6fc13df410..aea644fdd6 100644
--- a/src/transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py
+++ b/src/transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py
@@ -268,7 +268,7 @@ class MMGroundingDinoConfig(GroundingDinoConfig, PretrainedConfig):
self.disable_custom_kernels = disable_custom_kernels
# Text backbone
if isinstance(text_config, dict):
- text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "bert"
+ text_config["model_type"] = text_config.get("model_type", "bert")
text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
elif text_config is None:
text_config = CONFIG_MAPPING["bert"]()
diff --git a/src/transformers/models/oneformer/modeling_oneformer.py b/src/transformers/models/oneformer/modeling_oneformer.py
index 53160467c7..d6af3e57e5 100644
--- a/src/transformers/models/oneformer/modeling_oneformer.py
+++ b/src/transformers/models/oneformer/modeling_oneformer.py
@@ -2794,11 +2794,7 @@ class OneFormerPreTrainedModel(PreTrainedModel):
nn.init.constant_(module.output_proj.bias.data, 0.0)
elif isinstance(module, OneFormerPixelDecoder):
nn.init.normal_(module.level_embed, std=0)
- elif isinstance(module, OneFormerTransformerDecoderLayer):
- for p in module.parameters():
- if p.dim() > 1:
- nn.init.xavier_uniform_(p, gain=xavier_std)
- elif isinstance(module, OneFormerTransformerDecoderQueryTransformer):
+ elif isinstance(module, (OneFormerTransformerDecoderLayer, OneFormerTransformerDecoderQueryTransformer)):
for p in module.parameters():
if p.dim() > 1:
nn.init.xavier_uniform_(p, gain=xavier_std)
diff --git a/src/transformers/pipelines/fill_mask.py b/src/transformers/pipelines/fill_mask.py
index f5dbe71dad..cc69cf6d27 100644
--- a/src/transformers/pipelines/fill_mask.py
+++ b/src/transformers/pipelines/fill_mask.py
@@ -197,7 +197,7 @@ class FillMaskPipeline(Pipeline):
vocab = {}
target_ids = []
for target in targets:
- id_ = vocab.get(target, None)
+ id_ = vocab.get(target)
if id_ is None:
input_ids = self.tokenizer(
target,
diff --git a/src/transformers/pipelines/token_classification.py b/src/transformers/pipelines/token_classification.py
index 22faadeddd..efa70ca185 100644
--- a/src/transformers/pipelines/token_classification.py
+++ b/src/transformers/pipelines/token_classification.py
@@ -427,9 +427,11 @@ class TokenClassificationPipeline(ChunkPipeline):
if previous_entity["start"] <= entity["start"] < previous_entity["end"]:
current_length = entity["end"] - entity["start"]
previous_length = previous_entity["end"] - previous_entity["start"]
- if current_length > previous_length:
- previous_entity = entity
- elif current_length == previous_length and entity["score"] > previous_entity["score"]:
+ if (
+ current_length > previous_length
+ or current_length == previous_length
+ and entity["score"] > previous_entity["score"]
+ ):
previous_entity = entity
else:
aggregated_entities.append(previous_entity)
diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py
index 0580fa7287..4934d27fb6 100644
--- a/src/transformers/testing_utils.py
+++ b/src/transformers/testing_utils.py
@@ -2637,9 +2637,7 @@ def nested_simplify(obj, decimals=3):
return nested_simplify(obj.tolist())
elif isinstance(obj, Mapping):
return {nested_simplify(k, decimals): nested_simplify(v, decimals) for k, v in obj.items()}
- elif isinstance(obj, (str, int, np.int64)):
- return obj
- elif obj is None:
+ elif isinstance(obj, (str, int, np.int64)) or obj is None:
return obj
elif is_torch_available() and isinstance(obj, torch.Tensor):
return nested_simplify(obj.tolist(), decimals)
diff --git a/src/transformers/tokenization_mistral_common.py b/src/transformers/tokenization_mistral_common.py
index cd85a641cb..a362a7c8b0 100644
--- a/src/transformers/tokenization_mistral_common.py
+++ b/src/transformers/tokenization_mistral_common.py
@@ -1784,9 +1784,7 @@ class MistralCommonTokenizer(PushToHubMixin):
pathlib_repo_file = Path(path)
file_name = pathlib_repo_file.name
suffix = "".join(pathlib_repo_file.suffixes)
- if file_name == "tekken.json":
- valid_tokenizer_files.append(file_name)
- elif suffix in sentencepiece_suffixes:
+ if file_name == "tekken.json" or suffix in sentencepiece_suffixes:
valid_tokenizer_files.append(file_name)
if len(valid_tokenizer_files) == 0:
diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py
index 9eaf804fab..1528d085d6 100644
--- a/src/transformers/tokenization_utils.py
+++ b/src/transformers/tokenization_utils.py
@@ -877,9 +877,11 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
input_ids = []
for ids_or_pair_ids in batch_text_or_text_pairs:
- if not isinstance(ids_or_pair_ids, (list, tuple)):
- ids, pair_ids = ids_or_pair_ids, None
- elif is_split_into_words and not isinstance(ids_or_pair_ids[0], (list, tuple)):
+ if (
+ not isinstance(ids_or_pair_ids, (list, tuple))
+ or is_split_into_words
+ and not isinstance(ids_or_pair_ids[0], (list, tuple))
+ ):
ids, pair_ids = ids_or_pair_ids, None
else:
ids, pair_ids = ids_or_pair_ids
diff --git a/src/transformers/trainer_pt_utils.py b/src/transformers/trainer_pt_utils.py
index e93e279052..c32516b167 100644
--- a/src/transformers/trainer_pt_utils.py
+++ b/src/transformers/trainer_pt_utils.py
@@ -153,9 +153,7 @@ def find_batch_size(tensors):
result = find_batch_size(value)
if result is not None:
return result
- elif isinstance(tensors, torch.Tensor):
- return tensors.shape[0] if len(tensors.shape) >= 1 else None
- elif isinstance(tensors, np.ndarray):
+ elif isinstance(tensors, (torch.Tensor, np.ndarray)):
return tensors.shape[0] if len(tensors.shape) >= 1 else None
@@ -634,10 +632,7 @@ class LengthGroupedSampler(Sampler):
self.batch_size = batch_size
if lengths is None:
model_input_name = model_input_name if model_input_name is not None else "input_ids"
- if (
- not (isinstance(dataset[0], dict) or isinstance(dataset[0], BatchEncoding))
- or model_input_name not in dataset[0]
- ):
+ if not isinstance(dataset[0], (dict, BatchEncoding)) or model_input_name not in dataset[0]:
raise ValueError(
"Can only automatically infer lengths for datasets whose items are dictionaries with an "
f"'{model_input_name}' key."
@@ -697,10 +692,7 @@ class DistributedLengthGroupedSampler(DistributedSampler):
if lengths is None:
model_input_name = model_input_name if model_input_name is not None else "input_ids"
- if (
- not (isinstance(dataset[0], dict) or isinstance(dataset[0], BatchEncoding))
- or model_input_name not in dataset[0]
- ):
+ if not isinstance(dataset[0], (dict, BatchEncoding)) or model_input_name not in dataset[0]:
raise ValueError(
"Can only automatically infer lengths for datasets whose items are dictionaries with an "
f"'{model_input_name}' key."
diff --git a/src/transformers/utils/auto_docstring.py b/src/transformers/utils/auto_docstring.py
index ba03cf9cfe..6f9e099944 100644
--- a/src/transformers/utils/auto_docstring.py
+++ b/src/transformers/utils/auto_docstring.py
@@ -1404,8 +1404,8 @@ def _process_regular_parameters(
param_type = f"[`{class_name}`]"
else:
param_type = f"[`{param_type.split('.')[-1]}`]"
- elif param_type == "" and False: # TODO: Enforce typing for all parameters
- print(f"🚨 {param_name} for {func.__qualname__} in file {func.__code__.co_filename} has no type")
+ # elif param_type == "" and False: # TODO: Enforce typing for all parameters
+ # print(f"🚨 {param_name} for {func.__qualname__} in file {func.__code__.co_filename} has no type")
param_type = param_type if "`" in param_type else f"`{param_type}`"
# Format the parameter docstring
if additional_info:
@@ -1833,7 +1833,7 @@ def auto_class_docstring(cls, custom_intro=None, custom_args=None, checkpoint=No
docstring += set_min_indent(f"\n{docstring_init}", indent_level)
elif is_dataclass:
# No init function, we have a data class
- docstring += "\nArgs:\n" if not docstring_args else docstring_args
+ docstring += docstring_args if docstring_args else "\nArgs:\n"
source_args_dict = get_args_doc_from_source(ModelOutputArgs)
doc_class = cls.__doc__ if cls.__doc__ else ""
documented_kwargs, _ = parse_docstring(doc_class)
diff --git a/src/transformers/utils/fx.py b/src/transformers/utils/fx.py
index 86bcf00c35..8a48cf9cbb 100755
--- a/src/transformers/utils/fx.py
+++ b/src/transformers/utils/fx.py
@@ -749,9 +749,7 @@ def create_wrapper(
tracer = found_proxies[0].tracer
if op_type == "call_function":
target = function
- elif op_type == "call_method":
- target = function.__name__
- elif op_type == "get_attr":
+ elif op_type == "call_method" or op_type == "get_attr":
target = function.__name__
else:
raise ValueError(f"op_type {op_type} not supported.")
diff --git a/src/transformers/utils/import_utils.py b/src/transformers/utils/import_utils.py
index ea54336f14..163ee04109 100644
--- a/src/transformers/utils/import_utils.py
+++ b/src/transformers/utils/import_utils.py
@@ -451,9 +451,7 @@ def get_torch_major_and_minor_version() -> str:
def is_torch_sdpa_available():
- if not is_torch_available():
- return False
- elif _torch_version == "N/A":
+ if not is_torch_available() or _torch_version == "N/A":
return False
# NOTE: MLU is OK with non-contiguous inputs.
@@ -467,9 +465,7 @@ def is_torch_sdpa_available():
def is_torch_flex_attn_available():
- if not is_torch_available():
- return False
- elif _torch_version == "N/A":
+ if not is_torch_available() or _torch_version == "N/A":
return False
# TODO check if some bugs cause push backs on the exact version
@@ -1088,7 +1084,7 @@ def is_ninja_available():
[ninja](https://ninja-build.org/) build system is available on the system, `False` otherwise.
"""
try:
- subprocess.check_output("ninja --version".split())
+ subprocess.check_output(["ninja", "--version"])
except Exception:
return False
else:
diff --git a/tests/deepspeed/test_deepspeed.py b/tests/deepspeed/test_deepspeed.py
index 1a4966b09e..99b1450a0d 100644
--- a/tests/deepspeed/test_deepspeed.py
+++ b/tests/deepspeed/test_deepspeed.py
@@ -1222,7 +1222,7 @@ class TestDeepSpeedWithLauncher(TestCasePlus):
# this is just inference, so no optimizer should be loaded
# it only works for z3 (makes no sense with z1-z2)
- fp32 = True if dtype == "fp32" else False
+ fp32 = dtype == "fp32"
self.run_and_check(
stage=ZERO3,
dtype=dtype,
@@ -1337,13 +1337,7 @@ class TestDeepSpeedWithLauncher(TestCasePlus):
if do_eval:
actions += 1
- args.extend(
- """
- --do_eval
- --max_eval_samples 16
- --per_device_eval_batch_size 2
- """.split()
- )
+ args.extend(["--do_eval", "--max_eval_samples", "16", "--per_device_eval_batch_size", "2"])
assert actions > 0, "need at least do_train or do_eval for the test to run"
diff --git a/tests/extended/test_trainer_ext.py b/tests/extended/test_trainer_ext.py
index a31c23adfe..82d3cdb9c3 100644
--- a/tests/extended/test_trainer_ext.py
+++ b/tests/extended/test_trainer_ext.py
@@ -315,9 +315,7 @@ class TestTrainerExt(TestCasePlus):
--eval_steps {str(eval_steps)}
""".split()
- args_predict = """
- --do_predict
- """.split()
+ args_predict = ["--do_predict"]
args = []
if do_train:
@@ -330,11 +328,11 @@ class TestTrainerExt(TestCasePlus):
args += args_predict
if predict_with_generate:
- args += "--predict_with_generate".split()
+ args += ["--predict_with_generate"]
if do_train:
if optim == "adafactor":
- args += "--adafactor".split()
+ args += ["--adafactor"]
else:
args += f"--optim {optim}".split()
diff --git a/tests/generation/test_fsdp.py b/tests/generation/test_fsdp.py
index 9ecb431573..77e2de37c7 100644
--- a/tests/generation/test_fsdp.py
+++ b/tests/generation/test_fsdp.py
@@ -122,7 +122,7 @@ class TestFSDPGeneration(TestCasePlus):
--master_port={get_torch_dist_unique_port()}
{self.test_file_dir}/test_fsdp.py
""".split()
- args = "--fsdp".split()
+ args = ["--fsdp"]
cmd = ["torchrun"] + distributed_args + args
execute_subprocess_async(cmd, env=self.get_env())
# successful return here == success - any errors would have caused an error in the sub-call
@@ -135,7 +135,7 @@ class TestFSDPGeneration(TestCasePlus):
--master_port={get_torch_dist_unique_port()}
{self.test_file_dir}/test_fsdp.py
""".split()
- args = "--fsdp2".split()
+ args = ["--fsdp2"]
cmd = ["torchrun"] + distributed_args + args
execute_subprocess_async(cmd, env=self.get_env())
# successful return here == success - any errors would have caused an error in the sub-call
diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index a0e2911789..6f82b14853 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -5017,7 +5017,7 @@ class TrainerIntegrationWithHubTester(unittest.TestCase):
def get_commit_history(self, repo):
commit_logs = subprocess.run(
- "git log".split(),
+ ["git", "log"],
capture_output=True,
check=True,
encoding="utf-8",