diff --git a/examples/modular-transformers/modeling_test_detr.py b/examples/modular-transformers/modeling_test_detr.py index 910d568a1e..e1286c3256 100644 --- a/examples/modular-transformers/modeling_test_detr.py +++ b/examples/modular-transformers/modeling_test_detr.py @@ -202,7 +202,7 @@ def replace_batch_norm(model): if isinstance(module, nn.BatchNorm2d): new_module = TestDetrFrozenBatchNorm2d(module.num_features) - if not module.weight.device == torch.device("meta"): + if module.weight.device != torch.device("meta"): new_module.weight.data.copy_(module.weight) new_module.bias.data.copy_(module.bias) new_module.running_mean.data.copy_(module.running_mean) diff --git a/examples/tensorflow/text-classification/run_glue.py b/examples/tensorflow/text-classification/run_glue.py index da09963028..b1049772b7 100644 --- a/examples/tensorflow/text-classification/run_glue.py +++ b/examples/tensorflow/text-classification/run_glue.py @@ -495,7 +495,7 @@ def main(): # region Training and validation if training_args.do_train: - if training_args.do_eval and not data_args.task_name == "mnli": + if training_args.do_eval and data_args.task_name != "mnli": # Do both evaluation and training in the Keras fit loop, unless the task is MNLI # because MNLI has two validation sets validation_data = tf_data["validation"] diff --git a/src/transformers/debug_utils.py b/src/transformers/debug_utils.py index 100ee85121..920b1cf44d 100644 --- a/src/transformers/debug_utils.py +++ b/src/transformers/debug_utils.py @@ -248,7 +248,7 @@ class DebugUnderflowOverflow: last_frame_of_batch = False - trace_mode = True if self.batch_number in self.trace_batch_nums else False + trace_mode = self.batch_number in self.trace_batch_nums if trace_mode: self.reset_saved_frames() diff --git a/src/transformers/generation/beam_search.py b/src/transformers/generation/beam_search.py index 9db8694c23..b6647760b7 100644 --- a/src/transformers/generation/beam_search.py +++ b/src/transformers/generation/beam_search.py @@ -228,7 +228,7 @@ class BeamSearchScorer(BeamScorer): cur_len = input_ids.shape[-1] + 1 batch_size = len(self._beam_hyps) // self.num_beam_groups - if not (batch_size == (input_ids.shape[0] // self.group_size)): + if batch_size != (input_ids.shape[0] // self.group_size): if self.num_beam_groups > 1: raise ValueError( f"A group beam size of {input_ids.shape[0]} is used as the input, but a group beam " @@ -564,7 +564,7 @@ class ConstrainedBeamSearchScorer(BeamScorer): # add up to the length which the next_scores is calculated on (including decoder prompt) cur_len = input_ids.shape[-1] + 1 batch_size = len(self._beam_hyps) - if not (batch_size == (input_ids.shape[0] // self.group_size)): + if batch_size != (input_ids.shape[0] // self.group_size): if self.num_beam_groups > 1: raise ValueError( f"A group beam size of {input_ids.shape[0]} is used as the input, but a group beam " diff --git a/src/transformers/generation/tf_logits_process.py b/src/transformers/generation/tf_logits_process.py index c88ea5afcc..436793c402 100644 --- a/src/transformers/generation/tf_logits_process.py +++ b/src/transformers/generation/tf_logits_process.py @@ -435,9 +435,7 @@ class TFNoRepeatNGramLogitsProcessor(TFLogitsProcessor): # create banned_tokens boolean mask banned_tokens_indices_mask = [] for banned_tokens_slice in banned_tokens: - banned_tokens_indices_mask.append( - [True if token in banned_tokens_slice else False for token in range(vocab_size)] - ) + banned_tokens_indices_mask.append([token in banned_tokens_slice for token in range(vocab_size)]) scores = tf.where(tf.convert_to_tensor(banned_tokens_indices_mask, dtype=tf.bool), -float("inf"), scores) diff --git a/src/transformers/image_utils.py b/src/transformers/image_utils.py index 7e51bfeaec..9d22ee818e 100644 --- a/src/transformers/image_utils.py +++ b/src/transformers/image_utils.py @@ -833,7 +833,7 @@ class ImageFeatureExtractionMixin: return image.crop((left, top, right, bottom)) # Check if image is in (n_channels, height, width) or (height, width, n_channels) format - channel_first = True if image.shape[0] in [1, 3] else False + channel_first = image.shape[0] in [1, 3] # Transpose (height, width, n_channels) format images if not channel_first: diff --git a/src/transformers/modeling_rope_utils.py b/src/transformers/modeling_rope_utils.py index 59989aa592..bdb1dd64ce 100644 --- a/src/transformers/modeling_rope_utils.py +++ b/src/transformers/modeling_rope_utils.py @@ -508,13 +508,13 @@ def _validate_longrope_parameters(config: PretrainedConfig, ignore_keys: Optiona short_factor = rope_scaling.get("short_factor") if not isinstance(short_factor, list) and all(isinstance(x, (int, float)) for x in short_factor): logger.warning(f"`rope_scaling`'s short_factor field must be a list of numbers, got {short_factor}") - if not len(short_factor) == dim // 2: + if len(short_factor) != dim // 2: logger.warning(f"`rope_scaling`'s short_factor field must have length {dim // 2}, got {len(short_factor)}") long_factor = rope_scaling.get("long_factor") if not isinstance(long_factor, list) and all(isinstance(x, (int, float)) for x in long_factor): logger.warning(f"`rope_scaling`'s long_factor field must be a list of numbers, got {long_factor}") - if not len(long_factor) == dim // 2: + if len(long_factor) != dim // 2: logger.warning(f"`rope_scaling`'s long_factor field must have length {dim // 2}, got {len(long_factor)}") # Handle Phi3 divergence: prefer the use of `attention_factor` and/or `factor` over diff --git a/src/transformers/models/align/modeling_align.py b/src/transformers/models/align/modeling_align.py index 24cc3639ce..a9e6fa5c7c 100644 --- a/src/transformers/models/align/modeling_align.py +++ b/src/transformers/models/align/modeling_align.py @@ -394,7 +394,7 @@ class AlignVisionBlock(nn.Module): ): super().__init__() self.expand_ratio = expand_ratio - self.expand = True if self.expand_ratio != 1 else False + self.expand = self.expand_ratio != 1 expand_in_dim = in_dim * expand_ratio if self.expand: @@ -464,10 +464,10 @@ class AlignVisionEncoder(nn.Module): expand_ratio = config.expand_ratios[i] for j in range(round_repeats(config.num_block_repeats[i])): - id_skip = True if j == 0 else False + id_skip = j == 0 stride = 1 if j > 0 else stride in_dim = out_dim if j > 0 else in_dim - adjust_padding = False if curr_block_num in config.depthwise_padding else True + adjust_padding = curr_block_num not in config.depthwise_padding drop_rate = config.drop_connect_rate * curr_block_num / num_blocks block = AlignVisionBlock( diff --git a/src/transformers/models/aria/image_processing_aria.py b/src/transformers/models/aria/image_processing_aria.py index 4d0ae92dd0..8db238d66f 100644 --- a/src/transformers/models/aria/image_processing_aria.py +++ b/src/transformers/models/aria/image_processing_aria.py @@ -515,8 +515,8 @@ class AriaImageProcessor(BaseImageProcessor): Returns: `int`: Number of patches per image. """ - split_image = images_kwargs["split_image"] if "split_image" in images_kwargs else self.split_image - max_image_size = images_kwargs["max_image_size"] if "max_image_size" in images_kwargs else self.max_image_size + split_image = images_kwargs.get("split_image", self.split_image) + max_image_size = images_kwargs.get("max_image_size", self.max_image_size) resized_height, resized_width = select_best_resolution((height, width), self.split_resolutions) num_patches = 1 if not split_image else resized_height // max_image_size * resized_width // max_image_size diff --git a/src/transformers/models/aria/modular_aria.py b/src/transformers/models/aria/modular_aria.py index c27916a0df..4a19c3387d 100644 --- a/src/transformers/models/aria/modular_aria.py +++ b/src/transformers/models/aria/modular_aria.py @@ -901,8 +901,8 @@ class AriaImageProcessor(BaseImageProcessor): Returns: `int`: Number of patches per image. """ - split_image = images_kwargs["split_image"] if "split_image" in images_kwargs else self.split_image - max_image_size = images_kwargs["max_image_size"] if "max_image_size" in images_kwargs else self.max_image_size + split_image = images_kwargs.get("split_image", self.split_image) + max_image_size = images_kwargs.get("max_image_size", self.max_image_size) resized_height, resized_width = select_best_resolution((height, width), self.split_resolutions) num_patches = 1 if not split_image else resized_height // max_image_size * resized_width // max_image_size diff --git a/src/transformers/models/aya_vision/configuration_aya_vision.py b/src/transformers/models/aya_vision/configuration_aya_vision.py index ad7fdfd319..41c472b909 100644 --- a/src/transformers/models/aya_vision/configuration_aya_vision.py +++ b/src/transformers/models/aya_vision/configuration_aya_vision.py @@ -81,9 +81,7 @@ class AyaVisionConfig(PretrainedConfig): self.vision_feature_layer = vision_feature_layer if isinstance(vision_config, dict): - vision_config["model_type"] = ( - vision_config["model_type"] if "model_type" in vision_config else "clip_vision_model" - ) + vision_config["model_type"] = vision_config.get("model_type", "clip_vision_model") vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config) elif vision_config is None: vision_config = CONFIG_MAPPING["siglip_vision_model"]( @@ -99,7 +97,7 @@ class AyaVisionConfig(PretrainedConfig): self.vision_config = vision_config if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "llama" + text_config["model_type"] = text_config.get("model_type", "llama") text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: text_config = CONFIG_MAPPING["cohere2"]() diff --git a/src/transformers/models/bark/configuration_bark.py b/src/transformers/models/bark/configuration_bark.py index 5bbc0d5a77..25787a90d6 100644 --- a/src/transformers/models/bark/configuration_bark.py +++ b/src/transformers/models/bark/configuration_bark.py @@ -269,7 +269,7 @@ class BarkConfig(PretrainedConfig): self.semantic_config = BarkSemanticConfig(**semantic_config) self.coarse_acoustics_config = BarkCoarseConfig(**coarse_acoustics_config) self.fine_acoustics_config = BarkFineConfig(**fine_acoustics_config) - codec_model_type = codec_config["model_type"] if "model_type" in codec_config else "encodec" + codec_model_type = codec_config.get("model_type", "encodec") self.codec_config = CONFIG_MAPPING[codec_model_type](**codec_config) self.initializer_range = initializer_range diff --git a/src/transformers/models/bert/modeling_bert.py b/src/transformers/models/bert/modeling_bert.py index 97387cdfc0..ba9433126f 100755 --- a/src/transformers/models/bert/modeling_bert.py +++ b/src/transformers/models/bert/modeling_bert.py @@ -414,9 +414,7 @@ class BertSdpaSelfAttention(BertSelfAttention): # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create # a causal mask in case tgt_len == 1. - is_causal = ( - True if self.is_decoder and not is_cross_attention and attention_mask is None and tgt_len > 1 else False - ) + is_causal = self.is_decoder and not is_cross_attention and attention_mask is None and tgt_len > 1 attn_output = torch.nn.functional.scaled_dot_product_attention( query_layer, diff --git a/src/transformers/models/big_bird/modeling_big_bird.py b/src/transformers/models/big_bird/modeling_big_bird.py index affbc3335e..8b83d9e439 100755 --- a/src/transformers/models/big_bird/modeling_big_bird.py +++ b/src/transformers/models/big_bird/modeling_big_bird.py @@ -96,9 +96,7 @@ def load_tf_weights_in_big_bird(model, tf_checkpoint_path, is_trivia_qa=False): name_items[0] = f"bert/encoder/layer_{layer_name_items[2]}" - name = "/".join([_TRIVIA_QA_MAPPING[x] if x in _TRIVIA_QA_MAPPING else x for x in name_items])[ - :-2 - ] # remove last :0 in variable + name = "/".join([_TRIVIA_QA_MAPPING.get(x, x) for x in name_items])[:-2] # remove last :0 in variable if "self/attention/output" in name: name = name.replace("self/attention/output", "output") diff --git a/src/transformers/models/bigbird_pegasus/convert_bigbird_pegasus_tf_to_pytorch.py b/src/transformers/models/bigbird_pegasus/convert_bigbird_pegasus_tf_to_pytorch.py index 686277b8e8..d0a312ebc1 100644 --- a/src/transformers/models/bigbird_pegasus/convert_bigbird_pegasus_tf_to_pytorch.py +++ b/src/transformers/models/bigbird_pegasus/convert_bigbird_pegasus_tf_to_pytorch.py @@ -103,7 +103,7 @@ def convert_bigbird_pegasus(tf_weights: dict, config_update: dict) -> BigBirdPeg new_k = rename_state_dict_key(k, patterns) if new_k not in state_dict: raise ValueError(f"could not find new key {new_k} in state dict. (converted from {k})") - if any(True if i in k else False for i in ["dense", "query", "key", "value"]): + if any(i in k for i in ["dense", "query", "key", "value"]): v = v.T mapping[new_k] = torch.from_numpy(v) assert v.shape == state_dict[new_k].shape, f"{new_k}, {k}, {v.shape}, {state_dict[new_k].shape}" @@ -116,7 +116,7 @@ def convert_bigbird_pegasus(tf_weights: dict, config_update: dict) -> BigBirdPeg new_k = rename_state_dict_key(k, patterns) if new_k not in state_dict and k != "pegasus/embeddings/position_embeddings": raise ValueError(f"could not find new key {new_k} in state dict. (converted from {k})") - if any(True if i in k else False for i in ["dense", "query", "key", "value"]): + if any(i in k for i in ["dense", "query", "key", "value"]): v = v.T mapping[new_k] = torch.from_numpy(v) if k != "pegasus/embeddings/position_embeddings": diff --git a/src/transformers/models/bit/modeling_bit.py b/src/transformers/models/bit/modeling_bit.py index 140a2e7b52..98faa9ebea 100644 --- a/src/transformers/models/bit/modeling_bit.py +++ b/src/transformers/models/bit/modeling_bit.py @@ -252,7 +252,7 @@ class BitEmbeddings(nn.Module): else: self.pad = nn.ConstantPad2d(padding=(1, 1, 1, 1), value=0.0) - if not config.layer_type == "preactivation": + if config.layer_type != "preactivation": self.norm = BitGroupNormActivation(config, num_channels=config.embedding_size) else: self.norm = nn.Identity() diff --git a/src/transformers/models/blip_2/configuration_blip_2.py b/src/transformers/models/blip_2/configuration_blip_2.py index 9b4a48a21c..ff7e629887 100644 --- a/src/transformers/models/blip_2/configuration_blip_2.py +++ b/src/transformers/models/blip_2/configuration_blip_2.py @@ -304,7 +304,7 @@ class Blip2Config(PretrainedConfig): self.vision_config = Blip2VisionConfig(**vision_config) self.qformer_config = Blip2QFormerConfig(**qformer_config) - text_model_type = text_config["model_type"] if "model_type" in text_config else "opt" + text_model_type = text_config.get("model_type", "opt") self.text_config = CONFIG_MAPPING[text_model_type](**text_config) self.num_query_tokens = num_query_tokens diff --git a/src/transformers/models/camembert/modeling_camembert.py b/src/transformers/models/camembert/modeling_camembert.py index e729505186..f5467c6c1f 100644 --- a/src/transformers/models/camembert/modeling_camembert.py +++ b/src/transformers/models/camembert/modeling_camembert.py @@ -365,9 +365,7 @@ class CamembertSdpaSelfAttention(CamembertSelfAttention): # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create # a causal mask in case tgt_len == 1. - is_causal = ( - True if self.is_decoder and not is_cross_attention and attention_mask is None and tgt_len > 1 else False - ) + is_causal = self.is_decoder and not is_cross_attention and attention_mask is None and tgt_len > 1 attn_output = torch.nn.functional.scaled_dot_product_attention( query_layer, diff --git a/src/transformers/models/chameleon/convert_chameleon_weights_to_hf.py b/src/transformers/models/chameleon/convert_chameleon_weights_to_hf.py index dbfa823231..7f026c9a30 100644 --- a/src/transformers/models/chameleon/convert_chameleon_weights_to_hf.py +++ b/src/transformers/models/chameleon/convert_chameleon_weights_to_hf.py @@ -323,8 +323,8 @@ def write_model(model_path, input_base_path, model_size, chameleon_version=1): state_dict[f"model.vqmodel.{k}"] = v # Write configs - ffn_dim_multiplier = params["ffn_dim_multiplier"] if "ffn_dim_multiplier" in params else 1 - multiple_of = params["multiple_of"] if "multiple_of" in params else 256 + ffn_dim_multiplier = params.get("ffn_dim_multiplier", 1) + multiple_of = params.get("multiple_of", 256) with open(os.path.join(input_base_path, "tokenizer/text_tokenizer.json")) as tokenizer_file: tokenizer_config = json.load(tokenizer_file) diff --git a/src/transformers/models/clipseg/convert_clipseg_original_pytorch_to_hf.py b/src/transformers/models/clipseg/convert_clipseg_original_pytorch_to_hf.py index 86b766eabf..7ea82bce51 100644 --- a/src/transformers/models/clipseg/convert_clipseg_original_pytorch_to_hf.py +++ b/src/transformers/models/clipseg/convert_clipseg_original_pytorch_to_hf.py @@ -36,7 +36,7 @@ def get_clipseg_config(model_name): text_config = CLIPSegTextConfig() vision_config = CLIPSegVisionConfig(patch_size=16) - use_complex_transposed_convolution = True if "refined" in model_name else False + use_complex_transposed_convolution = "refined" in model_name reduce_dim = 16 if "rd16" in model_name else 64 config = CLIPSegConfig.from_text_vision_configs( diff --git a/src/transformers/models/colpali/configuration_colpali.py b/src/transformers/models/colpali/configuration_colpali.py index c7a1d01d14..84be59aef0 100644 --- a/src/transformers/models/colpali/configuration_colpali.py +++ b/src/transformers/models/colpali/configuration_colpali.py @@ -93,7 +93,7 @@ class ColPaliConfig(PretrainedConfig): self.vlm_config = vlm_config self.text_config = text_config if text_config is not None else vlm_config.text_config if isinstance(self.text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "gemma" + text_config["model_type"] = text_config.get("model_type", "gemma") self.text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) self.embedding_dim = embedding_dim diff --git a/src/transformers/models/colpali/modular_colpali.py b/src/transformers/models/colpali/modular_colpali.py index a090fb5dfe..d36d59d44f 100644 --- a/src/transformers/models/colpali/modular_colpali.py +++ b/src/transformers/models/colpali/modular_colpali.py @@ -139,7 +139,7 @@ class ColPaliProcessor(PaliGemmaProcessor): ) suffix = output_kwargs["text_kwargs"].pop("suffix", None) - return_token_type_ids = True if suffix is not None else False + return_token_type_ids = suffix is not None if text is None and images is None: raise ValueError("Either text or images must be provided") diff --git a/src/transformers/models/colpali/processing_colpali.py b/src/transformers/models/colpali/processing_colpali.py index e0d8118d44..2bbbf46d5d 100644 --- a/src/transformers/models/colpali/processing_colpali.py +++ b/src/transformers/models/colpali/processing_colpali.py @@ -183,7 +183,7 @@ class ColPaliProcessor(ProcessorMixin): ) suffix = output_kwargs["text_kwargs"].pop("suffix", None) - return_token_type_ids = True if suffix is not None else False + return_token_type_ids = suffix is not None if text is None and images is None: raise ValueError("Either text or images must be provided") diff --git a/src/transformers/models/colqwen2/modular_colqwen2.py b/src/transformers/models/colqwen2/modular_colqwen2.py index 5c7bfb2dc0..18107a366e 100644 --- a/src/transformers/models/colqwen2/modular_colqwen2.py +++ b/src/transformers/models/colqwen2/modular_colqwen2.py @@ -143,7 +143,7 @@ class ColQwen2Processor(ColPaliProcessor): ) suffix = output_kwargs["text_kwargs"].pop("suffix", None) - return_token_type_ids = True if suffix is not None else False + return_token_type_ids = suffix is not None if text is None and images is None: raise ValueError("Either text or images must be provided") diff --git a/src/transformers/models/colqwen2/processing_colqwen2.py b/src/transformers/models/colqwen2/processing_colqwen2.py index a8b99380ac..d339c5ab7b 100644 --- a/src/transformers/models/colqwen2/processing_colqwen2.py +++ b/src/transformers/models/colqwen2/processing_colqwen2.py @@ -142,7 +142,7 @@ class ColQwen2Processor(ProcessorMixin): ) suffix = output_kwargs["text_kwargs"].pop("suffix", None) - return_token_type_ids = True if suffix is not None else False + return_token_type_ids = suffix is not None if text is None and images is None: raise ValueError("Either text or images must be provided") diff --git a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py index c0f41e93e6..e3de085ee5 100644 --- a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py +++ b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py @@ -352,7 +352,7 @@ def prepare_coco_detection_annotation( # for conversion to coco api area = np.asarray([obj["area"] for obj in annotations], dtype=np.float32) - iscrowd = np.asarray([obj["iscrowd"] if "iscrowd" in obj else 0 for obj in annotations], dtype=np.int64) + iscrowd = np.asarray([obj.get("iscrowd", 0) for obj in annotations], dtype=np.int64) boxes = [obj["bbox"] for obj in annotations] # guard against no boxes via resizing diff --git a/src/transformers/models/conditional_detr/modeling_conditional_detr.py b/src/transformers/models/conditional_detr/modeling_conditional_detr.py index 25eacb959a..31512d46e8 100644 --- a/src/transformers/models/conditional_detr/modeling_conditional_detr.py +++ b/src/transformers/models/conditional_detr/modeling_conditional_detr.py @@ -236,7 +236,7 @@ def replace_batch_norm(model): if isinstance(module, nn.BatchNorm2d): new_module = ConditionalDetrFrozenBatchNorm2d(module.num_features) - if not module.weight.device == torch.device("meta"): + if module.weight.device != torch.device("meta"): new_module.weight.data.copy_(module.weight) new_module.bias.data.copy_(module.bias) new_module.running_mean.data.copy_(module.running_mean) diff --git a/src/transformers/models/d_fine/modeling_d_fine.py b/src/transformers/models/d_fine/modeling_d_fine.py index 76726a6512..94b28f1087 100644 --- a/src/transformers/models/d_fine/modeling_d_fine.py +++ b/src/transformers/models/d_fine/modeling_d_fine.py @@ -619,7 +619,7 @@ def replace_batch_norm(model): if isinstance(module, nn.BatchNorm2d): new_module = DFineFrozenBatchNorm2d(module.num_features) - if not module.weight.device == torch.device("meta"): + if module.weight.device != torch.device("meta"): new_module.weight.data.copy_(module.weight) new_module.bias.data.copy_(module.bias) new_module.running_mean.data.copy_(module.running_mean) diff --git a/src/transformers/models/dab_detr/modeling_dab_detr.py b/src/transformers/models/dab_detr/modeling_dab_detr.py index d9060213f5..8b767ef9ca 100644 --- a/src/transformers/models/dab_detr/modeling_dab_detr.py +++ b/src/transformers/models/dab_detr/modeling_dab_detr.py @@ -187,7 +187,7 @@ def replace_batch_norm(model): if isinstance(module, nn.BatchNorm2d): new_module = DabDetrFrozenBatchNorm2d(module.num_features) - if not module.weight.device == torch.device("meta"): + if module.weight.device != torch.device("meta"): new_module.weight.data.copy_(module.weight) new_module.bias.data.copy_(module.bias) new_module.running_mean.data.copy_(module.running_mean) diff --git a/src/transformers/models/data2vec/modeling_data2vec_audio.py b/src/transformers/models/data2vec/modeling_data2vec_audio.py index c29bf8f6c1..c9b3f01f42 100755 --- a/src/transformers/models/data2vec/modeling_data2vec_audio.py +++ b/src/transformers/models/data2vec/modeling_data2vec_audio.py @@ -399,7 +399,7 @@ class Data2VecAudioEncoder(nn.Module): # add LayerDrop (see https://huggingface.co/papers/1909.11556 for description) dropout_probability = torch.rand([]) - skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False + skip_the_layer = self.training and dropout_probability < self.config.layerdrop if not skip_the_layer or synced_gpus: # under fsdp or deepspeed zero3 all gpus must run in sync layer_outputs = layer( diff --git a/src/transformers/models/dbrx/modeling_dbrx.py b/src/transformers/models/dbrx/modeling_dbrx.py index ee5ec65f86..461d4ce0c4 100644 --- a/src/transformers/models/dbrx/modeling_dbrx.py +++ b/src/transformers/models/dbrx/modeling_dbrx.py @@ -507,7 +507,7 @@ class DbrxSdpaAttention(DbrxAttention): # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. - is_causal = True if causal_mask is None and q_len > 1 else False + is_causal = causal_mask is None and q_len > 1 attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, diff --git a/src/transformers/models/deberta_v2/tokenization_deberta_v2.py b/src/transformers/models/deberta_v2/tokenization_deberta_v2.py index 59a8c7b3ea..e192474c3d 100644 --- a/src/transformers/models/deberta_v2/tokenization_deberta_v2.py +++ b/src/transformers/models/deberta_v2/tokenization_deberta_v2.py @@ -370,7 +370,7 @@ class SPMTokenizer: logger.warning_once( "The `DebertaTokenizer.id` method is deprecated and will be removed in `transformers==4.35`" ) - return self.vocab[sym] if sym in self.vocab else 1 + return self.vocab.get(sym, 1) def _encode_as_pieces(self, text): text = convert_to_unicode(text) diff --git a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py index 4863442c17..358a2da919 100644 --- a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py +++ b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py @@ -350,7 +350,7 @@ def prepare_coco_detection_annotation( # for conversion to coco api area = np.asarray([obj["area"] for obj in annotations], dtype=np.float32) - iscrowd = np.asarray([obj["iscrowd"] if "iscrowd" in obj else 0 for obj in annotations], dtype=np.int64) + iscrowd = np.asarray([obj.get("iscrowd", 0) for obj in annotations], dtype=np.int64) boxes = [obj["bbox"] for obj in annotations] # guard against no boxes via resizing diff --git a/src/transformers/models/deformable_detr/modeling_deformable_detr.py b/src/transformers/models/deformable_detr/modeling_deformable_detr.py index db74c715b5..2627af13cb 100755 --- a/src/transformers/models/deformable_detr/modeling_deformable_detr.py +++ b/src/transformers/models/deformable_detr/modeling_deformable_detr.py @@ -298,7 +298,7 @@ def replace_batch_norm(model): if isinstance(module, nn.BatchNorm2d): new_module = DeformableDetrFrozenBatchNorm2d(module.num_features) - if not module.weight.device == torch.device("meta"): + if module.weight.device != torch.device("meta"): new_module.weight.data.copy_(module.weight) new_module.bias.data.copy_(module.bias) new_module.running_mean.data.copy_(module.running_mean) diff --git a/src/transformers/models/deprecated/deta/image_processing_deta.py b/src/transformers/models/deprecated/deta/image_processing_deta.py index df8a62428d..434d25a1ab 100644 --- a/src/transformers/models/deprecated/deta/image_processing_deta.py +++ b/src/transformers/models/deprecated/deta/image_processing_deta.py @@ -332,7 +332,7 @@ def prepare_coco_detection_annotation( # for conversion to coco api area = np.asarray([obj["area"] for obj in annotations], dtype=np.float32) - iscrowd = np.asarray([obj["iscrowd"] if "iscrowd" in obj else 0 for obj in annotations], dtype=np.int64) + iscrowd = np.asarray([obj.get("iscrowd", 0) for obj in annotations], dtype=np.int64) boxes = [obj["bbox"] for obj in annotations] # guard against no boxes via resizing diff --git a/src/transformers/models/deprecated/deta/modeling_deta.py b/src/transformers/models/deprecated/deta/modeling_deta.py index edc0f2598a..90688cffe8 100644 --- a/src/transformers/models/deprecated/deta/modeling_deta.py +++ b/src/transformers/models/deprecated/deta/modeling_deta.py @@ -393,7 +393,7 @@ def replace_batch_norm(model): if isinstance(module, nn.BatchNorm2d): new_module = DetaFrozenBatchNorm2d(module.num_features) - if not module.weight.device == torch.device("meta"): + if module.weight.device != torch.device("meta"): new_module.weight.data.copy_(module.weight) new_module.bias.data.copy_(module.bias) new_module.running_mean.data.copy_(module.running_mean) diff --git a/src/transformers/models/deprecated/mctct/modeling_mctct.py b/src/transformers/models/deprecated/mctct/modeling_mctct.py index adaf4c1a70..a6686c1eb2 100755 --- a/src/transformers/models/deprecated/mctct/modeling_mctct.py +++ b/src/transformers/models/deprecated/mctct/modeling_mctct.py @@ -589,7 +589,7 @@ class MCTCTEncoder(MCTCTPreTrainedModel): # add LayerDrop (see https://huggingface.co/papers/1909.11556 for description) dropout_probability = torch.rand([]) - skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False + skip_the_layer = self.training and dropout_probability < self.config.layerdrop if not skip_the_layer or synced_gpus: # under fsdp or deepspeed zero3 all gpus must run in sync layer_outputs = encoder_layer( diff --git a/src/transformers/models/detr/image_processing_detr.py b/src/transformers/models/detr/image_processing_detr.py index 4fd1fef6ea..039ba1781c 100644 --- a/src/transformers/models/detr/image_processing_detr.py +++ b/src/transformers/models/detr/image_processing_detr.py @@ -345,7 +345,7 @@ def prepare_coco_detection_annotation( # for conversion to coco api area = np.asarray([obj["area"] for obj in annotations], dtype=np.float32) - iscrowd = np.asarray([obj["iscrowd"] if "iscrowd" in obj else 0 for obj in annotations], dtype=np.int64) + iscrowd = np.asarray([obj.get("iscrowd", 0) for obj in annotations], dtype=np.int64) boxes = [obj["bbox"] for obj in annotations] # guard against no boxes via resizing diff --git a/src/transformers/models/detr/modeling_detr.py b/src/transformers/models/detr/modeling_detr.py index d2a205fb21..9a448784f9 100644 --- a/src/transformers/models/detr/modeling_detr.py +++ b/src/transformers/models/detr/modeling_detr.py @@ -232,7 +232,7 @@ def replace_batch_norm(model): if isinstance(module, nn.BatchNorm2d): new_module = DetrFrozenBatchNorm2d(module.num_features) - if not module.weight.device == torch.device("meta"): + if module.weight.device != torch.device("meta"): new_module.weight.data.copy_(module.weight) new_module.bias.data.copy_(module.bias) new_module.running_mean.data.copy_(module.running_mean) diff --git a/src/transformers/models/diffllama/modeling_diffllama.py b/src/transformers/models/diffllama/modeling_diffllama.py index 5deec876f6..66cc8d5c3e 100644 --- a/src/transformers/models/diffllama/modeling_diffllama.py +++ b/src/transformers/models/diffllama/modeling_diffllama.py @@ -420,7 +420,7 @@ class DiffLlamaSdpaAttention(DiffLlamaAttention): # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. - is_causal = True if causal_mask is None and q_len > 1 else False + is_causal = causal_mask is None and q_len > 1 attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, diff --git a/src/transformers/models/diffllama/modular_diffllama.py b/src/transformers/models/diffllama/modular_diffllama.py index b5034f1749..7b91739c04 100644 --- a/src/transformers/models/diffllama/modular_diffllama.py +++ b/src/transformers/models/diffllama/modular_diffllama.py @@ -356,7 +356,7 @@ class DiffLlamaSdpaAttention(DiffLlamaAttention): # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. - is_causal = True if causal_mask is None and q_len > 1 else False + is_causal = causal_mask is None and q_len > 1 attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, diff --git a/src/transformers/models/dit/convert_dit_unilm_to_pytorch.py b/src/transformers/models/dit/convert_dit_unilm_to_pytorch.py index 40c5b22e3b..a945a6b50a 100644 --- a/src/transformers/models/dit/convert_dit_unilm_to_pytorch.py +++ b/src/transformers/models/dit/convert_dit_unilm_to_pytorch.py @@ -136,7 +136,7 @@ def convert_dit_checkpoint(checkpoint_url, pytorch_dump_folder_path, push_to_hub """ # define default BEiT configuration - has_lm_head = False if "rvlcdip" in checkpoint_url else True + has_lm_head = "rvlcdip" not in checkpoint_url config = BeitConfig(use_absolute_position_embeddings=True, use_mask_token=has_lm_head) # size of the architecture diff --git a/src/transformers/models/efficientnet/modeling_efficientnet.py b/src/transformers/models/efficientnet/modeling_efficientnet.py index 8f53227f69..4de89316b7 100644 --- a/src/transformers/models/efficientnet/modeling_efficientnet.py +++ b/src/transformers/models/efficientnet/modeling_efficientnet.py @@ -300,7 +300,7 @@ class EfficientNetBlock(nn.Module): ): super().__init__() self.expand_ratio = expand_ratio - self.expand = True if self.expand_ratio != 1 else False + self.expand = self.expand_ratio != 1 expand_in_dim = in_dim * expand_ratio if self.expand: @@ -371,10 +371,10 @@ class EfficientNetEncoder(nn.Module): expand_ratio = config.expand_ratios[i] for j in range(round_repeats(config.num_block_repeats[i])): - id_skip = True if j == 0 else False + id_skip = j == 0 stride = 1 if j > 0 else stride in_dim = out_dim if j > 0 else in_dim - adjust_padding = False if curr_block_num in config.depthwise_padding else True + adjust_padding = curr_block_num not in config.depthwise_padding drop_rate = config.drop_connect_rate * curr_block_num / num_blocks block = EfficientNetBlock( diff --git a/src/transformers/models/encodec/modeling_encodec.py b/src/transformers/models/encodec/modeling_encodec.py index 3dcec6bb8d..d0338983dc 100644 --- a/src/transformers/models/encodec/modeling_encodec.py +++ b/src/transformers/models/encodec/modeling_encodec.py @@ -143,7 +143,7 @@ class EncodecConv1d(nn.Module): """ length = hidden_states.shape[-1] padding_left, padding_right = paddings - if not mode == "reflect": + if mode != "reflect": return nn.functional.pad(hidden_states, paddings, mode, value) max_pad = max(padding_left, padding_right) diff --git a/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py index da0dde6915..fd6b3223a0 100644 --- a/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py @@ -616,7 +616,7 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss): self, input_ids, past_key_values=None, attention_mask=None, use_cache=None, encoder_outputs=None, **kwargs ): decoder_inputs = self.decoder.prepare_inputs_for_generation(input_ids, past_key_values=past_key_values) - decoder_attention_mask = decoder_inputs["attention_mask"] if "attention_mask" in decoder_inputs else None + decoder_attention_mask = decoder_inputs.get("attention_mask", None) past_key_values = decoder_inputs.get("past_key_values") if past_key_values is None: past_key_values = decoder_inputs.get("past") # e.g. on TF GPT2 diff --git a/src/transformers/models/esm/convert_esm.py b/src/transformers/models/esm/convert_esm.py index 020dd4e576..86d7bb8a28 100644 --- a/src/transformers/models/esm/convert_esm.py +++ b/src/transformers/models/esm/convert_esm.py @@ -130,7 +130,7 @@ def convert_esm_checkpoint_to_pytorch( num_attention_heads = esm.args.attention_heads intermediate_size = esm.args.ffn_embed_dim token_dropout = esm.args.token_dropout - emb_layer_norm_before = True if esm.emb_layer_norm_before else False + emb_layer_norm_before = bool(esm.emb_layer_norm_before) position_embedding_type = "absolute" is_folding_model = False esmfold_config = None diff --git a/src/transformers/models/esm/openfold_utils/chunk_utils.py b/src/transformers/models/esm/openfold_utils/chunk_utils.py index 930543c444..d8271371ca 100644 --- a/src/transformers/models/esm/openfold_utils/chunk_utils.py +++ b/src/transformers/models/esm/openfold_utils/chunk_utils.py @@ -233,7 +233,7 @@ def chunk_layer( def _prep_inputs(t: torch.Tensor) -> torch.Tensor: if not low_mem: - if not sum(t.shape[:no_batch_dims]) == no_batch_dims: + if sum(t.shape[:no_batch_dims]) != no_batch_dims: t = t.expand(orig_batch_dims + t.shape[no_batch_dims:]) t = t.reshape(-1, *t.shape[no_batch_dims:]) else: diff --git a/src/transformers/models/esm/openfold_utils/data_transforms.py b/src/transformers/models/esm/openfold_utils/data_transforms.py index fe71691323..bcd67aacab 100644 --- a/src/transformers/models/esm/openfold_utils/data_transforms.py +++ b/src/transformers/models/esm/openfold_utils/data_transforms.py @@ -31,9 +31,7 @@ def make_atom14_masks(protein: dict[str, torch.Tensor]) -> dict[str, torch.Tenso atom_names = rc.restype_name_to_atom14_names[rc.restype_1to3[rt]] restype_atom14_to_atom37_list.append([(rc.atom_order[name] if name else 0) for name in atom_names]) atom_name_to_idx14 = {name: i for i, name in enumerate(atom_names)} - restype_atom37_to_atom14_list.append( - [(atom_name_to_idx14[name] if name in atom_name_to_idx14 else 0) for name in rc.atom_types] - ) + restype_atom37_to_atom14_list.append([(atom_name_to_idx14.get(name, 0)) for name in rc.atom_types]) restype_atom14_mask_list.append([(1.0 if name else 0.0) for name in atom_names]) diff --git a/src/transformers/models/falcon/modeling_falcon.py b/src/transformers/models/falcon/modeling_falcon.py index a6b73b80ce..c4bf2dcc3b 100644 --- a/src/transformers/models/falcon/modeling_falcon.py +++ b/src/transformers/models/falcon/modeling_falcon.py @@ -334,7 +334,7 @@ class FalconAttention(nn.Module): # inline conditional assignment to support both torch.compile's `dynamic=True` and `fullgraph=True` # The query_length > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not # create a causal mask in case query_length == 1. - is_causal = True if self.is_causal and attention_mask is None and query_length > 1 else False + is_causal = self.is_causal and attention_mask is None and query_length > 1 attn_output = torch.nn.functional.scaled_dot_product_attention( query_layer, key_layer, @@ -364,7 +364,7 @@ class FalconAttention(nn.Module): if self._use_sdpa and not output_attentions and head_mask is None: # We dispatch to SDPA's Flash Attention or Efficient kernels via this if statement instead of an # inline conditional assignment to support both torch.compile's `dynamic=True` and `fullgraph=True` - is_causal = True if self.is_causal and attention_mask is None and query_length > 1 else False + is_causal = self.is_causal and attention_mask is None and query_length > 1 attn_output = torch.nn.functional.scaled_dot_product_attention( query_layer, key_layer, diff --git a/src/transformers/models/focalnet/convert_focalnet_to_hf_format.py b/src/transformers/models/focalnet/convert_focalnet_to_hf_format.py index d6af117e56..ead9950e2a 100644 --- a/src/transformers/models/focalnet/convert_focalnet_to_hf_format.py +++ b/src/transformers/models/focalnet/convert_focalnet_to_hf_format.py @@ -29,9 +29,9 @@ from transformers.image_utils import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD def get_focalnet_config(model_name): depths = [2, 2, 6, 2] if "tiny" in model_name else [2, 2, 18, 2] - use_conv_embed = True if "large" in model_name or "huge" in model_name else False - use_post_layernorm = True if "large" in model_name or "huge" in model_name else False - use_layerscale = True if "large" in model_name or "huge" in model_name else False + use_conv_embed = bool("large" in model_name or "huge" in model_name) + use_post_layernorm = bool("large" in model_name or "huge" in model_name) + use_layerscale = bool("large" in model_name or "huge" in model_name) if "large" in model_name or "xlarge" in model_name or "huge" in model_name: if "fl3" in model_name: diff --git a/src/transformers/models/fuyu/configuration_fuyu.py b/src/transformers/models/fuyu/configuration_fuyu.py index 90e150a9f9..40da84e2e7 100644 --- a/src/transformers/models/fuyu/configuration_fuyu.py +++ b/src/transformers/models/fuyu/configuration_fuyu.py @@ -158,7 +158,7 @@ class FuyuConfig(PretrainedConfig): "tie_word_embeddings": tie_word_embeddings, } logger.info("text_config is None. initializing the text model with default values.") - text_model_type = text_config["model_type"] if "model_type" in text_config else "persimmon" + text_model_type = text_config.get("model_type", "persimmon") self.text_config = CONFIG_MAPPING[text_model_type](**text_config) self._vocab_size = vocab_size diff --git a/src/transformers/models/gemma2/convert_gemma2_weights_to_hf.py b/src/transformers/models/gemma2/convert_gemma2_weights_to_hf.py index 1a3a4a92f3..d1b0636a99 100644 --- a/src/transformers/models/gemma2/convert_gemma2_weights_to_hf.py +++ b/src/transformers/models/gemma2/convert_gemma2_weights_to_hf.py @@ -222,7 +222,7 @@ def main(): spm_path = os.path.join(args.tokenizer_checkpoint) write_tokenizer(spm_path, args.output_dir, args.push_to_hub) - if not args.model_size == "tokenizer_only": + if args.model_size != "tokenizer_only": config = CONFIG_MAPPING[args.model_size] dtype = getattr(torch, args.dtype) write_model( diff --git a/src/transformers/models/glm4v/image_processing_glm4v.py b/src/transformers/models/glm4v/image_processing_glm4v.py index 2b4f9aa24b..699b8c5ad7 100644 --- a/src/transformers/models/glm4v/image_processing_glm4v.py +++ b/src/transformers/models/glm4v/image_processing_glm4v.py @@ -449,8 +449,8 @@ class Glm4vImageProcessor(BaseImageProcessor): Returns: `int`: Number of image patches per image. """ - patch_size = images_kwargs["patch_size"] if "patch_size" in images_kwargs else self.patch_size - merge_size = images_kwargs["merge_size"] if "merge_size" in images_kwargs else self.merge_size + patch_size = images_kwargs.get("patch_size", self.patch_size) + merge_size = images_kwargs.get("merge_size", self.merge_size) factor = patch_size * merge_size resized_height, resized_width = smart_resize( diff --git a/src/transformers/models/got_ocr2/configuration_got_ocr2.py b/src/transformers/models/got_ocr2/configuration_got_ocr2.py index 1c7a0de0c7..eb039f9589 100644 --- a/src/transformers/models/got_ocr2/configuration_got_ocr2.py +++ b/src/transformers/models/got_ocr2/configuration_got_ocr2.py @@ -179,7 +179,7 @@ class GotOcr2Config(PretrainedConfig): self.vision_config = vision_config if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "qwen2" + text_config["model_type"] = text_config.get("model_type", "qwen2") text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: text_config = CONFIG_MAPPING["qwen2"]( diff --git a/src/transformers/models/got_ocr2/image_processing_got_ocr2.py b/src/transformers/models/got_ocr2/image_processing_got_ocr2.py index a1f0eca4cc..b6414b1c61 100644 --- a/src/transformers/models/got_ocr2/image_processing_got_ocr2.py +++ b/src/transformers/models/got_ocr2/image_processing_got_ocr2.py @@ -505,12 +505,10 @@ class GotOcr2ImageProcessor(BaseImageProcessor): Returns: `int`: Number of patches per image. """ - min_patches = images_kwargs["min_patches"] if "min_patches" in images_kwargs else self.min_patches - max_patches = images_kwargs["max_patches"] if "max_patches" in images_kwargs else self.max_patches - patch_size = images_kwargs["patch_size"] if "patch_size" in images_kwargs else self.size - crop_to_patches = ( - images_kwargs["crop_to_patches"] if "crop_to_patches" in images_kwargs else self.crop_to_patches - ) + min_patches = images_kwargs.get("min_patches", self.min_patches) + max_patches = images_kwargs.get("max_patches", self.max_patches) + patch_size = images_kwargs.get("patch_size", self.size) + crop_to_patches = images_kwargs.get("crop_to_patches", self.crop_to_patches) num_patches = 1 if crop_to_patches and max_patches > 1: diff --git a/src/transformers/models/got_ocr2/image_processing_got_ocr2_fast.py b/src/transformers/models/got_ocr2/image_processing_got_ocr2_fast.py index 04cf09fe39..813bb6061b 100644 --- a/src/transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +++ b/src/transformers/models/got_ocr2/image_processing_got_ocr2_fast.py @@ -237,12 +237,10 @@ class GotOcr2ImageProcessorFast(BaseImageProcessorFast): Returns: `int`: Number of patches per image. """ - min_patches = images_kwargs["min_patches"] if "min_patches" in images_kwargs else self.min_patches - max_patches = images_kwargs["max_patches"] if "max_patches" in images_kwargs else self.max_patches - patch_size = images_kwargs["patch_size"] if "patch_size" in images_kwargs else self.size - crop_to_patches = ( - images_kwargs["crop_to_patches"] if "crop_to_patches" in images_kwargs else self.crop_to_patches - ) + min_patches = images_kwargs.get("min_patches", self.min_patches) + max_patches = images_kwargs.get("max_patches", self.max_patches) + patch_size = images_kwargs.get("patch_size", self.size) + crop_to_patches = images_kwargs.get("crop_to_patches", self.crop_to_patches) num_patches = 1 if crop_to_patches and max_patches > 1: diff --git a/src/transformers/models/got_ocr2/modular_got_ocr2.py b/src/transformers/models/got_ocr2/modular_got_ocr2.py index 7b381e08cc..9e017659e4 100644 --- a/src/transformers/models/got_ocr2/modular_got_ocr2.py +++ b/src/transformers/models/got_ocr2/modular_got_ocr2.py @@ -201,7 +201,7 @@ class GotOcr2Config(PretrainedConfig): self.vision_config = vision_config if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "qwen2" + text_config["model_type"] = text_config.get("model_type", "qwen2") text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: text_config = CONFIG_MAPPING["qwen2"]( diff --git a/src/transformers/models/gpt2/modeling_gpt2.py b/src/transformers/models/gpt2/modeling_gpt2.py index 36d1bdb8b8..3403ac3196 100644 --- a/src/transformers/models/gpt2/modeling_gpt2.py +++ b/src/transformers/models/gpt2/modeling_gpt2.py @@ -893,7 +893,7 @@ class GPT2Model(GPT2PreTrainedModel): encoder_attention_mask = _prepare_4d_attention_mask_for_sdpa( mask=encoder_attention_mask, dtype=inputs_embeds.dtype, tgt_len=input_shape[-1] ) - elif not self._attn_implementation == "flash_attention_2": + elif self._attn_implementation != "flash_attention_2": encoder_attention_mask = self.invert_attention_mask(encoder_attention_mask) else: encoder_attention_mask = None diff --git a/src/transformers/models/granite_speech/configuration_granite_speech.py b/src/transformers/models/granite_speech/configuration_granite_speech.py index b3e7e388a1..fede07b7b7 100644 --- a/src/transformers/models/granite_speech/configuration_granite_speech.py +++ b/src/transformers/models/granite_speech/configuration_granite_speech.py @@ -169,15 +169,13 @@ class GraniteSpeechConfig(PretrainedConfig): **kwargs, ): if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "granite" + text_config["model_type"] = text_config.get("model_type", "granite") text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: text_config = CONFIG_MAPPING["granite"]() if isinstance(projector_config, dict): - projector_config["model_type"] = ( - projector_config["model_type"] if "model_type" in projector_config else "blip_2_qformer" - ) + projector_config["model_type"] = projector_config.get("model_type", "blip_2_qformer") projector_config = CONFIG_MAPPING[projector_config["model_type"]](**projector_config) elif projector_config is None: projector_config = CONFIG_MAPPING["blip_2_qformer"]() diff --git a/src/transformers/models/grounding_dino/configuration_grounding_dino.py b/src/transformers/models/grounding_dino/configuration_grounding_dino.py index ca8960ee9a..a45848b4c4 100644 --- a/src/transformers/models/grounding_dino/configuration_grounding_dino.py +++ b/src/transformers/models/grounding_dino/configuration_grounding_dino.py @@ -261,7 +261,7 @@ class GroundingDinoConfig(PretrainedConfig): self.disable_custom_kernels = disable_custom_kernels # Text backbone if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "bert" + text_config["model_type"] = text_config.get("model_type", "bert") text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: text_config = CONFIG_MAPPING["bert"]() diff --git a/src/transformers/models/grounding_dino/image_processing_grounding_dino.py b/src/transformers/models/grounding_dino/image_processing_grounding_dino.py index d56a24cb57..8ba1f34ae9 100644 --- a/src/transformers/models/grounding_dino/image_processing_grounding_dino.py +++ b/src/transformers/models/grounding_dino/image_processing_grounding_dino.py @@ -359,7 +359,7 @@ def prepare_coco_detection_annotation( # for conversion to coco api area = np.asarray([obj["area"] for obj in annotations], dtype=np.float32) - iscrowd = np.asarray([obj["iscrowd"] if "iscrowd" in obj else 0 for obj in annotations], dtype=np.int64) + iscrowd = np.asarray([obj.get("iscrowd", 0) for obj in annotations], dtype=np.int64) boxes = [obj["bbox"] for obj in annotations] # guard against no boxes via resizing diff --git a/src/transformers/models/grounding_dino/modeling_grounding_dino.py b/src/transformers/models/grounding_dino/modeling_grounding_dino.py index 8432e510ed..310aaf8e0c 100644 --- a/src/transformers/models/grounding_dino/modeling_grounding_dino.py +++ b/src/transformers/models/grounding_dino/modeling_grounding_dino.py @@ -354,7 +354,7 @@ def replace_batch_norm(model): if isinstance(module, nn.BatchNorm2d): new_module = GroundingDinoFrozenBatchNorm2d(module.num_features) - if not module.weight.device == torch.device("meta"): + if module.weight.device != torch.device("meta"): new_module.weight.data.copy_(module.weight) new_module.bias.data.copy_(module.bias) new_module.running_mean.data.copy_(module.running_mean) diff --git a/src/transformers/models/hgnet_v2/modeling_hgnet_v2.py b/src/transformers/models/hgnet_v2/modeling_hgnet_v2.py index 16dfeb77c2..71da4f055a 100644 --- a/src/transformers/models/hgnet_v2/modeling_hgnet_v2.py +++ b/src/transformers/models/hgnet_v2/modeling_hgnet_v2.py @@ -284,7 +284,7 @@ class HGNetV2Stage(nn.Module): mid_channels, out_channels, num_layers, - residual=False if i == 0 else True, + residual=(i != 0), kernel_size=kernel_size, light_block=light_block, drop_path=drop_path, diff --git a/src/transformers/models/hgnet_v2/modular_hgnet_v2.py b/src/transformers/models/hgnet_v2/modular_hgnet_v2.py index f0c90ce0a6..4f898718e3 100644 --- a/src/transformers/models/hgnet_v2/modular_hgnet_v2.py +++ b/src/transformers/models/hgnet_v2/modular_hgnet_v2.py @@ -407,7 +407,7 @@ class HGNetV2Stage(nn.Module): mid_channels, out_channels, num_layers, - residual=False if i == 0 else True, + residual=(i != 0), kernel_size=kernel_size, light_block=light_block, drop_path=drop_path, diff --git a/src/transformers/models/hubert/convert_hubert_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/hubert/convert_hubert_original_pytorch_checkpoint_to_pytorch.py index 8d31e5e5c3..a0e0b5cd56 100644 --- a/src/transformers/models/hubert/convert_hubert_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/hubert/convert_hubert_original_pytorch_checkpoint_to_pytorch.py @@ -217,7 +217,7 @@ def convert_hubert_checkpoint( word_delimiter_token="|", do_lower_case=False, ) - return_attention_mask = True if config.feat_extract_norm == "layer" else False + return_attention_mask = config.feat_extract_norm == "layer" feature_extractor = Wav2Vec2FeatureExtractor( feature_size=1, sampling_rate=16000, diff --git a/src/transformers/models/hubert/modeling_hubert.py b/src/transformers/models/hubert/modeling_hubert.py index 19f3302c5a..060b715e8d 100755 --- a/src/transformers/models/hubert/modeling_hubert.py +++ b/src/transformers/models/hubert/modeling_hubert.py @@ -459,7 +459,7 @@ class HubertEncoder(nn.Module): # add LayerDrop (see https://huggingface.co/papers/1909.11556 for description) dropout_probability = torch.rand([]) - skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False + skip_the_layer = self.training and dropout_probability < self.config.layerdrop if not skip_the_layer or synced_gpus: # under fsdp or deepspeed zero3 all gpus must run in sync layer_outputs = layer( @@ -624,7 +624,7 @@ class HubertEncoderStableLayerNorm(nn.Module): # add LayerDrop (see https://huggingface.co/papers/1909.11556 for description) dropout_probability = torch.rand([]) - skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False + skip_the_layer = self.training and dropout_probability < self.config.layerdrop if not skip_the_layer or synced_gpus: # under fsdp or deepspeed zero3 all gpus must run in sync # XXX: could optimize this like synced_gpus in generate_utils but not sure if it's worth the code complication diff --git a/src/transformers/models/idefics/processing_idefics.py b/src/transformers/models/idefics/processing_idefics.py index 3c59105c23..fb0d129c50 100644 --- a/src/transformers/models/idefics/processing_idefics.py +++ b/src/transformers/models/idefics/processing_idefics.py @@ -234,9 +234,7 @@ class IdeficsProcessor(ProcessorMixin): ) self.tokenizer_was_trained_with_end_of_utterance_token = ( - True - if "" in self.tokenizer.special_tokens_map.get("additional_special_tokens", []) - else False + "" in self.tokenizer.special_tokens_map.get("additional_special_tokens", []) ) @deprecate_kwarg(old_name="prompts", version="5.0.0", new_name="text", raise_if_both_names=True) @@ -402,7 +400,7 @@ class IdeficsProcessor(ProcessorMixin): last_was_text = False for i, item in enumerate(sample): if i > 0: - last_was_text = True if not last_was_image else False + last_was_text = bool(not last_was_image) if isinstance(item, str): item = item.strip(" ") diff --git a/src/transformers/models/idefics2/configuration_idefics2.py b/src/transformers/models/idefics2/configuration_idefics2.py index 31912d6ad9..a8fa442a1d 100644 --- a/src/transformers/models/idefics2/configuration_idefics2.py +++ b/src/transformers/models/idefics2/configuration_idefics2.py @@ -241,7 +241,7 @@ class Idefics2Config(PretrainedConfig): self.vision_config = vision_config if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "mistral" + text_config["model_type"] = text_config.get("model_type", "mistral") text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: logger.info("text_config is None, using default text config") diff --git a/src/transformers/models/idefics3/configuration_idefics3.py b/src/transformers/models/idefics3/configuration_idefics3.py index 01c96afcaa..97a2e57f1d 100644 --- a/src/transformers/models/idefics3/configuration_idefics3.py +++ b/src/transformers/models/idefics3/configuration_idefics3.py @@ -171,7 +171,7 @@ class Idefics3Config(PretrainedConfig): self.vision_config = vision_config if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "llama" + text_config["model_type"] = text_config.get("model_type", "llama") text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: logger.info("text_config is None, using default text config") diff --git a/src/transformers/models/idefics3/image_processing_idefics3.py b/src/transformers/models/idefics3/image_processing_idefics3.py index 194dd092bb..f98413d133 100644 --- a/src/transformers/models/idefics3/image_processing_idefics3.py +++ b/src/transformers/models/idefics3/image_processing_idefics3.py @@ -866,11 +866,9 @@ class Idefics3ImageProcessor(BaseImageProcessor): Returns: `int`: Number of patches per image. """ - do_image_splitting = ( - images_kwargs["do_image_splitting"] if "do_image_splitting" in images_kwargs else self.do_image_splitting - ) - max_image_size = images_kwargs["max_image_size"] if "max_image_size" in images_kwargs else self.max_image_size - size = images_kwargs["size"] if "size" in images_kwargs else self.size + do_image_splitting = images_kwargs.get("do_image_splitting", self.do_image_splitting) + max_image_size = images_kwargs.get("max_image_size", self.max_image_size) + size = images_kwargs.get("size", self.size) num_patches = num_rows = num_cols = 1 if do_image_splitting: diff --git a/src/transformers/models/idefics3/image_processing_idefics3_fast.py b/src/transformers/models/idefics3/image_processing_idefics3_fast.py index a2251e7853..b70829f5b4 100644 --- a/src/transformers/models/idefics3/image_processing_idefics3_fast.py +++ b/src/transformers/models/idefics3/image_processing_idefics3_fast.py @@ -514,11 +514,9 @@ class Idefics3ImageProcessorFast(BaseImageProcessorFast): Returns: `int`: Number of patches per image. """ - do_image_splitting = ( - images_kwargs["do_image_splitting"] if "do_image_splitting" in images_kwargs else self.do_image_splitting - ) - max_image_size = images_kwargs["max_image_size"] if "max_image_size" in images_kwargs else self.max_image_size - size = images_kwargs["size"] if "size" in images_kwargs else self.size + do_image_splitting = images_kwargs.get("do_image_splitting", self.do_image_splitting) + max_image_size = images_kwargs.get("max_image_size", self.max_image_size) + size = images_kwargs.get("size", self.size) num_patches = num_rows = num_cols = 1 if do_image_splitting: diff --git a/src/transformers/models/instructblip/configuration_instructblip.py b/src/transformers/models/instructblip/configuration_instructblip.py index 4206e174fa..9b8323f15f 100644 --- a/src/transformers/models/instructblip/configuration_instructblip.py +++ b/src/transformers/models/instructblip/configuration_instructblip.py @@ -302,7 +302,7 @@ class InstructBlipConfig(PretrainedConfig): self.vision_config = InstructBlipVisionConfig(**vision_config) self.qformer_config = InstructBlipQFormerConfig(**qformer_config) - text_model_type = text_config["model_type"] if "model_type" in text_config else "opt" + text_model_type = text_config.get("model_type", "opt") self.text_config = CONFIG_MAPPING[text_model_type](**text_config) self.num_query_tokens = num_query_tokens diff --git a/src/transformers/models/instructblipvideo/configuration_instructblipvideo.py b/src/transformers/models/instructblipvideo/configuration_instructblipvideo.py index 7ad1653a37..af2acc8338 100644 --- a/src/transformers/models/instructblipvideo/configuration_instructblipvideo.py +++ b/src/transformers/models/instructblipvideo/configuration_instructblipvideo.py @@ -308,7 +308,7 @@ class InstructBlipVideoConfig(PretrainedConfig): self.vision_config = InstructBlipVideoVisionConfig(**vision_config) self.qformer_config = InstructBlipVideoQFormerConfig(**qformer_config) - text_model_type = text_config["model_type"] if "model_type" in text_config else "opt" + text_model_type = text_config.get("model_type", "opt") self.text_config = CONFIG_MAPPING[text_model_type](**text_config) self.num_query_tokens = num_query_tokens diff --git a/src/transformers/models/instructblipvideo/modular_instructblipvideo.py b/src/transformers/models/instructblipvideo/modular_instructblipvideo.py index 5c4404a6c4..e7bcfaba82 100644 --- a/src/transformers/models/instructblipvideo/modular_instructblipvideo.py +++ b/src/transformers/models/instructblipvideo/modular_instructblipvideo.py @@ -142,7 +142,7 @@ class InstructBlipVideoConfig(PretrainedConfig): self.vision_config = InstructBlipVideoVisionConfig(**vision_config) self.qformer_config = InstructBlipVideoQFormerConfig(**qformer_config) - text_model_type = text_config["model_type"] if "model_type" in text_config else "opt" + text_model_type = text_config.get("model_type", "opt") self.text_config = CONFIG_MAPPING[text_model_type](**text_config) self.num_query_tokens = num_query_tokens diff --git a/src/transformers/models/internvl/configuration_internvl.py b/src/transformers/models/internvl/configuration_internvl.py index a9fe4db5f3..17be5388b6 100644 --- a/src/transformers/models/internvl/configuration_internvl.py +++ b/src/transformers/models/internvl/configuration_internvl.py @@ -212,7 +212,7 @@ class InternVLConfig(PretrainedConfig): self.vision_config = InternVLVisionConfig() if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "qwen2" + text_config["model_type"] = text_config.get("model_type", "qwen2") text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: text_config = CONFIG_MAPPING["qwen2"]() diff --git a/src/transformers/models/jamba/modeling_jamba.py b/src/transformers/models/jamba/modeling_jamba.py index 44e1336907..e4a376e90a 100755 --- a/src/transformers/models/jamba/modeling_jamba.py +++ b/src/transformers/models/jamba/modeling_jamba.py @@ -533,7 +533,7 @@ class JambaSdpaAttention(JambaAttention): # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. - is_causal = True if self.is_causal and causal_mask is None and q_len > 1 else False + is_causal = self.is_causal and causal_mask is None and q_len > 1 attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, diff --git a/src/transformers/models/jetmoe/modeling_jetmoe.py b/src/transformers/models/jetmoe/modeling_jetmoe.py index 9eee14cb4a..5156ac5974 100644 --- a/src/transformers/models/jetmoe/modeling_jetmoe.py +++ b/src/transformers/models/jetmoe/modeling_jetmoe.py @@ -620,7 +620,7 @@ class JetMoeSdpaAttention(JetMoeAttention): # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. - is_causal = True if causal_mask is None and q_len > 1 else False + is_causal = causal_mask is None and q_len > 1 attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, diff --git a/src/transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py b/src/transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py index bf0b55d5a1..0489fc91d1 100644 --- a/src/transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +++ b/src/transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py @@ -690,7 +690,7 @@ class KyutaiSpeechToTextSdpaAttention(KyutaiSpeechToTextAttention): # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. - is_causal = True if causal_mask is None and q_len > 1 else False + is_causal = causal_mask is None and q_len > 1 attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, diff --git a/src/transformers/models/lightglue/configuration_lightglue.py b/src/transformers/models/lightglue/configuration_lightglue.py index d920727c0f..d0f228ae4b 100644 --- a/src/transformers/models/lightglue/configuration_lightglue.py +++ b/src/transformers/models/lightglue/configuration_lightglue.py @@ -129,9 +129,7 @@ class LightGlueConfig(PretrainedConfig): # Keypoint Detector is forced into eager attention mode because SuperPoint does not have Attention # See https://github.com/huggingface/transformers/pull/31718#discussion_r2109733153 if isinstance(keypoint_detector_config, dict): - keypoint_detector_config["model_type"] = ( - keypoint_detector_config["model_type"] if "model_type" in keypoint_detector_config else "superpoint" - ) + keypoint_detector_config["model_type"] = keypoint_detector_config.get("model_type", "superpoint") if keypoint_detector_config["model_type"] not in CONFIG_MAPPING: keypoint_detector_config = AutoConfig.from_pretrained( keypoint_detector_config["_name_or_path"], trust_remote_code=self.trust_remote_code diff --git a/src/transformers/models/lightglue/modular_lightglue.py b/src/transformers/models/lightglue/modular_lightglue.py index d90708e6bd..cefb235fcb 100644 --- a/src/transformers/models/lightglue/modular_lightglue.py +++ b/src/transformers/models/lightglue/modular_lightglue.py @@ -143,9 +143,7 @@ class LightGlueConfig(PretrainedConfig): # Keypoint Detector is forced into eager attention mode because SuperPoint does not have Attention # See https://github.com/huggingface/transformers/pull/31718#discussion_r2109733153 if isinstance(keypoint_detector_config, dict): - keypoint_detector_config["model_type"] = ( - keypoint_detector_config["model_type"] if "model_type" in keypoint_detector_config else "superpoint" - ) + keypoint_detector_config["model_type"] = keypoint_detector_config.get("model_type", "superpoint") if keypoint_detector_config["model_type"] not in CONFIG_MAPPING: keypoint_detector_config = AutoConfig.from_pretrained( keypoint_detector_config["_name_or_path"], trust_remote_code=self.trust_remote_code diff --git a/src/transformers/models/llama/convert_llama_weights_to_hf.py b/src/transformers/models/llama/convert_llama_weights_to_hf.py index 2a1b0df8fd..ed7a89f6f3 100644 --- a/src/transformers/models/llama/convert_llama_weights_to_hf.py +++ b/src/transformers/models/llama/convert_llama_weights_to_hf.py @@ -360,8 +360,8 @@ def write_model( # Write configs index_dict["metadata"] = {"total_size": param_count * 2} write_json(index_dict, os.path.join(tmp_model_path, "pytorch_model.bin.index.json")) - ffn_dim_multiplier = params["ffn_dim_multiplier"] if "ffn_dim_multiplier" in params else 1 - multiple_of = params["multiple_of"] if "multiple_of" in params else 256 + ffn_dim_multiplier = params.get("ffn_dim_multiplier", 1) + multiple_of = params.get("multiple_of", 256) if is_llama_3(llama_version): bos_token_id = 128000 @@ -398,7 +398,7 @@ def write_model( max_position_embeddings=max_position_embeddings, bos_token_id=bos_token_id, eos_token_id=eos_token_id, - tie_word_embeddings=True if llama_version in ["3.2"] else False, + tie_word_embeddings=llama_version in ["3.2"], ) config.save_pretrained(tmp_model_path) diff --git a/src/transformers/models/llava/configuration_llava.py b/src/transformers/models/llava/configuration_llava.py index 869841e967..9ae710c011 100644 --- a/src/transformers/models/llava/configuration_llava.py +++ b/src/transformers/models/llava/configuration_llava.py @@ -106,9 +106,7 @@ class LlavaConfig(PretrainedConfig): self.vision_feature_layer = vision_feature_layer if isinstance(vision_config, dict): - vision_config["model_type"] = ( - vision_config["model_type"] if "model_type" in vision_config else "clip_vision_model" - ) + vision_config["model_type"] = vision_config.get("model_type", "clip_vision_model") vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config) elif vision_config is None: vision_config = CONFIG_MAPPING["clip_vision_model"]( @@ -125,7 +123,7 @@ class LlavaConfig(PretrainedConfig): self.vision_config = vision_config if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "llama" + text_config["model_type"] = text_config.get("model_type", "llama") text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: text_config = CONFIG_MAPPING["llama"]() diff --git a/src/transformers/models/llava_next/configuration_llava_next.py b/src/transformers/models/llava_next/configuration_llava_next.py index 4cda90aff1..17ea71b1aa 100644 --- a/src/transformers/models/llava_next/configuration_llava_next.py +++ b/src/transformers/models/llava_next/configuration_llava_next.py @@ -120,9 +120,7 @@ class LlavaNextConfig(PretrainedConfig): self.image_grid_pinpoints = image_grid_pinpoints if isinstance(vision_config, dict): - vision_config["model_type"] = ( - vision_config["model_type"] if "model_type" in vision_config else "clip_vision_model" - ) + vision_config["model_type"] = vision_config.get("model_type", "clip_vision_model") vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config) elif vision_config is None: vision_config = CONFIG_MAPPING["clip_vision_model"]( @@ -139,7 +137,7 @@ class LlavaNextConfig(PretrainedConfig): self.vision_config = vision_config if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "llama" + text_config["model_type"] = text_config.get("model_type", "llama") text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: text_config = CONFIG_MAPPING["llama"]() diff --git a/src/transformers/models/llava_next/convert_llava_next_weights_to_hf.py b/src/transformers/models/llava_next/convert_llava_next_weights_to_hf.py index 8aee180dd7..41fc226783 100644 --- a/src/transformers/models/llava_next/convert_llava_next_weights_to_hf.py +++ b/src/transformers/models/llava_next/convert_llava_next_weights_to_hf.py @@ -127,7 +127,7 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False): torch.set_default_dtype(torch.float16) text_config = AutoConfig.from_pretrained(text_model_id) - use_fast = False if model_id == "liuhaotian/llava-v1.6-34b" else True + use_fast = model_id != "liuhaotian/llava-v1.6-34b" tokenizer = AutoTokenizer.from_pretrained(text_model_id, use_fast=use_fast) tokenizer.add_tokens(AddedToken("", special=True, normalized=False), special_tokens=True) diff --git a/src/transformers/models/llava_next_video/configuration_llava_next_video.py b/src/transformers/models/llava_next_video/configuration_llava_next_video.py index 32fa77f97c..1eb1078b6a 100644 --- a/src/transformers/models/llava_next_video/configuration_llava_next_video.py +++ b/src/transformers/models/llava_next_video/configuration_llava_next_video.py @@ -136,9 +136,7 @@ class LlavaNextVideoConfig(PretrainedConfig): self.image_grid_pinpoints = image_grid_pinpoints if isinstance(vision_config, dict): - vision_config["model_type"] = ( - vision_config["model_type"] if "model_type" in vision_config else "clip_vision_model" - ) + vision_config["model_type"] = vision_config.get("model_type", "clip_vision_model") vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config) elif vision_config is None: vision_config = CONFIG_MAPPING["clip_vision_model"]( @@ -155,7 +153,7 @@ class LlavaNextVideoConfig(PretrainedConfig): self.vision_config = vision_config if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "llama" + text_config["model_type"] = text_config.get("model_type", "llama") text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: text_config = CONFIG_MAPPING["llama"]() diff --git a/src/transformers/models/llava_next_video/modular_llava_next_video.py b/src/transformers/models/llava_next_video/modular_llava_next_video.py index cfbb7aa36e..fecd2320f9 100644 --- a/src/transformers/models/llava_next_video/modular_llava_next_video.py +++ b/src/transformers/models/llava_next_video/modular_llava_next_video.py @@ -153,9 +153,7 @@ class LlavaNextVideoConfig(PretrainedConfig): self.image_grid_pinpoints = image_grid_pinpoints if isinstance(vision_config, dict): - vision_config["model_type"] = ( - vision_config["model_type"] if "model_type" in vision_config else "clip_vision_model" - ) + vision_config["model_type"] = vision_config.get("model_type", "clip_vision_model") vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config) elif vision_config is None: vision_config = CONFIG_MAPPING["clip_vision_model"]( @@ -172,7 +170,7 @@ class LlavaNextVideoConfig(PretrainedConfig): self.vision_config = vision_config if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "llama" + text_config["model_type"] = text_config.get("model_type", "llama") text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: text_config = CONFIG_MAPPING["llama"]() diff --git a/src/transformers/models/llava_onevision/configuration_llava_onevision.py b/src/transformers/models/llava_onevision/configuration_llava_onevision.py index f6f40c1bd8..21ead3df17 100644 --- a/src/transformers/models/llava_onevision/configuration_llava_onevision.py +++ b/src/transformers/models/llava_onevision/configuration_llava_onevision.py @@ -165,9 +165,7 @@ class LlavaOnevisionConfig(PretrainedConfig): self.image_grid_pinpoints = image_grid_pinpoints if isinstance(vision_config, dict): - vision_config["model_type"] = ( - vision_config["model_type"] if "model_type" in vision_config else "siglip_vision_model" - ) + vision_config["model_type"] = vision_config.get("model_type", "siglip_vision_model") vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config) elif vision_config is None: vision_config = CONFIG_MAPPING["siglip_vision_model"]( @@ -183,7 +181,7 @@ class LlavaOnevisionConfig(PretrainedConfig): self.vision_config = vision_config if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "qwen2" + text_config["model_type"] = text_config.get("model_type", "qwen2") text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: text_config = CONFIG_MAPPING["qwen2"]() diff --git a/src/transformers/models/m2m_100/modeling_m2m_100.py b/src/transformers/models/m2m_100/modeling_m2m_100.py index ccd7c000ac..89344e6ac1 100755 --- a/src/transformers/models/m2m_100/modeling_m2m_100.py +++ b/src/transformers/models/m2m_100/modeling_m2m_100.py @@ -871,7 +871,7 @@ class M2M100Encoder(M2M100PreTrainedModel): # add LayerDrop (see https://huggingface.co/papers/1909.11556 for description) dropout_probability = torch.rand([]) - skip_the_layer = True if self.training and (dropout_probability < self.layerdrop) else False + skip_the_layer = self.training and dropout_probability < self.layerdrop if not skip_the_layer or synced_gpus: # under fsdp or deepspeed zero3 all gpus must run in sync @@ -1120,7 +1120,7 @@ class M2M100Decoder(M2M100PreTrainedModel): # add LayerDrop (see https://huggingface.co/papers/1909.11556 for description) dropout_probability = torch.rand([]) - skip_the_layer = True if self.training and (dropout_probability < self.layerdrop) else False + skip_the_layer = self.training and dropout_probability < self.layerdrop if not skip_the_layer or synced_gpus: # under fsdp or deepspeed zero3 all gpus must run in sync diff --git a/src/transformers/models/markuplm/feature_extraction_markuplm.py b/src/transformers/models/markuplm/feature_extraction_markuplm.py index 43c6ed9395..8ea35b6da5 100644 --- a/src/transformers/models/markuplm/feature_extraction_markuplm.py +++ b/src/transformers/models/markuplm/feature_extraction_markuplm.py @@ -159,7 +159,7 @@ class MarkupLMFeatureExtractor(FeatureExtractionMixin): f"but is of type {type(html_strings)}." ) - is_batched = bool(isinstance(html_strings, (list, tuple)) and (isinstance(html_strings[0], str))) + is_batched = isinstance(html_strings, (list, tuple)) and (isinstance(html_strings[0], str)) if not is_batched: html_strings = [html_strings] diff --git a/src/transformers/models/maskformer/convert_maskformer_resnet_to_pytorch.py b/src/transformers/models/maskformer/convert_maskformer_resnet_to_pytorch.py index 79ef4917e9..43fbd234fb 100644 --- a/src/transformers/models/maskformer/convert_maskformer_resnet_to_pytorch.py +++ b/src/transformers/models/maskformer/convert_maskformer_resnet_to_pytorch.py @@ -295,7 +295,7 @@ def convert_maskformer_checkpoint( ignore_index = 65535 else: ignore_index = 255 - do_reduce_labels = True if "ade" in model_name else False + do_reduce_labels = "ade" in model_name image_processor = MaskFormerImageProcessor(ignore_index=ignore_index, do_reduce_labels=do_reduce_labels) inputs = image_processor(image, return_tensors="pt") diff --git a/src/transformers/models/maskformer/convert_maskformer_swin_to_pytorch.py b/src/transformers/models/maskformer/convert_maskformer_swin_to_pytorch.py index 41e8b48888..4b6e32e5cc 100644 --- a/src/transformers/models/maskformer/convert_maskformer_swin_to_pytorch.py +++ b/src/transformers/models/maskformer/convert_maskformer_swin_to_pytorch.py @@ -276,7 +276,7 @@ def convert_maskformer_checkpoint( ignore_index = 65535 else: ignore_index = 255 - do_reduce_labels = True if "ade" in model_name else False + do_reduce_labels = "ade" in model_name image_processor = MaskFormerImageProcessor(ignore_index=ignore_index, do_reduce_labels=do_reduce_labels) inputs = image_processor(image, return_tensors="pt") diff --git a/src/transformers/models/mimi/modeling_mimi.py b/src/transformers/models/mimi/modeling_mimi.py index 260ea6f7ce..584f3c43e0 100644 --- a/src/transformers/models/mimi/modeling_mimi.py +++ b/src/transformers/models/mimi/modeling_mimi.py @@ -280,7 +280,7 @@ class MimiConv1d(nn.Module): """ length = hidden_states.shape[-1] padding_left, padding_right = paddings - if not mode == "reflect": + if mode != "reflect": return nn.functional.pad(hidden_states, paddings, mode, value) max_pad = max(padding_left, padding_right) @@ -888,7 +888,7 @@ class MimiSdpaAttention(MimiAttention): # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. - is_causal = True if causal_mask is None and q_len > 1 else False + is_causal = causal_mask is None and q_len > 1 attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, diff --git a/src/transformers/models/mistral3/configuration_mistral3.py b/src/transformers/models/mistral3/configuration_mistral3.py index b6f9caa334..2dd5586087 100644 --- a/src/transformers/models/mistral3/configuration_mistral3.py +++ b/src/transformers/models/mistral3/configuration_mistral3.py @@ -92,7 +92,7 @@ class Mistral3Config(PretrainedConfig): self.vision_feature_layer = vision_feature_layer if isinstance(vision_config, dict): - vision_config["model_type"] = vision_config["model_type"] if "model_type" in vision_config else "pixtral" + vision_config["model_type"] = vision_config.get("model_type", "pixtral") vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config) elif vision_config is None: vision_config = CONFIG_MAPPING["pixtral"]( @@ -110,7 +110,7 @@ class Mistral3Config(PretrainedConfig): self.vision_config = vision_config if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "mistral" + text_config["model_type"] = text_config.get("model_type", "mistral") text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: text_config = CONFIG_MAPPING["mistral"]( diff --git a/src/transformers/models/moonshine/modeling_moonshine.py b/src/transformers/models/moonshine/modeling_moonshine.py index 15598ccb32..b26b303859 100644 --- a/src/transformers/models/moonshine/modeling_moonshine.py +++ b/src/transformers/models/moonshine/modeling_moonshine.py @@ -266,7 +266,7 @@ class MoonshineAttention(nn.Module): if self.config._attn_implementation != "eager": attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation] - is_causal = True if self.is_causal and attention_mask is None and q_len > 1 else False + is_causal = self.is_causal and attention_mask is None and q_len > 1 if self.head_dim_padding > 0: query_states = torch.nn.functional.pad(query_states, (0, self.head_dim_padding)) diff --git a/src/transformers/models/moonshine/modular_moonshine.py b/src/transformers/models/moonshine/modular_moonshine.py index 326cd743ce..8452283e9c 100644 --- a/src/transformers/models/moonshine/modular_moonshine.py +++ b/src/transformers/models/moonshine/modular_moonshine.py @@ -365,7 +365,7 @@ class MoonshineAttention(GlmAttention): if self.config._attn_implementation != "eager": attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation] - is_causal = True if self.is_causal and attention_mask is None and q_len > 1 else False + is_causal = self.is_causal and attention_mask is None and q_len > 1 if self.head_dim_padding > 0: query_states = torch.nn.functional.pad(query_states, (0, self.head_dim_padding)) diff --git a/src/transformers/models/moshi/modeling_moshi.py b/src/transformers/models/moshi/modeling_moshi.py index 5a9aa1e993..c9717e49be 100644 --- a/src/transformers/models/moshi/modeling_moshi.py +++ b/src/transformers/models/moshi/modeling_moshi.py @@ -693,7 +693,7 @@ class MoshiSdpaAttention(MoshiAttention): # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. - is_causal = True if causal_mask is None and q_len > 1 else False + is_causal = causal_mask is None and q_len > 1 attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, diff --git a/src/transformers/models/nemotron/modeling_nemotron.py b/src/transformers/models/nemotron/modeling_nemotron.py index 714b71f6bb..d16b12d054 100644 --- a/src/transformers/models/nemotron/modeling_nemotron.py +++ b/src/transformers/models/nemotron/modeling_nemotron.py @@ -473,7 +473,7 @@ class NemotronSdpaAttention(NemotronAttention): # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. - is_causal = True if causal_mask is None and q_len > 1 else False + is_causal = causal_mask is None and q_len > 1 attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, diff --git a/src/transformers/models/nllb_moe/modeling_nllb_moe.py b/src/transformers/models/nllb_moe/modeling_nllb_moe.py index 909993b5d3..5523aeda58 100644 --- a/src/transformers/models/nllb_moe/modeling_nllb_moe.py +++ b/src/transformers/models/nllb_moe/modeling_nllb_moe.py @@ -1285,7 +1285,7 @@ class NllbMoeDecoder(NllbMoePreTrainedModel): # add LayerDrop (see https://huggingface.co/papers/1909.11556 for description) dropout_probability = torch.rand([]) - skip_the_layer = True if self.training and (dropout_probability < self.layerdrop) else False + skip_the_layer = self.training and dropout_probability < self.layerdrop if not skip_the_layer or synced_gpus: layer_head_mask = head_mask[idx] if head_mask is not None else None cross_attn_layer_head_mask = cross_attn_head_mask[idx] if cross_attn_head_mask is not None else None diff --git a/src/transformers/models/olmoe/modeling_olmoe.py b/src/transformers/models/olmoe/modeling_olmoe.py index 38b538fdf3..eacb56c064 100644 --- a/src/transformers/models/olmoe/modeling_olmoe.py +++ b/src/transformers/models/olmoe/modeling_olmoe.py @@ -538,7 +538,7 @@ class OlmoeSdpaAttention(OlmoeAttention): # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. - is_causal = True if causal_mask is None and q_len > 1 else False + is_causal = causal_mask is None and q_len > 1 attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, diff --git a/src/transformers/models/omdet_turbo/convert_omdet_turbo_to_hf.py b/src/transformers/models/omdet_turbo/convert_omdet_turbo_to_hf.py index da358d7119..ab6f057302 100644 --- a/src/transformers/models/omdet_turbo/convert_omdet_turbo_to_hf.py +++ b/src/transformers/models/omdet_turbo/convert_omdet_turbo_to_hf.py @@ -55,7 +55,7 @@ def get_omdet_turbo_config(model_name, use_timm_backbone): text_config={"model_type": "clip_text_model"}, use_timm_backbone=use_timm_backbone, backbone="swin_tiny_patch4_window7_224" if use_timm_backbone else None, - apply_layernorm_after_vision_backbone=True if use_timm_backbone else False, + apply_layernorm_after_vision_backbone=bool(use_timm_backbone), use_pretrained_backbone=False, ) diff --git a/src/transformers/models/paligemma/configuration_paligemma.py b/src/transformers/models/paligemma/configuration_paligemma.py index f32ad303bf..e4ee4b3b45 100644 --- a/src/transformers/models/paligemma/configuration_paligemma.py +++ b/src/transformers/models/paligemma/configuration_paligemma.py @@ -92,9 +92,7 @@ class PaliGemmaConfig(PretrainedConfig): self.is_encoder_decoder = False if isinstance(self.vision_config, dict): - vision_config["model_type"] = ( - vision_config["model_type"] if "model_type" in vision_config else "siglip_vision_model" - ) + vision_config["model_type"] = vision_config.get("model_type", "siglip_vision_model") self.vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config) elif vision_config is None: self.vision_config = CONFIG_MAPPING["siglip_vision_model"]( @@ -110,7 +108,7 @@ class PaliGemmaConfig(PretrainedConfig): self.text_config = text_config if isinstance(self.text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "gemma" + text_config["model_type"] = text_config.get("model_type", "gemma") self.text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: self.text_config = CONFIG_MAPPING["gemma"]( diff --git a/src/transformers/models/paligemma/processing_paligemma.py b/src/transformers/models/paligemma/processing_paligemma.py index e9629b2d2f..3a7d06b6ad 100644 --- a/src/transformers/models/paligemma/processing_paligemma.py +++ b/src/transformers/models/paligemma/processing_paligemma.py @@ -223,7 +223,7 @@ class PaliGemmaProcessor(ProcessorMixin): ) suffix = output_kwargs["text_kwargs"].pop("suffix", None) - return_token_type_ids = True if suffix is not None else False + return_token_type_ids = suffix is not None if images is None: raise ValueError("`images` are expected as arguments to a `PaliGemmaProcessor` instance.") diff --git a/src/transformers/models/perception_lm/configuration_perception_lm.py b/src/transformers/models/perception_lm/configuration_perception_lm.py index 12352967d7..4b94652e20 100644 --- a/src/transformers/models/perception_lm/configuration_perception_lm.py +++ b/src/transformers/models/perception_lm/configuration_perception_lm.py @@ -75,7 +75,7 @@ class PerceptionLMConfig(PretrainedConfig): self.vision_use_cls_token = vision_use_cls_token if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "llama" + text_config["model_type"] = text_config.get("model_type", "llama") text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: text_config = CONFIG_MAPPING["llama"]() diff --git a/src/transformers/models/perception_lm/convert_perception_lm_weights_to_hf.py b/src/transformers/models/perception_lm/convert_perception_lm_weights_to_hf.py index ee96c86876..6174309303 100644 --- a/src/transformers/models/perception_lm/convert_perception_lm_weights_to_hf.py +++ b/src/transformers/models/perception_lm/convert_perception_lm_weights_to_hf.py @@ -351,8 +351,8 @@ def write_model( # Write configs index_dict["metadata"] = {"total_size": param_count * 2} write_json(index_dict, os.path.join(tmp_model_path, "pytorch_model.bin.index.json")) - ffn_dim_multiplier = model_params["ffn_dim_multiplier"] if "ffn_dim_multiplier" in model_params else 1 - multiple_of = model_params["multiple_of"] if "multiple_of" in model_params else 256 + ffn_dim_multiplier = model_params.get("ffn_dim_multiplier", 1) + multiple_of = model_params.get("multiple_of", 256) bos_token_id = tokenizer.convert_tokens_to_ids("<|begin_of_text|>") eos_token_id = [tokenizer.convert_tokens_to_ids(t) for t in ["<|end_of_text|>", "<|eot_id|>"]] diff --git a/src/transformers/models/phi4_multimodal/modeling_phi4_multimodal.py b/src/transformers/models/phi4_multimodal/modeling_phi4_multimodal.py index c0376fc0e2..d2c587ccec 100644 --- a/src/transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +++ b/src/transformers/models/phi4_multimodal/modeling_phi4_multimodal.py @@ -577,7 +577,7 @@ class Phi4MultimodalVisionModel(Phi4MultimodalVisionPreTrainedModel): else: attention_mask = ( _prepare_4d_attention_mask(patch_attention_mask, hidden_states.dtype) - if not self.config._attn_implementation == "flash_attention_2" + if self.config._attn_implementation != "flash_attention_2" else patch_attention_mask ) diff --git a/src/transformers/models/phi4_multimodal/modular_phi4_multimodal.py b/src/transformers/models/phi4_multimodal/modular_phi4_multimodal.py index 7208cab16f..8772796b03 100644 --- a/src/transformers/models/phi4_multimodal/modular_phi4_multimodal.py +++ b/src/transformers/models/phi4_multimodal/modular_phi4_multimodal.py @@ -702,7 +702,7 @@ class Phi4MultimodalVisionModel(Phi4MultimodalVisionPreTrainedModel): else: attention_mask = ( _prepare_4d_attention_mask(patch_attention_mask, hidden_states.dtype) - if not self.config._attn_implementation == "flash_attention_2" + if self.config._attn_implementation != "flash_attention_2" else patch_attention_mask ) diff --git a/src/transformers/models/phimoe/modeling_phimoe.py b/src/transformers/models/phimoe/modeling_phimoe.py index 3b1369a924..2207793dca 100644 --- a/src/transformers/models/phimoe/modeling_phimoe.py +++ b/src/transformers/models/phimoe/modeling_phimoe.py @@ -488,7 +488,7 @@ class PhimoeSdpaAttention(PhimoeAttention): # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. - is_causal = True if causal_mask is None and q_len > 1 else False + is_causal = causal_mask is None and q_len > 1 attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, diff --git a/src/transformers/models/plbart/tokenization_plbart.py b/src/transformers/models/plbart/tokenization_plbart.py index 821c4c8172..1eed6dc18b 100644 --- a/src/transformers/models/plbart/tokenization_plbart.py +++ b/src/transformers/models/plbart/tokenization_plbart.py @@ -425,7 +425,7 @@ class PLBartTokenizer(PreTrainedTokenizer): def _convert_lang_code_special_format(self, lang: str) -> str: """Convert Language Codes to format tokenizer uses if required""" - lang = FAIRSEQ_LANGUAGE_CODES_MAP[lang] if lang in FAIRSEQ_LANGUAGE_CODES_MAP else lang + lang = FAIRSEQ_LANGUAGE_CODES_MAP.get(lang, lang) return lang diff --git a/src/transformers/models/pop2piano/feature_extraction_pop2piano.py b/src/transformers/models/pop2piano/feature_extraction_pop2piano.py index d772194274..97255f969a 100644 --- a/src/transformers/models/pop2piano/feature_extraction_pop2piano.py +++ b/src/transformers/models/pop2piano/feature_extraction_pop2piano.py @@ -377,7 +377,7 @@ class Pop2PianoFeatureExtractor(SequenceFeatureExtractor): """ requires_backends(self, ["librosa"]) - is_batched = bool(isinstance(audio, (list, tuple)) and isinstance(audio[0], (np.ndarray, tuple, list))) + is_batched = isinstance(audio, (list, tuple)) and isinstance(audio[0], (np.ndarray, tuple, list)) if is_batched: # This enables the user to process files of different sampling_rate at same time if not isinstance(sampling_rate, list): diff --git a/src/transformers/models/qwen2_audio/configuration_qwen2_audio.py b/src/transformers/models/qwen2_audio/configuration_qwen2_audio.py index ef95f19ed2..88e930a94f 100644 --- a/src/transformers/models/qwen2_audio/configuration_qwen2_audio.py +++ b/src/transformers/models/qwen2_audio/configuration_qwen2_audio.py @@ -172,9 +172,7 @@ class Qwen2AudioConfig(PretrainedConfig): self.audio_token_index = audio_token_index if isinstance(audio_config, dict): - audio_config["model_type"] = ( - audio_config["model_type"] if "model_type" in audio_config else "qwen2_audio_encoder" - ) + audio_config["model_type"] = audio_config.get("model_type", "qwen2_audio_encoder") audio_config = CONFIG_MAPPING[audio_config["model_type"]](**audio_config) elif audio_config is None: audio_config = CONFIG_MAPPING["qwen2_audio_encoder"]( @@ -192,7 +190,7 @@ class Qwen2AudioConfig(PretrainedConfig): self.audio_config = audio_config if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "qwen2" + text_config["model_type"] = text_config.get("model_type", "qwen2") text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: text_config = CONFIG_MAPPING["qwen2"]() diff --git a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py index 171ee4e657..108aa96176 100644 --- a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py +++ b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py @@ -553,7 +553,7 @@ class Qwen2MoeSdpaAttention(Qwen2MoeAttention): # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. - is_causal = True if causal_mask is None and q_len > 1 else False + is_causal = bool(causal_mask is None and q_len > 1) attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, diff --git a/src/transformers/models/qwen2_vl/image_processing_qwen2_vl.py b/src/transformers/models/qwen2_vl/image_processing_qwen2_vl.py index e42eeeef20..e1529594d0 100644 --- a/src/transformers/models/qwen2_vl/image_processing_qwen2_vl.py +++ b/src/transformers/models/qwen2_vl/image_processing_qwen2_vl.py @@ -504,8 +504,8 @@ class Qwen2VLImageProcessor(BaseImageProcessor): """ min_pixels = images_kwargs["min_pixels"] if "min_pixels" in images_kwargs else self.size["shortest_edge"] max_pixels = images_kwargs["max_pixels"] if "max_pixels" in images_kwargs else self.size["longest_edge"] - patch_size = images_kwargs["patch_size"] if "patch_size" in images_kwargs else self.patch_size - merge_size = images_kwargs["merge_size"] if "merge_size" in images_kwargs else self.merge_size + patch_size = images_kwargs.get("patch_size", self.patch_size) + merge_size = images_kwargs.get("merge_size", self.merge_size) factor = patch_size * merge_size resized_height, resized_width = smart_resize( diff --git a/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py b/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py index cadecfbf3f..cc3e0ae693 100644 --- a/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py +++ b/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py @@ -301,8 +301,8 @@ class Qwen2VLImageProcessorFast(BaseImageProcessorFast): """ min_pixels = images_kwargs["min_pixels"] if "min_pixels" in images_kwargs else self.size["shortest_edge"] max_pixels = images_kwargs["max_pixels"] if "max_pixels" in images_kwargs else self.size["longest_edge"] - patch_size = images_kwargs["patch_size"] if "patch_size" in images_kwargs else self.patch_size - merge_size = images_kwargs["merge_size"] if "merge_size" in images_kwargs else self.merge_size + patch_size = images_kwargs.get("patch_size", self.patch_size) + merge_size = images_kwargs.get("merge_size", self.merge_size) factor = patch_size * merge_size resized_height, resized_width = smart_resize( diff --git a/src/transformers/models/roberta/modeling_roberta.py b/src/transformers/models/roberta/modeling_roberta.py index f9fc9c3b58..fa8e638ef3 100644 --- a/src/transformers/models/roberta/modeling_roberta.py +++ b/src/transformers/models/roberta/modeling_roberta.py @@ -364,9 +364,7 @@ class RobertaSdpaSelfAttention(RobertaSelfAttention): # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create # a causal mask in case tgt_len == 1. - is_causal = ( - True if self.is_decoder and not is_cross_attention and attention_mask is None and tgt_len > 1 else False - ) + is_causal = self.is_decoder and not is_cross_attention and attention_mask is None and tgt_len > 1 attn_output = torch.nn.functional.scaled_dot_product_attention( query_layer, diff --git a/src/transformers/models/rt_detr/image_processing_rt_detr.py b/src/transformers/models/rt_detr/image_processing_rt_detr.py index 9d9548e69e..42d8efef38 100644 --- a/src/transformers/models/rt_detr/image_processing_rt_detr.py +++ b/src/transformers/models/rt_detr/image_processing_rt_detr.py @@ -297,7 +297,7 @@ def prepare_coco_detection_annotation( # for conversion to coco api area = np.asarray([obj["area"] for obj in annotations], dtype=np.float32) - iscrowd = np.asarray([obj["iscrowd"] if "iscrowd" in obj else 0 for obj in annotations], dtype=np.int64) + iscrowd = np.asarray([obj.get("iscrowd", 0) for obj in annotations], dtype=np.int64) boxes = [obj["bbox"] for obj in annotations] # guard against no boxes via resizing diff --git a/src/transformers/models/rt_detr/modeling_rt_detr.py b/src/transformers/models/rt_detr/modeling_rt_detr.py index 6ec7edbb2b..eb20d4c2d1 100644 --- a/src/transformers/models/rt_detr/modeling_rt_detr.py +++ b/src/transformers/models/rt_detr/modeling_rt_detr.py @@ -341,7 +341,7 @@ def replace_batch_norm(model): if isinstance(module, nn.BatchNorm2d): new_module = RTDetrFrozenBatchNorm2d(module.num_features) - if not module.weight.device == torch.device("meta"): + if module.weight.device != torch.device("meta"): new_module.weight.data.copy_(module.weight) new_module.bias.data.copy_(module.bias) new_module.running_mean.data.copy_(module.running_mean) diff --git a/src/transformers/models/rt_detr_v2/modeling_rt_detr_v2.py b/src/transformers/models/rt_detr_v2/modeling_rt_detr_v2.py index 3589b25761..f9588eac5a 100644 --- a/src/transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +++ b/src/transformers/models/rt_detr_v2/modeling_rt_detr_v2.py @@ -670,7 +670,7 @@ def replace_batch_norm(model): if isinstance(module, nn.BatchNorm2d): new_module = RTDetrV2FrozenBatchNorm2d(module.num_features) - if not module.weight.device == torch.device("meta"): + if module.weight.device != torch.device("meta"): new_module.weight.data.copy_(module.weight) new_module.bias.data.copy_(module.bias) new_module.running_mean.data.copy_(module.running_mean) diff --git a/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py b/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py index 619b6448b4..8e17cb17b2 100755 --- a/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py +++ b/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py @@ -742,9 +742,7 @@ class SeamlessM4TConformerEncoder(nn.Module): # add LayerDrop (see https://huggingface.co/papers/1909.11556 for description) dropout_probability = torch.rand([]) - skip_the_layer = ( - True if self.training and (dropout_probability < self.config.speech_encoder_layerdrop) else False - ) + skip_the_layer = self.training and dropout_probability < self.config.speech_encoder_layerdrop if not skip_the_layer or synced_gpus: # under fsdp or deepspeed zero3 all gpus must run in sync layer_outputs = layer( diff --git a/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py b/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py index 950c0d3b8f..e35278b48e 100644 --- a/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +++ b/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py @@ -611,9 +611,7 @@ class SeamlessM4Tv2ConformerEncoder(nn.Module): # add LayerDrop (see https://huggingface.co/papers/1909.11556 for description) dropout_probability = torch.rand([]) - skip_the_layer = ( - True if self.training and (dropout_probability < self.config.speech_encoder_layerdrop) else False - ) + skip_the_layer = self.training and dropout_probability < self.config.speech_encoder_layerdrop if not skip_the_layer or synced_gpus: # under fsdp or deepspeed zero3 all gpus must run in sync layer_outputs = layer( diff --git a/src/transformers/models/sew/convert_sew_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/sew/convert_sew_original_pytorch_checkpoint_to_pytorch.py index be8b507b60..f39e14e409 100644 --- a/src/transformers/models/sew/convert_sew_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/sew/convert_sew_original_pytorch_checkpoint_to_pytorch.py @@ -240,7 +240,7 @@ def convert_sew_checkpoint( config = convert_config(model[0], is_finetuned) model = model[0].eval() - return_attention_mask = True if config.feat_extract_norm == "layer" else False + return_attention_mask = config.feat_extract_norm == "layer" feature_extractor = Wav2Vec2FeatureExtractor( feature_size=1, sampling_rate=16000, diff --git a/src/transformers/models/sew/modeling_sew.py b/src/transformers/models/sew/modeling_sew.py index 3b985bce20..d5bd617e84 100644 --- a/src/transformers/models/sew/modeling_sew.py +++ b/src/transformers/models/sew/modeling_sew.py @@ -479,7 +479,7 @@ class SEWEncoder(nn.Module): # add LayerDrop (see https://huggingface.co/papers/1909.11556 for description) dropout_probability = torch.rand([]) - skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False + skip_the_layer = self.training and dropout_probability < self.config.layerdrop if not skip_the_layer or synced_gpus: # under fsdp or deepspeed zero3 all gpus must run in sync layer_outputs = layer( diff --git a/src/transformers/models/sew/modular_sew.py b/src/transformers/models/sew/modular_sew.py index 4614892b3d..5b4ee00c4a 100644 --- a/src/transformers/models/sew/modular_sew.py +++ b/src/transformers/models/sew/modular_sew.py @@ -227,7 +227,7 @@ class SEWEncoder(nn.Module): # add LayerDrop (see https://huggingface.co/papers/1909.11556 for description) dropout_probability = torch.rand([]) - skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False + skip_the_layer = self.training and dropout_probability < self.config.layerdrop if not skip_the_layer or synced_gpus: # under fsdp or deepspeed zero3 all gpus must run in sync layer_outputs = layer( diff --git a/src/transformers/models/sew_d/convert_sew_d_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/sew_d/convert_sew_d_original_pytorch_checkpoint_to_pytorch.py index b4dfd7bd2a..bc638e6b7c 100644 --- a/src/transformers/models/sew_d/convert_sew_d_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/sew_d/convert_sew_d_original_pytorch_checkpoint_to_pytorch.py @@ -252,7 +252,7 @@ def convert_sew_checkpoint( config = convert_config(model[0], is_finetuned) model = model[0].eval() - return_attention_mask = True if config.feat_extract_norm == "layer" else False + return_attention_mask = config.feat_extract_norm == "layer" feature_extractor = Wav2Vec2FeatureExtractor( feature_size=1, sampling_rate=16000, diff --git a/src/transformers/models/shieldgemma2/configuration_shieldgemma2.py b/src/transformers/models/shieldgemma2/configuration_shieldgemma2.py index acb53924b2..63d7f49bf3 100644 --- a/src/transformers/models/shieldgemma2/configuration_shieldgemma2.py +++ b/src/transformers/models/shieldgemma2/configuration_shieldgemma2.py @@ -91,9 +91,7 @@ class ShieldGemma2Config(PretrainedConfig): **kwargs, ): if isinstance(vision_config, dict): - vision_config["model_type"] = ( - vision_config["model_type"] if "model_type" in vision_config else "siglip_vision_model" - ) + vision_config["model_type"] = vision_config.get("model_type", "siglip_vision_model") vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config) elif vision_config is None: vision_config = CONFIG_MAPPING["siglip_vision_model"]() @@ -101,7 +99,7 @@ class ShieldGemma2Config(PretrainedConfig): self.vision_config = vision_config if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "gemma3_text" + text_config["model_type"] = text_config.get("model_type", "gemma3_text") text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: text_config = CONFIG_MAPPING["gemma3_text"]() diff --git a/src/transformers/models/smolvlm/configuration_smolvlm.py b/src/transformers/models/smolvlm/configuration_smolvlm.py index cd85441568..2dca4721c0 100644 --- a/src/transformers/models/smolvlm/configuration_smolvlm.py +++ b/src/transformers/models/smolvlm/configuration_smolvlm.py @@ -177,7 +177,7 @@ class SmolVLMConfig(PretrainedConfig): self.vision_config = vision_config if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "llama" + text_config["model_type"] = text_config.get("model_type", "llama") text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: logger.info("text_config is None, using default text config") diff --git a/src/transformers/models/smolvlm/image_processing_smolvlm.py b/src/transformers/models/smolvlm/image_processing_smolvlm.py index 440f263d0a..2c7e34a986 100644 --- a/src/transformers/models/smolvlm/image_processing_smolvlm.py +++ b/src/transformers/models/smolvlm/image_processing_smolvlm.py @@ -863,11 +863,9 @@ class SmolVLMImageProcessor(BaseImageProcessor): Returns: `int`: Number of patches per image. """ - do_image_splitting = ( - images_kwargs["do_image_splitting"] if "do_image_splitting" in images_kwargs else self.do_image_splitting - ) - max_image_size = images_kwargs["max_image_size"] if "max_image_size" in images_kwargs else self.max_image_size - size = images_kwargs["size"] if "size" in images_kwargs else self.size + do_image_splitting = images_kwargs.get("do_image_splitting", self.do_image_splitting) + max_image_size = images_kwargs.get("max_image_size", self.max_image_size) + size = images_kwargs.get("size", self.size) num_patches = num_rows = num_cols = 1 if do_image_splitting: diff --git a/src/transformers/models/smolvlm/image_processing_smolvlm_fast.py b/src/transformers/models/smolvlm/image_processing_smolvlm_fast.py index ecbd3a7e07..a070cd87bf 100644 --- a/src/transformers/models/smolvlm/image_processing_smolvlm_fast.py +++ b/src/transformers/models/smolvlm/image_processing_smolvlm_fast.py @@ -504,11 +504,9 @@ class SmolVLMImageProcessorFast(BaseImageProcessorFast): Returns: `int`: Number of patches per image. """ - do_image_splitting = ( - images_kwargs["do_image_splitting"] if "do_image_splitting" in images_kwargs else self.do_image_splitting - ) - max_image_size = images_kwargs["max_image_size"] if "max_image_size" in images_kwargs else self.max_image_size - size = images_kwargs["size"] if "size" in images_kwargs else self.size + do_image_splitting = images_kwargs.get("do_image_splitting", self.do_image_splitting) + max_image_size = images_kwargs.get("max_image_size", self.max_image_size) + size = images_kwargs.get("size", self.size) num_patches = num_rows = num_cols = 1 if do_image_splitting: diff --git a/src/transformers/models/stablelm/modeling_stablelm.py b/src/transformers/models/stablelm/modeling_stablelm.py index b70eaf1a09..559c1a5f95 100755 --- a/src/transformers/models/stablelm/modeling_stablelm.py +++ b/src/transformers/models/stablelm/modeling_stablelm.py @@ -393,7 +393,7 @@ class StableLmSdpaAttention(StableLmAttention): # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. - is_causal = True if causal_mask is None and q_len > 1 else False + is_causal = bool(causal_mask is None and q_len > 1) attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, diff --git a/src/transformers/models/superglue/configuration_superglue.py b/src/transformers/models/superglue/configuration_superglue.py index 7ca9e229b7..de6086114f 100644 --- a/src/transformers/models/superglue/configuration_superglue.py +++ b/src/transformers/models/superglue/configuration_superglue.py @@ -100,9 +100,7 @@ class SuperGlueConfig(PretrainedConfig): self.matching_threshold = matching_threshold if isinstance(keypoint_detector_config, dict): - keypoint_detector_config["model_type"] = ( - keypoint_detector_config["model_type"] if "model_type" in keypoint_detector_config else "superpoint" - ) + keypoint_detector_config["model_type"] = keypoint_detector_config.get("model_type", "superpoint") keypoint_detector_config = CONFIG_MAPPING[keypoint_detector_config["model_type"]]( **keypoint_detector_config ) diff --git a/src/transformers/models/table_transformer/modeling_table_transformer.py b/src/transformers/models/table_transformer/modeling_table_transformer.py index 5f0729ba3a..71bd7e9515 100644 --- a/src/transformers/models/table_transformer/modeling_table_transformer.py +++ b/src/transformers/models/table_transformer/modeling_table_transformer.py @@ -185,7 +185,7 @@ def replace_batch_norm(model): if isinstance(module, nn.BatchNorm2d): new_module = TableTransformerFrozenBatchNorm2d(module.num_features) - if not module.weight.device == torch.device("meta"): + if module.weight.device != torch.device("meta"): new_module.weight.data.copy_(module.weight) new_module.bias.data.copy_(module.bias) new_module.running_mean.data.copy_(module.running_mean) diff --git a/src/transformers/models/unispeech/convert_unispeech_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/unispeech/convert_unispeech_original_pytorch_checkpoint_to_pytorch.py index b17d304a1e..f0e05cbe15 100644 --- a/src/transformers/models/unispeech/convert_unispeech_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/unispeech/convert_unispeech_original_pytorch_checkpoint_to_pytorch.py @@ -229,7 +229,7 @@ def convert_unispeech_checkpoint( word_delimiter_token="|", do_lower_case=False, ) - return_attention_mask = True if config.feat_extract_norm == "layer" else False + return_attention_mask = config.feat_extract_norm == "layer" feature_extractor = Wav2Vec2FeatureExtractor( feature_size=1, sampling_rate=16000, diff --git a/src/transformers/models/unispeech/modeling_unispeech.py b/src/transformers/models/unispeech/modeling_unispeech.py index 233de0d14c..76f8072883 100755 --- a/src/transformers/models/unispeech/modeling_unispeech.py +++ b/src/transformers/models/unispeech/modeling_unispeech.py @@ -491,7 +491,7 @@ class UniSpeechEncoder(nn.Module): # add LayerDrop (see https://huggingface.co/papers/1909.11556 for description) dropout_probability = torch.rand([]) - skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False + skip_the_layer = self.training and dropout_probability < self.config.layerdrop if not skip_the_layer or synced_gpus: # under fsdp or deepspeed zero3 all gpus must run in sync layer_outputs = layer( @@ -656,7 +656,7 @@ class UniSpeechEncoderStableLayerNorm(nn.Module): # add LayerDrop (see https://huggingface.co/papers/1909.11556 for description) dropout_probability = torch.rand([]) - skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False + skip_the_layer = self.training and dropout_probability < self.config.layerdrop if not skip_the_layer or synced_gpus: # under fsdp or deepspeed zero3 all gpus must run in sync # XXX: could optimize this like synced_gpus in generate_utils but not sure if it's worth the code complication diff --git a/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py b/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py index c7fc63849c..0b771ec08f 100755 --- a/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py +++ b/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py @@ -496,7 +496,7 @@ class UniSpeechSatEncoder(nn.Module): # add LayerDrop (see https://huggingface.co/papers/1909.11556 for description) dropout_probability = torch.rand([]) - skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False + skip_the_layer = self.training and dropout_probability < self.config.layerdrop if not skip_the_layer or synced_gpus: # under fsdp or deepspeed zero3 all gpus must run in sync layer_outputs = layer( @@ -661,7 +661,7 @@ class UniSpeechSatEncoderStableLayerNorm(nn.Module): # add LayerDrop (see https://huggingface.co/papers/1909.11556 for description) dropout_probability = torch.rand([]) - skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False + skip_the_layer = self.training and dropout_probability < self.config.layerdrop if not skip_the_layer or synced_gpus: # under fsdp or deepspeed zero3 all gpus must run in sync # XXX: could optimize this like synced_gpus in generate_utils but not sure if it's worth the code complication diff --git a/src/transformers/models/vipllava/configuration_vipllava.py b/src/transformers/models/vipllava/configuration_vipllava.py index 9ad117c754..cdeb7823ad 100644 --- a/src/transformers/models/vipllava/configuration_vipllava.py +++ b/src/transformers/models/vipllava/configuration_vipllava.py @@ -94,9 +94,7 @@ class VipLlavaConfig(PretrainedConfig): self.vision_config = vision_config if isinstance(self.vision_config, dict): - vision_config["model_type"] = ( - vision_config["model_type"] if "model_type" in vision_config else "clip_vision_model" - ) + vision_config["model_type"] = vision_config.get("model_type", "clip_vision_model") self.vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config) elif vision_config is None: self.vision_config = CONFIG_MAPPING["clip_vision_model"]( @@ -111,7 +109,7 @@ class VipLlavaConfig(PretrainedConfig): ) if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "llama" + text_config["model_type"] = text_config.get("model_type", "llama") text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config) elif text_config is None: text_config = CONFIG_MAPPING["llama"]() diff --git a/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py index 5818a74271..4368690d09 100644 --- a/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py @@ -658,7 +658,7 @@ class TFVisionEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLos self, input_ids, past_key_values=None, attention_mask=None, use_cache=None, encoder_outputs=None, **kwargs ): decoder_inputs = self.decoder.prepare_inputs_for_generation(input_ids, past_key_values=past_key_values) - decoder_attention_mask = decoder_inputs["attention_mask"] if "attention_mask" in decoder_inputs else None + decoder_attention_mask = decoder_inputs.get("attention_mask", None) past_key_values = decoder_inputs.get("past_key_values") input_dict = { "pixel_values": None, # needs to be passed to make Keras.layer.__call__ happy diff --git a/src/transformers/models/voxtral/configuration_voxtral.py b/src/transformers/models/voxtral/configuration_voxtral.py index 72c986da37..8cdd499cde 100644 --- a/src/transformers/models/voxtral/configuration_voxtral.py +++ b/src/transformers/models/voxtral/configuration_voxtral.py @@ -175,16 +175,14 @@ class VoxtralConfig(PretrainedConfig): **kwargs, ): if isinstance(audio_config, dict): - audio_config["model_type"] = ( - audio_config["model_type"] if "model_type" in audio_config else "voxtral_encoder" - ) + audio_config["model_type"] = audio_config.get("model_type", "voxtral_encoder") audio_config = CONFIG_MAPPING[audio_config["model_type"]](**audio_config) elif audio_config is None: audio_config = CONFIG_MAPPING["voxtral_encoder"]() self.audio_config = audio_config if isinstance(text_config, dict): - text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "llama" + text_config["model_type"] = text_config.get("model_type", "llama") text_config = CONFIG_MAPPING[text_config["model_type"]]( **{**self._default_text_config_kwargs, **text_config} ) diff --git a/src/transformers/models/wav2vec2/convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/wav2vec2/convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py index 361684319c..9523631097 100644 --- a/src/transformers/models/wav2vec2/convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/wav2vec2/convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py @@ -326,7 +326,7 @@ def convert_wav2vec2_checkpoint( word_delimiter_token="|", do_lower_case=False, ) - return_attention_mask = True if config.feat_extract_norm == "layer" else False + return_attention_mask = config.feat_extract_norm == "layer" feature_extractor = Wav2Vec2FeatureExtractor( feature_size=1, sampling_rate=16000, diff --git a/src/transformers/models/wav2vec2/modeling_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_wav2vec2.py index 2c352da180..41a8a872c2 100755 --- a/src/transformers/models/wav2vec2/modeling_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_wav2vec2.py @@ -729,7 +729,7 @@ class Wav2Vec2Encoder(nn.Module): # add LayerDrop (see https://huggingface.co/papers/1909.11556 for description) dropout_probability = torch.rand([]) - skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False + skip_the_layer = self.training and dropout_probability < self.config.layerdrop if not skip_the_layer or synced_gpus: # under fsdp or deepspeed zero3 all gpus must run in sync layer_outputs = layer( @@ -824,7 +824,7 @@ class Wav2Vec2EncoderStableLayerNorm(nn.Module): # add LayerDrop (see https://huggingface.co/papers/1909.11556 for description) dropout_probability = torch.rand([]) - skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False + skip_the_layer = self.training and dropout_probability < self.config.layerdrop if not skip_the_layer or synced_gpus: # under fsdp or deepspeed zero3 all gpus must run in sync # XXX: could optimize this like synced_gpus in generate_utils but not sure if it's worth the code complication diff --git a/src/transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py b/src/transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py index e416fd8a66..e8f67e2d73 100644 --- a/src/transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +++ b/src/transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py @@ -518,7 +518,7 @@ class Wav2Vec2BertEncoder(nn.Module): # add LayerDrop (see https://huggingface.co/papers/1909.11556 for description) dropout_probability = torch.rand([]) - skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False + skip_the_layer = self.training and dropout_probability < self.config.layerdrop if not skip_the_layer or synced_gpus: # under fsdp or deepspeed zero3 all gpus must run in sync layer_outputs = layer( diff --git a/src/transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py b/src/transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py index d029d22cd3..1f9ce07f0f 100644 --- a/src/transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +++ b/src/transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py @@ -416,7 +416,7 @@ class Wav2Vec2BertEncoder(nn.Module): # add LayerDrop (see https://huggingface.co/papers/1909.11556 for description) dropout_probability = torch.rand([]) - skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False + skip_the_layer = self.training and dropout_probability < self.config.layerdrop if not skip_the_layer or synced_gpus: # under fsdp or deepspeed zero3 all gpus must run in sync layer_outputs = layer( diff --git a/src/transformers/models/wav2vec2_conformer/convert_wav2vec2_conformer_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/wav2vec2_conformer/convert_wav2vec2_conformer_original_pytorch_checkpoint_to_pytorch.py index ce54a1756a..eca851f3a0 100644 --- a/src/transformers/models/wav2vec2_conformer/convert_wav2vec2_conformer_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/wav2vec2_conformer/convert_wav2vec2_conformer_original_pytorch_checkpoint_to_pytorch.py @@ -262,7 +262,7 @@ def convert_wav2vec2_conformer_checkpoint( word_delimiter_token="|", do_lower_case=False, ) - return_attention_mask = True if config.feat_extract_norm == "layer" else False + return_attention_mask = config.feat_extract_norm == "layer" feature_extractor = Wav2Vec2FeatureExtractor( feature_size=1, sampling_rate=16000, diff --git a/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py b/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py index bdc3dcddaf..459d42f2f3 100644 --- a/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +++ b/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py @@ -694,7 +694,7 @@ class Wav2Vec2ConformerEncoder(nn.Module): # add LayerDrop (see https://huggingface.co/papers/1909.11556 for description) dropout_probability = torch.rand([]) - skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False + skip_the_layer = self.training and dropout_probability < self.config.layerdrop if not skip_the_layer or synced_gpus: # under fsdp or deepspeed zero3 all gpus must run in sync layer_outputs = layer( diff --git a/src/transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py b/src/transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py index b54e7d0259..89048b363d 100644 --- a/src/transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +++ b/src/transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py @@ -502,7 +502,7 @@ class Wav2Vec2ConformerEncoder(nn.Module): # add LayerDrop (see https://huggingface.co/papers/1909.11556 for description) dropout_probability = torch.rand([]) - skip_the_layer = True if self.training and (dropout_probability < self.config.layerdrop) else False + skip_the_layer = self.training and dropout_probability < self.config.layerdrop if not skip_the_layer or synced_gpus: # under fsdp or deepspeed zero3 all gpus must run in sync layer_outputs = layer( diff --git a/src/transformers/models/xlm_roberta/modeling_xlm_roberta.py b/src/transformers/models/xlm_roberta/modeling_xlm_roberta.py index 4ff6ea8cf2..929ee370fe 100644 --- a/src/transformers/models/xlm_roberta/modeling_xlm_roberta.py +++ b/src/transformers/models/xlm_roberta/modeling_xlm_roberta.py @@ -365,9 +365,7 @@ class XLMRobertaSdpaSelfAttention(XLMRobertaSelfAttention): # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create # a causal mask in case tgt_len == 1. - is_causal = ( - True if self.is_decoder and not is_cross_attention and attention_mask is None and tgt_len > 1 else False - ) + is_causal = self.is_decoder and not is_cross_attention and attention_mask is None and tgt_len > 1 attn_output = torch.nn.functional.scaled_dot_product_attention( query_layer, diff --git a/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py b/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py index 69c623934e..2a3665f87f 100644 --- a/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +++ b/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py @@ -362,9 +362,7 @@ class XLMRobertaXLSdpaSelfAttention(XLMRobertaXLSelfAttention): # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create # a causal mask in case tgt_len == 1. - is_causal = ( - True if self.is_decoder and not is_cross_attention and attention_mask is None and tgt_len > 1 else False - ) + is_causal = self.is_decoder and not is_cross_attention and attention_mask is None and tgt_len > 1 attn_output = torch.nn.functional.scaled_dot_product_attention( query_layer, diff --git a/src/transformers/models/yolos/image_processing_yolos.py b/src/transformers/models/yolos/image_processing_yolos.py index c859260088..4a716036d4 100644 --- a/src/transformers/models/yolos/image_processing_yolos.py +++ b/src/transformers/models/yolos/image_processing_yolos.py @@ -357,7 +357,7 @@ def prepare_coco_detection_annotation( # for conversion to coco api area = np.asarray([obj["area"] for obj in annotations], dtype=np.float32) - iscrowd = np.asarray([obj["iscrowd"] if "iscrowd" in obj else 0 for obj in annotations], dtype=np.int64) + iscrowd = np.asarray([obj.get("iscrowd", 0) for obj in annotations], dtype=np.int64) boxes = [obj["bbox"] for obj in annotations] # guard against no boxes via resizing diff --git a/src/transformers/onnx/convert.py b/src/transformers/onnx/convert.py index 152bc98d6d..778fa7046f 100644 --- a/src/transformers/onnx/convert.py +++ b/src/transformers/onnx/convert.py @@ -416,7 +416,7 @@ def validate_model_outputs( logger.info(f'\t- Validating ONNX Model output "{name}":') # Shape - if not ort_value.shape == ref_value.shape: + if ort_value.shape != ref_value.shape: logger.info(f"\t\t-[x] shape {ort_value.shape} doesn't match {ref_value.shape}") raise ValueError( "Outputs shape doesn't match between reference model and ONNX exported model: " diff --git a/src/transformers/pipelines/document_question_answering.py b/src/transformers/pipelines/document_question_answering.py index b0507a4ed3..2823dc36fc 100644 --- a/src/transformers/pipelines/document_question_answering.py +++ b/src/transformers/pipelines/document_question_answering.py @@ -341,7 +341,7 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline): raise ValueError("If you are using a VisionEncoderDecoderModel, you must provide a feature extractor") words, boxes = None, None - if not self.model_type == ModelType.VisionEncoderDecoder: + if self.model_type != ModelType.VisionEncoderDecoder: if "word_boxes" in input: words = [x[0] for x in input["word_boxes"]] boxes = [x[1] for x in input["word_boxes"]] diff --git a/src/transformers/pipelines/question_answering.py b/src/transformers/pipelines/question_answering.py index ed21e39084..ee86074a4c 100644 --- a/src/transformers/pipelines/question_answering.py +++ b/src/transformers/pipelines/question_answering.py @@ -594,7 +594,7 @@ class QuestionAnsweringPipeline(ChunkPipeline): # Start: Index of the first character of the answer in the context string # End: Index of the character following the last character of the answer in the context string # Answer: Plain text of the answer - question_first = bool(self.tokenizer.padding_side == "right") + question_first = self.tokenizer.padding_side == "right" enc = output["encoding"] # Encoding was *not* padded, input_ids *might*. diff --git a/src/transformers/pipelines/table_question_answering.py b/src/transformers/pipelines/table_question_answering.py index 54a65ad77f..2ff8982b83 100644 --- a/src/transformers/pipelines/table_question_answering.py +++ b/src/transformers/pipelines/table_question_answering.py @@ -143,8 +143,8 @@ class TableQuestionAnsweringPipeline(Pipeline): mapping.update(MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES) self.check_model_type(mapping) - self.aggregate = bool(getattr(self.model.config, "aggregation_labels", None)) and bool( - getattr(self.model.config, "num_aggregation_labels", None) + self.aggregate = getattr(self.model.config, "aggregation_labels", None) and getattr( + self.model.config, "num_aggregation_labels", None ) self.type = "tapas" if hasattr(self.model.config, "aggregation_labels") else None diff --git a/src/transformers/pipelines/token_classification.py b/src/transformers/pipelines/token_classification.py index a9d1467482..22faadeddd 100644 --- a/src/transformers/pipelines/token_classification.py +++ b/src/transformers/pipelines/token_classification.py @@ -280,7 +280,7 @@ class TokenClassificationPipeline(ChunkPipeline): def preprocess(self, sentence, offset_mapping=None, **preprocess_params): tokenizer_params = preprocess_params.pop("tokenizer_params", {}) - truncation = True if self.tokenizer.model_max_length and self.tokenizer.model_max_length > 0 else False + truncation = self.tokenizer.model_max_length and self.tokenizer.model_max_length > 0 word_to_chars_map = None is_split_into_words = preprocess_params["is_split_into_words"] diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index 7f77daa297..a807e65e7e 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -1107,7 +1107,7 @@ class ProcessorMixin(PushToHubMixin): for i, arg in enumerate(accepted_args_and_kwargs) if (arg in valid_kwargs and i < len(args)) } - args = [arg if i not in args_to_update else args_to_update[i] for i, arg in enumerate(args)] + args = [args_to_update.get(i, arg) for i, arg in enumerate(args)] # instantiate processor with used (and valid) kwargs only processor = cls(*args, **valid_kwargs) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index beb91a9472..36e57c0713 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -3465,7 +3465,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): **kwargs, ) - pair = bool(pair_ids is not None) + pair = pair_ids is not None len_ids = len(ids) len_pair_ids = len(pair_ids) if pair else 0 diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 3e5df5fb29..87a0e2b94a 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -615,7 +615,7 @@ class Trainer: # Bnb Quantized models doesn't support `.to` operation. if ( self.place_model_on_device - and not getattr(model, "quantization_method", None) == QuantizationMethod.BITS_AND_BYTES + and getattr(model, "quantization_method", None) != QuantizationMethod.BITS_AND_BYTES ): self._move_model_to_device(model, args.device) @@ -2363,7 +2363,7 @@ class Trainer: # as the model is wrapped, don't use `accelerator.prepare` # this is for unhandled cases such as # FSDP-XLA, SageMaker MP/DP, DataParallel, IPEX - use_accelerator_prepare = True if model is self.model else False + use_accelerator_prepare = model is self.model if use_accelerator_prepare and self.is_fsdp_enabled: # In case of auto_find_batch_size=True @@ -4621,7 +4621,7 @@ class Trainer: return_loss = inputs.get("return_loss") if return_loss is None: return_loss = self.can_return_loss - loss_without_labels = True if len(self.label_names) == 0 and return_loss else False + loss_without_labels = len(self.label_names) == 0 and return_loss inputs = self._prepare_inputs(inputs) if ignore_keys is None: diff --git a/src/transformers/trainer_utils.py b/src/transformers/trainer_utils.py index c435c1f6df..fb0cc33dce 100644 --- a/src/transformers/trainer_utils.py +++ b/src/transformers/trainer_utils.py @@ -894,7 +894,7 @@ def check_target_module_exists(optim_target_modules, key: str, return_is_regex: if isinstance(optim_target_modules, str): target_module_found = bool(re.fullmatch(optim_target_modules, key)) - is_regex = True if not optim_target_modules == key else False + is_regex = optim_target_modules != key elif key in optim_target_modules: # from here, target_module_found must be a list of str # this module is specified directly in target_modules target_module_found = True diff --git a/src/transformers/utils/generic.py b/src/transformers/utils/generic.py index 335a5d5717..2989f0230c 100644 --- a/src/transformers/utils/generic.py +++ b/src/transformers/utils/generic.py @@ -413,11 +413,7 @@ class ModelOutput(OrderedDict): # set the associated fields if first_field_iterator: for idx, element in enumerate(iterator): - if ( - not isinstance(element, (list, tuple)) - or not len(element) == 2 - or not isinstance(element[0], str) - ): + if not isinstance(element, (list, tuple)) or len(element) != 2 or not isinstance(element[0], str): if idx == 0: # If we do not have an iterator of key/values, set it as attribute self[class_fields[0].name] = first_field diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py index c4921459a7..ac3eba924c 100644 --- a/src/transformers/utils/hub.py +++ b/src/transformers/utils/hub.py @@ -204,7 +204,7 @@ def define_sagemaker_information(): dlc_tag = None sagemaker_params = json.loads(os.getenv("SM_FRAMEWORK_PARAMS", "{}")) - runs_distributed_training = True if "sagemaker_distributed_dataparallel_enabled" in sagemaker_params else False + runs_distributed_training = "sagemaker_distributed_dataparallel_enabled" in sagemaker_params account_id = os.getenv("TRAINING_JOB_ARN").split(":")[4] if "TRAINING_JOB_ARN" in os.environ else None sagemaker_object = { diff --git a/src/transformers/utils/logging.py b/src/transformers/utils/logging.py index a2915e167a..eaed3228ec 100644 --- a/src/transformers/utils/logging.py +++ b/src/transformers/utils/logging.py @@ -101,7 +101,7 @@ def _configure_library_root_logger() -> None: _default_handler.setFormatter(formatter) is_ci = os.getenv("CI") is not None and os.getenv("CI").upper() in {"1", "ON", "YES", "TRUE"} - library_root_logger.propagate = True if is_ci else False + library_root_logger.propagate = is_ci def _reset_library_root_logger() -> None: diff --git a/src/transformers/utils/notebook.py b/src/transformers/utils/notebook.py index b270a14105..397aa3e3ff 100644 --- a/src/transformers/utils/notebook.py +++ b/src/transformers/utils/notebook.py @@ -253,7 +253,7 @@ class NotebookTrainingTracker(NotebookProgressBar): first_column = self.inner_table[0][0] if last_values[0] != values[first_column]: # write new line - self.inner_table.append([values[c] if c in values else "No Log" for c in columns]) + self.inner_table.append([values.get(c, "No Log") for c in columns]) else: # update last line new_values = values diff --git a/tests/models/depth_anything/test_modeling_depth_anything.py b/tests/models/depth_anything/test_modeling_depth_anything.py index 93a24c6728..3527e1d6b8 100644 --- a/tests/models/depth_anything/test_modeling_depth_anything.py +++ b/tests/models/depth_anything/test_modeling_depth_anything.py @@ -147,7 +147,7 @@ class DepthAnythingModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.Tes test_resize_embeddings = False test_head_masking = False test_torch_exportable = True - test_torch_exportable_strictly = not get_torch_major_and_minor_version() == "2.7" + test_torch_exportable_strictly = get_torch_major_and_minor_version() != "2.7" def setUp(self): self.model_tester = DepthAnythingModelTester(self) diff --git a/tests/models/dpt/test_modeling_dpt_auto_backbone.py b/tests/models/dpt/test_modeling_dpt_auto_backbone.py index b9068631bd..165da4be6b 100644 --- a/tests/models/dpt/test_modeling_dpt_auto_backbone.py +++ b/tests/models/dpt/test_modeling_dpt_auto_backbone.py @@ -141,7 +141,7 @@ class DPTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): test_resize_embeddings = False test_head_masking = False test_torch_exportable = True - test_torch_exportable_strictly = not get_torch_major_and_minor_version() == "2.7" + test_torch_exportable_strictly = get_torch_major_and_minor_version() != "2.7" def setUp(self): self.model_tester = DPTModelTester(self) diff --git a/tests/models/hiera/test_modeling_hiera.py b/tests/models/hiera/test_modeling_hiera.py index 1e3ed8e795..8a79e9afa8 100644 --- a/tests/models/hiera/test_modeling_hiera.py +++ b/tests/models/hiera/test_modeling_hiera.py @@ -371,7 +371,7 @@ class HieraModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): [num_patches, self.model_tester.embed_dim], ) - if not model_class.__name__ == "HieraBackbone": + if model_class.__name__ != "HieraBackbone": reshaped_hidden_states = outputs.reshaped_hidden_states self.assertEqual(len(reshaped_hidden_states), expected_num_layers) diff --git a/tests/models/swin/test_modeling_swin.py b/tests/models/swin/test_modeling_swin.py index a8e35d429f..a7cdb6a8de 100644 --- a/tests/models/swin/test_modeling_swin.py +++ b/tests/models/swin/test_modeling_swin.py @@ -382,7 +382,7 @@ class SwinModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): [num_patches, self.model_tester.embed_dim], ) - if not model_class.__name__ == "SwinBackbone": + if model_class.__name__ != "SwinBackbone": reshaped_hidden_states = outputs.reshaped_hidden_states self.assertEqual(len(reshaped_hidden_states), expected_num_layers) diff --git a/tests/models/swinv2/test_modeling_swinv2.py b/tests/models/swinv2/test_modeling_swinv2.py index 69f0a77f3a..6c4e648a74 100644 --- a/tests/models/swinv2/test_modeling_swinv2.py +++ b/tests/models/swinv2/test_modeling_swinv2.py @@ -365,7 +365,7 @@ class Swinv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): [num_patches, self.model_tester.embed_dim], ) - if not model_class.__name__ == "Swinv2Backbone": + if model_class.__name__ != "Swinv2Backbone": reshaped_hidden_states = outputs.reshaped_hidden_states self.assertEqual(len(reshaped_hidden_states), expected_num_layers) diff --git a/tests/models/upernet/test_modeling_upernet.py b/tests/models/upernet/test_modeling_upernet.py index ed0a982efd..084b03317b 100644 --- a/tests/models/upernet/test_modeling_upernet.py +++ b/tests/models/upernet/test_modeling_upernet.py @@ -156,7 +156,7 @@ class UperNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase) test_torchscript = False has_attentions = False test_torch_exportable = True - test_torch_exportable_strictly = not get_torch_major_and_minor_version() == "2.7" + test_torch_exportable_strictly = get_torch_major_and_minor_version() != "2.7" def setUp(self): self.model_tester = UperNetModelTester(self) diff --git a/tests/models/vitmatte/test_modeling_vitmatte.py b/tests/models/vitmatte/test_modeling_vitmatte.py index 3e6ef0bb48..10c36a2dd8 100644 --- a/tests/models/vitmatte/test_modeling_vitmatte.py +++ b/tests/models/vitmatte/test_modeling_vitmatte.py @@ -144,7 +144,7 @@ class VitMatteModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase test_resize_embeddings = False test_head_masking = False test_torch_exportable = True - test_torch_exportable_strictly = not get_torch_major_and_minor_version() == "2.7" + test_torch_exportable_strictly = get_torch_major_and_minor_version() != "2.7" def setUp(self): self.model_tester = VitMatteModelTester(self) diff --git a/tests/models/vitpose/test_modeling_vitpose.py b/tests/models/vitpose/test_modeling_vitpose.py index e9bce2d4c6..d7782915cf 100644 --- a/tests/models/vitpose/test_modeling_vitpose.py +++ b/tests/models/vitpose/test_modeling_vitpose.py @@ -155,7 +155,7 @@ class VitPoseModelTest(ModelTesterMixin, unittest.TestCase): test_resize_embeddings = False test_head_masking = False test_torch_exportable = True - test_torch_exportable_strictly = not get_torch_major_and_minor_version() == "2.7" + test_torch_exportable_strictly = get_torch_major_and_minor_version() != "2.7" def setUp(self): self.model_tester = VitPoseModelTester(self) diff --git a/tests/models/zoedepth/test_modeling_zoedepth.py b/tests/models/zoedepth/test_modeling_zoedepth.py index 7a19bc7b5b..5fcf0e9a2f 100644 --- a/tests/models/zoedepth/test_modeling_zoedepth.py +++ b/tests/models/zoedepth/test_modeling_zoedepth.py @@ -148,7 +148,7 @@ class ZoeDepthModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase test_resize_embeddings = False test_head_masking = False # `strict=True/False` are both failing with torch 2.7, see #38677 - test_torch_exportable = not get_torch_major_and_minor_version() == "2.7" + test_torch_exportable = get_torch_major_and_minor_version() != "2.7" def setUp(self): self.model_tester = ZoeDepthModelTester(self) diff --git a/utils/check_repo.py b/utils/check_repo.py index 01ed84939f..d32a42b747 100644 --- a/utils/check_repo.py +++ b/utils/check_repo.py @@ -1193,7 +1193,7 @@ def check_model_type_doc_match(): model_docs = [m.stem for m in model_doc_folder.glob("*.md")] model_types = list(transformers.models.auto.configuration_auto.MODEL_NAMES_MAPPING.keys()) - model_types = [MODEL_TYPE_TO_DOC_MAPPING[m] if m in MODEL_TYPE_TO_DOC_MAPPING else m for m in model_types] + model_types = [MODEL_TYPE_TO_DOC_MAPPING.get(m, m) for m in model_types] errors = [] for m in model_docs: diff --git a/utils/notification_service.py b/utils/notification_service.py index 7838338ba2..f7dbbec907 100644 --- a/utils/notification_service.py +++ b/utils/notification_service.py @@ -1059,7 +1059,7 @@ if __name__ == "__main__": runner_not_available = False runner_failed = False # Some jobs don't depend (`needs`) on the job `setup`: in this case, the status of the job `setup` is `skipped`. - setup_failed = False if setup_status in ["skipped", "success"] else True + setup_failed = setup_status not in ["skipped", "success"] org = "huggingface" repo = "transformers" diff --git a/utils/notification_service_doc_tests.py b/utils/notification_service_doc_tests.py index 5802cf23a6..f8f7b8c610 100644 --- a/utils/notification_service_doc_tests.py +++ b/utils/notification_service_doc_tests.py @@ -371,7 +371,7 @@ if __name__ == "__main__": file_path, test = line, line job_result["failed"].append(test) - failure = all_failures[test] if test in all_failures else "N/A" + failure = all_failures.get(test, "N/A") job_result["failures"][test] = failure # Save and to be uploaded as artifact diff --git a/utils/tests_fetcher.py b/utils/tests_fetcher.py index 3dcca15164..aae78cabe6 100644 --- a/utils/tests_fetcher.py +++ b/utils/tests_fetcher.py @@ -1057,9 +1057,9 @@ def infer_tests_to_run( test_files_to_run.extend(test_map[f]) test_files_to_run = sorted(set(test_files_to_run)) # Remove repo utils tests - test_files_to_run = [f for f in test_files_to_run if not f.split(os.path.sep)[1] == "repo_utils"] + test_files_to_run = [f for f in test_files_to_run if f.split(os.path.sep)[1] != "repo_utils"] # Remove SageMaker tests - test_files_to_run = [f for f in test_files_to_run if not f.split(os.path.sep)[1] == "sagemaker"] + test_files_to_run = [f for f in test_files_to_run if f.split(os.path.sep)[1] != "sagemaker"] # Make sure we did not end up with a test file that was removed test_files_to_run = [f for f in test_files_to_run if (PATH_TO_REPO / f).exists()]