Use Python 3.9 syntax in examples (#37279)

Signed-off-by: cyy <cyyever@outlook.com>
This commit is contained in:
cyyever
2025-04-07 19:52:21 +08:00
committed by GitHub
parent 08f36771b3
commit 0fb8d49e88
123 changed files with 358 additions and 451 deletions

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -24,7 +23,7 @@ import re
import sys
import warnings
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Union
from typing import Optional, Union
import datasets
import evaluate
@@ -211,11 +210,11 @@ class DataTrainingArguments:
)
},
)
chars_to_ignore: Optional[List[str]] = list_field(
chars_to_ignore: Optional[list[str]] = list_field(
default=None,
metadata={"help": "A list of characters to remove from the transcripts."},
)
eval_metrics: List[str] = list_field(
eval_metrics: list[str] = list_field(
default=["wer"],
metadata={"help": "A list of metrics the model should be evaluated on. E.g. `'wer cer'`"},
)
@@ -318,7 +317,7 @@ class DataCollatorCTCWithPadding:
pad_to_multiple_of_labels: Optional[int] = None
feature_extractor_input_name: Optional[str] = "input_values"
def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
def __call__(self, features: list[dict[str, Union[list[int], torch.Tensor]]]) -> dict[str, torch.Tensor]:
# split inputs and labels since they have to be of different lengths and need
# different padding methods
input_features = [

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -24,7 +23,7 @@ import re
import sys
import warnings
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Union
from typing import Optional, Union
import datasets
import evaluate
@@ -201,11 +200,11 @@ class DataTrainingArguments:
)
},
)
chars_to_ignore: Optional[List[str]] = list_field(
chars_to_ignore: Optional[list[str]] = list_field(
default=None,
metadata={"help": "A list of characters to remove from the transcripts."},
)
eval_metrics: List[str] = list_field(
eval_metrics: list[str] = list_field(
default=["wer"],
metadata={"help": "A list of metrics the model should be evaluated on. E.g. `'wer cer'`"},
)
@@ -300,7 +299,7 @@ class DataCollatorCTCWithPadding:
pad_to_multiple_of: Optional[int] = None
pad_to_multiple_of_labels: Optional[int] = None
def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
def __call__(self, features: list[dict[str, Union[list[int], torch.Tensor]]]) -> dict[str, torch.Tensor]:
# split inputs and labels since they have to be of different lengths and need
# different padding methods
input_features = [{"input_values": feature["input_values"]} for feature in features]

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -23,7 +22,7 @@ import logging
import os
import sys
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Union
from typing import Any, Optional, Union
import datasets
import evaluate
@@ -110,11 +109,11 @@ class ModelArguments:
freeze_encoder: bool = field(
default=False, metadata={"help": "Whether to freeze the entire encoder of the seq2seq model."}
)
forced_decoder_ids: List[List[int]] = field(
forced_decoder_ids: list[list[int]] = field(
default=None,
metadata={"help": "Deprecated. Please use the `language` and `task` arguments instead."},
)
suppress_tokens: List[int] = field(
suppress_tokens: list[int] = field(
default=None,
metadata={
"help": (
@@ -247,7 +246,7 @@ class DataCollatorSpeechSeq2SeqWithPadding:
decoder_start_token_id: int
forward_attention_mask: bool
def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
def __call__(self, features: list[dict[str, Union[list[int], torch.Tensor]]]) -> dict[str, torch.Tensor]:
# split inputs and labels since they have to be of different lengths and need
# different padding methods
model_input_name = self.processor.model_input_names[0]