Use Python 3.9 syntax in examples (#37279)

Signed-off-by: cyy <cyyever@outlook.com>
This commit is contained in:
cyyever
2025-04-07 19:52:21 +08:00
committed by GitHub
parent 08f36771b3
commit 0fb8d49e88
123 changed files with 358 additions and 451 deletions

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2022 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2022 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -833,8 +832,7 @@ def main():
# No need to shuffle here
loader = data_loader(rng, _ds, batch_size=batch_size, shuffle=False)
for batch in loader:
yield batch
yield from loader
# Metric
metric = evaluate.load("rouge", cache_dir=model_args.cache_dir)

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -30,7 +29,7 @@ from dataclasses import asdict, dataclass, field
from enum import Enum
from itertools import chain
from pathlib import Path
from typing import Dict, List, Optional
from typing import Optional
import flax
import jax
@@ -294,7 +293,7 @@ class FlaxDataCollatorForBartDenoisingLM:
" language modeling. "
)
def __call__(self, examples: List[Dict[str, List[int]]]) -> BatchEncoding:
def __call__(self, examples: list[dict[str, list[int]]]) -> BatchEncoding:
# convert list to dict and tensorize input
batch = BatchEncoding(
{k: np.array([examples[i][k] for i in range(len(examples))]) for k, v in examples[0].items()}

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -33,7 +32,7 @@ from itertools import chain
# You can also adapt this script on your own masked language modeling task. Pointers for this are left as comments.
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from typing import Optional
import flax
import jax
@@ -302,7 +301,7 @@ class FlaxDataCollatorForLanguageModeling:
"You should pass `mlm=False` to train on causal language modeling instead."
)
def __call__(self, examples: List[Dict[str, np.ndarray]], pad_to_multiple_of: int) -> Dict[str, np.ndarray]:
def __call__(self, examples: list[dict[str, np.ndarray]], pad_to_multiple_of: int) -> dict[str, np.ndarray]:
# Handle dict or lists with proper padding and conversion to tensor.
batch = self.tokenizer.pad(examples, pad_to_multiple_of=pad_to_multiple_of, return_tensors=TensorType.NUMPY)
@@ -316,7 +315,7 @@ class FlaxDataCollatorForLanguageModeling:
def mask_tokens(
self, inputs: np.ndarray, special_tokens_mask: Optional[np.ndarray]
) -> Tuple[np.ndarray, np.ndarray]:
) -> tuple[np.ndarray, np.ndarray]:
"""
Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original.
"""

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -32,7 +31,7 @@ from dataclasses import asdict, dataclass, field
from enum import Enum
from itertools import chain
from pathlib import Path
from typing import Dict, List, Optional
from typing import Optional
import flax
import jax
@@ -338,7 +337,7 @@ class FlaxDataCollatorForT5MLM:
pad_token_id: int
decoder_start_token_id: int
def __call__(self, examples: List[Dict[str, np.ndarray]]) -> BatchEncoding:
def __call__(self, examples: list[dict[str, np.ndarray]]) -> BatchEncoding:
# convert list to dict and tensorize input
batch = BatchEncoding(
{k: np.array([examples[i][k] for i in range(len(examples))]) for k, v in examples[0].items()}

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env python3
import json
from typing import Iterator, List, Union
from collections.abc import Iterator
from typing import Union
from tokenizers import AddedToken, Regex, Tokenizer, decoders, normalizers, pre_tokenizers, trainers
from tokenizers.implementations.base_tokenizer import BaseTokenizer
@@ -72,7 +73,7 @@ class SentencePieceUnigramTokenizer(BaseTokenizer):
def train(
self,
files: Union[str, List[str]],
files: Union[str, list[str]],
vocab_size: int = 8000,
show_progress: bool = True,
):

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -28,7 +27,7 @@ import time
from dataclasses import asdict, dataclass, field
from enum import Enum
from pathlib import Path
from typing import Any, Callable, Dict, Optional, Tuple
from typing import Any, Callable, Optional
import datasets
import evaluate
@@ -908,8 +907,8 @@ def main():
# region Define train step functions
def train_step(
state: train_state.TrainState, batch: Dict[str, Array], dropout_rng: PRNGKey
) -> Tuple[train_state.TrainState, float]:
state: train_state.TrainState, batch: dict[str, Array], dropout_rng: PRNGKey
) -> tuple[train_state.TrainState, float]:
"""Trains model with an optimizer (both in `state`) on `batch`, returning a pair `(new_state, loss)`."""
dropout_rng, new_dropout_rng = jax.random.split(dropout_rng)
start_positions = batch.pop("start_positions")

View File

@@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2020 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -20,7 +19,7 @@ import collections
import json
import logging
import os
from typing import Optional, Tuple
from typing import Optional
import numpy as np
from tqdm.auto import tqdm
@@ -32,7 +31,7 @@ logger = logging.getLogger(__name__)
def postprocess_qa_predictions(
examples,
features,
predictions: Tuple[np.ndarray, np.ndarray],
predictions: tuple[np.ndarray, np.ndarray],
version_2_with_negative: bool = False,
n_best_size: int = 20,
max_answer_length: int = 30,
@@ -223,7 +222,7 @@ def postprocess_qa_predictions(
# If we have an output_dir, let's save all those dicts.
if output_dir is not None:
if not os.path.isdir(output_dir):
raise EnvironmentError(f"{output_dir} is not a directory.")
raise OSError(f"{output_dir} is not a directory.")
prediction_file = os.path.join(
output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json"
@@ -253,7 +252,7 @@ def postprocess_qa_predictions(
def postprocess_qa_predictions_with_beam_search(
examples,
features,
predictions: Tuple[np.ndarray, np.ndarray],
predictions: tuple[np.ndarray, np.ndarray],
version_2_with_negative: bool = False,
n_best_size: int = 20,
max_answer_length: int = 30,
@@ -417,7 +416,7 @@ def postprocess_qa_predictions_with_beam_search(
# If we have an output_dir, let's save all those dicts.
if output_dir is not None:
if not os.path.isdir(output_dir):
raise EnvironmentError(f"{output_dir} is not a directory.")
raise OSError(f"{output_dir} is not a directory.")
prediction_file = os.path.join(
output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json"

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -25,7 +24,7 @@ import time
from dataclasses import field
from functools import partial
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Union
from typing import Any, Callable, Optional, Union
import datasets
import evaluate
@@ -303,7 +302,7 @@ class FlaxDataCollatorSpeechSeq2SeqWithPadding:
pad_input_to_multiple_of: Optional[int] = None
pad_target_to_multiple_of: Optional[int] = None
def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]:
def __call__(self, features: list[dict[str, Union[list[int], np.ndarray]]]) -> dict[str, np.ndarray]:
# split inputs and labels since they have to be of different lengths and need
# different padding methods
model_input_name = self.processor.model_input_names[0]

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2021 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -64,7 +63,7 @@ def get_setup_file():
def get_results(output_dir, split="eval"):
path = os.path.join(output_dir, f"{split}_results.json")
if os.path.exists(path):
with open(path, "r") as f:
with open(path) as f:
return json.load(f)
raise ValueError(f"can't find {path}")

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -25,7 +24,7 @@ import time
import warnings
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Callable, Dict, Optional, Tuple
from typing import Any, Callable, Optional
import datasets
import evaluate
@@ -572,8 +571,8 @@ def main():
# define step functions
def train_step(
state: train_state.TrainState, batch: Dict[str, Array], dropout_rng: PRNGKey
) -> Tuple[train_state.TrainState, float]:
state: train_state.TrainState, batch: dict[str, Array], dropout_rng: PRNGKey
) -> tuple[train_state.TrainState, float]:
"""Trains model with an optimizer (both in `state`) on `batch`, returning a pair `(new_state, loss)`."""
dropout_rng, new_dropout_rng = jax.random.split(dropout_rng)
targets = batch.pop("labels")

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -27,7 +26,7 @@ from dataclasses import asdict, dataclass, field
from enum import Enum
from itertools import chain
from pathlib import Path
from typing import Any, Callable, Dict, Optional, Tuple
from typing import Any, Callable, Optional
import datasets
import evaluate
@@ -651,8 +650,8 @@ def main():
# define step functions
def train_step(
state: train_state.TrainState, batch: Dict[str, Array], dropout_rng: PRNGKey
) -> Tuple[train_state.TrainState, float]:
state: train_state.TrainState, batch: dict[str, Array], dropout_rng: PRNGKey
) -> tuple[train_state.TrainState, float]:
"""Trains model with an optimizer (both in `state`) on `batch`, returning a pair `(new_state, loss)`."""
dropout_rng, new_dropout_rng = jax.random.split(dropout_rng)
targets = batch.pop("labels")

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");