Use Python 3.9 syntax in examples (#37279)
Signed-off-by: cyy <cyyever@outlook.com>
This commit is contained in:
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
# Copyright 2022 The HuggingFace Team All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
# Copyright 2022 The HuggingFace Team All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@@ -833,8 +832,7 @@ def main():
|
||||
# No need to shuffle here
|
||||
loader = data_loader(rng, _ds, batch_size=batch_size, shuffle=False)
|
||||
|
||||
for batch in loader:
|
||||
yield batch
|
||||
yield from loader
|
||||
|
||||
# Metric
|
||||
metric = evaluate.load("rouge", cache_dir=model_args.cache_dir)
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
# Copyright 2021 The HuggingFace Team All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@@ -30,7 +29,7 @@ from dataclasses import asdict, dataclass, field
|
||||
from enum import Enum
|
||||
from itertools import chain
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
from typing import Optional
|
||||
|
||||
import flax
|
||||
import jax
|
||||
@@ -294,7 +293,7 @@ class FlaxDataCollatorForBartDenoisingLM:
|
||||
" language modeling. "
|
||||
)
|
||||
|
||||
def __call__(self, examples: List[Dict[str, List[int]]]) -> BatchEncoding:
|
||||
def __call__(self, examples: list[dict[str, list[int]]]) -> BatchEncoding:
|
||||
# convert list to dict and tensorize input
|
||||
batch = BatchEncoding(
|
||||
{k: np.array([examples[i][k] for i in range(len(examples))]) for k, v in examples[0].items()}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
# Copyright 2021 The HuggingFace Team All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
# Copyright 2021 The HuggingFace Team All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@@ -33,7 +32,7 @@ from itertools import chain
|
||||
|
||||
# You can also adapt this script on your own masked language modeling task. Pointers for this are left as comments.
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from typing import Optional
|
||||
|
||||
import flax
|
||||
import jax
|
||||
@@ -302,7 +301,7 @@ class FlaxDataCollatorForLanguageModeling:
|
||||
"You should pass `mlm=False` to train on causal language modeling instead."
|
||||
)
|
||||
|
||||
def __call__(self, examples: List[Dict[str, np.ndarray]], pad_to_multiple_of: int) -> Dict[str, np.ndarray]:
|
||||
def __call__(self, examples: list[dict[str, np.ndarray]], pad_to_multiple_of: int) -> dict[str, np.ndarray]:
|
||||
# Handle dict or lists with proper padding and conversion to tensor.
|
||||
batch = self.tokenizer.pad(examples, pad_to_multiple_of=pad_to_multiple_of, return_tensors=TensorType.NUMPY)
|
||||
|
||||
@@ -316,7 +315,7 @@ class FlaxDataCollatorForLanguageModeling:
|
||||
|
||||
def mask_tokens(
|
||||
self, inputs: np.ndarray, special_tokens_mask: Optional[np.ndarray]
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
) -> tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original.
|
||||
"""
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
# Copyright 2021 The HuggingFace Team All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@@ -32,7 +31,7 @@ from dataclasses import asdict, dataclass, field
|
||||
from enum import Enum
|
||||
from itertools import chain
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
from typing import Optional
|
||||
|
||||
import flax
|
||||
import jax
|
||||
@@ -338,7 +337,7 @@ class FlaxDataCollatorForT5MLM:
|
||||
pad_token_id: int
|
||||
decoder_start_token_id: int
|
||||
|
||||
def __call__(self, examples: List[Dict[str, np.ndarray]]) -> BatchEncoding:
|
||||
def __call__(self, examples: list[dict[str, np.ndarray]]) -> BatchEncoding:
|
||||
# convert list to dict and tensorize input
|
||||
batch = BatchEncoding(
|
||||
{k: np.array([examples[i][k] for i in range(len(examples))]) for k, v in examples[0].items()}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
import json
|
||||
from typing import Iterator, List, Union
|
||||
from collections.abc import Iterator
|
||||
from typing import Union
|
||||
|
||||
from tokenizers import AddedToken, Regex, Tokenizer, decoders, normalizers, pre_tokenizers, trainers
|
||||
from tokenizers.implementations.base_tokenizer import BaseTokenizer
|
||||
@@ -72,7 +73,7 @@ class SentencePieceUnigramTokenizer(BaseTokenizer):
|
||||
|
||||
def train(
|
||||
self,
|
||||
files: Union[str, List[str]],
|
||||
files: Union[str, list[str]],
|
||||
vocab_size: int = 8000,
|
||||
show_progress: bool = True,
|
||||
):
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
# Copyright 2021 The HuggingFace Team All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@@ -28,7 +27,7 @@ import time
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, Optional, Tuple
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
import datasets
|
||||
import evaluate
|
||||
@@ -908,8 +907,8 @@ def main():
|
||||
|
||||
# region Define train step functions
|
||||
def train_step(
|
||||
state: train_state.TrainState, batch: Dict[str, Array], dropout_rng: PRNGKey
|
||||
) -> Tuple[train_state.TrainState, float]:
|
||||
state: train_state.TrainState, batch: dict[str, Array], dropout_rng: PRNGKey
|
||||
) -> tuple[train_state.TrainState, float]:
|
||||
"""Trains model with an optimizer (both in `state`) on `batch`, returning a pair `(new_state, loss)`."""
|
||||
dropout_rng, new_dropout_rng = jax.random.split(dropout_rng)
|
||||
start_positions = batch.pop("start_positions")
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2020 The HuggingFace Team All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@@ -20,7 +19,7 @@ import collections
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Optional, Tuple
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
from tqdm.auto import tqdm
|
||||
@@ -32,7 +31,7 @@ logger = logging.getLogger(__name__)
|
||||
def postprocess_qa_predictions(
|
||||
examples,
|
||||
features,
|
||||
predictions: Tuple[np.ndarray, np.ndarray],
|
||||
predictions: tuple[np.ndarray, np.ndarray],
|
||||
version_2_with_negative: bool = False,
|
||||
n_best_size: int = 20,
|
||||
max_answer_length: int = 30,
|
||||
@@ -223,7 +222,7 @@ def postprocess_qa_predictions(
|
||||
# If we have an output_dir, let's save all those dicts.
|
||||
if output_dir is not None:
|
||||
if not os.path.isdir(output_dir):
|
||||
raise EnvironmentError(f"{output_dir} is not a directory.")
|
||||
raise OSError(f"{output_dir} is not a directory.")
|
||||
|
||||
prediction_file = os.path.join(
|
||||
output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json"
|
||||
@@ -253,7 +252,7 @@ def postprocess_qa_predictions(
|
||||
def postprocess_qa_predictions_with_beam_search(
|
||||
examples,
|
||||
features,
|
||||
predictions: Tuple[np.ndarray, np.ndarray],
|
||||
predictions: tuple[np.ndarray, np.ndarray],
|
||||
version_2_with_negative: bool = False,
|
||||
n_best_size: int = 20,
|
||||
max_answer_length: int = 30,
|
||||
@@ -417,7 +416,7 @@ def postprocess_qa_predictions_with_beam_search(
|
||||
# If we have an output_dir, let's save all those dicts.
|
||||
if output_dir is not None:
|
||||
if not os.path.isdir(output_dir):
|
||||
raise EnvironmentError(f"{output_dir} is not a directory.")
|
||||
raise OSError(f"{output_dir} is not a directory.")
|
||||
|
||||
prediction_file = os.path.join(
|
||||
output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json"
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@@ -25,7 +24,7 @@ import time
|
||||
from dataclasses import field
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, List, Optional, Union
|
||||
from typing import Any, Callable, Optional, Union
|
||||
|
||||
import datasets
|
||||
import evaluate
|
||||
@@ -303,7 +302,7 @@ class FlaxDataCollatorSpeechSeq2SeqWithPadding:
|
||||
pad_input_to_multiple_of: Optional[int] = None
|
||||
pad_target_to_multiple_of: Optional[int] = None
|
||||
|
||||
def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]:
|
||||
def __call__(self, features: list[dict[str, Union[list[int], np.ndarray]]]) -> dict[str, np.ndarray]:
|
||||
# split inputs and labels since they have to be of different lengths and need
|
||||
# different padding methods
|
||||
model_input_name = self.processor.model_input_names[0]
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
# Copyright 2021 The HuggingFace Team All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2021 HuggingFace Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@@ -64,7 +63,7 @@ def get_setup_file():
|
||||
def get_results(output_dir, split="eval"):
|
||||
path = os.path.join(output_dir, f"{split}_results.json")
|
||||
if os.path.exists(path):
|
||||
with open(path, "r") as f:
|
||||
with open(path) as f:
|
||||
return json.load(f)
|
||||
raise ValueError(f"can't find {path}")
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@@ -25,7 +24,7 @@ import time
|
||||
import warnings
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, Optional, Tuple
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
import datasets
|
||||
import evaluate
|
||||
@@ -572,8 +571,8 @@ def main():
|
||||
|
||||
# define step functions
|
||||
def train_step(
|
||||
state: train_state.TrainState, batch: Dict[str, Array], dropout_rng: PRNGKey
|
||||
) -> Tuple[train_state.TrainState, float]:
|
||||
state: train_state.TrainState, batch: dict[str, Array], dropout_rng: PRNGKey
|
||||
) -> tuple[train_state.TrainState, float]:
|
||||
"""Trains model with an optimizer (both in `state`) on `batch`, returning a pair `(new_state, loss)`."""
|
||||
dropout_rng, new_dropout_rng = jax.random.split(dropout_rng)
|
||||
targets = batch.pop("labels")
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@@ -27,7 +26,7 @@ from dataclasses import asdict, dataclass, field
|
||||
from enum import Enum
|
||||
from itertools import chain
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, Optional, Tuple
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
import datasets
|
||||
import evaluate
|
||||
@@ -651,8 +650,8 @@ def main():
|
||||
|
||||
# define step functions
|
||||
def train_step(
|
||||
state: train_state.TrainState, batch: Dict[str, Array], dropout_rng: PRNGKey
|
||||
) -> Tuple[train_state.TrainState, float]:
|
||||
state: train_state.TrainState, batch: dict[str, Array], dropout_rng: PRNGKey
|
||||
) -> tuple[train_state.TrainState, float]:
|
||||
"""Trains model with an optimizer (both in `state`) on `batch`, returning a pair `(new_state, loss)`."""
|
||||
dropout_rng, new_dropout_rng = jax.random.split(dropout_rng)
|
||||
targets = batch.pop("labels")
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
# Copyright 2021 The HuggingFace Team All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
|
||||
Reference in New Issue
Block a user