Update quality tooling for formatting (#21480)

* Apply the result of running black 23.1

* Update target to Python 3.7

* Switch flake8 to ruff

* Configure isort

* Configure isort

* Apply isort with line limit

* Put the right black version

* Adapt black in check copies

* Fix copies
This commit is contained in:
Sylvain Gugger
2023-02-06 18:10:56 -05:00
committed by GitHub
parent b7bb2b59f7
commit 6f79d26442
1211 changed files with 1532 additions and 2687 deletions

View File

@@ -22,6 +22,7 @@ from dataclasses import dataclass, field
from typing import Dict, Optional
import numpy as np
from utils_multiple_choice import MultipleChoiceDataset, Split, processors
import transformers
from transformers import (
@@ -36,7 +37,6 @@ from transformers import (
set_seed,
)
from transformers.trainer_utils import is_main_process
from utils_multiple_choice import MultipleChoiceDataset, Split, processors
logger = logging.getLogger(__name__)

View File

@@ -26,8 +26,8 @@ from enum import Enum
from typing import List, Optional
import tqdm
from filelock import FileLock
from transformers import PreTrainedTokenizer, is_tf_available, is_torch_available
@@ -112,7 +112,6 @@ if is_torch_available():
# and the others will use the cache.
lock_path = cached_features_file + ".lock"
with FileLock(lock_path):
if os.path.exists(cached_features_file) and not overwrite_cache:
logger.info(f"Loading features from cached file {cached_features_file}")
self.features = torch.load(cached_features_file)

View File

@@ -69,7 +69,7 @@ class BaseTransformer(pl.LightningModule):
config=None,
tokenizer=None,
model=None,
**config_kwargs
**config_kwargs,
):
"""Initialize a model, tokenizer and config."""
super().__init__()
@@ -346,7 +346,7 @@ def generic_train(
extra_callbacks=[],
checkpoint_callback=None,
logging_callback=None,
**extra_train_kwargs
**extra_train_kwargs,
):
pl.seed_everything(args.seed)

View File

@@ -7,21 +7,19 @@ from argparse import Namespace
import numpy as np
import torch
from lightning_base import BaseTransformer, add_generic_args, generic_train
from torch.utils.data import DataLoader, TensorDataset
from lightning_base import BaseTransformer, add_generic_args, generic_train
from transformers import glue_compute_metrics as compute_metrics
from transformers import glue_convert_examples_to_features as convert_examples_to_features
from transformers import glue_output_modes
from transformers import glue_output_modes, glue_tasks_num_labels
from transformers import glue_processors as processors
from transformers import glue_tasks_num_labels
logger = logging.getLogger(__name__)
class GLUETransformer(BaseTransformer):
mode = "sequence-classification"
def __init__(self, hparams):

View File

@@ -7,11 +7,10 @@ from importlib import import_module
import numpy as np
import torch
from lightning_base import BaseTransformer, add_generic_args, generic_train
from seqeval.metrics import accuracy_score, f1_score, precision_score, recall_score
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader, TensorDataset
from lightning_base import BaseTransformer, add_generic_args, generic_train
from utils_ner import TokenClassificationTask

View File

@@ -172,7 +172,6 @@ def train(args, train_dataset, model, tokenizer):
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):
# Skip past any already trained steps if resuming training
if steps_trained_in_current_epoch > 0:
steps_trained_in_current_epoch -= 1

View File

@@ -30,9 +30,10 @@ from transformers import (
DataCollatorWithPadding,
HfArgumentParser,
SquadDataset,
Trainer,
TrainingArguments,
)
from transformers import SquadDataTrainingArguments as DataTrainingArguments
from transformers import Trainer, TrainingArguments
from transformers.trainer_utils import is_main_process

View File

@@ -4,6 +4,7 @@ import json
from typing import List
from ltp import LTP
from transformers import BertTokenizer
@@ -93,7 +94,6 @@ def prepare_ref(lines: List[str], ltp_tokenizer: LTP, bert_tokenizer: BertTokeni
ref_ids = []
for input_ids, chinese_word in zip(bert_res, ltp_res):
input_tokens = []
for id in input_ids:
token = bert_tokenizer._convert_id_to_token(id)

View File

@@ -19,9 +19,10 @@ import sys
from dataclasses import dataclass, field
from typing import Optional
import transformers
from seq2seq_trainer import Seq2SeqTrainer
from seq2seq_training_args import Seq2SeqTrainingArguments
import transformers
from transformers import (
AutoConfig,
AutoModelForSeq2SeqLM,
@@ -337,7 +338,6 @@ def main():
metrics["val_loss"] = round(metrics["val_loss"], 4)
if trainer.is_world_process_zero():
handle_metrics("val", metrics, training_args.output_dir)
all_metrics.update(metrics)

View File

@@ -16,8 +16,8 @@ from collections import defaultdict
from pathlib import Path
import pandas as pd
from rouge_cli import calculate_rouge_path
from utils import calculate_rouge
@@ -87,7 +87,6 @@ def test_single_sent_scores_dont_depend_on_newline_sep():
def test_pegasus_newline():
pred = [
"""" "a person who has such a video needs to immediately give it to the investigators," prosecutor says .<n> "it is a very disturbing scene," editor-in-chief of bild online tells "erin burnett: outfront" """
]

View File

@@ -17,11 +17,11 @@ from pathlib import Path
import numpy as np
import pytest
from torch.utils.data import DataLoader
from pack_dataset import pack_data_dir
from parameterized import parameterized
from save_len_file import save_len_file
from torch.utils.data import DataLoader
from transformers import AutoTokenizer
from transformers.models.mbart.modeling_mbart import shift_tokens_right
from transformers.testing_utils import TestCasePlus, slow

View File

@@ -18,6 +18,7 @@ import json
import unittest
from parameterized import parameterized
from transformers import FSMTForConditionalGeneration, FSMTTokenizer
from transformers.testing_utils import get_tests_dir, require_torch, slow, torch_device
from utils import calculate_bleu

View File

@@ -21,6 +21,7 @@ from unittest.mock import patch
from parameterized import parameterized
from run_eval import run_generate
from run_eval_search import run_search
from transformers.testing_utils import CaptureStdout, TestCasePlus, slow
from utils import ROUGE_KEYS

View File

@@ -29,7 +29,6 @@ from transformers import AutoTokenizer
def pack_examples(tok, src_examples, tgt_examples, max_tokens=1024):
finished_src, finished_tgt = [], []
sorted_examples = list(zip(src_examples, tgt_examples))

View File

@@ -20,6 +20,7 @@ import sys
from collections import OrderedDict
from run_eval import datetime_now, run_generate
from utils import ROUGE_KEYS

View File

@@ -17,6 +17,7 @@ from dataclasses import dataclass, field
from typing import Optional
from seq2seq_trainer import arg_to_scheduler
from transformers import TrainingArguments

View File

@@ -29,10 +29,10 @@ import torch
import torch.distributed as dist
from rouge_score import rouge_scorer, scoring
from sacrebleu import corpus_bleu
from sentence_splitter import add_newline_to_end_of_each_sentence
from torch import nn
from torch.utils.data import Dataset, Sampler
from sentence_splitter import add_newline_to_end_of_each_sentence
from transformers import BartTokenizer, EvalPrediction, PreTrainedTokenizer, T5Tokenizer
from transformers.models.bart.modeling_bart import shift_tokens_right
from transformers.utils import cached_property
@@ -132,7 +132,7 @@ class AbstractSeq2SeqDataset(Dataset):
type_path="train",
n_obs=None,
prefix="",
**dataset_kwargs
**dataset_kwargs,
):
super().__init__()
self.src_file = Path(data_dir).joinpath(type_path + ".source")

View File

@@ -24,6 +24,7 @@ from typing import Dict, List, Optional, Tuple
import numpy as np
from seqeval.metrics import accuracy_score, f1_score, precision_score, recall_score
from torch import nn
from utils_ner import Split, TokenClassificationDataset, TokenClassificationTask
import transformers
from transformers import (
@@ -38,7 +39,6 @@ from transformers import (
set_seed,
)
from transformers.trainer_utils import is_main_process
from utils_ner import Split, TokenClassificationDataset, TokenClassificationTask
logger = logging.getLogger(__name__)

View File

@@ -24,6 +24,7 @@ from typing import Dict, List, Optional, Tuple
import numpy as np
from seqeval.metrics import classification_report, f1_score, precision_score, recall_score
from utils_ner import Split, TFTokenClassificationDataset, TokenClassificationTask
from transformers import (
AutoConfig,
@@ -35,7 +36,6 @@ from transformers import (
TFTrainingArguments,
)
from transformers.utils import logging as hf_logging
from utils_ner import Split, TFTokenClassificationDataset, TokenClassificationTask
hf_logging.set_verbosity_info()

View File

@@ -3,7 +3,6 @@ import os
from typing import List, TextIO, Union
from conllu import parse_incr
from utils_ner import InputExample, Split, TokenClassificationTask

View File

@@ -23,6 +23,7 @@ from enum import Enum
from typing import List, Optional, Union
from filelock import FileLock
from transformers import PreTrainedTokenizer, is_tf_available, is_torch_available
@@ -240,7 +241,6 @@ if is_torch_available():
# and the others will use the cache.
lock_path = cached_features_file + ".lock"
with FileLock(lock_path):
if os.path.exists(cached_features_file) and not overwrite_cache:
logger.info(f"Loading features from cached file {cached_features_file}")
self.features = torch.load(cached_features_file)