Update quality tooling for formatting (#21480)

* Result of black 23.1

* Update target to Python 3.7

* Switch flake8 to ruff

* Configure isort

* Configure isort

* Apply isort with line limit

* Put the right black version

* adapt black in check copies

* Fix copies
This commit is contained in:
Sylvain Gugger
2023-02-06 18:10:56 -05:00
committed by GitHub
parent b7bb2b59f7
commit 6f79d26442
1211 changed files with 1532 additions and 2687 deletions

View File

@@ -19,9 +19,10 @@ import sys
from dataclasses import dataclass, field
from typing import Optional
import transformers
from seq2seq_trainer import Seq2SeqTrainer
from seq2seq_training_args import Seq2SeqTrainingArguments
import transformers
from transformers import (
AutoConfig,
AutoModelForSeq2SeqLM,
@@ -337,7 +338,6 @@ def main():
metrics["val_loss"] = round(metrics["val_loss"], 4)
if trainer.is_world_process_zero():
handle_metrics("val", metrics, training_args.output_dir)
all_metrics.update(metrics)

View File

@@ -16,8 +16,8 @@ from collections import defaultdict
from pathlib import Path
import pandas as pd
from rouge_cli import calculate_rouge_path
from utils import calculate_rouge
@@ -87,7 +87,6 @@ def test_single_sent_scores_dont_depend_on_newline_sep():
def test_pegasus_newline():
pred = [
"""" "a person who has such a video needs to immediately give it to the investigators," prosecutor says .<n> "it is a very disturbing scene," editor-in-chief of bild online tells "erin burnett: outfront" """
]

View File

@@ -17,11 +17,11 @@ from pathlib import Path
import numpy as np
import pytest
from torch.utils.data import DataLoader
from pack_dataset import pack_data_dir
from parameterized import parameterized
from save_len_file import save_len_file
from torch.utils.data import DataLoader
from transformers import AutoTokenizer
from transformers.models.mbart.modeling_mbart import shift_tokens_right
from transformers.testing_utils import TestCasePlus, slow

View File

@@ -18,6 +18,7 @@ import json
import unittest
from parameterized import parameterized
from transformers import FSMTForConditionalGeneration, FSMTTokenizer
from transformers.testing_utils import get_tests_dir, require_torch, slow, torch_device
from utils import calculate_bleu

View File

@@ -21,6 +21,7 @@ from unittest.mock import patch
from parameterized import parameterized
from run_eval import run_generate
from run_eval_search import run_search
from transformers.testing_utils import CaptureStdout, TestCasePlus, slow
from utils import ROUGE_KEYS

View File

@@ -29,7 +29,6 @@ from transformers import AutoTokenizer
def pack_examples(tok, src_examples, tgt_examples, max_tokens=1024):
finished_src, finished_tgt = [], []
sorted_examples = list(zip(src_examples, tgt_examples))

View File

@@ -20,6 +20,7 @@ import sys
from collections import OrderedDict
from run_eval import datetime_now, run_generate
from utils import ROUGE_KEYS

View File

@@ -17,6 +17,7 @@ from dataclasses import dataclass, field
from typing import Optional
from seq2seq_trainer import arg_to_scheduler
from transformers import TrainingArguments

View File

@@ -29,10 +29,10 @@ import torch
import torch.distributed as dist
from rouge_score import rouge_scorer, scoring
from sacrebleu import corpus_bleu
from sentence_splitter import add_newline_to_end_of_each_sentence
from torch import nn
from torch.utils.data import Dataset, Sampler
from sentence_splitter import add_newline_to_end_of_each_sentence
from transformers import BartTokenizer, EvalPrediction, PreTrainedTokenizer, T5Tokenizer
from transformers.models.bart.modeling_bart import shift_tokens_right
from transformers.utils import cached_property
@@ -132,7 +132,7 @@ class AbstractSeq2SeqDataset(Dataset):
type_path="train",
n_obs=None,
prefix="",
**dataset_kwargs
**dataset_kwargs,
):
super().__init__()
self.src_file = Path(data_dir).joinpath(type_path + ".source")