Update quality tooling for formatting (#21480)
* Result of black 23.1
* Update target to Python 3.7
* Switch flake8 to ruff
* Configure isort
* Configure isort
* Apply isort with line limit
* Put the right black version
* Adapt black in check copies
* Fix copies
This commit is contained in:
@@ -22,6 +22,7 @@ from dataclasses import dataclass, field
|
||||
from typing import Dict, Optional
|
||||
|
||||
import numpy as np
|
||||
from utils_multiple_choice import MultipleChoiceDataset, Split, processors
|
||||
|
||||
import transformers
|
||||
from transformers import (
|
||||
@@ -36,7 +37,6 @@ from transformers import (
|
||||
set_seed,
|
||||
)
|
||||
from transformers.trainer_utils import is_main_process
|
||||
from utils_multiple_choice import MultipleChoiceDataset, Split, processors
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -26,8 +26,8 @@ from enum import Enum
|
||||
from typing import List, Optional
|
||||
|
||||
import tqdm
|
||||
|
||||
from filelock import FileLock
|
||||
|
||||
from transformers import PreTrainedTokenizer, is_tf_available, is_torch_available
|
||||
|
||||
|
||||
@@ -112,7 +112,6 @@ if is_torch_available():
|
||||
# and the others will use the cache.
|
||||
lock_path = cached_features_file + ".lock"
|
||||
with FileLock(lock_path):
|
||||
|
||||
if os.path.exists(cached_features_file) and not overwrite_cache:
|
||||
logger.info(f"Loading features from cached file {cached_features_file}")
|
||||
self.features = torch.load(cached_features_file)
|
||||
|
||||
@@ -69,7 +69,7 @@ class BaseTransformer(pl.LightningModule):
|
||||
config=None,
|
||||
tokenizer=None,
|
||||
model=None,
|
||||
**config_kwargs
|
||||
**config_kwargs,
|
||||
):
|
||||
"""Initialize a model, tokenizer and config."""
|
||||
super().__init__()
|
||||
@@ -346,7 +346,7 @@ def generic_train(
|
||||
extra_callbacks=[],
|
||||
checkpoint_callback=None,
|
||||
logging_callback=None,
|
||||
**extra_train_kwargs
|
||||
**extra_train_kwargs,
|
||||
):
|
||||
pl.seed_everything(args.seed)
|
||||
|
||||
|
||||
@@ -7,21 +7,19 @@ from argparse import Namespace
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from lightning_base import BaseTransformer, add_generic_args, generic_train
|
||||
from torch.utils.data import DataLoader, TensorDataset
|
||||
|
||||
from lightning_base import BaseTransformer, add_generic_args, generic_train
|
||||
from transformers import glue_compute_metrics as compute_metrics
|
||||
from transformers import glue_convert_examples_to_features as convert_examples_to_features
|
||||
from transformers import glue_output_modes
|
||||
from transformers import glue_output_modes, glue_tasks_num_labels
|
||||
from transformers import glue_processors as processors
|
||||
from transformers import glue_tasks_num_labels
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class GLUETransformer(BaseTransformer):
|
||||
|
||||
mode = "sequence-classification"
|
||||
|
||||
def __init__(self, hparams):
|
||||
|
||||
@@ -7,11 +7,10 @@ from importlib import import_module
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from lightning_base import BaseTransformer, add_generic_args, generic_train
|
||||
from seqeval.metrics import accuracy_score, f1_score, precision_score, recall_score
|
||||
from torch.nn import CrossEntropyLoss
|
||||
from torch.utils.data import DataLoader, TensorDataset
|
||||
|
||||
from lightning_base import BaseTransformer, add_generic_args, generic_train
|
||||
from utils_ner import TokenClassificationTask
|
||||
|
||||
|
||||
|
||||
@@ -172,7 +172,6 @@ def train(args, train_dataset, model, tokenizer):
|
||||
for _ in train_iterator:
|
||||
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
|
||||
for step, batch in enumerate(epoch_iterator):
|
||||
|
||||
# Skip past any already trained steps if resuming training
|
||||
if steps_trained_in_current_epoch > 0:
|
||||
steps_trained_in_current_epoch -= 1
|
||||
|
||||
@@ -30,9 +30,10 @@ from transformers import (
|
||||
DataCollatorWithPadding,
|
||||
HfArgumentParser,
|
||||
SquadDataset,
|
||||
Trainer,
|
||||
TrainingArguments,
|
||||
)
|
||||
from transformers import SquadDataTrainingArguments as DataTrainingArguments
|
||||
from transformers import Trainer, TrainingArguments
|
||||
from transformers.trainer_utils import is_main_process
|
||||
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ import json
|
||||
from typing import List
|
||||
|
||||
from ltp import LTP
|
||||
|
||||
from transformers import BertTokenizer
|
||||
|
||||
|
||||
@@ -93,7 +94,6 @@ def prepare_ref(lines: List[str], ltp_tokenizer: LTP, bert_tokenizer: BertTokeni
|
||||
|
||||
ref_ids = []
|
||||
for input_ids, chinese_word in zip(bert_res, ltp_res):
|
||||
|
||||
input_tokens = []
|
||||
for id in input_ids:
|
||||
token = bert_tokenizer._convert_id_to_token(id)
|
||||
|
||||
@@ -19,9 +19,10 @@ import sys
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
import transformers
|
||||
from seq2seq_trainer import Seq2SeqTrainer
|
||||
from seq2seq_training_args import Seq2SeqTrainingArguments
|
||||
|
||||
import transformers
|
||||
from transformers import (
|
||||
AutoConfig,
|
||||
AutoModelForSeq2SeqLM,
|
||||
@@ -337,7 +338,6 @@ def main():
|
||||
metrics["val_loss"] = round(metrics["val_loss"], 4)
|
||||
|
||||
if trainer.is_world_process_zero():
|
||||
|
||||
handle_metrics("val", metrics, training_args.output_dir)
|
||||
all_metrics.update(metrics)
|
||||
|
||||
|
||||
@@ -16,8 +16,8 @@ from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from rouge_cli import calculate_rouge_path
|
||||
|
||||
from utils import calculate_rouge
|
||||
|
||||
|
||||
@@ -87,7 +87,6 @@ def test_single_sent_scores_dont_depend_on_newline_sep():
|
||||
|
||||
|
||||
def test_pegasus_newline():
|
||||
|
||||
pred = [
|
||||
"""" "a person who has such a video needs to immediately give it to the investigators," prosecutor says .<n> "it is a very disturbing scene," editor-in-chief of bild online tells "erin burnett: outfront" """
|
||||
]
|
||||
|
||||
@@ -17,11 +17,11 @@ from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
from pack_dataset import pack_data_dir
|
||||
from parameterized import parameterized
|
||||
from save_len_file import save_len_file
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
from transformers import AutoTokenizer
|
||||
from transformers.models.mbart.modeling_mbart import shift_tokens_right
|
||||
from transformers.testing_utils import TestCasePlus, slow
|
||||
|
||||
@@ -18,6 +18,7 @@ import json
|
||||
import unittest
|
||||
|
||||
from parameterized import parameterized
|
||||
|
||||
from transformers import FSMTForConditionalGeneration, FSMTTokenizer
|
||||
from transformers.testing_utils import get_tests_dir, require_torch, slow, torch_device
|
||||
from utils import calculate_bleu
|
||||
|
||||
@@ -21,6 +21,7 @@ from unittest.mock import patch
|
||||
from parameterized import parameterized
|
||||
from run_eval import run_generate
|
||||
from run_eval_search import run_search
|
||||
|
||||
from transformers.testing_utils import CaptureStdout, TestCasePlus, slow
|
||||
from utils import ROUGE_KEYS
|
||||
|
||||
|
||||
@@ -29,7 +29,6 @@ from transformers import AutoTokenizer
|
||||
|
||||
|
||||
def pack_examples(tok, src_examples, tgt_examples, max_tokens=1024):
|
||||
|
||||
finished_src, finished_tgt = [], []
|
||||
|
||||
sorted_examples = list(zip(src_examples, tgt_examples))
|
||||
|
||||
@@ -20,6 +20,7 @@ import sys
|
||||
from collections import OrderedDict
|
||||
|
||||
from run_eval import datetime_now, run_generate
|
||||
|
||||
from utils import ROUGE_KEYS
|
||||
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
from seq2seq_trainer import arg_to_scheduler
|
||||
|
||||
from transformers import TrainingArguments
|
||||
|
||||
|
||||
|
||||
@@ -29,10 +29,10 @@ import torch
|
||||
import torch.distributed as dist
|
||||
from rouge_score import rouge_scorer, scoring
|
||||
from sacrebleu import corpus_bleu
|
||||
from sentence_splitter import add_newline_to_end_of_each_sentence
|
||||
from torch import nn
|
||||
from torch.utils.data import Dataset, Sampler
|
||||
|
||||
from sentence_splitter import add_newline_to_end_of_each_sentence
|
||||
from transformers import BartTokenizer, EvalPrediction, PreTrainedTokenizer, T5Tokenizer
|
||||
from transformers.models.bart.modeling_bart import shift_tokens_right
|
||||
from transformers.utils import cached_property
|
||||
@@ -132,7 +132,7 @@ class AbstractSeq2SeqDataset(Dataset):
|
||||
type_path="train",
|
||||
n_obs=None,
|
||||
prefix="",
|
||||
**dataset_kwargs
|
||||
**dataset_kwargs,
|
||||
):
|
||||
super().__init__()
|
||||
self.src_file = Path(data_dir).joinpath(type_path + ".source")
|
||||
|
||||
@@ -24,6 +24,7 @@ from typing import Dict, List, Optional, Tuple
|
||||
import numpy as np
|
||||
from seqeval.metrics import accuracy_score, f1_score, precision_score, recall_score
|
||||
from torch import nn
|
||||
from utils_ner import Split, TokenClassificationDataset, TokenClassificationTask
|
||||
|
||||
import transformers
|
||||
from transformers import (
|
||||
@@ -38,7 +39,6 @@ from transformers import (
|
||||
set_seed,
|
||||
)
|
||||
from transformers.trainer_utils import is_main_process
|
||||
from utils_ner import Split, TokenClassificationDataset, TokenClassificationTask
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -24,6 +24,7 @@ from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
from seqeval.metrics import classification_report, f1_score, precision_score, recall_score
|
||||
from utils_ner import Split, TFTokenClassificationDataset, TokenClassificationTask
|
||||
|
||||
from transformers import (
|
||||
AutoConfig,
|
||||
@@ -35,7 +36,6 @@ from transformers import (
|
||||
TFTrainingArguments,
|
||||
)
|
||||
from transformers.utils import logging as hf_logging
|
||||
from utils_ner import Split, TFTokenClassificationDataset, TokenClassificationTask
|
||||
|
||||
|
||||
hf_logging.set_verbosity_info()
|
||||
|
||||
@@ -3,7 +3,6 @@ import os
|
||||
from typing import List, TextIO, Union
|
||||
|
||||
from conllu import parse_incr
|
||||
|
||||
from utils_ner import InputExample, Split, TokenClassificationTask
|
||||
|
||||
|
||||
|
||||
@@ -23,6 +23,7 @@ from enum import Enum
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from filelock import FileLock
|
||||
|
||||
from transformers import PreTrainedTokenizer, is_tf_available, is_torch_available
|
||||
|
||||
|
||||
@@ -240,7 +241,6 @@ if is_torch_available():
|
||||
# and the others will use the cache.
|
||||
lock_path = cached_features_file + ".lock"
|
||||
with FileLock(lock_path):
|
||||
|
||||
if os.path.exists(cached_features_file) and not overwrite_cache:
|
||||
logger.info(f"Loading features from cached file {cached_features_file}")
|
||||
self.features = torch.load(cached_features_file)
|
||||
|
||||
Reference in New Issue
Block a user