Update quality tooling for formatting (#21480)

* Result of black 23.1

* Update target to Python 3.7

* Switch flake8 to ruff

* Configure isort

* Configure isort

* Apply isort with line limit

* Put the right black version

* adapt black in check copies

* Fix copies
This commit is contained in:
Sylvain Gugger
2023-02-06 18:10:56 -05:00
committed by GitHub
parent b7bb2b59f7
commit 6f79d26442
1211 changed files with 1532 additions and 2687 deletions

View File

@@ -1,7 +1,7 @@
from arguments import TokenizerTrainingArguments
from datasets import load_dataset
from tqdm import tqdm
from arguments import TokenizerTrainingArguments
from transformers import AutoTokenizer, HfArgumentParser
from transformers.models.gpt2.tokenization_gpt2 import bytes_to_unicode

View File

@@ -6,16 +6,16 @@ from pathlib import Path
import datasets
import torch
from accelerate import Accelerator, DistributedType
from arguments import TrainingArguments
from datasets import load_dataset
from huggingface_hub import Repository
from torch.optim import AdamW
from torch.utils.data import IterableDataset
from torch.utils.data.dataloader import DataLoader
from torch.utils.data.datapipes.iter.combinatorics import ShufflerIterDataPipe
import transformers
from accelerate import Accelerator, DistributedType
from arguments import TrainingArguments
from huggingface_hub import Repository
from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser, get_scheduler, set_seed

View File

@@ -5,15 +5,15 @@ import re
from collections import defaultdict
import torch
from accelerate import Accelerator
from accelerate.utils import set_seed
from arguments import HumanEvalArguments
from datasets import load_dataset, load_metric
from torch.utils.data import IterableDataset
from torch.utils.data.dataloader import DataLoader
from tqdm import tqdm
import transformers
from accelerate import Accelerator
from accelerate.utils import set_seed
from arguments import HumanEvalArguments
from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser, StoppingCriteria, StoppingCriteriaList

View File

@@ -1,4 +1,5 @@
from arguments import InitializationArguments
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, HfArgumentParser

View File

@@ -6,10 +6,9 @@ from functools import partial
from typing import Dict, List, Optional, Set, Tuple, Type
from datasets import Dataset
from tqdm import tqdm
from datasketch import MinHash, MinHashLSH
from dpu_utils.utils.iterators import ThreadedIterator
from tqdm import tqdm
NON_ALPHA = re.compile("[^A-Za-z_0-9]")

View File

@@ -9,10 +9,10 @@ import time
from pathlib import Path
import numpy as np
from datasets import load_dataset
from arguments import PreprocessingArguments
from datasets import load_dataset
from minhash_deduplication import deduplicate_dataset
from transformers import AutoTokenizer, HfArgumentParser

View File

@@ -1,9 +1,9 @@
import multiprocessing
import time
from arguments import PretokenizationArguments
from datasets import load_dataset
from arguments import PretokenizationArguments
from transformers import AutoTokenizer, HfArgumentParser

View File

@@ -1,7 +1,6 @@
from unittest import TestCase
from datasets import Dataset
from minhash_deduplication import deduplicate_dataset, make_duplicate_clusters

View File

@@ -1,12 +1,12 @@
import logging
import torch
from accelerate import Accelerator
from arguments import EvaluationArguments
from datasets import load_dataset
from torch.utils.data import IterableDataset
from torch.utils.data.dataloader import DataLoader
from accelerate import Accelerator
from arguments import EvaluationArguments
from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser, set_seed