From 158e82e061c02fc2f1613adb7ac1d1cb6adae71c Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Sat, 21 Dec 2019 15:57:32 +0100 Subject: [PATCH] Sort imports with isort. This is the result of: $ isort --recursive examples templates transformers utils hubconf.py setup.py --- examples/benchmarks.py | 11 +- examples/contrib/run_camembert.py | 4 +- examples/contrib/run_openai_gpt.py | 13 +- examples/contrib/run_swag.py | 20 +- examples/contrib/run_transfo_xl.py | 5 +- examples/distillation/distiller.py | 22 +-- .../distillation/grouped_batch_sampler.py | 2 +- examples/distillation/lm_seqs_dataset.py | 2 +- .../distillation/run_squad_w_distillation.py | 38 ++-- .../distillation/scripts/binarized_data.py | 7 +- examples/distillation/scripts/extract.py | 7 +- .../scripts/extract_distilbert.py | 7 +- examples/distillation/scripts/token_counts.py | 5 +- examples/distillation/train.py | 26 ++- examples/distillation/utils.py | 10 +- examples/mm-imdb/run_mmimdb.py | 38 ++-- examples/mm-imdb/utils_mmimdb.py | 6 +- examples/pplm/run_pplm.py | 3 +- examples/pplm/run_pplm_discrim_train.py | 10 +- examples/run_bertology.py | 16 +- examples/run_generation.py | 22 ++- examples/run_glue.py | 68 +++---- examples/run_lm_finetuning.py | 28 +-- examples/run_multiple_choice.py | 37 ++-- examples/run_ner.py | 29 ++- examples/run_squad.py | 48 ++--- examples/run_tf_glue.py | 7 +- examples/run_tf_ner.py | 44 +++-- examples/run_xnli.py | 43 ++-- ...ert_bertabs_original_pytorch_checkpoint.py | 6 +- examples/summarization/modeling_bertabs.py | 3 +- examples/summarization/run_summarization.py | 11 +- examples/summarization/utils_summarization.py | 2 +- .../summarization/utils_summarization_test.py | 7 +- examples/test_examples.py | 16 +- examples/utils_multiple_choice.py | 11 +- examples/utils_ner.py | 1 + hubconf.py | 7 +- setup.py | 1 + .../adding_a_new_example_script/run_xxx.py | 35 ++-- .../adding_a_new_example_script/utils_xxx.py | 5 +- .../adding_a_new_model/configuration_xxx.py | 4 +- ...t_xxx_original_tf_checkpoint_to_pytorch.py | 7 +- .../adding_a_new_model/modeling_tf_xxx.py | 7 +- templates/adding_a_new_model/modeling_xxx.py | 7 +- .../tests/modeling_tf_xxx_test.py | 15 +- .../tests/modeling_xxx_test.py | 7 +- .../tests/tokenization_xxx_test.py | 2 +- .../adding_a_new_model/tokenization_xxx.py | 1 + transformers/__init__.py | 184 +++++++++--------- transformers/commands/convert.py | 1 - transformers/commands/run.py | 2 +- transformers/commands/serving.py | 13 +- transformers/commands/train.py | 9 +- transformers/commands/user.py | 2 +- transformers/configuration_albert.py | 1 + transformers/configuration_auto.py | 27 +-- transformers/configuration_bert.py | 1 + transformers/configuration_camembert.py | 1 + transformers/configuration_ctrl.py | 1 + transformers/configuration_distilbert.py | 3 +- transformers/configuration_gpt2.py | 1 + transformers/configuration_mmbt.py | 1 + transformers/configuration_openai.py | 1 + transformers/configuration_roberta.py | 1 + transformers/configuration_t5.py | 4 +- transformers/configuration_transfo_xl.py | 1 + transformers/configuration_utils.py | 3 +- transformers/configuration_xlm.py | 1 + transformers/configuration_xlm_roberta.py | 1 + transformers/configuration_xlnet.py | 1 + ...lbert_original_tf_checkpoint_to_pytorch.py | 7 +- ..._bert_original_tf_checkpoint_to_pytorch.py | 7 +- ..._bert_pytorch_checkpoint_to_original_tf.py | 6 +- ..._gpt2_original_tf_checkpoint_to_pytorch.py | 2 +- ...penai_original_tf_checkpoint_to_pytorch.py | 2 +- .../convert_pytorch_checkpoint_to_tf2.py | 77 ++++---- ..._original_pytorch_checkpoint_to_pytorch.py | 14 +- ...rt_t5_original_tf_checkpoint_to_pytorch.py | 7 +- ...fo_xl_original_tf_checkpoint_to_pytorch.py | 13 +- ..._original_pytorch_checkpoint_to_pytorch.py | 4 +- ...xlnet_original_tf_checkpoint_to_pytorch.py | 12 +- transformers/data/__init__.py | 20 +- transformers/data/metrics/__init__.py | 3 +- transformers/data/metrics/squad_metrics.py | 10 +- transformers/data/processors/__init__.py | 6 +- transformers/data/processors/glue.py | 3 +- transformers/data/processors/squad.py | 13 +- transformers/data/processors/utils.py | 5 +- transformers/data/processors/xnli.py | 1 + transformers/file_utils.py | 13 +- transformers/hf_api.py | 1 + transformers/modelcard.py | 5 +- transformers/modeling_albert.py | 12 +- transformers/modeling_auto.py | 118 ++++++----- transformers/modeling_bert.py | 3 +- transformers/modeling_camembert.py | 15 +- transformers/modeling_ctrl.py | 4 +- transformers/modeling_distilbert.py | 8 +- transformers/modeling_encoder_decoder.py | 1 + transformers/modeling_gpt2.py | 3 +- transformers/modeling_mmbt.py | 1 + transformers/modeling_openai.py | 3 +- transformers/modeling_roberta.py | 3 +- transformers/modeling_t5.py | 11 +- transformers/modeling_tf_albert.py | 5 +- transformers/modeling_tf_auto.py | 87 ++++----- transformers/modeling_tf_bert.py | 3 +- transformers/modeling_tf_ctrl.py | 4 +- transformers/modeling_tf_distilbert.py | 8 +- transformers/modeling_tf_gpt2.py | 17 +- transformers/modeling_tf_openai.py | 17 +- transformers/modeling_tf_pytorch_utils.py | 2 + transformers/modeling_tf_roberta.py | 4 +- transformers/modeling_tf_t5.py | 7 +- transformers/modeling_tf_transfo_xl.py | 13 +- .../modeling_tf_transfo_xl_utilities.py | 1 - transformers/modeling_tf_utils.py | 5 +- transformers/modeling_tf_xlm.py | 19 +- transformers/modeling_tf_xlnet.py | 2 +- transformers/modeling_transfo_xl.py | 13 +- transformers/modeling_transfo_xl_utilities.py | 2 +- transformers/modeling_utils.py | 3 +- transformers/modeling_xlm.py | 8 +- transformers/modeling_xlm_roberta.py | 15 +- transformers/modeling_xlnet.py | 12 +- transformers/optimization.py | 1 + transformers/optimization_tf.py | 4 +- transformers/pipelines.py | 20 +- .../tests/configuration_common_test.py | 8 +- transformers/tests/hf_api_test.py | 1 + transformers/tests/model_card_test.py | 3 +- transformers/tests/modeling_albert_test.py | 7 +- transformers/tests/modeling_auto_test.py | 11 +- transformers/tests/modeling_bert_test.py | 7 +- transformers/tests/modeling_common_test.py | 20 +- transformers/tests/modeling_ctrl_test.py | 15 +- .../tests/modeling_distilbert_test.py | 13 +- .../tests/modeling_encoder_decoder_test.py | 2 + transformers/tests/modeling_gpt2_test.py | 13 +- transformers/tests/modeling_openai_test.py | 13 +- transformers/tests/modeling_roberta_test.py | 13 +- transformers/tests/modeling_t5_test.py | 7 +- transformers/tests/modeling_tf_albert_test.py | 15 +- transformers/tests/modeling_tf_auto_test.py | 11 +- transformers/tests/modeling_tf_bert_test.py | 15 +- transformers/tests/modeling_tf_common_test.py | 10 +- transformers/tests/modeling_tf_ctrl_test.py | 15 +- .../tests/modeling_tf_distilbert_test.py | 9 +- transformers/tests/modeling_tf_gpt2_test.py | 15 +- .../tests/modeling_tf_openai_gpt_test.py | 15 +- .../tests/modeling_tf_roberta_test.py | 9 +- transformers/tests/modeling_tf_t5_test.py | 15 +- .../tests/modeling_tf_transfo_xl_test.py | 15 +- transformers/tests/modeling_tf_xlm_test.py | 13 +- transformers/tests/modeling_tf_xlnet_test.py | 17 +- .../tests/modeling_transfo_xl_test.py | 15 +- transformers/tests/modeling_xlm_test.py | 13 +- transformers/tests/modeling_xlnet_test.py | 17 +- transformers/tests/optimization_test.py | 13 +- transformers/tests/optimization_tf_test.py | 5 +- transformers/tests/pipelines_test.py | 2 +- .../tests/tokenization_albert_test.py | 3 +- transformers/tests/tokenization_auto_test.py | 19 +- .../tests/tokenization_bert_japanese_test.py | 8 +- transformers/tests/tokenization_bert_test.py | 2 +- transformers/tests/tokenization_ctrl_test.py | 4 +- .../tests/tokenization_distilbert_test.py | 2 +- transformers/tests/tokenization_gpt2_test.py | 4 +- .../tests/tokenization_openai_test.py | 4 +- .../tests/tokenization_roberta_test.py | 5 +- transformers/tests/tokenization_t5_test.py | 1 + .../tests/tokenization_tests_commons.py | 7 +- .../tests/tokenization_transfo_xl_test.py | 7 +- transformers/tests/tokenization_utils_test.py | 5 +- transformers/tests/tokenization_xlm_test.py | 4 +- transformers/tests/tokenization_xlnet_test.py | 3 +- transformers/tests/utils.py | 3 +- transformers/tokenization_albert.py | 9 +- transformers/tokenization_auto.py | 19 +- transformers/tokenization_bert.py | 1 + transformers/tokenization_bert_japanese.py | 6 +- transformers/tokenization_camembert.py | 3 + transformers/tokenization_ctrl.py | 4 +- transformers/tokenization_distilbert.py | 1 + transformers/tokenization_gpt2.py | 10 +- transformers/tokenization_openai.py | 3 +- transformers/tokenization_roberta.py | 6 +- transformers/tokenization_t5.py | 4 +- transformers/tokenization_transfo_xl.py | 1 + transformers/tokenization_utils.py | 12 +- transformers/tokenization_xlm.py | 3 +- transformers/tokenization_xlm_roberta.py | 3 + transformers/tokenization_xlnet.py | 3 +- utils/download_glue_data.py | 7 +- 195 files changed, 1182 insertions(+), 1044 deletions(-) diff --git a/examples/benchmarks.py b/examples/benchmarks.py index 20b62112b4..4ef0640e35 100644 --- a/examples/benchmarks.py +++ b/examples/benchmarks.py @@ -18,12 +18,14 @@ # If checking the tensors placement # tf.debugging.set_log_device_placement(True) -from typing import List -import timeit -from transformers import is_tf_available, is_torch_available -from time import time import argparse import csv +import timeit +from time import time +from typing import List + +from transformers import AutoConfig, AutoTokenizer, is_tf_available, is_torch_available + if is_tf_available(): import tensorflow as tf @@ -33,7 +35,6 @@ if is_torch_available(): import torch from transformers import AutoModel -from transformers import AutoConfig, AutoTokenizer input_text = """Bent over their instruments, three hundred Fertilizers were plunged, as the Director of Hatcheries and Conditioning entered the room, in the diff --git a/examples/contrib/run_camembert.py b/examples/contrib/run_camembert.py index 99f54f5442..791a02fedf 100644 --- a/examples/contrib/run_camembert.py +++ b/examples/contrib/run_camembert.py @@ -1,11 +1,11 @@ -from pathlib import Path import tarfile import urllib.request +from pathlib import Path import torch -from transformers.tokenization_camembert import CamembertTokenizer from transformers.modeling_camembert import CamembertForMaskedLM +from transformers.tokenization_camembert import CamembertTokenizer def fill_mask(masked_input, model, tokenizer, topk=5): diff --git a/examples/contrib/run_openai_gpt.py b/examples/contrib/run_openai_gpt.py index f6431c80be..e35f3d4fe9 100644 --- a/examples/contrib/run_openai_gpt.py +++ b/examples/contrib/run_openai_gpt.py @@ -28,26 +28,27 @@ --train_batch_size 16 \ """ import argparse -import os import csv -import random import logging -from tqdm import tqdm, trange +import os +import random import numpy as np import torch from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset +from tqdm import tqdm, trange from transformers import ( + CONFIG_NAME, + WEIGHTS_NAME, + AdamW, OpenAIGPTDoubleHeadsModel, OpenAIGPTTokenizer, - AdamW, cached_path, - WEIGHTS_NAME, - CONFIG_NAME, get_linear_schedule_with_warmup, ) + ROCSTORIES_URL = "https://s3.amazonaws.com/datasets.huggingface.co/ROCStories.tar.gz" logging.basicConfig( diff --git a/examples/contrib/run_swag.py b/examples/contrib/run_swag.py index d03d1aacec..65c07c2a32 100644 --- a/examples/contrib/run_swag.py +++ b/examples/contrib/run_swag.py @@ -19,28 +19,34 @@ from __future__ import absolute_import, division, print_function import argparse -import logging import csv +import glob +import logging import os import random import sys -import glob import numpy as np import torch from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset from torch.utils.data.distributed import DistributedSampler +from tqdm import tqdm, trange + +from transformers import ( + WEIGHTS_NAME, + AdamW, + BertConfig, + BertForMultipleChoice, + BertTokenizer, + get_linear_schedule_with_warmup, +) + try: from torch.utils.tensorboard import SummaryWriter except: from tensorboardX import SummaryWriter -from tqdm import tqdm, trange - -from transformers import WEIGHTS_NAME, BertConfig, BertForMultipleChoice, BertTokenizer - -from transformers import AdamW, get_linear_schedule_with_warmup logger = logging.getLogger(__name__) diff --git a/examples/contrib/run_transfo_xl.py b/examples/contrib/run_transfo_xl.py index 1ef66bef1e..e4af4f6db5 100644 --- a/examples/contrib/run_transfo_xl.py +++ b/examples/contrib/run_transfo_xl.py @@ -23,12 +23,13 @@ from __future__ import absolute_import, division, print_function, unicode_litera import argparse import logging -import time import math +import time import torch -from transformers import TransfoXLLMHeadModel, TransfoXLCorpus, TransfoXLTokenizer +from transformers import TransfoXLCorpus, TransfoXLLMHeadModel, TransfoXLTokenizer + logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.INFO diff --git a/examples/distillation/distiller.py b/examples/distillation/distiller.py index e3bf0d443e..a957b1a094 100644 --- a/examples/distillation/distiller.py +++ b/examples/distillation/distiller.py @@ -15,31 +15,31 @@ """ The distiller to distil the student. Adapted in part from Facebook, Inc XLM model (https://github.com/facebookresearch/XLM) """ -import os import math -import psutil +import os import time -from tqdm import trange, tqdm -import numpy as np +import numpy as np import torch import torch.nn as nn import torch.nn.functional as F from torch.optim import AdamW +from torch.utils.data import BatchSampler, DataLoader, RandomSampler from torch.utils.data.distributed import DistributedSampler -from torch.utils.data import RandomSampler, BatchSampler, DataLoader +from tqdm import tqdm, trange + +import psutil +from grouped_batch_sampler import GroupedBatchSampler, create_lengths_groups +from lm_seqs_dataset import LmSeqsDataset +from transformers import get_linear_schedule_with_warmup +from utils import logger + try: from torch.utils.tensorboard import SummaryWriter except: from tensorboardX import SummaryWriter -from transformers import get_linear_schedule_with_warmup - -from utils import logger -from lm_seqs_dataset import LmSeqsDataset -from grouped_batch_sampler import GroupedBatchSampler, create_lengths_groups - class Distiller: def __init__( diff --git a/examples/distillation/grouped_batch_sampler.py b/examples/distillation/grouped_batch_sampler.py index 1132fdb582..c386c4224d 100644 --- a/examples/distillation/grouped_batch_sampler.py +++ b/examples/distillation/grouped_batch_sampler.py @@ -17,8 +17,8 @@ import bisect import copy from collections import defaultdict -import numpy as np +import numpy as np from torch.utils.data.sampler import BatchSampler, Sampler from utils import logger diff --git a/examples/distillation/lm_seqs_dataset.py b/examples/distillation/lm_seqs_dataset.py index bb0d80f38b..691e010cf2 100644 --- a/examples/distillation/lm_seqs_dataset.py +++ b/examples/distillation/lm_seqs_dataset.py @@ -15,10 +15,10 @@ """ Dataset to distilled models adapted in part from Facebook, Inc XLM model (https://github.com/facebookresearch/XLM) """ +import numpy as np import torch from torch.utils.data import Dataset -import numpy as np from utils import logger diff --git a/examples/distillation/run_squad_w_distillation.py b/examples/distillation/run_squad_w_distillation.py index 0d5a004eb3..11524e388e 100644 --- a/examples/distillation/run_squad_w_distillation.py +++ b/examples/distillation/run_squad_w_distillation.py @@ -18,56 +18,58 @@ from __future__ import absolute_import, division, print_function import argparse +import glob import logging import os import random -import glob import numpy as np import torch +import torch.nn as nn +import torch.nn.functional as F from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset from torch.utils.data.distributed import DistributedSampler -import torch.nn.functional as F -import torch.nn as nn - -try: - from torch.utils.tensorboard import SummaryWriter -except: - from tensorboardX import SummaryWriter - from tqdm import tqdm, trange from transformers import ( WEIGHTS_NAME, + AdamW, BertConfig, BertForQuestionAnswering, BertTokenizer, + DistilBertConfig, + DistilBertForQuestionAnswering, + DistilBertTokenizer, XLMConfig, XLMForQuestionAnswering, XLMTokenizer, XLNetConfig, XLNetForQuestionAnswering, XLNetTokenizer, - DistilBertConfig, - DistilBertForQuestionAnswering, - DistilBertTokenizer, + get_linear_schedule_with_warmup, ) -from transformers import AdamW, get_linear_schedule_with_warmup - from ..utils_squad import ( - read_squad_examples, - convert_examples_to_features, RawResult, - write_predictions, RawResultExtended, + convert_examples_to_features, + read_squad_examples, + write_predictions, write_predictions_extended, ) # The follwing import is the official SQuAD evaluation script (2.0). # You can remove it from the dependencies if you are using this script outside of the library # We've added it here for automated tests (see examples/test_examples.py file) -from ..utils_squad_evaluate import EVAL_OPTS, main as evaluate_on_squad +from ..utils_squad_evaluate import EVAL_OPTS +from ..utils_squad_evaluate import main as evaluate_on_squad + + +try: + from torch.utils.tensorboard import SummaryWriter +except: + from tensorboardX import SummaryWriter + logger = logging.getLogger(__name__) diff --git a/examples/distillation/scripts/binarized_data.py b/examples/distillation/scripts/binarized_data.py index 40bde7d152..7590cfcbcf 100644 --- a/examples/distillation/scripts/binarized_data.py +++ b/examples/distillation/scripts/binarized_data.py @@ -16,12 +16,15 @@ Preprocessing script before distillation. """ import argparse +import logging import pickle import random import time + import numpy as np -from transformers import BertTokenizer, RobertaTokenizer, GPT2Tokenizer -import logging + +from transformers import BertTokenizer, GPT2Tokenizer, RobertaTokenizer + logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.INFO diff --git a/examples/distillation/scripts/extract.py b/examples/distillation/scripts/extract.py index 9610f8f17a..429350a772 100644 --- a/examples/distillation/scripts/extract.py +++ b/examples/distillation/scripts/extract.py @@ -16,10 +16,13 @@ Preprocessing script before training the distilled model. Specific to RoBERTa -> DistilRoBERTa and GPT2 -> DistilGPT2. """ -from transformers import BertForMaskedLM, RobertaForMaskedLM, GPT2LMHeadModel -import torch import argparse +import torch + +from transformers import BertForMaskedLM, GPT2LMHeadModel, RobertaForMaskedLM + + if __name__ == "__main__": parser = argparse.ArgumentParser( description="Extraction some layers of the full RobertaForMaskedLM or GPT2LMHeadModel for Transfer Learned Distillation" diff --git a/examples/distillation/scripts/extract_distilbert.py b/examples/distillation/scripts/extract_distilbert.py index 8e58db5552..db0dc3ed84 100644 --- a/examples/distillation/scripts/extract_distilbert.py +++ b/examples/distillation/scripts/extract_distilbert.py @@ -16,10 +16,13 @@ Preprocessing script before training DistilBERT. Specific to BERT -> DistilBERT. """ -from transformers import BertForMaskedLM, RobertaForMaskedLM -import torch import argparse +import torch + +from transformers import BertForMaskedLM, RobertaForMaskedLM + + if __name__ == "__main__": parser = argparse.ArgumentParser( description="Extraction some layers of the full BertForMaskedLM or RObertaForMaskedLM for Transfer Learned Distillation" diff --git a/examples/distillation/scripts/token_counts.py b/examples/distillation/scripts/token_counts.py index 623caad4b1..0238bf66f8 100644 --- a/examples/distillation/scripts/token_counts.py +++ b/examples/distillation/scripts/token_counts.py @@ -15,10 +15,11 @@ """ Preprocessing script before training the distilled model. """ -from collections import Counter import argparse -import pickle import logging +import pickle +from collections import Counter + logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.INFO diff --git a/examples/distillation/train.py b/examples/distillation/train.py index 37c49ae7b2..a37a7c4274 100644 --- a/examples/distillation/train.py +++ b/examples/distillation/train.py @@ -16,22 +16,32 @@ Training the distilled model. Supported architectures include: BERT -> DistilBERT, RoBERTa -> DistilRoBERTa, GPT2 -> DistilGPT2. """ -import os import argparse -import pickle import json +import os +import pickle import shutil + import numpy as np import torch -from transformers import BertConfig, BertForMaskedLM, BertTokenizer -from transformers import RobertaConfig, RobertaForMaskedLM, RobertaTokenizer -from transformers import DistilBertConfig, DistilBertForMaskedLM, DistilBertTokenizer -from transformers import GPT2Config, GPT2LMHeadModel, GPT2Tokenizer - from distiller import Distiller -from utils import git_log, logger, init_gpu_params, set_seed from lm_seqs_dataset import LmSeqsDataset +from transformers import ( + BertConfig, + BertForMaskedLM, + BertTokenizer, + DistilBertConfig, + DistilBertForMaskedLM, + DistilBertTokenizer, + GPT2Config, + GPT2LMHeadModel, + GPT2Tokenizer, + RobertaConfig, + RobertaForMaskedLM, + RobertaTokenizer, +) +from utils import git_log, init_gpu_params, logger, set_seed MODEL_CLASSES = { diff --git a/examples/distillation/utils.py b/examples/distillation/utils.py index f9d7412cb5..b081f239c3 100644 --- a/examples/distillation/utils.py +++ b/examples/distillation/utils.py @@ -15,14 +15,16 @@ """ Utils to train DistilBERT adapted in part from Facebook, Inc XLM model (https://github.com/facebookresearch/XLM) """ -import git import json +import logging import os import socket -import torch -import numpy as np -import logging +import numpy as np +import torch + +import git + logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - PID: %(process)d - %(message)s", diff --git a/examples/mm-imdb/run_mmimdb.py b/examples/mm-imdb/run_mmimdb.py index c92dbd3d36..e87555f7da 100644 --- a/examples/mm-imdb/run_mmimdb.py +++ b/examples/mm-imdb/run_mmimdb.py @@ -19,32 +19,33 @@ from __future__ import absolute_import, division, print_function import argparse import glob +import json import logging import os import random -import json -from sklearn.metrics import f1_score import numpy as np import torch import torch.nn as nn +from sklearn.metrics import f1_score from torch.utils.data import DataLoader, RandomSampler, SequentialSampler from torch.utils.data.distributed import DistributedSampler - -try: - from torch.utils.tensorboard import SummaryWriter -except: - from tensorboardX import SummaryWriter - from tqdm import tqdm, trange -from utils_mmimdb import ImageEncoder, JsonlDataset, collate_fn, get_mmimdb_labels, get_image_transforms - from transformers import ( WEIGHTS_NAME, + AdamW, + AlbertConfig, + AlbertModel, + AlbertTokenizer, BertConfig, BertModel, BertTokenizer, + DistilBertConfig, + DistilBertModel, + DistilBertTokenizer, + MMBTConfig, + MMBTForClassification, RobertaConfig, RobertaModel, RobertaTokenizer, @@ -54,17 +55,16 @@ from transformers import ( XLNetConfig, XLNetModel, XLNetTokenizer, - DistilBertConfig, - DistilBertModel, - DistilBertTokenizer, - AlbertConfig, - AlbertModel, - AlbertTokenizer, - MMBTForClassification, - MMBTConfig, + get_linear_schedule_with_warmup, ) +from utils_mmimdb import ImageEncoder, JsonlDataset, collate_fn, get_image_transforms, get_mmimdb_labels + + +try: + from torch.utils.tensorboard import SummaryWriter +except: + from tensorboardX import SummaryWriter -from transformers import AdamW, get_linear_schedule_with_warmup logger = logging.getLogger(__name__) diff --git a/examples/mm-imdb/utils_mmimdb.py b/examples/mm-imdb/utils_mmimdb.py index 57cee25f9d..7a52a99b1a 100644 --- a/examples/mm-imdb/utils_mmimdb.py +++ b/examples/mm-imdb/utils_mmimdb.py @@ -17,13 +17,15 @@ import json import os from collections import Counter -from PIL import Image import torch import torch.nn as nn +from torch.utils.data import Dataset + import torchvision import torchvision.transforms as transforms -from torch.utils.data import Dataset +from PIL import Image + POOLING_BREAKDOWN = {1: (1, 1), 2: (2, 1), 3: (3, 1), 4: (2, 2), 5: (5, 1), 6: (3, 2), 7: (7, 1), 8: (4, 2), 9: (3, 3)} diff --git a/examples/pplm/run_pplm.py b/examples/pplm/run_pplm.py index 37183a5121..ec848323e9 100644 --- a/examples/pplm/run_pplm.py +++ b/examples/pplm/run_pplm.py @@ -34,10 +34,11 @@ import torch.nn.functional as F from torch.autograd import Variable from tqdm import trange +from pplm_classification_head import ClassificationHead from transformers import GPT2Tokenizer from transformers.file_utils import cached_path from transformers.modeling_gpt2 import GPT2LMHeadModel -from pplm_classification_head import ClassificationHead + PPLM_BOW = 1 PPLM_DISCRIM = 2 diff --git a/examples/pplm/run_pplm_discrim_train.py b/examples/pplm/run_pplm_discrim_train.py index 14136c4c7a..287715e53b 100644 --- a/examples/pplm/run_pplm_discrim_train.py +++ b/examples/pplm/run_pplm_discrim_train.py @@ -24,16 +24,16 @@ import time import numpy as np import torch import torch.nn.functional as F -import torch.optim import torch.optim as optim import torch.utils.data as data -from nltk.tokenize.treebank import TreebankWordDetokenizer -from torchtext import data as torchtext_data -from torchtext import datasets from tqdm import tqdm, trange -from transformers import GPT2Tokenizer, GPT2LMHeadModel +from nltk.tokenize.treebank import TreebankWordDetokenizer from pplm_classification_head import ClassificationHead +from torchtext import data as torchtext_data +from torchtext import datasets +from transformers import GPT2LMHeadModel, GPT2Tokenizer + torch.manual_seed(0) np.random.seed(0) diff --git a/examples/run_bertology.py b/examples/run_bertology.py index 6b4739d6bd..27709fa7ee 100644 --- a/examples/run_bertology.py +++ b/examples/run_bertology.py @@ -19,19 +19,19 @@ Some parts of this script are adapted from the code of Michel et al. (http://arxiv.org/abs/1905.10650) which is available at https://github.com/pmichel31415/are-16-heads-really-better-than-1 """ -import os import argparse import logging -from datetime import timedelta, datetime -from tqdm import tqdm +import os +from datetime import datetime, timedelta import numpy as np - import torch -from torch.utils.data import DataLoader, SequentialSampler, TensorDataset, Subset -from torch.utils.data.distributed import DistributedSampler from torch.nn import CrossEntropyLoss, MSELoss +from torch.utils.data import DataLoader, SequentialSampler, Subset, TensorDataset +from torch.utils.data.distributed import DistributedSampler +from tqdm import tqdm +from run_glue import ALL_MODELS, MODEL_CLASSES, load_and_cache_examples, set_seed from transformers import ( WEIGHTS_NAME, BertConfig, @@ -44,13 +44,11 @@ from transformers import ( XLNetForSequenceClassification, XLNetTokenizer, ) - -from run_glue import set_seed, load_and_cache_examples, ALL_MODELS, MODEL_CLASSES - from transformers import glue_compute_metrics as compute_metrics from transformers import glue_output_modes as output_modes from transformers import glue_processors as processors + logger = logging.getLogger(__name__) diff --git a/examples/run_generation.py b/examples/run_generation.py index e62ccf87c6..629b9348a0 100644 --- a/examples/run_generation.py +++ b/examples/run_generation.py @@ -21,15 +21,23 @@ from __future__ import absolute_import, division, print_function, unicode_litera import argparse import logging -import torch import numpy as np +import torch -from transformers import GPT2LMHeadModel, GPT2Tokenizer -from transformers import OpenAIGPTLMHeadModel, OpenAIGPTTokenizer -from transformers import XLNetLMHeadModel, XLNetTokenizer -from transformers import TransfoXLLMHeadModel, TransfoXLTokenizer -from transformers import CTRLLMHeadModel, CTRLTokenizer -from transformers import XLMWithLMHeadModel, XLMTokenizer +from transformers import ( + CTRLLMHeadModel, + CTRLTokenizer, + GPT2LMHeadModel, + GPT2Tokenizer, + OpenAIGPTLMHeadModel, + OpenAIGPTTokenizer, + TransfoXLLMHeadModel, + TransfoXLTokenizer, + XLMTokenizer, + XLMWithLMHeadModel, + XLNetLMHeadModel, + XLNetTokenizer, +) logging.basicConfig( diff --git a/examples/run_glue.py b/examples/run_glue.py index bbfd52ea3d..d70e20f330 100644 --- a/examples/run_glue.py +++ b/examples/run_glue.py @@ -19,54 +19,54 @@ from __future__ import absolute_import, division, print_function import argparse import glob +import json import logging import os import random -import json import numpy as np import torch from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset from torch.utils.data.distributed import DistributedSampler +from tqdm import tqdm, trange + +from transformers import ( + WEIGHTS_NAME, + AdamW, + AlbertConfig, + AlbertForSequenceClassification, + AlbertTokenizer, + BertConfig, + BertForSequenceClassification, + BertTokenizer, + DistilBertConfig, + DistilBertForSequenceClassification, + DistilBertTokenizer, + RobertaConfig, + RobertaForSequenceClassification, + RobertaTokenizer, + XLMConfig, + XLMForSequenceClassification, + XLMRobertaConfig, + XLMRobertaForSequenceClassification, + XLMRobertaTokenizer, + XLMTokenizer, + XLNetConfig, + XLNetForSequenceClassification, + XLNetTokenizer, + get_linear_schedule_with_warmup, +) +from transformers import glue_compute_metrics as compute_metrics +from transformers import glue_convert_examples_to_features as convert_examples_to_features +from transformers import glue_output_modes as output_modes +from transformers import glue_processors as processors + try: from torch.utils.tensorboard import SummaryWriter except: from tensorboardX import SummaryWriter -from tqdm import tqdm, trange - -from transformers import ( - WEIGHTS_NAME, - BertConfig, - BertForSequenceClassification, - BertTokenizer, - RobertaConfig, - RobertaForSequenceClassification, - RobertaTokenizer, - XLMConfig, - XLMForSequenceClassification, - XLMTokenizer, - XLNetConfig, - XLNetForSequenceClassification, - XLNetTokenizer, - DistilBertConfig, - DistilBertForSequenceClassification, - DistilBertTokenizer, - AlbertConfig, - AlbertForSequenceClassification, - AlbertTokenizer, - XLMRobertaConfig, - XLMRobertaForSequenceClassification, - XLMRobertaTokenizer, -) - -from transformers import AdamW, get_linear_schedule_with_warmup - -from transformers import glue_compute_metrics as compute_metrics -from transformers import glue_output_modes as output_modes -from transformers import glue_processors as processors -from transformers import glue_convert_examples_to_features as convert_examples_to_features logger = logging.getLogger(__name__) diff --git a/examples/run_lm_finetuning.py b/examples/run_lm_finetuning.py index 60b99f29d4..f916897d00 100644 --- a/examples/run_lm_finetuning.py +++ b/examples/run_lm_finetuning.py @@ -32,23 +32,22 @@ import shutil import numpy as np import torch -from torch.utils.data import DataLoader, Dataset, SequentialSampler, RandomSampler +from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler from torch.utils.data.distributed import DistributedSampler - -try: - from torch.utils.tensorboard import SummaryWriter -except: - from tensorboardX import SummaryWriter - from tqdm import tqdm, trange from transformers import ( WEIGHTS_NAME, AdamW, - get_linear_schedule_with_warmup, BertConfig, BertForMaskedLM, BertTokenizer, + CamembertConfig, + CamembertForMaskedLM, + CamembertTokenizer, + DistilBertConfig, + DistilBertForMaskedLM, + DistilBertTokenizer, GPT2Config, GPT2LMHeadModel, GPT2Tokenizer, @@ -58,15 +57,16 @@ from transformers import ( RobertaConfig, RobertaForMaskedLM, RobertaTokenizer, - DistilBertConfig, - DistilBertForMaskedLM, - DistilBertTokenizer, - CamembertConfig, - CamembertForMaskedLM, - CamembertTokenizer, + get_linear_schedule_with_warmup, ) +try: + from torch.utils.tensorboard import SummaryWriter +except: + from tensorboardX import SummaryWriter + + logger = logging.getLogger(__name__) diff --git a/examples/run_multiple_choice.py b/examples/run_multiple_choice.py index bfa62cfb7f..19ca558cac 100644 --- a/examples/run_multiple_choice.py +++ b/examples/run_multiple_choice.py @@ -23,35 +23,34 @@ import logging import os import random - import numpy as np import torch from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset from torch.utils.data.distributed import DistributedSampler +from tqdm import tqdm, trange + +from transformers import ( + WEIGHTS_NAME, + AdamW, + BertConfig, + BertForMultipleChoice, + BertTokenizer, + RobertaConfig, + RobertaForMultipleChoice, + RobertaTokenizer, + XLNetConfig, + XLNetForMultipleChoice, + XLNetTokenizer, + get_linear_schedule_with_warmup, +) +from utils_multiple_choice import convert_examples_to_features, processors + try: from torch.utils.tensorboard import SummaryWriter except: from tensorboardX import SummaryWriter -from tqdm import tqdm, trange - -from transformers import ( - WEIGHTS_NAME, - BertConfig, - BertForMultipleChoice, - BertTokenizer, - XLNetConfig, - XLNetForMultipleChoice, - XLNetTokenizer, - RobertaConfig, - RobertaForMultipleChoice, - RobertaTokenizer, -) - -from transformers import AdamW, get_linear_schedule_with_warmup - -from utils_multiple_choice import convert_examples_to_features, processors logger = logging.getLogger(__name__) diff --git a/examples/run_ner.py b/examples/run_ner.py index 48ac61b4fe..8d991555a9 100644 --- a/examples/run_ner.py +++ b/examples/run_ner.py @@ -25,20 +25,35 @@ import random import numpy as np import torch -from seqeval.metrics import precision_score, recall_score, f1_score from tensorboardX import SummaryWriter from torch.nn import CrossEntropyLoss from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset from torch.utils.data.distributed import DistributedSampler from tqdm import tqdm, trange + +from seqeval.metrics import f1_score, precision_score, recall_score +from transformers import ( + WEIGHTS_NAME, + AdamW, + BertConfig, + BertForTokenClassification, + BertTokenizer, + CamembertConfig, + CamembertForTokenClassification, + CamembertTokenizer, + DistilBertConfig, + DistilBertForTokenClassification, + DistilBertTokenizer, + RobertaConfig, + RobertaForTokenClassification, + RobertaTokenizer, + XLMRobertaConfig, + XLMRobertaForTokenClassification, + XLMRobertaTokenizer, + get_linear_schedule_with_warmup, +) from utils_ner import convert_examples_to_features, get_labels, read_examples_from_file -from transformers import AdamW, get_linear_schedule_with_warmup -from transformers import WEIGHTS_NAME, BertConfig, BertForTokenClassification, BertTokenizer -from transformers import RobertaConfig, RobertaForTokenClassification, RobertaTokenizer -from transformers import DistilBertConfig, DistilBertForTokenClassification, DistilBertTokenizer -from transformers import CamembertConfig, CamembertForTokenClassification, CamembertTokenizer -from transformers import XLMRobertaConfig, XLMRobertaForTokenClassification, XLMRobertaTokenizer logger = logging.getLogger(__name__) diff --git a/examples/run_squad.py b/examples/run_squad.py index 1580a31e85..9dbc39cb7b 100644 --- a/examples/run_squad.py +++ b/examples/run_squad.py @@ -16,57 +16,57 @@ """ Finetuning the library models for question-answering on SQuAD (DistilBERT, Bert, XLM, XLNet).""" from __future__ import absolute_import, division, print_function -from transformers.data.processors.squad import SquadV1Processor, SquadV2Processor, SquadResult -from transformers.data.metrics.squad_metrics import ( - compute_predictions_logits, - compute_predictions_log_probs, - squad_evaluate, -) import argparse +import glob import logging import os import random -import glob import timeit + import numpy as np import torch from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset from torch.utils.data.distributed import DistributedSampler - -try: - from torch.utils.tensorboard import SummaryWriter -except: - from tensorboardX import SummaryWriter - from tqdm import tqdm, trange from transformers import ( WEIGHTS_NAME, + AdamW, + AlbertConfig, + AlbertForQuestionAnswering, + AlbertTokenizer, BertConfig, BertForQuestionAnswering, BertTokenizer, + DistilBertConfig, + DistilBertForQuestionAnswering, + DistilBertTokenizer, + RobertaConfig, RobertaForQuestionAnswering, RobertaTokenizer, - RobertaConfig, XLMConfig, XLMForQuestionAnswering, XLMTokenizer, XLNetConfig, XLNetForQuestionAnswering, XLNetTokenizer, - DistilBertConfig, - DistilBertForQuestionAnswering, - DistilBertTokenizer, - AlbertConfig, - AlbertForQuestionAnswering, - AlbertTokenizer, - XLMConfig, - XLMForQuestionAnswering, - XLMTokenizer, + get_linear_schedule_with_warmup, + squad_convert_examples_to_features, ) +from transformers.data.metrics.squad_metrics import ( + compute_predictions_log_probs, + compute_predictions_logits, + squad_evaluate, +) +from transformers.data.processors.squad import SquadResult, SquadV1Processor, SquadV2Processor + + +try: + from torch.utils.tensorboard import SummaryWriter +except: + from tensorboardX import SummaryWriter -from transformers import AdamW, get_linear_schedule_with_warmup, squad_convert_examples_to_features logger = logging.getLogger(__name__) diff --git a/examples/run_tf_glue.py b/examples/run_tf_glue.py index 74a6db34ad..511a98e942 100644 --- a/examples/run_tf_glue.py +++ b/examples/run_tf_glue.py @@ -1,15 +1,18 @@ import os + import tensorflow as tf + import tensorflow_datasets from transformers import ( + BertConfig, + BertForSequenceClassification, BertTokenizer, TFBertForSequenceClassification, - BertConfig, glue_convert_examples_to_features, - BertForSequenceClassification, glue_processors, ) + # script parameters BATCH_SIZE = 32 EVAL_BATCH_SIZE = BATCH_SIZE * 2 diff --git a/examples/run_tf_ner.py b/examples/run_tf_ner.py index 77850d1ab5..68c4b15a06 100644 --- a/examples/run_tf_ner.py +++ b/examples/run_tf_ner.py @@ -1,23 +1,33 @@ # coding=utf-8 -import datetime -import os -import math -import glob -import re -import tensorflow as tf -import collections -import numpy as np -from seqeval import metrics import _pickle as pickle -from absl import logging -from transformers import TF2_WEIGHTS_NAME, BertConfig, BertTokenizer, TFBertForTokenClassification -from transformers import RobertaConfig, RobertaTokenizer, TFRobertaForTokenClassification -from transformers import DistilBertConfig, DistilBertTokenizer, TFDistilBertForTokenClassification -from transformers import create_optimizer, GradientAccumulator -from utils_ner import convert_examples_to_features, get_labels, read_examples_from_file +import collections +import datetime +import glob +import math +import os +import re + +import numpy as np +import tensorflow as tf +from absl import app, flags, logging + from fastprogress import master_bar, progress_bar -from absl import flags -from absl import app +from seqeval import metrics +from transformers import ( + TF2_WEIGHTS_NAME, + BertConfig, + BertTokenizer, + DistilBertConfig, + DistilBertTokenizer, + GradientAccumulator, + RobertaConfig, + RobertaTokenizer, + TFBertForTokenClassification, + TFDistilBertForTokenClassification, + TFRobertaForTokenClassification, + create_optimizer, +) +from utils_ner import convert_examples_to_features, get_labels, read_examples_from_file ALL_MODELS = sum( diff --git a/examples/run_xnli.py b/examples/run_xnli.py index 9faba294dd..bc1789f1d0 100644 --- a/examples/run_xnli.py +++ b/examples/run_xnli.py @@ -28,34 +28,33 @@ import numpy as np import torch from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset from torch.utils.data.distributed import DistributedSampler +from tqdm import tqdm, trange + +from transformers import ( + WEIGHTS_NAME, + AdamW, + BertConfig, + BertForSequenceClassification, + BertTokenizer, + DistilBertConfig, + DistilBertForSequenceClassification, + DistilBertTokenizer, + XLMConfig, + XLMForSequenceClassification, + XLMTokenizer, + get_linear_schedule_with_warmup, +) +from transformers import glue_convert_examples_to_features as convert_examples_to_features +from transformers import xnli_compute_metrics as compute_metrics +from transformers import xnli_output_modes as output_modes +from transformers import xnli_processors as processors + try: from torch.utils.tensorboard import SummaryWriter except: from tensorboardX import SummaryWriter -from tqdm import tqdm, trange - -from transformers import ( - WEIGHTS_NAME, - BertConfig, - BertForSequenceClassification, - BertTokenizer, - XLMConfig, - XLMForSequenceClassification, - XLMTokenizer, - DistilBertConfig, - DistilBertForSequenceClassification, - DistilBertTokenizer, -) - -from transformers import AdamW, get_linear_schedule_with_warmup - -from transformers import xnli_compute_metrics as compute_metrics -from transformers import xnli_output_modes as output_modes -from transformers import xnli_processors as processors - -from transformers import glue_convert_examples_to_features as convert_examples_to_features logger = logging.getLogger(__name__) diff --git a/examples/summarization/convert_bertabs_original_pytorch_checkpoint.py b/examples/summarization/convert_bertabs_original_pytorch_checkpoint.py index d32e6fc06c..a1cbd64dd8 100644 --- a/examples/summarization/convert_bertabs_original_pytorch_checkpoint.py +++ b/examples/summarization/convert_bertabs_original_pytorch_checkpoint.py @@ -20,13 +20,13 @@ the model within the original codebase to be able to only save its `state_dict`. """ import argparse -from collections import namedtuple import logging +from collections import namedtuple + import torch -from models.model_builder import AbsSummarizer # The authors' implementation from model_bertabs import BertAbsSummarizer - +from models.model_builder import AbsSummarizer # The authors' implementation from transformers import BertTokenizer diff --git a/examples/summarization/modeling_bertabs.py b/examples/summarization/modeling_bertabs.py index d4d8c6648d..e8087f3001 100644 --- a/examples/summarization/modeling_bertabs.py +++ b/examples/summarization/modeling_bertabs.py @@ -27,9 +27,8 @@ import torch from torch import nn from torch.nn.init import xavier_uniform_ -from transformers import BertModel, BertConfig, PreTrainedModel - from configuration_bertabs import BertAbsConfig +from transformers import BertConfig, BertModel, PreTrainedModel MAX_SIZE = 5000 diff --git a/examples/summarization/run_summarization.py b/examples/summarization/run_summarization.py index 36210d999d..1917ca30be 100644 --- a/examples/summarization/run_summarization.py +++ b/examples/summarization/run_summarization.py @@ -1,26 +1,25 @@ #! /usr/bin/python3 import argparse -from collections import namedtuple import logging import os import sys +from collections import namedtuple import torch from torch.utils.data import DataLoader, SequentialSampler from tqdm import tqdm -from transformers import BertTokenizer - from modeling_bertabs import BertAbs, build_predictor - +from transformers import BertTokenizer from utils_summarization import ( SummarizationDataset, - encode_for_summarization, build_mask, - fit_to_block_size, compute_token_type_ids, + encode_for_summarization, + fit_to_block_size, ) + logger = logging.getLogger(__name__) logging.basicConfig(stream=sys.stdout, level=logging.INFO) diff --git a/examples/summarization/utils_summarization.py b/examples/summarization/utils_summarization.py index 96470f47a2..360520fda3 100644 --- a/examples/summarization/utils_summarization.py +++ b/examples/summarization/utils_summarization.py @@ -1,5 +1,5 @@ -from collections import deque import os +from collections import deque import torch from torch.utils.data import Dataset diff --git a/examples/summarization/utils_summarization_test.py b/examples/summarization/utils_summarization_test.py index 253eae388d..86ec5b6006 100644 --- a/examples/summarization/utils_summarization_test.py +++ b/examples/summarization/utils_summarization_test.py @@ -17,12 +17,7 @@ import unittest import numpy as np import torch -from utils_summarization import ( - compute_token_type_ids, - fit_to_block_size, - build_mask, - process_story, -) +from utils_summarization import build_mask, compute_token_type_ids, fit_to_block_size, process_story class SummarizationDataProcessingTest(unittest.TestCase): diff --git a/examples/test_examples.py b/examples/test_examples.py index 1293559c26..d27f5671a4 100644 --- a/examples/test_examples.py +++ b/examples/test_examples.py @@ -12,14 +12,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -import sys -import unittest import argparse import logging +import sys +import unittest + +import run_generation +import run_glue +import run_squad + try: # python 3.4+ can use builtin unittest.mock instead of mock package @@ -27,9 +30,6 @@ try: except ImportError: from mock import patch -import run_glue -import run_squad -import run_generation logging.basicConfig(level=logging.DEBUG) diff --git a/examples/utils_multiple_choice.py b/examples/utils_multiple_choice.py index 492eb23e35..987ffbc0ef 100644 --- a/examples/utils_multiple_choice.py +++ b/examples/utils_multiple_choice.py @@ -17,16 +17,17 @@ from __future__ import absolute_import, division, print_function - +import csv +import glob +import json import logging import os import sys from io import open -import json -import csv -import glob -import tqdm from typing import List + +import tqdm + from transformers import PreTrainedTokenizer diff --git a/examples/utils_ner.py b/examples/utils_ner.py index d37583469c..214064e844 100644 --- a/examples/utils_ner.py +++ b/examples/utils_ner.py @@ -21,6 +21,7 @@ import logging import os from io import open + logger = logging.getLogger(__name__) diff --git a/hubconf.py b/hubconf.py index 1d100271ae..f8d0d1a84d 100644 --- a/hubconf.py +++ b/hubconf.py @@ -1,13 +1,14 @@ from transformers import ( - AutoTokenizer, AutoConfig, AutoModel, - AutoModelWithLMHead, - AutoModelForSequenceClassification, AutoModelForQuestionAnswering, + AutoModelForSequenceClassification, + AutoModelWithLMHead, + AutoTokenizer, ) from transformers.file_utils import add_start_docstrings + dependencies = ["torch", "tqdm", "boto3", "requests", "regex", "sentencepiece", "sacremoses"] diff --git a/setup.py b/setup.py index 59dbfef12b..13fe6d90f1 100644 --- a/setup.py +++ b/setup.py @@ -34,6 +34,7 @@ To create the package for pypi. """ from io import open + from setuptools import find_packages, setup diff --git a/templates/adding_a_new_example_script/run_xxx.py b/templates/adding_a_new_example_script/run_xxx.py index 64e92f2a28..e7e95ede67 100644 --- a/templates/adding_a_new_example_script/run_xxx.py +++ b/templates/adding_a_new_example_script/run_xxx.py @@ -17,54 +17,55 @@ from __future__ import absolute_import, division, print_function import argparse +import glob import logging import os import random -import glob import numpy as np import torch from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset from torch.utils.data.distributed import DistributedSampler - -try: - from torch.utils.tensorboard import SummaryWriter -except: - from tensorboardX import SummaryWriter - from tqdm import tqdm, trange from transformers import ( WEIGHTS_NAME, + AdamW, BertConfig, BertForQuestionAnswering, BertTokenizer, + DistilBertConfig, + DistilBertForQuestionAnswering, + DistilBertTokenizer, XLMConfig, XLMForQuestionAnswering, XLMTokenizer, XLNetConfig, XLNetForQuestionAnswering, XLNetTokenizer, - DistilBertConfig, - DistilBertForQuestionAnswering, - DistilBertTokenizer, + get_linear_schedule_with_warmup, ) - -from transformers import AdamW, get_linear_schedule_with_warmup - from utils_squad import ( - read_squad_examples, - convert_examples_to_features, RawResult, - write_predictions, RawResultExtended, + convert_examples_to_features, + read_squad_examples, + write_predictions, write_predictions_extended, ) # The follwing import is the official SQuAD evaluation script (2.0). # You can remove it from the dependencies if you are using this script outside of the library # We've added it here for automated tests (see examples/test_examples.py file) -from utils_squad_evaluate import EVAL_OPTS, main as evaluate_on_squad +from utils_squad_evaluate import EVAL_OPTS +from utils_squad_evaluate import main as evaluate_on_squad + + +try: + from torch.utils.tensorboard import SummaryWriter +except: + from tensorboardX import SummaryWriter + logger = logging.getLogger(__name__) diff --git a/templates/adding_a_new_example_script/utils_xxx.py b/templates/adding_a_new_example_script/utils_xxx.py index bd016bd306..4c5b97bd50 100644 --- a/templates/adding_a_new_example_script/utils_xxx.py +++ b/templates/adding_a_new_example_script/utils_xxx.py @@ -16,16 +16,17 @@ from __future__ import absolute_import, division, print_function +import collections import json import logging import math -import collections from io import open from transformers.tokenization_bert import BasicTokenizer, whitespace_tokenize # Required by XLNet evaluation method to compute optimal threshold (see write_predictions_extended() method) -from utils_squad_evaluate import find_all_best_thresh_v2, make_qid_to_has_ans, get_raw_scores +from utils_squad_evaluate import find_all_best_thresh_v2, get_raw_scores, make_qid_to_has_ans + logger = logging.getLogger(__name__) diff --git a/templates/adding_a_new_model/configuration_xxx.py b/templates/adding_a_new_model/configuration_xxx.py index 370fbb569f..9670b4f8c4 100644 --- a/templates/adding_a_new_model/configuration_xxx.py +++ b/templates/adding_a_new_model/configuration_xxx.py @@ -19,11 +19,13 @@ from __future__ import absolute_import, division, print_function, unicode_litera import json import logging import sys -import six from io import open +import six + from .configuration_utils import PretrainedConfig + logger = logging.getLogger(__name__) XXX_PRETRAINED_CONFIG_ARCHIVE_MAP = { diff --git a/templates/adding_a_new_model/convert_xxx_original_tf_checkpoint_to_pytorch.py b/templates/adding_a_new_model/convert_xxx_original_tf_checkpoint_to_pytorch.py index 99d3761496..2e6c473475 100755 --- a/templates/adding_a_new_model/convert_xxx_original_tf_checkpoint_to_pytorch.py +++ b/templates/adding_a_new_model/convert_xxx_original_tf_checkpoint_to_pytorch.py @@ -14,16 +14,15 @@ # limitations under the License. """Convert XXX checkpoint.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import argparse +import logging + import torch from transformers import XxxConfig, XxxForPreTraining, load_tf_weights_in_xxx -import logging logging.basicConfig(level=logging.INFO) diff --git a/templates/adding_a_new_model/modeling_tf_xxx.py b/templates/adding_a_new_model/modeling_tf_xxx.py index a4477704ae..3e8f51bfd2 100644 --- a/templates/adding_a_new_model/modeling_tf_xxx.py +++ b/templates/adding_a_new_model/modeling_tf_xxx.py @@ -21,21 +21,22 @@ from __future__ import absolute_import, division, print_function, unicode_literals +import copy +import itertools import json import logging import math import os import sys -import copy -import itertools from io import open import numpy as np import tensorflow as tf from .configuration_xxx import XxxConfig -from .modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list from .file_utils import add_start_docstrings +from .modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list + logger = logging.getLogger(__name__) diff --git a/templates/adding_a_new_model/modeling_xxx.py b/templates/adding_a_new_model/modeling_xxx.py index 7270376ec7..4ea3cca8c4 100644 --- a/templates/adding_a_new_model/modeling_xxx.py +++ b/templates/adding_a_new_model/modeling_xxx.py @@ -20,22 +20,23 @@ from __future__ import absolute_import, division, print_function, unicode_literals +import copy +import itertools import json import logging import math import os import sys -import copy -import itertools from io import open import torch from torch import nn from torch.nn import CrossEntropyLoss, MSELoss -from .modeling_utils import PreTrainedModel, prune_linear_layer from .configuration_xxx import XxxConfig from .file_utils import add_start_docstrings +from .modeling_utils import PreTrainedModel, prune_linear_layer + logger = logging.getLogger(__name__) diff --git a/templates/adding_a_new_model/tests/modeling_tf_xxx_test.py b/templates/adding_a_new_model/tests/modeling_tf_xxx_test.py index 1e4f64042a..b427df639a 100644 --- a/templates/adding_a_new_model/tests/modeling_tf_xxx_test.py +++ b/templates/adding_a_new_model/tests/modeling_tf_xxx_test.py @@ -12,19 +12,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -import unittest import sys - -from .modeling_tf_common_test import TFCommonTestCases, ids_tensor -from .configuration_common_test import ConfigTester -from .utils import CACHE_DIR, require_tf, slow +import unittest from transformers import XxxConfig, is_tf_available +from .configuration_common_test import ConfigTester +from .modeling_tf_common_test import TFCommonTestCases, ids_tensor +from .utils import CACHE_DIR, require_tf, slow + + if is_tf_available(): import tensorflow as tf from transformers.modeling_tf_xxx import ( diff --git a/templates/adding_a_new_model/tests/modeling_xxx_test.py b/templates/adding_a_new_model/tests/modeling_xxx_test.py index 2043d79655..4191922eb7 100644 --- a/templates/adding_a_new_model/tests/modeling_xxx_test.py +++ b/templates/adding_a_new_model/tests/modeling_xxx_test.py @@ -12,18 +12,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import unittest from transformers import is_torch_available -from .modeling_common_test import CommonTestCases, ids_tensor from .configuration_common_test import ConfigTester +from .modeling_common_test import CommonTestCases, ids_tensor from .utils import CACHE_DIR, require_torch, slow, torch_device + if is_torch_available(): from transformers import ( XxxConfig, diff --git a/templates/adding_a_new_model/tests/tokenization_xxx_test.py b/templates/adding_a_new_model/tests/tokenization_xxx_test.py index 940de5c769..087c1002d1 100644 --- a/templates/adding_a_new_model/tests/tokenization_xxx_test.py +++ b/templates/adding_a_new_model/tests/tokenization_xxx_test.py @@ -18,7 +18,7 @@ import os import unittest from io import open -from transformers.tokenization_bert import XxxTokenizer, VOCAB_FILES_NAMES +from transformers.tokenization_bert import VOCAB_FILES_NAMES, XxxTokenizer from .tokenization_tests_commons import CommonTestCases diff --git a/templates/adding_a_new_model/tokenization_xxx.py b/templates/adding_a_new_model/tokenization_xxx.py index c1ea93a6d2..30e3ce5674 100644 --- a/templates/adding_a_new_model/tokenization_xxx.py +++ b/templates/adding_a_new_model/tokenization_xxx.py @@ -24,6 +24,7 @@ from io import open from .tokenization_utils import PreTrainedTokenizer + logger = logging.getLogger(__name__) #################################################### diff --git a/transformers/__init__.py b/transformers/__init__.py index 318cd5ce4e..8e52771f2c 100755 --- a/transformers/__init__.py +++ b/transformers/__init__.py @@ -15,86 +15,114 @@ except: import logging -logger = logging.getLogger(__name__) # pylint: disable=invalid-name +from .configuration_albert import ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, AlbertConfig +from .configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP, AutoConfig +from .configuration_bert import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, BertConfig +from .configuration_camembert import CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, CamembertConfig +from .configuration_ctrl import CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP, CTRLConfig +from .configuration_distilbert import DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, DistilBertConfig +from .configuration_gpt2 import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2Config +from .configuration_mmbt import MMBTConfig +from .configuration_openai import OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP, OpenAIGPTConfig +from .configuration_roberta import ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, RobertaConfig +from .configuration_t5 import T5_PRETRAINED_CONFIG_ARCHIVE_MAP, T5Config +from .configuration_transfo_xl import TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP, TransfoXLConfig + +# Configurations +from .configuration_utils import PretrainedConfig +from .configuration_xlm import XLM_PRETRAINED_CONFIG_ARCHIVE_MAP, XLMConfig +from .configuration_xlm_roberta import XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, XLMRobertaConfig +from .configuration_xlnet import XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP, XLNetConfig +from .data import ( + DataProcessor, + InputExample, + InputFeatures, + SingleSentenceClassificationProcessor, + SquadExample, + SquadFeatures, + SquadV1Processor, + SquadV2Processor, + glue_convert_examples_to_features, + glue_output_modes, + glue_processors, + glue_tasks_num_labels, + is_sklearn_available, + squad_convert_examples_to_features, + xnli_output_modes, + xnli_processors, + xnli_tasks_num_labels, +) # Files and general utilities from .file_utils import ( - TRANSFORMERS_CACHE, - PYTORCH_TRANSFORMERS_CACHE, - PYTORCH_PRETRAINED_BERT_CACHE, - cached_path, - add_start_docstrings, - add_end_docstrings, - WEIGHTS_NAME, - TF2_WEIGHTS_NAME, - TF_WEIGHTS_NAME, CONFIG_NAME, MODEL_CARD_NAME, + PYTORCH_PRETRAINED_BERT_CACHE, + PYTORCH_TRANSFORMERS_CACHE, + TF2_WEIGHTS_NAME, + TF_WEIGHTS_NAME, + TRANSFORMERS_CACHE, + WEIGHTS_NAME, + add_end_docstrings, + add_start_docstrings, + cached_path, is_tf_available, is_torch_available, ) -from .data import ( - is_sklearn_available, - InputExample, - InputFeatures, - DataProcessor, - SingleSentenceClassificationProcessor, - glue_output_modes, - glue_convert_examples_to_features, - glue_processors, - glue_tasks_num_labels, - xnli_output_modes, - xnli_processors, - xnli_tasks_num_labels, - squad_convert_examples_to_features, - SquadFeatures, - SquadExample, - SquadV1Processor, - SquadV2Processor, +# Model Cards +from .modelcard import ModelCard + +# TF 2.0 <=> PyTorch conversion utilities +from .modeling_tf_pytorch_utils import ( + convert_tf_weight_name_to_pt_weight_name, + load_pytorch_checkpoint_in_tf2_model, + load_pytorch_model_in_tf2_model, + load_pytorch_weights_in_tf2_model, + load_tf2_checkpoint_in_pytorch_model, + load_tf2_model_in_pytorch_model, + load_tf2_weights_in_pytorch_model, ) +# Pipelines +from .pipelines import ( + CsvPipelineDataFormat, + FeatureExtractionPipeline, + JsonPipelineDataFormat, + NerPipeline, + PipedPipelineDataFormat, + Pipeline, + PipelineDataFormat, + QuestionAnsweringPipeline, + TextClassificationPipeline, + pipeline, +) +from .tokenization_albert import AlbertTokenizer +from .tokenization_auto import AutoTokenizer +from .tokenization_bert import BasicTokenizer, BertTokenizer, WordpieceTokenizer +from .tokenization_bert_japanese import BertJapaneseTokenizer, CharacterTokenizer, MecabTokenizer +from .tokenization_camembert import CamembertTokenizer +from .tokenization_ctrl import CTRLTokenizer +from .tokenization_distilbert import DistilBertTokenizer +from .tokenization_gpt2 import GPT2Tokenizer +from .tokenization_openai import OpenAIGPTTokenizer +from .tokenization_roberta import RobertaTokenizer +from .tokenization_t5 import T5Tokenizer +from .tokenization_transfo_xl import TransfoXLCorpus, TransfoXLTokenizer + +# Tokenizers +from .tokenization_utils import PreTrainedTokenizer +from .tokenization_xlm import XLMTokenizer +from .tokenization_xlm_roberta import XLMRobertaTokenizer +from .tokenization_xlnet import SPIECE_UNDERLINE, XLNetTokenizer + + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + if is_sklearn_available(): from .data import glue_compute_metrics, xnli_compute_metrics -# Model Cards -from .modelcard import ModelCard - -# Tokenizers -from .tokenization_utils import PreTrainedTokenizer -from .tokenization_auto import AutoTokenizer -from .tokenization_bert import BertTokenizer, BasicTokenizer, WordpieceTokenizer -from .tokenization_bert_japanese import BertJapaneseTokenizer, MecabTokenizer, CharacterTokenizer -from .tokenization_openai import OpenAIGPTTokenizer -from .tokenization_transfo_xl import TransfoXLTokenizer, TransfoXLCorpus -from .tokenization_gpt2 import GPT2Tokenizer -from .tokenization_ctrl import CTRLTokenizer -from .tokenization_xlnet import XLNetTokenizer, SPIECE_UNDERLINE -from .tokenization_xlm import XLMTokenizer -from .tokenization_roberta import RobertaTokenizer -from .tokenization_distilbert import DistilBertTokenizer -from .tokenization_albert import AlbertTokenizer -from .tokenization_camembert import CamembertTokenizer -from .tokenization_t5 import T5Tokenizer -from .tokenization_xlm_roberta import XLMRobertaTokenizer - -# Configurations -from .configuration_utils import PretrainedConfig -from .configuration_auto import AutoConfig, ALL_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_bert import BertConfig, BERT_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_openai import OpenAIGPTConfig, OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_transfo_xl import TransfoXLConfig, TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_gpt2 import GPT2Config, GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_ctrl import CTRLConfig, CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_xlnet import XLNetConfig, XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_xlm import XLMConfig, XLM_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_roberta import RobertaConfig, ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_distilbert import DistilBertConfig, DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_albert import AlbertConfig, ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_camembert import CamembertConfig, CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_t5 import T5Config, T5_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_xlm_roberta import XLMRobertaConfig, XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_mmbt import MMBTConfig # Modeling if is_torch_available(): @@ -345,30 +373,6 @@ if is_tf_available(): # Optimization from .optimization_tf import WarmUp, create_optimizer, AdamWeightDecay, GradientAccumulator -# TF 2.0 <=> PyTorch conversion utilities -from .modeling_tf_pytorch_utils import ( - convert_tf_weight_name_to_pt_weight_name, - load_pytorch_checkpoint_in_tf2_model, - load_pytorch_weights_in_tf2_model, - load_pytorch_model_in_tf2_model, - load_tf2_checkpoint_in_pytorch_model, - load_tf2_weights_in_pytorch_model, - load_tf2_model_in_pytorch_model, -) - -# Pipelines -from .pipelines import ( - pipeline, - PipelineDataFormat, - CsvPipelineDataFormat, - JsonPipelineDataFormat, - PipedPipelineDataFormat, - Pipeline, - FeatureExtractionPipeline, - QuestionAnsweringPipeline, - NerPipeline, - TextClassificationPipeline, -) if not is_tf_available() and not is_torch_available(): logger.warning( diff --git a/transformers/commands/convert.py b/transformers/commands/convert.py index e358d8532f..29b6b1076b 100644 --- a/transformers/commands/convert.py +++ b/transformers/commands/convert.py @@ -1,5 +1,4 @@ from argparse import ArgumentParser, Namespace - from logging import getLogger from transformers import AutoModel, AutoTokenizer diff --git a/transformers/commands/run.py b/transformers/commands/run.py index 6172263064..fdc88c55e4 100644 --- a/transformers/commands/run.py +++ b/transformers/commands/run.py @@ -2,7 +2,7 @@ import logging from argparse import ArgumentParser from transformers.commands import BaseTransformersCLICommand -from transformers.pipelines import pipeline, Pipeline, PipelineDataFormat, SUPPORTED_TASKS +from transformers.pipelines import SUPPORTED_TASKS, Pipeline, PipelineDataFormat, pipeline logger = logging.getLogger(__name__) # pylint: disable=invalid-name diff --git a/transformers/commands/serving.py b/transformers/commands/serving.py index f7729c0bf0..5d48cc0b3b 100644 --- a/transformers/commands/serving.py +++ b/transformers/commands/serving.py @@ -1,7 +1,11 @@ -from argparse import ArgumentParser, Namespace -from typing import List, Optional, Union, Any - import logging +from argparse import ArgumentParser, Namespace +from typing import Any, List, Optional, Union + +from transformers import Pipeline +from transformers.commands import BaseTransformersCLICommand +from transformers.pipelines import SUPPORTED_TASKS, pipeline + try: from uvicorn import run @@ -14,9 +18,6 @@ except (ImportError, AttributeError): Body = lambda *x, **y: None _serve_dependancies_installed = False -from transformers import Pipeline -from transformers.commands import BaseTransformersCLICommand -from transformers.pipelines import SUPPORTED_TASKS, pipeline logger = logging.getLogger("transformers-cli/serving") diff --git a/transformers/commands/train.py b/transformers/commands/train.py index e51be71c75..bf16a4f5e0 100644 --- a/transformers/commands/train.py +++ b/transformers/commands/train.py @@ -2,13 +2,10 @@ import os from argparse import ArgumentParser, Namespace from logging import getLogger +from transformers import SingleSentenceClassificationProcessor as Processor +from transformers import TextClassificationPipeline, is_tf_available, is_torch_available from transformers.commands import BaseTransformersCLICommand -from transformers import ( - is_tf_available, - is_torch_available, - TextClassificationPipeline, - SingleSentenceClassificationProcessor as Processor, -) + if not is_tf_available() and not is_torch_available(): raise ImportError("At least one of PyTorch or TensorFlow 2.0+ should be installed to use CLI training") diff --git a/transformers/commands/user.py b/transformers/commands/user.py index d29867d7c8..c6edda2801 100644 --- a/transformers/commands/user.py +++ b/transformers/commands/user.py @@ -1,6 +1,6 @@ +import os from argparse import ArgumentParser from getpass import getpass -import os from transformers.commands import BaseTransformersCLICommand from transformers.hf_api import HfApi, HfFolder, HTTPError diff --git a/transformers/configuration_albert.py b/transformers/configuration_albert.py index dc2b74a29c..1d6adfa7e9 100644 --- a/transformers/configuration_albert.py +++ b/transformers/configuration_albert.py @@ -17,6 +17,7 @@ from .configuration_utils import PretrainedConfig + ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { "albert-base-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-config.json", "albert-large-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-config.json", diff --git a/transformers/configuration_auto.py b/transformers/configuration_auto.py index e4311fc285..2c1d3f9d7f 100644 --- a/transformers/configuration_auto.py +++ b/transformers/configuration_auto.py @@ -18,19 +18,20 @@ from __future__ import absolute_import, division, print_function, unicode_litera import logging -from .configuration_bert import BertConfig, BERT_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_openai import OpenAIGPTConfig, OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_transfo_xl import TransfoXLConfig, TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_gpt2 import GPT2Config, GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_ctrl import CTRLConfig, CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_xlnet import XLNetConfig, XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_xlm import XLMConfig, XLM_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_roberta import RobertaConfig, ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_distilbert import DistilBertConfig, DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_albert import AlbertConfig, ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_camembert import CamembertConfig, CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_t5 import T5Config, T5_PRETRAINED_CONFIG_ARCHIVE_MAP -from .configuration_xlm_roberta import XLMRobertaConfig, XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP +from .configuration_albert import ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, AlbertConfig +from .configuration_bert import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, BertConfig +from .configuration_camembert import CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, CamembertConfig +from .configuration_ctrl import CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP, CTRLConfig +from .configuration_distilbert import DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, DistilBertConfig +from .configuration_gpt2 import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2Config +from .configuration_openai import OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP, OpenAIGPTConfig +from .configuration_roberta import ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, RobertaConfig +from .configuration_t5 import T5_PRETRAINED_CONFIG_ARCHIVE_MAP, T5Config +from .configuration_transfo_xl import TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP, TransfoXLConfig +from .configuration_xlm import XLM_PRETRAINED_CONFIG_ARCHIVE_MAP, XLMConfig +from .configuration_xlm_roberta import XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, XLMRobertaConfig +from .configuration_xlnet import XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP, XLNetConfig + logger = logging.getLogger(__name__) diff --git a/transformers/configuration_bert.py b/transformers/configuration_bert.py index 7c5ee434a4..21ab7e47cf 100644 --- a/transformers/configuration_bert.py +++ b/transformers/configuration_bert.py @@ -24,6 +24,7 @@ from io import open from .configuration_utils import PretrainedConfig + logger = logging.getLogger(__name__) BERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { diff --git a/transformers/configuration_camembert.py b/transformers/configuration_camembert.py index 9aa641aa5f..12f7d591e2 100644 --- a/transformers/configuration_camembert.py +++ b/transformers/configuration_camembert.py @@ -21,6 +21,7 @@ import logging from .configuration_roberta import RobertaConfig + logger = logging.getLogger(__name__) CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { diff --git a/transformers/configuration_ctrl.py b/transformers/configuration_ctrl.py index 2726727d48..9b9a999609 100644 --- a/transformers/configuration_ctrl.py +++ b/transformers/configuration_ctrl.py @@ -23,6 +23,7 @@ from io import open from .configuration_utils import PretrainedConfig + logger = logging.getLogger(__name__) CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP = {"ctrl": "https://storage.googleapis.com/sf-ctrl/pytorch/ctrl-config.json"} diff --git a/transformers/configuration_distilbert.py b/transformers/configuration_distilbert.py index 120cbfb9f2..8aae69ad0d 100644 --- a/transformers/configuration_distilbert.py +++ b/transformers/configuration_distilbert.py @@ -15,13 +15,14 @@ """ DistilBERT model configuration """ from __future__ import absolute_import, division, print_function, unicode_literals -import sys import json import logging +import sys from io import open from .configuration_utils import PretrainedConfig + logger = logging.getLogger(__name__) DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { diff --git a/transformers/configuration_gpt2.py b/transformers/configuration_gpt2.py index adc8842edc..1bccdf9c44 100644 --- a/transformers/configuration_gpt2.py +++ b/transformers/configuration_gpt2.py @@ -24,6 +24,7 @@ from io import open from .configuration_utils import PretrainedConfig + logger = logging.getLogger(__name__) GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP = { diff --git a/transformers/configuration_mmbt.py b/transformers/configuration_mmbt.py index 5dad2babef..3d85d4448b 100644 --- a/transformers/configuration_mmbt.py +++ b/transformers/configuration_mmbt.py @@ -19,6 +19,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera import logging + logger = logging.getLogger(__name__) diff --git a/transformers/configuration_openai.py b/transformers/configuration_openai.py index 53929aab5f..81b2c82c66 100644 --- a/transformers/configuration_openai.py +++ b/transformers/configuration_openai.py @@ -24,6 +24,7 @@ from io import open from .configuration_utils import PretrainedConfig + logger = logging.getLogger(__name__) OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP = { diff --git a/transformers/configuration_roberta.py b/transformers/configuration_roberta.py index 3b8ddd1c46..7b1074abd1 100644 --- a/transformers/configuration_roberta.py +++ b/transformers/configuration_roberta.py @@ -21,6 +21,7 @@ import logging from .configuration_bert import BertConfig + logger = logging.getLogger(__name__) ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP = { diff --git a/transformers/configuration_t5.py b/transformers/configuration_t5.py index 4584015e27..686e1af4dc 100644 --- a/transformers/configuration_t5.py +++ b/transformers/configuration_t5.py @@ -19,11 +19,13 @@ from __future__ import absolute_import, division, print_function, unicode_litera import json import logging import sys -import six from io import open +import six + from .configuration_utils import PretrainedConfig + logger = logging.getLogger(__name__) T5_PRETRAINED_CONFIG_ARCHIVE_MAP = { diff --git a/transformers/configuration_transfo_xl.py b/transformers/configuration_transfo_xl.py index a2a7c5c02d..49e6ce3031 100644 --- a/transformers/configuration_transfo_xl.py +++ b/transformers/configuration_transfo_xl.py @@ -24,6 +24,7 @@ from io import open from .configuration_utils import PretrainedConfig + logger = logging.getLogger(__name__) TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP = { diff --git a/transformers/configuration_utils.py b/transformers/configuration_utils.py index f29899175c..696930bb5d 100644 --- a/transformers/configuration_utils.py +++ b/transformers/configuration_utils.py @@ -23,7 +23,8 @@ import logging import os from io import open -from .file_utils import CONFIG_NAME, cached_path, is_remote_url, hf_bucket_url +from .file_utils import CONFIG_NAME, cached_path, hf_bucket_url, is_remote_url + logger = logging.getLogger(__name__) diff --git a/transformers/configuration_xlm.py b/transformers/configuration_xlm.py index a98024e9e6..cadf350c95 100644 --- a/transformers/configuration_xlm.py +++ b/transformers/configuration_xlm.py @@ -22,6 +22,7 @@ from io import open from .configuration_utils import PretrainedConfig + logger = logging.getLogger(__name__) XLM_PRETRAINED_CONFIG_ARCHIVE_MAP = { diff --git a/transformers/configuration_xlm_roberta.py b/transformers/configuration_xlm_roberta.py index fcf5c571d8..bbd275ffea 100644 --- a/transformers/configuration_xlm_roberta.py +++ b/transformers/configuration_xlm_roberta.py @@ -21,6 +21,7 @@ import logging from .configuration_roberta import RobertaConfig + logger = logging.getLogger(__name__) XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP = { diff --git a/transformers/configuration_xlnet.py b/transformers/configuration_xlnet.py index 8768aeac9b..5af883e8e7 100644 --- a/transformers/configuration_xlnet.py +++ b/transformers/configuration_xlnet.py @@ -23,6 +23,7 @@ from io import open from .configuration_utils import PretrainedConfig + logger = logging.getLogger(__name__) XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP = { diff --git a/transformers/convert_albert_original_tf_checkpoint_to_pytorch.py b/transformers/convert_albert_original_tf_checkpoint_to_pytorch.py index 733f6fc5ca..bba3269a90 100644 --- a/transformers/convert_albert_original_tf_checkpoint_to_pytorch.py +++ b/transformers/convert_albert_original_tf_checkpoint_to_pytorch.py @@ -14,16 +14,15 @@ # limitations under the License. """Convert ALBERT checkpoint.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import argparse +import logging + import torch from transformers import AlbertConfig, AlbertForMaskedLM, load_tf_weights_in_albert -import logging logging.basicConfig(level=logging.INFO) diff --git a/transformers/convert_bert_original_tf_checkpoint_to_pytorch.py b/transformers/convert_bert_original_tf_checkpoint_to_pytorch.py index 9393068b17..87608f482f 100755 --- a/transformers/convert_bert_original_tf_checkpoint_to_pytorch.py +++ b/transformers/convert_bert_original_tf_checkpoint_to_pytorch.py @@ -14,16 +14,15 @@ # limitations under the License. """Convert BERT checkpoint.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import argparse +import logging + import torch from transformers import BertConfig, BertForPreTraining, load_tf_weights_in_bert -import logging logging.basicConfig(level=logging.INFO) diff --git a/transformers/convert_bert_pytorch_checkpoint_to_original_tf.py b/transformers/convert_bert_pytorch_checkpoint_to_original_tf.py index 304c634502..c451521a46 100644 --- a/transformers/convert_bert_pytorch_checkpoint_to_original_tf.py +++ b/transformers/convert_bert_pytorch_checkpoint_to_original_tf.py @@ -15,11 +15,13 @@ """Convert Huggingface Pytorch checkpoint to Tensorflow checkpoint.""" -import os import argparse -import torch +import os + import numpy as np import tensorflow as tf +import torch + from transformers import BertModel diff --git a/transformers/convert_gpt2_original_tf_checkpoint_to_pytorch.py b/transformers/convert_gpt2_original_tf_checkpoint_to_pytorch.py index eeafdb81e5..3aa8957257 100755 --- a/transformers/convert_gpt2_original_tf_checkpoint_to_pytorch.py +++ b/transformers/convert_gpt2_original_tf_checkpoint_to_pytorch.py @@ -17,13 +17,13 @@ from __future__ import absolute_import, division, print_function import argparse +import logging from io import open import torch from transformers import CONFIG_NAME, WEIGHTS_NAME, GPT2Config, GPT2Model, load_tf_weights_in_gpt2 -import logging logging.basicConfig(level=logging.INFO) diff --git a/transformers/convert_openai_original_tf_checkpoint_to_pytorch.py b/transformers/convert_openai_original_tf_checkpoint_to_pytorch.py index c87bb9d594..25c2a0a006 100755 --- a/transformers/convert_openai_original_tf_checkpoint_to_pytorch.py +++ b/transformers/convert_openai_original_tf_checkpoint_to_pytorch.py @@ -17,13 +17,13 @@ from __future__ import absolute_import, division, print_function import argparse +import logging from io import open import torch from transformers import CONFIG_NAME, WEIGHTS_NAME, OpenAIGPTConfig, OpenAIGPTModel, load_tf_weights_in_openai_gpt -import logging logging.basicConfig(level=logging.INFO) diff --git a/transformers/convert_pytorch_checkpoint_to_tf2.py b/transformers/convert_pytorch_checkpoint_to_tf2.py index c7ad66e132..ba1dec53b6 100644 --- a/transformers/convert_pytorch_checkpoint_to_tf2.py +++ b/transformers/convert_pytorch_checkpoint_to_tf2.py @@ -14,58 +14,59 @@ # limitations under the License. """ Convert pytorch checkpoints to TensorFlow """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -import os import argparse +import logging +import os + import tensorflow as tf -from transformers import is_torch_available, cached_path - from transformers import ( - load_pytorch_checkpoint_in_tf2_model, + ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, + BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, + CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP, + DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, + GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, + OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP, + ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, + T5_PRETRAINED_CONFIG_ARCHIVE_MAP, + TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP, + XLM_PRETRAINED_CONFIG_ARCHIVE_MAP, + XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP, + AlbertConfig, BertConfig, + CTRLConfig, + DistilBertConfig, + GPT2Config, + OpenAIGPTConfig, + RobertaConfig, + T5Config, + TFAlbertForMaskedLM, TFBertForPreTraining, TFBertForQuestionAnswering, TFBertForSequenceClassification, - BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, - GPT2Config, - TFGPT2LMHeadModel, - GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, - XLNetConfig, - TFXLNetLMHeadModel, - XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP, - XLMConfig, - TFXLMWithLMHeadModel, - XLM_PRETRAINED_CONFIG_ARCHIVE_MAP, - TransfoXLConfig, - TFTransfoXLLMHeadModel, - TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP, - OpenAIGPTConfig, - TFOpenAIGPTLMHeadModel, - OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP, - RobertaConfig, - TFRobertaForMaskedLM, - TFRobertaForSequenceClassification, - ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, - DistilBertConfig, + TFCTRLLMHeadModel, TFDistilBertForMaskedLM, TFDistilBertForQuestionAnswering, TFDistilBertForSequenceClassification, - DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, - CTRLConfig, - TFCTRLLMHeadModel, - CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP, - AlbertConfig, - TFAlbertForMaskedLM, - ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, - T5Config, + TFGPT2LMHeadModel, + TFOpenAIGPTLMHeadModel, + TFRobertaForMaskedLM, + TFRobertaForSequenceClassification, TFT5WithLMHeadModel, - T5_PRETRAINED_CONFIG_ARCHIVE_MAP, + TFTransfoXLLMHeadModel, + TFXLMWithLMHeadModel, + TFXLNetLMHeadModel, + TransfoXLConfig, + XLMConfig, + XLNetConfig, + cached_path, + is_torch_available, + load_pytorch_checkpoint_in_tf2_model, ) + if is_torch_available(): import torch import numpy as np @@ -158,8 +159,6 @@ else: ) -import logging - logging.basicConfig(level=logging.INFO) MODEL_CLASSES = { diff --git a/transformers/convert_roberta_original_pytorch_checkpoint_to_pytorch.py b/transformers/convert_roberta_original_pytorch_checkpoint_to_pytorch.py index 3dec4882ff..5cf766b81d 100644 --- a/transformers/convert_roberta_original_pytorch_checkpoint_to_pytorch.py +++ b/transformers/convert_roberta_original_pytorch_checkpoint_to_pytorch.py @@ -18,16 +18,13 @@ from __future__ import absolute_import, division, print_function import argparse import logging -import numpy as np -import torch import pathlib -import fairseq +import numpy as np +import torch from packaging import version -if version.parse(fairseq.__version__) < version.parse("0.9.0"): - raise Exception("requires fairseq >= 0.9.0") - +import fairseq from fairseq.models.roberta import RobertaModel as FairseqRobertaModel from fairseq.modules import TransformerSentenceEncoderLayer from transformers.modeling_bert import ( @@ -47,6 +44,11 @@ from transformers.modeling_roberta import ( RobertaModel, ) + +if version.parse(fairseq.__version__) < version.parse("0.9.0"): + raise Exception("requires fairseq >= 0.9.0") + + logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) diff --git a/transformers/convert_t5_original_tf_checkpoint_to_pytorch.py b/transformers/convert_t5_original_tf_checkpoint_to_pytorch.py index 0b22a5f9c6..853c9b7175 100755 --- a/transformers/convert_t5_original_tf_checkpoint_to_pytorch.py +++ b/transformers/convert_t5_original_tf_checkpoint_to_pytorch.py @@ -14,16 +14,15 @@ # limitations under the License. """Convert T5 checkpoint.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import argparse +import logging + import torch from transformers import T5Config, T5Model, load_tf_weights_in_t5 -import logging logging.basicConfig(level=logging.INFO) diff --git a/transformers/convert_transfo_xl_original_tf_checkpoint_to_pytorch.py b/transformers/convert_transfo_xl_original_tf_checkpoint_to_pytorch.py index f8dd45ae55..dc3241706f 100755 --- a/transformers/convert_transfo_xl_original_tf_checkpoint_to_pytorch.py +++ b/transformers/convert_transfo_xl_original_tf_checkpoint_to_pytorch.py @@ -17,6 +17,7 @@ from __future__ import absolute_import, division, print_function import argparse +import logging import os import sys from io import open @@ -24,17 +25,21 @@ from io import open import torch import transformers.tokenization_transfo_xl as data_utils - -from transformers import CONFIG_NAME, WEIGHTS_NAME -from transformers import TransfoXLConfig, TransfoXLLMHeadModel, load_tf_weights_in_transfo_xl +from transformers import ( + CONFIG_NAME, + WEIGHTS_NAME, + TransfoXLConfig, + TransfoXLLMHeadModel, + load_tf_weights_in_transfo_xl, +) from transformers.tokenization_transfo_xl import CORPUS_NAME, VOCAB_FILES_NAMES + if sys.version_info[0] == 2: import cPickle as pickle else: import pickle -import logging logging.basicConfig(level=logging.INFO) diff --git a/transformers/convert_xlm_original_pytorch_checkpoint_to_pytorch.py b/transformers/convert_xlm_original_pytorch_checkpoint_to_pytorch.py index 7cbf9cae95..ef98b76ab1 100755 --- a/transformers/convert_xlm_original_pytorch_checkpoint_to_pytorch.py +++ b/transformers/convert_xlm_original_pytorch_checkpoint_to_pytorch.py @@ -18,15 +18,15 @@ from __future__ import absolute_import, division, print_function import argparse import json +import logging from io import open -import torch import numpy +import torch from transformers import CONFIG_NAME, WEIGHTS_NAME from transformers.tokenization_xlm import VOCAB_FILES_NAMES -import logging logging.basicConfig(level=logging.INFO) diff --git a/transformers/convert_xlnet_original_tf_checkpoint_to_pytorch.py b/transformers/convert_xlnet_original_tf_checkpoint_to_pytorch.py index 83688cf07b..37e93b7a1f 100755 --- a/transformers/convert_xlnet_original_tf_checkpoint_to_pytorch.py +++ b/transformers/convert_xlnet_original_tf_checkpoint_to_pytorch.py @@ -14,24 +14,25 @@ # limitations under the License. """Convert BERT checkpoint.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -import os import argparse +import logging +import os + import torch from transformers import ( CONFIG_NAME, WEIGHTS_NAME, XLNetConfig, - XLNetLMHeadModel, XLNetForQuestionAnswering, XLNetForSequenceClassification, + XLNetLMHeadModel, load_tf_weights_in_xlnet, ) + GLUE_TASKS_NUM_LABELS = { "cola": 2, "mnli": 3, @@ -44,7 +45,6 @@ GLUE_TASKS_NUM_LABELS = { "wnli": 2, } -import logging logging.basicConfig(level=logging.INFO) diff --git a/transformers/data/__init__.py b/transformers/data/__init__.py index bac6c6e3af..c0a3cbf4c2 100644 --- a/transformers/data/__init__.py +++ b/transformers/data/__init__.py @@ -1,15 +1,23 @@ +from .metrics import is_sklearn_available from .processors import ( + DataProcessor, InputExample, InputFeatures, - DataProcessor, - SquadFeatures, SingleSentenceClassificationProcessor, + SquadExample, + SquadFeatures, + SquadV1Processor, + SquadV2Processor, + glue_convert_examples_to_features, + glue_output_modes, + glue_processors, + glue_tasks_num_labels, + squad_convert_examples_to_features, + xnli_output_modes, + xnli_processors, + xnli_tasks_num_labels, ) -from .processors import glue_output_modes, glue_processors, glue_tasks_num_labels, glue_convert_examples_to_features -from .processors import squad_convert_examples_to_features, SquadExample, SquadV1Processor, SquadV2Processor -from .processors import xnli_output_modes, xnli_processors, xnli_tasks_num_labels -from .metrics import is_sklearn_available if is_sklearn_available(): from .metrics import glue_compute_metrics, xnli_compute_metrics diff --git a/transformers/data/metrics/__init__.py b/transformers/data/metrics/__init__.py index bd3b76efc0..5264d501a0 100644 --- a/transformers/data/metrics/__init__.py +++ b/transformers/data/metrics/__init__.py @@ -15,8 +15,9 @@ # limitations under the License. import csv -import sys import logging +import sys + logger = logging.getLogger(__name__) diff --git a/transformers/data/metrics/squad_metrics.py b/transformers/data/metrics/squad_metrics.py index a867fe3fde..0009a2e70f 100644 --- a/transformers/data/metrics/squad_metrics.py +++ b/transformers/data/metrics/squad_metrics.py @@ -8,17 +8,19 @@ that a question is unanswerable. """ +import collections import json import logging import math -import collections -from io import open -from tqdm import tqdm -import string import re +import string +from io import open + +from tqdm import tqdm from transformers.tokenization_bert import BasicTokenizer, whitespace_tokenize + logger = logging.getLogger(__name__) diff --git a/transformers/data/processors/__init__.py b/transformers/data/processors/__init__.py index e59e9fbcb2..dee7f438a5 100644 --- a/transformers/data/processors/__init__.py +++ b/transformers/data/processors/__init__.py @@ -1,4 +1,4 @@ -from .utils import InputExample, InputFeatures, DataProcessor, SingleSentenceClassificationProcessor -from .glue import glue_output_modes, glue_processors, glue_tasks_num_labels, glue_convert_examples_to_features -from .squad import squad_convert_examples_to_features, SquadFeatures, SquadExample, SquadV1Processor, SquadV2Processor +from .glue import glue_convert_examples_to_features, glue_output_modes, glue_processors, glue_tasks_num_labels +from .squad import SquadExample, SquadFeatures, SquadV1Processor, SquadV2Processor, squad_convert_examples_to_features +from .utils import DataProcessor, InputExample, InputFeatures, SingleSentenceClassificationProcessor from .xnli import xnli_output_modes, xnli_processors, xnli_tasks_num_labels diff --git a/transformers/data/processors/glue.py b/transformers/data/processors/glue.py index f9c0132a71..e88773ac95 100644 --- a/transformers/data/processors/glue.py +++ b/transformers/data/processors/glue.py @@ -18,8 +18,9 @@ import logging import os -from .utils import DataProcessor, InputExample, InputFeatures from ...file_utils import is_tf_available +from .utils import DataProcessor, InputExample, InputFeatures + if is_tf_available(): import tensorflow as tf diff --git a/transformers/data/processors/squad.py b/transformers/data/processors/squad.py index efb10830bd..d47211a0d8 100644 --- a/transformers/data/processors/squad.py +++ b/transformers/data/processors/squad.py @@ -1,16 +1,17 @@ -from tqdm import tqdm import collections +import json import logging import os -import json -import numpy as np -from multiprocessing import Pool -from multiprocessing import cpu_count from functools import partial +from multiprocessing import Pool, cpu_count +import numpy as np +from tqdm import tqdm + +from ...file_utils import is_tf_available, is_torch_available from ...tokenization_bert import BasicTokenizer, whitespace_tokenize from .utils import DataProcessor, InputExample, InputFeatures -from ...file_utils import is_tf_available, is_torch_available + if is_torch_available(): import torch diff --git a/transformers/data/processors/utils.py b/transformers/data/processors/utils.py index 41cc00d4bd..7e044438af 100644 --- a/transformers/data/processors/utils.py +++ b/transformers/data/processors/utils.py @@ -14,14 +14,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import csv -import sys import copy +import csv import json import logging +import sys from ...file_utils import is_tf_available, is_torch_available + logger = logging.getLogger(__name__) diff --git a/transformers/data/processors/xnli.py b/transformers/data/processors/xnli.py index ffe0358c1e..d67a53062e 100644 --- a/transformers/data/processors/xnli.py +++ b/transformers/data/processors/xnli.py @@ -22,6 +22,7 @@ import os from .utils import DataProcessor, InputExample + logger = logging.getLogger(__name__) diff --git a/transformers/file_utils.py b/transformers/file_utils.py index c45bdee04a..b1a4d240d2 100644 --- a/transformers/file_utils.py +++ b/transformers/file_utils.py @@ -5,26 +5,27 @@ Copyright by the AllenNLP authors. """ from __future__ import absolute_import, division, print_function, unicode_literals -import sys +import fnmatch import json import logging import os -import six +import sys import tempfile -import fnmatch +from contextlib import contextmanager from functools import partial, wraps from hashlib import sha256 from io import open import boto3 +import requests +import six from botocore.config import Config from botocore.exceptions import ClientError -import requests +from filelock import FileLock from tqdm.auto import tqdm -from contextlib import contextmanager + from . import __version__ -from filelock import FileLock logger = logging.getLogger(__name__) # pylint: disable=invalid-name diff --git a/transformers/hf_api.py b/transformers/hf_api.py index 81cc9f7ebb..f92c10df58 100644 --- a/transformers/hf_api.py +++ b/transformers/hf_api.py @@ -22,6 +22,7 @@ import six from requests.exceptions import HTTPError from tqdm import tqdm + ENDPOINT = "https://huggingface.co" diff --git a/transformers/modelcard.py b/transformers/modelcard.py index e6b1982e96..bd218f0c46 100644 --- a/transformers/modelcard.py +++ b/transformers/modelcard.py @@ -23,15 +23,14 @@ import os from io import open from .configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP - from .file_utils import ( CONFIG_NAME, MODEL_CARD_NAME, - WEIGHTS_NAME, TF2_WEIGHTS_NAME, + WEIGHTS_NAME, cached_path, - is_remote_url, hf_bucket_url, + is_remote_url, ) diff --git a/transformers/modeling_albert.py b/transformers/modeling_albert.py index 3d55bcd64d..7ff2000845 100644 --- a/transformers/modeling_albert.py +++ b/transformers/modeling_albert.py @@ -14,17 +14,21 @@ # limitations under the License. """PyTorch ALBERT model. """ -import os -import math import logging +import math +import os + import torch import torch.nn as nn from torch.nn import CrossEntropyLoss, MSELoss -from transformers.modeling_utils import PreTrainedModel + from transformers.configuration_albert import AlbertConfig -from transformers.modeling_bert import BertEmbeddings, BertSelfAttention, prune_linear_layer, ACT2FN +from transformers.modeling_bert import ACT2FN, BertEmbeddings, BertSelfAttention, prune_linear_layer +from transformers.modeling_utils import PreTrainedModel + from .file_utils import add_start_docstrings + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_auto.py b/transformers/modeling_auto.py index 31e9ee6bd2..bcdde45bd1 100644 --- a/transformers/modeling_auto.py +++ b/transformers/modeling_auto.py @@ -29,80 +29,78 @@ from .configuration_auto import ( RobertaConfig, TransfoXLConfig, XLMConfig, - XLNetConfig, XLMRobertaConfig, + XLNetConfig, +) +from .file_utils import add_start_docstrings +from .modeling_albert import ( + ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP, + AlbertForMaskedLM, + AlbertForQuestionAnswering, + AlbertForSequenceClassification, + AlbertModel, ) - from .modeling_bert import ( - BertModel, - BertForMaskedLM, - BertForSequenceClassification, - BertForQuestionAnswering, - BertForTokenClassification, BERT_PRETRAINED_MODEL_ARCHIVE_MAP, + BertForMaskedLM, + BertForQuestionAnswering, + BertForSequenceClassification, + BertForTokenClassification, + BertModel, ) -from .modeling_openai import OpenAIGPTModel, OpenAIGPTLMHeadModel, OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP -from .modeling_gpt2 import GPT2Model, GPT2LMHeadModel, GPT2_PRETRAINED_MODEL_ARCHIVE_MAP -from .modeling_ctrl import CTRLModel, CTRLLMHeadModel, CTRL_PRETRAINED_MODEL_ARCHIVE_MAP -from .modeling_transfo_xl import TransfoXLModel, TransfoXLLMHeadModel, TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP -from .modeling_xlnet import ( - XLNetModel, - XLNetLMHeadModel, - XLNetForSequenceClassification, - XLNetForQuestionAnswering, - XLNetForTokenClassification, - XLNET_PRETRAINED_MODEL_ARCHIVE_MAP, +from .modeling_camembert import ( + CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP, + CamembertForMaskedLM, + CamembertForMultipleChoice, + CamembertForSequenceClassification, + CamembertForTokenClassification, + CamembertModel, ) -from .modeling_xlm import ( - XLMModel, - XLMWithLMHeadModel, - XLMForSequenceClassification, - XLMForQuestionAnswering, - XLM_PRETRAINED_MODEL_ARCHIVE_MAP, +from .modeling_ctrl import CTRL_PRETRAINED_MODEL_ARCHIVE_MAP, CTRLLMHeadModel, CTRLModel +from .modeling_distilbert import ( + DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP, + DistilBertForMaskedLM, + DistilBertForQuestionAnswering, + DistilBertForSequenceClassification, + DistilBertForTokenClassification, + DistilBertModel, ) +from .modeling_gpt2 import GPT2_PRETRAINED_MODEL_ARCHIVE_MAP, GPT2LMHeadModel, GPT2Model +from .modeling_openai import OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP, OpenAIGPTLMHeadModel, OpenAIGPTModel from .modeling_roberta import ( - RobertaModel, + ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, RobertaForMaskedLM, RobertaForSequenceClassification, RobertaForTokenClassification, - ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, + RobertaModel, ) -from .modeling_distilbert import ( - DistilBertModel, - DistilBertForQuestionAnswering, - DistilBertForMaskedLM, - DistilBertForSequenceClassification, - DistilBertForTokenClassification, - DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP, -) -from .modeling_camembert import ( - CamembertModel, - CamembertForMaskedLM, - CamembertForSequenceClassification, - CamembertForMultipleChoice, - CamembertForTokenClassification, - CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP, -) -from .modeling_albert import ( - AlbertModel, - AlbertForMaskedLM, - AlbertForSequenceClassification, - AlbertForQuestionAnswering, - ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP, -) -from .modeling_t5 import T5Model, T5WithLMHeadModel, T5_PRETRAINED_MODEL_ARCHIVE_MAP -from .modeling_xlm_roberta import ( - XLMRobertaModel, - XLMRobertaForMaskedLM, - XLMRobertaForSequenceClassification, - XLMRobertaForMultipleChoice, - XLMRobertaForTokenClassification, - XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, -) - +from .modeling_t5 import T5_PRETRAINED_MODEL_ARCHIVE_MAP, T5Model, T5WithLMHeadModel +from .modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP, TransfoXLLMHeadModel, TransfoXLModel from .modeling_utils import PreTrainedModel, SequenceSummary +from .modeling_xlm import ( + XLM_PRETRAINED_MODEL_ARCHIVE_MAP, + XLMForQuestionAnswering, + XLMForSequenceClassification, + XLMModel, + XLMWithLMHeadModel, +) +from .modeling_xlm_roberta import ( + XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, + XLMRobertaForMaskedLM, + XLMRobertaForMultipleChoice, + XLMRobertaForSequenceClassification, + XLMRobertaForTokenClassification, + XLMRobertaModel, +) +from .modeling_xlnet import ( + XLNET_PRETRAINED_MODEL_ARCHIVE_MAP, + XLNetForQuestionAnswering, + XLNetForSequenceClassification, + XLNetForTokenClassification, + XLNetLMHeadModel, + XLNetModel, +) -from .file_utils import add_start_docstrings logger = logging.getLogger(__name__) diff --git a/transformers/modeling_bert.py b/transformers/modeling_bert.py index 0994e832de..9c6cccf71f 100644 --- a/transformers/modeling_bert.py +++ b/transformers/modeling_bert.py @@ -26,9 +26,10 @@ import torch from torch import nn from torch.nn import CrossEntropyLoss, MSELoss -from .modeling_utils import PreTrainedModel, prune_linear_layer from .configuration_bert import BertConfig from .file_utils import add_start_docstrings +from .modeling_utils import PreTrainedModel, prune_linear_layer + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_camembert.py b/transformers/modeling_camembert.py index 2a7a7a7332..363399ee50 100644 --- a/transformers/modeling_camembert.py +++ b/transformers/modeling_camembert.py @@ -19,15 +19,16 @@ from __future__ import absolute_import, division, print_function, unicode_litera import logging -from .modeling_roberta import ( - RobertaModel, - RobertaForMaskedLM, - RobertaForSequenceClassification, - RobertaForMultipleChoice, - RobertaForTokenClassification, -) from .configuration_camembert import CamembertConfig from .file_utils import add_start_docstrings +from .modeling_roberta import ( + RobertaForMaskedLM, + RobertaForMultipleChoice, + RobertaForSequenceClassification, + RobertaForTokenClassification, + RobertaModel, +) + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_ctrl.py b/transformers/modeling_ctrl.py index 37c15cf54f..f34189170d 100644 --- a/transformers/modeling_ctrl.py +++ b/transformers/modeling_ctrl.py @@ -24,15 +24,17 @@ import math import os import sys from io import open + import numpy as np import torch import torch.nn as nn from torch.nn import CrossEntropyLoss from torch.nn.parameter import Parameter -from .modeling_utils import PreTrainedModel, Conv1D, prune_conv1d_layer, SequenceSummary from .configuration_ctrl import CTRLConfig from .file_utils import add_start_docstrings +from .modeling_utils import Conv1D, PreTrainedModel, SequenceSummary, prune_conv1d_layer + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_distilbert.py b/transformers/modeling_distilbert.py index 7345c23651..5fef44384d 100644 --- a/transformers/modeling_distilbert.py +++ b/transformers/modeling_distilbert.py @@ -18,25 +18,23 @@ """ from __future__ import absolute_import, division, print_function, unicode_literals +import copy +import itertools import json import logging import math -import copy import sys from io import open -import itertools import numpy as np - import torch import torch.nn as nn from torch.nn import CrossEntropyLoss -from .modeling_utils import PreTrainedModel, prune_linear_layer from .configuration_distilbert import DistilBertConfig from .file_utils import add_start_docstrings +from .modeling_utils import PreTrainedModel, prune_linear_layer -import logging logger = logging.getLogger(__name__) diff --git a/transformers/modeling_encoder_decoder.py b/transformers/modeling_encoder_decoder.py index e5bad422c4..dfdcc418d4 100644 --- a/transformers/modeling_encoder_decoder.py +++ b/transformers/modeling_encoder_decoder.py @@ -26,6 +26,7 @@ from tqdm import trange from .modeling_auto import AutoModel, AutoModelWithLMHead + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_gpt2.py b/transformers/modeling_gpt2.py index fe8a973f0b..94e977e620 100644 --- a/transformers/modeling_gpt2.py +++ b/transformers/modeling_gpt2.py @@ -30,9 +30,10 @@ import torch.nn as nn from torch.nn import CrossEntropyLoss from torch.nn.parameter import Parameter -from .modeling_utils import PreTrainedModel, Conv1D, prune_conv1d_layer, SequenceSummary from .configuration_gpt2 import GPT2Config from .file_utils import add_start_docstrings +from .modeling_utils import Conv1D, PreTrainedModel, SequenceSummary, prune_conv1d_layer + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_mmbt.py b/transformers/modeling_mmbt.py index 1c173ac692..2c22a409bc 100644 --- a/transformers/modeling_mmbt.py +++ b/transformers/modeling_mmbt.py @@ -25,6 +25,7 @@ from torch.nn import CrossEntropyLoss, MSELoss from .file_utils import add_start_docstrings + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_openai.py b/transformers/modeling_openai.py index ed746ecac8..3f37a4acfb 100644 --- a/transformers/modeling_openai.py +++ b/transformers/modeling_openai.py @@ -30,9 +30,10 @@ import torch.nn as nn from torch.nn import CrossEntropyLoss from torch.nn.parameter import Parameter -from .modeling_utils import PreTrainedModel, Conv1D, prune_conv1d_layer, SequenceSummary from .configuration_openai import OpenAIGPTConfig from .file_utils import add_start_docstrings +from .modeling_utils import Conv1D, PreTrainedModel, SequenceSummary, prune_conv1d_layer + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_roberta.py b/transformers/modeling_roberta.py index 730058ea92..f6233061aa 100644 --- a/transformers/modeling_roberta.py +++ b/transformers/modeling_roberta.py @@ -23,9 +23,10 @@ import torch import torch.nn as nn from torch.nn import CrossEntropyLoss, MSELoss -from .modeling_bert import BertEmbeddings, BertLayerNorm, BertModel, BertPreTrainedModel, gelu from .configuration_roberta import RobertaConfig from .file_utils import add_start_docstrings +from .modeling_bert import BertEmbeddings, BertLayerNorm, BertModel, BertPreTrainedModel, gelu + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_t5.py b/transformers/modeling_t5.py index 2ee8cd011b..1467a0cd21 100644 --- a/transformers/modeling_t5.py +++ b/transformers/modeling_t5.py @@ -16,23 +16,24 @@ from __future__ import absolute_import, division, print_function, unicode_literals +import copy +import itertools import json import logging import math import os import sys -import copy -import itertools from io import open import torch -from torch import nn import torch.nn.functional as F +from torch import nn from torch.nn import CrossEntropyLoss, MSELoss -from .modeling_utils import PreTrainedModel, prune_linear_layer from .configuration_t5 import T5Config -from .file_utils import add_start_docstrings, DUMMY_INPUTS, DUMMY_MASK +from .file_utils import DUMMY_INPUTS, DUMMY_MASK, add_start_docstrings +from .modeling_utils import PreTrainedModel, prune_linear_layer + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_tf_albert.py b/transformers/modeling_tf_albert.py index 25d0863987..de6ef405c8 100644 --- a/transformers/modeling_tf_albert.py +++ b/transformers/modeling_tf_albert.py @@ -22,11 +22,10 @@ import sys import tensorflow as tf from .configuration_albert import AlbertConfig -from .modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list -from .modeling_tf_bert import ACT2FN, TFBertSelfAttention from .file_utils import add_start_docstrings +from .modeling_tf_bert import ACT2FN, TFBertSelfAttention +from .modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list -import logging logger = logging.getLogger(__name__) diff --git a/transformers/modeling_tf_auto.py b/transformers/modeling_tf_auto.py index 24a7338d41..9ce83fe4d6 100644 --- a/transformers/modeling_tf_auto.py +++ b/transformers/modeling_tf_auto.py @@ -29,62 +29,61 @@ from .configuration_auto import ( XLMConfig, XLNetConfig, ) - +from .file_utils import add_start_docstrings +from .modeling_tf_albert import ( + TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP, + TFAlbertForMaskedLM, + TFAlbertForSequenceClassification, + TFAlbertModel, +) from .modeling_tf_bert import ( - TFBertModel, - TFBertForMaskedLM, - TFBertForSequenceClassification, - TFBertForQuestionAnswering, - TFBertForTokenClassification, TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP, + TFBertForMaskedLM, + TFBertForQuestionAnswering, + TFBertForSequenceClassification, + TFBertForTokenClassification, + TFBertModel, ) -from .modeling_tf_openai import TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel, TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP -from .modeling_tf_gpt2 import TFGPT2Model, TFGPT2LMHeadModel, TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP -from .modeling_tf_transfo_xl import ( - TFTransfoXLModel, - TFTransfoXLLMHeadModel, - TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP, -) -from .modeling_tf_xlnet import ( - TFXLNetModel, - TFXLNetLMHeadModel, - TFXLNetForSequenceClassification, - TFXLNetForQuestionAnsweringSimple, - TFXLNetForTokenClassification, - TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP, -) -from .modeling_tf_xlm import ( - TFXLMModel, - TFXLMWithLMHeadModel, - TFXLMForSequenceClassification, - TFXLMForQuestionAnsweringSimple, - TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP, +from .modeling_tf_ctrl import TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP, TFCTRLLMHeadModel, TFCTRLModel +from .modeling_tf_distilbert import ( + TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP, + TFDistilBertForMaskedLM, + TFDistilBertForQuestionAnswering, + TFDistilBertForSequenceClassification, + TFDistilBertForTokenClassification, + TFDistilBertModel, ) +from .modeling_tf_gpt2 import TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP, TFGPT2LMHeadModel, TFGPT2Model +from .modeling_tf_openai import TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP, TFOpenAIGPTLMHeadModel, TFOpenAIGPTModel from .modeling_tf_roberta import ( - TFRobertaModel, + TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, TFRobertaForMaskedLM, TFRobertaForSequenceClassification, TFRobertaForTokenClassification, - TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, + TFRobertaModel, ) -from .modeling_tf_distilbert import ( - TFDistilBertModel, - TFDistilBertForQuestionAnswering, - TFDistilBertForMaskedLM, - TFDistilBertForSequenceClassification, - TFDistilBertForTokenClassification, - TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP, +from .modeling_tf_t5 import TF_T5_PRETRAINED_MODEL_ARCHIVE_MAP, TFT5Model, TFT5WithLMHeadModel +from .modeling_tf_transfo_xl import ( + TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP, + TFTransfoXLLMHeadModel, + TFTransfoXLModel, ) -from .modeling_tf_ctrl import TFCTRLModel, TFCTRLLMHeadModel, TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP -from .modeling_tf_albert import ( - TFAlbertModel, - TFAlbertForMaskedLM, - TFAlbertForSequenceClassification, - TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP, +from .modeling_tf_xlm import ( + TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP, + TFXLMForQuestionAnsweringSimple, + TFXLMForSequenceClassification, + TFXLMModel, + TFXLMWithLMHeadModel, +) +from .modeling_tf_xlnet import ( + TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP, + TFXLNetForQuestionAnsweringSimple, + TFXLNetForSequenceClassification, + TFXLNetForTokenClassification, + TFXLNetLMHeadModel, + TFXLNetModel, ) -from .modeling_tf_t5 import TFT5Model, TFT5WithLMHeadModel, TF_T5_PRETRAINED_MODEL_ARCHIVE_MAP -from .file_utils import add_start_docstrings logger = logging.getLogger(__name__) diff --git a/transformers/modeling_tf_bert.py b/transformers/modeling_tf_bert.py index bcb83d5df7..4f919eab2d 100644 --- a/transformers/modeling_tf_bert.py +++ b/transformers/modeling_tf_bert.py @@ -28,8 +28,9 @@ import numpy as np import tensorflow as tf from .configuration_bert import BertConfig -from .modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list from .file_utils import add_start_docstrings +from .modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_tf_ctrl.py b/transformers/modeling_tf_ctrl.py index 3aba94a50d..a3a22040d3 100644 --- a/transformers/modeling_tf_ctrl.py +++ b/transformers/modeling_tf_ctrl.py @@ -21,12 +21,14 @@ import logging import os import sys from io import open + import numpy as np import tensorflow as tf from .configuration_ctrl import CTRLConfig -from .modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list, TFSharedEmbeddings from .file_utils import add_start_docstrings +from .modeling_tf_utils import TFPreTrainedModel, TFSharedEmbeddings, get_initializer, shape_list + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_tf_distilbert.py b/transformers/modeling_tf_distilbert.py index e9e89d2e73..98317488b5 100644 --- a/transformers/modeling_tf_distilbert.py +++ b/transformers/modeling_tf_distilbert.py @@ -16,21 +16,21 @@ """ from __future__ import absolute_import, division, print_function, unicode_literals +import copy +import itertools import json import logging import math -import copy import sys from io import open -import itertools - import numpy as np import tensorflow as tf from .configuration_distilbert import DistilBertConfig -from .modeling_tf_utils import TFPreTrainedModel, TFSharedEmbeddings, shape_list, get_initializer from .file_utils import add_start_docstrings +from .modeling_tf_utils import TFPreTrainedModel, TFSharedEmbeddings, get_initializer, shape_list + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_tf_gpt2.py b/transformers/modeling_tf_gpt2.py index a4722fb343..bf551991b5 100644 --- a/transformers/modeling_tf_gpt2.py +++ b/transformers/modeling_tf_gpt2.py @@ -28,16 +28,17 @@ from io import open import numpy as np import tensorflow as tf -from .modeling_tf_utils import ( - TFPreTrainedModel, - TFConv1D, - TFSharedEmbeddings, - TFSequenceSummary, - shape_list, - get_initializer, -) from .configuration_gpt2 import GPT2Config from .file_utils import add_start_docstrings +from .modeling_tf_utils import ( + TFConv1D, + TFPreTrainedModel, + TFSequenceSummary, + TFSharedEmbeddings, + get_initializer, + shape_list, +) + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_tf_openai.py b/transformers/modeling_tf_openai.py index 4720e3c5db..44924b4f42 100644 --- a/transformers/modeling_tf_openai.py +++ b/transformers/modeling_tf_openai.py @@ -28,16 +28,17 @@ from io import open import numpy as np import tensorflow as tf -from .modeling_tf_utils import ( - TFPreTrainedModel, - TFConv1D, - TFSharedEmbeddings, - TFSequenceSummary, - shape_list, - get_initializer, -) from .configuration_openai import OpenAIGPTConfig from .file_utils import add_start_docstrings +from .modeling_tf_utils import ( + TFConv1D, + TFPreTrainedModel, + TFSequenceSummary, + TFSharedEmbeddings, + get_initializer, + shape_list, +) + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_tf_pytorch_utils.py b/transformers/modeling_tf_pytorch_utils.py index d1073d23a3..f05b8aa4a9 100644 --- a/transformers/modeling_tf_pytorch_utils.py +++ b/transformers/modeling_tf_pytorch_utils.py @@ -20,8 +20,10 @@ from __future__ import absolute_import, division, print_function, unicode_litera import logging import os import re + import numpy + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_tf_roberta.py b/transformers/modeling_tf_roberta.py index 136ab66157..5c40682c38 100644 --- a/transformers/modeling_tf_roberta.py +++ b/transformers/modeling_tf_roberta.py @@ -22,10 +22,10 @@ import logging import tensorflow as tf from .configuration_roberta import RobertaConfig -from .modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list from .file_utils import add_start_docstrings - from .modeling_tf_bert import TFBertEmbeddings, TFBertMainLayer, gelu, gelu_new +from .modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_tf_t5.py b/transformers/modeling_tf_t5.py index 38a2bf4190..e995bc3c9e 100644 --- a/transformers/modeling_tf_t5.py +++ b/transformers/modeling_tf_t5.py @@ -17,16 +17,17 @@ from __future__ import absolute_import, division, print_function, unicode_literals -import logging -import math import copy import itertools +import logging +import math import tensorflow as tf from .configuration_t5 import T5Config +from .file_utils import DUMMY_INPUTS, DUMMY_MASK, add_start_docstrings from .modeling_tf_utils import TFPreTrainedModel, TFSharedEmbeddings, shape_list -from .file_utils import add_start_docstrings, DUMMY_INPUTS, DUMMY_MASK + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_tf_transfo_xl.py b/transformers/modeling_tf_transfo_xl.py index fc7ea932aa..b6807d33dd 100644 --- a/transformers/modeling_tf_transfo_xl.py +++ b/transformers/modeling_tf_transfo_xl.py @@ -18,11 +18,11 @@ from __future__ import absolute_import, division, print_function, unicode_literals -import os -import json -import math -import logging import collections +import json +import logging +import math +import os import sys from io import open @@ -30,9 +30,10 @@ import numpy as np import tensorflow as tf from .configuration_transfo_xl import TransfoXLConfig -from .modeling_tf_utils import TFPreTrainedModel, TFConv1D, TFSequenceSummary, shape_list, get_initializer -from .modeling_tf_transfo_xl_utilities import TFAdaptiveSoftmaxMask from .file_utils import add_start_docstrings +from .modeling_tf_transfo_xl_utilities import TFAdaptiveSoftmaxMask +from .modeling_tf_utils import TFConv1D, TFPreTrainedModel, TFSequenceSummary, get_initializer, shape_list + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_tf_transfo_xl_utilities.py b/transformers/modeling_tf_transfo_xl_utilities.py index 0f2a4ebeba..33244eae82 100644 --- a/transformers/modeling_tf_transfo_xl_utilities.py +++ b/transformers/modeling_tf_transfo_xl_utilities.py @@ -19,7 +19,6 @@ from collections import defaultdict import numpy as np - import tensorflow as tf from .modeling_tf_utils import shape_list diff --git a/transformers/modeling_tf_utils.py b/transformers/modeling_tf_utils.py index 7ecd79afd3..637013b379 100644 --- a/transformers/modeling_tf_utils.py +++ b/transformers/modeling_tf_utils.py @@ -20,22 +20,23 @@ from __future__ import absolute_import, division, print_function, unicode_litera import logging import os +import h5py import tensorflow as tf from tensorflow.python.keras.saving import hdf5_format -import h5py from .configuration_utils import PretrainedConfig from .file_utils import ( + DUMMY_INPUTS, TF2_WEIGHTS_NAME, TF_WEIGHTS_NAME, WEIGHTS_NAME, - DUMMY_INPUTS, cached_path, hf_bucket_url, is_remote_url, ) from .modeling_tf_pytorch_utils import load_pytorch_checkpoint_in_tf2_model + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_tf_xlm.py b/transformers/modeling_tf_xlm.py index 2f443ae2fc..a29a0b7fec 100644 --- a/transformers/modeling_tf_xlm.py +++ b/transformers/modeling_tf_xlm.py @@ -16,24 +16,25 @@ """ from __future__ import absolute_import, division, print_function, unicode_literals +import itertools import logging import math import os -import itertools import numpy as np import tensorflow as tf from .configuration_xlm import XLMConfig -from .modeling_tf_utils import ( - TFPreTrainedModel, - TFSharedEmbeddings, - TFSequenceSummary, - shape_list, - get_initializer, - DUMMY_INPUTS, -) from .file_utils import add_start_docstrings +from .modeling_tf_utils import ( + DUMMY_INPUTS, + TFPreTrainedModel, + TFSequenceSummary, + TFSharedEmbeddings, + get_initializer, + shape_list, +) + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_tf_xlnet.py b/transformers/modeling_tf_xlnet.py index c1ed720f96..e913a0513b 100644 --- a/transformers/modeling_tf_xlnet.py +++ b/transformers/modeling_tf_xlnet.py @@ -28,8 +28,8 @@ import numpy as np import tensorflow as tf from .configuration_xlnet import XLNetConfig -from .modeling_tf_utils import TFPreTrainedModel, TFSharedEmbeddings, TFSequenceSummary, shape_list, get_initializer from .file_utils import add_start_docstrings +from .modeling_tf_utils import TFPreTrainedModel, TFSequenceSummary, TFSharedEmbeddings, get_initializer, shape_list logger = logging.getLogger(__name__) diff --git a/transformers/modeling_transfo_xl.py b/transformers/modeling_transfo_xl.py index cee61ed37a..a9040b53db 100644 --- a/transformers/modeling_transfo_xl.py +++ b/transformers/modeling_transfo_xl.py @@ -20,11 +20,11 @@ from __future__ import absolute_import, division, print_function, unicode_literals -import os -import json -import math -import logging import collections +import json +import logging +import math +import os import sys from io import open @@ -34,10 +34,11 @@ import torch.nn.functional as F from torch.nn import CrossEntropyLoss from torch.nn.parameter import Parameter -from .modeling_utils import PreTrainedModel, Conv1D, prune_conv1d_layer, SequenceSummary from .configuration_transfo_xl import TransfoXLConfig -from .modeling_transfo_xl_utilities import ProjectedAdaptiveLogSoftmax, sample_logits, LogUniformSampler from .file_utils import add_start_docstrings +from .modeling_transfo_xl_utilities import LogUniformSampler, ProjectedAdaptiveLogSoftmax, sample_logits +from .modeling_utils import Conv1D, PreTrainedModel, SequenceSummary, prune_conv1d_layer + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_transfo_xl_utilities.py b/transformers/modeling_transfo_xl_utilities.py index 89451bb558..c419541640 100644 --- a/transformers/modeling_transfo_xl_utilities.py +++ b/transformers/modeling_transfo_xl_utilities.py @@ -20,11 +20,11 @@ from collections import defaultdict import numpy as np - import torch import torch.nn as nn import torch.nn.functional as F + # CUDA_MAJOR = int(torch.version.cuda.split('.')[0]) # CUDA_MINOR = int(torch.version.cuda.split('.')[1]) diff --git a/transformers/modeling_utils.py b/transformers/modeling_utils.py index e934b90528..245a1afa06 100644 --- a/transformers/modeling_utils.py +++ b/transformers/modeling_utils.py @@ -31,15 +31,16 @@ from torch.nn import functional as F from .configuration_utils import PretrainedConfig from .file_utils import ( + DUMMY_INPUTS, TF2_WEIGHTS_NAME, TF_WEIGHTS_NAME, WEIGHTS_NAME, - DUMMY_INPUTS, cached_path, hf_bucket_url, is_remote_url, ) + logger = logging.getLogger(__name__) try: diff --git a/transformers/modeling_xlm.py b/transformers/modeling_xlm.py index cd758a0433..2127bbad36 100644 --- a/transformers/modeling_xlm.py +++ b/transformers/modeling_xlm.py @@ -16,20 +16,20 @@ """ from __future__ import absolute_import, division, print_function, unicode_literals +import itertools import logging import math -import itertools import numpy as np - import torch from torch import nn -from torch.nn import functional as F from torch.nn import CrossEntropyLoss, MSELoss +from torch.nn import functional as F -from .modeling_utils import PreTrainedModel, prune_linear_layer, SequenceSummary, SQuADHead from .configuration_xlm import XLMConfig from .file_utils import add_start_docstrings +from .modeling_utils import PreTrainedModel, SequenceSummary, SQuADHead, prune_linear_layer + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_xlm_roberta.py b/transformers/modeling_xlm_roberta.py index 8f1ed6ec67..adf7f23346 100644 --- a/transformers/modeling_xlm_roberta.py +++ b/transformers/modeling_xlm_roberta.py @@ -19,15 +19,16 @@ from __future__ import absolute_import, division, print_function, unicode_litera import logging -from .modeling_roberta import ( - RobertaModel, - RobertaForMaskedLM, - RobertaForSequenceClassification, - RobertaForMultipleChoice, - RobertaForTokenClassification, -) from .configuration_xlm_roberta import XLMRobertaConfig from .file_utils import add_start_docstrings +from .modeling_roberta import ( + RobertaForMaskedLM, + RobertaForMultipleChoice, + RobertaForSequenceClassification, + RobertaForTokenClassification, + RobertaModel, +) + logger = logging.getLogger(__name__) diff --git a/transformers/modeling_xlnet.py b/transformers/modeling_xlnet.py index 2a210502d9..423ba8cb7c 100644 --- a/transformers/modeling_xlnet.py +++ b/transformers/modeling_xlnet.py @@ -26,19 +26,19 @@ from io import open import torch from torch import nn -from torch.nn import functional as F from torch.nn import CrossEntropyLoss, MSELoss +from torch.nn import functional as F +from .configuration_xlnet import XLNetConfig +from .file_utils import add_start_docstrings from .modeling_utils import ( - PreTrainedModel, - prune_linear_layer, - SequenceSummary, PoolerAnswerClass, PoolerEndLogits, PoolerStartLogits, + PreTrainedModel, + SequenceSummary, + prune_linear_layer, ) -from .configuration_xlnet import XLNetConfig -from .file_utils import add_start_docstrings logger = logging.getLogger(__name__) diff --git a/transformers/optimization.py b/transformers/optimization.py index 0cd57078ba..814a0c5ba1 100644 --- a/transformers/optimization.py +++ b/transformers/optimization.py @@ -21,6 +21,7 @@ import torch from torch.optim import Optimizer from torch.optim.lr_scheduler import LambdaLR + logger = logging.getLogger(__name__) diff --git a/transformers/optimization_tf.py b/transformers/optimization_tf.py index bdcbd323ce..c2c8a3180a 100644 --- a/transformers/optimization_tf.py +++ b/transformers/optimization_tf.py @@ -14,9 +14,7 @@ # ============================================================================== """Functions and classes related to optimization (weight updates).""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import re diff --git a/transformers/pipelines.py b/transformers/pipelines.py index 4149c2e475..257a891ae9 100755 --- a/transformers/pipelines.py +++ b/transformers/pipelines.py @@ -14,36 +14,36 @@ # limitations under the License. from __future__ import absolute_import, division, print_function, unicode_literals -import sys import csv import json +import logging import os import pickle -import logging -import six - +import sys from abc import ABC, abstractmethod from contextlib import contextmanager from itertools import groupby from os.path import abspath, exists -from typing import Union, Optional, Tuple, List, Dict +from typing import Dict, List, Optional, Tuple, Union import numpy as np +import six from transformers import ( + ALL_PRETRAINED_CONFIG_ARCHIVE_MAP, AutoConfig, AutoTokenizer, - PreTrainedTokenizer, - PretrainedConfig, + BasicTokenizer, ModelCard, + PretrainedConfig, + PreTrainedTokenizer, SquadExample, - squad_convert_examples_to_features, is_tf_available, is_torch_available, - BasicTokenizer, - ALL_PRETRAINED_CONFIG_ARCHIVE_MAP, + squad_convert_examples_to_features, ) + if is_tf_available(): import tensorflow as tf from transformers import ( diff --git a/transformers/tests/configuration_common_test.py b/transformers/tests/configuration_common_test.py index d109a655f8..65a4a35ae6 100644 --- a/transformers/tests/configuration_common_test.py +++ b/transformers/tests/configuration_common_test.py @@ -12,15 +12,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -import os import json +import os import tempfile - import unittest + from .tokenization_tests_commons import TemporaryDirectory diff --git a/transformers/tests/hf_api_test.py b/transformers/tests/hf_api_test.py index 71963df107..0c86fab97b 100644 --- a/transformers/tests/hf_api_test.py +++ b/transformers/tests/hf_api_test.py @@ -23,6 +23,7 @@ import six from transformers.hf_api import HfApi, HfFolder, HTTPError, PresignedUrl, S3Obj + USER = "__DUMMY_TRANSFORMERS_USER__" PASS = "__DUMMY_TRANSFORMERS_PASS__" FILES = [ diff --git a/transformers/tests/model_card_test.py b/transformers/tests/model_card_test.py index 30fe33a904..7a6f0721a7 100644 --- a/transformers/tests/model_card_test.py +++ b/transformers/tests/model_card_test.py @@ -14,11 +14,12 @@ # limitations under the License. from __future__ import absolute_import, division, print_function, unicode_literals -import os import json +import os import unittest from transformers.modelcard import ModelCard + from .tokenization_tests_commons import TemporaryDirectory diff --git a/transformers/tests/modeling_albert_test.py b/transformers/tests/modeling_albert_test.py index f798af95bc..b2a0abe1f1 100644 --- a/transformers/tests/modeling_albert_test.py +++ b/transformers/tests/modeling_albert_test.py @@ -12,18 +12,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import unittest from transformers import is_torch_available -from .modeling_common_test import CommonTestCases, ids_tensor from .configuration_common_test import ConfigTester +from .modeling_common_test import CommonTestCases, ids_tensor from .utils import CACHE_DIR, require_torch, slow, torch_device + if is_torch_available(): from transformers import ( AlbertConfig, diff --git a/transformers/tests/modeling_auto_test.py b/transformers/tests/modeling_auto_test.py index 3bdaa8a378..b2bb54ffaa 100644 --- a/transformers/tests/modeling_auto_test.py +++ b/transformers/tests/modeling_auto_test.py @@ -12,17 +12,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -import unittest -import shutil import logging +import shutil +import unittest from transformers import is_torch_available -from .utils import require_torch, slow, SMALL_MODEL_IDENTIFIER +from .utils import SMALL_MODEL_IDENTIFIER, require_torch, slow + if is_torch_available(): from transformers import ( diff --git a/transformers/tests/modeling_bert_test.py b/transformers/tests/modeling_bert_test.py index 6711aded69..f7325eff9e 100644 --- a/transformers/tests/modeling_bert_test.py +++ b/transformers/tests/modeling_bert_test.py @@ -12,18 +12,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import unittest from transformers import is_torch_available -from .modeling_common_test import CommonTestCases, ids_tensor, floats_tensor from .configuration_common_test import ConfigTester +from .modeling_common_test import CommonTestCases, floats_tensor, ids_tensor from .utils import CACHE_DIR, require_torch, slow, torch_device + if is_torch_available(): from transformers import ( BertConfig, diff --git a/transformers/tests/modeling_common_test.py b/transformers/tests/modeling_common_test.py index 6834c78d15..42de8c9aef 100644 --- a/transformers/tests/modeling_common_test.py +++ b/transformers/tests/modeling_common_test.py @@ -12,26 +12,24 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import copy -import sys -import os.path -import shutil -import tempfile import json -import random -import uuid - -import unittest import logging +import os.path +import random +import shutil +import sys +import tempfile +import unittest +import uuid from transformers import is_torch_available from .utils import CACHE_DIR, require_torch, slow, torch_device + if is_torch_available(): import torch import numpy as np diff --git a/transformers/tests/modeling_ctrl_test.py b/transformers/tests/modeling_ctrl_test.py index 9b71b1dd50..cdcd691043 100644 --- a/transformers/tests/modeling_ctrl_test.py +++ b/transformers/tests/modeling_ctrl_test.py @@ -11,22 +11,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -import unittest import pdb +import unittest from transformers import is_torch_available +from .configuration_common_test import ConfigTester +from .modeling_common_test import CommonTestCases, ids_tensor +from .utils import CACHE_DIR, require_torch, slow, torch_device + + if is_torch_available(): from transformers import CTRLConfig, CTRLModel, CTRL_PRETRAINED_MODEL_ARCHIVE_MAP, CTRLLMHeadModel -from .modeling_common_test import CommonTestCases, ids_tensor -from .configuration_common_test import ConfigTester -from .utils import CACHE_DIR, require_torch, slow, torch_device - @require_torch class CTRLModelTest(CommonTestCases.CommonModelTester): diff --git a/transformers/tests/modeling_distilbert_test.py b/transformers/tests/modeling_distilbert_test.py index 5b4f4683de..eee84af1cb 100644 --- a/transformers/tests/modeling_distilbert_test.py +++ b/transformers/tests/modeling_distilbert_test.py @@ -12,14 +12,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import unittest from transformers import is_torch_available +from .configuration_common_test import ConfigTester +from .modeling_common_test import CommonTestCases, ids_tensor +from .utils import CACHE_DIR, require_torch, slow, torch_device + + if is_torch_available(): from transformers import ( DistilBertConfig, @@ -30,10 +33,6 @@ if is_torch_available(): DistilBertForSequenceClassification, ) -from .modeling_common_test import CommonTestCases, ids_tensor -from .configuration_common_test import ConfigTester -from .utils import CACHE_DIR, require_torch, slow, torch_device - @require_torch class DistilBertModelTest(CommonTestCases.CommonModelTester): diff --git a/transformers/tests/modeling_encoder_decoder_test.py b/transformers/tests/modeling_encoder_decoder_test.py index 491c502bac..b9cef6667a 100644 --- a/transformers/tests/modeling_encoder_decoder_test.py +++ b/transformers/tests/modeling_encoder_decoder_test.py @@ -17,8 +17,10 @@ import logging import unittest from transformers import is_torch_available + from .utils import require_torch, slow + if is_torch_available(): from transformers import BertModel, BertForMaskedLM, Model2Model from transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_MAP diff --git a/transformers/tests/modeling_gpt2_test.py b/transformers/tests/modeling_gpt2_test.py index 2706166b33..82ace85291 100644 --- a/transformers/tests/modeling_gpt2_test.py +++ b/transformers/tests/modeling_gpt2_test.py @@ -12,14 +12,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import unittest from transformers import is_torch_available +from .configuration_common_test import ConfigTester +from .modeling_common_test import CommonTestCases, ids_tensor +from .utils import CACHE_DIR, require_torch, slow, torch_device + + if is_torch_available(): from transformers import ( GPT2Config, @@ -29,10 +32,6 @@ if is_torch_available(): GPT2DoubleHeadsModel, ) -from .modeling_common_test import CommonTestCases, ids_tensor -from .configuration_common_test import ConfigTester -from .utils import CACHE_DIR, require_torch, slow, torch_device - @require_torch class GPT2ModelTest(CommonTestCases.CommonModelTester): diff --git a/transformers/tests/modeling_openai_test.py b/transformers/tests/modeling_openai_test.py index f22a0b760c..21ea556ac4 100644 --- a/transformers/tests/modeling_openai_test.py +++ b/transformers/tests/modeling_openai_test.py @@ -12,14 +12,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import unittest from transformers import is_torch_available +from .configuration_common_test import ConfigTester +from .modeling_common_test import CommonTestCases, ids_tensor +from .utils import CACHE_DIR, require_torch, slow, torch_device + + if is_torch_available(): from transformers import ( OpenAIGPTConfig, @@ -29,10 +32,6 @@ if is_torch_available(): OpenAIGPTDoubleHeadsModel, ) -from .modeling_common_test import CommonTestCases, ids_tensor -from .configuration_common_test import ConfigTester -from .utils import CACHE_DIR, require_torch, slow, torch_device - @require_torch class OpenAIGPTModelTest(CommonTestCases.CommonModelTester): diff --git a/transformers/tests/modeling_roberta_test.py b/transformers/tests/modeling_roberta_test.py index 451dafe08e..e6909deae2 100644 --- a/transformers/tests/modeling_roberta_test.py +++ b/transformers/tests/modeling_roberta_test.py @@ -12,14 +12,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import unittest from transformers import is_torch_available +from .configuration_common_test import ConfigTester +from .modeling_common_test import CommonTestCases, ids_tensor +from .utils import CACHE_DIR, require_torch, slow, torch_device + + if is_torch_available(): import torch from transformers import ( @@ -32,10 +35,6 @@ if is_torch_available(): from transformers.modeling_roberta import RobertaEmbeddings from transformers.modeling_roberta import ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP -from .modeling_common_test import CommonTestCases, ids_tensor -from .configuration_common_test import ConfigTester -from .utils import CACHE_DIR, require_torch, slow, torch_device - @require_torch class RobertaModelTest(CommonTestCases.CommonModelTester): diff --git a/transformers/tests/modeling_t5_test.py b/transformers/tests/modeling_t5_test.py index 3feb61a622..2bf3bdae17 100644 --- a/transformers/tests/modeling_t5_test.py +++ b/transformers/tests/modeling_t5_test.py @@ -12,18 +12,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import unittest from transformers import is_torch_available -from .modeling_common_test import CommonTestCases, ids_tensor, floats_tensor from .configuration_common_test import ConfigTester +from .modeling_common_test import CommonTestCases, floats_tensor, ids_tensor from .utils import CACHE_DIR, require_torch, slow, torch_device + if is_torch_available(): from transformers import T5Config, T5Model, T5WithLMHeadModel from transformers.modeling_t5 import T5_PRETRAINED_MODEL_ARCHIVE_MAP diff --git a/transformers/tests/modeling_tf_albert_test.py b/transformers/tests/modeling_tf_albert_test.py index 0406592d54..344e999a08 100644 --- a/transformers/tests/modeling_tf_albert_test.py +++ b/transformers/tests/modeling_tf_albert_test.py @@ -12,19 +12,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -import unittest import sys - -from .modeling_tf_common_test import TFCommonTestCases, ids_tensor -from .configuration_common_test import ConfigTester -from .utils import CACHE_DIR, require_tf, slow +import unittest from transformers import AlbertConfig, is_tf_available +from .configuration_common_test import ConfigTester +from .modeling_tf_common_test import TFCommonTestCases, ids_tensor +from .utils import CACHE_DIR, require_tf, slow + + if is_tf_available(): import tensorflow as tf from transformers.modeling_tf_albert import ( diff --git a/transformers/tests/modeling_tf_auto_test.py b/transformers/tests/modeling_tf_auto_test.py index d695474ecf..54581505eb 100644 --- a/transformers/tests/modeling_tf_auto_test.py +++ b/transformers/tests/modeling_tf_auto_test.py @@ -12,17 +12,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -import unittest -import shutil import logging +import shutil +import unittest from transformers import is_tf_available -from .utils import require_tf, slow, SMALL_MODEL_IDENTIFIER +from .utils import SMALL_MODEL_IDENTIFIER, require_tf, slow + if is_tf_available(): from transformers import ( diff --git a/transformers/tests/modeling_tf_bert_test.py b/transformers/tests/modeling_tf_bert_test.py index e36e3a2c3f..735de447e2 100644 --- a/transformers/tests/modeling_tf_bert_test.py +++ b/transformers/tests/modeling_tf_bert_test.py @@ -12,19 +12,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -import unittest import sys - -from .modeling_tf_common_test import TFCommonTestCases, ids_tensor -from .configuration_common_test import ConfigTester -from .utils import CACHE_DIR, require_tf, slow +import unittest from transformers import BertConfig, is_tf_available +from .configuration_common_test import ConfigTester +from .modeling_tf_common_test import TFCommonTestCases, ids_tensor +from .utils import CACHE_DIR, require_tf, slow + + if is_tf_available(): import tensorflow as tf from transformers.modeling_tf_bert import ( diff --git a/transformers/tests/modeling_tf_common_test.py b/transformers/tests/modeling_tf_common_test.py index d65e270ae1..6f2d62cc94 100644 --- a/transformers/tests/modeling_tf_common_test.py +++ b/transformers/tests/modeling_tf_common_test.py @@ -14,23 +14,23 @@ # limitations under the License. from __future__ import absolute_import, division, print_function -import os import copy +import importlib import json import logging -import importlib +import os import random import shutil +import sys +import tempfile import unittest import uuid -import tempfile - -import sys from transformers import is_tf_available, is_torch_available from .utils import require_tf, slow + if is_tf_available(): import tensorflow as tf import numpy as np diff --git a/transformers/tests/modeling_tf_ctrl_test.py b/transformers/tests/modeling_tf_ctrl_test.py index fb8c4c2551..895579eab5 100644 --- a/transformers/tests/modeling_tf_ctrl_test.py +++ b/transformers/tests/modeling_tf_ctrl_test.py @@ -12,19 +12,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -import unittest import sys - -from .modeling_tf_common_test import TFCommonTestCases, ids_tensor -from .configuration_common_test import ConfigTester -from .utils import CACHE_DIR, require_tf, slow +import unittest from transformers import CTRLConfig, is_tf_available +from .configuration_common_test import ConfigTester +from .modeling_tf_common_test import TFCommonTestCases, ids_tensor +from .utils import CACHE_DIR, require_tf, slow + + if is_tf_available(): import tensorflow as tf from transformers.modeling_tf_ctrl import TFCTRLModel, TFCTRLLMHeadModel, TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP diff --git a/transformers/tests/modeling_tf_distilbert_test.py b/transformers/tests/modeling_tf_distilbert_test.py index 3260f63d56..ebb17e2980 100644 --- a/transformers/tests/modeling_tf_distilbert_test.py +++ b/transformers/tests/modeling_tf_distilbert_test.py @@ -12,17 +12,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import unittest -from .modeling_tf_common_test import TFCommonTestCases, ids_tensor +from transformers import DistilBertConfig, is_tf_available + from .configuration_common_test import ConfigTester +from .modeling_tf_common_test import TFCommonTestCases, ids_tensor from .utils import CACHE_DIR, require_tf, slow -from transformers import DistilBertConfig, is_tf_available if is_tf_available(): import tensorflow as tf diff --git a/transformers/tests/modeling_tf_gpt2_test.py b/transformers/tests/modeling_tf_gpt2_test.py index 09b7eb0710..49bb10c43e 100644 --- a/transformers/tests/modeling_tf_gpt2_test.py +++ b/transformers/tests/modeling_tf_gpt2_test.py @@ -12,19 +12,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -import unittest import sys - -from .modeling_tf_common_test import TFCommonTestCases, ids_tensor -from .configuration_common_test import ConfigTester -from .utils import CACHE_DIR, require_tf, slow +import unittest from transformers import GPT2Config, is_tf_available +from .configuration_common_test import ConfigTester +from .modeling_tf_common_test import TFCommonTestCases, ids_tensor +from .utils import CACHE_DIR, require_tf, slow + + if is_tf_available(): import tensorflow as tf from transformers.modeling_tf_gpt2 import ( diff --git a/transformers/tests/modeling_tf_openai_gpt_test.py b/transformers/tests/modeling_tf_openai_gpt_test.py index a59395e02b..0198527f5f 100644 --- a/transformers/tests/modeling_tf_openai_gpt_test.py +++ b/transformers/tests/modeling_tf_openai_gpt_test.py @@ -12,19 +12,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -import unittest import sys - -from .modeling_tf_common_test import TFCommonTestCases, ids_tensor -from .configuration_common_test import ConfigTester -from .utils import CACHE_DIR, require_tf, slow +import unittest from transformers import OpenAIGPTConfig, is_tf_available +from .configuration_common_test import ConfigTester +from .modeling_tf_common_test import TFCommonTestCases, ids_tensor +from .utils import CACHE_DIR, require_tf, slow + + if is_tf_available(): import tensorflow as tf from transformers.modeling_tf_openai import ( diff --git a/transformers/tests/modeling_tf_roberta_test.py b/transformers/tests/modeling_tf_roberta_test.py index 23ea557404..3b9f1961b8 100644 --- a/transformers/tests/modeling_tf_roberta_test.py +++ b/transformers/tests/modeling_tf_roberta_test.py @@ -12,17 +12,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import unittest -from .modeling_tf_common_test import TFCommonTestCases, ids_tensor +from transformers import RobertaConfig, is_tf_available + from .configuration_common_test import ConfigTester +from .modeling_tf_common_test import TFCommonTestCases, ids_tensor from .utils import CACHE_DIR, require_tf, slow -from transformers import RobertaConfig, is_tf_available if is_tf_available(): import tensorflow as tf diff --git a/transformers/tests/modeling_tf_t5_test.py b/transformers/tests/modeling_tf_t5_test.py index 521085219b..2108b9007a 100644 --- a/transformers/tests/modeling_tf_t5_test.py +++ b/transformers/tests/modeling_tf_t5_test.py @@ -12,19 +12,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -import unittest import sys - -from .modeling_tf_common_test import TFCommonTestCases, ids_tensor -from .configuration_common_test import ConfigTester -from .utils import CACHE_DIR, require_tf, slow +import unittest from transformers import T5Config, is_tf_available +from .configuration_common_test import ConfigTester +from .modeling_tf_common_test import TFCommonTestCases, ids_tensor +from .utils import CACHE_DIR, require_tf, slow + + if is_tf_available(): import tensorflow as tf from transformers.modeling_tf_t5 import TFT5Model, TFT5WithLMHeadModel, TF_T5_PRETRAINED_MODEL_ARCHIVE_MAP diff --git a/transformers/tests/modeling_tf_transfo_xl_test.py b/transformers/tests/modeling_tf_transfo_xl_test.py index 20de598d05..2b17668a94 100644 --- a/transformers/tests/modeling_tf_transfo_xl_test.py +++ b/transformers/tests/modeling_tf_transfo_xl_test.py @@ -12,19 +12,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -import unittest import random - -from .modeling_tf_common_test import TFCommonTestCases, ids_tensor -from .configuration_common_test import ConfigTester -from .utils import CACHE_DIR, require_tf, slow +import unittest from transformers import TransfoXLConfig, is_tf_available +from .configuration_common_test import ConfigTester +from .modeling_tf_common_test import TFCommonTestCases, ids_tensor +from .utils import CACHE_DIR, require_tf, slow + + if is_tf_available(): import tensorflow as tf from transformers.modeling_tf_transfo_xl import ( diff --git a/transformers/tests/modeling_tf_xlm_test.py b/transformers/tests/modeling_tf_xlm_test.py index 9162bf2b38..0850cecb07 100644 --- a/transformers/tests/modeling_tf_xlm_test.py +++ b/transformers/tests/modeling_tf_xlm_test.py @@ -12,14 +12,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import unittest from transformers import is_tf_available +from .configuration_common_test import ConfigTester +from .modeling_tf_common_test import TFCommonTestCases, ids_tensor +from .utils import CACHE_DIR, require_tf, slow + + if is_tf_available(): import tensorflow as tf from transformers import ( @@ -31,10 +34,6 @@ if is_tf_available(): TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP, ) -from .modeling_tf_common_test import TFCommonTestCases, ids_tensor -from .configuration_common_test import ConfigTester -from .utils import CACHE_DIR, require_tf, slow - @require_tf class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester): diff --git a/transformers/tests/modeling_tf_xlnet_test.py b/transformers/tests/modeling_tf_xlnet_test.py index 9a56384a0c..67fc1a5ced 100644 --- a/transformers/tests/modeling_tf_xlnet_test.py +++ b/transformers/tests/modeling_tf_xlnet_test.py @@ -12,17 +12,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -import os -import unittest import json +import os import random +import unittest from transformers import XLNetConfig, is_tf_available +from .configuration_common_test import ConfigTester +from .modeling_tf_common_test import TFCommonTestCases, ids_tensor +from .utils import CACHE_DIR, require_tf, slow + + if is_tf_available(): import tensorflow as tf @@ -35,10 +38,6 @@ if is_tf_available(): TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP, ) -from .modeling_tf_common_test import TFCommonTestCases, ids_tensor -from .configuration_common_test import ConfigTester -from .utils import CACHE_DIR, require_tf, slow - @require_tf class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester): diff --git a/transformers/tests/modeling_transfo_xl_test.py b/transformers/tests/modeling_transfo_xl_test.py index f04205d4e0..4289483a89 100644 --- a/transformers/tests/modeling_transfo_xl_test.py +++ b/transformers/tests/modeling_transfo_xl_test.py @@ -12,24 +12,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -import unittest import random +import unittest from transformers import is_torch_available +from .configuration_common_test import ConfigTester +from .modeling_common_test import CommonTestCases, ids_tensor +from .utils import CACHE_DIR, require_torch, slow, torch_device + + if is_torch_available(): import torch from transformers import TransfoXLConfig, TransfoXLModel, TransfoXLLMHeadModel from transformers.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP -from .modeling_common_test import CommonTestCases, ids_tensor -from .configuration_common_test import ConfigTester -from .utils import CACHE_DIR, require_torch, slow, torch_device - @require_torch class TransfoXLModelTest(CommonTestCases.CommonModelTester): diff --git a/transformers/tests/modeling_xlm_test.py b/transformers/tests/modeling_xlm_test.py index 843693fd03..a0cc8e69f0 100644 --- a/transformers/tests/modeling_xlm_test.py +++ b/transformers/tests/modeling_xlm_test.py @@ -12,14 +12,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import unittest from transformers import is_torch_available +from .configuration_common_test import ConfigTester +from .modeling_common_test import CommonTestCases, ids_tensor +from .utils import CACHE_DIR, require_torch, slow, torch_device + + if is_torch_available(): from transformers import ( XLMConfig, @@ -31,10 +34,6 @@ if is_torch_available(): ) from transformers.modeling_xlm import XLM_PRETRAINED_MODEL_ARCHIVE_MAP -from .modeling_common_test import CommonTestCases, ids_tensor -from .configuration_common_test import ConfigTester -from .utils import CACHE_DIR, require_torch, slow, torch_device - @require_torch class XLMModelTest(CommonTestCases.CommonModelTester): diff --git a/transformers/tests/modeling_xlnet_test.py b/transformers/tests/modeling_xlnet_test.py index 487756a5c8..ac0e542ccb 100644 --- a/transformers/tests/modeling_xlnet_test.py +++ b/transformers/tests/modeling_xlnet_test.py @@ -12,17 +12,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -import os -import unittest import json +import os import random +import unittest from transformers import is_torch_available +from .configuration_common_test import ConfigTester +from .modeling_common_test import CommonTestCases, ids_tensor +from .utils import CACHE_DIR, require_torch, slow, torch_device + + if is_torch_available(): import torch @@ -36,10 +39,6 @@ if is_torch_available(): ) from transformers.modeling_xlnet import XLNET_PRETRAINED_MODEL_ARCHIVE_MAP -from .modeling_common_test import CommonTestCases, ids_tensor -from .configuration_common_test import ConfigTester -from .utils import CACHE_DIR, require_torch, slow, torch_device - @require_torch class XLNetModelTest(CommonTestCases.CommonModelTester): diff --git a/transformers/tests/optimization_test.py b/transformers/tests/optimization_test.py index 0addcde1d8..c0cef1e387 100644 --- a/transformers/tests/optimization_test.py +++ b/transformers/tests/optimization_test.py @@ -12,15 +12,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -import unittest import os +import unittest from transformers import is_torch_available +from .tokenization_tests_commons import TemporaryDirectory +from .utils import require_torch + + if is_torch_available(): import torch @@ -33,9 +35,6 @@ if is_torch_available(): get_linear_schedule_with_warmup, ) -from .tokenization_tests_commons import TemporaryDirectory -from .utils import require_torch - def unwrap_schedule(scheduler, num_steps=10): lrs = [] diff --git a/transformers/tests/optimization_tf_test.py b/transformers/tests/optimization_tf_test.py index e88ee971e4..4058aaf835 100644 --- a/transformers/tests/optimization_tf_test.py +++ b/transformers/tests/optimization_tf_test.py @@ -1,6 +1,4 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import unittest @@ -8,6 +6,7 @@ from transformers import is_tf_available from .utils import require_tf + if is_tf_available(): import tensorflow as tf from tensorflow.python.eager import context diff --git a/transformers/tests/pipelines_test.py b/transformers/tests/pipelines_test.py index 3c258594d1..2dfbdaaa05 100644 --- a/transformers/tests/pipelines_test.py +++ b/transformers/tests/pipelines_test.py @@ -1,10 +1,10 @@ import unittest - from typing import Iterable from transformers import pipeline from transformers.tests.utils import require_tf, require_torch + QA_FINETUNED_MODELS = { ("bert-base-uncased", "bert-large-uncased-whole-word-masking-finetuned-squad", None), ("bert-base-cased", "bert-large-cased-whole-word-masking-finetuned-squad", None), diff --git a/transformers/tests/tokenization_albert_test.py b/transformers/tests/tokenization_albert_test.py index 7d7e793b54..867dd5591c 100644 --- a/transformers/tests/tokenization_albert_test.py +++ b/transformers/tests/tokenization_albert_test.py @@ -17,10 +17,11 @@ from __future__ import absolute_import, division, print_function, unicode_litera import os import unittest -from transformers.tokenization_albert import AlbertTokenizer, SPIECE_UNDERLINE +from transformers.tokenization_albert import SPIECE_UNDERLINE, AlbertTokenizer from .tokenization_tests_commons import CommonTestCases + SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/spiece.model") diff --git a/transformers/tests/tokenization_auto_test.py b/transformers/tests/tokenization_auto_test.py index 7d77bf5b23..4ff2fa791d 100644 --- a/transformers/tests/tokenization_auto_test.py +++ b/transformers/tests/tokenization_auto_test.py @@ -12,18 +12,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -import unittest -import shutil import logging +import shutil +import unittest -from transformers import AutoTokenizer, BertTokenizer, AutoTokenizer, GPT2Tokenizer -from transformers import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP +from transformers import ( + BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, + GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, + AutoTokenizer, + BertTokenizer, + GPT2Tokenizer, +) -from .utils import slow, SMALL_MODEL_IDENTIFIER +from .utils import SMALL_MODEL_IDENTIFIER, slow class AutoTokenizerTest(unittest.TestCase): diff --git a/transformers/tests/tokenization_bert_japanese_test.py b/transformers/tests/tokenization_bert_japanese_test.py index 02eb8c0a66..84119c0813 100644 --- a/transformers/tests/tokenization_bert_japanese_test.py +++ b/transformers/tests/tokenization_bert_japanese_test.py @@ -20,14 +20,14 @@ from io import open from transformers.tokenization_bert import WordpieceTokenizer from transformers.tokenization_bert_japanese import ( - BertJapaneseTokenizer, - MecabTokenizer, - CharacterTokenizer, VOCAB_FILES_NAMES, + BertJapaneseTokenizer, + CharacterTokenizer, + MecabTokenizer, ) from .tokenization_tests_commons import CommonTestCases -from .utils import slow, custom_tokenizers +from .utils import custom_tokenizers, slow @custom_tokenizers diff --git a/transformers/tests/tokenization_bert_test.py b/transformers/tests/tokenization_bert_test.py index bf023761a6..9c8c18fe44 100644 --- a/transformers/tests/tokenization_bert_test.py +++ b/transformers/tests/tokenization_bert_test.py @@ -19,13 +19,13 @@ import unittest from io import open from transformers.tokenization_bert import ( + VOCAB_FILES_NAMES, BasicTokenizer, BertTokenizer, WordpieceTokenizer, _is_control, _is_punctuation, _is_whitespace, - VOCAB_FILES_NAMES, ) from .tokenization_tests_commons import CommonTestCases diff --git a/transformers/tests/tokenization_ctrl_test.py b/transformers/tests/tokenization_ctrl_test.py index 04c9dec523..eb3fbb9da4 100644 --- a/transformers/tests/tokenization_ctrl_test.py +++ b/transformers/tests/tokenization_ctrl_test.py @@ -13,12 +13,12 @@ # limitations under the License. from __future__ import absolute_import, division, print_function, unicode_literals +import json import os import unittest -import json from io import open -from transformers.tokenization_ctrl import CTRLTokenizer, VOCAB_FILES_NAMES +from transformers.tokenization_ctrl import VOCAB_FILES_NAMES, CTRLTokenizer from .tokenization_tests_commons import CommonTestCases diff --git a/transformers/tests/tokenization_distilbert_test.py b/transformers/tests/tokenization_distilbert_test.py index 551f9e188e..b7760e0eb4 100644 --- a/transformers/tests/tokenization_distilbert_test.py +++ b/transformers/tests/tokenization_distilbert_test.py @@ -20,8 +20,8 @@ from io import open from transformers.tokenization_distilbert import DistilBertTokenizer -from .tokenization_tests_commons import CommonTestCases from .tokenization_bert_test import BertTokenizationTest +from .tokenization_tests_commons import CommonTestCases from .utils import slow diff --git a/transformers/tests/tokenization_gpt2_test.py b/transformers/tests/tokenization_gpt2_test.py index 552b73416e..9246e5ce17 100644 --- a/transformers/tests/tokenization_gpt2_test.py +++ b/transformers/tests/tokenization_gpt2_test.py @@ -14,12 +14,12 @@ # limitations under the License. from __future__ import absolute_import, division, print_function, unicode_literals +import json import os import unittest -import json from io import open -from transformers.tokenization_gpt2 import GPT2Tokenizer, VOCAB_FILES_NAMES +from transformers.tokenization_gpt2 import VOCAB_FILES_NAMES, GPT2Tokenizer from .tokenization_tests_commons import CommonTestCases diff --git a/transformers/tests/tokenization_openai_test.py b/transformers/tests/tokenization_openai_test.py index c6a802b7be..fe4ed77c13 100644 --- a/transformers/tests/tokenization_openai_test.py +++ b/transformers/tests/tokenization_openai_test.py @@ -14,11 +14,11 @@ # limitations under the License. from __future__ import absolute_import, division, print_function, unicode_literals +import json import os import unittest -import json -from transformers.tokenization_openai import OpenAIGPTTokenizer, VOCAB_FILES_NAMES +from transformers.tokenization_openai import VOCAB_FILES_NAMES, OpenAIGPTTokenizer from .tokenization_tests_commons import CommonTestCases diff --git a/transformers/tests/tokenization_roberta_test.py b/transformers/tests/tokenization_roberta_test.py index a1d9d5fb72..92a1a6d5d5 100644 --- a/transformers/tests/tokenization_roberta_test.py +++ b/transformers/tests/tokenization_roberta_test.py @@ -14,12 +14,13 @@ # limitations under the License. from __future__ import absolute_import, division, print_function, unicode_literals -import os import json +import os import unittest from io import open -from transformers.tokenization_roberta import RobertaTokenizer, VOCAB_FILES_NAMES +from transformers.tokenization_roberta import VOCAB_FILES_NAMES, RobertaTokenizer + from .tokenization_tests_commons import CommonTestCases from .utils import slow diff --git a/transformers/tests/tokenization_t5_test.py b/transformers/tests/tokenization_t5_test.py index 09bc0267f1..69f209f290 100644 --- a/transformers/tests/tokenization_t5_test.py +++ b/transformers/tests/tokenization_t5_test.py @@ -22,6 +22,7 @@ from transformers.tokenization_xlnet import SPIECE_UNDERLINE from .tokenization_tests_commons import CommonTestCases + SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model") diff --git a/transformers/tests/tokenization_tests_commons.py b/transformers/tests/tokenization_tests_commons.py index ba81101084..79b4bf7810 100644 --- a/transformers/tests/tokenization_tests_commons.py +++ b/transformers/tests/tokenization_tests_commons.py @@ -15,11 +15,12 @@ from __future__ import absolute_import, division, print_function, unicode_literals import os -import sys -from io import open -import tempfile import shutil +import sys +import tempfile import unittest +from io import open + if sys.version_info[0] == 2: import cPickle as pickle diff --git a/transformers/tests/tokenization_transfo_xl_test.py b/transformers/tests/tokenization_transfo_xl_test.py index 8b737283da..cb9d3d4de7 100644 --- a/transformers/tests/tokenization_transfo_xl_test.py +++ b/transformers/tests/tokenization_transfo_xl_test.py @@ -20,13 +20,14 @@ from io import open from transformers import is_torch_available +from .tokenization_tests_commons import CommonTestCases +from .utils import require_torch + + if is_torch_available(): import torch from transformers.tokenization_transfo_xl import TransfoXLTokenizer, VOCAB_FILES_NAMES -from .tokenization_tests_commons import CommonTestCases -from .utils import require_torch - @require_torch class TransfoXLTokenizationTest(CommonTestCases.CommonTokenizerTester): diff --git a/transformers/tests/tokenization_utils_test.py b/transformers/tests/tokenization_utils_test.py index 4fa92c44bf..76681b1af3 100644 --- a/transformers/tests/tokenization_utils_test.py +++ b/transformers/tests/tokenization_utils_test.py @@ -12,11 +12,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import unittest + import six from transformers import PreTrainedTokenizer diff --git a/transformers/tests/tokenization_xlm_test.py b/transformers/tests/tokenization_xlm_test.py index e9aa2b7d0e..3ce5015353 100644 --- a/transformers/tests/tokenization_xlm_test.py +++ b/transformers/tests/tokenization_xlm_test.py @@ -14,11 +14,11 @@ # limitations under the License. from __future__ import absolute_import, division, print_function, unicode_literals +import json import os import unittest -import json -from transformers.tokenization_xlm import XLMTokenizer, VOCAB_FILES_NAMES +from transformers.tokenization_xlm import VOCAB_FILES_NAMES, XLMTokenizer from .tokenization_tests_commons import CommonTestCases from .utils import slow diff --git a/transformers/tests/tokenization_xlnet_test.py b/transformers/tests/tokenization_xlnet_test.py index 32482449a4..2c55a337ba 100644 --- a/transformers/tests/tokenization_xlnet_test.py +++ b/transformers/tests/tokenization_xlnet_test.py @@ -17,11 +17,12 @@ from __future__ import absolute_import, division, print_function, unicode_litera import os import unittest -from transformers.tokenization_xlnet import XLNetTokenizer, SPIECE_UNDERLINE +from transformers.tokenization_xlnet import SPIECE_UNDERLINE, XLNetTokenizer from .tokenization_tests_commons import CommonTestCases from .utils import slow + SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model") diff --git a/transformers/tests/utils.py b/transformers/tests/utils.py index aab5e5a8a5..66ff53d6ee 100644 --- a/transformers/tests/utils.py +++ b/transformers/tests/utils.py @@ -1,7 +1,6 @@ import os -import unittest import tempfile - +import unittest from distutils.util import strtobool from transformers.file_utils import _tf_available, _torch_available diff --git a/transformers/tokenization_albert.py b/transformers/tokenization_albert.py index b03b3ca119..276a33cbf2 100644 --- a/transformers/tokenization_albert.py +++ b/transformers/tokenization_albert.py @@ -15,13 +15,16 @@ """ Tokenization classes for ALBERT model.""" from __future__ import absolute_import, division, print_function, unicode_literals -from .tokenization_utils import PreTrainedTokenizer import logging -import unicodedata -import six import os +import unicodedata from shutil import copyfile +import six + +from .tokenization_utils import PreTrainedTokenizer + + logger = logging.getLogger(__name__) VOCAB_FILES_NAMES = {"vocab_file": "spiece.model"} diff --git a/transformers/tokenization_auto.py b/transformers/tokenization_auto.py index 5d36fdcbaf..7077ec134c 100644 --- a/transformers/tokenization_auto.py +++ b/transformers/tokenization_auto.py @@ -18,20 +18,21 @@ from __future__ import absolute_import, division, print_function, unicode_litera import logging +from .tokenization_albert import AlbertTokenizer from .tokenization_bert import BertTokenizer from .tokenization_bert_japanese import BertJapaneseTokenizer -from .tokenization_openai import OpenAIGPTTokenizer -from .tokenization_gpt2 import GPT2Tokenizer -from .tokenization_ctrl import CTRLTokenizer -from .tokenization_transfo_xl import TransfoXLTokenizer -from .tokenization_xlnet import XLNetTokenizer -from .tokenization_xlm import XLMTokenizer -from .tokenization_roberta import RobertaTokenizer -from .tokenization_distilbert import DistilBertTokenizer from .tokenization_camembert import CamembertTokenizer -from .tokenization_albert import AlbertTokenizer +from .tokenization_ctrl import CTRLTokenizer +from .tokenization_distilbert import DistilBertTokenizer +from .tokenization_gpt2 import GPT2Tokenizer +from .tokenization_openai import OpenAIGPTTokenizer +from .tokenization_roberta import RobertaTokenizer from .tokenization_t5 import T5Tokenizer +from .tokenization_transfo_xl import TransfoXLTokenizer +from .tokenization_xlm import XLMTokenizer from .tokenization_xlm_roberta import XLMRobertaTokenizer +from .tokenization_xlnet import XLNetTokenizer + logger = logging.getLogger(__name__) diff --git a/transformers/tokenization_bert.py b/transformers/tokenization_bert.py index 7b3705cc19..fc1c918df1 100644 --- a/transformers/tokenization_bert.py +++ b/transformers/tokenization_bert.py @@ -24,6 +24,7 @@ from io import open from .tokenization_utils import PreTrainedTokenizer + logger = logging.getLogger(__name__) VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} diff --git a/transformers/tokenization_bert_japanese.py b/transformers/tokenization_bert_japanese.py index 48b9b04b4c..80c4990518 100644 --- a/transformers/tokenization_bert_japanese.py +++ b/transformers/tokenization_bert_japanese.py @@ -19,13 +19,15 @@ from __future__ import absolute_import, division, print_function, unicode_litera import collections import logging import os -import six import unicodedata from io import open -from .tokenization_bert import BertTokenizer, BasicTokenizer, WordpieceTokenizer, load_vocab +import six + +from .tokenization_bert import BasicTokenizer, BertTokenizer, WordpieceTokenizer, load_vocab from .tokenization_utils import PreTrainedTokenizer + logger = logging.getLogger(__name__) VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} diff --git a/transformers/tokenization_camembert.py b/transformers/tokenization_camembert.py index c1e80e0e05..c5ae705f51 100644 --- a/transformers/tokenization_camembert.py +++ b/transformers/tokenization_camembert.py @@ -20,9 +20,12 @@ import os from shutil import copyfile import sentencepiece as spm + from transformers.tokenization_utils import PreTrainedTokenizer + from .tokenization_xlnet import SPIECE_UNDERLINE + logger = logging.getLogger(__name__) VOCAB_FILES_NAMES = {"vocab_file": "sentencepiece.bpe.model"} diff --git a/transformers/tokenization_ctrl.py b/transformers/tokenization_ctrl.py index 2ce2bbf094..5b401f91fd 100644 --- a/transformers/tokenization_ctrl.py +++ b/transformers/tokenization_ctrl.py @@ -18,11 +18,13 @@ from __future__ import absolute_import, division, print_function, unicode_litera import json import logging import os -import regex as re from io import open +import regex as re + from .tokenization_utils import PreTrainedTokenizer + logger = logging.getLogger(__name__) VOCAB_FILES_NAMES = { diff --git a/transformers/tokenization_distilbert.py b/transformers/tokenization_distilbert.py index 7fed1e4058..bda5c6661c 100644 --- a/transformers/tokenization_distilbert.py +++ b/transformers/tokenization_distilbert.py @@ -24,6 +24,7 @@ from io import open from .tokenization_bert import BertTokenizer + logger = logging.getLogger(__name__) VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} diff --git a/transformers/tokenization_gpt2.py b/transformers/tokenization_gpt2.py index b6a0e7b78b..06da888506 100644 --- a/transformers/tokenization_gpt2.py +++ b/transformers/tokenization_gpt2.py @@ -15,13 +15,17 @@ """Tokenization classes for OpenAI GPT.""" from __future__ import absolute_import, division, print_function, unicode_literals -import sys import json import logging import os -import regex as re +import sys from io import open +import regex as re + +from .tokenization_utils import PreTrainedTokenizer + + try: from functools import lru_cache except ImportError: @@ -31,8 +35,6 @@ except ImportError: return lambda func: func -from .tokenization_utils import PreTrainedTokenizer - logger = logging.getLogger(__name__) VOCAB_FILES_NAMES = { diff --git a/transformers/tokenization_openai.py b/transformers/tokenization_openai.py index d8f7549eda..4ea182c672 100644 --- a/transformers/tokenization_openai.py +++ b/transformers/tokenization_openai.py @@ -21,8 +21,9 @@ import os import re from io import open -from .tokenization_utils import PreTrainedTokenizer from .tokenization_bert import BasicTokenizer +from .tokenization_utils import PreTrainedTokenizer + logger = logging.getLogger(__name__) diff --git a/transformers/tokenization_roberta.py b/transformers/tokenization_roberta.py index eae8b638fe..95472f5b30 100644 --- a/transformers/tokenization_roberta.py +++ b/transformers/tokenization_roberta.py @@ -15,15 +15,17 @@ """Tokenization classes for RoBERTa.""" from __future__ import absolute_import, division, print_function, unicode_literals -import sys import json import logging import os -import regex as re +import sys from io import open +import regex as re + from .tokenization_gpt2 import GPT2Tokenizer + try: from functools import lru_cache except ImportError: diff --git a/transformers/tokenization_t5.py b/transformers/tokenization_t5.py index 3b70d40857..8eb589cd1c 100644 --- a/transformers/tokenization_t5.py +++ b/transformers/tokenization_t5.py @@ -19,11 +19,13 @@ from __future__ import absolute_import, division, print_function, unicode_litera import logging import os import re -import six from shutil import copyfile +import six + from .tokenization_utils import PreTrainedTokenizer + logger = logging.getLogger(__name__) SPIECE_UNDERLINE = "▁" diff --git a/transformers/tokenization_transfo_xl.py b/transformers/tokenization_transfo_xl.py index b2f59625f9..ce058580ba 100644 --- a/transformers/tokenization_transfo_xl.py +++ b/transformers/tokenization_transfo_xl.py @@ -30,6 +30,7 @@ import numpy as np from .file_utils import cached_path from .tokenization_utils import PreTrainedTokenizer + try: import torch except ImportError: diff --git a/transformers/tokenization_utils.py b/transformers/tokenization_utils.py index f848785ee2..6cc1bedd8a 100644 --- a/transformers/tokenization_utils.py +++ b/transformers/tokenization_utils.py @@ -15,16 +15,18 @@ """Tokenization classes for OpenAI GPT.""" from __future__ import absolute_import, division, print_function, unicode_literals -import logging -import os -import json -import six import copy import itertools +import json +import logging +import os import re from io import open -from .file_utils import cached_path, is_remote_url, hf_bucket_url, is_tf_available, is_torch_available +import six + +from .file_utils import cached_path, hf_bucket_url, is_remote_url, is_tf_available, is_torch_available + if is_tf_available(): import tensorflow as tf diff --git a/transformers/tokenization_xlm.py b/transformers/tokenization_xlm.py index 9b96b92f23..7ef53cf80a 100644 --- a/transformers/tokenization_xlm.py +++ b/transformers/tokenization_xlm.py @@ -25,8 +25,9 @@ from io import open import sacremoses as sm -from .tokenization_utils import PreTrainedTokenizer from .tokenization_bert import BasicTokenizer +from .tokenization_utils import PreTrainedTokenizer + logger = logging.getLogger(__name__) diff --git a/transformers/tokenization_xlm_roberta.py b/transformers/tokenization_xlm_roberta.py index 30814c3a1d..de71f87d02 100644 --- a/transformers/tokenization_xlm_roberta.py +++ b/transformers/tokenization_xlm_roberta.py @@ -20,9 +20,12 @@ import os from shutil import copyfile import sentencepiece as spm + from transformers.tokenization_utils import PreTrainedTokenizer + from .tokenization_xlnet import SPIECE_UNDERLINE + logger = logging.getLogger(__name__) VOCAB_FILES_NAMES = {"vocab_file": "sentencepiece.bpe.model"} diff --git a/transformers/tokenization_xlnet.py b/transformers/tokenization_xlnet.py index 8ea0ccb77b..6c016728e1 100644 --- a/transformers/tokenization_xlnet.py +++ b/transformers/tokenization_xlnet.py @@ -17,13 +17,14 @@ from __future__ import absolute_import, division, print_function, unicode_litera import logging import os +import unicodedata from shutil import copyfile -import unicodedata import six from .tokenization_utils import PreTrainedTokenizer + logger = logging.getLogger(__name__) VOCAB_FILES_NAMES = {"vocab_file": "spiece.model"} diff --git a/utils/download_glue_data.py b/utils/download_glue_data.py index 7262dd7201..99eac94215 100644 --- a/utils/download_glue_data.py +++ b/utils/download_glue_data.py @@ -18,14 +18,15 @@ rm MSRParaphraseCorpus.msi 2/11/19: It looks like SentEval actually *is* hosting the extracted data. Hooray! """ -import os -import sys -import shutil import argparse +import os +import shutil +import sys import tempfile import urllib.request import zipfile + TASKS = ["CoLA", "SST", "MRPC", "QQP", "STS", "MNLI", "SNLI", "QNLI", "RTE", "WNLI", "diagnostic"] TASK2PATH = { "CoLA": "https://firebasestorage.googleapis.com/v0/b/mtl-sentence-representations.appspot.com/o/data%2FCoLA.zip?alt=media&token=46d5e637-3411-4188-bc44-5809b5bfb5f4",