Reorganize repo (#8580)

* Put models in subfolders

* Styling

* Fix imports in tests

* More fixes in test imports

* Sneaky hidden imports

* Fix imports in doc files

* More sneaky imports

* Finish fixing tests

* Fix examples

* Fix path for copies

* More fixes for examples

* Fix dummy files

* More fixes for examples

* More model import fixes

* Is this why you're unhappy GitHub?

* Fix imports in convert command
This commit is contained in:
Sylvain Gugger
2020-11-16 21:43:42 -05:00
committed by GitHub
parent 901507335f
commit c89bdfbe72
381 changed files with 2651 additions and 1571 deletions

View File

@@ -16,9 +16,9 @@
import os
import unittest
from transformers.configuration_auto import CONFIG_MAPPING, AutoConfig
from transformers.configuration_bert import BertConfig
from transformers.configuration_roberta import RobertaConfig
from transformers.models.auto.configuration_auto import CONFIG_MAPPING, AutoConfig
from transformers.models.bert.configuration_bert import BertConfig
from transformers.models.roberta.configuration_roberta import RobertaConfig
from transformers.testing_utils import DUMMY_UNKWOWN_IDENTIFIER

View File

@@ -6,9 +6,9 @@ from transformers.testing_utils import require_flax, slow
if is_flax_available():
import jax
from transformers.modeling_flax_auto import FlaxAutoModel
from transformers.modeling_flax_bert import FlaxBertModel
from transformers.modeling_flax_roberta import FlaxRobertaModel
from transformers.models.auto.modeling_flax_auto import FlaxAutoModel
from transformers.models.bert.modeling_flax_bert import FlaxBertModel
from transformers.models.roberta.modeling_flax_roberta import FlaxRobertaModel
@require_flax

View File

@@ -1,7 +1,7 @@
import os
import unittest
import transformers.tokenization_bart
import transformers.models.bart.tokenization_bart
from transformers import logging
from transformers.testing_utils import CaptureLogger, mockenv
@@ -31,7 +31,7 @@ class HfArgumentParserTest(unittest.TestCase):
def test_integration(self):
level_origin = logging.get_verbosity()
logger = logging.get_logger("transformers.tokenization_bart")
logger = logging.get_logger("transformers.models.bart.tokenization_bart")
msg = "Testing 1, 2, 3"
# should be able to log warnings (if default settings weren't overridden by `pytest --log-level-all`)
@@ -62,7 +62,7 @@ class HfArgumentParserTest(unittest.TestCase):
# reset for the env var to take effect, next time some logger call is made
transformers.utils.logging._reset_library_root_logger()
# this action activates the env var
_ = logging.get_logger("transformers.tokenization_bart")
_ = logging.get_logger("transformers.models.bart.tokenization_bart")
env_level_str = os.getenv("TRANSFORMERS_VERBOSITY", None)
env_level = logging.log_levels[env_level_str]
@@ -85,7 +85,7 @@ class HfArgumentParserTest(unittest.TestCase):
logger = logging.logging.getLogger()
with CaptureLogger(logger) as cl:
# this action activates the env var
logging.get_logger("transformers.tokenization_bart")
logging.get_logger("transformers.models.bart.tokenization_bart")
self.assertIn("Unknown option TRANSFORMERS_VERBOSITY=super-error", cl.out)
# no need to restore as nothing was changed

View File

@@ -37,7 +37,7 @@ if is_torch_available():
AlbertForTokenClassification,
AlbertModel,
)
from transformers.modeling_albert import ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.models.albert.modeling_albert import ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST
class AlbertModelTester:

View File

@@ -45,7 +45,7 @@ if is_torch_available():
T5Config,
T5ForConditionalGeneration,
)
from transformers.modeling_auto import (
from transformers.models.auto.modeling_auto import (
MODEL_FOR_CAUSAL_LM_MAPPING,
MODEL_FOR_MASKED_LM_MAPPING,
MODEL_FOR_PRETRAINING_MAPPING,
@@ -56,9 +56,9 @@ if is_torch_available():
MODEL_MAPPING,
MODEL_WITH_LM_HEAD_MAPPING,
)
from transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.modeling_gpt2 import GPT2_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.modeling_t5 import T5_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.models.bert.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.models.gpt2.modeling_gpt2 import GPT2_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.models.t5.modeling_t5 import T5_PRETRAINED_MODEL_ARCHIVE_LIST
@require_torch

View File

@@ -48,7 +48,7 @@ if is_torch_available():
PegasusConfig,
pipeline,
)
from transformers.modeling_bart import (
from transformers.models.bart.modeling_bart import (
SinusoidalPositionalEmbedding,
_prepare_bart_decoder_inputs,
invert_mask,

View File

@@ -40,7 +40,7 @@ if is_torch_available():
BertLMHeadModel,
BertModel,
)
from transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.models.bert.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_LIST
class BertModelTester:

View File

@@ -34,7 +34,7 @@ if is_torch_available():
DebertaForSequenceClassification,
DebertaModel,
)
from transformers.modeling_deberta import DEBERTA_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.models.deberta.modeling_deberta import DEBERTA_PRETRAINED_MODEL_ARCHIVE_LIST
@require_torch

View File

@@ -27,7 +27,7 @@ if is_torch_available():
import torch
from transformers import BertConfig, DPRConfig, DPRContextEncoder, DPRQuestionEncoder, DPRReader
from transformers.modeling_dpr import (
from transformers.models.dpr.modeling_dpr import (
DPR_CONTEXT_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST,
DPR_QUESTION_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST,
DPR_READER_PRETRAINED_MODEL_ARCHIVE_LIST,

View File

@@ -37,7 +37,7 @@ if is_torch_available():
ElectraForTokenClassification,
ElectraModel,
)
from transformers.modeling_electra import ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.models.electra.modeling_electra import ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST
class ElectraModelTester:

View File

@@ -36,7 +36,7 @@ if is_torch_available():
FlaubertModel,
FlaubertWithLMHeadModel,
)
from transformers.modeling_flaubert import FLAUBERT_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.models.flaubert.modeling_flaubert import FLAUBERT_PRETRAINED_MODEL_ARCHIVE_LIST
class FlaubertModelTester(object):

View File

@@ -2,18 +2,17 @@ import unittest
from numpy import ndarray
from transformers import TensorType, is_flax_available, is_torch_available
from transformers import BertTokenizerFast, TensorType, is_flax_available, is_torch_available
from transformers.testing_utils import require_flax, require_torch
from transformers.tokenization_bert_fast import BertTokenizerFast
if is_flax_available():
from transformers.modeling_flax_bert import FlaxBertModel
from transformers.models.bert.modeling_flax_bert import FlaxBertModel
if is_torch_available():
import torch
from transformers.modeling_bert import BertModel
from transformers.models.bert.modeling_bert import BertModel
@require_flax

View File

@@ -2,18 +2,17 @@ import unittest
from numpy import ndarray
from transformers import TensorType, is_flax_available, is_torch_available
from transformers import RobertaTokenizerFast, TensorType, is_flax_available, is_torch_available
from transformers.testing_utils import require_flax, require_torch
from transformers.tokenization_roberta_fast import RobertaTokenizerFast
if is_flax_available():
from transformers.modeling_flax_roberta import FlaxRobertaModel
from transformers.models.roberta.modeling_flax_roberta import FlaxRobertaModel
if is_torch_available():
import torch
from transformers.modeling_roberta import RobertaModel
from transformers.models.roberta.modeling_roberta import RobertaModel
@require_flax

View File

@@ -32,7 +32,7 @@ if is_torch_available():
import torch
from transformers import FSMTConfig, FSMTForConditionalGeneration, FSMTModel, FSMTTokenizer
from transformers.modeling_fsmt import (
from transformers.models.fsmt.modeling_fsmt import (
SinusoidalPositionalEmbedding,
_prepare_fsmt_decoder_inputs,
invert_mask,

View File

@@ -35,7 +35,7 @@ if is_torch_available():
LxmertForQuestionAnswering,
LxmertModel,
)
from transformers.modeling_lxmert import LXMERT_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.models.lxmert.modeling_lxmert import LXMERT_PRETRAINED_MODEL_ARCHIVE_LIST
class LxmertModelTester:

View File

@@ -28,12 +28,12 @@ if is_torch_available():
import torch
from transformers import AutoModelWithLMHead, MarianMTModel
from transformers.convert_marian_to_pytorch import (
from transformers.models.bart.modeling_bart import shift_tokens_right
from transformers.models.marian.convert_marian_to_pytorch import (
ORG_NAME,
convert_hf_name_to_opus_name,
convert_opus_name_to_hf_name,
)
from transformers.modeling_bart import shift_tokens_right
from transformers.pipelines import TranslationPipeline

View File

@@ -1,8 +1,8 @@
import unittest
from transformers import AutoConfig, AutoTokenizer, is_torch_available
from transformers.configuration_pegasus import task_specific_params
from transformers.file_utils import cached_property
from transformers.models.pegasus.configuration_pegasus import task_specific_params
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
from transformers.utils.logging import ERROR, set_verbosity

View File

@@ -25,6 +25,9 @@ import numpy as np
from transformers import BartTokenizer, T5Tokenizer
from transformers.file_utils import cached_property, is_datasets_available, is_faiss_available, is_torch_available
from transformers.models.bert.tokenization_bert import VOCAB_FILES_NAMES as DPR_VOCAB_FILES_NAMES
from transformers.models.dpr.tokenization_dpr import DPRQuestionEncoderTokenizer
from transformers.models.roberta.tokenization_roberta import VOCAB_FILES_NAMES as BART_VOCAB_FILES_NAMES
from transformers.testing_utils import (
require_sentencepiece,
require_tokenizers,
@@ -33,9 +36,6 @@ from transformers.testing_utils import (
slow,
torch_device,
)
from transformers.tokenization_bert import VOCAB_FILES_NAMES as DPR_VOCAB_FILES_NAMES
from transformers.tokenization_dpr import DPRQuestionEncoderTokenizer
from transformers.tokenization_roberta import VOCAB_FILES_NAMES as BART_VOCAB_FILES_NAMES
from .test_modeling_bart import ModelTester as BartModelTester
from .test_modeling_dpr import DPRModelTester
@@ -205,7 +205,7 @@ class RagTestMixin:
)
dataset.add_faiss_index("embeddings", string_factory="Flat", metric_type=faiss.METRIC_INNER_PRODUCT)
tokenizer = self.bart_tokenizer if config.generator.model_type == "bart" else self.t5_tokenizer
with patch("transformers.retrieval_rag.load_dataset") as mock_load_dataset:
with patch("transformers.models.rag.retrieval_rag.load_dataset") as mock_load_dataset:
mock_load_dataset.return_value = dataset
retriever = RagRetriever(
config,

View File

@@ -37,7 +37,7 @@ if is_torch_available():
RobertaForTokenClassification,
RobertaModel,
)
from transformers.modeling_roberta import (
from transformers.models.roberta.modeling_roberta import (
ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST,
RobertaEmbeddings,
create_position_ids_from_input_ids,

View File

@@ -31,7 +31,7 @@ if is_torch_available():
import torch
from transformers import T5Config, T5ForConditionalGeneration, T5Model, T5Tokenizer
from transformers.modeling_t5 import T5_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.models.t5.modeling_t5 import T5_PRETRAINED_MODEL_ARCHIVE_LIST
class T5ModelTester:

View File

@@ -26,7 +26,7 @@ from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
if is_tf_available():
import tensorflow as tf
from transformers.modeling_tf_albert import (
from transformers.models.albert.modeling_tf_albert import (
TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
TFAlbertForMaskedLM,
TFAlbertForMultipleChoice,

View File

@@ -43,7 +43,7 @@ if is_tf_available():
TFRobertaForMaskedLM,
TFT5ForConditionalGeneration,
)
from transformers.modeling_tf_auto import (
from transformers.models.auto.modeling_tf_auto import (
TF_MODEL_FOR_CAUSAL_LM_MAPPING,
TF_MODEL_FOR_MASKED_LM_MAPPING,
TF_MODEL_FOR_PRETRAINING_MAPPING,
@@ -54,9 +54,9 @@ if is_tf_available():
TF_MODEL_MAPPING,
TF_MODEL_WITH_LM_HEAD_MAPPING,
)
from transformers.modeling_tf_bert import TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.modeling_tf_gpt2 import TF_GPT2_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.modeling_tf_t5 import TF_T5_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.models.bert.modeling_tf_bert import TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.models.gpt2.modeling_tf_gpt2 import TF_GPT2_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.models.t5.modeling_tf_t5 import TF_T5_PRETRAINED_MODEL_ARCHIVE_LIST
@require_tf

View File

@@ -31,7 +31,7 @@ if is_tf_available():
import tensorflow as tf
from transformers import TFBartForConditionalGeneration, TFBartModel
from transformers.modeling_tf_bart import TFSinusoidalPositionalEmbedding
from transformers.models.bart.modeling_tf_bart import TFSinusoidalPositionalEmbedding
@require_tf

View File

@@ -27,7 +27,7 @@ if is_tf_available():
import tensorflow as tf
from transformers import TF_MODEL_FOR_PRETRAINING_MAPPING
from transformers.modeling_tf_bert import (
from transformers.models.bert.modeling_tf_bert import (
TFBertForMaskedLM,
TFBertForMultipleChoice,
TFBertForNextSentencePrediction,

View File

@@ -26,7 +26,11 @@ from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
if is_tf_available():
import tensorflow as tf
from transformers.modeling_tf_ctrl import TF_CTRL_PRETRAINED_MODEL_ARCHIVE_LIST, TFCTRLLMHeadModel, TFCTRLModel
from transformers.models.ctrl.modeling_tf_ctrl import (
TF_CTRL_PRETRAINED_MODEL_ARCHIVE_LIST,
TFCTRLLMHeadModel,
TFCTRLModel,
)
class TFCTRLModelTester(object):

View File

@@ -26,7 +26,7 @@ from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
if is_tf_available():
import tensorflow as tf
from transformers.modeling_tf_distilbert import (
from transformers.models.distilbert.modeling_tf_distilbert import (
TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
TFDistilBertForMaskedLM,
TFDistilBertForMultipleChoice,

View File

@@ -26,7 +26,7 @@ from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
if is_tf_available():
import tensorflow as tf
from transformers.modeling_tf_electra import (
from transformers.models.electra.modeling_tf_electra import (
TFElectraForMaskedLM,
TFElectraForMultipleChoice,
TFElectraForPreTraining,

View File

@@ -26,7 +26,7 @@ from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
if is_tf_available():
import tensorflow as tf
from transformers.modeling_tf_funnel import (
from transformers import (
TFFunnelBaseModel,
TFFunnelForMaskedLM,
TFFunnelForMultipleChoice,

View File

@@ -26,7 +26,7 @@ from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
if is_tf_available():
import tensorflow as tf
from transformers.modeling_tf_gpt2 import (
from transformers.models.gpt2.modeling_tf_gpt2 import (
TF_GPT2_PRETRAINED_MODEL_ARCHIVE_LIST,
TFGPT2DoubleHeadsModel,
TFGPT2LMHeadModel,

View File

@@ -25,7 +25,7 @@ from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
if is_tf_available():
import tensorflow as tf
from transformers.modeling_tf_lxmert import TFLxmertForPreTraining, TFLxmertModel
from transformers.models.lxmert.modeling_tf_lxmert import TFLxmertForPreTraining, TFLxmertModel
class TFLxmertModelTester(object):

View File

@@ -26,7 +26,7 @@ from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
if is_tf_available():
import tensorflow as tf
from transformers.modeling_tf_mobilebert import (
from transformers import (
TFMobileBertForMaskedLM,
TFMobileBertForMultipleChoice,
TFMobileBertForNextSentencePrediction,

View File

@@ -26,7 +26,7 @@ from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
if is_tf_available():
import tensorflow as tf
from transformers.modeling_tf_openai import (
from transformers.models.openai.modeling_tf_openai import (
TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_LIST,
TFOpenAIGPTDoubleHeadsModel,
TFOpenAIGPTLMHeadModel,

View File

@@ -43,9 +43,9 @@ if is_tf_available():
TFRobertaForMaskedLM,
TFT5ForConditionalGeneration,
)
from transformers.modeling_tf_bert import TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.modeling_tf_gpt2 import TF_GPT2_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.modeling_tf_t5 import TF_T5_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.models.bert.modeling_tf_bert import TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.models.gpt2.modeling_tf_gpt2 import TF_GPT2_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.models.t5.modeling_tf_t5 import TF_T5_PRETRAINED_MODEL_ARCHIVE_LIST
if is_torch_available():
from transformers import (

View File

@@ -27,7 +27,7 @@ if is_tf_available():
import numpy
import tensorflow as tf
from transformers.modeling_tf_roberta import (
from transformers.models.roberta.modeling_tf_roberta import (
TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST,
TFRobertaForMaskedLM,
TFRobertaForMultipleChoice,

View File

@@ -27,7 +27,7 @@ from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
if is_tf_available():
import tensorflow as tf
from transformers.modeling_tf_xlnet import (
from transformers.models.xlnet.modeling_tf_xlnet import (
TF_XLNET_PRETRAINED_MODEL_ARCHIVE_LIST,
TFXLNetForMultipleChoice,
TFXLNetForQuestionAnsweringSimple,

View File

@@ -28,7 +28,7 @@ if is_torch_available():
import torch
from transformers import TransfoXLConfig, TransfoXLLMHeadModel, TransfoXLModel
from transformers.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.models.transfo_xl.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST
class TransfoXLModelTester:

View File

@@ -37,7 +37,7 @@ if is_torch_available():
XLMModel,
XLMWithLMHeadModel,
)
from transformers.modeling_xlm import XLM_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.models.xlm.modeling_xlm import XLM_PRETRAINED_MODEL_ARCHIVE_LIST
class XLMModelTester:

View File

@@ -38,7 +38,7 @@ if is_torch_available():
XLNetLMHeadModel,
XLNetModel,
)
from transformers.modeling_xlnet import XLNET_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.models.xlnet.modeling_xlnet import XLNET_PRETRAINED_MODEL_ARCHIVE_LIST
class XLNetModelTester:

View File

@@ -10,10 +10,14 @@ import numpy as np
from datasets import Dataset
from transformers import is_faiss_available
from transformers.configuration_bart import BartConfig
from transformers.configuration_dpr import DPRConfig
from transformers.configuration_rag import RagConfig
from transformers.retrieval_rag import CustomHFIndex, RagRetriever
from transformers.models.bart.configuration_bart import BartConfig
from transformers.models.bart.tokenization_bart import BartTokenizer
from transformers.models.bert.tokenization_bert import VOCAB_FILES_NAMES as DPR_VOCAB_FILES_NAMES
from transformers.models.dpr.configuration_dpr import DPRConfig
from transformers.models.dpr.tokenization_dpr import DPRQuestionEncoderTokenizer
from transformers.models.rag.configuration_rag import RagConfig
from transformers.models.rag.retrieval_rag import CustomHFIndex, RagRetriever
from transformers.models.roberta.tokenization_roberta import VOCAB_FILES_NAMES as BART_VOCAB_FILES_NAMES
from transformers.testing_utils import (
require_datasets,
require_faiss,
@@ -21,10 +25,6 @@ from transformers.testing_utils import (
require_tokenizers,
require_torch,
)
from transformers.tokenization_bart import BartTokenizer
from transformers.tokenization_bert import VOCAB_FILES_NAMES as DPR_VOCAB_FILES_NAMES
from transformers.tokenization_dpr import DPRQuestionEncoderTokenizer
from transformers.tokenization_roberta import VOCAB_FILES_NAMES as BART_VOCAB_FILES_NAMES
if is_faiss_available():
@@ -126,7 +126,7 @@ class RagRetrieverTest(TestCase):
question_encoder=DPRConfig().to_dict(),
generator=BartConfig().to_dict(),
)
with patch("transformers.retrieval_rag.load_dataset") as mock_load_dataset:
with patch("transformers.models.rag.retrieval_rag.load_dataset") as mock_load_dataset:
mock_load_dataset.return_value = dataset
retriever = RagRetriever(
config,
@@ -213,7 +213,7 @@ class RagRetrieverTest(TestCase):
def test_canonical_hf_index_retriever_save_and_from_pretrained(self):
retriever = self.get_dummy_canonical_hf_index_retriever()
with tempfile.TemporaryDirectory() as tmp_dirname:
with patch("transformers.retrieval_rag.load_dataset") as mock_load_dataset:
with patch("transformers.models.rag.retrieval_rag.load_dataset") as mock_load_dataset:
mock_load_dataset.return_value = self.get_dummy_dataset()
retriever.save_pretrained(tmp_dirname)
retriever = RagRetriever.from_pretrained(tmp_dirname)

View File

@@ -27,8 +27,9 @@ from transformers import (
RobertaTokenizer,
RobertaTokenizerFast,
)
from transformers.configuration_auto import AutoConfig
from transformers.configuration_roberta import RobertaConfig
from transformers.models.auto.configuration_auto import AutoConfig
from transformers.models.auto.tokenization_auto import TOKENIZER_MAPPING
from transformers.models.roberta.configuration_roberta import RobertaConfig
from transformers.testing_utils import (
DUMMY_DIFF_TOKENIZER_IDENTIFIER,
DUMMY_UNKWOWN_IDENTIFIER,
@@ -36,7 +37,6 @@ from transformers.testing_utils import (
require_tokenizers,
slow,
)
from transformers.tokenization_auto import TOKENIZER_MAPPING
class AutoTokenizerTest(unittest.TestCase):

View File

@@ -4,8 +4,8 @@ import unittest
from transformers import BartTokenizer, BartTokenizerFast, BatchEncoding
from transformers.file_utils import cached_property
from transformers.models.roberta.tokenization_roberta import VOCAB_FILES_NAMES
from transformers.testing_utils import require_tokenizers, require_torch
from transformers.tokenization_roberta import VOCAB_FILES_NAMES
from .test_tokenization_common import TokenizerTesterMixin, filter_roberta_detectors

View File

@@ -18,8 +18,7 @@ import os
import unittest
from transformers import BertTokenizerFast
from transformers.testing_utils import require_tokenizers, slow
from transformers.tokenization_bert import (
from transformers.models.bert.tokenization_bert import (
VOCAB_FILES_NAMES,
BasicTokenizer,
BertTokenizer,
@@ -28,6 +27,7 @@ from transformers.tokenization_bert import (
_is_punctuation,
_is_whitespace,
)
from transformers.testing_utils import require_tokenizers, slow
from .test_tokenization_common import TokenizerTesterMixin, filter_non_english

View File

@@ -18,14 +18,14 @@ import os
import pickle
import unittest
from transformers.testing_utils import custom_tokenizers
from transformers.tokenization_bert_japanese import (
from transformers.models.bert_japanese.tokenization_bert_japanese import (
VOCAB_FILES_NAMES,
BertJapaneseTokenizer,
CharacterTokenizer,
MecabTokenizer,
WordpieceTokenizer,
)
from transformers.testing_utils import custom_tokenizers
from .test_tokenization_common import TokenizerTesterMixin

View File

@@ -16,7 +16,7 @@
import os
import unittest
from transformers.tokenization_bertweet import VOCAB_FILES_NAMES, BertweetTokenizer
from transformers.models.bertweet.tokenization_bertweet import VOCAB_FILES_NAMES, BertweetTokenizer
from .test_tokenization_common import TokenizerTesterMixin

View File

@@ -20,7 +20,11 @@ import os
import unittest
from transformers.file_utils import cached_property
from transformers.tokenization_blenderbot import VOCAB_FILES_NAMES, BlenderbotSmallTokenizer, BlenderbotTokenizer
from transformers.models.blenderbot.tokenization_blenderbot import (
VOCAB_FILES_NAMES,
BlenderbotSmallTokenizer,
BlenderbotTokenizer,
)
from .test_tokenization_common import TokenizerTesterMixin

View File

@@ -17,7 +17,7 @@ import json
import os
import unittest
from transformers.tokenization_ctrl import VOCAB_FILES_NAMES, CTRLTokenizer
from transformers.models.ctrl.tokenization_ctrl import VOCAB_FILES_NAMES, CTRLTokenizer
from .test_tokenization_common import TokenizerTesterMixin

View File

@@ -18,8 +18,8 @@ import re
import unittest
from typing import Tuple
from transformers.models.deberta.tokenization_deberta import DebertaTokenizer
from transformers.testing_utils import require_torch
from transformers.tokenization_deberta import DebertaTokenizer
from .test_tokenization_common import TokenizerTesterMixin

View File

@@ -19,8 +19,8 @@ import os
import unittest
from transformers.file_utils import cached_property
from transformers.models.fsmt.tokenization_fsmt import VOCAB_FILES_NAMES, FSMTTokenizer
from transformers.testing_utils import slow
from transformers.tokenization_fsmt import VOCAB_FILES_NAMES, FSMTTokenizer
from .test_tokenization_common import TokenizerTesterMixin

View File

@@ -18,8 +18,8 @@ import os
import unittest
from transformers import FunnelTokenizer, FunnelTokenizerFast
from transformers.models.funnel.tokenization_funnel import VOCAB_FILES_NAMES
from transformers.testing_utils import require_tokenizers
from transformers.tokenization_funnel import VOCAB_FILES_NAMES
from .test_tokenization_common import TokenizerTesterMixin

View File

@@ -19,8 +19,8 @@ import os
import unittest
from transformers import GPT2Tokenizer, GPT2TokenizerFast
from transformers.models.gpt2.tokenization_gpt2 import VOCAB_FILES_NAMES
from transformers.testing_utils import require_tokenizers
from transformers.tokenization_gpt2 import VOCAB_FILES_NAMES
from .test_tokenization_common import TokenizerTesterMixin

View File

@@ -19,8 +19,8 @@ import os
import unittest
from transformers import HerbertTokenizer, HerbertTokenizerFast
from transformers.models.herbert.tokenization_herbert import VOCAB_FILES_NAMES
from transformers.testing_utils import get_tests_dir, require_tokenizers, slow
from transformers.tokenization_herbert import VOCAB_FILES_NAMES
from .test_tokenization_common import TokenizerTesterMixin

View File

@@ -18,8 +18,8 @@ import os
import unittest
from transformers import LayoutLMTokenizer, LayoutLMTokenizerFast
from transformers.models.layoutlm.tokenization_layoutlm import VOCAB_FILES_NAMES
from transformers.testing_utils import require_tokenizers
from transformers.tokenization_layoutlm import VOCAB_FILES_NAMES
from .test_tokenization_common import TokenizerTesterMixin

View File

@@ -18,8 +18,8 @@ import os
import unittest
from transformers import LxmertTokenizer, LxmertTokenizerFast
from transformers.models.bert.tokenization_bert import VOCAB_FILES_NAMES
from transformers.testing_utils import require_tokenizers
from transformers.tokenization_bert import VOCAB_FILES_NAMES
from .test_tokenization_common import TokenizerTesterMixin

View File

@@ -25,7 +25,7 @@ from transformers.testing_utils import _sentencepiece_available, _torch_availabl
if _sentencepiece_available:
from transformers.tokenization_marian import save_json, vocab_files_names
from transformers.models.marian.tokenization_marian import save_json, vocab_files_names
from .test_tokenization_common import TokenizerTesterMixin

View File

@@ -24,7 +24,7 @@ if _sentencepiece_available:
if is_torch_available():
from transformers.modeling_bart import shift_tokens_right
from transformers.models.bart.modeling_bart import shift_tokens_right
EN_CODE = 250004
RO_CODE = 250020

View File

@@ -19,8 +19,8 @@ import os
import unittest
from transformers import OpenAIGPTTokenizer, OpenAIGPTTokenizerFast
from transformers.models.openai.tokenization_openai import VOCAB_FILES_NAMES
from transformers.testing_utils import require_tokenizers
from transformers.tokenization_openai import VOCAB_FILES_NAMES
from .test_tokenization_common import TokenizerTesterMixin

View File

@@ -16,7 +16,7 @@
import os
import unittest
from transformers.tokenization_phobert import VOCAB_FILES_NAMES, PhobertTokenizer
from transformers.models.phobert.tokenization_phobert import VOCAB_FILES_NAMES, PhobertTokenizer
from .test_tokenization_common import TokenizerTesterMixin

View File

@@ -18,15 +18,15 @@ import os
import unittest
from transformers import BatchEncoding
from transformers.testing_utils import require_torch, slow
from transformers.tokenization_bert import (
from transformers.models.bert.tokenization_bert import (
BasicTokenizer,
WordpieceTokenizer,
_is_control,
_is_punctuation,
_is_whitespace,
)
from transformers.tokenization_prophetnet import VOCAB_FILES_NAMES, ProphetNetTokenizer
from transformers.models.prophetnet.tokenization_prophetnet import VOCAB_FILES_NAMES, ProphetNetTokenizer
from transformers.testing_utils import require_torch, slow
from .test_tokenization_common import TokenizerTesterMixin

View File

@@ -5,17 +5,17 @@ import tempfile
from unittest import TestCase
from transformers import BartTokenizer, BartTokenizerFast, DPRQuestionEncoderTokenizer, DPRQuestionEncoderTokenizerFast
from transformers.configuration_bart import BartConfig
from transformers.configuration_dpr import DPRConfig
from transformers.file_utils import is_datasets_available, is_faiss_available, is_torch_available
from transformers.models.bart.configuration_bart import BartConfig
from transformers.models.bert.tokenization_bert import VOCAB_FILES_NAMES as DPR_VOCAB_FILES_NAMES
from transformers.models.dpr.configuration_dpr import DPRConfig
from transformers.models.roberta.tokenization_roberta import VOCAB_FILES_NAMES as BART_VOCAB_FILES_NAMES
from transformers.testing_utils import require_datasets, require_faiss, require_tokenizers, require_torch, slow
from transformers.tokenization_bert import VOCAB_FILES_NAMES as DPR_VOCAB_FILES_NAMES
from transformers.tokenization_roberta import VOCAB_FILES_NAMES as BART_VOCAB_FILES_NAMES
if is_torch_available() and is_datasets_available() and is_faiss_available():
from transformers.configuration_rag import RagConfig
from transformers.tokenization_rag import RagTokenizer
from transformers.models.rag.configuration_rag import RagConfig
from transformers.models.rag.tokenization_rag import RagTokenizer
@require_faiss

View File

@@ -19,8 +19,8 @@ import os
import unittest
from transformers import AddedToken, RobertaTokenizer, RobertaTokenizerFast
from transformers.models.roberta.tokenization_roberta import VOCAB_FILES_NAMES
from transformers.testing_utils import require_tokenizers, slow
from transformers.tokenization_roberta import VOCAB_FILES_NAMES
from .test_tokenization_common import TokenizerTesterMixin

View File

@@ -17,7 +17,7 @@
import os
import unittest
from transformers.tokenization_transfo_xl import VOCAB_FILES_NAMES, TransfoXLTokenizer
from transformers.models.transfo_xl.tokenization_transfo_xl import VOCAB_FILES_NAMES, TransfoXLTokenizer
from .test_tokenization_common import TokenizerTesterMixin

View File

@@ -19,8 +19,8 @@ from typing import Callable, Optional
import numpy as np
from transformers import BatchEncoding, BertTokenizer, BertTokenizerFast, PreTrainedTokenizer, TensorType, TokenSpan
from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer
from transformers.testing_utils import require_tf, require_tokenizers, require_torch, slow
from transformers.tokenization_gpt2 import GPT2Tokenizer
class TokenizerUtilsTest(unittest.TestCase):

View File

@@ -18,8 +18,8 @@ import json
import os
import unittest
from transformers.models.xlm.tokenization_xlm import VOCAB_FILES_NAMES, XLMTokenizer
from transformers.testing_utils import slow
from transformers.tokenization_xlm import VOCAB_FILES_NAMES, XLMTokenizer
from .test_tokenization_common import TokenizerTesterMixin

View File

@@ -18,8 +18,8 @@ import os
import unittest
from transformers.file_utils import cached_property
from transformers.models.xlm_prophetnet.tokenization_xlm_prophetnet import SPIECE_UNDERLINE, XLMProphetNetTokenizer
from transformers.testing_utils import require_sentencepiece, slow
from transformers.tokenization_xlm_prophetnet import SPIECE_UNDERLINE, XLMProphetNetTokenizer
from .test_tokenization_common import TokenizerTesterMixin

View File

@@ -37,10 +37,11 @@ REFERENCE_CODE = """ def __init__(self, config):
class CopyCheckTester(unittest.TestCase):
def setUp(self):
self.transformer_dir = tempfile.mkdtemp()
os.makedirs(os.path.join(self.transformer_dir, "models/bert/"))
check_copies.TRANSFORMER_PATH = self.transformer_dir
shutil.copy(
os.path.join(git_repo_path, "src/transformers/modeling_bert.py"),
os.path.join(self.transformer_dir, "modeling_bert.py"),
os.path.join(git_repo_path, "src/transformers/models/bert/modeling_bert.py"),
os.path.join(self.transformer_dir, "models/bert/modeling_bert.py"),
)
def tearDown(self):
@@ -62,27 +63,27 @@ class CopyCheckTester(unittest.TestCase):
self.assertTrue(f.read(), expected)
def test_find_code_in_transformers(self):
code = check_copies.find_code_in_transformers("modeling_bert.BertLMPredictionHead")
code = check_copies.find_code_in_transformers("models.bert.modeling_bert.BertLMPredictionHead")
self.assertEqual(code, REFERENCE_CODE)
def test_is_copy_consistent(self):
# Base copy consistency
self.check_copy_consistency(
"# Copied from transformers.modeling_bert.BertLMPredictionHead",
"# Copied from transformers.models.bert.modeling_bert.BertLMPredictionHead",
"BertLMPredictionHead",
REFERENCE_CODE + "\n",
)
# With no empty line at the end
self.check_copy_consistency(
"# Copied from transformers.modeling_bert.BertLMPredictionHead",
"# Copied from transformers.models.bert.modeling_bert.BertLMPredictionHead",
"BertLMPredictionHead",
REFERENCE_CODE,
)
# Copy consistency with rename
self.check_copy_consistency(
"# Copied from transformers.modeling_bert.BertLMPredictionHead with Bert->TestModel",
"# Copied from transformers.models.bert.modeling_bert.BertLMPredictionHead with Bert->TestModel",
"TestModelLMPredictionHead",
re.sub("Bert", "TestModel", REFERENCE_CODE),
)
@@ -90,14 +91,14 @@ class CopyCheckTester(unittest.TestCase):
# Copy consistency with a really long name
long_class_name = "TestModelWithAReallyLongNameBecauseSomePeopleLikeThatForSomeReasonIReallyDontUnderstand"
self.check_copy_consistency(
f"# Copied from transformers.modeling_bert.BertLMPredictionHead with Bert->{long_class_name}",
f"# Copied from transformers.models.bert.modeling_bert.BertLMPredictionHead with Bert->{long_class_name}",
f"{long_class_name}LMPredictionHead",
re.sub("Bert", long_class_name, REFERENCE_CODE),
)
# Copy consistency with overwrite
self.check_copy_consistency(
"# Copied from transformers.modeling_bert.BertLMPredictionHead with Bert->TestModel",
"# Copied from transformers.models.bert.modeling_bert.BertLMPredictionHead with Bert->TestModel",
"TestModelLMPredictionHead",
REFERENCE_CODE,
overwrite_result=re.sub("Bert", "TestModel", REFERENCE_CODE),