Refactor Code samples; Test code samples (#5036)
* Refactor code samples * Test docstrings * Style * Tokenization examples * Run rest of tests * First step to testing source docs * Style and BART comment * Test the remainder of the code samples * Style * let to const * Formatting fixes * Ready for merge * Fix fixture + Style * Fix last tests * Update docs/source/quicktour.rst Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Addressing @sgugger's comments + Fix MobileBERT in TF Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
@@ -13,52 +13,19 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import doctest
|
||||
import logging
|
||||
import os
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from typing import List, Union
|
||||
|
||||
import transformers
|
||||
|
||||
from .utils import require_tf, require_torch, slow
|
||||
|
||||
|
||||
def get_examples_from_file(file):
|
||||
examples = []
|
||||
example = []
|
||||
example_mode = False
|
||||
example_indentation = None
|
||||
for i, line in enumerate(file):
|
||||
if example_mode:
|
||||
current_indentation = len(line) - len(line.strip()) - 1
|
||||
|
||||
# Check if the indentation is 0 for the example, so that we don't exit as soon as there's a line return.
|
||||
empty_line = example_indentation == 0 and len(line) == 1
|
||||
|
||||
# If we're back to the example indentation or if it's the end of the docstring.
|
||||
if (current_indentation == example_indentation and not empty_line) or '"""' in line:
|
||||
# Exit the example mode and add the example to the examples list
|
||||
example_mode = False
|
||||
example_indentation = None
|
||||
examples.append(example)
|
||||
example = []
|
||||
else:
|
||||
# If line is not empty, add it to the current example
|
||||
if line != "\n":
|
||||
example.append(line[example_indentation + 4 : -1])
|
||||
|
||||
# Detect the example from '::' or 'example::'
|
||||
if "example::" in line.lower():
|
||||
example_mode = True
|
||||
example_indentation = line.lower().find("example::")
|
||||
elif "examples::" in line.lower():
|
||||
example_mode = True
|
||||
example_indentation = line.lower().find("examples::")
|
||||
# elif "::" in line.lower() and len(line.strip()) == 2:
|
||||
# example_mode = True
|
||||
# example_indentation = line.lower().find("::")
|
||||
|
||||
examples = ["\n".join(example) for example in examples]
|
||||
examples = [example for example in examples if "not runnable" not in example.lower()]
|
||||
|
||||
return examples
|
||||
logger = logging.getLogger()
|
||||
|
||||
|
||||
@require_torch
|
||||
@@ -66,68 +33,81 @@ def get_examples_from_file(file):
|
||||
@slow
|
||||
class TestCodeExamples(unittest.TestCase):
|
||||
def analyze_directory(
|
||||
self, directory: str, identifier: Union[str, None] = None, ignore_files: Union[List[str], None] = None
|
||||
self,
|
||||
directory: Path,
|
||||
identifier: Union[str, None] = None,
|
||||
ignore_files: Union[List[str], None] = [],
|
||||
n_identifier: Union[str, None] = None,
|
||||
only_modules: bool = True,
|
||||
):
|
||||
"""
|
||||
Runs through the specific directory, looking for the files identified with `identifier`. Executes
|
||||
the doctests in those files
|
||||
|
||||
Args:
|
||||
directory (:obj:`str`): Directory containing the files
|
||||
identifier (:obj:`str`): Will parse files containing this
|
||||
ignore_files (:obj:`List[str]`): List of files to skip
|
||||
n_identifier (:obj:`str` or :obj:`List[str]`): Will not parse files containing this/these identifiers.
|
||||
only_modules (:obj:`bool`): Whether to only analyze modules
|
||||
"""
|
||||
files = [file for file in os.listdir(directory) if os.path.isfile(os.path.join(directory, file))]
|
||||
|
||||
if identifier is not None:
|
||||
files = [file for file in files if identifier in file]
|
||||
|
||||
if ignore_files is not None:
|
||||
files = [file for file in files if file not in ignore_files]
|
||||
if n_identifier is not None:
|
||||
if isinstance(n_identifier, List):
|
||||
for n_ in n_identifier:
|
||||
files = [file for file in files if n_ not in file]
|
||||
else:
|
||||
files = [file for file in files if n_identifier not in file]
|
||||
|
||||
ignore_files.append("__init__.py")
|
||||
files = [file for file in files if file not in ignore_files]
|
||||
|
||||
for file in files:
|
||||
# Open all files
|
||||
print("Testing", file, end=" ")
|
||||
with open(os.path.join(directory, file)) as f:
|
||||
# Retrieve examples
|
||||
examples = get_examples_from_file(f)
|
||||
joined_examples = []
|
||||
print("Testing", file)
|
||||
|
||||
def execute_example(code_example):
|
||||
exec(code_example, {})
|
||||
|
||||
# Some examples are the continuation of others.
|
||||
if len(examples) > 0:
|
||||
joined_examples.append(examples[0])
|
||||
joined_examples_index = 0
|
||||
for example in examples[1:]:
|
||||
# If they contain this line, then they're a continuation of the previous script
|
||||
if "# Continuation of the previous script" in example:
|
||||
joined_examples[joined_examples_index] += "\n" + example
|
||||
# If not, create a new example and increment the index
|
||||
else:
|
||||
joined_examples.append(example)
|
||||
joined_examples_index += 1
|
||||
|
||||
print(str(len(joined_examples)) + "/" + str(len(joined_examples)))
|
||||
|
||||
# Execute sub tests with every example.
|
||||
for index, code_example in enumerate(joined_examples):
|
||||
with self.subTest(msg=file + " " + str(index) + "/" + str(len(joined_examples)) + code_example):
|
||||
execute_example(code_example)
|
||||
|
||||
def test_configuration_examples(self):
|
||||
transformers_directory = "src/transformers"
|
||||
configuration_files = "configuration"
|
||||
ignore_files = ["configuration_auto.py", "configuration_utils.py"]
|
||||
self.analyze_directory(transformers_directory, identifier=configuration_files, ignore_files=ignore_files)
|
||||
|
||||
def test_main_doc_examples(self):
|
||||
doc_directory = "docs/source"
|
||||
ignore_files = ["favicon.ico"]
|
||||
self.analyze_directory(doc_directory, ignore_files=ignore_files)
|
||||
if only_modules:
|
||||
try:
|
||||
module_identifier = file.split(".")[0]
|
||||
module_identifier = getattr(transformers, module_identifier)
|
||||
suite = doctest.DocTestSuite(module_identifier)
|
||||
result = unittest.TextTestRunner().run(suite)
|
||||
self.assertIs(len(result.failures), 0)
|
||||
except AttributeError:
|
||||
logger.info(f"{module_identifier} is not a module.")
|
||||
else:
|
||||
result = doctest.testfile(str(".." / directory / file), optionflags=doctest.ELLIPSIS)
|
||||
self.assertIs(result.failed, 0)
|
||||
|
||||
def test_modeling_examples(self):
|
||||
transformers_directory = "src/transformers"
|
||||
modeling_files = "modeling"
|
||||
files = "modeling"
|
||||
ignore_files = [
|
||||
"modeling_auto.py",
|
||||
"modeling_t5.py",
|
||||
"modeling_tf_auto.py",
|
||||
"modeling_utils.py",
|
||||
"modeling_tf_t5.py",
|
||||
"modeling_bart.py",
|
||||
"modeling_tf_utils.py",
|
||||
"modeling_ctrl.py",
|
||||
"modeling_tf_ctrl.py",
|
||||
]
|
||||
self.analyze_directory(transformers_directory, identifier=modeling_files, ignore_files=ignore_files)
|
||||
self.analyze_directory(transformers_directory, identifier=files, ignore_files=ignore_files)
|
||||
|
||||
def test_tokenization_examples(self):
|
||||
transformers_directory = Path("src/transformers")
|
||||
files = "tokenization"
|
||||
self.analyze_directory(transformers_directory, identifier=files)
|
||||
|
||||
def test_configuration_examples(self):
|
||||
transformers_directory = Path("src/transformers")
|
||||
files = "configuration"
|
||||
self.analyze_directory(transformers_directory, identifier=files)
|
||||
|
||||
def test_remaining_examples(self):
|
||||
transformers_directory = Path("src/transformers")
|
||||
n_identifiers = ["configuration", "modeling", "tokenization"]
|
||||
self.analyze_directory(transformers_directory, n_identifier=n_identifiers)
|
||||
|
||||
def test_doc_sources(self):
|
||||
doc_source_directory = Path("docs/source")
|
||||
ignore_files = ["favicon.ico"]
|
||||
self.analyze_directory(doc_source_directory, ignore_files=ignore_files, only_modules=False)
|
||||
|
||||
@@ -31,6 +31,7 @@ if is_tf_available():
|
||||
TFXLMWithLMHeadModel,
|
||||
TFXLMForSequenceClassification,
|
||||
TFXLMForQuestionAnsweringSimple,
|
||||
TFXLMForTokenClassification,
|
||||
TF_XLM_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
)
|
||||
|
||||
@@ -219,6 +220,26 @@ class TFXLMModelTester:
|
||||
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.type_sequence_label_size])
|
||||
|
||||
def create_and_check_xlm_for_token_classification(
|
||||
self,
|
||||
config,
|
||||
input_ids,
|
||||
token_type_ids,
|
||||
input_lengths,
|
||||
sequence_labels,
|
||||
token_labels,
|
||||
is_impossible_labels,
|
||||
input_mask,
|
||||
):
|
||||
config.num_labels = self.num_labels
|
||||
model = TFXLMForTokenClassification(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
(logits,) = model(inputs)
|
||||
result = {
|
||||
"logits": logits.numpy(),
|
||||
}
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels])
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
config_and_inputs = self.prepare_config_and_inputs()
|
||||
(
|
||||
@@ -244,7 +265,14 @@ class TFXLMModelTester:
|
||||
class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
|
||||
all_model_classes = (
|
||||
(TFXLMModel, TFXLMWithLMHeadModel, TFXLMForSequenceClassification, TFXLMForQuestionAnsweringSimple)
|
||||
# TODO The multiple choice model is missing and should be added.
|
||||
(
|
||||
TFXLMModel,
|
||||
TFXLMWithLMHeadModel,
|
||||
TFXLMForSequenceClassification,
|
||||
TFXLMForQuestionAnsweringSimple,
|
||||
TFXLMForTokenClassification,
|
||||
)
|
||||
if is_tf_available()
|
||||
else ()
|
||||
)
|
||||
@@ -275,6 +303,10 @@ class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_xlm_sequence_classif(*config_and_inputs)
|
||||
|
||||
def test_for_token_classification(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_xlm_for_token_classification(*config_and_inputs)
|
||||
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
for model_name in TF_XLM_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
|
||||
Reference in New Issue
Block a user