From 07a79db505253cd8196c2d00ad2ba498e8514944 Mon Sep 17 00:00:00 2001 From: Lysandre Date: Wed, 4 Mar 2020 19:11:31 -0500 Subject: [PATCH] Fix failing doc samples --- docs/source/multilingual.rst | 4 ++++ src/transformers/modeling_flaubert.py | 5 ++++- tests/test_doc_samples.py | 8 ++++++-- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/docs/source/multilingual.rst b/docs/source/multilingual.rst index f6f72b2434..781222962b 100644 --- a/docs/source/multilingual.rst +++ b/docs/source/multilingual.rst @@ -47,6 +47,7 @@ The different languages this model/tokenizer handles, as well as the ids of thes .. code-block:: + # Continuation of the previous script print(tokenizer.lang2id) # {'en': 0, 'fr': 1} @@ -54,6 +55,7 @@ These ids should be used when passing a language parameter during a model pass. .. code-block:: + # Continuation of the previous script input_ids = torch.tensor([tokenizer.encode("Wikipedia was used to")]) # batch size of 1 @@ -62,6 +64,7 @@ filled with the appropriate language ids, of the same size as input_ids. For eng .. code-block:: + # Continuation of the previous script language_id = tokenizer.lang2id['en'] # 0 langs = torch.tensor([language_id] * input_ids.shape[1]) # torch.tensor([0, 0, 0, ..., 0]) @@ -73,6 +76,7 @@ You can then feed it all as input to your model: .. code-block:: + # Continuation of the previous script outputs = model(input_ids, langs=langs) diff --git a/src/transformers/modeling_flaubert.py b/src/transformers/modeling_flaubert.py index 56c3ce17a9..7236e44a16 100644 --- a/src/transformers/modeling_flaubert.py +++ b/src/transformers/modeling_flaubert.py @@ -148,9 +148,12 @@ class FlaubertModel(XLMModel): Examples:: + from transformers import FlaubertTokenizer, FlaubertModel + import torch + tokenizer = FlaubertTokenizer.from_pretrained('flaubert-base-cased') model = FlaubertModel.from_pretrained('flaubert-base-cased') - input_ids = torch.tensor(tokenizer.encode("Le chat manges une pomme.", add_special_tokens=True)).unsqueeze(0) # Batch size 1 + input_ids = torch.tensor(tokenizer.encode("Le chat mange une pomme.", add_special_tokens=True)).unsqueeze(0) # Batch size 1 outputs = model(input_ids) last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple diff --git a/tests/test_doc_samples.py b/tests/test_doc_samples.py index c97af35200..efed437d9d 100644 --- a/tests/test_doc_samples.py +++ b/tests/test_doc_samples.py @@ -78,6 +78,7 @@ class TestCodeExamples(unittest.TestCase): for file in files: # Open all files + print("Testing", file, end=" ") with open(os.path.join(directory, file)) as f: # Retrieve examples examples = get_examples_from_file(f) @@ -99,7 +100,7 @@ class TestCodeExamples(unittest.TestCase): joined_examples.append(example) joined_examples_index += 1 - print("Testing", file, str(len(joined_examples)) + "/" + str(len(joined_examples))) + print(str(len(joined_examples)) + "/" + str(len(joined_examples))) # Execute sub tests with every example. for index, code_example in enumerate(joined_examples): @@ -114,7 +115,8 @@ class TestCodeExamples(unittest.TestCase): def test_main_doc_examples(self): doc_directory = "docs/source" - self.analyze_directory(doc_directory) + ignore_files = ["favicon.ico"] + self.analyze_directory(doc_directory, ignore_files=ignore_files) def test_modeling_examples(self): transformers_directory = "src/transformers" @@ -125,5 +127,7 @@ class TestCodeExamples(unittest.TestCase): "modeling_tf_auto.py", "modeling_utils.py", "modeling_tf_t5.py", + "modeling_bart.py", + "modeling_tf_utils.py" ] self.analyze_directory(transformers_directory, identifier=modeling_files, ignore_files=ignore_files)