From b86e42e0ac1b59f21f0eccf351d3346bbe3ed4eb Mon Sep 17 00:00:00 2001 From: Sam Shleifer Date: Mon, 25 May 2020 19:20:50 -0400 Subject: [PATCH] [ci] fix 3 remaining slow GPU failures (#4584) --- src/transformers/configuration_distilbert.py | 4 ++-- src/transformers/modeling_encoder_decoder.py | 2 +- tests/test_modeling_bart.py | 2 +- tests/test_modeling_tf_electra.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/transformers/configuration_distilbert.py b/src/transformers/configuration_distilbert.py index 1d528297bb..2bb838bd98 100644 --- a/src/transformers/configuration_distilbert.py +++ b/src/transformers/configuration_distilbert.py @@ -73,10 +73,10 @@ class DistilBertConfig(PretrainedConfig): The standard deviation of the truncated_normal_initializer for initializing all weight matrices. qa_dropout (:obj:`float`, optional, defaults to 0.1): The dropout probabilities used in the question answering model - :class:`~tranformers.DistilBertForQuestionAnswering`. + :class:`~transformers.DistilBertForQuestionAnswering`. seq_classif_dropout (:obj:`float`, optional, defaults to 0.2): The dropout probabilities used in the sequence classification model - :class:`~tranformers.DistilBertForSequenceClassification`. + :class:`~transformers.DistilBertForSequenceClassification`. Example:: diff --git a/src/transformers/modeling_encoder_decoder.py b/src/transformers/modeling_encoder_decoder.py index 451edc6c03..57eccb6b33 100644 --- a/src/transformers/modeling_encoder_decoder.py +++ b/src/transformers/modeling_encoder_decoder.py @@ -125,7 +125,7 @@ class EncoderDecoderModel(PreTrainedModel): Examples:: - from tranformers import EncoderDecoder + from transformers import EncoderDecoder model = EncoderDecoder.from_encoder_decoder_pretrained('bert-base-uncased', 'bert-base-uncased') # initialize Bert2Bert """ diff --git a/tests/test_modeling_bart.py b/tests/test_modeling_bart.py index 718d41ca30..49989ed402 100644 --- a/tests/test_modeling_bart.py +++ b/tests/test_modeling_bart.py @@ -240,7 +240,7 @@ class BartTranslationTests(unittest.TestCase): with torch.no_grad(): logits, *other_stuff = model(**self.net_input) - expected_slice = torch.tensor([9.0078, 10.1113, 14.4787]) + expected_slice = torch.tensor([9.0078, 10.1113, 14.4787], device=torch_device) result_slice = logits[0][0][:3] self.assertTrue(torch.allclose(expected_slice, result_slice, atol=TOLERANCE)) diff --git a/tests/test_modeling_tf_electra.py b/tests/test_modeling_tf_electra.py index 27e26c0b26..a480241fe2 100644 --- a/tests/test_modeling_tf_electra.py +++ b/tests/test_modeling_tf_electra.py @@ -222,6 +222,6 @@ class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase): @slow def test_model_from_pretrained(self): # for model_name in list(TF_ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: - for model_name in ["electra-small-discriminator"]: + for model_name in ["google/electra-small-discriminator"]: model = TFElectraModel.from_pretrained(model_name) self.assertIsNotNone(model)