From 74ffc9ea6b65128fca06c42d26c9932681ea0944 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 7 May 2020 10:50:11 +0200 Subject: [PATCH] [Reformer] Fix example and error message (#4191) * fix example reformer * fix error message and example docstring * improved error message --- src/transformers/modeling_reformer.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/transformers/modeling_reformer.py b/src/transformers/modeling_reformer.py index 1936625641..fc7aa2dc0d 100644 --- a/src/transformers/modeling_reformer.py +++ b/src/transformers/modeling_reformer.py @@ -124,8 +124,8 @@ class AxialPositionEmbeddings(nn.Module): if self.training is True: assert ( reduce(mul, self.axial_pos_shape) == sequence_length - ), "Make sure that config.axial_pos_shape factors: {} multiply to sequence length: {}".format( - self.axial_pos_shape, sequence_length + ), "If training, make sure that config.axial_pos_shape factors: {} multiply to sequence length. Got prod({}) != sequence_length: {}. You might want to consider padding your sequence length to {} or changing config.axial_pos_shape.".format( + self.axial_pos_shape, self.axial_pos_shape, sequence_length, reduce(mul, self.axial_pos_shape) ) if self.dropout > 0: weights = torch.cat(broadcasted_weights, dim=-1) @@ -1515,11 +1515,11 @@ class ReformerModel(ReformerPreTrainedModel): Examples:: - from transformers import ReformerModel, ReformerTokenizer + from transformers import ReformerModelWithLMHead, ReformerTokenizer import torch - tokenizer = ReformerTokenizer.from_pretrained('bert-base-uncased') - model = ReformerModel.from_pretrained('bert-base-uncased') + tokenizer = ReformerTokenizer.from_pretrained('google/reformer-crime-and-punishment') + model = ReformerModelWithLMHead.from_pretrained('google/reformer-crime-and-punishment') input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 outputs = model(input_ids) @@ -1562,7 +1562,7 @@ class ReformerModel(ReformerPreTrainedModel): if self.training is True: raise ValueError( "If training, sequence Length {} has to be a multiple of least common multiple chunk_length {}. Please consider padding the input to a length of {}.".format( - input_shape[-2], least_common_mult_chunk_length, input_shape[-2] + padding_length + input_shape[-1], least_common_mult_chunk_length, input_shape[-1] + padding_length ) )