From 1e2acd0dcf44e6d53f80cac04341692478733072 Mon Sep 17 00:00:00 2001 From: Shashank Gupta Date: Mon, 9 Nov 2020 20:53:26 +0530 Subject: [PATCH] Bug fix for permutation language modelling (#8409) --- src/transformers/data/data_collator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/data/data_collator.py b/src/transformers/data/data_collator.py index ba94baaa7d..6d8234f9c3 100644 --- a/src/transformers/data/data_collator.py +++ b/src/transformers/data/data_collator.py @@ -579,7 +579,7 @@ class DataCollatorForPermutationLanguageModeling: masked_indices.masked_fill_(padding_mask, value=0.0) # Mask indicating non-functional tokens, where functional tokens are [SEP], [CLS], padding, etc. - non_func_mask = ~(padding_mask & special_tokens_mask) + non_func_mask = ~(padding_mask | special_tokens_mask) inputs[masked_indices] = self.tokenizer.mask_token_id labels[~masked_indices] = -100 # We only compute loss on masked tokens