From 120176ea292d7e3c4df52021f92e9fb62222a18f Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Tue, 8 Sep 2020 18:51:28 +0200 Subject: [PATCH] [Longformer] Fix longformer documentation (#7016) * fix longformer * allow position ids to not be initialized --- src/transformers/modeling_longformer.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/transformers/modeling_longformer.py b/src/transformers/modeling_longformer.py index a7d98443f9..f1979225e2 100755 --- a/src/transformers/modeling_longformer.py +++ b/src/transformers/modeling_longformer.py @@ -795,6 +795,7 @@ class LongformerPreTrainedModel(PreTrainedModel): config_class = LongformerConfig base_model_prefix = "longformer" + authorized_missing_keys = [r"position_ids"] def _init_weights(self, module): """ Initialize the weights """ @@ -1019,11 +1020,13 @@ class LongformerModel(LongformerPreTrainedModel): >>> # Attention mask values -- 0: no attention, 1: local attention, 2: global attention >>> attention_mask = torch.ones(input_ids.shape, dtype=torch.long, device=input_ids.device) # initialize to local attention - >>> attention_mask[:, [1, 4, 21,]] = 2 # Set global attention based on the task. For example, + >>> global_attention_mask = torch.zeros(input_ids.shape, dtype=torch.long, device=input_ids.device) # initialize to global attention to be deactivated for all tokens + >>> global_attention_mask[:, [1, 4, 21,]] = 1 # Set global attention to random tokens for the sake of this example + ... # Usually, set global attention based on the task. For example, ... # classification: the token ... # QA: question tokens ... # LM: potentially on the beginning of sentences and paragraphs - >>> outputs = model(input_ids, attention_mask=attention_mask) + >>> outputs = model(input_ids, attention_mask=attention_mask, global_attention_mask=global_attention_mask) >>> sequence_output = outputs.last_hidden_state >>> pooled_output = outputs.pooler_output """