Switch return_dict to True by default. (#8530)

* Use the CI to identify failing tests

* Remove from all examples and tests

* More default switch

* Fixes

* More test fixes

* More fixes

* Last fixes hopefully

* Use the CI to identify failing tests

* Remove from all examples and tests

* More default switch

* Fixes

* More test fixes

* More fixes

* Last fixes hopefully

* Run on the real suite

* Fix slow tests
This commit is contained in:
Sylvain Gugger
2020-11-16 11:43:00 -05:00
committed by GitHub
parent 0d0a0785fd
commit 1073a2bde5
106 changed files with 138 additions and 234 deletions

View File

@@ -210,7 +210,6 @@
" visual_feats=features,\n",
" visual_pos=normalized_boxes,\n",
" token_type_ids=inputs.token_type_ids,\n",
" return_dict=True,\n",
" output_attentions=False,\n",
" )\n",
" output_vqa = lxmert_vqa(\n",
@@ -219,7 +218,6 @@
" visual_feats=features,\n",
" visual_pos=normalized_boxes,\n",
" token_type_ids=inputs.token_type_ids,\n",
" return_dict=True,\n",
" output_attentions=False,\n",
" )\n",
" # get prediction\n",
@@ -266,4 +264,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
-}
+}

View File

@@ -321,7 +321,7 @@ def evaluate(args, model, tokenizer, prefix=""):
eval_feature = features[feature_index.item()]
unique_id = int(eval_feature.unique_id)
-output = [to_list(output[i]) for output in outputs]
+output = [to_list(output[i]) for output in outputs.to_tuple()]
# Some models (XLNet, XLM) use 5 arguments for their predictions, while the other "simpler"
# models only use two.

View File

@@ -95,7 +95,7 @@ def evaluate_batch_retrieval(args, rag_model, questions):
truncation=True,
)["input_ids"].to(args.device)
-question_enc_outputs = rag_model.rag.question_encoder(retriever_input_ids, return_dict=True)
+question_enc_outputs = rag_model.rag.question_encoder(retriever_input_ids)
question_enc_pool_output = question_enc_outputs.pooler_output
result = rag_model.retriever(

View File

@@ -204,7 +204,6 @@ class GenerativeQAModule(BaseTransformer):
decoder_input_ids=decoder_input_ids,
use_cache=False,
labels=lm_labels,
-return_dict=True,
**rag_kwargs,
)

View File

@@ -47,7 +47,7 @@ def embed(documents: dict, ctx_encoder: DPRContextEncoder, ctx_tokenizer: DPRCon
input_ids = ctx_tokenizer(
documents["title"], documents["text"], truncation=True, padding="longest", return_tensors="pt"
)["input_ids"]
-embeddings = ctx_encoder(input_ids.to(device=device), return_dict=True).pooler_output
+embeddings = ctx_encoder(input_ids.to(device=device)).pooler_output
return {"embeddings": embeddings.detach().cpu().numpy()}

View File

@@ -153,7 +153,6 @@ class SummarizationDistiller(SummarizationModule):
output_hidden_states=self.do_calc_hidden_loss,
output_attentions=False,
use_cache=False,
-return_dict=True,
)
lm_logits = student_outputs.logits
@@ -179,7 +178,6 @@ class SummarizationDistiller(SummarizationModule):
input_ids,
attention_mask=src_mask,
output_hidden_states=self.do_calc_hidden_loss,
-return_dict=True,
)
if self.different_base_models:
teacher_enc_outputs = all_teacher_encoder_outputs.last_hidden_state
@@ -199,7 +197,6 @@ class SummarizationDistiller(SummarizationModule):
decoder_input_ids=decoder_input_ids,
output_hidden_states=self.do_calc_hidden_loss,
use_cache=False, # since we are not passing labels, never let this default to True
-return_dict=True,
)
dec_mask = decoder_input_ids.ne(pad_token_id)
loss_ce = self.calc_ce_loss(dec_mask, lm_logits, teacher_outputs.logits)

View File

@@ -185,7 +185,7 @@ class TestSummarizationDistiller(TestCasePlus):
@require_torch_non_multi_gpu_but_fix_me
def test_loss_fn(self):
-model = AutoModelForSeq2SeqLM.from_pretrained(BART_TINY, return_dict=True)
+model = AutoModelForSeq2SeqLM.from_pretrained(BART_TINY)
input_ids, mask = model.dummy_inputs["input_ids"], model.dummy_inputs["attention_mask"]
target_ids = torch.tensor([[0, 4, 8, 2], [0, 8, 2, 1]], dtype=torch.long, device=model.device)
decoder_input_ids = target_ids[:, :-1].contiguous() # Why this line?