Switch return_dict to True by default. (#8530)
* Use the CI to identify failing tests
* Remove from all examples and tests
* More default switch
* Fixes
* More test fixes
* More fixes
* Last fixes hopefully
* Use the CI to identify failing tests
* Remove from all examples and tests
* More default switch
* Fixes
* More test fixes
* More fixes
* Last fixes hopefully
* Run on the real suite
* Fix slow tests
This commit is contained in:
@@ -210,7 +210,6 @@
|
||||
" visual_feats=features,\n",
|
||||
" visual_pos=normalized_boxes,\n",
|
||||
" token_type_ids=inputs.token_type_ids,\n",
|
||||
" return_dict=True,\n",
|
||||
" output_attentions=False,\n",
|
||||
" )\n",
|
||||
" output_vqa = lxmert_vqa(\n",
|
||||
@@ -219,7 +218,6 @@
|
||||
" visual_feats=features,\n",
|
||||
" visual_pos=normalized_boxes,\n",
|
||||
" token_type_ids=inputs.token_type_ids,\n",
|
||||
" return_dict=True,\n",
|
||||
" output_attentions=False,\n",
|
||||
" )\n",
|
||||
" # get prediction\n",
|
||||
@@ -266,4 +264,4 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
}
|
||||
@@ -321,7 +321,7 @@ def evaluate(args, model, tokenizer, prefix=""):
|
||||
eval_feature = features[feature_index.item()]
|
||||
unique_id = int(eval_feature.unique_id)
|
||||
|
||||
output = [to_list(output[i]) for output in outputs]
|
||||
output = [to_list(output[i]) for output in outputs.to_tuple()]
|
||||
|
||||
# Some models (XLNet, XLM) use 5 arguments for their predictions, while the other "simpler"
|
||||
# models only use two.
|
||||
|
||||
@@ -95,7 +95,7 @@ def evaluate_batch_retrieval(args, rag_model, questions):
|
||||
truncation=True,
|
||||
)["input_ids"].to(args.device)
|
||||
|
||||
question_enc_outputs = rag_model.rag.question_encoder(retriever_input_ids, return_dict=True)
|
||||
question_enc_outputs = rag_model.rag.question_encoder(retriever_input_ids)
|
||||
question_enc_pool_output = question_enc_outputs.pooler_output
|
||||
|
||||
result = rag_model.retriever(
|
||||
|
||||
@@ -204,7 +204,6 @@ class GenerativeQAModule(BaseTransformer):
|
||||
decoder_input_ids=decoder_input_ids,
|
||||
use_cache=False,
|
||||
labels=lm_labels,
|
||||
return_dict=True,
|
||||
**rag_kwargs,
|
||||
)
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ def embed(documents: dict, ctx_encoder: DPRContextEncoder, ctx_tokenizer: DPRCon
|
||||
input_ids = ctx_tokenizer(
|
||||
documents["title"], documents["text"], truncation=True, padding="longest", return_tensors="pt"
|
||||
)["input_ids"]
|
||||
embeddings = ctx_encoder(input_ids.to(device=device), return_dict=True).pooler_output
|
||||
embeddings = ctx_encoder(input_ids.to(device=device)).pooler_output
|
||||
return {"embeddings": embeddings.detach().cpu().numpy()}
|
||||
|
||||
|
||||
|
||||
@@ -153,7 +153,6 @@ class SummarizationDistiller(SummarizationModule):
|
||||
output_hidden_states=self.do_calc_hidden_loss,
|
||||
output_attentions=False,
|
||||
use_cache=False,
|
||||
return_dict=True,
|
||||
)
|
||||
lm_logits = student_outputs.logits
|
||||
|
||||
@@ -179,7 +178,6 @@ class SummarizationDistiller(SummarizationModule):
|
||||
input_ids,
|
||||
attention_mask=src_mask,
|
||||
output_hidden_states=self.do_calc_hidden_loss,
|
||||
return_dict=True,
|
||||
)
|
||||
if self.different_base_models:
|
||||
teacher_enc_outputs = all_teacher_encoder_outputs.last_hidden_state
|
||||
@@ -199,7 +197,6 @@ class SummarizationDistiller(SummarizationModule):
|
||||
decoder_input_ids=decoder_input_ids,
|
||||
output_hidden_states=self.do_calc_hidden_loss,
|
||||
use_cache=False, # since we are not passing labels, never let this default to True
|
||||
return_dict=True,
|
||||
)
|
||||
dec_mask = decoder_input_ids.ne(pad_token_id)
|
||||
loss_ce = self.calc_ce_loss(dec_mask, lm_logits, teacher_outputs.logits)
|
||||
|
||||
@@ -185,7 +185,7 @@ class TestSummarizationDistiller(TestCasePlus):
|
||||
|
||||
@require_torch_non_multi_gpu_but_fix_me
|
||||
def test_loss_fn(self):
|
||||
model = AutoModelForSeq2SeqLM.from_pretrained(BART_TINY, return_dict=True)
|
||||
model = AutoModelForSeq2SeqLM.from_pretrained(BART_TINY)
|
||||
input_ids, mask = model.dummy_inputs["input_ids"], model.dummy_inputs["attention_mask"]
|
||||
target_ids = torch.tensor([[0, 4, 8, 2], [0, 8, 2, 1]], dtype=torch.long, device=model.device)
|
||||
decoder_input_ids = target_ids[:, :-1].contiguous() # Why this line?
|
||||
|
||||
Reference in New Issue
Block a user