Fix DPRReaderTokenizer's attention_mask (#9663)

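* Fix the attention_mask in DPRReaderTokenizer: build it per token (1 for non-padding tokens, 0 for `pad_token_id`) instead of comparing each whole `input_ids` list to `pad_token_id`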

* Add an integration test for DPRReader inference

* Run `make style`
This commit is contained in:
Sergey Mkrtchyan
2021-01-19 05:43:11 -05:00
committed by GitHub
parent 12c1b5b8f4
commit 917dbb15e0
3 changed files with 37 additions and 3 deletions

View File

@@ -251,7 +251,9 @@ class CustomDPRReaderTokenizerMixin:
]
}
if return_attention_mask is not False:
attention_mask = [input_ids != self.pad_token_id for input_ids in encoded_inputs["input_ids"]]
attention_mask = []
for input_ids in encoded_inputs["input_ids"]:
attention_mask.append([int(input_id != self.pad_token_id) for input_id in input_ids])
encoded_inputs["attention_mask"] = attention_mask
return self.pad(encoded_inputs, padding=padding, max_length=max_length, return_tensors=return_tensors)

View File

@@ -252,7 +252,9 @@ class CustomDPRReaderTokenizerMixin:
]
}
if return_attention_mask is not False:
attention_mask = [input_ids != self.pad_token_id for input_ids in encoded_inputs["input_ids"]]
attention_mask = []
for input_ids in encoded_inputs["input_ids"]:
attention_mask.append([int(input_id != self.pad_token_id) for input_id in input_ids])
encoded_inputs["attention_mask"] = attention_mask
return self.pad(encoded_inputs, padding=padding, max_length=max_length, return_tensors=return_tensors)