Fixed: the hard-coded max and min numbers are out of range in fp16, which causes NaN.

2019-09-11 15:41:53 +08:00
parent 7424b2848f
commit 8bdee1cb73
2 changed files with 26 additions and 15 deletions
--- a/pytorch_transformers/modeling_utils.py
+++ b/pytorch_transformers/modeling_utils.py
@@ -140,7 +140,7 @@ class PreTrainedModel(nn.Module):
        Arguments:

            new_num_tokens: (`optional`) int:
-                New number of tokens in the embedding matrix. Increasing the size will add newly initialized vectors at the end. Reducing the size will remove vectors from the end. 
+                New number of tokens in the embedding matrix. Increasing the size will add newly initialized vectors at the end. Reducing the size will remove vectors from the end.
                If not provided or None: does nothing and just returns a pointer to the input tokens ``torch.nn.Embeddings`` Module of the model.

        Return: ``torch.nn.Embeddings``
@@ -434,7 +434,10 @@ class PoolerStartLogits(nn.Module):
        x = self.dense(hidden_states).squeeze(-1)

        if p_mask is not None:
-            x = x * (1 - p_mask) - 1e30 * p_mask
+            if next(self.parameters()).dtype == torch.float16:
+                x = x * (1 - p_mask) - 65500 * p_mask
+            else:
+                x = x * (1 - p_mask) - 1e30 * p_mask

        return x