Fix tflongformer int dtype (#18907)

* Use int64 throughout TFLongFormer * make style * Do some more fixed casting in TFLongFormer * Fix some wonky "is None" conditionals * Cast all the dtypes, salt the earth * Fix copies to TFLED as well and do some casting there * dtype fix in TFLongformer test * Make fixup * Expand tolerances on the LED tests too (I think this is a TF32 thing) * Expand test tolerances for LED a tiny bit (probably a Tensorfloat thing again)
2022-09-12 17:51:10 +01:00
parent f7ceda345d
commit c126a239bc
4 changed files with 137 additions and 84 deletions
--- a/tests/models/led/test_modeling_tf_led.py
+++ b/tests/models/led/test_modeling_tf_led.py
@@ -412,7 +412,7 @@ class TFLEDModelIntegrationTest(unittest.TestCase):
        expected_slice = tf.convert_to_tensor(
            [[2.3050, 2.8279, 0.6531], [-1.8457, -0.1455, -3.5661], [-1.0186, 0.4586, -2.2043]],
        )
-        tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=TOLERANCE)
+        tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=1e-3)

    def test_inference_with_head(self):
        model = TFLEDForConditionalGeneration.from_pretrained("allenai/led-base-16384")
@@ -428,4 +428,4 @@ class TFLEDModelIntegrationTest(unittest.TestCase):
        expected_slice = tf.convert_to_tensor(
            [[33.6507, 6.4572, 16.8089], [5.8739, -2.4238, 11.2902], [-3.2139, -4.3149, 4.2783]],
        )
-        tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=TOLERANCE)
+        tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=1e-3, rtol=1e-3)