Fix tflongformer int dtype (#18907)

* Use int64 throughout TFLongFormer

* make style

* Do some more fixed casting in TFLongFormer

* Fix some wonky "is None" conditionals

* Cast all the dtypes, salt the earth

* Fix copies to TFLED as well and do some casting there

* dtype fix in TFLongformer test

* Make fixup

* Expand tolerances on the LED tests too (I think this is a TF32 thing)

* Expand test tolerances for LED a tiny bit (probably a Tensorfloat thing again)
This commit is contained in:
Matt
2022-09-12 17:51:10 +01:00
committed by GitHub
parent f7ceda345d
commit c126a239bc
4 changed files with 137 additions and 84 deletions

View File

@@ -412,7 +412,7 @@ class TFLEDModelIntegrationTest(unittest.TestCase):
expected_slice = tf.convert_to_tensor(
[[2.3050, 2.8279, 0.6531], [-1.8457, -0.1455, -3.5661], [-1.0186, 0.4586, -2.2043]],
)
tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=TOLERANCE)
tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=1e-3)
def test_inference_with_head(self):
model = TFLEDForConditionalGeneration.from_pretrained("allenai/led-base-16384")
@@ -428,4 +428,4 @@ class TFLEDModelIntegrationTest(unittest.TestCase):
expected_slice = tf.convert_to_tensor(
[[33.6507, 6.4572, 16.8089], [5.8739, -2.4238, 11.2902], [-3.2139, -4.3149, 4.2783]],
)
tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=TOLERANCE)
tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=1e-3, rtol=1e-3)