Fix tflongformer int dtype (#18907)
* Use int64 throughout TFLongFormer * make style * Do some more fixed casting in TFLongFormer * Fix some wonky "is None" conditionals * Cast all the dtypes, salt the earth * Fix copies to TFLED as well and do some casting there * dtype fix in TFLongformer test * Make fixup * Expand tolerances on the LED tests too (I think this is a TF32 thing) * Expand test tolerances for LED a tiny bit (probably a Tensorfloat thing again)
This commit is contained in:
@@ -412,7 +412,7 @@ class TFLEDModelIntegrationTest(unittest.TestCase):
|
||||
expected_slice = tf.convert_to_tensor(
|
||||
[[2.3050, 2.8279, 0.6531], [-1.8457, -0.1455, -3.5661], [-1.0186, 0.4586, -2.2043]],
|
||||
)
|
||||
tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=TOLERANCE)
|
||||
tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=1e-3)
|
||||
|
||||
def test_inference_with_head(self):
|
||||
model = TFLEDForConditionalGeneration.from_pretrained("allenai/led-base-16384")
|
||||
@@ -428,4 +428,4 @@ class TFLEDModelIntegrationTest(unittest.TestCase):
|
||||
expected_slice = tf.convert_to_tensor(
|
||||
[[33.6507, 6.4572, 16.8089], [5.8739, -2.4238, 11.2902], [-3.2139, -4.3149, 4.2783]],
|
||||
)
|
||||
tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=TOLERANCE)
|
||||
tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=1e-3, rtol=1e-3)
|
||||
|
||||
Reference in New Issue
Block a user