From e878eaa9fc4da9cec1c74ae962e89092b6832db8 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Fri, 11 Oct 2024 11:51:49 +0100 Subject: [PATCH] Tests: upcast `logits` to `float()` (#34042) upcast --- tests/models/granite/test_modeling_granite.py | 4 ++-- .../granitemoe/test_modeling_granitemoe.py | 6 +++-- tests/models/jetmoe/test_modeling_jetmoe.py | 2 +- tests/models/llama/test_modeling_llama.py | 22 +++++++++++++++---- tests/models/mistral/test_modeling_mistral.py | 2 +- tests/models/olmo/test_modeling_olmo.py | 6 ++--- tests/models/olmoe/test_modeling_olmoe.py | 2 +- .../persimmon/test_modeling_persimmon.py | 2 +- tests/models/qwen2/test_modeling_qwen2.py | 2 +- .../qwen2_moe/test_modeling_qwen2_moe.py | 2 +- .../models/stablelm/test_modeling_stablelm.py | 4 ++-- 11 files changed, 35 insertions(+), 19 deletions(-) diff --git a/tests/models/granite/test_modeling_granite.py b/tests/models/granite/test_modeling_granite.py index 9b25698f64..1bcb664180 100644 --- a/tests/models/granite/test_modeling_granite.py +++ b/tests/models/granite/test_modeling_granite.py @@ -538,7 +538,7 @@ class GraniteIntegrationTest(unittest.TestCase): self.assertTrue( torch.allclose( EXPECTED_SLICE.to(torch_device), - out.logits[0, 0, :15], + out.logits[0, 0, :15].float(), atol=1e-3, rtol=1e-3, ) @@ -558,4 +558,4 @@ class GraniteIntegrationTest(unittest.TestCase): # Expected mean on dim = -1 EXPECTED_MEAN = torch.tensor([[-2.0984, -3.1294, -2.8153, -2.3568, -2.7337, -2.2624, -2.6016, -2.4022]]) - self.assertTrue(torch.allclose(EXPECTED_MEAN.to(torch_device), out.logits.mean(-1), atol=1e-2, rtol=1e-2)) + self.assertTrue(torch.allclose(EXPECTED_MEAN.to(torch_device), out.logits.float().mean(-1), atol=1e-2, rtol=1e-2)) diff --git a/tests/models/granitemoe/test_modeling_granitemoe.py b/tests/models/granitemoe/test_modeling_granitemoe.py index d5d0cee6da..124ce0c3bb 100644 --- a/tests/models/granitemoe/test_modeling_granitemoe.py +++ b/tests/models/granitemoe/test_modeling_granitemoe.py @@ -525,7 +525,9 @@ class GraniteMoeIntegrationTest(unittest.TestCase): # Expected mean on dim = -1 EXPECTED_MEAN = torch.tensor([[-2.2122, -1.6632, -2.9269, -2.3344, -2.0143, -3.0146, -2.6839, -2.5610]]) - self.assertTrue(torch.allclose(EXPECTED_MEAN.to(torch_device), out.logits.mean(-1), atol=1e-2, rtol=1e-2)) + self.assertTrue( + torch.allclose(EXPECTED_MEAN.to(torch_device), out.logits.float().mean(-1), atol=1e-2, rtol=1e-2) + ) # slicing logits[0, 0, 0:15] EXPECTED_SLICE = torch.tensor([[4.8785, -2.2890, -2.2892, -2.2885, -2.2890, -3.5007, -2.2897, -2.2892, @@ -535,7 +537,7 @@ class GraniteMoeIntegrationTest(unittest.TestCase): self.assertTrue( torch.allclose( EXPECTED_SLICE.to(torch_device), - out.logits[0, 0, :15], + out.logits[0, 0, :15].float(), atol=1e-3, rtol=1e-3, ) diff --git a/tests/models/jetmoe/test_modeling_jetmoe.py b/tests/models/jetmoe/test_modeling_jetmoe.py index 50fd7a27e1..867f97c48a 100644 --- a/tests/models/jetmoe/test_modeling_jetmoe.py +++ b/tests/models/jetmoe/test_modeling_jetmoe.py @@ -481,7 +481,7 @@ class JetMoeIntegrationTest(unittest.TestCase): model = JetMoeForCausalLM.from_pretrained("jetmoe/jetmoe-8b") input_ids = torch.tensor([input_ids]).to(model.model.embed_tokens.weight.device) with torch.no_grad(): - out = model(input_ids).logits.cpu() + out = model(input_ids).logits.float().cpu() # Expected mean on dim = -1 EXPECTED_MEAN = torch.tensor([[0.2507, -2.7073, -1.3445, -1.9363, -1.7216, -1.7370, -1.9054, -1.9792]]) torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, atol=1e-2, rtol=1e-2) diff --git a/tests/models/llama/test_modeling_llama.py b/tests/models/llama/test_modeling_llama.py index 3a103f3efa..d43a0fb13f 100644 --- a/tests/models/llama/test_modeling_llama.py +++ b/tests/models/llama/test_modeling_llama.py @@ -773,7 +773,14 @@ class LlamaIntegrationTest(unittest.TestCase): 8: torch.tensor([[-6.5208, -4.1218, -4.9377, -3.2536, 0.8127, -2.9811, 1.2918, -3.3848]]) } - self.assertTrue(torch.allclose(EXPECTED_MEAN[self.cuda_compute_capability_major_version].to(torch_device), out.logits.mean(-1), atol=1e-2, rtol=1e-2)) + self.assertTrue( + torch.allclose( + EXPECTED_MEAN[self.cuda_compute_capability_major_version].to(torch_device), + out.logits.float().mean(-1), + atol=1e-2, + rtol=1e-2 + ) + ) # slicing logits[0, 0, 0:15] EXPECTED_SLICE = { @@ -785,7 +792,7 @@ class LlamaIntegrationTest(unittest.TestCase): self.assertTrue( torch.allclose( EXPECTED_SLICE[self.cuda_compute_capability_major_version].to(torch_device), - out.logits[0, 0, :15], + out.logits[0, 0, :15].float(), atol=1e-2, rtol=1e-2, ) @@ -810,7 +817,14 @@ class LlamaIntegrationTest(unittest.TestCase): 8: torch.tensor([[-6.6544, -4.1259, -4.9840, -3.2456, 0.8261, -3.0124, 1.2971, -3.3641]]) } - self.assertTrue(torch.allclose(EXPECTED_MEAN[self.cuda_compute_capability_major_version].to(torch_device), out.logits.mean(-1), atol=1e-2, rtol=1e-2)) + self.assertTrue( + torch.allclose( + EXPECTED_MEAN[self.cuda_compute_capability_major_version].to(torch_device), + out.logits.float().mean(-1), + atol=1e-2, + rtol=1e-2 + ) + ) # slicing logits[0, 0, 0:15] EXPECTED_SLICE = { @@ -822,7 +836,7 @@ class LlamaIntegrationTest(unittest.TestCase): self.assertTrue( torch.allclose( EXPECTED_SLICE[self.cuda_compute_capability_major_version].to(torch_device), - out.logits[0, 0, :15], + out.logits[0, 0, :15].float(), atol=1e-2, rtol=1e-2, ) diff --git a/tests/models/mistral/test_modeling_mistral.py b/tests/models/mistral/test_modeling_mistral.py index 885795a129..c24436d4b8 100644 --- a/tests/models/mistral/test_modeling_mistral.py +++ b/tests/models/mistral/test_modeling_mistral.py @@ -524,7 +524,7 @@ class MistralIntegrationTest(unittest.TestCase): ) input_ids = torch.tensor([input_ids]).to(model.model.embed_tokens.weight.device) with torch.no_grad(): - out = model(input_ids).logits.cpu() + out = model(input_ids).logits.float().cpu() # Expected mean on dim = -1 EXPECTED_MEAN = torch.tensor([[-2.5548, -2.5737, -3.0600, -2.5906, -2.8478, -2.8118, -2.9325, -2.7694]]) torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, atol=1e-2, rtol=1e-2) diff --git a/tests/models/olmo/test_modeling_olmo.py b/tests/models/olmo/test_modeling_olmo.py index 43e0b7afb4..e74785e29e 100644 --- a/tests/models/olmo/test_modeling_olmo.py +++ b/tests/models/olmo/test_modeling_olmo.py @@ -360,7 +360,7 @@ class OlmoIntegrationTest(unittest.TestCase): def test_model_1b_logits(self): input_ids = [[1, 306, 4658, 278, 6593, 310, 2834, 338]] model = OlmoForCausalLM.from_pretrained("allenai/OLMo-1B-hf", device_map="auto") - out = model(torch.tensor(input_ids)).logits + out = model(torch.tensor(input_ids)).logits.float() # Expected mean on dim = -1 EXPECTED_MEAN = torch.tensor([[2.2869, 0.3315, 0.9876, 1.4146, 1.8804, 2.0430, 1.7055, 1.2065]]) torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, atol=1e-2, rtol=1e-2) @@ -372,7 +372,7 @@ class OlmoIntegrationTest(unittest.TestCase): def test_model_7b_logits(self): input_ids = [[1, 306, 4658, 278, 6593, 310, 2834, 338]] model = OlmoForCausalLM.from_pretrained("allenai/OLMo-7B-hf", device_map="auto") - out = model(torch.tensor(input_ids)).logits + out = model(torch.tensor(input_ids)).logits.float() # Expected mean on dim = -1 EXPECTED_MEAN = torch.tensor([[0.0271, 0.0249, -0.0578, -0.0870, 0.0167, 0.0710, 0.1002, 0.0677]]) torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, atol=1e-2, rtol=1e-2) @@ -384,7 +384,7 @@ class OlmoIntegrationTest(unittest.TestCase): def test_model_7b_twin_2t_logits(self): input_ids = [[1, 306, 4658, 278, 6593, 310, 2834, 338]] model = OlmoForCausalLM.from_pretrained("allenai/OLMo-7B-Twin-2T-hf", device_map="auto") - out = model(torch.tensor(input_ids)).logits + out = model(torch.tensor(input_ids)).logits.float() # Expected mean on dim = -1 EXPECTED_MEAN = torch.tensor([[-0.3636, -0.3825, -0.4800, -0.3696, -0.8388, -0.9737, -0.9849, -0.8356]]) torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, atol=1e-2, rtol=1e-2) diff --git a/tests/models/olmoe/test_modeling_olmoe.py b/tests/models/olmoe/test_modeling_olmoe.py index 9c3af5723e..08ec1458ef 100644 --- a/tests/models/olmoe/test_modeling_olmoe.py +++ b/tests/models/olmoe/test_modeling_olmoe.py @@ -375,7 +375,7 @@ class OlmoeIntegrationTest(unittest.TestCase): def test_model_7b_logits(self): input_ids = [[1, 306, 4658, 278, 6593, 310, 2834, 338]] model = OlmoeForCausalLM.from_pretrained("allenai/OLMoE-1B-7B-0924", device_map="auto") - out = model(torch.tensor(input_ids)).logits + out = model(torch.tensor(input_ids)).logits.float() # Expected mean on dim = -1 EXPECTED_MEAN = torch.tensor([[-1.3814, -3.4450, -2.2990, -1.9542, -2.4387, -2.7941, -2.9312, -2.8309]]) torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, atol=1e-2, rtol=1e-2) diff --git a/tests/models/persimmon/test_modeling_persimmon.py b/tests/models/persimmon/test_modeling_persimmon.py index 600c5b8a2f..99d84f9b5b 100644 --- a/tests/models/persimmon/test_modeling_persimmon.py +++ b/tests/models/persimmon/test_modeling_persimmon.py @@ -496,7 +496,7 @@ class PersimmonIntegrationTest(unittest.TestCase): model = PersimmonForCausalLM.from_pretrained( "adept/persimmon-8b-chat", load_in_8bit=True, device_map={"": 0}, torch_dtype=torch.float16 ) - out = model(torch.tensor([input_ids], device=torch_device)).logits + out = model(torch.tensor([input_ids], device=torch_device)).logits.float() EXPECTED_MEAN = torch.tensor( [[-11.4726, -11.1495, -11.2694, -11.2223, -10.9452, -11.0663, -11.0031, -11.1028]] diff --git a/tests/models/qwen2/test_modeling_qwen2.py b/tests/models/qwen2/test_modeling_qwen2.py index c7fe657798..debcf42ab3 100644 --- a/tests/models/qwen2/test_modeling_qwen2.py +++ b/tests/models/qwen2/test_modeling_qwen2.py @@ -518,7 +518,7 @@ class Qwen2IntegrationTest(unittest.TestCase): model = Qwen2ForCausalLM.from_pretrained("Qwen/Qwen2-450m-beta", device_map="auto") input_ids = torch.tensor([input_ids]).to(model.model.embed_tokens.weight.device) with torch.no_grad(): - out = model(input_ids).logits.cpu() + out = model(input_ids).logits.float().cpu() # Expected mean on dim = -1 EXPECTED_MEAN = torch.tensor([[-2.5548, -2.5737, -3.0600, -2.5906, -2.8478, -2.8118, -2.9325, -2.7694]]) torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, atol=1e-2, rtol=1e-2) diff --git a/tests/models/qwen2_moe/test_modeling_qwen2_moe.py b/tests/models/qwen2_moe/test_modeling_qwen2_moe.py index 11fc55f6ba..60df825c9b 100644 --- a/tests/models/qwen2_moe/test_modeling_qwen2_moe.py +++ b/tests/models/qwen2_moe/test_modeling_qwen2_moe.py @@ -580,7 +580,7 @@ class Qwen2MoeIntegrationTest(unittest.TestCase): model = Qwen2MoeForCausalLM.from_pretrained("Qwen/Qwen1.5-MoE-A2.7B", device_map="auto") input_ids = torch.tensor([input_ids]).to(model.model.embed_tokens.weight.device) with torch.no_grad(): - out = model(input_ids).logits.cpu() + out = model(input_ids).logits.float().cpu() # Expected mean on dim = -1 EXPECTED_MEAN = torch.tensor([[-4.2125, -3.6416, -4.9136, -4.3005, -4.9938, -3.4393, -3.5195, -4.1621]]) torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, atol=1e-2, rtol=1e-2) diff --git a/tests/models/stablelm/test_modeling_stablelm.py b/tests/models/stablelm/test_modeling_stablelm.py index c88fda6fb8..e1f9bc2b8e 100644 --- a/tests/models/stablelm/test_modeling_stablelm.py +++ b/tests/models/stablelm/test_modeling_stablelm.py @@ -482,7 +482,7 @@ class StableLmModelIntegrationTest(unittest.TestCase): model = StableLmForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t").to(torch_device) model.eval() - output = model(**input_ids).logits + output = model(**input_ids).logits.float() # Expected mean on dim = -1 EXPECTED_MEAN = torch.tensor([[2.7146, 2.4245, 1.5616, 1.4424, 2.6790]]).to(torch_device) @@ -515,7 +515,7 @@ class StableLmModelIntegrationTest(unittest.TestCase): model = StableLmForCausalLM.from_pretrained("stabilityai/tiny-random-stablelm-2").to(torch_device) model.eval() - output = model(**input_ids).logits + output = model(**input_ids).logits.float() # Expected mean on dim = -1 EXPECTED_MEAN = torch.tensor([[-2.7196, -3.6099, -2.6877, -3.1973, -3.9344]]).to(torch_device)