switch to device agnostic device calling for test cases (#38247)
* use device agnostic APIs in test cases Signed-off-by: Matrix Yao <matrix.yao@intel.com> * fix style Signed-off-by: Matrix Yao <matrix.yao@intel.com> * add one more Signed-off-by: YAO Matrix <matrix.yao@intel.com> * xpu now supports integer device id, aligning to CUDA behaviors Signed-off-by: Matrix Yao <matrix.yao@intel.com> * update to use device_properties Signed-off-by: Matrix Yao <matrix.yao@intel.com> * fix style Signed-off-by: Matrix Yao <matrix.yao@intel.com> * update comment Signed-off-by: Matrix Yao <matrix.yao@intel.com> * fix comments Signed-off-by: Matrix Yao <matrix.yao@intel.com> * fix style Signed-off-by: Matrix Yao <matrix.yao@intel.com> --------- Signed-off-by: Matrix Yao <matrix.yao@intel.com> Signed-off-by: YAO Matrix <matrix.yao@intel.com> Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -390,7 +390,7 @@ class BloomModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
|
||||
def test_simple_generation(self):
|
||||
# This test is a bit flaky. For some GPU architectures, pytorch sets by default allow_fp16_reduced_precision_reduction = True and some operations
|
||||
# do not give the same results under this configuration, especially torch.baddmm and torch.bmm. https://pytorch.org/docs/stable/notes/numerical_accuracy.html#fp16-on-mi200
|
||||
# As we leave the default value (True) for allow_fp16_reduced_precision_reduction , the tests failed when running in half-precision with smaller models (560m)
|
||||
# As we leave the default value (True) for allow_fp16_reduced_precision_reduction, the tests failed when running in half-precision with smaller models (560m)
|
||||
# Please see: https://pytorch.org/docs/stable/notes/cuda.html#reduced-precision-reduction-in-fp16-gemms
|
||||
# This discrepancy is observed only when using small models and seems to be stable for larger models.
|
||||
# Our conclusion is that these operations are flaky for small inputs but seems to be stable for larger inputs (for the functions `baddmm` and `bmm`), and therefore for larger models.
|
||||
@@ -763,7 +763,6 @@ class BloomEmbeddingTest(unittest.TestCase):
|
||||
|
||||
@require_torch
|
||||
def test_hidden_states_transformers(self):
|
||||
cuda_available = torch.cuda.is_available()
|
||||
model = BloomModel.from_pretrained(self.path_bigscience_model, use_cache=False, torch_dtype="auto").to(
|
||||
torch_device
|
||||
)
|
||||
@@ -782,7 +781,7 @@ class BloomEmbeddingTest(unittest.TestCase):
|
||||
"max": logits.last_hidden_state.max(dim=-1).values[0][0].item(),
|
||||
}
|
||||
|
||||
if cuda_available:
|
||||
if torch_device == "cuda":
|
||||
self.assertAlmostEqual(MEAN_VALUE_LAST_LM, logits.last_hidden_state.mean().item(), places=4)
|
||||
else:
|
||||
self.assertAlmostEqual(MEAN_VALUE_LAST_LM, logits.last_hidden_state.mean().item(), places=3)
|
||||
@@ -791,7 +790,6 @@ class BloomEmbeddingTest(unittest.TestCase):
|
||||
|
||||
@require_torch
|
||||
def test_logits(self):
|
||||
cuda_available = torch.cuda.is_available()
|
||||
model = BloomForCausalLM.from_pretrained(self.path_bigscience_model, use_cache=False, torch_dtype="auto").to(
|
||||
torch_device
|
||||
) # load in bf16
|
||||
@@ -807,9 +805,5 @@ class BloomEmbeddingTest(unittest.TestCase):
|
||||
output = model(tensor_ids).logits
|
||||
|
||||
output_gpu_1, output_gpu_2 = output.split(125440, dim=-1)
|
||||
if cuda_available:
|
||||
self.assertAlmostEqual(output_gpu_1.mean().item(), MEAN_LOGITS_GPU_1, places=6)
|
||||
self.assertAlmostEqual(output_gpu_2.mean().item(), MEAN_LOGITS_GPU_2, places=6)
|
||||
else:
|
||||
self.assertAlmostEqual(output_gpu_1.mean().item(), MEAN_LOGITS_GPU_1, places=6) # 1e-06 precision!!
|
||||
self.assertAlmostEqual(output_gpu_2.mean().item(), MEAN_LOGITS_GPU_2, places=6)
|
||||
self.assertAlmostEqual(output_gpu_1.mean().item(), MEAN_LOGITS_GPU_1, places=6)
|
||||
self.assertAlmostEqual(output_gpu_2.mean().item(), MEAN_LOGITS_GPU_2, places=6)
|
||||
|
||||
Reference in New Issue
Block a user