[tests] enable bnb tests on xpu (#36233)

* fix failed test
* fix device
* fix more device cases
* add more cases
* fix empty cache
* Update test_4bit.py

---------

Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
2025-02-24 18:30:15 +08:00
parent 92c5ca9dd7
commit 4dbf17c17f
4 changed files with 27 additions and 21 deletions
--- a/tests/models/falcon/test_modeling_falcon.py
+++ b/tests/models/falcon/test_modeling_falcon.py
@@ -591,12 +591,12 @@ class FalconLanguageGenerationTest(unittest.TestCase):

        test_text = "A sequence: 1, 2"  # should generate the rest of the sequence

-        unpadded_inputs = tokenizer([test_text], return_tensors="pt").to("cuda:0")
+        unpadded_inputs = tokenizer([test_text], return_tensors="pt").to(f"{torch_device}:0")
        unpadded_gen_out = model.generate(**unpadded_inputs, max_new_tokens=20)
        unpadded_gen_text = tokenizer.batch_decode(unpadded_gen_out, skip_special_tokens=True)

        dummy_text = "This is a longer text " * 2  # forces left-padding on `test_text`
-        padded_inputs = tokenizer([test_text, dummy_text], return_tensors="pt", padding=True).to("cuda:0")
+        padded_inputs = tokenizer([test_text, dummy_text], return_tensors="pt", padding=True).to(f"{torch_device}:0")
        padded_gen_out = model.generate(**padded_inputs, max_new_tokens=20)
        padded_gen_text = tokenizer.batch_decode(padded_gen_out, skip_special_tokens=True)