CI: AMD MI300 tests fix (#30797)

* add fix

* update import

* updated dicts and comments

* remove prints

* Update testing_utils.py
This commit is contained in:
Mohit Sharma
2024-05-21 17:16:07 +05:30
committed by GitHub
parent a755745546
commit 7a4792e6b3
10 changed files with 126 additions and 10 deletions

View File

@@ -2551,7 +2551,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
)
eval_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="dev")
-training_args = TrainingArguments(output_dir="./examples", use_cpu=True)
+training_args = TrainingArguments(output_dir="./examples", use_cpu=True, report_to="none")
trainer = Trainer(model=model, args=training_args, eval_dataset=eval_dataset)
result = trainer.evaluate()
self.assertLess(result["eval_loss"], 0.2)
@@ -2572,6 +2572,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
output_dir="./examples",
use_cpu=True,
per_device_eval_batch_size=1,
+report_to="none",
)
trainer = Trainer(
model=model,
@@ -3107,6 +3108,8 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
"--predict_with_generate",
"--ddp_timeout",
"60",
+"--report_to",
+"none",
]
execute_subprocess_async(command)
# successful return here == success - any errors would have caused an error or a timeout in the sub-call