[examples tests on multigpu] resolving require_torch_non_multi_gpu_but_fix_me (#10561)

* batch 1

* this is tpu

* deebert attempt

* the rest
This commit is contained in:
Stas Bekman
2021-03-08 11:11:40 -08:00
committed by GitHub
parent dfd16af832
commit f284089ec4
9 changed files with 35 additions and 62 deletions

View File

@@ -24,7 +24,7 @@ from unittest.mock import patch
import torch
from transformers.file_utils import is_apex_available
from transformers.testing_utils import TestCasePlus, require_torch_non_multi_gpu_but_fix_me, slow, torch_device
from transformers.testing_utils import TestCasePlus, get_gpu_count, slow, torch_device
SRC_DIRS = [
@@ -82,7 +82,6 @@ def is_cuda_and_apex_available():
class ExamplesTests(TestCasePlus):
@require_torch_non_multi_gpu_but_fix_me
def test_run_glue(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
@@ -114,7 +113,6 @@ class ExamplesTests(TestCasePlus):
result = get_results(tmp_dir)
self.assertGreaterEqual(result["eval_accuracy"], 0.75)
@require_torch_non_multi_gpu_but_fix_me
def test_run_clm(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
@@ -147,7 +145,6 @@ class ExamplesTests(TestCasePlus):
result = get_results(tmp_dir)
self.assertLess(result["perplexity"], 100)
@require_torch_non_multi_gpu_but_fix_me
def test_run_mlm(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
@@ -174,11 +171,13 @@ class ExamplesTests(TestCasePlus):
result = get_results(tmp_dir)
self.assertLess(result["perplexity"], 42)
@require_torch_non_multi_gpu_but_fix_me
def test_run_ner(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
# with so little data, distributed training needs more epochs to get the score on par with 0/1 GPU
epochs = 7 if get_gpu_count() > 1 else 2
tmp_dir = self.get_auto_remove_tmp_dir()
testargs = f"""
run_ner.py
@@ -193,7 +192,7 @@ class ExamplesTests(TestCasePlus):
--learning_rate=2e-4
--per_device_train_batch_size=2
--per_device_eval_batch_size=2
--num_train_epochs=2
--num_train_epochs={epochs}
""".split()
if torch_device != "cuda":
@@ -206,7 +205,6 @@ class ExamplesTests(TestCasePlus):
self.assertGreaterEqual(result["eval_precision"], 0.75)
self.assertLess(result["eval_loss"], 0.5)
@require_torch_non_multi_gpu_but_fix_me
def test_run_squad(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
@@ -235,7 +233,6 @@ class ExamplesTests(TestCasePlus):
self.assertGreaterEqual(result["f1"], 30)
self.assertGreaterEqual(result["exact"], 30)
@require_torch_non_multi_gpu_but_fix_me
def test_run_swag(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
@@ -262,7 +259,6 @@ class ExamplesTests(TestCasePlus):
result = get_results(tmp_dir)
self.assertGreaterEqual(result["eval_accuracy"], 0.8)
@require_torch_non_multi_gpu_but_fix_me
def test_generation(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
@@ -281,7 +277,6 @@ class ExamplesTests(TestCasePlus):
self.assertGreaterEqual(len(result[0]), 10)
@slow
@require_torch_non_multi_gpu_but_fix_me
def test_run_seq2seq_summarization(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
@@ -314,7 +309,6 @@ class ExamplesTests(TestCasePlus):
self.assertGreaterEqual(result["eval_rougeLsum"], 7)
@slow
@require_torch_non_multi_gpu_but_fix_me
def test_run_seq2seq_translation(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)