Fix no_trainer CI (#18242)

* Fix all tests
This commit is contained in:
Zachary Mueller
2022-07-21 14:44:57 -04:00
committed by GitHub
parent 561b9a8c00
commit 99eb9b523f
2 changed files with 46 additions and 14 deletions

View File

@@ -19,14 +19,14 @@ import json
import logging import logging
import os import os
import shutil import shutil
import subprocess
import sys import sys
import tempfile import tempfile
from unittest import mock
import torch import torch
from accelerate.utils import write_basic_config from accelerate.utils import write_basic_config
from transformers.testing_utils import TestCasePlus, get_gpu_count, slow, torch_device from transformers.testing_utils import TestCasePlus, get_gpu_count, run_command, slow, torch_device
from transformers.utils import is_apex_available from transformers.utils import is_apex_available
@@ -75,6 +75,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
def tearDownClass(cls): def tearDownClass(cls):
shutil.rmtree(cls.tmpdir) shutil.rmtree(cls.tmpdir)
@mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
def test_run_glue_no_trainer(self): def test_run_glue_no_trainer(self):
tmp_dir = self.get_auto_remove_tmp_dir() tmp_dir = self.get_auto_remove_tmp_dir()
testargs = f""" testargs = f"""
@@ -94,12 +95,13 @@ class ExamplesTestsNoTrainer(TestCasePlus):
if is_cuda_and_apex_available(): if is_cuda_and_apex_available():
testargs.append("--fp16") testargs.append("--fp16")
_ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE) run_command(self._launch_args + testargs)
result = get_results(tmp_dir) result = get_results(tmp_dir)
self.assertGreaterEqual(result["eval_accuracy"], 0.75) self.assertGreaterEqual(result["eval_accuracy"], 0.75)
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0"))) self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "glue_no_trainer"))) self.assertTrue(os.path.exists(os.path.join(tmp_dir, "glue_no_trainer")))
@mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
def test_run_clm_no_trainer(self): def test_run_clm_no_trainer(self):
tmp_dir = self.get_auto_remove_tmp_dir() tmp_dir = self.get_auto_remove_tmp_dir()
testargs = f""" testargs = f"""
@@ -120,12 +122,13 @@ class ExamplesTestsNoTrainer(TestCasePlus):
# Skipping because there are not enough batches to train the model + would need a drop_last to work. # Skipping because there are not enough batches to train the model + would need a drop_last to work.
return return
_ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE) run_command(self._launch_args + testargs)
result = get_results(tmp_dir) result = get_results(tmp_dir)
self.assertLess(result["perplexity"], 100) self.assertLess(result["perplexity"], 100)
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0"))) self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "clm_no_trainer"))) self.assertTrue(os.path.exists(os.path.join(tmp_dir, "clm_no_trainer")))
@mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
def test_run_mlm_no_trainer(self): def test_run_mlm_no_trainer(self):
tmp_dir = self.get_auto_remove_tmp_dir() tmp_dir = self.get_auto_remove_tmp_dir()
testargs = f""" testargs = f"""
@@ -139,12 +142,13 @@ class ExamplesTestsNoTrainer(TestCasePlus):
--with_tracking --with_tracking
""".split() """.split()
_ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE) run_command(self._launch_args + testargs)
result = get_results(tmp_dir) result = get_results(tmp_dir)
self.assertLess(result["perplexity"], 42) self.assertLess(result["perplexity"], 42)
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0"))) self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "mlm_no_trainer"))) self.assertTrue(os.path.exists(os.path.join(tmp_dir, "mlm_no_trainer")))
@mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
def test_run_ner_no_trainer(self): def test_run_ner_no_trainer(self):
# with so little data distributed training needs more epochs to get the score on par with 0/1 gpu # with so little data distributed training needs more epochs to get the score on par with 0/1 gpu
epochs = 7 if get_gpu_count() > 1 else 2 epochs = 7 if get_gpu_count() > 1 else 2
@@ -165,13 +169,14 @@ class ExamplesTestsNoTrainer(TestCasePlus):
--with_tracking --with_tracking
""".split() """.split()
_ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE) run_command(self._launch_args + testargs)
result = get_results(tmp_dir) result = get_results(tmp_dir)
self.assertGreaterEqual(result["eval_accuracy"], 0.75) self.assertGreaterEqual(result["eval_accuracy"], 0.75)
self.assertLess(result["train_loss"], 0.5) self.assertLess(result["train_loss"], 0.5)
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0"))) self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "ner_no_trainer"))) self.assertTrue(os.path.exists(os.path.join(tmp_dir, "ner_no_trainer")))
@mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
def test_run_squad_no_trainer(self): def test_run_squad_no_trainer(self):
tmp_dir = self.get_auto_remove_tmp_dir() tmp_dir = self.get_auto_remove_tmp_dir()
testargs = f""" testargs = f"""
@@ -190,7 +195,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
--with_tracking --with_tracking
""".split() """.split()
_ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE) run_command(self._launch_args + testargs)
result = get_results(tmp_dir) result = get_results(tmp_dir)
# Because we use --version_2_with_negative the testing script uses SQuAD v2 metrics. # Because we use --version_2_with_negative the testing script uses SQuAD v2 metrics.
self.assertGreaterEqual(result["eval_f1"], 28) self.assertGreaterEqual(result["eval_f1"], 28)
@@ -198,6 +203,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0"))) self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "qa_no_trainer"))) self.assertTrue(os.path.exists(os.path.join(tmp_dir, "qa_no_trainer")))
@mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
def test_run_swag_no_trainer(self): def test_run_swag_no_trainer(self):
tmp_dir = self.get_auto_remove_tmp_dir() tmp_dir = self.get_auto_remove_tmp_dir()
testargs = f""" testargs = f"""
@@ -214,12 +220,13 @@ class ExamplesTestsNoTrainer(TestCasePlus):
--with_tracking --with_tracking
""".split() """.split()
_ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE) run_command(self._launch_args + testargs)
result = get_results(tmp_dir) result = get_results(tmp_dir)
self.assertGreaterEqual(result["eval_accuracy"], 0.8) self.assertGreaterEqual(result["eval_accuracy"], 0.8)
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "swag_no_trainer"))) self.assertTrue(os.path.exists(os.path.join(tmp_dir, "swag_no_trainer")))
@slow @slow
@mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
def test_run_summarization_no_trainer(self): def test_run_summarization_no_trainer(self):
tmp_dir = self.get_auto_remove_tmp_dir() tmp_dir = self.get_auto_remove_tmp_dir()
testargs = f""" testargs = f"""
@@ -237,7 +244,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
--with_tracking --with_tracking
""".split() """.split()
_ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE) run_command(self._launch_args + testargs)
result = get_results(tmp_dir) result = get_results(tmp_dir)
self.assertGreaterEqual(result["eval_rouge1"], 10) self.assertGreaterEqual(result["eval_rouge1"], 10)
self.assertGreaterEqual(result["eval_rouge2"], 2) self.assertGreaterEqual(result["eval_rouge2"], 2)
@@ -247,6 +254,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "summarization_no_trainer"))) self.assertTrue(os.path.exists(os.path.join(tmp_dir, "summarization_no_trainer")))
@slow @slow
@mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
def test_run_translation_no_trainer(self): def test_run_translation_no_trainer(self):
tmp_dir = self.get_auto_remove_tmp_dir() tmp_dir = self.get_auto_remove_tmp_dir()
testargs = f""" testargs = f"""
@@ -268,7 +276,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
--with_tracking --with_tracking
""".split() """.split()
_ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE) run_command(self._launch_args + testargs)
result = get_results(tmp_dir) result = get_results(tmp_dir)
self.assertGreaterEqual(result["eval_bleu"], 30) self.assertGreaterEqual(result["eval_bleu"], 30)
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0"))) self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
@@ -292,10 +300,11 @@ class ExamplesTestsNoTrainer(TestCasePlus):
--checkpointing_steps epoch --checkpointing_steps epoch
""".split() """.split()
_ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE) run_command(self._launch_args + testargs)
result = get_results(tmp_dir) result = get_results(tmp_dir)
self.assertGreaterEqual(result["eval_overall_accuracy"], 0.10) self.assertGreaterEqual(result["eval_overall_accuracy"], 0.10)
@mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
def test_run_image_classification_no_trainer(self): def test_run_image_classification_no_trainer(self):
tmp_dir = self.get_auto_remove_tmp_dir() tmp_dir = self.get_auto_remove_tmp_dir()
testargs = f""" testargs = f"""
@@ -316,9 +325,9 @@ class ExamplesTestsNoTrainer(TestCasePlus):
if is_cuda_and_apex_available(): if is_cuda_and_apex_available():
testargs.append("--fp16") testargs.append("--fp16")
_ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE) run_command(self._launch_args + testargs)
result = get_results(tmp_dir) result = get_results(tmp_dir)
# The base model scores a 25% # The base model scores a 25%
self.assertGreaterEqual(result["eval_accuracy"], 0.625) self.assertGreaterEqual(result["eval_accuracy"], 0.6)
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "step_1"))) self.assertTrue(os.path.exists(os.path.join(tmp_dir, "step_1")))
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "image_classification_no_trainer"))) self.assertTrue(os.path.exists(os.path.join(tmp_dir, "image_classification_no_trainer")))

View File

@@ -20,6 +20,7 @@ import os
import re import re
import shlex import shlex
import shutil import shutil
import subprocess
import sys import sys
import tempfile import tempfile
import unittest import unittest
@@ -27,7 +28,7 @@ from collections.abc import Mapping
from distutils.util import strtobool from distutils.util import strtobool
from io import StringIO from io import StringIO
from pathlib import Path from pathlib import Path
from typing import Iterator, Union from typing import Iterator, List, Union
from unittest import mock from unittest import mock
from transformers import logging as transformers_logging from transformers import logging as transformers_logging
@@ -1561,3 +1562,25 @@ def to_2tuple(x):
if isinstance(x, collections.abc.Iterable): if isinstance(x, collections.abc.Iterable):
return x return x
return (x, x) return (x, x)
# These utils relate to ensuring the right error message is received when running scripts
class SubprocessCallException(Exception):
    """Raised by `run_command` when the launched command exits with a non-zero status."""


def run_command(command: List[str], return_stdout=False):
    """
    Runs `command` with `subprocess.check_output` and optionally returns its output. If the command exits with a
    non-zero status, the captured output is surfaced in the raised exception so the failure is visible to the caller.

    Args:
        command (`List[str]`):
            The program and its arguments, passed as-is to `subprocess.check_output`.
        return_stdout (`bool`, *optional*, defaults to `False`):
            Whether to return the captured output. `stderr` is redirected into `stdout`, so the returned text
            contains both streams.

    Returns:
        `str` or `None`: The captured output decoded as UTF-8 when `return_stdout=True`, otherwise `None`.

    Raises:
        SubprocessCallException: If the command exits with a non-zero status. The message contains the command line
            and everything the command printed; the original `subprocess.CalledProcessError` is chained as the cause.
    """
    # Keep the `try` limited to the only call that can raise `CalledProcessError`.
    try:
        output = subprocess.check_output(command, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        # A failing process may print bytes that are not valid UTF-8; decode leniently so that a
        # `UnicodeDecodeError` never replaces the error report we are trying to build.
        captured = e.output.decode("utf-8", errors="replace")
        raise SubprocessCallException(
            f"Command `{' '.join(command)}` failed with the following error:\n\n{captured}"
        ) from e

    if not return_stdout:
        return None
    if hasattr(output, "decode"):
        output = output.decode("utf-8")
    return output