[examples] unit test for run_bart_sum (#3544)
- adds pytorch-lightning dependency
This commit is contained in:
@@ -1,4 +1,6 @@
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
import unittest
|
||||
@@ -10,6 +12,7 @@ from torch.utils.data import DataLoader
|
||||
from transformers import BartTokenizer
|
||||
|
||||
from .evaluate_cnn import run_generate
|
||||
from .run_bart_sum import main
|
||||
from .utils import SummarizationDataset
|
||||
|
||||
|
||||
@@ -17,16 +20,61 @@ logging.basicConfig(level=logging.DEBUG)
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
DEFAULT_ARGS = {
|
||||
"output_dir": "",
|
||||
"fp16": False,
|
||||
"fp16_opt_level": "O1",
|
||||
"n_gpu": 1,
|
||||
"n_tpu_cores": 0,
|
||||
"max_grad_norm": 1.0,
|
||||
"do_train": True,
|
||||
"do_predict": False,
|
||||
"gradient_accumulation_steps": 1,
|
||||
"server_ip": "",
|
||||
"server_port": "",
|
||||
"seed": 42,
|
||||
"model_type": "bart",
|
||||
"model_name_or_path": "sshleifer/bart-tiny-random",
|
||||
"config_name": "",
|
||||
"tokenizer_name": "",
|
||||
"cache_dir": "",
|
||||
"do_lower_case": False,
|
||||
"learning_rate": 3e-05,
|
||||
"weight_decay": 0.0,
|
||||
"adam_epsilon": 1e-08,
|
||||
"warmup_steps": 0,
|
||||
"num_train_epochs": 1,
|
||||
"train_batch_size": 2,
|
||||
"eval_batch_size": 2,
|
||||
"max_source_length": 12,
|
||||
"max_target_length": 12,
|
||||
}
|
||||
|
||||
|
||||
def _dump_articles(path: Path, articles: list):
|
||||
with path.open("w") as f:
|
||||
f.write("\n".join(articles))
|
||||
|
||||
|
||||
def make_test_data_dir():
|
||||
tmp_dir = Path(tempfile.gettempdir())
|
||||
articles = [" Sam ate lunch today", "Sams lunch ingredients"]
|
||||
summaries = ["A very interesting story about what I ate for lunch.", "Avocado, celery, turkey, coffee"]
|
||||
for split in ["train", "val", "test"]:
|
||||
_dump_articles((tmp_dir / f"{split}.source"), articles)
|
||||
_dump_articles((tmp_dir / f"{split}.target"), summaries)
|
||||
return tmp_dir
|
||||
|
||||
|
||||
class TestBartExamples(unittest.TestCase):
|
||||
def test_bart_cnn_cli(self):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
stream_handler = logging.StreamHandler(sys.stdout)
|
||||
logger.addHandler(stream_handler)
|
||||
logging.disable(logging.CRITICAL) # remove noisy download output from tracebacks
|
||||
return cls
|
||||
|
||||
def test_bart_cnn_cli(self):
|
||||
tmp = Path(tempfile.gettempdir()) / "utest_generations_bart_sum.hypo"
|
||||
output_file_name = Path(tempfile.gettempdir()) / "utest_output_bart_sum.hypo"
|
||||
articles = [" New York (CNN)When Liana Barrientos was 23 years old, she got married in Westchester County."]
|
||||
@@ -34,7 +82,19 @@ class TestBartExamples(unittest.TestCase):
|
||||
testargs = ["evaluate_cnn.py", str(tmp), str(output_file_name), "sshleifer/bart-tiny-random"]
|
||||
with patch.object(sys, "argv", testargs):
|
||||
run_generate()
|
||||
self.assertTrue(output_file_name.exists())
|
||||
self.assertTrue(Path(output_file_name).exists())
|
||||
os.remove(Path(output_file_name))
|
||||
|
||||
def test_bart_run_sum_cli(self):
|
||||
args_d: dict = DEFAULT_ARGS.copy()
|
||||
tmp_dir = make_test_data_dir()
|
||||
output_dir = tempfile.mkdtemp(prefix="output_")
|
||||
args_d.update(
|
||||
data_dir=tmp_dir, model_type="bart", train_batch_size=2, eval_batch_size=2, n_gpu=0, output_dir=output_dir,
|
||||
)
|
||||
|
||||
args = argparse.Namespace(**args_d)
|
||||
main(args)
|
||||
|
||||
def test_bart_summarization_dataset(self):
|
||||
tmp_dir = Path(tempfile.gettempdir())
|
||||
|
||||
Reference in New Issue
Block a user