From 821d518e035211eb982aab73e7eb293cd2f05bbf Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Mon, 8 Mar 2021 16:04:30 -0500 Subject: [PATCH] Revert "Tests" This reverts commit b35e7b68caade1df761454501bbd7248c64b6bc9. --- src/transformers/trainer.py | 8 +++----- src/transformers/trainer_pt_utils.py | 16 ---------------- tests/test_trainer.py | 14 -------------- tests/test_trainer_utils.py | 24 ------------------------ 4 files changed, 3 insertions(+), 59 deletions(-) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index aaf9c1e627..0fa496dcc7 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -80,7 +80,6 @@ from .trainer_pt_utils import ( SequentialDistributedSampler, distributed_broadcast_scalars, distributed_concat, - get_parameter_names, nested_concat, nested_detach, nested_numpify, @@ -614,15 +613,14 @@ class Trainer: Trainer's init through :obj:`optimizers`, or subclass and override this method in a subclass. """ if self.optimizer is None: - decay_parameters = get_parameter_names(self.model, [torch.nn.LayerNorm]) - decay_parameters = [name for name in decay_parameters if "bias" not in name] + no_decay = ["bias", "LayerNorm.weight"] optimizer_grouped_parameters = [ { - "params": [p for n, p in self.model.named_parameters() if n in decay_parameters], + "params": [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)], "weight_decay": self.args.weight_decay, }, { - "params": [p for n, p in self.model.named_parameters() if n not in decay_parameters], + "params": [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)], "weight_decay": 0.0, }, ] diff --git a/src/transformers/trainer_pt_utils.py b/src/transformers/trainer_pt_utils.py index ae8e249490..ed92222612 100644 --- a/src/transformers/trainer_pt_utils.py +++ b/src/transformers/trainer_pt_utils.py @@ -672,19 +672,3 @@ def save_state(self): path = os.path.join(self.args.output_dir, "trainer_state.json") self.state.save_to_json(path) - - -def get_parameter_names(model, forbidden_layer_types): - """ - Returns the names of the model parameters that are not inside a forbidden layer. - """ - result = [] - for name, child in model.named_children(): - result += [ - f"{name}.{n}" - for n in get_parameter_names(child, forbidden_layer_types) - if not isinstance(child, tuple(forbidden_layer_types)) - ] - # Add model specific parameters (defined with nn.Parameter) since they are not in any child. - result += list(model._parameters.keys()) - return result diff --git a/tests/test_trainer.py b/tests/test_trainer.py index f29a8a60fc..09801dd6aa 100644 --- a/tests/test_trainer.py +++ b/tests/test_trainer.py @@ -59,8 +59,6 @@ if is_torch_available(): ) from transformers.modeling_utils import unwrap_model - from .test_trainer_utils import TstLayer - PATH_SAMPLE_TEXT = f"{get_tests_dir()}/fixtures/sample_text.txt" @@ -992,18 +990,6 @@ class TrainerIntegrationTest(unittest.TestCase): # should be about half of fp16_init # perfect world: fp32_init/2 == fp16_eval self.assertAlmostEqual(fp16_eval, fp32_init / 2, delta=5_000) - - def test_no_wd_param_group(self): - model = torch.nn.Sequential(TstLayer(128), torch.nn.ModuleList([TstLayer(128), TstLayer(128)])) - trainer = Trainer(model=model) - trainer.create_optimizer_and_scheduler(10) - # fmt: off - wd_names = ['0.linear1.weight', '0.linear2.weight', '1.0.linear1.weight', '1.0.linear2.weight', '1.1.linear1.weight', '1.1.linear2.weight'] - # fmt: on - wd_params = [p for n, p in model.named_parameters() if n in wd_names] - no_wd_params = [p for n, p in model.named_parameters() if n not in wd_names] - self.assertListEqual(trainer.optimizer.param_groups[0]["params"], wd_params) - self.assertListEqual(trainer.optimizer.param_groups[1]["params"], no_wd_params) @require_torch diff --git a/tests/test_trainer_utils.py b/tests/test_trainer_utils.py index 2d9d1d688f..19dfa9b1d1 100644 --- a/tests/test_trainer_utils.py +++ b/tests/test_trainer_utils.py @@ -30,23 +30,8 @@ if is_torch_available(): DistributedTensorGatherer, LabelSmoother, LengthGroupedSampler, - get_parameter_names ) - class TstLayer(torch.nn.Module): - def __init__(self, hidden_size): - super().__init__() - self.linear1 = torch.nn.Linear(hidden_size, hidden_size) - self.ln1 = torch.nn.LayerNorm(hidden_size) - self.linear2 = torch.nn.Linear(hidden_size, hidden_size) - self.ln2 = torch.nn.LayerNorm(hidden_size) - self.bias = torch.nn.Parameter(torch.zeros(hidden_size)) - - def forward(self, x): - h = self.ln1(torch.nn.functional.relu(self.linear1(x))) - h = torch.nn.functional.relu(self.linear2(x)) - return self.ln2(x + h + self.bias) - @require_torch class TrainerUtilsTest(unittest.TestCase): @@ -132,12 +117,3 @@ class TrainerUtilsTest(unittest.TestCase): self.assertEqual(lengths[indices_process_0[0]], 50) # The indices should be a permutation of range(100) self.assertEqual(list(sorted(indices_process_0 + indices_process_1)), list(range(100))) - - def test_get_parameter_names(self): - model = torch.nn.Sequential(TstLayer(128), torch.nn.ModuleList([TstLayer(128), TstLayer(128)])) - # fmt: off - self.assertEqual( - get_parameter_names(model, [torch.nn.LayerNorm]), - ['0.linear1.weight', '0.linear1.bias', '0.linear2.weight', '0.linear2.bias', '0.bias', '1.0.linear1.weight', '1.0.linear1.bias', '1.0.linear2.weight', '1.0.linear2.bias', '1.0.bias', '1.1.linear1.weight', '1.1.linear1.bias', '1.1.linear2.weight', '1.1.linear2.bias', '1.1.bias'] - ) - # fmt: on