PatchtTST and PatchTSMixer fixes (#28083)

* 🐛 fix .max bug * remove prediction_length from regression output dimensions * fix parameter names, fix output names, update tests * ensure shape for PatchTST * ensure output shape for PatchTSMixer * update model, batch, and expected for regression distribution test * update test expected Signed-off-by: Wesley M. Gifford <wmgifford@us.ibm.com> * Update tests/models/patchtst/test_modeling_patchtst.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * Update tests/models/patchtst/test_modeling_patchtst.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * Update tests/models/patchtst/test_modeling_patchtst.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * Update src/transformers/models/patchtsmixer/modeling_patchtsmixer.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * Update tests/models/patchtsmixer/test_modeling_patchtsmixer.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * Update tests/models/patchtsmixer/test_modeling_patchtsmixer.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * standardize on patch_length Signed-off-by: Wesley M. Gifford <wmgifford@us.ibm.com> * Update tests/models/patchtsmixer/test_modeling_patchtsmixer.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * Update tests/models/patchtsmixer/test_modeling_patchtsmixer.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * Make arguments more explicit Signed-off-by: Wesley M. Gifford <wmgifford@us.ibm.com> * adjust prepared inputs Signed-off-by: Wesley M. Gifford <wmgifford@us.ibm.com> --------- Signed-off-by: Wesley M. Gifford <wmgifford@us.ibm.com> Co-authored-by: Wesley M. Gifford <wmgifford@us.ibm.com> Co-authored-by: Kashif Rasul <kashif.rasul@gmail.com> Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
2024-01-29 05:09:26 -05:00
parent 3a08cc485f
commit f72c7c22d9
5 changed files with 115 additions and 89 deletions
--- a/tests/models/patchtsmixer/test_modeling_patchtsmixer.py
+++ b/tests/models/patchtsmixer/test_modeling_patchtsmixer.py
@@ -191,11 +191,8 @@ class PatchTSMixerModelTester:
        # [bs x context_length x n_vars]
        past_values = floats_tensor([self.batch_size, _past_length, self.num_input_channels])

-        future_values = floats_tensor([self.batch_size, config.prediction_length, self.num_input_channels])
-
        inputs_dict = {
            "past_values": past_values,
-            "future_values": future_values,
        }
        return inputs_dict

@@ -256,21 +253,25 @@ class PatchTSMixerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.Test
    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
        inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)

-        # if classification model:
-        if model_class in get_values(MODEL_FOR_TIME_SERIES_CLASSIFICATION_MAPPING):
+        if model_class == PatchTSMixerForPrediction:
+            rng = random.Random(self.model_tester.seed_number)
+            labels = floats_tensor(
+                [
+                    self.model_tester.batch_size,
+                    self.model_tester.prediction_length,
+                    self.model_tester.num_input_channels,
+                ],
+                rng=rng,
+            )
+            inputs_dict["future_values"] = labels
+        elif model_class in get_values(MODEL_FOR_TIME_SERIES_CLASSIFICATION_MAPPING):
            rng = random.Random(self.model_tester.seed_number)
            labels = ids_tensor([self.model_tester.batch_size], self.model_tester.num_targets, rng=rng)
-            # inputs_dict["labels"] = labels
-            inputs_dict["future_values"] = labels
-            # inputs_dict.pop("future_values")
+            inputs_dict["target_values"] = labels
        elif model_class in get_values(MODEL_FOR_TIME_SERIES_REGRESSION_MAPPING):
            rng = random.Random(self.model_tester.seed_number)
            labels = floats_tensor([self.model_tester.batch_size, self.model_tester.num_targets], rng=rng)
-            # inputs_dict["labels"] = labels
-            inputs_dict["future_values"] = labels
-            # inputs_dict.pop("future_values")
-        elif model_class in [PatchTSMixerModel, PatchTSMixerForPretraining]:
-            inputs_dict.pop("future_values")
+            inputs_dict["target_values"] = labels

        inputs_dict["output_hidden_states"] = True
        return inputs_dict
@@ -409,28 +410,37 @@ class PatchTSMixerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.Test
            # signature.parameters is an OrderedDict => so arg_names order is deterministic
            arg_names = [*signature.parameters.keys()]

-            expected_arg_names_with_target = [
-                "past_values",
-                "observed_mask",
-                "future_values",
-                "output_hidden_states",
-                "return_loss",
-            ]
-            expected_arg_names_without_target = [
-                "past_values",
-                "observed_mask",
-                "output_hidden_states",
-            ]
-
-            expected_arg_names = expected_arg_names_with_target
            if model_class == PatchTSMixerForPretraining:
-                expected_arg_names = expected_arg_names_without_target + ["return_loss"]
-            if model_class == PatchTSMixerModel:
-                expected_arg_names = expected_arg_names_without_target
-            if model_class in get_values(MODEL_FOR_TIME_SERIES_CLASSIFICATION_MAPPING) or model_class in get_values(
+                expected_arg_names = [
+                    "past_values",
+                    "observed_mask",
+                    "output_hidden_states",
+                    "return_loss",
+                ]
+            elif model_class == PatchTSMixerModel:
+                expected_arg_names = [
+                    "past_values",
+                    "observed_mask",
+                    "output_hidden_states",
+                ]
+            elif model_class in get_values(MODEL_FOR_TIME_SERIES_CLASSIFICATION_MAPPING) or model_class in get_values(
                MODEL_FOR_TIME_SERIES_REGRESSION_MAPPING
            ):
-                expected_arg_names.remove("observed_mask")
+                expected_arg_names = [
+                    "past_values",
+                    "target_values",
+                    "output_hidden_states",
+                    "return_loss",
+                ]
+            else:
+                # PatchTSMixerForPrediction
+                expected_arg_names = [
+                    "past_values",
+                    "observed_mask",
+                    "future_values",
+                    "output_hidden_states",
+                    "return_loss",
+                ]

            self.assertListEqual(arg_names[: len(expected_arg_names)], expected_arg_names)

@@ -686,20 +696,27 @@ class PatchTSMixerFunctionalTests(unittest.TestCase):
            else:
                target_output = target_input
            ref_samples = target_output.unsqueeze(1).expand(-1, config.num_parallel_samples, -1, -1)
-
+            ground_truth_arg = "future_values"
+            output_predictions_arg = "prediction_outputs"
        elif task == "classification":
            mdl = PatchTSMixerForTimeSeriesClassification(config)
            target_input = self.__class__.correct_classification_classes
            target_output = self.__class__.correct_classification_output
+            ground_truth_arg = "target_values"
+            output_predictions_arg = "prediction_outputs"
        elif task == "regression":
            mdl = PatchTSMixerForRegression(config)
            target_input = self.__class__.correct_regression_output
            target_output = self.__class__.correct_regression_output
            ref_samples = target_output.unsqueeze(1).expand(-1, config.num_parallel_samples, -1)
+            ground_truth_arg = "target_values"
+            output_predictions_arg = "regression_outputs"
        elif task == "pretrain":
            mdl = PatchTSMixerForPretraining(config)
            target_input = None
            target_output = self.__class__.correct_pretrain_output
+            ground_truth_arg = None
+            output_predictions_arg = "prediction_outputs"
        else:
            print("invalid task")

@@ -710,15 +727,18 @@ class PatchTSMixerFunctionalTests(unittest.TestCase):
        else:
            output = mdl(
                self.__class__.data,
-                future_values=target_input,
-                output_hidden_states=output_hidden_states,
+                **{
+                    ground_truth_arg: target_input,
+                    "output_hidden_states": output_hidden_states,
+                },
            )

-        if isinstance(output.prediction_outputs, tuple):
-            for t in output.prediction_outputs:
+        prediction_outputs = getattr(output, output_predictions_arg)
+        if isinstance(prediction_outputs, tuple):
+            for t in prediction_outputs:
                self.assertEqual(t.shape, target_output.shape)
        else:
-            self.assertEqual(output.prediction_outputs.shape, target_output.shape)
+            self.assertEqual(prediction_outputs.shape, target_output.shape)

        self.assertEqual(output.last_hidden_state.shape, enc_output.shape)

@@ -980,7 +1000,7 @@ class PatchTSMixerFunctionalTests(unittest.TestCase):
        mdl = PatchTSMixerForTimeSeriesClassification(config)
        output = mdl(
            self.__class__.data,
-            future_values=self.__class__.correct_classification_classes,
+            target_values=self.__class__.correct_classification_classes,
        )
        self.assertEqual(
            output.prediction_outputs.shape,
@@ -994,7 +1014,7 @@ class PatchTSMixerFunctionalTests(unittest.TestCase):
        mdl = PatchTSMixerForTimeSeriesClassification(config)
        output = mdl(
            self.__class__.data,
-            future_values=self.__class__.correct_classification_classes,
+            target_values=self.__class__.correct_classification_classes,
            return_dict=False,
        )
        if isinstance(output, tuple):
@@ -1017,9 +1037,9 @@ class PatchTSMixerFunctionalTests(unittest.TestCase):
    def test_regression_full(self):
        config = PatchTSMixerConfig(**self.__class__.params)
        mdl = PatchTSMixerForRegression(config)
-        output = mdl(self.__class__.data, future_values=self.__class__.correct_regression_output)
+        output = mdl(self.__class__.data, target_values=self.__class__.correct_regression_output)
        self.assertEqual(
-            output.prediction_outputs.shape,
+            output.regression_outputs.shape,
            self.__class__.correct_regression_output.shape,
        )
        self.assertEqual(output.last_hidden_state.shape, self.__class__.enc_output.shape)
@@ -1030,13 +1050,13 @@ class PatchTSMixerFunctionalTests(unittest.TestCase):
        mdl = PatchTSMixerForRegression(config)
        output = mdl(
            self.__class__.data,
-            future_values=self.__class__.correct_regression_output,
+            target_values=self.__class__.correct_regression_output,
            return_dict=False,
        )
        if isinstance(output, tuple):
            output = PatchTSMixerForRegressionOutput(*output)
        self.assertEqual(
-            output.prediction_outputs.shape,
+            output.regression_outputs.shape,
            self.__class__.correct_regression_output.shape,
        )
        self.assertEqual(output.last_hidden_state.shape, self.__class__.enc_output.shape)
@@ -1049,13 +1069,13 @@ class PatchTSMixerFunctionalTests(unittest.TestCase):
        config = PatchTSMixerConfig(**params)

        mdl = PatchTSMixerForRegression(config)
-        output = mdl(self.__class__.data, future_values=self.__class__.correct_regression_output)
+        output = mdl(self.__class__.data, target_values=self.__class__.correct_regression_output)
        self.assertEqual(
-            output.prediction_outputs[0].shape,
+            output.regression_outputs[0].shape,
            self.__class__.correct_regression_output.shape,
        )
        self.assertEqual(
-            output.prediction_outputs[1].shape,
+            output.regression_outputs[1].shape,
            self.__class__.correct_regression_output.shape,
        )
        self.assertEqual(output.last_hidden_state.shape, self.__class__.enc_output.shape)
@@ -1075,13 +1095,13 @@ class PatchTSMixerFunctionalTests(unittest.TestCase):
        config = PatchTSMixerConfig(**params)

        mdl = PatchTSMixerForRegression(config)
-        output = mdl(self.__class__.data, future_values=self.__class__.correct_regression_output)
+        output = mdl(self.__class__.data, target_values=self.__class__.correct_regression_output)
        self.assertEqual(
-            output.prediction_outputs[0].shape,
+            output.regression_outputs[0].shape,
            self.__class__.correct_regression_output.shape,
        )
        self.assertEqual(
-            output.prediction_outputs[1].shape,
+            output.regression_outputs[1].shape,
            self.__class__.correct_regression_output.shape,
        )
        self.assertEqual(output.last_hidden_state.shape, self.__class__.enc_output.shape)
--- a/tests/models/patchtst/test_modeling_patchtst.py
+++ b/tests/models/patchtst/test_modeling_patchtst.py
@@ -367,19 +367,19 @@ class PatchTSTModelIntegrationTests(unittest.TestCase):
        self.assertTrue(torch.allclose(mean_prediction[0, -1:], expected_slice, atol=TOLERANCE))

    def test_regression_generation(self):
-        model = PatchTSTForRegression.from_pretrained("namctin/patchtst_etth1_regression").to(torch_device)
-        batch = prepare_batch(file="test-batch.pt")
+        model = PatchTSTForRegression.from_pretrained("ibm/patchtst-etth1-regression-distribution").to(torch_device)
+        batch = prepare_batch(repo_id="ibm/patchtst-etth1-test-data", file="regression_distribution_batch.pt")

        torch.manual_seed(0)
+        model.eval()
        with torch.no_grad():
            outputs = model.generate(past_values=batch["past_values"].to(torch_device))
        expected_shape = torch.Size((64, model.config.num_parallel_samples, model.config.num_targets))
        self.assertEqual(outputs.sequences.shape, expected_shape)

        expected_slice = torch.tensor(
-            [[0.3228, 0.4320, 0.4591, 0.4066, -0.3461, 0.3094, -0.8426]],
+            [[-0.08046409], [-0.06570087], [-0.28218266], [-0.20636195], [-0.11787311]],
            device=torch_device,
        )
        mean_prediction = outputs.sequences.mean(dim=1)
-
-        self.assertTrue(torch.allclose(mean_prediction[0, -1:], expected_slice, rtol=TOLERANCE))
+        self.assertTrue(torch.allclose(mean_prediction[-5:], expected_slice, rtol=TOLERANCE))