PatchTST and PatchTSMixer fixes (#28083)
* 🐛 fix .max bug
* remove prediction_length from regression output dimensions
* fix parameter names, fix output names, update tests
* ensure shape for PatchTST
* ensure output shape for PatchTSMixer
* update model, batch, and expected for regression distribution test
* update test expected
  Signed-off-by: Wesley M. Gifford <wmgifford@us.ibm.com>
* Update tests/models/patchtst/test_modeling_patchtst.py
  Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
* Update tests/models/patchtst/test_modeling_patchtst.py
  Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
* Update tests/models/patchtst/test_modeling_patchtst.py
  Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
* Update src/transformers/models/patchtsmixer/modeling_patchtsmixer.py
  Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
* Update tests/models/patchtsmixer/test_modeling_patchtsmixer.py
  Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
* Update tests/models/patchtsmixer/test_modeling_patchtsmixer.py
  Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
* standardize on patch_length
  Signed-off-by: Wesley M. Gifford <wmgifford@us.ibm.com>
* Update tests/models/patchtsmixer/test_modeling_patchtsmixer.py
  Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
* Update tests/models/patchtsmixer/test_modeling_patchtsmixer.py
  Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
* Make arguments more explicit
  Signed-off-by: Wesley M. Gifford <wmgifford@us.ibm.com>
* adjust prepared inputs
  Signed-off-by: Wesley M. Gifford <wmgifford@us.ibm.com>

---------

Signed-off-by: Wesley M. Gifford <wmgifford@us.ibm.com>
Co-authored-by: Wesley M. Gifford <wmgifford@us.ibm.com>
Co-authored-by: Kashif Rasul <kashif.rasul@gmail.com>
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
@@ -191,11 +191,8 @@ class PatchTSMixerModelTester:
|
||||
# [bs x context_length x n_vars]
|
||||
past_values = floats_tensor([self.batch_size, _past_length, self.num_input_channels])
|
||||
|
||||
future_values = floats_tensor([self.batch_size, config.prediction_length, self.num_input_channels])
|
||||
|
||||
inputs_dict = {
|
||||
"past_values": past_values,
|
||||
"future_values": future_values,
|
||||
}
|
||||
return inputs_dict
|
||||
|
||||
@@ -256,21 +253,25 @@ class PatchTSMixerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.Test
|
||||
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
|
||||
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
|
||||
|
||||
# if classification model:
|
||||
if model_class in get_values(MODEL_FOR_TIME_SERIES_CLASSIFICATION_MAPPING):
|
||||
if model_class == PatchTSMixerForPrediction:
|
||||
rng = random.Random(self.model_tester.seed_number)
|
||||
labels = floats_tensor(
|
||||
[
|
||||
self.model_tester.batch_size,
|
||||
self.model_tester.prediction_length,
|
||||
self.model_tester.num_input_channels,
|
||||
],
|
||||
rng=rng,
|
||||
)
|
||||
inputs_dict["future_values"] = labels
|
||||
elif model_class in get_values(MODEL_FOR_TIME_SERIES_CLASSIFICATION_MAPPING):
|
||||
rng = random.Random(self.model_tester.seed_number)
|
||||
labels = ids_tensor([self.model_tester.batch_size], self.model_tester.num_targets, rng=rng)
|
||||
# inputs_dict["labels"] = labels
|
||||
inputs_dict["future_values"] = labels
|
||||
# inputs_dict.pop("future_values")
|
||||
inputs_dict["target_values"] = labels
|
||||
elif model_class in get_values(MODEL_FOR_TIME_SERIES_REGRESSION_MAPPING):
|
||||
rng = random.Random(self.model_tester.seed_number)
|
||||
labels = floats_tensor([self.model_tester.batch_size, self.model_tester.num_targets], rng=rng)
|
||||
# inputs_dict["labels"] = labels
|
||||
inputs_dict["future_values"] = labels
|
||||
# inputs_dict.pop("future_values")
|
||||
elif model_class in [PatchTSMixerModel, PatchTSMixerForPretraining]:
|
||||
inputs_dict.pop("future_values")
|
||||
inputs_dict["target_values"] = labels
|
||||
|
||||
inputs_dict["output_hidden_states"] = True
|
||||
return inputs_dict
|
||||
@@ -409,28 +410,37 @@ class PatchTSMixerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.Test
|
||||
# signature.parameters is an OrderedDict => so arg_names order is deterministic
|
||||
arg_names = [*signature.parameters.keys()]
|
||||
|
||||
expected_arg_names_with_target = [
|
||||
"past_values",
|
||||
"observed_mask",
|
||||
"future_values",
|
||||
"output_hidden_states",
|
||||
"return_loss",
|
||||
]
|
||||
expected_arg_names_without_target = [
|
||||
"past_values",
|
||||
"observed_mask",
|
||||
"output_hidden_states",
|
||||
]
|
||||
|
||||
expected_arg_names = expected_arg_names_with_target
|
||||
if model_class == PatchTSMixerForPretraining:
|
||||
expected_arg_names = expected_arg_names_without_target + ["return_loss"]
|
||||
if model_class == PatchTSMixerModel:
|
||||
expected_arg_names = expected_arg_names_without_target
|
||||
if model_class in get_values(MODEL_FOR_TIME_SERIES_CLASSIFICATION_MAPPING) or model_class in get_values(
|
||||
expected_arg_names = [
|
||||
"past_values",
|
||||
"observed_mask",
|
||||
"output_hidden_states",
|
||||
"return_loss",
|
||||
]
|
||||
elif model_class == PatchTSMixerModel:
|
||||
expected_arg_names = [
|
||||
"past_values",
|
||||
"observed_mask",
|
||||
"output_hidden_states",
|
||||
]
|
||||
elif model_class in get_values(MODEL_FOR_TIME_SERIES_CLASSIFICATION_MAPPING) or model_class in get_values(
|
||||
MODEL_FOR_TIME_SERIES_REGRESSION_MAPPING
|
||||
):
|
||||
expected_arg_names.remove("observed_mask")
|
||||
expected_arg_names = [
|
||||
"past_values",
|
||||
"target_values",
|
||||
"output_hidden_states",
|
||||
"return_loss",
|
||||
]
|
||||
else:
|
||||
# PatchTSMixerForPrediction
|
||||
expected_arg_names = [
|
||||
"past_values",
|
||||
"observed_mask",
|
||||
"future_values",
|
||||
"output_hidden_states",
|
||||
"return_loss",
|
||||
]
|
||||
|
||||
self.assertListEqual(arg_names[: len(expected_arg_names)], expected_arg_names)
|
||||
|
||||
@@ -686,20 +696,27 @@ class PatchTSMixerFunctionalTests(unittest.TestCase):
|
||||
else:
|
||||
target_output = target_input
|
||||
ref_samples = target_output.unsqueeze(1).expand(-1, config.num_parallel_samples, -1, -1)
|
||||
|
||||
ground_truth_arg = "future_values"
|
||||
output_predictions_arg = "prediction_outputs"
|
||||
elif task == "classification":
|
||||
mdl = PatchTSMixerForTimeSeriesClassification(config)
|
||||
target_input = self.__class__.correct_classification_classes
|
||||
target_output = self.__class__.correct_classification_output
|
||||
ground_truth_arg = "target_values"
|
||||
output_predictions_arg = "prediction_outputs"
|
||||
elif task == "regression":
|
||||
mdl = PatchTSMixerForRegression(config)
|
||||
target_input = self.__class__.correct_regression_output
|
||||
target_output = self.__class__.correct_regression_output
|
||||
ref_samples = target_output.unsqueeze(1).expand(-1, config.num_parallel_samples, -1)
|
||||
ground_truth_arg = "target_values"
|
||||
output_predictions_arg = "regression_outputs"
|
||||
elif task == "pretrain":
|
||||
mdl = PatchTSMixerForPretraining(config)
|
||||
target_input = None
|
||||
target_output = self.__class__.correct_pretrain_output
|
||||
ground_truth_arg = None
|
||||
output_predictions_arg = "prediction_outputs"
|
||||
else:
|
||||
print("invalid task")
|
||||
|
||||
@@ -710,15 +727,18 @@ class PatchTSMixerFunctionalTests(unittest.TestCase):
|
||||
else:
|
||||
output = mdl(
|
||||
self.__class__.data,
|
||||
future_values=target_input,
|
||||
output_hidden_states=output_hidden_states,
|
||||
**{
|
||||
ground_truth_arg: target_input,
|
||||
"output_hidden_states": output_hidden_states,
|
||||
},
|
||||
)
|
||||
|
||||
if isinstance(output.prediction_outputs, tuple):
|
||||
for t in output.prediction_outputs:
|
||||
prediction_outputs = getattr(output, output_predictions_arg)
|
||||
if isinstance(prediction_outputs, tuple):
|
||||
for t in prediction_outputs:
|
||||
self.assertEqual(t.shape, target_output.shape)
|
||||
else:
|
||||
self.assertEqual(output.prediction_outputs.shape, target_output.shape)
|
||||
self.assertEqual(prediction_outputs.shape, target_output.shape)
|
||||
|
||||
self.assertEqual(output.last_hidden_state.shape, enc_output.shape)
|
||||
|
||||
@@ -980,7 +1000,7 @@ class PatchTSMixerFunctionalTests(unittest.TestCase):
|
||||
mdl = PatchTSMixerForTimeSeriesClassification(config)
|
||||
output = mdl(
|
||||
self.__class__.data,
|
||||
future_values=self.__class__.correct_classification_classes,
|
||||
target_values=self.__class__.correct_classification_classes,
|
||||
)
|
||||
self.assertEqual(
|
||||
output.prediction_outputs.shape,
|
||||
@@ -994,7 +1014,7 @@ class PatchTSMixerFunctionalTests(unittest.TestCase):
|
||||
mdl = PatchTSMixerForTimeSeriesClassification(config)
|
||||
output = mdl(
|
||||
self.__class__.data,
|
||||
future_values=self.__class__.correct_classification_classes,
|
||||
target_values=self.__class__.correct_classification_classes,
|
||||
return_dict=False,
|
||||
)
|
||||
if isinstance(output, tuple):
|
||||
@@ -1017,9 +1037,9 @@ class PatchTSMixerFunctionalTests(unittest.TestCase):
|
||||
def test_regression_full(self):
|
||||
config = PatchTSMixerConfig(**self.__class__.params)
|
||||
mdl = PatchTSMixerForRegression(config)
|
||||
output = mdl(self.__class__.data, future_values=self.__class__.correct_regression_output)
|
||||
output = mdl(self.__class__.data, target_values=self.__class__.correct_regression_output)
|
||||
self.assertEqual(
|
||||
output.prediction_outputs.shape,
|
||||
output.regression_outputs.shape,
|
||||
self.__class__.correct_regression_output.shape,
|
||||
)
|
||||
self.assertEqual(output.last_hidden_state.shape, self.__class__.enc_output.shape)
|
||||
@@ -1030,13 +1050,13 @@ class PatchTSMixerFunctionalTests(unittest.TestCase):
|
||||
mdl = PatchTSMixerForRegression(config)
|
||||
output = mdl(
|
||||
self.__class__.data,
|
||||
future_values=self.__class__.correct_regression_output,
|
||||
target_values=self.__class__.correct_regression_output,
|
||||
return_dict=False,
|
||||
)
|
||||
if isinstance(output, tuple):
|
||||
output = PatchTSMixerForRegressionOutput(*output)
|
||||
self.assertEqual(
|
||||
output.prediction_outputs.shape,
|
||||
output.regression_outputs.shape,
|
||||
self.__class__.correct_regression_output.shape,
|
||||
)
|
||||
self.assertEqual(output.last_hidden_state.shape, self.__class__.enc_output.shape)
|
||||
@@ -1049,13 +1069,13 @@ class PatchTSMixerFunctionalTests(unittest.TestCase):
|
||||
config = PatchTSMixerConfig(**params)
|
||||
|
||||
mdl = PatchTSMixerForRegression(config)
|
||||
output = mdl(self.__class__.data, future_values=self.__class__.correct_regression_output)
|
||||
output = mdl(self.__class__.data, target_values=self.__class__.correct_regression_output)
|
||||
self.assertEqual(
|
||||
output.prediction_outputs[0].shape,
|
||||
output.regression_outputs[0].shape,
|
||||
self.__class__.correct_regression_output.shape,
|
||||
)
|
||||
self.assertEqual(
|
||||
output.prediction_outputs[1].shape,
|
||||
output.regression_outputs[1].shape,
|
||||
self.__class__.correct_regression_output.shape,
|
||||
)
|
||||
self.assertEqual(output.last_hidden_state.shape, self.__class__.enc_output.shape)
|
||||
@@ -1075,13 +1095,13 @@ class PatchTSMixerFunctionalTests(unittest.TestCase):
|
||||
config = PatchTSMixerConfig(**params)
|
||||
|
||||
mdl = PatchTSMixerForRegression(config)
|
||||
output = mdl(self.__class__.data, future_values=self.__class__.correct_regression_output)
|
||||
output = mdl(self.__class__.data, target_values=self.__class__.correct_regression_output)
|
||||
self.assertEqual(
|
||||
output.prediction_outputs[0].shape,
|
||||
output.regression_outputs[0].shape,
|
||||
self.__class__.correct_regression_output.shape,
|
||||
)
|
||||
self.assertEqual(
|
||||
output.prediction_outputs[1].shape,
|
||||
output.regression_outputs[1].shape,
|
||||
self.__class__.correct_regression_output.shape,
|
||||
)
|
||||
self.assertEqual(output.last_hidden_state.shape, self.__class__.enc_output.shape)
|
||||
|
||||
@@ -367,19 +367,19 @@ class PatchTSTModelIntegrationTests(unittest.TestCase):
|
||||
self.assertTrue(torch.allclose(mean_prediction[0, -1:], expected_slice, atol=TOLERANCE))
|
||||
|
||||
def test_regression_generation(self):
|
||||
model = PatchTSTForRegression.from_pretrained("namctin/patchtst_etth1_regression").to(torch_device)
|
||||
batch = prepare_batch(file="test-batch.pt")
|
||||
model = PatchTSTForRegression.from_pretrained("ibm/patchtst-etth1-regression-distribution").to(torch_device)
|
||||
batch = prepare_batch(repo_id="ibm/patchtst-etth1-test-data", file="regression_distribution_batch.pt")
|
||||
|
||||
torch.manual_seed(0)
|
||||
model.eval()
|
||||
with torch.no_grad():
|
||||
outputs = model.generate(past_values=batch["past_values"].to(torch_device))
|
||||
expected_shape = torch.Size((64, model.config.num_parallel_samples, model.config.num_targets))
|
||||
self.assertEqual(outputs.sequences.shape, expected_shape)
|
||||
|
||||
expected_slice = torch.tensor(
|
||||
[[0.3228, 0.4320, 0.4591, 0.4066, -0.3461, 0.3094, -0.8426]],
|
||||
[[-0.08046409], [-0.06570087], [-0.28218266], [-0.20636195], [-0.11787311]],
|
||||
device=torch_device,
|
||||
)
|
||||
mean_prediction = outputs.sequences.mean(dim=1)
|
||||
|
||||
self.assertTrue(torch.allclose(mean_prediction[0, -1:], expected_slice, rtol=TOLERANCE))
|
||||
self.assertTrue(torch.allclose(mean_prediction[-5:], expected_slice, rtol=TOLERANCE))
|
||||
|
||||
Reference in New Issue
Block a user