Fix model equivalence tests (#15670)

* Fix model equivalence tests

* Apply suggestions from code review

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
2022-02-15 18:55:22 -05:00
parent 1690319217
commit 943e2aa036
3 changed files with 27 additions and 31 deletions
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -1475,17 +1475,17 @@ class ModelTesterMixin:
                if type(tensor) == bool:
                    tf_inputs_dict[key] = tensor
                elif key == "input_values":
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(), dtype=tf.float32)
+                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
                elif key == "pixel_values":
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(), dtype=tf.float32)
+                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
                elif key == "input_features":
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(), dtype=tf.float32)
+                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
                else:
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(), dtype=tf.int32)
+                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.int32)

            # Check we can load pt model in tf and vice-versa with model => model functions
            tf_model = transformers.load_pytorch_model_in_tf2_model(tf_model, pt_model, tf_inputs=tf_inputs_dict)
-            pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model)
+            pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model).to(torch_device)

            # need to rename encoder-decoder "inputs" for PyTorch
            #            if "inputs" in pt_inputs_dict and self.is_encoder_decoder:
@@ -1496,7 +1496,7 @@ class ModelTesterMixin:
            tfo = tf_model(tf_inputs_dict, training=False)

            tf_hidden_states = tfo[0].numpy()
-            pt_hidden_states = pto[0].numpy()
+            pt_hidden_states = pto[0].cpu().numpy()

            tf_nans = np.copy(np.isnan(tf_hidden_states))
            pt_nans = np.copy(np.isnan(pt_hidden_states))
@@ -1518,6 +1518,7 @@ class ModelTesterMixin:
                tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5")
                tf_model.save_weights(tf_checkpoint_path)
                pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path)
+                pt_model = pt_model.to(torch_device)

            # Check predictions on first output (logits/hidden-states) are close enought given low-level computational differences
            pt_model.eval()
@@ -1528,13 +1529,13 @@ class ModelTesterMixin:
                    tensor = np.array(tensor, dtype=bool)
                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor, dtype=tf.int32)
                elif key == "input_values":
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(), dtype=tf.float32)
+                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
                elif key == "pixel_values":
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(), dtype=tf.float32)
+                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
                elif key == "input_features":
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(), dtype=tf.float32)
+                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
                else:
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(), dtype=tf.int32)
+                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.int32)

            # need to rename encoder-decoder "inputs" for PyTorch
            #            if "inputs" in pt_inputs_dict and self.is_encoder_decoder:
@@ -1545,7 +1546,7 @@ class ModelTesterMixin:

            tfo = tf_model(tf_inputs_dict)
            tfo = tfo[0].numpy()
-            pto = pto[0].numpy()
+            pto = pto[0].cpu().numpy()
            tf_nans = np.copy(np.isnan(tfo))
            pt_nans = np.copy(np.isnan(pto))