From 70e7d1d65627d64ecf71afce88cec3de83eb456a Mon Sep 17 00:00:00 2001
From: Mikkel Denker <Mikkeldenker@gmail.com>
Date: Wed, 27 Jul 2022 12:38:40 +0200
Subject: [PATCH] Fixes torch jit tracing for LayoutLMv2 model (re-open)
 (#18313)

* Fixes torch jit tracing for LayoutLMv2 model.
PyTorch seems to reuse memory for input_shape, which caused a shape mismatch later in the forward pass.

* Fixed code quality

* Avoid unneeded allocation of a vector for the shape
---
 .../models/layoutlmv2/modeling_layoutlmv2.py  | 23 +++++++++++--------
 .../layoutlmv2/test_modeling_layoutlmv2.py    |  2 +-
 tests/test_modeling_common.py                 |  7 ++++++
 3 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py
index 382a7b305b..be31af99d6 100755
--- a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py
+++ b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py
@@ -805,6 +805,16 @@ class LayoutLMv2Model(LayoutLMv2PreTrainedModel):
 
         return visual_bbox
 
+    def _get_input_shape(self, input_ids=None, inputs_embeds=None):
+        if input_ids is not None and inputs_embeds is not None:
+            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
+        elif input_ids is not None:
+            return input_ids.size()
+        elif inputs_embeds is not None:
+            return inputs_embeds.size()[:-1]
+        else:
+            raise ValueError("You have to specify either input_ids or inputs_embeds")
+
     @add_start_docstrings_to_model_forward(LAYOUTLMV2_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
     @replace_return_docstrings(output_type=BaseModelOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
@@ -857,21 +867,14 @@ class LayoutLMv2Model(LayoutLMv2PreTrainedModel):
         )
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
-        if input_ids is not None and inputs_embeds is not None:
-            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
-        elif input_ids is not None:
-            input_shape = input_ids.size()
-        elif inputs_embeds is not None:
-            input_shape = inputs_embeds.size()[:-1]
-        else:
-            raise ValueError("You have to specify either input_ids or inputs_embeds")
-
+        input_shape = self._get_input_shape(input_ids, inputs_embeds)
         device = input_ids.device if input_ids is not None else inputs_embeds.device
 
         visual_shape = list(input_shape)
         visual_shape[1] = self.config.image_feature_pool_shape[0] * self.config.image_feature_pool_shape[1]
         visual_shape = torch.Size(visual_shape)
-        final_shape = list(input_shape)
+        # needs a fresh copy of input_shape for tracing; reusing the one computed above gives wrong dimensions
+        final_shape = list(self._get_input_shape(input_ids, inputs_embeds))
         final_shape[1] += visual_shape[1]
         final_shape = torch.Size(final_shape)
 
diff --git a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
index 35eef14d2b..3c38373163 100644
--- a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
+++ b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
@@ -260,7 +260,7 @@ class LayoutLMv2ModelTester:
 class LayoutLMv2ModelTest(ModelTesterMixin, unittest.TestCase):
 
     test_pruning = False
-    test_torchscript = False
+    test_torchscript = True
     test_mismatched_shapes = False
 
     all_model_classes = (
diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py
index 00f2c7cb5b..31dc5f0bdb 100755
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -648,6 +648,13 @@ class ModelTesterMixin:
                     traced_model = torch.jit.trace(
                         model, (main_input, attention_mask, decoder_input_ids, decoder_attention_mask)
                     )
+                elif "bbox" in inputs and "image" in inputs:  # LayoutLMv2 requires additional inputs
+                    input_ids = inputs["input_ids"]
+                    bbox = inputs["bbox"]
+                    image = inputs["image"].tensor
+                    traced_model = torch.jit.trace(
+                        model, (input_ids, bbox, image), check_trace=False
+                    )  # check_trace=False: checking the traced model raises an error due to name mangling
                 else:
                     main_input = inputs[main_input_name]
                     traced_model = torch.jit.trace(model, main_input)