Add DistilHuBERT (#14174)

* Add conversion * Rename * Add an integration test and remove layer_norm * Remove layer_norm from the converter * wording * Fix imports
2021-10-27 20:17:31 +03:00
parent e5b8ffb848
commit 232822f36d
4 changed files with 270 additions and 2 deletions
--- a/tests/test_modeling_hubert.py
+++ b/tests/test_modeling_hubert.py
@@ -760,3 +760,46 @@ class HubertModelIntegrationTest(unittest.TestCase):
        self.assertListEqual(predicted_ids.tolist(), expected_labels)
        # TODO: lower the tolerance after merging the padding fix https://github.com/pytorch/fairseq/pull/3572
        self.assertTrue(torch.allclose(predicted_logits, expected_logits, atol=1e-1))
+
+    def test_inference_distilhubert(self):
+        model = HubertModel.from_pretrained("anton-l/distilhubert").to(torch_device)
+        processor = Wav2Vec2FeatureExtractor.from_pretrained("anton-l/distilhubert")
+
+        # TODO: can't test on batched inputs due to incompatible padding https://github.com/pytorch/fairseq/pull/3572
+        input_speech = self._load_datasamples(1)
+
+        inputs = processor(input_speech, return_tensors="pt", padding=True)
+
+        input_values = inputs.input_values.to(torch_device)
+
+        with torch.no_grad():
+            outputs = model(input_values).last_hidden_state
+
+        # expected outputs taken from the original SEW implementation
+        expected_outputs_first = torch.tensor(
+            [
+                [
+                    [-0.3505, 0.1167, 0.0608, 0.1294],
+                    [-0.3085, 0.0481, 0.1106, 0.0955],
+                    [-0.3107, -0.0391, 0.0739, 0.1360],
+                    [-0.2385, -0.1795, -0.0928, 0.2389],
+                ]
+            ],
+            device=torch_device,
+        )
+        expected_outputs_last = torch.tensor(
+            [
+                [
+                    [-0.0732, 0.0255, 0.0529, -0.1372],
+                    [-0.0812, 0.1259, 0.0564, -0.0438],
+                    [-0.0054, 0.0758, -0.0002, -0.1617],
+                    [0.0133, -0.0320, -0.0687, 0.0062],
+                ]
+            ],
+            device=torch_device,
+        )
+        expected_output_sum = -3776.0730
+
+        self.assertTrue(torch.allclose(outputs[:, :4, :4], expected_outputs_first, atol=5e-3))
+        self.assertTrue(torch.allclose(outputs[:, -4:, -4:], expected_outputs_last, atol=5e-3))
+        self.assertTrue(abs(outputs.sum() - expected_output_sum) < 0.1)