[Speech Examples] Add pytorch speech pretraining (#13877)

* adapt wav2vec2 * add example * add files * adapt * remove bogus file * Apply suggestions from code review * adapt files more * upload changes * del old files * up * up * up * up * up * correct gradient checkpointing * add readme * finish * finish * up * more fixes * up * up * add demo run to readme * up
2021-10-12 00:46:32 +02:00
parent 3499728dc4
commit d45fc7da3d
9 changed files with 1196 additions and 183 deletions
--- a/tests/test_modeling_hubert.py
+++ b/tests/test_modeling_hubert.py
@@ -586,7 +586,8 @@ class HubertUtilsTest(unittest.TestCase):
        mask_prob = 0.5
        mask_length = 1

-        mask = _compute_mask_indices((batch_size, sequence_length), mask_prob, mask_length, torch_device)
+        mask = _compute_mask_indices((batch_size, sequence_length), mask_prob, mask_length)
+        mask = torch.from_numpy(mask).to(torch_device)

        self.assertListEqual(mask.sum(axis=-1).tolist(), [mask_prob * sequence_length for _ in range(batch_size)])

@@ -596,7 +597,8 @@ class HubertUtilsTest(unittest.TestCase):
        mask_prob = 0.5
        mask_length = 4

-        mask = _compute_mask_indices((batch_size, sequence_length), mask_prob, mask_length, torch_device)
+        mask = _compute_mask_indices((batch_size, sequence_length), mask_prob, mask_length)
+        mask = torch.from_numpy(mask).to(torch_device)

        # because of overlap mask don't have to add up exactly to `mask_prob * sequence_length`, but have to be smaller or equal
        for batch_sum in mask.sum(axis=-1):