Fixing Dataset for TQA + token-classification. (#14658)

* Fixing Dataset for TQA + token-classification.
* Fixing the tests.
* Making sure `offset_mappings` is a valid argument.
2021-12-08 09:54:24 +01:00
parent fae0b9faef
commit 2e12d90b9e
5 changed files with 58 additions and 20 deletions
--- a/tests/test_pipelines_common.py
+++ b/tests/test_pipelines_common.py
@@ -183,9 +183,12 @@ class PipelineTestCaseMeta(type):

                    # 10 examples with batch size 4 means there needs to be a unfinished batch
                    # which is important for the unbatcher
-                    dataset = [copy.deepcopy(random.choice(examples)) for i in range(10)]
+                    def data(n):
+                        for _ in range(n):
+                            # Need to copy because Conversation object is mutated
+                            yield copy.deepcopy(random.choice(examples))

-                    for item in pipeline(dataset, batch_size=4):
+                    for item in pipeline(data(10), batch_size=4):
                        pass

                run_batch_test(pipeline, examples)