Fix beam search when using model parallel (#24969)

* Fix GPTNeoX beam search when using parallelize * Fix beam search idx device when using model parallel * remove onnx related stuff Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com> * fix: move test_beam_search_on_multi_gpu to GenerationTesterMixin * fix: add right item to _no_split_modules of MegaPreTrainedModel * fix: add num_beams within parallelized beam_search test Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> --------- Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com> Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
2023-09-15 00:00:52 +09:00
parent 0dd06c3f78
commit 8881f38a4f
53 changed files with 191 additions and 95 deletions
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -2482,34 +2482,6 @@ class ModelTesterMixin:
                    for value_, parallel_value_ in zip(value, parallel_value):
                        self.assertTrue(torch.allclose(value_, parallel_value_.to("cpu"), atol=1e-7))

-    @require_torch_multi_gpu
-    def test_model_parallel_beam_search(self):
-        if not self.test_model_parallel:
-            return
-
-        all_generative_and_parallelizable_model_classes = tuple(
-            set(self.all_generative_model_classes).intersection(self.all_parallelizable_model_classes)
-        )
-
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        for model_class in all_generative_and_parallelizable_model_classes:
-            inputs_dict = self._prepare_for_class(inputs_dict, model_class)
-            model = model_class(config)
-
-            def cast_to_device(dictionary, device):
-                output = {}
-                for k, v in dictionary.items():
-                    if isinstance(v, torch.Tensor):
-                        output[k] = v.to(device)
-                    else:
-                        output[k] = v
-
-                return output
-
-            model.parallelize()
-            model.generate(**cast_to_device(inputs_dict, "cuda:0"), num_beams=2)
-
    def check_device_map_is_respected(self, model, device_map):
        for param_name, param in model.named_parameters():
            # Find device in device_map