Llama et al. / FSDP : Fix breaking change in 4.40 for FSDP (#31161)

* fix llama fsdp * fixup * adding FSDP tests for CPU offloading * fixes * fix tests * fix tests * add it for mixtral * propagate the changes on other models * Update src/transformers/models/phi/modeling_phi.py * Delete utils/testing_scripts/fsdp_cpu_offloading.py Remove script - FSDP + CPU offloading it tested in the test suite * Delete utils/testing_scripts/dummy_fsdp_config.yml * Update + add cache_positions docstring --------- Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
2024-06-26 15:50:08 +02:00
parent ac52084bf2
commit 3f93fd0694
13 changed files with 71 additions and 1 deletions
--- a/tests/fsdp/test_fsdp.py
+++ b/tests/fsdp/test_fsdp.py
@@ -14,6 +14,7 @@

 import itertools
 import os
+import subprocess
 import unittest
 from copy import deepcopy
 from functools import partial
@@ -31,6 +32,7 @@ from transformers.testing_utils import (
    require_accelerate,
    require_fsdp,
    require_torch_accelerator,
+    require_torch_gpu,
    require_torch_multi_accelerator,
    slow,
    torch_device,
@@ -276,6 +278,20 @@ class TrainerIntegrationFSDP(TestCasePlus, TrainerIntegrationCommon):
            if "learning_rate" in log:
                self.assertAlmostEqual(log["learning_rate"], log1["learning_rate"], delta=1e-5)

+    @require_torch_multi_accelerator
+    @slow
+    @require_torch_gpu
+    @require_fsdp
+    def test_fsdp_cpu_offloading(self):
+        try:
+            subprocess.run(
+                "accelerate launch utils/testing_scripts/fsdp_cpu_offloading.py --config utils/testing_scripts/dummy_fsdp_config.yml",
+                shell=True,
+                check=True,
+            )
+        except:  # noqa
+            raise AssertionError("CPU offloading failed with FSDP!")
+
    def run_cmd_and_get_logs(self, use_accelerate, sharding_strategy, launcher, script, args, output_dir):
        if not use_accelerate:
            fsdp_args = [