Llama et al. / FSDP : Fix breaking change in 4.40 for FSDP (#31161)
* fix llama fsdp * fixup * adding FSDP tests for CPU offloading * fixes * fix tests * fix tests * add it for mixtral * propagate the changes on other models * Update src/transformers/models/phi/modeling_phi.py * Delete utils/testing_scripts/fsdp_cpu_offloading.py Remove script - FSDP + CPU offloading is tested in the test suite * Delete utils/testing_scripts/dummy_fsdp_config.yml * Update + add cache_positions docstring --------- Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
@@ -14,6 +14,7 @@
|
||||
|
||||
import itertools
|
||||
import os
|
||||
import subprocess
|
||||
import unittest
|
||||
from copy import deepcopy
|
||||
from functools import partial
|
||||
@@ -31,6 +32,7 @@ from transformers.testing_utils import (
|
||||
require_accelerate,
|
||||
require_fsdp,
|
||||
require_torch_accelerator,
|
||||
require_torch_gpu,
|
||||
require_torch_multi_accelerator,
|
||||
slow,
|
||||
torch_device,
|
||||
@@ -276,6 +278,20 @@ class TrainerIntegrationFSDP(TestCasePlus, TrainerIntegrationCommon):
|
||||
if "learning_rate" in log:
|
||||
self.assertAlmostEqual(log["learning_rate"], log1["learning_rate"], delta=1e-5)
|
||||
|
||||
@require_torch_multi_accelerator
@slow
@require_torch_gpu
@require_fsdp
def test_fsdp_cpu_offloading(self):
    """Run the FSDP CPU-offloading script through `accelerate launch`.

    The test passes when the launched process exits with status 0.

    Raises:
        AssertionError: if the process exits with a non-zero status or
            cannot be started at all. The underlying error is chained as
            the cause so the real failure stays visible in the traceback.
    """
    # NOTE(review): both paths are relative, so this presumably has to be
    # run from the repository root -- confirm against the CI invocation.
    command = [
        "accelerate",
        "launch",
        "utils/testing_scripts/fsdp_cpu_offloading.py",
        "--config",
        "utils/testing_scripts/dummy_fsdp_config.yml",
    ]
    try:
        # An argument list (no shell) avoids quoting pitfalls; check=True
        # turns a non-zero exit status into CalledProcessError.
        subprocess.run(command, check=True)
    except (subprocess.CalledProcessError, OSError) as err:
        # OSError covers a missing `accelerate` executable. Catching only
        # these two lets KeyboardInterrupt / SystemExit propagate untouched.
        raise AssertionError("CPU offloading failed with FSDP!") from err
|
||||
|
||||
def run_cmd_and_get_logs(self, use_accelerate, sharding_strategy, launcher, script, args, output_dir):
|
||||
if not use_accelerate:
|
||||
fsdp_args = [
|
||||
|
||||
Reference in New Issue
Block a user