Fix bad test about slower init (#32002)

Broke main
This commit is contained in:
Zach Mueller
2024-07-16 10:33:05 -04:00
committed by GitHub
parent 25e5e3fa56
commit 693cb828ff

View File

@@ -20,7 +20,6 @@ import os.path
import sys import sys
import tempfile import tempfile
import threading import threading
import time
import unittest import unittest
import unittest.mock as mock import unittest.mock as mock
import uuid import uuid
@@ -895,28 +894,22 @@ class ModelUtilsTest(TestCasePlus):
@require_usr_bin_time @require_usr_bin_time
@require_accelerate @require_accelerate
@mark.accelerate_tests @mark.accelerate_tests
def test_from_pretrained_low_cpu_mem_usage_slower(self): def test_from_pretrained_low_cpu_mem_usage_equal(self):
# Before this would test that `from_pretrained(..., low_cpu_mem_usage=True)` uses less cpu memory than default # Before this would test that `from_pretrained(..., low_cpu_mem_usage=True)` uses less cpu memory than default
# Now though the memory is the same, we simply test that loading with `low_cpu_mem_usage` winds up being *slower* # Now though these should be around the same.
# (mostly from extra logic needed) # TODO: Look for good bounds to check that their timings are near the same
mname = "hf-internal-testing/tiny-random-bert" mname = "hf-internal-testing/tiny-random-bert"
preamble = "from transformers import AutoModel" preamble = "from transformers import AutoModel"
one_liner_str = f'{preamble}; AutoModel.from_pretrained("{mname}", low_cpu_mem_usage=False)' one_liner_str = f'{preamble}; AutoModel.from_pretrained("{mname}", low_cpu_mem_usage=False)'
start_time = time.time()
# Save this output as `max_rss_normal` if testing memory results # Save this output as `max_rss_normal` if testing memory results
max_rss_normal = self.python_one_liner_max_rss(one_liner_str) max_rss_normal = self.python_one_liner_max_rss(one_liner_str)
end_time = time.time()
elapsed_time_normal = end_time - start_time
# print(f"{max_rss_normal=}") # print(f"{max_rss_normal=}")
one_liner_str = f'{preamble}; AutoModel.from_pretrained("{mname}", low_cpu_mem_usage=True)' one_liner_str = f'{preamble}; AutoModel.from_pretrained("{mname}", low_cpu_mem_usage=True)'
start_time = time.time()
# Save this output as `max_rss_low_mem` if testing memory results # Save this output as `max_rss_low_mem` if testing memory results
max_rss_low_mem = self.python_one_liner_max_rss(one_liner_str) max_rss_low_mem = self.python_one_liner_max_rss(one_liner_str)
end_time = time.time()
elapsed_time_low_mem = end_time - start_time
# Should be within 2MBs of each other (overhead) # Should be within 2MBs of each other (overhead)
self.assertAlmostEqual( self.assertAlmostEqual(
@@ -926,13 +919,6 @@ class ModelUtilsTest(TestCasePlus):
msg="using `low_cpu_mem_usage` should incur the same memory usage in both cases.", msg="using `low_cpu_mem_usage` should incur the same memory usage in both cases.",
) )
self.assertGreater(
elapsed_time_low_mem,
elapsed_time_normal,
"using `low_cpu_mem_usage` should be slower due to extra logic, "
f"but got elapsed_time_normal={elapsed_time_normal} and elapsed_time_low_mem={elapsed_time_low_mem}",
)
# if you want to compare things manually, let's first look at the size of the model in bytes # if you want to compare things manually, let's first look at the size of the model in bytes
# model = BertModel.from_pretrained(mname, low_cpu_mem_usage=False) # model = BertModel.from_pretrained(mname, low_cpu_mem_usage=False)
# total_numel = sum(dict((p.data_ptr(), p.numel()) for p in model.parameters()).values()) # total_numel = sum(dict((p.data_ptr(), p.numel()) for p in model.parameters()).values())