@@ -20,7 +20,6 @@ import os.path
|
|||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import threading
|
import threading
|
||||||
import time
|
|
||||||
import unittest
|
import unittest
|
||||||
import unittest.mock as mock
|
import unittest.mock as mock
|
||||||
import uuid
|
import uuid
|
||||||
@@ -895,28 +894,22 @@ class ModelUtilsTest(TestCasePlus):
|
|||||||
@require_usr_bin_time
|
@require_usr_bin_time
|
||||||
@require_accelerate
|
@require_accelerate
|
||||||
@mark.accelerate_tests
|
@mark.accelerate_tests
|
||||||
def test_from_pretrained_low_cpu_mem_usage_slower(self):
|
def test_from_pretrained_low_cpu_mem_usage_equal(self):
|
||||||
# Before this would test that `from_pretrained(..., low_cpu_mem_usage=True)` uses less cpu memory than default
|
# Before this would test that `from_pretrained(..., low_cpu_mem_usage=True)` uses less cpu memory than default
|
||||||
# Now though the memory is the same, we simply test that loading with `low_cpu_mem_usage` winds up being *slower*
|
# Now though these should be around the same.
|
||||||
# (mostly from extra logic needed)
|
# TODO: Look for good bounds to check that their timings are near the same
|
||||||
|
|
||||||
mname = "hf-internal-testing/tiny-random-bert"
|
mname = "hf-internal-testing/tiny-random-bert"
|
||||||
|
|
||||||
preamble = "from transformers import AutoModel"
|
preamble = "from transformers import AutoModel"
|
||||||
one_liner_str = f'{preamble}; AutoModel.from_pretrained("{mname}", low_cpu_mem_usage=False)'
|
one_liner_str = f'{preamble}; AutoModel.from_pretrained("{mname}", low_cpu_mem_usage=False)'
|
||||||
start_time = time.time()
|
|
||||||
# Save this output as `max_rss_normal` if testing memory results
|
# Save this output as `max_rss_normal` if testing memory results
|
||||||
max_rss_normal = self.python_one_liner_max_rss(one_liner_str)
|
max_rss_normal = self.python_one_liner_max_rss(one_liner_str)
|
||||||
end_time = time.time()
|
|
||||||
elapsed_time_normal = end_time - start_time
|
|
||||||
# print(f"{max_rss_normal=}")
|
# print(f"{max_rss_normal=}")
|
||||||
|
|
||||||
one_liner_str = f'{preamble}; AutoModel.from_pretrained("{mname}", low_cpu_mem_usage=True)'
|
one_liner_str = f'{preamble}; AutoModel.from_pretrained("{mname}", low_cpu_mem_usage=True)'
|
||||||
start_time = time.time()
|
|
||||||
# Save this output as `max_rss_low_mem` if testing memory results
|
# Save this output as `max_rss_low_mem` if testing memory results
|
||||||
max_rss_low_mem = self.python_one_liner_max_rss(one_liner_str)
|
max_rss_low_mem = self.python_one_liner_max_rss(one_liner_str)
|
||||||
end_time = time.time()
|
|
||||||
elapsed_time_low_mem = end_time - start_time
|
|
||||||
|
|
||||||
# Should be within 2MBs of each other (overhead)
|
# Should be within 2MBs of each other (overhead)
|
||||||
self.assertAlmostEqual(
|
self.assertAlmostEqual(
|
||||||
@@ -926,13 +919,6 @@ class ModelUtilsTest(TestCasePlus):
|
|||||||
msg="using `low_cpu_mem_usage` should incur the same memory usage in both cases.",
|
msg="using `low_cpu_mem_usage` should incur the same memory usage in both cases.",
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertGreater(
|
|
||||||
elapsed_time_low_mem,
|
|
||||||
elapsed_time_normal,
|
|
||||||
"using `low_cpu_mem_usage` should be slower due to extra logic, "
|
|
||||||
f"but got elapsed_time_normal={elapsed_time_normal} and elapsed_time_low_mem={elapsed_time_low_mem}",
|
|
||||||
)
|
|
||||||
|
|
||||||
# if you want to compare things manually, let's first look at the size of the model in bytes
|
# if you want to compare things manually, let's first look at the size of the model in bytes
|
||||||
# model = BertModel.from_pretrained(mname, low_cpu_mem_usage=False)
|
# model = BertModel.from_pretrained(mname, low_cpu_mem_usage=False)
|
||||||
# total_numel = sum(dict((p.data_ptr(), p.numel()) for p in model.parameters()).values())
|
# total_numel = sum(dict((p.data_ptr(), p.numel()) for p in model.parameters()).values())
|
||||||
|
|||||||
Reference in New Issue
Block a user