From e3d99ec2f58e0e2a4df6b2b41152fdfb3f92a52f Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 19 Feb 2025 15:14:02 +0000 Subject: [PATCH] [tests] make `test_from_pretrained_low_cpu_mem_usage_equal` less flaky (#36255) Co-authored-by: ydshieh --- tests/utils/test_modeling_utils.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/utils/test_modeling_utils.py b/tests/utils/test_modeling_utils.py index 9bd34168d6..66210cae80 100644 --- a/tests/utils/test_modeling_utils.py +++ b/tests/utils/test_modeling_utils.py @@ -993,6 +993,7 @@ class ModelUtilsTest(TestCasePlus): for mname in mnames: _ = BertModel.from_pretrained(mname, low_cpu_mem_usage=True) + @slow @require_usr_bin_time @require_accelerate @mark.accelerate_tests @@ -1001,30 +1002,29 @@ class ModelUtilsTest(TestCasePlus): # Now though these should be around the same. # TODO: Look for good bounds to check that their timings are near the same - mname = "hf-internal-testing/tiny-random-bert" + mname = "HuggingFaceTB/SmolLM-135M" preamble = "from transformers import AutoModel" one_liner_str = f'{preamble}; AutoModel.from_pretrained("{mname}", low_cpu_mem_usage=False)' # Save this output as `max_rss_normal` if testing memory results max_rss_normal = self.python_one_liner_max_rss(one_liner_str) - # print(f"{max_rss_normal=}") one_liner_str = f'{preamble}; AutoModel.from_pretrained("{mname}", low_cpu_mem_usage=True)' # Save this output as `max_rss_low_mem` if testing memory results max_rss_low_mem = self.python_one_liner_max_rss(one_liner_str) - # Should be within 2MBs of each other (overhead) + # Should be within 5MBs of each other (overhead) self.assertAlmostEqual( max_rss_normal / 1024 / 1024, max_rss_low_mem / 1024 / 1024, - delta=2, + delta=5, msg="using `low_cpu_mem_usage` should incur the same memory usage in both cases.", ) # if you want to compare things manually, let's first look at the size of the model in bytes - # model = BertModel.from_pretrained(mname, low_cpu_mem_usage=False) + # model = AutoModel.from_pretrained(mname, low_cpu_mem_usage=False) # total_numel = sum(dict((p.data_ptr(), p.numel()) for p in model.parameters()).values()) - # total_bytes = total_numel * 4 # 420MB + # total_bytes = total_numel * 4 # Now the diff_bytes should be very close to total_bytes, but the reports are inconsistent. # The easiest way to test this is to switch the model and torch.load to do all the work on # gpu - that way one can measure exactly the total and peak memory used. Perhaps once we add