device agnostic models testing (#27146)
* device agnostic models testing * add decorator `require_torch_fp16` * make style * apply review suggestion * Oops, the fp16 decorator was misused
This commit is contained in:
@@ -22,7 +22,13 @@ import warnings
|
||||
import numpy as np
|
||||
|
||||
from transformers import is_torch_available, pipeline
|
||||
from transformers.testing_utils import require_accelerate, require_torch, require_torch_multi_gpu, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_accelerate,
|
||||
require_torch,
|
||||
require_torch_multi_accelerator,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
|
||||
from ..test_modeling_common import floats_tensor, ids_tensor
|
||||
from .test_framework_agnostic import GenerationIntegrationTestsMixin
|
||||
@@ -1019,7 +1025,7 @@ class GenerationTesterMixin:
|
||||
)
|
||||
|
||||
@require_accelerate
|
||||
@require_torch_multi_gpu
|
||||
@require_torch_multi_accelerator
|
||||
def test_model_parallel_beam_search(self):
|
||||
for model_class in self.all_generative_model_classes:
|
||||
if model_class._no_split_modules is None:
|
||||
|
||||
@@ -32,7 +32,13 @@ from transformers.models.bark.generation_configuration_bark import (
|
||||
BarkFineGenerationConfig,
|
||||
BarkSemanticGenerationConfig,
|
||||
)
|
||||
from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_torch,
|
||||
require_torch_fp16,
|
||||
require_torch_gpu,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils import cached_property
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
@@ -570,13 +576,13 @@ class BarkSemanticModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.Te
|
||||
with torch.no_grad():
|
||||
model(**inputs)[0]
|
||||
|
||||
@require_torch_fp16
|
||||
def test_generate_fp16(self):
|
||||
config, input_dict = self.model_tester.prepare_config_and_inputs()
|
||||
input_ids = input_dict["input_ids"]
|
||||
attention_mask = input_ids.ne(1).to(torch_device)
|
||||
model = self.all_generative_model_classes[0](config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
model.half()
|
||||
model.half()
|
||||
model.generate(input_ids, attention_mask=attention_mask)
|
||||
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
@@ -636,13 +642,13 @@ class BarkCoarseModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.Test
|
||||
with torch.no_grad():
|
||||
model(**inputs)[0]
|
||||
|
||||
@require_torch_fp16
|
||||
def test_generate_fp16(self):
|
||||
config, input_dict = self.model_tester.prepare_config_and_inputs()
|
||||
input_ids = input_dict["input_ids"]
|
||||
attention_mask = input_ids.ne(1).to(torch_device)
|
||||
model = self.all_generative_model_classes[0](config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
model.half()
|
||||
model.half()
|
||||
model.generate(input_ids, attention_mask=attention_mask)
|
||||
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
@@ -700,14 +706,14 @@ class BarkFineModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
with torch.no_grad():
|
||||
model(**inputs)[0]
|
||||
|
||||
@require_torch_fp16
|
||||
def test_generate_fp16(self):
|
||||
config, input_dict = self.model_tester.prepare_config_and_inputs()
|
||||
input_ids = input_dict["input_ids"]
|
||||
# take first codebook channel
|
||||
|
||||
model = self.all_model_classes[0](config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
model.half()
|
||||
model.half()
|
||||
|
||||
# toy generation_configs
|
||||
semantic_generation_config = BarkSemanticGenerationConfig(semantic_vocab_size=0)
|
||||
|
||||
@@ -22,7 +22,14 @@ import unittest
|
||||
import timeout_decorator # noqa
|
||||
|
||||
from transformers import BartConfig, is_torch_available
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_sentencepiece,
|
||||
require_tokenizers,
|
||||
require_torch,
|
||||
require_torch_fp16,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils import cached_property
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
@@ -383,12 +390,12 @@ class BartHeadTests(unittest.TestCase):
|
||||
bart_toks = tokenizer.encode(ex, return_tensors="pt").squeeze()
|
||||
assert_tensors_close(desired_result.long(), bart_toks, prefix=ex)
|
||||
|
||||
@require_torch_fp16
|
||||
def test_generate_fp16(self):
|
||||
config, input_ids, batch_size = self._get_config_and_data()
|
||||
attention_mask = input_ids.ne(1).to(torch_device)
|
||||
model = BartForConditionalGeneration(config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
model.half()
|
||||
model.half()
|
||||
model.generate(input_ids, attention_mask=attention_mask)
|
||||
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
@@ -497,13 +504,13 @@ class BartModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
|
||||
with torch.no_grad():
|
||||
model(**inputs)[0]
|
||||
|
||||
@require_torch_fp16
|
||||
def test_generate_fp16(self):
|
||||
config, input_dict = self.model_tester.prepare_config_and_inputs()
|
||||
input_ids = input_dict["input_ids"]
|
||||
attention_mask = input_ids.ne(1).to(torch_device)
|
||||
model = BartForConditionalGeneration(config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
model.half()
|
||||
model.half()
|
||||
model.generate(input_ids, attention_mask=attention_mask)
|
||||
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ import unittest
|
||||
|
||||
from transformers import BertConfig, is_torch_available
|
||||
from transformers.models.auto import get_values
|
||||
from transformers.testing_utils import CaptureLogger, require_torch, require_torch_gpu, slow, torch_device
|
||||
from transformers.testing_utils import CaptureLogger, require_torch, require_torch_accelerator, slow, torch_device
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
from ...test_configuration_common import ConfigTester
|
||||
@@ -601,7 +601,7 @@ class BertModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
|
||||
self.assertIsNotNone(model)
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_torchscript_device_change(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
for model_class in self.all_model_classes:
|
||||
|
||||
@@ -20,7 +20,14 @@ import tempfile
|
||||
import unittest
|
||||
|
||||
from transformers import BigBirdPegasusConfig, is_torch_available
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_sentencepiece,
|
||||
require_tokenizers,
|
||||
require_torch,
|
||||
require_torch_fp16,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
from ...test_configuration_common import ConfigTester
|
||||
@@ -376,13 +383,13 @@ class BigBirdPegasusModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineT
|
||||
with torch.no_grad():
|
||||
model(**inputs)[0]
|
||||
|
||||
@require_torch_fp16
|
||||
def test_generate_fp16(self):
|
||||
config, input_dict = self.model_tester.prepare_config_and_inputs()
|
||||
input_dict.pop("decoder_attention_mask")
|
||||
input_dict.pop("decoder_input_ids")
|
||||
model = BigBirdPegasusForConditionalGeneration(config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
model.half()
|
||||
model.half()
|
||||
model.generate(**input_dict)
|
||||
model.generate(**input_dict, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
|
||||
@@ -18,7 +18,15 @@ import tempfile
|
||||
import unittest
|
||||
|
||||
from transformers import BlenderbotConfig, is_torch_available
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
backend_empty_cache,
|
||||
require_sentencepiece,
|
||||
require_tokenizers,
|
||||
require_torch,
|
||||
require_torch_fp16,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils import cached_property
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
@@ -269,13 +277,13 @@ class BlenderbotModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTeste
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
self.model_tester.check_encoder_decoder_model_standalone(*config_and_inputs)
|
||||
|
||||
@require_torch_fp16
|
||||
def test_generate_fp16(self):
|
||||
config, input_dict = self.model_tester.prepare_config_and_inputs()
|
||||
input_ids = input_dict["input_ids"]
|
||||
attention_mask = input_ids.ne(1).to(torch_device)
|
||||
model = BlenderbotForConditionalGeneration(config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
model.half()
|
||||
model.half()
|
||||
model.generate(input_ids, attention_mask=attention_mask)
|
||||
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
@@ -315,7 +323,7 @@ class Blenderbot3BIntegrationTests(unittest.TestCase):
|
||||
FASTER_GEN_KWARGS = {"num_beams": 1, "early_stopping": True, "min_length": 15, "max_length": 25}
|
||||
TOK_DECODE_KW = {"skip_special_tokens": True, "clean_up_tokenization_spaces": True}
|
||||
|
||||
torch.cuda.empty_cache()
|
||||
backend_empty_cache(torch_device)
|
||||
model = BlenderbotForConditionalGeneration.from_pretrained(self.ckpt).half().to(torch_device)
|
||||
|
||||
src_text = ["Sam"]
|
||||
|
||||
@@ -18,7 +18,12 @@ import tempfile
|
||||
import unittest
|
||||
|
||||
from transformers import BlenderbotSmallConfig, is_torch_available
|
||||
from transformers.testing_utils import require_torch, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_torch,
|
||||
require_torch_fp16,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils import cached_property
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
@@ -269,13 +274,13 @@ class BlenderbotSmallModelTest(ModelTesterMixin, GenerationTesterMixin, Pipeline
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
self.model_tester.check_encoder_decoder_model_standalone(*config_and_inputs)
|
||||
|
||||
@require_torch_fp16
|
||||
def test_generate_fp16(self):
|
||||
config, input_dict = self.model_tester.prepare_config_and_inputs()
|
||||
input_ids = input_dict["input_ids"]
|
||||
attention_mask = input_ids.ne(1).to(torch_device)
|
||||
model = BlenderbotSmallForConditionalGeneration(config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
model.half()
|
||||
model.half()
|
||||
model.generate(input_ids, attention_mask=attention_mask)
|
||||
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
|
||||
@@ -24,7 +24,14 @@ import numpy as np
|
||||
import requests
|
||||
|
||||
from transformers import BlipConfig, BlipTextConfig, BlipVisionConfig
|
||||
from transformers.testing_utils import require_torch, require_torch_gpu, require_vision, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_torch,
|
||||
require_torch_accelerator,
|
||||
require_torch_fp16,
|
||||
require_vision,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils import is_torch_available, is_vision_available
|
||||
|
||||
from ...test_configuration_common import ConfigTester
|
||||
@@ -1280,7 +1287,8 @@ class BlipModelIntegrationTest(unittest.TestCase):
|
||||
[30522, 1037, 3861, 1997, 1037, 2450, 1998, 2014, 3899, 2006, 1996, 3509, 102],
|
||||
)
|
||||
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@require_torch_fp16
|
||||
def test_inference_image_captioning_fp16(self):
|
||||
model = BlipForConditionalGeneration.from_pretrained(
|
||||
"Salesforce/blip-image-captioning-base", torch_dtype=torch.float16
|
||||
|
||||
@@ -23,7 +23,13 @@ import numpy as np
|
||||
import requests
|
||||
|
||||
from transformers import CONFIG_MAPPING, Blip2Config, Blip2QFormerConfig, Blip2VisionConfig
|
||||
from transformers.testing_utils import require_torch, require_torch_multi_gpu, require_vision, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_torch,
|
||||
require_torch_multi_accelerator,
|
||||
require_vision,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils import is_torch_available, is_vision_available
|
||||
|
||||
from ...test_configuration_common import ConfigTester
|
||||
@@ -934,8 +940,8 @@ class Blip2ModelIntegrationTest(unittest.TestCase):
|
||||
self.assertEqual(predictions[0].tolist(), [0, 2335, 1556, 28, 1782, 30, 8, 2608, 1])
|
||||
self.assertEqual(predictions[1].tolist(), [0, 2335, 1556, 28, 1782, 30, 8, 2608, 1])
|
||||
|
||||
@require_torch_multi_gpu
|
||||
def test_inference_opt_multi_gpu(self):
|
||||
@require_torch_multi_accelerator
|
||||
def test_inference_opt_multi_accelerator(self):
|
||||
processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
|
||||
model = Blip2ForConditionalGeneration.from_pretrained(
|
||||
"Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16, device_map="balanced"
|
||||
@@ -966,8 +972,8 @@ class Blip2ModelIntegrationTest(unittest.TestCase):
|
||||
)
|
||||
self.assertEqual(generated_text, "it's not a city, it's a beach")
|
||||
|
||||
@require_torch_multi_gpu
|
||||
def test_inference_t5_multi_gpu(self):
|
||||
@require_torch_multi_accelerator
|
||||
def test_inference_t5_multi_accelerator(self):
|
||||
processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
|
||||
device_map = device_map = {
|
||||
"query_tokens": 0,
|
||||
|
||||
@@ -19,7 +19,7 @@ import unittest
|
||||
|
||||
from transformers import ConvBertConfig, is_torch_available
|
||||
from transformers.models.auto import get_values
|
||||
from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
|
||||
from transformers.testing_utils import require_torch, require_torch_accelerator, slow, torch_device
|
||||
|
||||
from ...test_configuration_common import ConfigTester
|
||||
from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask
|
||||
@@ -427,7 +427,7 @@ class ConvBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
|
||||
)
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_torchscript_device_change(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
for model_class in self.all_model_classes:
|
||||
|
||||
@@ -17,7 +17,7 @@ import gc
|
||||
import unittest
|
||||
|
||||
from transformers import CTRLConfig, is_torch_available
|
||||
from transformers.testing_utils import require_torch, slow, torch_device
|
||||
from transformers.testing_utils import backend_empty_cache, require_torch, slow, torch_device
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
from ...test_configuration_common import ConfigTester
|
||||
@@ -230,7 +230,7 @@ class CTRLModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
|
||||
super().tearDown()
|
||||
# clean-up as much as possible GPU memory occupied by PyTorch
|
||||
gc.collect()
|
||||
torch.cuda.empty_cache()
|
||||
backend_empty_cache(torch_device)
|
||||
|
||||
def test_config(self):
|
||||
self.config_tester.run_common_tests()
|
||||
@@ -260,7 +260,7 @@ class CTRLModelLanguageGenerationTest(unittest.TestCase):
|
||||
super().tearDown()
|
||||
# clean-up as much as possible GPU memory occupied by PyTorch
|
||||
gc.collect()
|
||||
torch.cuda.empty_cache()
|
||||
backend_empty_cache(torch_device)
|
||||
|
||||
@slow
|
||||
def test_lm_generate_ctrl(self):
|
||||
|
||||
@@ -25,7 +25,7 @@ from transformers.file_utils import cached_property
|
||||
from transformers.testing_utils import (
|
||||
require_timm,
|
||||
require_torch,
|
||||
require_torch_gpu,
|
||||
require_torch_accelerator,
|
||||
require_vision,
|
||||
slow,
|
||||
torch_device,
|
||||
@@ -648,7 +648,7 @@ class DeformableDetrModelIntegrationTests(unittest.TestCase):
|
||||
self.assertEqual(outputs.pred_boxes.shape, expected_shape_boxes)
|
||||
self.assertTrue(torch.allclose(outputs.pred_boxes[0, :3, :3], expected_boxes, atol=1e-4))
|
||||
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_inference_object_detection_head_equivalence_cpu_gpu(self):
|
||||
image_processor = self.default_image_processor
|
||||
image = prepare_img()
|
||||
@@ -663,10 +663,10 @@ class DeformableDetrModelIntegrationTests(unittest.TestCase):
|
||||
cpu_outputs = model(pixel_values, pixel_mask)
|
||||
|
||||
# 2. run model on GPU
|
||||
model.to("cuda")
|
||||
model.to(torch_device)
|
||||
|
||||
with torch.no_grad():
|
||||
gpu_outputs = model(pixel_values.to("cuda"), pixel_mask.to("cuda"))
|
||||
gpu_outputs = model(pixel_values.to(torch_device), pixel_mask.to(torch_device))
|
||||
|
||||
# 3. assert equivalence
|
||||
for key in cpu_outputs.keys():
|
||||
|
||||
@@ -24,7 +24,8 @@ from transformers.models.auto import get_values
|
||||
from transformers.testing_utils import (
|
||||
require_accelerate,
|
||||
require_torch,
|
||||
require_torch_gpu,
|
||||
require_torch_accelerator,
|
||||
require_torch_fp16,
|
||||
require_vision,
|
||||
slow,
|
||||
torch_device,
|
||||
@@ -424,7 +425,8 @@ class DeiTModelIntegrationTest(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
@require_accelerate
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@require_torch_fp16
|
||||
def test_inference_fp16(self):
|
||||
r"""
|
||||
A small test to make sure that inference work in half precision without any problem.
|
||||
|
||||
@@ -17,7 +17,7 @@ import tempfile
|
||||
import unittest
|
||||
|
||||
from transformers import DistilBertConfig, is_torch_available
|
||||
from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
|
||||
from transformers.testing_utils import require_torch, require_torch_accelerator, slow, torch_device
|
||||
|
||||
from ...test_configuration_common import ConfigTester
|
||||
from ...test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask
|
||||
@@ -264,7 +264,7 @@ class DistilBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
|
||||
self.assertIsNotNone(model)
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_torchscript_device_change(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
for model_class in self.all_model_classes:
|
||||
|
||||
@@ -18,7 +18,7 @@ import unittest
|
||||
|
||||
from transformers import ErnieConfig, is_torch_available
|
||||
from transformers.models.auto import get_values
|
||||
from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
|
||||
from transformers.testing_utils import require_torch, require_torch_accelerator, slow, torch_device
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
from ...test_configuration_common import ConfigTester
|
||||
@@ -574,7 +574,7 @@ class ErnieModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
|
||||
self.assertIsNotNone(model)
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_torchscript_device_change(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
for model_class in self.all_model_classes:
|
||||
|
||||
@@ -17,7 +17,7 @@ import tempfile
|
||||
import unittest
|
||||
|
||||
from transformers import FlaubertConfig, is_torch_available
|
||||
from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
|
||||
from transformers.testing_utils import require_torch, require_torch_accelerator, slow, torch_device
|
||||
|
||||
from ...test_configuration_common import ConfigTester
|
||||
from ...test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask
|
||||
@@ -463,7 +463,7 @@ class FlaubertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
|
||||
self.assertIsNotNone(model)
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_torchscript_device_change(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
for model_class in self.all_model_classes:
|
||||
|
||||
@@ -20,7 +20,14 @@ import timeout_decorator # noqa
|
||||
from parameterized import parameterized
|
||||
|
||||
from transformers import FSMTConfig, is_torch_available
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_sentencepiece,
|
||||
require_tokenizers,
|
||||
require_torch,
|
||||
require_torch_fp16,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils import cached_property
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
@@ -398,12 +405,12 @@ class FSMTHeadTests(unittest.TestCase):
|
||||
self.assertEqual(n_pad_after, n_pad_before - 1)
|
||||
self.assertTrue(torch.eq(shifted[:, 0], 2).all())
|
||||
|
||||
@require_torch_fp16
|
||||
def test_generate_fp16(self):
|
||||
config, input_ids, batch_size = self._get_config_and_data()
|
||||
attention_mask = input_ids.ne(1).to(torch_device)
|
||||
model = FSMTForConditionalGeneration(config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
model.half()
|
||||
model.half()
|
||||
model.generate(input_ids, attention_mask=attention_mask)
|
||||
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
@@ -538,8 +545,7 @@ class FSMTModelIntegrationTests(unittest.TestCase):
|
||||
@slow
|
||||
def test_translation_pipeline(self, pair):
|
||||
tokenizer, model, src_text, tgt_text = self.translation_setup(pair)
|
||||
device = 0 if torch_device == "cuda" else -1
|
||||
pipeline = TranslationPipeline(model, tokenizer, framework="pt", device=device)
|
||||
pipeline = TranslationPipeline(model, tokenizer, framework="pt", device=torch_device)
|
||||
output = pipeline([src_text])
|
||||
self.assertEqual([tgt_text], [x["translation_text"] for x in output])
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ import unittest
|
||||
import requests
|
||||
|
||||
from transformers import AutoTokenizer, FuyuConfig, is_torch_available, is_vision_available
|
||||
from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
|
||||
from transformers.testing_utils import require_torch, require_torch_accelerator, slow, torch_device
|
||||
|
||||
from ...test_modeling_common import ids_tensor, random_attention_mask
|
||||
|
||||
@@ -257,7 +257,7 @@ class FuyuModelTester:
|
||||
|
||||
|
||||
@require_torch
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@slow
|
||||
class FuyuIntegrationTest(unittest.TestCase): # , ModelTesterMixin)
|
||||
"""
|
||||
@@ -279,7 +279,6 @@ class FuyuIntegrationTest(unittest.TestCase): # , ModelTesterMixin)
|
||||
self.bus_image_pil = Image.open(io.BytesIO(requests.get(self.bus_image_url).content))
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
def test_model_8b_chat_greedy_generation_bus_captioning(self):
|
||||
EXPECTED_TEXT_COMPLETION = """A bus parked on the side of a road.|ENDOFTEXT|"""
|
||||
text_prompt_coco_captioning = "Generate a coco-style caption.\n"
|
||||
|
||||
@@ -20,7 +20,7 @@ import math
|
||||
import unittest
|
||||
|
||||
from transformers import GPT2Config, is_torch_available
|
||||
from transformers.testing_utils import require_torch, slow, torch_device
|
||||
from transformers.testing_utils import backend_empty_cache, require_torch, slow, torch_device
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
from ...test_configuration_common import ConfigTester
|
||||
@@ -505,7 +505,7 @@ class GPT2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
|
||||
super().tearDown()
|
||||
# clean-up as much as possible GPU memory occupied by PyTorch
|
||||
gc.collect()
|
||||
torch.cuda.empty_cache()
|
||||
backend_empty_cache(torch_device)
|
||||
|
||||
def test_config(self):
|
||||
self.config_tester.run_common_tests()
|
||||
@@ -712,7 +712,7 @@ class GPT2ModelLanguageGenerationTest(unittest.TestCase):
|
||||
super().tearDown()
|
||||
# clean-up as much as possible GPU memory occupied by PyTorch
|
||||
gc.collect()
|
||||
torch.cuda.empty_cache()
|
||||
backend_empty_cache(torch_device)
|
||||
|
||||
def _test_lm_generate_gpt2_helper(
|
||||
self,
|
||||
|
||||
@@ -562,7 +562,8 @@ class GPTJModelLanguageGenerationTest(unittest.TestCase):
|
||||
output_seq_strs = tokenizer.batch_decode(output_seq, skip_special_tokens=True)
|
||||
output_seq_tt_strs = tokenizer.batch_decode(output_seq_tt, skip_special_tokens=True)
|
||||
|
||||
if torch_device == "cuda":
|
||||
if torch_device != "cpu":
|
||||
# currently this expect value is only for `cuda`
|
||||
EXPECTED_OUTPUT_STR = (
|
||||
"Today is a nice day and I've already been enjoying it. I walked to work with my wife"
|
||||
)
|
||||
|
||||
@@ -16,7 +16,13 @@ import unittest
|
||||
from unittest import skip
|
||||
|
||||
from transformers import is_torch_available
|
||||
from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_torch,
|
||||
require_torch_accelerator,
|
||||
require_torch_fp16,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.trainer_utils import set_seed
|
||||
|
||||
|
||||
@@ -363,7 +369,7 @@ class Jukebox5bModelTester(unittest.TestCase):
|
||||
self.assertIn(zs[2][0].detach().cpu().tolist(), [self.EXPECTED_OUTPUT_0, self.EXPECTED_OUTPUT_0_PT_2])
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@skip("Not enough GPU memory on CI runners")
|
||||
def test_slow_sampling(self):
|
||||
model = JukeboxModel.from_pretrained(self.model_id, min_duration=0).eval()
|
||||
@@ -388,7 +394,8 @@ class Jukebox5bModelTester(unittest.TestCase):
|
||||
torch.testing.assert_allclose(zs[2][0].cpu(), torch.tensor(self.EXPECTED_GPU_OUTPUTS_0))
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@require_torch_fp16
|
||||
def test_fp16_slow_sampling(self):
|
||||
prior_id = "ArthurZ/jukebox_prior_0"
|
||||
model = JukeboxPrior.from_pretrained(prior_id, min_duration=0).eval().half().to(torch_device)
|
||||
|
||||
@@ -21,7 +21,14 @@ import unittest
|
||||
|
||||
from transformers import LEDConfig, is_torch_available
|
||||
from transformers.models.auto import get_values
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_sentencepiece,
|
||||
require_tokenizers,
|
||||
require_torch,
|
||||
require_torch_fp16,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils import cached_property
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
@@ -363,13 +370,13 @@ class LEDModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
|
||||
with torch.no_grad():
|
||||
model(**inputs)[0]
|
||||
|
||||
@require_torch_fp16
|
||||
def test_generate_fp16(self):
|
||||
config, input_dict = self.model_tester.prepare_config_and_inputs()
|
||||
input_ids = input_dict["input_ids"]
|
||||
attention_mask = input_ids.ne(1).to(torch_device)
|
||||
model = LEDForConditionalGeneration(config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
model.half()
|
||||
model.half()
|
||||
model.generate(input_ids, attention_mask=attention_mask)
|
||||
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
|
||||
@@ -21,7 +21,14 @@ from parameterized import parameterized
|
||||
from pytest import mark
|
||||
|
||||
from transformers import LlamaConfig, is_torch_available, set_seed
|
||||
from transformers.testing_utils import require_flash_attn, require_torch, require_torch_gpu, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_flash_attn,
|
||||
require_torch,
|
||||
require_torch_accelerator,
|
||||
require_torch_gpu,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
from ...test_configuration_common import ConfigTester
|
||||
@@ -534,7 +541,7 @@ end
|
||||
""",
|
||||
]
|
||||
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@slow
|
||||
def test_model_7b_logits(self):
|
||||
model = LlamaForCausalLM.from_pretrained("codellama/CodeLlama-7b-hf").to(torch_device)
|
||||
|
||||
@@ -20,7 +20,14 @@ import tempfile
|
||||
import unittest
|
||||
|
||||
from transformers import M2M100Config, is_torch_available
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_sentencepiece,
|
||||
require_tokenizers,
|
||||
require_torch,
|
||||
require_torch_fp16,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils import cached_property
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
@@ -312,13 +319,13 @@ class M2M100ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
|
||||
with torch.no_grad():
|
||||
model(**inputs)[0]
|
||||
|
||||
@require_torch_fp16
|
||||
def test_generate_fp16(self):
|
||||
config, input_dict = self.model_tester.prepare_config_and_inputs()
|
||||
input_ids = input_dict["input_ids"]
|
||||
attention_mask = input_ids.ne(1).to(torch_device)
|
||||
model = M2M100ForConditionalGeneration(config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
model.half()
|
||||
model.half()
|
||||
model.generate(input_ids, attention_mask=attention_mask)
|
||||
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
|
||||
@@ -20,7 +20,14 @@ import unittest
|
||||
from huggingface_hub.hf_api import list_models
|
||||
|
||||
from transformers import MarianConfig, is_torch_available
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_sentencepiece,
|
||||
require_tokenizers,
|
||||
require_torch,
|
||||
require_torch_fp16,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils import cached_property
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
@@ -281,13 +288,13 @@ class MarianModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
self.model_tester.check_encoder_decoder_model_standalone(*config_and_inputs)
|
||||
|
||||
@require_torch_fp16
|
||||
def test_generate_fp16(self):
|
||||
config, input_dict = self.model_tester.prepare_config_and_inputs()
|
||||
input_ids = input_dict["input_ids"]
|
||||
attention_mask = input_ids.ne(1).to(torch_device)
|
||||
model = MarianMTModel(config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
model.half()
|
||||
model.half()
|
||||
model.generate(input_ids, attention_mask=attention_mask)
|
||||
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
@@ -620,9 +627,9 @@ class TestMarian_en_ROMANCE(MarianIntegrationTest):
|
||||
self._assert_generated_batch_equal_expected()
|
||||
|
||||
@slow
|
||||
@require_torch
|
||||
def test_pipeline(self):
|
||||
device = 0 if torch_device == "cuda" else -1
|
||||
pipeline = TranslationPipeline(self.model, self.tokenizer, framework="pt", device=device)
|
||||
pipeline = TranslationPipeline(self.model, self.tokenizer, framework="pt", device=torch_device)
|
||||
output = pipeline(self.src_text)
|
||||
self.assertEqual(self.expected_text, [x["translation_text"] for x in output])
|
||||
|
||||
|
||||
@@ -23,7 +23,8 @@ from tests.test_modeling_common import floats_tensor
|
||||
from transformers import Mask2FormerConfig, is_torch_available, is_vision_available
|
||||
from transformers.testing_utils import (
|
||||
require_torch,
|
||||
require_torch_gpu,
|
||||
require_torch_accelerator,
|
||||
require_torch_fp16,
|
||||
require_torch_multi_gpu,
|
||||
require_vision,
|
||||
slow,
|
||||
@@ -427,7 +428,8 @@ class Mask2FormerModelIntegrationTest(unittest.TestCase):
|
||||
).to(torch_device)
|
||||
self.assertTrue(torch.allclose(outputs.class_queries_logits[0, :3, :3], expected_slice, atol=TOLERANCE))
|
||||
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@require_torch_fp16
|
||||
def test_inference_fp16(self):
|
||||
model = (
|
||||
Mask2FormerForUniversalSegmentation.from_pretrained(self.model_checkpoints)
|
||||
|
||||
@@ -24,7 +24,8 @@ from tests.test_modeling_common import floats_tensor
|
||||
from transformers import DetrConfig, MaskFormerConfig, SwinConfig, is_torch_available, is_vision_available
|
||||
from transformers.testing_utils import (
|
||||
require_torch,
|
||||
require_torch_gpu,
|
||||
require_torch_accelerator,
|
||||
require_torch_fp16,
|
||||
require_torch_multi_gpu,
|
||||
require_vision,
|
||||
slow,
|
||||
@@ -516,7 +517,8 @@ class MaskFormerModelIntegrationTest(unittest.TestCase):
|
||||
).to(torch_device)
|
||||
self.assertTrue(torch.allclose(outputs.class_queries_logits[0, :3, :3], expected_slice, atol=TOLERANCE))
|
||||
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@require_torch_fp16
|
||||
def test_inference_fp16(self):
|
||||
model = (
|
||||
MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-resnet101-coco-stuff")
|
||||
|
||||
@@ -20,7 +20,14 @@ import tempfile
|
||||
import unittest
|
||||
|
||||
from transformers import MBartConfig, is_torch_available
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_sentencepiece,
|
||||
require_tokenizers,
|
||||
require_torch,
|
||||
require_torch_fp16,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils import cached_property
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
@@ -317,13 +324,13 @@ class MBartModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
|
||||
with torch.no_grad():
|
||||
model(**inputs)[0]
|
||||
|
||||
@require_torch_fp16
|
||||
def test_generate_fp16(self):
|
||||
config, input_dict = self.model_tester.prepare_config_and_inputs()
|
||||
input_ids = input_dict["input_ids"]
|
||||
attention_mask = input_ids.ne(1).to(torch_device)
|
||||
model = MBartForConditionalGeneration(config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
model.half()
|
||||
model.half()
|
||||
model.generate(input_ids, attention_mask=attention_mask)
|
||||
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
|
||||
@@ -17,7 +17,13 @@
|
||||
import unittest
|
||||
|
||||
from transformers import MegaConfig, is_torch_available
|
||||
from transformers.testing_utils import TestCasePlus, require_torch, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
TestCasePlus,
|
||||
require_torch,
|
||||
require_torch_fp16,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
from ...test_configuration_common import ConfigTester
|
||||
@@ -619,12 +625,12 @@ class MegaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.check_sequence_length_beyond_max_positions(*config_and_inputs)
|
||||
|
||||
@require_torch_fp16
|
||||
def test_generate_fp16(self):
|
||||
config, input_ids, _, attention_mask, *_ = self.model_tester.prepare_config_and_inputs_for_decoder()
|
||||
# attention_mask = torch.LongTensor(input_ids.ne(1)).to(torch_device)
|
||||
model = MegaForCausalLM(config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
model.half()
|
||||
model.half()
|
||||
model.generate(input_ids, attention_mask=attention_mask)
|
||||
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
|
||||
@@ -22,7 +22,14 @@ import unittest
|
||||
from pytest import mark
|
||||
|
||||
from transformers import AutoTokenizer, MistralConfig, is_torch_available
|
||||
from transformers.testing_utils import require_flash_attn, require_torch, require_torch_gpu, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
backend_empty_cache,
|
||||
require_flash_attn,
|
||||
require_torch,
|
||||
require_torch_gpu,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
from ...test_configuration_common import ConfigTester
|
||||
@@ -450,7 +457,7 @@ class MistralIntegrationTest(unittest.TestCase):
|
||||
torch.testing.assert_close(out[0, 0, :30], EXPECTED_SLICE, atol=1e-4, rtol=1e-4)
|
||||
|
||||
del model
|
||||
torch.cuda.empty_cache()
|
||||
backend_empty_cache(torch_device)
|
||||
gc.collect()
|
||||
|
||||
@slow
|
||||
@@ -467,5 +474,5 @@ class MistralIntegrationTest(unittest.TestCase):
|
||||
self.assertEqual(EXPECTED_TEXT_COMPLETION, text)
|
||||
|
||||
del model
|
||||
torch.cuda.empty_cache()
|
||||
backend_empty_cache(torch_device)
|
||||
gc.collect()
|
||||
|
||||
@@ -28,7 +28,13 @@ from transformers import (
|
||||
PretrainedConfig,
|
||||
T5Config,
|
||||
)
|
||||
from transformers.testing_utils import is_torch_available, require_torch, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
is_torch_available,
|
||||
require_torch,
|
||||
require_torch_fp16,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils import cached_property
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
@@ -1082,13 +1088,13 @@ class MusicgenTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
|
||||
output_ids_generate = model.generate(do_sample=False, max_length=max_length, remove_invalid_values=True)
|
||||
self.assertIsNotNone(output_ids_generate)
|
||||
|
||||
@require_torch_fp16
|
||||
def test_generate_fp16(self):
|
||||
config, input_dict = self.model_tester.prepare_config_and_inputs()
|
||||
|
||||
for model_class in self.greedy_sample_model_classes:
|
||||
model = model_class(config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
model.half()
|
||||
model.half()
|
||||
# greedy
|
||||
model.generate(input_dict["input_ids"], attention_mask=input_dict["attention_mask"], max_new_tokens=10)
|
||||
# sampling
|
||||
|
||||
@@ -22,7 +22,14 @@ import unittest
|
||||
import timeout_decorator # noqa
|
||||
|
||||
from transformers import MvpConfig, is_torch_available
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_sentencepiece,
|
||||
require_tokenizers,
|
||||
require_torch,
|
||||
require_torch_fp16,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils import cached_property
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
@@ -374,12 +381,12 @@ class MvpHeadTests(unittest.TestCase):
|
||||
mvp_toks = tokenizer.encode(ex, return_tensors="pt").squeeze()
|
||||
assert_tensors_close(desired_result.long(), mvp_toks, prefix=ex)
|
||||
|
||||
@require_torch_fp16
|
||||
def test_generate_fp16(self):
|
||||
config, input_ids, batch_size = self._get_config_and_data()
|
||||
attention_mask = input_ids.ne(1).to(torch_device)
|
||||
model = MvpForConditionalGeneration(config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
model.half()
|
||||
model.half()
|
||||
model.generate(input_ids, attention_mask=attention_mask)
|
||||
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
@@ -505,13 +512,13 @@ class MvpModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
|
||||
with torch.no_grad():
|
||||
model(**inputs)[0]
|
||||
|
||||
@require_torch_fp16
|
||||
def test_generate_fp16(self):
|
||||
config, input_dict = self.model_tester.prepare_config_and_inputs()
|
||||
input_ids = input_dict["input_ids"]
|
||||
attention_mask = input_ids.ne(1).to(torch_device)
|
||||
model = MvpForConditionalGeneration(config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
model.half()
|
||||
model.half()
|
||||
model.generate(input_ids, attention_mask=attention_mask)
|
||||
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ from transformers.testing_utils import (
|
||||
require_sentencepiece,
|
||||
require_tokenizers,
|
||||
require_torch,
|
||||
require_torch_fp16,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
@@ -327,13 +328,13 @@ class NllbMoeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
|
||||
with torch.no_grad():
|
||||
model(**inputs)[0]
|
||||
|
||||
@require_torch_fp16
|
||||
def test_generate_fp16(self):
|
||||
config, input_dict = self.model_tester.prepare_config_and_inputs()
|
||||
input_ids = input_dict["input_ids"]
|
||||
attention_mask = input_ids.ne(1).to(torch_device)
|
||||
model = NllbMoeForConditionalGeneration(config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
model.half()
|
||||
model.half()
|
||||
model.generate(input_ids, attention_mask=attention_mask)
|
||||
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
|
||||
@@ -24,7 +24,8 @@ from tests.test_modeling_common import floats_tensor
|
||||
from transformers import OneFormerConfig, is_torch_available, is_vision_available
|
||||
from transformers.testing_utils import (
|
||||
require_torch,
|
||||
require_torch_gpu,
|
||||
require_torch_accelerator,
|
||||
require_torch_fp16,
|
||||
require_torch_multi_gpu,
|
||||
require_vision,
|
||||
slow,
|
||||
@@ -540,7 +541,8 @@ class OneFormerModelIntegrationTest(unittest.TestCase):
|
||||
).to(torch_device)
|
||||
self.assertTrue(torch.allclose(class_queries_logits[0, :3, :3], expected_slice, atol=TOLERANCE))
|
||||
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@require_torch_fp16
|
||||
def test_inference_fp16(self):
|
||||
model = (
|
||||
OneFormerForUniversalSegmentation.from_pretrained(self.model_checkpoints)
|
||||
|
||||
@@ -22,7 +22,7 @@ import unittest
|
||||
import timeout_decorator # noqa
|
||||
|
||||
from transformers import OPTConfig, is_torch_available
|
||||
from transformers.testing_utils import require_torch, require_torch_fp16, require_torch_gpu, slow, torch_device
|
||||
from transformers.testing_utils import require_torch, require_torch_accelerator, require_torch_fp16, slow, torch_device
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
from ...test_configuration_common import ConfigTester
|
||||
@@ -514,7 +514,8 @@ class OPTGenerationTest(unittest.TestCase):
|
||||
|
||||
self.assertListEqual(predicted_outputs, EXPECTED_OUTPUTS)
|
||||
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@require_torch_fp16
|
||||
def test_batched_nan_fp16(self):
|
||||
# a bug manifested starting at models facebook/opt-1.3 and larger when running batched generations,
|
||||
# therefore not using a tiny model, but the smallest model the problem was seen with which is opt-1.3b.
|
||||
|
||||
@@ -24,7 +24,14 @@ import numpy as np
|
||||
import requests
|
||||
|
||||
from transformers import Owlv2Config, Owlv2TextConfig, Owlv2VisionConfig
|
||||
from transformers.testing_utils import require_torch, require_torch_gpu, require_vision, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_torch,
|
||||
require_torch_accelerator,
|
||||
require_torch_fp16,
|
||||
require_vision,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils import is_torch_available, is_vision_available
|
||||
|
||||
from ...test_configuration_common import ConfigTester
|
||||
@@ -869,7 +876,8 @@ class Owlv2ModelIntegrationTest(unittest.TestCase):
|
||||
self.assertTrue(torch.allclose(outputs.target_pred_boxes[0, :3, :3], expected_slice_boxes, atol=1e-4))
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@require_torch_fp16
|
||||
def test_inference_one_shot_object_detection_fp16(self):
|
||||
model_name = "google/owlv2-base-patch16"
|
||||
model = Owlv2ForObjectDetection.from_pretrained(model_name, torch_dtype=torch.float16).to(torch_device)
|
||||
|
||||
@@ -24,7 +24,14 @@ import numpy as np
|
||||
import requests
|
||||
|
||||
from transformers import OwlViTConfig, OwlViTTextConfig, OwlViTVisionConfig
|
||||
from transformers.testing_utils import require_torch, require_torch_gpu, require_vision, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_torch,
|
||||
require_torch_accelerator,
|
||||
require_torch_fp16,
|
||||
require_vision,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils import is_torch_available, is_vision_available
|
||||
|
||||
from ...test_configuration_common import ConfigTester
|
||||
@@ -860,7 +867,8 @@ class OwlViTModelIntegrationTest(unittest.TestCase):
|
||||
self.assertTrue(torch.allclose(outputs.target_pred_boxes[0, :3, :3], expected_slice_boxes, atol=1e-4))
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@require_torch_fp16
|
||||
def test_inference_one_shot_object_detection_fp16(self):
|
||||
model_name = "google/owlvit-base-patch32"
|
||||
model = OwlViTForObjectDetection.from_pretrained(model_name, torch_dtype=torch.float16).to(torch_device)
|
||||
|
||||
@@ -18,7 +18,14 @@ import tempfile
|
||||
import unittest
|
||||
|
||||
from transformers import PegasusConfig, is_torch_available
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_sentencepiece,
|
||||
require_tokenizers,
|
||||
require_torch,
|
||||
require_torch_fp16,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils import cached_property
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
@@ -280,13 +287,13 @@ class PegasusModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
self.model_tester.check_encoder_decoder_model_standalone(*config_and_inputs)
|
||||
|
||||
@require_torch_fp16
|
||||
def test_generate_fp16(self):
|
||||
config, input_dict = self.model_tester.prepare_config_and_inputs()
|
||||
input_ids = input_dict["input_ids"]
|
||||
attention_mask = input_ids.ne(1).to(torch_device)
|
||||
model = PegasusForConditionalGeneration(config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
model.half()
|
||||
model.half()
|
||||
model.generate(input_ids, attention_mask=attention_mask)
|
||||
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
@@ -352,6 +359,7 @@ class PegasusXSUMIntegrationTest(AbstractSeq2SeqIntegrationTest):
|
||||
return AutoModelForSeq2SeqLM.from_pretrained(self.checkpoint_name).to(torch_device)
|
||||
|
||||
@slow
|
||||
@require_torch_fp16
|
||||
def test_pegasus_xsum_summary(self):
|
||||
assert self.tokenizer.model_max_length == 512
|
||||
inputs = self.tokenizer(self.src_text, return_tensors="pt", truncation=True, max_length=512, padding=True).to(
|
||||
@@ -362,9 +370,6 @@ class PegasusXSUMIntegrationTest(AbstractSeq2SeqIntegrationTest):
|
||||
decoded = self.tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)
|
||||
assert self.tgt_text == decoded
|
||||
|
||||
if "cuda" not in torch_device:
|
||||
return
|
||||
# Demonstrate fp16 issue, Contributions welcome!
|
||||
self.model.half()
|
||||
translated_tokens_fp16 = self.model.generate(**inputs, max_length=10)
|
||||
decoded_fp16 = self.tokenizer.batch_decode(translated_tokens_fp16, skip_special_tokens=True)
|
||||
|
||||
@@ -21,7 +21,14 @@ import tempfile
|
||||
import unittest
|
||||
|
||||
from transformers import is_torch_available
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_sentencepiece,
|
||||
require_tokenizers,
|
||||
require_torch,
|
||||
require_torch_fp16,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils import cached_property
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
@@ -274,13 +281,13 @@ class PegasusXModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM
|
||||
with torch.no_grad():
|
||||
model(**inputs)[0]
|
||||
|
||||
@require_torch_fp16
|
||||
def test_generate_fp16(self):
|
||||
config, input_dict = self.model_tester.prepare_config_and_inputs()
|
||||
input_ids = input_dict["input_ids"]
|
||||
attention_mask = input_ids.ne(1).to(torch_device)
|
||||
model = PegasusXForConditionalGeneration(config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
model.half()
|
||||
model.half()
|
||||
model.generate(input_ids, attention_mask=attention_mask)
|
||||
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
|
||||
@@ -21,7 +21,14 @@ import unittest
|
||||
from parameterized import parameterized
|
||||
|
||||
from transformers import PersimmonConfig, is_torch_available, set_seed
|
||||
from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
backend_empty_cache,
|
||||
require_torch,
|
||||
require_torch_accelerator,
|
||||
require_torch_fp16,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
from ...test_configuration_common import ConfigTester
|
||||
@@ -413,12 +420,13 @@ class PersimmonIntegrationTest(unittest.TestCase):
|
||||
# fmt: on
|
||||
torch.testing.assert_close(out.cpu()[0, 0, :30], EXPECTED_SLICE, atol=1e-5, rtol=1e-5)
|
||||
|
||||
torch.cuda.empty_cache()
|
||||
backend_empty_cache(torch_device)
|
||||
del model
|
||||
gc.collect()
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@require_torch_fp16
|
||||
def test_model_8b_chat_greedy_generation(self):
|
||||
EXPECTED_TEXT_COMPLETION = """human: Simply put, the theory of relativity states that?\n\nadept: The theory of relativity states that the laws of physics are the same for all observers, regardless of their relative motion."""
|
||||
prompt = "human: Simply put, the theory of relativity states that?\n\nadept:"
|
||||
@@ -433,6 +441,6 @@ class PersimmonIntegrationTest(unittest.TestCase):
|
||||
text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
|
||||
self.assertEqual(EXPECTED_TEXT_COMPLETION, text)
|
||||
|
||||
torch.cuda.empty_cache()
|
||||
backend_empty_cache(torch_device)
|
||||
del model
|
||||
gc.collect()
|
||||
|
||||
@@ -20,7 +20,14 @@ import tempfile
|
||||
import unittest
|
||||
|
||||
from transformers import PLBartConfig, is_torch_available
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_sentencepiece,
|
||||
require_tokenizers,
|
||||
require_torch,
|
||||
require_torch_fp16,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils import cached_property
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
@@ -304,13 +311,13 @@ class PLBartModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
|
||||
with torch.no_grad():
|
||||
model(**inputs)[0]
|
||||
|
||||
@require_torch_fp16
|
||||
def test_generate_fp16(self):
|
||||
config, input_dict = self.model_tester.prepare_config_and_inputs()
|
||||
input_ids = input_dict["input_ids"]
|
||||
attention_mask = input_ids.ne(1).to(torch_device)
|
||||
model = PLBartForConditionalGeneration(config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
model.half()
|
||||
model.half()
|
||||
model.generate(input_ids, attention_mask=attention_mask)
|
||||
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
|
||||
@@ -23,7 +23,8 @@ from transformers.models.auto import get_values
|
||||
from transformers.testing_utils import (
|
||||
require_accelerate,
|
||||
require_torch,
|
||||
require_torch_gpu,
|
||||
require_torch_accelerator,
|
||||
require_torch_fp16,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
@@ -318,7 +319,8 @@ class PvtModelIntegrationTest(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
@require_accelerate
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@require_torch_fp16
|
||||
def test_inference_fp16(self):
|
||||
r"""
|
||||
A small test to make sure that inference work in half precision without any problem.
|
||||
|
||||
@@ -22,7 +22,7 @@ import unittest
|
||||
import requests
|
||||
|
||||
from transformers import SamConfig, SamMaskDecoderConfig, SamPromptEncoderConfig, SamVisionConfig, pipeline
|
||||
from transformers.testing_utils import require_torch, slow, torch_device
|
||||
from transformers.testing_utils import backend_empty_cache, require_torch, slow, torch_device
|
||||
from transformers.utils import is_torch_available, is_vision_available
|
||||
|
||||
from ...test_configuration_common import ConfigTester
|
||||
@@ -478,7 +478,7 @@ class SamModelIntegrationTest(unittest.TestCase):
|
||||
super().tearDown()
|
||||
# clean-up as much as possible GPU memory occupied by PyTorch
|
||||
gc.collect()
|
||||
torch.cuda.empty_cache()
|
||||
backend_empty_cache(torch_device)
|
||||
|
||||
def test_inference_mask_generation_no_point(self):
|
||||
model = SamModel.from_pretrained("facebook/sam-vit-base")
|
||||
@@ -772,9 +772,7 @@ class SamModelIntegrationTest(unittest.TestCase):
|
||||
torch.testing.assert_allclose(iou_scores, EXPECTED_IOU, atol=1e-4, rtol=1e-4)
|
||||
|
||||
def test_dummy_pipeline_generation(self):
|
||||
generator = pipeline(
|
||||
"mask-generation", model="facebook/sam-vit-base", device=0 if torch.cuda.is_available() else -1
|
||||
)
|
||||
generator = pipeline("mask-generation", model="facebook/sam-vit-base", device=torch_device)
|
||||
raw_image = prepare_image()
|
||||
|
||||
_ = generator(raw_image, points_per_batch=64)
|
||||
|
||||
@@ -26,6 +26,7 @@ from transformers.testing_utils import (
|
||||
require_sentencepiece,
|
||||
require_tokenizers,
|
||||
require_torch,
|
||||
require_torch_fp16,
|
||||
require_torchaudio,
|
||||
slow,
|
||||
torch_device,
|
||||
@@ -336,14 +337,14 @@ class Speech2TextModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTest
|
||||
def test_training_gradient_checkpointing_use_reentrant_false(self):
|
||||
pass
|
||||
|
||||
@require_torch_fp16
|
||||
def test_generate_fp16(self):
|
||||
config, input_dict = self.model_tester.prepare_config_and_inputs()
|
||||
input_features = input_dict["input_features"]
|
||||
attention_mask = input_dict["attention_mask"]
|
||||
model = Speech2TextForConditionalGeneration(config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
input_features = input_features.half()
|
||||
model.half()
|
||||
input_features = input_features.half()
|
||||
model.half()
|
||||
model.generate(input_features, attention_mask=attention_mask)
|
||||
model.generate(input_features, num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
|
||||
@@ -19,7 +19,14 @@ import tempfile
|
||||
import unittest
|
||||
|
||||
from transformers import SwitchTransformersConfig, is_torch_available
|
||||
from transformers.testing_utils import require_tokenizers, require_torch, require_torch_gpu, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_tokenizers,
|
||||
require_torch,
|
||||
require_torch_accelerator,
|
||||
require_torch_bf16,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
from ...test_configuration_common import ConfigTester
|
||||
@@ -1017,7 +1024,8 @@ class SwitchTransformerRouterTest(unittest.TestCase):
|
||||
@require_torch
|
||||
@require_tokenizers
|
||||
class SwitchTransformerModelIntegrationTests(unittest.TestCase):
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@require_torch_bf16
|
||||
def test_small_logits(self):
|
||||
r"""
|
||||
Logits testing to check implementation consistency between `t5x` implementation
|
||||
|
||||
@@ -22,7 +22,8 @@ from transformers import ViTConfig
|
||||
from transformers.testing_utils import (
|
||||
require_accelerate,
|
||||
require_torch,
|
||||
require_torch_gpu,
|
||||
require_torch_accelerator,
|
||||
require_torch_fp16,
|
||||
require_vision,
|
||||
slow,
|
||||
torch_device,
|
||||
@@ -316,7 +317,8 @@ class ViTModelIntegrationTest(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
@require_accelerate
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@require_torch_fp16
|
||||
def test_inference_fp16(self):
|
||||
r"""
|
||||
A small test to make sure that inference work in half precision without any problem.
|
||||
|
||||
@@ -29,6 +29,7 @@ from datasets import load_dataset
|
||||
from transformers import Wav2Vec2Config, is_torch_available
|
||||
from transformers.testing_utils import (
|
||||
CaptureLogger,
|
||||
backend_empty_cache,
|
||||
is_pt_flax_cross_test,
|
||||
is_pyctcdecode_available,
|
||||
is_torchaudio_available,
|
||||
@@ -1455,7 +1456,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
|
||||
super().tearDown()
|
||||
# clean-up as much as possible GPU memory occupied by PyTorch
|
||||
gc.collect()
|
||||
torch.cuda.empty_cache()
|
||||
backend_empty_cache(torch_device)
|
||||
|
||||
def _load_datasamples(self, num_samples):
|
||||
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||
|
||||
@@ -21,7 +21,14 @@ import numpy as np
|
||||
from datasets import load_dataset
|
||||
|
||||
from transformers import Wav2Vec2ConformerConfig, is_torch_available
|
||||
from transformers.testing_utils import is_pt_flax_cross_test, require_torch, require_torch_gpu, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
is_pt_flax_cross_test,
|
||||
require_torch,
|
||||
require_torch_accelerator,
|
||||
require_torch_fp16,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
|
||||
from ...test_configuration_common import ConfigTester
|
||||
from ...test_modeling_common import (
|
||||
@@ -468,12 +475,14 @@ class Wav2Vec2ConformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_model_with_adapter_proj_dim(*config_and_inputs)
|
||||
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@require_torch_fp16
|
||||
def test_model_float16_with_relative(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs(position_embeddings_type="relative")
|
||||
self.model_tester.create_and_check_model_float16(*config_and_inputs)
|
||||
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@require_torch_fp16
|
||||
def test_model_float16_with_rotary(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs(position_embeddings_type="rotary")
|
||||
self.model_tester.create_and_check_model_float16(*config_and_inputs)
|
||||
|
||||
@@ -24,7 +24,14 @@ import numpy as np
|
||||
|
||||
import transformers
|
||||
from transformers import WhisperConfig
|
||||
from transformers.testing_utils import is_pt_flax_cross_test, require_torch, require_torchaudio, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
is_pt_flax_cross_test,
|
||||
require_torch,
|
||||
require_torch_fp16,
|
||||
require_torchaudio,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils import cached_property, is_flax_available, is_torch_available
|
||||
from transformers.utils.import_utils import is_datasets_available
|
||||
|
||||
@@ -429,14 +436,14 @@ class WhisperModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
|
||||
def test_generate_with_head_masking(self):
|
||||
pass
|
||||
|
||||
@require_torch_fp16
|
||||
def test_generate_fp16(self):
|
||||
config, input_dict = self.model_tester.prepare_config_and_inputs()
|
||||
config.max_target_positions = 400
|
||||
input_features = input_dict["input_features"]
|
||||
model = WhisperForConditionalGeneration(config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
input_features = input_features.half()
|
||||
model.half()
|
||||
input_features = input_features.half()
|
||||
model.half()
|
||||
model.generate(input_features)
|
||||
model.generate(input_features, num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
|
||||
@@ -19,7 +19,13 @@ import math
|
||||
import unittest
|
||||
|
||||
from transformers import XGLMConfig, is_torch_available
|
||||
from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_torch,
|
||||
require_torch_accelerator,
|
||||
require_torch_fp16,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
from ...test_configuration_common import ConfigTester
|
||||
@@ -492,7 +498,8 @@ class XGLMModelLanguageGenerationTest(unittest.TestCase):
|
||||
duration = datetime.datetime.now() - start
|
||||
self.assertGreater(duration, datetime.timedelta(seconds=1.25 * MAX_TIME))
|
||||
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@require_torch_fp16
|
||||
def test_batched_nan_fp16(self):
|
||||
model_name = "facebook/xglm-564M"
|
||||
tokenizer = XGLMTokenizer.from_pretrained(model_name, use_fast=False, padding_side="left")
|
||||
|
||||
@@ -44,8 +44,8 @@ from transformers.testing_utils import (
|
||||
require_accelerate,
|
||||
require_safetensors,
|
||||
require_torch,
|
||||
require_torch_gpu,
|
||||
require_torch_multi_gpu,
|
||||
require_torch_accelerator,
|
||||
require_torch_multi_accelerator,
|
||||
require_usr_bin_time,
|
||||
slow,
|
||||
torch_device,
|
||||
@@ -681,7 +681,7 @@ class ModelUtilsTest(TestCasePlus):
|
||||
|
||||
@require_accelerate
|
||||
@mark.accelerate_tests
|
||||
@require_torch_multi_gpu
|
||||
@require_torch_multi_accelerator
|
||||
@slow
|
||||
def test_model_parallelism_gpt2(self):
|
||||
device_map = {"transformer.wte": 0, "transformer.wpe": 0, "lm_head": 0, "transformer.ln_f": 1}
|
||||
@@ -699,7 +699,7 @@ class ModelUtilsTest(TestCasePlus):
|
||||
|
||||
@require_accelerate
|
||||
@mark.accelerate_tests
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_from_pretrained_disk_offload_task_model(self):
|
||||
model = AutoModel.from_pretrained("hf-internal-testing/tiny-random-gpt2")
|
||||
device_map = {
|
||||
@@ -1036,7 +1036,7 @@ class ModelUtilsTest(TestCasePlus):
|
||||
opt_fn(input_ids)
|
||||
self.assertEqual(compile_counter.frame_count, 0)
|
||||
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@slow
|
||||
def test_pretrained_low_mem_new_config(self):
|
||||
# Checking for 1 model(the same one which was described in the issue) .
|
||||
|
||||
Reference in New Issue
Block a user