Replaces calls to .cuda with .to(torch_device) in tests (#25571)
* Replaces calls to `.cuda` with `.to(torch_device)` in tests

  `torch.Tensor.cuda()` is a pre-0.4 solution to changing a tensor's device. It is recommended to prefer `.to(...)` for greater flexibility and error handling. Furthermore, this change makes these tests more consistent with other tests (which tend to use `.to(torch_device)`) and ensures the correct device backend is used (if `torch_device` is neither `cpu` nor `cuda`).
* addressing review comments
* more formatting changes in Bloom test
* `make style`
* Update tests/models/bloom/test_modeling_bloom.py

  Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
* fixes style failures

---------

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
This commit is contained in:
@@ -423,7 +423,7 @@ class BloomModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
|
|||||||
# >=1b1 + allow_fp16_reduced_precision_reduction = False + torch.bmm ==> PASS
|
# >=1b1 + allow_fp16_reduced_precision_reduction = False + torch.bmm ==> PASS
|
||||||
|
|
||||||
path_560m = "bigscience/bloom-560m"
|
path_560m = "bigscience/bloom-560m"
|
||||||
model = BloomForCausalLM.from_pretrained(path_560m, use_cache=True, revision="gs555750").cuda()
|
model = BloomForCausalLM.from_pretrained(path_560m, use_cache=True, revision="gs555750").to(torch_device)
|
||||||
model = model.eval()
|
model = model.eval()
|
||||||
tokenizer = BloomTokenizerFast.from_pretrained(path_560m)
|
tokenizer = BloomTokenizerFast.from_pretrained(path_560m)
|
||||||
|
|
||||||
@@ -435,7 +435,7 @@ class BloomModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
|
|||||||
)
|
)
|
||||||
|
|
||||||
input_ids = tokenizer.encode(input_sentence, return_tensors="pt")
|
input_ids = tokenizer.encode(input_sentence, return_tensors="pt")
|
||||||
greedy_output = model.generate(input_ids.cuda(), max_length=50)
|
greedy_output = model.generate(input_ids.to(torch_device), max_length=50)
|
||||||
|
|
||||||
self.assertEqual(tokenizer.decode(greedy_output[0], skip_special_tokens=True), EXPECTED_OUTPUT)
|
self.assertEqual(tokenizer.decode(greedy_output[0], skip_special_tokens=True), EXPECTED_OUTPUT)
|
||||||
|
|
||||||
@@ -443,16 +443,16 @@ class BloomModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
|
|||||||
@require_torch_gpu
|
@require_torch_gpu
|
||||||
def test_batch_generation(self):
|
def test_batch_generation(self):
|
||||||
path_560m = "bigscience/bloom-560m"
|
path_560m = "bigscience/bloom-560m"
|
||||||
model = BloomForCausalLM.from_pretrained(path_560m, use_cache=True, revision="gs555750").cuda()
|
model = BloomForCausalLM.from_pretrained(path_560m, use_cache=True, revision="gs555750").to(torch_device)
|
||||||
model = model.eval()
|
model = model.eval()
|
||||||
tokenizer = BloomTokenizerFast.from_pretrained(path_560m, padding_side="left")
|
tokenizer = BloomTokenizerFast.from_pretrained(path_560m, padding_side="left")
|
||||||
|
|
||||||
input_sentence = ["I enjoy walking with my cute dog", "I enjoy walking with my cute dog"]
|
input_sentence = ["I enjoy walking with my cute dog", "I enjoy walking with my cute dog"]
|
||||||
|
|
||||||
input_ids = tokenizer.batch_encode_plus(input_sentence, return_tensors="pt", padding=True)
|
input_ids = tokenizer.batch_encode_plus(input_sentence, return_tensors="pt", padding=True)
|
||||||
greedy_output = model.generate(
|
input_ids = input_ids["input_ids"].to(torch_device)
|
||||||
input_ids["input_ids"].cuda(), attention_mask=input_ids["attention_mask"], max_length=50, do_sample=False
|
attention_mask = input_ids["attention_mask"]
|
||||||
)
|
greedy_output = model.generate(input_ids, attention_mask=attention_mask, max_length=50, do_sample=False)
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
tokenizer.decode(greedy_output[0], skip_special_tokens=True),
|
tokenizer.decode(greedy_output[0], skip_special_tokens=True),
|
||||||
@@ -463,7 +463,7 @@ class BloomModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
|
|||||||
@require_torch_gpu
|
@require_torch_gpu
|
||||||
def test_batch_generation_padd(self):
|
def test_batch_generation_padd(self):
|
||||||
path_560m = "bigscience/bloom-560m"
|
path_560m = "bigscience/bloom-560m"
|
||||||
model = BloomForCausalLM.from_pretrained(path_560m, use_cache=True, revision="gs555750").cuda()
|
model = BloomForCausalLM.from_pretrained(path_560m, use_cache=True, revision="gs555750").to(torch_device)
|
||||||
model = model.eval()
|
model = model.eval()
|
||||||
tokenizer = BloomTokenizerFast.from_pretrained(path_560m, padding_side="left")
|
tokenizer = BloomTokenizerFast.from_pretrained(path_560m, padding_side="left")
|
||||||
|
|
||||||
@@ -473,10 +473,11 @@ class BloomModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
|
|||||||
input_ids = tokenizer.batch_encode_plus(input_sentence, return_tensors="pt", padding=True)
|
input_ids = tokenizer.batch_encode_plus(input_sentence, return_tensors="pt", padding=True)
|
||||||
input_ids_without_pad = tokenizer.encode(input_sentence_without_pad, return_tensors="pt")
|
input_ids_without_pad = tokenizer.encode(input_sentence_without_pad, return_tensors="pt")
|
||||||
|
|
||||||
greedy_output = model.generate(
|
input_ids, attention_mask = input_ids["input_ids"].to(torch_device), input_ids["attention_mask"]
|
||||||
input_ids["input_ids"].cuda(), attention_mask=input_ids["attention_mask"], max_length=50, do_sample=False
|
greedy_output = model.generate(input_ids, attention_mask=attention_mask, max_length=50, do_sample=False)
|
||||||
|
greedy_output_without_pad = model.generate(
|
||||||
|
input_ids_without_pad.to(torch_device), max_length=50, do_sample=False
|
||||||
)
|
)
|
||||||
greedy_output_without_pad = model.generate(input_ids_without_pad.cuda(), max_length=50, do_sample=False)
|
|
||||||
|
|
||||||
# test token values
|
# test token values
|
||||||
self.assertEqual(greedy_output[-1, 3:].tolist(), greedy_output_without_pad[0, :-3].tolist())
|
self.assertEqual(greedy_output[-1, 3:].tolist(), greedy_output_without_pad[0, :-3].tolist())
|
||||||
@@ -492,7 +493,7 @@ class BloomModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
|
|||||||
def test_batch_generated_text(self):
|
def test_batch_generated_text(self):
|
||||||
path_560m = "bigscience/bloom-560m"
|
path_560m = "bigscience/bloom-560m"
|
||||||
|
|
||||||
model = BloomForCausalLM.from_pretrained(path_560m, use_cache=True, revision="gs555750").cuda()
|
model = BloomForCausalLM.from_pretrained(path_560m, use_cache=True, revision="gs555750").to(torch_device)
|
||||||
model = model.eval()
|
model = model.eval()
|
||||||
tokenizer = BloomTokenizerFast.from_pretrained(path_560m, padding_side="left")
|
tokenizer = BloomTokenizerFast.from_pretrained(path_560m, padding_side="left")
|
||||||
|
|
||||||
@@ -502,7 +503,7 @@ class BloomModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
|
|||||||
]
|
]
|
||||||
inputs = tokenizer(input_sentences, return_tensors="pt", padding=True, truncation=True)
|
inputs = tokenizer(input_sentences, return_tensors="pt", padding=True, truncation=True)
|
||||||
generated_ids = model.generate(
|
generated_ids = model.generate(
|
||||||
inputs["input_ids"].cuda(), attention_mask=inputs["attention_mask"], max_length=20
|
inputs["input_ids"].to(torch_device), attention_mask=inputs["attention_mask"], max_length=20
|
||||||
)
|
)
|
||||||
generated_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
|
generated_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
|
||||||
|
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ import unittest
|
|||||||
from unittest import skip
|
from unittest import skip
|
||||||
|
|
||||||
from transformers import is_torch_available
|
from transformers import is_torch_available
|
||||||
from transformers.testing_utils import require_torch, slow
|
from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
|
||||||
from transformers.trainer_utils import set_seed
|
from transformers.trainer_utils import set_seed
|
||||||
|
|
||||||
|
|
||||||
@@ -363,35 +363,37 @@ class Jukebox5bModelTester(unittest.TestCase):
|
|||||||
self.assertIn(zs[2][0].detach().cpu().tolist(), [self.EXPECTED_OUTPUT_0, self.EXPECTED_OUTPUT_0_PT_2])
|
self.assertIn(zs[2][0].detach().cpu().tolist(), [self.EXPECTED_OUTPUT_0, self.EXPECTED_OUTPUT_0_PT_2])
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
|
@require_torch_gpu
|
||||||
@skip("Not enough GPU memory on CI runners")
|
@skip("Not enough GPU memory on CI runners")
|
||||||
def test_slow_sampling(self):
|
def test_slow_sampling(self):
|
||||||
model = JukeboxModel.from_pretrained(self.model_id, min_duration=0).eval()
|
model = JukeboxModel.from_pretrained(self.model_id, min_duration=0).eval()
|
||||||
labels = [i.cuda() for i in self.prepare_inputs(self.model_id)]
|
labels = [i.to(torch_device) for i in self.prepare_inputs(self.model_id)]
|
||||||
|
|
||||||
set_seed(0)
|
set_seed(0)
|
||||||
model.priors[0].cuda()
|
model.priors[0].to(torch_device)
|
||||||
zs = [torch.zeros(1, 0, dtype=torch.long).cuda() for _ in range(3)]
|
zs = [torch.zeros(1, 0, dtype=torch.long).to(torch_device) for _ in range(3)]
|
||||||
zs = model._sample(zs, labels, [0], sample_length=60 * model.priors[0].raw_to_tokens, save_results=False)
|
zs = model._sample(zs, labels, [0], sample_length=60 * model.priors[0].raw_to_tokens, save_results=False)
|
||||||
torch.testing.assert_allclose(zs[0][0].cpu(), torch.tensor(self.EXPECTED_GPU_OUTPUTS_2))
|
torch.testing.assert_allclose(zs[0][0].cpu(), torch.tensor(self.EXPECTED_GPU_OUTPUTS_2))
|
||||||
model.priors[0].cpu()
|
model.priors[0].cpu()
|
||||||
|
|
||||||
set_seed(0)
|
set_seed(0)
|
||||||
model.priors[1].cuda()
|
model.priors[1].to(torch_device)
|
||||||
zs = model._sample(zs, labels, [1], sample_length=60 * model.priors[1].raw_to_tokens, save_results=False)
|
zs = model._sample(zs, labels, [1], sample_length=60 * model.priors[1].raw_to_tokens, save_results=False)
|
||||||
torch.testing.assert_allclose(zs[1][0].cpu(), torch.tensor(self.EXPECTED_GPU_OUTPUTS_1))
|
torch.testing.assert_allclose(zs[1][0].cpu(), torch.tensor(self.EXPECTED_GPU_OUTPUTS_1))
|
||||||
model.priors[1].cpu()
|
model.priors[1].cpu()
|
||||||
|
|
||||||
set_seed(0)
|
set_seed(0)
|
||||||
model.priors[2].cuda()
|
model.priors[2].to(torch_device)
|
||||||
zs = model._sample(zs, labels, [2], sample_length=60 * model.priors[2].raw_to_tokens, save_results=False)
|
zs = model._sample(zs, labels, [2], sample_length=60 * model.priors[2].raw_to_tokens, save_results=False)
|
||||||
torch.testing.assert_allclose(zs[2][0].cpu(), torch.tensor(self.EXPECTED_GPU_OUTPUTS_0))
|
torch.testing.assert_allclose(zs[2][0].cpu(), torch.tensor(self.EXPECTED_GPU_OUTPUTS_0))
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
|
@require_torch_gpu
|
||||||
def test_fp16_slow_sampling(self):
|
def test_fp16_slow_sampling(self):
|
||||||
prior_id = "ArthurZ/jukebox_prior_0"
|
prior_id = "ArthurZ/jukebox_prior_0"
|
||||||
model = JukeboxPrior.from_pretrained(prior_id, min_duration=0).eval().half().to("cuda")
|
model = JukeboxPrior.from_pretrained(prior_id, min_duration=0).eval().half().to(torch_device)
|
||||||
|
|
||||||
labels = self.prepare_inputs(prior_id)[0].cuda()
|
labels = self.prepare_inputs(prior_id)[0].to(torch_device)
|
||||||
metadata = model.get_metadata(labels, 0, 7680, 0)
|
metadata = model.get_metadata(labels, 0, 7680, 0)
|
||||||
set_seed(0)
|
set_seed(0)
|
||||||
outputs = model.sample(1, metadata=metadata, sample_tokens=60)
|
outputs = model.sample(1, metadata=metadata, sample_tokens=60)
|
||||||
|
|||||||
@@ -522,13 +522,13 @@ class OPTGenerationTest(unittest.TestCase):
|
|||||||
model_name = "facebook/opt-1.3b"
|
model_name = "facebook/opt-1.3b"
|
||||||
tokenizer = GPT2Tokenizer.from_pretrained(model_name, use_fast=False, padding_side="left")
|
tokenizer = GPT2Tokenizer.from_pretrained(model_name, use_fast=False, padding_side="left")
|
||||||
|
|
||||||
model = OPTForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, use_cache=True).cuda()
|
model = OPTForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, use_cache=True).to(torch_device)
|
||||||
model = model.eval()
|
model = model.eval()
|
||||||
|
|
||||||
batch = tokenizer(["Who are you?", "Joe Biden is the president of"], padding=True, return_tensors="pt")
|
batch = tokenizer(["Who are you?", "Joe Biden is the president of"], padding=True, return_tensors="pt")
|
||||||
|
|
||||||
input_ids = batch["input_ids"].cuda()
|
input_ids = batch["input_ids"].to(torch_device)
|
||||||
attention_mask = batch["attention_mask"].cuda()
|
attention_mask = batch["attention_mask"].to(torch_device)
|
||||||
|
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
outputs = model(input_ids, attention_mask=attention_mask)
|
outputs = model(input_ids, attention_mask=attention_mask)
|
||||||
|
|||||||
@@ -497,13 +497,13 @@ class XGLMModelLanguageGenerationTest(unittest.TestCase):
|
|||||||
model_name = "facebook/xglm-564M"
|
model_name = "facebook/xglm-564M"
|
||||||
tokenizer = XGLMTokenizer.from_pretrained(model_name, use_fast=False, padding_side="left")
|
tokenizer = XGLMTokenizer.from_pretrained(model_name, use_fast=False, padding_side="left")
|
||||||
|
|
||||||
model = XGLMForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, use_cache=True).cuda()
|
model = XGLMForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, use_cache=True).to(torch_device)
|
||||||
model = model.eval()
|
model = model.eval()
|
||||||
|
|
||||||
batch = tokenizer(["Who are you?", "Joe Biden is the president of"], padding=True, return_tensors="pt")
|
batch = tokenizer(["Who are you?", "Joe Biden is the president of"], padding=True, return_tensors="pt")
|
||||||
|
|
||||||
input_ids = batch["input_ids"].cuda()
|
input_ids = batch["input_ids"].to(torch_device)
|
||||||
attention_mask = batch["attention_mask"].cuda()
|
attention_mask = batch["attention_mask"].to(torch_device)
|
||||||
|
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
outputs = model(input_ids, attention_mask=attention_mask)
|
outputs = model(input_ids, attention_mask=attention_mask)
|
||||||
|
|||||||
Reference in New Issue
Block a user