Replaces calls to .cuda with .to(torch_device) in tests (#25571)

* Replaces calls to `.cuda` with `.to(torch_device)` in tests
`torch.Tensor.cuda()` is a pre-0.4 solution to changing a tensor's device. It is recommended to prefer `.to(...)` for greater flexibility and error handling. Furthermore, this makes it more consistent with other tests (that tend to use `.to(torch_device)`) and ensures the correct device backend is used (if `torch_device` is neither `cpu` or `cuda`).

* addressing review comments

* more formatting changes in Bloom test

* `make style`

* Update tests/models/bloom/test_modeling_bloom.py

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* fixes style failures

---------

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
This commit is contained in:
Alex McKinney
2023-08-18 11:40:40 +01:00
committed by GitHub
parent c45aab7535
commit 9d7afd2536
4 changed files with 29 additions and 26 deletions

View File

@@ -423,7 +423,7 @@ class BloomModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
# >=1b1 + allow_fp16_reduced_precision_reduction = False + torch.bmm ==> PASS
path_560m = "bigscience/bloom-560m"
model = BloomForCausalLM.from_pretrained(path_560m, use_cache=True, revision="gs555750").cuda()
model = BloomForCausalLM.from_pretrained(path_560m, use_cache=True, revision="gs555750").to(torch_device)
model = model.eval()
tokenizer = BloomTokenizerFast.from_pretrained(path_560m)
@@ -435,7 +435,7 @@ class BloomModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
)
input_ids = tokenizer.encode(input_sentence, return_tensors="pt")
greedy_output = model.generate(input_ids.cuda(), max_length=50)
greedy_output = model.generate(input_ids.to(torch_device), max_length=50)
self.assertEqual(tokenizer.decode(greedy_output[0], skip_special_tokens=True), EXPECTED_OUTPUT)
@@ -443,16 +443,16 @@ class BloomModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
@require_torch_gpu
def test_batch_generation(self):
path_560m = "bigscience/bloom-560m"
model = BloomForCausalLM.from_pretrained(path_560m, use_cache=True, revision="gs555750").cuda()
model = BloomForCausalLM.from_pretrained(path_560m, use_cache=True, revision="gs555750").to(torch_device)
model = model.eval()
tokenizer = BloomTokenizerFast.from_pretrained(path_560m, padding_side="left")
input_sentence = ["I enjoy walking with my cute dog", "I enjoy walking with my cute dog"]
input_ids = tokenizer.batch_encode_plus(input_sentence, return_tensors="pt", padding=True)
greedy_output = model.generate(
input_ids["input_ids"].cuda(), attention_mask=input_ids["attention_mask"], max_length=50, do_sample=False
)
input_ids = input_ids["input_ids"].to(torch_device)
attention_mask = input_ids["attention_mask"]
greedy_output = model.generate(input_ids, attention_mask=attention_mask, max_length=50, do_sample=False)
self.assertEqual(
tokenizer.decode(greedy_output[0], skip_special_tokens=True),
@@ -463,7 +463,7 @@ class BloomModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
@require_torch_gpu
def test_batch_generation_padd(self):
path_560m = "bigscience/bloom-560m"
model = BloomForCausalLM.from_pretrained(path_560m, use_cache=True, revision="gs555750").cuda()
model = BloomForCausalLM.from_pretrained(path_560m, use_cache=True, revision="gs555750").to(torch_device)
model = model.eval()
tokenizer = BloomTokenizerFast.from_pretrained(path_560m, padding_side="left")
@@ -473,10 +473,11 @@ class BloomModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
input_ids = tokenizer.batch_encode_plus(input_sentence, return_tensors="pt", padding=True)
input_ids_without_pad = tokenizer.encode(input_sentence_without_pad, return_tensors="pt")
greedy_output = model.generate(
input_ids["input_ids"].cuda(), attention_mask=input_ids["attention_mask"], max_length=50, do_sample=False
input_ids, attention_mask = input_ids["input_ids"].to(torch_device), input_ids["attention_mask"]
greedy_output = model.generate(input_ids, attention_mask=attention_mask, max_length=50, do_sample=False)
greedy_output_without_pad = model.generate(
input_ids_without_pad.to(torch_device), max_length=50, do_sample=False
)
greedy_output_without_pad = model.generate(input_ids_without_pad.cuda(), max_length=50, do_sample=False)
# test token values
self.assertEqual(greedy_output[-1, 3:].tolist(), greedy_output_without_pad[0, :-3].tolist())
@@ -492,7 +493,7 @@ class BloomModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
def test_batch_generated_text(self):
path_560m = "bigscience/bloom-560m"
model = BloomForCausalLM.from_pretrained(path_560m, use_cache=True, revision="gs555750").cuda()
model = BloomForCausalLM.from_pretrained(path_560m, use_cache=True, revision="gs555750").to(torch_device)
model = model.eval()
tokenizer = BloomTokenizerFast.from_pretrained(path_560m, padding_side="left")
@@ -502,7 +503,7 @@ class BloomModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
]
inputs = tokenizer(input_sentences, return_tensors="pt", padding=True, truncation=True)
generated_ids = model.generate(
inputs["input_ids"].cuda(), attention_mask=inputs["attention_mask"], max_length=20
inputs["input_ids"].to(torch_device), attention_mask=inputs["attention_mask"], max_length=20
)
generated_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)