From f5eec0d8e96ed55e5d12d7c63213667284f0e12d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mario=20=C5=A0a=C5=A1ko?= Date: Tue, 8 Jun 2021 05:58:38 -0700 Subject: [PATCH] Replace legacy torch.Tensor with torch.tensor/torch.empty (#12027) * Replace legacy torch.Tensor constructor with torch.{tensor, empty} * Remove torch.Tensor in examples --- .../research_projects/lxmert/modeling_frcnn.py | 2 +- examples/research_projects/lxmert/utils.py | 2 +- .../emmental/modules/masked_nn.py | 2 +- .../rag-end2end-retriever/finetune_rag.py | 2 +- examples/research_projects/rag/finetune_rag.py | 2 +- .../models/convbert/modeling_convbert.py | 4 ++-- tests/test_activations.py | 2 +- tests/test_modeling_bart.py | 16 +++++++++------- tests/test_modeling_clip.py | 2 +- tests/test_modeling_fsmt.py | 4 ++-- tests/test_modeling_ibert.py | 2 +- tests/test_modeling_mbart.py | 6 ++++-- tests/test_modeling_roberta.py | 2 +- 13 files changed, 26 insertions(+), 22 deletions(-) diff --git a/examples/research_projects/lxmert/modeling_frcnn.py b/examples/research_projects/lxmert/modeling_frcnn.py index a86f68801e..9489f4c11d 100644 --- a/examples/research_projects/lxmert/modeling_frcnn.py +++ b/examples/research_projects/lxmert/modeling_frcnn.py @@ -1426,7 +1426,7 @@ class AnchorGenerator(nn.Module): h = aspect_ratio * w x0, y0, x1, y1 = -w / 2.0, -h / 2.0, w / 2.0, h / 2.0 anchors.append([x0, y0, x1, y1]) - return nn.Parameter(torch.Tensor(anchors)) + return nn.Parameter(torch.tensor(anchors)) def forward(self, features): """ diff --git a/examples/research_projects/lxmert/utils.py b/examples/research_projects/lxmert/utils.py index 1faf9feffa..59ae11d025 100644 --- a/examples/research_projects/lxmert/utils.py +++ b/examples/research_projects/lxmert/utils.py @@ -532,7 +532,7 @@ def load_frcnn_pkl_from_url(url): for k, v in model.items(): new[k] = torch.from_numpy(v) if "running_var" in k: - zero = torch.Tensor([0]) + zero = torch.tensor([0]) k2 = k.replace("running_var", "num_batches_tracked") 
new[k2] = zero return new diff --git a/examples/research_projects/movement-pruning/emmental/modules/masked_nn.py b/examples/research_projects/movement-pruning/emmental/modules/masked_nn.py index 298c7e5e51..72fa629aff 100644 --- a/examples/research_projects/movement-pruning/emmental/modules/masked_nn.py +++ b/examples/research_projects/movement-pruning/emmental/modules/masked_nn.py @@ -72,7 +72,7 @@ class MaskedLinear(nn.Linear): if self.pruning_method in ["topK", "threshold", "sigmoied_threshold", "l0"]: self.mask_scale = mask_scale self.mask_init = mask_init - self.mask_scores = nn.Parameter(torch.Tensor(self.weight.size())) + self.mask_scores = nn.Parameter(torch.empty(self.weight.size())) self.init_mask() def init_mask(self): diff --git a/examples/research_projects/rag-end2end-retriever/finetune_rag.py b/examples/research_projects/rag-end2end-retriever/finetune_rag.py index 507cece7f4..96cbc0f7c5 100644 --- a/examples/research_projects/rag-end2end-retriever/finetune_rag.py +++ b/examples/research_projects/rag-end2end-retriever/finetune_rag.py @@ -223,7 +223,7 @@ class GenerativeQAModule(BaseTransformer): decoder_start_token_id = generator.config.decoder_start_token_id decoder_input_ids = ( torch.cat( - [torch.Tensor([[decoder_start_token_id]] * target_ids.shape[0]).to(target_ids), target_ids], + [torch.tensor([[decoder_start_token_id]] * target_ids.shape[0]).to(target_ids), target_ids], dim=1, ) if target_ids.shape[0] < self.target_lens["train"] diff --git a/examples/research_projects/rag/finetune_rag.py b/examples/research_projects/rag/finetune_rag.py index e048153c98..b5ccaa228c 100644 --- a/examples/research_projects/rag/finetune_rag.py +++ b/examples/research_projects/rag/finetune_rag.py @@ -222,7 +222,7 @@ class GenerativeQAModule(BaseTransformer): decoder_start_token_id = generator.config.decoder_start_token_id decoder_input_ids = ( torch.cat( - [torch.Tensor([[decoder_start_token_id]] * target_ids.shape[0]).to(target_ids), target_ids], + 
[torch.tensor([[decoder_start_token_id]] * target_ids.shape[0]).to(target_ids), target_ids], dim=1, ) if target_ids.shape[0] < self.target_lens["train"] diff --git a/src/transformers/models/convbert/modeling_convbert.py b/src/transformers/models/convbert/modeling_convbert.py index a034cadae2..d3d8085d3f 100755 --- a/src/transformers/models/convbert/modeling_convbert.py +++ b/src/transformers/models/convbert/modeling_convbert.py @@ -473,8 +473,8 @@ class GroupedLinearLayer(nn.Module): self.num_groups = num_groups self.group_in_dim = self.input_size // self.num_groups self.group_out_dim = self.output_size // self.num_groups - self.weight = nn.Parameter(torch.Tensor(self.num_groups, self.group_in_dim, self.group_out_dim)) - self.bias = nn.Parameter(torch.Tensor(output_size)) + self.weight = nn.Parameter(torch.empty(self.num_groups, self.group_in_dim, self.group_out_dim)) + self.bias = nn.Parameter(torch.empty(output_size)) def forward(self, hidden_states): batch_size = list(hidden_states.size())[0] diff --git a/tests/test_activations.py b/tests/test_activations.py index 362595f632..fe15caf819 100644 --- a/tests/test_activations.py +++ b/tests/test_activations.py @@ -27,7 +27,7 @@ if is_torch_available(): @require_torch class TestActivations(unittest.TestCase): def test_gelu_versions(self): - x = torch.Tensor([-100, -1, -0.1, 0, 0.1, 1.0, 100]) + x = torch.tensor([-100, -1, -0.1, 0, 0.1, 1.0, 100]) torch_builtin = get_activation("gelu") self.assertTrue(torch.eq(_gelu_python(x), torch_builtin(x)).all().item()) self.assertFalse(torch.eq(_gelu_python(x), gelu_new(x)).all().item()) diff --git a/tests/test_modeling_bart.py b/tests/test_modeling_bart.py index b8847efdc9..20f33f0dda 100644 --- a/tests/test_modeling_bart.py +++ b/tests/test_modeling_bart.py @@ -308,14 +308,16 @@ class BartHeadTests(unittest.TestCase): max_position_embeddings=48, ) lm_model = BartForConditionalGeneration(config).to(torch_device) - context = torch.Tensor([[71, 82, 18, 33, 46, 91, 2], [68, 34, 
26, 58, 30, 2, 1]]).long().to(torch_device) - summary = torch.Tensor([[82, 71, 82, 18, 2], [58, 68, 2, 1, 1]]).long().to(torch_device) + context = torch.tensor( + [[71, 82, 18, 33, 46, 91, 2], [68, 34, 26, 58, 30, 2, 1]], device=torch_device, dtype=torch.long + ) + summary = torch.tensor([[82, 71, 82, 18, 2], [58, 68, 2, 1, 1]], device=torch_device, dtype=torch.long) outputs = lm_model(input_ids=context, decoder_input_ids=summary, labels=summary) expected_shape = (*summary.shape, config.vocab_size) self.assertEqual(outputs["logits"].shape, expected_shape) def test_generate_beam_search(self): - input_ids = torch.Tensor([[71, 82, 2], [68, 34, 2]]).long().to(torch_device) + input_ids = torch.tensor([[71, 82, 2], [68, 34, 2]], device=torch_device, dtype=torch.long) config = BartConfig( vocab_size=self.vocab_size, d_model=24, @@ -345,7 +347,7 @@ class BartHeadTests(unittest.TestCase): self.assertEqual(generated_ids.shape, (input_ids.shape[0], max_length)) def test_shift_tokens_right(self): - input_ids = torch.Tensor([[71, 82, 18, 33, 2, 1, 1], [68, 34, 26, 58, 30, 82, 2]]).long() + input_ids = torch.tensor([[71, 82, 18, 33, 2, 1, 1], [68, 34, 26, 58, 30, 82, 2]], dtype=torch.long) shifted = shift_tokens_right(input_ids, 1, 2) n_pad_before = input_ids.eq(1).float().sum() n_pad_after = shifted.eq(1).float().sum() @@ -358,8 +360,8 @@ class BartHeadTests(unittest.TestCase): tokenizer = BartTokenizer.from_pretrained("facebook/bart-large") examples = [" Hello world", " DomDramg"] # need leading spaces for equality fairseq_results = [ - torch.Tensor([0, 20920, 232, 2]), - torch.Tensor([0, 11349, 495, 4040, 571, 2]), + torch.tensor([0, 20920, 232, 2]), + torch.tensor([0, 11349, 495, 4040, 571, 2]), ] for ex, desired_result in zip(examples, fairseq_results): bart_toks = tokenizer.encode(ex, return_tensors="pt").squeeze() @@ -614,7 +616,7 @@ class BartModelIntegrationTests(unittest.TestCase): batched_logits = outputs.logits expected_shape = torch.Size((2, 3)) 
self.assertEqual(batched_logits.shape, expected_shape) - expected_slice = torch.Tensor([[0.1907, 1.4342, -1.0289]]).to(torch_device) + expected_slice = torch.tensor([[0.1907, 1.4342, -1.0289]], device=torch_device) logits_arr = batched_logits[0].detach() # Test that padding does not change results diff --git a/tests/test_modeling_clip.py b/tests/test_modeling_clip.py index c5ab9416d1..8dc0ab214c 100644 --- a/tests/test_modeling_clip.py +++ b/tests/test_modeling_clip.py @@ -556,6 +556,6 @@ class CLIPModelIntegrationTest(unittest.TestCase): torch.Size((inputs.input_ids.shape[0], inputs.pixel_values.shape[0])), ) - expected_logits = torch.Tensor([[24.5056, 18.8076]]).to(torch_device) + expected_logits = torch.tensor([[24.5056, 18.8076]], device=torch_device) self.assertTrue(torch.allclose(outputs.logits_per_image, expected_logits, atol=1e-3)) diff --git a/tests/test_modeling_fsmt.py b/tests/test_modeling_fsmt.py index 4942fe7317..3c01360d0c 100644 --- a/tests/test_modeling_fsmt.py +++ b/tests/test_modeling_fsmt.py @@ -305,7 +305,7 @@ class FSMTHeadTests(unittest.TestCase): return config, input_ids, batch_size def test_generate_beam_search(self): - input_ids = torch.Tensor([[71, 82, 2], [68, 34, 2]]).long().to(torch_device) + input_ids = torch.tensor([[71, 82, 2], [68, 34, 2]], dtype=torch.long, device=torch_device) config = self._get_config() lm_model = FSMTForConditionalGeneration(config).to(torch_device) lm_model.eval() @@ -322,7 +322,7 @@ class FSMTHeadTests(unittest.TestCase): self.assertEqual(new_input_ids.shape, (input_ids.shape[0], max_length)) def test_shift_tokens_right(self): - input_ids = torch.Tensor([[71, 82, 18, 33, 2, 1, 1], [68, 34, 26, 58, 30, 82, 2]]).long() + input_ids = torch.tensor([[71, 82, 18, 33, 2, 1, 1], [68, 34, 26, 58, 30, 82, 2]], dtype=torch.long) shifted = shift_tokens_right(input_ids, 1) n_pad_before = input_ids.eq(1).float().sum() n_pad_after = shifted.eq(1).float().sum() diff --git a/tests/test_modeling_ibert.py 
b/tests/test_modeling_ibert.py index 7b0d7dbe37..8ef878b902 100755 --- a/tests/test_modeling_ibert.py +++ b/tests/test_modeling_ibert.py @@ -285,7 +285,7 @@ class IBertModelTest(ModelTesterMixin, unittest.TestCase): config = self.model_tester.prepare_config_and_inputs()[0] embeddings = IBertEmbeddings(config=config) - inputs_embeds = torch.Tensor(2, 4, 30) + inputs_embeds = torch.empty(2, 4, 30) expected_single_positions = [ 0 + embeddings.padding_idx + 1, 1 + embeddings.padding_idx + 1, diff --git a/tests/test_modeling_mbart.py b/tests/test_modeling_mbart.py index e5baa4f30a..40fc6fbcd8 100644 --- a/tests/test_modeling_mbart.py +++ b/tests/test_modeling_mbart.py @@ -396,8 +396,10 @@ class MBartEnroIntegrationTest(AbstractSeq2SeqIntegrationTest): add_final_layer_norm=True, ) lm_model = MBartForConditionalGeneration(config).to(torch_device) - context = torch.Tensor([[71, 82, 18, 33, 46, 91, 2], [68, 34, 26, 58, 30, 2, 1]]).long().to(torch_device) - summary = torch.Tensor([[82, 71, 82, 18, 2], [58, 68, 2, 1, 1]]).long().to(torch_device) + context = torch.tensor( + [[71, 82, 18, 33, 46, 91, 2], [68, 34, 26, 58, 30, 2, 1]], device=torch_device, dtype=torch.long + ) + summary = torch.tensor([[82, 71, 82, 18, 2], [58, 68, 2, 1, 1]], device=torch_device, dtype=torch.long) result = lm_model(input_ids=context, decoder_input_ids=summary, labels=summary) expected_shape = (*summary.shape, config.vocab_size) self.assertEqual(result.logits.shape, expected_shape) diff --git a/tests/test_modeling_roberta.py b/tests/test_modeling_roberta.py index a6acdfe7b9..168e5073d7 100644 --- a/tests/test_modeling_roberta.py +++ b/tests/test_modeling_roberta.py @@ -461,7 +461,7 @@ class RobertaModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCas config = self.model_tester.prepare_config_and_inputs()[0] embeddings = RobertaEmbeddings(config=config) - inputs_embeds = torch.Tensor(2, 4, 30) + inputs_embeds = torch.empty(2, 4, 30) expected_single_positions = [ 0 + 
embeddings.padding_idx + 1, 1 + embeddings.padding_idx + 1,