[TensorFlow] Adding GroupViT (#18020)
* chore: initial commit * chore: adding util methods yet to work on the nn.functional.interpolate port with align_corener=True * chore: refactor the utils * used tf.compat.v1.image.resize to align the F.interpolate function * added type hints to the method signatures * added references to the gists where one 2 one alignment of torch and tf has been shown * chore: adding the layers * chore: porting all the layers from torch to tf This is the initial draft, nothing is tested yet. * chore: aligning the layers with reference to tf clip * chore: aligning the modules * added demaraction comments * added copied and adapted from comments * chore: aligning with CLIP * chore: wrangling the layers to keep it tf compatible * chore: aligning the names of the layers for porting * chore: style changes * chore: adding docs and inits * chore: adding tfp dependencis the code is taken from TAPAS * chore: initial commit for testing * chore: aligning the vision embeddings with the vit implementatino * chore: changing model prefix * chore: fixing the name of the model and the layer normalization test case * chore: every test passes but the slow ones * chore: fix style and integration test * chore: moving comments below decorators * chore: make fixup and fix-copies changes * chore: adding the Vision and Text Model to check_repo * chore: modifying the prefix name to align it with the torch implementation * chore: fix typo in configuration * choer: changing the name of the model variable * chore: adding segmentation flag * chore: gante's review * chore: style refactor * chore: amy review * chore: adding shape_list to parts that have been copied from other snippets * chore: init batchnorm with torch defaults * chore: adding shape_list to pass the tests * test fix: adding seed as 0 * set seed * chore: changing the straight through trick to fix -ve dimensinos * chore: adding a dimension to the loss * chore: adding reviewers and contributors names to the docs * chore: added changes after review * chore: code quality fixup * chore: fixing the segmentation snippet * chore: adding to the layer calls * chore: changing int32 to int64 for inputs of serving * chore: review changes * chore: style changes * chore: remove from_pt=True * fix: repo consistency Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
bb6fa06f2d
commit
0dc7b3a785
@@ -17,6 +17,7 @@
|
||||
|
||||
import inspect
|
||||
import os
|
||||
import random
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
@@ -24,7 +25,7 @@ import numpy as np
|
||||
|
||||
import requests
|
||||
from transformers import GroupViTConfig, GroupViTTextConfig, GroupViTVisionConfig
|
||||
from transformers.testing_utils import require_torch, require_vision, slow, torch_device
|
||||
from transformers.testing_utils import is_pt_tf_cross_test, require_torch, require_vision, slow, torch_device
|
||||
from transformers.utils import is_torch_available, is_vision_available
|
||||
|
||||
from ...test_configuration_common import ConfigTester
|
||||
@@ -95,7 +96,8 @@ class GroupViTVisionModelTester:
|
||||
self.seq_length = num_patches
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
|
||||
rng = random.Random(0)
|
||||
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size], rng=rng)
|
||||
config = self.get_config()
|
||||
|
||||
return config, pixel_values
|
||||
@@ -161,6 +163,18 @@ class GroupViTVisionModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
def test_inputs_embeds(self):
|
||||
pass
|
||||
|
||||
@is_pt_tf_cross_test
|
||||
def test_pt_tf_model_equivalence(self):
|
||||
import tensorflow as tf
|
||||
|
||||
seed = 338
|
||||
random.seed(seed)
|
||||
np.random.seed(seed)
|
||||
torch.manual_seed(seed)
|
||||
torch.cuda.manual_seed_all(seed)
|
||||
tf.random.set_seed(seed)
|
||||
return super().test_pt_tf_model_equivalence()
|
||||
|
||||
def test_model_common_attributes(self):
|
||||
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
|
||||
@@ -368,7 +382,8 @@ class GroupViTTextModelTester:
|
||||
self.scope = scope
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
rng = random.Random(0)
|
||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size, rng=rng)
|
||||
|
||||
input_mask = None
|
||||
if self.use_input_mask:
|
||||
@@ -532,6 +547,18 @@ class GroupViTModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
def test_model_common_attributes(self):
|
||||
pass
|
||||
|
||||
@is_pt_tf_cross_test
|
||||
def test_pt_tf_model_equivalence(self):
|
||||
import tensorflow as tf
|
||||
|
||||
seed = 163
|
||||
random.seed(seed)
|
||||
np.random.seed(seed)
|
||||
torch.manual_seed(seed)
|
||||
torch.cuda.manual_seed_all(seed)
|
||||
tf.random.set_seed(seed)
|
||||
return super().test_pt_tf_model_equivalence()
|
||||
|
||||
# override as the `logit_scale` parameter initilization is different for GROUPVIT
|
||||
def test_initialization(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
|
||||
Reference in New Issue
Block a user