[DETR and friends] Use AutoBackbone as alternative to timm (#20833)

* First draft

* More improvements

* Add conversion script

* More improvements

* Add docs

* Address review

* Rename class to ConvEncoder

* Address review

* Apply suggestion

* Apply suggestions from code review

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update all DETR friends

* Add corresponding test

* Improve test

* Fix bug

* Add more tests

* Set out_features to last stage by default

Co-authored-by: Niels Rogge <nielsrogge@Nielss-MacBook-Pro.local>
Co-authored-by: Niels Rogge <nielsrogge@Nielss-MBP.localdomain>
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
NielsRogge
2023-01-23 12:15:47 +01:00
committed by GitHub
parent c8d719ff7e
commit 91ff7efeeb
14 changed files with 731 additions and 113 deletions

View File

@@ -31,7 +31,7 @@ from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_
if is_timm_available():
import torch
from transformers import TableTransformerForObjectDetection, TableTransformerModel
from transformers import ResNetConfig, TableTransformerForObjectDetection, TableTransformerModel
if is_vision_available():
@@ -153,6 +153,25 @@ class TableTransformerModelTester:
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_queries, self.num_labels + 1))
self.parent.assertEqual(result.pred_boxes.shape, (self.batch_size, self.num_queries, 4))
def create_and_check_table_transformer_no_timm_backbone(self, config, pixel_values, pixel_mask, labels):
config.use_timm_backbone = False
config.backbone_config = ResNetConfig()
model = TableTransformerForObjectDetection(config=config)
model.to(torch_device)
model.eval()
result = model(pixel_values=pixel_values, pixel_mask=pixel_mask)
result = model(pixel_values)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_queries, self.num_labels + 1))
self.parent.assertEqual(result.pred_boxes.shape, (self.batch_size, self.num_queries, 4))
result = model(pixel_values=pixel_values, pixel_mask=pixel_mask, labels=labels)
self.parent.assertEqual(result.loss.shape, ())
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_queries, self.num_labels + 1))
self.parent.assertEqual(result.pred_boxes.shape, (self.batch_size, self.num_queries, 4))
@require_timm
class TableTransformerModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
@@ -212,6 +231,10 @@ class TableTransformerModelTest(ModelTesterMixin, GenerationTesterMixin, unittes
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_table_transformer_object_detection_head_model(*config_and_inputs)
def test_table_transformer_no_timm_backbone(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_table_transformer_no_timm_backbone(*config_and_inputs)
@unittest.skip(reason="Table Transformer does not use inputs_embeds")
def test_inputs_embeds(self):
pass