Improve LayoutLM (#9476)
* Add LayoutLMForSequenceClassification and integration tests Improve docs Add LayoutLM notebook to list of community notebooks * Make style & quality * Address comments by @sgugger, @patrickvonplaten and @LysandreJik * Fix rebase with master * Reformat in one line * Improve code examples as requested by @patrickvonplaten Co-authored-by: Lysandre <lysandre.debut@reseau.eseo.fr> Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
This commit is contained in:
@@ -17,19 +17,26 @@
|
||||
import unittest
|
||||
|
||||
from transformers import is_torch_available
|
||||
from transformers.file_utils import cached_property
|
||||
from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
|
||||
from transformers.testing_utils import require_torch, slow, torch_device
|
||||
|
||||
from .test_configuration_common import ConfigTester
|
||||
from .test_modeling_common import ModelTesterMixin, ids_tensor
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
from transformers import LayoutLMConfig, LayoutLMForMaskedLM, LayoutLMForTokenClassification, LayoutLMModel
|
||||
import torch
|
||||
|
||||
from transformers import (
|
||||
LayoutLMConfig,
|
||||
LayoutLMForMaskedLM,
|
||||
LayoutLMForSequenceClassification,
|
||||
LayoutLMForTokenClassification,
|
||||
LayoutLMModel,
|
||||
)
|
||||
|
||||
|
||||
class LayoutLMModelTester:
|
||||
"""You can also import this e.g from .test_modeling_bart import BartModelTester """
|
||||
"""You can also import this e.g from .test_modeling_layoutlm import LayoutLMModelTester """
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -150,6 +157,18 @@ class LayoutLMModelTester:
|
||||
result = model(input_ids, bbox, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
|
||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
|
||||
|
||||
def create_and_check_for_sequence_classification(
|
||||
self, config, input_ids, bbox, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
config.num_labels = self.num_labels
|
||||
model = LayoutLMForSequenceClassification(config)
|
||||
model.to(torch_device)
|
||||
model.eval()
|
||||
result = model(
|
||||
input_ids, bbox, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels
|
||||
)
|
||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))
|
||||
|
||||
def create_and_check_for_token_classification(
|
||||
self, config, input_ids, bbox, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
@@ -185,7 +204,14 @@ class LayoutLMModelTester:
|
||||
class LayoutLMModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
|
||||
all_model_classes = (
|
||||
(LayoutLMModel, LayoutLMForMaskedLM, LayoutLMForTokenClassification) if is_torch_available() else ()
|
||||
(
|
||||
LayoutLMModel,
|
||||
LayoutLMForMaskedLM,
|
||||
LayoutLMForSequenceClassification,
|
||||
LayoutLMForTokenClassification,
|
||||
)
|
||||
if is_torch_available()
|
||||
else None
|
||||
)
|
||||
|
||||
def setUp(self):
|
||||
@@ -209,36 +235,101 @@ class LayoutLMModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)
|
||||
|
||||
def test_for_sequence_classification(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_for_sequence_classification(*config_and_inputs)
|
||||
|
||||
def test_for_token_classification(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_for_token_classification(*config_and_inputs)
|
||||
|
||||
@cached_property
|
||||
def big_model(self):
|
||||
"""Cached property means this code will only be executed once."""
|
||||
checkpoint_path = "microsoft/layoutlm-large-uncased"
|
||||
model = LayoutLMForMaskedLM.from_pretrained(checkpoint_path).to(
|
||||
torch_device
|
||||
) # test whether AutoModel can determine your model_class from checkpoint name
|
||||
if torch_device == "cuda":
|
||||
model.half()
|
||||
|
||||
# optional: do more testing! This will save you time later!
|
||||
def prepare_layoutlm_batch_inputs():
|
||||
# Here we prepare a batch of 2 sequences to test a LayoutLM forward pass on:
|
||||
# fmt: off
|
||||
input_ids = torch.tensor([[-9997.22461,-9997.22461,-9997.22461,-9997.22461,-9997.22461,-9997.22461,-9997.22461,-9997.22461,-9997.22461,-16.2628059,-10004.082,15.4330549,15.4330549,15.4330549,-9990.42,-16.3270779,-16.3270779,-16.3270779,-16.3270779,-16.3270779,-10004.8506]],device=torch_device) # noqa: E231
|
||||
attention_mask = torch.tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],],device=torch_device) # noqa: E231
|
||||
bbox = torch.tensor([[[0,0,0,0],[423,237,440,251],[427,272,441,287],[419,115,437,129],[961,885,992,912],[256,38,330,58],[256,38,330,58],[336,42,353,57],[360,39,401,56],[360,39,401,56],[411,39,471,59],[479,41,528,59],[533,39,630,60],[67,113,134,131],[141,115,209,132],[68,149,133,166],[141,149,187,164],[195,148,287,165],[195,148,287,165],[195,148,287,165],[295,148,349,165],[441,149,492,166],[497,149,546,164],[64,201,125,218],[1000,1000,1000,1000]],[[0,0,0,0],[662,150,754,166],[665,199,742,211],[519,213,554,228],[519,213,554,228],[134,433,187,454],[130,467,204,480],[130,467,204,480],[130,467,204,480],[130,467,204,480],[130,467,204,480],[314,469,376,482],[504,684,582,706],[941,825,973,900],[941,825,973,900],[941,825,973,900],[941,825,973,900],[610,749,652,765],[130,659,168,672],[176,657,237,672],[238,657,312,672],[443,653,628,672],[443,653,628,672],[716,301,825,317],[1000,1000,1000,1000]]],device=torch_device) # noqa: E231
|
||||
token_type_ids = torch.tensor([[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]],device=torch_device) # noqa: E231
|
||||
# these are sequence labels (i.e. at the token level)
|
||||
labels = torch.tensor([[-100,10,10,10,9,1,-100,7,7,-100,7,7,4,2,5,2,8,8,-100,-100,5,0,3,2,-100],[-100,12,12,12,-100,12,10,-100,-100,-100,-100,10,12,9,-100,-100,-100,10,10,10,9,12,-100,10,-100]],device=torch_device) # noqa: E231
|
||||
# fmt: on
|
||||
|
||||
return input_ids, attention_mask, bbox, token_type_ids, labels
|
||||
|
||||
|
||||
@require_torch
|
||||
class LayoutLMModelIntegrationTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_that_LayoutLM_can_be_used_in_a_pipeline(self):
|
||||
"""We can use self.big_model here without calling __init__ again."""
|
||||
pass
|
||||
def test_forward_pass_no_head(self):
|
||||
model = LayoutLMModel.from_pretrained("microsoft/layoutlm-base-uncased").to(torch_device)
|
||||
|
||||
def test_LayoutLM_loss_doesnt_change_if_you_add_padding(self):
|
||||
pass
|
||||
input_ids, attention_mask, bbox, token_type_ids, labels = prepare_layoutlm_batch_inputs()
|
||||
|
||||
def test_LayoutLM_bad_args(self):
|
||||
pass
|
||||
# forward pass
|
||||
outputs = model(input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids)
|
||||
|
||||
def test_LayoutLM_backward_pass_reduces_loss(self):
|
||||
"""Test loss/gradients same as reference implementation, for example."""
|
||||
pass
|
||||
# test the sequence output on [0, :3, :3]
|
||||
expected_slice = torch.tensor(
|
||||
[[0.1785, -0.1947, -0.0425], [-0.3254, -0.2807, 0.2553], [-0.5391, -0.3322, 0.3364]],
|
||||
device=torch_device,
|
||||
)
|
||||
|
||||
@require_torch_gpu
|
||||
def test_large_inputs_in_fp16_dont_cause_overflow(self):
|
||||
pass
|
||||
self.assertTrue(torch.allclose(outputs.last_hidden_state[0, :3, :3], expected_slice, atol=1e-3))
|
||||
|
||||
# test the pooled output on [1, :3]
|
||||
expected_slice = torch.tensor([-0.6580, -0.0214, 0.8552], device=torch_device)
|
||||
|
||||
self.assertTrue(torch.allclose(outputs.pooler_output[1, :3], expected_slice, atol=1e-3))
|
||||
|
||||
@slow
|
||||
def test_forward_pass_sequence_classification(self):
|
||||
# initialize model with randomly initialized sequence classification head
|
||||
model = LayoutLMForSequenceClassification.from_pretrained("microsoft/layoutlm-base-uncased", num_labels=2).to(
|
||||
torch_device
|
||||
)
|
||||
|
||||
input_ids, attention_mask, bbox, token_type_ids, _ = prepare_layoutlm_batch_inputs()
|
||||
|
||||
# forward pass
|
||||
outputs = model(
|
||||
input_ids=input_ids,
|
||||
bbox=bbox,
|
||||
attention_mask=attention_mask,
|
||||
token_type_ids=token_type_ids,
|
||||
labels=torch.tensor([1, 1], device=torch_device),
|
||||
)
|
||||
|
||||
# test whether we get a loss as a scalar
|
||||
loss = outputs.loss
|
||||
expected_shape = torch.Size([])
|
||||
self.assertEqual(loss.shape, expected_shape)
|
||||
|
||||
# test the shape of the logits
|
||||
logits = outputs.logits
|
||||
expected_shape = torch.Size((2, 2))
|
||||
self.assertEqual(logits.shape, expected_shape)
|
||||
|
||||
@slow
|
||||
def test_forward_pass_token_classification(self):
|
||||
# initialize model with randomly initialized token classification head
|
||||
model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-base-uncased", num_labels=13).to(
|
||||
torch_device
|
||||
)
|
||||
|
||||
input_ids, attention_mask, bbox, token_type_ids, labels = prepare_layoutlm_batch_inputs()
|
||||
|
||||
# forward pass
|
||||
outputs = model(
|
||||
input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids, labels=labels
|
||||
)
|
||||
|
||||
# test the loss calculation to be around 2.65
|
||||
expected_loss = torch.tensor(2.65, device=torch_device)
|
||||
|
||||
self.assertTrue(torch.allclose(outputs.loss, expected_loss, atol=0.1))
|
||||
|
||||
# test the shape of the logits
|
||||
logits = outputs.logits
|
||||
expected_shape = torch.Size((2, 25, 13))
|
||||
self.assertEqual(logits.shape, expected_shape)
|
||||
|
||||
Reference in New Issue
Block a user