Improve LayoutLM (#9476)

* Add LayoutLMForSequenceClassification and integration tests

Improve docs

Add LayoutLM notebook to list of community notebooks

* Make style & quality

* Address comments by @sgugger, @patrickvonplaten and @LysandreJik

* Fix rebase with master

* Reformat in one line

* Improve code examples as requested by @patrickvonplaten

Co-authored-by: Lysandre <lysandre.debut@reseau.eseo.fr>
Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
This commit is contained in:
NielsRogge
2021-01-12 15:26:32 +01:00
committed by GitHub
parent ccd1923f46
commit e45eba3b1c
8 changed files with 438 additions and 105 deletions

View File

@@ -17,19 +17,26 @@
import unittest
from transformers import is_torch_available
from transformers.file_utils import cached_property
from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
from transformers.testing_utils import require_torch, slow, torch_device
from .test_configuration_common import ConfigTester
from .test_modeling_common import ModelTesterMixin, ids_tensor
if is_torch_available():
from transformers import LayoutLMConfig, LayoutLMForMaskedLM, LayoutLMForTokenClassification, LayoutLMModel
import torch
from transformers import (
LayoutLMConfig,
LayoutLMForMaskedLM,
LayoutLMForSequenceClassification,
LayoutLMForTokenClassification,
LayoutLMModel,
)
class LayoutLMModelTester:
"""You can also import this e.g from .test_modeling_bart import BartModelTester """
"""You can also import this e.g from .test_modeling_layoutlm import LayoutLMModelTester """
def __init__(
self,
@@ -150,6 +157,18 @@ class LayoutLMModelTester:
result = model(input_ids, bbox, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
def create_and_check_for_sequence_classification(
self, config, input_ids, bbox, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = LayoutLMForSequenceClassification(config)
model.to(torch_device)
model.eval()
result = model(
input_ids, bbox, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels
)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))
def create_and_check_for_token_classification(
self, config, input_ids, bbox, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
@@ -185,7 +204,14 @@ class LayoutLMModelTester:
class LayoutLMModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (
(LayoutLMModel, LayoutLMForMaskedLM, LayoutLMForTokenClassification) if is_torch_available() else ()
(
LayoutLMModel,
LayoutLMForMaskedLM,
LayoutLMForSequenceClassification,
LayoutLMForTokenClassification,
)
if is_torch_available()
else None
)
def setUp(self):
@@ -209,36 +235,101 @@ class LayoutLMModelTest(ModelTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)
def test_for_sequence_classification(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_sequence_classification(*config_and_inputs)
def test_for_token_classification(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_token_classification(*config_and_inputs)
@cached_property
def big_model(self):
"""Cached property means this code will only be executed once."""
checkpoint_path = "microsoft/layoutlm-large-uncased"
model = LayoutLMForMaskedLM.from_pretrained(checkpoint_path).to(
torch_device
) # test whether AutoModel can determine your model_class from checkpoint name
if torch_device == "cuda":
model.half()
# optional: do more testing! This will save you time later!
def prepare_layoutlm_batch_inputs():
# Here we prepare a batch of 2 sequences to test a LayoutLM forward pass on:
# fmt: off
input_ids = torch.tensor([[-9997.22461,-9997.22461,-9997.22461,-9997.22461,-9997.22461,-9997.22461,-9997.22461,-9997.22461,-9997.22461,-16.2628059,-10004.082,15.4330549,15.4330549,15.4330549,-9990.42,-16.3270779,-16.3270779,-16.3270779,-16.3270779,-16.3270779,-10004.8506]],device=torch_device) # noqa: E231
attention_mask = torch.tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],],device=torch_device) # noqa: E231
bbox = torch.tensor([[[0,0,0,0],[423,237,440,251],[427,272,441,287],[419,115,437,129],[961,885,992,912],[256,38,330,58],[256,38,330,58],[336,42,353,57],[360,39,401,56],[360,39,401,56],[411,39,471,59],[479,41,528,59],[533,39,630,60],[67,113,134,131],[141,115,209,132],[68,149,133,166],[141,149,187,164],[195,148,287,165],[195,148,287,165],[195,148,287,165],[295,148,349,165],[441,149,492,166],[497,149,546,164],[64,201,125,218],[1000,1000,1000,1000]],[[0,0,0,0],[662,150,754,166],[665,199,742,211],[519,213,554,228],[519,213,554,228],[134,433,187,454],[130,467,204,480],[130,467,204,480],[130,467,204,480],[130,467,204,480],[130,467,204,480],[314,469,376,482],[504,684,582,706],[941,825,973,900],[941,825,973,900],[941,825,973,900],[941,825,973,900],[610,749,652,765],[130,659,168,672],[176,657,237,672],[238,657,312,672],[443,653,628,672],[443,653,628,672],[716,301,825,317],[1000,1000,1000,1000]]],device=torch_device) # noqa: E231
token_type_ids = torch.tensor([[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]],device=torch_device) # noqa: E231
# these are sequence labels (i.e. at the token level)
labels = torch.tensor([[-100,10,10,10,9,1,-100,7,7,-100,7,7,4,2,5,2,8,8,-100,-100,5,0,3,2,-100],[-100,12,12,12,-100,12,10,-100,-100,-100,-100,10,12,9,-100,-100,-100,10,10,10,9,12,-100,10,-100]],device=torch_device) # noqa: E231
# fmt: on
return input_ids, attention_mask, bbox, token_type_ids, labels
@require_torch
class LayoutLMModelIntegrationTest(unittest.TestCase):
@slow
def test_that_LayoutLM_can_be_used_in_a_pipeline(self):
"""We can use self.big_model here without calling __init__ again."""
pass
def test_forward_pass_no_head(self):
model = LayoutLMModel.from_pretrained("microsoft/layoutlm-base-uncased").to(torch_device)
def test_LayoutLM_loss_doesnt_change_if_you_add_padding(self):
pass
input_ids, attention_mask, bbox, token_type_ids, labels = prepare_layoutlm_batch_inputs()
def test_LayoutLM_bad_args(self):
pass
# forward pass
outputs = model(input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids)
def test_LayoutLM_backward_pass_reduces_loss(self):
"""Test loss/gradients same as reference implementation, for example."""
pass
# test the sequence output on [0, :3, :3]
expected_slice = torch.tensor(
[[0.1785, -0.1947, -0.0425], [-0.3254, -0.2807, 0.2553], [-0.5391, -0.3322, 0.3364]],
device=torch_device,
)
@require_torch_gpu
def test_large_inputs_in_fp16_dont_cause_overflow(self):
pass
self.assertTrue(torch.allclose(outputs.last_hidden_state[0, :3, :3], expected_slice, atol=1e-3))
# test the pooled output on [1, :3]
expected_slice = torch.tensor([-0.6580, -0.0214, 0.8552], device=torch_device)
self.assertTrue(torch.allclose(outputs.pooler_output[1, :3], expected_slice, atol=1e-3))
@slow
def test_forward_pass_sequence_classification(self):
# initialize model with randomly initialized sequence classification head
model = LayoutLMForSequenceClassification.from_pretrained("microsoft/layoutlm-base-uncased", num_labels=2).to(
torch_device
)
input_ids, attention_mask, bbox, token_type_ids, _ = prepare_layoutlm_batch_inputs()
# forward pass
outputs = model(
input_ids=input_ids,
bbox=bbox,
attention_mask=attention_mask,
token_type_ids=token_type_ids,
labels=torch.tensor([1, 1], device=torch_device),
)
# test whether we get a loss as a scalar
loss = outputs.loss
expected_shape = torch.Size([])
self.assertEqual(loss.shape, expected_shape)
# test the shape of the logits
logits = outputs.logits
expected_shape = torch.Size((2, 2))
self.assertEqual(logits.shape, expected_shape)
@slow
def test_forward_pass_token_classification(self):
# initialize model with randomly initialized token classification head
model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-base-uncased", num_labels=13).to(
torch_device
)
input_ids, attention_mask, bbox, token_type_ids, labels = prepare_layoutlm_batch_inputs()
# forward pass
outputs = model(
input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids, labels=labels
)
# test the loss calculation to be around 2.65
expected_loss = torch.tensor(2.65, device=torch_device)
self.assertTrue(torch.allclose(outputs.loss, expected_loss, atol=0.1))
# test the shape of the logits
logits = outputs.logits
expected_shape = torch.Size((2, 25, 13))
self.assertEqual(logits.shape, expected_shape)