Deprecate low use models (#30781)

* Deprecate models
- graphormer
- time_series_transformer
- xlm_prophetnet
- qdqbert
- nat
- ernie_m
- tvlt
- nezha
- mega
- jukebox
- vit_hybrid
- x_clip
- deta
- speech_to_text_2
- efficientformer
- realm
- gptsan_japanese

* Fix up

* Fix speech2text2 imports

* Make sure message isn't indented

* Fix docstrings

* Correctly map for deprecated models from model_type

* Uncomment out

* Add back time series transformer and x-clip

* Import fix and fix-up

* Fix up with updated ruff
This commit is contained in:
amyeroberts
2024-05-28 18:07:07 +01:00
committed by GitHub
parent 7f08817be4
commit a564d10afe
142 changed files with 1308 additions and 11908 deletions

View File

@@ -1,535 +0,0 @@
# coding=utf-8
# Copyright 2022 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import pathlib
import unittest
from transformers.testing_utils import require_torch, require_vision, slow
from transformers.utils import is_torch_available, is_vision_available
from ...test_image_processing_common import AnnotationFormatTestMixin, ImageProcessingTestMixin, prepare_image_inputs
if is_torch_available():
import torch
if is_vision_available():
from PIL import Image
from transformers import DetaImageProcessor
class DetaImageProcessingTester(unittest.TestCase):
def __init__(
self,
parent,
batch_size=7,
num_channels=3,
min_resolution=30,
max_resolution=400,
do_resize=True,
size=None,
do_normalize=True,
image_mean=[0.5, 0.5, 0.5],
image_std=[0.5, 0.5, 0.5],
do_rescale=True,
rescale_factor=1 / 255,
do_pad=True,
):
# by setting size["longest_edge"] > max_resolution we're effectively not testing this :p
size = size if size is not None else {"shortest_edge": 18, "longest_edge": 1333}
self.parent = parent
self.batch_size = batch_size
self.num_channels = num_channels
self.min_resolution = min_resolution
self.max_resolution = max_resolution
self.do_resize = do_resize
self.size = size
self.do_normalize = do_normalize
self.image_mean = image_mean
self.image_std = image_std
self.do_rescale = do_rescale
self.rescale_factor = rescale_factor
self.do_pad = do_pad
def prepare_image_processor_dict(self):
return {
"do_resize": self.do_resize,
"size": self.size,
"do_normalize": self.do_normalize,
"image_mean": self.image_mean,
"image_std": self.image_std,
"do_rescale": self.do_rescale,
"rescale_factor": self.rescale_factor,
"do_pad": self.do_pad,
}
def get_expected_values(self, image_inputs, batched=False):
"""
This function computes the expected height and width when providing images to DetaImageProcessor,
assuming do_resize is set to True with a scalar size.
"""
if not batched:
image = image_inputs[0]
if isinstance(image, Image.Image):
w, h = image.size
else:
h, w = image.shape[1], image.shape[2]
if w < h:
expected_height = int(self.size["shortest_edge"] * h / w)
expected_width = self.size["shortest_edge"]
elif w > h:
expected_height = self.size["shortest_edge"]
expected_width = int(self.size["shortest_edge"] * w / h)
else:
expected_height = self.size["shortest_edge"]
expected_width = self.size["shortest_edge"]
else:
expected_values = []
for image in image_inputs:
expected_height, expected_width = self.get_expected_values([image])
expected_values.append((expected_height, expected_width))
expected_height = max(expected_values, key=lambda item: item[0])[0]
expected_width = max(expected_values, key=lambda item: item[1])[1]
return expected_height, expected_width
def expected_output_image_shape(self, images):
height, width = self.get_expected_values(images, batched=True)
return self.num_channels, height, width
def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False):
return prepare_image_inputs(
batch_size=self.batch_size,
num_channels=self.num_channels,
min_resolution=self.min_resolution,
max_resolution=self.max_resolution,
equal_resolution=equal_resolution,
numpify=numpify,
torchify=torchify,
)
@require_torch
@require_vision
class DetaImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase):
image_processing_class = DetaImageProcessor if is_vision_available() else None
def setUp(self):
self.image_processor_tester = DetaImageProcessingTester(self)
@property
def image_processor_dict(self):
return self.image_processor_tester.prepare_image_processor_dict()
def test_image_processor_properties(self):
image_processing = self.image_processing_class(**self.image_processor_dict)
self.assertTrue(hasattr(image_processing, "image_mean"))
self.assertTrue(hasattr(image_processing, "image_std"))
self.assertTrue(hasattr(image_processing, "do_normalize"))
self.assertTrue(hasattr(image_processing, "do_resize"))
self.assertTrue(hasattr(image_processing, "do_rescale"))
self.assertTrue(hasattr(image_processing, "do_pad"))
self.assertTrue(hasattr(image_processing, "size"))
def test_image_processor_from_dict_with_kwargs(self):
image_processor = self.image_processing_class.from_dict(self.image_processor_dict)
self.assertEqual(image_processor.size, {"shortest_edge": 18, "longest_edge": 1333})
self.assertEqual(image_processor.do_pad, True)
@slow
def test_call_pytorch_with_coco_detection_annotations(self):
# prepare image and target
image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
with open("./tests/fixtures/tests_samples/COCO/coco_annotations.txt", "r") as f:
target = json.loads(f.read())
target = {"image_id": 39769, "annotations": target}
# encode them
image_processing = DetaImageProcessor()
encoding = image_processing(images=image, annotations=target, return_tensors="pt")
# verify pixel values
expected_shape = torch.Size([1, 3, 800, 1066])
self.assertEqual(encoding["pixel_values"].shape, expected_shape)
expected_slice = torch.tensor([0.2796, 0.3138, 0.3481])
self.assertTrue(torch.allclose(encoding["pixel_values"][0, 0, 0, :3], expected_slice, atol=1e-4))
# verify area
expected_area = torch.tensor([5887.9600, 11250.2061, 489353.8438, 837122.7500, 147967.5156, 165732.3438])
self.assertTrue(torch.allclose(encoding["labels"][0]["area"], expected_area))
# verify boxes
expected_boxes_shape = torch.Size([6, 4])
self.assertEqual(encoding["labels"][0]["boxes"].shape, expected_boxes_shape)
expected_boxes_slice = torch.tensor([0.5503, 0.2765, 0.0604, 0.2215])
self.assertTrue(torch.allclose(encoding["labels"][0]["boxes"][0], expected_boxes_slice, atol=1e-3))
# verify image_id
expected_image_id = torch.tensor([39769])
self.assertTrue(torch.allclose(encoding["labels"][0]["image_id"], expected_image_id))
# verify is_crowd
expected_is_crowd = torch.tensor([0, 0, 0, 0, 0, 0])
self.assertTrue(torch.allclose(encoding["labels"][0]["iscrowd"], expected_is_crowd))
# verify class_labels
expected_class_labels = torch.tensor([75, 75, 63, 65, 17, 17])
self.assertTrue(torch.allclose(encoding["labels"][0]["class_labels"], expected_class_labels))
# verify orig_size
expected_orig_size = torch.tensor([480, 640])
self.assertTrue(torch.allclose(encoding["labels"][0]["orig_size"], expected_orig_size))
# verify size
expected_size = torch.tensor([800, 1066])
self.assertTrue(torch.allclose(encoding["labels"][0]["size"], expected_size))
@slow
def test_call_pytorch_with_coco_panoptic_annotations(self):
# prepare image, target and masks_path
image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
with open("./tests/fixtures/tests_samples/COCO/coco_panoptic_annotations.txt", "r") as f:
target = json.loads(f.read())
target = {"file_name": "000000039769.png", "image_id": 39769, "segments_info": target}
masks_path = pathlib.Path("./tests/fixtures/tests_samples/COCO/coco_panoptic")
# encode them
image_processing = DetaImageProcessor(format="coco_panoptic")
encoding = image_processing(images=image, annotations=target, masks_path=masks_path, return_tensors="pt")
# verify pixel values
expected_shape = torch.Size([1, 3, 800, 1066])
self.assertEqual(encoding["pixel_values"].shape, expected_shape)
expected_slice = torch.tensor([0.2796, 0.3138, 0.3481])
self.assertTrue(torch.allclose(encoding["pixel_values"][0, 0, 0, :3], expected_slice, atol=1e-4))
# verify area
expected_area = torch.tensor([147979.6875, 165527.0469, 484638.5938, 11292.9375, 5879.6562, 7634.1147])
self.assertTrue(torch.allclose(encoding["labels"][0]["area"], expected_area))
# verify boxes
expected_boxes_shape = torch.Size([6, 4])
self.assertEqual(encoding["labels"][0]["boxes"].shape, expected_boxes_shape)
expected_boxes_slice = torch.tensor([0.2625, 0.5437, 0.4688, 0.8625])
self.assertTrue(torch.allclose(encoding["labels"][0]["boxes"][0], expected_boxes_slice, atol=1e-3))
# verify image_id
expected_image_id = torch.tensor([39769])
self.assertTrue(torch.allclose(encoding["labels"][0]["image_id"], expected_image_id))
# verify is_crowd
expected_is_crowd = torch.tensor([0, 0, 0, 0, 0, 0])
self.assertTrue(torch.allclose(encoding["labels"][0]["iscrowd"], expected_is_crowd))
# verify class_labels
expected_class_labels = torch.tensor([17, 17, 63, 75, 75, 93])
self.assertTrue(torch.allclose(encoding["labels"][0]["class_labels"], expected_class_labels))
# verify masks
expected_masks_sum = 822873
self.assertEqual(encoding["labels"][0]["masks"].sum().item(), expected_masks_sum)
# verify orig_size
expected_orig_size = torch.tensor([480, 640])
self.assertTrue(torch.allclose(encoding["labels"][0]["orig_size"], expected_orig_size))
# verify size
expected_size = torch.tensor([800, 1066])
self.assertTrue(torch.allclose(encoding["labels"][0]["size"], expected_size))
@slow
# Copied from tests.models.detr.test_image_processing_detr.DetrImageProcessingTest.test_batched_coco_detection_annotations with Detr->Deta
def test_batched_coco_detection_annotations(self):
image_0 = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
image_1 = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png").resize((800, 800))
with open("./tests/fixtures/tests_samples/COCO/coco_annotations.txt", "r") as f:
target = json.loads(f.read())
annotations_0 = {"image_id": 39769, "annotations": target}
annotations_1 = {"image_id": 39769, "annotations": target}
# Adjust the bounding boxes for the resized image
w_0, h_0 = image_0.size
w_1, h_1 = image_1.size
for i in range(len(annotations_1["annotations"])):
coords = annotations_1["annotations"][i]["bbox"]
new_bbox = [
coords[0] * w_1 / w_0,
coords[1] * h_1 / h_0,
coords[2] * w_1 / w_0,
coords[3] * h_1 / h_0,
]
annotations_1["annotations"][i]["bbox"] = new_bbox
images = [image_0, image_1]
annotations = [annotations_0, annotations_1]
image_processing = DetaImageProcessor()
encoding = image_processing(
images=images,
annotations=annotations,
return_segmentation_masks=True,
return_tensors="pt", # do_convert_annotations=True
)
# Check the pixel values have been padded
postprocessed_height, postprocessed_width = 800, 1066
expected_shape = torch.Size([2, 3, postprocessed_height, postprocessed_width])
self.assertEqual(encoding["pixel_values"].shape, expected_shape)
# Check the bounding boxes have been adjusted for padded images
self.assertEqual(encoding["labels"][0]["boxes"].shape, torch.Size([6, 4]))
self.assertEqual(encoding["labels"][1]["boxes"].shape, torch.Size([6, 4]))
expected_boxes_0 = torch.tensor(
[
[0.6879, 0.4609, 0.0755, 0.3691],
[0.2118, 0.3359, 0.2601, 0.1566],
[0.5011, 0.5000, 0.9979, 1.0000],
[0.5010, 0.5020, 0.9979, 0.9959],
[0.3284, 0.5944, 0.5884, 0.8112],
[0.8394, 0.5445, 0.3213, 0.9110],
]
)
expected_boxes_1 = torch.tensor(
[
[0.4130, 0.2765, 0.0453, 0.2215],
[0.1272, 0.2016, 0.1561, 0.0940],
[0.3757, 0.4933, 0.7488, 0.9865],
[0.3759, 0.5002, 0.7492, 0.9955],
[0.1971, 0.5456, 0.3532, 0.8646],
[0.5790, 0.4115, 0.3430, 0.7161],
]
)
self.assertTrue(torch.allclose(encoding["labels"][0]["boxes"], expected_boxes_0, rtol=1e-3))
self.assertTrue(torch.allclose(encoding["labels"][1]["boxes"], expected_boxes_1, rtol=1e-3))
# Check the masks have also been padded
self.assertEqual(encoding["labels"][0]["masks"].shape, torch.Size([6, 800, 1066]))
self.assertEqual(encoding["labels"][1]["masks"].shape, torch.Size([6, 800, 1066]))
# Check if do_convert_annotations=False, then the annotations are not converted to centre_x, centre_y, width, height
# format and not in the range [0, 1]
encoding = image_processing(
images=images,
annotations=annotations,
return_segmentation_masks=True,
do_convert_annotations=False,
return_tensors="pt",
)
self.assertEqual(encoding["labels"][0]["boxes"].shape, torch.Size([6, 4]))
self.assertEqual(encoding["labels"][1]["boxes"].shape, torch.Size([6, 4]))
# Convert to absolute coordinates
unnormalized_boxes_0 = torch.vstack(
[
expected_boxes_0[:, 0] * postprocessed_width,
expected_boxes_0[:, 1] * postprocessed_height,
expected_boxes_0[:, 2] * postprocessed_width,
expected_boxes_0[:, 3] * postprocessed_height,
]
).T
unnormalized_boxes_1 = torch.vstack(
[
expected_boxes_1[:, 0] * postprocessed_width,
expected_boxes_1[:, 1] * postprocessed_height,
expected_boxes_1[:, 2] * postprocessed_width,
expected_boxes_1[:, 3] * postprocessed_height,
]
).T
# Convert from centre_x, centre_y, width, height to x_min, y_min, x_max, y_max
expected_boxes_0 = torch.vstack(
[
unnormalized_boxes_0[:, 0] - unnormalized_boxes_0[:, 2] / 2,
unnormalized_boxes_0[:, 1] - unnormalized_boxes_0[:, 3] / 2,
unnormalized_boxes_0[:, 0] + unnormalized_boxes_0[:, 2] / 2,
unnormalized_boxes_0[:, 1] + unnormalized_boxes_0[:, 3] / 2,
]
).T
expected_boxes_1 = torch.vstack(
[
unnormalized_boxes_1[:, 0] - unnormalized_boxes_1[:, 2] / 2,
unnormalized_boxes_1[:, 1] - unnormalized_boxes_1[:, 3] / 2,
unnormalized_boxes_1[:, 0] + unnormalized_boxes_1[:, 2] / 2,
unnormalized_boxes_1[:, 1] + unnormalized_boxes_1[:, 3] / 2,
]
).T
self.assertTrue(torch.allclose(encoding["labels"][0]["boxes"], expected_boxes_0, rtol=1))
self.assertTrue(torch.allclose(encoding["labels"][1]["boxes"], expected_boxes_1, rtol=1))
# Copied from tests.models.detr.test_image_processing_detr.DetrImageProcessingTest.test_batched_coco_panoptic_annotations with Detr->Deta
def test_batched_coco_panoptic_annotations(self):
# prepare image, target and masks_path
image_0 = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
image_1 = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png").resize((800, 800))
with open("./tests/fixtures/tests_samples/COCO/coco_panoptic_annotations.txt", "r") as f:
target = json.loads(f.read())
annotation_0 = {"file_name": "000000039769.png", "image_id": 39769, "segments_info": target}
annotation_1 = {"file_name": "000000039769.png", "image_id": 39769, "segments_info": target}
w_0, h_0 = image_0.size
w_1, h_1 = image_1.size
for i in range(len(annotation_1["segments_info"])):
coords = annotation_1["segments_info"][i]["bbox"]
new_bbox = [
coords[0] * w_1 / w_0,
coords[1] * h_1 / h_0,
coords[2] * w_1 / w_0,
coords[3] * h_1 / h_0,
]
annotation_1["segments_info"][i]["bbox"] = new_bbox
masks_path = pathlib.Path("./tests/fixtures/tests_samples/COCO/coco_panoptic")
images = [image_0, image_1]
annotations = [annotation_0, annotation_1]
# encode them
image_processing = DetaImageProcessor(format="coco_panoptic")
encoding = image_processing(
images=images,
annotations=annotations,
masks_path=masks_path,
return_tensors="pt",
return_segmentation_masks=True,
)
# Check the pixel values have been padded
postprocessed_height, postprocessed_width = 800, 1066
expected_shape = torch.Size([2, 3, postprocessed_height, postprocessed_width])
self.assertEqual(encoding["pixel_values"].shape, expected_shape)
# Check the bounding boxes have been adjusted for padded images
self.assertEqual(encoding["labels"][0]["boxes"].shape, torch.Size([6, 4]))
self.assertEqual(encoding["labels"][1]["boxes"].shape, torch.Size([6, 4]))
expected_boxes_0 = torch.tensor(
[
[0.2625, 0.5437, 0.4688, 0.8625],
[0.7719, 0.4104, 0.4531, 0.7125],
[0.5000, 0.4927, 0.9969, 0.9854],
[0.1688, 0.2000, 0.2063, 0.0917],
[0.5492, 0.2760, 0.0578, 0.2187],
[0.4992, 0.4990, 0.9984, 0.9979],
]
)
expected_boxes_1 = torch.tensor(
[
[0.1576, 0.3262, 0.2814, 0.5175],
[0.4634, 0.2463, 0.2720, 0.4275],
[0.3002, 0.2956, 0.5985, 0.5913],
[0.1013, 0.1200, 0.1238, 0.0550],
[0.3297, 0.1656, 0.0347, 0.1312],
[0.2997, 0.2994, 0.5994, 0.5987],
]
)
self.assertTrue(torch.allclose(encoding["labels"][0]["boxes"], expected_boxes_0, rtol=1e-3))
self.assertTrue(torch.allclose(encoding["labels"][1]["boxes"], expected_boxes_1, rtol=1e-3))
# Check the masks have also been padded
self.assertEqual(encoding["labels"][0]["masks"].shape, torch.Size([6, 800, 1066]))
self.assertEqual(encoding["labels"][1]["masks"].shape, torch.Size([6, 800, 1066]))
# Check if do_convert_annotations=False, then the annotations are not converted to centre_x, centre_y, width, height
# format and not in the range [0, 1]
encoding = image_processing(
images=images,
annotations=annotations,
masks_path=masks_path,
return_segmentation_masks=True,
do_convert_annotations=False,
return_tensors="pt",
)
self.assertEqual(encoding["labels"][0]["boxes"].shape, torch.Size([6, 4]))
self.assertEqual(encoding["labels"][1]["boxes"].shape, torch.Size([6, 4]))
# Convert to absolute coordinates
unnormalized_boxes_0 = torch.vstack(
[
expected_boxes_0[:, 0] * postprocessed_width,
expected_boxes_0[:, 1] * postprocessed_height,
expected_boxes_0[:, 2] * postprocessed_width,
expected_boxes_0[:, 3] * postprocessed_height,
]
).T
unnormalized_boxes_1 = torch.vstack(
[
expected_boxes_1[:, 0] * postprocessed_width,
expected_boxes_1[:, 1] * postprocessed_height,
expected_boxes_1[:, 2] * postprocessed_width,
expected_boxes_1[:, 3] * postprocessed_height,
]
).T
# Convert from centre_x, centre_y, width, height to x_min, y_min, x_max, y_max
expected_boxes_0 = torch.vstack(
[
unnormalized_boxes_0[:, 0] - unnormalized_boxes_0[:, 2] / 2,
unnormalized_boxes_0[:, 1] - unnormalized_boxes_0[:, 3] / 2,
unnormalized_boxes_0[:, 0] + unnormalized_boxes_0[:, 2] / 2,
unnormalized_boxes_0[:, 1] + unnormalized_boxes_0[:, 3] / 2,
]
).T
expected_boxes_1 = torch.vstack(
[
unnormalized_boxes_1[:, 0] - unnormalized_boxes_1[:, 2] / 2,
unnormalized_boxes_1[:, 1] - unnormalized_boxes_1[:, 3] / 2,
unnormalized_boxes_1[:, 0] + unnormalized_boxes_1[:, 2] / 2,
unnormalized_boxes_1[:, 1] + unnormalized_boxes_1[:, 3] / 2,
]
).T
self.assertTrue(torch.allclose(encoding["labels"][0]["boxes"], expected_boxes_0, rtol=1))
self.assertTrue(torch.allclose(encoding["labels"][1]["boxes"], expected_boxes_1, rtol=1))
# Copied from tests.models.detr.test_image_processing_detr.DetrImageProcessingTest.test_max_width_max_height_resizing_and_pad_strategy with Detr->Deta
def test_max_width_max_height_resizing_and_pad_strategy(self):
image_1 = torch.ones([200, 100, 3], dtype=torch.uint8)
# do_pad=False, max_height=100, max_width=100, image=200x100 -> 100x50
image_processor = DetaImageProcessor(
size={"max_height": 100, "max_width": 100},
do_pad=False,
)
inputs = image_processor(images=[image_1], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 100, 50]))
# do_pad=False, max_height=300, max_width=100, image=200x100 -> 200x100
image_processor = DetaImageProcessor(
size={"max_height": 300, "max_width": 100},
do_pad=False,
)
inputs = image_processor(images=[image_1], return_tensors="pt")
# do_pad=True, max_height=100, max_width=100, image=200x100 -> 100x100
image_processor = DetaImageProcessor(
size={"max_height": 100, "max_width": 100}, do_pad=True, pad_size={"height": 100, "width": 100}
)
inputs = image_processor(images=[image_1], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 100, 100]))
# do_pad=True, max_height=300, max_width=100, image=200x100 -> 300x100
image_processor = DetaImageProcessor(
size={"max_height": 300, "max_width": 100},
do_pad=True,
pad_size={"height": 301, "width": 101},
)
inputs = image_processor(images=[image_1], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 301, 101]))
### Check for batch
image_2 = torch.ones([100, 150, 3], dtype=torch.uint8)
# do_pad=True, max_height=150, max_width=100, images=[200x100, 100x150] -> 150x100
image_processor = DetaImageProcessor(
size={"max_height": 150, "max_width": 100},
do_pad=True,
pad_size={"height": 150, "width": 100},
)
inputs = image_processor(images=[image_1, image_2], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100]))

View File

@@ -1,671 +0,0 @@
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Testing suite for the PyTorch DETA model."""
import collections
import inspect
import math
import re
import unittest
from transformers import DetaConfig, ResNetConfig, is_torch_available, is_torchvision_available, is_vision_available
from transformers.file_utils import cached_property
from transformers.testing_utils import require_torchvision, require_vision, slow, torch_device
from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_tensor
from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
import torch
from transformers.pytorch_utils import id_tensor_storage
if is_torchvision_available():
from transformers import DetaForObjectDetection, DetaModel
if is_vision_available():
from PIL import Image
from transformers import AutoImageProcessor
class DetaModelTester:
def __init__(
self,
parent,
batch_size=8,
is_training=True,
use_labels=True,
hidden_size=32,
num_hidden_layers=2,
num_attention_heads=8,
intermediate_size=4,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
num_queries=12,
two_stage_num_proposals=12,
num_channels=3,
image_size=224,
n_targets=8,
num_labels=91,
num_feature_levels=4,
encoder_n_points=2,
decoder_n_points=6,
two_stage=True,
assign_first_stage=True,
assign_second_stage=True,
):
self.parent = parent
self.batch_size = batch_size
self.is_training = is_training
self.use_labels = use_labels
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.num_queries = num_queries
self.two_stage_num_proposals = two_stage_num_proposals
self.num_channels = num_channels
self.image_size = image_size
self.n_targets = n_targets
self.num_labels = num_labels
self.num_feature_levels = num_feature_levels
self.encoder_n_points = encoder_n_points
self.decoder_n_points = decoder_n_points
self.two_stage = two_stage
self.assign_first_stage = assign_first_stage
self.assign_second_stage = assign_second_stage
# we also set the expected seq length for both encoder and decoder
self.encoder_seq_length = (
math.ceil(self.image_size / 8) ** 2
+ math.ceil(self.image_size / 16) ** 2
+ math.ceil(self.image_size / 32) ** 2
+ math.ceil(self.image_size / 64) ** 2
)
self.decoder_seq_length = self.num_queries
def prepare_config_and_inputs(self, model_class_name):
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
pixel_mask = torch.ones([self.batch_size, self.image_size, self.image_size], device=torch_device)
labels = None
if self.use_labels:
# labels is a list of Dict (each Dict being the labels for a given example in the batch)
labels = []
for i in range(self.batch_size):
target = {}
target["class_labels"] = torch.randint(
high=self.num_labels, size=(self.n_targets,), device=torch_device
)
target["boxes"] = torch.rand(self.n_targets, 4, device=torch_device)
target["masks"] = torch.rand(self.n_targets, self.image_size, self.image_size, device=torch_device)
labels.append(target)
config = self.get_config(model_class_name)
return config, pixel_values, pixel_mask, labels
def get_config(self, model_class_name):
resnet_config = ResNetConfig(
num_channels=3,
embeddings_size=10,
hidden_sizes=[10, 20, 30, 40],
depths=[1, 1, 2, 1],
hidden_act="relu",
num_labels=3,
out_features=["stage2", "stage3", "stage4"],
out_indices=[2, 3, 4],
)
two_stage = model_class_name == "DetaForObjectDetection"
assign_first_stage = model_class_name == "DetaForObjectDetection"
assign_second_stage = model_class_name == "DetaForObjectDetection"
return DetaConfig(
d_model=self.hidden_size,
encoder_layers=self.num_hidden_layers,
decoder_layers=self.num_hidden_layers,
encoder_attention_heads=self.num_attention_heads,
decoder_attention_heads=self.num_attention_heads,
encoder_ffn_dim=self.intermediate_size,
decoder_ffn_dim=self.intermediate_size,
dropout=self.hidden_dropout_prob,
attention_dropout=self.attention_probs_dropout_prob,
num_queries=self.num_queries,
two_stage_num_proposals=self.two_stage_num_proposals,
num_labels=self.num_labels,
num_feature_levels=self.num_feature_levels,
encoder_n_points=self.encoder_n_points,
decoder_n_points=self.decoder_n_points,
two_stage=two_stage,
assign_first_stage=assign_first_stage,
assign_second_stage=assign_second_stage,
backbone_config=resnet_config,
backbone=None,
)
def prepare_config_and_inputs_for_common(self, model_class_name="DetaModel"):
config, pixel_values, pixel_mask, labels = self.prepare_config_and_inputs(model_class_name)
inputs_dict = {"pixel_values": pixel_values, "pixel_mask": pixel_mask}
return config, inputs_dict
def create_and_check_deta_model(self, config, pixel_values, pixel_mask, labels):
model = DetaModel(config=config)
model.to(torch_device)
model.eval()
result = model(pixel_values=pixel_values, pixel_mask=pixel_mask)
result = model(pixel_values)
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.num_queries, self.hidden_size))
def create_and_check_deta_freeze_backbone(self, config, pixel_values, pixel_mask, labels):
model = DetaModel(config=config)
model.to(torch_device)
model.eval()
model.freeze_backbone()
for _, param in model.backbone.model.named_parameters():
self.parent.assertEqual(False, param.requires_grad)
def create_and_check_deta_unfreeze_backbone(self, config, pixel_values, pixel_mask, labels):
model = DetaModel(config=config)
model.to(torch_device)
model.eval()
model.unfreeze_backbone()
for _, param in model.backbone.model.named_parameters():
self.parent.assertEqual(True, param.requires_grad)
def create_and_check_deta_object_detection_head_model(self, config, pixel_values, pixel_mask, labels):
model = DetaForObjectDetection(config=config)
model.to(torch_device)
model.eval()
result = model(pixel_values=pixel_values, pixel_mask=pixel_mask)
result = model(pixel_values)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.two_stage_num_proposals, self.num_labels))
self.parent.assertEqual(result.pred_boxes.shape, (self.batch_size, self.two_stage_num_proposals, 4))
result = model(pixel_values=pixel_values, pixel_mask=pixel_mask, labels=labels)
self.parent.assertEqual(result.loss.shape, ())
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.two_stage_num_proposals, self.num_labels))
self.parent.assertEqual(result.pred_boxes.shape, (self.batch_size, self.two_stage_num_proposals, 4))
@require_torchvision
class DetaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (DetaModel, DetaForObjectDetection) if is_torchvision_available() else ()
pipeline_model_mapping = (
{"image-feature-extraction": DetaModel, "object-detection": DetaForObjectDetection}
if is_torchvision_available()
else {}
)
is_encoder_decoder = True
test_torchscript = False
test_pruning = False
test_head_masking = False
test_missing_keys = False
# TODO: Fix the failed tests when this model gets more usage
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
if pipeline_test_casse_name == "ObjectDetectionPipelineTests":
return True
return False
@unittest.skip("Skip for now. PR #22437 causes some loading issue. See (not merged) #22656 for some discussions.")
def test_can_use_safetensors(self):
super().test_can_use_safetensors()
# special case for head models
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
if return_labels:
if model_class.__name__ == "DetaForObjectDetection":
labels = []
for i in range(self.model_tester.batch_size):
target = {}
target["class_labels"] = torch.ones(
size=(self.model_tester.n_targets,), device=torch_device, dtype=torch.long
)
target["boxes"] = torch.ones(
self.model_tester.n_targets, 4, device=torch_device, dtype=torch.float
)
target["masks"] = torch.ones(
self.model_tester.n_targets,
self.model_tester.image_size,
self.model_tester.image_size,
device=torch_device,
dtype=torch.float,
)
labels.append(target)
inputs_dict["labels"] = labels
return inputs_dict
def setUp(self):
self.model_tester = DetaModelTester(self)
self.config_tester = ConfigTester(self, config_class=DetaConfig, has_text_modality=False)
def test_config(self):
# we don't test common_properties and arguments_init as these don't apply for DETA
self.config_tester.create_and_test_config_to_json_string()
self.config_tester.create_and_test_config_to_json_file()
self.config_tester.create_and_test_config_from_and_save_pretrained()
self.config_tester.create_and_test_config_with_num_labels()
self.config_tester.check_config_can_be_init_without_params()
def test_deta_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs(model_class_name="DetaModel")
self.model_tester.create_and_check_deta_model(*config_and_inputs)
def test_deta_freeze_backbone(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs(model_class_name="DetaModel")
self.model_tester.create_and_check_deta_freeze_backbone(*config_and_inputs)
def test_deta_unfreeze_backbone(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs(model_class_name="DetaModel")
self.model_tester.create_and_check_deta_unfreeze_backbone(*config_and_inputs)
def test_deta_object_detection_head_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs(model_class_name="DetaForObjectDetection")
self.model_tester.create_and_check_deta_object_detection_head_model(*config_and_inputs)
@unittest.skip(reason="DETA does not use inputs_embeds")
def test_inputs_embeds(self):
pass
@unittest.skip(reason="DETA does not use inputs_embeds")
def test_inputs_embeds_matches_input_ids(self):
pass
@unittest.skip(reason="DETA does not have a get_input_embeddings method")
def test_model_common_attributes(self):
pass
@unittest.skip(reason="DETA is not a generative model")
def test_generate_without_input_ids(self):
pass
@unittest.skip(reason="DETA does not use token embeddings")
def test_resize_tokens_embeddings(self):
pass
@unittest.skip(reason="Feed forward chunking is not implemented")
def test_feed_forward_chunking(self):
pass
def test_attention_outputs(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.return_dict = True
for model_class in self.all_model_classes:
inputs_dict["output_attentions"] = True
inputs_dict["output_hidden_states"] = False
config.return_dict = True
model = model_class(config)
model.to(torch_device)
model.eval()
with torch.no_grad():
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
attentions = outputs.encoder_attentions
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
# check that output_attentions also work using config
del inputs_dict["output_attentions"]
config.output_attentions = True
model = model_class(config)
model.to(torch_device)
model.eval()
with torch.no_grad():
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
attentions = outputs.encoder_attentions
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
self.assertListEqual(
list(attentions[0].shape[-3:]),
[
self.model_tester.num_attention_heads,
self.model_tester.num_feature_levels,
self.model_tester.encoder_n_points,
],
)
out_len = len(outputs)
correct_outlen = 8
# loss is at first position
if "labels" in inputs_dict:
correct_outlen += 1 # loss is added to beginning
# Object Detection model returns pred_logits and pred_boxes
if model_class.__name__ == "DetaForObjectDetection":
correct_outlen += 2
self.assertEqual(out_len, correct_outlen)
# decoder attentions
decoder_attentions = outputs.decoder_attentions
self.assertIsInstance(decoder_attentions, (list, tuple))
self.assertEqual(len(decoder_attentions), self.model_tester.num_hidden_layers)
self.assertListEqual(
list(decoder_attentions[0].shape[-3:]),
[self.model_tester.num_attention_heads, self.model_tester.num_queries, self.model_tester.num_queries],
)
# cross attentions
cross_attentions = outputs.cross_attentions
self.assertIsInstance(cross_attentions, (list, tuple))
self.assertEqual(len(cross_attentions), self.model_tester.num_hidden_layers)
self.assertListEqual(
list(cross_attentions[0].shape[-3:]),
[
self.model_tester.num_attention_heads,
self.model_tester.num_feature_levels,
self.model_tester.decoder_n_points,
],
)
# Check attention is always last and order is fine
inputs_dict["output_attentions"] = True
inputs_dict["output_hidden_states"] = True
model = model_class(config)
model.to(torch_device)
model.eval()
with torch.no_grad():
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
if hasattr(self.model_tester, "num_hidden_states_types"):
added_hidden_states = self.model_tester.num_hidden_states_types
elif self.is_encoder_decoder:
added_hidden_states = 2
else:
added_hidden_states = 1
self.assertEqual(out_len + added_hidden_states, len(outputs))
self_attentions = outputs.encoder_attentions
self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers)
self.assertListEqual(
list(self_attentions[0].shape[-3:]),
[
self.model_tester.num_attention_heads,
self.model_tester.num_feature_levels,
self.model_tester.encoder_n_points,
],
)
# removed retain_grad and grad on decoder_hidden_states, as queries don't require grad
def test_retain_grad_hidden_states_attentions(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.output_hidden_states = True
config.output_attentions = True
# no need to test all models as different heads yield the same functionality
model_class = self.all_model_classes[0]
model = model_class(config)
model.to(torch_device)
inputs = self._prepare_for_class(inputs_dict, model_class)
outputs = model(**inputs)
# we take the second output since last_hidden_state is the second item
output = outputs[1]
encoder_hidden_states = outputs.encoder_hidden_states[0]
encoder_attentions = outputs.encoder_attentions[0]
encoder_hidden_states.retain_grad()
encoder_attentions.retain_grad()
decoder_attentions = outputs.decoder_attentions[0]
decoder_attentions.retain_grad()
cross_attentions = outputs.cross_attentions[0]
cross_attentions.retain_grad()
output.flatten()[0].backward(retain_graph=True)
self.assertIsNotNone(encoder_hidden_states.grad)
self.assertIsNotNone(encoder_attentions.grad)
self.assertIsNotNone(decoder_attentions.grad)
self.assertIsNotNone(cross_attentions.grad)
def test_forward_auxiliary_loss(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.auxiliary_loss = True
# only test for object detection and segmentation model
for model_class in self.all_model_classes[1:]:
model = model_class(config)
model.to(torch_device)
inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
outputs = model(**inputs)
self.assertIsNotNone(outputs.auxiliary_outputs)
self.assertEqual(len(outputs.auxiliary_outputs), self.model_tester.num_hidden_layers - 1)
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
model = model_class(config)
signature = inspect.signature(model.forward)
# signature.parameters is an OrderedDict => so arg_names order is deterministic
arg_names = [*signature.parameters.keys()]
if model.config.is_encoder_decoder:
expected_arg_names = ["pixel_values", "pixel_mask"]
expected_arg_names.extend(
["head_mask", "decoder_head_mask", "encoder_outputs"]
if "head_mask" and "decoder_head_mask" in arg_names
else []
)
self.assertListEqual(arg_names[: len(expected_arg_names)], expected_arg_names)
else:
expected_arg_names = ["pixel_values", "pixel_mask"]
self.assertListEqual(arg_names[:1], expected_arg_names)
@unittest.skip(reason="Model doesn't use tied weights")
def test_tied_model_weights_key_ignore(self):
pass
def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
configs_no_init = _config_zero_init(config)
for model_class in self.all_model_classes:
model = model_class(config=configs_no_init)
# Skip the check for the backbone
for name, module in model.named_modules():
if module.__class__.__name__ == "DetaBackboneWithPositionalEncodings":
backbone_params = [f"{name}.{key}" for key in module.state_dict().keys()]
break
for name, param in model.named_parameters():
if param.requires_grad:
if (
"level_embed" in name
or "sampling_offsets.bias" in name
or "value_proj" in name
or "output_proj" in name
or "reference_points" in name
or name in backbone_params
):
continue
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
@unittest.skip("No support for low_cpu_mem_usage=True.")
def test_save_load_low_cpu_mem_usage(self):
pass
@unittest.skip("No support for low_cpu_mem_usage=True.")
def test_save_load_low_cpu_mem_usage_checkpoints(self):
pass
@unittest.skip("No support for low_cpu_mem_usage=True.")
def test_save_load_low_cpu_mem_usage_no_safetensors(self):
pass
# Inspired by tests.test_modeling_common.ModelTesterMixin.test_tied_weights_keys
def test_tied_weights_keys(self):
for model_class in self.all_model_classes:
# We need to pass model class name to correctly initialize the config.
# If we don't pass it, the config for `DetaForObjectDetection`` will be initialized
# with `two_stage=False` and the test will fail because for that case `class_embed`
# weights are not tied.
config, _ = self.model_tester.prepare_config_and_inputs_for_common(model_class_name=model_class.__name__)
config.tie_word_embeddings = True
model_tied = model_class(config)
ptrs = collections.defaultdict(list)
for name, tensor in model_tied.state_dict().items():
ptrs[id_tensor_storage(tensor)].append(name)
# These are all the pointers of shared tensors.
tied_params = [names for _, names in ptrs.items() if len(names) > 1]
tied_weight_keys = model_tied._tied_weights_keys if model_tied._tied_weights_keys is not None else []
# Detect we get a hit for each key
for key in tied_weight_keys:
is_tied_key = any(re.search(key, p) for group in tied_params for p in group)
self.assertTrue(is_tied_key, f"{key} is not a tied weight key for {model_class}.")
# Removed tied weights found from tied params -> there should only be one left after
for key in tied_weight_keys:
for i in range(len(tied_params)):
tied_params[i] = [p for p in tied_params[i] if re.search(key, p) is None]
tied_params = [group for group in tied_params if len(group) > 1]
self.assertListEqual(
tied_params,
[],
f"Missing `_tied_weights_keys` for {model_class}: add all of {tied_params} except one.",
)
TOLERANCE = 1e-4
# We will verify our results on an image of cute cats
def prepare_img():
image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
return image
@require_torchvision
@require_vision
@slow
class DetaModelIntegrationTests(unittest.TestCase):
@cached_property
def default_image_processor(self):
return AutoImageProcessor.from_pretrained("jozhang97/deta-resnet-50") if is_vision_available() else None
def test_inference_object_detection_head(self):
model = DetaForObjectDetection.from_pretrained("jozhang97/deta-resnet-50").to(torch_device)
image_processor = self.default_image_processor
image = prepare_img()
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
with torch.no_grad():
outputs = model(**inputs)
expected_shape_logits = torch.Size((1, 300, model.config.num_labels))
self.assertEqual(outputs.logits.shape, expected_shape_logits)
expected_logits = torch.tensor(
[[-7.3978, -2.5406, -4.1668], [-8.2684, -3.9933, -3.8096], [-7.0515, -3.7973, -5.8516]]
).to(torch_device)
expected_boxes = torch.tensor(
[[0.5043, 0.4973, 0.9998], [0.2542, 0.5489, 0.4748], [0.5490, 0.2765, 0.0570]]
).to(torch_device)
self.assertTrue(torch.allclose(outputs.logits[0, :3, :3], expected_logits, atol=1e-4))
expected_shape_boxes = torch.Size((1, 300, 4))
self.assertEqual(outputs.pred_boxes.shape, expected_shape_boxes)
self.assertTrue(torch.allclose(outputs.pred_boxes[0, :3, :3], expected_boxes, atol=1e-4))
# verify postprocessing
results = image_processor.post_process_object_detection(
outputs, threshold=0.3, target_sizes=[image.size[::-1]]
)[0]
expected_scores = torch.tensor([0.6392, 0.6276, 0.5546, 0.5260, 0.4706], device=torch_device)
expected_labels = [75, 17, 17, 75, 63]
expected_slice_boxes = torch.tensor([40.5866, 73.2107, 176.1421, 117.1751], device=torch_device)
self.assertTrue(torch.allclose(results["scores"], expected_scores, atol=1e-4))
self.assertSequenceEqual(results["labels"].tolist(), expected_labels)
self.assertTrue(torch.allclose(results["boxes"][0, :], expected_slice_boxes))
def test_inference_object_detection_head_swin_backbone(self):
model = DetaForObjectDetection.from_pretrained("jozhang97/deta-swin-large").to(torch_device)
image_processor = self.default_image_processor
image = prepare_img()
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
with torch.no_grad():
outputs = model(**inputs)
expected_shape_logits = torch.Size((1, 300, model.config.num_labels))
self.assertEqual(outputs.logits.shape, expected_shape_logits)
expected_logits = torch.tensor(
[[-7.6308, -2.8485, -5.3737], [-7.2037, -4.5505, -4.8027], [-7.2943, -4.2611, -4.6617]]
).to(torch_device)
expected_boxes = torch.tensor(
[[0.4987, 0.4969, 0.9999], [0.2549, 0.5498, 0.4805], [0.5498, 0.2757, 0.0569]]
).to(torch_device)
self.assertTrue(torch.allclose(outputs.logits[0, :3, :3], expected_logits, atol=1e-4))
expected_shape_boxes = torch.Size((1, 300, 4))
self.assertEqual(outputs.pred_boxes.shape, expected_shape_boxes)
self.assertTrue(torch.allclose(outputs.pred_boxes[0, :3, :3], expected_boxes, atol=1e-4))
# verify postprocessing
results = image_processor.post_process_object_detection(
outputs, threshold=0.3, target_sizes=[image.size[::-1]]
)[0]
expected_scores = torch.tensor([0.6831, 0.6826, 0.5684, 0.5464, 0.4392], device=torch_device)
expected_labels = [17, 17, 75, 75, 63]
expected_slice_boxes = torch.tensor([345.8478, 23.6754, 639.8562, 372.8265], device=torch_device)
self.assertTrue(torch.allclose(results["scores"], expected_scores, atol=1e-4))
self.assertSequenceEqual(results["labels"].tolist(), expected_labels)
self.assertTrue(torch.allclose(results["boxes"][0, :], expected_slice_boxes))

View File

@@ -1,99 +0,0 @@
# coding=utf-8
# Copyright 2021 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from transformers.testing_utils import require_torch, require_vision
from transformers.utils import is_vision_available
from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs
if is_vision_available():
from transformers import ViTImageProcessor
class EfficientFormerImageProcessorTester(unittest.TestCase):
def __init__(
self,
parent,
batch_size=13,
num_channels=3,
image_size=224,
min_resolution=30,
max_resolution=400,
do_resize=True,
size=None,
do_normalize=True,
image_mean=[0.5, 0.5, 0.5],
image_std=[0.5, 0.5, 0.5],
):
size = size if size is not None else {"height": 18, "width": 18}
self.parent = parent
self.batch_size = batch_size
self.num_channels = num_channels
self.image_size = image_size
self.min_resolution = min_resolution
self.max_resolution = max_resolution
self.do_resize = do_resize
self.size = size
self.do_normalize = do_normalize
self.image_mean = image_mean
self.image_std = image_std
def prepare_image_processor_dict(self):
return {
"image_mean": self.image_mean,
"image_std": self.image_std,
"do_normalize": self.do_normalize,
"do_resize": self.do_resize,
"size": self.size,
}
def expected_output_image_shape(self, images):
return self.num_channels, self.size["height"], self.size["width"]
def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False):
return prepare_image_inputs(
batch_size=self.batch_size,
num_channels=self.num_channels,
min_resolution=self.min_resolution,
max_resolution=self.max_resolution,
equal_resolution=equal_resolution,
numpify=numpify,
torchify=torchify,
)
@require_torch
@require_vision
class EfficientFormerImageProcessorTest(ImageProcessingTestMixin, unittest.TestCase):
image_processing_class = ViTImageProcessor if is_vision_available() else None
def setUp(self):
self.image_processor_tester = EfficientFormerImageProcessorTester(self)
@property
def image_processor_dict(self):
return self.image_processor_tester.prepare_image_processor_dict()
def test_image_proc_properties(self):
image_processor = self.image_processing_class(**self.image_processor_dict)
self.assertTrue(hasattr(image_processor, "image_mean"))
self.assertTrue(hasattr(image_processor, "image_std"))
self.assertTrue(hasattr(image_processor, "do_normalize"))
self.assertTrue(hasattr(image_processor, "do_resize"))
self.assertTrue(hasattr(image_processor, "size"))

View File

@@ -1,478 +0,0 @@
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Testing suite for the PyTorch EfficientFormer model."""
import unittest
import warnings
from typing import List
from transformers import EfficientFormerConfig
from transformers.testing_utils import require_torch, require_vision, slow, torch_device
from transformers.utils import cached_property, is_torch_available, is_vision_available
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor
from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
import torch
from transformers import (
EfficientFormerForImageClassification,
EfficientFormerForImageClassificationWithTeacher,
EfficientFormerModel,
)
from transformers.models.auto.modeling_auto import (
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES,
MODEL_MAPPING_NAMES,
)
if is_vision_available():
from PIL import Image
from transformers import EfficientFormerImageProcessor
class EfficientFormerModelTester:
def __init__(
self,
parent,
batch_size: int = 13,
image_size: int = 64,
patch_size: int = 2,
embed_dim: int = 3,
num_channels: int = 3,
is_training: bool = True,
use_labels: bool = True,
hidden_size: int = 128,
hidden_sizes=[16, 32, 64, 128],
num_hidden_layers: int = 7,
num_attention_heads: int = 4,
intermediate_size: int = 37,
hidden_act: str = "gelu",
hidden_dropout_prob: float = 0.1,
attention_probs_dropout_prob: float = 0.1,
type_sequence_label_size: int = 10,
initializer_range: float = 0.02,
encoder_stride: int = 2,
num_attention_outputs: int = 1,
dim: int = 128,
depths: List[int] = [2, 2, 2, 2],
resolution: int = 2,
mlp_expansion_ratio: int = 2,
):
self.parent = parent
self.batch_size = batch_size
self.image_size = image_size
self.patch_size = patch_size
self.num_channels = num_channels
self.is_training = is_training
self.use_labels = use_labels
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.encoder_stride = encoder_stride
self.num_attention_outputs = num_attention_outputs
self.embed_dim = embed_dim
self.seq_length = embed_dim + 1
self.resolution = resolution
self.depths = depths
self.hidden_sizes = hidden_sizes
self.dim = dim
self.mlp_expansion_ratio = mlp_expansion_ratio
def prepare_config_and_inputs(self):
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
labels = None
if self.use_labels:
labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
config = self.get_config()
return config, pixel_values, labels
def get_config(self):
return EfficientFormerConfig(
image_size=self.image_size,
patch_size=self.patch_size,
num_channels=self.num_channels,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
num_attention_heads=self.num_attention_heads,
intermediate_size=self.intermediate_size,
hidden_act=self.hidden_act,
hidden_dropout_prob=self.hidden_dropout_prob,
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
is_decoder=False,
initializer_range=self.initializer_range,
encoder_stride=self.encoder_stride,
resolution=self.resolution,
depths=self.depths,
hidden_sizes=self.hidden_sizes,
dim=self.dim,
mlp_expansion_ratio=self.mlp_expansion_ratio,
)
def create_and_check_model(self, config, pixel_values, labels):
model = EfficientFormerModel(config=config)
model.to(torch_device)
model.eval()
result = model(pixel_values)
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
def create_and_check_for_image_classification(self, config, pixel_values, labels):
config.num_labels = self.type_sequence_label_size
model = EfficientFormerForImageClassification(config)
model.to(torch_device)
model.eval()
result = model(pixel_values, labels=labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.type_sequence_label_size))
# test greyscale images
config.num_channels = 1
model = EfficientFormerForImageClassification(config)
model.to(torch_device)
model.eval()
pixel_values = floats_tensor([self.batch_size, 1, self.image_size, self.image_size])
result = model(pixel_values)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.type_sequence_label_size))
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(
config,
pixel_values,
labels,
) = config_and_inputs
inputs_dict = {"pixel_values": pixel_values}
return config, inputs_dict
@require_torch
class EfficientFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
"""
Here we also overwrite some of the tests of test_modeling_common.py, as EfficientFormer does not use input_ids, inputs_embeds,
attention_mask and seq_length.
"""
all_model_classes = (
(
EfficientFormerModel,
EfficientFormerForImageClassificationWithTeacher,
EfficientFormerForImageClassification,
)
if is_torch_available()
else ()
)
pipeline_model_mapping = (
{
"image-feature-extraction": EfficientFormerModel,
"image-classification": (
EfficientFormerForImageClassification,
EfficientFormerForImageClassificationWithTeacher,
),
}
if is_torch_available()
else {}
)
fx_compatible = False
test_pruning = False
test_resize_embeddings = False
test_head_masking = False
def setUp(self):
self.model_tester = EfficientFormerModelTester(self)
self.config_tester = ConfigTester(
self, config_class=EfficientFormerConfig, has_text_modality=False, hidden_size=37
)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="EfficientFormer does not use inputs_embeds")
def test_inputs_embeds(self):
pass
@unittest.skip(reason="EfficientFormer does not support input and output embeddings")
def test_model_common_attributes(self):
pass
def test_hidden_states_output(self):
def check_hidden_states_output(inputs_dict, config, model_class):
model = model_class(config)
model.to(torch_device)
model.eval()
with torch.no_grad():
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
hidden_states = outputs.encoder_hidden_states if config.is_encoder_decoder else outputs.hidden_states
expected_num_layers = getattr(
self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
)
self.assertEqual(len(hidden_states), expected_num_layers)
if hasattr(self.model_tester, "encoder_seq_length"):
seq_length = self.model_tester.encoder_seq_length
if hasattr(self.model_tester, "chunk_length") and self.model_tester.chunk_length > 1:
seq_length = seq_length * self.model_tester.chunk_length
else:
seq_length = self.model_tester.seq_length
self.assertListEqual(
list(hidden_states[-1].shape[-2:]),
[seq_length, self.model_tester.hidden_size],
)
if config.is_encoder_decoder:
hidden_states = outputs.decoder_hidden_states
self.assertIsInstance(hidden_states, (list, tuple))
self.assertEqual(len(hidden_states), expected_num_layers)
seq_len = getattr(self.model_tester, "seq_length", None)
decoder_seq_length = getattr(self.model_tester, "decoder_seq_length", seq_len)
self.assertListEqual(
list(hidden_states[-1].shape[-2:]),
[decoder_seq_length, self.model_tester.hidden_size],
)
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
inputs_dict["output_hidden_states"] = True
check_hidden_states_output(inputs_dict, config, model_class)
# check that output_hidden_states also work using config
del inputs_dict["output_hidden_states"]
config.output_hidden_states = True
check_hidden_states_output(inputs_dict, config, model_class)
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
if return_labels:
if model_class.__name__ == "EfficientFormerForImageClassificationWithTeacher":
del inputs_dict["labels"]
return inputs_dict
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
@unittest.skip(reason="EfficientFormer does not implement masked image modeling yet")
def test_for_masked_image_modeling(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_masked_image_modeling(*config_and_inputs)
def test_for_image_classification(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_image_classification(*config_and_inputs)
# special case for EfficientFormerForImageClassificationWithTeacher model
def test_training(self):
if not self.model_tester.is_training:
return
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.return_dict = True
for model_class in self.all_model_classes:
# EfficientFormerForImageClassificationWithTeacher supports inference-only
if (
model_class.__name__ in MODEL_MAPPING_NAMES.values()
or model_class.__name__ == "EfficientFormerForImageClassificationWithTeacher"
):
continue
model = model_class(config)
model.to(torch_device)
model.train()
inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
loss = model(**inputs).loss
loss.backward()
def test_problem_types(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
problem_types = [
{"title": "multi_label_classification", "num_labels": 2, "dtype": torch.float},
{"title": "single_label_classification", "num_labels": 1, "dtype": torch.long},
{"title": "regression", "num_labels": 1, "dtype": torch.float},
]
for model_class in self.all_model_classes:
if (
model_class.__name__
not in [
*MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES.values(),
]
or model_class.__name__ == "EfficientFormerForImageClassificationWithTeacher"
):
continue
for problem_type in problem_types:
with self.subTest(msg=f"Testing {model_class} with {problem_type['title']}"):
config.problem_type = problem_type["title"]
config.num_labels = problem_type["num_labels"]
model = model_class(config)
model.to(torch_device)
model.train()
inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
if problem_type["num_labels"] > 1:
inputs["labels"] = inputs["labels"].unsqueeze(1).repeat(1, problem_type["num_labels"])
inputs["labels"] = inputs["labels"].to(problem_type["dtype"])
# This tests that we do not trigger the warning form PyTorch "Using a target size that is different
# to the input size. This will likely lead to incorrect results due to broadcasting. Please ensure
# they have the same size." which is a symptom something in wrong for the regression problem.
# See https://github.com/huggingface/transformers/issues/11780
with warnings.catch_warnings(record=True) as warning_list:
loss = model(**inputs).loss
for w in warning_list:
if "Using a target size that is different to the input size" in str(w.message):
raise ValueError(
f"Something is going wrong in the regression problem: intercepted {w.message}"
)
loss.backward()
@slow
def test_model_from_pretrained(self):
model_name = "snap-research/efficientformer-l1-300"
model = EfficientFormerModel.from_pretrained(model_name)
self.assertIsNotNone(model)
def test_attention_outputs(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.return_dict = True
seq_len = getattr(self.model_tester, "seq_length", None)
encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", seq_len)
encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length)
chunk_length = getattr(self.model_tester, "chunk_length", None)
if chunk_length is not None and hasattr(self.model_tester, "num_hashes"):
encoder_seq_length = encoder_seq_length * self.model_tester.num_hashes
for model_class in self.all_model_classes:
inputs_dict["output_attentions"] = True
inputs_dict["output_hidden_states"] = False
config.return_dict = True
model = model_class(config)
model.to(torch_device)
model.eval()
with torch.no_grad():
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions
self.assertEqual(len(attentions), self.model_tester.num_attention_outputs)
# check that output_attentions also work using config
del inputs_dict["output_attentions"]
config.output_attentions = True
model = model_class(config)
model.to(torch_device)
model.eval()
with torch.no_grad():
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions
self.assertEqual(len(attentions), self.model_tester.num_attention_outputs)
if chunk_length is not None:
self.assertListEqual(
list(attentions[0].shape[-4:]),
[self.model_tester.num_attention_heads, encoder_seq_length, chunk_length, encoder_key_length],
)
else:
self.assertListEqual(
list(attentions[0].shape[-3:]),
[self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
)
# We will verify our results on an image of cute cats
def prepare_img():
image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
return image
@require_torch
@require_vision
class EfficientFormerModelIntegrationTest(unittest.TestCase):
@cached_property
def default_image_processor(self):
return (
EfficientFormerImageProcessor.from_pretrained("snap-research/efficientformer-l1-300")
if is_vision_available()
else None
)
@slow
def test_inference_image_classification_head(self):
model = EfficientFormerForImageClassification.from_pretrained("snap-research/efficientformer-l1-300").to(
torch_device
)
image_processor = self.default_image_processor
image = prepare_img()
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
# forward pass
with torch.no_grad():
outputs = model(**inputs)
# verify the logits
expected_shape = (1, 1000)
self.assertEqual(outputs.logits.shape, expected_shape)
expected_slice = torch.tensor([-0.0555, 0.4825, -0.0852]).to(torch_device)
self.assertTrue(torch.allclose(outputs.logits[0][:3], expected_slice, atol=1e-4))
@slow
def test_inference_image_classification_head_with_teacher(self):
model = EfficientFormerForImageClassificationWithTeacher.from_pretrained(
"snap-research/efficientformer-l1-300"
).to(torch_device)
image_processor = self.default_image_processor
image = prepare_img()
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
# forward pass
with torch.no_grad():
outputs = model(**inputs)
# verify the logits
expected_shape = (1, 1000)
self.assertEqual(outputs.logits.shape, expected_shape)
expected_slice = torch.tensor([-0.1312, 0.4353, -1.0499]).to(torch_device)
self.assertTrue(torch.allclose(outputs.logits[0][:3], expected_slice, atol=1e-4))

View File

@@ -1,409 +0,0 @@
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Testing suite for the TensorFlow EfficientFormer model."""
import inspect
import unittest
from typing import List
import numpy as np
from transformers import EfficientFormerConfig
from transformers.testing_utils import require_tf, require_vision, slow
from transformers.utils import cached_property, is_tf_available, is_vision_available
from ...test_configuration_common import ConfigTester
from ...test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor
from ...test_pipeline_mixin import PipelineTesterMixin
if is_tf_available():
import tensorflow as tf
from transformers import (
TFEfficientFormerForImageClassification,
TFEfficientFormerForImageClassificationWithTeacher,
TFEfficientFormerModel,
)
from transformers.modeling_tf_utils import keras
if is_vision_available():
from PIL import Image
from transformers import EfficientFormerImageProcessor
class TFEfficientFormerModelTester:
def __init__(
self,
parent,
batch_size: int = 13,
image_size: int = 64,
patch_size: int = 2,
embed_dim: int = 3,
num_channels: int = 3,
is_training: bool = True,
use_labels: bool = True,
hidden_size: int = 128,
hidden_sizes=[16, 32, 64, 128],
num_hidden_layers: int = 7,
num_attention_heads: int = 4,
intermediate_size: int = 37,
hidden_act: str = "gelu",
hidden_dropout_prob: float = 0.1,
attention_probs_dropout_prob: float = 0.1,
type_sequence_label_size: int = 10,
initializer_range: float = 0.02,
encoder_stride: int = 2,
num_attention_outputs: int = 1,
dim: int = 128,
depths: List[int] = [2, 2, 2, 2],
resolution: int = 2,
mlp_expansion_ratio: int = 2,
):
self.parent = parent
self.batch_size = batch_size
self.image_size = image_size
self.patch_size = patch_size
self.num_channels = num_channels
self.is_training = is_training
self.use_labels = use_labels
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.encoder_stride = encoder_stride
self.num_attention_outputs = num_attention_outputs
self.embed_dim = embed_dim
self.seq_length = embed_dim + 1
self.resolution = resolution
self.depths = depths
self.hidden_sizes = hidden_sizes
self.dim = dim
self.mlp_expansion_ratio = mlp_expansion_ratio
def prepare_config_and_inputs(self):
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
labels = None
if self.use_labels:
labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
config = self.get_config()
return config, pixel_values, labels
def get_config(self):
return EfficientFormerConfig(
image_size=self.image_size,
patch_size=self.patch_size,
num_channels=self.num_channels,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
num_attention_heads=self.num_attention_heads,
intermediate_size=self.intermediate_size,
hidden_act=self.hidden_act,
hidden_dropout_prob=self.hidden_dropout_prob,
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
is_decoder=False,
initializer_range=self.initializer_range,
encoder_stride=self.encoder_stride,
resolution=self.resolution,
depths=self.depths,
hidden_sizes=self.hidden_sizes,
dim=self.dim,
mlp_expansion_ratio=self.mlp_expansion_ratio,
)
def create_and_check_model(self, config, pixel_values, labels):
model = TFEfficientFormerModel(config=config)
result = model(pixel_values, training=False)
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
def create_and_check_for_image_classification(self, config, pixel_values, labels):
config.num_labels = self.type_sequence_label_size
model = TFEfficientFormerForImageClassification(config)
result = model(pixel_values, labels=labels, training=False)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.type_sequence_label_size))
# test greyscale images
config.num_channels = 1
model = TFEfficientFormerForImageClassification(config)
pixel_values = floats_tensor([self.batch_size, 1, self.image_size, self.image_size])
result = model(pixel_values, labels=labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.type_sequence_label_size))
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
config, pixel_values, labels = config_and_inputs
inputs_dict = {"pixel_values": pixel_values}
return config, inputs_dict
@require_tf
class TFEfficientFormerModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
"""
Here we also overwrite some of the tests of test_modeling_tf_common.py, as EfficientFormer does not use input_ids,
inputs_embeds, attention_mask and seq_length.
"""
all_model_classes = (
(
TFEfficientFormerModel,
TFEfficientFormerForImageClassificationWithTeacher,
TFEfficientFormerForImageClassification,
)
if is_tf_available()
else ()
)
pipeline_model_mapping = (
{
"feature-extraction": TFEfficientFormerModel,
"image-classification": (
TFEfficientFormerForImageClassification,
TFEfficientFormerForImageClassificationWithTeacher,
),
}
if is_tf_available()
else {}
)
fx_compatible = False
test_pruning = False
test_resize_embeddings = False
test_head_masking = False
test_onnx = False
def setUp(self):
self.model_tester = TFEfficientFormerModelTester(self)
self.config_tester = ConfigTester(
self, config_class=EfficientFormerConfig, has_text_modality=False, hidden_size=37
)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="EfficientFormer does not use inputs_embeds")
def test_inputs_embeds(self):
pass
@unittest.skip(reason="EfficientFormer does not support input and output embeddings")
def test_model_common_attributes(self):
pass
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
model = model_class(config)
signature = inspect.signature(model.call)
# signature.parameters is an OrderedDict => so arg_names order is deterministic
arg_names = [*signature.parameters.keys()]
expected_arg_names = ["pixel_values"]
self.assertListEqual(arg_names[:1], expected_arg_names)
def test_hidden_states_output(self):
def check_hidden_states_output(inputs_dict, config, model_class):
model = model_class(config)
outputs = model(**self._prepare_for_class(inputs_dict, model_class), training=False)
hidden_states = outputs.encoder_hidden_states if config.is_encoder_decoder else outputs.hidden_states
expected_num_layers = getattr(
self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
)
self.assertEqual(len(hidden_states), expected_num_layers)
if hasattr(self.model_tester, "encoder_seq_length"):
seq_length = self.model_tester.encoder_seq_length
if hasattr(self.model_tester, "chunk_length") and self.model_tester.chunk_length > 1:
seq_length = seq_length * self.model_tester.chunk_length
else:
seq_length = self.model_tester.seq_length
self.assertListEqual(
list(hidden_states[-1].shape[-2:]),
[seq_length, self.model_tester.hidden_size],
)
if config.is_encoder_decoder:
hidden_states = outputs.decoder_hidden_states
self.asseretIsInstance(hidden_states, (list, tuple))
self.assertEqual(len(hidden_states), expected_num_layers)
seq_len = getattr(self.model_tester, "seq_length", None)
decoder_seq_length = getattr(self.model_tester, "decoder_seq_length", seq_len)
self.assertListEqual(
list(hidden_states[-1].shape[-2:]),
[decoder_seq_length, self.model_tester.hidden_size],
)
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
inputs_dict["output_hidden_states"] = True
check_hidden_states_output(inputs_dict, config, model_class)
# check that output_hidden_states also work using config
del inputs_dict["output_hidden_states"]
config.output_hidden_states = True
check_hidden_states_output(inputs_dict, config, model_class)
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
if return_labels:
if model_class.__name__ == "TFEfficientFormerForImageClassificationWithTeacher":
del inputs_dict["labels"]
return inputs_dict
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
@unittest.skip(reason="EfficientFormer does not implement masked image modeling yet")
def test_for_masked_image_modeling(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_masked_image_modeling(*config_and_inputs)
def test_for_image_classification(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_image_classification(*config_and_inputs)
@slow
def test_model_from_pretrained(self):
model_name = "snap-research/efficientformer-l1-300"
model = TFEfficientFormerModel.from_pretrained(model_name)
self.assertIsNotNone(model)
def test_attention_outputs(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.return_dict = True
seq_len = getattr(self.model_tester, "seq_length", None)
encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", seq_len)
encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length)
chunk_length = getattr(self.model_tester, "chunk_length", None)
if chunk_length is not None and hasattr(self.model_tester, "num_hashes"):
encoder_seq_length = encoder_seq_length * self.model_tester.num_hashes
for model_class in self.all_model_classes:
inputs_dict["output_attentions"] = True
inputs_dict["output_hidden_states"] = False
config.return_dict = True
model = model_class(config)
outputs = model(**self._prepare_for_class(inputs_dict, model_class), training=False)
attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions
self.assertEqual(len(attentions), self.model_tester.num_attention_outputs)
# check that output_attentions also work using config
del inputs_dict["output_attentions"]
config.output_attentions = True
model = model_class(config)
outputs = model(**self._prepare_for_class(inputs_dict, model_class), training=False)
attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions
self.assertEqual(len(attentions), self.model_tester.num_attention_outputs)
if chunk_length is not None:
self.assertListEqual(
list(attentions[0].shape[-4:]),
[self.model_tester.num_attention_heads, encoder_seq_length, chunk_length, encoder_key_length],
)
else:
self.assertListEqual(
list(attentions[0].shape[-3:]),
[self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
)
def test_compile_tf_model(self):
# We use a simplified version of this test for EfficientFormer because it requires training=False
# and Keras refuses to let us force that during functional construction
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
# Prepare our model
model = model_class(config)
# These are maximally general inputs for the model, with multiple None dimensions
# Hopefully this will catch any conditionals that fail for flexible shapes
functional_inputs = {
key: keras.Input(shape=val.shape[1:], dtype=val.dtype, name=key)
for key, val in model.input_signature.items()
if key in model.dummy_inputs
}
outputs_dict = model(functional_inputs)
self.assertTrue(outputs_dict is not None)
# We will verify our results on an image of cute cats
def prepare_img():
image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
return image
@require_tf
@require_vision
class EfficientFormerModelIntegrationTest(unittest.TestCase):
@cached_property
def default_image_processor(self):
return (
EfficientFormerImageProcessor.from_pretrained("snap-research/efficientformer-l1-300")
if is_vision_available()
else None
)
@slow
def test_inference_image_classification_head(self):
model = TFEfficientFormerForImageClassification.from_pretrained("snap-research/efficientformer-l1-300")
image_processor = self.default_image_processor
image = prepare_img()
inputs = image_processor(images=image, return_tensors="tf")
# forward pass
outputs = model(**inputs, training=False)
# verify the logits
expected_shape = tf.TensorShape((1, 1000))
self.assertEqual(outputs.logits.shape, expected_shape)
expected_slice = tf.constant([-0.0555, 0.4825, -0.0852])
self.assertTrue(np.allclose(outputs.logits[0, :3], expected_slice, atol=1e-4))
@slow
def test_inference_image_classification_head_with_teacher(self):
model = TFEfficientFormerForImageClassificationWithTeacher.from_pretrained(
"snap-research/efficientformer-l1-300"
)
image_processor = self.default_image_processor
image = prepare_img()
inputs = image_processor(images=image, return_tensors="tf")
# forward pass
outputs = model(**inputs, training=False)
# verify the logits
expected_shape = tf.TensorShape((1, 1000))
self.assertEqual(outputs.logits.shape, expected_shape)
expected_slice = tf.constant([-0.1312, 0.4353, -1.0499])
self.assertTrue(np.allclose(outputs.logits[0, :3], expected_slice, atol=1e-4))

View File

@@ -1,323 +0,0 @@
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. and Baidu team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Testing suite for the PyTorch ErnieM model."""
import unittest
from transformers import ErnieMConfig, is_torch_available
from transformers.testing_utils import require_torch, slow, torch_device
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask
from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
import torch
from transformers import (
ErnieMForInformationExtraction,
ErnieMForMultipleChoice,
ErnieMForQuestionAnswering,
ErnieMForSequenceClassification,
ErnieMForTokenClassification,
ErnieMModel,
)
class ErnieMModelTester:
def __init__(
self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_input_mask=True,
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
num_choices=4,
scope=None,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_mask = use_input_mask
self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
input_mask = None
if self.use_input_mask:
input_mask = random_attention_mask([self.batch_size, self.seq_length])
sequence_labels = None
token_labels = None
choice_labels = None
if self.use_labels:
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)
config = self.get_config()
return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
def prepare_config_and_inputs_for_uiem(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
input_mask = None
if self.use_input_mask:
input_mask = random_attention_mask([self.batch_size, self.seq_length])
config = self.get_config()
return config, input_ids, input_mask
def get_config(self):
return ErnieMConfig(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
num_attention_heads=self.num_attention_heads,
intermediate_size=self.intermediate_size,
hidden_act=self.hidden_act,
hidden_dropout_prob=self.hidden_dropout_prob,
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range,
)
def create_and_check_model(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels):
model = ErnieMModel(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, return_dict=True)
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
def create_and_check_for_question_answering(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = ErnieMForQuestionAnswering(config=config)
model.to(torch_device)
model.eval()
result = model(
input_ids,
attention_mask=input_mask,
start_positions=sequence_labels,
end_positions=sequence_labels,
)
self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))
def create_and_check_for_information_extraction(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = ErnieMForInformationExtraction(config=config)
model.to(torch_device)
model.eval()
sequence_labels = torch.ones_like(input_ids, dtype=torch.float32)
result = model(
input_ids,
attention_mask=input_mask,
start_positions=sequence_labels,
end_positions=sequence_labels,
)
self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))
def create_and_check_for_sequence_classification(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = ErnieMForSequenceClassification(config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, labels=sequence_labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))
def create_and_check_for_token_classification(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = ErnieMForTokenClassification(config=config)
model.to(torch_device)
model.eval()
input_ids.to(torch_device)
input_mask.to(torch_device)
token_labels.to(torch_device)
result = model(input_ids, attention_mask=input_mask, labels=token_labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))
def create_and_check_for_multiple_choice(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_choices = self.num_choices
model = ErnieMForMultipleChoice(config=config)
model.to(torch_device)
model.eval()
multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
result = model(
multiple_choice_inputs_ids,
attention_mask=multiple_choice_input_mask,
labels=choice_labels,
)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices))
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(
config,
input_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask}
return config, inputs_dict
@require_torch
class ErnieMModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(
ErnieMModel,
ErnieMForMultipleChoice,
ErnieMForQuestionAnswering,
ErnieMForSequenceClassification,
ErnieMForTokenClassification,
)
if is_torch_available()
else ()
)
all_generative_model_classes = ()
pipeline_model_mapping = (
{
"feature-extraction": ErnieMModel,
"question-answering": ErnieMForQuestionAnswering,
"text-classification": ErnieMForSequenceClassification,
"token-classification": ErnieMForTokenClassification,
"zero-shot": ErnieMForSequenceClassification,
}
if is_torch_available()
else {}
)
test_torchscript = False
# TODO: Fix the failed tests when this model gets more usage
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
if pipeline_test_casse_name == "QAPipelineTests":
return True
return False
def setUp(self):
self.model_tester = ErnieMModelTester(self)
self.config_tester = ConfigTester(self, config_class=ErnieMConfig, hidden_size=37)
def test_config(self):
self.config_tester.run_common_tests()
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
def test_model_various_embeddings(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
for type in ["absolute", "relative_key", "relative_key_query"]:
config_and_inputs[0].position_embedding_type = type
self.model_tester.create_and_check_model(*config_and_inputs)
def test_for_multiple_choice(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs)
def test_for_question_answering(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_question_answering(*config_and_inputs)
def test_for_sequence_classification(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_sequence_classification(*config_and_inputs)
def test_for_information_extraction(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_information_extraction(*config_and_inputs)
def test_for_token_classification(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_token_classification(*config_and_inputs)
@slow
def test_model_from_pretrained(self):
model_name = "susnato/ernie-m-base_pytorch"
model = ErnieMModel.from_pretrained(model_name)
self.assertIsNotNone(model)
@require_torch
class ErnieMModelIntegrationTest(unittest.TestCase):
@slow
def test_inference_model(self):
model = ErnieMModel.from_pretrained("susnato/ernie-m-base_pytorch")
model.eval()
input_ids = torch.tensor([[0, 1, 2, 3, 4, 5]])
output = model(input_ids)[0]
# TODO Replace vocab size
hidden_size = 768
expected_shape = torch.Size((1, 6, hidden_size))
self.assertEqual(output.shape, expected_shape)
expected_slice = torch.tensor(
[[[-0.0012, 0.1245, -0.0214], [-0.0742, 0.0244, -0.0771], [-0.0333, 0.1164, -0.1554]]]
)
self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-3))

View File

@@ -1,143 +0,0 @@
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. and Baidu team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Testing suite for the PyTorch ErnieM model."""
import unittest
from transformers import ErnieMTokenizer
from transformers.testing_utils import get_tests_dir, require_sentencepiece, require_tokenizers, slow
from ...test_tokenization_common import TokenizerTesterMixin
SAMPLE_VOCAB = get_tests_dir("fixtures/spiece.model")
@require_sentencepiece
@require_tokenizers
class ErnieMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
from_pretrained_id = "susnato/ernie-m-base_pytorch"
tokenizer_class = ErnieMTokenizer
test_seq2seq = False
test_sentencepiece = True
test_rust_tokenizer = False
test_sentencepiece_ignore_case = False
def setUp(self):
super().setUp()
# We have a SentencePiece fixture for testing
tokenizer = ErnieMTokenizer(SAMPLE_VOCAB, unk_token="<unk>", pad_token="<pad>")
tokenizer.save_pretrained(self.tmpdirname)
def get_input_output_texts(self, tokenizer):
input_text = "this is a test"
output_text = "this is a test"
return input_text, output_text
def test_convert_token_and_id(self):
"""Test ``_convert_token_to_id`` and ``_convert_id_to_token``."""
token = "<pad>"
token_id = 0
self.assertEqual(self.get_tokenizer()._convert_token_to_id(token), token_id)
self.assertEqual(self.get_tokenizer()._convert_id_to_token(token_id), token)
def test_get_vocab(self):
vocab_keys = list(self.get_tokenizer().get_vocab().keys())
self.assertEqual(vocab_keys[0], "<pad>")
self.assertEqual(vocab_keys[1], "<unk>")
self.assertEqual(vocab_keys[-1], "▁eloquent")
self.assertEqual(len(vocab_keys), 30_000)
def test_vocab_size(self):
self.assertEqual(self.get_tokenizer().vocab_size, 30_000)
def test_rust_and_python_full_tokenizers(self):
if not self.test_rust_tokenizer:
return
tokenizer = self.get_tokenizer()
rust_tokenizer = self.get_rust_tokenizer()
sequence = "I was born in 92000, and this is falsé."
tokens = tokenizer.tokenize(sequence)
rust_tokens = rust_tokenizer.tokenize(sequence)
self.assertListEqual(tokens, rust_tokens)
ids = tokenizer.encode(sequence, add_special_tokens=False)
rust_ids = rust_tokenizer.encode(sequence, add_special_tokens=False)
self.assertListEqual(ids, rust_ids)
rust_tokenizer = self.get_rust_tokenizer()
ids = tokenizer.encode(sequence)
rust_ids = rust_tokenizer.encode(sequence)
self.assertListEqual(ids, rust_ids)
def test_full_tokenizer(self):
tokenizer = ErnieMTokenizer(SAMPLE_VOCAB, do_lower_case=True, unk_token="<unk>", pad_token="<pad>")
tokens = tokenizer.tokenize("This is a test")
self.assertListEqual(tokens, ["▁this", "▁is", "▁a", "▁test"])
self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [48, 25, 21, 1289])
tokens = tokenizer.tokenize("I was born in 92000, and this is falsé.")
# ErnieMTokenizer(paddlenlp implementation) outputs '9' instead of '_9' so to mimic that '_9' is changed to '9'
self.assertListEqual(
tokens, ["▁i", "▁was", "▁born", "▁in", "9", "2000", ",", "▁and", "▁this", "▁is", "▁fal", "s", "é", "."]
)
ids = tokenizer.convert_tokens_to_ids(tokens)
self.assertListEqual(ids, [31, 23, 386, 19, 518, 3050, 15, 17, 48, 25, 8256, 18, 1, 9])
back_tokens = tokenizer.convert_ids_to_tokens(ids)
self.assertListEqual(
back_tokens,
["▁i", "▁was", "▁born", "▁in", "9", "2000", ",", "▁and", "▁this", "▁is", "▁fal", "s", "<unk>", "."],
)
def test_sequence_builders(self):
tokenizer = ErnieMTokenizer(SAMPLE_VOCAB, unk_token="<unk>", pad_token="<pad>")
text = tokenizer.encode("sequence builders")
text_2 = tokenizer.encode("multi-sequence build")
encoded_sentence = tokenizer.build_inputs_with_special_tokens(text)
encoded_pair = tokenizer.build_inputs_with_special_tokens(text, text_2)
assert encoded_sentence == [tokenizer.cls_token_id] + text + [tokenizer.sep_token_id]
assert encoded_pair == [tokenizer.cls_token_id] + text + [tokenizer.sep_token_id] + [
tokenizer.sep_token_id
] + text_2 + [tokenizer.sep_token_id]
@slow
def test_tokenizer_integration(self):
expected_encoding = {'input_ids': [[0, 11062, 82772, 7, 15, 82772, 538, 51529, 237, 17198, 1290, 206, 9, 215175, 1314, 136, 17198, 1290, 206, 9, 56359, 42, 122009, 9, 16466, 16, 87344, 4537, 9, 4717, 78381, 6, 159958, 7, 15, 24480, 618, 4, 527, 22693, 9, 304, 4, 2777, 24480, 9874, 4, 43523, 594, 4, 803, 18392, 33189, 18, 4, 43523, 24447, 5, 5, 5, 16, 100, 24955, 83658, 9626, 144057, 15, 839, 22335, 16, 136, 24955, 83658, 83479, 15, 39102, 724, 16, 678, 645, 6460, 1328, 4589, 42, 122009, 115774, 23, 3559, 1328, 46876, 7, 136, 53894, 1940, 42227, 41159, 17721, 823, 425, 4, 27512, 98722, 206, 136, 5531, 4970, 919, 17336, 5, 2], [0, 20080, 618, 83, 82775, 47, 479, 9, 1517, 73, 53894, 333, 80581, 110117, 18811, 5256, 1295, 51, 152526, 297, 7986, 390, 124416, 538, 35431, 214, 98, 15044, 25737, 136, 7108, 43701, 23, 756, 135355, 7, 5, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [0, 581, 63773, 119455, 6, 147797, 88203, 7, 645, 70, 21, 3285, 10269, 5, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]} # fmt: skip
self.tokenizer_integration_test_util(
expected_encoding=expected_encoding,
model_name="susnato/ernie-m-base_pytorch",
sequences=[
"Transformers (formerly known as pytorch-transformers and pytorch-pretrained-bert) provides "
"general-purpose architectures (BERT, GPT-2, RoBERTa, XLM, DistilBert, XLNet...) for Natural "
"Language Understanding (NLU) and Natural Language Generation (NLG) with over32+ pretrained "
"models in100+ languages and deep interoperability between Jax, PyTorch and TensorFlow.",
"BERT is designed to pre-train deep bidirectional representations from unlabeled text by jointly "
"conditioning on both left and right context in all layers.",
"The quick brown fox jumps over the lazy dog.",
],
)

View File

@@ -1,476 +0,0 @@
# coding=utf-8
# Copyright 2023 Toshiyuki Sakamoto(tanreinama) and HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from transformers import (
GPTSanJapaneseConfig,
GPTSanJapaneseForConditionalGeneration,
GPTSanJapaneseModel,
GPTSanJapaneseTokenizer,
is_torch_available,
)
from transformers.generation import GenerationConfig
from transformers.testing_utils import require_torch, slow, tooslow, torch_device
from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, ids_tensor
from ...test_pipeline_mixin import PipelineTesterMixin
class GPTSanJapaneseTester:
def __init__(
self,
parent,
vocab_size=99,
batch_size=13,
num_contexts=7,
# For common tests
is_training=True,
hidden_size=32,
ext_size=42,
num_hidden_layers=2,
num_ext_layers=2,
num_attention_heads=4,
num_experts=2,
d_ff=32,
d_ext=80,
d_spout=33,
dropout_rate=0.0,
layer_norm_epsilon=1e-6,
expert_capacity=100,
router_jitter_noise=0.0,
):
self.vocab_size = vocab_size
self.parent = parent
self.batch_size = batch_size
self.num_contexts = num_contexts
# For common tests
self.seq_length = self.num_contexts
self.is_training = is_training
self.hidden_size = hidden_size
self.num_ext_layers = num_ext_layers
self.ext_size = ext_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.num_experts = num_experts
self.d_ff = d_ff
self.d_ext = d_ext
self.d_spout = d_spout
self.dropout_rate = dropout_rate
self.layer_norm_epsilon = layer_norm_epsilon
self.expert_capacity = expert_capacity
self.router_jitter_noise = router_jitter_noise
def get_large_model_config(self):
return GPTSanJapaneseConfig.from_pretrained("Tanrei/GPTSAN-japanese")
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
config = self.get_config()
return (config, input_ids)
def prepare_config_and_inputs_for_common(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
config = self.get_config()
return (config, {"input_ids": input_ids})
def get_config(self):
return GPTSanJapaneseConfig(
vocab_size=self.vocab_size,
num_contexts=self.seq_length,
d_model=self.hidden_size,
d_ff=self.d_ff,
d_ext=self.d_ext,
d_spout=self.d_spout,
num_switch_layers=self.num_hidden_layers - self.num_ext_layers,
num_ext_layers=self.num_ext_layers,
num_heads=self.num_attention_heads,
num_experts=self.num_experts,
expert_capacity=self.expert_capacity,
dropout_rate=self.dropout_rate,
layer_norm_epsilon=self.layer_norm_epsilon,
router_jitter_noise=self.router_jitter_noise,
)
def create_and_check_model(
self,
config,
input_ids,
):
model = GPTSanJapaneseForConditionalGeneration(config=config)
model.to(torch_device)
model.eval()
result = model(
input_ids=input_ids,
)
self.parent.assertIsNotNone(result)
@require_torch
class GPTSanJapaneseTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (GPTSanJapaneseModel,) if is_torch_available() else ()
pipeline_model_mapping = (
{
"conversational": GPTSanJapaneseForConditionalGeneration,
"feature-extraction": GPTSanJapaneseForConditionalGeneration,
"summarization": GPTSanJapaneseForConditionalGeneration,
"text2text-generation": GPTSanJapaneseForConditionalGeneration,
"translation": GPTSanJapaneseForConditionalGeneration,
}
if is_torch_available()
else {}
)
fx_compatible = False
is_encoder_decoder = False
test_pruning = False
test_headmasking = False
test_save_load_fast_init_to_base = False
test_training = False
# The small GPTSAN_JAPANESE model needs higher percentages for CPU/MP tests
model_split_percents = [0.5, 0.8, 0.9]
# TODO: Fix the failed tests when this model gets more usage
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
if pipeline_test_casse_name == "SummarizationPipelineTests":
# TODO: fix `_reorder_cache` is not implemented for this model
return True
elif pipeline_test_casse_name == "Text2TextGenerationPipelineTests":
# TODO: check this.
return True
return False
def setUp(self):
self.model_tester = GPTSanJapaneseTester(self)
self.config_tester = ConfigTester(self, config_class=GPTSanJapaneseConfig, d_model=37)
def test_config(self):
GPTSanJapaneseConfig()
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
@unittest.skip(
reason="skip for now as the computed `max_memory` by `model_split_percents` in the test method will be changed inside `from_pretrained`"
)
def test_model_parallelism(self):
super().test_model_parallelism()
@unittest.skip(reason="Gptsan does not use inputs_embeds")
def test_inputs_embeds(self):
pass
@unittest.skip(reason="Gptsan does not use inputs_embeds")
def test_inputs_embeds_matches_input_ids(self):
pass
@require_torch
class GPTSanJapaneseForConditionalGenerationTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
all_model_classes = (GPTSanJapaneseForConditionalGeneration,) if is_torch_available() else ()
fx_compatible = False
is_encoder_decoder = False
test_pruning = False
test_headmasking = False
# The small GPTSAN_JAPANESE model needs higher percentages for CPU/MP tests
model_split_percents = [0.5, 0.8, 0.9]
def setUp(self):
self.model_tester = GPTSanJapaneseTester(self)
self.config_tester = ConfigTester(self, config_class=GPTSanJapaneseConfig, d_model=37)
def test_config(self):
GPTSanJapaneseConfig()
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
@unittest.skip(
reason="skip for now as the computed `max_memory` by `model_split_percents` in the test method will be changed inside `from_pretrained`"
)
def test_model_parallelism(self):
super().test_model_parallelism()
@unittest.skip(reason="Gptsan does not use inputs_embeds")
def test_inputs_embeds(self):
pass
@unittest.skip(reason="Gptsan does not use inputs_embeds")
def test_inputs_embeds_matches_input_ids(self):
pass
@slow
def test_logits(self):
model = GPTSanJapaneseForConditionalGeneration.from_pretrained("Tanrei/GPTSAN-japanese")
tokenizer = GPTSanJapaneseTokenizer.from_pretrained("Tanrei/GPTSAN-japanese")
input_ids = tokenizer.encode("武田信玄は", return_tensors="pt")
outputs = model(input_ids)
output_logits = outputs.logits.detach().cpu().numpy()
# Output of original model created with mesh-tensoflow
# fmt: off
target = [
[-12.037839889526367, -12.433061599731445, -14.333840370178223, -12.450345993041992, -11.1661376953125,
-11.930137634277344, -10.659740447998047, -12.909574508666992, -13.241043090820312, -13.398579597473145,
-11.107524871826172, -12.3685941696167, -22.97943115234375, -10.481067657470703, -12.484030723571777,
-12.807360649108887, -14.769700050354004, -12.233579635620117, -13.428145408630371, -22.624177932739258],
[-7.511149883270264, -8.281851768493652, -7.943127155303955, -7.55021333694458, -6.49869966506958,
-7.586796283721924, -6.978085994720459, -7.839145183563232, -8.21964168548584, -8.695091247558594,
-6.706910610198975, -6.6585798263549805, -19.565698623657227, -5.353842735290527, -8.350686073303223,
-8.039388656616211, -10.856569290161133, -7.75154447555542, -8.819022178649902, -19.51532745361328],
[-9.73066234588623, -10.223922729492188, -9.932981491088867, -11.857836723327637, -7.662626266479492,
-11.13529109954834, -7.765097618103027, -11.472923278808594, -9.543149948120117, -11.905633926391602,
-9.366164207458496, -11.5734281539917, -23.699003219604492, -9.429590225219727, -10.42839241027832,
-10.585240364074707, -10.94771957397461, -11.095416069030762, -10.390240669250488, -23.769372940063477],
[-9.728265762329102, -9.859712600708008, -10.09729290008545, -9.678522109985352, -6.879519939422607,
-9.68487548828125, -4.2803425788879395, -10.018914222717285, -9.308445930480957, -10.63394546508789,
-8.083646774291992, -9.06301498413086, -21.904266357421875, -8.90160846710205, -8.841876029968262,
-11.856719970703125, -12.079398155212402, -11.233753204345703, -10.177338600158691, -21.87256622314453],
[-9.669764518737793, -9.614198684692383, -9.814510345458984, -9.996501922607422, -11.375690460205078,
-10.113405227661133, -10.546867370605469, -10.04369068145752, -10.907809257507324, -10.504216194152832,
-11.129199028015137, -10.151124000549316, -21.96586799621582, -9.086349487304688, -11.730339050292969,
-10.460667610168457, -10.298049926757812, -10.784148216247559, -10.840693473815918, -22.03152847290039],
]
# fmt: on
target = np.array(target).flatten()
predict = output_logits[0, :, :20].flatten()
def check(a, b, epsilon=5e-4):
return abs(a - b) < epsilon * max(abs(a), abs(b))
self.assertTrue(np.all([check(target[i], predict[i]) for i in range(len(target))]))
@slow
def test_batch_generation(self):
model = GPTSanJapaneseForConditionalGeneration.from_pretrained("Tanrei/GPTSAN-japanese")
tokenizer = GPTSanJapaneseTokenizer.from_pretrained("Tanrei/GPTSAN-japanese")
model.to(torch_device)
# set deterministically
generation_config = GenerationConfig.from_pretrained("Tanrei/GPTSAN-japanese")
generation_config.top_k = 1
# use different length sentences to test batching
sentences = [
"甲斐なら武田と言うほど",
"織田信長は、",
]
tokenizer.padding_side = "left"
inputs = tokenizer(sentences, return_tensors="pt", padding=True)
input_ids = inputs["input_ids"].to(torch_device)
self.assertNotEqual(inputs["attention_mask"][0].numpy().tolist(), inputs["attention_mask"][1].numpy().tolist())
outputs = model.generate(
input_ids=input_ids,
attention_mask=inputs["attention_mask"].to(torch_device),
max_new_tokens=3,
generation_config=generation_config,
)
inputs_non_padded = tokenizer(sentences[0], return_tensors="pt").input_ids.to(torch_device)
output_non_padded = model.generate(
input_ids=inputs_non_padded, max_new_tokens=3, generation_config=generation_config
)
inputs_padded = tokenizer(sentences[1], return_tensors="pt").input_ids.to(torch_device)
output_padded = model.generate(input_ids=inputs_padded, max_new_tokens=3, generation_config=generation_config)
self.assertNotEqual(inputs_non_padded.shape, inputs_padded.shape)
batch_out_sentence = tokenizer.batch_decode(outputs, skip_special_tokens=True)
non_padded_sentence = tokenizer.decode(output_non_padded[0], skip_special_tokens=True)
padded_sentence = tokenizer.decode(output_padded[0], skip_special_tokens=True)
expected_output_sentence = [
"甲斐なら武田と言うほど甲斐の武田",
"織田信長は、このような",
]
self.assertListEqual(expected_output_sentence, batch_out_sentence)
self.assertListEqual(batch_out_sentence, [non_padded_sentence, padded_sentence])
@tooslow
def test_sample(self):
model = GPTSanJapaneseForConditionalGeneration.from_pretrained("Tanrei/GPTSAN-japanese")
tokenizer = GPTSanJapaneseTokenizer.from_pretrained("Tanrei/GPTSAN-japanese")
# Output of original model created with mesh-tensoflow
target = [
("武田信玄は", 35675),
("武田信玄は、", 45),
("武田信玄は、この", 29),
("武田信玄は、このよう", 30642),
("武田信玄は、このような", 35680),
("武田信玄は、このような「", 8640),
("武田信玄は、このような「武田", 31617),
("武田信玄は、このような「武田家", 30646),
("武田信玄は、このような「武田家の", 31617),
("武田信玄は、このような「武田家の家", 31381),
]
for input, output in target:
input_ids = tokenizer.encode(input, return_tensors="pt")
outputs = model(input_ids)
output_logits = outputs.logits.detach().cpu().numpy()[0]
output_id = np.argmax(output_logits[-1])
self.assertEqual(output_id, output)
@slow
def test_spout_generation(self):
model = GPTSanJapaneseForConditionalGeneration.from_pretrained("Tanrei/GPTSAN-japanese")
tokenizer = GPTSanJapaneseTokenizer.from_pretrained("Tanrei/GPTSAN-japanese")
model.to(torch_device)
# set deterministically
generation_config = GenerationConfig.from_pretrained("Tanrei/GPTSAN-japanese")
generation_config.top_k = 1
input_text = "武田信玄は、"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(torch_device)
input_ids_batch = tokenizer([input_text, input_text], return_tensors="pt").input_ids.to(torch_device)
# spout from uniform and one-hot
spouts = [
[0.87882208, 0.38426396, 0.33220248, 0.43890406, 0.16562252,
0.04803985, 0.211572 , 0.23188473, 0.37153068, 0.7836377 ,
0.02160172, 0.38761719, 0.75290772, 0.90198857, 0.34365777,
0.64168169, 0.44318471, 0.14575746, 0.92562881, 0.40812148,
0.29019122, 0.88861599, 0.65524846, 0.43563456, 0.38177187,
0.70832965, 0.81527892, 0.68832812, 0.38833192, 0.4561522 ,
0.14828817, 0.47248213, 0.54357335, 0.82009566, 0.1338884 ,
0.02755417, 0.19764677, 0.2422084 , 0.04757674, 0.65409606,
0.0824589 , 0.03304383, 0.94387689, 0.98764509, 0.82433901,
0.27646741, 0.64907493, 0.76009406, 0.30087915, 0.17904689,
0.41601714, 0.67046398, 0.10422822, 0.08447374, 0.07354344,
0.61423565, 0.70284866, 0.7532333 , 0.1972038 , 0.29575659,
0.90583886, 0.29265307, 0.50000175, 0.70407655, 0.889363 ,
0.81904418, 0.66829128, 0.64468815, 0.56563723, 0.85601875,
0.94924672, 0.00166762, 0.25220643, 0.74540219, 0.67993247,
0.1549675 , 0.39385352, 0.92153607, 0.63745931, 0.27759043,
0.84702295, 0.65904271, 0.58676614, 0.8666936 , 0.39607438,
0.79954983, 0.42220697, 0.39650381, 0.7849864 , 0.56150201,
0.15678925, 0.14746032, 0.34542114, 0.47026783, 0.11956489,
0.25421435, 0.33788901, 0.68934842, 0.36424685, 0.71737898,
0.38983449, 0.94393779, 0.39575588, 0.36616553, 0.87104665,
0.64630203, 0.22516905, 0.88270804, 0.15031338, 0.75144345,
0.46459025, 0.85396454, 0.86355643, 0.65139851, 0.70266061,
0.30241389, 0.81056497, 0.88865969, 0.38773807, 0.70635849,
0.90718459, 0.43245789, 0.28000654, 0.45935562, 0.08773519,
0.9552151 , 0.93901511, 0.22489288], # uniform
[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0.],
] # fmt: skip
output1 = model.generate(
input_ids=input_ids,
spout=spouts[0],
max_new_tokens=20,
generation_config=generation_config,
)
output2 = model.generate(
input_ids=input_ids,
spout=spouts[1],
max_new_tokens=20,
generation_config=generation_config,
)
output3 = model.generate(
input_ids=input_ids_batch,
spout=spouts,
max_new_tokens=20,
generation_config=generation_config,
)
out1_sentence = tokenizer.decode(output1[0])
out2_sentence = tokenizer.decode(output2[0])
batch_out_sentence = tokenizer.batch_decode(output3)
expected_output_sentence = [
"武田信玄は、武田氏の滅亡後、武田氏の居城であった甲斐武田氏の居城である",
"武田信玄は、武田家の滅亡を防ぐため、武田家の家臣である武田信虎を討",
]
self.assertListEqual(expected_output_sentence, batch_out_sentence)
self.assertListEqual(batch_out_sentence, [out1_sentence, out2_sentence])
@slow
def test_prefix_lm_generation(self):
model = GPTSanJapaneseForConditionalGeneration.from_pretrained("Tanrei/GPTSAN-japanese")
tokenizer = GPTSanJapaneseTokenizer.from_pretrained("Tanrei/GPTSAN-japanese")
model.to(torch_device)
# set deterministically
generation_config = GenerationConfig.from_pretrained("Tanrei/GPTSAN-japanese")
generation_config.top_k = 1
prefix_text_1 = "武田信玄"
prefix_text_2 = "織田信長"
input_text_1 = "は、"
input_text_2 = "が、"
input_tok_1 = tokenizer(input_text_1, prefix_text=prefix_text_1, return_tensors="pt")
input_tok_2 = tokenizer(input_text_2, prefix_text=prefix_text_2, return_tensors="pt")
input_tok_3 = tokenizer([[prefix_text_1, input_text_1], [prefix_text_2, input_text_2]], return_tensors="pt")
output1 = model.generate(
input_ids=input_tok_1.input_ids.to(torch_device),
token_type_ids=input_tok_1.token_type_ids.to(torch_device),
max_new_tokens=20,
generation_config=generation_config,
)
output2 = model.generate(
input_ids=input_tok_2.input_ids.to(torch_device),
token_type_ids=input_tok_2.token_type_ids.to(torch_device),
max_new_tokens=20,
generation_config=generation_config,
)
output3 = model.generate(
input_ids=input_tok_3.input_ids.to(torch_device),
token_type_ids=input_tok_3.token_type_ids.to(torch_device),
attention_mask=input_tok_3.attention_mask.to(torch_device),
max_new_tokens=20,
generation_config=generation_config,
)
out1_sentence = tokenizer.decode(output1[0])
out2_sentence = tokenizer.decode(output2[0])
batch_out_sentence = tokenizer.batch_decode(output3)
expected_output_sentence = [
"武田信玄は、武田氏の祖である武田信虎を、その子・武田信友を擁して",
"織田信長が、織田信長の妻・お市の方を妻として迎えたという逸話が残",
]
self.assertListEqual(expected_output_sentence, batch_out_sentence)
self.assertListEqual(batch_out_sentence, [out1_sentence, out2_sentence])

View File

@@ -1,218 +0,0 @@
# coding=utf-8
# Copyright 2023 Toshiyuki Sakamoto(tanreinama) and HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
import unittest
from transformers.models.gptsan_japanese.tokenization_gptsan_japanese import (
VOCAB_FILES_NAMES,
GPTSanJapaneseTokenizer,
)
from transformers.testing_utils import require_jinja, require_tokenizers, slow
from ...test_tokenization_common import TokenizerTesterMixin
@require_tokenizers
class GPTSanJapaneseTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
from_pretrained_id = "Tanrei/GPTSAN-japanese"
tokenizer_class = GPTSanJapaneseTokenizer
test_rust_tokenizer = False
from_pretrained_kwargs = {"do_clean_text": False, "add_prefix_space": False}
def setUp(self):
super().setUp()
vocab_tokens = ["こん", "こんに", "にちは", "ばんは", "世界,㔺界", "", "", "<BR>", "<SP>", "<TAB>", "<URL>", "<EMAIL>", "<TEL>", "<DATE>", "<PRICE>", "<BLOCK>", "<KIGOU>", "<U2000U2BFF>", "<|emoji1|>", "<unk>", "<|bagoftoken|>", "<|endoftext|>"] # fmt: skip
emoji_tokens = {"emoji": {"\ud83d\ude00": "<|emoji1|>"}, "emoji_inv": {"<|emoji1|>": "\ud83d\ude00"}} # 😀
self.special_tokens_map = {"unk_token": "<unk>"}
self.vocab_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["vocab_file"])
self.emoji_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["emoji_file"])
with open(self.vocab_file, "w", encoding="utf-8") as vocab_writer:
vocab_writer.write("".join([x + "\n" for x in vocab_tokens]))
with open(self.emoji_file, "w") as emoji_writer:
emoji_writer.write(json.dumps(emoji_tokens))
def get_tokenizer(self, **kwargs):
kwargs.update(self.special_tokens_map)
return GPTSanJapaneseTokenizer.from_pretrained(self.tmpdirname, **kwargs)
# Copied from tests.models.gpt_neox_japanese.test_tokenization_gpt_neox_japanese.GPTNeoXJapaneseTokenizationTest.get_input_output_texts
def get_input_output_texts(self, tokenizer):
input_text = "こんにちは、世界。 \nこんばんは、㔺界。😀"
output_text = "こんにちは、世界。 \nこんばんは、世界。😀"
return input_text, output_text
# Copied from tests.models.gpt_neox_japanese.test_tokenization_gpt_neox_japanese.GPTNeoXJapaneseTokenizationTest.get_clean_sequence
def get_clean_sequence(self, tokenizer):
input_text, output_text = self.get_input_output_texts(tokenizer)
ids = tokenizer.encode(output_text, add_special_tokens=False)
text = tokenizer.decode(ids, clean_up_tokenization_spaces=False)
return text, ids
# Copied from tests.models.gpt_neox_japanese.test_tokenization_gpt_neox_japanese.GPTNeoXJapaneseTokenizationTest.test_pretokenized_inputs
def test_pretokenized_inputs(self):
pass # TODO add if relevant
# Copied from tests.models.gpt_neox_japanese.test_tokenization_gpt_neox_japanese.GPTNeoXJapaneseTokenizationTest.test_maximum_encoding_length_pair_input
def test_maximum_encoding_length_pair_input(self):
pass # TODO add if relevant
# Copied from tests.models.gpt_neox_japanese.test_tokenization_gpt_neox_japanese.GPTNeoXJapaneseTokenizationTest.test_maximum_encoding_length_single_input
def test_maximum_encoding_length_single_input(self):
pass # TODO add if relevant
# Copied from tests.models.gpt_neox_japanese.test_tokenization_gpt_neox_japanese.GPTNeoXJapaneseTokenizationTest.test_full_tokenizer
def test_full_tokenizer(self):
tokenizer = self.get_tokenizer()
# Testing tokenization
input_text = "こんにちは、世界。 こんばんは、㔺界。"
expected_token = ["こん", "にちは", "", "世界", "", "<SP>", "こん", "ばんは", "", "㔺界", ""]
tokens = tokenizer.tokenize(input_text)
self.assertListEqual(tokens, expected_token)
# Testing conversion to ids without special tokens
expected_ids = [0, 2, 5, 4, 6, 8, 0, 3, 5, 4, 6]
input_ids = tokenizer.convert_tokens_to_ids(tokens)
self.assertListEqual(input_ids, expected_ids)
# Testing conversion to ids with special tokens
input_tokens = tokens + [tokenizer.unk_token]
expected_ids = [0, 2, 5, 4, 6, 8, 0, 3, 5, 4, 6, 19]
input_ids = tokenizer.convert_tokens_to_ids(input_tokens)
self.assertListEqual(input_ids, expected_ids)
def test_token_bagging(self):
tokenizer = self.get_tokenizer()
# Testing tokenization
input_text = "こんにちは、<|bagoftoken|>世界。こんばんは、<|bagoftoken|>㔺界。"
expected_text = "こんにちは、、、、世界。こんばんは、、、、世界。"
tokens = tokenizer.encode(input_text)
output_text = tokenizer.decode(tokens)
self.assertEqual(output_text, expected_text)
@slow
def test_prefix_input(self):
tokenizer = self.tokenizer_class.from_pretrained("Tanrei/GPTSAN-japanese")
# Testing tokenization
prefix_text = "こんにちは、世界。"
input_text = "こんばんは、㔺界。😀"
expected_text = "こんにちは、世界。こんばんは、世界。😀"
tokens_1 = tokenizer.encode(prefix_text + input_text)
tokens_2 = tokenizer.encode("", prefix_text=prefix_text + input_text)
tokens_3 = tokenizer.encode(input_text, prefix_text=prefix_text)
output_text_1 = tokenizer.decode(tokens_1)
output_text_2 = tokenizer.decode(tokens_2)
output_text_3 = tokenizer.decode(tokens_3)
self.assertEqual(output_text_1, expected_text)
self.assertEqual(output_text_2, expected_text)
self.assertEqual(output_text_3, expected_text)
@slow
def test_token_type_ids(self):
tokenizer = self.tokenizer_class.from_pretrained("Tanrei/GPTSAN-japanese")
# Testing tokenization
prefix_text = "こんにちは、世界。"
input_text = "こんばんは、㔺界。😀"
len_prefix = len(tokenizer.encode(prefix_text)) - 2
len_text = len(tokenizer.encode(input_text)) - 2
expected_mask_1 = [1] + [0] * (len_prefix + len_text + 1)
expected_mask_2 = [1] * (len_prefix + len_text + 1) + [0]
expected_mask_3 = [1] + [1] * (len_prefix) + [0] * (len_text + 1)
type_id_1 = tokenizer(prefix_text + input_text).token_type_ids
type_id_2 = tokenizer("", prefix_text=prefix_text + input_text).token_type_ids
type_id_3 = tokenizer(input_text, prefix_text=prefix_text).token_type_ids
self.assertListEqual(type_id_1, expected_mask_1)
self.assertListEqual(type_id_2, expected_mask_2)
self.assertListEqual(type_id_3, expected_mask_3)
@slow
def test_prefix_tokens(self):
tokenizer = self.tokenizer_class.from_pretrained("Tanrei/GPTSAN-japanese")
x_token_1 = tokenizer.encode("あンいワ")
x_token_2 = tokenizer.encode("", prefix_text="あンいワ")
x_token_3 = tokenizer.encode("いワ", prefix_text="あン")
self.assertEqual(tokenizer.decode(x_token_1), tokenizer.decode(x_token_2))
self.assertEqual(tokenizer.decode(x_token_1), tokenizer.decode(x_token_3))
self.assertNotEqual(x_token_1, x_token_2)
self.assertNotEqual(x_token_1, x_token_3)
self.assertEqual(x_token_1[1], x_token_2[-1]) # SEG token
self.assertEqual(x_token_1[1], x_token_3[3]) # SEG token
@slow
def test_batch_encode(self):
tokenizer = self.tokenizer_class.from_pretrained("Tanrei/GPTSAN-japanese")
input_pairs = [["武田信玄", "は、"], ["織田信長", "の配下の、"]]
x_token = tokenizer(input_pairs, padding=True)
x_token_2 = tokenizer.batch_encode_plus(input_pairs, padding=True)
# fmt: off
expected_outputs = [[35993, 8640, 25948, 35998, 30647, 35675, 35999, 35999], [35993, 10382, 9868, 35998, 30646, 9459, 30646, 35675]]
expected_typeids = [[1, 1, 1, 0, 0, 0, 0, 0], [1, 1, 1, 0, 0, 0, 0, 0]]
expected_attmask = [[1, 1, 1, 1, 1, 1, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1]]
# fmt: on
self.assertListEqual(x_token.input_ids, expected_outputs)
self.assertListEqual(x_token.token_type_ids, expected_typeids)
self.assertListEqual(x_token.attention_mask, expected_attmask)
self.assertListEqual(x_token_2.input_ids, expected_outputs)
self.assertListEqual(x_token_2.token_type_ids, expected_typeids)
self.assertListEqual(x_token_2.attention_mask, expected_attmask)
# Copied from tests.models.gpt_neox_japanese.test_tokenization_gpt_neox_japanese.GPTNeoXJapaneseTokenizationTest.test_conversion_reversible
def test_conversion_reversible(self):
# Intentionally convert some words to accommodate character fluctuations unique to Japanese
pass
# Copied from tests.models.gpt_neox_japanese.test_tokenization_gpt_neox_japanese.GPTNeoXJapaneseTokenizationTest.test_padding_different_model_input_name
def test_padding_different_model_input_name(self):
# tokenizer has no padding token
pass
@require_jinja
def test_tokenization_for_chat(self):
tokenizer = self.tokenizer_class.from_pretrained("Tanrei/GPTSAN-japanese")
# This is in English, but it's just here to make sure the chat control tokens are being added properly
test_chats = [
[{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}],
[
{"role": "system", "content": "You are a helpful chatbot."},
{"role": "user", "content": "Hello!"},
{"role": "assistant", "content": "Nice to meet you."},
],
[{"role": "assistant", "content": "Nice to meet you."}, {"role": "user", "content": "Hello!"}],
]
tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats]
# fmt: off
expected_tokens = [
[35993, 35998, 35637, 35659, 35665, 35716, 35645, 35662, 35649, 35716, 35645, 35716, 35652, 35649, 35656, 35660, 35650, 35665, 35656, 35716, 35647, 35652, 35645, 35664, 35646, 35659, 35664, 35595, 35716, 35999, 35993, 35998, 35620, 35649, 35656, 35656, 35659, 35582, 35716, 35999],
[35993, 35998, 35637, 35659, 35665, 35716, 35645, 35662, 35649, 35716, 35645, 35716, 35652, 35649, 35656, 35660, 35650, 35665, 35656, 35716, 35647, 35652, 35645, 35664, 35646, 35659, 35664, 35595, 35716, 35999, 35993, 35998, 35620, 35649, 35656, 35656, 35659, 35582, 35716, 35999, 35993, 35998, 35626, 35653, 35647, 35649, 35716, 35664, 35659, 35716, 35657, 35649, 35649, 35664, 35716, 35669, 35659, 35665, 35595, 35716, 35999],
[35993, 35998, 35626, 35653, 35647, 35649, 35716, 35664, 35659, 35716, 35657, 35649, 35649, 35664, 35716, 35669, 35659, 35665, 35595, 35716, 35999, 35993, 35998, 35620, 35649, 35656, 35656, 35659, 35582, 35716, 35999]
]
# fmt: on
for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens):
self.assertListEqual(tokenized_chat, expected_tokens)

File diff suppressed because it is too large Load Diff

View File

@@ -1,407 +0,0 @@
# coding=utf-8
# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from unittest import skip
from transformers import is_torch_available
from transformers.testing_utils import (
require_torch,
require_torch_accelerator,
require_torch_fp16,
slow,
torch_device,
)
from transformers.trainer_utils import set_seed
if is_torch_available():
import torch
from transformers import JukeboxModel, JukeboxPrior, JukeboxTokenizer
@require_torch
class Jukebox1bModelTester(unittest.TestCase):
all_model_classes = (JukeboxModel,) if is_torch_available() else ()
model_id = "openai/jukebox-1b-lyrics"
metas = {
"artist": "Zac Brown Band",
"genres": "Country",
"lyrics": """I met a traveller from an antique land,
Who said "Two vast and trunkless legs of stone
Stand in the desert. . . . Near them, on the sand,
Half sunk a shattered visage lies, whose frown,
And wrinkled lip, and sneer of cold command,
Tell that its sculptor well those passions read
Which yet survive, stamped on these lifeless things,
The hand that mocked them, and the heart that fed;
And on the pedestal, these words appear:
My name is Ozymandias, King of Kings;
Look on my Works, ye Mighty, and despair!
Nothing beside remains. Round the decay
Of that colossal Wreck, boundless and bare
The lone and level sands stretch far away
""",
}
# fmt: off
EXPECTED_OUTPUT_2 = [
1864, 1536, 1213, 1870, 1357, 1536, 519, 880, 1323, 789, 1082, 534,
1000, 1445, 1105, 1130, 967, 515, 1434, 1620, 534, 1495, 283, 1445,
333, 1307, 539, 1631, 1528, 375, 1434, 673, 627, 710, 778, 1883,
1405, 1276, 1455, 1228
]
EXPECTED_OUTPUT_2_PT_2 = [
1489, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653,
653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653,
653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653,
653, 653, 653, 653
]
EXPECTED_OUTPUT_1 = [
1125, 1751, 697, 1776, 1141, 1476, 391, 697, 1125, 684, 867, 416,
844, 1372, 1274, 717, 1274, 844, 1299, 1419, 697, 1370, 317, 1125,
191, 1440, 1370, 1440, 1370, 282, 1621, 1370, 368, 349, 867, 1872,
1262, 869, 1728, 747
]
EXPECTED_OUTPUT_1_PT_2 = [
416, 416, 1125, 1125, 416, 416, 416, 416, 416, 416, 416, 416,
416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416,
416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416,
416, 416, 416, 416
]
EXPECTED_OUTPUT_0 = [
1755, 842, 307, 1843, 1022, 1395, 234, 1554, 806, 739, 1022, 442,
616, 556, 268, 1499, 933, 457, 1440, 1837, 755, 985, 308, 902,
293, 1443, 1671, 1141, 1533, 555, 1562, 1061, 287, 417, 1022, 2008,
1186, 1015, 1777, 268
]
EXPECTED_OUTPUT_0_PT_2 = [
854, 842, 1353, 114, 1353, 842, 185, 842, 185, 114, 591, 842,
185, 417, 185, 842, 307, 842, 591, 842, 185, 842, 307, 842,
591, 842, 1353, 842, 185, 842, 591, 842, 591, 114, 591, 842,
185, 842, 591, 89
]
EXPECTED_Y_COND = [1058304, 0, 786432, 7169, 507, 76, 27, 40, 30, 76]
EXPECTED_PRIMED_0 = [
390, 1160, 1002, 1907, 1788, 1788, 1788, 1907, 1002, 1002, 1854, 1002,
1002, 1002, 1002, 1002, 1002, 1160, 1160, 1606, 596, 596, 1160, 1002,
1516, 596, 1002, 1002, 1002, 1907, 1788, 1788, 1788, 1854, 1788, 1907,
1907, 1788, 596, 1626
]
EXPECTED_PRIMED_1 = [
1236, 1668, 1484, 1920, 1848, 1409, 139, 864, 1828, 1272, 1599, 824,
1672, 139, 555, 1484, 824, 1920, 555, 596, 1579, 1599, 1231, 1599,
1637, 1407, 212, 824, 1599, 116, 1433, 824, 258, 1599, 1433, 1895,
1063, 1433, 1433, 1599
]
EXPECTED_PRIMED_2 = [
1684, 1873, 1119, 1189, 395, 611, 1901, 972, 890, 1337, 1392, 1927,
96, 972, 672, 780, 1119, 890, 158, 771, 1073, 1927, 353, 1331,
1269, 1459, 1333, 1645, 812, 1577, 1337, 606, 353, 981, 1466, 619,
197, 391, 302, 1930
]
EXPECTED_VQVAE_ENCODE = [
390, 1160, 1002, 1907, 1788, 1788, 1788, 1907, 1002, 1002, 1854, 1002,
1002, 1002, 1002, 1002, 1002, 1160, 1160, 1606, 596, 596, 1160, 1002,
1516, 596, 1002, 1002, 1002, 1907, 1788, 1788, 1788, 1854, 1788, 1907,
1907, 1788, 596, 1626
]
EXPECTED_VQVAE_DECODE = [
-0.0492, -0.0524, -0.0565, -0.0640, -0.0686, -0.0684, -0.0677, -0.0664,
-0.0605, -0.0490, -0.0330, -0.0168, -0.0083, -0.0075, -0.0051, 0.0025,
0.0136, 0.0261, 0.0386, 0.0497, 0.0580, 0.0599, 0.0583, 0.0614,
0.0740, 0.0889, 0.1023, 0.1162, 0.1211, 0.1212, 0.1251, 0.1336,
0.1502, 0.1686, 0.1883, 0.2148, 0.2363, 0.2458, 0.2507, 0.2531
]
EXPECTED_AUDIO_COND = [
0.0256, -0.0544, 0.1600, -0.0032, 0.1066, 0.0825, -0.0013, 0.3440,
0.0210, 0.0412, -0.1777, -0.0892, -0.0164, 0.0285, -0.0613, -0.0617,
-0.0137, -0.0201, -0.0175, 0.0215, -0.0627, 0.0520, -0.0730, 0.0970,
-0.0100, 0.0442, -0.0586, 0.0207, -0.0015, -0.0082
]
EXPECTED_META_COND = [
0.0415, 0.0877, 0.0022, -0.0055, 0.0751, 0.0334, 0.0324, -0.0068,
0.0011, 0.0017, -0.0676, 0.0655, -0.0143, 0.0399, 0.0303, 0.0743,
-0.0168, -0.0394, -0.1113, 0.0124, 0.0442, 0.0267, -0.0003, -0.1536,
-0.0116, -0.1837, -0.0180, -0.1026, -0.0777, -0.0456
]
EXPECTED_LYRIC_COND = [
76, 27, 40, 30, 76, 46, 44, 47, 40, 37, 38, 31, 45, 45, 76, 38, 31, 33,
45, 76, 41, 32, 76, 45, 46, 41, 40, 31, 78, 76
]
# fmt: on
def prepare_inputs(self):
tokenizer = JukeboxTokenizer.from_pretrained(self.model_id)
tokens = tokenizer(**self.metas)["input_ids"]
return tokens
@slow
def test_sampling(self):
model = JukeboxModel.from_pretrained(self.model_id, min_duration=0).eval()
labels = self.prepare_inputs()
set_seed(0)
zs = [torch.zeros(1, 0, dtype=torch.long).cpu() for _ in range(3)]
zs = model._sample(zs, labels, [0], sample_length=40 * model.priors[0].raw_to_tokens, save_results=False)
self.assertIn(zs[0][0].detach().cpu().tolist(), [self.EXPECTED_OUTPUT_2, self.EXPECTED_OUTPUT_2_PT_2])
set_seed(0)
zs = model._sample(zs, labels, [1], sample_length=40 * model.priors[1].raw_to_tokens, save_results=False)
self.assertIn(zs[1][0].detach().cpu().tolist(), [self.EXPECTED_OUTPUT_1, self.EXPECTED_OUTPUT_1_PT_2])
set_seed(0)
zs = model._sample(zs, labels, [2], sample_length=40 * model.priors[2].raw_to_tokens, save_results=False)
self.assertIn(zs[2][0].detach().cpu().tolist(), [self.EXPECTED_OUTPUT_0, self.EXPECTED_OUTPUT_0_PT_2])
@slow
def test_conditioning(self):
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.allow_tf32 = False
model = JukeboxModel.from_pretrained(self.model_id, min_duration=0).eval()
labels = self.prepare_inputs()
set_seed(0)
zs = [torch.zeros(1, 0, dtype=torch.long) for _ in range(3)]
top_prior = model.priors[0]
start = 0
music_token_conds = top_prior.get_music_tokens_conds(zs, start=start, end=start + top_prior.n_ctx)
metadata = top_prior.get_metadata(labels[0].clone(), start, 1058304, 0)
self.assertIsNone(music_token_conds)
self.assertListEqual(metadata.numpy()[0][:10].tolist(), self.EXPECTED_Y_COND)
audio_conditioning, metadata_conditioning, lyric_tokens = top_prior.get_cond(music_token_conds, metadata)
torch.testing.assert_close(
audio_conditioning[0][0][:30].detach(), torch.tensor(self.EXPECTED_AUDIO_COND), atol=1e-4, rtol=1e-4
)
torch.testing.assert_close(
metadata_conditioning[0][0][:30].detach(), torch.tensor(self.EXPECTED_META_COND), atol=1e-4, rtol=1e-4
)
torch.testing.assert_close(
lyric_tokens[0, :30].detach(), torch.tensor(self.EXPECTED_LYRIC_COND), atol=1e-4, rtol=1e-4
)
@slow
def test_primed_sampling(self):
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.allow_tf32 = False
model = JukeboxModel.from_pretrained(self.model_id, min_duration=0).eval()
set_seed(0)
waveform = torch.rand((1, 5120, 1))
tokens = list(self.prepare_inputs())
zs = [model.vqvae.encode(waveform, start_level=2, bs_chunks=waveform.shape[0])[0], None, None]
zs = model._sample(
zs, tokens, sample_levels=[0], save_results=False, sample_length=40 * model.priors[0].raw_to_tokens
)
torch.testing.assert_close(zs[0][0][:40], torch.tensor(self.EXPECTED_PRIMED_0))
upper_2 = torch.cat((zs[0], torch.zeros(1, 2048 - zs[0].shape[-1])), dim=-1).long()
zs = [upper_2, model.vqvae.encode(waveform, start_level=1, bs_chunks=waveform.shape[0])[0], None]
zs = model._sample(
zs, tokens, sample_levels=[1], save_results=False, sample_length=40 * model.priors[1].raw_to_tokens
)
torch.testing.assert_close(zs[1][0][:40], torch.tensor(self.EXPECTED_PRIMED_1))
upper_1 = torch.cat((zs[1], torch.zeros(1, 2048 - zs[1].shape[-1])), dim=-1).long()
zs = [upper_2, upper_1, model.vqvae.encode(waveform, start_level=0, bs_chunks=waveform.shape[0])[0]]
zs = model._sample(
zs, tokens, sample_levels=[2], save_results=False, sample_length=40 * model.priors[2].raw_to_tokens
)
torch.testing.assert_close(zs[2][0][:40].cpu(), torch.tensor(self.EXPECTED_PRIMED_2))
@slow
def test_vqvae(self):
model = JukeboxModel.from_pretrained(self.model_id, min_duration=0).eval()
set_seed(0)
x = torch.rand((1, 5120, 1))
with torch.no_grad():
zs = model.vqvae.encode(x, start_level=2, bs_chunks=x.shape[0])
torch.testing.assert_close(zs[0][0], torch.tensor(self.EXPECTED_VQVAE_ENCODE))
with torch.no_grad():
x = model.vqvae.decode(zs, start_level=2, bs_chunks=x.shape[0])
torch.testing.assert_close(x[0, :40, 0], torch.tensor(self.EXPECTED_VQVAE_DECODE), atol=1e-4, rtol=1e-4)
@require_torch
class Jukebox5bModelTester(unittest.TestCase):
all_model_classes = (JukeboxModel,) if is_torch_available() else ()
model_id = "openai/jukebox-5b-lyrics"
metas = {
"artist": "Zac Brown Band",
"genres": "Country",
"lyrics": """I met a traveller from an antique land,
Who said "Two vast and trunkless legs of stone
Stand in the desert. . . . Near them, on the sand,
Half sunk a shattered visage lies, whose frown,
And wrinkled lip, and sneer of cold command,
Tell that its sculptor well those passions read
Which yet survive, stamped on these lifeless things,
The hand that mocked them, and the heart that fed;
And on the pedestal, these words appear:
My name is Ozymandias, King of Kings;
Look on my Works, ye Mighty, and despair!
Nothing beside remains. Round the decay
Of that colossal Wreck, boundless and bare
The lone and level sands stretch far away
""",
}
# fmt: off
EXPECTED_OUTPUT_2 = [
1489, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653,
653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653,
653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653,
653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653,
1489, 1489, 1489, 1489, 1150, 1853, 1509, 1150, 1357, 1509, 6, 1272
]
EXPECTED_OUTPUT_2_PT_2 = [
1489, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653,
653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653,
653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653,
653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653,
653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653
]
EXPECTED_OUTPUT_1 = [
1125, 416, 1125, 1125, 1125, 1125, 1125, 416, 416, 416, 416, 416,
416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416,
416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416,
416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416,
416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416
]
EXPECTED_OUTPUT_1_PT_2 = [
416, 416, 1125, 1125, 416, 416, 416, 416, 416, 416, 416, 416, 416,
416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416,
416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416,
416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416,
416, 416, 416, 416, 416, 416, 416, 416
]
EXPECTED_OUTPUT_0 = [
1755, 1061, 234, 1755, 1061, 1755, 185, 290, 307, 307, 616, 616,
616, 616, 616, 616, 307, 290, 417, 1755, 234, 1755, 185, 290,
290, 290, 307, 616, 616, 616, 616, 616, 290, 234, 234, 1755,
234, 234, 1755, 234, 185, 185, 307, 616, 616, 616, 616, 290,
1755, 1755, 1755, 234, 234, 1755, 1572, 290, 307, 616, 34, 616
]
EXPECTED_OUTPUT_0_PT_2 = [
854, 842, 1353, 114, 1353, 842, 185, 842, 185, 114, 591, 842, 185,
417, 185, 842, 307, 842, 591, 842, 185, 842, 185, 842, 591, 842,
1353, 842, 185, 842, 591, 842, 591, 114, 591, 842, 185, 842, 591,
89, 591, 842, 591, 842, 591, 417, 1372, 842, 1372, 842, 34, 842,
185, 89, 591, 842, 185, 842, 591, 632
]
EXPECTED_GPU_OUTPUTS_2 = [
1489, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653,
653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653,
653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653,
653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653,
653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653
]
EXPECTED_GPU_OUTPUTS_2_PT_2 = [
1489, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653,
653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653, 653,
653, 653, 653, 653, 653, 653, 653, 1853, 1177, 1536, 1228,
710, 475, 1489, 1229, 1224, 231, 1224, 252, 1434, 653, 475,
1106, 1877, 1599, 1228, 1600, 1683, 1182, 1853, 475, 1864,
252, 1229, 1434, 2001
]
EXPECTED_GPU_OUTPUTS_1 = [
1125, 1125, 416, 1125, 1125, 416, 1125, 1125, 416, 416, 1125, 416,
416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416,
416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416,
416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416,
416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416, 416
]
EXPECTED_GPU_OUTPUTS_0 = [
491, 1755, 34, 1613, 1755, 417, 992, 1613, 222, 842, 1353, 1613,
844, 632, 185, 1613, 844, 632, 185, 1613, 185, 842, 677, 1613,
185, 114, 1353, 1613, 307, 89, 844, 1613, 307, 1332, 234, 1979,
307, 89, 1353, 616, 34, 842, 185, 842, 34, 842, 185, 842,
307, 114, 185, 89, 34, 1268, 185, 89, 34, 842, 185, 89
]
# fmt: on
def prepare_inputs(self, model_id):
tokenizer = JukeboxTokenizer.from_pretrained(model_id)
tokens = tokenizer(**self.metas)["input_ids"]
return tokens
@slow
def test_sampling(self):
model = JukeboxModel.from_pretrained(self.model_id, min_duration=0).eval()
labels = self.prepare_inputs(self.model_id)
set_seed(0)
zs = [torch.zeros(1, 0, dtype=torch.long).cpu() for _ in range(3)]
zs = model._sample(zs, labels, [0], sample_length=60 * model.priors[0].raw_to_tokens, save_results=False)
self.assertIn(zs[0][0].detach().cpu().tolist(), [self.EXPECTED_OUTPUT_2, self.EXPECTED_OUTPUT_2_PT_2])
set_seed(0)
zs = model._sample(zs, labels, [1], sample_length=60 * model.priors[1].raw_to_tokens, save_results=False)
self.assertIn(zs[1][0].detach().cpu().tolist(), [self.EXPECTED_OUTPUT_1, self.EXPECTED_OUTPUT_1_PT_2])
set_seed(0)
zs = model._sample(zs, labels, [2], sample_length=60 * model.priors[2].raw_to_tokens, save_results=False)
self.assertIn(zs[2][0].detach().cpu().tolist(), [self.EXPECTED_OUTPUT_0, self.EXPECTED_OUTPUT_0_PT_2])
@slow
@require_torch_accelerator
@skip("Not enough GPU memory on CI runners")
def test_slow_sampling(self):
model = JukeboxModel.from_pretrained(self.model_id, min_duration=0).eval()
labels = [i.to(torch_device) for i in self.prepare_inputs(self.model_id)]
set_seed(0)
model.priors[0].to(torch_device)
zs = [torch.zeros(1, 0, dtype=torch.long).to(torch_device) for _ in range(3)]
zs = model._sample(zs, labels, [0], sample_length=60 * model.priors[0].raw_to_tokens, save_results=False)
torch.testing.assert_close(zs[0][0].cpu(), torch.tensor(self.EXPECTED_GPU_OUTPUTS_2))
model.priors[0].cpu()
set_seed(0)
model.priors[1].to(torch_device)
zs = model._sample(zs, labels, [1], sample_length=60 * model.priors[1].raw_to_tokens, save_results=False)
torch.testing.assert_close(zs[1][0].cpu(), torch.tensor(self.EXPECTED_GPU_OUTPUTS_1))
model.priors[1].cpu()
set_seed(0)
model.priors[2].to(torch_device)
zs = model._sample(zs, labels, [2], sample_length=60 * model.priors[2].raw_to_tokens, save_results=False)
torch.testing.assert_close(zs[2][0].cpu(), torch.tensor(self.EXPECTED_GPU_OUTPUTS_0))
@slow
@require_torch_accelerator
@require_torch_fp16
def test_fp16_slow_sampling(self):
prior_id = "ArthurZ/jukebox_prior_0"
model = JukeboxPrior.from_pretrained(prior_id, min_duration=0).eval().half().to(torch_device)
labels = self.prepare_inputs(prior_id)[0].to(torch_device)
metadata = model.get_metadata(labels, 0, 7680, 0)
set_seed(0)
outputs = model.sample(1, metadata=metadata, sample_tokens=60)
self.assertIn(outputs[0].cpu().tolist(), [self.EXPECTED_GPU_OUTPUTS_2, self.EXPECTED_GPU_OUTPUTS_2_PT_2])

View File

@@ -1,209 +0,0 @@
# coding=utf-8
# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from transformers import JukeboxTokenizer
from transformers.testing_utils import require_torch
class JukeboxTokenizationTest(unittest.TestCase):
tokenizer_class = JukeboxTokenizer
metas = {
"artist": "Zac Brown Band",
"genres": "Country",
"lyrics": """I met a traveller from an antique land,
Who said "Two vast and trunkless legs of stone
Stand in the desert. . . . Near them, on the sand,
Half sunk a shattered visage lies, whose frown,
And wrinkled lip, and sneer of cold command,
Tell that its sculptor well those passions read
Which yet survive, stamped on these lifeless things,
The hand that mocked them, and the heart that fed;
And on the pedestal, these words appear:
My name is Ozymandias, King of Kings;
Look on my Works, ye Mighty, and despair!
Nothing beside remains. Round the decay
Of that colossal Wreck, boundless and bare
The lone and level sands stretch far away
""",
}
@require_torch
def test_1b_lyrics_tokenizer(self):
"""
how to run the same test with openAI
...
"""
import torch
tokenizer = JukeboxTokenizer.from_pretrained("openai/jukebox-1b-lyrics")
tokens = tokenizer(**self.metas)["input_ids"]
# fmt: off
EXPECTED_OUTPUT = [
torch.tensor([[
0, 0, 0, 7169, 507, 9, 76, 39, 31, 46, 76, 27,
76, 46, 44, 27, 48, 31, 38, 38, 31, 44, 76, 32,
44, 41, 39, 76, 27, 40, 76, 27, 40, 46, 35, 43,
47, 31, 76, 38, 27, 40, 30, 64, 78, 76, 76, 76,
76, 76, 76, 76, 76, 23, 34, 41, 76, 45, 27, 35,
30, 76, 71, 20, 49, 41, 76, 48, 27, 45, 46, 76,
27, 40, 30, 76, 46, 44, 47, 40, 37, 38, 31, 45,
45, 76, 38, 31, 33, 45, 76, 41, 32, 76, 45, 46,
41, 40, 31, 78, 76, 76, 76, 76, 76, 76, 76, 76,
19, 46, 27, 40, 30, 76, 35, 40, 76, 46, 34, 31,
76, 30, 31, 45, 31, 44, 46, 63, 76, 63, 76, 63,
76, 63, 76, 14, 31, 27, 44, 76, 46, 34, 31, 39,
64, 76, 41, 40, 76, 46, 34, 31, 76, 45, 27, 40,
30, 64, 78, 76, 76, 76, 76, 76, 76, 76, 76, 8,
27, 38, 32, 76, 45, 47, 40, 37, 76, 27, 76, 45,
34, 27, 46, 46, 31, 44, 31, 30, 76, 48, 35, 45,
27, 33, 31, 76, 38, 35, 31, 45, 64, 76, 49, 34,
41, 45, 31, 76, 32, 44, 41, 49, 40, 64, 78, 76,
76, 76, 76, 76, 76, 76, 76, 1, 40, 30, 76, 49,
44, 35, 40, 37, 38, 31, 30, 76, 38, 35, 42, 64,
76, 27, 40, 30, 76, 45, 40, 31, 31, 44, 76, 41,
32, 76, 29, 41, 38, 30, 76, 29, 41, 39, 39, 27,
40, 30, 64, 78, 76, 76, 76, 76, 76, 76, 76, 76,
20, 31, 38, 38, 76, 46, 34, 27, 46, 76, 35, 46,
45, 76, 45, 29, 47, 38, 42, 46, 41, 44, 76, 49,
31, 38, 38, 76, 46, 34, 41, 45, 31, 76, 42, 27,
45, 45, 35, 41, 40, 45, 76, 44, 31, 27, 30, 78,
76, 76, 76, 76, 76, 76, 76, 76, 23, 34, 35, 29,
34, 76, 51, 31, 46, 76, 45, 47, 44, 48, 35, 48,
31, 64, 76, 45, 46, 27, 39, 42, 31, 30, 76, 41,
40, 76, 46, 34, 31, 45, 31, 76, 38, 35, 32, 31,
38, 31, 45, 45, 76, 46, 34, 35, 40, 33, 45, 64,
78, 76, 76, 76, 76, 76, 76, 76, 76, 20, 34, 31,
76, 34, 27, 40, 30, 76, 46, 34, 27, 46, 76, 39,
41, 29, 37, 31, 30, 76, 46, 34, 31, 39, 64, 76,
27, 40, 30, 76, 46, 34, 31, 76, 34, 31, 27, 44,
46, 76, 46, 34, 27, 46, 76, 32, 31, 30, 66, 78,
76, 76, 76, 76, 76, 76, 76, 76, 1, 40, 30, 76,
41, 40, 76, 46, 34, 31, 76, 42, 31, 30, 31, 45,
46, 27, 38, 64, 76, 46, 34, 31, 45, 31, 76, 49,
41, 44, 30, 45, 76, 27, 42, 42, 31, 27, 44, 65,
78, 76, 76, 76, 76, 76, 76, 76, 76, 13, 51, 76,
40, 27, 39, 31, 76, 35, 45, 76, 15, 52, 51, 39,
27, 40, 30, 35, 27, 45, 64, 76, 11, 35, 40, 33,
76, 41, 32, 76, 11, 35, 40, 33, 45, 66, 78, 76,
76, 76, 76, 76, 76, 76, 76, 12, 41, 41, 37, 76,
41, 40, 76, 39, 51, 76, 23, 41, 44, 37, 45, 64,
76, 51, 31, 76, 13, 35, 33, 34, 46, 51, 64, 76,
27, 40, 30, 76, 30, 31, 45, 42, 27, 35, 44, 67,
78, 76, 76, 76, 76, 76, 76, 76, 76, 14, 41, 46,
34, 35, 40, 33, 76, 28, 31, 45, 35, 30, 31, 76,
44, 31, 39, 27, 35, 40, 45, 63, 76, 18, 41, 47,
40, 30, 76, 46, 34, 31, 76, 30, 31, 29, 27, 51,
78, 76, 76, 76, 76, 76, 76, 76, 76, 15, 32, 76,
46, 34, 27, 46, 76, 29, 41, 38, 41, 45, 45, 27,
38, 76, 23, 44, 31, 29, 37, 64, 76, 28, 41, 47,
40, 30, 38, 31, 45, 45, 76, 27, 40, 30, 76, 28,
27, 44, 31, 78, 76, 76, 76, 76, 76, 76, 76, 76,
20, 34, 31, 76, 38, 41, 40, 31, 76, 27, 40, 30,
76, 38, 31, 48, 31, 38, 76, 45, 27, 40, 30, 45,
76, 45, 46, 44, 31, 46, 29, 34, 76, 32, 27, 44,
76, 27, 49, 27, 51, 78, 76, 76, 76, 76, 76, 76,
76, 76]]),
torch.tensor([[0, 0, 0, 1069, 11]]),
torch.tensor([[0, 0, 0, 1069, 11]]),
]
# fmt: on
self.assertTrue(torch.allclose(tokens[0], EXPECTED_OUTPUT[0]))
self.assertTrue(torch.allclose(tokens[1], EXPECTED_OUTPUT[1]))
self.assertTrue(torch.allclose(tokens[2], EXPECTED_OUTPUT[2]))
@require_torch
def test_5b_lyrics_tokenizer(self):
"""
The outputs are similar that open AI but do not have the same format as this one is adapted to the HF integration.
"""
import torch
tokenizer = JukeboxTokenizer.from_pretrained("openai/jukebox-5b-lyrics")
tokens = tokenizer(**self.metas)["input_ids"]
# fmt: off
EXPECTED_OUTPUT = [
torch.tensor([[
0, 0, 0, 1069, 11, -1, -1, -1, -1, 9, 77, 39,
31, 46, 77, 27, 77, 46, 44, 27, 48, 31, 38, 38,
31, 44, 77, 32, 44, 41, 39, 77, 27, 40, 77, 27,
40, 46, 35, 43, 47, 31, 77, 38, 27, 40, 30, 64,
79, 77, 77, 77, 77, 77, 77, 77, 77, 23, 34, 41,
77, 45, 27, 35, 30, 77, 72, 20, 49, 41, 77, 48,
27, 45, 46, 77, 27, 40, 30, 77, 46, 44, 47, 40,
37, 38, 31, 45, 45, 77, 38, 31, 33, 45, 77, 41,
32, 77, 45, 46, 41, 40, 31, 79, 77, 77, 77, 77,
77, 77, 77, 77, 19, 46, 27, 40, 30, 77, 35, 40,
77, 46, 34, 31, 77, 30, 31, 45, 31, 44, 46, 63,
77, 63, 77, 63, 77, 63, 77, 14, 31, 27, 44, 77,
46, 34, 31, 39, 64, 77, 41, 40, 77, 46, 34, 31,
77, 45, 27, 40, 30, 64, 79, 77, 77, 77, 77, 77,
77, 77, 77, 8, 27, 38, 32, 77, 45, 47, 40, 37,
77, 27, 77, 45, 34, 27, 46, 46, 31, 44, 31, 30,
77, 48, 35, 45, 27, 33, 31, 77, 38, 35, 31, 45,
64, 77, 49, 34, 41, 45, 31, 77, 32, 44, 41, 49,
40, 64, 79, 77, 77, 77, 77, 77, 77, 77, 77, 1,
40, 30, 77, 49, 44, 35, 40, 37, 38, 31, 30, 77,
38, 35, 42, 64, 77, 27, 40, 30, 77, 45, 40, 31,
31, 44, 77, 41, 32, 77, 29, 41, 38, 30, 77, 29,
41, 39, 39, 27, 40, 30, 64, 79, 77, 77, 77, 77,
77, 77, 77, 77, 20, 31, 38, 38, 77, 46, 34, 27,
46, 77, 35, 46, 45, 77, 45, 29, 47, 38, 42, 46,
41, 44, 77, 49, 31, 38, 38, 77, 46, 34, 41, 45,
31, 77, 42, 27, 45, 45, 35, 41, 40, 45, 77, 44,
31, 27, 30, 79, 77, 77, 77, 77, 77, 77, 77, 77,
23, 34, 35, 29, 34, 77, 51, 31, 46, 77, 45, 47,
44, 48, 35, 48, 31, 64, 77, 45, 46, 27, 39, 42,
31, 30, 77, 41, 40, 77, 46, 34, 31, 45, 31, 77,
38, 35, 32, 31, 38, 31, 45, 45, 77, 46, 34, 35,
40, 33, 45, 64, 79, 77, 77, 77, 77, 77, 77, 77,
77, 20, 34, 31, 77, 34, 27, 40, 30, 77, 46, 34,
27, 46, 77, 39, 41, 29, 37, 31, 30, 77, 46, 34,
31, 39, 64, 77, 27, 40, 30, 77, 46, 34, 31, 77,
34, 31, 27, 44, 46, 77, 46, 34, 27, 46, 77, 32,
31, 30, 66, 79, 77, 77, 77, 77, 77, 77, 77, 77,
1, 40, 30, 77, 41, 40, 77, 46, 34, 31, 77, 42,
31, 30, 31, 45, 46, 27, 38, 64, 77, 46, 34, 31,
45, 31, 77, 49, 41, 44, 30, 45, 77, 27, 42, 42,
31, 27, 44, 65, 79, 77, 77, 77, 77, 77, 77, 77,
77, 13, 51, 77, 40, 27, 39, 31, 77, 35, 45, 77,
15, 52, 51, 39, 27, 40, 30, 35, 27, 45, 64, 77,
11, 35, 40, 33, 77, 41, 32, 77, 11, 35, 40, 33,
45, 66, 79, 77, 77, 77, 77, 77, 77, 77, 77, 12,
41, 41, 37, 77, 41, 40, 77, 39, 51, 77, 23, 41,
44, 37, 45, 64, 77, 51, 31, 77, 13, 35, 33, 34,
46, 51, 64, 77, 27, 40, 30, 77, 30, 31, 45, 42,
27, 35, 44, 67, 79, 77, 77, 77, 77, 77, 77, 77,
77, 14, 41, 46, 34, 35, 40, 33, 77, 28, 31, 45,
35, 30, 31, 77, 44, 31, 39, 27, 35, 40, 45, 63,
77, 18, 41, 47, 40, 30, 77, 46, 34, 31, 77, 30,
31, 29, 27, 51, 79, 77, 77, 77, 77, 77, 77, 77,
77, 15, 32, 77, 46, 34, 27, 46, 77, 29, 41, 38,
41, 45, 45, 27, 38, 77, 23, 44, 31, 29, 37, 64,
77, 28, 41, 47, 40, 30, 38, 31, 45, 45, 77, 27,
40, 30, 77, 28, 27, 44, 31, 79, 77, 77, 77, 77,
77, 77, 77, 77, 20, 34, 31, 77, 38, 41, 40, 31,
77, 27, 40, 30, 77, 38, 31, 48, 31, 38, 77, 45,
27, 40, 30, 45, 77, 45, 46, 44, 31, 46, 29, 34,
77, 32, 27, 44, 77, 27, 49, 27, 51, 79, 77, 77,
77, 77, 77, 77, 77, 77]]),
torch.tensor([[0, 0, 0, 1069, 11, -1, -1, -1, -1]]),
torch.tensor([[0, 0, 0, 1069, 11, -1, -1, -1, -1]]),
]
# fmt: on
self.assertTrue(torch.allclose(tokens[0], EXPECTED_OUTPUT[0]))
self.assertTrue(torch.allclose(tokens[1], EXPECTED_OUTPUT[1]))
self.assertTrue(torch.allclose(tokens[2], EXPECTED_OUTPUT[2]))

View File

@@ -1,744 +0,0 @@
# coding=utf-8
# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from transformers import MegaConfig, is_torch_available
from transformers.testing_utils import (
TestCasePlus,
is_flaky,
require_torch,
require_torch_fp16,
slow,
torch_device,
)
from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask
from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
import torch
from transformers import (
MegaForCausalLM,
MegaForMaskedLM,
MegaForMultipleChoice,
MegaForQuestionAnswering,
MegaForSequenceClassification,
MegaForTokenClassification,
MegaModel,
)
class MegaModelTester:
def __init__(
self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_input_mask=True,
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=2,
intermediate_size=37,
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_positions=1024,
bidirectional=False, # needed for decoding, and can't modify common generation tests; test separately by overriding
ema_projection_size=16,
shared_representation_size=64,
use_chunking=False,
chunk_size=32,
attention_activation="softmax",
use_normalized_ffn=True,
nffn_hidden_size=24,
add_token_type_embeddings=True,
type_vocab_size=2,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
num_choices=4,
scope=None,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_mask = use_input_mask
self.add_token_type_embeddings = add_token_type_embeddings
self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.intermediate_size = intermediate_size
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_positions = max_positions
self.bidirectional = bidirectional
self.ema_projection_size = ema_projection_size
self.shared_representation_size = shared_representation_size
self.use_chunking = use_chunking
self.chunk_size = chunk_size
self.attention_activation = attention_activation
self.use_normalized_ffn = use_normalized_ffn
self.nffn_hidden_size = nffn_hidden_size
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
self.num_attention_heads = 1
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
input_mask = None
if self.use_input_mask:
input_mask = random_attention_mask([self.batch_size, self.seq_length])
token_type_ids = None
if self.add_token_type_embeddings:
token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
sequence_labels = None
token_labels = None
choice_labels = None
if self.use_labels:
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)
config = self.get_config()
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
def get_config(self):
return MegaConfig(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
intermediate_size=self.intermediate_size,
hidden_dropout_prob=self.hidden_dropout_prob,
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range,
# added args
add_token_type_embeddings=self.add_token_type_embeddings,
max_positions=self.max_positions,
bidirectional=self.bidirectional,
ema_projection_size=self.ema_projection_size,
shared_representation_size=self.shared_representation_size,
use_chunking=self.use_chunking,
chunk_size=self.chunk_size,
attention_activation=self.attention_activation,
use_normalized_ffn=self.use_normalized_ffn,
nffn_hidden_size=self.nffn_hidden_size,
)
def get_pipeline_config(self):
config = self.get_config()
config.vocab_size = 300
return config
def prepare_config_and_inputs_for_decoder(self):
(
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
) = self.prepare_config_and_inputs()
config.is_decoder = True
config.bidirectional = False
encoder_hidden_states = floats_tensor([self.batch_size, self.seq_length, self.hidden_size])
encoder_attention_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
return (
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
encoder_hidden_states,
encoder_attention_mask,
)
def create_and_check_model(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = MegaModel(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
result = model(input_ids, token_type_ids=token_type_ids)
result = model(input_ids)
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
self.parent.assertEqual(result.pooler_output.shape, (self.batch_size, self.hidden_size))
def create_and_check_model_as_decoder(
self,
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
encoder_hidden_states,
encoder_attention_mask,
):
config.add_cross_attention = True
model = MegaModel(config)
model.to(torch_device)
model.eval()
result = model(
input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
encoder_hidden_states=encoder_hidden_states,
encoder_attention_mask=encoder_attention_mask,
)
result = model(
input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
encoder_hidden_states=encoder_hidden_states,
)
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
self.parent.assertEqual(result.pooler_output.shape, (self.batch_size, self.hidden_size))
def create_and_check_for_causal_lm(
self,
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
encoder_hidden_states,
encoder_attention_mask,
):
model = MegaForCausalLM(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
def create_and_check_decoder_model_past_large_inputs(
self,
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
encoder_hidden_states,
encoder_attention_mask,
):
config.is_decoder = True
config.bidirectional = False
config.add_cross_attention = True
model = MegaForCausalLM(config=config).to(torch_device).eval()
# make sure that ids don't start with pad token
mask = input_ids.ne(config.pad_token_id).long()
input_ids = input_ids * mask
# first forward pass
outputs = model(
input_ids,
attention_mask=input_mask,
encoder_hidden_states=encoder_hidden_states,
encoder_attention_mask=encoder_attention_mask,
use_cache=True,
)
past_key_values = outputs.past_key_values
# create hypothetical multiple next token and extent to next_input_ids
next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)
# make sure that ids don't start with pad token
mask = next_tokens.ne(config.pad_token_id).long()
next_tokens = next_tokens * mask
next_mask = ids_tensor((self.batch_size, 1), vocab_size=2)
# append to next input_ids and
next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
next_attention_mask = torch.cat([input_mask, next_mask], dim=-1)
output_from_no_past = model(
next_input_ids,
attention_mask=next_attention_mask,
encoder_hidden_states=encoder_hidden_states,
encoder_attention_mask=encoder_attention_mask,
output_hidden_states=True,
)["hidden_states"][0]
output_from_past = model(
next_tokens,
attention_mask=next_mask,
encoder_hidden_states=encoder_hidden_states,
encoder_attention_mask=encoder_attention_mask,
past_key_values=past_key_values,
output_hidden_states=True,
)["hidden_states"][0]
# select random slice
random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
output_from_no_past_slice = output_from_no_past[:, -1:, random_slice_idx].detach()
output_from_past_slice = output_from_past[:, :, random_slice_idx].detach()
self.parent.assertTrue(output_from_past_slice.shape[1] == next_tokens.shape[1])
# test that outputs are equal for slice
self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3))
def create_and_check_decoder_model_with_chunking(
self,
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
encoder_hidden_states,
encoder_attention_mask,
):
config.use_chunking = True
config.output_attentions = True
config.attention_activation = "laplace"
config.chunk_size = input_ids.size(1) * 2
model = MegaForCausalLM(config).to(torch_device).eval()
input_ids = input_ids.repeat(1, 8)
# multiply the sequence length by 8 since we repeat the same ids 8 times in input_ids
input_mask = random_attention_mask([self.batch_size, self.seq_length * 8])
result = model(input_ids, attention_mask=input_mask)
# test if the sequence length of attentions is same provided chunk_size
self.parent.assertEqual(result["attentions"][0].shape[-1], config.chunk_size)
def create_and_check_for_masked_lm(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = MegaForMaskedLM(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
def create_and_check_for_token_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = MegaForTokenClassification(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))
def create_and_check_for_multiple_choice(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_choices = self.num_choices
model = MegaForMultipleChoice(config=config)
model.to(torch_device)
model.eval()
multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
result = model(
multiple_choice_inputs_ids,
attention_mask=multiple_choice_input_mask,
token_type_ids=multiple_choice_token_type_ids,
labels=choice_labels,
)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices))
def create_and_check_for_question_answering(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = MegaForQuestionAnswering(config=config)
model.to(torch_device)
model.eval()
result = model(
input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
start_positions=sequence_labels,
end_positions=sequence_labels,
)
self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))
# extra checks for Mega-specific model functionality
def create_and_check_bidirectionality(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.bidirectional = True
model = MegaModel(config)
model.to(torch_device)
model.eval()
# no mask
result = model(input_ids)
# with mask & token types
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
self.parent.assertEqual(result[0].shape, (self.batch_size, self.seq_length, self.hidden_size))
def check_chunking_shorter_sequence(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.use_chunking = True
config.chunk_size = input_ids.size(1) + 25
model = MegaModel(config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
self.parent.assertEqual(result[0].shape, (self.batch_size, self.seq_length, self.hidden_size))
def check_chunking_longer_sequence(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.use_chunking = True
# we want the chunk size to be < sequence length, and the sequence length to be a multiple of chunk size
config.chunk_size = input_ids.size(1) * 2
model = MegaModel(config)
model.to(torch_device)
model.eval()
result = model(
input_ids.repeat(1, 8),
)
self.parent.assertEqual(result[0].shape, (self.batch_size, self.seq_length * 8, self.hidden_size))
def check_laplace_self_attention(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.attention_activation = "laplace"
model = MegaModel(config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
self.parent.assertEqual(result[0].shape, (self.batch_size, self.seq_length, self.hidden_size))
def check_relu2_self_attention(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.attention_activation = "relu2"
model = MegaModel(config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
self.parent.assertEqual(result[0].shape, (self.batch_size, self.seq_length, self.hidden_size))
def check_sequence_length_beyond_max_positions(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.max_positions = self.seq_length - 2
model = MegaModel(config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
self.parent.assertEqual(result[0].shape, (self.batch_size, self.seq_length, self.hidden_size))
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict
@require_torch
class MegaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(
MegaForCausalLM,
MegaForMaskedLM,
MegaModel,
MegaForSequenceClassification,
MegaForTokenClassification,
MegaForMultipleChoice,
MegaForQuestionAnswering,
)
if is_torch_available()
else ()
)
all_generative_model_classes = (MegaForCausalLM,) if is_torch_available() else ()
pipeline_model_mapping = (
{
"feature-extraction": MegaModel,
"fill-mask": MegaForMaskedLM,
"question-answering": MegaForQuestionAnswering,
"text-classification": MegaForSequenceClassification,
"text-generation": MegaForCausalLM,
"token-classification": MegaForTokenClassification,
"zero-shot": MegaForSequenceClassification,
}
if is_torch_available()
else {}
)
fx_compatible = False
test_head_masking = False
test_pruning = False
def setUp(self):
self.model_tester = MegaModelTester(self)
self.config_tester = ConfigTester(self, config_class=MegaConfig, hidden_size=37)
# TODO: @ydshieh
@is_flaky(description="Sometimes gives `AssertionError` on expected outputs")
def test_pipeline_fill_mask(self):
super().test_pipeline_fill_mask()
# TODO: @ydshieh
@is_flaky(
description="Sometimes gives `RuntimeError: probability tensor contains either `inf`, `nan` or element < 0`"
)
def test_pipeline_text_generation(self):
super().test_pipeline_text_generation()
def test_config(self):
self.config_tester.run_common_tests()
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
def test_model_as_decoder(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)
def test_model_as_decoder_with_default_input_mask(self):
# This regression test was failing with PyTorch < 1.3
(
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
encoder_hidden_states,
encoder_attention_mask,
) = self.model_tester.prepare_config_and_inputs_for_decoder()
input_mask = None
self.model_tester.create_and_check_model_as_decoder(
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
encoder_hidden_states,
encoder_attention_mask,
)
def test_for_causal_lm(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
self.model_tester.create_and_check_for_causal_lm(*config_and_inputs)
def test_decoder_model_past_with_large_inputs(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
self.model_tester.create_and_check_decoder_model_past_large_inputs(*config_and_inputs)
def test_decoder_model_with_chunking(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
self.model_tester.create_and_check_decoder_model_with_chunking(*config_and_inputs)
def test_for_masked_lm(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)
def test_for_token_classification(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_token_classification(*config_and_inputs)
def test_for_multiple_choice(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs)
def test_for_question_answering(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_question_answering(*config_and_inputs)
def test_for_bidirectionality(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_bidirectionality(*config_and_inputs)
def test_for_chunking_shorter_sequence(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.check_chunking_shorter_sequence(*config_and_inputs)
def test_for_chunking_longer_sequence(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.check_chunking_longer_sequence(*config_and_inputs)
def test_for_laplace_attention(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.check_laplace_self_attention(*config_and_inputs)
def test_for_relu2_attention(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.check_relu2_self_attention(*config_and_inputs)
def test_for_sequence_length_beyond_max_positions(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.check_sequence_length_beyond_max_positions(*config_and_inputs)
@require_torch_fp16
def test_generate_fp16(self):
config, input_ids, _, attention_mask, *_ = self.model_tester.prepare_config_and_inputs_for_decoder()
# attention_mask = torch.LongTensor(input_ids.ne(1)).to(torch_device)
model = MegaForCausalLM(config).eval().to(torch_device)
model.half()
model.generate(input_ids, attention_mask=attention_mask)
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
def test_sequence_classification_model(self):
config, input_ids, _, attention_mask, *_ = self.model_tester.prepare_config_and_inputs()
config.num_labels = self.model_tester.num_labels
sequence_labels = ids_tensor([self.model_tester.batch_size], self.model_tester.type_sequence_label_size)
model = MegaForSequenceClassification(config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels)
self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels))
def test_sequence_classification_model_for_multi_label(self):
config, input_ids, _, attention_mask, *_ = self.model_tester.prepare_config_and_inputs()
config.num_labels = self.model_tester.num_labels
config.problem_type = "multi_label_classification"
sequence_labels = ids_tensor(
[self.model_tester.batch_size, config.num_labels], self.model_tester.type_sequence_label_size
).to(torch.float)
model = MegaForSequenceClassification(config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels)
self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels))
@slow
def test_model_from_pretrained(self):
model_name = "mnaylor/mega-base-wikitext"
model = MegaModel.from_pretrained(model_name)
self.assertIsNotNone(model)
@unittest.skip(reason="Does not work on the tiny model as we keep hitting edge cases.")
def test_cpu_offload(self):
super().test_cpu_offload()
@unittest.skip(reason="Does not work on the tiny model as we keep hitting edge cases.")
def test_disk_offload(self):
super().test_disk_offload()
@unittest.skip(reason="Does not work on the tiny model as we keep hitting edge cases.")
def test_model_parallelism(self):
super().test_model_parallelism()
@unittest.skip(
reason=(
"Calling `self.attention_function` in `MegaMovingAverageGatedAttention.forward` changes the submodules on "
"device 1 to device 0 (also changes `requires_grad`). No idea how this could happen for now."
)
)
def test_multi_gpu_data_parallel_forward(self):
super().test_multi_gpu_data_parallel_forward()
@unittest.skip(reason="Tracing of the dynamically computed `MegaMultiDimensionDampedEma._kernel` doesn't work.")
def test_torchscript_simple(self):
super().test_torchscript_simple()
@unittest.skip(reason="Tracing of the dynamically computed `MegaMultiDimensionDampedEma._kernel` doesn't work.")
def test_torchscript_output_hidden_state(self):
super().test_torchscript_output_hidden_state()
@unittest.skip(reason="Tracing of the dynamically computed `MegaMultiDimensionDampedEma._kernel` doesn't work.")
def test_torchscript_output_attentions(self):
super().test_torchscript_output_attentions()
@require_torch
class MegaModelIntegrationTest(TestCasePlus):
@slow
def test_inference_masked_lm(self):
model = MegaForMaskedLM.from_pretrained("mnaylor/mega-base-wikitext")
input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
with torch.no_grad():
output = model(input_ids)[0]
expected_shape = torch.Size((1, 11, 50265))
self.assertEqual(output.shape, expected_shape)
# compare the actual values for a slice.
expected_slice = torch.tensor(
[[[67.8389, 10.1470, -32.7148], [-11.1655, 29.1152, 23.1304], [-3.8015, 66.0397, 29.6733]]]
)
self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4))
@slow
def test_inference_no_head(self):
model = MegaModel.from_pretrained("mnaylor/mega-base-wikitext")
input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
with torch.no_grad():
output = model(input_ids)[0]
expected_shape = torch.Size((1, 11, 128))
self.assertEqual(output.shape, expected_shape)
# compare the actual values for a slice. taken from output[:, :3, :3]
expected_slice = torch.tensor(
[[[1.1767, -0.6349, 2.8494], [-0.5109, -0.7745, 1.9495], [-0.3287, -0.2111, 3.3367]]]
)
self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4))

View File

@@ -1,382 +0,0 @@
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Testing suite for the PyTorch Nat model."""
import collections
import unittest
from transformers import NatConfig
from transformers.testing_utils import require_natten, require_torch, require_vision, slow, torch_device
from transformers.utils import cached_property, is_torch_available, is_vision_available
from ...test_backbone_common import BackboneTesterMixin
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_tensor, ids_tensor
from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
import torch
from torch import nn
from transformers import NatBackbone, NatForImageClassification, NatModel
if is_vision_available():
from PIL import Image
from transformers import AutoImageProcessor
class NatModelTester:
def __init__(
self,
parent,
batch_size=13,
image_size=64,
patch_size=4,
num_channels=3,
embed_dim=16,
depths=[1, 2, 1],
num_heads=[2, 4, 8],
kernel_size=3,
mlp_ratio=2.0,
qkv_bias=True,
hidden_dropout_prob=0.0,
attention_probs_dropout_prob=0.0,
drop_path_rate=0.1,
hidden_act="gelu",
patch_norm=True,
initializer_range=0.02,
layer_norm_eps=1e-5,
is_training=True,
scope=None,
use_labels=True,
num_labels=10,
out_features=["stage1", "stage2"],
out_indices=[1, 2],
):
self.parent = parent
self.batch_size = batch_size
self.image_size = image_size
self.patch_size = patch_size
self.num_channels = num_channels
self.embed_dim = embed_dim
self.depths = depths
self.num_heads = num_heads
self.kernel_size = kernel_size
self.mlp_ratio = mlp_ratio
self.qkv_bias = qkv_bias
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.drop_path_rate = drop_path_rate
self.hidden_act = hidden_act
self.patch_norm = patch_norm
self.layer_norm_eps = layer_norm_eps
self.initializer_range = initializer_range
self.is_training = is_training
self.scope = scope
self.use_labels = use_labels
self.num_labels = num_labels
self.out_features = out_features
self.out_indices = out_indices
def prepare_config_and_inputs(self):
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
labels = None
if self.use_labels:
labels = ids_tensor([self.batch_size], self.num_labels)
config = self.get_config()
return config, pixel_values, labels
def get_config(self):
return NatConfig(
num_labels=self.num_labels,
image_size=self.image_size,
patch_size=self.patch_size,
num_channels=self.num_channels,
embed_dim=self.embed_dim,
depths=self.depths,
num_heads=self.num_heads,
kernel_size=self.kernel_size,
mlp_ratio=self.mlp_ratio,
qkv_bias=self.qkv_bias,
hidden_dropout_prob=self.hidden_dropout_prob,
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
drop_path_rate=self.drop_path_rate,
hidden_act=self.hidden_act,
patch_norm=self.patch_norm,
layer_norm_eps=self.layer_norm_eps,
initializer_range=self.initializer_range,
out_features=self.out_features,
out_indices=self.out_indices,
)
def create_and_check_model(self, config, pixel_values, labels):
model = NatModel(config=config)
model.to(torch_device)
model.eval()
result = model(pixel_values)
expected_height = expected_width = (config.image_size // config.patch_size) // (2 ** (len(config.depths) - 1))
expected_dim = int(config.embed_dim * 2 ** (len(config.depths) - 1))
self.parent.assertEqual(
result.last_hidden_state.shape, (self.batch_size, expected_height, expected_width, expected_dim)
)
def create_and_check_for_image_classification(self, config, pixel_values, labels):
model = NatForImageClassification(config)
model.to(torch_device)
model.eval()
result = model(pixel_values, labels=labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))
# test greyscale images
config.num_channels = 1
model = NatForImageClassification(config)
model.to(torch_device)
model.eval()
pixel_values = floats_tensor([self.batch_size, 1, self.image_size, self.image_size])
result = model(pixel_values)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))
def create_and_check_backbone(self, config, pixel_values, labels):
model = NatBackbone(config=config)
model.to(torch_device)
model.eval()
result = model(pixel_values)
# verify hidden states
self.parent.assertEqual(len(result.feature_maps), len(config.out_features))
self.parent.assertListEqual(list(result.feature_maps[0].shape), [self.batch_size, model.channels[0], 16, 16])
# verify channels
self.parent.assertEqual(len(model.channels), len(config.out_features))
# verify backbone works with out_features=None
config.out_features = None
model = NatBackbone(config=config)
model.to(torch_device)
model.eval()
result = model(pixel_values)
# verify feature maps
self.parent.assertEqual(len(result.feature_maps), 1)
self.parent.assertListEqual(list(result.feature_maps[0].shape), [self.batch_size, model.channels[-1], 4, 4])
# verify channels
self.parent.assertEqual(len(model.channels), 1)
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
config, pixel_values, labels = config_and_inputs
inputs_dict = {"pixel_values": pixel_values}
return config, inputs_dict
@require_natten
@require_torch
class NatModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(
NatModel,
NatForImageClassification,
NatBackbone,
)
if is_torch_available()
else ()
)
pipeline_model_mapping = (
{"image-feature-extraction": NatModel, "image-classification": NatForImageClassification}
if is_torch_available()
else {}
)
fx_compatible = False
test_torchscript = False
test_pruning = False
test_resize_embeddings = False
test_head_masking = False
def setUp(self):
self.model_tester = NatModelTester(self)
self.config_tester = ConfigTester(self, config_class=NatConfig, embed_dim=37)
def test_config(self):
self.create_and_test_config_common_properties()
self.config_tester.create_and_test_config_to_json_string()
self.config_tester.create_and_test_config_to_json_file()
self.config_tester.create_and_test_config_from_and_save_pretrained()
self.config_tester.create_and_test_config_with_num_labels()
self.config_tester.check_config_can_be_init_without_params()
self.config_tester.check_config_arguments_init()
def create_and_test_config_common_properties(self):
return
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
def test_for_image_classification(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_image_classification(*config_and_inputs)
def test_backbone(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_backbone(*config_and_inputs)
@unittest.skip(reason="Nat does not use inputs_embeds")
def test_inputs_embeds(self):
pass
@unittest.skip(reason="Nat does not use feedforward chunking")
def test_feed_forward_chunking(self):
pass
def test_model_common_attributes(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
model = model_class(config)
self.assertIsInstance(model.get_input_embeddings(), (nn.Module))
x = model.get_output_embeddings()
self.assertTrue(x is None or isinstance(x, nn.Linear))
def test_attention_outputs(self):
self.skipTest("Nat's attention operation is handled entirely by NATTEN.")
def check_hidden_states_output(self, inputs_dict, config, model_class, image_size):
model = model_class(config)
model.to(torch_device)
model.eval()
with torch.no_grad():
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
hidden_states = outputs.hidden_states
expected_num_layers = getattr(
self.model_tester, "expected_num_hidden_layers", len(self.model_tester.depths) + 1
)
self.assertEqual(len(hidden_states), expected_num_layers)
# Nat has a different seq_length
patch_size = (
config.patch_size
if isinstance(config.patch_size, collections.abc.Iterable)
else (config.patch_size, config.patch_size)
)
height = image_size[0] // patch_size[0]
width = image_size[1] // patch_size[1]
self.assertListEqual(
list(hidden_states[0].shape[-3:]),
[height, width, self.model_tester.embed_dim],
)
if model_class.__name__ != "NatBackbone":
reshaped_hidden_states = outputs.reshaped_hidden_states
self.assertEqual(len(reshaped_hidden_states), expected_num_layers)
batch_size, num_channels, height, width = reshaped_hidden_states[0].shape
reshaped_hidden_states = (
reshaped_hidden_states[0].view(batch_size, num_channels, height, width).permute(0, 2, 3, 1)
)
self.assertListEqual(
list(reshaped_hidden_states.shape[-3:]),
[height, width, self.model_tester.embed_dim],
)
def test_hidden_states_output(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
image_size = (
self.model_tester.image_size
if isinstance(self.model_tester.image_size, collections.abc.Iterable)
else (self.model_tester.image_size, self.model_tester.image_size)
)
for model_class in self.all_model_classes:
inputs_dict["output_hidden_states"] = True
self.check_hidden_states_output(inputs_dict, config, model_class, image_size)
# check that output_hidden_states also work using config
del inputs_dict["output_hidden_states"]
config.output_hidden_states = True
self.check_hidden_states_output(inputs_dict, config, model_class, image_size)
@slow
def test_model_from_pretrained(self):
model_name = "shi-labs/nat-mini-in1k-224"
model = NatModel.from_pretrained(model_name)
self.assertIsNotNone(model)
def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
configs_no_init = _config_zero_init(config)
for model_class in self.all_model_classes:
model = model_class(config=configs_no_init)
for name, param in model.named_parameters():
if "embeddings" not in name and param.requires_grad:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
@require_natten
@require_vision
@require_torch
class NatModelIntegrationTest(unittest.TestCase):
@cached_property
def default_image_processor(self):
return AutoImageProcessor.from_pretrained("shi-labs/nat-mini-in1k-224") if is_vision_available() else None
@slow
def test_inference_image_classification_head(self):
model = NatForImageClassification.from_pretrained("shi-labs/nat-mini-in1k-224").to(torch_device)
image_processor = self.default_image_processor
image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
# forward pass
with torch.no_grad():
outputs = model(**inputs)
# verify the logits
expected_shape = torch.Size((1, 1000))
self.assertEqual(outputs.logits.shape, expected_shape)
expected_slice = torch.tensor([0.3805, -0.8676, -0.3912]).to(torch_device)
self.assertTrue(torch.allclose(outputs.logits[0, :3], expected_slice, atol=1e-4))
@require_torch
@require_natten
class NatBackboneTest(unittest.TestCase, BackboneTesterMixin):
all_model_classes = (NatBackbone,) if is_torch_available() else ()
config_class = NatConfig
def setUp(self):
self.model_tester = NatModelTester(self)

View File

@@ -1,489 +0,0 @@
# coding=utf-8
# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import tempfile
import unittest
from transformers import NezhaConfig, is_torch_available
from transformers.models.auto import get_values
from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask
from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
import torch
from transformers import (
MODEL_FOR_PRETRAINING_MAPPING,
NezhaForMaskedLM,
NezhaForMultipleChoice,
NezhaForNextSentencePrediction,
NezhaForPreTraining,
NezhaForQuestionAnswering,
NezhaForSequenceClassification,
NezhaForTokenClassification,
NezhaModel,
)
class NezhaModelTester:
def __init__(
self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_input_mask=True,
use_token_type_ids=True,
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=128,
max_relative_position=32,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
num_choices=4,
scope=None,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_mask = use_input_mask
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
input_mask = None
if self.use_input_mask:
input_mask = random_attention_mask([self.batch_size, self.seq_length])
token_type_ids = None
if self.use_token_type_ids:
token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
sequence_labels = None
token_labels = None
choice_labels = None
if self.use_labels:
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)
config = self.get_config()
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
def get_config(self):
"""
Returns a tiny configuration by default.
"""
return NezhaConfig(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
num_attention_heads=self.num_attention_heads,
intermediate_size=self.intermediate_size,
hidden_act=self.hidden_act,
hidden_dropout_prob=self.hidden_dropout_prob,
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size,
is_decoder=False,
initializer_range=self.initializer_range,
)
def prepare_config_and_inputs_for_decoder(self):
(
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
) = self.prepare_config_and_inputs()
config.is_decoder = True
encoder_hidden_states = floats_tensor([self.batch_size, self.seq_length, self.hidden_size])
encoder_attention_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
return (
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
encoder_hidden_states,
encoder_attention_mask,
)
def create_and_check_model(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = NezhaModel(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
result = model(input_ids, token_type_ids=token_type_ids)
result = model(input_ids)
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
self.parent.assertEqual(result.pooler_output.shape, (self.batch_size, self.hidden_size))
def create_and_check_model_as_decoder(
self,
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
encoder_hidden_states,
encoder_attention_mask,
):
config.add_cross_attention = True
model = NezhaModel(config)
model.to(torch_device)
model.eval()
result = model(
input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
encoder_hidden_states=encoder_hidden_states,
encoder_attention_mask=encoder_attention_mask,
)
result = model(
input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
encoder_hidden_states=encoder_hidden_states,
)
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
self.parent.assertEqual(result.pooler_output.shape, (self.batch_size, self.hidden_size))
def create_and_check_for_masked_lm(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = NezhaForMaskedLM(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
def create_and_check_for_next_sequence_prediction(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = NezhaForNextSentencePrediction(config=config)
model.to(torch_device)
model.eval()
result = model(
input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
labels=sequence_labels,
)
self.parent.assertEqual(result.logits.shape, (self.batch_size, 2))
def create_and_check_for_pretraining(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = NezhaForPreTraining(config=config)
model.to(torch_device)
model.eval()
result = model(
input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
labels=token_labels,
next_sentence_label=sequence_labels,
)
self.parent.assertEqual(result.prediction_logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
self.parent.assertEqual(result.seq_relationship_logits.shape, (self.batch_size, 2))
def create_and_check_for_question_answering(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = NezhaForQuestionAnswering(config=config)
model.to(torch_device)
model.eval()
result = model(
input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
start_positions=sequence_labels,
end_positions=sequence_labels,
)
self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))
def create_and_check_for_sequence_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = NezhaForSequenceClassification(config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))
def create_and_check_for_token_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = NezhaForTokenClassification(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))
def create_and_check_for_multiple_choice(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_choices = self.num_choices
model = NezhaForMultipleChoice(config=config)
model.to(torch_device)
model.eval()
multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
result = model(
multiple_choice_inputs_ids,
attention_mask=multiple_choice_input_mask,
token_type_ids=multiple_choice_token_type_ids,
labels=choice_labels,
)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices))
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict
@require_torch
class NezhaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(
NezhaModel,
NezhaForMaskedLM,
NezhaForMultipleChoice,
NezhaForNextSentencePrediction,
NezhaForPreTraining,
NezhaForQuestionAnswering,
NezhaForSequenceClassification,
NezhaForTokenClassification,
)
if is_torch_available()
else ()
)
pipeline_model_mapping = (
{
"feature-extraction": NezhaModel,
"fill-mask": NezhaForMaskedLM,
"question-answering": NezhaForQuestionAnswering,
"text-classification": NezhaForSequenceClassification,
"token-classification": NezhaForTokenClassification,
"zero-shot": NezhaForSequenceClassification,
}
if is_torch_available()
else {}
)
fx_compatible = True
# special case for ForPreTraining model
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
if return_labels:
if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING):
inputs_dict["labels"] = torch.zeros(
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
)
inputs_dict["next_sentence_label"] = torch.zeros(
self.model_tester.batch_size, dtype=torch.long, device=torch_device
)
return inputs_dict
def setUp(self):
self.model_tester = NezhaModelTester(self)
self.config_tester = ConfigTester(self, config_class=NezhaConfig, hidden_size=37)
def test_config(self):
self.config_tester.run_common_tests()
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
def test_model_as_decoder(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)
def test_model_as_decoder_with_default_input_mask(self):
# This regression test was failing with PyTorch < 1.3
(
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
encoder_hidden_states,
encoder_attention_mask,
) = self.model_tester.prepare_config_and_inputs_for_decoder()
input_mask = None
self.model_tester.create_and_check_model_as_decoder(
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
encoder_hidden_states,
encoder_attention_mask,
)
def test_for_masked_lm(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)
def test_for_multiple_choice(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs)
def test_for_next_sequence_prediction(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_next_sequence_prediction(*config_and_inputs)
def test_for_pretraining(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_pretraining(*config_and_inputs)
def test_for_question_answering(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_question_answering(*config_and_inputs)
def test_for_sequence_classification(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_sequence_classification(*config_and_inputs)
def test_for_token_classification(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_token_classification(*config_and_inputs)
@slow
def test_model_from_pretrained(self):
model_name = "sijunhe/nezha-cn-base"
model = NezhaModel.from_pretrained(model_name)
self.assertIsNotNone(model)
@slow
@require_torch_gpu
def test_torchscript_device_change(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
# NezhaForMultipleChoice behaves incorrectly in JIT environments.
if model_class == NezhaForMultipleChoice:
return
config.torchscript = True
model = model_class(config=config)
inputs_dict = self._prepare_for_class(inputs_dict, model_class)
traced_model = torch.jit.trace(
model, (inputs_dict["input_ids"].to("cpu"), inputs_dict["attention_mask"].to("cpu"))
)
with tempfile.TemporaryDirectory() as tmp:
torch.jit.save(traced_model, os.path.join(tmp, "bert.pt"))
loaded = torch.jit.load(os.path.join(tmp, "bert.pt"), map_location=torch_device)
loaded(inputs_dict["input_ids"].to(torch_device), inputs_dict["attention_mask"].to(torch_device))
@require_torch
class NezhaModelIntegrationTest(unittest.TestCase):
@slow
def test_inference_nezha_model(self):
model = NezhaModel.from_pretrained("sijunhe/nezha-cn-base")
input_ids = torch.tensor([[0, 1, 2, 3, 4, 5]])
attention_mask = torch.tensor([[0, 1, 1, 1, 1, 1]])
with torch.no_grad():
output = model(input_ids, attention_mask=attention_mask)[0]
expected_shape = torch.Size((1, 6, 768))
self.assertEqual(output.shape, expected_shape)
expected_slice = torch.tensor([[[0.0685, 0.2441, 0.1102], [0.0600, 0.1906, 0.1349], [0.0221, 0.0819, 0.0586]]])
self.assertTrue(torch.allclose(output[:, 1:4, 1:4], expected_slice, atol=1e-4))
@slow
def test_inference_nezha_masked_lm(self):
model = NezhaForMaskedLM.from_pretrained("sijunhe/nezha-cn-base")
input_ids = torch.tensor([[0, 1, 2, 3, 4, 5]])
attention_mask = torch.tensor([[1, 1, 1, 1, 1, 1]])
with torch.no_grad():
output = model(input_ids, attention_mask=attention_mask)[0]
expected_shape = torch.Size((1, 6, 21128))
self.assertEqual(output.shape, expected_shape)
expected_slice = torch.tensor(
[[-2.7939, -1.7902, -2.2189], [-2.8585, -1.8908, -2.3723], [-2.6499, -1.7750, -2.2558]]
)
self.assertTrue(torch.allclose(output[:, 1:4, 1:4], expected_slice, atol=1e-4))

View File

@@ -1,573 +0,0 @@
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
# Copyright 2021 NVIDIA Corporation. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Testing suite for the PyTorch QDQBERT model."""
import unittest
from transformers import QDQBertConfig, is_torch_available
from transformers.testing_utils import require_pytorch_quantization, require_torch, slow, torch_device
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask
from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
import torch
from transformers import (
QDQBertForMaskedLM,
QDQBertForMultipleChoice,
QDQBertForNextSentencePrediction,
QDQBertForQuestionAnswering,
QDQBertForSequenceClassification,
QDQBertForTokenClassification,
QDQBertLMHeadModel,
QDQBertModel,
)
class QDQBertModelTester:
def __init__(
self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_input_mask=True,
use_token_type_ids=True,
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
num_choices=4,
scope=None,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_mask = use_input_mask
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
def prepare_config_and_inputs(self):
# Set default quantizers before creating the model.
import pytorch_quantization.nn as quant_nn
from pytorch_quantization.tensor_quant import QuantDescriptor
# The default tensor quantizer is set to use Max calibration method
input_desc = QuantDescriptor(num_bits=8, calib_method="max")
# The default tensor quantizer is set to be per-channel quantization for weights
weight_desc = QuantDescriptor(num_bits=8, axis=((0,)))
quant_nn.QuantLinear.set_default_quant_desc_input(input_desc)
quant_nn.QuantLinear.set_default_quant_desc_weight(weight_desc)
# For the test cases, since QDQBert model is tested in one run without calibration, the quantized tensors are set as fake quantized tensors which give float type tensors in the end.
quant_nn.TensorQuantizer.use_fb_fake_quant = True
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
input_mask = None
if self.use_input_mask:
input_mask = random_attention_mask([self.batch_size, self.seq_length])
token_type_ids = None
if self.use_token_type_ids:
token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
sequence_labels = None
token_labels = None
choice_labels = None
if self.use_labels:
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)
config = self.get_config()
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
def get_config(self):
return QDQBertConfig(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
num_attention_heads=self.num_attention_heads,
intermediate_size=self.intermediate_size,
hidden_act=self.hidden_act,
hidden_dropout_prob=self.hidden_dropout_prob,
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size,
is_decoder=False,
initializer_range=self.initializer_range,
)
def prepare_config_and_inputs_for_decoder(self):
(
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
) = self.prepare_config_and_inputs()
config.is_decoder = True
encoder_hidden_states = floats_tensor([self.batch_size, self.seq_length, self.hidden_size])
encoder_attention_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
return (
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
encoder_hidden_states,
encoder_attention_mask,
)
def create_and_check_model(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = QDQBertModel(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
result = model(input_ids, token_type_ids=token_type_ids)
result = model(input_ids)
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
def create_and_check_model_as_decoder(
self,
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
encoder_hidden_states,
encoder_attention_mask,
):
config.add_cross_attention = True
model = QDQBertModel(config)
model.to(torch_device)
model.eval()
result = model(
input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
encoder_hidden_states=encoder_hidden_states,
encoder_attention_mask=encoder_attention_mask,
)
result = model(
input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
encoder_hidden_states=encoder_hidden_states,
)
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
def create_and_check_for_causal_lm(
self,
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
encoder_hidden_states,
encoder_attention_mask,
):
model = QDQBertLMHeadModel(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
def create_and_check_for_masked_lm(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = QDQBertForMaskedLM(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
def create_and_check_model_for_causal_lm_as_decoder(
self,
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
encoder_hidden_states,
encoder_attention_mask,
):
config.add_cross_attention = True
model = QDQBertLMHeadModel(config=config)
model.to(torch_device)
model.eval()
result = model(
input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
labels=token_labels,
encoder_hidden_states=encoder_hidden_states,
encoder_attention_mask=encoder_attention_mask,
)
result = model(
input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
labels=token_labels,
encoder_hidden_states=encoder_hidden_states,
)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
def create_and_check_decoder_model_past_large_inputs(
self,
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
encoder_hidden_states,
encoder_attention_mask,
):
config.is_decoder = True
config.add_cross_attention = True
model = QDQBertLMHeadModel(config=config)
model.to(torch_device)
model.eval()
# first forward pass
outputs = model(
input_ids,
attention_mask=input_mask,
encoder_hidden_states=encoder_hidden_states,
encoder_attention_mask=encoder_attention_mask,
use_cache=True,
)
past_key_values = outputs.past_key_values
# create hypothetical multiple next token and extent to next_input_ids
next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)
next_mask = ids_tensor((self.batch_size, 3), vocab_size=2)
# append to next input_ids and
next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
next_attention_mask = torch.cat([input_mask, next_mask], dim=-1)
output_from_no_past = model(
next_input_ids,
attention_mask=next_attention_mask,
encoder_hidden_states=encoder_hidden_states,
encoder_attention_mask=encoder_attention_mask,
output_hidden_states=True,
)["hidden_states"][0]
output_from_past = model(
next_tokens,
attention_mask=next_attention_mask,
encoder_hidden_states=encoder_hidden_states,
encoder_attention_mask=encoder_attention_mask,
past_key_values=past_key_values,
output_hidden_states=True,
)["hidden_states"][0]
# select random slice
random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx].detach()
output_from_past_slice = output_from_past[:, :, random_slice_idx].detach()
self.parent.assertTrue(output_from_past_slice.shape[1] == next_tokens.shape[1])
# test that outputs are equal for slice
self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3))
def create_and_check_for_next_sequence_prediction(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = QDQBertForNextSentencePrediction(config=config)
model.to(torch_device)
model.eval()
result = model(
input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
labels=sequence_labels,
)
self.parent.assertEqual(result.logits.shape, (self.batch_size, 2))
def create_and_check_for_question_answering(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = QDQBertForQuestionAnswering(config=config)
model.to(torch_device)
model.eval()
result = model(
input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
start_positions=sequence_labels,
end_positions=sequence_labels,
)
self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))
def create_and_check_for_sequence_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = QDQBertForSequenceClassification(config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))
def create_and_check_for_token_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = QDQBertForTokenClassification(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))
def create_and_check_for_multiple_choice(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_choices = self.num_choices
model = QDQBertForMultipleChoice(config=config)
model.to(torch_device)
model.eval()
multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
result = model(
multiple_choice_inputs_ids,
attention_mask=multiple_choice_input_mask,
token_type_ids=multiple_choice_token_type_ids,
labels=choice_labels,
)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices))
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict
@require_torch
@require_pytorch_quantization
class QDQBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(
QDQBertModel,
QDQBertForMaskedLM,
QDQBertForMultipleChoice,
QDQBertForNextSentencePrediction,
QDQBertForQuestionAnswering,
QDQBertForSequenceClassification,
QDQBertForTokenClassification,
QDQBertLMHeadModel,
)
if is_torch_available()
else ()
)
all_generative_model_classes = (QDQBertLMHeadModel,) if is_torch_available() else ()
pipeline_model_mapping = (
{
"feature-extraction": QDQBertModel,
"fill-mask": QDQBertForMaskedLM,
"question-answering": QDQBertForQuestionAnswering,
"text-classification": QDQBertForSequenceClassification,
"text-generation": QDQBertLMHeadModel,
"token-classification": QDQBertForTokenClassification,
"zero-shot": QDQBertForSequenceClassification,
}
if is_torch_available()
else {}
)
def setUp(self):
self.model_tester = QDQBertModelTester(self)
self.config_tester = ConfigTester(self, config_class=QDQBertConfig, hidden_size=37)
def test_config(self):
self.config_tester.run_common_tests()
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
def test_model_various_embeddings(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
for type in ["absolute", "relative_key", "relative_key_query"]:
config_and_inputs[0].position_embedding_type = type
self.model_tester.create_and_check_model(*config_and_inputs)
def test_model_as_decoder(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)
def test_model_as_decoder_with_default_input_mask(self):
# This regression test was failing with PyTorch < 1.3
(
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
encoder_hidden_states,
encoder_attention_mask,
) = self.model_tester.prepare_config_and_inputs_for_decoder()
input_mask = None
self.model_tester.create_and_check_model_as_decoder(
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
encoder_hidden_states,
encoder_attention_mask,
)
def test_for_causal_lm(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
self.model_tester.create_and_check_for_causal_lm(*config_and_inputs)
def test_for_masked_lm(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)
def test_for_causal_lm_decoder(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
self.model_tester.create_and_check_model_for_causal_lm_as_decoder(*config_and_inputs)
def test_decoder_model_past_with_large_inputs(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
self.model_tester.create_and_check_decoder_model_past_large_inputs(*config_and_inputs)
def test_for_multiple_choice(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs)
def test_for_next_sequence_prediction(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_next_sequence_prediction(*config_and_inputs)
def test_for_question_answering(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_question_answering(*config_and_inputs)
def test_for_sequence_classification(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_sequence_classification(*config_and_inputs)
def test_for_token_classification(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_token_classification(*config_and_inputs)
@slow
def test_model_from_pretrained(self):
model_name = "google-bert/bert-base-uncased"
model = QDQBertModel.from_pretrained(model_name)
self.assertIsNotNone(model)
# Override
def test_feed_forward_chunking(self):
# feed forward chunking is not supported in QDQBert
pass
@require_torch
@require_pytorch_quantization
class QDQBertModelIntegrationTest(unittest.TestCase):
@slow
def test_inference_no_head_absolute_embedding(self):
# Set default quantizers before creating the model.
import pytorch_quantization.nn as quant_nn
from pytorch_quantization.tensor_quant import QuantDescriptor
# The default tensor quantizer is set to use Max calibration method
input_desc = QuantDescriptor(num_bits=8, calib_method="max")
# The default tensor quantizer is set to be per-channel quantization for weights
weight_desc = QuantDescriptor(num_bits=8, axis=((0,)))
quant_nn.QuantLinear.set_default_quant_desc_input(input_desc)
quant_nn.QuantLinear.set_default_quant_desc_weight(weight_desc)
model = QDQBertModel.from_pretrained("google-bert/bert-base-uncased")
input_ids = torch.tensor([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 1588, 2]])
attention_mask = torch.tensor([[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
output = model(input_ids, attention_mask=attention_mask)[0]
expected_shape = torch.Size((1, 11, 768))
self.assertEqual(output.shape, expected_shape)
expected_slice = torch.tensor(
[[[0.4571, -0.0735, 0.8594], [0.2774, -0.0278, 0.8794], [0.3548, -0.0473, 0.7593]]]
)
self.assertTrue(torch.allclose(output[:, 1:4, 1:4], expected_slice, atol=1e-4))

View File

@@ -1,554 +0,0 @@
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Testing suite for the PyTorch REALM model."""
import copy
import unittest
import numpy as np
from transformers import RealmConfig, is_torch_available
from transformers.testing_utils import require_torch, slow, torch_device
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask
from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
import torch
from transformers import (
RealmEmbedder,
RealmForOpenQA,
RealmKnowledgeAugEncoder,
RealmReader,
RealmRetriever,
RealmScorer,
RealmTokenizer,
)
class RealmModelTester:
def __init__(
self,
parent,
batch_size=13,
retriever_proj_size=128,
seq_length=7,
is_training=True,
use_input_mask=True,
use_token_type_ids=True,
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
layer_norm_eps=1e-12,
span_hidden_size=50,
max_span_width=10,
reader_layer_norm_eps=1e-3,
reader_beam_size=4,
reader_seq_len=288 + 32,
num_block_records=13353718,
searcher_beam_size=8,
searcher_seq_len=64,
num_labels=3,
num_choices=4,
num_candidates=10,
scope=None,
):
# General config
self.parent = parent
self.batch_size = batch_size
self.retriever_proj_size = retriever_proj_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_mask = use_input_mask
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.layer_norm_eps = layer_norm_eps
# Reader config
self.span_hidden_size = span_hidden_size
self.max_span_width = max_span_width
self.reader_layer_norm_eps = reader_layer_norm_eps
self.reader_beam_size = reader_beam_size
self.reader_seq_len = reader_seq_len
# Searcher config
self.num_block_records = num_block_records
self.searcher_beam_size = searcher_beam_size
self.searcher_seq_len = searcher_seq_len
self.num_labels = num_labels
self.num_choices = num_choices
self.num_candidates = num_candidates
self.scope = scope
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
candiate_input_ids = ids_tensor([self.batch_size, self.num_candidates, self.seq_length], self.vocab_size)
reader_input_ids = ids_tensor([self.reader_beam_size, self.reader_seq_len], self.vocab_size)
input_mask = None
candiate_input_mask = None
reader_input_mask = None
if self.use_input_mask:
input_mask = random_attention_mask([self.batch_size, self.seq_length])
candiate_input_mask = random_attention_mask([self.batch_size, self.num_candidates, self.seq_length])
reader_input_mask = random_attention_mask([self.reader_beam_size, self.reader_seq_len])
token_type_ids = None
candidate_token_type_ids = None
reader_token_type_ids = None
if self.use_token_type_ids:
token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
candidate_token_type_ids = ids_tensor(
[self.batch_size, self.num_candidates, self.seq_length], self.type_vocab_size
)
reader_token_type_ids = ids_tensor([self.reader_beam_size, self.reader_seq_len], self.type_vocab_size)
sequence_labels = None
token_labels = None
choice_labels = None
if self.use_labels:
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)
config = self.get_config()
# inputs with additional num_candidates axis.
scorer_encoder_inputs = (candiate_input_ids, candiate_input_mask, candidate_token_type_ids)
# reader inputs
reader_inputs = (reader_input_ids, reader_input_mask, reader_token_type_ids)
return (
config,
input_ids,
token_type_ids,
input_mask,
scorer_encoder_inputs,
reader_inputs,
sequence_labels,
token_labels,
choice_labels,
)
def get_config(self):
return RealmConfig(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
retriever_proj_size=self.retriever_proj_size,
num_hidden_layers=self.num_hidden_layers,
num_attention_heads=self.num_attention_heads,
num_candidates=self.num_candidates,
intermediate_size=self.intermediate_size,
hidden_act=self.hidden_act,
hidden_dropout_prob=self.hidden_dropout_prob,
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range,
)
def create_and_check_embedder(
self,
config,
input_ids,
token_type_ids,
input_mask,
scorer_encoder_inputs,
reader_inputs,
sequence_labels,
token_labels,
choice_labels,
):
model = RealmEmbedder(config=config)
model.to(torch_device)
model.eval()
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
self.parent.assertEqual(result.projected_score.shape, (self.batch_size, self.retriever_proj_size))
def create_and_check_encoder(
self,
config,
input_ids,
token_type_ids,
input_mask,
scorer_encoder_inputs,
reader_inputs,
sequence_labels,
token_labels,
choice_labels,
):
model = RealmKnowledgeAugEncoder(config=config)
model.to(torch_device)
model.eval()
relevance_score = floats_tensor([self.batch_size, self.num_candidates])
result = model(
scorer_encoder_inputs[0],
attention_mask=scorer_encoder_inputs[1],
token_type_ids=scorer_encoder_inputs[2],
relevance_score=relevance_score,
labels=token_labels,
)
self.parent.assertEqual(
result.logits.shape, (self.batch_size * self.num_candidates, self.seq_length, self.vocab_size)
)
def create_and_check_reader(
self,
config,
input_ids,
token_type_ids,
input_mask,
scorer_encoder_inputs,
reader_inputs,
sequence_labels,
token_labels,
choice_labels,
):
model = RealmReader(config=config)
model.to(torch_device)
model.eval()
relevance_score = floats_tensor([self.reader_beam_size])
result = model(
reader_inputs[0],
attention_mask=reader_inputs[1],
token_type_ids=reader_inputs[2],
relevance_score=relevance_score,
)
self.parent.assertEqual(result.block_idx.shape, ())
self.parent.assertEqual(result.candidate.shape, ())
self.parent.assertEqual(result.start_pos.shape, ())
self.parent.assertEqual(result.end_pos.shape, ())
def create_and_check_scorer(
self,
config,
input_ids,
token_type_ids,
input_mask,
scorer_encoder_inputs,
reader_inputs,
sequence_labels,
token_labels,
choice_labels,
):
model = RealmScorer(config=config)
model.to(torch_device)
model.eval()
result = model(
input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
candidate_input_ids=scorer_encoder_inputs[0],
candidate_attention_mask=scorer_encoder_inputs[1],
candidate_token_type_ids=scorer_encoder_inputs[2],
)
self.parent.assertEqual(result.relevance_score.shape, (self.batch_size, self.num_candidates))
self.parent.assertEqual(result.query_score.shape, (self.batch_size, self.retriever_proj_size))
self.parent.assertEqual(
result.candidate_score.shape, (self.batch_size, self.num_candidates, self.retriever_proj_size)
)
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(
config,
input_ids,
token_type_ids,
input_mask,
scorer_encoder_inputs,
reader_inputs,
sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict
@require_torch
class RealmModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(
RealmEmbedder,
RealmKnowledgeAugEncoder,
# RealmScorer is excluded from common tests as it is a container model
# consisting of two RealmEmbedders & a simple inner product calculation.
# RealmScorer
)
if is_torch_available()
else ()
)
all_generative_model_classes = ()
pipeline_model_mapping = {} if is_torch_available() else {}
# disable these tests because there is no base_model in Realm
test_save_load_fast_init_from_base = False
test_save_load_fast_init_to_base = False
def setUp(self):
self.test_pruning = False
self.model_tester = RealmModelTester(self)
self.config_tester = ConfigTester(self, config_class=RealmConfig)
def test_config(self):
self.config_tester.run_common_tests()
def test_embedder(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_embedder(*config_and_inputs)
def test_encoder(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_encoder(*config_and_inputs)
def test_model_various_embeddings(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
for type in ["absolute", "relative_key", "relative_key_query"]:
config_and_inputs[0].position_embedding_type = type
self.model_tester.create_and_check_embedder(*config_and_inputs)
self.model_tester.create_and_check_encoder(*config_and_inputs)
def test_scorer(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_scorer(*config_and_inputs)
def test_training(self):
if not self.model_tester.is_training:
return
config, *inputs = self.model_tester.prepare_config_and_inputs()
input_ids, token_type_ids, input_mask, scorer_encoder_inputs = inputs[0:4]
config.return_dict = True
tokenizer = RealmTokenizer.from_pretrained("google/realm-orqa-nq-openqa")
# RealmKnowledgeAugEncoder training
model = RealmKnowledgeAugEncoder(config)
model.to(torch_device)
model.train()
inputs_dict = {
"input_ids": scorer_encoder_inputs[0].to(torch_device),
"attention_mask": scorer_encoder_inputs[1].to(torch_device),
"token_type_ids": scorer_encoder_inputs[2].to(torch_device),
"relevance_score": floats_tensor([self.model_tester.batch_size, self.model_tester.num_candidates]),
}
inputs_dict["labels"] = torch.zeros(
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
)
inputs = inputs_dict
loss = model(**inputs).loss
loss.backward()
# RealmForOpenQA training
openqa_config = copy.deepcopy(config)
openqa_config.vocab_size = 30522 # the retrieved texts will inevitably have more than 99 vocabs.
openqa_config.num_block_records = 5
openqa_config.searcher_beam_size = 2
block_records = np.array(
[
b"This is the first record.",
b"This is the second record.",
b"This is the third record.",
b"This is the fourth record.",
b"This is the fifth record.",
],
dtype=object,
)
retriever = RealmRetriever(block_records, tokenizer)
model = RealmForOpenQA(openqa_config, retriever)
model.to(torch_device)
model.train()
inputs_dict = {
"input_ids": input_ids[:1].to(torch_device),
"attention_mask": input_mask[:1].to(torch_device),
"token_type_ids": token_type_ids[:1].to(torch_device),
"answer_ids": input_ids[:1].tolist(),
}
inputs = self._prepare_for_class(inputs_dict, RealmForOpenQA)
loss = model(**inputs).reader_output.loss
loss.backward()
# Test model.block_embedding_to
device = torch.device("cpu")
model.block_embedding_to(device)
loss = model(**inputs).reader_output.loss
loss.backward()
self.assertEqual(model.block_emb.device.type, device.type)
@slow
def test_embedder_from_pretrained(self):
model = RealmEmbedder.from_pretrained("google/realm-cc-news-pretrained-embedder")
self.assertIsNotNone(model)
@slow
def test_encoder_from_pretrained(self):
model = RealmKnowledgeAugEncoder.from_pretrained("google/realm-cc-news-pretrained-encoder")
self.assertIsNotNone(model)
@slow
def test_open_qa_from_pretrained(self):
model = RealmForOpenQA.from_pretrained("google/realm-orqa-nq-openqa")
self.assertIsNotNone(model)
@slow
def test_reader_from_pretrained(self):
model = RealmReader.from_pretrained("google/realm-orqa-nq-reader")
self.assertIsNotNone(model)
@slow
def test_scorer_from_pretrained(self):
model = RealmScorer.from_pretrained("google/realm-cc-news-pretrained-scorer")
self.assertIsNotNone(model)
@require_torch
class RealmModelIntegrationTest(unittest.TestCase):
@slow
def test_inference_embedder(self):
retriever_projected_size = 128
model = RealmEmbedder.from_pretrained("google/realm-cc-news-pretrained-embedder")
input_ids = torch.tensor([[0, 1, 2, 3, 4, 5]])
output = model(input_ids)[0]
expected_shape = torch.Size((1, retriever_projected_size))
self.assertEqual(output.shape, expected_shape)
expected_slice = torch.tensor([[-0.0714, -0.0837, -0.1314]])
self.assertTrue(torch.allclose(output[:, :3], expected_slice, atol=1e-4))
@slow
def test_inference_encoder(self):
num_candidates = 2
vocab_size = 30522
model = RealmKnowledgeAugEncoder.from_pretrained(
"google/realm-cc-news-pretrained-encoder", num_candidates=num_candidates
)
input_ids = torch.tensor([[0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11]])
relevance_score = torch.tensor([[0.3, 0.7]], dtype=torch.float32)
output = model(input_ids, relevance_score=relevance_score)[0]
expected_shape = torch.Size((2, 6, vocab_size))
self.assertEqual(output.shape, expected_shape)
expected_slice = torch.tensor([[[-11.0888, -11.2544], [-10.2170, -10.3874]]])
self.assertTrue(torch.allclose(output[1, :2, :2], expected_slice, atol=1e-4))
@slow
def test_inference_open_qa(self):
from transformers.models.realm.retrieval_realm import RealmRetriever
tokenizer = RealmTokenizer.from_pretrained("google/realm-orqa-nq-openqa")
retriever = RealmRetriever.from_pretrained("google/realm-orqa-nq-openqa")
model = RealmForOpenQA.from_pretrained(
"google/realm-orqa-nq-openqa",
retriever=retriever,
)
question = "Who is the pioneer in modern computer science?"
question = tokenizer(
[question],
padding=True,
truncation=True,
max_length=model.config.searcher_seq_len,
return_tensors="pt",
).to(model.device)
predicted_answer_ids = model(**question).predicted_answer_ids
predicted_answer = tokenizer.decode(predicted_answer_ids)
self.assertEqual(predicted_answer, "alan mathison turing")
@slow
def test_inference_reader(self):
config = RealmConfig(reader_beam_size=2, max_span_width=3)
model = RealmReader.from_pretrained("google/realm-orqa-nq-reader", config=config)
concat_input_ids = torch.arange(10).view((2, 5))
concat_token_type_ids = torch.tensor([[0, 0, 1, 1, 1], [0, 0, 1, 1, 1]], dtype=torch.int64)
concat_block_mask = torch.tensor([[0, 0, 1, 1, 0], [0, 0, 1, 1, 0]], dtype=torch.int64)
relevance_score = torch.tensor([0.3, 0.7], dtype=torch.float32)
output = model(
concat_input_ids,
token_type_ids=concat_token_type_ids,
relevance_score=relevance_score,
block_mask=concat_block_mask,
return_dict=True,
)
block_idx_expected_shape = torch.Size(())
start_pos_expected_shape = torch.Size((1,))
end_pos_expected_shape = torch.Size((1,))
self.assertEqual(output.block_idx.shape, block_idx_expected_shape)
self.assertEqual(output.start_pos.shape, start_pos_expected_shape)
self.assertEqual(output.end_pos.shape, end_pos_expected_shape)
expected_block_idx = torch.tensor(1)
expected_start_pos = torch.tensor(3)
expected_end_pos = torch.tensor(3)
self.assertTrue(torch.allclose(output.block_idx, expected_block_idx, atol=1e-4))
self.assertTrue(torch.allclose(output.start_pos, expected_start_pos, atol=1e-4))
self.assertTrue(torch.allclose(output.end_pos, expected_end_pos, atol=1e-4))
@slow
def test_inference_scorer(self):
num_candidates = 2
model = RealmScorer.from_pretrained("google/realm-cc-news-pretrained-scorer", num_candidates=num_candidates)
input_ids = torch.tensor([[0, 1, 2, 3, 4, 5]])
candidate_input_ids = torch.tensor([[0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11]])
output = model(input_ids, candidate_input_ids=candidate_input_ids)[0]
expected_shape = torch.Size((1, 2))
self.assertEqual(output.shape, expected_shape)
expected_slice = torch.tensor([[0.7410, 0.7170]])
self.assertTrue(torch.allclose(output, expected_slice, atol=1e-4))

View File

@@ -1,187 +0,0 @@
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import shutil
import tempfile
from unittest import TestCase
from unittest.mock import patch
import numpy as np
from datasets import Dataset
from transformers.models.realm.configuration_realm import RealmConfig
from transformers.models.realm.retrieval_realm import _REALM_BLOCK_RECORDS_FILENAME, RealmRetriever
from transformers.models.realm.tokenization_realm import VOCAB_FILES_NAMES, RealmTokenizer
class RealmRetrieverTest(TestCase):
def setUp(self):
self.tmpdirname = tempfile.mkdtemp()
self.num_block_records = 5
# Realm tok
vocab_tokens = [
"[UNK]",
"[CLS]",
"[SEP]",
"[PAD]",
"[MASK]",
"test",
"question",
"this",
"is",
"the",
"first",
"second",
"third",
"fourth",
"fifth",
"record",
"want",
"##want",
"##ed",
"wa",
"un",
"runn",
"##ing",
",",
"low",
"lowest",
]
realm_tokenizer_path = os.path.join(self.tmpdirname, "realm_tokenizer")
os.makedirs(realm_tokenizer_path, exist_ok=True)
self.vocab_file = os.path.join(realm_tokenizer_path, VOCAB_FILES_NAMES["vocab_file"])
with open(self.vocab_file, "w", encoding="utf-8") as vocab_writer:
vocab_writer.write("".join([x + "\n" for x in vocab_tokens]))
realm_block_records_path = os.path.join(self.tmpdirname, "realm_block_records")
os.makedirs(realm_block_records_path, exist_ok=True)
def get_tokenizer(self) -> RealmTokenizer:
return RealmTokenizer.from_pretrained(os.path.join(self.tmpdirname, "realm_tokenizer"))
def tearDown(self):
shutil.rmtree(self.tmpdirname)
def get_config(self):
config = RealmConfig(num_block_records=self.num_block_records)
return config
def get_dummy_dataset(self):
dataset = Dataset.from_dict(
{
"id": ["0", "1"],
"question": ["foo", "bar"],
"answers": [["Foo", "Bar"], ["Bar"]],
}
)
return dataset
def get_dummy_block_records(self):
block_records = np.array(
[
b"This is the first record",
b"This is the second record",
b"This is the third record",
b"This is the fourth record",
b"This is the fifth record",
b"This is a longer longer longer record",
],
dtype=object,
)
return block_records
def get_dummy_retriever(self):
retriever = RealmRetriever(
block_records=self.get_dummy_block_records(),
tokenizer=self.get_tokenizer(),
)
return retriever
def test_retrieve(self):
config = self.get_config()
retriever = self.get_dummy_retriever()
tokenizer = retriever.tokenizer
retrieved_block_ids = np.array([0, 3], dtype="long")
question_input_ids = tokenizer(["Test question"]).input_ids
answer_ids = tokenizer(
["the fourth"],
add_special_tokens=False,
return_token_type_ids=False,
return_attention_mask=False,
).input_ids
max_length = config.reader_seq_len
has_answers, start_pos, end_pos, concat_inputs = retriever(
retrieved_block_ids, question_input_ids, answer_ids=answer_ids, max_length=max_length, return_tensors="np"
)
self.assertEqual(len(has_answers), 2)
self.assertEqual(len(start_pos), 2)
self.assertEqual(len(end_pos), 2)
self.assertEqual(concat_inputs.input_ids.shape, (2, 10))
self.assertEqual(concat_inputs.attention_mask.shape, (2, 10))
self.assertEqual(concat_inputs.token_type_ids.shape, (2, 10))
self.assertEqual(concat_inputs.special_tokens_mask.shape, (2, 10))
self.assertEqual(
tokenizer.convert_ids_to_tokens(concat_inputs.input_ids[0]),
["[CLS]", "test", "question", "[SEP]", "this", "is", "the", "first", "record", "[SEP]"],
)
self.assertEqual(
tokenizer.convert_ids_to_tokens(concat_inputs.input_ids[1]),
["[CLS]", "test", "question", "[SEP]", "this", "is", "the", "fourth", "record", "[SEP]"],
)
def test_block_has_answer(self):
config = self.get_config()
retriever = self.get_dummy_retriever()
tokenizer = retriever.tokenizer
retrieved_block_ids = np.array([0, 3, 5], dtype="long")
question_input_ids = tokenizer(["Test question"]).input_ids
answer_ids = tokenizer(
["the fourth", "longer longer"],
add_special_tokens=False,
return_token_type_ids=False,
return_attention_mask=False,
).input_ids
max_length = config.reader_seq_len
has_answers, start_pos, end_pos, _ = retriever(
retrieved_block_ids, question_input_ids, answer_ids=answer_ids, max_length=max_length, return_tensors="np"
)
self.assertEqual([False, True, True], has_answers)
self.assertEqual([[-1, -1, -1], [6, -1, -1], [6, 7, 8]], start_pos)
self.assertEqual([[-1, -1, -1], [7, -1, -1], [7, 8, 9]], end_pos)
def test_save_load_pretrained(self):
retriever = self.get_dummy_retriever()
retriever.save_pretrained(os.path.join(self.tmpdirname, "realm_block_records"))
# Test local path
retriever = retriever.from_pretrained(os.path.join(self.tmpdirname, "realm_block_records"))
self.assertEqual(retriever.block_records[0], b"This is the first record")
# Test mocked remote path
with patch("transformers.models.realm.retrieval_realm.hf_hub_download") as mock_hf_hub_download:
mock_hf_hub_download.return_value = os.path.join(
os.path.join(self.tmpdirname, "realm_block_records"), _REALM_BLOCK_RECORDS_FILENAME
)
retriever = RealmRetriever.from_pretrained("google/realm-cc-news-pretrained-openqa")
self.assertEqual(retriever.block_records[0], b"This is the first record")

View File

@@ -1,322 +0,0 @@
# coding=utf-8
# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
from transformers import RealmTokenizerFast
from transformers.models.bert.tokenization_bert import (
VOCAB_FILES_NAMES,
BasicTokenizer,
WordpieceTokenizer,
_is_control,
_is_punctuation,
_is_whitespace,
)
from transformers.models.realm.tokenization_realm import RealmTokenizer
from transformers.testing_utils import require_tokenizers, slow
from ...test_tokenization_common import TokenizerTesterMixin, filter_non_english
@require_tokenizers
class RealmTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
from_pretrained_id = "google/realm-cc-news-pretrained-embedder"
tokenizer_class = RealmTokenizer
rust_tokenizer_class = RealmTokenizerFast
test_rust_tokenizer = True
space_between_special_tokens = True
from_pretrained_filter = filter_non_english
def setUp(self):
super().setUp()
vocab_tokens = [
"[UNK]",
"[CLS]",
"[SEP]",
"[PAD]",
"[MASK]",
"want",
"##want",
"##ed",
"wa",
"un",
"runn",
"##ing",
",",
"low",
"lowest",
]
self.vocab_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["vocab_file"])
with open(self.vocab_file, "w", encoding="utf-8") as vocab_writer:
vocab_writer.write("".join([x + "\n" for x in vocab_tokens]))
def get_input_output_texts(self, tokenizer):
input_text = "UNwant\u00e9d,running"
output_text = "unwanted, running"
return input_text, output_text
def test_full_tokenizer(self):
tokenizer = self.tokenizer_class(self.vocab_file)
tokens = tokenizer.tokenize("UNwant\u00e9d,running")
self.assertListEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"])
self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [9, 6, 7, 12, 10, 11])
def test_rust_and_python_full_tokenizers(self):
if not self.test_rust_tokenizer:
return
tokenizer = self.get_tokenizer()
rust_tokenizer = self.get_rust_tokenizer()
sequence = "UNwant\u00e9d,running"
tokens = tokenizer.tokenize(sequence)
rust_tokens = rust_tokenizer.tokenize(sequence)
self.assertListEqual(tokens, rust_tokens)
ids = tokenizer.encode(sequence, add_special_tokens=False)
rust_ids = rust_tokenizer.encode(sequence, add_special_tokens=False)
self.assertListEqual(ids, rust_ids)
rust_tokenizer = self.get_rust_tokenizer()
ids = tokenizer.encode(sequence)
rust_ids = rust_tokenizer.encode(sequence)
self.assertListEqual(ids, rust_ids)
# With lower casing
tokenizer = self.get_tokenizer(do_lower_case=True)
rust_tokenizer = self.get_rust_tokenizer(do_lower_case=True)
sequence = "UNwant\u00e9d,running"
tokens = tokenizer.tokenize(sequence)
rust_tokens = rust_tokenizer.tokenize(sequence)
self.assertListEqual(tokens, rust_tokens)
ids = tokenizer.encode(sequence, add_special_tokens=False)
rust_ids = rust_tokenizer.encode(sequence, add_special_tokens=False)
self.assertListEqual(ids, rust_ids)
rust_tokenizer = self.get_rust_tokenizer()
ids = tokenizer.encode(sequence)
rust_ids = rust_tokenizer.encode(sequence)
self.assertListEqual(ids, rust_ids)
def test_chinese(self):
tokenizer = BasicTokenizer()
self.assertListEqual(tokenizer.tokenize("ah\u535a\u63a8zz"), ["ah", "\u535a", "\u63a8", "zz"])
def test_basic_tokenizer_lower(self):
tokenizer = BasicTokenizer(do_lower_case=True)
self.assertListEqual(
tokenizer.tokenize(" \tHeLLo!how \n Are yoU? "), ["hello", "!", "how", "are", "you", "?"]
)
self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"])
def test_basic_tokenizer_lower_strip_accents_false(self):
tokenizer = BasicTokenizer(do_lower_case=True, strip_accents=False)
self.assertListEqual(
tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hällo", "!", "how", "are", "you", "?"]
)
self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["h\u00e9llo"])
def test_basic_tokenizer_lower_strip_accents_true(self):
tokenizer = BasicTokenizer(do_lower_case=True, strip_accents=True)
self.assertListEqual(
tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hallo", "!", "how", "are", "you", "?"]
)
self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"])
def test_basic_tokenizer_lower_strip_accents_default(self):
tokenizer = BasicTokenizer(do_lower_case=True)
self.assertListEqual(
tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hallo", "!", "how", "are", "you", "?"]
)
self.assertListEqual(tokenizer.tokenize("H\u00e9llo"), ["hello"])
def test_basic_tokenizer_no_lower(self):
tokenizer = BasicTokenizer(do_lower_case=False)
self.assertListEqual(
tokenizer.tokenize(" \tHeLLo!how \n Are yoU? "), ["HeLLo", "!", "how", "Are", "yoU", "?"]
)
def test_basic_tokenizer_no_lower_strip_accents_false(self):
tokenizer = BasicTokenizer(do_lower_case=False, strip_accents=False)
self.assertListEqual(
tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["HäLLo", "!", "how", "Are", "yoU", "?"]
)
def test_basic_tokenizer_no_lower_strip_accents_true(self):
tokenizer = BasicTokenizer(do_lower_case=False, strip_accents=True)
self.assertListEqual(
tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["HaLLo", "!", "how", "Are", "yoU", "?"]
)
def test_basic_tokenizer_respects_never_split_tokens(self):
tokenizer = BasicTokenizer(do_lower_case=False, never_split=["[UNK]"])
self.assertListEqual(
tokenizer.tokenize(" \tHeLLo!how \n Are yoU? [UNK]"), ["HeLLo", "!", "how", "Are", "yoU", "?", "[UNK]"]
)
def test_wordpiece_tokenizer(self):
vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn", "##ing"]
vocab = {}
for i, token in enumerate(vocab_tokens):
vocab[token] = i
tokenizer = WordpieceTokenizer(vocab=vocab, unk_token="[UNK]")
self.assertListEqual(tokenizer.tokenize(""), [])
self.assertListEqual(tokenizer.tokenize("unwanted running"), ["un", "##want", "##ed", "runn", "##ing"])
self.assertListEqual(tokenizer.tokenize("unwantedX running"), ["[UNK]", "runn", "##ing"])
def test_is_whitespace(self):
self.assertTrue(_is_whitespace(" "))
self.assertTrue(_is_whitespace("\t"))
self.assertTrue(_is_whitespace("\r"))
self.assertTrue(_is_whitespace("\n"))
self.assertTrue(_is_whitespace("\u00a0"))
self.assertFalse(_is_whitespace("A"))
self.assertFalse(_is_whitespace("-"))
def test_is_control(self):
self.assertTrue(_is_control("\u0005"))
self.assertFalse(_is_control("A"))
self.assertFalse(_is_control(" "))
self.assertFalse(_is_control("\t"))
self.assertFalse(_is_control("\r"))
def test_is_punctuation(self):
self.assertTrue(_is_punctuation("-"))
self.assertTrue(_is_punctuation("$"))
self.assertTrue(_is_punctuation("`"))
self.assertTrue(_is_punctuation("."))
self.assertFalse(_is_punctuation("A"))
self.assertFalse(_is_punctuation(" "))
def test_clean_text(self):
tokenizer = self.get_tokenizer()
# Example taken from the issue https://github.com/huggingface/tokenizers/issues/340
self.assertListEqual([tokenizer.tokenize(t) for t in ["Test", "\xad", "test"]], [["[UNK]"], [], ["[UNK]"]])
if self.test_rust_tokenizer:
rust_tokenizer = self.get_rust_tokenizer()
self.assertListEqual(
[rust_tokenizer.tokenize(t) for t in ["Test", "\xad", "test"]], [["[UNK]"], [], ["[UNK]"]]
)
@slow
def test_sequence_builders(self):
tokenizer = self.tokenizer_class.from_pretrained("google-bert/bert-base-uncased")
text = tokenizer.encode("sequence builders", add_special_tokens=False)
text_2 = tokenizer.encode("multi-sequence build", add_special_tokens=False)
encoded_sentence = tokenizer.build_inputs_with_special_tokens(text)
encoded_pair = tokenizer.build_inputs_with_special_tokens(text, text_2)
assert encoded_sentence == [101] + text + [102]
assert encoded_pair == [101] + text + [102] + text_2 + [102]
def test_offsets_with_special_characters(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
sentence = f"A, naïve {tokenizer_r.mask_token} AllenNLP sentence."
tokens = tokenizer_r.encode_plus(
sentence,
return_attention_mask=False,
return_token_type_ids=False,
return_offsets_mapping=True,
add_special_tokens=True,
)
do_lower_case = tokenizer_r.do_lower_case if hasattr(tokenizer_r, "do_lower_case") else False
expected_results = (
[
((0, 0), tokenizer_r.cls_token),
((0, 1), "A"),
((1, 2), ","),
((3, 5), "na"),
((5, 6), "##ï"),
((6, 8), "##ve"),
((9, 15), tokenizer_r.mask_token),
((16, 21), "Allen"),
((21, 23), "##NL"),
((23, 24), "##P"),
((25, 33), "sentence"),
((33, 34), "."),
((0, 0), tokenizer_r.sep_token),
]
if not do_lower_case
else [
((0, 0), tokenizer_r.cls_token),
((0, 1), "a"),
((1, 2), ","),
((3, 8), "naive"),
((9, 15), tokenizer_r.mask_token),
((16, 21), "allen"),
((21, 23), "##nl"),
((23, 24), "##p"),
((25, 33), "sentence"),
((33, 34), "."),
((0, 0), tokenizer_r.sep_token),
]
)
self.assertEqual(
[e[1] for e in expected_results], tokenizer_r.convert_ids_to_tokens(tokens["input_ids"])
)
self.assertEqual([e[0] for e in expected_results], tokens["offset_mapping"])
@slow
def test_batch_encode_candidates(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
text = [["Hello world!", "Nice to meet you!"], ["The cute cat.", "The adorable dog."]]
encoded_sentence_r = tokenizer_r.batch_encode_candidates(text, max_length=10, return_tensors="np")
encoded_sentence_p = tokenizer_p.batch_encode_candidates(text, max_length=10, return_tensors="np")
expected_shape = (2, 2, 10)
self.assertEqual(encoded_sentence_r["input_ids"].shape, expected_shape)
self.assertEqual(encoded_sentence_r["attention_mask"].shape, expected_shape)
self.assertEqual(encoded_sentence_r["token_type_ids"].shape, expected_shape)
self.assertEqual(encoded_sentence_p["input_ids"].shape, expected_shape)
self.assertEqual(encoded_sentence_p["attention_mask"].shape, expected_shape)
self.assertEqual(encoded_sentence_p["token_type_ids"].shape, expected_shape)

View File

@@ -23,7 +23,6 @@ from transformers.testing_utils import require_deterministic_for_xpu, require_to
from ...test_modeling_common import floats_tensor, ids_tensor, random_attention_mask
from ..bert.test_modeling_bert import BertModelTester
from ..speech_to_text.test_modeling_speech_to_text import Speech2TextModelTester
from ..speech_to_text_2.test_modeling_speech_to_text_2 import Speech2Text2StandaloneDecoderModelTester
from ..wav2vec2.test_modeling_wav2vec2 import Wav2Vec2ModelTester
@@ -33,7 +32,6 @@ if is_torch_available():
from transformers import (
BertLMHeadModel,
Speech2Text2ForCausalLM,
SpeechEncoderDecoderConfig,
SpeechEncoderDecoderModel,
Wav2Vec2Model,
@@ -583,43 +581,3 @@ class Speech2TextBertModelTest(EncoderDecoderMixin, unittest.TestCase):
# all published pretrained models are Speech2TextModel != Speech2TextEncoder
def test_real_model_save_load_from_pretrained(self):
pass
@require_torch
class Wav2Vec2Speech2Text2(EncoderDecoderMixin, unittest.TestCase):
def get_encoder_decoder_model(self, config, decoder_config):
encoder_model = Wav2Vec2Model(config).eval()
decoder_model = Speech2Text2ForCausalLM(decoder_config).eval()
return encoder_model, decoder_model
def prepare_config_and_inputs(self):
model_tester_encoder = Wav2Vec2ModelTester(self, batch_size=13)
model_tester_decoder = Speech2Text2StandaloneDecoderModelTester(
self, batch_size=13, d_model=32, max_position_embeddings=512
)
encoder_config_and_inputs = model_tester_encoder.prepare_config_and_inputs()
decoder_config_and_inputs = model_tester_decoder.prepare_config_and_inputs()
(
config,
input_values,
input_mask,
) = encoder_config_and_inputs
(decoder_config, decoder_input_ids, decoder_attention_mask, _) = decoder_config_and_inputs
# make sure that cross attention layers are added
decoder_config.add_cross_attention = True
# disable cache for now
decoder_config.use_cache = False
return {
"config": config,
"input_values": input_values,
"attention_mask": input_mask,
"decoder_config": decoder_config,
"decoder_input_ids": decoder_input_ids,
"decoder_attention_mask": decoder_attention_mask,
"labels": decoder_input_ids,
}
# there are no published pretrained Speech2Text2ForCausalLM for now
def test_real_model_save_load_from_pretrained(self):
pass

View File

@@ -1,216 +0,0 @@
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Testing suite for the PyTorch Speech2Text model."""
import unittest
from transformers import Speech2Text2Config
from transformers.testing_utils import is_torch_available, require_torch, torch_device
from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, ids_tensor
from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
import torch
from transformers.models.speech_to_text_2.modeling_speech_to_text_2 import (
Speech2Text2Decoder,
Speech2Text2ForCausalLM,
)
@require_torch
class Speech2Text2StandaloneDecoderModelTester:
def __init__(
self,
parent,
vocab_size=99,
batch_size=13,
d_model=16,
decoder_seq_length=7,
is_training=True,
is_decoder=True,
use_attention_mask=True,
use_cache=False,
use_labels=True,
decoder_start_token_id=2,
decoder_ffn_dim=32,
decoder_layers=2,
decoder_attention_heads=4,
max_position_embeddings=30,
pad_token_id=0,
bos_token_id=1,
eos_token_id=2,
scope=None,
):
self.parent = parent
self.batch_size = batch_size
self.decoder_seq_length = decoder_seq_length
# For common tests
self.seq_length = self.decoder_seq_length
self.is_training = is_training
self.use_attention_mask = use_attention_mask
self.use_labels = use_labels
self.vocab_size = vocab_size
self.d_model = d_model
self.hidden_size = d_model
self.num_hidden_layers = decoder_layers
self.decoder_layers = decoder_layers
self.decoder_ffn_dim = decoder_ffn_dim
self.decoder_attention_heads = decoder_attention_heads
self.num_attention_heads = decoder_attention_heads
self.eos_token_id = eos_token_id
self.bos_token_id = bos_token_id
self.pad_token_id = pad_token_id
self.decoder_start_token_id = decoder_start_token_id
self.use_cache = use_cache
self.max_position_embeddings = max_position_embeddings
self.scope = None
self.decoder_key_length = decoder_seq_length
self.base_model_out_len = 2
self.decoder_attention_idx = 1
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.decoder_seq_length], self.vocab_size)
attention_mask = None
if self.use_attention_mask:
attention_mask = ids_tensor([self.batch_size, self.decoder_seq_length], vocab_size=2)
lm_labels = None
if self.use_labels:
lm_labels = ids_tensor([self.batch_size, self.decoder_seq_length], self.vocab_size)
config = Speech2Text2Config(
vocab_size=self.vocab_size,
d_model=self.d_model,
decoder_layers=self.decoder_layers,
decoder_ffn_dim=self.decoder_ffn_dim,
decoder_attention_heads=self.decoder_attention_heads,
eos_token_id=self.eos_token_id,
bos_token_id=self.bos_token_id,
use_cache=self.use_cache,
pad_token_id=self.pad_token_id,
decoder_start_token_id=self.decoder_start_token_id,
max_position_embeddings=self.max_position_embeddings,
)
return (
config,
input_ids,
attention_mask,
lm_labels,
)
def create_and_check_decoder_model_past(
self,
config,
input_ids,
attention_mask,
lm_labels,
):
config.use_cache = True
model = Speech2Text2Decoder(config=config).to(torch_device).eval()
input_ids = input_ids[:2]
input_ids[input_ids == 0] += 1
# first forward pass
outputs = model(input_ids, use_cache=True)
outputs_use_cache_conf = model(input_ids)
outputs_no_past = model(input_ids, use_cache=False)
self.parent.assertTrue(len(outputs) == len(outputs_use_cache_conf))
self.parent.assertTrue(len(outputs) == len(outputs_no_past) + 1)
past_key_values = outputs["past_key_values"]
# create hypothetical next token and extent to next_input_ids
next_tokens = ids_tensor((2, 1), config.vocab_size - 1) + 1
# append to next input_ids and
next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
print(next_input_ids)
output_from_no_past = model(next_input_ids)["last_hidden_state"]
output_from_past = model(next_tokens, past_key_values=past_key_values)["last_hidden_state"]
# select random slice
random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
output_from_no_past_slice = output_from_no_past[:, next_input_ids.shape[-1] - 1, random_slice_idx].detach()
output_from_past_slice = output_from_past[:, 0, random_slice_idx].detach()
# test that outputs are equal for slice
assert torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3)
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(
config,
input_ids,
attention_mask,
lm_labels,
) = config_and_inputs
inputs_dict = {
"input_ids": input_ids,
"attention_mask": attention_mask,
}
return config, inputs_dict
@require_torch
class Speech2Text2StandaloneDecoderModelTest(
ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase
):
all_model_classes = (Speech2Text2Decoder, Speech2Text2ForCausalLM) if is_torch_available() else ()
all_generative_model_classes = (Speech2Text2ForCausalLM,) if is_torch_available() else ()
pipeline_model_mapping = {"text-generation": Speech2Text2ForCausalLM} if is_torch_available() else {}
fx_compatible = True
test_pruning = False
def setUp(
self,
):
self.model_tester = Speech2Text2StandaloneDecoderModelTester(self, is_training=False)
self.config_tester = ConfigTester(self, config_class=Speech2Text2Config)
# not implemented currently
def test_inputs_embeds(self):
pass
# speech2text2 has no base model
def test_save_load_fast_init_from_base(self):
pass
# speech2text2 has no base model
def test_save_load_fast_init_to_base(self):
pass
def test_config(self):
self.config_tester.run_common_tests()
def test_decoder_model_past(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_decoder_model_past(*config_and_inputs)
# decoder cannot keep gradients
def test_retain_grad_hidden_states_attentions(self):
return

View File

@@ -1,98 +0,0 @@
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import inspect
import json
import os
import tempfile
import unittest
from transformers.models.speech_to_text_2 import Speech2Text2Tokenizer
from transformers.models.speech_to_text_2.tokenization_speech_to_text_2 import VOCAB_FILES_NAMES
from ...test_tokenization_common import TokenizerTesterMixin
class SpeechToTextTokenizerTest(TokenizerTesterMixin, unittest.TestCase):
from_pretrained_id = "facebook/s2t-wav2vec2-large-en-de"
tokenizer_class = Speech2Text2Tokenizer
test_rust_tokenizer = False
def setUp(self):
super().setUp()
vocab = "<s> <pad> </s> <unk> here@@ a couple of@@ words for the he@@ re@@ vocab".split(" ")
merges = ["he re</w> 123", "here a 1456"]
vocab_tokens = dict(zip(vocab, range(len(vocab))))
self.special_tokens_map = {"pad_token": "<pad>", "unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>"}
self.tmpdirname = tempfile.mkdtemp()
self.vocab_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["vocab_file"])
self.merges_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["merges_file"])
with open(self.vocab_file, "w", encoding="utf-8") as fp:
fp.write(json.dumps(vocab_tokens) + "\n")
with open(self.merges_file, "w") as fp:
fp.write("\n".join(merges))
def test_get_vocab(self):
vocab_keys = list(self.get_tokenizer().get_vocab().keys())
self.assertEqual(vocab_keys[0], "<s>")
self.assertEqual(vocab_keys[1], "<pad>")
self.assertEqual(vocab_keys[-1], "vocab")
self.assertEqual(len(vocab_keys), 14)
def test_vocab_size(self):
self.assertEqual(self.get_tokenizer().vocab_size, 14)
def test_tokenizer_decode(self):
tokenizer = Speech2Text2Tokenizer.from_pretrained(self.tmpdirname)
# make sure @@ is correctly concatenated
token_ids = [4, 6, 8, 7, 10] # ["here@@", "couple", "words", "of@@", "the"]
output_string = tokenizer.decode(token_ids)
self.assertTrue(output_string == "herecouple words ofthe")
def test_load_no_merges_file(self):
tokenizer = Speech2Text2Tokenizer.from_pretrained(self.tmpdirname)
with tempfile.TemporaryDirectory() as tmp_dirname:
tokenizer.save_pretrained(tmp_dirname)
os.remove(os.path.join(tmp_dirname, "merges.txt"))
# load tokenizer without merges file should not throw an error
tokenizer = Speech2Text2Tokenizer.from_pretrained(tmp_dirname)
with tempfile.TemporaryDirectory() as tmp_dirname:
# save tokenizer and load again
tokenizer.save_pretrained(tmp_dirname)
tokenizer = Speech2Text2Tokenizer.from_pretrained(tmp_dirname)
self.assertIsNotNone(tokenizer)
# overwrite since merges_file is optional
def test_tokenizer_slow_store_full_signature(self):
if not self.test_slow_tokenizer:
return
signature = inspect.signature(self.tokenizer_class.__init__)
tokenizer = self.get_tokenizer()
for parameter_name, parameter in signature.parameters.items():
if parameter.default != inspect.Parameter.empty and parameter_name != "merges_file":
self.assertIn(parameter_name, tokenizer.init_kwargs)

View File

@@ -1,182 +0,0 @@
# coding=utf-8
# Copyright 2023 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Testing suite for the TVLT feature extraction."""
import itertools
import random
import unittest
import numpy as np
from transformers import TvltFeatureExtractor, is_datasets_available
from transformers.testing_utils import require_torch, require_torchaudio
from transformers.utils.import_utils import is_torch_available
from ...test_sequence_feature_extraction_common import SequenceFeatureExtractionTestMixin
if is_torch_available():
import torch
if is_datasets_available():
from datasets import load_dataset
global_rng = random.Random()
# Copied from tests.models.whisper.test_feature_extraction_whisper.floats_list
def floats_list(shape, scale=1.0, rng=None, name=None):
"""Creates a random float32 tensor"""
if rng is None:
rng = global_rng
values = []
for batch_idx in range(shape[0]):
values.append([])
for _ in range(shape[1]):
values[-1].append(rng.random() * scale)
return values
class TvltFeatureExtractionTester(unittest.TestCase):
def __init__(
self,
parent,
batch_size=7,
min_seq_length=400,
max_seq_length=2000,
spectrogram_length=2048,
feature_size=128,
num_audio_channels=1,
hop_length=512,
chunk_length=30,
sampling_rate=44100,
):
self.parent = parent
self.batch_size = batch_size
self.min_seq_length = min_seq_length
self.max_seq_length = max_seq_length
self.seq_length_diff = (self.max_seq_length - self.min_seq_length) // (self.batch_size - 1)
self.spectrogram_length = spectrogram_length
self.feature_size = feature_size
self.num_audio_channels = num_audio_channels
self.hop_length = hop_length
self.chunk_length = chunk_length
self.sampling_rate = sampling_rate
def prepare_feat_extract_dict(self):
return {
"spectrogram_length": self.spectrogram_length,
"feature_size": self.feature_size,
"num_audio_channels": self.num_audio_channels,
"hop_length": self.hop_length,
"chunk_length": self.chunk_length,
"sampling_rate": self.sampling_rate,
}
def prepare_inputs_for_common(self, equal_length=False, numpify=False):
def _flatten(list_of_lists):
return list(itertools.chain(*list_of_lists))
if equal_length:
speech_inputs = [floats_list((self.max_seq_length, self.feature_size)) for _ in range(self.batch_size)]
else:
# make sure that inputs increase in size
speech_inputs = [
floats_list((x, self.feature_size))
for x in range(self.min_seq_length, self.max_seq_length, self.seq_length_diff)
]
if numpify:
speech_inputs = [np.asarray(x) for x in speech_inputs]
return speech_inputs
@require_torch
@require_torchaudio
class TvltFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest.TestCase):
feature_extraction_class = TvltFeatureExtractor
def setUp(self):
self.feat_extract_tester = TvltFeatureExtractionTester(self)
def test_feat_extract_properties(self):
feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
self.assertTrue(hasattr(feature_extractor, "spectrogram_length"))
self.assertTrue(hasattr(feature_extractor, "feature_size"))
self.assertTrue(hasattr(feature_extractor, "num_audio_channels"))
self.assertTrue(hasattr(feature_extractor, "hop_length"))
self.assertTrue(hasattr(feature_extractor, "chunk_length"))
self.assertTrue(hasattr(feature_extractor, "sampling_rate"))
def test_call(self):
# Initialize feature_extractor
feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
# create three inputs of length 800, 1000, and 1200
speech_inputs = [floats_list((1, x))[0] for x in range(800, 1400, 200)]
np_speech_inputs = [np.asarray(speech_input) for speech_input in speech_inputs]
# Test not batched input
encoded_audios = feature_extractor(np_speech_inputs[0], return_tensors="np", sampling_rate=44100).audio_values
self.assertTrue(encoded_audios.ndim == 4)
self.assertTrue(encoded_audios.shape[-1] == feature_extractor.feature_size)
self.assertTrue(encoded_audios.shape[-2] <= feature_extractor.spectrogram_length)
self.assertTrue(encoded_audios.shape[-3] == feature_extractor.num_channels)
# Test batched
encoded_audios = feature_extractor(np_speech_inputs, return_tensors="np", sampling_rate=44100).audio_values
self.assertTrue(encoded_audios.ndim == 4)
self.assertTrue(encoded_audios.shape[-1] == feature_extractor.feature_size)
self.assertTrue(encoded_audios.shape[-2] <= feature_extractor.spectrogram_length)
self.assertTrue(encoded_audios.shape[-3] == feature_extractor.num_channels)
# Test audio masking
encoded_audios = feature_extractor(
np_speech_inputs, return_tensors="np", sampling_rate=44100, mask_audio=True
).audio_values
self.assertTrue(encoded_audios.ndim == 4)
self.assertTrue(encoded_audios.shape[-1] == feature_extractor.feature_size)
self.assertTrue(encoded_audios.shape[-2] <= feature_extractor.spectrogram_length)
self.assertTrue(encoded_audios.shape[-3] == feature_extractor.num_channels)
# Test 2-D numpy arrays are batched.
speech_inputs = [floats_list((1, x))[0] for x in (800, 800, 800)]
np_speech_inputs = np.asarray(speech_inputs)
encoded_audios = feature_extractor(np_speech_inputs, return_tensors="np", sampling_rate=44100).audio_values
self.assertTrue(encoded_audios.ndim == 4)
self.assertTrue(encoded_audios.shape[-1] == feature_extractor.feature_size)
self.assertTrue(encoded_audios.shape[-2] <= feature_extractor.spectrogram_length)
self.assertTrue(encoded_audios.shape[-3] == feature_extractor.num_channels)
def _load_datasamples(self, num_samples):
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
# automatic decoding with librispeech
speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
return [x["array"] for x in speech_samples]
def test_integration(self):
input_speech = self._load_datasamples(1)
feature_extractor = TvltFeatureExtractor()
audio_values = feature_extractor(input_speech, return_tensors="pt").audio_values
self.assertEqual(audio_values.shape, (1, 1, 192, 128))
expected_slice = torch.tensor([[-0.3032, -0.2708], [-0.4434, -0.4007]])
self.assertTrue(torch.allclose(audio_values[0, 0, :2, :2], expected_slice, atol=1e-4))

View File

@@ -1,294 +0,0 @@
# coding=utf-8
# Copyright 2023 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Testing suite for the TVLT image processor."""
import unittest
import numpy as np
from transformers.testing_utils import require_torch, require_vision
from transformers.utils import is_torch_available, is_vision_available
from ...test_image_processing_common import ImageProcessingTestMixin
if is_torch_available():
import torch
if is_vision_available():
from PIL import Image
from transformers import TvltImageProcessor
def prepare_video(image_processor_tester, width=10, height=10, numpify=False, torchify=False):
"""This function prepares a video as a list of PIL images/NumPy arrays/PyTorch tensors."""
video = []
for i in range(image_processor_tester.num_frames):
video.append(np.random.randint(255, size=(image_processor_tester.num_channels, width, height), dtype=np.uint8))
if not numpify and not torchify:
# PIL expects the channel dimension as last dimension
video = [Image.fromarray(np.moveaxis(frame, 0, -1)) for frame in video]
if torchify:
video = [torch.from_numpy(frame) for frame in video]
return video
def prepare_video_inputs(image_processor_tester, equal_resolution=False, numpify=False, torchify=False):
"""This function prepares a batch of videos: a list of list of PIL images, or a list of list of numpy arrays if
one specifies numpify=True, or a list of list of PyTorch tensors if one specifies torchify=True.
One can specify whether the videos are of the same resolution or not.
"""
assert not (numpify and torchify), "You cannot specify both numpy and PyTorch tensors at the same time"
video_inputs = []
for i in range(image_processor_tester.batch_size):
if equal_resolution:
width = height = image_processor_tester.max_resolution
else:
width, height = np.random.choice(
np.arange(image_processor_tester.min_resolution, image_processor_tester.max_resolution), 2
)
video = prepare_video(
image_processor_tester=image_processor_tester,
width=width,
height=height,
numpify=numpify,
torchify=torchify,
)
video_inputs.append(video)
return video_inputs
class TvltImageProcessorTester(unittest.TestCase):
def __init__(
self,
parent,
batch_size=7,
num_channels=3,
num_frames=4,
image_size=18,
min_resolution=30,
max_resolution=400,
do_resize=True,
size=None,
do_normalize=True,
image_mean=[0.5, 0.5, 0.5],
image_std=[0.5, 0.5, 0.5],
do_center_crop=True,
crop_size=None,
):
size = size if size is not None else {"shortest_edge": 18}
crop_size = crop_size if crop_size is not None else {"height": 18, "width": 18}
self.parent = parent
self.batch_size = batch_size
self.num_channels = num_channels
self.num_frames = num_frames
self.image_size = image_size
self.min_resolution = min_resolution
self.max_resolution = max_resolution
self.do_resize = do_resize
self.size = size
self.do_normalize = do_normalize
self.image_mean = image_mean
self.image_std = image_std
self.do_center_crop = do_center_crop
self.crop_size = crop_size
def prepare_image_processor_dict(self):
return {
"image_mean": self.image_mean,
"image_std": self.image_std,
"do_normalize": self.do_normalize,
"do_resize": self.do_resize,
"size": self.size,
"do_center_crop": self.do_center_crop,
"crop_size": self.crop_size,
}
@require_torch
@require_vision
class TvltImageProcessorTest(ImageProcessingTestMixin, unittest.TestCase):
image_processing_class = TvltImageProcessor if is_vision_available() else None
def setUp(self):
self.image_processor_tester = TvltImageProcessorTester(self)
@property
def image_processor_dict(self):
return self.image_processor_tester.prepare_image_processor_dict()
def test_image_processor_properties(self):
image_processor = self.image_processing_class(**self.image_processor_dict)
self.assertTrue(hasattr(image_processor, "image_mean"))
self.assertTrue(hasattr(image_processor, "image_std"))
self.assertTrue(hasattr(image_processor, "do_normalize"))
self.assertTrue(hasattr(image_processor, "do_resize"))
self.assertTrue(hasattr(image_processor, "do_center_crop"))
self.assertTrue(hasattr(image_processor, "size"))
def test_call_pil(self):
# Initialize image_processor
image_processor = self.image_processing_class(**self.image_processor_dict)
# create random PIL videos
video_inputs = prepare_video_inputs(self.image_processor_tester, equal_resolution=False)
for video in video_inputs:
self.assertIsInstance(video, list)
self.assertIsInstance(video[0], Image.Image)
# Test not batched input
encoded_videos = image_processor(video_inputs[0], return_tensors="pt").pixel_values
self.assertEqual(
encoded_videos.shape,
(
1,
self.image_processor_tester.num_frames,
self.image_processor_tester.num_channels,
self.image_processor_tester.crop_size["height"],
self.image_processor_tester.crop_size["width"],
),
)
# Test batched
encoded_videos = image_processor(video_inputs, return_tensors="pt").pixel_values
self.assertEqual(
encoded_videos.shape,
(
self.image_processor_tester.batch_size,
self.image_processor_tester.num_frames,
self.image_processor_tester.num_channels,
self.image_processor_tester.crop_size["height"],
self.image_processor_tester.crop_size["width"],
),
)
def test_call_numpy(self):
# Initialize image_processor
image_processor = self.image_processing_class(**self.image_processor_dict)
# create random numpy tensors
video_inputs = prepare_video_inputs(self.image_processor_tester, equal_resolution=False, numpify=True)
for video in video_inputs:
self.assertIsInstance(video, list)
self.assertIsInstance(video[0], np.ndarray)
# Test not batched input
encoded_videos = image_processor(video_inputs[0], return_tensors="pt").pixel_values
self.assertEqual(
encoded_videos.shape,
(
1,
self.image_processor_tester.num_frames,
self.image_processor_tester.num_channels,
self.image_processor_tester.crop_size["height"],
self.image_processor_tester.crop_size["width"],
),
)
# Test batched
encoded_videos = image_processor(video_inputs, return_tensors="pt").pixel_values
self.assertEqual(
encoded_videos.shape,
(
self.image_processor_tester.batch_size,
self.image_processor_tester.num_frames,
self.image_processor_tester.num_channels,
self.image_processor_tester.crop_size["height"],
self.image_processor_tester.crop_size["width"],
),
)
def test_call_numpy_4_channels(self):
# Initialize image_processor
image_processor = self.image_processing_class(**self.image_processor_dict)
# create random numpy tensors
self.image_processor_tester.num_channels = 4
video_inputs = prepare_video_inputs(self.image_processor_tester, equal_resolution=False, numpify=True)
for video in video_inputs:
self.assertIsInstance(video, list)
self.assertIsInstance(video[0], np.ndarray)
# Test not batched input
encoded_videos = image_processor(
video_inputs[0], return_tensors="pt", input_data_format="channels_first", image_mean=0, image_std=1
).pixel_values
self.assertEqual(
encoded_videos.shape,
(
1,
self.image_processor_tester.num_frames,
self.image_processor_tester.num_channels,
self.image_processor_tester.crop_size["height"],
self.image_processor_tester.crop_size["width"],
),
)
# Test batched
encoded_videos = image_processor(
video_inputs, return_tensors="pt", input_data_format="channels_first", image_mean=0, image_std=1
).pixel_values
self.assertEqual(
encoded_videos.shape,
(
self.image_processor_tester.batch_size,
self.image_processor_tester.num_frames,
self.image_processor_tester.num_channels,
self.image_processor_tester.crop_size["height"],
self.image_processor_tester.crop_size["width"],
),
)
self.image_processor_tester.num_channels = 3
def test_call_pytorch(self):
# Initialize image_processor
image_processor = self.image_processing_class(**self.image_processor_dict)
# create random PyTorch tensors
video_inputs = prepare_video_inputs(self.image_processor_tester, equal_resolution=False, torchify=True)
for video in video_inputs:
self.assertIsInstance(video, list)
self.assertIsInstance(video[0], torch.Tensor)
# Test not batched input
encoded_videos = image_processor(video_inputs[0], return_tensors="pt").pixel_values
self.assertEqual(
encoded_videos.shape,
(
1,
self.image_processor_tester.num_frames,
self.image_processor_tester.num_channels,
self.image_processor_tester.crop_size["height"],
self.image_processor_tester.crop_size["width"],
),
)
# Test batched
encoded_videos = image_processor(video_inputs, return_tensors="pt").pixel_values
self.assertEqual(
encoded_videos.shape,
(
self.image_processor_tester.batch_size,
self.image_processor_tester.num_frames,
self.image_processor_tester.num_channels,
self.image_processor_tester.crop_size["height"],
self.image_processor_tester.crop_size["width"],
),
)

View File

@@ -1,625 +0,0 @@
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Testing suite for the PyTorch TVLT model."""
import copy
import inspect
import unittest
import numpy as np
from huggingface_hub import hf_hub_download
from transformers import (
TvltConfig,
is_datasets_available,
is_speech_available,
is_torch_available,
is_vision_available,
)
from transformers.testing_utils import require_torch, require_vision, slow, torch_device
from transformers.utils import cached_property
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, floats_tensor
from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
import torch
import torch.nn as nn
from transformers import TvltForAudioVisualClassification, TvltForPreTraining, TvltModel
if is_datasets_available():
from datasets import load_dataset
if is_vision_available():
from transformers import TvltImageProcessor
if is_speech_available():
from transformers import TvltFeatureExtractor
class TvltModelTester:
def __init__(
self,
parent,
batch_size=2,
image_size=32,
spectrogram_length=32,
frequency_length=16,
image_patch_size=[2, 2],
audio_patch_size=[2, 2],
num_image_channels=3,
num_audio_channels=1,
num_frames=2,
hidden_size=32,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=128,
hidden_act="gelu",
hidden_dropout_prob=0.0,
attention_probs_dropout_prob=0.0,
initializer_range=0.02,
layer_norm_eps=1e-12,
qkv_bias=True,
use_mean_pooling=True,
decoder_num_attention_heads=4,
decoder_hidden_size=32,
decoder_num_hidden_layers=2,
decoder_intermediate_size=128,
image_mask_ratio=0.75,
audio_mask_ratio=0.15,
audio_mask_type="frame-level",
task_matching=True,
task_mae=True,
num_labels=1,
is_training=True,
):
self.parent = parent
self.batch_size = batch_size
self.image_size = image_size
self.spectrogram_length = spectrogram_length
self.frequency_length = frequency_length
self.image_patch_size = image_patch_size
self.audio_patch_size = audio_patch_size
self.num_image_channels = num_image_channels
self.num_audio_channels = num_audio_channels
self.num_frames = num_frames
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.initializer_range = initializer_range
self.layer_norm_eps = layer_norm_eps
self.qkv_bias = qkv_bias
self.use_mean_pooling = use_mean_pooling
self.decoder_num_attention_heads = decoder_num_attention_heads
self.decoder_hidden_size = decoder_hidden_size
self.decoder_num_hidden_layers = decoder_num_hidden_layers
self.decoder_intermediate_size = decoder_intermediate_size
self.image_mask_ratio = image_mask_ratio
self.audio_mask_ratio = audio_mask_ratio
self.task_matching = task_matching
self.task_mae = task_mae
self.num_labels = num_labels
self.expected_pixel_seq_len = (self.image_size // self.image_patch_size[0]) ** 2 * self.num_frames
self.expected_audio_seq_len = (self.spectrogram_length // self.audio_patch_size[0]) * (
self.frequency_length // self.audio_patch_size[1]
)
# we set the expected sequence length (which is used in several tests)
# this is equal to the seq length of number of image/video patches + number of audio patches
self.expected_seq_len = self.expected_pixel_seq_len + self.expected_audio_seq_len + 1
self.image_mae_output_dim = image_patch_size[0] ** 2 * num_image_channels
self.audio_mae_output_dim = audio_patch_size[0] * audio_patch_size[1] * num_audio_channels
self.is_training = is_training
def prepare_config_and_inputs(self):
pixel_values = floats_tensor(
[self.batch_size, self.num_frames, self.num_image_channels, self.image_size, self.image_size]
)
audio_values = floats_tensor(
[self.batch_size, self.num_audio_channels, self.spectrogram_length, self.frequency_length]
)
pixel_mask = floats_tensor([self.batch_size, self.expected_pixel_seq_len])
audio_mask = floats_tensor([self.batch_size, self.expected_audio_seq_len])
config = self.get_config()
return (config, pixel_values, audio_values, pixel_mask, audio_mask)
def prepare_config_and_inputs_for_pretraining(self):
pixel_values = floats_tensor(
[self.batch_size, self.num_frames, self.num_image_channels, self.image_size, self.image_size]
)
audio_values = floats_tensor(
[self.batch_size, self.num_audio_channels, self.spectrogram_length, self.frequency_length]
)
pixel_mask = floats_tensor([self.batch_size, self.expected_pixel_seq_len])
audio_mask = floats_tensor([self.batch_size, self.expected_audio_seq_len])
pixel_values_mixed = floats_tensor(
[self.batch_size, self.num_frames, self.num_image_channels, self.image_size, self.image_size]
)
pixel_mask_mixed = floats_tensor([self.batch_size, self.expected_pixel_seq_len])
labels = floats_tensor([self.batch_size])
config = self.get_config()
return (
config,
pixel_values,
audio_values,
pixel_mask,
audio_mask,
pixel_values_mixed,
pixel_mask_mixed,
labels,
)
def get_config(self):
return TvltConfig(
image_size=self.image_size,
spectrogram_length=self.spectrogram_length,
frequency_length=self.frequency_length,
image_patch_size=self.image_patch_size,
audio_patch_size=self.audio_patch_size,
num_image_channels=self.num_image_channels,
num_audio_channels=self.num_audio_channels,
num_frames=self.num_frames,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
num_attention_heads=self.num_attention_heads,
intermediate_size=self.intermediate_size,
hidden_act=self.hidden_act,
hidden_dropout_prob=self.hidden_dropout_prob,
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
initializer_range=self.initializer_range,
layer_norm_eps=self.layer_norm_eps,
qkv_bias=self.qkv_bias,
use_mean_pooling=self.use_mean_pooling,
decoder_num_attention_heads=self.decoder_num_attention_heads,
decoder_hidden_size=self.decoder_hidden_size,
decoder_num_hidden_layers=self.decoder_num_hidden_layers,
decoder_intermediate_size=self.decoder_intermediate_size,
image_mask_ratio=self.image_mask_ratio,
audio_mask_ratio=self.audio_mask_ratio,
task_matching=self.task_matching,
task_mae=self.task_mae,
num_labels=self.num_labels,
)
def create_and_check_model(self, config, pixel_values, audio_values, pixel_mask, audio_mask):
model = TvltModel(config=config)
model.to(torch_device)
model.eval()
result = model(pixel_values, audio_values, pixel_mask=pixel_mask, audio_mask=audio_mask)
result = model(pixel_values, audio_values)
self.parent.assertEqual(
result.last_hidden_state.shape, (self.batch_size, self.expected_seq_len, self.hidden_size)
)
def create_and_check_for_audiovisual_classification(
self, config, pixel_values, audio_values, pixel_mask, audio_mask
):
model = TvltForAudioVisualClassification(config=config)
model.to(torch_device)
model.eval()
result = model(pixel_values, audio_values, pixel_mask=pixel_mask, audio_mask=audio_mask)
result = model(pixel_values, audio_values)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))
def create_and_check_for_pretraining(
self,
config,
pixel_values,
audio_values,
pixel_mask,
audio_mask,
pixel_values_mixed,
pixel_mask_mixed,
labels,
):
model = TvltForPreTraining(config=config)
model.to(torch_device)
model.train()
result = model(
pixel_values,
audio_values,
pixel_mask,
audio_mask,
pixel_values_mixed=pixel_values_mixed,
pixel_mask_mixed=pixel_mask_mixed,
labels=labels,
)
self.parent.assertEqual(
result.pixel_logits.shape, (self.batch_size, self.expected_pixel_seq_len, self.image_mae_output_dim)
)
self.parent.assertEqual(
result.audio_logits.shape, (self.batch_size, self.expected_audio_seq_len, self.audio_mae_output_dim)
)
self.parent.assertEqual(result.matching_logits.shape, (self.batch_size, self.num_labels))
def create_and_check_for_pretraining_inference(
self,
config,
pixel_values,
audio_values,
pixel_mask,
audio_mask,
pixel_values_mixed,
pixel_mask_mixed,
labels,
):
model = TvltForPreTraining(config=config)
model.to(torch_device)
model.eval()
result = model(
pixel_values,
audio_values,
pixel_mask,
audio_mask,
pixel_values_mixed=pixel_values_mixed,
pixel_mask_mixed=pixel_mask_mixed,
labels=labels,
)
if result.pixel_logits is not None:
self.parent.assertEqual(
result.pixel_logits.shape, (self.batch_size, self.expected_pixel_seq_len, self.image_mae_output_dim)
)
if result.audio_logits is not None:
self.parent.assertEqual(
result.audio_logits.shape, (self.batch_size, self.expected_audio_seq_len, self.audio_mae_output_dim)
)
self.parent.assertEqual(result.matching_logits.shape, (self.batch_size, self.num_labels))
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(config, pixel_values, audio_values, pixel_mask, audio_mask) = config_and_inputs
inputs_dict = {
"pixel_values": pixel_values,
"audio_values": audio_values,
"pixel_mask": pixel_mask,
"audio_mask": audio_mask,
}
return config, inputs_dict
def prepare_pixel_values(self):
return floats_tensor(
[self.batch_size, self.num_frames, self.num_image_channels, self.image_size, self.image_size]
)
def prepare_audio_values(self):
return floats_tensor(
[self.batch_size, self.num_audio_channels, self.spectrogram_length, self.frequency_length]
)
@require_torch
class TvltModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(TvltModel, TvltForPreTraining, TvltForAudioVisualClassification) if is_torch_available() else ()
)
pipeline_model_mapping = {"feature-extraction": TvltModel} if is_torch_available() else {}
fx_compatible = False
test_pruning = False
test_headmasking = False
test_torchscript = False
test_resize_embeddings = False
main_input_name = "pixel_values"
# TvltForAudioVisualClassification and TvltForPreTraining require special treatment
def _prepare_for_class(self, inputs_dict, model_class, return_labels=True):
inputs_dict = copy.deepcopy(inputs_dict)
if return_labels:
if model_class.__name__ == "TvltForAudioVisualClassification":
inputs_dict["labels"] = torch.zeros(
(self.model_tester.batch_size,), dtype=torch.long, device=torch_device
)
elif model_class.__name__ == "TvltForPreTraining":
inputs_dict["labels"] = torch.zeros(
(self.model_tester.batch_size,), dtype=torch.float, device=torch_device
)
inputs_dict["pixel_values_mixed"] = torch.zeros(
(
self.model_tester.batch_size,
self.model_tester.num_frames,
self.model_tester.num_image_channels,
self.model_tester.image_size,
self.model_tester.image_size,
),
dtype=torch.float,
device=torch_device,
)
inputs_dict["pixel_mask_mixed"] = torch.zeros(
(self.model_tester.batch_size, self.model_tester.expected_pixel_seq_len),
dtype=torch.float,
device=torch_device,
)
return inputs_dict
def setUp(self):
self.model_tester = TvltModelTester(self)
self.config_tester = ConfigTester(self, config_class=TvltConfig, has_text_modality=False, hidden_size=37)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="TVLT does not use inputs_embeds")
def test_inputs_embeds(self):
pass
def test_model_common_attributes(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
model = model_class(config)
input_embeddings = model.get_input_embeddings()
self.assertIsInstance(input_embeddings, (tuple))
for embedding in input_embeddings:
self.assertIsInstance(embedding, (nn.Module))
x = model.get_output_embeddings()
self.assertTrue(x is None or isinstance(x, nn.Linear))
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
model = model_class(config)
signature = inspect.signature(model.forward)
# signature.parameters is an OrderedDict => so arg_names order is deterministic
arg_names = [*signature.parameters.keys()]
expected_arg_names = ["pixel_values", "audio_values"]
self.assertListEqual(arg_names[:2], expected_arg_names)
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
def test_for_audiovisual_classification(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_audiovisual_classification(*config_and_inputs)
def test_for_pretraining(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_pretraining()
self.model_tester.create_and_check_for_pretraining(*config_and_inputs)
self.model_tester.create_and_check_for_pretraining_inference(*config_and_inputs)
@slow
def test_model_from_pretrained(self):
model_name = "ZinengTang/tvlt-base"
model = TvltModel.from_pretrained(model_name)
self.assertIsNotNone(model)
def test_training(self):
if not self.model_tester.is_training:
return
for model_class in self.all_model_classes[1:]:
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.return_dict = True
model = model_class(config)
model.to(torch_device)
model.train()
inputs = self._prepare_for_class(inputs_dict, model_class)
for k, v in inputs.items():
print(k, v.shape)
loss = model(**inputs).loss
loss.backward()
def test_training_gradient_checkpointing(self):
if not self.model_tester.is_training:
return
for model_class in self.all_model_classes[1:]:
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.use_cache = False
config.return_dict = True
model = model_class(config)
model.to(torch_device)
model.gradient_checkpointing_enable()
model.train()
inputs = self._prepare_for_class(inputs_dict, model_class)
loss = model(**inputs).loss
loss.backward()
def test_attention_outputs(self):
if not self.has_attentions:
pass
else:
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.return_dict = True
for model_class in self.all_model_classes[2:]:
seq_len = self.model_tester.expected_seq_len
inputs_dict["output_attentions"] = True
inputs_dict["output_hidden_states"] = False
config.return_dict = True
model = model_class(config)
model.to(torch_device)
model.eval()
with torch.no_grad():
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
attentions = outputs.attentions
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
# check that output_attentions also work using config
del inputs_dict["output_attentions"]
config.output_attentions = True
model = model_class(config)
model.to(torch_device)
model.eval()
with torch.no_grad():
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
attentions = outputs.attentions
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
self.assertListEqual(
list(attentions[0].shape[-3:]),
[self.model_tester.num_attention_heads, seq_len, seq_len],
)
out_len = len(outputs)
# Check attention is always last and order is fine
inputs_dict["output_attentions"] = True
inputs_dict["output_hidden_states"] = True
model = model_class(config)
model.to(torch_device)
model.eval()
with torch.no_grad():
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
self.assertEqual(out_len + 1, len(outputs))
self_attentions = outputs.attentions
self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers)
self.assertListEqual(
list(self_attentions[0].shape[-3:]),
[self.model_tester.num_attention_heads, seq_len, seq_len],
)
def test_hidden_states_output(self):
def check_hidden_states_output(inputs_dict, config, model_class):
model = model_class(config)
model.to(torch_device)
model.eval()
with torch.no_grad():
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
hidden_states = outputs.hidden_states
expected_num_layers = self.model_tester.num_hidden_layers + 1
self.assertEqual(len(hidden_states), expected_num_layers)
seq_length = self.model_tester.expected_seq_len
self.assertListEqual(
list(hidden_states[0].shape[-2:]),
[seq_length, self.model_tester.hidden_size],
)
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes[2:]:
inputs_dict["output_hidden_states"] = True
check_hidden_states_output(inputs_dict, config, model_class)
# check that output_hidden_states also work using config
del inputs_dict["output_hidden_states"]
config.output_hidden_states = True
check_hidden_states_output(inputs_dict, config, model_class)
# We will verify our results on a video of eating spaghetti
# Frame indices used: [164 168 172 176 181 185 189 193 198 202 206 210 215 219 223 227]
def prepare_video(num_frames=8):
file = hf_hub_download(
repo_id="hf-internal-testing/spaghetti-video", filename="eating_spaghetti.npy", repo_type="dataset"
)
video = np.load(file)[:num_frames]
return list(video)
def prepare_audio(num_samples=1):
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
# automatic decoding with librispeech
speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
return [x["array"] for x in speech_samples]
@require_torch
@require_vision
class TvltModelIntegrationTest(unittest.TestCase):
@cached_property
def default_processors(self):
# logits were tested with a different mean and std, so we use the same here
return (
TvltImageProcessor() if is_vision_available() else None,
TvltFeatureExtractor(),
)
def test_inference_for_base_model(self):
model = TvltModel.from_pretrained("ZinengTang/tvlt-base").to(torch_device)
image_processor, audio_feature_extractor = self.default_processors
video = prepare_video()
audio = prepare_audio()
video_inputs = image_processor(video, return_tensors="pt").to(torch_device)
audio_inputs = audio_feature_extractor(audio, return_tensors="pt").to(torch_device)
inputs = {}
inputs.update(video_inputs)
inputs.update(audio_inputs)
# forward pass
with torch.no_grad():
outputs = model(**inputs)
# verify the logits
expected_last_hidden_state_slice = torch.tensor([[-0.0186, -0.0691], [0.0242, -0.0398]], device=torch_device)
self.assertTrue(
torch.allclose(outputs.last_hidden_state[:, :2, :2], expected_last_hidden_state_slice, atol=1e-4)
)
def test_inference_for_pretraining(self):
model = TvltForPreTraining.from_pretrained("ZinengTang/tvlt-base").to(torch_device)
image_processor, audio_feature_extractor = self.default_processors
video = prepare_video()
video_mixed = prepare_video()
audio = prepare_audio()
video_inputs = image_processor(video, return_tensors="pt", mask_pixel=True).to(torch_device)
video_mixed_inputs = image_processor(video_mixed, is_mixed=True, return_tensors="pt").to(torch_device)
audio_inputs = audio_feature_extractor(audio, return_tensors="pt", mask_audio=True).to(torch_device)
labels = torch.tensor([[0.0]], device=torch_device)
inputs = {}
inputs.update(video_inputs)
inputs.update(video_mixed_inputs)
inputs.update(audio_inputs)
inputs.update({"labels": labels})
# forward pass
with torch.no_grad():
outputs = model(**inputs)
# verify the logits
expected_pixel_logits_shape = torch.Size([1, 1568, 768])
expected_audio_logits_shape = torch.Size([1, 96, 256])
expected_matching_logits_shape = torch.Size([1, 1])
if outputs.pixel_logits is not None:
self.assertEqual(outputs.pixel_logits.shape, expected_pixel_logits_shape)
if outputs.audio_logits is not None:
self.assertEqual(outputs.audio_logits.shape, expected_audio_logits_shape)
self.assertTrue(outputs.matching_logits.shape, expected_matching_logits_shape)

View File

@@ -1,116 +0,0 @@
# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import shutil
import tempfile
import unittest
import numpy as np
import pytest
from transformers import is_speech_available, is_vision_available
from transformers.testing_utils import require_torch
if is_vision_available():
from transformers import TvltImageProcessor
if is_speech_available():
from transformers import TvltFeatureExtractor
from transformers import TvltProcessor
@require_torch
class TvltProcessorTest(unittest.TestCase):
def setUp(self):
self.checkpoint = "ZinengTang/tvlt-base"
self.tmpdirname = tempfile.mkdtemp()
def get_image_processor(self, **kwargs):
return TvltImageProcessor.from_pretrained(self.checkpoint, **kwargs)
def get_feature_extractor(self, **kwargs):
return TvltFeatureExtractor.from_pretrained(self.checkpoint, **kwargs)
def tearDown(self):
shutil.rmtree(self.tmpdirname)
def test_save_load_pretrained_default(self):
image_processor = self.get_image_processor()
feature_extractor = self.get_feature_extractor()
processor = TvltProcessor(image_processor=image_processor, feature_extractor=feature_extractor)
processor.save_pretrained(self.tmpdirname)
processor = TvltProcessor.from_pretrained(self.tmpdirname)
self.assertIsInstance(processor.feature_extractor, TvltFeatureExtractor)
self.assertIsInstance(processor.image_processor, TvltImageProcessor)
def test_feature_extractor(self):
image_processor = self.get_image_processor()
feature_extractor = self.get_feature_extractor()
processor = TvltProcessor(image_processor=image_processor, feature_extractor=feature_extractor)
audio = np.ones([12000])
audio_dict = feature_extractor(audio, return_tensors="np")
input_processor = processor(audio=audio, return_tensors="np")
for key in audio_dict.keys():
self.assertAlmostEqual(audio_dict[key].sum(), input_processor[key].sum(), delta=1e-2)
def test_image_processor(self):
image_processor = self.get_image_processor()
feature_extractor = self.get_feature_extractor()
processor = TvltProcessor(image_processor=image_processor, feature_extractor=feature_extractor)
images = np.ones([3, 224, 224])
image_dict = image_processor(images, return_tensors="np")
input_processor = processor(images=images, return_tensors="np")
for key in image_dict.keys():
self.assertAlmostEqual(image_dict[key].sum(), input_processor[key].sum(), delta=1e-2)
def test_processor(self):
image_processor = self.get_image_processor()
feature_extractor = self.get_feature_extractor()
processor = TvltProcessor(image_processor=image_processor, feature_extractor=feature_extractor)
audio = np.ones([12000])
images = np.ones([3, 224, 224])
inputs = processor(audio=audio, images=images)
self.assertListEqual(list(inputs.keys()), ["audio_values", "audio_mask", "pixel_values", "pixel_mask"])
# test if it raises when no input is passed
with pytest.raises(ValueError):
processor()
def test_model_input_names(self):
image_processor = self.get_image_processor()
feature_extractor = self.get_feature_extractor()
processor = TvltProcessor(image_processor=image_processor, feature_extractor=feature_extractor)
self.assertListEqual(
processor.model_input_names,
image_processor.model_input_names + feature_extractor.model_input_names,
msg="`processor` and `image_processor`+`feature_extractor` model input names do not match",
)

View File

@@ -1,281 +0,0 @@
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Testing suite for the PyTorch ViT Hybrid model."""
import unittest
from transformers import ViTHybridConfig
from transformers.testing_utils import is_flaky, require_accelerate, require_torch, require_vision, slow, torch_device
from transformers.utils import cached_property, is_torch_available, is_vision_available
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_tensor, ids_tensor
from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
import torch
from torch import nn
from transformers import ViTHybridForImageClassification, ViTHybridImageProcessor, ViTHybridModel
if is_vision_available():
from PIL import Image
class ViTHybridModelTester:
def __init__(
self,
parent,
batch_size=13,
image_size=64,
patch_size=2,
num_channels=3,
is_training=True,
use_labels=True,
hidden_size=32,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
type_sequence_label_size=10,
initializer_range=0.02,
backbone_featmap_shape=[1, 16, 4, 4],
scope=None,
attn_implementation="eager",
):
self.parent = parent
self.batch_size = batch_size
self.image_size = image_size
self.patch_size = patch_size
self.num_channels = num_channels
self.is_training = is_training
self.use_labels = use_labels
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.scope = scope
self.backbone_featmap_shape = backbone_featmap_shape
self.attn_implementation = attn_implementation
# in ViT hybrid, the seq length equals the number of patches + 1 (we add 1 for the [CLS] token)
# the number of patches is based on the feature map of the backbone, which by default uses an output stride
# of 32, which means that the feature map has a spatial resolution of 1/32 of the input image size
num_patches = (self.image_size // 32) ** 2
self.seq_length = num_patches + 1
def prepare_config_and_inputs(self):
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
labels = None
if self.use_labels:
labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
config = self.get_config()
return config, pixel_values, labels
def get_config(self):
backbone_config = {
"global_padding": "same",
"layer_type": "bottleneck",
"depths": [3, 4, 9],
"out_features": ["stage1", "stage2", "stage3"],
"embedding_dynamic_padding": True,
"hidden_sizes": [4, 8, 16, 32],
"num_groups": 2,
}
return ViTHybridConfig(
image_size=self.image_size,
patch_size=self.patch_size,
num_channels=self.num_channels,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
num_attention_heads=self.num_attention_heads,
intermediate_size=self.intermediate_size,
hidden_act=self.hidden_act,
hidden_dropout_prob=self.hidden_dropout_prob,
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
is_decoder=False,
initializer_range=self.initializer_range,
backbone_featmap_shape=self.backbone_featmap_shape,
backbone_config=backbone_config,
backbone=None,
attn_implementation=self.attn_implementation,
)
def create_and_check_model(self, config, pixel_values, labels):
model = ViTHybridModel(config=config)
model.to(torch_device)
model.eval()
result = model(pixel_values)
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
def create_and_check_for_image_classification(self, config, pixel_values, labels):
config.num_labels = self.type_sequence_label_size
model = ViTHybridForImageClassification(config)
model.to(torch_device)
model.eval()
result = model(pixel_values, labels=labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.type_sequence_label_size))
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
config, pixel_values, labels = config_and_inputs
inputs_dict = {"pixel_values": pixel_values}
return config, inputs_dict
@require_torch
class ViTHybridModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
"""
Here we also overwrite some of the tests of test_modeling_common.py, as ViT does not use input_ids, inputs_embeds,
attention_mask and seq_length.
"""
all_model_classes = (ViTHybridModel, ViTHybridForImageClassification) if is_torch_available() else ()
pipeline_model_mapping = (
{"image-feature-extraction": ViTHybridModel, "image-classification": ViTHybridForImageClassification}
if is_torch_available()
else {}
)
test_pruning = False
test_resize_embeddings = False
test_head_masking = False
model_split_percents = [0.5, 0.9]
def setUp(self):
self.model_tester = ViTHybridModelTester(self)
self.config_tester = ConfigTester(self, config_class=ViTHybridConfig, has_text_modality=False, hidden_size=37)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="ViT does not use inputs_embeds")
def test_inputs_embeds(self):
pass
def test_model_common_attributes(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
model = model_class(config)
self.assertIsInstance(model.get_input_embeddings(), (nn.Module))
x = model.get_output_embeddings()
self.assertTrue(x is None or isinstance(x, nn.Linear))
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
def test_for_image_classification(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_image_classification(*config_and_inputs)
def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
configs_no_init = _config_zero_init(config)
for model_class in self.all_model_classes:
model = model_class(config=configs_no_init)
# Skip the check for the backbone
for name, module in model.named_modules():
if module.__class__.__name__ == "ViTHybridPatchEmbeddings":
backbone_params = [f"{name}.{key}" for key in module.state_dict().keys()]
break
for name, param in model.named_parameters():
if param.requires_grad:
if name in backbone_params:
continue
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
@slow
def test_model_from_pretrained(self):
model_name = "google/vit-hybrid-base-bit-384"
model = ViTHybridModel.from_pretrained(model_name)
self.assertIsNotNone(model)
@is_flaky(description="is_flaky https://github.com/huggingface/transformers/issues/29516")
def test_batching_equivalence(self):
super().test_batching_equivalence()
# We will verify our results on an image of cute cats
def prepare_img():
image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
return image
@require_torch
@require_vision
class ViTModelIntegrationTest(unittest.TestCase):
@cached_property
def default_image_processor(self):
return (
ViTHybridImageProcessor.from_pretrained("google/vit-hybrid-base-bit-384")
if is_vision_available()
else None
)
@slow
def test_inference_image_classification_head(self):
model = ViTHybridForImageClassification.from_pretrained("google/vit-hybrid-base-bit-384").to(torch_device)
image_processor = self.default_image_processor
image = prepare_img()
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
# forward pass
with torch.no_grad():
outputs = model(**inputs)
# verify the logits
expected_shape = torch.Size((1, 1000))
self.assertEqual(outputs.logits.shape, expected_shape)
expected_slice = torch.tensor([-1.9090, -0.4993, -0.2389]).to(torch_device)
self.assertTrue(torch.allclose(outputs.logits[0, :3], expected_slice, atol=1e-4))
@slow
@require_accelerate
def test_accelerate_inference(self):
image_processor = ViTHybridImageProcessor.from_pretrained("google/vit-hybrid-base-bit-384")
model = ViTHybridForImageClassification.from_pretrained("google/vit-hybrid-base-bit-384", device_map="auto")
image = prepare_img()
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
outputs = model(**inputs)
logits = outputs.logits
# model predicts one of the 1000 ImageNet classes
predicted_class_idx = logits.argmax(-1).item()
self.assertTrue(model.config.id2label[predicted_class_idx], "tabby, tabby cat")

View File

@@ -1,150 +0,0 @@
# coding=utf-8
# Copyright 2020 The HuggingFace Inc. team, The Microsoft Research team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from transformers import is_torch_available
from transformers.testing_utils import require_torch, slow, torch_device
if is_torch_available():
import torch
from transformers import XLMProphetNetForConditionalGeneration, XLMProphetNetTokenizer
@require_torch
class XLMProphetNetModelIntegrationTest(unittest.TestCase):
@slow
def test_pretrained_checkpoint_hidden_states(self):
model = XLMProphetNetForConditionalGeneration.from_pretrained("microsoft/xprophetnet-large-wiki100-cased")
model.to(torch_device)
# encoder-decoder outputs
encoder_ids = torch.tensor([[17, 96208, 103471, 2]]).to(torch_device)
decoder_prev_ids = torch.tensor(
[[2, 250, 9953, 34, 69489, 1620, 32, 118424, 624, 210, 105, 2913, 1032, 351]]
).to(torch_device)
output = model(
input_ids=encoder_ids, attention_mask=None, encoder_outputs=None, decoder_input_ids=decoder_prev_ids
)
output_predited_logis = output[0]
expected_shape = torch.Size((1, 14, 250012))
self.assertEqual(output_predited_logis.shape, expected_shape)
expected_slice = torch.tensor(
[[[-6.3986, -8.2391, 12.5189], [-6.3289, -8.0864, 12.6211], [-6.2418, -8.0445, 12.7968]]]
).to(torch_device)
self.assertTrue(torch.allclose(output_predited_logis[:, :3, :3], expected_slice, atol=1e-4))
# encoder outputs
encoder_outputs = model.prophetnet.encoder(encoder_ids)[0]
expected_encoder_outputs_slice = torch.tensor(
[[[-1.4260, -0.7628, 0.8453], [-1.4719, -0.1391, 0.7807], [-1.7678, 0.0114, 0.4646]]]
).to(torch_device)
expected_shape_encoder = torch.Size((1, 4, 1024))
self.assertEqual(encoder_outputs.shape, expected_shape_encoder)
self.assertTrue(torch.allclose(encoder_outputs[:, :3, :3], expected_encoder_outputs_slice, atol=1e-4))
# decoder outputs
decoder_outputs = model.prophetnet.decoder(
decoder_prev_ids,
encoder_hidden_states=encoder_outputs,
)
predicting_streams = decoder_outputs[1].view(1, model.config.ngram, 14, -1)
predicting_streams_logits = model.lm_head(predicting_streams)
next_first_stream_logits = predicting_streams_logits[:, 0]
self.assertTrue(torch.allclose(next_first_stream_logits[:, :3, :3], expected_slice, atol=1e-4))
@slow
def test_ntg_hidden_states(self):
model = XLMProphetNetForConditionalGeneration.from_pretrained(
"microsoft/xprophetnet-large-wiki100-cased-xglue-ntg"
)
model.to(torch_device)
encoder_ids = torch.tensor([[17, 96208, 103471, 2]]).to(torch_device)
decoder_prev_ids = torch.tensor(
[[2, 250, 9953, 34, 69489, 1620, 32, 118424, 624, 210, 105, 2913, 1032, 351]]
).to(torch_device)
output = model(
input_ids=encoder_ids, attention_mask=None, encoder_outputs=None, decoder_input_ids=decoder_prev_ids
)
output_predited_logis = output[0]
expected_shape = torch.Size((1, 14, 250012))
self.assertEqual(output_predited_logis.shape, expected_shape)
# compare the actual values for a slice.
expected_slice = torch.tensor(
[[[-9.2253, -9.7173, -6.3529], [-7.6701, -9.0145, -1.9382], [-8.0195, -7.0004, -0.1523]]]
).to(torch_device)
self.assertTrue(torch.allclose(output_predited_logis[:, :3, :3], expected_slice, atol=1e-4))
@slow
def test_xprophetnet_ntg_inference(self):
model = XLMProphetNetForConditionalGeneration.from_pretrained(
"microsoft/xprophetnet-large-wiki100-cased-xglue-ntg"
)
model.to(torch_device)
model.config.max_length = 512
tokenizer = XLMProphetNetTokenizer.from_pretrained("microsoft/xprophetnet-large-wiki100-cased-xglue-ntg")
EN_SENTENCE = (
"Microsoft Corporation intends to officially end free support for the Windows 7 operating system after"
" January 14, 2020, according to the official portal of the organization. From that day, users of this"
" system will not be able to receive security updates, which could make their computers vulnerable to"
" cyber attacks."
)
RU_SENTENCE = (
"орпорация Microsoft намерена официально прекратить бесплатную поддержку операционной системы Windows 7"
" после 14 января 2020 года, сообщается на официальном портале организации . С указанного дня пользователи"
" этой системы не смогут получать обновления безопасности, из-за чего их компьютеры могут стать уязвимыми"
" к кибератакам."
)
ZH_SENTENCE = "根据该组织的官方门户网站微软公司打算在2020年1月14日之后正式终止对Windows 7操作系统的免费支持。从那时起该系统的用户将无法接收安全更新这可能会使他们的计算机容易受到网络攻击。"
input_ids = tokenizer(
[EN_SENTENCE, RU_SENTENCE, ZH_SENTENCE], padding=True, max_length=255, return_tensors="pt"
).input_ids
input_ids = input_ids.to(torch_device)
summary_ids = model.generate(
input_ids, num_beams=10, length_penalty=1.0, no_repeat_ngram_size=3, early_stopping=True
)
generated_titles = [tokenizer.decode(g, skip_special_tokens=True) for g in summary_ids]
EXPECTED_TITLE_EN = "Microsoft to end Windows 7 free support after January 14, 2020"
EXPECTED_TITLE_RU = "Microsoft намерена прекратить бесплатную поддержку Windows 7 после 14 января 2020 года"
EXPECTED_TITLE_ZH = "微软打算终止对Windows 7操作系统的免费支持"
self.assertListEqual(
[EXPECTED_TITLE_EN, EXPECTED_TITLE_RU, EXPECTED_TITLE_ZH],
generated_titles,
)
summary_ids_beam1 = model.generate(
input_ids, num_beams=1, length_penalty=1.0, no_repeat_ngram_size=3, early_stopping=True
)
generated_titles_beam1_tok = [
tokenizer.convert_ids_to_tokens(g, skip_special_tokens=True) for g in summary_ids_beam1
]
EXPECTED_TITLE_EN_BEAM1_TOK = "▁Microsoft ▁to ▁end ▁free ▁support ▁for ▁Windows ▁7".split(" ")
EXPECTED_TITLE_RU_BEAM1_TOK = "▁Microsoft ▁намерен а ▁прекрати ть ▁бес плат ную ▁поддержку ▁Windows ▁7 ▁после ▁14 ▁января ▁2020 ▁года".split(
" "
)
EXPECTED_TITLE_ZH_BEAM1_TOK = "微软 公司 打算 终止 对 Windows ▁7 操作 系统的 免费 支持".split(" ")
self.assertListEqual(
[EXPECTED_TITLE_EN_BEAM1_TOK, EXPECTED_TITLE_RU_BEAM1_TOK, EXPECTED_TITLE_ZH_BEAM1_TOK],
generated_titles_beam1_tok,
)

View File

@@ -1,154 +0,0 @@
# coding=utf-8
# Copyright 2020 The HuggingFace Inc. team, The Microsoft Research team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from transformers.models.xlm_prophetnet.tokenization_xlm_prophetnet import SPIECE_UNDERLINE, XLMProphetNetTokenizer
from transformers.testing_utils import get_tests_dir, require_sentencepiece, slow
from transformers.utils import cached_property
from ...test_tokenization_common import TokenizerTesterMixin
SAMPLE_VOCAB = get_tests_dir("fixtures/test_sentencepiece.model")
@require_sentencepiece
class XLMProphetNetTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
from_pretrained_id = "microsoft/xprophetnet-large-wiki100-cased"
tokenizer_class = XLMProphetNetTokenizer
test_rust_tokenizer = False
test_sentencepiece = True
def setUp(self):
super().setUp()
# We have a SentencePiece fixture for testing
tokenizer = XLMProphetNetTokenizer(SAMPLE_VOCAB, keep_accents=True)
tokenizer.save_pretrained(self.tmpdirname)
def test_convert_token_and_id(self):
"""Test ``_convert_token_to_id`` and ``_convert_id_to_token``."""
token = "[PAD]"
token_id = 0
self.assertEqual(self.get_tokenizer()._convert_token_to_id(token), token_id)
self.assertEqual(self.get_tokenizer()._convert_id_to_token(token_id), token)
def test_get_vocab(self):
vocab_keys = list(self.get_tokenizer().get_vocab().keys())
self.assertEqual(vocab_keys[0], "[PAD]")
self.assertEqual(vocab_keys[1], "[CLS]")
self.assertEqual(vocab_keys[-1], "j")
self.assertEqual(len(vocab_keys), 1_012)
def test_vocab_size(self):
self.assertEqual(self.get_tokenizer().vocab_size, 1_012)
def test_full_tokenizer(self):
tokenizer = XLMProphetNetTokenizer(SAMPLE_VOCAB, keep_accents=True)
tokens = tokenizer.tokenize("This is a test")
self.assertListEqual(tokens, ["▁This", "▁is", "▁a", "▁t", "est"])
self.assertListEqual(
tokenizer.convert_tokens_to_ids(tokens),
[value + tokenizer.fairseq_offset for value in [285, 46, 10, 170, 382]],
)
tokens = tokenizer.tokenize("I was born in 92000, and this is falsé.")
self.assertListEqual(
tokens,
[
SPIECE_UNDERLINE + "I",
SPIECE_UNDERLINE + "was",
SPIECE_UNDERLINE + "b",
"or",
"n",
SPIECE_UNDERLINE + "in",
SPIECE_UNDERLINE + "",
"9",
"2",
"0",
"0",
"0",
",",
SPIECE_UNDERLINE + "and",
SPIECE_UNDERLINE + "this",
SPIECE_UNDERLINE + "is",
SPIECE_UNDERLINE + "f",
"al",
"s",
"é",
".",
],
)
ids = tokenizer.convert_tokens_to_ids(tokens)
self.assertListEqual(
ids,
[
value + tokenizer.fairseq_offset
for value in [8, 21, 84, 55, 24, 19, 7, -9, 602, 347, 347, 347, 3, 12, 66, 46, 72, 80, 6, -9, 4]
],
)
back_tokens = tokenizer.convert_ids_to_tokens(ids)
self.assertListEqual(
back_tokens,
[
SPIECE_UNDERLINE + "I",
SPIECE_UNDERLINE + "was",
SPIECE_UNDERLINE + "b",
"or",
"n",
SPIECE_UNDERLINE + "in",
SPIECE_UNDERLINE + "",
"[UNK]",
"2",
"0",
"0",
"0",
",",
SPIECE_UNDERLINE + "and",
SPIECE_UNDERLINE + "this",
SPIECE_UNDERLINE + "is",
SPIECE_UNDERLINE + "f",
"al",
"s",
"[UNK]",
".",
],
)
@cached_property
def big_tokenizer(self):
return XLMProphetNetTokenizer.from_pretrained("microsoft/xprophetnet-large-wiki100-cased")
@slow
def test_tokenization_base_easy_symbols(self):
symbols = "Hello World!"
original_tokenizer_encodings = [35389, 6672, 49, 2]
self.assertListEqual(original_tokenizer_encodings, self.big_tokenizer.encode(symbols))
@slow
def test_tokenizer_integration(self):
expected_encoding = {'input_ids': [[11073, 82783, 18, 26, 82783, 549, 51540, 248, 17209, 1301, 217, 20, 215186, 1325, 147, 17209, 1301, 217, 20, 56370, 53, 122020, 20, 16477, 27, 87355, 4548, 20, 4728, 78392, 17, 159969, 18, 26, 24491, 629, 15, 538, 22704, 5439, 15, 2788, 24491, 9885, 15, 43534, 605, 15, 814, 18403, 33200, 29, 15, 43534, 24458, 12410, 111, 24966, 83669, 9637, 144068, 26, 850, 22346, 27, 147, 24966, 83669, 83490, 26, 39113, 735, 27, 689, 656, 2800, 1339, 4600, 53, 122020, 115785, 34, 816, 1339, 46887, 18, 147, 53905, 1951, 42238, 41170, 17732, 834, 436, 15, 27523, 98733, 217, 147, 5542, 4981, 930, 17347, 16, 2], [20091, 629, 94, 82786, 58, 490, 20, 1528, 84, 53905, 344, 80592, 110128, 18822, 5267, 1306, 62, 152537, 308, 7997, 401, 124427, 549, 35442, 225, 109, 15055, 25748, 147, 7119, 43712, 34, 767, 135366, 18, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [592, 63784, 119466, 17, 147808, 88214, 18, 656, 81, 32, 3296, 10280, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]} # fmt: skip
self.tokenizer_integration_test_util(
expected_encoding=expected_encoding,
model_name="microsoft/xprophetnet-large-wiki100-cased",
revision="1acad1643ddd54a44df6a1b797ada8373685d90e",
)