Auto feature extractor (#11097)
* AutoFeatureExtractor * Init and first tests * Tests * Damn you gitignore * Quality * Defensive test for when not all backends are here * Use pattern for Speech2Text models
This commit is contained in:
3
tests/fixtures/dummy_feature_extractor_config.json
vendored
Normal file
3
tests/fixtures/dummy_feature_extractor_config.json
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
{
|
||||
"feature_extractor_type": "Wav2Vec2FeatureExtractor"
|
||||
}
|
||||
44
tests/test_feature_extraction_auto.py
Normal file
44
tests/test_feature_extraction_auto.py
Normal file
@@ -0,0 +1,44 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2021 the HuggingFace Inc. team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import unittest
|
||||
|
||||
from transformers import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor, Wav2Vec2FeatureExtractor
|
||||
|
||||
|
||||
# Path to the dummy feature-extractor config fixture, resolved relative to the
# directory containing this test module so it does not depend on the current
# working directory.
SAMPLE_FEATURE_EXTRACTION_CONFIG = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "fixtures/dummy_feature_extractor_config.json",
)
|
||||
|
||||
|
||||
class AutoFeatureExtractorTest(unittest.TestCase):
    """Tests for ``AutoFeatureExtractor.from_pretrained`` and ``FEATURE_EXTRACTOR_MAPPING``."""

    def test_feature_extractor_from_model_shortcut(self):
        # Loading from a model shortcut name must yield the Wav2Vec2 extractor class.
        feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")
        self.assertIsInstance(feature_extractor, Wav2Vec2FeatureExtractor)

    def test_feature_extractor_from_local_file(self):
        # Loading from the local JSON fixture path must yield the same class.
        feature_extractor = AutoFeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG)
        self.assertIsInstance(feature_extractor, Wav2Vec2FeatureExtractor)

    def test_pattern_matching_fallback(self):
        """
        Safety check on the key order of ``FEATURE_EXTRACTOR_MAPPING``.

        Intended for cases where config.json doesn't include a model_type:
        no key may be contained in a key that comes after it in the mapping
        (the typical failure case).
        """
        mapping_keys = list(FEATURE_EXTRACTOR_MAPPING.keys())
        for position, earlier_key in enumerate(mapping_keys):
            # Compare each key only against the keys that follow it.
            for later_key in mapping_keys[position + 1 :]:
                self.assertFalse(earlier_key in later_key)
|
||||
@@ -20,12 +20,15 @@ import unittest
|
||||
|
||||
import numpy as np
|
||||
|
||||
from transformers import Speech2TextFeatureExtractor
|
||||
from transformers import is_speech_available
|
||||
from transformers.testing_utils import require_torch, require_torchaudio
|
||||
|
||||
from .test_sequence_feature_extraction_common import SequenceFeatureExtractionTestMixin
|
||||
|
||||
|
||||
if is_speech_available():
|
||||
from transformers import Speech2TextFeatureExtractor
|
||||
|
||||
global_rng = random.Random()
|
||||
|
||||
|
||||
@@ -101,7 +104,7 @@ class Speech2TextFeatureExtractionTester(unittest.TestCase):
|
||||
@require_torchaudio
|
||||
class Speech2TextFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest.TestCase):
|
||||
|
||||
feature_extraction_class = Speech2TextFeatureExtractor
|
||||
feature_extraction_class = Speech2TextFeatureExtractor if is_speech_available() else None
|
||||
|
||||
def setUp(self):
|
||||
self.feat_extract_tester = Speech2TextFeatureExtractionTester(self)
|
||||
|
||||
@@ -19,7 +19,7 @@ import unittest
|
||||
from pathlib import Path
|
||||
from shutil import copyfile
|
||||
|
||||
from transformers import Speech2TextFeatureExtractor, Speech2TextProcessor, Speech2TextTokenizer
|
||||
from transformers import Speech2TextTokenizer, is_speech_available
|
||||
from transformers.file_utils import FEATURE_EXTRACTOR_NAME
|
||||
from transformers.models.speech_to_text.tokenization_speech_to_text import VOCAB_FILES_NAMES, save_json
|
||||
from transformers.testing_utils import require_sentencepiece, require_torch, require_torchaudio
|
||||
@@ -27,6 +27,10 @@ from transformers.testing_utils import require_sentencepiece, require_torch, req
|
||||
from .test_feature_extraction_speech_to_text import floats_list
|
||||
|
||||
|
||||
if is_speech_available():
|
||||
from transformers import Speech2TextFeatureExtractor, Speech2TextProcessor
|
||||
|
||||
|
||||
SAMPLE_SP = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user