[AutoProcessor] Correct AutoProcessor and automatically add processor… (#14881)
* [AutoProcessor] Correct AutoProcessor and automatically add processor class * up * up * up * up * up * up * up * up * continue tomorrow * up * up * up * make processor class private * fix loop
This commit is contained in:
committed by
GitHub
parent
d7d60df0ec
commit
a1392883ce
@@ -13,13 +13,15 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
from shutil import copyfile
|
||||
|
||||
from transformers import AutoProcessor, Wav2Vec2Config, Wav2Vec2Processor
|
||||
from transformers import AutoProcessor, AutoTokenizer, Wav2Vec2Config, Wav2Vec2FeatureExtractor, Wav2Vec2Processor
|
||||
from transformers.file_utils import FEATURE_EXTRACTOR_NAME
|
||||
from transformers.tokenization_utils import TOKENIZER_CONFIG_FILE
|
||||
|
||||
|
||||
SAMPLE_PROCESSOR_CONFIG = os.path.join(
|
||||
@@ -55,3 +57,61 @@ class AutoFeatureExtractorTest(unittest.TestCase):
|
||||
processor = AutoProcessor.from_pretrained(tmpdirname)
|
||||
|
||||
self.assertIsInstance(processor, Wav2Vec2Processor)
|
||||
|
||||
def test_processor_from_feat_extr_processor_class(self):
|
||||
with tempfile.TemporaryDirectory() as tmpdirname:
|
||||
feature_extractor = Wav2Vec2FeatureExtractor()
|
||||
tokenizer = AutoTokenizer.from_pretrained("facebook/wav2vec2-base-960h")
|
||||
|
||||
processor = Wav2Vec2Processor(feature_extractor, tokenizer)
|
||||
|
||||
# save in new folder
|
||||
processor.save_pretrained(tmpdirname)
|
||||
|
||||
# drop `processor_class` in tokenizer
|
||||
with open(os.path.join(tmpdirname, TOKENIZER_CONFIG_FILE), "r") as f:
|
||||
config_dict = json.load(f)
|
||||
config_dict.pop("processor_class")
|
||||
|
||||
with open(os.path.join(tmpdirname, TOKENIZER_CONFIG_FILE), "w") as f:
|
||||
f.write(json.dumps(config_dict))
|
||||
|
||||
processor = AutoProcessor.from_pretrained(tmpdirname)
|
||||
|
||||
self.assertIsInstance(processor, Wav2Vec2Processor)
|
||||
|
||||
def test_processor_from_tokenizer_processor_class(self):
|
||||
with tempfile.TemporaryDirectory() as tmpdirname:
|
||||
feature_extractor = Wav2Vec2FeatureExtractor()
|
||||
tokenizer = AutoTokenizer.from_pretrained("facebook/wav2vec2-base-960h")
|
||||
|
||||
processor = Wav2Vec2Processor(feature_extractor, tokenizer)
|
||||
|
||||
# save in new folder
|
||||
processor.save_pretrained(tmpdirname)
|
||||
|
||||
# drop `processor_class` in feature extractor
|
||||
with open(os.path.join(tmpdirname, FEATURE_EXTRACTOR_NAME), "r") as f:
|
||||
config_dict = json.load(f)
|
||||
config_dict.pop("processor_class")
|
||||
|
||||
with open(os.path.join(tmpdirname, FEATURE_EXTRACTOR_NAME), "w") as f:
|
||||
f.write(json.dumps(config_dict))
|
||||
|
||||
processor = AutoProcessor.from_pretrained(tmpdirname)
|
||||
|
||||
self.assertIsInstance(processor, Wav2Vec2Processor)
|
||||
|
||||
def test_processor_from_local_directory_from_model_config(self):
|
||||
with tempfile.TemporaryDirectory() as tmpdirname:
|
||||
model_config = Wav2Vec2Config(processor_class="Wav2Vec2Processor")
|
||||
model_config.save_pretrained(tmpdirname)
|
||||
# copy relevant files
|
||||
copyfile(SAMPLE_VOCAB, os.path.join(tmpdirname, "vocab.json"))
|
||||
# create emtpy sample processor
|
||||
with open(os.path.join(tmpdirname, FEATURE_EXTRACTOR_NAME), "w") as f:
|
||||
f.write("{}")
|
||||
|
||||
processor = AutoProcessor.from_pretrained(tmpdirname)
|
||||
|
||||
self.assertIsInstance(processor, Wav2Vec2Processor)
|
||||
|
||||
Reference in New Issue
Block a user