[Json configs] Make json prettier for all saved tokenizer files & ensure same json format for all processors (tok + feat_extract) (#17457)

* [Json dump] Make json prettier

* correct more tokenizeirs

* more patterns

* add aggressive test

* the aggressive test was actually useful :-)

* more tests

* Apply suggestions from code review
This commit is contained in:
Patrick von Platen
2022-05-31 17:07:30 +02:00
committed by GitHub
parent 6ee1474b67
commit f394a2a50d
22 changed files with 52 additions and 24 deletions

View File

@@ -25,7 +25,7 @@ from pathlib import Path
from huggingface_hub import Repository, delete_repo, login
from requests.exceptions import HTTPError
from transformers import AutoFeatureExtractor, Wav2Vec2FeatureExtractor
from transformers.testing_utils import PASS, USER, get_tests_dir, is_staging_test
from transformers.testing_utils import PASS, USER, check_json_file_has_correct_format, get_tests_dir, is_staging_test
from transformers.utils import is_torch_available, is_vision_available
@@ -107,7 +107,8 @@ class FeatureExtractionSavingTestMixin:
feat_extract_first = self.feature_extraction_class(**self.feat_extract_dict)
with tempfile.TemporaryDirectory() as tmpdirname:
feat_extract_first.save_pretrained(tmpdirname)
saved_file = feat_extract_first.save_pretrained(tmpdirname)[0]
check_json_file_has_correct_format(saved_file)
feat_extract_second = self.feature_extraction_class.from_pretrained(tmpdirname)
self.assertEqual(feat_extract_second.to_dict(), feat_extract_first.to_dict())