Fix tokenizer load from one file (#19073)

* Fix tokenizer load from one file

* Add a test

* Style

Co-authored-by: Lysandre <lysandre.debut@reseau.eseo.fr>
This commit is contained in:
Sylvain Gugger
2022-09-16 16:11:47 -04:00
committed by GitHub
parent 773314ab80
commit 9017ba4ca4
2 changed files with 13 additions and 0 deletions

View File

@@ -1726,6 +1726,8 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
for file_id, file_path in vocab_files.items():
if file_path is None:
resolved_vocab_files[file_id] = None
elif os.path.isfile(file_path):
resolved_vocab_files[file_id] = file_path
elif is_remote_url(file_path):
resolved_vocab_files[file_id] = download_url(file_path, proxies=proxies)
else: