Change default cache path (#8734)
* Change default cache path
* Document changes
* Apply suggestions from code review

Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
This commit is contained in:
@@ -70,15 +70,15 @@ to check 🤗 Transformers is properly installed.
|
|||||||
|
|
||||||
This library provides pretrained models that will be downloaded and cached locally. Unless you specify a location with
|
This library provides pretrained models that will be downloaded and cached locally. Unless you specify a location with
|
||||||
`cache_dir=...` when you use methods like `from_pretrained`, these models will automatically be downloaded in the
|
`cache_dir=...` when you use methods like `from_pretrained`, these models will automatically be downloaded in the
|
||||||
folder given by the shell environment variable ``TRANSFORMERS_CACHE``. The default value for it will be the PyTorch
|
folder given by the shell environment variable ``TRANSFORMERS_CACHE``. The default value for it will be the Hugging
|
||||||
cache home followed by ``/transformers/`` (even if you don't have PyTorch installed). This is (by order of priority):
|
Face cache home followed by ``/transformers/``. This is (by order of priority):
|
||||||
|
|
||||||
* shell environment variable ``TORCH_HOME``
|
* shell environment variable ``HF_HOME``
|
||||||
* shell environment variable ``XDG_CACHE_HOME`` + ``/torch/``
|
* shell environment variable ``XDG_CACHE_HOME`` + ``/huggingface/``
|
||||||
* default: ``~/.cache/torch/``
|
* default: ``~/.cache/huggingface/``
|
||||||
|
|
||||||
So if you don't have any specific environment variable set, the cache directory will be at
|
So if you don't have any specific environment variable set, the cache directory will be at
|
||||||
``~/.cache/torch/transformers/``.
|
``~/.cache/huggingface/transformers/``.
|
||||||
|
|
||||||
**Note:** If you have set a shell environment variable for one of the predecessors of this library
|
**Note:** If you have set a shell environment variable for one of the predecessors of this library
|
||||||
(``PYTORCH_TRANSFORMERS_CACHE`` or ``PYTORCH_PRETRAINED_BERT_CACHE``), those will be used if there is no shell
|
(``PYTORCH_TRANSFORMERS_CACHE`` or ``PYTORCH_PRETRAINED_BERT_CACHE``), those will be used if there is no shell
|
||||||
|
|||||||
@@ -203,8 +203,28 @@ except ImportError:
|
|||||||
_tokenizers_available = False
|
_tokenizers_available = False
|
||||||
|
|
||||||
|
|
||||||
default_cache_path = os.path.join(torch_cache_home, "transformers")
|
old_default_cache_path = os.path.join(torch_cache_home, "transformers")
|
||||||
|
# New default cache, shared with the Datasets library
|
||||||
|
hf_cache_home = os.path.expanduser(
|
||||||
|
os.getenv("HF_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "huggingface"))
|
||||||
|
)
|
||||||
|
default_cache_path = os.path.join(hf_cache_home, "transformers")
|
||||||
|
|
||||||
|
# One-time move from the old location to the new one if no ENV variable has been set.
|
||||||
|
if (
|
||||||
|
os.path.isdir(old_default_cache_path)
|
||||||
|
and "PYTORCH_PRETRAINED_BERT_CACHE" not in os.environ
|
||||||
|
and "PYTORCH_TRANSFORMERS_CACHE" not in os.environ
|
||||||
|
and "TRANSFORMERS_CACHE" not in os.environ
|
||||||
|
):
|
||||||
|
logger.warn(
|
||||||
|
"In Transformers v4.0.0, the default path to cache downloaded models changed from "
|
||||||
|
"'~/.cache/torch/transformers' to '~/.cache/huggingface/transformers'. Since you don't seem to have overridden "
|
||||||
|
"and '~/.cache/torch/transformers' is a directory that exists, we're moving it to "
|
||||||
|
"'~/.cache/huggingface/transformers' to avoid redownloading models you have already in the cache. You should "
|
||||||
|
"only see this message once."
|
||||||
|
)
|
||||||
|
shutil.move(old_default_cache_path, default_cache_path)
|
||||||
|
|
||||||
PYTORCH_PRETRAINED_BERT_CACHE = os.getenv("PYTORCH_PRETRAINED_BERT_CACHE", default_cache_path)
|
PYTORCH_PRETRAINED_BERT_CACHE = os.getenv("PYTORCH_PRETRAINED_BERT_CACHE", default_cache_path)
|
||||||
PYTORCH_TRANSFORMERS_CACHE = os.getenv("PYTORCH_TRANSFORMERS_CACHE", PYTORCH_PRETRAINED_BERT_CACHE)
|
PYTORCH_TRANSFORMERS_CACHE = os.getenv("PYTORCH_TRANSFORMERS_CACHE", PYTORCH_PRETRAINED_BERT_CACHE)
|
||||||
|
|||||||
Reference in New Issue
Block a user