Add Mirror Option for Downloads (#6679)

* Add Tuna Mirror for Downloads from China

* format fix

* Use preset instead of hardcoding URL

* Fix

* make style

* update the mirror option doc

* update the mirror
This commit is contained in:
Kevin Canwen Xu
2020-09-14 23:50:22 +08:00
committed by GitHub
parent e0e0675ac7
commit 90cde2e938
6 changed files with 31 additions and 5 deletions

View File

@@ -141,6 +141,10 @@ DUMMY_MASK = [[1, 1, 1, 1, 1], [1, 1, 1, 0, 0], [0, 0, 0, 1, 1]]
# Base URL that hf_bucket_url uses when no mirror is given and use_cdn is false.
S3_BUCKET_PREFIX = "https://s3.amazonaws.com/models.huggingface.co/bert"
# Base URL that hf_bucket_url uses when no mirror is given and use_cdn is true.
CLOUDFRONT_DISTRIB_PREFIX = "https://cdn.huggingface.co"
# Preset mirror names mapped to the base URL each one stands for.
# hf_bucket_url looks its `mirror` argument up here; a value that is not a
# key in this dict is used as the endpoint unchanged.
PRESET_MIRROR_DICT = {
"tuna": "https://mirrors.tuna.tsinghua.edu.cn/hugging-face-models",
"bfsu": "https://mirrors.bfsu.edu.cn/hugging-face-models",
}
def is_torch_available():
@@ -570,7 +574,7 @@ def is_remote_url(url_or_filename):
return parsed.scheme in ("http", "https")
def hf_bucket_url(model_id: str, filename: str, use_cdn=True) -> str:
def hf_bucket_url(model_id: str, filename: str, use_cdn=True, mirror=None) -> str:
"""
Resolve a model identifier, and a file name, to a HF-hosted url
on either S3 or Cloudfront (a Content Delivery Network, or CDN).
@@ -586,7 +590,13 @@ def hf_bucket_url(model_id: str, filename: str, use_cdn=True) -> str:
are not shared between the two because the cached file's name contains
a hash of the url.
"""
endpoint = CLOUDFRONT_DISTRIB_PREFIX if use_cdn else S3_BUCKET_PREFIX
endpoint = (
PRESET_MIRROR_DICT.get(mirror, mirror)
if mirror
else CLOUDFRONT_DISTRIB_PREFIX
if use_cdn
else S3_BUCKET_PREFIX
)
legacy_format = "/" not in model_id
if legacy_format:
return f"{endpoint}/{model_id}-{filename}"