CDN urls (#4030)
* [file_utils] use_cdn + documentation
* Move to cdn. urls for weights
* [urls] Hotfix for bert-base-japanese
This commit is contained in:
@@ -94,7 +94,7 @@ DUMMY_INPUTS = [[7, 6, 0, 0, 1], [1, 2, 3, 0, 0], [0, 0, 0, 4, 5]]
|
||||
DUMMY_MASK = [[1, 1, 1, 1, 1], [1, 1, 1, 0, 0], [0, 0, 0, 1, 1]]
|
||||
|
||||
S3_BUCKET_PREFIX = "https://s3.amazonaws.com/models.huggingface.co/bert"
|
||||
CLOUDFRONT_DISTRIB_PREFIX = "https://d2ws9o8vfrpkyk.cloudfront.net"
|
||||
CLOUDFRONT_DISTRIB_PREFIX = "https://cdn.huggingface.co"
|
||||
|
||||
|
||||
def is_torch_available():
|
||||
@@ -144,12 +144,28 @@ def is_remote_url(url_or_filename):
|
||||
return parsed.scheme in ("http", "https")
|
||||
|
||||
|
||||
def hf_bucket_url(identifier, postfix=None, cdn=False) -> str:
|
||||
endpoint = CLOUDFRONT_DISTRIB_PREFIX if cdn else S3_BUCKET_PREFIX
|
||||
if postfix is None:
|
||||
return "/".join((endpoint, identifier))
|
||||
def hf_bucket_url(model_id: str, filename: str, use_cdn=True) -> str:
    """
    Resolve a model identifier, and a file name, to a HF-hosted url
    on either S3 or Cloudfront (a Content Delivery Network, or CDN).

    Cloudfront is replicated over the globe so downloads are way faster
    for the end user (and it also lowers our bandwidth costs). However, it
    is more aggressively cached by default, so may not always reflect the
    latest changes to the underlying file (default TTL is 24 hours).

    In terms of client-side caching from this library, even though
    Cloudfront relays the ETags from S3, using one or the other
    (or switching from one to the other) will affect caching: cached files
    are not shared between the two because the cached file's name contains
    a hash of the url.

    Args:
        model_id: Model identifier. An identifier without a "/" is treated
            as a legacy one and is joined to the file name with a dash;
            otherwise the file name is appended as a further path segment.
        filename: Name of the file to resolve for that model.
        use_cdn: When true (the default), build the url on
            ``CLOUDFRONT_DISTRIB_PREFIX``; otherwise on ``S3_BUCKET_PREFIX``.

    Returns:
        The full url of the requested file.
    """
    endpoint = CLOUDFRONT_DISTRIB_PREFIX if use_cdn else S3_BUCKET_PREFIX
    # Legacy identifiers have no namespace, so their files live flat at the
    # root of the bucket as "<model_id>-<filename>".
    legacy_format = "/" not in model_id
    if legacy_format:
        return f"{endpoint}/{model_id}-{filename}"
    return f"{endpoint}/{model_id}/{filename}"
|
||||
|
||||
|
||||
def url_to_filename(url, etag=None):
|
||||
|
||||
Reference in New Issue
Block a user