Prevent parallel downloads of the same file with a lock.
Since the file is written to the filesystem, a filesystem lock is the way to go here. Add a dependency on the third-party filelock library to get cross-platform functionality.
This commit is contained in:
1
setup.py
1
setup.py
@@ -59,6 +59,7 @@ setup(
|
||||
"tests.*", "tests"]),
|
||||
install_requires=['numpy',
|
||||
'boto3',
|
||||
'filelock',
|
||||
'requests',
|
||||
'tqdm',
|
||||
'regex != 2019.12.17',
|
||||
|
||||
@@ -24,6 +24,8 @@ from tqdm.auto import tqdm
|
||||
from contextlib import contextmanager
|
||||
from . import __version__
|
||||
|
||||
from filelock import FileLock
|
||||
|
||||
logger = logging.getLogger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
try:
|
||||
@@ -333,11 +335,18 @@ def get_from_cache(url, cache_dir=None, force_download=False, proxies=None, etag
|
||||
# If we don't have a connection (etag is None) and can't identify the file
|
||||
# try to get the last downloaded one
|
||||
if not os.path.exists(cache_path) and etag is None:
|
||||
- matching_files = fnmatch.filter(os.listdir(cache_dir), filename + '.*')
|
||||
- matching_files = list(filter(lambda s: not s.endswith('.json'), matching_files))
|
||||
+ matching_files = [
|
||||
+ file
|
||||
+ for file in fnmatch.filter(os.listdir(cache_dir), filename + '.*')
|
||||
+ if not file.endswith('.json') and not file.endswith('.lock')
|
||||
+ ]
|
||||
if matching_files:
|
||||
cache_path = os.path.join(cache_dir, matching_files[-1])
|
||||
|
||||
# Prevent parallel downloads of the same file with a lock.
|
||||
lock_path = cache_path + '.lock'
|
||||
with FileLock(lock_path):
|
||||
|
||||
if resume_download:
|
||||
incomplete_path = cache_path + '.incomplete'
|
||||
@contextmanager
|
||||
|
||||
Reference in New Issue
Block a user