Download models directly to cache_dir.
This allows moving the file instead of copying it, which is more reliable. It also avoids writing large amounts of data to /tmp, which may not be large enough to accommodate it. Refs #2222.
This commit is contained in:
Родитель
286d5bb6b7
Коммит
b67fa1a8d2
|
@ -10,10 +10,9 @@ import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import six
|
import six
|
||||||
import shutil
|
|
||||||
import tempfile
|
import tempfile
|
||||||
import fnmatch
|
import fnmatch
|
||||||
from functools import wraps
|
from functools import partial, wraps
|
||||||
from hashlib import sha256
|
from hashlib import sha256
|
||||||
from io import open
|
from io import open
|
||||||
|
|
||||||
|
@ -345,14 +344,13 @@ def get_from_cache(url, cache_dir=None, force_download=False, proxies=None, etag
|
||||||
def _resumable_file_manager():
|
def _resumable_file_manager():
|
||||||
with open(incomplete_path,'a+b') as f:
|
with open(incomplete_path,'a+b') as f:
|
||||||
yield f
|
yield f
|
||||||
os.remove(incomplete_path)
|
|
||||||
temp_file_manager = _resumable_file_manager
|
temp_file_manager = _resumable_file_manager
|
||||||
if os.path.exists(incomplete_path):
|
if os.path.exists(incomplete_path):
|
||||||
resume_size = os.stat(incomplete_path).st_size
|
resume_size = os.stat(incomplete_path).st_size
|
||||||
else:
|
else:
|
||||||
resume_size = 0
|
resume_size = 0
|
||||||
else:
|
else:
|
||||||
temp_file_manager = tempfile.NamedTemporaryFile
|
temp_file_manager = partial(tempfile.NamedTemporaryFile, dir=cache_dir, delete=False)
|
||||||
resume_size = 0
|
resume_size = 0
|
||||||
|
|
||||||
if etag is not None and (not os.path.exists(cache_path) or force_download):
|
if etag is not None and (not os.path.exists(cache_path) or force_download):
|
||||||
|
@ -371,12 +369,9 @@ def get_from_cache(url, cache_dir=None, force_download=False, proxies=None, etag
|
||||||
|
|
||||||
# we are copying the file before closing it, so flush to avoid truncation
|
# we are copying the file before closing it, so flush to avoid truncation
|
||||||
temp_file.flush()
|
temp_file.flush()
|
||||||
# shutil.copyfileobj() starts at the current position, so go to the start
|
|
||||||
temp_file.seek(0)
|
|
||||||
|
|
||||||
logger.info("copying %s to cache at %s", temp_file.name, cache_path)
|
logger.info("storing %s in cache at %s", url, cache_path)
|
||||||
with open(cache_path, 'wb') as cache_file:
|
os.rename(temp_file.name, cache_path)
|
||||||
shutil.copyfileobj(temp_file, cache_file)
|
|
||||||
|
|
||||||
logger.info("creating metadata file for %s", cache_path)
|
logger.info("creating metadata file for %s", cache_path)
|
||||||
meta = {'url': url, 'etag': etag}
|
meta = {'url': url, 'etag': etag}
|
||||||
|
@ -387,6 +382,4 @@ def get_from_cache(url, cache_dir=None, force_download=False, proxies=None, etag
|
||||||
output_string = unicode(output_string, 'utf-8') # The beauty of python 2
|
output_string = unicode(output_string, 'utf-8') # The beauty of python 2
|
||||||
meta_file.write(output_string)
|
meta_file.write(output_string)
|
||||||
|
|
||||||
logger.info("removing temp file %s", temp_file.name)
|
|
||||||
|
|
||||||
return cache_path
|
return cache_path
|
||||||
|
|
Загрузка…
Ссылка в новой задаче