Added some new utility functions to path_utils and url_utils that are used a bunch in the CameraTraps repo.

2022-07-26 13:02:46 -07:00 · 2022-07-26 13:02:46 -07:00 · 1bbbb8030d
--- a/path_utils.py
+++ b/path_utils.py
@ -7,15 +7,16 @@ See unit tests in tests/test_path_utils.py.

 #%% Imports and constants

-from datetime import datetime
+import zipfile
 import glob
 import ntpath
 import os
 import posixpath
 import string
-from typing import Container, Iterable, List, Optional, Tuple
 import unicodedata

+from datetime import datetime
+from typing import Container, Iterable, List, Optional, Tuple

 IMG_EXTENSIONS = ('.jpg', '.jpeg', '.gif', '.png')

@ -251,3 +252,18 @@ def open_file(filename):
    else:
        opener = "open" if sys.platform == "darwin" else "xdg-open"
        subprocess.call([opener, filename])
+
+
+#%% zipfile management functions
+
+def unzip_file(input_file, output_folder=None):
+    """
+    Extract every member of the zip archive [input_file] into [output_folder].
+
+    If [output_folder] is None, the archive is extracted into the folder that
+    contains [input_file], i.e. os.path.dirname(input_file).
+    """
+
+    target_folder = output_folder
+    if target_folder is None:
+        # No explicit destination: extract alongside the archive itself
+        target_folder = os.path.dirname(input_file)
+
+    with zipfile.ZipFile(input_file, 'r') as archive:
+        archive.extractall(target_folder)
--- a/url_utils.py
+++ b/url_utils.py
@ -11,6 +11,8 @@ import urllib
 import os
 import tempfile

+from urllib.parse import urlparse
+
 # pip install progressbar2
 import progressbar

@ -53,7 +55,7 @@ def get_temp_folder(preferred_name='ai4eutils'):
    
           
 def download_url(url, destination_filename=None, progress_updater=None, 
-                 force_download=False):
+                 force_download=False, verbose=True):
    """
    Download a URL to a file.  If no file is specified, creates a temporary file, 
    with a semi-best-effort to avoid filename collisions.
@ -85,12 +87,29 @@ def download_url(url, destination_filename=None, progress_updater=None,
            os.path.join(target_folder,url_as_filename)
        
    if (not force_download) and (os.path.isfile(destination_filename)):
-        print('Bypassing download of already-downloaded file {}'.format(os.path.basename(url_no_sas)))
+        if verbose:
+            print('Bypassing download of already-downloaded file {}'.format(os.path.basename(url_no_sas)))
    else:
-        print('Downloading file {} to {}'.format(os.path.basename(url_no_sas),destination_filename),end='')
+        if verbose:
+            print('Downloading file {} to {}'.format(os.path.basename(url_no_sas),destination_filename),end='')
        urllib.request.urlretrieve(url, destination_filename, progress_updater)  
        assert(os.path.isfile(destination_filename))
        nBytes = os.path.getsize(destination_filename)
-        print('...done, {} bytes.'.format(nBytes))
+        if verbose:
+            print('...done, {} bytes.'.format(nBytes))
        
-    return destination_filename
+    return destination_filename
+
+
+def download_relative_filename(url, output_base, verbose=False):
+    """
+    Download a URL to output_base, preserving the URL's relative path.
+
+    The path component of [url], minus its leading '/', is joined onto
+    [output_base] to form the destination filename, which is passed to
+    download_url() along with [verbose].
+
+    Returns the destination filename, for consistency with download_url().
+
+    Raises AssertionError if the path component of [url] does not start
+    with '/'.
+    """
+
+    p = urlparse(url)
+
+    # Remove the leading '/' so that os.path.join() treats the URL path as
+    # relative to output_base, rather than as an absolute path that would
+    # replace output_base entirely.
+    assert p.path.startswith('/'), \
+        'Could not find an absolute path in URL {}'.format(url)
+    relative_filename = p.path[1:]
+
+    destination_filename = os.path.join(output_base,relative_filename)
+    download_url(url, destination_filename, verbose=verbose)
+
+    return destination_filename
+    
+
--- a/write_html_image_list.py
+++ b/write_html_image_list.py
@ -24,7 +24,29 @@ import matlab_porting_tools as mpt
 #%% write_html_image_list

 def write_html_image_list(filename=None,images=None,options={}):
-
+    """
+    filename: the output file
+    
+    images: a list of image filenames or dictionaries with one or more of the following fields:
+        
+        filename
+        imageStyle
+        textStyle
+        title
+        linkTarget
+        
+    options: a dict with one or more of the following fields:
+        
+        fHtml
+        makeRelative
+        headerHtml
+        trailerHtml
+        defaultTextStyle
+        defaultImageStyle
+        maxFiguresPerHtmlFile
+        
+    """
+    
    # returns an options struct
    
    if 'fHtml' not in options: