Initial commit, utilities that used to live in my private space

2018-12-07 14:44:11 -08:00 · 2018-12-07 14:44:11 -08:00 · adae90bd94
--- a/README.md
+++ b/README.md
@ -1,3 +1,8 @@
 # Overview
 Shared utilities / handed-off scripts for the AI for Earth team
 The general convention in this repo is that users who want to consume these utilities will add the top-level path of the repo to their Python path, so it's OK to assume that other packages/modules within the repo are available.  The "scrap" directory can be used for standalone, one-time-use scripts that you might otherwise have emailed to someone.
 # Contributing
--- a/matlab_porting_tools.py
+++ b/matlab_porting_tools.py
@ -0,0 +1,186 @@
 #
 # matlab_porting_tools.py
 #
 # Module containing a few ported Matlab functions that make it easier
 # for me to port other, larger Matlab functions.  Some of these are
 # built-in Matlab functions (e.g. fileparts()), some of them are
 # new utility functions my Matlab workflow depends on (e.g.
 # insert_before_extension()).
 #
 # Some of these are silly one-liners where it's easier for me to remember
 # my Matlab-universe words than the Python-universe words, e.g. string_starts_with
 #
 # Owner: Dan Morris (dan@microsoft.com)
 #
 #%% Constants and imports
 import ntpath
 import os
 import datetime
 #%% fileparts()
 def fileparts(n):
    r'''
    p,n,e = fileparts(filename)

    Splits a filename into (directory, base name, extension), in the spirit
    of Matlab's fileparts().  Splitting is done with ntpath on every platform,
    so Windows-style paths are handled even when running elsewhere.

    fileparts(r'c:\blah\BLAH.jpg') returns ('c:\blah','BLAH','.jpg')

    Note that the '.' lives with the extension, and separators have been removed.
    Missing pieces come back as empty strings, e.g. fileparts('blah') returns
    ('','blah','').
    '''
    # This docstring is raw on purpose: the example paths contain backslashes
    # that would otherwise be read as escape sequences.
    directory = ntpath.dirname(n)
    stem, extension = ntpath.splitext(ntpath.basename(n))
    return directory, stem, extension
 if False:
    #%% Test driver for fileparts()
    # from danUtil.matlab_porting_tools import fileparts
    #
    # The "if False" guard keeps this from executing on import; presumably it
    # is meant to be run by hand as an editor cell to eyeball the output.
    TEST_STRINGS = [
            r'c:\blah\BLAH.jpg',
            r'c:\blah.jpg',
            r'blah',
            r'c:\blah',
            r'c:\blah\BLAH',
            r'blah.jpg',
            ]
    # Print the input followed by its bracketed path / name / extension
    for s in TEST_STRINGS:
        print('{}:\n[{}],[{}],[{}]\n'.format(s,*fileparts(s)))
 #%% insert_before_extension()
 def insert_before_extension(filename,s=''):
    '''
    function filename = insert_before_extension(filename,s)

    Inserts the string [s] before the extension in [filename], separating with '.'.

    If [s] is empty (or None), generates a date/timestamp of the form
    YYYY.mm.dd.HH.MM.SS from the current local time.

    If [filename] has no extension, appends [s].

    The directory portion of [filename] is returned exactly as supplied,
    separators included, so the result does not depend on the host platform.

    Raises AssertionError if [filename] is empty.
    '''
    assert len(filename) > 0
    if not s:
        s = datetime.datetime.now().strftime('%Y.%m.%d.%H.%M.%S')
    # Split with ntpath (the same rules fileparts() uses) so Windows-style
    # paths are understood on every platform.
    basename = ntpath.basename(filename)
    stem, extension = ntpath.splitext(basename)
    # Everything before the base name is kept verbatim.  Re-joining with
    # os.path.join would insert the host separator, producing mixed paths
    # like 'c:\blah/BLAH.x.jpg' on non-Windows systems.
    directory_prefix = filename[:len(filename) - len(basename)]
    return directory_prefix + stem + '.' + s + extension
 if False:
    #%% Test driver for insert_before_extension
    # from danUtil.matlab_porting_tools import insert_before_extension
    #
    # The "if False" guard keeps this from executing on import; presumably it
    # is meant to be run by hand as an editor cell.  No [s] is passed, so each
    # output carries a generated timestamp.
    TEST_STRINGS = [
            r'c:\blah\BLAH.jpg',
            r'c:\blah.jpg',
            r'blah',
            r'c:\blah',
            r'c:\blah\BLAH',
            r'blah.jpg',
            ]
    for s in TEST_STRINGS:
        print('{}: {}'.format(s,insert_before_extension(s)))
 #%% sec2hms()
 def sec2hms(tSeconds):
    '''
    Python port of the Matlab helper sec2hms(tSeconds).

    Converts a time in seconds to a string of the form:

        1 hour, 2 mins, 31.400sec

    The hour and minute pieces are left out when they are zero, and get a
    trailing 's' only when greater than one.  Seconds are always written,
    with three decimal places.  Only the string is returned.

    I prefer using the humanfriendly package for this, but I use this when
    porting from Matlab.
    '''
    # https://stackoverflow.com/questions/775049/python-time-seconds-to-hms
    total_minutes, seconds = divmod(tSeconds, 60)
    hours, minutes = divmod(total_minutes, 60)

    pieces = []
    for count, unit in ((hours, 'hour'), (minutes, 'min')):
        if count > 0:
            plural = 's' if count > 1 else ''
            pieces.append('%d %s%s, ' % (count, unit, plural))
    pieces.append('%3.3fsec' % seconds)
    return ''.join(pieces)
 if False:
    #%% Test driver for sec2hms()
    # from danUtil.matlab_porting_tools import sec2hms
    #
    # The "if False" guard keeps this from executing on import; presumably it
    # is meant to be run by hand as an editor cell.
    TEST_VALUES = [60033, 30.4, 245234523454.1]
    for n in TEST_VALUES:
        print('{} - {}'.format(n,sec2hms(n)))
 #%% read_lines_from_file()
 def read_lines_from_file(filename):
    '''
    Reads the text file [filename] and returns its lines as a list of strings.

    Each line is passed through rstrip(), so the line terminator and any
    other trailing whitespace are removed.
    '''
    with open(filename) as f:
        return [line.rstrip() for line in f]
 #%% write_lines_to_file()
 def write_lines_to_file(lines, filename):
    '''
    Writes every string in [lines] to [filename], each followed by a newline.

    The file is opened in 'w' mode, so existing content is replaced.
    '''
    with open(filename,'w') as f:
        f.writelines(line + '\n' for line in lines)
 #%% string_ends_with()
 def string_ends_with(s,query):
    '''
    Returns True if [s] ends with [query]; a Matlab-flavored alias for s.endswith(query).
    '''
    has_suffix = s.endswith(query)
    return has_suffix
 def string_starts_with(s,query):
    '''
    Returns True if [s] starts with [query]; a Matlab-flavored alias for s.startswith(query).
    '''
    has_prefix = s.startswith(query)
    return has_prefix
--- a/path_utils.py
+++ b/path_utils.py
@ -0,0 +1,42 @@
 #
 # path_utils.py
 #
 # Miscellaneous useful utils for path manipulation, things that could *almost*
 # be in os.path, but aren't.
 #
 # Owner: Dan Morris (dan@microsoft.com)
 #
 import os
 def recursiveFileList(baseDir, bConvertSlashes=True):
    r"""
    Enumerate files (not directories) in [baseDir], optionally converting \ to /

    Walks [baseDir] recursively with os.walk() and returns a list in which
    every entry is the walked directory joined with a file name.  When
    [bConvertSlashes] is true, every backslash in each returned path is
    replaced with a forward slash.

    Returns an empty list if os.walk() yields nothing for [baseDir].
    """
    # The docstring is raw because it contains a literal backslash.
    allFiles = [
        os.path.join(root,filename)
        for root, _, filenames in os.walk(baseDir)
        for filename in filenames
    ]
    if bConvertSlashes:
        allFiles = [fullPath.replace('\\','/') for fullPath in allFiles]
    return allFiles
 # http://nicks-liquid-soapbox.blogspot.com/2011/03/splitting-path-to-list-in-python.html
 def splitpath(path, maxdepth=100):
    r"""
    Splits [path] into all its constituent tokens, e.g.:

    c:\blah\boo\goo.txt

    ...becomes:

    ['c:\\', 'blah', 'boo', 'goo.txt']

    Splitting uses os.path.split(), so the separators recognized are those of
    the platform this runs on.

    [maxdepth] caps how many trailing components are split off.  Once the cap
    is reached, whatever has not been split yet is returned as a single
    leading token, so no part of the path is dropped.
    """
    # The docstring is raw so the backslashes in the example stay literal.
    tokens = []
    remaining = path
    depth = maxdepth
    while True:
        head, tail = os.path.split(remaining)
        # Stop when the depth budget is spent, when nothing is left in front
        # of the tail, or when split() can no longer shorten the path (a root).
        if not (depth and head and head != remaining):
            tokens.append(remaining)
            break
        tokens.append(tail)
        remaining = head
        depth -= 1
    # Components were collected right-to-left
    tokens.reverse()
    return tokens
--- a/write_html_image_list.py
+++ b/write_html_image_list.py
@ -0,0 +1,174 @@
 #
 # function write_html_image_list(filename,imageFilenames,titles, options)
 #
 # Given a list of image file names, writes an HTML file that
 # shows all those images, with optional one-line headers above each.
 #
 # Each "filename" can also be a list of filenames (they will share a title).
 #
 # Strips directory information away if options.makeRelative == 1.
 #
 # Tries to convert absolute to relative paths if options.makeRelative == 2.
 #
 # Owner: Dan Morris (dan@microsoft.com)
 #
 #%% Constants and imports
 import math
 import matlab_porting_tools as mpt
 #%% write_html_image_list
 def _rewrite_image_entry(entry,rewrite):
    # Apply [rewrite] to one filename, or to each filename of a grouped (list) entry
    if isinstance(entry,list):
        return [rewrite(fn) for fn in entry]
    return rewrite(entry)


 def write_html_image_list(filename=None,imageFilenames=None,titles=(),options=None):
    '''
    Writes an HTML file showing a list of images, with optional one-line
    titles above each.

    Called with no filename, writes nothing and just returns an options dict
    populated with defaults.

    Args:
        filename: output HTML file; None to only retrieve default options.
        imageFilenames: list of image filenames.  An entry may itself be a
            list of filenames, in which case those images share one title.
        titles: empty, or one title per entry in imageFilenames.
        options: dict of settings; missing keys are filled in (in place) with
            their defaults:
            fHtml: -1 to open [filename] for writing, otherwise an already-open
                file handle to write to (left open on return).
            makeRelative: 0 leaves filenames alone, 1 strips directory
                information, 2 removes the directory of [filename] (with a
                trailing backslash) wherever it occurs in an image filename.
            headerHtml / trailerHtml: raw HTML written near the top / bottom
                of the page.
            imageStyle: contents of the style attribute of every <img> tag.
            maxFiguresPerHtmlFile: when there are more images than this, the
                images are spread over several sub-pages and [filename]
                becomes an index linking to them.

    Returns:
        The options dict, with defaults filled in.

    Raises:
        AssertionError: titles is non-empty and its length differs from
            imageFilenames.
        ValueError: the image set has to be paged but a file handle was
            supplied in options['fHtml'].
    '''
    # A dict literal as the default argument would be shared (and modified)
    # across calls, so a fresh one is created here instead.
    if options is None:
        options = {}

    options.setdefault('fHtml',-1)
    options.setdefault('makeRelative',0)
    options.setdefault('headerHtml','')
    options.setdefault('trailerHtml','')
    options.setdefault('imageStyle','')
    # Possibly split the html output for figures into multiple files; Chrome gets sad with
    # thousands of images in a single tab.
    options.setdefault('maxFiguresPerHtmlFile',math.inf)

    if filename is None:
        return options

    # Remove leading directory information from filenames if requested.  New
    # lists are built so the caller's imageFilenames is left untouched.
    if options['makeRelative'] == 1:
        def strip_directory(fn):
            _,n,e = mpt.fileparts(fn)
            return n + e
        imageFilenames = [_rewrite_image_entry(entry,strip_directory)
                          for entry in imageFilenames]
    elif options['makeRelative'] == 2:
        baseDir,_,_ = mpt.fileparts(filename)
        if len(baseDir) > 1 and baseDir[-1] != '\\':
            baseDir = baseDir + '\\'
        def strip_base_dir(fn):
            return fn.replace(baseDir,'')
        imageFilenames = [_rewrite_image_entry(entry,strip_base_dir)
                          for entry in imageFilenames]

    nImages = len(imageFilenames)
    if len(titles) != 0:
        assert len(titles) == nImages,'Title/image list mismatch'

    maxFigures = options['maxFiguresPerHtmlFile']

    # If we need to break this up into multiple files...
    if nImages > maxFigures:

        # You can't supply your own file handle in this case
        if options['fHtml'] != -1:
            raise ValueError(
                    "You can't supply your own file handle if we have to page the image set")

        figureFileStartingIndices = list(range(0,nImages,maxFigures))
        assert len(figureFileStartingIndices) > 1

        # Open the meta-output file; "with" closes it even if a sub-page fails
        with open(filename,'w') as fMeta:

            # Write header stuff
            fMeta.write('<html><body>\n')
            fMeta.write(options['headerHtml'])
            fMeta.write('<table border = 0 cellpadding = 2>\n')

            for iStart in figureFileStartingIndices:

                # Inclusive index of the last image on this sub-page
                iEnd = min(iStart + maxFigures,nImages) - 1
                trailer = 'image_{:05d}_{:05d}'.format(iStart,iEnd)
                localFiguresHtmlFilename = mpt.insert_before_extension(filename,trailer)

                fMeta.write('<tr><td>\n')
                fMeta.write('<p style="padding-bottom:0px;margin-bottom:0px;text-align:left;font-family:\'segoe ui\',calibri,arial;font-size:100%;text-decoration:none;font-weight:bold;">')
                fMeta.write('<a href="{}">Figures for images {} through {}</a></p></td></tr>\n'.format(
                    localFiguresHtmlFilename,iStart,iEnd))

                localImageFilenames = imageFilenames[iStart:iEnd+1]
                localTitles = titles[iStart:iEnd+1] if len(titles) != 0 else []

                # The header and trailer belong to the index page only
                localOptions = options.copy()
                localOptions['headerHtml'] = ''
                localOptions['trailerHtml'] = ''

                # Make a recursive call for this image set
                write_html_image_list(localFiguresHtmlFilename,localImageFilenames,localTitles,
                    localOptions)

            # ...for each page of images

            fMeta.write('</table></body>\n')
            fMeta.write(options['trailerHtml'])
            fMeta.write('</html>\n')

        return options

    # ...if we have to make multiple sub-pages

    # Only close the handle if we opened it; a caller-supplied handle stays open
    bCleanupFile = (options['fHtml'] == -1)
    fHtml = open(filename,'w') if bCleanupFile else options['fHtml']

    try:
        fHtml.write('<html><body>\n')
        fHtml.write(options['headerHtml'])

        # Write out images
        for iImage, imageEntry in enumerate(imageFilenames):

            if len(titles) > 0:
                fHtml.write(
                        '<p style="font-family:calibri,verdana,arial;font-weight:bold;font-size:150%;text-align:left;">{}</p>\n'\
                        .format(titles[iImage]))

            # If we have multiple images for this same title
            if isinstance(imageEntry,list):
                for iFile, fn in enumerate(imageEntry):
                    fHtml.write('<img src="{}" style="{}"><br/>\n'.format(fn,options['imageStyle']))
                    # Extra spacing between images of a group, but not after the last
                    if iFile != len(imageEntry)-1:
                        fHtml.write('<br/>')
                # ...for each file in this group
            else:
                fHtml.write('<img src="{}" style="{}"><br/>\n'.\
                            format(imageEntry,options['imageStyle']))

        # ...for each image we need to write

        fHtml.write(options['trailerHtml'])
        fHtml.write('</body></html>\n')
    finally:
        if bCleanupFile:
            fHtml.close()

    return options

 # ...function