Initial commit, utilities that used to live in my private space

This commit is contained in:
Dan Morris 2018-12-07 14:44:11 -08:00
Родитель 0f1045cdb8
Коммит adae90bd94
4 изменённых файлов: 407 добавлений и 0 удалений

Просмотреть файл

@ -1,3 +1,8 @@
# Overview
Shared utilities / handed-off scripts for the AI for Earth team
The general convention in this repo is that users who want to consume these utilities will add the top-level path of the repo to their Python path, so it's OK to assume that other packages/modules within the repo are available. The "scrap" directory can be used for standalone, one-time-use scripts that you might otherwise have emailed to someone.
# Contributing

186
matlab_porting_tools.py Normal file
Просмотреть файл

@ -0,0 +1,186 @@
#
# matlab_porting_tools.py
#
# Module containing a few ported Matlab functions that make it easier
# for me to port other, larger Matlab functions. Some of these are
# built-in Matlab functions (e.g. fileparts()), some of them are
# new utility functions my Matlab workflow depends on (e.g.
# insert_before_extension()).
#
# Some of these are silly one-liners where it's easier for me to remember
# my Matlab-universe words than the Python-universe words, e.g. string_starts_with
#
# Owner: Dan Morris (dan@microsoft.com)
#
#%% Constants and imports
import ntpath
import os
import datetime
#%% fileparts()
def fileparts(n):
    r"""
    p,n,e = fileparts(filename)

    Split a path into directory, base name and extension, like Matlab's
    fileparts().

    fileparts(r'c:\blah\BLAH.jpg') returns ('c:\blah','BLAH','.jpg')

    Note that the '.' lives with the extension, and the separator between the
    directory and the name has been removed.

    Splitting is done with ntpath, so both '\' and '/' count as separators
    regardless of the platform this runs on.

    Args:
        n: the path to split (string)

    Returns:
        A tuple (p,n,e): the directory ('' if [n] has none), the file name
        without its extension, and the extension including its leading '.'
        ('' if [n] has none).
    """
    # This docstring is raw on purpose: in a regular string literal the '\b'
    # in the example path would be stored as a backspace character.
    p,basename = ntpath.split(n)
    n,e = ntpath.splitext(basename)
    return p,n,e
if False:
    #%% Test driver for fileparts()
    # from danUtil.matlab_porting_tools import fileparts

    # Interactive-only cell: the "if False" guard keeps it from running on
    # import.  The inputs cover paths with and without a directory and with
    # and without an extension.
    TEST_STRINGS = [
        r'c:\blah\BLAH.jpg',
        r'c:\blah.jpg',
        r'blah',
        r'c:\blah',
        r'c:\blah\BLAH',
        r'blah.jpg',
    ]
    for test_path in TEST_STRINGS:
        directory,name,extension = fileparts(test_path)
        print('{}:\n[{}],[{}],[{}]\n'.format(test_path,directory,name,extension))
#%% insert_before_extension()
def insert_before_extension(filename,s=''):
    """
    function filename = insert_before_extension(filename,s)

    Inserts the string [s] before the extension in [filename], separating with '.'.

    If [s] is empty, generates a date/timestamp of the form
    YYYY.MM.DD.HH.MM.SS from the current local time.

    If [filename] has no extension, appends [s].

    The directory part of [filename] is returned exactly as it was supplied.

    Args:
        filename: non-empty path or file name (string)
        s: the string to insert; defaults to a timestamp

    Returns:
        The modified file name (string).

    Raises:
        AssertionError: if [filename] is empty.
    """
    assert len(filename) > 0
    if len(s) == 0:
        s = datetime.datetime.now().strftime('%Y.%m.%d.%H.%M.%S')

    # Splice [s] in directly rather than splitting off the directory and
    # re-joining it with os.path.join: the split is done with ntpath, so
    # re-joining with the host separator turned r'c:\blah\BLAH.jpg' into
    # 'c:\blah/BLAH.x.jpg' on non-Windows platforms.
    base,ext = ntpath.splitext(filename)
    return base + '.' + s + ext
if False:
    #%% Test driver for insert_before_extension
    # from danUtil.matlab_porting_tools import insert_before_extension

    # Interactive-only cell: the "if False" guard keeps it from running on
    # import.  No insert string is passed, so each output carries a timestamp.
    TEST_STRINGS = [
        r'c:\blah\BLAH.jpg',
        r'c:\blah.jpg',
        r'blah',
        r'c:\blah',
        r'c:\blah\BLAH',
        r'blah.jpg',
    ]
    for test_path in TEST_STRINGS:
        stamped_path = insert_before_extension(test_path)
        print('{}: {}'.format(test_path,stamped_path))
#%% sec2hms()
def sec2hms(tSeconds,separator=', '):
    """
    function str = sec2hms(tSeconds,separator)

    Convert a time in seconds to a string of the form:

    1 hour, 2 mins, 31.400sec

    The hours and minutes fields are omitted when they are zero; seconds are
    always printed, with three decimal places.  Hours are not rolled over into
    days.

    I prefer using the humanfriendly package for this, but I use this when
    porting from Matlab.

    Args:
        tSeconds: duration in seconds (int or float)
        separator: string placed between the fields; defaults to ', '

    Returns:
        The formatted duration (string).
    """
    # https://stackoverflow.com/questions/775049/python-time-seconds-to-hms
    m, s = divmod(tSeconds, 60)
    h, m = divmod(m, 60)

    fields = []
    if h > 0:
        fields.append('%d hour%s' % (h, 's' if h > 1 else ''))
    if m > 0:
        fields.append('%d min%s' % (m, 's' if m > 1 else ''))
    fields.append('%3.3fsec' % s)

    return separator.join(fields)
if False:
    #%% Test driver for sec2hms()
    # from danUtil.matlab_porting_tools import sec2hms

    # Interactive-only cell: the "if False" guard keeps it from running on
    # import.
    TEST_VALUES = [60033, 30.4, 245234523454.1]
    for seconds in TEST_VALUES:
        formatted = sec2hms(seconds)
        print('{} - {}'.format(seconds,formatted))
#%% read_lines_from_file()
def read_lines_from_file(filename):
    """
    Read a text file and return its lines as a list of strings.

    All trailing whitespace (not only the newline) is stripped from each line.
    """
    with open(filename) as f:
        return [line.rstrip() for line in f]
#%% write_lines_to_file()
def write_lines_to_file(lines, filename):
    """
    Write each string in [lines] to [filename], one per line.

    The file is overwritten if it exists; a newline is added after every
    entry, including the last.
    """
    with open(filename,'w') as f:
        f.writelines(line + '\n' for line in lines)
#%% string_ends_with()
def string_ends_with(s,query):
    """
    Return True if [s] ends with [query]; Matlab-flavored alias for s.endswith().
    """
    bEndsWith = s.endswith(query)
    return bEndsWith
def string_starts_with(s,query):
    """
    Return True if [s] starts with [query]; Matlab-flavored alias for s.startswith().
    """
    bStartsWith = s.startswith(query)
    return bStartsWith

42
path_utils.py Normal file
Просмотреть файл

@ -0,0 +1,42 @@
#
# path_utils.py
#
# Miscellaneous useful utils for path manipulation, things that could *almost*
# be in os.path, but aren't.
#
# Owner: Dan Morris (dan@microsoft.com)
#
import os
def recursiveFileList(baseDir, bConvertSlashes=True):
    r"""
    Enumerate files (not directories) in [baseDir], optionally converting \ to /

    Args:
        baseDir: the directory to walk recursively
        bConvertSlashes: if True (the default), every backslash in each
            returned path is replaced with a forward slash

    Returns:
        A list of paths, each formed by joining the directory being walked
        with the file name, in the order os.walk() produces them.
    """
    # This docstring is raw on purpose: '\ ' is not a valid escape sequence in
    # a regular string literal.
    allFiles = []
    for root, _, filenames in os.walk(baseDir):
        for filename in filenames:
            fullPath = os.path.join(root,filename)
            if bConvertSlashes:
                fullPath = fullPath.replace('\\','/')
            allFiles.append(fullPath)
    return allFiles
# http://nicks-liquid-soapbox.blogspot.com/2011/03/splitting-path-to-list-in-python.html
def splitpath(path, maxdepth=100):
    r"""
    Splits [path] into all its constituent tokens, e.g.:

    c:\blah\boo\goo.txt

    ...becomes:

    ['c:\\', 'blah', 'boo', 'goo.txt']

    Splitting is done with os.path.split(), so which characters count as
    separators depends on the platform this runs on.

    Args:
        path: the path to split (string)
        maxdepth: maximum number of trailing components to split off.  If the
            limit is reached, whatever has not been split yet is returned
            intact as the first token.

    Returns:
        A list of path tokens; a root or drive prefix, if present, is the
        first token.
    """
    tokens = []

    # Peel components off the right-hand end until nothing is left to split or
    # the depth budget runs out
    while maxdepth:
        head, tail = os.path.split(path)
        # An empty head means [path] is a single relative component; a head
        # equal to [path] means [path] is a root that cannot be split further
        if not head or head == path:
            break
        tokens.append(tail)
        path = head
        maxdepth -= 1

    # Keep the unsplit remainder whole; the previous "[head or tail]" dropped
    # the tail component when the depth budget ran out
    tokens.append(path)
    tokens.reverse()
    return tokens

174
write_html_image_list.py Normal file
Просмотреть файл

@ -0,0 +1,174 @@
#
# function write_html_image_list(filename,imageFilenames,titles, options)
#
# Given a list of image file names, writes an HTML file that
# shows all those images, with optional one-line headers above each.
#
# Each "filename" can also be a list of filenames (they will share a title).
#
# Strips directory information away if options.makeRelative == 1.
#
# Tries to convert absolute to relative paths if options.makeRelative == 2.
#
# Owner: Dan Morris (dan@microsoft.com)
#
#%% Constants and imports
import math
import matlab_porting_tools as mpt
#%% write_html_image_list
def write_html_image_list(filename=None,imageFilenames=None,titles=(),options=None):
    """
    Given a list of image file names, writes an HTML file that shows all those
    images, with optional one-line titles above each.

    Args:
        filename: path of the HTML file to write.  If None, nothing is written
            and the options dict (with defaults filled in) is returned; this
            is how callers obtain a default options dict.
        imageFilenames: list of image file names.  Each entry can also be a
            list of file names, in which case those images share one title.
            The caller's list is not modified.
        titles: optional sequence of titles, one per entry in
            [imageFilenames]; leave empty for no titles.
        options: optional dict of settings.  Missing keys are added to the
            supplied dict with these defaults:
            'fHtml' (-1): an already-open, writable file object to write to
                instead of opening [filename]; it is not closed here
            'makeRelative' (0): 1 strips directory information from every
                image file name; 2 removes the directory of [filename]
                (followed by a backslash) from every image file name
            'headerHtml' (''): HTML written right after the opening
                <html><body> tags
            'trailerHtml' (''): HTML written near the end of the page
            'imageStyle' (''): value of the style attribute of every <img>
            'maxFiguresPerHtmlFile' (math.inf): if there are more images than
                this, [filename] becomes an index page linking to sub-pages
                that each hold at most this many images; must be an integer
                when paging is wanted

    Returns:
        The options dict, with defaults filled in.

    Raises:
        ValueError: if options['fHtml'] was supplied but the image set has to
            be split into multiple pages.
        AssertionError: if [titles] is non-empty and its length differs from
            the number of entries in [imageFilenames].
    """
    # Create a fresh dict per call.  A dict literal used as the default
    # argument is shared by every call, so edits a caller makes to the
    # returned options would silently become the defaults for later calls.
    if options is None:
        options = {}

    defaults = {
        'fHtml': -1,
        'makeRelative': 0,
        'headerHtml': '',
        'trailerHtml': '',
        'imageStyle': '',
        # Possibly split the html output for figures into multiple files; Chrome gets sad with
        # thousands of images in a single tab.
        'maxFiguresPerHtmlFile': math.inf,
    }
    for key,value in defaults.items():
        options.setdefault(key,value)

    if filename is None:
        return options

    # Work on a copy so that making paths relative does not rewrite the
    # caller's list
    imageFilenames = [] if imageFilenames is None else list(imageFilenames)

    # Remove leading directory information from filenames if requested
    if options['makeRelative'] == 1:
        imageFilenames = [_map_image_entry(_strip_directory,entry)
                          for entry in imageFilenames]
    elif options['makeRelative'] == 2:
        baseDir,_,_ = mpt.fileparts(filename)
        if len(baseDir) > 1 and baseDir[-1] != '\\':
            baseDir = baseDir + '\\'
        imageFilenames = [_map_image_entry(lambda fn: fn.replace(baseDir,''),entry)
                          for entry in imageFilenames]

    nImages = len(imageFilenames)
    if len(titles) != 0:
        assert len(titles) == nImages,'Title/image list mismatch'

    # If we need to break this up into multiple files...
    if nImages > options['maxFiguresPerHtmlFile']:

        # You can't supply your own file handle in this case
        if options['fHtml'] != -1:
            raise ValueError(
                "You can't supply your own file handle if we have to page the image set")

        _write_paged_image_lists(filename,imageFilenames,titles,options)
        return options

    # ...if we have to make multiple sub-pages

    if options['fHtml'] == -1:
        # We opened the file, so we close it, even if a write fails
        with open(filename,'w') as fHtml:
            _write_image_page(fHtml,imageFilenames,titles,options)
    else:
        # The caller owns this handle; leave it open
        _write_image_page(options['fHtml'],imageFilenames,titles,options)

    return options

# ...function


def _map_image_entry(fn,entry):
    """Apply [fn] to one image entry, which is a file name or a list of file names."""
    if isinstance(entry,list):
        return [fn(f) for f in entry]
    return fn(entry)


def _strip_directory(imageFilename):
    """Return [imageFilename] without its directory part."""
    _,n,e = mpt.fileparts(imageFilename)
    return n + e


def _write_paged_image_lists(filename,imageFilenames,titles,options):
    """Write an index page to [filename] plus one sub-page per group of images."""
    nImages = len(imageFilenames)
    pageSize = options['maxFiguresPerHtmlFile']
    figureFileStartingIndices = list(range(0,nImages,pageSize))
    assert len(figureFileStartingIndices) > 1

    # Open the meta-output file
    with open(filename,'w') as fMeta:

        # Write header stuff
        fMeta.write('<html><body>\n')
        fMeta.write(options['headerHtml'])
        fMeta.write('<table border = 0 cellpadding = 2>\n')

        for iStart in figureFileStartingIndices:

            # Inclusive index of the last image on this sub-page
            iEnd = min(iStart+pageSize,nImages)-1

            trailer = 'image_{:05d}_{:05d}'.format(iStart,iEnd)
            localFiguresHtmlFilename = mpt.insert_before_extension(filename,trailer)

            fMeta.write('<tr><td>\n')
            fMeta.write('<p style="padding-bottom:0px;margin-bottom:0px;text-align:left;font-family:\'segoe ui\',calibri,arial;font-size:100%;text-decoration:none;font-weight:bold;">')
            fMeta.write('<a href="{}">Figures for images {} through {}</a></p></td></tr>\n'.format(
                localFiguresHtmlFilename,iStart,iEnd))

            localImageFilenames = imageFilenames[iStart:iEnd+1]
            if len(titles) == 0:
                localTitles = []
            else:
                localTitles = titles[iStart:iEnd+1]

            # The header and trailer belong on the index page only
            localOptions = options.copy()
            localOptions['headerHtml'] = ''
            localOptions['trailerHtml'] = ''

            # Make a recursive call for this image set
            write_html_image_list(localFiguresHtmlFilename,localImageFilenames,localTitles,
                                  localOptions)

        # ...for each page of images

        fMeta.write('</table></body>\n')
        fMeta.write(options['trailerHtml'])
        fMeta.write('</html>\n')


def _write_image_page(fHtml,imageFilenames,titles,options):
    """Write a single HTML page showing [imageFilenames] to the open file [fHtml]."""
    fHtml.write('<html><body>\n')
    fHtml.write(options['headerHtml'])

    # Write out images
    for iImage,imageEntry in enumerate(imageFilenames):

        if len(titles) > 0:
            fHtml.write(
                '<p style="font-family:calibri,verdana,arial;font-weight:bold;font-size:150%;text-align:left;">{}</p>\n'
                .format(titles[iImage]))

        # If we have multiple images for this same title
        if isinstance(imageEntry,list):
            for iFile,imageFile in enumerate(imageEntry):
                fHtml.write('<img src="{}" style="{}"><br/>\n'.format(
                    imageFile,options['imageStyle']))
                # Extra spacing between images of a group, but not after the last one
                if iFile != len(imageEntry)-1:
                    fHtml.write('<br/>')
            # ...for each file in this group
        else:
            fHtml.write('<img src="{}" style="{}"><br/>\n'.format(
                imageEntry,options['imageStyle']))

    # ...for each image we need to write

    fHtml.write(options['trailerHtml'])
    fHtml.write('</body></html>\n')