2010-10-16 03:09:53 +04:00
|
|
|
#!/usr/bin/python
|
2012-05-21 15:12:37 +04:00
|
|
|
# This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
2010-10-16 03:09:53 +04:00
|
|
|
|
2012-01-16 18:30:59 +04:00
|
|
|
from __future__ import with_statement
|
|
|
|
|
2010-10-16 03:09:53 +04:00
|
|
|
from optparse import OptionParser
|
2018-04-19 01:31:11 +03:00
|
|
|
import hashlib
|
2010-10-16 03:09:53 +04:00
|
|
|
import logging
|
|
|
|
import os
|
|
|
|
|
2018-04-19 01:33:31 +03:00
|
|
|
# Module-wide logger.  setup_logging() looks up this same named logger to
# attach its stream handler and formatter.
logger = logging.getLogger('checksums.py')
|
2017-10-07 17:45:22 +03:00
|
|
|
|
2018-04-19 02:24:03 +03:00
|
|
|
|
2018-04-19 01:38:04 +03:00
|
|
|
def digest_file(filename, digest, chunk_size=131072):
    '''Produce a checksum for the file specified by 'filename'.

    'filename' is a string path to a file that is opened in binary mode and
    read in this function.  The checksum algorithm is specified by 'digest'
    and must be a name accepted by hashlib.new() (for example 'sha1').
    The file is read in blocks of 'chunk_size' bytes, which defaults to
    128 KiB (131072 bytes), so the whole file is never held in memory at
    once.

    Returns the checksum as a hexadecimal string.

    Raises ValueError (from hashlib.new) if 'digest' is not a supported
    algorithm name, and AssertionError if 'filename' is a directory.'''
    assert not os.path.isdir(filename), 'this function only works with files'

    logger.debug('Creating new %s object', digest)
    h = hashlib.new(digest)
    with open(filename, 'rb') as f:
        # iter() with a sentinel keeps calling f.read() until it returns the
        # empty bytes object that marks end-of-file.
        for data in iter(lambda: f.read(chunk_size), b''):
            h.update(data)
        logger.debug('Finished reading in file')
    checksum = h.hexdigest()
    logger.debug('Hash for %s is %s', filename, checksum)
    return checksum
|
2010-10-16 03:09:53 +04:00
|
|
|
|
|
|
|
|
2018-04-19 02:24:03 +03:00
|
|
|
def process_files(dirs, output_filename, digests):
    '''Write a checksums file covering every file found under 'dirs'.

    'dirs' is a list of directory names.  Each directory is walked
    recursively and, for every file found, a checksum is computed (by
    opening and reading the file) for each algorithm listed in 'digests'.
    'digests' needs to be a list of algorithm names understood by
    digest_file().

    The results are written to the file specified by 'output_filename',
    which is created if missing and overwritten if it already exists.  The
    path written for each file is relative to the entry of 'dirs' it was
    found under.  The output file is closed before returning.  Nothing is
    returned.

    The output file is written in the format:
        <hash> <algorithm> <filesize> <filepath>
    Example:
        d1fa09a<snip>e4220 sha1 14250744 firefox-4.0b6pre.en-US.mac64.dmg
    '''

    if os.path.exists(output_filename):
        logger.debug('Overwriting existing checksums file "%s"',
                     output_filename)
    else:
        logger.debug('Creating a new checksums file "%s"', output_filename)
    with open(output_filename, 'w+') as output:
        for d in dirs:
            # The sub-directory list yielded by os.walk() is deliberately
            # not named 'dirs', so the function's own argument is never
            # rebound while it is being iterated.
            for root, _subdirs, files in os.walk(d):
                for f in files:
                    full = os.path.join(root, f)
                    rel = os.path.relpath(full, d)
                    # The size is the same for every algorithm: stat once.
                    size = os.path.getsize(full)
                    for digest in digests:
                        checksum = digest_file(full, digest)
                        output.write('%s %s %s %s\n' % (
                            checksum, digest, size, rel))
|
2010-10-16 03:09:53 +04:00
|
|
|
|
2017-10-07 17:45:22 +03:00
|
|
|
|
2010-10-16 03:09:53 +04:00
|
|
|
def setup_logging(level=logging.DEBUG):
    '''Attach a stream handler to the 'checksums.py' logger.

    'level' is a logging module level applied to the new handler; it
    defaults to DEBUG.  The logger itself is always set to DEBUG, so the
    handler's level is what filters the records this handler emits.

    The output is in the format:
        <level> - <message>
    Example:
        DEBUG - Finished reading in file
    '''
    # Build the fully configured handler first ...
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(
        logging.Formatter("%(levelname)s - %(message)s"))
    stream_handler.setLevel(level)

    # ... then hook it up to the script's named logger.
    log = logging.getLogger('checksums.py')
    log.setLevel(logging.DEBUG)
    log.addHandler(stream_handler)
|
|
|
|
|
2017-10-07 17:45:22 +03:00
|
|
|
|
2010-10-16 03:09:53 +04:00
|
|
|
def main():
    '''Parse command line arguments, set up logging and generate a
    checksums file.

    Options:
        -d/--digest   checksum algorithm to use; may be repeated, and
                      defaults to sha1 when not given
        -o/--output   output file to write (default: 'checksums')
        -v/--verbose  log at DEBUG level (takes precedence over --quiet)
        -q/--quiet    log at ERROR level

    Every positional argument must be an existing directory.  The process
    exits with status 1, before the output file is touched, if a requested
    digest is not supported by hashlib or an argument is not a directory.'''
    # Parse command line arguments
    parser = OptionParser()
    parser.add_option('-d', '--digest', help='checksum algorithm to use',
                      action='append', dest='digests')
    parser.add_option('-o', '--output', help='output file to use',
                      action='store', dest='outfile', default='checksums')
    parser.add_option('-v', '--verbose',
                      help='Be noisy (takes precedence over quiet)',
                      action='store_true', dest='verbose', default=False)
    parser.add_option('-q', '--quiet', help='Be quiet', action='store_true',
                      dest='quiet', default=False)

    options, args = parser.parse_args()

    # Figure out which logging level to use
    if options.verbose:
        loglevel = logging.DEBUG
    elif options.quiet:
        loglevel = logging.ERROR
    else:
        loglevel = logging.INFO

    # Set up logging
    setup_logging(loglevel)

    # Fall back to sha1 when no algorithm was requested
    if not options.digests:
        options.digests = ['sha1']

    # Validate the digest types up front: otherwise an unsupported name
    # would only fail while hashing, after the output file had already been
    # truncated.
    for digest in options.digests:
        try:
            hashlib.new(digest)
        except ValueError:
            logger.error('%s is not a supported digest algorithm', digest)
            raise SystemExit(1)

    for i in args:
        if not os.path.isdir(i):
            logger.error('%s is not a directory', i)
            # SystemExit is raised directly because the exit() builtin is
            # injected by the site module and is not always available.
            raise SystemExit(1)

    process_files(args, options.outfile, options.digests)
|
2010-10-16 03:09:53 +04:00
|
|
|
|
2017-10-07 17:45:22 +03:00
|
|
|
|
2010-10-16 03:09:53 +04:00
|
|
|
# Run the command line interface only when this file is executed as a
# script; importing it as a module leaves main() uncalled.
if __name__ == '__main__':
    main()
|