зеркало из https://github.com/mozilla/gecko-dev.git
193 строки
6.3 KiB
Python
193 строки
6.3 KiB
Python
"""Simple benchmark to compare the speed of scandir.walk() with os.walk()."""
|
|
|
|
import optparse
|
|
import os
|
|
import stat
|
|
import sys
|
|
import timeit
|
|
|
|
import warnings
|
|
with warnings.catch_warnings(record=True):
|
|
import scandir
|
|
|
|
DEPTH = 4
|
|
NUM_DIRS = 5
|
|
NUM_FILES = 50
|
|
|
|
|
|
def os_walk_pre_35(top, topdown=True, onerror=None, followlinks=False):
|
|
"""Pre Python 3.5 implementation of os.walk() that doesn't use scandir."""
|
|
islink, join, isdir = os.path.islink, os.path.join, os.path.isdir
|
|
|
|
try:
|
|
names = os.listdir(top)
|
|
except OSError as err:
|
|
if onerror is not None:
|
|
onerror(err)
|
|
return
|
|
|
|
dirs, nondirs = [], []
|
|
for name in names:
|
|
if isdir(join(top, name)):
|
|
dirs.append(name)
|
|
else:
|
|
nondirs.append(name)
|
|
|
|
if topdown:
|
|
yield top, dirs, nondirs
|
|
for name in dirs:
|
|
new_path = join(top, name)
|
|
if followlinks or not islink(new_path):
|
|
for x in os_walk_pre_35(new_path, topdown, onerror, followlinks):
|
|
yield x
|
|
if not topdown:
|
|
yield top, dirs, nondirs
|
|
|
|
|
|
def create_tree(path, depth=DEPTH):
|
|
"""Create a directory tree at path with given depth, and NUM_DIRS and
|
|
NUM_FILES at each level.
|
|
"""
|
|
os.mkdir(path)
|
|
for i in range(NUM_FILES):
|
|
filename = os.path.join(path, 'file{0:03}.txt'.format(i))
|
|
with open(filename, 'wb') as f:
|
|
f.write(b'foo')
|
|
if depth <= 1:
|
|
return
|
|
for i in range(NUM_DIRS):
|
|
dirname = os.path.join(path, 'dir{0:03}'.format(i))
|
|
create_tree(dirname, depth - 1)
|
|
|
|
|
|
def get_tree_size(path):
|
|
"""Return total size of all files in directory tree at path."""
|
|
size = 0
|
|
try:
|
|
for entry in scandir.scandir(path):
|
|
if entry.is_symlink():
|
|
pass
|
|
elif entry.is_dir():
|
|
size += get_tree_size(os.path.join(path, entry.name))
|
|
else:
|
|
size += entry.stat().st_size
|
|
except OSError:
|
|
pass
|
|
return size
|
|
|
|
|
|
def benchmark(path, get_size=False):
|
|
sizes = {}
|
|
|
|
if get_size:
|
|
def do_os_walk():
|
|
size = 0
|
|
for root, dirs, files in os.walk(path):
|
|
for filename in files:
|
|
fullname = os.path.join(root, filename)
|
|
st = os.lstat(fullname)
|
|
if not stat.S_ISLNK(st.st_mode):
|
|
size += st.st_size
|
|
sizes['os_walk'] = size
|
|
|
|
def do_scandir_walk():
|
|
sizes['scandir_walk'] = get_tree_size(path)
|
|
|
|
else:
|
|
def do_os_walk():
|
|
for root, dirs, files in os.walk(path):
|
|
pass
|
|
|
|
def do_scandir_walk():
|
|
for root, dirs, files in scandir.walk(path):
|
|
pass
|
|
|
|
# Run this once first to cache things, so we're not benchmarking I/O
|
|
print("Priming the system's cache...")
|
|
do_scandir_walk()
|
|
|
|
# Use the best of 3 time for each of them to eliminate high outliers
|
|
os_walk_time = 1000000
|
|
scandir_walk_time = 1000000
|
|
N = 3
|
|
for i in range(N):
|
|
print('Benchmarking walks on {0}, repeat {1}/{2}...'.format(
|
|
path, i + 1, N))
|
|
os_walk_time = min(os_walk_time, timeit.timeit(do_os_walk, number=1))
|
|
scandir_walk_time = min(scandir_walk_time,
|
|
timeit.timeit(do_scandir_walk, number=1))
|
|
|
|
if get_size:
|
|
if sizes['os_walk'] == sizes['scandir_walk']:
|
|
equality = 'equal'
|
|
else:
|
|
equality = 'NOT EQUAL!'
|
|
print('os.walk size {0}, scandir.walk size {1} -- {2}'.format(
|
|
sizes['os_walk'], sizes['scandir_walk'], equality))
|
|
|
|
print('os.walk took {0:.3f}s, scandir.walk took {1:.3f}s -- {2:.1f}x as fast'.format(
|
|
os_walk_time, scandir_walk_time, os_walk_time / scandir_walk_time))
|
|
|
|
|
|
if __name__ == '__main__':
|
|
usage = """Usage: benchmark.py [-h] [tree_dir]
|
|
|
|
Create a large directory tree named "benchtree" (relative to this script) and
|
|
benchmark os.walk() versus scandir.walk(). If tree_dir is specified, benchmark
|
|
using it instead of creating a tree."""
|
|
parser = optparse.OptionParser(usage=usage)
|
|
parser.add_option('-s', '--size', action='store_true',
|
|
help='get size of directory tree while walking')
|
|
parser.add_option('-c', '--scandir', type='choice', choices=['best', 'generic', 'c', 'python', 'os'], default='best',
|
|
help='version of scandir() to use, default "%default"')
|
|
options, args = parser.parse_args()
|
|
|
|
if args:
|
|
tree_dir = args[0]
|
|
else:
|
|
tree_dir = os.path.join(os.path.dirname(__file__), 'benchtree')
|
|
if not os.path.exists(tree_dir):
|
|
print('Creating tree at {0}: depth={1}, num_dirs={2}, num_files={3}'.format(
|
|
tree_dir, DEPTH, NUM_DIRS, NUM_FILES))
|
|
create_tree(tree_dir)
|
|
|
|
if options.scandir == 'generic':
|
|
scandir.scandir = scandir.scandir_generic
|
|
elif options.scandir == 'c':
|
|
if scandir.scandir_c is None:
|
|
print("ERROR: Compiled C version of scandir not found!")
|
|
sys.exit(1)
|
|
scandir.scandir = scandir.scandir_c
|
|
elif options.scandir == 'python':
|
|
if scandir.scandir_python is None:
|
|
print("ERROR: Python version of scandir not found!")
|
|
sys.exit(1)
|
|
scandir.scandir = scandir.scandir_python
|
|
elif options.scandir == 'os':
|
|
if not hasattr(os, 'scandir'):
|
|
print("ERROR: Python 3.5's os.scandir() not found!")
|
|
sys.exit(1)
|
|
scandir.scandir = os.scandir
|
|
elif hasattr(os, 'scandir'):
|
|
scandir.scandir = os.scandir
|
|
|
|
if scandir.scandir == getattr(os, 'scandir', None):
|
|
print("Using Python 3.5's builtin os.scandir()")
|
|
elif scandir.scandir == scandir.scandir_c:
|
|
print('Using fast C version of scandir')
|
|
elif scandir.scandir == scandir.scandir_python:
|
|
print('Using slower ctypes version of scandir')
|
|
elif scandir.scandir == scandir.scandir_generic:
|
|
print('Using very slow generic version of scandir')
|
|
else:
|
|
print('ERROR: Unsure which version of scandir we are using!')
|
|
sys.exit(1)
|
|
|
|
if hasattr(os, 'scandir'):
|
|
os.walk = os_walk_pre_35
|
|
print('Comparing against pre-Python 3.5 version of os.walk()')
|
|
else:
|
|
print('Comparing against builtin version of os.walk()')
|
|
|
|
benchmark(tree_dir, get_size=options.size)
|