210 строки
7.0 KiB
Python
Executable File
210 строки
7.0 KiB
Python
Executable File
#!/usr/bin/env python
|
|
#
|
|
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
|
|
"""Helper script to shard build bot steps and save results to disk.
|
|
|
|
Our buildbot infrastructure requires each slave to run steps serially.
|
|
This is sub-optimal for android, where these steps can run independently on
|
|
multiple connected devices.
|
|
|
|
The buildbots will run this script multiple times per cycle:
|
|
- First: all steps listed in -s in will be executed in parallel using all
|
|
connected devices. Step results will be pickled to disk. Each step has a unique
|
|
name. The result code will be ignored if the step name is listed in
|
|
--flaky_steps.
|
|
The buildbot will treat this step as a regular step, and will not process any
|
|
graph data.
|
|
|
|
- Then, with -p STEP_NAME: at this stage, we'll simply print the file with the
|
|
step results previously saved. The buildbot will then process the graph data
|
|
accordingly.
|
|
|
|
The JSON steps file contains a dictionary in the format:
|
|
{
|
|
"step_name_foo": "script_to_execute foo",
|
|
"step_name_bar": "script_to_execute bar"
|
|
}
|
|
|
|
The JSON flaky steps file contains a list with step names which results should
|
|
be ignored:
|
|
[
|
|
"step_name_foo",
|
|
"step_name_bar"
|
|
]
|
|
|
|
Note that script_to_execute necessarily have to take at least the following
|
|
options:
|
|
--device: the serial number to be passed to all adb commands.
|
|
--keep_test_server_ports: indicates it's being run as a shard, and shouldn't
|
|
reset test server port allocation.
|
|
"""
|
|
|
|
|
|
import datetime
|
|
import json
|
|
import logging
|
|
import multiprocessing
|
|
import optparse
|
|
import pexpect
|
|
import pickle
|
|
import os
|
|
import signal
|
|
import shutil
|
|
import sys
|
|
|
|
from pylib import android_commands
|
|
from pylib import cmd_helper
|
|
from pylib import constants
|
|
from pylib import ports
|
|
|
|
|
|
_OUTPUT_DIR = os.path.join(constants.CHROME_DIR, 'out', 'step_results')
|
|
|
|
|
|
def _SaveResult(result):
|
|
with file(os.path.join(_OUTPUT_DIR, result['name']), 'w') as f:
|
|
f.write(pickle.dumps(result))
|
|
|
|
|
|
def _RunStepsPerDevice(steps):
|
|
results = []
|
|
for step in steps:
|
|
start_time = datetime.datetime.now()
|
|
print 'Starting %s: %s %s at %s' % (step['name'], step['cmd'],
|
|
start_time, step['device'])
|
|
output, exit_code = pexpect.run(
|
|
step['cmd'], cwd=os.path.abspath(constants.CHROME_DIR),
|
|
withexitstatus=True, logfile=sys.stdout, timeout=1800,
|
|
env=os.environ)
|
|
exit_code = exit_code or 0
|
|
end_time = datetime.datetime.now()
|
|
exit_msg = '%s %s' % (exit_code,
|
|
'(ignored, flaky step)' if step['is_flaky'] else '')
|
|
print 'Finished %s: %s %s %s at %s' % (step['name'], exit_msg, step['cmd'],
|
|
end_time, step['device'])
|
|
if step['is_flaky']:
|
|
exit_code = 0
|
|
result = {'name': step['name'],
|
|
'output': output,
|
|
'exit_code': exit_code,
|
|
'total_time': (end_time - start_time).seconds,
|
|
'device': step['device']}
|
|
_SaveResult(result)
|
|
results += [result]
|
|
return results
|
|
|
|
|
|
def _RunShardedSteps(steps, flaky_steps, devices):
|
|
assert steps
|
|
assert devices, 'No devices connected?'
|
|
if os.path.exists(_OUTPUT_DIR):
|
|
assert '/step_results' in _OUTPUT_DIR
|
|
shutil.rmtree(_OUTPUT_DIR)
|
|
if not os.path.exists(_OUTPUT_DIR):
|
|
os.makedirs(_OUTPUT_DIR)
|
|
step_names = sorted(steps.keys())
|
|
all_params = []
|
|
num_devices = len(devices)
|
|
shard_size = (len(steps) + num_devices - 1) / num_devices
|
|
for i, device in enumerate(devices):
|
|
steps_per_device = []
|
|
for s in steps.keys()[i * shard_size:(i + 1) * shard_size]:
|
|
steps_per_device += [{'name': s,
|
|
'device': device,
|
|
'is_flaky': s in flaky_steps,
|
|
'cmd': steps[s] + ' --device ' + device +
|
|
' --keep_test_server_ports'}]
|
|
all_params += [steps_per_device]
|
|
print 'Start sharding (note: output is not synchronized...)'
|
|
print '*' * 80
|
|
start_time = datetime.datetime.now()
|
|
pool = multiprocessing.Pool(processes=num_devices)
|
|
async_results = pool.map_async(_RunStepsPerDevice, all_params)
|
|
results_per_device = async_results.get(999999)
|
|
end_time = datetime.datetime.now()
|
|
print '*' * 80
|
|
print 'Finished sharding.'
|
|
print 'Summary'
|
|
total_time = 0
|
|
for results in results_per_device:
|
|
for result in results:
|
|
print('%s : exit_code=%d in %d secs at %s' %
|
|
(result['name'], result['exit_code'], result['total_time'],
|
|
result['device']))
|
|
total_time += result['total_time']
|
|
print 'Step time: %d secs' % ((end_time - start_time).seconds)
|
|
print 'Bots time: %d secs' % total_time
|
|
# No exit_code for the sharding step: the individual _PrintResults step
|
|
# will return the corresponding exit_code.
|
|
return 0
|
|
|
|
|
|
def _PrintStepOutput(step_name):
|
|
file_name = os.path.join(_OUTPUT_DIR, step_name)
|
|
if not os.path.exists(file_name):
|
|
print 'File not found ', file_name
|
|
return 1
|
|
with file(file_name, 'r') as f:
|
|
result = pickle.loads(f.read())
|
|
print result['output']
|
|
return result['exit_code']
|
|
|
|
|
|
def _KillPendingServers():
|
|
for retry in range(5):
|
|
for server in ['lighttpd', 'web-page-replay']:
|
|
pids = cmd_helper.GetCmdOutput(['pgrep', '-f', server])
|
|
pids = [pid.strip() for pid in pids.split('\n') if pid.strip()]
|
|
for pid in pids:
|
|
try:
|
|
logging.warning('Killing %s %s', server, pid)
|
|
os.kill(int(pid), signal.SIGQUIT)
|
|
except Exception as e:
|
|
logging.warning('Failed killing %s %s %s', server, pid, e)
|
|
|
|
|
|
def main(argv):
|
|
parser = optparse.OptionParser()
|
|
parser.add_option('-s', '--steps',
|
|
help='A JSON file containing all the steps to be '
|
|
'sharded.')
|
|
parser.add_option('--flaky_steps',
|
|
help='A JSON file containing steps that are flaky and '
|
|
'will have its exit code ignored.')
|
|
parser.add_option('-p', '--print_results',
|
|
help='Only prints the results for the previously '
|
|
'executed step, do not run it again.')
|
|
options, urls = parser.parse_args(argv)
|
|
if options.print_results:
|
|
return _PrintStepOutput(options.print_results)
|
|
|
|
# At this point, we should kill everything that may have been left over from
|
|
# previous runs.
|
|
_KillPendingServers()
|
|
|
|
# Reset the test port allocation. It's important to do it before starting
|
|
# to dispatch any step.
|
|
if not ports.ResetTestServerPortAllocation():
|
|
raise Exception('Failed to reset test server port.')
|
|
|
|
# Sort the devices so that we'll try to always run a step in the same device.
|
|
devices = sorted(android_commands.GetAttachedDevices())
|
|
if not devices:
|
|
print 'You must attach a device'
|
|
return 1
|
|
|
|
with file(options.steps, 'r') as f:
|
|
steps = json.load(f)
|
|
flaky_steps = []
|
|
if options.flaky_steps:
|
|
with file(options.flaky_steps, 'r') as f:
|
|
flaky_steps = json.load(f)
|
|
return _RunShardedSteps(steps, flaky_steps, devices)
|
|
|
|
|
|
if __name__ == '__main__':
|
|
sys.exit(main(sys.argv))
|