Bug 1619554 Improve performance of |mach try fuzzy| preview r=ahal

Remove use of requests module in preview pane

Reformat task duration data to avoid reprocessing in preview pane

Avoid loading task durations json more than once.

Increase required fzf version, use temporary file instead of arglist

Differential Revision: https://phabricator.services.mozilla.com/D65094

--HG--
extra : moz-landing-system : lando
This commit is contained in:
Simon Fraser 2020-03-05 19:58:43 +00:00
Parent 01d1683f4a
Commit 54f21ea31e
5 changed files with 141 additions and 119 deletions

View File

@@ -11,12 +11,11 @@ import sys
from mozboot.util import get_state_dir
from mozbuild.base import MozbuildObject
from mozversioncontrol import get_repository_object, MissingVCSExtension
from .util.estimates import (
duration_summary,
from .util.manage_estimates import (
download_task_history_data,
make_trimmed_taskgraph_cache
)
from .util.estimates import duration_summary
GIT_CINNABAR_NOT_FOUND = """
Could not detect `git-cinnabar`.

View File

@@ -20,7 +20,7 @@ from mozterm import Terminal
from ..cli import BaseTryParser
from ..tasks import generate_tasks, filter_tasks_by_paths
from ..push import check_working_directory, push_to_try, generate_try_task_config
from ..util.estimates import download_task_history_data, make_trimmed_taskgraph_cache
from ..util.manage_estimates import download_task_history_data, make_trimmed_taskgraph_cache
terminal = Terminal()
@@ -61,7 +61,7 @@ editor integrations, download the appropriate binary and put it on your $PATH:
FZF_VERSION_FAILED = """
Could not obtain the 'fzf' version.
The 'mach try fuzzy' command depends on fzf, and requires version > 0.18.0
The 'mach try fuzzy' command depends on fzf, and requires version > 0.20.0
for some of the features. Please install it following the appropriate
instructions for your platform:
@@ -202,9 +202,9 @@ def should_force_fzf_update(fzf_bin):
# Some fzf versions have extra output, e.g. 0.18.0 (ff95134)
fzf_version = fzf_version.split()[0]
# 0.18.0 introduced FZF_PREVIEW_COLUMNS as an env variable
# in preview subprocesses, which is a feature we use.
if StrictVersion(fzf_version) < StrictVersion('0.18.0'):
# 0.20.0 introduced passing selections through a temporary file,
# which is good for large ctrl-a actions.
if StrictVersion(fzf_version) < StrictVersion('0.20.0'):
print("fzf version is old, forcing update.")
return True
return False
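
For context, the bumped gate amounts to the following check. This is a minimal sketch, not the patch itself: fzf_is_too_old and its minimum argument are illustrative names, and it assumes `fzf --version` prints the version number first, optionally followed by a commit hash such as "(ff95134)".

    import subprocess
    from distutils.version import StrictVersion

    def fzf_is_too_old(fzf_bin, minimum="0.20.0"):
        # `fzf --version` may print e.g. "0.20.0 (ff95134)"; keep only the
        # leading version number before comparing.
        out = subprocess.check_output([fzf_bin, "--version"])
        version = out.decode("utf-8").split()[0]
        return StrictVersion(version) < StrictVersion(minimum)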
@@ -341,12 +341,12 @@ def run(update=False, query=None, intersect_query=None, try_config=None, full=Fa
if show_estimates:
base_cmd.extend([
'--preview', 'python {} -g {} -s -c {} "{{+}}"'.format(
'--preview', 'python {} -g {} -s -c {} -t "{{+f}}"'.format(
PREVIEW_SCRIPT, dep_cache, cache_dir),
])
else:
base_cmd.extend([
'--preview', 'python {} "{{+}}"'.format(PREVIEW_SCRIPT),
'--preview', 'python {} -t "{{+f}}"'.format(PREVIEW_SCRIPT),
])
if exact:

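The switch from {+} to {+f} above is what removes the long argument list: with {+}, fzf splices every selected task label into the preview command line, which can exceed OS argument-length limits under a large ctrl-a selection, while with {+f} (added in fzf 0.20.0) fzf writes the selections to a temporary file, one per line, and substitutes that file's path. A minimal sketch of the contract, simulating the fzf side with hypothetical task labels:

    import tempfile

    # What fzf does for "{+f}": dump the current selections to a temporary
    # file, one entry per line, and hand the preview command its path.
    selections = ["test-linux64/opt-mochitest-1", "test-linux64/opt-mochitest-2"]
    with tempfile.NamedTemporaryFile("w", suffix=".tasks", delete=False) as f:
        f.write("\n".join(selections))

    # The preview script then recovers the task list from the file, as
    # duration_display() does in the next hunk.
    with open(f.name) as taskfile:
        tasklist = [line.strip() for line in taskfile]
    assert tasklist == selections
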
View File

@@ -12,16 +12,20 @@ import sys
here = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, os.path.join(os.path.dirname(here), 'util'))
from estimates import duration_summary, task_duration_data
from estimates import duration_summary
def process_args():
"""Process preview arguments."""
argparser = argparse.ArgumentParser()
argparser.add_argument('-s', '--show-estimates', action="store_true")
argparser.add_argument('-g', '--graph-cache', type=str, default=None)
argparser.add_argument('-c', '--cache_dir', type=str, default=None)
argparser.add_argument('tasklist', type=str)
argparser.add_argument('-s', '--show-estimates', action="store_true",
help="Show task duration estimates (default: False)")
argparser.add_argument('-g', '--graph-cache', type=str, default=None,
help="Filename of task graph dependencies")
argparser.add_argument('-c', '--cache_dir', type=str, default=None,
help="Path to cache directory containing task durations")
argparser.add_argument('-t', '--tasklist', type=str, default=None,
help="Path to temporary file containing the selected tasks")
return argparser.parse_args()
@@ -30,9 +34,10 @@ def plain_display(tasklist):
print("\n".join(sorted(s.strip("'") for s in tasklist.split())))
def duration_display(graph_cache_file, tasklist, cache_dir):
def duration_display(graph_cache_file, taskfile, cache_dir):
"""Preview window display with task durations + metadata."""
tasklist = [t.strip("'") for t in tasklist.split()]
with open(taskfile, "r") as f:
tasklist = [line.strip() for line in f]
durations = duration_summary(graph_cache_file, tasklist, cache_dir)
output = ""
@@ -51,10 +56,9 @@ def duration_display(graph_cache_file, tasklist, cache_dir):
durations["eta_datetime"].strftime("%H:%M"))
duration_width = 5 # show five numbers at most.
task_durations = task_duration_data(cache_dir)
output += "{:>{width}}\n".format("Duration", width=max_columns)
for task in tasklist:
duration = int(task_durations.get(task, 0.0))
duration = durations["task_durations"].get(task, 0.0)
output += "{:{align}{width}} {:{nalign}{nwidth}}s\n".format(
task,
duration,

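Taken together, fzf now hands the preview script a file path instead of a quoted argument list; a hypothetical manual invocation, with placeholder script and cache paths, would look like:

    python preview.py -s -g /tmp/dep_cache.json -c ~/.mozbuild/cache -t /tmp/fzf-selections.txt
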
View File

@@ -5,111 +5,14 @@
from __future__ import absolute_import, print_function
import os
import requests
import json
from datetime import datetime, timedelta
TASK_DURATION_URL = 'https://storage.googleapis.com/mozilla-mach-data/task_duration_history.json'
GRAPH_QUANTILES_URL = 'https://storage.googleapis.com/mozilla-mach-data/machtry_quantiles.csv'
TASK_DURATION_CACHE = 'task_duration_history.json'
GRAPH_QUANTILE_CACHE = 'graph_quantile_cache.csv'
TASK_DURATION_TAG_FILE = 'task_duration_tag.json'
def check_downloaded_history(tag_file, duration_cache, quantile_cache):
if not os.path.isfile(tag_file):
return False
try:
with open(tag_file) as f:
duration_tags = json.load(f)
download_date = datetime.strptime(duration_tags.get('download_date'), '%Y-%M-%d')
if download_date < datetime.now() - timedelta(days=30):
return False
except (IOError, ValueError):
return False
if not os.path.isfile(duration_cache):
return False
if not os.path.isfile(quantile_cache):
return False
return True
def download_task_history_data(cache_dir):
"""Fetch task duration data exported from BigQuery."""
task_duration_cache = os.path.join(cache_dir, TASK_DURATION_CACHE)
task_duration_tag_file = os.path.join(cache_dir, TASK_DURATION_TAG_FILE)
graph_quantile_cache = os.path.join(cache_dir, GRAPH_QUANTILE_CACHE)
if check_downloaded_history(task_duration_tag_file, task_duration_cache, graph_quantile_cache):
return
try:
os.unlink(task_duration_tag_file)
os.unlink(task_duration_cache)
os.unlink(graph_quantile_cache)
except OSError:
print("No existing task history to clean up.")
try:
r = requests.get(TASK_DURATION_URL, stream=True)
except requests.exceptions.RequestException as exc:
# This is fine, the durations just won't be in the preview window.
print("Error fetching task duration cache from {}: {}".format(TASK_DURATION_URL, exc))
return
# The data retrieved from google storage is a newline-separated
# list of json entries, which Python's json module can't parse.
duration_data = list()
for line in r.content.splitlines():
duration_data.append(json.loads(line))
with open(task_duration_cache, 'w') as f:
json.dump(duration_data, f, indent=4)
try:
r = requests.get(GRAPH_QUANTILES_URL, stream=True)
except requests.exceptions.RequestException as exc:
# This is fine, the percentile just won't be in the preview window.
print("Error fetching task group percentiles from {}: {}".format(GRAPH_QUANTILES_URL, exc))
return
with open(graph_quantile_cache, 'w') as f:
f.write(r.content)
with open(task_duration_tag_file, 'w') as f:
json.dump({
'download_date': datetime.now().strftime('%Y-%m-%d')
}, f, indent=4)
def make_trimmed_taskgraph_cache(graph_cache, dep_cache, target_file=None):
"""Trim the taskgraph cache used for dependencies.
Speeds up the fzf preview window to less human-perceptible
ranges."""
if not os.path.isfile(graph_cache):
return
target_task_set = set()
if target_file:
with open(target_file) as f:
target_task_set = set(json.load(f).keys())
with open(graph_cache) as f:
graph = json.load(f)
graph = {
name: list(defn['dependencies'].values())
for name, defn in graph.items()
if name in target_task_set
}
with open(dep_cache, 'w') as f:
json.dump(graph, f, indent=4)
def find_all_dependencies(graph, tasklist):
all_dependencies = dict()
@@ -176,8 +79,7 @@ def determine_quantile(quantiles_file, duration):
def task_duration_data(cache_dir):
with open(os.path.join(cache_dir, TASK_DURATION_CACHE)) as f:
durations = json.load(f)
return {d['name']: d['mean_duration_seconds'] for d in durations}
return json.load(f)
def duration_summary(graph_cache_file, tasklist, cache_dir):
@@ -217,6 +119,7 @@ def duration_summary(graph_cache_file, tasklist, cache_dir):
output["wall_duration_seconds"] = timedelta(seconds=int(longest_path))
output["eta_datetime"] = datetime.now()+timedelta(seconds=longest_path)
# (datetime.now()+timedelta(seconds=longest_path)).strftime("%H:%M")
output["task_durations"] = {task: int(durations.get(task, 0.0)) for task in tasklist}
return output

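The cache reformatting pays off in the two hunks above: the preview window previously re-read and re-folded the full duration JSON via task_duration_data() on every refresh, whereas duration_summary() now returns a precomputed task_durations mapping alongside the totals. A minimal sketch of the shape change, with a hypothetical task name:

    # Old cache shape: a list of dicts, folded into a mapping on every load.
    old = [{"name": "build-linux64/opt", "mean_duration_seconds": 1234.5}]
    folded = {d["name"]: d["mean_duration_seconds"] for d in old}

    # New cache shape: already a mapping, so json.load() is the whole job.
    new = {"build-linux64/opt": 1234.5}
    assert folded == new
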
View File

@@ -0,0 +1,116 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import, print_function
import os
import requests
import json
from datetime import datetime, timedelta
TASK_DURATION_URL = 'https://storage.googleapis.com/mozilla-mach-data/task_duration_history.json'
GRAPH_QUANTILES_URL = 'https://storage.googleapis.com/mozilla-mach-data/machtry_quantiles.csv'
from .estimates import TASK_DURATION_CACHE, GRAPH_QUANTILE_CACHE, TASK_DURATION_TAG_FILE
def check_downloaded_history(tag_file, duration_cache, quantile_cache):
if not os.path.isfile(tag_file):
return False
try:
with open(tag_file) as f:
duration_tags = json.load(f)
download_date = datetime.strptime(duration_tags.get('download_date'), '%Y-%m-%d')
if download_date < datetime.now() - timedelta(days=7):
return False
except (IOError, ValueError):
return False
if not os.path.isfile(duration_cache):
return False
# Check for old format version of file.
with open(duration_cache) as f:
data = json.load(f)
if isinstance(data, list):
return False
if not os.path.isfile(quantile_cache):
return False
return True
def download_task_history_data(cache_dir):
"""Fetch task duration data exported from BigQuery."""
task_duration_cache = os.path.join(cache_dir, TASK_DURATION_CACHE)
task_duration_tag_file = os.path.join(cache_dir, TASK_DURATION_TAG_FILE)
graph_quantile_cache = os.path.join(cache_dir, GRAPH_QUANTILE_CACHE)
if check_downloaded_history(task_duration_tag_file, task_duration_cache, graph_quantile_cache):
return
try:
os.unlink(task_duration_tag_file)
os.unlink(task_duration_cache)
os.unlink(graph_quantile_cache)
except OSError:
print("No existing task history to clean up.")
try:
r = requests.get(TASK_DURATION_URL, stream=True)
except requests.exceptions.RequestException as exc:
# This is fine, the durations just won't be in the preview window.
print("Error fetching task duration cache from {}: {}".format(TASK_DURATION_URL, exc))
return
# The data retrieved from google storage is newline-delimited JSON,
# which Python's json module can't parse in a single json.load() call.
duration_data = list()
for line in r.content.splitlines():
duration_data.append(json.loads(line))
# Reformat duration data to avoid list of dicts, as this is slow in the preview window
duration_data = {d['name']: d['mean_duration_seconds'] for d in duration_data}
with open(task_duration_cache, 'w') as f:
json.dump(duration_data, f, indent=4)
try:
r = requests.get(GRAPH_QUANTILES_URL, stream=True)
except requests.exceptions.RequestException as exc:
# This is fine, the percentile just won't be in the preview window.
print("Error fetching task group percentiles from {}: {}".format(GRAPH_QUANTILES_URL, exc))
return
with open(graph_quantile_cache, 'w') as f:
f.write(r.content)
with open(task_duration_tag_file, 'w') as f:
json.dump({
'download_date': datetime.now().strftime('%Y-%m-%d')
}, f, indent=4)
def make_trimmed_taskgraph_cache(graph_cache, dep_cache, target_file=None):
"""Trim the taskgraph cache used for dependencies.
Speeds up the fzf preview window to less human-perceptible
ranges."""
if not os.path.isfile(graph_cache):
return
target_task_set = set()
if target_file:
with open(target_file) as f:
target_task_set = set(json.load(f).keys())
with open(graph_cache) as f:
graph = json.load(f)
graph = {
name: list(defn['dependencies'].values())
for name, defn in graph.items()
if name in target_task_set
}
with open(dep_cache, 'w') as f:
json.dump(graph, f, indent=4)
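
For orientation, the mach side (see the selector hunk above) drives these two helpers roughly as follows. This is a sketch with placeholder paths, not the exact calling code, and it assumes the package is importable as tryselect:

    import os
    from tryselect.util.manage_estimates import (
        download_task_history_data,
        make_trimmed_taskgraph_cache,
    )

    cache_dir = os.path.expanduser("~/.mozbuild/cache")              # placeholder
    graph_cache = os.path.join(cache_dir, "target_task_graph.json")  # placeholder
    dep_cache = os.path.join(cache_dir, "dep_cache.json")            # placeholder
    target_file = os.path.join(cache_dir, "target_tasks.json")       # placeholder

    # Refresh the duration/quantile caches if the tag file is stale or the
    # duration cache still uses the old list-of-dicts format.
    download_task_history_data(cache_dir)

    # Shrink the full taskgraph to just the targeted tasks' dependencies so
    # the preview subprocess loads a small file instead of the whole graph.
    make_trimmed_taskgraph_cache(graph_cache, dep_cache, target_file=target_file)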