#!/usr/bin/env python3
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# This tool is based on the Spark merge_spark_pr script:
# https://github.com/apache/spark/blob/master/dev/merge_spark_pr.py

from collections import defaultdict, Counter

import jira
import re
import sys

PROJECT = "AIRFLOW"

try:
    import click
except ImportError:
    print("Could not find the click library. Run 'sudo pip install click' to install.")
    sys.exit(-1)

try:
    import git
except ImportError:
    print("Could not import git. Run 'sudo pip install gitpython' to install")
    sys.exit(-1)

JIRA_BASE = "https://issues.apache.org/jira/browse"
JIRA_API_BASE = "https://issues.apache.org/jira"

GIT_COMMIT_FIELDS = ['id', 'author_name', 'author_email', 'date', 'subject', 'body']
GIT_LOG_FORMAT = '%x1f'.join(['%h', '%an', '%ae', '%ad', '%s', '%b']) + '%x1e'
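# GIT_LOG_FORMAT asks git for, per commit: abbreviated hash (%h), author name (%an),
# author email (%ae), author date (%ad), subject (%s) and body (%b). Fields are
# separated by the ASCII unit separator (%x1f -> 0x1f) and commits by the record
# separator (%x1e -> 0x1e), so the raw log can be split back into GIT_COMMIT_FIELDS.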

STATUS_COLOUR_MAP = {
    'Resolved': 'green',
    'Open': 'red',
    'Closed': 'yellow'
}


def get_jiras_for_version(version):
    asf_jira = jira.client.JIRA({'server': JIRA_API_BASE})

    start_at = 0
    page_size = 50
    while True:
        results = asf_jira.search_issues(
            '''PROJECT={} and fixVersion={} AND
            (Resolution IS NULL OR Resolution NOT IN (Duplicate, "Cannot Reproduce"))
            ORDER BY updated DESC
            '''.format(PROJECT, version),
            maxResults=page_size,
            startAt=start_at,
        )

        for r in results:
            yield r

        if len(results) < page_size:
            break

        start_at += page_size
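

# Commit subjects in the Airflow repo conventionally look like
# "[AIRFLOW-1234] Short description (#4321)" (issue id and PR number here are made up).
# The regexes below extract the Jira issue id from a commit subject, normalize it,
# and pull the PR number either from the "(#...)" suffix of the subject or from a
# "Closes #..." reference in the commit body.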
issue_re = re.compile(r".*? (AIRFLOW[- ][0-9]{1,6}) (?: \]|\s|: )", flags=re.X | re.I)
issue_id_re = re.compile(r"AIRFLOW[- ]([0-9]{1,6})", flags=re.I)
pr_re = re.compile(r"(.*)Closes (#[0-9]{1,6})", flags=re.DOTALL)
pr_title_re = re.compile(r".*\((#[0-9]{1,6})\)$")


def _process_git_log_item(log_item):
    """Populate log_item['pull_request'] with the PR number found in the commit
    subject or, failing that, in the commit body ('-' when none is found)."""
    match = pr_title_re.match(log_item['subject'])
    if match:
        log_item['pull_request'] = match.group(1)
    elif 'body' in log_item:
        match = pr_re.match(log_item['body'])
        if match:
            log_item['pull_request'] = match.group(2)
        else:
            log_item['pull_request'] = '-'
    else:
        log_item['pull_request'] = '-'


def normalize_issue_id(issue_id):
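    """Convert references such as "AIRFLOW 1234" into the canonical "AIRFLOW-1234" form.

    Illustrative doctest examples (the __main__ block below runs doctest.testmod()):

    >>> normalize_issue_id("AIRFLOW-1234")
    'AIRFLOW-1234'
    >>> normalize_issue_id("airflow 42")
    'AIRFLOW-42'
    """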
    return "AIRFLOW-" + issue_id_re.match(issue_id)[1]


def get_merged_issues(repo, version, previous_version=None):
    """Return commits on the current branch keyed by the normalized Jira issue id
    referenced in their subject, optionally limited to commits since previous_version."""
    log_args = ['--format={}'.format(GIT_LOG_FORMAT)]
    if previous_version:
        log_args.append(previous_version + "..")
    log = repo.git.log(*log_args)

    log = log.strip('\n\x1e').split("\x1e")
    log = [row.strip().split("\x1f") for row in log]
    log = [dict(zip(GIT_COMMIT_FIELDS, row)) for row in log]

    merges = defaultdict(list)
    for log_item in log:
        issue_id = None
        issue_ids = issue_re.findall(log_item['subject'])

        _process_git_log_item(log_item)

        log_item['merged'] = True

        for issue_id in issue_ids:
            merges[normalize_issue_id(issue_id)].append(log_item)

    return merges


def get_commits_from_master(repo, issue):
    """Find commits on origin/master whose subject references the given Jira issue."""
    # --grep takes a basic regular expression, hence the backslash-escaped
    # grouping and alternation in the pattern.
    log = repo.git.log(
        '--format={}'.format(GIT_LOG_FORMAT),
        '--grep',
        r'.*{issue}\(\]\|:\|\s\)'.format(issue=issue.key),
        'origin/master')
    if not log:
        return []

    log = log.strip('\n\x1e').split("\x1e")
    log = [row.strip().split("\x1f") for row in log]
    log = [dict(zip(GIT_COMMIT_FIELDS, row)) for row in log]
    merges = []
    for log_item in log:
        _process_git_log_item(log_item)
        log_item['merged'] = False
        # --grep may have matched the commit body rather than the subject, so
        # double-check that the issue key really appears in the subject.
        if issue.key in issue_re.findall(log_item['subject']):
            merges.append(log_item)

    return merges


@click.group()
def cli():
    r"""
    This tool should be used by the Airflow Release Manager to verify which Jira
    issues were merged in the current working branch.

        airflow-jira compare <target_version>
        airflow-jira changelog <previous_version> <target_version>
    """


@cli.command(short_help='Compare a jira target version against git merges')
@click.argument('target_version', default=None)
@click.option('--previous-version',
              'previous_version',
              help="Specify the previous tag on the working branch so that only"
                   " commits since that tag are searched for cherry-picks")
@click.option('--unmerged', 'unmerged_only', help="Show unmerged issues only", is_flag=True)
def compare(target_version, previous_version=None, unmerged_only=False):
    repo = git.Repo(".", search_parent_directories=True)
    # Get a list of issues/PRs that have been committed on the current branch.
    branch_merges = get_merged_issues(repo, target_version, previous_version)

    issues = get_jiras_for_version(target_version)

    num_merged = 0
    num_unmerged = Counter()

    # :<18 says left align, pad to 18
    # :<83.83 pads to 83 chars and truncates anything longer
    # !s forces conversion via str() - some of the Jira objects define __str__,
    # but it isn't used by default when a format spec is given
    formatstr = "{id:<18}|{typ!s:<12}||{priority!s:<10}||{status!s}|" \
                "{description:<83.83}|{merged:<6}|{pr:<6}|{commit:>9}"

    print(formatstr.format(
        id="ISSUE ID",
        typ="TYPE",
        priority="PRIORITY",
        status="STATUS".ljust(10),
        description="DESCRIPTION",
        merged="MERGED",
        pr="PR",
        commit="COMMIT"))

    for issue in issues:

        # Put colour on the status field. Since it will contain non-printable
        # escape characters we can't use str.format to limit the length.
        status = issue.fields.status.name
        if status in STATUS_COLOUR_MAP:
            status = click.style(status[:10].ljust(10), STATUS_COLOUR_MAP[status])
        else:
            status = status[:10].ljust(10)

        # Find the merges in master targeting this issue
        master_merges = get_commits_from_master(repo, issue)

        # Index the commits already on this branch by their PR number so they can
        # be matched against the commits found on master.
        on_branch = {
            m['pull_request']: m
            for m in branch_merges.get(issue.key, [])
        }

        def print_merge_info(merge, printed_desc):
            nonlocal num_merged

            is_merged = merge['merged']

            if is_merged:
                num_merged += 1
                if unmerged_only:
                    return False
            else:
                num_unmerged[issue.fields.status.name] += 1

            if not printed_desc:
                # Only print issue info on the first line for each issue
                fields = dict(
                    id=issue.key,
                    typ=issue.fields.issuetype,
                    priority=issue.fields.priority,
                    status=status,
                    description=issue.fields.summary,
                )
            else:
                fields = dict(
                    id=issue.key,
                    typ="",
                    priority="",
                    status=" " * 10,
                    description="",
                )
            print(formatstr.format(
                **fields,
                merged=is_merged,
                pr=merge['pull_request'],
                commit=merge['id']))
            return True

        printed_desc = False
        for merge in master_merges:
            # Prefer the cherry-picked commit's details when the PR is already on this branch
            if merge['pull_request'] in on_branch:
                merge = on_branch[merge['pull_request']]

            printed_desc = print_merge_info(merge, printed_desc)

        if not master_merges:
            if on_branch:
                for merge in branch_merges.get(issue.key):
                    printed_desc = print_merge_info(merge, printed_desc)
            else:
                # No merges, issue likely still open
                print_merge_info({
                    'merged': 0,
                    'pull_request': '-',
                    'id': '-',
                }, printed_desc)

    print("Commits on branch: {0:d}, {1:d} ({2}) yet to be cherry-picked".format(
        num_merged, sum(num_unmerged.values()), dict(num_unmerged)))


@cli.command(short_help='Build a CHANGELOG grouped by Jira Issue type')
@click.argument('previous_version')
@click.argument('target_version')
def changelog(previous_version, target_version):
    repo = git.Repo(".", search_parent_directories=True)
    # Get the list of commits between the previous and the target version.
    log_args = ['--format={}'.format(GIT_LOG_FORMAT), previous_version + ".." + target_version]
    log = repo.git.log(*log_args)

    log = log.strip('\n\x1e').split("\x1e")
    log = [row.strip().split("\x1f") for row in log]
    log = [dict(zip(GIT_COMMIT_FIELDS, row)) for row in log]

    asf_jira = jira.client.JIRA({'server': JIRA_API_BASE})

    sections = defaultdict(list)

    batch = []

    def process_batch(batch):
        page_size = 50
        results = {i.key: i for i in asf_jira.search_issues(
            'PROJECT={} AND key IN ({})'.format(
                PROJECT,
                ','.join(key for key, _ in batch)),
            maxResults=page_size,
        )}

        for key, subject in batch:
            sections[results[key].fields.issuetype.name].append(subject)

    for commit in log:
        tickets = issue_re.findall(commit['subject'])

        if not tickets or 'AIRFLOW-XXX' in tickets:
            # TODO: Guess by files changed?
            sections['uncategorized'].append(commit['subject'])

        else:
            # The Jira API is kinda slow, so ask for 50 issues at a time.
            batch.append((normalize_issue_id(tickets[0]), commit['subject']))

            if len(batch) == 50:
                process_batch(batch)
                batch = []

    # Process the final, possibly incomplete batch so those commits aren't dropped.
    if batch:
        process_batch(batch)

    for section, lines in sections.items():
        print(section)
        print('"' * len(section))
        for line in lines:
            print('-', line)
        print()
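

# Illustrative invocations (version numbers are examples only):
#   airflow-jira compare 1.10.7 --previous-version 1.10.6 --unmerged
#   airflow-jira changelog 1.10.6 1.10.7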


if __name__ == "__main__":
    import doctest
    (failure_count, test_count) = doctest.testmod()
    if failure_count:
        exit(-1)
    try:
        cli()
    except Exception:
        raise