Move upstream operations to class
This commit is contained in:
Родитель
ae8dde7186
Коммит
393a70ed47
|
@ -11,7 +11,7 @@ from comma.cli.parser import parse_args
|
|||
from comma.database.driver import DatabaseDriver
|
||||
from comma.database.model import Distros, MonitoringSubjects
|
||||
from comma.downstream import Downstream
|
||||
from comma.upstream import process_commits
|
||||
from comma.upstream import Upstream
|
||||
from comma.util import config
|
||||
from comma.util.spreadsheet import export_commits, import_commits, update_commits
|
||||
from comma.util.symbols import get_missing_commits
|
||||
|
@ -39,7 +39,7 @@ def run(options):
|
|||
|
||||
if options.upstream:
|
||||
LOGGER.info("Begin monitoring upstream")
|
||||
process_commits(add_to_database=True, since=config.since)
|
||||
Upstream(config, DatabaseDriver()).process_commits()
|
||||
LOGGER.info("Finishing monitoring upstream")
|
||||
|
||||
if options.downstream:
|
||||
|
|
|
@ -4,10 +4,18 @@
|
|||
ORM models for database objects
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import git
|
||||
from sqlalchemy import Column, DateTime, ForeignKey, Integer, String
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import relationship
|
||||
|
||||
from comma.util import format_diffs
|
||||
from comma.util.tracking import get_filenames
|
||||
|
||||
|
||||
IGNORED_IN_CMSG = "reported-by:", "signed-off-by:", "reviewed-by:", "acked-by:", "cc:"
|
||||
|
||||
Base = declarative_base()
|
||||
|
||||
|
@ -46,6 +54,46 @@ class PatchData(Base):
|
|||
lazy="dynamic",
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def create(cls, commit: git.Commit, paths) -> "PatchData":
|
||||
"""
|
||||
Create patch object from a commit object
|
||||
"""
|
||||
|
||||
patch = cls(
|
||||
commitID=commit.hexsha,
|
||||
author=commit.author.name,
|
||||
authorEmail=commit.author.email,
|
||||
authorTime=datetime.utcfromtimestamp(commit.authored_date),
|
||||
commitTime=datetime.utcfromtimestamp(commit.committed_date),
|
||||
)
|
||||
|
||||
description = []
|
||||
fixed_patches = []
|
||||
for num, line in enumerate(commit.message.splitlines()):
|
||||
line = line.strip() # pylint: disable=redefined-loop-name
|
||||
if not num:
|
||||
patch.subject = line
|
||||
continue
|
||||
|
||||
if line.lower().startswith(IGNORED_IN_CMSG):
|
||||
continue
|
||||
|
||||
description.append(line)
|
||||
|
||||
# Check if this patch fixes other patches
|
||||
if line.lower().startswith("fixes:"):
|
||||
words = line.split(" ")
|
||||
if len(words) > 1:
|
||||
fixed_patches.append(words[1])
|
||||
|
||||
patch.description = "\n".join(description)
|
||||
patch.fixedPatches = " ".join(fixed_patches) # e.g. "SHA1 SHA2 SHA3"
|
||||
patch.affectedFilenames = " ".join(get_filenames(commit))
|
||||
patch.commitDiffs = format_diffs(commit, paths)
|
||||
|
||||
return patch
|
||||
|
||||
|
||||
class PatchDataMeta(Base):
|
||||
"""
|
||||
|
|
|
@ -15,7 +15,6 @@ from comma.database.model import (
|
|||
PatchData,
|
||||
)
|
||||
from comma.downstream.matcher import patch_matches
|
||||
from comma.upstream import process_commits
|
||||
from comma.util.tracking import get_linux_repo
|
||||
|
||||
|
||||
|
@ -186,6 +185,8 @@ class Downstream:
|
|||
Attempt to determine which patches are missing from a list of missing cherries
|
||||
"""
|
||||
|
||||
paths = self.repo.get_tracked_paths(self.config.sections)
|
||||
|
||||
with self.database.get_session() as session:
|
||||
patches = (
|
||||
session.query(PatchData)
|
||||
|
@ -199,7 +200,19 @@ class Downstream:
|
|||
|
||||
# Get the downstream commits for this revision (these are distinct from upstream because
|
||||
# they have been cherry-picked). This is slow but necessary!
|
||||
downstream_patches = process_commits(revision=reference, since=earliest_commit_date)
|
||||
|
||||
LOGGER.info("Determining downstream commits from tracked files")
|
||||
# We use `--min-parents=1 --max-parents=1` to avoid both merges and graft commits
|
||||
downstream_patches = tuple(
|
||||
PatchData.create(commit, paths)
|
||||
for commit in self.repo.iter_commits(
|
||||
rev=reference,
|
||||
paths=paths,
|
||||
min_parents=1,
|
||||
max_parents=1,
|
||||
since=earliest_commit_date,
|
||||
)
|
||||
)
|
||||
|
||||
# Double check the missing cherries using our fuzzy algorithm.
|
||||
LOGGER.info("Starting confidence matching for %d upstream patches...", len(patches))
|
||||
|
|
|
@ -6,7 +6,7 @@ Functions to compare commits for similarities
|
|||
|
||||
import logging
|
||||
import os
|
||||
from typing import Iterable, List
|
||||
from typing import Iterable
|
||||
|
||||
from fuzzywuzzy import fuzz
|
||||
|
||||
|
@ -64,7 +64,7 @@ def calculate_filenames_confidence(
|
|||
return total_filepaths_match / len(upstream_filepaths)
|
||||
|
||||
|
||||
def patch_matches(downstream_patches: List[PatchData], upstream: PatchData) -> bool:
|
||||
def patch_matches(downstream_patches: Iterable[PatchData], upstream: PatchData) -> bool:
|
||||
"""Check if 'upstream' has an equivalent in 'downstream_patches'."""
|
||||
|
||||
# Preprocessing for matching filenames
|
||||
|
|
|
@ -5,144 +5,59 @@ Functions for parsing commit objects into patch objects
|
|||
"""
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import List, Optional, Set
|
||||
from functools import cached_property
|
||||
|
||||
from comma.database.driver import DatabaseDriver
|
||||
from comma.database.model import PatchData
|
||||
from comma.util import config
|
||||
from comma.util.tracking import get_filenames, get_linux_repo
|
||||
from comma.util.tracking import get_linux_repo
|
||||
|
||||
|
||||
IGNORED_FIELDS = "reported-by:", "signed-off-by:", "reviewed-by:", "acked-by:", "cc:"
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def format_diffs(commit, paths):
|
||||
class Upstream:
|
||||
"""
|
||||
Format diffs from commit object into string
|
||||
Parent object for downstream operations
|
||||
"""
|
||||
|
||||
diffs = []
|
||||
# We are ignoring merges so all commits should have a single parent
|
||||
for diff in commit.tree.diff(commit.parents[0], paths=paths, create_patch=True):
|
||||
if diff.a_path is not None:
|
||||
# The patch commit diffs are stored as "(filename1)\n(diff1)\n(filename2)\n(diff2)..."
|
||||
lines = "\n".join(
|
||||
line
|
||||
for line in diff.diff.decode("utf-8").splitlines()
|
||||
if line.startswith(("+", "-"))
|
||||
)
|
||||
diffs.append(f"{diff.a_path}\n{lines}")
|
||||
def __init__(self, config, database) -> None:
|
||||
self.config = config
|
||||
self.database = database
|
||||
|
||||
return "\n".join(diffs)
|
||||
@cached_property
|
||||
def repo(self):
|
||||
"""
|
||||
Get repo when first accessed
|
||||
"""
|
||||
return get_linux_repo(since=self.config.since)
|
||||
|
||||
def process_commits(self):
|
||||
"""
|
||||
Generate patches for commits affecting tracked paths
|
||||
"""
|
||||
|
||||
def create_patch(commit, paths) -> PatchData:
|
||||
"""
|
||||
Create patch object from a commit object
|
||||
"""
|
||||
paths = self.repo.get_tracked_paths(self.config.sections)
|
||||
added = 0
|
||||
total = 0
|
||||
|
||||
patch: PatchData = PatchData(
|
||||
commitID=commit.hexsha,
|
||||
author=commit.author.name,
|
||||
authorEmail=commit.author.email,
|
||||
authorTime=datetime.utcfromtimestamp(commit.authored_date),
|
||||
commitTime=datetime.utcfromtimestamp(commit.committed_date),
|
||||
)
|
||||
|
||||
description = []
|
||||
fixed_patches = []
|
||||
for num, line in enumerate(commit.message.splitlines()):
|
||||
line = line.strip() # pylint: disable=redefined-loop-name
|
||||
if not num:
|
||||
patch.subject = line
|
||||
continue
|
||||
|
||||
if line.lower().startswith(IGNORED_FIELDS):
|
||||
continue
|
||||
|
||||
description.append(line)
|
||||
|
||||
# Check if this patch fixes other patches
|
||||
if line.lower().startswith("fixes:"):
|
||||
words = line.split(" ")
|
||||
if len(words) > 1:
|
||||
fixed_patches.append(words[1])
|
||||
|
||||
patch.description = "\n".join(description)
|
||||
patch.fixedPatches = " ".join(fixed_patches) # e.g. "SHA1 SHA2 SHA3"
|
||||
patch.affectedFilenames = " ".join(get_filenames(commit))
|
||||
patch.commitDiffs = format_diffs(commit, paths)
|
||||
|
||||
return patch
|
||||
|
||||
|
||||
def process_commits(
|
||||
commit_ids: Optional[Set[str]] = None,
|
||||
revision: str = "origin/master",
|
||||
add_to_database: bool = False,
|
||||
since: str = config.since,
|
||||
) -> List[PatchData]:
|
||||
"""
|
||||
Look at all commits in the given repo and handle based on distro.
|
||||
|
||||
commit_ids: Set of commits to process
|
||||
revision: revision we want to see the commits of, or None
|
||||
add_to_database: whether or not to add to database (side-effect)
|
||||
since: if provided, will only process commits after this commit
|
||||
"""
|
||||
|
||||
repo = get_linux_repo(since=since)
|
||||
paths = repo.get_tracked_paths(config.sections)
|
||||
|
||||
if commit_ids is None:
|
||||
# We use `--min-parents=1 --max-parents=1` to avoid both merges and graft commits.
|
||||
LOGGER.info("Determining commits from tracked files")
|
||||
commits = repo.iter_commits(
|
||||
rev=revision,
|
||||
LOGGER.info("Determining upstream commits from tracked files")
|
||||
for commit in self.repo.iter_commits(
|
||||
rev="origin/master",
|
||||
paths=paths,
|
||||
min_parents=1,
|
||||
max_parents=1,
|
||||
since=since,
|
||||
)
|
||||
else:
|
||||
# If given a set of commit SHAs, get the commit objects.
|
||||
commits = []
|
||||
for id_ in commit_ids:
|
||||
try:
|
||||
commits.append(repo.commit(id_))
|
||||
except ValueError:
|
||||
LOGGER.warning("Commit '%s' does not exist in the repo! Skipping...", id_)
|
||||
since=self.config.since,
|
||||
):
|
||||
total += 1
|
||||
with self.database.get_session() as session:
|
||||
# See if commit ID is in database
|
||||
if (
|
||||
session.query(PatchData.commitID)
|
||||
.filter_by(commitID=commit.hexsha)
|
||||
.one_or_none()
|
||||
is None
|
||||
):
|
||||
session.add(PatchData.create(commit, paths))
|
||||
added += 1
|
||||
|
||||
LOGGER.info("Starting commit processing...")
|
||||
|
||||
all_patches = []
|
||||
num_patches_added = 0
|
||||
for num, commit in enumerate(commits, 1):
|
||||
# Skip root (initial) commit since it should always be present
|
||||
# TODO (Issue 54): This can probably be removed
|
||||
if commit.parents:
|
||||
LOGGER.debug("Parsing commit %s", commit.hexsha)
|
||||
patch: PatchData = create_patch(commit, paths)
|
||||
|
||||
if add_to_database:
|
||||
with DatabaseDriver.get_session() as session:
|
||||
if (
|
||||
session.query(PatchData.commitID)
|
||||
.filter_by(commitID=patch.commitID)
|
||||
.one_or_none()
|
||||
is None
|
||||
):
|
||||
session.add(patch)
|
||||
num_patches_added += 1
|
||||
else:
|
||||
all_patches.append(patch)
|
||||
|
||||
if not num % 250:
|
||||
LOGGER.debug(" %d commits processed...", num)
|
||||
|
||||
if add_to_database:
|
||||
LOGGER.info("%d patches added to database.", num_patches_added)
|
||||
|
||||
return all_patches
|
||||
LOGGER.info("%d of %d patches added to database.", added, total)
|
||||
|
|
|
@ -5,6 +5,26 @@ Utility functions and classes
|
|||
"""
|
||||
|
||||
|
||||
def format_diffs(commit, paths):
|
||||
"""
|
||||
Format diffs from commit object into string
|
||||
"""
|
||||
|
||||
diffs = []
|
||||
# We are ignoring merges so all commits should have a single parent
|
||||
for diff in commit.tree.diff(commit.parents[0], paths=paths, create_patch=True):
|
||||
if diff.a_path is not None:
|
||||
# The patch commit diffs are stored as "(filename1)\n(diff1)\n(filename2)\n(diff2)..."
|
||||
lines = "\n".join(
|
||||
line
|
||||
for line in diff.diff.decode("utf-8").splitlines()
|
||||
if line.startswith(("+", "-"))
|
||||
)
|
||||
diffs.append(f"{diff.a_path}\n{lines}")
|
||||
|
||||
return "\n".join(diffs)
|
||||
|
||||
|
||||
class PatchDiff:
|
||||
"""
|
||||
Representation of code changes in a patch
|
||||
|
|
Загрузка…
Ссылка в новой задаче