[code_review] Use previously rejected comments as examples in the filtering phase

2025-01-10 16:01:12 -05:00 · 2025-01-10 16:01:12 -05:00 · faede07bf0
--- a/bugbug/tools/code_review.py
+++ b/bugbug/tools/code_review.py
@ -28,7 +28,7 @@ from bugbug import db, phabricator, utils
 from bugbug.code_search.function_search import FunctionSearch
 from bugbug.generative_model_tool import GenerativeModelTool, get_tokenizer
 from bugbug.utils import get_secret
-from bugbug.vectordb import VectorDB, VectorPoint
+from bugbug.vectordb import PayloadScore, QueryFilter, VectorDB, VectorPoint

 basicConfig(level=INFO)
 logger = getLogger(__name__)
@ -93,7 +93,10 @@ As valid comments, consider the examples below:


 Here is the patch that we need you to review:
-{patch}"""
+{patch}
+
+Do not report any explanation about your choice. Only return a valid JSON list.
+"""
 )


@ -135,14 +138,18 @@ Patch:
 {patch}

 As examples of not expected comments, not related to the current patch, please, check some below:
-    - Please note that these are minor improvements and the overall quality of the patch is good. The documentation is being expanded in a clear and structured way, which will likely be beneficial for future development.
+    - {rejected_examples}
+"""
+)
+
+
+DEFAULT_REJECTED_EXAMPLES = """Please note that these are minor improvements and the overall quality of the patch is good. The documentation is being expanded in a clear and structured way, which will likely be beneficial for future development.
    - Please note that these are just suggestions and the code might work perfectly fine as it is. It's always a good idea to test all changes thoroughly to ensure they work as expected.
    - Overall, the patch seems to be well implemented with no major concerns. The developers have made a conscious decision to align with Chrome's behavior, and the reasoning is well documented.
    - There are no complex code changes in this patch, so there's no potential for major readability regressions or bugs introduced by the changes.
    - The `focus(...)` method is called without checking if the element and its associated parameters exist or not. It would be better to check if the element exists before calling the `focus()` method to avoid potential errors.
    - It's not clear if the `SearchService.sys.mjs` file exists or not. If it doesn't exist, this could cause an error. Please ensure that the file path is correct.
    - This is a good addition to the code."""
-)


 PROMPT_TEMPLATE_DEDUPLICATE = """Please, double check the code review comments below.
@ -969,7 +976,7 @@ def get_structured_functions(target, functions_declaration):
    return function_declaration_text


-def generate_processed_output(output: str, patch: PatchSet) -> Iterable[InlineComment]:
+def parse_model_output(output: str) -> list[dict]:
    output = output.strip()
    if output.startswith("Review:"):
        output = output[len("Review:") :].strip()
@ -979,6 +986,12 @@ def generate_processed_output(output: str, patch: PatchSet) -> Iterable[InlineCo

    comments = json.loads(output)

+    return comments
+
+
+def generate_processed_output(output: str, patch: PatchSet) -> Iterable[InlineComment]:
+    comments = parse_model_output(output)
+
    patched_files_map = {
        patched_file.target_file: patched_file for patched_file in patch
    }
@ -1026,6 +1039,7 @@ class CodeReviewTool(GenerativeModelTool):
        review_comments_db: Optional["ReviewCommentsDB"] = None,
        show_patch_example: bool = False,
        verbose: bool = True,
+        suggestions_feedback_db: Optional["SuggestionsFeedbackDB"] = None,
    ) -> None:
        super().__init__()

@ -1071,6 +1085,8 @@ class CodeReviewTool(GenerativeModelTool):

        self.verbose = verbose

+        self.suggestions_feedback_db = suggestions_feedback_db
+
    def count_tokens(self, text):
        return len(self._tokenizer.encode(text))

@ -1206,8 +1222,20 @@ class CodeReviewTool(GenerativeModelTool):
            if self.verbose:
                GenerativeModelTool._print_answer(output)

+        rejected_examples = (
+            "\n    - ".join(
+                self.get_similar_rejected_comments(parse_model_output(output))
+            )
+            if self.suggestions_feedback_db
+            else DEFAULT_REJECTED_EXAMPLES
+        )
+
        raw_output = self.filtering_chain.invoke(
-            {"review": output, "patch": patch.raw_diff},
+            {
+                "review": output,
+                "patch": patch.raw_diff,
+                "rejected_examples": rejected_examples,
+            },
            return_only_outputs=True,
        )["text"]

@ -1266,6 +1294,19 @@ class CodeReviewTool(GenerativeModelTool):
            for num, example in enumerate(comment_examples)
        )

+    def get_similar_rejected_comments(self, suggestions) -> Iterable[str]:
+        if not self.suggestions_feedback_db:
+            raise Exception("Suggestions feedback database is not available")
+
+        for suggestion in suggestions:
+            similar_rejected_suggestions = (
+                self.suggestions_feedback_db.find_similar_rejected_suggestions(
+                    suggestion["comment"], limit=2
+                )
+            )
+            for rejected_suggestion in similar_rejected_suggestions:
+                yield rejected_suggestion.comment
+

 class ReviewCommentsDB:
    NAV_PATTERN = re.compile(r"\{nav, [^}]+\}")
@ -1339,6 +1380,16 @@ class SuggestionFeedback:
    action: Literal["APPROVE", "REJECT", "IGNORE"]
    user: str

+    @staticmethod
+    def from_payload_score(point: PayloadScore):
+        return SuggestionFeedback(
+            id=point.id,
+            comment=point.payload["comment"],
+            file_path=point.payload["file_path"],
+            action=point.payload["action"],
+            user=point.payload["user"],
+        )
+

 class SuggestionsFeedbackDB:
    def __init__(self, vector_db: VectorDB) -> None:
@ -1364,12 +1415,18 @@ class SuggestionsFeedbackDB:

    def find_similar_suggestions(self, comment: str):
        return (
-            SuggestionFeedback(
-                id=point.id,
-                comment=point.payload["comment"],
-                file_path=point.payload["file_path"],
-                action=point.payload["action"],
-                user=point.payload["user"],
-            )
+            SuggestionFeedback.from_payload_score(point)
            for point in self.vector_db.search(self.embeddings.embed_query(comment))
        )
+
+    def find_similar_rejected_suggestions(self, comment: str, limit: int):
+        return (
+            SuggestionFeedback.from_payload_score(point)
+            for point in self.vector_db.search(
+                self.embeddings.embed_query(comment),
+                filter=QueryFilter(
+                    must_match={"action": "REJECT"},
+                ),
+                limit=limit,
+            )
+        )
--- a/bugbug/vectordb.py
+++ b/bugbug/vectordb.py
@ -8,6 +8,7 @@ from dataclasses import dataclass
 from typing import Iterable

 from qdrant_client import QdrantClient
+from qdrant_client.conversions import common_types as qdrant_types
 from qdrant_client.http.exceptions import UnexpectedResponse
 from qdrant_client.models import Distance, PointStruct, VectorParams

@ -28,6 +29,25 @@ class PayloadScore:
    payload: dict


+@dataclass
+class QueryFilter:
+    """A filter for a vector DB query.
+
+    Attributes:
+        must_match: The key is the field name and the value that must be matched.
+    """
+
+    must_match: dict[str, str | int]
+
+    def to_qdrant_filter(self) -> qdrant_types.Filter:
+        return {
+            "must": [
+                {"key": key, "match": {"value": value}}
+                for key, value in self.must_match.items()
+            ]
+        }
+
+
 class VectorDB(ABC):
    """Abstract class for a vector database.

@ -43,7 +63,9 @@ class VectorDB(ABC):
        ...

    @abstractmethod
-    def search(self, query: list[float]) -> Iterable[PayloadScore]:
+    def search(
+        self, query: list[float], filter: QueryFilter | None = None, limit: int = 10
+    ) -> Iterable[PayloadScore]:
        ...

    @abstractmethod
@ -84,8 +106,14 @@ class QdrantVectorDB(VectorDB):
            ),
        )

-    def search(self, query: list[float]) -> Iterable[PayloadScore]:
-        for item in self.client.search(self.collection_name, query):
+    def search(
+        self, query: list[float], filter: QueryFilter | None = None, limit: int = 10
+    ) -> Iterable[PayloadScore]:
+        qdrant_filter = filter.to_qdrant_filter() if filter else None
+
+        for item in self.client.search(
+            self.collection_name, query, qdrant_filter, limit=limit
+        ):
            yield PayloadScore(item.score, item.id, item.payload)

    def get_largest_id(self) -> int: