model: [feat] add collection of instruction coverage

2024-01-25 11:48:54 +00:00 · 2024-01-25 11:48:54 +00:00 · 79ec4a6215
--- a/docs/config.md
+++ b/docs/config.md
@ -89,8 +89,8 @@ For a complete list, see `src/config.py`.

 * `coverage_type` [str]: Coverage type.
  Available options:
-  'none' - disable coverage tracking;
-  'dependent-pairs' - coverage of pairs of dependent instructions.
+  'none' - disable coverage tracking (default);
+  'model_instructions' - number of test cases in which the model executed each instruction.


 # x86-specific configuration
--- a/docs/development.md
+++ b/docs/development.md
@ -96,7 +96,6 @@ multiple Python files:
  size while still maintaining the violation-inducing behavior.
 * `fuzzer.py` - implements `fuzz` mode that utilizes all main components to
  perform end-to-end hardware fuzzing.
-* `coverage.py` - will collect coverage in the future; currently not in use.
 * `factory.py` - used to configure revizor accordingly to the user provided
  YAML configuration. Implements a simplified version of the Factory pattern:
  Defines a series of dictionaries that allows revizor to choose
--- a/src/fuzzer.py
+++ b/src/fuzzer.py
@ -133,6 +133,7 @@ class FuzzerGeneric(Fuzzer):
                    break

        self.LOG.fuzzer_finish()
+        self.LOG.dbg_report_coverage(self.model)
        return STAT.violations > 0

    def filter(self, test_case, inputs):
--- a/src/interfaces.py
+++ b/src/interfaces.py
@ -409,6 +409,7 @@ class Instruction:
    section_offset: int = 0
    section_id: int = 0
    size: int = 0
+    _inst_brief: str = ""

    # TODO: remove latest_reg_operand from this class. It belongs in the generator
    latest_reg_operand: Optional[Operand] = None  # for avoiding dependencies
@ -562,6 +563,29 @@ class Instruction:

        return res

+    def get_brief(self) -> str:  # returns name + one short tag per operand; result is cached
+        if self._inst_brief:  # non-empty string: already built by an earlier call
+            return self._inst_brief
+        # NOTE(review): the cache is not reset in this method - confirm name/operands stay fixed
+        brief = self.name
+        for o in self.operands:  # an operand whose type matches no branch below adds no tag
+            if o.type == OT.REG:
+                brief += f" R{o.width}"
+            elif o.type == OT.MEM:
+                brief += f" M{o.width}"
+            elif o.type == OT.IMM:
+                brief += f" I{o.width}"
+            elif o.type == OT.LABEL:
+                brief += " L"  # tag without a width suffix
+            elif o.type == OT.AGEN:
+                brief += f" A{o.width}"
+            elif o.type == OT.FLAGS:
+                brief += " F"  # tag without a width suffix
+            elif o.type == OT.COND:
+                brief += " C"  # tag without a width suffix
+        self._inst_brief = brief  # store so later calls take the early return above
+        return brief
+

 class BasicBlock:
    name: str
@ -975,9 +999,11 @@ class Model(ABC):
    data_start: int = 0
    data_end: int = 0
    tracer: Tracer
+    instruction_coverage: Dict[str, int]

    @abstractmethod
    def __init__(self, sandbox_base: int, code_base: int):
+        self.instruction_coverage = defaultdict(int)
        super().__init__()

    @abstractmethod
--- a/src/model.py
+++ b/src/model.py
@ -17,6 +17,7 @@ SPDX-License-Identifier: MIT
 from __future__ import annotations
 from abc import ABC, abstractmethod
 from typing import List, Tuple, Optional, Set, Dict
+from collections import defaultdict

 import copy
 import re
@ -433,6 +434,7 @@ class UnicornSeq(UnicornModel):
    test_case: TestCase  # the test case being traced
    current_instruction: Instruction  # the instruction currently being executed
    current_actor: Actor  # the active actor
+    local_coverage: Optional[Dict[str, int]]  # local coverage for the current test case

    # test case code
    code_start: UcPointer  # the lower bound of the code area
@ -576,6 +578,8 @@ class UnicornSeq(UnicornModel):
        contract_traces: List[CTrace] = []
        execution_traces: List[ExecutionTrace] = []
        taints = []
+        self.local_coverage = \
+            defaultdict(int) if CONF.coverage_type == "model_instructions" else None

        for index, input_ in enumerate(inputs):
            self.LOG.dbg_model_header(index)
@ -626,8 +630,10 @@ class UnicornSeq(UnicornModel):
            execution_traces.append(self.tracer.get_execution_trace())
            taints.append(self.taint_tracker.get_taint())

-        if self.coverage:
-            self.coverage.model_hook(execution_traces)
+        # update coverage: each instruction seen in this test case counts once
+        if self.local_coverage is not None:
+            for inst_name in self.local_coverage.keys():
+                self.instruction_coverage[inst_name] += 1

        return contract_traces, taints

@ -673,6 +679,11 @@ class UnicornSeq(UnicornModel):
        model.current_instruction = model.test_case.address_map[aid][address - section_start]
        model.trace_instruction(emulator, address, size, model)

+        # collect coverage
+        if model.local_coverage is not None:
+            if not model.current_instruction.is_instrumentation:
+                model.local_coverage[model.current_instruction.get_brief()] += 1
+
        # if the current instruction is a macro, interpret it
        if model.current_instruction.name == "macro":
            model.macro_interpreter.interpret(model.current_instruction, address)
--- a/src/util.py
+++ b/src/util.py
@ -10,7 +10,7 @@ from datetime import datetime
 from typing import NoReturn, Dict
 from pprint import pformat
 from traceback import print_stack
-from .interfaces import EquivalenceClass, SANDBOX_CODE_SIZE
+from .interfaces import EquivalenceClass, SANDBOX_CODE_SIZE, Model
 from .config import CONF

 MASK_64BIT = pow(2, 64)
@ -440,10 +440,18 @@ class Logger:

    # ==============================================================================================
    # Coverage
-    def dbg_report_coverage(self, round_id, msg):
-        if __debug__:
-            if self.dbg_coverage and round_id and round_id % 100 == 0:
-                print(f"\nDBG: [coverage] {msg}")
+    def dbg_report_coverage(self, model: Model):  # writes model.instruction_coverage to a file
+        if not __debug__:  # __debug__ is False when Python runs with -O
+            return
+        if not self.dbg_coverage:  # report only when this flag is set on the logger
+            return
+        # (key, count) pairs ordered by count, largest first
+        inst_names = sorted(model.instruction_coverage.items(), key=lambda x: x[1], reverse=True)
+        with open("coverage.txt", "w") as f:  # relative path; "w" truncates an existing file
+            for inst_name, count in inst_names:
+                f.write(f"{inst_name:<20} {count}\n")  # key left-aligned, padded to 20 chars
+            if not inst_names:  # empty mapping: write a placeholder instead of an empty file
+                f.write("    No coverage data available")


 # ==================================================================================================
--- a/src/x86/x86_config.py
+++ b/src/x86/x86_config.py
@ -162,6 +162,10 @@ _option_values = {
        "dbg_model",
        "dbg_coverage",
        "dbg_isa_loader",
+    ],
+    "coverage_type": [
+        "none",
+        "model_instructions",
    ]
 }