зеркало из https://github.com/github/codeql.git
Basic script to generate shared code metrics
This commit is contained in:
Родитель
f4cb920624
Коммит
7d8b624a71
|
@ -0,0 +1,329 @@
|
|||
# Generates a report on the amount of code sharing in this repo
|
||||
#
|
||||
# The purpose of this is
|
||||
# a) To be able to understand the structure and dependencies
|
||||
# b) To provide a metric that measures the amount of shared vs non-shared code
|
||||
|
||||
import datetime
|
||||
from pathlib import Path
|
||||
import json
|
||||
import yaml
|
||||
|
||||
# To add more languages, add them to this list:
|
||||
languages = ['cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ql', 'ruby', 'swift']
|
||||
|
||||
repo_location = Path(__file__).parent.parent.parent
|
||||
|
||||
# Gets the total number of lines in a file
|
||||
def linecount(file):
|
||||
with open(file, 'r') as fp: return len(fp.readlines())
|
||||
|
||||
# Gets the language name from the path
|
||||
def get_language(path):
|
||||
return path.parts[len(repo_location.parts)]
|
||||
|
||||
# Is this path a CodeQL query file
|
||||
def is_query(path):
|
||||
return path.suffix == '.ql'
|
||||
|
||||
# Is this path a CodeQL library file
|
||||
def is_library(path):
|
||||
return path.suffix == '.qll'
|
||||
|
||||
# Is this path a relevant CodeQL file
|
||||
def is_ql(path):
|
||||
return is_query(path) or is_library(path)
|
||||
|
||||
# Is this file a CodeQL package file
|
||||
def is_package(path):
|
||||
return path.name == 'qlpack.yml'
|
||||
|
||||
# A CodeQL source file
|
||||
class QlFile:
|
||||
def __init__(self, path):
|
||||
self.path = path
|
||||
self.lines = linecount(path)
|
||||
shared = False
|
||||
|
||||
def language(self):
|
||||
return get_language(self.path)
|
||||
|
||||
def query(self):
|
||||
return is_query(self.path)
|
||||
|
||||
def library(self):
|
||||
return is_library(self.path)
|
||||
|
||||
# Returns if this qlfile is not shared, and is in a pack that is only in one language
|
||||
def isOnlyInLanguage(self, language):
|
||||
return not self.shared and (self.package is None or self.package.languages == {language}) and self.language() == language
|
||||
|
||||
# Represents a language folder
|
||||
class Language:
|
||||
def __init__(self, name):
|
||||
self.name = name
|
||||
self.packs = []
|
||||
self.nonshared_files = 0
|
||||
self.nonshared_lines = 0
|
||||
self.imported_files = 0
|
||||
self.imported_lines = 0
|
||||
|
||||
def addQlFile(self, qlfile):
|
||||
if not qlfile.shared:
|
||||
self.nonshared_files += 1
|
||||
self.nonshared_lines += qlfile.lines
|
||||
|
||||
def addSharedAsset(self, package):
|
||||
self.imported_files += package.files
|
||||
self.imported_lines += package.lines
|
||||
|
||||
# A shared package or file
|
||||
class SharedAsset:
|
||||
def __init__(self, name):
|
||||
self.name = name
|
||||
|
||||
# A file shared using identical-files.json
|
||||
class IdenticalFileSet(SharedAsset):
|
||||
def __init__(self, name, ql_files):
|
||||
self.name = name
|
||||
self.languages = set()
|
||||
self.files = 0
|
||||
self.lines = 0
|
||||
for file in ql_files:
|
||||
file.package = self
|
||||
file.shared = True
|
||||
self.files = 1
|
||||
self.lines = file.lines
|
||||
self.languages.add(file.language())
|
||||
|
||||
# Gets a pretty-printed markdown link
|
||||
def link(self):
|
||||
return self.name
|
||||
|
||||
# Represents all files shared in `identical-files.json`
|
||||
# Reads the file and builds a list of assets
|
||||
class IdenticalFiles:
|
||||
def __init__(self, repo_location, ql_file_index):
|
||||
identical_files = repo_location/'config'/'identical-files.json'
|
||||
with open(identical_files, "r") as fp:
|
||||
identical_files_json = json.load(fp)
|
||||
# Create a list of assets
|
||||
self.assets = []
|
||||
for group in identical_files_json:
|
||||
paths = []
|
||||
for file in identical_files_json[group]:
|
||||
path = repo_location / file
|
||||
if is_ql(path):
|
||||
ql_file_index[path].shared = True
|
||||
paths.append(ql_file_index[path])
|
||||
self.assets.append(IdenticalFileSet(group, paths))
|
||||
|
||||
# A package created from a `qlpack.yml`` file
|
||||
class Package(SharedAsset):
|
||||
def __init__(self, path, ql_file_index):
|
||||
self.path = path
|
||||
self.language = get_language(path)
|
||||
self.lines = 0
|
||||
self.files = 0
|
||||
self.languages = set()
|
||||
self.languages.add(self.language)
|
||||
self.identical_files_dependencies = set()
|
||||
with open(path, 'r') as fp:
|
||||
y = yaml.safe_load(fp)
|
||||
if 'name' in y:
|
||||
self.name = y['name']
|
||||
else:
|
||||
self.name = path.parent.name
|
||||
if 'dependencies' in y:
|
||||
self.deps = y['dependencies']
|
||||
if self.deps is None:
|
||||
self.deps = {}
|
||||
else:
|
||||
self.deps = {}
|
||||
# Mark all relevant files with their package
|
||||
for file in ql_file_index:
|
||||
if self.containsDirectory(file):
|
||||
file = ql_file_index[file]
|
||||
if not file.shared:
|
||||
file.package = self
|
||||
self.lines += file.lines
|
||||
self.files += 1
|
||||
else:
|
||||
self.identical_files_dependencies.add(file.package)
|
||||
self.url = "https://github.com/github/codeql/blob/main/" + str(path.relative_to(repo_location))
|
||||
|
||||
# Gets a pretty-printed markdown link
|
||||
def link(self):
|
||||
return '[' + self.name + '](' + self.url + ')'
|
||||
|
||||
def containsDirectory(self, dir):
|
||||
return self.path.parent.parts == dir.parts[:len(self.path.parent.parts)]
|
||||
# dir.startsWith(self.path.parent)
|
||||
|
||||
# Constructs a list of transitive depedencies of this package.
|
||||
def calculateDependencies(self, packageNameMap):
|
||||
self.transitive_dependencies = set(self.deps)
|
||||
queue = list(self.deps)
|
||||
while len(queue):
|
||||
item = queue.pop()
|
||||
for dep2 in packageNameMap[item].deps:
|
||||
if dep2 not in self.transitive_dependencies:
|
||||
self.transitive_dependencies.add(dep2)
|
||||
queue.append(dep2)
|
||||
# Calculate the amount of imported code
|
||||
self.total_imported_files = 0
|
||||
self.total_imported_lines = 0
|
||||
self.all_dependencies = set(self.identical_files_dependencies)
|
||||
for dep in self.transitive_dependencies:
|
||||
self.all_dependencies.add(packageNameMap[dep])
|
||||
for dep in self.all_dependencies:
|
||||
self.total_imported_files += dep.files
|
||||
self.total_imported_lines += dep.lines
|
||||
dep.languages.add(self.language)
|
||||
|
||||
# Create a big index of all files and their line counts.
|
||||
|
||||
# Map from path to line count
|
||||
ql_file_index = {}
|
||||
package_files = []
|
||||
|
||||
# Queue of directories to read
|
||||
directories_to_scan = [repo_location]
|
||||
|
||||
while len(directories_to_scan)!=0:
|
||||
dir = directories_to_scan.pop()
|
||||
for p in dir.iterdir():
|
||||
if p.is_dir():
|
||||
directories_to_scan.append(p)
|
||||
elif is_ql(p):
|
||||
ql_file_index[p] = QlFile(p)
|
||||
elif is_package(p):
|
||||
package_files.append(p)
|
||||
|
||||
# Create identical_files_json
|
||||
identical_files = IdenticalFiles(repo_location, ql_file_index)
|
||||
|
||||
# Create packages
|
||||
# Do this after identical_files so that we can figure out the package sizes
|
||||
# Do this after getting the ql_file_index fully built
|
||||
packages = []
|
||||
for file in package_files:
|
||||
packages.append(Package(file, ql_file_index))
|
||||
|
||||
# List all shared assets
|
||||
shared_assets = packages + identical_files.assets
|
||||
|
||||
# Construct statistics for each language
|
||||
language_info = {}
|
||||
for l in languages:
|
||||
language_info[l] = Language(l)
|
||||
|
||||
for qlfile in ql_file_index.values():
|
||||
lang = qlfile.language()
|
||||
if lang in language_info:
|
||||
info = language_info[lang]
|
||||
if qlfile.isOnlyInLanguage(lang):
|
||||
info.addQlFile(qlfile)
|
||||
|
||||
# Determine all package dependencies
|
||||
|
||||
packageNameMap = {}
|
||||
|
||||
for package in packages:
|
||||
packageNameMap[package.name] = package
|
||||
|
||||
for package in packages:
|
||||
package.calculateDependencies(packageNameMap)
|
||||
|
||||
for asset in shared_assets:
|
||||
if len(asset.languages)>1:
|
||||
for lang in asset.languages:
|
||||
if lang in language_info:
|
||||
language_info[lang].addSharedAsset(asset)
|
||||
|
||||
|
||||
# Functions to output the results
|
||||
|
||||
def list_assets(shared_assets, language_info):
|
||||
print('| Asset | Files | Lines |', end='')
|
||||
for lang in language_info:
|
||||
print('', lang, '|', end='')
|
||||
print()
|
||||
print('| ----- | ----- | ----- |', end='')
|
||||
for lang in language_info:
|
||||
print(' ---- |', end='')
|
||||
print()
|
||||
for asset in shared_assets:
|
||||
print('|', asset.link(), '|', asset.files ,'|', asset.lines, '|', end=' ')
|
||||
for lang in language_info:
|
||||
if lang in asset.languages:
|
||||
print('yes |', end=' ')
|
||||
else:
|
||||
print(' |', end=' ');
|
||||
print()
|
||||
print()
|
||||
|
||||
def list_package_dependencies(package):
|
||||
print("Package", package.path, package.name, package.files, package.lines, package.total_imported_files, package.total_imported_lines)
|
||||
for dep in package.all_dependencies:
|
||||
print(" ", dep.name, dep.files, dep.lines)
|
||||
|
||||
def print_package_dependencies(packages):
|
||||
print('| Package name | Non-shared files | Non-shared lines of code | Imported files | Imported lines of code | Shared code % |')
|
||||
print('| ------------ | ---------------- | ------------------------ | -------------- | ---------------------- | ------------- |')
|
||||
for package in packages:
|
||||
nlines = package.lines + package.total_imported_lines
|
||||
shared_percentage = 100 * package.total_imported_lines / nlines if nlines>0 else 0
|
||||
print('|', package.link(), '|', package.files, '|', package.lines, '|', package.total_imported_files, '|', package.total_imported_lines, '|',
|
||||
# ','.join([p.name for p in package.all_dependencies]),
|
||||
"%.2f" % shared_percentage, '|')
|
||||
print()
|
||||
|
||||
def print_language_dependencies(packages):
|
||||
print_package_dependencies([p for p in packages if p.name.endswith('-all') and p.name.count('-')==1])
|
||||
|
||||
def list_shared_code_by_language(language_info):
|
||||
# For each language directory, list the files that are (1) inside the directory and not shared,
|
||||
# (2) packages from outside the directory, plus identical files
|
||||
print('| Language | Non-shared files | Non-shared lines of code | Imported files | Imported lines of code | Shared code % |')
|
||||
print('| -------- | ---------------- | ------------------------ | -------------- | ---------------------- | ------------- |')
|
||||
for lang in language_info:
|
||||
info = language_info[lang]
|
||||
total = info.imported_lines + info.nonshared_lines
|
||||
shared_percentage = 100 * info.imported_lines / total if total>0 else 0
|
||||
print('|', lang, '|', info.nonshared_files, '|', info.nonshared_lines, '|', info.imported_files, '|', info.imported_lines, '|', "%.2f" % shared_percentage, '|')
|
||||
print()
|
||||
|
||||
|
||||
# Output reports
|
||||
|
||||
print('# Report on CodeQL code sharing\n')
|
||||
print('Generated on', datetime.datetime.now())
|
||||
print()
|
||||
|
||||
print('## Shared code by language\n')
|
||||
|
||||
list_shared_code_by_language(language_info)
|
||||
|
||||
print('''
|
||||
* *Non-shared files*: The number of CodeQL files (`.ql`/`.qll`) that are only used within this language folder. Excludes `identical-files.json` that are shared between multiple languages.
|
||||
* *Non-shared lines of code*: The number of lines of code in the non-shared files.
|
||||
* *Imported files*: All CodeQL files (`.ql`/`.qll`) files that are transitively used in this language folder, either via packages or `identical-files.json`
|
||||
* *Imported lines of code*: The number of lines of code in the imported files
|
||||
* *Shared code %*: The proportion of imported lines / total lines (nonshared + imported).
|
||||
|
||||
## Shared packages use by language
|
||||
|
||||
A package is *used* if it is a direct or indirect dependency, or a file shared via `identical-files.json`.
|
||||
|
||||
''')
|
||||
|
||||
list_assets(shared_assets, language_info)
|
||||
|
||||
print('## Shared code by language pack\n')
|
||||
|
||||
print_language_dependencies(packages)
|
||||
|
||||
print('## Shared code by package\n')
|
||||
|
||||
print_package_dependencies(packages)
|
Загрузка…
Ссылка в новой задаче