codeql/misc/scripts/shared-code-metrics.py

331 строка
11 KiB
Python
Исходник Ответственный История

Этот файл содержит невидимые символы Юникода!

Этот файл содержит невидимые символы Юникода, которые могут быть отображены не так, как показано ниже. Если это намеренно, можете спокойно проигнорировать это предупреждение. Используйте кнопку Экранировать, чтобы показать скрытые символы.

#!/bin/env python3
# Generates a report on the amount of code sharing in this repo
#
# The purpose of this is
# a) To be able to understand the structure and dependencies
# b) To provide a metric that measures the amount of shared vs non-shared code
import datetime
from pathlib import Path
import json
import yaml
# Top-level directories that are reported on as languages.
# To add more languages, add them to this list:
languages = ['cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ql', 'ruby', 'swift']
# Repository root: the directory two levels above the one containing this
# script. The path is resolved first so the result is absolute even when the
# script is started through a relative path; otherwise the root (and every
# path derived from it) would silently depend on the working directory.
repo_location = Path(__file__).resolve().parent.parent.parent
def linecount(file):
    """Return the number of lines in the text file at ``file``.

    The file is decoded as UTF-8 with undecodable bytes replaced, so the
    count does not depend on the platform's default encoding and a single
    stray byte cannot abort the whole report. Lines are streamed instead of
    being loaded into memory all at once.
    """
    with open(file, 'r', encoding='utf-8', errors='replace') as fp:
        return sum(1 for _ in fp)
def get_language(path):
    """Return the path component directly below ``repo_location``.

    The component is picked purely by position (the index equal to the number
    of parts in ``repo_location``); the function does not check that ``path``
    actually lies under the repository root.
    """
    root_depth = len(repo_location.parts)
    return path.parts[root_depth]
def is_query(path):
    """Return True if ``path`` has the CodeQL query extension ``.ql``."""
    extension = path.suffix
    return extension == '.ql'
def is_library(path):
    """Return True if ``path`` has the CodeQL library extension ``.qll``."""
    extension = path.suffix
    return extension == '.qll'
def is_ql(path):
    """Return True if ``path`` is a CodeQL query or a CodeQL library file."""
    if is_query(path):
        return True
    return is_library(path)
def is_package(path):
    """Return True if ``path`` is a pack manifest, i.e. is named ``qlpack.yml``."""
    file_name = path.name
    return file_name == 'qlpack.yml'
class QlFile:
    """A CodeQL source file (``.ql`` or ``.qll``).

    Attributes:
        path: Location of the file.
        lines: Line count of the file, as returned by ``linecount``.
        shared: False on construction; the identical-files classes set it to
            True for files listed in an identical-files group.
        package: None on construction; set to the package or identical-files
            group that claims the file.
    """

    def __init__(self, path):
        self.path = path
        self.lines = linecount(path)
        # These are assigned from outside once assets are built. Giving them
        # instance defaults guarantees that reading them never raises
        # AttributeError, e.g. for a file that no pack directory contains.
        self.shared = False
        self.package = None

    def language(self):
        """Return the language directory this file lives in."""
        return get_language(self.path)

    def query(self):
        """Return True if this file is a query (``.ql``)."""
        return is_query(self.path)

    def library(self):
        """Return True if this file is a library (``.qll``)."""
        return is_library(self.path)

    def isOnlyInLanguage(self, language):
        """Return True if this file belongs to ``language`` alone.

        That is the case when the file is not shared, its package (if it has
        one) lists exactly ``{language}`` as its languages, and the file's own
        language directory equals ``language``.
        """
        if self.shared:
            return False
        if self.package is not None and self.package.languages != {language}:
            return False
        return self.language() == language
class Language:
    """Running totals for one language folder.

    Attributes:
        name: The language name.
        packs: A list that starts empty; this class never adds to it.
        nonshared_files / nonshared_lines: Totals over files added through
            ``addQlFile`` that are not marked as shared.
        imported_files / imported_lines: Totals over assets added through
            ``addSharedAsset``.
    """

    def __init__(self, name):
        self.name = name
        self.packs = []
        self.nonshared_files = self.nonshared_lines = 0
        self.imported_files = self.imported_lines = 0

    def addQlFile(self, qlfile):
        """Count ``qlfile`` towards the non-shared totals unless it is shared."""
        if qlfile.shared:
            return
        self.nonshared_files += 1
        self.nonshared_lines += qlfile.lines

    def addSharedAsset(self, package):
        """Add the file and line counts of ``package`` to the imported totals."""
        self.imported_files += package.files
        self.imported_lines += package.lines
class SharedAsset:
    """Common base for things that can be shared: a package or a file group."""

    def __init__(self, name):
        # The name is the only state the base class keeps.
        self.name = name
class IdenticalFileSet(SharedAsset):
    """One group of files kept identical through ``identical-files.json``.

    Every member is marked as shared and pointed at this group. Because the
    copies are identical, a non-empty group counts as a single file whose
    line count is that of the last member processed; an empty group has zero
    files and zero lines.
    """

    def __init__(self, name, ql_files):
        super().__init__(name)
        self.languages = set()
        self.files = 0
        self.lines = 0
        for member in ql_files:
            member.package = self
            member.shared = True
            # Overwritten on every iteration on purpose: the group is one file.
            self.files = 1
            self.lines = member.lines
            self.languages.add(member.language())

    def link(self):
        """Return the text used for this asset in the markdown tables."""
        return self.name
class IdenticalFiles:
    """All file groups declared in ``config/identical-files.json``.

    Reads the JSON file below ``repo_location`` and builds ``self.assets``,
    one ``IdenticalFileSet`` per group. Only members that are CodeQL files
    (per ``is_ql``) are included. Each included member is looked up with
    ``ql_file_index[path]``, so with a plain dict a listed CodeQL file that
    is missing from the index raises ``KeyError``.
    """

    def __init__(self, repo_location, ql_file_index):
        identical_files = repo_location / 'config' / 'identical-files.json'
        # JSON interchange is UTF-8; do not depend on the locale's default.
        with open(identical_files, "r", encoding="utf-8") as fp:
            identical_files_json = json.load(fp)

        # Create a list of assets
        self.assets = []
        for group, members in identical_files_json.items():
            group_files = []
            for member in members:
                path = repo_location / member
                if is_ql(path):
                    ql_file = ql_file_index[path]
                    ql_file.shared = True
                    group_files.append(ql_file)
            self.assets.append(IdenticalFileSet(group, group_files))
class Package(SharedAsset):
    """A package created from a ``qlpack.yml`` file.

    Attributes set by the constructor:
        path: Path of the ``qlpack.yml`` file.
        language: Language directory the manifest lives in.
        name: The manifest's ``name`` value, or the manifest's directory name
            when the manifest has no ``name`` key.
        deps: The manifest's ``dependencies`` value, or ``{}`` when absent or
            empty.
        files / lines: Totals over the non-shared indexed files located under
            the manifest's directory.
        languages: Set of languages using this package; starts with its own.
        identical_files_dependencies: Assets owning the shared files found
            under the manifest's directory.
        url: Link to the manifest on github.com.

    ``calculateDependencies`` adds ``transitive_dependencies``,
    ``all_dependencies``, ``total_imported_files`` and
    ``total_imported_lines``.
    """

    def __init__(self, path, ql_file_index):
        self.path = path
        self.language = get_language(path)
        self.lines = 0
        self.files = 0
        self.languages = {self.language}
        self.identical_files_dependencies = set()

        with open(path, 'r', encoding='utf-8') as fp:
            # An empty manifest parses to None; treat it as having no keys
            # instead of failing on the membership tests below.
            manifest = yaml.safe_load(fp) or {}
        if 'name' in manifest:
            self.name = manifest['name']
        else:
            self.name = path.parent.name
        # A `dependencies:` key without entries parses to None.
        self.deps = manifest.get('dependencies') or {}

        # Mark all relevant files with their package
        for file_path, ql_file in ql_file_index.items():
            if not self.containsDirectory(file_path):
                continue
            if ql_file.shared:
                # Shared files stay with their identical-files asset; the
                # package just records that it depends on that asset.
                self.identical_files_dependencies.add(ql_file.package)
            else:
                ql_file.package = self
                self.lines += ql_file.lines
                self.files += 1

        # as_posix() keeps forward slashes in the URL on every platform.
        self.url = ("https://github.com/github/codeql/blob/main/"
                    + path.relative_to(repo_location).as_posix())

    def link(self):
        """Return a markdown link showing the package name and pointing at ``url``."""
        return '[' + self.name + '](' + self.url + ')'

    def containsDirectory(self, dir):
        """Return True if ``dir`` starts with the directory holding this manifest.

        The comparison is made on path components, so a sibling directory
        whose name merely shares a textual prefix does not match.
        """
        pack_dir_parts = self.path.parent.parts
        return pack_dir_parts == dir.parts[:len(pack_dir_parts)]

    def calculateDependencies(self, packageNameMap):
        """Compute the transitive dependencies of this package and their size.

        ``packageNameMap`` maps package names to ``Package`` objects. Every
        dependency name is looked up with ``packageNameMap[name]``, so with a
        plain dict an unknown name raises ``KeyError``.

        Side effect: this package's language is added to the ``languages`` set
        of every asset it depends on.
        """
        # Worklist traversal over dependency names.
        self.transitive_dependencies = set(self.deps)
        queue = list(self.deps)
        while queue:
            item = queue.pop()
            for dep2 in packageNameMap[item].deps:
                if dep2 not in self.transitive_dependencies:
                    self.transitive_dependencies.add(dep2)
                    queue.append(dep2)

        # Calculate the amount of imported code
        self.total_imported_files = 0
        self.total_imported_lines = 0
        self.all_dependencies = set(self.identical_files_dependencies)
        for dep in self.transitive_dependencies:
            self.all_dependencies.add(packageNameMap[dep])
        for dep in self.all_dependencies:
            self.total_imported_files += dep.files
            self.total_imported_lines += dep.lines
            dep.languages.add(self.language)
# Create a big index of all CodeQL files.
# Map from path to the QlFile describing it (which carries the line count)
ql_file_index = {}
# Paths of every qlpack.yml encountered during the scan
package_files = []
# Stack of directories still to be read, starting at the repository root
directories_to_scan = [repo_location]
while directories_to_scan:
    current_dir = directories_to_scan.pop()
    for entry in current_dir.iterdir():
        if entry.is_dir():
            directories_to_scan.append(entry)
            continue
        if is_ql(entry):
            ql_file_index[entry] = QlFile(entry)
        elif is_package(entry):
            package_files.append(entry)
# Build the identical-files groups first: this marks their member files as
# shared, which the packages need in order to compute their own sizes.
identical_files = IdenticalFiles(repo_location, ql_file_index)
# Create packages
# This requires ql_file_index to be fully built and the shared files to be
# marked already (see above).
packages = [Package(manifest_path, ql_file_index) for manifest_path in package_files]
# List all shared assets: packages first, then identical-files groups
shared_assets = packages + identical_files.assets
# Construct statistics for each language
language_info = {name: Language(name) for name in languages}

# Determine all package dependencies.
# This has to run before the non-shared totals are gathered below:
# calculateDependencies() is what adds the using language to each dependency's
# `languages` set, and isOnlyInLanguage() relies on that set to decide whether
# a package belongs to a single language. Doing it afterwards would count a
# package used by several languages both as non-shared and as imported.
packageNameMap = {package.name: package for package in packages}
for package in packages:
    package.calculateDependencies(packageNameMap)

# Non-shared code: files used by their own language only
for qlfile in ql_file_index.values():
    lang = qlfile.language()
    if lang in language_info and qlfile.isOnlyInLanguage(lang):
        language_info[lang].addQlFile(qlfile)

# Imported code: assets used by more than one language
for asset in shared_assets:
    if len(asset.languages) > 1:
        for lang in asset.languages:
            if lang in language_info:
                language_info[lang].addSharedAsset(asset)
# Functions to output the results
def list_assets(shared_assets, language_info):
    """Print a markdown table of assets and the languages that use them.

    There is one row per asset with its link, file count and line count,
    followed by one column per key of ``language_info`` that reads ``yes``
    when the key is in the asset's ``languages``. A blank line follows the
    table.
    """
    language_names = list(language_info)
    print('| Asset | Files | Lines |' + ''.join(' %s |' % (name,) for name in language_names))
    print('| ----- | ----- | ----- |' + ''.join(' ---- |' for _ in language_names))
    for asset in shared_assets:
        fixed_cells = ['|', str(asset.link()), '|', str(asset.files), '|', str(asset.lines), '|']
        usage_cells = ''.join(
            'yes | ' if name in asset.languages else ' | '
            for name in language_names
        )
        print(' '.join(fixed_cells) + ' ' + usage_cells)
    print()
def list_package_dependencies(package):
    """Print a plain-text summary of ``package`` followed by its dependencies.

    The first line holds the package's path, name, own file and line counts
    and imported file and line counts; each following line describes one
    entry of ``package.all_dependencies``.
    """
    summary = (
        package.path,
        package.name,
        package.files,
        package.lines,
        package.total_imported_files,
        package.total_imported_lines,
    )
    print("Package", *summary)
    for dependency in package.all_dependencies:
        print(" ", dependency.name, dependency.files, dependency.lines)
def print_package_dependencies(packages):
    """Print a markdown table with own vs. imported code for each package.

    The last column is imported lines as a percentage of own plus imported
    lines, shown with two decimals; it is 0.00 when a package has no lines at
    all. A blank line follows the table.
    """
    print('| Package name | Non-shared files | Non-shared lines of code | Imported files | Imported lines of code | Shared code % |')
    print('| ------------ | ---------------- | ------------------------ | -------------- | ---------------------- | ------------- |')
    for pack in packages:
        combined_lines = pack.lines + pack.total_imported_lines
        if combined_lines > 0:
            shared_percentage = 100 * pack.total_imported_lines / combined_lines
        else:
            shared_percentage = 0
        columns = (
            pack.link(),
            pack.files,
            pack.lines,
            pack.total_imported_files,
            pack.total_imported_lines,
            "%.2f" % shared_percentage,
        )
        print('|', ' | '.join(str(column) for column in columns), '|')
    print()
def print_language_dependencies(packages):
    """Print the package table restricted to packs named like ``<x>-all``.

    A package is kept when its name ends in ``-all`` and contains exactly one
    hyphen.
    """
    def is_selected(pack):
        return pack.name.endswith('-all') and pack.name.count('-') == 1

    print_package_dependencies(list(filter(is_selected, packages)))
def list_shared_code_by_language(language_info):
    """Print a markdown table of non-shared vs. imported code per language.

    For each language directory the table lists (1) the files inside the
    directory that are not shared and (2) the code it imports: packages from
    outside the directory plus identical files. The last column is imported
    lines as a percentage of all lines, 0.00 when there are none. A blank line
    follows the table.
    """
    print('| Language | Non-shared files | Non-shared lines of code | Imported files | Imported lines of code | Shared code % |')
    print('| -------- | ---------------- | ------------------------ | -------------- | ---------------------- | ------------- |')
    for name, stats in language_info.items():
        all_lines = stats.imported_lines + stats.nonshared_lines
        if all_lines > 0:
            shared_percentage = 100 * stats.imported_lines / all_lines
        else:
            shared_percentage = 0
        columns = (
            name,
            stats.nonshared_files,
            stats.nonshared_lines,
            stats.imported_files,
            stats.imported_lines,
            "%.2f" % shared_percentage,
        )
        print('|', ' | '.join(str(column) for column in columns), '|')
    print()
# Output reports: a markdown document written to standard output
print('# Report on CodeQL code sharing\n')
print('Generated on', datetime.datetime.now(), end='\n\n')

# Section 1: totals per language, followed by the column definitions and the
# heading of the next section (both are part of the same text block).
print('## Shared code by language\n')
list_shared_code_by_language(language_info)
column_definitions = '''
* *Non-shared files*: The number of CodeQL files (`.ql`/`.qll`) that are only used within this language folder. Excludes `identical-files.json` that are shared between multiple languages.
* *Non-shared lines of code*: The number of lines of code in the non-shared files.
* *Imported files*: All CodeQL files (`.ql`/`.qll`) files that are transitively used in this language folder, either via packages or `identical-files.json`
* *Imported lines of code*: The number of lines of code in the imported files
* *Shared code %*: The proportion of imported lines / total lines (nonshared + imported).
## Shared packages use by language
A package is *used* if it is a direct or indirect dependency, or a file shared via `identical-files.json`.
'''
print(column_definitions)

# Section 2: which languages use which shared asset
list_assets(shared_assets, language_info)

# Section 3: the packs selected by print_language_dependencies
print('## Shared code by language pack\n')
print_language_dependencies(packages)

# Section 4: every package
print('## Shared code by package\n')
print_package_dependencies(packages)