Add new license validator tool (#9060)

Co-authored-by: Pawel Winogrodzki <pawelwi@microsoft.com>
This commit is contained in:
Daniel McIlvaney 2024-07-25 14:26:09 -07:00 коммит произвёл GitHub
Родитель 7b1635b878
Коммит 5016f3f5f9
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
29 изменённых файлов: 644392 добавлений и 6 удалений

Просмотреть файл

@ -78,6 +78,14 @@ ENABLE_CPU_PROFILE ?= n
ENABLE_MEM_PROFILE ?= n
ENABLE_TRACE ?= n
# License checking tool
# User-tunable knobs consumed by the license-check targets in analysis.mk. All of them use '?=' so they can be
# overridden from the command line or the environment.
##help:var:LICENSE_CHECK_DIRS:"<rpm_dir_1> <rpm_dir_2>"=Space separated list of directories to recursively validate with the manual 'license-check' target.
LICENSE_CHECK_DIRS ?=
# JSON file of per-package/global regexes for files the license check should ignore (passed as --exception-file).
LICENSE_CHECK_EXCEPTION_FILE ?= $(MANIFESTS_DIR)/package/license_file_exceptions.json
# JSON file of regexes used to recognize license file names (passed as --name-file).
LICENSE_CHECK_NAME_FILE ?= $(MANIFESTS_DIR)/package/license_file_names.json
##help:var:LICENSE_CHECK_MODE:{none,warn,fatal,pedantic}=Set the license check mode during package and image builds. 'none' will disable the license check, 'warn' will print warnings, 'fatal' will stop the build on errors, 'pedantic' will stop the build on warnings and errors.
LICENSE_CHECK_MODE ?= none
# Folder defines
TOOLS_DIR ?= $(toolkit_root)/tools
TOOL_BINS_DIR ?= $(toolkit_root)/out/tools
@ -254,7 +262,11 @@ include $(SCRIPTS_DIR)/pkggen.mk
include $(SCRIPTS_DIR)/imggen.mk
# Add make targets for sodiff to determine if additional packages are required to be recompiled:
# sodiff-check, build-summary, build-package-summary, fake-built-packages-list, sodiff-setup
# sodiff-check, sodiff-setup
# Get build info with:
# build-summary, build-package-summary, fake-built-packages-list
# Validate rpm licenses with:
# license-check, license-check-img, clean-license-check
include $(SCRIPTS_DIR)/analysis.mk
##help:target:clean=Clean all built files.

Просмотреть файл

@ -88,6 +88,8 @@ The `imagepkgfetcher` tool is similar to the `graphpkgfetcher` tool. It will fin
The `imager` tool is responsible for composing an image based on the selected configuration file. It creates partitions, installs packages, configures the users, etc. It can output either a `*.raw` file or a simple filesystem.
#### isomaker
The `isomaker` tool creates an installable ISO which can be booted from a CD or other device. The ISO contains the `initrd` used to boot from a read-only device, and all the packages needed to create a copy of the selected configuration on a new computer.
#### licensecheck
The `licensecheck` tool is used to validate the licensing files in packages. It will recursively check all `*.rpm` files in the given directories and provide a list of issues found.
#### liveinstaller
The `liveinstaller` tool is included in the ISO `initrd` and is responsible for installing the requested image onto a new computer.
#### pkgworker

Просмотреть файл

@ -0,0 +1,37 @@
{
"_comment1": "This file is used to allow specific files to be omitted from the license check process.",
"_comment2": "Each PkgException entry is a {'PackageName' and 'IgnoredFilesRegexList'}",
"_comment3": "The 'PackageName' is the name of the (sub)package to which the exception applies.",
"_comment4": "The 'IgnoredFilesRegexList' is a list of regular expressions that match files to be omitted from the license check.",
"_comment5": "The 'GlobalExceptionsRegexList' is a list of regular expressions that match all packages.",
"PkgExceptions": [
{
"PackageName": "gcc",
"IgnoredFilesRegexList": [
"^/usr/share/man/man7/gpl\\.7\\.gz$"
]
},
{
"PackageName": "libdb-docs",
"IgnoredFilesRegexList": [
"^/usr/share/doc/libdb-[0-9\\.]+/installation/build_unix_freebsd\\.html$",
"^/usr/share/doc/libdb-[0-9\\.]+/license/license_db\\.html$"
]
},
{
"PackageName": "perl-doc",
"IgnoredFilesRegexList": [
"^/usr/share/man/.*"
]
},
{
"PackageName": "tar",
"IgnoredFilesRegexList": [
"^/usr/share/doc/tar-[0-9\\.]+/tar\\.html/GNU-Free-Documentation-License\\.html$"
]
}
],
"GlobalExceptionsRegexList": [
]
}

Просмотреть файл

@ -0,0 +1,29 @@
{
"_comment1": "This file lists the regexes used to match license files in packages.",
"_comment2": "FuzzyLicenseNamesRegexList is a list of license names that should be matched in a case-insensitive sub-string search",
"_comment3": "Any common license path prefixes are removed before matching (ie /usr/share/licenses/<pkg>/).",
"_comment4": "VerbatimLicenseNamesRegexList is a list of license names that should be matched exactly against the basename of a file",
"_comment5": "SkipLicenseNamesRegexList is a list of files that may appear as a license file but generally aren't really licenses",
"FuzzyLicenseNamesRegexList": [
"(?i).*copying.*",
"(?i).*license.*",
"(?i).*licence.*",
"(?i).*licensing.*",
"(?i).*notice.*",
"(?i).*copyright.*",
"(?i).*artistic.*",
"(?i).*bsd.*",
"(?i).*gpl.*",
"(?i).*cc0.*",
"(?i).*mit\\.txt.*"
],
"VerbatimLicenseNamesRegexList": [
"^MIT$"
],
"SkipLicenseNamesRegexList": [
"(?i).*AUTHORS.*",
"(?i).*CONTRIBUTORS.*",
"(?i).*README.*",
"(?i).*CREDITS.*"
]
}

Просмотреть файл

@ -5,9 +5,12 @@
# - Generate list of built packages
# - Run check for ABI changes of built packages.
# - Run check for .so files version change of built packages.
# - Validate package licenses
# Requires DNF on Azure Linux / yum and yum-utils on Ubuntu.
######## SODIFF and BUILD SUMMARY ########
# A folder with sodiff-related artifacts
SODIFF_OUTPUT_FOLDER=$(BUILD_DIR)/sodiff
RPM_BUILD_LOGS_DIR=$(LOGS_DIR)/pkggen/rpmbuilding
@ -86,3 +89,55 @@ sodiff-check: $(BUILT_PACKAGES_FILE) | $(SODIFF_REPO_FILE)
<$(BUILT_PACKAGES_FILE) $(SODIFF_SCRIPT) $(RPMS_DIR)/ $(SODIFF_REPO_FILE) $(RELEASE_MAJOR_ID) $(SODIFF_OUTPUT_FOLDER)
package-toolkit: $(SODIFF_REPO_FILE)
######## LICENSE CHECK ########
# Working directory handed to the licensecheck tool via --build-dir.
license_check_build_dir = $(BUILD_DIR)/license_check_tool
# Output directory holding the package-level results file.
license_out_dir = $(OUT_DIR)/license_check
# Results .json written by the 'license-check' and 'license-check-pkg' targets.
license_results_file_pkg = $(license_out_dir)/license_check_results_pkg.json
# Text summary written by every license-check target (note: lives in the build dir, not the output dir).
license_summary = $(license_check_build_dir)/license_check_summary.txt
.PHONY: license-check license-check-pkg license-check-img clean-license-check
# Hook the license check cleanup into the global 'clean' target.
clean: clean-license-check
# safeunmount.sh runs first and the deletes are chained with '&&', so nothing is removed if it fails
# (presumably it releases any leftover mounts under the build dir - confirm against the script).
clean-license-check:
@echo Verifying no mountpoints present in $(license_check_build_dir)
$(SCRIPTS_DIR)/safeunmount.sh "$(license_check_build_dir)" && \
rm -rf $(license_check_build_dir) && \
rm -rf $(license_out_dir)
# Prerequisites shared by all license-check targets: the tool binary, the worker chroot archive, both .json
# configuration files, and the change tracker for LICENSE_CHECK_MODE.
license_check_common_deps = $(go-licensecheck) $(chroot_worker) $(LICENSE_CHECK_EXCEPTION_FILE) $(LICENSE_CHECK_NAME_FILE) $(depend_LICENSE_CHECK_MODE)
# licensecheck-command: Helper function to run licensecheck with the given parameters.
# Expands to a single shell command (all lines are joined with '\'), so it must be used from a recipe via $(call ...).
# $(1): List of directories to check for licenses. One --rpm-dirs argument is emitted per directory.
# $(2): (optional)Results .json file
# $(3): (optional)Results summary .txt file
# $(4): Log file
# Every value argument that may hold a path or user-provided text is quoted so the shell passes it as one word.
define licensecheck-command
$(go-licensecheck) \
	$(foreach license_dir, $(1),--rpm-dirs="$(license_dir)" ) \
	--exception-file="$(LICENSE_CHECK_EXCEPTION_FILE)" \
	--name-file="$(LICENSE_CHECK_NAME_FILE)" \
	--worker-tar="$(chroot_worker)" \
	--build-dir="$(license_check_build_dir)" \
	--dist-tag="$(DIST_TAG)" \
	--mode="$(LICENSE_CHECK_MODE)" \
	$(if $(2),--results-file="$(2)") \
	$(if $(3),--summary-file="$(3)") \
	--log-file="$(4)" \
	--log-level=$(LOG_LEVEL)
endef
# Manual check: fails at recipe expansion time when LICENSE_CHECK_DIRS is empty, otherwise scans every listed directory.
##help:target:license-check=Validate all packages in any of LICENSE_CHECK_DIRS for license compliance.
license-check: $(license_check_common_deps)
$(if $(LICENSE_CHECK_DIRS),,$(error Must set LICENSE_CHECK_DIRS=))
$(call licensecheck-command,$(LICENSE_CHECK_DIRS),$(license_results_file_pkg),$(license_summary),$(LOGS_DIR)/licensecheck/license-check-manual.log)
# Package check: depends on $(RPMS_DIR) and scans it, writing to the same package-level results file as 'license-check'.
##help:target:license-check-pkg=Validate all packages in $(RPMS_DIR) for license compliance, building packages as needed.
license-check-pkg: $(license_check_common_deps) $(RPMS_DIR)
$(call licensecheck-command,$(RPMS_DIR),$(license_results_file_pkg),$(license_summary),$(LOGS_DIR)/licensecheck/license-check-pkg.log)
# Image check: the phony target is an alias for the image results file, which is a real file target and is only
# regenerated when one of its prerequisites is newer.
##help:target:license-check-img=Validate all packages needed for an image for license compliance. Must set CONFIG_FILE=<path_to_config>.
license-check-img: $(license_results_file_img)
$(license_results_file_img): $(license_check_common_deps) $(image_package_cache_summary)
$(call licensecheck-command,$(local_and_external_rpm_cache),$(license_results_file_img),$(license_summary),$(LOGS_DIR)/licensecheck/license-check-img.log)

Просмотреть файл

@ -44,6 +44,7 @@ meta_user_data_tmp_dir = $(IMAGEGEN_DIR)/meta-user-data_tmp
image_package_cache_summary = $(imggen_config_dir)/image_deps.json
image_external_package_cache_summary = $(imggen_config_dir)/image_external_deps.json
image_package_manifest = $(imggen_config_dir)/image_pkg_manifest.json
license_results_file_img = $(imggen_config_dir)/license_check_results.json
# Outputs
artifact_dir = $(IMAGES_DIR)/$(config_name)

Просмотреть файл

@ -42,6 +42,7 @@ go_tool_list = \
imagepkgfetcher \
imager \
isomaker \
licensecheck \
liveinstaller \
osmodifier \
pkgworker \

Просмотреть файл

@ -55,10 +55,10 @@ endef
######## VARIABLE DEPENDENCY TRACKING ########
# List of variables to watch for changes.
watch_vars=PACKAGE_BUILD_LIST PACKAGE_REBUILD_LIST PACKAGE_IGNORE_LIST REPO_LIST CONFIG_FILE STOP_ON_PKG_FAIL TOOLCHAIN_ARCHIVE REBUILD_TOOLCHAIN SRPM_PACK_LIST SPECS_DIR MAX_CASCADING_REBUILDS RUN_CHECK TEST_RUN_LIST TEST_RERUN_LIST TEST_IGNORE_LIST EXTRA_BUILD_LAYERS
watch_vars=PACKAGE_BUILD_LIST PACKAGE_REBUILD_LIST PACKAGE_IGNORE_LIST REPO_LIST CONFIG_FILE STOP_ON_PKG_FAIL TOOLCHAIN_ARCHIVE REBUILD_TOOLCHAIN SRPM_PACK_LIST SPECS_DIR MAX_CASCADING_REBUILDS RUN_CHECK TEST_RUN_LIST TEST_RERUN_LIST TEST_IGNORE_LIST EXTRA_BUILD_LAYERS LICENSE_CHECK_MODE
# Current list: $(depend_PACKAGE_BUILD_LIST) $(depend_PACKAGE_REBUILD_LIST) $(depend_PACKAGE_IGNORE_LIST) $(depend_REPO_LIST) $(depend_CONFIG_FILE) $(depend_STOP_ON_PKG_FAIL)
# $(depend_TOOLCHAIN_ARCHIVE) $(depend_REBUILD_TOOLCHAIN) $(depend_SRPM_PACK_LIST) $(depend_SPECS_DIR) $(depend_EXTRA_BUILD_LAYERS) $(depend_MAX_CASCADING_REBUILDS) $(depend_RUN_CHECK) $(depend_TEST_RUN_LIST)
# $(depend_TEST_RERUN_LIST) $(depend_TEST_IGNORE_LIST)
# $(depend_TEST_RERUN_LIST) $(depend_TEST_IGNORE_LIST) $(depend_LICENSE_CHECK_MODE)
.PHONY: variable_depends_on_phony clean-variable_depends_on_phony setfacl_always_run_phony
clean: clean-variable_depends_on_phony
@ -73,9 +73,9 @@ clean-variable_depends_on_phony:
# they will alway run. Each rule will check the currently stored value in the file and only
# update it if needed.
# Generate a target which watches a variable for changes so rebuilds can be
# triggered if needed. Uses one file per variable. If the value of the variable
# is not the same as recorded in the file, update the file to match. This will
# Generate a target which watches a variable for changes so rebuilds can be
# triggered if needed. Uses one file per variable. If the value of the variable
# is not the same as recorded in the file, update the file to match. This will
# force a rebuild of any dependent targets.
#
# $1 - name of the variable to watch for changes

Просмотреть файл

@ -368,6 +368,50 @@ func QueryPackage(packageFile, queryFormat string, defines map[string]string, ex
return executeRpmCommand(rpmProgram, args...)
}
// QueryPackageFiles lists the contents of an RPM, bucketed by kind:
//   - allFilesAndDirectories: every path recorded in the package
//   - files: every path that is not a directory (regular files, symlinks etc.)
//   - directories: only the directory entries
//   - documentFiles: paths marked as documentation (%doc)
//   - licenseFiles: paths marked as license (%license)
//
// On any failure all returned slices are nil and err describes the query that failed.
func QueryPackageFiles(packageFile string, defines map[string]string,
) (allFilesAndDirectories, files, directories, documentFiles, licenseFiles []string, err error) {
	const (
		allFilesQueryFormat = "[%{FILEMODES:perms} %{FILENAMES}\n]"
		permsSeparator      = " "
		directoryPermsMark  = "d"
	)

	permLines, err := QueryPackage(packageFile, allFilesQueryFormat, defines)
	if err != nil {
		return nil, nil, nil, nil, nil, fmt.Errorf("failed to query package (%s) files:\n%w", packageFile, err)
	}

	// Each line carries the permission string followed by the path, for example:
	//   drwxr-xr-x /a/directory
	//   -rw-r--r-- /a/directory/a_file
	// A leading 'd' in the permissions marks a directory; anything else is treated as a file (or symlink etc.).
	for _, permLine := range permLines {
		perms, entryPath, hasSeparator := strings.Cut(permLine, permsSeparator)
		if !hasSeparator {
			return nil, nil, nil, nil, nil, fmt.Errorf("failed to parse package (%s) file contents (%s)", packageFile, permLine)
		}

		allFilesAndDirectories = append(allFilesAndDirectories, entryPath)
		if strings.HasPrefix(perms, directoryPermsMark) {
			directories = append(directories, entryPath)
			continue
		}
		files = append(files, entryPath)
	}

	// Documentation and license files have dedicated rpm query switches, so no query format is needed for them.
	documentFiles, err = QueryPackage(packageFile, "", defines, "-d")
	if err != nil {
		return nil, nil, nil, nil, nil, fmt.Errorf("failed to query package (%s) documentation files:\n%w", packageFile, err)
	}

	licenseFiles, err = QueryPackage(packageFile, "", defines, "-L")
	if err != nil {
		return nil, nil, nil, nil, nil, fmt.Errorf("failed to query package (%s) license files:\n%w", packageFile, err)
	}

	return allFilesAndDirectories, files, directories, documentFiles, licenseFiles, nil
}
// BuildRPMFromSRPM builds an RPM from the given SRPM file but does not run its '%check' section.
func BuildRPMFromSRPM(srpmFile, outArch string, defines map[string]string) (err error) {
const squashErrors = true

Просмотреть файл

@ -0,0 +1,169 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
// A tool for validating the license files of RPM packages in a set of directories.
package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/microsoft/azurelinux/toolkit/tools/internal/exe"
"github.com/microsoft/azurelinux/toolkit/tools/internal/file"
"github.com/microsoft/azurelinux/toolkit/tools/internal/logger"
"github.com/microsoft/azurelinux/toolkit/tools/pkg/licensecheck"
"github.com/microsoft/azurelinux/toolkit/tools/pkg/licensecheck/licensecheckformat"
"gopkg.in/alecthomas/kingpin.v2"
)
// Command line definition for the licensecheck tool. Every flag is registered on 'app' and is only valid to
// dereference after app.Parse() has run in main().
var (
app = kingpin.New("licensecheck", "A tool for validating the license files of RPM packages.")
// Inputs: the flag may be repeated, once per directory; name-file is mandatory while exception-file is optional.
rpmDirs = app.Flag("rpm-dirs", "Directories to recursively scan for RPMs to validate").Required().ExistingDirs()
nameFile = app.Flag("name-file", "File containing license names to check for.").Required().ExistingFile()
exceptionFile = app.Flag("exception-file", "File containing license exceptions.").ExistingFile()
// Validation strictness; restricted to the strings returned by licensecheck.ValidLicenseCheckModeStrings().
mode = app.Flag("mode", "Level of license validation to perform").Default(string(licensecheck.LicenseCheckModeDefault)).Enum(licensecheck.ValidLicenseCheckModeStrings()...)
// Chroot environment settings.
buildDirPath = app.Flag("build-dir", "Directory to store temporary files.").Required().String()
distTag = app.Flag("dist-tag", "The distribution tag.").Required().String()
workerTar = app.Flag("worker-tar", "Full path to worker_chroot.tar.gz.").Required().ExistingFile()
logFlags = exe.SetupLogFlags(app)
// Optional outputs; main() skips writing a file when its flag is left empty.
resultFile = app.Flag("results-file", "The file to store the search result.").Default("").String()
summaryFile = app.Flag("summary-file", "File to save the license check summary to.").String()
)
// main parses the command line, scans every requested RPM directory, prints a summary, optionally writes the
// results and summary files, and finally reports the overall outcome: a fatal log entry when any package has
// license errors, a warning when only non-fatal issues were found.
func main() {
	app.Version(exe.ToolkitVersion)
	kingpin.MustParse(app.Parse(os.Args[1:]))
	logger.InitBestEffort(logFlags)

	// Use a distinct local name so the package-level 'mode' flag is not shadowed.
	checkMode := licensecheck.LicenseCheckMode(*mode)

	results, numFailures, numWarnings := scanDirectories(*rpmDirs, *buildDirPath, *workerTar, *nameFile, *exceptionFile, *distTag, checkMode)

	printSummary(numFailures, numWarnings)

	if *resultFile != "" {
		logger.Log.Infof("Writing results to file (%s)", *resultFile)
		err := licensecheck.SaveLicenseCheckResults(*resultFile, results)
		if err != nil {
			logger.Log.Fatalf("Failed to write results to file:\n%v", err)
		}
	}

	if *summaryFile != "" {
		logger.Log.Infof("Writing summary to file (%s)", *summaryFile)
		resultsString := licensecheckformat.FormatResults(results, checkMode)

		// The summary may be the first file written into its directory, so create the directory up front.
		err := os.MkdirAll(filepath.Dir(*summaryFile), os.ModePerm)
		if err != nil {
			logger.Log.Fatalf("failed to create directory for summary file. Error:\n%v", err)
		}

		err = file.Write(resultsString, *summaryFile)
		if err != nil {
			logger.Log.Fatalf("Failed to write summary to file:\n%v", err)
		}
	}

	// The output files are written before failing so they are available for inspection after a failed check.
	if numFailures > 0 {
		logger.Log.Fatal("License check failed")
	}

	if numWarnings > 0 {
		logger.Log.Warn("License check completed with warnings")
	}
}
// scanDirectories validates each directory in rpmDirs in turn and accumulates the findings.
//   - results: every result from every directory, in directory order (empty but non-nil when nothing was found)
//   - failed: number of results with at least one error
//   - warnings: number of results with at least one warning
//
// When mode is LicenseCheckModeNone nothing is scanned and (nil, 0, 0) is returned. A failure to scan any
// directory is logged as fatal.
func scanDirectories(rpmDirs []string, buildDirPath, workerTar, nameFile, exceptionFile, distTag string,
	mode licensecheck.LicenseCheckMode,
) (results []licensecheck.LicenseCheckResult, failed int, warnings int) {
	if mode == licensecheck.LicenseCheckModeNone {
		logger.Log.Infof("License check mode is set to (%s), skipping license check", mode)
		return nil, 0, 0
	}

	// Start from an empty (non-nil) slice so an empty scan still yields an empty list rather than nil.
	results = []licensecheck.LicenseCheckResult{}
	for idx := range rpmDirs {
		dirResults, dirErrors, dirWarnings, scanErr := validateRpmDir(buildDirPath, workerTar, rpmDirs[idx], nameFile, exceptionFile, distTag, mode)
		if scanErr != nil {
			logger.Log.Fatalf("Failed to search RPM directory:\n%v", scanErr)
		}

		results = append(results, dirResults...)
		failed += len(dirErrors)
		warnings += len(dirWarnings)
	}

	return results, failed, warnings
}
// printSummary logs the overall outcome of the scan. When any issue was found it first logs an explanation of
// the issue categories and how to fix them, followed by the error and warning counts.
//   - numFailures: number of packages with license errors
//   - numWarnings: number of packages with non-fatal license issues
func printSummary(numFailures, numWarnings int) {
	// NOTE: this text is logged verbatim (it is never used as a format string), so '%' must not be escaped.
	// The '{{.exceptionFile}}' placeholder is substituted with the --exception-file path before logging.
	const explanation = `
Errors/warnings fall into three buckets:
1. 'bad %doc files': A %doc documentation file that the tool believes to be a license file.
2. 'bad general file': A file that is placed into '/usr/share/licenses/' that is not flagged as
a license file. These files should use %license instead of %doc. Ideally they should also
not be placed in a directory manually. (e.g. prefer '%license COPYING' over
'%license %{_docdir}/%{name}/COPYING')
3. 'duplicated license files': A license file that is both a %license and a %doc file, pick one.
This is a warning, unless the tool is run in pedantic mode, in which case it is an error.
How to fix:
- 'False positives': In all cases, a detection may be suppressed by using the exception file:
{{.exceptionFile}}.
This file contains per-package and global exceptions in the form of regexes.
- 'bad %doc files': Mark it using %license, ideally without using a buildroot path (e.g. use '%license COPYING').
- 'bad general file': Mark it using %license, ideally without using a buildroot path (e.g. use '%license COPYING').
- 'duplicated license files': If they are actually equivalent, remove the copy in the documentation.
- Query package contents with 'rpm -ql <package>.rpm' to see all files, 'rpm -qL <package>.rpm' to
see only the license files, and 'rpm -qd <package>.rpm' to see only the documentation files.`

	if numFailures <= 0 && numWarnings <= 0 {
		logger.Log.Infof("No license issues found")
		return
	}

	logger.Log.Info(strings.ReplaceAll(explanation, "{{.exceptionFile}}", *exceptionFile))
	if numFailures > 0 {
		logger.Log.Errorf("Found %d packages with license errors", numFailures)
	}
	// The warning count is reported whenever there is anything to report, matching the error case output.
	logger.Log.Warnf("Found %d packages with non-fatal license issues", numWarnings)
}
// validateRpmDir scans the given directory for RPMs and validates their licenses. Each call creates a new
// chroot environment and cleans it up after the scan.
//
// Results are returned in the order (all, failed, warnings):
//   - allResults: every scan result for the directory
//   - failedResults: results with at least one error
//   - warningResults: results with at least one warning
//
// A cleanup failure is reported through err, appended to any earlier error.
func validateRpmDir(buildDirPath, workerTar, rpmDir, nameFile, exceptionFile, distTag string,
	mode licensecheck.LicenseCheckMode,
) (allResults, failedResults, warningResults []licensecheck.LicenseCheckResult, err error) {
	logger.Log.Infof("Preparing license check environment for (%s)", rpmDir)
	licenseChecker, err := licensecheck.New(buildDirPath, workerTar, rpmDir, nameFile, exceptionFile, distTag)
	if err != nil {
		return nil, nil, nil, fmt.Errorf("failed to initialize RPM license checker:\n%w", err)
	}
	defer func() {
		cleanupErr := licenseChecker.Cleanup()
		if cleanupErr != nil {
			if err == nil {
				err = fmt.Errorf("failed to cleanup after RPM license checker:\n%w", cleanupErr)
			} else {
				// Append the cleanup error to the existing error
				err = fmt.Errorf("%w\nfailed to cleanup after RPM license checker failed:\n%w", err, cleanupErr)
			}
		}
	}()

	logger.Log.Infof("Scanning (%s) for license issues", rpmDir)
	_, err = licenseChecker.CheckLicenses(false)
	if err != nil {
		return nil, nil, nil, fmt.Errorf("failed to generate license scan:\n%w", err)
	}

	// GetResults returns (all, warnings, errors); map them onto this function's (all, failed, warnings) order.
	allResults, warningResults, failedResults = licenseChecker.GetResults(mode)

	resultsString := licensecheckformat.FormatResults(allResults, mode)
	logger.Log.Infof("Search results for (%s):\n%s", rpmDir, resultsString)

	return allResults, failedResults, warningResults, nil
}

Просмотреть файл

@ -0,0 +1,359 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
/*
Package licensecheck provides a tool for searching RPMs for bad licenses, as well as several directly callable functions.
The core of the tool is the LicenseChecker struct, which is responsible for searching RPMs for bad licenses. The tool is
based on a 'simpletoolchroot' which is a wrapper that allows for easy chroot creation and cleanup.
The lifecycle of the LicenseChecker is as follows:
1. Create a new LicenseChecker with New()
2. Call CheckLicenses() to search for bad licenses
3. Either:
- Call FormatResults() to get a formatted string of the results
- Call GetResults() to get all the results, split into buckets.
4. Call Cleanup() to tear down the chroot
Also provided are several directly callable functions (these expect to be run in an environment with the necessary
macros, i.e. a chroot): CheckRpmLicenses(), IsALicenseFile(), IsASkippedLicenseFile()
The LicenseCheckResult struct is used to store the results of the search. It contains the path to the RPM, a list of
bad documents, a list of bad files, and a list of duplicated documents. The bad documents are %doc files that are not
at least also in the license files. The bad files are general files that are misplaced in the licenses directory.
The duplicated documents are %doc files that are also in the license files. These are not technically bad, but are messy
and should be cleaned up.
*/
package licensecheck
import (
"context"
"fmt"
"os"
"path/filepath"
"runtime"
"sort"
"strings"
"github.com/microsoft/azurelinux/toolkit/tools/internal/logger"
"github.com/microsoft/azurelinux/toolkit/tools/internal/rpm"
"github.com/microsoft/azurelinux/toolkit/tools/internal/sliceutils"
"github.com/microsoft/azurelinux/toolkit/tools/pkg/simpletoolchroot"
)
// licensePrefix is the root of the directory tree in which every packaged file is expected to be a %license file.
const licensePrefix = "/usr/share/licenses"
// LicenseChecker is a tool for searching RPMs for bad licenses. Instances are created with New() and must be
// released with Cleanup(); a nil simpleToolChroot marks a checker that is not (or no longer) usable.
type LicenseChecker struct {
simpleToolChroot *simpletoolchroot.SimpleToolChroot // The chroot to scan the RPMs in
distTag string // The distribution tag to use when parsing RPMs
licenseNames LicenseNames // The regexes used to match license files
exceptions LicenseExceptions // Files that should be ignored
results []LicenseCheckResult // The results of the search, accumulated across CheckLicenses() calls
jobSemaphore chan struct{} // Limit the number of parallel jobs
}
// New creates a new license checker. If this returns successfully the caller is responsible for calling Cleanup().
// If it fails after the chroot was initialized, the chroot is torn down before returning.
//   - buildDirPath: The path to create the chroot inside
//   - workerTarPath: The path to the worker tarball
//   - rpmDirPath: The path to the directory containing the RPMs
//   - nameFilePath: The path to the .json file containing license names
//   - exceptionFilePath: Optional, the path to the .json file containing license exceptions to ignore
//   - distTag: The distribution tag to use when parsing RPMs
func New(buildDirPath, workerTarPath, rpmDirPath, nameFilePath, exceptionFilePath, distTag string,
) (newLicenseChecker *LicenseChecker, err error) {
	const chrootName = "license_chroot"

	// Build the checker in a local variable. The error paths below 'return nil, err', which sets the named
	// result to nil BEFORE deferred functions run; the deferred cleanup must therefore not go through the
	// named result or it would dereference a nil pointer.
	checker := &LicenseChecker{
		distTag:          distTag,
		simpleToolChroot: &simpletoolchroot.SimpleToolChroot{},
		jobSemaphore:     make(chan struct{}, runtime.NumCPU()*2),
	}

	err = checker.simpleToolChroot.InitializeChroot(buildDirPath, chrootName, workerTarPath, rpmDirPath)
	if err != nil {
		err = fmt.Errorf("failed to initialize chroot:\n%w", err)
		return nil, err
	}
	defer func() {
		if err != nil {
			cleanupErr := checker.Cleanup()
			if cleanupErr != nil {
				// Append the cleanup error to the existing error
				err = fmt.Errorf("%w\nfailed to cleanup after failing to create a new LicenseChecker:\n%w", err, cleanupErr)
			}
		}
	}()

	checker.licenseNames, err = LoadLicenseNames(nameFilePath)
	if err != nil {
		err = fmt.Errorf("failed to load license names:\n%w", err)
		return nil, err
	}

	// The exception file is optional; without it no files are exempt from the check.
	if exceptionFilePath != "" {
		checker.exceptions, err = LoadLicenseExceptions(exceptionFilePath)
		if err != nil {
			err = fmt.Errorf("failed to load license exceptions:\n%w", err)
			return nil, err
		}
	}

	return checker, nil
}
// Cleanup releases the chroot owned by the checker, if there is one. After a successful teardown the chroot
// reference is cleared, so further calls are no-ops and CheckLicenses() will refuse to run. If the teardown
// fails the reference is kept and the error is returned.
func (l *LicenseChecker) Cleanup() error {
	if l.simpleToolChroot == nil {
		return nil
	}

	if cleanupErr := l.simpleToolChroot.CleanUp(); cleanupErr != nil {
		return fmt.Errorf("failed to cleanup chroot:\n%w", cleanupErr)
	}

	l.simpleToolChroot = nil
	return nil
}
// CheckLicenses scans every .rpm file in the chroot for license issues. The unfiltered results of this scan are
// returned sorted by RPM path, and are also appended to the checker's cumulative list which is available through
// GetResults(). It fails if the checker has no chroot (never initialized, or already cleaned up).
func (l *LicenseChecker) CheckLicenses(quiet bool) (latestResults []LicenseCheckResult, err error) {
	if l.simpleToolChroot == nil {
		return nil, fmt.Errorf("license checker is not initialized, use New() to create a new license checker")
	}

	scanInChroot := func() error {
		var scanErr error
		latestResults, scanErr = l.runLicenseCheckInChroot(quiet)
		return scanErr
	}
	if chrootErr := l.simpleToolChroot.RunInChroot(scanInChroot); chrootErr != nil {
		return nil, fmt.Errorf("failed to scan for license issues:\n%w", chrootErr)
	}

	// Workers finish in arbitrary order; ordering by RPM path keeps the output deterministic.
	sort.Slice(latestResults, func(left, right int) bool {
		return latestResults[left].RpmPath < latestResults[right].RpmPath
	})

	l.results = append(l.results, latestResults...)
	return latestResults, nil
}
// GetResults reports everything found so far by this checker, split into three lists:
//   - all: every scan result accumulated across CheckLicenses() calls
//   - warnings: results that have at least one warning under the given mode
//   - errors: results that have at least one error under the given mode
func (l *LicenseChecker) GetResults(mode LicenseCheckMode) (all, warnings, errors []LicenseCheckResult) {
	all = l.results
	_, warnings, errors = SortAndFilterResults(all, mode)
	return all, warnings, errors
}
// licenseCheckReturn is the message a worker goroutine sends back over the results channel for one RPM.
// Exactly one is sent per started worker; when err is set, finding is left at its zero value.
type licenseCheckReturn struct {
finding LicenseCheckResult
err error
}
// runLicenseCheckInChroot checks every RPM mounted into the chroot for license issues, running the per-RPM checks
// in parallel. It is meant to be called from inside the chroot's context. The first worker error aborts the scan:
// queuing of further workers is cancelled and no findings are returned. Unless quiet is set, progress is logged
// roughly every 10%.
func (l *LicenseChecker) runLicenseCheckInChroot(quiet bool) (findings []LicenseCheckResult, err error) {
	const searchReportIntervalPercent = 10 // Report progress to the user every 10%

	// logInfo forwards to the logger unless the caller asked for a quiet scan.
	logInfo := func(format string, args ...interface{}) {
		if !quiet {
			logger.Log.Infof(format, args...)
		}
	}

	// Find all the rpms in the chroot
	rpmsToSearchPaths, err := l.findRpmPaths()
	if err != nil {
		return nil, fmt.Errorf("failed to walk rpm directory:\n%w", err)
	}

	totalRpms := len(rpmsToSearchPaths)
	if totalRpms == 0 {
		logger.Log.Warnf("No rpms found in (%s)", l.simpleToolChroot.ChrootRelativeMountDir())
		return nil, nil
	}

	// The channel has room for one message per RPM, so workers never block on sending even if this function
	// has already returned early.
	ctx, cancelFunc := context.WithCancel(context.Background())
	defer cancelFunc()
	resultsChannel := make(chan licenseCheckReturn, totalRpms)

	logInfo("Queuing %d rpms for license check", totalRpms)
	go l.queueWorkers(ctx, rpmsToSearchPaths, resultsChannel)
	logInfo("Checking RPMs for license issues")

	// Collect exactly one message per RPM, updating the progress as results come in
	lastReportPercent := 0
	for numProcessed := 1; numProcessed <= totalRpms; numProcessed++ {
		workerResult := <-resultsChannel
		if workerResult.err != nil {
			// Signal the workers to stop if there is an error
			cancelFunc()
			return nil, fmt.Errorf("failed to search rpm for license issues:\n%w", workerResult.err)
		}
		findings = append(findings, workerResult.finding)

		percentProcessed := (numProcessed * 100) / totalRpms
		if !quiet && percentProcessed-lastReportPercent >= searchReportIntervalPercent {
			logger.Log.Infof("Checked %d/%d rpms (%d%%)", numProcessed, totalRpms, percentProcessed)
			lastReportPercent = percentProcessed
		}
	}

	return findings, nil
}
// findRpmPaths recursively collects every non-directory entry ending in ".rpm" under the chroot's mount directory.
// The returned paths are relative to the chroot's root. Any error hit during the walk aborts it and is returned.
func (l *LicenseChecker) findRpmPaths() (foundRpmPaths []string, err error) {
	const rpmExtension = ".rpm"

	collectRpm := func(path string, info os.FileInfo, walkErr error) error {
		switch {
		case walkErr != nil:
			return walkErr
		case info.IsDir(), !strings.HasSuffix(path, rpmExtension):
			// Not an rpm file, nothing to record
			return nil
		}
		foundRpmPaths = append(foundRpmPaths, path)
		return nil
	}

	if walkErr := filepath.Walk(l.simpleToolChroot.ChrootRelativeMountDir(), collectRpm); walkErr != nil {
		return nil, fmt.Errorf("failed to walk directory:\n%w", walkErr)
	}

	return foundRpmPaths, nil
}
// queueWorkers starts one goroutine per RPM, never running more at once than the jobSemaphore has slots for.
// It returns once every worker has been started, or as soon as ctx is cancelled while waiting for a free slot.
// Each started worker sends exactly one licenseCheckReturn to resultsChannel.
func (l *LicenseChecker) queueWorkers(ctx context.Context, rpmsToSearchPaths []string, resultsChannel chan licenseCheckReturn) {
	runWorker := func(rpmPath string) {
		// Free the slot for the next worker no matter how this one ends
		defer func() {
			<-l.jobSemaphore
		}()

		rpmName := filepath.Base(rpmPath)
		logger.Log.Debugf("Searching (%s)", rpmName)
		searchResult, err := checkRpmLicenses(rpmPath, l.distTag, l.licenseNames, l.exceptions)
		logger.Log.Debugf("Finished searching (%s)", rpmName)

		outcome := licenseCheckReturn{finding: searchResult, err: nil}
		if err != nil {
			logger.Log.Errorf("License check worker failed with error: %v", err)
			outcome = licenseCheckReturn{err: err}
		}
		resultsChannel <- outcome
	}

	for _, rpmPath := range rpmsToSearchPaths {
		// Block until a slot is free, but give up if the scan has been cancelled
		select {
		case <-ctx.Done():
			return
		case l.jobSemaphore <- struct{}{}:
		}

		go runWorker(rpmPath)
	}
}
// checkRpmLicenses inspects a single RPM and returns a result struct describing every license issue found in it.
// The RPM is queried with the macros of the environment this runs in, so it is expected to be called in a chroot.
//   - rpmPath: The path to the RPM to check relative to the chroot's root.
//   - distTag: The distribution tag used to build the rpm defines.
//   - licenseNames: The regexes used to recognize license files.
//   - exceptions: The files which should be ignored.
func checkRpmLicenses(rpmPath, distTag string, licenseNames LicenseNames, exceptions LicenseExceptions) (result LicenseCheckResult, err error) {
	defines := rpm.DefaultDistroDefines(false, distTag)

	_, files, _, documentFiles, licenseFiles, err := rpm.QueryPackageFiles(rpmPath, defines)
	if err != nil {
		return LicenseCheckResult{}, fmt.Errorf("failed to query package contents:\n%w", err)
	}

	// The name query is expected to produce exactly one line for a single rpm file
	pkgNameLines, err := rpm.QueryPackage(rpmPath, "%{NAME}", defines)
	if err != nil {
		return LicenseCheckResult{}, fmt.Errorf("failed to query package:\n%w", err)
	}
	if nameCount := len(pkgNameLines); nameCount != 1 {
		return LicenseCheckResult{}, fmt.Errorf("failed to query package:\nexpected 1 package name, got %d", nameCount)
	}
	pkgName := pkgNameLines[0]

	badDocFiles, badOtherFiles, duplicatedDocs := interpretResults(pkgName, files, documentFiles, licenseFiles, licenseNames, exceptions)

	return LicenseCheckResult{
		RpmPath:        rpmPath,
		PackageName:    pkgName,
		BadDocs:        badDocFiles,
		BadFiles:       badOtherFiles,
		DuplicatedDocs: duplicatedDocs,
	}, nil
}
// interpretResults classifies a package's file lists into packaging issues. Files matched by the exceptions are
// never reported. Each returned list is sorted and is empty (never nil) when it has no entries:
//   - badDocFiles: %doc files that appear to be licenses, but are not at least also in the license files
//   - badOtherFiles: files that are misplaced in the licenses directory
//   - duplicatedDocs: %doc files that appear to be licenses and are also in the license files
func interpretResults(pkgName string, files, documentFiles, licenseFiles []string, licenseNames LicenseNames, exceptions LicenseExceptions) (badDocFiles, badOtherFiles, duplicatedDocs []string) {
	badDocFiles, badOtherFiles, duplicatedDocs = []string{}, []string{}, []string{}

	// Documentation files which look like licenses are either duplicates of a %license file, or are mis-tagged.
	for _, docPath := range documentFiles {
		if !licenseNames.IsALicenseFile(pkgName, docPath) || exceptions.ShouldIgnoreFile(pkgName, docPath) {
			continue
		}
		if isDocumentInLicenseFiles(docPath, licenseFiles) {
			duplicatedDocs = append(duplicatedDocs, docPath)
			continue
		}
		badDocFiles = append(badDocFiles, docPath)
	}

	// Anything placed in the license directory needs to be tagged as %license.
	taggedLicenses := sliceutils.SliceToSet(licenseFiles)
	for _, packagedPath := range files {
		if !isFileMisplacedInLicensesFolder(packagedPath, taggedLicenses) {
			continue
		}
		if exceptions.ShouldIgnoreFile(pkgName, packagedPath) {
			continue
		}
		badOtherFiles = append(badOtherFiles, packagedPath)
	}

	for _, issueList := range [][]string{badDocFiles, duplicatedDocs, badOtherFiles} {
		sort.Strings(issueList)
	}

	return badDocFiles, badOtherFiles, duplicatedDocs
}
// isDocumentInLicenseFiles reports whether a document file also appears in the list of license files. Files are
// compared by basename only, so the same file name in a different directory counts as a match.
//   - documentFile: Path of the %doc file to look for.
//   - licenseFiles: Paths of the package's license files.
//
// The basenames must be exactly equal (case-sensitive). A substring comparison would wrongly treat a document named
// 'COPYING' as duplicated when the package only ships 'COPYING.LIB' as a license, hiding a genuinely missing license.
func isDocumentInLicenseFiles(documentFile string, licenseFiles []string) bool {
	docBasename := filepath.Base(documentFile)
	for _, licenseFile := range licenseFiles {
		if filepath.Base(licenseFile) == docBasename {
			return true
		}
	}
	return false
}
// isFileMisplacedInLicensesFolder reports whether filePath lives under the licenses directory (licensePrefix) without
// being part of the set of license files. Every file under that directory is expected to be tagged as a license.
//   - filePath: The path to check. Directories are not included as %license, so only actual file paths should be
//     passed.
//   - licenseFileSet: Set of all license files in the package, used to decide whether filePath is a tagged license.
func isFileMisplacedInLicensesFolder(filePath string, licenseFileSet map[string]bool) bool {
	// A path outside the licenses directory can never be misplaced inside it; a path inside it is only acceptable
	// when it is a tagged license file.
	inLicensesTree := strings.HasPrefix(filePath, licensePrefix)
	return inLicensesTree && !licenseFileSet[filePath]
}

Просмотреть файл

@ -0,0 +1,285 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
package licensecheck
import (
"fmt"
"os"
"testing"
"github.com/microsoft/azurelinux/toolkit/tools/internal/logger"
"github.com/microsoft/azurelinux/toolkit/tools/internal/sliceutils"
"github.com/stretchr/testify/assert"
)
// loadDefaultLicenseNames loads the license name definitions from the manifest file referenced below and aborts the
// calling test if they cannot be loaded.
func loadDefaultLicenseNames(t *testing.T) LicenseNames {
	t.Helper()

	const defaultNamesPath = "../../../resources/manifests/package/license_file_names.json"
	licenseNames, loadErr := LoadLicenseNames(defaultNamesPath)
	if loadErr != nil {
		t.Fatalf("Failed to load default license names: %v", loadErr)
	}
	return licenseNames
}
// TestMain initializes the stderr logger before running the package's tests and exits with their result code.
func TestMain(m *testing.M) {
	logger.InitStderrLog()
	exitCode := m.Run()
	os.Exit(exitCode)
}
// TestSearchLicenseFilesForMatch verifies the basename matching between a document file and the license files.
func TestSearchLicenseFilesForMatch(t *testing.T) {
	defaultLicenseFiles := []string{"/usr/share/licenses/pkg/COPYING", "/usr/share/licenses/pkg/COPYING.LIB"}

	type matchCase struct {
		name         string
		documentFile string
		licenseFiles []string
		wantMatch    bool
	}
	cases := []matchCase{
		{
			name:         "Not a license file",
			documentFile: "file1",
			licenseFiles: defaultLicenseFiles,
			wantMatch:    false,
		},
		{
			name:         "License file different dir",
			documentFile: "/usr/share/docs/pkg/COPYING",
			licenseFiles: defaultLicenseFiles,
			wantMatch:    true,
		},
		{
			name:         "License file found exact match",
			documentFile: "/usr/share/licenses/pkg/COPYING",
			licenseFiles: defaultLicenseFiles,
			wantMatch:    true,
		},
		{
			name:         "License file case mismatch",
			documentFile: "/usr/share/licenses/pkg/copying",
			licenseFiles: defaultLicenseFiles,
			wantMatch:    false,
		},
		{
			name:         "License file found with extension",
			documentFile: "/usr/share/licenses/pkg/COPYING.LIB",
			licenseFiles: defaultLicenseFiles,
			wantMatch:    true,
		},
		{
			name:         "License file extension mismatch",
			documentFile: "/usr/share/licenses/pkg/COPYING.wrong_ext",
			licenseFiles: defaultLicenseFiles,
			wantMatch:    false,
		},
		{
			name:         "License file found with extra bits",
			documentFile: "/usr/share/licenses/pkg/mypkg-COPYING",
			licenseFiles: []string{"/usr/share/licenses/pkg/mypkg-COPYING"},
			wantMatch:    true,
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			gotMatch := isDocumentInLicenseFiles(c.documentFile, c.licenseFiles)
			if gotMatch == c.wantMatch {
				return
			}
			t.Errorf("Expected %v, got %v", c.wantMatch, gotMatch)
		})
	}
}
// TestIsFileMisplacedInLicensesFolder checks a tagged license, a file outside the licenses directory, and an untagged
// file inside the licenses directory.
func TestIsFileMisplacedInLicensesFolder(t *testing.T) {
	licenseFileSet := sliceutils.SliceToSet([]string{
		"/usr/share/licenses/pkg/COPYING",
		"/usr/share/licenses/pkg/COPYING.LIB",
	})

	checks := []struct {
		path          string
		wantMisplaced bool
	}{
		{path: "/usr/share/licenses/pkg/COPYING", wantMisplaced: false},
		{path: "/usr/share/not/in/licenses.txt", wantMisplaced: false},
		{path: "/usr/share/licenses/pkg/NOTICE", wantMisplaced: true},
	}
	for _, check := range checks {
		assert.Equal(t, check.wantMisplaced, isFileMisplacedInLicensesFolder(check.path, licenseFileSet), check.path)
	}
}
// TestIsFileMisplacedInLicensesFolderDetectPackageFolder checks that paths under the licenses directory are reported
// as misplaced when the package has no license files at all, with and without a trailing slash.
func TestIsFileMisplacedInLicensesFolderDetectPackageFolder(t *testing.T) {
	noLicenseFiles := map[string]bool{}
	for _, packageFolder := range []string{"/usr/share/licenses/OTHER_PKG/", "/usr/share/licenses/OTHER_PKG"} {
		assert.True(t, isFileMisplacedInLicensesFolder(packageFolder, noLicenseFiles))
	}
}
// makeResult builds a LicenseCheckResult for tests. The result's RpmPath is set to name, and each finding list is
// filled with the requested number of generated entries ("doc0", "file0", "dupe0", ...).
func makeResult(name string, numBadDocs, numBadFiles, numDupes int) LicenseCheckResult {
	// numbered always returns a non-nil slice, even for a count of zero.
	numbered := func(prefix string, count int) []string {
		entries := make([]string, count)
		for idx := range entries {
			entries[idx] = fmt.Sprintf("%s%d", prefix, idx)
		}
		return entries
	}

	return LicenseCheckResult{
		RpmPath:        name,
		BadDocs:        numbered("doc", numBadDocs),
		BadFiles:       numbered("file", numBadFiles),
		DuplicatedDocs: numbered("dupe", numDupes),
	}
}
// TestGetResults verifies how LicenseChecker.GetResults splits its stored results into the full list, the warnings,
// and the failures when using the default mode.
func TestGetResults(t *testing.T) {
	type resultSets struct {
		all  []LicenseCheckResult
		warn []LicenseCheckResult
		fail []LicenseCheckResult
	}
	type getResultsCase struct {
		name    string
		results []LicenseCheckResult
		want    resultSets
	}

	cases := []getResultsCase{
		{
			name:    "No results",
			results: []LicenseCheckResult{},
			want: resultSets{
				all:  []LicenseCheckResult{},
				warn: []LicenseCheckResult{},
				fail: []LicenseCheckResult{},
			},
		},
		{
			name:    "No issues",
			results: []LicenseCheckResult{makeResult("pkg1", 0, 0, 0)},
			want: resultSets{
				all:  []LicenseCheckResult{makeResult("pkg1", 0, 0, 0)},
				warn: []LicenseCheckResult{},
				fail: []LicenseCheckResult{},
			},
		},
		{
			name:    "Single error",
			results: []LicenseCheckResult{makeResult("pkg1", 1, 1, 1)},
			want: resultSets{
				all:  []LicenseCheckResult{makeResult("pkg1", 1, 1, 1)},
				warn: []LicenseCheckResult{makeResult("pkg1", 1, 1, 1)},
				fail: []LicenseCheckResult{makeResult("pkg1", 1, 1, 1)},
			},
		},
		{
			name:    "Single warn",
			results: []LicenseCheckResult{makeResult("pkg1", 0, 0, 1)},
			want: resultSets{
				all:  []LicenseCheckResult{makeResult("pkg1", 0, 0, 1)},
				warn: []LicenseCheckResult{makeResult("pkg1", 0, 0, 1)},
				fail: []LicenseCheckResult{},
			},
		},
		{
			name: "Double error",
			results: []LicenseCheckResult{
				makeResult("pkg1", 1, 0, 0),
				makeResult("pkg2", 1, 0, 0),
			},
			want: resultSets{
				all: []LicenseCheckResult{
					makeResult("pkg1", 1, 0, 0),
					makeResult("pkg2", 1, 0, 0),
				},
				warn: []LicenseCheckResult{},
				fail: []LicenseCheckResult{
					makeResult("pkg1", 1, 0, 0),
					makeResult("pkg2", 1, 0, 0),
				},
			},
		},
		{
			name: "Multiple results with warn",
			results: []LicenseCheckResult{
				makeResult("pkg1", 1, 1, 1),
				makeResult("pkg2", 0, 0, 2),
			},
			want: resultSets{
				all: []LicenseCheckResult{
					makeResult("pkg1", 1, 1, 1),
					makeResult("pkg2", 0, 0, 2),
				},
				warn: []LicenseCheckResult{
					makeResult("pkg1", 1, 1, 1),
					makeResult("pkg2", 0, 0, 2),
				},
				fail: []LicenseCheckResult{
					makeResult("pkg1", 1, 1, 1),
				},
			},
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			checker := LicenseChecker{results: c.results}
			gotAll, gotWarn, gotFail := checker.GetResults(LicenseCheckModeDefault)
			assert.Equal(t, c.want.all, gotAll)
			assert.Equal(t, c.want.warn, gotWarn)
			assert.Equal(t, c.want.fail, gotFail)
		})
	}
}
func TestParseCheckResults(t *testing.T) {
pkgName := "testpkg"
files := []string{
"/some/random/file",
"/usr/share/docs/testpkg/doc.txt",
"/usr/share/docs/testpkg/COPYING",
"/usr/share/licenses/testpkg/other_misplaced_2",
"/usr/share/licenses/testpkg/misplaced",
"/usr/share/docs/testpkg/licenses/duplicated",
}
documentFiles := []string{
"/usr/share/docs/testpkg/doc.txt",
"/usr/share/docs/testpkg/COPYING",
"/usr/share/licenses/testpkg/other_misplaced_2",
"/usr/share/licenses/testpkg/other_misplaced",
"/usr/share/docs/testpkg/licenses/duplicated",
}
licenseFiles := []string{
"/usr/share/licenses/testpkg/duplicated",
}
exceptions := LicenseExceptions{}
expectedBadDocFiles := []string{
"/usr/share/docs/testpkg/COPYING",
}
expectedBadOtherFiles := []string{
"/usr/share/licenses/testpkg/misplaced",
"/usr/share/licenses/testpkg/other_misplaced_2",
}
expectedDuplicatedDocs := []string{
"/usr/share/docs/testpkg/licenses/duplicated",
}
badDocFiles, badOtherFiles, duplicatedDocs := interpretResults(pkgName, files, documentFiles, licenseFiles, loadDefaultLicenseNames(t), exceptions)
assert.Equal(t, expectedBadDocFiles, badDocFiles)
assert.Equal(t, expectedBadOtherFiles, badOtherFiles)
assert.Equal(t, expectedDuplicatedDocs, duplicatedDocs)
}

Просмотреть файл

@ -0,0 +1,83 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
/*
Package licensecheckformat provides functions to handle the output of the licensecheck package.
*/
package licensecheckformat
import (
"fmt"
"path/filepath"
"strings"
"github.com/microsoft/azurelinux/toolkit/tools/pkg/licensecheck"
)
// FormatResults renders the findings in results as a human readable report. The report is ordered as follows:
//   - Packages with warnings only, sorted by RPM path
//   - Packages with errors (and possibly warnings), sorted by RPM path
//
// The mode decides which findings count as warnings and which count as errors. When nothing needs to be reported, a
// single "No license issues found" line is returned. The results slice is sorted in place as a side effect.
func FormatResults(results []licensecheck.LicenseCheckResult, mode licensecheck.LicenseCheckMode) string {
	_, warnings, failures := licensecheck.SortAndFilterResults(results, mode)
	if len(warnings)+len(failures) == 0 {
		return "No license issues found\n"
	}

	var report strings.Builder

	// Warning-only packages go first. Packages that also carry an error are printed once, in the error section.
	for idx := range warnings {
		candidate := &warnings[idx]
		if !candidate.HasWarningResult(mode) || candidate.HasErrorResult(mode) {
			continue
		}
		report.WriteString(formatResult(*candidate, mode))
	}

	// Then every package with at least one error.
	for _, failure := range failures {
		report.WriteString(formatResult(failure, mode))
	}

	return report.String()
}
// formatResult renders the findings of a single package. An "ERROR" section is emitted first when the package has
// findings that count as errors under mode, followed by a "WARN" section for findings that count as warnings. Each
// section lists only the finding categories that belong to it.
func formatResult(result licensecheck.LicenseCheckResult, mode licensecheck.LicenseCheckMode) string {
	const (
		badDocsFormat    = "\tbad %%doc files:\n\t\t%s\n"
		badFilesFormat   = "\tbad general file:\n\t\t%s\n"
		duplicatesFormat = "\tduplicated license files:\n\t\t%s\n"
		entrySeparator   = "\n\t\t"
	)

	// Decide which categories are errors: by default bad docs and bad files are errors and duplicates are warnings.
	docsAreErrors, filesAreErrors, dupesAreErrors := true, true, false
	switch mode {
	case licensecheck.LicenseCheckModePedantic:
		dupesAreErrors = true
	case licensecheck.LicenseCheckModeWarnOnly:
		docsAreErrors, filesAreErrors = false, false
	}

	var out strings.Builder
	rpmName := filepath.Base(result.RpmPath)

	// writeCategories appends every non-empty category whose severity matches the section being written.
	writeCategories := func(writingErrors bool) {
		if docsAreErrors == writingErrors && len(result.BadDocs) > 0 {
			fmt.Fprintf(&out, badDocsFormat, strings.Join(result.BadDocs, entrySeparator))
		}
		if filesAreErrors == writingErrors && len(result.BadFiles) > 0 {
			fmt.Fprintf(&out, badFilesFormat, strings.Join(result.BadFiles, entrySeparator))
		}
		if dupesAreErrors == writingErrors && len(result.DuplicatedDocs) > 0 {
			fmt.Fprintf(&out, duplicatesFormat, strings.Join(result.DuplicatedDocs, entrySeparator))
		}
	}

	if result.HasErrorResult(mode) {
		fmt.Fprintf(&out, "ERROR: (%s) has license errors:\n", rpmName)
		writeCategories(true)
	}
	if result.HasWarningResult(mode) {
		fmt.Fprintf(&out, "WARN: (%s) has license warnings:\n", rpmName)
		writeCategories(false)
	}

	return out.String()
}

Просмотреть файл

@ -0,0 +1,242 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
package licensecheckformat
import (
"testing"
"github.com/microsoft/azurelinux/toolkit/tools/pkg/licensecheck"
"github.com/stretchr/testify/assert"
)
// TestFormatResultsNonPedantic checks the report produced in the default mode, where duplicated docs are warnings and
// the other findings are errors.
func TestFormatResultsNonPedantic(t *testing.T) {
	type formatCase struct {
		name     string
		results  []licensecheck.LicenseCheckResult
		expected string
	}

	cases := []formatCase{
		{
			name:     "No results",
			results:  []licensecheck.LicenseCheckResult{},
			expected: "No license issues found\n",
		},
		{
			name: "Single result",
			results: []licensecheck.LicenseCheckResult{
				{
					RpmPath: "/path/to/package.rpm",
					BadDocs: []string{"doc1"},
				},
			},
			expected: "ERROR: (package.rpm) has license errors:\n" +
				"\tbad %doc files:\n" +
				"\t\tdoc1\n",
		},
		{
			name: "Multiple results",
			results: []licensecheck.LicenseCheckResult{
				{
					RpmPath:        "/path/to/package.rpm",
					BadDocs:        []string{"doc1", "doc2"},
					BadFiles:       []string{"file1", "file2"},
					DuplicatedDocs: []string{"dupe1", "dupe2"},
				},
				{
					RpmPath:        "/path/to/another-package.rpm",
					DuplicatedDocs: []string{"dupe3", "dupe4"},
				},
			},
			expected: "WARN: (another-package.rpm) has license warnings:\n" +
				"\tduplicated license files:\n" +
				"\t\tdupe3\n" +
				"\t\tdupe4\n" +
				"ERROR: (package.rpm) has license errors:\n" +
				"\tbad %doc files:\n" +
				"\t\tdoc1\n" +
				"\t\tdoc2\n" +
				"\tbad general file:\n" +
				"\t\tfile1\n" +
				"\t\tfile2\n" +
				"WARN: (package.rpm) has license warnings:\n" +
				"\tduplicated license files:\n" +
				"\t\tdupe1\n" +
				"\t\tdupe2\n",
		},
		{
			name: "Duplicated docs only",
			results: []licensecheck.LicenseCheckResult{
				{
					RpmPath:        "/path/to/package.rpm",
					DuplicatedDocs: []string{"dupe1", "dupe2"},
				},
			},
			expected: "WARN: (package.rpm) has license warnings:\n" +
				"\tduplicated license files:\n" +
				"\t\tdupe1\n" +
				"\t\tdupe2\n",
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			report := FormatResults(c.results, licensecheck.LicenseCheckModeDefault)
			assert.Equal(t, c.expected, report)
		})
	}
}
// TestFormatResultsPedantic checks the report produced in pedantic mode, where every finding is reported as an error.
func TestFormatResultsPedantic(t *testing.T) {
	type formatCase struct {
		name     string
		results  []licensecheck.LicenseCheckResult
		expected string
	}

	cases := []formatCase{
		{
			name:     "No results",
			results:  []licensecheck.LicenseCheckResult{},
			expected: "No license issues found\n",
		},
		{
			name: "Single result",
			results: []licensecheck.LicenseCheckResult{
				{
					RpmPath: "/path/to/package.rpm",
					BadDocs: []string{"doc1"},
				},
			},
			expected: "ERROR: (package.rpm) has license errors:\n" +
				"\tbad %doc files:\n" +
				"\t\tdoc1\n",
		},
		{
			name: "Multiple results",
			results: []licensecheck.LicenseCheckResult{
				{
					RpmPath:        "/path/to/package.rpm",
					BadDocs:        []string{"doc1", "doc2"},
					BadFiles:       []string{"file1", "file2"},
					DuplicatedDocs: []string{"dupe1", "dupe2"},
				},
				{
					RpmPath:        "/path/to/another-package.rpm",
					DuplicatedDocs: []string{"dupe3", "dupe4"},
				},
			},
			expected: "ERROR: (another-package.rpm) has license errors:\n" +
				"\tduplicated license files:\n" +
				"\t\tdupe3\n" +
				"\t\tdupe4\n" +
				"ERROR: (package.rpm) has license errors:\n" +
				"\tbad %doc files:\n" +
				"\t\tdoc1\n" +
				"\t\tdoc2\n" +
				"\tbad general file:\n" +
				"\t\tfile1\n" +
				"\t\tfile2\n" +
				"\tduplicated license files:\n" +
				"\t\tdupe1\n" +
				"\t\tdupe2\n",
		},
		{
			name: "Duplicated docs only",
			results: []licensecheck.LicenseCheckResult{
				{
					RpmPath:        "/path/to/package.rpm",
					DuplicatedDocs: []string{"dupe1", "dupe2"},
				},
			},
			expected: "ERROR: (package.rpm) has license errors:\n" +
				"\tduplicated license files:\n" +
				"\t\tdupe1\n" +
				"\t\tdupe2\n",
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			report := FormatResults(c.results, licensecheck.LicenseCheckModePedantic)
			assert.Equal(t, c.expected, report)
		})
	}
}
// TestFormatResultsWarnOnly checks the report produced in warn mode, where every finding is reported as a warning.
func TestFormatResultsWarnOnly(t *testing.T) {
	type formatCase struct {
		name     string
		results  []licensecheck.LicenseCheckResult
		expected string
	}

	cases := []formatCase{
		{
			name:     "No results",
			results:  []licensecheck.LicenseCheckResult{},
			expected: "No license issues found\n",
		},
		{
			name: "Single result",
			results: []licensecheck.LicenseCheckResult{
				{
					RpmPath: "/path/to/package.rpm",
					BadDocs: []string{"doc1"},
				},
			},
			expected: "WARN: (package.rpm) has license warnings:\n" +
				"\tbad %doc files:\n" +
				"\t\tdoc1\n",
		},
		{
			name: "Multiple results",
			results: []licensecheck.LicenseCheckResult{
				{
					RpmPath:        "/path/to/package.rpm",
					BadDocs:        []string{"doc1", "doc2"},
					BadFiles:       []string{"file1", "file2"},
					DuplicatedDocs: []string{"dupe1", "dupe2"},
				},
				{
					RpmPath:        "/path/to/another-package.rpm",
					DuplicatedDocs: []string{"dupe3", "dupe4"},
				},
			},
			expected: "WARN: (another-package.rpm) has license warnings:\n" +
				"\tduplicated license files:\n" +
				"\t\tdupe3\n" +
				"\t\tdupe4\n" +
				"WARN: (package.rpm) has license warnings:\n" +
				"\tbad %doc files:\n" +
				"\t\tdoc1\n" +
				"\t\tdoc2\n" +
				"\tbad general file:\n" +
				"\t\tfile1\n" +
				"\t\tfile2\n" +
				"\tduplicated license files:\n" +
				"\t\tdupe1\n" +
				"\t\tdupe2\n",
		},
		{
			name: "Duplicated docs only",
			results: []licensecheck.LicenseCheckResult{
				{
					RpmPath:        "/path/to/package.rpm",
					DuplicatedDocs: []string{"dupe1", "dupe2"},
				},
			},
			expected: "WARN: (package.rpm) has license warnings:\n" +
				"\tduplicated license files:\n" +
				"\t\tdupe1\n" +
				"\t\tdupe2\n",
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			report := FormatResults(c.results, licensecheck.LicenseCheckModeWarnOnly)
			assert.Equal(t, c.expected, report)
		})
	}
}
func TestFormatResultsEmpty(t *testing.T) {
actual := FormatResults([]licensecheck.LicenseCheckResult{}, licensecheck.LicenseCheckModeDefault)
assert.Equal(t, "No license issues found\n", actual)
actual = FormatResults([]licensecheck.LicenseCheckResult{{RpmPath: "/path/to/package.rpm", PackageName: "pkg1"}}, licensecheck.LicenseCheckModeDefault)
assert.Equal(t, "No license issues found\n", actual)
}

Просмотреть файл

@ -0,0 +1,35 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
package licensecheck
import "slices"
// LicenseCheckMode selects how the license checker package classifies findings when filtering issues. The values are
// intended to be used as command line flags in addition to being used in code.
type LicenseCheckMode string

const (
	LicenseCheckModeNone      LicenseCheckMode = "none"     // Disable license checking
	LicenseCheckModeWarnOnly  LicenseCheckMode = "warn"     // Convert all findings into warnings
	LicenseCheckModeFatalOnly LicenseCheckMode = "fatal"    // Report critical errors, but allow warnings
	LicenseCheckModePedantic  LicenseCheckMode = "pedantic" // Convert all findings into errors

	// LicenseCheckModeDefault is the mode used when no explicit mode is requested.
	LicenseCheckModeDefault = LicenseCheckModeFatalOnly
)
// validLicenseCheckModes lists every valid license check mode. The order is the order in which
// ValidLicenseCheckModeStrings returns the modes.
var validLicenseCheckModes = []LicenseCheckMode{
	LicenseCheckModeNone,
	LicenseCheckModeWarnOnly,
	LicenseCheckModePedantic,
	LicenseCheckModeFatalOnly,
}
// IsValidLicenseCheckMode reports whether mode is one of the known license check modes.
func IsValidLicenseCheckMode(mode LicenseCheckMode) bool {
	position := slices.Index(validLicenseCheckModes, mode)
	return position != -1
}
// ValidLicenseCheckModeStrings converts every valid license check mode to a plain string, for use with the command
// line.
func ValidLicenseCheckModeStrings() (modes []string) {
	for idx := 0; idx < len(validLicenseCheckModes); idx++ {
		modes = append(modes, string(validLicenseCheckModes[idx]))
	}
	return modes
}

Просмотреть файл

@ -0,0 +1,108 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
package licensecheck
import (
"fmt"
"os"
"path/filepath"
"sort"
"github.com/microsoft/azurelinux/toolkit/tools/internal/jsonutils"
)
// LicenseCheckResult is the result of a license check on a single RPM. The finding lists are omitted from the JSON
// output when they are empty.
type LicenseCheckResult struct {
// RpmPath is the path of the RPM that was checked. Results are sorted by this field.
RpmPath string `json:"RpmPath"`
// PackageName is the package name as reported by the rpm '%{NAME}' query.
PackageName string `json:"PackageName,omitempty"`
// BadDocs lists %doc files that appear to be licenses but are not also in the license files.
BadDocs []string `json:"BadDocs,omitempty"`
// BadFiles lists files that are misplaced in the licenses directory (present there, but not tagged as licenses).
BadFiles []string `json:"BadFiles,omitempty"`
// DuplicatedDocs lists %doc files that appear to be licenses and are also in the license files.
DuplicatedDocs []string `json:"DuplicatedDocs,omitempty"`
}
// HasErrorResult reports whether the result holds at least one finding that counts as an error under mode:
//   - fatal: bad docs and bad files are errors
//   - pedantic: bad docs, bad files and duplicated docs are all errors
//   - any other mode: nothing is an error
func (r *LicenseCheckResult) HasErrorResult(mode LicenseCheckMode) (hasErrorResult bool) {
	hasCriticalFindings := len(r.BadDocs) > 0 || len(r.BadFiles) > 0

	switch mode {
	case LicenseCheckModeFatalOnly:
		hasErrorResult = hasCriticalFindings
	case LicenseCheckModePedantic:
		hasErrorResult = hasCriticalFindings || len(r.DuplicatedDocs) > 0
	default:
		// 'none' and 'warn' never produce errors, and neither does an unrecognized mode.
		hasErrorResult = false
	}
	return hasErrorResult
}
// HasWarningResult reports whether the result holds at least one finding that counts as a warning under mode:
//   - fatal: only duplicated docs are warnings
//   - warn: every finding is a warning, including those that would be errors in fatal mode
//   - any other mode: nothing is a warning (pedantic mode promotes warnings to errors instead)
func (r *LicenseCheckResult) HasWarningResult(mode LicenseCheckMode) bool {
	hasDuplicates := len(r.DuplicatedDocs) > 0

	switch mode {
	case LicenseCheckModeFatalOnly:
		return hasDuplicates
	case LicenseCheckModeWarnOnly:
		return r.HasErrorResult(LicenseCheckModeFatalOnly) || hasDuplicates
	default:
		return false
	}
}
// SaveLicenseCheckResults writes every result that has a warning or an error (judged in the default mode) to a json
// file at savePath, creating the parent directory when it is missing. The results are written sorted, and
// resultsList itself is sorted in place as a side effect.
func SaveLicenseCheckResults(savePath string, resultsList []LicenseCheckResult) error {
	parentDir := filepath.Dir(savePath)
	if mkdirErr := os.MkdirAll(parentDir, os.ModePerm); mkdirErr != nil {
		return fmt.Errorf("failed to create directory for results file. Error:\n%w", mkdirErr)
	}

	findings, _, _ := SortAndFilterResults(resultsList, LicenseCheckModeDefault)
	if writeErr := jsonutils.WriteJSONFile(savePath, findings); writeErr != nil {
		return fmt.Errorf("failed to save license check results. Error:\n%w", writeErr)
	}
	return nil
}
// SortAndFilterResults sorts results by RPM path (in place, so the caller's slice is reordered too) and then splits
// them into three non-nil lists according to mode:
//   - anyResult: results with at least one warning or error
//   - warnings: results with at least one warning
//   - errors: results with at least one error
//
// A result with both warnings and errors appears in all three lists.
func SortAndFilterResults(results []LicenseCheckResult, mode LicenseCheckMode) (anyResult, warnings, errors []LicenseCheckResult) {
	byRpmPath := func(i, j int) bool {
		return results[i].RpmPath < results[j].RpmPath
	}
	sort.Slice(results, byRpmPath)

	anyResult, warnings, errors = []LicenseCheckResult{}, []LicenseCheckResult{}, []LicenseCheckResult{}
	for idx := range results {
		current := results[idx]
		isError := current.HasErrorResult(mode)
		isWarning := current.HasWarningResult(mode)

		if isError || isWarning {
			anyResult = append(anyResult, current)
		}
		if isError {
			errors = append(errors, current)
		}
		if isWarning {
			warnings = append(warnings, current)
		}
	}
	return anyResult, warnings, errors
}

Просмотреть файл

@ -0,0 +1,180 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
package licensecheck
import (
"path/filepath"
"testing"
"github.com/microsoft/azurelinux/toolkit/tools/internal/jsonutils"
"github.com/stretchr/testify/assert"
)
// TestCategorizeResults checks, in the default mode, which combinations of findings count as errors and which count
// as warnings.
func TestCategorizeResults(t *testing.T) {
	type categorizeCase struct {
		name        string
		result      LicenseCheckResult
		wantError   bool
		wantWarning bool
	}

	cases := []categorizeCase{
		{
			name: "All results",
			result: LicenseCheckResult{
				BadDocs:        []string{"doc"},
				BadFiles:       []string{"file"},
				DuplicatedDocs: []string{"dupe"},
			},
			wantError:   true,
			wantWarning: true,
		},
		{
			name: "BadDocs",
			result: LicenseCheckResult{
				BadDocs: []string{"doc"},
			},
			wantError:   true,
			wantWarning: false,
		},
		{
			name: "BadFiles",
			result: LicenseCheckResult{
				BadFiles: []string{"file"},
			},
			wantError:   true,
			wantWarning: false,
		},
		{
			name: "DuplicatedDocs",
			result: LicenseCheckResult{
				DuplicatedDocs: []string{"dupe"},
			},
			wantError:   false,
			wantWarning: true,
		},
		{
			name: "BadDocsAndBadFiles",
			result: LicenseCheckResult{
				BadDocs:  []string{"doc"},
				BadFiles: []string{"file"},
			},
			wantError:   true,
			wantWarning: false,
		},
		{
			name: "Dupes with bad doc",
			result: LicenseCheckResult{
				BadDocs:        []string{"doc"},
				DuplicatedDocs: []string{"dupe"},
			},
			wantError:   true,
			wantWarning: true,
		},
		{
			name: "Dupes with bad file",
			result: LicenseCheckResult{
				BadFiles:       []string{"file"},
				DuplicatedDocs: []string{"dupe"},
			},
			wantError:   true,
			wantWarning: true,
		},
		{
			name:        "No results",
			result:      LicenseCheckResult{},
			wantError:   false,
			wantWarning: false,
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			gotError := c.result.HasErrorResult(LicenseCheckModeDefault)
			gotWarning := c.result.HasWarningResult(LicenseCheckModeDefault)
			assert.Equal(t, c.wantError, gotError)
			assert.Equal(t, c.wantWarning, gotWarning)
		})
	}
}
// TestSaveResultsToFile saves results into a directory that does not exist yet, then reads the file back and checks
// that the content survived the round trip.
func TestSaveResultsToFile(t *testing.T) {
	saved := []LicenseCheckResult{
		{
			RpmPath:        "/path/to/rpm",
			BadDocs:        []string{"/docs/doc1", "/docs/doc2"},
			DuplicatedDocs: []string{"/docs/COPY"},
		},
	}

	// "missing_dir" is deliberately absent so the save has to create it.
	resultsPath := filepath.Join(t.TempDir(), "missing_dir", "results.json")
	saveErr := SaveLicenseCheckResults(resultsPath, saved)
	assert.Nil(t, saveErr)

	// Load it back and see if it matches.
	loaded := []LicenseCheckResult{}
	loadErr := jsonutils.ReadJSONFile(resultsPath, &loaded)
	assert.Nil(t, loadErr)
	assert.Equal(t, saved, loaded)
}
// TestSortAndFilter checks, for every mode, that SortAndFilterResults sorts its input in place and splits the results
// into the expected all/warnings/errors lists.
func TestSortAndFilter(t *testing.T) {
	var (
		r1 = LicenseCheckResult{
			RpmPath:        "/path/to/rpm1",
			BadDocs:        []string{"/docs/doc1", "/docs/doc2"},
			DuplicatedDocs: []string{"/docs/COPY"},
		}
		r2 = LicenseCheckResult{
			RpmPath:        "/path/to/rpm2",
			BadFiles:       []string{"/docs/doc1", "/docs/doc2"},
			DuplicatedDocs: []string{"/docs/COPY"},
		}
		r3 = LicenseCheckResult{
			RpmPath: "/path/to/rpm3",
			BadDocs: []string{"/docs/doc1", "/docs/doc2"},
		}
		r4 = LicenseCheckResult{
			RpmPath: "/path/to/rpm4",
		}
		r5 = LicenseCheckResult{
			RpmPath:        "/path/to/rpm5",
			DuplicatedDocs: []string{"/docs/COPY"},
		}
	)

	unsortedList := []LicenseCheckResult{r5, r4, r2, r1, r3}
	sortedList := []LicenseCheckResult{r1, r2, r3, r4, r5}
	expectedAll := []LicenseCheckResult{r1, r2, r3, r5}

	scenarios := []struct {
		mode         LicenseCheckMode
		wantAll      []LicenseCheckResult
		wantWarnings []LicenseCheckResult
		wantErrors   []LicenseCheckResult
	}{
		{
			mode:         LicenseCheckModeFatalOnly,
			wantAll:      expectedAll,
			wantWarnings: []LicenseCheckResult{r1, r2, r5},
			wantErrors:   []LicenseCheckResult{r1, r2, r3},
		},
		{
			mode:         LicenseCheckModePedantic,
			wantAll:      expectedAll,
			wantWarnings: []LicenseCheckResult{},
			wantErrors:   []LicenseCheckResult{r1, r2, r3, r5},
		},
		{
			mode:         LicenseCheckModeWarnOnly,
			wantAll:      expectedAll,
			wantWarnings: []LicenseCheckResult{r1, r2, r3, r5},
			wantErrors:   []LicenseCheckResult{},
		},
		{
			mode:         LicenseCheckModeNone,
			wantAll:      []LicenseCheckResult{},
			wantWarnings: []LicenseCheckResult{},
			wantErrors:   []LicenseCheckResult{},
		},
	}

	for _, scenario := range scenarios {
		// Every scenario starts from a fresh copy of the unsorted input.
		input := make([]LicenseCheckResult, len(unsortedList))
		copy(input, unsortedList)

		gotAll, gotWarnings, gotErrors := SortAndFilterResults(input, scenario.mode)
		assert.Equal(t, sortedList, input)
		assert.Equal(t, scenario.wantAll, gotAll)
		assert.Equal(t, scenario.wantWarnings, gotWarnings)
		assert.Equal(t, scenario.wantErrors, gotErrors)
	}
}

Просмотреть файл

@ -0,0 +1,78 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
package licensecheck
import (
"fmt"
"regexp"
"github.com/microsoft/azurelinux/toolkit/tools/internal/jsonutils"
)
// PkgExceptions holds the license check exceptions that apply to a single package.
type PkgExceptions struct {
// PackageName is compared against the name of the package being checked (the rpm '%{NAME}' query result).
PackageName string `json:"PackageName"`
// IgnoredFilesRegexList holds regular expressions; a file path matching any of them is ignored for this package.
IgnoredFilesRegexList []string `json:"IgnoredFilesRegexList"`
// compiledIgnoreRegexList is the compiled form of IgnoredFilesRegexList, filled in by LoadLicenseExceptions.
compiledIgnoreRegexList []*regexp.Regexp
}
// LicenseExceptions is the set of files the license check should ignore, as loaded from a .json exceptions file.
type LicenseExceptions struct {
// PkgExceptions lists the exceptions that only apply to specific packages.
PkgExceptions []PkgExceptions `json:"PkgExceptions"`
// GlobalExceptionsRegexList holds regular expressions; a file path matching any of them is ignored for every package.
GlobalExceptionsRegexList []string `json:"GlobalExceptionsRegexList"`
// compiledGlobalIgnoreRegexList is the compiled form of GlobalExceptionsRegexList, filled in by LoadLicenseExceptions.
compiledGlobalIgnoreRegexList []*regexp.Regexp
}
// ShouldIgnoreFile reports whether the license exceptions say the given file must be ignored. Global exceptions are
// consulted first, then the exceptions of every entry whose package name equals packageName.
//   - packageName: the name of the package as returned by rpm query '%{NAME}'
//   - filePath: the path of the file to be checked as returned by rpm query '%{FILENAMES}'
func (l *LicenseExceptions) ShouldIgnoreFile(packageName, filePath string) bool {
	matchesAny := func(patterns []*regexp.Regexp) bool {
		for _, pattern := range patterns {
			if pattern.MatchString(filePath) {
				return true
			}
		}
		return false
	}

	if matchesAny(l.compiledGlobalIgnoreRegexList) {
		return true
	}

	for idx := range l.PkgExceptions {
		pkgException := &l.PkgExceptions[idx]
		if pkgException.PackageName != packageName {
			continue
		}
		if matchesAny(pkgException.compiledIgnoreRegexList) {
			return true
		}
	}
	return false
}
// LoadLicenseExceptions reads the license exceptions from the given .json file and compiles every regular expression
// it contains. An error is returned when the file cannot be read or when any expression fails to compile; in both
// cases the returned LicenseExceptions is empty.
func LoadLicenseExceptions(file string) (LicenseExceptions, error) {
	var loaded LicenseExceptions
	if err := jsonutils.ReadJSONFile(file, &loaded); err != nil {
		return LicenseExceptions{}, fmt.Errorf("failed to read license exceptions file (%s):\n%w", file, err)
	}

	// Per-package expressions.
	for pkgIdx := range loaded.PkgExceptions {
		pkgException := &loaded.PkgExceptions[pkgIdx]
		for _, pattern := range pkgException.IgnoredFilesRegexList {
			compiled, err := regexp.Compile(pattern)
			if err != nil {
				return LicenseExceptions{}, fmt.Errorf("failed to compile regex for ignored files (%s):\n%w", pattern, err)
			}
			pkgException.compiledIgnoreRegexList = append(pkgException.compiledIgnoreRegexList, compiled)
		}
	}

	// Expressions that apply to every package.
	for _, pattern := range loaded.GlobalExceptionsRegexList {
		compiled, err := regexp.Compile(pattern)
		if err != nil {
			return LicenseExceptions{}, fmt.Errorf("failed to compile regex for global ignored files (%s):\n%w", pattern, err)
		}
		loaded.compiledGlobalIgnoreRegexList = append(loaded.compiledGlobalIgnoreRegexList, compiled)
	}

	return loaded, nil
}

Просмотреть файл

@ -0,0 +1,212 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
package licensecheck
import (
"path/filepath"
"regexp"
"testing"
"github.com/microsoft/azurelinux/toolkit/tools/internal/file"
"github.com/stretchr/testify/assert"
)
// TestLoadLicenseExceptions loads the exceptions test data file and checks that both the raw expressions and their
// compiled forms match what is expected.
func TestLoadLicenseExceptions(t *testing.T) {
	const exceptionsPath = "testdata/test_license_exceptions.json"

	// compileAll builds the compiled counterpart of a list of expressions.
	compileAll := func(patterns []string) []*regexp.Regexp {
		compiled := make([]*regexp.Regexp, 0, len(patterns))
		for _, pattern := range patterns {
			compiled = append(compiled, regexp.MustCompile(pattern))
		}
		return compiled
	}

	pkg1Patterns := []string{
		"/usr/share/doc/LICENSE",
		"/usr/share/doc/README.GPL",
		".*GLOB1",
	}
	pkg2Patterns := []string{
		"/usr/share/doc/LICENSE",
		"/usr/share/doc/README.GPL",
		".*GLOB2",
	}
	globalPatterns := []string{
		".*GLOB3",
	}

	expectedExceptions := LicenseExceptions{
		PkgExceptions: []PkgExceptions{
			{
				PackageName:             "TestPackage1",
				IgnoredFilesRegexList:   pkg1Patterns,
				compiledIgnoreRegexList: compileAll(pkg1Patterns),
			},
			{
				PackageName:             "TestPackage2",
				IgnoredFilesRegexList:   pkg2Patterns,
				compiledIgnoreRegexList: compileAll(pkg2Patterns),
			},
		},
		GlobalExceptionsRegexList:     globalPatterns,
		compiledGlobalIgnoreRegexList: compileAll(globalPatterns),
	}

	loadedExceptions, loadErr := LoadLicenseExceptions(exceptionsPath)
	// Loading must succeed.
	if loadErr != nil {
		t.Errorf("Failed to load license exceptions: %v", loadErr)
	}
	// The loaded exceptions must match the expected ones.
	assert.Equal(t, expectedExceptions, loadedExceptions)
}
// TestShouldIgnoreFile exercises ShouldIgnoreFile against a hand-built set of per-package and global exception
// regexes, covering exact paths, per-package globs, the global glob and packages with no exceptions listed.
func TestShouldIgnoreFile(t *testing.T) {
	// compileAll builds the compiled form for a list of regex strings.
	compileAll := func(patterns []string) []*regexp.Regexp {
		compiled := make([]*regexp.Regexp, 0, len(patterns))
		for _, pattern := range patterns {
			compiled = append(compiled, regexp.MustCompile(pattern))
		}
		return compiled
	}

	var (
		pkg1Patterns   = []string{"/usr/share/doc/LICENSE", "/usr/share/doc/README.GPL", ".*GLOB1"}
		pkg2Patterns   = []string{"/usr/share/doc/LICENSE", "/usr/share/doc/README.GPL", ".*GLOB2"}
		globalPatterns = []string{".*GLOB3"}
	)

	exceptions := LicenseExceptions{
		PkgExceptions: []PkgExceptions{
			{
				PackageName:             "TestPackage1",
				IgnoredFilesRegexList:   pkg1Patterns,
				compiledIgnoreRegexList: compileAll(pkg1Patterns),
			},
			{
				PackageName:             "TestPackage2",
				IgnoredFilesRegexList:   pkg2Patterns,
				compiledIgnoreRegexList: compileAll(pkg2Patterns),
			},
		},
		GlobalExceptionsRegexList:     globalPatterns,
		compiledGlobalIgnoreRegexList: compileAll(globalPatterns),
	}

	// Columns: sub-test name, package name, file path, expected ShouldIgnoreFile result.
	scenarios := []struct {
		name             string
		packageName      string
		filePath         string
		expectedResponse bool
	}{
		{"File should be ignored", "TestPackage1", "/usr/share/doc/LICENSE", true},
		{"2nd File should be ignored", "TestPackage1", "/usr/share/doc/README.GPL", true},
		{"File should be ignored in other package", "TestPackage2", "/usr/share/doc/LICENSE", true},
		{"File should not be ignored in listed package", "TestPackage1", "/usr/share/doc/other_file", false},
		{"File should not be ignored in other package", "TestPackage3", "/usr/share/doc/LICENSE", false},
		{"File should match package glob", "TestPackage1", "/usr/share/doc/GLOB1", true},
		{"File should not match package glob", "TestPackage1", "/usr/share/doc/GLOB2", false},
		{"File should match global glob", "TestPackage1", "/usr/share/doc/GLOB3", true},
		{"File should match unkown package with global glob", "NOT_A_PACKAGE", "/usr/share/doc/GLOB3", true},
	}

	for _, scenario := range scenarios {
		t.Run(scenario.name, func(t *testing.T) {
			actual := exceptions.ShouldIgnoreFile(scenario.packageName, scenario.filePath)
			assert.Equal(t, scenario.expectedResponse, actual)
		})
	}
}
func TestNotPanicMissingFile(t *testing.T) {
tempPath := t.TempDir()
file := filepath.Join(tempPath, "missing_file.json")
assert.NotPanics(t, func() {
_, err := LoadLicenseExceptions(file)
assert.EqualError(t, err, "failed to read license exceptions file ("+file+"):\nopen "+file+": no such file or directory")
})
}
// TestInvalidRegex writes exception files containing an uncompilable regex (once in a package list, once in
// the global list) and checks that loading fails with the expected message and an empty result.
func TestInvalidRegex(t *testing.T) {
	const invalidRegex = `.*[`

	// Columns: sub-test name, JSON document to load, expected error text.
	scenarios := []struct {
		name        string
		json        string
		expectedErr string
	}{
		{
			"Invalid regex",
			`{"PkgExceptions": [{"PackageName": "TestPackage1", "IgnoredFilesRegexList": ["` + invalidRegex + `"]}], "GlobalExceptionsRegexList": []}`,
			"failed to compile regex for ignored files (.*[):\nerror parsing regexp: missing closing ]: `[`",
		},
		{
			"Invalid global regex",
			`{"PkgExceptions": [], "GlobalExceptionsRegexList": ["` + invalidRegex + `"]}`,
			"failed to compile regex for global ignored files (.*[):\nerror parsing regexp: missing closing ]: `[`",
		},
	}

	for _, scenario := range scenarios {
		t.Run(scenario.name, func(t *testing.T) {
			jsonFilePath := filepath.Join(t.TempDir(), "invalid_regex.json")
			writeErr := file.Write(scenario.json, jsonFilePath)
			assert.NoError(t, writeErr)

			loaded, loadErr := LoadLicenseExceptions(jsonFilePath)
			assert.Error(t, loadErr)
			assert.EqualError(t, loadErr, scenario.expectedErr)
			// On failure the loader must hand back the zero value, not a partially filled struct.
			assert.Equal(t, LicenseExceptions{}, loaded)
		})
	}
}

Просмотреть файл

@ -0,0 +1,111 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
// A tool for validating %license entries in rpms
package licensecheck
import (
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
"github.com/microsoft/azurelinux/toolkit/tools/internal/jsonutils"
)
// LicenseNames holds the regex lists used to classify file paths as license files. Each exported list is read
// from JSON; the unexported slice that follows it holds the compiled form of those patterns (filled in by
// LoadLicenseNames).
type LicenseNames struct {
	// Patterns matched against the file path after any license-directory prefix has been stripped.
	FuzzyLicenseNamesRegexList []string `json:"FuzzyLicenseNamesRegexList"`
	compiledFuzzyLicenseNamesList []*regexp.Regexp
	// Patterns matched against the base name of the file only; a match here always counts as a license.
	VerbatimLicenseNamesRegexList []string `json:"VerbatimLicenseNamesRegexList"`
	compiledVerbatimLicenseNamesList []*regexp.Regexp
	// Patterns for known non-license files; a match here vetoes a fuzzy match.
	SkipLicenseNamesRegexList []string `json:"SkipLicenseNamesRegexList"`
	compiledSkipLicenseNamesList []*regexp.Regexp
}
// IsALicenseFile makes a best effort guess if a file is a license file or not. This is a heuristic and is NOT
// foolproof however.
// Some examples of files that may be incorrectly identified as licenses:
//   - /path/to/code/gpl/README.md ("gpl")
//   - /path/to/a/hash/CC05f4dcc3b5aa765d61d8327deb882cf ("cc0")
//   - /path/to/freebsd-parts/file.ext ("bsd")
//
// The decision is made in two steps:
//  1. If the file's base name matches any verbatim pattern the file is a license, regardless of the skip list.
//  2. Otherwise the file is a license only if its path matches a fuzzy pattern AND is not a skipped file.
func (l *LicenseNames) IsALicenseFile(pkgName, licenseFilePath string) bool {
	// The base name does not depend on the pattern being tried, so compute it once up front.
	baseName := filepath.Base(licenseFilePath)

	// Check if the file is in the list of explicit known license files
	for _, name := range l.compiledVerbatimLicenseNamesList {
		if name.MatchString(baseName) {
			return true
		}
	}

	return checkFilePath(pkgName, licenseFilePath, l.compiledFuzzyLicenseNamesList) && !l.IsASkippedLicenseFile(pkgName, licenseFilePath)
}
// IsASkippedLicenseFile checks if a file is a known non-license file, i.e. whether its path (after the same
// prefix stripping checkFilePath applies to every path) matches any of the compiled skip patterns.
func (l *LicenseNames) IsASkippedLicenseFile(pkgName, licenseFilePath string) bool {
	return checkFilePath(pkgName, licenseFilePath, l.compiledSkipLicenseNamesList)
}
// checkFilePath reports whether a file path matches any of the given regexes.
//
// Paths that start with licensePrefix are reduced before matching: the "<licensePrefix>/<pkgName>" prefix is
// removed if present, then a bare licensePrefix, then a single leading path separator
// (i.e. "/usr/share/licenses/<pkg>/file/path" -> "file/path"). Without this step such paths would always match,
// since they contain "license" in the name. If nothing is left after stripping (the path was just the license
// directory itself) the result is false.
//
// Paths outside licensePrefix are matched in their filepath.Clean'ed form. Case sensitivity and anchoring are
// entirely up to the supplied regexes.
func checkFilePath(pkgName, licenseFilePath string, licenseFilesMatches []*regexp.Regexp) bool {
	candidate := filepath.Clean(licenseFilePath)

	if strings.HasPrefix(licenseFilePath, licensePrefix) {
		// Strip, in order: license dir + package name, the license dir alone, and one leading separator.
		candidate = licenseFilePath
		prefixes := []string{
			filepath.Join(licensePrefix, pkgName),
			licensePrefix,
			string(os.PathSeparator),
		}
		for _, prefix := range prefixes {
			candidate = strings.TrimPrefix(candidate, prefix)
		}

		// Nothing left means the path was the license directory, not a file inside it.
		if candidate == "" {
			return false
		}
	}

	for _, pattern := range licenseFilesMatches {
		if pattern.MatchString(candidate) {
			return true
		}
	}
	return false
}
// LoadLicenseNames loads the license name regexes from the given .json file into a LicenseNames struct and
// compiles every pattern in the fuzzy, verbatim and skip lists (in that order). If the file cannot be read or
// any pattern fails to compile, an empty LicenseNames and a wrapped error are returned.
func LoadLicenseNames(file string) (LicenseNames, error) {
	var names LicenseNames
	if err := jsonutils.ReadJSONFile(file, &names); err != nil {
		return LicenseNames{}, fmt.Errorf("failed to read license names file (%s):\n%w", file, err)
	}

	// Pair each list of pattern strings with the slice that receives its compiled regexes.
	regexLists := []struct {
		patterns []string
		compiled *[]*regexp.Regexp
	}{
		{names.FuzzyLicenseNamesRegexList, &names.compiledFuzzyLicenseNamesList},
		{names.VerbatimLicenseNamesRegexList, &names.compiledVerbatimLicenseNamesList},
		{names.SkipLicenseNamesRegexList, &names.compiledSkipLicenseNamesList},
	}

	for _, list := range regexLists {
		for _, pattern := range list.patterns {
			compiledPattern, err := regexp.Compile(pattern)
			if err != nil {
				return LicenseNames{}, fmt.Errorf("failed to compile regex for license names (%s):\n%w", pattern, err)
			}
			*list.compiled = append(*list.compiled, compiledPattern)
		}
	}

	return names, nil
}

Просмотреть файл

@ -0,0 +1,359 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
// A tool for validating %license entries in rpms
package licensecheck
import (
"path/filepath"
"regexp"
"strings"
"testing"
"github.com/microsoft/azurelinux/toolkit/tools/internal/file"
"github.com/microsoft/azurelinux/toolkit/tools/internal/jsonutils"
"github.com/stretchr/testify/assert"
)
// testData mirrors the top-level JSON object stored in the ./testdata/all_*_<date>.json files.
type testData struct {
	// Used by the tests below as the denominator when computing the misclassification percentage.
	UniqueFiles int
	UniquePackages int
	// One entry per (package, file path) pair.
	TestDataEntries []testDataEntry
}
// testDataEntry is a single file path together with the name of the package it belongs to.
type testDataEntry struct {
	Pkg string `json:"Pkg"`
	Path string `json:"Path"`
}
// TestLoadLicenseNames loads the JSON fixture from testdata and checks that every regex list and its compiled
// counterpart is populated as expected.
func TestLoadLicenseNames(t *testing.T) {
	const (
		fixturePath     = "testdata/test_license_names.json"
		fuzzyPattern    = "(?i).*fuzzy.*"
		verbatimPattern = "^vErBaTiM$"
		skipPattern     = "(?i).*skip.*"
	)

	want := LicenseNames{
		FuzzyLicenseNamesRegexList:       []string{fuzzyPattern},
		compiledFuzzyLicenseNamesList:    []*regexp.Regexp{regexp.MustCompile(fuzzyPattern)},
		VerbatimLicenseNamesRegexList:    []string{verbatimPattern},
		compiledVerbatimLicenseNamesList: []*regexp.Regexp{regexp.MustCompile(verbatimPattern)},
		SkipLicenseNamesRegexList:        []string{skipPattern},
		compiledSkipLicenseNamesList:     []*regexp.Regexp{regexp.MustCompile(skipPattern)},
	}

	got, err := LoadLicenseNames(fixturePath)
	// A load failure is reported but does not stop the comparison below.
	if err != nil {
		t.Errorf("Failed to load license names: %v", err)
	}

	// The loaded names must match the fixture's contents field for field.
	assert.Equal(t, want, got)
}
func TestNotPanicMissingNameFile(t *testing.T) {
tempPath := t.TempDir()
file := filepath.Join(tempPath, "missing_file.json")
assert.NotPanics(t, func() {
_, err := LoadLicenseNames(file)
assert.EqualError(t, err, "failed to read license names file ("+file+"):\nopen "+file+": no such file or directory")
})
}
// TestInvalidNameRegex places an uncompilable regex in each of the three name lists in turn and checks that
// loading fails with the expected message and an empty result.
func TestInvalidNameRegex(t *testing.T) {
	const invalidRegex = `.*[`

	// Columns: sub-test name, JSON document to load, expected error text.
	scenarios := []struct {
		name        string
		json        string
		expectedErr string
	}{
		{
			"Invalid fuzzy regex",
			`{"FuzzyLicenseNamesRegexList": ["` + invalidRegex + `"], "VerbatimLicenseNamesRegexList": [], "SkipLicenseNamesRegexList": []}`,
			"failed to compile regex for license names (.*[):\nerror parsing regexp: missing closing ]: `[`",
		},
		{
			"Invalid verbatim regex",
			`{"FuzzyLicenseNamesRegexList": [], "VerbatimLicenseNamesRegexList": ["` + invalidRegex + `"], "SkipLicenseNamesRegexList": []}`,
			"failed to compile regex for license names (.*[):\nerror parsing regexp: missing closing ]: `[`",
		},
		{
			"Invalid skip regex",
			`{"FuzzyLicenseNamesRegexList": [], "VerbatimLicenseNamesRegexList": [], "SkipLicenseNamesRegexList": ["` + invalidRegex + `"]}`,
			"failed to compile regex for license names (.*[):\nerror parsing regexp: missing closing ]: `[`",
		},
	}

	for _, scenario := range scenarios {
		t.Run(scenario.name, func(t *testing.T) {
			jsonFilePath := filepath.Join(t.TempDir(), "invalid_regex.json")
			writeErr := file.Write(scenario.json, jsonFilePath)
			assert.NoError(t, writeErr)

			loaded, loadErr := LoadLicenseNames(jsonFilePath)
			assert.Error(t, loadErr)
			assert.EqualError(t, loadErr, scenario.expectedErr)
			// On failure the loader must hand back the zero value, not a partially filled struct.
			assert.Equal(t, LicenseNames{}, loaded)
		})
	}
}
func generateTestVariantStrings(pkgName, base string) []string {
upperCase := strings.ToUpper(base)
lowerCase := strings.ToLower(base)
randomizedCase := ""
basePath := filepath.Join("/usr/share/licenses/", pkgName)
for i, c := range base {
if i%2 == 0 {
randomizedCase += strings.ToLower(string(c))
} else {
randomizedCase += strings.ToUpper(string(c))
}
}
fileNames := []string{
lowerCase,
upperCase,
lowerCase + ".txt",
upperCase + ".txt",
lowerCase + ".mypkg.txt",
randomizedCase,
upperCase + "-mypkg",
upperCase + "-mypkg-ver",
"mypkg-" + upperCase,
"mypkg-" + upperCase + ".txt",
upperCase + ".MYPKG",
upperCase + "_MYPKG",
}
for i := range fileNames {
fileNames[i] = filepath.Join(basePath, fileNames[i])
}
return fileNames
}
// TestIsALicenseFile_Common checks that common variations on well-known license file names are detected as
// license files and are not treated as skipped files.
func TestIsALicenseFile_Common(t *testing.T) {
	const pkgName = "pkg"
	licenseNames := loadDefaultLicenseNames(t)

	commonNames := []string{
		"copying",
		"license",
		"licence", // British spelling is sometimes used
		"notice",
		"copyright",
		"artistic",
		"bsd",
		"gpl",
		"cc0",
		"mit.txt",
	}

	for _, baseName := range commonNames {
		variants := generateTestVariantStrings(pkgName, baseName)
		t.Run(baseName, func(t *testing.T) {
			for _, variantPath := range variants {
				t.Run(variantPath, func(t *testing.T) {
					assert.True(t, licenseNames.IsALicenseFile(pkgName, variantPath))
					assert.False(t, licenseNames.IsASkippedLicenseFile(pkgName, variantPath))
				})
			}
		})
	}
}
// TestIsASkippedLicenseFile checks that well-known non-license files (authors, contributors, readme, credits)
// are reported as skipped, both as bare names and as paths inside the package's license directory.
func TestIsASkippedLicenseFile(t *testing.T) {
	const pkgName = "pkg"
	licenseNames := loadDefaultLicenseNames(t)

	skippedPaths := []string{
		"AUTHORS",
		"CONTRIBUTORS",
		"README",
		"CREDITS",
		"/usr/share/licenses/pkg/AUTHORS",
		"/usr/share/licenses/pkg/AUTHORS.txt",
		"/usr/share/licenses/pkg/docs/AUTHORS-1.0",
	}

	for _, skippedPath := range skippedPaths {
		t.Run(skippedPath, func(t *testing.T) {
			isSkipped := licenseNames.IsASkippedLicenseFile(pkgName, skippedPath)
			assert.True(t, isSkipped)
		})
	}
}
// TestIsALicenseFile_Specific checks that the bare file name "MIT" is detected as a license while names that
// merely contain "MIT" are not.
func TestIsALicenseFile_Specific(t *testing.T) {
	const pkgName = "pkg"
	licenseNames := loadDefaultLicenseNames(t)

	// Maps each file name to whether it is expected to be detected as a license.
	scenarios := []struct {
		file     string
		expected bool
	}{
		{file: "MIT", expected: true},
		{file: "MIT_other", expected: false},
		{file: "other_MIT", expected: false},
	}

	for _, scenario := range scenarios {
		t.Run(scenario.file, func(t *testing.T) {
			assert.Equal(t, scenario.expected, licenseNames.IsALicenseFile(pkgName, scenario.file))
		})
	}
}
// TestIsNotALicenseFile checks that ordinary files, skipped files, the license directories themselves and the
// root directory are not detected as license files.
func TestIsNotALicenseFile(t *testing.T) {
	const (
		pkgName  = "pkg"
		basePath = "/usr/share/licenses/"
	)
	licenseNames := loadDefaultLicenseNames(t)

	// Start with files inside the package's license directory...
	var nonLicensePaths []string
	for _, fileName := range []string{"file", "README", "MIT-file", "AUTHORS.txt"} {
		nonLicensePaths = append(nonLicensePaths, filepath.Join(basePath, pkgName, fileName))
	}
	// ...then add the directories themselves.
	nonLicensePaths = append(nonLicensePaths,
		filepath.Join(basePath, pkgName),
		basePath,
		"/",
	)

	for _, nonLicensePath := range nonLicensePaths {
		t.Run(nonLicensePath, func(t *testing.T) {
			assert.False(t, licenseNames.IsALicenseFile(pkgName, nonLicensePath))
		})
	}
}
// TestSubDirsMatch checks that license files are detected in sub-directories of the package's license
// directory, inside directories named "licenses", and outside the license directory entirely.
func TestSubDirsMatch(t *testing.T) {
	const pkgName = "pkg"
	licenseNames := loadDefaultLicenseNames(t)

	licensePaths := []string{
		"/usr/share/licenses/pkg/COPYING",
		"/usr/share/licenses/pkg/subdir/COPYING",
		"/usr/share/licenses/pkg/LICENSES/random_file",
		"/usr/share/licenses/pkg/licenses/random_file",
		"/path/to/LICENSE",
	}

	for _, licensePath := range licensePaths {
		t.Run(licensePath, func(t *testing.T) {
			isLicense := licenseNames.IsALicenseFile(pkgName, licensePath)
			isSkipped := licenseNames.IsASkippedLicenseFile(pkgName, licensePath)
			assert.True(t, isLicense)
			assert.False(t, isSkipped)
		})
	}
}
// TestLicenseDirDoesNotMatch checks that the license directory itself (with or without the package name or a
// trailing slash) is never treated as a license file.
func TestLicenseDirDoesNotMatch(t *testing.T) {
	const pkgName = "pkg"
	licenseNames := loadDefaultLicenseNames(t)

	licenseDirs := []string{
		"/usr/share/licenses/",
		"/usr/share/licenses/pkg",
		"/usr/share/licenses/pkg/",
	}

	for _, licenseDir := range licenseDirs {
		t.Run(licenseDir, func(t *testing.T) {
			isLicense := licenseNames.IsALicenseFile(pkgName, licenseDir)
			assert.False(t, isLicense)
		})
	}
}
// TestAgainstKnownLicenses runs the default license name heuristic over every known %license file and fails
// if too many of them are not recognised as licenses.
func TestAgainstKnownLicenses(t *testing.T) {
	// We store all the %license files from the distro in ./testdata/all_licenses_<date>.json
	// See ./testdata/README.md for more information on how to generate this file
	// This test will check that MOST of the known licenses are correctly identified as licenses. It is not
	// exhaustive, but it should catch most common cases. This value can be increased as the quality of the
	// packages improves.
	const acceptablePercentage = 0.98

	n := loadDefaultLicenseNames(t)

	// Find all data files in the testdata directory
	paths, err := filepath.Glob("./testdata/all_licenses_*.json")
	if err != nil {
		t.Fatalf("Failed to find test data file: %v", err)
	}

	// Get the most recent file: the <date> suffix is YYYYMMDD, so the lexicographically largest path is the newest.
	testDataFile := ""
	for _, path := range paths {
		if testDataFile < path {
			testDataFile = path
		}
	}
	if testDataFile == "" {
		t.Fatalf("Failed to find test data file")
	}

	knownLicenses := testData{}
	err = jsonutils.ReadJSONFile(testDataFile, &knownLicenses)
	if err != nil {
		t.Fatalf("failed to read input file: %v", err)
	}
	// Reported separately from read errors so the failure message does not print a nil error, and so the
	// division below never has a zero denominator.
	if knownLicenses.UniqueFiles == 0 {
		t.Fatalf("test data file (%s) reports zero unique files", testDataFile)
	}

	invalidEntries := 0
	for _, test := range knownLicenses.TestDataEntries {
		if !n.IsALicenseFile(test.Pkg, test.Path) {
			invalidEntries++
		}
	}

	invalidPercentage := float64(invalidEntries) / float64(knownLicenses.UniqueFiles)
	if invalidPercentage > 1.0-acceptablePercentage {
		t.Errorf("Failed to identify %d out of %d known licenses (%.2f%%)", invalidEntries, knownLicenses.UniqueFiles, invalidPercentage*100)
	}
}
// TestAgainstKnownDocs runs the default license name heuristic over every known %doc file and fails if too
// many of them are wrongly recognised as licenses.
func TestAgainstKnownDocs(t *testing.T) {
	// We store all the %doc files from the distro in ./testdata/all_docs_<date>.json
	// See ./testdata/README.md for more information on how to generate this file
	// This test will check that MOST of the known docs are correctly identified as not licenses. It is not
	// exhaustive, but it should catch most common cases.
	const acceptablePercentage = 0.99

	n := loadDefaultLicenseNames(t)

	// Find all data files in the testdata directory
	paths, err := filepath.Glob("./testdata/all_docs_*.json")
	if err != nil {
		t.Fatalf("Failed to find test data file: %v", err)
	}

	// Get the most recent file: the <date> suffix is YYYYMMDD, so the lexicographically largest path is the newest.
	testDataFile := ""
	for _, path := range paths {
		if testDataFile < path {
			testDataFile = path
		}
	}
	if testDataFile == "" {
		t.Fatalf("Failed to find test data file")
	}

	knownDocs := testData{}
	err = jsonutils.ReadJSONFile(testDataFile, &knownDocs)
	if err != nil {
		t.Fatalf("failed to read input file: %v", err)
	}
	// Reported separately from read errors so the failure message does not print a nil error, and so the
	// division below never has a zero denominator.
	if knownDocs.UniqueFiles == 0 {
		t.Fatalf("test data file (%s) reports zero unique files", testDataFile)
	}

	invalidEntries := 0
	for _, test := range knownDocs.TestDataEntries {
		if n.IsALicenseFile(test.Pkg, test.Path) {
			invalidEntries++
		}
	}

	invalidPercentage := float64(invalidEntries) / float64(knownDocs.UniqueFiles)
	if invalidPercentage > 1.0-acceptablePercentage {
		t.Errorf("Failed to skip %d out of %d known docs (%.2f%%)", invalidEntries, knownDocs.UniqueFiles, invalidPercentage*100)
	}
}

2
toolkit/tools/pkg/licensecheck/testdata/.gitignore поставляемый Normal file
Просмотреть файл

@ -0,0 +1,2 @@
all_other_files*.json
_tmp*.json

42
toolkit/tools/pkg/licensecheck/testdata/README.md поставляемый Normal file
Просмотреть файл

@ -0,0 +1,42 @@
# Test data for the license checker
The `licensecheck` package uses a heuristic to identify license files; the input data to this tool comes from the
packages currently in the distro.
The test data is generated from all the files packaged into `/usr/share/licenses/<pkg>/*` and is gathered via `repoquery`.
## Generating new test data
In an Azure Linux environment (specifically an environment with access to the package repos) run:
```bash
cd ./testdata
rm *.json
tdnf -y install dnf-utils python3 ca-certificates
./generate_test_data.py
```
This will query the available repos and generate three files: `all_licenses_<date>.json`, `all_docs_<date>.json`, and
`all_other_files_<date>.json`, containing lists of all `%license` files, all `%doc` files, and all
other files (but not directories), respectively.
**Note:** `all_other_files_*.json` is marked to be ignored by git; it is a very large file and is less important to
validate against than `all_docs_<date>.json`.
## Quick validation of the test data
This will read the files from above and report false positive/negative results, and generate a set of files containing
all "incorrect" findings.
```bash
cd ./testdata
find . -name 'all_other_files_*.json' | grep -q . || echo "**** Generate test data first! ****"
go run . --licenses ./all_licenses_*.json --licenses-output ./_tmp_bad_licenses.json --docs ./all_docs_*.json --docs-output ./_tmp_bad_docs.json --other-files ./all_other_files_*.json --other-files-output ./_tmp_bad_other_files.json --name-file ../../../../resources/manifests/package/license_file_names.json --exception-file ../../../../resources/manifests/package/license_file_exceptions.json
# Check ./_tmp_bad_licenses.json, _tmp_bad_docs.json, _tmp_bad_other_files.json for any files that fail the classification
```
As of 2024-05-22 the results are:
- `1.9%` false negative (licenses)
- `0.25%` false positive (docs)
- `0.47%` false positive (all other files)

627738
toolkit/tools/pkg/licensecheck/testdata/all_docs_20240522.json поставляемый Normal file

Разница между файлами не показана из-за своего большого размера Загрузить разницу

13882
toolkit/tools/pkg/licensecheck/testdata/all_licenses_20240522.json поставляемый Normal file

Разница между файлами не показана из-за своего большого размера Загрузить разницу

170
toolkit/tools/pkg/licensecheck/testdata/generate_test_data.py поставляемый Executable file
Просмотреть файл

@ -0,0 +1,170 @@
#!/usr/bin/python3
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import concurrent.futures
import json
import os
import random
import tempfile
import time
import subprocess
import urllib.request
# generate_test_data.py generates three files that list all the license files, doc files, and other files for all RPMs in the repository.
# The intent is to use this data to test the licensecheck tool for false positives/negatives.
# This tool should be run in an azl-like environment: specifically, the 'repoquery' tool must be available, and it must
# be able to pull rpms from a representative repo that contains all RPMs to measure (i.e. PMC).
# get_all_rpms() returns a list of URLs to each RPM in the default repos. It only looks at the latest version of each RPM.
# Source RPMs (*.src.rpm) and blank lines are dropped from the result.
def get_all_rpms() -> list[str]:
    cmd = ["repoquery", "-y", "--latest-limit=1", "--all", "--location"]
    output = subprocess.check_output(cmd, text=True, stderr=subprocess.DEVNULL)

    # Clean the output:
    # - Split into a list and strip surrounding whitespace from every line
    # - Remove anything that is *.src.rpm (some environments will give us the source RPMs as well)
    # - Remove any empty strings
    # Stripping happens first so that trailing whitespace cannot hide a '.src.rpm' suffix from the filter.
    urls = [line.strip() for line in output.split("\n")]
    return [url for url in urls if url and not url.endswith(".src.rpm")]
# query_rpm_url() runs the 'rpm' command with the given query arguments against the RPM file at 'out_file'.
# It returns the non-empty lines of the command's output, or an empty list if rpm reports that the package
# contains no files.
def query_rpm_url(out_file: str, args: list[str]) -> list[str]:
    # stderr is discarded; a non-zero exit status raises subprocess.CalledProcessError.
    raw_output = subprocess.check_output(["rpm"] + args + [out_file], text=True, stderr=subprocess.DEVNULL)

    # rpm prints the string '(contains no files)' instead of a listing when there is nothing to list
    if "(contains no files)" in raw_output:
        return []

    return [line for line in raw_output.split("\n") if line]
# get_name() queries the %{NAME} tag of the RPM at 'out_file'. The result is the list of non-empty output
# lines from query_rpm_url(), not a bare string; callers take element [0] to get the name.
def get_name(out_file: str) -> list[str]:
    return query_rpm_url(out_file, ["-q", "--qf", "%{NAME}\n"])
# get_license_files() returns the lines printed by 'rpm -qL' for the RPM at 'out_file' (its license files).
def get_license_files(out_file: str) -> list[str]:
    return query_rpm_url(out_file, ["-qL"])
# get_doc_files() returns the lines printed by 'rpm -qd' for the RPM at 'out_file' (its documentation files).
def get_doc_files(out_file: str) -> list[str]:
    return query_rpm_url(out_file, ["-qd"])
# get_all_files() returns every non-directory path packaged in the RPM at 'out_file', excluding any path that
# appears in 'filter_list'.
def get_all_files(out_file: str, filter_list: list[str]) -> list[str]:
    listing = query_rpm_url(out_file, ["-q", "--qf", "[%{FILEMODES:perms} %{FILENAMES}\n]"])
    excluded = set(filter_list)

    remaining_files = []
    for entry in listing:
        # Each line will be in the format "drwxr-xr-x /a/directory" or "-rw-r--r-- /a/directory/a_file".
        # Directories are dropped...
        if entry[0] == "d":
            continue
        # ...and for everything else only the path after the permissions string is kept.
        path = entry.split(' ', 1)[1]
        if path not in excluded:
            remaining_files.append(path)
    return remaining_files
# get_files_for_url() downloads the RPM at 'url' into a temporary directory and returns a dict with the URL,
# the package name, its license files, its doc files, and all of its remaining (non-directory) files.
def get_files_for_url(url: str) -> dict:
    # The RPM only needs to exist long enough to be queried, so keep it in a self-cleaning tempdir
    with tempfile.TemporaryDirectory() as workdir:
        rpm_path = os.path.join(workdir, "pkg.rpm")
        urllib.request.urlretrieve(url, rpm_path)

        licenses = get_license_files(rpm_path)
        docs = get_doc_files(rpm_path)
        # Anything already classified as a license or doc is excluded from the "other" list
        others = get_all_files(rpm_path, licenses + docs)
        name = get_name(rpm_path)[0]

        return {
            "url": url,
            "pkg_name": name,
            "license_files": licenses,
            "doc_files": docs,
            "all_other_files": others
        }
# Corresponding go structs for the output of this script:
# type testData struct {
# 	UniqueFiles     int
# 	UniquePackages  int
# 	TestDataEntries []testDataEntry
# }
# type testDataEntry struct {
# 	Pkg  string `json:"Pkg"`
# 	Path string `json:"Path"`
# }

# Write the results to a file.
# 'file_list' holds one (package name, list of file paths) tuple per RPM. The entries are written sorted by
# package name; the caller's list is not modified.
def write_to_file(file_list: list[tuple[str, list[str]]], output_file: str) -> None:
    print(f"Writing to {output_file}")

    # sorted() returns a new list, so the caller's list keeps its original order.
    ordered = sorted(file_list)

    test_data_entries = []
    for pkg_name, files in ordered:
        for file in files:
            test_data_entries.append({
                "Pkg": pkg_name,
                "Path": file
            })

    test_data = {
        "UniqueFiles": len(test_data_entries),
        # Only packages with files are counted. NOTE(review): an RPM name that appears more than once in
        # 'file_list' is counted once per occurrence - confirm whether that is intended.
        "UniquePackages": len([pkg_name for pkg_name, files in ordered if files]),
        "TestDataEntries": test_data_entries
    }

    with open(output_file, "w") as f:
        json.dump(test_data, f, indent=0)
# main() lists every RPM in the default repos, inspects them in parallel, and writes the collected license,
# doc and other file lists to three date-stamped JSON files in the current directory.
def main():
    # Put the debug info packages first since they tend to be really big,
    # then the remaining URLs,
    # Randomize the lists to even out the load
    all_urls = get_all_rpms()
    debug_urls = [url for url in all_urls if "debuginfo" in url]
    other_urls = [url for url in all_urls if "debuginfo" not in url]
    random.shuffle(debug_urls)
    random.shuffle(other_urls)
    jobs = debug_urls + other_urls

    # Queue each URL to be processed in parallel.
    # os.cpu_count() returns None when the CPU count cannot be determined; fall back to a single CPU.
    num_processes = 4 * (os.cpu_count() or 1)
    license_files = []
    doc_files = []
    all_other_files = []
    with concurrent.futures.ProcessPoolExecutor(max_workers=num_processes) as executor:
        results = [executor.submit(get_files_for_url, url) for url in jobs]
        total_processed = 0
        start_time = time.time()
        for future in concurrent.futures.as_completed(results):
            # result() re-raises any exception from the worker, which aborts the whole run.
            res = future.result()
            license_files.append((res["pkg_name"], res["license_files"]))
            doc_files.append((res["pkg_name"], res["doc_files"]))
            all_other_files.append((res["pkg_name"], res["all_other_files"]))
            total_processed += 1

            # Estimated time remaining
            elapsed_time = time.time() - start_time
            time_per_file = elapsed_time / total_processed
            remaining_files = len(jobs) - total_processed
            remaining_time = time_per_file * remaining_files
            percent_done = (total_processed / len(jobs)) * 100
            base_name = res["url"].split("/")[-1]
            print(f"~{remaining_time:.0f}s remaining ({total_processed}/{len(jobs)} ({percent_done:.2f}%))... {base_name} ")

    # Write the results to 'all_licenses_<date>.json', 'all_docs_<date>.json' and 'all_other_files_<date>.json'
    date = time.strftime('%Y%m%d')
    license_file_path = f"all_licenses_{date}.json"
    doc_file_path = f"all_docs_{date}.json"
    all_other_file_path = f"all_other_files_{date}.json"
    write_to_file(license_files, license_file_path)
    write_to_file(doc_files, doc_file_path)
    write_to_file(all_other_files, all_other_file_path)

if __name__ == "__main__":
    main()

116
toolkit/tools/pkg/licensecheck/testdata/licensetestchecker.go поставляемый Normal file
Просмотреть файл

@ -0,0 +1,116 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
// Tool to validate test data for the licensecheck package unit tests.
package main
import (
"os"
"github.com/microsoft/azurelinux/toolkit/tools/internal/exe"
"github.com/microsoft/azurelinux/toolkit/tools/internal/jsonutils"
"github.com/microsoft/azurelinux/toolkit/tools/internal/logger"
"github.com/microsoft/azurelinux/toolkit/tools/pkg/licensecheck"
"gopkg.in/alecthomas/kingpin.v2"
)
// Command line definition. All flags are required. Flags declared with ExistingFile() name input files, while
// the *-output flags are plain strings naming the files this tool writes.
var (
	app = kingpin.New("licensetestchecker", "Checks test data for licenses.")
	licenses = app.Flag("licenses", "Path to the input file of license file paths to check for false negatives.").Required().ExistingFile()
	licensesOut = app.Flag("licenses-output", "Path to the output file to list all false negatives.").Required().String()
	docs = app.Flag("docs", "Path to the input file of doc file paths to check for false positives.").Required().ExistingFile()
	docsOut = app.Flag("docs-output", "Path to the output file to list all false positives.").Required().String()
	otherFiles = app.Flag("other-files", "Path to the input file of other file paths to check for false positives.").Required().ExistingFile()
	otherFilesOut = app.Flag("other-files-output", "Path to the output file to list all false positives.").Required().String()
	nameFile = app.Flag("name-file", "Path to the file containing the list of license names to check for.").Required().ExistingFile()
	exceptionFile = app.Flag("exception-file", "Path to the file containing the list of exceptions to the license check.").Required().ExistingFile()
)
// testData is the top-level JSON object this tool reads as input and writes as output.
type testData struct {
	UniqueFiles int
	UniquePackages int
	// One entry per (package, file path) pair.
	TestDataEntries []testDataEntry
}
// testDataEntry is a single file path together with the name of the package it belongs to.
type testDataEntry struct {
	Pkg string `json:"Pkg"`
	Path string `json:"Path"`
}
// main loads the license name and exception configuration, then classifies the three input data sets
// (licenses, docs, other files). For each set it writes the misclassified entries to the matching output file
// and logs the misclassification ratio.
func main() {
	app.Version(exe.ToolkitVersion)
	kingpin.MustParse(app.Parse(os.Args[1:]))
	logger.InitStderrLog()

	names, err := licensecheck.LoadLicenseNames(*nameFile)
	if err != nil {
		logger.Log.Fatalf("Failed to load license names: %v", err)
	}

	exceptions, err := licensecheck.LoadLicenseExceptions(*exceptionFile)
	if err != nil {
		logger.Log.Fatalf("Failed to load license exceptions: %v", err)
	}

	// ratio returns the fraction of entries in 'all' that ended up in 'flagged'.
	ratio := func(flagged, all testData) float64 {
		return float64(len(flagged.TestDataEntries)) / float64(len(all.TestDataEntries))
	}

	// Validate actual license files, checking for false negatives
	knownLicenses := readTestData(*licenses)
	missedLicenses := checkFalseNegatives(knownLicenses, names, exceptions)
	writeTestData(missedLicenses, *licensesOut)
	logger.Log.Infof("Wrote %d invalid entries to '%s' (%.2f%% false negative)", len(missedLicenses.TestDataEntries), *licensesOut, ratio(missedLicenses, knownLicenses)*100)

	// Validate doc files, checking for false positives
	knownDocs := readTestData(*docs)
	flaggedDocs := checkFalsePositives(knownDocs, names, exceptions)
	writeTestData(flaggedDocs, *docsOut)
	logger.Log.Infof("Wrote %d invalid docs to '%s' (%.2f%% false positive)", len(flaggedDocs.TestDataEntries), *docsOut, ratio(flaggedDocs, knownDocs)*100)

	// Validate other files, checking for false positives
	knownOtherFiles := readTestData(*otherFiles)
	flaggedOtherFiles := checkFalsePositives(knownOtherFiles, names, exceptions)
	writeTestData(flaggedOtherFiles, *otherFilesOut)
	logger.Log.Infof("Wrote %d invalid other files to '%s' (%.2f%% false positive)", len(flaggedOtherFiles.TestDataEntries), *otherFilesOut, ratio(flaggedOtherFiles, knownOtherFiles)*100)
}
// readTestData parses the JSON file at filePath into a testData value. Any failure is logged via
// logger.Log.Fatalf.
func readTestData(filePath string) (tests testData) {
	if err := jsonutils.ReadJSONFile(filePath, &tests); err != nil {
		logger.Log.Fatalf("failed to read input file: %v", err)
	}
	return tests
}
// writeTestData serialises tests as JSON into the file at filePath. Any failure is logged via
// logger.Log.Fatalf.
func writeTestData(tests testData, filePath string) {
	if err := jsonutils.WriteJSONFile(filePath, tests); err != nil {
		logger.Log.Fatalf("failed to write output file: %v", err)
	}
}
// checkFalseNegatives returns the entries of tests (all expected to be real license files) that would NOT be
// treated as licenses: either the name heuristic does not recognise them, or an exception tells the checker to
// ignore them. UniqueFiles is the number of such entries and UniquePackages the number of distinct packages
// they belong to.
func checkFalseNegatives(tests testData, names licensecheck.LicenseNames, exceptions licensecheck.LicenseExceptions) (falseNegatives testData) {
	// Set of package names that contributed at least one false negative.
	affectedPkgs := make(map[string]struct{})

	for _, test := range tests.TestDataEntries {
		if !names.IsALicenseFile(test.Pkg, test.Path) || exceptions.ShouldIgnoreFile(test.Pkg, test.Path) {
			falseNegatives.TestDataEntries = append(falseNegatives.TestDataEntries, test)
			affectedPkgs[test.Pkg] = struct{}{}
		}
	}

	falseNegatives.UniqueFiles = len(falseNegatives.TestDataEntries)
	// Count distinct packages rather than entries; one package can own many flagged files.
	falseNegatives.UniquePackages = len(affectedPkgs)
	return falseNegatives
}
// checkFalsePositives returns the entries of tests (all expected to be non-license files) that WOULD be
// treated as licenses: the name heuristic recognises them and no exception tells the checker to ignore them.
// UniqueFiles is the number of such entries and UniquePackages the number of distinct packages they belong to.
func checkFalsePositives(tests testData, names licensecheck.LicenseNames, exceptions licensecheck.LicenseExceptions) (falsePositives testData) {
	// Set of package names that contributed at least one false positive.
	affectedPkgs := make(map[string]struct{})

	for _, test := range tests.TestDataEntries {
		if names.IsALicenseFile(test.Pkg, test.Path) && !exceptions.ShouldIgnoreFile(test.Pkg, test.Path) {
			falsePositives.TestDataEntries = append(falsePositives.TestDataEntries, test)
			affectedPkgs[test.Pkg] = struct{}{}
		}
	}

	falsePositives.UniqueFiles = len(falsePositives.TestDataEntries)
	// Count distinct packages rather than entries; one package can own many flagged files.
	falsePositives.UniquePackages = len(affectedPkgs)
	return falsePositives
}

Просмотреть файл

@ -0,0 +1,23 @@
{
"PkgExceptions": [
{
"PackageName": "TestPackage1",
"IgnoredFilesRegexList": [
"/usr/share/doc/LICENSE",
"/usr/share/doc/README.GPL",
".*GLOB1"
]
},
{
"PackageName": "TestPackage2",
"IgnoredFilesRegexList": [
"/usr/share/doc/LICENSE",
"/usr/share/doc/README.GPL",
".*GLOB2"
]
}
],
"GlobalExceptionsRegexList": [
".*GLOB3"
]
}

11
toolkit/tools/pkg/licensecheck/testdata/test_license_names.json поставляемый Normal file
Просмотреть файл

@ -0,0 +1,11 @@
{
"FuzzyLicenseNamesRegexList": [
"(?i).*fuzzy.*"
],
"VerbatimLicenseNamesRegexList": [
"^vErBaTiM$"
],
"SkipLicenseNamesRegexList": [
"(?i).*skip.*"
]
}