Add new license validator tool (#9060)
Co-authored-by: Pawel Winogrodzki <pawelwi@microsoft.com>
This commit is contained in:
Родитель
7b1635b878
Коммит
5016f3f5f9
|
@ -78,6 +78,14 @@ ENABLE_CPU_PROFILE ?= n
|
|||
ENABLE_MEM_PROFILE ?= n
|
||||
ENABLE_TRACE ?= n
|
||||
|
||||
# License checking tool
|
||||
##help:var:LICENSE_CHECK_DIRS:"<rpm_dir_1> <rpm_dir_2>"=Space separated list of directories to recursively validate with the manual 'license-check' target.
|
||||
LICENSE_CHECK_DIRS ?=
|
||||
LICENSE_CHECK_EXCEPTION_FILE ?= $(MANIFESTS_DIR)/package/license_file_exceptions.json
|
||||
LICENSE_CHECK_NAME_FILE ?= $(MANIFESTS_DIR)/package/license_file_names.json
|
||||
##help:var:LICENSE_CHECK_MODE:{none,warn,fatal,pedantic}=Set the license check mode during package and image builds. 'none' will disable the license check, 'warn' will print warnings, 'fatal' will stop the build on errors, 'pedantic' will stop the build on warnings and errors.
|
||||
LICENSE_CHECK_MODE ?= none
|
||||
|
||||
# Folder defines
|
||||
TOOLS_DIR ?= $(toolkit_root)/tools
|
||||
TOOL_BINS_DIR ?= $(toolkit_root)/out/tools
|
||||
|
@ -254,7 +262,11 @@ include $(SCRIPTS_DIR)/pkggen.mk
|
|||
include $(SCRIPTS_DIR)/imggen.mk
|
||||
|
||||
# Add make targets for sodiff to determine if additional packages are required to be recompiled:
|
||||
# sodiff-check, build-summary, build-package-summary, fake-built-packages-list, sodiff-setup
|
||||
# sodiff-check, sodiff-setup
|
||||
# Get build info with:
|
||||
# build-summary, build-package-summary, fake-built-packages-list
|
||||
# Validate rpm licenses with:
|
||||
# license-check, license-check-pkg, license-check-img, clean-license-check
|
||||
include $(SCRIPTS_DIR)/analysis.mk
|
||||
|
||||
##help:target:clean=Clean all built files.
|
||||
|
|
|
@ -88,6 +88,8 @@ The `imagepkgfetcher` tool is similar to the `graphpkgfetcher` tool. It will fin
|
|||
The `imager` tool is responsible for composing an image based on the selected configuration file. It creates partitions, installs packages, configures the users, etc. It can output either a `*.raw` file or a simple filesystem.
|
||||
#### isomaker
|
||||
The `isomaker` tool creates an installable ISO which can be booted from a CD or other device. The ISO contains the `initrd` used to boot from a read-only device, and all the packages needed to create a copy of the selected configuration on a new computer.
|
||||
#### licensechecker
|
||||
The `licensechecker` tool is used to validate the licensing files in packages. It will check all `*.rpm` files in a directory and provide a list of issues found.
|
||||
#### liveinstaller
|
||||
The `liveinstaller` tool is included in the ISO `initrd` and is responsible for installing the requested image onto a new computer.
|
||||
#### pkgworker
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
{
|
||||
"_comment1": "This file is used to allow specific files to be omitted from the license check process.",
|
||||
"_comment2": "Each 'PkgExceptions' entry is an object with a 'PackageName' and an 'IgnoredFilesRegexList'.",
|
||||
"_comment3": "The 'PackageName' is the name of the (sub)package to which the exception applies.",
|
||||
"_comment4": "The 'IgnoredFilesRegexList' is a list of regular expressions that match files to be omitted from the license check.",
|
||||
"_comment5": "The 'GlobalExceptionsRegexList' is a list of regular expressions that match files to be omitted from the license check in all packages.",
|
||||
|
||||
"PkgExceptions": [
|
||||
{
|
||||
"PackageName": "gcc",
|
||||
"IgnoredFilesRegexList": [
|
||||
"^/usr/share/man/man7/gpl\\.7\\.gz$"
|
||||
]
|
||||
},
|
||||
{
|
||||
"PackageName": "libdb-docs",
|
||||
"IgnoredFilesRegexList": [
|
||||
"^/usr/share/doc/libdb-[0-9\\.]+/installation/build_unix_freebsd\\.html$",
|
||||
"^/usr/share/doc/libdb-[0-9\\.]+/license/license_db\\.html$"
|
||||
]
|
||||
},
|
||||
{
|
||||
"PackageName": "perl-doc",
|
||||
"IgnoredFilesRegexList": [
|
||||
"^/usr/share/man/.*"
|
||||
]
|
||||
},
|
||||
{
|
||||
"PackageName": "tar",
|
||||
"IgnoredFilesRegexList": [
|
||||
"^/usr/share/doc/tar-[0-9\\.]+/tar\\.html/GNU-Free-Documentation-License\\.html$"
|
||||
]
|
||||
}
|
||||
],
|
||||
"GlobalExceptionsRegexList": [
|
||||
]
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
{
|
||||
"_comment1": "This file lists the regexes used to match license files in packages.",
|
||||
"_comment2": "FuzzyLicenseNamesRegexList is a list of license names that should be matched in a case-insensitive sub-string search",
|
||||
"_comment3": "Any common license path prefixes are removed before matching (ie /usr/share/licenses/<pkg>/).",
|
||||
"_comment4": "VerbatimLicenseNamesRegexList is a list of license names that should be matched exactly against the basename of a file",
|
||||
"_comment5": "SkipLicenseNamesRegexList is a list of files that may appear as a license file but generally aren't really licenses",
|
||||
"FuzzyLicenseNamesRegexList": [
|
||||
"(?i).*copying.*",
|
||||
"(?i).*license.*",
|
||||
"(?i).*licence.*",
|
||||
"(?i).*licensing.*",
|
||||
"(?i).*notice.*",
|
||||
"(?i).*copyright.*",
|
||||
"(?i).*artistic.*",
|
||||
"(?i).*bsd.*",
|
||||
"(?i).*gpl.*",
|
||||
"(?i).*cc0.*",
|
||||
"(?i).*mit\\.txt.*"
|
||||
],
|
||||
"VerbatimLicenseNamesRegexList": [
|
||||
"^MIT$"
|
||||
],
|
||||
"SkipLicenseNamesRegexList": [
|
||||
"(?i).*AUTHORS.*",
|
||||
"(?i).*CONTRIBUTORS.*",
|
||||
"(?i).*README.*",
|
||||
"(?i).*CREDITS.*"
|
||||
]
|
||||
}
|
|
@ -5,9 +5,12 @@
|
|||
# - Generate list of built packages
|
||||
# - Run check for ABI changes of built packages.
|
||||
# - Run check for .so files version change of built packages.
|
||||
# - Validate package licenses
|
||||
|
||||
# Requires DNF on Azure Linux / yum and yum-utils on Ubuntu.
|
||||
|
||||
######## SODIFF and BUILD SUMMARY ########
|
||||
|
||||
# A folder with sodiff-related artifacts
|
||||
SODIFF_OUTPUT_FOLDER=$(BUILD_DIR)/sodiff
|
||||
RPM_BUILD_LOGS_DIR=$(LOGS_DIR)/pkggen/rpmbuilding
|
||||
|
@ -86,3 +89,55 @@ sodiff-check: $(BUILT_PACKAGES_FILE) | $(SODIFF_REPO_FILE)
|
|||
<$(BUILT_PACKAGES_FILE) $(SODIFF_SCRIPT) $(RPMS_DIR)/ $(SODIFF_REPO_FILE) $(RELEASE_MAJOR_ID) $(SODIFF_OUTPUT_FOLDER)
|
||||
|
||||
package-toolkit: $(SODIFF_REPO_FILE)
|
||||
|
||||
######## LICENSE CHECK ########
|
||||
|
||||
license_check_build_dir = $(BUILD_DIR)/license_check_tool
|
||||
license_out_dir = $(OUT_DIR)/license_check
|
||||
license_results_file_pkg = $(license_out_dir)/license_check_results_pkg.json
|
||||
license_summary = $(license_check_build_dir)/license_check_summary.txt
|
||||
|
||||
.PHONY: license-check license-check-pkg license-check-img clean-license-check
|
||||
|
||||
clean: clean-license-check
|
||||
clean-license-check:
|
||||
@echo Verifying no mountpoints present in $(license_check_build_dir)
|
||||
$(SCRIPTS_DIR)/safeunmount.sh "$(license_check_build_dir)" && \
|
||||
rm -rf $(license_check_build_dir) && \
|
||||
rm -rf $(license_out_dir)
|
||||
|
||||
license_check_common_deps = $(go-licensecheck) $(chroot_worker) $(LICENSE_CHECK_EXCEPTION_FILE) $(LICENSE_CHECK_NAME_FILE) $(depend_LICENSE_CHECK_MODE)
|
||||
# licensecheck-command: Helper function to run licensecheck with the given parameters.
|
||||
# $(1): List of directories to check for licenses.
|
||||
# $(2): (optional) Results .json file
|
||||
# $(3): (optional) Results summary .txt file
|
||||
# $(4): Log file
|
||||
|
||||
define licensecheck-command
|
||||
$(go-licensecheck) \
|
||||
$(foreach license_dir, $(1),--rpm-dirs="$(license_dir)" ) \
|
||||
--exception-file="$(LICENSE_CHECK_EXCEPTION_FILE)" \
|
||||
--name-file="$(LICENSE_CHECK_NAME_FILE)" \
|
||||
--worker-tar="$(chroot_worker)" \
|
||||
--build-dir="$(license_check_build_dir)" \
|
||||
--dist-tag=$(DIST_TAG) \
|
||||
--mode="$(LICENSE_CHECK_MODE)" \
|
||||
$(if $(2),--results-file="$(2)") \
|
||||
$(if $(3),--summary-file="$(3)") \
|
||||
--log-file=$(4) \
|
||||
--log-level=$(LOG_LEVEL)
|
||||
endef
|
||||
|
||||
##help:target:license-check=Validate all packages in any of LICENSE_CHECK_DIRS for license compliance.
|
||||
license-check: $(license_check_common_deps)
|
||||
$(if $(LICENSE_CHECK_DIRS),,$(error Must set LICENSE_CHECK_DIRS=))
|
||||
$(call licensecheck-command,$(LICENSE_CHECK_DIRS),$(license_results_file_pkg),$(license_summary),$(LOGS_DIR)/licensecheck/license-check-manual.log)
|
||||
|
||||
##help:target:license-check-pkg=Validate all packages in $(RPMS_DIR) for license compliance, building packages as needed.
|
||||
license-check-pkg: $(license_check_common_deps) $(RPMS_DIR)
|
||||
$(call licensecheck-command,$(RPMS_DIR),$(license_results_file_pkg),$(license_summary),$(LOGS_DIR)/licensecheck/license-check-pkg.log)
|
||||
|
||||
##help:target:license-check-img=Validate all packages needed for an image for license compliance. Must set CONFIG_FILE=<path_to_config>.
|
||||
license-check-img: $(license_results_file_img)
|
||||
$(license_results_file_img): $(license_check_common_deps) $(image_package_cache_summary)
|
||||
$(call licensecheck-command,$(local_and_external_rpm_cache),$(license_results_file_img),$(license_summary),$(LOGS_DIR)/licensecheck/license-check-img.log)
|
||||
|
|
|
@ -44,6 +44,7 @@ meta_user_data_tmp_dir = $(IMAGEGEN_DIR)/meta-user-data_tmp
|
|||
image_package_cache_summary = $(imggen_config_dir)/image_deps.json
|
||||
image_external_package_cache_summary = $(imggen_config_dir)/image_external_deps.json
|
||||
image_package_manifest = $(imggen_config_dir)/image_pkg_manifest.json
|
||||
license_results_file_img = $(imggen_config_dir)/license_check_results.json
|
||||
|
||||
# Outputs
|
||||
artifact_dir = $(IMAGES_DIR)/$(config_name)
|
||||
|
|
|
@ -42,6 +42,7 @@ go_tool_list = \
|
|||
imagepkgfetcher \
|
||||
imager \
|
||||
isomaker \
|
||||
licensecheck \
|
||||
liveinstaller \
|
||||
osmodifier \
|
||||
pkgworker \
|
||||
|
|
|
@ -55,10 +55,10 @@ endef
|
|||
######## VARIABLE DEPENDENCY TRACKING ########
|
||||
|
||||
# List of variables to watch for changes.
|
||||
watch_vars=PACKAGE_BUILD_LIST PACKAGE_REBUILD_LIST PACKAGE_IGNORE_LIST REPO_LIST CONFIG_FILE STOP_ON_PKG_FAIL TOOLCHAIN_ARCHIVE REBUILD_TOOLCHAIN SRPM_PACK_LIST SPECS_DIR MAX_CASCADING_REBUILDS RUN_CHECK TEST_RUN_LIST TEST_RERUN_LIST TEST_IGNORE_LIST EXTRA_BUILD_LAYERS
|
||||
watch_vars=PACKAGE_BUILD_LIST PACKAGE_REBUILD_LIST PACKAGE_IGNORE_LIST REPO_LIST CONFIG_FILE STOP_ON_PKG_FAIL TOOLCHAIN_ARCHIVE REBUILD_TOOLCHAIN SRPM_PACK_LIST SPECS_DIR MAX_CASCADING_REBUILDS RUN_CHECK TEST_RUN_LIST TEST_RERUN_LIST TEST_IGNORE_LIST EXTRA_BUILD_LAYERS LICENSE_CHECK_MODE
|
||||
# Current list: $(depend_PACKAGE_BUILD_LIST) $(depend_PACKAGE_REBUILD_LIST) $(depend_PACKAGE_IGNORE_LIST) $(depend_REPO_LIST) $(depend_CONFIG_FILE) $(depend_STOP_ON_PKG_FAIL)
|
||||
# $(depend_TOOLCHAIN_ARCHIVE) $(depend_REBUILD_TOOLCHAIN) $(depend_SRPM_PACK_LIST) $(depend_SPECS_DIR) $(depend_EXTRA_BUILD_LAYERS) $(depend_MAX_CASCADING_REBUILDS) $(depend_RUN_CHECK) $(depend_TEST_RUN_LIST)
|
||||
# $(depend_TEST_RERUN_LIST) $(depend_TEST_IGNORE_LIST)
|
||||
# $(depend_TEST_RERUN_LIST) $(depend_TEST_IGNORE_LIST) $(depend_LICENSE_CHECK_MODE)
|
||||
|
||||
.PHONY: variable_depends_on_phony clean-variable_depends_on_phony setfacl_always_run_phony
|
||||
clean: clean-variable_depends_on_phony
|
||||
|
@ -73,9 +73,9 @@ clean-variable_depends_on_phony:
|
|||
# they will always run. Each rule will check the currently stored value in the file and only
|
||||
# update it if needed.
|
||||
|
||||
# Generate a target which watches a variable for changes so rebuilds can be
|
||||
# triggered if needed. Uses one file per variable. If the value of the variable
|
||||
# is not the same as recorded in the file, update the file to match. This will
|
||||
# Generate a target which watches a variable for changes so rebuilds can be
|
||||
# triggered if needed. Uses one file per variable. If the value of the variable
|
||||
# is not the same as recorded in the file, update the file to match. This will
|
||||
# force a rebuild of any dependent targets.
|
||||
#
|
||||
# $1 - name of the variable to watch for changes
|
||||
|
|
|
@ -368,6 +368,50 @@ func QueryPackage(packageFile, queryFormat string, defines map[string]string, ex
|
|||
return executeRpmCommand(rpmProgram, args...)
|
||||
}
|
||||
|
||||
// QueryPackageFiles queries an RPM for its file contents. The results are split into several categories:
|
||||
// - allFilesAndDirectories: all files and directories in the package
|
||||
// - files: all files in the package (ie allFilesAndDirectories minus directories)
|
||||
// - directories: all directories in the package (ie allFilesAndDirectories minus files, symlinks etc.)
|
||||
// - documentFiles: all files marked as documentation (%doc)
|
||||
// - licenseFiles: all files marked as license (%license)
|
||||
func QueryPackageFiles(packageFile string, defines map[string]string,
|
||||
) (allFilesAndDirectories, files, directories, documentFiles, licenseFiles []string, err error) {
|
||||
const allFilesQueryFormat = "[%{FILEMODES:perms} %{FILENAMES}\n]"
|
||||
allFilesWithPerms, err := QueryPackage(packageFile, allFilesQueryFormat, defines)
|
||||
if err != nil {
|
||||
return nil, nil, nil, nil, nil, fmt.Errorf("failed to query package (%s) files:\n%w", packageFile, err)
|
||||
}
|
||||
// Parse the output of the query to separarate directories. Output will be of the form:
|
||||
// drwxr-xr-x /a/directory
|
||||
// -rw-r--r-- /a/directory/a_file
|
||||
// Any line that starts with a 'd' is a directory, everything else is a file (or symlink etc.).
|
||||
for _, fileLine := range allFilesWithPerms {
|
||||
perms, filePath, found := strings.Cut(fileLine, " ")
|
||||
if !found {
|
||||
return nil, nil, nil, nil, nil, fmt.Errorf("failed to parse package (%s) file contents (%s)", packageFile, fileLine)
|
||||
}
|
||||
if strings.HasPrefix(perms, "d") {
|
||||
directories = append(directories, filePath)
|
||||
} else {
|
||||
files = append(files, filePath)
|
||||
}
|
||||
allFilesAndDirectories = append(allFilesAndDirectories, filePath)
|
||||
}
|
||||
|
||||
// rpm has dedicated tags for documentation and license files, so we can query them directly.
|
||||
documentFiles, err = QueryPackage(packageFile, "", defines, "-d")
|
||||
if err != nil {
|
||||
return nil, nil, nil, nil, nil, fmt.Errorf("failed to query package (%s) documentation files:\n%w", packageFile, err)
|
||||
}
|
||||
|
||||
licenseFiles, err = QueryPackage(packageFile, "", defines, "-L")
|
||||
if err != nil {
|
||||
return nil, nil, nil, nil, nil, fmt.Errorf("failed to query package (%s) license files:\n%w", packageFile, err)
|
||||
}
|
||||
|
||||
return allFilesAndDirectories, files, directories, documentFiles, licenseFiles, nil
|
||||
}
|
||||
|
||||
// BuildRPMFromSRPM builds an RPM from the given SRPM file but does not run its '%check' section.
|
||||
func BuildRPMFromSRPM(srpmFile, outArch string, defines map[string]string) (err error) {
|
||||
const squashErrors = true
|
||||
|
|
|
@ -0,0 +1,169 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
// A tool for validating the license files of RPM packages in a set of directories.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/internal/exe"
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/internal/file"
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/internal/logger"
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/pkg/licensecheck"
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/pkg/licensecheck/licensecheckformat"
|
||||
|
||||
"gopkg.in/alecthomas/kingpin.v2"
|
||||
)
|
||||
|
||||
var (
|
||||
app = kingpin.New("licensecheck", "A tool for validating the license files of RPM packages.")
|
||||
|
||||
rpmDirs = app.Flag("rpm-dirs", "Directories to recursively scan for RPMs to validate").Required().ExistingDirs()
|
||||
nameFile = app.Flag("name-file", "File containing license names to check for.").Required().ExistingFile()
|
||||
exceptionFile = app.Flag("exception-file", "File containing license exceptions.").ExistingFile()
|
||||
mode = app.Flag("mode", "Level of license validation to perform").Default(string(licensecheck.LicenseCheckModeDefault)).Enum(licensecheck.ValidLicenseCheckModeStrings()...)
|
||||
|
||||
buildDirPath = app.Flag("build-dir", "Directory to store temporary files.").Required().String()
|
||||
distTag = app.Flag("dist-tag", "The distribution tag.").Required().String()
|
||||
workerTar = app.Flag("worker-tar", "Full path to worker_chroot.tar.gz.").Required().ExistingFile()
|
||||
|
||||
logFlags = exe.SetupLogFlags(app)
|
||||
resultFile = app.Flag("results-file", "The file to store the search result.").Default("").String()
|
||||
summaryFile = app.Flag("summary-file", "File to save the license check summary to.").String()
|
||||
)
|
||||
|
||||
func main() {
|
||||
app.Version(exe.ToolkitVersion)
|
||||
kingpin.MustParse(app.Parse(os.Args[1:]))
|
||||
logger.InitBestEffort(logFlags)
|
||||
|
||||
mode := licensecheck.LicenseCheckMode(*mode)
|
||||
|
||||
results, numFailures, numWarnings := scanDirectories(*rpmDirs, *buildDirPath, *workerTar, *nameFile, *exceptionFile, *distTag, mode)
|
||||
|
||||
printSummary(numFailures, numWarnings)
|
||||
|
||||
if *resultFile != "" {
|
||||
logger.Log.Infof("Writing results to file (%s)", *resultFile)
|
||||
err := licensecheck.SaveLicenseCheckResults(*resultFile, results)
|
||||
if err != nil {
|
||||
logger.Log.Fatalf("Failed to write results to file:\n%v", err)
|
||||
}
|
||||
}
|
||||
|
||||
if *summaryFile != "" {
|
||||
logger.Log.Infof("Writing summary to file (%s)", *summaryFile)
|
||||
resultsString := licensecheckformat.FormatResults(results, mode)
|
||||
err := os.MkdirAll(filepath.Dir(*summaryFile), os.ModePerm)
|
||||
if err != nil {
|
||||
logger.Log.Fatalf("failed to create directory for results file. Error:\n%v", err)
|
||||
}
|
||||
err = file.Write(resultsString, *summaryFile)
|
||||
if err != nil {
|
||||
logger.Log.Fatalf("Failed to write summary to file:\n%v", err)
|
||||
}
|
||||
}
|
||||
|
||||
if numFailures > 0 {
|
||||
logger.Log.Fatal("License check failed")
|
||||
}
|
||||
if numWarnings > 0 {
|
||||
logger.Log.Warn("License check completed with warnings")
|
||||
}
|
||||
}
|
||||
|
||||
func scanDirectories(rpmDirs []string, buildDirPath, workerTar, nameFile, exceptionFile, distTag string,
|
||||
mode licensecheck.LicenseCheckMode,
|
||||
) (results []licensecheck.LicenseCheckResult, failed int, warnings int) {
|
||||
|
||||
if mode == licensecheck.LicenseCheckModeNone {
|
||||
logger.Log.Infof("License check mode is set to (%s), skipping license check", mode)
|
||||
return nil, 0, 0
|
||||
}
|
||||
|
||||
totalResults := []licensecheck.LicenseCheckResult{}
|
||||
totalFailedPackages := 0
|
||||
totalWarningPackages := 0
|
||||
for _, rpmDir := range rpmDirs {
|
||||
allResults, errorResults, warningResults, err := validateRpmDir(buildDirPath, workerTar, rpmDir, nameFile, exceptionFile, distTag, mode)
|
||||
if err != nil {
|
||||
logger.Log.Fatalf("Failed to search RPM directory:\n%v", err)
|
||||
}
|
||||
totalFailedPackages += len(errorResults)
|
||||
totalWarningPackages += len(warningResults)
|
||||
totalResults = append(totalResults, allResults...)
|
||||
}
|
||||
return totalResults, totalFailedPackages, totalWarningPackages
|
||||
}
|
||||
|
||||
func printSummary(numFailures, numWarnings int) {
|
||||
const explanation = `
|
||||
Errors/warnings fall into three buckets:
|
||||
1. 'bad %doc files': A %doc documentation file that the tool believes to be a license file.
|
||||
2. 'bad general file': A file that is placed into '/usr/share/licenses/' that is not flagged as
|
||||
a license file. These files should use %license instead of %doc. Ideally whey should also
|
||||
not be placed in a directory manually. (e.g. prefer '%license COPYING' over
|
||||
'%license %{_docdir}/%{name}/COPYING')
|
||||
3. 'duplicated license files': A license file that is both a %license and a %doc file, pick one.")
|
||||
This is a warning, unless the tool is run in pedantic mode, in which case it is an error.
|
||||
How to fix:
|
||||
- 'False positives': In all cases, a detection may be suppressed by using the exception file:
|
||||
{{.exceptionFile}}.
|
||||
This file contains per-package and global exceptions in the form of regexes.
|
||||
- 'bad %%doc files': Mark it using %license, ideally without using a buildroot path (e.g. use '%license COPYING').
|
||||
- 'bad general file': Mark it using %license, ideally without using a buildroot path (e.g. use '%license COPYING').
|
||||
- 'duplicated license files': If they are actually equivalent, remove the copy in the documentation.
|
||||
- Query package contents with 'rpm -ql <package>.rpm' to see all files, 'rpm -qL <package>.rpm' to
|
||||
see only the license files, and 'rpm -qd <package>.rpm' to see only the documentation files.`
|
||||
|
||||
if numFailures > 0 {
|
||||
logger.Log.Info(strings.ReplaceAll(explanation, "{{.exceptionFile}}", *exceptionFile))
|
||||
logger.Log.Errorf("Found %d packages with license errors", numFailures)
|
||||
logger.Log.Warnf("Found %d packages with non-fatal license issues", numWarnings)
|
||||
} else if numWarnings > 0 {
|
||||
logger.Log.Info(strings.ReplaceAll(explanation, "{{.exceptionFile}}", *exceptionFile))
|
||||
logger.Log.Warnf("Found %d packages with non-fatal license issues", numWarnings)
|
||||
} else {
|
||||
logger.Log.Infof("No license issues found")
|
||||
}
|
||||
}
|
||||
|
||||
// validateRpmDir scans the given directory for RPMs and validates their licenses. It will return all findings split into warnings and failures.
|
||||
// Each call to this function will generate a new chroot environment and clean it up after the scan.
|
||||
func validateRpmDir(buildDirPath, workerTar, rpmDir, nameFile, exceptionFile, distTag string,
|
||||
mode licensecheck.LicenseCheckMode,
|
||||
) (allResults, warningResults, failedResults []licensecheck.LicenseCheckResult, err error) {
|
||||
|
||||
logger.Log.Infof("Preparing license check environment for (%s)", rpmDir)
|
||||
licenseChecker, err := licensecheck.New(buildDirPath, workerTar, rpmDir, nameFile, exceptionFile, distTag)
|
||||
if err != nil {
|
||||
return nil, nil, nil, fmt.Errorf("failed to initialize RPM license checker:\n%w", err)
|
||||
}
|
||||
defer func() {
|
||||
cleanupErr := licenseChecker.Cleanup()
|
||||
if cleanupErr != nil {
|
||||
if err == nil {
|
||||
err = fmt.Errorf("failed to cleanup after RPM license checker:\n%w", cleanupErr)
|
||||
} else {
|
||||
// Append the cleanup error to the existing error
|
||||
err = fmt.Errorf("%w\nfailed to cleanup after RPM license checker failed:\n%w", err, cleanupErr)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
logger.Log.Infof("Scanning (%s) for license issues", rpmDir)
|
||||
_, err = licenseChecker.CheckLicenses(false)
|
||||
if err != nil {
|
||||
return nil, nil, nil, fmt.Errorf("failed to generate license scan:\n%w", err)
|
||||
}
|
||||
|
||||
allResults, warningResults, failedResults = licenseChecker.GetResults(mode)
|
||||
resultsString := licensecheckformat.FormatResults(allResults, mode)
|
||||
logger.Log.Infof("Search results for (%s):\n%s", rpmDir, resultsString)
|
||||
return allResults, failedResults, warningResults, nil
|
||||
}
|
|
@ -0,0 +1,359 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
/*
|
||||
Package licensecheck provides a tool for searching RPMs for bad licenses, as well as several directly callable functions.
|
||||
The core of the tool is the LicenseChecker struct, which is responsible for searching RPMs for bad licenses. The tool is
|
||||
based on a 'simpletoolchroot' which is a wrapper that allows for easy chroot creation and cleanup.
|
||||
|
||||
The lifecycle of the LicenseChecker is as follows:
|
||||
|
||||
1. Create a new LicenseChecker with New()
|
||||
|
||||
2. Call CheckLicenses() to search for bad licenses
|
||||
|
||||
3. Either:
|
||||
- Call licensecheckformat.FormatResults() to get a formatted string of the results
|
||||
- Call GetResults() to get all the results, split into buckets.
|
||||
|
||||
4. Call Cleanup() to tear down the chroot
|
||||
|
||||
Also provided are several directly callable functions (these expect to be run in an environment with the necessary
|
||||
macros, i.e. a chroot): CheckRpmLicenses(), IsALicenseFile(), IsASkippedLicenseFile()
|
||||
|
||||
The LicenseCheckResult struct is used to store the results of the search. It contains the path to the RPM, a list of
|
||||
bad documents, a list of bad files, and a list of duplicated documents. The bad documents are %doc files that are not
|
||||
at least also in the license files. The bad files are general files that are misplaced in the licenses directory.
|
||||
|
||||
The duplicated documents are %doc files that are also in the license files. These are not technically bad, but are messy
|
||||
and should be cleaned up.
|
||||
*/
|
||||
package licensecheck
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/internal/logger"
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/internal/rpm"
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/internal/sliceutils"
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/pkg/simpletoolchroot"
|
||||
)
|
||||
|
||||
const licensePrefix = "/usr/share/licenses"
|
||||
|
||||
// LicenseChecker is a tool for searching RPMs for bad licenses
|
||||
type LicenseChecker struct {
|
||||
simpleToolChroot *simpletoolchroot.SimpleToolChroot // The chroot to scan the RPMs in
|
||||
distTag string // The distribution tag to use when parsing RPMs
|
||||
licenseNames LicenseNames // The regexes used to match license files
|
||||
exceptions LicenseExceptions // Files that should be ignored
|
||||
results []LicenseCheckResult // The results of the search
|
||||
jobSemaphore chan struct{} // Limit the number of parallel jobs
|
||||
}
|
||||
|
||||
// New creates a new license checker. If this returns successfully the caller is responsible for calling CleanUp().
|
||||
// - buildDirPath: The path to create the chroot inside
|
||||
// - workerTarPath: The path to the worker tarball
|
||||
// - rpmDirPath: The path to the directory containing the RPMs
|
||||
// - nameFilePath: The path to the .json file containing license names
|
||||
// - exceptionFilePath: Optional, the path to the .json file containing license exceptions to ignore
|
||||
// - distTag: The distribution tag to use when parsing RPMs
|
||||
func New(buildDirPath, workerTarPath, rpmDirPath, nameFilePath, exceptionFilePath, distTag string,
|
||||
) (newLicenseChecker *LicenseChecker, err error) {
|
||||
const chrootName = "license_chroot"
|
||||
|
||||
newLicenseChecker = &LicenseChecker{
|
||||
distTag: distTag,
|
||||
simpleToolChroot: &simpletoolchroot.SimpleToolChroot{},
|
||||
jobSemaphore: make(chan struct{}, runtime.NumCPU()*2),
|
||||
}
|
||||
|
||||
err = newLicenseChecker.simpleToolChroot.InitializeChroot(buildDirPath, chrootName, workerTarPath, rpmDirPath)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("failed to initialize chroot:\n%w", err)
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
cleanupErr := newLicenseChecker.Cleanup()
|
||||
if cleanupErr != nil {
|
||||
// Append the cleanup error to the existing error
|
||||
err = fmt.Errorf("%w\nfailed to cleanup after failing to create a new LicenseChecker:\n%w", err, cleanupErr)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
newLicenseChecker.licenseNames, err = LoadLicenseNames(nameFilePath)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("failed to load license names:\n%w", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if exceptionFilePath != "" {
|
||||
newLicenseChecker.exceptions, err = LoadLicenseExceptions(exceptionFilePath)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("failed to load license exceptions:\n%w", err)
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return newLicenseChecker, err
|
||||
}
|
||||
|
||||
// Cleanup tears down the chroot. If the chroot was created it will be cleaned up. Reset the struct to its initial state.
|
||||
func (l *LicenseChecker) Cleanup() error {
|
||||
if l.simpleToolChroot != nil {
|
||||
err := l.simpleToolChroot.CleanUp()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to cleanup chroot:\n%w", err)
|
||||
}
|
||||
l.simpleToolChroot = nil
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// CheckLicenses will scan all .rpm files in the chroot for bad licenses. New unfiltered results will be returned but
|
||||
// also appended to the internal results list which can be accessed with GetResults().
|
||||
func (l *LicenseChecker) CheckLicenses(quiet bool) (latestResults []LicenseCheckResult, err error) {
|
||||
if l.simpleToolChroot == nil {
|
||||
return nil, fmt.Errorf("license checker is not initialized, use New() to create a new license checker")
|
||||
}
|
||||
|
||||
err = l.simpleToolChroot.RunInChroot(func() (searchErr error) {
|
||||
latestResults, searchErr = l.runLicenseCheckInChroot(quiet)
|
||||
return searchErr
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to scan for license issues:\n%w", err)
|
||||
}
|
||||
|
||||
// Sort the results by RPM path
|
||||
// This is done to ensure that the output is deterministic
|
||||
sort.Slice(latestResults, func(i, j int) bool {
|
||||
return latestResults[i].RpmPath < latestResults[j].RpmPath
|
||||
})
|
||||
l.results = append(l.results, latestResults...)
|
||||
|
||||
return latestResults, nil
|
||||
}
|
||||
|
||||
// GetResults returns the cumulative results of the search, split into:
|
||||
// - All results: Every scan result
|
||||
// - Any result that has at least one warning
|
||||
// - Any result that has at least one error
|
||||
func (l *LicenseChecker) GetResults(mode LicenseCheckMode) (all, warnings, errors []LicenseCheckResult) {
|
||||
_, warnings, errors = SortAndFilterResults(l.results, mode)
|
||||
return l.results, warnings, errors
|
||||
}
|
||||
|
||||
type licenseCheckReturn struct {
|
||||
finding LicenseCheckResult
|
||||
err error
|
||||
}
|
||||
|
||||
// runLicenseCheckInChroot searches for bad licenses amongst the RPMs mounted into the chroot. This function is meant
|
||||
// to be called from inside the chroot's context.
|
||||
func (l *LicenseChecker) runLicenseCheckInChroot(quiet bool) (findings []LicenseCheckResult, err error) {
|
||||
const searchReportIntervalPercent = 10 // Report progress to the user every 10%
|
||||
|
||||
// Find all the rpms in the chroot
|
||||
rpmsToSearchPaths, err := l.findRpmPaths()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to walk rpm directory:\n%w", err)
|
||||
}
|
||||
if len(rpmsToSearchPaths) == 0 {
|
||||
logger.Log.Warnf("No rpms found in (%s)", l.simpleToolChroot.ChrootRelativeMountDir())
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Scan each rpm in parallel
|
||||
ctx, cancelFunc := context.WithCancel(context.Background())
|
||||
defer cancelFunc()
|
||||
resultsChannel := make(chan licenseCheckReturn, len(rpmsToSearchPaths))
|
||||
if !quiet {
|
||||
logger.Log.Infof("Queuing %d rpms for license check", len(rpmsToSearchPaths))
|
||||
}
|
||||
go l.queueWorkers(ctx, rpmsToSearchPaths, resultsChannel)
|
||||
if !quiet {
|
||||
logger.Log.Infof("Checking RPMs for license issues")
|
||||
}
|
||||
|
||||
// Wait for all the workers to finish, updating the progress as results come in
|
||||
numProcessed := 0
|
||||
lastReportPercent := 0
|
||||
for range rpmsToSearchPaths {
|
||||
result := <-resultsChannel
|
||||
if result.err != nil {
|
||||
// Signal the workers to stop if there is an error
|
||||
err = fmt.Errorf("failed to search rpm for license issues:\n%w", result.err)
|
||||
cancelFunc()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Report progress to the user every 10%
|
||||
numProcessed++
|
||||
percentProcessed := (numProcessed * 100) / len(rpmsToSearchPaths)
|
||||
if percentProcessed-lastReportPercent >= searchReportIntervalPercent && !quiet {
|
||||
logger.Log.Infof("Checked %d/%d rpms (%d%%)", numProcessed, len(rpmsToSearchPaths), percentProcessed)
|
||||
lastReportPercent = percentProcessed
|
||||
}
|
||||
findings = append(findings, result.finding)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// findRpmPaths walks the chroots's mount directory to find all *.rpm files. The paths are returned relative to the
|
||||
// chroot's root.
|
||||
func (l *LicenseChecker) findRpmPaths() (foundRpmPaths []string, err error) {
|
||||
const rpmExtension = ".rpm"
|
||||
err = filepath.Walk(l.simpleToolChroot.ChrootRelativeMountDir(), func(path string, info os.FileInfo, walkErr error) error {
|
||||
if walkErr != nil {
|
||||
return walkErr
|
||||
}
|
||||
if info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
if !strings.HasSuffix(path, rpmExtension) {
|
||||
return nil
|
||||
}
|
||||
|
||||
foundRpmPaths = append(foundRpmPaths, path)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
err = fmt.Errorf("failed to walk directory:\n%w", err)
|
||||
return nil, err
|
||||
}
|
||||
return foundRpmPaths, nil
|
||||
}
|
||||
|
||||
// queueWorkers queues up workers to search the RPMs in parallel. Each worker will wait on the jobSemaphore before starting.
|
||||
// This function will return once all workers have been queued.
|
||||
func (l *LicenseChecker) queueWorkers(ctx context.Context, rpmsToSearchPaths []string, resultsChannel chan licenseCheckReturn) {
|
||||
for _, rpmPath := range rpmsToSearchPaths {
|
||||
// Wait for the semaphore, or allow cancel before running
|
||||
select {
|
||||
case l.jobSemaphore <- struct{}{}:
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
go func(rpmPath string) {
|
||||
defer func() {
|
||||
<-l.jobSemaphore
|
||||
}()
|
||||
|
||||
logger.Log.Debugf("Searching (%s)", filepath.Base(rpmPath))
|
||||
searchResult, err := checkRpmLicenses(rpmPath, l.distTag, l.licenseNames, l.exceptions)
|
||||
logger.Log.Debugf("Finished searching (%s)", filepath.Base(rpmPath))
|
||||
if err != nil {
|
||||
logger.Log.Errorf("License check worker failed with error: %v", err)
|
||||
resultsChannel <- licenseCheckReturn{err: err}
|
||||
return
|
||||
}
|
||||
resultsChannel <- licenseCheckReturn{finding: searchResult, err: nil}
|
||||
}(rpmPath)
|
||||
}
|
||||
}
|
||||
|
||||
// checkRpmLicenses checks the licenses of an RPM at the given path. It returns result struct holding all the license
|
||||
// issues found. This function will use the host's macros to query the RPM so it is expected to be called in a chroot.
|
||||
// - rpmPath: The path to the RPM to check relative to the chroot's root.
|
||||
func checkRpmLicenses(rpmPath, distTag string, licenseNames LicenseNames, exceptions LicenseExceptions) (result LicenseCheckResult, err error) {
|
||||
defines := rpm.DefaultDistroDefines(false, distTag)
|
||||
|
||||
_, files, _, documentFiles, licenseFiles, err := rpm.QueryPackageFiles(rpmPath, defines)
|
||||
if err != nil {
|
||||
return LicenseCheckResult{}, fmt.Errorf("failed to query package contents:\n%w", err)
|
||||
}
|
||||
|
||||
pkgNameLines, err := rpm.QueryPackage(rpmPath, "%{NAME}", defines)
|
||||
if err != nil {
|
||||
return LicenseCheckResult{}, fmt.Errorf("failed to query package:\n%w", err)
|
||||
}
|
||||
if len(pkgNameLines) != 1 {
|
||||
return LicenseCheckResult{}, fmt.Errorf("failed to query package:\nexpected 1 package name, got %d", len(pkgNameLines))
|
||||
}
|
||||
pkgName := pkgNameLines[0]
|
||||
|
||||
badDocFiles, badOtherFiles, duplicatedDocs := interpretResults(pkgName, files, documentFiles, licenseFiles, licenseNames, exceptions)
|
||||
|
||||
result = LicenseCheckResult{
|
||||
RpmPath: rpmPath,
|
||||
PackageName: pkgName,
|
||||
BadDocs: badDocFiles,
|
||||
BadFiles: badOtherFiles,
|
||||
DuplicatedDocs: duplicatedDocs,
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// interpretResults scans file lists for packing issues:
|
||||
// - badDocFiles: %doc files that appear to be licenses, but are not at least also in the license files
|
||||
// - badOtherFiles: files that are misplaced in the licenses directory
|
||||
// - duplicatedDocs: %doc files that are also in the license files
|
||||
func interpretResults(pkgName string, files, documentFiles, licenseFiles []string, licenseNames LicenseNames, exceptions LicenseExceptions) (badDocFiles, badOtherFiles, duplicatedDocs []string) {
|
||||
badDocFiles = []string{}
|
||||
badOtherFiles = []string{}
|
||||
duplicatedDocs = []string{}
|
||||
|
||||
// Check the documentation files
|
||||
for _, documentFile := range documentFiles {
|
||||
if licenseNames.IsALicenseFile(pkgName, documentFile) && !exceptions.ShouldIgnoreFile(pkgName, documentFile) {
|
||||
if isDocumentInLicenseFiles(documentFile, licenseFiles) {
|
||||
duplicatedDocs = append(duplicatedDocs, documentFile)
|
||||
} else {
|
||||
badDocFiles = append(badDocFiles, documentFile)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure we don't put random files in the license directory. They need to be %license.
|
||||
licenseFileSet := sliceutils.SliceToSet(licenseFiles)
|
||||
for _, file := range files {
|
||||
if isFileMisplacedInLicensesFolder(file, licenseFileSet) && !exceptions.ShouldIgnoreFile(pkgName, file) {
|
||||
badOtherFiles = append(badOtherFiles, file)
|
||||
}
|
||||
}
|
||||
|
||||
sort.Strings(badDocFiles)
|
||||
sort.Strings(duplicatedDocs)
|
||||
sort.Strings(badOtherFiles)
|
||||
|
||||
return badDocFiles, badOtherFiles, duplicatedDocs
|
||||
}
|
||||
|
||||
// isDocumentInLicenseFiles reports whether documentFile has a counterpart in licenseFiles. Only basenames are
// compared, and the comparison is a case-sensitive substring match: a license file matches when its basename
// contains the document's basename (so "COPYING" matches the license "COPYING.LIB", but not the other way around).
func isDocumentInLicenseFiles(documentFile string, licenseFiles []string) bool {
	wantedName := filepath.Base(documentFile)
	for i := range licenseFiles {
		if candidate := filepath.Base(licenseFiles[i]); strings.Contains(candidate, wantedName) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// isFileMisplacedInLicensesFolder returns true if the filePath is present in the /usr/share/licenses/<pkg> tree but is
|
||||
// not included in the set of license files. Every file in /usr/share/licenses/<pkg> should be a license file and tagged.
|
||||
// - filePath: The path to the file to check. Directories are not included as %license so only actual file paths should
|
||||
// be passed.
|
||||
// -
|
||||
// - licenseFileSet: A set of all the license files in the package. This is used to check if the file is a license file.
|
||||
func isFileMisplacedInLicensesFolder(filePath string, licenseFileSet map[string]bool) bool {
|
||||
// Files that don't start with '/usr/share/licenses' are by definition not misplaced in the licenses folder
|
||||
if !strings.HasPrefix(filePath, licensePrefix) {
|
||||
return false
|
||||
} else {
|
||||
// If the path appears in the license set, it's correctly tagged.
|
||||
isARealLicenseFile := licenseFileSet[filePath]
|
||||
return !isARealLicenseFile
|
||||
}
|
||||
}
|
|
@ -0,0 +1,285 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package licensecheck
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/internal/logger"
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/internal/sliceutils"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func loadDefaultLicenseNames(t *testing.T) LicenseNames {
|
||||
const pathToDefaultNamesJson = "../../../resources/manifests/package/license_file_names.json"
|
||||
t.Helper()
|
||||
|
||||
names, err := LoadLicenseNames(pathToDefaultNamesJson)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to load default license names: %v", err)
|
||||
}
|
||||
return names
|
||||
}
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
logger.InitStderrLog()
|
||||
os.Exit(m.Run())
|
||||
}
|
||||
|
||||
func TestSearchLicenseFilesForMatch(t *testing.T) {
|
||||
defaultLicenseFiles := []string{"/usr/share/licenses/pkg/COPYING", "/usr/share/licenses/pkg/COPYING.LIB"}
|
||||
testCases := []struct {
|
||||
name string
|
||||
documentFile string
|
||||
licenseFiles []string
|
||||
expectedResponse bool
|
||||
}{
|
||||
{
|
||||
name: "Not a license file",
|
||||
documentFile: "file1",
|
||||
licenseFiles: defaultLicenseFiles,
|
||||
expectedResponse: false,
|
||||
},
|
||||
{
|
||||
name: "License file different dir",
|
||||
documentFile: "/usr/share/docs/pkg/COPYING",
|
||||
licenseFiles: defaultLicenseFiles,
|
||||
expectedResponse: true,
|
||||
},
|
||||
{
|
||||
name: "License file found exact match",
|
||||
documentFile: "/usr/share/licenses/pkg/COPYING",
|
||||
licenseFiles: defaultLicenseFiles,
|
||||
expectedResponse: true,
|
||||
},
|
||||
{
|
||||
name: "License file case mismatch",
|
||||
documentFile: "/usr/share/licenses/pkg/copying",
|
||||
licenseFiles: defaultLicenseFiles,
|
||||
expectedResponse: false,
|
||||
},
|
||||
{
|
||||
name: "License file found with extension",
|
||||
documentFile: "/usr/share/licenses/pkg/COPYING.LIB",
|
||||
licenseFiles: defaultLicenseFiles,
|
||||
expectedResponse: true,
|
||||
},
|
||||
{
|
||||
name: "License file extension mismatch",
|
||||
documentFile: "/usr/share/licenses/pkg/COPYING.wrong_ext",
|
||||
licenseFiles: defaultLicenseFiles,
|
||||
expectedResponse: false,
|
||||
},
|
||||
{
|
||||
name: "License file found with extra bits",
|
||||
documentFile: "/usr/share/licenses/pkg/mypkg-COPYING",
|
||||
licenseFiles: []string{"/usr/share/licenses/pkg/mypkg-COPYING"},
|
||||
expectedResponse: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
actualResponse := isDocumentInLicenseFiles(tc.documentFile, tc.licenseFiles)
|
||||
if actualResponse != tc.expectedResponse {
|
||||
t.Errorf("Expected %v, got %v", tc.expectedResponse, actualResponse)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsFileMisplacedInLicensesFolder(t *testing.T) {
|
||||
licenseFiles := []string{"/usr/share/licenses/pkg/COPYING", "/usr/share/licenses/pkg/COPYING.LIB"}
|
||||
licenseFileSet := sliceutils.SliceToSet(licenseFiles)
|
||||
assert.False(t, isFileMisplacedInLicensesFolder("/usr/share/licenses/pkg/COPYING", licenseFileSet))
|
||||
assert.False(t, isFileMisplacedInLicensesFolder("/usr/share/not/in/licenses.txt", licenseFileSet))
|
||||
assert.True(t, isFileMisplacedInLicensesFolder("/usr/share/licenses/pkg/NOTICE", licenseFileSet))
|
||||
}
|
||||
|
||||
func TestIsFileMisplacedInLicensesFolderDetectPackageFolder(t *testing.T) {
|
||||
emptyLicenseFiles := make(map[string]bool)
|
||||
assert.True(t, isFileMisplacedInLicensesFolder("/usr/share/licenses/OTHER_PKG/", emptyLicenseFiles))
|
||||
assert.True(t, isFileMisplacedInLicensesFolder("/usr/share/licenses/OTHER_PKG", emptyLicenseFiles))
|
||||
}
|
||||
|
||||
func makeResult(name string, numBadDocs, numBadFiles, numDupes int) LicenseCheckResult {
|
||||
badDocs := make([]string, numBadDocs)
|
||||
for i := 0; i < numBadDocs; i++ {
|
||||
badDocs[i] = fmt.Sprintf("doc%d", i)
|
||||
}
|
||||
badFiles := make([]string, numBadFiles)
|
||||
for i := 0; i < numBadFiles; i++ {
|
||||
badFiles[i] = fmt.Sprintf("file%d", i)
|
||||
}
|
||||
dupes := make([]string, numDupes)
|
||||
for i := 0; i < numDupes; i++ {
|
||||
dupes[i] = fmt.Sprintf("dupe%d", i)
|
||||
}
|
||||
return LicenseCheckResult{
|
||||
RpmPath: name,
|
||||
BadDocs: badDocs,
|
||||
BadFiles: badFiles,
|
||||
DuplicatedDocs: dupes,
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetResults(t *testing.T) {
|
||||
type expected struct {
|
||||
all []LicenseCheckResult
|
||||
warn []LicenseCheckResult
|
||||
fail []LicenseCheckResult
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
results []LicenseCheckResult
|
||||
expected expected
|
||||
}{
|
||||
{
|
||||
name: "No results",
|
||||
results: []LicenseCheckResult{},
|
||||
expected: expected{
|
||||
all: []LicenseCheckResult{},
|
||||
warn: []LicenseCheckResult{},
|
||||
fail: []LicenseCheckResult{},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "No issues",
|
||||
results: []LicenseCheckResult{
|
||||
makeResult("pkg1", 0, 0, 0),
|
||||
},
|
||||
expected: expected{
|
||||
all: []LicenseCheckResult{
|
||||
makeResult("pkg1", 0, 0, 0),
|
||||
},
|
||||
warn: []LicenseCheckResult{},
|
||||
fail: []LicenseCheckResult{},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Single error",
|
||||
results: []LicenseCheckResult{
|
||||
makeResult("pkg1", 1, 1, 1),
|
||||
},
|
||||
expected: expected{
|
||||
all: []LicenseCheckResult{
|
||||
makeResult("pkg1", 1, 1, 1),
|
||||
},
|
||||
warn: []LicenseCheckResult{
|
||||
makeResult("pkg1", 1, 1, 1)},
|
||||
fail: []LicenseCheckResult{
|
||||
makeResult("pkg1", 1, 1, 1),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Single warn",
|
||||
results: []LicenseCheckResult{
|
||||
makeResult("pkg1", 0, 0, 1),
|
||||
},
|
||||
expected: expected{
|
||||
all: []LicenseCheckResult{
|
||||
makeResult("pkg1", 0, 0, 1),
|
||||
},
|
||||
warn: []LicenseCheckResult{
|
||||
makeResult("pkg1", 0, 0, 1),
|
||||
},
|
||||
fail: []LicenseCheckResult{},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Double error",
|
||||
results: []LicenseCheckResult{
|
||||
makeResult("pkg1", 1, 0, 0),
|
||||
makeResult("pkg2", 1, 0, 0),
|
||||
},
|
||||
expected: expected{
|
||||
all: []LicenseCheckResult{
|
||||
makeResult("pkg1", 1, 0, 0),
|
||||
makeResult("pkg2", 1, 0, 0),
|
||||
},
|
||||
warn: []LicenseCheckResult{},
|
||||
fail: []LicenseCheckResult{
|
||||
makeResult("pkg1", 1, 0, 0),
|
||||
makeResult("pkg2", 1, 0, 0),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Multiple results with warn",
|
||||
results: []LicenseCheckResult{
|
||||
makeResult("pkg1", 1, 1, 1),
|
||||
makeResult("pkg2", 0, 0, 2),
|
||||
},
|
||||
expected: expected{
|
||||
all: []LicenseCheckResult{
|
||||
makeResult("pkg1", 1, 1, 1),
|
||||
makeResult("pkg2", 0, 0, 2),
|
||||
},
|
||||
warn: []LicenseCheckResult{
|
||||
makeResult("pkg1", 1, 1, 1),
|
||||
makeResult("pkg2", 0, 0, 2),
|
||||
},
|
||||
fail: []LicenseCheckResult{
|
||||
makeResult("pkg1", 1, 1, 1),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
checker := LicenseChecker{
|
||||
results: tc.results,
|
||||
}
|
||||
all, warn, fail := checker.GetResults(LicenseCheckModeDefault)
|
||||
assert.Equal(t, tc.expected.all, all)
|
||||
assert.Equal(t, tc.expected.warn, warn)
|
||||
assert.Equal(t, tc.expected.fail, fail)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseCheckResults(t *testing.T) {
|
||||
pkgName := "testpkg"
|
||||
files := []string{
|
||||
"/some/random/file",
|
||||
"/usr/share/docs/testpkg/doc.txt",
|
||||
"/usr/share/docs/testpkg/COPYING",
|
||||
"/usr/share/licenses/testpkg/other_misplaced_2",
|
||||
"/usr/share/licenses/testpkg/misplaced",
|
||||
"/usr/share/docs/testpkg/licenses/duplicated",
|
||||
}
|
||||
documentFiles := []string{
|
||||
"/usr/share/docs/testpkg/doc.txt",
|
||||
"/usr/share/docs/testpkg/COPYING",
|
||||
"/usr/share/licenses/testpkg/other_misplaced_2",
|
||||
"/usr/share/licenses/testpkg/other_misplaced",
|
||||
"/usr/share/docs/testpkg/licenses/duplicated",
|
||||
}
|
||||
licenseFiles := []string{
|
||||
"/usr/share/licenses/testpkg/duplicated",
|
||||
}
|
||||
exceptions := LicenseExceptions{}
|
||||
|
||||
expectedBadDocFiles := []string{
|
||||
"/usr/share/docs/testpkg/COPYING",
|
||||
}
|
||||
expectedBadOtherFiles := []string{
|
||||
"/usr/share/licenses/testpkg/misplaced",
|
||||
"/usr/share/licenses/testpkg/other_misplaced_2",
|
||||
}
|
||||
expectedDuplicatedDocs := []string{
|
||||
"/usr/share/docs/testpkg/licenses/duplicated",
|
||||
}
|
||||
|
||||
badDocFiles, badOtherFiles, duplicatedDocs := interpretResults(pkgName, files, documentFiles, licenseFiles, loadDefaultLicenseNames(t), exceptions)
|
||||
|
||||
assert.Equal(t, expectedBadDocFiles, badDocFiles)
|
||||
assert.Equal(t, expectedBadOtherFiles, badOtherFiles)
|
||||
assert.Equal(t, expectedDuplicatedDocs, duplicatedDocs)
|
||||
}
|
|
@ -0,0 +1,83 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
/*
|
||||
Package licensecheckformat provides functions to handle the output of the licensecheck package.
|
||||
*/
|
||||
package licensecheckformat
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/pkg/licensecheck"
|
||||
)
|
||||
|
||||
// FormatResults formats the results of the search to a string. Results will be ordered as follows:
|
||||
// - Packages with warnings only, sorted alphabetically
|
||||
// - Packages with errors (and possibly warnings), sorted alphabetically
|
||||
// If pedantic is true, warnings will be treated as errors.
|
||||
func FormatResults(results []licensecheck.LicenseCheckResult, mode licensecheck.LicenseCheckMode) string {
|
||||
var sb strings.Builder
|
||||
_, warnings, errors := licensecheck.SortAndFilterResults(results, mode)
|
||||
|
||||
if len(warnings) == 0 && len(errors) == 0 {
|
||||
return "No license issues found\n"
|
||||
}
|
||||
|
||||
// Print warnings first, but only if they don't also have an error
|
||||
for _, result := range warnings {
|
||||
if result.HasWarningResult(mode) && !result.HasErrorResult(mode) {
|
||||
sb.WriteString(formatResult(result, mode))
|
||||
}
|
||||
}
|
||||
|
||||
// Now print the errors
|
||||
for _, result := range errors {
|
||||
sb.WriteString(formatResult(result, mode))
|
||||
}
|
||||
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
func formatResult(result licensecheck.LicenseCheckResult, mode licensecheck.LicenseCheckMode) string {
|
||||
badDocIsError := true
|
||||
badFileIsError := true
|
||||
dupIsError := false
|
||||
if mode == licensecheck.LicenseCheckModePedantic {
|
||||
dupIsError = true
|
||||
} else if mode == licensecheck.LicenseCheckModeWarnOnly {
|
||||
badDocIsError = false
|
||||
badFileIsError = false
|
||||
}
|
||||
|
||||
var sb strings.Builder
|
||||
// Print errors first if they exist
|
||||
if result.HasErrorResult(mode) {
|
||||
sb.WriteString(fmt.Sprintf("ERROR: (%s) has license errors:\n", filepath.Base(result.RpmPath)))
|
||||
if badDocIsError && len(result.BadDocs) > 0 {
|
||||
sb.WriteString(fmt.Sprintf("\tbad %%doc files:\n\t\t%s\n", strings.Join(result.BadDocs, "\n\t\t")))
|
||||
}
|
||||
if badFileIsError && len(result.BadFiles) > 0 {
|
||||
sb.WriteString(fmt.Sprintf("\tbad general file:\n\t\t%s\n", strings.Join(result.BadFiles, "\n\t\t")))
|
||||
}
|
||||
if dupIsError && len(result.DuplicatedDocs) > 0 {
|
||||
sb.WriteString(fmt.Sprintf("\tduplicated license files:\n\t\t%s\n", strings.Join(result.DuplicatedDocs, "\n\t\t")))
|
||||
}
|
||||
}
|
||||
// Now add warnings if they exist
|
||||
if result.HasWarningResult(mode) {
|
||||
sb.WriteString(fmt.Sprintf("WARN: (%s) has license warnings:\n", filepath.Base(result.RpmPath)))
|
||||
if !badDocIsError && len(result.BadDocs) > 0 {
|
||||
sb.WriteString(fmt.Sprintf("\tbad %%doc files:\n\t\t%s\n", strings.Join(result.BadDocs, "\n\t\t")))
|
||||
}
|
||||
if !badFileIsError && len(result.BadFiles) > 0 {
|
||||
sb.WriteString(fmt.Sprintf("\tbad general file:\n\t\t%s\n", strings.Join(result.BadFiles, "\n\t\t")))
|
||||
}
|
||||
if !dupIsError && len(result.DuplicatedDocs) > 0 {
|
||||
sb.WriteString(fmt.Sprintf("\tduplicated license files:\n\t\t%s\n", strings.Join(result.DuplicatedDocs, "\n\t\t")))
|
||||
}
|
||||
}
|
||||
return sb.String()
|
||||
}
|
|
@ -0,0 +1,242 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package licensecheckformat
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/pkg/licensecheck"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestFormatResultsNonPedantic(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
results []licensecheck.LicenseCheckResult
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "No results",
|
||||
results: []licensecheck.LicenseCheckResult{},
|
||||
expected: "No license issues found\n",
|
||||
},
|
||||
{
|
||||
name: "Single result",
|
||||
results: []licensecheck.LicenseCheckResult{
|
||||
{
|
||||
RpmPath: "/path/to/package.rpm",
|
||||
BadDocs: []string{"doc1"},
|
||||
},
|
||||
},
|
||||
expected: "ERROR: (package.rpm) has license errors:\n" +
|
||||
"\tbad %doc files:\n" +
|
||||
"\t\tdoc1\n",
|
||||
},
|
||||
{
|
||||
name: "Multiple results",
|
||||
results: []licensecheck.LicenseCheckResult{
|
||||
{
|
||||
RpmPath: "/path/to/package.rpm",
|
||||
BadDocs: []string{"doc1", "doc2"},
|
||||
BadFiles: []string{"file1", "file2"},
|
||||
DuplicatedDocs: []string{"dupe1", "dupe2"},
|
||||
},
|
||||
{
|
||||
RpmPath: "/path/to/another-package.rpm",
|
||||
DuplicatedDocs: []string{"dupe3", "dupe4"},
|
||||
},
|
||||
},
|
||||
expected: "WARN: (another-package.rpm) has license warnings:\n" +
|
||||
"\tduplicated license files:\n" +
|
||||
"\t\tdupe3\n" +
|
||||
"\t\tdupe4\n" +
|
||||
"ERROR: (package.rpm) has license errors:\n" +
|
||||
"\tbad %doc files:\n" +
|
||||
"\t\tdoc1\n" +
|
||||
"\t\tdoc2\n" +
|
||||
"\tbad general file:\n" +
|
||||
"\t\tfile1\n" +
|
||||
"\t\tfile2\n" +
|
||||
"WARN: (package.rpm) has license warnings:\n" +
|
||||
"\tduplicated license files:\n" +
|
||||
"\t\tdupe1\n" +
|
||||
"\t\tdupe2\n",
|
||||
},
|
||||
{
|
||||
name: "Duplicated docs only",
|
||||
results: []licensecheck.LicenseCheckResult{
|
||||
{
|
||||
RpmPath: "/path/to/package.rpm",
|
||||
DuplicatedDocs: []string{"dupe1", "dupe2"},
|
||||
},
|
||||
},
|
||||
expected: "WARN: (package.rpm) has license warnings:\n" +
|
||||
"\tduplicated license files:\n" +
|
||||
"\t\tdupe1\n" +
|
||||
"\t\tdupe2\n",
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
actual := FormatResults(tc.results, licensecheck.LicenseCheckModeDefault)
|
||||
assert.Equal(t, tc.expected, actual)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFormatResultsPedantic(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
results []licensecheck.LicenseCheckResult
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "No results",
|
||||
results: []licensecheck.LicenseCheckResult{},
|
||||
expected: "No license issues found\n",
|
||||
},
|
||||
{
|
||||
name: "Single result",
|
||||
results: []licensecheck.LicenseCheckResult{
|
||||
{
|
||||
RpmPath: "/path/to/package.rpm",
|
||||
BadDocs: []string{"doc1"},
|
||||
},
|
||||
},
|
||||
expected: "ERROR: (package.rpm) has license errors:\n" +
|
||||
"\tbad %doc files:\n" +
|
||||
"\t\tdoc1\n",
|
||||
},
|
||||
{
|
||||
name: "Multiple results",
|
||||
results: []licensecheck.LicenseCheckResult{
|
||||
{
|
||||
RpmPath: "/path/to/package.rpm",
|
||||
BadDocs: []string{"doc1", "doc2"},
|
||||
BadFiles: []string{"file1", "file2"},
|
||||
DuplicatedDocs: []string{"dupe1", "dupe2"},
|
||||
},
|
||||
{
|
||||
RpmPath: "/path/to/another-package.rpm",
|
||||
DuplicatedDocs: []string{"dupe3", "dupe4"},
|
||||
},
|
||||
},
|
||||
expected: "ERROR: (another-package.rpm) has license errors:\n" +
|
||||
"\tduplicated license files:\n" +
|
||||
"\t\tdupe3\n" +
|
||||
"\t\tdupe4\n" +
|
||||
"ERROR: (package.rpm) has license errors:\n" +
|
||||
"\tbad %doc files:\n" +
|
||||
"\t\tdoc1\n" +
|
||||
"\t\tdoc2\n" +
|
||||
"\tbad general file:\n" +
|
||||
"\t\tfile1\n" +
|
||||
"\t\tfile2\n" +
|
||||
"\tduplicated license files:\n" +
|
||||
"\t\tdupe1\n" +
|
||||
"\t\tdupe2\n",
|
||||
},
|
||||
{
|
||||
name: "Duplicated docs only",
|
||||
results: []licensecheck.LicenseCheckResult{
|
||||
{
|
||||
RpmPath: "/path/to/package.rpm",
|
||||
DuplicatedDocs: []string{"dupe1", "dupe2"},
|
||||
},
|
||||
},
|
||||
expected: "ERROR: (package.rpm) has license errors:\n" +
|
||||
"\tduplicated license files:\n" +
|
||||
"\t\tdupe1\n" +
|
||||
"\t\tdupe2\n",
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
actual := FormatResults(tc.results, licensecheck.LicenseCheckModePedantic)
|
||||
assert.Equal(t, tc.expected, actual)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFormatResultsWarnOnly(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
results []licensecheck.LicenseCheckResult
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "No results",
|
||||
results: []licensecheck.LicenseCheckResult{},
|
||||
expected: "No license issues found\n",
|
||||
},
|
||||
{
|
||||
name: "Single result",
|
||||
results: []licensecheck.LicenseCheckResult{
|
||||
{
|
||||
RpmPath: "/path/to/package.rpm",
|
||||
BadDocs: []string{"doc1"},
|
||||
},
|
||||
},
|
||||
expected: "WARN: (package.rpm) has license warnings:\n" +
|
||||
"\tbad %doc files:\n" +
|
||||
"\t\tdoc1\n",
|
||||
},
|
||||
{
|
||||
name: "Multiple results",
|
||||
results: []licensecheck.LicenseCheckResult{
|
||||
{
|
||||
RpmPath: "/path/to/package.rpm",
|
||||
BadDocs: []string{"doc1", "doc2"},
|
||||
BadFiles: []string{"file1", "file2"},
|
||||
DuplicatedDocs: []string{"dupe1", "dupe2"},
|
||||
},
|
||||
{
|
||||
RpmPath: "/path/to/another-package.rpm",
|
||||
DuplicatedDocs: []string{"dupe3", "dupe4"},
|
||||
},
|
||||
},
|
||||
expected: "WARN: (another-package.rpm) has license warnings:\n" +
|
||||
"\tduplicated license files:\n" +
|
||||
"\t\tdupe3\n" +
|
||||
"\t\tdupe4\n" +
|
||||
"WARN: (package.rpm) has license warnings:\n" +
|
||||
"\tbad %doc files:\n" +
|
||||
"\t\tdoc1\n" +
|
||||
"\t\tdoc2\n" +
|
||||
"\tbad general file:\n" +
|
||||
"\t\tfile1\n" +
|
||||
"\t\tfile2\n" +
|
||||
"\tduplicated license files:\n" +
|
||||
"\t\tdupe1\n" +
|
||||
"\t\tdupe2\n",
|
||||
},
|
||||
{
|
||||
name: "Duplicated docs only",
|
||||
results: []licensecheck.LicenseCheckResult{
|
||||
{
|
||||
RpmPath: "/path/to/package.rpm",
|
||||
DuplicatedDocs: []string{"dupe1", "dupe2"},
|
||||
},
|
||||
},
|
||||
expected: "WARN: (package.rpm) has license warnings:\n" +
|
||||
"\tduplicated license files:\n" +
|
||||
"\t\tdupe1\n" +
|
||||
"\t\tdupe2\n",
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
actual := FormatResults(tc.results, licensecheck.LicenseCheckModeWarnOnly)
|
||||
assert.Equal(t, tc.expected, actual)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFormatResultsEmpty(t *testing.T) {
|
||||
actual := FormatResults([]licensecheck.LicenseCheckResult{}, licensecheck.LicenseCheckModeDefault)
|
||||
assert.Equal(t, "No license issues found\n", actual)
|
||||
|
||||
actual = FormatResults([]licensecheck.LicenseCheckResult{{RpmPath: "/path/to/package.rpm", PackageName: "pkg1"}}, licensecheck.LicenseCheckModeDefault)
|
||||
assert.Equal(t, "No license issues found\n", actual)
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package licensecheck
|
||||
|
||||
import "slices"
|
||||
|
||||
// LicenseCheckMode controls how the license checker package classifies its findings (as warnings, as errors, or not
// at all). The string values are intended to be usable as command line flag values in addition to being used in code.
type LicenseCheckMode string

const (
	// LicenseCheckModeNone disables license checking.
	LicenseCheckModeNone LicenseCheckMode = "none"
	// LicenseCheckModeWarnOnly converts all findings into warnings.
	LicenseCheckModeWarnOnly LicenseCheckMode = "warn"
	// LicenseCheckModeFatalOnly reports critical errors, but allows warnings.
	LicenseCheckModeFatalOnly LicenseCheckMode = "fatal"
	// LicenseCheckModePedantic converts all findings into errors.
	LicenseCheckModePedantic LicenseCheckMode = "pedantic"

	// LicenseCheckModeDefault is the mode used when no explicit choice is made.
	LicenseCheckModeDefault = LicenseCheckModeFatalOnly
)
|
||||
|
||||
// ValidLicenseCheckModes is a list of all valid license check modes
|
||||
var validLicenseCheckModes = []LicenseCheckMode{LicenseCheckModeNone, LicenseCheckModeWarnOnly, LicenseCheckModePedantic, LicenseCheckModeFatalOnly}
|
||||
|
||||
// IsValidLicenseCheckMode returns true if the given mode is a valid license check mode
|
||||
func IsValidLicenseCheckMode(mode LicenseCheckMode) bool {
|
||||
return slices.Contains(validLicenseCheckModes, mode)
|
||||
}
|
||||
|
||||
// ValidLicenseCheckModeStrings returns a list of all valid license check modes as strings for use with the command line
|
||||
func ValidLicenseCheckModeStrings() (modes []string) {
|
||||
for _, mode := range validLicenseCheckModes {
|
||||
modes = append(modes, string(mode))
|
||||
}
|
||||
return modes
|
||||
}
|
|
@ -0,0 +1,108 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package licensecheck
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/internal/jsonutils"
|
||||
)
|
||||
|
||||
// LicenseCheckResult holds the outcome of a license check on a single RPM. When serialized to JSON, every field
// except RpmPath is omitted if empty.
type LicenseCheckResult struct {
	// RpmPath is the path of the rpm that was checked.
	RpmPath string `json:"RpmPath"`
	// PackageName is the name of the package the rpm provides.
	PackageName string `json:"PackageName,omitempty"`
	// BadDocs lists documentation files flagged by the check.
	BadDocs []string `json:"BadDocs,omitempty"`
	// BadFiles lists other files flagged by the check.
	BadFiles []string `json:"BadFiles,omitempty"`
	// DuplicatedDocs lists documentation files flagged as duplicates of license files.
	DuplicatedDocs []string `json:"DuplicatedDocs,omitempty"`
}
|
||||
|
||||
// HasErrorResult returns true if the result contains at least one finding that should be treated as an error based on
|
||||
// the provided mode.
|
||||
func (r *LicenseCheckResult) HasErrorResult(mode LicenseCheckMode) (hasErrorResult bool) {
|
||||
switch mode {
|
||||
case LicenseCheckModeNone:
|
||||
return false
|
||||
case LicenseCheckModeWarnOnly:
|
||||
return false
|
||||
case LicenseCheckModePedantic:
|
||||
if len(r.DuplicatedDocs) > 0 {
|
||||
return true
|
||||
}
|
||||
fallthrough
|
||||
case LicenseCheckModeFatalOnly:
|
||||
return len(r.BadDocs) > 0 || len(r.BadFiles) > 0
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// HasWarningResult returns true if the result contains at least one finding that should be treated as a warning based on
|
||||
// the provided mode.
|
||||
func (r *LicenseCheckResult) HasWarningResult(mode LicenseCheckMode) bool {
|
||||
switch mode {
|
||||
case LicenseCheckModeNone:
|
||||
return false
|
||||
case LicenseCheckModePedantic:
|
||||
// Pedantic mode treats warnings as errors, so we never have warnings
|
||||
return false
|
||||
case LicenseCheckModeWarnOnly:
|
||||
// We are treating all findings as warnings
|
||||
if r.HasErrorResult(LicenseCheckModeFatalOnly) {
|
||||
return true
|
||||
}
|
||||
fallthrough
|
||||
case LicenseCheckModeFatalOnly:
|
||||
return len(r.DuplicatedDocs) > 0
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// SaveLicenseCheckResults saves a list of all warnings and errors to a json file.
|
||||
func SaveLicenseCheckResults(savePath string, resultsList []LicenseCheckResult) error {
|
||||
// Create parent dir if missing
|
||||
err := os.MkdirAll(filepath.Dir(savePath), os.ModePerm)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create directory for results file. Error:\n%w", err)
|
||||
}
|
||||
|
||||
sortedListOfFindings, _, _ := SortAndFilterResults(resultsList, LicenseCheckModeDefault)
|
||||
err = jsonutils.WriteJSONFile(savePath, sortedListOfFindings)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to save license check results. Error:\n%w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// SortAndFilterResults sorts the provided input slice, then filters them into three categories: anyResult, warnings, and errors.
|
||||
// The results slice passed to the function will also be sorted in-place. The mode flag will control how the results are filtered.
|
||||
func SortAndFilterResults(results []LicenseCheckResult, mode LicenseCheckMode) (anyResult, warnings, errors []LicenseCheckResult) {
|
||||
// Sort the input
|
||||
sort.Slice(results, func(i, j int) bool {
|
||||
return results[i].RpmPath < results[j].RpmPath
|
||||
})
|
||||
|
||||
anyResult = []LicenseCheckResult{}
|
||||
warnings = []LicenseCheckResult{}
|
||||
errors = []LicenseCheckResult{}
|
||||
for _, result := range results {
|
||||
if result.HasErrorResult(mode) || result.HasWarningResult(mode) {
|
||||
anyResult = append(anyResult, result)
|
||||
}
|
||||
|
||||
if result.HasErrorResult(mode) {
|
||||
errors = append(errors, result)
|
||||
}
|
||||
|
||||
if result.HasWarningResult(mode) {
|
||||
warnings = append(warnings, result)
|
||||
}
|
||||
}
|
||||
|
||||
return anyResult, warnings, errors
|
||||
}
|
|
@ -0,0 +1,180 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package licensecheck
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/internal/jsonutils"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestCategorizeResults(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
result LicenseCheckResult
|
||||
expectedBad bool
|
||||
expectedWarning bool
|
||||
}{
|
||||
{
|
||||
name: "All results",
|
||||
result: LicenseCheckResult{
|
||||
BadDocs: []string{"doc"},
|
||||
BadFiles: []string{"file"},
|
||||
DuplicatedDocs: []string{"dupe"},
|
||||
},
|
||||
expectedBad: true,
|
||||
expectedWarning: true,
|
||||
},
|
||||
{
|
||||
name: "BadDocs",
|
||||
result: LicenseCheckResult{
|
||||
BadDocs: []string{"doc"},
|
||||
},
|
||||
expectedBad: true,
|
||||
expectedWarning: false,
|
||||
},
|
||||
{
|
||||
name: "BadFiles",
|
||||
result: LicenseCheckResult{
|
||||
BadFiles: []string{"file"},
|
||||
},
|
||||
expectedBad: true,
|
||||
expectedWarning: false,
|
||||
},
|
||||
{
|
||||
name: "DuplicatedDocs",
|
||||
result: LicenseCheckResult{
|
||||
DuplicatedDocs: []string{"dupe"},
|
||||
},
|
||||
expectedBad: false,
|
||||
expectedWarning: true,
|
||||
},
|
||||
{
|
||||
name: "BadDocsAndBadFiles",
|
||||
result: LicenseCheckResult{
|
||||
BadDocs: []string{"doc"},
|
||||
BadFiles: []string{"file"},
|
||||
},
|
||||
expectedBad: true,
|
||||
expectedWarning: false,
|
||||
},
|
||||
{
|
||||
name: "Dupes with bad doc",
|
||||
result: LicenseCheckResult{
|
||||
BadDocs: []string{"doc"},
|
||||
DuplicatedDocs: []string{"dupe"},
|
||||
},
|
||||
expectedBad: true,
|
||||
expectedWarning: true,
|
||||
},
|
||||
{
|
||||
name: "Dupes with bad file",
|
||||
result: LicenseCheckResult{
|
||||
BadFiles: []string{"file"},
|
||||
DuplicatedDocs: []string{"dupe"},
|
||||
},
|
||||
expectedBad: true,
|
||||
expectedWarning: true,
|
||||
},
|
||||
{
|
||||
name: "No results",
|
||||
result: LicenseCheckResult{},
|
||||
expectedBad: false,
|
||||
expectedWarning: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
assert.Equal(t, tc.expectedBad, tc.result.HasErrorResult(LicenseCheckModeDefault))
|
||||
assert.Equal(t, tc.expectedWarning, tc.result.HasWarningResult(LicenseCheckModeDefault))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSaveResultsToFile(t *testing.T) {
|
||||
results := []LicenseCheckResult{
|
||||
{
|
||||
RpmPath: "/path/to/rpm",
|
||||
BadDocs: []string{"/docs/doc1", "/docs/doc2"},
|
||||
DuplicatedDocs: []string{"/docs/COPY"},
|
||||
},
|
||||
}
|
||||
tempFile := filepath.Join(t.TempDir(), "missing_dir", "results.json")
|
||||
err := SaveLicenseCheckResults(tempFile, results)
|
||||
assert.Nil(t, err)
|
||||
|
||||
// Load it back and see if it matches.
|
||||
resultsCheck := []LicenseCheckResult{}
|
||||
err = jsonutils.ReadJSONFile(tempFile, &resultsCheck)
|
||||
assert.Nil(t, err)
|
||||
|
||||
assert.Equal(t, results, resultsCheck)
|
||||
}
|
||||
|
||||
func TestSortAndFilter(t *testing.T) {
|
||||
r1 := LicenseCheckResult{
|
||||
RpmPath: "/path/to/rpm1",
|
||||
BadDocs: []string{"/docs/doc1", "/docs/doc2"},
|
||||
DuplicatedDocs: []string{"/docs/COPY"},
|
||||
}
|
||||
r2 := LicenseCheckResult{
|
||||
RpmPath: "/path/to/rpm2",
|
||||
BadFiles: []string{"/docs/doc1", "/docs/doc2"},
|
||||
DuplicatedDocs: []string{"/docs/COPY"},
|
||||
}
|
||||
r3 := LicenseCheckResult{
|
||||
RpmPath: "/path/to/rpm3",
|
||||
BadDocs: []string{"/docs/doc1", "/docs/doc2"},
|
||||
}
|
||||
r4 := LicenseCheckResult{
|
||||
RpmPath: "/path/to/rpm4",
|
||||
}
|
||||
r5 := LicenseCheckResult{
|
||||
RpmPath: "/path/to/rpm5",
|
||||
DuplicatedDocs: []string{"/docs/COPY"},
|
||||
}
|
||||
|
||||
unsortedList := []LicenseCheckResult{r5, r4, r2, r1, r3}
|
||||
sortedList := []LicenseCheckResult{r1, r2, r3, r4, r5}
|
||||
|
||||
expectedAll := []LicenseCheckResult{r1, r2, r3, r5}
|
||||
expectedWarnings := []LicenseCheckResult{r1, r2, r5}
|
||||
expectedWarningsPedantic := []LicenseCheckResult{}
|
||||
expectedWarningsWarn := []LicenseCheckResult{r1, r2, r3, r5}
|
||||
expectedErrors := []LicenseCheckResult{r1, r2, r3}
|
||||
expectedErrorsPedantic := []LicenseCheckResult{r1, r2, r3, r5}
|
||||
expectedErrorsWarn := []LicenseCheckResult{}
|
||||
|
||||
input := make([]LicenseCheckResult, len(unsortedList))
|
||||
copy(input, unsortedList)
|
||||
all, warnings, errors := SortAndFilterResults(input, LicenseCheckModeFatalOnly)
|
||||
assert.Equal(t, sortedList, input)
|
||||
assert.Equal(t, expectedAll, all)
|
||||
assert.Equal(t, expectedWarnings, warnings)
|
||||
assert.Equal(t, expectedErrors, errors)
|
||||
|
||||
copy(input, unsortedList)
|
||||
all, warnings, errors = SortAndFilterResults(input, LicenseCheckModePedantic)
|
||||
assert.Equal(t, sortedList, input)
|
||||
assert.Equal(t, expectedAll, all)
|
||||
assert.Equal(t, expectedWarningsPedantic, warnings)
|
||||
assert.Equal(t, expectedErrorsPedantic, errors)
|
||||
|
||||
copy(input, unsortedList)
|
||||
all, warnings, errors = SortAndFilterResults(input, LicenseCheckModeWarnOnly)
|
||||
assert.Equal(t, sortedList, input)
|
||||
assert.Equal(t, expectedAll, all)
|
||||
assert.Equal(t, expectedWarningsWarn, warnings)
|
||||
assert.Equal(t, expectedErrorsWarn, errors)
|
||||
|
||||
copy(input, unsortedList)
|
||||
all, warnings, errors = SortAndFilterResults(input, LicenseCheckModeNone)
|
||||
assert.Equal(t, sortedList, input)
|
||||
assert.Equal(t, []LicenseCheckResult{}, all)
|
||||
assert.Equal(t, []LicenseCheckResult{}, warnings)
|
||||
assert.Equal(t, []LicenseCheckResult{}, errors)
|
||||
}
|
|
@ -0,0 +1,78 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package licensecheck
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/internal/jsonutils"
|
||||
)
|
||||
|
||||
// PkgExceptions holds the license-check exceptions that apply to a single package.
type PkgExceptions struct {
	// PackageName is compared for exact equality against the package name being checked.
	PackageName string `json:"PackageName"`
	// IgnoredFilesRegexList contains the regular expressions, as written in the json file, for file paths
	// that should be ignored for this package.
	IgnoredFilesRegexList []string `json:"IgnoredFilesRegexList"`
	// compiledIgnoreRegexList is the compiled form of IgnoredFilesRegexList. It is not part of the json and
	// is filled in by LoadLicenseExceptions.
	compiledIgnoreRegexList []*regexp.Regexp
}
|
||||
|
||||
type LicenseExceptions struct {
|
||||
PkgExceptions []PkgExceptions `json:"PkgExceptions"`
|
||||
GlobalExceptionsRegexList []string `json:"GlobalExceptionsRegexList"`
|
||||
compiledGlobalIgnoreRegexList []*regexp.Regexp
|
||||
}
|
||||
|
||||
// ShouldIgnoreFile checks if the given file should be ignored based on the license exceptions
|
||||
// - packageName: the name of the package as returned by rpm query '%{NAME}'
|
||||
// - filePath: the path of the file to be checked as returned by rpm query '%{FILENAMES}'
|
||||
func (l *LicenseExceptions) ShouldIgnoreFile(packageName, filePath string) bool {
|
||||
// Check if the file should be ignored globally
|
||||
for _, ignoredRegex := range l.compiledGlobalIgnoreRegexList {
|
||||
if ignoredRegex.MatchString(filePath) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// Check if the file should be ignored for the given package
|
||||
for _, exception := range l.PkgExceptions {
|
||||
if exception.PackageName == packageName {
|
||||
for _, ignoredRegex := range exception.compiledIgnoreRegexList {
|
||||
if ignoredRegex.MatchString(filePath) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// LoadLicenseExceptions loads the license exceptions from the given .json file into a LicenseExceptions struct
|
||||
func LoadLicenseExceptions(file string) (LicenseExceptions, error) {
|
||||
config := LicenseExceptions{}
|
||||
err := jsonutils.ReadJSONFile(file, &config)
|
||||
if err != nil {
|
||||
return LicenseExceptions{}, fmt.Errorf("failed to read license exceptions file (%s):\n%w", file, err)
|
||||
}
|
||||
|
||||
// Compile regexes for ignored files
|
||||
for i := range config.PkgExceptions {
|
||||
for j := range config.PkgExceptions[i].IgnoredFilesRegexList {
|
||||
regex, err := regexp.Compile(config.PkgExceptions[i].IgnoredFilesRegexList[j])
|
||||
if err != nil {
|
||||
return LicenseExceptions{}, fmt.Errorf("failed to compile regex for ignored files (%s):\n%w", config.PkgExceptions[i].IgnoredFilesRegexList[j], err)
|
||||
}
|
||||
config.PkgExceptions[i].compiledIgnoreRegexList = append(config.PkgExceptions[i].compiledIgnoreRegexList, regex)
|
||||
}
|
||||
}
|
||||
|
||||
// Compile regexes for global ignored files
|
||||
for i := range config.GlobalExceptionsRegexList {
|
||||
regex, err := regexp.Compile(config.GlobalExceptionsRegexList[i])
|
||||
if err != nil {
|
||||
return LicenseExceptions{}, fmt.Errorf("failed to compile regex for global ignored files (%s):\n%w", config.GlobalExceptionsRegexList[i], err)
|
||||
}
|
||||
config.compiledGlobalIgnoreRegexList = append(config.compiledGlobalIgnoreRegexList, regex)
|
||||
}
|
||||
|
||||
return config, nil
|
||||
}
|
|
@ -0,0 +1,212 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
package licensecheck
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/internal/file"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestLoadLicenseExceptions(t *testing.T) {
|
||||
file := "testdata/test_license_exceptions.json"
|
||||
expectedExceptions := LicenseExceptions{
|
||||
PkgExceptions: []PkgExceptions{
|
||||
{
|
||||
PackageName: "TestPackage1",
|
||||
IgnoredFilesRegexList: []string{
|
||||
"/usr/share/doc/LICENSE",
|
||||
"/usr/share/doc/README.GPL",
|
||||
".*GLOB1",
|
||||
},
|
||||
compiledIgnoreRegexList: []*regexp.Regexp{
|
||||
regexp.MustCompile("/usr/share/doc/LICENSE"),
|
||||
regexp.MustCompile("/usr/share/doc/README.GPL"),
|
||||
regexp.MustCompile(".*GLOB1"),
|
||||
},
|
||||
},
|
||||
{
|
||||
PackageName: "TestPackage2",
|
||||
IgnoredFilesRegexList: []string{
|
||||
"/usr/share/doc/LICENSE",
|
||||
"/usr/share/doc/README.GPL",
|
||||
".*GLOB2",
|
||||
},
|
||||
compiledIgnoreRegexList: []*regexp.Regexp{
|
||||
regexp.MustCompile("/usr/share/doc/LICENSE"),
|
||||
regexp.MustCompile("/usr/share/doc/README.GPL"),
|
||||
regexp.MustCompile(".*GLOB2"),
|
||||
},
|
||||
},
|
||||
},
|
||||
GlobalExceptionsRegexList: []string{
|
||||
".*GLOB3",
|
||||
},
|
||||
compiledGlobalIgnoreRegexList: []*regexp.Regexp{
|
||||
regexp.MustCompile(".*GLOB3"),
|
||||
},
|
||||
}
|
||||
|
||||
exceptions, err := LoadLicenseExceptions(file)
|
||||
|
||||
// Check if there was an error loading the license exceptions
|
||||
if err != nil {
|
||||
t.Errorf("Failed to load license exceptions: %v", err)
|
||||
}
|
||||
|
||||
// Check if the loaded exceptions match the expected exceptions
|
||||
assert.Equal(t, expectedExceptions, exceptions)
|
||||
}
|
||||
|
||||
func TestShouldIgnoreFile(t *testing.T) {
|
||||
exceptions := LicenseExceptions{
|
||||
PkgExceptions: []PkgExceptions{
|
||||
{
|
||||
PackageName: "TestPackage1",
|
||||
IgnoredFilesRegexList: []string{
|
||||
"/usr/share/doc/LICENSE",
|
||||
"/usr/share/doc/README.GPL",
|
||||
".*GLOB1",
|
||||
},
|
||||
compiledIgnoreRegexList: []*regexp.Regexp{
|
||||
regexp.MustCompile("/usr/share/doc/LICENSE"),
|
||||
regexp.MustCompile("/usr/share/doc/README.GPL"),
|
||||
regexp.MustCompile(".*GLOB1"),
|
||||
},
|
||||
},
|
||||
{
|
||||
PackageName: "TestPackage2",
|
||||
IgnoredFilesRegexList: []string{
|
||||
"/usr/share/doc/LICENSE",
|
||||
"/usr/share/doc/README.GPL",
|
||||
".*GLOB2",
|
||||
},
|
||||
compiledIgnoreRegexList: []*regexp.Regexp{
|
||||
regexp.MustCompile("/usr/share/doc/LICENSE"),
|
||||
regexp.MustCompile("/usr/share/doc/README.GPL"),
|
||||
regexp.MustCompile(".*GLOB2"),
|
||||
},
|
||||
},
|
||||
},
|
||||
GlobalExceptionsRegexList: []string{
|
||||
".*GLOB3",
|
||||
},
|
||||
compiledGlobalIgnoreRegexList: []*regexp.Regexp{
|
||||
regexp.MustCompile(".*GLOB3"),
|
||||
},
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
packageName string
|
||||
filePath string
|
||||
expectedResponse bool
|
||||
}{
|
||||
{
|
||||
name: "File should be ignored",
|
||||
packageName: "TestPackage1",
|
||||
filePath: "/usr/share/doc/LICENSE",
|
||||
expectedResponse: true,
|
||||
},
|
||||
{
|
||||
name: "2nd File should be ignored",
|
||||
packageName: "TestPackage1",
|
||||
filePath: "/usr/share/doc/README.GPL",
|
||||
expectedResponse: true,
|
||||
},
|
||||
{
|
||||
name: "File should be ignored in other package",
|
||||
packageName: "TestPackage2",
|
||||
filePath: "/usr/share/doc/LICENSE",
|
||||
expectedResponse: true,
|
||||
},
|
||||
{
|
||||
name: "File should not be ignored in listed package",
|
||||
packageName: "TestPackage1",
|
||||
filePath: "/usr/share/doc/other_file",
|
||||
expectedResponse: false,
|
||||
},
|
||||
{
|
||||
name: "File should not be ignored in other package",
|
||||
packageName: "TestPackage3",
|
||||
filePath: "/usr/share/doc/LICENSE",
|
||||
expectedResponse: false,
|
||||
},
|
||||
{
|
||||
name: "File should match package glob",
|
||||
packageName: "TestPackage1",
|
||||
filePath: "/usr/share/doc/GLOB1",
|
||||
expectedResponse: true,
|
||||
},
|
||||
{
|
||||
name: "File should not match package glob",
|
||||
packageName: "TestPackage1",
|
||||
filePath: "/usr/share/doc/GLOB2",
|
||||
expectedResponse: false,
|
||||
},
|
||||
{
|
||||
name: "File should match global glob",
|
||||
packageName: "TestPackage1",
|
||||
filePath: "/usr/share/doc/GLOB3",
|
||||
expectedResponse: true,
|
||||
},
|
||||
{
|
||||
name: "File should match unkown package with global glob",
|
||||
packageName: "NOT_A_PACKAGE",
|
||||
filePath: "/usr/share/doc/GLOB3",
|
||||
expectedResponse: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
assert.Equal(t, tc.expectedResponse, exceptions.ShouldIgnoreFile(tc.packageName, tc.filePath))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNotPanicMissingFile(t *testing.T) {
|
||||
tempPath := t.TempDir()
|
||||
file := filepath.Join(tempPath, "missing_file.json")
|
||||
assert.NotPanics(t, func() {
|
||||
_, err := LoadLicenseExceptions(file)
|
||||
assert.EqualError(t, err, "failed to read license exceptions file ("+file+"):\nopen "+file+": no such file or directory")
|
||||
})
|
||||
}
|
||||
|
||||
func TestInvalidRegex(t *testing.T) {
|
||||
const invalidRegex = `.*[`
|
||||
testCases := []struct {
|
||||
name string
|
||||
json string
|
||||
expectedErr string
|
||||
}{
|
||||
{
|
||||
name: "Invalid regex",
|
||||
json: `{"PkgExceptions": [{"PackageName": "TestPackage1", "IgnoredFilesRegexList": ["` + invalidRegex + `"]}], "GlobalExceptionsRegexList": []}`,
|
||||
expectedErr: "failed to compile regex for ignored files (.*[):\nerror parsing regexp: missing closing ]: `[`",
|
||||
},
|
||||
{
|
||||
name: "Invalid global regex",
|
||||
json: `{"PkgExceptions": [], "GlobalExceptionsRegexList": ["` + invalidRegex + `"]}`,
|
||||
expectedErr: "failed to compile regex for global ignored files (.*[):\nerror parsing regexp: missing closing ]: `[`",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
tempPath := t.TempDir()
|
||||
jsonFilePath := filepath.Join(tempPath, "invalid_regex.json")
|
||||
err := file.Write(tc.json, jsonFilePath)
|
||||
assert.NoError(t, err)
|
||||
exceptions, err := LoadLicenseExceptions(jsonFilePath)
|
||||
assert.Error(t, err)
|
||||
assert.EqualError(t, err, tc.expectedErr)
|
||||
assert.Equal(t, LicenseExceptions{}, exceptions)
|
||||
})
|
||||
}
|
||||
}
|
|
@ -0,0 +1,111 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
// A tool for validating %license entries in rpms
|
||||
|
||||
package licensecheck
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/internal/jsonutils"
|
||||
)
|
||||
|
||||
// LicenseNames holds the regular expressions used to decide whether a file looks like a license file. Each
// exported list is read from json; each unexported list is its compiled form, filled in by LoadLicenseNames.
type LicenseNames struct {
	// FuzzyLicenseNamesRegexList is matched against the file path after the license directory prefix has
	// been stripped.
	FuzzyLicenseNamesRegexList    []string `json:"FuzzyLicenseNamesRegexList"`
	compiledFuzzyLicenseNamesList []*regexp.Regexp
	// VerbatimLicenseNamesRegexList is matched against the base name of the file only.
	VerbatimLicenseNamesRegexList    []string `json:"VerbatimLicenseNamesRegexList"`
	compiledVerbatimLicenseNamesList []*regexp.Regexp
	// SkipLicenseNamesRegexList identifies known non-license files; a match vetoes a fuzzy match.
	SkipLicenseNamesRegexList    []string `json:"SkipLicenseNamesRegexList"`
	compiledSkipLicenseNamesList []*regexp.Regexp
}
|
||||
|
||||
// IsALicenseFile makes a best effort guess if a file is a license file or not. This is a heuristic and is NOT foolproof however.
|
||||
// Some examples of files that may be incorrectly identified as licenses:
|
||||
// - /path/to/code/gpl/README.md ("gpl")
|
||||
// - /path/to/a/hash/CC05f4dcc3b5aa765d61d8327deb882cf ("cc0")
|
||||
// - /path/to/freebsd-parts/file.ext ("bds")
|
||||
func (l *LicenseNames) IsALicenseFile(pkgName, licenseFilePath string) bool {
|
||||
// Check if the file is in the list of explicit known license files
|
||||
for _, name := range l.compiledVerbatimLicenseNamesList {
|
||||
baseName := filepath.Base(licenseFilePath)
|
||||
if name.MatchString(baseName) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return checkFilePath(pkgName, licenseFilePath, l.compiledFuzzyLicenseNamesList) && !l.IsASkippedLicenseFile(pkgName, licenseFilePath)
|
||||
}
|
||||
|
||||
// IsASkippedLicenseFile checks if a file is a known non-license file.
|
||||
func (l *LicenseNames) IsASkippedLicenseFile(pkgName, licenseFilePath string) bool {
|
||||
return checkFilePath(pkgName, licenseFilePath, l.compiledSkipLicenseNamesList)
|
||||
}
|
||||
|
||||
// checkFilePath checks if a file path matches any of the given names. Any leading common path is stripped before
|
||||
// matching (i.e. "/usr/share/licenses/<pkg>/file/path" -> "file/path"). The matching is a case-insensitive sub-string
|
||||
// search.
|
||||
func checkFilePath(pkgName, licenseFilePath string, licenseFilesMatches []*regexp.Regexp) bool {
|
||||
// For each path, strip the prefix plus package name if it exists
|
||||
// i.e. "/usr/share/licenses/<pkg>/file/path" -> "file/path"
|
||||
// Those paths would always match since they contain "license" in the name.
|
||||
strippedPath := filepath.Clean(licenseFilePath)
|
||||
pkgPrefix := filepath.Join(licensePrefix, pkgName)
|
||||
if strings.HasPrefix(licenseFilePath, licensePrefix) {
|
||||
strippedPath = strings.TrimPrefix(licenseFilePath, pkgPrefix) // Remove the license + pkg prefix
|
||||
strippedPath = strings.TrimPrefix(strippedPath, licensePrefix) // Remove the license prefix
|
||||
strippedPath = strings.TrimPrefix(strippedPath, string(os.PathSeparator)) // Remove the leading path separator if it exists
|
||||
|
||||
// Rebuild the path without the 1st component
|
||||
if len(strippedPath) == 0 {
|
||||
// It was just the license directory
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
for _, name := range licenseFilesMatches {
|
||||
if name.MatchString(strippedPath) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// LoadLicenseNames loads the license name regexes from the given .json file into a LicenseNames struct
|
||||
func LoadLicenseNames(file string) (LicenseNames, error) {
|
||||
config := LicenseNames{}
|
||||
err := jsonutils.ReadJSONFile(file, &config)
|
||||
if err != nil {
|
||||
return LicenseNames{}, fmt.Errorf("failed to read license names file (%s):\n%w", file, err)
|
||||
}
|
||||
|
||||
for i := range config.FuzzyLicenseNamesRegexList {
|
||||
regex, err := regexp.Compile(config.FuzzyLicenseNamesRegexList[i])
|
||||
if err != nil {
|
||||
return LicenseNames{}, fmt.Errorf("failed to compile regex for license names (%s):\n%w", config.FuzzyLicenseNamesRegexList[i], err)
|
||||
}
|
||||
config.compiledFuzzyLicenseNamesList = append(config.compiledFuzzyLicenseNamesList, regex)
|
||||
}
|
||||
|
||||
for i := range config.VerbatimLicenseNamesRegexList {
|
||||
regex, err := regexp.Compile(config.VerbatimLicenseNamesRegexList[i])
|
||||
if err != nil {
|
||||
return LicenseNames{}, fmt.Errorf("failed to compile regex for license names (%s):\n%w", config.VerbatimLicenseNamesRegexList[i], err)
|
||||
}
|
||||
config.compiledVerbatimLicenseNamesList = append(config.compiledVerbatimLicenseNamesList, regex)
|
||||
}
|
||||
|
||||
for i := range config.SkipLicenseNamesRegexList {
|
||||
regex, err := regexp.Compile(config.SkipLicenseNamesRegexList[i])
|
||||
if err != nil {
|
||||
return LicenseNames{}, fmt.Errorf("failed to compile regex for license names (%s):\n%w", config.SkipLicenseNamesRegexList[i], err)
|
||||
}
|
||||
config.compiledSkipLicenseNamesList = append(config.compiledSkipLicenseNamesList, regex)
|
||||
}
|
||||
|
||||
return config, nil
|
||||
}
|
|
@ -0,0 +1,359 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
// A tool for validating %license entries in rpms
|
||||
|
||||
package licensecheck
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/internal/file"
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/internal/jsonutils"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
type testData struct {
|
||||
UniqueFiles int
|
||||
UniquePackages int
|
||||
TestDataEntries []testDataEntry
|
||||
}
|
||||
|
||||
// testDataEntry is a single package/file pair from a test fixture.
type testDataEntry struct {
	// Pkg is passed to IsALicenseFile as the package name.
	Pkg string `json:"Pkg"`
	// Path is passed to IsALicenseFile as the file path.
	Path string `json:"Path"`
}
|
||||
|
||||
func TestLoadLicenseNames(t *testing.T) {
|
||||
file := "testdata/test_license_names.json"
|
||||
expectedNames := LicenseNames{
|
||||
FuzzyLicenseNamesRegexList: []string{
|
||||
"(?i).*fuzzy.*",
|
||||
},
|
||||
compiledFuzzyLicenseNamesList: []*regexp.Regexp{
|
||||
regexp.MustCompile("(?i).*fuzzy.*"),
|
||||
},
|
||||
VerbatimLicenseNamesRegexList: []string{
|
||||
"^vErBaTiM$",
|
||||
},
|
||||
compiledVerbatimLicenseNamesList: []*regexp.Regexp{
|
||||
regexp.MustCompile("^vErBaTiM$"),
|
||||
},
|
||||
SkipLicenseNamesRegexList: []string{
|
||||
"(?i).*skip.*",
|
||||
},
|
||||
compiledSkipLicenseNamesList: []*regexp.Regexp{
|
||||
regexp.MustCompile("(?i).*skip.*"),
|
||||
},
|
||||
}
|
||||
|
||||
names, err := LoadLicenseNames(file)
|
||||
|
||||
// Check if there was an error loading the license exceptions
|
||||
if err != nil {
|
||||
t.Errorf("Failed to load license names: %v", err)
|
||||
}
|
||||
|
||||
// Check if the loaded exceptions match the expected exceptions
|
||||
assert.Equal(t, expectedNames, names)
|
||||
}
|
||||
|
||||
func TestNotPanicMissingNameFile(t *testing.T) {
|
||||
tempPath := t.TempDir()
|
||||
file := filepath.Join(tempPath, "missing_file.json")
|
||||
assert.NotPanics(t, func() {
|
||||
_, err := LoadLicenseNames(file)
|
||||
assert.EqualError(t, err, "failed to read license names file ("+file+"):\nopen "+file+": no such file or directory")
|
||||
})
|
||||
}
|
||||
|
||||
func TestInvalidNameRegex(t *testing.T) {
|
||||
const invalidRegex = `.*[`
|
||||
testCases := []struct {
|
||||
name string
|
||||
json string
|
||||
expectedErr string
|
||||
}{
|
||||
{
|
||||
name: "Invalid fuzzy regex",
|
||||
json: `{"FuzzyLicenseNamesRegexList": ["` + invalidRegex + `"], "VerbatimLicenseNamesRegexList": [], "SkipLicenseNamesRegexList": []}`,
|
||||
expectedErr: "failed to compile regex for license names (.*[):\nerror parsing regexp: missing closing ]: `[`",
|
||||
},
|
||||
{
|
||||
name: "Invalid verbatim regex",
|
||||
json: `{"FuzzyLicenseNamesRegexList": [], "VerbatimLicenseNamesRegexList": ["` + invalidRegex + `"], "SkipLicenseNamesRegexList": []}`,
|
||||
expectedErr: "failed to compile regex for license names (.*[):\nerror parsing regexp: missing closing ]: `[`",
|
||||
},
|
||||
{
|
||||
name: "Invalid skip regex",
|
||||
json: `{"FuzzyLicenseNamesRegexList": [], "VerbatimLicenseNamesRegexList": [], "SkipLicenseNamesRegexList": ["` + invalidRegex + `"]}`,
|
||||
expectedErr: "failed to compile regex for license names (.*[):\nerror parsing regexp: missing closing ]: `[`",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
tempPath := t.TempDir()
|
||||
jsonFilePath := filepath.Join(tempPath, "invalid_regex.json")
|
||||
err := file.Write(tc.json, jsonFilePath)
|
||||
assert.NoError(t, err)
|
||||
names, err := LoadLicenseNames(jsonFilePath)
|
||||
assert.Error(t, err)
|
||||
assert.EqualError(t, err, tc.expectedErr)
|
||||
assert.Equal(t, LicenseNames{}, names)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// generateTestVariantStrings builds a fixed set of plausible license file paths for the package pkgName from
// one base name: lower/upper/alternating case, with extensions, and with "mypkg" prefixes and suffixes. Every
// returned path is rooted at /usr/share/licenses/<pkgName>.
func generateTestVariantStrings(pkgName, base string) []string {
	licenseDir := filepath.Join("/usr/share/licenses/", pkgName)
	lower := strings.ToLower(base)
	upper := strings.ToUpper(base)

	// Alternate the case based on the byte offset of each rune: even offsets lower, odd offsets upper.
	var alternating strings.Builder
	for offset, r := range base {
		if offset%2 == 0 {
			alternating.WriteString(strings.ToLower(string(r)))
		} else {
			alternating.WriteString(strings.ToUpper(string(r)))
		}
	}

	variants := []string{
		lower,
		upper,
		lower + ".txt",
		upper + ".txt",
		lower + ".mypkg.txt",
		alternating.String(),
		upper + "-mypkg",
		upper + "-mypkg-ver",
		"mypkg-" + upper,
		"mypkg-" + upper + ".txt",
		upper + ".MYPKG",
		upper + "_MYPKG",
	}

	paths := make([]string, len(variants))
	for i, variant := range variants {
		paths[i] = filepath.Join(licenseDir, variant)
	}
	return paths
}
|
||||
|
||||
// Test common variations on license file names
|
||||
func TestIsALicenseFile_Common(t *testing.T) {
|
||||
const pkgName = "pkg"
|
||||
n := loadDefaultLicenseNames(t)
|
||||
|
||||
names := []string{
|
||||
"copying",
|
||||
"license",
|
||||
"licence", // British spelling is sometimes used
|
||||
"notice",
|
||||
"copyright",
|
||||
"artistic",
|
||||
"bsd",
|
||||
"gpl",
|
||||
"cc0",
|
||||
"mit.txt",
|
||||
}
|
||||
for _, name := range names {
|
||||
testCases := generateTestVariantStrings(pkgName, name)
|
||||
t.Run(name, func(t *testing.T) {
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc, func(t *testing.T) {
|
||||
assert.True(t, n.IsALicenseFile(pkgName, tc))
|
||||
assert.False(t, n.IsASkippedLicenseFile(pkgName, tc))
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsASkippedLicenseFile(t *testing.T) {
|
||||
const pkgName = "pkg"
|
||||
n := loadDefaultLicenseNames(t)
|
||||
|
||||
testCases := []string{
|
||||
"AUTHORS",
|
||||
"CONTRIBUTORS",
|
||||
"README",
|
||||
"CREDITS",
|
||||
"/usr/share/licenses/pkg/AUTHORS",
|
||||
"/usr/share/licenses/pkg/AUTHORS.txt",
|
||||
"/usr/share/licenses/pkg/docs/AUTHORS-1.0",
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc, func(t *testing.T) {
|
||||
assert.True(t, n.IsASkippedLicenseFile(pkgName, tc))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsALicenseFile_Specific(t *testing.T) {
|
||||
const pkgName = "pkg"
|
||||
n := loadDefaultLicenseNames(t)
|
||||
|
||||
testCases := []struct {
|
||||
file string
|
||||
expected bool
|
||||
}{
|
||||
{"MIT", true},
|
||||
{"MIT_other", false},
|
||||
{"other_MIT", false},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.file, func(t *testing.T) {
|
||||
res := n.IsALicenseFile(pkgName, tc.file)
|
||||
assert.Equal(t, tc.expected, res)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsNotALicenseFile(t *testing.T) {
|
||||
const (
|
||||
pkgName = "pkg"
|
||||
basePath = "/usr/share/licenses/"
|
||||
)
|
||||
n := loadDefaultLicenseNames(t)
|
||||
|
||||
testCases := []string{
|
||||
filepath.Join(basePath, pkgName, "file"),
|
||||
filepath.Join(basePath, pkgName, "README"),
|
||||
filepath.Join(basePath, pkgName, "MIT-file"),
|
||||
filepath.Join(basePath, pkgName, "AUTHORS.txt"),
|
||||
filepath.Join(basePath, pkgName),
|
||||
basePath,
|
||||
"/",
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc, func(t *testing.T) {
|
||||
assert.False(t, n.IsALicenseFile(pkgName, tc))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSubDirsMatch(t *testing.T) {
|
||||
const pkgName = "pkg"
|
||||
n := loadDefaultLicenseNames(t)
|
||||
|
||||
testCases := []string{
|
||||
"/usr/share/licenses/pkg/COPYING",
|
||||
"/usr/share/licenses/pkg/subdir/COPYING",
|
||||
"/usr/share/licenses/pkg/LICENSES/random_file",
|
||||
"/usr/share/licenses/pkg/licenses/random_file",
|
||||
"/path/to/LICENSE",
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc, func(t *testing.T) {
|
||||
assert.True(t, n.IsALicenseFile(pkgName, tc))
|
||||
assert.False(t, n.IsASkippedLicenseFile(pkgName, tc))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// The license directory itself isn't a valid match.
|
||||
func TestLicenseDirDoesNotMatch(t *testing.T) {
|
||||
const pkgName = "pkg"
|
||||
n := loadDefaultLicenseNames(t)
|
||||
|
||||
testCases := []string{
|
||||
"/usr/share/licenses/",
|
||||
"/usr/share/licenses/pkg",
|
||||
"/usr/share/licenses/pkg/",
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc, func(t *testing.T) {
|
||||
assert.False(t, n.IsALicenseFile(pkgName, tc))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgainstKnownLicenses(t *testing.T) {
|
||||
// We store all the %license files from the distro in ./testdata/all_licenses_<date>.json
|
||||
// See ./testdata/README.md for more information on how to generate this file
|
||||
|
||||
// This test will check that MOST of the known licenses are correctly identified as licenses. It is not
|
||||
// exhaustive, but it should catch most common cases. This value can be increased as the quality of the
|
||||
// packages improves.
|
||||
const acceptablePercentage = 0.98
|
||||
n := loadDefaultLicenseNames(t)
|
||||
|
||||
// Find all data files in the testdata directory
|
||||
testDataFile := ""
|
||||
paths, err := filepath.Glob("./testdata/all_licenses_*.json")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to find test data file: %v", err)
|
||||
}
|
||||
// Get the most recent file
|
||||
for _, path := range paths {
|
||||
if testDataFile < path {
|
||||
testDataFile = path
|
||||
}
|
||||
}
|
||||
if testDataFile == "" {
|
||||
t.Fatalf("Failed to find test data file")
|
||||
}
|
||||
|
||||
test_data := testData{}
|
||||
err = jsonutils.ReadJSONFile(testDataFile, &test_data)
|
||||
if err != nil || test_data.UniqueFiles == 0 {
|
||||
t.Fatalf("failed to read input file: %v", err)
|
||||
}
|
||||
|
||||
invalid_entires := 0
|
||||
for _, test := range test_data.TestDataEntries {
|
||||
if !n.IsALicenseFile(test.Pkg, test.Path) {
|
||||
invalid_entires++
|
||||
}
|
||||
}
|
||||
|
||||
invalidPercentage := float64(invalid_entires) / float64(test_data.UniqueFiles)
|
||||
if invalidPercentage > 1.0-acceptablePercentage {
|
||||
t.Errorf("Failed to identify %d out of %d known licenses (%.2f%%)", invalid_entires, test_data.UniqueFiles, invalidPercentage*100)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgainstKnownDocs(t *testing.T) {
|
||||
// We store all the %doc files from the distro in ./testdata/all_docs_<date>.json
|
||||
// See ./testdata/README.md for more information on how to generate this file
|
||||
|
||||
// This test will check that MOST of the known docs are correctly identified as not licenses. It is not
|
||||
// exhaustive, but it should catch most common cases.
|
||||
const acceptablePercentage = 0.99
|
||||
n := loadDefaultLicenseNames(t)
|
||||
|
||||
// Find all data files in the testdata directory
|
||||
testDataFile := ""
|
||||
paths, err := filepath.Glob("./testdata/all_docs_*.json")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to find test data file: %v", err)
|
||||
}
|
||||
// Get the most recent file
|
||||
for _, path := range paths {
|
||||
if testDataFile < path {
|
||||
testDataFile = path
|
||||
}
|
||||
}
|
||||
if testDataFile == "" {
|
||||
t.Fatalf("Failed to find test data file")
|
||||
}
|
||||
|
||||
test_data := testData{}
|
||||
err = jsonutils.ReadJSONFile(testDataFile, &test_data)
|
||||
if err != nil || test_data.UniqueFiles == 0 {
|
||||
t.Fatalf("failed to read input file: %v", err)
|
||||
}
|
||||
|
||||
invalid_entires := 0
|
||||
for _, test := range test_data.TestDataEntries {
|
||||
if n.IsALicenseFile(test.Pkg, test.Path) {
|
||||
invalid_entires++
|
||||
}
|
||||
}
|
||||
|
||||
invalidPercentage := float64(invalid_entires) / float64(test_data.UniqueFiles)
|
||||
if invalidPercentage > 1.0-acceptablePercentage {
|
||||
t.Errorf("Failed to skip %d out of %d known docs (%.2f%%)", invalid_entires, test_data.UniqueFiles, invalidPercentage*100)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,2 @@
|
|||
all_other_files*.json
|
||||
_tmp*.json
|
|
@ -0,0 +1,42 @@
|
|||
# Test data for the license checker
|
||||
|
||||
The `licensecheck` package uses a heuristic to identify license files, the input data to this tool comes from the
|
||||
packages currently in the distro.
|
||||
|
||||
The test data is generated from all the files packaged into `/usr/share/licenses/<pkg>/*` and is gathered via `repoquery`.
|
||||
|
||||
## Generating new test data
|
||||
|
||||
In an Azure Linux environment (specifically an environment with access to the package repos) run:
|
||||
|
||||
```bash
|
||||
cd ./testdata
|
||||
rm *.json
|
||||
tdnf -y install dnf-utils python3 ca-certificates
|
||||
./generate_test_data.py
|
||||
```
|
||||
|
||||
This will query the available repos and generate three files: `all_licenses_<date>.json`, `all_docs_<date>.json`, and
|
||||
`all_other_files_<date>.json` containing lists of all files that are either `%license` or `%doc` respectively, and all
|
||||
other files (but not directories).
|
||||
|
||||
**Note:** `all_other_files_*.json` is marked to be ignored by git, it is a very large file and is less important to
|
||||
validate against than `all_docs_<date>.json`.
|
||||
|
||||
## Quick validation of the test data
|
||||
|
||||
This will read the files from above and report false positive/negative results, and generate a set of files containing
|
||||
all "incorrect" findings.
|
||||
|
||||
```bash
|
||||
cd ./testdata
|
||||
find . -name 'all_other_files_*.json' | grep -q . || echo "**** Generate test data first! ****"
|
||||
go run . --licenses ./all_licenses_*.json --licenses-output ./_tmp_bad_licenses.json --docs ./all_docs_*.json --docs-output ./_tmp_bad_docs.json --other-files ./all_other_files_*.json --other-files-output ./_tmp_bad_other_files.json --name-file ../../../../resources/manifests/package/license_file_names.json --exception-file ../../../../resources/manifests/package/license_file_exceptions.json
|
||||
# Check ./_tmp_bad_licenses.json, _tmp_bad_docs.json, _tmp_bad_other_files.json for any files that fail the classification
|
||||
```
|
||||
|
||||
As of 2024-05-22 the results are:
|
||||
|
||||
- `1.9%` false negative (licenses)
|
||||
- `0.25%` false positive (docs)
|
||||
- `0.47%` false positive (all other files)
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,170 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import concurrent.futures
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import tempfile
|
||||
import time
|
||||
import subprocess
|
||||
import urllib.request
|
||||
|
||||
# generate_test_data.py generates a set of files that list all the license files, doc files, and other files for all RPMs in the repository.
|
||||
# The intent is to use this data to test the licensecheck tool for false positives/negatives.
|
||||
|
||||
# This tool should be run in an azl-like environment, specifically the 'repoquery' tool must be available, and it must
|
||||
# be able to pull rpms from a representative repo that contains all RPMs to measure (ie PMC).
|
||||
|
||||
# get_all_rpms() returns a list of URLs to each RPM in the default repos. It only looks at the latest version of each
# RPM. Source RPMs (*.src.rpm) and blank lines are not included in the result.
def get_all_rpms() -> list[str]:
    cmd = ["repoquery", "-y", "--latest-limit=1", "--all", "--location"]
    output = subprocess.check_output(cmd, text=True, stderr=subprocess.DEVNULL)

    # Clean the output:
    #  - Split into a list of lines and strip the surrounding whitespace from each one
    #  - Remove any empty strings left after stripping
    #  - Remove anything that is *.src.rpm (some environments will give us the source RPMs as well). The suffix
    #    check must run on the stripped line, otherwise trailing whitespace would hide the '.src.rpm' suffix.
    urls = [line.strip() for line in output.split("\n")]
    return [url for url in urls if url and not url.endswith(".src.rpm")]
|
||||
|
||||
# query_rpm_url() runs the 'rpm' command with the given query arguments against the RPM file at 'out_file'. It returns
# the non-empty lines of the command's output, or an empty list when the output reports '(contains no files)'.
def query_rpm_url(out_file: str, args: list[str]) -> list[str]:
    rpm_cmd = ["rpm"] + args + [out_file]
    # Run the query and capture stdout as text; stderr is discarded.
    raw_output = subprocess.check_output(rpm_cmd, text=True, stderr=subprocess.DEVNULL)

    # The marker '(contains no files)' in the output means there is nothing to list.
    if "(contains no files)" in raw_output:
        return []

    lines = []
    for line in raw_output.split("\n"):
        if line:
            lines.append(line)
    return lines
|
||||
|
||||
# get_name() queries the %{NAME} tag of the RPM file at 'out_file'. The result is the list of output lines produced by
# query_rpm_url(), not a bare string; index the first element to get the package name.
def get_name(out_file: str) -> list[str]:
    return query_rpm_url(out_file, ["-q", "--qf", "%{NAME}\n"])
|
||||
|
||||
# get_license_files() returns the file list reported by the 'rpm -qL' query for the RPM file at 'out_file'.
def get_license_files(out_file: str) -> list[str]:
    license_query = ["-qL"]
    return query_rpm_url(out_file, license_query)
|
||||
|
||||
# get_doc_files() returns the file list reported by the 'rpm -qd' query for the RPM file at 'out_file'.
def get_doc_files(out_file: str) -> list[str]:
    doc_query = ["-qd"]
    return query_rpm_url(out_file, doc_query)
|
||||
|
||||
# get_all_files() returns every non-directory path in the RPM file at 'out_file' that is not listed in 'filter_list'.
def get_all_files(out_file: str, filter_list: list[str]) -> list[str]:
    # Each line will be in the format "drwxr-xr-x /a/directory" or "-rw-r--r-- /a/directory/a_file".
    entries = query_rpm_url(out_file, ["-q", "--qf", "[%{FILEMODES:perms} %{FILENAMES}\n]"])
    excluded = set(filter_list)

    kept = []
    for entry in entries:
        # A leading 'd' in the permissions marks a directory, skip those.
        if entry[0] == "d":
            continue
        # Drop the permissions part of the string, keeping only the path.
        path = entry.split(' ', 1)[1]
        if path not in excluded:
            kept.append(path)
    return kept
|
||||
|
||||
# get_files_for_url() downloads the RPM at 'url' into a temporary directory and returns a result object with the URL,
# the package name, the license files, the doc files, and all other (non-directory) files of that RPM.
def get_files_for_url(url: str) -> dict:
    # The temporary directory only needs to live long enough to query the downloaded rpm.
    with tempfile.TemporaryDirectory() as workdir:
        rpm_path = os.path.join(workdir, "pkg.rpm")
        urllib.request.urlretrieve(url, rpm_path)

        licenses = get_license_files(rpm_path)
        docs = get_doc_files(rpm_path)
        # Everything that is neither a license nor a doc file.
        others = get_all_files(rpm_path, licenses + docs)
        pkg_name = get_name(rpm_path)[0]

        return {
            "url": url,
            "pkg_name": pkg_name,
            "license_files": licenses,
            "doc_files": docs,
            "all_other_files": others,
        }
|
||||
|
||||
# Corresponding go structs for the output of this script:
|
||||
|
||||
# type testData struct {
|
||||
# UniqueFiles int
|
||||
# UniquePackages int
|
||||
# TestDataEntries []testDataEntry
|
||||
# }
|
||||
|
||||
# type testDataEntry struct {
|
||||
# Pkg string `json:"Pkg"`
|
||||
# Path string `json:"Path"`
|
||||
# }
|
||||
|
||||
# write_to_file() writes the results to 'output_file' as JSON in the testData layout: one {"Pkg", "Path"} entry per
# file, plus the number of entries and the number of distinct packages that have at least one file.
# 'file_list' holds (package name, list of files) tuples; it is not modified.
def write_to_file(file_list: list[tuple[str, list[str]]], output_file: str):
    print(f"Writing to {output_file}")

    # Sort a copy so the output is deterministic without reordering the caller's list.
    ordered = sorted(file_list)

    test_data_entries = []
    for pkg_name, files in ordered:
        for file in files:
            test_data_entries.append({
                "Pkg": pkg_name,
                "Path": file
            })

    # Count the unique packages. Only packages with files are counted, and a name that shows up in several tuples
    # (e.g. the same package from more than one RPM) is counted once.
    unique_packages = {pkg_name for pkg_name, files in ordered if files}

    test_data = {
        "UniqueFiles": len(test_data_entries),
        "UniquePackages": len(unique_packages),
        "TestDataEntries": test_data_entries
    }

    with open(output_file, "w") as f:
        json.dump(test_data, f, indent=0)
|
||||
|
||||
# main() lists every RPM URL, collects the license, doc, and other files of each RPM in parallel, and writes the three
# result files into the current directory.
def main():
    # Put the debug info packages first since they tend to be really big,
    # then the remaining URLs,
    # Randomize the lists to even out the load
    all_urls = get_all_rpms()
    debug_urls = [url for url in all_urls if "debuginfo" in url]
    other_urls = [url for url in all_urls if "debuginfo" not in url]
    random.shuffle(debug_urls)
    random.shuffle(other_urls)
    jobs = debug_urls + other_urls

    # Queue each URL to be processed in parallel.
    # os.cpu_count() may return None when the count cannot be determined, fall back to a single CPU in that case.
    num_processes = 4 * (os.cpu_count() or 1)
    license_files = []
    doc_files = []
    all_other_files = []
    with concurrent.futures.ProcessPoolExecutor(max_workers=num_processes) as executor:
        results = [executor.submit(get_files_for_url, url) for url in jobs]
        total_processed = 0
        start_time = time.time()
        for future in concurrent.futures.as_completed(results):
            res = future.result()
            license_files.append((res["pkg_name"], res["license_files"]))
            doc_files.append((res["pkg_name"], res["doc_files"]))
            all_other_files.append((res["pkg_name"], res["all_other_files"]))
            total_processed += 1

            # Estimated time remaining, extrapolated from the average time per completed job
            elapsed_time = time.time() - start_time
            time_per_file = elapsed_time / total_processed
            remaining_files = len(jobs) - total_processed
            remaining_time = time_per_file * remaining_files

            percent_done = (total_processed / len(jobs)) * 100
            base_name = res["url"].split("/")[-1]
            print(f"~{remaining_time:.0f}s remaining ({total_processed}/{len(jobs)} ({percent_done:.2f}%))... {base_name} ")

    # Write the results to 'all_licenses_<date>.json', 'all_docs_<date>.json' and 'all_other_files_<date>.json'
    date = time.strftime('%Y%m%d')
    license_file_path = f"all_licenses_{date}.json"
    doc_file_path = f"all_docs_{date}.json"
    all_other_file_path = f"all_other_files_{date}.json"
    write_to_file(license_files, license_file_path)
    write_to_file(doc_files, doc_file_path)
    write_to_file(all_other_files, all_other_file_path)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,116 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
// Tool to validate test data for the licensecheck package unit tests.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/internal/exe"
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/internal/jsonutils"
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/internal/logger"
|
||||
"github.com/microsoft/azurelinux/toolkit/tools/pkg/licensecheck"
|
||||
|
||||
"gopkg.in/alecthomas/kingpin.v2"
|
||||
)
|
||||
|
||||
var (
|
||||
app = kingpin.New("licensetestchecker", "Checks test data for licenses.")
|
||||
licenses = app.Flag("licenses", "Path to the input file of license file paths to check for false negatives.").Required().ExistingFile()
|
||||
licensesOut = app.Flag("licenses-output", "Path to the output file to list all false negatives.").Required().String()
|
||||
docs = app.Flag("docs", "Path to the input file of doc file paths to check for false positives.").Required().ExistingFile()
|
||||
docsOut = app.Flag("docs-output", "Path to the output file to list all false positives.").Required().String()
|
||||
otherFiles = app.Flag("other-files", "Path to the input file of other file paths to check for false positives.").Required().ExistingFile()
|
||||
otherFilesOut = app.Flag("other-files-output", "Path to the output file to list all false positives.").Required().String()
|
||||
nameFile = app.Flag("name-file", "Path to the file containing the list of license names to check for.").Required().ExistingFile()
|
||||
exceptionFile = app.Flag("exception-file", "Path to the file containing the list of exceptions to the license check.").Required().ExistingFile()
|
||||
)
|
||||
|
||||
// testData mirrors the JSON layout of the test data files: two summary counters and the list of
// (package, path) entries.
type testData struct {
	UniqueFiles     int             `json:"UniqueFiles"`
	UniquePackages  int             `json:"UniquePackages"`
	TestDataEntries []testDataEntry `json:"TestDataEntries"`
}

// testDataEntry is a single file path together with the name of the package that ships it.
type testDataEntry struct {
	Pkg  string `json:"Pkg"`
	Path string `json:"Path"`
}
|
||||
|
||||
func main() {
|
||||
app.Version(exe.ToolkitVersion)
|
||||
kingpin.MustParse(app.Parse(os.Args[1:]))
|
||||
logger.InitStderrLog()
|
||||
|
||||
names, err := licensecheck.LoadLicenseNames(*nameFile)
|
||||
if err != nil {
|
||||
logger.Log.Fatalf("Failed to load license names: %v", err)
|
||||
}
|
||||
|
||||
exceptions, err := licensecheck.LoadLicenseExceptions(*exceptionFile)
|
||||
if err != nil {
|
||||
logger.Log.Fatalf("Failed to load license exceptions: %v", err)
|
||||
}
|
||||
|
||||
// Validate actual license files, checking for false negatives
|
||||
realLicenses := readTestData(*licenses)
|
||||
filesNotDetectedAsLicense := checkFalseNegatives(realLicenses, names, exceptions)
|
||||
writeTestData(filesNotDetectedAsLicense, *licensesOut)
|
||||
falseNegativeRatio := float64(len(filesNotDetectedAsLicense.TestDataEntries)) / float64(len(realLicenses.TestDataEntries))
|
||||
logger.Log.Infof("Wrote %d invalid entries to '%s' (%.2f%% false negative)", len(filesNotDetectedAsLicense.TestDataEntries), *licensesOut, falseNegativeRatio*100)
|
||||
|
||||
// Validate doc files, checking for false positives
|
||||
docs := readTestData(*docs)
|
||||
invalidDocs := checkFalsePositives(docs, names, exceptions)
|
||||
writeTestData(invalidDocs, *docsOut)
|
||||
falsePositiveRatio := float64(len(invalidDocs.TestDataEntries)) / float64(len(docs.TestDataEntries))
|
||||
logger.Log.Infof("Wrote %d invalid docs to '%s' (%.2f%% false positive)", len(invalidDocs.TestDataEntries), *docsOut, falsePositiveRatio*100)
|
||||
|
||||
// Validate other files, checking for false positives
|
||||
otherFiles := readTestData(*otherFiles)
|
||||
invalidOtherFiles := checkFalsePositives(otherFiles, names, exceptions)
|
||||
writeTestData(invalidOtherFiles, *otherFilesOut)
|
||||
falsePositiveRatio = float64(len(invalidOtherFiles.TestDataEntries)) / float64(len(otherFiles.TestDataEntries))
|
||||
logger.Log.Infof("Wrote %d invalid other files to '%s' (%.2f%% false positive)", len(invalidOtherFiles.TestDataEntries), *otherFilesOut, falsePositiveRatio*100)
|
||||
|
||||
}
|
||||
|
||||
func readTestData(filePath string) testData {
|
||||
var tests testData
|
||||
err := jsonutils.ReadJSONFile(filePath, &tests)
|
||||
if err != nil {
|
||||
logger.Log.Fatalf("failed to read input file: %v", err)
|
||||
}
|
||||
return tests
|
||||
}
|
||||
|
||||
func writeTestData(tests testData, filePath string) {
|
||||
err := jsonutils.WriteJSONFile(filePath, tests)
|
||||
if err != nil {
|
||||
logger.Log.Fatalf("failed to write output file: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func checkFalseNegatives(tests testData, names licensecheck.LicenseNames, exceptions licensecheck.LicenseExceptions) (falseNegatives testData) {
|
||||
for _, test := range tests.TestDataEntries {
|
||||
if !names.IsALicenseFile(test.Pkg, test.Path) || exceptions.ShouldIgnoreFile(test.Pkg, test.Path) {
|
||||
falseNegatives.TestDataEntries = append(falseNegatives.TestDataEntries, test)
|
||||
}
|
||||
}
|
||||
falseNegatives.UniqueFiles = len(falseNegatives.TestDataEntries)
|
||||
falseNegatives.UniquePackages = len(falseNegatives.TestDataEntries)
|
||||
return falseNegatives
|
||||
}
|
||||
|
||||
func checkFalsePositives(tests testData, names licensecheck.LicenseNames, exceptions licensecheck.LicenseExceptions) (falsePositives testData) {
|
||||
for _, test := range tests.TestDataEntries {
|
||||
if names.IsALicenseFile(test.Pkg, test.Path) && !exceptions.ShouldIgnoreFile(test.Pkg, test.Path) {
|
||||
falsePositives.TestDataEntries = append(falsePositives.TestDataEntries, test)
|
||||
}
|
||||
}
|
||||
falsePositives.UniqueFiles = len(falsePositives.TestDataEntries)
|
||||
falsePositives.UniquePackages = len(falsePositives.TestDataEntries)
|
||||
return falsePositives
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
{
|
||||
"PkgExceptions": [
|
||||
{
|
||||
"PackageName": "TestPackage1",
|
||||
"IgnoredFilesRegexList": [
|
||||
"/usr/share/doc/LICENSE",
|
||||
"/usr/share/doc/README.GPL",
|
||||
".*GLOB1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"PackageName": "TestPackage2",
|
||||
"IgnoredFilesRegexList": [
|
||||
"/usr/share/doc/LICENSE",
|
||||
"/usr/share/doc/README.GPL",
|
||||
".*GLOB2"
|
||||
]
|
||||
}
|
||||
],
|
||||
"GlobalExceptionsRegexList": [
|
||||
".*GLOB3"
|
||||
]
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
{
|
||||
"FuzzyLicenseNamesRegexList": [
|
||||
"(?i).*fuzzy.*"
|
||||
],
|
||||
"VerbatimLicenseNamesRegexList": [
|
||||
"^vErBaTiM$"
|
||||
],
|
||||
"SkipLicenseNamesRegexList": [
|
||||
"(?i).*skip.*"
|
||||
]
|
||||
}
|
Загрузка…
Ссылка в новой задаче