2022-01-10 21:10:21 +03:00
#!/usr/bin/python3
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import argparse
from collections import OrderedDict
import json
from os . path import isdir , isfile
from pathlib import Path
import sys
from spec_source_attributions import get_spec_source , KNOWN_SOURCE_ORIGINS
# Expected Schema:
# class LicenseCollection:
# header: str
# table_headers: List[str]
# licenses: List[License]
#
# class License:
# origin: str
# license: str
# specs: List[str]
def generate_markdown ( license_collection ) :
res = [ ]
res . append ( license_collection [ " header " ] )
res . append ( ' ' )
res . append ( ' | ' + ' | ' . join ( license_collection [ " table_headers " ] ) + ' | ' )
res . append ( ' | ' + ' | ' . join ( [ ' --- ' for _ in license_collection [ " table_headers " ] ] ) + ' | ' )
for origin , details in license_collection [ " licenses " ] . items ( ) :
details [ " specs " ] . sort ( key = str . lower )
res . append ( ' | {0} | {1} | {2} | ' . format ( origin , details [ " license " ] , ' <br> ' . join ( details [ " specs " ] ) ) )
return ' \n ' . join ( res ) + ' \n '
def sort_licenses ( license_collection ) :
license_collection [ " licenses " ] = OrderedDict ( sorted ( license_collection [ " licenses " ] . items ( ) , key = lambda item : str . lower ( item [ 0 ] ) ) )
for details in license_collection [ " licenses " ] . values ( ) :
# Remove duplicates with "set()" and return a sorted list.
details [ " specs " ] = sorted ( set ( details [ " specs " ] ) , key = str . lower )
def process_spec_file ( spec_path , license_collection , specs_in_files , specs_unknown_distro ) :
spec_name = spec_path . stem
distribution = get_spec_source ( spec_path )
if distribution is None :
specs_unknown_distro . add ( spec_name )
else :
specs_in_files [ distribution ] . add ( spec_name )
license_collection [ " licenses " ] [ distribution ] [ " specs " ] . append ( spec_name )
def retrieve_license_info ( file_paths , license_collection ) :
specs_in_json = { }
for origin , details in license_collection [ " licenses " ] . items ( ) :
specs_in_json [ origin ] = set ( details [ " specs " ] )
specs_in_files = { }
for origin in KNOWN_SOURCE_ORIGINS :
specs_in_files [ origin ] = set ( )
specs_unknown_distro = set ( )
updated_license_collection = license_collection
for file_path in file_paths :
if isdir ( file_path ) :
for spec_path in file_path . glob ( ' **/*.spec ' ) :
process_spec_file ( spec_path , updated_license_collection , specs_in_files , specs_unknown_distro )
else :
process_spec_file ( file_path , updated_license_collection , specs_in_files , specs_unknown_distro )
specs_not_in_json = { }
specs_not_in_files = { }
for origin , specs_in_files_for_origin in specs_in_files . items ( ) :
specs_not_in_json [ origin ] = specs_in_files_for_origin - specs_in_json [ origin ]
specs_not_in_files [ origin ] = specs_in_json [ origin ] - specs_in_files_for_origin
return specs_not_in_json , specs_not_in_files , specs_unknown_distro , updated_license_collection
def print_specs_error ( header_message , specs_list ) :
if len ( specs_list ) :
print ( header_message )
for s in sorted ( specs_list , key = str . lower ) :
print ( ' \t ' + s )
print ( )
def remove_missing_specs ( license_collection , specs_not_in_files ) :
for origin , specs_not_in_files_for_origin in specs_not_in_files . items ( ) :
origin_licenses_set = set ( license_collection [ " licenses " ] [ origin ] [ " specs " ] )
license_collection [ " licenses " ] [ origin ] [ " specs " ] = list ( origin_licenses_set - specs_not_in_files_for_origin )
def print_specs_error_by_origin ( header_message , specs_by_origin_list ) :
for origin , specs in specs_by_origin_list . items ( ) :
print_specs_error ( f " [Origin ' { origin } ] ' { header_message } " , specs )
def process_licenses ( json_filename , markdown_filename , file_paths , check , update , remove_missing ) :
with open ( json_filename , ' r ' ) as licenses_file :
license_collection = json . load ( licenses_file )
specs_not_in_json , specs_not_in_files , specs_unknown_distro , updated_license_collection = retrieve_license_info ( file_paths , license_collection )
if remove_missing :
remove_missing_specs ( updated_license_collection , specs_not_in_files )
2022-01-20 22:43:27 +03:00
2022-01-10 21:10:21 +03:00
sort_licenses ( updated_license_collection )
2022-01-20 22:43:27 +03:00
with open ( markdown_filename , ' r ' ) as output_file :
old_content = output_file . read ( )
new_content = generate_markdown ( updated_license_collection )
2022-01-10 21:10:21 +03:00
if update :
with ( open ( json_filename , " w " ) ) as licenses_file :
json . dump ( updated_license_collection , licenses_file , indent = 4 )
licenses_file . write ( " \n " )
with open ( markdown_filename , ' w ' ) as output_file :
output_file . write ( new_content )
if check :
missing_specs = False
for origin in specs_not_in_json . keys ( ) :
if len ( specs_not_in_json [ origin ] ) or len ( specs_not_in_files [ origin ] ) :
missing_specs = True
break
2022-01-20 22:43:27 +03:00
outdated_markdown = old_content != new_content
if missing_specs or len ( specs_unknown_distro ) or outdated_markdown :
2022-01-10 21:10:21 +03:00
print_specs_error_by_origin ( " Specs present in the spec files that are not present in the JSON file: " , specs_not_in_json )
print_specs_error_by_origin ( " Specs present in the JSON file that are not present in the spec files: " , specs_not_in_files )
print_specs_error ( " Specs from unknown distributions: " , specs_unknown_distro )
2022-01-20 22:43:27 +03:00
if outdated_markdown :
print ( f " License map ' { markdown_filename } ' is out of date. " )
2022-01-26 00:10:44 +03:00
print ( f """
Specs ' license information is out of date. Run the following command to regenerate:
{ __file__ } - - no_check - - update - - remove_missing \\
{ json_filename } \\
{ markdown_filename } \\
SPECS SPECS - EXTENDED SPECS - SIGNED
NOTE : the script requires installation of the ' python-rpm-spec ' module :
python3 - m pip install python - rpm - spec
""" )
2022-01-10 21:10:21 +03:00
sys . exit ( 1 )
def is_valid_path ( parser , file_path ) :
if isdir ( file_path ) or isfile ( file_path ) :
return Path ( file_path )
parser . error ( f " The path ' { file_path } ' must exist and be either a directory or a regular file! " )
if __name__ == ' __main__ ' :
parser = argparse . ArgumentParser ( description = ' Processes spec license data, find missing entries, and regenerate license map file. ' )
parser . add_argument ( ' json_filename ' , type = Path , help = ' Path to data file with license data. ' )
parser . add_argument ( ' markdown_filename ' , type = Path , help = ' Path to license map markdown file. ' )
parser . add_argument ( ' file_paths ' , type = lambda file_path : is_valid_path ( parser , file_path ) , nargs = ' + ' , help = ' Directories containing specs or spec files themselves. ' )
parser . add_argument ( ' --no_check ' , dest = " check " , help = ' Don \' t compare the spec information from the JSON file with the information from the spec files. ' , action = ' store_false ' )
parser . add_argument ( ' --remove_missing ' , help = ' Remove entries from the JSON file, which are not present in any of the spec files. ' , action = ' store_true ' )
parser . add_argument ( ' --update ' , help = ' Removes licenses not found in the provided directories and spec files from the JSON and markdown files. ' , action = ' store_true ' )
p = parser . parse_args ( )
if not p . check and not p . update :
print ( " WARNING: nothing to do. Must at least check or update the spec licenses. " )
exit ( 1 )
process_licenses ( p . json_filename , p . markdown_filename , p . file_paths , p . check , p . update , p . remove_missing )