#!/usr/bin/env python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Cross-check licenses declared in a LICENSE file against stored license texts.

Each ``(license) project (url)`` notice found in the LICENSE file is printed
next to the license type detected in the matching
``LICENSE-<project>.txt`` file.
"""
import os
import re
import string

try:
    import slugify
except ImportError:
    # The slug is only ever computed from text that already matches
    # ``[a-z0-9-]+``, so a small stdlib fallback is enough to keep the
    # script usable when the third-party package is not installed.
    slugify = None

# order is important: a BSD-3 text also contains every BSD-2 phrase, so the
# more specific license has to be tested first.
_licenses = {
    'MIT': [
        'Permission is hereby granted free of charge',
        'The above copyright notice and this permission notice shall',
    ],
    'BSD-3': [
        'Redistributions of source code must retain the above copyright',
        'Redistributions in binary form must reproduce the above copyright',
        'specific prior written permission',
    ],
    'BSD-2': [
        'Redistributions of source code must retain the above copyright',
        'Redistributions in binary form must reproduce the above copyright',
    ],
    'AL': ['http://www.apache.org/licenses/LICENSE-2.0'],
}

# Deletes every ASCII punctuation character in a single ``str.translate`` pass.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)

# Matches "(license) project (http...)" notices.
# NOTE(review): the separator between the second and third group is assumed
# to be a single space - confirm against the layout of the LICENSE file.
_NOTICE_RE = re.compile(r"\((.+?)\) (.+?) \((http.+?)\)")

_ROW_FORMAT = "{:<30}|{:<50}||{:<20}||{:<10}"


def _normalize(text):
    """Lower-case *text*, drop punctuation and collapse runs of whitespace."""
    stripped = text.lower().translate(_PUNCTUATION_TABLE)
    return " ".join(stripped.split())


def _slug(name):
    """Return the slug of *name*, which must already match ``[a-z0-9-]+``."""
    if slugify is not None:
        return slugify.slugify(name)
    # Fallback for the restricted alphabet: collapse repeated hyphens and
    # trim them from both ends.
    return re.sub(r"-+", "-", name).strip("-")


def get_notices(license_path="../LICENSE"):
    """Return the license notices found in *license_path*.

    Every line containing at least one ``(license) project (url)`` notice
    contributes one list of ``(license, project, url)`` tuples; lines without
    a notice are skipped.
    """
    with open(license_path, encoding="utf-8") as license_file:
        per_line = [_NOTICE_RE.findall(line) for line in license_file]
    return [matches for matches in per_line if matches]


def parse_license_file(project_name, licenses_dir="../licenses"):
    """Detect the license type of *project_name* from its stored license text.

    The leading ``[a-z0-9-]+`` part of the lower-cased project name is
    slugified and looked up as ``LICENSE-<slug>.txt`` inside *licenses_dir*.

    Returns the matching key of ``_licenses``, or ``False`` when the project
    name has no usable prefix, the file does not exist, or no known license
    matches.
    """
    prefix = re.match(r"^[a-z0-9\-]+", project_name.lower())
    if prefix is None:
        # Names such as "(foo)" have no usable prefix; report "not detected"
        # instead of failing on ``None.group``.
        return False

    path = os.path.join(licenses_dir, f"LICENSE-{_slug(prefix.group(0))}.txt")
    try:
        with open(path, encoding="utf-8", errors="replace") as license_text:
            text = _normalize(" ".join(line.strip() for line in license_text))
    except FileNotFoundError:
        return False

    for license_id, phrases in _licenses.items():
        # The phrases are normalized exactly like the file text; otherwise a
        # phrase containing punctuation (the Apache URL) could never match.
        if all(_normalize(phrase) in text for phrase in phrases):
            return license_id
    return False


def main():
    """Print the declared versus detected license for every notice."""
    print(_ROW_FORMAT.format("PROJECT", "URL", "LICENSE TYPE DEFINED", "DETECTED"))
    notices = get_notices()
    for matches in notices:
        declared, project, url = matches[0]
        detected = parse_license_file(project)
        # ``str()`` keeps an undetected license readable: a bare ``False``
        # formatted with a width spec is rendered as the integer 0.
        print(_ROW_FORMAT.format(project, url[:50], declared, str(detected)))
    file_count = len(os.listdir("../licenses"))
    print("Defined licenses: {} Files found: {}".format(len(notices), file_count))


if __name__ == "__main__":
    main()