зеркало из https://github.com/microsoft/DeepSpeed.git
Fix copyright check, add copyright replace script (#3141)
* fix copyright script and add replace-copyright script
This commit is contained in:
Родитель
c48c97991b
Коммит
1f85569e1c
|
@ -17,24 +17,26 @@ import sys
|
||||||
def err(s: str) -> None:
|
def err(s: str) -> None:
|
||||||
print(s, file=sys.stderr)
|
print(s, file=sys.stderr)
|
||||||
|
|
||||||
COPYRIGHT = \
|
|
||||||
r"""# Copyright (c) Microsoft Corporation.
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
|
|
||||||
# DeepSpeed Team
|
COPYRIGHT = [
|
||||||
"""
|
r"^\(\/\/\|#\) Copyright (c) Microsoft Corporation.$", r"^\(\/\/\|#\) SPDX-License-Identifier: Apache-2.0$",
|
||||||
|
r"^\(\/\/\|#\) DeepSpeed Team$"
|
||||||
|
]
|
||||||
|
|
||||||
success = True
|
success = True
|
||||||
failures = []
|
failures = []
|
||||||
for f in sys.argv[1:]:
|
for f in sys.argv[1:]:
|
||||||
res = subprocess.run(["git", "grep", "--quiet", "-e", COPYRIGHT, f], capture_output=True)
|
for copyright_line in COPYRIGHT:
|
||||||
if res.returncode == 1:
|
if not success:
|
||||||
success = False
|
break
|
||||||
failures.append(f)
|
res = subprocess.run(["git", "grep", "--quiet", "-e", copyright_line, f], capture_output=True)
|
||||||
elif res.returncode == 2:
|
if res.returncode == 1:
|
||||||
err(f"Error invoking grep on {', '.join(sys.argv[1:])}:")
|
success = False
|
||||||
err(res.stderr.decode("utf-8"))
|
failures.append(f)
|
||||||
sys.exit(2)
|
elif res.returncode == 2:
|
||||||
|
err(f"Error invoking grep on {', '.join(sys.argv[1:])}:")
|
||||||
|
err(res.stderr.decode("utf-8"))
|
||||||
|
sys.exit(2)
|
||||||
|
|
||||||
if not success:
|
if not success:
|
||||||
err(f'{failures}: Missing license at top of file')
|
err(f'{failures}: Missing license at top of file')
|
||||||
|
|
|
@ -0,0 +1,235 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (c) Microsoft Corporation.
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# DeepSpeed Team
|
||||||
|
"""
|
||||||
|
USAGE:
|
||||||
|
$ python3 script/replace_copyright.py --repo_dir ./
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
# Lines of the license header written to every processed file. The empty
# entry becomes a blank (un-prefixed) line between the SPDX line and the
# team line.
NEW_COPYRIGHT = ("Copyright (c) Microsoft Corporation.", "SPDX-License-Identifier: Apache-2.0", "", "DeepSpeed Team")

# Python comment markers: single-line prefix, then the two triple-quote styles.
PY_SL_COMMENT = "#"
PY_ML_SINGLE = "'''"
PY_ML_DOUBLE = '"""'
PY_COMMENTS = (PY_SL_COMMENT, PY_ML_SINGLE, PY_ML_DOUBLE)

# C-family comment markers: single-line prefix, block open, block close.
C_SL_COMMENT = "//"
C_ML_OPEN = "/*"
C_ML_CLOSE = "*/"
C_COMMENTS = (C_SL_COMMENT, C_ML_OPEN, C_ML_CLOSE)

# Shell scripts only have a single-line comment marker.
BASH_SL_COMMENT = "#"
BASH_COMMENTS = (BASH_SL_COMMENT, )

# Visual separator printed around a header when asking the user what to keep.
# Raw string: the backslashes are literal characters, not escape sequences.
DELIM = r"|/-\|/-\|BARRIER|/-\|/-\|"
|
||||||
|
|
||||||
|
|
||||||
|
def parser_args(argv=None):
    """Parse the command-line options of the copyright replacement script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads the arguments from ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with ``repo_dir`` (str) and the extension lists
        ``python_style_ext``, ``bash_style_ext`` and ``c_style_ext``.
    """
    parser = argparse.ArgumentParser()
    # Required: without it the directory walk in main() would receive None.
    parser.add_argument("--repo_dir", type=str, required=True, help="Repository directory")
    parser.add_argument("--python_style_ext",
                        type=str,
                        nargs="+",
                        default=[".py"],
                        help="File types to process with python-style comments")
    parser.add_argument("--bash_style_ext",
                        type=str,
                        nargs="+",
                        default=[".sh"],
                        help="File types to process with bash-style comments")
    parser.add_argument("--c_style_ext",
                        type=str,
                        nargs="+",
                        default=[
                            ".c",
                            ".cpp",
                            ".cu",
                            ".h",
                            ".hpp",
                            ".cuh",
                            ".cc",
                            ".hip",
                            ".tr",
                        ],
                        help="File types to process with C-style comments")
    args = parser.parse_args(argv)
    return args
|
||||||
|
|
||||||
|
|
||||||
|
# These get_header_* functions are ugly, but they work :)
|
||||||
|
def get_header_py(fp):
    """Split a Python file into its leading comment header and the rest.

    Args:
        fp: Path of the file to read.

    Returns:
        A ``(header, rest)`` tuple. ``header`` is a list of whitespace-stripped
        lines (no trailing newline) covering the leading ``#`` comments, blank
        lines and triple-quoted blocks. ``rest`` starts with the first line
        that is none of those (stripped, newline re-appended) followed by the
        remaining lines exactly as read.
    """
    with open(fp, "r") as f:
        remaining = iter(f.readlines())

    header = []
    rest = []
    inside_block = False
    block_quote = None

    for raw in remaining:
        text = raw.strip()
        if text.startswith((PY_ML_SINGLE, PY_ML_DOUBLE)):
            quote = text[:3]
            if inside_block and block_quote == quote:
                # The same delimiter that opened the block closes it
                inside_block = False
            else:
                # A block starts here; it stays open unless the delimiter
                # also terminates this (at least six character) line
                block_quote = quote
                inside_block = not (text.endswith(quote) and len(text) >= 6)
        elif inside_block and text.endswith(block_quote):
            # Closing delimiter at the end of a line of block text
            inside_block = False
        elif not inside_block and not text.startswith(PY_SL_COMMENT) and text != "":
            # First line of real code: the header is over
            rest.append(text + "\n")
            break
        header.append(text)

    # Whatever the loop did not consume belongs to the body of the file
    rest.extend(remaining)

    return header, rest
|
||||||
|
|
||||||
|
|
||||||
|
def get_header_c(fp):
    """Split a C-style source file into its leading comment header and the rest.

    Args:
        fp: Path of the file to read.

    Returns:
        A ``(header, rest)`` tuple. ``header`` is a list of whitespace-stripped
        lines (no trailing newline) covering the leading ``//`` comments, blank
        lines and ``/* ... */`` blocks. ``rest`` starts with the first line
        that is none of those (stripped, newline re-appended) followed by the
        remaining lines exactly as read.
    """
    with open(fp, "r") as f:
        lines = iter(f.readlines())

    header = []
    rest = []
    in_multiline = False

    for raw in lines:
        line = raw.strip()
        if line.startswith(C_ML_OPEN):
            # Block comment opener; the block stays open unless it is also
            # closed on this line. The length check keeps a bare "/*/" from
            # being read as open + close.
            in_multiline = not (line.endswith(C_ML_CLOSE) and len(line) >= len(C_ML_OPEN) + len(C_ML_CLOSE))
        elif in_multiline and line.endswith(C_ML_CLOSE):
            # Ended a multiline comment
            in_multiline = False
        elif not (in_multiline or line.startswith(C_SL_COMMENT) or line == ""):
            # Not in a comment: the whole disjunction must be negated so that
            # "//" lines and blank lines stay part of the header.
            rest.append(line + "\n")
            break
        header.append(line)

    # Whatever the loop did not consume belongs to the body of the file
    rest.extend(lines)

    return header, rest
|
||||||
|
|
||||||
|
|
||||||
|
def get_header_bash(fp):
    """Split a shell script into its leading ``#`` comment lines and the rest.

    Args:
        fp: Path of the file to read.

    Returns:
        A ``(header, rest)`` tuple. ``header`` is a list of whitespace-stripped
        lines (no trailing newline) that start with ``#``. ``rest`` starts with
        the first line that does not (stripped, newline re-appended) followed
        by the remaining lines exactly as read.
    """
    with open(fp, "r") as f:
        pending = iter(f.readlines())

    header = []
    rest = []

    for raw in pending:
        text = raw.strip()
        if text.startswith(BASH_SL_COMMENT):
            header.append(text)
            continue
        # Any line without the comment prefix ends the header; a blank line
        # counts too, because a stripped empty string has no prefix.
        rest.append(text + "\n")
        break

    # Whatever the loop did not consume belongs to the body of the file
    rest.extend(pending)

    return header, rest
|
||||||
|
|
||||||
|
|
||||||
|
def remove_comments(line, comment_strs):
    """Return ``line`` with every occurrence of each comment marker deleted.

    Args:
        line: Text to clean.
        comment_strs: Iterable of marker strings, removed one after another in
            the order given.

    Returns:
        The cleaned string; surrounding whitespace is left as it was.
    """
    cleaned = line
    for marker in comment_strs:
        cleaned = cleaned.replace(marker, "")
    return cleaned
|
||||||
|
|
||||||
|
|
||||||
|
def format_multiline_comment(text, comment_type):
    """Wrap preserved header lines in a comment of the requested style.

    Args:
        text: List of strings, one per line, already free of comment markers.
        comment_type: One of ``PY_COMMENTS``, ``C_COMMENTS`` or
            ``BASH_COMMENTS``.

    Returns:
        A string that starts with a newline, so it can be appended directly
        after the copyright lines. For any other ``comment_type`` the input is
        returned unchanged.
    """
    if comment_type == PY_COMMENTS:
        # Triple-double-quoted block
        return f"\n{comment_type[2]}\n" + "\n".join(text) + f"{comment_type[2]}"
    if comment_type == C_COMMENTS:
        # /* ... */ block
        return f"\n{comment_type[1]}\n" + "\n".join(text) + f"{comment_type[2]}"
    if comment_type == BASH_COMMENTS:
        # No block syntax in shell: prefix each line. The leading newline
        # keeps the first preserved line off the last copyright line.
        return "\n" + "\n".join(f"{comment_type[0]}{line}" for line in text)
    return text
|
||||||
|
|
||||||
|
|
||||||
|
def modify_file_header(fp, file_header, rest_of_file, preserve_text_store, comment_type):
    """Rewrite ``fp`` with the new copyright header, keeping user-selected text.

    The first time a given header text is seen the user is asked which line
    starts the part worth keeping; the answer is cached in
    ``preserve_text_store`` and reused for every file with an identical header.

    Args:
        fp: Path of the file to overwrite.
        file_header: List of stripped header lines of the file.
        rest_of_file: List of newline-terminated lines following the header.
        preserve_text_store: Dict mapping a joined header text to the lines to
            preserve for it (a list, or ``""`` when nothing is kept). Updated
            in place.
        comment_type: Tuple of comment markers for the file's language; the
            first entry is used as the single-line prefix of the copyright
            lines.

    Returns:
        ``preserve_text_store``, so the caller can pass it on to the next file.
    """
    header_text = "\n".join(file_header)
    if not (header_text.strip() == "" or header_text in preserve_text_store):
        # Unique header, need to get user input
        print("\n", DELIM, "\n")
        for idx, line in enumerate(file_header):
            print(f"{idx}: {line}")
        print("\n", DELIM, "\n")
        print("\nIndicate the FIRST line of the Header to KEEP")
        print("(shebang #! lines will be automatically processed and should not be included).")
        while True:
            keep_idx = input("Enter number (or leave blank if no lines should be preserved): ")
            if keep_idx == "":
                preserve_text_store[header_text] = ""
                break
            try:
                preserve_text_store[header_text] = file_header[int(keep_idx):]
                break
            except ValueError:
                # A typo must not abort a run that has already rewritten files
                print(f"'{keep_idx}' is not a valid line number, please try again.")

    # Identify any shebang lines in the file
    shebang = "\n".join([line for line in file_header if line.startswith("#!")])
    if shebang != "":
        shebang += "\n"

    # Get the text we should preserve in this file and process to remove comment characters
    text_to_preserve = preserve_text_store.get(header_text, [""])
    text_to_preserve = [remove_comments(line, comment_type) for line in text_to_preserve]

    # Format the text we want to keep into a new multiline comment
    if "".join(text_to_preserve) == "":
        text_to_preserve = ""
    else:
        text_to_preserve = format_multiline_comment(text_to_preserve, comment_type)

    # Generate the copyright text we will be adding (blank entries stay blank)
    copyright_text = "\n".join([f"{comment_type[0]} {line}" if line != "" else line for line in NEW_COPYRIGHT])

    # Assemble the new header
    new_header = shebang + copyright_text + text_to_preserve

    # Write out the new file
    new_file_contents = new_header + "\n" + "".join(rest_of_file)
    with open(fp, "w") as f:
        f.write(new_file_contents)

    return preserve_text_store  # Return so we can reuse for future files
|
||||||
|
|
||||||
|
|
||||||
|
def main(args):
    """Walk ``args.repo_dir`` and rewrite the header of every matching file.

    Args:
        args: Namespace providing ``repo_dir`` and the extension lists
            ``python_style_ext``, ``c_style_ext`` and ``bash_style_ext``.
    """
    # One entry per comment style, in the order the styles are processed
    # inside each directory: Python, then C, then bash.
    styles = (
        (args.python_style_ext, get_header_py, PY_COMMENTS),
        (args.c_style_ext, get_header_c, C_COMMENTS),
        (args.bash_style_ext, get_header_bash, BASH_COMMENTS),
    )

    preserve_text_store = {}  # Used to track header comments we should preserve
    for root, _dirs, fnames in os.walk(args.repo_dir):
        for extensions, read_header, comment_type in styles:
            for ext in extensions:
                for fn in fnames:
                    if not fn.endswith(ext):
                        continue
                    fp = os.path.join(root, fn)
                    file_header, rest_of_file = read_header(fp)
                    preserve_text_store = modify_file_header(fp, file_header, rest_of_file, preserve_text_store,
                                                             comment_type)


# Script entry point: parse the command line, then process the repository
if __name__ == "__main__":
    args = parser_args()
    main(args)
|
Загрузка…
Ссылка в новой задаче