This commit is contained in:
Taus 2024-04-03 14:30:29 +00:00
Родитель 599f573a4a
Коммит ef9f99b3be
3 изменённых файлов: 0 добавлений и 711 удалений

Просмотреть файл

@ -5,7 +5,6 @@ py_binary(
srcs = [
"make_zips.py",
"python_tracer.py",
"unparse.py",
],
data = [
"LICENSE-PSF.md",

Просмотреть файл

@ -6,7 +6,6 @@
| `tsg-python/tree-sitter-python` | Y | MIT | Used in `tsg-python` to parse Python files |
| `tsg-python` | Y | MIT / Apache | This is our own creation, so are free to choose what license it is covered by. |
| `tree-sitter-graph` | N | MIT / Apache | Used in `tsg-python` to execute files written in the `tree-sitter-graph` language. |
| `unparse.py` | Y | PSF | Copied and adapted from `Tools/unparse.py` from the `cpython` source code, with attribution. |
| `imp.py` | Y | PSF | Copied and adapted from `Lib/imp.py` from the `cpython` source code, with attribution. |
| `semmle/data/*.trap` | Y | PSF | These files were derived from the C source code of the `cpython` project, and are used in our modelling of built-in objects. No attribution, currently. |
| `semmle/thrift/parse.py` | Y | Apache | Includes a grammar based on https://github.com/apache/thrift/blob/master/doc/specs/idl.md, with comment stating this attribution. |

Просмотреть файл

@ -1,709 +0,0 @@
#Copied Tools.unparse.py with modifications. Copyright PSF.
"Usage: unparse.py <path to source file>"
import sys
import ast
import tokenize
import io
import os
import shutil
# Large float and imaginary literals get turned into infinities in the AST.
# We unparse those infinities to INFSTR.
INFSTR = "1e" + repr(sys.float_info.max_10_exp + 1)
def interleave(inter, f, seq):
"""Call f on each item in seq, calling inter() in between.
"""
seq = iter(seq)
try:
f(next(seq))
except StopIteration:
pass
else:
for x in seq:
inter()
f(x)
class Unparser:
"""Methods in this class recursively traverse an AST and
output source code for the abstract syntax; original formatting
is disregarded. """
def __init__(self, tree, file = sys.stdout):
"""Unparser(tree, file=sys.stdout) -> None.
Print the source for tree to file."""
self.f = file
self._indent = 0
self.dispatch(tree)
print("", file=self.f)
self.f.flush()
def fill(self, text = ""):
"Indent a piece of text, according to the current indentation level"
self.f.write("\n"+" "*self._indent + text)
def write(self, text):
"Append a piece of text to the current line."
self.f.write(text)
def enter(self):
"Print ':', and increase the indentation."
self.write(":")
self._indent += 1
def leave(self):
"Decrease the indentation level."
self._indent -= 1
def dispatch(self, tree):
"Dispatcher function, dispatching tree type T to method _T."
if isinstance(tree, list):
for t in tree:
self.dispatch(t)
return
meth = getattr(self, "_"+tree.__class__.__name__)
meth(tree)
def remove_docstring(self, t):
if hasattr(t, "docstring"):
return
if not t.body:
return
if not isinstance(t.body[0], ast.Expr):
return
if not isinstance(t.body[0].value, ast.Str):
return
t.body = t.body[1:]
def add_pass(self, t):
if t.body:
#No pass needed
return
t.body = [ast.Pass()]
############### Unparsing methods ######################
# There should be one method per concrete grammar type #
# Constructors should be grouped by sum type. Ideally, #
# this would follow the order in the grammar, but #
# currently doesn't. #
########################################################
def _Module(self, tree):
self.remove_docstring(tree)
self.add_pass(tree)
for stmt in tree.body:
self.dispatch(stmt)
# stmt
def _Expr(self, tree):
self.fill()
self.dispatch(tree.value)
def _Import(self, t):
self.fill("import ")
interleave(lambda: self.write(", "), self.dispatch, t.names)
def _ImportFrom(self, t):
self.fill("from ")
self.write("." * t.level)
if t.module:
self.write(t.module)
self.write(" import ")
interleave(lambda: self.write(", "), self.dispatch, t.names)
def _Assign(self, t):
self.fill()
for target in t.targets:
self.dispatch(target)
self.write(" = ")
self.dispatch(t.value)
def _AugAssign(self, t):
self.fill()
self.dispatch(t.target)
self.write(" "+self.binop[t.op.__class__.__name__]+"= ")
self.dispatch(t.value)
def _AnnAssign(self, t):
self.fill()
if not t.simple and isinstance(t.target, ast.Name):
self.write('(')
self.dispatch(t.target)
if not t.simple and isinstance(t.target, ast.Name):
self.write(')')
self.write(": ")
self.dispatch(t.annotation)
if t.value:
self.write(" = ")
self.dispatch(t.value)
def _Return(self, t):
self.fill("return")
if t.value:
self.write(" ")
self.dispatch(t.value)
def _Pass(self, t):
self.fill("pass")
def _Break(self, t):
self.fill("break")
def _Continue(self, t):
self.fill("continue")
def _Delete(self, t):
self.fill("del ")
interleave(lambda: self.write(", "), self.dispatch, t.targets)
def _Assert(self, t):
self.fill("assert ")
self.dispatch(t.test)
if t.msg:
self.write(", ")
self.dispatch(t.msg)
def _Global(self, t):
self.fill("global ")
interleave(lambda: self.write(", "), self.write, t.names)
def _Nonlocal(self, t):
self.fill("nonlocal ")
interleave(lambda: self.write(", "), self.write, t.names)
def _Await(self, t):
self.write("(")
self.write("await")
if t.value:
self.write(" ")
self.dispatch(t.value)
self.write(")")
def _Yield(self, t):
self.write("(")
self.write("yield")
if t.value:
self.write(" ")
self.dispatch(t.value)
self.write(")")
def _YieldFrom(self, t):
self.write("(")
self.write("yield from")
if t.value:
self.write(" ")
self.dispatch(t.value)
self.write(")")
def _Raise(self, t):
self.fill("raise")
if not t.exc:
assert not t.cause
return
self.write(" ")
self.dispatch(t.exc)
if t.cause:
self.write(" from ")
self.dispatch(t.cause)
def _Try(self, t):
self.fill("try")
self.enter()
self.dispatch(t.body)
self.leave()
for ex in t.handlers:
self.dispatch(ex)
if t.orelse:
self.fill("else")
self.enter()
self.dispatch(t.orelse)
self.leave()
if t.finalbody:
self.fill("finally")
self.enter()
self.dispatch(t.finalbody)
self.leave()
def _ExceptHandler(self, t):
self.fill("except")
if t.type:
self.write(" ")
self.dispatch(t.type)
if t.name:
self.write(" as ")
self.write(t.name)
self.enter()
self.dispatch(t.body)
self.leave()
def _ClassDef(self, t):
self.write("\n")
for deco in t.decorator_list:
self.fill("@")
self.dispatch(deco)
self.fill("class "+t.name)
self.write("(")
comma = False
for e in t.bases:
if comma: self.write(", ")
else: comma = True
self.dispatch(e)
for e in t.keywords:
if comma: self.write(", ")
else: comma = True
self.dispatch(e)
self.write(")")
self.enter()
self.remove_docstring(t)
self.add_pass(t)
self.dispatch(t.body)
self.leave()
def _FunctionDef(self, t):
self.__FunctionDef_helper(t, "def")
def _AsyncFunctionDef(self, t):
self.__FunctionDef_helper(t, "async def")
def __FunctionDef_helper(self, t, fill_suffix):
self.write("\n")
for deco in t.decorator_list:
self.fill("@")
self.dispatch(deco)
def_str = fill_suffix+" "+t.name + "("
self.fill(def_str)
self.dispatch(t.args)
self.write(")")
if t.returns:
self.write(" -> ")
self.dispatch(t.returns)
self.enter()
self.remove_docstring(t)
self.add_pass(t)
self.dispatch(t.body)
self.leave()
def _For(self, t):
self.__For_helper("for ", t)
def _AsyncFor(self, t):
self.__For_helper("async for ", t)
def __For_helper(self, fill, t):
self.fill(fill)
self.dispatch(t.target)
self.write(" in ")
self.dispatch(t.iter)
self.enter()
self.dispatch(t.body)
self.leave()
if t.orelse:
self.fill("else")
self.enter()
self.dispatch(t.orelse)
self.leave()
def _If(self, t):
self.fill("if ")
self.dispatch(t.test)
self.enter()
self.dispatch(t.body)
self.leave()
# collapse nested ifs into equivalent elifs.
while (t.orelse and len(t.orelse) == 1 and
isinstance(t.orelse[0], ast.If)):
t = t.orelse[0]
self.fill("elif ")
self.dispatch(t.test)
self.enter()
self.dispatch(t.body)
self.leave()
# final else
if t.orelse:
self.fill("else")
self.enter()
self.dispatch(t.orelse)
self.leave()
def _While(self, t):
self.fill("while ")
self.dispatch(t.test)
self.enter()
self.dispatch(t.body)
self.leave()
if t.orelse:
self.fill("else")
self.enter()
self.dispatch(t.orelse)
self.leave()
def _With(self, t):
self.fill("with ")
interleave(lambda: self.write(", "), self.dispatch, t.items)
self.enter()
self.dispatch(t.body)
self.leave()
def _AsyncWith(self, t):
self.fill("async with ")
interleave(lambda: self.write(", "), self.dispatch, t.items)
self.enter()
self.dispatch(t.body)
self.leave()
# expr
def _Bytes(self, t):
self.write(repr(t.s))
def _Str(self, tree):
s = repr(tree.s).encode("ascii", errors="backslashreplace").decode("ascii")
self.write(s)
def _JoinedStr(self, t):
self.write("f")
string = io.StringIO()
self._fstring_JoinedStr(t, string.write)
self.write(repr(string.getvalue()))
def _FormattedValue(self, t):
self.write("f")
string = io.StringIO()
self._fstring_FormattedValue(t, string.write)
self.write(repr(string.getvalue()))
def _fstring_JoinedStr(self, t, write):
for value in t.values:
meth = getattr(self, "_fstring_" + type(value).__name__)
meth(value, write)
def _fstring_Str(self, t, write):
value = t.s.replace("{", "{{").replace("}", "}}")
write(value)
def _fstring_Constant(self, t, write):
assert isinstance(t.value, str)
value = t.value.replace("{", "{{").replace("}", "}}")
write(value)
def _fstring_FormattedValue(self, t, write):
write("{")
expr = io.StringIO()
Unparser(t.value, expr)
expr = expr.getvalue().rstrip("\n")
if expr.startswith("{"):
write(" ") # Separate pair of opening brackets as "{ {"
write(expr)
if t.conversion != -1:
conversion = chr(t.conversion)
assert conversion in "sra"
write("!%s" % conversion)
if t.format_spec:
write(":")
meth = getattr(self, "_fstring_" + type(t.format_spec).__name__)
meth(t.format_spec, write)
write("}")
def _Name(self, t):
self.write(t.id)
def _write_constant(self, value):
if isinstance(value, (float, complex)):
self.write(repr(value).replace("inf", INFSTR))
else:
self.write(repr(value))
def _Constant(self, t):
value = t.value
if isinstance(value, tuple):
self.write("(")
if len(value) == 1:
self._write_constant(value[0])
self.write(",")
else:
interleave(lambda: self.write(", "), self._write_constant, value)
self.write(")")
else:
self._write_constant(t.value)
def _NameConstant(self, t):
self.write(repr(t.value))
def _Num(self, t):
# Substitute overflowing decimal literal for AST infinities.
self.write(repr(t.n).replace("inf", INFSTR))
def _List(self, t):
self.write("[")
interleave(lambda: self.write(", "), self.dispatch, t.elts)
self.write("]")
def _ListComp(self, t):
self.write("[")
self.dispatch(t.elt)
for gen in t.generators:
self.dispatch(gen)
self.write("]")
def _GeneratorExp(self, t):
self.write("(")
self.dispatch(t.elt)
for gen in t.generators:
self.dispatch(gen)
self.write(")")
def _SetComp(self, t):
self.write("{")
self.dispatch(t.elt)
for gen in t.generators:
self.dispatch(gen)
self.write("}")
def _DictComp(self, t):
self.write("{")
self.dispatch(t.key)
self.write(": ")
self.dispatch(t.value)
for gen in t.generators:
self.dispatch(gen)
self.write("}")
def _comprehension(self, t):
if hasattr(t, "is_async") and t.is_async:
self.write(" async for ")
else:
self.write(" for ")
self.dispatch(t.target)
self.write(" in ")
self.dispatch(t.iter)
for if_clause in t.ifs:
self.write(" if ")
self.dispatch(if_clause)
def _IfExp(self, t):
self.write("(")
self.dispatch(t.body)
self.write(" if ")
self.dispatch(t.test)
self.write(" else ")
self.dispatch(t.orelse)
self.write(")")
def _Set(self, t):
assert(t.elts) # should be at least one element
self.write("{")
interleave(lambda: self.write(", "), self.dispatch, t.elts)
self.write("}")
def _Dict(self, t):
self.write("{")
def write_key_value_pair(k, v):
self.dispatch(k)
self.write(": ")
self.dispatch(v)
def write_item(item):
k, v = item
if k is None:
# for dictionary unpacking operator in dicts {**{'y': 2}}
# see PEP 448 for details
self.write("**")
self.dispatch(v)
else:
write_key_value_pair(k, v)
interleave(lambda: self.write(", "), write_item, zip(t.keys, t.values))
self.write("}")
def _Tuple(self, t):
self.write("(")
if len(t.elts) == 1:
elt = t.elts[0]
self.dispatch(elt)
self.write(",")
else:
interleave(lambda: self.write(", "), self.dispatch, t.elts)
self.write(")")
unop = {"Invert":"~", "Not": "not", "UAdd":"+", "USub":"-"}
def _UnaryOp(self, t):
self.write("(")
self.write(self.unop[t.op.__class__.__name__])
self.write(" ")
self.dispatch(t.operand)
self.write(")")
binop = { "Add":"+", "Sub":"-", "Mult":"*", "MatMult":"@", "Div":"/", "Mod":"%",
"LShift":"<<", "RShift":">>", "BitOr":"|", "BitXor":"^", "BitAnd":"&",
"FloorDiv":"//", "Pow": "**"}
def _BinOp(self, t):
self.write("(")
self.dispatch(t.left)
self.write(" " + self.binop[t.op.__class__.__name__] + " ")
self.dispatch(t.right)
self.write(")")
cmpops = {"Eq":"==", "NotEq":"!=", "Lt":"<", "LtE":"<=", "Gt":">", "GtE":">=",
"Is":"is", "IsNot":"is not", "In":"in", "NotIn":"not in"}
def _Compare(self, t):
self.write("(")
self.dispatch(t.left)
for o, e in zip(t.ops, t.comparators):
self.write(" " + self.cmpops[o.__class__.__name__] + " ")
self.dispatch(e)
self.write(")")
boolops = {ast.And: 'and', ast.Or: 'or'}
def _BoolOp(self, t):
self.write("(")
s = " %s " % self.boolops[t.op.__class__]
interleave(lambda: self.write(s), self.dispatch, t.values)
self.write(")")
def _Attribute(self,t):
self.dispatch(t.value)
# Special case: 3.__abs__() is a syntax error, so if t.value
# is an integer literal then we need to either parenthesize
# it or add an extra space to get 3 .__abs__().
if isinstance(t.value, ast.Num) and isinstance(t.value.n, int):
self.write(" ")
self.write(".")
self.write(t.attr)
def _Call(self, t):
self.dispatch(t.func)
self.write("(")
comma = False
for e in t.args:
if comma: self.write(", ")
else: comma = True
self.dispatch(e)
for e in t.keywords:
if comma: self.write(", ")
else: comma = True
self.dispatch(e)
self.write(")")
def _Subscript(self, t):
self.dispatch(t.value)
self.write("[")
self.dispatch(t.slice)
self.write("]")
def _Starred(self, t):
self.write("*")
self.dispatch(t.value)
# slice
def _Ellipsis(self, t):
self.write("...")
def _Index(self, t):
self.dispatch(t.value)
def _Slice(self, t):
if t.lower:
self.dispatch(t.lower)
self.write(":")
if t.upper:
self.dispatch(t.upper)
if t.step:
self.write(":")
self.dispatch(t.step)
def _ExtSlice(self, t):
interleave(lambda: self.write(', '), self.dispatch, t.dims)
# argument
def _arg(self, t):
self.write(t.arg)
if t.annotation:
self.write(": ")
self.dispatch(t.annotation)
# others
def _arguments(self, t):
first = True
# normal arguments
defaults = [None] * (len(t.args) - len(t.defaults)) + t.defaults
for a, d in zip(t.args, defaults):
if first:first = False
else: self.write(", ")
self.dispatch(a)
if d:
self.write("=")
self.dispatch(d)
# varargs, or bare '*' if no varargs but keyword-only arguments present
if t.vararg or t.kwonlyargs:
if first:first = False
else: self.write(", ")
self.write("*")
if t.vararg:
self.write(t.vararg.arg)
if t.vararg.annotation:
self.write(": ")
self.dispatch(t.vararg.annotation)
# keyword-only arguments
if t.kwonlyargs:
for a, d in zip(t.kwonlyargs, t.kw_defaults):
if first:first = False
else: self.write(", ")
self.dispatch(a),
if d:
self.write("=")
self.dispatch(d)
# kwargs
if t.kwarg:
if first:first = False
else: self.write(", ")
self.write("**"+t.kwarg.arg)
if t.kwarg.annotation:
self.write(": ")
self.dispatch(t.kwarg.annotation)
def _keyword(self, t):
if t.arg is None:
self.write("**")
else:
self.write(t.arg)
self.write("=")
self.dispatch(t.value)
def _Lambda(self, t):
self.write("(")
self.write("lambda ")
self.dispatch(t.args)
self.write(": ")
self.dispatch(t.body)
self.write(")")
def _alias(self, t):
self.write(t.name)
if t.asname:
self.write(" as "+t.asname)
def _withitem(self, t):
self.dispatch(t.context_expr)
if t.optional_vars:
self.write(" as ")
self.dispatch(t.optional_vars)
def roundtrip(filename, outpath):
with open(filename, "rb") as pyfile:
encoding = tokenize.detect_encoding(pyfile.readline)[0]
with open(filename, "r", encoding=encoding) as pyfile:
source = pyfile.read()
tree = compile(source, filename, "exec", ast.PyCF_ONLY_AST)
with open(outpath, "w", encoding=encoding) as output:
Unparser(tree, output)
def strip_comments_and_docstrings(path):
tmp = path + ".tmp"
if path.endswith(".py"):
roundtrip(path, tmp)
else:
shutil.copy(path, tmp)
return tmp