#!/usr/bin/env python # # Copyright 2008, Google Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following disclaimer # in the documentation and/or other materials provided with the # distribution. # * Neither the name of Google Inc. nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """pump v0.2.0 - Pretty Useful for Meta Programming. A tool for preprocessor meta programming. Useful for generating repetitive boilerplate code. Especially useful for writing C++ classes, functions, macros, and templates that need to work with various number of arguments. USAGE: pump.py SOURCE_FILE EXAMPLES: pump.py foo.cc.pump Converts foo.cc.pump to foo.cc. GRAMMAR: CODE ::= ATOMIC_CODE* ATOMIC_CODE ::= $var ID = EXPRESSION | $var ID = [[ CODE ]] | $range ID EXPRESSION..EXPRESSION | $for ID SEPARATOR [[ CODE ]] | $($) | $ID | $(EXPRESSION) | $if EXPRESSION [[ CODE ]] ELSE_BRANCH | [[ CODE ]] | RAW_CODE SEPARATOR ::= RAW_CODE | EMPTY ELSE_BRANCH ::= $else [[ CODE ]] | $elif EXPRESSION [[ CODE ]] ELSE_BRANCH | EMPTY EXPRESSION has Python syntax. """ __author__ = 'wan@google.com (Zhanyong Wan)' import os import re import sys TOKEN_TABLE = [ (re.compile(r'\$var\s+'), '$var'), (re.compile(r'\$elif\s+'), '$elif'), (re.compile(r'\$else\s+'), '$else'), (re.compile(r'\$for\s+'), '$for'), (re.compile(r'\$if\s+'), '$if'), (re.compile(r'\$range\s+'), '$range'), (re.compile(r'\$[_A-Za-z]\w*'), '$id'), (re.compile(r'\$\(\$\)'), '$($)'), (re.compile(r'\$'), '$'), (re.compile(r'\[\[\n?'), '[['), (re.compile(r'\]\]\n?'), ']]'), ] class Cursor: """Represents a position (line and column) in a text file.""" def __init__(self, line=-1, column=-1): self.line = line self.column = column def __eq__(self, rhs): return self.line == rhs.line and self.column == rhs.column def __ne__(self, rhs): return not self == rhs def __lt__(self, rhs): return self.line < rhs.line or ( self.line == rhs.line and self.column < rhs.column) def __le__(self, rhs): return self < rhs or self == rhs def __gt__(self, rhs): return rhs < self def __ge__(self, rhs): return rhs <= self def __str__(self): if self == Eof(): return 'EOF' else: return '%s(%s)' % (self.line + 1, self.column) def __add__(self, offset): return Cursor(self.line, self.column + offset) def __sub__(self, offset): return Cursor(self.line, self.column - offset) def Clone(self): """Returns a copy of self.""" return Cursor(self.line, self.column) # Special cursor to indicate the end-of-file. def Eof(): """Returns the special cursor to denote the end-of-file.""" return Cursor(-1, -1) class Token: """Represents a token in a Pump source file.""" def __init__(self, start=None, end=None, value=None, token_type=None): if start is None: self.start = Eof() else: self.start = start if end is None: self.end = Eof() else: self.end = end self.value = value self.token_type = token_type def __str__(self): return 'Token @%s: \'%s\' type=%s' % ( self.start, self.value, self.token_type) def Clone(self): """Returns a copy of self.""" return Token(self.start.Clone(), self.end.Clone(), self.value, self.token_type) def StartsWith(lines, pos, string): """Returns True iff the given position in lines starts with 'string'.""" return lines[pos.line][pos.column:].startswith(string) def FindFirstInLine(line, token_table): best_match_start = -1 for (regex, token_type) in token_table: m = regex.search(line) if m: # We found regex in lines if best_match_start < 0 or m.start() < best_match_start: best_match_start = m.start() best_match_length = m.end() - m.start() best_match_token_type = token_type if best_match_start < 0: return None return (best_match_start, best_match_length, best_match_token_type) def FindFirst(lines, token_table, cursor): """Finds the first occurrence of any string in strings in lines.""" start = cursor.Clone() cur_line_number = cursor.line for line in lines[start.line:]: if cur_line_number == start.line: line = line[start.column:] m = FindFirstInLine(line, token_table) if m: # We found a regex in line. (start_column, length, token_type) = m if cur_line_number == start.line: start_column += start.column found_start = Cursor(cur_line_number, start_column) found_end = found_start + length return MakeToken(lines, found_start, found_end, token_type) cur_line_number += 1 # We failed to find str in lines return None def SubString(lines, start, end): """Returns a substring in lines.""" if end == Eof(): end = Cursor(len(lines) - 1, len(lines[-1])) if start >= end: return '' if start.line == end.line: return lines[start.line][start.column:end.column] result_lines = ([lines[start.line][start.column:]] + lines[start.line + 1:end.line] + [lines[end.line][:end.column]]) return ''.join(result_lines) def StripMetaComments(_str): """Strip meta comments from each line in the given string.""" # First, completely remove lines containing nothing but a meta # comment, including the trailing \n. _str = re.sub(r'^\s*\$\$.*\n', '', _str) # Then, remove meta comments from contentful lines. return re.sub(r'\s*\$\$.*', '', _str) def MakeToken(lines, start, end, token_type): """Creates a new instance of Token.""" return Token(start, end, SubString(lines, start, end), token_type) def ParseToken(lines, pos, regex, token_type): line = lines[pos.line][pos.column:] m = regex.search(line) if m and not m.start(): return MakeToken(lines, pos, pos + m.end(), token_type) else: print 'ERROR: %s expected at %s.' % (token_type, pos) sys.exit(1) ID_REGEX = re.compile(r'[_A-Za-z]\w*') EQ_REGEX = re.compile(r'=') REST_OF_LINE_REGEX = re.compile(r'.*?(?=$|\$\$)') OPTIONAL_WHITE_SPACES_REGEX = re.compile(r'\s*') WHITE_SPACE_REGEX = re.compile(r'\s') DOT_DOT_REGEX = re.compile(r'\.\.') def Skip(lines, pos, regex): line = lines[pos.line][pos.column:] m = re.search(regex, line) if m and not m.start(): return pos + m.end() else: return pos def SkipUntil(lines, pos, regex, token_type): line = lines[pos.line][pos.column:] m = re.search(regex, line) if m: return pos + m.start() else: print ('ERROR: %s expected on line %s after column %s.' % (token_type, pos.line + 1, pos.column)) sys.exit(1) def ParseExpTokenInParens(lines, pos): def ParseInParens(pos): pos = Skip(lines, pos, OPTIONAL_WHITE_SPACES_REGEX) pos = Skip(lines, pos, r'\(') pos = Parse(pos) pos = Skip(lines, pos, r'\)') return pos def Parse(pos): pos = SkipUntil(lines, pos, r'\(|\)', ')') if SubString(lines, pos, pos + 1) == '(': pos = Parse(pos + 1) pos = Skip(lines, pos, r'\)') return Parse(pos) else: return pos start = pos.Clone() pos = ParseInParens(pos) return MakeToken(lines, start, pos, 'exp') def RStripNewLineFromToken(token): if token.value.endswith('\n'): return Token(token.start, token.end, token.value[:-1], token.token_type) else: return token def TokenizeLines(lines, pos): while True: found = FindFirst(lines, TOKEN_TABLE, pos) if not found: yield MakeToken(lines, pos, Eof(), 'code') return if found.start == pos: prev_token = None prev_token_rstripped = None else: prev_token = MakeToken(lines, pos, found.start, 'code') prev_token_rstripped = RStripNewLineFromToken(prev_token) if found.token_type == '$var': if prev_token_rstripped: yield prev_token_rstripped yield found id_token = ParseToken(lines, found.end, ID_REGEX, 'id') yield id_token pos = Skip(lines, id_token.end, OPTIONAL_WHITE_SPACES_REGEX) eq_token = ParseToken(lines, pos, EQ_REGEX, '=') yield eq_token pos = Skip(lines, eq_token.end, r'\s*') if SubString(lines, pos, pos + 2) != '[[': exp_token = ParseToken(lines, pos, REST_OF_LINE_REGEX, 'exp') yield exp_token pos = Cursor(exp_token.end.line + 1, 0) elif found.token_type == '$for': if prev_token_rstripped: yield prev_token_rstripped yield found id_token = ParseToken(lines, found.end, ID_REGEX, 'id') yield id_token pos = Skip(lines, id_token.end, WHITE_SPACE_REGEX) elif found.token_type == '$range': if prev_token_rstripped: yield prev_token_rstripped yield found id_token = ParseToken(lines, found.end, ID_REGEX, 'id') yield id_token pos = Skip(lines, id_token.end, OPTIONAL_WHITE_SPACES_REGEX) dots_pos = SkipUntil(lines, pos, DOT_DOT_REGEX, '..') yield MakeToken(lines, pos, dots_pos, 'exp') yield MakeToken(lines, dots_pos, dots_pos + 2, '..') pos = dots_pos + 2 new_pos = Cursor(pos.line + 1, 0) yield MakeToken(lines, pos, new_pos, 'exp') pos = new_pos elif found.token_type == '$': if prev_token: yield prev_token yield found exp_token = ParseExpTokenInParens(lines, found.end) yield exp_token pos = exp_token.end elif (found.token_type == ']]' or found.token_type == '$if' or found.token_type == '$elif' or found.token_type == '$else'): if prev_token_rstripped: yield prev_token_rstripped yield found pos = found.end else: if prev_token: yield prev_token yield found pos = found.end def Tokenize(s): """A generator that yields the tokens in the given string.""" if s != '': lines = s.splitlines(True) for token in TokenizeLines(lines, Cursor(0, 0)): yield token class CodeNode: def __init__(self, atomic_code_list=None): self.atomic_code = atomic_code_list class VarNode: def __init__(self, identifier=None, atomic_code=None): self.identifier = identifier self.atomic_code = atomic_code class RangeNode: def __init__(self, identifier=None, exp1=None, exp2=None): self.identifier = identifier self.exp1 = exp1 self.exp2 = exp2 class ForNode: def __init__(self, identifier=None, sep=None, code=None): self.identifier = identifier self.sep = sep self.code = code class ElseNode: def __init__(self, else_branch=None): self.else_branch = else_branch class IfNode: def __init__(self, exp=None, then_branch=None, else_branch=None): self.exp = exp self.then_branch = then_branch self.else_branch = else_branch class RawCodeNode: def __init__(self, token=None): self.raw_code = token class LiteralDollarNode: def __init__(self, token): self.token = token class ExpNode: def __init__(self, token, python_exp): self.token = token self.python_exp = python_exp def PopFront(a_list): head = a_list[0] a_list[:1] = [] return head def PushFront(a_list, elem): a_list[:0] = [elem] def PopToken(a_list, token_type=None): token = PopFront(a_list) if token_type is not None and token.token_type != token_type: print 'ERROR: %s expected at %s' % (token_type, token.start) print 'ERROR: %s found instead' % (token,) sys.exit(1) return token def PeekToken(a_list): if not a_list: return None return a_list[0] def ParseExpNode(token): python_exp = re.sub(r'([_A-Za-z]\w*)', r'self.GetValue("\1")', token.value) return ExpNode(token, python_exp) def ParseElseNode(tokens): def Pop(token_type=None): return PopToken(tokens, token_type) _next = PeekToken(tokens) if not next: return None if _next.token_type == '$else': Pop('$else') Pop('[[') code_node = ParseCodeNode(tokens) Pop(']]') return code_node elif _next.token_type == '$elif': Pop('$elif') exp = Pop('code') Pop('[[') code_node = ParseCodeNode(tokens) Pop(']]') inner_else_node = ParseElseNode(tokens) return CodeNode([IfNode(ParseExpNode(exp), code_node, inner_else_node)]) elif not _next.value.strip(): Pop('code') return ParseElseNode(tokens) else: return None def ParseAtomicCodeNode(tokens): def Pop(token_type=None): return PopToken(tokens, token_type) head = PopFront(tokens) t = head.token_type if t == 'code': return RawCodeNode(head) elif t == '$var': id_token = Pop('id') Pop('=') _next = PeekToken(tokens) if _next.token_type == 'exp': exp_token = Pop() return VarNode(id_token, ParseExpNode(exp_token)) Pop('[[') code_node = ParseCodeNode(tokens) Pop(']]') return VarNode(id_token, code_node) elif t == '$for': id_token = Pop('id') next_token = PeekToken(tokens) if next_token.token_type == 'code': sep_token = next_token Pop('code') else: sep_token = None Pop('[[') code_node = ParseCodeNode(tokens) Pop(']]') return ForNode(id_token, sep_token, code_node) elif t == '$if': exp_token = Pop('code') Pop('[[') code_node = ParseCodeNode(tokens) Pop(']]') else_node = ParseElseNode(tokens) return IfNode(ParseExpNode(exp_token), code_node, else_node) elif t == '$range': id_token = Pop('id') exp1_token = Pop('exp') Pop('..') exp2_token = Pop('exp') return RangeNode(id_token, ParseExpNode(exp1_token), ParseExpNode(exp2_token)) elif t == '$id': return ParseExpNode(Token(head.start + 1, head.end, head.value[1:], 'id')) elif t == '$($)': return LiteralDollarNode(head) elif t == '$': exp_token = Pop('exp') return ParseExpNode(exp_token) elif t == '[[': code_node = ParseCodeNode(tokens) Pop(']]') return code_node else: PushFront(tokens, head) return None def ParseCodeNode(tokens): atomic_code_list = [] while True: if not tokens: break atomic_code_node = ParseAtomicCodeNode(tokens) if atomic_code_node: atomic_code_list.append(atomic_code_node) else: break return CodeNode(atomic_code_list) def ParseToAST(pump_src_text): """Convert the given Pump source text into an AST.""" tokens = list(Tokenize(pump_src_text)) code_node = ParseCodeNode(tokens) return code_node class Env: def __init__(self): self.variables = [] self.ranges = [] def Clone(self): clone = Env() clone.variables = self.variables[:] clone.ranges = self.ranges[:] return clone def PushVariable(self, var, value): # If value looks like an int, store it as an int. try: int_value = int(value) if ('%s' % int_value) == value: value = int_value except Exception: pass self.variables[:0] = [(var, value)] def PopVariable(self): self.variables[:1] = [] def PushRange(self, var, lower, upper): self.ranges[:0] = [(var, lower, upper)] def PopRange(self): self.ranges[:1] = [] def GetValue(self, identifier): for (var, value) in self.variables: if identifier == var: return value print 'ERROR: meta variable %s is undefined.' % (identifier,) sys.exit(1) def EvalExp(self, exp): try: result = eval(exp.python_exp) except Exception, e: print 'ERROR: caught exception %s: %s' % (e.__class__.__name__, e) print ('ERROR: failed to evaluate meta expression %s at %s' % (exp.python_exp, exp.token.start)) sys.exit(1) return result def GetRange(self, identifier): for (var, lower, upper) in self.ranges: if identifier == var: return (lower, upper) print 'ERROR: range %s is undefined.' % (identifier,) sys.exit(1) class Output: def __init__(self): self.string = '' def GetLastLine(self): index = self.string.rfind('\n') if index < 0: return '' return self.string[index + 1:] def Append(self, s): self.string += s def RunAtomicCode(env, node, output): if isinstance(node, VarNode): identifier = node.identifier.value.strip() result = Output() RunAtomicCode(env.Clone(), node.atomic_code, result) value = result.string env.PushVariable(identifier, value) elif isinstance(node, RangeNode): identifier = node.identifier.value.strip() lower = int(env.EvalExp(node.exp1)) upper = int(env.EvalExp(node.exp2)) env.PushRange(identifier, lower, upper) elif isinstance(node, ForNode): identifier = node.identifier.value.strip() if node.sep is None: sep = '' else: sep = node.sep.value (lower, upper) = env.GetRange(identifier) for i in range(lower, upper + 1): new_env = env.Clone() new_env.PushVariable(identifier, i) RunCode(new_env, node.code, output) if i != upper: output.Append(sep) elif isinstance(node, RawCodeNode): output.Append(node.raw_code.value) elif isinstance(node, IfNode): cond = env.EvalExp(node.exp) if cond: RunCode(env.Clone(), node.then_branch, output) elif node.else_branch is not None: RunCode(env.Clone(), node.else_branch, output) elif isinstance(node, ExpNode): value = env.EvalExp(node) output.Append('%s' % (value,)) elif isinstance(node, LiteralDollarNode): output.Append('$') elif isinstance(node, CodeNode): RunCode(env.Clone(), node, output) else: print 'BAD' print node sys.exit(1) def RunCode(env, code_node, output): for atomic_code in code_node.atomic_code: RunAtomicCode(env, atomic_code, output) def IsSingleLineComment(cur_line): return '//' in cur_line def IsInPreprocessorDirective(prev_lines, cur_line): if cur_line.lstrip().startswith('#'): return True return prev_lines and prev_lines[-1].endswith('\\') def WrapComment(line, output): loc = line.find('//') before_comment = line[:loc].rstrip() if before_comment == '': indent = loc else: output.append(before_comment) indent = len(before_comment) - len(before_comment.lstrip()) prefix = indent*' ' + '// ' max_len = 80 - len(prefix) comment = line[loc + 2:].strip() segs = [seg for seg in re.split(r'(\w+\W*)', comment) if seg != ''] cur_line = '' for seg in segs: if len((cur_line + seg).rstrip()) < max_len: cur_line += seg else: if cur_line.strip() != '': output.append(prefix + cur_line.rstrip()) cur_line = seg.lstrip() if cur_line.strip() != '': output.append(prefix + cur_line.strip()) def WrapCode(line, line_concat, output): indent = len(line) - len(line.lstrip()) prefix = indent*' ' # Prefix of the current line max_len = 80 - indent - len(line_concat) # Maximum length of the current line new_prefix = prefix + 4*' ' # Prefix of a continuation line new_max_len = max_len - 4 # Maximum length of a continuation line # Prefers to wrap a line after a ',' or ';'. segs = [seg for seg in re.split(r'([^,;]+[,;]?)', line.strip()) if seg != ''] cur_line = '' # The current line without leading spaces. for seg in segs: # If the line is still too long, wrap at a space. while cur_line == '' and len(seg.strip()) > max_len: seg = seg.lstrip() split_at = seg.rfind(' ', 0, max_len) output.append(prefix + seg[:split_at].strip() + line_concat) seg = seg[split_at + 1:] prefix = new_prefix max_len = new_max_len if len((cur_line + seg).rstrip()) < max_len: cur_line = (cur_line + seg).lstrip() else: output.append(prefix + cur_line.rstrip() + line_concat) prefix = new_prefix max_len = new_max_len cur_line = seg.lstrip() if cur_line.strip() != '': output.append(prefix + cur_line.strip()) def WrapPreprocessorDirective(line, output): WrapCode(line, ' \\', output) def WrapPlainCode(line, output): WrapCode(line, '', output) def IsMultiLineIWYUPragma(line): return re.search(r'/\* IWYU pragma: ', line) def IsHeaderGuardIncludeOrOneLineIWYUPragma(line): return (re.match(r'^#(ifndef|define|endif\s*//)\s*[\w_]+\s*$', line) or re.match(r'^#include\s', line) or # Don't break IWYU pragmas, either; that causes iwyu.py problems. re.search(r'// IWYU pragma: ', line)) def WrapLongLine(line, output): line = line.rstrip() if len(line) <= 80: output.append(line) elif IsSingleLineComment(line): if IsHeaderGuardIncludeOrOneLineIWYUPragma(line): # The style guide made an exception to allow long header guard lines, # includes and IWYU pragmas. output.append(line) else: WrapComment(line, output) elif IsInPreprocessorDirective(output, line): if IsHeaderGuardIncludeOrOneLineIWYUPragma(line): # The style guide made an exception to allow long header guard lines, # includes and IWYU pragmas. output.append(line) else: WrapPreprocessorDirective(line, output) elif IsMultiLineIWYUPragma(line): output.append(line) else: WrapPlainCode(line, output) def BeautifyCode(string): lines = string.splitlines() output = [] for line in lines: WrapLongLine(line, output) output2 = [line.rstrip() for line in output] return '\n'.join(output2) + '\n' def ConvertFromPumpSource(src_text): """Return the text generated from the given Pump source text.""" ast = ParseToAST(StripMetaComments(src_text)) output = Output() RunCode(Env(), ast, output) return BeautifyCode(output.string) def main(argv): if len(argv) == 1: print __doc__ sys.exit(1) file_path = argv[-1] output_str = ConvertFromPumpSource(file(file_path, 'r').read()) if file_path.endswith('.pump'): output_file_path = file_path[:-5] else: output_file_path = '-' if output_file_path == '-': print output_str, else: output_file = file(output_file_path, 'w') output_file.write('// This file was GENERATED by command:\n') output_file.write('// %s %s\n' % (os.path.basename(__file__), os.path.basename(file_path))) output_file.write('// DO NOT EDIT BY HAND!!!\n\n') output_file.write(output_str) output_file.close() if __name__ == '__main__': main(sys.argv)