#!/usr/bin/python
"""Find specification section references in a source tree.

Walks CLANG_ROOT, collects comments of the form ``C99 6.5.3p2`` (or a bare
dotted section number), and writes ``references.html`` into the output
directory, grouping the references by document and section.
"""

import os
import re
import sys
import time
from pprint import pprint

###

c99URL = 'http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1124.pdf'
c99TOC = [('Foreword', 'xi'),
('Introduction', 'xiv'),
('1. Scope', '1'),
('2. Normative references', '2'),
('3. Terms, definitions, and symbols', '3'),
('4. Conformance', '7'),
('5. Environment', '9'),
('5.1 Conceptual models', '9'),
('5.1.1 Translation environment', '9'),
('5.1.2 Execution environments', '11'),
('5.2 Environmental considerations', '17'),
('5.2.1 Character sets', '17'),
('5.2.2 Character display semantics', '19'),
('5.2.3 Signals and interrupts', '20'),
('5.2.4 Environmental limits', '20'),
('6. Language', '29'),
('6.1 Notation', '29'),
('6.2 Concepts', '29'),
('6.2.1 Scopes of identifiers', '29'),
('6.2.2 Linkages of identifiers', '30'),
('6.2.3 Name spaces of identifiers', '31'),
('6.2.4 Storage durations of objects', '32'),
('6.2.5 Types', '33'),
('6.2.6 Representations of types', '37'),
('6.2.7 Compatible type and composite type', '40'),
('6.3 Conversions', '42'),
('6.3.1 Arithmetic operands', '42'),
('6.3.2 Other operands', '46'),
('6.4 Lexical elements', '49'),
('6.4.1 Keywords', '50'),
('6.4.2 Identifiers', '51'),
('6.4.3 Universal character names', '53'),
('6.4.4 Constants', '54'),
('6.4.5 String literals', '62'),
('6.4.6 Punctuators', '63'),
('6.4.7 Header names', '64'),
('6.4.8 Preprocessing numbers', '65'),
('6.4.9 Comments', '66'),
('6.5 Expressions', '67'),
('6.5.1 Primary expressions', '69'),
('6.5.2 Postfix operators', '69'),
('6.5.3 Unary operators', '78'),
('6.5.4 Cast operators', '81'),
('6.5.5 Multiplicative operators', '82'),
('6.5.6 Additive operators', '82'),
('6.5.7 Bitwise shift operators', '84'),
('6.5.8 Relational operators', '85'),
('6.5.9 Equality operators', '86'),
('6.5.10 Bitwise AND operator', '87'),
('6.5.11 Bitwise exclusive OR operator', '88'),
('6.5.12 Bitwise inclusive OR operator', '88'),
('6.5.13 Logical AND operator', '89'),
('6.5.14 Logical OR operator', '89'),
('6.5.15 Conditional operator', '90'),
('6.5.16 Assignment operators', '91'),
('6.5.17 Comma operator', '94'),
('6.6 Constant expressions', '95'),
('6.7 Declarations', '97'),
('6.7.1 Storage-class specifiers', '98'),
('6.7.2 Type specifiers', '99'),
('6.7.3 Type qualifiers', '108'),
('6.7.4 Function specifiers', '112'),
('6.7.5 Declarators', '114'),
('6.7.6 Type names', '122'),
('6.7.7 Type definitions', '123'),
('6.7.8 Initialization', '125'),
('6.8 Statements and blocks', '131'),
('6.8.1 Labeled statements', '131'),
('6.8.2 Compound statement', '132'),
('6.8.3 Expression and null statements', '132'),
('6.8.4 Selection statements', '133'),
('6.8.5 Iteration statements', '135'),
('6.8.6 Jump statements', '136'),
('6.9 External definitions', '140'),
('6.9.1 Function definitions', '141'),
('6.9.2 External object definitions', '143'),
('6.10 Preprocessing directives', '145'),
('6.10.1 Conditional inclusion', '147'),
('6.10.2 Source file inclusion', '149'),
('6.10.3 Macro replacement', '151'),
('6.10.4 Line control', '158'),
('6.10.5 Error directive', '159'),
('6.10.6 Pragma directive', '159'),
('6.10.7 Null directive', '160'),
('6.10.8 Predefined macro names', '160'),
('6.10.9 Pragma operator', '161'),
('6.11 Future language directions', '163'),
('6.11.1 Floating types', '163'),
('6.11.2 Linkages of identifiers', '163'),
('6.11.3 External names', '163'),
('6.11.4 Character escape sequences', '163'),
('6.11.5 Storage-class specifiers', '163'),
('6.11.6 Function declarators', '163'),
('6.11.7 Function definitions', '163'),
('6.11.8 Pragma directives', '163'),
('6.11.9 Predefined macro names', '163'),
('7. Library', '164'),
('7.1 Introduction', '164'),
('7.1.1 Definitions of terms', '164'),
('7.1.2 Standard headers', '165'),
('7.1.3 Reserved identifiers', '166'),
('7.1.4 Use of library functions', '166'),
('7.2 Diagnostics ', '169'),
('7.2.1 Program diagnostics', '169'),
('7.3 Complex arithmetic ', '170'),
('7.3.1 Introduction', '170'),
('7.3.2 Conventions', '170'),
('7.3.3 Branch cuts', '171'),
('7.3.4 The CX_LIMITED_RANGE pragma', '171'),
('7.3.5 Trigonometric functions', '172'),
('7.3.6 Hyperbolic functions', '174'),
('7.3.7 Exponential and logarithmic functions', '176'),
('7.3.8 Power and absolute-value functions', '177'),
('7.3.9 Manipulation functions', '178'),
('7.4 Character handling ', '181'),
('7.4.1 Character classification functions', '181'),
('7.4.2 Character case mapping functions', '184'),
('7.5 Errors ', '186'),
('7.6 Floating-point environment ', '187'),
('7.6.1 The FENV_ACCESS pragma', '189'),
('7.6.2 Floating-point exceptions', '190'),
('7.6.3 Rounding', '193'),
('7.6.4 Environment', '194'),
('7.7 Characteristics of floating types ', '197'),
('7.8 Format conversion of integer types ', '198'),
('7.8.1 Macros for format specifiers', '198'),
('7.8.2 Functions for greatest-width integer types', '199'),
('7.9 Alternative spellings ', '202'),
('7.10 Sizes of integer types ', '203'),
('7.11 Localization ', '204'),
('7.11.1 Locale control', '205'),
('7.11.2 Numeric formatting convention inquiry', '206'),
('7.12 Mathematics ', '212'),
('7.12.1 Treatment of error conditions', '214'),
('7.12.2 The FP_CONTRACT pragma', '215'),
('7.12.3 Classification macros', '216'),
('7.12.4 Trigonometric functions', '218'),
('7.12.5 Hyperbolic functions', '221'),
('7.12.6 Exponential and logarithmic functions', '223'),
('7.12.7 Power and absolute-value functions', '228'),
('7.12.8 Error and gamma functions', '230'),
('7.12.9 Nearest integer functions', '231'),
('7.12.10 Remainder functions', '235'),
('7.12.11 Manipulation functions', '236'),
('7.12.12 Maximum, minimum, and positive difference functions', '238'),
('7.12.13 Floating multiply-add', '239'),
('7.12.14 Comparison macros', '240'),
('7.13 Nonlocal jumps ', '243'),
('7.13.1 Save calling environment', '243'),
('7.13.2 Restore calling environment', '244'),
('7.14 Signal handling ', '246'),
('7.14.1 Specify signal handling', '247'),
('7.14.2 Send signal', '248'),
('7.15 Variable arguments ', '249'),
('7.15.1 Variable argument list access macros', '249'),
('7.16 Boolean type and values ', '253'),
('7.17 Common definitions ', '254'),
('7.18 Integer types ', '255'),
('7.18.1 Integer types', '255'),
('7.18.2 Limits of specified-width integer types', '257'),
('7.18.3 Limits of other integer types', '259'),
('7.18.4 Macros for integer constants', '260'),
('7.19 Input/output ', '262'),
('7.19.1 Introduction', '262'),
('7.19.2 Streams', '264'),
('7.19.3 Files', '266'),
('7.19.4 Operations on files', '268'),
('7.19.5 File access functions', '270'),
('7.19.6 Formatted input/output functions', '274'),
('7.19.7 Character input/output functions', '296'),
('7.19.8 Direct input/output functions', '301'),
('7.19.9 File positioning functions', '302'),
('7.19.10 Error-handling functions', '304'),
('7.20 General utilities ', '306'),
('7.20.1 Numeric conversion functions', '307'),
('7.20.2 Pseudo-random sequence generation functions', '312'),
('7.20.3 Memory management functions', '313'),
('7.20.4 Communication with the environment', '315'),
('7.20.5 Searching and sorting utilities', '318'),
('7.20.6 Integer arithmetic functions', '320'),
('7.20.7 Multibyte/wide character conversion functions', '321'),
('7.20.8 Multibyte/wide string conversion functions', '323'),
('7.21 String handling ', '325'),
('7.21.1 String function conventions', '325'),
('7.21.2 Copying functions', '325'),
('7.21.3 Concatenation functions', '327'),
('7.21.4 Comparison functions', '328'),
('7.21.5 Search functions', '330'),
('7.21.6 Miscellaneous functions', '333'),
('7.22 Type-generic math ', '335'),
('7.23 Date and time ', '338'),
('7.23.1 Components of time', '338'),
('7.23.2 Time manipulation functions', '339'),
('7.23.3 Time conversion functions', '341'),
('7.24 Extended multibyte and wide character utilities ', '348'),
('7.24.1 Introduction', '348'),
('7.24.2 Formatted wide character input/output functions', '349'),
('7.24.3 Wide character input/output functions', '367'),
('7.24.4 General wide string utilities', '371'),
('7.24.5 Wide character time conversion functions', '385'),
('7.24.6 Extended multibyte/wide character conversion utilities', '386'),
('7.25 Wide character classification and mapping utilities ',
  '393'),
('7.25.1 Introduction', '393'),
('7.25.2 Wide character classification utilities', '394'),
('7.25.3 Wide character case mapping utilities', '399'),
('7.26 Future library directions', '401'),
('7.26.1 Complex arithmetic ', '401'),
('7.26.2 Character handling ', '401'),
('7.26.3 Errors ', '401'),
('7.26.4 Format conversion of integer types ', '401'),
('7.26.5 Localization ', '401'),
('7.26.6 Signal handling ', '401'),
('7.26.7 Boolean type and values ', '401'),
('7.26.8 Integer types ', '401'),
('7.26.9 Input/output ', '402'),
('7.26.10 General utilities ', '402'),
('7.26.11 String handling ', '402'),
('', '402'),
('', '402'),
('Annex A (informative) Language syntax summary', '403'),
('A.1 Lexical grammar', '403'),
('A.2 Phrase structure grammar', '409'),
('A.3 Preprocessing directives', '416'),
('Annex B (informative) Library summary', '418'),
('B.1 Diagnostics ', '418'),
('B.2 Complex ', '418'),
('B.3 Character handling ', '420'),
('B.4 Errors ', '420'),
('B.5 Floating-point environment ', '420'),
('B.6 Characteristics of floating types ', '421'),
('B.7 Format conversion of integer types ', '421'),
('B.8 Alternative spellings ', '422'),
('B.9 Sizes of integer types ', '422'),
('B.10 Localization ', '422'),
('B.11 Mathematics ', '422'),
('B.12 Nonlocal jumps ', '427'),
('B.13 Signal handling ', '427'),
('B.14 Variable arguments ', '427'),
('B.15 Boolean type and values ', '427'),
('B.16 Common definitions ', '428'),
('B.17 Integer types ', '428'),
('B.18 Input/output ', '428'),
('B.19 General utilities ', '430'),
('B.20 String handling ', '432'),
('B.21 Type-generic math ', '433'),
('B.22 Date and time ', '433'),
('B.23 Extended multibyte/wide character utilities ', '434'),
('B.24 Wide character classification and mapping utilities ',
  '436'),
('Annex C (informative) Sequence points', '438'),
('Annex D (normative) Universal character names for identifiers', '439'),
('Annex E (informative) Implementation limits', '441'),
('Annex F (normative) IEC 60559 floating-point arithmetic', '443'),
('F.1 Introduction', '443'),
('F.2 Types', '443'),
('F.3 Operators and functions', '444'),
('F.4 Floating to integer conversion', '446'),
('F.5 Binary-decimal conversion', '446'),
('F.6 Contracted expressions', '447'),
('F.7 Floating-point environment', '447'),
('F.8 Optimization', '450'),
('F.9 Mathematics ', '453'),
('Annex G (informative) IEC 60559-compatible complex arithmetic', '466'),
('G.1 Introduction', '466'),
('G.2 Types', '466'),
('G.3 Conventions', '466'),
('G.4 Conversions', '467'),
('G.5 Binary operators', '467'),
('G.6 Complex arithmetic ', '471'),
('G.7 Type-generic math ', '479'),
('Annex H (informative) Language independent arithmetic', '480'),
('H.1 Introduction', '480'),
('H.2 Types', '480'),
('H.3 Notification', '484'),
('Annex I (informative) Common warnings', '486'),
('Annex J (informative) Portability issues', '488'),
('J.1 Unspecified behavior', '488'),
('J.2 Undefined behavior', '491'),
('J.3 Implementation-defined behavior', '504'),
('J.4 Locale-specific behavior', '511'),
('J.5 Common extensions', '512'),
('Bibliography', '515'),
('Index', '517')]

# Document name -> (URL, table of contents, offset added to TOC page numbers).
kDocuments = {
    'C99' : (c99URL, c99TOC, 12)
}

def findClosestTOCEntry(data, target):
    """Return the TOC entry whose section number is closest to *target*.

    data   -- a kDocuments value: (url, toc, pageOffset).
    target -- sequence of section indices, e.g. [6, 5, 3].

    Returns (section, name, page), or None when no TOC entry has a numeric
    section (the original indexed into None in that case).
    """
    offset = data[2]
    best = None
    for (name,page) in data[1]:
        if ' ' not in name:
            # No leading section number ('Foreword', 'Index', ...).
            continue
        section,name = name.split(' ',1)
        if section == 'Annex':
            section,name = name.split(' ',1)
            section = 'Annex '+section
        try:
            page = int(page) + offset
        except ValueError:
            # Roman-numeral front matter pages.
            page = 1
        try:
            spec = SpecIndex.fromstring(section)
        except ValueError:
            # Non-numeric sections ('Annex A', 'A.1', '6.') are skipped.
            continue

        # Meh, could be better...
        dist = spec - target
        if best is None or dist < best[0]:
            best = (dist, (section, name, page))
    if best is None:
        return None
    return best[1]

# What a hack. Slow to boot.
# NOTE(review): the pattern was an empty string in the text under review,
# which makes m.group(1) below fail on every match. This anchor form is a
# reconstruction -- confirm against the generated doxygen HTML.
doxyLineRefRE = re.compile(r'<a name="l([0-9]+)">')
def findClosestLineReference(clangRoot, doxyName, target):
    """Return the doxygen line anchor ('l' + digits) nearest line *target*.

    Reads docs/doxygen/html/<doxyName> under *clangRoot*; returns None when
    that file cannot be opened or contains no line anchors.
    """
    try:
        f = open(os.path.join(clangRoot, 'docs', 'doxygen', 'html', doxyName))
    except IOError:
        return None

    best = None
    try:
        for m in doxyLineRefRE.finditer(f.read()):
            line = int(m.group(1), 10)
            dist = abs(line - target)
            if best is None or dist < best[0]:
                best = (dist,'l'+m.group(1))
    finally:
        f.close()
    if best is not None:
        return best[1]
    return None

###

nameAndSpecRefRE = re.compile(r"(C99|C90|C\+\+|H\&S) (([0-9]+)(\.[0-9]+)*(p[0-9]+)?)")
loneSpecRefRE = re.compile(r" (([0-9]+)(\.[0-9]+){2,100}(p[0-9]+)?)")
def scanFile(path, filename):
    """Yield a RefItem for every specification reference found in *path*.

    Named references ('C99 6.5.3p2') take precedence; a bare dotted number
    is reported with name None only if the same text was not already
    reported as a named reference on that line.  A file that cannot be
    opened produces a warning on stderr and yields nothing.
    """
    try:
        f = open(path)
    except IOError:
        sys.stderr.write('WARNING: Unable to open: %s\n'%(path,))
        return

    try:
        for i,ln in enumerate(f):
            ignore = set()
            for m in nameAndSpecRefRE.finditer(ln):
                section = m.group(2)
                name = m.group(1)
                if section.endswith('.'):
                    section = section[:-1]
                yield RefItem(name, section, filename, path, i+1)
                ignore.add(section)
            for m in loneSpecRefRE.finditer(ln):
                section = m.group(1)
                if section.endswith('.'):
                    section = section[:-1]
                if section not in ignore:
                    yield RefItem(None, section, filename, path, i+1)
    finally:
        f.close()

###

class _KeyOrdered(object):
    """Mixin deriving ==, !=, ordering and hashing from a _key() tuple."""

    def _key(self):
        raise NotImplementedError

    def __eq__(self, b):
        if not isinstance(b, _KeyOrdered):
            return NotImplemented
        return self._key() == b._key()

    def __ne__(self, b):
        if not isinstance(b, _KeyOrdered):
            return NotImplemented
        return self._key() != b._key()

    def __lt__(self, b):
        if not isinstance(b, _KeyOrdered):
            return NotImplemented
        return self._key() < b._key()

    def __le__(self, b):
        if not isinstance(b, _KeyOrdered):
            return NotImplemented
        return self._key() <= b._key()

    def __gt__(self, b):
        if not isinstance(b, _KeyOrdered):
            return NotImplemented
        return self._key() > b._key()

    def __ge__(self, b):
        if not isinstance(b, _KeyOrdered):
            return NotImplemented
        return self._key() >= b._key()

    def __hash__(self):
        return hash(self._key())

class SpecIndex(_KeyOrdered):
    """A section number such as 6.5.3 with an optional paragraph (p2)."""

    @staticmethod
    def fromstring(str):
        """Parse '6.5.3' or '6.5.3p2'; raises ValueError if not numeric."""
        secs = str.split('.')
        paragraph = None
        if 'p' in secs[-1]:
            secs[-1],p = secs[-1].split('p',1)
            paragraph = int(p)
        indices = [int(s) for s in secs]
        return SpecIndex(indices, paragraph)

    def __init__(self, indices, paragraph=None):
        assert len(indices)>0
        self.indices = tuple(indices)
        self.paragraph = paragraph

    def __str__(self):
        s = '.'.join(map(str,self.indices))
        if self.paragraph is not None:
            s += '.p%d'%(self.paragraph,)
        return s

    def __repr__(self):
        return 'SpecIndex(%s, %s)'%(self.indices, self.paragraph)

    def _key(self):
        # A missing paragraph sorts before any numbered paragraph; -1 stands
        # in for None so the tuple stays comparable.
        if self.paragraph is None:
            return (self.indices, -1)
        return (self.indices, self.paragraph)

    def __sub__(self, indices):
        """Return the per-level absolute differences against *indices*.

        The shorter sequence is padded with zeros, so the result is as long
        as the longer of the two.
        """
        mine = list(self.indices)
        theirs = list(indices)
        width = max(len(mine), len(theirs))
        mine += [0]*(width - len(mine))
        theirs += [0]*(width - len(theirs))
        return [abs((a or 0) - (b or 0)) for a,b in zip(mine, theirs)]

class RefItem(_KeyOrdered):
    """One reference: document name (or None), section, and source location."""

    def __init__(self, name, section, filename, path, line):
        self.name = name
        self.section = SpecIndex.fromstring(section)
        self.filename = filename
        self.path = path
        self.line = line

    def __str__(self):
        if self.name is not None:
            return '%s %s'%(self.name, self.section)
        else:
            return '--- %s'%(self.section,)

    def __repr__(self):
        return 'RefItem(%s, %r, "%s", "%s", %d)'%(self.name,
                                                  self.section,
                                                  self.filename,
                                                  self.path,
                                                  self.line)

    def _key(self):
        # Unnamed references sort first.  Location fields are part of the
        # key; the original compared them against self, so two references
        # differing only by file or line compared equal.
        return ((self.name is not None, self.name or ''),
                self.section._key(),
                self.filename, self.path, self.line)

###

def getRevision(path):
    """Return the SVN revision of *path*, or None if it cannot be found.

    Best effort: missing svn bindings or any failure in the svn client
    yields None rather than aborting the report.
    """
    revision = [None]

    def info_cb(path, info, pool):
        revision[0] = info.rev

    try:
        import svn, svn.core, svn.client
    except ImportError:
        return None

    try:
        root = os.path.abspath(path)
        svn.core.apr_initialize()
        pool = svn.core.svn_pool_create(None)
        ctx = svn.client.svn_client_ctx_t()
        svn.client.svn_client_info(root,
                                   None,
                                   None,
                                   info_cb,
                                   False,
                                   ctx,
                                   pool)
        svn.core.svn_pool_destroy(pool)
    except Exception:
        # Deliberately ignored: the revision is informational only.
        pass

    return revision[0]

def buildRefTree(references):
    """Group *references* into a sorted tree keyed by name, then section.

    Each node is (key, children, data): children is a list of nodes sorted
    by key (the unnamed document first), data the sorted RefItems that sit
    exactly at that node.  The returned root has key None.
    """
    root = (None, {}, [])

    def getNode(keys):
        if not keys:
            return root
        key,parent = keys[-1],getNode(keys[:-1])
        node = parent[1].get(key)
        if node is None:
            parent[1][key] = node = (key, {}, [])
        return node

    for ref in references:
        n = getNode((ref.name,) + ref.section.indices)
        n[2].append(ref)

    def nodeOrder(node):
        # Sibling keys are unique; None (unnamed document) goes first.
        return (node[0] is not None, node[0])

    def flatten(node):
        key,children,data = node
        children = sorted([flatten(c) for c in children.values()],
                          key=nodeOrder)
        return (key, children, sorted(data))

    return flatten(root)

def preorder(node,parents=(),first=True):
    """Yield the path (tuple of nodes from the start node) to each node.

    With first=False the start node's own path is not yielded; its
    descendants always are.
    """
    (key,children,data) = node
    if first:
        yield parents+(node,)
    for c in children:
        for item in preorder(c, parents+(node,)):
            yield item

def main():
    """Scan CLANG_ROOT and write references.html into the output directory."""
    global options
    from optparse import OptionParser
    # NOTE(review): the second placeholder was missing from the usage text
    # although two arguments are required; '<output-dir>' is a reconstruction.
    parser = OptionParser("usage: %prog [options] CLANG_ROOT <output-dir>")

    (options, args) = parser.parse_args()

    if len(args) != 2:
        parser.error("incorrect number of arguments")

    references = []
    root,outputDir = args
    for (dirpath, dirnames, filenames) in os.walk(root):
        for filename in filenames:
            name,ext = os.path.splitext(filename)
            if ext in ('.c', '.cpp', '.h', '.def'):
                fullpath = os.path.join(dirpath, filename)
                references.extend(scanFile(fullpath, filename))

    refTree = buildRefTree(references)

    print('Found %d references.'%(len(references),))

    referencesPath = os.path.join(outputDir,'references.html')
    print('Writing: %s'%(referencesPath,))
    f = open(referencesPath,'w')

    def emit(text):
        f.write(text + '\n')

    # NOTE(review): the HTML markup in the literals below is reconstructed
    # so that each format string consumes exactly the arguments it is given;
    # the text under review had the tags removed.  Confirm the exact markup.
    try:
        emit('<html><head><title>clang: Specification References</title></head>')
        emit('<body>')
        emit('\t<h2>Specification References</h2>')
        for i,node in enumerate(refTree[1]):
            specName = node[0] or 'Unknown'
            emit('<a href="#spec%d">%s</a><br>'%(i,specName))
        for i,node in enumerate(refTree[1]):
            specName = node[0] or 'Unknown'
            emit('<hr>')
            emit('<a name="spec%d"></a>'%(i,))
            emit('<h3>Document: %s</h3>'%(specName or 'Unknown',))
            emit('<table border="1" cellspacing="2" width="80%">')
            emit('<tr><th width="20%">Name</th><th>References</th></tr>')
            docData = kDocuments.get(specName)
            for path in preorder(node,first=False):
                if not path[-1][2]:
                    continue
                components = '.'.join([str(p[0]) for p in path[1:]])
                emit('\t<tr>')
                tocEntry = None
                if docData is not None:
                    tocEntry = findClosestTOCEntry(docData, [p[0] for p in path[1:]])
                if tocEntry is not None:
                    section,name,page = tocEntry
                    # If section is exact print the TOC name
                    if page is not None:
                        linkStr = '<a href="%s#page=%d">%s</a> (pg.%d)'%(docData[0],page,components,page)
                    else:
                        linkStr = components
                    if section == components:
                        emit('\t\t<td valign=top>%s<br>%s</td>'%(linkStr,name))
                    else:
                        emit('\t\t<td valign=top>%s</td>'%(linkStr,))
                else:
                    emit('\t\t<td valign=top>%s</td>'%(components,))
                emit('\t\t<td valign=top>')
                for item in path[-1][2]:
                    # XXX total hack
                    relativePath = item.path[len(root):]
                    if relativePath.startswith('/'):
                        relativePath = relativePath[1:]
                    # XXX this is broken, how does doxygen mangle w/ multiple
                    # refs? Can we just read its map?
                    filename = os.path.basename(relativePath)
                    doxyName = '%s-source.html'%(filename.replace('.','_8'),)
                    # Grrr, why can't doxygen write line number references.
                    lineReference = findClosestLineReference(root,doxyName,item.line)
                    if lineReference is not None:
                        linkStr = 'http://clang.llvm.org/doxygen/%s#%s'%(doxyName,lineReference)
                    else:
                        linkStr = 'http://clang.llvm.org/doxygen/%s'%(doxyName,)
                    if item.section.paragraph is not None:
                        paraText = '&nbsp;(p%d)'%(item.section.paragraph,)
                    else:
                        paraText = ''
                    emit('<a href="%s">%s:%d</a>%s<br>'%(linkStr,relativePath,item.line,paraText))
                emit('\t\t</td>')
                emit('\t</tr>')
            emit('</table>')
        emit('<hr>')
        emit('Generated: %s<br>'%(time.strftime('%Y-%m-%d %H:%M'),))
        emit('SVN Revision: %s'%(getRevision(root),))
        emit('</body>')
    finally:
        f.close()

if __name__=='__main__':
    main()