// clang-1/lib/AST/CommentParser.cpp

//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "clang/AST/CommentParser.h"
#include "clang/AST/CommentSema.h"
#include "llvm/Support/ErrorHandling.h"

namespace clang {
namespace comments {

/// Create a parser that initializes its \c L, \c S and \c Allocator members
/// from the given lexer, semantic-actions object and allocator.
Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator)
    : L(L),
      S(S),
      Allocator(Allocator) {
  // Every parse routine below inspects Tok on entry, so advance once up front
  // (presumably this loads the first token from the lexer).
  consumeToken();
}

/// Parse the arguments of a parameter command from retokenized text.
///
/// Up to two arguments are reported to Sema, in this order: a sequence
/// delimited by square brackets (flagged as a direction) and then a single
/// word (flagged as not a direction).  Either one may be absent.
///
/// \param PC          the command being built; it is threaded through Sema.
/// \param Retokenizer source of the text that follows the command.
/// \returns the command as returned by the last Sema callback, or \p PC
///          unchanged if no argument was found.
ParamCommandComment *Parser::parseParamCommandArgs(
    ParamCommandComment *PC,
    TextTokenRetokenizer &Retokenizer) {
  Token ArgTok;

  // A bracketed sequence looks like a direction specification,
  // e.g., [in], [out], [in,out].
  const bool HasDirection = Retokenizer.lexDelimitedSeq(ArgTok, '[', ']');
  if (HasDirection) {
    PC = S.actOnParamCommandArg(PC,
                                ArgTok.getLocation(),
                                ArgTok.getEndLocation(),
                                ArgTok.getText(),
                                /* IsDirection = */ true);
  }

  // Whatever word comes next is reported as the non-direction argument.
  const bool HasWord = Retokenizer.lexWord(ArgTok);
  if (HasWord) {
    PC = S.actOnParamCommandArg(PC,
                                ArgTok.getLocation(),
                                ArgTok.getEndLocation(),
                                ArgTok.getText(),
                                /* IsDirection = */ false);
  }

  return PC;
}

/// Parse up to \p NumArgs word arguments of a block command.
///
/// Storage for \p NumArgs arguments is taken from the parser's allocator, but
/// only the arguments that were actually lexed are constructed and handed to
/// Sema.
///
/// \param BC          the command being built; it is threaded through Sema.
/// \param Retokenizer source of the text that follows the command.
/// \param NumArgs     maximum number of word arguments to consume.
/// \returns whatever Sema::actOnBlockCommandArgs returns for \p BC.
BlockCommandComment *Parser::parseBlockCommandArgs(
    BlockCommandComment *BC,
    TextTokenRetokenizer &Retokenizer,
    unsigned NumArgs) {
  typedef BlockCommandComment::Argument Argument;

  // Grab raw storage only.  An array placement-new expression is allowed to
  // ask for more bytes than sizeof(Argument) * NumArgs (array allocation
  // overhead), which the allocator did not reserve, and it would also
  // default-construct every slot just to have it overwritten below.
  Argument *Args = Allocator.Allocate<Argument>(NumArgs);

  unsigned ParsedArgs = 0;
  Token Arg;
  while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
    // Construct each argument directly in its slot.
    new (Args + ParsedArgs) Argument(SourceRange(Arg.getLocation(),
                                                 Arg.getEndLocation()),
                                     Arg.getText());
    ++ParsedArgs;
  }

  // Only the constructed prefix is exposed to Sema.
  return S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
}

/// Parse a block command: the command token, its arguments (if the command
/// takes any) and the paragraph that serves as its body.
///
/// Parameter commands are built through the actOnParamCommand* callbacks, all
/// other block commands through the actOnBlockCommand* callbacks.  Block
/// commands cannot nest, so if another block command follows immediately
/// (either right after the command token or right after its arguments), this
/// command is finished with an empty paragraph and the following command is
/// left for the caller.
///
/// \returns the finished command as returned by the matching Sema "Finish"
///          callback.
BlockCommandComment *Parser::parseBlockCommand() {
  assert(Tok.is(tok::command));

  // Exactly one of PC / BC is set, depending on IsParam.
  ParamCommandComment *PC = 0;
  BlockCommandComment *BC = 0;
  bool IsParam = false;
  unsigned NumArgs = 0;
  if (S.isParamCommand(Tok.getCommandName())) {
    IsParam = true;
    PC = S.actOnParamCommandStart(Tok.getLocation(),
                                  Tok.getEndLocation(),
                                  Tok.getCommandName());
  } else {
    NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName());
    BC = S.actOnBlockCommandStart(Tok.getLocation(),
                                  Tok.getEndLocation(),
                                  Tok.getCommandName());
  }
  consumeToken();

  // Block command ahead.  We can't nest block commands, so pretend that this
  // command has an empty argument.
  // TODO: Diag() Warn empty arg to block command
  bool BlockCommandAhead =
      Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName());

  if (!BlockCommandAhead && (IsParam || NumArgs > 0)) {
    // In order to parse command arguments we need to retokenize a few
    // following text tokens.  Stop feeding as soon as the retokenizer declines
    // a token; retrying the same token could never make progress.
    TextTokenRetokenizer Retokenizer(Allocator);
    while (Tok.is(tok::text) && Retokenizer.addToken(Tok))
      consumeToken();

    if (IsParam)
      PC = parseParamCommandArgs(PC, Retokenizer);
    else
      BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs);

    // Put back tokens we didn't use.
    Token Text;
    while (Retokenizer.lexText(Text))
      putBack(Text);

    // The arguments may have used up all of the text, exposing a block
    // command.  parseParagraphOrBlockCommand() would return that command
    // rather than a paragraph, so treat it like the case above.
    BlockCommandAhead =
        Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName());
  }

  ParagraphComment *Paragraph;
  if (BlockCommandAhead) {
    Paragraph = S.actOnParagraphComment(ArrayRef<InlineContentComment *>());
  } else {
    // NOTE(review): a newline followed by a block command still makes
    // parseParagraphOrBlockCommand() return a block command here, which this
    // cast does not expect -- confirm whether the lexer can produce that.
    Paragraph = cast<ParagraphComment>(parseParagraphOrBlockCommand());
  }

  // Finish through the callback family that started the command, so that a
  // parameter command is never finished via the (unset) BC pointer.
  if (IsParam)
    return S.actOnParamCommandFinish(PC, Paragraph);
  return S.actOnBlockCommandFinish(BC, Paragraph);
}

/// Parse an inline command together with its optional single-word argument.
///
/// The text tokens following the command are handed to a retokenizer, one
/// word is lexed from them as the argument, and any text that was not used is
/// put back into the token stream.
///
/// \returns the node built by Sema::actOnInlineCommand, using the overload
///          with or without argument information as appropriate.
InlineCommandComment *Parser::parseInlineCommand() {
  assert(Tok.is(tok::command));

  const Token CommandTok = Tok;
  consumeToken();

  // Stop feeding as soon as the retokenizer declines a token: offering it the
  // same token again could never make progress and would spin forever.
  TextTokenRetokenizer Retokenizer(Allocator);
  while (Tok.is(tok::text) && Retokenizer.addToken(Tok))
    consumeToken();

  Token ArgTok;
  InlineCommandComment *IC;
  if (Retokenizer.lexWord(ArgTok)) {
    IC = S.actOnInlineCommand(CommandTok.getLocation(),
                              CommandTok.getEndLocation(),
                              CommandTok.getCommandName(),
                              ArgTok.getLocation(),
                              ArgTok.getEndLocation(),
                              ArgTok.getText());
  } else {
    // No word available: build the command without an argument.
    IC = S.actOnInlineCommand(CommandTok.getLocation(),
                              CommandTok.getEndLocation(),
                              CommandTok.getCommandName());
  }

  // Put back tokens we didn't use.
  Token Text;
  while (Retokenizer.lexText(Text))
    putBack(Text);

  return IC;
}

/// Parse an HTML opening tag and its attribute list.
///
/// Attributes are collected until either the closing '>' token is seen or a
/// token that cannot belong to an opening tag shows up.  In the latter case
/// the tag is finished with a default-constructed SourceLocation in place of
/// the location of '>'.  Stray '=' and quoted-string tokens that do not
/// follow an identifier are skipped.
///
/// \returns the tag as returned by Sema::actOnHTMLOpenTagFinish.
HTMLOpenTagComment *Parser::parseHTMLOpenTag() {
  assert(Tok.is(tok::html_tag_open));
  HTMLOpenTagComment *Tag =
      S.actOnHTMLOpenTagStart(Tok.getLocation(),
                              Tok.getHTMLTagOpenName());
  consumeToken();

  typedef HTMLOpenTagComment::Attribute Attribute;
  SmallVector<Attribute, 2> Attrs;
  for (;;) {
    // '>' terminates the tag normally.
    if (Tok.is(tok::html_greater)) {
      Tag = S.actOnHTMLOpenTagFinish(Tag,
                                     copyArray(llvm::makeArrayRef(Attrs)),
                                     Tok.getLocation());
      consumeToken();
      return Tag;
    }

    // '=' or a quoted string without a preceding identifier: skip the run.
    if (Tok.is(tok::html_equals) || Tok.is(tok::html_quoted_string)) {
      // TODO: Diag() Err expected ident
      do {
        consumeToken();
      } while (Tok.is(tok::html_equals) ||
               Tok.is(tok::html_quoted_string));
      continue;
    }

    if (Tok.isNot(tok::html_ident)) {
      // Not a token from HTML open tag.  Thus HTML tag prematurely ended.
      // TODO: Diag() Err HTML tag prematurely ended
      return S.actOnHTMLOpenTagFinish(Tag,
                                      copyArray(llvm::makeArrayRef(Attrs)),
                                      SourceLocation());
    }

    // An attribute: identifier, optionally followed by '=' "value".
    Token Ident = Tok;
    consumeToken();
    if (Tok.is(tok::html_equals)) {
      Token Equals = Tok;
      consumeToken();
      if (Tok.is(tok::html_quoted_string)) {
        Attrs.push_back(Attribute(Ident.getLocation(),
                                  Ident.getHTMLIdent(),
                                  Equals.getLocation(),
                                  SourceRange(Tok.getLocation(),
                                              Tok.getEndLocation()),
                                  Tok.getHTMLQuotedString()));
        consumeToken();
        continue;
      }
      // TODO: Diag() expected quoted string
    }

    // Either there was no '=' at all, or the value after it was missing;
    // in both cases record the attribute by name only.
    Attrs.push_back(Attribute(Ident.getLocation(),
                              Ident.getHTMLIdent()));
  }
}

/// Parse an HTML closing tag.
///
/// If the tag token is followed by a '>' token, that token is consumed and its
/// location is passed on; otherwise a default-constructed SourceLocation is
/// passed instead.
///
/// \returns the node built by Sema::actOnHTMLCloseTag.
HTMLCloseTagComment *Parser::parseHTMLCloseTag() {
  assert(Tok.is(tok::html_tag_close));

  // Remember the tag token; its location and name are needed after advancing.
  Token CloseTagTok = Tok;
  consumeToken();

  SourceLocation GreaterLoc;
  const bool HasGreater = Tok.is(tok::html_greater);
  if (HasGreater) {
    GreaterLoc = Tok.getLocation();
    consumeToken();
  }

  return S.actOnHTMLCloseTag(CloseTagTok.getLocation(),
                             GreaterLoc,
                             CloseTagTok.getHTMLTagCloseName());
}

/// Parse a paragraph of inline content, or -- if a block command is found
/// before any inline content has been collected -- that block command.
///
/// Inline content is text, inline commands, unknown commands and HTML tags.
/// The paragraph ends at two consecutive newlines (or a newline followed by
/// EOF), at EOF, at the start of a verbatim block or verbatim line, or at a
/// block command that follows some content.
///
/// \returns the result of parseBlockCommand() in the block-command case,
///          otherwise the paragraph built by Sema::actOnParagraphComment
///          (which may have no content).
BlockContentComment *Parser::parseParagraphOrBlockCommand() {
  SmallVector<InlineContentComment *, 8> Content;

  while (true) {
    switch (Tok.getKind()) {
    case tok::verbatim_block_begin:
    case tok::verbatim_line_name:
    case tok::eof:
      // Content may be empty here: parseBlockCommand() calls this function
      // for the command body without looking at the next token first, so a
      // block command at the very end of a comment, or directly before
      // verbatim content, gets an empty paragraph.
      break; // Block content or EOF ahead, finish this paragraph.

    case tok::command:
      if (S.isBlockCommand(Tok.getCommandName())) {
        if (Content.size() == 0)
          return parseBlockCommand();
        break; // Block command ahead, finish this paragraph.
      }
      if (S.isInlineCommand(Tok.getCommandName())) {
        Content.push_back(parseInlineCommand());
        continue;
      }

      // Not a block command, not an inline command ==> an unknown command.
      Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
                                              Tok.getEndLocation(),
                                              Tok.getCommandName()));
      consumeToken();
      continue;

    case tok::newline: {
      consumeToken();
      if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
        consumeToken();
        break; // Two newlines -- end of paragraph.
      }
      // A single newline stays inside the paragraph; record it on the
      // preceding piece of content, if there is one.
      if (Content.size() > 0)
        Content.back()->addTrailingNewline();
      continue;
    }

    // Don't deal with HTML tag soup now.
    case tok::html_tag_open:
      Content.push_back(parseHTMLOpenTag());
      continue;

    case tok::html_tag_close:
      Content.push_back(parseHTMLCloseTag());
      continue;

    case tok::text:
      Content.push_back(S.actOnText(Tok.getLocation(),
                                    Tok.getEndLocation(),
                                    Tok.getText()));
      consumeToken();
      continue;

    // These are only consumed by the verbatim and HTML tag parsers.
    case tok::verbatim_block_line:
    case tok::verbatim_block_end:
    case tok::verbatim_line_text:
    case tok::html_ident:
    case tok::html_equals:
    case tok::html_quoted_string:
    case tok::html_greater:
      llvm_unreachable("should not see this token");
    }
    // Reached only via a 'break' inside the switch: the paragraph is done.
    break;
  }

  return S.actOnParagraphComment(copyArray(llvm::makeArrayRef(Content)));
}

/// Parse a verbatim block: the opening command, its lines and the closing
/// command.
///
/// A newline directly after the opening command does not produce a line.
/// After that, every verbatim line token and every bare newline token yields
/// one line (a bare newline yields an empty one).  If the block is not
/// followed by a closing token, it is finished with a default-constructed
/// close location and an empty close name, and the offending token is left in
/// the stream.
///
/// \returns the block as returned by Sema::actOnVerbatimBlockFinish.
VerbatimBlockComment *Parser::parseVerbatimBlock() {
  assert(Tok.is(tok::verbatim_block_begin));

  VerbatimBlockComment *VB =
      S.actOnVerbatimBlockStart(Tok.getLocation(),
                                Tok.getVerbatimBlockName());
  consumeToken();

  // Don't create an empty line if verbatim opening command is followed
  // by a newline.
  if (Tok.is(tok::newline))
    consumeToken();

  SmallVector<VerbatimBlockLineComment *, 8> Lines;
  while (Tok.is(tok::verbatim_block_line) ||
         Tok.is(tok::newline)) {
    VerbatimBlockLineComment *Line;
    if (Tok.is(tok::verbatim_block_line)) {
      Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
                                      Tok.getVerbatimBlockText());
      consumeToken();
      // The newline that ends this line is not a line of its own.
      if (Tok.is(tok::newline)) {
        consumeToken();
      }
    } else {
      // Empty line, just a tok::newline.
      Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
                                      "");
      consumeToken();
    }
    Lines.push_back(Line);
  }

  if (Tok.is(tok::verbatim_block_end)) {
    VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
                                    Tok.getVerbatimBlockName(),
                                    copyArray(llvm::makeArrayRef(Lines)));
    consumeToken();
  } else {
    // Unterminated verbatim block.  Do not read verbatim-end fields from, or
    // consume, a token of some other kind.
    // TODO: Diag() Err verbatim block is not terminated
    VB = S.actOnVerbatimBlockFinish(VB, SourceLocation(),
                                    "",
                                    copyArray(llvm::makeArrayRef(Lines)));
  }

  return VB;
}

/// Parse a verbatim line: the command name and the text after it.
///
/// If no verbatim text token follows the name, the text is empty and its
/// begin location is the end location of the name token.  The token that
/// follows the name is consumed in either case.
///
/// \returns the node built by Sema::actOnVerbatimLine.
VerbatimLineComment *Parser::parseVerbatimLine() {
  assert(Tok.is(tok::verbatim_line_name));

  Token NameTok = Tok;
  consumeToken();

  // Start from the "no text" values.  The next token might not be a
  // tok::verbatim_line_text if the verbatim line starting command comes just
  // before a newline or comment end.
  SourceLocation TextBegin = NameTok.getEndLocation();
  StringRef Text = "";
  if (Tok.is(tok::verbatim_line_text)) {
    TextBegin = Tok.getLocation();
    Text = Tok.getVerbatimLineText();
  }

  VerbatimLineComment *Result =
      S.actOnVerbatimLine(NameTok.getLocation(),
                          NameTok.getVerbatimLineName(),
                          TextBegin,
                          Text);
  consumeToken();
  return Result;
}

/// Parse one piece of block content, dispatching on the kind of the current
/// token: verbatim line, verbatim block, or paragraph / block command.
///
/// Every other token kind is treated as unreachable here; parseFullComment()
/// skips newlines and stops at EOF before calling this function.
BlockContentComment *Parser::parseBlockContent() {
  switch (Tok.getKind()) {
  case tok::verbatim_line_name:
    return parseVerbatimLine();

  case tok::verbatim_block_begin:
    return parseVerbatimBlock();

  // Anything that can start inline content, plus commands.
  case tok::html_tag_close:
  case tok::html_tag_open:
  case tok::command:
  case tok::text:
    return parseParagraphOrBlockCommand();

  // Tokens that cannot start block content.
  case tok::html_greater:
  case tok::html_quoted_string:
  case tok::html_equals:
  case tok::html_ident:
  case tok::verbatim_line_text:
  case tok::verbatim_block_end:
  case tok::verbatim_block_line:
  case tok::newline:
  case tok::eof:
    llvm_unreachable("should not see this token");
  }
  // Reached only for a value outside the enumeration.
  llvm_unreachable("bogus token kind");
}

/// Parse an entire comment as a sequence of block content items.
///
/// Newline tokens before the first block, between blocks and after the last
/// block are skipped.  Parsing stops at the EOF token.
///
/// \returns the node built by Sema::actOnFullComment from the parsed blocks.
FullComment *Parser::parseFullComment() {
  SmallVector<BlockContentComment *, 8> Blocks;

  for (;;) {
    // Skip newlines at the beginning of the comment and any extra newlines
    // left over after the previous block.
    while (Tok.is(tok::newline))
      consumeToken();

    if (Tok.is(tok::eof))
      break;

    Blocks.push_back(parseBlockContent());
  }

  return S.actOnFullComment(copyArray(llvm::makeArrayRef(Blocks)));
}

} // end namespace comments
} // end namespace clang
Implement AST classes for comments, a real parser for Doxygen comments and a very simple semantic analysis that just builds the AST; minor changes for lexer to pick up source locations I didn't think about before. Comments AST is modelled along the ideas of HTML AST: block and inline content. * Block content is a paragraph or a command that has a paragraph as an argument or verbatim command. * Inline content is placed within some block. Inline content includes plain text, inline commands and HTML as tag soup. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@159790 91177308-0d34-0410-b5e6-96231b3b80d8 2012-07-06 04:28:32 +04:00			`//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//`
			`//`
			`// The LLVM Compiler Infrastructure`
			`//`
			`// This file is distributed under the University of Illinois Open Source`
			`// License. See LICENSE.TXT for details.`
			`//`
			`//===----------------------------------------------------------------------===//`

			`#include "clang/AST/CommentParser.h"`
			`#include "clang/AST/CommentSema.h"`
			`#include "llvm/Support/ErrorHandling.h"`

			`namespace clang {`
			`namespace comments {`

			`Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator):`
			`L(L), S(S), Allocator(Allocator) {`
			`consumeToken();`
			`}`

			`ParamCommandComment *Parser::parseParamCommandArgs(`
			`ParamCommandComment *PC,`
			`TextTokenRetokenizer &Retokenizer) {`
			`Token Arg;`
			`// Check if argument looks like direction specification: [dir]`
			`// e.g., [in], [out], [in,out]`
			`if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))`
			`PC = S.actOnParamCommandArg(PC,`
			`Arg.getLocation(),`
			`Arg.getEndLocation(),`
			`Arg.getText(),`
			`/* IsDirection = */ true);`

			`if (Retokenizer.lexWord(Arg))`
			`PC = S.actOnParamCommandArg(PC,`
			`Arg.getLocation(),`
			`Arg.getEndLocation(),`
			`Arg.getText(),`
			`/* IsDirection = */ false);`

			`return PC;`
			`}`

			`BlockCommandComment *Parser::parseBlockCommandArgs(`
			`BlockCommandComment *BC,`
			`TextTokenRetokenizer &Retokenizer,`
			`unsigned NumArgs) {`
			`typedef BlockCommandComment::Argument Argument;`
Stop using new[] on llvm::BumpPtrAllocator. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@159833 91177308-0d34-0410-b5e6-96231b3b80d8 2012-07-06 20:41:59 +04:00			`Argument *Args =`
			`new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];`
Implement AST classes for comments, a real parser for Doxygen comments and a very simple semantic analysis that just builds the AST; minor changes for lexer to pick up source locations I didn't think about before. Comments AST is modelled along the ideas of HTML AST: block and inline content. * Block content is a paragraph or a command that has a paragraph as an argument or verbatim command. * Inline content is placed within some block. Inline content includes plain text, inline commands and HTML as tag soup. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@159790 91177308-0d34-0410-b5e6-96231b3b80d8 2012-07-06 04:28:32 +04:00			`unsigned ParsedArgs = 0;`
			`Token Arg;`
			`while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {`
			`Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),`
			`Arg.getEndLocation()),`
			`Arg.getText());`
			`ParsedArgs++;`
			`}`

			`return S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));`
			`}`

			`BlockCommandComment *Parser::parseBlockCommand() {`
			`assert(Tok.is(tok::command));`

			`ParamCommandComment *PC;`
			`BlockCommandComment *BC;`
			`bool IsParam = false;`
			`unsigned NumArgs = 0;`
			`if (S.isParamCommand(Tok.getCommandName())) {`
			`IsParam = true;`
			`PC = S.actOnParamCommandStart(Tok.getLocation(),`
			`Tok.getEndLocation(),`
			`Tok.getCommandName());`
			`} else {`
			`NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName());`
			`BC = S.actOnBlockCommandStart(Tok.getLocation(),`
			`Tok.getEndLocation(),`
			`Tok.getCommandName());`
			`}`
			`consumeToken();`

			`if (Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName())) {`
			`// Block command ahead. We can't nest block commands, so pretend that this`
			`// command has an empty argument.`
			`// TODO: Diag() Warn empty arg to block command`
			`ParagraphComment *PC = S.actOnParagraphComment(`
			`ArrayRef<InlineContentComment *>());`
			`return S.actOnBlockCommandFinish(BC, PC);`
			`}`

			`if (IsParam \|\| NumArgs > 0) {`
			`// In order to parse command arguments we need to retokenize a few`
			`// following text tokens.`
			`TextTokenRetokenizer Retokenizer(Allocator);`
			`while (Tok.is(tok::text)) {`
			`if (Retokenizer.addToken(Tok))`
			`consumeToken();`
			`}`

			`if (IsParam)`
			`PC = parseParamCommandArgs(PC, Retokenizer);`
			`else`
			`BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs);`

			`// Put back tokens we didn't use.`
			`Token Text;`
			`while (Retokenizer.lexText(Text))`
			`putBack(Text);`
			`}`

			`BlockContentComment *Block = parseParagraphOrBlockCommand();`
			`// Since we have checked for a block command, we should have parsed a`
			`// paragraph.`
			`if (IsParam)`
			`return S.actOnParamCommandFinish(PC, cast<ParagraphComment>(Block));`
			`else`
			`return S.actOnBlockCommandFinish(BC, cast<ParagraphComment>(Block));`
			`}`

			`InlineCommandComment *Parser::parseInlineCommand() {`
			`assert(Tok.is(tok::command));`

			`const Token CommandTok = Tok;`
			`consumeToken();`

			`TextTokenRetokenizer Retokenizer(Allocator);`
			`while (Tok.is(tok::text)) {`
			`if (Retokenizer.addToken(Tok))`
			`consumeToken();`
			`}`

			`Token ArgTok;`
			`bool ArgTokValid = Retokenizer.lexWord(ArgTok);`

			`InlineCommandComment *IC;`
			`if (ArgTokValid) {`
			`IC = S.actOnInlineCommand(CommandTok.getLocation(),`
			`CommandTok.getEndLocation(),`
			`CommandTok.getCommandName(),`
			`ArgTok.getLocation(),`
			`ArgTok.getEndLocation(),`
			`ArgTok.getText());`
			`} else {`
			`IC = S.actOnInlineCommand(CommandTok.getLocation(),`
			`CommandTok.getEndLocation(),`
			`CommandTok.getCommandName());`
			`}`

			`Token Text;`
			`while (Retokenizer.lexText(Text))`
			`putBack(Text);`

			`return IC;`
			`}`

			`HTMLOpenTagComment *Parser::parseHTMLOpenTag() {`
			`assert(Tok.is(tok::html_tag_open));`
			`HTMLOpenTagComment *HOT =`
			`S.actOnHTMLOpenTagStart(Tok.getLocation(),`
			`Tok.getHTMLTagOpenName());`
			`consumeToken();`

			`SmallVector<HTMLOpenTagComment::Attribute, 2> Attrs;`
			`while (true) {`
			`if (Tok.is(tok::html_ident)) {`
			`Token Ident = Tok;`
			`consumeToken();`
			`if (Tok.isNot(tok::html_equals)) {`
			`Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(),`
			`Ident.getHTMLIdent()));`
			`continue;`
			`}`
			`Token Equals = Tok;`
			`consumeToken();`
			`if (Tok.isNot(tok::html_quoted_string)) {`
			`// TODO: Diag() expected quoted string`
			`Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(),`
			`Ident.getHTMLIdent()));`
			`continue;`
			`}`
			`Attrs.push_back(HTMLOpenTagComment::Attribute(`
			`Ident.getLocation(),`
			`Ident.getHTMLIdent(),`
			`Equals.getLocation(),`
			`SourceRange(Tok.getLocation(),`
			`Tok.getEndLocation()),`
			`Tok.getHTMLQuotedString()));`
			`consumeToken();`
			`continue;`
			`} else if (Tok.is(tok::html_greater)) {`
			`HOT = S.actOnHTMLOpenTagFinish(HOT,`
			`copyArray(llvm::makeArrayRef(Attrs)),`
			`Tok.getLocation());`
			`consumeToken();`
			`return HOT;`
			`} else if (Tok.is(tok::html_equals) \|\|`
			`Tok.is(tok::html_quoted_string)) {`
			`// TODO: Diag() Err expected ident`
			`while (Tok.is(tok::html_equals) \|\|`
			`Tok.is(tok::html_quoted_string))`
			`consumeToken();`
			`} else {`
			`// Not a token from HTML open tag. Thus HTML tag prematurely ended.`
			`// TODO: Diag() Err HTML tag prematurely ended`
			`return S.actOnHTMLOpenTagFinish(HOT,`
			`copyArray(llvm::makeArrayRef(Attrs)),`
			`SourceLocation());`
			`}`
			`}`
			`}`

			`HTMLCloseTagComment *Parser::parseHTMLCloseTag() {`
			`assert(Tok.is(tok::html_tag_close));`
			`Token TokTagOpen = Tok;`
			`consumeToken();`
			`SourceLocation Loc;`
			`if (Tok.is(tok::html_greater)) {`
			`Loc = Tok.getLocation();`
			`consumeToken();`
			`}`

			`return S.actOnHTMLCloseTag(TokTagOpen.getLocation(),`
			`Loc,`
			`TokTagOpen.getHTMLTagCloseName());`
			`}`

			`BlockContentComment *Parser::parseParagraphOrBlockCommand() {`
			`SmallVector<InlineContentComment *, 8> Content;`

			`while (true) {`
			`switch (Tok.getKind()) {`
			`case tok::verbatim_block_begin:`
			`case tok::verbatim_line_name:`
			`case tok::eof:`
			`assert(Content.size() != 0);`
			`break; // Block content or EOF ahead, finish this parapgaph.`

			`case tok::command:`
			`if (S.isBlockCommand(Tok.getCommandName())) {`
			`if (Content.size() == 0)`
			`return parseBlockCommand();`
			`break; // Block command ahead, finish this parapgaph.`
			`}`
			`if (S.isInlineCommand(Tok.getCommandName())) {`
			`Content.push_back(parseInlineCommand());`
			`continue;`
			`}`

			`// Not a block command, not an inline command ==> an unknown command.`
			`Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),`
			`Tok.getEndLocation(),`
			`Tok.getCommandName()));`
			`consumeToken();`
			`continue;`

			`case tok::newline: {`
			`consumeToken();`
			`if (Tok.is(tok::newline) \|\| Tok.is(tok::eof)) {`
			`consumeToken();`
			`break; // Two newlines -- end of paragraph.`
			`}`
			`if (Content.size() > 0)`
			`Content.back()->addTrailingNewline();`
			`continue;`
			`}`

			`// Don't deal with HTML tag soup now.`
			`case tok::html_tag_open:`
			`Content.push_back(parseHTMLOpenTag());`
			`continue;`

			`case tok::html_tag_close:`
			`Content.push_back(parseHTMLCloseTag());`
			`continue;`

			`case tok::text:`
			`Content.push_back(S.actOnText(Tok.getLocation(),`
			`Tok.getEndLocation(),`
			`Tok.getText()));`
			`consumeToken();`
			`continue;`

			`case tok::verbatim_block_line:`
			`case tok::verbatim_block_end:`
			`case tok::verbatim_line_text:`
			`case tok::html_ident:`
			`case tok::html_equals:`
			`case tok::html_quoted_string:`
			`case tok::html_greater:`
			`llvm_unreachable("should not see this token");`
			`}`
			`break;`
			`}`

			`return S.actOnParagraphComment(copyArray(llvm::makeArrayRef(Content)));`
			`}`

			`VerbatimBlockComment *Parser::parseVerbatimBlock() {`
			`assert(Tok.is(tok::verbatim_block_begin));`

			`VerbatimBlockComment *VB =`
			`S.actOnVerbatimBlockStart(Tok.getLocation(),`
			`Tok.getVerbatimBlockName());`
			`consumeToken();`

			`// Don't create an empty line if verbatim opening command is followed`
			`// by a newline.`
			`if (Tok.is(tok::newline))`
			`consumeToken();`

			`SmallVector<VerbatimBlockLineComment *, 8> Lines;`
			`while (Tok.is(tok::verbatim_block_line) \|\|`
			`Tok.is(tok::newline)) {`
			`VerbatimBlockLineComment *Line;`
			`if (Tok.is(tok::verbatim_block_line)) {`
			`Line = S.actOnVerbatimBlockLine(Tok.getLocation(),`
			`Tok.getVerbatimBlockText());`
			`consumeToken();`
			`if (Tok.is(tok::newline)) {`
			`consumeToken();`
			`}`
			`} else {`
			`// Empty line, just a tok::newline.`
			`Line = S.actOnVerbatimBlockLine(Tok.getLocation(),`
			`"");`
			`consumeToken();`
			`}`
			`Lines.push_back(Line);`
			`}`

			`assert(Tok.is(tok::verbatim_block_end));`
			`VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),`
			`Tok.getVerbatimBlockName(),`
			`copyArray(llvm::makeArrayRef(Lines)));`
			`consumeToken();`

			`return VB;`
			`}`

			`VerbatimLineComment *Parser::parseVerbatimLine() {`
			`assert(Tok.is(tok::verbatim_line_name));`

			`Token NameTok = Tok;`
			`consumeToken();`

			`SourceLocation TextBegin;`
			`StringRef Text;`
			`// Next token might not be a tok::verbatim_line_text if verbatim line`
			`// starting command comes just before a newline or comment end.`
			`if (Tok.is(tok::verbatim_line_text)) {`
			`TextBegin = Tok.getLocation();`
			`Text = Tok.getVerbatimLineText();`
			`} else {`
			`TextBegin = NameTok.getEndLocation();`
			`Text = "";`
			`}`

			`VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),`
			`NameTok.getVerbatimLineName(),`
			`TextBegin,`
			`Text);`
			`consumeToken();`
			`return VL;`
			`}`

			`BlockContentComment *Parser::parseBlockContent() {`
			`switch (Tok.getKind()) {`
			`case tok::text:`
			`case tok::command:`
			`case tok::html_tag_open:`
			`case tok::html_tag_close:`
			`return parseParagraphOrBlockCommand();`

			`case tok::verbatim_block_begin:`
			`return parseVerbatimBlock();`

			`case tok::verbatim_line_name:`
			`return parseVerbatimLine();`

			`case tok::eof:`
			`case tok::newline:`
			`case tok::verbatim_block_line:`
			`case tok::verbatim_block_end:`
			`case tok::verbatim_line_text:`
			`case tok::html_ident:`
			`case tok::html_equals:`
			`case tok::html_quoted_string:`
			`case tok::html_greater:`
			`llvm_unreachable("should not see this token");`
			`}`
Sprinkle llvm_unreachable around to placate GCC's -Wreturn-type. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@159860 91177308-0d34-0410-b5e6-96231b3b80d8 2012-07-07 01:13:09 +04:00			`llvm_unreachable("bogus token kind");`
Implement AST classes for comments, a real parser for Doxygen comments and a very simple semantic analysis that just builds the AST; minor changes for lexer to pick up source locations I didn't think about before. Comments AST is modelled along the ideas of HTML AST: block and inline content. * Block content is a paragraph or a command that has a paragraph as an argument or verbatim command. * Inline content is placed within some block. Inline content includes plain text, inline commands and HTML as tag soup. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@159790 91177308-0d34-0410-b5e6-96231b3b80d8 2012-07-06 04:28:32 +04:00			`}`

			`FullComment *Parser::parseFullComment() {`
			`// Skip newlines at the beginning of the comment.`
			`while (Tok.is(tok::newline))`
			`consumeToken();`

			`SmallVector<BlockContentComment *, 8> Blocks;`
			`while (Tok.isNot(tok::eof)) {`
			`Blocks.push_back(parseBlockContent());`

			`// Skip extra newlines after paragraph end.`
			`while (Tok.is(tok::newline))`
			`consumeToken();`
			`}`
			`return S.actOnFullComment(copyArray(llvm::makeArrayRef(Blocks)));`
			`}`

			`} // end namespace comments`
			`} // end namespace clang`