clang-1/lib/AST/CommentParser.cpp

414 строки
13 KiB
C++
Исходник Обычный вид История

//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "clang/AST/CommentParser.h"
#include "clang/AST/CommentSema.h"
#include "llvm/Support/ErrorHandling.h"
namespace clang {
namespace comments {
/// Initializes the parser's lexer, semantic-actions and allocator members from
/// the given arguments and then calls consumeToken() once, presumably so that
/// \c Tok already holds the first token when a parse method is invoked.
Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator)
    : L(L),
      S(S),
      Allocator(Allocator) {
  consumeToken();
}
/// Extracts the arguments of a parameter command from \p Retokenizer.
///
/// Two optional pieces are looked for, in this order:
///   1. a bracket-delimited sequence such as [in], [out] or [in,out], which is
///      reported to Sema with IsDirection set to true;
///   2. a single word, reported to Sema with IsDirection set to false.
///
/// \returns the node handed back by the last Sema callback that ran, or \p PC
/// itself when neither piece was found.
ParamCommandComment *Parser::parseParamCommandArgs(
    ParamCommandComment *PC,
    TextTokenRetokenizer &Retokenizer) {
  Token ArgTok;

  // Optional direction specification in square brackets.
  const bool HaveDirection = Retokenizer.lexDelimitedSeq(ArgTok, '[', ']');
  if (HaveDirection) {
    PC = S.actOnParamCommandArg(PC,
                                ArgTok.getLocation(),
                                ArgTok.getEndLocation(),
                                ArgTok.getText(),
                                /* IsDirection = */ true);
  }

  // Optional word argument; the same token object is reused for it.
  const bool HaveWord = Retokenizer.lexWord(ArgTok);
  if (HaveWord) {
    PC = S.actOnParamCommandArg(PC,
                                ArgTok.getLocation(),
                                ArgTok.getEndLocation(),
                                ArgTok.getText(),
                                /* IsDirection = */ false);
  }

  return PC;
}
/// Reads up to \p NumArgs word arguments for a block command.
///
/// Storage for \p NumArgs arguments is obtained from the parser's allocator up
/// front; words are then lexed from \p Retokenizer until either the limit is
/// reached or no further word is available.  Only the arguments that were
/// actually lexed are passed on to Sema.
///
/// \returns the node returned by Sema::actOnBlockCommandArgs().
BlockCommandComment *Parser::parseBlockCommandArgs(
    BlockCommandComment *BC,
    TextTokenRetokenizer &Retokenizer,
    unsigned NumArgs) {
  typedef BlockCommandComment::Argument Argument;
  Argument *Storage = new (Allocator) Argument[NumArgs];

  unsigned Count = 0;
  Token WordTok;
  for (; Count < NumArgs && Retokenizer.lexWord(WordTok); ++Count) {
    Storage[Count] = Argument(SourceRange(WordTok.getLocation(),
                                          WordTok.getEndLocation()),
                              WordTok.getText());
  }

  return S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Storage, Count));
}
/// Parses a block command (a parameter command or any other block command),
/// its arguments and the paragraph that belongs to it.
///
/// On entry \c Tok must be the tok::command token naming the command.  The
/// command is started through Sema, its arguments (if it takes any) are lexed
/// from the text tokens that follow, and finally a paragraph is attached:
///   - an empty paragraph when another block command follows immediately,
///     because block commands cannot be nested;
///   - otherwise the paragraph produced by parseParagraphOrBlockCommand().
///
/// \returns the node returned by Sema::actOnParamCommandFinish() for parameter
/// commands and by Sema::actOnBlockCommandFinish() for all other commands.
BlockCommandComment *Parser::parseBlockCommand() {
  assert(Tok.is(tok::command));

  // Only one of these is assigned below, depending on the command kind.  Both
  // start out null so that the unused one never holds an indeterminate value.
  ParamCommandComment *PC = 0;
  BlockCommandComment *BC = 0;
  bool IsParam = false;
  unsigned NumArgs = 0;
  if (S.isParamCommand(Tok.getCommandName())) {
    IsParam = true;
    PC = S.actOnParamCommandStart(Tok.getLocation(),
                                  Tok.getEndLocation(),
                                  Tok.getCommandName());
  } else {
    NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName());
    BC = S.actOnBlockCommandStart(Tok.getLocation(),
                                  Tok.getEndLocation(),
                                  Tok.getCommandName());
  }
  consumeToken();

  // Block command ahead.  We can't nest block commands, so pretend that this
  // command has an empty argument.
  bool BlockCommandAhead =
      Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName());

  if (!BlockCommandAhead && (IsParam || NumArgs > 0)) {
    // In order to parse command arguments we need to retokenize a few
    // following text tokens.
    TextTokenRetokenizer Retokenizer(Allocator);
    // NOTE(review): this loop only advances when addToken() returns true;
    // presumably it accepts every tok::text token -- confirm.
    while (Tok.is(tok::text)) {
      if (Retokenizer.addToken(Tok))
        consumeToken();
    }

    if (IsParam)
      PC = parseParamCommandArgs(PC, Retokenizer);
    else
      BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs);

    // Put back tokens we didn't use.
    Token Text;
    while (Retokenizer.lexText(Text))
      putBack(Text);

    // The arguments may have used up all of the text, leaving a block command
    // as the current token.  parseParagraphOrBlockCommand() would then return
    // a block command instead of a paragraph, so check again.
    BlockCommandAhead =
        Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName());
  }

  ParagraphComment *Paragraph;
  if (BlockCommandAhead) {
    // TODO: Diag() Warn empty arg to block command
    Paragraph = S.actOnParagraphComment(ArrayRef<InlineContentComment *>());
  } else {
    // Since we have checked for a block command, we should have parsed a
    // paragraph.
    // NOTE(review): a newline directly followed by a block command is not
    // detected here and still reaches parseBlockCommand() from inside
    // parseParagraphOrBlockCommand().
    Paragraph = cast<ParagraphComment>(parseParagraphOrBlockCommand());
  }

  // Finish through the callback that matches the command kind; the parameter
  // command node must not be dropped when its paragraph is empty.
  if (IsParam)
    return S.actOnParamCommandFinish(PC, Paragraph);
  return S.actOnBlockCommandFinish(BC, Paragraph);
}
/// Parses an inline command and its optional single-word argument.
///
/// On entry \c Tok must be the tok::command token of the inline command.  The
/// text tokens that follow are fed to a retokenizer, one word is lexed from
/// them as the argument, and whatever text remains is put back for the caller.
///
/// \returns the node created by Sema::actOnInlineCommand(), using the overload
/// with argument information only when a word was found.
InlineCommandComment *Parser::parseInlineCommand() {
  assert(Tok.is(tok::command));

  const Token NameTok = Tok;
  consumeToken();

  // Collect the text tokens that follow so that a word can be split off.
  TextTokenRetokenizer Retokenizer(Allocator);
  while (Tok.is(tok::text)) {
    if (Retokenizer.addToken(Tok))
      consumeToken();
  }

  Token WordTok;
  InlineCommandComment *Result;
  if (!Retokenizer.lexWord(WordTok)) {
    // No argument available.
    Result = S.actOnInlineCommand(NameTok.getLocation(),
                                  NameTok.getEndLocation(),
                                  NameTok.getCommandName());
  } else {
    Result = S.actOnInlineCommand(NameTok.getLocation(),
                                  NameTok.getEndLocation(),
                                  NameTok.getCommandName(),
                                  WordTok.getLocation(),
                                  WordTok.getEndLocation(),
                                  WordTok.getText());
  }

  // Hand the unused text back to the token stream.
  Token Leftover;
  while (Retokenizer.lexText(Leftover))
    putBack(Leftover);

  return Result;
}
/// Parses an HTML opening tag together with its attribute list.
///
/// On entry \c Tok must be tok::html_tag_open.  Tokens are then handled until
/// the tag ends:
///   - tok::html_ident starts an attribute, optionally followed by
///     tok::html_equals and tok::html_quoted_string; when the '=' or the
///     quoted string is missing, an attribute carrying only its name is
///     recorded;
///   - tok::html_greater closes the tag and is consumed;
///   - stray tok::html_equals / tok::html_quoted_string tokens are skipped;
///   - any other token ends the tag early, and the tag is finished with a
///     default-constructed SourceLocation in place of the '>' location.
///
/// \returns the node returned by Sema::actOnHTMLOpenTagFinish().
HTMLOpenTagComment *Parser::parseHTMLOpenTag() {
  assert(Tok.is(tok::html_tag_open));

  typedef HTMLOpenTagComment::Attribute Attribute;

  HTMLOpenTagComment *Tag =
      S.actOnHTMLOpenTagStart(Tok.getLocation(),
                              Tok.getHTMLTagOpenName());
  consumeToken();

  SmallVector<Attribute, 2> Attrs;
  for (;;) {
    if (Tok.is(tok::html_greater)) {
      // Regular end of the tag.
      Tag = S.actOnHTMLOpenTagFinish(Tag,
                                     copyArray(llvm::makeArrayRef(Attrs)),
                                     Tok.getLocation());
      consumeToken();
      return Tag;
    }

    if (Tok.is(tok::html_equals) || Tok.is(tok::html_quoted_string)) {
      // TODO: Diag() Err expected ident
      do {
        consumeToken();
      } while (Tok.is(tok::html_equals) ||
               Tok.is(tok::html_quoted_string));
      continue;
    }

    if (Tok.isNot(tok::html_ident)) {
      // Not a token from HTML open tag.  Thus HTML tag prematurely ended.
      // TODO: Diag() Err HTML tag prematurely ended
      return S.actOnHTMLOpenTagFinish(Tag,
                                      copyArray(llvm::makeArrayRef(Attrs)),
                                      SourceLocation());
    }

    // An attribute: name [ '=' quoted-string ].
    Token NameTok = Tok;
    consumeToken();
    if (Tok.isNot(tok::html_equals)) {
      Attrs.push_back(Attribute(NameTok.getLocation(),
                                NameTok.getHTMLIdent()));
      continue;
    }

    Token EqualsTok = Tok;
    consumeToken();
    if (Tok.isNot(tok::html_quoted_string)) {
      // TODO: Diag() expected quoted string
      Attrs.push_back(Attribute(NameTok.getLocation(),
                                NameTok.getHTMLIdent()));
      continue;
    }

    Attrs.push_back(Attribute(NameTok.getLocation(),
                              NameTok.getHTMLIdent(),
                              EqualsTok.getLocation(),
                              SourceRange(Tok.getLocation(),
                                          Tok.getEndLocation()),
                              Tok.getHTMLQuotedString()));
    consumeToken();
  }
}
/// Parses an HTML closing tag.
///
/// On entry \c Tok must be tok::html_tag_close.  A following tok::html_greater
/// is consumed and its location recorded; when it is absent, a
/// default-constructed SourceLocation is passed to Sema instead.
///
/// \returns the node created by Sema::actOnHTMLCloseTag().
HTMLCloseTagComment *Parser::parseHTMLCloseTag() {
  assert(Tok.is(tok::html_tag_close));

  Token CloseTagTok = Tok;
  consumeToken();

  SourceLocation GreaterLoc;
  const bool HasGreater = Tok.is(tok::html_greater);
  if (HasGreater) {
    GreaterLoc = Tok.getLocation();
    consumeToken();
  }

  return S.actOnHTMLCloseTag(CloseTagTok.getLocation(),
                             GreaterLoc,
                             CloseTagTok.getHTMLTagCloseName());
}
/// Parses a paragraph of inline content, or a block command when the current
/// token is a block command and no inline content has been collected yet.
///
/// Inline content consists of text, inline commands, unknown commands and HTML
/// opening/closing tags.  The paragraph ends at:
///   - the start of a verbatim block or verbatim line, or EOF;
///   - a block command (when content has already been collected);
///   - a newline that is followed by another newline or by EOF; that following
///     token is consumed as well.
/// A single newline does not end the paragraph; it is recorded on the most
/// recently collected inline node through addTrailingNewline().
///
/// The returned paragraph may be empty: parseBlockCommand() calls this
/// function right after a command's arguments, where a verbatim token or EOF
/// can be the very next token.
///
/// \returns the result of parseBlockCommand(), or the node created by
/// Sema::actOnParagraphComment().
BlockContentComment *Parser::parseParagraphOrBlockCommand() {
  SmallVector<InlineContentComment *, 8> Content;

  while (true) {
    switch (Tok.getKind()) {
    case tok::verbatim_block_begin:
    case tok::verbatim_line_name:
    case tok::eof:
      // Content is allowed to be empty here (see the function comment), so no
      // assertion is made about its size.
      break; // Block content or EOF ahead, finish this paragraph.

    case tok::command:
      if (S.isBlockCommand(Tok.getCommandName())) {
        if (Content.size() == 0)
          return parseBlockCommand();
        break; // Block command ahead, finish this paragraph.
      }
      if (S.isInlineCommand(Tok.getCommandName())) {
        Content.push_back(parseInlineCommand());
        continue;
      }

      // Not a block command, not an inline command ==> an unknown command.
      Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
                                              Tok.getEndLocation(),
                                              Tok.getCommandName()));
      consumeToken();
      continue;

    case tok::newline: {
      consumeToken();
      if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
        consumeToken();
        break; // Two newlines -- end of paragraph.
      }
      if (Content.size() > 0)
        Content.back()->addTrailingNewline();
      continue;
    }

    // Don't deal with HTML tag soup now.
    case tok::html_tag_open:
      Content.push_back(parseHTMLOpenTag());
      continue;

    case tok::html_tag_close:
      Content.push_back(parseHTMLCloseTag());
      continue;

    case tok::text:
      Content.push_back(S.actOnText(Tok.getLocation(),
                                    Tok.getEndLocation(),
                                    Tok.getText()));
      consumeToken();
      continue;

    case tok::verbatim_block_line:
    case tok::verbatim_block_end:
    case tok::verbatim_line_text:
    case tok::html_ident:
    case tok::html_equals:
    case tok::html_quoted_string:
    case tok::html_greater:
      llvm_unreachable("should not see this token");
    }
    // Reached only through a 'break' inside the switch: the paragraph is done.
    break;
  }

  return S.actOnParagraphComment(copyArray(llvm::makeArrayRef(Content)));
}
/// Parses a verbatim block: the opening command, its lines and the closing
/// command.
///
/// On entry \c Tok must be tok::verbatim_block_begin.  A newline directly
/// after the opening command is skipped so that it does not produce an empty
/// first line.  Every tok::verbatim_block_line becomes one line (a newline
/// right after it is consumed with it), and every other tok::newline becomes
/// an empty line.
///
/// When the lines are not followed by tok::verbatim_block_end the block is
/// treated as unterminated: it is finished with a default-constructed
/// SourceLocation and an empty name, and the current token is left in place
/// for the caller.
///
/// \returns the node returned by Sema::actOnVerbatimBlockFinish().
VerbatimBlockComment *Parser::parseVerbatimBlock() {
  assert(Tok.is(tok::verbatim_block_begin));

  VerbatimBlockComment *VB =
      S.actOnVerbatimBlockStart(Tok.getLocation(),
                                Tok.getVerbatimBlockName());
  consumeToken();

  // Don't create an empty line if verbatim opening command is followed
  // by a newline.
  if (Tok.is(tok::newline))
    consumeToken();

  SmallVector<VerbatimBlockLineComment *, 8> Lines;
  while (Tok.is(tok::verbatim_block_line) ||
         Tok.is(tok::newline)) {
    VerbatimBlockLineComment *Line;
    if (Tok.is(tok::verbatim_block_line)) {
      Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
                                      Tok.getVerbatimBlockText());
      consumeToken();
      if (Tok.is(tok::newline)) {
        consumeToken();
      }
    } else {
      // Empty line, just a tok::newline.
      Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
                                      "");
      consumeToken();
    }
    Lines.push_back(Line);
  }

  if (Tok.is(tok::verbatim_block_end)) {
    VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
                                    Tok.getVerbatimBlockName(),
                                    copyArray(llvm::makeArrayRef(Lines)));
    consumeToken();
  } else {
    // Unterminated verbatim block.  The current token is not the closing
    // command, so neither its location nor its name may be taken from it.
    // TODO: Diag() Warn unterminated verbatim block
    VB = S.actOnVerbatimBlockFinish(VB, SourceLocation(),
                                    "",
                                    copyArray(llvm::makeArrayRef(Lines)));
  }
  return VB;
}
/// Parses a verbatim line command and the text that follows it on the line.
///
/// On entry \c Tok must be tok::verbatim_line_name.  When the next token is
/// tok::verbatim_line_text its location and text are used; otherwise the text
/// is empty and is located at the end of the command name.  In both cases the
/// token following the command name is consumed before returning.
///
/// \returns the node created by Sema::actOnVerbatimLine().
VerbatimLineComment *Parser::parseVerbatimLine() {
  assert(Tok.is(tok::verbatim_line_name));

  Token CommandTok = Tok;
  consumeToken();

  // Next token might not be a tok::verbatim_line_text if verbatim line
  // starting command comes just before a newline or comment end.
  const bool HasText = Tok.is(tok::verbatim_line_text);
  SourceLocation TextBegin =
      HasText ? Tok.getLocation() : CommandTok.getEndLocation();
  StringRef Text = HasText ? Tok.getVerbatimLineText() : StringRef("");

  VerbatimLineComment *Result =
      S.actOnVerbatimLine(CommandTok.getLocation(),
                          CommandTok.getVerbatimLineName(),
                          TextBegin,
                          Text);
  consumeToken();
  return Result;
}
/// Dispatches on the current token kind to parse one piece of block content.
///
///   - tok::verbatim_block_begin        -> parseVerbatimBlock()
///   - tok::verbatim_line_name          -> parseVerbatimLine()
///   - text, command, HTML open/close   -> parseParagraphOrBlockCommand()
///
/// Every other token kind is marked unreachable at this point.
BlockContentComment *Parser::parseBlockContent() {
  switch (Tok.getKind()) {
  case tok::verbatim_block_begin:
    return parseVerbatimBlock();

  case tok::verbatim_line_name:
    return parseVerbatimLine();

  case tok::text:
  case tok::command:
  case tok::html_tag_open:
  case tok::html_tag_close:
    return parseParagraphOrBlockCommand();

  // None of the remaining kinds can begin block content.
  case tok::eof:
  case tok::newline:
  case tok::verbatim_block_line:
  case tok::verbatim_block_end:
  case tok::verbatim_line_text:
  case tok::html_ident:
  case tok::html_equals:
  case tok::html_quoted_string:
  case tok::html_greater:
    llvm_unreachable("should not see this token");
  }
}
/// Parses an entire comment into a sequence of block-content nodes.
///
/// Newlines before the first block and between blocks are skipped; blocks are
/// parsed with parseBlockContent() until tok::eof is the current token.
///
/// \returns the node created by Sema::actOnFullComment().
FullComment *Parser::parseFullComment() {
  SmallVector<BlockContentComment *, 8> Blocks;

  for (;;) {
    // Skip newlines at the beginning of the comment and the extra newlines
    // that follow the end of a block.
    while (Tok.is(tok::newline))
      consumeToken();

    if (Tok.is(tok::eof))
      break;

    Blocks.push_back(parseBlockContent());
  }

  return S.actOnFullComment(copyArray(llvm::makeArrayRef(Blocks)));
}
} // end namespace comments
} // end namespace clang