From 32d28ee6061930b2a9c170cbaaeea028c88a7b89 Mon Sep 17 00:00:00 2001 From: Daniel Jasper Date: Tue, 29 Jan 2013 21:01:14 +0000 Subject: [PATCH] Move the token annotator into separate files. No functional changes. Also removed experimental-warning from all of clang-format's files, as it is no longer accurate. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@173830 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Format/Format.h | 3 - lib/Format/CMakeLists.txt | 1 + lib/Format/Format.cpp | 1031 +--------------------------- lib/Format/TokenAnnotator.cpp | 875 +++++++++++++++++++++++ lib/Format/TokenAnnotator.h | 228 ++++++ lib/Format/UnwrappedLineParser.cpp | 3 - lib/Format/UnwrappedLineParser.h | 3 - 7 files changed, 1105 insertions(+), 1039 deletions(-) create mode 100644 lib/Format/TokenAnnotator.cpp create mode 100644 lib/Format/TokenAnnotator.h diff --git a/include/clang/Format/Format.h b/include/clang/Format/Format.h index d34a04306e..451b6183e2 100644 --- a/include/clang/Format/Format.h +++ b/include/clang/Format/Format.h @@ -10,9 +10,6 @@ /// \file /// Various functions to configurably format source code. /// -/// This is EXPERIMENTAL code under heavy development. It is not in a state yet, -/// where it can be used to format real code. -/// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_FORMAT_FORMAT_H diff --git a/lib/Format/CMakeLists.txt b/lib/Format/CMakeLists.txt index 4b14e91414..d8630eeeea 100644 --- a/lib/Format/CMakeLists.txt +++ b/lib/Format/CMakeLists.txt @@ -1,6 +1,7 @@ set(LLVM_LINK_COMPONENTS support) add_clang_library(clangFormat + TokenAnnotator.cpp UnwrappedLineParser.cpp Format.cpp ) diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index ecd6f5d385..9dfde34e8b 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -11,13 +11,11 @@ /// \brief This file implements functions declared in Format.h. This will be /// split into separate files as we go. 
/// -/// This is EXPERIMENTAL code under heavy development. It is not in a state yet, -/// where it can be used to format real code. -/// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "format-formatter" +#include "TokenAnnotator.h" #include "UnwrappedLineParser.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/OperatorPrecedence.h" @@ -34,140 +32,6 @@ namespace clang { namespace format { -enum TokenType { - TT_BinaryOperator, - TT_BlockComment, - TT_CastRParen, - TT_ConditionalExpr, - TT_CtorInitializerColon, - TT_ImplicitStringLiteral, - TT_LineComment, - TT_ObjCBlockLParen, - TT_ObjCDecl, - TT_ObjCMethodSpecifier, - TT_ObjCMethodExpr, - TT_ObjCProperty, - TT_OverloadedOperator, - TT_PointerOrReference, - TT_PureVirtualSpecifier, - TT_RangeBasedForLoopColon, - TT_StartOfName, - TT_TemplateCloser, - TT_TemplateOpener, - TT_TrailingUnaryOperator, - TT_UnaryOperator, - TT_Unknown -}; - -enum LineType { - LT_Invalid, - LT_Other, - LT_BuilderTypeCall, - LT_PreprocessorDirective, - LT_VirtualFunctionDecl, - LT_ObjCDecl, // An @interface, @implementation, or @protocol line. - LT_ObjCMethodDecl, - LT_ObjCProperty // An @property line. 
-}; - -class AnnotatedToken { -public: - explicit AnnotatedToken(const FormatToken &FormatTok) - : FormatTok(FormatTok), Type(TT_Unknown), SpaceRequiredBefore(false), - CanBreakBefore(false), MustBreakBefore(false), - ClosesTemplateDeclaration(false), MatchingParen(NULL), - ParameterCount(1), Parent(NULL) { - } - - bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); } - bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); } - - bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { - return FormatTok.Tok.isObjCAtKeyword(Kind); - } - - FormatToken FormatTok; - - TokenType Type; - - bool SpaceRequiredBefore; - bool CanBreakBefore; - bool MustBreakBefore; - - bool ClosesTemplateDeclaration; - - AnnotatedToken *MatchingParen; - - /// \brief Number of parameters, if this is "(", "[" or "<". - /// - /// This is initialized to 1 as we don't need to distinguish functions with - /// 0 parameters from functions with 1 parameter. Thus, we can simply count - /// the number of commas. - unsigned ParameterCount; - - /// \brief The total length of the line up to and including this token. - unsigned TotalLength; - - /// \brief Penalty for inserting a line break before this token. 
- unsigned SplitPenalty; - - std::vector Children; - AnnotatedToken *Parent; - - const AnnotatedToken *getPreviousNoneComment() const { - AnnotatedToken *Tok = Parent; - while (Tok != NULL && Tok->is(tok::comment)) - Tok = Tok->Parent; - return Tok; - } -}; - -class AnnotatedLine { -public: - AnnotatedLine(const UnwrappedLine &Line) - : First(Line.Tokens.front()), Level(Line.Level), - InPPDirective(Line.InPPDirective), - MustBeDeclaration(Line.MustBeDeclaration) { - assert(!Line.Tokens.empty()); - AnnotatedToken *Current = &First; - for (std::list::const_iterator I = ++Line.Tokens.begin(), - E = Line.Tokens.end(); - I != E; ++I) { - Current->Children.push_back(AnnotatedToken(*I)); - Current->Children[0].Parent = Current; - Current = &Current->Children[0]; - } - Last = Current; - } - AnnotatedLine(const AnnotatedLine &Other) - : First(Other.First), Type(Other.Type), Level(Other.Level), - InPPDirective(Other.InPPDirective), - MustBeDeclaration(Other.MustBeDeclaration) { - Last = &First; - while (!Last->Children.empty()) { - Last->Children[0].Parent = Last; - Last = &Last->Children[0]; - } - } - - AnnotatedToken First; - AnnotatedToken *Last; - - LineType Type; - unsigned Level; - bool InPPDirective; - bool MustBeDeclaration; -}; - -static prec::Level getPrecedence(const AnnotatedToken &Tok) { - return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true); -} - -bool isBinaryOperator(const AnnotatedToken &Tok) { - // Comma is a binary operator, but does not behave as such wrt. formatting. - return getPrecedence(Tok) > prec::Comma; -} - FormatStyle getLLVMStyle() { FormatStyle LLVMStyle; LLVMStyle.ColumnLimit = 80; @@ -338,15 +202,6 @@ private: tooling::Replacements Replaces; }; -/// \brief Returns if a token is an Objective-C selector name. -/// -/// For example, "bar" is a selector name in [foo bar:(4 + 5)]. 
-static bool isObjCSelectorName(const AnnotatedToken &Tok) { - return Tok.is(tok::identifier) && !Tok.Children.empty() && - Tok.Children[0].is(tok::colon) && - Tok.Children[0].Type == TT_ObjCMethodExpr; -} - class UnwrappedLineFormatter { public: UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr, @@ -813,890 +668,6 @@ private: OptimizationParameters Parameters; }; -/// \brief Determines extra information about the tokens comprising an -/// \c UnwrappedLine. -class TokenAnnotator { -public: - TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex, - AnnotatedLine &Line) - : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Line(Line) { - } - - /// \brief A parser that gathers additional information about tokens. - /// - /// The \c TokenAnnotator tries to matches parenthesis and square brakets and - /// store a parenthesis levels. It also tries to resolve matching "<" and ">" - /// into template parameter lists. - class AnnotatingParser { - public: - AnnotatingParser(AnnotatedToken &RootToken) - : CurrentToken(&RootToken), KeywordVirtualFound(false), - ColonIsObjCMethodExpr(false), ColonIsForRangeExpr(false) { - } - - /// \brief A helper class to manage AnnotatingParser::ColonIsObjCMethodExpr. 
- struct ObjCSelectorRAII { - AnnotatingParser &P; - bool ColonWasObjCMethodExpr; - - ObjCSelectorRAII(AnnotatingParser &P) - : P(P), ColonWasObjCMethodExpr(P.ColonIsObjCMethodExpr) { - } - - ~ObjCSelectorRAII() { P.ColonIsObjCMethodExpr = ColonWasObjCMethodExpr; } - - void markStart(AnnotatedToken &Left) { - P.ColonIsObjCMethodExpr = true; - Left.Type = TT_ObjCMethodExpr; - } - - void markEnd(AnnotatedToken &Right) { Right.Type = TT_ObjCMethodExpr; } - }; - - bool parseAngle() { - if (CurrentToken == NULL) - return false; - AnnotatedToken *Left = CurrentToken->Parent; - while (CurrentToken != NULL) { - if (CurrentToken->is(tok::greater)) { - Left->MatchingParen = CurrentToken; - CurrentToken->MatchingParen = Left; - CurrentToken->Type = TT_TemplateCloser; - next(); - return true; - } - if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square) || - CurrentToken->is(tok::r_brace)) - return false; - if (CurrentToken->is(tok::pipepipe) || CurrentToken->is(tok::ampamp) || - CurrentToken->is(tok::question) || CurrentToken->is(tok::colon)) - return false; - if (CurrentToken->is(tok::comma)) - ++Left->ParameterCount; - if (!consumeToken()) - return false; - } - return false; - } - - bool parseParens(bool LookForDecls = false) { - if (CurrentToken == NULL) - return false; - bool StartsObjCMethodExpr = false; - AnnotatedToken *Left = CurrentToken->Parent; - if (CurrentToken->is(tok::caret)) { - // ^( starts a block. - Left->Type = TT_ObjCBlockLParen; - } else if (AnnotatedToken *MaybeSel = Left->Parent) { - // @selector( starts a selector. - if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent && - MaybeSel->Parent->is(tok::at)) { - StartsObjCMethodExpr = true; - } - } - - ObjCSelectorRAII objCSelector(*this); - if (StartsObjCMethodExpr) - objCSelector.markStart(*Left); - - while (CurrentToken != NULL) { - // LookForDecls is set when "if (" has been seen. 
Check for - // 'identifier' '*' 'identifier' followed by not '=' -- this - // '*' has to be a binary operator but determineStarAmpUsage() will - // categorize it as an unary operator, so set the right type here. - if (LookForDecls && !CurrentToken->Children.empty()) { - AnnotatedToken &Prev = *CurrentToken->Parent; - AnnotatedToken &Next = CurrentToken->Children[0]; - if (Prev.Parent->is(tok::identifier) && - (Prev.is(tok::star) || Prev.is(tok::amp)) && - CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) { - Prev.Type = TT_BinaryOperator; - LookForDecls = false; - } - } - - if (CurrentToken->is(tok::r_paren)) { - Left->MatchingParen = CurrentToken; - CurrentToken->MatchingParen = Left; - - if (StartsObjCMethodExpr) - objCSelector.markEnd(*CurrentToken); - - next(); - return true; - } - if (CurrentToken->is(tok::r_square) || CurrentToken->is(tok::r_brace)) - return false; - if (CurrentToken->is(tok::comma)) - ++Left->ParameterCount; - if (!consumeToken()) - return false; - } - return false; - } - - bool parseSquare() { - if (!CurrentToken) - return false; - - // A '[' could be an index subscript (after an indentifier or after - // ')' or ']'), or it could be the start of an Objective-C method - // expression. - AnnotatedToken *Left = CurrentToken->Parent; - bool StartsObjCMethodExpr = - !Left->Parent || Left->Parent->is(tok::colon) || - Left->Parent->is(tok::l_square) || Left->Parent->is(tok::l_paren) || - Left->Parent->is(tok::kw_return) || Left->Parent->is(tok::kw_throw) || - getBinOpPrecedence(Left->Parent->FormatTok.Tok.getKind(), true, - true) > prec::Unknown; - - ObjCSelectorRAII objCSelector(*this); - if (StartsObjCMethodExpr) - objCSelector.markStart(*Left); - - while (CurrentToken != NULL) { - if (CurrentToken->is(tok::r_square)) { - if (!CurrentToken->Children.empty() && - CurrentToken->Children[0].is(tok::l_paren)) { - // An ObjC method call can't be followed by an open parenthesis. - // FIXME: Do we incorrectly label ":" with this? 
- StartsObjCMethodExpr = false; - Left->Type = TT_Unknown; - } - if (StartsObjCMethodExpr) - objCSelector.markEnd(*CurrentToken); - Left->MatchingParen = CurrentToken; - CurrentToken->MatchingParen = Left; - next(); - return true; - } - if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_brace)) - return false; - if (CurrentToken->is(tok::comma)) - ++Left->ParameterCount; - if (!consumeToken()) - return false; - } - return false; - } - - bool parseBrace() { - // Lines are fine to end with '{'. - if (CurrentToken == NULL) - return true; - AnnotatedToken *Left = CurrentToken->Parent; - while (CurrentToken != NULL) { - if (CurrentToken->is(tok::r_brace)) { - Left->MatchingParen = CurrentToken; - CurrentToken->MatchingParen = Left; - next(); - return true; - } - if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square)) - return false; - if (!consumeToken()) - return false; - } - return true; - } - - bool parseConditional() { - while (CurrentToken != NULL) { - if (CurrentToken->is(tok::colon)) { - CurrentToken->Type = TT_ConditionalExpr; - next(); - return true; - } - if (!consumeToken()) - return false; - } - return false; - } - - bool parseTemplateDeclaration() { - if (CurrentToken != NULL && CurrentToken->is(tok::less)) { - CurrentToken->Type = TT_TemplateOpener; - next(); - if (!parseAngle()) - return false; - CurrentToken->Parent->ClosesTemplateDeclaration = true; - return true; - } - return false; - } - - bool consumeToken() { - AnnotatedToken *Tok = CurrentToken; - next(); - switch (Tok->FormatTok.Tok.getKind()) { - case tok::plus: - case tok::minus: - // At the start of the line, +/- specific ObjectiveC method - // declarations. - if (Tok->Parent == NULL) - Tok->Type = TT_ObjCMethodSpecifier; - break; - case tok::colon: - // Colons from ?: are handled in parseConditional(). 
- if (Tok->Parent->is(tok::r_paren)) - Tok->Type = TT_CtorInitializerColon; - else if (ColonIsObjCMethodExpr) - Tok->Type = TT_ObjCMethodExpr; - else if (ColonIsForRangeExpr) - Tok->Type = TT_RangeBasedForLoopColon; - break; - case tok::kw_if: - case tok::kw_while: - if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) { - next(); - if (!parseParens(/*LookForDecls=*/ true)) - return false; - } - break; - case tok::kw_for: - ColonIsForRangeExpr = true; - next(); - if (!parseParens()) - return false; - break; - case tok::l_paren: - if (!parseParens()) - return false; - break; - case tok::l_square: - if (!parseSquare()) - return false; - break; - case tok::l_brace: - if (!parseBrace()) - return false; - break; - case tok::less: - if (parseAngle()) - Tok->Type = TT_TemplateOpener; - else { - Tok->Type = TT_BinaryOperator; - CurrentToken = Tok; - next(); - } - break; - case tok::r_paren: - case tok::r_square: - return false; - case tok::r_brace: - // Lines can start with '}'. - if (Tok->Parent != NULL) - return false; - break; - case tok::greater: - Tok->Type = TT_BinaryOperator; - break; - case tok::kw_operator: - if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) { - CurrentToken->Type = TT_OverloadedOperator; - next(); - if (CurrentToken != NULL && CurrentToken->is(tok::r_paren)) { - CurrentToken->Type = TT_OverloadedOperator; - next(); - } - } else { - while (CurrentToken != NULL && CurrentToken->isNot(tok::l_paren)) { - CurrentToken->Type = TT_OverloadedOperator; - next(); - } - } - break; - case tok::question: - parseConditional(); - break; - case tok::kw_template: - parseTemplateDeclaration(); - break; - default: - break; - } - return true; - } - - void parseIncludeDirective() { - next(); - if (CurrentToken != NULL && CurrentToken->is(tok::less)) { - next(); - while (CurrentToken != NULL) { - if (CurrentToken->isNot(tok::comment) || - !CurrentToken->Children.empty()) - CurrentToken->Type = TT_ImplicitStringLiteral; - next(); - } - } else { - while 
(CurrentToken != NULL) { - next(); - } - } - } - - void parseWarningOrError() { - next(); - // We still want to format the whitespace left of the first token of the - // warning or error. - next(); - while (CurrentToken != NULL) { - CurrentToken->Type = TT_ImplicitStringLiteral; - next(); - } - } - - void parsePreprocessorDirective() { - next(); - if (CurrentToken == NULL) - return; - // Hashes in the middle of a line can lead to any strange token - // sequence. - if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL) - return; - switch ( - CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) { - case tok::pp_include: - case tok::pp_import: - parseIncludeDirective(); - break; - case tok::pp_error: - case tok::pp_warning: - parseWarningOrError(); - break; - default: - break; - } - } - - LineType parseLine() { - int PeriodsAndArrows = 0; - bool CanBeBuilderTypeStmt = true; - if (CurrentToken->is(tok::hash)) { - parsePreprocessorDirective(); - return LT_PreprocessorDirective; - } - while (CurrentToken != NULL) { - if (CurrentToken->is(tok::kw_virtual)) - KeywordVirtualFound = true; - if (CurrentToken->is(tok::period) || CurrentToken->is(tok::arrow)) - ++PeriodsAndArrows; - if (getPrecedence(*CurrentToken) > prec::Assignment && - CurrentToken->isNot(tok::less) && CurrentToken->isNot(tok::greater)) - CanBeBuilderTypeStmt = false; - if (!consumeToken()) - return LT_Invalid; - } - if (KeywordVirtualFound) - return LT_VirtualFunctionDecl; - - // Assume a builder-type call if there are 2 or more "." and "->". 
- if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt) - return LT_BuilderTypeCall; - - return LT_Other; - } - - void next() { - if (CurrentToken != NULL && !CurrentToken->Children.empty()) - CurrentToken = &CurrentToken->Children[0]; - else - CurrentToken = NULL; - } - - private: - AnnotatedToken *CurrentToken; - bool KeywordVirtualFound; - bool ColonIsObjCMethodExpr; - bool ColonIsForRangeExpr; - }; - - void calculateExtraInformation(AnnotatedToken &Current) { - Current.SpaceRequiredBefore = spaceRequiredBefore(Current); - - if (Current.FormatTok.MustBreakBefore) { - Current.MustBreakBefore = true; - } else { - if (Current.Type == TT_LineComment) { - Current.MustBreakBefore = Current.FormatTok.NewlinesBefore > 0; - } else if ((Current.Parent->is(tok::comment) && - Current.FormatTok.NewlinesBefore > 0) || - (Current.is(tok::string_literal) && - Current.Parent->is(tok::string_literal))) { - Current.MustBreakBefore = true; - } else { - Current.MustBreakBefore = false; - } - } - Current.CanBreakBefore = Current.MustBreakBefore || canBreakBefore(Current); - if (Current.MustBreakBefore) - Current.TotalLength = Current.Parent->TotalLength + Style.ColumnLimit; - else - Current.TotalLength = - Current.Parent->TotalLength + Current.FormatTok.TokenLength + - (Current.SpaceRequiredBefore ? 1 : 0); - // FIXME: Only calculate this if CanBreakBefore is true once static - // initializers etc. are sorted out. 
- Current.SplitPenalty = splitPenalty(Current); - if (!Current.Children.empty()) - calculateExtraInformation(Current.Children[0]); - } - - void annotate() { - AnnotatingParser Parser(Line.First); - Line.Type = Parser.parseLine(); - if (Line.Type == LT_Invalid) - return; - - bool LookForFunctionName = Line.MustBeDeclaration; - determineTokenTypes(Line.First, /*IsExpression=*/ false, - LookForFunctionName); - - if (Line.First.Type == TT_ObjCMethodSpecifier) - Line.Type = LT_ObjCMethodDecl; - else if (Line.First.Type == TT_ObjCDecl) - Line.Type = LT_ObjCDecl; - else if (Line.First.Type == TT_ObjCProperty) - Line.Type = LT_ObjCProperty; - - Line.First.SpaceRequiredBefore = true; - Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore; - Line.First.CanBreakBefore = Line.First.MustBreakBefore; - - Line.First.TotalLength = Line.First.FormatTok.TokenLength; - if (!Line.First.Children.empty()) - calculateExtraInformation(Line.First.Children[0]); - } - -private: - /// \brief Calculate the penalty for splitting before \c Tok. - unsigned splitPenalty(const AnnotatedToken &Tok) { - const AnnotatedToken &Left = *Tok.Parent; - const AnnotatedToken &Right = Tok; - - if (Left.is(tok::l_brace) && Right.isNot(tok::l_brace)) - return 50; - if (Left.is(tok::equal) && Right.is(tok::l_brace)) - return 150; - if (Left.is(tok::coloncolon)) - return 500; - - if (Left.Type == TT_RangeBasedForLoopColon) - return 5; - - if (Right.is(tok::arrow) || Right.is(tok::period)) { - if (Left.is(tok::r_paren) && Line.Type == LT_BuilderTypeCall) - return 5; // Should be smaller than breaking at a nested comma. - return 150; - } - - // In for-loops, prefer breaking at ',' and ';'. - if (Line.First.is(tok::kw_for) && - (Left.isNot(tok::comma) && Left.isNot(tok::semi))) - return 20; - - if (Left.is(tok::semi) || Left.is(tok::comma)) - return 0; - - // In Objective-C method expressions, prefer breaking before "param:" over - // breaking after it. 
- if (isObjCSelectorName(Right)) - return 0; - if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr) - return 20; - - if (Left.is(tok::l_paren)) - return 20; - // FIXME: The penalty for a trailing "<" or "[" being higher than the - // penalty for a trainling "(" is a temporary workaround until we can - // properly avoid breaking in array subscripts or template parameters. - if (Left.is(tok::l_square) || Left.Type == TT_TemplateOpener) - return 50; - - if (Left.Type == TT_ConditionalExpr) - return prec::Assignment; - prec::Level Level = getPrecedence(Left); - - if (Level != prec::Unknown) - return Level; - - return 3; - } - - void determineTokenTypes(AnnotatedToken &Current, bool IsExpression, - bool LookForFunctionName) { - if (getPrecedence(Current) == prec::Assignment) { - IsExpression = true; - AnnotatedToken *Previous = Current.Parent; - while (Previous != NULL) { - if (Previous->Type == TT_BinaryOperator && - (Previous->is(tok::star) || Previous->is(tok::amp))) { - Previous->Type = TT_PointerOrReference; - } - Previous = Previous->Parent; - } - } - if (Current.is(tok::kw_return) || Current.is(tok::kw_throw) || - (Current.is(tok::l_paren) && !Line.MustBeDeclaration && - (Current.Parent == NULL || Current.Parent->isNot(tok::kw_for)))) - IsExpression = true; - - if (Current.Type == TT_Unknown) { - if (LookForFunctionName && Current.is(tok::l_paren)) { - findFunctionName(&Current); - LookForFunctionName = false; - } else if (Current.is(tok::star) || Current.is(tok::amp)) { - Current.Type = determineStarAmpUsage(Current, IsExpression); - } else if (Current.is(tok::minus) || Current.is(tok::plus) || - Current.is(tok::caret)) { - Current.Type = determinePlusMinusCaretUsage(Current); - } else if (Current.is(tok::minusminus) || Current.is(tok::plusplus)) { - Current.Type = determineIncrementUsage(Current); - } else if (Current.is(tok::exclaim)) { - Current.Type = TT_UnaryOperator; - } else if (isBinaryOperator(Current)) { - Current.Type = TT_BinaryOperator; - } 
else if (Current.is(tok::comment)) { - std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr, - Lex.getLangOpts())); - if (StringRef(Data).startswith("//")) - Current.Type = TT_LineComment; - else - Current.Type = TT_BlockComment; - } else if (Current.is(tok::r_paren) && - (Current.Parent->Type == TT_PointerOrReference || - Current.Parent->Type == TT_TemplateCloser) && - (Current.Children.empty() || - (Current.Children[0].isNot(tok::equal) && - Current.Children[0].isNot(tok::semi) && - Current.Children[0].isNot(tok::l_brace)))) { - // FIXME: We need to get smarter and understand more cases of casts. - Current.Type = TT_CastRParen; - } else if (Current.is(tok::at) && Current.Children.size()) { - switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) { - case tok::objc_interface: - case tok::objc_implementation: - case tok::objc_protocol: - Current.Type = TT_ObjCDecl; - break; - case tok::objc_property: - Current.Type = TT_ObjCProperty; - break; - default: - break; - } - } - } - - if (!Current.Children.empty()) - determineTokenTypes(Current.Children[0], IsExpression, - LookForFunctionName); - } - - /// \brief Starting from \p Current, this searches backwards for an - /// identifier which could be the start of a function name and marks it. - void findFunctionName(AnnotatedToken *Current) { - AnnotatedToken *Parent = Current->Parent; - while (Parent != NULL && Parent->Parent != NULL) { - if (Parent->is(tok::identifier) && - (Parent->Parent->is(tok::identifier) || - Parent->Parent->Type == TT_PointerOrReference || - Parent->Parent->Type == TT_TemplateCloser)) { - Parent->Type = TT_StartOfName; - break; - } - Parent = Parent->Parent; - } - } - - /// \brief Returns the previous token ignoring comments. 
- const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) { - const AnnotatedToken *PrevToken = Tok.Parent; - while (PrevToken != NULL && PrevToken->is(tok::comment)) - PrevToken = PrevToken->Parent; - return PrevToken; - } - - /// \brief Returns the next token ignoring comments. - const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) { - if (Tok.Children.empty()) - return NULL; - const AnnotatedToken *NextToken = &Tok.Children[0]; - while (NextToken->is(tok::comment)) { - if (NextToken->Children.empty()) - return NULL; - NextToken = &NextToken->Children[0]; - } - return NextToken; - } - - /// \brief Return the type of the given token assuming it is * or &. - TokenType - determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) { - const AnnotatedToken *PrevToken = getPreviousToken(Tok); - if (PrevToken == NULL) - return TT_UnaryOperator; - - const AnnotatedToken *NextToken = getNextToken(Tok); - if (NextToken == NULL) - return TT_Unknown; - - if (NextToken->is(tok::l_square) && NextToken->Type != TT_ObjCMethodExpr) - return TT_PointerOrReference; - - if (PrevToken->is(tok::l_paren) || PrevToken->is(tok::l_square) || - PrevToken->is(tok::l_brace) || PrevToken->is(tok::comma) || - PrevToken->is(tok::kw_return) || PrevToken->is(tok::colon) || - PrevToken->Type == TT_BinaryOperator || - PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen) - return TT_UnaryOperator; - - if (PrevToken->FormatTok.Tok.isLiteral() || PrevToken->is(tok::r_paren) || - PrevToken->is(tok::r_square) || NextToken->FormatTok.Tok.isLiteral() || - NextToken->is(tok::plus) || NextToken->is(tok::minus) || - NextToken->is(tok::plusplus) || NextToken->is(tok::minusminus) || - NextToken->is(tok::tilde) || NextToken->is(tok::exclaim) || - NextToken->is(tok::l_paren) || NextToken->is(tok::l_square) || - NextToken->is(tok::kw_alignof) || NextToken->is(tok::kw_sizeof)) - return TT_BinaryOperator; - - if (NextToken->is(tok::comma) || NextToken->is(tok::r_paren) 
|| - NextToken->is(tok::greater)) - return TT_PointerOrReference; - - // It is very unlikely that we are going to find a pointer or reference type - // definition on the RHS of an assignment. - if (IsExpression) - return TT_BinaryOperator; - - return TT_PointerOrReference; - } - - TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) { - const AnnotatedToken *PrevToken = getPreviousToken(Tok); - if (PrevToken == NULL) - return TT_UnaryOperator; - - // Use heuristics to recognize unary operators. - if (PrevToken->is(tok::equal) || PrevToken->is(tok::l_paren) || - PrevToken->is(tok::comma) || PrevToken->is(tok::l_square) || - PrevToken->is(tok::question) || PrevToken->is(tok::colon) || - PrevToken->is(tok::kw_return) || PrevToken->is(tok::kw_case) || - PrevToken->is(tok::at) || PrevToken->is(tok::l_brace)) - return TT_UnaryOperator; - - // There can't be to consecutive binary operators. - if (PrevToken->Type == TT_BinaryOperator) - return TT_UnaryOperator; - - // Fall back to marking the token as binary operator. - return TT_BinaryOperator; - } - - /// \brief Determine whether ++/-- are pre- or post-increments/-decrements. 
- TokenType determineIncrementUsage(const AnnotatedToken &Tok) { - const AnnotatedToken *PrevToken = getPreviousToken(Tok); - if (PrevToken == NULL) - return TT_UnaryOperator; - if (PrevToken->is(tok::r_paren) || PrevToken->is(tok::r_square) || - PrevToken->is(tok::identifier)) - return TT_TrailingUnaryOperator; - - return TT_UnaryOperator; - } - - bool spaceRequiredBetween(const AnnotatedToken &Left, - const AnnotatedToken &Right) { - if (Right.is(tok::hashhash)) - return Left.is(tok::hash); - if (Left.is(tok::hashhash) || Left.is(tok::hash)) - return Right.is(tok::hash); - if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma)) - return false; - if (Right.is(tok::less) && - (Left.is(tok::kw_template) || - (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList))) - return true; - if (Left.is(tok::arrow) || Right.is(tok::arrow)) - return false; - if (Left.is(tok::exclaim) || Left.is(tok::tilde)) - return false; - if (Left.is(tok::at) && - (Right.is(tok::identifier) || Right.is(tok::string_literal) || - Right.is(tok::char_constant) || Right.is(tok::numeric_constant) || - Right.is(tok::l_paren) || Right.is(tok::l_brace) || - Right.is(tok::kw_true) || Right.is(tok::kw_false))) - return false; - if (Left.is(tok::coloncolon)) - return false; - if (Right.is(tok::coloncolon)) - return Left.isNot(tok::identifier) && Left.isNot(tok::greater); - if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less)) - return false; - if (Right.is(tok::amp) || Right.is(tok::star)) - return Left.FormatTok.Tok.isLiteral() || - (Left.isNot(tok::star) && Left.isNot(tok::amp) && - !Style.PointerAndReferenceBindToType); - if (Left.is(tok::amp) || Left.is(tok::star)) - return Right.FormatTok.Tok.isLiteral() || - Style.PointerAndReferenceBindToType; - if (Right.is(tok::star) && Left.is(tok::l_paren)) - return false; - if (Left.is(tok::l_square) || Right.is(tok::r_square)) - return false; - if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr) - 
return false; - if (Left.is(tok::period) || Right.is(tok::period)) - return false; - if (Left.is(tok::colon)) - return Left.Type != TT_ObjCMethodExpr; - if (Right.is(tok::colon)) - return Right.Type != TT_ObjCMethodExpr; - if (Left.is(tok::l_paren)) - return false; - if (Right.is(tok::l_paren)) { - return Line.Type == LT_ObjCDecl || Left.is(tok::kw_if) || - Left.is(tok::kw_for) || Left.is(tok::kw_while) || - Left.is(tok::kw_switch) || Left.is(tok::kw_return) || - Left.is(tok::kw_catch) || Left.is(tok::kw_new) || - Left.is(tok::kw_delete); - } - if (Left.is(tok::at) && - Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword) - return false; - if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) - return false; - return true; - } - - bool spaceRequiredBefore(const AnnotatedToken &Tok) { - if (Line.Type == LT_ObjCMethodDecl) { - if (Tok.is(tok::identifier) && !Tok.Children.empty() && - Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier)) - return true; - if (Tok.is(tok::colon)) - return false; - if (Tok.Parent->Type == TT_ObjCMethodSpecifier) - return true; - if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier)) - // Don't space between ')' and - return false; - if (Tok.Parent->is(tok::colon) && Tok.is(tok::l_paren)) - // Don't space between ':' and '(' - return false; - } - if (Line.Type == LT_ObjCProperty && - (Tok.is(tok::equal) || Tok.Parent->is(tok::equal))) - return false; - - if (Tok.Parent->is(tok::comma)) - return true; - if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen) - return true; - if (Tok.Type == TT_OverloadedOperator) - return Tok.is(tok::identifier) || Tok.is(tok::kw_new) || - Tok.is(tok::kw_delete) || Tok.is(tok::kw_bool); - if (Tok.Parent->Type == TT_OverloadedOperator) - return false; - if (Tok.is(tok::colon)) - return Line.First.isNot(tok::kw_case) && !Tok.Children.empty() && - Tok.Type != TT_ObjCMethodExpr; - if (Tok.Parent->Type == TT_UnaryOperator || - Tok.Parent->Type == TT_CastRParen) - 
return false; - if (Tok.Type == TT_UnaryOperator) - return Tok.Parent->isNot(tok::l_paren) && - Tok.Parent->isNot(tok::l_square) && Tok.Parent->isNot(tok::at) && - (Tok.Parent->isNot(tok::colon) || - Tok.Parent->Type != TT_ObjCMethodExpr); - if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) { - return Tok.Type == TT_TemplateCloser && Tok.Parent->Type == - TT_TemplateCloser && Style.SplitTemplateClosingGreater; - } - if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator) - return true; - if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren)) - return false; - if (Tok.is(tok::less) && Line.First.is(tok::hash)) - return true; - if (Tok.Type == TT_TrailingUnaryOperator) - return false; - return spaceRequiredBetween(*Tok.Parent, Tok); - } - - bool canBreakBefore(const AnnotatedToken &Right) { - const AnnotatedToken &Left = *Right.Parent; - if (Line.Type == LT_ObjCMethodDecl) { - if (Right.is(tok::identifier) && !Right.Children.empty() && - Right.Children[0].is(tok::colon) && Left.is(tok::identifier)) - return true; - if (Right.is(tok::identifier) && Left.is(tok::l_paren) && - Left.Parent->is(tok::colon)) - // Don't break this identifier as ':' or identifier - // before it will break. - return false; - if (Right.is(tok::colon) && Left.is(tok::identifier) && - Left.CanBreakBefore) - // Don't break at ':' if identifier before it can beak. 
- return false; - } - if (Right.Type == TT_StartOfName && Style.AllowReturnTypeOnItsOwnLine) - return true; - if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr) - return false; - if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr) - return true; - if (isObjCSelectorName(Right)) - return true; - if (Left.ClosesTemplateDeclaration) - return true; - if (Right.Type == TT_ConditionalExpr || Right.is(tok::question)) - return true; - if (Left.Type == TT_RangeBasedForLoopColon) - return true; - if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser || - Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr || - Left.is(tok::question)) - return false; - if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl) - return false; - - if (Right.Type == TT_LineComment) - // We rely on MustBreakBefore being set correctly here as we should not - // change the "binding" behavior of a comment. - return false; - - // Allow breaking after a trailing 'const', e.g. after a method declaration, - // unless it is follow by ';', '{' or '='. - if (Left.is(tok::kw_const) && Left.Parent != NULL && - Left.Parent->is(tok::r_paren)) - return Right.isNot(tok::l_brace) && Right.isNot(tok::semi) && - Right.isNot(tok::equal); - - // We only break before r_brace if there was a corresponding break before - // the l_brace, which is tracked by BreakBeforeClosingBrace. 
- if (Right.is(tok::r_brace)) - return false; - - if (Right.is(tok::r_paren) || Right.is(tok::greater)) - return false; - return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) || - Left.is(tok::comma) || Right.is(tok::lessless) || - Right.is(tok::arrow) || Right.is(tok::period) || - Right.is(tok::colon) || Left.is(tok::coloncolon) || - Left.is(tok::semi) || Left.is(tok::l_brace) || - (Left.is(tok::r_paren) && Left.Type != TT_CastRParen && - Right.is(tok::identifier)) || - (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) || - (Left.is(tok::l_square) && !Right.is(tok::r_square)); - } - - FormatStyle Style; - SourceManager &SourceMgr; - Lexer &Lex; - AnnotatedLine &Line; -}; - class LexerBasedFormatTokenSource : public FormatTokenSource { public: LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr) diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp new file mode 100644 index 0000000000..eabc8cff28 --- /dev/null +++ b/lib/Format/TokenAnnotator.cpp @@ -0,0 +1,875 @@ +//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements a token annotator, i.e. creates +/// \c AnnotatedTokens out of \c FormatTokens with required extra information. +/// +//===----------------------------------------------------------------------===// + +#include "TokenAnnotator.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Lex/Lexer.h" + +namespace clang { +namespace format { + +/// \brief Returns if a token is an Objective-C selector name. +/// +/// For example, "bar" is a selector name in [foo bar:(4 + 5)]. 
+static bool isObjCSelectorName(const AnnotatedToken &Tok) {
+  return Tok.is(tok::identifier) && !Tok.Children.empty() &&
+         Tok.Children[0].is(tok::colon) &&
+         Tok.Children[0].Type == TT_ObjCMethodExpr;
+}
+
+/// \brief Returns true if \p Tok has a binary-operator precedence above that
+/// of the comma operator.
+static bool isBinaryOperator(const AnnotatedToken &Tok) {
+  // Comma is a binary operator, but does not behave as such wrt. formatting.
+  return getPrecedence(Tok) > prec::Comma;
+}
+
+/// \brief A parser that gathers additional information about tokens.
+///
+/// The \c AnnotatingParser tries to match parentheses, square brackets and
+/// braces, linking each pair through \c AnnotatedToken::MatchingParen and
+/// counting commas in \c ParameterCount. It also tries to resolve matching
+/// "<" and ">" into template parameter lists.
+class AnnotatingParser {
+public:
+  /// \brief Creates a parser positioned on \p RootToken, the first token of
+  /// the line.
+  AnnotatingParser(AnnotatedToken &RootToken)
+      : CurrentToken(&RootToken), KeywordVirtualFound(false),
+        ColonIsObjCMethodExpr(false), ColonIsForRangeExpr(false) {
+  }
+
+  /// \brief A helper class to manage AnnotatingParser::ColonIsObjCMethodExpr.
+  ///
+  /// The constructor remembers the current value of the flag and the
+  /// destructor restores it, so nested scopes cannot leak the setting.
+  struct ObjCSelectorRAII {
+    AnnotatingParser &P;
+    bool ColonWasObjCMethodExpr;
+
+    ObjCSelectorRAII(AnnotatingParser &P)
+        : P(P), ColonWasObjCMethodExpr(P.ColonIsObjCMethodExpr) {
+    }
+
+    ~ObjCSelectorRAII() { P.ColonIsObjCMethodExpr = ColonWasObjCMethodExpr; }
+
+    /// \brief Marks \p Left as the opening token of a method expression and
+    /// makes subsequent colons be typed as \c TT_ObjCMethodExpr.
+    void markStart(AnnotatedToken &Left) {
+      P.ColonIsObjCMethodExpr = true;
+      Left.Type = TT_ObjCMethodExpr;
+    }
+
+    /// \brief Marks \p Right as the closing token of a method expression.
+    void markEnd(AnnotatedToken &Right) { Right.Type = TT_ObjCMethodExpr; }
+  };
+
+  /// \brief Parses up to the ">" matching the "<" that precedes the current
+  /// token.
+  ///
+  /// \returns true if a closing ">" was found (it is then typed as
+  /// \c TT_TemplateCloser); false if a token that cannot appear in a template
+  /// argument list was seen first or the line ended.
+  bool parseAngle() {
+    if (CurrentToken == NULL)
+      return false;
+    AnnotatedToken *Left = CurrentToken->Parent;
+    while (CurrentToken != NULL) {
+      if (CurrentToken->is(tok::greater)) {
+        Left->MatchingParen = CurrentToken;
+        CurrentToken->MatchingParen = Left;
+        CurrentToken->Type = TT_TemplateCloser;
+        next();
+        return true;
+      }
+      if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square) ||
+          CurrentToken->is(tok::r_brace))
+        return false;
+      if (CurrentToken->is(tok::pipepipe) || CurrentToken->is(tok::ampamp) ||
+          CurrentToken->is(tok::question) || CurrentToken->is(tok::colon))
+        return false;
+      if (CurrentToken->is(tok::comma))
+        ++Left->ParameterCount;
+      if (!consumeToken())
+        return false;
+    }
+    return false;
+  }
+
+  /// \brief Parses up to the ")" matching the "(" that precedes the current
+  /// token.
+  ///
+  /// \param LookForDecls Set when the parenthesis follows "if" or "while";
+  /// enables the 'identifier * identifier' heuristic below.
+  /// \returns true if the matching ")" was found.
+  bool parseParens(bool LookForDecls = false) {
+    if (CurrentToken == NULL)
+      return false;
+    bool StartsObjCMethodExpr = false;
+    AnnotatedToken *Left = CurrentToken->Parent;
+    if (CurrentToken->is(tok::caret)) {
+      // "(^" can start a block type: the caret is the first token after the
+      // opening parenthesis.
+      Left->Type = TT_ObjCBlockLParen;
+    } else if (AnnotatedToken *MaybeSel = Left->Parent) {
+      // @selector( starts a selector.
+      if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent &&
+          MaybeSel->Parent->is(tok::at)) {
+        StartsObjCMethodExpr = true;
+      }
+    }
+
+    ObjCSelectorRAII objCSelector(*this);
+    if (StartsObjCMethodExpr)
+      objCSelector.markStart(*Left);
+
+    while (CurrentToken != NULL) {
+      // LookForDecls is set when "if (" has been seen. Check for
+      // 'identifier' '*' 'identifier' followed by not '=' -- this
+      // '*' has to be a binary operator but determineStarAmpUsage() will
+      // categorize it as a unary operator, so set the right type here.
+      if (LookForDecls && !CurrentToken->Children.empty()) {
+        AnnotatedToken &Prev = *CurrentToken->Parent;
+        AnnotatedToken &Next = CurrentToken->Children[0];
+        if (Prev.Parent->is(tok::identifier) &&
+            (Prev.is(tok::star) || Prev.is(tok::amp)) &&
+            CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) {
+          Prev.Type = TT_BinaryOperator;
+          LookForDecls = false;
+        }
+      }
+
+      if (CurrentToken->is(tok::r_paren)) {
+        Left->MatchingParen = CurrentToken;
+        CurrentToken->MatchingParen = Left;
+
+        if (StartsObjCMethodExpr)
+          objCSelector.markEnd(*CurrentToken);
+
+        next();
+        return true;
+      }
+      if (CurrentToken->is(tok::r_square) || CurrentToken->is(tok::r_brace))
+        return false;
+      if (CurrentToken->is(tok::comma))
+        ++Left->ParameterCount;
+      if (!consumeToken())
+        return false;
+    }
+    return false;
+  }
+
+  /// \brief Parses up to the "]" matching the "[" that precedes the current
+  /// token.
+  ///
+  /// \returns true if the matching "]" was found.
+  bool parseSquare() {
+    if (!CurrentToken)
+      return false;
+
+    // A '[' could be an index subscript (after an identifier or after
+    // ')' or ']'), or it could be the start of an Objective-C method
+    // expression.
+    AnnotatedToken *Left = CurrentToken->Parent;
+    bool StartsObjCMethodExpr =
+        !Left->Parent || Left->Parent->is(tok::colon) ||
+        Left->Parent->is(tok::l_square) || Left->Parent->is(tok::l_paren) ||
+        Left->Parent->is(tok::kw_return) || Left->Parent->is(tok::kw_throw) ||
+        getBinOpPrecedence(Left->Parent->FormatTok.Tok.getKind(), true, true) >
+        prec::Unknown;
+
+    ObjCSelectorRAII objCSelector(*this);
+    if (StartsObjCMethodExpr)
+      objCSelector.markStart(*Left);
+
+    while (CurrentToken != NULL) {
+      if (CurrentToken->is(tok::r_square)) {
+        if (!CurrentToken->Children.empty() &&
+            CurrentToken->Children[0].is(tok::l_paren)) {
+          // An ObjC method call can't be followed by an open parenthesis.
+          // FIXME: Do we incorrectly label ":" with this?
+          StartsObjCMethodExpr = false;
+          Left->Type = TT_Unknown;
+        }
+        if (StartsObjCMethodExpr)
+          objCSelector.markEnd(*CurrentToken);
+        Left->MatchingParen = CurrentToken;
+        CurrentToken->MatchingParen = Left;
+        next();
+        return true;
+      }
+      if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_brace))
+        return false;
+      if (CurrentToken->is(tok::comma))
+        ++Left->ParameterCount;
+      if (!consumeToken())
+        return false;
+    }
+    return false;
+  }
+
+  /// \brief Parses up to the "}" matching the "{" that precedes the current
+  /// token.
+  ///
+  /// \returns false only on a mismatched closer or a nested parse failure;
+  /// a line that ends before the "}" is accepted.
+  bool parseBrace() {
+    // Lines are fine to end with '{'.
+    if (CurrentToken == NULL)
+      return true;
+    AnnotatedToken *Left = CurrentToken->Parent;
+    while (CurrentToken != NULL) {
+      if (CurrentToken->is(tok::r_brace)) {
+        Left->MatchingParen = CurrentToken;
+        CurrentToken->MatchingParen = Left;
+        next();
+        return true;
+      }
+      if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square))
+        return false;
+      if (!consumeToken())
+        return false;
+    }
+    return true;
+  }
+
+  /// \brief Parses the tokens after a "?" up to its ":" and types that colon
+  /// as \c TT_ConditionalExpr.
+  ///
+  /// \returns true if the colon was found.
+  bool parseConditional() {
+    while (CurrentToken != NULL) {
+      if (CurrentToken->is(tok::colon)) {
+        CurrentToken->Type = TT_ConditionalExpr;
+        next();
+        return true;
+      }
+      if (!consumeToken())
+        return false;
+    }
+    return false;
+  }
+
+  /// \brief Parses the "<...>" following the "template" keyword and flags the
+  /// closing ">" with \c ClosesTemplateDeclaration.
+  ///
+  /// \returns true if a complete template parameter list was parsed.
+  bool parseTemplateDeclaration() {
+    if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
+      CurrentToken->Type = TT_TemplateOpener;
+      next();
+      if (!parseAngle())
+        return false;
+      // parseAngle() has already stepped past the closing '>'. If that '>'
+      // was the last token of the line there is no current token left to
+      // reach it through, so there is nothing to flag (and nothing follows
+      // it that could be broken before).
+      if (CurrentToken != NULL)
+        CurrentToken->Parent->ClosesTemplateDeclaration = true;
+      return true;
+    }
+    return false;
+  }
+
+  /// \brief Consumes the current token, assigns a type to it where the
+  /// context allows, and recursively parses any bracketed construct it opens.
+  ///
+  /// Callers must ensure that \c CurrentToken is not NULL.
+  /// \returns false if the token sequence is inconsistent (e.g. an unmatched
+  /// closing bracket).
+  bool consumeToken() {
+    AnnotatedToken *Tok = CurrentToken;
+    next();
+    switch (Tok->FormatTok.Tok.getKind()) {
+    case tok::plus:
+    case tok::minus:
+      // At the start of the line, +/- specify Objective-C method
+      // declarations.
+      if (Tok->Parent == NULL)
+        Tok->Type = TT_ObjCMethodSpecifier;
+      break;
+    case tok::colon:
+      // A colon that is the very first token of the line has no parent to
+      // inspect; reject the line instead of dereferencing NULL.
+      if (Tok->Parent == NULL)
+        return false;
+      // Colons from ?: are handled in parseConditional().
+      if (Tok->Parent->is(tok::r_paren))
+        Tok->Type = TT_CtorInitializerColon;
+      else if (ColonIsObjCMethodExpr)
+        Tok->Type = TT_ObjCMethodExpr;
+      else if (ColonIsForRangeExpr)
+        Tok->Type = TT_RangeBasedForLoopColon;
+      break;
+    case tok::kw_if:
+    case tok::kw_while:
+      if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
+        next();
+        if (!parseParens(/*LookForDecls=*/ true))
+          return false;
+      }
+      break;
+    case tok::kw_for:
+      ColonIsForRangeExpr = true;
+      // NOTE(review): the token after 'for' is skipped without checking that
+      // it is '('.
+      next();
+      if (!parseParens())
+        return false;
+      break;
+    case tok::l_paren:
+      if (!parseParens())
+        return false;
+      break;
+    case tok::l_square:
+      if (!parseSquare())
+        return false;
+      break;
+    case tok::l_brace:
+      if (!parseBrace())
+        return false;
+      break;
+    case tok::less:
+      if (parseAngle())
+        Tok->Type = TT_TemplateOpener;
+      else {
+        // Not a template: treat '<' as a comparison and resume parsing right
+        // after it.
+        Tok->Type = TT_BinaryOperator;
+        CurrentToken = Tok;
+        next();
+      }
+      break;
+    case tok::r_paren:
+    case tok::r_square:
+      return false;
+    case tok::r_brace:
+      // Lines can start with '}'.
+      if (Tok->Parent != NULL)
+        return false;
+      break;
+    case tok::greater:
+      Tok->Type = TT_BinaryOperator;
+      break;
+    case tok::kw_operator:
+      if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
+        CurrentToken->Type = TT_OverloadedOperator;
+        next();
+        if (CurrentToken != NULL && CurrentToken->is(tok::r_paren)) {
+          CurrentToken->Type = TT_OverloadedOperator;
+          next();
+        }
+      } else {
+        while (CurrentToken != NULL && CurrentToken->isNot(tok::l_paren)) {
+          CurrentToken->Type = TT_OverloadedOperator;
+          next();
+        }
+      }
+      break;
+    case tok::question:
+      parseConditional();
+      break;
+    case tok::kw_template:
+      parseTemplateDeclaration();
+      break;
+    default:
+      break;
+    }
+    return true;
+  }
+
+  /// \brief Handles the remainder of an #include / #import line. For the
+  /// "<...>" form the tokens after '<' are typed as
+  /// \c TT_ImplicitStringLiteral (except a trailing comment).
+  void parseIncludeDirective() {
+    next();
+    if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
+      next();
+      while (CurrentToken != NULL) {
+        if (CurrentToken->isNot(tok::comment) ||
+            !CurrentToken->Children.empty())
+          CurrentToken->Type = TT_ImplicitStringLiteral;
+        next();
+      }
+    } else {
+      while (CurrentToken != NULL) {
+        next();
+      }
+    }
+  }
+
+  /// \brief Handles the remainder of a #warning / #error line by typing all
+  /// tokens after the first one as \c TT_ImplicitStringLiteral.
+  void parseWarningOrError() {
+    next();
+    // We still want to format the whitespace left of the first token of the
+    // warning or error.
+    next();
+    while (CurrentToken != NULL) {
+      CurrentToken->Type = TT_ImplicitStringLiteral;
+      next();
+    }
+  }
+
+  /// \brief Dispatches on the preprocessor keyword following '#'.
+  void parsePreprocessorDirective() {
+    next();
+    if (CurrentToken == NULL)
+      return;
+    // Hashes in the middle of a line can lead to any strange token
+    // sequence.
+    if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL)
+      return;
+    switch (CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
+    case tok::pp_include:
+    case tok::pp_import:
+      parseIncludeDirective();
+      break;
+    case tok::pp_error:
+    case tok::pp_warning:
+      parseWarningOrError();
+      break;
+    default:
+      break;
+    }
+  }
+
+  /// \brief Parses the whole line, starting at the root token.
+  ///
+  /// \returns the detected \c LineType, or \c LT_Invalid if the tokens could
+  /// not be parsed consistently.
+  LineType parseLine() {
+    int PeriodsAndArrows = 0;
+    bool CanBeBuilderTypeStmt = true;
+    if (CurrentToken->is(tok::hash)) {
+      parsePreprocessorDirective();
+      return LT_PreprocessorDirective;
+    }
+    while (CurrentToken != NULL) {
+      if (CurrentToken->is(tok::kw_virtual))
+        KeywordVirtualFound = true;
+      if (CurrentToken->is(tok::period) || CurrentToken->is(tok::arrow))
+        ++PeriodsAndArrows;
+      if (getPrecedence(*CurrentToken) > prec::Assignment &&
+          CurrentToken->isNot(tok::less) && CurrentToken->isNot(tok::greater))
+        CanBeBuilderTypeStmt = false;
+      if (!consumeToken())
+        return LT_Invalid;
+    }
+    if (KeywordVirtualFound)
+      return LT_VirtualFunctionDecl;
+
+    // Assume a builder-type call if there are 2 or more "." and "->".
+    if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt)
+      return LT_BuilderTypeCall;
+
+    return LT_Other;
+  }
+
+  /// \brief Advances to the next token of the line; \c CurrentToken becomes
+  /// NULL once the last token has been passed.
+  void next() {
+    if (CurrentToken != NULL && !CurrentToken->Children.empty())
+      CurrentToken = &CurrentToken->Children[0];
+    else
+      CurrentToken = NULL;
+  }
+
+private:
+  AnnotatedToken *CurrentToken;
+  bool KeywordVirtualFound;
+  bool ColonIsObjCMethodExpr;
+  bool ColonIsForRangeExpr;
+};
+
+/// Annotates \c Line: determines the line type and token types, then computes
+/// spacing, break and length information for every token. Returns early,
+/// leaving the per-token information untouched, if the line is invalid.
+void TokenAnnotator::annotate() {
+  AnnotatingParser Parser(Line.First);
+  Line.Type = Parser.parseLine();
+  if (Line.Type == LT_Invalid)
+    return;
+
+  bool LookForFunctionName = Line.MustBeDeclaration;
+  determineTokenTypes(Line.First, /*IsExpression=*/ false, LookForFunctionName);
+
+  if (Line.First.Type == TT_ObjCMethodSpecifier)
+    Line.Type = LT_ObjCMethodDecl;
+  else if (Line.First.Type == TT_ObjCDecl)
+    Line.Type = LT_ObjCDecl;
+  else if (Line.First.Type == TT_ObjCProperty)
+    Line.Type = LT_ObjCProperty;
+
+  Line.First.SpaceRequiredBefore = true;
+  Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
+  Line.First.CanBreakBefore = Line.First.MustBreakBefore;
+
+  Line.First.TotalLength = Line.First.FormatTok.TokenLength;
+  if (!Line.First.Children.empty())
+    calculateExtraInformation(Line.First.Children[0]);
+}
+
+/// Computes SpaceRequiredBefore, MustBreakBefore, CanBreakBefore, TotalLength
+/// and SplitPenalty for \p Current and, recursively, for every token after
+/// it. \p Current must have a parent.
+void TokenAnnotator::calculateExtraInformation(AnnotatedToken &Current) {
+  Current.SpaceRequiredBefore = spaceRequiredBefore(Current);
+
+  if (Current.FormatTok.MustBreakBefore) {
+    Current.MustBreakBefore = true;
+  } else {
+    if (Current.Type == TT_LineComment) {
+      Current.MustBreakBefore = Current.FormatTok.NewlinesBefore > 0;
+    } else if ((Current.Parent->is(tok::comment) &&
+                Current.FormatTok.NewlinesBefore > 0) ||
+               (Current.is(tok::string_literal) &&
+                Current.Parent->is(tok::string_literal))) {
+      Current.MustBreakBefore = true;
+    } else {
+      Current.MustBreakBefore = false;
+    }
+  }
+  Current.CanBreakBefore = Current.MustBreakBefore || canBreakBefore(Current);
+  // After a mandatory break the running length is bumped by a full column
+  // limit instead of by the token's own length.
+  if (Current.MustBreakBefore)
+    Current.TotalLength = Current.Parent->TotalLength + Style.ColumnLimit;
+  else
+    Current.TotalLength =
+        Current.Parent->TotalLength + Current.FormatTok.TokenLength +
+        (Current.SpaceRequiredBefore ? 1 : 0);
+  // FIXME: Only calculate this if CanBreakBefore is true once static
+  // initializers etc. are sorted out.
+  Current.SplitPenalty = splitPenalty(Current);
+  if (!Current.Children.empty())
+    calculateExtraInformation(Current.Children[0]);
+}
+
+/// Returns the penalty for breaking the line between \p Tok and its parent.
+/// \p Tok must have a parent.
+unsigned TokenAnnotator::splitPenalty(const AnnotatedToken &Tok) {
+  const AnnotatedToken &Left = *Tok.Parent;
+  const AnnotatedToken &Right = Tok;
+
+  if (Left.is(tok::l_brace) && Right.isNot(tok::l_brace))
+    return 50;
+  if (Left.is(tok::equal) && Right.is(tok::l_brace))
+    return 150;
+  if (Left.is(tok::coloncolon))
+    return 500;
+
+  if (Left.Type == TT_RangeBasedForLoopColon)
+    return 5;
+
+  if (Right.is(tok::arrow) || Right.is(tok::period)) {
+    if (Left.is(tok::r_paren) && Line.Type == LT_BuilderTypeCall)
+      return 5; // Should be smaller than breaking at a nested comma.
+    return 150;
+  }
+
+  // In for-loops, prefer breaking at ',' and ';'.
+  if (Line.First.is(tok::kw_for) &&
+      (Left.isNot(tok::comma) && Left.isNot(tok::semi)))
+    return 20;
+
+  if (Left.is(tok::semi) || Left.is(tok::comma))
+    return 0;
+
+  // In Objective-C method expressions, prefer breaking before "param:" over
+  // breaking after it.
+  if (isObjCSelectorName(Right))
+    return 0;
+  if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
+    return 20;
+
+  if (Left.is(tok::l_paren))
+    return 20;
+  // FIXME: The penalty for a trailing "<" or "[" being higher than the
+  // penalty for a trailing "(" is a temporary workaround until we can
+  // properly avoid breaking in array subscripts or template parameters.
+  if (Left.is(tok::l_square) || Left.Type == TT_TemplateOpener)
+    return 50;
+
+  if (Left.Type == TT_ConditionalExpr)
+    return prec::Assignment;
+  prec::Level Level = getPrecedence(Left);
+
+  if (Level != prec::Unknown)
+    return Level;
+
+  return 3;
+}
+
+/// Assigns a \c TokenType to \p Current if it is still \c TT_Unknown, then
+/// recurses into the next token. \p IsExpression becomes true once an
+/// assignment, "return", "throw" or (outside declarations) a "(" was seen.
+void TokenAnnotator::determineTokenTypes(
+    AnnotatedToken &Current, bool IsExpression, bool LookForFunctionName) {
+  if (getPrecedence(Current) == prec::Assignment) {
+    IsExpression = true;
+    // Everything left of an assignment is a declaration or an lvalue, so
+    // '*' / '&' previously typed as binary operators are re-typed as
+    // pointer/reference.
+    AnnotatedToken *Previous = Current.Parent;
+    while (Previous != NULL) {
+      if (Previous->Type == TT_BinaryOperator &&
+          (Previous->is(tok::star) || Previous->is(tok::amp))) {
+        Previous->Type = TT_PointerOrReference;
+      }
+      Previous = Previous->Parent;
+    }
+  }
+  if (Current.is(tok::kw_return) || Current.is(tok::kw_throw) ||
+      (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
+       (Current.Parent == NULL || Current.Parent->isNot(tok::kw_for))))
+    IsExpression = true;
+
+  if (Current.Type == TT_Unknown) {
+    if (LookForFunctionName && Current.is(tok::l_paren)) {
+      findFunctionName(&Current);
+      LookForFunctionName = false;
+    } else if (Current.is(tok::star) || Current.is(tok::amp)) {
+      Current.Type = determineStarAmpUsage(Current, IsExpression);
+    } else if (Current.is(tok::minus) || Current.is(tok::plus) ||
+               Current.is(tok::caret)) {
+      Current.Type = determinePlusMinusCaretUsage(Current);
+    } else if (Current.is(tok::minusminus) || Current.is(tok::plusplus)) {
+      Current.Type = determineIncrementUsage(Current);
+    } else if (Current.is(tok::exclaim)) {
+      Current.Type = TT_UnaryOperator;
+    } else if (isBinaryOperator(Current)) {
+      Current.Type = TT_BinaryOperator;
+    } else if (Current.is(tok::comment)) {
+      std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
+                                          Lex.getLangOpts()));
+      if (StringRef(Data).startswith("//"))
+        Current.Type = TT_LineComment;
+      else
+        Current.Type = TT_BlockComment;
+    } else if (Current.is(tok::r_paren) &&
+               (Current.Parent->Type == TT_PointerOrReference ||
+                Current.Parent->Type == TT_TemplateCloser) &&
+               (Current.Children.empty() ||
+                (Current.Children[0].isNot(tok::equal) &&
+                 Current.Children[0].isNot(tok::semi) &&
+                 Current.Children[0].isNot(tok::l_brace)))) {
+      // FIXME: We need to get smarter and understand more cases of casts.
+      Current.Type = TT_CastRParen;
+    } else if (Current.is(tok::at) && Current.Children.size()) {
+      switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) {
+      case tok::objc_interface:
+      case tok::objc_implementation:
+      case tok::objc_protocol:
+        Current.Type = TT_ObjCDecl;
+        break;
+      case tok::objc_property:
+        Current.Type = TT_ObjCProperty;
+        break;
+      default:
+        break;
+      }
+    }
+  }
+
+  if (!Current.Children.empty())
+    determineTokenTypes(Current.Children[0], IsExpression, LookForFunctionName);
+}
+
+/// Walks backwards from \p Current and types the first identifier that is
+/// preceded by an identifier, a pointer/reference or a template closer as
+/// \c TT_StartOfName.
+void TokenAnnotator::findFunctionName(AnnotatedToken *Current) {
+  AnnotatedToken *Parent = Current->Parent;
+  while (Parent != NULL && Parent->Parent != NULL) {
+    if (Parent->is(tok::identifier) &&
+        (Parent->Parent->is(tok::identifier) || Parent->Parent->Type ==
+         TT_PointerOrReference || Parent->Parent->Type == TT_TemplateCloser)) {
+      Parent->Type = TT_StartOfName;
+      break;
+    }
+    Parent = Parent->Parent;
+  }
+}
+
+/// Classifies a '*' or '&' as unary operator, binary operator or
+/// pointer/reference, based on the neighbouring non-comment tokens. Returns
+/// \c TT_Unknown if no token follows.
+TokenType TokenAnnotator::determineStarAmpUsage(const AnnotatedToken &Tok,
+                                                bool IsExpression) {
+  const AnnotatedToken *PrevToken = getPreviousToken(Tok);
+  if (PrevToken == NULL)
+    return TT_UnaryOperator;
+
+  const AnnotatedToken *NextToken = getNextToken(Tok);
+  if (NextToken == NULL)
+    return TT_Unknown;
+
+  if (NextToken->is(tok::l_square) && NextToken->Type != TT_ObjCMethodExpr)
+    return TT_PointerOrReference;
+
+  if (PrevToken->is(tok::l_paren) || PrevToken->is(tok::l_square) ||
+      PrevToken->is(tok::l_brace) || PrevToken->is(tok::comma) ||
+      PrevToken->is(tok::kw_return) || PrevToken->is(tok::colon) ||
+      PrevToken->Type == TT_BinaryOperator ||
+      PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
+    return TT_UnaryOperator;
+
+  if (PrevToken->FormatTok.Tok.isLiteral() || PrevToken->is(tok::r_paren) ||
+      PrevToken->is(tok::r_square) || NextToken->FormatTok.Tok.isLiteral() ||
+      NextToken->is(tok::plus) || NextToken->is(tok::minus) ||
+      NextToken->is(tok::plusplus) || NextToken->is(tok::minusminus) ||
+      NextToken->is(tok::tilde) || NextToken->is(tok::exclaim) ||
+      NextToken->is(tok::l_paren) || NextToken->is(tok::l_square) ||
+      NextToken->is(tok::kw_alignof) || NextToken->is(tok::kw_sizeof))
+    return TT_BinaryOperator;
+
+  if (NextToken->is(tok::comma) || NextToken->is(tok::r_paren) ||
+      NextToken->is(tok::greater))
+    return TT_PointerOrReference;
+
+  // It is very unlikely that we are going to find a pointer or reference type
+  // definition on the RHS of an assignment.
+  if (IsExpression)
+    return TT_BinaryOperator;
+
+  return TT_PointerOrReference;
+}
+
+/// Classifies a '+', '-' or '^' as unary or binary operator based on the
+/// previous non-comment token.
+TokenType
+TokenAnnotator::determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
+  const AnnotatedToken *PrevToken = getPreviousToken(Tok);
+  if (PrevToken == NULL)
+    return TT_UnaryOperator;
+
+  // Use heuristics to recognize unary operators.
+  if (PrevToken->is(tok::equal) || PrevToken->is(tok::l_paren) ||
+      PrevToken->is(tok::comma) || PrevToken->is(tok::l_square) ||
+      PrevToken->is(tok::question) || PrevToken->is(tok::colon) ||
+      PrevToken->is(tok::kw_return) || PrevToken->is(tok::kw_case) ||
+      PrevToken->is(tok::at) || PrevToken->is(tok::l_brace))
+    return TT_UnaryOperator;
+
+  // There can't be two consecutive binary operators.
+  if (PrevToken->Type == TT_BinaryOperator)
+    return TT_UnaryOperator;
+
+  // Fall back to marking the token as binary operator.
+  return TT_BinaryOperator;
+}
+
+/// Classifies a '++' or '--' as prefix (\c TT_UnaryOperator) or postfix
+/// (\c TT_TrailingUnaryOperator) based on the previous non-comment token.
+TokenType TokenAnnotator::determineIncrementUsage(const AnnotatedToken &Tok) {
+  const AnnotatedToken *PrevToken = getPreviousToken(Tok);
+  if (PrevToken == NULL)
+    return TT_UnaryOperator;
+  if (PrevToken->is(tok::r_paren) || PrevToken->is(tok::r_square) ||
+      PrevToken->is(tok::identifier))
+    return TT_TrailingUnaryOperator;
+
+  return TT_UnaryOperator;
+}
+
+/// Returns whether a space belongs between the adjacent tokens \p Left and
+/// \p Right, judging by token kinds and style only. The checks are ordered;
+/// the first one that matches decides.
+bool TokenAnnotator::spaceRequiredBetween(const AnnotatedToken &Left,
+                                          const AnnotatedToken &Right) {
+  if (Right.is(tok::hashhash))
+    return Left.is(tok::hash);
+  if (Left.is(tok::hashhash) || Left.is(tok::hash))
+    return Right.is(tok::hash);
+  if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma))
+    return false;
+  if (Right.is(tok::less) &&
+      (Left.is(tok::kw_template) ||
+       (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
+    return true;
+  if (Left.is(tok::arrow) || Right.is(tok::arrow))
+    return false;
+  if (Left.is(tok::exclaim) || Left.is(tok::tilde))
+    return false;
+  if (Left.is(tok::at) &&
+      (Right.is(tok::identifier) || Right.is(tok::string_literal) ||
+       Right.is(tok::char_constant) || Right.is(tok::numeric_constant) ||
+       Right.is(tok::l_paren) || Right.is(tok::l_brace) ||
+       Right.is(tok::kw_true) || Right.is(tok::kw_false)))
+    return false;
+  if (Left.is(tok::coloncolon))
+    return false;
+  if (Right.is(tok::coloncolon))
+    return Left.isNot(tok::identifier) && Left.isNot(tok::greater);
+  if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less))
+    return false;
+  if (Right.is(tok::amp) || Right.is(tok::star))
+    return Left.FormatTok.Tok.isLiteral() ||
+           (Left.isNot(tok::star) && Left.isNot(tok::amp) &&
+            !Style.PointerAndReferenceBindToType);
+  if (Left.is(tok::amp) || Left.is(tok::star))
+    return Right.FormatTok.Tok.isLiteral() ||
+           Style.PointerAndReferenceBindToType;
+  // NOTE(review): unreachable as written -- every '*' on the right has
+  // already returned from the amp/star check above.
+  if (Right.is(tok::star) && Left.is(tok::l_paren))
+    return false;
+  if (Left.is(tok::l_square) || Right.is(tok::r_square))
+    return false;
+  if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr)
+    return false;
+  if (Left.is(tok::period) || Right.is(tok::period))
+    return false;
+  if (Left.is(tok::colon))
+    return Left.Type != TT_ObjCMethodExpr;
+  if (Right.is(tok::colon))
+    return Right.Type != TT_ObjCMethodExpr;
+  if (Left.is(tok::l_paren))
+    return false;
+  if (Right.is(tok::l_paren)) {
+    return Line.Type == LT_ObjCDecl || Left.is(tok::kw_if) ||
+           Left.is(tok::kw_for) || Left.is(tok::kw_while) ||
+           Left.is(tok::kw_switch) || Left.is(tok::kw_return) ||
+           Left.is(tok::kw_catch) || Left.is(tok::kw_new) ||
+           Left.is(tok::kw_delete);
+  }
+  if (Left.is(tok::at) &&
+      Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword)
+    return false;
+  if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
+    return false;
+  return true;
+}
+
+/// Returns whether a space belongs before \p Tok, taking the line type and
+/// the annotated token types into account before falling back to
+/// spaceRequiredBetween(). \p Tok must have a parent.
+bool TokenAnnotator::spaceRequiredBefore(const AnnotatedToken &Tok) {
+  if (Line.Type == LT_ObjCMethodDecl) {
+    if (Tok.is(tok::identifier) && !Tok.Children.empty() &&
+        Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier))
+      return true;
+    if (Tok.is(tok::colon))
+      return false;
+    if (Tok.Parent->Type == TT_ObjCMethodSpecifier)
+      return true;
+    if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier))
+      // Don't space between ')' and <id>
+      return false;
+    if (Tok.Parent->is(tok::colon) && Tok.is(tok::l_paren))
+      // Don't space between ':' and '('
+      return false;
+  }
+  if (Line.Type == LT_ObjCProperty &&
+      (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
+    return false;
+
+  if (Tok.Parent->is(tok::comma))
+    return true;
+  if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
+    return true;
+  if (Tok.Type == TT_OverloadedOperator)
+    return Tok.is(tok::identifier) || Tok.is(tok::kw_new) ||
+           Tok.is(tok::kw_delete) || Tok.is(tok::kw_bool);
+  if (Tok.Parent->Type == TT_OverloadedOperator)
+    return false;
+  if (Tok.is(tok::colon))
+    return Line.First.isNot(tok::kw_case) && !Tok.Children.empty() &&
+           Tok.Type != TT_ObjCMethodExpr;
+  if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen)
+    return false;
+  if (Tok.Type == TT_UnaryOperator)
+    return Tok.Parent->isNot(tok::l_paren) &&
+           Tok.Parent->isNot(tok::l_square) && Tok.Parent->isNot(tok::at) &&
+           (Tok.Parent->isNot(tok::colon) ||
+            Tok.Parent->Type != TT_ObjCMethodExpr);
+  if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
+    return Tok.Type == TT_TemplateCloser && Tok.Parent->Type ==
+           TT_TemplateCloser && Style.SplitTemplateClosingGreater;
+  }
+  if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
+    return true;
+  if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
+    return false;
+  if (Tok.is(tok::less) && Line.First.is(tok::hash))
+    return true;
+  if (Tok.Type == TT_TrailingUnaryOperator)
+    return false;
+  return spaceRequiredBetween(*Tok.Parent, Tok);
+}
+
+/// Returns whether a line break is allowed between \p Right and its parent.
+/// \p Right must have a parent.
+bool TokenAnnotator::canBreakBefore(const AnnotatedToken &Right) {
+  const AnnotatedToken &Left = *Right.Parent;
+  if (Line.Type == LT_ObjCMethodDecl) {
+    if (Right.is(tok::identifier) && !Right.Children.empty() &&
+        Right.Children[0].is(tok::colon) && Left.is(tok::identifier))
+      return true;
+    if (Right.is(tok::identifier) && Left.is(tok::l_paren) &&
+        Left.Parent->is(tok::colon))
+      // Don't break this identifier as ':' or identifier
+      // before it will break.
+      return false;
+    if (Right.is(tok::colon) && Left.is(tok::identifier) && Left.CanBreakBefore)
+      // Don't break at ':' if identifier before it can break.
+      return false;
+  }
+  if (Right.Type == TT_StartOfName && Style.AllowReturnTypeOnItsOwnLine)
+    return true;
+  if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
+    return false;
+  if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
+    return true;
+  if (isObjCSelectorName(Right))
+    return true;
+  if (Left.ClosesTemplateDeclaration)
+    return true;
+  if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
+    return true;
+  if (Left.Type == TT_RangeBasedForLoopColon)
+    return true;
+  if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
+      Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr ||
+      Left.is(tok::question))
+    return false;
+  if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
+    return false;
+
+  if (Right.Type == TT_LineComment)
+    // We rely on MustBreakBefore being set correctly here as we should not
+    // change the "binding" behavior of a comment.
+    return false;
+
+  // Allow breaking after a trailing 'const', e.g. after a method declaration,
+  // unless it is followed by ';', '{' or '='.
+  if (Left.is(tok::kw_const) && Left.Parent != NULL &&
+      Left.Parent->is(tok::r_paren))
+    return Right.isNot(tok::l_brace) && Right.isNot(tok::semi) &&
+           Right.isNot(tok::equal);
+
+  // We only break before r_brace if there was a corresponding break before
+  // the l_brace, which is tracked by BreakBeforeClosingBrace.
+  if (Right.is(tok::r_brace))
+    return false;
+
+  if (Right.is(tok::r_paren) || Right.is(tok::greater))
+    return false;
+  return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) ||
+         Left.is(tok::comma) || Right.is(tok::lessless) ||
+         Right.is(tok::arrow) || Right.is(tok::period) ||
+         Right.is(tok::colon) || Left.is(tok::coloncolon) ||
+         Left.is(tok::semi) || Left.is(tok::l_brace) ||
+         (Left.is(tok::r_paren) && Left.Type != TT_CastRParen &&
+          Right.is(tok::identifier)) ||
+         (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
+         (Left.is(tok::l_square) && !Right.is(tok::r_square));
+}
+
+} // namespace format
+} // namespace clang
diff --git a/lib/Format/TokenAnnotator.h b/lib/Format/TokenAnnotator.h
new file mode 100644
index 0000000000..5ffa2c4f29
--- /dev/null
+++ b/lib/Format/TokenAnnotator.h
@@ -0,0 +1,228 @@
+//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a token annotator, i.e. creates
+/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
+#define LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
+
+#include "UnwrappedLineParser.h"
+#include "clang/Basic/OperatorPrecedence.h"
+#include "clang/Format/Format.h"
+#include <string>
+#include <vector>
+
+namespace clang {
+class Lexer;
+class SourceManager;
+
+namespace format {
+
+/// \brief The role a token plays in its line, as far as formatting is
+/// concerned. Tokens start out as \c TT_Unknown.
+enum TokenType {
+  TT_BinaryOperator,
+  TT_BlockComment,
+  TT_CastRParen,
+  TT_ConditionalExpr,
+  TT_CtorInitializerColon,
+  TT_ImplicitStringLiteral,
+  TT_LineComment,
+  TT_ObjCBlockLParen,
+  TT_ObjCDecl,
+  TT_ObjCMethodSpecifier,
+  TT_ObjCMethodExpr,
+  TT_ObjCProperty,
+  TT_OverloadedOperator,
+  TT_PointerOrReference,
+  TT_PureVirtualSpecifier,
+  TT_RangeBasedForLoopColon,
+  TT_StartOfName,
+  TT_TemplateCloser,
+  TT_TemplateOpener,
+  TT_TrailingUnaryOperator,
+  TT_UnaryOperator,
+  TT_Unknown
+};
+
+/// \brief The overall kind of an annotated line.
+enum LineType {
+  LT_Invalid,
+  LT_Other,
+  LT_BuilderTypeCall,
+  LT_PreprocessorDirective,
+  LT_VirtualFunctionDecl,
+  LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
+  LT_ObjCMethodDecl,
+  LT_ObjCProperty // An @property line.
+};
+
+/// \brief A \c FormatToken together with the extra information computed by
+/// the annotator.
+///
+/// The tokens of a line form a chain: each token's single successor is
+/// stored in \c Children[0] and \c Parent points back to its predecessor.
+class AnnotatedToken {
+public:
+  explicit AnnotatedToken(const FormatToken &FormatTok)
+      : FormatTok(FormatTok), Type(TT_Unknown), SpaceRequiredBefore(false),
+        CanBreakBefore(false), MustBreakBefore(false),
+        ClosesTemplateDeclaration(false), MatchingParen(NULL),
+        ParameterCount(1), TotalLength(0), SplitPenalty(0), Parent(NULL) {
+    // TotalLength and SplitPenalty are zero-initialized so that tokens which
+    // are copied before (or without) being annotated never expose
+    // indeterminate values.
+  }
+
+  bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); }
+  bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); }
+
+  bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
+    return FormatTok.Tok.isObjCAtKeyword(Kind);
+  }
+
+  FormatToken FormatTok;
+
+  TokenType Type;
+
+  bool SpaceRequiredBefore;
+  bool CanBreakBefore;
+  bool MustBreakBefore;
+
+  bool ClosesTemplateDeclaration;
+
+  /// \brief The matching opening/closing bracket, or NULL if none was found.
+  AnnotatedToken *MatchingParen;
+
+  /// \brief Number of parameters, if this is "(", "[" or "<".
+  ///
+  /// This is initialized to 1 as we don't need to distinguish functions with
+  /// 0 parameters from functions with 1 parameter. Thus, we can simply count
+  /// the number of commas.
+  unsigned ParameterCount;
+
+  /// \brief The total length of the line up to and including this token.
+  unsigned TotalLength;
+
+  /// \brief Penalty for inserting a line break before this token.
+  unsigned SplitPenalty;
+
+  /// \brief The following token, if any, stored as the only element.
+  std::vector<AnnotatedToken> Children;
+  AnnotatedToken *Parent;
+
+  /// \brief Returns the closest preceding token that is not a comment, or
+  /// NULL if there is none.
+  const AnnotatedToken *getPreviousNoneComment() const {
+    AnnotatedToken *Tok = Parent;
+    while (Tok != NULL && Tok->is(tok::comment))
+      Tok = Tok->Parent;
+    return Tok;
+  }
+};
+
+/// \brief The chain of \c AnnotatedTokens built from one \c UnwrappedLine,
+/// plus line-level properties.
+class AnnotatedLine {
+public:
+  /// \brief Builds the token chain from \p Line, which must contain at least
+  /// one token. \c Type is \c LT_Invalid until the line has been annotated.
+  AnnotatedLine(const UnwrappedLine &Line)
+      : First(Line.Tokens.front()), Type(LT_Invalid), Level(Line.Level),
+        InPPDirective(Line.InPPDirective),
+        MustBeDeclaration(Line.MustBeDeclaration) {
+    assert(!Line.Tokens.empty());
+    AnnotatedToken *Current = &First;
+    for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(),
+                                                E = Line.Tokens.end();
+         I != E; ++I) {
+      Current->Children.push_back(AnnotatedToken(*I));
+      Current->Children[0].Parent = Current;
+      Current = &Current->Children[0];
+    }
+    Last = Current;
+  }
+
+  /// \brief Copies \p Other and re-links the \c Parent pointers and \c Last
+  /// so that they refer to the tokens of the copy.
+  ///
+  /// NOTE(review): \c MatchingParen pointers are copied verbatim and hence
+  /// still refer to tokens owned by \p Other if it was already annotated.
+  AnnotatedLine(const AnnotatedLine &Other)
+      : First(Other.First), Type(Other.Type), Level(Other.Level),
+        InPPDirective(Other.InPPDirective),
+        MustBeDeclaration(Other.MustBeDeclaration) {
+    Last = &First;
+    while (!Last->Children.empty()) {
+      Last->Children[0].Parent = Last;
+      Last = &Last->Children[0];
+    }
+  }
+
+  AnnotatedToken First;
+  AnnotatedToken *Last;
+
+  LineType Type;
+  unsigned Level;
+  bool InPPDirective;
+  bool MustBeDeclaration;
+};
+
+/// \brief Returns the binary-operator precedence of \p Tok
+/// (\c prec::Unknown if it is not a binary operator).
+inline prec::Level getPrecedence(const AnnotatedToken &Tok) {
+  return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true);
+}
+
+/// \brief Determines extra information about the tokens comprising an
+/// \c UnwrappedLine.
+class TokenAnnotator {
+public:
+  TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex,
+                 AnnotatedLine &Line)
+      : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Line(Line) {
+  }
+
+  void annotate();
+  void calculateExtraInformation(AnnotatedToken &Current);
+
+private:
+  /// \brief Calculate the penalty for splitting before \c Tok.
+  unsigned splitPenalty(const AnnotatedToken &Tok);
+
+  void determineTokenTypes(AnnotatedToken &Current, bool IsExpression,
+                           bool LookForFunctionName);
+
+  /// \brief Starting from \p Current, this searches backwards for an
+  /// identifier which could be the start of a function name and marks it.
+  void findFunctionName(AnnotatedToken *Current);
+
+  /// \brief Returns the previous non-comment token, or NULL if there is none.
+  ///
+  /// Delegates to \c AnnotatedToken::getPreviousNoneComment() so the
+  /// comment-skipping walk is implemented only once.
+  const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) {
+    return Tok.getPreviousNoneComment();
+  }
+
+  /// \brief Returns the next non-comment token, or NULL if there is none.
+  const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) {
+    if (Tok.Children.empty())
+      return NULL;
+    const AnnotatedToken *NextToken = &Tok.Children[0];
+    while (NextToken->is(tok::comment)) {
+      if (NextToken->Children.empty())
+        return NULL;
+      NextToken = &NextToken->Children[0];
+    }
+    return NextToken;
+  }
+
+  /// \brief Return the type of the given token assuming it is * or &.
+  TokenType determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression);
+
+  TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok);
+
+  /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
+  TokenType determineIncrementUsage(const AnnotatedToken &Tok);
+
+  bool spaceRequiredBetween(const AnnotatedToken &Left,
+                            const AnnotatedToken &Right);
+
+  bool spaceRequiredBefore(const AnnotatedToken &Tok);
+
+  bool canBreakBefore(const AnnotatedToken &Right);
+
+  FormatStyle Style; // Held by value (copied from the constructor argument).
+  SourceManager &SourceMgr; // These references must outlive the annotator.
+  Lexer &Lex;
+  AnnotatedLine &Line;
+};
+
+
+} // end namespace format
+} // end namespace clang
+
+#endif // LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp
index ad641a29b1..e724c69eb8 100644
--- a/lib/Format/UnwrappedLineParser.cpp
+++ b/lib/Format/UnwrappedLineParser.cpp
@@ -11,9 +11,6 @@
 /// \brief This file contains the implementation of the UnwrappedLineParser,
 /// which turns a stream of tokens into UnwrappedLines.
 ///
-/// This is EXPERIMENTAL code under heavy development. It is not in a state yet,
-/// where it can be used to format real code.
-///
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "format-parser"
diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h
index 1ab1ca21dc..5db5e7ba21 100644
--- a/lib/Format/UnwrappedLineParser.h
+++ b/lib/Format/UnwrappedLineParser.h
@@ -11,9 +11,6 @@
 /// \brief This file contains the declaration of the UnwrappedLineParser,
 /// which turns a stream of tokens into UnwrappedLines.
 ///
-/// This is EXPERIMENTAL code under heavy development. It is not in a state yet,
-/// where it can be used to format real code.
-///
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H