Implement PR6423 by using one token of lookahead to disambiguate

an *almost* always incorrect case. This only does the lookahead in the insanely unlikely case, so it shouldn't impact performance. On this testcase: struct foo { } typedef int x; Before: t.c:3:9: error: cannot combine with previous 'struct' declaration specifier typedef int x; ^ After: t.c:2:2: error: expected ';' after struct } ^ ; git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@97403 91177308-0d34-0410-b5e6-96231b3b80d8
2010-02-28 18:18:36 +00:00 · 2010-02-28 18:18:36 +00:00 · b3a4e432c9
--- a/include/clang/Parse/Parser.h
+++ b/include/clang/Parse/Parser.h
@ -329,7 +329,8 @@ private:
  /// replacing them with the non-context-sensitive keywords.  This returns
  /// true if the token was replaced.
  bool TryAltiVecToken(DeclSpec &DS, SourceLocation Loc,
-      const char *&PrevSpec, unsigned &DiagID, bool &isInvalid) {
+                       const char *&PrevSpec, unsigned &DiagID,
+                       bool &isInvalid) {
    if (getLang().AltiVec) {
      if (Tok.getIdentifierInfo() == Ident_vector) {
        const Token nextToken = NextToken();
@ -369,33 +370,31 @@ private:
  /// identifier token, replacing it with the non-context-sensitive __vector.
  /// This returns true if the token was replaced.
  bool TryAltiVecVectorToken() {
-    if (getLang().AltiVec) {
-      if (Tok.getIdentifierInfo() == Ident_vector) {
-        const Token nextToken = NextToken();
-        switch (nextToken.getKind()) {
-          case tok::kw_short:
-          case tok::kw_long:
-          case tok::kw_signed:
-          case tok::kw_unsigned:
-          case tok::kw_void:
-          case tok::kw_char:
-          case tok::kw_int:
-          case tok::kw_float:
-          case tok::kw_double:
-          case tok::kw_bool:
-          case tok::kw___pixel:
-            Tok.setKind(tok::kw___vector);
-            return true;
-          case tok::identifier:
-            if (nextToken.getIdentifierInfo() == Ident_pixel) {
-              Tok.setKind(tok::kw___vector);
-              return true;
-            }
-            break;
-          default:
-            break;
-        }
+    if (!getLang().AltiVec ||
+        Tok.getIdentifierInfo() != Ident_vector) return false;
+    const Token nextToken = NextToken();
+    switch (nextToken.getKind()) {
+    case tok::kw_short:
+    case tok::kw_long:
+    case tok::kw_signed:
+    case tok::kw_unsigned:
+    case tok::kw_void:
+    case tok::kw_char:
+    case tok::kw_int:
+    case tok::kw_float:
+    case tok::kw_double:
+    case tok::kw_bool:
+    case tok::kw___pixel:
+      Tok.setKind(tok::kw___vector);
+      return true;
+    case tok::identifier:
+      if (nextToken.getIdentifierInfo() == Ident_pixel) {
+        Tok.setKind(tok::kw___vector);
+        return true;
      }
+      break;
+    default:
+      break;
    }
    return false;
  }
@ -1181,6 +1180,11 @@ private:
  bool isDeclarationSpecifier();
  bool isTypeSpecifierQualifier();
  bool isTypeQualifier() const;
+  
+  /// isKnownToBeTypeSpecifier - Return true if we know that the specified token
+  /// is definitely a type-specifier.  Return false if it isn't part of a type
+  /// specifier or if we're not sure.
+  bool isKnownToBeTypeSpecifier(const Token &Tok) const;

  /// isDeclarationStatement - Disambiguates between a declaration or an
  /// expression statement, when parsing function bodies.
--- a/lib/Parse/ParseDecl.cpp
+++ b/lib/Parse/ParseDecl.cpp
@ -2019,6 +2019,47 @@ bool Parser::isTypeQualifier() const {
  }
 }

+/// isKnownToBeTypeSpecifier - Return true if we know that the specified token
+/// is definitely a type-specifier.  Return false if it isn't part of a type
+/// specifier or if we're not sure.
+bool Parser::isKnownToBeTypeSpecifier(const Token &Tok) const {
+  switch (Tok.getKind()) {
+  default: return false;
+    // type-specifiers
+  case tok::kw_short:
+  case tok::kw_long:
+  case tok::kw_signed:
+  case tok::kw_unsigned:
+  case tok::kw__Complex:
+  case tok::kw__Imaginary:
+  case tok::kw_void:
+  case tok::kw_char:
+  case tok::kw_wchar_t:
+  case tok::kw_char16_t:
+  case tok::kw_char32_t:
+  case tok::kw_int:
+  case tok::kw_float:
+  case tok::kw_double:
+  case tok::kw_bool:
+  case tok::kw__Bool:
+  case tok::kw__Decimal32:
+  case tok::kw__Decimal64:
+  case tok::kw__Decimal128:
+  case tok::kw___vector:
+    
+    // struct-or-union-specifier (C99) or class-specifier (C++)
+  case tok::kw_class:
+  case tok::kw_struct:
+  case tok::kw_union:
+    // enum-specifier
+  case tok::kw_enum:
+    
+    // typedef-name
+  case tok::annot_typename:
+    return true;
+  }
+}
+
 /// isTypeSpecifierQualifier - Return true if the current token could be the
 /// start of a specifier-qualifier-list.
 bool Parser::isTypeSpecifierQualifier() {
--- a/lib/Parse/ParseDeclCXX.cpp
+++ b/lib/Parse/ParseDeclCXX.cpp
@ -940,7 +940,9 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
  //
  // This switch enumerates the valid "follow" set for definition.
  if (TUK == Action::TUK_Definition) {
+    bool ExpectedSemi = true;
    switch (Tok.getKind()) {
+    default: break;
    case tok::semi:               // struct foo {...} ;
    case tok::star:               // struct foo {...} *         P;
    case tok::amp:                // struct foo {...} &         R = ...
@ -951,24 +953,46 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
    case tok::annot_template_id:  // struct foo {...} a<int>    ::b;
    case tok::l_paren:            // struct foo {...} (         x);
    case tok::comma:              // __builtin_offsetof(struct foo{...} ,
+      ExpectedSemi = false;
+      break;
+    // Type qualifiers
+    case tok::kw_const:           // struct foo {...} const     x;
+    case tok::kw_volatile:        // struct foo {...} volatile  x;
+    case tok::kw_restrict:        // struct foo {...} restrict  x;
+    case tok::kw_inline:          // struct foo {...} inline    foo() {};
    // Storage-class specifiers
    case tok::kw_static:          // struct foo {...} static    x;
    case tok::kw_extern:          // struct foo {...} extern    x;
    case tok::kw_typedef:         // struct foo {...} typedef   x;
    case tok::kw_register:        // struct foo {...} register  x;
    case tok::kw_auto:            // struct foo {...} auto      x;
-    // Type qualifiers
-    case tok::kw_const:           // struct foo {...} const     x;
-    case tok::kw_volatile:        // struct foo {...} volatile  x;
-    case tok::kw_restrict:        // struct foo {...} restrict  x;
-    case tok::kw_inline:          // struct foo {...} inline    foo() {};
+      // As shown above, type qualifiers and storage class specifiers absolutely
+      // can occur after class specifiers according to the grammar.  However,
+      // almost noone actually writes code like this.  If we see one of these,
+      // it is much more likely that someone missed a semi colon and the
+      // type/storage class specifier we're seeing is part of the *next*
+      // intended declaration, as in:
+      //
+      //   struct foo { ... }
+      //   typedef int X;
+      //
+      // We'd really like to emit a missing semicolon error instead of emitting
+      // an error on the 'int' saying that you can't have two type specifiers in
+      // the same declaration of X.  Because of this, we look ahead past this
+      // token to see if it's a type specifier.  If so, we know the code is
+      // otherwise invalid, so we can produce the expected semi error.
+      if (!isKnownToBeTypeSpecifier(NextToken()))
+        ExpectedSemi = false;
      break;
        
    case tok::r_brace:  // struct bar { struct foo {...} } 
      // Missing ';' at end of struct is accepted as an extension in C mode.
-      if (!getLang().CPlusPlus) break;
-      // FALL THROUGH.
-    default:
+      if (!getLang().CPlusPlus)
+        ExpectedSemi = false;
+      break;
+    }
+    
+    if (ExpectedSemi) {
      ExpectAndConsume(tok::semi, diag::err_expected_semi_after_tagdecl,
                       TagType == DeclSpec::TST_class ? "class"
                       : TagType == DeclSpec::TST_struct? "struct" : "union");
@ -977,7 +1001,6 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
      // ';' after the definition.
      PP.EnterToken(Tok);
      Tok.setKind(tok::semi);  
-      break;
    }
  }
 }
--- a/test/Sema/declspec.c
+++ b/test/Sema/declspec.c
@ -31,3 +31,8 @@ struct test1 {

 void test2() {}

+
+// PR6423
+struct test3s {
+} // expected-error {{expected ';' after struct}}
+typedef int test3g;