зеркало из https://github.com/microsoft/clang-1.git
Use several weighted factors to determine typo candidate viablity.
Replace the simple Levenshtein edit distance for typo correction candidates--and the hacky way adding namespace qualifiers would affect the edit distance--with a synthetic "edit distance" comprised of several factors and their relative weights. This also allows the typo correction callback object to convey more information about the viability of a correction candidate than simply viable or not viable. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@150495 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Родитель
9d665044b8
Коммит
63aae82bb1
|
@ -23,32 +23,46 @@ namespace clang {
|
|||
/// @brief Simple class containing the result of Sema::CorrectTypo
|
||||
class TypoCorrection {
|
||||
public:
|
||||
// "Distance" for unusable corrections
|
||||
static const unsigned InvalidDistance = ~0U;
|
||||
// The largest distance still considered valid (larger edit distances are
|
||||
// mapped to InvalidDistance by getEditDistance).
|
||||
static const unsigned MaximumDistance = 10000U;
|
||||
|
||||
// Relative weightings of the "edit distance" components. The higher the
|
||||
// weight, the more of a penalty to fitness the component will give (higher
|
||||
// weights mean greater contribution to the total edit distance, with the
|
||||
// best correction candidates having the lowest edit distance).
|
||||
static const unsigned CharDistanceWeight = 100U;
|
||||
static const unsigned QualifierDistanceWeight = 110U;
|
||||
static const unsigned CallbackDistanceWeight = 150U;
|
||||
|
||||
TypoCorrection(const DeclarationName &Name, NamedDecl *NameDecl,
|
||||
NestedNameSpecifier *NNS=0, unsigned distance=0)
|
||||
: CorrectionName(Name),
|
||||
CorrectionNameSpec(NNS),
|
||||
EditDistance(distance) {
|
||||
NestedNameSpecifier *NNS=0, unsigned CharDistance=0,
|
||||
unsigned QualifierDistance=0)
|
||||
: CorrectionName(Name), CorrectionNameSpec(NNS),
|
||||
CharDistance(CharDistance), QualifierDistance(QualifierDistance),
|
||||
CallbackDistance(0) {
|
||||
if (NameDecl)
|
||||
CorrectionDecls.push_back(NameDecl);
|
||||
}
|
||||
|
||||
TypoCorrection(NamedDecl *Name, NestedNameSpecifier *NNS=0,
|
||||
unsigned distance=0)
|
||||
: CorrectionName(Name->getDeclName()),
|
||||
CorrectionNameSpec(NNS),
|
||||
EditDistance(distance) {
|
||||
unsigned CharDistance=0)
|
||||
: CorrectionName(Name->getDeclName()), CorrectionNameSpec(NNS),
|
||||
CharDistance(CharDistance), QualifierDistance(0), CallbackDistance(0) {
|
||||
if (Name)
|
||||
CorrectionDecls.push_back(Name);
|
||||
}
|
||||
|
||||
TypoCorrection(DeclarationName Name, NestedNameSpecifier *NNS=0,
|
||||
unsigned distance=0)
|
||||
: CorrectionName(Name),
|
||||
CorrectionNameSpec(NNS),
|
||||
EditDistance(distance) {}
|
||||
unsigned CharDistance=0)
|
||||
: CorrectionName(Name), CorrectionNameSpec(NNS),
|
||||
CharDistance(CharDistance), QualifierDistance(0), CallbackDistance(0) {}
|
||||
|
||||
TypoCorrection()
|
||||
: CorrectionNameSpec(0), EditDistance(0) {}
|
||||
: CorrectionNameSpec(0), CharDistance(0), QualifierDistance(0),
|
||||
CallbackDistance(0) {}
|
||||
|
||||
/// \brief Gets the DeclarationName of the typo correction
|
||||
DeclarationName getCorrection() const { return CorrectionName; }
|
||||
|
@ -64,8 +78,41 @@ public:
|
|||
CorrectionNameSpec = NNS;
|
||||
}
|
||||
|
||||
/// \brief Gets the "edit distance" of the typo correction from the typo
|
||||
unsigned getEditDistance() const { return EditDistance; }
|
||||
void setQualifierDistance(unsigned ED) {
|
||||
QualifierDistance = ED;
|
||||
}
|
||||
|
||||
void setCallbackDistance(unsigned ED) {
|
||||
CallbackDistance = ED;
|
||||
}
|
||||
|
||||
// Convert the given weighted edit distance to a roughly equivalent number of
|
||||
// single-character edits (typically for comparison to the length of the
|
||||
// string being edited).
|
||||
static unsigned NormalizeEditDistance(unsigned ED) {
|
||||
if (ED > MaximumDistance)
|
||||
return InvalidDistance;
|
||||
return (ED + CharDistanceWeight / 2) / CharDistanceWeight;
|
||||
}
|
||||
|
||||
/// \brief Gets the "edit distance" of the typo correction from the typo.
|
||||
/// If Normalized is true, scale the distance down by the CharDistanceWeight
|
||||
/// to return the edit distance in terms of single-character edits.
|
||||
unsigned getEditDistance(bool Normalized = true) const {
|
||||
if (CharDistance > MaximumDistance || QualifierDistance > MaximumDistance ||
|
||||
CallbackDistance > MaximumDistance)
|
||||
return InvalidDistance;
|
||||
unsigned ED =
|
||||
CharDistance * CharDistanceWeight +
|
||||
QualifierDistance * QualifierDistanceWeight +
|
||||
CallbackDistance * CallbackDistanceWeight;
|
||||
if (ED > MaximumDistance)
|
||||
return InvalidDistance;
|
||||
// Half the CharDistanceWeight is added to ED to simulate rounding since
|
||||
// integer division truncates the value (i.e. round-to-nearest-int instead
|
||||
// of round-to-zero).
|
||||
return Normalized ? NormalizeEditDistance(ED) : ED;
|
||||
}
|
||||
|
||||
/// \brief Gets the pointer to the declaration of the typo correction
|
||||
NamedDecl* getCorrectionDecl() const {
|
||||
|
@ -143,13 +190,17 @@ private:
|
|||
DeclarationName CorrectionName;
|
||||
NestedNameSpecifier *CorrectionNameSpec;
|
||||
llvm::SmallVector<NamedDecl*, 1> CorrectionDecls;
|
||||
unsigned EditDistance;
|
||||
unsigned CharDistance;
|
||||
unsigned QualifierDistance;
|
||||
unsigned CallbackDistance;
|
||||
};
|
||||
|
||||
/// @brief Base class for callback objects used by Sema::CorrectTypo to check
|
||||
/// the validity of a potential typo correction.
|
||||
class CorrectionCandidateCallback {
|
||||
public:
|
||||
static const unsigned InvalidDistance = TypoCorrection::InvalidDistance;
|
||||
|
||||
CorrectionCandidateCallback()
|
||||
: WantTypeSpecifiers(true), WantExpressionKeywords(true),
|
||||
WantCXXNamedCasts(true), WantRemainingKeywords(true),
|
||||
|
@ -158,10 +209,26 @@ class CorrectionCandidateCallback {
|
|||
|
||||
virtual ~CorrectionCandidateCallback() {}
|
||||
|
||||
/// \brief Simple predicate used by the default RankCandidate to
|
||||
/// determine whether to return an edit distance of 0 or InvalidDistance.
|
||||
/// This can be overrided by validators that only need to determine if a
|
||||
/// candidate is viable, without ranking potentially viable candidates.
|
||||
/// Only ValidateCandidate or RankCandidate need to be overriden by a
|
||||
/// callback wishing to check the viability of correction candidates.
|
||||
virtual bool ValidateCandidate(const TypoCorrection &candidate) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/// \brief Method used by Sema::CorrectTypo to assign an "edit distance" rank
|
||||
/// to a candidate (where a lower value represents a better candidate), or
|
||||
/// returning InvalidDistance if the candidate is not at all viable. For
|
||||
/// validation callbacks that only need to determine if a candidate is viable,
|
||||
/// the default RankCandidate returns either 0 or InvalidDistance depending
|
||||
/// whether ValidateCandidate returns true or false.
|
||||
virtual unsigned RankCandidate(const TypoCorrection &candidate) {
|
||||
return ValidateCandidate(candidate) ? 0 : InvalidDistance;
|
||||
}
|
||||
|
||||
// Flags for context-dependent keywords.
|
||||
// TODO: Expand these to apply to non-keywords or possibly remove them.
|
||||
bool WantTypeSpecifiers;
|
||||
|
|
|
@ -3109,9 +3109,12 @@ public:
|
|||
return (*BestResults.begin()->second)[Name];
|
||||
}
|
||||
|
||||
unsigned getBestEditDistance() {
|
||||
return (BestResults.empty()) ?
|
||||
(std::numeric_limits<unsigned>::max)() : BestResults.begin()->first;
|
||||
unsigned getBestEditDistance(bool Normalized) {
|
||||
if (BestResults.empty())
|
||||
return (std::numeric_limits<unsigned>::max)();
|
||||
|
||||
unsigned BestED = BestResults.begin()->first;
|
||||
return Normalized ? TypoCorrection::NormalizeEditDistance(BestED) : BestED;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -3167,7 +3170,7 @@ void TypoCorrectionConsumer::addName(StringRef Name,
|
|||
|
||||
void TypoCorrectionConsumer::addCorrection(TypoCorrection Correction) {
|
||||
StringRef Name = Correction.getCorrectionAsIdentifierInfo()->getName();
|
||||
TypoResultsMap *& Map = BestResults[Correction.getEditDistance()];
|
||||
TypoResultsMap *& Map = BestResults[Correction.getEditDistance(false)];
|
||||
if (!Map)
|
||||
Map = new TypoResultsMap;
|
||||
|
||||
|
@ -3478,6 +3481,12 @@ static void AddKeywordsToConsumer(Sema &SemaRef,
|
|||
}
|
||||
}
|
||||
|
||||
static bool isCandidateViable(CorrectionCandidateCallback &CCC,
|
||||
TypoCorrection &Candidate) {
|
||||
Candidate.setCallbackDistance(CCC.RankCandidate(Candidate));
|
||||
return Candidate.getEditDistance(false) != TypoCorrection::InvalidDistance;
|
||||
}
|
||||
|
||||
/// \brief Try to "correct" a typo in the source code by finding
|
||||
/// visible declarations whose names are similar to the name that was
|
||||
/// present in the source code.
|
||||
|
@ -3545,10 +3554,10 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
|
|||
|
||||
TypoCorrectionConsumer Consumer(*this, Typo);
|
||||
|
||||
// If a callback object returns true for an empty typo correction candidate,
|
||||
// assume it does not do any actual validation of the candidates.
|
||||
// If a callback object considers an empty typo correction candidate to be
|
||||
// viable, assume it does not do any actual validation of the candidates.
|
||||
TypoCorrection EmptyCorrection;
|
||||
bool ValidatingCallback = !CCC.ValidateCandidate(EmptyCorrection);
|
||||
bool ValidatingCallback = !isCandidateViable(CCC, EmptyCorrection);
|
||||
|
||||
// Perform name lookup to find visible, similarly-named entities.
|
||||
bool IsUnqualifiedLookup = false;
|
||||
|
@ -3584,7 +3593,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
|
|||
// keyword case, we'll end up adding the keyword below.
|
||||
if (Cached->second) {
|
||||
if (!Cached->second.isKeyword() &&
|
||||
CCC.ValidateCandidate(Cached->second))
|
||||
isCandidateViable(CCC, Cached->second))
|
||||
Consumer.addCorrection(Cached->second);
|
||||
} else {
|
||||
// Only honor no-correction cache hits when a callback that will validate
|
||||
|
@ -3637,7 +3646,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
|
|||
|
||||
// Make sure that the user typed at least 3 characters for each correction
|
||||
// made. Otherwise, we don't even both looking at the results.
|
||||
unsigned ED = Consumer.getBestEditDistance();
|
||||
unsigned ED = Consumer.getBestEditDistance(true);
|
||||
if (ED > 0 && Typo->getName().size() / ED < 3) {
|
||||
// If this was an unqualified lookup, note that no correction was found.
|
||||
if (IsUnqualifiedLookup)
|
||||
|
@ -3666,7 +3675,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
|
|||
|
||||
// Weed out any names that could not be found by name lookup or, if a
|
||||
// CorrectionCandidateCallback object was provided, failed validation.
|
||||
llvm::SmallPtrSet<IdentifierInfo*, 16> QualifiedResults;
|
||||
llvm::SmallVector<TypoCorrection, 16> QualifiedResults;
|
||||
LookupResult TmpRes(*this, TypoName, LookupKind);
|
||||
TmpRes.suppressDiagnostics();
|
||||
while (!Consumer.empty()) {
|
||||
|
@ -3681,7 +3690,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
|
|||
if (I->second.isResolved()) {
|
||||
TypoCorrectionConsumer::result_iterator Prev = I;
|
||||
++I;
|
||||
if (!CCC.ValidateCandidate(Prev->second))
|
||||
if (!isCandidateViable(CCC, Prev->second))
|
||||
DI->second->erase(Prev);
|
||||
continue;
|
||||
}
|
||||
|
@ -3695,7 +3704,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
|
|||
case LookupResult::NotFound:
|
||||
case LookupResult::NotFoundInCurrentInstantiation:
|
||||
case LookupResult::FoundUnresolvedValue:
|
||||
QualifiedResults.insert(Name);
|
||||
QualifiedResults.push_back(I->second);
|
||||
// We didn't find this name in our scope, or didn't like what we found;
|
||||
// ignore it.
|
||||
{
|
||||
|
@ -3718,7 +3727,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
|
|||
TRD != TRDEnd; ++TRD)
|
||||
I->second.addCorrectionDecl(*TRD);
|
||||
++I;
|
||||
if (!CCC.ValidateCandidate(Prev->second))
|
||||
if (!isCandidateViable(CCC, Prev->second))
|
||||
DI->second->erase(Prev);
|
||||
break;
|
||||
}
|
||||
|
@ -3727,7 +3736,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
|
|||
TypoCorrectionConsumer::result_iterator Prev = I;
|
||||
I->second.setCorrectionDecl(TmpRes.getAsSingle<NamedDecl>());
|
||||
++I;
|
||||
if (!CCC.ValidateCandidate(Prev->second))
|
||||
if (!isCandidateViable(CCC, Prev->second))
|
||||
DI->second->erase(Prev);
|
||||
break;
|
||||
}
|
||||
|
@ -3744,7 +3753,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
|
|||
// Only perform the qualified lookups for C++
|
||||
if (getLangOptions().CPlusPlus) {
|
||||
TmpRes.suppressDiagnostics();
|
||||
for (llvm::SmallPtrSet<IdentifierInfo*,
|
||||
for (llvm::SmallVector<TypoCorrection,
|
||||
16>::iterator QRI = QualifiedResults.begin(),
|
||||
QRIEnd = QualifiedResults.end();
|
||||
QRI != QRIEnd; ++QRI) {
|
||||
|
@ -3752,33 +3761,33 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
|
|||
NIEnd = Namespaces.end();
|
||||
NI != NIEnd; ++NI) {
|
||||
DeclContext *Ctx = NI->DeclCtx;
|
||||
unsigned QualifiedED = ED + NI->EditDistance;
|
||||
|
||||
// FIXME: Stop searching once the namespaces are too far away to create
|
||||
// acceptable corrections for this identifier (since the namespaces
|
||||
// are sorted in ascending order by edit distance).
|
||||
|
||||
TmpRes.clear();
|
||||
TmpRes.setLookupName(*QRI);
|
||||
TmpRes.setLookupName(QRI->getCorrectionAsIdentifierInfo());
|
||||
if (!LookupQualifiedName(TmpRes, Ctx)) continue;
|
||||
|
||||
// Any corrections added below will be validated in subsequent
|
||||
// iterations of the main while() loop over the Consumer's contents.
|
||||
switch (TmpRes.getResultKind()) {
|
||||
case LookupResult::Found:
|
||||
Consumer.addName((*QRI)->getName(), TmpRes.getAsSingle<NamedDecl>(),
|
||||
QualifiedED, NI->NameSpecifier);
|
||||
QRI->setCorrectionDecl(TmpRes.getAsSingle<NamedDecl>());
|
||||
QRI->setCorrectionSpecifier(NI->NameSpecifier);
|
||||
QRI->setQualifierDistance(NI->EditDistance);
|
||||
Consumer.addCorrection(*QRI);
|
||||
break;
|
||||
case LookupResult::FoundOverloaded: {
|
||||
TypoCorrection corr(&Context.Idents.get((*QRI)->getName()), NULL,
|
||||
NI->NameSpecifier, QualifiedED);
|
||||
case LookupResult::FoundOverloaded:
|
||||
QRI->setCorrectionSpecifier(NI->NameSpecifier);
|
||||
QRI->setQualifierDistance(NI->EditDistance);
|
||||
for (LookupResult::iterator TRD = TmpRes.begin(),
|
||||
TRDEnd = TmpRes.end();
|
||||
TRD != TRDEnd; ++TRD)
|
||||
corr.addCorrectionDecl(*TRD);
|
||||
Consumer.addCorrection(corr);
|
||||
QRI->addCorrectionDecl(*TRD);
|
||||
Consumer.addCorrection(*QRI);
|
||||
break;
|
||||
}
|
||||
case LookupResult::NotFound:
|
||||
case LookupResult::NotFoundInCurrentInstantiation:
|
||||
case LookupResult::Ambiguous:
|
||||
|
@ -3796,7 +3805,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
|
|||
if (Consumer.empty()) return TypoCorrection();
|
||||
|
||||
TypoResultsMap &BestResults = *Consumer.begin()->second;
|
||||
ED = Consumer.begin()->first;
|
||||
ED = TypoCorrection::NormalizeEditDistance(Consumer.begin()->first);
|
||||
|
||||
if (ED > 0 && Typo->getName().size() / ED < 3) {
|
||||
// If this was an unqualified lookup and we believe the callback
|
||||
|
@ -3808,22 +3817,6 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
|
|||
return TypoCorrection();
|
||||
}
|
||||
|
||||
// If we have multiple possible corrections, eliminate the ones where we
|
||||
// added namespace qualifiers to try to resolve the ambiguity (and to favor
|
||||
// corrections without additional namespace qualifiers)
|
||||
if (getLangOptions().CPlusPlus && BestResults.size() > 1) {
|
||||
TypoCorrectionConsumer::distance_iterator DI = Consumer.begin();
|
||||
for (TypoCorrectionConsumer::result_iterator I = DI->second->begin(),
|
||||
IEnd = DI->second->end();
|
||||
I != IEnd; /* Increment in loop. */) {
|
||||
if (I->second.getCorrectionSpecifier() != NULL) {
|
||||
TypoCorrectionConsumer::result_iterator Cur = I;
|
||||
++I;
|
||||
DI->second->erase(Cur);
|
||||
} else ++I;
|
||||
}
|
||||
}
|
||||
|
||||
// If only a single name remains, return that result.
|
||||
if (BestResults.size() == 1) {
|
||||
const llvm::StringMapEntry<TypoCorrection> &Correction = *(BestResults.begin());
|
||||
|
|
|
@ -74,3 +74,24 @@ void bar() {
|
|||
(void)new llvm::GraphWriter; // expected-error {{expected a type}}
|
||||
(void)new llvm::Graphwriter<S>; // expected-error {{no template named 'Graphwriter' in namespace 'llvm'; did you mean 'GraphWriter'?}}
|
||||
}
|
||||
|
||||
// If namespace prefixes and character edits have the same weight, correcting
|
||||
// "fimish" to "N::famish" would have the same edit distance as correcting
|
||||
// "fimish" to "Finish". The result would be no correction being suggested
|
||||
// unless one of the corrections is given precedence (e.g. by filtering out
|
||||
// suggestions with added namespace qualifiers).
|
||||
namespace N { void famish(int); }
|
||||
void Finish(int); // expected-note {{'Finish' declared here}}
|
||||
void Start() {
|
||||
fimish(7); // expected-error {{use of undeclared identifier 'fimish'; did you mean 'Finish'?}}
|
||||
}
|
||||
|
||||
// But just eliminating the corrections containing added namespace qualifiers
|
||||
// won't work if both of the tied corrections have namespace qualifiers added.
|
||||
namespace N {
|
||||
void someCheck(int); // expected-note {{'N::someCheck' declared here}}
|
||||
namespace O { void somechock(int); }
|
||||
}
|
||||
void confusing() {
|
||||
somechick(7); // expected-error {{use of undeclared identifier 'somechick'; did you mean 'N::someCheck'?}}
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче