Use several weighted factors to determine typo candidate viablity.

Replace the simple Levenshtein edit distance for typo correction
candidates--and the hacky way adding namespace qualifiers would affect
the edit distance--with a synthetic "edit distance" comprised of several
factors and their relative weights. This also allows the typo correction
callback object to convey more information about the viability of a
correction candidate than simply viable or not viable.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@150495 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Kaelyn Uhrain 2012-02-14 18:56:48 +00:00
Родитель 9d665044b8
Коммит 63aae82bb1
3 изменённых файлов: 139 добавлений и 58 удалений

Просмотреть файл

@ -23,32 +23,46 @@ namespace clang {
/// @brief Simple class containing the result of Sema::CorrectTypo
class TypoCorrection {
public:
// "Distance" for unusable corrections
static const unsigned InvalidDistance = ~0U;
// The largest distance still considered valid (larger edit distances are
// mapped to InvalidDistance by getEditDistance).
static const unsigned MaximumDistance = 10000U;
// Relative weightings of the "edit distance" components. The higher the
// weight, the more of a penalty to fitness the component will give (higher
// weights mean greater contribution to the total edit distance, with the
// best correction candidates having the lowest edit distance).
static const unsigned CharDistanceWeight = 100U;
static const unsigned QualifierDistanceWeight = 110U;
static const unsigned CallbackDistanceWeight = 150U;
TypoCorrection(const DeclarationName &Name, NamedDecl *NameDecl,
NestedNameSpecifier *NNS=0, unsigned distance=0)
: CorrectionName(Name),
CorrectionNameSpec(NNS),
EditDistance(distance) {
NestedNameSpecifier *NNS=0, unsigned CharDistance=0,
unsigned QualifierDistance=0)
: CorrectionName(Name), CorrectionNameSpec(NNS),
CharDistance(CharDistance), QualifierDistance(QualifierDistance),
CallbackDistance(0) {
if (NameDecl)
CorrectionDecls.push_back(NameDecl);
}
TypoCorrection(NamedDecl *Name, NestedNameSpecifier *NNS=0,
unsigned distance=0)
: CorrectionName(Name->getDeclName()),
CorrectionNameSpec(NNS),
EditDistance(distance) {
unsigned CharDistance=0)
: CorrectionName(Name->getDeclName()), CorrectionNameSpec(NNS),
CharDistance(CharDistance), QualifierDistance(0), CallbackDistance(0) {
if (Name)
CorrectionDecls.push_back(Name);
}
TypoCorrection(DeclarationName Name, NestedNameSpecifier *NNS=0,
unsigned distance=0)
: CorrectionName(Name),
CorrectionNameSpec(NNS),
EditDistance(distance) {}
unsigned CharDistance=0)
: CorrectionName(Name), CorrectionNameSpec(NNS),
CharDistance(CharDistance), QualifierDistance(0), CallbackDistance(0) {}
TypoCorrection()
: CorrectionNameSpec(0), EditDistance(0) {}
: CorrectionNameSpec(0), CharDistance(0), QualifierDistance(0),
CallbackDistance(0) {}
/// \brief Gets the DeclarationName of the typo correction
DeclarationName getCorrection() const { return CorrectionName; }
@ -64,8 +78,41 @@ public:
CorrectionNameSpec = NNS;
}
/// \brief Gets the "edit distance" of the typo correction from the typo
unsigned getEditDistance() const { return EditDistance; }
void setQualifierDistance(unsigned ED) {
QualifierDistance = ED;
}
void setCallbackDistance(unsigned ED) {
CallbackDistance = ED;
}
// Convert the given weighted edit distance to a roughly equivalent number of
// single-character edits (typically for comparison to the length of the
// string being edited).
static unsigned NormalizeEditDistance(unsigned ED) {
if (ED > MaximumDistance)
return InvalidDistance;
return (ED + CharDistanceWeight / 2) / CharDistanceWeight;
}
/// \brief Gets the "edit distance" of the typo correction from the typo.
/// If Normalized is true, scale the distance down by the CharDistanceWeight
/// to return the edit distance in terms of single-character edits.
unsigned getEditDistance(bool Normalized = true) const {
if (CharDistance > MaximumDistance || QualifierDistance > MaximumDistance ||
CallbackDistance > MaximumDistance)
return InvalidDistance;
unsigned ED =
CharDistance * CharDistanceWeight +
QualifierDistance * QualifierDistanceWeight +
CallbackDistance * CallbackDistanceWeight;
if (ED > MaximumDistance)
return InvalidDistance;
// Half the CharDistanceWeight is added to ED to simulate rounding since
// integer division truncates the value (i.e. round-to-nearest-int instead
// of round-to-zero).
return Normalized ? NormalizeEditDistance(ED) : ED;
}
/// \brief Gets the pointer to the declaration of the typo correction
NamedDecl* getCorrectionDecl() const {
@ -143,13 +190,17 @@ private:
DeclarationName CorrectionName;
NestedNameSpecifier *CorrectionNameSpec;
llvm::SmallVector<NamedDecl*, 1> CorrectionDecls;
unsigned EditDistance;
unsigned CharDistance;
unsigned QualifierDistance;
unsigned CallbackDistance;
};
/// @brief Base class for callback objects used by Sema::CorrectTypo to check
/// the validity of a potential typo correction.
class CorrectionCandidateCallback {
public:
static const unsigned InvalidDistance = TypoCorrection::InvalidDistance;
CorrectionCandidateCallback()
: WantTypeSpecifiers(true), WantExpressionKeywords(true),
WantCXXNamedCasts(true), WantRemainingKeywords(true),
@ -158,10 +209,26 @@ class CorrectionCandidateCallback {
virtual ~CorrectionCandidateCallback() {}
/// \brief Simple predicate used by the default RankCandidate to
/// determine whether to return an edit distance of 0 or InvalidDistance.
/// This can be overrided by validators that only need to determine if a
/// candidate is viable, without ranking potentially viable candidates.
/// Only ValidateCandidate or RankCandidate need to be overriden by a
/// callback wishing to check the viability of correction candidates.
virtual bool ValidateCandidate(const TypoCorrection &candidate) {
return true;
}
/// \brief Method used by Sema::CorrectTypo to assign an "edit distance" rank
/// to a candidate (where a lower value represents a better candidate), or
/// returning InvalidDistance if the candidate is not at all viable. For
/// validation callbacks that only need to determine if a candidate is viable,
/// the default RankCandidate returns either 0 or InvalidDistance depending
/// whether ValidateCandidate returns true or false.
virtual unsigned RankCandidate(const TypoCorrection &candidate) {
return ValidateCandidate(candidate) ? 0 : InvalidDistance;
}
// Flags for context-dependent keywords.
// TODO: Expand these to apply to non-keywords or possibly remove them.
bool WantTypeSpecifiers;

Просмотреть файл

@ -3109,9 +3109,12 @@ public:
return (*BestResults.begin()->second)[Name];
}
unsigned getBestEditDistance() {
return (BestResults.empty()) ?
(std::numeric_limits<unsigned>::max)() : BestResults.begin()->first;
unsigned getBestEditDistance(bool Normalized) {
if (BestResults.empty())
return (std::numeric_limits<unsigned>::max)();
unsigned BestED = BestResults.begin()->first;
return Normalized ? TypoCorrection::NormalizeEditDistance(BestED) : BestED;
}
};
@ -3167,7 +3170,7 @@ void TypoCorrectionConsumer::addName(StringRef Name,
void TypoCorrectionConsumer::addCorrection(TypoCorrection Correction) {
StringRef Name = Correction.getCorrectionAsIdentifierInfo()->getName();
TypoResultsMap *& Map = BestResults[Correction.getEditDistance()];
TypoResultsMap *& Map = BestResults[Correction.getEditDistance(false)];
if (!Map)
Map = new TypoResultsMap;
@ -3478,6 +3481,12 @@ static void AddKeywordsToConsumer(Sema &SemaRef,
}
}
static bool isCandidateViable(CorrectionCandidateCallback &CCC,
TypoCorrection &Candidate) {
Candidate.setCallbackDistance(CCC.RankCandidate(Candidate));
return Candidate.getEditDistance(false) != TypoCorrection::InvalidDistance;
}
/// \brief Try to "correct" a typo in the source code by finding
/// visible declarations whose names are similar to the name that was
/// present in the source code.
@ -3545,10 +3554,10 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
TypoCorrectionConsumer Consumer(*this, Typo);
// If a callback object returns true for an empty typo correction candidate,
// assume it does not do any actual validation of the candidates.
// If a callback object considers an empty typo correction candidate to be
// viable, assume it does not do any actual validation of the candidates.
TypoCorrection EmptyCorrection;
bool ValidatingCallback = !CCC.ValidateCandidate(EmptyCorrection);
bool ValidatingCallback = !isCandidateViable(CCC, EmptyCorrection);
// Perform name lookup to find visible, similarly-named entities.
bool IsUnqualifiedLookup = false;
@ -3584,7 +3593,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
// keyword case, we'll end up adding the keyword below.
if (Cached->second) {
if (!Cached->second.isKeyword() &&
CCC.ValidateCandidate(Cached->second))
isCandidateViable(CCC, Cached->second))
Consumer.addCorrection(Cached->second);
} else {
// Only honor no-correction cache hits when a callback that will validate
@ -3637,7 +3646,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
// Make sure that the user typed at least 3 characters for each correction
// made. Otherwise, we don't even both looking at the results.
unsigned ED = Consumer.getBestEditDistance();
unsigned ED = Consumer.getBestEditDistance(true);
if (ED > 0 && Typo->getName().size() / ED < 3) {
// If this was an unqualified lookup, note that no correction was found.
if (IsUnqualifiedLookup)
@ -3666,7 +3675,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
// Weed out any names that could not be found by name lookup or, if a
// CorrectionCandidateCallback object was provided, failed validation.
llvm::SmallPtrSet<IdentifierInfo*, 16> QualifiedResults;
llvm::SmallVector<TypoCorrection, 16> QualifiedResults;
LookupResult TmpRes(*this, TypoName, LookupKind);
TmpRes.suppressDiagnostics();
while (!Consumer.empty()) {
@ -3681,7 +3690,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
if (I->second.isResolved()) {
TypoCorrectionConsumer::result_iterator Prev = I;
++I;
if (!CCC.ValidateCandidate(Prev->second))
if (!isCandidateViable(CCC, Prev->second))
DI->second->erase(Prev);
continue;
}
@ -3695,7 +3704,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
case LookupResult::NotFound:
case LookupResult::NotFoundInCurrentInstantiation:
case LookupResult::FoundUnresolvedValue:
QualifiedResults.insert(Name);
QualifiedResults.push_back(I->second);
// We didn't find this name in our scope, or didn't like what we found;
// ignore it.
{
@ -3718,7 +3727,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
TRD != TRDEnd; ++TRD)
I->second.addCorrectionDecl(*TRD);
++I;
if (!CCC.ValidateCandidate(Prev->second))
if (!isCandidateViable(CCC, Prev->second))
DI->second->erase(Prev);
break;
}
@ -3727,7 +3736,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
TypoCorrectionConsumer::result_iterator Prev = I;
I->second.setCorrectionDecl(TmpRes.getAsSingle<NamedDecl>());
++I;
if (!CCC.ValidateCandidate(Prev->second))
if (!isCandidateViable(CCC, Prev->second))
DI->second->erase(Prev);
break;
}
@ -3744,7 +3753,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
// Only perform the qualified lookups for C++
if (getLangOptions().CPlusPlus) {
TmpRes.suppressDiagnostics();
for (llvm::SmallPtrSet<IdentifierInfo*,
for (llvm::SmallVector<TypoCorrection,
16>::iterator QRI = QualifiedResults.begin(),
QRIEnd = QualifiedResults.end();
QRI != QRIEnd; ++QRI) {
@ -3752,33 +3761,33 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
NIEnd = Namespaces.end();
NI != NIEnd; ++NI) {
DeclContext *Ctx = NI->DeclCtx;
unsigned QualifiedED = ED + NI->EditDistance;
// FIXME: Stop searching once the namespaces are too far away to create
// acceptable corrections for this identifier (since the namespaces
// are sorted in ascending order by edit distance).
TmpRes.clear();
TmpRes.setLookupName(*QRI);
TmpRes.setLookupName(QRI->getCorrectionAsIdentifierInfo());
if (!LookupQualifiedName(TmpRes, Ctx)) continue;
// Any corrections added below will be validated in subsequent
// iterations of the main while() loop over the Consumer's contents.
switch (TmpRes.getResultKind()) {
case LookupResult::Found:
Consumer.addName((*QRI)->getName(), TmpRes.getAsSingle<NamedDecl>(),
QualifiedED, NI->NameSpecifier);
QRI->setCorrectionDecl(TmpRes.getAsSingle<NamedDecl>());
QRI->setCorrectionSpecifier(NI->NameSpecifier);
QRI->setQualifierDistance(NI->EditDistance);
Consumer.addCorrection(*QRI);
break;
case LookupResult::FoundOverloaded: {
TypoCorrection corr(&Context.Idents.get((*QRI)->getName()), NULL,
NI->NameSpecifier, QualifiedED);
case LookupResult::FoundOverloaded:
QRI->setCorrectionSpecifier(NI->NameSpecifier);
QRI->setQualifierDistance(NI->EditDistance);
for (LookupResult::iterator TRD = TmpRes.begin(),
TRDEnd = TmpRes.end();
TRD != TRDEnd; ++TRD)
corr.addCorrectionDecl(*TRD);
Consumer.addCorrection(corr);
QRI->addCorrectionDecl(*TRD);
Consumer.addCorrection(*QRI);
break;
}
case LookupResult::NotFound:
case LookupResult::NotFoundInCurrentInstantiation:
case LookupResult::Ambiguous:
@ -3796,7 +3805,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
if (Consumer.empty()) return TypoCorrection();
TypoResultsMap &BestResults = *Consumer.begin()->second;
ED = Consumer.begin()->first;
ED = TypoCorrection::NormalizeEditDistance(Consumer.begin()->first);
if (ED > 0 && Typo->getName().size() / ED < 3) {
// If this was an unqualified lookup and we believe the callback
@ -3808,22 +3817,6 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
return TypoCorrection();
}
// If we have multiple possible corrections, eliminate the ones where we
// added namespace qualifiers to try to resolve the ambiguity (and to favor
// corrections without additional namespace qualifiers)
if (getLangOptions().CPlusPlus && BestResults.size() > 1) {
TypoCorrectionConsumer::distance_iterator DI = Consumer.begin();
for (TypoCorrectionConsumer::result_iterator I = DI->second->begin(),
IEnd = DI->second->end();
I != IEnd; /* Increment in loop. */) {
if (I->second.getCorrectionSpecifier() != NULL) {
TypoCorrectionConsumer::result_iterator Cur = I;
++I;
DI->second->erase(Cur);
} else ++I;
}
}
// If only a single name remains, return that result.
if (BestResults.size() == 1) {
const llvm::StringMapEntry<TypoCorrection> &Correction = *(BestResults.begin());

Просмотреть файл

@ -74,3 +74,24 @@ void bar() {
(void)new llvm::GraphWriter; // expected-error {{expected a type}}
(void)new llvm::Graphwriter<S>; // expected-error {{no template named 'Graphwriter' in namespace 'llvm'; did you mean 'GraphWriter'?}}
}
// If namespace prefixes and character edits have the same weight, correcting
// "fimish" to "N::famish" would have the same edit distance as correcting
// "fimish" to "Finish". The result would be no correction being suggested
// unless one of the corrections is given precedence (e.g. by filtering out
// suggestions with added namespace qualifiers).
namespace N { void famish(int); }
void Finish(int); // expected-note {{'Finish' declared here}}
void Start() {
fimish(7); // expected-error {{use of undeclared identifier 'fimish'; did you mean 'Finish'?}}
}
// But just eliminating the corrections containing added namespace qualifiers
// won't work if both of the tied corrections have namespace qualifiers added.
namespace N {
void someCheck(int); // expected-note {{'N::someCheck' declared here}}
namespace O { void somechock(int); }
}
void confusing() {
somechick(7); // expected-error {{use of undeclared identifier 'somechick'; did you mean 'N::someCheck'?}}
}