Skip to content

Commit fcdb946

Browse files
committed
Suggest a similarly-spelled identifier on undefined-identifier errors (#9124)
1 parent e751322 commit fcdb946

11 files changed

Lines changed: 416 additions & 3 deletions

File tree

source/core/slang-string-util.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include "slang-blob.h"
44
#include "slang-char-util.h"
5+
#include "slang-math.h"
56
#include "slang-text-io.h"
67

78
namespace Slang
@@ -975,4 +976,61 @@ int StringUtil::parseIntAndAdvancePos(UnownedStringSlice text, Index& pos)
975976
return result;
976977
}
977978

979+
// Shared Levenshtein core. When `caseInsensitive` is set, characters are folded
980+
// with `CharUtil::toLower` during the comparison, avoiding the need to allocate
981+
// lowercased copies of the inputs.
982+
static Index _calcLevenshteinDistance(
983+
const UnownedStringSlice& a,
984+
const UnownedStringSlice& b,
985+
bool caseInsensitive)
986+
{
987+
const Index lenA = a.getLength();
988+
const Index lenB = b.getLength();
989+
if (lenA == 0)
990+
return lenB;
991+
if (lenB == 0)
992+
return lenA;
993+
994+
auto fold = [&](char c) -> char { return caseInsensitive ? CharUtil::toLower(c) : c; };
995+
996+
// Classic dynamic-programming Levenshtein using two rolling rows so the
997+
// working set is O(lenB) rather than O(lenA * lenB).
998+
List<Index> prevRow;
999+
List<Index> currRow;
1000+
prevRow.setCount(lenB + 1);
1001+
currRow.setCount(lenB + 1);
1002+
1003+
for (Index j = 0; j <= lenB; ++j)
1004+
prevRow[j] = j;
1005+
1006+
for (Index i = 1; i <= lenA; ++i)
1007+
{
1008+
currRow[0] = i;
1009+
const char ca = fold(a[i - 1]);
1010+
for (Index j = 1; j <= lenB; ++j)
1011+
{
1012+
const Index cost = (ca == fold(b[j - 1])) ? 0 : 1;
1013+
const Index deletion = prevRow[j] + 1;
1014+
const Index insertion = currRow[j - 1] + 1;
1015+
const Index substitution = prevRow[j - 1] + cost;
1016+
currRow[j] = Math::Min(deletion, insertion, substitution);
1017+
}
1018+
prevRow.swapWith(currRow);
1019+
}
1020+
1021+
return prevRow[lenB];
1022+
}
1023+
1024+
Index StringUtil::calcLevenshteinDistance(const UnownedStringSlice& a, const UnownedStringSlice& b)
1025+
{
1026+
return _calcLevenshteinDistance(a, b, /*caseInsensitive:*/ false);
1027+
}
1028+
1029+
Index StringUtil::calcLevenshteinDistanceCaseInsensitive(
1030+
const UnownedStringSlice& a,
1031+
const UnownedStringSlice& b)
1032+
{
1033+
return _calcLevenshteinDistance(a, b, /*caseInsensitive:*/ true);
1034+
}
1035+
9781036
} // namespace Slang

source/core/slang-string-util.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,20 @@ struct StringUtil
198198
/// Modifies pos to the position where parsing ends.
199199
/// Returns parsed integer.
200200
static int parseIntAndAdvancePos(UnownedStringSlice text, Index& pos);
201+
202+
/// Compute the Levenshtein edit distance between `a` and `b`: the minimum number of
203+
/// single-character insertions, deletions, or substitutions needed to turn one string
204+
/// into the other. Used, for example, to suggest the closest known identifier for a
205+
/// misspelled name ("did you mean ...?"). The comparison is case-sensitive; use
206+
/// `calcLevenshteinDistanceCaseInsensitive` (or lower-case the inputs beforehand) for a
207+
/// case-insensitive distance.
208+
static Index calcLevenshteinDistance(const UnownedStringSlice& a, const UnownedStringSlice& b);
209+
210+
/// Like `calcLevenshteinDistance`, but folds characters with `CharUtil::toLower` during
211+
/// the comparison, so it does not allocate lower-cased copies of the inputs.
212+
static Index calcLevenshteinDistanceCaseInsensitive(
213+
const UnownedStringSlice& a,
214+
const UnownedStringSlice& b);
201215
};
202216

203217
/* A helper class that allows parsing of lines from text with iteration. Uses

source/slang/slang-check-decl.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1775,6 +1775,11 @@ QualType getTypeForDeclRef(
17751775
// TODO: This code could break if we ever go down this path with
17761776
// an identifier that doesn't have a name.
17771777
//
1778+
// Note: unlike the `VarExpr` lookup path, we do not attach a "did you
1779+
// mean ...?" suggestion here. This site is reached for a name that
1780+
// already resolved to a contextual keyword (not an ordinary in-scope
1781+
// lookup), so a Levenshtein match against the lexical scope would be
1782+
// misleading.
17781783
sink->diagnose(
17791784
Diagnostics::UndefinedIdentifier{.name = declRef.getName(), .location = loc});
17801785
}

source/slang/slang-check-expr.cpp

Lines changed: 135 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
// * `slang-check-conversion.cpp` is responsible for the logic of handling type conversion/coercion
1313

1414
#include "../core/slang-math.h"
15+
#include "../core/slang-string-util.h"
1516
#include "core/slang-char-util.h"
1617
#include "slang-ast-decl.h"
1718
#include "slang-ast-natural-layout.h"
@@ -5053,6 +5054,127 @@ Expr* SemanticsExprVisitor::visitInvokeExpr(InvokeExpr* expr)
50535054
return checkedExpr;
50545055
}
50555056

5057+
// Find the in-scope identifier whose spelling is closest to `name`, to power a
5058+
// "did you mean ...?" suggestion when `name` failed to resolve. Walks the scope
5059+
// chain (and each scope's sibling chain) collecting the names of direct members,
5060+
// computes the case-insensitive Levenshtein distance to each, and returns the
5061+
// single closest candidate within a small distance threshold. Returns nullptr if
5062+
// nothing is close enough (so the caller can simply omit the suggestion), or if
5063+
// two candidates tie at the closest distance (so the output never depends on
5064+
// import/scope-walk order).
5065+
//
5066+
// Only direct members of the lexical scope chain are considered; inherited and
5067+
// extension members reached via the `this`-parameter breadcrumb in real lookup
5068+
// are deliberately not searched, to keep this off the hot path of successful
5069+
// lookups. `semantics` is used to skip candidates the user could not access.
5070+
static Name* findClosestInScopeName(
5071+
SemanticsVisitor* semantics,
5072+
Name* name,
5073+
Scope* scope,
5074+
Decl* declToExclude)
5075+
{
5076+
if (!name)
5077+
return nullptr;
5078+
5079+
const UnownedStringSlice target = getUnownedStringSliceText(name);
5080+
5081+
// Scale the allowed edit distance with the identifier length and cap it, so
5082+
// that we only offer genuinely-close names (e.g. `lenght` -> `length`, or
5083+
// `f_a` -> `f_b`) and never a wildly different one (e.g. the keyword `case`
5084+
// -> the module `core`, a distance-2 edit on a 4-char name). The heuristic is
5085+
// roughly one edit per three characters with a floor of one. Names shorter
5086+
// than 3 chars are too short to suggest against; and we also refuse
5087+
// pathologically long names, since suggestions are advisory and not worth an
5088+
// O(N*M) Levenshtein DP per candidate on an adversarial multi-kilobyte
5089+
// identifier.
5090+
if (target.getLength() < 3 || target.getLength() > 256)
5091+
return nullptr;
5092+
const Index maxDistance = Math::Min<Index>(3, Math::Max<Index>(1, target.getLength() / 3));
5093+
5094+
Index bestDistance = maxDistance + 1;
5095+
// The *distinct* candidate names sharing the current best distance. Names are
5096+
// deduped (the "nub" of the candidate list) so that, e.g., two overloads
5097+
// both called `length` count once: they would print the identical
5098+
// suggestion, so they are not a genuine ambiguity. A suggestion is offered
5099+
// only when this set ends up with exactly one name.
5100+
HashSet<Name*> bestNames;
5101+
5102+
for (Scope* s = scope; s; s = s->parent)
5103+
{
5104+
for (Scope* sib = s; sib; sib = sib->nextSibling)
5105+
{
5106+
auto containerDecl = sib->containerDecl;
5107+
if (!containerDecl)
5108+
continue;
5109+
5110+
// Don't suggest names from the core module. Its global scope holds
5111+
// thousands of builtins, so almost any identifier finds a spurious
5112+
// close match there (e.g. `instance` -> `distance`); restricting to
5113+
// user-written declarations keeps suggestions quiet and relevant.
5114+
// Skipping the whole container here (rather than per-member) also
5115+
// avoids iterating — and, for any on-demand-deserialized core
5116+
// module, materializing — its members via `getDirectMemberDecls()`.
5117+
if (isFromCoreModule(containerDecl))
5118+
continue;
5119+
5120+
for (auto candidateDecl : containerDecl->getDirectMemberDecls())
5121+
{
5122+
Name* candidateName = candidateDecl->getName();
5123+
if (!candidateName || candidateName == name)
5124+
continue;
5125+
5126+
// Skip the declaration currently being checked, mirroring the
5127+
// `getDeclToExcludeFromLookup()` exclusion that real lookup
5128+
// applies: suggesting it would name something the same lookup
5129+
// path still cannot resolve.
5130+
if (candidateDecl == declToExclude)
5131+
continue;
5132+
5133+
const UnownedStringSlice candidateText = getUnownedStringSliceText(candidateName);
5134+
if (candidateText.getLength() == 0)
5135+
continue;
5136+
5137+
// Cheap length pre-filter: |lenA - lenB| is a lower bound on the
5138+
// edit distance, so skip candidates that cannot possibly be
5139+
// within `maxDistance` before paying for the O(lenA*lenB) DP (and
5140+
// before forcing any lazy member materialization downstream).
5141+
if (Math::Abs(candidateText.getLength() - target.getLength()) > maxDistance)
5142+
continue;
5143+
5144+
// Don't suggest a declaration the user could not have referenced:
5145+
// real lookup already filtered inaccessible (`private`/`internal`)
5146+
// candidates, so offering one as a "did you mean" would name a
5147+
// forbidden symbol (and leak imported module contents via typos).
5148+
if (!semantics->isDeclVisibleFromScope(makeDeclRef(candidateDecl), scope))
5149+
continue;
5150+
5151+
const Index distance =
5152+
StringUtil::calcLevenshteinDistanceCaseInsensitive(target, candidateText);
5153+
if (distance < bestDistance)
5154+
{
5155+
bestDistance = distance;
5156+
bestNames.clear();
5157+
bestNames.add(candidateName);
5158+
}
5159+
else if (distance == bestDistance)
5160+
{
5161+
bestNames.add(candidateName);
5162+
}
5163+
}
5164+
}
5165+
}
5166+
5167+
// Offer a suggestion only when there is a single, unambiguous closest name.
5168+
// Multiple distinct names at the best distance would make the chosen one
5169+
// depend on import/scope-walk order, so suppress the suggestion instead.
5170+
if (bestDistance > maxDistance || bestNames.getCount() != 1)
5171+
return nullptr;
5172+
Name* best = nullptr;
5173+
for (auto n : bestNames)
5174+
best = n;
5175+
return best;
5176+
}
5177+
50565178
Expr* SemanticsExprVisitor::visitVarExpr(VarExpr* expr)
50575179
{
50585180
// If we've already resolved this expression, don't try again.
@@ -5097,8 +5219,19 @@ Expr* SemanticsExprVisitor::visitVarExpr(VarExpr* expr)
50975219
}
50985220

50995221
if (!diagnosed)
5100-
getSink()->diagnose(
5101-
Diagnostics::UndefinedIdentifier{.name = expr->name, .location = expr->loc});
5222+
{
5223+
// If a similarly-spelled identifier is in scope, attach a "did you mean
5224+
// 'length'?" note to the error; this turns a bare "undefined identifier
5225+
// 'lenght'" into an actionable hint. The note only renders when
5226+
// `suggestionLocation` is valid, so a null suggestion is harmless.
5227+
auto suggestion =
5228+
findClosestInScopeName(this, expr->name, expr->scope, getDeclToExcludeFromLookup());
5229+
getSink()->diagnose(Diagnostics::UndefinedIdentifier{
5230+
.name = expr->name,
5231+
.suggestion = suggestion,
5232+
.location = expr->loc,
5233+
.suggestionLocation = suggestion ? expr->loc : SourceLoc{}});
5234+
}
51025235

51035236
return resultExpr;
51045237
}

source/slang/slang-check-modifier.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1968,6 +1968,11 @@ Modifier* SemanticsVisitor::checkModifier(
19681968
genDecl->ownedScope);
19691969
if (!scrutineeResults.isValid())
19701970
{
1971+
// No "did you mean ...?" suggestion here (unlike the
1972+
// `VarExpr` path): a `__target_intrinsic` scrutinee names a
1973+
// capability/target identifier, not an ordinary in-scope
1974+
// declaration, so a Levenshtein match against the lexical
1975+
// scope would be misleading.
19711976
getSink()->diagnose(Diagnostics::UndefinedIdentifier{
19721977
.name = targetIntrinsic->scrutinee.name,
19731978
.location = targetIntrinsic->scrutinee.loc});

source/slang/slang-diagnostics.lua

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1130,7 +1130,10 @@ err(
11301130
"undefined-identifier",
11311131
30015,
11321132
"undefined identifier",
1133-
span { loc = "location", message = "undefined identifier '~name:Name'." }
1133+
span { loc = "location", message = "undefined identifier '~name:Name'." },
1134+
-- Optional "did you mean ...?" note, rendered only when `suggestionLocation`
1135+
-- is set to a valid location by the caller (i.e. a close in-scope name exists).
1136+
note { message = "did you mean '~suggestion:Name'?", span { loc = "suggestionLocation" } }
11341137
)
11351138

11361139
err(

tests/diagnostics/autodiff-custom-diff-unresolved.slang

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ void __d_f(float x, float.Differential dout)
1212
/*CHECK:
1313
^^^^^ undefined identifier
1414
^^^^^ undefined identifier '__d_g'.
15+
^^^^^ did you mean '__d_f'?
1516
^^^^^^^^^^^^^^^^^^ cannot resolve derivative function
1617
^^^^^^^^^^^^^^^^^^ cannot resolve the custom derivative function
1718
*/

0 commit comments

Comments
 (0)