Skip to content

Commit 44349dd

Browse files
committed
Suggest a similarly-spelled identifier on undefined-identifier errors (#9124)
1 parent e751322 commit 44349dd

8 files changed

Lines changed: 350 additions & 3 deletions

File tree

source/core/slang-string-util.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include "slang-blob.h"
44
#include "slang-char-util.h"
5+
#include "slang-math.h"
56
#include "slang-text-io.h"
67

78
namespace Slang
@@ -975,4 +976,61 @@ int StringUtil::parseIntAndAdvancePos(UnownedStringSlice text, Index& pos)
975976
return result;
976977
}
977978

979+
// Shared Levenshtein core. When `caseInsensitive` is set, characters are folded
980+
// with `CharUtil::toLower` during the comparison, avoiding the need to allocate
981+
// lowercased copies of the inputs.
982+
static Index _calcLevenshteinDistance(
983+
const UnownedStringSlice& a,
984+
const UnownedStringSlice& b,
985+
bool caseInsensitive)
986+
{
987+
const Index lenA = a.getLength();
988+
const Index lenB = b.getLength();
989+
if (lenA == 0)
990+
return lenB;
991+
if (lenB == 0)
992+
return lenA;
993+
994+
auto fold = [&](char c) -> char { return caseInsensitive ? CharUtil::toLower(c) : c; };
995+
996+
// Classic dynamic-programming Levenshtein using two rolling rows so the
997+
// working set is O(lenB) rather than O(lenA * lenB).
998+
List<Index> prevRow;
999+
List<Index> currRow;
1000+
prevRow.setCount(lenB + 1);
1001+
currRow.setCount(lenB + 1);
1002+
1003+
for (Index j = 0; j <= lenB; ++j)
1004+
prevRow[j] = j;
1005+
1006+
for (Index i = 1; i <= lenA; ++i)
1007+
{
1008+
currRow[0] = i;
1009+
const char ca = fold(a[i - 1]);
1010+
for (Index j = 1; j <= lenB; ++j)
1011+
{
1012+
const Index cost = (ca == fold(b[j - 1])) ? 0 : 1;
1013+
const Index deletion = prevRow[j] + 1;
1014+
const Index insertion = currRow[j - 1] + 1;
1015+
const Index substitution = prevRow[j - 1] + cost;
1016+
currRow[j] = Math::Min(deletion, insertion, substitution);
1017+
}
1018+
prevRow.swapWith(currRow);
1019+
}
1020+
1021+
return prevRow[lenB];
1022+
}
1023+
1024+
Index StringUtil::calcLevenshteinDistance(const UnownedStringSlice& a, const UnownedStringSlice& b)
1025+
{
1026+
return _calcLevenshteinDistance(a, b, /*caseInsensitive:*/ false);
1027+
}
1028+
1029+
Index StringUtil::calcLevenshteinDistanceCaseInsensitive(
1030+
const UnownedStringSlice& a,
1031+
const UnownedStringSlice& b)
1032+
{
1033+
return _calcLevenshteinDistance(a, b, /*caseInsensitive:*/ true);
1034+
}
1035+
9781036
} // namespace Slang

source/core/slang-string-util.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,20 @@ struct StringUtil
198198
/// Modifies pos to the position where parsing ends.
199199
/// Returns parsed integer.
200200
static int parseIntAndAdvancePos(UnownedStringSlice text, Index& pos);
201+
202+
/// Compute the Levenshtein edit distance between `a` and `b`: the minimum number of
203+
/// single-character insertions, deletions, or substitutions needed to turn one string
204+
/// into the other. Used, for example, to suggest the closest known identifier for a
205+
/// misspelled name ("did you mean ...?"). The comparison is case-sensitive; use
206+
/// `calcLevenshteinDistanceCaseInsensitive` (or lower-case the inputs beforehand) for a
207+
/// case-insensitive distance.
208+
static Index calcLevenshteinDistance(const UnownedStringSlice& a, const UnownedStringSlice& b);
209+
210+
/// Like `calcLevenshteinDistance`, but folds characters with `CharUtil::toLower` during
211+
/// the comparison, so it does not allocate lower-cased copies of the inputs.
212+
static Index calcLevenshteinDistanceCaseInsensitive(
213+
const UnownedStringSlice& a,
214+
const UnownedStringSlice& b);
201215
};
202216

203217
/* A helper class that allows parsing of lines from text with iteration. Uses

source/slang/slang-check-expr.cpp

Lines changed: 117 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
// * `slang-check-conversion.cpp` is responsible for the logic of handling type conversion/coercion
1313

1414
#include "../core/slang-math.h"
15+
#include "../core/slang-string-util.h"
1516
#include "core/slang-char-util.h"
1617
#include "slang-ast-decl.h"
1718
#include "slang-ast-natural-layout.h"
@@ -5053,6 +5054,110 @@ Expr* SemanticsExprVisitor::visitInvokeExpr(InvokeExpr* expr)
50535054
return checkedExpr;
50545055
}
50555056

5057+
// Find the in-scope identifier whose spelling is closest to `name`, to power a
5058+
// "did you mean ...?" suggestion when `name` failed to resolve. Walks the scope
5059+
// chain (and each scope's sibling chain) collecting the names of direct members,
5060+
// computes the case-insensitive Levenshtein distance to each, and returns the
5061+
// single closest candidate within a small distance threshold. Returns nullptr if
5062+
// nothing is close enough (so the caller can simply omit the suggestion), or if
5063+
// two candidates tie at the closest distance (so the output never depends on
5064+
// import/scope-walk order).
5065+
//
5066+
// Only direct members of the lexical scope chain are considered; inherited and
5067+
// extension members reached via the `this`-parameter breadcrumb in real lookup
5068+
// are deliberately not searched, to keep this off the hot path of successful
5069+
// lookups. `semantics` is used to skip candidates the user could not access.
5070+
static Name* findClosestInScopeName(SemanticsVisitor* semantics, Name* name, Scope* scope)
5071+
{
5072+
if (!name)
5073+
return nullptr;
5074+
5075+
const UnownedStringSlice target = getUnownedStringSliceText(name);
5076+
5077+
// Scale the allowed edit distance with the identifier length and cap it, so
5078+
// that we only offer genuinely-close names (e.g. `lenght` -> `length`, or
5079+
// `f_a` -> `f_b`) and never a wildly different one (e.g. the keyword `case`
5080+
// -> the module `core`, a distance-2 edit on a 4-char name). Roughly one edit
5081+
// per three characters with a floor of one, matching the heuristic used by
5082+
// other compilers. Names shorter than 3 chars are too short to suggest against.
5083+
if (target.getLength() < 3)
5084+
return nullptr;
5085+
const Index maxDistance = Math::Min<Index>(3, Math::Max<Index>(1, target.getLength() / 3));
5086+
5087+
Index bestDistance = maxDistance + 1;
5088+
// The *distinct* candidate names sharing the current best distance. Names are
5089+
// deduped (the "nub" of the candidate list) so that, e.g., two overloads
5090+
// both called `length` count once: they would print the identical
5091+
// suggestion, so they are not a genuine ambiguity. A suggestion is offered
5092+
// only when this set ends up with exactly one name.
5093+
HashSet<Name*> bestNames;
5094+
5095+
for (Scope* s = scope; s; s = s->parent)
5096+
{
5097+
for (Scope* sib = s; sib; sib = sib->nextSibling)
5098+
{
5099+
auto containerDecl = sib->containerDecl;
5100+
if (!containerDecl)
5101+
continue;
5102+
5103+
for (auto candidateDecl : containerDecl->getDirectMemberDecls())
5104+
{
5105+
Name* candidateName = candidateDecl->getName();
5106+
if (!candidateName || candidateName == name)
5107+
continue;
5108+
5109+
// Don't suggest names from the core module. Its global scope holds
5110+
// thousands of builtins, so almost any identifier finds a spurious
5111+
// close match there (e.g. `instance` -> `distance`); restricting to
5112+
// user-written declarations keeps suggestions quiet and relevant.
5113+
if (isFromCoreModule(candidateDecl))
5114+
continue;
5115+
5116+
const UnownedStringSlice candidateText = getUnownedStringSliceText(candidateName);
5117+
if (candidateText.getLength() == 0)
5118+
continue;
5119+
5120+
// Cheap length pre-filter: |lenA - lenB| is a lower bound on the
5121+
// edit distance, so skip candidates that cannot possibly be
5122+
// within `maxDistance` before paying for the O(lenA*lenB) DP (and
5123+
// before forcing any lazy member materialization downstream).
5124+
if (Math::Abs(candidateText.getLength() - target.getLength()) > maxDistance)
5125+
continue;
5126+
5127+
// Don't suggest a declaration the user could not have referenced:
5128+
// real lookup already filtered inaccessible (`private`/`internal`)
5129+
// candidates, so offering one as a "did you mean" would name a
5130+
// forbidden symbol (and leak imported module contents via typos).
5131+
if (!semantics->isDeclVisibleFromScope(makeDeclRef(candidateDecl), scope))
5132+
continue;
5133+
5134+
const Index distance =
5135+
StringUtil::calcLevenshteinDistanceCaseInsensitive(target, candidateText);
5136+
if (distance < bestDistance)
5137+
{
5138+
bestDistance = distance;
5139+
bestNames.clear();
5140+
bestNames.add(candidateName);
5141+
}
5142+
else if (distance == bestDistance)
5143+
{
5144+
bestNames.add(candidateName);
5145+
}
5146+
}
5147+
}
5148+
}
5149+
5150+
// Offer a suggestion only when there is a single, unambiguous closest name.
5151+
// Multiple distinct names at the best distance would make the chosen one
5152+
// depend on import/scope-walk order, so suppress the suggestion instead.
5153+
if (bestDistance > maxDistance || bestNames.getCount() != 1)
5154+
return nullptr;
5155+
Name* best = nullptr;
5156+
for (auto n : bestNames)
5157+
best = n;
5158+
return best;
5159+
}
5160+
50565161
Expr* SemanticsExprVisitor::visitVarExpr(VarExpr* expr)
50575162
{
50585163
// If we've already resolved this expression, don't try again.
@@ -5097,8 +5202,18 @@ Expr* SemanticsExprVisitor::visitVarExpr(VarExpr* expr)
50975202
}
50985203

50995204
if (!diagnosed)
5100-
getSink()->diagnose(
5101-
Diagnostics::UndefinedIdentifier{.name = expr->name, .location = expr->loc});
5205+
{
5206+
// If a similarly-spelled identifier is in scope, attach a "did you mean
5207+
// 'length'?" note to the error; this turns a bare "undefined identifier
5208+
// 'lenght'" into an actionable hint. The note only renders when
5209+
// `suggestionLocation` is valid, so a null suggestion is harmless.
5210+
auto suggestion = findClosestInScopeName(this, expr->name, expr->scope);
5211+
getSink()->diagnose(Diagnostics::UndefinedIdentifier{
5212+
.name = expr->name,
5213+
.suggestion = suggestion,
5214+
.location = expr->loc,
5215+
.suggestionLocation = suggestion ? expr->loc : SourceLoc{}});
5216+
}
51025217

51035218
return resultExpr;
51045219
}

source/slang/slang-diagnostics.lua

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1130,7 +1130,10 @@ err(
11301130
"undefined-identifier",
11311131
30015,
11321132
"undefined identifier",
1133-
span { loc = "location", message = "undefined identifier '~name:Name'." }
1133+
span { loc = "location", message = "undefined identifier '~name:Name'." },
1134+
-- Optional "did you mean ...?" note, rendered only when `suggestionLocation`
1135+
-- is set to a valid location by the caller (i.e. a close in-scope name exists).
1136+
note { message = "did you mean '~suggestion:Name'?", span { loc = "suggestionLocation" } }
11341137
)
11351138

11361139
err(

tests/diagnostics/autodiff-custom-diff-unresolved.slang

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ void __d_f(float x, float.Differential dout)
1212
/*CHECK:
1313
^^^^^ undefined identifier
1414
^^^^^ undefined identifier '__d_g'.
15+
^^^^^ did you mean '__d_f'?
1516
^^^^^^^^^^^^^^^^^^ cannot resolve derivative function
1617
^^^^^^^^^^^^^^^^^^ cannot resolve the custom derivative function
1718
*/
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
// Test for issue #9124: an undefined identifier that is a close misspelling of
2+
// an in-scope name produces a "did you mean ...?" suggestion note.
3+
//
4+
// This test runs in exhaustive mode: every emitted diagnostic must be
5+
// annotated. That is what enforces the *negative* cases below — if a spurious
6+
// "did you mean" note were ever produced (e.g. if a future change dropped the
7+
// min-length guard, raised the distance cap, or removed the core-module
8+
// filter), it would be an unannotated diagnostic and fail the test.
9+
10+
//DIAGNOSTIC_TEST:SIMPLE(diag=CHECK):-target spirv
11+
12+
float closeMatch(float myVariable)
13+
{
14+
return myVariabl;
15+
//CHECK: ^^^^^^^^^ undefined identifier
16+
//CHECK: ^^^^^^^^^ undefined identifier 'myVariabl'.
17+
//CHECK: ^^^^^^^^^ did you mean 'myVariable'?
18+
}
19+
20+
// A name with no nearby candidate must NOT produce a suggestion note.
21+
float noMatch()
22+
{
23+
return zzzzqqq;
24+
//CHECK: ^^^^^^^ undefined identifier
25+
//CHECK: ^^^^^^^ undefined identifier 'zzzzqqq'.
26+
}
27+
28+
// Min-length threshold: identifiers shorter than 3 chars never get a
29+
// suggestion, even when an in-scope candidate is one edit away (`ab` vs `ax`).
30+
float tooShort(float ax)
31+
{
32+
return ab;
33+
//CHECK: ^^ undefined identifier
34+
//CHECK: ^^ undefined identifier 'ab'.
35+
}
36+
37+
// Distance-cap boundary. For a 6-char target the cap is
38+
// min(3, max(1, 6/3)) = 2. A candidate at distance 2 is suggested.
39+
float atCap(float abcdef)
40+
{
41+
// `abXYef` -> `abcdef` is distance 2 (c,d substituted) == maxDistance.
42+
return abXYef;
43+
//CHECK: ^^^^^^ undefined identifier
44+
//CHECK: ^^^^^^ undefined identifier 'abXYef'.
45+
//CHECK: ^^^^^^ did you mean 'abcdef'?
46+
}
47+
48+
// One past the cap: distance 3 on the same 6-char target must NOT suggest.
49+
float overCap(float abcdef)
50+
{
51+
// `abXYZf` -> `abcdef` is distance 3 (c,d,e substituted) > maxDistance.
52+
return abXYZf;
53+
//CHECK: ^^^^^^ undefined identifier
54+
//CHECK: ^^^^^^ undefined identifier 'abXYZf'.
55+
}
56+
57+
// Core-module exclusion: a typo of a builtin type name must not be suggested,
58+
// since core-module declarations are deliberately excluded.
59+
float coreExcluded()
60+
{
61+
return flota;
62+
//CHECK: ^^^^^ undefined identifier
63+
//CHECK: ^^^^^ undefined identifier 'flota'.
64+
}
65+
66+
// Tie-break suppression: when two distinctly-named in-scope candidates are
67+
// equally close to the typo, the winner would depend on scope/import order, so
68+
// no suggestion is offered. Here both `abcd` and `abce` are at distance 1 from
69+
// `abcf`.
70+
float ambiguous(float abcd, float abce)
71+
{
72+
return abcf;
73+
//CHECK: ^^^^ undefined identifier
74+
//CHECK: ^^^^ undefined identifier 'abcf'.
75+
}
76+
77+
// Same-name overloads are NOT ambiguous: both `overloaded` decls would print the
78+
// identical suggestion, so the deduped candidate set has one name and the
79+
// suggestion is still offered.
80+
int overloaded(int x) { return x; }
81+
int overloaded(float x) { return 0; }
82+
int sameNameOverloads()
83+
{
84+
return overloade;
85+
//CHECK: ^^^^^^^^^ undefined identifier
86+
//CHECK: ^^^^^^^^^ undefined identifier 'overloade'.
87+
//CHECK: ^^^^^^^^^ did you mean 'overloaded'?
88+
}
89+
90+
// Inherited-member limitation: only the lexical scope chain is searched, not
91+
// inherited members reached via the implicit `this`. A typo of an inherited
92+
// field gets no suggestion (documented limitation, pinned so any future
93+
// improvement is a deliberate change).
94+
struct Base
95+
{
96+
float lengthField;
97+
}
98+
struct Derived : Base
99+
//CHECK: ^^^^ support for inheritance is unstable
100+
{
101+
float compute()
102+
{
103+
return lengthFiel;
104+
//CHECK: ^^^^^^^^^^ undefined identifier
105+
//CHECK: ^^^^^^^^^^ undefined identifier 'lengthFiel'.
106+
}
107+
}

tests/diagnostics/transitive-namespace-import.slang/test.slang

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ namespace ns
2222
f_a(); // Error.
2323
//CHECK:^^^ undefined identifier
2424
//CHECK:^^^ undefined identifier 'f_a'.
25+
//CHECK:^^^ did you mean 'f_b'?
2526
}
2627
}
2728

@@ -35,5 +36,6 @@ namespace ns2
3536
f_a(); // Error.
3637
//CHECK:^^^ undefined identifier
3738
//CHECK:^^^ undefined identifier 'f_a'.
39+
//CHECK:^^^ did you mean 'f_b'?
3840
}
3941
}

0 commit comments

Comments
 (0)