Skip to content

Commit 1f17e1d

Browse files
New SCIP fields: kind, enclosing_symbol, signature_documentation, display_name (#677 #707)
* Update Document from upstream scip.proto This copies the latest additions to the Document message: * a new language field and Language enum * a new text field, to embed the document content itself. This is meant for the new SymbolInformation::signature_documentation field. This also updates some documentation comments. * Update SymbolInformation from upstream scip.proto This copies the latest additions to the SymbolInformation message: * the documentation field is explicitly not meant for signature documentation anymore, instead a new signature_documentation field is added * a new display_name field is added * a new enclosing_symbol field is added for local symbols * a new kind field is added along with a Kind enum to have a finer-grained classification than the one provided by descriptor suffixes (and is especially useful for local symbols which don't have suffixes) * Forward display_name from SemanticDB to SCIP The SemanticDB schema already provides a display_name field, forward it to the SCIP output in scip-semanticdb. This also adds support to the ScipPrinters testing utility and updates the tests accordingly. * Move signature documentation to its new dedicated field SemanticDB provides a structured version of the signature in the signature field. Instead of turning it into a markdown-encoded string for the documentation field, this builds a Document for the signature_documentation field. This also updates the ScipPrinters testing utility and the tests accordingly. * Add back SemanticDB SymbolInformation::owner field SemanticDB used to have a SymbolInformation::owner field with id 15. This re-introduces the field with the same semantics under the name enclosing_symbol. To be able to re-use the field 15, this moves the out-of-spec definition_relationships field to id 21. * Forward enclosing_symbol from SemanticDB to SCIP This also adds support to the ScipPrinters testing utility. 
* Populate SymbolInformation::enclosing_symbol in semanticdb-javac This only populates the enclosing_symbol for local symbols, and updates the tests accordingly. * Build SCIP kind from SemanticDB kind and properties This also updates the ScipPrinters testing utility and the tests accordingly. * semanticdb-javac: set kind to Variable for local variables --------- Co-authored-by: Nicolas Guichard <[email protected]>
1 parent 20b5142 commit 1f17e1d

File tree

143 files changed

+13603
-3924
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

143 files changed

+13603
-3924
lines changed

scip-java-proto/src/main/protobuf/scip.proto

+350-7
Original file line numberDiff line numberDiff line change
@@ -69,14 +69,38 @@ message ToolInfo {
6969

7070
// Document defines the metadata about a source file on disk.
7171
message Document {
72-
// (Required) Path to the text document relative to the directory supplied in
73-
// the associated `Metadata.project_root`. Not URI-encoded. This value should
74-
// not begin with a directory separator.
72+
// The string ID for the programming language this file is written in.
73+
// The `Language` enum contains the names of most common programming languages.
74+
// This field is typed as a string to permit any programming language, including
75+
// ones that are not specified by the `Language` enum.
76+
string language = 4;
77+
// (Required) Unique path to the text document.
78+
//
79+
// 1. The path must be relative to the directory supplied in the associated
80+
// `Metadata.project_root`.
81+
// 2. The path must not begin with a leading '/'.
82+
// 3. The path must point to a regular file, not a symbolic link.
83+
// 4. The path must use '/' as the separator, including on Windows.
84+
// 5. The path must be canonical; it cannot include empty components ('//'),
85+
// or '.' or '..'.
7586
string relative_path = 1;
7687
// Occurrences that appear in this file.
7788
repeated Occurrence occurrences = 2;
78-
// Symbols that are defined within this document.
89+
// Symbols that are "defined" within this document.
90+
//
91+
// This should include symbols which technically do not have any definition,
92+
// but have a reference and are defined by some other symbol (see
93+
// Relationship.is_definition).
7994
repeated SymbolInformation symbols = 3;
95+
96+
// (optional) Text contents of this document. Indexers are not expected to
97+
// include the text by default. It's preferable that clients read the text
98+
// contents from the file system by resolving the absolute path from joining
99+
// `Index.metadata.project_root` and `Document.relative_path`. This field was
100+
// introduced to support `SymbolInformation.signature_documentation`, but it
101+
// can be used for other purposes as well, for example testing or when working
102+
// with virtual/in-memory documents.
103+
string text = 5;
80104
}
81105

82106
// Symbol is similar to a URI, it identifies a class, method, or a local
@@ -145,12 +169,205 @@ message SymbolInformation {
145169
// The string must be formatted according to the grammar in `Symbol`.
146170
string symbol = 1;
147171
// (optional, but strongly recommended) The markdown-formatted documentation
148-
// for this symbol. This field is repeated to allow different kinds of
149-
// documentation. For example, it's nice to include both the signature of a
150-
// method (parameters and return type) along with the accompanying docstring.
172+
// for this symbol. Use `SymbolInformation.signature_documentation` to
173+
// document the method/class/type signature of this symbol.
174+
// Due to historical reasons, indexers may include signature documentation in
175+
// this field by rendering markdown code blocks. New indexers should only
176+
// include non-code documentation in this field, for example docstrings.
151177
repeated string documentation = 3;
152178
// (optional) Relationships to other symbols (e.g., implements, type definition).
153179
repeated Relationship relationships = 4;
180+
// The kind of this symbol. Use this field instead of
181+
// `SymbolDescriptor.Suffix` to determine whether something is, for example, a
182+
// class or a method.
183+
Kind kind = 5;
184+
// (optional) Kind represents the fine-grained category of a symbol, suitable for presenting
185+
// information about the symbol's meaning in the language.
186+
//
187+
// For example:
188+
// - A Java method would have the kind `Method` while a Go function would
189+
// have the kind `Function`, even if the symbols for these use the same
190+
// syntax for the descriptor `SymbolDescriptor.Suffix.Method`.
191+
// - A Go struct has the symbol kind `Struct` while a Java class has
192+
// the symbol kind `Class` even if they both have the same descriptor:
193+
// `SymbolDescriptor.Suffix.Type`.
194+
//
195+
// Since Kind is more fine-grained than Suffix:
196+
// - If two symbols have the same Kind, they should share the same Suffix.
197+
// - If two symbols have different Suffixes, they should have different Kinds.
198+
enum Kind {
199+
UnspecifiedKind = 0;
200+
// A method which may or may not have a body. For Java, Kotlin etc.
201+
AbstractMethod = 66;
202+
// For Ruby's attr_accessor
203+
Accessor = 72;
204+
Array = 1;
205+
// For Alloy
206+
Assertion = 2;
207+
AssociatedType = 3;
208+
// For C++
209+
Attribute = 4;
210+
// For Lean
211+
Axiom = 5;
212+
Boolean = 6;
213+
Class = 7;
214+
Constant = 8;
215+
Constructor = 9;
216+
// For Solidity
217+
Contract = 62;
218+
// For Haskell
219+
DataFamily = 10;
220+
// For C# and F#
221+
Delegate = 73;
222+
Enum = 11;
223+
EnumMember = 12;
224+
Error = 63;
225+
Event = 13;
226+
// For Alloy
227+
Fact = 14;
228+
Field = 15;
229+
File = 16;
230+
Function = 17;
231+
// For 'get' in Swift, 'attr_reader' in Ruby
232+
Getter = 18;
233+
// For Raku
234+
Grammar = 19;
235+
// For PureScript and Lean
236+
Instance = 20;
237+
Interface = 21;
238+
Key = 22;
239+
// For Racket
240+
Lang = 23;
241+
// For Lean
242+
Lemma = 24;
243+
// For Solidity
244+
Library = 64;
245+
Macro = 25;
246+
Method = 26;
247+
// For Ruby
248+
MethodAlias = 74;
249+
// Analogous to 'ThisParameter' and 'SelfParameter', but for languages
250+
// like Go where the receiver doesn't have a conventional name.
251+
MethodReceiver = 27;
252+
// Analogous to 'AbstractMethod', for Go.
253+
MethodSpecification = 67;
254+
// For Protobuf
255+
Message = 28;
256+
// For Solidity
257+
Modifier = 65;
258+
Module = 29;
259+
Namespace = 30;
260+
Null = 31;
261+
Number = 32;
262+
Object = 33;
263+
Operator = 34;
264+
Package = 35;
265+
PackageObject = 36;
266+
Parameter = 37;
267+
ParameterLabel = 38;
268+
// For Haskell's PatternSynonyms
269+
Pattern = 39;
270+
// For Alloy
271+
Predicate = 40;
272+
Property = 41;
273+
// Analogous to 'Trait' and 'TypeClass', for Swift and Objective-C
274+
Protocol = 42;
275+
// Analogous to 'AbstractMethod', for Swift and Objective-C.
276+
ProtocolMethod = 68;
277+
// Analogous to 'AbstractMethod', for C++.
278+
PureVirtualMethod = 69;
279+
// For Haskell
280+
Quasiquoter = 43;
281+
// 'self' in Python, Rust, Swift etc.
282+
SelfParameter = 44;
283+
// For 'set' in Swift, 'attr_writer' in Ruby
284+
Setter = 45;
285+
// For Alloy, analogous to 'Struct'.
286+
Signature = 46;
287+
// For Ruby
288+
SingletonClass = 75;
289+
// Analogous to 'StaticMethod', for Ruby.
290+
SingletonMethod = 76;
291+
// Analogous to 'StaticField', for C++
292+
StaticDataMember = 77;
293+
// For C#
294+
StaticEvent = 78;
295+
// For C#
296+
StaticField = 79;
297+
// For Java, C#, C++ etc.
298+
StaticMethod = 80;
299+
// For C#, TypeScript etc.
300+
StaticProperty = 81;
301+
// For C, C++
302+
StaticVariable = 82;
303+
String = 48;
304+
Struct = 49;
305+
// For Swift
306+
Subscript = 47;
307+
// For Lean
308+
Tactic = 50;
309+
// For Lean
310+
Theorem = 51;
311+
// Method receiver for languages where it is conventionally named 'this':
312+
// 'this' in JavaScript, C++, Java etc.
313+
ThisParameter = 52;
314+
// Analogous to 'Protocol' and 'TypeClass', for Rust, Scala etc.
315+
Trait = 53;
316+
// Analogous to 'AbstractMethod', for Rust, Scala etc.
317+
TraitMethod = 70;
318+
// Data type definition for languages like OCaml which use `type`
319+
// rather than separate keywords like `struct` and `enum`.
320+
Type = 54;
321+
TypeAlias = 55;
322+
// Analogous to 'Trait' and 'Protocol', for Haskell, PureScript etc.
323+
TypeClass = 56;
324+
// Analogous to 'AbstractMethod', for Haskell, PureScript etc.
325+
TypeClassMethod = 71;
326+
// For Haskell
327+
TypeFamily = 57;
328+
TypeParameter = 58;
329+
// For C, C++, Cap'n Proto
330+
Union = 59;
331+
Value = 60;
332+
Variable = 61;
333+
// Next = 83;
334+
// Feel free to open a PR proposing new language-specific kinds.
335+
}
336+
// (optional) The name of this symbol as it should be displayed to the user.
337+
// For example, the symbol "com/example/MyClass#myMethod(+1)." should have the
338+
// display name "myMethod". The `symbol` field is not a reliable source of
339+
// the display name for several reasons:
340+
//
341+
// - Local symbols don't encode the name.
342+
// - Some languages have case-insensitive names, so the symbol is all-lowercase.
343+
// - The symbol may encode names with special characters that should not be
344+
// displayed to the user.
345+
string display_name = 6;
346+
// (optional) The signature of this symbol as it's displayed in API
347+
// documentation or in hover tooltips. For example, a Java method that adds
348+
// two numbers would have `Document.language = "java"` and `Document.text
349+
// = "void add(int a, int b)"`. The `language` and `text` fields are required
350+
// while other fields such as `Document.occurrences` can be optionally
351+
// included to support hyperlinking referenced symbols in the signature.
352+
Document signature_documentation = 7;
353+
// (optional) The enclosing symbol if this is a local symbol. For non-local
354+
// symbols, the enclosing symbol should be parsed from the `symbol` field
355+
// using the `Descriptor` grammar.
356+
//
357+
// The primary use-case for this field is to allow local symbols to be displayed
358+
// in a symbol hierarchy for API documentation. It's OK to leave this field
359+
// empty for local variables since local variables usually don't belong in API
360+
// documentation. However, in the situation that you wish to include a local
361+
// symbol in the hierarchy, then you can use `enclosing_symbol` to locate the
362+
// "parent" or "owner" of this local symbol. For example, a Java indexer may
363+
// choose to use local symbols for private class fields while providing an
364+
// `enclosing_symbol` to reference the enclosing class to allow the field to
365+
// be part of the class documentation hierarchy. From the perspective of an
366+
// author of an indexer, the decision to use a local symbol or global symbol
367+
// should exclusively be determined by whether the local symbol is accessible
368+
// outside the document, not by the capability to find the enclosing
369+
// symbol.
370+
string enclosing_symbol = 8;
154371
}
155372

156373
message Relationship {
@@ -382,3 +599,129 @@ enum DiagnosticTag {
382599
Unnecessary = 1;
383600
Deprecated = 2;
384601
}
602+
603+
// Language standardises names of common programming languages that can be used
604+
// for the `Document.language` field. The primary purpose of this enum is to
605+
// prevent a situation where a single programming language ends up with
606+
// multiple string representations. For example, the C++ language uses the name
607+
// "CPP" in this enum and other names such as "cpp" are incompatible.
608+
// Feel free to send a pull-request to add missing programming languages.
609+
enum Language {
610+
UnspecifiedLanguage = 0;
611+
ABAP = 60;
612+
Apex = 96;
613+
APL = 49;
614+
Ada = 39;
615+
Agda = 45;
616+
AsciiDoc = 86;
617+
Assembly = 58;
618+
Awk = 66;
619+
Bat = 68;
620+
BibTeX = 81;
621+
C = 34;
622+
COBOL = 59;
623+
CPP = 35; // C++ (the name "CPP" was chosen for consistency with LSP)
624+
CSS = 26;
625+
CSharp = 1;
626+
Clojure = 8;
627+
Coffeescript = 21;
628+
CommonLisp = 9;
629+
Coq = 47;
630+
CUDA = 97;
631+
Dart = 3;
632+
Delphi = 57;
633+
Diff = 88;
634+
Dockerfile = 80;
635+
Dyalog = 50;
636+
Elixir = 17;
637+
Erlang = 18;
638+
FSharp = 42;
639+
Fish = 65;
640+
Flow = 24;
641+
Fortran = 56;
642+
Git_Commit = 91;
643+
Git_Config = 89;
644+
Git_Rebase = 92;
645+
Go = 33;
646+
GraphQL = 98;
647+
Groovy = 7;
648+
HTML = 30;
649+
Hack = 20;
650+
Handlebars = 90;
651+
Haskell = 44;
652+
Idris = 46;
653+
Ini = 72;
654+
J = 51;
655+
JSON = 75;
656+
Java = 6;
657+
JavaScript = 22;
658+
JavaScriptReact = 93;
659+
Jsonnet = 76;
660+
Julia = 55;
661+
Justfile = 109;
662+
Kotlin = 4;
663+
LaTeX = 83;
664+
Lean = 48;
665+
Less = 27;
666+
Lua = 12;
667+
Luau = 108;
668+
Makefile = 79;
669+
Markdown = 84;
670+
Matlab = 52;
671+
Nickel = 110; // https://nickel-lang.org/
672+
Nix = 77;
673+
OCaml = 41;
674+
Objective_C = 36;
675+
Objective_CPP = 37;
676+
Pascal = 99;
677+
PHP = 19;
678+
PLSQL = 70;
679+
Perl = 13;
680+
PowerShell = 67;
681+
Prolog = 71;
682+
Protobuf = 100;
683+
Python = 15;
684+
R = 54;
685+
Racket = 11;
686+
Raku = 14;
687+
Razor = 62;
688+
Repro = 102; // Internal language for testing SCIP
689+
ReST = 85;
690+
Ruby = 16;
691+
Rust = 40;
692+
SAS = 61;
693+
SCSS = 29;
694+
SML = 43;
695+
SQL = 69;
696+
Sass = 28;
697+
Scala = 5;
698+
Scheme = 10;
699+
ShellScript = 64; // Bash
700+
Skylark = 78;
701+
Slang = 107;
702+
Solidity = 95;
703+
Svelte = 106;
704+
Swift = 2;
705+
Tcl = 101;
706+
TOML = 73;
707+
TeX = 82;
708+
Thrift = 103;
709+
TypeScript = 23;
710+
TypeScriptReact = 94;
711+
Verilog = 104;
712+
VHDL = 105;
713+
VisualBasic = 63;
714+
Vue = 25;
715+
Wolfram = 53;
716+
XML = 31;
717+
XSL = 32;
718+
YAML = 74;
719+
Zig = 38;
720+
// NextLanguage = 111;
721+
// Steps to add a new language:
722+
// 1. Copy-paste the "NextLanguage = N" line above
723+
// 2. Increment "NextLanguage = N" to "NextLanguage = N+1"
724+
// 3. Replace "NextLanguage = N" with the name of the new language.
725+
// 4. Move the new language to the correct line above using alphabetical order
726+
// 5. (optional) Add a brief comment behind the language if the name is not self-explanatory
727+
}

0 commit comments

Comments
 (0)