|
1 |
| -;;;; scala-mode-syntax.el - Major mode for editing scala, syntax |
2 |
| -;;; Copyright (c) 2012 Heikki Vesalainen |
| 1 | +;;;; scala-mode-syntax.el - Major mode for editing Scala, syntax |
| 2 | +;;; Copyright (c) 2021 Heikki Vesalainen |
3 | 3 | ;;; For information on the License, see the LICENSE file
|
4 | 4 |
|
5 |
| -;;; Based on Scala Language Specification (SLS) Version 2.9 |
| 5 | +;;; Based on Scala Language Specification (SLS) Version 3.0 |
| 6 | +;;; https://dotty.epfl.ch/docs/internals/syntax.html |
6 | 7 |
|
7 | 8 | ;;;;
|
8 | 9 | ;;;; Scala syntax regular expressions
|
9 | 10 | ;;;;
|
10 | 11 |
|
11 |
| -;;; Based on the Scala language specification 2.9. Note: order is not |
| 12 | +;;; Based on the Scala language specification 3.0. Note: order is not |
12 | 13 | ;;; the same as in the document, as here things are declared before
|
13 | 14 | ;;; used.
|
14 | 15 |
|
15 |
| -;;; A note on naming. Things that end with '-re' are regular |
16 |
| -;;; expressions. Things that end with '-group' are regular expression |
| 16 | +;;; A note on naming. Things that end with `-re' are regular |
| 17 | +;;; expressions. Things that end with `-group' are regular expression |
17 | 18 | ;;; character groups without the enclosing [], i.e. they are not
|
18 | 19 | ;;; regular expressions, but can be used in declaring one.
|
19 | 20 |
|
20 | 21 | ;; single letter matching groups (Chapter 1)
|
21 | 22 | (defconst scala-syntax:hexDigit-group "0-9A-Fa-f")
|
22 |
| -(defconst scala-syntax:UnicodeEscape-re (concat "\\\\u[" scala-syntax:hexDigit-group "]\\{4\\}")) |
23 |
| - |
| 23 | +(defconst scala-syntax:UnicodeEscape-re |
| 24 | + ;; using `format' allows editing these regexes with one step closer to a sane |
| 25 | + ;; number of backslash escapes, via `string-edit', at the expense of making % |
| 26 | + ;; a special character |
| 27 | + (format "\\\\u[%s]\\{4\\}" scala-syntax:hexDigit-group)) |
| 28 | + |
| 29 | +;; TODO BNF for `upper' adds the comment "and Unicode category Lu"; do Emacs |
| 30 | +;; regexes handle this naturally? |
24 | 31 | (defconst scala-syntax:upper-group "[:upper:]\\$") ;; missing _ to make ids work
|
25 |
| -(defconst scala-syntax:upperAndUnderscore-group (concat "_" scala-syntax:upper-group )) |
| 32 | +;; NOTE `upperAndUnderscore' corresponds to the `upper' group in the BNF |
| 33 | +(defconst scala-syntax:upperAndUnderscore-group |
| 34 | + (concat "_" scala-syntax:upper-group )) |
| 35 | +;; TODO BNF for `lower' adds the comment "and Unicode category Ll"; do Emacs |
| 36 | +;; regexes handle this naturally? |
26 | 37 | (defconst scala-syntax:lower-group "[:lower:]")
|
27 |
| -(defconst scala-syntax:letter-group (concat scala-syntax:lower-group scala-syntax:upper-group)) ;; TODO: add Lt, Lo, Nl |
| 38 | +;; TODO BNF for `letter' adds the comment "and Unicode categories Lo, Lt, Nl" |
| 39 | +(defconst scala-syntax:letter-group (concat scala-syntax:lower-group |
| 40 | + scala-syntax:upper-group)) |
28 | 41 | (defconst scala-syntax:digit-group "0-9")
|
29 |
| -(defconst scala-syntax:letterOrDigit-group (concat |
30 |
| - scala-syntax:upperAndUnderscore-group |
31 |
| - scala-syntax:lower-group |
32 |
| - scala-syntax:digit-group)) |
33 |
| -(defconst scala-syntax:opchar-safe-group "!%&*+/?\\\\^|~-") ;; TODO: Sm, So |
| 42 | +;; NOTE `letterOrDigit' does not have a separate entry in the 3.0 BNF. |
| 43 | +(defconst scala-syntax:letterOrDigit-group |
| 44 | + (concat |
| 45 | + scala-syntax:upperAndUnderscore-group |
| 46 | + scala-syntax:lower-group |
| 47 | + scala-syntax:digit-group)) |
| 48 | +;; TODO ensure Unicode Sm, So disallowed in `opchar' |
| 49 | +;; TODO do the math: check these positively stated symbols against the |
| 50 | +;; negatively stated BNF. |
| 51 | +(defconst scala-syntax:opchar-safe-group "!%&*+/?\\\\^|~-") |
34 | 52 | (defconst scala-syntax:opchar-unsafe-group "#:<=>@")
|
35 | 53 | (defconst scala-syntax:opchar-group (concat scala-syntax:opchar-unsafe-group
|
36 | 54 | scala-syntax:opchar-safe-group))
|
37 | 55 |
|
38 |
| -;; Scala delimiters (Chapter 1), but no quotes |
| 56 | +;; NOTE `delim' in the BNF |
| 57 | +;; TODO should backtick be here? I'm not sure it is handled correctly ATM. |
| 58 | +;; Scala delimiters, but no quotes |
39 | 59 | (defconst scala-syntax:delimiter-group ".,;")
|
40 | 60 |
|
41 |
| -;; Integer Literal (Chapter 1.3.1) |
| 61 | +;; NOTE BNF also has a definition here for `printableChar' |
| 62 | +;; NOTE BNF also has a definition here for `charEscapeSeq' |
| 63 | + |
| 64 | +(defconst scala-syntax:op-re |
| 65 | + (concat "[" scala-syntax:opchar-group "]+" )) |
| 66 | + |
| 67 | +(defconst scala-syntax:idrest-re |
| 68 | + ;; Eagerness of regexp causes problems with _. The following is a workaround, |
| 69 | + ;; but the resulting regexp matches only what SLS demands. |
| 70 | + (format "\\([_]??[%s%s]+\\)*\\(_+%s\\|_\\)?" |
| 71 | + scala-syntax:letter-group |
| 72 | + scala-syntax:digit-group |
| 73 | + scala-syntax:op-re)) |
| 74 | + |
| 75 | +(defconst scala-syntax:varid-re |
| 76 | + (concat "[" scala-syntax:lower-group "]" scala-syntax:idrest-re)) |
| 77 | + |
| 78 | +;; `alphaid' introduced by SIP-11 - String Interpolation |
| 79 | +;; https://docs.scala-lang.org/sips/string-interpolation.html |
| 80 | +(defconst scala-syntax:alphaid-re |
| 81 | + (format "\\([%s%s]%s\\)" |
| 82 | + scala-syntax:lower-group |
| 83 | + scala-syntax:upperAndUnderscore-group |
| 84 | + scala-syntax:idrest-re)) |
| 85 | + |
| 86 | +;; TODO resume here (`plainid' in the BNF) |
| 87 | + |
| 88 | +;; Integer Literal |
42 | 89 | (defconst scala-syntax:nonZeroDigit-group "1-9")
|
43 | 90 | (defconst scala-syntax:octalDigit-group "0-7")
|
44 | 91 | (defconst scala-syntax:decimalNumeral-re
|
|
116 | 163 | "\\(?:" scala-syntax:multiLineStringLiteral-end-re "\\)?"
|
117 | 164 | "\\|\\(\"\\)" "\\(\\\\.\\|[^\"\n\\]\\)*" "\\(\"\\)"))
|
118 | 165 |
|
119 |
| -;; Identifiers (Chapter 1.1) |
120 |
| -(defconst scala-syntax:op-re |
121 |
| - (concat "[" scala-syntax:opchar-group "]+" )) |
122 |
| -(defconst scala-syntax:idrest-re |
123 |
| - ;; Eagerness of regexp causes problems with _. The following is a workaround, |
124 |
| - ;; but the resulting regexp matches only what SLS demands. |
125 |
| - (concat "\\(" "[_]??" "[" scala-syntax:letter-group scala-syntax:digit-group "]+" "\\)*" |
126 |
| - "\\(" "_+" scala-syntax:op-re "\\|" "_" "\\)?")) |
127 |
| -(defconst scala-syntax:varid-re (concat "[" scala-syntax:lower-group "]" scala-syntax:idrest-re)) |
128 |
| -(defconst scala-syntax:capitalid-re (concat "[" scala-syntax:upperAndUnderscore-group "]" scala-syntax:idrest-re)) |
129 |
| -;; alphaid introduce by SIP11 |
130 |
| -(defconst scala-syntax:alphaid-re (concat "\\(" "[" scala-syntax:lower-group scala-syntax:upperAndUnderscore-group "]" scala-syntax:idrest-re "\\)")) |
131 |
| -(defconst scala-syntax:plainid-re (concat "\\(" scala-syntax:alphaid-re "\\|" scala-syntax:op-re "\\)")) |
| 166 | +(defconst scala-syntax:capitalid-re |
| 167 | + (concat "[" scala-syntax:upperAndUnderscore-group "]" scala-syntax:idrest-re)) |
| 168 | +(defconst scala-syntax:plainid-re |
| 169 | + (concat "\\(" scala-syntax:alphaid-re "\\|" scala-syntax:op-re "\\)")) |
132 | 170 | ;; stringlit is referred to, but not defined in the Scala Language Specification 2.9
|
133 | 171 | ;; we define it as consisting of anything but '`' and newline
|
134 | 172 | (defconst scala-syntax:stringlit-re "[^`\n\r]")
|
@@ -968,15 +1006,15 @@ not. A list must be either enclosed in parentheses or start with
|
968 | 1006 | (regexp-opt '("override" "abstract" "final" "sealed" "implicit" "lazy" "using" "extension"
|
969 | 1007 | "private" "protected" "case") 'words))
|
970 | 1008 |
|
971 |
| -(defconst scala-syntax:whitespace-delimeted-modifiers-re |
| 1009 | +(defconst scala-syntax:whitespace-delimited-modifiers-re |
972 | 1010 | (concat "\\(?:" scala-syntax:modifiers-re "\\(?: *\\)" "\\)*"))
|
973 | 1011 |
|
974 | 1012 | (defconst scala-syntax:definition-words-re
|
975 | 1013 | (mapconcat 'regexp-quote '("class" "object" "trait" "val" "var" "def" "type" "enum" "given") "\\|"))
|
976 | 1014 |
|
977 | 1015 | (defun scala-syntax:build-definition-re (words-re)
|
978 | 1016 | (concat " *"
|
979 |
| - scala-syntax:whitespace-delimeted-modifiers-re |
| 1017 | + scala-syntax:whitespace-delimited-modifiers-re |
980 | 1018 | words-re
|
981 | 1019 | "\\(?: *\\)"
|
982 | 1020 | "\\(?2:"
|
|
0 commit comments