Skip to content

Commit eef2bff

Browse files
author
Keith Pinson
committed
Begin to match scala-mode-syntax against the 3.0 BNF
1 parent ce97702 commit eef2bff

File tree

1 file changed

+70
-32
lines changed

1 file changed

+70
-32
lines changed

Diff for: scala-mode-syntax.el

+70-32
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,91 @@
1-
;;;; scala-mode-syntax.el - Major mode for editing scala, syntax
2-
;;; Copyright (c) 2012 Heikki Vesalainen
1+
;;;; scala-mode-syntax.el - Major mode for editing Scala, syntax
2+
;;; Copyright (c) 2021 Heikki Vesalainen
33
;;; For information on the License, see the LICENSE file
44

5-
;;; Based on Scala Language Specification (SLS) Version 2.9
5+
;;; Based on Scala Language Specification (SLS) Version 3.0
6+
;;; https://dotty.epfl.ch/docs/internals/syntax.html
67

78
;;;;
89
;;;; Scala syntax regular expressions
910
;;;;
1011

11-
;;; Based on the Scala language specification 2.9. Note: order is not
12+
;;; Based on the Scala language specification 3.0. Note: order is not
1213
;;; the same as in the document, as here things are declared before
1314
;;; used.
1415

15-
;;; A note on naming. Things that end with '-re' are regular
16-
;;; expressions. Things that end with '-group' are regular expression
16+
;;; A note on naming. Things that end with `-re' are regular
17+
;;; expressions. Things that end with `-group' are regular expression
1718
;;; character groups without the enclosing [], i.e. they are not
1819
;;; regular expressions, but can be used in declaring one.
1920

2021
;; single letter matching groups (Chapter 1)
2122
;; Character ranges for one hexadecimal digit (both letter cases), written
;; without the enclosing [] so it can be spliced into a bracket expression.
(defconst scala-syntax:hexDigit-group "0-9A-Fa-f")
22-
(defconst scala-syntax:UnicodeEscape-re (concat "\\\\u[" scala-syntax:hexDigit-group "]\\{4\\}"))
23-
23+
(defconst scala-syntax:UnicodeEscape-re
24+
;; using `format' allows editing these regexes with one step closer to a sane
25+
;; number of backslash escapes, via `string-edit', at the expense of making %
26+
;; a special character
27+
(format "\\\\u[%s]\\{4\\}" scala-syntax:hexDigit-group))
28+
29+
;; TODO BNF for `upper' adds the comment "and Unicode category Lu"; do Emacs
30+
;; regexes handle this naturally?
2431
;; Members of the `upper' character group: the [:upper:] class plus "$".
;; NOTE(review): Emacs does not treat backslash as an escape inside a bracket
;; expression, so the "\\" here presumably also admits a literal backslash
;; into the group -- confirm whether only "$" was intended.
(defconst scala-syntax:upper-group "[:upper:]\\$") ;; missing _ to make ids work
25-
(defconst scala-syntax:upperAndUnderscore-group (concat "_" scala-syntax:upper-group ))
32+
;; NOTE `upperAndUnderscore' corresponds to the `upper' group in the BNF
33+
(defconst scala-syntax:upperAndUnderscore-group
34+
(concat "_" scala-syntax:upper-group ))
35+
;; TODO BNF for `lower' adds the comment "and Unicode category Ll"; do Emacs
36+
;; regexes handle this naturally?
2637
;; The [:lower:] character class name, written without the enclosing [] so it
;; can be combined with other groups inside one bracket expression.
(defconst scala-syntax:lower-group "[:lower:]")
27-
(defconst scala-syntax:letter-group (concat scala-syntax:lower-group scala-syntax:upper-group)) ;; TODO: add Lt, Lo, Nl
38+
;; TODO BNF for `letter' adds the comment "and Unicode categories Lo, Lt, Nl"
39+
(defconst scala-syntax:letter-group (concat scala-syntax:lower-group
40+
scala-syntax:upper-group))
2841
;; Character range for one ASCII decimal digit, without the enclosing [].
(defconst scala-syntax:digit-group "0-9")
29-
(defconst scala-syntax:letterOrDigit-group (concat
30-
scala-syntax:upperAndUnderscore-group
31-
scala-syntax:lower-group
32-
scala-syntax:digit-group))
33-
(defconst scala-syntax:opchar-safe-group "!%&*+/?\\\\^|~-") ;; TODO: Sm, So
42+
;; NOTE `letterOrDigit' does not have a separate entry in the 3.0 BNF.
43+
(defconst scala-syntax:letterOrDigit-group
44+
(concat
45+
scala-syntax:upperAndUnderscore-group
46+
scala-syntax:lower-group
47+
scala-syntax:digit-group))
48+
;; TODO ensure Unicode Sm, So disallowed in `opchar'
49+
;; TODO do the math: check these positively stated symbols against the
50+
;; negatively stated BNF.
51+
;; Operator characters, without the enclosing [].  The "-" is kept last and
;; the "^" is not first, so both stay literal when this string is placed
;; inside a bracket expression; the escaped backslashes yield a literal "\".
(defconst scala-syntax:opchar-safe-group "!%&*+/?\\\\^|~-")
3452
;; Remaining operator characters, without the enclosing []; concatenated in
;; front of `scala-syntax:opchar-safe-group' to form the full opchar group.
(defconst scala-syntax:opchar-unsafe-group "#:<=>@")
3553
(defconst scala-syntax:opchar-group (concat scala-syntax:opchar-unsafe-group
3654
scala-syntax:opchar-safe-group))
3755

38-
;; Scala delimiters (Chapter 1), but no quotes
56+
;; NOTE `delim' in the BNF
57+
;; TODO should backtick be here? I'm not sure it is handled correctly ATM.
58+
;; Scala delimiters, but no quotes
3959
;; Period, comma and semicolon, without the enclosing [].
(defconst scala-syntax:delimiter-group ".,;")
4060

41-
;; Integer Literal (Chapter 1.3.1)
61+
;; NOTE BNF also has a definition here for `printableChar'
62+
;; NOTE BNF also has a definition here for `charEscapeSeq'
63+
64+
(defconst scala-syntax:op-re
65+
(concat "[" scala-syntax:opchar-group "]+" ))
66+
67+
(defconst scala-syntax:idrest-re
68+
;; Eagerness of regexp causes problems with _. The following is a workaround,
69+
;; but the resulting regexp matches only what SLS demands.
70+
(format "\\([_]??[%s%s]+\\)*\\(_+%s\\|_\\)?"
71+
scala-syntax:letter-group
72+
scala-syntax:digit-group
73+
scala-syntax:op-re))
74+
75+
(defconst scala-syntax:varid-re
76+
(concat "[" scala-syntax:lower-group "]" scala-syntax:idrest-re))
77+
78+
;; `alphaid' introduced by SIP-11 - String Interpolation
79+
;; https://docs.scala-lang.org/sips/string-interpolation.html
80+
(defconst scala-syntax:alphaid-re
81+
(format "\\([%s%s]%s\\)"
82+
scala-syntax:lower-group
83+
scala-syntax:upperAndUnderscore-group
84+
scala-syntax:idrest-re))
85+
86+
;; TODO resume here (`plainid' in the BNF)
87+
88+
;; Integer Literal
4289
;; Character range for a decimal digit other than zero, without the enclosing [].
(defconst scala-syntax:nonZeroDigit-group "1-9")
4390
;; Character range for one octal digit, without the enclosing [].
(defconst scala-syntax:octalDigit-group "0-7")
4491
(defconst scala-syntax:decimalNumeral-re
@@ -116,19 +163,10 @@
116163
"\\(?:" scala-syntax:multiLineStringLiteral-end-re "\\)?"
117164
"\\|\\(\"\\)" "\\(\\\\.\\|[^\"\n\\]\\)*" "\\(\"\\)"))
118165

119-
;; Identifiers (Chapter 1.1)
120-
(defconst scala-syntax:op-re
121-
(concat "[" scala-syntax:opchar-group "]+" ))
122-
(defconst scala-syntax:idrest-re
123-
;; Eagerness of regexp causes problems with _. The following is a workaround,
124-
;; but the resulting regexp matches only what SLS demands.
125-
(concat "\\(" "[_]??" "[" scala-syntax:letter-group scala-syntax:digit-group "]+" "\\)*"
126-
"\\(" "_+" scala-syntax:op-re "\\|" "_" "\\)?"))
127-
(defconst scala-syntax:varid-re (concat "[" scala-syntax:lower-group "]" scala-syntax:idrest-re))
128-
(defconst scala-syntax:capitalid-re (concat "[" scala-syntax:upperAndUnderscore-group "]" scala-syntax:idrest-re))
129-
;; alphaid introduce by SIP11
130-
(defconst scala-syntax:alphaid-re (concat "\\(" "[" scala-syntax:lower-group scala-syntax:upperAndUnderscore-group "]" scala-syntax:idrest-re "\\)"))
131-
(defconst scala-syntax:plainid-re (concat "\\(" scala-syntax:alphaid-re "\\|" scala-syntax:op-re "\\)"))
166+
(defconst scala-syntax:capitalid-re
167+
(concat "[" scala-syntax:upperAndUnderscore-group "]" scala-syntax:idrest-re))
168+
(defconst scala-syntax:plainid-re
169+
(concat "\\(" scala-syntax:alphaid-re "\\|" scala-syntax:op-re "\\)"))
132170
;; stringlit is referred to, but not defined in the Scala Language Specification 2.9
133171
;; we define it as consisting of anything but '`' and newline
134172
;; Matches exactly one character that is not a backtick, a newline or a
;; carriage return; any repetition has to be added where this is used.
(defconst scala-syntax:stringlit-re "[^`\n\r]")
@@ -968,15 +1006,15 @@ not. A list must be either enclosed in parentheses or start with
9681006
(regexp-opt '("override" "abstract" "final" "sealed" "implicit" "lazy" "using" "extension"
9691007
"private" "protected" "case") 'words))
9701008

971-
(defconst scala-syntax:whitespace-delimeted-modifiers-re
1009+
(defconst scala-syntax:whitespace-delimited-modifiers-re
9721010
(concat "\\(?:" scala-syntax:modifiers-re "\\(?: *\\)" "\\)*"))
9731011

9741012
(defconst scala-syntax:definition-words-re
9751013
(mapconcat 'regexp-quote '("class" "object" "trait" "val" "var" "def" "type" "enum" "given") "\\|"))
9761014

9771015
(defun scala-syntax:build-definition-re (words-re)
9781016
(concat " *"
979-
scala-syntax:whitespace-delimeted-modifiers-re
1017+
scala-syntax:whitespace-delimited-modifiers-re
9801018
words-re
9811019
"\\(?: *\\)"
9821020
"\\(?2:"

0 commit comments

Comments
 (0)