@@ -16,11 +16,12 @@ module Agda2Lambox.Compile.Utils
1616
1717import Control.Monad.State
1818import Control.Monad.IO.Class ( liftIO )
19+ import Numeric ( showHex )
1920import Data.Char
2021import Data.List ( elemIndex , foldl' )
2122import Data.Maybe ( fromMaybe , listToMaybe , isJust )
2223
23- import Unicode.Char.Identifiers
24+ import Unicode.Char.Identifiers ( isXIDStart , isXIDContinue )
2425import Agda.Compiler.Backend
2526import Agda.Syntax.Internal
2627import Agda.Syntax.Abstract.Name
@@ -138,48 +139,54 @@ instance MayBeLogical a => MayBeLogical (Arg a) where
138139 isLogical arg | not (usableModality arg) = pure True
139140 isLogical arg = isLogical $ unArg arg
140141
141- -- | Sanitize an agda name to something without unicode.
142- -- Must be injective.
143- -- We may require a smarter transformation later on for other targets.
144- sanitize :: String -> String
145- sanitize xs | xs `elem` [" true" , " false" ] = " r#" ++ xs
146- sanitize [] = []
147- sanitize (x: xs) = toXIDStart x <> concatMap toXIDContinue xs
148- where
149- toXIDStart , toXIDContinue :: Char -> String
150- toXIDStart c
151- | isXIDStart c || c == ' _' = [c]
152- | isXIDContinue c = " _" <> [c]
153- | otherwise = toXIDContinue c
154-
155- toXIDContinue c
156- | isXIDContinue c = [c]
157- | otherwise = " y" <> show (ord c) <> " y"
158- {-
159- sanitize q = "agda" ++ concatMap encode q
160- where
161- encode '$' = "$$"
162- encode c
163- | isAscii c -- more agressive sanitization
164- , isLower c
165- || isUpper c
166- || c == '_'
167- || generalCategory c == DecimalNumber = [c]
168- | otherwise = "$" ++ show (fromEnum c) ++ "$"
169-
170- -- | Sanitize an agda name to something Rust-compliant.
171- -- Must be injective.
142+ -- | Delimitor used for name sanitization
143+ sDelim :: Char
144+ sDelim = 'Ֆ'
145+
146+ -- | Sanitize an agda name to a Rust-compatible identifier.
147+ -- Injective by design.
172148sanitize :: String -> String
173- sanitize [] = []
174- sanitize (c:cs) = toXIDStart c <> concatMap toXIDContinue cs
149+ sanitize [] = sDelim : " empty" -- NOTE(flupe): should be unreachable
150+ sanitize s@ (x: xs)
151+ -- NOTE(flupe): Rust keywords are converted into raw identifiers
152+ | s `elem` rustKeywords = " r#" ++ s
153+ -- NOTE(flupe): or prefixed with the
154+ | s `elem` forbiddenRaw = sDelim : s
155+ | otherwise =
156+ let hd = escapeChar isXIDStart x
157+ tl = concatMap (escapeChar isXIDContinue) xs
158+ in hd <> tl
175159 where
176- toXIDStart, toXIDContinue :: Char -> String
177- toXIDStart c
178- | isXIDStart c || c == '_' = [c]
179- | isXIDContinue c = "_" <> [c]
180- | otherwise = toXIDContinue c
181-
182- toXIDContinue c
183- | isXIDContinue c = [c]
184- | otherwise = "Ֆ" <> show (ord c) <> "Ֆ"
185- -}
160+ escapeChar :: (Char -> Bool ) -> Char -> String
161+ escapeChar xid x = if xid x then [x] else sDelim : esc x
162+ where
163+ esc ' _' = " _" -- Preserve underscore (it's gonna get prefixed)
164+ esc ' +' = " plus"
165+ esc ' -' = " sub"
166+ esc ' *' = " mult"
167+ esc ' /' = " div"
168+ esc ' =' = " eq"
169+ esc ' <' = " lt"
170+ esc ' >' = " gt"
171+ esc 'λ' = " lam"
172+ esc ' →' = " to"
173+ esc ' ∷' = " cons"
174+ esc ' ?' = " qen"
175+ esc ' !' = " bng"
176+ esc ' #' = " hsh" -- This ensures names starting with # are escaped
177+ -- and enforces injectivity of the sanitization
178+ esc ' .' = " dot"
179+ esc c | c == sDelim = [sDelim]
180+ | otherwise = showHex (ord c) [sDelim]
181+
182+ -- standard rust keywords
183+ rustKeywords =
184+ [ " as" , " break" , " const" , " continue" , " else" , " enum" , " extern"
185+ , " false" , " fn" , " for" , " if" , " impl" , " in" , " let" , " loop" , " match"
186+ , " mod" , " move" , " mut" , " pub" , " ref" , " return" , " static" , " struct"
187+ , " trait" , " true" , " type" , " unsafe" , " use" , " where" , " while"
188+ , " async" , " await" , " dyn"
189+ ]
190+
191+ -- rust keywords that are also forbidden inside raw strings
192+ forbiddenRaw = [" self" , " Self" , " super" , " crate" ]
0 commit comments