Skip to content

Commit 0ae86e9

Browse files
committed
proper name sanitization
1 parent a1b4088 commit 0ae86e9

File tree

1 file changed

+51
-44
lines changed

1 file changed

+51
-44
lines changed

src/Agda2Lambox/Compile/Utils.hs

Lines changed: 51 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,12 @@ module Agda2Lambox.Compile.Utils
1616

1717
import Control.Monad.State
1818
import Control.Monad.IO.Class ( liftIO )
19+
import Numeric ( showHex )
1920
import Data.Char
2021
import Data.List ( elemIndex, foldl' )
2122
import Data.Maybe ( fromMaybe, listToMaybe, isJust )
2223

23-
import Unicode.Char.Identifiers
24+
import Unicode.Char.Identifiers ( isXIDStart, isXIDContinue )
2425
import Agda.Compiler.Backend
2526
import Agda.Syntax.Internal
2627
import Agda.Syntax.Abstract.Name
@@ -138,48 +139,54 @@ instance MayBeLogical a => MayBeLogical (Arg a) where
138139
isLogical arg | not (usableModality arg) = pure True
139140
isLogical arg = isLogical $ unArg arg
140141

141-
-- | Sanitize an agda name to something without unicode.
142-
-- Must be injective.
143-
-- We may require a smarter transformation later on for other targets.
144-
sanitize :: String -> String
145-
sanitize xs | xs `elem` ["true", "false"] = "r#" ++ xs
146-
sanitize [] = []
147-
sanitize (x:xs) = toXIDStart x <> concatMap toXIDContinue xs
148-
where
149-
toXIDStart, toXIDContinue :: Char -> String
150-
toXIDStart c
151-
| isXIDStart c || c == '_' = [c]
152-
| isXIDContinue c = "_" <> [c]
153-
| otherwise = toXIDContinue c
154-
155-
toXIDContinue c
156-
| isXIDContinue c = [c]
157-
| otherwise = "y" <> show (ord c) <> "y"
158-
{-
159-
sanitize q = "agda" ++ concatMap encode q
160-
where
161-
encode '$' = "$$"
162-
encode c
163-
| isAscii c -- more agressive sanitization
164-
, isLower c
165-
|| isUpper c
166-
|| c == '_'
167-
|| generalCategory c == DecimalNumber = [c]
168-
| otherwise = "$" ++ show (fromEnum c) ++ "$"
169-
170-
-- | Sanitize an agda name to something Rust-compliant.
171-
-- Must be injective.
142+
-- | Delimitor used for name sanitization
143+
sDelim :: Char
144+
sDelim = 'Ֆ'
145+
146+
-- | Sanitize an agda name to a Rust-compatible identifier.
147+
-- Injective by design.
172148
sanitize :: String -> String
173-
sanitize [] = []
174-
sanitize (c:cs) = toXIDStart c <> concatMap toXIDContinue cs
149+
sanitize [] = sDelim : "empty" -- NOTE(flupe): should be unreachable
150+
sanitize s@(x:xs)
151+
-- NOTE(flupe): Rust keywords are converted into raw identifiers
152+
| s `elem` rustKeywords = "r#" ++ s
153+
-- NOTE(flupe): or prefixed with the
154+
| s `elem` forbiddenRaw = sDelim : s
155+
| otherwise =
156+
let hd = escapeChar isXIDStart x
157+
tl = concatMap (escapeChar isXIDContinue) xs
158+
in hd <> tl
175159
where
176-
toXIDStart, toXIDContinue :: Char -> String
177-
toXIDStart c
178-
| isXIDStart c || c == '_' = [c]
179-
| isXIDContinue c = "_" <> [c]
180-
| otherwise = toXIDContinue c
181-
182-
toXIDContinue c
183-
| isXIDContinue c = [c]
184-
| otherwise = "Ֆ" <> show (ord c) <> "Ֆ"
185-
-}
160+
escapeChar :: (Char -> Bool) -> Char -> String
161+
escapeChar xid x = if xid x then [x] else sDelim : esc x
162+
where
163+
esc '_' = "_" -- Preserve underscore (it's gonna get prefixed)
164+
esc '+' = "plus"
165+
esc '-' = "sub"
166+
esc '*' = "mult"
167+
esc '/' = "div"
168+
esc '=' = "eq"
169+
esc '<' = "lt"
170+
esc '>' = "gt"
171+
esc 'λ' = "lam"
172+
esc '' = "to"
173+
esc '' = "cons"
174+
esc '?' = "qen"
175+
esc '!' = "bng"
176+
esc '#' = "hsh" -- This ensures names starting with # are escaped
177+
-- and enforces injectivity of the sanitization
178+
esc '.' = "dot"
179+
esc c | c == sDelim = [sDelim]
180+
| otherwise = showHex (ord c) [sDelim]
181+
182+
-- standard rust keywords
183+
rustKeywords =
184+
[ "as", "break", "const", "continue", "else", "enum", "extern"
185+
, "false", "fn", "for", "if", "impl", "in", "let", "loop", "match"
186+
, "mod", "move", "mut", "pub", "ref", "return", "static", "struct"
187+
, "trait", "true", "type", "unsafe", "use", "where", "while"
188+
, "async", "await", "dyn"
189+
]
190+
191+
-- rust keywords that are also forbidden inside raw strings
192+
forbiddenRaw = ["self", "Self", "super", "crate"]

0 commit comments

Comments
 (0)