|
| 1 | +{-# LANGUAGE DeriveAnyClass #-} |
| 2 | +{-# LANGUAGE DeriveDataTypeable #-} |
| 3 | +{-# LANGUAGE DeriveGeneric #-} |
| 4 | +{-# LANGUAGE DerivingStrategies #-} |
| 5 | +{-# LANGUAGE StandaloneDeriving #-} |
| 6 | +{-| The 'Char' type has 128 nullary constructors, listed in |
| 7 | + order according to each character's 7-bit numeric code. -} |
| 8 | +module ASCII.Char |
| 9 | + ( |
| 10 | + {- * The @Char@ type -} Char (..), |
| 11 | + {- * @Int@ -} toInt, fromIntMaybe, fromIntUnsafe, |
| 12 | + {- * @Word8@ -} toWord8, fromWord8Maybe, fromWord8Unsafe, |
| 13 | + {- * Enumeration -} allCharacters, |
| 14 | + {- * Notes -} {- $notes -} |
| 15 | + ) |
| 16 | + where |
| 17 | + |
| 18 | +import Data.Bool (otherwise) |
| 19 | +import Data.Data (Data) |
| 20 | +import Data.Eq (Eq) |
| 21 | +import Data.Hashable (Hashable) |
| 22 | +import Data.Int (Int) |
| 23 | +import Data.Maybe (Maybe (..)) |
| 24 | +import Data.Ord (Ord, (<), (>)) |
| 25 | +import Data.Word (Word8) |
| 26 | +import GHC.Generics (Generic) |
| 27 | +import Prelude (Bounded, Enum, enumFromTo, fromEnum, fromIntegral, maxBound, minBound, toEnum) |
| 28 | +import Text.Show (Show) |
| 29 | + |
-- | A character in the ASCII character set
--
-- The constructors are listed in ascending order of character code. The
-- derived 'Enum' and 'Ord' instances depend on this order, so constructors
-- must never be reordered.
data Char =
    -- Control characters
      Null | StartOfHeading | StartOfText | EndOfText | EndOfTransmission | Enquiry
    | Acknowledgement | Bell | Backspace | HorizontalTab | LineFeed | VerticalTab
    | FormFeed | CarriageReturn | ShiftOut | ShiftIn | DataLinkEscape

    | DeviceControl1 | DeviceControl2 | DeviceControl3 | DeviceControl4

    | NegativeAcknowledgement | SynchronousIdle | EndOfTransmissionBlock
    | Cancel | EndOfMedium | Substitute | Escape

    | FileSeparator | GroupSeparator | RecordSeparator | UnitSeparator

    -- Space and punctuation
    | Space
    | ExclamationMark     -- ^ !
    | QuotationMark       -- ^ "
    | NumberSign          -- ^ #
    | DollarSign          -- ^ $
    | PercentSign         -- ^ %
    | Ampersand           -- ^ &
    | Apostrophe          -- ^ \'
    | LeftParenthesis     -- ^ (
    | RightParenthesis    -- ^ )
    | Asterisk            -- ^ \*
    | PlusSign            -- ^ +
    | Comma               -- ^ ,
    | HyphenMinus         -- ^ \-
    | FullStop            -- ^ .
    | Slash               -- ^ /

    -- Decimal digits
    | Digit0 | Digit1 | Digit2 | Digit3 | Digit4 | Digit5 | Digit6 | Digit7 | Digit8 | Digit9

    | Colon               -- ^ :
    | Semicolon           -- ^ ;
    | LessThanSign        -- ^ \<
    | EqualsSign          -- ^ =
    | GreaterThanSign     -- ^ \>
    | QuestionMark        -- ^ ?
    | AtSign              -- ^ \@

    -- Upper-case letters
    | CapitalLetterA | CapitalLetterB | CapitalLetterC | CapitalLetterD | CapitalLetterE
    | CapitalLetterF | CapitalLetterG | CapitalLetterH | CapitalLetterI | CapitalLetterJ
    | CapitalLetterK | CapitalLetterL | CapitalLetterM | CapitalLetterN | CapitalLetterO
    | CapitalLetterP | CapitalLetterQ | CapitalLetterR | CapitalLetterS | CapitalLetterT
    | CapitalLetterU | CapitalLetterV | CapitalLetterW | CapitalLetterX | CapitalLetterY
    | CapitalLetterZ

    | LeftSquareBracket   -- ^ \[
    | Backslash           -- ^ \\
    | RightSquareBracket  -- ^ \]
    | Caret               -- ^ \^
    | Underscore          -- ^ _
    | GraveAccent         -- ^ \`

    -- Lower-case letters
    | SmallLetterA | SmallLetterB | SmallLetterC | SmallLetterD | SmallLetterE
    | SmallLetterF | SmallLetterG | SmallLetterH | SmallLetterI | SmallLetterJ
    | SmallLetterK | SmallLetterL | SmallLetterM | SmallLetterN | SmallLetterO
    | SmallLetterP | SmallLetterQ | SmallLetterR | SmallLetterS | SmallLetterT
    | SmallLetterU | SmallLetterV | SmallLetterW | SmallLetterX | SmallLetterY
    | SmallLetterZ

    | LeftCurlyBracket    -- ^ \{
    | VerticalLine        -- ^ \|
    | RightCurlyBracket   -- ^ \}
    | Tilde               -- ^ \~
    | Delete
| 97 | + |
{-| ASCII characters can be compared for equality using '(==)'.
Comparisons are case-sensitive; @'SmallLetterA' '/=' 'CapitalLetterA'@. -}
deriving stock instance Eq Char

{-| ASCII characters are ordered; for example, the letter /A/ is "less than"
('<') the letter /B/ because it appears earlier in the list. The ordering of
ASCII characters is the same as the ordering of the corresponding Unicode
'Data.Char.Char's. -}
deriving stock instance Ord Char

{-| The 'Enum' instance allows us to use range syntax, for example
@['SmallLetterA' .. 'SmallLetterZ']@ is a list of all lower-case letters from
/a/ to /z/. Instead of 'toEnum' and 'fromEnum', consider using 'toInt' and
'fromIntMaybe'. -}
deriving stock instance Enum Char

{-| The least character is 'Null', and the greatest character is 'Delete'. You
can write @(['minBound' .. 'maxBound'] :: [ASCII.'Char'])@ to get a list of all
the ASCII characters. -}
deriving stock instance Bounded Char

{-| 'show' produces the name of a constructor. For example, the character @e@ is
shown as “@SmallLetterE@”. See "ASCII.Char" for the complete list of constructor
names. -}
deriving stock instance Show Char

{-| The 'Data' instance allows ASCII characters to be used with generic
programming in the “SYB” style. (See the
<https://hackage.haskell.org/package/syb syb> package and the 2003 paper
<https://www.microsoft.com/en-us/research/wp-content/uploads/2003/01/hmap.pdf Scrap Your Boilerplate>
by Ralf Lämmel and Simon Peyton Jones.) -}
deriving stock instance Data Char

{-| The 'Generic' instance allows ASCII characters to be used with generic
programming in the “generic deriving” style. (See the
<https://hackage.haskell.org/package/generic-data generic-data> package and the 2010 paper
<http://dreixel.net/research/pdf/gdmh.pdf A generic deriving mechanism for Haskell>
by José Pedro Magalhães, Atze Dijkstra, Johan Jeuring, and Andres Löh.) -}
deriving stock instance Generic Char

{-| The 'Hashable' instance lets us collect ASCII characters in hash-based sets,
and it lets us use ASCII characters as keys in hash-based maps. (See the
@unordered-containers@ package.) -}
deriving anyclass instance Hashable Char
| 143 | + |
{-| Converts an ASCII character to its corresponding numeric value between 0 and 127

The result is the position of the constructor within the 'Char' type, as
given by the derived 'Enum' instance.

@
toInt Null == 0
toInt CapitalLetterA == 65
toInt SmallLetterA == 97
toInt Delete == 127
@

-}
toInt :: Char -> Int
toInt = Prelude.fromEnum
| 157 | + |
{-| Converts an ASCII character to its corresponding byte between 0 and 127

@
toWord8 Null == 0
toWord8 CapitalLetterA == 65
toWord8 SmallLetterA == 97
toWord8 Delete == 127
@
-}
toWord8 :: Char -> Word8
-- 'fromEnum' yields an 'Int'; 'fromIntegral' narrows it to a 'Word8'.
toWord8 x = Prelude.fromIntegral (Prelude.fromEnum x)
| 169 | + |
{-| Looks up the ASCII character for a numeric code

Gives 'Just' the character when the number lies between 0 and 127
(inclusive), and 'Nothing' for any number outside that range.

@
fromIntMaybe (-1) == Nothing
fromIntMaybe 0 == Just Null
fromIntMaybe 65 == Just CapitalLetterA
fromIntMaybe 127 == Just Delete
fromIntMaybe 128 == Nothing
@

-}
fromIntMaybe :: Int -> Maybe Char
fromIntMaybe n =
    if n < 0 then Nothing
    else if n > 127 then Nothing
    else Just (fromIntUnsafe n)
| 187 | + |
{-| Looks up the ASCII character for a byte

Gives 'Just' the character when the byte lies between 0 and 127 (inclusive),
and 'Nothing' for any byte above 127.

@
fromWord8Maybe 0 == Just Null
fromWord8Maybe 65 == Just CapitalLetterA
fromWord8Maybe 127 == Just Delete
fromWord8Maybe 128 == Nothing
@

-}
fromWord8Maybe :: Word8 -> Maybe Char
fromWord8Maybe byte =
    if byte > 127 then Nothing else Just (fromWord8Unsafe byte)
| 202 | + |
{-| The inverse of 'toInt'

This function is /unsafe/: its result is undefined for any number below 0 or
above 127. Use 'fromIntMaybe' when the input has not already been checked.

@
fromIntUnsafe (-1) == undefined
fromIntUnsafe 65 == CapitalLetterA
fromIntUnsafe 66 == CapitalLetterB
fromIntUnsafe 67 == CapitalLetterC
fromIntUnsafe 128 == undefined
@

-}
fromIntUnsafe :: Int -> Char
fromIntUnsafe n = Prelude.toEnum n
| 220 | + |
{-| The inverse of 'toWord8'

This function is /unsafe/: its result is undefined for bytes above 127.
Use 'fromWord8Maybe' when the input has not already been checked.

@
fromWord8Unsafe 65 == CapitalLetterA
fromWord8Unsafe 66 == CapitalLetterB
fromWord8Unsafe 67 == CapitalLetterC
fromWord8Unsafe 128 == undefined
@

-}
fromWord8Unsafe :: Word8 -> Char
fromWord8Unsafe byte = fromIntUnsafe code
  where
    -- Widen the byte to the 'Int' that 'fromIntUnsafe' expects.
    code = Prelude.fromIntegral byte
| 236 | + |
{-| A list of every ASCII character, running from 'minBound' to 'maxBound' -}
allCharacters :: [Char]
allCharacters = [Prelude.minBound .. Prelude.maxBound]
| 239 | + |
| 240 | +{- $notes |
| 241 | +
|
| 242 | +There are 128 characters in total. |
| 243 | +
|
| 244 | +@ |
| 245 | +length allCharacters == 128 |
| 246 | +@ |
| 247 | +
|
| 248 | +Null is the first character. |
| 249 | +
|
| 250 | +@ |
| 251 | +minBound == Null |
| 252 | +@ |
| 253 | +
|
| 254 | +Delete is the last character. |
| 255 | +
|
| 256 | +@ |
| 257 | +maxBound == Delete |
| 258 | +@ |
| 259 | +
|
| 260 | +-} |
0 commit comments