@@ -249,49 +249,51 @@ data TitlePunct
249
249
deriving (Eq , Show )
250
250
251
251
252
- -- | A set of stops—'period', 'comma', and 'interpunct'—to be used by
253
- -- 'normalizeStops' function.
252
+ -- | A set of stops—'period', 'comma', 'interpunct', 'questionMark', and
253
+ -- 'exclamationMark'—to be used by the 'normalizeStops' function.
254
254
--
255
255
-- There are three presets: 'horizontalStops', 'verticalStops', and
256
256
-- 'horizontalStopsWithSlashes'.
257
257
data Stops = Stops
258
258
{ period :: Text
259
259
, comma :: Text
260
260
, interpunct :: Text
261
+ , questionMark :: Text
262
+ , exclamationMark :: Text
261
263
} deriving (Eq , Show )
262
264
263
265
-- | Stop sentences in the modern Korean style which follows Western stops.
264
266
-- E.g.:
265
267
--
266
- -- > 봄·여름·가을·겨울. 어제, 오늘.
268
+ -- > 봄·여름·가을·겨울. 어제, 오늘. 새벽? 아침!
267
269
horizontalStops :: Stops
268
270
horizontalStops = Stops
269
271
{ period = " . "
270
272
, comma = " , "
271
273
, interpunct = " ·"
274
+ , questionMark = " ? "
275
+ , exclamationMark = " ! "
272
276
}
273
277
274
278
-- | Stop sentences in the pre-modern Korean style which follows Chinese stops.
275
279
-- E.g.:
276
280
--
277
- -- > 봄·여름·가을·겨울。어제、오늘。
281
+ -- > 봄·여름·가을·겨울。어제、오늘。새벽?아침!
278
282
verticalStops :: Stops
279
283
verticalStops = Stops
280
284
{ period = " 。"
281
285
, comma = " 、"
282
286
, interpunct = " ·"
287
+ , questionMark = " ?"
288
+ , exclamationMark = " !"
283
289
}
284
290
285
291
-- | Similar to 'horizontalStops' except slashes are used instead of
286
292
-- interpuncts. E.g.:
287
293
--
288
- -- > 봄/여름/가을/겨울. 어제, 오늘.
294
+ -- > 봄/여름/가을/겨울. 어제, 오늘. 새벽? 아침!
289
295
horizontalStopsWithSlashes :: Stops
290
- horizontalStopsWithSlashes = Stops
291
- { period = " . "
292
- , comma = " , "
293
- , interpunct = " /"
294
- }
296
+ horizontalStopsWithSlashes = horizontalStops { interpunct = " /" }
295
297
296
298
297
299
-- | Normalizes sentence stops (periods, commas, interpuncts, question marks,
-- and exclamation marks).
@@ -332,6 +334,12 @@ normalizeStops stops input = (`fmap` annotatedEntities) $ \ case
332
334
, do { ending <- interpunct'
333
335
; return (toEntity $ adjustEnding ending $ interpunct stops)
334
336
}
337
+ , do { ending <- questionMark'
338
+ ; return (toEntity $ adjustEnding ending $ questionMark stops)
339
+ }
340
+ , do { ending <- exclamationMark'
341
+ ; return (toEntity $ adjustEnding ending $ exclamationMark stops)
342
+ }
335
343
]
336
344
adjustEnding :: Ending -> Text -> Text
337
345
adjustEnding ending text
@@ -377,6 +385,26 @@ normalizeStops stops input = (`fmap` annotatedEntities) $ \ case
377
385
, string " ·"
378
386
, asciiCI " ·"
379
387
] >> return Ending
388
+ questionMark' :: Parser Ending
389
+ questionMark' = choice
390
+ [ char ' ?' >> boundary
391
+ , char ' ?' >> trailingSpaces
392
+ , string " ?" >> boundary
393
+ , string " ?" >> boundary
394
+ , asciiCI " ?" >> boundary
395
+ , string " ?" >> trailingSpaces
396
+ , asciiCI " ?" >> trailingSpaces
397
+ ]
398
+ exclamationMark' :: Parser Ending
399
+ exclamationMark' = choice
400
+ [ char ' !' >> boundary
401
+ , char ' !' >> trailingSpaces
402
+ , string " !" >> boundary
403
+ , string " !" >> boundary
404
+ , asciiCI " !" >> boundary
405
+ , string " !" >> trailingSpaces
406
+ , asciiCI " !" >> trailingSpaces
407
+ ]
380
408
closingChars :: String
381
409
closingChars =
382
410
[ ' "' , ' ”' , ' \' ' , ' ’' , ' )' , ' ]' , ' }' , ' 」' , ' 』' , ' 〉' , ' 》' , ' )' , ' 〕'
@@ -419,7 +447,7 @@ normalizeStops stops input = (`fmap` annotatedEntities) $ \ case
419
447
]
420
448
421
449
422
- data Ending = TrailingChars Text | TrailingSpaces Text | Ending
450
+ data Ending = TrailingChars Text | TrailingSpaces Text | Ending deriving ( Show )
423
451
424
452
425
453
-- | Substitution options for 'transformArrow' function. These options can
0 commit comments