Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pandoc.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,7 @@ test-suite test-pandoc
Tests.Readers.FB2
Tests.Readers.Pod
Tests.Readers.DokuWiki
Tests.Readers.MediaWiki
Tests.Writers.Native
Tests.Writers.ConTeXt
Tests.Writers.DocBook
Expand Down
28 changes: 21 additions & 7 deletions src/Text/Pandoc/Readers/MediaWiki.hs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines,
trim, splitTextBy, tshow, formatCode)
import Text.Pandoc.Char (isCJK)
import Text.Pandoc.XML (fromEntities)
import Data.Functor (($>))

-- | Read mediawiki from an input string and return a Pandoc document.
readMediaWiki :: (PandocMonad m, ToSources a)
Expand Down Expand Up @@ -531,8 +532,8 @@ inline = whitespace
<|> url
<|> str
<|> doubleQuotes
<|> strong
<|> emph
<|> strong
<|> image
<|> internalLink
<|> externalLink
Expand Down Expand Up @@ -699,14 +700,27 @@ inlinesBetween start end =
trimInlines . mconcat <$> try (start >> many1Till inline end)

-- | Parse emphasized (italic) text opened by two apostrophes (@''@).
--
-- The opening @''@ is accepted only when it is directly followed by
-- something that parses as 'strong' (checked with 'lookAhead', so nothing
-- is consumed by that check) or when no further apostrophe follows.
--
-- The span is closed by a run of apostrophes, tried longest first:
--
-- * four apostrophes not followed by another: closes the span and keeps
--   @''@ as literal text at the end of the emphasized content;
-- * three apostrophes not followed by another: closes the span and keeps
--   a single literal @'@;
-- * two apostrophes: closes the span with nothing left over.
--
-- The result is wrapped in 'B.emph' after concatenating the collected
-- inlines and passing them through 'trimInlines'.
emph :: PandocMonad m => MWParser m Inlines
emph = B.emph . trimInlines . mconcat <$> try (start >> rest)
  where
    start = sym "''" >> (lookAhead (void strong) <|> notFollowedBy (many1 (char '\'')))
    -- Collect inlines up to the closing run and append whatever literal
    -- apostrophes that closing run produced.
    rest = do
      (ins, quots) <- manyUntil inline end
      pure $ ins ++ [quots]
    -- Never close at a position where a 'strong' span would parse.
    end = try $ notBold >> end''
    notBold = notFollowedBy' (void strong)
    end'' = try (sym "''''" >> notFollowedBy (char '\'') $> B.str "''")
        <|> try (sym "'''" >> notFollowedBy (char '\'') $> B.str "'")
        <|> (sym "''" $> mempty)

-- | Parse strong (bold) text opened by three apostrophes (@'''@).
--
-- The opening @'''@ is accepted only when it is directly followed by
-- something that parses as 'emph' (checked with 'lookAhead', so nothing is
-- consumed by that check) or when no further apostrophe follows.
--
-- The span is closed by either:
--
-- * four apostrophes that are followed by @''@ or by no apostrophe at all:
--   closes the span and keeps a single literal @'@ at the end of the
--   strong content;
-- * three apostrophes: closes the span with nothing left over.
--
-- The result is wrapped in 'B.strong' after concatenating the collected
-- inlines and passing them through 'trimInlines'.
strong :: PandocMonad m => MWParser m Inlines
strong = B.strong . trimInlines . mconcat <$> try (start >> rest)
  where
    start = sym "'''" >> (lookAhead (void emph) <|> notFollowedBy (many1 (char '\'')))
    -- Collect inlines up to the closing run and append whatever literal
    -- apostrophe that closing run produced.
    rest = do
      (ins, quots) <- manyUntil inline end
      pure $ ins ++ [quots]
    end = try (sym "''''" >> (lookAhead (sym "''") <|> notFollowedBy (char '\'')) $> B.str "'")
      <|> (sym "'''" $> mempty)

doubleQuotes :: PandocMonad m => MWParser m Inlines
doubleQuotes = do
Expand Down
166 changes: 166 additions & 0 deletions test/Tests/Readers/MediaWiki.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Tests.Readers.MediaWiki
Copyright : © 2025 Evan Silberman
License : GNU GPL, version 2 or above

Maintainer :
Stability : alpha
Portability : portable

Tests for the MediaWiki reader.
-}

module Tests.Readers.MediaWiki (tests) where

import Data.Text (Text)
import qualified Data.Text as T
import Test.Tasty
import Test.Tasty.HUnit (HasCallStack)
import Tests.Helpers
import Text.Pandoc
import Text.Pandoc.Arbitrary ()
import Text.Pandoc.Builder

-- | Run the MediaWiki reader purely on the given markup, passing 'def' as
-- the reader's first argument.
mw :: Text -> Pandoc
mw markup = purely (readMediaWiki def) markup

-- | Build the link that the tests expect for an internal wiki link to
-- @dest@: it carries the @wikilink@ class, its target is @dest@ with spaces
-- replaced by underscores, and @dest@ serves as both title and link text.
wikilink :: Text -> Inlines
wikilink dest = linkWith attr target dest label
  where
    attr = ("", ["wikilink"], [])
    target = T.replace " " "_" dest
    label = text dest

infix 4 =:
-- | Build a named test case that runs 'mw' on the input half of the pair
-- and hands the pair to 'test' for comparison.
(=:) :: (ToString c, HasCallStack)
     => String -> (Text, c) -> TestTree
label =: expectation = test mw label expectation

-- | Test trees for the MediaWiki reader. At present this is a single
-- "quotes" group exercising how runs of apostrophes are read as emphasis,
-- strong emphasis, and literal apostrophes.
tests :: [TestTree]
tests = [
-- The "quotes" tests are adapted from tests for parsoid, MediaWiki's current
-- wikitext parser. Cf. https://gerrit.wikimedia.org/r/plugins/gitiles/mediawiki/services/parsoid/+/refs/heads/master/tests/parser/quotes.txt
testGroup "quotes"
-- Emphasis markers embedded directly inside a word, with no surrounding
-- whitespace, including nested and adjacent bold/italic spans.
[ testGroup "intraword emphasis"
[ "italic" =:
"plain''italic''plain" =?>
para ("plain" <> emph "italic" <> "plain")
, "two italics" =:
"plain''italic''plain''italic''plain" =?>
para ("plain" <> emph "italic" <> "plain" <> emph "italic" <> "plain")
, "bold" =:
"plain'''bold'''plain" =?>
para ("plain" <> strong "bold" <> "plain")
, "two bolds" =:
"plain'''bold'''plain'''bold'''plain" =?>
para ("plain" <> strong "bold" <> "plain" <> strong "bold" <> "plain")
, "bold and italic" =:
"plain'''bold'''plain''italic''plain" =?>
para ("plain" <> strong "bold" <> "plain" <> emph "italic" <> "plain")
, "italic and bold" =:
"plain''italic''plain'''bold'''plain" =?>
para ("plain" <> emph "italic" <> "plain" <> strong "bold" <> "plain")
, "italic with bold-italic" =:
"plain''italic'''bold-italic'''italic''plain" =?>
para ("plain" <> emph ("italic" <> strong "bold-italic" <> "italic") <> "plain")
, "bold with bold-italic" =:
"plain'''bold''bold-italic''bold'''plain" =?>
para ("plain" <> strong ("bold" <> emph "bold-italic" <> "bold") <> "plain")
, "bold-italic then italic" =:
"plain'''''bold-italic'''italic''plain" =?>
para ("plain" <> emph (strong "bold-italic" <> "italic") <> "plain")
, "bold-italic then bold" =:
"plain'''''bold-italic''bold'''plain" =?>
para ("plain" <> strong (emph "bold-italic" <> "bold") <> "plain")
, "italic then bold-italic" =:
"plain''italic'''bold-italic'''''plain" =?>
para ("plain" <> emph ("italic" <> strong "bold-italic") <> "plain")
, "bold then bold-italic" =:
"plain'''bold''bold-italic'''''plain" =?>
para ("plain" <> strong ("bold" <> emph "bold-italic") <> "plain")
]
-- A closing run one apostrophe longer than the opener is expected to leave
-- a literal apostrophe at the end of the emphasized or strong span.
, testGroup "possessives and italics"
[ "simple" =:
"In ''Flaming Pie'''s liner notes" =?>
para ("In " <> emph "Flaming Pie'" <> "s liner notes")
, "linked" =:
"obtained by ''[[Lunar Prospector]]'''s gamma-ray spectrometer" =?>
para ("obtained by " <> emph ((wikilink "Lunar Prospector") <> "'") <> "s gamma-ray spectrometer")
, "with following italics" =:
"''Sebastián Covarrubias''' ''Tesoro''" =?>
para (emph "Sebastián Covarrubias'" <> " " <> emph "Tesoro")
, "with internal link" =:
"the ''Vocabolario dell'[[Accademia della Crusca]]'', for Italian" =?>
para ("the " <> emph ("Vocabolario dell'" <> wikilink "Accademia della Crusca") <>
", for Italian")
, "multiple" =:
"'''This year''''s election ''should'' beat '''last year''''s." =?>
para (strong "This year'" <> "s election " <> emph "should" <> " beat " <> strong "last year'" <> "s.")
]
-- The following groups pair an opening run of N apostrophes with closing
-- runs of various lengths. The commented-out cases are disabled pending the
-- TODO noted beside them.
, testGroup "two-quote openings"
[ "2 open 3 close" =:
"''foo'''" =?>
para (emph "foo'")
, "2 open 4 close" =:
"''foo''''" =?>
para (emph "foo''")
-- TODO line ends terminate emphases
-- , "2 open 5 close" =:
-- "''foo'''''" =?>
-- para (emph "foo" <> strong "")
]
, testGroup "three-quote openings"
[ "3 open 2 close" =:
"'''foo''" =?>
para ("'" <> emph "foo")
, "3 open 3 close" =:
"'''foo'''" =?>
para (strong "foo")
, "3 open 4 close" =:
"'''foo''''" =?>
para (strong "foo'")
-- TODO line ends terminate emphases
-- , "3 open 5 close" =:
-- "'''foo'''''" =?>
-- para (strong "foo" <> emph "" )
]
, testGroup "four-quote openings"
[ "4 open 2 close" =:
"''''foo''" =?>
para ("''" <> emph "foo")
, "4 open 3 close" =:
"''''foo'''" =?>
para ("'" <> strong "foo")
, "4 open 4 close" =:
"''''foo''''" =?>
para ("'" <> strong "foo'")
-- TODO line ends terminate emphases
-- , "4 open 5 close" =:
-- "''''foo'''''" =?>
-- para ("'" <> strong "foo" <> emph "")
]
, testGroup "five-quote openings"
[ -- TODO line ends terminate emphases
-- "5 open 2 close" =:
-- "'''''foo''" =?>
-- para (strong (emph "foo"))
-- , "5 open 3 close" =:
-- "'''''foo'''" =?>
-- para (emph (strong "foo"))
-- , "5 open 4 close" =:
-- "'''''foo''''" =?>
-- para (emph (strong "foo'"))
"5 open 5 close" =:
"'''''foo'''''" =?>
para (emph (strong "foo"))
, "5 open 6 close" =:
"'''''foo''''''" =?>
para (emph (strong "foo'"))
]
-- Several apostrophe runs of different lengths on one line.
, testGroup "multiple quote sequences"
[ "2, 4, 2" =:
"''foo''''bar''" =?>
para (emph ("foo'" <> strong "bar"))
-- NOTE(review): the expected value below is identical to the "2, 4, 2" case
-- and contains nothing for the trailing " something else" in the input;
-- confirm whether that text is meant to be dropped or the expectation is
-- incomplete.
, "2, 4, 2, more" =:
"''foo''''bar'' something else" =?>
para (emph ("foo'" <> strong "bar"))
]
]
]
6 changes: 3 additions & 3 deletions test/mediawiki-reader.native
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ Pandoc
, Para
[ Emph [ Str "emph" ] , Space , Strong [ Str "strong" ] ]
, Para
[ Strong
[ Emph
[ Emph
[ Strong
[ Str "strong" , Space , Str "and" , Space , Str "emph" ]
]
]
Expand Down Expand Up @@ -766,7 +766,7 @@ Pandoc
, LineBreak
, Emph [ Code ( "" , [] , [] ) "markups" ]
, Code ( "" , [] , [] ) "\160"
, Strong [ Emph [ Code ( "" , [] , [] ) "can" ] ]
, Emph [ Strong [ Code ( "" , [] , [] ) "can" ] ]
, Code ( "" , [] , [] ) "\160be\160done."
]
, Para
Expand Down
2 changes: 2 additions & 0 deletions test/test-pandoc.hs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import qualified Tests.Readers.RTF
import qualified Tests.Readers.Txt2Tags
import qualified Tests.Readers.Man
import qualified Tests.Readers.Mdoc
import qualified Tests.Readers.MediaWiki
import qualified Tests.Readers.Pod
import qualified Tests.Shared
import qualified Tests.Writers.AsciiDoc
Expand Down Expand Up @@ -103,6 +104,7 @@ tests pandocPath = testGroup "pandoc tests"
, testGroup "FB2" Tests.Readers.FB2.tests
, testGroup "DokuWiki" Tests.Readers.DokuWiki.tests
, testGroup "Pod" Tests.Readers.Pod.tests
, testGroup "MediaWiki" Tests.Readers.MediaWiki.tests
]
]

Expand Down
Loading