From 50aec25349c98d10d2cd9d7a7ed44ae90ce937f9 Mon Sep 17 00:00:00 2001 From: Jan Mas Rovira Date: Mon, 10 Mar 2025 09:57:25 +0100 Subject: [PATCH 1/5] ast --- .../Compiler/Backend/Markdown/Data/Types.hs | 22 +-- .../Concrete/Translation/FromSource.hs | 6 +- src/Markdown/FromSource.hs | 13 ++ src/Markdown/Language.hs | 150 ++++++++++++++++++ 4 files changed, 177 insertions(+), 14 deletions(-) create mode 100644 src/Markdown/FromSource.hs create mode 100644 src/Markdown/Language.hs diff --git a/src/Juvix/Compiler/Backend/Markdown/Data/Types.hs b/src/Juvix/Compiler/Backend/Markdown/Data/Types.hs index 0493e2c49f..1c82bf908d 100644 --- a/src/Juvix/Compiler/Backend/Markdown/Data/Types.hs +++ b/src/Juvix/Compiler/Backend/Markdown/Data/Types.hs @@ -4,7 +4,7 @@ module Juvix.Compiler.Backend.Markdown.Data.Types ) where -import Commonmark qualified as MK +import Commonmark qualified as CM import Data.Text qualified as T import Juvix.Compiler.Backend.Markdown.Data.MkJuvixBlockOptions import Juvix.Data.Loc @@ -90,13 +90,13 @@ instance Monoid Mk where nl :: Text nl = "\n" -instance MK.ToPlainText TextBlock where +instance CM.ToPlainText TextBlock where toPlainText r = r ^. textBlock -instance MK.ToPlainText JuvixCodeBlock where +instance CM.ToPlainText JuvixCodeBlock where toPlainText = show -instance MK.ToPlainText Mk where +instance CM.ToPlainText Mk where toPlainText = trimText . mconcat @@ -112,7 +112,7 @@ builder = \case flatten :: [Mk] -> Mk flatten = foldl' (<>) MkNull -instance MK.Rangeable Mk where +instance CM.Rangeable Mk where ranged _ x = x toTextBlock :: Text -> TextBlock @@ -145,16 +145,16 @@ paren = wrap' "(" ")" brack :: TextBlock -> TextBlock brack = wrap' "[" "]" -instance MK.HasAttributes TextBlock where +instance CM.HasAttributes TextBlock where addAttributes _ = id -instance MK.Rangeable TextBlock where +instance CM.Rangeable TextBlock where ranged _ r = r -instance MK.HasAttributes Mk where +instance CM.HasAttributes Mk where addAttributes _ = id -instance MK.IsInline TextBlock where +instance CM.IsInline TextBlock where lineBreak = toTextBlock nl softBreak = toTextBlock " " str = toTextBlock @@ -168,7 +168,7 @@ instance MK.IsInline TextBlock where toTextBlock "!" <> brack desc <> paren (toTextBlock src) code = wrap "`" . toTextBlock rawInline f t - | f == MK.Format "html" = + | f == CM.Format "html" = toTextBlock t | otherwise = mempty @@ -189,7 +189,7 @@ processCodeBlock info t loc = let b = "```" <> info <> t <> "```" in MkTextBlock TextBlock {_textBlock = b, _textBlockInterval = loc} -instance MK.IsBlock TextBlock Mk where +instance CM.IsBlock TextBlock Mk where paragraph a = MkTextBlock a plain a = MkTextBlock a thematicBreak = toMK "---" diff --git a/src/Juvix/Compiler/Concrete/Translation/FromSource.hs b/src/Juvix/Compiler/Concrete/Translation/FromSource.hs index 8185af66c6..f49de9a486 100644 --- a/src/Juvix/Compiler/Concrete/Translation/FromSource.hs +++ b/src/Juvix/Compiler/Concrete/Translation/FromSource.hs @@ -161,11 +161,11 @@ runModuleParser fileName input_ res <- P.runParserT juvixCodeBlockParser (toFilePath fileName) input_ case res of Left err -> return . Left . ErrMegaparsec . MegaparsecError $ err - Right r - | MK.nullMk r -> + Right mk + | MK.nullMk mk -> return . Left . ErrMarkdownBackend $ ErrNoJuvixCodeBlocks NoJuvixCodeBlocksError {_noJuvixCodeBlocksErrorFilepath = fileName} - | otherwise -> runMarkdownModuleParser fileName r + | otherwise -> runMarkdownModuleParser fileName mk | otherwise = do m <- evalState (Nothing @ParsedPragmas) diff --git a/src/Markdown/FromSource.hs b/src/Markdown/FromSource.hs new file mode 100644 index 0000000000..5bc428c035 --- /dev/null +++ b/src/Markdown/FromSource.hs @@ -0,0 +1,13 @@ +-- | Import this module qualified +module Markdown.FromSource where + +import Commonmark.Parser +import Juvix.Prelude +import Markdown.Language + +fromFile :: (Members '[Files, Error SimpleError] r) => Path Abs File -> Sem r Block +fromFile inputFile = do + txt <- readFile' inputFile + case commonmark (toFilePath inputFile) txt of + Left err -> todo + Right block -> return block diff --git a/src/Markdown/Language.hs b/src/Markdown/Language.hs new file mode 100644 index 0000000000..f4aaa3e371 --- /dev/null +++ b/src/Markdown/Language.hs @@ -0,0 +1,150 @@ +module Markdown.Language + ( module Markdown.Language, + Attribute, + Format, + ListSpacing, + ListType, + ) +where + +import Commonmark +import Juvix.Prelude + +data Link = Link + { _linkDestination :: Text, + _linkTitle :: Text, + _linkDescription :: Inlines + } + deriving stock (Show) + +data Image = Image + { _imageSource :: Text, + _imageTitle :: Text, + _imageDescription :: Inlines + } + deriving stock (Show) + +data RawInline = RawInline + { _rawInlineFormat :: Format, + _rawInlineText :: Text + } + deriving stock (Show) + +data InlineElem + = InlineLineBreak + | InlineSoftBreak + | InlineString Text + | InlineEntity Text + | InlineEscapedChar Char + | InlineEmph Inlines + | InlineStrong Inlines + | InlineLink Link + | InlineImage Image + | InlineCode Text + | InlineRaw RawInline + deriving stock (Show) + +data CodeBlock = CodeBlock + { _codeBlockLanguage :: Text, + _codeBlock :: Text + } + deriving stock (Show) + +data Heading = Heading + { _headingLabel :: Text, + _headingText :: Inline + } + deriving stock (Show) + +data RawBlock = RawBlock + { _rawBlockFormat :: Format, + _rawBlockText :: Text + } + deriving stock (Show) + +data ReferenceLinkDefinition = ReferenceLinkDefinition + { _referenceLinkDefinitionLabel :: Text, + _referenceLinkDefinitionDestination :: Text, + _referenceLinkDefinitionTitle :: Text + } + deriving stock (Show) + +data List = List + { _listType :: ListType, + _listSpacing :: ListSpacing, + _listBlocks :: [Block] + } + deriving stock (Show) + +data Block + = BlockParagraph Inline + | BlockPlain Inline + | BlockThematicBreak Block + | BlockQuote Block + | BlockCodeBlock CodeBlock + | BlockRawBlock RawBlock + | BlockReferenceLinkDefinition ReferenceLinkDefinition + | BlockList List + deriving stock (Show) + +data Inline = Inline + { _inlineAttributes :: [Attribute], + _inlineElem :: InlineElem + } + deriving stock (Show) + +newtype Inlines = Inlines + { _inlines :: [Inline] + } + deriving stock (Show) + deriving newtype (Semigroup, Monoid) + +makeLenses ''Link +makeLenses ''Inline +makeLenses ''Inlines +makeLenses ''Image +makeLenses ''CodeBlock + +instance HasAttributes Inline where + addAttributes attr = (over inlineAttributes (attr ++)) + +-- TODO +instance Rangeable Inline where + ranged _ = error "todo" + +instance HasAttributes Inlines where + addAttributes attr = over inlines (map (addAttributes attr)) + +-- TODO +instance Rangeable Inlines where + ranged _ = error "todo" + +class IsInlines a where + toInlines :: a -> Inlines + +instance IsInlines Inlines where + toInlines = id + +instance IsInlines InlineElem where + toInlines e = + Inlines + { _inlines = + [ Inline + { _inlineAttributes = [], + _inlineElem = e + } + ] + } + +instance IsInline Inlines where + lineBreak = toInlines InlineLineBreak + softBreak = toInlines InlineSoftBreak + str a = toInlines (InlineString a) + entity a = toInlines (InlineEntity a) + escapedChar a = toInlines (InlineEscapedChar a) + emph a = toInlines (InlineEmph a) + strong a = toInlines (InlineStrong a) + link _linkDestination _linkTitle _linkDescription = toInlines (InlineLink Link {..}) + image _imageSource _imageTitle _imageDescription = toInlines (InlineImage Image {..}) + code a = toInlines (InlineCode a) + rawInline _rawInlineFormat _rawInlineText = toInlines (InlineRaw (RawInline {..})) From 270558d383095a370d7eafe8e766712a619ab215 Mon Sep 17 00:00:00 2001 From: Jan Mas Rovira Date: Tue, 11 Mar 2025 12:18:28 +0100 Subject: [PATCH 2/5] CLI. Some parsing and printing --- app/Commands/Dev.hs | 2 + app/Commands/Dev/Options.hs | 12 +- app/Commands/Dev/PlainMarkdown.hs | 9 ++ app/Commands/Dev/PlainMarkdown/Format.hs | 17 +++ .../Dev/PlainMarkdown/Format/Options.hs | 15 ++ app/Commands/Dev/PlainMarkdown/Options.hs | 24 +++ app/CommonOptions.hs | 2 +- app/TopCommand.hs | 2 +- src/Juvix/Prelude/Base/Foundation.hs | 2 +- src/Juvix/Prelude/Trace.hs | 3 +- src/Markdown/FromSource.hs | 13 +- src/Markdown/Language.hs | 141 +++++++++++++----- src/Markdown/Print.hs | 69 +++++++++ src/Markdown/Print/Options.hs | 6 + 14 files changed, 276 insertions(+), 41 deletions(-) create mode 100644 app/Commands/Dev/PlainMarkdown.hs create mode 100644 app/Commands/Dev/PlainMarkdown/Format.hs create mode 100644 app/Commands/Dev/PlainMarkdown/Format/Options.hs create mode 100644 app/Commands/Dev/PlainMarkdown/Options.hs create mode 100644 src/Markdown/Print.hs create mode 100644 src/Markdown/Print/Options.hs diff --git a/app/Commands/Dev.hs b/app/Commands/Dev.hs index c4726a5097..89c6d1c129 100644 --- a/app/Commands/Dev.hs +++ b/app/Commands/Dev.hs @@ -19,6 +19,7 @@ import Commands.Dev.MigrateJuvixYaml qualified as MigrateJuvixYaml import Commands.Dev.Nockma qualified as Nockma import Commands.Dev.Options import Commands.Dev.Parse qualified as Parse +import Commands.Dev.PlainMarkdown qualified as PlainMarkdown import Commands.Dev.Reg qualified as Reg import Commands.Dev.Runtime qualified as Runtime import Commands.Dev.Scope qualified as Scope @@ -47,3 +48,4 @@ runCommand = \case MigrateJuvixYaml opts -> runFilesIO $ MigrateJuvixYaml.runCommand opts Nockma opts -> Nockma.runCommand opts Anoma opts -> Anoma.runCommand opts + PlainMarkdown opts -> PlainMarkdown.runCommand opts diff --git a/app/Commands/Dev/Options.hs b/app/Commands/Dev/Options.hs index 1744ad8c4e..cd1b227735 100644 --- a/app/Commands/Dev/Options.hs +++ b/app/Commands/Dev/Options.hs @@ -25,6 +25,7 @@ import Commands.Dev.Latex.Options import Commands.Dev.MigrateJuvixYaml.Options import Commands.Dev.Nockma.Options import Commands.Dev.Parse.Options +import Commands.Dev.PlainMarkdown.Options import Commands.Dev.Reg.Options import Commands.Dev.Repl.Options import Commands.Dev.Runtime.Options @@ -45,6 +46,7 @@ data DevCommand | Asm AsmCommand | Reg RegCommand | Tree TreeCommand + | PlainMarkdown PlainMarkdownCommand | Casm CasmCommand | Runtime RuntimeCommand | Parse ParseOptions @@ -78,10 +80,18 @@ parseDevCommand = commandMigrateJuvixYaml, commandLatex, commandAnoma, - commandNockma + commandNockma, + commandPlainMarkdown ] ) +commandPlainMarkdown :: Mod CommandFields DevCommand +commandPlainMarkdown = + command "plain-markdown" $ + info + (PlainMarkdown <$> parsePlainMarkdownCommand) + (progDesc "Subcommands related to Markdown (without Juvix)") + commandLatex :: Mod CommandFields DevCommand commandLatex = command "latex" $ diff --git a/app/Commands/Dev/PlainMarkdown.hs b/app/Commands/Dev/PlainMarkdown.hs new file mode 100644 index 0000000000..d24fa36844 --- /dev/null +++ b/app/Commands/Dev/PlainMarkdown.hs @@ -0,0 +1,9 @@ +module Commands.Dev.PlainMarkdown where + +import Commands.Base +import Commands.Dev.PlainMarkdown.Format qualified as Format +import Commands.Dev.PlainMarkdown.Options + +runCommand :: forall r. (Members AppEffects r) => PlainMarkdownCommand -> Sem r () +runCommand = \case + Format opts -> Format.runCommand opts diff --git a/app/Commands/Dev/PlainMarkdown/Format.hs b/app/Commands/Dev/PlainMarkdown/Format.hs new file mode 100644 index 0000000000..228bed1ae3 --- /dev/null +++ b/app/Commands/Dev/PlainMarkdown/Format.hs @@ -0,0 +1,17 @@ +module Commands.Dev.PlainMarkdown.Format where + +import Commands.Base +import Commands.Dev.PlainMarkdown.Format.Options +import Markdown.FromSource +import Markdown.Print + +runCommand :: + forall r. + (Members AppEffects r) => + FormatOptions -> + Sem r () +runCommand opts = do + afile <- fromAppPathFile (opts ^. formatFile) + mdBlock <- runAppError @SimpleError (fromFile afile) + print mdBlock + renderStdOutLn (ppOut mdBlock) diff --git a/app/Commands/Dev/PlainMarkdown/Format/Options.hs b/app/Commands/Dev/PlainMarkdown/Format/Options.hs new file mode 100644 index 0000000000..e7a4bde48c --- /dev/null +++ b/app/Commands/Dev/PlainMarkdown/Format/Options.hs @@ -0,0 +1,15 @@ +module Commands.Dev.PlainMarkdown.Format.Options where + +import CommonOptions + +newtype FormatOptions = FormatOptions + { _formatFile :: AppPath File + } + deriving stock (Data) + +makeLenses ''FormatOptions + +parseFormatOptions :: Parser FormatOptions +parseFormatOptions = do + _formatFile <- parseInputFile FileExtMarkdown + pure FormatOptions {..} diff --git a/app/Commands/Dev/PlainMarkdown/Options.hs b/app/Commands/Dev/PlainMarkdown/Options.hs new file mode 100644 index 0000000000..85e04c1da3 --- /dev/null +++ b/app/Commands/Dev/PlainMarkdown/Options.hs @@ -0,0 +1,24 @@ +module Commands.Dev.PlainMarkdown.Options where + +import Commands.Dev.PlainMarkdown.Format.Options +import CommonOptions + +data PlainMarkdownCommand + = Format FormatOptions + deriving stock (Data) + +parsePlainMarkdownCommand :: Parser PlainMarkdownCommand +parsePlainMarkdownCommand = + hsubparser $ + mconcat + [ commandFormat + ] + where + commandFormat :: Mod CommandFields PlainMarkdownCommand + commandFormat = command "format" formatInfo + where + formatInfo :: ParserInfo PlainMarkdownCommand + formatInfo = + info + (Format <$> parseFormatOptions) + (progDesc "Format a plain markdown file (no Juvix involved)") diff --git a/app/CommonOptions.hs b/app/CommonOptions.hs index 9869e1ef7d..b665820b06 100644 --- a/app/CommonOptions.hs +++ b/app/CommonOptions.hs @@ -47,7 +47,7 @@ instance Show (AppPath f) where parseInputFilesMod :: NonEmpty FileExt -> Mod ArgumentFields (Prepath File) -> Parser (AppPath File) parseInputFilesMod exts' mods = do - let exts = NonEmpty.toList exts' + let exts = toList exts' mvars = intercalate "|" (map toMetavar exts) dotExts = intercalate ", " (map show exts) helpMsg = "Path to a " <> dotExts <> " file" diff --git a/app/TopCommand.hs b/app/TopCommand.hs index 92e6f740df..aeb41b0b1d 100644 --- a/app/TopCommand.hs +++ b/app/TopCommand.hs @@ -1,6 +1,6 @@ module TopCommand where -import Commands.Base hiding (Format) +import Commands.Base import Commands.Clean qualified as Clean import Commands.Compile qualified as Compile import Commands.Dependencies qualified as Dependencies diff --git a/src/Juvix/Prelude/Base/Foundation.hs b/src/Juvix/Prelude/Base/Foundation.hs index 3faa690645..b9eef0b611 100644 --- a/src/Juvix/Prelude/Base/Foundation.hs +++ b/src/Juvix/Prelude/Base/Foundation.hs @@ -133,7 +133,7 @@ import Data.Bifunctor hiding (first, second) import Data.Bitraversable import Data.Bool import Data.ByteString (ByteString) -import Data.Char +import Data.Char hiding (Format) import Data.Char qualified as Char import Data.Data import Data.Either.Extra diff --git a/src/Juvix/Prelude/Trace.hs b/src/Juvix/Prelude/Trace.hs index 5643a45ed4..472f0c5c71 100644 --- a/src/Juvix/Prelude/Trace.hs +++ b/src/Juvix/Prelude/Trace.hs @@ -27,7 +27,8 @@ traceWith f a = trace (f a) a trace :: Text -> a -> a trace = traceLabel "" -{-# WARNING trace "Using trace" #-} + +-- {-# WARNING trace "Using trace" #-} traceM :: (Applicative f) => Text -> f () traceM t = traceLabel "" t (pure ()) diff --git a/src/Markdown/FromSource.hs b/src/Markdown/FromSource.hs index 5bc428c035..a362e768ab 100644 --- a/src/Markdown/FromSource.hs +++ b/src/Markdown/FromSource.hs @@ -4,10 +4,19 @@ module Markdown.FromSource where import Commonmark.Parser import Juvix.Prelude import Markdown.Language +import Text.Show.Pretty -fromFile :: (Members '[Files, Error SimpleError] r) => Path Abs File -> Sem r Block +fromFile :: (Members '[Files, Error SimpleError] r) => Path Abs File -> Sem r Blocks fromFile inputFile = do txt <- readFile' inputFile case commonmark (toFilePath inputFile) txt of - Left err -> todo + Left _err -> error "parse error" Right block -> return block + +testFile :: Path Abs File -> IO () +testFile f = runM . runFilesIO . runSimpleErrorIO $ do + b <- fromFile f + print (ppShow (b ^. blocks)) + putStrLn "================" + putStrLn "================\n" + print b diff --git a/src/Markdown/Language.hs b/src/Markdown/Language.hs index f4aaa3e371..9906b50f9f 100644 --- a/src/Markdown/Language.hs +++ b/src/Markdown/Language.hs @@ -9,31 +9,39 @@ where import Commonmark import Juvix.Prelude +import Juvix.Prelude.Pretty data Link = Link { _linkDestination :: Text, _linkTitle :: Text, _linkDescription :: Inlines } - deriving stock (Show) + deriving stock (Show, Eq, Generic) data Image = Image { _imageSource :: Text, _imageTitle :: Text, _imageDescription :: Inlines } - deriving stock (Show) + deriving stock (Show, Eq, Generic) data RawInline = RawInline { _rawInlineFormat :: Format, _rawInlineText :: Text } - deriving stock (Show) + deriving stock (Show, Eq, Generic) -data InlineElem +data Meta a = Meta + { _metaAttributes :: [Attribute], + _metaLoc :: Irrelevant SourceRange, + _metaArg :: a + } + deriving stock (Show, Eq, Generic) + +data Inline = InlineLineBreak | InlineSoftBreak - | InlineString Text + | InlineString (Meta Text) | InlineEntity Text | InlineEscapedChar Char | InlineEmph Inlines @@ -42,82 +50,113 @@ data InlineElem | InlineImage Image | InlineCode Text | InlineRaw RawInline - deriving stock (Show) + deriving stock (Show, Eq, Generic) data CodeBlock = CodeBlock { _codeBlockLanguage :: Text, _codeBlock :: Text } - deriving stock (Show) + deriving stock (Show, Eq, Generic) data Heading = Heading - { _headingLabel :: Text, - _headingText :: Inline + { _headingLevel :: Int, + _headingText :: Inlines } - deriving stock (Show) + deriving stock (Show, Eq, Generic) data RawBlock = RawBlock { _rawBlockFormat :: Format, _rawBlockText :: Text } - deriving stock (Show) + deriving stock (Show, Eq, Generic) data ReferenceLinkDefinition = ReferenceLinkDefinition { _referenceLinkDefinitionLabel :: Text, _referenceLinkDefinitionDestination :: Text, _referenceLinkDefinitionTitle :: Text } - deriving stock (Show) + deriving stock (Show, Eq, Generic) data List = List { _listType :: ListType, _listSpacing :: ListSpacing, _listBlocks :: [Block] } - deriving stock (Show) + deriving stock (Show, Eq, Generic) data Block - = BlockParagraph Inline - | BlockPlain Inline + = BlockParagraph (Meta Inlines) + | BlockPlain (Meta Inlines) + | BlockHeading (Meta Heading) | BlockThematicBreak Block | BlockQuote Block - | BlockCodeBlock CodeBlock + | BlockCodeBlock (Meta CodeBlock) | BlockRawBlock RawBlock | BlockReferenceLinkDefinition ReferenceLinkDefinition | BlockList List - deriving stock (Show) + deriving stock (Show, Eq, Generic) -data Inline = Inline - { _inlineAttributes :: [Attribute], - _inlineElem :: InlineElem +newtype Blocks = Blocks + { _blocks :: [Block] } - deriving stock (Show) + deriving stock (Show, Eq, Generic) + deriving newtype (Semigroup, Monoid) + +mkBlocks :: Block -> Blocks +mkBlocks = Blocks . pure newtype Inlines = Inlines { _inlines :: [Inline] } - deriving stock (Show) + deriving stock (Show, Eq, Generic) deriving newtype (Semigroup, Monoid) makeLenses ''Link makeLenses ''Inline makeLenses ''Inlines +makeLenses ''Blocks makeLenses ''Image makeLenses ''CodeBlock +makeLenses ''Meta + +mkMeta :: a -> Meta a +mkMeta _metaArg = + Meta + { _metaArg, + _metaLoc = Irrelevant iniRange, + _metaAttributes = [] + } + +instance HasAttributes (Meta a) where + addAttributes attr = over metaAttributes (attr ++) + +instance Rangeable (Meta a) where + ranged = set (metaLoc . unIrrelevant) instance HasAttributes Inline where - addAttributes attr = (over inlineAttributes (attr ++)) + addAttributes _attr = error "todo" -- TODO instance Rangeable Inline where - ranged _ = error "todo" + ranged d = + \case + InlineString s -> InlineString (ranged d s) + x -> + trace + ("* rangeable inline: '" <> (show x) <> "' " <> show d) + x + +instance HasAttributes Blocks where + addAttributes attr = over blocks (map (addAttributes attr)) instance HasAttributes Inlines where addAttributes attr = over inlines (map (addAttributes attr)) --- TODO +instance Rangeable Blocks where + ranged d bs = trace ("rangeable blocks " <> show (length (bs ^. blocks)) <> " " <> show d) (over blocks (map (ranged d)) bs) + instance Rangeable Inlines where - ranged _ = error "todo" + ranged d = trace ("rangeable inlines " <> show d) . over inlines (map (ranged d)) class IsInlines a where toInlines :: a -> Inlines @@ -125,21 +164,41 @@ class IsInlines a where instance IsInlines Inlines where toInlines = id -instance IsInlines InlineElem where +instance IsInlines Inline where toInlines e = Inlines - { _inlines = - [ Inline - { _inlineAttributes = [], - _inlineElem = e - } - ] + { _inlines = [e] } +placeHolderInterval :: Interval +placeHolderInterval = intervalFromFile $(mkAbsFile "/") + +iniLoc :: Interval +iniLoc = placeHolderInterval + +iniRange :: SourceRange +-- iniRange = impossibleError "The SourceRange should never be accessed. It should be replaced by 'ranged' during parsing" +iniRange = mempty + +instance Rangeable Block where + ranged r b = + trace + ("rangeable block: " <> show b) + ( case b of + BlockParagraph a -> BlockParagraph (ranged r a) + BlockPlain a -> BlockPlain (ranged r a) + BlockCodeBlock a -> BlockCodeBlock (ranged r a) + BlockHeading a -> BlockHeading (ranged r a) + x -> error ("TODO: " <> show x) + ) + +instance HasAttributes Block where + addAttributes _ = error "todo" + instance IsInline Inlines where lineBreak = toInlines InlineLineBreak softBreak = toInlines InlineSoftBreak - str a = toInlines (InlineString a) + str a = toInlines (InlineString (mkMeta a)) entity a = toInlines (InlineEntity a) escapedChar a = toInlines (InlineEscapedChar a) emph a = toInlines (InlineEmph a) @@ -148,3 +207,17 @@ instance IsInline Inlines where image _imageSource _imageTitle _imageDescription = toInlines (InlineImage Image {..}) code a = toInlines (InlineCode a) rawInline _rawInlineFormat _rawInlineText = toInlines (InlineRaw (RawInline {..})) + +instance IsBlock Inlines Blocks where + plain p = mkBlocks (BlockPlain (mkMeta p)) + paragraph i = mkBlocks (BlockParagraph (mkMeta i)) + thematicBreak = error "todo" + blockQuote = error "todo" + codeBlock _codeBlockLanguage _codeBlock = mkBlocks (BlockCodeBlock (mkMeta (CodeBlock {..}))) + heading _headingLevel _headingText = mkBlocks (BlockHeading (mkMeta Heading {..})) + rawBlock = error "todo" + referenceLinkDefinition = error "todo" + list = error "todo" + +instance (Pretty a) => Pretty (Meta a) where + pretty = pretty . (^. metaArg) diff --git a/src/Markdown/Print.hs b/src/Markdown/Print.hs new file mode 100644 index 0000000000..a7b82d806c --- /dev/null +++ b/src/Markdown/Print.hs @@ -0,0 +1,69 @@ +module Markdown.Print + ( module Markdown.Print, + module Markdown.Print.Options, + ) +where + +import Data.Text qualified as Text +import Juvix.Data.Effect.ExactPrint +import Juvix.Data.PPOutput (AnsiText, mkAnsiText) +import Juvix.Prelude +import Juvix.Prelude.Pretty (pretty) +import Markdown.Language +import Markdown.Print.Options + +ppOut :: (PrettyPrint c) => c -> AnsiText +ppOut = + mkAnsiText + . run + . execExactPrint Nothing + . runReader Options + . ppCode + +class PrettyPrint a where + ppCode :: (Members '[ExactPrint, Reader Options] r) => a -> Sem r () + +instance (PrettyPrint a) => PrettyPrint (Meta a) where + ppCode = ppCode . (^. metaArg) + +instance PrettyPrint Inline where + ppCode = \case + InlineString txt -> noLoc (pretty txt) + InlineSoftBreak -> hardline + _ -> todo + +instance PrettyPrint Inlines where + ppCode = mapM_ ppCode . (^. inlines) + +instance PrettyPrint Blocks where + ppCode = concatWith (\a b -> a <> hardline <> hardline <> b) . map ppCode . (^. blocks) + +instance PrettyPrint Text where + ppCode = noLoc . pretty + +instance PrettyPrint CodeBlock where + ppCode :: forall r. (Members '[ExactPrint, Reader Options] r) => CodeBlock -> Sem r () + ppCode CodeBlock {..} = do + codeSep + ppCode _codeBlockLanguage + hardline + ppCode _codeBlock + codeSep + where + codeSep :: Sem r () + codeSep = ppCode @Text "```" + +instance PrettyPrint Heading where + ppCode Heading {..} = do + ppCode (Text.replicate _headingLevel "#") + if + | null (_headingText ^. inlines) -> return () + | otherwise -> ppCode @Text " " >> ppCode _headingText + +instance PrettyPrint Block where + ppCode = \case + BlockParagraph p -> ppCode p + BlockPlain p -> ppCode p + BlockCodeBlock p -> ppCode p + BlockHeading p -> ppCode p + x -> error ("TODO: " <> show x) diff --git a/src/Markdown/Print/Options.hs b/src/Markdown/Print/Options.hs new file mode 100644 index 0000000000..f6de0029d8 --- /dev/null +++ b/src/Markdown/Print/Options.hs @@ -0,0 +1,6 @@ +module Markdown.Print.Options where + +data Options = Options + +defaultOptions :: Options +defaultOptions = Options From d3dd17aa654213f9ff042991a0263fb8872edca0 Mon Sep 17 00:00:00 2001 From: Jan Mas Rovira Date: Tue, 11 Mar 2025 15:42:30 +0100 Subject: [PATCH 3/5] more printing --- src/Juvix/Prelude/Base/Foundation.hs | 10 ++- src/Markdown/Language.hs | 105 +++++++++++++++++---------- src/Markdown/Print.hs | 73 +++++++++++++++++-- 3 files changed, 143 insertions(+), 45 deletions(-) diff --git a/src/Juvix/Prelude/Base/Foundation.hs b/src/Juvix/Prelude/Base/Foundation.hs index b9eef0b611..678940b940 100644 --- a/src/Juvix/Prelude/Base/Foundation.hs +++ b/src/Juvix/Prelude/Base/Foundation.hs @@ -308,9 +308,12 @@ compose n f a = f (compose (n - 1) f a) -- String related util functions. -------------------------------------------------------------------------------- -show :: (Show a, IsString str) => a -> str +show :: forall str a. (Show a, IsString str) => a -> str show = fromString . Show.show +showEscapedChar :: Char -> Text +showEscapedChar c = pack (showLitChar c "") + toUpperFirst :: String -> String toUpperFirst [] = [] toUpperFirst (x : xs) = Char.toUpper x : xs @@ -910,8 +913,11 @@ graphCycle gi = goChildren :: NonEmpty Vertex -> [Tree Vertex] -> Either (NonEmpty Vertex) () goChildren path = mapM_ (go path) +allNaturalsFrom :: Natural -> Stream Natural +allNaturalsFrom start = Stream.iterate succ start + allNaturals :: Stream Natural -allNaturals = Stream.iterate succ 0 +allNaturals = allNaturalsFrom 0 allWords :: Stream Text allWords = pack . toList <$> allFiniteSequences ('a' :| ['b' .. 'z']) diff --git a/src/Markdown/Language.hs b/src/Markdown/Language.hs index 9906b50f9f..2d5b509af5 100644 --- a/src/Markdown/Language.hs +++ b/src/Markdown/Language.hs @@ -1,9 +1,11 @@ module Markdown.Language ( module Markdown.Language, Attribute, - Format, - ListSpacing, - ListType, + Format (..), + ListSpacing (..), + EnumeratorType (..), + DelimiterType (..), + ListType (..), ) where @@ -38,18 +40,29 @@ data Meta a = Meta } deriving stock (Show, Eq, Generic) +data HardBreak = HardBreak + deriving stock (Show, Eq, Generic) + +data SoftBreak = SoftBreak + deriving stock (Show, Eq, Generic) + +newtype EscapedChar = EscapedChar + { _escapedChar :: Char + } + deriving stock (Show, Eq, Generic) + data Inline - = InlineLineBreak - | InlineSoftBreak + = InlineHardBreak (Meta HardBreak) + | InlineSoftBreak (Meta SoftBreak) | InlineString (Meta Text) - | InlineEntity Text - | InlineEscapedChar Char + | InlineEntity (Meta Text) + | InlineEscapedChar (Meta EscapedChar) | InlineEmph Inlines | InlineStrong Inlines - | InlineLink Link - | InlineImage Image - | InlineCode Text - | InlineRaw RawInline + | InlineLink (Meta Link) + | InlineImage (Meta Image) + | InlineCode (Meta Text) + | InlineRaw (Meta RawInline) deriving stock (Show, Eq, Generic) data CodeBlock = CodeBlock @@ -80,7 +93,15 @@ data ReferenceLinkDefinition = ReferenceLinkDefinition data List = List { _listType :: ListType, _listSpacing :: ListSpacing, - _listBlocks :: [Block] + _listBlocks :: NonEmpty Blocks + } + deriving stock (Show, Eq, Generic) + +data ThematicBreak = ThematicBreak + deriving stock (Show, Eq, Generic) + +newtype QuoteBlock = QuoteBlock + { _quoteBlock :: Blocks } deriving stock (Show, Eq, Generic) @@ -88,12 +109,12 @@ data Block = BlockParagraph (Meta Inlines) | BlockPlain (Meta Inlines) | BlockHeading (Meta Heading) - | BlockThematicBreak Block - | BlockQuote Block + | BlockThematicBreak (Meta ThematicBreak) + | BlockQuote (Meta QuoteBlock) | BlockCodeBlock (Meta CodeBlock) - | BlockRawBlock RawBlock - | BlockReferenceLinkDefinition ReferenceLinkDefinition - | BlockList List + | BlockRawBlock (Meta RawBlock) + | BlockReferenceLinkDefinition (Meta ReferenceLinkDefinition) + | BlockList (Meta List) deriving stock (Show, Eq, Generic) newtype Blocks = Blocks @@ -134,17 +155,23 @@ instance Rangeable (Meta a) where ranged = set (metaLoc . unIrrelevant) instance HasAttributes Inline where - addAttributes _attr = error "todo" + addAttributes _attr = trace "todo" -- TODO instance Rangeable Inline where ranged d = \case - InlineString s -> InlineString (ranged d s) - x -> - trace - ("* rangeable inline: '" <> (show x) <> "' " <> show d) - x + InlineHardBreak a -> InlineHardBreak (ranged d a) + InlineSoftBreak a -> InlineSoftBreak (ranged d a) + InlineString a -> InlineString (ranged d a) + InlineEntity a -> InlineEntity (ranged d a) + InlineEscapedChar a -> InlineEscapedChar (ranged d a) + InlineEmph a -> InlineEmph (ranged d a) + InlineStrong a -> InlineStrong (ranged d a) + InlineLink a -> InlineLink (ranged d a) + InlineImage a -> InlineImage (ranged d a) + InlineCode a -> InlineCode (ranged d a) + InlineRaw a -> InlineRaw (ranged d a) instance HasAttributes Blocks where addAttributes attr = over blocks (map (addAttributes attr)) @@ -189,35 +216,39 @@ instance Rangeable Block where BlockPlain a -> BlockPlain (ranged r a) BlockCodeBlock a -> BlockCodeBlock (ranged r a) BlockHeading a -> BlockHeading (ranged r a) - x -> error ("TODO: " <> show x) + BlockThematicBreak a -> BlockThematicBreak (ranged r a) + BlockQuote a -> BlockQuote (ranged r a) + BlockList a -> BlockList (ranged r a) + BlockRawBlock a -> BlockRawBlock (ranged r a) + BlockReferenceLinkDefinition a -> BlockReferenceLinkDefinition (ranged r a) ) instance HasAttributes Block where - addAttributes _ = error "todo" + addAttributes _ = trace "attributes block" instance IsInline Inlines where - lineBreak = toInlines InlineLineBreak - softBreak = toInlines InlineSoftBreak + lineBreak = toInlines (InlineHardBreak (mkMeta HardBreak)) + softBreak = toInlines (InlineSoftBreak (mkMeta SoftBreak)) str a = toInlines (InlineString (mkMeta a)) - entity a = toInlines (InlineEntity a) - escapedChar a = toInlines (InlineEscapedChar a) + entity a = toInlines (InlineEntity (mkMeta a)) + escapedChar _escapedChar = toInlines (InlineEscapedChar (mkMeta (EscapedChar {..}))) emph a = toInlines (InlineEmph a) strong a = toInlines (InlineStrong a) - link _linkDestination _linkTitle _linkDescription = toInlines (InlineLink Link {..}) - image _imageSource _imageTitle _imageDescription = toInlines (InlineImage Image {..}) - code a = toInlines (InlineCode a) - rawInline _rawInlineFormat _rawInlineText = toInlines (InlineRaw (RawInline {..})) + link _linkDestination _linkTitle _linkDescription = toInlines (InlineLink (mkMeta Link {..})) + image _imageSource _imageTitle _imageDescription = toInlines (InlineImage (mkMeta Image {..})) + code a = toInlines (InlineCode (mkMeta a)) + rawInline _rawInlineFormat _rawInlineText = toInlines (InlineRaw (mkMeta RawInline {..})) instance IsBlock Inlines Blocks where plain p = mkBlocks (BlockPlain (mkMeta p)) paragraph i = mkBlocks (BlockParagraph (mkMeta i)) - thematicBreak = error "todo" - blockQuote = error "todo" + thematicBreak = mkBlocks (BlockThematicBreak (mkMeta ThematicBreak)) + blockQuote _quoteBlock = mkBlocks (BlockQuote (mkMeta QuoteBlock {..})) codeBlock _codeBlockLanguage _codeBlock = mkBlocks (BlockCodeBlock (mkMeta (CodeBlock {..}))) heading _headingLevel _headingText = mkBlocks (BlockHeading (mkMeta Heading {..})) rawBlock = error "todo" - referenceLinkDefinition = error "todo" - list = error "todo" + referenceLinkDefinition _referenceLinkDefinitionLabel (_referenceLinkDefinitionDestination, _referenceLinkDefinitionTitle) = mkBlocks (BlockReferenceLinkDefinition (mkMeta ReferenceLinkDefinition {..})) + list _listType _listSpacing lstBlocks = mkBlocks (BlockList (mkMeta List {_listBlocks = nonEmpty' lstBlocks, ..})) instance (Pretty a) => Pretty (Meta a) where pretty = pretty . (^. metaArg) diff --git a/src/Markdown/Print.hs b/src/Markdown/Print.hs index a7b82d806c..e144660b43 100644 --- a/src/Markdown/Print.hs +++ b/src/Markdown/Print.hs @@ -4,11 +4,11 @@ module Markdown.Print ) where +import Data.Stream qualified as Stream import Data.Text qualified as Text import Juvix.Data.Effect.ExactPrint -import Juvix.Data.PPOutput (AnsiText, mkAnsiText) -import Juvix.Prelude -import Juvix.Prelude.Pretty (pretty) +import Juvix.Prelude.Base hiding ((<+>), (<+?>)) +import Juvix.Prelude.Pretty (AnsiText, mkAnsiText, pretty, prettyText, toPlainText) import Markdown.Language import Markdown.Print.Options @@ -26,11 +26,24 @@ class PrettyPrint a where instance (PrettyPrint a) => PrettyPrint (Meta a) where ppCode = ppCode . (^. metaArg) +instance PrettyPrint SoftBreak where + ppCode _ = hardline + +instance PrettyPrint HardBreak where + ppCode _ = do + ppCode @Text "\\" + hardline + +instance PrettyPrint EscapedChar where + ppCode (EscapedChar c) = + ppCode (showEscapedChar c) + instance PrettyPrint Inline where ppCode = \case InlineString txt -> noLoc (pretty txt) - InlineSoftBreak -> hardline - _ -> todo + InlineSoftBreak b -> ppCode b + InlineHardBreak b -> ppCode b + InlineEscapedChar b -> ppCode b instance PrettyPrint Inlines where ppCode = mapM_ ppCode . (^. inlines) @@ -60,10 +73,58 @@ instance PrettyPrint Heading where | null (_headingText ^. inlines) -> return () | otherwise -> ppCode @Text " " >> ppCode _headingText +instance PrettyPrint ThematicBreak where + ppCode ThematicBreak = ppCode @Text "---" + +instance PrettyPrint List where + ppCode List {..} = do + let stream :: Stream Text = case _listType of + BulletList (bullet :: Char) -> Stream.repeat (Text.singleton bullet <> " ") + OrderedList (start :: Int) (_enumType :: EnumeratorType) (delimType :: DelimiterType) -> + let addDelim :: Text -> Text + addDelim num = + case delimType of + Period -> num <> ". " + OneParen -> num <> ") " + TwoParens -> "(" <> num <> ") " + in addDelim . prettyText <$> allNaturalsFrom (fromIntegral start) + + runStreamOf stream . vsepHard $ + fmap + ( \b -> do + num <- yield @Text + ppCode num + ppCode b + ) + _listBlocks + +instance PrettyPrint QuoteBlock where + ppCode QuoteBlock {..} = do + let q :: Text = toPlainText (ppOut _quoteBlock) + withQuotes :: Text = + Text.dropEnd 1 + . Text.unlines + . map ("> " <>) + $ Text.lines q + noLoc (pretty withQuotes) + +instance PrettyPrint ReferenceLinkDefinition where + ppCode ReferenceLinkDefinition {..} = do + let title + | Text.null _referenceLinkDefinitionTitle = Nothing + | otherwise = Just (ppCode (show @Text _referenceLinkDefinitionTitle)) + ppCode ("[" <> _referenceLinkDefinitionLabel <> "]:") + <+> ppCode _referenceLinkDefinitionDestination + <+?> title + instance PrettyPrint Block where ppCode = \case BlockParagraph p -> ppCode p BlockPlain p -> ppCode p BlockCodeBlock p -> ppCode p BlockHeading p -> ppCode p - x -> error ("TODO: " <> show x) + BlockThematicBreak p -> ppCode p + BlockQuote p -> ppCode p + BlockRawBlock {} -> error "raw block" + BlockList l -> ppCode l + BlockReferenceLinkDefinition d -> ppCode d From 45c57d8f69fb9106f3cd0637b17651c983ba462c Mon Sep 17 00:00:00 2001 From: Jan Mas Rovira Date: Wed, 12 Mar 2025 15:09:27 +0100 Subject: [PATCH 4/5] testing --- app/Commands/Dev/PlainMarkdown/Format.hs | 5 +- src/Juvix/Prelude/Trace.hs | 3 +- src/Markdown/FromSource.hs | 21 +- src/Markdown/Language.hs | 92 ++- src/Markdown/Print.hs | 70 +- test/Main.hs | 3 +- test/PlainMarkdown.hs | 48 ++ tests/PlainMarkdown/benchmark.md | 897 +++++++++++++++++++++++ 8 files changed, 1087 insertions(+), 52 deletions(-) create mode 100644 test/PlainMarkdown.hs create mode 100644 tests/PlainMarkdown/benchmark.md diff --git a/app/Commands/Dev/PlainMarkdown/Format.hs b/app/Commands/Dev/PlainMarkdown/Format.hs index 228bed1ae3..f016f6abbc 100644 --- a/app/Commands/Dev/PlainMarkdown/Format.hs +++ b/app/Commands/Dev/PlainMarkdown/Format.hs @@ -2,7 +2,7 @@ module Commands.Dev.PlainMarkdown.Format where import Commands.Base import Commands.Dev.PlainMarkdown.Format.Options -import Markdown.FromSource +import Markdown.FromSource qualified as Markdown import Markdown.Print runCommand :: @@ -12,6 +12,5 @@ runCommand :: Sem r () runCommand opts = do afile <- fromAppPathFile (opts ^. formatFile) - mdBlock <- runAppError @SimpleError (fromFile afile) - print mdBlock + mdBlock <- runAppError @SimpleError (Markdown.parseFile afile) renderStdOutLn (ppOut mdBlock) diff --git a/src/Juvix/Prelude/Trace.hs b/src/Juvix/Prelude/Trace.hs index 472f0c5c71..5643a45ed4 100644 --- a/src/Juvix/Prelude/Trace.hs +++ b/src/Juvix/Prelude/Trace.hs @@ -27,8 +27,7 @@ traceWith f a = trace (f a) a trace :: Text -> a -> a trace = traceLabel "" - --- {-# WARNING trace "Using trace" #-} +{-# WARNING trace "Using trace" #-} traceM :: (Applicative f) => Text -> f () traceM t = traceLabel "" t (pure ()) diff --git a/src/Markdown/FromSource.hs b/src/Markdown/FromSource.hs index a362e768ab..ce634a0e05 100644 --- a/src/Markdown/FromSource.hs +++ b/src/Markdown/FromSource.hs @@ -1,22 +1,15 @@ -- | Import this module qualified -module Markdown.FromSource where +module Markdown.FromSource (parseText, parseFile) where import Commonmark.Parser import Juvix.Prelude import Markdown.Language -import Text.Show.Pretty -fromFile :: (Members '[Files, Error SimpleError] r) => Path Abs File -> Sem r Blocks -fromFile inputFile = do - txt <- readFile' inputFile +parseFile :: (Members '[Files, Error SimpleError] r) => Path Abs File -> Sem r Blocks +parseFile inputFile = readFile' inputFile >>= parseText inputFile + +parseText :: (Members '[Error SimpleError] r) => Path Abs File -> Text -> Sem r Blocks +parseText inputFile txt = do case commonmark (toFilePath inputFile) txt of - Left _err -> error "parse error" + Left err -> throw (SimpleError ("markdown parse error: " <> show err)) Right block -> return block - -testFile :: Path Abs File -> IO () -testFile f = runM . runFilesIO . runSimpleErrorIO $ do - b <- fromFile f - print (ppShow (b ^. blocks)) - putStrLn "================" - putStrLn "================\n" - print b diff --git a/src/Markdown/Language.hs b/src/Markdown/Language.hs index 2d5b509af5..560ee796e9 100644 --- a/src/Markdown/Language.hs +++ b/src/Markdown/Language.hs @@ -51,17 +51,37 @@ newtype EscapedChar = EscapedChar } deriving stock (Show, Eq, Generic) +newtype Strong = Strong + { _strong :: Inlines + } + deriving stock (Show, Eq, Generic) + +newtype Emph = Emph + { _emph :: Inlines + } + deriving stock (Show, Eq, Generic) + +newtype Code = Code + { _code :: Text + } + deriving stock (Show, Eq, Generic) + +newtype Entity = Entity + { _entity :: Text + } + deriving stock (Show, Eq, Generic) + data Inline = InlineHardBreak (Meta HardBreak) | InlineSoftBreak (Meta SoftBreak) | InlineString (Meta Text) - | InlineEntity (Meta Text) + | InlineEntity (Meta Entity) | InlineEscapedChar (Meta EscapedChar) - | InlineEmph Inlines - | InlineStrong Inlines + | InlineEmph (Meta Emph) + | InlineStrong (Meta Strong) | InlineLink (Meta Link) | InlineImage (Meta Image) - | InlineCode (Meta Text) + | InlineCode (Meta Code) | InlineRaw (Meta RawInline) deriving stock (Show, Eq, Generic) @@ -155,9 +175,20 @@ instance Rangeable (Meta a) where ranged = set (metaLoc . unIrrelevant) instance HasAttributes Inline where - addAttributes _attr = trace "todo" + addAttributes attr = + \case + InlineHardBreak a -> InlineHardBreak (addAttributes attr a) + InlineSoftBreak a -> InlineSoftBreak (addAttributes attr a) + InlineString a -> InlineString (addAttributes attr a) + InlineEntity a -> InlineEntity (addAttributes attr a) + InlineEscapedChar a -> InlineEscapedChar (addAttributes attr a) + InlineEmph a -> InlineEmph (addAttributes attr a) + InlineStrong a -> InlineStrong (addAttributes attr a) + InlineLink a -> InlineLink (addAttributes attr a) + InlineImage a -> InlineImage (addAttributes attr a) + InlineCode a -> InlineCode (addAttributes attr a) + InlineRaw a -> InlineRaw (addAttributes attr a) --- TODO instance Rangeable Inline where ranged d = \case @@ -180,10 +211,10 @@ instance HasAttributes Inlines where addAttributes attr = over inlines (map (addAttributes attr)) instance Rangeable Blocks where - ranged d bs = trace ("rangeable blocks " <> show (length (bs ^. blocks)) <> " " <> show d) (over blocks (map (ranged d)) bs) + ranged d bs = over blocks (map (ranged d)) bs instance Rangeable Inlines where - ranged d = trace ("rangeable inlines " <> show d) . over inlines (map (ranged d)) + ranged d = over inlines (map (ranged d)) class IsInlines a where toInlines :: a -> Inlines @@ -208,35 +239,40 @@ iniRange :: SourceRange iniRange = mempty instance Rangeable Block where - ranged r b = - trace - ("rangeable block: " <> show b) - ( case b of - BlockParagraph a -> BlockParagraph (ranged r a) - BlockPlain a -> BlockPlain (ranged r a) - BlockCodeBlock a -> BlockCodeBlock (ranged r a) - BlockHeading a -> BlockHeading (ranged r a) - BlockThematicBreak a -> BlockThematicBreak (ranged r a) - BlockQuote a -> BlockQuote (ranged r a) - BlockList a -> BlockList (ranged r a) - BlockRawBlock a -> BlockRawBlock (ranged r a) - BlockReferenceLinkDefinition a -> BlockReferenceLinkDefinition (ranged r a) - ) + ranged r = \case + BlockParagraph a -> BlockParagraph (ranged r a) + BlockPlain a -> BlockPlain (ranged r a) + BlockCodeBlock a -> BlockCodeBlock (ranged r a) + BlockHeading a -> BlockHeading (ranged r a) + BlockThematicBreak a -> BlockThematicBreak (ranged r a) + BlockQuote a -> BlockQuote (ranged r a) + BlockList a -> BlockList (ranged r a) + BlockRawBlock a -> BlockRawBlock (ranged r a) + BlockReferenceLinkDefinition a -> BlockReferenceLinkDefinition (ranged r a) instance HasAttributes Block where - addAttributes _ = trace "attributes block" + addAttributes attr = \case + BlockParagraph a -> BlockParagraph (addAttributes attr a) + BlockPlain a -> BlockPlain (addAttributes attr a) + BlockCodeBlock a -> BlockCodeBlock (addAttributes attr a) + BlockHeading a -> BlockHeading (addAttributes attr a) + BlockThematicBreak a -> BlockThematicBreak (addAttributes attr a) + BlockQuote a -> BlockQuote (addAttributes attr a) + BlockList a -> BlockList (addAttributes attr a) + BlockRawBlock a -> BlockRawBlock (addAttributes attr a) + BlockReferenceLinkDefinition a -> BlockReferenceLinkDefinition (addAttributes attr a) instance IsInline Inlines where lineBreak = toInlines (InlineHardBreak (mkMeta HardBreak)) softBreak = toInlines (InlineSoftBreak (mkMeta SoftBreak)) str a = toInlines (InlineString (mkMeta a)) - entity a = toInlines (InlineEntity (mkMeta a)) + entity a = toInlines (InlineEntity (mkMeta (Entity a))) escapedChar _escapedChar = toInlines (InlineEscapedChar (mkMeta (EscapedChar {..}))) - emph a = toInlines (InlineEmph a) - strong a = toInlines (InlineStrong a) + emph a = toInlines (InlineEmph (mkMeta (Emph a))) + strong a = toInlines (InlineStrong (mkMeta (Strong a))) link _linkDestination _linkTitle _linkDescription = toInlines (InlineLink (mkMeta Link {..})) image _imageSource _imageTitle _imageDescription = toInlines (InlineImage (mkMeta Image {..})) - code a = toInlines (InlineCode (mkMeta a)) + code a = toInlines (InlineCode (mkMeta (Code a))) rawInline _rawInlineFormat _rawInlineText = toInlines (InlineRaw (mkMeta RawInline {..})) instance IsBlock Inlines Blocks where @@ -246,7 +282,7 @@ instance IsBlock Inlines Blocks where blockQuote _quoteBlock = mkBlocks (BlockQuote (mkMeta QuoteBlock {..})) codeBlock _codeBlockLanguage _codeBlock = mkBlocks (BlockCodeBlock (mkMeta (CodeBlock {..}))) heading _headingLevel _headingText = mkBlocks (BlockHeading (mkMeta Heading {..})) - rawBlock = error "todo" + rawBlock _rawBlockFormat _rawBlockText = mkBlocks (BlockRawBlock (mkMeta RawBlock {..})) referenceLinkDefinition _referenceLinkDefinitionLabel (_referenceLinkDefinitionDestination, _referenceLinkDefinitionTitle) = mkBlocks (BlockReferenceLinkDefinition (mkMeta ReferenceLinkDefinition {..})) list _listType _listSpacing lstBlocks = mkBlocks (BlockList (mkMeta List {_listBlocks = nonEmpty' lstBlocks, ..})) diff --git a/src/Markdown/Print.hs b/src/Markdown/Print.hs index e144660b43..e7a46294db 100644 --- a/src/Markdown/Print.hs +++ b/src/Markdown/Print.hs @@ -38,12 +38,68 @@ instance PrettyPrint EscapedChar where ppCode (EscapedChar c) = ppCode (showEscapedChar c) +instance PrettyPrint Strong where + ppCode (Strong i) = do + ppCode @Text "**" + ppCode i + ppCode @Text "**" + +instance PrettyPrint Emph where + ppCode (Emph i) = do + ppCode @Text "_" + ppCode i + ppCode @Text "_" + +-- [link](/uri "title") +instance PrettyPrint Link where + ppCode Link {..} = do + ppCode @Text "[" + <> ppCode _linkDescription + <> ppCode @Text "](" + <> ppCode _linkDestination + <+?> ppLinkTitle _linkTitle + <> ppCode @Text ")" + +instance PrettyPrint Image where + ppCode Image {..} = do + ppCode @Text "!" + ppCode + Link + { _linkDescription = _imageDescription, + _linkTitle = _imageTitle, + _linkDestination = _imageSource + } + +instance PrettyPrint Code where + ppCode (Code c) = do + ppCode @Text "`" + ppCode c + ppCode @Text "`" + +instance PrettyPrint Entity where + ppCode (Entity e) = ppCode e + instance PrettyPrint Inline where ppCode = \case InlineString txt -> noLoc (pretty txt) InlineSoftBreak b -> ppCode b InlineHardBreak b -> ppCode b InlineEscapedChar b -> ppCode b + InlineEntity b -> ppCode b + InlineEmph b -> ppCode b + InlineStrong b -> ppCode b + InlineLink b -> ppCode b + InlineImage b -> ppCode b + InlineCode b -> ppCode b + InlineRaw b -> ppCode b + +instance PrettyPrint RawBlock where + ppCode RawBlock {..} = + ppCode _rawBlockText + +instance PrettyPrint RawInline where + ppCode RawInline {..} = + ppCode _rawInlineText instance PrettyPrint Inlines where ppCode = mapM_ ppCode . (^. inlines) @@ -108,11 +164,17 @@ instance PrettyPrint QuoteBlock where $ Text.lines q noLoc (pretty withQuotes) +ppLinkTitle :: + (Member (Reader Options) r, Member ExactPrint r) => + Text -> + Maybe (Sem r ()) +ppLinkTitle title + | Text.null title = Nothing + | otherwise = Just (ppCode (show @Text title)) + instance PrettyPrint ReferenceLinkDefinition where ppCode ReferenceLinkDefinition {..} = do - let title - | Text.null _referenceLinkDefinitionTitle = Nothing - | otherwise = Just (ppCode (show @Text _referenceLinkDefinitionTitle)) + let title = ppLinkTitle _referenceLinkDefinitionTitle ppCode ("[" <> _referenceLinkDefinitionLabel <> "]:") <+> ppCode _referenceLinkDefinitionDestination <+?> title @@ -125,6 +187,6 @@ instance PrettyPrint Block where BlockHeading p -> ppCode p BlockThematicBreak p -> ppCode p BlockQuote p -> ppCode p - BlockRawBlock {} -> error "raw block" + BlockRawBlock p -> ppCode p BlockList l -> ppCode l BlockReferenceLinkDefinition d -> ppCode d diff --git a/test/Main.hs b/test/Main.hs index 6f7dd375ab..02f633768d 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -16,6 +16,7 @@ import Juvix.Config qualified as Config import Nockma qualified import Package qualified import Parsing qualified +import PlainMarkdown qualified import Reg qualified import Repl qualified import Resolver qualified @@ -67,4 +68,4 @@ fastTests = main :: IO () main = do tests <- sequence [fastTests, slowTests] - defaultMain (testGroup "Juvix tests" tests) + defaultMain (testGroup "Juvix tests" [PlainMarkdown.allTests]) diff --git a/test/PlainMarkdown.hs b/test/PlainMarkdown.hs new file mode 100644 index 0000000000..cfd70e3751 --- /dev/null +++ b/test/PlainMarkdown.hs @@ -0,0 +1,48 @@ +module PlainMarkdown (allTests) where + +import Base +import Juvix.Prelude.Pretty +import Markdown.FromSource qualified as Markdown +import Markdown.Language +import Markdown.Print + +data PosTest = PosTest + { _name :: String, + _file :: Path Rel File + } + +allTests :: TestTree +allTests = + testGroup + "PlainMarkdown" + (map (mkTest . testDescr) tests) + +root :: Path Abs Dir +root = relToProject $(mkRelDir "tests/PlainMarkdown") + +renderMd :: (PrettyPrint c) => c -> Text +renderMd = toPlainText . ppOut + +testDescr :: PosTest -> TestDescr +testDescr PosTest {..} = + let tRoot = root + file' = tRoot _file + in TestDescr + { _testName = _name, + _testRoot = tRoot, + _testAssertion = Steps $ \step -> runM . runSimpleErrorIO . runFilesIO $ do + liftIO (step "Parsing") + s :: Blocks <- Markdown.parseFile file' + let rendered :: Text = renderMd s + + liftIO (step "Parsing pretty parsed") + parsedPretty <- Markdown.parseText file' rendered + + liftIO (step "Checks") + liftIO (assertEqDiffShow "parsed . pretty . parsed = parsed" s parsedPretty) + } + +tests :: [PosTest] +tests = + [ PosTest "benchmark" $(mkRelFile "benchmark.md") + ] diff --git a/tests/PlainMarkdown/benchmark.md b/tests/PlainMarkdown/benchmark.md new file mode 100644 index 0000000000..0a5d63df09 --- /dev/null +++ b/tests/PlainMarkdown/benchmark.md @@ -0,0 +1,897 @@ +Markdown: Syntax +================ + + + + +* [Overview](#overview) + * [Philosophy](#philosophy) + * [Inline HTML](#html) + * [Automatic Escaping for Special Characters](#autoescape) +* [Block Elements](#block) + * [Paragraphs and Line Breaks](#p) + * [Headers](#header) + * [Blockquotes](#blockquote) + * [Lists](#list) + * [Code Blocks](#precode) + * [Horizontal Rules](#hr) +* [Span Elements](#span) + * [Links](#link) + * [Emphasis](#em) + * [Code](#code) + * [Images](#img) +* [Miscellaneous](#misc) + * [Backslash Escapes](#backslash) + * [Automatic Links](#autolink) + + +**Note:** This document is itself written using Markdown; you +can [see the source for it by adding '.text' to the URL][src]. + + [src]: /projects/markdown/syntax.text + +* * * + +

Overview

+ +

Philosophy

+ +Markdown is intended to be as easy-to-read and easy-to-write as is feasible. + +Readability, however, is emphasized above all else. A Markdown-formatted +document should be publishable as-is, as plain text, without looking +like it's been marked up with tags or formatting instructions. While +Markdown's syntax has been influenced by several existing text-to-HTML +filters -- including [Setext] [1], [atx] [2], [Textile] [3], [reStructuredText] [4], +[Grutatext] [5], and [EtText] [6] -- the single biggest source of +inspiration for Markdown's syntax is the format of plain text email. + + [1]: http://docutils.sourceforge.net/mirror/setext.html + [2]: http://www.aaronsw.com/2002/atx/ + [3]: http://textism.com/tools/textile/ + [4]: http://docutils.sourceforge.net/rst.html + [5]: http://www.triptico.com/software/grutatxt.html + [6]: http://ettext.taint.org/doc/ + +To this end, Markdown's syntax is comprised entirely of punctuation +characters, which punctuation characters have been carefully chosen so +as to look like what they mean. E.g., asterisks around a word actually +look like \*emphasis\*. Markdown lists look like, well, lists. Even +blockquotes look like quoted passages of text, assuming you've ever +used email. + + + +

Inline HTML

+ +Markdown's syntax is intended for one purpose: to be used as a +format for *writing* for the web. + +Markdown is not a replacement for HTML, or even close to it. Its +syntax is very small, corresponding only to a very small subset of +HTML tags. The idea is *not* to create a syntax that makes it easier +to insert HTML tags. In my opinion, HTML tags are already easy to +insert. The idea for Markdown is to make it easy to read, write, and +edit prose. HTML is a *publishing* format; Markdown is a *writing* +format. Thus, Markdown's formatting syntax only addresses issues that +can be conveyed in plain text. + +For any markup that is not covered by Markdown's syntax, you simply +use HTML itself. There's no need to preface it or delimit it to +indicate that you're switching from Markdown to HTML; you just use +the tags. + +The only restrictions are that block-level HTML elements -- e.g. `
`, +``, `
`, `

`, etc. -- must be separated from surrounding +content by blank lines, and the start and end tags of the block should +not be indented with tabs or spaces. Markdown is smart enough not +to add extra (unwanted) `

` tags around HTML block-level tags. + +For example, to add an HTML table to a Markdown article: + + This is a regular paragraph. + +

+ + + +
Foo
+ + This is another regular paragraph. + +Note that Markdown formatting syntax is not processed within block-level +HTML tags. E.g., you can't use Markdown-style `*emphasis*` inside an +HTML block. + +Span-level HTML tags -- e.g. ``, ``, or `` -- can be +used anywhere in a Markdown paragraph, list item, or header. If you +want, you can even use HTML tags instead of Markdown formatting; e.g. if +you'd prefer to use HTML `` or `` tags instead of Markdown's +link or image syntax, go right ahead. + +Unlike block-level HTML tags, Markdown syntax *is* processed within +span-level tags. + + +

Automatic Escaping for Special Characters

+ +In HTML, there are two characters that demand special treatment: `<` +and `&`. Left angle brackets are used to start tags; ampersands are +used to denote HTML entities. If you want to use them as literal +characters, you must escape them as entities, e.g. `<`, and +`&`. + +Ampersands in particular are bedeviling for web writers. If you want to +write about 'AT&T', you need to write '`AT&T`'. You even need to +escape ampersands within URLs. Thus, if you want to link to: + + http://images.google.com/images?num=30&q=larry+bird + +you need to encode the URL as: + + http://images.google.com/images?num=30&q=larry+bird + +in your anchor tag `href` attribute. Needless to say, this is easy to +forget, and is probably the single most common source of HTML validation +errors in otherwise well-marked-up web sites. + +Markdown allows you to use these characters naturally, taking care of +all the necessary escaping for you. If you use an ampersand as part of +an HTML entity, it remains unchanged; otherwise it will be translated +into `&`. + +So, if you want to include a copyright symbol in your article, you can write: + + © + +and Markdown will leave it alone. But if you write: + + AT&T + +Markdown will translate it to: + + AT&T + +Similarly, because Markdown supports [inline HTML](#html), if you use +angle brackets as delimiters for HTML tags, Markdown will treat them as +such. But if you write: + + 4 < 5 + +Markdown will translate it to: + + 4 < 5 + +However, inside Markdown code spans and blocks, angle brackets and +ampersands are *always* encoded automatically. This makes it easy to use +Markdown to write about HTML code. (As opposed to raw HTML, which is a +terrible format for writing about HTML syntax, because every single `<` +and `&` in your example code needs to be escaped.) + + +* * * + + +

Block Elements

+ + +

Paragraphs and Line Breaks

+ +A paragraph is simply one or more consecutive lines of text, separated +by one or more blank lines. (A blank line is any line that looks like a +blank line -- a line containing nothing but spaces or tabs is considered +blank.) Normal paragraphs should not be indented with spaces or tabs. + +The implication of the "one or more consecutive lines of text" rule is +that Markdown supports "hard-wrapped" text paragraphs. This differs +significantly from most other text-to-HTML formatters (including Movable +Type's "Convert Line Breaks" option) which translate every line break +character in a paragraph into a `
` tag. + +When you *do* want to insert a `
` break tag using Markdown, you +end a line with two or more spaces, then type return. + +Yes, this takes a tad more effort to create a `
`, but a simplistic +"every line break is a `
`" rule wouldn't work for Markdown. +Markdown's email-style [blockquoting][bq] and multi-paragraph [list items][l] +work best -- and look better -- when you format them with hard breaks. + + [bq]: #blockquote + [l]: #list + + + + + +Markdown supports two styles of headers, [Setext] [1] and [atx] [2]. + +Setext-style headers are "underlined" using equal signs (for first-level +headers) and dashes (for second-level headers). For example: + + This is an H1 + ============= + + This is an H2 + ------------- + +Any number of underlining `=`'s or `-`'s will work. + +Atx-style headers use 1-6 hash characters at the start of the line, +corresponding to header levels 1-6. For example: + + # This is an H1 + + ## This is an H2 + + ###### This is an H6 + +Optionally, you may "close" atx-style headers. This is purely +cosmetic -- you can use this if you think it looks better. The +closing hashes don't even need to match the number of hashes +used to open the header. (The number of opening hashes +determines the header level.) : + + # This is an H1 # + + ## This is an H2 ## + + ### This is an H3 ###### + + +

Blockquotes

+ +Markdown uses email-style `>` characters for blockquoting. If you're +familiar with quoting passages of text in an email message, then you +know how to create a blockquote in Markdown. It looks best if you hard +wrap the text and put a `>` before every line: + + > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, + > consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. + > Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. + > + > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse + > id sem consectetuer libero luctus adipiscing. + +Markdown allows you to be lazy and only put the `>` before the first +line of a hard-wrapped paragraph: + + > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, + consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. + Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. + + > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse + id sem consectetuer libero luctus adipiscing. + +Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by +adding additional levels of `>`: + + > This is the first level of quoting. + > + > > This is nested blockquote. + > + > Back to the first level. + +Blockquotes can contain other Markdown elements, including headers, lists, +and code blocks: + + > ## This is a header. + > + > 1. This is the first list item. + > 2. This is the second list item. + > + > Here's some example code: + > + > return shell_exec("echo $input | $markdown_script"); + +Any decent text editor should make email-style quoting easy. For +example, with BBEdit, you can make a selection and choose Increase +Quote Level from the Text menu. + + +

Lists

+ +Markdown supports ordered (numbered) and unordered (bulleted) lists. + +Unordered lists use asterisks, pluses, and hyphens -- interchangably +-- as list markers: + + * Red + * Green + * Blue + +is equivalent to: + + + Red + + Green + + Blue + +and: + + - Red + - Green + - Blue + +Ordered lists use numbers followed by periods: + + 1. Bird + 2. McHale + 3. Parish + +It's important to note that the actual numbers you use to mark the +list have no effect on the HTML output Markdown produces. The HTML +Markdown produces from the above list is: + +
    +
  1. Bird
  2. +
  3. McHale
  4. +
  5. Parish
  6. +
+ +If you instead wrote the list in Markdown like this: + + 1. Bird + 1. McHale + 1. Parish + +or even: + + 3. Bird + 1. McHale + 8. Parish + +you'd get the exact same HTML output. The point is, if you want to, +you can use ordinal numbers in your ordered Markdown lists, so that +the numbers in your source match the numbers in your published HTML. +But if you want to be lazy, you don't have to. + +If you do use lazy list numbering, however, you should still start the +list with the number 1. At some point in the future, Markdown may support +starting ordered lists at an arbitrary number. + +List markers typically start at the left margin, but may be indented by +up to three spaces. List markers must be followed by one or more spaces +or a tab. + +To make lists look nice, you can wrap items with hanging indents: + + * Lorem ipsum dolor sit amet, consectetuer adipiscing elit. + Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, + viverra nec, fringilla in, laoreet vitae, risus. + * Donec sit amet nisl. Aliquam semper ipsum sit amet velit. + Suspendisse id sem consectetuer libero luctus adipiscing. + +But if you want to be lazy, you don't have to: + + * Lorem ipsum dolor sit amet, consectetuer adipiscing elit. + Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, + viverra nec, fringilla in, laoreet vitae, risus. + * Donec sit amet nisl. Aliquam semper ipsum sit amet velit. + Suspendisse id sem consectetuer libero luctus adipiscing. + +If list items are separated by blank lines, Markdown will wrap the +items in `

` tags in the HTML output. For example, this input: + + * Bird + * Magic + +will turn into: + +

    +
  • Bird
  • +
  • Magic
  • +
+ +But this: + + * Bird + + * Magic + +will turn into: + +
    +
  • Bird

  • +
  • Magic

  • +
+ +List items may consist of multiple paragraphs. Each subsequent +paragraph in a list item must be indented by either 4 spaces +or one tab: + + 1. This is a list item with two paragraphs. Lorem ipsum dolor + sit amet, consectetuer adipiscing elit. Aliquam hendrerit + mi posuere lectus. + + Vestibulum enim wisi, viverra nec, fringilla in, laoreet + vitae, risus. Donec sit amet nisl. Aliquam semper ipsum + sit amet velit. + + 2. Suspendisse id sem consectetuer libero luctus adipiscing. + +It looks nice if you indent every line of the subsequent +paragraphs, but here again, Markdown will allow you to be +lazy: + + * This is a list item with two paragraphs. + + This is the second paragraph in the list item. You're + only required to indent the first line. Lorem ipsum dolor + sit amet, consectetuer adipiscing elit. + + * Another item in the same list. + +To put a blockquote within a list item, the blockquote's `>` +delimiters need to be indented: + + * A list item with a blockquote: + + > This is a blockquote + > inside a list item. + +To put a code block within a list item, the code block needs +to be indented *twice* -- 8 spaces or two tabs: + + * A list item with a code block: + + + + +It's worth noting that it's possible to trigger an ordered list by +accident, by writing something like this: + + 1986. What a great season. + +In other words, a *number-period-space* sequence at the beginning of a +line. To avoid this, you can backslash-escape the period: + + 1986\. What a great season. + + + +

Code Blocks

+ +Pre-formatted code blocks are used for writing about programming or +markup source code. Rather than forming normal paragraphs, the lines +of a code block are interpreted literally. Markdown wraps a code block +in both `
` and `` tags.
+
+To produce a code block in Markdown, simply indent every line of the
+block by at least 4 spaces or 1 tab. For example, given this input:
+
+    This is a normal paragraph:
+
+        This is a code block.
+
+Markdown will generate:
+
+    

This is a normal paragraph:

+ +
This is a code block.
+    
+ +One level of indentation -- 4 spaces or 1 tab -- is removed from each +line of the code block. For example, this: + + Here is an example of AppleScript: + + tell application "Foo" + beep + end tell + +will turn into: + +

Here is an example of AppleScript:

+ +
tell application "Foo"
+        beep
+    end tell
+    
+ +A code block continues until it reaches a line that is not indented +(or the end of the article). + +Within a code block, ampersands (`&`) and angle brackets (`<` and `>`) +are automatically converted into HTML entities. This makes it very +easy to include example HTML source code using Markdown -- just paste +it and indent it, and Markdown will handle the hassle of encoding the +ampersands and angle brackets. For example, this: + + + +will turn into: + +
<div class="footer">
+        &copy; 2004 Foo Corporation
+    </div>
+    
+ +Regular Markdown syntax is not processed within code blocks. E.g., +asterisks are just literal asterisks within a code block. This means +it's also easy to use Markdown to write about Markdown's own syntax. + + + +

Horizontal Rules

+ +You can produce a horizontal rule tag (`
`) by placing three or +more hyphens, asterisks, or underscores on a line by themselves. If you +wish, you may use spaces between the hyphens or asterisks. Each of the +following lines will produce a horizontal rule: + + * * * + + *** + + ***** + + - - - + + --------------------------------------- + + +* * * + +

Span Elements

+ + + +Markdown supports two style of links: *inline* and *reference*. + +In both styles, the link text is delimited by [square brackets]. + +To create an inline link, use a set of regular parentheses immediately +after the link text's closing square bracket. Inside the parentheses, +put the URL where you want the link to point, along with an *optional* +title for the link, surrounded in quotes. For example: + + This is [an example](http://example.com/ "Title") inline link. + + [This link](http://example.net/) has no title attribute. + +Will produce: + +

This is + an example inline link.

+ +

This link has no + title attribute.

+ +If you're referring to a local resource on the same server, you can +use relative paths: + + See my [About](/about/) page for details. + +Reference-style links use a second set of square brackets, inside +which you place a label of your choosing to identify the link: + + This is [an example][id] reference-style link. + +You can optionally use a space to separate the sets of brackets: + + This is [an example] [id] reference-style link. + +Then, anywhere in the document, you define your link label like this, +on a line by itself: + + [id]: http://example.com/ "Optional Title Here" + +That is: + +* Square brackets containing the link identifier (optionally + indented from the left margin using up to three spaces); +* followed by a colon; +* followed by one or more spaces (or tabs); +* followed by the URL for the link; +* optionally followed by a title attribute for the link, enclosed + in double or single quotes, or enclosed in parentheses. + +The following three link definitions are equivalent: + + [foo]: http://example.com/ "Optional Title Here" + [foo]: http://example.com/ 'Optional Title Here' + [foo]: http://example.com/ (Optional Title Here) + +**Note:** There is a known bug in Markdown.pl 1.0.1 which prevents +single quotes from being used to delimit link titles. + +The link URL may, optionally, be surrounded by angle brackets: + + [id]: "Optional Title Here" + +You can put the title attribute on the next line and use extra spaces +or tabs for padding, which tends to look better with longer URLs: + + [id]: http://example.com/longish/path/to/resource/here + "Optional Title Here" + +Link definitions are only used for creating links during Markdown +processing, and are stripped from your document in the HTML output. + +Link definition names may consist of letters, numbers, spaces, and +punctuation -- but they are *not* case sensitive. E.g. these two +links: + + [link text][a] + [link text][A] + +are equivalent. + +The *implicit link name* shortcut allows you to omit the name of the +link, in which case the link text itself is used as the name. +Just use an empty set of square brackets -- e.g., to link the word +"Google" to the google.com web site, you could simply write: + + [Google][] + +And then define the link: + + [Google]: http://google.com/ + +Because link names may contain spaces, this shortcut even works for +multiple words in the link text: + + Visit [Daring Fireball][] for more information. + +And then define the link: + + [Daring Fireball]: http://daringfireball.net/ + +Link definitions can be placed anywhere in your Markdown document. I +tend to put them immediately after each paragraph in which they're +used, but if you want, you can put them all at the end of your +document, sort of like footnotes. + +Here's an example of reference links in action: + + I get 10 times more traffic from [Google] [1] than from + [Yahoo] [2] or [MSN] [3]. + + [1]: http://google.com/ "Google" + [2]: http://search.yahoo.com/ "Yahoo Search" + [3]: http://search.msn.com/ "MSN Search" + +Using the implicit link name shortcut, you could instead write: + + I get 10 times more traffic from [Google][] than from + [Yahoo][] or [MSN][]. + + [google]: http://google.com/ "Google" + [yahoo]: http://search.yahoo.com/ "Yahoo Search" + [msn]: http://search.msn.com/ "MSN Search" + +Both of the above examples will produce the following HTML output: + +

I get 10 times more traffic from Google than from + Yahoo + or MSN.

+ +For comparison, here is the same paragraph written using +Markdown's inline link style: + + I get 10 times more traffic from [Google](http://google.com/ "Google") + than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or + [MSN](http://search.msn.com/ "MSN Search"). + +The point of reference-style links is not that they're easier to +write. The point is that with reference-style links, your document +source is vastly more readable. Compare the above examples: using +reference-style links, the paragraph itself is only 81 characters +long; with inline-style links, it's 176 characters; and as raw HTML, +it's 234 characters. In the raw HTML, there's more markup than there +is text. + +With Markdown's reference-style links, a source document much more +closely resembles the final output, as rendered in a browser. By +allowing you to move the markup-related metadata out of the paragraph, +you can add links without interrupting the narrative flow of your +prose. + + +

Emphasis

+ +Markdown treats asterisks (`*`) and underscores (`_`) as indicators of +emphasis. Text wrapped with one `*` or `_` will be wrapped with an +HTML `` tag; double `*`'s or `_`'s will be wrapped with an HTML +`` tag. E.g., this input: + + *single asterisks* + + _single underscores_ + + **double asterisks** + + __double underscores__ + +will produce: + + single asterisks + + single underscores + + double asterisks + + double underscores + +You can use whichever style you prefer; the lone restriction is that +the same character must be used to open and close an emphasis span. + +Emphasis can be used in the middle of a word: + + un*frigging*believable + +But if you surround an `*` or `_` with spaces, it'll be treated as a +literal asterisk or underscore. + +To produce a literal asterisk or underscore at a position where it +would otherwise be used as an emphasis delimiter, you can backslash +escape it: + + \*this text is surrounded by literal asterisks\* + + + +

Code

+ +To indicate a span of code, wrap it with backtick quotes (`` ` ``). +Unlike a pre-formatted code block, a code span indicates code within a +normal paragraph. For example: + + Use the `printf()` function. + +will produce: + +

Use the printf() function.

+ +To include a literal backtick character within a code span, you can use +multiple backticks as the opening and closing delimiters: + + ``There is a literal backtick (`) here.`` + +which will produce this: + +

There is a literal backtick (`) here.

+ +The backtick delimiters surrounding a code span may include spaces -- +one after the opening, one before the closing. This allows you to place +literal backtick characters at the beginning or end of a code span: + + A single backtick in a code span: `` ` `` + + A backtick-delimited string in a code span: `` `foo` `` + +will produce: + +

A single backtick in a code span: `

+ +

A backtick-delimited string in a code span: `foo`

+ +With a code span, ampersands and angle brackets are encoded as HTML +entities automatically, which makes it easy to include example HTML +tags. Markdown will turn this: + + Please don't use any `` tags. + +into: + +

Please don't use any <blink> tags.

+ +You can write this: + + `—` is the decimal-encoded equivalent of `—`. + +to produce: + +

&#8212; is the decimal-encoded + equivalent of &mdash;.

+ + + +

Images

+ +Admittedly, it's fairly difficult to devise a "natural" syntax for +placing images into a plain text document format. + +Markdown uses an image syntax that is intended to resemble the syntax +for links, allowing for two styles: *inline* and *reference*. + +Inline image syntax looks like this: + + ![Alt text](/path/to/img.jpg) + + ![Alt text](/path/to/img.jpg "Optional title") + +That is: + +* An exclamation mark: `!`; +* followed by a set of square brackets, containing the `alt` + attribute text for the image; +* followed by a set of parentheses, containing the URL or path to + the image, and an optional `title` attribute enclosed in double + or single quotes. + +Reference-style image syntax looks like this: + + ![Alt text][id] + +Where "id" is the name of a defined image reference. Image references +are defined using syntax identical to link references: + + [id]: url/to/image "Optional title attribute" + +As of this writing, Markdown has no syntax for specifying the +dimensions of an image; if this is important to you, you can simply +use regular HTML `` tags. + + +* * * + + +

Miscellaneous

+ + + +Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this: + + + +Markdown will turn this into: + + http://example.com/ + +Automatic links for email addresses work similarly, except that +Markdown will also perform a bit of randomized decimal and hex +entity-encoding to help obscure your address from address-harvesting +spambots. For example, Markdown will turn this: + + + +into something like this: + + address@exa + mple.com + +which will render in a browser as a clickable link to "address@example.com". + +(This sort of entity-encoding trick will indeed fool many, if not +most, address-harvesting bots, but it definitely won't fool all of +them. It's better than nothing, but an address published in this way +will probably eventually start receiving spam.) + + + +

Backslash Escapes

+ +Markdown allows you to use backslash escapes to generate literal +characters which would otherwise have special meaning in Markdown's +formatting syntax. For example, if you wanted to surround a word +with literal asterisks (instead of an HTML `` tag), you can use +backslashes before the asterisks, like this: + + \*literal asterisks\* + +Markdown provides backslash escapes for the following characters: + + \ backslash + ` backtick + * asterisk + _ underscore + {} curly braces + [] square brackets + () parentheses + # hash mark + + plus sign + - minus sign (hyphen) + . dot + ! exclamation mark + From a8d11bbcc103f72dac79396cfb77362e6ea9b489 Mon Sep 17 00:00:00 2001 From: Jan Mas Rovira Date: Wed, 19 Mar 2025 11:57:15 +0100 Subject: [PATCH 5/5] check --- app/Commands/Dev/PlainMarkdown/Format.hs | 7 +++-- .../Dev/PlainMarkdown/Format/Options.hs | 11 +++++-- src/Juvix/Data/Effect/ExactPrint.hs | 5 +++- src/Markdown/Print.hs | 29 ++++++++++++------- 4 files changed, 36 insertions(+), 16 deletions(-) diff --git a/app/Commands/Dev/PlainMarkdown/Format.hs b/app/Commands/Dev/PlainMarkdown/Format.hs index f016f6abbc..32d9718afc 100644 --- a/app/Commands/Dev/PlainMarkdown/Format.hs +++ b/app/Commands/Dev/PlainMarkdown/Format.hs @@ -12,5 +12,8 @@ runCommand :: Sem r () runCommand opts = do afile <- fromAppPathFile (opts ^. formatFile) - mdBlock <- runAppError @SimpleError (Markdown.parseFile afile) - renderStdOutLn (ppOut mdBlock) + txt <- readFile' afile + mdBlock <- runAppError @SimpleError (Markdown.parseText afile txt) + let out = ppOut mdBlock <> ansiTextNewline + renderStdOut out + when (opts ^. formatCheck && txt /= toPlainText out) exitFailure diff --git a/app/Commands/Dev/PlainMarkdown/Format/Options.hs b/app/Commands/Dev/PlainMarkdown/Format/Options.hs index e7a4bde48c..9d223ec348 100644 --- a/app/Commands/Dev/PlainMarkdown/Format/Options.hs +++ b/app/Commands/Dev/PlainMarkdown/Format/Options.hs @@ -2,8 +2,9 @@ module Commands.Dev.PlainMarkdown.Format.Options where import CommonOptions -newtype FormatOptions = FormatOptions - { _formatFile :: AppPath File +data FormatOptions = FormatOptions + { _formatFile :: AppPath File, + _formatCheck :: Bool } deriving stock (Data) @@ -12,4 +13,10 @@ makeLenses ''FormatOptions parseFormatOptions :: Parser FormatOptions parseFormatOptions = do _formatFile <- parseInputFile FileExtMarkdown + _formatCheck <- + switch + ( long "check" + <> help "Exit code 1 if a file wasn't already formatted." + ) + pure FormatOptions {..} diff --git a/src/Juvix/Data/Effect/ExactPrint.hs b/src/Juvix/Data/Effect/ExactPrint.hs index 8c1aecf6e7..425ef2f485 100644 --- a/src/Juvix/Data/Effect/ExactPrint.hs +++ b/src/Juvix/Data/Effect/ExactPrint.hs @@ -91,8 +91,11 @@ hang = region (P.hang 2) align :: (Members '[ExactPrint] r) => Sem r () -> Sem r () align = region P.align +indentN :: (Members '[ExactPrint] r) => Int -> Sem r () -> Sem r () +indentN n = region (P.indent n) + indent :: (Members '[ExactPrint] r) => Sem r () -> Sem r () -indent = region (P.indent 2) +indent = indentN 2 flatAlt :: (Members '[ExactPrint] r) => Sem r () -> Sem r () -> Sem r () flatAlt = regionAlt P.flatAlt (const id) diff --git a/src/Markdown/Print.hs b/src/Markdown/Print.hs index e7a46294db..b96afe5936 100644 --- a/src/Markdown/Print.hs +++ b/src/Markdown/Print.hs @@ -46,9 +46,9 @@ instance PrettyPrint Strong where instance PrettyPrint Emph where ppCode (Emph i) = do - ppCode @Text "_" + ppCode @Text "*" ppCode i - ppCode @Text "_" + ppCode @Text "*" -- [link](/uri "title") instance PrettyPrint Link where @@ -105,19 +105,24 @@ instance PrettyPrint Inlines where ppCode = mapM_ ppCode . (^. inlines) instance PrettyPrint Blocks where - ppCode = concatWith (\a b -> a <> hardline <> hardline <> b) . map ppCode . (^. blocks) + ppCode = + concatWith (\a b -> a <> hardline <> hardline <> b) + . map ppCode + . (^. blocks) instance PrettyPrint Text where ppCode = noLoc . pretty instance PrettyPrint CodeBlock where ppCode :: forall r. (Members '[ExactPrint, Reader Options] r) => CodeBlock -> Sem r () - ppCode CodeBlock {..} = do - codeSep - ppCode _codeBlockLanguage - hardline - ppCode _codeBlock - codeSep + ppCode CodeBlock {..} + | Text.null _codeBlockLanguage = indentN 4 (ppCode _codeBlock) + | otherwise = do + codeSep + ppCode _codeBlockLanguage + hardline + ppCode _codeBlock + codeSep where codeSep :: Sem r () codeSep = ppCode @Text "```" @@ -150,17 +155,19 @@ instance PrettyPrint List where ( \b -> do num <- yield @Text ppCode num - ppCode b + indentN 2 (ppCode b) ) _listBlocks instance PrettyPrint QuoteBlock where ppCode QuoteBlock {..} = do let q :: Text = toPlainText (ppOut _quoteBlock) + addQuoteChar :: Text -> Text + addQuoteChar t = ">" <> t withQuotes :: Text = Text.dropEnd 1 . Text.unlines - . map ("> " <>) + . map addQuoteChar $ Text.lines q noLoc (pretty withQuotes)