@@ -5,9 +5,20 @@ const {JSDOM} = require('jsdom')
55
66function overrideElementProcessing ( element ) {
77
8- if ( element . tagName ?. toLowerCase ( ) === 'a'
9- && element . className === 'anchor' ) {
8+ if ( element . tagName ?. toLowerCase ( ) === 'a' ) {
9+ if ( element . className === 'anchor' ) {
1010 return [ { type : 'custom' , blank : true } ]
11+ }
12+ let href = element . getAttribute ( 'href' )
13+ const hasProtocol = / ^ [ a - z ] + : \/ \/ / i
14+ console . log ( href )
15+ if ( href && ! href . match ( hasProtocol ) ) {
16+ // convert internal links to markdown format
17+ href = href . replace ( / \. h t m l / , '.md' )
18+ const content = toMarkdown ( element . innerHTML || href )
19+ console . log ( "LINK" , href , content )
20+ return [ { type : 'link' , href, content} ]
21+ }
1122 }
1223
1324 if ( element . classList ?. contains ( "admonitionblock" ) ) {
@@ -51,28 +62,57 @@ function markdownify(page, siteCatalog) {
5162 const html = page . contents . toString ( )
5263 const markdown = `# ${ page . asciidoc . doctitle } \n\n` + toMarkdown ( html )
5364
54- const path = page . out . path . replace ( / \. h t m l $ / , '.md' )
55-
56- // tell docs-ui to output <link rel="alternate" ...> for the markdown page.
57- page . asciidoc . attributes [ "page-markdown-alt" ] = ` ${ page . out . rootPath } / ${ path } `
65+ page . out . path = page . out . path . replace ( / \. h t m l $ / , '.md' )
66+ if ( page . out . url ) {
67+ page . out . url = page . out . url . replace ( / \. h t m l $ / , '.md' )
68+ }
5869
59- siteCatalog . addFile ( {
60- contents : Buffer . from ( markdown ) ,
61- out : { path }
62- } )
70+ page . contents = Buffer . from ( markdown )
71+
72+ // tell docs-ui to output <link rel="alternate" ...> for the markdown page.
73+ // (no longer relevant here - we need to do this in the MAIN build)
74+ // page.asciidoc.attributes["page-markdown-alt"] = `${page.out.rootPath}/${path}`
6375}
6476
// Extension entry point, exported as `register`; it wires listeners onto `this`.
//
// Removed ("-") version: created a 'markdown-for-llm' logger and, on every
// 'navigationBuilt' event, called markdownify() for each page with a truthy `pub`.
//
// Added ("+") version:
//   1. once, on 'contextStarted', replaces `createPageComposer` with a factory
//      whose composer returns each file unchanged instead of wrapping it;
//   2. once, on 'documentsConverted', calls markdownify() for every page that
//      passes the mediaType / pub / out filter below.
6577module . exports . register = function ( { playbook, config } ) {
66- const logger = this . getLogger ( 'markdown-for-llm' )
78+ this . once ( 'contextStarted' , ( ) => {
// NOTE(review): `_createPageComposerDelegate` is destructured here but is not
// referenced anywhere else in the lines shown - confirm it is needed.
79+ const { createPageComposer : _createPageComposerDelegate } = this . getFunctions ( )
6780
// Removed implementation: iterated all pages after navigation was built.
68- this . on ( 'navigationBuilt' , ( { playbook, siteAsciiDocConfig, siteCatalog, uiCatalog, contentCatalog } ) => {
69-
70- logger . info ( 'Compiling Markdown summaries' )
71-
72- for ( const page of contentCatalog . getPages ( ) ) {
73- if ( page . pub ) {
74- markdownify ( page , siteCatalog )
75- }
81+ this . replaceFunctions ( {
82+ // see https://gitlab.com/antora/antora/-/blob/v3.1.x/packages/page-composer/lib/create-page-composer.js
// Replacement factory: its three parameters are accepted but not used in the
// body shown; it returns `composePage` with helper properties attached.
83+ createPageComposer ( playbook , contentCatalog , uiCatalog ) {
84+ function composePage ( file , _contentCatalog , _navigationCatalog ) {
85+ // instead of wrapping the file in a layout, just
86+ // return the file as-is
87+ return file
7688 }
// Builds a literal 404 file descriptor and passes it through composePage,
// which returns it unchanged (no `contents` property is set on it here).
89+ const create404Page = ( siteAsciiDocConfig ) =>
90+ composePage ( {
91+ asciidoc : siteAsciiDocConfig ,
92+ mediaType : 'text/html' ,
93+ out : { path : '404.html' } ,
94+ pub : { } ,
95+ src : { stem : '404' } ,
// Title falls back to the literal below when the '404-page-title' attribute is
// falsy. NOTE(review): the fallback reads 'PageNot Found' - presumably meant
// to be 'Page Not Found'; confirm and fix in the source file.
96+ title : siteAsciiDocConfig ?. attributes [ '404-page-title' ] || 'PageNot Found' ,
97+ } )
98+
// Object.assign returns its target, so `ret` is the composePage function itself
// with `composePage` and `create404Page` attached as properties.
99+ const ret = Object . assign ( composePage , { composePage, create404Page} )
100+ return ret
101+ }
77102 } )
103+ } )
104+
// NOTE(review): this handler is declared `async` but contains no `await` in the
// lines shown, and the destructured `playbook` is not used in its body.
105+ this . once ( 'documentsConverted' , async ( { playbook, contentCatalog, siteCatalog } ) => {
// NOTE(review): `logger` is created (label changed to 'llm-summaries') but is
// not used in the lines shown; the removed version logged an info message.
106+ const logger = this . getLogger ( 'llm-summaries' )
107+
// Select only pages whose mediaType is 'text/html' and whose `pub` and `out`
// are both truthy.
108+ const pages = contentCatalog . getPages (
109+ ( page ) =>
110+ page . mediaType === 'text/html'
111+ && page . pub
112+ && page . out )
113+
// markdownify() is defined elsewhere in this file; it is called once per page.
114+ for ( const page of pages ) {
115+ markdownify ( page , siteCatalog )
116+ }
117+ } )
78118}
0 commit comments