Skip to content

Commit 770fcbf

Browse files
committed
DOC-13760 save ONLY markdown summary
Reduce memory usage by *only* saving the Markdown version in this run. We can do this as a separate build, and scp the output to the same bucket; e.g. this content would be overlaid on the *standard* build (which would only need to add the <link href="..."> URL feature). Also rewrite relative hrefs to point at the .md target.
1 parent 9423a4a commit 770fcbf

File tree

1 file changed

+59
-19
lines changed

1 file changed

+59
-19
lines changed

lib/markdown-for-llm.js

Lines changed: 59 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,20 @@ const {JSDOM} = require('jsdom')
55

66
function overrideElementProcessing (element) {
77

8-
if (element.tagName?.toLowerCase() === 'a'
9-
&& element.className === 'anchor' ) {
8+
if (element.tagName?.toLowerCase() === 'a') {
9+
if (element.className === 'anchor' ) {
1010
return [{type: 'custom', blank: true}]
11+
}
12+
let href = element.getAttribute('href')
13+
const hasProtocol = /^[a-z]+:\/\//i
14+
console.log(href)
15+
if (href && !href.match(hasProtocol)) {
16+
// convert internal links to markdown format
17+
href = href.replace(/\.html/, '.md')
18+
const content = toMarkdown(element.innerHTML || href)
19+
console.log("LINK", href, content)
20+
return [{type: 'link', href, content}]
21+
}
1122
}
1223

1324
if (element.classList?.contains("admonitionblock")) {
@@ -51,28 +62,57 @@ function markdownify(page, siteCatalog) {
5162
const html = page.contents.toString()
5263
const markdown = `# ${page.asciidoc.doctitle}\n\n` + toMarkdown(html)
5364

54-
const path = page.out.path.replace(/\.html$/, '.md')
55-
56-
// tell docs-ui to output <link rel="alternate" ...> for the markdown page.
57-
page.asciidoc.attributes["page-markdown-alt"] = `${page.out.rootPath}/${path}`
65+
page.out.path = page.out.path.replace(/\.html$/, '.md')
66+
if (page.out.url) {
67+
page.out.url = page.out.url.replace(/\.html$/, '.md')
68+
}
5869

59-
siteCatalog.addFile({
60-
contents: Buffer.from(markdown),
61-
out: { path }
62-
})
70+
page.contents = Buffer.from(markdown)
71+
72+
// tell docs-ui to output <link rel="alternate" ...> for the markdown page.
73+
// (no longer relevant here - we need to do this in the MAIN build)
74+
// page.asciidoc.attributes["page-markdown-alt"] = `${page.out.rootPath}/${path}`
6375
}
6476

6577
module.exports.register = function ({ playbook, config }) {
66-
const logger = this.getLogger('markdown-for-llm')
78+
this.once('contextStarted', () => {
79+
const { createPageComposer: _createPageComposerDelegate } = this.getFunctions()
6780

68-
this.on('navigationBuilt', ({ playbook, siteAsciiDocConfig, siteCatalog, uiCatalog, contentCatalog }) => {
69-
70-
logger.info('Compiling Markdown summaries')
71-
72-
for (const page of contentCatalog.getPages()) {
73-
if (page.pub) {
74-
markdownify(page, siteCatalog)
75-
}
81+
this.replaceFunctions({
82+
// see https://gitlab.com/antora/antora/-/blob/v3.1.x/packages/page-composer/lib/create-page-composer.js
83+
createPageComposer (playbook, contentCatalog, uiCatalog) {
84+
function composePage (file, _contentCatalog, _navigationCatalog) {
85+
// instead of wrapping the file in a layout, just
86+
// return the file as-is
87+
return file
7688
}
89+
const create404Page = (siteAsciiDocConfig) =>
90+
composePage({
91+
asciidoc: siteAsciiDocConfig,
92+
mediaType: 'text/html',
93+
out: { path: '404.html' },
94+
pub: {},
95+
src: { stem: '404' },
96+
title: siteAsciiDocConfig?.attributes['404-page-title'] || 'PageNot Found',
97+
})
98+
99+
const ret = Object.assign(composePage, {composePage, create404Page} )
100+
return ret
101+
}
77102
})
103+
})
104+
105+
this.once('documentsConverted', async ({ playbook, contentCatalog, siteCatalog }) => {
106+
const logger = this.getLogger('llm-summaries')
107+
108+
const pages = contentCatalog.getPages(
109+
(page) =>
110+
page.mediaType === 'text/html'
111+
&& page.pub
112+
&& page.out)
113+
114+
for (const page of pages) {
115+
markdownify(page, siteCatalog)
116+
}
117+
})
78118
}

0 commit comments

Comments
 (0)