@@ -5,11 +5,8 @@ import {
55} from '@blocknote/core' ;
66import { Canvg } from 'canvg' ;
77import { IParagraphOptions , ShadingType } from 'docx' ;
8- import JSZip from 'jszip' ;
98import React from 'react' ;
109
11- import { exportResolveFileUrl } from './api' ;
12-
1310export function downloadFile ( blob : Blob , filename : string ) {
1411 const url = window . URL . createObjectURL ( blob ) ;
1512 const a = document . createElement ( 'a' ) ;
@@ -182,336 +179,3 @@ export function odtRegisterParagraphStyleForBlock(
182179
183180 return styleName ;
184181}
185-
/**
 * Escapes user-provided text before injecting it into the exported HTML
 * document, so it is safe in both element content and quoted attribute values.
 *
 * The ampersand is replaced first; otherwise the `&` characters introduced by
 * the later replacements would be escaped a second time.
 *
 * @param value - Raw text to escape.
 * @returns The text with `&`, `<`, `>`, `"` and `'` replaced by HTML entities.
 */
export const escapeHtml = (value: string): string =>
  value
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
194-
interface MediaFilenameParams {
  /** Original media source (absolute URL, relative URL or `data:` URL). */
  src: string;
  /** Zero-based position of the media element; used to number the file. */
  index: number;
  /** Downloaded content; only its MIME type is read, to infer an extension. */
  blob: Blob;
}

/**
 * Derives a stable, readable filename for media exported in the HTML ZIP.
 *
 * Rules:
 * - Default base name is "media-{index+1}".
 * - For non data: URLs, we reuse the last path segment when possible (e.g. 1-photo.png).
 * - If the base name has no extension, we try to infer one from the blob MIME type.
 *   MIME parameters (e.g. "; charset=utf-8") are ignored so they never end up
 *   in the filename.
 *
 * @returns The filename to use inside the ZIP archive.
 */
export const deriveMediaFilename = ({
  src,
  index,
  blob,
}: MediaFilenameParams): string => {
  // Default base name
  let baseName = `media-${index + 1}`;

  // Try to reuse the last path segment for non data URLs.
  if (!src.startsWith('data:')) {
    try {
      const url = new URL(src, window.location.origin);
      const lastSegment = url.pathname.split('/').pop();
      if (lastSegment) {
        baseName = `${index + 1}-${lastSegment}`;
      }
    } catch {
      // Ignore invalid URLs, keep default baseName.
    }
  }

  // A name that already contains a dot is assumed to carry its own extension.
  const mimeType = blob.type;
  if (!mimeType || baseName.includes('.')) {
    return baseName;
  }

  // Keep only "type/subtype": drop any parameters such as "; charset=utf-8".
  const essence = (mimeType.split(';')[0] ?? '').trim().toLowerCase();
  const slashIndex = essence.indexOf('/');
  const subtype = slashIndex === -1 ? '' : essence.slice(slashIndex + 1);
  if (!subtype) {
    return baseName;
  }

  // Ordered [needle, extension] pairs; the first needle found in the subtype wins.
  // "jpeg" also covers "pjpeg".
  const knownExtensions: ReadonlyArray<readonly [string, string]> = [
    ['svg', 'svg'],
    ['jpeg', 'jpg'],
    ['png', 'png'],
    ['gif', 'gif'],
    ['webp', 'webp'],
    ['pdf', 'pdf'],
  ];
  const known = knownExtensions.find(([needle]) => subtype.includes(needle));

  // Unknown subtypes fall back to the part before any "+suffix" (e.g. "ld+json" -> "ld").
  const extension = known ? known[1] : (subtype.split('+')[0] ?? '');

  return extension ? `${baseName}.${extension}` : baseName;
};
267-
/**
 * Generates a complete HTML document structure for export.
 *
 * The title and the language code are HTML-escaped before being interpolated.
 * The editor HTML is inserted verbatim: it is markup, not text, so the caller
 * is responsible for its content.
 *
 * @param documentTitle - The title of the document (will be escaped)
 * @param editorHtmlWithLocalMedia - The HTML content from the editor
 * @param lang - The language code for the document (e.g., 'fr', 'en'); escaped
 *   because it is written into a quoted attribute
 * @returns A complete HTML5 document string
 */
export const generateHtmlDocument = (
  documentTitle: string,
  editorHtmlWithLocalMedia: string,
  lang: string,
): string => {
  return `<!DOCTYPE html>
<html lang="${escapeHtml(lang)}">
  <head>
    <meta charset="utf-8" />
    <title>${escapeHtml(documentTitle)}</title>
    <link rel="stylesheet" href="styles.css">
  </head>
  <body>
    <main role="main">
${editorHtmlWithLocalMedia}
    </main>
  </body>
</html>`;
};
295-
/**
 * Enrich the HTML produced by the editor with semantic tags and basic a11y defaults.
 *
 * The document is mutated in place, in this order:
 * 1. heading blocks -> <h1>..<h6> (level read from data-level, clamped to 1-6)
 * 2. bullet / numbered list items -> <li> grouped into <ul> / <ol>
 * 3. quote blocks -> <blockquote>
 * 4. callout blocks -> <aside role="note">
 * 5. checklist items -> <li> grouped into <ul role="list">, checkbox gets aria-checked
 * 6. code blocks -> <pre><code>
 * 7. images without an alt attribute get alt=""
 * 8. an <h1> title is inserted when the body has none, and the whole body content
 *    is wrapped in an <article> labelled by that title
 *
 * Notes:
 * - We work directly on the parsed Document so modifications are reflected before we zip files.
 * - We keep the editor inner structure but upgrade the key block types to native elements.
 * - Replaced blocks keep only their inner HTML; attributes of the original
 *   wrapper element are not copied to the new element.
 *
 * @param parsedDocument - Document to upgrade; nothing happens if it has no body.
 * @param documentTitle - Text of the <h1> inserted when the content has no <h1>.
 */
export const improveHtmlAccessibility = (
  parsedDocument: Document,
  documentTitle: string,
): void => {
  const body = parsedDocument.body;
  if (!body) {
    return;
  }

  // Creates a <tagName> element holding a copy of the block's inner HTML.
  const wrapContent = (tagName: string, block: HTMLElement): HTMLElement => {
    const element = parsedDocument.createElement(tagName);
    element.innerHTML = block.innerHTML;
    return element;
  };

  // Replaces every element matching `selector` with the element returned by `build`.
  // The matches are snapshotted first because the DOM is mutated while iterating.
  const replaceBlocks = (
    selector: string,
    build: (block: HTMLElement) => HTMLElement,
  ): void => {
    Array.from(body.querySelectorAll<HTMLElement>(selector)).forEach(
      (block) => {
        block.replaceWith(build(block));
      },
    );
  };

  // Moves the content of `item` into a new <li> appended to the list that
  // immediately precedes it (when it has the expected tag) or to a freshly
  // created list inserted in its place. Returns the <li>, or null when the
  // item is detached.
  const moveIntoList = (
    item: HTMLElement,
    listTag: 'ul' | 'ol',
    configureNewList?: (list: HTMLElement) => void,
  ): HTMLElement | null => {
    const parent = item.parentElement;
    if (!parent) {
      return null;
    }

    const previousSibling = item.previousElementSibling;
    let listContainer: HTMLElement;
    if (previousSibling?.tagName.toLowerCase() === listTag) {
      listContainer = previousSibling as HTMLElement;
    } else {
      listContainer = parsedDocument.createElement(listTag);
      configureNewList?.(listContainer);
      parent.insertBefore(listContainer, item);
    }

    const li = wrapContent('li', item);
    listContainer.appendChild(li);
    parent.removeChild(item);
    return li;
  };

  // 1) Headings: convert heading blocks to h1-h6 based on data-level
  replaceBlocks("[data-content-type='heading']", (block) => {
    // Missing, non-numeric or zero levels fall back to 1.
    const rawLevel = Number(block.getAttribute('data-level')) || 1;
    const level = Math.min(Math.max(rawLevel, 1), 6);
    return wrapContent(`h${level}`, block);
  });

  // 2) Lists: group consecutive list items into UL/OL with LI children
  Array.from(
    body.querySelectorAll<HTMLElement>(
      "[data-content-type='bulletListItem'], [data-content-type='numberedListItem']",
    ),
  ).forEach((item) => {
    const isBullet =
      item.getAttribute('data-content-type') === 'bulletListItem';
    moveIntoList(item, isBullet ? 'ul' : 'ol');
  });

  // 3) Quotes -> <blockquote>
  replaceBlocks("[data-content-type='quote']", (block) =>
    wrapContent('blockquote', block),
  );

  // 4) Callouts -> <aside role="note">
  replaceBlocks("[data-content-type='callout']", (block) => {
    const aside = wrapContent('aside', block);
    aside.setAttribute('role', 'note');
    return aside;
  });

  // 5) Checklists -> list + checkbox semantics
  // Note: any <ul> directly preceding the item is reused, including a bullet list.
  Array.from(
    body.querySelectorAll<HTMLElement>("[data-content-type='checkListItem']"),
  ).forEach((item) => {
    const li = moveIntoList(item, 'ul', (list) => {
      list.setAttribute('role', 'list');
    });

    // Ensure checkbox has an accessible state; fall back to aria-checked if missing.
    const checkbox = li?.querySelector<HTMLInputElement>(
      "input[type='checkbox']",
    );
    if (checkbox && !checkbox.hasAttribute('aria-checked')) {
      checkbox.setAttribute(
        'aria-checked',
        checkbox.checked ? 'true' : 'false',
      );
    }
  });

  // 6) Code blocks -> <pre><code>
  replaceBlocks("[data-content-type='codeBlock']", (block) => {
    const pre = parsedDocument.createElement('pre');
    pre.appendChild(wrapContent('code', block));
    return pre;
  });

  // 7) Ensure images have alt text (empty when not provided)
  body.querySelectorAll<HTMLImageElement>('img').forEach((img) => {
    if (!img.hasAttribute('alt')) {
      img.setAttribute('alt', '');
    }
  });

  // 8) Make sure there is a title landmark the article can be labelled by.
  let titleHeading = body.querySelector<HTMLElement>('h1');
  if (!titleHeading) {
    titleHeading = parsedDocument.createElement('h1');
    titleHeading.id = 'doc-title';
    titleHeading.textContent = documentTitle;
    body.insertBefore(titleHeading, body.firstChild);
  } else if (!titleHeading.id) {
    // An existing <h1> needs an id, otherwise aria-labelledby below would
    // reference an element that does not exist.
    titleHeading.id = 'doc-title';
  }

  // If there is no article, group the body content inside one for better semantics.
  if (!body.querySelector('article')) {
    const article = parsedDocument.createElement('article');
    article.setAttribute('role', 'document');
    article.setAttribute('aria-labelledby', titleHeading.id);
    while (body.firstChild) {
      article.appendChild(body.firstChild);
    }
    body.appendChild(article);
  }
};
459-
/**
 * Downloads the internal media referenced by the document, stores each file in
 * the ZIP and rewrites the element's `src` to the local filename.
 *
 * Only `img`, `video`, `audio` and `source` elements with a `src` attribute are
 * considered. `data:` URLs are left untouched (already embedded), and so are
 * resources whose origin differs from the origin of `mediaUrl`.
 *
 * @param parsedDocument - Document whose media `src` attributes are rewritten in place.
 * @param zip - Archive receiving one entry per downloaded media file.
 * @param mediaUrl - Absolute URL identifying the media origin; it is also the
 *   base used to resolve relative `src` values. Only its origin is compared, so
 *   a trailing slash or a path does not prevent matching. If it is not a valid
 *   absolute URL, nothing is downloaded.
 * @returns A promise that settles once every eligible file has been added. It
 *   rejects if any `exportResolveFileUrl` call rejects.
 */
export const addMediaFilesToZip = async (
  parsedDocument: Document,
  zip: JSZip,
  mediaUrl: string,
): Promise<void> => {
  // Normalise the reference origin once instead of comparing against the raw string.
  let mediaOrigin: string;
  try {
    mediaOrigin = new URL(mediaUrl).origin;
  } catch {
    // Without a valid base, no src can be resolved against it either.
    return;
  }

  const mediaElements = Array.from(
    parsedDocument.querySelectorAll<
      HTMLImageElement | HTMLVideoElement | HTMLAudioElement | HTMLSourceElement
    >('img, video, audio, source'),
  );

  const downloads = await Promise.all(
    mediaElements.map(async (element, index) => {
      const src = element.getAttribute('src');

      // data: URLs are already embedded and work offline; no need to create separate files.
      if (!src || src.startsWith('data:')) {
        return null;
      }

      // Only download same-origin resources (internal media like /media/...).
      // External URLs keep their original src and are not included in the ZIP.
      let url: URL;
      try {
        url = new URL(src, mediaUrl);
      } catch {
        return null;
      }

      if (url.origin !== mediaOrigin) {
        return null;
      }

      // Anything other than a Blob is treated as "could not be resolved" and skipped.
      const fetched = await exportResolveFileUrl(url.href);
      if (!(fetched instanceof Blob)) {
        return null;
      }

      const filename = deriveMediaFilename({
        src: url.href,
        index,
        blob: fetched,
      });
      element.setAttribute('src', filename);
      return { filename, blob: fetched };
    }),
  );

  // Results follow document order, so the archive layout does not depend on
  // which download finished first.
  for (const download of downloads) {
    if (download) {
      zip.file(download.filename, download.blob);
    }
  }
};
0 commit comments