1- import { exec } from 'child_process' ;
2- import fs from 'fs' ;
3- import path from 'path' ;
4- import { promisify } from 'util' ;
5- import { v4 as uuidv4 } from 'uuid' ;
6- import os from 'os' ;
7-
8- const execPromise = promisify ( exec ) ;
1+ import * as XLSX from 'xlsx' ;
2+ import { JSDOM } from 'jsdom' ;
93
/** Maximum worksheet-name length accepted by Excel (and enforced by SheetJS). */
const MAX_SHEET_NAME_LENGTH = 31;

/** Characters that are not allowed anywhere in a worksheet name. */
const INVALID_SHEET_NAME_CHARS = /[:\\\/?*[\]]/g;

/**
 * Build a worksheet name that `XLSX.utils.book_append_sheet` will accept.
 *
 * Forbidden characters are replaced with `_`, leading/trailing apostrophes are
 * dropped, and the base name is truncated so that base + numeric suffix never
 * exceeds the 31-character limit. The suffix is kept intact so names stay
 * unique across tables.
 *
 * @param {string} baseName - Requested worksheet name
 * @param {number} index - Zero-based position of the table in the document
 * @param {number} total - Total number of tables being exported
 * @returns {string} - A sanitised worksheet name (may be empty, in which case
 *   `book_append_sheet` is left to pick its own default)
 */
function buildSheetName(baseName, index, total) {
  // A single table keeps the requested name as-is; several tables get 1-based suffixes.
  const suffix = total === 1 ? '' : String(index + 1);
  const cleaned = String(baseName ?? '')
    .replace(INVALID_SHEET_NAME_CHARS, '_')
    .replace(/^'+|'+$/g, '');
  const truncated = cleaned
    .slice(0, MAX_SHEET_NAME_LENGTH - suffix.length)
    // Truncation can expose a new trailing apostrophe; only matters when there is no suffix.
    .replace(/'+$/, '');
  return `${truncated}${suffix}`;
}

/**
 * Service for generating Excel spreadsheets from HTML content
 */
export const excelService = {
  /**
   * Generate an Excel spreadsheet from HTML content containing tables.
   *
   * Every `<table>` element found in the HTML becomes one worksheet. With a
   * single table the worksheet is named `sheetName`; with several tables the
   * worksheets are named `sheetName` followed by a 1-based index. Names are
   * sanitised so that overly long names or forbidden characters do not make
   * the export fail.
   *
   * @param {string} html - The HTML content containing tables
   * @param {string} sheetName - The name for the worksheet (default: 'Sheet1')
   * @returns {Promise<Buffer>} - A buffer containing the Excel file data
   * @throws {Error} - If no tables are found in the HTML content
   */
  async generateExcel(html, sheetName = 'Sheet1') {
    // Parse the HTML into a DOM so the tables can be handed to SheetJS.
    const dom = new JSDOM(html);

    try {
      const tables = Array.from(dom.window.document.querySelectorAll('table'));

      if (tables.length === 0) {
        throw new Error('No tables found in the HTML content');
      }

      const workbook = XLSX.utils.book_new();

      // One worksheet per table, in document order.
      for (const [index, table] of tables.entries()) {
        const worksheet = XLSX.utils.table_to_sheet(table);
        const currentSheetName = buildSheetName(sheetName, index, tables.length);
        XLSX.utils.book_append_sheet(workbook, worksheet, currentSheetName);
      }

      // Serialise the workbook to an in-memory XLSX buffer.
      return XLSX.write(workbook, { type: 'buffer', bookType: 'xlsx' });
    } finally {
      // Release the jsdom window on both the success and the error path.
      dom.window.close();
    }
  }
};
0 commit comments