Skip to content

Commit 682aaa3

Browse files
committed
Replace pandoc with xlsx library for Excel generation from HTML tables
1 parent 9a4e89b commit 682aaa3

File tree

2 files changed

+33
-49
lines changed

2 files changed

+33
-49
lines changed

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@
4343
"qrcode": "^1.5.4",
4444
"stripe": "^14.22.0",
4545
"uuid": "^11.1.0",
46-
"ws": "^8.18.1"
46+
"ws": "^8.18.1",
47+
"xlsx": "^0.18.5"
4748
},
4849
"devDependencies": {
4950
"nodemon": "^3.1.0"

src/services/excel-service.js

Lines changed: 31 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,62 +1,45 @@
1-
import { exec } from 'child_process';
2-
import fs from 'fs';
3-
import path from 'path';
4-
import { promisify } from 'util';
5-
import { v4 as uuidv4 } from 'uuid';
6-
import os from 'os';
7-
8-
const execPromise = promisify(exec);
1+
import * as XLSX from 'xlsx';
2+
import { JSDOM } from 'jsdom';
93

104
/**
115
* Service for generating Excel spreadsheets from HTML content
126
*/
137
export const excelService = {
148
/**
15-
* Generate an Excel spreadsheet from HTML content using pandoc
9+
* Generate an Excel spreadsheet from HTML content containing tables
1610
* @param {string} html - The HTML content containing tables
1711
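* @param {string} sheetName - The name for the worksheet (default: 'Sheet1'); when the HTML contains more than one table, each sheet is named with this value followed by the table's 1-based index (e.g. 'Sheet11', 'Sheet12')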
1812
* @returns {Promise<Buffer>} - A buffer containing the Excel file data
19-
* @throws {Error} - If pandoc conversion fails
13+
* @throws {Error} - If no tables are found in the HTML content
2014
*/
2115
async generateExcel(html, sheetName = 'Sheet1') {
22-
try {
23-
// Create temporary files for input and output
24-
const tempDir = os.tmpdir();
25-
const inputId = uuidv4();
26-
const outputId = uuidv4();
27-
const inputPath = path.join(tempDir, `${inputId}.html`);
28-
const outputPath = path.join(tempDir, `${outputId}.xlsx`);
29-
30-
// Write HTML to temporary file
31-
await fs.promises.writeFile(inputPath, html, 'utf8');
32-
33-
// Use pandoc to convert HTML to XLSX
34-
const command = `pandoc -f html -t xlsx "${inputPath}" -o "${outputPath}"`;
35-
console.log(`Executing pandoc command: ${command}`);
36-
37-
await execPromise(command);
38-
39-
// Read the generated XLSX file
40-
const excelBuffer = await fs.promises.readFile(outputPath);
41-
42-
// Clean up temporary files
43-
try {
44-
await fs.promises.unlink(inputPath);
45-
await fs.promises.unlink(outputPath);
46-
} catch (cleanupError) {
47-
console.warn('Error cleaning up temporary files:', cleanupError);
48-
}
49-
50-
return excelBuffer;
51-
} catch (error) {
52-
console.error('Error generating Excel document with pandoc:', error);
53-
54-
// If pandoc fails, provide a detailed error message
55-
if (error.stderr) {
56-
console.error('Pandoc error output:', error.stderr);
57-
}
58-
59-
throw new Error(`Failed to generate Excel document: ${error.message}`);
16+
// Create a DOM from the HTML
17+
const dom = new JSDOM(html);
18+
const document = dom.window.document;
19+
20+
// Find all tables in the HTML
21+
const tables = document.querySelectorAll('table');
22+
23+
if (tables.length === 0) {
24+
throw new Error('No tables found in the HTML content');
6025
}
26+
27+
// Create a new workbook
28+
const workbook = XLSX.utils.book_new();
29+
30+
// Process each table and add it as a sheet
31+
tables.forEach((table, index) => {
32+
// Convert table to worksheet
33+
const worksheet = XLSX.utils.table_to_sheet(table);
34+
35+
// Add the worksheet to the workbook
36+
const currentSheetName = tables.length === 1 ? sheetName : `${sheetName}${index + 1}`;
37+
XLSX.utils.book_append_sheet(workbook, worksheet, currentSheetName);
38+
});
39+
40+
// Write the workbook to a buffer
41+
const excelBuffer = XLSX.write(workbook, { type: 'buffer', bookType: 'xlsx' });
42+
43+
return excelBuffer;
6144
}
6245
};

0 commit comments

Comments
 (0)