Skip to content

Commit b61cd9e

Browse files
committed
feat: Implement Download as Markdown
- Add download button to UI with Markdown icon
- Implement DOM to Markdown conversion in utils.js
- Add YAML frontmatter support
- Sanitize filenames and prefix with date
- Add unit tests for conversion and cleanup logic
1 parent 933e255 commit b61cd9e

6 files changed

Lines changed: 353 additions & 5 deletions

File tree

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# PR Draft: Feature - Download as Markdown
2+
3+
## Summary
4+
Introduces a new feature allowing users to download the Reader View content as a clean Markdown (`.md`) file. This includes content sanitization, metadata frontmatter, and a dedicated download button in the UI.
5+
6+
## Technical Changes
7+
- **`src/content/utils.js`**:
8+
- Implemented `domToMarkdown` for lightweight HTML-to-Markdown conversion (supports headers, lists, links, images, tables, code blocks).
9+
- Added `addFrontmatter` to prepend YAML metadata (title, url, date).
10+
- Enhanced `cleanupNodes` with Wikipedia-specific selectors (`.mw-indicators`, `#siteSub`, etc.).
11+
- Updated `createIconSvg` with a new "Markdown Download" icon (Standard Mark + Arrow).
12+
- **`src/content/main.js`**:
13+
- Added `triggerDownload` function to orchestrate conversion, filename sanitization, and download triggering.
14+
- Filename format: `YYYY-MM-DD_Article_Title.md`.
15+
- **`src/content/ui.js`**:
16+
- Added "Download" button to the bottom menu.
17+
- Added native tooltip `title="Download Markdown (.md)"`.
18+
19+
## Test Report
20+
- **Unit Tests**:
21+
- Verified `domToMarkdown` conversion for all supported elements.
22+
- Verified `addFrontmatter` YAML generation.
23+
- Verified filename sanitization and Wikipedia noise removal.
24+
- **Manual Verification**:
25+
- Tested on generic articles, Wikipedia, and Daum News.
26+
- Confirmed date prefix and frontmatter in downloaded files.
27+
28+
## Checklist
29+
- [x] Code follows "Tidy First" principles.
30+
- [x] Unit tests included and passing.
31+
- [x] No breaking changes.

src/background.js

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
* Listens for extension icon clicks and sends a message to the active tab.
44
*/
55

6-
// Listen for the extension icon click
76
// Listen for the extension icon click
87
chrome.action.onClicked.addListener(async (tab) => {
98
if (!tab.id) return;

src/content/main.js

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,13 @@ function enableReleaf() {
9191
setupTooltip(closeBtn, "Close Reader");
9292
}
9393

94+
// Download button logic
95+
const downloadBtn = bottomMenu.querySelector('[data-role="download-btn"]');
96+
if (downloadBtn) {
97+
downloadBtn.onclick = () => triggerDownload(content);
98+
setupTooltip(downloadBtn, "Download as Markdown");
99+
}
100+
94101
document.body.appendChild(container);
95102
document.body.style.overflow = "hidden";
96103

@@ -110,13 +117,49 @@ function enableReleaf() {
110117
}
111118
});
112119

113-
// 4. Immersive Mode
120+
// 5. Immersive Mode
114121
resetIdleTimer();
115122
document.addEventListener('mousemove', handleUserActivity);
116123
document.addEventListener('keydown', handleUserActivity);
117124
window.addEventListener('resize', handleResize);
118125
}
119126

127+
/**
 * Convert the reader content to Markdown and offer it to the user as a
 * `.md` file download.
 *
 * The file name is `<YYYY-MM-DD>_<sanitized title>.md`, where the date is the
 * current UTC date and the title is the text of the page's first `<h1>`
 * (or "article" when there is none or it is empty after sanitizing).
 * The unsanitized title, the page URL and the date are written into the
 * YAML frontmatter via `addFrontmatter`.
 *
 * @param {HTMLElement} content Root element of the reader content to export.
 */
function triggerDownload(content) {
  const mdBody = domToMarkdown(content);

  // Look the heading up once; fall back to a generic name when the page has no <h1>.
  const heading = document.querySelector('h1');
  const titleRaw = (heading ? heading.textContent : 'article').trim();

  // Sanitize filename: replace control characters and characters that are
  // illegal in file names on common file systems.
  let title = titleRaw.replace(/[<>:"/\\|?*\x00-\x1F]/g, '_').trim();
  if (!title) title = 'article';

  const date = new Date().toISOString().split('T')[0];
  const filename = `${date}_${title}.md`;

  // Construct Frontmatter (keeps the original, unsanitized title).
  const finalContent = addFrontmatter(mdBody, {
    title: titleRaw,
    url: window.location.href,
    date: date
  });

  const blob = new Blob([finalContent], { type: 'text/markdown' });
  const blobUrl = URL.createObjectURL(blob);

  // A temporary <a download> element is the portable way to start a
  // client-side download from a content script.
  const a = document.createElement('a');
  a.href = blobUrl;
  a.download = filename;
  document.body.appendChild(a);
  try {
    a.click();
  } finally {
    // Always detach the helper anchor, even if click() throws.
    document.body.removeChild(a);
    // Revoke on a later task instead of synchronously: some browsers start
    // reading the blob only after the click handler returns, and revoking
    // the URL immediately can cancel the download.
    setTimeout(() => URL.revokeObjectURL(blobUrl), 1000);
  }
}
162+
120163
// --- Logic Initialization ---
121164

122165
function initializeSettings(container, popup, updateUI) {
@@ -471,6 +514,8 @@ if (typeof module !== "undefined" && module.exports) {
471514
getVirtualScroll,
472515
handleResize,
473516
handleKeyNavigation,
474-
setupTapNavigation
517+
handleKeyNavigation,
518+
setupTapNavigation,
519+
triggerDownload
475520
};
476521
}

src/content/ui.js

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,13 @@ function createBottomMenu(container, content) {
5454
// Page Counter
5555
const pageCounter = createPageCounter(content);
5656

57+
// Download Button
58+
const downloadBtn = document.createElement("button");
59+
downloadBtn.className = "releaf-btn";
60+
downloadBtn.dataset.role = "download-btn";
61+
downloadBtn.title = "Download Markdown (.md)";
62+
downloadBtn.innerHTML = createIconSvg('download');
63+
5764
// Close Button
5865
const closeBtn = document.createElement("button");
5966
closeBtn.className = "releaf-btn";
@@ -66,6 +73,7 @@ function createBottomMenu(container, content) {
6673

6774
bottomMenu.appendChild(settingsBtn);
6875
bottomMenu.appendChild(pageCounter);
76+
bottomMenu.appendChild(downloadBtn);
6977
bottomMenu.appendChild(closeBtn);
7078

7179
return bottomMenu;

src/content/utils.js

Lines changed: 151 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ function cleanupNodes(element) {
117117
// Daum/Naver specific noise
118118
'.layer_util', '.box_setting', '.util_view', '.wrap_util',
119119
'.box_layer', '.img_mask', '.btn_util',
120+
// Daum/Kakao Headers
121+
'#kakaoHead', '.gnb_comm', '.d_head', '#kakaoGnb',
120122
// Daum/Naver Footers & Related
121123
'.foot_view', '.box_recommend', '.txt_copyright', '.box_etc',
122124
'#foot_view', '.kakao_ad', '.art_copy',
@@ -141,7 +143,10 @@ function cleanupNodes(element) {
141143
'[class*="copyright"]', '[class*="footer"]', '[class*="related"]',
142144
'[id*="copyright"]', '[id*="footer"]', '[id*="related"]',
143145
// NYT / Ad noise
144-
'#top-wrapper', '#top-slug', 'div[class*="ad-"]'
146+
'#top-wrapper', '#top-slug', 'div[class*="ad-"]',
147+
// Wikipedia Cleanup
148+
'.mw-indicators', '#siteSub', '.mw-editsection', '.hatnote',
149+
'.shortdescription', '#catlinks', '.navbox', '.infobox'
145150
];
146151

147152
element.querySelectorAll(unwantedSelectors.join(', ')).forEach(el => el.remove());
@@ -439,7 +444,9 @@ function createIconSvg(name) {
439444
'margin-h': '<path d="M3 12h18"/><path d="m7 8-4 4 4 4"/><path d="m17 8 4 4-4 4"/>',
440445
view: '<rect width="18" height="18" x="3" y="3" rx="2" ry="2"/><line x1="9" x2="15" y1="3" y2="3"/><line x1="9" x2="15" y1="21" y2="21"/>',
441446
'page-1': '<rect x="5" y="4" width="14" height="16" rx="2" />',
442-
'page-2': '<rect x="4" y="4" width="16" height="16" rx="2" /><line x1="12" y1="4" x2="12" y2="20" />'
447+
'page-2': '<rect x="4" y="4" width="16" height="16" rx="2" /><line x1="12" y1="4" x2="12" y2="20" />',
448+
// Markdown Mark (M + Down Arrow)
449+
download: '<rect x="3" y="3" width="18" height="18" rx="2" ry="2"></rect><path d="M7 8v8"></path><path d="M17 8v8"></path><path d="M7 8l5 5l5-5"></path><line x1="12" y1="8" x2="12" y2="13"></line>'
443450
};
444451
return `<svg width="22" height="22" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">${icons[name] || ''}</svg>`;
445452
}
@@ -453,6 +460,148 @@ if (typeof module !== 'undefined' && module.exports) {
453460
cleanupNodes,
454461
sanitizeAndFixContent,
455462
extractContent,
463+
extractContent,
464+
domToMarkdown,
465+
addFrontmatter,
456466
createIconSvg
457467
};
458468
}
469+
/**
 * Converts a DOM element, document fragment, text node or HTML string to
 * Markdown.
 *
 * Supported constructs: headings, paragraphs, ordered/unordered lists,
 * blockquotes, fenced code blocks, inline code, tables (with a GFM delimiter
 * row after the first row), bold/italic, links, images, line breaks and
 * horizontal rules. Unknown elements are transparent: only their children
 * are rendered.
 *
 * @param {Node|string|null|undefined} root Subtree (or HTML source) to convert.
 * @returns {string} Markdown text, trimmed, with runs of blank lines collapsed.
 */
function domToMarkdown(root) {
  if (root === null || root === undefined) return '';

  let source = root;
  if (typeof source === 'string') {
    // NOTE(review): innerHTML on a detached <div> does not run <script>, but
    // it can still load images and fire inline handlers such as onerror.
    // Only pass trusted or already-sanitized HTML strings here.
    const container = document.createElement('div');
    container.innerHTML = source;
    source = container;
  }

  // Running item number per <ol>, so ordered lists get "1.", "2.", ...
  const orderedCounters = new WeakMap();
  // Tables (or row parents) that already received their delimiter row.
  const tablesWithDelimiter = new WeakSet();

  const tagOf = (node) => (node && node.tagName ? node.tagName.toLowerCase() : '');

  // Replace 3+ consecutive line breaks (optionally separated by whitespace)
  // with a single blank line.
  const squeezeBlankLines = (text) => text.replace(/(\n\s*){3,}/g, '\n\n');

  // Whether the conversion starts inside a <pre>; from there on the flag is
  // propagated down the recursion instead of being re-queried per text node.
  const isInsidePre = (node) => {
    const el = node.nodeType === Node.ELEMENT_NODE ? node : node.parentElement;
    return Boolean(el && typeof el.closest === 'function' && el.closest('pre'));
  };

  const renderChildren = (node, inPre) => {
    if (!node.childNodes) return '';
    let out = '';
    node.childNodes.forEach((child) => {
      out += render(child, inPre);
    });
    return out;
  };

  const render = (node, inPre) => {
    if (node.nodeType === Node.TEXT_NODE) {
      const text = node.textContent;
      // Collapse whitespace unless inside a pre tag.
      return inPre ? text : text.replace(/[\r\n\t]+/g, ' ').replace(/\s{2,}/g, ' ');
    }

    if (node.nodeType !== Node.ELEMENT_NODE) return '';

    const tag = tagOf(node);
    const parentTag = tagOf(node.parentElement);

    if (/^h[1-6]$/.test(tag)) {
      const level = Number.parseInt(tag[1], 10);
      // Blank lines on both sides keep the heading a block of its own.
      return `\n\n${'#'.repeat(level)} ${renderChildren(node, inPre)}\n\n`;
    }

    switch (tag) {
      case 'p':
      case 'ul':
      case 'ol':
      case 'table':
        return `\n\n${renderChildren(node, inPre)}\n\n`;

      case 'li': {
        const list = node.parentElement;
        let marker = '- ';
        if (list && parentTag === 'ol') {
          const position = (orderedCounters.get(list) || 0) + 1;
          orderedCounters.set(list, position);
          marker = `${position}. `;
        }
        // Each item starts on its own line.
        return `\n${marker}${renderChildren(node, inPre)}`;
      }

      case 'blockquote': {
        // Render the body first, then prefix every line, so nested blocks
        // (paragraphs, lists, ...) stay inside the quote.
        const inner = squeezeBlankLines(renderChildren(node, inPre)).trim();
        if (!inner) return '';
        const quoted = inner
          .split('\n')
          .map((line) => (line ? `> ${line}` : '>'))
          .join('\n');
        return `\n\n${quoted}\n\n`;
      }

      case 'pre':
        return '\n```\n' + renderChildren(node, true) + '\n```\n\n';

      case 'code': {
        const body = renderChildren(node, inPre);
        // <pre><code> is already fenced by the <pre> branch.
        return parentTag === 'pre' ? body : '`' + body + '`';
      }

      case 'tr': {
        const cells = Array.from(node.childNodes).filter(
          (child) =>
            child.nodeType === Node.ELEMENT_NODE &&
            (tagOf(child) === 'td' || tagOf(child) === 'th')
        );
        let row = '\n|' + cells.map((cell) => render(cell, inPre)).join('');

        // GFM only recognizes a table when the first row is followed by a
        // delimiter row, so emit one once per table.
        let table = node.parentElement;
        while (table && tagOf(table) !== 'table') table = table.parentElement;
        const owner = table || node.parentElement;
        if (cells.length > 0 && owner && !tablesWithDelimiter.has(owner)) {
          tablesWithDelimiter.add(owner);
          row += '\n|' + ' --- |'.repeat(cells.length);
        }
        return row;
      }

      case 'td':
      case 'th': {
        // A table row must stay on one line and may not contain raw pipes.
        const cell = renderChildren(node, inPre)
          .replace(/\s*\n\s*/g, ' ')
          .trim()
          .replace(/\|/g, '\\|');
        return ` ${cell} |`;
      }

      case 'div':
        return `\n${renderChildren(node, inPre)}\n`;

      case 'b':
      case 'strong':
        return `**${renderChildren(node, inPre)}**`;

      case 'i':
      case 'em':
        return `*${renderChildren(node, inPre)}*`;

      case 'a': {
        const label = renderChildren(node, inPre);
        const rawHref = typeof node.getAttribute === 'function' ? node.getAttribute('href') : null;
        const href = typeof node.href === 'string' ? node.href : '';
        // Anchors without a usable href are rendered as plain text.
        if (!href || !rawHref || rawHref.trim() === '') return label;
        return `[${label}](${href})`;
      }

      case 'img': {
        if (!node.src) return '';
        const image = `![${node.alt || ''}](${node.src})`;
        // No newlines if inside link or table cell.
        const inline = parentTag === 'a' || parentTag === 'td' || parentTag === 'th';
        return inline ? image : `\n${image}\n`;
      }

      case 'br':
        // Two trailing spaces are required for a Markdown hard line break.
        return '  \n';

      case 'hr':
        // Surround with blank lines: "text\n---" would otherwise be parsed
        // as a setext heading.
        return '\n\n---\n\n';

      default:
        return renderChildren(node, inPre);
    }
  };

  // Text nodes expose an empty childNodes list, so render them directly;
  // containers (elements, fragments) contribute only their children.
  const isLeaf = source.nodeType === Node.TEXT_NODE || !source.childNodes;
  const startsInPre = isInsidePre(source);
  const markdown = isLeaf ? render(source, startsInPre) : renderChildren(source, startsInPre);

  return squeezeBlankLines(markdown).trim();
}
588+
589+
/**
 * Prepend YAML frontmatter to markdown content.
 *
 * `title` and `url` are written as YAML double-quoted scalars. They are
 * encoded with `JSON.stringify`, whose string escapes (`\"`, `\\`, `\n`,
 * `\uXXXX`, ...) are valid inside YAML double quotes, so quotes, backslashes
 * and line breaks in a page title cannot break the frontmatter block.
 *
 * @param {string} markdown Markdown body; null/undefined is treated as empty.
 * @param {Object} [metadata] { title, url, date }
 * @param {string} [metadata.title] Document title; defaults to "Untitled".
 * @param {string} [metadata.url] Source URL; defaults to an empty string.
 * @param {string} [metadata.date] Date written verbatim; defaults to the
 *   current UTC date as YYYY-MM-DD.
 * @returns {string} Frontmatter block, a blank line, then the markdown body.
 */
function addFrontmatter(markdown, { title, url, date } = {}) {
  const quote = (value) => JSON.stringify(String(value));

  const safeTitle = quote(title || 'Untitled');
  const safeUrl = quote(url || '');
  const safeDate = date || new Date().toISOString().split('T')[0];
  const body = markdown === null || markdown === undefined ? '' : markdown;

  return [
    '---',
    `title: ${safeTitle}`,
    `url: ${safeUrl}`,
    `date: ${safeDate}`,
    '---',
    '',
    body,
  ].join('\n');
}

0 commit comments

Comments
 (0)