-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcontentScript.js
More file actions
148 lines (131 loc) · 5.36 KB
/
contentScript.js
File metadata and controls
148 lines (131 loc) · 5.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
// contentScript.js
// Prevent duplicate message listeners by checking if we've already set up listeners
if (window.summariseExtensionContentScriptLoaded) {
console.log('Content script already loaded, skipping duplicate setup');
// Exit early to prevent duplicate message listeners
} else {
window.summariseExtensionContentScriptLoaded = true;
console.log('Setting up content script message listeners');
// Function to extract content and metadata for copying
/**
 * Extracts the readable article text plus basic metadata from the current
 * page and passes the result to `sendResponseCallback`.
 *
 * The callback is invoked before this function returns, with one of:
 *   - `{ action: "contentData", content, title, publishedDate, url }`
 *   - `{ action: "contentError", error }`
 *
 * `publishedDate` is always a string; it is `''` when no date could be found.
 *
 * @param {function(Object): void} sendResponseCallback - Receives the result payload.
 * @returns {void}
 */
function extractContentAndMetadata(sendResponseCallback) {
  // `name` values of <meta> tags that are checked for a publication date.
  const dateMetaNames = ['pubdate', 'publishdate', 'date', 'DC.date.issued'];

  // Returns the first non-empty publication date found on the page, or ''.
  const findPublishedDate = () => {
    for (const meta of document.getElementsByTagName('meta')) {
      const isDateMeta =
        meta.getAttribute('property') === 'article:published_time' ||
        dateMetaNames.includes(meta.getAttribute('name'));
      if (!isDateMeta) {
        continue;
      }
      // getAttribute returns null when `content` is absent. A matching tag
      // without a usable value must not end the search, otherwise a later
      // valid tag would be ignored.
      const value = (meta.getAttribute('content') ?? '').trim();
      if (value) {
        return value;
      }
    }

    // No usable meta tag: fall back to the first visible date-like element.
    const [firstDateElement] = document.querySelectorAll('time, .date, .published, .entry-date');
    return (firstDateElement?.innerText ?? '').trim();
  };

  try {
    // Clone the document to avoid modifying the original page
    const documentClone = document.cloneNode(true);
    // Use Readability to parse the page
    const article = new Readability(documentClone).parse();

    if (!article) {
      console.error('Readability failed to parse the page.');
      sendResponseCallback({ action: "contentError", error: "Failed to extract article content" });
      return;
    }

    // Send back all data
    sendResponseCallback({
      action: "contentData",
      content: article.textContent.trim(),
      // Fall back to the document title when Readability found no title.
      title: article.title || document.title,
      publishedDate: findPublishedDate(),
      url: window.location.href
    });
  } catch (error) {
    console.error('Error during content extraction:', error);
    sendResponseCallback({ action: "contentError", error: error.message });
  }
}
// Function to extract content for summarization
/**
 * Extracts the readable article text from the current page and forwards it
 * to the extension runtime for summarization.
 *
 * On success two runtime messages are sent, in this order:
 *   1. `{ action: "log", message }` reporting the extracted character count.
 *   2. `{ action: "summariseText", text, pageUrl, contentType, pageTitle, publishedDate }`.
 *
 * `publishedDate` is always a string; it is `''` when no date could be found.
 * On failure nothing is sent for summarization and the user is informed via
 * `alert`.
 *
 * @returns {void}
 */
function extractContentForSummarization() {
  // `name` values of <meta> tags that are checked for a publication date.
  const dateMetaNames = ['pubdate', 'publishdate', 'date', 'DC.date.issued'];

  // Returns the first non-empty publication date found on the page, or ''.
  const findPublishedDate = () => {
    for (const meta of document.getElementsByTagName('meta')) {
      const isDateMeta =
        meta.getAttribute('property') === 'article:published_time' ||
        dateMetaNames.includes(meta.getAttribute('name'));
      if (!isDateMeta) {
        continue;
      }
      // getAttribute returns null when `content` is absent. A matching tag
      // without a usable value must not end the search, otherwise a later
      // valid tag would be ignored.
      const value = (meta.getAttribute('content') ?? '').trim();
      if (value) {
        return value;
      }
    }

    // No usable meta tag: fall back to the first visible date-like element.
    const [firstDateElement] = document.querySelectorAll('time, .date, .published, .entry-date');
    return (firstDateElement?.innerText ?? '').trim();
  };

  try {
    // Clone the document to avoid modifying the original page
    const documentClone = document.cloneNode(true);
    // Use Readability to parse the page
    const article = new Readability(documentClone).parse();

    if (!article) {
      console.error('Readability failed to parse the page.');
      alert('Failed to extract article content. Please try a different page.');
      return;
    }

    const text = article.textContent.trim();
    // Fall back to the document title when Readability found no title.
    const title = article.title || document.title;

    // Log the extraction length
    chrome.runtime.sendMessage({ action: "log", message: `Content extracted: ${text.length} characters.` });

    chrome.runtime.sendMessage({
      action: "summariseText",
      text: text,
      pageUrl: window.location.href,
      contentType: "text",
      pageTitle: title,
      publishedDate: findPublishedDate()
    });
  } catch (error) {
    console.error('Error during content extraction:', error);
    alert('An error occurred while extracting content.');
  }
}
// Message listener for content extraction requests
// Dispatch incoming runtime messages by their `action` field.
//   "getContentAndMetadata"          -> reply through sendResponse, return true
//   "extractContentForSummarization" -> start extraction, return false
//   anything else                    -> ignored (listener returns undefined)
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
  switch (request.action) {
    case "getContentAndMetadata":
      console.log("Received request to get content and metadata for copying...");
      extractContentAndMetadata(sendResponse);
      // Returning true tells the runtime that sendResponse is owned by this
      // listener and keeps the message channel open for it.
      return true;

    case "extractContentForSummarization":
      console.log("Received request to extract content for summarization...");
      extractContentForSummarization();
      // Results travel via separate runtime messages, so no response is sent.
      return false;

    default:
      return undefined;
  }
});
// Auto-run summarization when script is injected for that purpose
// Check if we should auto-run (this will be set by the popup when injecting for summarization)
// Two independent triggers: an `autoRun=true` marker in the page's query
// string, or a `data-auto-run="true"` attribute on the executing <script>.
// NOTE(review): the query check is a substring match on the page's own URL,
// so any page whose query string contains `autoRun=true` starts extraction —
// confirm this is intended.
const shouldAutoRun =
  window.location.search.includes('autoRun=true') ||
  document.currentScript?.dataset?.autoRun === 'true';

if (shouldAutoRun) {
  extractContentForSummarization();
}
} // End of duplicate prevention block