/**
 * Ordered clean-up rules applied to every line of a Google Scholar result page.
 *
 * Each entry is `[pattern, replacement]`. Order matters: later rules run on the
 * output of earlier ones (for example, whitespace is collapsed only after every
 * removal has left its padding space behind).
 *
 * The patterns carry the `g` flag and are shared across calls. This is safe
 * because `String.prototype.replace` resets `lastIndex` before it starts.
 */
const SCHOLAR_NOISE_RULES: ReadonlyArray<readonly [RegExp, string]> = [
    // Bracketed tags such as [PDF], [HTML], [BOOK].
    [/\[.*?\]/g, ''],
    // Absolute http(s) URLs.
    [/https?:\/\/[^\s]+/g, ' '],
    // "Cite" button label (must be followed by whitespace, so "Cited" is untouched).
    [/Cite\s+/g, ' '],
    // Citation counts. The quantifier is greedy so the whole number is consumed;
    // a lazy `\d+?` with nothing after it stops after the first digit.
    [/Cited by \d+/g, ' '],
    // "Related articles" link label.
    [/Related articles/g, ' '],
    // Version counts, e.g. "All 12 versions".
    [/All \d+ versions/g, ' '],
    // "View as HTML" link label.
    [/View as HTML/g, ' '],
    // "Fulltext via <provider>" link label.
    [/Fulltext via \w+/g, ' '],
    // "Cached" link label.
    [/Cached/g, ''],
    // "Save" button artifact.
    // NOTE(review): the three dots are unescaped, so this matches ANY three
    // characters followed by "Save" and whitespace. Confirm whether a literal
    // "..." was intended before tightening it; behaviour is kept as-is here.
    [/...Save\s+/g, ' '],
    // Bare domains. There is deliberately no trailing word boundary, so a domain
    // glued onto the text that follows it (e.g. "arxiv.orgTitle") is still removed.
    [/\S+\.(com|org|net|uk)/g, ' '],
    // arXiv identifiers: every occurrence on the line, in any letter case
    // ("arxiv:..." as well as the canonical "arXiv:...").
    [/arxiv:\S+/gi, ' '],
    // Shorten an ASCII ellipsis to a single full stop.
    [/\.\.\./g, '.'],
    // Collapse the runs of whitespace left behind by the removals above.
    [/\s{2,}/g, ' '],
];

/**
 * Strips Google Scholar page chrome from an extracted page result.
 *
 * `webSearch` in `src/content-scripts/ddg_search.ts` passes the result of the
 * `get_webpage_text` message through this function when the result's title is
 * exactly "Google Scholar".
 *
 * The object is modified in place and the same reference is returned.
 *
 * @param result - Extracted page; its `body` is cleaned when it is a string.
 * @returns The same `result` object, with `body` cleaned when possible.
 */
function formatGoogleScholarResponse(result: any): any {
    // Leave the object untouched rather than throwing when no text was extracted.
    if (result != null && typeof result.body === 'string') {
        result.body = cleanResponseText(result.body);
    }
    return result;
}

/**
 * Removes Google Scholar UI noise (button labels, citation counts, URLs,
 * bracketed tags, arXiv identifiers) from extracted page text.
 *
 * The text is processed line by line: every rule in `SCHOLAR_NOISE_RULES` is
 * applied in order, the line is trimmed, and lines that end up empty are dropped.
 *
 * @param text - Raw page text with `\n` line separators.
 * @returns The cleaned text, with the surviving lines joined by `\n`.
 */
function cleanResponseText(text: string): string {
    return text
        .split('\n')
        .map((line) =>
            SCHOLAR_NOISE_RULES.reduce(
                (cleaned, [pattern, replacement]) => cleaned.replace(pattern, replacement),
                line,
            ).trim(),
        )
        .filter((line) => line.length > 0)
        .join('\n');
}