From c2e0514f897c5badc99b3012a5c694a7f6595dca Mon Sep 17 00:00:00 2001 From: Halil Can Memoglu Date: Sun, 9 Apr 2023 00:01:09 -0400 Subject: [PATCH 1/4] add: initial files --- .gitignore | 3 ++- build/webchatgpt-3.2.4-chrome.zip | Bin 417893 -> 417893 bytes build/webchatgpt-3.2.4-firefox.zip | Bin 417930 -> 417930 bytes package-lock.json | 4 ++-- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 00f4c9f..21197aa 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ node_modules /build/* !build/webchatgpt*.zip -/*.log \ No newline at end of file +/*.log +*.zip diff --git a/build/webchatgpt-3.2.4-chrome.zip b/build/webchatgpt-3.2.4-chrome.zip index 66a5817f1bea978b8d2cbe3e36e00abe3e32efb7..27afcb436f1aa7f2620cbe0fe769304127425320 100644 GIT binary patch delta 1364 zcmZuwUr19?7(ct$`{hJyqCvY$o%CQtZS>;E$Owe8EnChu+jQ5lt~%QvbHfNh6dWO8 z6qKh7@})-+)NBPMKJ{j%QUoTYI(QV5aMIj1}C-Sk}UcJBB6zTfw|zw_PoS>yVw zvA}8PbBa+exgL}brd~ba#zcJ{qASNdh+FI+-m56!$-du~wCSd8Tw;Je-J0Cu$jzrF zw{qUh^R)K*OYZ$)noHHLaeFwnbA9EG8ui=!czl4HXNkTfcf!cZdgg@NJd16v2(1yi zqis$5qSg#;m;g`jP7<|{ieJm=D?+jPNdy5tB$7rLPYzmULy_qw10H(iEg8c(9O#y%F1b4-->`H>+njbCcZ^qOXWkE{vS|?^DI5z1!gAbt zRVLIF^nW1eNm~gU_gTaa50o+u)+;379>ohDFcx;iwRn2gRgGZc%VGjQs-H7_;~eKlFl`ImP@vSq-YcGW}#!f2m)mNEI(v1NM2LBuG{4 zZwuZ(twSilAyk~9V&fd_aH2gC31@M-JyV0z9gMOuD(yz9(F%f!61y-@hkfdOk+6_r5EpCBJ{{WC+e6#=n delta 1280 zcmZvbTSyd97{|}dIA1BOt)}`*6D**~|2vX^J-#8HEFtO} zN_ss*14k>S#k&G~+d2nroLa#TUWk-pJZ%&LPC4_jbFl_lI* z3Ody$VO$EYm~Flqqej1ZNp;C%beat^@e3LB zIJFAQxb`A+sX(T_+MbY>(DN~eH{$^#j}W%nL1JEwb|T~<+-rw}Ec_D(7=_23Jbvqd z91}SW+uT6V^lLvt;R%E!Hg5Ehjh=Tjcky_y6U;K%XqzXqZ@rb+!({2>u<=Jc3i!P( zemU%KY4EomZVJ|0EgXMH*q%IF_qz4oU@k)aWTnMz1$NGurvH!14Wu_cPVm< zOOdTIp})8wi_se{DAs7XR-xN!V}pEC<*hi`>sIJ?cPzcE%A6-QrO%_J?DA+SkFz3@ WWeq&Vl!&Y_tlFQl;+kFu75@M`RD9t8 diff --git a/build/webchatgpt-3.2.4-firefox.zip b/build/webchatgpt-3.2.4-firefox.zip index ea9565f72d6ec8a31aaa59ed35c88a6cec01d06d..05ff54beb9e9076257af7fd5d6dbf684f625b5fc 100644 GIT binary patch delta 1168 zcmZuwZ%9*76yM(Sot!ipXvE&8E~sXq^HU8IiXsqO<}};eKldi9$jD~8z%NCDB`Sdh zbt0h&5rLqRXe*G)FGU~3G(-j&7=7tOK@b#$(7k8x>0G{X*?qrr&hPg-_ny~3W9^@@ zn!O$|ufV{cJVzg{G=O~DX%^YO+kL}roN^0^9`SBTUuvvJn2`Anlh1`Yp3pnGDzGJ{ zMpyG*&zXu0`RvD2FTW5L%Xmhr_Onnf(5~x?P;NDC6vDw-iKJ4oq)=rgnc-P!K%@%$ zCn?&t&Whs;(lY6@6kBTKgA^~I3fH{UWz1zxdcaJ{;~sd!>8LZyxsFQS@0wNKi;UAJ z;3dzwcN%2GI1%ImvLm@8JW#So**2HEUA5X&Anhrjx*W`(Rz8=1=$CkX=F=GQP z>oF(kQpxT{c&b|}Y)`018*@a-o#PZt>BX|8<4R5rG1;47MPu5h_m$RwVoQ^fQ^Rj$ZFaKj_N|x}CkEL3yk*PE2Zys>I6onwTnLpY7yR)VnSn8U)K@SyZreH7S zgC~%3DI(~>P}D7o$R2tVGcge=FnZ~sr+gEmd+*+He`<%_nfcB+-}jw!&yFuC<4cOU z#UW+o3iMOaIEq`Xpx&=AOXPXTe#dTFu#3?VX{UZDJ~JZDsT7ZQ&&#z6#qfm#RJ=JY zF0hP0VR`+CR8BHJKaeX$mMc%ml}b*&kWDU9J601D$ybWk#IoM4Eqm2Crr0umdY>hb_)@%;;;A14Wj0 zS4IutEyC^~Sg^|p^NhUH2O8G6ppd<_yP%HoOpu3CGBxMgWPj;)fglXd3WCaZi~f`d zi$l(75wCh6(bq>8XyU|6{N>_6zzv$z{Yu0eZa73Y2_I7A#CH*2yFtB}$!$&JzVbs9 z!UNHsaDSwl-)&uyNQ=WHR!Ms+==`m$-yU8Og~ihSf|5oM+hyhou*nVCDf>g)E$&&C z1-&=i)!7@0RI^P@($bBb^6)7<@{-|fdmtZ|y^x!-vkD1Fk3Y$1_d+=vSdb#aYck&Q zLJ^zgrU$gLyl?V9ja=q^B>yt(5O%!e5U9NBgL>m^>=V-${id&+>r{88t~IJcwYbtbBGD2S+5I?VTlh8;OPJyWimwlI1_#zNEOc0 axq?mS3e4s`o!gw#c{YO&202_Cgx0_LsBfbH diff --git a/package-lock.json b/package-lock.json index e84ae52..2592382 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "webchatgpt", - "version": "2.2.0", + "version": "3.2.4", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "webchatgpt", - "version": "2.2.0", + "version": "3.2.4", "license": "MIT", "dependencies": { "@mozilla/readability": "^0.4.2", From e72a4b74082f32b1cdf04d4085982ad4e3da92df Mon Sep 17 00:00:00 2001 From: Halil Can Memoglu Date: Sun, 9 Apr 2023 00:33:42 -0400 Subject: [PATCH 2/4] add: filter added, finetuning. --- src/content-scripts/ddg_search.ts | 45 ++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/src/content-scripts/ddg_search.ts b/src/content-scripts/ddg_search.ts index 5412bab..4f388e9 100644 --- a/src/content-scripts/ddg_search.ts +++ b/src/content-scripts/ddg_search.ts @@ -63,6 +63,7 @@ function htmlToSearchResults(html: string, numResults: number): SearchResult[] { // Extract zero-click info, if present const zeroClickLink = $(`table:nth-of-type(${numTables-1}) tr td a[rel="nofollow"]`).first() if (zeroClickLink.length > 0) { + console.log("zeroClick: " + zeroClickLink); results.push({ title: zeroClickLink.text(), body: $('table:nth-of-type(2) tr:nth-of-type(2)').text().trim(), @@ -77,6 +78,8 @@ function htmlToSearchResults(html: string, numResults: number): SearchResult[] { webLinks.each((i, element) => { const link = $(element) const snippet = $(webSnippets[i]).text().trim() + console.log(link); + console.log(snippet); results.push({ title: link.text(), @@ -85,6 +88,8 @@ function htmlToSearchResults(html: string, numResults: number): SearchResult[] { }) }) + console.log('ddg results: '); + console.log(results); return results } @@ -98,11 +103,18 @@ export async function webSearch(search: SearchRequest, numResults: number): Prom if (response.url === `${BASE_URL}/lite/`) { results = htmlToSearchResults(response.html, numResults) } else { - const result = await Browser.runtime.sendMessage({ + let result = await Browser.runtime.sendMessage({ type: "get_webpage_text", url: response.url, html: response.html }) + console.log('non-ddg response: '); + console.log(result); + if (result.title && result.title === "Google Scholar") { + result = formatGoogleScholarResponse(result); + console.log('cleaned gsc response: '); + console.log(result); + } return [{ title: result.title, @@ -113,3 +125,34 @@ export async function webSearch(search: SearchRequest, numResults: number): Prom return results } + +function formatGoogleScholarResponse(result: any): any { + result.body = cleanResponseText(result.body); + return result; +} + +function cleanResponseText(text: string): string { + const lines = text.split('\n'); + const cleanedLines: string[] = []; + + for (const line of lines) { + const cleanedLine = line + .replace(/\[.*?\]/g, '') // Remove tags like [PDF], [HTML], etc. + .replace(/https?:\/\/[^\s]+/g, ' ') // Remove URLs + .replace(/Cite\s+/g, ' ') // Remove Cite button links + .replace(/Cited by \d+?/g, ' ') // Remove citation counts + .replace(/Related articles/g, ' ') // Remove 'Related articles' + .replace(/All \d+? versions/g, ' ') // Remove version counts + .replace(/View as HTML/g, ' ') // Remove 'View as HTML' + .replace(/Fulltext via \w+/g, ' ') // Remove 'Fulltext via X' + .replace(/Cached/g, '') // Remove 'Cached' + .replace(/...Save\s+/g, ' ') // Remove Save button artifact + .replace(/\S+\.(com|org|net|uk)/g, ' ') // Remove right-joined url artifacts + .replace(/\s{2,}/g, ' ') // Trim inner extra spaces + .trim(); + if (cleanedLine) { + cleanedLines.push(cleanedLine); + } + } + return cleanedLines.join('\n'); +} From 5296e3a06415c8ba7b9e152001e551f9106e0cb0 Mon Sep 17 00:00:00 2001 From: Halil Can Memoglu Date: Sun, 9 Apr 2023 00:40:55 -0400 Subject: [PATCH 3/4] gsc artifact filter satisfactory clean: debug console.log()s. --- src/content-scripts/ddg_search.ts | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/content-scripts/ddg_search.ts b/src/content-scripts/ddg_search.ts index 4f388e9..f0d22fe 100644 --- a/src/content-scripts/ddg_search.ts +++ b/src/content-scripts/ddg_search.ts @@ -63,7 +63,6 @@ function htmlToSearchResults(html: string, numResults: number): SearchResult[] { // Extract zero-click info, if present const zeroClickLink = $(`table:nth-of-type(${numTables-1}) tr td a[rel="nofollow"]`).first() if (zeroClickLink.length > 0) { - console.log("zeroClick: " + zeroClickLink); results.push({ title: zeroClickLink.text(), body: $('table:nth-of-type(2) tr:nth-of-type(2)').text().trim(), @@ -78,8 +77,6 @@ function htmlToSearchResults(html: string, numResults: number): SearchResult[] { webLinks.each((i, element) => { const link = $(element) const snippet = $(webSnippets[i]).text().trim() - console.log(link); - console.log(snippet); results.push({ title: link.text(), @@ -88,8 +85,6 @@ function htmlToSearchResults(html: string, numResults: number): SearchResult[] { }) }) - console.log('ddg results: '); - console.log(results); return results } @@ -108,12 +103,8 @@ export async function webSearch(search: SearchRequest, numResults: number): Prom url: response.url, html: response.html }) - console.log('non-ddg response: '); - console.log(result); if (result.title && result.title === "Google Scholar") { result = formatGoogleScholarResponse(result); - console.log('cleaned gsc response: '); - console.log(result); } return [{ @@ -148,6 +139,8 @@ function cleanResponseText(text: string): string { .replace(/Cached/g, '') // Remove 'Cached' .replace(/...Save\s+/g, ' ') // Remove Save button artifact .replace(/\S+\.(com|org|net|uk)/g, ' ') // Remove right-joined url artifacts + .replace(/arxiv:\S+/, ' ') // Remove arxiv code + .replace(/\.\.\./g, '.') // Trim ellipsis .replace(/\s{2,}/g, ' ') // Trim inner extra spaces .trim(); if (cleanedLine) { From f4cc9dc2959eb5b8fdeaac93f567f3ac9321a6ab Mon Sep 17 00:00:00 2001 From: Halil Can Memoglu Date: Sun, 9 Apr 2023 00:43:17 -0400 Subject: [PATCH 4/4] modify: undo .gitignore change --- .gitignore | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 21197aa..00f4c9f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,4 @@ node_modules /build/* !build/webchatgpt*.zip -/*.log -*.zip +/*.log \ No newline at end of file