Skip to content

Commit fcdbded

Browse files
authored
fix(OrganicResults): Update desc selector and fix other minor issues (#62)
* fix(OrganicResults): Update desc selector and fix other minor issues
* chore: Update tests
* chore: Update package version
1 parent 5bca62f commit fcdbded

7 files changed

Lines changed: 33 additions & 42 deletions

File tree

lib/core/nodes/FeaturedSnippet.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class FeaturedSnippet {
1818
$(Constants.SELECTORS.FEATURED_SNIPPET_TITLE[1]).text() ||
1919
$(Constants.SELECTORS.FEATURED_SNIPPET_TITLE[2]).text();
2020

21-
const featured_snippet_url = $(Constants.SELECTORS.FEATURED_SNIPPET_URL).map((i, el) => $(el).attr('href')).get()[0];
21+
const featured_snippet_url = $(Constants.SELECTORS.FEATURED_SNIPPET_URL).map((_i, el) => $(el).attr('href')).get()[0];
2222

2323
const featured_snippet = Constants.SELECTORS.FEATURED_SNIPPET_DESC.map((selector) => {
2424
if ($(selector)[0] && selector != Constants.SELECTORS.FEATURED_SNIPPET_DESC[2]) {

lib/core/nodes/OrganicResults.js

Lines changed: 23 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,19 @@ const Constants = require('../../utils/constants');
55
class OrganicResult {
66
/** @type {string} */
77
title;
8-
8+
99
/** @type {string} */
1010
description;
11-
11+
1212
/** @type {string} */
1313
url;
1414

1515
/** @type {boolean} */
1616
is_sponsored;
17-
17+
1818
/** @type {{ high_res: string; low_res: string; }} */
1919
favicons;
20-
20+
2121
constructor(data) {
2222
this.title = data.title;
2323
this.description = data.description;
@@ -46,23 +46,23 @@ class OrganicResults {
4646

4747
const titles = $(Constants.SELECTORS.TITLE)
4848
.map((_i, el) => {
49-
const is_ad =
50-
el.parent.attribs.style == '-webkit-line-clamp:2' ||
49+
const is_ad =
50+
Reflect.has(el.parent?.parent?.parent?.attribs || {}, 'data-rw') ||
5151
(!is_mobile && el.parent.attribs.class.startsWith('vdQmEd'));
52-
52+
5353
// Ignore ad titles if parse_ads is false
5454
if (!parse_ads && is_ad)
5555
return null;
5656

57-
return is_mobile ?
57+
return is_mobile ?
5858
$(el).text().trim() : $(el).find('h3').text().trim() || $(el).find('a > div > span').first().text().trim();
5959
}).get();
6060

6161
const descriptions = $(Constants.SELECTORS.DESCRIPTION)
6262
.map((_i, el) => {
63-
const is_ad = el.parent.attribs.class == 'w1C3Le' ||
63+
const is_ad = Reflect.has(el.parent?.parent?.parent?.attribs || {}, 'data-text-ad') ||
6464
(!is_mobile && !Object.keys(el.parent.attribs).length);
65-
65+
6666
// Ignore ad descriptions if parse_ads is false
6767
if (!parse_ads && is_ad) {
6868
return null;
@@ -75,31 +75,25 @@ class OrganicResults {
7575

7676
const urls = $(is_mobile ? Constants.SELECTORS.URL : `${Constants.SELECTORS.TITLE} > a`)
7777
.map((_i, el) => {
78-
const is_ad = el.parent?.parent?.attribs?.class?.startsWith('vdQmEd');
79-
80-
/**
81-
* Since the selector for URLs is the same as the one for titles on desktop,
82-
* we need to check if the element is an ad. If we're parsing the mobile page,
83-
* then ads are simply stripped out of the results.
84-
*/
85-
if (!is_mobile && !parse_ads && is_ad) {
78+
const is_ad = !!$(el).attr('data-rw') || el.parent?.parent?.attribs?.class?.startsWith('vdQmEd');
79+
80+
if (!parse_ads && is_ad)
8681
return null;
87-
}
8882

8983
return $(el).attr('href');
9084
}).get();
91-
85+
9286
// Refine results
9387
if (titles.length < urls.length && titles.length < descriptions.length) {
9488
urls.shift();
9589
}
96-
90+
9791
if (urls.length > titles.length) {
9892
urls.shift();
9993
}
100-
94+
10195
const is_innacurate_data = descriptions.length < urls.slice(1).length;
102-
96+
10397
urls.forEach((item, index) => {
10498
// Why YouTube? Because video results usually don't have a description.
10599
if (item.includes('m.youtube.com') && is_innacurate_data) {
@@ -108,23 +102,23 @@ class OrganicResults {
108102
index--;
109103
}
110104
});
111-
105+
112106
const results = [];
113-
107+
114108
for (let i = 0; i < titles.length; i++) {
115109
const title = titles[i];
116110
const description = descriptions[i];
117-
111+
118112
let url = urls[i];
119113

120114
// Some results have a different URL format (AMP and ad results).
121115
if (url?.startsWith('/aclk') || url?.startsWith('/amp/s')) {
122116
url = `${Constants.URLS.W_GOOGLE}${url.substring(1)}`;
123117
}
124-
118+
125119
const high_res_favicon = `${Constants.URLS.FAVICONKIT}/${new URL(url || Constants.URLS.W_GOOGLE).hostname}/192`;
126120
const low_res_favicon = `${Constants.URLS.W_GOOGLE}s2/favicons?sz=64&domain_url=${new URL(url || Constants.URLS.W_GOOGLE).hostname}`;
127-
121+
128122
if (titles[i] && descriptions[i] && urls[i]) {
129123
results.push(new OrganicResult({
130124
title,
@@ -138,7 +132,7 @@ class OrganicResults {
138132
}));
139133
}
140134
}
141-
135+
142136
return results;
143137
}
144138
}

lib/utils/constants.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ module.exports = {
3838
// Featured Snippet
3939
FEATURED_SNIPPET_TITLE: ['div[class="co8aDb gsrt"]', 'a[class="sXtWJb gsrt"]', 'div[class="Xv4xee"]'],
4040
FEATURED_SNIPPET_DESC: ['ol[class="X5LH0c"]', 'ul[class="i8Z77e"]', 'div[data-attrid="wa:/description"]'],
41-
FEATURED_SNIPPET_URL: 'div > div > h3 > a',
41+
FEATURED_SNIPPET_URL: 'div > div > div > div > div > h3 > div > span > a',
4242

4343
// Unit converter
4444
UNIT_CONVERTER_INPUT: 'div.rpnBye > input',

lib/utils/utils.js

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,9 @@ function getHeaders(options = { mobile: false }) {
4343
function refineData (data, parse_ads = false, is_mobile = true) {
4444
let result = data
4545
// Removes classes we don't need:
46-
.replace(/N6jJud MUxGbd lyLwlc/g, '')
47-
.replace(/YjtGef ExmHv MUxGbd/g, '')
48-
.replace(/MUxGbd lyLwlc aLF0Z/g, '')
46+
.replace(/N6jJud VwiC3b lyLwlc/g, '')
47+
.replace(/YjtGef ExmHv VwiC3b/g, '')
48+
.replace(/VwiC3b lyLwlc aLF0Z/g, '')
4949

5050
/*
5151
* Transforms all possible variations of some classes' name into a
@@ -54,10 +54,12 @@ function refineData (data, parse_ads = false, is_mobile = true) {
5454

5555
// Descriptions: -> MUxGbd yDYNvb
5656
.replace(/yDYNvb lEBKkf/g, 'yDYNvb')
57+
.replace(/VwiC3b yDYNvb/g, 'MUxGbd yDYNvb')
5758
.replace(/VwiC3b MUxGbd yDYNvb/g, 'MUxGbd yDYNvb')
5859

5960
// Urls: -> C8nzq BmP5tf
6061
.replace(/cz3goc BmP5tf/g, 'C8nzq BmP5tf')
62+
.replace(/cz3goc v5yQqb BmP5tf/g, 'C8nzq BmP5tf')
6163

6264
// Titles: -> ynAwRc q8U8x MBeuO gsrt oewGkc LeUQr
6365
.replace(/ynAwRc q8U8x MBeuO oewGkc LeUQr/g, 'ynAwRc q8U8x MBeuO gsrt oewGkc LeUQr')

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "googlethis",
3-
"version": "1.7.1",
3+
"version": "1.8.0",
44
"description": "A simple yet powerful module to retrieve organic search results and much more from Google.",
55
"main": "lib/index.js",
66
"author": "LuanRT <luan.lrt4@gmail.com> (https://github.com/LuanRT)",

test/main.test.js

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,6 @@ describe('GoogleThis Tests', () => {
88
expect(search.results).not.toHaveLength(0);
99
});
1010

11-
it('Should search using a desktop user agent', async () => {
12-
const search = await google.search('Stephen Hawking', { use_mobile_ua: false });
13-
expect(search.results).not.toHaveLength(0);
14-
});
15-
1611
it('Should search images', async () => {
1712
const search = await google.image('Supermassive Blackhole');
1813
expect(search).not.toHaveLength(0);

0 commit comments

Comments
 (0)