Skip to content

Commit 46758db

Browse files
committed
Check and log errors
1 parent be022a3 commit 46758db

File tree

3 files changed

+81
-41
lines changed

3 files changed

+81
-41
lines changed

core/browser.go

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,16 @@ import (
1212
)
1313

1414
type BrowserOpts struct {
15-
IsHeadless bool // Use browser interface
16-
IsLeakless bool // Force to kill browser
17-
Timeout time.Duration // Timeout
18-
LanguageCode string
19-
WaitRequests bool // Wait requests to complete after navigation
15+
IsHeadless bool // Use browser interface
16+
IsLeakless bool // Force to kill browser
17+
Timeout time.Duration // Timeout
18+
LanguageCode string
19+
WaitRequests bool // Wait requests to complete after navigation
20+
LeavePageOpen bool // Leave pages and browser open
2021
}
2122

22-
func (o *BrowserOpts) Check() {
23+
// Initialize browser parameters with default values if they are not set
24+
func (o *BrowserOpts) Init() {
2325
if o.Timeout == 0 {
2426
o.Timeout = time.Second * 30
2527
}
@@ -36,7 +38,7 @@ type Browser struct {
3638
}
3739

3840
func NewBrowser(opts BrowserOpts) (*Browser, error) {
39-
opts.Check()
41+
opts.Init()
4042
logrus.Debugf("Browser options: %+v", opts)
4143

4244
path, has := launcher.LookPath()
@@ -64,17 +66,25 @@ func (b *Browser) Navigate(URL string) *rod.Page {
6466

6567
b.browser = rod.New().ControlURL(b.browserAddr)
6668
b.browser.MustConnect()
67-
b.browser.SetCookies(nil)
69+
//b.browser.SetCookies(nil)
6870

6971
page := stealth.MustPage(b.browser)
7072
wait := page.MustWaitRequestIdle()
7173
page.Navigate(URL)
72-
wait()
74+
75+
// causes bugs in google
76+
if b.WaitRequests {
77+
wait()
78+
}
7379

7480
page.MustEmulate(devices.Device{
7581
UserAgent: uarand.GetRandom(),
7682
AcceptLanguage: b.LanguageCode,
7783
})
84+
85+
// Wait till page loads
86+
time.Sleep(time.Second * 1)
87+
7888
return page
7989
}
8090

google/search.go

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ type Google struct {
2020

2121
func New(browser core.Browser) *Google {
2222
gogl := Google{Browser: browser}
23-
gogl.checkTimeout = time.Second * 2
23+
gogl.checkTimeout = time.Second * 5
2424
gogl.findNumRgxp = regexp.MustCompile("\\d")
2525
return &gogl
2626
}
@@ -49,6 +49,11 @@ func (gogl *Google) FindTotalResults(page *rod.Page) (int, error) {
4949
return total, nil
5050
}
5151

52+
func (gogl *Google) preparePage(page *rod.Page) {
53+
// Remove "similar queries" lists
54+
page.Eval(";(() => { document.querySelectorAll(`div[data-initq]`).forEach( el => el.remove()); })();")
55+
}
56+
5257
func (gogl *Google) Search(query core.Query) ([]core.SearchResult, error) {
5358
logrus.Tracef("Start Google search, query: %+v", query)
5459

@@ -59,17 +64,21 @@ func (gogl *Google) Search(query core.Query) ([]core.SearchResult, error) {
5964
if err != nil {
6065
return nil, err
6166
}
67+
6268
page := gogl.Navigate(url)
69+
gogl.preparePage(page)
6370

6471
totalResults, err := gogl.FindTotalResults(page)
6572
if err != nil {
6673
return nil, err
6774
}
75+
logrus.Tracef("%d total results found", totalResults)
76+
6877
if totalResults == 0 {
6978
return searchResults, nil
7079
}
7180

72-
results, err := page.Search("div>div.g")
81+
results, err := page.Timeout(gogl.Timeout).Search("div[data-hveid][data-ved][lang]")
7382
if err != nil {
7483
return nil, err
7584
}
@@ -86,34 +95,42 @@ func (gogl *Google) Search(query core.Query) ([]core.SearchResult, error) {
8695
continue
8796
}
8897
linkText, err := link.Property("href")
98+
if err != nil {
99+
logrus.Error("No `href` tag found")
100+
}
89101

90102
// Get title
91103
titleTag, err := link.Element("h3")
92104
if err != nil {
93-
logrus.Error(err)
105+
logrus.Error("No `h3` tag found")
94106
continue
95107
}
96108

97109
title, err := titleTag.Text()
98110
if err != nil {
111+
logrus.Error("Cannot extract text from title")
99112
title = "No title"
100-
logrus.Error(err)
101113
}
102114

103115
// Get description
116+
// This selector doesn't catch all descriptions
104117
descTag, err := r.Element(`div[data-sncf~="1"]`)
105-
desc := "No description found"
106-
if err == nil {
118+
desc := ""
119+
if err != nil {
120+
logrus.Trace(`No description 'div[data-sncf~="1"]' tag found`)
121+
} else {
107122
desc = descTag.MustText()
108123
}
109124

110-
gR := core.SearchResult{Rank: i, URL: linkText.String(), Title: title, Description: desc}
125+
gR := core.SearchResult{Rank: i + 1, URL: linkText.String(), Title: title, Description: desc}
111126
searchResults = append(searchResults, gR)
112127
}
113128

114-
err = page.Close()
115-
if err != nil {
116-
logrus.Error(err)
129+
if !gogl.Browser.LeavePageOpen {
130+
err = page.Close()
131+
if err != nil {
132+
logrus.Error(err)
133+
}
117134
}
118135

119136
return searchResults, nil

yandex/search.go

Lines changed: 35 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
package yandex
22

33
import (
4-
"errors"
5-
"fmt"
64
"time"
75

86
"github.com/go-rod/rod"
@@ -13,13 +11,13 @@ import (
1311
type Yandex struct {
1412
core.Browser
1513
checkTimeout time.Duration // Timeout for secondary elements check
16-
pagesSleep time.Duration // Sleep between pages
14+
pageSleep time.Duration // Sleep between pages
1715
}
1816

1917
func New(browser core.Browser) *Yandex {
2018
yand := Yandex{Browser: browser}
2119
yand.checkTimeout = time.Second * 2
22-
yand.pagesSleep = time.Second * 1
20+
yand.pageSleep = time.Second * 1
2321
return &yand
2422
}
2523

@@ -35,17 +33,16 @@ func (yand *Yandex) isCaptcha(page *rod.Page) bool {
3533
return true
3634
}
3735

36+
// Check if nothing is found
3837
func (yand *Yandex) isNoResults(page *rod.Page) bool {
3938
noResFound := false
4039

4140
_, err := page.Timeout(yand.checkTimeout).Search("div.EmptySearchResults-Title")
42-
fmt.Println(err)
4341
if err == nil {
4442
noResFound = true
4543
}
4644

4745
_, err = page.Timeout(yand.checkTimeout).Search("div>div.RequestMeta-Message")
48-
fmt.Println(err)
4946
if err == nil {
5047
noResFound = true
5148
}
@@ -63,23 +60,29 @@ func (yand *Yandex) parseResults(results rod.Elements, pageNum int) []core.Searc
6360
continue
6461
}
6562
linkText, err := link.Property("href")
63+
if err != nil {
64+
logrus.Error("No `href` tag found")
65+
}
6666

6767
// Get title
6868
titleTag, err := link.Element("h2")
6969
if err != nil {
70-
logrus.Error("No title tag found")
70+
logrus.Error("No title `h2` tag found")
7171
continue
7272
}
7373

7474
title, err := titleTag.Text()
7575
if err != nil {
76+
logrus.Error("Cannot extract text from title")
7677
title = "No title"
7778
}
7879

7980
// Get description
8081
descTag, err := r.Element(`span.OrganicTextContentSpan`)
81-
desc := "No description found"
82-
if err == nil {
82+
desc := ""
83+
if err != nil {
84+
logrus.Trace("No description `span.OrganicTextContentSpan` tag found")
85+
} else {
8386
desc = descTag.MustText()
8487
}
8588

@@ -103,33 +106,43 @@ func (yand *Yandex) Search(query core.Query) ([]core.SearchResult, error) {
103106
}
104107

105108
page := yand.Navigate(url)
106-
defer page.Close()
107109

108-
searchRes, _ := page.Timeout(yand.Timeout).Search("li.serp-item")
109-
if searchRes != nil {
110-
elements, _ := searchRes.All()
111-
r := yand.parseResults(elements, searchPage)
112-
allResults = append(allResults, r...)
110+
// Get all search results in page
111+
searchRes, err := page.Timeout(yand.Timeout).Search("li.serp-item")
112+
if err != nil {
113+
logrus.Errorf("Cannot parse search results: %s", err)
113114
}
114115

116+
// Check why no results, maybe captcha?
115117
if searchRes == nil {
116118
if yand.isNoResults(page) {
117-
return allResults, nil
119+
logrus.Errorf("No results found")
118120
} else if yand.isCaptcha(page) {
119-
logrus.Error(errors.New("Yandex captcha occured during: " + url))
120-
return allResults, nil
121+
logrus.Errorf("Yandex captcha occurred during: %s", url)
121122
}
122123
break
123124
}
124125

126+
elements, err := searchRes.All()
127+
if err != nil {
128+
logrus.Errorf("Cannot get all elements from search results: %s", err)
129+
break
130+
}
131+
132+
r := yand.parseResults(elements, searchPage)
133+
allResults = append(allResults, r...)
134+
125135
searchPage++
126136

127-
err = page.Close()
128-
if err != nil {
129-
logrus.Error(err)
137+
if !yand.Browser.LeavePageOpen {
138+
// Close tab before opening new one during the cycle
139+
err = page.Close()
140+
if err != nil {
141+
logrus.Error(err)
142+
}
130143
}
131144

132-
time.Sleep(yand.pagesSleep)
145+
time.Sleep(yand.pageSleep)
133146
}
134147

135148
return allResults, nil

0 commit comments

Comments
 (0)