11package yandex
22
33import (
4- "errors"
5- "fmt"
64 "time"
75
86 "github.com/go-rod/rod"
@@ -13,13 +11,13 @@ import (
// Yandex bundles an embedded core.Browser with two durations: checkTimeout
// and the sleep applied between pages. In this diff view the sleep field is
// renamed from pagesSleep (removed line) to pageSleep (added line).
1311type Yandex struct {
1412 core.Browser
1513 checkTimeout time.Duration // Timeout for secondary elements check
16- pagesSleep time.Duration // Sleep between pages
14+ pageSleep time.Duration // Sleep between pages
1715}
1816
// New returns a pointer to a Yandex that wraps the given browser, with
// checkTimeout set to 2 seconds and the between-pages sleep set to 1 second.
1917func New (browser core.Browser ) * Yandex {
2018 yand := Yandex {Browser : browser }
2119 yand .checkTimeout = time .Second * 2
22- yand .pagesSleep = time .Second * 1
20+ yand .pageSleep = time .Second * 1
2321 return & yand
2422}
2523
@@ -35,17 +33,16 @@ func (yand *Yandex) isCaptcha(page *rod.Page) bool {
3533 return true
3634}
3735
36+ // Check if nothing is found
3837func (yand * Yandex ) isNoResults (page * rod.Page ) bool {
3938 noResFound := false
4039
4140 _ , err := page .Timeout (yand .checkTimeout ).Search ("div.EmptySearchResults-Title" )
42- fmt .Println (err )
4341 if err == nil {
4442 noResFound = true
4543 }
4644
4745 _ , err = page .Timeout (yand .checkTimeout ).Search ("div>div.RequestMeta-Message" )
48- fmt .Println (err )
4946 if err == nil {
5047 noResFound = true
5148 }
@@ -63,23 +60,29 @@ func (yand *Yandex) parseResults(results rod.Elements, pageNum int) []core.Searc
6360 continue
6461 }
6562 linkText , err := link .Property ("href" )
63+ if err != nil {
64+ logrus .Error ("No `href` tag found" )
65+ }
6666
6767 // Get title
6868 titleTag , err := link .Element ("h2" )
6969 if err != nil {
70- logrus .Error ("No title tag found" )
70+ logrus .Error ("No title `h2` tag found" )
7171 continue
7272 }
7373
7474 title , err := titleTag .Text ()
7575 if err != nil {
76+ logrus .Error ("Cannot extract text from title" )
7677 title = "No title"
7778 }
7879
7980 // Get description
8081 descTag , err := r .Element (`span.OrganicTextContentSpan` )
81- desc := "No description found"
82- if err == nil {
82+ desc := ""
83+ if err != nil {
84+ logrus .Trace ("No description `span.OrganicTextContentSpan` tag found" )
85+ } else {
8386 desc = descTag .MustText ()
8487 }
8588
@@ -103,33 +106,43 @@ func (yand *Yandex) Search(query core.Query) ([]core.SearchResult, error) {
103106 }
104107
105108 page := yand .Navigate (url )
106- defer page .Close ()
107109
108- searchRes , _ := page .Timeout (yand .Timeout ).Search ("li.serp-item" )
109- if searchRes != nil {
110- elements , _ := searchRes .All ()
111- r := yand .parseResults (elements , searchPage )
112- allResults = append (allResults , r ... )
110+ // Get all search results in page
111+ searchRes , err := page .Timeout (yand .Timeout ).Search ("li.serp-item" )
112+ if err != nil {
113+ logrus .Errorf ("Cannot parse search results: %s" , err )
113114 }
114115
116+ // Check why no results, maybe captcha?
115117 if searchRes == nil {
116118 if yand .isNoResults (page ) {
117- return allResults , nil
119+ logrus . Errorf ( "No results found" )
118120 } else if yand .isCaptcha (page ) {
119- logrus .Error (errors .New ("Yandex captcha occured during: " + url ))
120- return allResults , nil
121+ logrus .Errorf ("Yandex captcha occurred during: %s" , url )
121122 }
122123 break
123124 }
124125
126+ elements , err := searchRes .All ()
127+ if err != nil {
128+ logrus .Errorf ("Cannot get all elements from search results: %s" , err )
129+ break
130+ }
131+
132+ r := yand .parseResults (elements , searchPage )
133+ allResults = append (allResults , r ... )
134+
125135 searchPage ++
126136
127- err = page .Close ()
128- if err != nil {
129- logrus .Error (err )
137+ if ! yand .Browser .LeavePageOpen {
138+ // Close tab before opening new one during the cycle
139+ err = page .Close ()
140+ if err != nil {
141+ logrus .Error (err )
142+ }
130143 }
131144
132- time .Sleep (yand .pagesSleep )
145+ time .Sleep (yand .pageSleep )
133146 }
134147
135148 return allResults , nil
0 commit comments