@@ -70,7 +70,7 @@ func (c *Crawl) HQProducer() {
7070 // is already closed, so no other goroutine can write to the slice
7171 if len (discoveredArray ) > 0 {
7272 for {
73- _ , err := c .HQClient .Discovered (discoveredArray , "seed" , false , false )
73+ err := c .HQClient .Add (discoveredArray , false )
7474 if err != nil {
7575 c .Log .WithFields (c .genLogFields (err , nil , map [string ]interface {}{})).Error ("error sending payload to crawl HQ, waiting 1s then retrying.." )
7676 time .Sleep (time .Second )
@@ -85,7 +85,7 @@ func (c *Crawl) HQProducer() {
8585 mutex .Lock ()
8686 if (len (discoveredArray ) >= int (math .Ceil (float64 (c .Workers .Count )/ 2 )) || time .Since (HQLastSent ) >= time .Second * 10 ) && len (discoveredArray ) > 0 {
8787 for {
88- _ , err := c .HQClient .Discovered (discoveredArray , "seed" , false , false )
88+ err := c .HQClient .Add (discoveredArray , false )
8989 if err != nil {
9090 c .Log .WithFields (c .genLogFields (err , nil , map [string ]interface {}{})).Error ("error sending payload to crawl HQ, waiting 1s then retrying.." )
9191 time .Sleep (time .Second )
@@ -123,7 +123,7 @@ func (c *Crawl) HQProducer() {
123123 // gob's encode/decode doesn't properly support booleans
124124 if discoveredItem .BypassSeencheck {
125125 for {
126- _ , err := c .HQClient .Discovered ([]gocrawlhq.URL {discoveredURL }, "seed" , true , false )
126+ err := c .HQClient .Add ([]gocrawlhq.URL {discoveredURL }, true )
127127 if err != nil {
128128 c .Log .WithFields (c .genLogFields (err , nil , map [string ]interface {}{
129129 "bypassSeencheck" : discoveredItem .BypassSeencheck ,
@@ -177,20 +177,20 @@ func (c *Crawl) HQConsumer() {
177177
178178 // get batch from crawl HQ
179179 c .HQConsumerState = "waitingOnFeed"
180- batch , err := c .HQClient .Feed (HQBatchSize , c .HQStrategy )
180+ URLs , err := c .HQClient .Feed (HQBatchSize , c .HQStrategy )
181181 if err != nil {
182- c .Log .WithFields (c .genLogFields (err , nil , map [string ]interface {}{
183- "batchSize" : HQBatchSize ,
184- "err" : err ,
185- })).Error ("error getting new URLs from crawl HQ" )
182+ // c.Log.WithFields(c.genLogFields(err, nil, map[string]interface{}{
183+ // "batchSize": HQBatchSize,
184+ // "err": err,
185+ // })).Debug ("error getting new URLs from crawl HQ")
186186 continue
187187 }
188188 c .HQConsumerState = "feedCompleted"
189189
190190 // send all URLs received in the batch to the queue
191- var items = make ([]* queue.Item , 0 , len (batch . URLs ))
192- if len (batch . URLs ) > 0 {
193- for _ , URL := range batch . URLs {
191+ var items = make ([]* queue.Item , 0 , len (URLs ))
192+ if len (URLs ) > 0 {
193+ for _ , URL := range URLs {
194194 c .HQConsumerState = "urlParse"
195195 newURL , err := url .Parse (URL .Value )
196196 if err != nil {
@@ -246,7 +246,7 @@ func (c *Crawl) HQFinisher() {
246246
247247 if len (finishedArray ) == int (math .Ceil (float64 (c .Workers .Count )/ 2 )) {
248248 for {
249- _ , err := c .HQClient .Finished (finishedArray , locallyCrawledTotal )
249+ err := c .HQClient .Delete (finishedArray , locallyCrawledTotal )
250250 if err != nil {
251251 c .Log .WithFields (c .genLogFields (err , nil , map [string ]interface {}{
252252 "finishedArray" : finishedArray ,
@@ -265,7 +265,7 @@ func (c *Crawl) HQFinisher() {
265265 // send remaining finished URLs
266266 if len (finishedArray ) > 0 {
267267 for {
268- _ , err := c .HQClient .Finished (finishedArray , locallyCrawledTotal )
268+ err := c .HQClient .Delete (finishedArray , locallyCrawledTotal )
269269 if err != nil {
270270 c .Log .WithFields (c .genLogFields (err , nil , map [string ]interface {}{
271271 "finishedArray" : finishedArray ,
@@ -286,10 +286,11 @@ func (c *Crawl) HQSeencheckURLs(URLs []*url.URL) (seencheckedBatch []*url.URL, e
286286 for _ , URL := range URLs {
287287 discoveredURLs = append (discoveredURLs , gocrawlhq.URL {
288288 Value : utils .URLToString (URL ),
289+ Type : "asset" ,
289290 })
290291 }
291292
292- discoveredResponse , err := c .HQClient .Discovered (discoveredURLs , "asset" , false , true )
293+ outputURLs , err := c .HQClient .Seencheck (discoveredURLs )
293294 if err != nil {
294295 c .Log .WithFields (c .genLogFields (err , nil , map [string ]interface {}{
295296 "batchLen" : len (URLs ),
@@ -298,8 +299,8 @@ func (c *Crawl) HQSeencheckURLs(URLs []*url.URL) (seencheckedBatch []*url.URL, e
298299 return seencheckedBatch , err
299300 }
300301
301- if discoveredResponse . URLs != nil {
302- for _ , URL := range discoveredResponse . URLs {
302+ if outputURLs != nil {
303+ for _ , URL := range outputURLs {
303304 // the returned payload only contains new URLs to be crawled by Zeno
304305 newURL , err := url .Parse (URL .Value )
305306 if err != nil {
@@ -324,16 +325,17 @@ func (c *Crawl) HQSeencheckURLs(URLs []*url.URL) (seencheckedBatch []*url.URL, e
324325func (c * Crawl ) HQSeencheckURL (URL * url.URL ) (bool , error ) {
325326 discoveredURL := gocrawlhq.URL {
326327 Value : utils .URLToString (URL ),
328+ Type : "asset" ,
327329 }
328330
329- discoveredResponse , err := c .HQClient .Discovered ([]gocrawlhq.URL {discoveredURL }, "asset" , false , true )
331+ outputURLs , err := c .HQClient .Seencheck ([]gocrawlhq.URL {discoveredURL })
330332 if err != nil {
331333 c .Log .Error ("error sending seencheck payload to crawl HQ" , "err" , err , "url" , utils .URLToString (URL ))
332334 return true , err // return true, don't discard the URL if there's an error
333335 }
334336
335- if discoveredResponse . URLs != nil {
336- for _ , URL := range discoveredResponse . URLs {
337+ if outputURLs != nil {
338+ for _ , URL := range outputURLs {
337339 // the returned payload only contains new URLs to be crawled by Zeno
338340 if URL .Value == discoveredURL .Value {
339341 return true , nil
0 commit comments