forked from rubenwap/company-registry-scraper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape.go
41 lines (33 loc) · 817 Bytes
/
scrape.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
package main
import (
"fmt"
"github.com/gocolly/colly"
"encoding/json"
)
// Registry pairs a country label with the URL scraped for that country's
// company registry. Both fields are exported, so encoding/json emits them
// under the keys "Country" and "URL".
type Registry struct {
	Country, URL string
}
// main visits the gov.uk overseas-registries page, records one Registry for
// every element matching the ".govspeak .govuk-link" selector, and prints the
// collected entries as JSON from the OnScraped callback.
func main() {
	// Deliberately a non-nil empty slice: if no element matches, the output
	// is the JSON array "[]" rather than "null".
	registries := []Registry{}

	c := colly.NewCollector()

	c.OnRequest(func(r *colly.Request) {
		fmt.Println("Visiting", r.URL)
	})

	// The element text becomes the country and its href attribute the URL;
	// both are stored exactly as found in the page.
	c.OnHTML(".govspeak .govuk-link", func(e *colly.HTMLElement) {
		registries = append(registries, Registry{
			Country: e.Text,
			URL:     e.Attr("href"),
		})
	})

	c.OnScraped(func(r *colly.Response) {
		data, err := json.Marshal(registries)
		if err != nil {
			fmt.Println(err)
			return
		}
		fmt.Println("Finished. Here is your data:", string(data))
	})

	// Report a failed visit instead of silently discarding the error, so the
	// user is told why no data was printed.
	if err := c.Visit("https://www.gov.uk/government/publications/overseas-registries/overseas-registries"); err != nil {
		fmt.Println("Scraping failed:", err)
	}
}