-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.go
More file actions
88 lines (75 loc) · 2.01 KB
/
main.go
File metadata and controls
88 lines (75 loc) · 2.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
package main
import (
"encoding/csv"
"encoding/json"
"fmt"
"github.com/gocolly/colly"
"io/ioutil"
"log"
"os"
"strconv"
"strings"
)
// product is one scraped product page. Every field is kept as the raw text
// taken from the page; the struct tags fix the key names used when a product
// is encoded as JSON, and the field order fixes the key order in that output.
type product struct {
	// Price is the buy-box price text with "$" and "," removed by crawl.
	Price string `json:"price"`
	// ProductTitle is the text of the page's title element.
	ProductTitle string `json:"productTitle"`
	// ShortDescription is the text of the feature-bullets section.
	ShortDescription string `json:"shortDescription"`
	// Reviews is the customer review count text, stored verbatim.
	Reviews string `json:"reviews"`
}
// main reads ASIN codes from asins.csv in the working directory, turns each
// one into an Amazon product URL and hands the list to crawl. Any failure to
// open or parse the CSV file terminates the program via log.Fatal.
func main() {
	urls, err := loadProductURLs("asins.csv")
	if err != nil {
		// The file has already been closed by loadProductURLs at this point,
		// so exiting here does not skip a pending deferred Close.
		log.Fatal(err)
	}
	crawl(urls)
}

// loadProductURLs parses the CSV file at path and builds one product URL per
// record from the record's first column. Surrounding whitespace is trimmed
// and records whose first column is empty are skipped, so they do not produce
// a URL with no ASIN in it.
func loadProductURLs(path string) ([]string, error) {
	asins, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer asins.Close()

	asinCodes, err := csv.NewReader(asins).ReadAll()
	if err != nil {
		return nil, fmt.Errorf("reading %s: %w", path, err)
	}

	urls := make([]string, 0, len(asinCodes))
	for _, asinCode := range asinCodes {
		if len(asinCode) == 0 {
			continue
		}
		asin := strings.TrimSpace(asinCode[0])
		if asin == "" {
			continue
		}
		urls = append(urls, fmt.Sprintf("https://www.amazon.com/dp/%s", asin))
	}
	return urls, nil
}
// crawl visits every URL in urls with a colly collector, extracts one product
// per fetched HTML document and finally passes all collected products to
// writeJSON. A URL that cannot be visited is logged and skipped so that one
// failure does not discard the results of the others.
func crawl(urls []string) {
	// Kept non-nil on purpose: an empty result is then encoded as [] rather
	// than null.
	allProducts := make([]product, 0, len(urls))

	// Strips the currency sign and thousands separators from the price text.
	replacer := strings.NewReplacer("$", "", ",", "")

	// Restrict the collector to Amazon hosts.
	collector := colly.NewCollector(
		colly.AllowedDomains("amazon.com", "www.amazon.com"),
	)

	// Runs once per document root; each field is read from the element
	// matching the given selector.
	collector.OnHTML("html", func(e *colly.HTMLElement) {
		allProducts = append(allProducts, product{
			Price:            replacer.Replace(e.ChildText("#price_inside_buybox")),
			ProductTitle:     e.ChildText("#productTitle"),
			ShortDescription: e.ChildText("#featurebullets_feature_div"),
			Reviews:          e.ChildText("#acrCustomerReviewText"),
		})
	})

	collector.OnRequest(func(request *colly.Request) {
		fmt.Println("Visiting", request.URL.String())
	})

	// NOTE(review): this relies on the collector running synchronously (no
	// Async option is set above), so every callback has finished by the time
	// writeJSON is called — confirm if async mode is ever enabled.
	for _, target := range urls {
		if err := collector.Visit(target); err != nil {
			log.Printf("visiting %s: %v", target, err)
		}
	}

	writeJSON(allProducts)
}
// writeJSON encodes data as indented JSON and writes it to products.json in
// the working directory (mode 0644). The encoded bytes are first passed
// through _UnescapeUnicodeCharactersInJSON so that \uXXXX escapes are written
// as literal characters. Failures are logged rather than returned:
//   - if encoding fails, nothing is written;
//   - if unescaping fails, the still-escaped JSON is written instead of an
//     empty file;
//   - if the write itself fails, the error is logged.
func writeJSON(data []product) {
	encoded, err := json.MarshalIndent(data, "", " ")
	if err != nil {
		log.Println("Unable to create JSON file:", err)
		return
	}

	output, err := _UnescapeUnicodeCharactersInJSON(encoded)
	if err != nil {
		// Fall back to the escaped form: it is the same JSON document, just
		// less readable, and keeps the scraped data from being lost.
		log.Println("Unable to unescape JSON, writing escaped output:", err)
		output = encoded
	}

	const fileName = "products.json"
	if err := ioutil.WriteFile(fileName, output, 0644); err != nil {
		log.Printf("Unable to write %s: %v", fileName, err)
	}
}
// _UnescapeUnicodeCharactersInJSON rewrites \uXXXX escapes in _jsonRaw as the
// literal UTF-8 characters they denote (for example \u003c becomes <) and
// returns the result as a new slice; the input is not modified.
//
// The output stays valid JSON whenever the input was:
//   - two-character escapes such as \\ and \" are copied through whole, so an
//     escaped backslash followed by the letter u is never read as \u;
//   - escapes for characters that JSON requires to be escaped (control
//     characters below U+0020, the double quote and the backslash) are kept
//     in escaped form;
//   - a \uD800-\uDBFF escape directly followed by a \uDC00-\uDFFF escape is
//     decoded as one surrogate pair; a lone surrogate escape is kept as is.
//
// A \u that is not followed by four hexadecimal digits yields
// strconv.ErrSyntax and a nil result.
func _UnescapeUnicodeCharactersInJSON(_jsonRaw json.RawMessage) (json.RawMessage, error) {
	var out strings.Builder
	out.Grow(len(_jsonRaw))

	for i := 0; i < len(_jsonRaw); {
		c := _jsonRaw[i]
		if c != '\\' || i+1 >= len(_jsonRaw) {
			out.WriteByte(c)
			i++
			continue
		}
		if _jsonRaw[i+1] != 'u' {
			// Consume the backslash together with the character it escapes.
			out.WriteByte(c)
			out.WriteByte(_jsonRaw[i+1])
			i += 2
			continue
		}

		r, ok := jsonEscapeHex4(_jsonRaw[i+2:])
		if !ok {
			return nil, strconv.ErrSyntax
		}
		end := i + 6 // one past the "\uXXXX" just parsed

		switch {
		case r >= 0xD800 && r <= 0xDBFF:
			if lo, ok := jsonLowSurrogate(_jsonRaw[end:]); ok {
				out.WriteRune(0x10000 + ((r - 0xD800) << 10) + (lo - 0xDC00))
				end += 6
			} else {
				out.Write(_jsonRaw[i:end])
			}
		case r >= 0xDC00 && r <= 0xDFFF, r < 0x20, r == '"', r == '\\':
			// Unpaired low surrogate, or a character that must stay escaped.
			out.Write(_jsonRaw[i:end])
		default:
			out.WriteRune(r)
		}
		i = end
	}

	return json.RawMessage(out.String()), nil
}

// jsonEscapeHex4 decodes the four hexadecimal digits at the start of b, as
// found right after "\u". It reports false if b is too short or the digits
// are not valid hexadecimal.
func jsonEscapeHex4(b []byte) (rune, bool) {
	if len(b) < 4 {
		return 0, false
	}
	v, err := strconv.ParseUint(string(b[:4]), 16, 16)
	if err != nil {
		return 0, false
	}
	return rune(v), true
}

// jsonLowSurrogate reports whether b starts with a \uDC00-\uDFFF escape and,
// if so, returns its code unit.
func jsonLowSurrogate(b []byte) (rune, bool) {
	if len(b) < 2 || b[0] != '\\' || b[1] != 'u' {
		return 0, false
	}
	lo, ok := jsonEscapeHex4(b[2:])
	if !ok || lo < 0xDC00 || lo > 0xDFFF {
		return 0, false
	}
	return lo, true
}