|
| 1 | +package main |
| 2 | + |
| 3 | +import ( |
| 4 | + "fmt" |
| 5 | + "log" |
| 6 | + "strings" |
| 7 | + |
| 8 | + "github.com/andybalholm/cascadia" |
| 9 | + "golang.org/x/net/html" |
| 10 | +) |
| 11 | + |
// feedbackHtml is the HTML fixture that main parses. It holds a single
// <section class="feedback-section"> whose nested divs contain one <a> link
// wrapping an <img>; those are the elements main's selectors look up.
| 12 | +var feedbackHtml string = ` |
| 13 | +<section class="feedback-section"> |
| 14 | + <div class="container"> |
| 15 | + <div class="row"> |
| 16 | + <div class="col-md-12 text-center"> |
| 17 | + <h1 class="single-line">The Customers’ Verdict</h1> |
| 18 | + </div> |
| 19 | + </div><br> |
| 20 | + <div class="row"> |
| 21 | + <div class="col-md-3"> |
| 22 | + <div class="feedback-head"> |
| 23 | + <a href="https://www.google.com/search?q=THE+MARINA+MALL&rlz=1C1CHBD_enIN908IN908&oq=th+&aqs=chrome.1.69i57j69i59l2j69i60l5.2415j0j7&sourceid=chrome&ie=UTF-8#lrd=0x3a525a5ed3d3509d:0x51ba8d5c2f099ebb,1,,," target="_blank"> |
| 24 | + <img src="http://marinamallchennai.com/wp-content/uploads/2020/09/Googlebusiness.png"> |
| 25 | + </a> |
| 26 | + <p><i class="fa fa-star"></i> <i class="fa fa-star"></i> <i class="fa fa-star"></i> <i class="fa fa-star"></i> <i class="fa fa-star"></i></p><br> |
| 27 | + </div> |
| 28 | + </div> |
| 29 | + |
| 30 | + </div> |
| 31 | + |
| 32 | + </div> |
| 33 | +</section> |
| 34 | +` |
| 35 | + |
| 36 | +func Query(n *html.Node, query string) *html.Node { |
| 37 | + sel, err := cascadia.Parse(query) |
| 38 | + if err != nil { |
| 39 | + return &html.Node{} |
| 40 | + } |
| 41 | + return cascadia.Query(n, sel) |
| 42 | +} |
| 43 | + |
| 44 | +func QueryAll(n *html.Node, query string) []*html.Node { |
| 45 | + sel, err := cascadia.Parse(query) |
| 46 | + if err != nil { |
| 47 | + return []*html.Node{} |
| 48 | + } |
| 49 | + return cascadia.QueryAll(n, sel) |
| 50 | +} |
| 51 | + |
| 52 | +func AttrOr(n *html.Node, attrName, or string) string { |
| 53 | + for _, a := range n.Attr { |
| 54 | + |
| 55 | + if a.Key == attrName { |
| 56 | + return a.Val |
| 57 | + } |
| 58 | + } |
| 59 | + return or |
| 60 | +} |
| 61 | + |
| 62 | +// main |
| 63 | +func main() { |
| 64 | + doc, err := html.Parse(strings.NewReader(feedbackHtml)) |
| 65 | + if err != nil { |
| 66 | + log.Fatal(err) |
| 67 | + } |
| 68 | + fmt.Printf("List of URLS:\n\n") |
| 69 | + for _, p := range QueryAll(doc, "section.feedback-section") { |
| 70 | + reviewUrl := AttrOr(Query(p, "div a "), "href", "") |
| 71 | + imageUrl := AttrOr(Query(p, "div a img"), "src", "") |
| 72 | + fmt.Println("Review url", reviewUrl, "\n", "Image URl", imageUrl) |
| 73 | + } |
| 74 | +} |
0 commit comments