-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathyell.js
119 lines (105 loc) · 4.51 KB
/
yell.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
var fs = require('fs');
// Field delimiter placed between columns when rows are written to the .csv export.
var seperator = ';';

// Options handed to CasperJS when the browser instance is created.
var casperOptions = {
    // Inject jQuery into every loaded page so `$` is available in page context.
    clientScripts: ['lib/jquery.min.js'],
    verbose: true,
    logLevel: 'error',
    pageSettings: {
        // Images and plugins are switched off for the page loads.
        loadImages: false,
        loadPlugins: false,
        // Alternative (full) user agent kept for reference:
        // 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36'
        // NOTE(review): the active value below looks truncated (no closing parenthesis) - confirm it is intentional.
        userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X'
    }
};

// The single CasperJS instance used by the rest of the script.
var casper = require('casper').create(casperOptions);
// Read the search parameters from the command line: --keyword and --location
// are mandatory, --page is optional.
var keyword;
var locationName;
// When --page is not supplied the first page of the query is assumed.
var pageNum = 1;

if (!casper.cli.has("keyword")) {
    casper.die("No Keyword spesified.", 1);
} else {
    keyword = casper.cli.get("keyword");
    console.log('Keyword Entered: ' + keyword);
}

if (!casper.cli.has("location")) {
    casper.die("No location spesified.", 1);
} else {
    locationName = casper.cli.get("location");
    console.log('Location Entered: ' + locationName);
}

if (casper.cli.has("page")) {
    pageNum = casper.cli.get("page");
    console.log('Page Number Entered: ' + pageNum);
}
// Assemble the search URL from its parts. The command-line values are
// percent-encoded so that spaces, '&', '#', '=' and similar characters in a
// keyword or location cannot truncate or alter the query string.
var webpage = 'https://www.yell.com/ucs/UcsSearchAction.do' +
    '?location=' + encodeURIComponent(locationName) +
    '&keywords=' + encodeURIComponent(keyword) +
    '&scrambleSeed=192248850' +
    '&pageNum=' + encodeURIComponent(pageNum);
// Open the generated results page as the first navigation step.
casper.start(webpage);
// After a one second pause: take a screenshot of the results page and make
// sure the export file begins with the column header row.
casper.wait(1000, function () {
    var exportPath = "export/" + keyword + '-' + locationName + ".csv";
    var headerRow = ['Name', 'URL', 'Phone', 'Address', 'Town', 'City', 'Post'].join(seperator) + "\n";

    // Screenshot of the result page.
    casper.capture('img/result.jpg');

    // The export is opened in append mode, so repeated runs for the same
    // keyword/location (e.g. one run per --page) accumulate in one file.
    // Only emit the header when the file is new or still empty; otherwise a
    // header line would be inserted between data rows on every run.
    if (!fs.exists(exportPath) || fs.size(exportPath) === 0) {
        fs.write(exportPath, headerRow, "a");
    }

    this.echo('result image captured');
});
// Collect the company data from the current results page, echo every record
// to the console and append it as one row to the .csv export.
casper.then(function () {
    // Quote a value for the export when it contains the column separator, a
    // double quote or a line break, so it cannot spill into other columns.
    function toCsvField(value) {
        var text = (value === null || value === undefined) ? '' : String(value);
        if (text.indexOf(seperator) !== -1 || /["\r\n]/.test(text)) {
            return '"' + text.replace(/"/g, '""') + '"';
        }
        return text;
    }

    // The function passed to evaluate() runs in the page context, so every
    // helper it needs is defined inside it.
    var data = this.evaluate(function () {
        // Trimmed text of the first element under `root` matching `selector`,
        // or '' when nothing matches. textContent is used instead of
        // innerHTML so entities such as "&amp;" (which contain ';') and
        // nested markup never reach the export. `stripCommas` removes commas
        // from the value.
        function textOf(root, selector, stripCommas) {
            var element = root.querySelector(selector);
            if (!element) {
                return '';
            }
            var text = element.textContent;
            if (stripCommas) {
                text = text.replace(/,/g, '');
            }
            return text.trim();
        }

        // One container per company listing.
        var nodes = document.querySelectorAll('.businessCapsule--mainContent');

        return Array.prototype.map.call(nodes, function (node) {
            var websiteIcon = node.querySelector('.icon-Business-website');
            var websiteLink = websiteIcon ? websiteIcon.parentElement : null;

            // Only the first child node of the first span is read for the
            // title: sponsored results carry an extra span with text that is
            // not wanted. Fall back to '' instead of throwing when the span
            // or its leading text node is missing.
            var titleSpan = node.querySelector('span');
            var titleNode = titleSpan ? titleSpan.childNodes[0] : null;
            var title = (titleNode && titleNode.nodeValue) ? titleNode.nodeValue.trim() : '';

            return {
                name: title,
                // A link without an href yields '' rather than the string "null".
                url: websiteLink ? (websiteLink.getAttribute('href') || '') : '',
                phone: textOf(node, 'span[itemprop="telephone"]', false),
                address: textOf(node, 'span[itemprop="streetAddress"]', true),
                town: textOf(node, 'span[itemprop="addressLocality"]', false),
                city: textOf(node, 'span[itemprop="addressRegion"]', true),
                post: textOf(node, 'span[itemprop="postalCode"]', true)
            };
        });
    });

    // evaluate() yields nothing usable when the in-page function fails.
    if (!data) {
        this.echo('No results could be read from the page.');
        return;
    }

    var exportPath = "export/" + keyword + '-' + locationName + ".csv";
    // Column order of the export; matches the header row.
    var columns = ['name', 'url', 'phone', 'address', 'town', 'city', 'post'];

    // Loop through the scraped records.
    for (var i = 0; i < data.length; i++) {
        var record = data[i];
        var row = [];
        for (var c = 0; c < columns.length; c++) {
            this.echo(columns[c] + ': ' + record[columns[c]]);
            row.push(toCsvField(record[columns[c]]));
        }
        this.echo('-------------------------------');
        // Append the record to the .csv export.
        fs.write(exportPath, row.join(seperator) + "\n", "a");
        this.echo('DONE !!');
    }
});
// Run the CasperJS steps defined earlier in this script (start, wait, then).
casper.run();