forked from leesei/node-comics-feed
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparsers.js
More file actions
67 lines (61 loc) · 1.54 KB
/
Copy pathparsers.js
File metadata and controls
67 lines (61 loc) · 1.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
var url = require('url');
/**
* Parser = {
* name,
* match(),
* scrape()
* }
*
* match():
* @param {Object} siteUrl parsed url for the comic strips site
* @return {Boolean} whether this parser can handle this site
*
* scrape():
* @param {String} baseUrl url of the webpage containing the comic strip
* @param {Object} $ [cheerio](http://matthewmueller.github.io/cheerio/) object containing the parsed page
* @param {Function} callback callback function to return the parsed strip image URL
*
* callback:
* @param {Object} error error object if one occurs
* @param {String} img_url the parsed strip image URL
*
*/
/**
 * Registry of site-specific parsers. Each entry exposes:
 *   name   - human-readable site name
 *   match  - function (siteUrl) returning whether the parsed url belongs to the site
 *   scrape - function (baseUrl, $, callback) reporting the strip image URL
 *            through callback(error, img_url)
 */
var parsers = [
  {
    name: "GoComics",
    // Accepts gocomics.com and its subdomains. The dots are escaped and the
    // pattern is anchored so look-alike hosts (e.g. "gocomicsxcom.example.org")
    // are rejected.
    match: function (siteUrl) {
      return /(^|\.)gocomics\.com$/i.test(siteUrl.hostname);
    },
    scrape: function (baseUrl, $, callback) {
      var img = $('.strip').attr('src');
      if (!img) {
        // Report a missing image as an error instead of a successful `undefined`.
        callback(new Error('GoComics: strip image not found in ' + baseUrl));
        return;
      }
      callback(null, img);
    },
  },
  {
    name: "Dilbert.com",
    // Accepts dilbert.com and its subdomains (same anchoring as above).
    match: function (siteUrl) {
      return /(^|\.)dilbert\.com$/i.test(siteUrl.hostname);
    },
    scrape: function (baseUrl, $, callback) {
      // Declared locally: an undeclared assignment would leak a global and
      // throws a ReferenceError in strict mode.
      var img = $('.STR_Image > img').attr('src');
      if (!img) {
        // url.resolve() throws on a non-string target, so bail out through
        // the callback rather than letting the exception escape.
        callback(new Error('Dilbert.com: strip image not found in ' + baseUrl));
        return;
      }
      // the image src is relative
      callback(null, url.resolve(baseUrl, img));
    }
  }
];
exports = module.exports = {
  /**
   * Finds the first registered parser whose match() accepts the given site.
   *
   * @param {String} siteUrl url of the comic strips site
   * @return {Object} the matching parser, or null when no parser matches
   */
  createParser: function (siteUrl) {
    var result = null;
    // Declared locally: the undeclared assignment leaked a global and is a
    // ReferenceError in strict mode.
    var parsedUrl = url.parse(siteUrl);
    // some() stops iterating at the first parser that reports a match.
    parsers.some(function (parser) {
      if (parser.match(parsedUrl)) {
        result = parser;
        return true;
      }
      return false;
    });
    return result;
  },
  /**
   * @return {Array} the module's parser list itself (not a copy)
   */
  getParsers: function () {
    return parsers;
  },
};