forked from weizetao/spider-roach
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmaps.cfg
More file actions
141 lines (140 loc) · 6.71 KB
/
maps.cfg
File metadata and controls
141 lines (140 loc) · 6.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
{
"http://www.lvmama.com/dest/.+/ticket_tab":{
"info":"抓取景点列表页",
"pre_url":"http://www.lvmama.com",
"link_xpath":[
"//span[@class='dstnt_info_text']/a[@class='dstnt_title']/@href",
"//div[@class='Pages']/a[@class='PageLink']/@href",
],
},
"http://www.lvmama.com/dest/[a-z0-9]+":{
"info":"抓取景点详情页",
"table":"scenery",
"page_xpath":{
"name":"//div[@class='flash_proDetail']/div[@class='proDetail']/h1/text()",
"image":"//div[@class='flash_proDetail']/div/ul[@class='Slides']/li/img/@src",
"price":"//div[@id='productList']/div/div/div[2]/dl/dd/del/text()",
"special":"//div[@class='tictke_notes']/ul/li/text()",
"opentime":"//div[@id='warp']/div/div[2]/ul/li[3]/text()",
"addr":"//div[@id='warp']/div/div[2]/ul/li[2]/text()",
"detail":"//div[@id='Introduction']//text()",
"city_name":"//div[@class='proDetail']/ul/li[1]/a/text()",
"theme":"//div[@class='proDetail']/ul/li[4]/a/text()",
},
},
"http://event.mosh.cn/[a-z0-9]*/city-future":{
"info":"抓取魔时网城市活动列表页",
"crawled":"promotion_crawled_set",
"link_xpath":[
"//div[@class='page']//a[@class='next ']/@href",
"//div[@class='act_heat']/ul/li[@class='fix']/dl/dt/a/@href",
],
},
"http://event.mosh.cn/view":{
"info":"抓取魔时网城市活动详情页",
"crawled":"promotion_crawled_set",
"table":"promotion",
"page_xpath":{
"name":"//div[@id='event_info_hd']/div/div[@class='title']/h1/text()",
"image":"//div[@id='event_info_hd']/div/img/@src",
"time":"//div[@id='event_info_hd']/div/div[@class='tile fix'][1]/div[@class='item']/text()",
"addr":"//div[@class='tile fix'][2]/div[@class='item']/ul/li[1]/text()",
"price":"//div[@class='info_grid']/div[@class='tile fix'][3]/div[@class='item']/text()",
"theme":"//div[@class='info_grid']/div[@class='tile fix'][4]/div[@class='item']/a/text()",
"sid":"//div/div[@class='tile fix'][5]/div[@class='item']/text()",
"detail":"//div[@id='event_overview']/div[@class='item']//text()",
}
},
"http://meishi.qq.com/[a-z]*/c/":{
"info":"抓取美食优惠券的列表页",
"pre_url":"http://meishi.qq.com/c/",
"link_xpath":[
"//div[@class='info']/a[@class='title cmcList']/@href",
"//div[@class='mod_pagenav']/form[@id='pager']/p/a[last()]/@href",
],
},
"http://meishi.qq.com/coupons":{
"info":"抓取美食优惠券的详情页页",
"table":"promotion",
"page_xpath":{
"name":"//div[@id='promos_detail2']/div[@class='box header2']/h2/text()",
"image":"//div[@id='promos_dticket']/div/div/img/@src",
"endtime":"//div[@class='tinfo']/div/div/text()",
"addr":"//div[@id='shop_profile']/div/div[@class='basic']/p[1]/text()",
"city_name":"//div[@class='current_city']/h3/text()",
"tel":"//div[@id='shop_profile']/div/div[@class='basic']/p[2]/text()",
"detail":"//div[@class='dbox']//text()",
}
},
"http://meishi.qq.com/[a-z]*/s/":{
"info":"抓取美食商家列表页",
"pre_url":"http://meishi.qq.com",
"link_xpath":[
"//div[@class='basic']/h4/a/@href",
"//div[@class='mod_pagenav']/form[@id='pager']/p/a[last()]/@href",
],
},
"http://meishi.qq.com/shops":{
"info":"抓取美食商家详情页",
"table":"cate",
"page_xpath":{
"name":"//div[@class='d_header']/h1/text()",
"image":"//div[@id='shop_profile']/div/div[@class='pic']/a/img/@src",
"opentime":"//div[@id='shop_profile']//ul[@id='shop_pro_basic']/li[3]/text()",
"addr":"//div[@id='shop_profile']/div/div[@class='basic']/p[1]/text()",
"city_name":"//div[@class='current_city']/h3/text()",
"tel":"//div[@id='shop_profile']/div/div[@class='basic']/p[2]/text()""tel",
"special":"//div[@id='shop_profile']/div/div/ul/li[@id='serviceStatLong']/span/a/text()",
"detail":"//ul[@id='shop_pro_basic']/li[@id='CommendLong']/span/a/text()",
"park":"//ul[@id='shop_pro_basic']/li[@id='parkingStatLong']/text()",
"theme":"//div[@id='shop_profile']/div/div/p[3]/a[1]/text()",
}
},
"http://venue.damai.cn/search.aspx":{
"info":"抓取大麦网场馆列表页页",
"pre_url":"http://venue.damai.cn",
"link_xpath":[
"//div[@class='pagination']/a[@class='next']/@href",
"//span[@class='type']/h3/a/@href",
],
},
"http://venue.damai.cn/venue":{
"info":"抓取大麦网场馆详情页",
"table":"pastime",
"page_xpath":{
"name":"//div[@class='site_guide']/a[3]/text()",
"image":"//div[@class='venueDetal']/p/img[@class='img']/@src",
"addr":"//div[@class='info']/p/a[@class='VenueAddress'][1]/text()",
"city_name":"//input[@id='city']/@value",
"detail":"//div[@class='info']/div/text()",
}
},
"http://www.17u.cn/HotelInfo":{
"info":"抓取同程网酒店详情页",
"table":"hotel",
"page_xpath":{
"name":"//div[@class='detail_info_title']/div/h1/text()",
"image":"//div[@class='ls_nav']//img/@bigsrc",
"star":"//div[@class='detail_info_title']//a[@class='hotel_star']/@title",
"detail":"//div[@class='detail_intro_main']/div[@id='hotelDes']/text()",
"addr":"//div[@class='hotel_address']//span//text()",
"price":"//span[@id='detail_price']/span[@class='money']/text()",
"special":"//div[@class='detail_intro_list clearfix']/dl/dd//text()",
"hotel_id":"//input[@id='txt_HotelId']/@value",
},
"page_xpath2":{
"name":"//span[@id='lblHotelInfo']//span[@class='titnew']/text()",
"image":"//span[@id='lblHotelInfo']/div[@class='jd_tj']/img/@src",
"star":"//span[@id='lblHotelInfo']//a[@class='titx']/text()",
"detail":"//span[@id='lblHotelInfo']/div[@class='jd_tj']/p/span[@id='Intro']/text()",
"addr":"//span[@id='lblHotelInfo']/div[@class='jd_tj']/p/text()",
},
"extra":"'http://www.17u.cn/ajaxhelper/RecentBrowseHotel.aspx?hotelid=%s' % (self.item['hotel_id'])",
},
"http://www.17u.cn/ajaxhelper/RecentBrowseHotel":{
"table":"hotel",
"page_re":{
"price":"\"price\":\"(.*)\"",
},
},
}