-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path妹子图.py
More file actions
100 lines (84 loc) · 3.19 KB
/
妹子图.py
File metadata and controls
100 lines (84 loc) · 3.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2017/8/9 下午1:31
# @Author : hukezhu
# @Site :
# @File : 妹子图.py
# @Software: PyCharm
import requests
from lxml import etree
import time
import sys
import MySQLdb
import os
#import leancloud
# Python 2 idiom: reload the `sys` module, then set the interpreter-wide
# default string encoding to UTF-8.
# NOTE(review): presumably needed so the Chinese titles handled below can be
# mixed between byte strings and unicode without errors - confirm.
reload(sys)
sys.setdefaultencoding("utf-8")
# Request headers sent when fetching the HTML listing pages.
headers111 = {
    'User-Agent': (
        'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/59.0.3071.115 Mobile Safari/537.36'
    ),
    'Accept': 'text/html;q=0.9,*/*;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Accept-Encoding': 'gzip',
    'Connection': 'close',
    # Note: if pages still cannot be fetched, the target site's host can be
    # set here instead of None.
    'Referer': None,
}
# Request headers sent when downloading the image files themselves.
headers1 = {
    'User-Agent': (
        'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/59.0.3071.115 Mobile Safari/537.36'
    ),
    'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Accept-Encoding': 'gzip',
    'Connection': 'close',
    # Note: if images still cannot be fetched, the target site's host can be
    # set here.
    'Referer': 'http://www.mzitu.com/',
    'Host': 'i.meizitu.net',
}
def get_url(url, a):
    """Crawl listing pages starting at ``url`` and save every thumbnail found.

    Each page is fetched with ``headers111``; every ``<img>`` under the
    ``#pins`` list is passed to ``saveImg`` using its ``alt`` text as the file
    name and its ``data-original`` attribute as the image URL. After a page is
    processed the function waits two seconds and moves on to
    ``http://www.mzitu.com/page/<a + 1>``.

    The crawl is an explicit loop rather than one self-call per page, so a
    long run cannot exhaust the interpreter's recursion limit. It ends when a
    page yields no thumbnails.

    :param url: address of the first listing page to fetch.
    :param a: page number that ``url`` corresponds to.
    :return: None, once a page without thumbnails is reached.
    """
    while True:
        html = requests.get(url, headers=headers111)
        selector = etree.HTML(html.text)
        infos = selector.xpath('//*[@id="pins"]/li/a/img')
        if not infos:
            # Nothing to download on this page: stop instead of requesting
            # ever-higher page numbers forever.
            return

        for info in infos:
            alt = info.get('alt')
            src = info.get('data-original')
            if alt is None or src is None:
                # Without both attributes there is no file name or no image
                # URL; escape_string(None) would raise, so skip the entry.
                continue
            try:
                # NOTE(review): SQL escaping is kept from the original even
                # though the values are used as a file name and a URL -
                # confirm this is intended.
                content = MySQLdb.escape_string(alt)
                imgsource = MySQLdb.escape_string(src)
                print(content + ' ' + imgsource)
                saveImg(imgsource, content)
            except IndexError:
                # Best-effort, as in the original: one bad entry must not
                # abort the rest of the page.
                pass

        time.sleep(2)  # pause between listing pages
        a = a + 1
        url = 'http://www.mzitu.com/page/%s' % a
        print('现在是第%s页' % a)
def saveImg(img_url, name):
    """Download ``img_url`` and store it as ``<name>.jpg`` in the output folder.

    The image is fetched through ``request_content``. Responses with an HTTP
    error status are skipped so that an error page is never saved as a
    ``.jpg``. The file is opened in ``'wb'`` mode, so downloading the same
    title again replaces the file instead of appending a second copy of the
    bytes to it.

    :param img_url: address of the image to download.
    :param name: base file name (without extension) for the saved image.
    :return: None.
    """
    img = request_content(img_url)
    if not img.ok:
        print('skip %s: HTTP %s' % (img_url, img.status_code))
        return

    # A '/' inside the title would be read as a directory separator and make
    # open() fail, so neutralise it.
    safe_name = name.replace('/', '_')
    # The context manager closes the file even if the write raises.
    with open("/Users/hukezhu/Desktop/meizi1/" + safe_name + '.jpg', 'wb') as f:
        f.write(img.content)
def request_content(url):
    """Fetch ``url`` using the ``headers1`` request headers.

    :param url: address to request.
    :return: the response object returned by ``requests.get``.
    """
    return requests.get(url, headers=headers1)
def mkdir(path):
    """Create the folder ``path`` under ``/Users/hukezhu/Desktop`` if missing.

    Surrounding whitespace is stripped from ``path`` first. When the folder
    is created, the process also changes its working directory into it; when
    it already exists, nothing else is done.

    :param path: folder name, joined onto the desktop directory.
    :return: True if the folder was created, False if it already existed.
    """
    path = path.strip()
    target = os.path.join("/Users/hukezhu/Desktop", path)

    if os.path.exists(target):
        # Join the parts into one string: under Python 2, print(a, b, c)
        # would print a tuple repr instead of the message.
        print(u' '.join([u'名字叫做', path, u'的文件夹已经存在了!']))
        return False

    print(u' '.join([u'建了一个名字叫做', path, u'的文件夹!']))
    os.makedirs(target)
    os.chdir(target)  # switch into the newly created directory
    return True
if __name__ == '__main__':
    # Script entry point: make sure the output folder exists, then crawl the
    # site starting from the front page, which counts as page 1.
    url, a = 'http://www.mzitu.com/', 1
    mkdir('meizi1')
    get_url(url, a)