-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path视频数据提取.py
108 lines (89 loc) · 2.82 KB
/
视频数据提取.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# coding=utf-8
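'''
Purpose: scrape video images from the monitoring site (request headers were
copied from Google Chrome).
Before running:
1. Update the Host and Cookie values in the request headers.
'''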
import urllib.request
import urllib3
from time import strftime, sleep
import requests
import json
import time
from bs4 import BeautifulSoup
# Parse the page
def _camera_image_urls(rows, camera_no=1):
    """Return the ``imgUrl`` of every row whose ``cameraNo`` equals *camera_no*.

    Rows are normally dicts produced by ``json.loads``. A row that arrives as
    a string is decoded as JSON first; rows that cannot be decoded, or that
    lack a usable ``imgUrl``, are skipped instead of aborting the whole run.
    """
    image_urls = []
    for row in rows:
        if isinstance(row, str):
            # The original code re-attached a missing opening brace before
            # decoding a fragment. Keep that repair, but decode with json
            # rather than eval(): eval() would execute server-supplied text.
            text = row if row.startswith("{") else "{" + row
            try:
                row = json.loads(text)
            except ValueError:
                continue
        if not isinstance(row, dict):
            continue
        if row.get("cameraNo") == camera_no and row.get("imgUrl"):
            image_urls.append(row["imgUrl"])
    return image_urls


def getUrl(urls):
    """Fetch a result page and download every camera-1 image it lists.

    The response body is parsed as JSON with a ``rows`` list; each row whose
    ``cameraNo`` is 1 contributes its ``imgUrl`` to the download list.

    Args:
        urls: The URL to request (a single URL string, despite the plural
            name).

    Returns:
        None. Each image is saved under ``E:/code with lief/`` as
        ``<millisecond timestamp>.jpg``.

    Raises:
        ValueError: If the response text is not valid JSON.
        KeyError: If the decoded JSON has no ``rows`` entry.
    """
    # Browser-like headers. Host and Cookie are tied to one server/session
    # and have to be updated by hand before each run.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Cookie': 'JSESSIONID=49840976216FB769E70E63070C15D24C',
        'Host': '121.201.83.194:8040',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36',
    }

    # Send the request with the custom headers.
    resultPage = requests.get(urls, headers=headers)

    # NOTE(review): no parser is named, so BeautifulSoup uses whichever one is
    # installed. The <p> lookup presumably relies on a parser that wraps bare
    # text in <p> (lxml does) -- confirm before pinning a specific parser.
    soup = BeautifulSoup(resultPage.text)
    scrr = soup.find('p')
    print(scrr)

    # Fall back to the raw body when the parser produced no <p> element,
    # instead of failing on ``None.text``.
    payload = scrr.text if scrr is not None else resultPage.text
    j = json.loads(payload)
    rowslst = j['rows']

    # Collect the image URLs for channel (camera) 1. The original looped over
    # the characters of the raw JSON string, so no row was ever examined.
    imgLst = _camera_image_urls(rowslst, camera_no=1)
    print(imgLst)

    for img in imgLst:
        print(img)
        target = 'E:/code with lief/%s.jpg' % int(round(time.time() * 1000))
        try:
            urllib.request.urlretrieve(img, target)
        except urllib.error.HTTPError:
            # Best effort: skip images the server refuses to serve.
            continue
    print("over")
# Driver: request one fixed time window and hand the page to getUrl().
# NOTE: "(写入网站地址)" is a placeholder for the site address and must be
# replaced with the real base URL before running.
for i in (0,):
    urls = "(写入网站地址)&startTime=2019-02-25%2000:00:00&endTime=2019-02-25%2011:06:19&dbKey=dbGzky"
    try:
        print(urls)
        sleep(1)  # brief pause before hitting the server
        getUrl(urls)
    except SyntaxError:
        # A SyntaxError raised while processing the page is ignored on purpose.
        pass