#!/usr/bin/env python
# -*- coding: utf-8 -*-
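#
# Summary (based on the code below): reads one mail message from standard
# input, extracts URLs from it (from <a href> links in an HTML part, or
# from a plain-text body), and posts each URL to a Scuttle bookmarking
# service via its api/posts/add endpoint.
#
# Usage (see main() below; example.com is a placeholder):
#   mailToScuttle.py https://example.com/scuttle/ user password < message.eml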
# See http://qiita.com/stkdev/items/a44976fb81ae90a66381
import sys
import email.parser
import email.header
import urllib
from HTMLParser import HTMLParser
import re
import requests
# Global variables
url = []  # list of dicts: {'url', 'title', 'desc', 'tags', 'private'}
class MyHTMLParser(HTMLParser):
    # Parses HTML-format mail bodies.
    flg = False      # did we just find a URL worth keeping?
    last = ""        # the URL found
    private = False

    def find_href(self, i):
        # We are inside an <a href="..."> attribute.
        if re.match(r"^https?://", i[1]):
            # Links such as mailto: exist too, so keep only http:// and https://.
            if re.match(r"https://flemail\.flipboard\.com/redirect", i[1]):
                # Skip Flipboard-internal redirect URLs in mail from Flipboard.
                return
            if re.match(r"^https://www\.facebook\.com/", i[1]):
                # A Facebook link.
                if re.match(r"^https://www\.facebook\.com/n/\?saved%2Fredirect%2F", i[1]):
                    # A Facebook "saved item" redirect: extract the uri= target.
                    matched = re.search(r"&uri=[^&]*", i[1]).group()
                    self.last = urllib.unquote(re.sub(r"^&uri=", "", matched))  # undo URI escaping
                    if not re.match(r"^http", self.last):
                        # The extracted URL was relative, so prefix the Facebook origin.
                        self.last = "https://www.facebook.com" + self.last
                    # Treat links found inside Facebook as private for now.
                    self.private = True
                else:
                    # Skip other Facebook links (unsubscribe links and the like).
                    return
            else:
                # An ordinary link, not a Facebook or Flipboard wrapper.
                self.last = i[1]
            self.flg = True
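
    # Illustrative example of the Facebook unwrapping above (hypothetical URL):
    #   in:  https://www.facebook.com/n/?saved%2Fredirect%2F&uri=https%3A%2F%2Fexample.com%2Fpage
    #   out: https://example.com/page   (and private is set to True)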
    def handle_starttag(self, tagname, attribute):
        if tagname.lower() == "a":
            # Look for <a ...> tags.
            for i in attribute:
                if i[0].lower() == "href":
                    # Found an <a href="..."> attribute.
                    self.find_href(i)

    def handle_data(self, data):
        if self.flg:
            # Strip whitespace; ideally only whitespace around line breaks would go.
            data = re.sub(r"\s", "", data)
            url.append({'url': self.last, 'title': data, 'desc': "", 'tags': "",
                        'private': self.private})
            #print("URL:" + self.last + " Private:" + str(self.private))
            self.flg = False
            self.private = False
class MyTextParser():
    # Parses plain-text mail bodies:
    #   http://...  -- a URL line; a new URL flushes the previous entry
    #   (title: ... | the line directly after the URL)
    #   desc: ...
    #   tags: ...
    uri = ""
    title = ""
    desc = ""
    tags = ""
    private = False
    uriflg = False
    subject = ""
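
    # Example of a plain-text body this parser accepts (illustrative,
    # derived from the rules in parse() below):
    #   https://example.com/some/page
    #   title: An example page
    #   desc: a short description
    #   tags: tag1, tag2
    #   private: True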
    def flash(self):
        #print("Flash: " + self.uri)
        if self.uri != "":
            if self.title == "":
                # No title found: fall back to the mail Subject.
                self.title = re.sub(r"\s", "", self.subject)
                self.subject = ""  # use the Subject only once
            url.append({'url': self.uri, 'title': self.title, 'desc': self.desc,
                        'tags': self.tags, 'private': self.private})
            #print(self.uri + self.title)
        self.uri = ""
        self.title = ""
        self.desc = ""
        self.tags = ""
        self.uriflg = False

    def parse(self, body, subject):
        self.subject = subject
        self.flash()
        for line in body.splitlines():
            #print(line)
            matched = re.search(r"\s*https?://[^\s]*", line)
            #TODO: also handle "some description http://..." style lines
            if matched:
                self.flash()
                self.uri = matched.group()
                self.uriflg = True
                #print("FIND url:" + self.uri)
            elif re.search(r"^\s*title:\s+", line, re.IGNORECASE):
                self.title = re.sub(r"\s+$", "",
                                    re.sub(r"(?i)^\s*title:\s+", "", line))
                #print("FIND title:" + self.title)
            elif re.search(r"^\s*desc:\s+", line, re.IGNORECASE):
                self.desc = re.sub(r"\s", "",
                                   re.sub(r"(?i)^\s*desc:\s+", "", line))
                #print("FIND desc:" + self.desc)
            elif re.search(r"^\s*tags:\s+", line, re.IGNORECASE):
                self.tags = re.sub(r",", " ",
                                   re.sub(r"(?i)^\s*tags:\s+", "", line))
                #print("FIND tags:" + self.tags)
            elif re.search(r"^\s*private:\s+", line, re.IGNORECASE):
                self.private = (re.sub(r"(?i)^\s*private:\s+", "", line) == "True")
                #print("FIND private:" + str(self.private))
            elif self.uriflg and self.title == "":
                # The line right after a URL becomes its title.
                self.title = re.sub(r"^\s*", "", line)
                #print("FIND title:" + self.title)
                self.uriflg = False  # only the line directly after the URL
        self.flash()
def main():
    if len(sys.argv) < 4:
        sys.stderr.write("Usage: " + sys.argv[0] + " URL user password\n")
        sys.exit(1)
    scuttle = sys.argv[1]  # Scuttle base URL, e.g. https://example.com/scuttle/
    user = sys.argv[2]     # user name
    passwd = sys.argv[3]   # password
    sys.argv[3] = ""       # overwrite so the password does not linger in argv
    #print(scuttle + " : " + user)
    # Read the mail message from standard input.
    email_default_encoding = 'utf-8'  # or 'iso-2022-jp'
    msg = email.message_from_file(sys.stdin)
    #print(msg.keys())
    subject = msg.get("Subject")
    if subject is None:
        subject = ""
    #print(subject)
    # Determine the Subject encoding.
    try:
        msg_encoding = email.header.decode_header(subject)[0][1] or email_default_encoding
    except Exception:
        #print("!!! Couldn't determine the Subject encoding.")
        if re.match(r"=\?iso-2022-jp\?", subject, re.IGNORECASE):
            msg_encoding = 'iso-2022-jp'
        elif re.match(r"=\?utf-8\?", subject, re.IGNORECASE):
            msg_encoding = 'utf-8'
        elif re.match(r"=\?cp932\?", subject, re.IGNORECASE):
            msg_encoding = 'cp932'
        else:
            msg_encoding = email_default_encoding
    #print("=== " + msg_encoding)
    # Decode the Subject.
    subjectbase = email.header.decode_header(subject)
    subject = ""
    for sub in subjectbase:
        if isinstance(sub[0], bytes):
            subject += sub[0].decode(msg_encoding)
        else:
            subject += sub[0]
    #print("=== Subject: " + subject)
    # Read the message body, handling multipart messages.
    body = ""
    if msg.is_multipart():
        #print("=== Multipart")
        for payload in msg.get_payload():
            #print("=== " + payload.get_content_type())
            if payload.get_content_type() == "multipart/related":
                for payload2 in payload.get_payload():
                    #print("=== " + payload2.get_content_type())
                    if payload2.get_content_type() == "text/html":
                        #print("=== found HTML")
                        body = payload2.get_payload(decode=True)
                        # Extract the <a> elements.
                        parser = MyHTMLParser()
                        parser.feed(body)
                        parser.close()
            elif payload.get_content_type() == "text/html":
                #print("=== found HTML")
                body = payload.get_payload(decode=True)
                # Extract the <a> elements.
                parser = MyHTMLParser()
                parser.feed(body)
                parser.close()
    else:
        #print("=== Single Part")
        if msg.get_content_type() == "text/plain":
            #print("=== found TEXT")
            body = msg.get_payload(decode=True)
            # Extract the URLs.
            myparser = MyTextParser()
            myparser.parse(body, subject)
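    # Typical MIME layout handled above (illustrative):
    #   multipart/alternative
    #     text/plain        -> ignored in the multipart branch
    #     multipart/related
    #       text/html       -> fed to MyHTMLParser
    # A non-multipart text/plain body is fed to MyTextParser instead.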
    # Send the extracted URLs to Scuttle.
    while len(url):
        d = url.pop()
        #print(d.keys())
        if d['tags'] == "":
            # Crude auto-tagging based on keywords in the title.
            # if re.search("Raspberry", d['title']):
            #     d['tags'] = "Raspberry Pi" + " autoTag"
            if re.search(r"IoT", d['title']):
                d['tags'] = "autoTag IoT"
            elif re.search(r"経済", d['title']):
                d['tags'] = "autoTag 経済"
            elif re.search(r"企業", d['title']):
                d['tags'] = "autoTag 企業"
            elif re.search(r"新幹線", d['title']):
                d['tags'] = "autoTag 新幹線"
            elif re.search(r"(JR|鉄道)", d['title']):
                d['tags'] = "autoTag 鉄道"
            elif re.search(r"たばこ", d['title']):
                d['tags'] = "autoTag たばこ"
            elif re.search(r"Windows", d['title'], re.I):
                d['tags'] = "autoTag Windows"
            elif re.search(r"SIM", d['title']):
                d['tags'] = "autoTag SIM"
            elif re.search(r"iPhone", d['title']):
                d['tags'] = "autoTag iPhone"
            elif re.search(r"Raspberry", d['title'], re.I):
                d['tags'] = "autoTag RaspberryPi"
            elif re.search(r"Arduino", d['title'], re.I):
                d['tags'] = "autoTag Arduino"
            elif re.search(r"Amazon", d['title'], re.I):
                d['tags'] = "autoTag Amazon"
            elif re.search(r"Google", d['title'], re.I):
                d['tags'] = "autoTag Google"
            elif re.search(r"Oracle", d['title'], re.I):
                d['tags'] = "autoTag Oracle"
            else:
                d['tags'] = "noTag"
        # POST parameters go in the second argument as a dict.
        response = requests.post(
            scuttle + 'api/posts/add',
            {'url': d['url'],
             'description': d['title'],
             'extended': d['desc'],
             'tags': d['tags'],
             'replace': 'no',  # ignored by Scuttle: 'yes' is not supported, new entries only
             'shared': "no" if d['private'] else "yes",  # ignored by Scuttle: always shared
             'status': "2" if d['private'] else "0"  # Scuttle: 0=default, 1=shared, 2=private
             },
            auth=(user, passwd))
        print("URL=" + d['url'] + " PRIVATE=" + str(d['private']))
        #      + " TITLE=" + d['title'])
        #print("DESC=" + d['desc'])
        #print("TAGS=" + d['tags'])
        if response.status_code == 200:
            # <result code="done" /> means success; stay silent on success.
            if not re.search(r'<result code="done" />', response.text):
                res = re.search(r' code="[^"]*" ', response.text)
                print("ERROR: " + (res.group() if res else response.text))
        else:
            print(response)

if __name__ == '__main__':
    main()
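
# Deployment note (an assumption, not stated in the source): since the script
# reads the message from standard input, it can be hooked into mail delivery,
# e.g. a .forward file or procmail recipe such as
#   |/path/to/mailToScuttle.py https://example.com/scuttle/ user password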