Skip to content

Commit b49dd2f

Browse files
committed
fix pickle unserialize error
1 parent 15f3532 commit b49dd2f

File tree

4 files changed

+40
-7
lines changed

4 files changed

+40
-7
lines changed

.gitignore

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Byte-compiled / optimized
2+
__pycache__/
3+
*.py[cod]
4+
5+
# Pycharm
6+
.idea/
7+
8+
# Environment
9+
venv/

README.md

+4
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@
3434

3535
# 更新日志
3636

37+
## `v2.6.6`
38+
39+
- 修复 pickle 库反序列化导致内存溢出的问题[#65](https://github.com/zaxtyson/LanZouCloud-API/issues/65)
40+
3741
## `v2.6.5`
3842

3943
- 修复蓝奏云主域名解析异常的问题[#59](https://github.com/zaxtyson/LanZouCloud-API/issues/59) [#60](https://github.com/zaxtyson/LanZouCloud-API/pull/60)

lanzou/api/core.py

+15-5
Original file line numberDiff line numberDiff line change
@@ -832,7 +832,7 @@ def _upload_big_file(self, file_path, dir_id, *, callback=None, uploaded_handler
832832
if not os.path.exists(record_file): # 初始化记录文件
833833
info = {'name': file_name, 'size': file_size, 'uploaded': 0, 'parts': []}
834834
with open(record_file, 'wb') as f:
835-
pickle.dump(info, f)
835+
pickle.dump(info, f, protocol=4)
836836
else:
837837
with open(record_file, 'rb') as f:
838838
info = pickle.load(f)
@@ -859,7 +859,7 @@ def _close_pwd(fid, is_file): # 数据块上传后默认关闭提取码
859859
info['parts'].append(os.path.basename(data_path)) # 记录已上传的文件名
860860
with open(record_file, 'wb') as f:
861861
logger.debug(f"Update record file: {uploaded_size}/{file_size}")
862-
pickle.dump(info, f)
862+
pickle.dump(info, f, protocol=4)
863863
else:
864864
logger.debug(f"Upload data file failed: data_path={data_path}")
865865
return LanZouCloud.FAILED
@@ -1041,10 +1041,19 @@ def down_file_by_url(self, share_url, pwd='', save_path='./Download', *, callbac
10411041
os.rename(tmp_file_path, file_path) # 下载完成,改回正常文件名
10421042
if os.path.getsize(file_path) > 512: # 文件大于 512 bytes 就检查一下
10431043
file_info = None
1044+
is_protocol_3 = False
10441045
with open(file_path, 'rb') as f:
10451046
f.seek(-512, os.SEEK_END)
10461047
last_512_bytes = f.read()
10471048
file_info = un_serialize(last_512_bytes)
1049+
# Python3.6 序列化时默认使用 pickle 第三版协议,
1050+
# 导致计算时文件尾部多写了 5 字节, 保险起见处理一下
1051+
if not file_info:
1052+
is_protocol_3 = True
1053+
f.seek(-517, os.SEEK_END)
1054+
last_517_bytes = f.read()
1055+
file_info = un_serialize(last_517_bytes)
1056+
10481057

10491058
# 大文件的记录文件也可以反序列化出 name,但是没有 padding 字段
10501059
if file_info is not None and 'padding' in file_info:
@@ -1059,7 +1068,8 @@ def down_file_by_url(self, share_url, pwd='', save_path='./Download', *, callbac
10591068
os.rename(file_path, new_file_path)
10601069
# 截断最后 512 字节隐藏信息, 还原文件
10611070
with open(new_file_path, 'rb+') as f:
1062-
f.seek(-512, os.SEEK_END)
1071+
truncate_size = 517 if is_protocol_3 else 512
1072+
f.seek(-truncate_size, os.SEEK_END)
10631073
f.truncate()
10641074
file_path = new_file_path # 保存文件重命名后真实路径
10651075

@@ -1222,7 +1232,7 @@ def _down_big_file(self, name, total_size, file_list, save_path, *, callback=Non
12221232
if not os.path.exists(record_file): # 初始化记录文件
12231233
info = {'last_ending': 0, 'finished': []} # 记录上一个数据块结尾地址和已经下载的数据块
12241234
with open(record_file, 'wb') as rf:
1225-
pickle.dump(info, rf)
1235+
pickle.dump(info, rf, protocol=4)
12261236
else: # 读取记录文件,下载续传
12271237
with open(record_file, 'rb') as rf:
12281238
info = pickle.load(rf)
@@ -1266,7 +1276,7 @@ def _down_big_file(self, name, total_size, file_list, save_path, *, callback=Non
12661276
finally:
12671277
info['last_ending'] = file_size_now
12681278
with open(record_file, 'wb') as rf:
1269-
pickle.dump(info, rf)
1279+
pickle.dump(info, rf, protocol=4)
12701280
logger.debug(f"Update download record info: {info}")
12711281
# 全部数据块下载完成, 记录文件可以删除
12721282
logger.debug(f"Delete download record file: {record_file}")

lanzou/api/utils.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,15 @@ def is_folder_url(share_url: str) -> bool:
112112

113113
def un_serialize(data: bytes):
114114
"""反序列化文件信息数据"""
115+
# https://github.com/zaxtyson/LanZouCloud-API/issues/65
116+
is_right_format = False
117+
if data.startswith(b"\x80\x04") and data.endswith(b"u."):
118+
is_right_format = True
119+
if data.startswith(b"\x80\x03") and data.endswith(b"u."):
120+
is_right_format = True
121+
122+
if not is_right_format:
123+
return None
115124
try:
116125
ret = pickle.loads(data)
117126
if not isinstance(ret, dict):
@@ -189,9 +198,10 @@ def let_me_upload(file_path):
189198
chunk = in_f.read(4096)
190199
# 构建文件 "报尾" 保存真实文件名,大小 512 字节
191200
# 追加数据到文件尾部,并不会影响文件的使用,无需修改即可分享给其他人使用,自己下载时则会去除,确保数据无误
192-
padding = 512 - len(file_name.encode('utf-8')) - 42 # 序列化后空字典占 42 字节
201+
# protocol=4(py3.8默认), 序列化后空字典占 42 字节
202+
padding = 512 - len(file_name.encode('utf-8')) - 42
193203
data = {'name': file_name, 'padding': b'\x00' * padding}
194-
data = pickle.dumps(data)
204+
data = pickle.dumps(data, protocol=4)
195205
out_f.write(data)
196206
return new_file_path
197207

0 commit comments

Comments
 (0)