Skip to content

Commit 85b6bdc

Browse files
authored
v2.0.3: 一些bug fix和代码优化 (#51)
1 parent 50f085b commit 85b6bdc

File tree

8 files changed

+160
-97
lines changed

8 files changed

+160
-97
lines changed

.github/workflows/download.yml

Lines changed: 19 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,23 @@ name: 下载JM本子
22

33
on:
44
push:
5-
branches: [ 'workflow', 'workflow_local' ]
5+
branches-ignore:
6+
- 'master' # master专门用于发布pip和repo介绍
67
paths:
7-
- '.github/workflows/*.yml' # 工作流定义
8-
- 'usage/**/*.py' # 工作流脚本
9-
- 'assets/config/*.yml' # option配置文件
10-
- 'src/**/*.py' # 源码
11-
12-
pull_request:
13-
branches: [ 'workflow', 'workflow_local' ]
8+
- '.github/workflows/download.yml' # 工作流定义
9+
- 'usage/workflow_download.py' # 下载脚本
10+
- 'assets/config/option_workflow_download.yml' # 配置文件
1411

1512
jobs:
1613
crawler:
1714
runs-on: ubuntu-latest
15+
env:
16+
JM_USERNAME: ${{ secrets.JM_USERNAME }}
17+
JM_PASSWORD: ${{ secrets.JM_PASSWORD }}
18+
JM_DOWNLOAD_DIR: /home/runner/work/jmcomic/download/
19+
ZIP_NAME: '本子.tar.gz'
20+
UPLOAD_NAME: '下载完成的本子'
21+
1822
steps:
1923
- uses: actions/checkout@v3
2024
- name: Set up Python 3.11
@@ -29,36 +33,27 @@ jobs:
2933
pip install jmcomic -i https://pypi.org/project --upgrade
3034
3135
- name: 安装依赖项(local)
32-
if: ${{ github.ref == 'refs/heads/workflow_local' }}
33-
run: |
34-
python -m pip install --upgrade pip
35-
pip install commonX -i https://pypi.org/project --upgrade
36-
pip install -e ./
37-
38-
- name: 安装依赖项(pull_request)
39-
if: ${{ github.event_name == 'pull_request' }}
36+
if: ${{ github.ref != 'refs/heads/workflow' }}
4037
run: |
4138
python -m pip install --upgrade pip
4239
pip install commonX -i https://pypi.org/project --upgrade
4340
pip install -e ./
4441
4542
- name: 运行下载脚本
46-
env:
47-
JM_USERNAME: ${{ secrets.JM_USERNAME }}
48-
JM_PASSWORD: ${{ secrets.JM_PASSWORD }}
4943
run: |
5044
cd ./usage/
5145
python workflow_download.py
5246
53-
- name: 压缩下载的漫画
47+
- name: 压缩文件
5448
run: |
55-
cd /home/runner/work/jmcomic/download/
56-
tar -zcvf ../下载完成的本子.tar.gz ./
49+
cd $JM_DOWNLOAD_DIR
50+
tar -zcvf ../$ZIP_NAME ./
51+
mv ../$ZIP_NAME .
5752
5853
- name: 上传结果
5954
uses: actions/upload-artifact@v3
6055
with:
61-
name: 下载完成的本子
62-
path: /home/runner/work/jmcomic/下载完成的本子.tar.gz
56+
name: ${{ env.UPLOAD_NAME }}
57+
path: ${{ env.JM_DOWNLOAD_DIR }}/${{ env.ZIP_NAME }}
6358
if-no-files-found: warn
6459
retention-days: 90

assets/config/option_workflow_download.yml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,10 @@ version: '2.0'
44

55
dir_rule:
66
base_dir: /home/runner/work/jmcomic/download/
7-
rule: Bd_Aauthor_Atitle_Pindex
7+
rule: Bd_Aauthor_Atitle_Pindex
8+
9+
client:
10+
postman:
11+
meta_data:
12+
headers:
13+
referer: https://18comic.vip/

src/jmcomic/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22
# 被依赖方 <--- 使用方
33
# config <--- entity <--- toolkit <--- client <--- option
44

5-
__version__ = '2.0.1'
5+
__version__ = '2.0.3'
66

77
from .api import *

src/jmcomic/jm_client_impl.py

Lines changed: 59 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def request_with_retry(self,
5252
jm_debug(
5353
f'请求重试',
5454
', '.join([
55-
f'次数: [{retry_count + 1}/{self.retry_times}]',
55+
f'次数: [{retry_count}/{self.retry_times}]',
5656
f'域名: [{domain} ({domain_index}/{len(self.domain_list)})]',
5757
f'路径: [{url}]',
5858
f'参数: [{kwargs if "login" not in url else "#login_form#"}]'
@@ -194,56 +194,82 @@ def get_jm_html(self, url, require_200=True, **kwargs):
194194

195195
if require_200 is True and resp.status_code != 200:
196196
write_text('./resp.html', resp.text)
197-
self.check_special_http_code(resp.status_code, url)
198-
raise AssertionError(f"请求失败,"
199-
f"响应状态码为{resp.status_code},"
200-
f"URL=[{resp.url}],"
201-
+ (f"响应文本=[{resp.text}]" if len(resp.text) < 50 else
202-
f'响应文本过长(len={len(resp.text)}),不打印')
203-
)
197+
self.check_special_http_code(resp)
198+
self.raise_request_error(resp)
199+
204200
# 检查请求是否成功
205-
self.require_resp_success_else_raise(resp)
201+
self.require_resp_success_else_raise(resp, url)
206202

207203
return resp
208204

205+
@classmethod
206+
def raise_request_error(cls, resp, msg: Optional[str] = None):
207+
"""
208+
请求如果失败,统一由该方法抛出异常
209+
"""
210+
if msg is None:
211+
msg = f"请求失败," \
212+
f"响应状态码为{resp.status_code}," \
213+
f"URL=[{resp.url}]," \
214+
+ (f"响应文本=[{resp.text}]" if len(resp.text) < 200 else
215+
f'响应文本过长(len={len(resp.text)}),不打印'
216+
)
217+
raise AssertionError(msg)
218+
209219
def get_jm_image(self, img_url) -> JmImageResp:
210220
return JmImageResp(self.get(img_url))
211221

212222
@classmethod
213-
def require_resp_success_else_raise(cls, resp):
214-
# 1. 是否 album_missing
215-
resp_url = resp.url
216-
if resp_url.endswith('/error/album_missing'):
217-
raise AssertionError(f'请求的本子不存在!({resp_url})\n'
218-
'原因可能为:\n'
219-
'1. id有误,检查你的本子/章节id\n'
220-
'2. 该漫画只对登录用户可见,请配置你的cookies\n')
221-
222-
# 2. 是否是错误html页
223-
cls.check_error_html(resp.text.strip(), resp_url)
223+
def require_resp_success_else_raise(cls, resp, req_url):
224+
# 1. 检查是否 album_missing
225+
error_album_missing = '/error/album_missing'
226+
if resp.url.endswith(error_album_missing) and not req_url.endswith(error_album_missing):
227+
cls.raise_request_error(
228+
resp,
229+
f'请求的本子不存在!({req_url})\n'
230+
'原因可能为:\n'
231+
'1. id有误,检查你的本子/章节id\n'
232+
'2. 该漫画只对登录用户可见,请配置你的cookies\n'
233+
)
234+
235+
# 2. 是否是特殊的内容
236+
cls.check_special_text(resp)
224237

225238
@classmethod
226-
def check_error_html(cls, html: str, url=None):
227-
html = html.strip()
228-
error_msg = JmModuleConfig.JM_ERROR_RESPONSE_HTML.get(html, None)
229-
if error_msg is None:
239+
def check_special_text(cls, resp):
240+
html = resp.text
241+
url = resp.url
242+
243+
if len(html) > 500:
230244
return
231245

232-
write_text('./resp.html', html)
233-
raise AssertionError(f'{error_msg}'
234-
+ (f': {url}' if url is not None else ''))
246+
for content, reason in JmModuleConfig.JM_ERROR_RESPONSE_TEXT.items():
247+
if content not in html:
248+
continue
249+
250+
write_text('./resp.html', html)
251+
cls.raise_request_error(
252+
resp,
253+
f'{reason}'
254+
+ (f': {url}' if url is not None else '')
255+
)
235256

236257
@classmethod
237-
def check_special_http_code(cls, code, url=None):
258+
def check_special_http_code(cls, resp):
259+
code = resp.status_code
260+
url = resp.url
261+
238262
error_msg = JmModuleConfig.JM_ERROR_STATUS_CODE.get(int(code), None)
239263
if error_msg is None:
240264
return
241265

242-
raise AssertionError(f"请求失败,"
243-
f"响应状态码为{code},"
244-
f'原因为: [{error_msg}], '
245-
+ (f'URL=[{url}]' if url is not None else '')
246-
)
266+
cls.raise_request_error(
267+
resp,
268+
f"请求失败,"
269+
f"响应状态码为{code},"
270+
f'原因为: [{error_msg}], '
271+
+ (f'URL=[{url}]' if url is not None else '')
272+
)
247273

248274

249275
class JmApiClient(AbstractJmClient):

src/jmcomic/jm_client_interface.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,5 +208,6 @@ class JmcomicClient(
208208
JmImageClient,
209209
JmDetailClient,
210210
JmUserClient,
211+
Postman,
211212
):
212213
pass

src/jmcomic/jm_config.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,20 @@ def default_jm_debug(topic: str, msg: str):
66
class JmModuleConfig:
77
# 网站相关
88
PROT = "https://"
9-
_DOMAIN = None
9+
DOMAIN = None
1010
JM_REDIRECT_URL = f'{PROT}jm365.xyz/3YeBdF' # 永久網域,怕走失的小伙伴收藏起来
1111
JM_PUB_URL = f'{PROT}jmcomic1.bet'
1212
JM_CDN_IMAGE_URL_TEMPLATE = PROT + 'cdn-msp.{domain}/media/photos/{photo_id}/{index:05}{suffix}' # index 从1开始
1313
JM_IMAGE_SUFFIX = ['.jpg', '.webp', '.png', '.gif']
1414

1515
# 访问JM可能会遇到的异常网页
16-
JM_ERROR_RESPONSE_HTML = {
16+
JM_ERROR_RESPONSE_TEXT = {
1717
"Could not connect to mysql! Please check your database settings!": "禁漫服务器内部报错",
1818
"Restricted Access!": "禁漫拒绝你所在ip地区的访问,你可以选择: 换域名/换代理",
1919
}
2020

2121
JM_ERROR_STATUS_CODE = {
22+
403: 'ip地区禁止访问/爬虫被识别',
2223
520: '520: Web server is returning an unknown error (禁漫服务器内部报错)',
2324
524: '524: The origin web server timed out responding to this request. (禁漫服务器处理超时)',
2425
}
@@ -47,11 +48,11 @@ def domain(cls, postman=None):
4748
并且设置把 domain 设置为禁漫模块的默认域名。
4849
这样一来,配置文件也不用配置域名了,一切都在运行时动态获取。
4950
"""
50-
if cls._DOMAIN is None:
51+
if cls.DOMAIN is None:
5152
from .jm_toolkit import JmcomicText
52-
cls._DOMAIN = JmcomicText.parse_to_jm_domain(cls.get_jmcomic_url(postman))
53+
cls.DOMAIN = JmcomicText.parse_to_jm_domain(cls.get_jmcomic_url(postman))
5354

54-
return cls._DOMAIN # jmcomic默认域名
55+
return cls.DOMAIN # jmcomic默认域名
5556

5657
@classmethod
5758
def headers(cls, authority=None):
@@ -60,12 +61,13 @@ def headers(cls, authority=None):
6061
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,'
6162
'application/signed-exchange;v=b3;q=0.7',
6263
'accept-language': 'zh-CN,zh;q=0.9',
64+
'referer': 'https://18comic.vip',
6365
'sec-ch-ua': '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"',
6466
'sec-ch-ua-mobile': '?0',
6567
'sec-ch-ua-platform': '"Windows"',
6668
'sec-fetch-dest': 'document',
6769
'sec-fetch-mode': 'navigate',
68-
'sec-fetch-site': 'none',
70+
'sec-fetch-site': 'same-origin',
6971
'sec-fetch-user': '?1',
7072
'upgrade-insecure-requests': '1',
7173
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 '

src/jmcomic/jm_option.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -312,20 +312,28 @@ def to_file(self, filepath=None):
312312
'api': JmApiClient,
313313
}
314314

315-
def build_jm_client(self) -> JmcomicClient:
315+
def build_jm_client(self, **kwargs) -> JmcomicClient:
316316
if self.cache_jm_client is not True:
317-
return self.new_jm_client()
317+
return self.new_jm_client(**kwargs)
318318

319319
client = self.jm_client_cache
320320
if client is None:
321-
client = self.new_jm_client()
321+
client = self.new_jm_client(**kwargs)
322322
self.jm_client_cache = client
323323

324324
return client
325325

326-
def new_jm_client(self) -> JmcomicClient:
326+
def new_jm_client(self, **kwargs) -> JmcomicClient:
327+
postman_conf: dict = self.client.postman.src_dict
328+
329+
# support overwrite meta_data
330+
if len(kwargs) != 0:
331+
meta_data = postman_conf.get('meta_data', {})
332+
meta_data.update(kwargs)
333+
postman_conf['meta_data'] = meta_data
334+
327335
# postman
328-
postman = Postmans.create(data=self.client.postman)
336+
postman = Postmans.create(data=postman_conf)
329337

330338
# domain_list
331339
domain_list = self.client.domain

0 commit comments

Comments
 (0)