Skip to content

Commit 56c72b9

Browse files
authored
v1.7.0: 增加对禁漫发布页的解析,可拿到所有禁漫网址 (#17)
1 parent 0227da2 commit 56c72b9

File tree

7 files changed

+62
-7
lines changed

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
* 本地安装
1717

1818
```shell
19-
cd ./modules/core/
2019
pip install -e ./
2120
```
2221

src/jmcomic/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22
# 被依赖方 <--- 使用方
33
# config <--- entity <--- toolkit <--- client <--- service <--- option
44

5-
__version__ = '1.6.0'
5+
__version__ = '1.7.0'
66

77
from .api import *

src/jmcomic/jm_client.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,12 @@ def wrap_func_cache(func_name, cache_dict_name):
183183
wrap_func_cache('get_photo_detail', 'album_cache_dict')
184184
wrap_func_cache('get_album_detail', 'photo_cache_dict')
185185

186+
def get_jmcomic_url(self, postman=None):
    """
    Delegate to JmModuleConfig.get_jmcomic_url.

    @param postman: object handed to JmModuleConfig.get_jmcomic_url; when it is
                    falsy (e.g. None), this client itself is passed instead.
    @return: whatever JmModuleConfig.get_jmcomic_url returns.
    """
    sender = postman or self
    return JmModuleConfig.get_jmcomic_url(sender)
188+
189+
def get_jmcomic_url_all(self, postman=None):
    """
    Delegate to JmModuleConfig.get_jmcomic_url_all.

    @param postman: object handed to JmModuleConfig.get_jmcomic_url_all; when it
                    is falsy (e.g. None), this client itself is passed instead.
    @return: whatever JmModuleConfig.get_jmcomic_url_all returns.
    """
    sender = postman or self
    return JmModuleConfig.get_jmcomic_url_all(sender)
191+
186192

187193
# 爬取策略
188194
class FetchStrategy:

src/jmcomic/jm_config.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,15 +74,33 @@ def disable_jm_debug(cls):
7474
@classmethod
def get_jmcomic_url(cls, postman=None):
    """
    Visit JM's permanent domain in order to obtain a usable JM URL.

    @param postman: object used to send the request; when None, a
                    'cffi_Session' implementation is created via common.Postmans.
    @return: the value produced by the redirect-catching GET of
             cls.JM_REDIRECT_URL, e.g. https://jm-comic2.cc
    """
    if postman is None:
        from common import Postmans
        session_clazz = Postmans.get_impl_clazz('cffi_Session')
        postman = session_clazz.create()

    catcher = postman.with_redirect_catching()
    jm_url = catcher.get(cls.JM_REDIRECT_URL)
    cls.jm_debug('获取禁漫地址', f'[{cls.JM_REDIRECT_URL}] → [{jm_url}]')
    return jm_url
87+
88+
@classmethod
def get_jmcomic_url_all(cls, postman=None):
    """
    Visit the JM publish page and extract every JM domain listed on it.

    @param postman: object whose get(url) performs the request; when None, a
                    'cffi' implementation is created via common.Postmans.
    @return: e.g. ['18comic.vip', ..., 'jm365.xyz/ZNPJam']; according to the
             original note, the last entry is the APP download link.
    @raise AssertionError: carrying the response text when the status code
                           is not 200.
    """
    if postman is None:
        from common import Postmans
        # NOTE(review): the sibling get_jmcomic_url asks for 'cffi_Session'
        # while this method asks for 'cffi' - confirm the difference is intended.
        impl_clazz = Postmans.get_impl_clazz('cffi')
        postman = impl_clazz.create()

    pub_resp = postman.get(cls.JM_PUB_URL)
    if pub_resp.status_code == 200:
        # Imported inside the method, exactly as the original does.
        from .jm_toolkit import JmcomicText
        return JmcomicText.analyse_jm_pub_html(pub_resp.text)

    raise AssertionError(pub_resp.text)
86104

87105
@classmethod
88106
def check_html(cls, html: str, url=None):

src/jmcomic/jm_toolkit.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
class JmcomicText:
77
pattern_jm_domain = compile('https://([\w.-]+)')
88
pattern_jm_pa_id = compile('/(photos?|album)/(\d+)')
9+
pattern_html_jm_pub_domain = compile('[\w-]+\.\w+/?\w+')
910

1011
pattern_html_photo_photo_id = compile('<meta property="og:url" content=".*?/photo/(\d+)/?.*?">')
1112
pattern_html_photo_scramble_id = compile('var scramble_id = (\d+);')
@@ -73,6 +74,15 @@ def parse_to_photo_id(cls, text) -> str:
7374
def parse_to_album_id(cls, text) -> str:
7475
return cls.parse_to_photo_id(text)
7576

77+
@classmethod
def analyse_jm_pub_html(cls, html: str, domain_keyword=('jm', 'comic')) -> List[str]:
    """
    Extract JM domains from the HTML of the JM publish page.

    Every match of cls.pattern_html_jm_pub_domain in the HTML is a candidate;
    a candidate is kept only if it contains at least one of the keywords.
    Matches are returned in the order they appear, duplicates included.

    @param html: HTML text of the publish page.
    @param domain_keyword: substrings, any one of which marks a match as a JM domain.
    @return: list of the matching domain strings.
    """
    candidates = cls.pattern_html_jm_pub_domain.findall(html)

    return [
        candidate
        for candidate in candidates
        if any(keyword in candidate for keyword in domain_keyword)
    ]
85+
7686
@classmethod
7787
def analyse_jm_photo_html(cls, html: str) -> JmPhotoDetail:
7888
return cls.reflect_new_instance(

tests/test_jmcomic/test_jm_api.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,3 +113,8 @@ def run(aid):
113113
sorted([ans.sort for ans in photo_ls]),
114114
album.album_id
115115
)
116+
117+
def test_get_jmcomic_url(self):
    """
    Smoke test: print the results of the JM URL lookups.

    Calls, in order, JmModuleConfig.get_jmcomic_url_all, then the client's
    get_jmcomic_url and get_jmcomic_url_all. Nothing is asserted; the test
    only fails if one of the calls raises.
    """
    all_urls_from_config = JmModuleConfig.get_jmcomic_url_all()
    print(all_urls_from_config)

    single_url_from_client = self.client.get_jmcomic_url()
    print(single_url_from_client)

    all_urls_from_client = self.client.get_jmcomic_url_all()
    print(all_urls_from_client)

usage/jmcomic_getting_started.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
--------------------
55
"""
66
import jmcomic # 导入此模块,需要先安装.
7+
78
jmcomic.download_album('422866') # 传入要下载的album的id,即可下载整个album到本地.
89
# 上面的这行代码,还有一个可选参数option: JmOption,表示配置项,
910
# 配置项的作用是告诉程序下载时候的一些选择,
@@ -23,6 +24,22 @@
2324
jmcomic.download_album(('422866', '1', '2', '3')) # tuple
2425
jmcomic.download_album(aid for aid in ('422866', '1', '2', '3')) # 生成器
2526

27+
28+
"""
29+
--------------------
30+
获取域名介绍
31+
--------------------
32+
"""
33+
34+
# 方式1: 访问禁漫发布页
35+
url_ls = jmcomic.JmModuleConfig.get_jmcomic_url_all()
36+
print(url_ls)
37+
38+
# 方式2(可能会报错,需要你自己配置梯子)
39+
url = jmcomic.JmModuleConfig.get_jmcomic_url()
40+
print(url)
41+
42+
2643
"""
2744
--------------------
2845
配置文件介绍
@@ -37,4 +54,4 @@
3754

3855
# 如果你修改了默认配置,现在想用你修改后的配置来下载,使用如下代码
3956
jm_option = jmcomic.create_option('./禁漫下载默认配置.yml')
40-
jmcomic.download_album('23333', jm_option)
57+
jmcomic.download_album('23333', jm_option)

0 commit comments

Comments
 (0)