Skip to content

Commit 476efc6

Browse files
authored
v2.1.11: 更新headers,修复对title过长的本子的解析 (#93)
1 parent 8df766e commit 476efc6

File tree

4 files changed

+20
-13
lines changed

4 files changed

+20
-13
lines changed

src/jmcomic/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22
# 被依赖方 <--- 使用方
33
# config <--- entity <--- toolkit <--- client <--- option <--- downloader
44

5-
__version__ = '2.1.10'
5+
__version__ = '2.1.11'
66

77
from .api import *

src/jmcomic/jm_config.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,9 +96,7 @@ def headers(cls, domain='18comic.vip'):
9696
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,'
9797
'application/signed-exchange;v=b3;q=0.7',
9898
'accept-language': 'zh-CN,zh;q=0.9',
99-
'cache-control': 'no-cache',
10099
'referer': f'https://{domain}',
101-
'pragma': 'no-cache',
102100
'sec-ch-ua': '"Not.A/Brand";v="8", "Chromium";v="114", "Google Chrome";v="114"',
103101
'sec-ch-ua-mobile': '?0',
104102
'sec-ch-ua-platform': '"Windows"',

src/jmcomic/jm_entity.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,21 @@
44

55

66
class JmBaseEntity:
7-
pass
7+
8+
@staticmethod
9+
def fix_title(title: str, limit=50):
10+
"""
11+
一些过长的标题可能含有 \n,例如album: 360537
12+
该方法会把 \n 去除
13+
"""
14+
if len(title) > limit and '\n' in title:
15+
title = title.replace('\n', '')
16+
17+
return title.strip()
18+
19+
def save_to_file(self, filepath):
20+
from common import PackerUtil
21+
PackerUtil.pack(self, filepath)
822

923

1024
class DetailEntity(JmBaseEntity, IterableEntity):
@@ -17,10 +31,6 @@ def id(self) -> str:
1731
def name(self) -> str:
1832
return getattr(self, 'title')
1933

20-
def save_to_file(self, filepath):
21-
from common import PackerUtil
22-
PackerUtil.pack(self, filepath)
23-
2434
@classmethod
2535
def __jm_type__(cls):
2636
# "JmAlbumDetail" -> "album" (本子)
@@ -130,7 +140,7 @@ def __init__(self,
130140
):
131141
self.photo_id: str = photo_id
132142
self.scramble_id: str = scramble_id
133-
self.title: str = str(title).strip()
143+
self.title: str = self.fix_title(str(title))
134144
self.sort: int = int(sort)
135145
self._keywords: str = keywords
136146
self._series_id: int = int(series_id)

src/jmcomic/jm_toolkit.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ class JmcomicText:
99

1010
pattern_html_photo_photo_id = compile('<meta property="og:url" content=".*?/photo/(\d+)/?.*?">')
1111
pattern_html_photo_scramble_id = compile('var scramble_id = (\d+);')
12-
pattern_html_photo_title = compile('<title>(.*?)\|.*</title>')
12+
pattern_html_photo_title = compile('<title>([\s\S]*?)\|.*</title>')
1313
# pattern_html_photo_data_original_list = compile('data-original="(.*?)" id="album_photo_.+?"')
1414
pattern_html_photo_data_original_domain = compile('src="https://(.*?)/media/albums/blank')
1515
pattern_html_photo_data_original_0 = compile('data-original="(.*?)"[ \n]*?id="album_photo')
@@ -20,9 +20,9 @@ class JmcomicText:
2020

2121
pattern_html_album_album_id = compile('<span class="number">.*?:JM(\d+)</span>')
2222
pattern_html_album_scramble_id = compile('var scramble_id = (\d+);')
23-
pattern_html_album_title = compile('panel-heading[\s\S]*?<h1>(.*?)</h1>')
23+
pattern_html_album_title = compile('<h1 class="book-name" id="book-name">([\s\S]*?)</h1>')
2424
pattern_html_album_episode_list = compile('data-album="(\d+)">\n *?<li.*?>\n *'
25-
'第(\d+)話\n(.*)\n *'
25+
'第(\d+)話\n([\s\S]*?)\n *'
2626
'<[\s\S]*?>(\d+-\d+-\d+).*?')
2727
pattern_html_album_page_count = compile('<span class="pagecount">.*?:(\d+)</span>')
2828
pattern_html_album_pub_date = compile('>上架日期 : (.*?)</span>')
@@ -135,7 +135,6 @@ def match_field(field_key: str, pattern: Union[Pattern, List[Pattern]], text):
135135

136136
field_dict = {}
137137
pattern_name: str
138-
139138
for pattern_name, pattern_value in cls.__dict__.items():
140139
if not pattern_name.startswith(cls_field_prefix):
141140
continue

0 commit comments

Comments
 (0)