Skip to content

Commit 6685bd0

Browse files
committed
fix(audiences): correctly filter unread messages by icon class and attributes
1 parent c857ae3 commit 6685bd0

2 files changed

Lines changed: 75 additions & 0 deletions

File tree

app/modules/indexer/parser/nexus_audiences.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,30 @@ def _parse_message_unread(self, html_text):
4242
if html is not None:
4343
del html
4444

45+
def _parse_message_unread_links(self, html_text: str, msg_links: list):
    """
    Collect the unread-message links from one Audiences message-list page.

    A table row is treated as unread when it contains an ``<img>`` whose
    class list includes ``unreadpm``, whose ``alt`` is ``Unread``, or whose
    ``title`` is ``未读``. For each such row, the ``href`` of every anchor
    that is a direct child of one of the row's cells and points at
    ``viewmessage`` is collected.

    :param html_text: HTML text of the message-list page
    :param msg_links: list that is extended in place with the hrefs found
    :return: the stripped href of the last "next page" anchor on the page,
             or None when there is no such anchor or when
             ``StringUtils.is_valid_html_element`` rejects the parsed tree
    """
    html = etree.HTML(html_text)
    try:
        if not StringUtils.is_valid_html_element(html):
            return None

        # The padded-class comparison matches "unreadpm" only as a whole
        # class token, so "readpm" (or any other superstring) never matches.
        message_links = html.xpath(
            '//tr[.//img[contains(concat(" ", normalize-space(@class), " "), " unreadpm ") '
            'or @alt="Unread" or @title="未读"]]/td/a[contains(@href, "viewmessage")]/@href'
        )
        msg_links.extend(message_links)

        # contains(., ...) tests the anchor's full string-value. The former
        # contains(.//text(), ...) only looked at the FIRST descendant text
        # node (XPath 1.0 node-set -> string conversion), so an anchor with
        # leading whitespace or markup before the label was silently missed.
        next_page_text = html.xpath('//a[contains(., "下一页") or contains(., "下一頁")]/@href')
        return next_page_text[-1].strip() if next_page_text else None
    finally:
        # Drop the reference to the parsed tree, matching the cleanup style
        # used by the other parsers in this file.
        if html is not None:
            del html
68+
4569
def _parse_user_traffic_info(self, html_text):
4670
"""
4771
解析用户流量信息

tests/test_nexus_audiences_parser.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,26 @@ def test_audiences_table_unread_links_ignore_content_rows():
120120
<td class="rowfollow" nowrap=""><span title="2026-05-07 23:01:58">8天17时前</span></td>
121121
<td class="rowfollow"><input class="checkbox" type="checkbox" name="messages[]" value="4318000"></td>
122122
</tr>
123+
<tr>
124+
<td class="rowfollow" align="center">
125+
<img class="readpm" src="pic/trans.gif" title="已读">
126+
</td>
127+
<td class="rowfollow" align="left">
128+
<a href="messages.php?action=viewmessage&amp;id=4317999">无英文 alt 的已读消息</a>
129+
</td>
130+
<td class="rowfollow" align="left">系统</td>
131+
<td class="rowfollow" nowrap=""><span title="2026-05-07 23:01:58">8天17时前</span></td>
132+
<td class="rowfollow"><input class="checkbox" type="checkbox" name="messages[]" value="4317999"></td>
133+
</tr>
134+
<tr>
135+
<td class="rowfollow" align="center"></td>
136+
<td class="rowfollow" align="left">
137+
<a href="messages.php?action=viewmessage&amp;id=4317998">无状态图标消息</a>
138+
</td>
139+
<td class="rowfollow" align="left">系统</td>
140+
<td class="rowfollow" nowrap=""><span title="2026-05-07 23:01:58">8天17时前</span></td>
141+
<td class="rowfollow"><input class="checkbox" type="checkbox" name="messages[]" value="4317998"></td>
142+
</tr>
123143
</table>
124144
</body>
125145
</html>
@@ -130,3 +150,34 @@ def test_audiences_table_unread_links_ignore_content_rows():
130150

131151
assert msg_links == ["messages.php?action=viewmessage&id=4318225"]
132152
assert next_page is None
153+
154+
155+
def test_audiences_readpm_row_is_not_unread_message():
    """A row whose status icon carries the ``readpm`` class yields no unread links."""
    # Single-row page: the icon is explicitly marked as read (class, alt and
    # title), so its viewmessage link must not be collected.
    read_only_page = """
<html>
<body>
<table>
<tr>
<td class="rowfollow" align="center">
<img class="readpm" src="pic/trans.gif" alt="Read" title="已读">
</td>
<td class="rowfollow" align="left">
<a href="messages.php?action=viewmessage&amp;id=4318000">已读消息</a>
</td>
</tr>
</table>
</body>
</html>
"""
    collected_links = []
    site_parser = NexusAudiencesSiteUserInfo(
        site_name="Audiences",
        url="https://audiences.me/",
        site_cookie="",
        apikey=None,
        token=None,
    )

    site_parser._parse_message_unread_links(read_only_page, collected_links)

    assert collected_links == []

0 commit comments

Comments
 (0)