|
2 | 2 | import hashlib |
3 | 3 | from urllib.parse import quote |
4 | 4 | import logging |
| 5 | +import re |
5 | 6 |
|
6 | 7 | import bencodepy |
7 | 8 |
|
@@ -119,3 +120,103 @@ async def get_torrent_hashes(torrent_file): |
119 | 120 | except Exception as e: |
120 | 121 | logger.error(f"get_torrent_hashes: Error processing torrent file: {e}") |
121 | 122 | return {"v1": ""} |
| 123 | + |
| 124 | + |
def base32_to_hex(base32_hash: str) -> str:
    """Convert a Base32-encoded hash to lowercase hexadecimal.

    The input is decoded case-insensitively, and any missing ``=`` padding
    is restored before decoding, so the unpadded output of
    ``hex_to_base32`` round-trips through this function.

    Args:
        base32_hash: Base32-encoded hash (32 characters for a 20-byte
            info hash). Surrounding whitespace and trailing ``=`` padding
            are ignored.

    Returns:
        The lowercase hexadecimal form of the decoded bytes (40 characters
        for a 32-character input), or "" if the input cannot be decoded.
    """
    try:
        text = base32_hash
        if isinstance(text, (bytes, bytearray)):
            # Keep accepting bytes-like input, which b32decode itself allows.
            text = text.decode("ascii")
        text = text.strip().rstrip("=")
        # b32decode requires the length to be a multiple of 8; re-add the
        # padding that unpadded encoders (including hex_to_base32) remove.
        padded = text + "=" * (-len(text) % 8)
        # bytes.hex() already yields lowercase digits.
        return base64.b32decode(padded, casefold=True).hex()
    except (ValueError, TypeError, AttributeError) as e:
        # binascii.Error (bad alphabet / bad padding) is a ValueError.
        logger.warning(f"Base32转换失败: {base32_hash}, error: {e}")
        return ""
| 141 | + |
| 142 | + |
def hex_to_base32(hex_hash: str) -> str:
    """Encode a hexadecimal hash as unpadded Base32 text.

    Args:
        hex_hash: Hexadecimal hash string (40 characters for a 20-byte
            info hash).

    Returns:
        The Base32 encoding with trailing "=" padding removed
        (32 characters for a 40-character input), or "" if the
        conversion fails.
    """
    try:
        # Hex text -> raw bytes -> Base32 bytes -> str.
        raw = bytes.fromhex(hex_hash)
        encoded = base64.b32encode(raw)
        text = encoded.decode()
    except Exception as e:
        logger.warning(f"十六进制转Base32失败: {hex_hash}, error: {e}")
        return ""
    # Drop the "=" padding characters from the end.
    return text.rstrip("=")
| 160 | + |
| 161 | + |
def normalize_hash(hash_value: str) -> str:
    """Normalize a torrent hash to lowercase hexadecimal where possible.

    Args:
        hash_value: Input hash; expected to be either 40 hexadecimal
            characters or 32 Base32 characters, in either letter case.
            Surrounding whitespace is ignored.

    Returns:
        - "" when ``hash_value`` is empty/falsy.
        - The lowercased input when it is 40 hexadecimal characters.
        - The 40-character lowercase hex decoding when it is 32 Base32
          characters and decoding succeeds.
        - Otherwise the stripped input, lowercased, unchanged in length.
    """
    if not hash_value:
        return ""

    hash_value = hash_value.strip()

    # 40-character hexadecimal form.
    if re.fullmatch(r"[a-fA-F0-9]{40}", hash_value):
        return hash_value.lower()

    # 32-character Base32 form. The Base32 alphabet is A-Z and 2-7; match it
    # case-insensitively and upper-case before decoding so lowercase Base32
    # hashes are converted instead of being returned as a 32-character string.
    if re.fullmatch(r"[A-Za-z2-7]{32}", hash_value):
        hex_hash = base32_to_hex(hash_value.upper())
        if hex_hash:
            logger.debug(f"将Base32 hash {hash_value} 转换为十六进制: {hex_hash}")
            return hex_hash

    # Anything else: fall back to lowercasing the value as-is.
    normalized = hash_value.lower()
    logger.debug(f"hash标准化: {hash_value} -> {normalized}")
    return normalized
| 191 | + |
| 192 | + |
# (label, compiled pattern, is_base32) entries tried in order by get_hash.
# Compiled once at import time instead of on every call.
_GET_HASH_PATTERNS = (
    # A standalone 40-character hexadecimal hash anywhere in the string.
    ("magnet_hash_pattern", re.compile(r"\b([a-fA-F0-9]{40})\b"), False),
    # A 7-40 character hexadecimal file name ending in ".torrent".
    ("torrent_hash_pattern", re.compile(r"/([a-fA-F0-9]{7,40})\.torrent"), False),
    # A 32-character Base32 btih value (A-Z, 2-7; either letter case) that is
    # not immediately followed by further alphanumerics.
    (
        "dmhy_hash_pattern",
        re.compile(r"urn:btih:([A-Za-z2-7]{32})(?![A-Za-z0-9])"),
        True,
    ),
)


def get_hash(torrent_url: str) -> str | None:
    """Extract a hash from a torrent URL or magnet link.

    The patterns in ``_GET_HASH_PATTERNS`` are tried in order; the first
    match is passed through ``normalize_hash`` and returned.

    Args:
        torrent_url: Torrent file URL or magnet link.

    Returns:
        The normalized hash, or "" when no pattern matches (this function
        never returns None; the annotation is kept for compatibility).
    """
    for pattern_name, hash_pattern, is_base32 in _GET_HASH_PATTERNS:
        match = hash_pattern.search(torrent_url)
        if match is None:
            continue

        extracted_hash = match[1]
        logger.debug(f"使用{pattern_name}提取hash: {extracted_hash}")

        # Base32 matches are upper-cased first so lowercase Base32 magnets
        # are decoded to hex rather than merely lowercased.
        candidate = extracted_hash.upper() if is_base32 else extracted_hash
        normalized_hash = normalize_hash(candidate)
        if normalized_hash != extracted_hash:
            logger.debug(f"hash已标准化: {extracted_hash} -> {normalized_hash}")

        return normalized_hash

    logger.warning(f"[Utils] Cannot find hash in {torrent_url}")
    return ""
0 commit comments