|
1 | | -#!/usr/bin/env python3 |
2 | | -# mirror_clean_sort.py |
3 | | -# Скачивание источников + очистка + сортировка по протоколам |
4 | | -# Требует: requests |
5 | | -# ENV: нет |
6 | | - |
7 | 1 | import os |
8 | | -import socket |
9 | | -import urllib.parse |
10 | | -import urllib3 |
| 2 | +import re |
11 | 3 | import requests |
12 | | -import time |
13 | | -from requests.adapters import HTTPAdapter |
14 | | -from urllib3.util.retry import Retry |
15 | | - |
16 | | -# -------------------- Настройки -------------------- |
17 | | -LOCAL_DIR = "getmirror" |
18 | | -NEW_DIR = os.path.join(LOCAL_DIR, "new") |
19 | | -CLEAN_DIR = os.path.join(LOCAL_DIR, "clean") |
20 | | - |
21 | | -os.makedirs(LOCAL_DIR, exist_ok=True) |
22 | | -os.makedirs(NEW_DIR, exist_ok=True) |
23 | | -os.makedirs(CLEAN_DIR, exist_ok=True) |
24 | | - |
25 | | -TIMEOUT = 12 |
26 | | -RETRIES = 2 |
27 | | -REQUESTS_POOL = 10 |
28 | | - |
29 | | -CHROME_UA = ( |
30 | | - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " |
31 | | - "AppleWebKit/537.36 (KHTML, like Gecko) " |
32 | | - "Chrome/138.0.0.0 Safari/537.36" |
33 | | -) |
34 | | - |
35 | | -URLS = [ |
| 4 | +from datetime import datetime |
| 5 | + |
# ================= SETTINGS =================

BASE_DIR = "getmirror"

# Directory layout under BASE_DIR:
#   sources/ - raw downloaded source files
#   new/     - timestamped merged dumps of extracted keys
#   clean/   - de-duplicated keys split into one file per protocol
SOURCES_DIR = os.path.join(BASE_DIR, "sources")
NEW_DIR = os.path.join(BASE_DIR, "new")
CLEAN_DIR = os.path.join(BASE_DIR, "clean")
| 14 | +SOURCES = [ |
36 | 15 | "https://github.com/sakha1370/OpenRay/raw/refs/heads/main/output/all_valid_proxies.txt", |
37 | 16 | "https://raw.githubusercontent.com/sevcator/5ubscrpt10n/main/protocols/vl.txt", |
38 | 17 | "https://raw.githubusercontent.com/yitong2333/proxy-minging/refs/heads/main/v2ray.txt", |
|
69 | 48 | "https://raw.githubusercontent.com/MrMohebi/xray-proxy-grabber-telegram/master/collected-proxies/row-url/all.txt", |
70 | 49 | ] |
71 | 50 |
|
72 | | -SNI_DOMAINS = [ |
73 | | - "vk.com", "yandex.ru", "ozon.ru", "wildberries.ru", |
74 | | - "sberbank.ru", "mail.ru", "ivi.ru", "hh.ru", |
# URI schemes recognized as proxy share links ("hy2" is a common alias
# for hysteria2). Used both for extraction and for the clean/ split.
PROTOCOLS = [
    "vless", "vmess", "trojan", "ss",
    "hysteria", "hysteria2", "hy2", "tuic"
]
76 | 55 |
|
77 | | -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) |
78 | | - |
79 | | -# -------------------- HTTP -------------------- |
80 | | -def build_session(): |
81 | | - s = requests.Session() |
82 | | - adapter = HTTPAdapter( |
83 | | - pool_connections=REQUESTS_POOL, |
84 | | - pool_maxsize=REQUESTS_POOL, |
85 | | - max_retries=Retry( |
86 | | - total=RETRIES, |
87 | | - backoff_factor=0.4, |
88 | | - status_forcelist=(429, 500, 502, 503, 504), |
89 | | - allowed_methods=frozenset(["GET"]), |
90 | | - ), |
91 | | - ) |
92 | | - s.mount("https://", adapter) |
93 | | - s.mount("http://", adapter) |
94 | | - s.headers.update({"User-Agent": CHROME_UA}) |
95 | | - return s |
96 | | - |
97 | | -SESSION = build_session() |
98 | | - |
99 | | -def request_with_strategies(url: str) -> str: |
100 | | - parsed = urllib.parse.urlparse(url) |
101 | | - host = parsed.hostname |
102 | | - |
103 | | - try: |
104 | | - r = SESSION.get(url, timeout=TIMEOUT) |
105 | | - r.raise_for_status() |
106 | | - return r.text |
107 | | - except Exception: |
108 | | - pass |
109 | | - |
110 | | - if host: |
111 | | - ip = socket.gethostbyname(host) |
112 | | - path = parsed.path or "/" |
113 | | - if parsed.query: |
114 | | - path += "?" + parsed.query |
115 | | - |
116 | | - r = SESSION.get( |
117 | | - f"https://{ip}{path}", |
118 | | - headers={"Host": host}, |
119 | | - timeout=TIMEOUT, |
120 | | - verify=False, |
121 | | - ) |
122 | | - r.raise_for_status() |
123 | | - return r.text |
124 | | - |
125 | | - raise RuntimeError("All strategies failed") |
126 | | - |
127 | | -# -------------------- Проверка протоколов -------------------- |
128 | | -def is_valid_proxy(line: str) -> bool: |
129 | | - protocols = ['vless://', 'vmess://', 'trojan://', 'ss://', |
130 | | - 'hysteria://', 'hysteria2://', 'hy2://', 'tuic://'] |
131 | | - return any(line.startswith(p) for p in protocols) |
132 | | - |
133 | | -def get_protocol(line: str) -> str: |
134 | | - for p in ['vless', 'vmess', 'trojan', 'ss', 'hysteria', 'hysteria2', 'hy2', 'tuic']: |
135 | | - if line.startswith(p + "://"): |
136 | | - return p |
137 | | - return "other" |
138 | | - |
139 | | -# -------------------- Основной процесс -------------------- |
140 | | -def main(): |
141 | | - # Очистка папок new и clean |
142 | | - for folder in os.listdir(NEW_DIR): |
143 | | - path = os.path.join(NEW_DIR, folder) |
144 | | - if os.path.isfile(path): |
145 | | - os.remove(path) |
146 | | - for folder in os.listdir(CLEAN_DIR): |
147 | | - path = os.path.join(CLEAN_DIR, folder) |
148 | | - if os.path.isfile(path): |
149 | | - os.remove(path) |
150 | | - |
151 | | - # Создание поддиректорий clean по протоколам |
152 | | - protocols = ['vless', 'vmess', 'trojan', 'ss', 'hysteria', 'hysteria2', 'hy2', 'tuic', 'other'] |
153 | | - for p in protocols: |
154 | | - os.makedirs(os.path.join(CLEAN_DIR, p), exist_ok=True) |
155 | | - |
156 | | - # Скачиваем источники |
| 56 | +# ================= ФУНКЦИИ ================= |
| 57 | + |
def mkdirs():
    """Create the getmirror directory tree; existing directories are kept."""
    for directory in (SOURCES_DIR, NEW_DIR, CLEAN_DIR):
        os.makedirs(directory, exist_ok=True)
| 61 | + |
def download_sources():
    """Download every URL in SOURCES into SOURCES_DIR as source_<i>.txt.

    Each source is fetched independently: a dead mirror is reported and
    skipped so it cannot abort the whole run. Only HTTP 200 responses are
    written to disk.
    """
    print("Скачиваем источники...")
    for i, url in enumerate(SOURCES, 1):
        try:
            r = requests.get(url, timeout=20)
            if r.status_code == 200:
                target = os.path.join(SOURCES_DIR, f"source_{i}.txt")
                with open(target, "w", encoding="utf-8", errors="ignore") as f:
                    f.write(r.text)
                print(f"{i}/{len(SOURCES)} OK")
            else:
                print(f"{i}/{len(SOURCES)} пропущен")
        except requests.RequestException as e:
            # Was a bare `except:`, which also swallowed KeyboardInterrupt /
            # SystemExit and hid the failure reason. Catch only network-level
            # errors and surface the message.
            print(f"{i}/{len(SOURCES)} ошибка: {e}")
| 75 | + |
def extract_keys():
    """Collect proxy share links from every file in SOURCES_DIR.

    Returns a list of matching lines in file order, duplicates preserved
    (de-duplication happens later in clean_and_split). Lines that do not
    start with a known protocol scheme are dropped.
    """
    all_keys = []

    # "hysteria2?" covers both "hysteria" and "hysteria2"; "hy2" is the
    # short alias. Case-insensitive because sources mix upper/lower schemes.
    pattern = re.compile(r'^(vless|vmess|trojan|ss|hysteria2?|hy2|tuic)://.+$', re.IGNORECASE)

    for name in os.listdir(SOURCES_DIR):
        path = os.path.join(SOURCES_DIR, name)
        if not os.path.isfile(path):
            # listdir can return subdirectories; open() on one would crash.
            continue
        with open(path, encoding="utf-8", errors="ignore") as f:
            for line in f:
                line = line.strip()
                if pattern.match(line):
                    all_keys.append(line)

    return all_keys
| 90 | + |
def save_new(keys):
    """Dump the freshly collected keys to new/NEW_<timestamp>.txt, one per line."""
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    target = os.path.join(NEW_DIR, f"NEW_{stamp}.txt")
    with open(target, "w", encoding="utf-8") as f:
        f.writelines(key + "\n" for key in keys)
| 97 | + |
def clean_and_split(keys):
    """De-duplicate keys and write one file per protocol into CLEAN_DIR.

    Each bucket is sorted before writing so repeated runs over the same
    input produce byte-identical files — plain `set` iteration order is
    not deterministic across interpreter runs (string hash randomization).
    Prints a per-protocol count summary at the end.
    """
    unique = set(keys)

    buckets = {p: [] for p in PROTOCOLS}

    for key in unique:
        for proto in PROTOCOLS:
            # Prefix match includes "://", so "hysteria2://..." cannot be
            # misfiled under "hysteria" even though it precedes it in the list.
            if key.lower().startswith(proto + "://"):
                buckets[proto].append(key)
                break

    for proto, items in buckets.items():
        if items:
            items.sort()  # deterministic output across runs
            with open(os.path.join(CLEAN_DIR, f"{proto}.txt"), "w", encoding="utf-8") as f:
                for item in items:
                    f.write(item + "\n")

    print("\nСортировка завершена:")
    for proto in PROTOCOLS:
        print(f"{proto}: {len(buckets[proto])}")
| 118 | + |
| 119 | +# ================= ЗАПУСК ================= |
196 | 120 |
|
if __name__ == "__main__":
    # Pipeline: ensure dirs -> fetch sources -> extract -> archive dump -> split.
    mkdirs()
    download_sources()
    collected = extract_keys()
    save_new(collected)
    clean_and_split(collected)
    print("\nГотово. Структура getmirror приведена в порядок.")
199 | 128 |
|
200 | 129 |
|
201 | 130 |
|
|
0 commit comments