wechat-decrypt/voice_to_mp3.py at main · ylytdeng/wechat-decrypt · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
"""从 media_0.db 提取所有语音数据，按用户名分目录，SILK_V3 转 MP3"""
import sqlite3
import subprocess
import tempfile
import os
import sys
from datetime import datetime

# On Windows, switch stdout to UTF-8 and replace unencodable characters
# (presumably so the Chinese status messages below print on any console).
if hasattr(sys.stdout, "reconfigure") and sys.platform == "win32":
    sys.stdout.reconfigure(errors="replace", encoding="utf-8")

# pilk is the SILK decoder; without it nothing can be converted, so exit early
# with installation instructions.
try:
    import pilk
except ImportError:
    for _line in (
        "[ERROR] 缺少 pilk 库 (SILK 解码必需)",
        "        请运行: pip install pilk",
        "        然后重新启动本任务",
    ):
        print(_line, file=sys.stderr)
    sys.exit(1)

# ffmpeg performs the MP3 encoding and must be reachable through PATH.
import shutil as _shutil
if _shutil.which("ffmpeg") is None:
    for _line in (
        "[ERROR] ffmpeg 不在 PATH 中 (MP3 编码必需)",
        "        Windows: https://ffmpeg.org/download.html 下载后加入 PATH",
        "        macOS:   brew install ffmpeg",
        "        Linux:   apt install ffmpeg / yum install ffmpeg",
    ):
        print(_line, file=sys.stderr)
    sys.exit(1)

from config import load_config

# Resolve the input databases and the export root from the project config.
_cfg = load_config()
_decrypted_dir = _cfg["decrypted_dir"]
DB_PATH = os.path.join(_decrypted_dir, "message", "media_0.db")
CONTACT_DB_PATH = os.path.join(_decrypted_dir, "contact", "contact.db")
OUTPUT_DIR = _cfg["output_base_dir"]

def _parse_contact_filter(raw):
    """Parse a comma-separated list of user names into a set.

    Whitespace around each entry is stripped and empty entries (for example
    from a trailing comma) are dropped, so ``"a, b,"`` yields ``{"a", "b"}``.

    Args:
        raw: The raw comma-separated string.

    Returns:
        A set of user names, or None when *raw* contains no names.
    """
    names = {part.strip() for part in raw.split(",")}
    names.discard("")
    return names or None


# Optional export filter: WECHAT_EXPORT_CONTACTS holds comma-separated user
# names. When unset (or effectively empty) every contact is exported.
_filter_raw = os.environ.get("WECHAT_EXPORT_CONTACTS", "").strip()
_CONTACT_FILTER = _parse_contact_filter(_filter_raw)
if _CONTACT_FILTER:
    print(f"联系人筛选: {len(_CONTACT_FILTER)} 个")

def silk_to_mp3(voice_data, output_path):
    """Convert a WeChat SILK voice blob to an MP3 file.

    Args:
        voice_data: Raw bytes of the voice message, optionally prefixed with
            WeChat's 0x02 marker byte. ``None`` or empty data is rejected.
        output_path: Destination path of the MP3 file (overwritten if present).

    Returns:
        True when ffmpeg exited successfully. False when the data is missing,
        does not start with ``#!SILK_V3``, cannot be decoded, or ffmpeg fails.
    """
    # Treat a NULL blob like empty data instead of crashing on slicing.
    silk_data = voice_data or b""

    # Drop the WeChat-specific 0x02 prefix.
    if silk_data[:1] == b"\x02":
        silk_data = silk_data[1:]

    if not silk_data.startswith(b"#!SILK_V3"):
        print("  警告：数据不以 #!SILK_V3 开头，跳过")
        return False

    # Append the end-of-stream marker when it is missing.
    if not silk_data.endswith(b"\xff\xff"):
        silk_data += b"\xff\xff"

    # A private temporary directory avoids the name race of tempfile.mktemp
    # and is removed together with both intermediate files on exit.
    with tempfile.TemporaryDirectory(prefix="wechat_voice_") as workdir:
        silk_file = os.path.join(workdir, "voice.silk")
        pcm_file = os.path.join(workdir, "voice.pcm")
        with open(silk_file, "wb") as f:
            f.write(silk_data)

        try:
            pilk.decode(silk_file, pcm_file)
        except Exception as exc:
            # NOTE(review): pilk's exception types are not visible here, so
            # catch broadly; one bad record should not abort the whole batch.
            print(f"  警告：SILK 解码失败: {exc}")
            return False

        # The PCM is passed to ffmpeg as signed 16-bit little-endian,
        # 24 kHz, mono; the output format follows from output_path.
        result = subprocess.run([
            "ffmpeg", "-y", "-f", "s16le", "-ar", "24000", "-ac", "1",
            "-i", pcm_file, output_path
        ], capture_output=True, encoding="utf-8", errors="replace")

    if result.returncode != 0:
        # Remove any partial output so a later run does not mistake it for
        # a finished conversion.
        if os.path.exists(output_path):
            os.remove(output_path)
        return False
    return True

# 1. Load the Name2Id mapping (rowid -> user_name) from the media database.
#    The connection stays open; it is reused for the voice query further down.
conn = sqlite3.connect(DB_PATH)
name_map = dict(conn.execute("SELECT rowid, user_name FROM Name2Id"))
print(f"共 {len(name_map)} 个用户")

# 2. Load contact details (user_name -> {username, alias, remark, nick_name}).
#    Best effort: on failure the export continues with whatever was loaded,
#    and display names fall back to the raw user name.
contact_map = {}
try:
    cconn = sqlite3.connect(CONTACT_DB_PATH)
    try:
        for uname, alias, remark, nick_name in cconn.execute(
            "SELECT username, alias, remark, nick_name FROM contact"
        ):
            # NULL columns are normalised to empty strings.
            contact_map[uname] = {
                "username": uname,
                "alias": alias or "",
                "remark": remark or "",
                "nick_name": nick_name or "",
            }
    finally:
        # Close the connection even when the query fails part-way.
        cconn.close()
    print(f"联系人数据库加载: {len(contact_map)} 条")
except Exception as e:
    print(f"联系人数据库读取失败: {e}")

def display_name(user_name):
    """Return the preferred label for a user: remark, then nick_name, then user_name."""
    contact = contact_map.get(user_name, {})
    for field in ("remark", "nick_name"):
        value = contact.get(field)
        if value:
            return value
    return user_name

# Characters replaced with "_" in directory names: the set Windows forbids in
# file names plus the ASCII control characters (code points 0-31).
_UNSAFE_DIRNAME_CHARS = dict.fromkeys(
    [ord(ch) for ch in r'\/:*?"<>|'] + list(range(32)), "_"
)


def safe_dirname(name):
    """Turn an arbitrary display name into a safe single directory name.

    Illegal and control characters become ``_``; surrounding whitespace and
    trailing dots are removed. Names that end up empty — including ``"."``
    and ``".."``, which would otherwise point outside the intended
    directory — are replaced by ``"unknown"``.

    Args:
        name: The display name; ``None`` is treated as empty.

    Returns:
        A non-empty string usable as one path component.
    """
    cleaned = (name or "").strip().translate(_UNSAFE_DIRNAME_CHARS)
    # Windows does not allow names ending in a dot; stripping them also
    # neutralises the special entries "." and "..".
    while cleaned.endswith("."):
        cleaned = cleaned[:-1].rstrip()
    return cleaned or "unknown"

# 3. Query every voice record, ordered by chat and creation time. Rows are
#    streamed from the cursor instead of fetched all at once, so Python does
#    not hold every voice BLOB in memory simultaneously.
success = 0
fail = 0
try:
    total = conn.execute("SELECT COUNT(*) FROM VoiceInfo").fetchone()[0]
    print(f"共 {total} 条语音")
    rows = conn.execute("SELECT chat_name_id, create_time, local_id, voice_data FROM VoiceInfo ORDER BY chat_name_id, create_time")

    # 4. Convert each record into <OUTPUT_DIR>/<contact>/voice/<timestamp>_<local_id>.mp3.
    for chat_name_id, create_time, local_id, voice_data in rows:
        user_name = name_map.get(chat_name_id, f"unknown_{chat_name_id}")
        if _CONTACT_FILTER and user_name not in _CONTACT_FILTER:
            continue
        dname = safe_dirname(display_name(user_name))
        dt = datetime.fromtimestamp(create_time)
        filename = dt.strftime("%Y%m%d_%H%M%S") + f"_{local_id}.mp3"

        user_dir = os.path.join(OUTPUT_DIR, dname, "voice")
        os.makedirs(user_dir, exist_ok=True)

        # Write the .info file once per contact, in the contact's root directory.
        info_path = os.path.join(OUTPUT_DIR, dname, ".info")
        if not os.path.exists(info_path):
            info = contact_map.get(user_name, {"username": user_name, "alias": "", "remark": "", "nick_name": ""})
            with open(info_path, "w", encoding="utf-8") as f:
                f.write(f"username:  {info['username']}\n")
                f.write(f"alias:     {info['alias']}\n")
                f.write(f"nick_name: {info['nick_name']}\n")
                f.write(f"remark:    {info['remark']}\n")

        # Files already present from an earlier run count as successes, which
        # makes the export resumable.
        output_path = os.path.join(user_dir, filename)
        if os.path.exists(output_path):
            success += 1
            continue

        ok = silk_to_mp3(voice_data, output_path)
        if ok:
            success += 1
            print(f"  [{success}/{total}] {dname}/{filename}")
        else:
            fail += 1
            print(f"  失败: {dname}/{filename}")
finally:
    # Release the media database even if the query or a conversion raises.
    conn.close()

print(f"\n完成: 成功 {success}, 失败 {fail}")