Skip to content

Commit 2859419

Browse files
committed
Added setting frontend. Prepare for v0.2.0 release.
1 parent 53bd853 commit 2859419

16 files changed

Lines changed: 1087 additions & 192 deletions

README.MD

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -44,33 +44,27 @@ Lanlan(兰兰)是一个新手友好、开箱即用的,具有听觉、视
4444

4545
1. **体验桌宠模式**。如果网页版可以正常使用,可以考虑继续通过`启动App版.bat`实现桌面穿透。注意,**请不要同时使用网页版和App版。请确认exe文件没有被系统或杀毒软件隔离。** *使用结束后请在桌面右下角找到小八图标,右键退出。*
4646

47-
> *对于开发者,请在克隆本项目后,(1)新建python3.12环境。(2)执行`pip install -r requirements.txt`安装依赖。(3)复制`config/api_template.py`为`config/api.py`并进行必要配置。(4)执行`python memory_server.py`和`python main_server.py`。(5)通过main server中指定的端口(默认为`localhost:48911`)访问网页版。*
47+
> *对于开发者,请在克隆本项目后,(1)新建python3.12环境。(2)执行`pip install -r requirements.txt`安装依赖。(3)复制`config/api_template.py`为`config/api.py`并进行必要配置。(4)执行`python memory_server.py`和`python main_server.py`。(5)通过main server中指定的端口(默认为`http://localhost:48911`)访问网页版。*
4848
4949
# 进阶内容
5050

51-
## A. 修改人设
51+
## 修改人设
5252

53-
基本人设位于`config/__init__.py`内,请用文本编辑器打开。请将`MASTER_NAME`修改为自己的名字,`her_name`修改为~~猫娘~~伙伴的名字 *(注意,这只是一个临时措施,本项目支持多个角色并发)*。请在`master_basic_config`和`lanlan_basic_config`中以json格式填入基本信息。如果对json格式有疑问,请查询豆包等AI工具。
53+
- 网页版访问`http://localhost:48911/chara_manager`即可进入人设编辑页面。初始 ~~猫娘~~ 伙伴的预设名称为`test`,建议直接修改名字,并一项一项添加或修改基础人设,但尽量控制数量
5454

55-
进阶人设位于`config/prompts_chara.py`内,请谨慎修改。冗长的人设会降低系统的运行效率和稳定性。开发者由衷希望对猫娘进行设定时请遵循奥卡姆剃刀原则,"如无必要,勿增设定"
55+
- 进阶人设主要包括**Live2D模型设置(live2d)**和**声音设置(voice_id)**。如果你想要更改**Live2D模型**,请先将模型目录复制到本项目中的`static`文件夹下。从进阶设置中可以进入Live2D模型管理界面,可以更换模型,并通过拖拽和鼠标滚轮调整模型的位置和大小。如果你想要更改**角色声音**,请准备一段15秒左右的连贯、干净的语音录音。通过进阶设置进入语音设置页面,上传录音即可完成自定义语音。
5656

57-
## B. 修改Live2D模型
57+
- 进阶人设中还有一个`system_prompt`,可以对系统指令进行完全自定义,但不建议修改。
5858

59-
Live2D模型的路径参数目前硬编码在了`main_server.py`里的`"model_path"`部分,可以自行修改(有两个,先改第一个)。更换Live2D模型后如果想要调整大小和位置,还需要修改`templates/index.html`里的`model.scale`和`model.anchor`参数。表情控制目前还没有准备好release,待UI完善后才会考虑正式支持自定义Live2D模型。
60-
61-
## C. 修改声音
62-
63-
本项目已经内置了基于CosyVoice API的语音克隆功能,代码中已经包含该功能并且已经通过测试。请根据阿里百炼大模型平台[官网的教程](https://help.aliyun.com/zh/model-studio/cosyvoice-clone-api),进行语音克隆。克隆后,将`VOICE_ID`填写在`config/api.py`中,并将`USE_TTS`设置为`True`
64-
65-
## D. 参与开发
59+
## 参与开发
6660

6761
本项目环境依赖非常简单,请在`python3.12`环境中执行`pip install -r requirements.txt`即可。请注意将`config/api_template.py`复制为`config/api.py`。开发者建议加入企鹅群1048307485,猫娘名称见项目标题。
6862

6963
# TODO List(开发计划)
7064

7165
## A. 高优先级
7266

73-
1. 添加前端UI用于人设管理(L2D模型/语音/性格等)、记忆管理(记忆检索和纠错)。
67+
1. 添加前端UI用于记忆管理(记忆检索和纠错)。
7468

7569
1. 支持L2D表情控制和动作控制。
7670

config/__init__.py

Lines changed: 37 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,46 @@
11
from config.api import *
22
from config.prompts_chara import *
3+
import json
4+
import os
5+
6+
# 读取角色配置
7+
CHARACTER_JSON_PATH = os.path.join(os.path.dirname(__file__), 'characters.json')
8+
# 默认值
9+
_default_master = {"档案名": "哥哥", "性别": "男", "昵称": "哥哥"}
10+
_default_lanlan = {"test": {"性别": "女", "年龄": 15, "昵称": "T酱, 小T", "live2d": "mao_pro", "voice_id": "", "system_prompt": lanlan_prompt}}
11+
12+
def get_character_data():
    """Load the character profiles and derive per-character settings.

    Reads ``CHARACTER_JSON_PATH`` (UTF-8 JSON). The file is expected to hold
    a mapping with a ``'主人'`` (master) profile and a ``'猫娘'`` mapping of
    character name -> profile. If the file is missing, unreadable, not a
    JSON object, or a section is absent/empty, the module-level defaults
    (``_default_master`` / ``_default_lanlan``) are used for that part.

    Returns:
        A 10-tuple, in this order:
        ``(master_name, her_name, master_basic_config, lanlan_basic_config,
        name_mapping, lanlan_prompt_map, semantic_store, time_store,
        setting_store, recent_log)``.
        ``her_name`` is the first character name; the last five items are
        dicts keyed by character name.
    """
    try:
        with open(CHARACTER_JSON_PATH, 'r', encoding='utf-8') as f:
            character_data = json.load(f)
    except FileNotFoundError:
        print(f"⚠️ 未找到猫娘配置文件: {CHARACTER_JSON_PATH},请检查文件是否存在。使用默认人设。")
        character_data = None
    except Exception as e:
        # Deliberately broad: a broken config must never prevent startup.
        print(f"💥 读取猫娘配置文件出错: {e},使用默认人设。")
        character_data = None

    # Valid JSON that is not an object (e.g. a list or null) is treated the
    # same as an unreadable file.
    if not isinstance(character_data, dict):
        character_data = {"主人": _default_master, "猫娘": _default_lanlan}

    # The master profile name must always exist; fall back to the default.
    master_basic_config = character_data.get('主人', _default_master)
    if not isinstance(master_basic_config, dict):
        master_basic_config = _default_master
    master_name = master_basic_config.get('档案名', _default_master['档案名'])

    # Resolve the character mapping ONCE so that the name list and every
    # lookup below agree. Previously an empty '猫娘' section produced the
    # default names but still indexed the empty mapping (KeyError).
    lanlan_basic_config = character_data.get('猫娘')
    if not isinstance(lanlan_basic_config, dict) or not lanlan_basic_config:
        lanlan_basic_config = _default_lanlan

    catgirl_names = list(lanlan_basic_config)
    her_name = catgirl_names[0] if catgirl_names else ''

    name_mapping = {'human': master_name, 'system': "SYSTEM_MESSAGE"}

    # Per-character stores, keyed by character name.
    lanlan_prompt_map = {
        name: lanlan_basic_config[name].get('system_prompt', lanlan_prompt)
        for name in catgirl_names
    }
    semantic_store = {name: f'memory/store/semantic_memory_{name}' for name in catgirl_names}
    time_store = {name: f'memory/store/time_indexed_{name}' for name in catgirl_names}
    setting_store = {name: f'memory/store/settings_{name}.json' for name in catgirl_names}
    recent_log = {name: f'memory/store/recent_{name}.json' for name in catgirl_names}

    return (
        master_name,
        her_name,
        master_basic_config,
        lanlan_basic_config,
        name_mapping,
        lanlan_prompt_map,
        semantic_store,
        time_store,
        setting_store,
        recent_log,
    )
340

441
TIME_ORIGINAL_TABLE_NAME = "time_indexed_original"
542
TIME_COMPRESSED_TABLE_NAME = "time_indexed_compressed"
643

7-
8-
'''
9-
↓↓↓ 核心人设在这里 ↓↓↓
10-
'''
11-
MASTER_NAME = '哥哥'
12-
her_name = "test"
13-
master_basic_config = {'性别': '男', '昵称': MASTER_NAME}
14-
15-
lanlan_basic_config = {her_name: {'性别': '女',
16-
'年龄': 15,
17-
'昵称': ["T酱", "小T"],
18-
}}
19-
'''
20-
↑↑↑ 核心人设在这里 ↑↑↑
21-
'''
22-
23-
24-
"""
25-
本项目支持多个角色,但是为了方便新手用户进行配置,临时增加了一个her_name变量来帮助批量设置初始角色的信息。
26-
请将her_name后的字符串修改为角色名称。
27-
"""
28-
NAME_MAPPING = {'human': MASTER_NAME, 'system': "SYSTEM_MESSAGE"}
29-
LANLAN_PROMPT = {her_name: lanlan_prompt}
30-
SEMANTIC_STORE = {her_name: f'memory/store/semantic_memory_{her_name}'}
31-
TIME_STORE = {her_name: f'memory/store/time_indexed_{her_name}'}
32-
SETTING_STORE = {her_name: f'memory/store/settings_{her_name}.json'}
33-
RECENT_LOG = {her_name: f'memory/store/recent_{her_name}.json'}
34-
35-
36-
import json
3744
try:
3845
with open('core_config.txt', 'r') as f:
3946
core_cfg = json.load(f)

config/prompts_sys.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
from config import MASTER_NAME
1+
from config import get_character_data
2+
# Only the master's profile name is needed here. A starred unpack keeps this
# line working if get_character_data() ever returns additional values.
MASTER_NAME, *_ = get_character_data()
23

34
gpt4_1_system = """## PERSISTENCE
45
You are an agent - please keep going until the user's query is completely

main_helper/core.py

Lines changed: 40 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
import base64
2323
from io import BytesIO
2424
from PIL import Image
25-
from config import MASTER_NAME, MEMORY_SERVER_PORT, CORE_API_KEY, CORE_URL, CORE_MODEL, USE_TTS
25+
from config import get_character_data, CORE_URL, CORE_MODEL, CORE_API_KEY, MEMORY_SERVER_PORT, AUDIO_API_KEY
2626
from multiprocessing import Process, Queue as MPQueue
2727
from uuid import uuid4
2828
import numpy as np
@@ -62,7 +62,27 @@ def __init__(self, sync_message_queue, lanlan_name, lanlan_prompt):
6262

6363
self.lanlan_prompt = lanlan_prompt
6464
self.lanlan_name = lanlan_name
65-
self.MODEL = CORE_MODEL
65+
# 获取角色相关配置
66+
(
67+
self.master_name,
68+
self.her_name,
69+
self.master_basic_config,
70+
self.lanlan_basic_config,
71+
self.name_mapping,
72+
self.lanlan_prompt_map,
73+
self.semantic_store,
74+
self.time_store,
75+
self.setting_store,
76+
self.recent_log
77+
) = get_character_data()
78+
# 获取API相关配置
79+
self.model = CORE_MODEL
80+
self.core_url = CORE_URL
81+
self.core_api_key = CORE_API_KEY
82+
self.memory_server_port = MEMORY_SERVER_PORT
83+
self.audio_api_key = AUDIO_API_KEY
84+
self.voice_id = self.lanlan_basic_config[self.lanlan_name].get('voice_id', '')
85+
self.use_tts = False if not self.voice_id else True
6686
self.generation_config = {} # Qwen暂时不用
6787
self.message_cache_for_new_session = []
6888
self.is_preparing_new_session = False
@@ -75,9 +95,6 @@ def __init__(self, sync_message_queue, lanlan_name, lanlan_prompt):
7595
self.pending_session = None
7696
self.is_hot_swap_imminent = False
7797
self.tts_handler_task = None
78-
self.use_tts = USE_TTS
79-
# 将TTS相关的导入移到外部,确保始终可用
80-
8198
# 热切换相关变量
8299
self.background_preparation_task = None
83100
self.final_swap_task = None
@@ -86,9 +103,9 @@ def __init__(self, sync_message_queue, lanlan_name, lanlan_prompt):
86103

87104
# 注册回调
88105
self.session = OmniRealtimeClient(
89-
base_url=CORE_URL,
90-
api_key=CORE_API_KEY,
91-
model=self.MODEL,
106+
base_url=self.core_url,
107+
api_key=self.core_api_key,
108+
model=self.model,
92109
voice="Chelsie",
93110
on_text_delta=self.handle_text_data,
94111
on_audio_delta=self.handle_audio_data,
@@ -188,8 +205,8 @@ async def handle_input_transcript(self, transcript: str):
188205
if not hasattr(self, 'message_cache_for_new_session'):
189206
self.message_cache_for_new_session = []
190207
if len(self.message_cache_for_new_session) == 0 or self.message_cache_for_new_session[-1]['role'] == self.lanlan_name:
191-
self.message_cache_for_new_session.append({"role": MASTER_NAME, "text": transcript.strip()})
192-
elif self.message_cache_for_new_session[-1]['role'] == MASTER_NAME:
208+
self.message_cache_for_new_session.append({"role": self.master_name, "text": transcript.strip()})
209+
elif self.message_cache_for_new_session[-1]['role'] == self.master_name:
193210
self.message_cache_for_new_session[-1]['text'] += transcript.strip()
194211
# 可选:推送用户活动
195212
with self.lock:
@@ -215,7 +232,7 @@ async def send_lanlan_response(self, text: str, is_first_chunk: bool = False):
215232
if hasattr(self, 'is_preparing_new_session') and self.is_preparing_new_session:
216233
if not hasattr(self, 'message_cache_for_new_session'):
217234
self.message_cache_for_new_session = []
218-
if len(self.message_cache_for_new_session) == 0 or self.message_cache_for_new_session[-1]['role']==MASTER_NAME:
235+
if len(self.message_cache_for_new_session) == 0 or self.message_cache_for_new_session[-1]['role']==self.master_name:
219236
self.message_cache_for_new_session.append(
220237
{"role": self.lanlan_name, "text": text})
221238
elif self.message_cache_for_new_session[-1]['role'] == self.lanlan_name:
@@ -289,10 +306,9 @@ async def start_session(self, websocket: WebSocket, new=False):
289306
if self.use_tts:
290307
# 启动TTS子进程
291308
if self.tts_process is None or not self.tts_process.is_alive():
292-
from config import AUDIO_API_KEY, VOICE_ID
293309
self.tts_process = Process(
294310
target=speech_synthesis_worker,
295-
args=(self.tts_request_queue, self.tts_response_queue, AUDIO_API_KEY, VOICE_ID)
311+
args=(self.tts_request_queue, self.tts_response_queue, self.audio_api_key, self.voice_id)
296312
)
297313
self.tts_process.daemon = True
298314
self.tts_process.start()
@@ -309,9 +325,9 @@ async def start_session(self, websocket: WebSocket, new=False):
309325
try:
310326
# 获取初始 prompt
311327
initial_prompt = self.lanlan_prompt
312-
initial_prompt += requests.get(f"http://localhost:{MEMORY_SERVER_PORT}/new_dialog/{self.lanlan_name}").text
313-
logger.info("====Initial Prompt=====")
314-
logger.info(initial_prompt)
328+
initial_prompt += requests.get(f"http://localhost:{self.memory_server_port}/new_dialog/{self.lanlan_name}").text
329+
# logger.info("====Initial Prompt=====")
330+
# logger.info(initial_prompt)
315331

316332
# 标记 session 激活
317333
if self.session:
@@ -361,9 +377,9 @@ async def _background_prepare_pending_session(self):
361377
try:
362378
# 创建新的pending session
363379
self.pending_session = OmniRealtimeClient(
364-
base_url=CORE_URL,
365-
api_key=CORE_API_KEY,
366-
model=self.MODEL,
380+
base_url=self.core_url,
381+
api_key=self.core_api_key,
382+
model=self.model,
367383
voice="Chelsie",
368384
on_text_delta=self.handle_text_data,
369385
on_audio_delta=self.handle_audio_data,
@@ -377,7 +393,7 @@ async def _background_prepare_pending_session(self):
377393
initial_prompt = self.lanlan_prompt
378394
self.initial_cache_snapshot_len = len(self.message_cache_for_new_session)
379395
async with httpx.AsyncClient() as client:
380-
resp = await client.get(f"http://localhost:{MEMORY_SERVER_PORT}/new_dialog/{self.lanlan_name}")
396+
resp = await client.get(f"http://localhost:{self.memory_server_port}/new_dialog/{self.lanlan_name}")
381397
initial_prompt += resp.text + self._convert_cache_to_str(self.message_cache_for_new_session)
382398
# print(initial_prompt)
383399
await self.pending_session.connect(initial_prompt, native_audio = not self.use_tts)
@@ -415,7 +431,7 @@ async def _perform_final_swap_sequence(self):
415431
# 1. Send incremental cache (or a heartbeat) to PENDING session for its *second* ignored response
416432
if incremental_cache:
417433
final_prime_text = f"SYSTEM_MESSAGE | " + self._convert_cache_to_str(incremental_cache) + \
418-
f'=======以上为前情概要。现在请{self.lanlan_name}准备,即将开始用语音与{MASTER_NAME}继续对话。\n'
434+
f'=======以上为前情概要。现在请{self.lanlan_name}准备,即将开始用语音与{self.master_name}继续对话。\n'
419435
else: # Ensure session cycles a turn even if no incremental cache
420436
logger.error(f"💥 Unexpected: No incremental cache found. {len(self.message_cache_for_new_session)}, {self.initial_cache_snapshot_len}")
421437
final_prime_text = f"SYSTEM_MESSAGE | 系统自动报时,当前时间: " + str(
@@ -717,14 +733,14 @@ async def tts_response_handler(self):
717733

718734
# TTS多进程worker函数,供主进程Process(target=...)调用
719735

720-
def speech_synthesis_worker(request_queue, response_queue, AUDIO_API_KEY, VOICE_ID):
736+
def speech_synthesis_worker(request_queue, response_queue, audio_api_key, voice_id):
721737
import dashscope
722738
from dashscope.audio.tts_v2 import ResultCallback, SpeechSynthesizer, AudioFormat
723739
import numpy as np
724740
from librosa import resample
725741
import re
726742
import time
727-
dashscope.api_key = AUDIO_API_KEY
743+
dashscope.api_key = audio_api_key
728744
class Callback(ResultCallback):
729745
def __init__(self, response_queue):
730746
self.response_queue = response_queue
@@ -766,7 +782,7 @@ def on_data(self, data: bytes) -> None:
766782
pass
767783
synthesizer = SpeechSynthesizer(
768784
model="cosyvoice-v2",
769-
voice=VOICE_ID,
785+
voice=voice_id,
770786
speech_rate=1.1,
771787
format=AudioFormat.PCM_24000HZ_MONO_16BIT,
772788
callback=callback,

0 commit comments

Comments
 (0)