-
Notifications
You must be signed in to change notification settings - Fork 4.4k
Closed
Labels
enhancementNew feature or requestNew feature or request
Description
1. 通过生命周期分配GPU
os.environ['CUDA_VISIBLE_DEVICES'] = '2,3,5'
...
@asynccontextmanager
async def lifespan(app: FastAPI):
"""Lifespan management: initialize model resources."""
global model_manager, do_parse, convert_file_to_pdf
# Initialization code (formerly the body of the setup method)
try:
from magic_pdf.tools.cli import do_parse as _do_parse, convert_file_to_pdf as _convert
from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
do_parse = _do_parse
convert_file_to_pdf = _convert
# Get the number of available GPUs
available_gpus = torch.cuda.device_count()
logger.info(f"检测到 {available_gpus} 个可用GPU")
model_managers = []
for gpu_id in range(available_gpus):
torch.cuda.set_device(gpu_id) # Switch to the current GPU
physical_gpu_id = get_physical_gpu_id(gpu_id)
# NOTE(review): the name suggests ModelSingleton() may return the same
# instance on every iteration; if so, the get_model calls below would not
# load a separate model per GPU -- confirm against magic_pdf.
manager = ModelSingleton()
manager.get_model(True, False)
manager.get_model(False, False)
model_managers.append(manager)
logger.info(f"模型已加载到 GPU {gpu_id}(物理GPU {physical_gpu_id})")
# Attach the model managers and the current GPU index to the app state
# model_manager = ModelSingleton()
# model_manager.get_model(True, False)
# model_manager.get_model(False, False)
# Attach the utility functions to the app state
app.state.do_parse = do_parse
# Starting index read by the request handler to pick a GPU
app.state.current_gpu = 0
app.state.model_managers = model_managers
app.state.convert_file_to_pdf = convert_file_to_pdf
logger.info("Model initialization complete!")
yield
finally:
# Clean up resources
if torch.cuda.is_available():
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
gc.collect()- 接口请求 分配GPU
@app.post("/api/process_url")
def analyze_document(
file_url: FileInputVo
):
"""基于mineru+datamax的OCR接口"""
try:
# # 初始化GPU分配参数
# torch.cuda.set_device(1)
# logger.info(f"当前请求分配至 GPU 1(物理GPU {get_physical_gpu_id(1)})")
# logger.info(f"当前进程PID: {os.getpid()}")
model_managers = app.state.model_managers
available_gpus = len(model_managers)
current_gpu = app.state.current_gpu
selected_gpu = current_gpu % available_gpus
app.state.current_gpu = current_gpu + 1
#
# # 设置当前GPU设备(实际设备ID根据CUDA_VISIBLE_DEVICES映射)
torch.cuda.set_device(selected_gpu)
logger.info(f"当前请求分配至 GPU 1(物理GPU {get_physical_gpu_id(1)})")
logger.info(f"当前进程PID: {os.getpid()}")
...
- 启动服务观察:一个进程 PID 在 3 张卡中都有显存占用,并且只在第一张卡(物理机 2 号卡)上运行
以下是请求一次后的 nvidia-smi 面板

Metadata
Metadata
Assignees
Labels
enhancementNew feature or requestNew feature or request