Skip to content

Commit 8ff9868

Browse files
committed
Optimize code structure and add more accurate data query
1 parent f98c38a commit 8ff9868

File tree

8 files changed

+289
-78
lines changed

8 files changed

+289
-78
lines changed

app/api/router.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
from fastapi import APIRouter
3333
from app.api.routers import (
3434
health_check,
35-
transcribe
35+
whisper_tasks
3636
)
3737

3838
router = APIRouter()
@@ -41,4 +41,4 @@
4141
router.include_router(health_check.router, prefix="/health", tags=["Health-Check"])
4242

4343
# Transcribe routers
44-
router.include_router(transcribe.router, prefix="/transcribe", tags=["Whisper-Transcribe"])
44+
router.include_router(whisper_tasks.router, prefix="/whisper", tags=["Whisper-Tasks"])

app/api/routers/health_check.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,32 @@ class HealthCheckResponse(BaseModel):
5757
)
5858
async def health_check():
5959
"""
60-
健康检查端点,用于确认服务是否正常运行。
61-
Health check endpoint to confirm that the service is running properly.
60+
# [中文]
61+
62+
### 用途说明:
63+
64+
- 检查服务器是否正确响应请求。
65+
66+
### 参数说明:
67+
68+
- 无参数。
69+
70+
### 返回结果:
71+
72+
- `status`: 服务器状态,正常为 `ok`。
73+
74+
# [English]
75+
76+
### Purpose:
77+
78+
- Check if the server responds to requests correctly.
79+
80+
### Parameter Description:
81+
82+
- No parameters.
83+
84+
### Return Result:
85+
86+
- `status`: Server status, normal is `ok`.
6287
"""
6388
return HealthCheckResponse()
Lines changed: 145 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -37,21 +37,21 @@
3737

3838
from app.utils.logging_utils import configure_logging
3939
from app.api.models.APIResponseModel import ResponseModel, ErrorResponseModel
40-
from app.database.models import Task, TaskStatus, TaskPriority
40+
from app.database.models import Task, TaskStatus, TaskPriority, QueryTasksOptionalFilter
4141

4242
router = APIRouter()
4343

4444
# 配置日志记录器
4545
logger = configure_logging(name=__name__)
4646

4747

48-
# 在后台创建一个转录任务 | Create a transcription task in the background
48+
# 创建任务 | Create task
4949
@router.post(
5050
"/task/create",
5151
response_model=ResponseModel,
5252
summary="上传媒体文件并且创建一个Whisper转录任务在后台处理 | Upload a media file and create a Whisper transcription task to be processed in the background"
5353
)
54-
async def create_transcription_task(
54+
async def task_create(
5555
request: Request,
5656
file: UploadFile = File(...,
5757
description="媒体文件(支持的格式:音频和视频,如 MP3, WAV, MP4, MKV 等) / Media file (supported formats: audio and video, e.g., MP3, WAV, MP4, MKV)"),
@@ -203,86 +203,178 @@ async def create_transcription_task(
203203
)
204204

205205

206-
# 获取任务状态 | Get task status
207-
@router.get("/tasks/check",
208-
summary="获取任务状态 / Get task status",
209-
response_model=ResponseModel)
210-
async def get_task_status(
206+
# 查询任务 | Query task
207+
@router.post("/tasks/query",
208+
response_model=ResponseModel,
209+
summary="查询任务 | Query task"
210+
)
211+
async def task_query(
211212
request: Request,
212-
task_id: int = Query(description="任务ID / Task ID")
213+
params: QueryTasksOptionalFilter
213214
):
214215
"""
215216
# [中文]
216217
217218
### 用途说明:
218-
- 获取指定任务的状态信息。
219+
- 根据多种筛选条件查询任务列表,包括任务状态、优先级、创建时间、语言、引擎名称等信息。
220+
- 该接口适用于分页查询,并且通过 `limit` 和 `offset` 参数控制每页显示的记录数,支持客户端逐页加载数据。
219221
220222
### 参数说明:
221-
- `task_id` (int): 任务ID。
223+
- `status` (TaskStatus): 筛选任务状态:
224+
- 例如 'queued'(排队中)或 'processing'(处理中)或 'completed'(已完成) 或 'failed'(失败)。
225+
- `priority` (TaskPriority): 筛选任务优先级:
226+
- 例如 'low'、'normal'、'high'。
227+
- `created_after` (str): 创建时间的起始时间,格式为 'YYYY-MM-DDTHH:MM:SS',为空时忽略该条件。
228+
- `created_before` (str): 创建时间的结束时间,格式为 'YYYY-MM-DDTHH:MM:SS',为空时忽略该条件。
229+
- `language` (str): 任务的语言代码,例如 `zh`或'en'。设置为空字符串 `""` 可以查询所有语言的任务。
230+
- `engine_name` (str): 引擎名称,例如 'faster_whisper'或'openai_whisper'。
231+
- `has_result` (bool): 指定是否要求任务有结果数据。
232+
- `has_error` (bool): 指定是否要求任务有错误信息。
233+
- `limit` (int): 每页的记录数量,默认值为20,用户可根据需求自定义每页数量。
234+
- `offset` (int): 数据分页的起始位置,默认值为0,后续使用响应中的 `next_offset` 值进行下一页查询。
222235
223236
### 返回:
224-
- 返回一个包含任务状态信息的响应,包括任务ID、状态、优先级等信息。
237+
- `tasks` (list): 包含满足条件的任务列表,每个任务记录包括任务ID、状态、优先级、创建时间等详细信息。
238+
- `total_count` (int): 符合条件的任务总数。
239+
- `has_more` (bool): 是否还有更多数据。如果为 `True`,则表示存在下一页数据。
240+
- `next_offset` (int): 下次分页请求的偏移量。用户可以通过此值构建下一页查询请求。
241+
242+
### 使用示例:
243+
- 请求示例:
244+
```json
245+
{
246+
"status": "completed",
247+
"priority": "high",
248+
"created_after": "2024-01-01T00:00:00",
249+
"created_before": "2024-12-31T23:59:59",
250+
"language": "",
251+
"engine_name": "faster_whisper",
252+
"has_result": true,
253+
"has_error": false,
254+
"limit": 10,
255+
"offset": 0
256+
}
257+
```
258+
- 响应示例:
259+
```json
260+
{
261+
"code": 200,
262+
"router": "http://localhost/api/tasks/query",
263+
"params": { ... },
264+
"data": {
265+
"tasks": [
266+
{
267+
"id": 123,
268+
"status": "completed",
269+
"priority": "high",
270+
"created_at": "2024-05-15T12:34:56",
271+
"language": "en",
272+
"engine_name": "faster_whisper",
273+
"result": {...},
274+
"error_message": null
275+
},
276+
...
277+
],
278+
"total_count": 55,
279+
"has_more": true,
280+
"next_offset": 10
281+
}
282+
}
283+
```
225284
226285
### 错误代码说明:
227-
228-
- `404`: 任务未找到,可能是任务ID不存在。
229-
- `500`: 未知错误。
286+
- `500`: 未知错误,通常为内部错误。
230287
231288
# [English]
232289
233290
### Purpose:
234-
- Get the status information of the specified task.
291+
- Query the task list based on multiple filtering conditions, including task status, priority, creation time, language, engine name, etc.
292+
- This endpoint is suitable for paginated queries, and the number of records displayed per page is controlled by the `limit` and `offset` parameters, supporting clients to load data page by page.
235293
236294
### Parameters:
237-
- `task_id` (int): Task ID.
295+
- `status` (TaskStatus): Filter task status:
296+
- e.g., 'queued' (in the queue), 'processing' (processing), 'completed' (completed), or 'failed' (failed).
297+
- `priority` (TaskPriority): Filter task priority:
298+
- e.g., 'low', 'normal', 'high'.
299+
- `created_after` (str): Start time of creation time, format is 'YYYY-MM-DDTHH:MM:SS', ignore this condition when empty.
300+
- `created_before` (str): End time of creation time, format is 'YYYY-MM-DDTHH:MM:SS', ignore this condition when empty.
301+
- `language` (str): Language code of the task, e.g., `zh` or `en`. Set to an empty string `""` to query tasks in all languages.
302+
- `engine_name` (str): Engine name, e.g., 'faster_whisper' or 'openai_whisper'.
303+
- `has_result` (bool): Specify whether the task requires result data.
304+
- `has_error` (bool): Specify whether the task requires error information.
305+
- `limit` (int): Number of records per page, default is 20, users can customize the number of records per page according to their needs.
306+
- `offset` (int): Starting position of data pagination, default is 0, use the `next_offset` value in the response for the next page query.
238307
239308
### Returns:
240-
- Returns a response containing task status information, including task ID, status, priority, etc.
309+
- `tasks` (list): List of tasks that meet the conditions, each task record includes detailed information such as task ID, status, priority, creation time, etc.
310+
- `total_count` (int): Total number of tasks that meet the conditions.
311+
- `has_more` (bool): Whether there is more data. If `True`, it means there is more data on the next page.
312+
- `next_offset` (int): Offset value for the next page request. Users can use this value to construct the next page query request.
313+
314+
### Example:
315+
- Request example:
316+
```json
317+
{
318+
"status": "completed",
319+
"priority": "high",
320+
"created_after": "2024-01-01T00:00:00",
321+
"created_before": "2024-12-31T23:59:59",
322+
"language": "",
323+
"engine_name": "faster_whisper",
324+
"has_result": true,
325+
"has_error": false,
326+
"limit": 10,
327+
"offset": 0
328+
}
329+
```
330+
- Response example:
331+
```json
332+
{
333+
"code": 200,
334+
"router": "http://localhost/api/tasks/query",
335+
"params": { ... },
336+
"data": {
337+
"tasks": [
338+
{
339+
"id": 123,
340+
"status": "completed",
341+
"priority": "high",
342+
"created_at": "2024-05-15T12:34:56",
343+
"language": "en",
344+
"engine_name": "faster_whisper",
345+
"result": {...},
346+
"error_message": null
347+
},
348+
...
349+
],
350+
"total_count": 55,
351+
"has_more": true,
352+
"next_offset": 10
353+
}
354+
}
355+
```
241356
242357
### Error Code Description:
243-
244-
- `404`: Task not found, possibly because the task ID does not exist.
245-
- `500`: Unknown error.
358+
- `500`: Unknown error, usually an internal error.
246359
"""
247-
try:
248-
async with request.app.state.db_manager.get_session() as session:
249-
task_info = await session.get(Task, task_id)
250-
if not task_info:
251-
raise HTTPException(
252-
status_code=status.HTTP_404_NOT_FOUND,
253-
detail=ErrorResponseModel(
254-
code=status.HTTP_404_NOT_FOUND,
255-
message="Task not found.",
256-
router=str(request.url),
257-
params=dict(request.query_params),
258-
).dict()
259-
)
260-
return ResponseModel(code=200,
261-
router=str(request.url),
262-
params=dict(request.query_params),
263-
data=task_info.to_dict())
264360

265-
except HTTPException as http_error:
266-
raise http_error
361+
async with request.app.state.db_manager.get_session() as session:
362+
result = await request.app.state.db_manager.query_tasks(session, params)
363+
if result is None:
364+
raise HTTPException(status_code=500, detail="An error occurred while querying tasks.")
267365

268-
except Exception as e:
269-
logger.error(f"Unexpected error: {str(e)}")
270-
logger.error(f"Traceback: {traceback.format_exc()}")
271-
raise HTTPException(
272-
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
273-
detail=ErrorResponseModel(
274-
code=status.HTTP_500_INTERNAL_SERVER_ERROR,
275-
message=f"An unexpected error occurred while retrieving the task status: {str(e)}",
276-
router=str(request.url),
277-
params=dict(request.query_params),
278-
).dict()
279-
)
366+
return ResponseModel(
367+
code=200,
368+
router=str(request.url),
369+
params=params.dict(),
370+
data=result
371+
)
280372

281373

282374
@router.get("/tasks/result",
283375
summary="获取任务结果 / Get task result",
284376
response_model=ResponseModel)
285-
async def get_task_result(
377+
async def task_result(
286378
request: Request,
287379
task_id: int = Query(description="任务ID / Task ID")
288380
):

app/database/database.py

Lines changed: 72 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,13 @@
3030
# ==============================================================================
3131

3232
import traceback
33-
from sqlalchemy import select
33+
from sqlalchemy import select, and_, func
3434
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
3535
from sqlalchemy.orm import sessionmaker
3636
from sqlalchemy.exc import SQLAlchemyError
37-
from typing import Optional, List
37+
from typing import Optional, List, Dict
3838
from contextlib import asynccontextmanager
39-
from app.database.models import Task, Base
39+
from app.database.models import Task, Base, QueryTasksOptionalFilter
4040
from app.utils.logging_utils import configure_logging
4141

4242
# 配置日志记录器 | Configure logger
@@ -145,3 +145,72 @@ async def get_all_tasks(self, limit: int = 100) -> List[dict]:
145145
logger.error(f"Error fetching tasks: {e}")
146146
logger.error(traceback.format_exc())
147147
return []
148+
149+
async def query_tasks(self,
150+
session: AsyncSession,
151+
filters: QueryTasksOptionalFilter) -> Optional[Dict[str, List[Dict]]]:
152+
"""
153+
按条件查询任务,使用分页和条件查询
154+
155+
Query tasks with pagination and conditions.
156+
"""
157+
try:
158+
# 构建查询条件 | Build query conditions
159+
conditions = self._build_conditions(filters)
160+
161+
# 构建查询语句 | Build query statement
162+
query = (
163+
select(Task)
164+
.where(and_(*conditions))
165+
.order_by(Task.created_at)
166+
.offset(filters.offset)
167+
.limit(filters.limit)
168+
)
169+
170+
result = await session.execute(query)
171+
tasks = result.scalars().all()
172+
173+
# 获取总记录数 | Get total count
174+
count_query = select(func.count()).select_from(Task).where(and_(*conditions))
175+
total_count = (await session.execute(count_query)).scalar()
176+
177+
# 计算是否有更多数据,并返回 next_offset 以供下一页查询 | Calculate if there are more data and return next_offset for next page query
178+
has_more = filters.offset + filters.limit < total_count
179+
next_offset = filters.offset + filters.limit if has_more else None
180+
181+
return {
182+
"tasks": [task.to_dict() for task in tasks],
183+
"total_count": total_count,
184+
"has_more": has_more,
185+
"next_offset": next_offset
186+
}
187+
188+
except SQLAlchemyError as e:
189+
logger.error(f"Error querying tasks: {e}")
190+
logger.error(traceback.format_exc())
191+
return None
192+
193+
def _build_conditions(self, filters: QueryTasksOptionalFilter) -> List:
194+
"""
195+
根据 QueryTasksOptionalFilter 对象构建查询条件
196+
197+
Build query conditions based on QueryTasksOptionalFilter.
198+
"""
199+
conditions = []
200+
if filters.status:
201+
conditions.append(Task.status == filters.status)
202+
if filters.priority:
203+
conditions.append(Task.priority == filters.priority)
204+
if filters.created_after:
205+
conditions.append(Task.created_at >= filters.created_after)
206+
if filters.created_before:
207+
conditions.append(Task.created_at <= filters.created_before)
208+
if filters.language:
209+
conditions.append(Task.language == filters.language)
210+
if filters.engine_name:
211+
conditions.append(Task.engine_name == filters.engine_name)
212+
if filters.has_result is not None:
213+
conditions.append((Task.result != None) if filters.has_result else (Task.result == None))
214+
if filters.has_error is not None:
215+
conditions.append((Task.error_message != None) if filters.has_error else (Task.error_message == None))
216+
return conditions

0 commit comments

Comments
 (0)