forked from fishaudio/fish-audio-python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathschemas.py
133 lines (96 loc) · 2.64 KB
/
schemas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import datetime
import decimal
from typing import Annotated, Generic, Literal, TypeVar
from pydantic import BaseModel, Field
# Closed set of backend identifiers accepted wherever a ``Backends`` value
# is annotated.
Backends = Literal[
    "speech-1.5",
    "speech-1.6",
    "agent-x0",
]

# Element type parameter for generic containers such as PaginatedResponse.
Item = TypeVar("Item")
class PaginatedResponse(BaseModel, Generic[Item]):
    """Generic envelope for a list result: a count plus the entries.

    Parameterise it with the element type so that every entry in ``items``
    is validated as that type.
    """

    # Count reported with the page; presumably the size of the whole result
    # set rather than of this page -- TODO confirm against the API.
    total: int

    # Entries carried by this response, each of type ``Item``.
    items: list[Item]
class ReferenceAudio(BaseModel):
    """An audio clip paired with a piece of text.

    Instances are the element type of ``TTSRequest.references``.
    """

    # Raw audio payload; the container/codec is not constrained here.
    audio: bytes

    # Text that accompanies the clip (presumably its transcript -- confirm).
    text: str
class Prosody(BaseModel):
    """Optional prosody settings attached to a ``TTSRequest``."""

    # Speed setting; defaults to 1.0.
    speed: float = 1.0

    # Volume setting; defaults to 0.0 (unit is not stated in this module).
    volume: float = 0.0
class TTSRequest(BaseModel):
    """Parameters of a text-to-speech request.

    Only ``text`` is required; every other field has a default.

    ``format`` accepts ``"opus"`` in addition to ``"wav"``, ``"pcm"`` and
    ``"mp3"``. Without it the ``opus_bitrate`` field below could never take
    effect, because no request could select the Opus output format.
    """

    # Text to synthesise.
    text: str

    # Integer in [100, 300]. ``strict=True`` disables type coercion, so values
    # such as "200" or 200.0 are rejected instead of being converted.
    chunk_length: Annotated[int, Field(ge=100, le=300, strict=True)] = 200

    # Output audio format. (The name shadows the ``format`` builtin inside the
    # class body only; it is part of the public schema and must not change.)
    format: Literal["wav", "pcm", "mp3", "opus"] = "mp3"

    # Output sample rate; ``None`` leaves the choice to the service.
    sample_rate: int | None = None

    # Allowed MP3 bitrates (presumably kbps -- confirm with the API docs).
    mp3_bitrate: Literal[64, 128, 192] = 128

    # Allowed Opus bitrates. -1000 looks like a sentinel (presumably "auto");
    # TODO confirm against the API docs.
    opus_bitrate: Literal[-1000, 24, 32, 48, 64] = 32

    # Reference clips sent with the request. A mutable default is safe here
    # because pydantic copies field defaults for each new instance.
    references: list[ReferenceAudio] = []

    # Identifier of a stored reference/voice model, if any.
    reference_id: str | None = None

    # Normalisation switch, enabled by default.
    normalize: bool = True

    # Latency mode.
    latency: Literal["normal", "balanced"] = "balanced"

    # Optional prosody overrides; ``None`` means none are sent.
    prosody: Prosody | None = None

    # Sampling parameters.
    top_p: float = 0.7
    temperature: float = 0.7
class ASRRequest(BaseModel):
    """Parameters of a speech-recognition (ASR) request."""

    # Raw audio payload to transcribe.
    audio: bytes

    # Optional language hint; ``None`` leaves it unspecified.
    language: str | None = None

    # Tri-state flag: ``None`` means "not specified" as opposed to an
    # explicit True/False.
    ignore_timestamps: bool | None = None
class ASRSegment(BaseModel):
    """One entry of ``ASRResponse.segments``: a text span with its bounds."""

    # Text of this segment.
    text: str

    # Segment boundaries (unit is not stated in this module -- confirm).
    start: float
    end: float
class ASRResponse(BaseModel):
    """Result of a speech-recognition request."""

    # Transcribed text.
    text: str

    # Duration in milliseconds
    duration: float

    # Per-segment breakdown, each entry carrying its own text and bounds.
    segments: list[ASRSegment]
class SampleEntity(BaseModel):
    """A sample attached to a model (see ``ModelEntity.samples``)."""

    # Title of the sample.
    title: str

    # Text associated with the sample.
    text: str

    # Identifier of the task linked to the sample.
    task_id: str

    # Audio reference as a string (presumably a URL or path -- confirm).
    audio: str
class AuthorEntity(BaseModel):
    """Author information embedded in a ``ModelEntity``."""

    # Populated from the ``_id`` key of the input. A leading-underscore name
    # cannot be a pydantic field, hence the alias.
    id: str = Field(alias="_id")

    # Display name of the author.
    nickname: str

    # Avatar reference as a string (presumably a URL -- confirm).
    avatar: str
class ModelEntity(BaseModel):
    """Description of a voice model, including its author and counters."""

    # --- identity -----------------------------------------------------------
    # Populated from the ``_id`` key of the input. A leading-underscore name
    # cannot be a pydantic field, hence the alias.
    id: str = Field(alias="_id")
    # (Shadows the ``type`` builtin inside the class body only; the name is
    # part of the schema.)
    type: Literal["svc", "tts"]
    title: str
    description: str
    cover_image: str

    # --- training -----------------------------------------------------------
    train_mode: Literal["fast", "full"]
    state: Literal["created", "training", "trained", "failed"]

    # --- content and metadata ----------------------------------------------
    tags: list[str]
    samples: list[SampleEntity]
    # Parsed into ``datetime`` objects by pydantic.
    created_at: datetime.datetime
    updated_at: datetime.datetime
    languages: list[str]

    # --- visibility ---------------------------------------------------------
    visibility: Literal["public", "unlist", "private"]
    lock_visibility: bool

    # --- counters -----------------------------------------------------------
    like_count: int
    mark_count: int
    shared_count: int
    task_count: int

    # --- per-viewer flags; default to False when absent from the input ------
    liked: bool = False
    marked: bool = False

    # --- ownership ----------------------------------------------------------
    author: AuthorEntity
class APICreditEntity(BaseModel):
    """API credit record for a user.

    The identifier is exposed as ``id`` and read from the ``_id`` key of the
    input, matching ``AuthorEntity`` and ``ModelEntity``. Declaring it as
    ``_id: str`` does not work: pydantic treats names with a leading
    underscore as private attributes, so the value was never populated from
    the payload and was left out of serialisation.
    """

    # Record identifier, taken from the ``_id`` key of the input.
    id: str = Field(alias="_id")

    # Identifier of the owning user.
    user_id: str

    # Credit amount; ``Decimal`` avoids binary floating-point rounding.
    credit: decimal.Decimal

    # Timestamps are kept as the raw strings supplied in the input (they are
    # not parsed into ``datetime`` objects here).
    created_at: str
    updated_at: str
class PackageEntity(BaseModel):
    """Package record for a user, with its total and remaining balance.

    The identifier is exposed as ``id`` and read from the ``_id`` key of the
    input, matching ``AuthorEntity`` and ``ModelEntity``. Declaring it as
    ``_id: str`` does not work: pydantic treats names with a leading
    underscore as private attributes, so the value was never populated from
    the payload and was left out of serialisation.
    """

    # Record identifier, taken from the ``_id`` key of the input.
    id: str = Field(alias="_id")

    # Identifier of the owning user.
    user_id: str

    # Package kind. (Shadows the ``type`` builtin inside the class body only;
    # the name is part of the schema.)
    type: str

    # Total amount of the package and the amount remaining.
    total: int
    balance: int

    # Timestamps are kept as the raw strings supplied in the input (they are
    # not parsed into ``datetime`` objects here).
    created_at: str
    updated_at: str
    finished_at: str
class StartEvent(BaseModel):
    """Event tagged ``"start"`` that carries the TTS request parameters."""

    # Discriminator; fixed to "start" and filled in by default.
    event: Literal["start"] = "start"

    # Request settings sent with the event.
    request: TTSRequest
class TextEvent(BaseModel):
    """Event tagged ``"text"`` that carries a piece of text."""

    # Discriminator; fixed to "text" and filled in by default.
    event: Literal["text"] = "text"

    # Text payload of the event.
    text: str
class CloseEvent(BaseModel):
    """Event with no payload, tagged ``"stop"``.

    Note that the tag value is ``"stop"`` even though the class is named
    ``CloseEvent``.
    """

    # Discriminator; fixed to "stop" and filled in by default.
    event: Literal["stop"] = "stop"