@@ -75,24 +75,30 @@ func (p *Provider) SpeechModel(id string) *sdk.SpeechModel {
7575
7676// ListModels returns the speech models exposed by this provider.
7777func (p * Provider ) ListModels (ctx context.Context ) ([]* sdk.SpeechModel , error ) {
78- type modelsListResponse struct {
79- Data []struct {
80- ID string `json:"id"`
81- } `json:"data"`
78+ req , err := http .NewRequestWithContext (ctx , http .MethodGet , p .baseURL + "/models" , http .NoBody )
79+ if err != nil {
80+ return nil , fmt .Errorf ("openai speech: build list models request: %w" , err )
8281 }
82+ req .Header .Set ("Authorization" , "Bearer " + p .apiKey )
8383
84- resp , err := utils .FetchJSON [modelsListResponse ](ctx , p .httpClient , & utils.RequestOptions {
85- Method : http .MethodGet ,
86- BaseURL : p .baseURL ,
87- Path : "/models" ,
88- Headers : map [string ]string {"Authorization" : "Bearer " + p .apiKey },
89- })
84+ resp , err := p .httpClient .Do (req )
9085 if err != nil {
9186 return nil , fmt .Errorf ("openai speech: list models request failed: %w" , err )
9287 }
88+ defer resp .Body .Close ()
89+ if resp .StatusCode < 200 || resp .StatusCode >= 300 {
90+ body , _ := io .ReadAll (resp .Body )
91+ return nil , fmt .Errorf ("openai speech: unexpected status %d: %s" , resp .StatusCode , string (body ))
92+ }
93+
94+ rawModels , err := decodeModelIDs (resp .Body )
95+ if err != nil {
96+ return nil , fmt .Errorf ("openai speech: decode response: %w" , err )
97+ }
9398
94- models := make ([]* sdk.SpeechModel , 0 , len (resp .Data ))
95- for _ , m := range resp .Data {
99+ models := make ([]* sdk.SpeechModel , 0 , len (rawModels ))
100+ for _ , id := range rawModels {
101+ m := struct { ID string }{ID : id }
96102 if isOpenAITTSModel (m .ID ) {
97103 models = append (models , p .SpeechModel (m .ID ))
98104 }
@@ -108,6 +114,42 @@ func isOpenAITTSModel(id string) bool {
108114 return strings .Contains (id , "tts" ) || strings .Contains (id , "audio" )
109115}
110116
// decodeModelIDs reads r to completion and extracts the model IDs from a JSON
// model listing.
//
// Two payload shapes are accepted:
//
//   - an envelope object whose "data" field is an array: {"data": [{"id": "..."}]}
//   - a bare array of model objects: [{"id": "..."}]
//
// Entries with an empty or missing "id" are skipped. An envelope with an empty
// "data" array is a valid, empty listing and yields an empty slice with a nil
// error. A read failure, or a payload matching neither shape, returns the
// underlying error as-is so the caller can add its own context.
func decodeModelIDs(r io.Reader) ([]string, error) {
	body, err := io.ReadAll(r)
	if err != nil {
		return nil, err
	}

	type entry struct {
		ID string `json:"id"`
	}

	// collect keeps the non-empty IDs, preserving the order of the payload.
	collect := func(entries []entry) []string {
		ids := make([]string, 0, len(entries))
		for _, e := range entries {
			if e.ID != "" {
				ids = append(ids, e.ID)
			}
		}
		return ids
	}

	var wrapped struct {
		Data []entry `json:"data"`
	}
	// encoding/json leaves Data nil when the key is absent or null and sets it
	// to a non-nil slice for any JSON array, including an empty one. Checking
	// for nil (rather than len > 0) lets an empty listing succeed instead of
	// falling through to the bare-array decode, which rejects objects.
	if err := json.Unmarshal(body, &wrapped); err == nil && wrapped.Data != nil {
		return collect(wrapped.Data), nil
	}

	var direct []entry
	if err := json.Unmarshal(body, &direct); err != nil {
		return nil, err
	}
	return collect(direct), nil
}
152+
111153// DoSynthesize synthesizes speech and returns the complete audio bytes.
112154func (p * Provider ) DoSynthesize (ctx context.Context , params sdk.SpeechParams ) (* sdk.SpeechResult , error ) {
113155 cfg := parseConfig (params .Config )
0 commit comments