Skip to content

Commit e057ddb

Browse files
authored
Merge pull request #16 from WqyJh/feat-webrtc
Add /v1/realtime/sessions API to create ephemeral keys for WebRTC connections
2 parents 69cccde + 01c4289 commit e057ddb

File tree

9 files changed

+273
-5
lines changed

9 files changed

+273
-5
lines changed

.github/workflows/go-test.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,5 @@ jobs:
2727
version: latest
2828
- name: Run tests
2929
run: make test
30+
env:
31+
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

api.go

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
package openairt
2+
3+
import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)
10+
11+
type CreateSessionRequest struct {
12+
ClientSession
13+
14+
// The Realtime model used for this session.
15+
Model string `json:"model"`
16+
}
17+
18+
// ClientSecret carries the ephemeral key returned when a session is created.
type ClientSecret struct {
	// Value is the ephemeral key used to authenticate connections to the
	// Realtime API. It is meant for client-side environments; a standard API
	// token should only be used server-side.
	Value string `json:"value"`
	// ExpiresAt is the timestamp at which the key expires. Currently, all
	// keys expire after one minute.
	ExpiresAt int64 `json:"expires_at"`
}
24+
25+
type CreateSessionResponse struct {
26+
ServerSession
27+
28+
// Ephemeral key returned by the API.
29+
ClientSecret ClientSecret `json:"client_secret"`
30+
}
31+
32+
// httpOption holds the per-call settings that HTTPOption values can override.
type httpOption struct {
	client  *http.Client
	headers http.Header
	method  string
}

// HTTPOption customizes a single HTTPDo call.
type HTTPOption func(*httpOption)

// WithHeaders sets the headers sent with the request. HTTPDo copies them, so
// the caller's map is never shared with the outgoing request. Passing nil
// sends the request without any custom headers.
func WithHeaders(headers http.Header) HTTPOption {
	return func(o *httpOption) {
		o.headers = headers
	}
}

// WithClient sets the HTTP client used to send the request. A nil client makes
// HTTPDo fall back to http.DefaultClient.
func WithClient(client *http.Client) HTTPOption {
	return func(o *httpOption) {
		o.client = client
	}
}

// WithMethod sets the HTTP method of the request. HTTPDo uses POST when this
// option is not supplied.
func WithMethod(method string) HTTPOption {
	return func(o *httpOption) {
		o.method = method
	}
}

// HTTPDo marshals req as JSON, sends it to url, and decodes a JSON response
// body into a new R.
//
// Defaults are http.DefaultClient, no custom headers, and the POST method;
// opts override them in order. Nil options are skipped. Any status other than
// 200 OK is reported as an error that includes the status code and, when the
// server sent one, the beginning of the response body.
func HTTPDo[Q any, R any](ctx context.Context, url string, req *Q, opts ...HTTPOption) (*R, error) {
	// Upper bound on how much of an error response is copied into the error.
	const maxErrorBody = 4 << 10

	opt := httpOption{
		client:  http.DefaultClient,
		headers: http.Header{},
		method:  http.MethodPost,
	}
	for _, o := range opts {
		if o != nil {
			o(&opt)
		}
	}
	// A config built by hand may carry a nil client; calling Do on it would
	// panic, so use the default client instead.
	if opt.client == nil {
		opt.client = http.DefaultClient
	}

	data, err := json.Marshal(req)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal request: %w", err)
	}

	request, err := http.NewRequestWithContext(ctx, opt.method, url, bytes.NewReader(data))
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}
	// NewRequestWithContext already gave the request an empty header map.
	// Only replace it when headers were supplied, and copy them so later
	// changes to the request cannot leak into the caller's map.
	if opt.headers != nil {
		request.Header = opt.headers.Clone()
	}

	response, err := opt.client.Do(request)
	if err != nil {
		return nil, fmt.Errorf("http failed: %w", err)
	}
	defer response.Body.Close()

	if response.StatusCode != http.StatusOK {
		// Best effort: the status code is the primary signal, so a failure
		// while reading the error body is deliberately ignored.
		body, _ := io.ReadAll(io.LimitReader(response.Body, maxErrorBody))
		if msg := bytes.TrimSpace(body); len(msg) > 0 {
			return nil, fmt.Errorf("http status code: %d, body: %s", response.StatusCode, msg)
		}
		return nil, fmt.Errorf("http status code: %d", response.StatusCode)
	}

	var resp R
	if err := json.NewDecoder(response.Body).Decode(&resp); err != nil {
		return nil, fmt.Errorf("failed to decode response: %w", err)
	}
	return &resp, nil
}

api_integration_test.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package openairt_test
2+
3+
import (
4+
"context"
5+
"os"
6+
"testing"
7+
8+
openairt "github.com/WqyJh/go-openai-realtime"
9+
"github.com/stretchr/testify/require"
10+
)
11+
12+
func TestCreateSession(t *testing.T) {
13+
key := os.Getenv("OPENAI_API_KEY")
14+
if key == "" {
15+
t.Skip("OPENAI_API_KEY is not set")
16+
}
17+
client := openairt.NewClient(key)
18+
session, err := client.CreateSession(context.Background(), &openairt.CreateSessionRequest{
19+
Model: openairt.GPT4oRealtimePreview20241217,
20+
ClientSession: openairt.ClientSession{
21+
Modalities: []openairt.Modality{
22+
openairt.ModalityAudio,
23+
openairt.ModalityText,
24+
},
25+
Instructions: "You are a friendly assistant.",
26+
},
27+
})
28+
require.NoError(t, err)
29+
require.NotEmpty(t, session.ClientSecret.Value)
30+
require.NotZero(t, session.ClientSecret.ExpiresAt)
31+
t.Logf("session: %+v", session)
32+
}

api_test.go

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
package openairt_test
2+
3+
import (
4+
"encoding/json"
5+
"testing"
6+
7+
openairt "github.com/WqyJh/go-openai-realtime"
8+
"github.com/WqyJh/jsontools"
9+
"github.com/sashabaranov/go-openai"
10+
"github.com/stretchr/testify/require"
11+
)
12+
13+
func TestCreateSessionRequest(t *testing.T) {
14+
data := `{
15+
"model": "gpt-4o-realtime-preview-2024-12-17",
16+
"modalities": ["audio", "text"],
17+
"instructions": "You are a friendly assistant."
18+
}`
19+
expected := openairt.CreateSessionRequest{
20+
Model: openairt.GPT4oRealtimePreview20241217,
21+
ClientSession: openairt.ClientSession{
22+
Modalities: []openairt.Modality{
23+
openairt.ModalityAudio,
24+
openairt.ModalityText,
25+
},
26+
Instructions: "You are a friendly assistant.",
27+
},
28+
}
29+
30+
var actual openairt.CreateSessionRequest
31+
err := json.Unmarshal([]byte(data), &actual)
32+
require.NoError(t, err)
33+
require.Equal(t, expected, actual)
34+
35+
actualBytes, err := json.Marshal(actual)
36+
require.NoError(t, err)
37+
jsontools.RequireJSONEq(t, data, string(actualBytes))
38+
}
39+
40+
func TestCreateSessionResponse(t *testing.T) {
41+
data := `{
42+
"id": "sess_001",
43+
"object": "realtime.session",
44+
"model": "gpt-4o-realtime-preview-2024-12-17",
45+
"modalities": ["audio", "text"],
46+
"instructions": "You are a friendly assistant.",
47+
"voice": "alloy",
48+
"input_audio_format": "pcm16",
49+
"output_audio_format": "pcm16",
50+
"input_audio_transcription": {
51+
"model": "whisper-1"
52+
},
53+
"turn_detection": null,
54+
"tools": [],
55+
"tool_choice": "none",
56+
"temperature": 0.7,
57+
"max_response_output_tokens": 200,
58+
"client_secret": {
59+
"value": "ek_abc123",
60+
"expires_at": 1234567890
61+
}
62+
}
63+
`
64+
temperature := float32(0.7)
65+
expected := openairt.CreateSessionResponse{
66+
ClientSecret: openairt.ClientSecret{
67+
Value: "ek_abc123",
68+
ExpiresAt: 1234567890,
69+
},
70+
ServerSession: openairt.ServerSession{
71+
ID: "sess_001",
72+
Object: "realtime.session",
73+
Model: openairt.GPT4oRealtimePreview20241217,
74+
Modalities: []openairt.Modality{
75+
openairt.ModalityAudio,
76+
openairt.ModalityText,
77+
},
78+
Instructions: "You are a friendly assistant.",
79+
Voice: openairt.VoiceAlloy,
80+
InputAudioFormat: openairt.AudioFormatPcm16,
81+
OutputAudioFormat: openairt.AudioFormatPcm16,
82+
InputAudioTranscription: &openairt.InputAudioTranscription{
83+
Model: openai.Whisper1,
84+
},
85+
TurnDetection: nil,
86+
Tools: []openairt.Tool{},
87+
ToolChoice: openairt.ServerToolChoice{String: openairt.ToolChoiceNone},
88+
Temperature: &temperature,
89+
MaxOutputTokens: openairt.IntOrInf(200),
90+
},
91+
}
92+
93+
var actual openairt.CreateSessionResponse
94+
err := json.Unmarshal([]byte(data), &actual)
95+
require.NoError(t, err)
96+
require.Equal(t, expected, actual)
97+
}

client.go

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,11 @@ import (
77
)
88

99
const (
10-
GPT4oRealtimePreview = "gpt-4o-realtime-preview"
11-
GPT4oRealtimePreview20241001 = "gpt-4o-realtime-preview-2024-10-01"
10+
GPT4oRealtimePreview = "gpt-4o-realtime-preview"
11+
GPT4oRealtimePreview20241001 = "gpt-4o-realtime-preview-2024-10-01"
12+
GPT4oRealtimePreview20241217 = "gpt-4o-realtime-preview-2024-12-17"
13+
GPT4oMiniRealtimePreview = "gpt-4o-mini-realtime-preview"
14+
GPT4oMiniRealtimePreview20241217 = "gpt-4o-mini-realtime-preview-2024-12-17"
1215
)
1316

1417
// Client is OpenAI Realtime API client.
@@ -110,3 +113,26 @@ func (c *Client) Connect(ctx context.Context, opts ...ConnectOption) (*Conn, err
110113

111114
return &Conn{conn: conn, logger: connectOpts.logger}, nil
112115
}
116+
117+
func (c *Client) getAPIHeaders() http.Header {
118+
headers := http.Header{}
119+
120+
if c.config.APIType == APITypeAzure {
121+
headers.Set("api-key", c.config.authToken)
122+
} else {
123+
headers.Set("Authorization", "Bearer "+c.config.authToken)
124+
}
125+
headers.Set("Content-Type", "application/json")
126+
return headers
127+
}
128+
129+
func (c *Client) CreateSession(ctx context.Context, req *CreateSessionRequest) (*CreateSessionResponse, error) {
130+
return HTTPDo[CreateSessionRequest, CreateSessionResponse](
131+
ctx,
132+
c.config.APIBaseURL+"/realtime/sessions",
133+
req,
134+
WithClient(c.config.HTTPClient),
135+
WithMethod(http.MethodPost),
136+
WithHeaders(c.getAPIHeaders()),
137+
)
138+
}

config.go

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package openairt
22

3+
import "net/http"
4+
35
// APIType is the type of API.
46
type APIType string
57

@@ -13,6 +15,9 @@ const (
1315
const (
1416
// OpenaiRealtimeAPIURLv1 is the base URL for the OpenAI Realtime API.
1517
OpenaiRealtimeAPIURLv1 = "wss://api.openai.com/v1/realtime"
18+
19+
// OpenaiAPIURLv1 is the base URL for the OpenAI API.
20+
OpenaiAPIURLv1 = "https://api.openai.com/v1"
1621
)
1722

1823
const (
@@ -25,17 +30,21 @@ type ClientConfig struct {
2530
authToken string
2631

2732
BaseURL string // Base URL for the API. Defaults to "wss://api.openai.com/v1/realtime"
33+
APIBaseURL string // Base URL for the API. Defaults to "https://api.openai.com/v1"
2834
APIType APIType // API type. Defaults to APITypeOpenAI
2935
APIVersion string // required when APIType is APITypeAzure
36+
HTTPClient *http.Client
3037
}
3138

3239
// DefaultConfig creates a new ClientConfig with the given auth token.
3340
// Defaults to using the OpenAI Realtime API.
3441
func DefaultConfig(authToken string) ClientConfig {
3542
return ClientConfig{
36-
authToken: authToken,
37-
BaseURL: OpenaiRealtimeAPIURLv1,
38-
APIType: APITypeOpenAI,
43+
authToken: authToken,
44+
BaseURL: OpenaiRealtimeAPIURLv1,
45+
APIBaseURL: OpenaiAPIURLv1,
46+
APIType: APITypeOpenAI,
47+
HTTPClient: &http.Client{},
3948
}
4049
}
4150

@@ -47,6 +56,7 @@ func DefaultAzureConfig(apiKey, baseURL string) ClientConfig {
4756
BaseURL: baseURL,
4857
APIType: APITypeAzure,
4958
APIVersion: azureAPIVersion20241001Preview,
59+
HTTPClient: &http.Client{},
5060
}
5161
}
5262

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ module github.com/WqyJh/go-openai-realtime
33
go 1.19
44

55
require (
6+
github.com/WqyJh/jsontools v0.3.1
67
github.com/coder/websocket v1.8.12
78
github.com/sashabaranov/go-openai v1.32.0
89
github.com/stretchr/testify v1.9.0

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
github.com/WqyJh/jsontools v0.3.1 h1:zKT+DvxUSTji06ZcjsbQzZ48PycFZDI0OGATmmFhJ+U=
2+
github.com/WqyJh/jsontools v0.3.1/go.mod h1:Gk2OlyXjAJmYNZ0aUbEXGHq4I5ihGRjXxVuUprWtkss=
13
github.com/coder/websocket v1.8.12 h1:5bUXkEPPIbewrnkU8LTCLVaxi4N4J8ahufH2vlo4NAo=
24
github.com/coder/websocket v1.8.12/go.mod h1:LNVeNrXQZfe5qhS9ALED3uA+l5pPqvwXg3CKoDBB2gs=
35
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=

types.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ type TurnDetectionParams struct {
6464
PrefixPaddingMs int `json:"prefix_padding_ms,omitempty"`
6565
// Duration of silence to detect speech stop (in milliseconds).
6666
SilenceDurationMs int `json:"silence_duration_ms,omitempty"`
67+
// Whether or not to automatically generate a response when VAD is enabled. true by default.
68+
CreateResponse *bool `json:"create_response,omitempty"`
6769
}
6870

6971
type ClientTurnDetection struct {

0 commit comments

Comments
 (0)