Skip to content

Commit 5c7d882

Browse files
authored
Allow embeddings requests to be tokens or strings (#417)
* Allow raw tokens to be used as embedding input
* fix linting issues (lines too long)
* add endpoint test for embedding from tokens
* remove redundant comments
* fix comment to match new param name
* change interface to any
* Rename methods and implement convert for base req
* add comments to CreateEmbeddings
* update tests
* shorten line length
* rename parameter
1 parent 2042608 commit 5c7d882

File tree

2 files changed

+94
-6
lines changed

2 files changed

+94
-6
lines changed

embeddings.go

Lines changed: 56 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -113,10 +113,25 @@ type EmbeddingResponse struct {
113113
Usage Usage `json:"usage"`
114114
}
115115

116-
// EmbeddingRequest is the input to a Create embeddings request.
116+
// EmbeddingRequestConverter is the argument type accepted by CreateEmbeddings: any request
// that can be converted into a base EmbeddingRequest.
type EmbeddingRequestConverter interface {
117+
// Convert returns the request as a base EmbeddingRequest. It is implemented by
// EmbeddingRequest, EmbeddingRequestStrings and EmbeddingRequestTokens.
118+
Convert() EmbeddingRequest
119+
}
120+
117121
// EmbeddingRequest is the base request that CreateEmbeddings sends as the request body.
// Every EmbeddingRequestConverter is converted to this type before the request is built.
type EmbeddingRequest struct {
122+
// Input is the data to embed. It is declared as any so that it can carry either the
// slice of strings from EmbeddingRequestStrings or the [][]int from EmbeddingRequestTokens.
Input any `json:"input"`
123+
// Model is the embedding model to use.
Model EmbeddingModel `json:"model"`
124+
// User is a unique identifier representing the end-user.
User string `json:"user"`
125+
}
126+
127+
// Convert returns r unchanged, so that a plain EmbeddingRequest satisfies
// EmbeddingRequestConverter.
func (r EmbeddingRequest) Convert() EmbeddingRequest {
128+
return r
129+
}
130+
131+
// EmbeddingRequestStrings is the input to a create embeddings request with a slice of strings.
132+
type EmbeddingRequestStrings struct {
118133
// Input is a slice of strings for which you want to generate an Embedding vector.
119-
// Each input must not exceed 2048 tokens in length.
134+
// Each input must not exceed 8192 tokens in length.
120135
// OpenAI suggests replacing newlines (\n) in your input with a single space, as they
121136
// have observed inferior results when newlines are present.
122137
// E.g.
@@ -129,15 +144,50 @@ type EmbeddingRequest struct {
129144
User string `json:"user"`
130145
}
131146

132-
// CreateEmbeddings returns an EmbeddingResponse which will contain an Embedding for every item in |request.Input|.
147+
// Convert copies Input, Model and User into a base EmbeddingRequest, so that
// EmbeddingRequestStrings satisfies EmbeddingRequestConverter.
func (r EmbeddingRequestStrings) Convert() EmbeddingRequest {
148+
return EmbeddingRequest{
149+
Input: r.Input,
150+
Model: r.Model,
151+
User: r.User,
152+
}
153+
}
154+
155+
// EmbeddingRequestTokens is the input to a create embeddings request with a slice of
// token slices, i.e. inputs that have already been converted to tokens.
type EmbeddingRequestTokens struct {
156+
// Input is a slice of slices of ints ([][]int) for which you want to generate an Embedding vector.
157+
// Each input must not exceed 8192 tokens in length.
158+
// OpenAI suggests replacing newlines (\n) in your input with a single space, as they
159+
// have observed inferior results when newlines are present.
// NOTE(review): the newline advice appears to be carried over from EmbeddingRequestStrings;
// presumably it applies to the text before it is tokenized - confirm.
160+
// E.g. (token IDs as used in this package's tests)
161+
// [][]int{{464, 2057, 373, 12625, 290, 262, 46612}}
162+
Input [][]int `json:"input"`
163+
// ID of the model to use. You can use the List models API to see all of your available models,
164+
// or see our Model overview for descriptions of them.
165+
Model EmbeddingModel `json:"model"`
166+
// A unique identifier representing your end-user, which will help OpenAI to monitor and detect abuse.
167+
User string `json:"user"`
168+
}
169+
170+
// Convert copies Input, Model and User into a base EmbeddingRequest, so that
// EmbeddingRequestTokens satisfies EmbeddingRequestConverter.
func (r EmbeddingRequestTokens) Convert() EmbeddingRequest {
171+
return EmbeddingRequest{
172+
Input: r.Input,
173+
Model: r.Model,
174+
User: r.User,
175+
}
176+
}
177+
178+
// CreateEmbeddings returns an EmbeddingResponse which will contain an Embedding for every item in the request's Input.
133179
// https://beta.openai.com/docs/api-reference/embeddings/create
134-
func (c *Client) CreateEmbeddings(ctx context.Context, request EmbeddingRequest) (resp EmbeddingResponse, err error) {
135-
req, err := c.newRequest(ctx, http.MethodPost, c.fullURL("/embeddings", request.Model.String()), withBody(request))
180+
//
181+
// conv should be of type EmbeddingRequestStrings for embedding strings or EmbeddingRequestTokens
182+
// for embedding groups of text already converted to tokens. A plain EmbeddingRequest is accepted
// as well, since it also implements EmbeddingRequestConverter.
183+
func (c *Client) CreateEmbeddings(ctx context.Context, conv EmbeddingRequestConverter) (res EmbeddingResponse, err error) { //nolint:lll
184+
// Convert to the base request: it supplies the Model used in the URL and is what gets sent as the body.
baseReq := conv.Convert()
185+
req, err := c.newRequest(ctx, http.MethodPost, c.fullURL("/embeddings", baseReq.Model.String()), withBody(baseReq))
136186
if err != nil {
137187
// err is a named result, so the bare return reports the newRequest error.
return
138188
}
139189

140-
err = c.sendRequest(req, &resp)
190+
err = c.sendRequest(req, &res)
141191

142192
return
143193
}

embeddings_test.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ func TestEmbedding(t *testing.T) {
3232
BabbageCodeSearchText,
3333
}
3434
for _, model := range embeddedModels {
35+
// test embedding request with strings (simple embedding request)
3536
embeddingReq := EmbeddingRequest{
3637
Input: []string{
3738
"The food was delicious and the waiter",
@@ -46,6 +47,34 @@ func TestEmbedding(t *testing.T) {
4647
if !bytes.Contains(marshaled, []byte(`"model":"`+model.String()+`"`)) {
4748
t.Fatalf("Expected embedding request to contain model field")
4849
}
50+
51+
// test embedding request with strings
52+
embeddingReqStrings := EmbeddingRequestStrings{
53+
Input: []string{
54+
"The food was delicious and the waiter",
55+
"Other examples of embedding request",
56+
},
57+
Model: model,
58+
}
59+
marshaled, err = json.Marshal(embeddingReqStrings)
60+
checks.NoError(t, err, "Could not marshal embedding request")
61+
if !bytes.Contains(marshaled, []byte(`"model":"`+model.String()+`"`)) {
62+
t.Fatalf("Expected embedding request to contain model field")
63+
}
64+
65+
// test embedding request with tokens
66+
embeddingReqTokens := EmbeddingRequestTokens{
67+
Input: [][]int{
68+
{464, 2057, 373, 12625, 290, 262, 46612},
69+
{6395, 6096, 286, 11525, 12083, 2581},
70+
},
71+
Model: model,
72+
}
73+
marshaled, err = json.Marshal(embeddingReqTokens)
74+
checks.NoError(t, err, "Could not marshal embedding request")
75+
if !bytes.Contains(marshaled, []byte(`"model":"`+model.String()+`"`)) {
76+
t.Fatalf("Expected embedding request to contain model field")
77+
}
4978
}
5079
}
5180

@@ -75,6 +104,15 @@ func TestEmbeddingEndpoint(t *testing.T) {
75104
fmt.Fprintln(w, string(resBytes))
76105
},
77106
)
107+
// test create embeddings with a base EmbeddingRequest (simple embedding request)
78108
_, err := client.CreateEmbeddings(context.Background(), EmbeddingRequest{})
79109
checks.NoError(t, err, "CreateEmbeddings error")
110+
111+
// test create embeddings with strings
112+
_, err = client.CreateEmbeddings(context.Background(), EmbeddingRequestStrings{})
113+
checks.NoError(t, err, "CreateEmbeddings strings error")
114+
115+
// test create embeddings with tokens
116+
_, err = client.CreateEmbeddings(context.Background(), EmbeddingRequestTokens{})
117+
checks.NoError(t, err, "CreateEmbeddings tokens error")
80118
}

0 commit comments

Comments
 (0)