Skip to content

Commit 79215b3

Browse files
committed
feat: jina image embed support
1 parent e6092ba commit 79215b3

9 files changed

Lines changed: 373 additions & 10 deletions

File tree

core/common/body.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,32 @@ type requestBodyKey struct{}
1818
const (
1919
// MaxRequestBodySize is the request body cap in bytes (50 * 1024 * 1024).
// ParseMultipartFormWithLimit rejects requests whose declared Content-Length
// exceeds it and also caps reads of the body at this many bytes.
MaxRequestBodySize = 1024 * 1024 * 50 // 50MB
2020
// MaxResponseBodySize is 50 * 1024 * 1024 bytes.
// NOTE(review): presumably the matching cap for response bodies; its users
// are not visible in this hunk — confirm against the rest of the package.
MaxResponseBodySize = 1024 * 1024 * 50 // 50MB
21+
22+
// multipartFormMemoryLimit (4 * 1024 * 1024 bytes) is the maxMemory argument
// that ParseMultipartFormWithLimit passes to (*http.Request).ParseMultipartForm.
multipartFormMemoryLimit = 4 * 1024 * 1024
2123
)
2224

2325
// LimitReader returns an io.Reader backed by a new *LimitedReader whose R field
// is r and whose N field is n.
func LimitReader(r io.Reader, n int64) io.Reader {
	limited := &LimitedReader{R: r, N: n}

	return limited
}
2426

27+
func ParseMultipartFormWithLimit(req *http.Request) error {
28+
if req.ContentLength > 0 && req.ContentLength > MaxRequestBodySize {
29+
return fmt.Errorf(
30+
"request body too large: %d, max: %d",
31+
req.ContentLength,
32+
MaxRequestBodySize,
33+
)
34+
}
35+
36+
originalBody := req.Body
37+
38+
req.Body = http.MaxBytesReader(nil, req.Body, MaxRequestBodySize)
39+
defer func() {
40+
req.Body = originalBody
41+
}()
42+
43+
// #nosec G120 -- ContentLength is checked above and Body is capped by MaxBytesReader.
44+
return req.ParseMultipartForm(multipartFormMemoryLimit)
45+
}
46+
2547
type LimitedReader struct {
2648
R io.Reader
2749
N int64

core/relay/adaptor/ali/stt-realtime.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"github.com/gin-gonic/gin"
1313
"github.com/google/uuid"
1414
"github.com/gorilla/websocket"
15+
"github.com/labring/aiproxy/core/common"
1516
"github.com/labring/aiproxy/core/model"
1617
"github.com/labring/aiproxy/core/relay/adaptor"
1718
"github.com/labring/aiproxy/core/relay/meta"
@@ -70,7 +71,7 @@ func ConvertSTTRequest(
7071
meta *meta.Meta,
7172
request *http.Request,
7273
) (adaptor.ConvertResult, error) {
73-
err := request.ParseMultipartForm(1024 * 1024 * 4)
74+
err := common.ParseMultipartFormWithLimit(request)
7475
if err != nil {
7576
return adaptor.ConvertResult{}, err
7677
}

core/relay/adaptor/doc2x/pdf.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ func ConvertParsePdfRequest(
2828
meta *meta.Meta,
2929
req *http.Request,
3030
) (adaptor.ConvertResult, error) {
31-
err := req.ParseMultipartForm(1024 * 1024 * 4)
31+
err := common.ParseMultipartFormWithLimit(req)
3232
if err != nil {
3333
return adaptor.ConvertResult{}, err
3434
}

core/relay/adaptor/jina/embeddings.go

Lines changed: 152 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package jina
22

33
import (
44
"net/http"
5+
"strings"
56

67
"github.com/bytedance/sonic/ast"
78
"github.com/labring/aiproxy/core/relay/adaptor"
@@ -14,7 +15,156 @@ func ConvertEmbeddingsRequest(
1415
req *http.Request,
1516
) (adaptor.ConvertResult, error) {
1617
return openai.ConvertEmbeddingsRequest(meta, req, true, func(node *ast.Node) error {
17-
_, err := node.Unset("encoding_format")
18-
return err
18+
if _, err := node.Unset("encoding_format"); err != nil {
19+
return err
20+
}
21+
22+
return patchEmbeddingsInput(node)
1923
})
2024
}
25+
26+
func patchEmbeddingsInput(node *ast.Node) error {
27+
inputNode := node.Get("input")
28+
if !inputNode.Exists() {
29+
return nil
30+
}
31+
32+
switch inputNode.TypeSafe() {
33+
case ast.V_STRING:
34+
text, err := inputNode.String()
35+
if err != nil {
36+
return err
37+
}
38+
39+
*inputNode = ast.NewArray([]ast.Node{newJinaTextInput(text)})
40+
41+
return nil
42+
case ast.V_ARRAY:
43+
var patchErr error
44+
45+
err := inputNode.ForEach(func(_ ast.Sequence, item *ast.Node) bool {
46+
patchErr = patchEmbeddingsInputItem(item)
47+
return patchErr == nil
48+
})
49+
if err != nil {
50+
return err
51+
}
52+
53+
return patchErr
54+
default:
55+
return nil
56+
}
57+
}
58+
59+
func patchEmbeddingsInputItem(item *ast.Node) error {
60+
switch item.TypeSafe() {
61+
case ast.V_STRING:
62+
text, err := item.String()
63+
if err != nil {
64+
return err
65+
}
66+
67+
*item = newJinaTextInput(text)
68+
69+
return nil
70+
case ast.V_OBJECT:
71+
imageURL, ok, err := openAIImageURL(item)
72+
if err != nil {
73+
return err
74+
}
75+
76+
if ok {
77+
*item = newJinaImageInput(imageURL)
78+
return nil
79+
}
80+
81+
imageNode := item.Get("image")
82+
if imageNode.Exists() && imageNode.TypeSafe() == ast.V_STRING {
83+
image, err := imageNode.String()
84+
if err != nil {
85+
return err
86+
}
87+
88+
normalizedImage := normalizeJinaImage(image)
89+
if normalizedImage != image || item.Get("type").Exists() {
90+
*item = newJinaImageInput(normalizedImage)
91+
}
92+
93+
return nil
94+
}
95+
96+
textNode := item.Get("text")
97+
if textNode.Exists() && textNode.TypeSafe() == ast.V_STRING && item.Get("type").Exists() {
98+
text, err := textNode.String()
99+
if err != nil {
100+
return err
101+
}
102+
103+
*item = newJinaTextInput(text)
104+
}
105+
106+
return nil
107+
default:
108+
return nil
109+
}
110+
}
111+
112+
func openAIImageURL(item *ast.Node) (string, bool, error) {
113+
typeNode := item.Get("type")
114+
if typeNode.Exists() {
115+
contentType, err := typeNode.String()
116+
if err != nil {
117+
return "", false, err
118+
}
119+
120+
if contentType != "image_url" {
121+
return "", false, nil
122+
}
123+
}
124+
125+
imageURLNode := item.Get("image_url")
126+
if !imageURLNode.Exists() || imageURLNode.TypeSafe() != ast.V_OBJECT {
127+
return "", false, nil
128+
}
129+
130+
urlNode := imageURLNode.Get("url")
131+
if !urlNode.Exists() || urlNode.TypeSafe() != ast.V_STRING {
132+
return "", false, nil
133+
}
134+
135+
imageURL, err := urlNode.String()
136+
if err != nil {
137+
return "", false, err
138+
}
139+
140+
return imageURL, true, nil
141+
}
142+
143+
func newJinaTextInput(text string) ast.Node {
144+
return ast.NewObject([]ast.Pair{
145+
ast.NewPair("text", ast.NewString(text)),
146+
})
147+
}
148+
149+
func newJinaImageInput(image string) ast.Node {
150+
return ast.NewObject([]ast.Pair{
151+
ast.NewPair("image", ast.NewString(normalizeJinaImage(image))),
152+
})
153+
}
154+
155+
// normalizeJinaImage strips the data-URL header from an inline image.
//
// When image starts with "data:image/" and contains ";base64,", the text after
// the first ";base64," is returned, provided it is not empty. Any other value
// (for example a plain URL, or a data URL with no payload) is returned
// unchanged.
func normalizeJinaImage(image string) string {
	const (
		scheme = "data:image/"
		marker = ";base64,"
	)

	if strings.HasPrefix(image, scheme) {
		if at := strings.Index(image, marker); at >= 0 {
			// Keep the input as is when nothing follows the marker.
			if payload := image[at+len(marker):]; payload != "" {
				return payload
			}
		}
	}

	return image
}

0 commit comments

Comments
 (0)