1414 *
1515 * Thanks!
1616 */
17+ import type { ImageToTextOutput } from "@huggingface/tasks" ;
1718import {
1819 InferenceClientInputError ,
1920 InferenceClientProviderApiError ,
2021 InferenceClientProviderOutputError ,
2122} from "../errors.js" ;
2223import { isUrl } from "../lib/isUrl.js" ;
23- import type { BodyParams , HeaderParams , OutputType } from "../types.js" ;
24+ import type { BodyParams , HeaderParams , OutputType , RequestArgs } from "../types.js" ;
25+ import { base64FromBytes } from "../utils/base64FromBytes.js" ;
2426import { dataUrlFromBlob } from "../utils/dataUrlFromBlob.js" ;
2527import { delay } from "../utils/delay.js" ;
2628import { omit } from "../utils/omit.js" ;
27- import { BaseConversationalTask , TaskProviderHelper , type TextToImageTaskHelper } from "./providerHelper.js" ;
29+ import type { ImageToTextArgs } from "../tasks/cv/imageToText.js" ;
30+ import {
31+ BaseConversationalTask ,
32+ TaskProviderHelper ,
33+ type ImageToTextTaskHelper ,
34+ type TextToImageTaskHelper ,
35+ } from "./providerHelper.js" ;
2836
/** Base URL of the Z.ai API; every Z.ai task helper in this file passes it to its base-class constructor. */
const ZAI_API_BASE_URL = "https://api.z.ai";
3038
/**
 * Shared base class for the non-conversational Z.ai task helpers.
 *
 * Registers the helper under the "zai-org" provider id with the Z.ai base URL,
 * and decorates outgoing request headers with the Z.ai-specific entries below.
 */
abstract class ZaiTask extends TaskProviderHelper {
	constructor() {
		super("zai-org", ZAI_API_BASE_URL);
	}

	/**
	 * Builds the request headers for a Z.ai call.
	 *
	 * @param params - Header parameters forwarded unchanged to the base helper.
	 * @param binary - Binary-payload flag forwarded unchanged to the base helper.
	 * @returns The base helper's headers with `x-source-channel` and
	 *          `accept-language` set (overwriting any existing values for those keys).
	 */
	override prepareHeaders(params: HeaderParams, binary: boolean): Record<string, string> {
		// Start from whatever the base helper produces so its defaults are kept.
		const headers = super.prepareHeaders(params, binary);
		headers["x-source-channel"] = "hugging_face";
		headers["accept-language"] = "en-US,en";
		return headers;
	}
}
51+
3152export class ZaiConversationalTask extends BaseConversationalTask {
3253 constructor ( ) {
3354 super ( "zai-org" , ZAI_API_BASE_URL ) ;
@@ -63,28 +84,12 @@ interface ZaiAsyncResultResponse {
// NOTE(review): the code that consumes these two constants is outside the visible
// part of the file; presumably they bound the polling of Z.ai async results — confirm.
/** Maximum number of polling attempts. */
const MAX_POLL_ATTEMPTS = 60;
/** Interval between polling attempts, in milliseconds (per the `_MS` suffix). */
const POLL_INTERVAL_MS = 5000;
6586
66- export class ZaiTextToImageTask extends TaskProviderHelper implements TextToImageTaskHelper {
67- constructor ( ) {
68- super ( "zai-org" , ZAI_API_BASE_URL ) ;
69- }
70-
71- override prepareHeaders ( params : HeaderParams , binary : boolean ) : Record < string , string > {
72- const headers : Record < string , string > = {
73- Authorization : `Bearer ${ params . accessToken } ` ,
74- "x-source-channel" : "hugging_face" ,
75- "accept-language" : "en-US,en" ,
76- } ;
77- if ( ! binary ) {
78- headers [ "Content-Type" ] = "application/json" ;
79- }
80- return headers ;
81- }
82-
83- makeRoute ( ) : string {
87+ export class ZaiTextToImageTask extends ZaiTask implements TextToImageTaskHelper {
88+ override makeRoute ( ) : string {
8489 return "/api/paas/v4/async/images/generations" ;
8590 }
8691
87- preparePayload ( params : BodyParams ) : Record < string , unknown > {
92+ override preparePayload ( params : BodyParams ) : Record < string , unknown > {
8893 return {
8994 ...omit ( params . args , [ "inputs" , "parameters" ] ) ,
9095 ...( params . args . parameters as Record < string , unknown > ) ,
@@ -93,7 +98,7 @@ export class ZaiTextToImageTask extends TaskProviderHelper implements TextToImag
9398 } ;
9499 }
95100
96- async getResponse (
101+ override async getResponse (
97102 response : ZaiTextToImageResponse ,
98103 url ?: string ,
99104 headers ?: Record < string , string > ,
@@ -190,3 +195,56 @@ export class ZaiTextToImageTask extends TaskProviderHelper implements TextToImag
190195 ) ;
191196 }
192197}
198+
/**
 * Subset of the Z.ai `layout_parsing` response body that this file reads.
 */
interface ZaiLayoutParsingResponse {
	/** Text result of the parse; optional because a malformed response may omit it. */
	md_results?: string;
}
202+
/**
 * Image-to-text helper for Z.ai, backed by the `layout_parsing` endpoint.
 *
 * The input image is sent inline as a base64 data URL in the `file` field of
 * the request body, and the `md_results` string of the response is returned
 * as the generated text.
 */
export class ZaiImageToTextTask extends ZaiTask implements ImageToTextTaskHelper {
	override makeRoute(): string {
		return "/api/paas/v4/layout_parsing";
	}

	/**
	 * Normalizes the caller's image input into a base64 data URL stored in `inputs`.
	 *
	 * @param args - Task arguments carrying the image either as a Blob in `data`,
	 *               a Blob in `inputs`, or a URL string in `inputs`.
	 * @returns The original arguments with `data`/`inputs` replaced by
	 *          `inputs: "data:<mime>;base64,<payload>"`.
	 * @throws InferenceClientInputError when no usable image is supplied, or when
	 *         the image URL responds with a non-2xx status.
	 */
	async preparePayloadAsync(args: ImageToTextArgs): Promise<RequestArgs> {
		const blob = await this.resolveImageBlob(args);
		if (!blob) {
			throw new InferenceClientInputError("ZAI image-to-text requires a URL string or Blob as inputs");
		}

		// Blob.type is "" when the type is unknown, so `||` (not `??`) is the right fallback operator.
		const mimeType = blob.type || "image/png";
		const b64 = base64FromBytes(new Uint8Array(await blob.arrayBuffer()));
		const file = `data:${mimeType};base64,${b64}`;

		return {
			...("data" in args ? omit(args, "data") : omit(args, "inputs")),
			inputs: file,
		} as RequestArgs;
	}

	/**
	 * Builds the JSON body: the model id plus the data URL produced by
	 * {@link ZaiImageToTextTask.preparePayloadAsync} under `file`.
	 */
	override preparePayload(params: BodyParams): Record<string, unknown> {
		return {
			model: params.model,
			file: params.args.inputs,
		};
	}

	/**
	 * Validates the provider response and maps it to the image-to-text output.
	 *
	 * @throws InferenceClientProviderOutputError when `md_results` is not a string.
	 */
	override async getResponse(response: ZaiLayoutParsingResponse): Promise<ImageToTextOutput> {
		// Optional chaining guards against a null/undefined body at runtime despite the static type.
		const mdResults = response?.md_results;
		if (typeof mdResults !== "string") {
			throw new InferenceClientProviderOutputError(
				`Received malformed response from ZAI layout_parsing API: expected { md_results: string }, got: ${JSON.stringify(response)}`,
			);
		}
		return { generated_text: mdResults, generatedText: mdResults };
	}

	/**
	 * Picks the image Blob out of the task arguments, downloading it first when
	 * `inputs` is a URL string. Returns `undefined` when no usable image is present.
	 */
	private async resolveImageBlob(args: ImageToTextArgs): Promise<Blob | undefined> {
		// A Blob in `data` takes precedence over anything in `inputs`.
		if ("data" in args && args.data instanceof Blob) {
			return args.data;
		}
		if (!("inputs" in args)) {
			return undefined;
		}

		const inputs: unknown = args.inputs;
		if (typeof inputs === "string" && isUrl(inputs)) {
			const imageResponse = await fetch(inputs);
			// fetch() resolves on HTTP errors too; without this check an error page
			// would be base64-encoded and forwarded to Z.ai as if it were the image.
			if (!imageResponse.ok) {
				throw new InferenceClientInputError(
					`Failed to fetch image from ${inputs}: ${imageResponse.status} ${imageResponse.statusText}`,
				);
			}
			return imageResponse.blob();
		}
		return inputs instanceof Blob ? inputs : undefined;
	}
}
0 commit comments