packages/components/nodes/chatmodels/ChatAnthropic/ChatAnthropic.ts

@@ -113,6 +113,23 @@ class ChatAnthropic_ChatModels implements INode {
                 'Allow image input. Refer to the <a href="https://docs.flowiseai.com/using-flowise/uploads#image" target="_blank">docs</a> for more details.',
             default: false,
             optional: true
-        }
+        },
+        {
+            label: 'Image Resolution',
+            name: 'imageResolution',
+            type: 'number',
+            step: 1,
+            optional: true,
+            additionalParams: true
+        },
+        {
+            label: 'Allow PDF Uploads',
+            name: 'allowPdfUploads',
+            type: 'boolean',
+            description:
+                'Allow PDF input. Refer to the <a href="https://docs.flowiseai.com/using-flowise/uploads#pdf" target="_blank">docs</a> for more details.',
+            default: false,
+            optional: true
+        }
     ]
 }
@@ -134,12 +151,13 @@ class ChatAnthropic_ChatModels implements INode {
         const cache = nodeData.inputs?.cache as BaseCache
         const extendedThinking = nodeData.inputs?.extendedThinking as boolean
         const budgetTokens = nodeData.inputs?.budgetTokens as string
+        const imageResolution = nodeData.inputs?.imageResolution as string
+        const allowImageUploads = nodeData.inputs?.allowImageUploads as boolean
+        const allowPdfUploads = nodeData.inputs?.allowPdfUploads as boolean
 
         const credentialData = await getCredentialData(nodeData.credential ?? '', options)
         const anthropicApiKey = getCredentialParam('anthropicApiKey', credentialData, nodeData)
 
-        const allowImageUploads = nodeData.inputs?.allowImageUploads as boolean
-
         const obj: Partial<AnthropicInput> & BaseLLMParams & { anthropicApiKey?: string } = {
             temperature: parseFloat(temperature),
             modelName,
@@ -161,7 +179,11 @@
 
         const multiModalOption: IMultiModalOption = {
             image: {
-                allowImageUploads: allowImageUploads ?? false
-            }
+                allowImageUploads: allowImageUploads ?? false,
+                imageResolution: imageResolution
+            },
+            pdf: {
+                allowPdfUploads: allowPdfUploads ?? false
+            }
         }

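With `allowPdfUploads` on, the uploaded file ultimately has to reach Anthropic's Messages API as a `document` content block. A minimal sketch of that block, assuming base64 transport — the block layout comes from Anthropic's published API, while `toPdfContentBlock` is a hypothetical helper that is not part of this diff:

```ts
// Sketch: Anthropic's Messages API accepts PDFs as a `document` content
// block with a base64 source. Hypothetical helper, not part of this PR.
const toPdfContentBlock = (base64Pdf: string) => ({
    type: 'document',
    source: {
        type: 'base64',
        media_type: 'application/pdf',
        data: base64Pdf
    }
})
```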
packages/components/nodes/chatmodels/ChatGoogleGenerativeAI/ChatGoogleGenerativeAI.ts

@@ -164,6 +164,33 @@ class GoogleGenerativeAI_ChatModels implements INode {
                 'Allow image input. Refer to the <a href="https://docs.flowiseai.com/using-flowise/uploads#image" target="_blank">docs</a> for more details.',
             default: false,
             optional: true
-        }
+        },
+        {
+            label: 'Allow PDF Uploads',
+            name: 'allowPdfUploads',
+            type: 'boolean',
+            description:
+                'Allow PDF input. Refer to the <a href="https://docs.flowiseai.com/using-flowise/uploads#pdf" target="_blank">docs</a> for more details.',
+            default: false,
+            optional: true
+        },
+        {
+            label: 'Allow Audio Uploads',
+            name: 'allowAudioUploads',
+            type: 'boolean',
+            description:
+                'Allow audio input. Refer to the <a href="https://docs.flowiseai.com/using-flowise/uploads#audio" target="_blank">docs</a> for more details.',
+            default: false,
+            optional: true
+        },
+        {
+            label: 'Allow Video Uploads',
+            name: 'allowVideoUploads',
+            type: 'boolean',
+            description:
+                'Allow video input. Refer to the <a href="https://docs.flowiseai.com/using-flowise/uploads#video" target="_blank">docs</a> for more details.',
+            default: false,
+            optional: true
+        }
     ]
 }
@@ -191,6 +218,9 @@ class GoogleGenerativeAI_ChatModels implements INode {
         const streaming = nodeData.inputs?.streaming as boolean
 
         const allowImageUploads = nodeData.inputs?.allowImageUploads as boolean
+        const allowPdfUploads = nodeData.inputs?.allowPdfUploads as boolean
+        const allowAudioUploads = nodeData.inputs?.allowAudioUploads as boolean
+        const allowVideoUploads = nodeData.inputs?.allowVideoUploads as boolean
 
         const obj: Partial<GoogleGenerativeAIChatInput> = {
             apiKey: apiKey,
@@ -220,6 +250,15 @@ class GoogleGenerativeAI_ChatModels implements INode {
         const multiModalOption: IMultiModalOption = {
             image: {
                 allowImageUploads: allowImageUploads ?? false
-            }
+            },
+            pdf: {
+                allowPdfUploads: allowPdfUploads ?? false
+            },
+            audio: {
+                allowAudioUploads: allowAudioUploads ?? false
+            },
+            video: {
+                allowVideoUploads: allowVideoUploads ?? false
+            }
         }

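The Gemini node now carries four independent per-modality flags. A sketch of how an upload gate could consult them by MIME type — the helper is hypothetical, and the `pdf`/`audio`/`video` fields on `IMultiModalOption` (plus the import path) are inferred from this diff:

```ts
import { IMultiModalOption } from '../../../src/Interface'

// Hypothetical gate, not part of this diff: map an upload's MIME type to
// the matching IMultiModalOption flag, defaulting to "not allowed".
const isUploadAllowed = (mime: string, opt: IMultiModalOption): boolean => {
    if (mime.startsWith('image/')) return opt.image?.allowImageUploads ?? false
    if (mime === 'application/pdf') return opt.pdf?.allowPdfUploads ?? false
    if (mime.startsWith('audio/')) return opt.audio?.allowAudioUploads ?? false
    if (mime.startsWith('video/')) return opt.video?.allowVideoUploads ?? false
    return false
}
```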
packages/components/nodes/multiagents/Supervisor/Supervisor.ts (104 additions & 45 deletions)
@@ -20,7 +20,7 @@ import { ChatMistralAI } from '@langchain/mistralai'
 import { ChatOpenAI } from '../../chatmodels/ChatOpenAI/FlowiseChatOpenAI'
 import { ChatAnthropic } from '../../chatmodels/ChatAnthropic/FlowiseChatAnthropic'
 import { ChatGoogleGenerativeAI } from '../../chatmodels/ChatGoogleGenerativeAI/FlowiseChatGoogleGenerativeAI'
-import { addImagesToMessages, llmSupportsVision } from '../../../src/multiModalUtils'
+import { addMultiModalContentToMessages, llmSupportsVision } from '../../../src/multiModalUtils'
 
 const sysPrompt = `You are a supervisor tasked with managing a conversation between the following workers: {team_members}.
 Given the following user request, respond with the worker to act next.
@@ -209,6 +209,15 @@ class Supervisor_MultiAgents implements INode {
             prompt = messages.prompt
             multiModalMessageContent = messages.multiModalMessageContent
 
+            const multiModalContent = await addMultiModalContentToMessages(
+                nodeData,
+                options,
+                llmSupportsVision(llm) ? llm.multiModalOption : undefined
+            )
+
+            // Filter out only image content
+            multiModalMessageContent = multiModalContent.filter((content) => content.type === 'image_url') as MessageContentImageUrl[]
+
             if ((llm as any).bindTools === undefined) {
                 throw new Error(`This agent only compatible with function calling models.`)
             }
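The same retrieve-then-filter sequence recurs in every model branch below. From those call sites, the new helper's shape is roughly the following — inferred from usage only; the real declaration lives in `src/multiModalUtils` and the import path and return type are assumptions:

```ts
import { INodeData, ICommonObject, IMultiModalOption } from '../../../src/Interface'

// Inferred from usage in this diff, not the actual declaration: resolves the
// chat's uploads into message content parts, honoring the multimodal flags.
declare function addMultiModalContentToMessages(
    nodeData: INodeData,
    options: ICommonObject,
    multiModalOption?: IMultiModalOption
): Promise<Array<{ type: string; [key: string]: unknown }>>
```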
@@ -226,13 +235,15 @@
                 return {
                     next: toolAgentAction.toolInput.next,
                     instructions: toolAgentAction.toolInput.instructions,
-                    team_members: members.join(', ')
+                    team_members: members.join(', '),
+                    summarization: toolAgentAction.toolInput.summarization
                 }
             } else if (typeof x === 'object' && 'returnValues' in x) {
                 return {
                     next: 'FINISH',
                     instructions: x.returnValues?.output,
-                    team_members: members.join(', ')
+                    team_members: members.join(', '),
+                    summarization: defaultSummarization
                 }
             } else {
                 return {
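Reading `toolAgentAction.toolInput.summarization` implies the supervisor's routing tool schema gained a `summarization` property alongside `next` and `instructions`. A sketch of what that schema plausibly looks like — entirely hypothetical, since the actual definition is outside this hunk, and the worker names shown are placeholders:

```ts
import { z } from 'zod'

// Hypothetical sketch of the routing tool's input schema implied by
// toolInput.next / .instructions / .summarization; not shown in this diff.
const routeSchema = z.object({
    next: z.enum(['FINISH', 'WORKER_A', 'WORKER_B']), // real members are built dynamically
    instructions: z.string().describe('Specific instructions for the next worker'),
    summarization: z.string().optional().describe('Summary of the conversation so far')
})
```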
@@ -249,10 +260,14 @@
                 ['human', userPrompt]
             ])
 
-            // @ts-ignore
-            const messages = await processImageMessage(1, llm, prompt, nodeData, options)
-            prompt = messages.prompt
-            multiModalMessageContent = messages.multiModalMessageContent
+            const multiModalContent = await addMultiModalContentToMessages(
+                nodeData,
+                options,
+                llmSupportsVision(llm) ? llm.multiModalOption : undefined
+            )
+
+            // Filter out only image content
+            multiModalMessageContent = multiModalContent.filter((content) => content.type === 'image_url') as MessageContentImageUrl[]
 
             // Force OpenAI to use tool
             const modelWithTool = llm.bind({
@@ -272,13 +287,15 @@
                 return {
                     next: toolAgentAction.toolInput.next,
                     instructions: toolAgentAction.toolInput.instructions,
-                    team_members: members.join(', ')
+                    team_members: members.join(', '),
+                    summarization: toolAgentAction.toolInput.summarization
                 }
             } else if (typeof x === 'object' && 'returnValues' in x) {
                 return {
                     next: 'FINISH',
                     instructions: x.returnValues?.output,
-                    team_members: members.join(', ')
+                    team_members: members.join(', '),
+                    summarization: defaultSummarization
                 }
             } else {
                 return {
@@ -299,9 +316,14 @@
                 ['human', userPrompt]
             ])
 
-            const messages = await processImageMessage(2, llm, prompt, nodeData, options)
-            prompt = messages.prompt
-            multiModalMessageContent = messages.multiModalMessageContent
+            const multiModalContent = await addMultiModalContentToMessages(
+                nodeData,
+                options,
+                llmSupportsVision(llm) ? llm.multiModalOption : undefined
+            )
+
+            // Filter out only image content
+            multiModalMessageContent = multiModalContent.filter((content) => content.type === 'image_url') as MessageContentImageUrl[]
 
             if (llm.bindTools === undefined) {
                 throw new Error(`This agent only compatible with function calling models.`)
@@ -319,13 +341,15 @@
                 return {
                     next: toolAgentAction.toolInput.next,
                     instructions: toolAgentAction.toolInput.instructions,
-                    team_members: members.join(', ')
+                    team_members: members.join(', '),
+                    summarization: toolAgentAction.toolInput.summarization
                 }
             } else if (typeof x === 'object' && 'returnValues' in x) {
                 return {
                     next: 'FINISH',
                     instructions: x.returnValues?.output,
-                    team_members: members.join(', ')
+                    team_members: members.join(', '),
+                    summarization: defaultSummarization
                 }
             } else {
                 return {
@@ -342,9 +366,14 @@
                 ['human', userPrompt]
             ])
 
-            const messages = await processImageMessage(1, llm, prompt, nodeData, options)
-            prompt = messages.prompt
-            multiModalMessageContent = messages.multiModalMessageContent
+            const multiModalContent = await addMultiModalContentToMessages(
+                nodeData,
+                options,
+                llmSupportsVision(llm) ? llm.multiModalOption : undefined
+            )
+
+            // Filter out only image content
+            multiModalMessageContent = multiModalContent.filter((content) => content.type === 'image_url') as MessageContentImageUrl[]
 
             if (llm.bindTools === undefined) {
                 throw new Error(`This agent only compatible with function calling models.`)
@@ -362,13 +391,15 @@
                 return {
                     next: toolAgentAction.toolInput.next,
                     instructions: toolAgentAction.toolInput.instructions,
-                    team_members: members.join(', ')
+                    team_members: members.join(', '),
+                    summarization: toolAgentAction.toolInput.summarization
                 }
             } else if (typeof x === 'object' && 'returnValues' in x) {
                 return {
                     next: 'FINISH',
                     instructions: x.returnValues?.output,
-                    team_members: members.join(', ')
+                    team_members: members.join(', '),
+                    summarization: defaultSummarization
                 }
             } else {
                 return {
@@ -411,9 +442,14 @@
                 ['human', userPrompt]
             ])
 
-            const messages = await processImageMessage(1, llm, prompt, nodeData, options)
-            prompt = messages.prompt
-            multiModalMessageContent = messages.multiModalMessageContent
+            const multiModalContent = await addMultiModalContentToMessages(
+                nodeData,
+                options,
+                llmSupportsVision(llm) ? llm.multiModalOption : undefined
+            )
+
+            // Filter out only image content
+            multiModalMessageContent = multiModalContent.filter((content) => content.type === 'image_url') as MessageContentImageUrl[]
 
             // Force Mistral to use tool
             // @ts-ignore
@@ -460,9 +496,14 @@
                 ['human', userPrompt]
             ])
 
-            const messages = await processImageMessage(1, llm, prompt, nodeData, options)
-            prompt = messages.prompt
-            multiModalMessageContent = messages.multiModalMessageContent
+            const multiModalContent = await addMultiModalContentToMessages(
+                nodeData,
+                options,
+                llmSupportsVision(llm) ? llm.multiModalOption : undefined
+            )
+
+            // Filter out only image content
+            multiModalMessageContent = multiModalContent.filter((content) => content.type === 'image_url') as MessageContentImageUrl[]
 
             if ((llm as any).bindTools === undefined) {
                 throw new Error(`This agent only compatible with function calling models.`)
@@ -507,10 +548,14 @@
                 ['human', userPrompt]
             ])
 
-            // @ts-ignore
-            const messages = await processImageMessage(1, llm, prompt, nodeData, options)
-            prompt = messages.prompt
-            multiModalMessageContent = messages.multiModalMessageContent
+            const multiModalContent = await addMultiModalContentToMessages(
+                nodeData,
+                options,
+                llmSupportsVision(llm) ? llm.multiModalOption : undefined
+            )
+
+            // Filter out only image content
+            multiModalMessageContent = multiModalContent.filter((content) => content.type === 'image_url') as MessageContentImageUrl[]
 
             // Force OpenAI to use tool
             const modelWithTool = llm.bind({
@@ -560,9 +605,14 @@
                 ['human', userPrompt]
             ])
 
-            const messages = await processImageMessage(2, llm, prompt, nodeData, options)
-            prompt = messages.prompt
-            multiModalMessageContent = messages.multiModalMessageContent
+            const multiModalContent = await addMultiModalContentToMessages(
+                nodeData,
+                options,
+                llmSupportsVision(llm) ? llm.multiModalOption : undefined
+            )
+
+            // Filter out only image content
+            multiModalMessageContent = multiModalContent.filter((content) => content.type === 'image_url') as MessageContentImageUrl[]
 
             if (llm.bindTools === undefined) {
                 throw new Error(`This agent only compatible with function calling models.`)
@@ -606,9 +656,14 @@
                 ['human', userPrompt]
             ])
 
-            const messages = await processImageMessage(1, llm, prompt, nodeData, options)
-            prompt = messages.prompt
-            multiModalMessageContent = messages.multiModalMessageContent
+            const multiModalContent = await addMultiModalContentToMessages(
+                nodeData,
+                options,
+                llmSupportsVision(llm) ? llm.multiModalOption : undefined
+            )
+
+            // Filter out only image content
+            multiModalMessageContent = multiModalContent.filter((content) => content.type === 'image_url') as MessageContentImageUrl[]
 
             if (llm.bindTools === undefined) {
                 throw new Error(`This agent only compatible with function calling models.`)
@@ -639,9 +694,8 @@
             } else {
                 return {
                     next: 'FINISH',
-                    instructions: defaultInstruction,
-                    team_members: members.join(', '),
-                    summarization: defaultSummarization
+                    instructions: 'Conversation finished',
+                    team_members: members.join(', ')
                 }
             }
         })
@@ -715,16 +769,21 @@ const processImageMessage = async (
 
     if (llmSupportsVision(llm)) {
         const visionChatModel = llm as IVisionChatModal
-        multiModalMessageContent = await addImagesToMessages(nodeData, options, llm.multiModalOption)
+        if (visionChatModel.multiModalOption) {
+            const multiModalContent = await addMultiModalContentToMessages(nodeData, options, visionChatModel.multiModalOption)
+
+            // Filter out only image content
+            multiModalMessageContent = multiModalContent.filter((content) => content.type === 'image_url') as MessageContentImageUrl[]
 
-        if (multiModalMessageContent?.length) {
-            visionChatModel.setVisionModel()
+            if (multiModalMessageContent?.length) {
+                visionChatModel.setVisionModel()
 
-            const msg = HumanMessagePromptTemplate.fromTemplate([...multiModalMessageContent])
+                const msg = HumanMessagePromptTemplate.fromTemplate([...multiModalMessageContent])
 
-            prompt.promptMessages.splice(index, 0, msg)
-        } else {
-            visionChatModel.revertToOriginalModel()
+                prompt.promptMessages.splice(index, 0, msg)
+            } else {
+                visionChatModel.revertToOriginalModel()
+            }
         }
     }

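Since the image-only filter-and-cast is pasted verbatim into every supervisor branch, a follow-up could hoist it next to the helper. A possible extraction, not part of this diff:

```ts
import { MessageContentImageUrl } from '@langchain/core/messages'

// Possible follow-up, not part of this diff: one shared narrowing helper
// instead of the repeated inline filter-and-cast at each branch.
const keepImageContent = (content: Array<{ type: string }>): MessageContentImageUrl[] =>
    content.filter((c) => c.type === 'image_url') as MessageContentImageUrl[]

// Each branch would then collapse to:
// multiModalMessageContent = keepImageContent(
//     await addMultiModalContentToMessages(nodeData, options, llmSupportsVision(llm) ? llm.multiModalOption : undefined)
// )
```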