Ollama - Remote hosts #8234

Open · wants to merge 8 commits into master
41 changes: 37 additions & 4 deletions autogpt_platform/backend/backend/blocks/llm.py
@@ -53,6 +53,8 @@ class LlmModel(str, Enum):
# Ollama models
OLLAMA_LLAMA3_8B = "llama3"
OLLAMA_LLAMA3_405B = "llama3.1:405b"
# CUSTOM
OLLAMA_DOLPHIN = "dolphin-mistral:latest"

@property
def metadata(self) -> ModelMetadata:
@@ -79,6 +81,8 @@ def metadata(self) -> ModelMetadata:
LlmModel.LLAMA3_1_8B: ModelMetadata("groq", 131072, cost_factor=13),
LlmModel.OLLAMA_LLAMA3_8B: ModelMetadata("ollama", 8192, cost_factor=7),
LlmModel.OLLAMA_LLAMA3_405B: ModelMetadata("ollama", 8192, cost_factor=11),
# CUSTOM
LlmModel.OLLAMA_DOLPHIN: ModelMetadata("ollama", 32768, cost_factor=0),
}

for model in LlmModel:
@@ -105,6 +109,11 @@ class Input(BlockSchema):
prompt_values: dict[str, str] = SchemaField(
advanced=False, default={}, description="Values used to fill in the prompt."
)
ollama_host: str = SchemaField(
Member

@Bentlybro is there a way to conditionally show stuff like this if ollama is the selected model?

Bentlybro (Member), Oct 1, 2024
@ntindle I don't think we have that set up at the moment, but maybe it's something we should look into getting set up, because I can already see quite a lot of use cases for it.
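
For illustration, a minimal sketch of one way conditional display could be approached: attach a hint to the field's JSON schema that a schema-aware frontend could read to hide `ollama_host` unless an Ollama model is selected. The `depends_on` key and the plain-Pydantic `Input` model below are hypothetical; neither is an existing SchemaField or frontend feature.

```python
# Hypothetical sketch only: "depends_on" is an invented hint, not an
# existing AutoGPT/SchemaField feature.
from pydantic import BaseModel, Field


class Input(BaseModel):
    model: str = Field(default="llama3", description="LLM to use")
    ollama_host: str = Field(
        default="localhost:11434",
        description="Ollama host for local models",
        # A schema-aware frontend could hide this field unless the selected
        # model's provider is "ollama".
        json_schema_extra={"depends_on": {"field": "model", "provider": "ollama"}},
    )


# Pydantic v2 merges json_schema_extra into the emitted property schema,
# so the hint is visible to whatever renders the block's inputs.
print(Input.model_json_schema()["properties"]["ollama_host"]["depends_on"])
```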

Author

Just thinking: would it be worth splitting model selection into provider then model, rather than just model? Some providers (e.g. Ollama) offer a wide variety of models which may overlap with other providers' offerings, so being able to choose both the provider and the model would give users more control. It'd also make it a tad easier to have conditional inputs on the blocks, I imagine, since we wouldn't have to look up the provider based on the model; it'd be right there in the block inputs.

Let me know and I can try to get a PR out for that functionality soon.
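
A rough sketch of the proposed provider-then-model split (the enum, the mapping, and the model lists below are illustrative only and don't exist in the codebase):

```python
# Illustrative only: a provider enum plus a per-provider model list, so the
# same model name can appear under several providers without ambiguity.
from enum import Enum


class LlmProvider(str, Enum):
    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    GROQ = "groq"
    OLLAMA = "ollama"


PROVIDER_MODELS: dict[LlmProvider, list[str]] = {
    LlmProvider.GROQ: ["llama3-8b-8192"],
    LlmProvider.OLLAMA: ["llama3", "llama3.1:405b", "dolphin-mistral:latest"],
}


def validate_choice(provider: LlmProvider, model: str) -> str:
    """Fail early with a clear message instead of a confusing runtime error."""
    if model not in PROVIDER_MODELS.get(provider, []):
        raise ValueError(f"{model!r} is not offered by provider {provider.value!r}")
    return model
```

A block could then expose provider and model as two separate inputs and validate the pair, instead of inferring the provider from the model name.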

Bentlybro (Member), Oct 2, 2024
@Fried-Squid I think that's a very good idea; it makes a lot more sense being able to do it that way. @ntindle @Torantulino what do we think? It should be pretty easy to do, and as far as I can see it should just be changes to the block itself.

Author

@Bentlybro the one issue I see with it is getting the frontend to display that properly - we'd need to make changes to the JSON schema which gets passed to the frontend to render the available model selection. Or we could just throw an error when model and provider don't match, but that seems pretty hostile to new users who might not understand the differences between the providers and models.

advanced=True,
default="localhost:11434",
description="Ollama host for local models",
)

class Output(BlockSchema):
response: dict[str, Any]
@@ -139,7 +148,11 @@ def __init__(self):

@staticmethod
def llm_call(
- api_key: str, model: LlmModel, prompt: list[dict], json_format: bool
+ api_key: str,
+ model: LlmModel,
+ prompt: list[dict],
+ json_format: bool,
+ ollama_host: str,
) -> str:
provider = model.metadata.provider

@@ -201,9 +214,10 @@ def llm_call(
)
return response.choices[0].message.content or ""
elif provider == "ollama":
- response = ollama.generate(
+ client = ollama.Client(host=ollama_host)
+ response = client.generate(
model=model.value,
- prompt=prompt[0]["content"],
+ prompt=str(prompt),
)
return response["response"]
else:
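
For context on the change above, a minimal standalone sketch of the `ollama` Python client pointed at a remote host; the host address is an example, and responses are accessed the same way the diff does:

```python
# Minimal sketch, assuming the `ollama` package is installed and a server is
# reachable at the given address (the default would be localhost:11434).
import ollama

client = ollama.Client(host="192.168.1.50:11434")  # example remote host

# One-shot completion, mirroring client.generate(...) above.
gen = client.generate(model="llama3", prompt="Say hello in one sentence.")
print(gen["response"])

# Chat-style call, mirroring client.chat(...) used later in this diff.
chat = client.chat(
    model="llama3",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(chat["message"]["content"])
```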
@@ -269,6 +283,7 @@ def parse_response(resp: str) -> tuple[dict[str, Any], str | None]:
model=model,
prompt=prompt,
json_format=bool(input_data.expected_format),
ollama_host=input_data.ollama_host,
)
logger.info(f"LLM attempt-{retry_count} response: {response_text}")

@@ -326,6 +341,11 @@ class Input(BlockSchema):
prompt_values: dict[str, str] = SchemaField(
advanced=False, default={}, description="Values used to fill in the prompt."
)
ollama_host: str = SchemaField(
advanced=True,
default="localhost:11434",
description="Ollama host for local models",
)

class Output(BlockSchema):
response: str
@@ -385,6 +405,11 @@ class Input(BlockSchema):
# TODO: Make this dynamic
max_tokens: int = 4000 # Adjust based on the model's context window
chunk_overlap: int = 100 # Overlap between chunks to maintain context
ollama_host: str = SchemaField(
advanced=True,
default="localhost:11434",
description="Ollama host for local models",
)

class Output(BlockSchema):
summary: str
@@ -526,6 +551,11 @@ class Input(BlockSchema):
description="The maximum number of tokens to generate in the chat completion.",
ge=1,
)
ollama_host: str = SchemaField(
advanced=True,
default="localhost:11434",
description="Ollama host for local models",
)

class Output(BlockSchema):
response: str = SchemaField(
@@ -567,6 +597,7 @@ def llm_call(
api_key: str,
model: LlmModel,
messages: List[dict[str, str]],
ollama_host: str,
max_tokens: int | None = None,
) -> str:
provider = model.metadata.provider
@@ -596,7 +627,8 @@
)
return response.choices[0].message.content or ""
elif provider == "ollama":
- response = ollama.chat(
+ client = ollama.Client(host=ollama_host)
+ response = client.chat(
model=model.value,
messages=messages, # type: ignore
stream=False, # type: ignore
@@ -619,6 +651,7 @@ def run(self, input_data: Input, **kwargs) -> BlockOutput:
model=input_data.model,
messages=messages,
max_tokens=input_data.max_tokens,
ollama_host=input_data.ollama_host,
)

yield "response", response