Skip to content

unable to stream response llama gcp #3793

Open
@clive1995

Description

@clive1995

I am trying to make streaming work for LLM, but I am not able to get it to return results properly.


from google.oauth2 import service_account
import asyncio
from google.cloud import aiplatform_v1

PROJECT_ID = ""
REGION = ""
MODEL_ID = ""  


async def sample_predict():
    """Creates and returns an AI Platform client with provided credentials."""
    SERVICE_ACCOUNT_INFO = {} #api key
    credentials = service_account.Credentials.from_service_account_info(SERVICE_ACCOUNT_INFO)
    client_options = {"api_endpoint": f"{REGION}-aiplatform.googleapis.com"}

    # Create a client
    client = aiplatform_v1.PredictionServiceAsyncClient(credentials=credentials, client_options=client_options)

    instance = {
        "stream": True,
        "prompt": "System: <|begin_of_text|><|start_header_id|>system<|end_header_id|>just answer user questions<|eot_id|>\n<|start_header_id|>user<|end_header_id|>\ninput text:how do I clear browser cache?<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>"
    }

    request = aiplatform_v1.PredictRequest(
        endpoint='projects/{}/locations/{}/endpoints/{}',
        parameters={"max_tokens": 8000, "temperature": 0.6, "top_p": 0.1}
    )
    request.instances.append(instance)


    response = await client.predict(request=request)
    print(response)


if __name__ == "__main__":
    print("\nGenerating text response from Llama3 model...")
    asyncio.run(sample_predict())

i am getting the below error

google.api_core.exceptions.FailedPrecondition: 400 The output data is not valid json. Original output: {"predictions": ["\n\n"]} {"predictions": ["Clear"]} {"predictions": ["ing"]} {"predictions": [" your"]} {"predictions": [" browser"]} {"predictions": [" cache"]} {"predictions": [" is"]} {"predictions": [" a"]} {"predictions": [" straightforward"]} {"predictions": [" process"]} {"predictions": [" that"]} {"predictions": [" varies"]} {"predictions": [" slightly"]} {"predictions": [" depending"]} {"predictions": [" on"]} {"predictions": [" the"]} .

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions