Open
Description
I am trying to make streaming work for an LLM, but I am not able to get it to return results properly.
from google.oauth2 import service_account
import asyncio
from google.cloud import aiplatform_v1
# Deployment settings; all three are blank placeholders in this sample.
# Google Cloud project identifier (presumably meant for the endpoint resource path; confirm).
PROJECT_ID = ""
# Region; sample_predict prefixes it to "-aiplatform.googleapis.com" to form the API host.
REGION = ""
# Identifier of the deployed model/endpoint (presumably meant for the endpoint resource path; confirm).
MODEL_ID = ""
async def sample_predict():
    """Send one prediction request to an AI Platform endpoint and print the reply.

    Builds credentials from an inline service-account info dict, creates an
    async prediction client bound to the regional API host derived from
    ``REGION``, and issues a single ``predict`` call carrying one prompt
    instance plus sampling parameters.

    Returns:
        None. The response object is printed to stdout.

    Raises:
        Exceptions raised by the credentials factory or by the client call
        propagate unchanged to the caller.
    """
    # Service-account key contents (the parsed JSON key file), not an API key.
    service_account_info = {}
    credentials = service_account.Credentials.from_service_account_info(
        service_account_info
    )

    # Requests must go to the regional host that serves the endpoint.
    client_options = {"api_endpoint": f"{REGION}-aiplatform.googleapis.com"}
    client = aiplatform_v1.PredictionServiceAsyncClient(
        credentials=credentials, client_options=client_options
    )

    # NOTE(review): "stream": True is sent through the unary `predict` RPC.
    # The failure reported with this script shows the server emitting several
    # concatenated JSON objects, which a single-response call cannot parse.
    # Consuming a token stream likely needs a streaming RPC (e.g.
    # `stream_raw_predict`) instead; confirm against the client documentation.
    instance = {
        "stream": True,
        "prompt": "System: <|begin_of_text|><|start_header_id|>system<|end_header_id|>just answer user questions<|eot_id|>\n<|start_header_id|>user<|end_header_id|>\ninput text:how do I clear browser cache?<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>",
    }

    # Fill the resource-path template; previously the literal "{}" placeholders
    # were sent as-is, so the request never named a real endpoint.
    # Assumes MODEL_ID holds the deployed endpoint's id; TODO confirm.
    endpoint = f"projects/{PROJECT_ID}/locations/{REGION}/endpoints/{MODEL_ID}"

    request = aiplatform_v1.PredictRequest(
        endpoint=endpoint,
        parameters={"max_tokens": 8000, "temperature": 0.6, "top_p": 0.1},
    )
    request.instances.append(instance)

    response = await client.predict(request=request)
    print(response)
if __name__ == "__main__":
    # Script entry point: announce the run, then drive the coroutine to
    # completion on a fresh event loop.
    banner = "\nGenerating text response from Llama3 model..."
    print(banner)
    prediction_coro = sample_predict()
    asyncio.run(prediction_coro)
I am getting the error below:
google.api_core.exceptions.FailedPrecondition: 400 The output data is not valid json. Original output: {"predictions": ["\n\n"]} {"predictions": ["Clear"]} {"predictions": ["ing"]} {"predictions": [" your"]} {"predictions": [" browser"]} {"predictions": [" cache"]} {"predictions": [" is"]} {"predictions": [" a"]} {"predictions": [" straightforward"]} {"predictions": [" process"]} {"predictions": [" that"]} {"predictions": [" varies"]} {"predictions": [" slightly"]} {"predictions": [" depending"]} {"predictions": [" on"]} {"predictions": [" the"]} .
Metadata
Metadata
Assignees
Labels
No labels