# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

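"""Document-summarization LLM microservice.

A minimal wrapper that sends chunked input text through LangChain's summarize
chain against a vLLM OpenAI-compatible endpoint, exposed at /v1/chat/docsum
with English and Chinese prompt templates.
"""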
import os

from fastapi.responses import StreamingResponse
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.llms import VLLMOpenAI

from comps import CustomLogger, GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice
from comps.cores.mega.utils import get_access_token

logger = CustomLogger("llm_docsum")
logflag = os.getenv("LOGFLAG", False)

# Environment variables
TOKEN_URL = os.getenv("TOKEN_URL")
CLIENTID = os.getenv("CLIENTID")
CLIENT_SECRET = os.getenv("CLIENT_SECRET")
MODEL_ID = os.getenv("LLM_MODEL_ID", None)

templ_en = """Write a concise summary of the following:
"{text}"
CONCISE SUMMARY:"""

templ_zh = """请简要概括以下内容:
"{text}"
概况:"""

# Module-level splitter so the handler can chunk long inputs even when this
# module is imported rather than run as a script.
text_splitter = CharacterTextSplitter()

def post_process_text(text: str):
    """Format a raw streamed token as an SSE data line.

    A single space becomes the "@#$" sentinel and a newline becomes "<br/>" so the
    client can restore them; other whitespace-only tokens are dropped. Kept as a
    helper for raw token streams; the summarize-chain path below does not use it.
    """
    if text == " ":
        return "data: @#$\n\n"
    if text == "\n":
        return "data: <br/>\n\n"
    if text.isspace():
        return None
    new_text = text.replace(" ", "@#$")
    return f"data: {new_text}\n\n"

@register_microservice(
    name="opea_service@llm_docsum",
    service_type=ServiceType.LLM,
    endpoint="/v1/chat/docsum",
    host="0.0.0.0",
    port=9000,
)
async def llm_generate(input: LLMParamsDoc):
    """Summarize ``input.query`` using the configured vLLM backend.

    Picks an English or Chinese prompt based on ``input.language`` and returns
    either an SSE stream or a single ``GeneratedDoc``.
    """
    if logflag:
        logger.info(input)
    if input.language in ["en", "auto"]:
        templ = templ_en
    elif input.language in ["zh"]:
        templ = templ_zh
    else:
        raise NotImplementedError('Unsupported language; "language" must be one of "en", "zh", or "auto"')

    PROMPT = PromptTemplate.from_template(templ)

    if logflag:
        logger.info("Prompt template:")
        logger.info(PROMPT)

    access_token = (
        get_access_token(TOKEN_URL, CLIENTID, CLIENT_SECRET) if TOKEN_URL and CLIENTID and CLIENT_SECRET else None
    )
    headers = {}
    if access_token:
        headers = {"Authorization": f"Bearer {access_token}"}
    llm_endpoint = os.getenv("vLLM_ENDPOINT", "http://localhost:8080")
    model = input.model if input.model else MODEL_ID
    llm = VLLMOpenAI(
        openai_api_key="EMPTY",
        openai_api_base=llm_endpoint + "/v1",
        model_name=model,
        default_headers=headers,
        max_tokens=input.max_tokens,
        top_p=input.top_p,
        streaming=input.streaming,
        temperature=input.temperature,
        # The request's repetition_penalty is forwarded as the OpenAI-style presence_penalty.
        presence_penalty=input.repetition_penalty,
    )
    llm_chain = load_summarize_chain(llm=llm, prompt=PROMPT)
    texts = text_splitter.split_text(input.query)

    # Wrap each chunk in a Document for the summarize chain
    docs = [Document(page_content=t) for t in texts]

    if input.streaming:

        async def stream_generator():
            from langserve.serialization import WellKnownLCSerializer

            _serializer = WellKnownLCSerializer()
            # Stream the chain's run log; each SSE event carries serialized LangChain log ops.
            async for chunk in llm_chain.astream_log(docs):
                data = _serializer.dumps({"ops": chunk.ops}).decode("utf-8")
                if logflag:
                    logger.info(data)
                yield f"data: {data}\n\n"
            yield "data: [DONE]\n\n"

        return StreamingResponse(stream_generator(), media_type="text/event-stream")
    else:
        response = await llm_chain.ainvoke(docs)
        response = response["output_text"]
        if logflag:
            logger.info(response)
        return GeneratedDoc(text=response, prompt=input.query)

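# Example request (illustrative only; the payload fields mirror the LLMParamsDoc
# attributes referenced above, and the exact schema may differ):
#
#   curl http://localhost:9000/v1/chat/docsum \
#     -X POST -H "Content-Type: application/json" \
#     -d '{"query": "Text to summarize ...", "max_tokens": 128, "language": "en", "streaming": false}'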
if __name__ == "__main__":
    opea_microservices["opea_service@llm_docsum"].start()