Skip to content

Commit ab51b61

Browse files
14pankajhteeyeohavinash-palletiganesanintel
authored
Chatqna with GPU support (open-edge-platform#143)
Signed-off-by: Yeoh, Hoong Tee <[email protected]> Co-authored-by: Yeoh, Hoong Tee <[email protected]> Co-authored-by: Avinash Reddy Palleti <[email protected]> Co-authored-by: ganesanintel <[email protected]>
1 parent e462fe6 commit ab51b61

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

64 files changed

+3476
-2830
lines changed
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
.venv/
2+
__pycache__/
3+
.vscode
4+
.dockerignore
5+
docker
6+
scripts
7+
tests
8+
ui
9+
docs
10+
README*
Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1-
INFERENCE_DEVICE = "cpu"
1+
EMBEDDING_DEVICE = "CPU"
2+
RERANKER_DEVICE = "CPU"
3+
LLM_DEVICE = "CPU"
24
MAX_TOKENS = 1024
35
ENABLE_RERANK = true
46
CACHE_DIR = "/tmp/model_cache"
57
HF_DATASETS_CACHE = "/tmp/model_cache"
6-
TMP_FILE_PATH = "/tmp/chatqna/documents"
8+
TMP_FILE_PATH = "/tmp/chatqna/documents"

sample-applications/chat-question-and-answer-core/app/chain.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,14 @@
5151
# Initialize Embedding Model
5252
embedding = OpenVINOBgeEmbeddings(
5353
model_name_or_path=f"{config.CACHE_DIR}/{config.EMBEDDING_MODEL_ID}",
54-
model_kwargs={"device": config.INFERENCE_DEVICE, "compile": False},
54+
model_kwargs={"device": config.EMBEDDING_DEVICE, "compile": False},
5555
)
5656
embedding.ov_model.compile()
5757

5858
# Initialize Reranker Model
5959
reranker = OpenVINOReranker(
6060
model_name_or_path=f"{config.CACHE_DIR}/{config.RERANKER_MODEL_ID}",
61-
model_kwargs={"device": config.INFERENCE_DEVICE},
61+
model_kwargs={"device": config.RERANKER_DEVICE},
6262
top_n=2,
6363
)
6464

@@ -68,7 +68,7 @@
6868
task="text-generation",
6969
backend="openvino",
7070
model_kwargs={
71-
"device": config.INFERENCE_DEVICE,
71+
"device": config.LLM_DEVICE,
7272
"ov_config": {
7373
"PERFORMANCE_HINT": "LATENCY",
7474
"NUM_STREAMS": "1",
@@ -277,4 +277,4 @@ def delete_embedding_from_vectordb(document: str = "", delete_all: bool = False)
277277

278278
vectorstore.delete(chunk_list)
279279

280-
return True
280+
return True

sample-applications/chat-question-and-answer-core/app/config.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@ class Settings(BaseSettings):
1515
EMBEDDING_MODEL_ID (str): The ID of the embedding model.
1616
RERANKER_MODEL_ID (str): The ID of the reranker model.
1717
LLM_MODEL_ID (str): The ID of the large language model.
18-
INFERENCE_DEVICE (str): The device used for inference.
18+
EMBEDDING_DEVICE (str): The device used for embedding.
19+
RERANKER_DEVICE (str): The device used for reranker.
20+
LLM_DEVICE (str): The device used for LLM inference.
1921
CACHE_DIR (str): The directory used for caching.
2022
HF_DATASETS_CACHE (str): The cache directory for Hugging Face datasets.
2123
MAX_TOKENS (int): The maximum number of output tokens.
@@ -33,7 +35,9 @@ class Settings(BaseSettings):
3335
EMBEDDING_MODEL_ID: str = ...
3436
RERANKER_MODEL_ID: str = ...
3537
LLM_MODEL_ID: str = ...
36-
INFERENCE_DEVICE: str = ...
38+
EMBEDDING_DEVICE: str = ...
39+
RERANKER_DEVICE: str = ...
40+
LLM_DEVICE: str = ...
3741
CACHE_DIR: str = ...
3842
HF_DATASETS_CACHE: str = ...
3943
MAX_TOKENS: int = ...

sample-applications/chat-question-and-answer-core/app/server.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from pathlib import Path
55
from fastapi import FastAPI, HTTPException, File, UploadFile
66
from fastapi.middleware.cors import CORSMiddleware
7-
from fastapi.responses import StreamingResponse
7+
from fastapi.responses import StreamingResponse, JSONResponse
88
from http import HTTPStatus
99
from pydantic import BaseModel
1010
from typing import Annotated
@@ -19,6 +19,7 @@
1919
process_query,
2020
)
2121
from .document import validate_document, save_document
22+
from .utils import get_available_devices, get_device_property
2223

2324
app = FastAPI(root_path="/v1/chatqna")
2425

@@ -66,6 +67,55 @@ async def get_llm_model():
6667
return {"status": "Success", "llm_model": llm_model}
6768

6869

70+
@app.get("/devices", tags=["Device API"], summary="Get available devices list")
71+
async def get_devices():
72+
"""
73+
Retrieve a list of devices.
74+
Returns:
75+
dict: A dictionary with a key "devices" containing the list of devices.
76+
Raises:
77+
HTTPException: If an error occurs while retrieving the devices, an HTTP 500 exception is raised with the error details.
78+
"""
79+
80+
try:
81+
devices = get_available_devices()
82+
83+
return {"devices": devices}
84+
85+
except Exception as e:
86+
logger.exception("Error getting devices list.", error=e)
87+
raise HTTPException(status_code=500, detail=str(e))
88+
89+
90+
@app.get("/devices/{device}", tags=["Device API"], summary="Get device property")
91+
async def get_device_info(device: str = ""):
92+
"""
93+
Retrieve information about a specific device.
94+
Args:
95+
device (str): The name of the device to retrieve information for. Defaults to an empty string.
96+
Returns:
97+
JSONResponse: A JSON response containing the properties of the specified device.
98+
Raises:
99+
HTTPException: If the device is not found or if there is an error retrieving the device properties.
100+
"""
101+
102+
try:
103+
available_devices = get_available_devices()
104+
105+
if device not in available_devices:
106+
raise HTTPException(
107+
status_code=404, detail=f"Device {device} not found. Available devices: {available_devices}"
108+
)
109+
110+
device_props = get_device_property(device)
111+
112+
return JSONResponse(content=device_props)
113+
114+
except Exception as e:
115+
logger.exception("Error getting properties for device.", error=e)
116+
raise HTTPException(status_code=500, detail=str(e))
117+
118+
69119
@app.get(
70120
"/documents",
71121
tags=["Document Ingestion API"],

sample-applications/chat-question-and-answer-core/app/utils.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
22
import openvino as ov
3+
import openvino.properties as props
34
from .logger import logger
45
from huggingface_hub import login, whoami, snapshot_download
56
from optimum.intel import (
@@ -104,3 +105,49 @@ def convert_model(model_id: str, cache_dir: str, model_type: str):
104105
model_id, export=True, weight_format="int8"
105106
)
106107
llm_model.save_pretrained(f"{cache_dir}/{model_id}")
108+
109+
110+
def get_available_devices():
111+
"""
112+
Retrieves a list of available devices from the OpenVINO core.
113+
Returns:
114+
list: A list of available device names.
115+
"""
116+
117+
core = ov.Core()
118+
device_list = core.available_devices
119+
120+
return device_list
121+
122+
123+
def get_device_property(device: str = ""):
124+
"""
125+
Retrieves the properties of a specified device.
126+
Args:
127+
device (str): The name of the device to query. Defaults to an empty string.
128+
Returns:
129+
dict: A dictionary containing the properties of the device. The keys are property names,
130+
and the values are the corresponding property values. Non-serializable types are
131+
converted to strings. If a property value cannot be retrieved due to a TypeError,
132+
it is set to "UNSUPPORTED TYPE".
133+
"""
134+
135+
properties_dict = {}
136+
core = ov.Core()
137+
supported_properties = core.get_property(device, "SUPPORTED_PROPERTIES")
138+
139+
for property_key in supported_properties:
140+
if property_key not in ('SUPPORTED_METRICS', 'SUPPORTED_CONFIG_KEYS', 'SUPPORTED_PROPERTIES'):
141+
try:
142+
property_val = core.get_property(device, property_key)
143+
144+
# Convert non-serializable types to strings
145+
if not isinstance(property_val, (str, int, float, bool, type(None))):
146+
property_val = str(property_val)
147+
148+
except TypeError:
149+
property_val = "UNSUPPORTED TYPE"
150+
151+
properties_dict[property_key] = property_val
152+
153+
return properties_dict

sample-applications/chat-question-and-answer-core/chart/templates/deployment.yaml

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@ spec:
1212
labels:
1313
app: chatqna-core
1414
spec:
15-
securityContext:
16-
runAsUser: 0
1715
containers:
1816
- name: chatqna-core
1917
image: "{{ .Values.image.registry }}chatqna:{{ .Values.image.backendTag }}"
@@ -24,12 +22,6 @@ spec:
2422
port: {{ .Values.chatqna.readinessProbe.httpGet.port }}
2523
initialDelaySeconds: {{ .Values.chatqna.readinessProbe.initialDelaySeconds }}
2624
periodSeconds: {{ .Values.chatqna.readinessProbe.periodSeconds }}
27-
startupProbe:
28-
httpGet:
29-
path: {{ .Values.chatqna.startupProbe.httpGet.path }}
30-
port: {{ .Values.chatqna.startupProbe.httpGet.port }}
31-
initialDelaySeconds: {{ .Values.chatqna.startupProbe.initialDelaySeconds }}
32-
periodSeconds: {{ .Values.chatqna.startupProbe.periodSeconds }}
3325
env:
3426
- name: http_proxy
3527
value: "{{ .Values.global.http_proxy }}"
@@ -42,9 +34,40 @@ spec:
4234
- name: EMBEDDING_MODEL_ID
4335
value: "{{ .Values.global.EMBEDDING_MODEL_NAME }}"
4436
- name: RERANKER_MODEL_ID
45-
value: "{{ .Values.global.RERANKER_MODEL}}"
37+
value: "{{ .Values.global.RERANKER_MODEL }}"
4638
- name: LLM_MODEL_ID
4739
value: "{{ .Values.global.LLM_MODEL }}"
40+
- name: EMBEDDING_DEVICE
41+
value: {{ .Values.global.EMBEDDING_DEVICE }}
42+
- name: RERANKER_DEVICE
43+
value: {{ .Values.global.RERANKER_DEVICE }}
44+
- name: LLM_DEVICE
45+
value: {{ .Values.global.LLM_DEVICE }}
46+
- name: MAX_TOKENS
47+
value: "{{ .Values.chatqna.env.MAX_TOKENS }}"
48+
- name: ENABLE_RERANK
49+
value: "{{ .Values.chatqna.env.ENABLE_RERANK }}"
50+
- name: CACHE_DIR
51+
value: "{{ .Values.chatqna.env.CACHE_DIR }}"
52+
- name: HF_DATASETS_CACHE
53+
value: "{{ .Values.chatqna.env.HF_DATASETS_CACHE }}"
54+
- name: TMP_FILE_PATH
55+
value: "{{ .Values.chatqna.env.TMP_FILE_PATH }}"
56+
{{ if .Values.gpu.enabled }}
57+
resources:
58+
requests:
59+
{{ .Values.gpu.key}}: 1
60+
limits:
61+
{{ .Values.gpu.key}}: 1
62+
devices:
63+
- name: dri-device
64+
containerPath: /dev/dri
65+
securityContext:
66+
privileged: true
67+
runAsUser: 0
68+
runAsGroup: 0
69+
fsGroup: 0
70+
{{ end }}
4871
ports:
4972
- containerPort: 8888
5073
volumeMounts:
@@ -53,4 +76,10 @@ spec:
5376
volumes:
5477
- name: model-cache
5578
persistentVolumeClaim:
56-
claimName: egai-pvc
79+
claimName: chatqna-pvc
80+
{{- if .Values.gpu.enabled }}
81+
- name: dri-device
82+
hostPath:
83+
path: {{ .Values.gpu.devices }}
84+
type: Directory
85+
{{- end }}

sample-applications/chat-question-and-answer-core/chart/templates/pvc.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
apiVersion: v1
22
kind: PersistentVolumeClaim
33
metadata:
4-
name: egai-pvc
4+
name: chatqna-pvc
55
{{- if .Values.global.keeppvc }}
66
annotations:
77
helm.sh/resource-policy: keep

sample-applications/chat-question-and-answer-core/chart/values.yaml

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,26 @@
11
image:
22
registry: "intel/"
3-
backendTag: "core_1.1.2"
3+
backendTag: "core_1.2.0"
44
pullPolicy: IfNotPresent
55

66
global:
7-
http_proxy: ""
8-
https_proxy: ""
9-
no_proxy: ""
7+
http_proxy:
8+
https_proxy:
9+
no_proxy:
1010
huggingface:
11-
apiToken: ""
12-
EMBEDDING_MODEL_NAME: ""
13-
RERANKER_MODEL: ""
14-
LLM_MODEL: ""
11+
apiToken:
12+
EMBEDDING_MODEL_NAME:
13+
RERANKER_MODEL:
14+
LLM_MODEL:
1515
model_cache_path: "/tmp/model_cache"
16-
UI_NODEPORT: <ui-nodeport>
16+
# If the system has an integrated GPU, its id is always 0 (GPU.0), and "GPU" is an alias for GPU.0. If the system has multiple GPUs (for example, an integrated and a discrete Intel GPU), select one by specifying GPU.0 or GPU.1.
17+
EMBEDDING_DEVICE: "CPU"
18+
RERANKER_DEVICE: "CPU"
19+
LLM_DEVICE: "CPU"
20+
UI_NODEPORT:
1721
pvc:
18-
size: 40Gi
19-
keeppvc: false
22+
size: 60Gi
23+
keeppvc: false # true to persist models across multiple deployments
2024
chatqna:
2125
name: chatqna-core
2226
service:
@@ -28,12 +32,18 @@ chatqna:
2832
port: 8888
2933
initialDelaySeconds: 30
3034
periodSeconds: 30
31-
startupProbe:
32-
httpGet:
33-
path: /v1/chatqna/health
34-
port: 8888
35-
initialDelaySeconds: 1000
36-
periodSeconds: 30
35+
env:
36+
MAX_TOKENS: "1024"
37+
ENABLE_RERANK: true
38+
CACHE_DIR: "/tmp/model_cache"
39+
HF_DATASETS_CACHE: "/tmp/model_cache"
40+
TMP_FILE_PATH: "/tmp/chatqna/documents"
41+
42+
gpu:
43+
enabled: false
44+
devices: /dev/dri
45+
group_add: $(stat -c "%g" /dev/dri/render*)
46+
key: # Update with the cluster node label key for the GPU, as assigned by the GPU device plugin
3747

3848
uiService:
3949
name: chatqna-core-ui

0 commit comments

Comments
 (0)