Skip to content

Commit ab51b61

Browse files
14pankajhteeyeohavinash-palletiganesanintel
authored
Chatqna with GPU support (open-edge-platform#143)
Signed-off-by: Yeoh, Hoong Tee <[email protected]> Co-authored-by: Yeoh, Hoong Tee <[email protected]> Co-authored-by: Avinash Reddy Palleti <[email protected]> Co-authored-by: ganesanintel <[email protected]>
1 parent e462fe6 commit ab51b61

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

64 files changed

+3476
-2830
lines changed
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
.venv/
2+
__pycache__/
3+
.vscode
4+
.dockerignore
5+
docker
6+
scripts
7+
tests
8+
ui
9+
docs
10+
README*
Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1-
INFERENCE_DEVICE = "cpu"
1+
EMBEDDING_DEVICE = "CPU"
2+
RERANKER_DEVICE = "CPU"
3+
LLM_DEVICE = "CPU"
24
MAX_TOKENS = 1024
35
ENABLE_RERANK = true
46
CACHE_DIR = "/tmp/model_cache"
57
HF_DATASETS_CACHE = "/tmp/model_cache"
6-
TMP_FILE_PATH = "/tmp/chatqna/documents"
8+
TMP_FILE_PATH = "/tmp/chatqna/documents"

sample-applications/chat-question-and-answer-core/app/chain.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,14 @@
5151
# Initialize Embedding Model
5252
embedding = OpenVINOBgeEmbeddings(
5353
model_name_or_path=f"{config.CACHE_DIR}/{config.EMBEDDING_MODEL_ID}",
54-
model_kwargs={"device": config.INFERENCE_DEVICE, "compile": False},
54+
model_kwargs={"device": config.EMBEDDING_DEVICE, "compile": False},
5555
)
5656
embedding.ov_model.compile()
5757

5858
# Initialize Reranker Model
5959
reranker = OpenVINOReranker(
6060
model_name_or_path=f"{config.CACHE_DIR}/{config.RERANKER_MODEL_ID}",
61-
model_kwargs={"device": config.INFERENCE_DEVICE},
61+
model_kwargs={"device": config.RERANKER_DEVICE},
6262
top_n=2,
6363
)
6464

@@ -68,7 +68,7 @@
6868
task="text-generation",
6969
backend="openvino",
7070
model_kwargs={
71-
"device": config.INFERENCE_DEVICE,
71+
"device": config.LLM_DEVICE,
7272
"ov_config": {
7373
"PERFORMANCE_HINT": "LATENCY",
7474
"NUM_STREAMS": "1",
@@ -277,4 +277,4 @@ def delete_embedding_from_vectordb(document: str = "", delete_all: bool = False)
277277

278278
vectorstore.delete(chunk_list)
279279

280-
return True
280+
return True

sample-applications/chat-question-and-answer-core/app/config.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@ class Settings(BaseSettings):
1515
EMBEDDING_MODEL_ID (str): The ID of the embedding model.
1616
RERANKER_MODEL_ID (str): The ID of the reranker model.
1717
LLM_MODEL_ID (str): The ID of the large language model.
18-
INFERENCE_DEVICE (str): The device used for inference.
18+
EMBEDDING_DEVICE (str): The device used for embedding.
19+
RERANKER_DEVICE (str): The device used for reranker.
20+
LLM_DEVICE (str): The device used for LLM inference.
1921
CACHE_DIR (str): The directory used for caching.
2022
HF_DATASETS_CACHE (str): The cache directory for Hugging Face datasets.
2123
MAX_TOKENS (int): The maximum number of output tokens.
@@ -33,7 +35,9 @@ class Settings(BaseSettings):
3335
EMBEDDING_MODEL_ID: str = ...
3436
RERANKER_MODEL_ID: str = ...
3537
LLM_MODEL_ID: str = ...
36-
INFERENCE_DEVICE: str = ...
38+
EMBEDDING_DEVICE: str = ...
39+
RERANKER_DEVICE: str = ...
40+
LLM_DEVICE: str = ...
3741
CACHE_DIR: str = ...
3842
HF_DATASETS_CACHE: str = ...
3943
MAX_TOKENS: int = ...

sample-applications/chat-question-and-answer-core/app/server.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from pathlib import Path
55
from fastapi import FastAPI, HTTPException, File, UploadFile
66
from fastapi.middleware.cors import CORSMiddleware
7-
from fastapi.responses import StreamingResponse
7+
from fastapi.responses import StreamingResponse, JSONResponse
88
from http import HTTPStatus
99
from pydantic import BaseModel
1010
from typing import Annotated
@@ -19,6 +19,7 @@
1919
process_query,
2020
)
2121
from .document import validate_document, save_document
22+
from .utils import get_available_devices, get_device_property
2223

2324
app = FastAPI(root_path="/v1/chatqna")
2425

@@ -66,6 +67,55 @@ async def get_llm_model():
6667
return {"status": "Success", "llm_model": llm_model}
6768

6869

70+
@app.get("/devices", tags=["Device API"], summary="Get available devices list")
71+
async def get_devices():
72+
"""
73+
Retrieve a list of devices.
74+
Returns:
75+
dict: A dictionary with a key "devices" containing the list of devices.
76+
Raises:
77+
HTTPException: If an error occurs while retrieving the devices, an HTTP 500 exception is raised with the error details.
78+
"""
79+
80+
try:
81+
devices = get_available_devices()
82+
83+
return {"devices": devices}
84+
85+
except Exception as e:
86+
logger.exception("Error getting devices list.", error=e)
87+
raise HTTPException(status_code=500, detail=str(e))
88+
89+
90+
@app.get("/devices/{device}", tags=["Device API"], summary="Get device property")
91+
async def get_device_info(device: str = ""):
92+
"""
93+
Retrieve information about a specific device.
94+
Args:
95+
device (str): The name of the device to retrieve information for. Defaults to an empty string.
96+
Returns:
97+
JSONResponse: A JSON response containing the properties of the specified device.
98+
Raises:
99+
HTTPException: If the device is not found or if there is an error retrieving the device properties.
100+
"""
101+
102+
try:
103+
available_devices = get_available_devices()
104+
105+
if device not in available_devices:
106+
raise HTTPException(
107+
status_code=404, detail=f"Device {device} not found. Available devices: {available_devices}"
108+
)
109+
110+
device_props = get_device_property(device)
111+
112+
return JSONResponse(content=device_props)
113+
114+
except Exception as e:
115+
logger.exception("Error getting properties for device.", error=e)
116+
raise HTTPException(status_code=500, detail=str(e))
117+
118+
69119
@app.get(
70120
"/documents",
71121
tags=["Document Ingestion API"],

sample-applications/chat-question-and-answer-core/app/utils.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
22
import openvino as ov
3+
import openvino.properties as props
34
from .logger import logger
45
from huggingface_hub import login, whoami, snapshot_download
56
from optimum.intel import (
@@ -104,3 +105,49 @@ def convert_model(model_id: str, cache_dir: str, model_type: str):
104105
model_id, export=True, weight_format="int8"
105106
)
106107
llm_model.save_pretrained(f"{cache_dir}/{model_id}")
108+
109+
110+
def get_available_devices():
111+
"""
112+
Retrieves a list of available devices from the OpenVINO core.
113+
Returns:
114+
list: A list of available device names.
115+
"""
116+
117+
core = ov.Core()
118+
device_list = core.available_devices
119+
120+
return device_list
121+
122+
123+
def get_device_property(device: str = ""):
124+
"""
125+
Retrieves the properties of a specified device.
126+
Args:
127+
device (str): The name of the device to query. Defaults to an empty string.
128+
Returns:
129+
dict: A dictionary containing the properties of the device. The keys are property names,
130+
and the values are the corresponding property values. Non-serializable types are
131+
converted to strings. If a property value cannot be retrieved due to a TypeError,
132+
it is set to "UNSUPPORTED TYPE".
133+
"""
134+
135+
properties_dict = {}
136+
core = ov.Core()
137+
supported_properties = core.get_property(device, "SUPPORTED_PROPERTIES")
138+
139+
for property_key in supported_properties:
140+
if property_key not in ('SUPPORTED_METRICS', 'SUPPORTED_CONFIG_KEYS', 'SUPPORTED_PROPERTIES'):
141+
try:
142+
property_val = core.get_property(device, property_key)
143+
144+
# Convert non-serializable types to strings
145+
if not isinstance(property_val, (str, int, float, bool, type(None))):
146+
property_val = str(property_val)
147+
148+
except TypeError:
149+
property_val = "UNSUPPORTED TYPE"
150+
151+
properties_dict[property_key] = property_val
152+
153+
return properties_dict

sample-applications/chat-question-and-answer-core/chart/templates/deployment.yaml

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@ spec:
1212
labels:
1313
app: chatqna-core
1414
spec:
15-
securityContext:
16-
runAsUser: 0
1715
containers:
1816
- name: chatqna-core
1917
image: "{{ .Values.image.registry }}chatqna:{{ .Values.image.backendTag }}"
@@ -24,12 +22,6 @@ spec:
2422
port: {{ .Values.chatqna.readinessProbe.httpGet.port }}
2523
initialDelaySeconds: {{ .Values.chatqna.readinessProbe.initialDelaySeconds }}
2624
periodSeconds: {{ .Values.chatqna.readinessProbe.periodSeconds }}
27-
startupProbe:
28-
httpGet:
29-
path: {{ .Values.chatqna.startupProbe.httpGet.path }}
30-
port: {{ .Values.chatqna.startupProbe.httpGet.port }}
31-
initialDelaySeconds: {{ .Values.chatqna.startupProbe.initialDelaySeconds }}
32-
periodSeconds: {{ .Values.chatqna.startupProbe.periodSeconds }}
3325
env:
3426
- name: http_proxy
3527
value: "{{ .Values.global.http_proxy }}"
@@ -42,9 +34,40 @@ spec:
4234
- name: EMBEDDING_MODEL_ID
4335
value: "{{ .Values.global.EMBEDDING_MODEL_NAME }}"
4436
- name: RERANKER_MODEL_ID
45-
value: "{{ .Values.global.RERANKER_MODEL}}"
37+
value: "{{ .Values.global.RERANKER_MODEL }}"
4638
- name: LLM_MODEL_ID
4739
value: "{{ .Values.global.LLM_MODEL }}"
40+
- name: EMBEDDING_DEVICE
41+
value: {{ .Values.global.EMBEDDING_DEVICE }}
42+
- name: RERANKER_DEVICE
43+
value: {{ .Values.global.RERANKER_DEVICE }}
44+
- name: LLM_DEVICE
45+
value: {{ .Values.global.LLM_DEVICE }}
46+
- name: MAX_TOKENS
47+
value: "{{ .Values.chatqna.env.MAX_TOKENS }}"
48+
- name: ENABLE_RERANK
49+
value: "{{ .Values.chatqna.env.ENABLE_RERANK }}"
50+
- name: CACHE_DIR
51+
value: "{{ .Values.chatqna.env.CACHE_DIR }}"
52+
- name: HF_DATASETS_CACHE
53+
value: "{{ .Values.chatqna.env.HF_DATASETS_CACHE }}"
54+
- name: TMP_FILE_PATH
55+
value: "{{ .Values.chatqna.env.TMP_FILE_PATH }}"
56+
{{ if .Values.gpu.enabled }}
57+
resources:
58+
requests:
59+
{{ .Values.gpu.key}}: 1
60+
limits:
61+
{{ .Values.gpu.key}}: 1
62+
devices:
63+
- name: dri-device
64+
containerPath: /dev/dri
65+
securityContext:
66+
privileged: true
67+
runAsUser: 0
68+
runAsGroup: 0
69+
fsGroup: 0
70+
{{ end }}
4871
ports:
4972
- containerPort: 8888
5073
volumeMounts:
@@ -53,4 +76,10 @@ spec:
5376
volumes:
5477
- name: model-cache
5578
persistentVolumeClaim:
56-
claimName: egai-pvc
79+
claimName: chatqna-pvc
80+
{{- if .Values.gpu.enabled }}
81+
- name: dri-device
82+
hostPath:
83+
path: {{ .Values.gpu.devices }}
84+
type: Directory
85+
{{- end }}

sample-applications/chat-question-and-answer-core/chart/templates/pvc.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
apiVersion: v1
22
kind: PersistentVolumeClaim
33
metadata:
4-
name: egai-pvc
4+
name: chatqna-pvc
55
{{- if .Values.global.keeppvc }}
66
annotations:
77
helm.sh/resource-policy: keep

sample-applications/chat-question-and-answer-core/chart/values.yaml

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,26 @@
11
image:
22
registry: "intel/"
3-
backendTag: "core_1.1.2"
3+
backendTag: "core_1.2.0"
44
pullPolicy: IfNotPresent
55

66
global:
7-
http_proxy: ""
8-
https_proxy: ""
9-
no_proxy: ""
7+
http_proxy:
8+
https_proxy:
9+
no_proxy:
1010
huggingface:
11-
apiToken: ""
12-
EMBEDDING_MODEL_NAME: ""
13-
RERANKER_MODEL: ""
14-
LLM_MODEL: ""
11+
apiToken:
12+
EMBEDDING_MODEL_NAME:
13+
RERANKER_MODEL:
14+
LLM_MODEL:
1515
model_cache_path: "/tmp/model_cache"
16-
UI_NODEPORT: <ui-nodeport>
16+
# If the system has an integrated GPU, its id is always 0 (GPU.0), and "GPU" is an alias for GPU.0. If the system has multiple GPUs (for example, an integrated and a discrete Intel GPU), select one by specifying GPU.0 or GPU.1.
17+
EMBEDDING_DEVICE: "CPU"
18+
RERANKER_DEVICE: "CPU"
19+
LLM_DEVICE: "CPU"
20+
UI_NODEPORT:
1721
pvc:
18-
size: 40Gi
19-
keeppvc: false
22+
size: 60Gi
23+
keeppvc: false # true to persist models across multiple deployments
2024
chatqna:
2125
name: chatqna-core
2226
service:
@@ -28,12 +32,18 @@ chatqna:
2832
port: 8888
2933
initialDelaySeconds: 30
3034
periodSeconds: 30
31-
startupProbe:
32-
httpGet:
33-
path: /v1/chatqna/health
34-
port: 8888
35-
initialDelaySeconds: 1000
36-
periodSeconds: 30
35+
env:
36+
MAX_TOKENS: "1024"
37+
ENABLE_RERANK: true
38+
CACHE_DIR: "/tmp/model_cache"
39+
HF_DATASETS_CACHE: "/tmp/model_cache"
40+
TMP_FILE_PATH: "/tmp/chatqna/documents"
41+
42+
gpu:
43+
enabled: false
44+
devices: /dev/dri
45+
group_add: $(stat -c "%g" /dev/dri/render*)
46+
key: # Update with the cluster node label key for the GPU, as assigned by the GPU device plugin
3747

3848
uiService:
3949
name: chatqna-core-ui

0 commit comments

Comments
 (0)