Hi,
I can't seem to get this running with an Intel GPU. Here are my values:
replicaCount: 1

deployment:
  image:
    repository: quay.io/go-skynet/local-ai
    tag: latest-aio-gpu-intel-f32 # Use the appropriate tag. E.g. for cpu only: "latest-cpu"
  env:
    threads: 4
    context_size: 512
    DEBUG: true
  # # Inject Secrets into Environment:
  # secretEnv:
  # - name: HF_TOKEN
  #   valueFrom:
  #     secretKeyRef:
  #       name: some-secret
  #       key: hf-token
  modelsPath: "/models"
  prompt_templates:
    # To use cloud provided (eg AWS) image, provide it like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
    image: busybox
  pullPolicy: IfNotPresent
  imagePullSecrets: []
    # - name: secret-names
  ## Needed for GPU Nodes
  #runtimeClassName: gpu

resources:
  limits:
    gpu.intel.com/i915: "1"
  # {}
  # We usually recommend not to specify default resources and to leave this as a conscious
  # choice for the user. This also increases chances charts run on environments with little
  # resources, such as Minikube. If you do want to specify resources, uncomment the following
  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  # requests:
  #   cpu: 100m
  #   memory: 128Mi

# Model config to include
modelsConfigs:
  {}
  # phi-2: |
  #   name: phi-2
  #   context_size: 2048
  #   f16: true
  #   mmap: true
  #   trimsuffix:
  #   - "\n"
  #   parameters:
  #     model: phi-2.Q8_0.gguf
  #     temperature: 0.2
  #     top_k: 40
  #     top_p: 0.95
  #     seed: -1
  #   template:
  #     chat: &template |-
  #       Instruct: {{.Input}}
  #       Output:
  #     completion: *template

# Prompt templates to include
# Note: the keys of this map will be the names of the prompt template files
promptTemplates:
  {}
  # ggml-gpt4all-j.tmpl: |
  #   The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
  #   ### Prompt:
  #   {{.Input}}
  #   ### Response:

initContainers: []
# Example:
# - name: my-init-container
#   image: my-init-image
#   imagePullPolicy: IfNotPresent
#   command: ["/bin/sh", "-c", "echo init"]
#   volumeMounts:
#   - name: my-volume
#     mountPath: /path/to/mount

sidecarContainers: []
# Example:
# - name: my-sidecar-container
#   image: my-sidecar-image
#   imagePullPolicy: IfNotPresent
#   ports:
#   - containerPort: 1234

# Persistent storage for models and prompt templates.
# PVC and HostPath are mutually exclusive. If both are enabled,
# PVC configuration takes precedence. If neither are enabled, ephemeral
# storage is used.
persistence:
  models:
    enabled: true
    annotations: {}
    storageClass: "" # Use default storage class
    accessModes:
      - ReadWriteMany
    size: 10Gi
    globalMount: /build/models
  output:
    enabled: true
    annotations: {}
    storageClass: "" # Use default storage class
    accessModes:
      - ReadWriteMany
    size: 5Gi
    globalMount: /tmp/generated

service:
  type: ClusterIP
  # If deferring to an internal only load balancer
  # externalTrafficPolicy: Local
  port: 80
  nodePort:
  annotations: {}
  # If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
  # service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
ingress:
  enabled: false
  className: ""
  annotations:
    {}
    # nginx.ingress.kubernetes.io/proxy-body-size: "25m" # This value determines the maximum uploadable file size
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  hosts:
    - host: chart-example.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  tls: []
  # - secretName: chart-example-tls
  #   hosts:
  #     - chart-example.local

nodeSelector: {}

tolerations: []

affinity: {}
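
For comparison, here is a minimal standalone pod spec that requests the same gpu.intel.com/i915 limit as the values above, which can help check whether the cluster can schedule an Intel GPU workload at all. This is a sketch only: it assumes the Intel GPU device plugin is installed so the node actually advertises gpu.intel.com/i915, and the pod name is a placeholder.

# Sketch only: standalone pod requesting the same gpu.intel.com/i915 limit as the chart values.
# Assumes the Intel GPU device plugin is deployed and the node advertises gpu.intel.com/i915
# as an allocatable resource.
apiVersion: v1
kind: Pod
metadata:
  name: local-ai-i915-check # placeholder name, for illustration only
spec:
  restartPolicy: Never
  containers:
    - name: local-ai
      image: quay.io/go-skynet/local-ai:latest-aio-gpu-intel-f32
      resources:
        limits:
          gpu.intel.com/i915: "1" # only schedulable if the device plugin exposes this resource

If this pod stays Pending with a scheduler event along the lines of "Insufficient gpu.intel.com/i915", no node is advertising the resource, and the chart's deployment would fail to schedule for the same reason.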