Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: ray-project/ray
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: master
Choose a base ref
...
head repository: windycom/ray
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: windy
Choose a head ref

Commits on Aug 13, 2021

  1. Copy the full SHA
    120ce73 View commit details

Commits on Sep 7, 2021

  1. Add metrics port

    sandratatarevicova committed Sep 7, 2021
    Copy the full SHA
    2921648 View commit details
  2. Copy the full SHA
    23c9f45 View commit details
  3. Copy the full SHA
    4f693e7 View commit details

Commits on Sep 21, 2021

  1. Copy the full SHA
    27f2b2d View commit details

Commits on Sep 23, 2021

  1. Copy the full SHA
    7798b55 View commit details
  2. Copy the full SHA
    1361bae View commit details

Commits on Sep 27, 2021

  1. Copy the full SHA
    fcbf997 View commit details

Commits on Sep 29, 2021

  1. Copy the full SHA
    4211150 View commit details

Commits on Oct 7, 2021

  1. Copy the full SHA
    56fa37d View commit details
  2. Copy the full SHA
    38beebb View commit details
  3. Copy the full SHA
    f847428 View commit details
  4. Copy the full SHA
    d52c981 View commit details

Commits on Oct 20, 2021

  1. Copy the full SHA
    b1d9c26 View commit details

Commits on Nov 1, 2021

  1. Copy the full SHA
    0a18595 View commit details

Commits on Nov 18, 2021

  1. Disable dashboard

    sandratatarevicova committed Nov 18, 2021
    Copy the full SHA
    020fc91 View commit details

Commits on Dec 6, 2021

  1. Copy the full SHA
    805dedd View commit details

Commits on Feb 21, 2022

  1. Copy the full SHA
    d857ed4 View commit details

Commits on Apr 25, 2022

  1. Copy the full SHA
    a9ba7b1 View commit details
  2. Copy the full SHA
    b19bb0a View commit details

Commits on Oct 18, 2022

  1. Disable Ray memory monitor

    See #20906 and
    #14541
    sandratatarevicova committed Oct 18, 2022
    Copy the full SHA
    88c9846 View commit details
2 changes: 1 addition & 1 deletion deploy/charts/ray/Chart.yaml
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@ description: A Helm chart for deployments of Ray on Kubernetes.
type: application

# Chart version.
version: 0.1.0
version: 0.1.0-24

# Ray version.
appVersion: "latest"
24 changes: 24 additions & 0 deletions deploy/charts/ray/templates/lb_service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# LoadBalancer Service in front of the Ray head pod. It publishes the
# metrics, Ray Client and Redis ports; its name is the release name followed
# by the clusterNameSuffix value and "-head-lb".
apiVersion: v1
kind: Service
metadata:
name: {{ .Release.Name }}{{ .Values.clusterNameSuffix }}-head-lb
spec:
# Source ranges allowed to reach the load balancer, rendered from the
# headNodeServiceAllowedRanges chart value.
loadBalancerSourceRanges:
{{- toYaml .Values.headNodeServiceAllowedRanges | nindent 2 }}
ports:
# Metrics port (same number as the head's --metrics-export-port).
- port: 54399
protocol: TCP
targetPort: 54399
name: metrics
# Ray Client port.
- port: 10001
protocol: TCP
targetPort: 10001
name: client
# Redis port.
- port: 6379
protocol: TCP
targetPort: 6379
name: redis
# Route only to pods labelled as the head node of this release's cluster.
selector:
ray-node-type: head
ray-cluster-name: {{ .Release.Name }}{{ .Values.clusterNameSuffix }}
type: LoadBalancer
15 changes: 15 additions & 0 deletions deploy/charts/ray/templates/operator_cluster_scoped.yaml
Original file line number Diff line number Diff line change
@@ -53,13 +53,28 @@ spec:
image: {{ .Values.operatorImage }}
command: ["ray-operator"]
env:
- name: TZ
value: UTC
- name: AUTOSCALER_MAX_NUM_FAILURES
value: "inf"
- name: RAY_CLUSTER_ADDRESS
value: ray-ray-head.{{ .Values.operatorNamespace }}.svc.cluster.local
- name: RAY_CLUSTER_PORT
value: "10001"
resources:
requests:
cpu: 1
memory: 1Gi
limits:
memory: 2Gi
cpu: 1
livenessProbe:
exec:
command:
- python
- /worker_heartbeat.py
failureThreshold: 1
initialDelaySeconds: 600
periodSeconds: 900
timeoutSeconds: 240
{{- end }}
15 changes: 15 additions & 0 deletions deploy/charts/ray/templates/operator_namespaced.yaml
Original file line number Diff line number Diff line change
@@ -50,18 +50,33 @@ spec:
image: {{ .Values.operatorImage }}
command: ["ray-operator"]
env:
- name: TZ
value: UTC
- name: RAY_OPERATOR_POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: AUTOSCALER_MAX_NUM_FAILURES
value: "inf"
- name: RAY_CLUSTER_ADDRESS
value: ray-ray-head.{{ .Values.operatorNamespace }}.svc.cluster.local
- name: RAY_CLUSTER_PORT
value: "10001"
resources:
requests:
cpu: 1
memory: 1Gi
limits:
memory: 2Gi
cpu: 1
livenessProbe:
exec:
command:
- python
- /worker_heartbeat.py
failureThreshold: 1
initialDelaySeconds: 600
periodSeconds: 900
timeoutSeconds: 240
{{- end }}

52 changes: 46 additions & 6 deletions deploy/charts/ray/templates/raycluster.yaml
Original file line number Diff line number Diff line change
@@ -10,9 +10,9 @@ spec:
# E.g., if the task requires adding more nodes then autoscaler will gradually
# scale up the cluster in chunks of upscaling_speed*currently_running_nodes.
# This number should be > 0.
upscalingSpeed: 1.0
upscalingSpeed: 10.0
# If a node is idle for this many minutes, it will be removed.
idleTimeoutMinutes: 5
idleTimeoutMinutes: 1
# Specify the pod type for the ray head node (as configured below).
headPodType: {{ .Values.headPodType }}
# Specify the allowed pod types for this ray cluster and the resources they provide.
@@ -39,6 +39,11 @@ spec:
- name: dshm
emptyDir:
medium: Memory
{{- range $volume := .persistentVolumes }}
- name: {{ $volume.claimName }}
persistentVolumeClaim:
claimName: {{ $volume.claimName }}
{{- end }}
containers:
- name: ray-node
imagePullPolicy: Always
@@ -49,16 +54,43 @@ spec:
args: ['trap : TERM INT; sleep infinity & wait;']
ports:
- containerPort: 6379 # Redis port
- containerPort: 10001 # Used by Ray Client
- containerPort: 10001 # Used by Ray Client
- containerPort: 8265 # Used by Ray Dashboard
- containerPort: 8000 # Used by Ray Serve

- containerPort: 8000 # Used by Ray Serve
- containerPort: 54399 # Metrics
env:
- name: TZ
value: UTC
{{- if eq $key $.Values.headPodType }}
# Set max autoscaler number of failures to infinity to prevent
# unexpected autoscaler crashes.
- name: AUTOSCALER_MAX_NUM_FAILURES
value: inf
- name: PYTHONFAULTHANDLER
value: "true"
{{- end }}
- name: RAY_BACKEND_LOG_LEVEL
value: fatal
- name: RAY_DISABLE_MEMORY_MONITOR
value: "1"
# This volume allocates shared memory for Ray to use for its plasma
# object store. If you do not provide this, Ray will fall back to
# /tmp, which causes slowdowns if it is not a shared memory volume.
volumeMounts:
- mountPath: /dev/shm
name: dshm
{{- range $volume := .persistentVolumes }}
{{- range $mount := $volume.mounts }}
- name: {{ $volume.claimName }}
mountPath: {{ $mount.mountPath }}
{{- if $mount.subPath }}
subPath: {{ $mount.subPath }}
{{- end }}
{{- if $mount.readOnly }}
readOnly: {{ $mount.readOnly }}
{{- end }}
{{- end }}
{{- end }}
resources:
requests:
cpu: {{ .CPU }}
@@ -76,16 +108,24 @@ spec:
{{- if .GPU }}
nvidia.com/gpu: {{ .GPU }}
{{- end }}
securityContext:
capabilities:
add:
- SYS_PTRACE
{{- if .nodeSelector }}
nodeSelector:
{{- toYaml .nodeSelector | nindent 12 }}
{{- end }}
{{- if .tolerations }}
tolerations:
{{- toYaml .tolerations | nindent 12 }}
{{- end }}
{{- end }}
# Commands to start Ray on the head node. You don't need to change this.
# Note dashboard-host is set to 0.0.0.0 so that Kubernetes can port forward.
headStartRayCommands:
- ray stop
- ulimit -n 65536; ray start --head --no-monitor --dashboard-host 0.0.0.0
- ulimit -n 65536; ray start --head --no-monitor --include-dashboard 0 --metrics-export-port=54399
# Commands to start Ray on worker nodes. You don't need to change this.
workerStartRayCommands:
- ray stop
10 changes: 10 additions & 0 deletions deploy/charts/ray/values.yaml
Original file line number Diff line number Diff line change
@@ -6,6 +6,8 @@
image: rayproject/ray:latest
# headPodType is the podType used for the Ray head node (as configured below).
headPodType: rayHeadType
# Allowed source ranges for head node LB service
headNodeServiceAllowedRanges: []
# podTypes is the list of pod configurations available for use as Ray nodes.
podTypes:
# The key for each podType is a user-defined string.
@@ -31,6 +33,10 @@ podTypes:
rayResources: {}
# Optionally, set a node selector for this podType: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector
nodeSelector: {}
# Optionally, set tolerations for this podType: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration
tolerations: {}
# PersistentVolumeClaim mounts
persistentVolumes: []
# The key for each podType is a user-defined string.
rayWorkerType:
# minWorkers is the minimum number of Ray workers of this pod type to keep running.
@@ -51,8 +57,12 @@ podTypes:
# For example, rayResources: {"CPU": 0} can be used in the head podType to prevent Ray from scheduling tasks on the head.
# See https://docs.ray.io/en/master/advanced.html#dynamic-remote-parameters for an example of usage of custom resources in a Ray task.
rayResources: {}
# Optionally, set tolerations for this podType: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration
tolerations: {}
# Optionally, set a node selector for this Pod type. See https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector
nodeSelector: {}
# PersistentVolumeClaim mounts
persistentVolumes: []


# Operator settings: