Fix out of date inference deploy configs (#432)

akashmjn · dthaler · web-flow · commit 65ff6ed02a42 · 2026-03-11T13:06:12.000-07:00
* Update configs to match what is deployed

* Increase threshold for Andrews Bay, North SJC, and Port Townsend to 70%
* Run all nodes on the f4sv2pool pool
* Updated memory config to request 1600Mi, with limit 2500Mi
* Updated images to 01-24-2026.FastAI.R1-12.v0

Signed-off-by: Dave Thaler <dthaler1968@gmail.com>

* Update Port Townsend threshold to 70%

Per discussion with Akash

Signed-off-by: Dave Thaler <dthaler1968@gmail.com>

* Copy Strategy Recreate to other configs

Since the VMs are memory constrained, stopping the old pod
before starting the new one is essential in getting the new
one to actually start.

Signed-off-by: Dave Thaler <dthaler1968@gmail.com>

* Update image on Bush Point and Point Robinson

Signed-off-by: Dave Thaler <dthaler1968@gmail.com>

* Add comments

Signed-off-by: Dave Thaler <dthaler1968@gmail.com>

* Update configs for nodes with the v1 model deployed (Andrews Bay and Port Townsend)

---------

Signed-off-by: Dave Thaler <dthaler1968@gmail.com>
Co-authored-by: Dave Thaler <dthaler1968@gmail.com>
diff --git a/InferenceSystem/deploy/andrews-bay-configmap.yaml b/InferenceSystem/deploy/andrews-bay-configmap.yaml
@@ -5,13 +5,16 @@ metadata:
   namespace: andrews-bay
 data:
   config.yml: |
-    model_type: "FastAI"
-    model_local_threshold: 0.5
-    model_global_threshold: 3
-    model_path: "./model"
-    model_name: "model.pkl"
+    model_id: "orcasound/orcahello-srkw-detector-v1.030926"
+    model_hf_repo_id: "orcasound/orcahello-srkw-detector-v1"
+    model_config_path: "./model/config.yaml"
     hls_stream_type: "LiveHLS"
     hls_polling_interval: 60
     hls_hydrophone_id: "rpi_andrews_bay"
     upload_to_azure: True
     delete_local_wavs: True
+    model_config_overrides:
+      inference:
+        max_batch_size: 2
+      global_prediction:
+        pred_global_threshold: 0.6
diff --git a/InferenceSystem/deploy/andrews-bay.yaml b/InferenceSystem/deploy/andrews-bay.yaml
@@ -5,6 +5,12 @@ metadata:
   namespace: andrews-bay
 spec:
   replicas: 1
+  # NOTE: This deployment intentionally uses a single replica with a Recreate
+  # strategy due to memory constraints and so that only one inference pod processes
+  # hydrophone audio at a time.  This avoids concurrent inference instances for
+  # this site, and the temporary detection gap during upgrades is acceptable.
+  strategy:
+    type: Recreate
   selector:
     matchLabels:
       app: inference-system
@@ -13,13 +19,18 @@ spec:
       labels:
         app: inference-system
     spec:
+      nodeSelector:
+        kubernetes.azure.com/agentpool: f4sv2pool
       containers:
       - name: inference-system
-        image: orcaconservancycr.azurecr.io/live-inference-system:11-19-2025.FastAI.R1-12.v0
+        image: orcaconservancycr.azurecr.io/live-inference-system-v1:03-09-2026.v1.0.0
         resources:
+          requests:
+            cpu: "1"
+            memory: "1600Mi"
           limits:
-            cpu: 1
-            memory: 3G
+            cpu: "1"
+            memory: "2500Mi"
         env:
           - name: AZURE_COSMOSDB_PRIMARY_KEY
             valueFrom:
diff --git a/InferenceSystem/deploy/bush-point.yaml b/InferenceSystem/deploy/bush-point.yaml
@@ -5,6 +5,12 @@ metadata:
   namespace: bush-point
 spec:
   replicas: 1
+  # NOTE: This deployment intentionally uses a single replica with a Recreate
+  # strategy due to memory constraints and so that only one inference pod processes
+  # hydrophone audio at a time.  This avoids concurrent inference instances for
+  # this site, and the temporary detection gap during upgrades is acceptable.
+  strategy:
+    type: Recreate
   selector:
     matchLabels:
       app: inference-system
@@ -13,13 +19,18 @@ spec:
       labels:
         app: inference-system
     spec:
+      nodeSelector:
+        kubernetes.azure.com/agentpool: f4sv2pool
       containers:
       - name: inference-system
-        image: orcaconservancycr.azurecr.io/live-inference-system:11-19-2025.FastAI.R1-12.v0
+        image: orcaconservancycr.azurecr.io/live-inference-system:01-24-2026.FastAI.R1-12.v0
         resources:
+          requests:
+            cpu: "1"
+            memory: "1600Mi"
           limits:
-            cpu: 1
-            memory: 3G
+            cpu: "1"
+            memory: "2500Mi"
         env:
           - name: AZURE_COSMOSDB_PRIMARY_KEY
             valueFrom:
diff --git a/InferenceSystem/deploy/mast-center.yaml b/InferenceSystem/deploy/mast-center.yaml
@@ -5,6 +5,12 @@ metadata:
   namespace: mast-center
 spec:
   replicas: 1
+  # NOTE: This deployment intentionally uses a single replica with a Recreate
+  # strategy due to memory constraints and so that only one inference pod processes
+  # hydrophone audio at a time.  This avoids concurrent inference instances for
+  # this site, and the temporary detection gap during upgrades is acceptable.
+  strategy:
+    type: Recreate
   selector:
     matchLabels:
       app: inference-system
@@ -13,13 +19,18 @@ spec:
       labels:
         app: inference-system
     spec:
+      nodeSelector:
+        kubernetes.azure.com/agentpool: f4sv2pool
       containers:
       - name: inference-system
-        image: orcaconservancycr.azurecr.io/live-inference-system:11-19-2025.FastAI.R1-12.v0
+        image: orcaconservancycr.azurecr.io/live-inference-system:01-24-2026.FastAI.R1-12.v0
         resources:
+          requests:
+            cpu: "1"
+            memory: "1600Mi"
           limits:
-            cpu: 1
-            memory: 3G
+            cpu: "1"
+            memory: "2500Mi"
         env:
           - name: AZURE_COSMOSDB_PRIMARY_KEY
             valueFrom:
diff --git a/InferenceSystem/deploy/north-sjc-configmap.yaml b/InferenceSystem/deploy/north-sjc-configmap.yaml
@@ -6,7 +6,7 @@ metadata:
 data:
   config.yml: |
     model_type: "FastAI"
-    model_local_threshold: 0.5
+    model_local_threshold: 0.7
     model_global_threshold: 3
     model_path: "./model"
     model_name: "model.pkl"
diff --git a/InferenceSystem/deploy/north-sjc.yaml b/InferenceSystem/deploy/north-sjc.yaml
@@ -5,6 +5,12 @@ metadata:
   namespace: north-sjc
 spec:
   replicas: 1
+  # NOTE: This deployment intentionally uses a single replica with a Recreate
+  # strategy due to memory constraints and so that only one inference pod processes
+  # hydrophone audio at a time.  This avoids concurrent inference instances for
+  # this site, and the temporary detection gap during upgrades is acceptable.
+  strategy:
+    type: Recreate
   selector:
     matchLabels:
       app: inference-system
@@ -13,9 +19,11 @@ spec:
       labels:
         app: inference-system
     spec:
+      nodeSelector:
+        kubernetes.azure.com/agentpool: f4sv2pool
       containers:
       - name: inference-system
-        image: orcaconservancycr.azurecr.io/live-inference-system:11-19-2025.FastAI.R1-12.v0
+        image: orcaconservancycr.azurecr.io/live-inference-system:01-24-2026.FastAI.R1-12.v0
         resources:
           limits:
             cpu: 1
diff --git a/InferenceSystem/deploy/orcasound-lab.yaml b/InferenceSystem/deploy/orcasound-lab.yaml
@@ -5,6 +5,12 @@ metadata:
   namespace: orcasound-lab
 spec:
   replicas: 1
+  # NOTE: This deployment intentionally uses a single replica with a Recreate
+  # strategy due to memory constraints and so that only one inference pod processes
+  # hydrophone audio at a time.  This avoids concurrent inference instances for
+  # this site, and the temporary detection gap during upgrades is acceptable.
+  strategy:
+    type: Recreate
   selector:
     matchLabels:
       app: inference-system
@@ -13,13 +19,18 @@ spec:
       labels:
         app: inference-system
     spec:
+      nodeSelector:
+        kubernetes.azure.com/agentpool: f4sv2pool
       containers:
       - name: inference-system
-        image: orcaconservancycr.azurecr.io/live-inference-system:11-19-2025.FastAI.R1-12.v0
+        image: orcaconservancycr.azurecr.io/live-inference-system:01-24-2026.FastAI.R1-12.v0
         resources:
+          requests:
+            cpu: "1"
+            memory: "1600Mi"
           limits:
-            cpu: 1
-            memory: 3G
+            cpu: "1"
+            memory: "2500Mi"
         env:
           - name: AZURE_COSMOSDB_PRIMARY_KEY
             valueFrom:
diff --git a/InferenceSystem/deploy/point-robinson.yaml b/InferenceSystem/deploy/point-robinson.yaml
@@ -5,6 +5,12 @@ metadata:
   namespace: point-robinson
 spec:
   replicas: 1
+  # NOTE: This deployment intentionally uses a single replica with a Recreate
+  # strategy due to memory constraints and so that only one inference pod processes
+  # hydrophone audio at a time.  This avoids concurrent inference instances for
+  # this site, and the temporary detection gap during upgrades is acceptable.
+  strategy:
+    type: Recreate
   selector:
     matchLabels:
       app: inference-system
@@ -13,9 +19,11 @@ spec:
       labels:
         app: inference-system
     spec:
+      nodeSelector:
+        kubernetes.azure.com/agentpool: f4sv2pool
       containers:
       - name: inference-system
-        image: orcaconservancycr.azurecr.io/live-inference-system:11-19-2025.FastAI.R1-12.v0
+        image: orcaconservancycr.azurecr.io/live-inference-system:01-24-2026.FastAI.R1-12.v0
         resources:
           limits:
             cpu: 1
diff --git a/InferenceSystem/deploy/port-townsend-configmap.yaml b/InferenceSystem/deploy/port-townsend-configmap.yaml
@@ -5,13 +5,16 @@ metadata:
   namespace: port-townsend
 data:
   config.yml: |
-    model_type: "FastAI"
-    model_local_threshold: 0.5
-    model_global_threshold: 3
-    model_path: "./model"
-    model_name: "model.pkl"
+    model_id: "orcasound/orcahello-srkw-detector-v1.030926"
+    model_hf_repo_id: "orcasound/orcahello-srkw-detector-v1"
+    model_config_path: "./model/config.yaml"
     hls_stream_type: "LiveHLS"
     hls_polling_interval: 60
     hls_hydrophone_id: "rpi_port_townsend"
     upload_to_azure: True
     delete_local_wavs: True
+    model_config_overrides:
+      inference:
+        max_batch_size: 2
+      global_prediction:
+        pred_global_threshold: 0.6
diff --git a/InferenceSystem/deploy/port-townsend.yaml b/InferenceSystem/deploy/port-townsend.yaml
@@ -5,6 +5,12 @@ metadata:
   namespace: port-townsend
 spec:
   replicas: 1
+  # NOTE: This deployment intentionally uses a single replica with a Recreate
+  # strategy due to memory constraints and so that only one inference pod processes
+  # hydrophone audio at a time.  This avoids concurrent inference instances for
+  # this site, and the temporary detection gap during upgrades is acceptable.
+  strategy:
+    type: Recreate
   selector:
     matchLabels:
       app: inference-system
@@ -13,13 +19,18 @@ spec:
       labels:
         app: inference-system
     spec:
+      nodeSelector:
+        kubernetes.azure.com/agentpool: f4sv2pool
       containers:
       - name: inference-system
-        image: orcaconservancycr.azurecr.io/live-inference-system:11-19-2025.FastAI.R1-12.v0
+        image: orcaconservancycr.azurecr.io/live-inference-system-v1:03-09-2026.v1.0.0
         resources:
+          requests:
+            cpu: "1"
+            memory: "1600Mi"
           limits:
-            cpu: 1
-            memory: 3G
+            cpu: "1"
+            memory: "2500Mi"
         env:
           - name: AZURE_COSMOSDB_PRIMARY_KEY
             valueFrom:
diff --git a/InferenceSystem/deploy/sunset-bay.yaml b/InferenceSystem/deploy/sunset-bay.yaml
@@ -5,6 +5,12 @@ metadata:
   namespace: sunset-bay
 spec:
   replicas: 1
+  # NOTE: This deployment intentionally uses a single replica with a Recreate
+  # strategy due to memory constraints and so that only one inference pod processes
+  # hydrophone audio at a time.  This avoids concurrent inference instances for
+  # this site, and the temporary detection gap during upgrades is acceptable.
+  strategy:
+    type: Recreate
   selector:
     matchLabels:
       app: inference-system
@@ -13,13 +19,18 @@ spec:
       labels:
         app: inference-system
     spec:
+      nodeSelector:
+        kubernetes.azure.com/agentpool: f4sv2pool
       containers:
       - name: inference-system
-        image: orcaconservancycr.azurecr.io/live-inference-system:11-19-2025.FastAI.R1-12.v0
+        image: orcaconservancycr.azurecr.io/live-inference-system:01-24-2026.FastAI.R1-12.v0
         resources:
+          requests:
+            cpu: "1"
+            memory: "1600Mi"
           limits:
-            cpu: 1
-            memory: 3G
+            cpu: "1"
+            memory: "2500Mi"
         env:
           - name: AZURE_COSMOSDB_PRIMARY_KEY
             valueFrom: