
Commit 67a41fa

sathyendranv, vkb1, and pooja-intel authored
TimeSeries, Multimodal: merge release branch (#1164)
This PR merges changes from the release branch for TimeSeries and Multimodal components, focusing on improving container security, adding source tracking for MQTT publishers, and enhancing documentation with troubleshooting guides.

- Enhanced security by adding read_only: true and no-new-privileges options to multiple Docker services
- Implemented source identification for multiple MQTT publisher instances using the INSTANCE_ID environment variable
- Added comprehensive troubleshooting documentation for common deployment issues

Signed-off-by: B, Vinod K <[email protected]>
Signed-off-by: Vellaisamy, Sathyendran <[email protected]>
Signed-off-by: Pooja Kumbharkar <[email protected]>
Co-authored-by: Vinod Kumar B <[email protected]>
Co-authored-by: Pooja Kumbharkar <[email protected]>
1 parent 355b2b7 commit 67a41fa

15 files changed: +294 additions, −55 deletions

manufacturing-ai-suite/industrial-edge-insights-multimodal/configs/time-series-analytics-microservice/udfs/weld_anomaly_detector.py

Lines changed: 15 additions & 13 deletions
@@ -29,6 +29,9 @@
 total_no_pts = int(os.getenv('BENCHMARK_TOTAL_PTS', "0"))
 logging_level = getattr(logging, log_level, logging.INFO)
 
+# Primary weld current threshold
+WELD_CURRENT_THRESHOLD = 50
+
 # Configure logging
 logging.basicConfig(
     level=logging_level, # Set the log level to DEBUG
@@ -104,39 +107,38 @@ def begin_batch(self, begin_req):
     def point(self, point):
         """ A point has arrived.
         """
-        server = None
+        stream_src = None
         start_time = time.time_ns()
         if "source" in point.tags:
-            server = point.tags["source"]
+            stream_src = point.tags["source"]
+        elif "source" in point.fieldsString:
+            stream_src = point.fieldsString["source"]
 
         global enable_benchmarking
         if enable_benchmarking:
-            if server not in self.points_received:
-                self.points_received[server] = 0
-            if self.points_received[server] >= self.max_points:
+            if stream_src not in self.points_received:
+                self.points_received[stream_src] = 0
+            if self.points_received[stream_src] >= self.max_points:
+                logger.info(f"Benchmarking: Reached max points {self.max_points} for source {stream_src}. Skipping further processing.")
                 return
-            self.points_received[server] += 1
-
+            self.points_received[stream_src] += 1
         fields = {}
         for key, value in point.fieldsDouble.items():
             fields[key] = value
 
         for key, value in point.fieldsInt.items():
             fields[key] = value
-
-        for key, value in point.fieldsString.items():
-            fields[key] = value
 
         point_series = pd.Series(fields)
-        if "Primary Weld Current" in point_series and point_series["Primary Weld Current"] > 50:
+        if "Primary Weld Current" in point_series and point_series["Primary Weld Current"] > WELD_CURRENT_THRESHOLD:
             defect_likelihood_main = self.model.predict_proba(point_series)
             bad_defect = defect_likelihood_main[0]*100
             good_defect = defect_likelihood_main[1]*100
             if bad_defect > 50:
                 point.fieldsDouble["anomaly_status"] = 1.0
             logger.info(f"Good Weld: {good_defect:.2f}%, Defective Weld: {bad_defect:.2f}%")
         else:
-            logger.info("Good Weld: N/A, Defective Weld: N/A")
+            logger.info("Primary Weld Current below threshold (%d). Skipping anomaly detection.", WELD_CURRENT_THRESHOLD)
 
         point.fieldsDouble["Good Weld"] = round(good_defect, 2) if "good_defect" in locals() else 0.0
         point.fieldsDouble["Defective Weld"] = round(bad_defect, 2) if "bad_defect" in locals() else 0.0
@@ -147,7 +149,7 @@ def point(self, point):
         point.fieldsDouble["processing_time"] = processing_time
         point.fieldsDouble["end_end_time"] = end_end_time
 
-        logger.info("Processing point %s %s for source %s", point.time, time.time(), server)
+        logger.info("Processing point %s %s for source %s", point.time, time.time(), stream_src)
 
         response = udf_pb2.Response()
         if "anomaly_status" not in point.fieldsDouble:

manufacturing-ai-suite/industrial-edge-insights-multimodal/docker-compose.yml

Lines changed: 14 additions & 3 deletions
@@ -3,7 +3,6 @@
 # Copyright (C) 2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
-version: '3.6'
 services:
   ia-telegraf:
     user: "${TIMESERIES_UID}:${TIMESERIES_UID}"
@@ -233,7 +232,8 @@ services:
     container_name: nginx_proxy
     read_only: true
     user: "${TIMESERIES_UID}:${TIMESERIES_UID}"
-    # entrypoint: ["sleep", "infinity"]
+    security_opt:
+      - no-new-privileges
     command: >
       /bin/sh -c "/usr/local/bin/nginx-cert-gen.sh && exec nginx -g 'daemon off;'"
     environment:
@@ -268,7 +268,10 @@ services:
       TIMESERIES_UID: ${TIMESERIES_UID}
     container_name: ia-fusion-analytics
     image: ${DOCKER_REGISTRY}${FUSION_MODULE_IMAGE}${IMAGE_SUFFIX:+-${IMAGE_SUFFIX}}
+    read_only: true
     restart: unless-stopped
+    security_opt:
+      - no-new-privileges
     environment:
       # MQTT Configuration
       MQTT_BROKER: ia-mqtt-broker
@@ -298,6 +301,7 @@ services:
     image: ${DLSTREAMER_PIPELINE_SERVER_IMAGE}
     container_name: dlstreamer-pipeline-server
     hostname: dlstreamer-pipeline-server
+    read_only: true
     networks:
       - timeseries_network
     restart: unless-stopped
@@ -379,6 +383,10 @@ services:
     image: bluenviron/mediamtx:1.11.3
     container_name: mediamtx
     restart: unless-stopped
+    read_only: true
+    security_opt:
+      - no-new-privileges
+    user: "${TIMESERIES_UID}:${TIMESERIES_UID}"
     ports:
       - ${WHIP_SERVER_PORT}:8889 # WebRTC
       - 9554:8554 # RTSP
@@ -407,6 +415,9 @@ services:
   coturn:
     image: coturn/coturn:4.7.0
     container_name: coturn
+    read_only: true
+    security_opt:
+      - no-new-privileges
     ports:
       - "${COTURN_UDP_PORT}:3478"
       - "${COTURN_UDP_PORT}:3478/udp"
@@ -450,4 +461,4 @@ volumes:
     driver: local
     driver_opts:
       type: tmpfs
-      device: tmpfs
+      device: tmpfs

manufacturing-ai-suite/industrial-edge-insights-multimodal/docs/user-guide/get-started.md

Lines changed: 2 additions & 9 deletions
@@ -76,8 +76,8 @@ cd edge-ai-suites/manufacturing-ai-suite/industrial-edge-insights-multimodal
 > - The sample app is deployed by pulling the pre-built container images of the sample app
 >   from the docker hub OR from the internal container registry (login to the docker registry from cli and configure `DOCKER_REGISTRY`
 >   env variable in `.env` file at `edge-ai-suites/manufacturing-ai-suite/industrial-edge-insights-multimodal`)
-> - The `CONTINUOUS_SIMULATOR_INGESTION` variable in the `.env` file (for Docker Compose) and in `helm/values.yaml` (for Helm deployments)
->   is set to `true` by default, enabling continuous looping of simulator data. To ingest the simulator data only once (without looping),
+> - The `CONTINUOUS_SIMULATOR_INGESTION` variable in the `.env` file (for Docker Compose) is set to `true` by default,
+>   enabling continuous looping of simulator data. To ingest the simulator data only once (without looping),
 >   set this variable to `false`.
 > - The update rate of the graph and table may lag by a few seconds and might not perfectly align with the video stream, since
 >   Grafana’s minimum refresh interval is 5 seconds.
@@ -104,10 +104,6 @@ cd edge-ai-suites/manufacturing-ai-suite/industrial-edge-insights-multimodal
 
 1. Get into the InfluxDB* container.
 
-   > **Note**: Use `kubectl exec -it <influxdb-pod-name> -n <namespace> -- /bin/bash` for the helm deployment
-   > where for <namespace> replace with namespace name where the application was deployed and
-   > for <influxdb-pod-name> replace with InfluxDB pod name.
-
    ``` bash
    docker exec -it ia-influxdb bash
    ```
@@ -120,7 +116,6 @@ cd edge-ai-suites/manufacturing-ai-suite/industrial-edge-insights-multimodal
 
    ``` bash
    # For below command, the INFLUXDB_USERNAME and INFLUXDB_PASSWORD needs to be fetched from `.env` file
-   # for docker compose deployment and `values.yml` for helm deployment
   influx -username <username> -password <passwd>
   use datain # database access
   show measurements
@@ -133,8 +128,6 @@ cd edge-ai-suites/manufacturing-ai-suite/industrial-edge-insights-multimodal
 
 - Use link `http://<host_ip>:3000` to launch Grafana from browser (preferably, chrome browser)
 
-  > **Note**: Use link `http://<host_ip>:30001` to launch Grafana from browser (preferably, chrome browser) for the helm deployment
-
 - Login to the Grafana with values set for `VISUALIZER_GRAFANA_USER` and `VISUALIZER_GRAFANA_PASSWORD`
   in `.env` file and select **Multimodal Weld Defect Detection Dashboard**.
manufacturing-ai-suite/industrial-edge-insights-multimodal/docs/user-guide/index.md

Lines changed: 1 addition & 0 deletions
@@ -42,5 +42,6 @@ system-requirements
 weld-defect-detection/index
 how-to-guides/index
 release_notes/Overview.md
+troubleshoot-guide
 :::
 hide_directive-->

manufacturing-ai-suite/industrial-edge-insights-multimodal/docs/user-guide/release_notes/Overview.md

Lines changed: 2 additions & 0 deletions
@@ -2,8 +2,10 @@
 
 - [December 2025](./dec-2025.md)
 
+<!--hide_directive
 ```{toctree}
 :maxdepth: 5
 :hidden:
 dec-2025.md
 ```
+hide_directive-->
manufacturing-ai-suite/industrial-edge-insights-multimodal/docs/user-guide/troubleshoot-guide.md

Lines changed: 99 additions & 0 deletions
@@ -0,0 +1,99 @@
+# Troubleshoot Guide
+
+## 1. Seeing "No Data" in Grafana
+
+### Issue
+
+Grafana panels show **"No Data"** even though the container/stack is running.
+
+### Reason
+
+The **system date/time is incorrect** on the device. If the system time is wrong, data timestamps fall outside Grafana's query window.
+
+### Solution
+
+Check the date/time using the command below:
+
+``` sh
+$ date
+```
+
+Set the correct date/time manually:
+
+``` sh
+$ sudo date -s 'YYYY-MM-DD HH:MM:SS'   # Replace with your actual date and time
+```
+
+Set the date/time from the internet:
+
+``` sh
+$ sudo date -s "$(wget --method=HEAD -qSO- --max-redirect=0 google.com 2>&1 | sed -n 's/^ *Date: *//p')"
+```
+
+---
+
+## 2. InfluxDB -- Data Being Deleted Beyond the Retention Policy (RP)
+
+### Issue
+
+- Data appears to be deleted beyond the configured retention policy (RP).
+- InfluxDB 1.x deletes old data based on the retention policy duration and the shard group duration.
+
+### Reason
+
+- Data is grouped into **shards**.
+- Shards are deleted only when **all data inside them** is older than the RP.
+- For RPs **≤ 2 days**, the shard group duration is **1 hour**.
+- InfluxDB therefore always expires data at **RP + shard duration**.
+
+Example, for a **1-hour RP**:
+
+- Data written at **00:00** goes into the shard covering **00:00--01:00**.
+- The shard closes at **01:00**.
+- InfluxDB deletes the shard only when everything inside it is past the RP → at **02:00**.
+
+So the effective expiration time is **1 hour RP + 1 hour shard duration = 2 hours**.
+
+| Retention Policy | Shard Duration | Actual Expiry   |
+|------------------|----------------|-----------------|
+| 1 hour           | 1 hour         | 2 hours         |
+| 2 days           | 1 hour         | 2 days + 1 hr   |
+| 30 days          | 24 hours       | 30 days + 24 hr |
+
+### Solution
+
+- This is **normal and expected behavior** in InfluxDB 1.x.
+- A 1-hour RP will **always** result in ~2 hours before deletion.
+- No configuration can force deletion exactly at the RP limit.
+
+---
+
+## 3. Time Series Analytics Microservice (Docker) -- Takes Time to Start or Shows Python Packages Installing
+
+### Issue
+
+The Time Series Analytics Microservice takes time to start or displays messages about Python packages being installed.
+
+### Reason
+
+UDF packages require several dependent packages to be installed at runtime, as specified under `udfs/requirements.txt`. Once these dependencies are installed, the **Time Series Analytics** microservice initializes and starts inferencing.
+
+### Solution
+
+No action required --- wait for the **time-series-analytics** microservice to finish downloading the dependent packages and initialize Kapacitor to start inference.
+
+---
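
To make the retention arithmetic in section 2 of the new guide concrete, here is a tiny sketch (not part of the commit) that reproduces the "Actual Expiry" column, assuming the InfluxDB 1.x default shard group durations quoted above:

``` python
from datetime import timedelta

def worst_case_expiry(rp: timedelta, shard_duration: timedelta) -> timedelta:
    # A point written at the very start of a shard is removed only once the
    # entire shard has aged past the RP, i.e. after rp + shard_duration.
    return rp + shard_duration

print(worst_case_expiry(timedelta(hours=1), timedelta(hours=1)))   # 2:00:00
print(worst_case_expiry(timedelta(days=2), timedelta(hours=1)))    # 2 days, 1:00:00
print(worst_case_expiry(timedelta(days=30), timedelta(hours=24)))  # 31 days, 0:00:00
```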

manufacturing-ai-suite/industrial-edge-insights-time-series/Makefile

Lines changed: 5 additions & 1 deletion
@@ -133,7 +133,11 @@ up_mqtt_ingestion: check_env_variables down
 	if [ $(INCLUDE) = 'validation' ]; then \
 		$(DOCKER_COMPOSE) -f $(DOCKER_COMPOSE_FILE) -f $(DOCKER_COMPOSE_VALIDATION_FILE) up --scale ia-opcua-server=0 -d; \
 	else \
-		$(DOCKER_COMPOSE) up --scale ia-mqtt-publisher=$(num_of_streams) --scale ia-opcua-server=0 -d; \
+		for i in $$(seq 1 $(num_of_streams)); do \
+			echo "Starting ia-mqtt-publisher instance $$i for SAMPLE_APP: $$SAMPLE_APP"; \
+			$(DOCKER_COMPOSE) run -d --name ia-mqtt-publisher-$$i -e INSTANCE_ID=$$i ia-mqtt-publisher; \
+		done; \
+		$(DOCKER_COMPOSE) up --scale ia-opcua-server=0 -d $(shell $(DOCKER_COMPOSE) config --services | grep -v ia-mqtt-publisher); \
 	fi;
 
 # Run Docker containers
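
The loop above starts `num_of_streams` publisher containers, each with a unique `INSTANCE_ID`, instead of an anonymous `--scale`. The publisher itself is not part of this diff; as a rough sketch of the intended pattern, an instance might tag its payloads like this (broker host, port, topic, and field names are assumptions):

``` python
import json
import os
import time

import paho.mqtt.client as mqtt  # assumed MQTT client library

# INSTANCE_ID is injected per container by the Makefile loop above.
instance_id = os.getenv("INSTANCE_ID", "1")

client = mqtt.Client()  # paho-mqtt 1.x style; 2.x also needs a CallbackAPIVersion
client.connect("ia-mqtt-broker", 1883)  # hypothetical broker host/port
client.loop_start()

while True:
    payload = {
        "Primary Weld Current": 55.2,                # illustrative sensor reading
        "source": f"mqtt-publisher-{instance_id}",   # identifies this stream downstream
    }
    client.publish("weld/sensor-data", json.dumps(payload))  # hypothetical topic
    time.sleep(1)
```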

manufacturing-ai-suite/industrial-edge-insights-time-series/apps/weld-anomaly-detection/telegraf-config/Telegraf.conf

Lines changed: 1 addition & 0 deletions
@@ -3946,6 +3946,7 @@
   ]
   name_override = "weld-sensor-data"
   data_format = "json"
+  json_string_fields = ["source"]
 #
 # # if true, messages that can't be delivered while the subscriber is offline
 # # will be delivered when it comes back (such as on service restart).
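
Telegraf's JSON parser keeps numeric values and drops strings unless they are listed in `json_string_fields`; adding `source` here is what lets the UDF fall back to `point.fieldsString["source"]`. A small sketch of the effect, with an assumed payload:

``` python
import json

raw = '{"Primary Weld Current": 55.2, "source": "mqtt-publisher-1"}'  # assumed payload
doc = json.loads(raw)

# Default JSON parsing: only numeric values become fields.
numeric_fields = {k: v for k, v in doc.items() if isinstance(v, (int, float))}
# -> {'Primary Weld Current': 55.2}

# With json_string_fields = ["source"], the string survives as a field too,
# reaching the UDF via point.fieldsString.
string_fields = {k: doc[k] for k in ["source"] if isinstance(doc.get(k), str)}
# -> {'source': 'mqtt-publisher-1'}
```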

manufacturing-ai-suite/industrial-edge-insights-time-series/apps/weld-anomaly-detection/time-series-analytics-config/udfs/weld_anomaly_detector.py

Lines changed: 15 additions & 13 deletions
@@ -29,6 +29,9 @@
 total_no_pts = int(os.getenv('BENCHMARK_TOTAL_PTS', "0"))
 logging_level = getattr(logging, log_level, logging.INFO)
 
+# Primary weld current threshold
+WELD_CURRENT_THRESHOLD = 50
+
 # Configure logging
 logging.basicConfig(
     level=logging_level, # Set the log level to DEBUG
@@ -104,39 +107,38 @@ def begin_batch(self, begin_req):
     def point(self, point):
         """ A point has arrived.
         """
-        server = None
+        stream_src = None
         start_time = time.time_ns()
         if "source" in point.tags:
-            server = point.tags["source"]
+            stream_src = point.tags["source"]
+        elif "source" in point.fieldsString:
+            stream_src = point.fieldsString["source"]
 
         global enable_benchmarking
         if enable_benchmarking:
-            if server not in self.points_received:
-                self.points_received[server] = 0
-            if self.points_received[server] >= self.max_points:
+            if stream_src not in self.points_received:
+                self.points_received[stream_src] = 0
+            if self.points_received[stream_src] >= self.max_points:
+                logger.info(f"Benchmarking: Reached max points {self.max_points} for source {stream_src}. Skipping further processing.")
                 return
-            self.points_received[server] += 1
-
+            self.points_received[stream_src] += 1
         fields = {}
         for key, value in point.fieldsDouble.items():
             fields[key] = value
 
         for key, value in point.fieldsInt.items():
             fields[key] = value
-
-        for key, value in point.fieldsString.items():
-            fields[key] = value
 
         point_series = pd.Series(fields)
-        if "Primary Weld Current" in point_series and point_series["Primary Weld Current"] > 50:
+        if "Primary Weld Current" in point_series and point_series["Primary Weld Current"] > WELD_CURRENT_THRESHOLD:
             defect_likelihood_main = self.model.predict_proba(point_series)
             bad_defect = defect_likelihood_main[0]*100
             good_defect = defect_likelihood_main[1]*100
             if bad_defect > 50:
                 point.fieldsDouble["anomaly_status"] = 1.0
             logger.info(f"Good Weld: {good_defect:.2f}%, Defective Weld: {bad_defect:.2f}%")
         else:
-            logger.info("Good Weld: N/A, Defective Weld: N/A")
+            logger.info("Primary Weld Current below threshold (%d). Skipping anomaly detection.", WELD_CURRENT_THRESHOLD)
 
         point.fieldsDouble["Good Weld"] = round(good_defect, 2) if "good_defect" in locals() else 0.0
         point.fieldsDouble["Defective Weld"] = round(bad_defect, 2) if "bad_defect" in locals() else 0.0
@@ -147,7 +149,7 @@ def point(self, point):
         point.fieldsDouble["processing_time"] = processing_time
         point.fieldsDouble["end_end_time"] = end_end_time
 
-        logger.info("Processing point %s %s for source %s", point.time, time.time(), server)
+        logger.info("Processing point %s %s for source %s", point.time, time.time(), stream_src)
 
         response = udf_pb2.Response()
         if "anomaly_status" not in point.fieldsDouble:

manufacturing-ai-suite/industrial-edge-insights-time-series/apps/wind-turbine-anomaly-detection/telegraf-config/Telegraf.conf

Lines changed: 2 additions & 0 deletions
@@ -3946,6 +3946,8 @@
   ]
   name_override = "wind-turbine-data"
   data_format = "json"
+  json_string_fields = ["source"]
+
 #
 # # if true, messages that can't be delivered while the subscriber is offline
 # # will be delivered when it comes back (such as on service restart).
