constrained-hmm: Dataset troubles...

jonnor · jonnor · commit 4f7fab547198 · 2025-06-08T23:49:20.000+02:00
diff --git a/handson/constrained-hmm/CNCMillSmartMichicanExplore.ipynb b/handson/constrained-hmm/CNCMillSmartMichicanExplore.ipynb
@@ -94,7 +94,38 @@
    "id": "1672296a-1751-4fd5-a2cb-31ba03cba26d",
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "\n",
+    "# Add experiment info to sensor data, for ease of analysis\n",
+    "enrich = pandas.merge(data.reset_index(),  experiments, left_on='experiment', right_index=True).set_index(['experiment', 'time'])\n",
+    "enrich.head()\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b133f8fb-7f6e-4e9a-9855-2861246d994d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Raw power columns only: skipping *_Scaled keeps this cell idempotent when re-run\n",
+    "power_columns = [c for c in enrich.columns if 'Power' in c and not c.endswith('_Scaled')]\n",
+    "def p99(s):\n",
+    "    return s.quantile(0.99)\n",
+    "\n",
+    "print(enrich[power_columns].agg(['min', 'max', 'median', p99]))\n",
+    "\n",
+    "for c in power_columns:\n",
+    "    s = numpy.maximum(enrich[c], 0.0)  # negative power readings are clipped to zero\n",
+    "    scale = s.quantile(0.99)\n",
+    "    # Scale by the 99th percentile and cap at 1.0; if that percentile is 0 the column stays zero instead of NaN/inf\n",
+    "    enrich[c+'_Scaled'] = numpy.minimum(s / scale, 1.0) if scale > 0 else s * 0.0\n",
+    "\n",
+    "scaled_power_columns = [ c+'_Scaled' for c in power_columns  ]\n",
+    "power_stats = enrich[scaled_power_columns].agg(['min', 'max', 'median', p99])\n",
+    "power_stats"
+   ]
   },
   {
    "cell_type": "code",
@@ -186,24 +217,11 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7f333d84-042a-4a68-a2f3-5cff5b05fc89",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "enrich = pandas.merge(data.reset_index(),  experiments, left_on='experiment', right_index=True).set_index(['experiment', 'time'])\n",
-    "enrich.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "35c99d2f-e049-437c-8842-6b10df5b15e2",
+   "cell_type": "markdown",
+   "id": "cdc83bdd-f65f-46cd-9ab8-a0a8cda9af72",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "plotly.express.histogram(enrich.reset_index().sort_values('feedrate'), x='S1_OutputPower', facet_col='feedrate', color='experiment')"
+    "## Spindle power vs different conditions"
    ]
   },
   {
@@ -214,10 +232,7 @@
    "outputs": [],
    "source": [
     "spindleactive = enrich[enrich['S1_OutputPower'] > 0.03]\n",
-    "import seaborn\n",
-    "\n",
-    "seaborn.displot(kind='kde', data=spindleactive, x='S1_OutputPower', hue='Machining_Process', row='feedrate', clip=(0.1, 0.250), aspect=2.0, height=4.0, common_norm=False)\n",
-    "#(spindleactive.reset_index().sort_values('Machining_Process'), x='S1_OutputPower', color='Machining_Process')\n"
+    "import seaborn"
    ]
   },
   {
@@ -257,65 +272,76 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "88c01b43-c535-41ac-a97f-a189e456fe90",
+   "id": "f9af841d-6ebb-454f-8fce-cbea5f8e2981",
    "metadata": {},
    "outputs": [],
    "source": [
-    "experiments"
+    "seaborn.displot(kind='kde', data=spindleactive, x='S1_OutputPower', hue='clamp_pressure', clip=(0.1, 0.250), aspect=2.0, height=4.0, common_norm=False)\n",
+    "#(spindleactive.reset_index().sort_values('Machining_Process'), x='S1_OutputPower', color='Machining_Process')\n"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "03a06816-37c6-4229-b864-46b2d65bf500",
+   "id": "206f74c7-f714-4871-bd8e-3feba4453fa8",
    "metadata": {},
    "outputs": [],
    "source": [
-    "experiments.groupby('feedrate')['machining_finalized'].value_counts().reset_index()"
+    "seaborn.displot(kind='kde', data=spindleactive, x='S1_OutputPower', hue='Machining_Process', row='feedrate', clip=(0.1, 0.250), aspect=2.0, height=2.0, common_norm=False)\n",
+    "#(spindleactive.reset_index().sort_values('Machining_Process'), x='S1_OutputPower', color='Machining_Process')\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c2e86bd4-0376-46be-a971-45c11a63eea1",
+   "metadata": {},
+   "source": [
+    "## Time-series view"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "18caa430-ed15-4e3a-af56-3a19ac213afa",
+   "id": "a90745b1-f9a3-4287-8640-f782fe6c2528",
    "metadata": {},
    "outputs": [],
    "source": [
-    "experiments.groupby('clamp_pressure')['machining_finalized'].value_counts().reset_index()"
+    "# TODO: show the different labeled sections in Machining_Process column\n",
+    "# TODO: normalize powers for all axes, and plot together\n",
+    "plot_timeseries(data, y=['Y1_OutputPower'])"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "05db06c0-fa0f-43cb-b3bd-76ea38629327",
+   "id": "be42389a-b4a1-4721-adad-08b701f07330",
    "metadata": {},
    "outputs": [],
    "source": [
-    "\n",
-    "seaborn.displot(kind='kde', data=spindleactive, x='S1_OutputPower', hue='clamp_pressure', clip=(0.1, 0.250), aspect=2.0, height=4.0, common_norm=False)\n",
-    "#(spindleactive.reset_index().sort_values('Machining_Process'), x='S1_OutputPower', color='Machining_Process')\n"
+    "for column in scaled_power_columns:\n",
+    "    \n",
+    "    seaborn.displot(data=enrich.reset_index(), kind='kde', x=column, hue='feedrate', height=2.0, aspect=2.0)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "4e44b6c4-2617-48ff-b44b-ef23886a9411",
+   "id": "3a97c242-8b3d-4e26-bbbe-6211ff5b4975",
    "metadata": {},
    "outputs": [],
    "source": [
-    "plotly.express.histogram(spindleactive.reset_index().sort_values('feedrate'), x='S1_OutputPower', facet_col='feedrate', color='experiment')"
+    "spindle_active = enrich[enrich['S1_OutputPower'] > 0.01]\n",
+    "seaborn.pairplot(data=spindle_active.reset_index(), vars=scaled_power_columns, hue='feedrate', height=3.6, aspect=1.5, diag_kws=dict(common_norm=False))"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "a90745b1-f9a3-4287-8640-f782fe6c2528",
+   "id": "8471ed38-36a9-4938-8326-575a0e887ca8",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# TODO: show the different labeled sections in Machining_Process column\n",
-    "# TODO: normalize powers for all axes, and plot together\n",
-    "plot_timeseries(data, y_column='Y1_OutputPower')"
+    "seaborn.pairplot(data=spindle_active.reset_index(), vars=scaled_power_columns, hue='machining_finalized', height=3.6, aspect=1.5, diag_kws=dict(common_norm=False))"
    ]
   },
   {
@@ -327,30 +353,113 @@
    },
    "outputs": [],
    "source": [
-    "# TODO: plot time-series\n",
-    "\n",
     "\n",
-    "def plot_timeseries(data, y_column = 'S1_OutputPower', x_column = 'time'):\n",
     "\n",
+    "def plot_timeseries(data, y, time_column = 'time', row_column='experiment', row_order=None):\n",
+    "    import plotly.graph_objects as go\n",
+    "    \n",
     "    data = data.reset_index()\n",
-    "    data['time'] = data['time'] / pandas.Timedelta('1sec')\n",
+    "    # convert to seconds, Plotly default time markers are bad with Timedelta\n",
+    "    data[time_column] = data[time_column] / pandas.Timedelta('1sec')\n",
+    "\n",
+    "    x_range = data[time_column].min(), data[time_column].max()\n",
+    "\n",
+    "    if row_order is None:\n",
+    "        row_order = sorted(list(data[row_column].unique()))\n",
+    "    else:\n",
+    "        row_order = list(row_order)\n",
     "    \n",
-    "    #traces = []\n",
-    "    #titles = []\n",
-    "    for experiment, d in data.groupby('experiment'):\n",
+    "    for experiment in row_order:\n",
+    "        df = data[data[row_column] == experiment]\n",
+    "        df = df.sort_values(time_column) # plotly lines connect badly without sorting by time\n",
+    "        \n",
     "        ex = experiments.loc[experiment]\n",
     "        describe = f\"Ex {experiment}: f={ex['feedrate']} c={ex['clamp_pressure']} t={ex['tool_condition']} f={ex['machining_finalized']} p={ex['passed_visual_inspection']}\"\n",
+    "\n",
+    "        fig = go.Figure()\n",
+    "        fig.update_layout(title=describe, xaxis=dict(range=x_range))\n",
+    "        for column in y:\n",
+    "            fig.add_trace(go.Scatter(x=df[time_column], y=df[column], name=column))\n",
     "        \n",
-    "        fig = plotly.express.scatter(d, x=x_column, y=y_column, title=describe)\n",
     "        fig.show()\n",
-    "        \n",
-    "        #print(ex)    \n",
-    "        #traces.append(trace)\n",
-    "        #titles.append(describe)\n",
     "\n",
-    "    \n",
+    "plot_timeseries(enrich.sort_values(['feedrate']), y=scaled_power_columns)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "876f7376-af98-411a-af20-966a444ca9d0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "exx = experiments.sort_values(['feedrate', 'clamp_pressure', 'tool_condition'])\n",
+    "exx"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fa8665f0-0767-4e5b-8f35-b99c83eae49c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "\n",
-    "plot_timeseries(data)"
+    "plot_timeseries(data.reset_index(), y=['Y1_ActualPosition', 'X1_ActualPosition'], row_order=exx.index)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "26499011-f015-47a4-ad9c-a6e23e4b0c1b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_timeseries(enrich.sort_values(['feedrate']), y=['M1_CURRENT_FEEDRATE', 'S1_CurrentFeedback'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9deb06d4-5327-440b-9f5d-826ce9357a15",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_timeseries(enrich.sort_values(['feedrate']), y=['M1_CURRENT_FEEDRATE', 'S1_OutputPower_Scaled'])\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e5973658-2893-4a56-8cd1-fdd92d57ac0b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "p = enrich.sort_values(['feedrate'])\n",
+    "p['S1_CommandVelocity_Scaled'] = p['S1_CommandVelocity'] / 50.0\n",
+    "p['S1_Power_Calc'] = p['S1_OutputCurrent'] * p['S1_OutputVoltage']\n",
+    "#plot_timeseries(p, y=['S1_CommandVelocity_Scaled', 'S1_OutputPower_Scaled'])\n",
+    "plot_timeseries(p, y=['S1_OutputCurrent', 'S1_OutputPower'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4f1d8383-3180-49e7-b2be-bfef897be023",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scaled_power_columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b756b69a-5b3f-4c5a-b040-24356cae483c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data['S1_CommandVelocity'].hist()"
    ]
   },
   {
diff --git a/handson/constrained-hmm/README.md b/handson/constrained-hmm/README.md
@@ -26,7 +26,6 @@ Some real-world examples are referenced at the bottom of this page.
 
 - Provide an example on real data.
 For example fitting a repeated sequential (cyclic) process, such as those found in automation/manufacturing.
-The CNC Mill Tool Wear looks highly relevant.
 Alternative with sound could be MMII dataset?
 
 ## Implementation
diff --git a/handson/constrained-hmm/notes.md b/handson/constrained-hmm/notes.md