diff --git a/notebooks/test_statescript_parsing.ipynb b/notebooks/test_statescript_parsing.ipynb
new file mode 100644
index 0000000..5c9f27c
--- /dev/null
+++ b/notebooks/test_statescript_parsing.ipynb
@@ -0,0 +1,4464 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "46fbf114",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[09:42:05][WARNING] Spyglass: Failed to load SpyglassConfig. Please set up config file.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n",
+ "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n",
+ "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n",
+ "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n",
+ "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n",
+ "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n",
+ "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n",
+ "Warning: Log content yielded no raw events.\n",
+ "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n",
+ "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " raw_line | \n",
+ " type | \n",
+ " trodes_timestamp | \n",
+ " trodes_timestamp_sec | \n",
+ " text | \n",
+ " value | \n",
+ " active_DIO_inputs_bitmask | \n",
+ " active_DIO_outputs_bitmask | \n",
+ " active_DIO_inputs | \n",
+ " active_DIO_outputs | \n",
+ "
\n",
+ " \n",
+ " | line_num | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 270 | \n",
+ " 648028 UP 2 | \n",
+ " ts_str_int | \n",
+ " 648028 | \n",
+ " 648.028 | \n",
+ " UP | \n",
+ " 2 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 271 | \n",
+ " 648028 2 0 | \n",
+ " ts_int_int | \n",
+ " 648028 | \n",
+ " 648.028 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " [2] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 290 | \n",
+ " 648083 lastPort = -1 to currPort = 1 | \n",
+ " ts_str | \n",
+ " 648083 | \n",
+ " 648.083 | \n",
+ " lastPort = -1 to currPort = 1 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 292 | \n",
+ " 658285 DOWN 2 | \n",
+ " ts_str_int | \n",
+ " 658285 | \n",
+ " 658.285 | \n",
+ " DOWN | \n",
+ " 2 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 293 | \n",
+ " 658285 0 0 | \n",
+ " ts_int_int | \n",
+ " 658285 | \n",
+ " 658.285 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 9098 | \n",
+ " 3925934 8 0 | \n",
+ " ts_int_int | \n",
+ " 3925934 | \n",
+ " 3925.934 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 8 | \n",
+ " 0 | \n",
+ " [4] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 9099 | \n",
+ " 3926021 DOWN 4 | \n",
+ " ts_str_int | \n",
+ " 3926021 | \n",
+ " 3926.021 | \n",
+ " DOWN | \n",
+ " 4 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 9100 | \n",
+ " 3926021 0 0 | \n",
+ " ts_int_int | \n",
+ " 3926021 | \n",
+ " 3926.021 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 9101 | \n",
+ " 3926086 UP 4 | \n",
+ " ts_str_int | \n",
+ " 3926086 | \n",
+ " 3926.086 | \n",
+ " UP | \n",
+ " 4 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 9102 | \n",
+ " 3926086 8 0 | \n",
+ " ts_int_int | \n",
+ " 3926086 | \n",
+ " 3926.086 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 8 | \n",
+ " 0 | \n",
+ " [4] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
6241 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " raw_line type trodes_timestamp \\\n",
+ "line_num \n",
+ "270 648028 UP 2 ts_str_int 648028 \n",
+ "271 648028 2 0 ts_int_int 648028 \n",
+ "290 648083 lastPort = -1 to currPort = 1 ts_str 648083 \n",
+ "292 658285 DOWN 2 ts_str_int 658285 \n",
+ "293 658285 0 0 ts_int_int 658285 \n",
+ "... ... ... ... \n",
+ "9098 3925934 8 0 ts_int_int 3925934 \n",
+ "9099 3926021 DOWN 4 ts_str_int 3926021 \n",
+ "9100 3926021 0 0 ts_int_int 3926021 \n",
+ "9101 3926086 UP 4 ts_str_int 3926086 \n",
+ "9102 3926086 8 0 ts_int_int 3926086 \n",
+ "\n",
+ " trodes_timestamp_sec text value \\\n",
+ "line_num \n",
+ "270 648.028 UP 2 \n",
+ "271 648.028 \n",
+ "290 648.083 lastPort = -1 to currPort = 1 \n",
+ "292 658.285 DOWN 2 \n",
+ "293 658.285 \n",
+ "... ... ... ... \n",
+ "9098 3925.934 \n",
+ "9099 3926.021 DOWN 4 \n",
+ "9100 3926.021 \n",
+ "9101 3926.086 UP 4 \n",
+ "9102 3926.086 \n",
+ "\n",
+ " active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n",
+ "line_num \n",
+ "270 \n",
+ "271 2 0 \n",
+ "290 \n",
+ "292 \n",
+ "293 0 0 \n",
+ "... ... ... \n",
+ "9098 8 0 \n",
+ "9099 \n",
+ "9100 0 0 \n",
+ "9101 \n",
+ "9102 8 0 \n",
+ "\n",
+ " active_DIO_inputs active_DIO_outputs \n",
+ "line_num \n",
+ "270 [] [] \n",
+ "271 [2] [] \n",
+ "290 [] [] \n",
+ "292 [] [] \n",
+ "293 [] [] \n",
+ "... ... ... \n",
+ "9098 [4] [] \n",
+ "9099 [] [] \n",
+ "9100 [] [] \n",
+ "9101 [] [] \n",
+ "9102 [4] [] \n",
+ "\n",
+ "[6241 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from spyglass.utils.statescript import StateScriptLogProcessor\n",
+ "\n",
+ "import pathlib\n",
+ "\n",
+ "search_dir = pathlib.Path(\"/Users/edeno/Downloads/\")\n",
+ "log_files_generator = search_dir.glob(\"*.stateScriptLog\")\n",
+ "\n",
+ "statescript_dfs = [\n",
+ " StateScriptLogProcessor.from_file(file_path).get_events_dataframe()\n",
+ " for file_path in log_files_generator\n",
+ "]\n",
+ "statescript_dfs[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "35bc8caf",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " raw_line | \n",
+ " type | \n",
+ " trodes_timestamp | \n",
+ " trodes_timestamp_sec | \n",
+ " text | \n",
+ " value | \n",
+ " active_DIO_inputs_bitmask | \n",
+ " active_DIO_outputs_bitmask | \n",
+ " active_DIO_inputs | \n",
+ " active_DIO_outputs | \n",
+ "
\n",
+ " \n",
+ " | line_num | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " #<Hexmaze_NoSequence.sc> | \n",
+ " comment_or_empty | \n",
+ " <NA> | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " #% author: XS | \n",
+ " comment_or_empty | \n",
+ " <NA> | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " #% date: 20231224; added a reward indicator fo... | \n",
+ " comment_or_empty | \n",
+ " <NA> | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " # | \n",
+ " comment_or_empty | \n",
+ " <NA> | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " #%initialize constant vars | \n",
+ " comment_or_empty | \n",
+ " <NA> | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 37629 | \n",
+ " ~~~ | \n",
+ " unknown | \n",
+ " <NA> | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 37630 | \n",
+ " Executing trigger function 22 | \n",
+ " unknown | \n",
+ " <NA> | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 37631 | \n",
+ " Executing trigger function 22 | \n",
+ " unknown | \n",
+ " <NA> | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 37632 | \n",
+ " Executing trigger function 22 | \n",
+ " unknown | \n",
+ " <NA> | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 37633 | \n",
+ " Executing trigger function 22 | \n",
+ " unknown | \n",
+ " <NA> | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
37634 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " raw_line type \\\n",
+ "line_num \n",
+ "0 # comment_or_empty \n",
+ "1 #% author: XS comment_or_empty \n",
+ "2 #% date: 20231224; added a reward indicator fo... comment_or_empty \n",
+ "3 # comment_or_empty \n",
+ "4 #%initialize constant vars comment_or_empty \n",
+ "... ... ... \n",
+ "37629 ~~~ unknown \n",
+ "37630 Executing trigger function 22 unknown \n",
+ "37631 Executing trigger function 22 unknown \n",
+ "37632 Executing trigger function 22 unknown \n",
+ "37633 Executing trigger function 22 unknown \n",
+ "\n",
+ " trodes_timestamp trodes_timestamp_sec text value \\\n",
+ "line_num \n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "... ... ... ... ... \n",
+ "37629 NaN NaN \n",
+ "37630 NaN NaN \n",
+ "37631 NaN NaN \n",
+ "37632 NaN NaN \n",
+ "37633 NaN NaN \n",
+ "\n",
+ " active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n",
+ "line_num \n",
+ "0 \n",
+ "1 \n",
+ "2 \n",
+ "3 \n",
+ "4 \n",
+ "... ... ... \n",
+ "37629 \n",
+ "37630 \n",
+ "37631 \n",
+ "37632 \n",
+ "37633 \n",
+ "\n",
+ " active_DIO_inputs active_DIO_outputs \n",
+ "line_num \n",
+ "0 [] [] \n",
+ "1 [] [] \n",
+ "2 [] [] \n",
+ "3 [] [] \n",
+ "4 [] [] \n",
+ "... ... ... \n",
+ "37629 [] [] \n",
+ "37630 [] [] \n",
+ "37631 [] [] \n",
+ "37632 [] [] \n",
+ "37633 [] [] \n",
+ "\n",
+ "[37634 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "StateScriptLogProcessor.from_file(\n",
+ " \"/Users/edeno/Downloads/20240513_BraveLu_03_r2.stateScriptLog\"\n",
+ ").get_events_dataframe(exclude_comments_unknown=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "8a3daa1c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " raw_line | \n",
+ " type | \n",
+ " trodes_timestamp | \n",
+ " trodes_timestamp_sec | \n",
+ " text | \n",
+ " value | \n",
+ " active_DIO_inputs_bitmask | \n",
+ " active_DIO_outputs_bitmask | \n",
+ " active_DIO_inputs | \n",
+ " active_DIO_outputs | \n",
+ "
\n",
+ " \n",
+ " | line_num | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 165 | \n",
+ " 173027 DOWN 1 | \n",
+ " ts_str_int | \n",
+ " 173027 | \n",
+ " 173.027 | \n",
+ " DOWN | \n",
+ " 1 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 166 | \n",
+ " 173027 0 0 | \n",
+ " ts_int_int | \n",
+ " 173027 | \n",
+ " 173.027 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 167 | \n",
+ " 173050 UP 1 | \n",
+ " ts_str_int | \n",
+ " 173050 | \n",
+ " 173.050 | \n",
+ " UP | \n",
+ " 1 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 168 | \n",
+ " 173050 1 0 | \n",
+ " ts_int_int | \n",
+ " 173050 | \n",
+ " 173.050 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " [1] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 169 | \n",
+ " 173658 DOWN 1 | \n",
+ " ts_str_int | \n",
+ " 173658 | \n",
+ " 173.658 | \n",
+ " DOWN | \n",
+ " 1 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 3568 | \n",
+ " 1449843 DOWN 1 | \n",
+ " ts_str_int | \n",
+ " 1449843 | \n",
+ " 1449.843 | \n",
+ " DOWN | \n",
+ " 1 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 3569 | \n",
+ " 1449843 0 8 | \n",
+ " ts_int_int | \n",
+ " 1449843 | \n",
+ " 1449.843 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 8 | \n",
+ " [] | \n",
+ " [4] | \n",
+ "
\n",
+ " \n",
+ " | 3570 | \n",
+ " 1450010 UP 1 | \n",
+ " ts_str_int | \n",
+ " 1450010 | \n",
+ " 1450.010 | \n",
+ " UP | \n",
+ " 1 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 3571 | \n",
+ " 1450010 1 8 | \n",
+ " ts_int_int | \n",
+ " 1450010 | \n",
+ " 1450.010 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 1 | \n",
+ " 8 | \n",
+ " [1] | \n",
+ " [4] | \n",
+ "
\n",
+ " \n",
+ " | 3572 | \n",
+ " 1450078 1 0 | \n",
+ " ts_int_int | \n",
+ " 1450078 | \n",
+ " 1450.078 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " [1] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2828 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " raw_line type trodes_timestamp trodes_timestamp_sec \\\n",
+ "line_num \n",
+ "165 173027 DOWN 1 ts_str_int 173027 173.027 \n",
+ "166 173027 0 0 ts_int_int 173027 173.027 \n",
+ "167 173050 UP 1 ts_str_int 173050 173.050 \n",
+ "168 173050 1 0 ts_int_int 173050 173.050 \n",
+ "169 173658 DOWN 1 ts_str_int 173658 173.658 \n",
+ "... ... ... ... ... \n",
+ "3568 1449843 DOWN 1 ts_str_int 1449843 1449.843 \n",
+ "3569 1449843 0 8 ts_int_int 1449843 1449.843 \n",
+ "3570 1450010 UP 1 ts_str_int 1450010 1450.010 \n",
+ "3571 1450010 1 8 ts_int_int 1450010 1450.010 \n",
+ "3572 1450078 1 0 ts_int_int 1450078 1450.078 \n",
+ "\n",
+ " text value active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n",
+ "line_num \n",
+ "165 DOWN 1 \n",
+ "166 0 0 \n",
+ "167 UP 1 \n",
+ "168 1 0 \n",
+ "169 DOWN 1 \n",
+ "... ... ... ... ... \n",
+ "3568 DOWN 1 \n",
+ "3569 0 8 \n",
+ "3570 UP 1 \n",
+ "3571 1 8 \n",
+ "3572 1 0 \n",
+ "\n",
+ " active_DIO_inputs active_DIO_outputs \n",
+ "line_num \n",
+ "165 [] [] \n",
+ "166 [] [] \n",
+ "167 [] [] \n",
+ "168 [1] [] \n",
+ "169 [] [] \n",
+ "... ... ... \n",
+ "3568 [] [] \n",
+ "3569 [] [4] \n",
+ "3570 [] [] \n",
+ "3571 [1] [4] \n",
+ "3572 [1] [] \n",
+ "\n",
+ "[2828 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "statescript_dfs[1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "06e49235",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " raw_line | \n",
+ " type | \n",
+ " trodes_timestamp | \n",
+ " trodes_timestamp_sec | \n",
+ " text | \n",
+ " value | \n",
+ " active_DIO_inputs_bitmask | \n",
+ " active_DIO_outputs_bitmask | \n",
+ " active_DIO_inputs | \n",
+ " active_DIO_outputs | \n",
+ "
\n",
+ " \n",
+ " | line_num | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 83 | \n",
+ " 364241 UP 9 | \n",
+ " ts_str_int | \n",
+ " 364241 | \n",
+ " 364.241 | \n",
+ " UP | \n",
+ " 9 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 84 | \n",
+ " 364241 256 256 | \n",
+ " ts_int_int | \n",
+ " 364241 | \n",
+ " 364.241 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 256 | \n",
+ " 256 | \n",
+ " [9] | \n",
+ " [9] | \n",
+ "
\n",
+ " \n",
+ " | 87 | \n",
+ " 364269 outer reward | \n",
+ " ts_str | \n",
+ " 364269 | \n",
+ " 364.269 | \n",
+ " outer reward | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 89 | \n",
+ " 364269 256 2304 | \n",
+ " ts_int_int | \n",
+ " 364269 | \n",
+ " 364.269 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 256 | \n",
+ " 2304 | \n",
+ " [9] | \n",
+ " [9, 12] | \n",
+ "
\n",
+ " \n",
+ " | 94 | \n",
+ " 364669 256 256 | \n",
+ " ts_int_int | \n",
+ " 364669 | \n",
+ " 364.669 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 256 | \n",
+ " 256 | \n",
+ " [9] | \n",
+ " [9] | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 7448 | \n",
+ " 1991064 contentTrialCount = 75 | \n",
+ " ts_str_equals_int | \n",
+ " 1991064 | \n",
+ " 1991.064 | \n",
+ " 75 | \n",
+ " 75 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 7449 | \n",
+ " 1991064 contentReward = 75 | \n",
+ " ts_str_equals_int | \n",
+ " 1991064 | \n",
+ " 1991.064 | \n",
+ " 75 | \n",
+ " 75 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 7450 | \n",
+ " 1991065 contentOuterCount = 1 | \n",
+ " ts_str_equals_int | \n",
+ " 1991065 | \n",
+ " 1991.065 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 7452 | \n",
+ " 1991066 CURRENTGOAL IS 13 TASK_STATE IS 4 | \n",
+ " ts_str | \n",
+ " 1991066 | \n",
+ " 1991.066 | \n",
+ " CURRENTGOAL IS 13 TASK_STATE IS 4 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 7454 | \n",
+ " 1991331 64 0 | \n",
+ " ts_int_int | \n",
+ " 1991331 | \n",
+ " 1991.331 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 64 | \n",
+ " 0 | \n",
+ " [7] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5953 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " raw_line type \\\n",
+ "line_num \n",
+ "83 364241 UP 9 ts_str_int \n",
+ "84 364241 256 256 ts_int_int \n",
+ "87 364269 outer reward ts_str \n",
+ "89 364269 256 2304 ts_int_int \n",
+ "94 364669 256 256 ts_int_int \n",
+ "... ... ... \n",
+ "7448 1991064 contentTrialCount = 75 ts_str_equals_int \n",
+ "7449 1991064 contentReward = 75 ts_str_equals_int \n",
+ "7450 1991065 contentOuterCount = 1 ts_str_equals_int \n",
+ "7452 1991066 CURRENTGOAL IS 13 TASK_STATE IS 4 ts_str \n",
+ "7454 1991331 64 0 ts_int_int \n",
+ "\n",
+ " trodes_timestamp trodes_timestamp_sec \\\n",
+ "line_num \n",
+ "83 364241 364.241 \n",
+ "84 364241 364.241 \n",
+ "87 364269 364.269 \n",
+ "89 364269 364.269 \n",
+ "94 364669 364.669 \n",
+ "... ... ... \n",
+ "7448 1991064 1991.064 \n",
+ "7449 1991064 1991.064 \n",
+ "7450 1991065 1991.065 \n",
+ "7452 1991066 1991.066 \n",
+ "7454 1991331 1991.331 \n",
+ "\n",
+ " text value active_DIO_inputs_bitmask \\\n",
+ "line_num \n",
+ "83 UP 9 \n",
+ "84 256 \n",
+ "87 outer reward \n",
+ "89 256 \n",
+ "94 256 \n",
+ "... ... ... ... \n",
+ "7448 75 75 \n",
+ "7449 75 75 \n",
+ "7450 1 1 \n",
+ "7452 CURRENTGOAL IS 13 TASK_STATE IS 4 \n",
+ "7454 64 \n",
+ "\n",
+ " active_DIO_outputs_bitmask active_DIO_inputs active_DIO_outputs \n",
+ "line_num \n",
+ "83 [] [] \n",
+ "84 256 [9] [9] \n",
+ "87 [] [] \n",
+ "89 2304 [9] [9, 12] \n",
+ "94 256 [9] [9] \n",
+ "... ... ... ... \n",
+ "7448 [] [] \n",
+ "7449 [] [] \n",
+ "7450 [] [] \n",
+ "7452 [] [] \n",
+ "7454 0 [7] [] \n",
+ "\n",
+ "[5953 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "statescript_dfs[2]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "6e663e37",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " raw_line | \n",
+ " type | \n",
+ " trodes_timestamp | \n",
+ " trodes_timestamp_sec | \n",
+ " text | \n",
+ " value | \n",
+ " active_DIO_inputs_bitmask | \n",
+ " active_DIO_outputs_bitmask | \n",
+ " active_DIO_inputs | \n",
+ " active_DIO_outputs | \n",
+ "
\n",
+ " \n",
+ " | line_num | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 288 | \n",
+ " 322450 UP 4 | \n",
+ " ts_str_int | \n",
+ " 322450 | \n",
+ " 322.450 | \n",
+ " UP | \n",
+ " 4 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 289 | \n",
+ " 322450 8 0 | \n",
+ " ts_int_int | \n",
+ " 322450 | \n",
+ " 322.450 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 8 | \n",
+ " 0 | \n",
+ " [4] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 310 | \n",
+ " 322500 8 262144 | \n",
+ " ts_int_int | \n",
+ " 322500 | \n",
+ " 322.500 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 8 | \n",
+ " 262144 | \n",
+ " [4] | \n",
+ " [19] | \n",
+ "
\n",
+ " \n",
+ " | 315 | \n",
+ " 322510 lastPort = -1 to currPort = 2 | \n",
+ " ts_str | \n",
+ " 322510 | \n",
+ " 322.510 | \n",
+ " lastPort = -1 to currPort = 2 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 318 | \n",
+ " 322634 8 262208 | \n",
+ " ts_int_int | \n",
+ " 322634 | \n",
+ " 322.634 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 8 | \n",
+ " 262208 | \n",
+ " [4] | \n",
+ " [7, 19] | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 37624 | \n",
+ " 3357820 0 0 | \n",
+ " ts_int_int | \n",
+ " 3357820 | \n",
+ " 3357.820 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 37625 | \n",
+ " 3357823 0 64 | \n",
+ " ts_int_int | \n",
+ " 3357823 | \n",
+ " 3357.823 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 64 | \n",
+ " [] | \n",
+ " [7] | \n",
+ "
\n",
+ " \n",
+ " | 37626 | \n",
+ " 3357825 0 0 | \n",
+ " ts_int_int | \n",
+ " 3357825 | \n",
+ " 3357.825 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 37627 | \n",
+ " 3358882 RESETSTIM | \n",
+ " ts_str | \n",
+ " 3358882 | \n",
+ " 3358.882 | \n",
+ " RESETSTIM | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 37628 | \n",
+ " 3358882 ifDelay = 1 | \n",
+ " ts_str_equals_int | \n",
+ " 3358882 | \n",
+ " 3358.882 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
34144 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " raw_line type \\\n",
+ "line_num \n",
+ "288 322450 UP 4 ts_str_int \n",
+ "289 322450 8 0 ts_int_int \n",
+ "310 322500 8 262144 ts_int_int \n",
+ "315 322510 lastPort = -1 to currPort = 2 ts_str \n",
+ "318 322634 8 262208 ts_int_int \n",
+ "... ... ... \n",
+ "37624 3357820 0 0 ts_int_int \n",
+ "37625 3357823 0 64 ts_int_int \n",
+ "37626 3357825 0 0 ts_int_int \n",
+ "37627 3358882 RESETSTIM ts_str \n",
+ "37628 3358882 ifDelay = 1 ts_str_equals_int \n",
+ "\n",
+ " trodes_timestamp trodes_timestamp_sec \\\n",
+ "line_num \n",
+ "288 322450 322.450 \n",
+ "289 322450 322.450 \n",
+ "310 322500 322.500 \n",
+ "315 322510 322.510 \n",
+ "318 322634 322.634 \n",
+ "... ... ... \n",
+ "37624 3357820 3357.820 \n",
+ "37625 3357823 3357.823 \n",
+ "37626 3357825 3357.825 \n",
+ "37627 3358882 3358.882 \n",
+ "37628 3358882 3358.882 \n",
+ "\n",
+ " text value active_DIO_inputs_bitmask \\\n",
+ "line_num \n",
+ "288 UP 4 \n",
+ "289 8 \n",
+ "310 8 \n",
+ "315 lastPort = -1 to currPort = 2 \n",
+ "318 8 \n",
+ "... ... ... ... \n",
+ "37624 0 \n",
+ "37625 0 \n",
+ "37626 0 \n",
+ "37627 RESETSTIM \n",
+ "37628 1 1 \n",
+ "\n",
+ " active_DIO_outputs_bitmask active_DIO_inputs active_DIO_outputs \n",
+ "line_num \n",
+ "288 [] [] \n",
+ "289 0 [4] [] \n",
+ "310 262144 [4] [19] \n",
+ "315 [] [] \n",
+ "318 262208 [4] [7, 19] \n",
+ "... ... ... ... \n",
+ "37624 0 [] [] \n",
+ "37625 64 [] [7] \n",
+ "37626 0 [] [] \n",
+ "37627 [] [] \n",
+ "37628 [] [] \n",
+ "\n",
+ "[34144 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "statescript_dfs[3]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "de34f501",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " raw_line | \n",
+ " type | \n",
+ " trodes_timestamp | \n",
+ " trodes_timestamp_sec | \n",
+ " active_DIO_inputs_bitmask | \n",
+ " active_DIO_outputs_bitmask | \n",
+ " active_DIO_inputs | \n",
+ " active_DIO_outputs | \n",
+ "
\n",
+ " \n",
+ " | line_num | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 287 | \n",
+ " 4236600 1 0 | \n",
+ " ts_int_int | \n",
+ " 4236600 | \n",
+ " 4236.600 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " [1] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 288 | \n",
+ " 4239693 0 0 | \n",
+ " ts_int_int | \n",
+ " 4239693 | \n",
+ " 4239.693 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 289 | \n",
+ " 4242288 1 0 | \n",
+ " ts_int_int | \n",
+ " 4242288 | \n",
+ " 4242.288 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " [1] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 290 | \n",
+ " 4242749 0 0 | \n",
+ " ts_int_int | \n",
+ " 4242749 | \n",
+ " 4242.749 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 291 | \n",
+ " 4243151 1 0 | \n",
+ " ts_int_int | \n",
+ " 4243151 | \n",
+ " 4243.151 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " [1] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 292 | \n",
+ " 4244648 0 0 | \n",
+ " ts_int_int | \n",
+ " 4244648 | \n",
+ " 4244.648 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 293 | \n",
+ " 4313683 1 0 | \n",
+ " ts_int_int | \n",
+ " 4313683 | \n",
+ " 4313.683 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " [1] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 294 | \n",
+ " 4314756 0 0 | \n",
+ " ts_int_int | \n",
+ " 4314756 | \n",
+ " 4314.756 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 295 | \n",
+ " 4660546 1 0 | \n",
+ " ts_int_int | \n",
+ " 4660546 | \n",
+ " 4660.546 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " [1] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 296 | \n",
+ " 4661064 0 0 | \n",
+ " ts_int_int | \n",
+ " 4661064 | \n",
+ " 4661.064 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 297 | \n",
+ " 4661360 1 0 | \n",
+ " ts_int_int | \n",
+ " 4661360 | \n",
+ " 4661.360 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " [1] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 298 | \n",
+ " 4661565 0 0 | \n",
+ " ts_int_int | \n",
+ " 4661565 | \n",
+ " 4661.565 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 299 | \n",
+ " 4666057 8 0 | \n",
+ " ts_int_int | \n",
+ " 4666057 | \n",
+ " 4666.057 | \n",
+ " 8 | \n",
+ " 0 | \n",
+ " [4] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 300 | \n",
+ " 4666539 0 0 | \n",
+ " ts_int_int | \n",
+ " 4666539 | \n",
+ " 4666.539 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 301 | \n",
+ " 4667439 8 0 | \n",
+ " ts_int_int | \n",
+ " 4667439 | \n",
+ " 4667.439 | \n",
+ " 8 | \n",
+ " 0 | \n",
+ " [4] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 302 | \n",
+ " 4668457 0 0 | \n",
+ " ts_int_int | \n",
+ " 4668457 | \n",
+ " 4668.457 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 303 | \n",
+ " 4669200 8 0 | \n",
+ " ts_int_int | \n",
+ " 4669200 | \n",
+ " 4669.200 | \n",
+ " 8 | \n",
+ " 0 | \n",
+ " [4] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 304 | \n",
+ " 4669481 0 0 | \n",
+ " ts_int_int | \n",
+ " 4669481 | \n",
+ " 4669.481 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 305 | \n",
+ " 4675049 2 0 | \n",
+ " ts_int_int | \n",
+ " 4675049 | \n",
+ " 4675.049 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " [2] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 306 | \n",
+ " 4675275 0 0 | \n",
+ " ts_int_int | \n",
+ " 4675275 | \n",
+ " 4675.275 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 307 | \n",
+ " 4675302 2 0 | \n",
+ " ts_int_int | \n",
+ " 4675302 | \n",
+ " 4675.302 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " [2] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 308 | \n",
+ " 4675318 0 0 | \n",
+ " ts_int_int | \n",
+ " 4675318 | \n",
+ " 4675.318 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 309 | \n",
+ " 4675615 2 0 | \n",
+ " ts_int_int | \n",
+ " 4675615 | \n",
+ " 4675.615 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " [2] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 310 | \n",
+ " 4676096 0 0 | \n",
+ " ts_int_int | \n",
+ " 4676096 | \n",
+ " 4676.096 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " raw_line type trodes_timestamp trodes_timestamp_sec \\\n",
+ "line_num \n",
+ "287 4236600 1 0 ts_int_int 4236600 4236.600 \n",
+ "288 4239693 0 0 ts_int_int 4239693 4239.693 \n",
+ "289 4242288 1 0 ts_int_int 4242288 4242.288 \n",
+ "290 4242749 0 0 ts_int_int 4242749 4242.749 \n",
+ "291 4243151 1 0 ts_int_int 4243151 4243.151 \n",
+ "292 4244648 0 0 ts_int_int 4244648 4244.648 \n",
+ "293 4313683 1 0 ts_int_int 4313683 4313.683 \n",
+ "294 4314756 0 0 ts_int_int 4314756 4314.756 \n",
+ "295 4660546 1 0 ts_int_int 4660546 4660.546 \n",
+ "296 4661064 0 0 ts_int_int 4661064 4661.064 \n",
+ "297 4661360 1 0 ts_int_int 4661360 4661.360 \n",
+ "298 4661565 0 0 ts_int_int 4661565 4661.565 \n",
+ "299 4666057 8 0 ts_int_int 4666057 4666.057 \n",
+ "300 4666539 0 0 ts_int_int 4666539 4666.539 \n",
+ "301 4667439 8 0 ts_int_int 4667439 4667.439 \n",
+ "302 4668457 0 0 ts_int_int 4668457 4668.457 \n",
+ "303 4669200 8 0 ts_int_int 4669200 4669.200 \n",
+ "304 4669481 0 0 ts_int_int 4669481 4669.481 \n",
+ "305 4675049 2 0 ts_int_int 4675049 4675.049 \n",
+ "306 4675275 0 0 ts_int_int 4675275 4675.275 \n",
+ "307 4675302 2 0 ts_int_int 4675302 4675.302 \n",
+ "308 4675318 0 0 ts_int_int 4675318 4675.318 \n",
+ "309 4675615 2 0 ts_int_int 4675615 4675.615 \n",
+ "310 4676096 0 0 ts_int_int 4676096 4676.096 \n",
+ "\n",
+ " active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n",
+ "line_num \n",
+ "287 1 0 \n",
+ "288 0 0 \n",
+ "289 1 0 \n",
+ "290 0 0 \n",
+ "291 1 0 \n",
+ "292 0 0 \n",
+ "293 1 0 \n",
+ "294 0 0 \n",
+ "295 1 0 \n",
+ "296 0 0 \n",
+ "297 1 0 \n",
+ "298 0 0 \n",
+ "299 8 0 \n",
+ "300 0 0 \n",
+ "301 8 0 \n",
+ "302 0 0 \n",
+ "303 8 0 \n",
+ "304 0 0 \n",
+ "305 2 0 \n",
+ "306 0 0 \n",
+ "307 2 0 \n",
+ "308 0 0 \n",
+ "309 2 0 \n",
+ "310 0 0 \n",
+ "\n",
+ " active_DIO_inputs active_DIO_outputs \n",
+ "line_num \n",
+ "287 [1] [] \n",
+ "288 [] [] \n",
+ "289 [1] [] \n",
+ "290 [] [] \n",
+ "291 [1] [] \n",
+ "292 [] [] \n",
+ "293 [1] [] \n",
+ "294 [] [] \n",
+ "295 [1] [] \n",
+ "296 [] [] \n",
+ "297 [1] [] \n",
+ "298 [] [] \n",
+ "299 [4] [] \n",
+ "300 [] [] \n",
+ "301 [4] [] \n",
+ "302 [] [] \n",
+ "303 [4] [] \n",
+ "304 [] [] \n",
+ "305 [2] [] \n",
+ "306 [] [] \n",
+ "307 [2] [] \n",
+ "308 [] [] \n",
+ "309 [2] [] \n",
+ "310 [] [] "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "statescript_dfs[4]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "27c9f114",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " raw_line | \n",
+ " type | \n",
+ " trodes_timestamp | \n",
+ " trodes_timestamp_sec | \n",
+ " text | \n",
+ " value | \n",
+ " active_DIO_inputs_bitmask | \n",
+ " active_DIO_outputs_bitmask | \n",
+ " active_DIO_inputs | \n",
+ " active_DIO_outputs | \n",
+ "
\n",
+ " \n",
+ " | line_num | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 80 | \n",
+ " 3853607 0 64 | \n",
+ " ts_int_int | \n",
+ " 3853607 | \n",
+ " 3853.607 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 64 | \n",
+ " [] | \n",
+ " [7] | \n",
+ "
\n",
+ " \n",
+ " | 81 | \n",
+ " 3853630 128 64 | \n",
+ " ts_int_int | \n",
+ " 3853630 | \n",
+ " 3853.630 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 128 | \n",
+ " 64 | \n",
+ " [8] | \n",
+ " [7] | \n",
+ "
\n",
+ " \n",
+ " | 82 | \n",
+ " 3853785 0 64 | \n",
+ " ts_int_int | \n",
+ " 3853785 | \n",
+ " 3853.785 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 64 | \n",
+ " [] | \n",
+ " [7] | \n",
+ "
\n",
+ " \n",
+ " | 83 | \n",
+ " 3853796 128 64 | \n",
+ " ts_int_int | \n",
+ " 3853796 | \n",
+ " 3853.796 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 128 | \n",
+ " 64 | \n",
+ " [8] | \n",
+ " [7] | \n",
+ "
\n",
+ " \n",
+ " | 84 | \n",
+ " 3854144 0 64 | \n",
+ " ts_int_int | \n",
+ " 3854144 | \n",
+ " 3854.144 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 64 | \n",
+ " [] | \n",
+ " [7] | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 6684 | \n",
+ " 5023394 64 0 | \n",
+ " ts_int_int | \n",
+ " 5023394 | \n",
+ " 5023.394 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 64 | \n",
+ " 0 | \n",
+ " [7] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 6685 | \n",
+ " 5026015 0 0 | \n",
+ " ts_int_int | \n",
+ " 5026015 | \n",
+ " 5026.015 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 6686 | \n",
+ " 5026079 64 0 | \n",
+ " ts_int_int | \n",
+ " 5026079 | \n",
+ " 5026.079 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 64 | \n",
+ " 0 | \n",
+ " [7] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 6687 | \n",
+ " 5026170 0 0 | \n",
+ " ts_int_int | \n",
+ " 5026170 | \n",
+ " 5026.170 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 6688 | \n",
+ " 5026201 64 0 | \n",
+ " ts_int_int | \n",
+ " 5026201 | \n",
+ " 5026.201 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 64 | \n",
+ " 0 | \n",
+ " [7] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
6418 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " raw_line type trodes_timestamp trodes_timestamp_sec \\\n",
+ "line_num \n",
+ "80 3853607 0 64 ts_int_int 3853607 3853.607 \n",
+ "81 3853630 128 64 ts_int_int 3853630 3853.630 \n",
+ "82 3853785 0 64 ts_int_int 3853785 3853.785 \n",
+ "83 3853796 128 64 ts_int_int 3853796 3853.796 \n",
+ "84 3854144 0 64 ts_int_int 3854144 3854.144 \n",
+ "... ... ... ... ... \n",
+ "6684 5023394 64 0 ts_int_int 5023394 5023.394 \n",
+ "6685 5026015 0 0 ts_int_int 5026015 5026.015 \n",
+ "6686 5026079 64 0 ts_int_int 5026079 5026.079 \n",
+ "6687 5026170 0 0 ts_int_int 5026170 5026.170 \n",
+ "6688 5026201 64 0 ts_int_int 5026201 5026.201 \n",
+ "\n",
+ " text value active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n",
+ "line_num \n",
+ "80 NaN 0 64 \n",
+ "81 NaN 128 64 \n",
+ "82 NaN 0 64 \n",
+ "83 NaN 128 64 \n",
+ "84 NaN 0 64 \n",
+ "... ... ... ... ... \n",
+ "6684 NaN 64 0 \n",
+ "6685 NaN 0 0 \n",
+ "6686 NaN 64 0 \n",
+ "6687 NaN 0 0 \n",
+ "6688 NaN 64 0 \n",
+ "\n",
+ " active_DIO_inputs active_DIO_outputs \n",
+ "line_num \n",
+ "80 [] [7] \n",
+ "81 [8] [7] \n",
+ "82 [] [7] \n",
+ "83 [8] [7] \n",
+ "84 [] [7] \n",
+ "... ... ... \n",
+ "6684 [7] [] \n",
+ "6685 [] [] \n",
+ "6686 [7] [] \n",
+ "6687 [] [] \n",
+ "6688 [7] [] \n",
+ "\n",
+ "[6418 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "statescript_dfs[5]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "860793dd",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " raw_line | \n",
+ " type | \n",
+ " trodes_timestamp | \n",
+ " trodes_timestamp_sec | \n",
+ " active_DIO_inputs_bitmask | \n",
+ " active_DIO_outputs_bitmask | \n",
+ " active_DIO_inputs | \n",
+ " active_DIO_outputs | \n",
+ "
\n",
+ " \n",
+ " | line_num | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " 108023 0 131072 | \n",
+ " ts_int_int | \n",
+ " 108023 | \n",
+ " 108.023 | \n",
+ " 0 | \n",
+ " 131072 | \n",
+ " [] | \n",
+ " [18] | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 108024 0 0 | \n",
+ " ts_int_int | \n",
+ " 108024 | \n",
+ " 108.024 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 108188 0 131072 | \n",
+ " ts_int_int | \n",
+ " 108188 | \n",
+ " 108.188 | \n",
+ " 0 | \n",
+ " 131072 | \n",
+ " [] | \n",
+ " [18] | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 108189 0 0 | \n",
+ " ts_int_int | \n",
+ " 108189 | \n",
+ " 108.189 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 108353 0 131072 | \n",
+ " ts_int_int | \n",
+ " 108353 | \n",
+ " 108.353 | \n",
+ " 0 | \n",
+ " 131072 | \n",
+ " [] | \n",
+ " [18] | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 4015 | \n",
+ " 1048180 0 0 | \n",
+ " ts_int_int | \n",
+ " 1048180 | \n",
+ " 1048.180 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 4016 | \n",
+ " 1048344 0 131072 | \n",
+ " ts_int_int | \n",
+ " 1048344 | \n",
+ " 1048.344 | \n",
+ " 0 | \n",
+ " 131072 | \n",
+ " [] | \n",
+ " [18] | \n",
+ "
\n",
+ " \n",
+ " | 4017 | \n",
+ " 1048345 0 0 | \n",
+ " ts_int_int | \n",
+ " 1048345 | \n",
+ " 1048.345 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 4018 | \n",
+ " 1048509 0 131072 | \n",
+ " ts_int_int | \n",
+ " 1048509 | \n",
+ " 1048.509 | \n",
+ " 0 | \n",
+ " 131072 | \n",
+ " [] | \n",
+ " [18] | \n",
+ "
\n",
+ " \n",
+ " | 4019 | \n",
+ " 1048510 0 0 | \n",
+ " ts_int_int | \n",
+ " 1048510 | \n",
+ " 1048.510 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
4000 rows × 8 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " raw_line type trodes_timestamp \\\n",
+ "line_num \n",
+ "1 108023 0 131072 ts_int_int 108023 \n",
+ "2 108024 0 0 ts_int_int 108024 \n",
+ "3 108188 0 131072 ts_int_int 108188 \n",
+ "4 108189 0 0 ts_int_int 108189 \n",
+ "5 108353 0 131072 ts_int_int 108353 \n",
+ "... ... ... ... \n",
+ "4015 1048180 0 0 ts_int_int 1048180 \n",
+ "4016 1048344 0 131072 ts_int_int 1048344 \n",
+ "4017 1048345 0 0 ts_int_int 1048345 \n",
+ "4018 1048509 0 131072 ts_int_int 1048509 \n",
+ "4019 1048510 0 0 ts_int_int 1048510 \n",
+ "\n",
+ " trodes_timestamp_sec active_DIO_inputs_bitmask \\\n",
+ "line_num \n",
+ "1 108.023 0 \n",
+ "2 108.024 0 \n",
+ "3 108.188 0 \n",
+ "4 108.189 0 \n",
+ "5 108.353 0 \n",
+ "... ... ... \n",
+ "4015 1048.180 0 \n",
+ "4016 1048.344 0 \n",
+ "4017 1048.345 0 \n",
+ "4018 1048.509 0 \n",
+ "4019 1048.510 0 \n",
+ "\n",
+ " active_DIO_outputs_bitmask active_DIO_inputs active_DIO_outputs \n",
+ "line_num \n",
+ "1 131072 [] [18] \n",
+ "2 0 [] [] \n",
+ "3 131072 [] [18] \n",
+ "4 0 [] [] \n",
+ "5 131072 [] [18] \n",
+ "... ... ... ... \n",
+ "4015 0 [] [] \n",
+ "4016 131072 [] [18] \n",
+ "4017 0 [] [] \n",
+ "4018 131072 [] [18] \n",
+ "4019 0 [] [] \n",
+ "\n",
+ "[4000 rows x 8 columns]"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "statescript_dfs[6]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "a0f696c5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['first poke',\n",
+ " 'PROXON',\n",
+ " 'UP',\n",
+ " ,\n",
+ " 'DOWN',\n",
+ " 'second pokes',\n",
+ " 'PROXOFF',\n",
+ " 'UPIND_0',\n",
+ " 'pump on',\n",
+ " 'home reward',\n",
+ " '1',\n",
+ " '0',\n",
+ " '10',\n",
+ " '3',\n",
+ " 'poke during proximity',\n",
+ " 'UPIND_4',\n",
+ " '2',\n",
+ " '8',\n",
+ " '4',\n",
+ " 'UPIND_3',\n",
+ " 'LOCKOUT',\n",
+ " 'LOCKEND',\n",
+ " '6',\n",
+ " 'UPIND_1',\n",
+ " '5',\n",
+ " '12',\n",
+ " '7',\n",
+ " 'UPIND_2',\n",
+ " '9',\n",
+ " '14',\n",
+ " 'outer reward',\n",
+ " '11',\n",
+ " '13',\n",
+ " '15',\n",
+ " '16',\n",
+ " '17',\n",
+ " '18',\n",
+ " '19',\n",
+ " '20',\n",
+ " '21',\n",
+ " '22',\n",
+ " '23',\n",
+ " '24',\n",
+ " '25',\n",
+ " '26',\n",
+ " '27',\n",
+ " '28',\n",
+ " '29',\n",
+ " '30',\n",
+ " '31',\n",
+ " 'poke during lock period',\n",
+ " '33',\n",
+ " '34',\n",
+ " '35',\n",
+ " '36',\n",
+ " '37',\n",
+ " '38',\n",
+ " '39',\n",
+ " '40',\n",
+ " '41',\n",
+ " '42',\n",
+ " '43',\n",
+ " '44',\n",
+ " '46',\n",
+ " '47',\n",
+ " '48',\n",
+ " '49',\n",
+ " '50',\n",
+ " '51',\n",
+ " '52',\n",
+ " '53',\n",
+ " '54',\n",
+ " '55',\n",
+ " '56',\n",
+ " '57',\n",
+ " '58',\n",
+ " '59',\n",
+ " '60',\n",
+ " '61',\n",
+ " '32',\n",
+ " '62',\n",
+ " '63',\n",
+ " '64',\n",
+ " '65',\n",
+ " '66',\n",
+ " '67',\n",
+ " '68',\n",
+ " '69',\n",
+ " '70',\n",
+ " '71',\n",
+ " '72',\n",
+ " '73',\n",
+ " '74',\n",
+ " '75',\n",
+ " '76',\n",
+ " '77',\n",
+ " '78',\n",
+ " '79',\n",
+ " '80',\n",
+ " '81',\n",
+ " '82',\n",
+ " '83',\n",
+ " '84',\n",
+ " '85',\n",
+ " '86',\n",
+ " '87',\n",
+ " '45',\n",
+ " '88',\n",
+ " '89',\n",
+ " '90',\n",
+ " '91',\n",
+ " '92',\n",
+ " '93',\n",
+ " '94',\n",
+ " '95',\n",
+ " '96',\n",
+ " '97',\n",
+ " '98',\n",
+ " '99',\n",
+ " '100',\n",
+ " '101',\n",
+ " '102',\n",
+ " '103',\n",
+ " '104',\n",
+ " '105',\n",
+ " '106',\n",
+ " '107',\n",
+ " '108',\n",
+ " '109',\n",
+ " '110',\n",
+ " '111',\n",
+ " '112',\n",
+ " '113',\n",
+ " '114',\n",
+ " '115',\n",
+ " '116',\n",
+ " '117',\n",
+ " '118',\n",
+ " '119',\n",
+ " '120',\n",
+ " '121',\n",
+ " '122',\n",
+ " '123',\n",
+ " '124',\n",
+ " '125',\n",
+ " '126',\n",
+ " '127',\n",
+ " '128',\n",
+ " '129',\n",
+ " '130',\n",
+ " '131',\n",
+ " '132',\n",
+ " '133',\n",
+ " '134',\n",
+ " '135',\n",
+ " '136',\n",
+ " '137',\n",
+ " '138',\n",
+ " '139',\n",
+ " '140',\n",
+ " '141',\n",
+ " '142',\n",
+ " '143',\n",
+ " '144',\n",
+ " '145',\n",
+ " '146',\n",
+ " '147',\n",
+ " '148',\n",
+ " '149',\n",
+ " '150',\n",
+ " '151',\n",
+ " '152',\n",
+ " '153',\n",
+ " '154',\n",
+ " '155',\n",
+ " '156',\n",
+ " '157',\n",
+ " '158',\n",
+ " '159',\n",
+ " 'EndSession']"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "statescript_dfs[8].text.unique().tolist()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "e8108415",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: []\n",
+ "Index: []"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "statescript_dfs[7]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "5055cb37",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " raw_line | \n",
+ " type | \n",
+ " trodes_timestamp | \n",
+ " trodes_timestamp_sec | \n",
+ " text | \n",
+ " value | \n",
+ " active_DIO_inputs_bitmask | \n",
+ " active_DIO_outputs_bitmask | \n",
+ " active_DIO_inputs | \n",
+ " active_DIO_outputs | \n",
+ "
\n",
+ " \n",
+ " | line_num | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 487 | \n",
+ " 6935765 first poke | \n",
+ " ts_str | \n",
+ " 6935765 | \n",
+ " 6935.765 | \n",
+ " first poke | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 488 | \n",
+ " 6935766 PROXON | \n",
+ " ts_str | \n",
+ " 6935766 | \n",
+ " 6935.766 | \n",
+ " PROXON | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 489 | \n",
+ " 6935766 UP 10 | \n",
+ " ts_str_int | \n",
+ " 6935766 | \n",
+ " 6935.766 | \n",
+ " UP | \n",
+ " 10 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 490 | \n",
+ " 6935765 512 512 | \n",
+ " ts_int_int | \n",
+ " 6935765 | \n",
+ " 6935.765 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 512 | \n",
+ " 512 | \n",
+ " [10] | \n",
+ " [10] | \n",
+ "
\n",
+ " \n",
+ " | 491 | \n",
+ " 6935778 DOWN 10 | \n",
+ " ts_str_int | \n",
+ " 6935778 | \n",
+ " 6935.778 | \n",
+ " DOWN | \n",
+ " 10 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 25837 | \n",
+ " 9104866 mostRecentOuterWell_ind = 4 | \n",
+ " ts_str_equals_int | \n",
+ " 9104866 | \n",
+ " 9104.866 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 25838 | \n",
+ " 9104866 mostRecentRewardOuterWell_ind = 2 | \n",
+ " ts_str_equals_int | \n",
+ " 9104866 | \n",
+ " 9104.866 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 25842 | \n",
+ " 9104890 UPIND_4 | \n",
+ " ts_str | \n",
+ " 9104890 | \n",
+ " 9104.890 | \n",
+ " UPIND_4 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 25849 | \n",
+ " 9104908 0 0 | \n",
+ " ts_int_int | \n",
+ " 9104908 | \n",
+ " 9104.908 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 25850 | \n",
+ " 9105380 UPIND_0 | \n",
+ " ts_str | \n",
+ " 9105380 | \n",
+ " 9105.380 | \n",
+ " UPIND_0 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
19975 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " raw_line type \\\n",
+ "line_num \n",
+ "487 6935765 first poke ts_str \n",
+ "488 6935766 PROXON ts_str \n",
+ "489 6935766 UP 10 ts_str_int \n",
+ "490 6935765 512 512 ts_int_int \n",
+ "491 6935778 DOWN 10 ts_str_int \n",
+ "... ... ... \n",
+ "25837 9104866 mostRecentOuterWell_ind = 4 ts_str_equals_int \n",
+ "25838 9104866 mostRecentRewardOuterWell_ind = 2 ts_str_equals_int \n",
+ "25842 9104890 UPIND_4 ts_str \n",
+ "25849 9104908 0 0 ts_int_int \n",
+ "25850 9105380 UPIND_0 ts_str \n",
+ "\n",
+ " trodes_timestamp trodes_timestamp_sec text value \\\n",
+ "line_num \n",
+ "487 6935765 6935.765 first poke \n",
+ "488 6935766 6935.766 PROXON \n",
+ "489 6935766 6935.766 UP 10 \n",
+ "490 6935765 6935.765 \n",
+ "491 6935778 6935.778 DOWN 10 \n",
+ "... ... ... ... ... \n",
+ "25837 9104866 9104.866 4 4 \n",
+ "25838 9104866 9104.866 2 2 \n",
+ "25842 9104890 9104.890 UPIND_4 \n",
+ "25849 9104908 9104.908 \n",
+ "25850 9105380 9105.380 UPIND_0 \n",
+ "\n",
+ " active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n",
+ "line_num \n",
+ "487 \n",
+ "488 \n",
+ "489 \n",
+ "490 512 512 \n",
+ "491 \n",
+ "... ... ... \n",
+ "25837 \n",
+ "25838 \n",
+ "25842 \n",
+ "25849 0 0 \n",
+ "25850 \n",
+ "\n",
+ " active_DIO_inputs active_DIO_outputs \n",
+ "line_num \n",
+ "487 [] [] \n",
+ "488 [] [] \n",
+ "489 [] [] \n",
+ "490 [10] [10] \n",
+ "491 [] [] \n",
+ "... ... ... \n",
+ "25837 [] [] \n",
+ "25838 [] [] \n",
+ "25842 [] [] \n",
+ "25849 [] [] \n",
+ "25850 [] [] \n",
+ "\n",
+ "[19975 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "statescript_dfs[8]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "4cb98d40",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " raw_line | \n",
+ " type | \n",
+ " trodes_timestamp | \n",
+ " trodes_timestamp_sec | \n",
+ " text | \n",
+ " value | \n",
+ " active_DIO_inputs_bitmask | \n",
+ " active_DIO_outputs_bitmask | \n",
+ " active_DIO_inputs | \n",
+ " active_DIO_outputs | \n",
+ "
\n",
+ " \n",
+ " | line_num | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 489 | \n",
+ " 6935766 UP 10 | \n",
+ " ts_str_int | \n",
+ " 6935766 | \n",
+ " 6935.766 | \n",
+ " UP | \n",
+ " 10 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 494 | \n",
+ " 6935987 UP 10 | \n",
+ " ts_str_int | \n",
+ " 6935987 | \n",
+ " 6935.987 | \n",
+ " UP | \n",
+ " 10 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 499 | \n",
+ " 6936059 UP 10 | \n",
+ " ts_str_int | \n",
+ " 6936059 | \n",
+ " 6936.059 | \n",
+ " UP | \n",
+ " 10 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 541 | \n",
+ " 6937614 UP 10 | \n",
+ " ts_str_int | \n",
+ " 6937614 | \n",
+ " 6937.614 | \n",
+ " UP | \n",
+ " 10 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 550 | \n",
+ " 6937781 UP 10 | \n",
+ " ts_str_int | \n",
+ " 6937781 | \n",
+ " 6937.781 | \n",
+ " UP | \n",
+ " 10 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 25764 | \n",
+ " 9063659 UP 10 | \n",
+ " ts_str_int | \n",
+ " 9063659 | \n",
+ " 9063.659 | \n",
+ " UP | \n",
+ " 10 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 25775 | \n",
+ " 9065869 UP 10 | \n",
+ " ts_str_int | \n",
+ " 9065869 | \n",
+ " 9065.869 | \n",
+ " UP | \n",
+ " 10 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 25786 | \n",
+ " 9071584 UP 8 | \n",
+ " ts_str_int | \n",
+ " 9071584 | \n",
+ " 9071.584 | \n",
+ " UP | \n",
+ " 8 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 25817 | \n",
+ " 9073200 UP 8 | \n",
+ " ts_str_int | \n",
+ " 9073200 | \n",
+ " 9073.200 | \n",
+ " UP | \n",
+ " 8 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 25824 | \n",
+ " 9082033 UP 10 | \n",
+ " ts_str_int | \n",
+ " 9082033 | \n",
+ " 9082.033 | \n",
+ " UP | \n",
+ " 10 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2355 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " raw_line type trodes_timestamp trodes_timestamp_sec \\\n",
+ "line_num \n",
+ "489 6935766 UP 10 ts_str_int 6935766 6935.766 \n",
+ "494 6935987 UP 10 ts_str_int 6935987 6935.987 \n",
+ "499 6936059 UP 10 ts_str_int 6936059 6936.059 \n",
+ "541 6937614 UP 10 ts_str_int 6937614 6937.614 \n",
+ "550 6937781 UP 10 ts_str_int 6937781 6937.781 \n",
+ "... ... ... ... ... \n",
+ "25764 9063659 UP 10 ts_str_int 9063659 9063.659 \n",
+ "25775 9065869 UP 10 ts_str_int 9065869 9065.869 \n",
+ "25786 9071584 UP 8 ts_str_int 9071584 9071.584 \n",
+ "25817 9073200 UP 8 ts_str_int 9073200 9073.200 \n",
+ "25824 9082033 UP 10 ts_str_int 9082033 9082.033 \n",
+ "\n",
+ " text value active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n",
+ "line_num \n",
+ "489 UP 10 \n",
+ "494 UP 10 \n",
+ "499 UP 10 \n",
+ "541 UP 10 \n",
+ "550 UP 10 \n",
+ "... ... ... ... ... \n",
+ "25764 UP 10 \n",
+ "25775 UP 10 \n",
+ "25786 UP 8 \n",
+ "25817 UP 8 \n",
+ "25824 UP 10 \n",
+ "\n",
+ " active_DIO_inputs active_DIO_outputs \n",
+ "line_num \n",
+ "489 [] [] \n",
+ "494 [] [] \n",
+ "499 [] [] \n",
+ "541 [] [] \n",
+ "550 [] [] \n",
+ "... ... ... \n",
+ "25764 [] [] \n",
+ "25775 [] [] \n",
+ "25786 [] [] \n",
+ "25817 [] [] \n",
+ "25824 [] [] \n",
+ "\n",
+ "[2355 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "statescript_dfs[8].loc[statescript_dfs[8].text == \"UP\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "3245aaaf",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " raw_line | \n",
+ " type | \n",
+ " trodes_timestamp | \n",
+ " trodes_timestamp_sec | \n",
+ " text | \n",
+ " value | \n",
+ " active_DIO_inputs_bitmask | \n",
+ " active_DIO_outputs_bitmask | \n",
+ " active_DIO_inputs | \n",
+ " active_DIO_outputs | \n",
+ "
\n",
+ " \n",
+ " | line_num | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 47 | \n",
+ " 2300995 rewCount = 1 | \n",
+ " ts_str_equals_int | \n",
+ " 2300995 | \n",
+ " 2300.995 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 48 | \n",
+ " 2300995 1 4 | \n",
+ " ts_int_int | \n",
+ " 2300995 | \n",
+ " 2300.995 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 1 | \n",
+ " 4 | \n",
+ " [1] | \n",
+ " [3] | \n",
+ "
\n",
+ " \n",
+ " | 49 | \n",
+ " 2301028 0 4 | \n",
+ " ts_int_int | \n",
+ " 2301028 | \n",
+ " 2301.028 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 4 | \n",
+ " [] | \n",
+ " [3] | \n",
+ "
\n",
+ " \n",
+ " | 50 | \n",
+ " 2301295 0 0 | \n",
+ " ts_int_int | \n",
+ " 2301295 | \n",
+ " 2301.295 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 51 | \n",
+ " 2303333 1 0 | \n",
+ " ts_int_int | \n",
+ " 2303333 | \n",
+ " 2303.333 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " [1] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 294 | \n",
+ " 3166053 0 0 | \n",
+ " ts_int_int | \n",
+ " 3166053 | \n",
+ " 3166.053 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 295 | \n",
+ " 3168403 1 0 | \n",
+ " ts_int_int | \n",
+ " 3168403 | \n",
+ " 3168.403 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " [1] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 296 | \n",
+ " 3168575 0 0 | \n",
+ " ts_int_int | \n",
+ " 3168575 | \n",
+ " 3168.575 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 297 | \n",
+ " 3168643 1 0 | \n",
+ " ts_int_int | \n",
+ " 3168643 | \n",
+ " 3168.643 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " [1] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 298 | \n",
+ " 3168890 0 0 | \n",
+ " ts_int_int | \n",
+ " 3168890 | \n",
+ " 3168.890 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
252 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " raw_line type trodes_timestamp \\\n",
+ "line_num \n",
+ "47 2300995 rewCount = 1 ts_str_equals_int 2300995 \n",
+ "48 2300995 1 4 ts_int_int 2300995 \n",
+ "49 2301028 0 4 ts_int_int 2301028 \n",
+ "50 2301295 0 0 ts_int_int 2301295 \n",
+ "51 2303333 1 0 ts_int_int 2303333 \n",
+ "... ... ... ... \n",
+ "294 3166053 0 0 ts_int_int 3166053 \n",
+ "295 3168403 1 0 ts_int_int 3168403 \n",
+ "296 3168575 0 0 ts_int_int 3168575 \n",
+ "297 3168643 1 0 ts_int_int 3168643 \n",
+ "298 3168890 0 0 ts_int_int 3168890 \n",
+ "\n",
+ " trodes_timestamp_sec text value active_DIO_inputs_bitmask \\\n",
+ "line_num \n",
+ "47 2300.995 1 1 \n",
+ "48 2300.995 1 \n",
+ "49 2301.028 0 \n",
+ "50 2301.295 0 \n",
+ "51 2303.333 1 \n",
+ "... ... ... ... ... \n",
+ "294 3166.053 NaN 0 \n",
+ "295 3168.403 NaN 1 \n",
+ "296 3168.575 NaN 0 \n",
+ "297 3168.643 NaN 1 \n",
+ "298 3168.890 NaN 0 \n",
+ "\n",
+ " active_DIO_outputs_bitmask active_DIO_inputs active_DIO_outputs \n",
+ "line_num \n",
+ "47 [] [] \n",
+ "48 4 [1] [3] \n",
+ "49 4 [] [3] \n",
+ "50 0 [] [] \n",
+ "51 0 [1] [] \n",
+ "... ... ... ... \n",
+ "294 0 [] [] \n",
+ "295 0 [1] [] \n",
+ "296 0 [] [] \n",
+ "297 0 [1] [] \n",
+ "298 0 [] [] \n",
+ "\n",
+ "[252 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "statescript_dfs[9]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "688f3a3d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " raw_line | \n",
+ " type | \n",
+ " trodes_timestamp | \n",
+ " trodes_timestamp_sec | \n",
+ " text | \n",
+ " value | \n",
+ " active_DIO_inputs_bitmask | \n",
+ " active_DIO_outputs_bitmask | \n",
+ " active_DIO_inputs | \n",
+ " active_DIO_outputs | \n",
+ "
\n",
+ " \n",
+ " | line_num | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [raw_line, type, trodes_timestamp, trodes_timestamp_sec, text, value, active_DIO_inputs_bitmask, active_DIO_outputs_bitmask, active_DIO_inputs, active_DIO_outputs]\n",
+ "Index: []"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "statescript_dfs[9].loc[statescript_dfs[9].text == \"rewCount\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "a0dc407f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " raw_line | \n",
+ " type | \n",
+ " trodes_timestamp | \n",
+ " trodes_timestamp_sec | \n",
+ " text | \n",
+ " value | \n",
+ " active_DIO_inputs_bitmask | \n",
+ " active_DIO_outputs_bitmask | \n",
+ " active_DIO_inputs | \n",
+ " active_DIO_outputs | \n",
+ "
\n",
+ " \n",
+ " | line_num | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 80 | \n",
+ " 3853607 0 64 | \n",
+ " ts_int_int | \n",
+ " 3853607 | \n",
+ " 3853.607 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 64 | \n",
+ " [] | \n",
+ " [7] | \n",
+ "
\n",
+ " \n",
+ " | 81 | \n",
+ " 3853630 128 64 | \n",
+ " ts_int_int | \n",
+ " 3853630 | \n",
+ " 3853.630 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 128 | \n",
+ " 64 | \n",
+ " [8] | \n",
+ " [7] | \n",
+ "
\n",
+ " \n",
+ " | 82 | \n",
+ " 3853785 0 64 | \n",
+ " ts_int_int | \n",
+ " 3853785 | \n",
+ " 3853.785 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 64 | \n",
+ " [] | \n",
+ " [7] | \n",
+ "
\n",
+ " \n",
+ " | 83 | \n",
+ " 3853796 128 64 | \n",
+ " ts_int_int | \n",
+ " 3853796 | \n",
+ " 3853.796 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 128 | \n",
+ " 64 | \n",
+ " [8] | \n",
+ " [7] | \n",
+ "
\n",
+ " \n",
+ " | 84 | \n",
+ " 3854144 0 64 | \n",
+ " ts_int_int | \n",
+ " 3854144 | \n",
+ " 3854.144 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 64 | \n",
+ " [] | \n",
+ " [7] | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 6684 | \n",
+ " 5023394 64 0 | \n",
+ " ts_int_int | \n",
+ " 5023394 | \n",
+ " 5023.394 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 64 | \n",
+ " 0 | \n",
+ " [7] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 6685 | \n",
+ " 5026015 0 0 | \n",
+ " ts_int_int | \n",
+ " 5026015 | \n",
+ " 5026.015 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 6686 | \n",
+ " 5026079 64 0 | \n",
+ " ts_int_int | \n",
+ " 5026079 | \n",
+ " 5026.079 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 64 | \n",
+ " 0 | \n",
+ " [7] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 6687 | \n",
+ " 5026170 0 0 | \n",
+ " ts_int_int | \n",
+ " 5026170 | \n",
+ " 5026.170 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ " | 6688 | \n",
+ " 5026201 64 0 | \n",
+ " ts_int_int | \n",
+ " 5026201 | \n",
+ " 5026.201 | \n",
+ " NaN | \n",
+ " <NA> | \n",
+ " 64 | \n",
+ " 0 | \n",
+ " [7] | \n",
+ " [] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
6418 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " raw_line type trodes_timestamp trodes_timestamp_sec \\\n",
+ "line_num \n",
+ "80 3853607 0 64 ts_int_int 3853607 3853.607 \n",
+ "81 3853630 128 64 ts_int_int 3853630 3853.630 \n",
+ "82 3853785 0 64 ts_int_int 3853785 3853.785 \n",
+ "83 3853796 128 64 ts_int_int 3853796 3853.796 \n",
+ "84 3854144 0 64 ts_int_int 3854144 3854.144 \n",
+ "... ... ... ... ... \n",
+ "6684 5023394 64 0 ts_int_int 5023394 5023.394 \n",
+ "6685 5026015 0 0 ts_int_int 5026015 5026.015 \n",
+ "6686 5026079 64 0 ts_int_int 5026079 5026.079 \n",
+ "6687 5026170 0 0 ts_int_int 5026170 5026.170 \n",
+ "6688 5026201 64 0 ts_int_int 5026201 5026.201 \n",
+ "\n",
+ " text value active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n",
+ "line_num \n",
+ "80 NaN 0 64 \n",
+ "81 NaN 128 64 \n",
+ "82 NaN 0 64 \n",
+ "83 NaN 128 64 \n",
+ "84 NaN 0 64 \n",
+ "... ... ... ... ... \n",
+ "6684 NaN 64 0 \n",
+ "6685 NaN 0 0 \n",
+ "6686 NaN 64 0 \n",
+ "6687 NaN 0 0 \n",
+ "6688 NaN 64 0 \n",
+ "\n",
+ " active_DIO_inputs active_DIO_outputs \n",
+ "line_num \n",
+ "80 [] [7] \n",
+ "81 [8] [7] \n",
+ "82 [] [7] \n",
+ "83 [8] [7] \n",
+ "84 [] [7] \n",
+ "... ... ... \n",
+ "6684 [7] [] \n",
+ "6685 [] [] \n",
+ "6686 [7] [] \n",
+ "6687 [] [] \n",
+ "6688 [7] [] \n",
+ "\n",
+ "[6418 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "processor = StateScriptLogProcessor.from_file(\n",
+ " \"/Users/edeno/Downloads/20220103_Ban77mW_02_lineartrack_p1.stateScriptLog\"\n",
+ ")\n",
+ "processor.get_events_dataframe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "7f9b4c03",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
StateScriptLogProcessor
\n",
+ "
Source: from file: /Users/edeno/Downloads/20220103_Ban77mW_02_lineartrack_p1.stateScriptLog
\n",
+ "
Status: Parsed (6689 raw entries)
\n",
+ "
Time Offset: Not Calculated
\n",
+ "
DataFrame: Generated
\n",
+ "
DataFrame Preview (first 5 rows):
\n",
+ " \n",
+ " \n",
+ " | raw_line | \n",
+ " type | \n",
+ " trodes_timestamp | \n",
+ " trodes_timestamp_sec | \n",
+ " text | \n",
+ " value | \n",
+ " active_DIO_inputs_bitmask | \n",
+ " active_DIO_outputs_bitmask | \n",
+ " active_DIO_inputs | \n",
+ " active_DIO_outputs | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 3853607 0 64 | \n",
+ " ts_int_int | \n",
+ " 3853607 | \n",
+ " 3853.607 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 64 | \n",
+ " [] | \n",
+ " [7] | \n",
+ "
\n",
+ " \n",
+ " | 3853630 128 64 | \n",
+ " ts_int_int | \n",
+ " 3853630 | \n",
+ " 3853.630 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 128 | \n",
+ " 64 | \n",
+ " [8] | \n",
+ " [7] | \n",
+ "
\n",
+ " \n",
+ " | 3853785 0 64 | \n",
+ " ts_int_int | \n",
+ " 3853785 | \n",
+ " 3853.785 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 64 | \n",
+ " [] | \n",
+ " [7] | \n",
+ "
\n",
+ " \n",
+ " | 3853796 128 64 | \n",
+ " ts_int_int | \n",
+ " 3853796 | \n",
+ " 3853.796 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 128 | \n",
+ " 64 | \n",
+ " [8] | \n",
+ " [7] | \n",
+ "
\n",
+ " \n",
+ " | 3854144 0 64 | \n",
+ " ts_int_int | \n",
+ " 3854144 | \n",
+ " 3854.144 | \n",
+ " <NA> | \n",
+ " <NA> | \n",
+ " 0 | \n",
+ " 64 | \n",
+ " [] | \n",
+ " [7] | \n",
+ "
\n",
+ " \n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "processor"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "166a1f1d",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "AssertionError",
+ "evalue": "",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[17], line 427\u001b[0m\n\u001b[1;32m 425\u001b[0m test_parse_ts_int_int_direct()\n\u001b[1;32m 426\u001b[0m test_parse_ts_str_int_direct()\n\u001b[0;32m--> 427\u001b[0m \u001b[43mtest_parse_ts_str_equals_int_direct\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 428\u001b[0m test_parse_ts_str_direct()\n\u001b[1;32m 429\u001b[0m test_parse_statescript_line_ts_int_int()\n",
+ "Cell \u001b[0;32mIn[17], line 129\u001b[0m, in \u001b[0;36mtest_parse_ts_str_equals_int_direct\u001b[0;34m()\u001b[0m\n\u001b[1;32m 122\u001b[0m parts \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m100078\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcounter_handlePoke\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m=\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m1\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 123\u001b[0m expected \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 124\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtype\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mts_str_equals_int\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 125\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimestamp\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m100078\u001b[39m,\n\u001b[1;32m 126\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcounter_handlePoke\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 127\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvalue\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m1\u001b[39m,\n\u001b[1;32m 128\u001b[0m }\n\u001b[0;32m--> 129\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m parse_ts_str_equals_int(parts) \u001b[38;5;241m==\u001b[39m expected\n\u001b[1;32m 131\u001b[0m parts_multi_word \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m3610855\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtotal\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrewards\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m=\u001b[39m\u001b[38;5;124m\"\u001b[39m, 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m70\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 132\u001b[0m expected_multi \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 133\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtype\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mts_str_equals_int\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 134\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimestamp\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m3610855\u001b[39m,\n\u001b[1;32m 135\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtotal rewards\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 136\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvalue\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m70\u001b[39m,\n\u001b[1;32m 137\u001b[0m }\n",
+ "\u001b[0;31mAssertionError\u001b[0m: "
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import pathlib\n",
+ "import tempfile\n",
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import pytest\n",
+ "\n",
+ "from spyglass.utils.statescript import (\n",
+ " StateScriptLogProcessor,\n",
+ " _parse_int,\n",
+ " parse_statescript_line,\n",
+ " parse_ts_int_int,\n",
+ " parse_ts_str,\n",
+ " parse_ts_str_equals_int,\n",
+ " parse_ts_str_int,\n",
+ ")\n",
+ "\n",
+ "\n",
+ "@pytest.fixture(scope=\"module\")\n",
+ "def sample_log_content():\n",
+ " \"\"\"Provides sample log content.\"\"\"\n",
+ " return \"\"\"# Test log\n",
+ "76504 0 0\n",
+ "76566 center_poke\n",
+ "76566 65536 0\n",
+ "100078 counter_handlePoke = 1\n",
+ "100078 4 0\n",
+ "100559 0 0\n",
+ "Executing this line\n",
+ "115030 center_poke\n",
+ "115030 65536 0\n",
+ "\"\"\"\n",
+ "\n",
+ "\n",
+ "@pytest.fixture\n",
+ "def processor(sample_log_content):\n",
+ " \"\"\"Provides a processor instance initialized with sample content.\"\"\"\n",
+ " return StateScriptLogProcessor(sample_log_content)\n",
+ "\n",
+ "\n",
+ "@pytest.fixture(scope=\"module\")\n",
+ "def external_times():\n",
+ " \"\"\"Provides sample external times.\"\"\"\n",
+ " return np.array([1678886476.530, 1678886500.100, 1678886515.050])\n",
+ "\n",
+ "\n",
+ "@pytest.fixture\n",
+ "def temp_log_file(sample_log_content):\n",
+ " \"\"\"Creates a temporary log file and yields its path.\"\"\"\n",
+ " with tempfile.NamedTemporaryFile(\n",
+ " mode=\"w\", delete=False, suffix=\".stateScriptLog\"\n",
+ " ) as tmp_file:\n",
+ " tmp_file.write(sample_log_content)\n",
+ " tmp_file_path = tmp_file.name\n",
+ " yield tmp_file_path\n",
+ " os.remove(tmp_file_path)\n",
+ "\n",
+ "\n",
+ "# --- Tests for Level 1 Parsers ---\n",
+ "\n",
+ "\n",
+ "def test_parse_int():\n",
+ " \"\"\"Test the _parse_int helper function.\"\"\"\n",
+ " assert _parse_int(\"123\") == 123\n",
+ " assert _parse_int(\"-45\") == -45\n",
+ " assert _parse_int(\"0\") == 0\n",
+ " assert _parse_int(\"abc\") is None\n",
+ " assert _parse_int(\"12.3\") is None\n",
+ " assert _parse_int(\"\") is None\n",
+ " assert (\n",
+ " _parse_int(\" 123 \") == 123\n",
+ " ) # Should handle surrounding whitespace if not stripped before\n",
+ "\n",
+ "\n",
+ "def test_parse_ts_int_int_direct():\n",
+ " \"\"\"Test parse_ts_int_int directly.\"\"\"\n",
+ " parts = [\"8386500\", \"0\", \"0\"]\n",
+ " expected = {\n",
+ " \"type\": \"ts_int_int\",\n",
+ " \"timestamp\": 8386500,\n",
+ " \"value1\": 0,\n",
+ " \"value2\": 0,\n",
+ " }\n",
+ " assert parse_ts_int_int(parts) == expected\n",
+ "\n",
+ " parts_wrong_len = [\"123\", \"0\"]\n",
+ " assert parse_ts_int_int(parts_wrong_len) is None\n",
+ "\n",
+ " parts_not_int = [\"123\", \"abc\", \"0\"]\n",
+ " assert parse_ts_int_int(parts_not_int) is None\n",
+ "\n",
+ " parts_float = [\"123\", \"4.5\", \"0\"]\n",
+ " assert parse_ts_int_int(parts_float) is None\n",
+ "\n",
+ "\n",
+ "def test_parse_ts_str_int_direct():\n",
+ " \"\"\"Test parse_ts_str_int directly.\"\"\"\n",
+ " parts = [\"8386500\", \"DOWN\", \"3\"]\n",
+ " expected = {\n",
+ " \"type\": \"ts_str_int\",\n",
+ " \"timestamp\": 8386500,\n",
+ " \"text\": \"DOWN\",\n",
+ " \"value\": 3,\n",
+ " }\n",
+ " assert parse_ts_str_int(parts) == expected\n",
+ "\n",
+ " parts_wrong_len = [\"123\", \"UP\"]\n",
+ " assert parse_ts_str_int(parts_wrong_len) is None\n",
+ "\n",
+ " parts_str_is_int = [\"123\", \"456\", \"789\"]\n",
+ " assert (\n",
+ " parse_ts_str_int(parts_str_is_int) is None\n",
+ " ) # Should be handled by ts_int_int\n",
+ "\n",
+ " parts_val_not_int = [\"123\", \"UP\", \"abc\"]\n",
+ " assert parse_ts_str_int(parts_val_not_int) is None\n",
+ "\n",
+ "\n",
+ "def test_parse_ts_str_equals_int_direct():\n",
+ " \"\"\"Test parse_ts_str_equals_int directly.\"\"\"\n",
+ " parts = [\"100078\", \"counter_handlePoke\", \"=\", \"1\"]\n",
+ " expected = {\n",
+ " \"type\": \"ts_str_equals_int\",\n",
+ " \"timestamp\": 100078,\n",
+ " \"text\": \"counter_handlePoke\",\n",
+ " \"value\": 1,\n",
+ " }\n",
+ " assert parse_ts_str_equals_int(parts) == expected\n",
+ "\n",
+ " parts_multi_word = [\"3610855\", \"total\", \"rewards\", \"=\", \"70\"]\n",
+ " expected_multi = {\n",
+ " \"type\": \"ts_str_equals_int\",\n",
+ " \"timestamp\": 3610855,\n",
+ " \"text\": \"total rewards\",\n",
+ " \"value\": 70,\n",
+ " }\n",
+ " assert parse_ts_str_equals_int(parts_multi_word) == expected_multi\n",
+ "\n",
+ " parts_wrong_len = [\"123\", \"=\", \"1\"]\n",
+ " assert parse_ts_str_equals_int(parts_wrong_len) is None\n",
+ "\n",
+ " parts_no_equals = [\"123\", \"text\", \"1\"]\n",
+ " assert parse_ts_str_equals_int(parts_no_equals) is None\n",
+ "\n",
+ " parts_val_not_int = [\"123\", \"text\", \"=\", \"abc\"]\n",
+ " assert parse_ts_str_equals_int(parts_val_not_int) is None\n",
+ "\n",
+ "\n",
+ "def test_parse_ts_str_direct():\n",
+ " \"\"\"Test parse_ts_str directly.\"\"\"\n",
+ " parts = [\"76566\", \"center_poke\"]\n",
+ " expected = {\"type\": \"ts_str\", \"timestamp\": 76566, \"text\": \"center_poke\"}\n",
+ " assert parse_ts_str(parts) == expected\n",
+ "\n",
+ " parts_multi_word = [\n",
+ " \"1271815\",\n",
+ " \"lastPort\",\n",
+ " \"=\",\n",
+ " \"-1\",\n",
+ " \"to\",\n",
+ " \"currPort\",\n",
+ " \"=\",\n",
+ " \"2\",\n",
+ " ]\n",
+ " expected_multi = {\n",
+ " \"type\": \"ts_str\",\n",
+ " \"timestamp\": 1271815,\n",
+ " \"text\": \"lastPort = -1 to currPort = 2\",\n",
+ " }\n",
+ " assert parse_ts_str(parts_multi_word) == expected_multi\n",
+ "\n",
+ " parts_wrong_len = [\"123\"]\n",
+ " assert parse_ts_str(parts_wrong_len) is None\n",
+ "\n",
+ " parts_second_is_int = [\n",
+ " \"123\",\n",
+ " \"456\",\n",
+ " ] # Second part is int, should fail this parser\n",
+ " assert parse_ts_str(parts_second_is_int) is None\n",
+ "\n",
+ "\n",
+ "# --- Tests for parse_statescript_line (Covers integration and dispatching) ---\n",
+ "\n",
+ "\n",
+ "def test_parse_statescript_line_ts_int_int():\n",
+ " \"\"\"Test parse_statescript_line dispatching for ts_int_int.\"\"\"\n",
+ " line = \"8386500 0 0\"\n",
+ " parsed = parse_statescript_line(line)\n",
+ " assert parsed[\"type\"] == \"ts_int_int\"\n",
+ " assert parsed[\"timestamp\"] == 8386500\n",
+ " assert parsed[\"value1\"] == 0\n",
+ " assert parsed[\"value2\"] == 0\n",
+ " assert parsed[\"raw_line\"] == line\n",
+ "\n",
+ "\n",
+ "def test_parse_statescript_line_ts_str_int():\n",
+ " \"\"\"Test parse_statescript_line dispatching for ts_str_int.\"\"\"\n",
+ " line = \"8386500 DOWN 3\"\n",
+ " parsed = parse_statescript_line(line)\n",
+ " assert parsed[\"type\"] == \"ts_str_int\"\n",
+ " assert parsed[\"timestamp\"] == 8386500\n",
+ " assert parsed[\"text\"] == \"DOWN\"\n",
+ " assert parsed[\"value\"] == 3\n",
+ " assert parsed[\"raw_line\"] == line\n",
+ "\n",
+ "\n",
+ "def test_parse_statescript_line_ts_str_equals_int():\n",
+ " \"\"\"Test parse_statescript_line dispatching for ts_str_equals_int.\"\"\"\n",
+ " line = \"100078 counter_handlePoke = 1\"\n",
+ " parsed = parse_statescript_line(line)\n",
+ " assert parsed[\"type\"] == \"ts_str_equals_int\"\n",
+ " assert parsed[\"timestamp\"] == 100078\n",
+ " assert parsed[\"text\"] == \"counter_handlePoke\"\n",
+ " assert parsed[\"value\"] == 1\n",
+ " assert parsed[\"raw_line\"] == line\n",
+ "\n",
+ "\n",
+ "def test_parse_statescript_line_ts_str():\n",
+ " \"\"\"Test parse_statescript_line dispatching for ts_str.\"\"\"\n",
+ " line = \"76566 center_poke\"\n",
+ " parsed = parse_statescript_line(line)\n",
+ " assert parsed[\"type\"] == \"ts_str\"\n",
+ " assert parsed[\"timestamp\"] == 76566\n",
+ " assert parsed[\"text\"] == \"center_poke\"\n",
+ " assert parsed[\"raw_line\"] == line\n",
+ "\n",
+ "\n",
+ "def test_parse_statescript_line_unknown():\n",
+ " \"\"\"Test parse_statescript_line dispatching for unknown lines.\"\"\"\n",
+ " line = \"Executing trigger function 22\" # No timestamp\n",
+ " parsed = parse_statescript_line(line)\n",
+ " assert parsed[\"type\"] == \"unknown\"\n",
+ " assert \"timestamp\" not in parsed\n",
+ " assert parsed[\"raw_line\"] == line\n",
+ "\n",
+ "\n",
+ "def test_parse_statescript_line_comment_empty():\n",
+ " \"\"\"Test parse_statescript_line dispatching for comments/empty.\"\"\"\n",
+ " line_c = \"# comment\"\n",
+ " line_e = \"\"\n",
+ " line_s = \" \"\n",
+ " assert parse_statescript_line(line_c)[\"type\"] == \"comment_or_empty\"\n",
+ " assert parse_statescript_line(line_c)[\"raw_line\"] == line_c\n",
+ " assert parse_statescript_line(line_e)[\"type\"] == \"comment_or_empty\"\n",
+ " assert parse_statescript_line(line_e)[\"raw_line\"] == line_e\n",
+ " assert parse_statescript_line(line_s)[\"type\"] == \"comment_or_empty\"\n",
+ " assert parse_statescript_line(line_s)[\"raw_line\"] == \"\"\n",
+ "\n",
+ "\n",
+ "# --- Tests for StateScriptLogProcessor ---\n",
+ "\n",
+ "\n",
+ "def test_init_from_string(processor, sample_log_content):\n",
+ " assert processor.log_content == sample_log_content\n",
+ " assert processor.source_description == \"from string\"\n",
+ " assert processor.raw_events == []\n",
+ " assert processor.time_offset is None\n",
+ " assert processor.processed_events_df is None\n",
+ "\n",
+ "\n",
+ "def test_init_from_file(temp_log_file, sample_log_content):\n",
+ " processor_file = StateScriptLogProcessor.from_file(temp_log_file)\n",
+ " assert processor_file.log_content == sample_log_content\n",
+ " assert processor_file.source_description.startswith(\"from file:\")\n",
+ " assert pathlib.Path(temp_log_file).name in processor_file.source_description\n",
+ "\n",
+ "\n",
+ "def test_init_from_file_not_found():\n",
+ " with pytest.raises(FileNotFoundError):\n",
+ " StateScriptLogProcessor.from_file(\"non_existent_file.log\")\n",
+ "\n",
+ "\n",
+ "def test_parse_raw_events(processor):\n",
+ " events = processor.parse_raw_events()\n",
+ " assert isinstance(events, list)\n",
+ " assert len(events) == 10\n",
+ " assert events[0][\"type\"] == \"comment_or_empty\"\n",
+ " assert events[1][\"type\"] == \"ts_int_int\"\n",
+ " assert events[7][\"type\"] == \"unknown\"\n",
+ " assert events[1][\"raw_line\"] == \"76504 0 0\"\n",
+ " assert events[7][\"raw_line\"] == \"Executing this line\"\n",
+ "\n",
+ "\n",
+ "def test_find_reference_events(processor):\n",
+ " ref_df = processor._find_reference_events(\n",
+ " event_type=\"ts_str\", conditions={\"text\": \"center_poke\"}\n",
+ " )\n",
+ " assert isinstance(ref_df, pd.DataFrame)\n",
+ " assert len(ref_df) == 2\n",
+ " pd.testing.assert_series_equal(\n",
+ " ref_df[\"timestamp\"],\n",
+ " pd.Series([76566, 115030], name=\"timestamp\"),\n",
+ " check_dtype=False,\n",
+ " )\n",
+ " assert \"log_timestamp_sec\" in ref_df.columns\n",
+ "\n",
+ " ref_df_num = processor._find_reference_events(\n",
+ " event_type=\"ts_int_int\", conditions={\"value1\": 4, \"value2\": 0}\n",
+ " )\n",
+ " assert len(ref_df_num) == 1\n",
+ " assert ref_df_num[\"timestamp\"].iloc[0] == 100078\n",
+ "\n",
+ " ref_df_none = processor._find_reference_events(\n",
+ " event_type=\"ts_str_int\", conditions={\"text\": \"nonexistent\"}\n",
+ " )\n",
+ " assert ref_df_none.empty\n",
+ "\n",
+ "\n",
+ "def test_calculate_time_offset_success(processor):\n",
+ " ext_times = np.array([1678880076.566, 1678880115.030])\n",
+ " offset = processor.calculate_time_offset(\n",
+ " external_reference_times=ext_times,\n",
+ " log_event_type=\"ts_int_int\",\n",
+ " log_event_conditions={\"value1\": 65536, \"value2\": 0},\n",
+ " check_n_events=2,\n",
+ " )\n",
+ " assert offset is not None\n",
+ " assert offset == pytest.approx(1678880000.0)\n",
+ "\n",
+ "\n",
+ "def test_calculate_time_offset_fail_not_enough_log(processor, external_times):\n",
+ " offset = processor.calculate_time_offset(\n",
+ " external_reference_times=external_times,\n",
+ " log_event_type=\"ts_str_equals_int\",\n",
+ " log_event_conditions={\"text\": \"counter_handlePoke\"},\n",
+ " check_n_events=2,\n",
+ " )\n",
+ " assert offset is None\n",
+ " assert processor.time_offset is None\n",
+ "\n",
+ "\n",
+ "def test_calculate_time_offset_fail_not_enough_external(processor):\n",
+ " offset = processor.calculate_time_offset(\n",
+ " external_reference_times=np.array([1678880076.566]),\n",
+ " log_event_type=\"ts_int_int\",\n",
+ " log_event_conditions={\"value1\": 65536, \"value2\": 0},\n",
+ " check_n_events=2,\n",
+ " )\n",
+ " assert offset is None\n",
+ " assert processor.time_offset is None\n",
+ "\n",
+ "\n",
+ "def test_get_events_dataframe_defaults(processor):\n",
+ " \"\"\"Test default behavior: exclude comments/unknown, no offset applied yet.\"\"\"\n",
+ " df = processor.get_events_dataframe(\n",
+ " apply_offset=False\n",
+ " ) # Default exclude=True\n",
+ " assert isinstance(df, pd.DataFrame)\n",
+ " assert len(df) == 8 # Excludes comment and unknown line\n",
+ " assert \"raw_line\" in df.columns\n",
+ " assert \"timestamp\" in df.columns\n",
+ " assert \"log_timestamp_sec\" in df.columns\n",
+ " # Check column order: time first, raw_line last\n",
+ " expected_cols = [\n",
+ " \"timestamp\",\n",
+ " \"log_timestamp_sec\",\n",
+ " \"timestamp_sync\",\n",
+ " \"text\",\n",
+ " \"value\",\n",
+ " \"value1\",\n",
+ " \"value2\",\n",
+ " \"raw_line\",\n",
+ " \"type\",\n",
+ " ]\n",
+ " actual_expected_cols = [col for col in expected_cols if col in df.columns]\n",
+ " assert list(df.columns) == actual_expected_cols\n",
+ " # Check content\n",
+ " assert df[\"raw_line\"].iloc[0] == \"76504 0 0\"\n",
+ " assert pd.isna(df[\"text\"].iloc[0]) # Should be NA where not applicable\n",
+ " assert df[\"value1\"].iloc[0] == 0\n",
+ "\n",
+ "\n",
+ "def test_get_events_dataframe_include_all(processor):\n",
+ " \"\"\"Test including comments and unknown lines.\"\"\"\n",
+ " df = processor.get_events_dataframe(\n",
+ " apply_offset=False, exclude_comments_unknown=False\n",
+ " )\n",
+ " assert isinstance(df, pd.DataFrame)\n",
+ " assert len(df) == 10 # Includes comment and unknown line\n",
+ " assert \"raw_line\" in df.columns\n",
+ " # Check raw_line for the unknown line\n",
+ " assert (\n",
+ " df[\"raw_line\"].iloc[7] == \"Executing this line\"\n",
+ " ) # Index adjusted for comment\n",
+ " # Check that timestamp is NA or 0 for lines without one\n",
+ " assert (\n",
+ " pd.isna(df[\"timestamp\"].iloc[0]) or df[\"timestamp\"].iloc[0] == 0\n",
+ " ) # Comment line\n",
+ " assert (\n",
+ " pd.isna(df[\"timestamp\"].iloc[7]) or df[\"timestamp\"].iloc[7] == 0\n",
+ " ) # Unknown line\n",
+ " # Check column order\n",
+ " expected_cols = [\n",
+ " \"timestamp\",\n",
+ " \"log_timestamp_sec\",\n",
+ " \"timestamp_sync\",\n",
+ " \"text\",\n",
+ " \"value\",\n",
+ " \"value1\",\n",
+ " \"value2\",\n",
+ " \"raw_line\",\n",
+ " \"type\",\n",
+ " ]\n",
+ " actual_expected_cols = [col for col in expected_cols if col in df.columns]\n",
+ " assert list(df.columns) == actual_expected_cols\n",
+ "\n",
+ "\n",
+ "def test_get_events_dataframe_with_offset(processor):\n",
+ " \"\"\"Test applying offset and column order.\"\"\"\n",
+ " processor.time_offset = 1678880000.0\n",
+ " df = processor.get_events_dataframe(\n",
+ " apply_offset=True\n",
+ " ) # Default exclude=True\n",
+ " assert isinstance(df, pd.DataFrame)\n",
+ " assert len(df) == 8\n",
+ " # Check calculation\n",
+ " expected_sync_time = (76504 / 1000.0) + 1678880000.0\n",
+ " assert df[\"timestamp_sync\"].iloc[0] == pytest.approx(expected_sync_time)\n",
+ " # Check NA value handling\n",
+ " assert pd.isna(df[\"text\"].iloc[0])\n",
+ "\n",
+ "\n",
+ "test_parse_int()\n",
+ "test_parse_ts_int_int_direct()\n",
+ "test_parse_ts_str_int_direct()\n",
+ "test_parse_ts_str_equals_int_direct()\n",
+ "test_parse_ts_str_direct()\n",
+ "test_parse_statescript_line_ts_int_int()\n",
+ "test_parse_statescript_line_ts_str_int()\n",
+ "test_parse_statescript_line_ts_str_equals_int()\n",
+ "test_parse_statescript_line_ts_str()\n",
+ "test_parse_statescript_line_unknown()\n",
+ "test_parse_statescript_line_comment_empty()\n",
+ "test_parse_statescript_line_comment_empty()\n",
+ "\n",
+ "sample_log_content = \"\"\"# Test log\n",
+ "76504 0 0\n",
+ "76566 center_poke\n",
+ "76566 65536 0\n",
+ "100078 counter_handlePoke = 1\n",
+ "100078 4 0\n",
+ "100559 0 0\n",
+ "Executing this line\n",
+ "115030 center_poke\n",
+ "115030 65536 0\n",
+ "\"\"\"\n",
+ "processor = StateScriptLogProcessor(sample_log_content)\n",
+ "with tempfile.NamedTemporaryFile(\n",
+ " mode=\"w\", delete=False, suffix=\".stateScriptLog\"\n",
+ ") as temp_log_file:\n",
+ " temp_log_file.write(sample_log_content)\n",
+ " temp_log_file_path = temp_log_file.name\n",
+ "\n",
+ "external_times = np.array([1678886476.530, 1678886500.100, 1678886515.050])\n",
+ "\n",
+ "test_init_from_string(processor, sample_log_content)\n",
+ "test_init_from_file(temp_log_file_path, sample_log_content)\n",
+ "test_init_from_file_not_found()\n",
+ "test_parse_raw_events(processor)\n",
+ "test_find_reference_events(processor)\n",
+ "test_calculate_time_offset_success(processor)\n",
+ "test_calculate_time_offset_fail_not_enough_log(processor, external_times)\n",
+ "test_calculate_time_offset_fail_not_enough_external(processor)\n",
+ "test_get_events_dataframe_defaults(processor)\n",
+ "test_get_events_dataframe_include_all(processor)\n",
+ "test_get_events_dataframe_with_offset(processor)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "30269f5f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from typing import List, Optional\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "\n",
+ "def _interpret_port_mask(\n",
+ " port_state_value: Optional[int], max_ports: int = 32\n",
+ ") -> List[int]:\n",
+ " \"\"\"\n",
+ " Interprets an integer value as a bitmask representing active ports using NumPy.\n",
+ "\n",
+ " Assumes a 1-based port numbering system (e.g., bit 0 corresponds to port 1).\n",
+ "\n",
+ " Parameters\n",
+ " ----------\n",
+ " port_state_value : Optional[int]\n",
+ " The integer value representing the combined state of multiple ports.\n",
+ " Handles None or pandas NA values.\n",
+ " max_ports : int, optional\n",
+ " The maximum port number to check (bits 0 to max_ports-1), by default 32.\n",
+ "\n",
+ " Returns\n",
+ " -------\n",
+ " List[int]\n",
+ " A sorted list of 1-based port numbers that are active (bit is set).\n",
+ " Returns an empty list if the value is 0, None, or NA.\n",
+ "\n",
+ " Example\n",
+ " -------\n",
+ " >>> _interpret_port_mask(9) # 1001 in binary -> Ports 1 and 4\n",
+ " [1, 4]\n",
+ " >>> _interpret_port_mask(65536) # 2^16 -> Port 17\n",
+ " [17]\n",
+ " \"\"\"\n",
+ " # Return empty list for 0, None, or pandas NA\n",
+ " if pd.isna(port_state_value) or port_state_value == 0:\n",
+ " return []\n",
+ "\n",
+ " # Ensure value is treated as an integer after NA check\n",
+ " try:\n",
+ " port_state_int = int(port_state_value)\n",
+ " except (ValueError, TypeError):\n",
+ " # Should not happen if input is from Int64Dtype column after NA check,\n",
+ " # but included for robustness if called directly with invalid input.\n",
+ " return []\n",
+ "\n",
+ " # Create bit masks for positions 0 to max_ports-1\n",
+ " # E.g., [1, 2, 4, 8, ...]\n",
+ " bit_masks = np.left_shift(1, np.arange(max_ports))\n",
+ "\n",
+ " # Check which bits are set in the input value using bitwise AND\n",
+ " active_bits_mask = np.bitwise_and(port_state_int, bit_masks) > 0\n",
+ "\n",
+ " # Get the 0-based indices (bit positions) where bits are active\n",
+ " active_indices = np.where(active_bits_mask)[0]\n",
+ "\n",
+ " # Convert 0-based indices to 1-based port numbers and return as a list\n",
+ " active_ports = (active_indices + 1).tolist()\n",
+ "\n",
+ " # np.where returns sorted indices, so list is already sorted\n",
+ " return active_ports\n",
+ "\n",
+ "\n",
+ "def add_interpreted_port_columns(\n",
+ " events_df: pd.DataFrame,\n",
+ " input_mask_col: str = \"value1\",\n",
+ " output_mask_col: str = \"value2\",\n",
+ " max_ports: int = 32,\n",
+ ") -> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " Adds 'active_inputs' and 'active_outputs' columns to a DataFrame\n",
+ " by interpreting bitmask columns representing port states using NumPy.\n",
+ "\n",
+ " Operates on and returns a modified copy of the input DataFrame.\n",
+ "\n",
+ " Parameters\n",
+ " ----------\n",
+ " events_df : pd.DataFrame\n",
+ " The DataFrame containing the parsed StateScript event data.\n",
+ " input_mask_col : str, optional\n",
+ " The name of the column containing the input port bitmask values,\n",
+ " by default 'value1'.\n",
+ " output_mask_col : str, optional\n",
+ " The name of the column containing the output port bitmask values,\n",
+ " by default 'value2'.\n",
+ " max_ports : int, optional\n",
+ " The maximum port number to check for the bitmasks, by default 32.\n",
+ "\n",
+ " Returns\n",
+ " -------\n",
+ " pd.DataFrame\n",
+ " A copy of the input DataFrame with 'active_inputs' and 'active_outputs'\n",
+ " columns added (or updated if they existed). Prints warnings if specified\n",
+ " mask columns are not found.\n",
+ "\n",
+ " Raises\n",
+ " ------\n",
+ " TypeError\n",
+ " If the input `events_df` is not a pandas DataFrame.\n",
+ " \"\"\"\n",
+ " if not isinstance(events_df, pd.DataFrame):\n",
+ " raise TypeError(\"Input 'events_df' must be a pandas DataFrame.\")\n",
+ "\n",
+ " # Work on a copy to avoid modifying the original DataFrame\n",
+ " processed_df = events_df.copy()\n",
+ "\n",
+ " # Interpret Input Ports\n",
+ " if input_mask_col in processed_df.columns:\n",
+ " # Convert column to numeric, coercing errors, then apply interpretation\n",
+ " input_series = pd.to_numeric(\n",
+ " processed_df[input_mask_col], errors=\"coerce\"\n",
+ " )\n",
+ " processed_df[\"active_inputs\"] = input_series.apply(\n",
+ " lambda mask: _interpret_port_mask(mask, max_ports)\n",
+ " )\n",
+ " else:\n",
+ " print(\n",
+ " f\"Warning: Input mask column '{input_mask_col}' not found in DataFrame. Skipping 'active_inputs'.\"\n",
+ " )\n",
+ " # Add empty column if it doesn't exist for consistency\n",
+ " processed_df[\"active_inputs\"] = [[] for _ in range(len(processed_df))]\n",
+ "\n",
+ " # Interpret Output Ports\n",
+ " if output_mask_col in processed_df.columns:\n",
+ " output_series = pd.to_numeric(\n",
+ " processed_df[output_mask_col], errors=\"coerce\"\n",
+ " )\n",
+ " processed_df[\"active_outputs\"] = output_series.apply(\n",
+ " lambda mask: _interpret_port_mask(mask, max_ports)\n",
+ " )\n",
+ " else:\n",
+ " print(\n",
+ " f\"Warning: Output mask column '{output_mask_col}' not found in DataFrame. Skipping 'active_outputs'.\"\n",
+ " )\n",
+ " # Add empty column if it doesn't exist for consistency\n",
+ " processed_df[\"active_outputs\"] = [[] for _ in range(len(processed_df))]\n",
+ "\n",
+ " return processed_df\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6b94f61a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "statescript_dfs[9]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1c825d3b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "add_interpreted_port_columns(statescript_dfs[8])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a17a0170",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "statescript_dfs[8].groupby(\"type\").groups"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f4f09598",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "events_by_type = [\n",
+ " group.drop(columns=[\"type\"])\n",
+ " for _, group in statescript_dfs[8].groupby(\"type\")\n",
+ "]\n",
+ "\n",
+ "events_by_type"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "83cc5cf1",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "spyglass",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.18"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/src/trodes_to_nwb/convert_dios.py b/src/trodes_to_nwb/convert_dios.py
index e532cae..377140e 100644
--- a/src/trodes_to_nwb/convert_dios.py
+++ b/src/trodes_to_nwb/convert_dios.py
@@ -9,7 +9,7 @@
from .spike_gadgets_raw_io import SpikeGadgetsRawIO
-def _get_channel_name_map(metadata: dict) -> dict[str, str]:
+def _get_channel_name_map(metadata: dict) -> dict[str, dict[str, str]]:
"""Parses behavioral events metadata from the yaml file
Parameters
@@ -21,6 +21,7 @@ def _get_channel_name_map(metadata: dict) -> dict[str, str]:
-------
channel_name_map : dict
Parsed behavioral events metadata mapping hardware event name to human-readable name
+ {"hardware_event_name": {"name": "human_readable_name", "comments": "comments"}}
"""
dio_metadata = metadata["behavioral_events"]
channel_name_map = {}
diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py
new file mode 100644
index 0000000..b2c8f57
--- /dev/null
+++ b/src/trodes_to_nwb/convert_statescript.py
@@ -0,0 +1,1252 @@
+"""StateScript log parsing and processing module.
+
+This module provides tools for parsing, interpreting, and processing `.stateScriptLog`
+files generated by Trodes. It handles the conversion
+of Trodes timestamps, alignment with external time sources, interpretation of
+Digital Input/Output (DIO) states, and processing of various common log line formats.
+
+Notes
+-----
+Source Files:
+ - Log files parsed by this module typically have the `.stateScriptLog` extension.
+ - These files are generated by Trodes during data acquisition sessions.
+
+Timestamp Information:
+    - The primary timestamp (`<timestamp>`) found in these logs is a 64-bit integer.
+ - It represents the number of milliseconds elapsed since the start of the
+ Trodes recording session.
+ - This is often referred to as the 'Trodes timestamp'.
+
+Log Line Formats:
+ `.stateScriptLog` files usually contain lines adhering to several common formats.
+ The module aims to parse lines matching these structures:
+
+    ``ts_int_int`` : `<timestamp> <int> <int>`
+ Represents timestamp and two integers. These integers often function as
+ bitwise masks representing the state of DIO pins.
+ Example: ``1817158 128 512``
+
+    ``ts_str_int`` : `<timestamp> <str> <int>`
+ Represents timestamp, a string label, and an integer value. Frequently
+ used for user-defined messages logging DIO pin state changes (e.g., pin name and state).
+ Example: ``8386500 DOWN 3``
+
+    ``ts_str_eq_int`` : `<timestamp> <str> = <int>`
+ Represents timestamp and a named integer variable assignment, useful for
+ tracking counters or state variables within the StateScript.
+ Example: ``3610855 totRewards = 70``
+
+    ``ts_str`` : `<timestamp> <str> [<str> ...]`
+ Represents timestamp followed by one or more space-separated strings.
+ Commonly used for logging event markers or descriptive text messages.
+ Example: ``1678886401 LOCKEND``
+
+ ``comment_or_empty`` : Lines starting with `#` or completely empty lines.
+ Lines starting with '#' are treated as comments. Empty lines may also occur.
+ These are typically ignored during data extraction.
+ Example: ``# Starting new trial block``
+
+ ``unknown`` : Lines that do not conform to the patterns listed above.
+ These might include initial header lines, formatting variations, or unexpected entries.
+ Example: ``initiated``
+
+Component Definitions:
+    - ``<timestamp>``: 64-bit integer; milliseconds since session start (Trodes timestamp).
+    - ``<int>``: Integer value; often used as a bitwise mask for DIO pin states.
+    - ``<str>``: String value; can represent an event name, variable name, message component, etc.
+    - ``<str>...``: Denotes one or more space-separated strings.
+
+"""
+
+import pathlib
+from typing import Any, Dict, List, Optional, Type, TypeVar, Union
+
+import numpy as np
+import pandas as pd
+
+from .convert_dios import _get_channel_name_map as _get_dio_channel_name_map
+
# TypeVar bound to the processor class so alternate constructors (e.g.
# ``from_file``) are typed to return the concrete subclass they are called on
# (a pre-PEP 673 substitute for ``typing.Self``).
T_StateScriptLogProcessor = TypeVar(
    "T_StateScriptLogProcessor", bound="StateScriptLogProcessor"
)
+
+
+def _parse_int(s: str) -> Optional[int]:
+ """Attempts to parse a string as an integer.
+
+ Parameters
+ ----------
+ s : str
+ Input string.
+
+ Returns
+ -------
+ Optional[int]
+ The parsed integer, or None if parsing fails.
+ """
+ try:
+ return int(s)
+ except ValueError:
+ return None
+
+
def parse_ts_int_int(parts: list) -> Optional[Dict[str, Any]]:
    """Parse a log line of the form ``<timestamp> <int> <int>``.

    This pattern is a timestamp followed by two integers, which typically
    act as bitwise masks describing DIO input/output states.

    Example:
        ``1817158 128 512`` ->
        {'type': 'ts_int_int', 'timestamp': 1817158, 'value1': 128, 'value2': 512}

    Parameters
    ----------
    parts : list
        Whitespace-split tokens of one log line; exactly 3 are expected.

    Returns
    -------
    Optional[Dict[str, Any]]
        {'type': 'ts_int_int', 'timestamp': int, 'value1': int, 'value2': int}
        when all three tokens are valid integers, otherwise None.
    """
    if len(parts) != 3:
        return None

    values = []
    for token in parts:
        try:
            values.append(int(token))
        except ValueError:
            # Any non-integer token disqualifies this pattern.
            return None

    return {
        "type": "ts_int_int",
        "timestamp": values[0],
        "value1": values[1],
        "value2": values[2],
    }
+
+
def parse_ts_str_int(parts: list) -> Optional[Dict[str, Any]]:
    """Parse a log line of the form ``<timestamp> <str> <int>``.

    A timestamp, a non-numeric label, and an integer. Frequently used for
    user-scripted messages logging DIO pin state changes.

    Example:
        ``8386500 DOWN 3`` ->
        {'type': 'ts_str_int', 'timestamp': 8386500, 'text': 'DOWN', 'value': 3}

    Parameters
    ----------
    parts : list
        Whitespace-split tokens of one log line; exactly 3 are expected.

    Returns
    -------
    Optional[Dict[str, Any]]
        {'type': 'ts_str_int', 'timestamp': int, 'text': str, 'value': int}
        when the tokens are (int, non-int string, int), otherwise None.
    """
    if len(parts) != 3:
        return None

    def _maybe_int(token: str) -> Optional[int]:
        # Local integer probe; None signals "not an integer".
        try:
            return int(token)
        except ValueError:
            return None

    timestamp = _maybe_int(parts[0])
    label = parts[1]
    value = _maybe_int(parts[2])

    # The middle token must NOT itself be an integer, or this line would be
    # ambiguous with the ts_int_int pattern.
    if timestamp is None or value is None or _maybe_int(label) is not None:
        return None

    return {
        "type": "ts_str_int",
        "timestamp": timestamp,
        "text": label,
        "value": value,
    }
+
+
def parse_ts_str_equals_int(parts: list) -> Optional[Dict[str, Any]]:
    """Parse a log line of the form ``<timestamp> <str> = <int>``.

    A timestamp, a variable name, a literal '=', and an integer value.
    Used for logging named integer variables (counters, state flags).

    Example Log Lines:
        ``3610855 totRewards = 70`` ->
        {'type': 'ts_str_equals_int', 'timestamp': 3610855, 'text': 'totRewards', 'value': 70}

    Parameters
    ----------
    parts : list
        Whitespace-split tokens of one log line; exactly 4 are expected,
        with '=' as the third token.

    Returns
    -------
    Optional[Dict[str, Any]]
        {'type': 'ts_str_equals_int', 'timestamp': int, 'text': str, 'value': int}
        when the structure is (int, string, '=', int), otherwise None.
    """
    # Shape gate: exactly four tokens with '=' in the third slot.
    if len(parts) != 4 or parts[2] != "=":
        return None

    try:
        timestamp = int(parts[0])
        value = int(parts[3])
    except ValueError:
        # Either the timestamp or the assigned value is not an integer.
        return None

    return {
        "type": "ts_str_equals_int",
        "timestamp": timestamp,
        "text": parts[1],
        "value": value,
    }
+
+
def parse_ts_str(parts: list) -> Optional[Dict[str, Any]]:
    """Parse a log line of the form ``<timestamp> <str> [<str> ...]``.

    A timestamp followed by one or more strings, where the first string
    after the timestamp is *not* parseable as an integer. Commonly used
    for timestamped event markers or free-text messages.

    Example Log Lines:
        ``1678886401 LOCKEND`` ->
        {'type': 'ts_str', 'timestamp': 1678886401, 'text': 'LOCKEND'}
        ``76566 center_poke initiated`` ->
        {'type': 'ts_str', 'timestamp': 76566, 'text': 'center_poke initiated'}

    Parameters
    ----------
    parts : list
        Whitespace-split tokens of one log line; at least 2 are expected.

    Returns
    -------
    Optional[Dict[str, Any]]
        {'type': 'ts_str', 'timestamp': int, 'text': str} where 'text' is
        the space-joined remainder of the line, or None on mismatch.
    """
    if len(parts) < 2:
        return None

    try:
        timestamp = int(parts[0])
    except ValueError:
        return None

    # Reject lines whose second token is an integer — those belong to the
    # more specific ts_int_int / ts_str_int patterns.
    try:
        int(parts[1])
        second_is_int = True
    except ValueError:
        second_is_int = False
    if second_is_int:
        return None

    return {
        "type": "ts_str",
        "timestamp": timestamp,
        "text": " ".join(parts[1:]),
    }
+
+
def parse_statescript_line(line: str, line_num: int = 0) -> Dict[str, Any]:
    """Attempts to parse a single StateScript log line using a set of parsers.

    It tries parsing the line against known structures in a specific order
    of precedence to handle potentially overlapping patterns:
    1. ``<timestamp> <str> = <int>`` ('ts_str_equals_int')
    2. ``<timestamp> <int> <int>`` ('ts_int_int')
    3. ``<timestamp> <str> <int>`` ('ts_str_int', where <str> is not an int)
    4. ``<timestamp> <str> [<str> ...]`` ('ts_str', where first <str> is not an int)

    Lines starting with '#' or empty lines are marked as 'comment_or_empty'.
    Lines that do not match any known pattern are marked as 'unknown'.

    Parameters
    ----------
    line : str
        A single line (string) from the StateScript log file.
    line_num : int, optional
        The line number in the file (for reference), by default 0.

    Returns
    -------
    Dict[str, Any]
        A dictionary describing the parsed line. It always contains:
        - 'type': A string indicating the matched pattern
          ('ts_str_equals_int', 'ts_int_int', 'ts_str_int', 'ts_str',
          'comment_or_empty', 'unknown').
        - 'raw_line': The original input line string (stripped).
        - 'line_num': The supplied line number.
        For successfully parsed types, it includes additional keys like
        'timestamp', 'text', 'value', 'value1', 'value2' as appropriate.
        For 'comment_or_empty' and 'unknown', 'timestamp' is None.
    """
    # Note: the return annotation is Dict (not Optional[Dict]) — this function
    # always returns a dictionary; unmatched lines come back as 'unknown'.
    line = line.strip()

    # Handle comments and empty lines first
    if not line or line.startswith("#"):
        return {
            "type": "comment_or_empty",
            "raw_line": line,
            "line_num": line_num,
            "timestamp": None,
        }

    # Define the parsing functions in order of desired precedence.
    # More specific patterns must come before more general ones so that,
    # e.g., '1 2 3' matches ts_int_int rather than falling through.
    parsers = [
        parse_ts_str_equals_int,
        parse_ts_int_int,
        parse_ts_str_int,
        parse_ts_str,
    ]
    parts = line.split()  # Split line into parts based on whitespace

    # Iterate through parsers and return the first successful match
    for parser in parsers:
        parsed = parser(parts)
        if parsed:
            # Attach the original line and its position for traceability
            parsed["raw_line"] = line
            parsed["line_num"] = line_num
            return parsed

    # No parser matched — record the line verbatim for debugging
    return {
        "type": "unknown",
        "raw_line": line,
        "line_num": line_num,
        "timestamp": None,
    }
+
+
+def _interpret_DIO_mask(
+ DIO_state_value: Optional[int], max_DIOs: int = 32
+) -> List[int]:
+ """
+ Interprets an integer value as a bitmask representing active DIOs.
+ Assumes a 1-based DIO numbering system (e.g., bit 0 corresponds to DIO 1).
+
+ For example, if there are 32 DIOs, the integer value 9 (binary 1001)
+ indicates that DIOs 1 and 4 are active (bits 0 and 3 are set).
+
+ If there are 16 DIOs, the integer value 65536 (binary 10000000000000000)
+ indicates that DIO 17 is active (bit 16 is set).
+
+
+ Parameters
+ ----------
+ DIO_state_value : Optional[int]
+ The integer value representing the combined state of multiple ports.
+ Handles None or pandas NA values.
+ max_DIOs : int, optional
+ The maximum port number to check (bits 0 to max_DIOs-1), by default 32.
+
+ Returns
+ -------
+ List[int]
+ A sorted list of 1-based port numbers that are active (bit is set).
+ Returns an empty list if the value is 0, None, or NA.
+
+ Example
+ -------
+ >>> _interpret_DIO_mask(9) # 1001 in binary -> Ports 1 and 4
+ [1, 4]
+ >>> _interpret_DIO_mask(65536) # 2^16 -> Port 17
+ [17]
+ """
+ if pd.isna(DIO_state_value) or DIO_state_value == 0:
+ return []
+
+ # Ensure value is treated as an integer after NA check
+ try:
+ DIO_state_value = int(DIO_state_value)
+ except (ValueError, TypeError):
+ # Should not happen if input is from Int64Dtype column after NA check,
+ # but included for robustness if called directly with invalid input.
+ return []
+
+ # Create bit masks for positions 0 to max_DIOs-1
+ # E.g., [1, 2, 4, 8, ...]
+ bit_masks = np.left_shift(1, np.arange(max_DIOs))
+
+ # Check which bits are set in the input value using bitwise AND
+ active_bits_mask = np.bitwise_and(DIO_state_value, bit_masks) > 0
+
+ # Get the 0-based indices (bit positions) where bits are active
+ active_indices = np.where(active_bits_mask)[0]
+
+ # Convert 0-based indices to 1-based DIO numbers and return as a list
+ active_ports = (active_indices + 1).tolist()
+
+ # np.where returns sorted indices, so list is already sorted
+ return active_ports
+
+
+# -- Main Class for Processing StateScript Logs --
+class StateScriptLogProcessor:
+ """Processes StateScript log content, handling parsing and time alignment.
+
+ This class reads StateScript log data (either from a file or a string),
+ parses each line into a structured format, converts integer timestamps
+ (assumed to be milliseconds) into seconds, and optionally calculates
+ a time offset to align the log timestamps with an external reference time
+ source (e.g., synchronization pulses recorded by another system).
+
+ Attributes
+ ----------
+ log_content : str
+ The raw string content of the log file.
+ source_description : str
+ Information about where the log content came from (e.g., file path).
+ raw_events : List[Dict[str, Any]]
+ List of dictionaries, one per parsed line from the log content
+ (including comments/unknown lines). Generated by `parse_raw_events`.
+ Timestamps in this list are raw integers from the log.
+ processed_events_df : Optional[pd.DataFrame]
+ DataFrame containing structured event data, typically excluding
+ comments and unknown lines. Generated by `get_events_dataframe`.
+ Includes 'trodes_timestamp_sec' (float, seconds) converted from raw
+ timestamps, and potentially 'timestamp_sync' (float, seconds) if
+ time offset is calculated and applied.
+ time_offset : Optional[float]
+ The calculated time offset in seconds, representing the difference:
+ (external_reference_time_sec - trodes_timestamp_sec).
+ Set by `calculate_time_offset`. If calculated, adding this offset
+ to 'trodes_timestamp_sec' yields the synchronized time ('timestamp_sync').
+
+ Example Usage
+ -------------
+ # Load from file
+ processor = StateScriptLogProcessor.from_file("path/to/session.stateScriptLog")
+ # Assuming 'external_sync_times' is a numpy array of timestamps (in seconds)
+ # corresponding to the log event "DIO Pin 8 going UP"
+ processor.calculate_time_offset(
+ external_reference_times=external_sync_times,
+ log_event_type="ts_str_int",
+ log_event_conditions={"text": "UP", "value": 8}
+ )
+ # Get the processed DataFrame with synchronized timestamps
+ df = processor.get_events_dataframe(apply_offset=True)
+ if df is not None:
+ print(df[['timestamp_sync', 'type', 'text', 'value']].head())
+ """
+
+ MILLISECONDS_PER_SECOND = 1000
+
+ log_content: str
+ source_description: str
+ raw_events: List[Dict[str, Any]]
+ processed_events_df: Optional[pd.DataFrame]
+ time_offset: Optional[float]
+
+ def __init__(self, log_content: str, source_info: str = "from string"):
+ """Initializes the processor with log content and source information.
+
+ Parameters
+ ----------
+ log_content : str
+ The entire content of the state script log as a single string.
+ source_info : str, optional
+ A description of the log content's source (e.g., file path, identifier).
+ Defaults to "from string".
+ """
+ self.log_content = log_content
+ self.source_description = source_info
+
+ # Initialize attributes that will be populated by methods
+ self.raw_events = []
+ self.processed_events_df = None
+ self.time_offset = None
+
+ @classmethod
+ def from_file(
+ cls: Type[T_StateScriptLogProcessor],
+ file_path: Union[str, pathlib.Path],
+ ) -> T_StateScriptLogProcessor:
+ """Creates a StateScriptLogProcessor instance by reading a log file.
+
+ Parameters
+ ----------
+ file_path : Union[str, pathlib.Path]
+ The path to the StateScript log file.
+
+ Returns
+ -------
+ T_StateScriptLogProcessor
+ An instance of the StateScriptLogProcessor initialized with the
+ content of the specified file.
+
+ Raises
+ ------
+ FileNotFoundError
+ If the file specified by `file_path` does not exist.
+ IOError
+ If an error occurs during file reading (e.g., permissions).
+ UnicodeDecodeError
+ If the file cannot be decoded using UTF-8 encoding (with fallback).
+ """
+ file_path = pathlib.Path(file_path) # Ensure Path object for consistency
+ source_info = f"from file: {file_path}"
+ try:
+ # Read the file content. Using 'surrogateescape' allows reading
+ # potentially mixed/invalid encodings, preserving problematic bytes.
+ # UTF-8 is a common default for logs.
+ content = file_path.read_text(encoding="utf-8", errors="surrogateescape")
+ # Create and return an instance of the class
+ return cls(log_content=content, source_info=source_info)
+ except FileNotFoundError:
+ print(f"Error: File not found at {file_path}")
+ raise # Re-raise to signal failure
+ except IOError as e:
+ print(f"Error reading file at {file_path}: {e}")
+ raise # Re-raise
+ except UnicodeDecodeError as e:
+ print(f"Error decoding file {file_path} using utf-8: {e}")
+ print("Consider checking file encoding if errors persist.")
+ raise # Re-raise
+ except Exception as e:
+ print(f"Unexpected error reading file {file_path}: {e}")
+ raise
+
+ def __repr__(self) -> str:
+ """Provides a concise, unambiguous string representation of the processor.
+
+ Includes information about the source, parsing status, number of raw events,
+ time offset status, and DataFrame generation status.
+
+ Returns
+ -------
+ str
+ String representation of the StateScriptLogProcessor instance.
+ """
+ cls_name = self.__class__.__name__
+ source = self.source_description
+
+ # Describe parsing status
+ if not self.raw_events:
+ parse_status = "not parsed"
+ num_raw = ""
+ else:
+ parse_status = "parsed"
+ num_raw = f", raw_events={len(self.raw_events)}"
+
+ # Describe time offset status
+ offset_status = (
+ f"offset={self.time_offset:.4f}s"
+ if self.time_offset is not None
+ else "no offset calculated"
+ )
+
+ # Describe DataFrame status
+ df_status = (
+ "DataFrame generated"
+ if self.processed_events_df is not None
+ else "DataFrame not generated"
+ )
+
+ return f"<{cls_name}(source='{source}', status={parse_status}{num_raw}, {offset_status}, {df_status})>"
+
+ def _repr_html_(self) -> str:
+ """Generates an HTML representation for display in Jupyter/IPython.
+
+ Provides a more visually structured overview of the processor's state,
+ including source, parsing status, offset, DataFrame status, and a
+ preview of the DataFrame if generated.
+
+ Returns
+ -------
+ str
+ HTML string representing the StateScriptLogProcessor instance.
+ """
+ cls_name = self.__class__.__name__
+ # Use getattr for robustness in case attributes haven't been set yet
+ source = getattr(self, "source_description", "source info missing")
+ raw_events_list = getattr(self, "raw_events", []) # Default to empty list
+ df_val = getattr(self, "processed_events_df", None)
+ offset_val = getattr(self, "time_offset", None)
+
+ # Build status strings based on attribute values
+ if not raw_events_list:
+ parse_status = "Status: Not Parsed"
+ num_raw_str = ""
+ else:
+ parse_status = "Status: Parsed"
+ num_raw_str = f" ({len(raw_events_list)} raw entries)"
+
+ offset_status = (
+ f"Time Offset: {offset_val:.4f}s"
+ if offset_val is not None
+ else "Time Offset: Not Calculated"
+ )
+ df_status = (
+ "DataFrame: Generated"
+ if df_val is not None
+ else "DataFrame: Not Generated"
+ )
+
+ # Basic HTML structure and styling
+ html = f"""
+
+
{cls_name}
+
Source: {source}
+
{parse_status}{num_raw_str}
+
{offset_status}
+
{df_status}
+ """
+
+ # Add DataFrame preview if it exists and is not empty
+ if df_val is not None and not df_val.empty:
+ html += "
DataFrame Preview (first 5 rows):
"
+ try:
+ # Generate HTML table from DataFrame head
+ html += df_val.head().to_html(
+ index=False, # Don't include DataFrame index
+ border=0, # No table border
+ justify="left", # Align text left
+ classes="dataframe-preview", # Add a class for potential CSS styling
+ )
+ except Exception as e:
+ html += f"
Error generating DataFrame HTML preview: {e}
"
+ elif df_val is not None and df_val.empty:
+ html += "
(DataFrame is empty)
"
+
+ html += "
"
+ return html
+
+ def parse_raw_events(self) -> List[Dict[str, Any]]:
+ """Parses the loaded log content line by line.
+
+ Returns
+ -------
+ List[Dict[str, Any]]
+ The list of parsed event dictionaries stored in `self.raw_events`.
+ Each dictionary represents one line from the log.
+ """
+ lines = self.log_content.splitlines()
+ # Use list comprehension for concise parsing of all lines
+ self.raw_events = [
+ parse_statescript_line(line, line_num)
+ for line_num, line in enumerate(lines)
+ ]
+ return self.raw_events
+
+ def _find_reference_events(
+ self, event_type: str, conditions: Dict[str, Any]
+ ) -> pd.DataFrame:
+ """Internal helper to find specific log events for time alignment.
+
+ Filters the `self.raw_events` list to find events matching the specified
+ `event_type` and satisfying all key-value pairs in `conditions`.
+ Converts the integer timestamp (assumed to be milliseconds) of matching
+ events to seconds (float) and stores it in a 'trodes_timestamp_sec' column.
+
+ Parameters
+ ----------
+ event_type : str
+ The required 'type' field of the events to find
+ (e.g., 'ts_str_int', 'ts_int_int').
+ conditions : Dict[str, Any]
+ A dictionary where keys are field names within the event dictionary
+ (e.g., 'text', 'value', 'value1') and values are the required values
+ for an event to be considered a match.
+
+ Returns
+ -------
+ pd.DataFrame
+ A DataFrame containing the matching events. Includes the original
+ 'timestamp' (int, milliseconds), the calculated 'trodes_timestamp_sec'
+ (float, seconds), and the fields specified in `conditions`.
+ The DataFrame is sorted by 'trodes_timestamp_sec'.
+ Returns an empty DataFrame if no matching events are found.
+ """
+ # Ensure raw events are parsed first if not already done
+ if not self.raw_events:
+ self.parse_raw_events()
+
+ matching_events = []
+ # Iterate through all parsed raw events
+ for event in self.raw_events:
+ # Check if the event type matches and it has a timestamp
+ if event.get("type") == event_type and "timestamp" in event:
+ # Check if all specified conditions are met for this event
+ match = all(
+ event.get(key) == value for key, value in conditions.items()
+ )
+ if match:
+ matching_events.append(event)
+
+ # If no matches were found, return an empty DataFrame with expected columns
+ if not matching_events:
+ # Define columns for the empty DataFrame for consistency
+ cols = ["trodes_timestamp", "trodes_timestamp_sec"] + list(
+ conditions.keys()
+ )
+ # Ensure other relevant columns from potential matches are also defined
+ potential_value_cols = ["value", "value1", "value2", "text"]
+ for vc in potential_value_cols:
+ if vc not in cols:
+ cols.append(vc)
+ return pd.DataFrame(columns=cols)
+
+ # Create DataFrame from the list of matching event dictionaries
+ df = pd.DataFrame(matching_events)
+
+ # Convert timestamp (assumed ms) to seconds (float)
+ df["trodes_timestamp_sec"] = (
+ df["timestamp"].astype(float) / self.MILLISECONDS_PER_SECOND
+ )
+ # Ensure original timestamp remains integer
+ df["timestamp"] = df["timestamp"].astype(int)
+
+ # Attempt to cast condition columns to appropriate types (e.g., int)
+ # This improves consistency if values were parsed as strings initially
+ for key, value in conditions.items():
+ if key in df.columns:
+ try:
+ if isinstance(value, int):
+ # Convert column to numeric, then integer (handles potential errors)
+ df[key] = pd.to_numeric(df[key], errors="coerce").astype(
+ pd.Int64Dtype()
+ )
+ except (ValueError, TypeError):
+ # Ignore casting errors if conversion isn't possible
+ pass
+
+ # Sort by time and reset index
+ return df.sort_values("trodes_timestamp_sec")
+
    def calculate_time_offset(
        self,
        external_reference_times: np.ndarray,
        log_event_type: str,
        log_event_conditions: Dict[str, Any],
        match_threshold: float = 0.1,
        check_n_events: int = 4,
    ) -> Optional[float]:
        """Calculates the time offset between log events and external timestamps.

        This method aligns timestamps (in seconds) of specific events found
        in the log (`log_event_type` with `log_event_conditions`) against a
        provided sorted array of `external_reference_times` (also in seconds).
        It assumes both sets of timestamps correspond to the same sequence of
        real-world events (e.g., synchronization pulses).

        The offset is determined by finding the constant difference
        (`offset = external_time - log_time`) that minimizes the timing
        discrepancy between the first `check_n_events` corresponding events
        in both sequences.

        IMPORTANT: If `external_reference_times` represent Unix time (seconds
        since 1970-01-01 UTC), the calculated offset will align the log's
        timestamps (`trodes_timestamp_sec`) to Unix time. The resulting
        `timestamp_sync` column in the DataFrame will then also be in Unix time.

        Parameters
        ----------
        external_reference_times : np.ndarray
            A 1D numpy array of timestamps (float, in seconds) from the external
            reference system. This array *must* be sorted in ascending order.
            If using for Unix time alignment, these must be Unix timestamps.
        log_event_type : str
            The 'type' of log event to use as the reference points within the log
            (e.g., 'ts_str_int', 'ts_int_int').
        log_event_conditions : Dict[str, Any]
            Dictionary specifying the exact conditions to identify the reference
            log events (e.g., {'text': 'UP', 'value': 8} for a pin state change).
        match_threshold : float, optional
            The maximum acceptable cumulative absolute difference (in seconds)
            between the matched `check_n_events` pairs (log vs. external) for
            an offset to be considered valid. Defaults to 0.1 seconds.
        check_n_events : int, optional
            The number of initial events from both sequences to use for calculating
            the mismatch and finding the best offset. Defaults to 4. A higher
            number increases robustness against spurious events but requires more
            matching events to be present.

        Returns
        -------
        Optional[float]
            The calculated time offset in seconds (`external_time_sec - log_time_sec`).
            Adding this offset to `trodes_timestamp_sec` synchronizes the log time
            to the external reference time. Returns `None` if a satisfactory
            offset (below `match_threshold`) cannot be found, or if insufficient
            events are available in either the log or the external references.
            If successful, updates `self.time_offset` with the calculated value.
        """
        # Find the timestamps of the reference events within the log
        # (this parses raw events first if needed).
        log_reference_df = self._find_reference_events(
            log_event_type, log_event_conditions
        )

        # Check if enough log events were found
        if log_reference_df.empty or len(log_reference_df) < check_n_events:
            print(
                f"Warning: Not enough reference events found in log matching "
                f"type='{log_event_type}', conditions={log_event_conditions}. "
                f"Need at least {check_n_events}, found {len(log_reference_df)}."
            )
            self.time_offset = None  # Ensure offset is None if calculation fails
            return None

        # Extract log event times (in seconds) and ensure external times are a sorted numpy array
        sc_times_sec = log_reference_df["trodes_timestamp_sec"].to_numpy()
        # Ensure external times are numpy array and sorted (as required by algorithm)
        dio_times_sec = np.sort(np.asarray(external_reference_times))

        # Check if enough external reference times were provided
        if len(dio_times_sec) < check_n_events:
            print(
                f"Warning: Not enough external reference timestamps provided "
                f"({len(dio_times_sec)}), need at least {check_n_events} for matching."
            )
            self.time_offset = None  # Ensure offset is None
            return None

        # --- Offset Calculation Logic ---
        # This section iterates through potential starting alignments between
        # the external times and the first log time, calculates the total mismatch
        # for the first 'check_n_events', and finds the offset minimizing this mismatch.
        # NOTE: the search assumes the FIRST matching log event corresponds to
        # one of the external pulses; if that event is spurious or missing from
        # the external stream, no candidate offset will fall below the threshold.

        best_offset = None
        min_mismatch = float("inf")

        # Iterate through possible starting points in the external times array
        # We only need to check starting alignments where enough subsequent external times exist
        # for the check_n_events comparison.
        # We test aligning sc_times_sec[0] with each dio_times_sec[event_idx]
        for event_idx in range(len(dio_times_sec) - check_n_events + 1):
            # Calculate the potential offset based on the first log event and current external event
            potential_offset = dio_times_sec[event_idx] - sc_times_sec[0]
            current_mismatch = 0.0

            # Simple check: Calculate mismatch using the *next consecutive* N events
            # This assumes no missing events in *either* stream within the checked range.
            # If events can be missing, a more complex alignment (like Needleman-Wunsch
            # or checking nearest neighbors) might be needed. This simpler approach
            # is often sufficient if the sync signals are reliable.
            mismatch_found = False
            for i in range(check_n_events):
                # Calculate the expected external time for the i-th log event using the potential offset
                projected_dio_time = sc_times_sec[i] + potential_offset
                # Calculate the absolute difference with the corresponding i-th external time
                # (relative to the starting event_idx)
                diff = abs(dio_times_sec[event_idx + i] - projected_dio_time)
                current_mismatch += diff

                # Optimization: If mismatch already exceeds threshold or current best, stop early
                # (both conditions must hold: a partial sum below the current
                # best could still win even if it is over the threshold check).
                if (
                    current_mismatch >= match_threshold
                    and current_mismatch >= min_mismatch
                ):
                    mismatch_found = True  # Signal that this offset is not viable
                    break  # Stop checking further events for this offset

            # If loop completed without early exit and this offset has lower mismatch
            if not mismatch_found and current_mismatch < min_mismatch:
                min_mismatch = current_mismatch
                best_offset = potential_offset

        # After checking all potential alignments, evaluate the result
        if best_offset is not None and min_mismatch < match_threshold:
            print(
                f"Time offset calculation successful.\n"
                f"  Best Offset: {best_offset:.4f} s (External Time - Log Time)\n"
                f"  Lowest Mismatch: {min_mismatch:.4f} s (summed abs diff over {check_n_events} events)\n"
                f"  Threshold: {match_threshold:.4f} s"
            )
            self.time_offset = best_offset  # Store the successful offset
            return self.time_offset
        else:
            # Report failure if no offset met the threshold
            print(
                f"Warning: Could not find a suitable time offset.\n"
                f"  Minimum mismatch found: {min_mismatch:.4f} s (using {check_n_events} events)\n"
                f"  Match threshold: {match_threshold:.4f} s\n"
                f"  Troubleshooting: Check if reference events match, increase threshold, "
                f"or verify external timestamps."
            )
            self.time_offset = None  # Ensure offset is None on failure
            return None
+
+    def get_events_dataframe(
+        self,
+        apply_offset: bool = True,
+        exclude_comments_unknown: bool = True,
+        exclude_int_int: bool = False,
+        max_DIOs: int = 32,
+    ) -> pd.DataFrame:
+        """Constructs and returns a pandas DataFrame from the parsed log events.
+
+        Lazily parses the raw log if needed, filters unwanted event types,
+        normalizes column names/dtypes, derives per-row DIO lists from the
+        bitmask columns, and optionally adds a synchronized timestamp column.
+        The result is cached on `self.processed_events_df`.
+
+        Parameters
+        ----------
+        apply_offset : bool, optional
+            If True (default), and a `time_offset` has been calculated, add the
+            'timestamp_sync' column to the DataFrame. If False, or if no offset
+            is available, this column is omitted.
+        exclude_comments_unknown : bool, optional
+            If True (default), lines parsed as 'comment_or_empty' or 'unknown'
+            are excluded from the DataFrame. If False, all entries from
+            `raw_events` are included (potentially useful for debugging parsing).
+        exclude_int_int : bool, optional
+            If True, lines parsed as 'ts_int_int' are excluded from
+            the DataFrame. These are often used for DIO state changes and may not
+            be relevant for most analyses.
+        max_DIOs : int, optional
+            The maximum number of DIOs to consider when interpreting bitmasks
+            for active DIO inputs/outputs. Default is 32. This is used to
+            determine the number of bits to check in the bitmask values.
+
+        Returns
+        -------
+        pd.DataFrame
+            A DataFrame containing the structured event data, indexed by
+            'line_num'. Columns are:
+            - 'trodes_timestamp' (int, ms since start of recording)
+            - 'trodes_timestamp_sec' (float, seconds since start of recording)
+            - 'timestamp_sync' (float, seconds; present only if offset applied)
+            - 'raw_line' (str)
+            - 'type' (str)
+            - 'text' (str)
+            - 'value' (int, if pattern `text = value`, type 'ts_str_equals_int')
+            - 'active_DIO_inputs_bitmask' (int, from 'ts_int_int')
+            - 'active_DIO_outputs_bitmask' (int, from 'ts_int_int')
+            - 'active_DIO_inputs' (list of int)
+            - 'active_DIO_outputs' (list of int)
+
+            Returns an empty DataFrame if no valid events are found after filtering.
+        """
+        # Ensure raw events are available; parse lazily on first use and
+        # re-check in case the log content produced nothing.
+        if not self.raw_events:
+            self.parse_raw_events()
+            if not self.raw_events:
+                print("Warning: Log content yielded no raw events.")
+                self.processed_events_df = pd.DataFrame()  # Store empty df
+                return self.processed_events_df
+
+        # Determine which event types to filter out
+        exclude_types = []
+        if exclude_comments_unknown:
+            exclude_types += ["comment_or_empty", "unknown"]
+        if exclude_int_int:
+            exclude_types += ["ts_int_int"]
+
+        filtered_events = [
+            event for event in self.raw_events if event.get("type") not in exclude_types
+        ]
+
+        # Handle case where filtering leaves no events
+        if not filtered_events:
+            print("Warning: No valid events remain after filtering.")
+            self.processed_events_df = pd.DataFrame()  # Store empty df
+            return self.processed_events_df
+
+        # Define a preferred column order for better readability
+        # Include all potential columns generated by the parsers + derived columns
+        preferred_column_order = [
+            "line_num",  # Line number in the original log
+            "raw_line",  # Original line content
+            "type",  # Type of parsed line pattern
+            "trodes_timestamp",  # trodes integer timestamp (ms since start)
+            "trodes_timestamp_sec",  # trodes timestamp converted to seconds
+            "timestamp_sync",  # Synchronized timestamp (if calculated)
+            "text",  # Text part (from ts_str, ts_str_int, ts_str_equals_int)
+            "value",  # Integer value after equals (from ts_str_int, ts_str_equals_int)
+            "active_DIO_inputs_bitmask",  # DIO input bitmask (from ts_int_int)
+            "active_DIO_outputs_bitmask",  # DIO output bitmask (from ts_int_int)
+        ]
+
+        # Create DataFrame. Pandas handles missing columns gracefully.
+        df = pd.DataFrame(filtered_events).rename(
+            columns={
+                "timestamp": "trodes_timestamp",
+                "value1": "active_DIO_inputs_bitmask",
+                "value2": "active_DIO_outputs_bitmask",
+            }
+        )
+        # Expand each bitmask into a list of active 1-based DIO numbers.
+        if "active_DIO_inputs_bitmask" in df.columns:
+            df["active_DIO_inputs"] = df["active_DIO_inputs_bitmask"].apply(
+                lambda mask: _interpret_DIO_mask(mask, max_DIOs)
+            )
+        if "active_DIO_outputs_bitmask" in df.columns:
+            df["active_DIO_outputs"] = df["active_DIO_outputs_bitmask"].apply(
+                lambda mask: _interpret_DIO_mask(mask, max_DIOs)
+            )
+
+        # --- Timestamp Processing ---
+        # Ensure 'trodes_timestamp' column exists and convert to numeric/int
+        if "trodes_timestamp" in df.columns:
+            # Coerce non-numeric values to NaN, then convert to pandas'
+            # nullable Int64 so missing timestamps become pd.NA instead of
+            # forcing the column to float. (The intermediate fillna(pd.NA)
+            # is effectively a no-op kept for explicitness.)
+            df["trodes_timestamp"] = (
+                pd.to_numeric(df["trodes_timestamp"], errors="coerce")
+                .fillna(pd.NA)
+                .astype(pd.Int64Dtype())
+            )
+            # Calculate timestamp in seconds
+            df["trodes_timestamp_sec"] = (
+                df["trodes_timestamp"].astype(float) / self.MILLISECONDS_PER_SECOND
+            )
+        else:
+            # Add empty columns if trodes_timestamp was missing (e.g., only comments)
+            print(
+                "Warning: 'trodes_timestamp' column not found in parsed data. Timestamp columns will be empty."
+            )
+            df["trodes_timestamp"] = pd.NA
+            df["trodes_timestamp_sec"] = np.nan
+
+        # Apply time offset if requested and available
+        if apply_offset:
+            if self.time_offset is not None:
+                if "trodes_timestamp_sec" in df.columns:
+                    df["timestamp_sync"] = df["trodes_timestamp_sec"] + self.time_offset
+                else:
+                    df["timestamp_sync"] = (
+                        np.nan
+                    )  # Cannot calculate if trodes_timestamp_sec is missing
+            else:
+                # Warning if offset applied but not calculated
+                print(
+                    "Warning: Time offset application requested, but offset has not "
+                    "been calculated or was unsuccessful. 'timestamp_sync' column omitted."
+                )
+                # Ensure the column doesn't exist if it wasn't created
+                if "timestamp_sync" in df.columns:
+                    df = df.drop(columns=["timestamp_sync"])
+
+        # --- Data Type Consolidation ---
+        # Standardize types for common data columns if they exist
+        int_cols = [
+            "value",
+            "active_DIO_inputs_bitmask",
+            "active_DIO_outputs_bitmask",
+        ]
+        text_cols = ["text"]
+
+        for col in int_cols:
+            if col in df.columns:
+                # Convert to numeric (allowing NaNs), then use nullable Int64 type
+                df[col] = pd.to_numeric(df[col], errors="coerce").astype(
+                    pd.Int64Dtype()
+                )
+
+        for col in text_cols:
+            if col in df.columns:
+                # Ensure text columns are object type (string) and
+                # fill potential float NaNs with pandas NA
+                df[col] = df[col].astype(str).replace("nan", pd.NA).astype("object")
+
+        # Reorder columns according to preference, keeping only existing columns
+        existing_cols_in_order = [
+            col for col in preferred_column_order if col in df.columns
+        ]
+        # Add any remaining columns not in the preferred list (e.g., from 'unknown' type)
+        other_cols = [col for col in df.columns if col not in existing_cols_in_order]
+        final_column_order = existing_cols_in_order + other_cols
+        df = df[final_column_order]
+
+        # Store the final DataFrame (indexed by original log line number) and
+        # return it.
+        # NOTE(review): assumes 'line_num' is present in every parsed event —
+        # verify parse_statescript_line always attaches it.
+        self.processed_events_df = df.set_index("line_num")
+        return self.processed_events_df
+
+ def get_events_by_type(
+ self,
+ apply_offset: bool = True,
+ exclude_comments_unknown: bool = True,
+ ) -> List[pd.DataFrame]:
+ """Groups the events in the DataFrame by their 'type' column.
+ This method first generates the DataFrame using `get_events_dataframe`
+ and then groups the events by their 'type' column. Each group is
+ returned as a separate DataFrame, excluding the 'type' column.
+ This allows for easy access to events of the same type for further
+ analysis or processing.
+
+ Parameters
+ ----------
+ apply_offset : bool, optional
+ If True (default), applies the time offset to the DataFrame.
+ If False, the DataFrame will contain raw timestamps.
+ exclude_comments_unknown : bool, optional
+ If True (default), excludes comment and unknown lines from the DataFrame.
+ If False, all lines are included, which may be useful for debugging.
+ Returns
+ -------
+ List[pd.DataFrame]
+ A list of DataFrames, each corresponding to a unique event type.
+ Each DataFrame contains the events of that type, excluding the 'type' column.
+ """
+ df = self.get_events_dataframe(
+ apply_offset=apply_offset,
+ exclude_comments_unknown=exclude_comments_unknown,
+ )
+ return [group.drop(columns=["type"]) for _, group in df.groupby("type")]
+
+    def segment_into_trials(
+        self,
+        trial_start_terms: List[str],
+        trial_end_terms: List[str],
+        time_column: str = "timestamp_sync",
+    ) -> pd.DataFrame:
+        """
+        Segments events from the processed StateScript log DataFrame into trials.
+
+        Identifies trial boundaries based on the presence of specified start and end
+        terms within the 'text' column of the `processed_events_df`. Matching is
+        done with substring containment (`term in text`), not equality.
+
+        Parameters
+        ----------
+        trial_start_terms : List[str]
+            List of strings found in the 'text' column that mark the start of a trial.
+            The event containing the start term *is* the start of the trial.
+        trial_end_terms : List[str]
+            List of strings found in the 'text' column that mark the end of a trial.
+            The event containing the end term *is* the end of the trial.
+            Can overlap with trial_start_terms.
+        time_column : str, optional
+            The name of the time column in `processed_events_df` to use for
+            reporting trial start and end times. Common choices are 'timestamp_sync'
+            (if offset calculated) or 'trodes_timestamp_sec'. Defaults to 'timestamp_sync'.
+
+        Returns
+        -------
+        pd.DataFrame
+            A DataFrame where each row represents a detected trial. Columns include:
+            - 'start_time': The timestamp (from `time_column`) of the event marking the trial start.
+            - 'stop_time': The timestamp (from `time_column`) of the event marking the trial end.
+            - 'status': String indicating if the trial was 'complete' or 'incomplete'
+              (if the log ended before an end term was found).
+            Returns an empty DataFrame if no trials are found or if the required
+            columns ('text', `time_column`) are missing from `processed_events_df`.
+
+        Notes
+        -----
+        - Requires `processed_events_df` to be generated first (e.g., by calling
+          `get_events_dataframe`). If it's None, this method will attempt to generate it
+          with default settings (apply_offset=True, exclude_comments_unknown=True).
+        - Assumes trials are sequential and non-overlapping based on the first occurrence
+          of start/end terms.
+        - Handles cases where start/end terms overlap (an event can mark both the end
+          of one trial and the start of the next).
+        - Warns if a start term is found while already in a trial (restarts the trial;
+          the aborted trial is discarded, not recorded).
+        - Warns if the log ends while a trial is in progress.
+        """
+        # Attempt to generate the df if it doesn't exist
+        if self.processed_events_df is None:
+            print(
+                "Warning: processed_events_df not found. Generating with default settings."
+            )
+            self.get_events_dataframe()  # Use defaults: apply_offset=True, exclude=True
+
+        events_df = self.processed_events_df  # Use the potentially newly generated df
+
+        # Check if DataFrame is valid and contains necessary columns
+        if events_df is None or events_df.empty:
+            print("Error: No processed events DataFrame available to segment.")
+            return pd.DataFrame(
+                columns=["start_time", "stop_time", "status"]
+            )  # Return empty DF
+
+        if "text" not in events_df.columns or time_column not in events_df.columns:
+            print(
+                f"Error: DataFrame must contain 'text' and '{time_column}' columns for segmentation."
+            )
+            return pd.DataFrame(
+                columns=["start_time", "stop_time", "status"]
+            )  # Return empty DF
+
+        # Lists to store data for the final DataFrame
+        start_times = []
+        stop_times = []
+        statuses = []
+
+        current_trial_start_time = None
+        in_trial = False
+        # Time of the last event with a valid (non-NaN) value in time_column;
+        # used as the stop time for a trial still open at the end of the log.
+        last_valid_time = (
+            events_df[time_column].dropna().iloc[-1]
+            if not events_df[time_column].dropna().empty
+            else None
+        )
+
+        # Iterate through the DataFrame rows (index is line_num)
+        for index, row in events_df.iterrows():
+            message = row["text"]  # Check the 'text' column
+            current_time = row[time_column]
+
+            # Skip rows with missing time in the specified column or missing text
+            if pd.isna(current_time) or pd.isna(message):
+                continue
+
+            # Ensure message is treated as string for 'in' check
+            message_str = str(message)
+
+            # Check if the current message contains any start or end terms
+            # Use a generator expression for slightly better efficiency
+            found_end_term = any(term in message_str for term in trial_end_terms)
+            found_start_term = any(term in message_str for term in trial_start_terms)
+
+            # --- End Trial Logic ---
+            # If we are currently in a trial AND find an end term
+            if in_trial and found_end_term:
+                # Finalize the previous trial by adding its data to the lists
+                start_times.append(current_trial_start_time)
+                stop_times.append(current_time)
+                statuses.append("complete")
+
+                in_trial = False
+                current_trial_start_time = (
+                    None  # Reset start time for the next potential trial
+                )
+
+            # --- Start Trial Logic ---
+            # If we find a start term (this check happens AFTER potential end logic,
+            # allowing an event to end a trial and immediately start the next one)
+            if found_start_term:
+                # If we were NOT previously in a trial, this starts a new one
+                if not in_trial:
+                    in_trial = True
+                    current_trial_start_time = current_time
+                # If we *were* already in a trial (e.g., two start terms without an end term),
+                # log a warning and restart the trial timer from the current event.
+                else:
+                    print(
+                        f"Warning (Line {index}): Found start term '{message_str}' at {current_time} "
+                        f"while already in a trial started at {current_trial_start_time}. Restarting trial timer."
+                    )
+                    # Effectively ends the previous (implicit) trial and starts new one.
+                    # NOTE(review): the aborted trial is dropped — it is never
+                    # appended to the output lists.
+                    current_trial_start_time = current_time
+
+        # --- Handle Incomplete Trial at End of Log ---
+        # If the loop finishes and we are still marked as 'in_trial'
+        if in_trial:
+            print(
+                f"Warning: Log processing ended while still in a trial that started at {current_trial_start_time}. "
+                f"Marking as incomplete."
+            )
+            # Add the incomplete trial to the lists
+            start_times.append(current_trial_start_time)
+            # Use the time of the last valid event in the time column as the end time
+            stop_times.append(
+                last_valid_time if last_valid_time is not None else np.nan
+            )
+            statuses.append("incomplete")
+
+        # --- Create Final DataFrame ---
+        # Construct the DataFrame from the collected lists
+        trials_df = pd.DataFrame(
+            {"start_time": start_times, "stop_time": stop_times, "status": statuses}
+        )
+
+        # Ensure correct dtypes (start/end times should match time_column, status is object)
+        if not trials_df.empty:
+            trials_df["start_time"] = trials_df["start_time"].astype(
+                events_df[time_column].dtype
+            )
+            trials_df["stop_time"] = trials_df["stop_time"].astype(
+                events_df[time_column].dtype
+            )
+            trials_df["status"] = trials_df["status"].astype(
+                "object"
+            )  # String/object type
+
+        return trials_df
diff --git a/src/trodes_to_nwb/tests/test_convert_statescript.py b/src/trodes_to_nwb/tests/test_convert_statescript.py
new file mode 100644
index 0000000..47838c3
--- /dev/null
+++ b/src/trodes_to_nwb/tests/test_convert_statescript.py
@@ -0,0 +1,771 @@
+import os
+import pathlib
+import tempfile
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from trodes_to_nwb.convert_statescript import (
+ StateScriptLogProcessor,
+ _interpret_DIO_mask,
+ _parse_int,
+ parse_statescript_line,
+ parse_ts_int_int,
+ parse_ts_str,
+ parse_ts_str_equals_int,
+ parse_ts_str_int,
+)
+
+# --- Fixtures ---
+
+
+@pytest.fixture(scope="module")
+def sample_log_content():
+    """Provides sample log content for general testing.
+
+    Covers every line shape the parser handles: comments, 'ts_int_int'
+    (DIO bitmask) lines, 'ts_str' events, 'ts_str_equals_int', 'ts_str_int',
+    and one 'unknown' line with no leading timestamp.
+    """
+    return """# Test log started
+76504 0 0
+76566 center_poke
+76566 65536 0
+100078 counter_handlePoke = 1
+100078 4 0
+100559 LEFT_PORT 1
+Executing this line without timestamp
+115030 center_poke
+115030 65536 0
+115040 0 0
+# Test log ended
+"""
+
+
+@pytest.fixture(scope="module")
+def empty_log_content():
+    """Provides empty log content (the degenerate zero-line case)."""
+    return ""
+
+
+@pytest.fixture(scope="module")
+def comment_only_log_content():
+    """Provides log content with only comments and whitespace.
+
+    Every line should parse as 'comment_or_empty', so filtering with
+    defaults yields no usable events.
+    """
+    return """# Start
+# Middle line
+
+# End
+"""
+
+
+@pytest.fixture
+def processor(sample_log_content):
+ """Provides a processor instance initialized with standard sample content."""
+ return StateScriptLogProcessor(sample_log_content, source_info="from string")
+
+
+@pytest.fixture
+def empty_processor(empty_log_content):
+ """Provides a processor instance initialized with empty content."""
+ return StateScriptLogProcessor(empty_log_content, source_info="empty string")
+
+
+@pytest.fixture
+def comment_only_processor(comment_only_log_content):
+ """Provides a processor instance initialized with only comments."""
+ return StateScriptLogProcessor(
+ comment_only_log_content, source_info="comments only string"
+ )
+
+
+@pytest.fixture(scope="module")
+def external_times():
+ """Provides sample external times for offset calculation tests."""
+ # These correspond to the '65536 0' events (ts_int_int) in sample_log_content
+ # 76566 ms -> 76.566 s
+ # 115030 ms -> 115.030 s
+ # Let's assume a base time (e.g., Unix timestamp) for the external system
+ base_time = 1678880000.0
+ return np.array([base_time + 76.566, base_time + 115.030])
+
+
+@pytest.fixture(scope="module")
+def external_times_for_str_int():
+ """Provides sample external times for offset calculation tests using ts_str_int."""
+ # These correspond to the 'LEFT_PORT 1' event in sample_log_content
+ # 100559 ms -> 100.559 s
+ base_time = 1678880000.0
+ return np.array(
+ [
+ base_time + 100.559,
+ base_time + 110.0,
+ base_time + 120.0,
+ base_time + 130.0,
+ ]
+ )
+
+
+@pytest.fixture
+def temp_log_file(sample_log_content):
+    """Creates a temporary log file with standard content and yields its path.
+
+    Uses delete=False so the closed file persists for the test to reopen
+    (required on Windows, where an open NamedTemporaryFile cannot be opened
+    again); the file is removed explicitly during fixture teardown.
+    """
+    with tempfile.NamedTemporaryFile(
+        mode="w", delete=False, suffix=".stateScriptLog", encoding="utf-8"
+    ) as tmp_file:
+        tmp_file.write(sample_log_content)
+        tmp_file_path = tmp_file.name
+    yield pathlib.Path(tmp_file_path)
+    # Teardown: delete the file once the test using the fixture finishes.
+    os.remove(tmp_file_path)
+
+
+# --- Tests for Level 0 Helpers ---
+
+
+def test_parse_int():
+ """Test the _parse_int helper function."""
+ assert _parse_int("123") == 123
+ assert _parse_int("-45") == -45
+ assert _parse_int("0") == 0
+ assert _parse_int("abc") is None
+ assert _parse_int("12.3") is None
+ assert _parse_int("") is None
+
+
+def test_interpret_dio_mask():
+ """Test the _interpret_DIO_mask function."""
+ assert _interpret_DIO_mask(9, max_DIOs=8) == [1, 4] # Binary 1001
+ assert _interpret_DIO_mask(0) == []
+ assert _interpret_DIO_mask(None) == []
+ assert _interpret_DIO_mask(pd.NA) == []
+ assert _interpret_DIO_mask(1) == [1]
+ assert _interpret_DIO_mask(65536, max_DIOs=32) == [17] # 2^16
+ assert _interpret_DIO_mask(65535, max_DIOs=16) == list(
+ range(1, 17)
+ ) # All 16 bits set
+ assert _interpret_DIO_mask(65535, max_DIOs=32) == list(
+ range(1, 17)
+ ) # Check max_DIOs limit
+ assert _interpret_DIO_mask("abc") == [] # Invalid input type
+
+
+# --- Tests for Level 1 Parsers ---
+
+
+def test_parse_ts_int_int():
+ """Test parse_ts_int_int directly."""
+ parts = ["8386500", "0", "0"]
+ expected = {
+ "type": "ts_int_int",
+ "timestamp": 8386500,
+ "value1": 0,
+ "value2": 0,
+ }
+ assert parse_ts_int_int(parts) == expected
+
+ parts_wrong_len = ["123", "0"]
+ assert parse_ts_int_int(parts_wrong_len) is None
+
+ parts_not_int = ["123", "abc", "0"]
+ assert parse_ts_int_int(parts_not_int) is None
+
+ parts_float = ["123", "4.5", "0"]
+ assert parse_ts_int_int(parts_float) is None
+
+
+def test_parse_ts_str_int():
+ """Test parse_ts_str_int directly."""
+ parts = ["8386500", "DOWN", "3"]
+ expected = {
+ "type": "ts_str_int",
+ "timestamp": 8386500,
+ "text": "DOWN",
+ "value": 3,
+ }
+ assert parse_ts_str_int(parts) == expected
+
+ parts_wrong_len = ["123", "UP"]
+ assert parse_ts_str_int(parts_wrong_len) is None
+
+ # This should be parsed by parse_ts_int_int due to precedence,
+ # so parse_ts_str_int should return None here because str part is int.
+ parts_str_is_int = ["123", "456", "789"]
+ assert parse_ts_str_int(parts_str_is_int) is None
+
+ parts_val_not_int = ["123", "UP", "abc"]
+ assert parse_ts_str_int(parts_val_not_int) is None
+
+
+def test_parse_ts_str_equals_int():
+ """Test parse_ts_str_equals_int directly.
+ NOTE: The code only handles a single word before '='.
+ """
+ parts = ["100078", "counter_handlePoke", "=", "1"]
+ expected = {
+ "type": "ts_str_equals_int",
+ "timestamp": 100078, # Raw timestamp key
+ "text": "counter_handlePoke", # Correctly uses parts[1]
+ "value": 1,
+ }
+ assert parse_ts_str_equals_int(parts) == expected
+
+ # This case is NOT handled by the current implementation (len(parts) != 4)
+ parts_multi_word = ["3610855", "total", "rewards", "=", "70"]
+ assert parse_ts_str_equals_int(parts_multi_word) is None
+
+ parts_wrong_len = ["123", "=", "1"]
+ assert parse_ts_str_equals_int(parts_wrong_len) is None
+
+ parts_no_equals = ["123", "text", "1"] # len=3 != 4
+ assert parse_ts_str_equals_int(parts_no_equals) is None
+
+ parts_wrong_equals_pos = ["123", "text", "1", "="] # '=' is parts[3], not parts[2]
+ assert parse_ts_str_equals_int(parts_wrong_equals_pos) is None
+
+ parts_val_not_int = ["123", "text", "=", "abc"]
+ assert parse_ts_str_equals_int(parts_val_not_int) is None
+
+
+def test_parse_ts_str():
+ """Test parse_ts_str directly."""
+ parts = ["76566", "center_poke"]
+ expected = {
+ "type": "ts_str",
+ "timestamp": 76566,
+ "text": "center_poke",
+ }
+ assert parse_ts_str(parts) == expected
+
+ parts_multi_word = ["1271815", "some", "multi", "word", "event"]
+ expected_multi = {
+ "type": "ts_str",
+ "timestamp": 1271815,
+ "text": "some multi word event",
+ }
+ assert parse_ts_str(parts_multi_word) == expected_multi
+
+ parts_wrong_len = ["123"]
+ assert parse_ts_str(parts_wrong_len) is None
+
+ # Second part is int, should fail this parser (handled by ts_int_int or ts_str_int)
+ parts_second_is_int = ["123", "456"]
+ assert parse_ts_str(parts_second_is_int) is None
+
+
+# --- Tests for parse_statescript_line (Covers integration and dispatching) ---
+
+
+def test_parse_statescript_line_dispatching():
+ """Test parse_statescript_line dispatching for various line types."""
+ lines_expected = [
+ ("8386500 0 0", "ts_int_int", 8386500),
+ ("100559 LEFT_PORT 1", "ts_str_int", 100559),
+ ("100078 counter_handlePoke = 1", "ts_str_equals_int", 100078),
+ ("76566 center_poke", "ts_str", 76566),
+ ("Executing trigger function 22", "unknown", None), # No timestamp
+ ("# comment", "comment_or_empty", None),
+ ("", "comment_or_empty", None),
+ (" ", "comment_or_empty", None),
+ ("123 456 abc", "unknown", None), # Doesn't fit ts_int_int/ts_str_int/ts_str
+ ("123 abc def", "ts_str", 123), # Fits ts_str
+ # Precedence: ts_str_equals_int matches first
+ ("456 text = 5", "ts_str_equals_int", 456),
+ # Precedence: ts_int_int matches first
+ ("8386500 128 512", "ts_int_int", 8386500),
+ # Precedence: ts_str_int matches (str 'UP' is not int)
+ ("90000 UP 10", "ts_str_int", 90000),
+ # Precedence: ts_str matches (str 'some text' is not int)
+ ("95000 some text here", "ts_str", 95000),
+ ]
+
+ for i, (line, expected_type, expected_ts) in enumerate(lines_expected):
+ parsed = parse_statescript_line(line, line_num=i)
+ assert parsed["type"] == expected_type, f"Line: {line}"
+ assert parsed["raw_line"] == line.strip(), f"Line: {line}"
+ assert parsed["line_num"] == i, f"Line: {line}"
+ # Check timestamp presence/value based on type
+ if expected_type not in ["unknown", "comment_or_empty"]:
+ assert "timestamp" in parsed, f"Line: {line}"
+ assert parsed["timestamp"] == expected_ts, f"Line: {line}"
+ else:
+ # Should explicitly contain timestamp: None for these types
+ assert parsed.get("timestamp") is None, f"Line: {line}"
+
+
+# --- Tests for StateScriptLogProcessor ---
+
+
+def test_init_from_string(processor, sample_log_content):
+    """Test initialization from string.
+
+    A freshly constructed processor stores the log content verbatim and has
+    no parsed events, offset, or cached DataFrame yet.
+    """
+    assert processor.log_content == sample_log_content
+    assert processor.source_description == "from string"
+    # All derived state starts empty/None until the parse methods run.
+    assert processor.raw_events == []
+    assert processor.time_offset is None
+    assert processor.processed_events_df is None
+
+
+def test_init_from_file(temp_log_file, sample_log_content):
+    """Test initialization from a file.
+
+    The alternate constructor must read the file's full content and record
+    the originating file name in its source description.
+    """
+    processor_file = StateScriptLogProcessor.from_file(temp_log_file)
+    assert processor_file.log_content == sample_log_content
+    assert processor_file.source_description.startswith("from file:")
+    assert temp_log_file.name in processor_file.source_description
+
+
+def test_init_from_file_not_found():
+    """Test initialization from a non-existent file raises FileNotFoundError."""
+    with pytest.raises(FileNotFoundError):
+        StateScriptLogProcessor.from_file("non_existent_file_qwerty.log")
+
+
+def test_parse_raw_events(processor, sample_log_content):
+    """Test parsing the raw log content into events.
+
+    Every line of the log (including comments and blank lines) must yield
+    one event dict, with 0-based 'line_num' matching its position.
+    """
+    events = processor.parse_raw_events()
+    assert processor.raw_events is events  # Should store result internally
+    assert isinstance(events, list)
+    # Count lines in the fixture (includes comments, blanks if any)
+    num_lines = len(sample_log_content.strip().splitlines())
+    assert len(events) == num_lines
+
+    # Check specific lines based on fixture content
+    # Line 0: # Test log started
+    assert events[0]["type"] == "comment_or_empty"
+    assert events[0]["line_num"] == 0
+    assert events[0]["timestamp"] is None
+    # Line 1: 76504 0 0
+    assert events[1]["type"] == "ts_int_int"
+    assert events[1]["timestamp"] == 76504
+    assert events[1]["value1"] == 0
+    assert events[1]["line_num"] == 1
+    assert events[1]["raw_line"] == "76504 0 0"
+    # Line 7: Executing this line without timestamp
+    assert events[7]["type"] == "unknown"
+    assert events[7]["raw_line"] == "Executing this line without timestamp"
+    assert events[7]["line_num"] == 7
+    assert events[7]["timestamp"] is None
+    # Line 11: # Test log ended
+    assert events[11]["type"] == "comment_or_empty"
+    assert events[11]["line_num"] == 11
+    assert events[11]["timestamp"] is None
+
+
+def test_find_reference_events(processor):
+    """Test the internal _find_reference_events method.
+
+    Covers matching on each event type, the empty-result shape, and the
+    lazy re-parse of raw events when they have been cleared.
+    """
+    # Case 1: Find 'ts_str' events ('center_poke' appears twice)
+    ref_df_str = processor._find_reference_events(
+        event_type="ts_str", conditions={"text": "center_poke"}
+    )
+    assert isinstance(ref_df_str, pd.DataFrame)
+    assert len(ref_df_str) == 2
+    # Check raw timestamp column (renamed from 'timestamp' in raw_events)
+    pd.testing.assert_series_equal(
+        ref_df_str["timestamp"],  # Raw integer timestamp
+        pd.Series([76566, 115030], name="timestamp", dtype=int),
+        check_names=True,
+        check_dtype=True,
+    )
+    # Check calculated seconds column
+    assert "trodes_timestamp_sec" in ref_df_str.columns
+    pd.testing.assert_series_equal(
+        ref_df_str["trodes_timestamp_sec"],
+        pd.Series([76.566, 115.030], name="trodes_timestamp_sec", dtype=float),
+        check_names=True,
+        check_dtype=True,
+    )
+    assert ref_df_str["text"].tolist() == ["center_poke", "center_poke"]
+
+    # Case 2: Find 'ts_int_int' events with specific values (appears twice)
+    ref_df_int = processor._find_reference_events(
+        event_type="ts_int_int", conditions={"value1": 65536, "value2": 0}
+    )
+    assert len(ref_df_int) == 2
+    assert ref_df_int["timestamp"].tolist() == [76566, 115030]
+    assert ref_df_int["value1"].tolist() == [65536, 65536]
+    assert ref_df_int["value2"].tolist() == [0, 0]
+    assert ref_df_int["trodes_timestamp_sec"].tolist() == [76.566, 115.030]
+
+    # Case 3: Find 'ts_str_equals_int' (appears once)
+    ref_df_eq = processor._find_reference_events(
+        event_type="ts_str_equals_int", conditions={"text": "counter_handlePoke"}
+    )
+    assert len(ref_df_eq) == 1
+    assert ref_df_eq["timestamp"].iloc[0] == 100078
+    assert ref_df_eq["text"].iloc[0] == "counter_handlePoke"
+    assert ref_df_eq["value"].iloc[0] == 1
+    assert ref_df_eq["trodes_timestamp_sec"].iloc[0] == pytest.approx(100.078)
+
+    # Case 4: No matching events found
+    ref_df_none = processor._find_reference_events(
+        event_type="ts_str", conditions={"text": "nonexistent"}
+    )
+    assert ref_df_none.empty
+    assert isinstance(ref_df_none, pd.DataFrame)  # Should still return DF
+    # Check expected columns exist even if empty
+    assert "trodes_timestamp" in ref_df_none.columns
+    assert "trodes_timestamp_sec" in ref_df_none.columns
+    assert "text" in ref_df_none.columns  # From conditions
+
+    # Case 5: Ensure processor parses if raw_events is empty
+    processor.raw_events = []  # Reset raw events
+    assert processor.raw_events == []
+    ref_df_reparse = processor._find_reference_events(
+        event_type="ts_str", conditions={"text": "center_poke"}
+    )
+    assert len(processor.raw_events) > 0  # Should have re-parsed
+    assert len(ref_df_reparse) == 2  # Should find the events
+
+
+def test_calculate_time_offset_success(processor, external_times):
+    """Test successful time offset calculation.
+
+    The two '65536 0' log events align exactly with the external_times
+    fixture, so the recovered offset equals the fixture's base epoch.
+    """
+    # Use the 'ts_int_int' events matching external_times fixture
+    offset = processor.calculate_time_offset(
+        external_reference_times=external_times,
+        log_event_type="ts_int_int",
+        # Use the keys from the raw parsed dict ('value1', 'value2')
+        log_event_conditions={"value1": 65536, "value2": 0},
+        check_n_events=2,  # Use both available matching events
+    )
+    assert offset is not None
+    assert processor.time_offset == offset  # Check internal storage
+    # Expected offset = external_base_time = 1678880000.0
+    # external_times[0] = base + 76.566; log_times_sec[0] = 76.566
+    # offset = external - log = base
+    assert offset == pytest.approx(1678880000.0)
+
+
+def test_calculate_time_offset_fail_not_enough_log(
+    processor, external_times_for_str_int
+):
+    """Test offset calculation failure due to insufficient log events.
+
+    'LEFT_PORT 1' appears only once in the log, which is fewer than the
+    default check_n_events=4, so the calculation must fail cleanly.
+    """
+    offset = processor.calculate_time_offset(
+        external_reference_times=external_times_for_str_int,  # Has 4 times
+        log_event_type="ts_str_int",
+        log_event_conditions={"text": "LEFT_PORT", "value": 1},
+        # check_n_events=4, # Default
+    )
+    assert offset is None
+    assert processor.time_offset is None  # Should remain None
+
+
+def test_calculate_time_offset_fail_not_enough_external(processor):
+    """Test offset calculation failure due to insufficient external times.
+
+    The log has two matching '65536 0' events, but only one external time
+    is supplied — too few for either the default check_n_events=4 or an
+    explicit check_n_events=2.
+    """
+    offset = processor.calculate_time_offset(
+        external_reference_times=np.array([1678880076.566]),  # Only 1 time
+        log_event_type="ts_int_int",
+        log_event_conditions={"value1": 65536, "value2": 0},
+        # check_n_events=4, # Default
+    )
+    assert offset is None
+    assert processor.time_offset is None
+
+    # Test again with check_n_events=2 (should still fail, need 2 external)
+    offset_check2 = processor.calculate_time_offset(
+        external_reference_times=np.array([1678880076.566]),  # Only 1 time
+        log_event_type="ts_int_int",
+        log_event_conditions={"value1": 65536, "value2": 0},
+        check_n_events=2,
+    )
+    assert offset_check2 is None
+    assert processor.time_offset is None
+
+
+def test_calculate_time_offset_fail_mismatch(processor, external_times):
+ """Test offset calculation failure due to exceeding mismatch threshold."""
+ # Shift external times enough to exceed default threshold (0.1) on
+ # the second event
+ shifted_external_times = external_times
+ # External times are not a good stable reference
+ # because the second one is shifted
+ shifted_external_times[1] += 0.2 # Shift the second time by 0.2 seconds
+ offset = processor.calculate_time_offset(
+ external_reference_times=shifted_external_times,
+ log_event_type="ts_int_int",
+ log_event_conditions={"value1": 65536, "value2": 0},
+ check_n_events=2,
+ match_threshold=0.1, # Explicitly set default for clarity
+ )
+ assert offset is None
+ assert processor.time_offset is None
+
+
+def test_get_events_dataframe_defaults(processor):
+    """Test default behavior: exclude comments/unknown, no offset applied yet.
+
+    Spot-checks one row per parsed line type, plus column presence and
+    dtypes, using line_num-based indexing into the sample log.
+    """
+    df = processor.get_events_dataframe(apply_offset=False)
+    assert processor.processed_events_df is df  # Check internal storage
+    assert isinstance(df, pd.DataFrame)
+    # Expected: 12 lines total - 2 comments - 1 unknown = 9 valid events
+    assert len(df) == 9
+    assert df.index.name == "line_num"  # Index should be line_num
+
+    # --- Check Columns ---
+    assert "raw_line" in df.columns
+    assert "type" in df.columns
+    assert "trodes_timestamp" in df.columns
+    assert "trodes_timestamp_sec" in df.columns
+    assert "text" in df.columns
+    assert "value" in df.columns
+    assert "active_DIO_inputs_bitmask" in df.columns
+    assert "active_DIO_outputs_bitmask" in df.columns
+    assert "active_DIO_inputs" in df.columns  # List column
+    assert "active_DIO_outputs" in df.columns  # List column
+    assert "timestamp_sync" not in df.columns  # Offset not applied
+
+    # --- Check Content and Types (spot check first few rows) ---
+    # Row index corresponds to line_num
+    # Line 1: 76504 0 0 (type: ts_int_int) -> line_num 1
+    assert df.loc[1, "type"] == "ts_int_int"
+    assert df.loc[1, "raw_line"] == "76504 0 0"
+    assert df.loc[1, "trodes_timestamp"] == 76504
+    assert df.loc[1, "trodes_timestamp_sec"] == pytest.approx(76.504)
+    assert pd.isna(df.loc[1, "text"])
+    assert pd.isna(df.loc[1, "value"])
+    assert df.loc[1, "active_DIO_inputs_bitmask"] == 0
+    assert df.loc[1, "active_DIO_outputs_bitmask"] == 0
+    assert df.loc[1, "active_DIO_inputs"] == []
+    assert df.loc[1, "active_DIO_outputs"] == []
+
+    # Line 2: 76566 center_poke (type: ts_str) -> line_num 2
+    assert df.loc[2, "type"] == "ts_str"
+    assert df.loc[2, "trodes_timestamp"] == 76566
+    assert df.loc[2, "text"] == "center_poke"
+    assert pd.isna(df.loc[2, "value"])
+    assert pd.isna(df.loc[2, "active_DIO_inputs_bitmask"])
+    assert pd.isna(df.loc[2, "active_DIO_outputs_bitmask"])
+    assert df.loc[2, "active_DIO_inputs"] == []  # Should be empty list from NA mask
+    assert df.loc[2, "active_DIO_outputs"] == []  # Should be empty list from NA mask
+
+    # Line 3: 76566 65536 0 (type: ts_int_int) -> line_num 3
+    assert df.loc[3, "type"] == "ts_int_int"
+    assert df.loc[3, "trodes_timestamp"] == 76566
+    assert df.loc[3, "active_DIO_inputs_bitmask"] == 65536  # DIO 17
+    assert df.loc[3, "active_DIO_outputs_bitmask"] == 0
+    assert df.loc[3, "active_DIO_inputs"] == [17]  # Check interpretation
+    assert df.loc[3, "active_DIO_outputs"] == []
+
+    # Line 4: 100078 counter_handlePoke = 1 (type: ts_str_equals_int) -> line_num 4
+    assert df.loc[4, "type"] == "ts_str_equals_int"
+    assert df.loc[4, "trodes_timestamp"] == 100078
+    assert df.loc[4, "text"] == "counter_handlePoke"
+    assert df.loc[4, "value"] == 1
+    assert pd.isna(df.loc[4, "active_DIO_inputs_bitmask"])
+
+    # Line 6: 100559 LEFT_PORT 1 (type: ts_str_int) -> line_num 6
+    assert df.loc[6, "type"] == "ts_str_int"
+    assert df.loc[6, "trodes_timestamp"] == 100559
+    assert df.loc[6, "text"] == "LEFT_PORT"
+    assert df.loc[6, "value"] == 1
+    assert pd.isna(df.loc[6, "active_DIO_inputs_bitmask"])
+
+    # --- Check Dtypes ---
+    assert df["trodes_timestamp"].dtype == pd.Int64Dtype()  # Nullable int
+    assert df["trodes_timestamp_sec"].dtype == "float64"
+    assert df["text"].dtype == "object"  # String/mixed
+    assert df["value"].dtype == pd.Int64Dtype()
+    assert df["active_DIO_inputs_bitmask"].dtype == pd.Int64Dtype()
+    assert df["active_DIO_outputs_bitmask"].dtype == pd.Int64Dtype()
+    assert df["active_DIO_inputs"].dtype == "object"  # List type
+    assert df["active_DIO_outputs"].dtype == "object"  # List type
+
+
+def test_get_events_dataframe_include_all(processor, sample_log_content):
+ """Test including comments and unknown lines."""
+ df = processor.get_events_dataframe(
+ apply_offset=False, exclude_comments_unknown=False
+ )
+ assert isinstance(df, pd.DataFrame)
+ num_lines = len(sample_log_content.strip().splitlines())
+ assert len(df) == num_lines # All lines included (12)
+ assert df.index.name == "line_num"
+
+ # Check specific lines
+ # Line 0: Comment
+ assert df.loc[0, "type"] == "comment_or_empty"
+ assert df.loc[0, "raw_line"] == "# Test log started"
+ assert pd.isna(df.loc[0, "trodes_timestamp"]) # Should be NA (Int64Dtype)
+ assert np.isnan(df.loc[0, "trodes_timestamp_sec"]) # Should be NaN (float)
+ assert pd.isna(df.loc[0, "text"]) # Should be NA
+ assert df.loc[0, "active_DIO_inputs"] == [] # Should be empty list for comment
+
+ # Line 7: Unknown
+ assert df.loc[7, "type"] == "unknown"
+ assert df.loc[7, "raw_line"] == "Executing this line without timestamp"
+ assert pd.isna(df.loc[7, "trodes_timestamp"])
+ assert np.isnan(df.loc[7, "trodes_timestamp_sec"])
+ assert pd.isna(df.loc[7, "text"])
+ assert df.loc[7, "active_DIO_inputs"] == []
+
+ # Line 11: Comment
+ assert df.loc[11, "type"] == "comment_or_empty"
+ assert df.loc[11, "raw_line"] == "# Test log ended"
+ assert pd.isna(df.loc[11, "trodes_timestamp"])
+
+ # Check a valid line still looks right
+ assert df.loc[1, "type"] == "ts_int_int"
+ assert df.loc[1, "trodes_timestamp"] == 76504
+
+
+def test_get_events_dataframe_with_offset(processor):
+ """Test applying offset and check sync timestamp calculation."""
+ # Simulate successful offset calculation
+ test_offset = 1678880000.0
+ processor.time_offset = test_offset
+ df = processor.get_events_dataframe(apply_offset=True) # Default exclude=True
+ assert isinstance(df, pd.DataFrame)
+ assert len(df) == 9 # Excludes comments/unknown
+ assert df.index.name == "line_num"
+ assert "timestamp_sync" in df.columns
+ assert df["timestamp_sync"].dtype == "float64"
+
+ # Check calculation for a few events
+ # Line 1: 76504 ms
+ expected_sync_1 = (76504 / 1000.0) + test_offset
+ assert df.loc[1, "timestamp_sync"] == pytest.approx(expected_sync_1)
+
+ # Line 3: 76566 ms
+ expected_sync_3 = (76566 / 1000.0) + test_offset
+ assert df.loc[3, "timestamp_sync"] == pytest.approx(expected_sync_3)
+
+ # Line 9: 115030 ms
+ expected_sync_9 = (115030 / 1000.0) + test_offset
+ assert df.loc[9, "timestamp_sync"] == pytest.approx(expected_sync_9)
+
+ # Check NA value handling in other columns remains correct
+ assert pd.isna(df.loc[1, "text"])
+ assert df.loc[1, "active_DIO_inputs_bitmask"] == 0
+ assert df.loc[3, "active_DIO_inputs"] == [17]
+
+
+def test_get_events_dataframe_apply_offset_not_calculated(processor, capsys):
+ """Test applying offset when offset is None generates warning and no column."""
+ processor.time_offset = None # Ensure no offset is set
+ df = processor.get_events_dataframe(apply_offset=True) # Request offset application
+ assert isinstance(df, pd.DataFrame)
+ assert "timestamp_sync" not in df.columns # Sync column should be absent
+ assert len(df) == 9 # Should still return the dataframe without the column
+ assert df.index.name == "line_num"
+
+ # Check that the warning was printed
+ captured = capsys.readouterr()
+ assert (
+ "Warning: Time offset application requested" in captured.out
+ or "Warning: Time offset application requested" in captured.err
+ )
+
+
+def test_get_events_dataframe_no_apply_offset_calculated(processor):
+ """Test apply_offset=False ignores existing offset."""
+ processor.time_offset = 1000.0 # Set an offset
+ df = processor.get_events_dataframe(
+ apply_offset=False
+ ) # Request NO offset application
+ assert isinstance(df, pd.DataFrame)
+ assert "timestamp_sync" not in df.columns # Sync column should be absent
+ assert len(df) == 9
+ assert df.index.name == "line_num"
+
+
+def test_empty_log(empty_processor):
+ """Test processing an empty log file."""
+ events = empty_processor.parse_raw_events()
+ assert events == []
+ df = empty_processor.get_events_dataframe()
+ assert isinstance(df, pd.DataFrame)
+ assert df.empty
+ # An empty dataframe doesn't have an index name set
+ assert df.index.name is None
+
+
+def test_comment_only_log(comment_only_processor):
+ """Test processing a log file with only comments/whitespace."""
+ events = comment_only_processor.parse_raw_events()
+ assert len(events) == 4 # 4 lines in the fixture
+ assert all(e["type"] == "comment_or_empty" for e in events)
+ assert all(e["timestamp"] is None for e in events)
+
+ # Default: exclude comments -> empty DataFrame
+ df_excluded = comment_only_processor.get_events_dataframe(apply_offset=False)
+ assert isinstance(df_excluded, pd.DataFrame)
+ assert df_excluded.empty
+ assert df_excluded.index.name is None
+
+ # Include comments -> DataFrame with only comment entries
+ df_included = comment_only_processor.get_events_dataframe(
+ apply_offset=False, exclude_comments_unknown=False
+ )
+ assert isinstance(df_included, pd.DataFrame)
+ assert len(df_included) == 4
+ assert df_included.index.name == "line_num"
+ assert all(df_included["type"] == "comment_or_empty")
+ assert df_included["trodes_timestamp"].isna().all()
+ assert df_included["trodes_timestamp_sec"].isna().all()
+
+
+def test_repr(processor):
+ """Test the __repr__ method reflects state."""
+ # Initial state
+ initial_repr = repr(processor)
+ assert isinstance(initial_repr, str)
+ assert "StateScriptLogProcessor" in html_initial
+ assert "Status: Not Parsed" in html_initial
+ assert "Offset: Not Calculated" in html_initial
+ assert "DataFrame: Not Generated" in html_initial
+ assert "Source: from string" in html_initial
+ assert "DataFrame Preview" not in html_initial # No preview yet
+
+ # After parsing
+ processor.parse_raw_events()
+ num_raw = len(processor.raw_events)
+ html_parsed = processor._repr_html_()
+ assert isinstance(html_parsed, str)
+ assert "Status: Parsed" in html_parsed
+ assert f"({num_raw} raw entries)" in html_parsed
+ assert "Offset: Not Calculated" in html_parsed
+ assert "DataFrame: Not Generated" in html_parsed
+
+ # After offset calculation
+ processor.time_offset = 1234.5678
+ html_offset = processor._repr_html_()
+ assert isinstance(html_offset, str)
+ assert "Offset: 1234.5678s" in html_offset # Check formatting
+ assert "DataFrame: Not Generated" in html_offset
+
+ # After DataFrame generation
+ processor.get_events_dataframe()
+ html_df = processor._repr_html_()
+ assert isinstance(html_df, str)
+ assert "DataFrame: Generated" in html_df
+ assert (
+ "DataFrame Preview (first 5 rows):
" in html_df
+ ) # Check for preview section
+ assert "