From 8009edf8064ac359c17aa547abec2924776814b1 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 11:49:21 -0400 Subject: [PATCH 01/23] Add statescript parsing --- src/trodes_to_nwb/convert_dios.py | 1 + src/trodes_to_nwb/convert_statescript.py | 1123 +++++++++++++++++ .../tests/test_convert_statescript.py | 544 ++++++++ 3 files changed, 1668 insertions(+) create mode 100644 src/trodes_to_nwb/convert_statescript.py create mode 100644 src/trodes_to_nwb/tests/test_convert_statescript.py diff --git a/src/trodes_to_nwb/convert_dios.py b/src/trodes_to_nwb/convert_dios.py index e532cae..97c57a8 100644 --- a/src/trodes_to_nwb/convert_dios.py +++ b/src/trodes_to_nwb/convert_dios.py @@ -21,6 +21,7 @@ def _get_channel_name_map(metadata: dict) -> dict[str, str]: ------- channel_name_map : dict Parsed behavioral events metadata mapping hardware event name to human-readable name + {"hardware_event_name": {"name": "human_readable_name", "comments": "comments"}} """ dio_metadata = metadata["behavioral_events"] channel_name_map = {} diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py new file mode 100644 index 0000000..e0ce567 --- /dev/null +++ b/src/trodes_to_nwb/convert_statescript.py @@ -0,0 +1,1123 @@ +import pathlib +from typing import Any, Dict, List, Optional, Type, TypeVar, Union + +import numpy as np +import pandas as pd + +from .convert_dios import _get_channel_name_map as _get_dio_channel_name_map + +T_StateScriptLogProcessor = TypeVar( + "T_StateScriptLogProcessor", bound="StateScriptLogProcessor" +) + + +def _parse_int(s: str) -> Optional[int]: + """Attempts to parse a string as an integer. + + Parameters + ---------- + s : str + Input string. + + Returns + ------- + Optional[int] + The parsed integer, or None if parsing fails. + + Raises + ------ + ValueError + If the string cannot be converted to an integer. 
+ """ + try: + return int(s) + except ValueError: + return None + + +def parse_ts_int_int(parts: list) -> Optional[Dict[str, Any]]: + """Parses lines with structure. + + This pattern typically represents a timestamp followed by two integer values. + These integers are bitwise masks or state values, often used for logging + DIO states or other binary values. + + Example: + 8386500 0 0 -> {'ts': 8386500, 'value1': 0, 'value2': 0} + 1817158 128 512 -> {'ts': 1817158, 'value1': 128, 'value2': 512} + 76566 65536 0 -> {'ts': 76566, 'value1': 65536, 'value2': 0} + + Parameters + ---------- + parts : list + A list of strings obtained by splitting a log line by whitespace. + Expected to contain exactly 3 parts for this pattern. + + Returns + ------- + Optional[Dict[str, Any]] + A dictionary containing the parsed data: + {'type': 'ts_int_int', 'timestamp': int, 'value1': int, 'value2': int} + if the line matches the expected structure and all parts are valid integers. + Returns None otherwise. + """ + if len(parts) == 3: + # Attempt to parse all three parts as integers + timestamp, val1, val2 = [_parse_int(part) for part in parts] + + # Check if all parsing attempts were successful + if timestamp is not None and val1 is not None and val2 is not None: + return { + "type": "ts_int_int", + "timestamp": timestamp, + "value1": val1, + "value2": val2, + } + + +def parse_ts_str_int(parts: list) -> Optional[Dict[str, Any]]: + """Parses log lines structured as: . + + This pattern consists of a timestamp, a string, and a final + integer value. Often used for logging state changes associated + with an identifier (e.g., DIO pin state). + + Example: + 8386500 DOWN 3 -> {'ts': 8386500, 'text': 'DOWN', 'value': 3} + + Interpretation: At timestamp 8386500, the state associated with + identifier 3 changed to 'DOWN'. + + Parameters + ---------- + parts : list + A list of strings obtained by splitting a log line by whitespace. + Expected to contain exactly 3 parts for this pattern. 
+
+    Returns
+    -------
+    Optional[Dict[str, Any]]
+        A dictionary containing the parsed data:
+        {'type': 'ts_str_int', 'timestamp': int, 'text': str, 'value': int}
+        if the line matches the structure (int, non-int string, int).
+        Returns None otherwise.
+    """
+    if len(parts) == 3:
+        # Parse the first and third parts as integers
+        timestamp = _parse_int(parts[0])
+        text_part = parts[1]  # Middle part is expected to be text
+        val_int = _parse_int(parts[2])
+
+        # Check conditions: timestamp and value are ints, text part is not an int
+        if (
+            timestamp is not None
+            and _parse_int(text_part) is None
+            and val_int is not None
+        ):
+            return {
+                "type": "ts_str_int",
+                "timestamp": timestamp,
+                "text": text_part,
+                "value": val_int,
+            }
+
+
+def parse_ts_str_equals_int(parts: list) -> Optional[Dict[str, Any]]:
+    """Parses log lines structured as: <int> <str> = <int>.
+
+    This pattern includes a timestamp, followed by a label string,
+    an equals sign, and a final integer value. Used for logging named integer variables.
+
+    Example Log Lines:
+        3610855 totRewards = 70 -> {'ts': 3610855, 'text': 'totRewards', 'value': 70}
+        100078 counter_handlePoke = 1 -> {'ts': 100078, 'text': 'counter_handlePoke', 'value': 1}
+
+    Parameters
+    ----------
+    parts : list
+        A list of strings obtained by splitting a log line by whitespace.
+        Expected to contain exactly 4 parts, with '=' as the third part.
+
+    Returns
+    -------
+    Optional[Dict[str, Any]]
+        A dictionary containing the parsed data:
+        {'type': 'ts_str_equals_int', 'timestamp': int, 'text': str, 'value': int}
+        if the line matches the expected structure (int, label, '=', int).
+        Returns None otherwise.
+    """
+    # Check length and presence of '=' in the correct position
+    if len(parts) == 4 and parts[2] == "=":
+        timestamp = _parse_int(parts[0])
+        value = _parse_int(parts[-1])  # Expect integer value only
+        # BUGFIX: the label is parts[1] (the token before '='), not parts[3],
+        # which is the value token and would clobber 'text' with the number
+        # (e.g. returning text='70' instead of text='totRewards').
+        text = parts[1]
+
+        # Check if timestamp and value were successfully parsed as integers
+        if timestamp is not None and value is not None:
+            return {
+                "type": "ts_str_equals_int",
+                "timestamp": timestamp,
+                "text": text,
+                "value": value,
+            }
+
+
+def parse_ts_str(parts: list) -> Optional[Dict[str, Any]]:
+    """Parses log lines structured as: <int> <str...>.
+
+    This pattern represents a timestamp followed by one or more string parts,
+    where the first string part after the timestamp is *not* parseable as an integer.
+    Often used for logging timestamped events or messages.
+
+    Example Log Lines:
+        1678886401 LOCKEND -> {'ts': 1678886401, 'text': 'LOCKEND'}
+        76566 center_poke initiated -> {'ts': 76566, 'text': 'center_poke initiated'}
+
+    Parameters
+    ----------
+    parts : list
+        A list of strings obtained by splitting a log line by whitespace.
+        Expected to contain at least 2 parts.
+
+    Returns
+    -------
+    Optional[Dict[str, Any]]
+        A dictionary containing the parsed data:
+        {'type': 'ts_str', 'timestamp': int, 'text': str}
+        if the line matches the structure (int, non-int string, [optional strings...]).
+        'text' contains the joined string parts after the timestamp.
+        Returns None otherwise.
+ """ + # Check minimum length + if len(parts) >= 2: + timestamp = _parse_int(parts[0]) + # Check if the second part is parseable as an integer + first_word_is_int = _parse_int(parts[1]) is not None + + # Proceed only if timestamp is valid AND the second part is NOT an integer + if timestamp is not None and not first_word_is_int: + # Join all parts after the timestamp + text_part = " ".join(parts[1:]) + return {"type": "ts_str", "timestamp": timestamp, "text": text_part} + + +def parse_statescript_line(line: str, line_num: int = 0) -> Optional[Dict[str, Any]]: + """Attempts to parse a single StateScript log line using a set of parsers. + + It tries parsing the line against known structures in a specific order + of precedence to handle potentially overlapping patterns: + 1. ` = ` ('ts_str_equals_int') + 2. ` ` ('ts_int_int') + 3. ` ` ('ts_str_int', where is not an int) + 4. ` ` ('ts_str', where first is not an int) + + Lines starting with '#' or empty lines are marked as 'comment_or_empty'. + Lines that do not match any known pattern are marked as 'unknown'. + + Parameters + ---------- + line : str + A single line (string) from the StateScript log file. + line_num : int, optional + The line number in the file (for reference), by default 0. + + Returns + ------- + Dict[str, Any] + A dictionary describing the parsed line. It always contains: + - 'type': A string indicating the matched pattern + ('ts_str_equals_int', 'ts_int_int', 'ts_str_int', 'ts_str', + 'comment_or_empty', 'unknown'). + - 'raw_line': The original input line string. + For successfully parsed types, it includes additional keys like + 'timestamp', 'text', 'value', 'value1', 'value2' as appropriate. 
+ """ + line = line.strip() + + # Handle comments and empty lines first + if not line or line.startswith("#"): + return { + "type": "comment_or_empty", + "raw_line": line, + "line_num": line_num, + "timestamp": None, + } + + # Define the parsing functions in order of desired precedence + # More specific patterns should come before more general ones + parsers = [ + parse_ts_str_equals_int, + parse_ts_int_int, + parse_ts_str_int, + parse_ts_str, + ] + parts = line.split() # Split line into parts based on whitespace + + # Iterate through parsers and return the first successful match + for parser in parsers: + parsed = parser(parts) + if parsed: + # Add the original line to the parsed result + parsed["raw_line"] = line + parsed["line_num"] = line_num # Include line number for reference + return parsed + + return { + "type": "unknown", + "raw_line": line, + "line_num": line_num, + "timestamp": None, + } + + +def _interpret_DIO_mask( + DIO_state_value: Optional[int], max_DIOs: int = 32 +) -> List[int]: + """ + Interprets an integer value as a bitmask representing active DIOs. + Assumes a 1-based DIO numbering system (e.g., bit 0 corresponds to DIO 1). + + For example, if there are 32 DIOs, the integer value 9 (binary 1001) + indicates that DIOs 1 and 4 are active (bits 0 and 3 are set). + + If there are 16 DIOs, the integer value 65536 (binary 10000000000000000) + indicates that DIO 17 is active (bit 16 is set). + + + Parameters + ---------- + DIO_state_value : Optional[int] + The integer value representing the combined state of multiple ports. + Handles None or pandas NA values. + max_DIOs : int, optional + The maximum port number to check (bits 0 to max_DIOs-1), by default 32. + + Returns + ------- + List[int] + A sorted list of 1-based port numbers that are active (bit is set). + Returns an empty list if the value is 0, None, or NA. 
+ + Example + ------- + >>> interpret_DIO_mask(9) # 1001 in binary -> Ports 1 and 4 + [1, 4] + >>> interpret_DIO_mask(65536) # 2^16 -> Port 17 + [17] + """ + if pd.isna(DIO_state_value) or DIO_state_value == 0: + return [] + + # Ensure value is treated as an integer after NA check + try: + DIO_state_value = int(DIO_state_value) + except (ValueError, TypeError): + # Should not happen if input is from Int64Dtype column after NA check, + # but included for robustness if called directly with invalid input. + return [] + + # Create bit masks for positions 0 to max_DIOs-1 + # E.g., [1, 2, 4, 8, ...] + bit_masks = np.left_shift(1, np.arange(max_DIOs)) + + # Check which bits are set in the input value using bitwise AND + active_bits_mask = np.bitwise_and(DIO_state_value, bit_masks) > 0 + + # Get the 0-based indices (bit positions) where bits are active + active_indices = np.where(active_bits_mask)[0] + + # Convert 0-based indices to 1-based DIO numbers and return as a list + active_ports = (active_indices + 1).tolist() + + # np.where returns sorted indices, so list is already sorted + return active_ports + + +# -- Main Class for Processing StateScript Logs -- +class StateScriptLogProcessor: + """Processes StateScript log content, handling parsing and time alignment. + + This class reads StateScript log data (either from a file or a string), + parses each line into a structured format, converts integer timestamps + (assumed to be milliseconds) into seconds, and optionally calculates + a time offset to align the log timestamps with an external reference time + source (e.g., synchronization pulses recorded by another system). + + Attributes + ---------- + log_content : str + The raw string content of the log file. + source_description : str + Information about where the log content came from (e.g., file path). + raw_events : List[Dict[str, Any]] + List of dictionaries, one per parsed line from the log content + (including comments/unknown lines). 
Generated by `parse_raw_events`. + Timestamps in this list are raw integers from the log. + processed_events_df : Optional[pd.DataFrame] + DataFrame containing structured event data, typically excluding + comments and unknown lines. Generated by `get_events_dataframe`. + Includes 'trodes_timestamp_sec' (float, seconds) converted from raw + timestamps, and potentially 'timestamp_sync' (float, seconds) if + time offset is calculated and applied. + time_offset : Optional[float] + The calculated time offset in seconds, representing the difference: + (external_reference_time_sec - trodes_timestamp_sec). + Set by `calculate_time_offset`. If calculated, adding this offset + to 'trodes_timestamp_sec' yields the synchronized time ('timestamp_sync'). + + Example Usage + ------------- + >>> # Load from file + >>> processor = StateScriptLogProcessor.from_file("path/to/session.stateScriptLog") + >>> # Assuming 'external_sync_times' is a numpy array of timestamps (in seconds) + >>> # corresponding to the log event "DIO Pin 8 going UP" + >>> processor.calculate_time_offset( + ... external_reference_times=external_sync_times, + ... log_event_type="ts_str_int", + ... log_event_conditions={"text": "UP", "value": 8} + ... ) + >>> # Get the processed DataFrame with synchronized timestamps + >>> df = processor.get_events_dataframe(apply_offset=True) + >>> if df is not None: + ... print(df[['timestamp_sync', 'type', 'text', 'value']].head()) + """ + + MILLISECONDS_PER_SECOND = 1000 + + log_content: str + source_description: str + raw_events: List[Dict[str, Any]] + processed_events_df: Optional[pd.DataFrame] + time_offset: Optional[float] + + def __init__(self, log_content: str, source_info: str = "from string"): + """Initializes the processor with log content and source information. + + Parameters + ---------- + log_content : str + The entire content of the state script log as a single string. 
+ source_info : str, optional + A description of the log content's source (e.g., file path, identifier). + Defaults to "from string". + """ + self.log_content = log_content + self.source_description = source_info + + # Initialize attributes that will be populated by methods + self.raw_events = [] + self.processed_events_df = None + self.time_offset = None + + @classmethod + def from_file( + cls: Type[T_StateScriptLogProcessor], + file_path: Union[str, pathlib.Path], + ) -> T_StateScriptLogProcessor: + """Creates a StateScriptLogProcessor instance by reading a log file. + + Parameters + ---------- + file_path : Union[str, pathlib.Path] + The path to the StateScript log file. + + Returns + ------- + T_StateScriptLogProcessor + An instance of the StateScriptLogProcessor initialized with the + content of the specified file. + + Raises + ------ + FileNotFoundError + If the file specified by `file_path` does not exist. + IOError + If an error occurs during file reading (e.g., permissions). + UnicodeDecodeError + If the file cannot be decoded using UTF-8 encoding (with fallback). + """ + file_path = pathlib.Path(file_path) # Ensure Path object for consistency + source_info = f"from file: {file_path}" + try: + # Read the file content. Using 'surrogateescape' allows reading + # potentially mixed/invalid encodings, preserving problematic bytes. + # UTF-8 is a common default for logs. 
+ content = file_path.read_text(encoding="utf-8", errors="surrogateescape") + # Create and return an instance of the class + return cls(log_content=content, source_info=source_info) + except FileNotFoundError: + print(f"Error: File not found at {file_path}") + raise # Re-raise to signal failure + except IOError as e: + print(f"Error reading file at {file_path}: {e}") + raise # Re-raise + except UnicodeDecodeError as e: + print(f"Error decoding file {file_path} using utf-8: {e}") + print("Consider checking file encoding if errors persist.") + raise # Re-raise + except Exception as e: + print(f"Unexpected error reading file {file_path}: {e}") + raise + + def __repr__(self) -> str: + """Provides a concise, unambiguous string representation of the processor. + + Includes information about the source, parsing status, number of raw events, + time offset status, and DataFrame generation status. + + Returns + ------- + str + String representation of the StateScriptLogProcessor instance. + """ + cls_name = self.__class__.__name__ + source = self.source_description + + # Describe parsing status + if not self.raw_events: + parse_status = "not parsed" + num_raw = "" + else: + parse_status = "parsed" + num_raw = f", raw_events={len(self.raw_events)}" + + # Describe time offset status + offset_status = ( + f"offset={self.time_offset:.4f}s" + if self.time_offset is not None + else "no offset calculated" + ) + + # Describe DataFrame status + df_status = ( + "DataFrame generated" + if self.processed_events_df is not None + else "DataFrame not generated" + ) + + return f"<{cls_name}(source='{source}', status={parse_status}{num_raw}, {offset_status}, {df_status})>" + + def _repr_html_(self) -> str: + """Generates an HTML representation for display in Jupyter/IPython. + + Provides a more visually structured overview of the processor's state, + including source, parsing status, offset, DataFrame status, and a + preview of the DataFrame if generated. 
+ + Returns + ------- + str + HTML string representing the StateScriptLogProcessor instance. + """ + cls_name = self.__class__.__name__ + # Use getattr for robustness in case attributes haven't been set yet + source = getattr(self, "source_description", "source info missing") + raw_events_list = getattr(self, "raw_events", []) # Default to empty list + df_val = getattr(self, "processed_events_df", None) + offset_val = getattr(self, "time_offset", None) + + # Build status strings based on attribute values + if not raw_events_list: + parse_status = "Status: Not Parsed" + num_raw_str = "" + else: + parse_status = "Status: Parsed" + num_raw_str = f" ({len(raw_events_list)} raw entries)" + + offset_status = ( + f"Time Offset: {offset_val:.4f}s" + if offset_val is not None + else "Time Offset: Not Calculated" + ) + df_status = ( + "DataFrame: Generated" + if df_val is not None + else "DataFrame: Not Generated" + ) + + # Basic HTML structure and styling + html = f""" +
+

{cls_name}

+

Source: {source}

+

{parse_status}{num_raw_str}

+

{offset_status}

+

{df_status}

+ """ + + # Add DataFrame preview if it exists and is not empty + if df_val is not None and not df_val.empty: + html += "
DataFrame Preview (first 5 rows):
" + try: + # Generate HTML table from DataFrame head + html += df_val.head().to_html( + index=False, # Don't include DataFrame index + border=0, # No table border + justify="left", # Align text left + classes="dataframe-preview", # Add a class for potential CSS styling + ) + except Exception as e: + html += f"

Error generating DataFrame HTML preview: {e}

" + elif df_val is not None and df_val.empty: + html += "

(DataFrame is empty)

" + + html += "
" + return html + + def parse_raw_events(self) -> List[Dict[str, Any]]: + """Parses the loaded log content line by line. + + Returns + ------- + List[Dict[str, Any]] + The list of parsed event dictionaries stored in `self.raw_events`. + Each dictionary represents one line from the log. + """ + lines = self.log_content.splitlines() + # Use list comprehension for concise parsing of all lines + self.raw_events = [ + parse_statescript_line(line, line_num) + for line_num, line in enumerate(lines) + ] + return self.raw_events + + def _find_reference_events( + self, event_type: str, conditions: Dict[str, Any] + ) -> pd.DataFrame: + """Internal helper to find specific log events for time alignment. + + Filters the `self.raw_events` list to find events matching the specified + `event_type` and satisfying all key-value pairs in `conditions`. + Converts the integer timestamp (assumed to be milliseconds) of matching + events to seconds (float) and stores it in a 'trodes_timestamp_sec' column. + + Parameters + ---------- + event_type : str + The required 'type' field of the events to find + (e.g., 'ts_str_int', 'ts_int_int'). + conditions : Dict[str, Any] + A dictionary where keys are field names within the event dictionary + (e.g., 'text', 'value', 'value1') and values are the required values + for an event to be considered a match. + + Returns + ------- + pd.DataFrame + A DataFrame containing the matching events. Includes the original + 'timestamp' (int, milliseconds), the calculated 'trodes_timestamp_sec' + (float, seconds), and the fields specified in `conditions`. + The DataFrame is sorted by 'trodes_timestamp_sec'. + Returns an empty DataFrame if no matching events are found. 
+        """
+        # Ensure raw events are parsed first if not already done
+        if not self.raw_events:
+            self.parse_raw_events()
+
+        matching_events = []
+        # Iterate through all parsed raw events
+        for event in self.raw_events:
+            # Check if the event type matches and it has a timestamp.
+            # BUGFIX: the line parsers store the raw time under the key
+            # 'timestamp' (see parse_statescript_line); the previous check for
+            # 'trodes_timestamp' matched no event, so this method always
+            # returned empty and calculate_time_offset could never succeed.
+            if event.get("type") == event_type and "timestamp" in event:
+                # Check if all specified conditions are met for this event
+                match = all(
+                    event.get(key) == value for key, value in conditions.items()
+                )
+                if match:
+                    matching_events.append(event)
+
+        # If no matches were found, return an empty DataFrame with expected columns
+        if not matching_events:
+            # Define columns for the empty DataFrame, using 'timestamp' for
+            # consistency with the non-empty case below (raw-event key name).
+            cols = ["timestamp", "trodes_timestamp_sec"] + list(
+                conditions.keys()
+            )
+            # Ensure other relevant columns from potential matches are also defined
+            potential_value_cols = ["value", "value1", "value2", "text"]
+            for vc in potential_value_cols:
+                if vc not in cols:
+                    cols.append(vc)
+            return pd.DataFrame(columns=cols)
+
+        # Create DataFrame from the list of matching event dictionaries
+        df = pd.DataFrame(matching_events)
+
+        # Convert timestamp (assumed ms) to seconds (float)
+        df["trodes_timestamp_sec"] = (
+            df["timestamp"].astype(float) / self.MILLISECONDS_PER_SECOND
+        )
+        # Ensure original timestamp remains integer
+        df["timestamp"] = df["timestamp"].astype(int)
+
+        # Attempt to cast condition columns to appropriate types (e.g., int)
+        # This improves consistency if values were parsed as strings initially
+        for key, value in conditions.items():
+            if key in df.columns:
+                try:
+                    if isinstance(value, int):
+                        # Convert column to numeric, then integer (handles potential errors)
+                        df[key] = pd.to_numeric(df[key], errors="coerce").astype(int)
+                    # Add elif for float, bool etc.
if needed + except (ValueError, TypeError): + # Ignore casting errors if conversion isn't possible + pass + + # Sort by time and reset index + return df.sort_values("trodes_timestamp_sec") + + def calculate_time_offset( + self, + external_reference_times: np.ndarray, + log_event_type: str, + log_event_conditions: Dict[str, Any], + match_threshold: float = 0.1, + check_n_events: int = 4, + ) -> Optional[float]: + """Calculates the time offset between log events and external timestamps. + + This method aligns timestamps (in seconds) of specific events found + in the log (`log_event_type` with `log_event_conditions`) against a + provided sorted array of `external_reference_times` (also in seconds). + It assumes both sets of timestamps correspond to the same sequence of + real-world events (e.g., synchronization pulses). + + The offset is determined by finding the constant difference + (`offset = external_time - log_time`) that minimizes the timing + discrepancy between the first `check_n_events` corresponding events + in both sequences. + + IMPORTANT: If `external_reference_times` represent Unix time (seconds + since 1970-01-01 UTC), the calculated offset will align the log's + timestamps (`trodes_timestamp_sec`) to Unix time. The resulting + `timestamp_sync` column in the DataFrame will then also be in Unix time. + + Parameters + ---------- + external_reference_times : np.ndarray + A 1D numpy array of timestamps (float, in seconds) from the external + reference system. This array *must* be sorted in ascending order. + If using for Unix time alignment, these must be Unix timestamps. + log_event_type : str + The 'type' of log event to use as the reference points within the log + (e.g., 'ts_str_int', 'ts_int_int'). + log_event_conditions : Dict[str, Any] + Dictionary specifying the exact conditions to identify the reference + log events (e.g., {'text': 'UP', 'value': 8} for a pin state change). 
+ match_threshold : float, optional + The maximum acceptable cumulative absolute difference (in seconds) + between the matched `check_n_events` pairs (log vs. external) for + an offset to be considered valid. Defaults to 0.1 seconds. + check_n_events : int, optional + The number of initial events from both sequences to use for calculating + the mismatch and finding the best offset. Defaults to 4. A higher + number increases robustness against spurious events but requires more + matching events to be present. + + Returns + ------- + Optional[float] + The calculated time offset in seconds (`external_time_sec - log_time_sec`). + Adding this offset to `trodes_timestamp_sec` synchronizes the log time + to the external reference time. Returns `None` if a satisfactory + offset (below `match_threshold`) cannot be found, or if insufficient + events are available in either the log or the external references. + If successful, updates `self.time_offset` with the calculated value. + """ + # Find the timestamps of the reference events within the log + log_reference_df = self._find_reference_events( + log_event_type, log_event_conditions + ) + + # Check if enough log events were found + if log_reference_df.empty or len(log_reference_df) < check_n_events: + print( + f"Warning: Not enough reference events found in log matching " + f"type='{log_event_type}', conditions={log_event_conditions}. " + f"Need at least {check_n_events}, found {len(log_reference_df)}." 
+ ) + self.time_offset = None # Ensure offset is None if calculation fails + return None + + # Extract log event times (in seconds) and ensure external times are a sorted numpy array + sc_times_sec = log_reference_df["trodes_timestamp_sec"].to_numpy() + # Ensure external times are numpy array and sorted (as required by algorithm) + dio_times_sec = np.sort(np.asarray(external_reference_times)) + + # Check if enough external reference times were provided + if len(dio_times_sec) < check_n_events: + print( + f"Warning: Not enough external reference timestamps provided " + f"({len(dio_times_sec)}), need at least {check_n_events} for matching." + ) + self.time_offset = None # Ensure offset is None + return None + + # --- Offset Calculation Logic --- + # This section iterates through potential starting alignments between + # the external times and the first log time, calculates the total mismatch + # for the first 'check_n_events', and finds the offset minimizing this mismatch. + + best_offset = None + min_mismatch = float("inf") + + # Iterate through possible starting points in the external times array + # We only need to check starting alignments where enough subsequent external times exist + # for the check_n_events comparison. + # We test aligning sc_times_sec[0] with each dio_times_sec[event_idx] + for event_idx in range(len(dio_times_sec) - check_n_events + 1): + # Calculate the potential offset based on the first log event and current external event + potential_offset = dio_times_sec[event_idx] - sc_times_sec[0] + current_mismatch = 0.0 + + # Simple check: Calculate mismatch using the *next consecutive* N events + # This assumes no missing events in *either* stream within the checked range. + # If events can be missing, a more complex alignment (like Needleman-Wunsch + # or checking nearest neighbors) might be needed. This simpler approach + # is often sufficient if the sync signals are reliable. 
+ mismatch_found = False + for i in range(check_n_events): + # Calculate the expected external time for the i-th log event using the potential offset + projected_dio_time = sc_times_sec[i] + potential_offset + # Calculate the absolute difference with the corresponding i-th external time + # (relative to the starting event_idx) + diff = abs(dio_times_sec[event_idx + i] - projected_dio_time) + current_mismatch += diff + + # Optimization: If mismatch already exceeds threshold or current best, stop early + if ( + current_mismatch >= match_threshold + and current_mismatch >= min_mismatch + ): + mismatch_found = True # Signal that this offset is not viable + break # Stop checking further events for this offset + + # If loop completed without early exit and this offset has lower mismatch + if not mismatch_found and current_mismatch < min_mismatch: + min_mismatch = current_mismatch + best_offset = potential_offset + + # After checking all potential alignments, evaluate the result + if best_offset is not None and min_mismatch < match_threshold: + print( + f"Time offset calculation successful.\n" + f" Best Offset: {best_offset:.4f} s (External Time - Log Time)\n" + f" Lowest Mismatch: {min_mismatch:.4f} s (summed abs diff over {check_n_events} events)\n" + f" Threshold: {match_threshold:.4f} s" + ) + self.time_offset = best_offset # Store the successful offset + return self.time_offset + else: + # Report failure if no offset met the threshold + print( + f"Warning: Could not find a suitable time offset.\n" + f" Minimum mismatch found: {min_mismatch:.4f} s (using {check_n_events} events)\n" + f" Match threshold: {match_threshold:.4f} s\n" + f" Troubleshooting: Check if reference events match, increase threshold, " + f"or verify external timestamps." 
+ ) + self.time_offset = None # Ensure offset is None on failure + return None + + def get_events_dataframe( + self, + apply_offset: bool = True, + exclude_comments_unknown: bool = True, + max_DIOs: int = 32, + ) -> pd.DataFrame: + """Constructs and returns a pandas DataFrame from the parsed log events. + + Parameters + ---------- + apply_offset : bool, optional + If True (default), and a `time_offset` has been calculated, add the + 'timestamp_sync' column to the DataFrame. If False, or if no offset + is available, this column is omitted. + exclude_comments_unknown : bool, optional + If True (default), lines parsed as 'comment_or_empty' or 'unknown' + are excluded from the DataFrame. If False, all entries from + `raw_events` are included (potentially useful for debugging parsing). + + Returns + ------- + pd.DataFrame + A DataFrame containing the structured event data. Columns are: + - 'trodes_timestamp' (int, ms since start of recording) + - 'trodes_timestamp_sec' (float, seconds since start of recording) + - `timestamp_sync` (float, seconds) + - 'raw_line' (str) + - 'type' (str) + - 'text' (str) + - 'value' (int, if pattern `text = value`, type 'ts_str_equals_int') + - 'active_DIO_inputs_bitmask' (int, from 'ts_int_int') + - 'active_DIO_outputs_bitmask' (int, from 'ts_int_int') + - 'active_DIO_inputs' (list of int) + - 'active_DIO_outputs' (list of int) + + Returns an empty DataFrame if no valid events are found after filtering. 
+ """ + # Ensure raw events are available + if not self.raw_events: + self.parse_raw_events() + if not self.raw_events: + print("Warning: Log content yielded no raw events.") + self.processed_events_df = pd.DataFrame() # Store empty df + return self.processed_events_df + + # Determine which event types to filter out + if exclude_comments_unknown: + exclude_types = ("comment_or_empty", "unknown") + filtered_events = [ + event + for event in self.raw_events + if event.get("type") not in exclude_types + ] + else: + # Include all event types if not excluding + filtered_events = self.raw_events + + # Handle case where filtering leaves no events + if not filtered_events: + print("Warning: No valid events remain after filtering.") + self.processed_events_df = pd.DataFrame() # Store empty df + return self.processed_events_df + + # Define a preferred column order for better readability + # Include all potential columns generated by the parsers + derived columns + preferred_column_order = [ + "line_num", # Line number in the original log + "raw_line", # Original line content + "type", # Type of parsed line pattern + "trodes_timestamp", # trodes integer timestamp (ms since start) + "trodes_timestamp_sec", # trodes timestamp converted to seconds + "timestamp_sync", # Synchronized timestamp (if calculated) + "text", # Text part (from ts_str, ts_str_int, ts_str_equals_int) + "value", # Integer value after equals (from ts_str_int, ts_str_equals_int) + "active_DIO_inputs_bitmask", # DIO input bitmask (from ts_int_int) + "active_DIO_outputs_bitmask", # DIO output bitmask (from ts_int_int) + ] + + # Create DataFrame. Pandas handles missing columns gracefully. 
+ df = pd.DataFrame(filtered_events).rename( + columns={ + "timestamp": "trodes_timestamp", + "value1": "active_DIO_inputs_bitmask", + "value2": "active_DIO_outputs_bitmask", + } + ) + df["active_DIO_inputs"] = df["active_DIO_inputs_bitmask"].apply( + lambda mask: _interpret_DIO_mask(mask, max_DIOs) + ) + df["active_DIO_outputs"] = df["active_DIO_outputs_bitmask"].apply( + lambda mask: _interpret_DIO_mask(mask, max_DIOs) + ) + + # --- Timestamp Processing --- + # Ensure 'timestamp' column exists and convert to numeric/int + if "trodes_timestamp" in df.columns: + # Coerce errors to NaN, fill NaN with 0, then convert to integer + df["trodes_timestamp"] = ( + pd.to_numeric(df["trodes_timestamp"], errors="coerce") + .fillna(pd.NA) + .astype(pd.Int64Dtype()) + ) + # Calculate timestamp in seconds + df["trodes_timestamp_sec"] = ( + df["trodes_timestamp"].astype(float) / self.MILLISECONDS_PER_SECOND + ) + else: + # Add empty columns if trodes_timestamp was missing (e.g., only comments) + print( + "Warning: 'trodes_timestamp' column not found in parsed data. Timestamp columns will be empty." + ) + df["trodes_timestamp"] = pd.NA + df["trodes_timestamp_sec"] = np.nan + + # Apply time offset if requested and available + if apply_offset: + if self.time_offset is not None: + if "trodes_timestamp_sec" in df.columns: + df["timestamp_sync"] = df["trodes_timestamp_sec"] + self.time_offset + else: + df["timestamp_sync"] = ( + np.nan + ) # Cannot calculate if trodes_timestamp_sec is missing + else: + # Warning if offset applied but not calculated + print( + "Warning: Time offset application requested, but offset has not " + "been calculated or was unsuccessful. 'timestamp_sync' column omitted." 
+ ) + # Ensure the column doesn't exist if it wasn't created + if "timestamp_sync" in df.columns: + df = df.drop(columns=["timestamp_sync"]) + + # --- Data Type Consolidation --- + # Standardize types for common data columns if they exist + int_cols = [ + "value", + "active_DIO_inputs_bitmask", + "active_DIO_outputs_bitmask", + ] + text_cols = ["text"] + + for col in int_cols: + if col in df.columns: + # Convert to numeric (allowing NaNs), then use nullable Int64 type + df[col] = pd.to_numeric(df[col], errors="coerce").astype( + pd.Int64Dtype() + ) + + for col in text_cols: + if col in df.columns: + # Ensure text columns are object type (string) and + # fill potential float NaNs with pandas NA + df[col] = df[col].astype(str).replace("nan", pd.NA).astype("object") + + # Reorder columns according to preference, keeping only existing columns + existing_cols_in_order = [ + col for col in preferred_column_order if col in df.columns + ] + # Add any remaining columns not in the preferred list (e.g., from 'unknown' type) + other_cols = [col for col in df.columns if col not in existing_cols_in_order] + final_column_order = existing_cols_in_order + other_cols + df = df[final_column_order] + + # Store the final DataFrame and return it + self.processed_events_df = df.set_index("line_num") + return self.processed_events_df + + def get_events_by_type( + self, + apply_offset: bool = True, + exclude_comments_unknown: bool = True, + ) -> List[pd.DataFrame]: + """Groups the events in the DataFrame by their 'type' column. + This method first generates the DataFrame using `get_events_dataframe` + and then groups the events by their 'type' column. Each group is + returned as a separate DataFrame, excluding the 'type' column. + This allows for easy access to events of the same type for further + analysis or processing. + + Parameters + ---------- + apply_offset : bool, optional + If True (default), applies the time offset to the DataFrame. 
+ If False, the DataFrame will contain raw timestamps. + exclude_comments_unknown : bool, optional + If True (default), excludes comment and unknown lines from the DataFrame. + If False, all lines are included, which may be useful for debugging. + Returns + ------- + List[pd.DataFrame] + A list of DataFrames, each corresponding to a unique event type. + Each DataFrame contains the events of that type, excluding the 'type' column. + """ + df = self.get_events_dataframe( + apply_offset=apply_offset, + exclude_comments_unknown=exclude_comments_unknown, + ) + return [group.drop(columns=["type"]) for _, group in df.groupby("type")] + + def segment_into_trials( + self, + trial_start_terms: List[str], + trial_end_terms: List[str], + time_column: str = "timestamp_sync", + ) -> List[Dict[str, Any]]: + """ + Segments events from a StateScript log DataFrame into trials. + + Parameters + ---------- + trial_start_terms : List[str] + List of strings found in the 'text' column that mark the start of a trial. + trial_end_terms : List[str] + List of strings found in the 'text' column that mark the end of a trial. + Can overlap with trial_start_terms. + time_column : str, optional + The name of the column to use for time ranges ('timestamp_sync' or + 'trodes_timestamp_sec'), by default 'timestamp_sync'. + + Returns + ------- + List[Dict[str, Any]] + A list where each dictionary represents a trial. Each trial dictionary + contains at least 'start_time' and 'end_time'. Further analysis + (like finding input/output changes within the trial) would typically + be done separately using these time ranges to filter events_df. + + Notes + ----- + - This implementation assumes trials are defined by text messages. + - It handles cases where start/end terms overlap. 
+ """ + events_df = self.processed_events_df + if events_df is None: + print("Error: No processed events DataFrame available.") + return [] + + if "text" not in events_df.columns or time_column not in events_df.columns: + print(f"Error: DataFrame must contain 'text' and '{time_column}' columns.") + return [] + + trials = [] + current_trial_start_time = None + in_trial = False + + # Iterate through the DataFrame rows + for index, row in events_df.iterrows(): + message = row["text"] # Check the 'text' column + current_time = row[time_column] + + if pd.isna(message) or pd.isna(current_time): + continue # Skip rows with missing text or time + + found_end_term = any(term in message for term in trial_end_terms) + found_start_term = any(term in message for term in trial_start_terms) + + # --- End Trial Logic --- + # If we are currently in a trial and find an end term + if in_trial and found_end_term: + # Finalize the previous trial + trials.append( + { + "start_time": current_trial_start_time, + "end_time": current_time, + # Add trial index or other basic info if needed + } + ) + in_trial = False + current_trial_start_time = None # Reset start time + + # --- Start Trial Logic --- + # If we find a start term (potentially the same event as the end term) + if found_start_term: + # If we weren't in a trial, start a new one + if not in_trial: + in_trial = True + current_trial_start_time = current_time + # If we *were* already in a trial (e.g., two start terms back-to-back + # without an end term), you might choose to log a warning or + # implicitly end the previous one here and start a new one. + # This example restarts the trial timer. + else: + print( + f"Warning: Found start term '{message}' at {current_time} while already in a trial started at {current_trial_start_time}. Restarting trial." 
+ ) + current_trial_start_time = current_time + + # Handle case where log ends while still in a trial + if in_trial: + print( + f"Warning: Log ended while still in a trial started at {current_trial_start_time}." + ) + # Optionally add the incomplete trial + trials.append( + { + "start_time": current_trial_start_time, + "end_time": events_df[time_column].iloc[-1], # Use last event time + "status": "incomplete", + } + ) + + return trials diff --git a/src/trodes_to_nwb/tests/test_convert_statescript.py b/src/trodes_to_nwb/tests/test_convert_statescript.py new file mode 100644 index 0000000..f115cea --- /dev/null +++ b/src/trodes_to_nwb/tests/test_convert_statescript.py @@ -0,0 +1,544 @@ +import os +import pathlib +import tempfile + +import numpy as np +import pandas as pd +import pytest + +from trodes_to_nwb.convert_statescript import ( + StateScriptLogProcessor, + _parse_int, + parse_statescript_line, + parse_ts_int_int, + parse_ts_str, + parse_ts_str_equals_int, + parse_ts_str_int, +) + +# --- Fixtures --- + + +@pytest.fixture(scope="module") +def sample_log_content(): + """Provides sample log content for general testing.""" + return """# Test log started +76504 0 0 +76566 center_poke +76566 65536 0 +100078 counter_handlePoke = 1 +100078 4 0 +100559 0 0 +Executing this line without timestamp +115030 center_poke +115030 65536 0 +# Test log ended +""" + + +@pytest.fixture(scope="module") +def empty_log_content(): + """Provides empty log content.""" + return "" + + +@pytest.fixture(scope="module") +def comment_only_log_content(): + """Provides log content with only comments and whitespace.""" + return """# Start +# Middle line + +# End + """ + + +@pytest.fixture +def processor(sample_log_content): + """Provides a processor instance initialized with standard sample content.""" + return StateScriptLogProcessor(sample_log_content, source_info="from string") + + +@pytest.fixture +def empty_processor(empty_log_content): + """Provides a processor instance initialized with 
empty content.""" + return StateScriptLogProcessor(empty_log_content, source_info="empty string") + + +@pytest.fixture +def comment_only_processor(comment_only_log_content): + """Provides a processor instance initialized with only comments.""" + return StateScriptLogProcessor( + comment_only_log_content, source_info="comments only string" + ) + + +@pytest.fixture(scope="module") +def external_times(): + """Provides sample external times for offset calculation tests.""" + # These correspond roughly to the '65536 0' events in sample_log_content + # 76566 ms -> 76.566 s + # 115030 ms -> 115.030 s + # Let's assume a base time for the external system + base_time = 1678880000.0 + return np.array([base_time + 76.566, base_time + 115.030]) + + +@pytest.fixture +def temp_log_file(sample_log_content): + """Creates a temporary log file with standard content and yields its path.""" + with tempfile.NamedTemporaryFile( + mode="w", delete=False, suffix=".stateScriptLog", encoding="utf-8" + ) as tmp_file: + tmp_file.write(sample_log_content) + tmp_file_path = tmp_file.name + yield tmp_file_path + os.remove(tmp_file_path) + + +# --- Tests for Level 1 Parsers --- + + +def test_parse_int(): + """Test the _parse_int helper function.""" + assert _parse_int("123") == 123 + assert _parse_int("-45") == -45 + assert _parse_int("0") == 0 + assert _parse_int("abc") is None + assert _parse_int("12.3") is None + assert _parse_int("") is None + assert _parse_int("123 ") is None + + +def test_parse_ts_int_int(): + """Test parse_ts_int_int directly.""" + parts = ["8386500", "0", "0"] + expected = { + "type": "ts_int_int", + "trodes_timestamp": 8386500, + "value1": 0, + "value2": 0, + } + assert parse_ts_int_int(parts) == expected + + parts_wrong_len = ["123", "0"] + assert parse_ts_int_int(parts_wrong_len) is None + + parts_not_int = ["123", "abc", "0"] + assert parse_ts_int_int(parts_not_int) is None + + parts_float = ["123", "4.5", "0"] + assert parse_ts_int_int(parts_float) is None + + +def 
test_parse_ts_str_int(): + """Test parse_ts_str_int directly.""" + parts = ["8386500", "DOWN", "3"] + expected = { + "type": "ts_str_int", + "trodes_timestamp": 8386500, + "text": "DOWN", + "value": 3, + } + assert parse_ts_str_int(parts) == expected + + parts_wrong_len = ["123", "UP"] + assert parse_ts_str_int(parts_wrong_len) is None + + parts_str_is_int = ["123", "456", "789"] + assert parse_ts_str_int(parts_str_is_int) is None # Should be handled by ts_int_int + + parts_val_not_int = ["123", "UP", "abc"] + assert parse_ts_str_int(parts_val_not_int) is None + + +def test_parse_ts_str_equals_int(): + """Test parse_ts_str_equals_int directly.""" + parts = ["100078", "counter_handlePoke", "=", "1"] + expected = { + "type": "ts_str_equals_int", + "trodes_timestamp": 100078, + "text": "counter_handlePoke", + "value": 1, + } + assert parse_ts_str_equals_int(parts) == expected + + parts_multi_word = ["3610855", "total", "rewards", "=", "70"] + expected_multi = { + "type": "ts_str_equals_int", + "trodes_timestamp": 3610855, + "text": "total rewards", + "value": 70, + } + assert parse_ts_str_equals_int(parts_multi_word) == expected_multi + + parts_wrong_len = ["123", "=", "1"] + assert parse_ts_str_equals_int(parts_wrong_len) is None + + parts_no_equals = ["123", "text", "1"] + assert parse_ts_str_equals_int(parts_no_equals) is None + + parts_val_not_int = ["123", "text", "=", "abc"] + assert parse_ts_str_equals_int(parts_val_not_int) is None + + +def test_parse_ts_str(): + """Test parse_ts_str directly.""" + parts = ["76566", "center_poke"] + expected = { + "type": "ts_str", + "trodes_timestamp": 76566, + "text": "center_poke", + } + assert parse_ts_str(parts) == expected + + parts_multi_word = ["1271815", "some", "multi", "word", "event"] + expected_multi = { + "type": "ts_str", + "trodes_timestamp": 1271815, + "text": "some multi word event", + } + assert parse_ts_str(parts_multi_word) == expected_multi + + parts_wrong_len = ["123"] + assert 
parse_ts_str(parts_wrong_len) is None + + parts_second_is_int = [ + "123", + "456", + ] # Second part is int, should fail this parser + assert parse_ts_str(parts_second_is_int) is None + + +# --- Tests for parse_statescript_line (Covers integration and dispatching) --- + + +def test_parse_statescript_line_dispatching(): + """Test parse_statescript_line dispatching for various line types.""" + lines_expected_types = [ + ("8386500 0 0", "ts_int_int"), + ("8386500 DOWN 3", "ts_str_int"), + ("100078 counter_handlePoke = 1", "ts_str_equals_int"), + ("76566 center_poke", "ts_str"), + ("Executing trigger function 22", "unknown"), + ("# comment", "comment_or_empty"), + ("", "comment_or_empty"), + (" ", "comment_or_empty"), + ("123 456 abc", "unknown"), # Doesn't fit ts_int_int because of 'abc' + ("123 abc def", "ts_str"), # Fits ts_str + ("456 123 = 5", "ts_str_equals_int"), # Fits this specific pattern + ] + + for line, expected_type in lines_expected_types: + parsed = parse_statescript_line(line) + assert parsed["type"] == expected_type + assert parsed["raw_line"] == line.strip() # parse_statescript_line strips + if expected_type not in ["unknown", "comment_or_empty"]: + assert "trodes_timestamp" in parsed + else: + assert "trodes_timestamp" not in parsed or pd.isna( + parsed.get("trodes_timestamp") + ) + + +# --- Tests for StateScriptLogProcessor --- + + +def test_init_from_string(processor, sample_log_content): + """Test initialization from string.""" + assert processor.log_content == sample_log_content + assert processor.source_description == "from string" + assert processor.raw_events == [] + assert processor.time_offset is None + assert processor.processed_events_df is None + + +def test_init_from_file(temp_log_file, sample_log_content): + """Test initialization from a file.""" + processor_file = StateScriptLogProcessor.from_file(temp_log_file) + assert processor_file.log_content == sample_log_content + assert processor_file.source_description.startswith("from 
file:") + assert pathlib.Path(temp_log_file).name in processor_file.source_description + + +def test_init_from_file_not_found(): + """Test initialization from a non-existent file raises error.""" + with pytest.raises(FileNotFoundError): + StateScriptLogProcessor.from_file("non_existent_file_qwerty.log") + + +def test_parse_raw_events(processor, sample_log_content): + """Test parsing the raw log content into events.""" + events = processor.parse_raw_events() + assert processor.raw_events is events # Should store result internally + assert isinstance(events, list) + assert len(events) == len( + sample_log_content.strip().splitlines() + ) # One dict per line + assert events[0]["type"] == "comment_or_empty" + assert events[1]["type"] == "ts_int_int" + assert events[7]["type"] == "unknown" # "Executing this line..." + assert events[9]["type"] == "comment_or_empty" # Last comment + assert events[1]["raw_line"] == "76504 0 0" + assert events[7]["raw_line"] == "Executing this line without timestamp" + + +def test_find_reference_events(processor): + """Test the internal _find_reference_events method.""" + # Case 1: Find 'ts_str' events + ref_df_str = processor._find_reference_events( + event_type="ts_str", conditions={"text": "center_poke"} + ) + assert isinstance(ref_df_str, pd.DataFrame) + assert len(ref_df_str) == 2 + pd.testing.assert_series_equal( + ref_df_str["trodes_timestamp"], + pd.Series([76566, 115030], name="trodes_timestamp"), + check_dtype=False, + ) + assert "trodes_timestamp_sec" in ref_df_str.columns + assert ref_df_str["trodes_timestamp_sec"].iloc[0] == pytest.approx(76.566) + + # Case 2: Find 'ts_int_int' events with specific values + ref_df_int = processor._find_reference_events( + event_type="ts_int_int", conditions={"value1": 4, "value2": 0} + ) + assert len(ref_df_int) == 1 + assert ref_df_int["trodes_timestamp"].iloc[0] == 100078 + + # Case 3: No matching events found + ref_df_none = processor._find_reference_events( + event_type="ts_str_int", 
conditions={"text": "nonexistent"} + ) + assert ref_df_none.empty + assert isinstance(ref_df_none, pd.DataFrame) # Should still return DF + + # Case 4: Ensure processor parses if raw_events is empty + processor.raw_events = [] + ref_df_reparse = processor._find_reference_events( + event_type="ts_str", conditions={"text": "center_poke"} + ) + assert len(ref_df_reparse) == 2 # Should re-parse automatically + + +def test_calculate_time_offset_success(processor, external_times): + """Test successful time offset calculation.""" + offset = processor.calculate_time_offset( + external_reference_times=external_times, + log_event_type="ts_int_int", # Use the events corresponding to external_times + log_event_conditions={"value1": 65536, "value2": 0}, + check_n_events=2, # Use both events for matching + ) + assert offset is not None + assert processor.time_offset == offset # Check internal storage + # Expected offset = external_base_time = 1678880000.0 + # external_times[0] = base + 76.566; log_times[0] = 76.566 + assert offset == pytest.approx(1678880000.0) + + +def test_calculate_time_offset_fail_not_enough_log(processor, external_times): + """Test offset calculation failure due to insufficient log events.""" + # 'counter_handlePoke' only appears once, need 2 events + offset = processor.calculate_time_offset( + external_reference_times=external_times, + log_event_type="ts_str_equals_int", + log_event_conditions={"text": "counter_handlePoke"}, + check_n_events=2, + ) + assert offset is None + assert processor.time_offset is None # Should remain None + + +def test_calculate_time_offset_fail_not_enough_external(processor): + """Test offset calculation failure due to insufficient external times.""" + # Only one external time provided, need 2 events + offset = processor.calculate_time_offset( + external_reference_times=np.array([1678880076.566]), + log_event_type="ts_int_int", + log_event_conditions={"value1": 65536, "value2": 0}, + check_n_events=2, + ) + assert offset is None 
+ assert processor.time_offset is None + + +def test_calculate_time_offset_fail_mismatch(processor, external_times): + """Test offset calculation failure due to exceeding mismatch threshold.""" + # Shift external times slightly more than default threshold (0.1) + shifted_external_times = external_times + 0.06 # Total shift 0.12 over 2 events + offset = processor.calculate_time_offset( + external_reference_times=shifted_external_times, + log_event_type="ts_int_int", + log_event_conditions={"value1": 65536, "value2": 0}, + check_n_events=2, + match_threshold=0.1, # Default threshold + ) + assert offset is None + assert processor.time_offset is None + + +def test_get_events_dataframe_defaults(processor): + """Test default behavior: exclude comments/unknown, no offset applied yet.""" + df = processor.get_events_dataframe(apply_offset=False) + assert processor.processed_events_df is df # Check internal storage + assert isinstance(df, pd.DataFrame) + # Expected: 11 lines total - 3 comments - 1 unknown = 7 valid events + assert len(df) == 7 + assert "raw_line" in df.columns + assert "trodes_timestamp" in df.columns + assert "trodes_timestamp_sec" in df.columns + assert "timestamp_sync" not in df.columns # Offset not applied + # Check content and types + assert df["type"].iloc[0] == "ts_int_int" + assert df["raw_line"].iloc[0] == "76504 0 0" + assert pd.isna(df["text"].iloc[0]) # text NA for ts_int_int + assert df["value1"].iloc[0] == 0 + assert df["trodes_timestamp"].dtype == "int64" + assert df["trodes_timestamp_sec"].dtype == "float64" + assert df["value"].dtype == pd.Int64Dtype() # Nullable Integer + + +def test_get_events_dataframe_include_all(processor): + """Test including comments and unknown lines.""" + df = processor.get_events_dataframe( + apply_offset=False, exclude_comments_unknown=False + ) + assert isinstance(df, pd.DataFrame) + assert len(df) == 10 # All lines included + assert df["type"].iloc[0] == "comment_or_empty" + assert df["type"].iloc[7] == 
"unknown" + assert df["raw_line"].iloc[7] == "Executing this line without timestamp" + # Check that timestamp is NA/0 for lines without one + assert ( + pd.isna(df["trodes_timestamp"].iloc[0]) or df["trodes_timestamp"].iloc[0] == 0 + ) + assert ( + pd.isna(df["trodes_timestamp"].iloc[7]) or df["trodes_timestamp"].iloc[7] == 0 + ) + assert pd.isna(df["trodes_timestamp_sec"].iloc[0]) or np.isnan( + df["trodes_timestamp_sec"].iloc[0] + ) + assert pd.isna(df["trodes_timestamp_sec"].iloc[7]) or np.isnan( + df["trodes_timestamp_sec"].iloc[7] + ) + + +def test_get_events_dataframe_with_offset(processor): + """Test applying offset and check sync timestamp calculation.""" + # Simulate successful offset calculation + processor.time_offset = 1678880000.0 + df = processor.get_events_dataframe(apply_offset=True) # Default exclude=True + assert isinstance(df, pd.DataFrame) + assert len(df) == 7 # Excludes comments/unknown + assert "timestamp_sync" in df.columns + # Check calculation for the first valid event (76504 ms) + expected_sync_time = (76504 / 1000.0) + 1678880000.0 + assert df["timestamp_sync"].iloc[0] == pytest.approx(expected_sync_time) + # Check NA value handling in other columns remains correct + assert pd.isna(df["text"].iloc[0]) + assert df["value1"].iloc[0] == 0 + assert df["timestamp_sync"].dtype == "float64" + + +def test_get_events_dataframe_offset_not_calculated(processor, capsys): + """Test applying offset when offset is None.""" + processor.time_offset = None # Ensure no offset is set + df = processor.get_events_dataframe(apply_offset=True) + assert isinstance(df, pd.DataFrame) + assert "timestamp_sync" not in df.columns # Sync column should be absent + assert len(df) == 7 # Should still return the dataframe without the column + + # Check that the warning was printed to stderr/stdout + captured = capsys.readouterr() + assert ( + "Warning: Time offset requested but not calculated" in captured.out + or "Warning: Time offset requested but not calculated" in 
captured.err + ) + + +def test_empty_log(empty_processor): + """Test processing an empty log file.""" + events = empty_processor.parse_raw_events() + assert events == [] + df = empty_processor.get_events_dataframe() + assert isinstance(df, pd.DataFrame) + assert df.empty + + +def test_comment_only_log(comment_only_processor): + """Test processing a log file with only comments/whitespace.""" + events = comment_only_processor.parse_raw_events() + assert len(events) == 4 # 4 lines in the fixture + assert all(e["type"] == "comment_or_empty" for e in events) + + # Default: exclude comments -> empty DataFrame + df_excluded = comment_only_processor.get_events_dataframe(apply_offset=False) + assert isinstance(df_excluded, pd.DataFrame) + assert df_excluded.empty + + # Include comments -> DataFrame with only comment entries + df_included = comment_only_processor.get_events_dataframe( + apply_offset=False, exclude_comments_unknown=False + ) + assert isinstance(df_included, pd.DataFrame) + assert len(df_included) == 4 + assert all(df_included["type"] == "comment_or_empty") + assert ( + pd.isna(df_included["trodes_timestamp"].iloc[0]) + or df_included["trodes_timestamp"].iloc[0] == 0 + ) + + +def test_repr(processor): + """Test the __repr__ method.""" + # Initial state + initial_repr = repr(processor) + assert isinstance(initial_repr, str) + assert "StateScriptLogProcessor" in initial_repr + assert "not parsed" in initial_repr + assert "no offset" in initial_repr + assert "not generated" in initial_repr + + # After parsing + processor.parse_raw_events() + parsed_repr = repr(processor) + assert "parsed" in parsed_repr + assert f"raw_events={len(processor.raw_events)}" in parsed_repr + assert "no offset" in parsed_repr + assert "not generated" in parsed_repr + + # After offset calculation + processor.time_offset = 1000.0 + offset_repr = repr(processor) + assert "offset=1000.0" in offset_repr + assert "not generated" in offset_repr + + # After DataFrame generation + 
processor.get_events_dataframe() + df_repr = repr(processor) + assert "DataFrame generated" in df_repr + + +def test_repr_html(processor): + """Test the _repr_html_ method.""" + # Check it runs without error in different states and returns string + html_initial = processor._repr_html_() + assert isinstance(html_initial, str) + assert "StateScriptLogProcessor" in html_initial + assert "Not Parsed" in html_initial + + processor.parse_raw_events() + html_parsed = processor._repr_html_() + assert isinstance(html_parsed, str) + assert "Parsed" in html_parsed + assert f"({len(processor.raw_events)} raw entries)" in html_parsed + + processor.time_offset = 1000.0 + html_offset = processor._repr_html_() + assert isinstance(html_offset, str) + assert "Offset: 1000.0" in html_offset + + processor.get_events_dataframe() + html_df = processor._repr_html_() + assert isinstance(html_df, str) + assert "DataFrame: Generated" in html_df + assert "DataFrame Preview" in html_df # Check for preview section From ca7d70a5660118999b711494df65f95f9225d61d Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 11:52:24 -0400 Subject: [PATCH 02/23] Add notebook --- notebooks/test_statescript_parsing.ipynb | 4464 ++++++++++++++++++++++ 1 file changed, 4464 insertions(+) create mode 100644 notebooks/test_statescript_parsing.ipynb diff --git a/notebooks/test_statescript_parsing.ipynb b/notebooks/test_statescript_parsing.ipynb new file mode 100644 index 0000000..5c9f27c --- /dev/null +++ b/notebooks/test_statescript_parsing.ipynb @@ -0,0 +1,4464 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "46fbf114", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[09:42:05][WARNING] Spyglass: Failed to load SpyglassConfig. 
Please set up config file.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n", + "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n", + "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n", + "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n", + "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n", + "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n", + "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n", + "Warning: Log content yielded no raw events.\n", + "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n", + "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
270648028 UP 2ts_str_int648028648.028UP2<NA><NA>[][]
271648028 2 0ts_int_int648028648.028<NA><NA>20[2][]
290648083 lastPort = -1 to currPort = 1ts_str648083648.083lastPort = -1 to currPort = 1<NA><NA><NA>[][]
292658285 DOWN 2ts_str_int658285658.285DOWN2<NA><NA>[][]
293658285 0 0ts_int_int658285658.285<NA><NA>00[][]
.................................
90983925934 8 0ts_int_int39259343925.934<NA><NA>80[4][]
90993926021 DOWN 4ts_str_int39260213926.021DOWN4<NA><NA>[][]
91003926021 0 0ts_int_int39260213926.021<NA><NA>00[][]
91013926086 UP 4ts_str_int39260863926.086UP4<NA><NA>[][]
91023926086 8 0ts_int_int39260863926.086<NA><NA>80[4][]
\n", + "

6241 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " raw_line type trodes_timestamp \\\n", + "line_num \n", + "270 648028 UP 2 ts_str_int 648028 \n", + "271 648028 2 0 ts_int_int 648028 \n", + "290 648083 lastPort = -1 to currPort = 1 ts_str 648083 \n", + "292 658285 DOWN 2 ts_str_int 658285 \n", + "293 658285 0 0 ts_int_int 658285 \n", + "... ... ... ... \n", + "9098 3925934 8 0 ts_int_int 3925934 \n", + "9099 3926021 DOWN 4 ts_str_int 3926021 \n", + "9100 3926021 0 0 ts_int_int 3926021 \n", + "9101 3926086 UP 4 ts_str_int 3926086 \n", + "9102 3926086 8 0 ts_int_int 3926086 \n", + "\n", + " trodes_timestamp_sec text value \\\n", + "line_num \n", + "270 648.028 UP 2 \n", + "271 648.028 \n", + "290 648.083 lastPort = -1 to currPort = 1 \n", + "292 658.285 DOWN 2 \n", + "293 658.285 \n", + "... ... ... ... \n", + "9098 3925.934 \n", + "9099 3926.021 DOWN 4 \n", + "9100 3926.021 \n", + "9101 3926.086 UP 4 \n", + "9102 3926.086 \n", + "\n", + " active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n", + "line_num \n", + "270 \n", + "271 2 0 \n", + "290 \n", + "292 \n", + "293 0 0 \n", + "... ... ... \n", + "9098 8 0 \n", + "9099 \n", + "9100 0 0 \n", + "9101 \n", + "9102 8 0 \n", + "\n", + " active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "270 [] [] \n", + "271 [2] [] \n", + "290 [] [] \n", + "292 [] [] \n", + "293 [] [] \n", + "... ... ... 
\n", + "9098 [4] [] \n", + "9099 [] [] \n", + "9100 [] [] \n", + "9101 [] [] \n", + "9102 [4] [] \n", + "\n", + "[6241 rows x 10 columns]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from spyglass.utils.statescript import StateScriptLogProcessor\n", + "\n", + "import pathlib\n", + "\n", + "search_dir = pathlib.Path(\"/Users/edeno/Downloads/\")\n", + "log_files_generator = search_dir.glob(\"*.stateScriptLog\")\n", + "\n", + "statescript_dfs = [\n", + " StateScriptLogProcessor.from_file(file_path).get_events_dataframe()\n", + " for file_path in log_files_generator\n", + "]\n", + "statescript_dfs[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "35bc8caf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
0#<Hexmaze_NoSequence.sc>comment_or_empty<NA>NaNNaN<NA><NA><NA>[][]
1#% author: XScomment_or_empty<NA>NaNNaN<NA><NA><NA>[][]
2#% date: 20231224; added a reward indicator fo...comment_or_empty<NA>NaNNaN<NA><NA><NA>[][]
3#comment_or_empty<NA>NaNNaN<NA><NA><NA>[][]
4#%initialize constant varscomment_or_empty<NA>NaNNaN<NA><NA><NA>[][]
.................................
37629~~~unknown<NA>NaNNaN<NA><NA><NA>[][]
37630Executing trigger function 22unknown<NA>NaNNaN<NA><NA><NA>[][]
37631Executing trigger function 22unknown<NA>NaNNaN<NA><NA><NA>[][]
37632Executing trigger function 22unknown<NA>NaNNaN<NA><NA><NA>[][]
37633Executing trigger function 22unknown<NA>NaNNaN<NA><NA><NA>[][]
\n", + "

37634 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " raw_line type \\\n", + "line_num \n", + "0 # comment_or_empty \n", + "1 #% author: XS comment_or_empty \n", + "2 #% date: 20231224; added a reward indicator fo... comment_or_empty \n", + "3 # comment_or_empty \n", + "4 #%initialize constant vars comment_or_empty \n", + "... ... ... \n", + "37629 ~~~ unknown \n", + "37630 Executing trigger function 22 unknown \n", + "37631 Executing trigger function 22 unknown \n", + "37632 Executing trigger function 22 unknown \n", + "37633 Executing trigger function 22 unknown \n", + "\n", + " trodes_timestamp trodes_timestamp_sec text value \\\n", + "line_num \n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "... ... ... ... ... \n", + "37629 NaN NaN \n", + "37630 NaN NaN \n", + "37631 NaN NaN \n", + "37632 NaN NaN \n", + "37633 NaN NaN \n", + "\n", + " active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n", + "line_num \n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... ... \n", + "37629 \n", + "37630 \n", + "37631 \n", + "37632 \n", + "37633 \n", + "\n", + " active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "0 [] [] \n", + "1 [] [] \n", + "2 [] [] \n", + "3 [] [] \n", + "4 [] [] \n", + "... ... ... \n", + "37629 [] [] \n", + "37630 [] [] \n", + "37631 [] [] \n", + "37632 [] [] \n", + "37633 [] [] \n", + "\n", + "[37634 rows x 10 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "StateScriptLogProcessor.from_file(\n", + " \"/Users/edeno/Downloads/20240513_BraveLu_03_r2.stateScriptLog\"\n", + ").get_events_dataframe(exclude_comments_unknown=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "8a3daa1c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
165173027 DOWN 1ts_str_int173027173.027DOWN1<NA><NA>[][]
166173027 0 0ts_int_int173027173.027<NA><NA>00[][]
167173050 UP 1ts_str_int173050173.050UP1<NA><NA>[][]
168173050 1 0ts_int_int173050173.050<NA><NA>10[1][]
169173658 DOWN 1ts_str_int173658173.658DOWN1<NA><NA>[][]
.................................
35681449843 DOWN 1ts_str_int14498431449.843DOWN1<NA><NA>[][]
35691449843 0 8ts_int_int14498431449.843<NA><NA>08[][4]
35701450010 UP 1ts_str_int14500101450.010UP1<NA><NA>[][]
35711450010 1 8ts_int_int14500101450.010<NA><NA>18[1][4]
35721450078 1 0ts_int_int14500781450.078<NA><NA>10[1][]
\n", + "

2828 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " raw_line type trodes_timestamp trodes_timestamp_sec \\\n", + "line_num \n", + "165 173027 DOWN 1 ts_str_int 173027 173.027 \n", + "166 173027 0 0 ts_int_int 173027 173.027 \n", + "167 173050 UP 1 ts_str_int 173050 173.050 \n", + "168 173050 1 0 ts_int_int 173050 173.050 \n", + "169 173658 DOWN 1 ts_str_int 173658 173.658 \n", + "... ... ... ... ... \n", + "3568 1449843 DOWN 1 ts_str_int 1449843 1449.843 \n", + "3569 1449843 0 8 ts_int_int 1449843 1449.843 \n", + "3570 1450010 UP 1 ts_str_int 1450010 1450.010 \n", + "3571 1450010 1 8 ts_int_int 1450010 1450.010 \n", + "3572 1450078 1 0 ts_int_int 1450078 1450.078 \n", + "\n", + " text value active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n", + "line_num \n", + "165 DOWN 1 \n", + "166 0 0 \n", + "167 UP 1 \n", + "168 1 0 \n", + "169 DOWN 1 \n", + "... ... ... ... ... \n", + "3568 DOWN 1 \n", + "3569 0 8 \n", + "3570 UP 1 \n", + "3571 1 8 \n", + "3572 1 0 \n", + "\n", + " active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "165 [] [] \n", + "166 [] [] \n", + "167 [] [] \n", + "168 [1] [] \n", + "169 [] [] \n", + "... ... ... \n", + "3568 [] [] \n", + "3569 [] [4] \n", + "3570 [] [] \n", + "3571 [1] [4] \n", + "3572 [1] [] \n", + "\n", + "[2828 rows x 10 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "06e49235", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
83364241 UP 9ts_str_int364241364.241UP9<NA><NA>[][]
84364241 256 256ts_int_int364241364.241<NA><NA>256256[9][9]
87364269 outer rewardts_str364269364.269outer reward<NA><NA><NA>[][]
89364269 256 2304ts_int_int364269364.269<NA><NA>2562304[9][9, 12]
94364669 256 256ts_int_int364669364.669<NA><NA>256256[9][9]
.................................
74481991064 contentTrialCount = 75ts_str_equals_int19910641991.0647575<NA><NA>[][]
74491991064 contentReward = 75ts_str_equals_int19910641991.0647575<NA><NA>[][]
74501991065 contentOuterCount = 1ts_str_equals_int19910651991.06511<NA><NA>[][]
74521991066 CURRENTGOAL IS 13 TASK_STATE IS 4ts_str19910661991.066CURRENTGOAL IS 13 TASK_STATE IS 4<NA><NA><NA>[][]
74541991331 64 0ts_int_int19913311991.331<NA><NA>640[7][]
\n", + "

5953 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " raw_line type \\\n", + "line_num \n", + "83 364241 UP 9 ts_str_int \n", + "84 364241 256 256 ts_int_int \n", + "87 364269 outer reward ts_str \n", + "89 364269 256 2304 ts_int_int \n", + "94 364669 256 256 ts_int_int \n", + "... ... ... \n", + "7448 1991064 contentTrialCount = 75 ts_str_equals_int \n", + "7449 1991064 contentReward = 75 ts_str_equals_int \n", + "7450 1991065 contentOuterCount = 1 ts_str_equals_int \n", + "7452 1991066 CURRENTGOAL IS 13 TASK_STATE IS 4 ts_str \n", + "7454 1991331 64 0 ts_int_int \n", + "\n", + " trodes_timestamp trodes_timestamp_sec \\\n", + "line_num \n", + "83 364241 364.241 \n", + "84 364241 364.241 \n", + "87 364269 364.269 \n", + "89 364269 364.269 \n", + "94 364669 364.669 \n", + "... ... ... \n", + "7448 1991064 1991.064 \n", + "7449 1991064 1991.064 \n", + "7450 1991065 1991.065 \n", + "7452 1991066 1991.066 \n", + "7454 1991331 1991.331 \n", + "\n", + " text value active_DIO_inputs_bitmask \\\n", + "line_num \n", + "83 UP 9 \n", + "84 256 \n", + "87 outer reward \n", + "89 256 \n", + "94 256 \n", + "... ... ... ... \n", + "7448 75 75 \n", + "7449 75 75 \n", + "7450 1 1 \n", + "7452 CURRENTGOAL IS 13 TASK_STATE IS 4 \n", + "7454 64 \n", + "\n", + " active_DIO_outputs_bitmask active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "83 [] [] \n", + "84 256 [9] [9] \n", + "87 [] [] \n", + "89 2304 [9] [9, 12] \n", + "94 256 [9] [9] \n", + "... ... ... ... \n", + "7448 [] [] \n", + "7449 [] [] \n", + "7450 [] [] \n", + "7452 [] [] \n", + "7454 0 [7] [] \n", + "\n", + "[5953 rows x 10 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[2]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6e663e37", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
288322450 UP 4ts_str_int322450322.450UP4<NA><NA>[][]
289322450 8 0ts_int_int322450322.450<NA><NA>80[4][]
310322500 8 262144ts_int_int322500322.500<NA><NA>8262144[4][19]
315322510 lastPort = -1 to currPort = 2ts_str322510322.510lastPort = -1 to currPort = 2<NA><NA><NA>[][]
318322634 8 262208ts_int_int322634322.634<NA><NA>8262208[4][7, 19]
.................................
376243357820 0 0ts_int_int33578203357.820<NA><NA>00[][]
376253357823 0 64ts_int_int33578233357.823<NA><NA>064[][7]
376263357825 0 0ts_int_int33578253357.825<NA><NA>00[][]
376273358882 RESETSTIMts_str33588823358.882RESETSTIM<NA><NA><NA>[][]
376283358882 ifDelay = 1ts_str_equals_int33588823358.88211<NA><NA>[][]
\n", + "

34144 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " raw_line type \\\n", + "line_num \n", + "288 322450 UP 4 ts_str_int \n", + "289 322450 8 0 ts_int_int \n", + "310 322500 8 262144 ts_int_int \n", + "315 322510 lastPort = -1 to currPort = 2 ts_str \n", + "318 322634 8 262208 ts_int_int \n", + "... ... ... \n", + "37624 3357820 0 0 ts_int_int \n", + "37625 3357823 0 64 ts_int_int \n", + "37626 3357825 0 0 ts_int_int \n", + "37627 3358882 RESETSTIM ts_str \n", + "37628 3358882 ifDelay = 1 ts_str_equals_int \n", + "\n", + " trodes_timestamp trodes_timestamp_sec \\\n", + "line_num \n", + "288 322450 322.450 \n", + "289 322450 322.450 \n", + "310 322500 322.500 \n", + "315 322510 322.510 \n", + "318 322634 322.634 \n", + "... ... ... \n", + "37624 3357820 3357.820 \n", + "37625 3357823 3357.823 \n", + "37626 3357825 3357.825 \n", + "37627 3358882 3358.882 \n", + "37628 3358882 3358.882 \n", + "\n", + " text value active_DIO_inputs_bitmask \\\n", + "line_num \n", + "288 UP 4 \n", + "289 8 \n", + "310 8 \n", + "315 lastPort = -1 to currPort = 2 \n", + "318 8 \n", + "... ... ... ... \n", + "37624 0 \n", + "37625 0 \n", + "37626 0 \n", + "37627 RESETSTIM \n", + "37628 1 1 \n", + "\n", + " active_DIO_outputs_bitmask active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "288 [] [] \n", + "289 0 [4] [] \n", + "310 262144 [4] [19] \n", + "315 [] [] \n", + "318 262208 [4] [7, 19] \n", + "... ... ... ... \n", + "37624 0 [] [] \n", + "37625 64 [] [7] \n", + "37626 0 [] [] \n", + "37627 [] [] \n", + "37628 [] [] \n", + "\n", + "[34144 rows x 10 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[3]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "de34f501", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_secactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
2874236600 1 0ts_int_int42366004236.60010[1][]
2884239693 0 0ts_int_int42396934239.69300[][]
2894242288 1 0ts_int_int42422884242.28810[1][]
2904242749 0 0ts_int_int42427494242.74900[][]
2914243151 1 0ts_int_int42431514243.15110[1][]
2924244648 0 0ts_int_int42446484244.64800[][]
2934313683 1 0ts_int_int43136834313.68310[1][]
2944314756 0 0ts_int_int43147564314.75600[][]
2954660546 1 0ts_int_int46605464660.54610[1][]
2964661064 0 0ts_int_int46610644661.06400[][]
2974661360 1 0ts_int_int46613604661.36010[1][]
2984661565 0 0ts_int_int46615654661.56500[][]
2994666057 8 0ts_int_int46660574666.05780[4][]
3004666539 0 0ts_int_int46665394666.53900[][]
3014667439 8 0ts_int_int46674394667.43980[4][]
3024668457 0 0ts_int_int46684574668.45700[][]
3034669200 8 0ts_int_int46692004669.20080[4][]
3044669481 0 0ts_int_int46694814669.48100[][]
3054675049 2 0ts_int_int46750494675.04920[2][]
3064675275 0 0ts_int_int46752754675.27500[][]
3074675302 2 0ts_int_int46753024675.30220[2][]
3084675318 0 0ts_int_int46753184675.31800[][]
3094675615 2 0ts_int_int46756154675.61520[2][]
3104676096 0 0ts_int_int46760964676.09600[][]
\n", + "
" + ], + "text/plain": [ + " raw_line type trodes_timestamp trodes_timestamp_sec \\\n", + "line_num \n", + "287 4236600 1 0 ts_int_int 4236600 4236.600 \n", + "288 4239693 0 0 ts_int_int 4239693 4239.693 \n", + "289 4242288 1 0 ts_int_int 4242288 4242.288 \n", + "290 4242749 0 0 ts_int_int 4242749 4242.749 \n", + "291 4243151 1 0 ts_int_int 4243151 4243.151 \n", + "292 4244648 0 0 ts_int_int 4244648 4244.648 \n", + "293 4313683 1 0 ts_int_int 4313683 4313.683 \n", + "294 4314756 0 0 ts_int_int 4314756 4314.756 \n", + "295 4660546 1 0 ts_int_int 4660546 4660.546 \n", + "296 4661064 0 0 ts_int_int 4661064 4661.064 \n", + "297 4661360 1 0 ts_int_int 4661360 4661.360 \n", + "298 4661565 0 0 ts_int_int 4661565 4661.565 \n", + "299 4666057 8 0 ts_int_int 4666057 4666.057 \n", + "300 4666539 0 0 ts_int_int 4666539 4666.539 \n", + "301 4667439 8 0 ts_int_int 4667439 4667.439 \n", + "302 4668457 0 0 ts_int_int 4668457 4668.457 \n", + "303 4669200 8 0 ts_int_int 4669200 4669.200 \n", + "304 4669481 0 0 ts_int_int 4669481 4669.481 \n", + "305 4675049 2 0 ts_int_int 4675049 4675.049 \n", + "306 4675275 0 0 ts_int_int 4675275 4675.275 \n", + "307 4675302 2 0 ts_int_int 4675302 4675.302 \n", + "308 4675318 0 0 ts_int_int 4675318 4675.318 \n", + "309 4675615 2 0 ts_int_int 4675615 4675.615 \n", + "310 4676096 0 0 ts_int_int 4676096 4676.096 \n", + "\n", + " active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n", + "line_num \n", + "287 1 0 \n", + "288 0 0 \n", + "289 1 0 \n", + "290 0 0 \n", + "291 1 0 \n", + "292 0 0 \n", + "293 1 0 \n", + "294 0 0 \n", + "295 1 0 \n", + "296 0 0 \n", + "297 1 0 \n", + "298 0 0 \n", + "299 8 0 \n", + "300 0 0 \n", + "301 8 0 \n", + "302 0 0 \n", + "303 8 0 \n", + "304 0 0 \n", + "305 2 0 \n", + "306 0 0 \n", + "307 2 0 \n", + "308 0 0 \n", + "309 2 0 \n", + "310 0 0 \n", + "\n", + " active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "287 [1] [] \n", + "288 [] [] \n", + "289 [1] [] \n", + "290 [] [] \n", + "291 [1] [] \n", + "292 [] 
[] \n", + "293 [1] [] \n", + "294 [] [] \n", + "295 [1] [] \n", + "296 [] [] \n", + "297 [1] [] \n", + "298 [] [] \n", + "299 [4] [] \n", + "300 [] [] \n", + "301 [4] [] \n", + "302 [] [] \n", + "303 [4] [] \n", + "304 [] [] \n", + "305 [2] [] \n", + "306 [] [] \n", + "307 [2] [] \n", + "308 [] [] \n", + "309 [2] [] \n", + "310 [] [] " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[4]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "27c9f114", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
803853607 0 64ts_int_int38536073853.607NaN<NA>064[][7]
813853630 128 64ts_int_int38536303853.630NaN<NA>12864[8][7]
823853785 0 64ts_int_int38537853853.785NaN<NA>064[][7]
833853796 128 64ts_int_int38537963853.796NaN<NA>12864[8][7]
843854144 0 64ts_int_int38541443854.144NaN<NA>064[][7]
.................................
66845023394 64 0ts_int_int50233945023.394NaN<NA>640[7][]
66855026015 0 0ts_int_int50260155026.015NaN<NA>00[][]
66865026079 64 0ts_int_int50260795026.079NaN<NA>640[7][]
66875026170 0 0ts_int_int50261705026.170NaN<NA>00[][]
66885026201 64 0ts_int_int50262015026.201NaN<NA>640[7][]
\n", + "

6418 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " raw_line type trodes_timestamp trodes_timestamp_sec \\\n", + "line_num \n", + "80 3853607 0 64 ts_int_int 3853607 3853.607 \n", + "81 3853630 128 64 ts_int_int 3853630 3853.630 \n", + "82 3853785 0 64 ts_int_int 3853785 3853.785 \n", + "83 3853796 128 64 ts_int_int 3853796 3853.796 \n", + "84 3854144 0 64 ts_int_int 3854144 3854.144 \n", + "... ... ... ... ... \n", + "6684 5023394 64 0 ts_int_int 5023394 5023.394 \n", + "6685 5026015 0 0 ts_int_int 5026015 5026.015 \n", + "6686 5026079 64 0 ts_int_int 5026079 5026.079 \n", + "6687 5026170 0 0 ts_int_int 5026170 5026.170 \n", + "6688 5026201 64 0 ts_int_int 5026201 5026.201 \n", + "\n", + " text value active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n", + "line_num \n", + "80 NaN 0 64 \n", + "81 NaN 128 64 \n", + "82 NaN 0 64 \n", + "83 NaN 128 64 \n", + "84 NaN 0 64 \n", + "... ... ... ... ... \n", + "6684 NaN 64 0 \n", + "6685 NaN 0 0 \n", + "6686 NaN 64 0 \n", + "6687 NaN 0 0 \n", + "6688 NaN 64 0 \n", + "\n", + " active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "80 [] [7] \n", + "81 [8] [7] \n", + "82 [] [7] \n", + "83 [8] [7] \n", + "84 [] [7] \n", + "... ... ... \n", + "6684 [7] [] \n", + "6685 [] [] \n", + "6686 [7] [] \n", + "6687 [] [] \n", + "6688 [7] [] \n", + "\n", + "[6418 rows x 10 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[5]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "860793dd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_secactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
1108023 0 131072ts_int_int108023108.0230131072[][18]
2108024 0 0ts_int_int108024108.02400[][]
3108188 0 131072ts_int_int108188108.1880131072[][18]
4108189 0 0ts_int_int108189108.18900[][]
5108353 0 131072ts_int_int108353108.3530131072[][18]
...........................
40151048180 0 0ts_int_int10481801048.18000[][]
40161048344 0 131072ts_int_int10483441048.3440131072[][18]
40171048345 0 0ts_int_int10483451048.34500[][]
40181048509 0 131072ts_int_int10485091048.5090131072[][18]
40191048510 0 0ts_int_int10485101048.51000[][]
\n", + "

4000 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " raw_line type trodes_timestamp \\\n", + "line_num \n", + "1 108023 0 131072 ts_int_int 108023 \n", + "2 108024 0 0 ts_int_int 108024 \n", + "3 108188 0 131072 ts_int_int 108188 \n", + "4 108189 0 0 ts_int_int 108189 \n", + "5 108353 0 131072 ts_int_int 108353 \n", + "... ... ... ... \n", + "4015 1048180 0 0 ts_int_int 1048180 \n", + "4016 1048344 0 131072 ts_int_int 1048344 \n", + "4017 1048345 0 0 ts_int_int 1048345 \n", + "4018 1048509 0 131072 ts_int_int 1048509 \n", + "4019 1048510 0 0 ts_int_int 1048510 \n", + "\n", + " trodes_timestamp_sec active_DIO_inputs_bitmask \\\n", + "line_num \n", + "1 108.023 0 \n", + "2 108.024 0 \n", + "3 108.188 0 \n", + "4 108.189 0 \n", + "5 108.353 0 \n", + "... ... ... \n", + "4015 1048.180 0 \n", + "4016 1048.344 0 \n", + "4017 1048.345 0 \n", + "4018 1048.509 0 \n", + "4019 1048.510 0 \n", + "\n", + " active_DIO_outputs_bitmask active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "1 131072 [] [18] \n", + "2 0 [] [] \n", + "3 131072 [] [18] \n", + "4 0 [] [] \n", + "5 131072 [] [18] \n", + "... ... ... ... 
\n", + "4015 0 [] [] \n", + "4016 131072 [] [18] \n", + "4017 0 [] [] \n", + "4018 131072 [] [18] \n", + "4019 0 [] [] \n", + "\n", + "[4000 rows x 8 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[6]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "a0f696c5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['first poke',\n", + " 'PROXON',\n", + " 'UP',\n", + " ,\n", + " 'DOWN',\n", + " 'second pokes',\n", + " 'PROXOFF',\n", + " 'UPIND_0',\n", + " 'pump on',\n", + " 'home reward',\n", + " '1',\n", + " '0',\n", + " '10',\n", + " '3',\n", + " 'poke during proximity',\n", + " 'UPIND_4',\n", + " '2',\n", + " '8',\n", + " '4',\n", + " 'UPIND_3',\n", + " 'LOCKOUT',\n", + " 'LOCKEND',\n", + " '6',\n", + " 'UPIND_1',\n", + " '5',\n", + " '12',\n", + " '7',\n", + " 'UPIND_2',\n", + " '9',\n", + " '14',\n", + " 'outer reward',\n", + " '11',\n", + " '13',\n", + " '15',\n", + " '16',\n", + " '17',\n", + " '18',\n", + " '19',\n", + " '20',\n", + " '21',\n", + " '22',\n", + " '23',\n", + " '24',\n", + " '25',\n", + " '26',\n", + " '27',\n", + " '28',\n", + " '29',\n", + " '30',\n", + " '31',\n", + " 'poke during lock period',\n", + " '33',\n", + " '34',\n", + " '35',\n", + " '36',\n", + " '37',\n", + " '38',\n", + " '39',\n", + " '40',\n", + " '41',\n", + " '42',\n", + " '43',\n", + " '44',\n", + " '46',\n", + " '47',\n", + " '48',\n", + " '49',\n", + " '50',\n", + " '51',\n", + " '52',\n", + " '53',\n", + " '54',\n", + " '55',\n", + " '56',\n", + " '57',\n", + " '58',\n", + " '59',\n", + " '60',\n", + " '61',\n", + " '32',\n", + " '62',\n", + " '63',\n", + " '64',\n", + " '65',\n", + " '66',\n", + " '67',\n", + " '68',\n", + " '69',\n", + " '70',\n", + " '71',\n", + " '72',\n", + " '73',\n", + " '74',\n", + " '75',\n", + " '76',\n", + " '77',\n", + " '78',\n", + " '79',\n", + " '80',\n", + " '81',\n", + " '82',\n", + " '83',\n", + " '84',\n", + " 
'85',\n", + " '86',\n", + " '87',\n", + " '45',\n", + " '88',\n", + " '89',\n", + " '90',\n", + " '91',\n", + " '92',\n", + " '93',\n", + " '94',\n", + " '95',\n", + " '96',\n", + " '97',\n", + " '98',\n", + " '99',\n", + " '100',\n", + " '101',\n", + " '102',\n", + " '103',\n", + " '104',\n", + " '105',\n", + " '106',\n", + " '107',\n", + " '108',\n", + " '109',\n", + " '110',\n", + " '111',\n", + " '112',\n", + " '113',\n", + " '114',\n", + " '115',\n", + " '116',\n", + " '117',\n", + " '118',\n", + " '119',\n", + " '120',\n", + " '121',\n", + " '122',\n", + " '123',\n", + " '124',\n", + " '125',\n", + " '126',\n", + " '127',\n", + " '128',\n", + " '129',\n", + " '130',\n", + " '131',\n", + " '132',\n", + " '133',\n", + " '134',\n", + " '135',\n", + " '136',\n", + " '137',\n", + " '138',\n", + " '139',\n", + " '140',\n", + " '141',\n", + " '142',\n", + " '143',\n", + " '144',\n", + " '145',\n", + " '146',\n", + " '147',\n", + " '148',\n", + " '149',\n", + " '150',\n", + " '151',\n", + " '152',\n", + " '153',\n", + " '154',\n", + " '155',\n", + " '156',\n", + " '157',\n", + " '158',\n", + " '159',\n", + " 'EndSession']" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[8].text.unique().tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "e8108415", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: []" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[7]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "5055cb37", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
4876935765 first pokets_str69357656935.765first poke<NA><NA><NA>[][]
4886935766 PROXONts_str69357666935.766PROXON<NA><NA><NA>[][]
4896935766 UP 10ts_str_int69357666935.766UP10<NA><NA>[][]
4906935765 512 512ts_int_int69357656935.765<NA><NA>512512[10][10]
4916935778 DOWN 10ts_str_int69357786935.778DOWN10<NA><NA>[][]
.................................
258379104866 mostRecentOuterWell_ind = 4ts_str_equals_int91048669104.86644<NA><NA>[][]
258389104866 mostRecentRewardOuterWell_ind = 2ts_str_equals_int91048669104.86622<NA><NA>[][]
258429104890 UPIND_4ts_str91048909104.890UPIND_4<NA><NA><NA>[][]
258499104908 0 0ts_int_int91049089104.908<NA><NA>00[][]
258509105380 UPIND_0ts_str91053809105.380UPIND_0<NA><NA><NA>[][]
\n", + "

19975 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " raw_line type \\\n", + "line_num \n", + "487 6935765 first poke ts_str \n", + "488 6935766 PROXON ts_str \n", + "489 6935766 UP 10 ts_str_int \n", + "490 6935765 512 512 ts_int_int \n", + "491 6935778 DOWN 10 ts_str_int \n", + "... ... ... \n", + "25837 9104866 mostRecentOuterWell_ind = 4 ts_str_equals_int \n", + "25838 9104866 mostRecentRewardOuterWell_ind = 2 ts_str_equals_int \n", + "25842 9104890 UPIND_4 ts_str \n", + "25849 9104908 0 0 ts_int_int \n", + "25850 9105380 UPIND_0 ts_str \n", + "\n", + " trodes_timestamp trodes_timestamp_sec text value \\\n", + "line_num \n", + "487 6935765 6935.765 first poke \n", + "488 6935766 6935.766 PROXON \n", + "489 6935766 6935.766 UP 10 \n", + "490 6935765 6935.765 \n", + "491 6935778 6935.778 DOWN 10 \n", + "... ... ... ... ... \n", + "25837 9104866 9104.866 4 4 \n", + "25838 9104866 9104.866 2 2 \n", + "25842 9104890 9104.890 UPIND_4 \n", + "25849 9104908 9104.908 \n", + "25850 9105380 9105.380 UPIND_0 \n", + "\n", + " active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n", + "line_num \n", + "487 \n", + "488 \n", + "489 \n", + "490 512 512 \n", + "491 \n", + "... ... ... \n", + "25837 \n", + "25838 \n", + "25842 \n", + "25849 0 0 \n", + "25850 \n", + "\n", + " active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "487 [] [] \n", + "488 [] [] \n", + "489 [] [] \n", + "490 [10] [10] \n", + "491 [] [] \n", + "... ... ... \n", + "25837 [] [] \n", + "25838 [] [] \n", + "25842 [] [] \n", + "25849 [] [] \n", + "25850 [] [] \n", + "\n", + "[19975 rows x 10 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[8]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "4cb98d40", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
4896935766 UP 10ts_str_int69357666935.766UP10<NA><NA>[][]
4946935987 UP 10ts_str_int69359876935.987UP10<NA><NA>[][]
4996936059 UP 10ts_str_int69360596936.059UP10<NA><NA>[][]
5416937614 UP 10ts_str_int69376146937.614UP10<NA><NA>[][]
5506937781 UP 10ts_str_int69377816937.781UP10<NA><NA>[][]
.................................
257649063659 UP 10ts_str_int90636599063.659UP10<NA><NA>[][]
257759065869 UP 10ts_str_int90658699065.869UP10<NA><NA>[][]
257869071584 UP 8ts_str_int90715849071.584UP8<NA><NA>[][]
258179073200 UP 8ts_str_int90732009073.200UP8<NA><NA>[][]
258249082033 UP 10ts_str_int90820339082.033UP10<NA><NA>[][]
\n", + "

2355 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " raw_line type trodes_timestamp trodes_timestamp_sec \\\n", + "line_num \n", + "489 6935766 UP 10 ts_str_int 6935766 6935.766 \n", + "494 6935987 UP 10 ts_str_int 6935987 6935.987 \n", + "499 6936059 UP 10 ts_str_int 6936059 6936.059 \n", + "541 6937614 UP 10 ts_str_int 6937614 6937.614 \n", + "550 6937781 UP 10 ts_str_int 6937781 6937.781 \n", + "... ... ... ... ... \n", + "25764 9063659 UP 10 ts_str_int 9063659 9063.659 \n", + "25775 9065869 UP 10 ts_str_int 9065869 9065.869 \n", + "25786 9071584 UP 8 ts_str_int 9071584 9071.584 \n", + "25817 9073200 UP 8 ts_str_int 9073200 9073.200 \n", + "25824 9082033 UP 10 ts_str_int 9082033 9082.033 \n", + "\n", + " text value active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n", + "line_num \n", + "489 UP 10 \n", + "494 UP 10 \n", + "499 UP 10 \n", + "541 UP 10 \n", + "550 UP 10 \n", + "... ... ... ... ... \n", + "25764 UP 10 \n", + "25775 UP 10 \n", + "25786 UP 8 \n", + "25817 UP 8 \n", + "25824 UP 10 \n", + "\n", + " active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "489 [] [] \n", + "494 [] [] \n", + "499 [] [] \n", + "541 [] [] \n", + "550 [] [] \n", + "... ... ... \n", + "25764 [] [] \n", + "25775 [] [] \n", + "25786 [] [] \n", + "25817 [] [] \n", + "25824 [] [] \n", + "\n", + "[2355 rows x 10 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[8].loc[statescript_dfs[8].text == \"UP\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "3245aaaf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
472300995 rewCount = 1ts_str_equals_int23009952300.99511<NA><NA>[][]
482300995 1 4ts_int_int23009952300.995<NA><NA>14[1][3]
492301028 0 4ts_int_int23010282301.028<NA><NA>04[][3]
502301295 0 0ts_int_int23012952301.295<NA><NA>00[][]
512303333 1 0ts_int_int23033332303.333<NA><NA>10[1][]
.................................
2943166053 0 0ts_int_int31660533166.053NaN<NA>00[][]
2953168403 1 0ts_int_int31684033168.403NaN<NA>10[1][]
2963168575 0 0ts_int_int31685753168.575NaN<NA>00[][]
2973168643 1 0ts_int_int31686433168.643NaN<NA>10[1][]
2983168890 0 0ts_int_int31688903168.890NaN<NA>00[][]
\n", + "

252 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " raw_line type trodes_timestamp \\\n", + "line_num \n", + "47 2300995 rewCount = 1 ts_str_equals_int 2300995 \n", + "48 2300995 1 4 ts_int_int 2300995 \n", + "49 2301028 0 4 ts_int_int 2301028 \n", + "50 2301295 0 0 ts_int_int 2301295 \n", + "51 2303333 1 0 ts_int_int 2303333 \n", + "... ... ... ... \n", + "294 3166053 0 0 ts_int_int 3166053 \n", + "295 3168403 1 0 ts_int_int 3168403 \n", + "296 3168575 0 0 ts_int_int 3168575 \n", + "297 3168643 1 0 ts_int_int 3168643 \n", + "298 3168890 0 0 ts_int_int 3168890 \n", + "\n", + " trodes_timestamp_sec text value active_DIO_inputs_bitmask \\\n", + "line_num \n", + "47 2300.995 1 1 \n", + "48 2300.995 1 \n", + "49 2301.028 0 \n", + "50 2301.295 0 \n", + "51 2303.333 1 \n", + "... ... ... ... ... \n", + "294 3166.053 NaN 0 \n", + "295 3168.403 NaN 1 \n", + "296 3168.575 NaN 0 \n", + "297 3168.643 NaN 1 \n", + "298 3168.890 NaN 0 \n", + "\n", + " active_DIO_outputs_bitmask active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "47 [] [] \n", + "48 4 [1] [3] \n", + "49 4 [] [3] \n", + "50 0 [] [] \n", + "51 0 [1] [] \n", + "... ... ... ... \n", + "294 0 [] [] \n", + "295 0 [1] [] \n", + "296 0 [] [] \n", + "297 0 [1] [] \n", + "298 0 [] [] \n", + "\n", + "[252 rows x 10 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[9]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "688f3a3d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [raw_line, type, trodes_timestamp, trodes_timestamp_sec, text, value, active_DIO_inputs_bitmask, active_DIO_outputs_bitmask, active_DIO_inputs, active_DIO_outputs]\n", + "Index: []" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[9].loc[statescript_dfs[9].text == \"rewCount\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "a0dc407f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
803853607 0 64ts_int_int38536073853.607NaN<NA>064[][7]
813853630 128 64ts_int_int38536303853.630NaN<NA>12864[8][7]
823853785 0 64ts_int_int38537853853.785NaN<NA>064[][7]
833853796 128 64ts_int_int38537963853.796NaN<NA>12864[8][7]
843854144 0 64ts_int_int38541443854.144NaN<NA>064[][7]
.................................
66845023394 64 0ts_int_int50233945023.394NaN<NA>640[7][]
66855026015 0 0ts_int_int50260155026.015NaN<NA>00[][]
66865026079 64 0ts_int_int50260795026.079NaN<NA>640[7][]
66875026170 0 0ts_int_int50261705026.170NaN<NA>00[][]
66885026201 64 0ts_int_int50262015026.201NaN<NA>640[7][]
\n", + "

6418 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " raw_line type trodes_timestamp trodes_timestamp_sec \\\n", + "line_num \n", + "80 3853607 0 64 ts_int_int 3853607 3853.607 \n", + "81 3853630 128 64 ts_int_int 3853630 3853.630 \n", + "82 3853785 0 64 ts_int_int 3853785 3853.785 \n", + "83 3853796 128 64 ts_int_int 3853796 3853.796 \n", + "84 3854144 0 64 ts_int_int 3854144 3854.144 \n", + "... ... ... ... ... \n", + "6684 5023394 64 0 ts_int_int 5023394 5023.394 \n", + "6685 5026015 0 0 ts_int_int 5026015 5026.015 \n", + "6686 5026079 64 0 ts_int_int 5026079 5026.079 \n", + "6687 5026170 0 0 ts_int_int 5026170 5026.170 \n", + "6688 5026201 64 0 ts_int_int 5026201 5026.201 \n", + "\n", + " text value active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n", + "line_num \n", + "80 NaN 0 64 \n", + "81 NaN 128 64 \n", + "82 NaN 0 64 \n", + "83 NaN 128 64 \n", + "84 NaN 0 64 \n", + "... ... ... ... ... \n", + "6684 NaN 64 0 \n", + "6685 NaN 0 0 \n", + "6686 NaN 64 0 \n", + "6687 NaN 0 0 \n", + "6688 NaN 64 0 \n", + "\n", + " active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "80 [] [7] \n", + "81 [8] [7] \n", + "82 [] [7] \n", + "83 [8] [7] \n", + "84 [] [7] \n", + "... ... ... \n", + "6684 [7] [] \n", + "6685 [] [] \n", + "6686 [7] [] \n", + "6687 [] [] \n", + "6688 [7] [] \n", + "\n", + "[6418 rows x 10 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "processor = StateScriptLogProcessor.from_file(\n", + " \"/Users/edeno/Downloads/20220103_Ban77mW_02_lineartrack_p1.stateScriptLog\"\n", + ")\n", + "processor.get_events_dataframe()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "7f9b4c03", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "

StateScriptLogProcessor

\n", + "

Source: from file: /Users/edeno/Downloads/20220103_Ban77mW_02_lineartrack_p1.stateScriptLog

\n", + "

Status: Parsed (6689 raw entries)

\n", + "

Time Offset: Not Calculated

\n", + "

DataFrame: Generated

\n", + "
DataFrame Preview (first 5 rows):
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
3853607 0 64ts_int_int38536073853.607<NA><NA>064[][7]
3853630 128 64ts_int_int38536303853.630<NA><NA>12864[8][7]
3853785 0 64ts_int_int38537853853.785<NA><NA>064[][7]
3853796 128 64ts_int_int38537963853.796<NA><NA>12864[8][7]
3854144 0 64ts_int_int38541443854.144<NA><NA>064[][7]
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "processor" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "166a1f1d", + "metadata": {}, + "outputs": [ + { + "ename": "AssertionError", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[17], line 427\u001b[0m\n\u001b[1;32m 425\u001b[0m test_parse_ts_int_int_direct()\n\u001b[1;32m 426\u001b[0m test_parse_ts_str_int_direct()\n\u001b[0;32m--> 427\u001b[0m \u001b[43mtest_parse_ts_str_equals_int_direct\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 428\u001b[0m test_parse_ts_str_direct()\n\u001b[1;32m 429\u001b[0m test_parse_statescript_line_ts_int_int()\n", + "Cell \u001b[0;32mIn[17], line 129\u001b[0m, in \u001b[0;36mtest_parse_ts_str_equals_int_direct\u001b[0;34m()\u001b[0m\n\u001b[1;32m 122\u001b[0m parts \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m100078\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcounter_handlePoke\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m=\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m1\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 123\u001b[0m expected \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 124\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtype\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mts_str_equals_int\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 125\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimestamp\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m100078\u001b[39m,\n\u001b[1;32m 126\u001b[0m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcounter_handlePoke\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 127\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvalue\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m1\u001b[39m,\n\u001b[1;32m 128\u001b[0m }\n\u001b[0;32m--> 129\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m parse_ts_str_equals_int(parts) \u001b[38;5;241m==\u001b[39m expected\n\u001b[1;32m 131\u001b[0m parts_multi_word \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m3610855\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtotal\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrewards\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m=\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m70\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 132\u001b[0m expected_multi \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 133\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtype\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mts_str_equals_int\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 134\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimestamp\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m3610855\u001b[39m,\n\u001b[1;32m 135\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtotal rewards\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 136\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvalue\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m70\u001b[39m,\n\u001b[1;32m 137\u001b[0m }\n", + "\u001b[0;31mAssertionError\u001b[0m: " + ] + } + ], + "source": [ + "import os\n", + "import pathlib\n", + "import tempfile\n", + "\n", + "import 
numpy as np\n", + "import pandas as pd\n", + "import pytest\n", + "\n", + "from spyglass.utils.statescript import (\n", + " StateScriptLogProcessor,\n", + " _parse_int,\n", + " parse_statescript_line,\n", + " parse_ts_int_int,\n", + " parse_ts_str,\n", + " parse_ts_str_equals_int,\n", + " parse_ts_str_int,\n", + ")\n", + "\n", + "\n", + "@pytest.fixture(scope=\"module\")\n", + "def sample_log_content():\n", + " \"\"\"Provides sample log content.\"\"\"\n", + " return \"\"\"# Test log\n", + "76504 0 0\n", + "76566 center_poke\n", + "76566 65536 0\n", + "100078 counter_handlePoke = 1\n", + "100078 4 0\n", + "100559 0 0\n", + "Executing this line\n", + "115030 center_poke\n", + "115030 65536 0\n", + "\"\"\"\n", + "\n", + "\n", + "@pytest.fixture\n", + "def processor(sample_log_content):\n", + " \"\"\"Provides a processor instance initialized with sample content.\"\"\"\n", + " return StateScriptLogProcessor(sample_log_content)\n", + "\n", + "\n", + "@pytest.fixture(scope=\"module\")\n", + "def external_times():\n", + " \"\"\"Provides sample external times.\"\"\"\n", + " return np.array([1678886476.530, 1678886500.100, 1678886515.050])\n", + "\n", + "\n", + "@pytest.fixture\n", + "def temp_log_file(sample_log_content):\n", + " \"\"\"Creates a temporary log file and yields its path.\"\"\"\n", + " with tempfile.NamedTemporaryFile(\n", + " mode=\"w\", delete=False, suffix=\".stateScriptLog\"\n", + " ) as tmp_file:\n", + " tmp_file.write(sample_log_content)\n", + " tmp_file_path = tmp_file.name\n", + " yield tmp_file_path\n", + " os.remove(tmp_file_path)\n", + "\n", + "\n", + "# --- Tests for Level 1 Parsers ---\n", + "\n", + "\n", + "def test_parse_int():\n", + " \"\"\"Test the _parse_int helper function.\"\"\"\n", + " assert _parse_int(\"123\") == 123\n", + " assert _parse_int(\"-45\") == -45\n", + " assert _parse_int(\"0\") == 0\n", + " assert _parse_int(\"abc\") is None\n", + " assert _parse_int(\"12.3\") is None\n", + " assert _parse_int(\"\") is None\n", + " assert 
(\n", + " _parse_int(\" 123 \") == 123\n", + " ) # Should handle surrounding whitespace if not stripped before\n", + "\n", + "\n", + "def test_parse_ts_int_int_direct():\n", + " \"\"\"Test parse_ts_int_int directly.\"\"\"\n", + " parts = [\"8386500\", \"0\", \"0\"]\n", + " expected = {\n", + " \"type\": \"ts_int_int\",\n", + " \"timestamp\": 8386500,\n", + " \"value1\": 0,\n", + " \"value2\": 0,\n", + " }\n", + " assert parse_ts_int_int(parts) == expected\n", + "\n", + " parts_wrong_len = [\"123\", \"0\"]\n", + " assert parse_ts_int_int(parts_wrong_len) is None\n", + "\n", + " parts_not_int = [\"123\", \"abc\", \"0\"]\n", + " assert parse_ts_int_int(parts_not_int) is None\n", + "\n", + " parts_float = [\"123\", \"4.5\", \"0\"]\n", + " assert parse_ts_int_int(parts_float) is None\n", + "\n", + "\n", + "def test_parse_ts_str_int_direct():\n", + " \"\"\"Test parse_ts_str_int directly.\"\"\"\n", + " parts = [\"8386500\", \"DOWN\", \"3\"]\n", + " expected = {\n", + " \"type\": \"ts_str_int\",\n", + " \"timestamp\": 8386500,\n", + " \"text\": \"DOWN\",\n", + " \"value\": 3,\n", + " }\n", + " assert parse_ts_str_int(parts) == expected\n", + "\n", + " parts_wrong_len = [\"123\", \"UP\"]\n", + " assert parse_ts_str_int(parts_wrong_len) is None\n", + "\n", + " parts_str_is_int = [\"123\", \"456\", \"789\"]\n", + " assert (\n", + " parse_ts_str_int(parts_str_is_int) is None\n", + " ) # Should be handled by ts_int_int\n", + "\n", + " parts_val_not_int = [\"123\", \"UP\", \"abc\"]\n", + " assert parse_ts_str_int(parts_val_not_int) is None\n", + "\n", + "\n", + "def test_parse_ts_str_equals_int_direct():\n", + " \"\"\"Test parse_ts_str_equals_int directly.\"\"\"\n", + " parts = [\"100078\", \"counter_handlePoke\", \"=\", \"1\"]\n", + " expected = {\n", + " \"type\": \"ts_str_equals_int\",\n", + " \"timestamp\": 100078,\n", + " \"text\": \"counter_handlePoke\",\n", + " \"value\": 1,\n", + " }\n", + " assert parse_ts_str_equals_int(parts) == expected\n", + "\n", + " 
parts_multi_word = [\"3610855\", \"total\", \"rewards\", \"=\", \"70\"]\n", + " expected_multi = {\n", + " \"type\": \"ts_str_equals_int\",\n", + " \"timestamp\": 3610855,\n", + " \"text\": \"total rewards\",\n", + " \"value\": 70,\n", + " }\n", + " assert parse_ts_str_equals_int(parts_multi_word) == expected_multi\n", + "\n", + " parts_wrong_len = [\"123\", \"=\", \"1\"]\n", + " assert parse_ts_str_equals_int(parts_wrong_len) is None\n", + "\n", + " parts_no_equals = [\"123\", \"text\", \"1\"]\n", + " assert parse_ts_str_equals_int(parts_no_equals) is None\n", + "\n", + " parts_val_not_int = [\"123\", \"text\", \"=\", \"abc\"]\n", + " assert parse_ts_str_equals_int(parts_val_not_int) is None\n", + "\n", + "\n", + "def test_parse_ts_str_direct():\n", + " \"\"\"Test parse_ts_str directly.\"\"\"\n", + " parts = [\"76566\", \"center_poke\"]\n", + " expected = {\"type\": \"ts_str\", \"timestamp\": 76566, \"text\": \"center_poke\"}\n", + " assert parse_ts_str(parts) == expected\n", + "\n", + " parts_multi_word = [\n", + " \"1271815\",\n", + " \"lastPort\",\n", + " \"=\",\n", + " \"-1\",\n", + " \"to\",\n", + " \"currPort\",\n", + " \"=\",\n", + " \"2\",\n", + " ]\n", + " expected_multi = {\n", + " \"type\": \"ts_str\",\n", + " \"timestamp\": 1271815,\n", + " \"text\": \"lastPort = -1 to currPort = 2\",\n", + " }\n", + " assert parse_ts_str(parts_multi_word) == expected_multi\n", + "\n", + " parts_wrong_len = [\"123\"]\n", + " assert parse_ts_str(parts_wrong_len) is None\n", + "\n", + " parts_second_is_int = [\n", + " \"123\",\n", + " \"456\",\n", + " ] # Second part is int, should fail this parser\n", + " assert parse_ts_str(parts_second_is_int) is None\n", + "\n", + "\n", + "# --- Tests for parse_statescript_line (Covers integration and dispatching) ---\n", + "\n", + "\n", + "def test_parse_statescript_line_ts_int_int():\n", + " \"\"\"Test parse_statescript_line dispatching for ts_int_int.\"\"\"\n", + " line = \"8386500 0 0\"\n", + " parsed = 
parse_statescript_line(line)\n", + " assert parsed[\"type\"] == \"ts_int_int\"\n", + " assert parsed[\"timestamp\"] == 8386500\n", + " assert parsed[\"value1\"] == 0\n", + " assert parsed[\"value2\"] == 0\n", + " assert parsed[\"raw_line\"] == line\n", + "\n", + "\n", + "def test_parse_statescript_line_ts_str_int():\n", + " \"\"\"Test parse_statescript_line dispatching for ts_str_int.\"\"\"\n", + " line = \"8386500 DOWN 3\"\n", + " parsed = parse_statescript_line(line)\n", + " assert parsed[\"type\"] == \"ts_str_int\"\n", + " assert parsed[\"timestamp\"] == 8386500\n", + " assert parsed[\"text\"] == \"DOWN\"\n", + " assert parsed[\"value\"] == 3\n", + " assert parsed[\"raw_line\"] == line\n", + "\n", + "\n", + "def test_parse_statescript_line_ts_str_equals_int():\n", + " \"\"\"Test parse_statescript_line dispatching for ts_str_equals_int.\"\"\"\n", + " line = \"100078 counter_handlePoke = 1\"\n", + " parsed = parse_statescript_line(line)\n", + " assert parsed[\"type\"] == \"ts_str_equals_int\"\n", + " assert parsed[\"timestamp\"] == 100078\n", + " assert parsed[\"text\"] == \"counter_handlePoke\"\n", + " assert parsed[\"value\"] == 1\n", + " assert parsed[\"raw_line\"] == line\n", + "\n", + "\n", + "def test_parse_statescript_line_ts_str():\n", + " \"\"\"Test parse_statescript_line dispatching for ts_str.\"\"\"\n", + " line = \"76566 center_poke\"\n", + " parsed = parse_statescript_line(line)\n", + " assert parsed[\"type\"] == \"ts_str\"\n", + " assert parsed[\"timestamp\"] == 76566\n", + " assert parsed[\"text\"] == \"center_poke\"\n", + " assert parsed[\"raw_line\"] == line\n", + "\n", + "\n", + "def test_parse_statescript_line_unknown():\n", + " \"\"\"Test parse_statescript_line dispatching for unknown lines.\"\"\"\n", + " line = \"Executing trigger function 22\" # No timestamp\n", + " parsed = parse_statescript_line(line)\n", + " assert parsed[\"type\"] == \"unknown\"\n", + " assert \"timestamp\" not in parsed\n", + " assert parsed[\"raw_line\"] == line\n", + 
"\n", + "\n", + "def test_parse_statescript_line_comment_empty():\n", + " \"\"\"Test parse_statescript_line dispatching for comments/empty.\"\"\"\n", + " line_c = \"# comment\"\n", + " line_e = \"\"\n", + " line_s = \" \"\n", + " assert parse_statescript_line(line_c)[\"type\"] == \"comment_or_empty\"\n", + " assert parse_statescript_line(line_c)[\"raw_line\"] == line_c\n", + " assert parse_statescript_line(line_e)[\"type\"] == \"comment_or_empty\"\n", + " assert parse_statescript_line(line_e)[\"raw_line\"] == line_e\n", + " assert parse_statescript_line(line_s)[\"type\"] == \"comment_or_empty\"\n", + " assert parse_statescript_line(line_s)[\"raw_line\"] == \"\"\n", + "\n", + "\n", + "# --- Tests for StateScriptLogProcessor ---\n", + "\n", + "\n", + "def test_init_from_string(processor, sample_log_content):\n", + " assert processor.log_content == sample_log_content\n", + " assert processor.source_description == \"from string\"\n", + " assert processor.raw_events == []\n", + " assert processor.time_offset is None\n", + " assert processor.processed_events_df is None\n", + "\n", + "\n", + "def test_init_from_file(temp_log_file, sample_log_content):\n", + " processor_file = StateScriptLogProcessor.from_file(temp_log_file)\n", + " assert processor_file.log_content == sample_log_content\n", + " assert processor_file.source_description.startswith(\"from file:\")\n", + " assert pathlib.Path(temp_log_file).name in processor_file.source_description\n", + "\n", + "\n", + "def test_init_from_file_not_found():\n", + " with pytest.raises(FileNotFoundError):\n", + " StateScriptLogProcessor.from_file(\"non_existent_file.log\")\n", + "\n", + "\n", + "def test_parse_raw_events(processor):\n", + " events = processor.parse_raw_events()\n", + " assert isinstance(events, list)\n", + " assert len(events) == 10\n", + " assert events[0][\"type\"] == \"comment_or_empty\"\n", + " assert events[1][\"type\"] == \"ts_int_int\"\n", + " assert events[7][\"type\"] == \"unknown\"\n", + " assert 
events[1][\"raw_line\"] == \"76504 0 0\"\n", + " assert events[7][\"raw_line\"] == \"Executing this line\"\n", + "\n", + "\n", + "def test_find_reference_events(processor):\n", + " ref_df = processor._find_reference_events(\n", + " event_type=\"ts_str\", conditions={\"text\": \"center_poke\"}\n", + " )\n", + " assert isinstance(ref_df, pd.DataFrame)\n", + " assert len(ref_df) == 2\n", + " pd.testing.assert_series_equal(\n", + " ref_df[\"timestamp\"],\n", + " pd.Series([76566, 115030], name=\"timestamp\"),\n", + " check_dtype=False,\n", + " )\n", + " assert \"log_timestamp_sec\" in ref_df.columns\n", + "\n", + " ref_df_num = processor._find_reference_events(\n", + " event_type=\"ts_int_int\", conditions={\"value1\": 4, \"value2\": 0}\n", + " )\n", + " assert len(ref_df_num) == 1\n", + " assert ref_df_num[\"timestamp\"].iloc[0] == 100078\n", + "\n", + " ref_df_none = processor._find_reference_events(\n", + " event_type=\"ts_str_int\", conditions={\"text\": \"nonexistent\"}\n", + " )\n", + " assert ref_df_none.empty\n", + "\n", + "\n", + "def test_calculate_time_offset_success(processor):\n", + " ext_times = np.array([1678880076.566, 1678880115.030])\n", + " offset = processor.calculate_time_offset(\n", + " external_reference_times=ext_times,\n", + " log_event_type=\"ts_int_int\",\n", + " log_event_conditions={\"value1\": 65536, \"value2\": 0},\n", + " check_n_events=2,\n", + " )\n", + " assert offset is not None\n", + " assert offset == pytest.approx(1678880000.0)\n", + "\n", + "\n", + "def test_calculate_time_offset_fail_not_enough_log(processor, external_times):\n", + " offset = processor.calculate_time_offset(\n", + " external_reference_times=external_times,\n", + " log_event_type=\"ts_str_equals_int\",\n", + " log_event_conditions={\"text\": \"counter_handlePoke\"},\n", + " check_n_events=2,\n", + " )\n", + " assert offset is None\n", + " assert processor.time_offset is None\n", + "\n", + "\n", + "def 
test_calculate_time_offset_fail_not_enough_external(processor):\n", + " offset = processor.calculate_time_offset(\n", + " external_reference_times=np.array([1678880076.566]),\n", + " log_event_type=\"ts_int_int\",\n", + " log_event_conditions={\"value1\": 65536, \"value2\": 0},\n", + " check_n_events=2,\n", + " )\n", + " assert offset is None\n", + " assert processor.time_offset is None\n", + "\n", + "\n", + "def test_get_events_dataframe_defaults(processor):\n", + " \"\"\"Test default behavior: exclude comments/unknown, no offset applied yet.\"\"\"\n", + " df = processor.get_events_dataframe(\n", + " apply_offset=False\n", + " ) # Default exclude=True\n", + " assert isinstance(df, pd.DataFrame)\n", + " assert len(df) == 8 # Excludes comment and unknown line\n", + " assert \"raw_line\" in df.columns\n", + " assert \"timestamp\" in df.columns\n", + " assert \"log_timestamp_sec\" in df.columns\n", + " # Check column order: time first, raw_line last\n", + " expected_cols = [\n", + " \"timestamp\",\n", + " \"log_timestamp_sec\",\n", + " \"timestamp_sync\",\n", + " \"text\",\n", + " \"value\",\n", + " \"value1\",\n", + " \"value2\",\n", + " \"raw_line\",\n", + " \"type\",\n", + " ]\n", + " actual_expected_cols = [col for col in expected_cols if col in df.columns]\n", + " assert list(df.columns) == actual_expected_cols\n", + " # Check content\n", + " assert df[\"raw_line\"].iloc[0] == \"76504 0 0\"\n", + " assert pd.isna(df[\"text\"].iloc[0]) # Should be NA where not applicable\n", + " assert df[\"value1\"].iloc[0] == 0\n", + "\n", + "\n", + "def test_get_events_dataframe_include_all(processor):\n", + " \"\"\"Test including comments and unknown lines.\"\"\"\n", + " df = processor.get_events_dataframe(\n", + " apply_offset=False, exclude_comments_unknown=False\n", + " )\n", + " assert isinstance(df, pd.DataFrame)\n", + " assert len(df) == 10 # Includes comment and unknown line\n", + " assert \"raw_line\" in df.columns\n", + " # Check raw_line for the unknown line\n", + " 
assert (\n", + " df[\"raw_line\"].iloc[7] == \"Executing this line\"\n", + " ) # Index adjusted for comment\n", + " # Check that timestamp is NA or 0 for lines without one\n", + " assert (\n", + " pd.isna(df[\"timestamp\"].iloc[0]) or df[\"timestamp\"].iloc[0] == 0\n", + " ) # Comment line\n", + " assert (\n", + " pd.isna(df[\"timestamp\"].iloc[7]) or df[\"timestamp\"].iloc[7] == 0\n", + " ) # Unknown line\n", + " # Check column order\n", + " expected_cols = [\n", + " \"timestamp\",\n", + " \"log_timestamp_sec\",\n", + " \"timestamp_sync\",\n", + " \"text\",\n", + " \"value\",\n", + " \"value1\",\n", + " \"value2\",\n", + " \"raw_line\",\n", + " \"type\",\n", + " ]\n", + " actual_expected_cols = [col for col in expected_cols if col in df.columns]\n", + " assert list(df.columns) == actual_expected_cols\n", + "\n", + "\n", + "def test_get_events_dataframe_with_offset(processor):\n", + " \"\"\"Test applying offset and column order.\"\"\"\n", + " processor.time_offset = 1678880000.0\n", + " df = processor.get_events_dataframe(\n", + " apply_offset=True\n", + " ) # Default exclude=True\n", + " assert isinstance(df, pd.DataFrame)\n", + " assert len(df) == 8\n", + " # Check calculation\n", + " expected_sync_time = (76504 / 1000.0) + 1678880000.0\n", + " assert df[\"timestamp_sync\"].iloc[0] == pytest.approx(expected_sync_time)\n", + " # Check NA value handling\n", + " assert pd.isna(df[\"text\"].iloc[0])\n", + "\n", + "\n", + "test_parse_int()\n", + "test_parse_ts_int_int_direct()\n", + "test_parse_ts_str_int_direct()\n", + "test_parse_ts_str_equals_int_direct()\n", + "test_parse_ts_str_direct()\n", + "test_parse_statescript_line_ts_int_int()\n", + "test_parse_statescript_line_ts_str_int()\n", + "test_parse_statescript_line_ts_str_equals_int()\n", + "test_parse_statescript_line_ts_str()\n", + "test_parse_statescript_line_unknown()\n", + "test_parse_statescript_line_comment_empty()\n", + "test_parse_statescript_line_comment_empty()\n", + "\n", + "sample_log_content = 
\"\"\"# Test log\n", + "76504 0 0\n", + "76566 center_poke\n", + "76566 65536 0\n", + "100078 counter_handlePoke = 1\n", + "100078 4 0\n", + "100559 0 0\n", + "Executing this line\n", + "115030 center_poke\n", + "115030 65536 0\n", + "\"\"\"\n", + "processor = StateScriptLogProcessor(sample_log_content)\n", + "with tempfile.NamedTemporaryFile(\n", + " mode=\"w\", delete=False, suffix=\".stateScriptLog\"\n", + ") as temp_log_file:\n", + " temp_log_file.write(sample_log_content)\n", + " temp_log_file_path = temp_log_file.name\n", + "\n", + "external_times = np.array([1678886476.530, 1678886500.100, 1678886515.050])\n", + "\n", + "test_init_from_string(processor, sample_log_content)\n", + "test_init_from_file(temp_log_file_path, sample_log_content)\n", + "test_init_from_file_not_found()\n", + "test_parse_raw_events(processor)\n", + "test_find_reference_events(processor)\n", + "test_calculate_time_offset_success(processor)\n", + "test_calculate_time_offset_fail_not_enough_log(processor, external_times)\n", + "test_calculate_time_offset_fail_not_enough_external(processor)\n", + "test_get_events_dataframe_defaults(processor)\n", + "test_get_events_dataframe_include_all(processor)\n", + "test_get_events_dataframe_with_offset(processor)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30269f5f", + "metadata": {}, + "outputs": [], + "source": [ + "from typing import List, Optional\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "def _interpret_port_mask(\n", + " port_state_value: Optional[int], max_ports: int = 32\n", + ") -> List[int]:\n", + " \"\"\"\n", + " Interprets an integer value as a bitmask representing active ports using NumPy.\n", + "\n", + " Assumes a 1-based port numbering system (e.g., bit 0 corresponds to port 1).\n", + "\n", + " Parameters\n", + " ----------\n", + " port_state_value : Optional[int]\n", + " The integer value representing the combined state of multiple ports.\n", + " Handles None or pandas NA 
values.\n", + " max_ports : int, optional\n", + " The maximum port number to check (bits 0 to max_ports-1), by default 32.\n", + "\n", + " Returns\n", + " -------\n", + " List[int]\n", + " A sorted list of 1-based port numbers that are active (bit is set).\n", + " Returns an empty list if the value is 0, None, or NA.\n", + "\n", + " Example\n", + " -------\n", + " >>> interpret_port_mask(9) # 1001 in binary -> Ports 1 and 4\n", + " [1, 4]\n", + " >>> interpret_port_mask(65536) # 2^16 -> Port 17\n", + " [17]\n", + " \"\"\"\n", + " # Return empty list for 0, None, or pandas NA\n", + " if pd.isna(port_state_value) or port_state_value == 0:\n", + " return []\n", + "\n", + " # Ensure value is treated as an integer after NA check\n", + " try:\n", + " port_state_int = int(port_state_value)\n", + " except (ValueError, TypeError):\n", + " # Should not happen if input is from Int64Dtype column after NA check,\n", + " # but included for robustness if called directly with invalid input.\n", + " return []\n", + "\n", + " # Create bit masks for positions 0 to max_ports-1\n", + " # E.g., [1, 2, 4, 8, ...]\n", + " bit_masks = np.left_shift(1, np.arange(max_ports))\n", + "\n", + " # Check which bits are set in the input value using bitwise AND\n", + " active_bits_mask = np.bitwise_and(port_state_int, bit_masks) > 0\n", + "\n", + " # Get the 0-based indices (bit positions) where bits are active\n", + " active_indices = np.where(active_bits_mask)[0]\n", + "\n", + " # Convert 0-based indices to 1-based port numbers and return as a list\n", + " active_ports = (active_indices + 1).tolist()\n", + "\n", + " # np.where returns sorted indices, so list is already sorted\n", + " return active_ports\n", + "\n", + "\n", + "def add_interpreted_port_columns(\n", + " events_df: pd.DataFrame,\n", + " input_mask_col: str = \"value1\",\n", + " output_mask_col: str = \"value2\",\n", + " max_ports: int = 32,\n", + ") -> pd.DataFrame:\n", + " \"\"\"\n", + " Adds 'active_inputs' and 'active_outputs' 
columns to a DataFrame\n", + " by interpreting bitmask columns representing port states using NumPy.\n", + "\n", + " Operates on and returns a modified copy of the input DataFrame.\n", + "\n", + " Parameters\n", + " ----------\n", + " events_df : pd.DataFrame\n", + " The DataFrame containing the parsed StateScript event data.\n", + " input_mask_col : str, optional\n", + " The name of the column containing the input port bitmask values,\n", + " by default 'value1'.\n", + " output_mask_col : str, optional\n", + " The name of the column containing the output port bitmask values,\n", + " by default 'value2'.\n", + " max_ports : int, optional\n", + " The maximum port number to check for the bitmasks, by default 32.\n", + "\n", + " Returns\n", + " -------\n", + " pd.DataFrame\n", + " A copy of the input DataFrame with 'active_inputs' and 'active_outputs'\n", + " columns added (or updated if they existed). Prints warnings if specified\n", + " mask columns are not found.\n", + "\n", + " Raises\n", + " ------\n", + " TypeError\n", + " If the input `events_df` is not a pandas DataFrame.\n", + " \"\"\"\n", + " if not isinstance(events_df, pd.DataFrame):\n", + " raise TypeError(\"Input 'events_df' must be a pandas DataFrame.\")\n", + "\n", + " # Work on a copy to avoid modifying the original DataFrame\n", + " processed_df = events_df.copy()\n", + "\n", + " # Interpret Input Ports\n", + " if input_mask_col in processed_df.columns:\n", + " # Convert column to numeric, coercing errors, then apply interpretation\n", + " input_series = pd.to_numeric(\n", + " processed_df[input_mask_col], errors=\"coerce\"\n", + " )\n", + " processed_df[\"active_inputs\"] = input_series.apply(\n", + " lambda mask: _interpret_port_mask(mask, max_ports)\n", + " )\n", + " else:\n", + " print(\n", + " f\"Warning: Input mask column '{input_mask_col}' not found in DataFrame. 
Skipping 'active_inputs'.\"\n", + " )\n", + " # Add empty column if it doesn't exist for consistency\n", + " processed_df[\"active_inputs\"] = [[] for _ in range(len(processed_df))]\n", + "\n", + " # Interpret Output Ports\n", + " if output_mask_col in processed_df.columns:\n", + " output_series = pd.to_numeric(\n", + " processed_df[output_mask_col], errors=\"coerce\"\n", + " )\n", + " processed_df[\"active_outputs\"] = output_series.apply(\n", + " lambda mask: _interpret_port_mask(mask, max_ports)\n", + " )\n", + " else:\n", + " print(\n", + " f\"Warning: Output mask column '{output_mask_col}' not found in DataFrame. Skipping 'active_outputs'.\"\n", + " )\n", + " # Add empty column if it doesn't exist for consistency\n", + " processed_df[\"active_outputs\"] = [[] for _ in range(len(processed_df))]\n", + "\n", + " return processed_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b94f61a", + "metadata": {}, + "outputs": [], + "source": [ + "statescript_dfs[9]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c825d3b", + "metadata": {}, + "outputs": [], + "source": [ + "add_interpreted_port_columns(statescript_dfs[8])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a17a0170", + "metadata": {}, + "outputs": [], + "source": [ + "statescript_dfs[8].groupby(\"type\").groups" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f4f09598", + "metadata": {}, + "outputs": [], + "source": [ + "events_by_type = [\n", + " group.drop(columns=[\"type\"])\n", + " for _, group in statescript_dfs[8].groupby(\"type\")\n", + "]\n", + "\n", + "events_by_type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "83cc5cf1", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "spyglass", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + 
}, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 4a9dd5cc38388940516086c8cab18b33cb44e81d Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 11:59:23 -0400 Subject: [PATCH 03/23] Return dataframe --- src/trodes_to_nwb/convert_statescript.py | 165 ++++++++++++++++------- 1 file changed, 115 insertions(+), 50 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index e0ce567..e7bf8a7 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -1023,101 +1023,166 @@ def segment_into_trials( trial_start_terms: List[str], trial_end_terms: List[str], time_column: str = "timestamp_sync", - ) -> List[Dict[str, Any]]: + ) -> pd.DataFrame: """ - Segments events from a StateScript log DataFrame into trials. + Segments events from the processed StateScript log DataFrame into trials. + + Identifies trial boundaries based on the presence of specified start and end + terms within the 'text' column of the `processed_events_df`. Parameters ---------- trial_start_terms : List[str] List of strings found in the 'text' column that mark the start of a trial. + The event containing the start term *is* the start of the trial. trial_end_terms : List[str] List of strings found in the 'text' column that mark the end of a trial. + The event containing the end term *is* the end of the trial. Can overlap with trial_start_terms. time_column : str, optional - The name of the column to use for time ranges ('timestamp_sync' or - 'trodes_timestamp_sec'), by default 'timestamp_sync'. + The name of the time column in `processed_events_df` to use for + reporting trial start and end times. Common choices are 'timestamp_sync' + (if offset calculated) or 'trodes_timestamp_sec'. Defaults to 'timestamp_sync'. 
Returns ------- - List[Dict[str, Any]] - A list where each dictionary represents a trial. Each trial dictionary - contains at least 'start_time' and 'end_time'. Further analysis - (like finding input/output changes within the trial) would typically - be done separately using these time ranges to filter events_df. + pd.DataFrame + A DataFrame where each row represents a detected trial. Columns include: + - 'start_time': The timestamp (from `time_column`) of the event marking the trial start. + - 'stop_time': The timestamp (from `time_column`) of the event marking the trial end. + - 'status': String indicating if the trial was 'complete' or 'incomplete' + (if the log ended before an end term was found). + Returns an empty DataFrame if no trials are found or if the required + columns ('text', `time_column`) are missing from `processed_events_df`. Notes ----- - - This implementation assumes trials are defined by text messages. - - It handles cases where start/end terms overlap. + - Requires `processed_events_df` to be generated first (e.g., by calling + `get_events_dataframe`). If it's None, this method will attempt to generate it + with default settings (apply_offset=True, exclude_comments_unknown=True). + - Assumes trials are sequential and non-overlapping based on the first occurrence + of start/end terms. + - Handles cases where start/end terms overlap (an event can mark both the end + of one trial and the start of the next). + - Warns if a start term is found while already in a trial (restarts the trial). + - Warns if the log ends while a trial is in progress. """ - events_df = self.processed_events_df - if events_df is None: - print("Error: No processed events DataFrame available.") - return [] + # Attempt to generate the df if it doesn't exist + if self.processed_events_df is None: + print( + "Warning: processed_events_df not found. Generating with default settings." 
+ ) + self.get_events_dataframe() # Use defaults: apply_offset=True, exclude=True + + events_df = self.processed_events_df # Use the potentially newly generated df + + # Check if DataFrame is valid and contains necessary columns + if events_df is None or events_df.empty: + print("Error: No processed events DataFrame available to segment.") + return pd.DataFrame( + columns=["start_time", "stop_time", "status"] + ) # Return empty DF if "text" not in events_df.columns or time_column not in events_df.columns: - print(f"Error: DataFrame must contain 'text' and '{time_column}' columns.") - return [] + print( + f"Error: DataFrame must contain 'text' and '{time_column}' columns for segmentation." + ) + return pd.DataFrame( + columns=["start_time", "stop_time", "status"] + ) # Return empty DF + + # Lists to store data for the final DataFrame + start_times = [] + stop_times = [] + statuses = [] - trials = [] current_trial_start_time = None in_trial = False + last_valid_time = ( + events_df[time_column].dropna().iloc[-1] + if not events_df[time_column].dropna().empty + else None + ) - # Iterate through the DataFrame rows + # Iterate through the DataFrame rows (index is line_num) for index, row in events_df.iterrows(): message = row["text"] # Check the 'text' column current_time = row[time_column] - if pd.isna(message) or pd.isna(current_time): - continue # Skip rows with missing text or time + # Skip rows with missing time in the specified column or missing text + if pd.isna(current_time) or pd.isna(message): + continue + + # Ensure message is treated as string for 'in' check + message_str = str(message) - found_end_term = any(term in message for term in trial_end_terms) - found_start_term = any(term in message for term in trial_start_terms) + # Check if the current message contains any start or end terms + # Use a generator expression for slightly better efficiency + found_end_term = any(term in message_str for term in trial_end_terms) + found_start_term = any(term in 
message_str for term in trial_start_terms) # --- End Trial Logic --- - # If we are currently in a trial and find an end term + # If we are currently in a trial AND find an end term if in_trial and found_end_term: - # Finalize the previous trial - trials.append( - { - "start_time": current_trial_start_time, - "end_time": current_time, - # Add trial index or other basic info if needed - } - ) + # Finalize the previous trial by adding its data to the lists + start_times.append(current_trial_start_time) + stop_times.append(current_time) + statuses.append("complete") + in_trial = False - current_trial_start_time = None # Reset start time + current_trial_start_time = ( + None # Reset start time for the next potential trial + ) # --- Start Trial Logic --- - # If we find a start term (potentially the same event as the end term) + # If we find a start term (this check happens AFTER potential end logic, + # allowing an event to end a trial and immediately start the next one) if found_start_term: - # If we weren't in a trial, start a new one + # If we were NOT previously in a trial, this starts a new one if not in_trial: in_trial = True current_trial_start_time = current_time - # If we *were* already in a trial (e.g., two start terms back-to-back - # without an end term), you might choose to log a warning or - # implicitly end the previous one here and start a new one. - # This example restarts the trial timer. + # If we *were* already in a trial (e.g., two start terms without an end term), + # log a warning and restart the trial timer from the current event. else: print( - f"Warning: Found start term '{message}' at {current_time} while already in a trial started at {current_trial_start_time}. Restarting trial." + f"Warning (Line {index}): Found start term '{message_str}' at {current_time} " + f"while already in a trial started at {current_trial_start_time}. Restarting trial timer." 
) + # Effectively ends the previous (implicit) trial and starts new one current_trial_start_time = current_time - # Handle case where log ends while still in a trial + # --- Handle Incomplete Trial at End of Log --- + # If the loop finishes and we are still marked as 'in_trial' if in_trial: print( - f"Warning: Log ended while still in a trial started at {current_trial_start_time}." + f"Warning: Log processing ended while still in a trial that started at {current_trial_start_time}. " + f"Marking as incomplete." + ) + # Add the incomplete trial to the lists + start_times.append(current_trial_start_time) + # Use the time of the last valid event in the time column as the end time + stop_times.append( + last_valid_time if last_valid_time is not None else np.nan + ) + statuses.append("incomplete") + + # --- Create Final DataFrame --- + # Construct the DataFrame from the collected lists + trials_df = pd.DataFrame( + {"start_time": start_times, "stop_time": stop_times, "status": statuses} + ) + + # Ensure correct dtypes (start/end times should match time_column, status is object) + if not trials_df.empty: + trials_df["start_time"] = trials_df["start_time"].astype( + events_df[time_column].dtype ) - # Optionally add the incomplete trial - trials.append( - { - "start_time": current_trial_start_time, - "end_time": events_df[time_column].iloc[-1], # Use last event time - "status": "incomplete", - } + trials_df["stop_time"] = trials_df["stop_time"].astype( + events_df[time_column].dtype ) + trials_df["status"] = trials_df["status"].astype( + "object" + ) # String/object type - return trials + return trials_df From cf735b9f381f64a8ca7cf8c28b20fe7ef25b9022 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 12:27:34 -0400 Subject: [PATCH 04/23] Fix type --- src/trodes_to_nwb/convert_dios.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/trodes_to_nwb/convert_dios.py b/src/trodes_to_nwb/convert_dios.py index 97c57a8..377140e 100644 --- 
a/src/trodes_to_nwb/convert_dios.py +++ b/src/trodes_to_nwb/convert_dios.py @@ -9,7 +9,7 @@ from .spike_gadgets_raw_io import SpikeGadgetsRawIO -def _get_channel_name_map(metadata: dict) -> dict[str, str]: +def _get_channel_name_map(metadata: dict) -> dict[str, dict[str, str]]: """Parses behavioral events metadata from the yaml file Parameters From 81e924c49890c190f2eb7e3512eef0400f95786f Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 12:35:12 -0400 Subject: [PATCH 05/23] Update tests --- .../tests/test_convert_statescript.py | 523 +++++++++++++----- 1 file changed, 375 insertions(+), 148 deletions(-) diff --git a/src/trodes_to_nwb/tests/test_convert_statescript.py b/src/trodes_to_nwb/tests/test_convert_statescript.py index f115cea..14e916f 100644 --- a/src/trodes_to_nwb/tests/test_convert_statescript.py +++ b/src/trodes_to_nwb/tests/test_convert_statescript.py @@ -8,6 +8,7 @@ from trodes_to_nwb.convert_statescript import ( StateScriptLogProcessor, + _interpret_DIO_mask, _parse_int, parse_statescript_line, parse_ts_int_int, @@ -28,10 +29,11 @@ def sample_log_content(): 76566 65536 0 100078 counter_handlePoke = 1 100078 4 0 -100559 0 0 +100559 LEFT_PORT 1 Executing this line without timestamp 115030 center_poke 115030 65536 0 +115040 0 0 # Test log ended """ @@ -75,14 +77,31 @@ def comment_only_processor(comment_only_log_content): @pytest.fixture(scope="module") def external_times(): """Provides sample external times for offset calculation tests.""" - # These correspond roughly to the '65536 0' events in sample_log_content + # These correspond to the '65536 0' events (ts_int_int) in sample_log_content # 76566 ms -> 76.566 s # 115030 ms -> 115.030 s - # Let's assume a base time for the external system + # Let's assume a base time (e.g., Unix timestamp) for the external system base_time = 1678880000.0 return np.array([base_time + 76.566, base_time + 115.030]) +@pytest.fixture(scope="module") +def external_times_for_str_int(): + """Provides 
sample external times for offset calculation tests using ts_str_int.""" + # These correspond to the 'LEFT_PORT 1' event in sample_log_content + # 100559 ms -> 100.559 s + base_time = 1678880000.0 + # Needs enough events for check_n_events default (4), let's assume more exist conceptually + return np.array( + [ + base_time + 100.559, + base_time + 110.0, + base_time + 120.0, + base_time + 130.0, + ] + ) + + @pytest.fixture def temp_log_file(sample_log_content): """Creates a temporary log file with standard content and yields its path.""" @@ -91,11 +110,11 @@ def temp_log_file(sample_log_content): ) as tmp_file: tmp_file.write(sample_log_content) tmp_file_path = tmp_file.name - yield tmp_file_path + yield pathlib.Path(tmp_file_path) # Yield Path object os.remove(tmp_file_path) -# --- Tests for Level 1 Parsers --- +# --- Tests for Level 0 Helpers --- def test_parse_int(): @@ -106,7 +125,27 @@ def test_parse_int(): assert _parse_int("abc") is None assert _parse_int("12.3") is None assert _parse_int("") is None - assert _parse_int("123 ") is None + assert _parse_int("123 ") is None # Fails because of trailing space + + +def test_interpret_dio_mask(): + """Test the _interpret_DIO_mask function.""" + assert _interpret_DIO_mask(9, max_DIOs=8) == [1, 4] # Binary 1001 + assert _interpret_DIO_mask(0) == [] + assert _interpret_DIO_mask(None) == [] + assert _interpret_DIO_mask(pd.NA) == [] + assert _interpret_DIO_mask(1) == [1] + assert _interpret_DIO_mask(65536, max_DIOs=32) == [17] # 2^16 + assert _interpret_DIO_mask(65535, max_DIOs=16) == list( + range(1, 17) + ) # All 16 bits set + assert _interpret_DIO_mask(65535, max_DIOs=32) == list( + range(1, 17) + ) # Check max_DIOs limit + assert _interpret_DIO_mask("abc") == [] # Invalid input type + + +# --- Tests for Level 1 Parsers --- def test_parse_ts_int_int(): @@ -114,7 +153,7 @@ def test_parse_ts_int_int(): parts = ["8386500", "0", "0"] expected = { "type": "ts_int_int", - "trodes_timestamp": 8386500, + "timestamp": 8386500, 
# Raw timestamp key "value1": 0, "value2": 0, } @@ -135,7 +174,7 @@ def test_parse_ts_str_int(): parts = ["8386500", "DOWN", "3"] expected = { "type": "ts_str_int", - "trodes_timestamp": 8386500, + "timestamp": 8386500, # Raw timestamp key "text": "DOWN", "value": 3, } @@ -144,39 +183,41 @@ def test_parse_ts_str_int(): parts_wrong_len = ["123", "UP"] assert parse_ts_str_int(parts_wrong_len) is None + # This should be parsed by parse_ts_int_int due to precedence, + # so parse_ts_str_int should return None here because str part is int. parts_str_is_int = ["123", "456", "789"] - assert parse_ts_str_int(parts_str_is_int) is None # Should be handled by ts_int_int + assert parse_ts_str_int(parts_str_is_int) is None parts_val_not_int = ["123", "UP", "abc"] assert parse_ts_str_int(parts_val_not_int) is None def test_parse_ts_str_equals_int(): - """Test parse_ts_str_equals_int directly.""" + """Test parse_ts_str_equals_int directly. + NOTE: The code only handles a single word before '='. + """ parts = ["100078", "counter_handlePoke", "=", "1"] expected = { "type": "ts_str_equals_int", - "trodes_timestamp": 100078, - "text": "counter_handlePoke", + "timestamp": 100078, # Raw timestamp key + "text": "counter_handlePoke", # Correctly uses parts[1] "value": 1, } assert parse_ts_str_equals_int(parts) == expected + # This case is NOT handled by the current implementation (len(parts) != 4) parts_multi_word = ["3610855", "total", "rewards", "=", "70"] - expected_multi = { - "type": "ts_str_equals_int", - "trodes_timestamp": 3610855, - "text": "total rewards", - "value": 70, - } - assert parse_ts_str_equals_int(parts_multi_word) == expected_multi + assert parse_ts_str_equals_int(parts_multi_word) is None parts_wrong_len = ["123", "=", "1"] assert parse_ts_str_equals_int(parts_wrong_len) is None - parts_no_equals = ["123", "text", "1"] + parts_no_equals = ["123", "text", "1"] # len=3 != 4 assert parse_ts_str_equals_int(parts_no_equals) is None + parts_wrong_equals_pos = ["123", 
"text", "1", "="] # '=' is parts[3], not parts[2] + assert parse_ts_str_equals_int(parts_wrong_equals_pos) is None + parts_val_not_int = ["123", "text", "=", "abc"] assert parse_ts_str_equals_int(parts_val_not_int) is None @@ -186,7 +227,7 @@ def test_parse_ts_str(): parts = ["76566", "center_poke"] expected = { "type": "ts_str", - "trodes_timestamp": 76566, + "timestamp": 76566, # Raw timestamp key "text": "center_poke", } assert parse_ts_str(parts) == expected @@ -194,7 +235,7 @@ def test_parse_ts_str(): parts_multi_word = ["1271815", "some", "multi", "word", "event"] expected_multi = { "type": "ts_str", - "trodes_timestamp": 1271815, + "timestamp": 1271815, # Raw timestamp key "text": "some multi word event", } assert parse_ts_str(parts_multi_word) == expected_multi @@ -202,10 +243,8 @@ def test_parse_ts_str(): parts_wrong_len = ["123"] assert parse_ts_str(parts_wrong_len) is None - parts_second_is_int = [ - "123", - "456", - ] # Second part is int, should fail this parser + # Second part is int, should fail this parser (handled by ts_int_int or ts_str_int) + parts_second_is_int = ["123", "456"] assert parse_ts_str(parts_second_is_int) is None @@ -214,30 +253,39 @@ def test_parse_ts_str(): def test_parse_statescript_line_dispatching(): """Test parse_statescript_line dispatching for various line types.""" - lines_expected_types = [ - ("8386500 0 0", "ts_int_int"), - ("8386500 DOWN 3", "ts_str_int"), - ("100078 counter_handlePoke = 1", "ts_str_equals_int"), - ("76566 center_poke", "ts_str"), - ("Executing trigger function 22", "unknown"), - ("# comment", "comment_or_empty"), - ("", "comment_or_empty"), - (" ", "comment_or_empty"), - ("123 456 abc", "unknown"), # Doesn't fit ts_int_int because of 'abc' - ("123 abc def", "ts_str"), # Fits ts_str - ("456 123 = 5", "ts_str_equals_int"), # Fits this specific pattern + lines_expected = [ + ("8386500 0 0", "ts_int_int", 8386500), + ("100559 LEFT_PORT 1", "ts_str_int", 100559), + ("100078 counter_handlePoke = 1", 
"ts_str_equals_int", 100078), + ("76566 center_poke", "ts_str", 76566), + ("Executing trigger function 22", "unknown", None), # No timestamp + ("# comment", "comment_or_empty", None), + ("", "comment_or_empty", None), + (" ", "comment_or_empty", None), + ("123 456 abc", "unknown", None), # Doesn't fit ts_int_int/ts_str_int/ts_str + ("123 abc def", "ts_str", 123), # Fits ts_str + # Precedence: ts_str_equals_int matches first + ("456 text = 5", "ts_str_equals_int", 456), + # Precedence: ts_int_int matches first + ("8386500 128 512", "ts_int_int", 8386500), + # Precedence: ts_str_int matches (str 'UP' is not int) + ("90000 UP 10", "ts_str_int", 90000), + # Precedence: ts_str matches (str 'some text' is not int) + ("95000 some text here", "ts_str", 95000), ] - for line, expected_type in lines_expected_types: - parsed = parse_statescript_line(line) - assert parsed["type"] == expected_type - assert parsed["raw_line"] == line.strip() # parse_statescript_line strips + for i, (line, expected_type, expected_ts) in enumerate(lines_expected): + parsed = parse_statescript_line(line, line_num=i) + assert parsed["type"] == expected_type, f"Line: {line}" + assert parsed["raw_line"] == line.strip(), f"Line: {line}" + assert parsed["line_num"] == i, f"Line: {line}" + # Check timestamp presence/value based on type if expected_type not in ["unknown", "comment_or_empty"]: - assert "trodes_timestamp" in parsed + assert "timestamp" in parsed, f"Line: {line}" + assert parsed["timestamp"] == expected_ts, f"Line: {line}" else: - assert "trodes_timestamp" not in parsed or pd.isna( - parsed.get("trodes_timestamp") - ) + # Should explicitly contain timestamp: None for these types + assert parsed.get("timestamp") is None, f"Line: {line}" # --- Tests for StateScriptLogProcessor --- @@ -257,7 +305,7 @@ def test_init_from_file(temp_log_file, sample_log_content): processor_file = StateScriptLogProcessor.from_file(temp_log_file) assert processor_file.log_content == sample_log_content assert 
processor_file.source_description.startswith("from file:") - assert pathlib.Path(temp_log_file).name in processor_file.source_description + assert temp_log_file.name in processor_file.source_description def test_init_from_file_not_found(): @@ -271,78 +319,126 @@ def test_parse_raw_events(processor, sample_log_content): events = processor.parse_raw_events() assert processor.raw_events is events # Should store result internally assert isinstance(events, list) - assert len(events) == len( - sample_log_content.strip().splitlines() - ) # One dict per line + # Count lines in the fixture (includes comments, blanks if any) + num_lines = len(sample_log_content.strip().splitlines()) + assert len(events) == num_lines + + # Check specific lines based on fixture content + # Line 0: # Test log started assert events[0]["type"] == "comment_or_empty" + assert events[0]["line_num"] == 0 + assert events[0]["timestamp"] is None + # Line 1: 76504 0 0 assert events[1]["type"] == "ts_int_int" - assert events[7]["type"] == "unknown" # "Executing this line..." 
- assert events[9]["type"] == "comment_or_empty" # Last comment + assert events[1]["timestamp"] == 76504 + assert events[1]["value1"] == 0 + assert events[1]["line_num"] == 1 assert events[1]["raw_line"] == "76504 0 0" + # Line 7: Executing this line without timestamp + assert events[7]["type"] == "unknown" assert events[7]["raw_line"] == "Executing this line without timestamp" + assert events[7]["line_num"] == 7 + assert events[7]["timestamp"] is None + # Line 11: # Test log ended + assert events[11]["type"] == "comment_or_empty" + assert events[11]["line_num"] == 11 + assert events[11]["timestamp"] is None def test_find_reference_events(processor): """Test the internal _find_reference_events method.""" - # Case 1: Find 'ts_str' events + # Case 1: Find 'ts_str' events ('center_poke' appears twice) ref_df_str = processor._find_reference_events( event_type="ts_str", conditions={"text": "center_poke"} ) assert isinstance(ref_df_str, pd.DataFrame) assert len(ref_df_str) == 2 + # Check raw timestamp column (renamed from 'timestamp' in raw_events) pd.testing.assert_series_equal( - ref_df_str["trodes_timestamp"], - pd.Series([76566, 115030], name="trodes_timestamp"), - check_dtype=False, + ref_df_str["timestamp"], # Raw integer timestamp + pd.Series([76566, 115030], name="timestamp", dtype=int), + check_names=True, + check_dtype=True, ) + # Check calculated seconds column assert "trodes_timestamp_sec" in ref_df_str.columns - assert ref_df_str["trodes_timestamp_sec"].iloc[0] == pytest.approx(76.566) + pd.testing.assert_series_equal( + ref_df_str["trodes_timestamp_sec"], + pd.Series([76.566, 115.030], name="trodes_timestamp_sec", dtype=float), + check_names=True, + check_dtype=True, + ) + assert ref_df_str["text"].tolist() == ["center_poke", "center_poke"] - # Case 2: Find 'ts_int_int' events with specific values + # Case 2: Find 'ts_int_int' events with specific values (appears twice) ref_df_int = processor._find_reference_events( - event_type="ts_int_int", 
conditions={"value1": 4, "value2": 0} + event_type="ts_int_int", conditions={"value1": 65536, "value2": 0} ) - assert len(ref_df_int) == 1 - assert ref_df_int["trodes_timestamp"].iloc[0] == 100078 + assert len(ref_df_int) == 2 + assert ref_df_int["timestamp"].tolist() == [76566, 115030] + assert ref_df_int["value1"].tolist() == [65536, 65536] + assert ref_df_int["value2"].tolist() == [0, 0] + assert ref_df_int["trodes_timestamp_sec"].tolist() == [76.566, 115.030] + + # Case 3: Find 'ts_str_equals_int' (appears once) + ref_df_eq = processor._find_reference_events( + event_type="ts_str_equals_int", conditions={"text": "counter_handlePoke"} + ) + assert len(ref_df_eq) == 1 + assert ref_df_eq["timestamp"].iloc[0] == 100078 + assert ref_df_eq["text"].iloc[0] == "counter_handlePoke" + assert ref_df_eq["value"].iloc[0] == 1 + assert ref_df_eq["trodes_timestamp_sec"].iloc[0] == pytest.approx(100.078) - # Case 3: No matching events found + # Case 4: No matching events found ref_df_none = processor._find_reference_events( - event_type="ts_str_int", conditions={"text": "nonexistent"} + event_type="ts_str", conditions={"text": "nonexistent"} ) assert ref_df_none.empty assert isinstance(ref_df_none, pd.DataFrame) # Should still return DF + # Check expected columns exist even if empty + assert "timestamp" in ref_df_none.columns + assert "trodes_timestamp_sec" in ref_df_none.columns + assert "text" in ref_df_none.columns # From conditions - # Case 4: Ensure processor parses if raw_events is empty - processor.raw_events = [] + # Case 5: Ensure processor parses if raw_events is empty + processor.raw_events = [] # Reset raw events + assert processor.raw_events == [] ref_df_reparse = processor._find_reference_events( event_type="ts_str", conditions={"text": "center_poke"} ) - assert len(ref_df_reparse) == 2 # Should re-parse automatically + assert len(processor.raw_events) > 0 # Should have re-parsed + assert len(ref_df_reparse) == 2 # Should find the events def 
test_calculate_time_offset_success(processor, external_times): """Test successful time offset calculation.""" + # Use the 'ts_int_int' events matching external_times fixture offset = processor.calculate_time_offset( external_reference_times=external_times, - log_event_type="ts_int_int", # Use the events corresponding to external_times + log_event_type="ts_int_int", + # Use the keys from the raw parsed dict ('value1', 'value2') log_event_conditions={"value1": 65536, "value2": 0}, - check_n_events=2, # Use both events for matching + check_n_events=2, # Use both available matching events ) assert offset is not None assert processor.time_offset == offset # Check internal storage # Expected offset = external_base_time = 1678880000.0 - # external_times[0] = base + 76.566; log_times[0] = 76.566 + # external_times[0] = base + 76.566; log_times_sec[0] = 76.566 + # offset = external - log = base assert offset == pytest.approx(1678880000.0) -def test_calculate_time_offset_fail_not_enough_log(processor, external_times): +def test_calculate_time_offset_fail_not_enough_log( + processor, external_times_for_str_int +): """Test offset calculation failure due to insufficient log events.""" - # 'counter_handlePoke' only appears once, need 2 events + # 'LEFT_PORT 1' only appears once in the log, but default check_n_events=4 offset = processor.calculate_time_offset( - external_reference_times=external_times, - log_event_type="ts_str_equals_int", - log_event_conditions={"text": "counter_handlePoke"}, - check_n_events=2, + external_reference_times=external_times_for_str_int, # Has 4 times + log_event_type="ts_str_int", + log_event_conditions={"text": "LEFT_PORT", "value": 1}, + # check_n_events=4, # Default ) assert offset is None assert processor.time_offset is None # Should remain None @@ -350,27 +446,38 @@ def test_calculate_time_offset_fail_not_enough_log(processor, external_times): def test_calculate_time_offset_fail_not_enough_external(processor): """Test offset calculation failure 
due to insufficient external times.""" - # Only one external time provided, need 2 events + # Log has 2 '65536 0' events, provide only 1 external time, default check=4 offset = processor.calculate_time_offset( - external_reference_times=np.array([1678880076.566]), + external_reference_times=np.array([1678880076.566]), # Only 1 time log_event_type="ts_int_int", log_event_conditions={"value1": 65536, "value2": 0}, - check_n_events=2, + # check_n_events=4, # Default ) assert offset is None assert processor.time_offset is None + # Test again with check_n_events=2 (should still fail, need 2 external) + offset_check2 = processor.calculate_time_offset( + external_reference_times=np.array([1678880076.566]), # Only 1 time + log_event_type="ts_int_int", + log_event_conditions={"value1": 65536, "value2": 0}, + check_n_events=2, + ) + assert offset_check2 is None + assert processor.time_offset is None + def test_calculate_time_offset_fail_mismatch(processor, external_times): """Test offset calculation failure due to exceeding mismatch threshold.""" - # Shift external times slightly more than default threshold (0.1) - shifted_external_times = external_times + 0.06 # Total shift 0.12 over 2 events + # Shift external times enough to exceed default threshold (0.1) over 2 events + # Shift each by 0.06 -> total diff = 0.06 + 0.06 = 0.12 > 0.1 + shifted_external_times = external_times + 0.06 offset = processor.calculate_time_offset( external_reference_times=shifted_external_times, log_event_type="ts_int_int", log_event_conditions={"value1": 65536, "value2": 0}, check_n_events=2, - match_threshold=0.1, # Default threshold + match_threshold=0.1, # Explicitly set default for clarity ) assert offset is None assert processor.time_offset is None @@ -381,80 +488,177 @@ def test_get_events_dataframe_defaults(processor): df = processor.get_events_dataframe(apply_offset=False) assert processor.processed_events_df is df # Check internal storage assert isinstance(df, pd.DataFrame) - # Expected: 
11 lines total - 3 comments - 1 unknown = 7 valid events - assert len(df) == 7 + # Expected: 12 lines total - 2 comments - 1 unknown = 9 valid events + assert len(df) == 9 + assert df.index.name == "line_num" # Index should be line_num + + # --- Check Columns --- assert "raw_line" in df.columns + assert "type" in df.columns assert "trodes_timestamp" in df.columns assert "trodes_timestamp_sec" in df.columns + assert "text" in df.columns + assert "value" in df.columns + assert "active_DIO_inputs_bitmask" in df.columns + assert "active_DIO_outputs_bitmask" in df.columns + assert "active_DIO_inputs" in df.columns # List column + assert "active_DIO_outputs" in df.columns # List column assert "timestamp_sync" not in df.columns # Offset not applied - # Check content and types - assert df["type"].iloc[0] == "ts_int_int" - assert df["raw_line"].iloc[0] == "76504 0 0" - assert pd.isna(df["text"].iloc[0]) # text NA for ts_int_int - assert df["value1"].iloc[0] == 0 - assert df["trodes_timestamp"].dtype == "int64" + + # --- Check Content and Types (spot check first few rows) --- + # Row index corresponds to line_num + # Line 1: 76504 0 0 (type: ts_int_int) -> line_num 1 + assert df.loc[1, "type"] == "ts_int_int" + assert df.loc[1, "raw_line"] == "76504 0 0" + assert df.loc[1, "trodes_timestamp"] == 76504 + assert df.loc[1, "trodes_timestamp_sec"] == pytest.approx(76.504) + assert pd.isna(df.loc[1, "text"]) + assert pd.isna(df.loc[1, "value"]) + assert df.loc[1, "active_DIO_inputs_bitmask"] == 0 + assert df.loc[1, "active_DIO_outputs_bitmask"] == 0 + assert df.loc[1, "active_DIO_inputs"] == [] + assert df.loc[1, "active_DIO_outputs"] == [] + + # Line 2: 76566 center_poke (type: ts_str) -> line_num 2 + assert df.loc[2, "type"] == "ts_str" + assert df.loc[2, "trodes_timestamp"] == 76566 + assert df.loc[2, "text"] == "center_poke" + assert pd.isna(df.loc[2, "value"]) + assert pd.isna(df.loc[2, "active_DIO_inputs_bitmask"]) + assert pd.isna(df.loc[2, "active_DIO_outputs_bitmask"]) + 
assert df.loc[2, "active_DIO_inputs"] == [] # Should be empty list from NA mask + assert df.loc[2, "active_DIO_outputs"] == [] # Should be empty list from NA mask + + # Line 3: 76566 65536 0 (type: ts_int_int) -> line_num 3 + assert df.loc[3, "type"] == "ts_int_int" + assert df.loc[3, "trodes_timestamp"] == 76566 + assert df.loc[3, "active_DIO_inputs_bitmask"] == 65536 # DIO 17 + assert df.loc[3, "active_DIO_outputs_bitmask"] == 0 + assert df.loc[3, "active_DIO_inputs"] == [17] # Check interpretation + assert df.loc[3, "active_DIO_outputs"] == [] + + # Line 4: 100078 counter_handlePoke = 1 (type: ts_str_equals_int) -> line_num 4 + assert df.loc[4, "type"] == "ts_str_equals_int" + assert df.loc[4, "trodes_timestamp"] == 100078 + assert df.loc[4, "text"] == "counter_handlePoke" + assert df.loc[4, "value"] == 1 + assert pd.isna(df.loc[4, "active_DIO_inputs_bitmask"]) + + # Line 6: 100559 LEFT_PORT 1 (type: ts_str_int) -> line_num 6 + assert df.loc[6, "type"] == "ts_str_int" + assert df.loc[6, "trodes_timestamp"] == 100559 + assert df.loc[6, "text"] == "LEFT_PORT" + assert df.loc[6, "value"] == 1 + assert pd.isna(df.loc[6, "active_DIO_inputs_bitmask"]) + + # --- Check Dtypes --- + assert df["trodes_timestamp"].dtype == pd.Int64Dtype() # Nullable int assert df["trodes_timestamp_sec"].dtype == "float64" - assert df["value"].dtype == pd.Int64Dtype() # Nullable Integer + assert df["text"].dtype == "object" # String/mixed + assert df["value"].dtype == pd.Int64Dtype() + assert df["active_DIO_inputs_bitmask"].dtype == pd.Int64Dtype() + assert df["active_DIO_outputs_bitmask"].dtype == pd.Int64Dtype() + assert df["active_DIO_inputs"].dtype == "object" # List type + assert df["active_DIO_outputs"].dtype == "object" # List type -def test_get_events_dataframe_include_all(processor): +def test_get_events_dataframe_include_all(processor, sample_log_content): """Test including comments and unknown lines.""" df = processor.get_events_dataframe( apply_offset=False, 
exclude_comments_unknown=False ) assert isinstance(df, pd.DataFrame) - assert len(df) == 10 # All lines included - assert df["type"].iloc[0] == "comment_or_empty" - assert df["type"].iloc[7] == "unknown" - assert df["raw_line"].iloc[7] == "Executing this line without timestamp" - # Check that timestamp is NA/0 for lines without one - assert ( - pd.isna(df["trodes_timestamp"].iloc[0]) or df["trodes_timestamp"].iloc[0] == 0 - ) - assert ( - pd.isna(df["trodes_timestamp"].iloc[7]) or df["trodes_timestamp"].iloc[7] == 0 - ) - assert pd.isna(df["trodes_timestamp_sec"].iloc[0]) or np.isnan( - df["trodes_timestamp_sec"].iloc[0] - ) - assert pd.isna(df["trodes_timestamp_sec"].iloc[7]) or np.isnan( - df["trodes_timestamp_sec"].iloc[7] - ) + num_lines = len(sample_log_content.strip().splitlines()) + assert len(df) == num_lines # All lines included (12) + assert df.index.name == "line_num" + + # Check specific lines + # Line 0: Comment + assert df.loc[0, "type"] == "comment_or_empty" + assert df.loc[0, "raw_line"] == "# Test log started" + assert pd.isna(df.loc[0, "trodes_timestamp"]) # Should be NA (Int64Dtype) + assert np.isnan(df.loc[0, "trodes_timestamp_sec"]) # Should be NaN (float) + assert pd.isna(df.loc[0, "text"]) # Should be NA + assert df.loc[0, "active_DIO_inputs"] == [] # Should be empty list for comment + + # Line 7: Unknown + assert df.loc[7, "type"] == "unknown" + assert df.loc[7, "raw_line"] == "Executing this line without timestamp" + assert pd.isna(df.loc[7, "trodes_timestamp"]) + assert np.isnan(df.loc[7, "trodes_timestamp_sec"]) + assert pd.isna(df.loc[7, "text"]) + assert df.loc[7, "active_DIO_inputs"] == [] + + # Line 11: Comment + assert df.loc[11, "type"] == "comment_or_empty" + assert df.loc[11, "raw_line"] == "# Test log ended" + assert pd.isna(df.loc[11, "trodes_timestamp"]) + + # Check a valid line still looks right + assert df.loc[1, "type"] == "ts_int_int" + assert df.loc[1, "trodes_timestamp"] == 76504 def 
test_get_events_dataframe_with_offset(processor): """Test applying offset and check sync timestamp calculation.""" # Simulate successful offset calculation - processor.time_offset = 1678880000.0 + test_offset = 1678880000.0 + processor.time_offset = test_offset df = processor.get_events_dataframe(apply_offset=True) # Default exclude=True assert isinstance(df, pd.DataFrame) - assert len(df) == 7 # Excludes comments/unknown + assert len(df) == 9 # Excludes comments/unknown + assert df.index.name == "line_num" assert "timestamp_sync" in df.columns - # Check calculation for the first valid event (76504 ms) - expected_sync_time = (76504 / 1000.0) + 1678880000.0 - assert df["timestamp_sync"].iloc[0] == pytest.approx(expected_sync_time) - # Check NA value handling in other columns remains correct - assert pd.isna(df["text"].iloc[0]) - assert df["value1"].iloc[0] == 0 assert df["timestamp_sync"].dtype == "float64" + # Check calculation for a few events + # Line 1: 76504 ms + expected_sync_1 = (76504 / 1000.0) + test_offset + assert df.loc[1, "timestamp_sync"] == pytest.approx(expected_sync_1) + + # Line 3: 76566 ms + expected_sync_3 = (76566 / 1000.0) + test_offset + assert df.loc[3, "timestamp_sync"] == pytest.approx(expected_sync_3) + + # Line 9: 115030 ms + expected_sync_9 = (115030 / 1000.0) + test_offset + assert df.loc[9, "timestamp_sync"] == pytest.approx(expected_sync_9) + + # Check NA value handling in other columns remains correct + assert pd.isna(df.loc[1, "text"]) + assert df.loc[1, "active_DIO_inputs_bitmask"] == 0 + assert df.loc[3, "active_DIO_inputs"] == [17] + -def test_get_events_dataframe_offset_not_calculated(processor, capsys): - """Test applying offset when offset is None.""" +def test_get_events_dataframe_apply_offset_not_calculated(processor, capsys): + """Test applying offset when offset is None generates warning and no column.""" processor.time_offset = None # Ensure no offset is set - df = processor.get_events_dataframe(apply_offset=True) + df = 
processor.get_events_dataframe(apply_offset=True) # Request offset application assert isinstance(df, pd.DataFrame) assert "timestamp_sync" not in df.columns # Sync column should be absent - assert len(df) == 7 # Should still return the dataframe without the column + assert len(df) == 9 # Should still return the dataframe without the column + assert df.index.name == "line_num" - # Check that the warning was printed to stderr/stdout + # Check that the warning was printed captured = capsys.readouterr() assert ( - "Warning: Time offset requested but not calculated" in captured.out - or "Warning: Time offset requested but not calculated" in captured.err + "Warning: Time offset application requested" in captured.out + or "Warning: Time offset application requested" in captured.err ) +def test_get_events_dataframe_no_apply_offset_calculated(processor): + """Test apply_offset=False ignores existing offset.""" + processor.time_offset = 1000.0 # Set an offset + df = processor.get_events_dataframe( + apply_offset=False + ) # Request NO offset application + assert isinstance(df, pd.DataFrame) + assert "timestamp_sync" not in df.columns # Sync column should be absent + assert len(df) == 9 + assert df.index.name == "line_num" + + def test_empty_log(empty_processor): """Test processing an empty log file.""" events = empty_processor.parse_raw_events() @@ -462,6 +666,8 @@ def test_empty_log(empty_processor): df = empty_processor.get_events_dataframe() assert isinstance(df, pd.DataFrame) assert df.empty + # An empty dataframe doesn't have an index name set + assert df.index.name is None def test_comment_only_log(comment_only_processor): @@ -469,11 +675,13 @@ def test_comment_only_log(comment_only_processor): events = comment_only_processor.parse_raw_events() assert len(events) == 4 # 4 lines in the fixture assert all(e["type"] == "comment_or_empty" for e in events) + assert all(e["timestamp"] is None for e in events) # Default: exclude comments -> empty DataFrame df_excluded = 
comment_only_processor.get_events_dataframe(apply_offset=False) assert isinstance(df_excluded, pd.DataFrame) assert df_excluded.empty + assert df_excluded.index.name is None # Include comments -> DataFrame with only comment entries df_included = comment_only_processor.get_events_dataframe( @@ -481,36 +689,38 @@ def test_comment_only_log(comment_only_processor): ) assert isinstance(df_included, pd.DataFrame) assert len(df_included) == 4 + assert df_included.index.name == "line_num" assert all(df_included["type"] == "comment_or_empty") - assert ( - pd.isna(df_included["trodes_timestamp"].iloc[0]) - or df_included["trodes_timestamp"].iloc[0] == 0 - ) + assert df_included["trodes_timestamp"].isna().all() + assert df_included["trodes_timestamp_sec"].isna().all() + assert df_included["active_DIO_inputs"].apply(lambda x: x == []).all() def test_repr(processor): - """Test the __repr__ method.""" + """Test the __repr__ method reflects state.""" # Initial state initial_repr = repr(processor) assert isinstance(initial_repr, str) - assert "StateScriptLogProcessor" in initial_repr - assert "not parsed" in initial_repr - assert "no offset" in initial_repr - assert "not generated" in initial_repr + assert "StateScriptLogProcessor" in html_initial + assert "Status: Not Parsed" in html_initial + assert "Offset: Not Calculated" in html_initial + assert "DataFrame: Not Generated" in html_initial + assert "Source: from string" in html_initial + assert "DataFrame Preview" not in html_initial # No preview yet + # After parsing processor.parse_raw_events() + num_raw = len(processor.raw_events) html_parsed = processor._repr_html_() assert isinstance(html_parsed, str) - assert "Parsed" in html_parsed - assert f"({len(processor.raw_events)} raw entries)" in html_parsed + assert "Status: Parsed" in html_parsed + assert f"({num_raw} raw entries)" in html_parsed + assert "Offset: Not Calculated" in html_parsed + assert "DataFrame: Not Generated" in html_parsed - processor.time_offset = 1000.0 
+ # After offset calculation + processor.time_offset = 1234.5678 html_offset = processor._repr_html_() assert isinstance(html_offset, str) - assert "Offset: 1000.0" in html_offset + assert "Offset: 1234.5678s" in html_offset # Check formatting + assert "DataFrame: Not Generated" in html_offset + # After DataFrame generation processor.get_events_dataframe() html_df = processor._repr_html_() assert isinstance(html_df, str) assert "DataFrame: Generated" in html_df - assert "DataFrame Preview" in html_df # Check for preview section + assert ( + "
DataFrame Preview (first 5 rows):
" in html_df + ) # Check for preview section + assert " Date: Mon, 28 Apr 2025 13:00:54 -0400 Subject: [PATCH 06/23] Fix tests --- src/trodes_to_nwb/convert_statescript.py | 4 ++-- .../tests/test_convert_statescript.py | 14 ++++++-------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index e7bf8a7..b81e5a1 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -149,8 +149,8 @@ def parse_ts_str_equals_int(parts: list) -> Optional[Dict[str, Any]]: # Check length and presence of '=' in the correct position if len(parts) == 4 and parts[2] == "=": timestamp = _parse_int(parts[0]) - value = _parse_int(parts[-1]) # Expect integer value only - text = parts[3] + text = parts[1] + value = _parse_int(parts[3]) # Expect integer value only # Check if timestamp and value were successfully parsed as integers if timestamp is not None and value is not None: diff --git a/src/trodes_to_nwb/tests/test_convert_statescript.py b/src/trodes_to_nwb/tests/test_convert_statescript.py index 14e916f..29e3a40 100644 --- a/src/trodes_to_nwb/tests/test_convert_statescript.py +++ b/src/trodes_to_nwb/tests/test_convert_statescript.py @@ -51,7 +51,7 @@ def comment_only_log_content(): # Middle line # End - """ +""" @pytest.fixture @@ -91,7 +91,6 @@ def external_times_for_str_int(): # These correspond to the 'LEFT_PORT 1' event in sample_log_content # 100559 ms -> 100.559 s base_time = 1678880000.0 - # Needs enough events for check_n_events default (4), let's assume more exist conceptually return np.array( [ base_time + 100.559, @@ -110,7 +109,7 @@ def temp_log_file(sample_log_content): ) as tmp_file: tmp_file.write(sample_log_content) tmp_file_path = tmp_file.name - yield pathlib.Path(tmp_file_path) # Yield Path object + yield pathlib.Path(tmp_file_path) os.remove(tmp_file_path) @@ -125,7 +124,6 @@ def test_parse_int(): assert _parse_int("abc") is None 
assert _parse_int("12.3") is None assert _parse_int("") is None - assert _parse_int("123 ") is None # Fails because of trailing space def test_interpret_dio_mask(): @@ -153,7 +151,7 @@ def test_parse_ts_int_int(): parts = ["8386500", "0", "0"] expected = { "type": "ts_int_int", - "timestamp": 8386500, # Raw timestamp key + "timestamp": 8386500, "value1": 0, "value2": 0, } @@ -174,7 +172,7 @@ def test_parse_ts_str_int(): parts = ["8386500", "DOWN", "3"] expected = { "type": "ts_str_int", - "timestamp": 8386500, # Raw timestamp key + "timestamp": 8386500, "text": "DOWN", "value": 3, } @@ -227,7 +225,7 @@ def test_parse_ts_str(): parts = ["76566", "center_poke"] expected = { "type": "ts_str", - "timestamp": 76566, # Raw timestamp key + "timestamp": 76566, "text": "center_poke", } assert parse_ts_str(parts) == expected @@ -235,7 +233,7 @@ def test_parse_ts_str(): parts_multi_word = ["1271815", "some", "multi", "word", "event"] expected_multi = { "type": "ts_str", - "timestamp": 1271815, # Raw timestamp key + "timestamp": 1271815, "text": "some multi word event", } assert parse_ts_str(parts_multi_word) == expected_multi From acca16423caa3650c055641d6de94c56e29c27f3 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 18:09:57 -0400 Subject: [PATCH 07/23] Fix name --- src/trodes_to_nwb/convert_statescript.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index b81e5a1..b920ba1 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -618,7 +618,7 @@ def _find_reference_events( # Iterate through all parsed raw events for event in self.raw_events: # Check if the event type matches and it has a timestamp - if event.get("type") == event_type and "trodes_timestamp" in event: + if event.get("type") == event_type and "timestamp" in event: # Check if all specified conditions are met for this event match = all( 
event.get(key) == value for key, value in conditions.items() From 550231506575e80ee8c87f554e46df3a481ac7fc Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 18:10:13 -0400 Subject: [PATCH 08/23] Not expected to have active_DIO_inputs --- src/trodes_to_nwb/tests/test_convert_statescript.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/trodes_to_nwb/tests/test_convert_statescript.py b/src/trodes_to_nwb/tests/test_convert_statescript.py index 29e3a40..0e0a3b5 100644 --- a/src/trodes_to_nwb/tests/test_convert_statescript.py +++ b/src/trodes_to_nwb/tests/test_convert_statescript.py @@ -691,7 +691,6 @@ def test_comment_only_log(comment_only_processor): assert all(df_included["type"] == "comment_or_empty") assert df_included["trodes_timestamp"].isna().all() assert df_included["trodes_timestamp_sec"].isna().all() - assert df_included["active_DIO_inputs"].apply(lambda x: x == []).all() def test_repr(processor): From 42d0c408ca7fb8cb3c5301a20da1c76c03306e72 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 18:12:35 -0400 Subject: [PATCH 09/23] Check for bitmask columns --- src/trodes_to_nwb/convert_statescript.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index b920ba1..1c21216 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -904,12 +904,14 @@ def get_events_dataframe( "value2": "active_DIO_outputs_bitmask", } ) - df["active_DIO_inputs"] = df["active_DIO_inputs_bitmask"].apply( - lambda mask: _interpret_DIO_mask(mask, max_DIOs) - ) - df["active_DIO_outputs"] = df["active_DIO_outputs_bitmask"].apply( - lambda mask: _interpret_DIO_mask(mask, max_DIOs) - ) + if "active_DIO_inputs" in df.columns: + df["active_DIO_inputs"] = df["active_DIO_inputs_bitmask"].apply( + lambda mask: _interpret_DIO_mask(mask, max_DIOs) + ) + if "active_DIO_outputs" in df.columns: + 
df["active_DIO_outputs"] = df["active_DIO_outputs_bitmask"].apply( + lambda mask: _interpret_DIO_mask(mask, max_DIOs) + ) # --- Timestamp Processing --- # Ensure 'timestamp' column exists and convert to numeric/int From b0e2663761ef7b30fa2e53aeb70369cd292613bc Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 18:14:45 -0400 Subject: [PATCH 10/23] Fix examples --- src/trodes_to_nwb/convert_statescript.py | 30 ++++++++++++------------ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index 1c21216..3ffc856 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -300,9 +300,9 @@ def _interpret_DIO_mask( Example ------- - >>> interpret_DIO_mask(9) # 1001 in binary -> Ports 1 and 4 + >>> _interpret_DIO_mask(9) # 1001 in binary -> Ports 1 and 4 [1, 4] - >>> interpret_DIO_mask(65536) # 2^16 -> Port 17 + >>> _interpret_DIO_mask(65536) # 2^16 -> Port 17 [17] """ if pd.isna(DIO_state_value) or DIO_state_value == 0: @@ -367,19 +367,19 @@ class StateScriptLogProcessor: Example Usage ------------- - >>> # Load from file - >>> processor = StateScriptLogProcessor.from_file("path/to/session.stateScriptLog") - >>> # Assuming 'external_sync_times' is a numpy array of timestamps (in seconds) - >>> # corresponding to the log event "DIO Pin 8 going UP" - >>> processor.calculate_time_offset( - ... external_reference_times=external_sync_times, - ... log_event_type="ts_str_int", - ... log_event_conditions={"text": "UP", "value": 8} - ... ) - >>> # Get the processed DataFrame with synchronized timestamps - >>> df = processor.get_events_dataframe(apply_offset=True) - >>> if df is not None: - ... 
print(df[['timestamp_sync', 'type', 'text', 'value']].head()) + # Load from file + processor = StateScriptLogProcessor.from_file("path/to/session.stateScriptLog") + # Assuming 'external_sync_times' is a numpy array of timestamps (in seconds) + # corresponding to the log event "DIO Pin 8 going UP" + processor.calculate_time_offset( + external_reference_times=external_sync_times, + log_event_type="ts_str_int", + log_event_conditions={"text": "UP", "value": 8} + ) + # Get the processed DataFrame with synchronized timestamps + df = processor.get_events_dataframe(apply_offset=True) + if df is not None: + print(df[['timestamp_sync', 'type', 'text', 'value']].head()) """ MILLISECONDS_PER_SECOND = 1000 From 1eddb74c737a47fccc527b1462f4d2e06ce90b8d Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 18:35:38 -0400 Subject: [PATCH 11/23] Fix name --- src/trodes_to_nwb/convert_statescript.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index 3ffc856..f7a8687 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -904,11 +904,11 @@ def get_events_dataframe( "value2": "active_DIO_outputs_bitmask", } ) - if "active_DIO_inputs" in df.columns: + if "active_DIO_inputs_bitmask" in df.columns: df["active_DIO_inputs"] = df["active_DIO_inputs_bitmask"].apply( lambda mask: _interpret_DIO_mask(mask, max_DIOs) ) - if "active_DIO_outputs" in df.columns: + if "active_DIO_outputs_bitmask" in df.columns: df["active_DIO_outputs"] = df["active_DIO_outputs_bitmask"].apply( lambda mask: _interpret_DIO_mask(mask, max_DIOs) ) From 962e60644b06289535580c521ebf46bfc68a3a60 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 21:48:36 -0400 Subject: [PATCH 12/23] Fix test --- src/trodes_to_nwb/tests/test_convert_statescript.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git 
a/src/trodes_to_nwb/tests/test_convert_statescript.py b/src/trodes_to_nwb/tests/test_convert_statescript.py index 0e0a3b5..aace40c 100644 --- a/src/trodes_to_nwb/tests/test_convert_statescript.py +++ b/src/trodes_to_nwb/tests/test_convert_statescript.py @@ -467,9 +467,10 @@ def test_calculate_time_offset_fail_not_enough_external(processor): def test_calculate_time_offset_fail_mismatch(processor, external_times): """Test offset calculation failure due to exceeding mismatch threshold.""" - # Shift external times enough to exceed default threshold (0.1) over 2 events - # Shift each by 0.06 -> total diff = 0.06 + 0.06 = 0.12 > 0.1 - shifted_external_times = external_times + 0.06 + # Shift external times enough to exceed default threshold (0.1) on + # the second event + shifted_external_times = external_times + shifted_external_times[1] += 0.2 # Shift the second time by 0.2 seconds offset = processor.calculate_time_offset( external_reference_times=shifted_external_times, log_event_type="ts_int_int", From 1dcf3de1473175cfb37308b83086706b34150238 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 22:18:02 -0400 Subject: [PATCH 13/23] Fix name --- src/trodes_to_nwb/tests/test_convert_statescript.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/trodes_to_nwb/tests/test_convert_statescript.py b/src/trodes_to_nwb/tests/test_convert_statescript.py index aace40c..47838c3 100644 --- a/src/trodes_to_nwb/tests/test_convert_statescript.py +++ b/src/trodes_to_nwb/tests/test_convert_statescript.py @@ -395,7 +395,7 @@ def test_find_reference_events(processor): assert ref_df_none.empty assert isinstance(ref_df_none, pd.DataFrame) # Should still return DF # Check expected columns exist even if empty - assert "timestamp" in ref_df_none.columns + assert "trodes_timestamp" in ref_df_none.columns assert "trodes_timestamp_sec" in ref_df_none.columns assert "text" in ref_df_none.columns # From conditions @@ -470,6 +470,8 @@ def 
test_calculate_time_offset_fail_mismatch(processor, external_times): # Shift external times enough to exceed default threshold (0.1) on # the second event shifted_external_times = external_times + # External times are not a good stable reference + # because the second one is shifted shifted_external_times[1] += 0.2 # Shift the second time by 0.2 seconds offset = processor.calculate_time_offset( external_reference_times=shifted_external_times, From eb87814587ce79cc9c7cfba5471667238b0a6140 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 22:42:13 -0400 Subject: [PATCH 14/23] Add docstring --- src/trodes_to_nwb/convert_statescript.py | 60 ++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index f7a8687..0545d52 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -1,3 +1,63 @@ +"""StateScript log parsing and processing module. + +This module provides tools for parsing, interpreting, and processing `.stateScriptLog` +files generated by the Trodes neural recording system. It handles the conversion +of Trodes timestamps, alignment with external time sources, interpretation of +Digital Input/Output (DIO) states, and processing of various common log line formats. + +Notes +----- +Source Files: + - Log files parsed by this module typically have the `.stateScriptLog` extension. + - These files are generated by the Trodes software during data acquisition sessions. + +Timestamp Information: + - The primary timestamp (``) found in these logs is a 64-bit integer. + - It represents the number of milliseconds elapsed since the start of the + Trodes recording session. + - This is often referred to as the 'Trodes timestamp'. + +Log Line Formats: + `.stateScriptLog` files usually contain lines adhering to several common formats. 
+ The module aims to parse lines matching these structures: + + ``ts_int_int`` : ` ` + Represents timestamp and two integers. These integers often function as + bitwise masks representing the state of DIO pins. + Example: ``1817158 128 512`` + + ``ts_str_int`` : ` ` + Represents timestamp, a string label, and an integer value. Frequently + used for user-defined messages logging DIO pin state changes (e.g., pin name and state). + Example: ``8386500 DOWN 3`` + + ``ts_str_eq_int`` : ` = ` + Represents timestamp and a named integer variable assignment, useful for + tracking counters or state variables within the StateScript. + Example: ``3610855 totRewards = 70`` + + ``ts_str`` : ` ` + Represents timestamp followed by one or more space-separated strings. + Commonly used for logging event markers or descriptive text messages. + Example: ``1678886401 LOCKEND`` + + ``comment_or_empty`` : Lines starting with `#` or completely empty lines. + Lines starting with '#' are treated as comments. Empty lines may also occur. + These are typically ignored during data extraction. + Example: ``# Starting new trial block`` + + ``unknown`` : Lines that do not conform to the patterns listed above. + These might include initial header lines, formatting variations, or unexpected entries. + Example: ``initiated`` + +Component Definitions: + - ````: 64-bit integer; milliseconds since session start (Trodes timestamp). + - ````: Integer value; often used as a bitwise mask for DIO pin states. + - ````: String value; can represent an event name, variable name, message component, etc. + - ````: Denotes one or more space-separated strings. 
+ +""" + import pathlib from typing import Any, Dict, List, Optional, Type, TypeVar, Union From 9f1ed1a658bd226c59ee90e1636ed8441a0a132a Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Tue, 29 Apr 2025 09:20:16 -0400 Subject: [PATCH 15/23] Minor edits to docstrings --- src/trodes_to_nwb/convert_statescript.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index 0545d52..21fefac 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -1,7 +1,7 @@ """StateScript log parsing and processing module. This module provides tools for parsing, interpreting, and processing `.stateScriptLog` -files generated by the Trodes neural recording system. It handles the conversion +files generated by Trodes. It handles the conversion of Trodes timestamps, alignment with external time sources, interpretation of Digital Input/Output (DIO) states, and processing of various common log line formats. @@ -9,7 +9,7 @@ ----- Source Files: - Log files parsed by this module typically have the `.stateScriptLog` extension. - - These files are generated by the Trodes software during data acquisition sessions. + - These files are generated by Trodes during data acquisition sessions. Timestamp Information: - The primary timestamp (``) found in these logs is a 64-bit integer. @@ -83,11 +83,6 @@ def _parse_int(s: str) -> Optional[int]: ------- Optional[int] The parsed integer, or None if parsing fails. - - Raises - ------ - ValueError - If the string cannot be converted to an integer. """ try: return int(s) @@ -196,7 +191,6 @@ def parse_ts_str_equals_int(parts: list) -> Optional[Dict[str, Any]]: ---------- parts : list A list of strings obtained by splitting a log line by whitespace. - Expected to contain 4 parts, with '=' as the second part. 
Returns ------- From d605914953c00279ac14b909d8425d09a83a1a97 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Tue, 29 Apr 2025 09:20:37 -0400 Subject: [PATCH 16/23] Use Int64Dtype --- src/trodes_to_nwb/convert_statescript.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index 21fefac..3415533 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -710,8 +710,9 @@ def _find_reference_events( try: if isinstance(value, int): # Convert column to numeric, then integer (handles potential errors) - df[key] = pd.to_numeric(df[key], errors="coerce").astype(int) - # Add elif for float, bool etc. if needed + df[key] = pd.to_numeric(df[key], errors="coerce").astype( + pd.Int64Dtype() + ) except (ValueError, TypeError): # Ignore casting errors if conversion isn't possible pass From 4304051b27d8139bbe1a6ba21d7bb6ef92cd09b6 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Tue, 29 Apr 2025 09:20:57 -0400 Subject: [PATCH 17/23] Minor edit --- src/trodes_to_nwb/convert_statescript.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index 3415533..77a27d8 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -191,6 +191,7 @@ def parse_ts_str_equals_int(parts: list) -> Optional[Dict[str, Any]]: ---------- parts : list A list of strings obtained by splitting a log line by whitespace. + Expected to contain 4 parts, with '=' as the third part. 
Returns ------- From 9a89ae5485f650f88ea453bcdb487192b4160831 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Tue, 29 Apr 2025 09:21:35 -0400 Subject: [PATCH 18/23] Exclude int int by default --- src/trodes_to_nwb/convert_statescript.py | 26 ++++++++++++++++-------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index 77a27d8..7f03c95 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -878,6 +878,7 @@ def get_events_dataframe( self, apply_offset: bool = True, exclude_comments_unknown: bool = True, + exclude_int_int: bool = True, max_DIOs: int = 32, ) -> pd.DataFrame: """Constructs and returns a pandas DataFrame from the parsed log events. @@ -892,6 +893,14 @@ def get_events_dataframe( If True (default), lines parsed as 'comment_or_empty' or 'unknown' are excluded from the DataFrame. If False, all entries from `raw_events` are included (potentially useful for debugging parsing). + exclude_int_int : bool, optional + If True (default), lines parsed as 'ts_int_int' are excluded from + the DataFrame. These are often used for DIO state changes and may not + be relevant for most analyses. + max_DIOs : int, optional + The maximum number of DIOs to consider when interpreting bitmasks + for active DIO inputs/outputs. Default is 32. This is used to + determine the number of bits to check in the bitmask values. 
Returns ------- @@ -920,16 +929,15 @@ def get_events_dataframe( return self.processed_events_df # Determine which event types to filter out + exclude_types = [] if exclude_comments_unknown: - exclude_types = ("comment_or_empty", "unknown") - filtered_events = [ - event - for event in self.raw_events - if event.get("type") not in exclude_types - ] - else: - # Include all event types if not excluding - filtered_events = self.raw_events + exclude_types += ["comment_or_empty", "unknown"] + if exclude_int_int: + exclude_types += ["ts_int_int"] + + filtered_events = [ + event for event in self.raw_events if event.get("type") not in exclude_types + ] # Handle case where filtering leaves no events if not filtered_events: From ce58d140b445c655baf219ac4b64194b16e2f7ad Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Tue, 29 Apr 2025 10:06:24 -0400 Subject: [PATCH 19/23] false by default --- src/trodes_to_nwb/convert_statescript.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index 7f03c95..e66b607 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -878,7 +878,7 @@ def get_events_dataframe( self, apply_offset: bool = True, exclude_comments_unknown: bool = True, - exclude_int_int: bool = True, + exclude_int_int: bool = False, max_DIOs: int = 32, ) -> pd.DataFrame: """Constructs and returns a pandas DataFrame from the parsed log events. @@ -894,7 +894,7 @@ def get_events_dataframe( are excluded from the DataFrame. If False, all entries from `raw_events` are included (potentially useful for debugging parsing). exclude_int_int : bool, optional - If True (default), lines parsed as 'ts_int_int' are excluded from + If True, lines parsed as 'ts_int_int' are excluded from the DataFrame. These are often used for DIO state changes and may not be relevant for most analyses. 
max_DIOs : int, optional From 180e804a9e1bffa7e26fe8629bd2510045c49b4b Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Wed, 30 Apr 2025 10:58:26 -0700 Subject: [PATCH 20/23] Update src/trodes_to_nwb/convert_statescript.py Co-authored-by: Samuel Bray --- src/trodes_to_nwb/convert_statescript.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index e66b607..977fa71 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -116,17 +116,18 @@ def parse_ts_int_int(parts: list) -> Optional[Dict[str, Any]]: if the line matches the expected structure and all parts are valid integers. Returns None otherwise. """ - if len(parts) == 3: - # Attempt to parse all three parts as integers - timestamp, val1, val2 = [_parse_int(part) for part in parts] + if len(parts) != 3: + return + # Attempt to parse all three parts as integers + int_parts = [_parse_int(part) for part in parts] - # Check if all parsing attempts were successful - if timestamp is not None and val1 is not None and val2 is not None: - return { - "type": "ts_int_int", - "timestamp": timestamp, - "value1": val1, - "value2": val2, + # Check if all parsing attempts were successful + if all([part is not None for part in int_parts]): + return { + "type": "ts_int_int", + "timestamp": part[0], + "value1": part[1], + "value2": part[2], } From bbe68f06a83e6c7b6795ef96ce353217b3973826 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Wed, 30 Apr 2025 10:58:53 -0700 Subject: [PATCH 21/23] Update src/trodes_to_nwb/convert_statescript.py Co-authored-by: Samuel Bray --- src/trodes_to_nwb/convert_statescript.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index 977fa71..3911d4b 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ 
b/src/trodes_to_nwb/convert_statescript.py @@ -99,8 +99,6 @@ def parse_ts_int_int(parts: list) -> Optional[Dict[str, Any]]: Example: 8386500 0 0 -> {'ts': 8386500, 'value1': 0, 'value2': 0} - 1817158 128 512 -> {'ts': 1817158, 'value1': 128, 'value2': 512} - 76566 65536 0 -> {'ts': 76566, 'value1': 65536, 'value2': 0} Parameters ---------- From 5b1b611e34dccfdea0d08c3f5ee5bef4ddaaadc6 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Wed, 30 Apr 2025 15:14:55 -0400 Subject: [PATCH 22/23] Fix linting --- src/trodes_to_nwb/convert_statescript.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index 3911d4b..f407347 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -126,7 +126,7 @@ def parse_ts_int_int(parts: list) -> Optional[Dict[str, Any]]: "timestamp": part[0], "value1": part[1], "value2": part[2], - } + } def parse_ts_str_int(parts: list) -> Optional[Dict[str, Any]]: From b300cb95cd0ae6363e1105d0c1b31c1e3d7a890f Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Wed, 30 Apr 2025 16:31:48 -0400 Subject: [PATCH 23/23] Update convert_statescript.py --- src/trodes_to_nwb/convert_statescript.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index f407347..b2c8f57 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -114,19 +114,18 @@ def parse_ts_int_int(parts: list) -> Optional[Dict[str, Any]]: if the line matches the expected structure and all parts are valid integers. Returns None otherwise. 
""" - if len(parts) != 3: - return - # Attempt to parse all three parts as integers - int_parts = [_parse_int(part) for part in parts] + if len(parts) == 3: + # Attempt to parse all three parts as integers + timestamp, val1, val2 = [_parse_int(part) for part in parts] - # Check if all parsing attempts were successful - if all([part is not None for part in int_parts]): - return { - "type": "ts_int_int", - "timestamp": part[0], - "value1": part[1], - "value2": part[2], - } + # Check if all parsing attempts were successful + if timestamp is not None and val1 is not None and val2 is not None: + return { + "type": "ts_int_int", + "timestamp": timestamp, + "value1": val1, + "value2": val2, + } def parse_ts_str_int(parts: list) -> Optional[Dict[str, Any]]: