From 8009edf8064ac359c17aa547abec2924776814b1 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 11:49:21 -0400 Subject: [PATCH 01/23] Add statescript parsing --- src/trodes_to_nwb/convert_dios.py | 1 + src/trodes_to_nwb/convert_statescript.py | 1123 +++++++++++++++++ .../tests/test_convert_statescript.py | 544 ++++++++ 3 files changed, 1668 insertions(+) create mode 100644 src/trodes_to_nwb/convert_statescript.py create mode 100644 src/trodes_to_nwb/tests/test_convert_statescript.py diff --git a/src/trodes_to_nwb/convert_dios.py b/src/trodes_to_nwb/convert_dios.py index e532cae..97c57a8 100644 --- a/src/trodes_to_nwb/convert_dios.py +++ b/src/trodes_to_nwb/convert_dios.py @@ -21,6 +21,7 @@ def _get_channel_name_map(metadata: dict) -> dict[str, str]: ------- channel_name_map : dict Parsed behavioral events metadata mapping hardware event name to human-readable name + {"hardware_event_name": {"name": "human_readable_name", "comments": "comments"}} """ dio_metadata = metadata["behavioral_events"] channel_name_map = {} diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py new file mode 100644 index 0000000..e0ce567 --- /dev/null +++ b/src/trodes_to_nwb/convert_statescript.py @@ -0,0 +1,1123 @@ +import pathlib +from typing import Any, Dict, List, Optional, Type, TypeVar, Union + +import numpy as np +import pandas as pd + +from .convert_dios import _get_channel_name_map as _get_dio_channel_name_map + +T_StateScriptLogProcessor = TypeVar( + "T_StateScriptLogProcessor", bound="StateScriptLogProcessor" +) + + +def _parse_int(s: str) -> Optional[int]: + """Attempts to parse a string as an integer. + + Parameters + ---------- + s : str + Input string. + + Returns + ------- + Optional[int] + The parsed integer, or None if parsing fails. + + Raises + ------ + ValueError + If the string cannot be converted to an integer. 
+ """ + try: + return int(s) + except ValueError: + return None + + +def parse_ts_int_int(parts: list) -> Optional[Dict[str, Any]]: + """Parses lines with structure. + + This pattern typically represents a timestamp followed by two integer values. + These integers are bitwise masks or state values, often used for logging + DIO states or other binary values. + + Example: + 8386500 0 0 -> {'ts': 8386500, 'value1': 0, 'value2': 0} + 1817158 128 512 -> {'ts': 1817158, 'value1': 128, 'value2': 512} + 76566 65536 0 -> {'ts': 76566, 'value1': 65536, 'value2': 0} + + Parameters + ---------- + parts : list + A list of strings obtained by splitting a log line by whitespace. + Expected to contain exactly 3 parts for this pattern. + + Returns + ------- + Optional[Dict[str, Any]] + A dictionary containing the parsed data: + {'type': 'ts_int_int', 'timestamp': int, 'value1': int, 'value2': int} + if the line matches the expected structure and all parts are valid integers. + Returns None otherwise. + """ + if len(parts) == 3: + # Attempt to parse all three parts as integers + timestamp, val1, val2 = [_parse_int(part) for part in parts] + + # Check if all parsing attempts were successful + if timestamp is not None and val1 is not None and val2 is not None: + return { + "type": "ts_int_int", + "timestamp": timestamp, + "value1": val1, + "value2": val2, + } + + +def parse_ts_str_int(parts: list) -> Optional[Dict[str, Any]]: + """Parses log lines structured as: . + + This pattern consists of a timestamp, a string, and a final + integer value. Often used for logging state changes associated + with an identifier (e.g., DIO pin state). + + Example: + 8386500 DOWN 3 -> {'ts': 8386500, 'text': 'DOWN', 'value': 3} + + Interpretation: At timestamp 8386500, the state associated with + identifier 3 changed to 'DOWN'. + + Parameters + ---------- + parts : list + A list of strings obtained by splitting a log line by whitespace. + Expected to contain exactly 3 parts for this pattern. 
+
+    Returns
+    -------
+    Optional[Dict[str, Any]]
+        A dictionary containing the parsed data:
+        {'type': 'ts_str_int', 'timestamp': int, 'text': str, 'value': int}
+        if the line matches the structure (int, non-int string, int).
+        Returns None otherwise.
+    """
+    if len(parts) == 3:
+        # Parse the first and third parts as integers
+        timestamp = _parse_int(parts[0])
+        text_part = parts[1]  # Middle part is expected to be text
+        val_int = _parse_int(parts[2])
+
+        # Check conditions: timestamp and value are ints, text part is not an int
+        if (
+            timestamp is not None
+            and _parse_int(text_part) is None
+            and val_int is not None
+        ):
+            return {
+                "type": "ts_str_int",
+                "timestamp": timestamp,
+                "text": text_part,
+                "value": val_int,
+            }
+
+
+def parse_ts_str_equals_int(parts: list) -> Optional[Dict[str, Any]]:
+    """Parses log lines structured as: <int> <str> = <int>.
+
+    This pattern includes a timestamp, followed by a label string,
+    an equals sign, and a final integer value. Used for logging named integer variables.
+
+    Example Log Lines:
+        3610855 totRewards = 70 -> {'ts': 3610855, 'text': 'totRewards', 'value': 70}
+        100078 counter_handlePoke = 1 -> {'ts': 100078, 'text': 'counter_handlePoke', 'value': 1}
+
+    Parameters
+    ----------
+    parts : list
+        A list of strings obtained by splitting a log line by whitespace.
+        Expected to contain exactly 4 parts, with '=' as the third part.
+
+    Returns
+    -------
+    Optional[Dict[str, Any]]
+        A dictionary containing the parsed data:
+        {'type': 'ts_str_equals_int', 'timestamp': int, 'text': str, 'value': int}
+        if the line matches the expected structure (int, label, '=', int).
+        Returns None otherwise.
+    """
+    # Check length and presence of '=' in the correct position
+    if len(parts) == 4 and parts[2] == "=":
+        timestamp = _parse_int(parts[0])
+        value = _parse_int(parts[-1])  # Expect integer value only
+        # BUGFIX: the label is parts[1] (the token before '='), not parts[3],
+        # which is the value token and would clobber 'text' with the number
+        # (e.g. returning text='70' instead of text='totRewards').
+        text = parts[1]
+
+        # Check if timestamp and value were successfully parsed as integers
+        if timestamp is not None and value is not None:
+            return {
+                "type": "ts_str_equals_int",
+                "timestamp": timestamp,
+                "text": text,
+                "value": value,
+            }
+
+
+def parse_ts_str(parts: list) -> Optional[Dict[str, Any]]:
+    """Parses log lines structured as: <int> <str...>.
+
+    This pattern represents a timestamp followed by one or more string parts,
+    where the first string part after the timestamp is *not* parseable as an integer.
+    Often used for logging timestamped events or messages.
+
+    Example Log Lines:
+        1678886401 LOCKEND -> {'ts': 1678886401, 'text': 'LOCKEND'}
+        76566 center_poke initiated -> {'ts': 76566, 'text': 'center_poke initiated'}
+
+    Parameters
+    ----------
+    parts : list
+        A list of strings obtained by splitting a log line by whitespace.
+        Expected to contain at least 2 parts.
+
+    Returns
+    -------
+    Optional[Dict[str, Any]]
+        A dictionary containing the parsed data:
+        {'type': 'ts_str', 'timestamp': int, 'text': str}
+        if the line matches the structure (int, non-int string, [optional strings...]).
+        'text' contains the joined string parts after the timestamp.
+        Returns None otherwise.
+ """ + # Check minimum length + if len(parts) >= 2: + timestamp = _parse_int(parts[0]) + # Check if the second part is parseable as an integer + first_word_is_int = _parse_int(parts[1]) is not None + + # Proceed only if timestamp is valid AND the second part is NOT an integer + if timestamp is not None and not first_word_is_int: + # Join all parts after the timestamp + text_part = " ".join(parts[1:]) + return {"type": "ts_str", "timestamp": timestamp, "text": text_part} + + +def parse_statescript_line(line: str, line_num: int = 0) -> Optional[Dict[str, Any]]: + """Attempts to parse a single StateScript log line using a set of parsers. + + It tries parsing the line against known structures in a specific order + of precedence to handle potentially overlapping patterns: + 1. ` = ` ('ts_str_equals_int') + 2. ` ` ('ts_int_int') + 3. ` ` ('ts_str_int', where is not an int) + 4. ` ` ('ts_str', where first is not an int) + + Lines starting with '#' or empty lines are marked as 'comment_or_empty'. + Lines that do not match any known pattern are marked as 'unknown'. + + Parameters + ---------- + line : str + A single line (string) from the StateScript log file. + line_num : int, optional + The line number in the file (for reference), by default 0. + + Returns + ------- + Dict[str, Any] + A dictionary describing the parsed line. It always contains: + - 'type': A string indicating the matched pattern + ('ts_str_equals_int', 'ts_int_int', 'ts_str_int', 'ts_str', + 'comment_or_empty', 'unknown'). + - 'raw_line': The original input line string. + For successfully parsed types, it includes additional keys like + 'timestamp', 'text', 'value', 'value1', 'value2' as appropriate. 
+ """ + line = line.strip() + + # Handle comments and empty lines first + if not line or line.startswith("#"): + return { + "type": "comment_or_empty", + "raw_line": line, + "line_num": line_num, + "timestamp": None, + } + + # Define the parsing functions in order of desired precedence + # More specific patterns should come before more general ones + parsers = [ + parse_ts_str_equals_int, + parse_ts_int_int, + parse_ts_str_int, + parse_ts_str, + ] + parts = line.split() # Split line into parts based on whitespace + + # Iterate through parsers and return the first successful match + for parser in parsers: + parsed = parser(parts) + if parsed: + # Add the original line to the parsed result + parsed["raw_line"] = line + parsed["line_num"] = line_num # Include line number for reference + return parsed + + return { + "type": "unknown", + "raw_line": line, + "line_num": line_num, + "timestamp": None, + } + + +def _interpret_DIO_mask( + DIO_state_value: Optional[int], max_DIOs: int = 32 +) -> List[int]: + """ + Interprets an integer value as a bitmask representing active DIOs. + Assumes a 1-based DIO numbering system (e.g., bit 0 corresponds to DIO 1). + + For example, if there are 32 DIOs, the integer value 9 (binary 1001) + indicates that DIOs 1 and 4 are active (bits 0 and 3 are set). + + If there are 16 DIOs, the integer value 65536 (binary 10000000000000000) + indicates that DIO 17 is active (bit 16 is set). + + + Parameters + ---------- + DIO_state_value : Optional[int] + The integer value representing the combined state of multiple ports. + Handles None or pandas NA values. + max_DIOs : int, optional + The maximum port number to check (bits 0 to max_DIOs-1), by default 32. + + Returns + ------- + List[int] + A sorted list of 1-based port numbers that are active (bit is set). + Returns an empty list if the value is 0, None, or NA. 
+ + Example + ------- + >>> interpret_DIO_mask(9) # 1001 in binary -> Ports 1 and 4 + [1, 4] + >>> interpret_DIO_mask(65536) # 2^16 -> Port 17 + [17] + """ + if pd.isna(DIO_state_value) or DIO_state_value == 0: + return [] + + # Ensure value is treated as an integer after NA check + try: + DIO_state_value = int(DIO_state_value) + except (ValueError, TypeError): + # Should not happen if input is from Int64Dtype column after NA check, + # but included for robustness if called directly with invalid input. + return [] + + # Create bit masks for positions 0 to max_DIOs-1 + # E.g., [1, 2, 4, 8, ...] + bit_masks = np.left_shift(1, np.arange(max_DIOs)) + + # Check which bits are set in the input value using bitwise AND + active_bits_mask = np.bitwise_and(DIO_state_value, bit_masks) > 0 + + # Get the 0-based indices (bit positions) where bits are active + active_indices = np.where(active_bits_mask)[0] + + # Convert 0-based indices to 1-based DIO numbers and return as a list + active_ports = (active_indices + 1).tolist() + + # np.where returns sorted indices, so list is already sorted + return active_ports + + +# -- Main Class for Processing StateScript Logs -- +class StateScriptLogProcessor: + """Processes StateScript log content, handling parsing and time alignment. + + This class reads StateScript log data (either from a file or a string), + parses each line into a structured format, converts integer timestamps + (assumed to be milliseconds) into seconds, and optionally calculates + a time offset to align the log timestamps with an external reference time + source (e.g., synchronization pulses recorded by another system). + + Attributes + ---------- + log_content : str + The raw string content of the log file. + source_description : str + Information about where the log content came from (e.g., file path). + raw_events : List[Dict[str, Any]] + List of dictionaries, one per parsed line from the log content + (including comments/unknown lines). 
Generated by `parse_raw_events`. + Timestamps in this list are raw integers from the log. + processed_events_df : Optional[pd.DataFrame] + DataFrame containing structured event data, typically excluding + comments and unknown lines. Generated by `get_events_dataframe`. + Includes 'trodes_timestamp_sec' (float, seconds) converted from raw + timestamps, and potentially 'timestamp_sync' (float, seconds) if + time offset is calculated and applied. + time_offset : Optional[float] + The calculated time offset in seconds, representing the difference: + (external_reference_time_sec - trodes_timestamp_sec). + Set by `calculate_time_offset`. If calculated, adding this offset + to 'trodes_timestamp_sec' yields the synchronized time ('timestamp_sync'). + + Example Usage + ------------- + >>> # Load from file + >>> processor = StateScriptLogProcessor.from_file("path/to/session.stateScriptLog") + >>> # Assuming 'external_sync_times' is a numpy array of timestamps (in seconds) + >>> # corresponding to the log event "DIO Pin 8 going UP" + >>> processor.calculate_time_offset( + ... external_reference_times=external_sync_times, + ... log_event_type="ts_str_int", + ... log_event_conditions={"text": "UP", "value": 8} + ... ) + >>> # Get the processed DataFrame with synchronized timestamps + >>> df = processor.get_events_dataframe(apply_offset=True) + >>> if df is not None: + ... print(df[['timestamp_sync', 'type', 'text', 'value']].head()) + """ + + MILLISECONDS_PER_SECOND = 1000 + + log_content: str + source_description: str + raw_events: List[Dict[str, Any]] + processed_events_df: Optional[pd.DataFrame] + time_offset: Optional[float] + + def __init__(self, log_content: str, source_info: str = "from string"): + """Initializes the processor with log content and source information. + + Parameters + ---------- + log_content : str + The entire content of the state script log as a single string. 
+ source_info : str, optional + A description of the log content's source (e.g., file path, identifier). + Defaults to "from string". + """ + self.log_content = log_content + self.source_description = source_info + + # Initialize attributes that will be populated by methods + self.raw_events = [] + self.processed_events_df = None + self.time_offset = None + + @classmethod + def from_file( + cls: Type[T_StateScriptLogProcessor], + file_path: Union[str, pathlib.Path], + ) -> T_StateScriptLogProcessor: + """Creates a StateScriptLogProcessor instance by reading a log file. + + Parameters + ---------- + file_path : Union[str, pathlib.Path] + The path to the StateScript log file. + + Returns + ------- + T_StateScriptLogProcessor + An instance of the StateScriptLogProcessor initialized with the + content of the specified file. + + Raises + ------ + FileNotFoundError + If the file specified by `file_path` does not exist. + IOError + If an error occurs during file reading (e.g., permissions). + UnicodeDecodeError + If the file cannot be decoded using UTF-8 encoding (with fallback). + """ + file_path = pathlib.Path(file_path) # Ensure Path object for consistency + source_info = f"from file: {file_path}" + try: + # Read the file content. Using 'surrogateescape' allows reading + # potentially mixed/invalid encodings, preserving problematic bytes. + # UTF-8 is a common default for logs. 
+ content = file_path.read_text(encoding="utf-8", errors="surrogateescape") + # Create and return an instance of the class + return cls(log_content=content, source_info=source_info) + except FileNotFoundError: + print(f"Error: File not found at {file_path}") + raise # Re-raise to signal failure + except IOError as e: + print(f"Error reading file at {file_path}: {e}") + raise # Re-raise + except UnicodeDecodeError as e: + print(f"Error decoding file {file_path} using utf-8: {e}") + print("Consider checking file encoding if errors persist.") + raise # Re-raise + except Exception as e: + print(f"Unexpected error reading file {file_path}: {e}") + raise + + def __repr__(self) -> str: + """Provides a concise, unambiguous string representation of the processor. + + Includes information about the source, parsing status, number of raw events, + time offset status, and DataFrame generation status. + + Returns + ------- + str + String representation of the StateScriptLogProcessor instance. + """ + cls_name = self.__class__.__name__ + source = self.source_description + + # Describe parsing status + if not self.raw_events: + parse_status = "not parsed" + num_raw = "" + else: + parse_status = "parsed" + num_raw = f", raw_events={len(self.raw_events)}" + + # Describe time offset status + offset_status = ( + f"offset={self.time_offset:.4f}s" + if self.time_offset is not None + else "no offset calculated" + ) + + # Describe DataFrame status + df_status = ( + "DataFrame generated" + if self.processed_events_df is not None + else "DataFrame not generated" + ) + + return f"<{cls_name}(source='{source}', status={parse_status}{num_raw}, {offset_status}, {df_status})>" + + def _repr_html_(self) -> str: + """Generates an HTML representation for display in Jupyter/IPython. + + Provides a more visually structured overview of the processor's state, + including source, parsing status, offset, DataFrame status, and a + preview of the DataFrame if generated. 
+ + Returns + ------- + str + HTML string representing the StateScriptLogProcessor instance. + """ + cls_name = self.__class__.__name__ + # Use getattr for robustness in case attributes haven't been set yet + source = getattr(self, "source_description", "source info missing") + raw_events_list = getattr(self, "raw_events", []) # Default to empty list + df_val = getattr(self, "processed_events_df", None) + offset_val = getattr(self, "time_offset", None) + + # Build status strings based on attribute values + if not raw_events_list: + parse_status = "Status: Not Parsed" + num_raw_str = "" + else: + parse_status = "Status: Parsed" + num_raw_str = f" ({len(raw_events_list)} raw entries)" + + offset_status = ( + f"Time Offset: {offset_val:.4f}s" + if offset_val is not None + else "Time Offset: Not Calculated" + ) + df_status = ( + "DataFrame: Generated" + if df_val is not None + else "DataFrame: Not Generated" + ) + + # Basic HTML structure and styling + html = f""" +
+

{cls_name}

+

Source: {source}

+

{parse_status}{num_raw_str}

+

{offset_status}

+

{df_status}

+ """ + + # Add DataFrame preview if it exists and is not empty + if df_val is not None and not df_val.empty: + html += "
DataFrame Preview (first 5 rows):
" + try: + # Generate HTML table from DataFrame head + html += df_val.head().to_html( + index=False, # Don't include DataFrame index + border=0, # No table border + justify="left", # Align text left + classes="dataframe-preview", # Add a class for potential CSS styling + ) + except Exception as e: + html += f"

Error generating DataFrame HTML preview: {e}

" + elif df_val is not None and df_val.empty: + html += "

(DataFrame is empty)

" + + html += "
" + return html + + def parse_raw_events(self) -> List[Dict[str, Any]]: + """Parses the loaded log content line by line. + + Returns + ------- + List[Dict[str, Any]] + The list of parsed event dictionaries stored in `self.raw_events`. + Each dictionary represents one line from the log. + """ + lines = self.log_content.splitlines() + # Use list comprehension for concise parsing of all lines + self.raw_events = [ + parse_statescript_line(line, line_num) + for line_num, line in enumerate(lines) + ] + return self.raw_events + + def _find_reference_events( + self, event_type: str, conditions: Dict[str, Any] + ) -> pd.DataFrame: + """Internal helper to find specific log events for time alignment. + + Filters the `self.raw_events` list to find events matching the specified + `event_type` and satisfying all key-value pairs in `conditions`. + Converts the integer timestamp (assumed to be milliseconds) of matching + events to seconds (float) and stores it in a 'trodes_timestamp_sec' column. + + Parameters + ---------- + event_type : str + The required 'type' field of the events to find + (e.g., 'ts_str_int', 'ts_int_int'). + conditions : Dict[str, Any] + A dictionary where keys are field names within the event dictionary + (e.g., 'text', 'value', 'value1') and values are the required values + for an event to be considered a match. + + Returns + ------- + pd.DataFrame + A DataFrame containing the matching events. Includes the original + 'timestamp' (int, milliseconds), the calculated 'trodes_timestamp_sec' + (float, seconds), and the fields specified in `conditions`. + The DataFrame is sorted by 'trodes_timestamp_sec'. + Returns an empty DataFrame if no matching events are found. 
+        """
+        # Ensure raw events are parsed first if not already done
+        if not self.raw_events:
+            self.parse_raw_events()
+
+        matching_events = []
+        # Iterate through all parsed raw events
+        for event in self.raw_events:
+            # Check if the event type matches and it has a timestamp.
+            # BUGFIX: the line parsers store the raw time under the key
+            # 'timestamp' (see parse_statescript_line); the previous check for
+            # 'trodes_timestamp' matched no event, so this method always
+            # returned empty and calculate_time_offset could never succeed.
+            if event.get("type") == event_type and "timestamp" in event:
+                # Check if all specified conditions are met for this event
+                match = all(
+                    event.get(key) == value for key, value in conditions.items()
+                )
+                if match:
+                    matching_events.append(event)
+
+        # If no matches were found, return an empty DataFrame with expected columns
+        if not matching_events:
+            # Define columns for the empty DataFrame, using 'timestamp' for
+            # consistency with the non-empty case below (raw-event key name).
+            cols = ["timestamp", "trodes_timestamp_sec"] + list(
+                conditions.keys()
+            )
+            # Ensure other relevant columns from potential matches are also defined
+            potential_value_cols = ["value", "value1", "value2", "text"]
+            for vc in potential_value_cols:
+                if vc not in cols:
+                    cols.append(vc)
+            return pd.DataFrame(columns=cols)
+
+        # Create DataFrame from the list of matching event dictionaries
+        df = pd.DataFrame(matching_events)
+
+        # Convert timestamp (assumed ms) to seconds (float)
+        df["trodes_timestamp_sec"] = (
+            df["timestamp"].astype(float) / self.MILLISECONDS_PER_SECOND
+        )
+        # Ensure original timestamp remains integer
+        df["timestamp"] = df["timestamp"].astype(int)
+
+        # Attempt to cast condition columns to appropriate types (e.g., int)
+        # This improves consistency if values were parsed as strings initially
+        for key, value in conditions.items():
+            if key in df.columns:
+                try:
+                    if isinstance(value, int):
+                        # Convert column to numeric, then integer (handles potential errors)
+                        df[key] = pd.to_numeric(df[key], errors="coerce").astype(int)
+                    # Add elif for float, bool etc.
if needed + except (ValueError, TypeError): + # Ignore casting errors if conversion isn't possible + pass + + # Sort by time and reset index + return df.sort_values("trodes_timestamp_sec") + + def calculate_time_offset( + self, + external_reference_times: np.ndarray, + log_event_type: str, + log_event_conditions: Dict[str, Any], + match_threshold: float = 0.1, + check_n_events: int = 4, + ) -> Optional[float]: + """Calculates the time offset between log events and external timestamps. + + This method aligns timestamps (in seconds) of specific events found + in the log (`log_event_type` with `log_event_conditions`) against a + provided sorted array of `external_reference_times` (also in seconds). + It assumes both sets of timestamps correspond to the same sequence of + real-world events (e.g., synchronization pulses). + + The offset is determined by finding the constant difference + (`offset = external_time - log_time`) that minimizes the timing + discrepancy between the first `check_n_events` corresponding events + in both sequences. + + IMPORTANT: If `external_reference_times` represent Unix time (seconds + since 1970-01-01 UTC), the calculated offset will align the log's + timestamps (`trodes_timestamp_sec`) to Unix time. The resulting + `timestamp_sync` column in the DataFrame will then also be in Unix time. + + Parameters + ---------- + external_reference_times : np.ndarray + A 1D numpy array of timestamps (float, in seconds) from the external + reference system. This array *must* be sorted in ascending order. + If using for Unix time alignment, these must be Unix timestamps. + log_event_type : str + The 'type' of log event to use as the reference points within the log + (e.g., 'ts_str_int', 'ts_int_int'). + log_event_conditions : Dict[str, Any] + Dictionary specifying the exact conditions to identify the reference + log events (e.g., {'text': 'UP', 'value': 8} for a pin state change). 
+ match_threshold : float, optional + The maximum acceptable cumulative absolute difference (in seconds) + between the matched `check_n_events` pairs (log vs. external) for + an offset to be considered valid. Defaults to 0.1 seconds. + check_n_events : int, optional + The number of initial events from both sequences to use for calculating + the mismatch and finding the best offset. Defaults to 4. A higher + number increases robustness against spurious events but requires more + matching events to be present. + + Returns + ------- + Optional[float] + The calculated time offset in seconds (`external_time_sec - log_time_sec`). + Adding this offset to `trodes_timestamp_sec` synchronizes the log time + to the external reference time. Returns `None` if a satisfactory + offset (below `match_threshold`) cannot be found, or if insufficient + events are available in either the log or the external references. + If successful, updates `self.time_offset` with the calculated value. + """ + # Find the timestamps of the reference events within the log + log_reference_df = self._find_reference_events( + log_event_type, log_event_conditions + ) + + # Check if enough log events were found + if log_reference_df.empty or len(log_reference_df) < check_n_events: + print( + f"Warning: Not enough reference events found in log matching " + f"type='{log_event_type}', conditions={log_event_conditions}. " + f"Need at least {check_n_events}, found {len(log_reference_df)}." 
+ ) + self.time_offset = None # Ensure offset is None if calculation fails + return None + + # Extract log event times (in seconds) and ensure external times are a sorted numpy array + sc_times_sec = log_reference_df["trodes_timestamp_sec"].to_numpy() + # Ensure external times are numpy array and sorted (as required by algorithm) + dio_times_sec = np.sort(np.asarray(external_reference_times)) + + # Check if enough external reference times were provided + if len(dio_times_sec) < check_n_events: + print( + f"Warning: Not enough external reference timestamps provided " + f"({len(dio_times_sec)}), need at least {check_n_events} for matching." + ) + self.time_offset = None # Ensure offset is None + return None + + # --- Offset Calculation Logic --- + # This section iterates through potential starting alignments between + # the external times and the first log time, calculates the total mismatch + # for the first 'check_n_events', and finds the offset minimizing this mismatch. + + best_offset = None + min_mismatch = float("inf") + + # Iterate through possible starting points in the external times array + # We only need to check starting alignments where enough subsequent external times exist + # for the check_n_events comparison. + # We test aligning sc_times_sec[0] with each dio_times_sec[event_idx] + for event_idx in range(len(dio_times_sec) - check_n_events + 1): + # Calculate the potential offset based on the first log event and current external event + potential_offset = dio_times_sec[event_idx] - sc_times_sec[0] + current_mismatch = 0.0 + + # Simple check: Calculate mismatch using the *next consecutive* N events + # This assumes no missing events in *either* stream within the checked range. + # If events can be missing, a more complex alignment (like Needleman-Wunsch + # or checking nearest neighbors) might be needed. This simpler approach + # is often sufficient if the sync signals are reliable. 
+ mismatch_found = False + for i in range(check_n_events): + # Calculate the expected external time for the i-th log event using the potential offset + projected_dio_time = sc_times_sec[i] + potential_offset + # Calculate the absolute difference with the corresponding i-th external time + # (relative to the starting event_idx) + diff = abs(dio_times_sec[event_idx + i] - projected_dio_time) + current_mismatch += diff + + # Optimization: If mismatch already exceeds threshold or current best, stop early + if ( + current_mismatch >= match_threshold + and current_mismatch >= min_mismatch + ): + mismatch_found = True # Signal that this offset is not viable + break # Stop checking further events for this offset + + # If loop completed without early exit and this offset has lower mismatch + if not mismatch_found and current_mismatch < min_mismatch: + min_mismatch = current_mismatch + best_offset = potential_offset + + # After checking all potential alignments, evaluate the result + if best_offset is not None and min_mismatch < match_threshold: + print( + f"Time offset calculation successful.\n" + f" Best Offset: {best_offset:.4f} s (External Time - Log Time)\n" + f" Lowest Mismatch: {min_mismatch:.4f} s (summed abs diff over {check_n_events} events)\n" + f" Threshold: {match_threshold:.4f} s" + ) + self.time_offset = best_offset # Store the successful offset + return self.time_offset + else: + # Report failure if no offset met the threshold + print( + f"Warning: Could not find a suitable time offset.\n" + f" Minimum mismatch found: {min_mismatch:.4f} s (using {check_n_events} events)\n" + f" Match threshold: {match_threshold:.4f} s\n" + f" Troubleshooting: Check if reference events match, increase threshold, " + f"or verify external timestamps." 
+ ) + self.time_offset = None # Ensure offset is None on failure + return None + + def get_events_dataframe( + self, + apply_offset: bool = True, + exclude_comments_unknown: bool = True, + max_DIOs: int = 32, + ) -> pd.DataFrame: + """Constructs and returns a pandas DataFrame from the parsed log events. + + Parameters + ---------- + apply_offset : bool, optional + If True (default), and a `time_offset` has been calculated, add the + 'timestamp_sync' column to the DataFrame. If False, or if no offset + is available, this column is omitted. + exclude_comments_unknown : bool, optional + If True (default), lines parsed as 'comment_or_empty' or 'unknown' + are excluded from the DataFrame. If False, all entries from + `raw_events` are included (potentially useful for debugging parsing). + + Returns + ------- + pd.DataFrame + A DataFrame containing the structured event data. Columns are: + - 'trodes_timestamp' (int, ms since start of recording) + - 'trodes_timestamp_sec' (float, seconds since start of recording) + - `timestamp_sync` (float, seconds) + - 'raw_line' (str) + - 'type' (str) + - 'text' (str) + - 'value' (int, if pattern `text = value`, type 'ts_str_equals_int') + - 'active_DIO_inputs_bitmask' (int, from 'ts_int_int') + - 'active_DIO_outputs_bitmask' (int, from 'ts_int_int') + - 'active_DIO_inputs' (list of int) + - 'active_DIO_outputs' (list of int) + + Returns an empty DataFrame if no valid events are found after filtering. 
+ """ + # Ensure raw events are available + if not self.raw_events: + self.parse_raw_events() + if not self.raw_events: + print("Warning: Log content yielded no raw events.") + self.processed_events_df = pd.DataFrame() # Store empty df + return self.processed_events_df + + # Determine which event types to filter out + if exclude_comments_unknown: + exclude_types = ("comment_or_empty", "unknown") + filtered_events = [ + event + for event in self.raw_events + if event.get("type") not in exclude_types + ] + else: + # Include all event types if not excluding + filtered_events = self.raw_events + + # Handle case where filtering leaves no events + if not filtered_events: + print("Warning: No valid events remain after filtering.") + self.processed_events_df = pd.DataFrame() # Store empty df + return self.processed_events_df + + # Define a preferred column order for better readability + # Include all potential columns generated by the parsers + derived columns + preferred_column_order = [ + "line_num", # Line number in the original log + "raw_line", # Original line content + "type", # Type of parsed line pattern + "trodes_timestamp", # trodes integer timestamp (ms since start) + "trodes_timestamp_sec", # trodes timestamp converted to seconds + "timestamp_sync", # Synchronized timestamp (if calculated) + "text", # Text part (from ts_str, ts_str_int, ts_str_equals_int) + "value", # Integer value after equals (from ts_str_int, ts_str_equals_int) + "active_DIO_inputs_bitmask", # DIO input bitmask (from ts_int_int) + "active_DIO_outputs_bitmask", # DIO output bitmask (from ts_int_int) + ] + + # Create DataFrame. Pandas handles missing columns gracefully. 
+ df = pd.DataFrame(filtered_events).rename( + columns={ + "timestamp": "trodes_timestamp", + "value1": "active_DIO_inputs_bitmask", + "value2": "active_DIO_outputs_bitmask", + } + ) + df["active_DIO_inputs"] = df["active_DIO_inputs_bitmask"].apply( + lambda mask: _interpret_DIO_mask(mask, max_DIOs) + ) + df["active_DIO_outputs"] = df["active_DIO_outputs_bitmask"].apply( + lambda mask: _interpret_DIO_mask(mask, max_DIOs) + ) + + # --- Timestamp Processing --- + # Ensure 'timestamp' column exists and convert to numeric/int + if "trodes_timestamp" in df.columns: + # Coerce errors to NaN, fill NaN with 0, then convert to integer + df["trodes_timestamp"] = ( + pd.to_numeric(df["trodes_timestamp"], errors="coerce") + .fillna(pd.NA) + .astype(pd.Int64Dtype()) + ) + # Calculate timestamp in seconds + df["trodes_timestamp_sec"] = ( + df["trodes_timestamp"].astype(float) / self.MILLISECONDS_PER_SECOND + ) + else: + # Add empty columns if trodes_timestamp was missing (e.g., only comments) + print( + "Warning: 'trodes_timestamp' column not found in parsed data. Timestamp columns will be empty." + ) + df["trodes_timestamp"] = pd.NA + df["trodes_timestamp_sec"] = np.nan + + # Apply time offset if requested and available + if apply_offset: + if self.time_offset is not None: + if "trodes_timestamp_sec" in df.columns: + df["timestamp_sync"] = df["trodes_timestamp_sec"] + self.time_offset + else: + df["timestamp_sync"] = ( + np.nan + ) # Cannot calculate if trodes_timestamp_sec is missing + else: + # Warning if offset applied but not calculated + print( + "Warning: Time offset application requested, but offset has not " + "been calculated or was unsuccessful. 'timestamp_sync' column omitted." 
+ ) + # Ensure the column doesn't exist if it wasn't created + if "timestamp_sync" in df.columns: + df = df.drop(columns=["timestamp_sync"]) + + # --- Data Type Consolidation --- + # Standardize types for common data columns if they exist + int_cols = [ + "value", + "active_DIO_inputs_bitmask", + "active_DIO_outputs_bitmask", + ] + text_cols = ["text"] + + for col in int_cols: + if col in df.columns: + # Convert to numeric (allowing NaNs), then use nullable Int64 type + df[col] = pd.to_numeric(df[col], errors="coerce").astype( + pd.Int64Dtype() + ) + + for col in text_cols: + if col in df.columns: + # Ensure text columns are object type (string) and + # fill potential float NaNs with pandas NA + df[col] = df[col].astype(str).replace("nan", pd.NA).astype("object") + + # Reorder columns according to preference, keeping only existing columns + existing_cols_in_order = [ + col for col in preferred_column_order if col in df.columns + ] + # Add any remaining columns not in the preferred list (e.g., from 'unknown' type) + other_cols = [col for col in df.columns if col not in existing_cols_in_order] + final_column_order = existing_cols_in_order + other_cols + df = df[final_column_order] + + # Store the final DataFrame and return it + self.processed_events_df = df.set_index("line_num") + return self.processed_events_df + + def get_events_by_type( + self, + apply_offset: bool = True, + exclude_comments_unknown: bool = True, + ) -> List[pd.DataFrame]: + """Groups the events in the DataFrame by their 'type' column. + This method first generates the DataFrame using `get_events_dataframe` + and then groups the events by their 'type' column. Each group is + returned as a separate DataFrame, excluding the 'type' column. + This allows for easy access to events of the same type for further + analysis or processing. + + Parameters + ---------- + apply_offset : bool, optional + If True (default), applies the time offset to the DataFrame. 
+ If False, the DataFrame will contain raw timestamps. + exclude_comments_unknown : bool, optional + If True (default), excludes comment and unknown lines from the DataFrame. + If False, all lines are included, which may be useful for debugging. + Returns + ------- + List[pd.DataFrame] + A list of DataFrames, each corresponding to a unique event type. + Each DataFrame contains the events of that type, excluding the 'type' column. + """ + df = self.get_events_dataframe( + apply_offset=apply_offset, + exclude_comments_unknown=exclude_comments_unknown, + ) + return [group.drop(columns=["type"]) for _, group in df.groupby("type")] + + def segment_into_trials( + self, + trial_start_terms: List[str], + trial_end_terms: List[str], + time_column: str = "timestamp_sync", + ) -> List[Dict[str, Any]]: + """ + Segments events from a StateScript log DataFrame into trials. + + Parameters + ---------- + trial_start_terms : List[str] + List of strings found in the 'text' column that mark the start of a trial. + trial_end_terms : List[str] + List of strings found in the 'text' column that mark the end of a trial. + Can overlap with trial_start_terms. + time_column : str, optional + The name of the column to use for time ranges ('timestamp_sync' or + 'trodes_timestamp_sec'), by default 'timestamp_sync'. + + Returns + ------- + List[Dict[str, Any]] + A list where each dictionary represents a trial. Each trial dictionary + contains at least 'start_time' and 'end_time'. Further analysis + (like finding input/output changes within the trial) would typically + be done separately using these time ranges to filter events_df. + + Notes + ----- + - This implementation assumes trials are defined by text messages. + - It handles cases where start/end terms overlap. 
+ """ + events_df = self.processed_events_df + if events_df is None: + print("Error: No processed events DataFrame available.") + return [] + + if "text" not in events_df.columns or time_column not in events_df.columns: + print(f"Error: DataFrame must contain 'text' and '{time_column}' columns.") + return [] + + trials = [] + current_trial_start_time = None + in_trial = False + + # Iterate through the DataFrame rows + for index, row in events_df.iterrows(): + message = row["text"] # Check the 'text' column + current_time = row[time_column] + + if pd.isna(message) or pd.isna(current_time): + continue # Skip rows with missing text or time + + found_end_term = any(term in message for term in trial_end_terms) + found_start_term = any(term in message for term in trial_start_terms) + + # --- End Trial Logic --- + # If we are currently in a trial and find an end term + if in_trial and found_end_term: + # Finalize the previous trial + trials.append( + { + "start_time": current_trial_start_time, + "end_time": current_time, + # Add trial index or other basic info if needed + } + ) + in_trial = False + current_trial_start_time = None # Reset start time + + # --- Start Trial Logic --- + # If we find a start term (potentially the same event as the end term) + if found_start_term: + # If we weren't in a trial, start a new one + if not in_trial: + in_trial = True + current_trial_start_time = current_time + # If we *were* already in a trial (e.g., two start terms back-to-back + # without an end term), you might choose to log a warning or + # implicitly end the previous one here and start a new one. + # This example restarts the trial timer. + else: + print( + f"Warning: Found start term '{message}' at {current_time} while already in a trial started at {current_trial_start_time}. Restarting trial." 
+ ) + current_trial_start_time = current_time + + # Handle case where log ends while still in a trial + if in_trial: + print( + f"Warning: Log ended while still in a trial started at {current_trial_start_time}." + ) + # Optionally add the incomplete trial + trials.append( + { + "start_time": current_trial_start_time, + "end_time": events_df[time_column].iloc[-1], # Use last event time + "status": "incomplete", + } + ) + + return trials diff --git a/src/trodes_to_nwb/tests/test_convert_statescript.py b/src/trodes_to_nwb/tests/test_convert_statescript.py new file mode 100644 index 0000000..f115cea --- /dev/null +++ b/src/trodes_to_nwb/tests/test_convert_statescript.py @@ -0,0 +1,544 @@ +import os +import pathlib +import tempfile + +import numpy as np +import pandas as pd +import pytest + +from trodes_to_nwb.convert_statescript import ( + StateScriptLogProcessor, + _parse_int, + parse_statescript_line, + parse_ts_int_int, + parse_ts_str, + parse_ts_str_equals_int, + parse_ts_str_int, +) + +# --- Fixtures --- + + +@pytest.fixture(scope="module") +def sample_log_content(): + """Provides sample log content for general testing.""" + return """# Test log started +76504 0 0 +76566 center_poke +76566 65536 0 +100078 counter_handlePoke = 1 +100078 4 0 +100559 0 0 +Executing this line without timestamp +115030 center_poke +115030 65536 0 +# Test log ended +""" + + +@pytest.fixture(scope="module") +def empty_log_content(): + """Provides empty log content.""" + return "" + + +@pytest.fixture(scope="module") +def comment_only_log_content(): + """Provides log content with only comments and whitespace.""" + return """# Start +# Middle line + +# End + """ + + +@pytest.fixture +def processor(sample_log_content): + """Provides a processor instance initialized with standard sample content.""" + return StateScriptLogProcessor(sample_log_content, source_info="from string") + + +@pytest.fixture +def empty_processor(empty_log_content): + """Provides a processor instance initialized with 
empty content.""" + return StateScriptLogProcessor(empty_log_content, source_info="empty string") + + +@pytest.fixture +def comment_only_processor(comment_only_log_content): + """Provides a processor instance initialized with only comments.""" + return StateScriptLogProcessor( + comment_only_log_content, source_info="comments only string" + ) + + +@pytest.fixture(scope="module") +def external_times(): + """Provides sample external times for offset calculation tests.""" + # These correspond roughly to the '65536 0' events in sample_log_content + # 76566 ms -> 76.566 s + # 115030 ms -> 115.030 s + # Let's assume a base time for the external system + base_time = 1678880000.0 + return np.array([base_time + 76.566, base_time + 115.030]) + + +@pytest.fixture +def temp_log_file(sample_log_content): + """Creates a temporary log file with standard content and yields its path.""" + with tempfile.NamedTemporaryFile( + mode="w", delete=False, suffix=".stateScriptLog", encoding="utf-8" + ) as tmp_file: + tmp_file.write(sample_log_content) + tmp_file_path = tmp_file.name + yield tmp_file_path + os.remove(tmp_file_path) + + +# --- Tests for Level 1 Parsers --- + + +def test_parse_int(): + """Test the _parse_int helper function.""" + assert _parse_int("123") == 123 + assert _parse_int("-45") == -45 + assert _parse_int("0") == 0 + assert _parse_int("abc") is None + assert _parse_int("12.3") is None + assert _parse_int("") is None + assert _parse_int("123 ") is None + + +def test_parse_ts_int_int(): + """Test parse_ts_int_int directly.""" + parts = ["8386500", "0", "0"] + expected = { + "type": "ts_int_int", + "trodes_timestamp": 8386500, + "value1": 0, + "value2": 0, + } + assert parse_ts_int_int(parts) == expected + + parts_wrong_len = ["123", "0"] + assert parse_ts_int_int(parts_wrong_len) is None + + parts_not_int = ["123", "abc", "0"] + assert parse_ts_int_int(parts_not_int) is None + + parts_float = ["123", "4.5", "0"] + assert parse_ts_int_int(parts_float) is None + + +def 
test_parse_ts_str_int(): + """Test parse_ts_str_int directly.""" + parts = ["8386500", "DOWN", "3"] + expected = { + "type": "ts_str_int", + "trodes_timestamp": 8386500, + "text": "DOWN", + "value": 3, + } + assert parse_ts_str_int(parts) == expected + + parts_wrong_len = ["123", "UP"] + assert parse_ts_str_int(parts_wrong_len) is None + + parts_str_is_int = ["123", "456", "789"] + assert parse_ts_str_int(parts_str_is_int) is None # Should be handled by ts_int_int + + parts_val_not_int = ["123", "UP", "abc"] + assert parse_ts_str_int(parts_val_not_int) is None + + +def test_parse_ts_str_equals_int(): + """Test parse_ts_str_equals_int directly.""" + parts = ["100078", "counter_handlePoke", "=", "1"] + expected = { + "type": "ts_str_equals_int", + "trodes_timestamp": 100078, + "text": "counter_handlePoke", + "value": 1, + } + assert parse_ts_str_equals_int(parts) == expected + + parts_multi_word = ["3610855", "total", "rewards", "=", "70"] + expected_multi = { + "type": "ts_str_equals_int", + "trodes_timestamp": 3610855, + "text": "total rewards", + "value": 70, + } + assert parse_ts_str_equals_int(parts_multi_word) == expected_multi + + parts_wrong_len = ["123", "=", "1"] + assert parse_ts_str_equals_int(parts_wrong_len) is None + + parts_no_equals = ["123", "text", "1"] + assert parse_ts_str_equals_int(parts_no_equals) is None + + parts_val_not_int = ["123", "text", "=", "abc"] + assert parse_ts_str_equals_int(parts_val_not_int) is None + + +def test_parse_ts_str(): + """Test parse_ts_str directly.""" + parts = ["76566", "center_poke"] + expected = { + "type": "ts_str", + "trodes_timestamp": 76566, + "text": "center_poke", + } + assert parse_ts_str(parts) == expected + + parts_multi_word = ["1271815", "some", "multi", "word", "event"] + expected_multi = { + "type": "ts_str", + "trodes_timestamp": 1271815, + "text": "some multi word event", + } + assert parse_ts_str(parts_multi_word) == expected_multi + + parts_wrong_len = ["123"] + assert 
parse_ts_str(parts_wrong_len) is None + + parts_second_is_int = [ + "123", + "456", + ] # Second part is int, should fail this parser + assert parse_ts_str(parts_second_is_int) is None + + +# --- Tests for parse_statescript_line (Covers integration and dispatching) --- + + +def test_parse_statescript_line_dispatching(): + """Test parse_statescript_line dispatching for various line types.""" + lines_expected_types = [ + ("8386500 0 0", "ts_int_int"), + ("8386500 DOWN 3", "ts_str_int"), + ("100078 counter_handlePoke = 1", "ts_str_equals_int"), + ("76566 center_poke", "ts_str"), + ("Executing trigger function 22", "unknown"), + ("# comment", "comment_or_empty"), + ("", "comment_or_empty"), + (" ", "comment_or_empty"), + ("123 456 abc", "unknown"), # Doesn't fit ts_int_int because of 'abc' + ("123 abc def", "ts_str"), # Fits ts_str + ("456 123 = 5", "ts_str_equals_int"), # Fits this specific pattern + ] + + for line, expected_type in lines_expected_types: + parsed = parse_statescript_line(line) + assert parsed["type"] == expected_type + assert parsed["raw_line"] == line.strip() # parse_statescript_line strips + if expected_type not in ["unknown", "comment_or_empty"]: + assert "trodes_timestamp" in parsed + else: + assert "trodes_timestamp" not in parsed or pd.isna( + parsed.get("trodes_timestamp") + ) + + +# --- Tests for StateScriptLogProcessor --- + + +def test_init_from_string(processor, sample_log_content): + """Test initialization from string.""" + assert processor.log_content == sample_log_content + assert processor.source_description == "from string" + assert processor.raw_events == [] + assert processor.time_offset is None + assert processor.processed_events_df is None + + +def test_init_from_file(temp_log_file, sample_log_content): + """Test initialization from a file.""" + processor_file = StateScriptLogProcessor.from_file(temp_log_file) + assert processor_file.log_content == sample_log_content + assert processor_file.source_description.startswith("from 
file:") + assert pathlib.Path(temp_log_file).name in processor_file.source_description + + +def test_init_from_file_not_found(): + """Test initialization from a non-existent file raises error.""" + with pytest.raises(FileNotFoundError): + StateScriptLogProcessor.from_file("non_existent_file_qwerty.log") + + +def test_parse_raw_events(processor, sample_log_content): + """Test parsing the raw log content into events.""" + events = processor.parse_raw_events() + assert processor.raw_events is events # Should store result internally + assert isinstance(events, list) + assert len(events) == len( + sample_log_content.strip().splitlines() + ) # One dict per line + assert events[0]["type"] == "comment_or_empty" + assert events[1]["type"] == "ts_int_int" + assert events[7]["type"] == "unknown" # "Executing this line..." + assert events[9]["type"] == "comment_or_empty" # Last comment + assert events[1]["raw_line"] == "76504 0 0" + assert events[7]["raw_line"] == "Executing this line without timestamp" + + +def test_find_reference_events(processor): + """Test the internal _find_reference_events method.""" + # Case 1: Find 'ts_str' events + ref_df_str = processor._find_reference_events( + event_type="ts_str", conditions={"text": "center_poke"} + ) + assert isinstance(ref_df_str, pd.DataFrame) + assert len(ref_df_str) == 2 + pd.testing.assert_series_equal( + ref_df_str["trodes_timestamp"], + pd.Series([76566, 115030], name="trodes_timestamp"), + check_dtype=False, + ) + assert "trodes_timestamp_sec" in ref_df_str.columns + assert ref_df_str["trodes_timestamp_sec"].iloc[0] == pytest.approx(76.566) + + # Case 2: Find 'ts_int_int' events with specific values + ref_df_int = processor._find_reference_events( + event_type="ts_int_int", conditions={"value1": 4, "value2": 0} + ) + assert len(ref_df_int) == 1 + assert ref_df_int["trodes_timestamp"].iloc[0] == 100078 + + # Case 3: No matching events found + ref_df_none = processor._find_reference_events( + event_type="ts_str_int", 
conditions={"text": "nonexistent"} + ) + assert ref_df_none.empty + assert isinstance(ref_df_none, pd.DataFrame) # Should still return DF + + # Case 4: Ensure processor parses if raw_events is empty + processor.raw_events = [] + ref_df_reparse = processor._find_reference_events( + event_type="ts_str", conditions={"text": "center_poke"} + ) + assert len(ref_df_reparse) == 2 # Should re-parse automatically + + +def test_calculate_time_offset_success(processor, external_times): + """Test successful time offset calculation.""" + offset = processor.calculate_time_offset( + external_reference_times=external_times, + log_event_type="ts_int_int", # Use the events corresponding to external_times + log_event_conditions={"value1": 65536, "value2": 0}, + check_n_events=2, # Use both events for matching + ) + assert offset is not None + assert processor.time_offset == offset # Check internal storage + # Expected offset = external_base_time = 1678880000.0 + # external_times[0] = base + 76.566; log_times[0] = 76.566 + assert offset == pytest.approx(1678880000.0) + + +def test_calculate_time_offset_fail_not_enough_log(processor, external_times): + """Test offset calculation failure due to insufficient log events.""" + # 'counter_handlePoke' only appears once, need 2 events + offset = processor.calculate_time_offset( + external_reference_times=external_times, + log_event_type="ts_str_equals_int", + log_event_conditions={"text": "counter_handlePoke"}, + check_n_events=2, + ) + assert offset is None + assert processor.time_offset is None # Should remain None + + +def test_calculate_time_offset_fail_not_enough_external(processor): + """Test offset calculation failure due to insufficient external times.""" + # Only one external time provided, need 2 events + offset = processor.calculate_time_offset( + external_reference_times=np.array([1678880076.566]), + log_event_type="ts_int_int", + log_event_conditions={"value1": 65536, "value2": 0}, + check_n_events=2, + ) + assert offset is None 
+ assert processor.time_offset is None + + +def test_calculate_time_offset_fail_mismatch(processor, external_times): + """Test offset calculation failure due to exceeding mismatch threshold.""" + # Shift external times slightly more than default threshold (0.1) + shifted_external_times = external_times + 0.06 # Total shift 0.12 over 2 events + offset = processor.calculate_time_offset( + external_reference_times=shifted_external_times, + log_event_type="ts_int_int", + log_event_conditions={"value1": 65536, "value2": 0}, + check_n_events=2, + match_threshold=0.1, # Default threshold + ) + assert offset is None + assert processor.time_offset is None + + +def test_get_events_dataframe_defaults(processor): + """Test default behavior: exclude comments/unknown, no offset applied yet.""" + df = processor.get_events_dataframe(apply_offset=False) + assert processor.processed_events_df is df # Check internal storage + assert isinstance(df, pd.DataFrame) + # Expected: 11 lines total - 3 comments - 1 unknown = 7 valid events + assert len(df) == 7 + assert "raw_line" in df.columns + assert "trodes_timestamp" in df.columns + assert "trodes_timestamp_sec" in df.columns + assert "timestamp_sync" not in df.columns # Offset not applied + # Check content and types + assert df["type"].iloc[0] == "ts_int_int" + assert df["raw_line"].iloc[0] == "76504 0 0" + assert pd.isna(df["text"].iloc[0]) # text NA for ts_int_int + assert df["value1"].iloc[0] == 0 + assert df["trodes_timestamp"].dtype == "int64" + assert df["trodes_timestamp_sec"].dtype == "float64" + assert df["value"].dtype == pd.Int64Dtype() # Nullable Integer + + +def test_get_events_dataframe_include_all(processor): + """Test including comments and unknown lines.""" + df = processor.get_events_dataframe( + apply_offset=False, exclude_comments_unknown=False + ) + assert isinstance(df, pd.DataFrame) + assert len(df) == 10 # All lines included + assert df["type"].iloc[0] == "comment_or_empty" + assert df["type"].iloc[7] == 
"unknown" + assert df["raw_line"].iloc[7] == "Executing this line without timestamp" + # Check that timestamp is NA/0 for lines without one + assert ( + pd.isna(df["trodes_timestamp"].iloc[0]) or df["trodes_timestamp"].iloc[0] == 0 + ) + assert ( + pd.isna(df["trodes_timestamp"].iloc[7]) or df["trodes_timestamp"].iloc[7] == 0 + ) + assert pd.isna(df["trodes_timestamp_sec"].iloc[0]) or np.isnan( + df["trodes_timestamp_sec"].iloc[0] + ) + assert pd.isna(df["trodes_timestamp_sec"].iloc[7]) or np.isnan( + df["trodes_timestamp_sec"].iloc[7] + ) + + +def test_get_events_dataframe_with_offset(processor): + """Test applying offset and check sync timestamp calculation.""" + # Simulate successful offset calculation + processor.time_offset = 1678880000.0 + df = processor.get_events_dataframe(apply_offset=True) # Default exclude=True + assert isinstance(df, pd.DataFrame) + assert len(df) == 7 # Excludes comments/unknown + assert "timestamp_sync" in df.columns + # Check calculation for the first valid event (76504 ms) + expected_sync_time = (76504 / 1000.0) + 1678880000.0 + assert df["timestamp_sync"].iloc[0] == pytest.approx(expected_sync_time) + # Check NA value handling in other columns remains correct + assert pd.isna(df["text"].iloc[0]) + assert df["value1"].iloc[0] == 0 + assert df["timestamp_sync"].dtype == "float64" + + +def test_get_events_dataframe_offset_not_calculated(processor, capsys): + """Test applying offset when offset is None.""" + processor.time_offset = None # Ensure no offset is set + df = processor.get_events_dataframe(apply_offset=True) + assert isinstance(df, pd.DataFrame) + assert "timestamp_sync" not in df.columns # Sync column should be absent + assert len(df) == 7 # Should still return the dataframe without the column + + # Check that the warning was printed to stderr/stdout + captured = capsys.readouterr() + assert ( + "Warning: Time offset requested but not calculated" in captured.out + or "Warning: Time offset requested but not calculated" in 
captured.err + ) + + +def test_empty_log(empty_processor): + """Test processing an empty log file.""" + events = empty_processor.parse_raw_events() + assert events == [] + df = empty_processor.get_events_dataframe() + assert isinstance(df, pd.DataFrame) + assert df.empty + + +def test_comment_only_log(comment_only_processor): + """Test processing a log file with only comments/whitespace.""" + events = comment_only_processor.parse_raw_events() + assert len(events) == 4 # 4 lines in the fixture + assert all(e["type"] == "comment_or_empty" for e in events) + + # Default: exclude comments -> empty DataFrame + df_excluded = comment_only_processor.get_events_dataframe(apply_offset=False) + assert isinstance(df_excluded, pd.DataFrame) + assert df_excluded.empty + + # Include comments -> DataFrame with only comment entries + df_included = comment_only_processor.get_events_dataframe( + apply_offset=False, exclude_comments_unknown=False + ) + assert isinstance(df_included, pd.DataFrame) + assert len(df_included) == 4 + assert all(df_included["type"] == "comment_or_empty") + assert ( + pd.isna(df_included["trodes_timestamp"].iloc[0]) + or df_included["trodes_timestamp"].iloc[0] == 0 + ) + + +def test_repr(processor): + """Test the __repr__ method.""" + # Initial state + initial_repr = repr(processor) + assert isinstance(initial_repr, str) + assert "StateScriptLogProcessor" in initial_repr + assert "not parsed" in initial_repr + assert "no offset" in initial_repr + assert "not generated" in initial_repr + + # After parsing + processor.parse_raw_events() + parsed_repr = repr(processor) + assert "parsed" in parsed_repr + assert f"raw_events={len(processor.raw_events)}" in parsed_repr + assert "no offset" in parsed_repr + assert "not generated" in parsed_repr + + # After offset calculation + processor.time_offset = 1000.0 + offset_repr = repr(processor) + assert "offset=1000.0" in offset_repr + assert "not generated" in offset_repr + + # After DataFrame generation + 
processor.get_events_dataframe() + df_repr = repr(processor) + assert "DataFrame generated" in df_repr + + +def test_repr_html(processor): + """Test the _repr_html_ method.""" + # Check it runs without error in different states and returns string + html_initial = processor._repr_html_() + assert isinstance(html_initial, str) + assert "StateScriptLogProcessor" in html_initial + assert "Not Parsed" in html_initial + + processor.parse_raw_events() + html_parsed = processor._repr_html_() + assert isinstance(html_parsed, str) + assert "Parsed" in html_parsed + assert f"({len(processor.raw_events)} raw entries)" in html_parsed + + processor.time_offset = 1000.0 + html_offset = processor._repr_html_() + assert isinstance(html_offset, str) + assert "Offset: 1000.0" in html_offset + + processor.get_events_dataframe() + html_df = processor._repr_html_() + assert isinstance(html_df, str) + assert "DataFrame: Generated" in html_df + assert "DataFrame Preview" in html_df # Check for preview section From ca7d70a5660118999b711494df65f95f9225d61d Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 11:52:24 -0400 Subject: [PATCH 02/23] Add notebook --- notebooks/test_statescript_parsing.ipynb | 4464 ++++++++++++++++++++++ 1 file changed, 4464 insertions(+) create mode 100644 notebooks/test_statescript_parsing.ipynb diff --git a/notebooks/test_statescript_parsing.ipynb b/notebooks/test_statescript_parsing.ipynb new file mode 100644 index 0000000..5c9f27c --- /dev/null +++ b/notebooks/test_statescript_parsing.ipynb @@ -0,0 +1,4464 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "46fbf114", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[09:42:05][WARNING] Spyglass: Failed to load SpyglassConfig. 
Please set up config file.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n", + "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n", + "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n", + "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n", + "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n", + "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n", + "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n", + "Warning: Log content yielded no raw events.\n", + "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n", + "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
270648028 UP 2ts_str_int648028648.028UP2<NA><NA>[][]
271648028 2 0ts_int_int648028648.028<NA><NA>20[2][]
290648083 lastPort = -1 to currPort = 1ts_str648083648.083lastPort = -1 to currPort = 1<NA><NA><NA>[][]
292658285 DOWN 2ts_str_int658285658.285DOWN2<NA><NA>[][]
293658285 0 0ts_int_int658285658.285<NA><NA>00[][]
.................................
90983925934 8 0ts_int_int39259343925.934<NA><NA>80[4][]
90993926021 DOWN 4ts_str_int39260213926.021DOWN4<NA><NA>[][]
91003926021 0 0ts_int_int39260213926.021<NA><NA>00[][]
91013926086 UP 4ts_str_int39260863926.086UP4<NA><NA>[][]
91023926086 8 0ts_int_int39260863926.086<NA><NA>80[4][]
\n", + "

6241 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " raw_line type trodes_timestamp \\\n", + "line_num \n", + "270 648028 UP 2 ts_str_int 648028 \n", + "271 648028 2 0 ts_int_int 648028 \n", + "290 648083 lastPort = -1 to currPort = 1 ts_str 648083 \n", + "292 658285 DOWN 2 ts_str_int 658285 \n", + "293 658285 0 0 ts_int_int 658285 \n", + "... ... ... ... \n", + "9098 3925934 8 0 ts_int_int 3925934 \n", + "9099 3926021 DOWN 4 ts_str_int 3926021 \n", + "9100 3926021 0 0 ts_int_int 3926021 \n", + "9101 3926086 UP 4 ts_str_int 3926086 \n", + "9102 3926086 8 0 ts_int_int 3926086 \n", + "\n", + " trodes_timestamp_sec text value \\\n", + "line_num \n", + "270 648.028 UP 2 \n", + "271 648.028 \n", + "290 648.083 lastPort = -1 to currPort = 1 \n", + "292 658.285 DOWN 2 \n", + "293 658.285 \n", + "... ... ... ... \n", + "9098 3925.934 \n", + "9099 3926.021 DOWN 4 \n", + "9100 3926.021 \n", + "9101 3926.086 UP 4 \n", + "9102 3926.086 \n", + "\n", + " active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n", + "line_num \n", + "270 \n", + "271 2 0 \n", + "290 \n", + "292 \n", + "293 0 0 \n", + "... ... ... \n", + "9098 8 0 \n", + "9099 \n", + "9100 0 0 \n", + "9101 \n", + "9102 8 0 \n", + "\n", + " active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "270 [] [] \n", + "271 [2] [] \n", + "290 [] [] \n", + "292 [] [] \n", + "293 [] [] \n", + "... ... ... 
\n", + "9098 [4] [] \n", + "9099 [] [] \n", + "9100 [] [] \n", + "9101 [] [] \n", + "9102 [4] [] \n", + "\n", + "[6241 rows x 10 columns]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from spyglass.utils.statescript import StateScriptLogProcessor\n", + "\n", + "import pathlib\n", + "\n", + "search_dir = pathlib.Path(\"/Users/edeno/Downloads/\")\n", + "log_files_generator = search_dir.glob(\"*.stateScriptLog\")\n", + "\n", + "statescript_dfs = [\n", + " StateScriptLogProcessor.from_file(file_path).get_events_dataframe()\n", + " for file_path in log_files_generator\n", + "]\n", + "statescript_dfs[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "35bc8caf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
0#<Hexmaze_NoSequence.sc>comment_or_empty<NA>NaNNaN<NA><NA><NA>[][]
1#% author: XScomment_or_empty<NA>NaNNaN<NA><NA><NA>[][]
2#% date: 20231224; added a reward indicator fo...comment_or_empty<NA>NaNNaN<NA><NA><NA>[][]
3#comment_or_empty<NA>NaNNaN<NA><NA><NA>[][]
4#%initialize constant varscomment_or_empty<NA>NaNNaN<NA><NA><NA>[][]
.................................
37629~~~unknown<NA>NaNNaN<NA><NA><NA>[][]
37630Executing trigger function 22unknown<NA>NaNNaN<NA><NA><NA>[][]
37631Executing trigger function 22unknown<NA>NaNNaN<NA><NA><NA>[][]
37632Executing trigger function 22unknown<NA>NaNNaN<NA><NA><NA>[][]
37633Executing trigger function 22unknown<NA>NaNNaN<NA><NA><NA>[][]
\n", + "

37634 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " raw_line type \\\n", + "line_num \n", + "0 # comment_or_empty \n", + "1 #% author: XS comment_or_empty \n", + "2 #% date: 20231224; added a reward indicator fo... comment_or_empty \n", + "3 # comment_or_empty \n", + "4 #%initialize constant vars comment_or_empty \n", + "... ... ... \n", + "37629 ~~~ unknown \n", + "37630 Executing trigger function 22 unknown \n", + "37631 Executing trigger function 22 unknown \n", + "37632 Executing trigger function 22 unknown \n", + "37633 Executing trigger function 22 unknown \n", + "\n", + " trodes_timestamp trodes_timestamp_sec text value \\\n", + "line_num \n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "... ... ... ... ... \n", + "37629 NaN NaN \n", + "37630 NaN NaN \n", + "37631 NaN NaN \n", + "37632 NaN NaN \n", + "37633 NaN NaN \n", + "\n", + " active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n", + "line_num \n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... ... \n", + "37629 \n", + "37630 \n", + "37631 \n", + "37632 \n", + "37633 \n", + "\n", + " active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "0 [] [] \n", + "1 [] [] \n", + "2 [] [] \n", + "3 [] [] \n", + "4 [] [] \n", + "... ... ... \n", + "37629 [] [] \n", + "37630 [] [] \n", + "37631 [] [] \n", + "37632 [] [] \n", + "37633 [] [] \n", + "\n", + "[37634 rows x 10 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "StateScriptLogProcessor.from_file(\n", + " \"/Users/edeno/Downloads/20240513_BraveLu_03_r2.stateScriptLog\"\n", + ").get_events_dataframe(exclude_comments_unknown=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "8a3daa1c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
165173027 DOWN 1ts_str_int173027173.027DOWN1<NA><NA>[][]
166173027 0 0ts_int_int173027173.027<NA><NA>00[][]
167173050 UP 1ts_str_int173050173.050UP1<NA><NA>[][]
168173050 1 0ts_int_int173050173.050<NA><NA>10[1][]
169173658 DOWN 1ts_str_int173658173.658DOWN1<NA><NA>[][]
.................................
35681449843 DOWN 1ts_str_int14498431449.843DOWN1<NA><NA>[][]
35691449843 0 8ts_int_int14498431449.843<NA><NA>08[][4]
35701450010 UP 1ts_str_int14500101450.010UP1<NA><NA>[][]
35711450010 1 8ts_int_int14500101450.010<NA><NA>18[1][4]
35721450078 1 0ts_int_int14500781450.078<NA><NA>10[1][]
\n", + "

2828 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " raw_line type trodes_timestamp trodes_timestamp_sec \\\n", + "line_num \n", + "165 173027 DOWN 1 ts_str_int 173027 173.027 \n", + "166 173027 0 0 ts_int_int 173027 173.027 \n", + "167 173050 UP 1 ts_str_int 173050 173.050 \n", + "168 173050 1 0 ts_int_int 173050 173.050 \n", + "169 173658 DOWN 1 ts_str_int 173658 173.658 \n", + "... ... ... ... ... \n", + "3568 1449843 DOWN 1 ts_str_int 1449843 1449.843 \n", + "3569 1449843 0 8 ts_int_int 1449843 1449.843 \n", + "3570 1450010 UP 1 ts_str_int 1450010 1450.010 \n", + "3571 1450010 1 8 ts_int_int 1450010 1450.010 \n", + "3572 1450078 1 0 ts_int_int 1450078 1450.078 \n", + "\n", + " text value active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n", + "line_num \n", + "165 DOWN 1 \n", + "166 0 0 \n", + "167 UP 1 \n", + "168 1 0 \n", + "169 DOWN 1 \n", + "... ... ... ... ... \n", + "3568 DOWN 1 \n", + "3569 0 8 \n", + "3570 UP 1 \n", + "3571 1 8 \n", + "3572 1 0 \n", + "\n", + " active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "165 [] [] \n", + "166 [] [] \n", + "167 [] [] \n", + "168 [1] [] \n", + "169 [] [] \n", + "... ... ... \n", + "3568 [] [] \n", + "3569 [] [4] \n", + "3570 [] [] \n", + "3571 [1] [4] \n", + "3572 [1] [] \n", + "\n", + "[2828 rows x 10 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "06e49235", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
83364241 UP 9ts_str_int364241364.241UP9<NA><NA>[][]
84364241 256 256ts_int_int364241364.241<NA><NA>256256[9][9]
87364269 outer rewardts_str364269364.269outer reward<NA><NA><NA>[][]
89364269 256 2304ts_int_int364269364.269<NA><NA>2562304[9][9, 12]
94364669 256 256ts_int_int364669364.669<NA><NA>256256[9][9]
.................................
74481991064 contentTrialCount = 75ts_str_equals_int19910641991.0647575<NA><NA>[][]
74491991064 contentReward = 75ts_str_equals_int19910641991.0647575<NA><NA>[][]
74501991065 contentOuterCount = 1ts_str_equals_int19910651991.06511<NA><NA>[][]
74521991066 CURRENTGOAL IS 13 TASK_STATE IS 4ts_str19910661991.066CURRENTGOAL IS 13 TASK_STATE IS 4<NA><NA><NA>[][]
74541991331 64 0ts_int_int19913311991.331<NA><NA>640[7][]
\n", + "

5953 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " raw_line type \\\n", + "line_num \n", + "83 364241 UP 9 ts_str_int \n", + "84 364241 256 256 ts_int_int \n", + "87 364269 outer reward ts_str \n", + "89 364269 256 2304 ts_int_int \n", + "94 364669 256 256 ts_int_int \n", + "... ... ... \n", + "7448 1991064 contentTrialCount = 75 ts_str_equals_int \n", + "7449 1991064 contentReward = 75 ts_str_equals_int \n", + "7450 1991065 contentOuterCount = 1 ts_str_equals_int \n", + "7452 1991066 CURRENTGOAL IS 13 TASK_STATE IS 4 ts_str \n", + "7454 1991331 64 0 ts_int_int \n", + "\n", + " trodes_timestamp trodes_timestamp_sec \\\n", + "line_num \n", + "83 364241 364.241 \n", + "84 364241 364.241 \n", + "87 364269 364.269 \n", + "89 364269 364.269 \n", + "94 364669 364.669 \n", + "... ... ... \n", + "7448 1991064 1991.064 \n", + "7449 1991064 1991.064 \n", + "7450 1991065 1991.065 \n", + "7452 1991066 1991.066 \n", + "7454 1991331 1991.331 \n", + "\n", + " text value active_DIO_inputs_bitmask \\\n", + "line_num \n", + "83 UP 9 \n", + "84 256 \n", + "87 outer reward \n", + "89 256 \n", + "94 256 \n", + "... ... ... ... \n", + "7448 75 75 \n", + "7449 75 75 \n", + "7450 1 1 \n", + "7452 CURRENTGOAL IS 13 TASK_STATE IS 4 \n", + "7454 64 \n", + "\n", + " active_DIO_outputs_bitmask active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "83 [] [] \n", + "84 256 [9] [9] \n", + "87 [] [] \n", + "89 2304 [9] [9, 12] \n", + "94 256 [9] [9] \n", + "... ... ... ... \n", + "7448 [] [] \n", + "7449 [] [] \n", + "7450 [] [] \n", + "7452 [] [] \n", + "7454 0 [7] [] \n", + "\n", + "[5953 rows x 10 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[2]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6e663e37", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
288322450 UP 4ts_str_int322450322.450UP4<NA><NA>[][]
289322450 8 0ts_int_int322450322.450<NA><NA>80[4][]
310322500 8 262144ts_int_int322500322.500<NA><NA>8262144[4][19]
315322510 lastPort = -1 to currPort = 2ts_str322510322.510lastPort = -1 to currPort = 2<NA><NA><NA>[][]
318322634 8 262208ts_int_int322634322.634<NA><NA>8262208[4][7, 19]
.................................
376243357820 0 0ts_int_int33578203357.820<NA><NA>00[][]
376253357823 0 64ts_int_int33578233357.823<NA><NA>064[][7]
376263357825 0 0ts_int_int33578253357.825<NA><NA>00[][]
376273358882 RESETSTIMts_str33588823358.882RESETSTIM<NA><NA><NA>[][]
376283358882 ifDelay = 1ts_str_equals_int33588823358.88211<NA><NA>[][]
\n", + "

34144 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " raw_line type \\\n", + "line_num \n", + "288 322450 UP 4 ts_str_int \n", + "289 322450 8 0 ts_int_int \n", + "310 322500 8 262144 ts_int_int \n", + "315 322510 lastPort = -1 to currPort = 2 ts_str \n", + "318 322634 8 262208 ts_int_int \n", + "... ... ... \n", + "37624 3357820 0 0 ts_int_int \n", + "37625 3357823 0 64 ts_int_int \n", + "37626 3357825 0 0 ts_int_int \n", + "37627 3358882 RESETSTIM ts_str \n", + "37628 3358882 ifDelay = 1 ts_str_equals_int \n", + "\n", + " trodes_timestamp trodes_timestamp_sec \\\n", + "line_num \n", + "288 322450 322.450 \n", + "289 322450 322.450 \n", + "310 322500 322.500 \n", + "315 322510 322.510 \n", + "318 322634 322.634 \n", + "... ... ... \n", + "37624 3357820 3357.820 \n", + "37625 3357823 3357.823 \n", + "37626 3357825 3357.825 \n", + "37627 3358882 3358.882 \n", + "37628 3358882 3358.882 \n", + "\n", + " text value active_DIO_inputs_bitmask \\\n", + "line_num \n", + "288 UP 4 \n", + "289 8 \n", + "310 8 \n", + "315 lastPort = -1 to currPort = 2 \n", + "318 8 \n", + "... ... ... ... \n", + "37624 0 \n", + "37625 0 \n", + "37626 0 \n", + "37627 RESETSTIM \n", + "37628 1 1 \n", + "\n", + " active_DIO_outputs_bitmask active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "288 [] [] \n", + "289 0 [4] [] \n", + "310 262144 [4] [19] \n", + "315 [] [] \n", + "318 262208 [4] [7, 19] \n", + "... ... ... ... \n", + "37624 0 [] [] \n", + "37625 64 [] [7] \n", + "37626 0 [] [] \n", + "37627 [] [] \n", + "37628 [] [] \n", + "\n", + "[34144 rows x 10 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[3]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "de34f501", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_secactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
2874236600 1 0ts_int_int42366004236.60010[1][]
2884239693 0 0ts_int_int42396934239.69300[][]
2894242288 1 0ts_int_int42422884242.28810[1][]
2904242749 0 0ts_int_int42427494242.74900[][]
2914243151 1 0ts_int_int42431514243.15110[1][]
2924244648 0 0ts_int_int42446484244.64800[][]
2934313683 1 0ts_int_int43136834313.68310[1][]
2944314756 0 0ts_int_int43147564314.75600[][]
2954660546 1 0ts_int_int46605464660.54610[1][]
2964661064 0 0ts_int_int46610644661.06400[][]
2974661360 1 0ts_int_int46613604661.36010[1][]
2984661565 0 0ts_int_int46615654661.56500[][]
2994666057 8 0ts_int_int46660574666.05780[4][]
3004666539 0 0ts_int_int46665394666.53900[][]
3014667439 8 0ts_int_int46674394667.43980[4][]
3024668457 0 0ts_int_int46684574668.45700[][]
3034669200 8 0ts_int_int46692004669.20080[4][]
3044669481 0 0ts_int_int46694814669.48100[][]
3054675049 2 0ts_int_int46750494675.04920[2][]
3064675275 0 0ts_int_int46752754675.27500[][]
3074675302 2 0ts_int_int46753024675.30220[2][]
3084675318 0 0ts_int_int46753184675.31800[][]
3094675615 2 0ts_int_int46756154675.61520[2][]
3104676096 0 0ts_int_int46760964676.09600[][]
\n", + "
" + ], + "text/plain": [ + " raw_line type trodes_timestamp trodes_timestamp_sec \\\n", + "line_num \n", + "287 4236600 1 0 ts_int_int 4236600 4236.600 \n", + "288 4239693 0 0 ts_int_int 4239693 4239.693 \n", + "289 4242288 1 0 ts_int_int 4242288 4242.288 \n", + "290 4242749 0 0 ts_int_int 4242749 4242.749 \n", + "291 4243151 1 0 ts_int_int 4243151 4243.151 \n", + "292 4244648 0 0 ts_int_int 4244648 4244.648 \n", + "293 4313683 1 0 ts_int_int 4313683 4313.683 \n", + "294 4314756 0 0 ts_int_int 4314756 4314.756 \n", + "295 4660546 1 0 ts_int_int 4660546 4660.546 \n", + "296 4661064 0 0 ts_int_int 4661064 4661.064 \n", + "297 4661360 1 0 ts_int_int 4661360 4661.360 \n", + "298 4661565 0 0 ts_int_int 4661565 4661.565 \n", + "299 4666057 8 0 ts_int_int 4666057 4666.057 \n", + "300 4666539 0 0 ts_int_int 4666539 4666.539 \n", + "301 4667439 8 0 ts_int_int 4667439 4667.439 \n", + "302 4668457 0 0 ts_int_int 4668457 4668.457 \n", + "303 4669200 8 0 ts_int_int 4669200 4669.200 \n", + "304 4669481 0 0 ts_int_int 4669481 4669.481 \n", + "305 4675049 2 0 ts_int_int 4675049 4675.049 \n", + "306 4675275 0 0 ts_int_int 4675275 4675.275 \n", + "307 4675302 2 0 ts_int_int 4675302 4675.302 \n", + "308 4675318 0 0 ts_int_int 4675318 4675.318 \n", + "309 4675615 2 0 ts_int_int 4675615 4675.615 \n", + "310 4676096 0 0 ts_int_int 4676096 4676.096 \n", + "\n", + " active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n", + "line_num \n", + "287 1 0 \n", + "288 0 0 \n", + "289 1 0 \n", + "290 0 0 \n", + "291 1 0 \n", + "292 0 0 \n", + "293 1 0 \n", + "294 0 0 \n", + "295 1 0 \n", + "296 0 0 \n", + "297 1 0 \n", + "298 0 0 \n", + "299 8 0 \n", + "300 0 0 \n", + "301 8 0 \n", + "302 0 0 \n", + "303 8 0 \n", + "304 0 0 \n", + "305 2 0 \n", + "306 0 0 \n", + "307 2 0 \n", + "308 0 0 \n", + "309 2 0 \n", + "310 0 0 \n", + "\n", + " active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "287 [1] [] \n", + "288 [] [] \n", + "289 [1] [] \n", + "290 [] [] \n", + "291 [1] [] \n", + "292 [] 
[] \n", + "293 [1] [] \n", + "294 [] [] \n", + "295 [1] [] \n", + "296 [] [] \n", + "297 [1] [] \n", + "298 [] [] \n", + "299 [4] [] \n", + "300 [] [] \n", + "301 [4] [] \n", + "302 [] [] \n", + "303 [4] [] \n", + "304 [] [] \n", + "305 [2] [] \n", + "306 [] [] \n", + "307 [2] [] \n", + "308 [] [] \n", + "309 [2] [] \n", + "310 [] [] " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[4]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "27c9f114", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
803853607 0 64ts_int_int38536073853.607NaN<NA>064[][7]
813853630 128 64ts_int_int38536303853.630NaN<NA>12864[8][7]
823853785 0 64ts_int_int38537853853.785NaN<NA>064[][7]
833853796 128 64ts_int_int38537963853.796NaN<NA>12864[8][7]
843854144 0 64ts_int_int38541443854.144NaN<NA>064[][7]
.................................
66845023394 64 0ts_int_int50233945023.394NaN<NA>640[7][]
66855026015 0 0ts_int_int50260155026.015NaN<NA>00[][]
66865026079 64 0ts_int_int50260795026.079NaN<NA>640[7][]
66875026170 0 0ts_int_int50261705026.170NaN<NA>00[][]
66885026201 64 0ts_int_int50262015026.201NaN<NA>640[7][]
\n", + "

6418 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " raw_line type trodes_timestamp trodes_timestamp_sec \\\n", + "line_num \n", + "80 3853607 0 64 ts_int_int 3853607 3853.607 \n", + "81 3853630 128 64 ts_int_int 3853630 3853.630 \n", + "82 3853785 0 64 ts_int_int 3853785 3853.785 \n", + "83 3853796 128 64 ts_int_int 3853796 3853.796 \n", + "84 3854144 0 64 ts_int_int 3854144 3854.144 \n", + "... ... ... ... ... \n", + "6684 5023394 64 0 ts_int_int 5023394 5023.394 \n", + "6685 5026015 0 0 ts_int_int 5026015 5026.015 \n", + "6686 5026079 64 0 ts_int_int 5026079 5026.079 \n", + "6687 5026170 0 0 ts_int_int 5026170 5026.170 \n", + "6688 5026201 64 0 ts_int_int 5026201 5026.201 \n", + "\n", + " text value active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n", + "line_num \n", + "80 NaN 0 64 \n", + "81 NaN 128 64 \n", + "82 NaN 0 64 \n", + "83 NaN 128 64 \n", + "84 NaN 0 64 \n", + "... ... ... ... ... \n", + "6684 NaN 64 0 \n", + "6685 NaN 0 0 \n", + "6686 NaN 64 0 \n", + "6687 NaN 0 0 \n", + "6688 NaN 64 0 \n", + "\n", + " active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "80 [] [7] \n", + "81 [8] [7] \n", + "82 [] [7] \n", + "83 [8] [7] \n", + "84 [] [7] \n", + "... ... ... \n", + "6684 [7] [] \n", + "6685 [] [] \n", + "6686 [7] [] \n", + "6687 [] [] \n", + "6688 [7] [] \n", + "\n", + "[6418 rows x 10 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[5]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "860793dd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_secactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
1108023 0 131072ts_int_int108023108.0230131072[][18]
2108024 0 0ts_int_int108024108.02400[][]
3108188 0 131072ts_int_int108188108.1880131072[][18]
4108189 0 0ts_int_int108189108.18900[][]
5108353 0 131072ts_int_int108353108.3530131072[][18]
...........................
40151048180 0 0ts_int_int10481801048.18000[][]
40161048344 0 131072ts_int_int10483441048.3440131072[][18]
40171048345 0 0ts_int_int10483451048.34500[][]
40181048509 0 131072ts_int_int10485091048.5090131072[][18]
40191048510 0 0ts_int_int10485101048.51000[][]
\n", + "

4000 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " raw_line type trodes_timestamp \\\n", + "line_num \n", + "1 108023 0 131072 ts_int_int 108023 \n", + "2 108024 0 0 ts_int_int 108024 \n", + "3 108188 0 131072 ts_int_int 108188 \n", + "4 108189 0 0 ts_int_int 108189 \n", + "5 108353 0 131072 ts_int_int 108353 \n", + "... ... ... ... \n", + "4015 1048180 0 0 ts_int_int 1048180 \n", + "4016 1048344 0 131072 ts_int_int 1048344 \n", + "4017 1048345 0 0 ts_int_int 1048345 \n", + "4018 1048509 0 131072 ts_int_int 1048509 \n", + "4019 1048510 0 0 ts_int_int 1048510 \n", + "\n", + " trodes_timestamp_sec active_DIO_inputs_bitmask \\\n", + "line_num \n", + "1 108.023 0 \n", + "2 108.024 0 \n", + "3 108.188 0 \n", + "4 108.189 0 \n", + "5 108.353 0 \n", + "... ... ... \n", + "4015 1048.180 0 \n", + "4016 1048.344 0 \n", + "4017 1048.345 0 \n", + "4018 1048.509 0 \n", + "4019 1048.510 0 \n", + "\n", + " active_DIO_outputs_bitmask active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "1 131072 [] [18] \n", + "2 0 [] [] \n", + "3 131072 [] [18] \n", + "4 0 [] [] \n", + "5 131072 [] [18] \n", + "... ... ... ... 
\n", + "4015 0 [] [] \n", + "4016 131072 [] [18] \n", + "4017 0 [] [] \n", + "4018 131072 [] [18] \n", + "4019 0 [] [] \n", + "\n", + "[4000 rows x 8 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[6]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "a0f696c5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['first poke',\n", + " 'PROXON',\n", + " 'UP',\n", + " ,\n", + " 'DOWN',\n", + " 'second pokes',\n", + " 'PROXOFF',\n", + " 'UPIND_0',\n", + " 'pump on',\n", + " 'home reward',\n", + " '1',\n", + " '0',\n", + " '10',\n", + " '3',\n", + " 'poke during proximity',\n", + " 'UPIND_4',\n", + " '2',\n", + " '8',\n", + " '4',\n", + " 'UPIND_3',\n", + " 'LOCKOUT',\n", + " 'LOCKEND',\n", + " '6',\n", + " 'UPIND_1',\n", + " '5',\n", + " '12',\n", + " '7',\n", + " 'UPIND_2',\n", + " '9',\n", + " '14',\n", + " 'outer reward',\n", + " '11',\n", + " '13',\n", + " '15',\n", + " '16',\n", + " '17',\n", + " '18',\n", + " '19',\n", + " '20',\n", + " '21',\n", + " '22',\n", + " '23',\n", + " '24',\n", + " '25',\n", + " '26',\n", + " '27',\n", + " '28',\n", + " '29',\n", + " '30',\n", + " '31',\n", + " 'poke during lock period',\n", + " '33',\n", + " '34',\n", + " '35',\n", + " '36',\n", + " '37',\n", + " '38',\n", + " '39',\n", + " '40',\n", + " '41',\n", + " '42',\n", + " '43',\n", + " '44',\n", + " '46',\n", + " '47',\n", + " '48',\n", + " '49',\n", + " '50',\n", + " '51',\n", + " '52',\n", + " '53',\n", + " '54',\n", + " '55',\n", + " '56',\n", + " '57',\n", + " '58',\n", + " '59',\n", + " '60',\n", + " '61',\n", + " '32',\n", + " '62',\n", + " '63',\n", + " '64',\n", + " '65',\n", + " '66',\n", + " '67',\n", + " '68',\n", + " '69',\n", + " '70',\n", + " '71',\n", + " '72',\n", + " '73',\n", + " '74',\n", + " '75',\n", + " '76',\n", + " '77',\n", + " '78',\n", + " '79',\n", + " '80',\n", + " '81',\n", + " '82',\n", + " '83',\n", + " '84',\n", + " 
'85',\n", + " '86',\n", + " '87',\n", + " '45',\n", + " '88',\n", + " '89',\n", + " '90',\n", + " '91',\n", + " '92',\n", + " '93',\n", + " '94',\n", + " '95',\n", + " '96',\n", + " '97',\n", + " '98',\n", + " '99',\n", + " '100',\n", + " '101',\n", + " '102',\n", + " '103',\n", + " '104',\n", + " '105',\n", + " '106',\n", + " '107',\n", + " '108',\n", + " '109',\n", + " '110',\n", + " '111',\n", + " '112',\n", + " '113',\n", + " '114',\n", + " '115',\n", + " '116',\n", + " '117',\n", + " '118',\n", + " '119',\n", + " '120',\n", + " '121',\n", + " '122',\n", + " '123',\n", + " '124',\n", + " '125',\n", + " '126',\n", + " '127',\n", + " '128',\n", + " '129',\n", + " '130',\n", + " '131',\n", + " '132',\n", + " '133',\n", + " '134',\n", + " '135',\n", + " '136',\n", + " '137',\n", + " '138',\n", + " '139',\n", + " '140',\n", + " '141',\n", + " '142',\n", + " '143',\n", + " '144',\n", + " '145',\n", + " '146',\n", + " '147',\n", + " '148',\n", + " '149',\n", + " '150',\n", + " '151',\n", + " '152',\n", + " '153',\n", + " '154',\n", + " '155',\n", + " '156',\n", + " '157',\n", + " '158',\n", + " '159',\n", + " 'EndSession']" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[8].text.unique().tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "e8108415", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: []" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[7]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "5055cb37", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
4876935765 first pokets_str69357656935.765first poke<NA><NA><NA>[][]
4886935766 PROXONts_str69357666935.766PROXON<NA><NA><NA>[][]
4896935766 UP 10ts_str_int69357666935.766UP10<NA><NA>[][]
4906935765 512 512ts_int_int69357656935.765<NA><NA>512512[10][10]
4916935778 DOWN 10ts_str_int69357786935.778DOWN10<NA><NA>[][]
.................................
258379104866 mostRecentOuterWell_ind = 4ts_str_equals_int91048669104.86644<NA><NA>[][]
258389104866 mostRecentRewardOuterWell_ind = 2ts_str_equals_int91048669104.86622<NA><NA>[][]
258429104890 UPIND_4ts_str91048909104.890UPIND_4<NA><NA><NA>[][]
258499104908 0 0ts_int_int91049089104.908<NA><NA>00[][]
258509105380 UPIND_0ts_str91053809105.380UPIND_0<NA><NA><NA>[][]
\n", + "

19975 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " raw_line type \\\n", + "line_num \n", + "487 6935765 first poke ts_str \n", + "488 6935766 PROXON ts_str \n", + "489 6935766 UP 10 ts_str_int \n", + "490 6935765 512 512 ts_int_int \n", + "491 6935778 DOWN 10 ts_str_int \n", + "... ... ... \n", + "25837 9104866 mostRecentOuterWell_ind = 4 ts_str_equals_int \n", + "25838 9104866 mostRecentRewardOuterWell_ind = 2 ts_str_equals_int \n", + "25842 9104890 UPIND_4 ts_str \n", + "25849 9104908 0 0 ts_int_int \n", + "25850 9105380 UPIND_0 ts_str \n", + "\n", + " trodes_timestamp trodes_timestamp_sec text value \\\n", + "line_num \n", + "487 6935765 6935.765 first poke \n", + "488 6935766 6935.766 PROXON \n", + "489 6935766 6935.766 UP 10 \n", + "490 6935765 6935.765 \n", + "491 6935778 6935.778 DOWN 10 \n", + "... ... ... ... ... \n", + "25837 9104866 9104.866 4 4 \n", + "25838 9104866 9104.866 2 2 \n", + "25842 9104890 9104.890 UPIND_4 \n", + "25849 9104908 9104.908 \n", + "25850 9105380 9105.380 UPIND_0 \n", + "\n", + " active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n", + "line_num \n", + "487 \n", + "488 \n", + "489 \n", + "490 512 512 \n", + "491 \n", + "... ... ... \n", + "25837 \n", + "25838 \n", + "25842 \n", + "25849 0 0 \n", + "25850 \n", + "\n", + " active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "487 [] [] \n", + "488 [] [] \n", + "489 [] [] \n", + "490 [10] [10] \n", + "491 [] [] \n", + "... ... ... \n", + "25837 [] [] \n", + "25838 [] [] \n", + "25842 [] [] \n", + "25849 [] [] \n", + "25850 [] [] \n", + "\n", + "[19975 rows x 10 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[8]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "4cb98d40", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
4896935766 UP 10ts_str_int69357666935.766UP10<NA><NA>[][]
4946935987 UP 10ts_str_int69359876935.987UP10<NA><NA>[][]
4996936059 UP 10ts_str_int69360596936.059UP10<NA><NA>[][]
5416937614 UP 10ts_str_int69376146937.614UP10<NA><NA>[][]
5506937781 UP 10ts_str_int69377816937.781UP10<NA><NA>[][]
.................................
257649063659 UP 10ts_str_int90636599063.659UP10<NA><NA>[][]
257759065869 UP 10ts_str_int90658699065.869UP10<NA><NA>[][]
257869071584 UP 8ts_str_int90715849071.584UP8<NA><NA>[][]
258179073200 UP 8ts_str_int90732009073.200UP8<NA><NA>[][]
258249082033 UP 10ts_str_int90820339082.033UP10<NA><NA>[][]
\n", + "

2355 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " raw_line type trodes_timestamp trodes_timestamp_sec \\\n", + "line_num \n", + "489 6935766 UP 10 ts_str_int 6935766 6935.766 \n", + "494 6935987 UP 10 ts_str_int 6935987 6935.987 \n", + "499 6936059 UP 10 ts_str_int 6936059 6936.059 \n", + "541 6937614 UP 10 ts_str_int 6937614 6937.614 \n", + "550 6937781 UP 10 ts_str_int 6937781 6937.781 \n", + "... ... ... ... ... \n", + "25764 9063659 UP 10 ts_str_int 9063659 9063.659 \n", + "25775 9065869 UP 10 ts_str_int 9065869 9065.869 \n", + "25786 9071584 UP 8 ts_str_int 9071584 9071.584 \n", + "25817 9073200 UP 8 ts_str_int 9073200 9073.200 \n", + "25824 9082033 UP 10 ts_str_int 9082033 9082.033 \n", + "\n", + " text value active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n", + "line_num \n", + "489 UP 10 \n", + "494 UP 10 \n", + "499 UP 10 \n", + "541 UP 10 \n", + "550 UP 10 \n", + "... ... ... ... ... \n", + "25764 UP 10 \n", + "25775 UP 10 \n", + "25786 UP 8 \n", + "25817 UP 8 \n", + "25824 UP 10 \n", + "\n", + " active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "489 [] [] \n", + "494 [] [] \n", + "499 [] [] \n", + "541 [] [] \n", + "550 [] [] \n", + "... ... ... \n", + "25764 [] [] \n", + "25775 [] [] \n", + "25786 [] [] \n", + "25817 [] [] \n", + "25824 [] [] \n", + "\n", + "[2355 rows x 10 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[8].loc[statescript_dfs[8].text == \"UP\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "3245aaaf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
472300995 rewCount = 1ts_str_equals_int23009952300.99511<NA><NA>[][]
482300995 1 4ts_int_int23009952300.995<NA><NA>14[1][3]
492301028 0 4ts_int_int23010282301.028<NA><NA>04[][3]
502301295 0 0ts_int_int23012952301.295<NA><NA>00[][]
512303333 1 0ts_int_int23033332303.333<NA><NA>10[1][]
.................................
2943166053 0 0ts_int_int31660533166.053NaN<NA>00[][]
2953168403 1 0ts_int_int31684033168.403NaN<NA>10[1][]
2963168575 0 0ts_int_int31685753168.575NaN<NA>00[][]
2973168643 1 0ts_int_int31686433168.643NaN<NA>10[1][]
2983168890 0 0ts_int_int31688903168.890NaN<NA>00[][]
\n", + "

252 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " raw_line type trodes_timestamp \\\n", + "line_num \n", + "47 2300995 rewCount = 1 ts_str_equals_int 2300995 \n", + "48 2300995 1 4 ts_int_int 2300995 \n", + "49 2301028 0 4 ts_int_int 2301028 \n", + "50 2301295 0 0 ts_int_int 2301295 \n", + "51 2303333 1 0 ts_int_int 2303333 \n", + "... ... ... ... \n", + "294 3166053 0 0 ts_int_int 3166053 \n", + "295 3168403 1 0 ts_int_int 3168403 \n", + "296 3168575 0 0 ts_int_int 3168575 \n", + "297 3168643 1 0 ts_int_int 3168643 \n", + "298 3168890 0 0 ts_int_int 3168890 \n", + "\n", + " trodes_timestamp_sec text value active_DIO_inputs_bitmask \\\n", + "line_num \n", + "47 2300.995 1 1 \n", + "48 2300.995 1 \n", + "49 2301.028 0 \n", + "50 2301.295 0 \n", + "51 2303.333 1 \n", + "... ... ... ... ... \n", + "294 3166.053 NaN 0 \n", + "295 3168.403 NaN 1 \n", + "296 3168.575 NaN 0 \n", + "297 3168.643 NaN 1 \n", + "298 3168.890 NaN 0 \n", + "\n", + " active_DIO_outputs_bitmask active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "47 [] [] \n", + "48 4 [1] [3] \n", + "49 4 [] [3] \n", + "50 0 [] [] \n", + "51 0 [1] [] \n", + "... ... ... ... \n", + "294 0 [] [] \n", + "295 0 [1] [] \n", + "296 0 [] [] \n", + "297 0 [1] [] \n", + "298 0 [] [] \n", + "\n", + "[252 rows x 10 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[9]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "688f3a3d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [raw_line, type, trodes_timestamp, trodes_timestamp_sec, text, value, active_DIO_inputs_bitmask, active_DIO_outputs_bitmask, active_DIO_inputs, active_DIO_outputs]\n", + "Index: []" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statescript_dfs[9].loc[statescript_dfs[9].text == \"rewCount\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "a0dc407f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Warning: Time offset application requested, but offset has not been calculated or was unsuccessful. 'timestamp_sync' column omitted.\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
line_num
803853607 0 64ts_int_int38536073853.607NaN<NA>064[][7]
813853630 128 64ts_int_int38536303853.630NaN<NA>12864[8][7]
823853785 0 64ts_int_int38537853853.785NaN<NA>064[][7]
833853796 128 64ts_int_int38537963853.796NaN<NA>12864[8][7]
843854144 0 64ts_int_int38541443854.144NaN<NA>064[][7]
.................................
66845023394 64 0ts_int_int50233945023.394NaN<NA>640[7][]
66855026015 0 0ts_int_int50260155026.015NaN<NA>00[][]
66865026079 64 0ts_int_int50260795026.079NaN<NA>640[7][]
66875026170 0 0ts_int_int50261705026.170NaN<NA>00[][]
66885026201 64 0ts_int_int50262015026.201NaN<NA>640[7][]
\n", + "

6418 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " raw_line type trodes_timestamp trodes_timestamp_sec \\\n", + "line_num \n", + "80 3853607 0 64 ts_int_int 3853607 3853.607 \n", + "81 3853630 128 64 ts_int_int 3853630 3853.630 \n", + "82 3853785 0 64 ts_int_int 3853785 3853.785 \n", + "83 3853796 128 64 ts_int_int 3853796 3853.796 \n", + "84 3854144 0 64 ts_int_int 3854144 3854.144 \n", + "... ... ... ... ... \n", + "6684 5023394 64 0 ts_int_int 5023394 5023.394 \n", + "6685 5026015 0 0 ts_int_int 5026015 5026.015 \n", + "6686 5026079 64 0 ts_int_int 5026079 5026.079 \n", + "6687 5026170 0 0 ts_int_int 5026170 5026.170 \n", + "6688 5026201 64 0 ts_int_int 5026201 5026.201 \n", + "\n", + " text value active_DIO_inputs_bitmask active_DIO_outputs_bitmask \\\n", + "line_num \n", + "80 NaN 0 64 \n", + "81 NaN 128 64 \n", + "82 NaN 0 64 \n", + "83 NaN 128 64 \n", + "84 NaN 0 64 \n", + "... ... ... ... ... \n", + "6684 NaN 64 0 \n", + "6685 NaN 0 0 \n", + "6686 NaN 64 0 \n", + "6687 NaN 0 0 \n", + "6688 NaN 64 0 \n", + "\n", + " active_DIO_inputs active_DIO_outputs \n", + "line_num \n", + "80 [] [7] \n", + "81 [8] [7] \n", + "82 [] [7] \n", + "83 [8] [7] \n", + "84 [] [7] \n", + "... ... ... \n", + "6684 [7] [] \n", + "6685 [] [] \n", + "6686 [7] [] \n", + "6687 [] [] \n", + "6688 [7] [] \n", + "\n", + "[6418 rows x 10 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "processor = StateScriptLogProcessor.from_file(\n", + " \"/Users/edeno/Downloads/20220103_Ban77mW_02_lineartrack_p1.stateScriptLog\"\n", + ")\n", + "processor.get_events_dataframe()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "7f9b4c03", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "

StateScriptLogProcessor

\n", + "

Source: from file: /Users/edeno/Downloads/20220103_Ban77mW_02_lineartrack_p1.stateScriptLog

\n", + "

Status: Parsed (6689 raw entries)

\n", + "

Time Offset: Not Calculated

\n", + "

DataFrame: Generated

\n", + "
DataFrame Preview (first 5 rows):
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raw_linetypetrodes_timestamptrodes_timestamp_sectextvalueactive_DIO_inputs_bitmaskactive_DIO_outputs_bitmaskactive_DIO_inputsactive_DIO_outputs
3853607 0 64ts_int_int38536073853.607<NA><NA>064[][7]
3853630 128 64ts_int_int38536303853.630<NA><NA>12864[8][7]
3853785 0 64ts_int_int38537853853.785<NA><NA>064[][7]
3853796 128 64ts_int_int38537963853.796<NA><NA>12864[8][7]
3854144 0 64ts_int_int38541443854.144<NA><NA>064[][7]
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "processor" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "166a1f1d", + "metadata": {}, + "outputs": [ + { + "ename": "AssertionError", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[17], line 427\u001b[0m\n\u001b[1;32m 425\u001b[0m test_parse_ts_int_int_direct()\n\u001b[1;32m 426\u001b[0m test_parse_ts_str_int_direct()\n\u001b[0;32m--> 427\u001b[0m \u001b[43mtest_parse_ts_str_equals_int_direct\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 428\u001b[0m test_parse_ts_str_direct()\n\u001b[1;32m 429\u001b[0m test_parse_statescript_line_ts_int_int()\n", + "Cell \u001b[0;32mIn[17], line 129\u001b[0m, in \u001b[0;36mtest_parse_ts_str_equals_int_direct\u001b[0;34m()\u001b[0m\n\u001b[1;32m 122\u001b[0m parts \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m100078\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcounter_handlePoke\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m=\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m1\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 123\u001b[0m expected \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 124\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtype\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mts_str_equals_int\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 125\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimestamp\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m100078\u001b[39m,\n\u001b[1;32m 126\u001b[0m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcounter_handlePoke\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 127\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvalue\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m1\u001b[39m,\n\u001b[1;32m 128\u001b[0m }\n\u001b[0;32m--> 129\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m parse_ts_str_equals_int(parts) \u001b[38;5;241m==\u001b[39m expected\n\u001b[1;32m 131\u001b[0m parts_multi_word \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m3610855\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtotal\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrewards\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m=\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m70\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 132\u001b[0m expected_multi \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 133\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtype\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mts_str_equals_int\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 134\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimestamp\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m3610855\u001b[39m,\n\u001b[1;32m 135\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtotal rewards\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 136\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvalue\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m70\u001b[39m,\n\u001b[1;32m 137\u001b[0m }\n", + "\u001b[0;31mAssertionError\u001b[0m: " + ] + } + ], + "source": [ + "import os\n", + "import pathlib\n", + "import tempfile\n", + "\n", + "import 
numpy as np\n", + "import pandas as pd\n", + "import pytest\n", + "\n", + "from spyglass.utils.statescript import (\n", + " StateScriptLogProcessor,\n", + " _parse_int,\n", + " parse_statescript_line,\n", + " parse_ts_int_int,\n", + " parse_ts_str,\n", + " parse_ts_str_equals_int,\n", + " parse_ts_str_int,\n", + ")\n", + "\n", + "\n", + "@pytest.fixture(scope=\"module\")\n", + "def sample_log_content():\n", + " \"\"\"Provides sample log content.\"\"\"\n", + " return \"\"\"# Test log\n", + "76504 0 0\n", + "76566 center_poke\n", + "76566 65536 0\n", + "100078 counter_handlePoke = 1\n", + "100078 4 0\n", + "100559 0 0\n", + "Executing this line\n", + "115030 center_poke\n", + "115030 65536 0\n", + "\"\"\"\n", + "\n", + "\n", + "@pytest.fixture\n", + "def processor(sample_log_content):\n", + " \"\"\"Provides a processor instance initialized with sample content.\"\"\"\n", + " return StateScriptLogProcessor(sample_log_content)\n", + "\n", + "\n", + "@pytest.fixture(scope=\"module\")\n", + "def external_times():\n", + " \"\"\"Provides sample external times.\"\"\"\n", + " return np.array([1678886476.530, 1678886500.100, 1678886515.050])\n", + "\n", + "\n", + "@pytest.fixture\n", + "def temp_log_file(sample_log_content):\n", + " \"\"\"Creates a temporary log file and yields its path.\"\"\"\n", + " with tempfile.NamedTemporaryFile(\n", + " mode=\"w\", delete=False, suffix=\".stateScriptLog\"\n", + " ) as tmp_file:\n", + " tmp_file.write(sample_log_content)\n", + " tmp_file_path = tmp_file.name\n", + " yield tmp_file_path\n", + " os.remove(tmp_file_path)\n", + "\n", + "\n", + "# --- Tests for Level 1 Parsers ---\n", + "\n", + "\n", + "def test_parse_int():\n", + " \"\"\"Test the _parse_int helper function.\"\"\"\n", + " assert _parse_int(\"123\") == 123\n", + " assert _parse_int(\"-45\") == -45\n", + " assert _parse_int(\"0\") == 0\n", + " assert _parse_int(\"abc\") is None\n", + " assert _parse_int(\"12.3\") is None\n", + " assert _parse_int(\"\") is None\n", + " assert 
(\n", + " _parse_int(\" 123 \") == 123\n", + " ) # Should handle surrounding whitespace if not stripped before\n", + "\n", + "\n", + "def test_parse_ts_int_int_direct():\n", + " \"\"\"Test parse_ts_int_int directly.\"\"\"\n", + " parts = [\"8386500\", \"0\", \"0\"]\n", + " expected = {\n", + " \"type\": \"ts_int_int\",\n", + " \"timestamp\": 8386500,\n", + " \"value1\": 0,\n", + " \"value2\": 0,\n", + " }\n", + " assert parse_ts_int_int(parts) == expected\n", + "\n", + " parts_wrong_len = [\"123\", \"0\"]\n", + " assert parse_ts_int_int(parts_wrong_len) is None\n", + "\n", + " parts_not_int = [\"123\", \"abc\", \"0\"]\n", + " assert parse_ts_int_int(parts_not_int) is None\n", + "\n", + " parts_float = [\"123\", \"4.5\", \"0\"]\n", + " assert parse_ts_int_int(parts_float) is None\n", + "\n", + "\n", + "def test_parse_ts_str_int_direct():\n", + " \"\"\"Test parse_ts_str_int directly.\"\"\"\n", + " parts = [\"8386500\", \"DOWN\", \"3\"]\n", + " expected = {\n", + " \"type\": \"ts_str_int\",\n", + " \"timestamp\": 8386500,\n", + " \"text\": \"DOWN\",\n", + " \"value\": 3,\n", + " }\n", + " assert parse_ts_str_int(parts) == expected\n", + "\n", + " parts_wrong_len = [\"123\", \"UP\"]\n", + " assert parse_ts_str_int(parts_wrong_len) is None\n", + "\n", + " parts_str_is_int = [\"123\", \"456\", \"789\"]\n", + " assert (\n", + " parse_ts_str_int(parts_str_is_int) is None\n", + " ) # Should be handled by ts_int_int\n", + "\n", + " parts_val_not_int = [\"123\", \"UP\", \"abc\"]\n", + " assert parse_ts_str_int(parts_val_not_int) is None\n", + "\n", + "\n", + "def test_parse_ts_str_equals_int_direct():\n", + " \"\"\"Test parse_ts_str_equals_int directly.\"\"\"\n", + " parts = [\"100078\", \"counter_handlePoke\", \"=\", \"1\"]\n", + " expected = {\n", + " \"type\": \"ts_str_equals_int\",\n", + " \"timestamp\": 100078,\n", + " \"text\": \"counter_handlePoke\",\n", + " \"value\": 1,\n", + " }\n", + " assert parse_ts_str_equals_int(parts) == expected\n", + "\n", + " 
parts_multi_word = [\"3610855\", \"total\", \"rewards\", \"=\", \"70\"]\n", + " expected_multi = {\n", + " \"type\": \"ts_str_equals_int\",\n", + " \"timestamp\": 3610855,\n", + " \"text\": \"total rewards\",\n", + " \"value\": 70,\n", + " }\n", + " assert parse_ts_str_equals_int(parts_multi_word) == expected_multi\n", + "\n", + " parts_wrong_len = [\"123\", \"=\", \"1\"]\n", + " assert parse_ts_str_equals_int(parts_wrong_len) is None\n", + "\n", + " parts_no_equals = [\"123\", \"text\", \"1\"]\n", + " assert parse_ts_str_equals_int(parts_no_equals) is None\n", + "\n", + " parts_val_not_int = [\"123\", \"text\", \"=\", \"abc\"]\n", + " assert parse_ts_str_equals_int(parts_val_not_int) is None\n", + "\n", + "\n", + "def test_parse_ts_str_direct():\n", + " \"\"\"Test parse_ts_str directly.\"\"\"\n", + " parts = [\"76566\", \"center_poke\"]\n", + " expected = {\"type\": \"ts_str\", \"timestamp\": 76566, \"text\": \"center_poke\"}\n", + " assert parse_ts_str(parts) == expected\n", + "\n", + " parts_multi_word = [\n", + " \"1271815\",\n", + " \"lastPort\",\n", + " \"=\",\n", + " \"-1\",\n", + " \"to\",\n", + " \"currPort\",\n", + " \"=\",\n", + " \"2\",\n", + " ]\n", + " expected_multi = {\n", + " \"type\": \"ts_str\",\n", + " \"timestamp\": 1271815,\n", + " \"text\": \"lastPort = -1 to currPort = 2\",\n", + " }\n", + " assert parse_ts_str(parts_multi_word) == expected_multi\n", + "\n", + " parts_wrong_len = [\"123\"]\n", + " assert parse_ts_str(parts_wrong_len) is None\n", + "\n", + " parts_second_is_int = [\n", + " \"123\",\n", + " \"456\",\n", + " ] # Second part is int, should fail this parser\n", + " assert parse_ts_str(parts_second_is_int) is None\n", + "\n", + "\n", + "# --- Tests for parse_statescript_line (Covers integration and dispatching) ---\n", + "\n", + "\n", + "def test_parse_statescript_line_ts_int_int():\n", + " \"\"\"Test parse_statescript_line dispatching for ts_int_int.\"\"\"\n", + " line = \"8386500 0 0\"\n", + " parsed = 
parse_statescript_line(line)\n", + " assert parsed[\"type\"] == \"ts_int_int\"\n", + " assert parsed[\"timestamp\"] == 8386500\n", + " assert parsed[\"value1\"] == 0\n", + " assert parsed[\"value2\"] == 0\n", + " assert parsed[\"raw_line\"] == line\n", + "\n", + "\n", + "def test_parse_statescript_line_ts_str_int():\n", + " \"\"\"Test parse_statescript_line dispatching for ts_str_int.\"\"\"\n", + " line = \"8386500 DOWN 3\"\n", + " parsed = parse_statescript_line(line)\n", + " assert parsed[\"type\"] == \"ts_str_int\"\n", + " assert parsed[\"timestamp\"] == 8386500\n", + " assert parsed[\"text\"] == \"DOWN\"\n", + " assert parsed[\"value\"] == 3\n", + " assert parsed[\"raw_line\"] == line\n", + "\n", + "\n", + "def test_parse_statescript_line_ts_str_equals_int():\n", + " \"\"\"Test parse_statescript_line dispatching for ts_str_equals_int.\"\"\"\n", + " line = \"100078 counter_handlePoke = 1\"\n", + " parsed = parse_statescript_line(line)\n", + " assert parsed[\"type\"] == \"ts_str_equals_int\"\n", + " assert parsed[\"timestamp\"] == 100078\n", + " assert parsed[\"text\"] == \"counter_handlePoke\"\n", + " assert parsed[\"value\"] == 1\n", + " assert parsed[\"raw_line\"] == line\n", + "\n", + "\n", + "def test_parse_statescript_line_ts_str():\n", + " \"\"\"Test parse_statescript_line dispatching for ts_str.\"\"\"\n", + " line = \"76566 center_poke\"\n", + " parsed = parse_statescript_line(line)\n", + " assert parsed[\"type\"] == \"ts_str\"\n", + " assert parsed[\"timestamp\"] == 76566\n", + " assert parsed[\"text\"] == \"center_poke\"\n", + " assert parsed[\"raw_line\"] == line\n", + "\n", + "\n", + "def test_parse_statescript_line_unknown():\n", + " \"\"\"Test parse_statescript_line dispatching for unknown lines.\"\"\"\n", + " line = \"Executing trigger function 22\" # No timestamp\n", + " parsed = parse_statescript_line(line)\n", + " assert parsed[\"type\"] == \"unknown\"\n", + " assert \"timestamp\" not in parsed\n", + " assert parsed[\"raw_line\"] == line\n", + 
"\n", + "\n", + "def test_parse_statescript_line_comment_empty():\n", + " \"\"\"Test parse_statescript_line dispatching for comments/empty.\"\"\"\n", + " line_c = \"# comment\"\n", + " line_e = \"\"\n", + " line_s = \" \"\n", + " assert parse_statescript_line(line_c)[\"type\"] == \"comment_or_empty\"\n", + " assert parse_statescript_line(line_c)[\"raw_line\"] == line_c\n", + " assert parse_statescript_line(line_e)[\"type\"] == \"comment_or_empty\"\n", + " assert parse_statescript_line(line_e)[\"raw_line\"] == line_e\n", + " assert parse_statescript_line(line_s)[\"type\"] == \"comment_or_empty\"\n", + " assert parse_statescript_line(line_s)[\"raw_line\"] == \"\"\n", + "\n", + "\n", + "# --- Tests for StateScriptLogProcessor ---\n", + "\n", + "\n", + "def test_init_from_string(processor, sample_log_content):\n", + " assert processor.log_content == sample_log_content\n", + " assert processor.source_description == \"from string\"\n", + " assert processor.raw_events == []\n", + " assert processor.time_offset is None\n", + " assert processor.processed_events_df is None\n", + "\n", + "\n", + "def test_init_from_file(temp_log_file, sample_log_content):\n", + " processor_file = StateScriptLogProcessor.from_file(temp_log_file)\n", + " assert processor_file.log_content == sample_log_content\n", + " assert processor_file.source_description.startswith(\"from file:\")\n", + " assert pathlib.Path(temp_log_file).name in processor_file.source_description\n", + "\n", + "\n", + "def test_init_from_file_not_found():\n", + " with pytest.raises(FileNotFoundError):\n", + " StateScriptLogProcessor.from_file(\"non_existent_file.log\")\n", + "\n", + "\n", + "def test_parse_raw_events(processor):\n", + " events = processor.parse_raw_events()\n", + " assert isinstance(events, list)\n", + " assert len(events) == 10\n", + " assert events[0][\"type\"] == \"comment_or_empty\"\n", + " assert events[1][\"type\"] == \"ts_int_int\"\n", + " assert events[7][\"type\"] == \"unknown\"\n", + " assert 
events[1][\"raw_line\"] == \"76504 0 0\"\n", + " assert events[7][\"raw_line\"] == \"Executing this line\"\n", + "\n", + "\n", + "def test_find_reference_events(processor):\n", + " ref_df = processor._find_reference_events(\n", + " event_type=\"ts_str\", conditions={\"text\": \"center_poke\"}\n", + " )\n", + " assert isinstance(ref_df, pd.DataFrame)\n", + " assert len(ref_df) == 2\n", + " pd.testing.assert_series_equal(\n", + " ref_df[\"timestamp\"],\n", + " pd.Series([76566, 115030], name=\"timestamp\"),\n", + " check_dtype=False,\n", + " )\n", + " assert \"log_timestamp_sec\" in ref_df.columns\n", + "\n", + " ref_df_num = processor._find_reference_events(\n", + " event_type=\"ts_int_int\", conditions={\"value1\": 4, \"value2\": 0}\n", + " )\n", + " assert len(ref_df_num) == 1\n", + " assert ref_df_num[\"timestamp\"].iloc[0] == 100078\n", + "\n", + " ref_df_none = processor._find_reference_events(\n", + " event_type=\"ts_str_int\", conditions={\"text\": \"nonexistent\"}\n", + " )\n", + " assert ref_df_none.empty\n", + "\n", + "\n", + "def test_calculate_time_offset_success(processor):\n", + " ext_times = np.array([1678880076.566, 1678880115.030])\n", + " offset = processor.calculate_time_offset(\n", + " external_reference_times=ext_times,\n", + " log_event_type=\"ts_int_int\",\n", + " log_event_conditions={\"value1\": 65536, \"value2\": 0},\n", + " check_n_events=2,\n", + " )\n", + " assert offset is not None\n", + " assert offset == pytest.approx(1678880000.0)\n", + "\n", + "\n", + "def test_calculate_time_offset_fail_not_enough_log(processor, external_times):\n", + " offset = processor.calculate_time_offset(\n", + " external_reference_times=external_times,\n", + " log_event_type=\"ts_str_equals_int\",\n", + " log_event_conditions={\"text\": \"counter_handlePoke\"},\n", + " check_n_events=2,\n", + " )\n", + " assert offset is None\n", + " assert processor.time_offset is None\n", + "\n", + "\n", + "def 
test_calculate_time_offset_fail_not_enough_external(processor):\n", + " offset = processor.calculate_time_offset(\n", + " external_reference_times=np.array([1678880076.566]),\n", + " log_event_type=\"ts_int_int\",\n", + " log_event_conditions={\"value1\": 65536, \"value2\": 0},\n", + " check_n_events=2,\n", + " )\n", + " assert offset is None\n", + " assert processor.time_offset is None\n", + "\n", + "\n", + "def test_get_events_dataframe_defaults(processor):\n", + " \"\"\"Test default behavior: exclude comments/unknown, no offset applied yet.\"\"\"\n", + " df = processor.get_events_dataframe(\n", + " apply_offset=False\n", + " ) # Default exclude=True\n", + " assert isinstance(df, pd.DataFrame)\n", + " assert len(df) == 8 # Excludes comment and unknown line\n", + " assert \"raw_line\" in df.columns\n", + " assert \"timestamp\" in df.columns\n", + " assert \"log_timestamp_sec\" in df.columns\n", + " # Check column order: time first, raw_line last\n", + " expected_cols = [\n", + " \"timestamp\",\n", + " \"log_timestamp_sec\",\n", + " \"timestamp_sync\",\n", + " \"text\",\n", + " \"value\",\n", + " \"value1\",\n", + " \"value2\",\n", + " \"raw_line\",\n", + " \"type\",\n", + " ]\n", + " actual_expected_cols = [col for col in expected_cols if col in df.columns]\n", + " assert list(df.columns) == actual_expected_cols\n", + " # Check content\n", + " assert df[\"raw_line\"].iloc[0] == \"76504 0 0\"\n", + " assert pd.isna(df[\"text\"].iloc[0]) # Should be NA where not applicable\n", + " assert df[\"value1\"].iloc[0] == 0\n", + "\n", + "\n", + "def test_get_events_dataframe_include_all(processor):\n", + " \"\"\"Test including comments and unknown lines.\"\"\"\n", + " df = processor.get_events_dataframe(\n", + " apply_offset=False, exclude_comments_unknown=False\n", + " )\n", + " assert isinstance(df, pd.DataFrame)\n", + " assert len(df) == 10 # Includes comment and unknown line\n", + " assert \"raw_line\" in df.columns\n", + " # Check raw_line for the unknown line\n", + " 
assert (\n", + " df[\"raw_line\"].iloc[7] == \"Executing this line\"\n", + " ) # Index adjusted for comment\n", + " # Check that timestamp is NA or 0 for lines without one\n", + " assert (\n", + " pd.isna(df[\"timestamp\"].iloc[0]) or df[\"timestamp\"].iloc[0] == 0\n", + " ) # Comment line\n", + " assert (\n", + " pd.isna(df[\"timestamp\"].iloc[7]) or df[\"timestamp\"].iloc[7] == 0\n", + " ) # Unknown line\n", + " # Check column order\n", + " expected_cols = [\n", + " \"timestamp\",\n", + " \"log_timestamp_sec\",\n", + " \"timestamp_sync\",\n", + " \"text\",\n", + " \"value\",\n", + " \"value1\",\n", + " \"value2\",\n", + " \"raw_line\",\n", + " \"type\",\n", + " ]\n", + " actual_expected_cols = [col for col in expected_cols if col in df.columns]\n", + " assert list(df.columns) == actual_expected_cols\n", + "\n", + "\n", + "def test_get_events_dataframe_with_offset(processor):\n", + " \"\"\"Test applying offset and column order.\"\"\"\n", + " processor.time_offset = 1678880000.0\n", + " df = processor.get_events_dataframe(\n", + " apply_offset=True\n", + " ) # Default exclude=True\n", + " assert isinstance(df, pd.DataFrame)\n", + " assert len(df) == 8\n", + " # Check calculation\n", + " expected_sync_time = (76504 / 1000.0) + 1678880000.0\n", + " assert df[\"timestamp_sync\"].iloc[0] == pytest.approx(expected_sync_time)\n", + " # Check NA value handling\n", + " assert pd.isna(df[\"text\"].iloc[0])\n", + "\n", + "\n", + "test_parse_int()\n", + "test_parse_ts_int_int_direct()\n", + "test_parse_ts_str_int_direct()\n", + "test_parse_ts_str_equals_int_direct()\n", + "test_parse_ts_str_direct()\n", + "test_parse_statescript_line_ts_int_int()\n", + "test_parse_statescript_line_ts_str_int()\n", + "test_parse_statescript_line_ts_str_equals_int()\n", + "test_parse_statescript_line_ts_str()\n", + "test_parse_statescript_line_unknown()\n", + "test_parse_statescript_line_comment_empty()\n", + "test_parse_statescript_line_comment_empty()\n", + "\n", + "sample_log_content = 
\"\"\"# Test log\n", + "76504 0 0\n", + "76566 center_poke\n", + "76566 65536 0\n", + "100078 counter_handlePoke = 1\n", + "100078 4 0\n", + "100559 0 0\n", + "Executing this line\n", + "115030 center_poke\n", + "115030 65536 0\n", + "\"\"\"\n", + "processor = StateScriptLogProcessor(sample_log_content)\n", + "with tempfile.NamedTemporaryFile(\n", + " mode=\"w\", delete=False, suffix=\".stateScriptLog\"\n", + ") as temp_log_file:\n", + " temp_log_file.write(sample_log_content)\n", + " temp_log_file_path = temp_log_file.name\n", + "\n", + "external_times = np.array([1678886476.530, 1678886500.100, 1678886515.050])\n", + "\n", + "test_init_from_string(processor, sample_log_content)\n", + "test_init_from_file(temp_log_file_path, sample_log_content)\n", + "test_init_from_file_not_found()\n", + "test_parse_raw_events(processor)\n", + "test_find_reference_events(processor)\n", + "test_calculate_time_offset_success(processor)\n", + "test_calculate_time_offset_fail_not_enough_log(processor, external_times)\n", + "test_calculate_time_offset_fail_not_enough_external(processor)\n", + "test_get_events_dataframe_defaults(processor)\n", + "test_get_events_dataframe_include_all(processor)\n", + "test_get_events_dataframe_with_offset(processor)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30269f5f", + "metadata": {}, + "outputs": [], + "source": [ + "from typing import List, Optional\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "def _interpret_port_mask(\n", + " port_state_value: Optional[int], max_ports: int = 32\n", + ") -> List[int]:\n", + " \"\"\"\n", + " Interprets an integer value as a bitmask representing active ports using NumPy.\n", + "\n", + " Assumes a 1-based port numbering system (e.g., bit 0 corresponds to port 1).\n", + "\n", + " Parameters\n", + " ----------\n", + " port_state_value : Optional[int]\n", + " The integer value representing the combined state of multiple ports.\n", + " Handles None or pandas NA 
values.\n", + " max_ports : int, optional\n", + " The maximum port number to check (bits 0 to max_ports-1), by default 32.\n", + "\n", + " Returns\n", + " -------\n", + " List[int]\n", + " A sorted list of 1-based port numbers that are active (bit is set).\n", + " Returns an empty list if the value is 0, None, or NA.\n", + "\n", + " Example\n", + " -------\n", + " >>> interpret_port_mask(9) # 1001 in binary -> Ports 1 and 4\n", + " [1, 4]\n", + " >>> interpret_port_mask(65536) # 2^16 -> Port 17\n", + " [17]\n", + " \"\"\"\n", + " # Return empty list for 0, None, or pandas NA\n", + " if pd.isna(port_state_value) or port_state_value == 0:\n", + " return []\n", + "\n", + " # Ensure value is treated as an integer after NA check\n", + " try:\n", + " port_state_int = int(port_state_value)\n", + " except (ValueError, TypeError):\n", + " # Should not happen if input is from Int64Dtype column after NA check,\n", + " # but included for robustness if called directly with invalid input.\n", + " return []\n", + "\n", + " # Create bit masks for positions 0 to max_ports-1\n", + " # E.g., [1, 2, 4, 8, ...]\n", + " bit_masks = np.left_shift(1, np.arange(max_ports))\n", + "\n", + " # Check which bits are set in the input value using bitwise AND\n", + " active_bits_mask = np.bitwise_and(port_state_int, bit_masks) > 0\n", + "\n", + " # Get the 0-based indices (bit positions) where bits are active\n", + " active_indices = np.where(active_bits_mask)[0]\n", + "\n", + " # Convert 0-based indices to 1-based port numbers and return as a list\n", + " active_ports = (active_indices + 1).tolist()\n", + "\n", + " # np.where returns sorted indices, so list is already sorted\n", + " return active_ports\n", + "\n", + "\n", + "def add_interpreted_port_columns(\n", + " events_df: pd.DataFrame,\n", + " input_mask_col: str = \"value1\",\n", + " output_mask_col: str = \"value2\",\n", + " max_ports: int = 32,\n", + ") -> pd.DataFrame:\n", + " \"\"\"\n", + " Adds 'active_inputs' and 'active_outputs' 
columns to a DataFrame\n", + " by interpreting bitmask columns representing port states using NumPy.\n", + "\n", + " Operates on and returns a modified copy of the input DataFrame.\n", + "\n", + " Parameters\n", + " ----------\n", + " events_df : pd.DataFrame\n", + " The DataFrame containing the parsed StateScript event data.\n", + " input_mask_col : str, optional\n", + " The name of the column containing the input port bitmask values,\n", + " by default 'value1'.\n", + " output_mask_col : str, optional\n", + " The name of the column containing the output port bitmask values,\n", + " by default 'value2'.\n", + " max_ports : int, optional\n", + " The maximum port number to check for the bitmasks, by default 32.\n", + "\n", + " Returns\n", + " -------\n", + " pd.DataFrame\n", + " A copy of the input DataFrame with 'active_inputs' and 'active_outputs'\n", + " columns added (or updated if they existed). Prints warnings if specified\n", + " mask columns are not found.\n", + "\n", + " Raises\n", + " ------\n", + " TypeError\n", + " If the input `events_df` is not a pandas DataFrame.\n", + " \"\"\"\n", + " if not isinstance(events_df, pd.DataFrame):\n", + " raise TypeError(\"Input 'events_df' must be a pandas DataFrame.\")\n", + "\n", + " # Work on a copy to avoid modifying the original DataFrame\n", + " processed_df = events_df.copy()\n", + "\n", + " # Interpret Input Ports\n", + " if input_mask_col in processed_df.columns:\n", + " # Convert column to numeric, coercing errors, then apply interpretation\n", + " input_series = pd.to_numeric(\n", + " processed_df[input_mask_col], errors=\"coerce\"\n", + " )\n", + " processed_df[\"active_inputs\"] = input_series.apply(\n", + " lambda mask: _interpret_port_mask(mask, max_ports)\n", + " )\n", + " else:\n", + " print(\n", + " f\"Warning: Input mask column '{input_mask_col}' not found in DataFrame. 
Skipping 'active_inputs'.\"\n", + " )\n", + " # Add empty column if it doesn't exist for consistency\n", + " processed_df[\"active_inputs\"] = [[] for _ in range(len(processed_df))]\n", + "\n", + " # Interpret Output Ports\n", + " if output_mask_col in processed_df.columns:\n", + " output_series = pd.to_numeric(\n", + " processed_df[output_mask_col], errors=\"coerce\"\n", + " )\n", + " processed_df[\"active_outputs\"] = output_series.apply(\n", + " lambda mask: _interpret_port_mask(mask, max_ports)\n", + " )\n", + " else:\n", + " print(\n", + " f\"Warning: Output mask column '{output_mask_col}' not found in DataFrame. Skipping 'active_outputs'.\"\n", + " )\n", + " # Add empty column if it doesn't exist for consistency\n", + " processed_df[\"active_outputs\"] = [[] for _ in range(len(processed_df))]\n", + "\n", + " return processed_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b94f61a", + "metadata": {}, + "outputs": [], + "source": [ + "statescript_dfs[9]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c825d3b", + "metadata": {}, + "outputs": [], + "source": [ + "add_interpreted_port_columns(statescript_dfs[8])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a17a0170", + "metadata": {}, + "outputs": [], + "source": [ + "statescript_dfs[8].groupby(\"type\").groups" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f4f09598", + "metadata": {}, + "outputs": [], + "source": [ + "events_by_type = [\n", + " group.drop(columns=[\"type\"])\n", + " for _, group in statescript_dfs[8].groupby(\"type\")\n", + "]\n", + "\n", + "events_by_type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "83cc5cf1", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "spyglass", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + 
}, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 4a9dd5cc38388940516086c8cab18b33cb44e81d Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 11:59:23 -0400 Subject: [PATCH 03/23] Return dataframe --- src/trodes_to_nwb/convert_statescript.py | 165 ++++++++++++++++------- 1 file changed, 115 insertions(+), 50 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index e0ce567..e7bf8a7 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -1023,101 +1023,166 @@ def segment_into_trials( trial_start_terms: List[str], trial_end_terms: List[str], time_column: str = "timestamp_sync", - ) -> List[Dict[str, Any]]: + ) -> pd.DataFrame: """ - Segments events from a StateScript log DataFrame into trials. + Segments events from the processed StateScript log DataFrame into trials. + + Identifies trial boundaries based on the presence of specified start and end + terms within the 'text' column of the `processed_events_df`. Parameters ---------- trial_start_terms : List[str] List of strings found in the 'text' column that mark the start of a trial. + The event containing the start term *is* the start of the trial. trial_end_terms : List[str] List of strings found in the 'text' column that mark the end of a trial. + The event containing the end term *is* the end of the trial. Can overlap with trial_start_terms. time_column : str, optional - The name of the column to use for time ranges ('timestamp_sync' or - 'trodes_timestamp_sec'), by default 'timestamp_sync'. + The name of the time column in `processed_events_df` to use for + reporting trial start and end times. Common choices are 'timestamp_sync' + (if offset calculated) or 'trodes_timestamp_sec'. Defaults to 'timestamp_sync'. 
Returns ------- - List[Dict[str, Any]] - A list where each dictionary represents a trial. Each trial dictionary - contains at least 'start_time' and 'end_time'. Further analysis - (like finding input/output changes within the trial) would typically - be done separately using these time ranges to filter events_df. + pd.DataFrame + A DataFrame where each row represents a detected trial. Columns include: + - 'start_time': The timestamp (from `time_column`) of the event marking the trial start. + - 'stop_time': The timestamp (from `time_column`) of the event marking the trial end. + - 'status': String indicating if the trial was 'complete' or 'incomplete' + (if the log ended before an end term was found). + Returns an empty DataFrame if no trials are found or if the required + columns ('text', `time_column`) are missing from `processed_events_df`. Notes ----- - - This implementation assumes trials are defined by text messages. - - It handles cases where start/end terms overlap. + - Requires `processed_events_df` to be generated first (e.g., by calling + `get_events_dataframe`). If it's None, this method will attempt to generate it + with default settings (apply_offset=True, exclude_comments_unknown=True). + - Assumes trials are sequential and non-overlapping based on the first occurrence + of start/end terms. + - Handles cases where start/end terms overlap (an event can mark both the end + of one trial and the start of the next). + - Warns if a start term is found while already in a trial (restarts the trial). + - Warns if the log ends while a trial is in progress. """ - events_df = self.processed_events_df - if events_df is None: - print("Error: No processed events DataFrame available.") - return [] + # Attempt to generate the df if it doesn't exist + if self.processed_events_df is None: + print( + "Warning: processed_events_df not found. Generating with default settings." 
+ ) + self.get_events_dataframe() # Use defaults: apply_offset=True, exclude=True + + events_df = self.processed_events_df # Use the potentially newly generated df + + # Check if DataFrame is valid and contains necessary columns + if events_df is None or events_df.empty: + print("Error: No processed events DataFrame available to segment.") + return pd.DataFrame( + columns=["start_time", "stop_time", "status"] + ) # Return empty DF if "text" not in events_df.columns or time_column not in events_df.columns: - print(f"Error: DataFrame must contain 'text' and '{time_column}' columns.") - return [] + print( + f"Error: DataFrame must contain 'text' and '{time_column}' columns for segmentation." + ) + return pd.DataFrame( + columns=["start_time", "stop_time", "status"] + ) # Return empty DF + + # Lists to store data for the final DataFrame + start_times = [] + stop_times = [] + statuses = [] - trials = [] current_trial_start_time = None in_trial = False + last_valid_time = ( + events_df[time_column].dropna().iloc[-1] + if not events_df[time_column].dropna().empty + else None + ) - # Iterate through the DataFrame rows + # Iterate through the DataFrame rows (index is line_num) for index, row in events_df.iterrows(): message = row["text"] # Check the 'text' column current_time = row[time_column] - if pd.isna(message) or pd.isna(current_time): - continue # Skip rows with missing text or time + # Skip rows with missing time in the specified column or missing text + if pd.isna(current_time) or pd.isna(message): + continue + + # Ensure message is treated as string for 'in' check + message_str = str(message) - found_end_term = any(term in message for term in trial_end_terms) - found_start_term = any(term in message for term in trial_start_terms) + # Check if the current message contains any start or end terms + # Use a generator expression for slightly better efficiency + found_end_term = any(term in message_str for term in trial_end_terms) + found_start_term = any(term in 
message_str for term in trial_start_terms) # --- End Trial Logic --- - # If we are currently in a trial and find an end term + # If we are currently in a trial AND find an end term if in_trial and found_end_term: - # Finalize the previous trial - trials.append( - { - "start_time": current_trial_start_time, - "end_time": current_time, - # Add trial index or other basic info if needed - } - ) + # Finalize the previous trial by adding its data to the lists + start_times.append(current_trial_start_time) + stop_times.append(current_time) + statuses.append("complete") + in_trial = False - current_trial_start_time = None # Reset start time + current_trial_start_time = ( + None # Reset start time for the next potential trial + ) # --- Start Trial Logic --- - # If we find a start term (potentially the same event as the end term) + # If we find a start term (this check happens AFTER potential end logic, + # allowing an event to end a trial and immediately start the next one) if found_start_term: - # If we weren't in a trial, start a new one + # If we were NOT previously in a trial, this starts a new one if not in_trial: in_trial = True current_trial_start_time = current_time - # If we *were* already in a trial (e.g., two start terms back-to-back - # without an end term), you might choose to log a warning or - # implicitly end the previous one here and start a new one. - # This example restarts the trial timer. + # If we *were* already in a trial (e.g., two start terms without an end term), + # log a warning and restart the trial timer from the current event. else: print( - f"Warning: Found start term '{message}' at {current_time} while already in a trial started at {current_trial_start_time}. Restarting trial." + f"Warning (Line {index}): Found start term '{message_str}' at {current_time} " + f"while already in a trial started at {current_trial_start_time}. Restarting trial timer." 
) + # Effectively ends the previous (implicit) trial and starts new one current_trial_start_time = current_time - # Handle case where log ends while still in a trial + # --- Handle Incomplete Trial at End of Log --- + # If the loop finishes and we are still marked as 'in_trial' if in_trial: print( - f"Warning: Log ended while still in a trial started at {current_trial_start_time}." + f"Warning: Log processing ended while still in a trial that started at {current_trial_start_time}. " + f"Marking as incomplete." + ) + # Add the incomplete trial to the lists + start_times.append(current_trial_start_time) + # Use the time of the last valid event in the time column as the end time + stop_times.append( + last_valid_time if last_valid_time is not None else np.nan + ) + statuses.append("incomplete") + + # --- Create Final DataFrame --- + # Construct the DataFrame from the collected lists + trials_df = pd.DataFrame( + {"start_time": start_times, "stop_time": stop_times, "status": statuses} + ) + + # Ensure correct dtypes (start/end times should match time_column, status is object) + if not trials_df.empty: + trials_df["start_time"] = trials_df["start_time"].astype( + events_df[time_column].dtype ) - # Optionally add the incomplete trial - trials.append( - { - "start_time": current_trial_start_time, - "end_time": events_df[time_column].iloc[-1], # Use last event time - "status": "incomplete", - } + trials_df["stop_time"] = trials_df["stop_time"].astype( + events_df[time_column].dtype ) + trials_df["status"] = trials_df["status"].astype( + "object" + ) # String/object type - return trials + return trials_df From cf735b9f381f64a8ca7cf8c28b20fe7ef25b9022 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 12:27:34 -0400 Subject: [PATCH 04/23] Fix type --- src/trodes_to_nwb/convert_dios.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/trodes_to_nwb/convert_dios.py b/src/trodes_to_nwb/convert_dios.py index 97c57a8..377140e 100644 --- 
a/src/trodes_to_nwb/convert_dios.py +++ b/src/trodes_to_nwb/convert_dios.py @@ -9,7 +9,7 @@ from .spike_gadgets_raw_io import SpikeGadgetsRawIO -def _get_channel_name_map(metadata: dict) -> dict[str, str]: +def _get_channel_name_map(metadata: dict) -> dict[str, dict[str, str]]: """Parses behavioral events metadata from the yaml file Parameters From 81e924c49890c190f2eb7e3512eef0400f95786f Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 12:35:12 -0400 Subject: [PATCH 05/23] Update tests --- .../tests/test_convert_statescript.py | 523 +++++++++++++----- 1 file changed, 375 insertions(+), 148 deletions(-) diff --git a/src/trodes_to_nwb/tests/test_convert_statescript.py b/src/trodes_to_nwb/tests/test_convert_statescript.py index f115cea..14e916f 100644 --- a/src/trodes_to_nwb/tests/test_convert_statescript.py +++ b/src/trodes_to_nwb/tests/test_convert_statescript.py @@ -8,6 +8,7 @@ from trodes_to_nwb.convert_statescript import ( StateScriptLogProcessor, + _interpret_DIO_mask, _parse_int, parse_statescript_line, parse_ts_int_int, @@ -28,10 +29,11 @@ def sample_log_content(): 76566 65536 0 100078 counter_handlePoke = 1 100078 4 0 -100559 0 0 +100559 LEFT_PORT 1 Executing this line without timestamp 115030 center_poke 115030 65536 0 +115040 0 0 # Test log ended """ @@ -75,14 +77,31 @@ def comment_only_processor(comment_only_log_content): @pytest.fixture(scope="module") def external_times(): """Provides sample external times for offset calculation tests.""" - # These correspond roughly to the '65536 0' events in sample_log_content + # These correspond to the '65536 0' events (ts_int_int) in sample_log_content # 76566 ms -> 76.566 s # 115030 ms -> 115.030 s - # Let's assume a base time for the external system + # Let's assume a base time (e.g., Unix timestamp) for the external system base_time = 1678880000.0 return np.array([base_time + 76.566, base_time + 115.030]) +@pytest.fixture(scope="module") +def external_times_for_str_int(): + """Provides 
sample external times for offset calculation tests using ts_str_int.""" + # These correspond to the 'LEFT_PORT 1' event in sample_log_content + # 100559 ms -> 100.559 s + base_time = 1678880000.0 + # Needs enough events for check_n_events default (4), let's assume more exist conceptually + return np.array( + [ + base_time + 100.559, + base_time + 110.0, + base_time + 120.0, + base_time + 130.0, + ] + ) + + @pytest.fixture def temp_log_file(sample_log_content): """Creates a temporary log file with standard content and yields its path.""" @@ -91,11 +110,11 @@ def temp_log_file(sample_log_content): ) as tmp_file: tmp_file.write(sample_log_content) tmp_file_path = tmp_file.name - yield tmp_file_path + yield pathlib.Path(tmp_file_path) # Yield Path object os.remove(tmp_file_path) -# --- Tests for Level 1 Parsers --- +# --- Tests for Level 0 Helpers --- def test_parse_int(): @@ -106,7 +125,27 @@ def test_parse_int(): assert _parse_int("abc") is None assert _parse_int("12.3") is None assert _parse_int("") is None - assert _parse_int("123 ") is None + assert _parse_int("123 ") is None # Fails because of trailing space + + +def test_interpret_dio_mask(): + """Test the _interpret_DIO_mask function.""" + assert _interpret_DIO_mask(9, max_DIOs=8) == [1, 4] # Binary 1001 + assert _interpret_DIO_mask(0) == [] + assert _interpret_DIO_mask(None) == [] + assert _interpret_DIO_mask(pd.NA) == [] + assert _interpret_DIO_mask(1) == [1] + assert _interpret_DIO_mask(65536, max_DIOs=32) == [17] # 2^16 + assert _interpret_DIO_mask(65535, max_DIOs=16) == list( + range(1, 17) + ) # All 16 bits set + assert _interpret_DIO_mask(65535, max_DIOs=32) == list( + range(1, 17) + ) # Check max_DIOs limit + assert _interpret_DIO_mask("abc") == [] # Invalid input type + + +# --- Tests for Level 1 Parsers --- def test_parse_ts_int_int(): @@ -114,7 +153,7 @@ def test_parse_ts_int_int(): parts = ["8386500", "0", "0"] expected = { "type": "ts_int_int", - "trodes_timestamp": 8386500, + "timestamp": 8386500, 
# Raw timestamp key "value1": 0, "value2": 0, } @@ -135,7 +174,7 @@ def test_parse_ts_str_int(): parts = ["8386500", "DOWN", "3"] expected = { "type": "ts_str_int", - "trodes_timestamp": 8386500, + "timestamp": 8386500, # Raw timestamp key "text": "DOWN", "value": 3, } @@ -144,39 +183,41 @@ def test_parse_ts_str_int(): parts_wrong_len = ["123", "UP"] assert parse_ts_str_int(parts_wrong_len) is None + # This should be parsed by parse_ts_int_int due to precedence, + # so parse_ts_str_int should return None here because str part is int. parts_str_is_int = ["123", "456", "789"] - assert parse_ts_str_int(parts_str_is_int) is None # Should be handled by ts_int_int + assert parse_ts_str_int(parts_str_is_int) is None parts_val_not_int = ["123", "UP", "abc"] assert parse_ts_str_int(parts_val_not_int) is None def test_parse_ts_str_equals_int(): - """Test parse_ts_str_equals_int directly.""" + """Test parse_ts_str_equals_int directly. + NOTE: The code only handles a single word before '='. + """ parts = ["100078", "counter_handlePoke", "=", "1"] expected = { "type": "ts_str_equals_int", - "trodes_timestamp": 100078, - "text": "counter_handlePoke", + "timestamp": 100078, # Raw timestamp key + "text": "counter_handlePoke", # Correctly uses parts[1] "value": 1, } assert parse_ts_str_equals_int(parts) == expected + # This case is NOT handled by the current implementation (len(parts) != 4) parts_multi_word = ["3610855", "total", "rewards", "=", "70"] - expected_multi = { - "type": "ts_str_equals_int", - "trodes_timestamp": 3610855, - "text": "total rewards", - "value": 70, - } - assert parse_ts_str_equals_int(parts_multi_word) == expected_multi + assert parse_ts_str_equals_int(parts_multi_word) is None parts_wrong_len = ["123", "=", "1"] assert parse_ts_str_equals_int(parts_wrong_len) is None - parts_no_equals = ["123", "text", "1"] + parts_no_equals = ["123", "text", "1"] # len=3 != 4 assert parse_ts_str_equals_int(parts_no_equals) is None + parts_wrong_equals_pos = ["123", 
"text", "1", "="] # '=' is parts[3], not parts[2] + assert parse_ts_str_equals_int(parts_wrong_equals_pos) is None + parts_val_not_int = ["123", "text", "=", "abc"] assert parse_ts_str_equals_int(parts_val_not_int) is None @@ -186,7 +227,7 @@ def test_parse_ts_str(): parts = ["76566", "center_poke"] expected = { "type": "ts_str", - "trodes_timestamp": 76566, + "timestamp": 76566, # Raw timestamp key "text": "center_poke", } assert parse_ts_str(parts) == expected @@ -194,7 +235,7 @@ def test_parse_ts_str(): parts_multi_word = ["1271815", "some", "multi", "word", "event"] expected_multi = { "type": "ts_str", - "trodes_timestamp": 1271815, + "timestamp": 1271815, # Raw timestamp key "text": "some multi word event", } assert parse_ts_str(parts_multi_word) == expected_multi @@ -202,10 +243,8 @@ def test_parse_ts_str(): parts_wrong_len = ["123"] assert parse_ts_str(parts_wrong_len) is None - parts_second_is_int = [ - "123", - "456", - ] # Second part is int, should fail this parser + # Second part is int, should fail this parser (handled by ts_int_int or ts_str_int) + parts_second_is_int = ["123", "456"] assert parse_ts_str(parts_second_is_int) is None @@ -214,30 +253,39 @@ def test_parse_ts_str(): def test_parse_statescript_line_dispatching(): """Test parse_statescript_line dispatching for various line types.""" - lines_expected_types = [ - ("8386500 0 0", "ts_int_int"), - ("8386500 DOWN 3", "ts_str_int"), - ("100078 counter_handlePoke = 1", "ts_str_equals_int"), - ("76566 center_poke", "ts_str"), - ("Executing trigger function 22", "unknown"), - ("# comment", "comment_or_empty"), - ("", "comment_or_empty"), - (" ", "comment_or_empty"), - ("123 456 abc", "unknown"), # Doesn't fit ts_int_int because of 'abc' - ("123 abc def", "ts_str"), # Fits ts_str - ("456 123 = 5", "ts_str_equals_int"), # Fits this specific pattern + lines_expected = [ + ("8386500 0 0", "ts_int_int", 8386500), + ("100559 LEFT_PORT 1", "ts_str_int", 100559), + ("100078 counter_handlePoke = 1", 
"ts_str_equals_int", 100078), + ("76566 center_poke", "ts_str", 76566), + ("Executing trigger function 22", "unknown", None), # No timestamp + ("# comment", "comment_or_empty", None), + ("", "comment_or_empty", None), + (" ", "comment_or_empty", None), + ("123 456 abc", "unknown", None), # Doesn't fit ts_int_int/ts_str_int/ts_str + ("123 abc def", "ts_str", 123), # Fits ts_str + # Precedence: ts_str_equals_int matches first + ("456 text = 5", "ts_str_equals_int", 456), + # Precedence: ts_int_int matches first + ("8386500 128 512", "ts_int_int", 8386500), + # Precedence: ts_str_int matches (str 'UP' is not int) + ("90000 UP 10", "ts_str_int", 90000), + # Precedence: ts_str matches (str 'some text' is not int) + ("95000 some text here", "ts_str", 95000), ] - for line, expected_type in lines_expected_types: - parsed = parse_statescript_line(line) - assert parsed["type"] == expected_type - assert parsed["raw_line"] == line.strip() # parse_statescript_line strips + for i, (line, expected_type, expected_ts) in enumerate(lines_expected): + parsed = parse_statescript_line(line, line_num=i) + assert parsed["type"] == expected_type, f"Line: {line}" + assert parsed["raw_line"] == line.strip(), f"Line: {line}" + assert parsed["line_num"] == i, f"Line: {line}" + # Check timestamp presence/value based on type if expected_type not in ["unknown", "comment_or_empty"]: - assert "trodes_timestamp" in parsed + assert "timestamp" in parsed, f"Line: {line}" + assert parsed["timestamp"] == expected_ts, f"Line: {line}" else: - assert "trodes_timestamp" not in parsed or pd.isna( - parsed.get("trodes_timestamp") - ) + # Should explicitly contain timestamp: None for these types + assert parsed.get("timestamp") is None, f"Line: {line}" # --- Tests for StateScriptLogProcessor --- @@ -257,7 +305,7 @@ def test_init_from_file(temp_log_file, sample_log_content): processor_file = StateScriptLogProcessor.from_file(temp_log_file) assert processor_file.log_content == sample_log_content assert 
processor_file.source_description.startswith("from file:") - assert pathlib.Path(temp_log_file).name in processor_file.source_description + assert temp_log_file.name in processor_file.source_description def test_init_from_file_not_found(): @@ -271,78 +319,126 @@ def test_parse_raw_events(processor, sample_log_content): events = processor.parse_raw_events() assert processor.raw_events is events # Should store result internally assert isinstance(events, list) - assert len(events) == len( - sample_log_content.strip().splitlines() - ) # One dict per line + # Count lines in the fixture (includes comments, blanks if any) + num_lines = len(sample_log_content.strip().splitlines()) + assert len(events) == num_lines + + # Check specific lines based on fixture content + # Line 0: # Test log started assert events[0]["type"] == "comment_or_empty" + assert events[0]["line_num"] == 0 + assert events[0]["timestamp"] is None + # Line 1: 76504 0 0 assert events[1]["type"] == "ts_int_int" - assert events[7]["type"] == "unknown" # "Executing this line..." 
- assert events[9]["type"] == "comment_or_empty" # Last comment + assert events[1]["timestamp"] == 76504 + assert events[1]["value1"] == 0 + assert events[1]["line_num"] == 1 assert events[1]["raw_line"] == "76504 0 0" + # Line 7: Executing this line without timestamp + assert events[7]["type"] == "unknown" assert events[7]["raw_line"] == "Executing this line without timestamp" + assert events[7]["line_num"] == 7 + assert events[7]["timestamp"] is None + # Line 11: # Test log ended + assert events[11]["type"] == "comment_or_empty" + assert events[11]["line_num"] == 11 + assert events[11]["timestamp"] is None def test_find_reference_events(processor): """Test the internal _find_reference_events method.""" - # Case 1: Find 'ts_str' events + # Case 1: Find 'ts_str' events ('center_poke' appears twice) ref_df_str = processor._find_reference_events( event_type="ts_str", conditions={"text": "center_poke"} ) assert isinstance(ref_df_str, pd.DataFrame) assert len(ref_df_str) == 2 + # Check raw timestamp column (renamed from 'timestamp' in raw_events) pd.testing.assert_series_equal( - ref_df_str["trodes_timestamp"], - pd.Series([76566, 115030], name="trodes_timestamp"), - check_dtype=False, + ref_df_str["timestamp"], # Raw integer timestamp + pd.Series([76566, 115030], name="timestamp", dtype=int), + check_names=True, + check_dtype=True, ) + # Check calculated seconds column assert "trodes_timestamp_sec" in ref_df_str.columns - assert ref_df_str["trodes_timestamp_sec"].iloc[0] == pytest.approx(76.566) + pd.testing.assert_series_equal( + ref_df_str["trodes_timestamp_sec"], + pd.Series([76.566, 115.030], name="trodes_timestamp_sec", dtype=float), + check_names=True, + check_dtype=True, + ) + assert ref_df_str["text"].tolist() == ["center_poke", "center_poke"] - # Case 2: Find 'ts_int_int' events with specific values + # Case 2: Find 'ts_int_int' events with specific values (appears twice) ref_df_int = processor._find_reference_events( - event_type="ts_int_int", 
conditions={"value1": 4, "value2": 0} + event_type="ts_int_int", conditions={"value1": 65536, "value2": 0} ) - assert len(ref_df_int) == 1 - assert ref_df_int["trodes_timestamp"].iloc[0] == 100078 + assert len(ref_df_int) == 2 + assert ref_df_int["timestamp"].tolist() == [76566, 115030] + assert ref_df_int["value1"].tolist() == [65536, 65536] + assert ref_df_int["value2"].tolist() == [0, 0] + assert ref_df_int["trodes_timestamp_sec"].tolist() == [76.566, 115.030] + + # Case 3: Find 'ts_str_equals_int' (appears once) + ref_df_eq = processor._find_reference_events( + event_type="ts_str_equals_int", conditions={"text": "counter_handlePoke"} + ) + assert len(ref_df_eq) == 1 + assert ref_df_eq["timestamp"].iloc[0] == 100078 + assert ref_df_eq["text"].iloc[0] == "counter_handlePoke" + assert ref_df_eq["value"].iloc[0] == 1 + assert ref_df_eq["trodes_timestamp_sec"].iloc[0] == pytest.approx(100.078) - # Case 3: No matching events found + # Case 4: No matching events found ref_df_none = processor._find_reference_events( - event_type="ts_str_int", conditions={"text": "nonexistent"} + event_type="ts_str", conditions={"text": "nonexistent"} ) assert ref_df_none.empty assert isinstance(ref_df_none, pd.DataFrame) # Should still return DF + # Check expected columns exist even if empty + assert "timestamp" in ref_df_none.columns + assert "trodes_timestamp_sec" in ref_df_none.columns + assert "text" in ref_df_none.columns # From conditions - # Case 4: Ensure processor parses if raw_events is empty - processor.raw_events = [] + # Case 5: Ensure processor parses if raw_events is empty + processor.raw_events = [] # Reset raw events + assert processor.raw_events == [] ref_df_reparse = processor._find_reference_events( event_type="ts_str", conditions={"text": "center_poke"} ) - assert len(ref_df_reparse) == 2 # Should re-parse automatically + assert len(processor.raw_events) > 0 # Should have re-parsed + assert len(ref_df_reparse) == 2 # Should find the events def 
test_calculate_time_offset_success(processor, external_times): """Test successful time offset calculation.""" + # Use the 'ts_int_int' events matching external_times fixture offset = processor.calculate_time_offset( external_reference_times=external_times, - log_event_type="ts_int_int", # Use the events corresponding to external_times + log_event_type="ts_int_int", + # Use the keys from the raw parsed dict ('value1', 'value2') log_event_conditions={"value1": 65536, "value2": 0}, - check_n_events=2, # Use both events for matching + check_n_events=2, # Use both available matching events ) assert offset is not None assert processor.time_offset == offset # Check internal storage # Expected offset = external_base_time = 1678880000.0 - # external_times[0] = base + 76.566; log_times[0] = 76.566 + # external_times[0] = base + 76.566; log_times_sec[0] = 76.566 + # offset = external - log = base assert offset == pytest.approx(1678880000.0) -def test_calculate_time_offset_fail_not_enough_log(processor, external_times): +def test_calculate_time_offset_fail_not_enough_log( + processor, external_times_for_str_int +): """Test offset calculation failure due to insufficient log events.""" - # 'counter_handlePoke' only appears once, need 2 events + # 'LEFT_PORT 1' only appears once in the log, but default check_n_events=4 offset = processor.calculate_time_offset( - external_reference_times=external_times, - log_event_type="ts_str_equals_int", - log_event_conditions={"text": "counter_handlePoke"}, - check_n_events=2, + external_reference_times=external_times_for_str_int, # Has 4 times + log_event_type="ts_str_int", + log_event_conditions={"text": "LEFT_PORT", "value": 1}, + # check_n_events=4, # Default ) assert offset is None assert processor.time_offset is None # Should remain None @@ -350,27 +446,38 @@ def test_calculate_time_offset_fail_not_enough_log(processor, external_times): def test_calculate_time_offset_fail_not_enough_external(processor): """Test offset calculation failure 
due to insufficient external times.""" - # Only one external time provided, need 2 events + # Log has 2 '65536 0' events, provide only 1 external time, default check=4 offset = processor.calculate_time_offset( - external_reference_times=np.array([1678880076.566]), + external_reference_times=np.array([1678880076.566]), # Only 1 time log_event_type="ts_int_int", log_event_conditions={"value1": 65536, "value2": 0}, - check_n_events=2, + # check_n_events=4, # Default ) assert offset is None assert processor.time_offset is None + # Test again with check_n_events=2 (should still fail, need 2 external) + offset_check2 = processor.calculate_time_offset( + external_reference_times=np.array([1678880076.566]), # Only 1 time + log_event_type="ts_int_int", + log_event_conditions={"value1": 65536, "value2": 0}, + check_n_events=2, + ) + assert offset_check2 is None + assert processor.time_offset is None + def test_calculate_time_offset_fail_mismatch(processor, external_times): """Test offset calculation failure due to exceeding mismatch threshold.""" - # Shift external times slightly more than default threshold (0.1) - shifted_external_times = external_times + 0.06 # Total shift 0.12 over 2 events + # Shift external times enough to exceed default threshold (0.1) over 2 events + # Shift each by 0.06 -> total diff = 0.06 + 0.06 = 0.12 > 0.1 + shifted_external_times = external_times + 0.06 offset = processor.calculate_time_offset( external_reference_times=shifted_external_times, log_event_type="ts_int_int", log_event_conditions={"value1": 65536, "value2": 0}, check_n_events=2, - match_threshold=0.1, # Default threshold + match_threshold=0.1, # Explicitly set default for clarity ) assert offset is None assert processor.time_offset is None @@ -381,80 +488,177 @@ def test_get_events_dataframe_defaults(processor): df = processor.get_events_dataframe(apply_offset=False) assert processor.processed_events_df is df # Check internal storage assert isinstance(df, pd.DataFrame) - # Expected: 
11 lines total - 3 comments - 1 unknown = 7 valid events - assert len(df) == 7 + # Expected: 12 lines total - 2 comments - 1 unknown = 9 valid events + assert len(df) == 9 + assert df.index.name == "line_num" # Index should be line_num + + # --- Check Columns --- assert "raw_line" in df.columns + assert "type" in df.columns assert "trodes_timestamp" in df.columns assert "trodes_timestamp_sec" in df.columns + assert "text" in df.columns + assert "value" in df.columns + assert "active_DIO_inputs_bitmask" in df.columns + assert "active_DIO_outputs_bitmask" in df.columns + assert "active_DIO_inputs" in df.columns # List column + assert "active_DIO_outputs" in df.columns # List column assert "timestamp_sync" not in df.columns # Offset not applied - # Check content and types - assert df["type"].iloc[0] == "ts_int_int" - assert df["raw_line"].iloc[0] == "76504 0 0" - assert pd.isna(df["text"].iloc[0]) # text NA for ts_int_int - assert df["value1"].iloc[0] == 0 - assert df["trodes_timestamp"].dtype == "int64" + + # --- Check Content and Types (spot check first few rows) --- + # Row index corresponds to line_num + # Line 1: 76504 0 0 (type: ts_int_int) -> line_num 1 + assert df.loc[1, "type"] == "ts_int_int" + assert df.loc[1, "raw_line"] == "76504 0 0" + assert df.loc[1, "trodes_timestamp"] == 76504 + assert df.loc[1, "trodes_timestamp_sec"] == pytest.approx(76.504) + assert pd.isna(df.loc[1, "text"]) + assert pd.isna(df.loc[1, "value"]) + assert df.loc[1, "active_DIO_inputs_bitmask"] == 0 + assert df.loc[1, "active_DIO_outputs_bitmask"] == 0 + assert df.loc[1, "active_DIO_inputs"] == [] + assert df.loc[1, "active_DIO_outputs"] == [] + + # Line 2: 76566 center_poke (type: ts_str) -> line_num 2 + assert df.loc[2, "type"] == "ts_str" + assert df.loc[2, "trodes_timestamp"] == 76566 + assert df.loc[2, "text"] == "center_poke" + assert pd.isna(df.loc[2, "value"]) + assert pd.isna(df.loc[2, "active_DIO_inputs_bitmask"]) + assert pd.isna(df.loc[2, "active_DIO_outputs_bitmask"]) + 
assert df.loc[2, "active_DIO_inputs"] == [] # Should be empty list from NA mask + assert df.loc[2, "active_DIO_outputs"] == [] # Should be empty list from NA mask + + # Line 3: 76566 65536 0 (type: ts_int_int) -> line_num 3 + assert df.loc[3, "type"] == "ts_int_int" + assert df.loc[3, "trodes_timestamp"] == 76566 + assert df.loc[3, "active_DIO_inputs_bitmask"] == 65536 # DIO 17 + assert df.loc[3, "active_DIO_outputs_bitmask"] == 0 + assert df.loc[3, "active_DIO_inputs"] == [17] # Check interpretation + assert df.loc[3, "active_DIO_outputs"] == [] + + # Line 4: 100078 counter_handlePoke = 1 (type: ts_str_equals_int) -> line_num 4 + assert df.loc[4, "type"] == "ts_str_equals_int" + assert df.loc[4, "trodes_timestamp"] == 100078 + assert df.loc[4, "text"] == "counter_handlePoke" + assert df.loc[4, "value"] == 1 + assert pd.isna(df.loc[4, "active_DIO_inputs_bitmask"]) + + # Line 6: 100559 LEFT_PORT 1 (type: ts_str_int) -> line_num 6 + assert df.loc[6, "type"] == "ts_str_int" + assert df.loc[6, "trodes_timestamp"] == 100559 + assert df.loc[6, "text"] == "LEFT_PORT" + assert df.loc[6, "value"] == 1 + assert pd.isna(df.loc[6, "active_DIO_inputs_bitmask"]) + + # --- Check Dtypes --- + assert df["trodes_timestamp"].dtype == pd.Int64Dtype() # Nullable int assert df["trodes_timestamp_sec"].dtype == "float64" - assert df["value"].dtype == pd.Int64Dtype() # Nullable Integer + assert df["text"].dtype == "object" # String/mixed + assert df["value"].dtype == pd.Int64Dtype() + assert df["active_DIO_inputs_bitmask"].dtype == pd.Int64Dtype() + assert df["active_DIO_outputs_bitmask"].dtype == pd.Int64Dtype() + assert df["active_DIO_inputs"].dtype == "object" # List type + assert df["active_DIO_outputs"].dtype == "object" # List type -def test_get_events_dataframe_include_all(processor): +def test_get_events_dataframe_include_all(processor, sample_log_content): """Test including comments and unknown lines.""" df = processor.get_events_dataframe( apply_offset=False, 
exclude_comments_unknown=False ) assert isinstance(df, pd.DataFrame) - assert len(df) == 10 # All lines included - assert df["type"].iloc[0] == "comment_or_empty" - assert df["type"].iloc[7] == "unknown" - assert df["raw_line"].iloc[7] == "Executing this line without timestamp" - # Check that timestamp is NA/0 for lines without one - assert ( - pd.isna(df["trodes_timestamp"].iloc[0]) or df["trodes_timestamp"].iloc[0] == 0 - ) - assert ( - pd.isna(df["trodes_timestamp"].iloc[7]) or df["trodes_timestamp"].iloc[7] == 0 - ) - assert pd.isna(df["trodes_timestamp_sec"].iloc[0]) or np.isnan( - df["trodes_timestamp_sec"].iloc[0] - ) - assert pd.isna(df["trodes_timestamp_sec"].iloc[7]) or np.isnan( - df["trodes_timestamp_sec"].iloc[7] - ) + num_lines = len(sample_log_content.strip().splitlines()) + assert len(df) == num_lines # All lines included (12) + assert df.index.name == "line_num" + + # Check specific lines + # Line 0: Comment + assert df.loc[0, "type"] == "comment_or_empty" + assert df.loc[0, "raw_line"] == "# Test log started" + assert pd.isna(df.loc[0, "trodes_timestamp"]) # Should be NA (Int64Dtype) + assert np.isnan(df.loc[0, "trodes_timestamp_sec"]) # Should be NaN (float) + assert pd.isna(df.loc[0, "text"]) # Should be NA + assert df.loc[0, "active_DIO_inputs"] == [] # Should be empty list for comment + + # Line 7: Unknown + assert df.loc[7, "type"] == "unknown" + assert df.loc[7, "raw_line"] == "Executing this line without timestamp" + assert pd.isna(df.loc[7, "trodes_timestamp"]) + assert np.isnan(df.loc[7, "trodes_timestamp_sec"]) + assert pd.isna(df.loc[7, "text"]) + assert df.loc[7, "active_DIO_inputs"] == [] + + # Line 11: Comment + assert df.loc[11, "type"] == "comment_or_empty" + assert df.loc[11, "raw_line"] == "# Test log ended" + assert pd.isna(df.loc[11, "trodes_timestamp"]) + + # Check a valid line still looks right + assert df.loc[1, "type"] == "ts_int_int" + assert df.loc[1, "trodes_timestamp"] == 76504 def 
test_get_events_dataframe_with_offset(processor): """Test applying offset and check sync timestamp calculation.""" # Simulate successful offset calculation - processor.time_offset = 1678880000.0 + test_offset = 1678880000.0 + processor.time_offset = test_offset df = processor.get_events_dataframe(apply_offset=True) # Default exclude=True assert isinstance(df, pd.DataFrame) - assert len(df) == 7 # Excludes comments/unknown + assert len(df) == 9 # Excludes comments/unknown + assert df.index.name == "line_num" assert "timestamp_sync" in df.columns - # Check calculation for the first valid event (76504 ms) - expected_sync_time = (76504 / 1000.0) + 1678880000.0 - assert df["timestamp_sync"].iloc[0] == pytest.approx(expected_sync_time) - # Check NA value handling in other columns remains correct - assert pd.isna(df["text"].iloc[0]) - assert df["value1"].iloc[0] == 0 assert df["timestamp_sync"].dtype == "float64" + # Check calculation for a few events + # Line 1: 76504 ms + expected_sync_1 = (76504 / 1000.0) + test_offset + assert df.loc[1, "timestamp_sync"] == pytest.approx(expected_sync_1) + + # Line 3: 76566 ms + expected_sync_3 = (76566 / 1000.0) + test_offset + assert df.loc[3, "timestamp_sync"] == pytest.approx(expected_sync_3) + + # Line 9: 115030 ms + expected_sync_9 = (115030 / 1000.0) + test_offset + assert df.loc[9, "timestamp_sync"] == pytest.approx(expected_sync_9) + + # Check NA value handling in other columns remains correct + assert pd.isna(df.loc[1, "text"]) + assert df.loc[1, "active_DIO_inputs_bitmask"] == 0 + assert df.loc[3, "active_DIO_inputs"] == [17] + -def test_get_events_dataframe_offset_not_calculated(processor, capsys): - """Test applying offset when offset is None.""" +def test_get_events_dataframe_apply_offset_not_calculated(processor, capsys): + """Test applying offset when offset is None generates warning and no column.""" processor.time_offset = None # Ensure no offset is set - df = processor.get_events_dataframe(apply_offset=True) + df = 
processor.get_events_dataframe(apply_offset=True) # Request offset application assert isinstance(df, pd.DataFrame) assert "timestamp_sync" not in df.columns # Sync column should be absent - assert len(df) == 7 # Should still return the dataframe without the column + assert len(df) == 9 # Should still return the dataframe without the column + assert df.index.name == "line_num" - # Check that the warning was printed to stderr/stdout + # Check that the warning was printed captured = capsys.readouterr() assert ( - "Warning: Time offset requested but not calculated" in captured.out - or "Warning: Time offset requested but not calculated" in captured.err + "Warning: Time offset application requested" in captured.out + or "Warning: Time offset application requested" in captured.err ) +def test_get_events_dataframe_no_apply_offset_calculated(processor): + """Test apply_offset=False ignores existing offset.""" + processor.time_offset = 1000.0 # Set an offset + df = processor.get_events_dataframe( + apply_offset=False + ) # Request NO offset application + assert isinstance(df, pd.DataFrame) + assert "timestamp_sync" not in df.columns # Sync column should be absent + assert len(df) == 9 + assert df.index.name == "line_num" + + def test_empty_log(empty_processor): """Test processing an empty log file.""" events = empty_processor.parse_raw_events() @@ -462,6 +666,8 @@ def test_empty_log(empty_processor): df = empty_processor.get_events_dataframe() assert isinstance(df, pd.DataFrame) assert df.empty + # An empty dataframe doesn't have an index name set + assert df.index.name is None def test_comment_only_log(comment_only_processor): @@ -469,11 +675,13 @@ def test_comment_only_log(comment_only_processor): events = comment_only_processor.parse_raw_events() assert len(events) == 4 # 4 lines in the fixture assert all(e["type"] == "comment_or_empty" for e in events) + assert all(e["timestamp"] is None for e in events) # Default: exclude comments -> empty DataFrame df_excluded = 
comment_only_processor.get_events_dataframe(apply_offset=False) assert isinstance(df_excluded, pd.DataFrame) assert df_excluded.empty + assert df_excluded.index.name is None # Include comments -> DataFrame with only comment entries df_included = comment_only_processor.get_events_dataframe( @@ -481,36 +689,38 @@ def test_comment_only_log(comment_only_processor): ) assert isinstance(df_included, pd.DataFrame) assert len(df_included) == 4 + assert df_included.index.name == "line_num" assert all(df_included["type"] == "comment_or_empty") - assert ( - pd.isna(df_included["trodes_timestamp"].iloc[0]) - or df_included["trodes_timestamp"].iloc[0] == 0 - ) + assert df_included["trodes_timestamp"].isna().all() + assert df_included["trodes_timestamp_sec"].isna().all() + assert df_included["active_DIO_inputs"].apply(lambda x: x == []).all() def test_repr(processor): - """Test the __repr__ method.""" + """Test the __repr__ method reflects state.""" # Initial state initial_repr = repr(processor) assert isinstance(initial_repr, str) - assert "StateScriptLogProcessor" in initial_repr - assert "not parsed" in initial_repr - assert "no offset" in initial_repr - assert "not generated" in initial_repr + assert "StateScriptLogProcessor" in html_initial + assert "Status: Not Parsed" in html_initial + assert "Offset: Not Calculated" in html_initial + assert "DataFrame: Not Generated" in html_initial + assert "Source: from string" in html_initial + assert "DataFrame Preview" not in html_initial # No preview yet + # After parsing processor.parse_raw_events() + num_raw = len(processor.raw_events) html_parsed = processor._repr_html_() assert isinstance(html_parsed, str) - assert "Parsed" in html_parsed - assert f"({len(processor.raw_events)} raw entries)" in html_parsed + assert "Status: Parsed" in html_parsed + assert f"({num_raw} raw entries)" in html_parsed + assert "Offset: Not Calculated" in html_parsed + assert "DataFrame: Not Generated" in html_parsed - processor.time_offset = 1000.0 
+ # After offset calculation + processor.time_offset = 1234.5678 html_offset = processor._repr_html_() assert isinstance(html_offset, str) - assert "Offset: 1000.0" in html_offset + assert "Offset: 1234.5678s" in html_offset # Check formatting + assert "DataFrame: Not Generated" in html_offset + # After DataFrame generation processor.get_events_dataframe() html_df = processor._repr_html_() assert isinstance(html_df, str) assert "DataFrame: Generated" in html_df - assert "DataFrame Preview" in html_df # Check for preview section + assert ( + "
DataFrame Preview (first 5 rows):
" in html_df + ) # Check for preview section + assert " Date: Mon, 28 Apr 2025 13:00:54 -0400 Subject: [PATCH 06/23] Fix tests --- src/trodes_to_nwb/convert_statescript.py | 4 ++-- .../tests/test_convert_statescript.py | 14 ++++++-------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index e7bf8a7..b81e5a1 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -149,8 +149,8 @@ def parse_ts_str_equals_int(parts: list) -> Optional[Dict[str, Any]]: # Check length and presence of '=' in the correct position if len(parts) == 4 and parts[2] == "=": timestamp = _parse_int(parts[0]) - value = _parse_int(parts[-1]) # Expect integer value only - text = parts[3] + text = parts[1] + value = _parse_int(parts[3]) # Expect integer value only # Check if timestamp and value were successfully parsed as integers if timestamp is not None and value is not None: diff --git a/src/trodes_to_nwb/tests/test_convert_statescript.py b/src/trodes_to_nwb/tests/test_convert_statescript.py index 14e916f..29e3a40 100644 --- a/src/trodes_to_nwb/tests/test_convert_statescript.py +++ b/src/trodes_to_nwb/tests/test_convert_statescript.py @@ -51,7 +51,7 @@ def comment_only_log_content(): # Middle line # End - """ +""" @pytest.fixture @@ -91,7 +91,6 @@ def external_times_for_str_int(): # These correspond to the 'LEFT_PORT 1' event in sample_log_content # 100559 ms -> 100.559 s base_time = 1678880000.0 - # Needs enough events for check_n_events default (4), let's assume more exist conceptually return np.array( [ base_time + 100.559, @@ -110,7 +109,7 @@ def temp_log_file(sample_log_content): ) as tmp_file: tmp_file.write(sample_log_content) tmp_file_path = tmp_file.name - yield pathlib.Path(tmp_file_path) # Yield Path object + yield pathlib.Path(tmp_file_path) os.remove(tmp_file_path) @@ -125,7 +124,6 @@ def test_parse_int(): assert _parse_int("abc") is None 
assert _parse_int("12.3") is None assert _parse_int("") is None - assert _parse_int("123 ") is None # Fails because of trailing space def test_interpret_dio_mask(): @@ -153,7 +151,7 @@ def test_parse_ts_int_int(): parts = ["8386500", "0", "0"] expected = { "type": "ts_int_int", - "timestamp": 8386500, # Raw timestamp key + "timestamp": 8386500, "value1": 0, "value2": 0, } @@ -174,7 +172,7 @@ def test_parse_ts_str_int(): parts = ["8386500", "DOWN", "3"] expected = { "type": "ts_str_int", - "timestamp": 8386500, # Raw timestamp key + "timestamp": 8386500, "text": "DOWN", "value": 3, } @@ -227,7 +225,7 @@ def test_parse_ts_str(): parts = ["76566", "center_poke"] expected = { "type": "ts_str", - "timestamp": 76566, # Raw timestamp key + "timestamp": 76566, "text": "center_poke", } assert parse_ts_str(parts) == expected @@ -235,7 +233,7 @@ def test_parse_ts_str(): parts_multi_word = ["1271815", "some", "multi", "word", "event"] expected_multi = { "type": "ts_str", - "timestamp": 1271815, # Raw timestamp key + "timestamp": 1271815, "text": "some multi word event", } assert parse_ts_str(parts_multi_word) == expected_multi From acca16423caa3650c055641d6de94c56e29c27f3 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 18:09:57 -0400 Subject: [PATCH 07/23] Fix name --- src/trodes_to_nwb/convert_statescript.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index b81e5a1..b920ba1 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -618,7 +618,7 @@ def _find_reference_events( # Iterate through all parsed raw events for event in self.raw_events: # Check if the event type matches and it has a timestamp - if event.get("type") == event_type and "trodes_timestamp" in event: + if event.get("type") == event_type and "timestamp" in event: # Check if all specified conditions are met for this event match = all( 
event.get(key) == value for key, value in conditions.items() From 550231506575e80ee8c87f554e46df3a481ac7fc Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 18:10:13 -0400 Subject: [PATCH 08/23] Not expected to have active_DIO_inputs --- src/trodes_to_nwb/tests/test_convert_statescript.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/trodes_to_nwb/tests/test_convert_statescript.py b/src/trodes_to_nwb/tests/test_convert_statescript.py index 29e3a40..0e0a3b5 100644 --- a/src/trodes_to_nwb/tests/test_convert_statescript.py +++ b/src/trodes_to_nwb/tests/test_convert_statescript.py @@ -691,7 +691,6 @@ def test_comment_only_log(comment_only_processor): assert all(df_included["type"] == "comment_or_empty") assert df_included["trodes_timestamp"].isna().all() assert df_included["trodes_timestamp_sec"].isna().all() - assert df_included["active_DIO_inputs"].apply(lambda x: x == []).all() def test_repr(processor): From 42d0c408ca7fb8cb3c5301a20da1c76c03306e72 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 18:12:35 -0400 Subject: [PATCH 09/23] Check for bitmask columns --- src/trodes_to_nwb/convert_statescript.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index b920ba1..1c21216 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -904,12 +904,14 @@ def get_events_dataframe( "value2": "active_DIO_outputs_bitmask", } ) - df["active_DIO_inputs"] = df["active_DIO_inputs_bitmask"].apply( - lambda mask: _interpret_DIO_mask(mask, max_DIOs) - ) - df["active_DIO_outputs"] = df["active_DIO_outputs_bitmask"].apply( - lambda mask: _interpret_DIO_mask(mask, max_DIOs) - ) + if "active_DIO_inputs" in df.columns: + df["active_DIO_inputs"] = df["active_DIO_inputs_bitmask"].apply( + lambda mask: _interpret_DIO_mask(mask, max_DIOs) + ) + if "active_DIO_outputs" in df.columns: + 
df["active_DIO_outputs"] = df["active_DIO_outputs_bitmask"].apply( + lambda mask: _interpret_DIO_mask(mask, max_DIOs) + ) # --- Timestamp Processing --- # Ensure 'timestamp' column exists and convert to numeric/int From b0e2663761ef7b30fa2e53aeb70369cd292613bc Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 18:14:45 -0400 Subject: [PATCH 10/23] Fix examples --- src/trodes_to_nwb/convert_statescript.py | 30 ++++++++++++------------ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index 1c21216..3ffc856 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -300,9 +300,9 @@ def _interpret_DIO_mask( Example ------- - >>> interpret_DIO_mask(9) # 1001 in binary -> Ports 1 and 4 + >>> _interpret_DIO_mask(9) # 1001 in binary -> Ports 1 and 4 [1, 4] - >>> interpret_DIO_mask(65536) # 2^16 -> Port 17 + >>> _interpret_DIO_mask(65536) # 2^16 -> Port 17 [17] """ if pd.isna(DIO_state_value) or DIO_state_value == 0: @@ -367,19 +367,19 @@ class StateScriptLogProcessor: Example Usage ------------- - >>> # Load from file - >>> processor = StateScriptLogProcessor.from_file("path/to/session.stateScriptLog") - >>> # Assuming 'external_sync_times' is a numpy array of timestamps (in seconds) - >>> # corresponding to the log event "DIO Pin 8 going UP" - >>> processor.calculate_time_offset( - ... external_reference_times=external_sync_times, - ... log_event_type="ts_str_int", - ... log_event_conditions={"text": "UP", "value": 8} - ... ) - >>> # Get the processed DataFrame with synchronized timestamps - >>> df = processor.get_events_dataframe(apply_offset=True) - >>> if df is not None: - ... 
print(df[['timestamp_sync', 'type', 'text', 'value']].head()) + # Load from file + processor = StateScriptLogProcessor.from_file("path/to/session.stateScriptLog") + # Assuming 'external_sync_times' is a numpy array of timestamps (in seconds) + # corresponding to the log event "DIO Pin 8 going UP" + processor.calculate_time_offset( + external_reference_times=external_sync_times, + log_event_type="ts_str_int", + log_event_conditions={"text": "UP", "value": 8} + ) + # Get the processed DataFrame with synchronized timestamps + df = processor.get_events_dataframe(apply_offset=True) + if df is not None: + print(df[['timestamp_sync', 'type', 'text', 'value']].head()) """ MILLISECONDS_PER_SECOND = 1000 From 1eddb74c737a47fccc527b1462f4d2e06ce90b8d Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 18:35:38 -0400 Subject: [PATCH 11/23] Fix name --- src/trodes_to_nwb/convert_statescript.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index 3ffc856..f7a8687 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -904,11 +904,11 @@ def get_events_dataframe( "value2": "active_DIO_outputs_bitmask", } ) - if "active_DIO_inputs" in df.columns: + if "active_DIO_inputs_bitmask" in df.columns: df["active_DIO_inputs"] = df["active_DIO_inputs_bitmask"].apply( lambda mask: _interpret_DIO_mask(mask, max_DIOs) ) - if "active_DIO_outputs" in df.columns: + if "active_DIO_outputs_bitmask" in df.columns: df["active_DIO_outputs"] = df["active_DIO_outputs_bitmask"].apply( lambda mask: _interpret_DIO_mask(mask, max_DIOs) ) From 962e60644b06289535580c521ebf46bfc68a3a60 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 21:48:36 -0400 Subject: [PATCH 12/23] Fix test --- src/trodes_to_nwb/tests/test_convert_statescript.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git 
a/src/trodes_to_nwb/tests/test_convert_statescript.py b/src/trodes_to_nwb/tests/test_convert_statescript.py index 0e0a3b5..aace40c 100644 --- a/src/trodes_to_nwb/tests/test_convert_statescript.py +++ b/src/trodes_to_nwb/tests/test_convert_statescript.py @@ -467,9 +467,10 @@ def test_calculate_time_offset_fail_not_enough_external(processor): def test_calculate_time_offset_fail_mismatch(processor, external_times): """Test offset calculation failure due to exceeding mismatch threshold.""" - # Shift external times enough to exceed default threshold (0.1) over 2 events - # Shift each by 0.06 -> total diff = 0.06 + 0.06 = 0.12 > 0.1 - shifted_external_times = external_times + 0.06 + # Shift external times enough to exceed default threshold (0.1) on + # the second event + shifted_external_times = external_times + shifted_external_times[1] += 0.2 # Shift the second time by 0.2 seconds offset = processor.calculate_time_offset( external_reference_times=shifted_external_times, log_event_type="ts_int_int", From 1dcf3de1473175cfb37308b83086706b34150238 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 22:18:02 -0400 Subject: [PATCH 13/23] Fix name --- src/trodes_to_nwb/tests/test_convert_statescript.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/trodes_to_nwb/tests/test_convert_statescript.py b/src/trodes_to_nwb/tests/test_convert_statescript.py index aace40c..47838c3 100644 --- a/src/trodes_to_nwb/tests/test_convert_statescript.py +++ b/src/trodes_to_nwb/tests/test_convert_statescript.py @@ -395,7 +395,7 @@ def test_find_reference_events(processor): assert ref_df_none.empty assert isinstance(ref_df_none, pd.DataFrame) # Should still return DF # Check expected columns exist even if empty - assert "timestamp" in ref_df_none.columns + assert "trodes_timestamp" in ref_df_none.columns assert "trodes_timestamp_sec" in ref_df_none.columns assert "text" in ref_df_none.columns # From conditions @@ -470,6 +470,8 @@ def 
test_calculate_time_offset_fail_mismatch(processor, external_times): # Shift external times enough to exceed default threshold (0.1) on # the second event shifted_external_times = external_times + # External times are not a good stable reference + # because the second one is shifted shifted_external_times[1] += 0.2 # Shift the second time by 0.2 seconds offset = processor.calculate_time_offset( external_reference_times=shifted_external_times, From eb87814587ce79cc9c7cfba5471667238b0a6140 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Mon, 28 Apr 2025 22:42:13 -0400 Subject: [PATCH 14/23] Add docstring --- src/trodes_to_nwb/convert_statescript.py | 60 ++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index f7a8687..0545d52 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -1,3 +1,63 @@ +"""StateScript log parsing and processing module. + +This module provides tools for parsing, interpreting, and processing `.stateScriptLog` +files generated by the Trodes neural recording system. It handles the conversion +of Trodes timestamps, alignment with external time sources, interpretation of +Digital Input/Output (DIO) states, and processing of various common log line formats. + +Notes +----- +Source Files: + - Log files parsed by this module typically have the `.stateScriptLog` extension. + - These files are generated by the Trodes software during data acquisition sessions. + +Timestamp Information: + - The primary timestamp (``) found in these logs is a 64-bit integer. + - It represents the number of milliseconds elapsed since the start of the + Trodes recording session. + - This is often referred to as the 'Trodes timestamp'. + +Log Line Formats: + `.stateScriptLog` files usually contain lines adhering to several common formats. 
+ The module aims to parse lines matching these structures: + + ``ts_int_int`` : ` ` + Represents timestamp and two integers. These integers often function as + bitwise masks representing the state of DIO pins. + Example: ``1817158 128 512`` + + ``ts_str_int`` : ` ` + Represents timestamp, a string label, and an integer value. Frequently + used for user-defined messages logging DIO pin state changes (e.g., pin name and state). + Example: ``8386500 DOWN 3`` + + ``ts_str_eq_int`` : ` = ` + Represents timestamp and a named integer variable assignment, useful for + tracking counters or state variables within the StateScript. + Example: ``3610855 totRewards = 70`` + + ``ts_str`` : ` ` + Represents timestamp followed by one or more space-separated strings. + Commonly used for logging event markers or descriptive text messages. + Example: ``1678886401 LOCKEND`` + + ``comment_or_empty`` : Lines starting with `#` or completely empty lines. + Lines starting with '#' are treated as comments. Empty lines may also occur. + These are typically ignored during data extraction. + Example: ``# Starting new trial block`` + + ``unknown`` : Lines that do not conform to the patterns listed above. + These might include initial header lines, formatting variations, or unexpected entries. + Example: ``initiated`` + +Component Definitions: + - ````: 64-bit integer; milliseconds since session start (Trodes timestamp). + - ````: Integer value; often used as a bitwise mask for DIO pin states. + - ````: String value; can represent an event name, variable name, message component, etc. + - ````: Denotes one or more space-separated strings. 
+ +""" + import pathlib from typing import Any, Dict, List, Optional, Type, TypeVar, Union From 9f1ed1a658bd226c59ee90e1636ed8441a0a132a Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Tue, 29 Apr 2025 09:20:16 -0400 Subject: [PATCH 15/23] Minor edits to docstrings --- src/trodes_to_nwb/convert_statescript.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index 0545d52..21fefac 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -1,7 +1,7 @@ """StateScript log parsing and processing module. This module provides tools for parsing, interpreting, and processing `.stateScriptLog` -files generated by the Trodes neural recording system. It handles the conversion +files generated by Trodes. It handles the conversion of Trodes timestamps, alignment with external time sources, interpretation of Digital Input/Output (DIO) states, and processing of various common log line formats. @@ -9,7 +9,7 @@ ----- Source Files: - Log files parsed by this module typically have the `.stateScriptLog` extension. - - These files are generated by the Trodes software during data acquisition sessions. + - These files are generated by Trodes during data acquisition sessions. Timestamp Information: - The primary timestamp (``) found in these logs is a 64-bit integer. @@ -83,11 +83,6 @@ def _parse_int(s: str) -> Optional[int]: ------- Optional[int] The parsed integer, or None if parsing fails. - - Raises - ------ - ValueError - If the string cannot be converted to an integer. """ try: return int(s) @@ -196,7 +191,6 @@ def parse_ts_str_equals_int(parts: list) -> Optional[Dict[str, Any]]: ---------- parts : list A list of strings obtained by splitting a log line by whitespace. - Expected to contain 4 parts, with '=' as the second part. 
Returns ------- From d605914953c00279ac14b909d8425d09a83a1a97 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Tue, 29 Apr 2025 09:20:37 -0400 Subject: [PATCH 16/23] Use Int64Dtype --- src/trodes_to_nwb/convert_statescript.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index 21fefac..3415533 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -710,8 +710,9 @@ def _find_reference_events( try: if isinstance(value, int): # Convert column to numeric, then integer (handles potential errors) - df[key] = pd.to_numeric(df[key], errors="coerce").astype(int) - # Add elif for float, bool etc. if needed + df[key] = pd.to_numeric(df[key], errors="coerce").astype( + pd.Int64Dtype() + ) except (ValueError, TypeError): # Ignore casting errors if conversion isn't possible pass From 4304051b27d8139bbe1a6ba21d7bb6ef92cd09b6 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Tue, 29 Apr 2025 09:20:57 -0400 Subject: [PATCH 17/23] Minor edit --- src/trodes_to_nwb/convert_statescript.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index 3415533..77a27d8 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -191,6 +191,7 @@ def parse_ts_str_equals_int(parts: list) -> Optional[Dict[str, Any]]: ---------- parts : list A list of strings obtained by splitting a log line by whitespace. + Expected to contain 4 parts, with '=' as the third part. 
Returns ------- From 9a89ae5485f650f88ea453bcdb487192b4160831 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Tue, 29 Apr 2025 09:21:35 -0400 Subject: [PATCH 18/23] Exclude int int by default --- src/trodes_to_nwb/convert_statescript.py | 26 ++++++++++++++++-------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index 77a27d8..7f03c95 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -878,6 +878,7 @@ def get_events_dataframe( self, apply_offset: bool = True, exclude_comments_unknown: bool = True, + exclude_int_int: bool = True, max_DIOs: int = 32, ) -> pd.DataFrame: """Constructs and returns a pandas DataFrame from the parsed log events. @@ -892,6 +893,14 @@ def get_events_dataframe( If True (default), lines parsed as 'comment_or_empty' or 'unknown' are excluded from the DataFrame. If False, all entries from `raw_events` are included (potentially useful for debugging parsing). + exclude_int_int : bool, optional + If True (default), lines parsed as 'ts_int_int' are excluded from + the DataFrame. These are often used for DIO state changes and may not + be relevant for most analyses. + max_DIOs : int, optional + The maximum number of DIOs to consider when interpreting bitmasks + for active DIO inputs/outputs. Default is 32. This is used to + determine the number of bits to check in the bitmask values. 
Returns ------- @@ -920,16 +929,15 @@ def get_events_dataframe( return self.processed_events_df # Determine which event types to filter out + exclude_types = [] if exclude_comments_unknown: - exclude_types = ("comment_or_empty", "unknown") - filtered_events = [ - event - for event in self.raw_events - if event.get("type") not in exclude_types - ] - else: - # Include all event types if not excluding - filtered_events = self.raw_events + exclude_types += ["comment_or_empty", "unknown"] + if exclude_int_int: + exclude_types += ["ts_int_int"] + + filtered_events = [ + event for event in self.raw_events if event.get("type") not in exclude_types + ] # Handle case where filtering leaves no events if not filtered_events: From ce58d140b445c655baf219ac4b64194b16e2f7ad Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Tue, 29 Apr 2025 10:06:24 -0400 Subject: [PATCH 19/23] false by default --- src/trodes_to_nwb/convert_statescript.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index 7f03c95..e66b607 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -878,7 +878,7 @@ def get_events_dataframe( self, apply_offset: bool = True, exclude_comments_unknown: bool = True, - exclude_int_int: bool = True, + exclude_int_int: bool = False, max_DIOs: int = 32, ) -> pd.DataFrame: """Constructs and returns a pandas DataFrame from the parsed log events. @@ -894,7 +894,7 @@ def get_events_dataframe( are excluded from the DataFrame. If False, all entries from `raw_events` are included (potentially useful for debugging parsing). exclude_int_int : bool, optional - If True (default), lines parsed as 'ts_int_int' are excluded from + If True, lines parsed as 'ts_int_int' are excluded from the DataFrame. These are often used for DIO state changes and may not be relevant for most analyses. 
max_DIOs : int, optional From 180e804a9e1bffa7e26fe8629bd2510045c49b4b Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Wed, 30 Apr 2025 10:58:26 -0700 Subject: [PATCH 20/23] Update src/trodes_to_nwb/convert_statescript.py Co-authored-by: Samuel Bray --- src/trodes_to_nwb/convert_statescript.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index e66b607..977fa71 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -116,17 +116,18 @@ def parse_ts_int_int(parts: list) -> Optional[Dict[str, Any]]: if the line matches the expected structure and all parts are valid integers. Returns None otherwise. """ - if len(parts) == 3: - # Attempt to parse all three parts as integers - timestamp, val1, val2 = [_parse_int(part) for part in parts] + if len(parts) != 3: + return + # Attempt to parse all three parts as integers + int_parts = [_parse_int(part) for part in parts] - # Check if all parsing attempts were successful - if timestamp is not None and val1 is not None and val2 is not None: - return { - "type": "ts_int_int", - "timestamp": timestamp, - "value1": val1, - "value2": val2, + # Check if all parsing attempts were successful + if all([part is not None for part in int_parts]): + return { + "type": "ts_int_int", + "timestamp": part[0], + "value1": part[1], + "value2": part[2], } From bbe68f06a83e6c7b6795ef96ce353217b3973826 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Wed, 30 Apr 2025 10:58:53 -0700 Subject: [PATCH 21/23] Update src/trodes_to_nwb/convert_statescript.py Co-authored-by: Samuel Bray --- src/trodes_to_nwb/convert_statescript.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index 977fa71..3911d4b 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ 
b/src/trodes_to_nwb/convert_statescript.py @@ -99,8 +99,6 @@ def parse_ts_int_int(parts: list) -> Optional[Dict[str, Any]]: Example: 8386500 0 0 -> {'ts': 8386500, 'value1': 0, 'value2': 0} - 1817158 128 512 -> {'ts': 1817158, 'value1': 128, 'value2': 512} - 76566 65536 0 -> {'ts': 76566, 'value1': 65536, 'value2': 0} Parameters ---------- From 5b1b611e34dccfdea0d08c3f5ee5bef4ddaaadc6 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Wed, 30 Apr 2025 15:14:55 -0400 Subject: [PATCH 22/23] Fix linting --- src/trodes_to_nwb/convert_statescript.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index 3911d4b..f407347 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -126,7 +126,7 @@ def parse_ts_int_int(parts: list) -> Optional[Dict[str, Any]]: "timestamp": part[0], "value1": part[1], "value2": part[2], - } + } def parse_ts_str_int(parts: list) -> Optional[Dict[str, Any]]: From b300cb95cd0ae6363e1105d0c1b31c1e3d7a890f Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Wed, 30 Apr 2025 16:31:48 -0400 Subject: [PATCH 23/23] Update convert_statescript.py --- src/trodes_to_nwb/convert_statescript.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/trodes_to_nwb/convert_statescript.py b/src/trodes_to_nwb/convert_statescript.py index f407347..b2c8f57 100644 --- a/src/trodes_to_nwb/convert_statescript.py +++ b/src/trodes_to_nwb/convert_statescript.py @@ -114,19 +114,18 @@ def parse_ts_int_int(parts: list) -> Optional[Dict[str, Any]]: if the line matches the expected structure and all parts are valid integers. Returns None otherwise. 
""" - if len(parts) != 3: - return - # Attempt to parse all three parts as integers - int_parts = [_parse_int(part) for part in parts] + if len(parts) == 3: + # Attempt to parse all three parts as integers + timestamp, val1, val2 = [_parse_int(part) for part in parts] - # Check if all parsing attempts were successful - if all([part is not None for part in int_parts]): - return { - "type": "ts_int_int", - "timestamp": part[0], - "value1": part[1], - "value2": part[2], - } + # Check if all parsing attempts were successful + if timestamp is not None and val1 is not None and val2 is not None: + return { + "type": "ts_int_int", + "timestamp": timestamp, + "value1": val1, + "value2": val2, + } def parse_ts_str_int(parts: list) -> Optional[Dict[str, Any]]: