siliconlad
diff --git a/src/pybag/bag_writer.py b/src/pybag/bag_writer.py
Lines changed: 32 additions & 6 deletions
diff --git a/src/pybag/io/raw_writer.py b/src/pybag/io/raw_writer.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/pybag/schema/ros1msg.py b/src/pybag/schema/ros1msg.py
Lines changed: 137 additions & 12 deletions
@@ -68,9 +68,13 @@ def __init__(
         self._chunk_end_time_sec: int | None = None
         self._chunk_end_time_nsec: int | None = None
         self._chunk_message_counts: dict[int, int] = {}
+        # Index entries for current chunk: conn_id -> [(time_sec, time_nsec, offset)]
+        self._chunk_index_entries: dict[int, list[tuple[int, int, int]]] = {}
 
         # Chunk info records (for summary)
         self._chunk_infos: list[ChunkInfoRecord] = []
+        # Index data for all chunks: list of (conn_id, entries) per chunk
+        self._all_index_data: list[list[tuple[int, list[tuple[int, int, int]]]]] = []
 
         # Write initial file structure
         self._write_header()
@@ -216,6 +220,9 @@ def write_message(
         # Track message count per connection
         self._chunk_message_counts[conn_id] = self._chunk_message_counts.get(conn_id, 0) + 1
 
+        # Record the offset within the chunk buffer before writing
+        msg_offset = self._chunk_buffer.size()
+
         # Write message to chunk buffer
         msg_record = MessageDataRecord(
             conn=conn_id,
@@ -225,6 +232,11 @@ def write_message(
         )
         self._chunk_record_writer.write_message_data(msg_record)
 
+        # Track index entry for this message
+        if conn_id not in self._chunk_index_entries:
+            self._chunk_index_entries[conn_id] = []
+        self._chunk_index_entries[conn_id].append((time_sec, time_nsec, msg_offset))
+
         # Check if we should flush the chunk
         if self._chunk_buffer.size() >= self._chunk_size:
             self._flush_chunk()
@@ -254,22 +266,34 @@ def _flush_chunk(self) -> None:
         )
         self._chunk_infos.append(chunk_info)
 
+        # Save index entries for this chunk
+        chunk_index_data: list[tuple[int, list[tuple[int, int, int]]]] = []
+        for conn_id, entries in self._chunk_index_entries.items():
+            chunk_index_data.append((conn_id, list(entries)))
+        self._all_index_data.append(chunk_index_data)
+
         # Reset chunk state
         self._chunk_buffer.clear()
         self._chunk_start_time_sec = None
         self._chunk_start_time_nsec = None
         self._chunk_end_time_sec = None
         self._chunk_end_time_nsec = None
         self._chunk_message_counts.clear()
+        self._chunk_index_entries.clear()
 
     def close(self) -> None:
         """Finalize and close the bag file."""
         # Flush any remaining chunk data
         self._flush_chunk()
 
-        # Record the index position
+        # Record the index position (where index data, connections and chunk infos start)
         index_pos = self._record_writer.tell()
 
+        # Write INDEX_DATA records for each chunk
+        for chunk_index_data in self._all_index_data:
+            for conn_id, entries in chunk_index_data:
+                self._record_writer.write_index_data(conn_id, entries)
+
         # Write all connection records
         for conn in self._connections.values():
             self._record_writer.write_connection(conn)
@@ -278,11 +302,13 @@ def close(self) -> None:
         for chunk_info in self._chunk_infos:
             self._record_writer.write_chunk_info(chunk_info)
 
-        # Update the bag header with correct values
-        # We need to seek back and rewrite it
-        # For simplicity, we'll just note that proper implementation would
-        # seek back to header_pos and rewrite with correct values
-        # This is a limitation of the simple writer approach
+        # Seek back to the header position and rewrite with correct values
+        self._writer.seek_from_start(self._header_pos)
+        self._record_writer.write_bag_header(
+            index_pos=index_pos,
+            conn_count=len(self._connections),
+            chunk_count=len(self._chunk_infos),
+        )
 
         self._record_writer.close()
 
 
@@ -57,7 +57,7 @@ def close(self) -> None:
 class FileWriter(BaseWriter):
     """Write binary data to a file."""
 
-    def __init__(self, file_path: Path | str, mode: str = "wb"):
+    def __init__(self, file_path: Path | str, mode: str = "w+b"):
         self._file_path = Path(file_path).absolute()
         self._file = open(self._file_path, mode)
 
 
@@ -411,26 +411,151 @@ def parse_schema(self, schema: Message | type[Message]) -> tuple[Schema, dict[st
 def compute_md5sum(message_definition: str, msg_type: str) -> str:
     """Compute the MD5 hash for a ROS 1 message definition.
 
-    The MD5 sum is computed from the "canonical" form of the message,
-    which removes comments and normalizes whitespace.
+    The digest is taken over the canonical text of the top-level message,
+    i.e. the text before the first separator of 80 '=' characters:
+    - comments and blank lines are dropped,
+    - constants are listed first, in their original order,
+    - builtin fields are written as "type name",
+    - complex fields are written as "<md5 of the nested message> name".
+
+    Args:
+        message_definition: Full definition text; embedded sub-message
+            definitions, if any, follow separators of 80 '=' characters.
+        msg_type: Name of the top-level type (e.g., 'std_msgs/Header').
+
+    Returns:
+        The 32-character hexadecimal MD5 digest.
+    """
+    separator = '=' * 80
+
+    # Only the section before the first separator describes msg_type itself
+    top_level_def = message_definition.split(separator)[0].strip()
+
+    # The remaining sections supply the definitions of nested types
+    embedded_defs = _parse_sub_message_definitions(message_definition)
+
+    canonical_text = _compute_md5_text(top_level_def, msg_type, embedded_defs)
+    digest = hashlib.md5(canonical_text.encode('utf-8'))
+    return digest.hexdigest()
+
+
+def _parse_sub_message_definitions(message_definition: str) -> dict[str, str]:
+    """Collect the sub-message definitions embedded in a full definition.
+
+    Each embedded section follows a separator of 80 '=' characters and opens
+    with a 'MSG: type' line; sections without that opening line are ignored.
 
     Args:
         message_definition: The full message definition text.
+
+    Returns:
+        Dictionary mapping each embedded message type to its definition text.
+    """
+    embedded: dict[str, str] = {}
+    marker = 'MSG: '
+
+    # The text before the first separator is the main message; skip it
+    _main, *sections = message_definition.split('=' * 80)
+
+    for section in sections:
+        # First line names the type, everything after it is the definition
+        header, _newline, body = section.strip().partition('\n')
+        header = header.strip()
+        if header.startswith(marker):
+            embedded[header[len(marker):].strip()] = body.strip()
+
+    return embedded
+
+
+# Type names treated as ROS 1 builtins when computing MD5 text; 'time' and
+# 'duration' are included alongside the primitive types.
+_ROS1_BUILTIN_TYPES = {
+    'bool',
+    'byte',
+    'char',
+    'string',
+    'int8', 'int16', 'int32', 'int64',
+    'uint8', 'uint16', 'uint32', 'uint64',
+    'float32', 'float64',
+    'time',
+    'duration',
+}
+
+
+def _is_builtin_type(type_name: str) -> bool:
+    """Return True if the type, ignoring a trailing array suffix, is a ROS 1 builtin."""
+    # A trailing "[...]" is removed first so array fields match their base type
+    return re.sub(r'\[.*\]$', '', type_name) in _ROS1_BUILTIN_TYPES
+
+
+def _compute_md5_text(
+    msg_def: str,
+    msg_type: str,
+    sub_msg_defs: dict[str, str]
+) -> str:
+    """Compute the canonical MD5 text for a message definition.
+
+    Comments and blank lines are dropped. Constants come first, in their
+    original order, normalized to "type name=value" so that whitespace around
+    the name or the value does not change the hash. Fields follow: builtin
+    fields as "type name", complex fields as "<md5 of nested message> name".
+
+    Args:
+        msg_def: The message definition (just fields, no embedded types).
         msg_type: The message type name.
+            Its package prefix qualifies unqualified complex field types.
+        sub_msg_defs: Dictionary of sub-message type -> definition.
 
     Returns:
-        The 32-character hexadecimal MD5 hash.
+        The canonical text to hash for MD5 computation.
     """
-    # Simplified MD5 computation - in practice this should match
-    # ROS 1's exact algorithm which is more complex
-    canonical = []
-    for line in message_definition.split('\n'):
+    package = msg_type.split('/')[0] if '/' in msg_type else ''
+
+    constants: list[str] = []
+    fields: list[str] = []
+
+    for raw_line in msg_def.split('\n'):
+        # Keep the untouched line: string constants need the text after '#'
+        line = raw_line
         # Remove comments
         if '#' in line:
             line = line[:line.index('#')]
         line = line.strip()
-        if line:
-            canonical.append(line)
-
-    canonical_text = '\n'.join(canonical)
-    return hashlib.md5(canonical_text.encode('utf-8')).hexdigest()
+        if not line:
+            continue
+
+        # Constants have the form: TYPE NAME=VALUE
+        if '=' in line:
+            const_type = line.split(None, 1)[0]
+            # A string constant owns everything right of '=', so a '#' in its
+            # value is data rather than a comment; read it from the raw line.
+            source = raw_line.strip() if const_type == 'string' else line
+            declaration, _, value = source.partition('=')
+            decl_tokens = declaration.split(None, 1)
+            if len(decl_tokens) == 2:
+                # Normalize so "int32 X = 5" and "int32 X=5" hash identically
+                const_name = decl_tokens[1].strip()
+                constants.append(f"{decl_tokens[0]} {const_name}={value.strip()}")
+            else:
+                # Not a well-formed "TYPE NAME=VALUE"; keep the text as written
+                constants.append(line)
+            continue
+
+        # Fields have the form: TYPE NAME
+        parts = line.split()
+        if len(parts) < 2:
+            continue
+        field_type, field_name = parts[0], parts[1]
+
+        if _is_builtin_type(field_type):
+            # Builtin type: keep the declared type, array suffix included
+            fields.append(f"{field_type} {field_name}")
+            continue
+
+        # Complex type: the MD5 of the nested message replaces the type name.
+        # Drop any array suffix, then qualify bare names with a package.
+        bare_type = re.sub(r'\[.*\]$', '', field_type)
+        if '/' in bare_type:
+            full_type = bare_type
+        elif bare_type == 'Header':
+            full_type = 'std_msgs/Header'
+        else:
+            full_type = f"{package}/{bare_type}"
+
+        # NOTE(review): a type missing from sub_msg_defs is hashed as an
+        # empty definition instead of being reported - confirm this is wanted.
+        sub_def = sub_msg_defs.get(full_type, '')
+        if not sub_def and full_type == 'std_msgs/Header':
+            # Built-in Header definition
+            sub_def = "uint32 seq\ntime stamp\nstring frame_id"
+
+        # Recursively compute MD5 for the sub-message
+        sub_md5_text = _compute_md5_text(sub_def, full_type, sub_msg_defs)
+        sub_md5_hash = hashlib.md5(sub_md5_text.encode('utf-8')).hexdigest()
+        fields.append(f"{sub_md5_hash} {field_name}")
+
+    # Combine: constants first, then fields
+    return '\n'.join(constants + fields)