@@ -411,26 +411,151 @@ def parse_schema(self, schema: Message | type[Message]) -> tuple[Schema, dict[st
def compute_md5sum(message_definition: str, msg_type: str) -> str:
    """Compute the MD5 hash for a ROS 1 message definition.

    The full definition text is split on the 80-character '=' separator.
    The first section is treated as the top-level message; the remaining
    sections are handed to ``_parse_sub_message_definitions`` so nested
    types can be looked up by name. The canonical text produced by
    ``_compute_md5_text`` for the top-level message is then UTF-8 encoded
    and hashed.

    Args:
        message_definition: The full message definition text (may include
            embedded sub-message definitions separated by 80 '=' characters).
        msg_type: The message type name (e.g., 'std_msgs/Header').

    Returns:
        The 32-character hexadecimal MD5 hash.
    """
    section_separator = '=' * 80

    # Only the text before the first separator belongs to the message itself
    top_level_def = message_definition.split(section_separator)[0].strip()

    # Definitions of nested types, keyed by their full type name
    embedded_defs = _parse_sub_message_definitions(message_definition)

    canonical_text = _compute_md5_text(top_level_def, msg_type, embedded_defs)
    digest = hashlib.md5(canonical_text.encode('utf-8'))
    return digest.hexdigest()
438+
439+
440+ def _parse_sub_message_definitions (message_definition : str ) -> dict [str , str ]:
441+ """Parse embedded sub-message definitions from a full message definition.
442+
443+ Sub-messages are separated by 80 '=' characters and start with 'MSG: type'.
416444
417445 Args:
418446 message_definition: The full message definition text.
447+
448+ Returns:
449+ Dictionary mapping message type to its definition text.
450+ """
451+ sub_msgs : dict [str , str ] = {}
452+
453+ # Split on the 80 '=' separator
454+ parts = message_definition .split ('=' * 80 )
455+
456+ for part in parts [1 :]: # Skip the first part (main message)
457+ part = part .strip ()
458+ if not part :
459+ continue
460+
461+ lines = part .split ('\n ' )
462+ first_line = lines [0 ].strip ()
463+
464+ if first_line .startswith ('MSG: ' ):
465+ msg_type = first_line [5 :].strip ()
466+ # The rest is the message definition
467+ msg_def = '\n ' .join (lines [1 :]).strip ()
468+ sub_msgs [msg_type ] = msg_def
469+
470+ return sub_msgs
471+
472+
473+ # ROS 1 builtin types (including time and duration which are special in ROS 1)
474+ _ROS1_BUILTIN_TYPES = {
475+ 'bool' , 'byte' , 'char' ,
476+ 'int8' , 'uint8' , 'int16' , 'uint16' , 'int32' , 'uint32' , 'int64' , 'uint64' ,
477+ 'float32' , 'float64' ,
478+ 'string' ,
479+ 'time' , 'duration' ,
480+ }
481+
482+
def _is_builtin_type(type_name: str) -> bool:
    """Return True if ``type_name`` names a ROS 1 builtin type.

    A trailing array suffix such as ``[]`` or ``[4]`` is removed before the
    lookup, so ``uint8[]`` is classified the same way as ``uint8``.
    """
    return re.sub(r'\[.*\]$', '', type_name) in _ROS1_BUILTIN_TYPES
488+
489+
490+ def _compute_md5_text (
491+ msg_def : str ,
492+ msg_type : str ,
493+ sub_msg_defs : dict [str , str ]
494+ ) -> str :
495+ """Compute the canonical MD5 text for a message definition.
496+
497+ Args:
498+ msg_def: The message definition (just fields, no embedded types).
419499 msg_type: The message type name.
500+ sub_msg_defs: Dictionary of sub-message type -> definition.
420501
421502 Returns:
422- The 32-character hexadecimal MD5 hash.
503+ The canonical text to hash for MD5 computation .
423504 """
424- # Simplified MD5 computation - in practice this should match
425- # ROS 1's exact algorithm which is more complex
426- canonical = []
427- for line in message_definition .split ('\n ' ):
505+ package = msg_type .split ('/' )[0 ] if '/' in msg_type else ''
506+
507+ constants : list [str ] = []
508+ fields : list [str ] = []
509+
510+ for line in msg_def .split ('\n ' ):
428511 # Remove comments
429512 if '#' in line :
430513 line = line [:line .index ('#' )]
431514 line = line .strip ()
432- if line :
433- canonical .append (line )
434-
435- canonical_text = '\n ' .join (canonical )
436- return hashlib .md5 (canonical_text .encode ('utf-8' )).hexdigest ()
515+ if not line :
516+ continue
517+
518+ # Parse the line to determine if it's a constant or field
519+ # Constants have the form: TYPE NAME=VALUE
520+ if '=' in line :
521+ # It's a constant
522+ constants .append (line )
523+ else :
524+ # It's a field: TYPE NAME
525+ parts = line .split ()
526+ if len (parts ) >= 2 :
527+ field_type = parts [0 ]
528+ field_name = parts [1 ]
529+
530+ # Get the bare type (without array notation) for type checking
531+ bare_type = re .sub (r'\[.*\]$' , '' , field_type )
532+
533+ if _is_builtin_type (field_type ):
534+ # Builtin type: use as-is
535+ fields .append (f"{ field_type } { field_name } " )
536+ else :
537+ # Complex type: compute its MD5 and use that instead
538+ # Resolve the type name (add package if not specified)
539+ if '/' not in bare_type :
540+ if bare_type == 'Header' :
541+ full_type = 'std_msgs/Header'
542+ else :
543+ full_type = f"{ package } /{ bare_type } "
544+ else :
545+ full_type = bare_type
546+
547+ # Get the sub-message definition
548+ sub_def = sub_msg_defs .get (full_type , '' )
549+ if not sub_def and full_type == 'std_msgs/Header' :
550+ # Built-in Header definition
551+ sub_def = "uint32 seq\n time stamp\n string frame_id"
552+
553+ # Recursively compute MD5 for the sub-message
554+ sub_md5 = _compute_md5_text (sub_def , full_type , sub_msg_defs )
555+ sub_md5_hash = hashlib .md5 (sub_md5 .encode ('utf-8' )).hexdigest ()
556+
557+ fields .append (f"{ sub_md5_hash } { field_name } " )
558+
559+ # Combine: constants first, then fields
560+ result_lines = constants + fields
561+ return '\n ' .join (result_lines )
0 commit comments