owasp-sbot
diff --git a/‎README.md
Lines changed: 1 addition & 1 deletion b/‎README.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎osbot_utils/helpers/html/Dict__To__Css.py renamed to ‎osbot_utils/helpers/html/CSS_Dict__To__Css.py
Lines changed: 1 addition & 2 deletions b/‎osbot_utils/helpers/html/Dict__To__Css.py renamed to ‎osbot_utils/helpers/html/CSS_Dict__To__Css.py
Lines changed: 1 addition & 2 deletions
diff --git a/‎osbot_utils/helpers/html/Dict__To__Html.py renamed to ‎osbot_utils/helpers/html/Html_Dict__To__Html.py
Lines changed: 19 additions & 17 deletions b/‎osbot_utils/helpers/html/Dict__To__Html.py renamed to ‎osbot_utils/helpers/html/Html_Dict__To__Html.py
Lines changed: 19 additions & 17 deletions
diff --git a/‎osbot_utils/helpers/html/Html_Dict__To__Html_Document.py
Lines changed: 37 additions & 0 deletions b/‎osbot_utils/helpers/html/Html_Dict__To__Html_Document.py
Lines changed: 37 additions & 0 deletions
diff --git a/‎osbot_utils/helpers/html/Dict__To__Tags.py renamed to ‎osbot_utils/helpers/html/Html_Dict__To__Html_Tags.py
Lines changed: 37 additions & 36 deletions b/‎osbot_utils/helpers/html/Dict__To__Tags.py renamed to ‎osbot_utils/helpers/html/Html_Dict__To__Html_Tags.py
Lines changed: 37 additions & 36 deletions
diff --git a/‎osbot_utils/helpers/html/Html__To__Dict.py renamed to ‎osbot_utils/helpers/html/Html__To__Html_Dict.py
Lines changed: 15 additions & 12 deletions b/‎osbot_utils/helpers/html/Html__To__Dict.py renamed to ‎osbot_utils/helpers/html/Html__To__Html_Dict.py
Lines changed: 15 additions & 12 deletions
diff --git a/‎osbot_utils/helpers/html/Html__To__Html_Document.py
Lines changed: 19 additions & 0 deletions b/‎osbot_utils/helpers/html/Html__To__Html_Document.py
Lines changed: 19 additions & 0 deletions
@@ -2,7 +2,7 @@
 
 Powerful Python util methods and classes that simplify common apis and tasks.
 
-![Current Release](https://img.shields.io/badge/release-v2.56.0-blue)
+![Current Release](https://img.shields.io/badge/release-v2.56.6-blue)
 [![codecov](https://codecov.io/gh/owasp-sbot/OSBot-Utils/graph/badge.svg?token=GNVW0COX1N)](https://codecov.io/gh/owasp-sbot/OSBot-Utils)
 
 
 
@@ -1,7 +1,6 @@
 from osbot_utils.base_classes.Kwargs_To_Self import Kwargs_To_Self
 
-
-class Dict__To__Css(Kwargs_To_Self):
+class CSS_Dict__To__Css(Kwargs_To_Self):
     css: dict
 
 
 
@@ -1,7 +1,9 @@
+from osbot_utils.helpers.html.Html__To__Html_Dict import STRING__SCHEMA_TEXT, STRING__SCHEMA_NODES
+
 HTML_SELF_CLOSING_TAGS     = {'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr'}
 HTML_DEFAULT_DOCTYPE_VALUE = "<!DOCTYPE html>\n"
 
-class Dict__To__Html:
+class Html_Dict__To__Html:
     def __init__(self, root, include_doctype=True, doctype=HTML_DEFAULT_DOCTYPE_VALUE):
         self.self_closing_tags = HTML_SELF_CLOSING_TAGS             # Define a list of self-closing tags
         self.root              = root
@@ -35,41 +37,41 @@ def convert_attrs(self, attrs):
     def convert_element(self, element, indent_level):
         """Recursively converts a dictionary to an HTML string with indentation."""
         # Check if this is a text node
-        if element.get("type") == "text":
+        if element.get("type") == STRING__SCHEMA_TEXT:
             return element.get("data", "")                                  # Return text content directly for text nodes
 
-        tag      = element.get("tag")
-        attrs    = element.get("attrs", {})
-        children = element.get("children", [])
+        tag   = element.get("tag")
+        attrs = element.get("attrs", {})
+        nodes = element.get(STRING__SCHEMA_NODES, [])
 
         attrs_str = self.convert_attrs(attrs)                               # Convert attributes dictionary to a string
         indent = "    " * indent_level                                      # Indentation for the current level, assuming 4 spaces per indent level
 
         # Handle self-closing tags
-        if tag in self.self_closing_tags and not children:                  # Check if the tag is self-closing and has no children
+        if tag in self.self_closing_tags and not nodes:                  # Check if the tag is self-closing and has no nodes
             return f"{indent}<{tag}{attrs_str} />\n"
 
         # Start building the HTML
         html = f"{indent}<{tag}{attrs_str}>"                                # Opening tag with indentation
 
-        # Separate children into text nodes and element nodes
-        text_nodes = [child for child in children if child.get("type") == "text"]
-        element_nodes = [child for child in children if child.get("type") != "text"]
+        # Separate nodes into text nodes and element nodes
+        text_nodes    = [node for node in nodes if node.get("type") == STRING__SCHEMA_TEXT]
+        element_nodes = [node for node in nodes if node.get("type") != STRING__SCHEMA_TEXT]
 
         # If there are only element nodes, add a newline after the opening tag
         if element_nodes and not text_nodes:
             html += "\n"
 
-        # Process children, maintaining the original order but with proper formatting
-        if children:
+        # Process nodes, maintaining the original order but with proper formatting
+        if nodes:
             # Track if we're currently in a text section or element section
             # This helps us add newlines only between elements, not text
             previous_was_element = False
 
-            for child in children:
-                if child.get("type") == "text":
+            for node in nodes:
+                if node.get("type") == STRING__SCHEMA_TEXT:
                     # Text node - directly append content
-                    html += child.get("data", "")
+                    html += node.get("data", "")
                     previous_was_element = False
                 else:
                     # Element node - format with proper indentation
@@ -78,14 +80,14 @@ def convert_element(self, element, indent_level):
                         if not html.endswith("\n"):
                             html += "\n"
 
-                    html += self.convert_element(child, indent_level + 1)
+                    html += self.convert_element(node, indent_level + 1)
                     previous_was_element = True
 
         # Handle closing tag based on content
         if element_nodes and not text_nodes:
-            # If only element children, add indented closing tag
+            # If only element nodes, add indented closing tag
             html += f"{indent}</{tag}>\n"
-        elif children:  # Any type of children
+        elif nodes:  # Any type of nodes
             # If mixed content or only text, add closing tag without indentation
             html += f"</{tag}>\n"
         else:
 
@@ -0,0 +1,37 @@
+from typing                                                         import Dict, Union, Any
+from osbot_utils.helpers.html.Html__To__Html_Dict                   import STRING__SCHEMA_TEXT, STRING__SCHEMA_NODES
+from osbot_utils.helpers.html.schemas.Schema__Html_Document         import Schema__Html_Document
+from osbot_utils.helpers.html.schemas.Schema__Html_Node             import Schema__Html_Node
+from osbot_utils.helpers.html.schemas.Schema__Html_Node__Data       import Schema__Html_Node__Data
+from osbot_utils.helpers.html.schemas.Schema__Html_Node__Data__Type import Schema__Html_Node__Data__Type
+from osbot_utils.type_safe.Type_Safe                                import Type_Safe
+
+
+class Html_Dict__To__Html_Document(Type_Safe):
+    html__dict    : dict                  = None
+    html__document: Schema__Html_Document = None
+
+    def convert(self):
+        self.html__document = self.parse_html_dict(self.html__dict)
+        return self.html__document
+
+    def parse_html_dict(self, target: Dict[str, Any]) -> Schema__Html_Document:
+        if not target or not isinstance(target, dict):
+            raise ValueError("Invalid HTML dictionary structure")
+
+        root_node = self.parse_node(target)
+        return Schema__Html_Document(root_node=root_node)
+
+    def parse_node(self, target: Dict[str, Any]) -> Union[Schema__Html_Node, Schema__Html_Node__Data]:
+
+        if target.get('type') == STRING__SCHEMA_TEXT:                                           # Handle text nodes
+            return Schema__Html_Node__Data(data = target.get('data', ''),
+                                           type = Schema__Html_Node__Data__Type.TEXT)
+        else:                                                                                   # Handle element nodes
+            nodes = []
+            for node in target.get(STRING__SCHEMA_NODES, []):
+                nodes.append(self.parse_node(node))
+
+            return Schema__Html_Node(attrs = target.get('attrs', {})           ,
+                                     nodes = nodes                              ,
+                                     tag   = target.get('tag', ''))
@@ -1,13 +1,14 @@
-from osbot_utils.helpers.html.Dict__To__Html import HTML_SELF_CLOSING_TAGS
-from osbot_utils.helpers.html.Tag__Base      import Tag__Base
-from osbot_utils.helpers.html.Tag__Body      import Tag__Body
-from osbot_utils.helpers.html.Tag__Head      import Tag__Head
-from osbot_utils.helpers.html.Tag__Html      import Tag__Html
-from osbot_utils.helpers.html.Tag__Link      import Tag__Link
-from osbot_utils.helpers.html.Tag__Text      import Tag__Text
+from osbot_utils.helpers.html.Html_Dict__To__Html import HTML_SELF_CLOSING_TAGS
+from osbot_utils.helpers.html.Html__To__Html_Dict import STRING__SCHEMA_TEXT, STRING__SCHEMA_NODES
+from osbot_utils.helpers.html.Tag__Base           import Tag__Base
+from osbot_utils.helpers.html.Tag__Body           import Tag__Body
+from osbot_utils.helpers.html.Tag__Head           import Tag__Head
+from osbot_utils.helpers.html.Tag__Html           import Tag__Html
+from osbot_utils.helpers.html.Tag__Link           import Tag__Link
+from osbot_utils.helpers.html.Tag__Text           import Tag__Text
 
 
-class Dict__To__Tags:
+class Html_Dict__To__Html_Tags:
 
     def __init__(self, root):
         self.root = root
@@ -30,25 +31,25 @@ def convert_element(self, element):
             return self.convert_to__tag(Tag__Base, element, 0)  # Default indent 0
 
     def collect_inner_text(self, element):
-        """Extract all text from an element's text node children."""
+        """Concatenate the data of an element's direct text nodes."""
         inner_text = ""
-        for child in element.get("children", []):
-            if child.get("type") == "text":
-                inner_text += child.get("data", "")
+        for node in element.get(STRING__SCHEMA_NODES, []):          # direct children only; text inside nested elements is not collected
+            if node.get("type") == STRING__SCHEMA_TEXT:
+                inner_text += node.get("data", "")                  # a text node without "data" contributes nothing
         return inner_text
 
     def convert_to__tag(self, target_tag, element, indent):
-        if element.get("type") == "text":
+        if element.get("type") == STRING__SCHEMA_TEXT:
             # Handle text nodes directly
             return Tag__Text(element.get("data", ""))
 
         tag_name   = element.get("tag")
         attrs      = element.get("attrs", {})
-        children   = element.get("children", [])
+        nodes      = element.get(STRING__SCHEMA_NODES, [])
         end_tag    = tag_name not in HTML_SELF_CLOSING_TAGS
         tag_indent = indent + 1
 
-        # Collect inner text from all text node children
+        # Collect inner text from all direct text nodes
         inner_html = self.collect_inner_text(element)
 
         tag_kwargs = dict(
@@ -61,60 +62,60 @@ def convert_to__tag(self, target_tag, element, indent):
 
         tag = target_tag(**tag_kwargs)
 
-        # Process only element nodes as children (text is already handled via inner_html)
-        for child in children:
-            if child.get("type") != "text":  # Skip text nodes, they're in inner_html
-                child_tag = self.convert_to__tag(Tag__Base, child, tag_indent)
+        # Append only element nodes as child tags (text is already handled via inner_html)
+        for node in nodes:
+            if node.get("type") != STRING__SCHEMA_TEXT:  # Skip text nodes, they're in inner_html
+                child_tag = self.convert_to__tag(Tag__Base, node, tag_indent)
                 tag.elements.append(child_tag)
 
         return tag
 
     def convert_to__tag__head(self, element, indent):
-        attrs    = element.get("attrs", {})
-        children = element.get("children", [])
+        attrs = element.get("attrs", {})
+        nodes = element.get(STRING__SCHEMA_NODES, [])
 
         head_indent = indent + 1
         tag_head = Tag__Head(indent=head_indent, **attrs)          # head attributes are forwarded as keyword arguments
 
-        for child in children:
-            tag_name = child.get("tag")
+        for node in nodes:
+            tag_name = node.get("tag")                              # None for nodes without a "tag" key; those fall through to the final else
 
             if tag_name == 'title':
-                # Extract title text from text node children
-                tag_head.title = self.collect_inner_text(child)
+                # Extract title text from the title element's direct text nodes
+                tag_head.title = self.collect_inner_text(node)
             elif tag_name == 'link':
-                tag_head.links.append(self.convert_to__tag__link(child))
+                tag_head.links.append(self.convert_to__tag__link(node))
             elif tag_name == 'meta':
-                tag_head.elements.append(self.convert_to__tag(Tag__Base, child, head_indent))
+                tag_head.elements.append(self.convert_to__tag(Tag__Base, node, head_indent))
             elif tag_name == 'style':
                 # Converted exactly like the 'meta' and fallback branches: a Tag__Base appended to elements
-                style_element = self.convert_to__tag(Tag__Base, child, head_indent)
+                style_element = self.convert_to__tag(Tag__Base, node, head_indent)
                 tag_head.elements.append(style_element)
             else:
                 # Handle any other head elements
-                tag_head.elements.append(self.convert_to__tag(Tag__Base, child, head_indent))
+                tag_head.elements.append(self.convert_to__tag(Tag__Base, node, head_indent))
 
         return tag_head
 
     def convert_to__tag__html(self, element):
-        attrs    = element.get("attrs", {})
-        children = element.get("children", [])
-        lang     = attrs.get("lang")
+        attrs = element.get("attrs", {})
+        nodes = element.get(STRING__SCHEMA_NODES, [])
+        lang  = attrs.get("lang")
 
         tag_html = Tag__Html(attributes=attrs, lang=lang, doc_type=False)
 
         # Initialize head and body if not found
         head_found = False
         body_found = False
 
-        for child in children:
-            tag_name = child.get("tag")
+        for node in nodes:
+            tag_name = node.get("tag")
 
             if tag_name == 'head':
-                tag_html.head = self.convert_to__tag__head(child, tag_html.indent)
+                tag_html.head = self.convert_to__tag__head(node, tag_html.indent)
                 head_found = True
             elif tag_name == 'body':
-                tag_html.body = self.convert_to__tag(Tag__Body, child, tag_html.indent)
+                tag_html.body = self.convert_to__tag(Tag__Body, node, tag_html.indent)
                 body_found = True
             else:
                 # Log unexpected child elements of html
 
@@ -1,9 +1,12 @@
 from html.parser import HTMLParser
 
 HTML_SELF_CLOSING_TAGS = {'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr'}
-STRING__DATA_TEXT = 'TEXT:'
+STRING__SCHEMA_TEXT    = 'TEXT'
+STRING__SCHEMA_NODES   = 'nodes'
+STRING__DATA_TEXT      = f'{STRING__SCHEMA_TEXT}:'
 
-class Html__To__Dict(HTMLParser):
+
+class Html__To__Html_Dict(HTMLParser):
     def __init__(self, html):
         super().__init__()
         self.root            = None                               # No root initially
@@ -18,15 +21,15 @@ def convert(self):
         return self.root
 
     def handle_starttag(self, tag, attrs):
-        new_tag = {"tag": tag, "attrs": dict(attrs), "children": []}
+        new_tag = {"tag": tag, "attrs": dict(attrs), STRING__SCHEMA_NODES: []}
 
         if self.current is None:
             # When the first tag is encountered, it becomes the root
             self.root = new_tag
             self.current = new_tag
         else:
             # Otherwise, append the new tag as a child of the current tag
-            self.current["children"].append(new_tag)
+            self.current[STRING__SCHEMA_NODES].append(new_tag)
 
         # If this tag is not a void element, push it onto the stack
         if tag.lower() not in self.void_elements:
@@ -48,33 +51,33 @@ def handle_endtag(self, tag):
     def handle_data(self, data):
         if data.strip():  # Skip whitespace-only chunks; kept chunks are stored unstripped
             # Append a text node to the element currently being built
-            text_node = {"type": "text", "data": data}
-            self.current["children"].append(text_node)
+            text_node = {"type": STRING__SCHEMA_TEXT, "data": data}
+            self.current[STRING__SCHEMA_NODES].append(text_node)    # NOTE(review): assumes self.current is an element dict; text seen before the first start tag would find None here - TODO confirm
 
     def print__generate_lines(self, node, indent="", last=True, is_root=True):
         lines = []
 
         prefix = "" if is_root else ("└── " if last else "├── ")  # the root line gets no branch glyph
 
-        if node.get("type") == "text":
+        if node.get("type") == STRING__SCHEMA_TEXT:
             text_data = node.get('data')
             if self.strip_text_data:
                 text_data = text_data.strip()
             lines.append(f"{indent}{prefix}{STRING__DATA_TEXT} {text_data}")  # text nodes render as "TEXT: <data>"
         else:
             tag       = node.get("tag")
             attrs     = node.get("attrs", {})
-            children  = node.get("children", [])
+            nodes     = node.get(STRING__SCHEMA_NODES, [])
             attrs_str = ' '.join(f'{key}="{value}"' for key, value in attrs.items())
             attrs_str = f' ({attrs_str})' if attrs_str else ''  # attributes shown in parentheses, omitted when there are none
 
             lines.append(f"{indent}{prefix}{tag}{attrs_str}")
 
             child_indent = indent + ("    " if last else "│   ")  # a vertical rule is kept only while later siblings remain
 
-            for i, child in enumerate(children):
-                is_last = i == len(children) - 1
-                child_lines = self.print__generate_lines(child, indent=child_indent, last=is_last, is_root=False)
+            for i, node in enumerate(nodes):  # NOTE(review): rebinds the `node` parameter; harmless because `node` is not read after this loop
+                is_last = i == len(nodes) - 1
+                child_lines = self.print__generate_lines(node, indent=child_indent, last=is_last, is_root=False)
                 lines.extend(child_lines if isinstance(child_lines, list) else [child_lines])  # nested calls return one joined string, added as a single entry
 
         return lines if is_root else "\n".join(lines)  # list for the root call, newline-joined string for nested calls
@@ -95,7 +98,7 @@ def print__lines(self, lines):
 
 def html_to_dict(html_code: str) -> dict:
     try:
-        html_to_dict = Html__To__Dict(html_code)
+        html_to_dict = Html__To__Html_Dict(html_code)
         html_dict = html_to_dict.convert()
         return html_dict
     except:  # todo: see if there is a better Exception to capture
 
@@ -0,0 +1,19 @@
+from osbot_utils.helpers.html.Html_Dict__To__Html_Document  import Html_Dict__To__Html_Document
+from osbot_utils.helpers.html.Html__To__Html_Dict           import Html__To__Html_Dict
+from osbot_utils.helpers.html.schemas.Schema__Html_Document import Schema__Html_Document
+from osbot_utils.type_safe.Type_Safe                        import Type_Safe
+
+
+class Html__To__Html_Document(Type_Safe):
+    html: str
+    html__dict    : dict
+    html__document: Schema__Html_Document
+
+    def convert(self):
+        if self.html:
+            html__dict =  Html__To__Html_Dict(self.html).convert()
+            if html__dict:
+                with Html_Dict__To__Html_Document(html__dict=html__dict).convert()  as html__document:
+                    if html__document:
+                        self.html__document = html__document
+                        return html__document