owasp-sbot
diff --git a/‎README.md‎
Lines changed: 1 addition & 1 deletion b/‎README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Accept.py‎
Lines changed: 19 additions & 0 deletions b/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Accept.py‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Authorization.py‎
Lines changed: 18 additions & 0 deletions b/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Authorization.py‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Cache_Control.py‎
Lines changed: 15 additions & 0 deletions b/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Cache_Control.py‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Cookie.py‎
Lines changed: 16 additions & 0 deletions b/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Cookie.py‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Header__Name.py‎
Lines changed: 18 additions & 0 deletions b/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Header__Name.py‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Header__Value.py‎
Lines changed: 15 additions & 0 deletions b/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Header__Value.py‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Location.py‎
Lines changed: 16 additions & 0 deletions b/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Location.py‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__User_Agent.py‎
Lines changed: 16 additions & 0 deletions b/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__User_Agent.py‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎osbot_utils/type_safe/primitives/domains/web/safe_str/Safe_Str__Html.py‎
Lines changed: 25 additions & 9 deletions b/‎osbot_utils/type_safe/primitives/domains/web/safe_str/Safe_Str__Html.py‎
Lines changed: 25 additions & 9 deletions
@@ -1,6 +1,6 @@
 # OSBot-Utils
 
-![Current Release](https://img.shields.io/badge/release-v3.21.0-blue)
+![Current Release](https://img.shields.io/badge/release-v3.21.1-blue)
 ![Python](https://img.shields.io/badge/python-3.8+-green)
 ![Type-Safe](https://img.shields.io/badge/Type--Safe-✓-brightgreen)
 ![Caching](https://img.shields.io/badge/Caching-Built--In-orange)
 
@@ -0,0 +1,19 @@
+import re
+from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
+
+TYPE_SAFE_STR__HTTP__ACCEPT__REGEX      = re.compile(r'[^a-zA-Z0-9/\-+.*,;=\s]')
+TYPE_SAFE_STR__HTTP__ACCEPT__MAX_LENGTH = 512
+
+# todo: review with the other http safe_str classes and see if we can't have a Safe_Str__Http__Base that
+#       defines chars like this, and with the max length being the main difference
+#       also, connect this with the RFC for the HTTP protocol, since that one should be providing a good set of
+#       mappings for what chars are allowed in these http values
+class Safe_Str__Http__Accept(Safe_Str):
+    """
+    Safe string class for HTTP Accept header values.
+    Allows MIME types with quality parameters.
+    Examples: 'text/html,application/json;q=0.9', 'application/*', '*/*'
+    """
+    regex                      = TYPE_SAFE_STR__HTTP__ACCEPT__REGEX
+    max_length                 = TYPE_SAFE_STR__HTTP__ACCEPT__MAX_LENGTH
+    trim_whitespace            = True
@@ -0,0 +1,18 @@
+import re
+from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
+
+# todo: review this REGEX, since it looks far too permissive for an Auth string (which is usually just ascii values); note that it also lets CR/LF through
+
+TYPE_SAFE_STR__HTTP__AUTHORIZATION__REGEX = re.compile(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]')  # Filter control chars
+TYPE_SAFE_STR__HTTP__AUTHORIZATION__MAX_LENGTH = 2048
+
+
+class Safe_Str__Http__Authorization(Safe_Str):
+    """
+    Safe string class for HTTP Authorization header values.
+    Supports Bearer tokens, Basic auth, and other auth schemes.
+    Examples: 'Bearer eyJ...', 'Basic dXNlcjpwYXNz'
+    """
+    regex                      = TYPE_SAFE_STR__HTTP__AUTHORIZATION__REGEX
+    max_length                 = TYPE_SAFE_STR__HTTP__AUTHORIZATION__MAX_LENGTH
+    trim_whitespace            = True
@@ -0,0 +1,15 @@
+import re
+from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
+
+TYPE_SAFE_STR__HTTP__CACHE_CONTROL__REGEX      = re.compile(r'[^a-zA-Z0-9\-,=\s]')
+TYPE_SAFE_STR__HTTP__CACHE_CONTROL__MAX_LENGTH = 256
+
+class Safe_Str__Http__Cache_Control(Safe_Str):
+    """
+    Safe string class for HTTP Cache-Control header values.
+    Allows standard cache directives with parameters.
+    Examples: 'no-cache', 'max-age=3600', 'private, must-revalidate'
+    """
+    regex                      = TYPE_SAFE_STR__HTTP__CACHE_CONTROL__REGEX
+    max_length                 = TYPE_SAFE_STR__HTTP__CACHE_CONTROL__MAX_LENGTH
+    trim_whitespace            = True
@@ -0,0 +1,16 @@
+import re
+from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
+
+TYPE_SAFE_STR__HTTP__COOKIE__REGEX = re.compile(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]')  # Filter control chars
+#TYPE_SAFE_STR__HTTP__COOKIE__MAX_LENGTH = 4096
+TYPE_SAFE_STR__HTTP__COOKIE__MAX_LENGTH = 32768     # 32k but, this should really be 4k , but validate this with data from live usage of this class
+
+class Safe_Str__Http__Cookie(Safe_Str):
+    """
+    Safe string class for HTTP Cookie header values.
+    Allows cookie name-value pairs with standard separators.
+    Example: 'session=abc123; user_id=456; preferences={"theme":"dark"}'
+    """
+    regex                      = TYPE_SAFE_STR__HTTP__COOKIE__REGEX
+    max_length                 = TYPE_SAFE_STR__HTTP__COOKIE__MAX_LENGTH
+    trim_whitespace            = True
@@ -0,0 +1,18 @@
+import re
+from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
+
+TYPE_SAFE_STR__HTTP__HEADER_NAME__REGEX      = re.compile(r'[^a-zA-Z0-9\-]')
+TYPE_SAFE_STR__HTTP__HEADER_NAME__MAX_LENGTH = 128
+
+class Safe_Str__Http__Header__Name(Safe_Str):
+    """
+    Safe string class for HTTP header names.
+    Allows only alphanumerics and hyphens (a strict subset of the RFC 7230 token characters).
+    HTTP/2 (RFC 7540) and HTTP/3 (RFC 9114) require header names to be lowercase.
+    Common examples: content-type, authorization, user-agent, accept, cache-control
+    """
+    regex                      = TYPE_SAFE_STR__HTTP__HEADER_NAME__REGEX
+    max_length                 = TYPE_SAFE_STR__HTTP__HEADER_NAME__MAX_LENGTH
+    trim_whitespace            = True
+    to_lower_case              = True
+    allow_empty                = True
@@ -0,0 +1,15 @@
+import re
+from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
+
+TYPE_SAFE_STR__HTTP__HEADER_VALUE__REGEX = re.compile(r'[\x00-\x08\x0A-\x1F\x7F]')  # Filter control chars except tab (CR and LF included, to block header injection)
+TYPE_SAFE_STR__HTTP__HEADER_VALUE__MAX_LENGTH = 8192
+
+class Safe_Str__Http__Header__Value(Safe_Str):
+    """
+    Safe string class for HTTP header values.
+    Filters out control characters except tab (0x09); CR and LF are filtered so a value cannot inject extra header lines.
+    Characters above 0x7F are not matched by the regex, so non-ASCII text is not filtered.
+    """
+    regex                      = TYPE_SAFE_STR__HTTP__HEADER_VALUE__REGEX
+    max_length                 = TYPE_SAFE_STR__HTTP__HEADER_VALUE__MAX_LENGTH
+    trim_whitespace            = True
@@ -0,0 +1,16 @@
+import re
+from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
+
+# note: as with the other http safe_str classes, check whether this regex is overly permissive (it does not filter tab, LF or CR)
+TYPE_SAFE_STR__HTTP__LOCATION__REGEX      = re.compile(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]')  # Filter control chars
+TYPE_SAFE_STR__HTTP__LOCATION__MAX_LENGTH = 2048
+
+class Safe_Str__Http__Location(Safe_Str):
+    """
+    Safe string class for HTTP Location header values (redirect URLs).
+    Used in redirect responses (3xx status codes).
+    Example: 'https://example.com/new-page', '/relative/path'
+    """
+    regex                      = TYPE_SAFE_STR__HTTP__LOCATION__REGEX
+    max_length                 = TYPE_SAFE_STR__HTTP__LOCATION__MAX_LENGTH
+    trim_whitespace            = True
@@ -0,0 +1,16 @@
+import re
+from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
+
+# todo: review this regex: should we be allowing any non-text values here? (tab, LF and CR are currently not filtered)
+TYPE_SAFE_STR__HTTP__USER_AGENT__REGEX      = re.compile(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]')  # Filter control chars
+TYPE_SAFE_STR__HTTP__USER_AGENT__MAX_LENGTH = 512
+
+class Safe_Str__Http__User_Agent(Safe_Str):
+    """
+    Safe string class for HTTP User-Agent header values.
+    Allows standard user agent strings with various characters.
+    Example: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+    """
+    regex                      = TYPE_SAFE_STR__HTTP__USER_AGENT__REGEX
+    max_length                 = TYPE_SAFE_STR__HTTP__USER_AGENT__MAX_LENGTH
+    trim_whitespace            = True
@@ -2,16 +2,32 @@
 from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
 
 # Define the size constant
-TYPE_SAFE_STR__HTML__MAX_LENGTH = 1048576  # 1 megabyte in bytes
 
-# A minimal regex that only filters out:
-# - NULL byte (U+0000)
-# - Control characters (U+0001 to U+0008, U+000B to U+000C, U+000E to U+001F)
-# We explicitly allow:
-# - Tab (U+0009), Line Feed (U+000A), and Carriage Return (U+000D)
-# - All other Unicode characters
-TYPE_SAFE_STR__HTML__REGEX = re.compile(r'[\x00\x01-\x08\x0B\x0C\x0E-\x1F]')
+# Safe string class for HTML content with a 10MB limit.
+# Allows HTML tags, attributes, and all characters needed for valid HTML,
+# while filtering out control characters and NULL bytes that could cause
+# security issues or rendering problems.
+#
+# This is specifically for HTML content (not general text), so it:
+# - Allows angle brackets < > for tags
+# - Allows quotes " ' for attributes
+# - Preserves tabs and newlines for formatting
+# - Has a large 10MB limit for full HTML documents
+# - Trims outer whitespace but preserves internal formatting
+
+TYPE_SAFE_STR__HTML__MAX_LENGTH = 10485760  # 10 megabytes in bytes (for large HTML documents)
+TYPE_SAFE_STR__HTML__REGEX = re.compile(r'[\x00\x01-\x08\x0B\x0C\x0E-\x1F\x7F]')
+
 
 class Safe_Str__Html(Safe_Str):
     max_length                 = TYPE_SAFE_STR__HTML__MAX_LENGTH
-    regex                      = TYPE_SAFE_STR__HTML__REGEX
+    regex                      = TYPE_SAFE_STR__HTML__REGEX
+    trim_whitespace             = True                          # Trim leading/trailing whitespace
+    normalize_newlines          = True                          # Normalize different newline styles
+
+    def __new__(cls, value=None):                                                       # Normalizes newlines (when enabled) before delegating to the parent constructor
+        if cls.normalize_newlines and isinstance(value, str):                           # None is never a str, so no separate None check is needed
+            value = value.replace('\r\n', '\n').replace('\r', '\n')                     # CRLF first, then any remaining lone CR, so every newline ends up as \n
+
+        return super().__new__(cls, value)
+