Skip to content

Commit d2819c2

Browse files
committed
Merge dev into main
2 parents 40f75db + cfb8267 commit d2819c2

21 files changed

+1369
-13
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# OSBot-Utils
22

3-
![Current Release](https://img.shields.io/badge/release-v3.21.0-blue)
3+
![Current Release](https://img.shields.io/badge/release-v3.21.1-blue)
44
![Python](https://img.shields.io/badge/python-3.8+-green)
55
![Type-Safe](https://img.shields.io/badge/Type--Safe-✓-brightgreen)
66
![Caching](https://img.shields.io/badge/Caching-Built--In-orange)
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import re
2+
from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
3+
4+
TYPE_SAFE_STR__HTTP__ACCEPT__REGEX = re.compile(r'[^a-zA-Z0-9/\-+.*,;=\s]')
5+
TYPE_SAFE_STR__HTTP__ACCEPT__MAX_LENGTH = 512
6+
7+
# todo: review with other http safe_str classes and see if we can't create a Safe_Str__Http__Base that
8+
# defines chars like this, and with the max length being the main difference
9+
# also, connect this with the RFC for the HTTP protocol, since that one should be providing a good set of
10+
# mappings for what chars are allowed in these http values
11+
class Safe_Str__Http__Accept(Safe_Str):
12+
"""
13+
Safe string class for HTTP Accept header values.
14+
Allows MIME types with quality parameters.
15+
Examples: 'text/html,application/json;q=0.9', 'application/*', '*/*'
16+
"""
17+
regex = TYPE_SAFE_STR__HTTP__ACCEPT__REGEX
18+
max_length = TYPE_SAFE_STR__HTTP__ACCEPT__MAX_LENGTH
19+
trim_whitespace = True
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import re
2+
from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
3+
4+
# todo: review this REGEX, since this looks far too permissive for an Auth string (which is usually just ASCII values)
5+
6+
TYPE_SAFE_STR__HTTP__AUTHORIZATION__REGEX = re.compile(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]') # Filter control chars
7+
TYPE_SAFE_STR__HTTP__AUTHORIZATION__MAX_LENGTH = 2048
8+
9+
10+
class Safe_Str__Http__Authorization(Safe_Str):
11+
"""
12+
Safe string class for HTTP Authorization header values.
13+
Supports Bearer tokens, Basic auth, and other auth schemes.
14+
Examples: 'Bearer eyJ...', 'Basic dXNlcjpwYXNz'
15+
"""
16+
regex = TYPE_SAFE_STR__HTTP__AUTHORIZATION__REGEX
17+
max_length = TYPE_SAFE_STR__HTTP__AUTHORIZATION__MAX_LENGTH
18+
trim_whitespace = True
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import re
2+
from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
3+
4+
TYPE_SAFE_STR__HTTP__CACHE_CONTROL__REGEX = re.compile(r'[^a-zA-Z0-9\-,=\s]')
5+
TYPE_SAFE_STR__HTTP__CACHE_CONTROL__MAX_LENGTH = 256
6+
7+
class Safe_Str__Http__Cache_Control(Safe_Str):
8+
"""
9+
Safe string class for HTTP Cache-Control header values.
10+
Allows standard cache directives with parameters.
11+
Examples: 'no-cache', 'max-age=3600', 'private, must-revalidate'
12+
"""
13+
regex = TYPE_SAFE_STR__HTTP__CACHE_CONTROL__REGEX
14+
max_length = TYPE_SAFE_STR__HTTP__CACHE_CONTROL__MAX_LENGTH
15+
trim_whitespace = True
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import re
2+
from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
3+
4+
TYPE_SAFE_STR__HTTP__COOKIE__REGEX = re.compile(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]') # Filter control chars
5+
#TYPE_SAFE_STR__HTTP__COOKIE__MAX_LENGTH = 4096
6+
TYPE_SAFE_STR__HTTP__COOKIE__MAX_LENGTH = 32768 # 32k for now; this should really be 4k, but validate that with data from live usage of this class
7+
8+
class Safe_Str__Http__Cookie(Safe_Str):
9+
"""
10+
Safe string class for HTTP Cookie header values.
11+
Allows cookie name-value pairs with standard separators.
12+
Example: 'session=abc123; user_id=456; preferences={"theme":"dark"}'
13+
"""
14+
regex = TYPE_SAFE_STR__HTTP__COOKIE__REGEX
15+
max_length = TYPE_SAFE_STR__HTTP__COOKIE__MAX_LENGTH
16+
trim_whitespace = True
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import re
2+
from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
3+
4+
TYPE_SAFE_STR__HTTP__HEADER_NAME__REGEX = re.compile(r'[^a-zA-Z0-9\-]')
5+
TYPE_SAFE_STR__HTTP__HEADER_NAME__MAX_LENGTH = 128
6+
7+
class Safe_Str__Http__Header__Name(Safe_Str):
8+
"""
9+
Safe string class for HTTP header names.
10+
Allows only alphanumerics and hyphens (a conservative subset of the token characters permitted by RFC 7230).
11+
HTTP/2 (RFC 7540) and HTTP/3 (RFC 9114) require header names to be lowercase.
12+
Common examples: content-type, authorization, user-agent, accept, cache-control
13+
"""
14+
regex = TYPE_SAFE_STR__HTTP__HEADER_NAME__REGEX
15+
max_length = TYPE_SAFE_STR__HTTP__HEADER_NAME__MAX_LENGTH
16+
trim_whitespace = True
17+
to_lower_case = True
18+
allow_empty = True
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import re
2+
from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
3+
4+
TYPE_SAFE_STR__HTTP__HEADER_VALUE__REGEX = re.compile(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]') # Filter control chars except tab (0x09), LF (0x0A) and CR (0x0D)
5+
TYPE_SAFE_STR__HTTP__HEADER_VALUE__MAX_LENGTH = 8192
6+
7+
class Safe_Str__Http__Header__Value(Safe_Str):
8+
"""
9+
Safe string class for HTTP header values.
10+
Allows visible ASCII and spaces per RFC 7230.
11+
Filters out control characters except tab (0x09), line feed (0x0A) and carriage return (0x0D).
12+
"""
13+
regex = TYPE_SAFE_STR__HTTP__HEADER_VALUE__REGEX
14+
max_length = TYPE_SAFE_STR__HTTP__HEADER_VALUE__MAX_LENGTH
15+
trim_whitespace = True
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import re
2+
from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
3+
4+
# note: as with the others, check whether this regex is overly permissive
5+
TYPE_SAFE_STR__HTTP__LOCATION__REGEX = re.compile(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]') # Filter control chars
6+
TYPE_SAFE_STR__HTTP__LOCATION__MAX_LENGTH = 2048
7+
8+
class Safe_Str__Http__Location(Safe_Str):
9+
"""
10+
Safe string class for HTTP Location header values (redirect URLs).
11+
Used in redirect responses (3xx status codes).
12+
Example: 'https://example.com/new-page', '/relative/path'
13+
"""
14+
regex = TYPE_SAFE_STR__HTTP__LOCATION__REGEX
15+
max_length = TYPE_SAFE_STR__HTTP__LOCATION__MAX_LENGTH
16+
trim_whitespace = True
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import re
2+
from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
3+
4+
# todo: review this regex: should we be allowing any non-text values here?
5+
TYPE_SAFE_STR__HTTP__USER_AGENT__REGEX = re.compile(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]') # Filter control chars
6+
TYPE_SAFE_STR__HTTP__USER_AGENT__MAX_LENGTH = 512
7+
8+
class Safe_Str__Http__User_Agent(Safe_Str):
9+
"""
10+
Safe string class for HTTP User-Agent header values.
11+
Allows standard user agent strings with various characters.
12+
Example: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
13+
"""
14+
regex = TYPE_SAFE_STR__HTTP__USER_AGENT__REGEX
15+
max_length = TYPE_SAFE_STR__HTTP__USER_AGENT__MAX_LENGTH
16+
trim_whitespace = True

osbot_utils/type_safe/primitives/domains/web/safe_str/Safe_Str__Html.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,32 @@
22
from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
33

44
# Define the size constant
5-
TYPE_SAFE_STR__HTML__MAX_LENGTH = 1048576 # 1 megabyte in bytes
65

7-
# A minimal regex that only filters out:
8-
# - NULL byte (U+0000)
9-
# - Control characters (U+0001 to U+0008, U+000B to U+000C, U+000E to U+001F)
10-
# We explicitly allow:
11-
# - Tab (U+0009), Line Feed (U+000A), and Carriage Return (U+000D)
12-
# - All other Unicode characters
13-
TYPE_SAFE_STR__HTML__REGEX = re.compile(r'[\x00\x01-\x08\x0B\x0C\x0E-\x1F]')
6+
# Safe string class for HTML content with a 10MB limit.
7+
# Allows HTML tags, attributes, and all characters needed for valid HTML,
8+
# while filtering out control characters and NULL bytes that could cause
9+
# security issues or rendering problems.
10+
#
11+
# This is specifically for HTML content (not general text), so it:
12+
# - Allows angle brackets < > for tags
13+
# - Allows quotes " ' for attributes
14+
# - Preserves tabs and newlines for formatting
15+
# - Has a large 10MB limit for full HTML documents
16+
# - Trims outer whitespace but preserves internal formatting
17+
18+
TYPE_SAFE_STR__HTML__MAX_LENGTH = 10485760 # 10 megabytes in bytes (for large HTML documents)
19+
TYPE_SAFE_STR__HTML__REGEX = re.compile(r'[\x00\x01-\x08\x0B\x0C\x0E-\x1F\x7F]')
20+
1421

1522
class Safe_Str__Html(Safe_Str):
1623
max_length = TYPE_SAFE_STR__HTML__MAX_LENGTH
17-
regex = TYPE_SAFE_STR__HTML__REGEX
24+
regex = TYPE_SAFE_STR__HTML__REGEX
25+
trim_whitespace = True # Trim leading/trailing whitespace
26+
normalize_newlines = True # Normalize different newline styles
27+
28+
def __new__(cls, value=None):
29+
if cls.normalize_newlines and value is not None and isinstance(value, str):
30+
value = value.replace('\r\n', '\n').replace('\r', '\n') # Normalize to \n
31+
32+
return super().__new__(cls, value)
33+

0 commit comments

Comments
 (0)