Skip to content

Commit 649683c

Browse files
committed
Refactored the Cache__Hash__Generator class into this project, where it is very useful to have.
That class needs the (also added) Schema__Cache__Hash__Config, Enum__Hash__Algorithm and Safe_Str__Json__Field_Path. Improved the Safe_Str__Url class. Added new helpers: Safe_Str__Url__Path, Safe_Str__Url__Path_Query, Safe_Str__Url__Query, Safe_Str__Url__Server and Safe_Str__Domain. Added comprehensive tests for all of the above.
1 parent 763b94b commit 649683c

File tree

22 files changed

+2588
-119
lines changed

22 files changed

+2588
-119
lines changed
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import hashlib
2+
import json
3+
from typing import List
4+
from osbot_utils.helpers.cache.schemas.Schema__Cache__Hash__Config import Schema__Cache__Hash__Config
5+
from osbot_utils.type_safe.Type_Safe import Type_Safe
6+
from osbot_utils.type_safe.primitives.domains.cryptography.enums.Enum__Hash__Algorithm import Enum__Hash__Algorithm
7+
from osbot_utils.type_safe.primitives.domains.cryptography.safe_str.Safe_Str__Cache_Hash import Safe_Str__Cache_Hash
8+
from osbot_utils.type_safe.primitives.domains.identifiers.safe_str.Safe_Str__Json__Field_Path import Safe_Str__Json__Field_Path
9+
10+
11+
class Cache__Hash__Generator(Type_Safe):                                            # Generate consistent hashes from various input types
    config: Schema__Cache__Hash__Config                                             # Supplies the algorithm and the truncation length

    def calculate(self, data: bytes) -> Safe_Str__Cache_Hash:                       # Calculate hash from raw bytes
        # Hashes `data` with the configured algorithm and keeps the first
        # `config.length` characters of the hex digest.
        # Raises ValueError when the configured algorithm is not one of the
        # handled members; previously that case fell through and failed with
        # an UnboundLocalError on `hash_full`.
        algorithm = self.config.algorithm
        if algorithm == Enum__Hash__Algorithm.MD5:
            hash_full = hashlib.md5(data).hexdigest()
        elif algorithm == Enum__Hash__Algorithm.SHA256:
            hash_full = hashlib.sha256(data).hexdigest()
        elif algorithm == Enum__Hash__Algorithm.SHA384:
            hash_full = hashlib.sha384(data).hexdigest()
        else:
            raise ValueError(f"Unsupported hash algorithm: {algorithm}")

        return Safe_Str__Cache_Hash(hash_full[:self.config.length])

    def from_string(self, data: str) -> Safe_Str__Cache_Hash:                       # Hash from string
        return self.calculate(data.encode('utf-8'))                                 # Strings are hashed via their UTF-8 bytes

    def from_bytes(self, data: bytes) -> Safe_Str__Cache_Hash:                      # Hash from bytes
        return self.calculate(data)

    def from_json(self, data           : dict,                                      # Hash JSON with optional field exclusion
                        exclude_fields : List[str] = None
                   ) -> Safe_Str__Cache_Hash:
        # Only top-level keys are filtered; nested keys with the same name are kept.
        if exclude_fields:
            data = {k: v for k, v in data.items() if k not in exclude_fields}
        json_str = json.dumps(data, sort_keys=True)                                 # sort_keys makes the text independent of key order
        return self.from_string(json_str)

    def from_type_safe(self, obj            : Type_Safe,                            # Hash Type_Safe object
                             exclude_fields : List[str] = None
                        ) -> Safe_Str__Cache_Hash:
        return self.from_json(obj.json(), exclude_fields)                           # Hashes the dict returned by obj.json()

    def from_json_field(self, data       : dict,                                    # Hash a single (possibly nested) field of `data`
                              json_field : Safe_Str__Json__Field_Path
                         ) -> Safe_Str__Cache_Hash:
        # Raises ValueError when the field cannot be resolved (or resolves to
        # None) and when its value is of a type that is not handled below.
        field_value = self.extract_field_value(data, json_field)                    # Extract field value using dot notation

        if field_value is None:
            raise ValueError(f"Field '{json_field}' not found in data")

        if isinstance(field_value, str):
            return self.from_string(field_value)
        if isinstance(field_value, (int, float, bool)):
            return self.from_string(str(field_value))                               # Scalars are hashed via their str() form
        if isinstance(field_value, dict):
            return self.from_json(field_value)
        if isinstance(field_value, list):
            return self.from_string(json.dumps(field_value, sort_keys=True))        # Hash the JSON representation of the list
        raise ValueError(f"Unsupported field type: {type(field_value)}")

    def extract_field_value(self, data: dict, json_field: Safe_Str__Json__Field_Path):
        # Walks `data` one dot-separated key at a time. Returns None as soon
        # as a key is missing, a value is None, or a non-dict is reached
        # before the path has been fully consumed.
        parts   = json_field.split('.')
        current = data

        for part in parts:
            if not isinstance(current, dict):
                return None
            current = current.get(part)
            if current is None:
                return None

        return current

osbot_utils/helpers/cache/__init__.py

Whitespace-only changes.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from osbot_utils.type_safe.Type_Safe import Type_Safe
2+
from osbot_utils.type_safe.primitives.domains.cryptography.enums.Enum__Hash__Algorithm import Enum__Hash__Algorithm
3+
4+
5+
class Schema__Cache__Hash__Config(Type_Safe):
    # Configuration for hash generation.

    # Hash algorithm to use; defaults to SHA256.
    algorithm: Enum__Hash__Algorithm = Enum__Hash__Algorithm.SHA256

    # Hash length; the values listed by the author are 10, 16, 32, 64, 96.
    length: int = 16

osbot_utils/helpers/cache/schemas/__init__.py

Whitespace-only changes.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from enum import Enum
2+
3+
class Enum__Hash__Algorithm(Enum):
    # Closed set of hash algorithm identifiers; each value is the
    # lower-case name of the algorithm.
    MD5    = "md5"
    SHA256 = "sha256"
    SHA384 = "sha384"

osbot_utils/type_safe/primitives/domains/cryptography/enums/__init__.py

Whitespace-only changes.

osbot_utils/type_safe/primitives/domains/http/enums/__init__.py

Whitespace-only changes.
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import re
2+
from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
3+
4+
# Upper bound on the path length; set higher to support deep nesting.
TYPE_SAFE_STR__FIELD_PATH__MAX_LENGTH = 256

# Matches every character that is NOT a letter, digit, underscore, hyphen or
# dot (dots are permitted because they separate nesting levels).
TYPE_SAFE_STR__FIELD_PATH__REGEX = re.compile(r'[^a-zA-Z0-9_\-.]')
6+
7+
class Safe_Str__Json__Field_Path(Safe_Str):
    # Dot-separated path to a (possibly nested) JSON field, e.g. "a.b.c".
    regex           = TYPE_SAFE_STR__FIELD_PATH__REGEX
    max_length      = TYPE_SAFE_STR__FIELD_PATH__MAX_LENGTH
    trim_whitespace = True

    def __new__(cls, value: str = None):
        # Rejects paths whose dots would produce an empty segment, then
        # delegates construction to Safe_Str.
        # Raises ValueError for consecutive, leading or trailing dots.
        if value:
            # trim_whitespace is enabled, so (assuming Safe_Str strips the
            # value during construction - confirm) the dot checks must look
            # at the stripped text; otherwise " .a" would pass here and
            # could end up stored as ".a".
            candidate = value.strip() if cls.trim_whitespace else value

            if '..' in candidate:                                                   # Don't allow consecutive dots
                raise ValueError(f"Field path cannot contain consecutive dots: '{value}'")

            if candidate.startswith('.') or candidate.endswith('.'):                # Don't allow leading/trailing dots
                raise ValueError(f"Field path cannot start or end with a dot: '{value}'")

            # No per-segment loop is needed: an empty segment can only come
            # from a leading, trailing or doubled dot, all rejected above.

        return Safe_Str.__new__(cls, value)
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
import re
2+
from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
3+
from osbot_utils.type_safe.primitives.core.enums.Enum__Safe_Str__Regex_Mode import Enum__Safe_Str__Regex_Mode
4+
5+
# RFC 1035 maximum length of a full domain name.
TYPE_SAFE_STR__DOMAIN__MAX_LENGTH = 253

# One or more dot-separated labels. Each label starts and ends with an
# alphanumeric character, may contain hyphens in between, and is at most
# 63 characters long (1 + up to 61 + 1).
TYPE_SAFE_STR__DOMAIN__REGEX = re.compile(
    r'^[a-zA-Z0-9]([a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?'                                # first label
    r'(\.[a-zA-Z0-9]([a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?)*$'                           # any further ".label" parts
)
7+
8+
class Safe_Str__Domain(Safe_Str):
    """
    Safe string class for domain names (without scheme or port).

    Validates according to RFC 1035 and RFC 1123:
    - Each label max 63 characters
    - Total max 253 characters
    - Labels: alphanumeric + hyphens (not at start/end)
    - At least one dot (except 'localhost')

    Examples:
    - "example.com"
    - "api.example.com"
    - "sub.domain.example.com"
    - "localhost"
    - "my-site.co.uk"

    Use a different type for:
    - "https://example.com"    (use Safe_Str__Url__Server)
    - "192.168.1.1"            (use Safe_Str__IP_Address)
    - "example.com:8080"       (use Safe_Str__Url__Server)

    NOTE(review): neither the regex nor _validate_domain rejects
    all-numeric labels, so "192.168.1.1" appears to pass the checks
    visible in this class - confirm whether that is intended.
    """
    regex             = TYPE_SAFE_STR__DOMAIN__REGEX
    regex_mode        = Enum__Safe_Str__Regex_Mode.MATCH
    max_length        = TYPE_SAFE_STR__DOMAIN__MAX_LENGTH
    trim_whitespace   = True
    strict_validation = True
    allow_empty       = True

    def __new__(cls, value=None):
        """Build the instance via Safe_Str, then apply the domain-specific checks.

        None and '' are passed straight to the parent without extra checks.
        Raises ValueError (from _validate_domain) for structurally invalid names.
        """
        if value is None or value == '':
            return super().__new__(cls, value)

        # Basic regex validation via parent
        instance = super().__new__(cls, value)

        # Additional validation runs on the value the parent actually produced
        cls._validate_domain(str(instance))

        return instance

    @classmethod
    def _validate_domain(cls, domain):
        """Validate domain name structure; raises ValueError on the first problem found."""
        if not domain:
            return

        # Special case: localhost is the only accepted name without a dot
        if domain == 'localhost':
            return

        # Must have at least one dot
        if '.' not in domain:
            raise ValueError(f"Invalid domain '{domain}': must contain at least one dot (or be 'localhost')")

        # Split and validate each label
        for label in domain.split('.'):
            # Each label must be 1-63 characters
            if not label or len(label) > 63:
                raise ValueError(f"Invalid domain '{domain}': label '{label}' length must be 1-63 characters")

            # Cannot start or end with hyphen
            if label.startswith('-') or label.endswith('-'):
                raise ValueError(f"Invalid domain '{domain}': label '{label}' cannot start or end with hyphen")

            # Must be alphanumeric + hyphens. fullmatch is used because
            # '$' with re.match also matches just before a trailing newline,
            # which would let a label such as "com\n" through.
            if not re.fullmatch(r'[a-zA-Z0-9\-]+', label):
                raise ValueError(f"Invalid domain '{domain}': label '{label}' contains invalid characters")

    def __add__(self, other):
        """Enable composability with URL components.

        domain + "<scheme>://..." yields a Safe_Str__Url__Server built from
        the other operand followed by the domain; anything else falls back
        to plain string concatenation.
        """
        from osbot_utils.type_safe.primitives.domains.web.safe_str.Safe_Str__Url__Server import Safe_Str__Url__Server

        # Only real scheme prefixes count. A bare startswith('http') would
        # also catch unrelated strings such as "http-equiv" and try to turn
        # them into a server value.
        if isinstance(other, str) and other.startswith(('http://', 'https://')):
            return Safe_Str__Url__Server(f"{other}{self}")

        # Default string concatenation
        return str(self) + str(other)

    def __radd__(self, other):
        """Reverse addition: scheme + domain = server"""
        from osbot_utils.type_safe.primitives.domains.web.safe_str.Safe_Str__Url__Server import Safe_Str__Url__Server

        if isinstance(other, str):
            if other in ('https://', 'http://'):
                return Safe_Str__Url__Server(f"{other}{self}")

        return str(other) + str(self)
Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,38 @@
11
import re
2-
from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
2+
from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
3+
from osbot_utils.type_safe.primitives.core.enums.Enum__Safe_Str__Regex_Mode import Enum__Safe_Str__Regex_Mode
34

4-
TYPE_SAFE_STR__URL__MAX_LENGTH = 2048 # Common maximum URL length
5-
TYPE_SAFE_STR__URL__REGEX = re.compile(r'^(?!https?://).*|[^a-zA-Z0-9:/\-._~&=?#+%@]') # Allow characters valid in URLs
5+
# Modern URL length limit (8192 characters). Author's rationale:
#   - Old IE11 limit was 2,083 (obsolete as of 2022)
#   - Modern browsers support 64K+ characters
#   - Common server defaults: Apache (8K), Nginx (4-8K)
#   - CDN limits: Cloudflare (16K), AWS API Gateway (10K)
#   - Handles OAuth/SAML redirects (typically 3-6K)
#   - Handles marketing/analytics URLs (typically 2-4K)
#   - Still protects against abuse (8K+ is suspicious)
TYPE_SAFE_STR__URL__MAX_LENGTH = 8192

# Whole-string pattern for an http(s) URL: scheme, host, then optional
# port, path, query and fragment, each limited to the listed characters.
TYPE_SAFE_STR__URL__REGEX = re.compile(
    r'^https?://[a-zA-Z0-9.\-]+(:[0-9]{1,5})?'                                      # scheme, domain/IP, optional port
    r'(/[a-zA-Z0-9/\-._~%]*)?'                                                      # optional path
    r'(\?[a-zA-Z0-9=&\-._~%+]*)?(#[a-zA-Z0-9\-._~%]*)?$'                            # optional query, optional fragment
)
622

723
class Safe_Str__Url(Safe_Str):
    """
    Safe string type holding a complete http(s) URL.

    Sample values:
    - "https://example.com"
    - "http://localhost:8080/api/users?page=1"
    - "https://api.example.com/v1/products/123?format=json"
    """
    # Pattern and the way it is applied
    regex                      = TYPE_SAFE_STR__URL__REGEX
    regex_mode                 = Enum__Safe_Str__Regex_Mode.MATCH

    # Size limit and input handling flags
    max_length                 = TYPE_SAFE_STR__URL__MAX_LENGTH
    trim_whitespace            = True
    strict_validation          = True
    allow_empty                = True
    allow_all_replacement_char = False

0 commit comments

Comments
 (0)