Skip to content

Commit 227e5b6

Browse files
committed
Merge dev into main
2 parents 9cbf346 + a98a593 commit 227e5b6

16 files changed

+368
-22
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
Powerful Python util methods and classes that simplify common apis and tasks.
44

5-
![Current Release](https://img.shields.io/badge/release-v2.58.0-blue)
5+
![Current Release](https://img.shields.io/badge/release-v2.58.3-blue)
66
[![codecov](https://codecov.io/gh/owasp-sbot/OSBot-Utils/graph/badge.svg?token=GNVW0COX1N)](https://codecov.io/gh/owasp-sbot/OSBot-Utils)
77

88

osbot_utils/helpers/ast/Ast_Base.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
import ast
22
import inspect
33

4-
from osbot_utils.utils.Dev import pprint, jprint
5-
from osbot_utils.utils.Exceptions import syntax_error
6-
from osbot_utils.utils.Files import is_file, file_contents
7-
from osbot_utils.utils.Objects import obj_data, obj_info
8-
from osbot_utils.utils.Str import str_dedent
4+
from osbot_utils.utils.Dev import pprint, jprint
5+
from osbot_utils.utils.Exceptions import syntax_error
6+
from osbot_utils.utils.Files import is_file, file_contents
7+
from osbot_utils.utils.Objects import obj_data, obj_info
8+
from osbot_utils.utils.Str import str_dedent
99

1010

1111
class Ast_Base:
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import re
2+
from osbot_utils.helpers.safe_str.Safe_Str import Safe_Str
3+
4+
TYPE_SAFE_STR__HTTP__CONTENT_TYPE__REGEX = re.compile(r'[^a-zA-Z0-9/\-+.;= ]')
5+
TYPE_SAFE_STR__HTTP__CONTENT_TYPE__MAX_LENGTH = 256
6+
7+
class Safe_Str__Http__Content_Type(Safe_Str):
8+
regex = TYPE_SAFE_STR__HTTP__CONTENT_TYPE__REGEX
9+
max_length = TYPE_SAFE_STR__HTTP__CONTENT_TYPE__MAX_LENGTH
10+
allow_empty = False
11+
trim_whitespace = True
12+
allow_all_replacement_char = False
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import re
2+
from osbot_utils.helpers.safe_str.Safe_Str import Safe_Str
3+
4+
TYPE_SAFE_STR__HTTP__ETAG__REGEX = re.compile(r'[^a-zA-Z0-9"\/\-_.:]') # Allow alphanumerics, quotes, slashes, hyphens, underscores, periods, colons
5+
TYPE_SAFE_STR__HTTP__ETAG__MAX_LENGTH = 128
6+
7+
class Safe_Str__Http__ETag(Safe_Str):
8+
regex = TYPE_SAFE_STR__HTTP__ETAG__REGEX
9+
max_length = TYPE_SAFE_STR__HTTP__ETAG__MAX_LENGTH
10+
trim_whitespace = True
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import re
2+
from osbot_utils.helpers.safe_str.Safe_Str import Safe_Str
3+
4+
TYPE_SAFE_STR__HTTP__LAST_MODIFIED__REGEX = re.compile(r'[^a-zA-Z0-9:, -]')
5+
TYPE_SAFE_STR__HTTP__LAST_MODIFIED__MAX_LENGTH = 64
6+
7+
class Safe_Str__Http__Last_Modified(Safe_Str):
8+
regex = TYPE_SAFE_STR__HTTP__LAST_MODIFIED__REGEX
9+
max_length = TYPE_SAFE_STR__HTTP__LAST_MODIFIED__MAX_LENGTH
10+
trim_whitespace = True
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import re
2+
from osbot_utils.helpers.safe_str.Safe_Str import Safe_Str
3+
4+
5+
TYPE_SAFE_STR__TEXT__MAX_LENGTH = 1048576 # Define the size constant - 1 megabyte in bytes
6+
7+
# A more permissive regex that primarily filters out:
8+
# - NULL byte (U+0000)
9+
# - Control characters (U+0001 to U+0008, U+000B to U+000C, U+000E to U+001F)
10+
# - The DEL control character (U+007F)
11+
# But allows:
12+
# - All standard printable ASCII characters
13+
# - Tab (U+0009), Line Feed (U+000A), and Carriage Return (U+000D)
14+
# - A wide range of punctuation, symbols, and Unicode characters for international text
15+
16+
TYPE_SAFE_STR__HTTP__TEXT__REGEX = re.compile(r'[\x00\x01-\x08\x0B\x0C\x0E-\x1F\x7F]')
17+
18+
class Safe_Str__Http__Text(Safe_Str):
19+
"""
20+
Safe string class for general text content with a 1MB limit.
21+
Allows a wide range of characters suitable for natural language text,
22+
including international characters, while filtering out control characters
23+
(including DEL, U+007F) other than tab and newline characters.
24+
"""
25+
max_length = TYPE_SAFE_STR__TEXT__MAX_LENGTH
26+
regex = TYPE_SAFE_STR__HTTP__TEXT__REGEX
27+
trim_whitespace = True # Trim leading/trailing whitespace
28+
normalize_newlines = True # Option to normalize different newline styles
29+
30+
def __new__(cls, value=None):
31+
32+
if cls.normalize_newlines and value is not None and isinstance(value, str): # Handle newline normalization before passing to parent class
33+
value = value.replace('\r\n', '\n').replace('\r', '\n') # Normalize different newline styles to \n
34+
35+
return super().__new__(cls, value) # Now call the parent implementation

osbot_utils/utils/Functions.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,13 @@ def function_name(function):
2525
if isinstance(function, types.FunctionType):
2626
return function.__name__
2727

28-
def function_source_code(function):
29-
if isinstance(function, types.FunctionType):
30-
source_code = inspect.getsource(function)
28+
def function_source_code(target):
29+
if isinstance(target, (types.FunctionType, types.MethodType)):
30+
source_code = inspect.getsource(target)
3131
source_code = textwrap.dedent(source_code).strip()
3232
return source_code
33-
elif isinstance(function, str):
34-
return function
33+
elif isinstance(target, str): # todo: see if we really need this logic (or we just return none when "target" is a str)
34+
return target
3535
return None
3636

3737
def get_line_number(function):
@@ -110,4 +110,5 @@ def type_file(target):
110110

111111

112112
function_line_number = get_line_number
113-
method_line_number = get_line_number
113+
method_line_number = get_line_number
114+
method_source_code = function_source_code

osbot_utils/version

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
v2.58.0
1+
v2.58.3

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "osbot_utils"
3-
version = "v2.58.0"
3+
version = "v2.58.3"
44
description = "OWASP Security Bot - Utils"
55
authors = ["Dinis Cruz <[email protected]>"]
66
license = "MIT"

tests/unit/helpers/safe_str/test_Safe_Str__Html.py renamed to tests/unit/helpers/safe_str/html/test_Safe_Str__Html.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import re
2-
from unittest import TestCase
3-
from osbot_utils.helpers.safe_str.Safe_Str import Safe_Str
4-
from osbot_utils.helpers.safe_str.Safe_Str__Html import Safe_Str__Html, TYPE_SAFE_STR__HTML__REGEX, TYPE_SAFE_STR__HTML__MAX_LENGTH
5-
from osbot_utils.utils.Objects import base_types
2+
from unittest import TestCase
3+
from osbot_utils.helpers.safe_str.Safe_Str import Safe_Str
4+
from osbot_utils.helpers.safe_str.http.Safe_Str__Html import Safe_Str__Html, TYPE_SAFE_STR__HTML__REGEX, TYPE_SAFE_STR__HTML__MAX_LENGTH
5+
from osbot_utils.utils.Objects import base_types
66

77

88
class test_Safe_Str__Html(TestCase):
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import pytest
2+
from unittest import TestCase
3+
from osbot_utils.helpers.safe_str.http.Safe_Str__Http__Content_Type import Safe_Str__Http__Content_Type
4+
5+
6+
class test_Safe_Str__Http__Content_Type(TestCase):
7+
8+
def test_Safe_Str__Http__ContentType_class(self):
9+
# Standard MIME types
10+
assert Safe_Str__Http__Content_Type('text/html') == 'text/html'
11+
assert Safe_Str__Http__Content_Type('application/json') == 'application/json'
12+
assert Safe_Str__Http__Content_Type('image/jpeg') == 'image/jpeg'
13+
assert Safe_Str__Http__Content_Type('audio/mpeg') == 'audio/mpeg'
14+
assert Safe_Str__Http__Content_Type('video/mp4') == 'video/mp4'
15+
16+
# With parameters
17+
assert Safe_Str__Http__Content_Type('text/html; charset=utf-8') == 'text/html; charset=utf-8'
18+
assert Safe_Str__Http__Content_Type('application/json; charset=utf-8') == 'application/json; charset=utf-8'
19+
assert Safe_Str__Http__Content_Type('text/plain; charset=iso-8859-1') == 'text/plain; charset=iso-8859-1'
20+
21+
# Complex content types
22+
assert Safe_Str__Http__Content_Type('application/vnd.api+json') == 'application/vnd.api+json'
23+
assert Safe_Str__Http__Content_Type('application/ld+json') == 'application/ld+json'
24+
assert Safe_Str__Http__Content_Type('application/vnd.ms-excel') == 'application/vnd.ms-excel'
25+
26+
# Whitespace handling (trim_whitespace = True)
27+
assert Safe_Str__Http__Content_Type(' text/html ') == 'text/html'
28+
assert Safe_Str__Http__Content_Type('application/json; charset=utf-8 ') == 'application/json; charset=utf-8'
29+
30+
# Numeric conversion
31+
assert Safe_Str__Http__Content_Type(12345) == '12345'
32+
33+
# Invalid characters get replaced
34+
assert Safe_Str__Http__Content_Type('text/html<script>') == 'text/html_script_'
35+
assert Safe_Str__Http__Content_Type('text/html:invalid') == 'text/html_invalid'
36+
assert Safe_Str__Http__Content_Type('text@html') == 'text_html'
37+
38+
# Edge cases and exceptions
39+
with pytest.raises(ValueError) as exc_info:
40+
Safe_Str__Http__Content_Type(None)
41+
assert "Value cannot be None when allow_empty is False" in str(exc_info.value)
42+
43+
with pytest.raises(ValueError) as exc_info:
44+
Safe_Str__Http__Content_Type('')
45+
assert "Value cannot be empty when allow_empty is False" in str(exc_info.value)
46+
47+
with pytest.raises(ValueError) as exc_info:
48+
Safe_Str__Http__Content_Type('<?&*^?>') # All invalid chars
49+
assert "Sanitized value consists entirely of '_' characters" in str(exc_info.value)
50+
51+
with pytest.raises(ValueError) as exc_info:
52+
Safe_Str__Http__Content_Type(' ') # Spaces only (will be trimmed)
53+
assert "Value cannot be empty when allow_empty is False" in str(exc_info.value)
54+
55+
with pytest.raises(ValueError) as exc_info:
56+
Safe_Str__Http__Content_Type('a' * 257) # Exceeds max length
57+
assert "Value exceeds maximum length of 256" in str(exc_info.value)
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import pytest
2+
from unittest import TestCase
3+
from osbot_utils.helpers.safe_str.http.Safe_Str__Http__ETag import Safe_Str__Http__ETag
4+
5+
6+
class test_Safe_Str__Http__ETag(TestCase):
7+
8+
def test_Safe_Str__Http__ETag_class(self):
9+
# Strong ETags
10+
assert str(Safe_Str__Http__ETag('"abc123"')) == '"abc123"'
11+
assert str(Safe_Str__Http__ETag('"67890"')) == '"67890"'
12+
assert str(Safe_Str__Http__ETag('"a1b2c3d4e5f6"')) == '"a1b2c3d4e5f6"'
13+
assert str(Safe_Str__Http__ETag('"0123456789abcdef"')) == '"0123456789abcdef"'
14+
15+
# Weak ETags
16+
assert str(Safe_Str__Http__ETag('W/"abc123"')) == 'W/"abc123"'
17+
assert str(Safe_Str__Http__ETag('W/"67890"')) == 'W/"67890"'
18+
assert str(Safe_Str__Http__ETag('W/"a1b2c3d4e5f6"')) == 'W/"a1b2c3d4e5f6"'
19+
20+
# ETags with special characters
21+
assert str(Safe_Str__Http__ETag('"abc-123"')) == '"abc-123"'
22+
assert str(Safe_Str__Http__ETag('"file.txt"')) == '"file.txt"'
23+
assert str(Safe_Str__Http__ETag('"resource:123"')) == '"resource:123"'
24+
assert str(Safe_Str__Http__ETag('"v1.0/api"')) == '"v1.0/api"'
25+
assert str(Safe_Str__Http__ETag('"v1_2"')) == '"v1_2"'
26+
27+
# Without quotes (not strictly valid per the HTTP entity-tag grammar, but accepted because some HTTP servers return them)
28+
assert str(Safe_Str__Http__ETag('abc123')) == 'abc123'
29+
assert str(Safe_Str__Http__ETag('67890')) == '67890'
30+
31+
# Whitespace handling (trim_whitespace = True)
32+
assert str(Safe_Str__Http__ETag(' "abc123" ')) == '"abc123"'
33+
assert str(Safe_Str__Http__ETag('W/"abc123" ')) == 'W/"abc123"'
34+
35+
# Numeric conversion
36+
assert str(Safe_Str__Http__ETag(12345)) == '12345'
37+
38+
# Invalid characters get replaced
39+
assert Safe_Str__Http__ETag('"abc<script>123"' ) == '"abc_script_123"'
40+
assert Safe_Str__Http__ETag('"abc!@#$%^&*()123"' ) == '"abc__________123"'
41+
assert Safe_Str__Http__ETag('W/"abc+=[]{};\'\\<>?,123"') == 'W/"abc_____________123"'
42+
assert Safe_Str__Http__ETag('<?&*^?>' ) == '_______'
43+
44+
# empty values
45+
assert Safe_Str__Http__ETag(None ) == ''
46+
assert Safe_Str__Http__ETag('' ) == ''
47+
assert Safe_Str__Http__ETag(' ') == '' # Spaces only (will be trimmed)
48+
49+
with pytest.raises(ValueError) as exc_info:
50+
Safe_Str__Http__ETag('a' * 129) # Exceeds max length
51+
assert "Value exceeds maximum length of 128" in str(exc_info.value)
52+
53+
def test_special_etag_formats(self):
54+
# More complex ETags that servers might generate
55+
assert str(Safe_Str__Http__ETag('"5d8e-3f4-f2340"')) == '"5d8e-3f4-f2340"'
56+
assert str(Safe_Str__Http__ETag('"5d8e_3f4_f2340"')) == '"5d8e_3f4_f2340"'
57+
assert str(Safe_Str__Http__ETag('"a.b.c.d"')) == '"a.b.c.d"'
58+
assert str(Safe_Str__Http__ETag('"v1.2.3:4567"')) == '"v1.2.3:4567"'
59+
60+
# Non-standard but potentially used formats
61+
assert str(Safe_Str__Http__ETag('W/"v1-5d8e3f4f2340"')) == 'W/"v1-5d8e3f4f2340"'
62+
assert str(Safe_Str__Http__ETag('W/"a/b/c:12345"')) == 'W/"a/b/c:12345"'
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import pytest
2+
from unittest import TestCase
3+
from osbot_utils.helpers.safe_str.http.Safe_Str__Http__Last_Modified import Safe_Str__Http__Last_Modified
4+
5+
6+
class test_Safe_Str__Http__Last_Modified(TestCase):
7+
8+
def test_Safe_Str__Http__LastModified_class(self):
9+
# Standard RFC formats
10+
assert Safe_Str__Http__Last_Modified('Wed, 21 Oct 2023 07:28:00 GMT') == 'Wed, 21 Oct 2023 07:28:00 GMT'
11+
assert Safe_Str__Http__Last_Modified('Mon, 15 May 2024 12:30:45 GMT') == 'Mon, 15 May 2024 12:30:45 GMT'
12+
assert Safe_Str__Http__Last_Modified('Sat, 01 Jan 2022 00:00:00 GMT') == 'Sat, 01 Jan 2022 00:00:00 GMT'
13+
14+
# Different date formats that might be used
15+
assert Safe_Str__Http__Last_Modified('2023-10-21T07:28:00Z') == '2023-10-21T07:28:00Z'
16+
assert Safe_Str__Http__Last_Modified('21 Oct 2023 07:28:00 GMT') == '21 Oct 2023 07:28:00 GMT'
17+
18+
# Whitespace handling (trim_whitespace = True)
19+
assert Safe_Str__Http__Last_Modified(' Wed, 21 Oct 2023 07:28:00 GMT ') == 'Wed, 21 Oct 2023 07:28:00 GMT'
20+
21+
# Invalid characters get replaced
22+
assert Safe_Str__Http__Last_Modified('Wed, 21 Oct 2023<script>') == 'Wed, 21 Oct 2023_script_'
23+
assert Safe_Str__Http__Last_Modified('Wed; 21 Oct 2023') == 'Wed_ 21 Oct 2023'
24+
assert Safe_Str__Http__Last_Modified('Wed, 21/Oct/2023') == 'Wed, 21_Oct_2023'
25+
26+
assert Safe_Str__Http__Last_Modified('<?&*^?>') == '_______'
27+
# allow empty values
28+
assert Safe_Str__Http__Last_Modified(None) == ''
29+
assert Safe_Str__Http__Last_Modified('') == ''
30+
assert Safe_Str__Http__Last_Modified(' ') == '' # Spaces only (will be trimmed)
31+
32+
# Numeric conversion
33+
assert Safe_Str__Http__Last_Modified(20231021) == '20231021'
34+
35+
with pytest.raises(ValueError) as exc_info:
36+
Safe_Str__Http__Last_Modified('a' * 65) # Exceeds max length
37+
assert "Value exceeds maximum length of 64" in str(exc_info.value)

0 commit comments

Comments
 (0)