Skip to content

Commit 99b66c4

Browse files
committed
added Safe_Str__Http__Text
1 parent e39fc12 commit 99b66c4

6 files changed

+195
-46
lines changed

osbot_utils/helpers/safe_str/http/Safe_Str__Http__ContentType.py renamed to osbot_utils/helpers/safe_str/http/Safe_Str__Http__Content_Type.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
TYPE_SAFE_STR__HTTP__CONTENT_TYPE__REGEX = re.compile(r'[^a-zA-Z0-9/\-+.;= ]')
55
TYPE_SAFE_STR__HTTP__CONTENT_TYPE__MAX_LENGTH = 256
66

7-
class Safe_Str__Http__ContentType(Safe_Str):
7+
class Safe_Str__Http__Content_Type(Safe_Str):
88
regex = TYPE_SAFE_STR__HTTP__CONTENT_TYPE__REGEX
99
max_length = TYPE_SAFE_STR__HTTP__CONTENT_TYPE__MAX_LENGTH
1010
allow_empty = False

osbot_utils/helpers/safe_str/http/Safe_Str__Http__LastModified.py renamed to osbot_utils/helpers/safe_str/http/Safe_Str__Http__Last_Modified.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
TYPE_SAFE_STR__HTTP__LAST_MODIFIED__REGEX = re.compile(r'[^a-zA-Z0-9:, -]')
55
TYPE_SAFE_STR__HTTP__LAST_MODIFIED__MAX_LENGTH = 64
66

7-
class Safe_Str__Http__LastModified(Safe_Str):
7+
class Safe_Str__Http__Last_Modified(Safe_Str):
88
regex = TYPE_SAFE_STR__HTTP__LAST_MODIFIED__REGEX
99
max_length = TYPE_SAFE_STR__HTTP__LAST_MODIFIED__MAX_LENGTH
1010
allow_empty = False
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import re
2+
from osbot_utils.helpers.safe_str.Safe_Str import Safe_Str
3+
4+
5+
TYPE_SAFE_STR__TEXT__MAX_LENGTH = 1048576 # Define the size constant - 1 megabyte in bytes
6+
7+
# A more permissive regex that primarily filters out:
8+
# - NULL byte (U+0000)
9+
# - Control characters (U+0001 to U+0008, U+000B to U+000C, U+000E to U+001F)
10+
# - The DEL control character (U+007F)
11+
# But allows:
12+
# - All standard printable ASCII characters
13+
# - Tab (U+0009), Line Feed (U+000A), and Carriage Return (U+000D)
14+
# - A wide range of punctuation, symbols, and Unicode characters for international text
15+
16+
TYPE_SAFE_STR__HTTP__TEXT__REGEX = re.compile(r'[\x00\x01-\x08\x0B\x0C\x0E-\x1F\x7F]')
17+
18+
class Safe_Str__Http__Text(Safe_Str):
    """
    Safe_Str variant for free-form text such as prose, markup or code snippets.

    Settings handed to the Safe_Str base class:
      - max_length      : TYPE_SAFE_STR__TEXT__MAX_LENGTH
      - regex           : TYPE_SAFE_STR__HTTP__TEXT__REGEX (pattern of unsafe characters)
      - trim_whitespace : enabled

    On top of that, while normalize_newlines is enabled, str input has its
    CRLF and lone CR line endings rewritten to LF before the base class
    receives the value. Non-str input (including None) is passed through
    untouched.
    """
    max_length         = TYPE_SAFE_STR__TEXT__MAX_LENGTH                        # Upper bound enforced by the base class
    regex              = TYPE_SAFE_STR__HTTP__TEXT__REGEX                       # Characters considered unsafe
    trim_whitespace    = True                                                   # Strip leading/trailing whitespace
    normalize_newlines = True                                                   # Unify newline styles to LF

    def __new__(cls, value=None):
        wants_normalizing = cls.normalize_newlines and isinstance(value, str)  # isinstance() is already False for None
        if wants_normalizing:
            unified = value.replace('\r\n', '\n')                               # CRLF first, so it does not turn into two LFs
            value   = unified.replace('\r', '\n')                               # then any remaining lone CR
        return super().__new__(cls, value)                                      # Validation/sanitisation is left to the parent
Lines changed: 25 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,57 @@
11
import pytest
2-
from unittest import TestCase
2+
from unittest import TestCase
3+
from osbot_utils.helpers.safe_str.http.Safe_Str__Http__Content_Type import Safe_Str__Http__Content_Type
34

4-
from osbot_utils.helpers.safe_str.http.Safe_Str__Http__ContentType import Safe_Str__Http__ContentType
55

6-
7-
class test_Safe_Str__Http__ContentType(TestCase):
6+
class test_Safe_Str__Http__Content_Type(TestCase):
87

98
def test_Safe_Str__Http__ContentType_class(self):
109
# Standard MIME types
11-
assert Safe_Str__Http__ContentType('text/html' ) == 'text/html'
12-
assert Safe_Str__Http__ContentType('application/json') == 'application/json'
13-
assert Safe_Str__Http__ContentType('image/jpeg' ) == 'image/jpeg'
14-
assert Safe_Str__Http__ContentType('audio/mpeg' ) == 'audio/mpeg'
15-
assert Safe_Str__Http__ContentType('video/mp4' ) == 'video/mp4'
10+
assert Safe_Str__Http__Content_Type('text/html') == 'text/html'
11+
assert Safe_Str__Http__Content_Type('application/json') == 'application/json'
12+
assert Safe_Str__Http__Content_Type('image/jpeg') == 'image/jpeg'
13+
assert Safe_Str__Http__Content_Type('audio/mpeg') == 'audio/mpeg'
14+
assert Safe_Str__Http__Content_Type('video/mp4') == 'video/mp4'
1615

1716
# With parameters
18-
assert Safe_Str__Http__ContentType('text/html; charset=utf-8' ) == 'text/html; charset=utf-8'
19-
assert Safe_Str__Http__ContentType('application/json; charset=utf-8') == 'application/json; charset=utf-8'
20-
assert Safe_Str__Http__ContentType('text/plain; charset=iso-8859-1' ) == 'text/plain; charset=iso-8859-1'
17+
assert Safe_Str__Http__Content_Type('text/html; charset=utf-8') == 'text/html; charset=utf-8'
18+
assert Safe_Str__Http__Content_Type('application/json; charset=utf-8') == 'application/json; charset=utf-8'
19+
assert Safe_Str__Http__Content_Type('text/plain; charset=iso-8859-1') == 'text/plain; charset=iso-8859-1'
2120

2221
# Complex content types
23-
assert Safe_Str__Http__ContentType('application/vnd.api+json' ) == 'application/vnd.api+json'
24-
assert Safe_Str__Http__ContentType('application/ld+json' ) == 'application/ld+json'
25-
assert Safe_Str__Http__ContentType('application/vnd.ms-excel' ) == 'application/vnd.ms-excel'
22+
assert Safe_Str__Http__Content_Type('application/vnd.api+json') == 'application/vnd.api+json'
23+
assert Safe_Str__Http__Content_Type('application/ld+json') == 'application/ld+json'
24+
assert Safe_Str__Http__Content_Type('application/vnd.ms-excel') == 'application/vnd.ms-excel'
2625

2726
# Whitespace handling (trim_whitespace = True)
28-
assert Safe_Str__Http__ContentType(' text/html ' ) == 'text/html'
29-
assert Safe_Str__Http__ContentType('application/json; charset=utf-8 ') == 'application/json; charset=utf-8'
27+
assert Safe_Str__Http__Content_Type(' text/html ') == 'text/html'
28+
assert Safe_Str__Http__Content_Type('application/json; charset=utf-8 ') == 'application/json; charset=utf-8'
3029

3130
# Numeric conversion
32-
assert Safe_Str__Http__ContentType(12345) == '12345'
31+
assert Safe_Str__Http__Content_Type(12345) == '12345'
3332

3433
# Invalid characters get replaced
35-
assert Safe_Str__Http__ContentType('text/html<script>' ) == 'text/html_script_'
36-
assert Safe_Str__Http__ContentType('text/html:invalid' ) == 'text/html_invalid'
37-
assert Safe_Str__Http__ContentType('text@html' ) == 'text_html'
34+
assert Safe_Str__Http__Content_Type('text/html<script>') == 'text/html_script_'
35+
assert Safe_Str__Http__Content_Type('text/html:invalid') == 'text/html_invalid'
36+
assert Safe_Str__Http__Content_Type('text@html') == 'text_html'
3837

3938
# Edge cases and exceptions
4039
with pytest.raises(ValueError) as exc_info:
41-
Safe_Str__Http__ContentType(None)
40+
Safe_Str__Http__Content_Type(None)
4241
assert "Value cannot be None when allow_empty is False" in str(exc_info.value)
4342

4443
with pytest.raises(ValueError) as exc_info:
45-
Safe_Str__Http__ContentType('')
44+
Safe_Str__Http__Content_Type('')
4645
assert "Value cannot be empty when allow_empty is False" in str(exc_info.value)
4746

4847
with pytest.raises(ValueError) as exc_info:
49-
Safe_Str__Http__ContentType('<?&*^?>') # All invalid chars
48+
Safe_Str__Http__Content_Type('<?&*^?>') # All invalid chars
5049
assert "Sanitized value consists entirely of '_' characters" in str(exc_info.value)
5150

5251
with pytest.raises(ValueError) as exc_info:
53-
Safe_Str__Http__ContentType(' ') # Spaces only (will be trimmed)
52+
Safe_Str__Http__Content_Type(' ') # Spaces only (will be trimmed)
5453
assert "Value cannot be empty when allow_empty is False" in str(exc_info.value)
5554

5655
with pytest.raises(ValueError) as exc_info:
57-
Safe_Str__Http__ContentType('a' * 257) # Exceeds max length
56+
Safe_Str__Http__Content_Type('a' * 257) # Exceeds max length
5857
assert "Value exceeds maximum length of 256" in str(exc_info.value)
Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,48 @@
11
import pytest
2-
from unittest import TestCase
3-
from osbot_utils.helpers.safe_str.http.Safe_Str__Http__LastModified import Safe_Str__Http__LastModified
2+
from unittest import TestCase
3+
from osbot_utils.helpers.safe_str.http.Safe_Str__Http__Last_Modified import Safe_Str__Http__Last_Modified
44

55

6-
class test_Safe_Str__Http__LastModified(TestCase):
6+
class test_Safe_Str__Http__Last_Modified(TestCase):
77

88
def test_Safe_Str__Http__LastModified_class(self):
99
# Standard RFC formats
10-
assert Safe_Str__Http__LastModified('Wed, 21 Oct 2023 07:28:00 GMT') == 'Wed, 21 Oct 2023 07:28:00 GMT'
11-
assert Safe_Str__Http__LastModified('Mon, 15 May 2024 12:30:45 GMT') == 'Mon, 15 May 2024 12:30:45 GMT'
12-
assert Safe_Str__Http__LastModified('Sat, 01 Jan 2022 00:00:00 GMT') == 'Sat, 01 Jan 2022 00:00:00 GMT'
10+
assert Safe_Str__Http__Last_Modified('Wed, 21 Oct 2023 07:28:00 GMT') == 'Wed, 21 Oct 2023 07:28:00 GMT'
11+
assert Safe_Str__Http__Last_Modified('Mon, 15 May 2024 12:30:45 GMT') == 'Mon, 15 May 2024 12:30:45 GMT'
12+
assert Safe_Str__Http__Last_Modified('Sat, 01 Jan 2022 00:00:00 GMT') == 'Sat, 01 Jan 2022 00:00:00 GMT'
1313

1414
# Different date formats that might be used
15-
assert Safe_Str__Http__LastModified('2023-10-21T07:28:00Z') == '2023-10-21T07:28:00Z'
16-
assert Safe_Str__Http__LastModified('21 Oct 2023 07:28:00 GMT') == '21 Oct 2023 07:28:00 GMT'
15+
assert Safe_Str__Http__Last_Modified('2023-10-21T07:28:00Z') == '2023-10-21T07:28:00Z'
16+
assert Safe_Str__Http__Last_Modified('21 Oct 2023 07:28:00 GMT') == '21 Oct 2023 07:28:00 GMT'
1717

1818
# Whitespace handling (trim_whitespace = True)
19-
assert Safe_Str__Http__LastModified(' Wed, 21 Oct 2023 07:28:00 GMT ') == 'Wed, 21 Oct 2023 07:28:00 GMT'
19+
assert Safe_Str__Http__Last_Modified(' Wed, 21 Oct 2023 07:28:00 GMT ') == 'Wed, 21 Oct 2023 07:28:00 GMT'
2020

2121
# Invalid characters get replaced
22-
assert Safe_Str__Http__LastModified('Wed, 21 Oct 2023<script>') == 'Wed, 21 Oct 2023_script_'
23-
assert Safe_Str__Http__LastModified('Wed; 21 Oct 2023') == 'Wed_ 21 Oct 2023'
24-
assert Safe_Str__Http__LastModified('Wed, 21/Oct/2023') == 'Wed, 21_Oct_2023'
22+
assert Safe_Str__Http__Last_Modified('Wed, 21 Oct 2023<script>') == 'Wed, 21 Oct 2023_script_'
23+
assert Safe_Str__Http__Last_Modified('Wed; 21 Oct 2023') == 'Wed_ 21 Oct 2023'
24+
assert Safe_Str__Http__Last_Modified('Wed, 21/Oct/2023') == 'Wed, 21_Oct_2023'
2525

2626
# Numeric conversion
27-
assert Safe_Str__Http__LastModified(20231021) == '20231021'
27+
assert Safe_Str__Http__Last_Modified(20231021) == '20231021'
2828

2929
# Edge cases and exceptions
3030
with pytest.raises(ValueError) as exc_info:
31-
Safe_Str__Http__LastModified(None)
31+
Safe_Str__Http__Last_Modified(None)
3232
assert "Value cannot be None when allow_empty is False" in str(exc_info.value)
3333

3434
with pytest.raises(ValueError) as exc_info:
35-
Safe_Str__Http__LastModified('')
35+
Safe_Str__Http__Last_Modified('')
3636
assert "Value cannot be empty when allow_empty is False" in str(exc_info.value)
3737

3838
with pytest.raises(ValueError) as exc_info:
39-
Safe_Str__Http__LastModified('<?&*^?>') # All invalid chars
39+
Safe_Str__Http__Last_Modified('<?&*^?>') # All invalid chars
4040
assert "Sanitized value consists entirely of '_' characters" in str(exc_info.value)
4141

4242
with pytest.raises(ValueError) as exc_info:
43-
Safe_Str__Http__LastModified(' ') # Spaces only (will be trimmed)
43+
Safe_Str__Http__Last_Modified(' ') # Spaces only (will be trimmed)
4444
assert "Value cannot be empty when allow_empty is False" in str(exc_info.value)
4545

4646
with pytest.raises(ValueError) as exc_info:
47-
Safe_Str__Http__LastModified('a' * 65) # Exceeds max length
47+
Safe_Str__Http__Last_Modified('a' * 65) # Exceeds max length
4848
assert "Value exceeds maximum length of 64" in str(exc_info.value)
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
import pytest
2+
from unittest import TestCase
3+
from osbot_utils.helpers.safe_str.http.Safe_Str__Http__Text import Safe_Str__Http__Text, \
4+
TYPE_SAFE_STR__HTTP__TEXT__REGEX, TYPE_SAFE_STR__TEXT__MAX_LENGTH
5+
from osbot_utils.utils.Str import trim
6+
7+
8+
class test_Safe_Str__Http__Text(TestCase):
9+
10+
def test_Safe_Str__Http__Text_basic(self):
11+
# Basic text with various allowed characters
12+
assert str(Safe_Str__Http__Text("Hello, world!" )) == "Hello, world!"
13+
assert str(Safe_Str__Http__Text("This is a test. 123" )) == "This is a test. 123"
14+
assert str(Safe_Str__Http__Text("Line 1\nLine 2" )) == "Line 1\nLine 2"
15+
16+
# Text with various punctuation and special characters
17+
assert str(Safe_Str__Http__Text("Symbols: !@#$%^&*()_+-=[]{}|;':\",./<>?")) == "Symbols: !@#$%^&*()_+-=[]{}|;':\",./<>?"
18+
assert str(Safe_Str__Http__Text("Math: 5 + 5 = 10, 10 * 10 = 100")) == "Math: 5 + 5 = 10, 10 * 10 = 100"
19+
assert str(Safe_Str__Http__Text("Currency: $100, €50, £75, ¥500")) == "Currency: $100, €50, £75, ¥500"
20+
21+
# Whitespace handling (trim_whitespace = True)
22+
assert str(Safe_Str__Http__Text(" Hello ")) == "Hello"
23+
assert str(Safe_Str__Http__Text("\tTabbed text\t")) == "Tabbed text"
24+
assert str(Safe_Str__Http__Text("\nText with newlines\n")) == "Text with newlines"
25+
26+
# Newline normalization (normalize_newlines = True)
27+
assert str(Safe_Str__Http__Text("Windows\r\nLine\r\nBreaks" )) == "Windows\nLine\nBreaks"
28+
assert str(Safe_Str__Http__Text("Mac\rLine\rBreaks" )) == "Mac\nLine\nBreaks"
29+
assert str(Safe_Str__Http__Text("Mixed\nLine\r\nBreaks\r" )) == "Mixed\nLine\nBreaks"
30+
31+
# Empty and None handling (allow_empty = True)
32+
assert str(Safe_Str__Http__Text("")) == ""
33+
assert str(Safe_Str__Http__Text(None)) == ""
34+
35+
# Unicode text
36+
assert str(Safe_Str__Http__Text("Unicode: ☺ ♥ ★ ☆ ☂ ☃ ♫ ♪")) == "Unicode: ☺ ♥ ★ ☆ ☂ ☃ ♫ ♪"
37+
assert str(Safe_Str__Http__Text("Languages: English, Español, Français, Deutsch, 日本語, 中文, Русский")) == "Languages: English, Español, Français, Deutsch, 日本語, 中文, Русский"
38+
39+
def test_Safe_Str__Http__Text_control_chars(self):
40+
# Text with control characters (each one should be replaced with '_')
41+
input_with_control = "Text with control chars: \x00\x01\x02\x03"
42+
expected = "Text with control chars: ____"
43+
assert str(Safe_Str__Http__Text(input_with_control)) == expected
44+
45+
# Text with other problematic sequences
46+
input_with_escape = "Text with escape sequences: \x1B[31mRed\x1B[0m" # ANSI color codes
47+
expected = "Text with escape sequences: _[31mRed_[0m"
48+
assert str(Safe_Str__Http__Text(input_with_escape)) == expected
49+
50+
def test_Safe_Str__Http__Text_length_limits(self):
51+
# Text at the limit
52+
text_at_limit = "a" * TYPE_SAFE_STR__TEXT__MAX_LENGTH
53+
assert len(str(Safe_Str__Http__Text(text_at_limit))) == TYPE_SAFE_STR__TEXT__MAX_LENGTH
54+
55+
# Text exceeding the limit
56+
with pytest.raises(ValueError) as exc_info:
57+
Safe_Str__Http__Text("a" * (TYPE_SAFE_STR__TEXT__MAX_LENGTH + 1))
58+
assert f"Value exceeds maximum length of {TYPE_SAFE_STR__TEXT__MAX_LENGTH}" in str(exc_info.value)
59+
60+
def test_Safe_Str__Http__Text_code_snippets(self):
61+
# Python code snippet
62+
python_code = """def hello_world():
63+
print("Hello, world!")
64+
return True
65+
"""
66+
assert str(Safe_Str__Http__Text(python_code)) == trim(python_code)
67+
68+
# JavaScript code snippet
69+
js_code = """function helloWorld() {
70+
console.log("Hello, world!");
71+
return true;
72+
}
73+
"""
74+
assert str(Safe_Str__Http__Text(js_code)) == trim(js_code)
75+
76+
# HTML snippet
77+
html_snippet = """<div class="container">
78+
<h1>Hello, world!</h1>
79+
<p>This is a paragraph.</p>
80+
</div>
81+
"""
82+
assert str(Safe_Str__Http__Text(html_snippet)) == trim(html_snippet)
83+
84+
# SQL snippet
85+
sql_snippet = """SELECT id, name, email
86+
FROM users
87+
WHERE status = 'active'
88+
ORDER BY name ASC;
89+
"""
90+
assert str(Safe_Str__Http__Text(sql_snippet)) == trim(sql_snippet)
91+
92+
def test_Safe_Str__Http__Text_multiline(self):
93+
# Multiline text with various formatting
94+
multiline = """# Title
95+
96+
## Subtitle
97+
98+
This is a paragraph with *emphasis* and **strong** text.
99+
100+
- List item 1
101+
- List item 2
102+
- List item 3
103+
104+
1. Numbered item 1
105+
2. Numbered item 2
106+
3. Numbered item 3
107+
108+
> This is a blockquote.
109+
110+
```python
111+
def hello():
112+
print("Hello, world!")
113+
```
114+
"""
115+
assert str(Safe_Str__Http__Text(multiline)) == multiline.strip() # because trim_whitespace=True

0 commit comments

Comments
 (0)