-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_reader.py
147 lines (120 loc) · 4.52 KB
/
test_reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/usr/bin/env python3
"""
Test script for the reader package.
Tests all the fixed functions to ensure they work correctly.
"""
import os
import sys
import logging
from dotenv import load_dotenv
# Configure the root logger to emit INFO and above, and create the logger
# used by every test function in this script.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Put the directory containing this script at the front of the import path so
# the `tagwiseapp` imports below can resolve.
# NOTE(review): assumes this script lives in the project root - confirm.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# Import and run the project's Django setup helper before the reader modules
# are imported below.
# NOTE(review): presumably those modules need Django configured at import
# time - confirm before reordering these statements.
from tagwiseapp.reader.django_setup import setup_django
setup_django()
# Import the fixed functions
from tagwiseapp.reader.html_utils import clean_html_content, MAX_CONTENT_LENGTH
from tagwiseapp.reader.category_matcher import (
find_similar_category,
find_similar_tag,
get_existing_categories,
get_existing_tags,
match_categories_and_tags
)
from tagwiseapp.reader.category_prompt_factory import CategoryPromptFactory
from tagwiseapp.reader.settings import get_model_config
# Minimal HTML document passed to clean_html_content() in test_html_utils():
# a title, a heading, one paragraph, and an inline <script> element whose own
# text states that it should be removed.
TEST_HTML = """
<!DOCTYPE html>
<html>
<head>
<title>Test Page</title>
</head>
<body>
<h1>Test Content</h1>
<p>This is a test paragraph for HTML cleaning.</p>
<script>console.log("This should be removed");</script>
</body>
</html>
"""
def test_html_utils():
    """Smoke-test the HTML helpers.

    Runs ``clean_html_content`` on ``TEST_HTML`` and logs the output along
    with the imported ``MAX_CONTENT_LENGTH`` value.

    Returns:
        "HTML utils test passed" when the cleaned text is truthy,
        otherwise "HTML utils test failed".
    """
    logger.info("Testing HTML utility functions...")

    # Feed the sample document through the cleaner and show what came back.
    cleaned_text = clean_html_content(TEST_HTML)
    logger.info(f"Cleaned HTML: {cleaned_text}")

    # The constant is only logged, not checked.
    logger.info(f"MAX_CONTENT_LENGTH: {MAX_CONTENT_LENGTH}")

    if not cleaned_text:
        return "HTML utils test failed"
    return "HTML utils test passed"
def test_category_matcher():
    """Smoke-test the category and tag matching helpers.

    Loads the existing categories and tags, logs how many of each were found,
    then looks up the category name "Technology" via
    ``find_similar_category``. When at least one tag exists, the first tag's
    ``'name'`` is looked up via ``find_similar_tag``. Every match is only
    logged; none is checked.

    Returns:
        Always "Category matcher test passed" (provided no call raises).
    """
    verdict = "Category matcher test passed"
    logger.info("Testing category matcher functions...")

    # Existing categories
    categories = get_existing_categories()
    logger.info(f"Found {len(categories)} categories")

    # Existing tags
    tags = get_existing_tags()
    logger.info(f"Found {len(tags)} tags")

    # Category lookup with a fixed sample name
    test_category = "Technology"
    matched_category = find_similar_category(
        test_category,
        categories,
        is_main_category=True,
        accept_new=True,
    )
    logger.info(f"Matched category for '{test_category}': {matched_category}")

    # Tag lookup needs a real tag name, so it is skipped when none exist.
    if tags:
        test_tag = tags[0]['name']
        matched_tag = find_similar_tag(test_tag, tags, accept_new=True)
        logger.info(f"Matched tag for '{test_tag}': {matched_tag}")

    return verdict
def test_category_prompt_factory():
    """Smoke-test both prompt builders on ``CategoryPromptFactory``.

    Builds a content prompt and a screenshot prompt from fixed sample values
    plus the current categories and tags, logging the length of each.

    Returns:
        "Category prompt factory test passed" when both prompts are truthy,
        otherwise "Category prompt factory test failed".
    """
    logger.info("Testing category prompt factory...")

    # Sample values shared by both prompt builders.
    sample_url = "https://example.com"
    sample_title = "Test Title"
    sample_description = "Test Description"

    # Content-based prompt
    prompt = CategoryPromptFactory.create_category_prompt(
        content="This is a test content",
        url=sample_url,
        existing_title=sample_title,
        existing_description=sample_description,
        existing_categories=get_existing_categories(),
        existing_tags=get_existing_tags(),
    )
    logger.info(f"Generated prompt length: {len(prompt)}")

    # Screenshot-based prompt (takes no content argument)
    screenshot_prompt = CategoryPromptFactory.create_screenshot_category_prompt(
        url=sample_url,
        existing_title=sample_title,
        existing_description=sample_description,
        existing_categories=get_existing_categories(),
        existing_tags=get_existing_tags(),
    )
    logger.info(f"Generated screenshot prompt length: {len(screenshot_prompt)}")

    if prompt and screenshot_prompt:
        return "Category prompt factory test passed"
    return "Category prompt factory test failed"
def test_settings():
    """Smoke-test the settings helper.

    Fetches the model configuration via ``get_model_config`` and logs it.

    Returns:
        "Settings test passed" when the configuration is truthy,
        otherwise "Settings test failed".
    """
    logger.info("Testing settings functions...")

    # Fetch and display the model configuration.
    config = get_model_config()
    logger.info(f"Model config: {config}")

    if config:
        return "Settings test passed"
    return "Settings test failed"
if __name__ == "__main__":
    # Load environment variables from a .env file.
    # NOTE(review): setup_django() has already run at import time, before
    # this call; confirm it does not depend on variables loaded here.
    load_dotenv()

    # Run every test even when an earlier one raises. An uncaught exception
    # would otherwise abort the script before any result is reported.
    test_functions = (
        test_html_utils,
        test_category_matcher,
        test_category_prompt_factory,
        test_settings,
    )
    results = []
    for test_function in test_functions:
        try:
            results.append(test_function())
        except Exception:
            # Top-level boundary: keep the traceback in the log and record
            # the crash as a failure so the summary and exit code reflect it.
            logger.exception("%s raised an exception", test_function.__name__)
            results.append(f"{test_function.__name__} failed")

    # Print results
    logger.info("Test results:")
    for result in results:
        logger.info(f"- {result}")

    # A test passes only if it returned a string ending in "passed"; anything
    # else (including a non-string return value) counts as a failure.
    all_passed = all(
        isinstance(result, str) and result.endswith("passed")
        for result in results
    )
    if all_passed:
        logger.info("All tests passed!")
        sys.exit(0)
    else:
        logger.error("Some tests failed!")
        sys.exit(1)