-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnew_tests.py
More file actions
224 lines (173 loc) · 9.6 KB
/
Copy pathnew_tests.py
File metadata and controls
224 lines (173 loc) · 9.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
import os
import sys
import unittest
import time
import json
import re
# Add root folder to path
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from base_tool import BaseTool, register_tool, TOOL_REGISTRY
from nlp_engine import ArabicNLPEngine
from system_manager import SystemManager
from llm_agent import LLMAgent
from thinking_engine import ThinkingEngine
class CustomTestTool(BaseTool):
    """Stub tool whose ``execute`` signature is the fixture for the schema test.

    The annotations, default values and per-parameter docstring lines on
    ``execute`` are what ``test_type_annotated_schema_generation`` compares
    against the ``parameters`` schema of an instance.
    """

    name = "CUSTOM_TEST_TOOL"
    description = "أداة تجريبية لفحص الإنشاء التلقائي للمخطط المعلمي."

    # NOTE: the docstring below is fixture data, not ordinary documentation.
    # The schema test asserts that Arabic phrases from its per-parameter lines
    # show up in the generated property descriptions, so keep it verbatim.
    def execute(self, folder_name: str, delete_on_exit: bool = False, max_files: int = 100):
        """
        أداة تجريبية.
        folder_name: اسم المجلد المطلوب
        delete_on_exit: تحديد ما إذا كان يجب مسح الملفات عند الخروج
        max_files: الحد الأقصى للملفات المسموح بها
        """

    def call(self, params, **kwargs):
        # Arguments are ignored; the stub always reports the same result.
        return "success"
class TestAIBobAdvancedSuite(unittest.TestCase):
    """Checks for tool schema generation, the LLM agent, action parsing, NLP and system metrics.

    Each test constructs the collaborators it needs, so no state is shared
    between test methods.
    """

    def test_type_annotated_schema_generation(self):
        """Check that ``CustomTestTool().parameters`` describes the arguments of ``execute``."""
        tool = CustomTestTool()
        schema = tool.parameters

        # Schema structure: an object with one property per execute() argument.
        self.assertEqual(schema.get("type"), "object")
        properties = schema.get("properties", {})
        for param_name in ("folder_name", "delete_on_exit", "max_files"):
            self.assertIn(param_name, properties)

        # Python annotations map onto JSON-schema type names.
        self.assertEqual(properties["folder_name"]["type"], "string")
        self.assertEqual(properties["delete_on_exit"]["type"], "boolean")
        self.assertEqual(properties["max_files"]["type"], "integer")

        # Descriptions must carry the phrases written in execute()'s docstring.
        self.assertIn("اسم المجلد", properties["folder_name"]["description"])
        self.assertIn("مسح الملفات", properties["delete_on_exit"]["description"])

        # Only the argument without a default value is required.
        required = schema.get("required", [])
        self.assertIn("folder_name", required)
        self.assertNotIn("delete_on_exit", required)  # has default value
        self.assertNotIn("max_files", required)  # has default value

    def test_semantic_cache(self):
        """Check that a primed ``semantic_cache`` entry is served by ``generate_stream``.

        Two queries that differ only by a trailing "!" must normalize to the
        same key, and streaming the second one must produce text containing
        the cached ``CHECK_RAM`` tag in under 200 ms.
        """
        # load_now=False is meant to skip loading the real model (per the
        # original test's note); is_loaded is forced off as well.
        agent = LLMAgent(load_now=False)
        agent.is_loaded = False

        query_1 = "شيك الرامات عيني فدوة"
        query_2 = "شيك الرامات عيني فدوة!"

        # Prime the cache manually under the normalized form of the first query.
        normalized_q1 = agent._normalize_key(query_1)
        agent.semantic_cache[normalized_q1] = "تم فحص الرامات وطلعت 8 جيجا [RUN_COMMAND: CHECK_RAM]"

        # Both spellings must collapse to the same cache key.
        normalized_q2 = agent._normalize_key(query_2)
        self.assertEqual(normalized_q1, normalized_q2)

        # perf_counter() is monotonic and high resolution, so the measured
        # latency cannot be distorted by wall-clock adjustments.
        started = time.perf_counter()
        result = "".join(agent.generate_stream(query_2, []))
        duration_ms = (time.perf_counter() - started) * 1000

        self.assertIn("CHECK_RAM", result)
        # Cache hit should be near-instant (under 200ms)
        self.assertLess(duration_ms, 200, f"Cache latency is too high: {duration_ms:.2f}ms")

    def test_context_window_pruner(self):
        """Check that ``_prune_history`` shortens a long history and records a summary.

        A 30-message history pruned with ``max_tokens=200`` must come back as
        4 messages, leave a summary on ``agent._active_pruned_summary``, and
        that summary must appear in the prompt built by ``format_chat_prompt``.
        """
        agent = LLMAgent(load_now=False)

        # Build 15 user/assistant exchanges (30 messages in total).
        long_history = []
        for i in range(15):
            long_history.append({"sender": "user", "text": f"رسالة تجريبية طويلة جداً من المستخدم رقم {i}"})
            long_history.append({"sender": "assistant", "text": f"رد البوت التجريبي الطويل جداً على الرسالة رقم {i}"})
        self.assertEqual(len(long_history), 30)

        pruned = agent._prune_history(long_history, max_tokens=200)

        # The result is shorter than the input; exactly 4 messages are kept.
        self.assertLess(len(pruned), len(long_history))
        self.assertEqual(len(pruned), 4)

        # The dropped messages are summarized on the agent.
        self.assertTrue(hasattr(agent, "_active_pruned_summary"))
        self.assertTrue(agent._active_pruned_summary.startswith("سياق الحوار المتقادم:"))
        self.assertIn("المستخدم", agent._active_pruned_summary)

        # The summary must be injected into the formatted prompt.
        prompt = agent.format_chat_prompt("شلونك", long_history)
        self.assertIn(agent._active_pruned_summary, prompt)

    def test_react_parsing_robustness(self):
        """Check ``parse_action`` on both a ReAct-style output and a ``[RUN_COMMAND: ...]`` tag."""
        engine = ThinkingEngine()

        # 1. ReAct format: "Action:" / "Action Input:" lines after a <think> block.
        qwen_react_output = (
            "<think>\nيحتاج فحص المعالج.\n</think>\n"
            "Action: CHECK_CPU\n"
            "Action Input: {}\n"
        )
        has_act, act_name, act_input, _ = engine.parse_action(qwen_react_output)
        self.assertTrue(has_act)
        self.assertEqual(act_name, "CHECK_CPU")
        self.assertEqual(act_input, "{}")

        # 2. Traditional tag format: the tag must be stripped from the cleaned text.
        traditional_output = "صار عيني هسة أشيكلك المعالج [RUN_COMMAND: CHECK_CPU]"
        has_act, act_name, act_input, cleaned = engine.parse_action(traditional_output)
        self.assertTrue(has_act)
        self.assertEqual(act_name, "CHECK_CPU")
        self.assertEqual(act_input, "")
        self.assertNotIn("[RUN_COMMAND:", cleaned)

    def test_arabic_nlp_engine(self):
        """Check the intent, sub-intent and parameters ``ArabicNLPEngine.parse`` returns."""
        nlp = ArabicNLPEngine()

        # RAM check
        res = nlp.parse("شيكلي رامات الجهاز فدوة")
        self.assertEqual(res["intent"], "SYSTEM_INFO")
        self.assertEqual(res["sub_intent"], "ram")

        # Folder creation with a name parameter
        res = nlp.parse("سويلي فولدر جديد باسم مشاريع في D:\\")
        self.assertEqual(res["intent"], "FILE_CREATE")
        self.assertEqual(res["parameters"].get("name"), "مشاريع")

        # Application opening
        res = nlp.parse("افتح الحاسبة")
        self.assertEqual(res["intent"], "EXECUTE_CMD")
        self.assertEqual(res["sub_intent"], "open_calc")

    def test_system_manager_metrics(self):
        """Check that ``SystemManager`` reports numeric CPU, RAM and temperature readings."""
        mgr = SystemManager()

        # Separate bound assertions report the offending value on failure,
        # unlike assertTrue(0 <= value <= 100).
        cpu = mgr.get_cpu_usage()
        self.assertIsInstance(cpu, (int, float))
        self.assertGreaterEqual(cpu, 0)
        self.assertLessEqual(cpu, 100)

        ram = mgr.get_ram_usage()
        self.assertIn("percent", ram)
        self.assertGreaterEqual(ram["percent"], 0)
        self.assertLessEqual(ram["percent"], 100)

        temp = mgr.get_cpu_temperature()
        self.assertIsInstance(temp, (int, float))

    def test_bob_identity_safety_and_no_emojis(self):
        """Check that identity details are filtered out and emojis are stripped from responses."""
        engine = ThinkingEngine()

        # 1. A non-identity query must not get the developer's name back.
        leaked_response = "هذا البرنامج كتبه مستر بوب وهو بالمرتبة 7 على غيت هاب."
        safe_response = engine.identity_leak_filter(leaked_response, "كم الرام؟")
        self.assertNotIn("مستر بوب", safe_response)

        # 2. Formatted responses must not contain emojis.
        emoji_response = "صار عيني! هسة راح أنظفلك الكاش 😊🚀💻🔥"
        polished_response = engine.format_response(emoji_response)

        # The character class covers three emoji/symbol code-point ranges.
        emojis_found = re.findall(r'[\U0001F600-\U0001F64F\U0001F300-\U0001F5FF\U0001F680-\U0001F6FF]', polished_response)
        self.assertEqual(len(emojis_found), 0, f"Leaked emojis found: {emojis_found}")

    def test_math_interceptor(self):
        """Check ``parse_math`` on spelled-out and numeric arithmetic queries.

        Each expression runs in its own ``subTest`` so that one wrong result
        does not hide failures in the remaining cases.
        """
        nlp = ArabicNLPEngine()

        cases = (
            # basic addition
            ("واحد زائد واحد", 2),
            ("زين واحد زاد واحد كم", 2),
            # multiplication
            ("شكد 5 في 4", 20),
            # division with dialect spelling swaps (th -> t)
            ("تلاتة تقسيم تلاثة يطلع شكد؟", 1),
            # subtraction with composite tens
            ("كم 100 ناقص خمسة وعشرين", 75),
            # addition with composite tens
            ("خمسة وعشرين زائد خمسة وعشرين", 50),
            # plain numeric expression
            ("10 + 20", 30),
        )
        for expression, expected in cases:
            with self.subTest(expression=expression):
                self.assertEqual(nlp.parse_math(expression), expected)

        # A non-math query must return None.
        self.assertIsNone(nlp.parse_math("كم الرام عيني؟"))
# Run the suite through unittest's command-line runner when this file is
# executed directly rather than imported.
if __name__ == "__main__":
    unittest.main()