extend havoc/noise with completely random sentences

st0ffregen · st0ffregen · commit f300a5070482 · 2025-07-18T00:17:04.000+02:00
diff --git a/checker/src/checker.py b/checker/src/checker.py
@@ -358,6 +358,14 @@ def aes_cfb8_encrypt(plaintext, key, iv):
 def random_string(length=12):
     return ''.join(random.choices(string.ascii_letters + string.digits, k=length))
 
+def random_ascii_char():
+    """Return one random character drawn uniformly from get_ascii_chars()."""
+    # Reuse the shared alphabet instead of rebuilding it, so both helpers stay in sync.
+    return random.choice(get_ascii_chars())
+
+def get_ascii_chars():
+    """Return printable ASCII minus space, both quote characters and backslash."""
+    return ''.join(filter(lambda ch: ch not in "\"'\\ ", map(chr, range(32, 127))))
 
 class JwtAuth(Auth):
     def __init__(self, token):
@@ -517,12 +525,17 @@ async def exploit_embed(
 
     imported_chat_session_id = await conn.import_session(flag_hint_session_id)
 
-    search_flag = "A" * SUPPOSED_FLAG_LENGTH
+    # test various mutations of the search flag
+    search_character = random_ascii_char()
+    search_flag = search_character * SUPPOSED_FLAG_LENGTH
     top_k = random.randint(80, 120)
     found_docs = await conn.search_collection(imported_chat_session_id, search_flag, top_k=top_k)
 
-    chars = "+-<>[].,abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ/=#@"
-    chars = chars + "=" * (SUPPOSED_FLAG_LENGTH - len(chars))
+    # test various mutations of the dict look-up embedding characters
+    chars = get_ascii_chars()
+    chars = ''.join(random.sample(chars, len(chars))) # re-order characters randomly
+    padding_character = random_ascii_char()
+    chars = chars + padding_character * (SUPPOSED_FLAG_LENGTH - len(chars))
     char_embeddings = {}
 
     embedding_dim = 3
@@ -578,10 +591,10 @@ async def putnoise_embed(
     _, chat_session_id = await conn.register_user(username, password)
     await conn.login_user(username, password)
 
-    sentence_id = random.randint(0, len(checker_sentences)-1)
-    doc_id, _, _ = await conn.embed_text(checker_sentences[sentence_id], chat_session_id)
+    sentence = ''.join(random.choices(get_ascii_chars(), k=SUPPOSED_FLAG_LENGTH))
+    doc_id, _, _ = await conn.embed_text(sentence, chat_session_id)
 
-    await db.set("userdata", {"username": username, "password": password, "sentence": checker_sentences[sentence_id], "chat_session_id": chat_session_id})
+    await db.set("userdata", {"username": username, "password": password, "sentence": sentence, "chat_session_id": chat_session_id})
 
 
 @checker.getnoise(0)
@@ -628,12 +641,7 @@ async def havoc_search_collections(
 
     imported_session_id = await conn.import_session(first_chat_session_id)
 
-    second_sentence_id = first_sentence_id
-    # ensure that the second sentence is different from the first
-    while second_sentence_id == first_sentence_id:
-        second_sentence_id = random.randint(0, len(checker_sentences)-1)
-
-    second_sentence = checker_sentences[second_sentence_id]
+    second_sentence = ''.join(random.choices(get_ascii_chars(), k=SUPPOSED_FLAG_LENGTH))
     second_doc_id, _, _ = await conn.embed_text(second_sentence, imported_session_id)
 
     first_doc = await conn.search_collection(imported_session_id, second_sentence)
@@ -677,7 +685,8 @@ async def havoc_norm(
     sentence_id = random.randint(0, len(checker_sentences) - 2)
     sentences = [
         checker_sentences[sentence_id],
-        checker_sentences[sentence_id + 1]
+        checker_sentences[sentence_id + 1],
+        ''.join(random.choices(get_ascii_chars(), k=SUPPOSED_FLAG_LENGTH))
     ]
     for sentence in sentences:
         doc_id, embedding, embed_norm = await conn.embed_text(sentence, chat_session_id)