@@ -358,6 +358,14 @@ def aes_cfb8_encrypt(plaintext, key, iv):
def random_string(length=12):
    """Return a random string of ``length`` ASCII letters and digits.

    Characters are drawn with replacement via ``random.choices``, so the
    result is not suitable as a cryptographic secret.
    """
    alphabet = string.ascii_letters + string.digits
    picked = random.choices(alphabet, k=length)
    return ''.join(picked)
360360
def random_ascii_char():
    """Return one random character from the set built by get_ascii_chars().

    Delegates to get_ascii_chars() so the exclusion list is defined in a
    single place and the two helpers cannot drift apart.
    """
    return random.choice(get_ascii_chars())


def get_ascii_chars():
    """Return the printable ASCII characters allowed in generated text.

    The result covers code points 32..126 in ascending order, minus the
    double quote, the space, the single quote and the backslash.
    """
    # NOTE(review): presumably these four are excluded because they would
    # need escaping/quoting where the text is later embedded -- confirm.
    excluded = "\" '\\ "
    return ''.join(
        chr(code) for code in range(32, 127) if chr(code) not in excluded
    )
361369
362370class JwtAuth (Auth ):
363371 def __init__ (self , token ):
@@ -517,12 +525,17 @@ async def exploit_embed(
517525
518526 imported_chat_session_id = await conn .import_session (flag_hint_session_id )
519527
520- search_flag = "A" * SUPPOSED_FLAG_LENGTH
528+ # test various mutations of search flag
529+ search_character = random_ascii_char ()
530+ search_flag = search_character * SUPPOSED_FLAG_LENGTH
521531 top_k = random .randint (80 , 120 )
522532 found_docs = await conn .search_collection (imported_chat_session_id , search_flag , top_k = top_k )
523533
524- chars = "+-<>[].,abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ/=#@"
525- chars = chars + "=" * (SUPPOSED_FLAG_LENGTH - len (chars ))
534+ # test various mutations of the dict look-up embedding characters
535+ chars = get_ascii_chars ()
536+ chars = '' .join (random .sample (chars , len (chars ))) # re-order characters randomly
537+ padding_character = random_ascii_char ()
538+ chars = chars + padding_character * (SUPPOSED_FLAG_LENGTH - len (chars ))
526539 char_embeddings = {}
527540
528541 embedding_dim = 3
@@ -578,10 +591,10 @@ async def putnoise_embed(
578591 _ , chat_session_id = await conn .register_user (username , password )
579592 await conn .login_user (username , password )
580593
581- sentence_id = random .randint ( 0 , len ( checker_sentences ) - 1 )
582- doc_id , _ , _ = await conn .embed_text (checker_sentences [ sentence_id ] , chat_session_id )
594+ sentence = '' . join ( random .choices ( get_ascii_chars (), k = SUPPOSED_FLAG_LENGTH ) )
595+ doc_id , _ , _ = await conn .embed_text (sentence , chat_session_id )
583596
584- await db .set ("userdata" , {"username" : username , "password" : password , "sentence" : checker_sentences [ sentence_id ] , "chat_session_id" : chat_session_id })
597+ await db .set ("userdata" , {"username" : username , "password" : password , "sentence" : sentence , "chat_session_id" : chat_session_id })
585598
586599
587600@checker .getnoise (0 )
@@ -628,12 +641,7 @@ async def havoc_search_collections(
628641
629642 imported_session_id = await conn .import_session (first_chat_session_id )
630643
631- second_sentence_id = first_sentence_id
632- # ensure that the second sentence is different from the first
633- while second_sentence_id == first_sentence_id :
634- second_sentence_id = random .randint (0 , len (checker_sentences )- 1 )
635-
636- second_sentence = checker_sentences [second_sentence_id ]
644+ second_sentence = '' .join (random .choices (get_ascii_chars (), k = SUPPOSED_FLAG_LENGTH ))
637645 second_doc_id , _ , _ = await conn .embed_text (second_sentence , imported_session_id )
638646
639647 first_doc = await conn .search_collection (imported_session_id , second_sentence )
@@ -677,7 +685,8 @@ async def havoc_norm(
677685 sentence_id = random .randint (0 , len (checker_sentences ) - 2 )
678686 sentences = [
679687 checker_sentences [sentence_id ],
680- checker_sentences [sentence_id + 1 ]
688+ checker_sentences [sentence_id + 1 ],
689+ '' .join (random .choices (get_ascii_chars (), k = SUPPOSED_FLAG_LENGTH ))
681690 ]
682691 for sentence in sentences :
683692 doc_id , embedding , embed_norm = await conn .embed_text (sentence , chat_session_id )
0 commit comments