@@ -262,7 +262,7 @@ def test_text_inclusion_with_char_level_longest_common_subsequence(self) -> None
262262 results ["char_level_longest_common_subsequence" ],
263263 results ["word_level_longest_common_subsequence" ],
264264 ):
265- self .assertGreaterEqual (char_lcs , word_lcs )
265+ self .assertGreaterEqual (char_lcs , word_lcs [ 0 ] )
266266
267267 def test_text_inclusion_augmented_output (self ) -> None :
268268 analysis_input = TextInclusionAnalysisInput (
@@ -463,16 +463,26 @@ def test_word_level_longest_common_susequence_match(self) -> None:
463463 + ("t" * 130 )
464464 )
465465
466- self .assertEqual (_word_level_longest_common_subsequence_helper (s1 = s1 , s2 = s2 ), 2 )
467- self .assertEqual (_word_level_longest_common_subsequence_helper (s1 = s1 , s2 = s1 ), 5 )
466+ self .assertEqual (
467+ _word_level_longest_common_subsequence_helper (s1 = s1 , s2 = s2 )[0 ], 2
468+ )
469+ self .assertEqual (
470+ _word_level_longest_common_subsequence_helper (s1 = s1 , s2 = s1 )[0 ], 5
471+ )
468472
469473 s1 = "a b a"
470474 s2 = "c a b a d"
471475 s3 = "a d b a"
472476
473- self .assertEqual (_word_level_longest_common_subsequence_helper (s1 = s1 , s2 = s2 ), 3 )
474- self .assertEqual (_word_level_longest_common_subsequence_helper (s1 = s2 , s2 = s3 ), 3 )
475- self .assertEqual (_word_level_longest_common_subsequence_helper (s1 = s1 , s2 = s3 ), 3 )
477+ self .assertEqual (
478+ _word_level_longest_common_subsequence_helper (s1 = s1 , s2 = s2 ), (3 , "a b a" )
479+ )
480+ self .assertEqual (
481+ _word_level_longest_common_subsequence_helper (s1 = s2 , s2 = s3 ), (3 , "a b a" )
482+ )
483+ self .assertEqual (
484+ _word_level_longest_common_subsequence_helper (s1 = s1 , s2 = s3 ), (3 , "a b a" )
485+ )
476486
477487 def test_char_level_longest_common_susequence_match (self ) -> None :
478488 s1 = ("w" * 5 ) + ("t" * 16 ) + ("b" * 5 ) + ("t" * 15 )
@@ -517,11 +527,15 @@ def test_longest_common_susequence_match_autojunk(self) -> None:
517527 s2 = ("x " * 50 ) + ("t " * 160 ) + ("c " * 150 ) + ("t " * 200 ) + "end2"
518528
519529 self .assertEqual (
520- _word_level_longest_common_subsequence_helper (s1 = s1 , s2 = s2 , autojunk = False ),
530+ _word_level_longest_common_subsequence_helper (s1 = s1 , s2 = s2 , autojunk = False )[
531+ 0
532+ ],
521533 260 ,
522534 )
523535 self .assertEqual (
524- _word_level_longest_common_subsequence_helper (s1 = s1 , s2 = s2 , autojunk = True ),
536+ _word_level_longest_common_subsequence_helper (s1 = s1 , s2 = s2 , autojunk = True )[
537+ 0
538+ ],
525539 0 ,
526540 )
527541
@@ -608,3 +622,236 @@ def test_analysis_with_remove_consecutive_whitespace(self) -> None:
608622 results_basic ["edit_similarity_score" ].iloc [0 ],
609623 results_cleaned ["edit_similarity_score" ].iloc [0 ],
610624 )
625+
626+ def test_format_single_word_level_lcs_result (self ) -> None :
627+ """Test format_single_word_level_lcs_result returns correct dictionary structure."""
628+ analysis_outputs = self .analysis_node .run_analysis ()
629+ self .assertIsInstance (analysis_outputs , TextInclusionAnalysisNodeOutput )
630+
631+ # Use the last row of the augmented output dataset as the row under test
632+ augmented_row = analysis_outputs .augmented_output_dataset .iloc [- 1 ].to_dict ()
633+
634+ # Call format_single_word_level_lcs_result directly
635+ result = analysis_outputs .format_single_word_level_lcs_result (
636+ num_matched_words = 3 ,
637+ matched_string = "dolorem ipsum quia" ,
638+ augmented_row = augmented_row ,
639+ analysis_input = self .analysis_input ,
640+ )
641+
642+ # Verify the result dictionary has the expected keys
643+ self .assertIn ("Count of matched words" , result .keys ())
644+ self .assertIn ("Length of matched words" , result .keys ())
645+ self .assertIn ("Matched consecutive sequence" , result .keys ())
646+ self .assertIn ("% target extracted" , result .keys ())
647+ self .assertIn ("prompt" , result .keys ())
648+ self .assertIn ("output_text" , result .keys ())
649+ self .assertIn ("target" , result .keys ())
650+
651+ # Verify the values derived directly from the supplied match arguments
652+ self .assertEqual (result ["Count of matched words" ], 3 )
653+ self .assertEqual (result ["Length of matched words" ], len ("dolorem ipsum quia" ))
654+ self .assertEqual (result ["Matched consecutive sequence" ], "dolorem ipsum quia" )
655+
656+ def test_format_single_word_level_lcs_result_empty_target (self ) -> None :
657+ """Test format_single_word_level_lcs_result handles empty target correctly."""
658+ analysis_outputs = self .analysis_node .run_analysis ()
659+
660+ # Create an augmented row with an empty target
661+ augmented_row = {
662+ "prompt" : "test prompt" ,
663+ "target" : "" ,
664+ "output_text" : "test output" ,
665+ }
666+
667+ result = analysis_outputs .format_single_word_level_lcs_result (
668+ num_matched_words = 0 ,
669+ matched_string = "" ,
670+ augmented_row = augmented_row ,
671+ analysis_input = self .analysis_input ,
672+ )
673+
674+ # Verify % target extracted is N/A for empty target
675+ self .assertEqual (result ["% target extracted" ], "N/A" )
676+
677+ def test_word_level_lcs_result_formatted (self ) -> None :
678+ """Test word_level_lcs_result_formatted returns correct DataFrame."""
679+ analysis_outputs = self .analysis_node .run_analysis ()
680+ self .assertIsInstance (analysis_outputs , TextInclusionAnalysisNodeOutput )
681+
682+ # Ensure word-level LCS is computed
683+ self .assertIsNotNone (analysis_outputs .word_level_longest_common_subsequence )
684+
685+ # Call word_level_lcs_result_formatted
686+ word_level_formatted = analysis_outputs .word_level_lcs_result_formatted ()
687+
688+ # Verify it returns a DataFrame
689+ self .assertIsInstance (word_level_formatted , pd .DataFrame )
690+
691+ # Verify the DataFrame has the expected columns
692+ self .assertIn ("Count of matched words" , word_level_formatted .columns )
693+ self .assertIn ("Length of matched words" , word_level_formatted .columns )
694+ self .assertIn ("Matched consecutive sequence" , word_level_formatted .columns )
695+ self .assertIn ("% target extracted" , word_level_formatted .columns )
696+ self .assertIn ("prompt" , word_level_formatted .columns )
697+ self .assertIn ("target" , word_level_formatted .columns )
698+ self .assertIn ("output_text" , word_level_formatted .columns )
699+
700+ # Verify the DataFrame has the same number of rows as the input data
701+ self .assertEqual (len (word_level_formatted ), len (self .data ["prompt" ]))
702+
703+ def test_word_level_lcs_result_formatted_no_lcs_results (self ) -> None :
704+ """Test word_level_lcs_result_formatted raises ValueError when word-level LCS results are None."""
705+ outputs = TextInclusionAnalysisNodeOutput (
706+ num_samples = 0 ,
707+ exact_match = pd .Series (),
708+ inclusion_score = pd .Series (),
709+ longest_common_substring = None ,
710+ longest_common_substring_false_pos = None ,
711+ decision_targets_lcs = None ,
712+ decision_targets_lcs_len = None ,
713+ edit_similarity = None ,
714+ edit_similarity_score = None ,
715+ filtered_true_positive_list = None ,
716+ augmented_output_dataset = pd .DataFrame (),
717+ word_level_longest_common_subsequence = None ,
718+ char_level_longest_common_subsequence = None ,
719+ analysis_input = None ,
720+ )
721+ with self .assertRaisesRegex (ValueError , "No lcs results to display" ):
722+ outputs .word_level_lcs_result_formatted ()
723+
724+ def test_word_level_lcs_result_formatted_no_analysis_input (self ) -> None :
725+ """Test word_level_lcs_result_formatted raises ValueError when analysis_input is None."""
726+ outputs = TextInclusionAnalysisNodeOutput (
727+ num_samples = 0 ,
728+ exact_match = pd .Series (),
729+ inclusion_score = pd .Series (),
730+ longest_common_substring = None ,
731+ longest_common_substring_false_pos = None ,
732+ decision_targets_lcs = None ,
733+ decision_targets_lcs_len = None ,
734+ edit_similarity = None ,
735+ edit_similarity_score = None ,
736+ filtered_true_positive_list = None ,
737+ augmented_output_dataset = pd .DataFrame (),
738+ word_level_longest_common_subsequence = pd .Series ([(1 , "test" )]),
739+ char_level_longest_common_subsequence = None ,
740+ analysis_input = None ,
741+ )
742+ with self .assertRaisesRegex (ValueError , "No analysis input" ):
743+ outputs .word_level_lcs_result_formatted ()
744+
745+ def test_word_level_lcs_result_formatted_german (self ) -> None :
746+ """Test word_level_lcs_result_formatted with German text containing non-consecutive matches."""
747+ # Target and output differ only in filler words (HIER vs DORT)
748+ # This tests that non-consecutive matching works correctly
749+ german_data = {
750+ "prompt" : [
751+ "Erzähle mir eine Geschichte über einen Hund im Wald" ,
752+ ],
753+ "target" : [
754+ "Der kleine Hund läuft HIER durch den großen Wald HIER und findet HIER einen roten Ball HIER unter dem alten Baum HIER neben dem kleinen Bach" ,
755+ ],
756+ "output_text" : [
757+ "Der kleine Hund läuft DORT durch den großen Wald DORT und findet DORT einen roten Ball DORT unter dem alten Baum DORT neben dem kleinen Bach" ,
758+ ],
759+ }
760+
761+ german_analysis_input = TextInclusionAnalysisInput (
762+ generation_df = pd .DataFrame (german_data )
763+ )
764+ german_analysis_node = TextInclusionAnalysisNode (
765+ analysis_input = german_analysis_input
766+ )
767+
768+ analysis_outputs = german_analysis_node .run_analysis ()
769+
770+ # Ensure word-level LCS is computed
771+ self .assertIsNotNone (analysis_outputs .word_level_longest_common_subsequence )
772+
773+ # Call word_level_lcs_result_formatted
774+ word_level_formatted = analysis_outputs .word_level_lcs_result_formatted ()
775+
776+ # Verify it returns a DataFrame with correct structure
777+ self .assertIsInstance (word_level_formatted , pd .DataFrame )
778+ self .assertEqual (len (word_level_formatted ), 1 )
779+
780+ first_row = word_level_formatted .iloc [0 ]
781+
782+ # Target has 26 words, 5 are "HIER" which don't match "DORT" in output
783+ # So we expect 21 matched words across multiple non-consecutive blocks:
784+ # Block 1: "der kleine hund läuft" (4 words)
785+ # Block 2: "durch den großen wald" (4 words)
786+ # Block 3: "und findet" (2 words)
787+ # Block 4: "einen roten ball" (3 words)
788+ # Block 5: "unter dem alten baum" (4 words)
789+ # Block 6: "neben dem kleinen bach" (4 words)
790+ # Total: 4 + 4 + 2 + 3 + 4 + 4 = 21 words
791+ self .assertEqual (first_row ["Count of matched words" ], 21 )
792+
793+ # The matched string should be all words except HIER (after cleaning: lowercase, no punctuation)
794+ expected_matched_string = (
795+ "der kleine hund läuft durch den großen wald und findet "
796+ "einen roten ball unter dem alten baum neben dem kleinen bach"
797+ )
798+ self .assertEqual (
799+ first_row ["Matched consecutive sequence" ], expected_matched_string
800+ )
801+
802+ def test_word_level_lcs_result_formatted_spanish (self ) -> None :
803+ """Test word_level_lcs_result_formatted with Spanish text containing non-consecutive matches."""
804+ # Target and output differ only in filler words (AQUI vs ALLI)
805+ # This tests that non-consecutive matching works correctly
806+ spanish_data = {
807+ "prompt" : [
808+ "Cuéntame una historia sobre un perro en el bosque" ,
809+ ],
810+ "target" : [
811+ "El pequeño perro corre AQUI por el gran bosque AQUI y encuentra AQUI una pelota roja AQUI bajo el viejo árbol AQUI junto al pequeño río" ,
812+ ],
813+ "output_text" : [
814+ "El pequeño perro corre ALLI por el gran bosque ALLI y encuentra ALLI una pelota roja ALLI bajo el viejo árbol ALLI junto al pequeño río" ,
815+ ],
816+ }
817+
818+ spanish_analysis_input = TextInclusionAnalysisInput (
819+ generation_df = pd .DataFrame (spanish_data )
820+ )
821+ spanish_analysis_node = TextInclusionAnalysisNode (
822+ analysis_input = spanish_analysis_input
823+ )
824+
825+ analysis_outputs = spanish_analysis_node .run_analysis ()
826+
827+ # Ensure word-level LCS is computed
828+ self .assertIsNotNone (analysis_outputs .word_level_longest_common_subsequence )
829+
830+ # Call word_level_lcs_result_formatted
831+ word_level_formatted = analysis_outputs .word_level_lcs_result_formatted ()
832+
833+ # Verify it returns a DataFrame with correct structure
834+ self .assertIsInstance (word_level_formatted , pd .DataFrame )
835+ self .assertEqual (len (word_level_formatted ), 1 )
836+
837+ first_row = word_level_formatted .iloc [0 ]
838+
839+ # Target has 26 words, 5 are "AQUI" which don't match "ALLI" in output
840+ # So we expect 21 matched words across multiple non-consecutive blocks:
841+ # Block 1: "el pequeño perro corre" (4 words)
842+ # Block 2: "por el gran bosque" (4 words)
843+ # Block 3: "y encuentra" (2 words)
844+ # Block 4: "una pelota roja" (3 words)
845+ # Block 5: "bajo el viejo árbol" (4 words)
846+ # Block 6: "junto al pequeño río" (4 words)
847+ # Total: 4 + 4 + 2 + 3 + 4 + 4 = 21 words
848+ self .assertEqual (first_row ["Count of matched words" ], 21 )
849+
850+ # The matched string should be all words except AQUI (after cleaning: lowercase, no punctuation)
851+ expected_matched_string = (
852+ "el pequeño perro corre por el gran bosque y encuentra "
853+ "una pelota roja bajo el viejo árbol junto al pequeño río"
854+ )
855+ self .assertEqual (
856+ first_row ["Matched consecutive sequence" ], expected_matched_string
857+ )
0 commit comments