11"""
22Extract rankings from LLM response content, handling various output formats.
33Returns a vector of integers representing document IDs.
4- Accepted first-line formats:
4+ Accepted formats:
55- "[1, 2, 3]"
66- "1,2,3" (optionally space-separated), extra text after the list is ignored.
7+ - Checks the last non-empty line first, then falls back to the first non-empty line
78"""
89function extract_ranking (content:: AbstractString ; verbose:: Int = 0 ):: Vector{Int}
910 content = strip (content)
@@ -12,51 +13,58 @@ function extract_ranking(content::AbstractString; verbose::Int=0)::Vector{Int}
1213 return Int[]
1314 end
1415
15- # Check if content is multiline (unexpected)
16+ # Get all non-empty lines
1617 lines = split (content, ' \n ' )
1718 non_empty_lines = filter (line -> ! isempty (strip (line)), lines)
19+
20+ if isempty (non_empty_lines)
21+ verbose >= 1 && @info " extract_ranking: No non-empty lines found"
22+ return Int[]
23+ end
24+
1825 if length (non_empty_lines) > 1 && verbose >= 2
19- @info " extract_ranking: Unexpected multiline content: \n $(content) "
26+ @info " extract_ranking: Multiline content, checking first and last lines "
2027 end
2128
22- # First non-empty line
23- first_line = " "
24- for line in lines
29+ # Try the last non-empty line first, then the first (only one attempt if there is a single line)
30+ lines_to_try = length (non_empty_lines) == 1 ? [non_empty_lines[1 ]] : [non_empty_lines[end ], non_empty_lines[1 ]]
31+
32+ for line in lines_to_try
2533 line = strip (line)
26- if ! isempty (line)
27- first_line = line
28- break
34+ result = try_extract_from_line (line; verbose )
35+ if ! isempty (result)
36+ return result
2937 end
3038 end
31- if isempty (first_line)
32- verbose >= 1 && @info " extract_ranking: No non-empty lines found"
33- return Int[]
34- end
3539
40+ # Fallback: unrecognizable format - always warn with full content
41+ @warn " extract_ranking: Unrecognizable format, full content:\n $(content) "
42+ return Int[]
43+ end
44+
45+ function try_extract_from_line (line:: AbstractString ; verbose:: Int = 0 ):: Vector{Int}
3646 # Case 1: Bracket format [1,2,3] or [1]
37- if startswith (first_line , ' [' )
47+ if startswith (line , ' [' )
3848 local inner
39- if endswith (first_line , ' ]' )
40- inner = first_line [2 : end - 1 ]
49+ if endswith (line , ' ]' )
50+ inner = line [2 : end - 1 ]
4151 else
4252 # take until first closing bracket if present
43- ci = findfirst (== (' ]' ), first_line )
44- inner = ci === nothing ? first_line [2 : end ] : first_line [2 : ci- 1 ]
45- verbose >= 1 && @info " extract_ranking: Bracket format without closing bracket: $(repr (first_line )) "
53+ ci = findfirst (== (' ]' ), line )
54+ inner = ci === nothing ? line [2 : end ] : line [2 : ci- 1 ]
55+ verbose >= 1 && @info " extract_ranking: Bracket format without closing bracket: $(repr (line )) "
4656 end
4757 result = parse_number_sequence (inner; verbose)
4858 return result
4959 end
5060
5161 # Case 2: Starts with a number (comma/space separated); take number-prefix only
52- if ! isempty (first_line ) && isdigit (first_line [1 ])
53- number_prefix = take_number_prefix (first_line )
62+ if ! isempty (line ) && isdigit (line [1 ])
63+ number_prefix = take_number_prefix (line )
5464 result = parse_number_sequence (number_prefix; verbose)
5565 return result
5666 end
5767
58- # Fallback: unrecognizable format - always warn with full content
59- @warn " extract_ranking: Unrecognizable format, full content:\n $(content) "
6068 return Int[]
6169end
6270
0 commit comments