Skip to content

Commit 2bb3831

Browse files
committed
New reranker extraction: support a ranking on the last line.
1 parent 11696e9 commit 2bb3831

File tree

1 file changed

+31
-23
lines changed

1 file changed

+31
-23
lines changed

src/rerankers/RankingExtractor.jl

Lines changed: 31 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
"""
22
Extract rankings from LLM response content, handling various output formats.
33
Returns a vector of integers representing document IDs.
4-
Accepted first-line formats:
4+
Accepted formats:
55
- "[1, 2, 3]"
66
- "1,2,3" (optionally space-separated); extra text after the list is ignored.
7+
- Checks the last non-empty line first, then falls back to the first non-empty line
78
"""
89
function extract_ranking(content::AbstractString; verbose::Int=0)::Vector{Int}
910
content = strip(content)
@@ -12,51 +13,58 @@ function extract_ranking(content::AbstractString; verbose::Int=0)::Vector{Int}
1213
return Int[]
1314
end
1415

15-
# Check if content is multiline (unexpected)
16+
# Get all non-empty lines
1617
lines = split(content, '\n')
1718
non_empty_lines = filter(line -> !isempty(strip(line)), lines)
19+
20+
if isempty(non_empty_lines)
21+
verbose >= 1 && @info "extract_ranking: No non-empty lines found"
22+
return Int[]
23+
end
24+
1825
if length(non_empty_lines) > 1 && verbose >= 2
19-
@info "extract_ranking: Unexpected multiline content:\n$(content)"
26+
@info "extract_ranking: Multiline content, checking first and last lines"
2027
end
2128

22-
# First non-empty line
23-
first_line = ""
24-
for line in lines
29+
# Try the last non-empty line first, then fall back to the first non-empty line
30+
lines_to_try = length(non_empty_lines) == 1 ? [non_empty_lines[1]] : [non_empty_lines[end], non_empty_lines[1]]
31+
32+
for line in lines_to_try
2533
line = strip(line)
26-
if !isempty(line)
27-
first_line = line
28-
break
34+
result = try_extract_from_line(line; verbose)
35+
if !isempty(result)
36+
return result
2937
end
3038
end
31-
if isempty(first_line)
32-
verbose >= 1 && @info "extract_ranking: No non-empty lines found"
33-
return Int[]
34-
end
3539

40+
# Fallback: unrecognizable format - always warn with full content
41+
@warn "extract_ranking: Unrecognizable format, full content:\n$(content)"
42+
return Int[]
43+
end
44+
45+
function try_extract_from_line(line::AbstractString; verbose::Int=0)::Vector{Int}
3646
# Case 1: Bracket format [1,2,3] or [1]
37-
if startswith(first_line, '[')
47+
if startswith(line, '[')
3848
local inner
39-
if endswith(first_line, ']')
40-
inner = first_line[2:end-1]
49+
if endswith(line, ']')
50+
inner = line[2:end-1]
4151
else
4252
# take until first closing bracket if present
43-
ci = findfirst(==(']'), first_line)
44-
inner = ci === nothing ? first_line[2:end] : first_line[2:ci-1]
45-
verbose >= 1 && @info "extract_ranking: Bracket format without closing bracket: $(repr(first_line))"
53+
ci = findfirst(==(']'), line)
54+
inner = ci === nothing ? line[2:end] : line[2:ci-1]
55+
verbose >= 1 && @info "extract_ranking: Bracket format without closing bracket: $(repr(line))"
4656
end
4757
result = parse_number_sequence(inner; verbose)
4858
return result
4959
end
5060

5161
# Case 2: Starts with a number (comma/space separated); take number-prefix only
52-
if !isempty(first_line) && isdigit(first_line[1])
53-
number_prefix = take_number_prefix(first_line)
62+
if !isempty(line) && isdigit(line[1])
63+
number_prefix = take_number_prefix(line)
5464
result = parse_number_sequence(number_prefix; verbose)
5565
return result
5666
end
5767

58-
# Fallback: unrecognizable format - always warn with full content
59-
@warn "extract_ranking: Unrecognizable format, full content:\n$(content)"
6068
return Int[]
6169
end
6270

0 commit comments

Comments
 (0)