|
| 1 | +"""Tests for the MLX reranker wrapper without loading real MLX models.""" |
| 2 | + |
| 3 | +from __future__ import annotations |
| 4 | + |
| 5 | +import math |
| 6 | +import sys |
| 7 | +import types |
| 8 | + |
| 9 | +import numpy as np |
| 10 | +import pytest |
| 11 | + |
| 12 | +import seeklink.reranker as reranker_mod |
| 13 | +from seeklink.reranker import Reranker |
| 14 | + |
| 15 | + |
@pytest.fixture
def fake_mlx(monkeypatch):
    """Register stub ``mlx`` and ``mlx.core`` modules in ``sys.modules``.

    The stub ``mlx.core`` exposes only two attributes:

    * ``array`` -- converts its argument to a NumPy ``int64`` array.
    * ``eval`` -- accepts any arguments and does nothing.

    Both entries are installed through ``monkeypatch`` so they are removed
    again when the requesting test finishes.
    """

    def _as_int64_array(value):
        return np.array(value, dtype=np.int64)

    def _noop_eval(*args, **kwargs):
        return None

    core = types.ModuleType("mlx.core")
    core.array = _as_int64_array
    core.eval = _noop_eval

    package = types.ModuleType("mlx")
    package.core = core

    # Parent package first, then the submodule, mirroring a real import.
    for dotted_name, stub in (("mlx", package), ("mlx.core", core)):
        monkeypatch.setitem(sys.modules, dotted_name, stub)
| 25 | + |
| 26 | + |
class FakeTokenizer:
    """Deterministic tokenizer stand-in used by the reranker tests.

    Token ids carry no linguistic meaning: ``encode`` emits runs of ``1`` and,
    for prompts containing a ``"Document: "`` section, a trailing marker token
    whose id equals the passage length (at least 1).
    """

    # Padding and end-of-sequence share id 0.
    pad_token_id = 0
    eos_token_id = 0

    def convert_tokens_to_ids(self, token: str) -> int:
        """Return the id for ``"yes"`` (1) or ``"no"`` (2).

        Raises:
            KeyError: for any other token.
        """
        vocab = {"yes": 1, "no": 2}
        return vocab[token]

    def apply_chat_template(self, messages, tokenize: bool, add_generation_prompt: bool):
        """Return the ``content`` of the first message unchanged.

        Asserts that the caller asked for an untokenized prompt with a
        generation prompt appended.
        """
        assert tokenize is False
        assert add_generation_prompt is True
        first_message = messages[0]
        return first_message["content"]

    def encode(self, text: str, return_tensors=None) -> list[int]:
        """Encode ``text`` into a list of fake token ids.

        Without a ``"Document: "`` label the result is one ``1`` per character.
        Otherwise the passage is the text after the first label, up to the
        first ``"<think>"`` if present, and the result is ``3 + len(passage)``
        ones followed by a marker equal to ``max(1, len(passage))``.
        """
        assert return_tensors is None
        _, label, after_label = text.partition("Document: ")
        if not label:
            return [1] * len(text)
        passage = after_label.partition("<think>")[0]
        passage_len = len(passage)
        # The marker must be non-zero so it is never mistaken for padding.
        marker = passage_len or 1
        return [1] * (3 + passage_len) + [marker]

    def decode(self, tokens: list[int], skip_special_tokens: bool = True) -> str:
        """Return one ``"x"`` per token, so text length equals token count."""
        assert skip_special_tokens is True
        return "x" * len(tokens)
| 50 | + |
| 51 | + |
class RecordingModel:
    """Callable model stub that records input shapes and returns fake logits.

    The returned array has shape ``(batch, seq_len, 3)`` and dtype ``float32``.
    For every row, at the position of the last non-zero token:

    * index 1 of the last axis holds that token's value,
    * index 2 holds ``0.0``.

    If that position is not the final column (the row has trailing zeros), the
    final column gets ``-100.0`` at index 1 and ``100.0`` at index 2.
    NOTE(review): presumably so that reading logits at the padded final
    position, rather than the last real token, gives a very different score.
    """

    def __init__(self, *, fail_all: bool = False):
        # Every call appends the shape of its input here, even failing calls.
        self.shapes: list[tuple[int, int]] = []
        self.fail_all = fail_all

    def __call__(self, input_ids):
        """Record the shape of ``input_ids`` and return the fake logits.

        Raises:
            RuntimeError: if the model was built with ``fail_all=True``.
        """
        batch = np.asarray(input_ids)
        self.shapes.append(tuple(batch.shape))
        if self.fail_all:
            raise RuntimeError("fake model failure")

        n_rows = batch.shape[0]
        seq_len = batch.shape[1]
        final_col = seq_len - 1
        logits = np.zeros((n_rows, seq_len, 3), dtype=np.float32)

        for r, tokens in enumerate(batch):
            # Zero is treated as padding; take the last non-zero position.
            last_real = int(np.flatnonzero(tokens != 0)[-1])
            logits[r, last_real, 1] = float(tokens[last_real])
            logits[r, last_real, 2] = 0.0
            if last_real == final_col:
                continue
            # The row ends in padding: write extreme values at the final column.
            logits[r, -1, 1] = -100.0
            logits[r, -1, 2] = 100.0
        return logits
| 74 | + |
| 75 | + |
def _ready_reranker(model: RecordingModel) -> Reranker:
    """Build a ``Reranker`` whose private model state is pre-populated.

    The given fake ``model``, a ``FakeTokenizer`` and the yes/no token ids
    (1 and 2) are assigned directly to the instance's private attributes.
    NOTE(review): presumably this lets ``rerank`` skip real model loading --
    confirm against ``Reranker``.
    """
    instance = Reranker()
    preset_state = {
        "_model": model,
        "_tokenizer": FakeTokenizer(),
        "_token_yes": 1,
        "_token_no": 2,
    }
    for attribute, value in preset_state.items():
        setattr(instance, attribute, value)
    return instance
| 83 | + |
| 84 | + |
| 85 | +def _sigmoid(value: float) -> float: |
| 86 | + return math.exp(value) / (math.exp(value) + 1.0) |
| 87 | + |
| 88 | + |
def test_rerank_caps_long_passages_before_scoring(fake_mlx, monkeypatch):
    """A passage longer than ``_MAX_PASSAGE_TOKENS`` is shortened before scoring.

    With the cap patched to 2, the six-character passage must reach the model
    as a six-token row: ``FakeTokenizer.encode`` yields ``3 + 2`` ones plus a
    marker of 2. The expected score is therefore ``sigmoid(2)``.
    """
    monkeypatch.setattr(reranker_mod, "_MAX_PASSAGE_TOKENS", 2)
    recorder = RecordingModel()
    subject = _ready_reranker(recorder)

    result = subject.rerank("query", ["abcdef"])

    expected_scores = [_sigmoid(2)]
    assert result == pytest.approx(expected_scores)
    assert recorder.shapes == [(1, 6)]
| 98 | + |
| 99 | + |
def test_rerank_keeps_short_passages_intact(fake_mlx, monkeypatch):
    """A passage under ``_MAX_PASSAGE_TOKENS`` is scored without shortening.

    With the cap patched to 10, the three-character passage must reach the
    model as a seven-token row: ``FakeTokenizer.encode`` yields ``3 + 3`` ones
    plus a marker of 3. The expected score is therefore ``sigmoid(3)``.
    """
    monkeypatch.setattr(reranker_mod, "_MAX_PASSAGE_TOKENS", 10)
    recorder = RecordingModel()
    subject = _ready_reranker(recorder)

    result = subject.rerank("query", ["abc"])

    expected_scores = [_sigmoid(3)]
    assert result == pytest.approx(expected_scores)
    assert recorder.shapes == [(1, 7)]
| 109 | + |
| 110 | + |
def test_rerank_returns_none_when_inference_fails(fake_mlx):
    """``rerank`` returns ``None`` when every model call raises."""
    always_failing = RecordingModel(fail_all=True)
    subject = _ready_reranker(always_failing)

    outcome = subject.rerank("query", ["passage"])

    assert outcome is None
0 commit comments