1 | 1 | # Copyright (c) ModelScope Contributors. All rights reserved.
2 | | -"""Client for fetching teacher model logprobs from OpenAI-compatible endpoints."""
| 2 | +"""Fetch teacher model logprobs from OpenAI-compatible endpoints.""" |
3 | 3 | import logging |
4 | 4 | import requests |
5 | 5 | import torch |
8 | 8 |
9 | 9 | logger = logging.getLogger(__name__) |
10 | 10 |
| 11 | +_model_name_cache: dict = {} |
11 | 12 |
12 | | -class TeacherAPIClient:
13 | | - """Fetch teacher top-k logprobs from an OpenAI-compatible completions API.
| 13 | + |
| 14 | +def _get_model_name(base_url: str) -> str: |
| 15 | + if base_url not in _model_name_cache: |
| 16 | + try: |
| 17 | + resp = requests.get(f'{base_url}/v1/models', timeout=10) |
| 18 | + if resp.ok and resp.json().get('data'): |
| 19 | + _model_name_cache[base_url] = resp.json()['data'][0]['id'] |
| 20 | + except Exception as e: |
| 21 | + logger.warning(f'Failed to detect model name: {e}') |
| 22 | + if base_url not in _model_name_cache: |
| 23 | + _model_name_cache[base_url] = 'default' |
| 24 | + return _model_name_cache[base_url] |
| 25 | + |
| 26 | + |
| 27 | +def fetch_teacher_logprobs( |
| 28 | + base_url: str, |
| 29 | + input_ids: List[List[int]], |
| 30 | + topk: int = 20, |
| 31 | + timeout: float = 300.0, |
| 32 | +) -> Tuple[torch.Tensor, torch.Tensor]: |
| 33 | + """Fetch top-k logprobs from an OpenAI-compatible completions API. |
14 | 34 |
15 | 35 | Args: |
16 | 36 | base_url: Server URL (e.g., 'http://localhost:8000'). |
17 | | - top_logprobs: Number of top log probabilities per token.
| 37 | + input_ids: List of token ID sequences. |
| 38 | + topk: Number of top log probabilities per token. |
18 | 39 | timeout: Request timeout in seconds. |
19 | | - """ |
20 | 40 |
21 | | - def __init__(self, base_url: str, top_logprobs: int = 20, timeout: float = 300.0):
22 | | - self.base_url = base_url.rstrip('/') |
23 | | - self.top_logprobs = top_logprobs |
24 | | - self.timeout = timeout |
25 | | - self._model_name = None |
| 41 | + Returns: |
| 42 | + (logprobs, indices) tensors of shape [batch, max_seq_len, topk]. |
| 43 | + """ |
| 44 | + base_url = base_url.rstrip('/') |
| 45 | + batch_size = len(input_ids) |
| 46 | + max_seq_len = max(len(ids) for ids in input_ids) |
| 47 | + url = f'{base_url}/v1/completions' |
| 48 | + model = _get_model_name(base_url) |
26 | 49 |
27 | | - @property |
28 | | - def model_name(self) -> str: |
29 | | - if self._model_name is None: |
30 | | - try: |
31 | | - resp = requests.get(f'{self.base_url}/v1/models', timeout=10) |
32 | | - if resp.ok and resp.json().get('data'): |
33 | | - self._model_name = resp.json()['data'][0]['id'] |
34 | | - except Exception as e: |
35 | | - logger.warning(f'Failed to detect model name: {e}') |
36 | | - if self._model_name is None: |
37 | | - self._model_name = 'default' |
38 | | - return self._model_name |
| 50 | + logprobs_out = torch.full((batch_size, max_seq_len, topk), float('-inf'), dtype=torch.float32) |
| 51 | + indices_out = torch.zeros((batch_size, max_seq_len, topk), dtype=torch.long) |
39 | 52 |
40 | | - def check_health(self, timeout: float = 5.0) -> bool: |
41 | | - """Check if the teacher model server is reachable.""" |
| 53 | + def _fetch_one(batch_idx: int): |
| 54 | + payload = { |
| 55 | + 'model': model, |
| 56 | + 'prompt': input_ids[batch_idx], |
| 57 | + 'max_tokens': 0, |
| 58 | + 'temperature': 0, |
| 59 | + 'logprobs': topk, |
| 60 | + 'echo': True, |
| 61 | + } |
42 | 62 | try: |
43 | | - resp = requests.get(f'{self.base_url}/v1/models', timeout=timeout) |
44 | | - return resp.ok |
45 | | - except requests.RequestException: |
46 | | - return False |
47 | | - |
48 | | - def get_logprobs_sync( |
49 | | - self, |
50 | | - input_ids: List[List[int]], |
51 | | - top_logprobs: Optional[int] = None, |
52 | | - ) -> Tuple[torch.Tensor, torch.Tensor]: |
53 | | - """Fetch top-k logprobs for a batch of token sequences. |
54 | | -
55 | | - Returns: |
56 | | - (logprobs, indices) tensors of shape [batch, max_seq_len, topk]. |
57 | | - """ |
58 | | - topk = top_logprobs or self.top_logprobs |
59 | | - batch_size = len(input_ids) |
60 | | - max_seq_len = max(len(ids) for ids in input_ids) |
61 | | - url = f'{self.base_url}/v1/completions' |
62 | | - model = self.model_name |
63 | | - |
64 | | - logprobs_out = torch.full((batch_size, max_seq_len, topk), float('-inf'), dtype=torch.float32) |
65 | | - indices_out = torch.zeros((batch_size, max_seq_len, topk), dtype=torch.long) |
66 | | - |
67 | | - def _fetch_one(batch_idx: int): |
68 | | - payload = { |
69 | | - 'model': model, |
70 | | - 'prompt': input_ids[batch_idx], |
71 | | - 'max_tokens': 0, |
72 | | - 'temperature': 0, |
73 | | - 'logprobs': topk, |
74 | | - 'echo': True, |
75 | | - } |
76 | | - try: |
77 | | - resp = requests.post(url, json=payload, timeout=self.timeout) |
78 | | - resp.raise_for_status() |
79 | | - top_logprobs_list = resp.json()['choices'][0].get('logprobs', {}).get('top_logprobs', []) |
80 | | - for pos, pos_lp in enumerate(top_logprobs_list): |
81 | | - if pos_lp is None: |
82 | | - continue |
83 | | - sorted_items = sorted(pos_lp.items(), key=lambda x: -x[1])[:topk] |
84 | | - for k, (tid_str, lp) in enumerate(sorted_items): |
85 | | - indices_out[batch_idx, pos, k] = int(tid_str) |
86 | | - logprobs_out[batch_idx, pos, k] = lp |
87 | | - except Exception as e: |
88 | | - logger.error(f'Failed to get logprobs for sequence {batch_idx}: {e}') |
| 63 | + resp = requests.post(url, json=payload, timeout=timeout) |
| 64 | + resp.raise_for_status() |
| 65 | + top_logprobs_list = resp.json()['choices'][0].get('logprobs', {}).get('top_logprobs', []) |
| 66 | + for pos, pos_lp in enumerate(top_logprobs_list): |
| 67 | + if pos_lp is None: |
| 68 | + continue |
| 69 | + sorted_items = sorted(pos_lp.items(), key=lambda x: -x[1])[:topk] |
| 70 | + for k, (tid_str, lp) in enumerate(sorted_items): |
| 71 | + indices_out[batch_idx, pos, k] = int(tid_str) |
| 72 | + logprobs_out[batch_idx, pos, k] = lp |
| 73 | + except Exception as e: |
| 74 | + logger.error(f'Failed to get logprobs for sequence {batch_idx}: {e}') |
89 | 75 |
90 | | - with ThreadPoolExecutor(max_workers=min(batch_size, 8)) as pool: |
91 | | - list(pool.map(_fetch_one, range(batch_size))) |
| 76 | + with ThreadPoolExecutor(max_workers=min(batch_size, 8)) as pool: |
| 77 | + list(pool.map(_fetch_one, range(batch_size))) |
92 | 78 |
93 | | - return logprobs_out, indices_out |
| 79 | + return logprobs_out, indices_out |
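
A minimal usage sketch for the new function follows (the server URL and token IDs are illustrative placeholders, and the import path is an assumption about where this module lives in the repo):

    # Hypothetical import path; adjust to this module's actual location.
    from teacher_client import fetch_teacher_logprobs

    # Two variable-length token ID sequences (placeholder values).
    batch = [[1, 15, 42, 7], [1, 99, 3]]

    logprobs, indices = fetch_teacher_logprobs(
        'http://localhost:8000',  # any OpenAI-compatible server, e.g. vLLM
        batch,
        topk=20,
    )

    print(logprobs.shape)  # torch.Size([2, 4, 20])
    print(indices.shape)   # torch.Size([2, 4, 20])

Positions past the end of a shorter sequence keep their fill values (-inf logprobs, index 0). Servers also typically return None for the first prompt token's logprobs, which the code skips, so that position stays at -inf as well.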
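To consume the result in a distillation loss, one common pattern (a sketch, not part of the change above; it assumes a `student_logits` tensor from a student forward pass over the same token IDs) is to gather the student's log-probabilities at the teacher's top-k indices:

    import torch

    # student_logits: [batch, max_seq_len, vocab_size] from the student model.
    student_logprobs = torch.log_softmax(student_logits, dim=-1)
    student_topk = student_logprobs.gather(-1, indices)  # [batch, max_seq_len, topk]

    # Padded/skipped slots hold -inf teacher logprobs; mask them out.
    valid = torch.isfinite(logprobs)
    teacher_probs = torch.where(valid, logprobs.exp(), torch.zeros_like(logprobs))

    # Top-k cross-entropy: -sum(p_teacher * log p_student) over valid slots.
    kd_loss = -(teacher_probs * student_topk)[valid].sum() / valid.sum().clamp(min=1)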