-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathollama_client.py
More file actions
135 lines (115 loc) · 5.56 KB
/
Copy pathollama_client.py
File metadata and controls
135 lines (115 loc) · 5.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
"""
Ollama local LLM client for Anki card generation
By Gibran Ali
https://gibranali.com
"""
import urllib.request
import urllib.error
import json
import re
# Prompt template for single-card generation. It is filled in with
# ``str.format`` using two named fields, ``{title}`` and ``{chunk}``.
# The JSON skeleton near the end uses doubled braces (``{{`` / ``}}``) so that
# ``str.format`` emits literal braces instead of treating them as fields.
# The text is sent to the model verbatim, so any edit here changes the prompt.
CHUNK_CARD_PROMPT = """You are an expert Anki flashcard writer trained on the minimum information principle.
Your task: given one chunk of text, produce ONE Anki card where the BACK contains the real knowledge and the FRONT tests whether someone knows it.
STEP 1 — Identify the core fact in the chunk.
Ask yourself: "What is the actual piece of knowledge here? What would someone need to memorise?"
STEP 2 — Write the BACK first. It must be a short, self-contained answer (1-2 sentences max).
STEP 3 — Write the FRONT as a question that can ONLY be answered if you know the BACK.
The FRONT must NOT contain the answer, and answering it must require genuine recall.
STRICT RULES:
- The BACK must contain specific, concrete information (a definition, a criterion, a value, a mechanism, a name, a consequence)
- The FRONT must not be answerable by just restating words from itself
- NEVER produce a card where the answer is just a rephrasing of the question
- If the chunk says "X must be Y to qualify as Z", the BACK = "Y" and the FRONT = "What must X be to qualify as Z?"
- Keep BACK under 20 words where possible
BAD example (circular — never do this):
chunk: "A medical intervention is clinically inappropriate if it lacks evidence of benefit."
FRONT: "What must be true about a medical intervention for it to be deemed clinically inappropriate?"
BACK: "It must be deemed clinically inappropriate." ← WRONG: back just repeats the question
GOOD example (tests real knowledge):
chunk: "A medical intervention is clinically inappropriate if it lacks evidence of benefit."
FRONT: "What makes a medical intervention clinically inappropriate?"
BACK: "It lacks evidence of benefit." ← RIGHT: specific, testable, not circular
Return ONLY a raw JSON object, no markdown, no explanation:
{{"front": "Question?", "back": "Specific answer.", "tags": []}}
Note title: {title}
Chunk:
{chunk}
"""
class OllamaClient:
    """Small standard-library HTTP client for an Ollama server.

    Talks to the ``/api/tags`` and ``/api/generate`` endpoints and turns the
    model's free-form reply into an Anki card dict.
    """

    def __init__(self, base_url: str, model: str):
        """Remember the server address and the model to query.

        Args:
            base_url: Root URL of the Ollama server. Trailing slashes are
                stripped so endpoint paths can be appended directly.
            model: Model name sent with every generate request.
        """
        self.base_url = base_url.rstrip("/")
        self.model = model

    def test_connection(self):
        """Probe ``/api/tags`` and report the outcome.

        Returns:
            A ``(success, message)`` tuple. This method never raises; every
            failure is reported through ``message`` with ``success`` False.
        """
        try:
            url = f"{self.base_url}/api/tags"
            req = urllib.request.Request(url, method="GET")
            with urllib.request.urlopen(req, timeout=5) as resp:
                data = json.loads(resp.read().decode())
            models = [m["name"] for m in data.get("models", [])]
            model_list = ", ".join(models) if models else "none pulled yet"
            return True, f"Connected! Models available: {model_list}"
        except urllib.error.URLError as e:
            return False, f"Cannot reach Ollama at {self.base_url} — is it running? ({e.reason})"
        except Exception as e:
            # Deliberate catch-all: this is a UI-facing health check, so any
            # unexpected problem is reported as text rather than raised.
            return False, str(e)

    def generate(self, prompt: str, timeout: int = 180) -> str:
        """Send *prompt* to ``/api/generate`` and return the model's text.

        Args:
            prompt: Full prompt text.
            timeout: Socket timeout in seconds passed to ``urlopen``.

        Returns:
            The ``"response"`` field of the server's JSON reply, or ``""``
            when that field is absent.

        Raises:
            RuntimeError: On an HTTP error status, on connection failure or
                timeout, or when the server body is not valid JSON.
        """
        url = f"{self.base_url}/api/generate"
        payload = {
            "model": self.model,
            "prompt": prompt,
            "stream": False,
            # No client-side GPU options — GPU acceleration is handled
            # server-side by the Ollama instance (e.g. on a NAS or remote host).
        }
        req = urllib.request.Request(
            url,
            data=json.dumps(payload).encode(),
            method="POST",
            headers={"Content-Type": "application/json"},
        )
        try:
            with urllib.request.urlopen(req, timeout=timeout) as resp:
                raw = resp.read().decode()
        except urllib.error.HTTPError as e:
            body = e.read().decode("utf-8", errors="replace")
            raise RuntimeError(f"Ollama HTTP {e.code}: {body[:300]}") from e
        except urllib.error.URLError as e:
            raise RuntimeError(
                f"Cannot connect to Ollama at {self.base_url}.\n"
                f"Make sure Ollama is running (`ollama serve`).\nError: {e.reason}"
            ) from e
        except OSError as e:
            # Timeouts or resets while waiting for / reading the response can
            # surface as plain OSError subclasses rather than URLError.
            raise RuntimeError(
                f"Ollama request to {url} failed or timed out "
                f"(timeout={timeout}s): {e}"
            ) from e

        try:
            result = json.loads(raw)
        except json.JSONDecodeError as e:
            raise RuntimeError(
                f"Ollama returned a non-JSON response: {raw[:300]}"
            ) from e
        return result.get("response", "")

    @staticmethod
    def _iter_json_objects(text: str):
        """Yield each JSON object (dict) decodable from a ``{`` in *text*.

        Every ``{`` is tried as a start position, so objects nested inside
        another object are yielded after their parent. Positions that do not
        begin valid JSON (for example a brace inside prose) are skipped.
        """
        decoder = json.JSONDecoder()
        pos = text.find("{")
        while pos != -1:
            try:
                obj, _end = decoder.raw_decode(text, pos)
            except json.JSONDecodeError:
                obj = None
            if isinstance(obj, dict):
                yield obj
            pos = text.find("{", pos + 1)

    def generate_card_from_chunk(self, title: str, chunk: str) -> dict:
        """Generate one Anki card dict ``{front, back, tags}`` from a chunk.

        The model reply may wrap the JSON in markdown fences or surrounding
        prose; the first decodable JSON object containing both ``"front"``
        and ``"back"`` is used. Only the first 2000 characters of *chunk*
        are sent.

        Args:
            title: Note title inserted into the prompt.
            chunk: Source text for the card.

        Returns:
            The parsed card dict, with ``"tags"`` defaulted to ``[]`` when
            the model omitted it.

        Raises:
            RuntimeError: With a descriptive message when the request fails,
                the reply contains no decodable JSON object, or no object
                has both required keys — so the caller can surface the
                problem instead of silently dropping the card.
        """
        prompt = CHUNK_CARD_PROMPT.format(
            title=title,
            chunk=chunk[:2000],
        )
        raw = self.generate(prompt)

        # Decoding with the real JSON parser (instead of a brace-matching
        # regex) keeps braces inside string values from truncating the card
        # and guarantees malformed JSON never escapes as JSONDecodeError.
        first_object = None
        for candidate in self._iter_json_objects(raw):
            if "front" in candidate and "back" in candidate:
                candidate.setdefault("tags", [])
                return candidate
            if first_object is None:
                first_object = candidate

        if first_object is None:
            raise RuntimeError(
                f"Ollama returned no JSON for chunk.\n"
                f"Model: {self.model}\n"
                f"Raw response (first 300 chars): {raw[:300]}"
            )
        raise RuntimeError(
            f"Ollama JSON missing 'front' or 'back' keys.\nParsed: {first_object}"
        )