trilium-ai-anki-addon/ollama_client.py at main · Gib-git/trilium-ai-anki-addon · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
"""
Ollama local LLM client for Anki card generation
By Gibran Ali
https://gibranali.com
"""

import urllib.request
import urllib.error
import json
import re


# Prompt template for turning one text chunk into one Anki card.
#
# The template is filled with ``str.format`` using two named placeholders:
#   {title} - title of the note the chunk came from
#   {chunk} - the chunk text itself (the caller in this file truncates it
#             to 2000 characters before formatting)
# The doubled braces around the example JSON object are ``str.format``
# escapes: they render as single literal braces in the final prompt, so any
# new literal brace added to this text must be doubled as well.
CHUNK_CARD_PROMPT = """You are an expert Anki flashcard writer trained on the minimum information principle.

Your task: given one chunk of text, produce ONE Anki card where the BACK contains the real knowledge and the FRONT tests whether someone knows it.

STEP 1 — Identify the core fact in the chunk.
Ask yourself: "What is the actual piece of knowledge here? What would someone need to memorise?"

STEP 2 — Write the BACK first. It must be a short, self-contained answer (1-2 sentences max).

STEP 3 — Write the FRONT as a question that can ONLY be answered if you know the BACK.
The FRONT must NOT contain the answer, and answering it must require genuine recall.

STRICT RULES:
- The BACK must contain specific, concrete information (a definition, a criterion, a value, a mechanism, a name, a consequence)
- The FRONT must not be answerable by just restating words from itself
- NEVER produce a card where the answer is just a rephrasing of the question
- If the chunk says "X must be Y to qualify as Z", the BACK = "Y" and the FRONT = "What must X be to qualify as Z?"
- Keep BACK under 20 words where possible

BAD example (circular — never do this):
  chunk: "A medical intervention is clinically inappropriate if it lacks evidence of benefit."
  FRONT: "What must be true about a medical intervention for it to be deemed clinically inappropriate?"
  BACK: "It must be deemed clinically inappropriate."  ← WRONG: back just repeats the question

GOOD example (tests real knowledge):
  chunk: "A medical intervention is clinically inappropriate if it lacks evidence of benefit."
  FRONT: "What makes a medical intervention clinically inappropriate?"
  BACK: "It lacks evidence of benefit."  ← RIGHT: specific, testable, not circular

Return ONLY a raw JSON object, no markdown, no explanation:
{{"front": "Question?", "back": "Specific answer.", "tags": []}}

Note title: {title}
Chunk:
{chunk}
"""


class OllamaClient:
    """Small HTTP client for an Ollama server, built on ``urllib`` only.

    Attributes:
        base_url: Server root URL with any trailing slashes removed.
        model: Name of the model sent with every generate request.
    """

    def __init__(self, base_url: str, model: str):
        # Strip trailing slashes so endpoint paths can be appended with "/...".
        self.base_url = base_url.rstrip("/")
        self.model = model

    def test_connection(self):
        """Probe the server's ``/api/tags`` endpoint.

        Returns:
            (success: bool, message: str). On success the message lists the
            model names reported by the server. Failures are reported through
            the message instead of being raised.
        """
        try:
            url = f"{self.base_url}/api/tags"
            req = urllib.request.Request(url, method="GET")
            with urllib.request.urlopen(req, timeout=5) as resp:
                data = json.loads(resp.read().decode())
                models = [m["name"] for m in data.get("models", [])]
                model_list = ", ".join(models) if models else "none pulled yet"
                return True, f"Connected! Models available: {model_list}"
        except urllib.error.URLError as e:
            return False, f"Cannot reach Ollama at {self.base_url} — is it running? ({e.reason})"
        except Exception as e:
            # Deliberate catch-all: this method reports problems, never raises.
            return False, str(e)

    def generate(self, prompt: str, timeout: int = 180) -> str:
        """Send a prompt to ``/api/generate`` and return the response text.

        Args:
            prompt: Full prompt text to send.
            timeout: Socket timeout in seconds passed to ``urlopen``.

        Returns:
            The ``"response"`` field of the server's JSON reply, or ``""``
            when that field is absent.

        Raises:
            RuntimeError: On an HTTP error status, a connection failure, a
                timeout or socket error while reading, or a reply body that
                is not a JSON object.
        """
        url = f"{self.base_url}/api/generate"
        payload = {
            "model": self.model,
            "prompt": prompt,
            "stream": False,
            # No client-side GPU options — GPU acceleration is handled
            # server-side by the Ollama instance (e.g. on a NAS or remote host).
        }
        data = json.dumps(payload).encode()
        req = urllib.request.Request(
            url,
            data=data,
            method="POST",
            headers={"Content-Type": "application/json"},
        )
        try:
            with urllib.request.urlopen(req, timeout=timeout) as resp:
                raw = resp.read().decode("utf-8", errors="replace")
        except urllib.error.HTTPError as e:
            body = e.read().decode("utf-8", errors="replace")
            raise RuntimeError(f"Ollama HTTP {e.code}: {body[:300]}") from e
        except urllib.error.URLError as e:
            raise RuntimeError(
                f"Cannot connect to Ollama at {self.base_url}.\n"
                f"Make sure Ollama is running (`ollama serve`).\nError: {e.reason}"
            ) from e
        except OSError as e:
            # Timeouts and resets that happen while reading the body are not
            # wrapped in URLError by urllib, so they are handled separately.
            raise RuntimeError(
                f"Ollama request to {url} failed or timed out "
                f"(timeout={timeout}s): {e}"
            ) from e

        try:
            result = json.loads(raw)
        except ValueError as e:
            raise RuntimeError(
                f"Ollama returned a non-JSON response: {raw[:300]}"
            ) from e
        if not isinstance(result, dict):
            raise RuntimeError(
                f"Ollama returned unexpected JSON (not an object): {raw[:300]}"
            )
        return result.get("response", "")

    def _parse_card(self, raw: str) -> dict:
        """Extract the card object from a raw model reply.

        Markdown code fences are removed, then a JSON decode is attempted at
        every ``{`` in the text. The first object that has both ``"front"``
        and ``"back"`` is returned, so surrounding chatter, a card nested in
        a wrapper object, and braces inside string values are all tolerated.

        Raises:
            RuntimeError: If no JSON object can be decoded, or if none of the
                decoded objects has both ``"front"`` and ``"back"``.
        """
        # Strip markdown fences if model wrapped response
        cleaned = re.sub(r"```(?:json)?", "", raw).strip()

        decoder = json.JSONDecoder()
        first_object = None  # first decodable object, kept for error reporting
        pos = cleaned.find("{")
        while pos != -1:
            try:
                candidate, _ = decoder.raw_decode(cleaned, pos)
            except ValueError:
                candidate = None
            if isinstance(candidate, dict):
                if "front" in candidate and "back" in candidate:
                    candidate.setdefault("tags", [])
                    return candidate
                if first_object is None:
                    first_object = candidate
            pos = cleaned.find("{", pos + 1)

        if first_object is not None:
            raise RuntimeError(
                f"Ollama JSON missing 'front' or 'back' keys.\nParsed: {first_object}"
            )
        raise RuntimeError(
            f"Ollama returned no JSON for chunk.\n"
            f"Model: {self.model}\n"
            f"Raw response (first 300 chars): {raw[:300]}"
        )

    def generate_card_from_chunk(self, title: str, chunk: str) -> dict:
        """
        Generate a single Anki card dict {front, back, tags} from a text chunk.
        Raises RuntimeError with a descriptive message on any failure so the
        caller can surface it to the user instead of silently dropping the card.
        """
        prompt = CHUNK_CARD_PROMPT.format(
            title=title,
            chunk=chunk[:2000],  # cap the chunk text embedded in the prompt
        )
        raw = self.generate(prompt)
        return self._parse_card(raw)