Skip to content
18 changes: 18 additions & 0 deletions plugin/plugins/study_companion/entry_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,23 @@ def _entry_exception_error(
return Err(SdkError(str(exc) if message is None else message))


def _validate_optional_vision_image_payload(
    owner: Any,
    image_base64: str,
    *,
    operation: str,
):
    """Validate an optional base64 image submitted alongside a tutor request.

    Args:
        owner: Object exposing ``_cfg``; its ``llm_vision_enabled`` attribute
            gates whether images are accepted at all.
        image_base64: Raw submitted payload. Falsy values and
            whitespace-only strings are treated as "no image".
        operation: Operation name forwarded to ``_entry_exception_error``
            when normalization fails.

    Returns:
        ``""`` when no image was supplied; otherwise the value returned by
        ``_normalize_submitted_image_payload``. An ``Err`` is returned
        instead when vision is disabled or normalization raises
        ``ValueError``.
    """
    candidate = str(image_base64 or "").strip()
    if candidate == "":
        return ""

    # A config object without the flag is treated as vision-disabled.
    vision_allowed = bool(getattr(owner._cfg, "llm_vision_enabled", False))
    if not vision_allowed:
        return Err(SdkError("llm_vision_enabled is not enabled"))

    try:
        normalized = _normalize_submitted_image_payload(candidate)
    except ValueError as exc:
        # Surface malformed payloads as an Err result rather than raising.
        return _entry_exception_error(owner, exc, operation=operation)
    return normalized


__all__ = [
"Any",
"Mapping",
Expand Down Expand Up @@ -297,6 +314,7 @@ def _entry_exception_error(
"_validated_pomodoro_focus_minutes",
"_detect_mastery_threshold_crossed",
"_normalize_submitted_image_payload",
"_validate_optional_vision_image_payload",
"_plugin_lock",
"_entry_exception_error",
"_event_ratio",
Expand Down
17 changes: 17 additions & 0 deletions plugin/plugins/study_companion/entry_tutor_answer_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
Ok,
SdkError,
_entry_exception_error,
_validate_optional_vision_image_payload,
plugin_entry,
tr,
LLM_OPERATION_ANSWER_EVALUATE,
Expand All @@ -26,6 +27,7 @@ class _TutorAnswerEntriesMixin:
"answer": {"type": "string", "default": ""},
"question": {"type": "string", "default": ""},
"expected_answer": {"type": "string", "default": ""},
"vision_image_base64": {"type": "string", "default": ""},
},
},
timeout=60.0,
Expand Down Expand Up @@ -53,6 +55,13 @@ async def study_evaluate_answer(
resolved_question = supplied_question or state_question
if not resolved_question:
return Err(SdkError("study tutor requires a question to evaluate against"))
vision_image_payload = str(kwargs.get("vision_image_base64") or "").strip()
validated_vision_image = _validate_optional_vision_image_payload(
self, vision_image_payload, operation="study_evaluate_answer"
)
if isinstance(validated_vision_image, Err):
return validated_vision_image
vision_image_payload = validated_vision_image
resolved_expected = supplied_expected
if not resolved_expected and (
not supplied_question or supplied_question == state_question
Expand Down Expand Up @@ -89,6 +98,14 @@ async def study_evaluate_answer(
"run_id": run_id,
"session_id": session_id,
"mode": active_mode,
**(
{
"vision_enabled": True,
"vision_image_base64": vision_image_payload,
}
if vision_image_payload
else {}
),
},
)
reply = await self._agent.answer_evaluate(
Expand Down
10 changes: 6 additions & 4 deletions plugin/plugins/study_companion/entry_tutor_explain_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
SdkError,
_entry_exception_error,
_normalize_submitted_image_payload,
_validate_optional_vision_image_payload,
_plugin_lock,
plugin_entry,
tr,
Expand Down Expand Up @@ -146,11 +147,12 @@ async def study_explain_text(
"source_text": source_text,
}
if vision_image_payload:
if not bool(self._cfg.llm_vision_enabled):
return Err(SdkError("llm_vision_enabled is not enabled"))
vision_image_payload = _normalize_submitted_image_payload(
vision_image_payload,
validated_vision_image = _validate_optional_vision_image_payload(
self, vision_image_payload, operation="study_explain_text"
)
if isinstance(validated_vision_image, Err):
return validated_vision_image
vision_image_payload = validated_vision_image
extra_context["vision_enabled"] = True
extra_context["vision_image_base64"] = vision_image_payload
tutor_context = await self._build_learning_context(
Expand Down
103 changes: 69 additions & 34 deletions plugin/plugins/study_companion/entry_tutor_question_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
Err,
Ok,
SdkError,
_entry_exception_error,
_validate_optional_vision_image_payload,
plugin_entry,
tr,
LLM_OPERATION_QUESTION_GENERATE,
Expand All @@ -23,6 +25,7 @@ class _TutorQuestionEntriesMixin:
"properties": {
"text": {"type": "string", "default": ""},
"topic": {"type": "string", "default": ""},
"vision_image_base64": {"type": "string", "default": ""},
},
},
timeout=60.0,
Expand All @@ -35,52 +38,84 @@ class _TutorQuestionEntriesMixin:
"topic",
],
)
async def study_generate_question(self, text: str = "", topic: str = "", **_):
async def study_generate_question(
self,
text: str = "",
topic: str = "",
vision_image_base64: str = "",
**_,
):
if self._agent is None:
return Err(SdkError("study tutor agent is not initialized"))
source_text = str(text or "").strip()
vision_image_payload = str(vision_image_base64 or "").strip()
used_ocr_fallback = False
if not source_text:
async with self._lock:
source_text = self._state.last_ocr_text
used_ocr_fallback = bool(source_text.strip())
source_text = source_text.strip()
if not source_text:
if not source_text and not vision_image_payload:
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Invoke vision for image-only question generation

This now accepts `vision_image_base64` without any text/OCR, but in that scenario `source_text` remains empty, and the later call to `self._agent.question_generate(source_text, ...)` short-circuits in `tutor_llm_agent_question_generate.question_generate` to the `empty_input` fallback before `_invoke_structured_operation` can attach the image. Users who paste only a diagram or photo and click Generate Question therefore get the generic empty-input fallback instead of a vision-generated question.

Useful? React with 👍 / 👎.

return Err(
SdkError(
"study tutor requires text or a non-empty OCR snapshot",
"study tutor requires text, an image, or a non-empty OCR snapshot",
code="MISSING_TEXT",
)
)
async with self._lock:
active_mode = self._state.active_mode
tutor_context = await self._build_learning_context(
LLM_OPERATION_QUESTION_GENERATE,
input_text=source_text,
extra={
"source": "ocr_snapshot" if used_ocr_fallback or not text else "manual",
"source_text": source_text,
"topic_hint": str(topic or "").strip(),
"mode": active_mode,
},
)
reply = await self._agent.question_generate(
source_text, mode=active_mode, context=tutor_context
)
payload = await self._finalize_tutor_call(
LLM_OPERATION_QUESTION_GENERATE,
reply,
history_kind=LLM_OPERATION_QUESTION_GENERATE,
metadata={
"degraded": reply.degraded,
"diagnostic": reply.diagnostic,
"payload": reply.payload,
"screen_classification": tutor_context.get("screen_classification")
or {},
},
extra_context=tutor_context,
)
payload["screen_classification"] = (
tutor_context.get("screen_classification") or {}
validated_vision_image = _validate_optional_vision_image_payload(
self, vision_image_payload, operation="study_generate_question"
)
return Ok(payload)
if isinstance(validated_vision_image, Err):
return validated_vision_image
vision_image_payload = validated_vision_image
try:
async with self._lock:
active_mode = self._state.active_mode
tutor_context = await self._build_learning_context(
LLM_OPERATION_QUESTION_GENERATE,
input_text=source_text,
extra={
"source": "ocr_snapshot"
if used_ocr_fallback
else (
"vision_image"
if vision_image_payload and not source_text
else "manual"
),
"source_text": source_text,
"topic_hint": str(topic or "").strip(),
"mode": active_mode,
**(
{
"vision_enabled": True,
"vision_image_base64": vision_image_payload,
}
if vision_image_payload
else {}
),
},
)
reply = await self._agent.question_generate(
source_text, mode=active_mode, context=tutor_context
)
payload = await self._finalize_tutor_call(
LLM_OPERATION_QUESTION_GENERATE,
reply,
history_kind=LLM_OPERATION_QUESTION_GENERATE,
metadata={
"degraded": reply.degraded,
"diagnostic": reply.diagnostic,
"payload": reply.payload,
"screen_classification": tutor_context.get("screen_classification")
or {},
},
extra_context=tutor_context,
)
payload["screen_classification"] = (
tutor_context.get("screen_classification") or {}
)
return Ok(payload)
except Exception as exc:
return _entry_exception_error(
self, exc, operation="study_generate_question"
)
60 changes: 60 additions & 0 deletions plugin/plugins/study_companion/static/style.css
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,66 @@ textarea {
overflow-wrap: anywhere;
}

/*
 * Image preview box: an inline-flex container capped at 220px wide that
 * clips its content to the rounded border. `position: relative` makes it
 * the containing block for absolutely positioned descendants (presumably
 * the remove button below; confirm against the markup).
 */
.study-panel__image-preview {
position: relative;
display: inline-flex;
max-width: 220px;
margin: 8px 0 0;
overflow: hidden;
border: 1px solid #c8d0dc;
border-radius: 6px;
background: #ffffff;
}

/* Fit the image inside the box (at most 160px tall) without cropping it. */
.study-panel__image-preview img {
display: block;
max-width: 100%;
max-height: 160px;
object-fit: contain;
}

/*
 * Circular 24px button pinned 3px from the top-right corner of its nearest
 * positioned ancestor. width, height and min-height are all set to 24px.
 */
.study-panel__image-remove {
position: absolute;
top: 3px;
right: 3px;
width: 24px;
min-height: 24px;
height: 24px;
padding: 0;
border: 1px solid rgba(255, 255, 255, 0.3);
border-radius: 50%;
background: rgba(0, 0, 0, 0.65);
color: #ffffff;
font-size: 15px;
line-height: 22px;
text-align: center;
}

/* Switch the dark background to a translucent red on hover. */
.study-panel__image-remove:hover {
background: rgba(190, 42, 42, 0.86);
}

/* Focus ring shown only when the browser deems focus should be visible. */
.study-panel__image-remove:focus-visible {
outline: 2px solid #2e5bff;
outline-offset: 2px;
}

/* Red 13px message text (by its class name, for paste failures). */
.study-panel__paste-error {
margin: 8px 0 0;
color: #b42318;
font-size: 13px;
line-height: 1.45;
}

/* While the panel carries data-busy="true", dim the preview. */
.study-panel[data-busy="true"] .study-panel__image-preview {
opacity: 0.55;
}

/* In the same busy state, fade the remove button and ignore pointer input. */
.study-panel[data-busy="true"] .study-panel__image-remove {
pointer-events: none;
opacity: 0.4;
}

.reply-panel {
border-top: 1px solid #d8dde6;
padding-top: 18px;
Expand Down
Loading
Loading