-
Notifications
You must be signed in to change notification settings - Fork 159
伴学插件支持文本框图片粘贴与视觉输入 #1603
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
伴学插件支持文本框图片粘贴与视觉输入 #1603
Changes from all commits
f19d511
c1468db
5e1e75c
8851217
93ca0c8
3df8ca2
5f298f1
6319e2b
a4883e4
de7491a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,12 +4,28 @@ | |
| Err, | ||
| Ok, | ||
| SdkError, | ||
| _entry_exception_error, | ||
| _validate_optional_vision_image_payload, | ||
| plugin_entry, | ||
| tr, | ||
| LLM_OPERATION_QUESTION_GENERATE, | ||
| ) | ||
|
|
||
|
|
||
# Stand-in source text for the question generator when the request carries
# only a pasted image and no text. One prompt per supported language family.
IMAGE_ONLY_QUESTION_PROMPT_EN = "Generate a study question from the pasted image."
IMAGE_ONLY_QUESTION_PROMPT_ZH_CN = "请根据这张图片生成一道学习题。"
IMAGE_ONLY_QUESTION_PROMPT_ZH_TW = "請根據這張圖片生成一道學習題。"

# Lower-case, hyphen-separated language-tag prefixes that select the
# Traditional Chinese prompt (region subtags TW/HK/MO and the Hant script).
_TRADITIONAL_CHINESE_TAG_PREFIXES = ("zh-tw", "zh-hk", "zh-mo", "zh-hant")


def _image_only_question_prompt(language: str) -> str:
    """Return the image-only question prompt matching *language*.

    The tag is compared case-insensitively after trimming whitespace, and
    underscores are treated as hyphens so that POSIX-style locale names
    (``zh_TW``) behave the same as BCP 47 tags (``zh-TW``).

    Args:
        language: A language tag such as ``"en"``, ``"zh-CN"`` or ``"zh_TW"``.
            ``None`` and empty values are accepted.

    Returns:
        The Traditional Chinese prompt for Taiwan / Hong Kong / Macau / Hant
        tags, the Simplified Chinese prompt for any other tag starting with
        ``zh``, and the English prompt otherwise.
    """
    # Normalise the separator first: without this, "zh_TW" would only match
    # the generic "zh" prefix and be answered in Simplified Chinese.
    normalized = str(language or "").strip().lower().replace("_", "-")
    if normalized.startswith(_TRADITIONAL_CHINESE_TAG_PREFIXES):
        return IMAGE_ONLY_QUESTION_PROMPT_ZH_TW
    if normalized.startswith("zh"):
        return IMAGE_ONLY_QUESTION_PROMPT_ZH_CN
    return IMAGE_ONLY_QUESTION_PROMPT_EN
|
|
||
|
|
||
| class _TutorQuestionEntriesMixin: | ||
| @plugin_entry( | ||
| id="study_generate_question", | ||
|
|
@@ -23,6 +39,7 @@ class _TutorQuestionEntriesMixin: | |
| "properties": { | ||
| "text": {"type": "string", "default": ""}, | ||
| "topic": {"type": "string", "default": ""}, | ||
| "vision_image_base64": {"type": "string", "default": ""}, | ||
| }, | ||
| }, | ||
| timeout=60.0, | ||
|
|
@@ -35,52 +52,84 @@ class _TutorQuestionEntriesMixin: | |
| "topic", | ||
| ], | ||
| ) | ||
| async def study_generate_question(self, text: str = "", topic: str = "", **_): | ||
| async def study_generate_question( | ||
| self, | ||
| text: str = "", | ||
| topic: str = "", | ||
| vision_image_base64: str = "", | ||
| **_, | ||
| ): | ||
| if self._agent is None: | ||
| return Err(SdkError("study tutor agent is not initialized")) | ||
| source_text = str(text or "").strip() | ||
| vision_image_payload = str(vision_image_base64 or "").strip() | ||
| used_ocr_fallback = False | ||
| if not source_text: | ||
| if not source_text and not vision_image_payload: | ||
| async with self._lock: | ||
| source_text = self._state.last_ocr_text | ||
| used_ocr_fallback = bool(source_text.strip()) | ||
| source_text = source_text.strip() | ||
| if not source_text: | ||
| if not source_text and not vision_image_payload: | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
This now accepts Useful? React with 👍 / 👎. |
||
| return Err( | ||
| SdkError( | ||
| "study tutor requires text or a non-empty OCR snapshot", | ||
| "study tutor requires text, an image, or a non-empty OCR snapshot", | ||
| code="MISSING_TEXT", | ||
| ) | ||
| ) | ||
| async with self._lock: | ||
| active_mode = self._state.active_mode | ||
| tutor_context = await self._build_learning_context( | ||
| LLM_OPERATION_QUESTION_GENERATE, | ||
| input_text=source_text, | ||
| extra={ | ||
| "source": "ocr_snapshot" if used_ocr_fallback or not text else "manual", | ||
| "source_text": source_text, | ||
| "topic_hint": str(topic or "").strip(), | ||
| "mode": active_mode, | ||
| }, | ||
| ) | ||
| reply = await self._agent.question_generate( | ||
| source_text, mode=active_mode, context=tutor_context | ||
| ) | ||
| payload = await self._finalize_tutor_call( | ||
| LLM_OPERATION_QUESTION_GENERATE, | ||
| reply, | ||
| history_kind=LLM_OPERATION_QUESTION_GENERATE, | ||
| metadata={ | ||
| "degraded": reply.degraded, | ||
| "diagnostic": reply.diagnostic, | ||
| "payload": reply.payload, | ||
| "screen_classification": tutor_context.get("screen_classification") | ||
| or {}, | ||
| }, | ||
| extra_context=tutor_context, | ||
| validated_vision_image = _validate_optional_vision_image_payload( | ||
| self, vision_image_payload, operation="study_generate_question" | ||
| ) | ||
| payload["screen_classification"] = ( | ||
| tutor_context.get("screen_classification") or {} | ||
| ) | ||
| return Ok(payload) | ||
| if isinstance(validated_vision_image, Err): | ||
| return validated_vision_image | ||
| vision_image_payload = validated_vision_image | ||
| try: | ||
| image_only_source = False | ||
| if not source_text and vision_image_payload: | ||
| source_text = _image_only_question_prompt(self._cfg.language) | ||
| image_only_source = True | ||
|
Comment on lines
+87
to
+89
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
When the user pastes only an image while Useful? React with 👍 / 👎. |
||
| async with self._lock: | ||
| active_mode = self._state.active_mode | ||
| tutor_context = await self._build_learning_context( | ||
| LLM_OPERATION_QUESTION_GENERATE, | ||
| input_text=source_text, | ||
| extra={ | ||
| "source": "ocr_snapshot" | ||
| if used_ocr_fallback | ||
| else ("vision_image" if image_only_source else "manual"), | ||
| "source_text": source_text, | ||
| "topic_hint": str(topic or "").strip(), | ||
| "mode": active_mode, | ||
| **( | ||
| { | ||
| "vision_enabled": True, | ||
| "vision_image_base64": vision_image_payload, | ||
| } | ||
| if vision_image_payload | ||
| else {} | ||
| ), | ||
| }, | ||
| ) | ||
| reply = await self._agent.question_generate( | ||
| source_text, mode=active_mode, context=tutor_context | ||
| ) | ||
| payload = await self._finalize_tutor_call( | ||
| LLM_OPERATION_QUESTION_GENERATE, | ||
| reply, | ||
| history_kind=LLM_OPERATION_QUESTION_GENERATE, | ||
| metadata={ | ||
| "degraded": reply.degraded, | ||
| "diagnostic": reply.diagnostic, | ||
| "payload": reply.payload, | ||
| "screen_classification": tutor_context.get("screen_classification") | ||
| or {}, | ||
| }, | ||
| extra_context=tutor_context, | ||
| ) | ||
| payload["screen_classification"] = ( | ||
| tutor_context.get("screen_classification") or {} | ||
| ) | ||
| return Ok(payload) | ||
| except Exception as exc: | ||
| return _entry_exception_error( | ||
| self, exc, operation="study_generate_question" | ||
| ) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For an image-only Explain action with a previous OCR snapshot in state,
`source_text` has already been filled from `last_ocr_text` before this check, so the image-only prompt is never selected and the context source remains `ocr_snapshot`. This makes a pasted image get explained with stale OCR text from a prior screen; the OCR fallback should only run when no pasted vision image was provided. Useful? React with 👍 / 👎.