Skip to content

Commit 2317d11

Browse files
committed
fix: clipboard image paste on WSL2, Wayland, and VSCode terminal
The original implementation only supported xclip (X11), which silently fails on WSL2 (it cannot access the Windows clipboard for images), on Wayland desktops (xclip is X11-only), and in the VSCode terminal on WSL2.

Clipboard backend changes (hermes_cli/clipboard.py):
- WSL2: detect via /proc/version, use powershell.exe with .NET System.Windows.Forms.Clipboard to extract images as base64 PNG
- Wayland: use wl-paste with MIME type detection, auto-convert BMP to PNG for WSLg environments (via Pillow or ImageMagick)
- Dispatch order: WSL → Wayland → X11 (xclip), with fallthrough
- New has_clipboard_image() for lightweight clipboard checks
- Cache WSL detection result per-process

CLI changes (cli.py):
- /paste command: explicit clipboard image check for terminals where BracketedPaste doesn't fire (image-only clipboard in VSCode/WinTerm)
- Ctrl+V keybinding: fallback for Linux terminals where Ctrl+V sends a raw byte instead of triggering bracketed paste

Tests: 80 tests (up from 37) covering WSL, Wayland, X11 dispatch, BMP conversion, has_clipboard_image, and the /paste command.
1 parent 8253b54 commit 2317d11

File tree

3 files changed

+703
-24
lines changed

3 files changed

+703
-24
lines changed

cli.py

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -704,6 +704,7 @@ def build_welcome_banner(console: Console, model: str, cwd: str, tools: List[dic
704704
"/cron": "Manage scheduled tasks (list, add, remove)",
705705
"/skills": "Search, install, inspect, or manage skills from online registries",
706706
"/platforms": "Show gateway/messaging platform status",
707+
"/paste": "Check clipboard for an image and attach it",
707708
"/reload-mcp": "Reload MCP servers from config.yaml",
708709
"/quit": "Exit the CLI (also: /exit, /q)",
709710
}
@@ -1132,6 +1133,23 @@ def _try_attach_clipboard_image(self) -> bool:
11321133
self._image_counter -= 1
11331134
return False
11341135

1136+
def _handle_paste_command(self):
    """Run the /paste command: look for a clipboard image and attach it.

    Meant as an explicit fallback for terminals where a bracketed-paste
    event does not fire for image-only clipboard content (e.g., VSCode
    terminal, Windows Terminal with WSL2).

    Prints one of three messages: image attached, image present but
    extraction failed, or no image found.
    """
    from hermes_cli.clipboard import has_clipboard_image

    # Cheap probe first; bail out early when there is nothing to attach.
    if not has_clipboard_image():
        _cprint(f" {_DIM}(._.) No image found in clipboard{_RST}")
        return

    if not self._try_attach_clipboard_image():
        _cprint(f" {_DIM}(>_<) Clipboard has an image but extraction failed{_RST}")
        return

    # The freshly attached image is the last one, so its number is the count.
    n = len(self._attached_images)
    _cprint(f" 📎 Image #{n} attached from clipboard")
1152+
11351153
def _build_multimodal_content(self, text: str, images: list) -> list:
11361154
"""Convert text + image paths into OpenAI vision multimodal content.
11371155
@@ -1837,6 +1855,8 @@ def process_command(self, command: str) -> bool:
18371855
self._manual_compress()
18381856
elif cmd_lower == "/usage":
18391857
self._show_usage()
1858+
elif cmd_lower == "/paste":
1859+
self._handle_paste_command()
18401860
elif cmd_lower == "/reload-mcp":
18411861
self._reload_mcp()
18421862
else:
@@ -2598,13 +2618,32 @@ def handle_ctrl_d(event):
25982618

25992619
@kb.add(Keys.BracketedPaste, eager=True)
def handle_paste(event):
    """Bracketed-paste handler: attach a clipboard image, then insert the text.

    Runs on every bracketed-paste event.  The clipboard is checked for an
    image first and the UI is redrawn if one was attached; any text that
    came with the paste event is then inserted into the current buffer.
    """
    if self._try_attach_clipboard_image():
        event.app.invalidate()

    # event.data may be empty/None for an image-only paste.
    text = event.data or ""
    if text:
        event.current_buffer.insert_text(text)
2607-
2632+
2633+
@kb.add('c-v')
def handle_ctrl_v(event):
    """Ctrl+V key binding: try to attach a clipboard image.

    Fallback for terminals that deliver Ctrl+V to the application as a
    raw key press (byte 0x16) instead of performing a paste, as on Linux
    terminals such as GNOME Terminal or Konsole.  Terminals that intercept
    Ctrl+V themselves (macOS Terminal, iTerm2, VSCode, Windows Terminal)
    trigger the bracketed-paste handler instead, so this binding does not
    fire there.
    """
    attached = self._try_attach_clipboard_image()
    if attached:
        # Redraw so the new attachment becomes visible.
        event.app.invalidate()
2646+
26082647
# Dynamic prompt: shows Hermes symbol when agent is working,
26092648
# or answer prompt when clarify freetext mode is active.
26102649
cli_ref = self

hermes_cli/clipboard.py

Lines changed: 244 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Clipboard image extraction for macOS and Linux.
1+
"""Clipboard image extraction for macOS, Linux, and WSL2.
22
33
Provides a single function `save_clipboard_image(dest)` that checks the
44
system clipboard for image data, saves it to *dest* as PNG, and returns
@@ -7,16 +7,22 @@
77
88
Platform support:
99
macOS — osascript (always available), pngpaste (if installed)
10-
Linux — xclip (apt install xclip)
10+
WSL2 — powershell.exe via .NET System.Windows.Forms.Clipboard
11+
Linux — wl-paste (Wayland), xclip (X11)
1112
"""
1213

14+
import base64
1315
import logging
16+
import os
1417
import subprocess
1518
import sys
1619
from pathlib import Path
1720

1821
logger = logging.getLogger(__name__)
1922

23+
# Cache WSL detection (checked once per process)
24+
_wsl_detected: bool | None = None
25+
2026

2127
def save_clipboard_image(dest: Path) -> bool:
2228
"""Extract an image from the system clipboard and save it as PNG.
@@ -29,13 +35,39 @@ def save_clipboard_image(dest: Path) -> bool:
2935
return _linux_save(dest)
3036

3137

38+
def has_clipboard_image() -> bool:
    """Report whether the system clipboard currently holds an image.

    Only probes the clipboard — nothing is extracted or written to disk.

    On macOS the osascript-based check is used.  Everywhere else the
    backends are tried in the same order as ``_linux_save``
    (WSL → Wayland → X11), moving on to the next backend whenever one
    reports no image.  This keeps the check from answering "no" in a
    situation where saving would still succeed through a later backend.

    Returns:
        True if any applicable backend reports an image, otherwise False.
    """
    if sys.platform == "darwin":
        return _macos_has_image()

    # A negative or failed PowerShell probe is not final: under WSLg the
    # clipboard may still be reachable through wl-paste or xclip.
    if _is_wsl() and _wsl_has_image():
        return True

    if os.environ.get("WAYLAND_DISPLAY") and _wayland_has_image():
        return True

    return _xclip_has_image()
50+
51+
3252
# ── macOS ────────────────────────────────────────────────────────────────
3353

3454
def _macos_save(dest: Path) -> bool:
    """Save the clipboard image to *dest*: pngpaste first, osascript as fallback."""
    saved = _macos_pngpaste(dest)
    if not saved:
        # pngpaste missing or unsuccessful — use the osascript route instead.
        saved = _macos_osascript(dest)
    return saved
3757

3858

59+
def _macos_has_image() -> bool:
    """Return True when ``osascript -e "clipboard info"`` lists PNG or TIFF data.

    Any failure (osascript missing, timeout, ...) is treated as "no image".
    """
    image_classes = ("«class PNGf»", "«class TIFF»")
    try:
        probe = subprocess.run(
            ["osascript", "-e", "clipboard info"],
            capture_output=True,
            text=True,
            timeout=3,
        )
        listing = probe.stdout
        return any(marker in listing for marker in image_classes)
    except Exception:
        return False
69+
70+
3971
def _macos_pngpaste(dest: Path) -> bool:
4072
"""Use pngpaste (brew install pngpaste) — fastest, cleanest."""
4173
try:
@@ -54,16 +86,7 @@ def _macos_pngpaste(dest: Path) -> bool:
5486

5587
def _macos_osascript(dest: Path) -> bool:
5688
"""Use osascript to extract PNG data from clipboard (always available)."""
57-
# First check if clipboard contains image data
58-
try:
59-
info = subprocess.run(
60-
["osascript", "-e", "clipboard info"],
61-
capture_output=True, text=True, timeout=3,
62-
)
63-
has_image = "«class PNGf»" in info.stdout or "«class TIFF»" in info.stdout
64-
if not has_image:
65-
return False
66-
except Exception:
89+
if not _macos_has_image():
6790
return False
6891

6992
# Extract as PNG
@@ -91,8 +114,215 @@ def _macos_osascript(dest: Path) -> bool:
91114

92115
# ── Linux ────────────────────────────────────────────────────────────────
93116

117+
def _is_wsl() -> bool:
    """Return True when /proc/version mentions "microsoft" (WSL 1 or 2).

    The answer is computed on the first call and stored in the
    module-level ``_wsl_detected``; later calls return the stored value.
    If /proc/version cannot be read the result is False.
    """
    global _wsl_detected
    if _wsl_detected is None:
        try:
            with open("/proc/version", "r") as f:
                kernel_banner = f.read()
            _wsl_detected = "microsoft" in kernel_banner.lower()
        except Exception:
            _wsl_detected = False
    return _wsl_detected
128+
129+
94130
def _linux_save(dest: Path) -> bool:
    """Save the clipboard image to *dest*, trying WSL, then Wayland, then X11.

    Each backend is only a candidate: when it does not apply or fails,
    the next one is tried.  xclip is always the last resort.
    """
    # A WSL failure is not final — WSLg might have wl-paste or xclip working.
    if _is_wsl() and _wsl_save(dest):
        return True

    if os.environ.get("WAYLAND_DISPLAY") and _wayland_save(dest):
        return True

    return _xclip_save(dest)
142+
143+
144+
# ── WSL2 (powershell.exe) ────────────────────────────────────────────────
145+
146+
# Both scripts use .NET System.Windows.Forms.Clipboard.

# PowerShell script: print whether the clipboard contains an image.
_PS_CHECK_IMAGE = ";".join((
    "Add-Type -AssemblyName System.Windows.Forms",
    "[System.Windows.Forms.Clipboard]::ContainsImage()",
))

# PowerShell script: write the clipboard image to stdout as a
# base64-encoded PNG, or exit with status 1 when there is no image.
_PS_EXTRACT_IMAGE = ";".join((
    "Add-Type -AssemblyName System.Windows.Forms",
    "Add-Type -AssemblyName System.Drawing",
    "$img = [System.Windows.Forms.Clipboard]::GetImage()",
    # The guard's closing brace is followed directly by the next statement.
    "if ($null -eq $img) { exit 1 }$ms = New-Object System.IO.MemoryStream",
    "$img.Save($ms, [System.Drawing.Imaging.ImageFormat]::Png)",
    "[System.Convert]::ToBase64String($ms.ToArray())",
))
162+
163+
164+
def _wsl_has_image() -> bool:
    """Ask powershell.exe whether the Windows clipboard contains an image.

    Returns True only when the command exits with status 0 and its output
    contains "True".  A missing powershell.exe or any other failure is
    logged at debug level and reported as False.
    """
    try:
        probe = subprocess.run(
            [
                "powershell.exe",
                "-NoProfile",
                "-NonInteractive",
                "-Command",
                _PS_CHECK_IMAGE,
            ],
            capture_output=True,
            text=True,
            timeout=8,
        )
    except FileNotFoundError:
        logger.debug("powershell.exe not found — WSL clipboard unavailable")
        return False
    except Exception as e:
        logger.debug("WSL clipboard check failed: %s", e)
        return False
    return probe.returncode == 0 and "True" in probe.stdout
178+
179+
180+
def _wsl_save(dest: Path) -> bool:
    """Write the Windows clipboard image to *dest* via powershell.exe.

    PowerShell prints the image as base64-encoded PNG; that text is
    decoded and written to *dest*.

    Returns:
        True when *dest* exists and is non-empty afterwards.  False when
        PowerShell exits non-zero, prints nothing, is not installed, or
        anything else goes wrong (in that last case *dest* is removed).
    """
    try:
        proc = subprocess.run(
            [
                "powershell.exe",
                "-NoProfile",
                "-NonInteractive",
                "-Command",
                _PS_EXTRACT_IMAGE,
            ],
            capture_output=True,
            text=True,
            timeout=15,
        )
        # A non-zero exit status or blank output both mean "nothing to save".
        encoded = proc.stdout.strip() if proc.returncode == 0 else ""
        if not encoded:
            return False

        dest.write_bytes(base64.b64decode(encoded))
        return dest.exists() and dest.stat().st_size > 0

    except FileNotFoundError:
        logger.debug("powershell.exe not found — WSL clipboard unavailable")
    except Exception as e:
        logger.debug("WSL clipboard extraction failed: %s", e)
        dest.unlink(missing_ok=True)
    return False
205+
206+
207+
# ── Wayland (wl-paste) ──────────────────────────────────────────────────
208+
209+
def _wayland_has_image() -> bool:
    """Check whether the Wayland clipboard offers any ``image/*`` MIME type.

    Runs ``wl-paste --list-types`` and inspects the listed types.

    Returns:
        True when the command succeeds and at least one listed type starts
        with ``image/``.  False otherwise, including when wl-paste is not
        installed or the call fails; both cases are logged at debug level
        so a broken clipboard setup can be diagnosed.
    """
    try:
        r = subprocess.run(
            ["wl-paste", "--list-types"],
            capture_output=True, text=True, timeout=3,
        )
        return r.returncode == 0 and any(
            t.startswith("image/") for t in r.stdout.splitlines()
        )
    except FileNotFoundError:
        logger.debug("wl-paste not installed — Wayland clipboard unavailable")
    except Exception as e:
        # Still best-effort (timeouts, permission problems, ...), but leave
        # a trace instead of swallowing the error silently.
        logger.debug("Wayland clipboard check failed: %s", e)
    return False
224+
225+
226+
def _wayland_save(dest: Path) -> bool:
    """Use wl-paste to write the clipboard image to *dest* (Wayland sessions).

    The offered MIME types are listed first and the most preferred image
    type is extracted (PNG, then JPEG, BMP, GIF, WebP).  BMP data is
    passed through ``_convert_to_png``; other types are written as-is.

    Args:
        dest: File the image data is written to.

    Returns:
        True when image data was written to *dest*.  False when wl-paste
        is missing, fails, offers no supported image type, or produces no
        data; no empty file is left at *dest* in the no-data case.
    """
    try:
        # Check available MIME types
        types_r = subprocess.run(
            ["wl-paste", "--list-types"],
            capture_output=True, text=True, timeout=3,
        )
        if types_r.returncode != 0:
            return False
        offered = set(types_r.stdout.splitlines())

        # Prefer PNG, fall back to other image formats
        preference = ("image/png", "image/jpeg", "image/bmp",
                      "image/gif", "image/webp")
        mime = next((m for m in preference if m in offered), None)
        if mime is None:
            return False

        # Extract the image data
        with open(dest, "wb") as f:
            subprocess.run(
                ["wl-paste", "--type", mime],
                stdout=f, stderr=subprocess.DEVNULL, timeout=5, check=True,
            )

        if not dest.exists() or dest.stat().st_size == 0:
            # wl-paste wrote nothing: remove the empty file, matching the
            # cleanup done on the exception path below.
            dest.unlink(missing_ok=True)
            return False

        # BMP needs conversion to PNG (common in WSLg where only BMP
        # is bridged from Windows clipboard via RDP).
        if mime == "image/bmp":
            return _convert_to_png(dest)

        return True

    except FileNotFoundError:
        logger.debug("wl-paste not installed — Wayland clipboard unavailable")
    except Exception as e:
        logger.debug("wl-paste clipboard extraction failed: %s", e)
        dest.unlink(missing_ok=True)
    return False
272+
273+
274+
def _convert_to_png(path: Path) -> bool:
    """Convert the image file at *path* to PNG in place, best effort.

    Pillow is tried first, then the ImageMagick ``convert`` command.  When
    neither can do the conversion, the original file is left (or put
    back) at *path* so the caller still has the unconverted image.

    Args:
        path: Image file to convert; overwritten with PNG data on success.

    Returns:
        True if *path* exists and is non-empty afterwards (converted or
        not), False otherwise.
    """
    # Try Pillow first, when it is importable.
    try:
        from PIL import Image
        # The context manager releases the source file handle; load() pulls
        # the pixel data into memory before the same path is rewritten.
        with Image.open(path) as img:
            img.load()
            img.save(path, "PNG")
        return True
    except ImportError:
        pass
    except Exception as e:
        logger.debug("Pillow BMP→PNG conversion failed: %s", e)

    # Fall back to ImageMagick convert.  The source is moved aside so the
    # PNG can be written to the original path; the staging name is derived
    # by appending ".bmp", so it can never collide with *path* itself.
    tmp = path.with_name(path.name + ".bmp")
    try:
        path.rename(tmp)
        r = subprocess.run(
            ["convert", str(tmp), "png:" + str(path)],
            capture_output=True, timeout=5,
        )
        if r.returncode == 0 and path.exists() and path.stat().st_size > 0:
            tmp.unlink(missing_ok=True)
            return True
        logger.debug(
            "ImageMagick BMP→PNG conversion exited with status %s", r.returncode
        )
    except FileNotFoundError:
        logger.debug("ImageMagick not installed — cannot convert BMP to PNG")
    except Exception as e:
        logger.debug("ImageMagick BMP→PNG conversion failed: %s", e)

    # Conversion did not happen: move the staged original back (replacing
    # any partial output) so the unconverted image is not lost.
    try:
        if tmp.exists():
            tmp.replace(path)
    except OSError as e:
        logger.debug("Could not restore original image after failed conversion: %s", e)

    # Can't convert — BMP is still usable as-is for most APIs
    return path.exists() and path.stat().st_size > 0
305+
306+
307+
# ── X11 (xclip) ─────────────────────────────────────────────────────────
308+
309+
def _xclip_has_image() -> bool:
    """Return True when xclip's TARGETS listing for the clipboard includes image/png.

    A missing xclip binary, a non-zero exit status, or any other failure
    counts as "no image".
    """
    query = ["xclip", "-selection", "clipboard", "-t", "TARGETS", "-o"]
    try:
        listing = subprocess.run(
            query, capture_output=True, text=True, timeout=3,
        )
    except Exception:
        # Covers FileNotFoundError (xclip not installed) as well.
        return False
    if listing.returncode != 0:
        return False
    return "image/png" in listing.stdout
322+
323+
324+
def _xclip_save(dest: Path) -> bool:
325+
"""Use xclip to extract clipboard image (X11 sessions)."""
96326
# Check if clipboard has image content
97327
try:
98328
targets = subprocess.run(
@@ -102,7 +332,7 @@ def _linux_save(dest: Path) -> bool:
102332
if "image/png" not in targets.stdout:
103333
return False
104334
except FileNotFoundError:
105-
logger.debug("xclip not installed — clipboard image paste unavailable")
335+
logger.debug("xclip not installed — X11 clipboard image paste unavailable")
106336
return False
107337
except Exception:
108338
return False

0 commit comments

Comments
 (0)