feat: clipboard image paste in CLI (Cmd+V / Ctrl+V)
Copy an image to clipboard (screenshot, browser, etc.) and paste into the Hermes CLI. The image is saved to ~/.hermes/images/, shown as a badge above the input ([📎 Image #1]), and sent to the model as a base64-encoded OpenAI vision multimodal content block. Implementation: - hermes_cli/clipboard.py: clean module with platform-specific extraction - macOS: pngpaste (if installed) → osascript fallback (always available) - Linux: xclip (apt install xclip) - cli.py: BracketedPaste key handler checks clipboard on every paste, image bar widget shows attached images, chat() converts to multimodal content format, Ctrl+C clears attachments Inspired by @m0at's fork (https://github.com/m0at/hermes-agent) which implemented image paste support for local vision models. Reimplemented cleanly as a separate module with tests.
This commit is contained in:
127
cli.py
127
cli.py
@@ -2135,20 +2135,21 @@ class HermesCLI:
|
||||
self._approval_state = None
|
||||
self._approval_deadline = 0
|
||||
self._invalidate()
|
||||
_cprint(f"\n{_DIM} ⏱ Timeout — denying command{_RST}")
|
||||
return "deny"
|
||||
|
||||
def chat(self, message: str) -> Optional[str]:
|
||||
def chat(self, message, images: list = None) -> Optional[str]:
|
||||
"""
|
||||
Send a message to the agent and get a response.
|
||||
|
||||
Handles streaming output, interrupt detection (user typing while agent
|
||||
is working), and re-queueing of interrupted messages.
|
||||
|
||||
Uses a dedicated _interrupt_queue (separate from _pending_input) to avoid
|
||||
race conditions between the process_loop and interrupt monitoring. Messages
|
||||
typed while the agent is running go to _interrupt_queue; messages typed while
|
||||
idle go to _pending_input.
|
||||
|
||||
Args:
|
||||
message: The user's message
|
||||
message: The user's message (str or multimodal content list)
|
||||
images: Optional list of Path objects for attached images
|
||||
|
||||
Returns:
|
||||
The agent's response, or None on error
|
||||
@@ -2161,6 +2162,28 @@ class HermesCLI:
|
||||
if not self._init_agent():
|
||||
return None
|
||||
|
||||
# Convert attached images to OpenAI vision multimodal content
|
||||
if images:
|
||||
import base64 as _b64
|
||||
content_parts = []
|
||||
text_part = message if isinstance(message, str) else ""
|
||||
if not text_part:
|
||||
text_part = "What do you see in this image?"
|
||||
content_parts.append({"type": "text", "text": text_part})
|
||||
for img_path in images:
|
||||
if img_path.exists():
|
||||
data = _b64.b64encode(img_path.read_bytes()).decode()
|
||||
ext = img_path.suffix.lower().lstrip(".")
|
||||
mime = {"png": "image/png", "jpg": "image/jpeg",
|
||||
"jpeg": "image/jpeg", "gif": "image/gif",
|
||||
"webp": "image/webp"}.get(ext, "image/png")
|
||||
content_parts.append({
|
||||
"type": "image_url",
|
||||
"image_url": {"url": f"data:{mime};base64,{data}"}
|
||||
})
|
||||
_cprint(f" {_DIM}📎 attached {img_path.name} ({img_path.stat().st_size // 1024}KB){_RST}")
|
||||
message = content_parts
|
||||
|
||||
# Add user message to history
|
||||
self.conversation_history.append({"role": "user", "content": message})
|
||||
|
||||
@@ -2325,6 +2348,10 @@ class HermesCLI:
|
||||
self._approval_state = None # dict with command, description, choices, selected, response_queue
|
||||
self._approval_deadline = 0
|
||||
|
||||
# Clipboard image attachments (paste images into the CLI)
|
||||
self._attached_images: list[Path] = []
|
||||
self._image_counter = 0
|
||||
|
||||
# Register callbacks so terminal_tool prompts route through our UI
|
||||
set_sudo_password_callback(self._sudo_password_callback)
|
||||
set_approval_callback(self._approval_callback)
|
||||
@@ -2394,11 +2421,18 @@ class HermesCLI:
|
||||
|
||||
# --- Normal input routing ---
|
||||
text = event.app.current_buffer.text.strip()
|
||||
if text:
|
||||
if self._agent_running and not text.startswith("/"):
|
||||
self._interrupt_queue.put(text)
|
||||
has_images = bool(self._attached_images)
|
||||
if text or has_images:
|
||||
# Snapshot and clear attached images
|
||||
images = list(self._attached_images)
|
||||
self._attached_images.clear()
|
||||
event.app.invalidate()
|
||||
# Bundle text + images as a tuple when images are present
|
||||
payload = (text, images) if images else text
|
||||
if self._agent_running and not (text and text.startswith("/")):
|
||||
self._interrupt_queue.put(payload)
|
||||
else:
|
||||
self._pending_input.put(text)
|
||||
self._pending_input.put(payload)
|
||||
event.app.current_buffer.reset(append_to_history=True)
|
||||
|
||||
@kb.add('escape', 'enter')
|
||||
@@ -2511,10 +2545,12 @@ class HermesCLI:
|
||||
print("\n⚡ Interrupting agent... (press Ctrl+C again to force exit)")
|
||||
self.agent.interrupt()
|
||||
else:
|
||||
# If there's text in the input buffer, clear it (like bash).
|
||||
# If the buffer is already empty, exit.
|
||||
if event.app.current_buffer.text:
|
||||
# If there's text or images, clear them (like bash).
|
||||
# If everything is already empty, exit.
|
||||
if event.app.current_buffer.text or self._attached_images:
|
||||
event.app.current_buffer.reset()
|
||||
self._attached_images.clear()
|
||||
event.app.invalidate()
|
||||
else:
|
||||
self._should_exit = True
|
||||
event.app.exit()
|
||||
@@ -2524,6 +2560,36 @@ class HermesCLI:
|
||||
"""Handle Ctrl+D - exit."""
|
||||
self._should_exit = True
|
||||
event.app.exit()
|
||||
|
||||
from prompt_toolkit.keys import Keys
|
||||
|
||||
@kb.add(Keys.BracketedPaste, eager=True)
|
||||
def handle_paste(event):
|
||||
"""Handle Cmd+V / Ctrl+V paste — detect clipboard images.
|
||||
|
||||
On every paste event, check the system clipboard for image data.
|
||||
If found, save to ~/.hermes/images/ and attach it to the next
|
||||
message. Any pasted text is inserted into the buffer normally.
|
||||
"""
|
||||
from hermes_cli.clipboard import save_clipboard_image
|
||||
|
||||
pasted_text = event.data or ""
|
||||
|
||||
# Check clipboard for image
|
||||
img_dir = Path.home() / ".hermes" / "images"
|
||||
self._image_counter += 1
|
||||
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
img_path = img_dir / f"clip_{ts}_{self._image_counter}.png"
|
||||
|
||||
if save_clipboard_image(img_path):
|
||||
self._attached_images.append(img_path)
|
||||
event.app.invalidate()
|
||||
else:
|
||||
self._image_counter -= 1
|
||||
|
||||
# Insert any pasted text normally
|
||||
if pasted_text:
|
||||
event.current_buffer.insert_text(pasted_text)
|
||||
|
||||
# Dynamic prompt: shows Hermes symbol when agent is working,
|
||||
# or answer prompt when clarify freetext mode is active.
|
||||
@@ -2834,6 +2900,24 @@ class HermesCLI:
|
||||
style='class:input-rule',
|
||||
)
|
||||
|
||||
# Image attachment indicator — shows badges like [📎 Image #1] above input
|
||||
cli_ref = self
|
||||
|
||||
def _get_image_bar():
|
||||
if not cli_ref._attached_images:
|
||||
return []
|
||||
base = cli_ref._image_counter - len(cli_ref._attached_images) + 1
|
||||
badges = " ".join(
|
||||
f"[📎 Image #{base + i}]"
|
||||
for i in range(len(cli_ref._attached_images))
|
||||
)
|
||||
return [("class:image-badge", f" {badges} ")]
|
||||
|
||||
image_bar = Window(
|
||||
content=FormattedTextControl(_get_image_bar),
|
||||
height=Condition(lambda: bool(cli_ref._attached_images)),
|
||||
)
|
||||
|
||||
# Layout: interactive prompt widgets + ruled input at bottom.
|
||||
# The sudo, approval, and clarify widgets appear above the input when
|
||||
# the corresponding interactive prompt is active.
|
||||
@@ -2845,6 +2929,7 @@ class HermesCLI:
|
||||
clarify_widget,
|
||||
spacer,
|
||||
input_rule_top,
|
||||
image_bar,
|
||||
input_area,
|
||||
input_rule_bot,
|
||||
CompletionsMenu(max_height=12, scroll_offset=1),
|
||||
@@ -2860,6 +2945,8 @@ class HermesCLI:
|
||||
'hint': '#555555 italic',
|
||||
# Bronze horizontal rules around the input area
|
||||
'input-rule': '#CD7F32',
|
||||
# Clipboard image attachment badges
|
||||
'image-badge': '#87CEEB bold',
|
||||
'completion-menu': 'bg:#1a1a2e #FFF8DC',
|
||||
'completion-menu.completion': 'bg:#1a1a2e #FFF8DC',
|
||||
'completion-menu.completion.current': 'bg:#333355 #FFD700',
|
||||
@@ -2909,9 +2996,14 @@ class HermesCLI:
|
||||
|
||||
if not user_input:
|
||||
continue
|
||||
|
||||
# Unpack image payload: (text, [Path, ...]) or plain str
|
||||
submit_images = []
|
||||
if isinstance(user_input, tuple):
|
||||
user_input, submit_images = user_input
|
||||
|
||||
# Check for commands
|
||||
if user_input.startswith("/"):
|
||||
if isinstance(user_input, str) and user_input.startswith("/"):
|
||||
print(f"\n⚙️ {user_input}")
|
||||
if not self.process_command(user_input):
|
||||
self._should_exit = True
|
||||
@@ -2922,7 +3014,7 @@ class HermesCLI:
|
||||
|
||||
# Expand paste references back to full content
|
||||
import re as _re
|
||||
paste_match = _re.match(r'\[Pasted text #\d+: \d+ lines → (.+)\]', user_input)
|
||||
paste_match = _re.match(r'\[Pasted text #\d+: \d+ lines → (.+)\]', user_input) if isinstance(user_input, str) else None
|
||||
if paste_match:
|
||||
paste_path = Path(paste_match.group(1))
|
||||
if paste_path.exists():
|
||||
@@ -2944,12 +3036,17 @@ class HermesCLI:
|
||||
print()
|
||||
_cprint(f"{_GOLD}●{_RST} {_BOLD}{user_input}{_RST}")
|
||||
|
||||
# Show image attachment count
|
||||
if submit_images:
|
||||
n = len(submit_images)
|
||||
_cprint(f" {_DIM}📎 {n} image{'s' if n > 1 else ''} attached{_RST}")
|
||||
|
||||
# Regular chat - run agent
|
||||
self._agent_running = True
|
||||
app.invalidate() # Refresh status line
|
||||
|
||||
try:
|
||||
self.chat(user_input)
|
||||
self.chat(user_input, images=submit_images or None)
|
||||
finally:
|
||||
self._agent_running = False
|
||||
app.invalidate() # Refresh status line
|
||||
|
||||
122
hermes_cli/clipboard.py
Normal file
122
hermes_cli/clipboard.py
Normal file
@@ -0,0 +1,122 @@
|
||||
"""Clipboard image extraction for macOS and Linux.
|
||||
|
||||
Provides a single function `save_clipboard_image(dest)` that checks the
|
||||
system clipboard for image data, saves it to *dest* as PNG, and returns
|
||||
True on success. No external Python dependencies — uses only OS-level
|
||||
CLI tools that ship with the platform (or are commonly installed).
|
||||
|
||||
Platform support:
|
||||
macOS — osascript (always available), pngpaste (if installed)
|
||||
Linux — xclip (apt install xclip)
|
||||
"""
|
||||
|
||||
import logging
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def save_clipboard_image(dest: Path) -> bool:
|
||||
"""Extract an image from the system clipboard and save it as PNG.
|
||||
|
||||
Returns True if an image was found and saved, False otherwise.
|
||||
"""
|
||||
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||
if sys.platform == "darwin":
|
||||
return _macos_save(dest)
|
||||
return _linux_save(dest)
|
||||
|
||||
|
||||
# ── macOS ────────────────────────────────────────────────────────────────
|
||||
|
||||
def _macos_save(dest: Path) -> bool:
|
||||
"""Try pngpaste first (fast, handles more formats), fall back to osascript."""
|
||||
return _macos_pngpaste(dest) or _macos_osascript(dest)
|
||||
|
||||
|
||||
def _macos_pngpaste(dest: Path) -> bool:
|
||||
"""Use pngpaste (brew install pngpaste) — fastest, cleanest."""
|
||||
try:
|
||||
r = subprocess.run(
|
||||
["pngpaste", str(dest)],
|
||||
capture_output=True, timeout=3,
|
||||
)
|
||||
if r.returncode == 0 and dest.exists() and dest.stat().st_size > 0:
|
||||
return True
|
||||
except FileNotFoundError:
|
||||
pass # pngpaste not installed
|
||||
except Exception as e:
|
||||
logger.debug("pngpaste failed: %s", e)
|
||||
return False
|
||||
|
||||
|
||||
def _macos_osascript(dest: Path) -> bool:
|
||||
"""Use osascript to extract PNG data from clipboard (always available)."""
|
||||
# First check if clipboard contains image data
|
||||
try:
|
||||
info = subprocess.run(
|
||||
["osascript", "-e", "clipboard info"],
|
||||
capture_output=True, text=True, timeout=3,
|
||||
)
|
||||
has_image = "«class PNGf»" in info.stdout or "«class TIFF»" in info.stdout
|
||||
if not has_image:
|
||||
return False
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
# Extract as PNG
|
||||
script = (
|
||||
'try\n'
|
||||
' set imgData to the clipboard as «class PNGf»\n'
|
||||
f' set f to open for access POSIX file "{dest}" with write permission\n'
|
||||
' write imgData to f\n'
|
||||
' close access f\n'
|
||||
'on error\n'
|
||||
' return "fail"\n'
|
||||
'end try\n'
|
||||
)
|
||||
try:
|
||||
r = subprocess.run(
|
||||
["osascript", "-e", script],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
if r.returncode == 0 and "fail" not in r.stdout and dest.exists() and dest.stat().st_size > 0:
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.debug("osascript clipboard extract failed: %s", e)
|
||||
return False
|
||||
|
||||
|
||||
# ── Linux ────────────────────────────────────────────────────────────────
|
||||
|
||||
def _linux_save(dest: Path) -> bool:
|
||||
"""Use xclip to extract clipboard image."""
|
||||
# Check if clipboard has image content
|
||||
try:
|
||||
targets = subprocess.run(
|
||||
["xclip", "-selection", "clipboard", "-t", "TARGETS", "-o"],
|
||||
capture_output=True, text=True, timeout=3,
|
||||
)
|
||||
if "image/png" not in targets.stdout:
|
||||
return False
|
||||
except FileNotFoundError:
|
||||
logger.debug("xclip not installed — clipboard image paste unavailable")
|
||||
return False
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
# Extract PNG data
|
||||
try:
|
||||
with open(dest, "wb") as f:
|
||||
subprocess.run(
|
||||
["xclip", "-selection", "clipboard", "-t", "image/png", "-o"],
|
||||
stdout=f, stderr=subprocess.DEVNULL, timeout=5, check=True,
|
||||
)
|
||||
if dest.exists() and dest.stat().st_size > 0:
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.debug("xclip image extraction failed: %s", e)
|
||||
dest.unlink(missing_ok=True)
|
||||
return False
|
||||
107
tests/tools/test_clipboard.py
Normal file
107
tests/tools/test_clipboard.py
Normal file
@@ -0,0 +1,107 @@
|
||||
"""Tests for hermes_cli/clipboard.py — clipboard image extraction."""
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from hermes_cli.clipboard import (
|
||||
save_clipboard_image,
|
||||
_linux_save,
|
||||
_macos_pngpaste,
|
||||
_macos_osascript,
|
||||
)
|
||||
|
||||
|
||||
class TestSaveClipboardImage:
|
||||
"""Platform dispatch."""
|
||||
|
||||
def test_dispatches_to_macos_on_darwin(self, tmp_path):
|
||||
dest = tmp_path / "out.png"
|
||||
with patch("hermes_cli.clipboard.sys") as mock_sys:
|
||||
mock_sys.platform = "darwin"
|
||||
with patch("hermes_cli.clipboard._macos_save", return_value=False) as mock_mac:
|
||||
save_clipboard_image(dest)
|
||||
mock_mac.assert_called_once_with(dest)
|
||||
|
||||
def test_dispatches_to_linux_on_linux(self, tmp_path):
|
||||
dest = tmp_path / "out.png"
|
||||
with patch("hermes_cli.clipboard.sys") as mock_sys:
|
||||
mock_sys.platform = "linux"
|
||||
with patch("hermes_cli.clipboard._linux_save", return_value=False) as mock_linux:
|
||||
save_clipboard_image(dest)
|
||||
mock_linux.assert_called_once_with(dest)
|
||||
|
||||
def test_creates_parent_dirs(self, tmp_path):
|
||||
dest = tmp_path / "deep" / "nested" / "out.png"
|
||||
with patch("hermes_cli.clipboard.sys") as mock_sys:
|
||||
mock_sys.platform = "linux"
|
||||
with patch("hermes_cli.clipboard._linux_save", return_value=False):
|
||||
save_clipboard_image(dest)
|
||||
assert dest.parent.exists()
|
||||
|
||||
|
||||
class TestMacosPngpaste:
|
||||
def test_success(self, tmp_path):
|
||||
dest = tmp_path / "out.png"
|
||||
dest.write_bytes(b"fake png data") # simulate pngpaste writing
|
||||
with patch("hermes_cli.clipboard.subprocess.run") as mock_run:
|
||||
mock_run.return_value = MagicMock(returncode=0)
|
||||
assert _macos_pngpaste(dest) is True
|
||||
|
||||
def test_not_installed(self, tmp_path):
|
||||
dest = tmp_path / "out.png"
|
||||
with patch("hermes_cli.clipboard.subprocess.run", side_effect=FileNotFoundError):
|
||||
assert _macos_pngpaste(dest) is False
|
||||
|
||||
def test_no_image_in_clipboard(self, tmp_path):
|
||||
dest = tmp_path / "out.png"
|
||||
with patch("hermes_cli.clipboard.subprocess.run") as mock_run:
|
||||
mock_run.return_value = MagicMock(returncode=1)
|
||||
assert _macos_pngpaste(dest) is False
|
||||
|
||||
|
||||
class TestMacosOsascript:
|
||||
def test_no_image_type_in_clipboard(self, tmp_path):
|
||||
dest = tmp_path / "out.png"
|
||||
with patch("hermes_cli.clipboard.subprocess.run") as mock_run:
|
||||
mock_run.return_value = MagicMock(
|
||||
stdout="«class ut16», «class utf8»", returncode=0
|
||||
)
|
||||
assert _macos_osascript(dest) is False
|
||||
|
||||
def test_clipboard_info_check_fails(self, tmp_path):
|
||||
dest = tmp_path / "out.png"
|
||||
with patch("hermes_cli.clipboard.subprocess.run", side_effect=Exception("fail")):
|
||||
assert _macos_osascript(dest) is False
|
||||
|
||||
|
||||
class TestLinuxSave:
|
||||
def test_no_xclip_installed(self, tmp_path):
|
||||
dest = tmp_path / "out.png"
|
||||
with patch("hermes_cli.clipboard.subprocess.run", side_effect=FileNotFoundError):
|
||||
assert _linux_save(dest) is False
|
||||
|
||||
def test_no_image_in_clipboard(self, tmp_path):
|
||||
dest = tmp_path / "out.png"
|
||||
with patch("hermes_cli.clipboard.subprocess.run") as mock_run:
|
||||
mock_run.return_value = MagicMock(stdout="text/plain\n", returncode=0)
|
||||
assert _linux_save(dest) is False
|
||||
|
||||
def test_image_in_clipboard(self, tmp_path):
|
||||
dest = tmp_path / "out.png"
|
||||
|
||||
def fake_run(cmd, **kwargs):
|
||||
if "TARGETS" in cmd:
|
||||
return MagicMock(stdout="image/png\ntext/plain\n", returncode=0)
|
||||
# Extract call — write fake data
|
||||
if "stdout" in kwargs and kwargs["stdout"]:
|
||||
kwargs["stdout"].write(b"fake png")
|
||||
return MagicMock(returncode=0)
|
||||
|
||||
with patch("hermes_cli.clipboard.subprocess.run", side_effect=fake_run):
|
||||
# Create the file to simulate xclip writing
|
||||
dest.write_bytes(b"fake png")
|
||||
assert _linux_save(dest) is True
|
||||
Reference in New Issue
Block a user