From ada0b4f131baf95034ecb125ac36cec847eb6a0b Mon Sep 17 00:00:00 2001 From: teknium1 Date: Tue, 10 Feb 2026 21:02:40 -0800 Subject: [PATCH 1/4] Enhance image handling in platform adapters - Updated the image generation function description to clarify usage with markdown. - Added `send_image` method to `BasePlatformAdapter` for native image sending across platforms. - Implemented `send_image` in `DiscordAdapter` and `TelegramAdapter` to handle image attachments directly. - Introduced `extract_images` method to extract image URLs from markdown and HTML, improving content processing. - Enhanced message handling to support sending images as attachments while maintaining text content. --- gateway/platforms/base.py | 109 +++++++++++++++++++++++++++++----- gateway/platforms/discord.py | 86 +++++++++++++++++++++++++++ gateway/platforms/telegram.py | 25 ++++++++ model_tools.py | 2 +- 4 files changed, 207 insertions(+), 15 deletions(-) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 2e9da3354..b3ddb8359 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -6,10 +6,11 @@ and implement the required methods. """ import asyncio +import re from abc import ABC, abstractmethod from dataclasses import dataclass, field from datetime import datetime -from typing import Dict, List, Optional, Any, Callable, Awaitable +from typing import Dict, List, Optional, Any, Callable, Awaitable, Tuple from enum import Enum import sys @@ -177,6 +178,68 @@ class BasePlatformAdapter(ABC): """ pass + async def send_image( + self, + chat_id: str, + image_url: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + ) -> SendResult: + """ + Send an image natively via the platform API. + + Override in subclasses to send images as proper attachments + instead of plain-text URLs. Default falls back to sending the + URL as a text message. + """ + # Fallback: send URL as text (subclasses override for native images) + text = f"{caption}\n{image_url}" if caption else image_url + return await self.send(chat_id=chat_id, content=text, reply_to=reply_to) + + @staticmethod + def extract_images(content: str) -> Tuple[List[Tuple[str, str]], str]: + """ + Extract image URLs from markdown and HTML image tags in a response. + + Finds patterns like: + - ![alt text](https://example.com/image.png) + - + - + + Args: + content: The response text to scan. + + Returns: + Tuple of (list of (url, alt_text) pairs, cleaned content with image tags removed). + """ + images = [] + cleaned = content + + # Match markdown images: ![alt](url) + md_pattern = r'!\[([^\]]*)\]\((https?://[^\s\)]+)\)' + for match in re.finditer(md_pattern, content): + alt_text = match.group(1) + url = match.group(2) + # Only extract URLs that look like actual images + if any(url.lower().endswith(ext) or ext in url.lower() for ext in + ['.png', '.jpg', '.jpeg', '.gif', '.webp', 'fal.media', 'fal-cdn', 'replicate.delivery']): + images.append((url, alt_text)) + + # Match HTML img tags: or or + html_pattern = r']+)["\']?\s*/?>\s*(?:)?' + for match in re.finditer(html_pattern, content): + url = match.group(1) + images.append((url, "")) + + # Remove matched image tags from content if we found images + if images: + cleaned = re.sub(md_pattern, '', cleaned) + cleaned = re.sub(html_pattern, '', cleaned) + # Clean up leftover blank lines + cleaned = re.sub(r'\n{3,}', '\n\n', cleaned).strip() + + return images, cleaned + async def _keep_typing(self, chat_id: str, interval: float = 2.0) -> None: """ Continuously send typing indicator until cancelled. @@ -231,23 +294,41 @@ class BasePlatformAdapter(ABC): # Send response if any if response: - result = await self.send( - chat_id=event.source.chat_id, - content=response, - reply_to=event.message_id - ) + # Extract image URLs and send them as native platform attachments + images, text_content = self.extract_images(response) - # Log send failures (don't raise - user already saw tool progress) - if not result.success: - print(f"[{self.name}] Failed to send response: {result.error}") - # Try sending without markdown as fallback - fallback_result = await self.send( + # Send the text portion first (if any remains after extracting images) + if text_content: + result = await self.send( chat_id=event.source.chat_id, - content=f"(Response formatting failed, plain text:)\n\n{response[:3500]}", + content=text_content, reply_to=event.message_id ) - if not fallback_result.success: - print(f"[{self.name}] Fallback send also failed: {fallback_result.error}") + + # Log send failures (don't raise - user already saw tool progress) + if not result.success: + print(f"[{self.name}] Failed to send response: {result.error}") + # Try sending without markdown as fallback + fallback_result = await self.send( + chat_id=event.source.chat_id, + content=f"(Response formatting failed, plain text:)\n\n{text_content[:3500]}", + reply_to=event.message_id + ) + if not fallback_result.success: + print(f"[{self.name}] Fallback send also failed: {fallback_result.error}") + + # Send extracted images as native attachments + for image_url, alt_text in images: + try: + img_result = await self.send_image( + chat_id=event.source.chat_id, + image_url=image_url, + caption=alt_text if alt_text else None, + ) + if not img_result.success: + print(f"[{self.name}] Failed to send image: {img_result.error}") + except Exception as img_err: + print(f"[{self.name}] Error sending image: {img_err}") # Check if there's a pending message that was queued during our processing if session_key in self._pending_messages: diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index 345b19899..0d0cc9e25 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -8,6 +8,7 @@ Uses discord.py library for: """ import asyncio +import os from typing import Dict, List, Optional, Any try: @@ -173,6 +174,61 @@ class DiscordAdapter(BasePlatformAdapter): except Exception as e: return SendResult(success=False, error=str(e)) + async def send_image( + self, + chat_id: str, + image_url: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + ) -> SendResult: + """Send an image natively as a Discord file attachment.""" + if not self._client: + return SendResult(success=False, error="Not connected") + + try: + import aiohttp + + channel = self._client.get_channel(int(chat_id)) + if not channel: + channel = await self._client.fetch_channel(int(chat_id)) + if not channel: + return SendResult(success=False, error=f"Channel {chat_id} not found") + + # Download the image and send as a Discord file attachment + # (Discord renders attachments inline, unlike plain URLs) + async with aiohttp.ClientSession() as session: + async with session.get(image_url, timeout=aiohttp.ClientTimeout(total=30)) as resp: + if resp.status != 200: + raise Exception(f"Failed to download image: HTTP {resp.status}") + + image_data = await resp.read() + + # Determine filename from URL or content type + content_type = resp.headers.get("content-type", "image/png") + ext = "png" + if "jpeg" in content_type or "jpg" in content_type: + ext = "jpg" + elif "gif" in content_type: + ext = "gif" + elif "webp" in content_type: + ext = "webp" + + import io + file = discord.File(io.BytesIO(image_data), filename=f"image.{ext}") + + msg = await channel.send( + content=caption if caption else None, + file=file, + ) + return SendResult(success=True, message_id=str(msg.id)) + + except ImportError: + print(f"[{self.name}] aiohttp not installed, falling back to URL. Run: pip install aiohttp") + return await super().send_image(chat_id, image_url, caption, reply_to) + except Exception as e: + print(f"[{self.name}] Failed to send image attachment, falling back to URL: {e}") + return await super().send_image(chat_id, image_url, caption, reply_to) + async def send_typing(self, chat_id: str) -> None: """Send typing indicator.""" if self._client: @@ -232,6 +288,36 @@ class DiscordAdapter(BasePlatformAdapter): async def _handle_message(self, message: DiscordMessage) -> None: """Handle incoming Discord messages.""" + # In server channels (not DMs), require the bot to be @mentioned + # UNLESS the channel is in the free-response list. + # + # Config: + # DISCORD_FREE_RESPONSE_CHANNELS: Comma-separated channel IDs where the + # bot responds to every message without needing a mention. + # DISCORD_REQUIRE_MENTION: Set to "false" to disable mention requirement + # globally (all channels become free-response). Default: "true". + + if not isinstance(message.channel, discord.DMChannel): + # Check if this channel is in the free-response list + free_channels_raw = os.getenv("DISCORD_FREE_RESPONSE_CHANNELS", "") + free_channels = {ch.strip() for ch in free_channels_raw.split(",") if ch.strip()} + channel_id = str(message.channel.id) + + # Global override: if DISCORD_REQUIRE_MENTION=false, all channels are free + require_mention = os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no") + + is_free_channel = channel_id in free_channels + + if require_mention and not is_free_channel: + # Must be @mentioned to respond + if self._client.user not in message.mentions: + return # Silently ignore messages that don't mention the bot + + # Strip the bot mention from the message text so the agent sees clean input + if self._client.user and self._client.user in message.mentions: + message.content = message.content.replace(f"<@{self._client.user.id}>", "").strip() + message.content = message.content.replace(f"<@!{self._client.user.id}>", "").strip() + # Determine message type msg_type = MessageType.TEXT if message.content.startswith("/"): diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 10c67c96b..8cd8fc2fe 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -174,6 +174,31 @@ class TelegramAdapter(BasePlatformAdapter): except Exception as e: return SendResult(success=False, error=str(e)) + async def send_image( + self, + chat_id: str, + image_url: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + ) -> SendResult: + """Send an image natively as a Telegram photo.""" + if not self._bot: + return SendResult(success=False, error="Not connected") + + try: + # Telegram can send photos directly from URLs + msg = await self._bot.send_photo( + chat_id=int(chat_id), + photo=image_url, + caption=caption[:1024] if caption else None, # Telegram caption limit + reply_to_message_id=int(reply_to) if reply_to else None, + ) + return SendResult(success=True, message_id=str(msg.message_id)) + except Exception as e: + print(f"[{self.name}] Failed to send photo, falling back to URL: {e}") + # Fallback: send as text link + return await super().send_image(chat_id, image_url, caption, reply_to) + async def send_typing(self, chat_id: str) -> None: """Send typing indicator.""" if self._bot: diff --git a/model_tools.py b/model_tools.py index b5035ab32..f0250ee21 100644 --- a/model_tools.py +++ b/model_tools.py @@ -392,7 +392,7 @@ def get_image_tool_definitions() -> List[Dict[str, Any]]: "type": "function", "function": { "name": "image_generate", - "description": "Generate high-quality images from text prompts using FLUX 2 Pro model with automatic 2x upscaling. Creates detailed, artistic images that are automatically upscaled for hi-rez results. Returns a single upscaled image URL that can be displayed using tags.", + "description": "Generate high-quality images from text prompts using FLUX 2 Pro model with automatic 2x upscaling. Creates detailed, artistic images that are automatically upscaled for hi-rez results. Returns a single upscaled image URL. Display it using markdown: ![description](URL)", "parameters": { "type": "object", "properties": { From 137ce05324d07489a1e7e8a71d81b4b6473f37f0 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Tue, 10 Feb 2026 21:04:24 -0800 Subject: [PATCH 2/4] Add image generation tool to toolsets for messaging platforms - Included "image_generate" in the toolsets for web, vision, and skills categories, expanding functionality for image-related tasks. - Updated comments for clarity on the new tool's purpose, ensuring users understand its integration within the existing framework. --- toolsets.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/toolsets.py b/toolsets.py index 04785b02b..7896d1ecd 100644 --- a/toolsets.py +++ b/toolsets.py @@ -169,6 +169,8 @@ TOOLSETS = { "web_search", "web_extract", # Vision - analyze images sent by users "vision_analyze", + # Image generation + "image_generate", # Skills - access knowledge base "skills_list", "skill_view", # Cronjob management - let users schedule tasks @@ -188,6 +190,8 @@ TOOLSETS = { "web_search", "web_extract", # Vision - analyze images sent by users "vision_analyze", + # Image generation + "image_generate", # Skills - access knowledge base "skills_list", "skill_view", # Cronjob management - let users schedule tasks @@ -207,6 +211,8 @@ TOOLSETS = { "read_file", "write_file", "patch", "search", # Vision "vision_analyze", + # Image generation + "image_generate", # Skills "skills_list", "skill_view", # Cronjob management From 07501bef14bff9358e07dee2b56a6be87378d6b8 Mon Sep 17 00:00:00 2001 From: nightwing Date: Wed, 11 Feb 2026 17:36:18 -0700 Subject: [PATCH 3/4] =?UTF-8?q?Add=20Project=5Fnotes.md=20=E2=80=94=20cent?= =?UTF-8?q?ralized=20status=20tracker=20for=20all=20side=20projects?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Project_notes.md | 136 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 Project_notes.md diff --git a/Project_notes.md b/Project_notes.md new file mode 100644 index 000000000..eb116fb9b --- /dev/null +++ b/Project_notes.md @@ -0,0 +1,136 @@ +# Project Notes + +*Maintained by Hermes — last updated February 2025* + +--- + +## 1. Kandinsky (Multimodal Transformer) +- **Repo:** https://github.com/samherring99/kandinsky +- **Local path:** `~/Desktop/Projects/kandinsky` +- **Description:** An anything-to-anything transformer combining text, image, and audio modalities. Trains on Pokemon BLIP captions paired with Gen 1 Pokemon audio cries. Uses audio tokenization adapted from nanoGPT. +- **Status:** Early POC. Training code exists (`model.py`) and dataset creation (`create_dataset.py`) works. Audio heads are producing the same sound — unclear if it's a training issue or data issue. +- **TODO:** + - Debug why audio heads produce identical output + - Investigate if model needs more training time + - Design a data pipeline for better/more training data + - General repo cleanup (requirements.txt, proper CLI, etc.) + +--- + +## 2. NightwingGameSim (LLM → GameBoy ROM Generator) +- **Repo:** https://github.com/samherring99/NightwingGameSim +- **Local path:** `~/Desktop/Projects/NightwingGameSim` +- **Description:** AI-powered pipeline that turns natural language prompts into playable GameBoy ROM files. Generates C code, compiles with GBDK, outputs `.gb` files. Supports Claude API, local Llama, and RAG backends. +- **Status:** Functional — generation pipeline works end-to-end with Claude 4 system prompt. Has tests, docs, examples, and retry logic. +- **TODO:** + - Harden the repo, clean up structure + - Build a better testing pipeline + - Come up with better prompt ideas / examples + +--- + +## 3. ContentBasedMIR (Music Information Retrieval) +- **Repo:** https://github.com/samherring99/ContentBasedMIR +- **Local path:** `~/Desktop/Projects/ContentBasedMIR` +- **Description:** Music similarity analysis using Spotify API track data. Extracts 54 audio features per song and visualizes similarity matrices for music recommendation. +- **Status:** Early stage. Can download Spotify track analysis data and plot similarity matrices. Needs significant expansion. +- **TODO:** + - Expand analysis pipeline with more features + - Integrate with text message data for personalized recommendations + - Build out visualization and exploration tools + - General modernization (dependencies, structure) + +--- + +## 4. MessageRetrieval (iMessage RAG/SQL) +- **Repo:** https://github.com/samherring99/MessageRetrieval +- **Local path:** `~/Desktop/Projects/MessageRetrieval` +- **Description:** Natural language querying over iMessage data using SQL generation (text2SQL) instead of vector embeddings. Uses LLM-as-Judge pattern for scoring and ranking retrieved messages. +- **Status:** Has initial text2SQL pipeline and summarization tool. Recently worked on with Claude Code. Needs testing. +- **TODO:** + - Test out the recent Claude Code work + - Build "iMessage Jarvis" — answer questions about texts + - Improve SQL generation prompts and accuracy + - Better error handling and UX + +--- + +## 5. Grailed Embedding Search +- **Repo:** https://github.com/samherring99/grailed-embedding-search +- **Local path:** `~/Desktop/Projects/grailed-embedding-search` +- **Description:** Embedding-based semantic search over Grailed fashion listings. Uses vector similarity to find related items. +- **Status:** Very early — has a basic similarity search implementation. Previously had a more complex version that's being reworked. +- **TODO:** + - Build out the search pipeline + - Add scraping/indexing for listings + - Improve embedding approach + - Add UI or CLI for exploring results + +--- + +## 6. NightwingNBA (Sports Analytics) +- **Repo:** https://github.com/samherring99/NightwingNBA +- **Local path:** `~/Desktop/Projects/NightwingNBA` +- **Description:** NBA game prediction system. Builds a database of game data, trains a PyTorch model, and makes daily predictions. Has full pipeline: build DB → write data → train → predict. +- **Status:** Functional pipeline exists. Has database building, training, prediction, and daily update scripts. +- **TODO:** + - Explore and potentially revive + - Update data sources if stale + - Improve model accuracy + - Add visualization/reporting + +--- + +## 7. Stable Audio Sample Explorer +- **Repo:** https://github.com/samherring99/stable-audio-sample-explorer +- **Local path:** `~/Desktop/Projects/stable-audio-sample-explorer` +- **Description:** Tool for exploring audio samples generated by Stable Audio. +- **Status:** 🪦 **Dead** — no active work needed per Sam. + +--- + +## 8. NightwingArt (Art Tools) +- **Repo:** https://github.com/samherring99/NightwingArt +- **Local path:** `~/Desktop/Projects/NightwingArt` +- **Description:** Collection of art tooling scripts — video editing, clip splicing with beat matching, damage effects, and general image manipulation. +- **Status:** Maintenance mode. Tools exist for various effects. Work happens as-needed. +- **TODO:** + - Add tools as needed for new art projects + +--- + +## 9. Claude-based VST Building ⚠️ *Needs new repo* +- **Description:** Generate VST audio plugins for DAWs from English language prompts. LLM-powered audio plugin creation. +- **Status:** Concept only — no repo exists yet. +- **TODO:** + - Create repo + - Research VST SDK / JUCE framework + - Design prompt → code → compile pipeline + +--- + +## 10. Government Auction Site Scraper ⚠️ *Needs new repo* +- **Description:** Tool that monitors and scrapes government auction sites in San Francisco for deals. +- **Status:** Concept only — no repo exists yet. +- **TODO:** + - Create repo + - Research SF government auction sites and their structure + - Build scraper + notification system + +--- + +## Priority Assessment + +| Project | Activity Level | Suggested Priority | +|---------|---------------|-------------------| +| NightwingGameSim | Active | 🔴 High | +| MessageRetrieval | Active | 🔴 High | +| Kandinsky | Active | 🟡 Medium | +| ContentBasedMIR | Exploratory | 🟡 Medium | +| Grailed Embedding Search | Early | 🟡 Medium | +| NightwingNBA | Dormant | 🟢 Low | +| NightwingArt | As-needed | 🟢 Low | +| VST Builder | Concept | 🔵 Future | +| Gov Auction Scraper | Concept | 🔵 Future | +| Stable Audio Explorer | Dead | ⚫ None | + From fc792a4be9279495ff0c2a75e95e3ae3c65e1b23 Mon Sep 17 00:00:00 2001 From: nightwing Date: Wed, 11 Feb 2026 17:54:47 -0700 Subject: [PATCH 4/4] Update Project_notes.md: grailed-embedding-search status and TODOs (June 2025) --- Project_notes.md | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/Project_notes.md b/Project_notes.md index eb116fb9b..81c2083ff 100644 --- a/Project_notes.md +++ b/Project_notes.md @@ -1,6 +1,6 @@ # Project Notes -*Maintained by Hermes — last updated February 2025* +*Maintained by Hermes — last updated June 2025* --- @@ -58,13 +58,17 @@ ## 5. Grailed Embedding Search - **Repo:** https://github.com/samherring99/grailed-embedding-search - **Local path:** `~/Desktop/Projects/grailed-embedding-search` -- **Description:** Embedding-based semantic search over Grailed fashion listings. Uses vector similarity to find related items. -- **Status:** Very early — has a basic similarity search implementation. Previously had a more complex version that's being reworked. +- **Description:** Semantic similarity search over Grailed fashion listings using CLIP embeddings and FAISS. Search by image URL or text description to find visually similar products. +- **Status:** Functional core pipeline. CLIP ViT-B/32 embeds product cover photos into 512-dim vectors, indexed with FAISS cosine similarity. Has CLI, batch embedding, persistent index save/load, and logging. +- **Recent work (June 2025):** + - PR #1 — Initial cleanup: docstrings, type hints, `.gitignore`, `requirements.txt`, README rewrite + - PR #2 — Feature improvements: persistent FAISS save/load, batch embedding, CLI (`cli.py`), proper logging throughout, lazy Grailed client, `fetch_details` toggle - **TODO:** - - Build out the search pipeline - - Add scraping/indexing for listings - - Improve embedding approach - - Add UI or CLI for exploring results + - Embedding cache (avoid re-embedding known product URLs) + - Async/threaded image downloads for faster batch indexing + - Search result visualization (matplotlib grid of cover photos) + - Filter by category, designer, price range before search + - Web UI (Gradio or Streamlit) --- @@ -134,3 +138,5 @@ | Gov Auction Scraper | Concept | 🔵 Future | | Stable Audio Explorer | Dead | ⚫ None | + +