forked from Rockachopa/Timmy-time-dashboard
Compare commits
1 Commits
claude/iss
...
claude/iss
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
28d1905df4 |
@@ -18,17 +18,9 @@ jobs:
|
||||
- name: Lint (ruff via tox)
|
||||
run: tox -e lint
|
||||
|
||||
typecheck:
|
||||
runs-on: ubuntu-latest
|
||||
needs: lint
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Type-check (mypy via tox)
|
||||
run: tox -e typecheck
|
||||
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
needs: typecheck
|
||||
needs: lint
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Run tests (via tox)
|
||||
|
||||
@@ -150,6 +150,7 @@ async def transcribe_audio(audio: bytes) -> str:
|
||||
| Service | When Unavailable | Fallback Behavior |
|
||||
|---------|------------------|-------------------|
|
||||
| Ollama | No local LLM | Claude backend (if ANTHROPIC_API_KEY set) |
|
||||
| vLLM | Server not running | Ollama backend (cascade router fallback) |
|
||||
| Redis | Cache/storage down | In-memory dict (ephemeral) |
|
||||
| AirLLM | Import error or no Apple Silicon | Ollama backend |
|
||||
| Voice (Piper) | Service down | Browser Web Speech API |
|
||||
|
||||
@@ -131,11 +131,34 @@ providers:
|
||||
context_window: 32000
|
||||
capabilities: [text, tools, json, streaming]
|
||||
|
||||
# Tertiary: OpenAI (if API key available)
|
||||
# Tertiary: vLLM (OpenAI-compatible, continuous batching, 3-10x agentic throughput)
|
||||
# Runs on CUDA GPU or CPU. On Apple Silicon, prefer vllm-mlx-local (above).
|
||||
# To enable: start vLLM server:
|
||||
# python -m vllm.entrypoints.openai.api_server \
|
||||
# --model Qwen/Qwen2.5-14B-Instruct --port 8001
|
||||
# Then set enabled: true (or TIMMY_LLM_BACKEND=vllm + VLLM_URL=http://localhost:8001)
|
||||
- name: vllm-local
|
||||
type: vllm
|
||||
enabled: false # Enable when vLLM server is running
|
||||
priority: 3
|
||||
tier: local
|
||||
base_url: "http://localhost:8001/v1"
|
||||
models:
|
||||
- name: Qwen/Qwen2.5-14B-Instruct
|
||||
default: true
|
||||
context_window: 32000
|
||||
capabilities: [text, tools, json, streaming, complex]
|
||||
description: "Qwen2.5-14B on vLLM — continuous batching for agentic workloads"
|
||||
- name: Qwen/Qwen2.5-7B-Instruct
|
||||
context_window: 32000
|
||||
capabilities: [text, tools, json, streaming, routine]
|
||||
description: "Qwen2.5-7B on vLLM — fast model for routine tasks"
|
||||
|
||||
# Quaternary (priority 4): OpenAI (if API key available)
|
||||
- name: openai-backup
|
||||
type: openai
|
||||
enabled: false # Enable by setting OPENAI_API_KEY
|
||||
priority: 3
|
||||
priority: 4
|
||||
tier: standard_cloud
|
||||
api_key: "${OPENAI_API_KEY}" # Loaded from environment
|
||||
base_url: null # Use default OpenAI endpoint
|
||||
@@ -147,12 +170,12 @@ providers:
|
||||
- name: gpt-4o
|
||||
context_window: 128000
|
||||
capabilities: [text, vision, tools, json, streaming]
|
||||
|
||||
# Quaternary: Anthropic (if API key available)
|
||||
|
||||
# Quinary (priority 5): Anthropic (if API key available)
|
||||
- name: anthropic-backup
|
||||
type: anthropic
|
||||
enabled: false # Enable by setting ANTHROPIC_API_KEY
|
||||
priority: 4
|
||||
priority: 5
|
||||
tier: frontier
|
||||
api_key: "${ANTHROPIC_API_KEY}"
|
||||
models:
|
||||
|
||||
@@ -42,6 +42,10 @@ services:
|
||||
GROK_ENABLED: "${GROK_ENABLED:-false}"
|
||||
XAI_API_KEY: "${XAI_API_KEY:-}"
|
||||
GROK_DEFAULT_MODEL: "${GROK_DEFAULT_MODEL:-grok-3-fast}"
|
||||
# vLLM backend — set TIMMY_LLM_BACKEND=vllm to activate
|
||||
TIMMY_LLM_BACKEND: "${TIMMY_LLM_BACKEND:-ollama}"
|
||||
VLLM_URL: "${VLLM_URL:-http://localhost:8001}"
|
||||
VLLM_MODEL: "${VLLM_MODEL:-Qwen/Qwen2.5-14B-Instruct}"
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway" # Linux: maps to host IP
|
||||
networks:
|
||||
@@ -74,6 +78,49 @@ services:
|
||||
profiles:
|
||||
- celery
|
||||
|
||||
# ── vLLM — high-throughput inference server (GPU optional) ──────────────
|
||||
# Requires the 'vllm' profile: docker compose --profile vllm up
|
||||
#
|
||||
# GPU (NVIDIA): set VLLM_MODEL, uncomment the `deploy` block in this service, and ensure nvidia-container-toolkit is installed.
|
||||
# CPU-only: add --device cpu to VLLM_EXTRA_ARGS (slower, but works anywhere).
|
||||
#
|
||||
# The dashboard reaches vLLM at http://vllm:8001 (inside timmy-net).
|
||||
# Set VLLM_URL=http://vllm:8001 in the dashboard environment when using this service.
|
||||
vllm:
|
||||
image: vllm/vllm-openai:latest
|
||||
container_name: timmy-vllm
|
||||
profiles:
|
||||
- vllm
|
||||
ports:
|
||||
- "8001:8001"
|
||||
environment:
|
||||
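# Model to load — override with VLLM_MODEL env var.
# NOTE: this service defaults to Qwen2.5-7B, while the dashboard's VLLM_MODEL defaults to Qwen2.5-14B; set VLLM_MODEL explicitly so both sides name the same model.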
|
||||
VLLM_MODEL: "${VLLM_MODEL:-Qwen/Qwen2.5-7B-Instruct}"
|
||||
command: >
|
||||
--model ${VLLM_MODEL:-Qwen/Qwen2.5-7B-Instruct}
|
||||
--port 8001
|
||||
--host 0.0.0.0
|
||||
${VLLM_EXTRA_ARGS:-}
|
||||
volumes:
|
||||
- vllm-cache:/root/.cache/huggingface
|
||||
networks:
|
||||
- timmy-net
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 5
|
||||
start_period: 120s
|
||||
# GPU support — uncomment to enable NVIDIA GPU passthrough
|
||||
# deploy:
|
||||
# resources:
|
||||
# reservations:
|
||||
# devices:
|
||||
# - driver: nvidia
|
||||
# count: all
|
||||
# capabilities: [gpu]
|
||||
|
||||
# ── OpenFang — vendored agent runtime sidecar ────────────────────────────
|
||||
openfang:
|
||||
build:
|
||||
@@ -110,6 +157,8 @@ volumes:
|
||||
device: "${PWD}/data"
|
||||
openfang-data:
|
||||
driver: local
|
||||
vllm-cache:
|
||||
driver: local
|
||||
|
||||
# ── Internal network ────────────────────────────────────────────────────────
|
||||
networks:
|
||||
|
||||
@@ -1,89 +0,0 @@
|
||||
# Screenshot Dump Triage — Visual Inspiration & Research Leads
|
||||
|
||||
**Date:** March 24, 2026
|
||||
**Source:** Issue #1275 — "Screenshot dump for triage #1"
|
||||
**Analyst:** Claude (Sonnet 4.6)
|
||||
|
||||
---
|
||||
|
||||
## Screenshots Ingested
|
||||
|
||||
| File | Subject | Action |
|
||||
|------|---------|--------|
|
||||
| IMG_6187.jpeg | AirLLM / Apple Silicon local LLM requirements | → Issue #1284 |
|
||||
| IMG_6125.jpeg | vLLM backend for agentic workloads | → Issue #1281 |
|
||||
| IMG_6124.jpeg | DeerFlow autonomous research pipeline | → Issue #1283 |
|
||||
| IMG_6123.jpeg | "Vibe Coder vs Normal Developer" meme | → Issue #1285 |
|
||||
| IMG_6410.jpeg | SearXNG + Crawl4AI self-hosted search MCP | → Issue #1282 |
|
||||
|
||||
---
|
||||
|
||||
## Tickets Created
|
||||
|
||||
### #1281 — feat: add vLLM as alternative inference backend
|
||||
**Source:** IMG_6125 (vLLM for agentic workloads)
|
||||
|
||||
vLLM's continuous batching makes it 3–10x more throughput-efficient than Ollama for multi-agent
|
||||
request patterns. Implement `VllmBackend` in `infrastructure/llm_router/` as a selectable
|
||||
backend (`TIMMY_LLM_BACKEND=vllm`) with graceful fallback to Ollama.
|
||||
|
||||
**Priority:** Medium — impactful for research pipeline performance once #972 is in use
|
||||
|
||||
---
|
||||
|
||||
### #1282 — feat: integrate SearXNG + Crawl4AI as self-hosted search backend
|
||||
**Source:** IMG_6410 (luxiaolei/searxng-crawl4ai-mcp)
|
||||
|
||||
Self-hosted search via SearXNG + Crawl4AI removes the hard dependency on paid search APIs
|
||||
(Brave, Tavily). Add both as Docker Compose services, implement `web_search()` and
|
||||
`scrape_url()` tools in `timmy/tools/`, and register them with the research agent.
|
||||
|
||||
**Priority:** High — unblocks fully local/private operation of research agents
|
||||
|
||||
---
|
||||
|
||||
### #1283 — research: evaluate DeerFlow as autonomous research orchestration layer
|
||||
**Source:** IMG_6124 (deer-flow Docker setup)
|
||||
|
||||
DeerFlow is ByteDance's open-source autonomous research pipeline framework. Before investing
|
||||
further in Timmy's custom orchestrator (#972), evaluate whether DeerFlow's architecture offers
|
||||
integration value or design patterns worth borrowing.
|
||||
|
||||
**Priority:** Medium — research first, implementation follows if go/no-go is positive
|
||||
|
||||
---
|
||||
|
||||
### #1284 — chore: document and validate AirLLM Apple Silicon requirements
|
||||
**Source:** IMG_6187 (Mac-compatible LLM setup)
|
||||
|
||||
AirLLM graceful degradation is already implemented but undocumented. Add System Requirements
|
||||
to README (M1/M2/M3/M4, 16 GB RAM min, 15 GB disk) and document `TIMMY_LLM_BACKEND` in
|
||||
`.env.example`.
|
||||
|
||||
**Priority:** Low — documentation only, no code risk
|
||||
|
||||
---
|
||||
|
||||
### #1285 — chore: enforce "Normal Developer" discipline — tighten quality gates
|
||||
**Source:** IMG_6123 (Vibe Coder vs Normal Developer meme)
|
||||
|
||||
Tighten the existing mypy/bandit/coverage gates: fix all mypy errors, raise coverage from 73%
|
||||
to 80%, add a documented pre-push hook, and run `vulture` for dead code. The infrastructure
|
||||
exists — it just needs enforcing.
|
||||
|
||||
**Priority:** Medium — technical debt prevention, pairs well with any green-field feature work
|
||||
|
||||
---
|
||||
|
||||
## Patterns Observed Across Screenshots
|
||||
|
||||
1. **Local-first is the north star.** Four of the five images reinforce the same theme: private,
|
||||
self-hosted, runs on your hardware. vLLM, SearXNG, AirLLM, DeerFlow — none require cloud.
|
||||
Timmy is already aligned with this direction; these are tactical additions.
|
||||
|
||||
2. **Agentic performance bottlenecks are real.** Two of five images (vLLM, DeerFlow) focus
|
||||
specifically on throughput and reliability for multi-agent loops. As the research pipeline
|
||||
matures, inference speed and search reliability will become the main constraints.
|
||||
|
||||
3. **Discipline compounds.** The meme is a reminder that the quality gates we have (tox,
|
||||
mypy, bandit, coverage) only pay off if they are enforced without exceptions.
|
||||
@@ -1,290 +0,0 @@
|
||||
# Building Timmy: Technical Blueprint for Sovereign Creative AI
|
||||
|
||||
> **Source:** PDF attached to issue #891, "Building Timmy: a technical blueprint for sovereign
|
||||
> creative AI" — generated by Kimi.ai, 16 pages, filed by Perplexity for Timmy's review.
|
||||
> **Filed:** 2026-03-22 · **Reviewed:** 2026-03-23
|
||||
|
||||
---
|
||||
|
||||
## Executive Summary
|
||||
|
||||
The blueprint establishes that a sovereign creative AI capable of coding, composing music,
|
||||
generating art, building worlds, publishing narratives, and managing its own economy is
|
||||
**technically feasible today** — but only through orchestration of dozens of tools operating
|
||||
at different maturity levels. The core insight: *the integration is the invention*. No single
|
||||
component is new; the missing piece is a coherent identity operating across all domains
|
||||
simultaneously with persistent memory, autonomous economics, and cross-domain creative
|
||||
reactions.
|
||||
|
||||
Three non-negotiable architectural decisions:
|
||||
1. **Human oversight for all public-facing content** — every successful creative AI has this;
|
||||
every one that removed it failed.
|
||||
2. **Legal entity before economic activity** — AI agents are not legal persons; establish
|
||||
structure before wealth accumulates (Truth Terminal cautionary tale: $20M acquired before
|
||||
a foundation was retroactively created).
|
||||
3. **Hybrid memory: vector search + knowledge graph** — neither alone is sufficient for
|
||||
multi-domain context breadth.
|
||||
|
||||
---
|
||||
|
||||
## Domain-by-Domain Assessment
|
||||
|
||||
### Software Development (immediately deployable)
|
||||
|
||||
| Component | Recommendation | Notes |
|
||||
|-----------|----------------|-------|
|
||||
| Primary agent | Claude Code (Opus 4.6, 77.2% SWE-bench) | Already in use |
|
||||
| Self-hosted forge | Forgejo (MIT, 170–200MB RAM) | Project uses Gitea/Forgejo now |
|
||||
| CI/CD | GitHub Actions-compatible via `act_runner` | — |
|
||||
| Tool-making | LATM pattern: frontier model creates tools, cheaper model applies them | New — see ADR opportunity |
|
||||
| Open-source fallback | OpenHands (~65% SWE-bench, Docker sandboxed) | Backup to Claude Code |
|
||||
| Self-improvement | Darwin Gödel Machine / SICA patterns | 3–6 month investment |
|
||||
|
||||
**Development estimate:** 2–3 weeks for Forgejo + Claude Code integration with automated
|
||||
PR workflows; 1–2 months for self-improving tool-making pipeline.
|
||||
|
||||
**Cross-reference:** This project already runs Claude Code agents on Forgejo. The LATM
|
||||
pattern (tool registry) and self-improvement loop are the actionable gaps.
|
||||
|
||||
---
|
||||
|
||||
### Music (1–4 weeks)
|
||||
|
||||
| Component | Recommendation | Notes |
|
||||
|-----------|----------------|-------|
|
||||
| Commercial vocals | Suno v5 API (~$0.03/song, $30/month Premier) | No official API; third-party: sunoapi.org, AIMLAPI, EvoLink |
|
||||
| Local instrumental | MusicGen 1.5B (CC-BY-NC — monetization blocker) | On M2 Max: ~60s for 5s clip |
|
||||
| Voice cloning | GPT-SoVITS v4 (MIT) | Works on Apple Silicon CPU, RTF 0.526 on M4 |
|
||||
| Voice conversion | RVC (MIT, 5–10 min training audio) | — |
|
||||
| Apple Silicon TTS | MLX-Audio: Kokoro 82M + Qwen3-TTS 0.6B | 4–5x faster via Metal |
|
||||
| Publishing | Wavlake (90/10 split, Lightning micropayments) | Auto-syndicates to Fountain.fm |
|
||||
| Nostr | NIP-94 (kind:1063) audio events → NIP-96 servers | — |
|
||||
|
||||
**Copyright reality:** US Copyright Office (Jan 2025) and US Court of Appeals (Mar 2025):
|
||||
purely AI-generated music cannot be copyrighted and enters public domain. Wavlake's
|
||||
Value4Value model works around this — fans pay for relationship, not exclusive rights.
|
||||
|
||||
**Avoid:** Udio (download disabled since Oct 2025, 2.4/5 Trustpilot).
|
||||
|
||||
---
|
||||
|
||||
### Visual Art (1–3 weeks)
|
||||
|
||||
| Component | Recommendation | Notes |
|
||||
|-----------|----------------|-------|
|
||||
| Local generation | ComfyUI API at `127.0.0.1:8188` (programmatic control via WebSocket) | MLX extension: 50–70% faster |
|
||||
| Speed | Draw Things (free, Mac App Store) | 3× faster than ComfyUI via Metal shaders |
|
||||
| Quality frontier | Flux 2 (Nov 2025, 4MP, multi-reference) | SDXL needs 16GB+, Flux Dev 32GB+ |
|
||||
| Character consistency | LoRA training (30 min, 15–30 references) + Flux.1 Kontext | Solved problem |
|
||||
| Face consistency | IP-Adapter + FaceID (ComfyUI-IP-Adapter-Plus) | Training-free |
|
||||
| Comics | Jenova AI ($20/month, 200+ page consistency) or LlamaGen AI (free) | — |
|
||||
| Publishing | Blossom protocol (SHA-256 addressed, kind:10063) + Nostr NIP-94 | — |
|
||||
| Physical | Printful REST API (200+ products, automated fulfillment) | — |
|
||||
|
||||
---
|
||||
|
||||
### Writing / Narrative (1–4 weeks for pipeline; ongoing for quality)
|
||||
|
||||
| Component | Recommendation | Notes |
|
||||
|-----------|----------------|-------|
|
||||
| LLM | Claude Opus 4.5/4.6 (leads Mazur Writing Benchmark at 8.561) | Already in use |
|
||||
| Context | 500K tokens (1M in beta) — entire novels fit | — |
|
||||
| Architecture | Outline-first → RAG lore bible → chapter-by-chapter generation | Without outline: novels meander |
|
||||
| Lore management | WorldAnvil Pro or custom LoreScribe (local RAG) | No tool achieves 100% consistency |
|
||||
| Publishing (ebooks) | Pandoc → EPUB / KDP PDF | pandoc-novel template on GitHub |
|
||||
| Publishing (print) | Lulu Press REST API (80% profit, global print network) | KDP: no official API, 3-book/day limit |
|
||||
| Publishing (Nostr) | NIP-23 kind:30023 long-form events | Habla.news, YakiHonne, Stacker News |
|
||||
| Podcasts | LLM script → TTS (ElevenLabs or local Kokoro/MLX-Audio) → feedgen RSS → Fountain.fm | Value4Value sats-per-minute |
|
||||
|
||||
**Key constraint:** AI-assisted (human directs, AI drafts) = 40% faster. Fully autonomous
|
||||
without editing = "generic, soulless prose" and character drift by chapter 3 without explicit
|
||||
memory.
|
||||
|
||||
---
|
||||
|
||||
### World Building / Games (2 weeks–3 months depending on target)
|
||||
|
||||
| Component | Recommendation | Notes |
|
||||
|-----------|----------------|-------|
|
||||
| Algorithms | Wave Function Collapse, Perlin noise (FastNoiseLite in Godot 4), L-systems | All mature |
|
||||
| Platform | Godot Engine + gd-agentic-skills (82+ skills, 26 genre blueprints) | Strong LLM/GDScript knowledge |
|
||||
| Narrative design | Knowledge graph (world state) + LLM + quest template grammar | CHI 2023 validated |
|
||||
| Quick win | Luanti/Minetest (Lua API, 2,800+ open mods for reference) | Immediately feasible |
|
||||
| Medium effort | OpenMW content creation (omwaddon format engineering required) | 2–3 months |
|
||||
| Future | Unity MCP (AI direct Unity Editor interaction) | Early-stage |
|
||||
|
||||
---
|
||||
|
||||
### Identity Architecture (2 months)
|
||||
|
||||
The blueprint formalizes the **SOUL.md standard** (GitHub: aaronjmars/soul.md):
|
||||
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `SOUL.md` | Who you are — identity, worldview, opinions |
|
||||
| `STYLE.md` | How you write — voice, syntax, patterns |
|
||||
| `SKILL.md` | Operating modes |
|
||||
| `MEMORY.md` | Session continuity |
|
||||
|
||||
**Critical decision — static vs self-modifying identity:**
|
||||
- Static Core Truths (version-controlled, human-approved changes only) ✓
|
||||
- Self-modifying Learned Preferences (logged with rollback, monitored by guardian) ✓
|
||||
- **Warning:** OpenClaw's "Soul Evolution" creates a security attack surface — Zenity Labs
|
||||
demonstrated a complete zero-click attack chain targeting SOUL.md files.
|
||||
|
||||
**Relevance to this repo:** Claude Code agents already use a `MEMORY.md` pattern in
|
||||
this project. The SOUL.md stack is a natural extension.
|
||||
|
||||
---
|
||||
|
||||
### Memory Architecture (2 months)
|
||||
|
||||
Hybrid vector + knowledge graph is the recommendation:
|
||||
|
||||
| Component | Tool | Notes |
|
||||
|-----------|------|-------|
|
||||
| Vector + KG combined | Mem0 (mem0.ai) | 26% accuracy improvement over OpenAI memory, 91% lower p95 latency, 90% token savings |
|
||||
| Vector store | Qdrant (Rust, open-source) | High-throughput with metadata filtering |
|
||||
| Temporal KG | Neo4j + Graphiti (Zep AI) | P95 retrieval: 300ms, hybrid semantic + BM25 + graph |
|
||||
| Backup/migration | AgentKeeper (95% critical fact recovery across model migrations) | — |
|
||||
|
||||
**Journal pattern (Stanford Generative Agents):** Agent writes about experiences, generates
|
||||
high-level reflections 2–3x/day when importance scores exceed threshold. Ablation studies:
|
||||
removing any component (observation, planning, reflection) significantly reduces behavioral
|
||||
believability.
|
||||
|
||||
**Cross-reference:** The existing `brain/` package is the memory system. Qdrant and
|
||||
Mem0 are the recommended upgrade targets.
|
||||
|
||||
---
|
||||
|
||||
### Multi-Agent Sub-System (3–6 months)
|
||||
|
||||
The blueprint describes a named sub-agent hierarchy:
|
||||
|
||||
| Agent | Role |
|
||||
|-------|------|
|
||||
| Oracle | Top-level planner / supervisor |
|
||||
| Sentinel | Safety / moderation |
|
||||
| Scout | Research / information gathering |
|
||||
| Scribe | Writing / narrative |
|
||||
| Ledger | Economic management |
|
||||
| Weaver | Visual art generation |
|
||||
| Composer | Music generation |
|
||||
| Social | Platform publishing |
|
||||
|
||||
**Orchestration options:**
|
||||
- **Agno** (already in use) — microsecond instantiation, 50× less memory than LangGraph
|
||||
- **CrewAI Flows** — event-driven with fine-grained control
|
||||
- **LangGraph** — DAG-based with stateful workflows and time-travel debugging
|
||||
|
||||
**Scheduling pattern (Stanford Generative Agents):** Top-down recursive daily → hourly →
|
||||
5-minute planning. Event interrupts for reactive tasks. Re-planning triggers when accumulated
|
||||
importance scores exceed threshold.
|
||||
|
||||
**Cross-reference:** The existing `spark/` package (event capture, advisory engine) aligns
|
||||
with this architecture. `infrastructure/event_bus` is the choreography backbone.
|
||||
|
||||
---
|
||||
|
||||
### Economic Engine (1–4 weeks)
|
||||
|
||||
Lightning Labs released `lightning-agent-tools` (open-source) in February 2026:
|
||||
- `lnget` — CLI HTTP client for L402 payments
|
||||
- Remote signer architecture (private keys on separate machine from agent)
|
||||
- Scoped macaroon credentials (pay-only, invoice-only, read-only roles)
|
||||
- **Aperture** — converts any API to pay-per-use via L402 (HTTP 402)
|
||||
|
||||
| Option | Effort | Notes |
|
||||
|--------|--------|-------|
|
||||
| ln.bot | 1 week | "Bitcoin for AI Agents" — 3 commands create a wallet; CLI + MCP + REST |
|
||||
| LND via gRPC | 2–3 weeks | Full programmatic node management for production |
|
||||
| Coinbase Agentic Wallets | — | Fiat-adjacent; less aligned with sovereignty ethos |
|
||||
|
||||
**Revenue channels:** Wavlake (music, 90/10 Lightning), Nostr zaps (articles), Stacker News
|
||||
(earn sats from engagement), Printful (physical goods), L402-gated API access (pay-per-use
|
||||
services), Geyser.fund (Lightning crowdfunding, better initial runway than micropayments).
|
||||
|
||||
**Cross-reference:** The existing `lightning/` package in this repo is the foundation.
|
||||
L402 paywall endpoints for Timmy's own services are the actionable gap.
|
||||
|
||||
---
|
||||
|
||||
## Pioneer Case Studies
|
||||
|
||||
| Agent | Active | Revenue | Key Lesson |
|
||||
|-------|--------|---------|-----------|
|
||||
| Botto | Since Oct 2021 | $5M+ (art auctions) | Community governance via DAO sustains engagement; "taste model" (humans guide, not direct) preserves autonomous authorship |
|
||||
| Neuro-sama | Since Dec 2022 | $400K+/month (subscriptions) | 3+ years of iteration; errors became entertainment features; 24/7 capability is an insurmountable advantage |
|
||||
| Truth Terminal | Since Jun 2024 | $20M accumulated | Memetic fitness > planned monetization; human gatekeeper approved tweets while selecting AI-intent responses; **establish legal entity first** |
|
||||
| Holly+ | Since 2021 | Conceptual | DAO of stewards for voice governance; "identity play" as alternative to defensive IP |
|
||||
| AI Sponge | 2023 | Banned | Unmoderated content → TOS violations + copyright |
|
||||
| Nothing Forever | 2022–present | 8 viewers | Unmoderated content → ban → audience collapse; novelty-only propositions fail |
|
||||
|
||||
**Universal pattern:** Human oversight + economic incentive alignment + multi-year personality
|
||||
development + platform-native economics = success.
|
||||
|
||||
---
|
||||
|
||||
## Recommended Implementation Sequence
|
||||
|
||||
From the blueprint, mapped against Timmy's existing architecture:
|
||||
|
||||
### Phase 1: Immediate (weeks)
|
||||
1. **Code sovereignty** — Forgejo + Claude Code automated PR workflows (already substantially done)
|
||||
2. **Music pipeline** — Suno API → Wavlake/Nostr NIP-94 publishing
|
||||
3. **Visual art pipeline** — ComfyUI API → Blossom/Nostr with LoRA character consistency
|
||||
4. **Basic Lightning wallet** — ln.bot integration for receiving micropayments
|
||||
5. **Long-form publishing** — Nostr NIP-23 + RSS feed generation
|
||||
|
||||
### Phase 2: Moderate effort (1–3 months)
|
||||
6. **LATM tool registry** — frontier model creates Python utilities, caches them, lighter model applies
|
||||
7. **Event-driven cross-domain reactions** — game event → blog + artwork + music (CrewAI/LangGraph)
|
||||
8. **Podcast generation** — TTS + feedgen → Fountain.fm
|
||||
9. **Self-improving pipeline** — agent creates, tests, caches own Python utilities
|
||||
10. **Comic generation** — character-consistent panels with Jenova AI or local LoRA
|
||||
|
||||
### Phase 3: Significant investment (3–6 months)
|
||||
11. **Full sub-agent hierarchy** — Oracle/Sentinel/Scout/Scribe/Ledger/Weaver with Agno
|
||||
12. **SOUL.md identity system** — bounded evolution + guardian monitoring
|
||||
13. **Hybrid memory upgrade** — Qdrant + Mem0/Graphiti replacing or extending `brain/`
|
||||
14. **Procedural world generation** — Godot + AI-driven narrative (quests, NPCs, lore)
|
||||
15. **Self-sustaining economic loop** — earned revenue covers compute costs
|
||||
|
||||
### Remains aspirational (12+ months)
|
||||
- Fully autonomous novel-length fiction without editorial intervention
|
||||
- YouTube monetization for AI-generated content (tightening platform policies)
|
||||
- Copyright protection for AI-generated works (current US law denies this)
|
||||
- True artistic identity evolution (genuine creative voice vs pattern remixing)
|
||||
- Self-modifying architecture without regression or identity drift
|
||||
|
||||
---
|
||||
|
||||
## Gap Analysis: Blueprint vs Current Codebase
|
||||
|
||||
| Blueprint Capability | Current Status | Gap |
|
||||
|---------------------|----------------|-----|
|
||||
| Code sovereignty | Done (Claude Code + Forgejo) | LATM tool registry |
|
||||
| Music generation | Not started | Suno API integration + Wavlake publishing |
|
||||
| Visual art | Not started | ComfyUI API client + Blossom publishing |
|
||||
| Writing/publishing | Not started | Nostr NIP-23 + Pandoc pipeline |
|
||||
| World building | Bannerlord work (different scope) | Luanti mods as quick win |
|
||||
| Identity (SOUL.md) | Partial (CLAUDE.md + MEMORY.md) | Full SOUL.md stack |
|
||||
| Memory (hybrid) | `brain/` package (SQLite-based) | Qdrant + knowledge graph |
|
||||
| Multi-agent | Agno in use | Named hierarchy + event choreography |
|
||||
| Lightning payments | `lightning/` package | ln.bot wallet + L402 endpoints |
|
||||
| Nostr identity | Referenced in roadmap, not built | NIP-05, NIP-89 capability cards |
|
||||
| Legal entity | Unknown | **Must be resolved before economic activity** |
|
||||
|
||||
---
|
||||
|
||||
## ADR Candidates
|
||||
|
||||
Issues that warrant Architecture Decision Records based on this review:
|
||||
|
||||
1. **LATM tool registry pattern** — How Timmy creates, tests, and caches self-made tools
|
||||
2. **Music generation strategy** — Suno (cloud, commercial quality) vs MusicGen (local, CC-BY-NC)
|
||||
3. **Memory upgrade path** — When/how to migrate `brain/` from SQLite to Qdrant + KG
|
||||
4. **SOUL.md adoption** — Extending existing CLAUDE.md/MEMORY.md to full SOUL.md stack
|
||||
5. **Lightning L402 strategy** — Which services Timmy gates behind micropayments
|
||||
6. **Sub-agent naming and contracts** — Formalizing Oracle/Sentinel/Scout/Scribe/Ledger/Weaver
|
||||
@@ -164,7 +164,3 @@ directory = "htmlcov"
|
||||
|
||||
[tool.coverage.xml]
|
||||
output = "coverage.xml"
|
||||
|
||||
[tool.mypy]
|
||||
ignore_missing_imports = true
|
||||
no_error_summary = true
|
||||
|
||||
0
src/__init__.py
Normal file
0
src/__init__.py
Normal file
@@ -94,8 +94,18 @@ class Settings(BaseSettings):
|
||||
|
||||
# ── Backend selection ────────────────────────────────────────────────────
|
||||
# "ollama" — always use Ollama (default, safe everywhere)
|
||||
# "vllm" — use vLLM inference server (OpenAI-compatible, faster throughput)
|
||||
# "auto" — pick best available local backend, fall back to Ollama
|
||||
timmy_model_backend: Literal["ollama", "grok", "claude", "auto"] = "ollama"
|
||||
timmy_model_backend: Literal["ollama", "vllm", "grok", "claude", "auto"] = "ollama"
|
||||
|
||||
# ── vLLM backend ──────────────────────────────────────────────────────────
|
||||
# vLLM is an OpenAI-compatible inference server optimised for continuous
|
||||
# batching — 3–10x higher throughput than Ollama for agentic workloads.
|
||||
# Start server: python -m vllm.entrypoints.openai.api_server \
|
||||
# --model Qwen/Qwen2.5-14B-Instruct --port 8001
|
||||
# Then set TIMMY_LLM_BACKEND=vllm (or enable vllm-local in providers.yaml)
|
||||
vllm_url: str = "http://localhost:8001"
|
||||
vllm_model: str = "Qwen/Qwen2.5-14B-Instruct"
|
||||
|
||||
# ── Grok (xAI) — opt-in premium cloud backend ────────────────────────
|
||||
# Grok is a premium augmentation layer — local-first ethos preserved.
|
||||
|
||||
@@ -6,8 +6,6 @@ import sqlite3
|
||||
from contextlib import closing
|
||||
from pathlib import Path
|
||||
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, Request
|
||||
from fastapi.responses import HTMLResponse, JSONResponse
|
||||
|
||||
@@ -38,9 +36,9 @@ def _discover_databases() -> list[dict]:
|
||||
return dbs
|
||||
|
||||
|
||||
def _query_database(db_path: str) -> dict[str, Any]:
|
||||
def _query_database(db_path: str) -> dict:
|
||||
"""Open a database read-only and return all tables with their rows."""
|
||||
result: dict[str, Any] = {"tables": {}, "error": None}
|
||||
result = {"tables": {}, "error": None}
|
||||
try:
|
||||
with closing(sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)) as conn:
|
||||
conn.row_factory = sqlite3.Row
|
||||
|
||||
@@ -124,6 +124,73 @@ async def check_ollama() -> bool:
|
||||
return dep.status == "healthy"
|
||||
|
||||
|
||||
# vLLM health cache (30-second TTL)
|
||||
_vllm_cache: DependencyStatus | None = None
|
||||
_vllm_cache_ts: float = 0.0
|
||||
_VLLM_CACHE_TTL = 30.0
|
||||
|
||||
|
||||
def _check_vllm_sync() -> DependencyStatus:
    """Probe the vLLM server's ``/health`` endpoint with a blocking HTTP GET.

    Meant to be executed through ``asyncio.to_thread()`` so the blocking
    request does not run on the event loop.

    Returns:
        A ``DependencyStatus`` named "vLLM": status "healthy" when the
        endpoint answers HTTP 200, otherwise "unavailable". Any exception
        raised while probing is logged at debug level and also reported as
        "unavailable".
    """
    try:
        import urllib.request

        # /health is served at the server root, so drop trailing slashes and
        # a trailing "/v1" path segment from the configured URL.
        root_url = settings.vllm_url.rstrip("/")
        root_url = root_url[:-3] if root_url.endswith("/v1") else root_url

        probe = urllib.request.Request(
            f"{root_url}/health",
            method="GET",
            headers={"Accept": "application/json"},
        )
        # Short timeout: a health probe must not stall its caller.
        with urllib.request.urlopen(probe, timeout=2) as reply:
            server_ok = reply.status == 200

        if server_ok:
            return DependencyStatus(
                name="vLLM",
                status="healthy",
                sovereignty_score=10,
                details={"url": settings.vllm_url, "model": settings.vllm_model},
            )
    except Exception as exc:
        logger.debug("vLLM health check failed: %s", exc)

    # Reached on a non-200 answer or on any exception above.
    return DependencyStatus(
        name="vLLM",
        status="unavailable",
        sovereignty_score=10,
        details={"url": settings.vllm_url, "error": "Cannot connect to vLLM server"},
    )
|
||||
|
||||
|
||||
async def _check_vllm() -> DependencyStatus:
    """Return the vLLM backend status without blocking the event loop.

    A cached result younger than ``_VLLM_CACHE_TTL`` seconds is returned
    as-is. Otherwise ``_check_vllm_sync`` is run in a worker thread via
    ``asyncio.to_thread`` and its result replaces the module-level cache.
    If the threaded call itself raises, an "unavailable" status is cached
    and returned instead.
    """
    global _vllm_cache, _vllm_cache_ts  # noqa: PLW0603

    # The timestamp is taken before the probe and reused as the cache stamp.
    checked_at = time.monotonic()
    cache_age = checked_at - _vllm_cache_ts
    if _vllm_cache is not None and cache_age < _VLLM_CACHE_TTL:
        return _vllm_cache

    try:
        status = await asyncio.to_thread(_check_vllm_sync)
    except Exception as exc:
        logger.debug("vLLM async check failed: %s", exc)
        status = DependencyStatus(
            name="vLLM",
            status="unavailable",
            sovereignty_score=10,
            details={"url": settings.vllm_url, "error": "Cannot connect to vLLM server"},
        )

    _vllm_cache, _vllm_cache_ts = status, checked_at
    return status
|
||||
|
||||
|
||||
def _check_lightning() -> DependencyStatus:
|
||||
"""Check Lightning payment backend status."""
|
||||
return DependencyStatus(
|
||||
@@ -195,13 +262,22 @@ async def health_check():
|
||||
# Legacy format for test compatibility
|
||||
ollama_ok = await check_ollama()
|
||||
|
||||
agent_status = "idle" if ollama_ok else "offline"
|
||||
# Check vLLM only when it is the configured backend (avoid probing unused services)
|
||||
vllm_status: str | None = None
|
||||
if settings.timmy_model_backend == "vllm":
|
||||
vllm_dep = await _check_vllm()
|
||||
vllm_status = "up" if vllm_dep.status == "healthy" else "down"
|
||||
|
||||
inference_ok = vllm_status == "up" if vllm_status is not None else ollama_ok
|
||||
agent_status = "idle" if inference_ok else "offline"
|
||||
|
||||
services: dict = {"ollama": "up" if ollama_ok else "down"}
|
||||
if vllm_status is not None:
|
||||
services["vllm"] = vllm_status
|
||||
|
||||
return {
|
||||
"status": "ok" if ollama_ok else "degraded",
|
||||
"services": {
|
||||
"ollama": "up" if ollama_ok else "down",
|
||||
},
|
||||
"status": "ok" if inference_ok else "degraded",
|
||||
"services": services,
|
||||
"agents": {
|
||||
"agent": {"status": agent_status},
|
||||
},
|
||||
@@ -210,7 +286,7 @@ async def health_check():
|
||||
"version": "2.0.0",
|
||||
"uptime_seconds": uptime,
|
||||
"llm_backend": settings.timmy_model_backend,
|
||||
"llm_model": settings.ollama_model,
|
||||
"llm_model": settings.vllm_model if settings.timmy_model_backend == "vllm" else settings.ollama_model,
|
||||
}
|
||||
|
||||
|
||||
@@ -252,6 +328,9 @@ async def sovereignty_check():
|
||||
_check_lightning(),
|
||||
_check_sqlite(),
|
||||
]
|
||||
# Include vLLM in the audit when it is the active backend
|
||||
if settings.timmy_model_backend == "vllm":
|
||||
dependencies.append(await _check_vllm())
|
||||
|
||||
overall = _calculate_overall_score(dependencies)
|
||||
recommendations = _generate_recommendations(dependencies)
|
||||
|
||||
@@ -137,7 +137,7 @@ class HermesMonitor:
|
||||
message=f"Check error: {r}",
|
||||
)
|
||||
)
|
||||
elif isinstance(r, CheckResult):
|
||||
else:
|
||||
checks.append(r)
|
||||
|
||||
# Compute overall level
|
||||
|
||||
@@ -203,7 +203,7 @@ async def reload_config(
|
||||
@router.get("/history")
|
||||
async def get_history(
|
||||
hours: int = 24,
|
||||
store: Annotated[HealthHistoryStore | None, Depends(get_history_store)] = None,
|
||||
store: Annotated[HealthHistoryStore, Depends(get_history_store)] = None,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Get provider health history for the last N hours."""
|
||||
if store is None:
|
||||
|
||||
@@ -331,6 +331,22 @@ class CascadeRouter:
|
||||
logger.debug("vllm-mlx provider check error: %s", exc)
|
||||
return False
|
||||
|
||||
elif provider.type == "vllm":
|
||||
# Check if standard vLLM server is running (OpenAI-compatible API)
|
||||
if requests is None:
|
||||
return True
|
||||
try:
|
||||
base_url = provider.base_url or provider.url or settings.vllm_url
|
||||
# Strip /v1 suffix — health endpoint is at the server root
|
||||
server_root = base_url.rstrip("/")
|
||||
if server_root.endswith("/v1"):
|
||||
server_root = server_root[:-3]
|
||||
response = requests.get(f"{server_root}/health", timeout=5)
|
||||
return response.status_code == 200
|
||||
except Exception as exc:
|
||||
logger.debug("vllm provider check error: %s", exc)
|
||||
return False
|
||||
|
||||
elif provider.type in ("openai", "anthropic", "grok"):
|
||||
# Check if API key is set
|
||||
return provider.api_key is not None and provider.api_key != ""
|
||||
@@ -744,20 +760,19 @@ class CascadeRouter:
|
||||
self,
|
||||
provider: Provider,
|
||||
messages: list[dict],
|
||||
model: str | None,
|
||||
model: str,
|
||||
temperature: float,
|
||||
max_tokens: int | None,
|
||||
content_type: ContentType = ContentType.TEXT,
|
||||
) -> dict:
|
||||
"""Try a single provider request."""
|
||||
start_time = time.time()
|
||||
effective_model: str = model or provider.get_default_model() or ""
|
||||
|
||||
if provider.type == "ollama":
|
||||
result = await self._call_ollama(
|
||||
provider=provider,
|
||||
messages=messages,
|
||||
model=effective_model,
|
||||
model=model or provider.get_default_model(),
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
content_type=content_type,
|
||||
@@ -766,7 +781,7 @@ class CascadeRouter:
|
||||
result = await self._call_openai(
|
||||
provider=provider,
|
||||
messages=messages,
|
||||
model=effective_model,
|
||||
model=model or provider.get_default_model(),
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
)
|
||||
@@ -774,7 +789,7 @@ class CascadeRouter:
|
||||
result = await self._call_anthropic(
|
||||
provider=provider,
|
||||
messages=messages,
|
||||
model=effective_model,
|
||||
model=model or provider.get_default_model(),
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
)
|
||||
@@ -782,7 +797,7 @@ class CascadeRouter:
|
||||
result = await self._call_grok(
|
||||
provider=provider,
|
||||
messages=messages,
|
||||
model=effective_model,
|
||||
model=model or provider.get_default_model(),
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
)
|
||||
@@ -790,7 +805,15 @@ class CascadeRouter:
|
||||
result = await self._call_vllm_mlx(
|
||||
provider=provider,
|
||||
messages=messages,
|
||||
model=effective_model,
|
||||
model=model or provider.get_default_model(),
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
)
|
||||
elif provider.type == "vllm":
|
||||
result = await self._call_vllm(
|
||||
provider=provider,
|
||||
messages=messages,
|
||||
model=model or provider.get_default_model(),
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
)
|
||||
@@ -1032,6 +1055,49 @@ class CascadeRouter:
|
||||
"model": response.model,
|
||||
}
|
||||
|
||||
async def _call_vllm(
    self,
    provider: Provider,
    messages: list[dict],
    model: str,
    temperature: float,
    max_tokens: int | None,
) -> dict:
    """Send a chat completion request to a vLLM server through the OpenAI client.

    The endpoint is the first non-empty value among ``provider.base_url``,
    ``provider.url`` and ``settings.vllm_url``. When it does not already end
    in ``/v1`` (ignoring trailing slashes), ``/v1`` is appended. A placeholder
    API key is used when the provider has none configured.

    Args:
        provider: Provider supplying the endpoint and optional API key.
        messages: Chat messages forwarded unchanged to the API.
        model: Model name to request.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit; omitted from the request when falsy.

    Returns:
        A dict with ``"content"`` (text of the first choice) and ``"model"``
        (model name reported by the response).
    """
    import openai

    endpoint = provider.base_url or provider.url or settings.vllm_url
    trimmed = endpoint.rstrip("/")
    if not trimmed.endswith("/v1"):
        # The OpenAI client expects the API prefix to be part of base_url.
        endpoint = trimmed + "/v1"

    client = openai.AsyncOpenAI(
        api_key=provider.api_key or "no-key-required",
        base_url=endpoint,
        timeout=self.config.timeout_seconds,
    )

    request: dict = {"model": model, "messages": messages, "temperature": temperature}
    if max_tokens:
        request["max_tokens"] = max_tokens

    completion = await client.chat.completions.create(**request)
    first_choice = completion.choices[0]

    return {
        "content": first_choice.message.content,
        "model": completion.model,
    }
|
||||
|
||||
def _record_success(self, provider: Provider, latency_ms: float) -> None:
|
||||
"""Record a successful request."""
|
||||
provider.metrics.total_requests += 1
|
||||
|
||||
20
src/integrations/chat_bridge/vendors/discord.py
vendored
20
src/integrations/chat_bridge/vendors/discord.py
vendored
@@ -474,7 +474,7 @@ class DiscordVendor(ChatPlatform):
|
||||
async def _run_client(self, token: str) -> None:
|
||||
"""Run the discord.py client (blocking call in a task)."""
|
||||
try:
|
||||
await self._client.start(token) # type: ignore[union-attr]
|
||||
await self._client.start(token)
|
||||
except Exception as exc:
|
||||
logger.error("Discord client error: %s", exc)
|
||||
self._state = PlatformState.ERROR
|
||||
@@ -482,32 +482,32 @@ class DiscordVendor(ChatPlatform):
|
||||
def _register_handlers(self) -> None:
|
||||
"""Register Discord event handlers on the client."""
|
||||
|
||||
@self._client.event # type: ignore[union-attr]
|
||||
@self._client.event
|
||||
async def on_ready():
|
||||
self._guild_count = len(self._client.guilds) # type: ignore[union-attr]
|
||||
self._guild_count = len(self._client.guilds)
|
||||
self._state = PlatformState.CONNECTED
|
||||
logger.info(
|
||||
"Discord ready: %s in %d guild(s)",
|
||||
self._client.user, # type: ignore[union-attr]
|
||||
self._client.user,
|
||||
self._guild_count,
|
||||
)
|
||||
|
||||
@self._client.event # type: ignore[union-attr]
|
||||
@self._client.event
|
||||
async def on_message(message):
|
||||
# Ignore our own messages
|
||||
if message.author == self._client.user: # type: ignore[union-attr]
|
||||
if message.author == self._client.user:
|
||||
return
|
||||
|
||||
# Only respond to mentions or DMs
|
||||
is_dm = not hasattr(message.channel, "guild") or message.channel.guild is None
|
||||
is_mention = self._client.user in message.mentions # type: ignore[union-attr]
|
||||
is_mention = self._client.user in message.mentions
|
||||
|
||||
if not is_dm and not is_mention:
|
||||
return
|
||||
|
||||
await self._handle_message(message)
|
||||
|
||||
@self._client.event # type: ignore[union-attr]
|
||||
@self._client.event
|
||||
async def on_disconnect():
|
||||
if self._state != PlatformState.DISCONNECTED:
|
||||
self._state = PlatformState.CONNECTING
|
||||
@@ -535,8 +535,8 @@ class DiscordVendor(ChatPlatform):
|
||||
def _extract_content(self, message) -> str:
|
||||
"""Strip the bot mention and return clean message text."""
|
||||
content = message.content
|
||||
if self._client.user: # type: ignore[union-attr]
|
||||
content = content.replace(f"<@{self._client.user.id}>", "").strip() # type: ignore[union-attr]
|
||||
if self._client.user:
|
||||
content = content.replace(f"<@{self._client.user.id}>", "").strip()
|
||||
return content
|
||||
|
||||
async def _invoke_agent(self, content: str, session_id: str, target):
|
||||
|
||||
@@ -102,14 +102,14 @@ class TelegramBot:
|
||||
self._token = tok
|
||||
self._app = Application.builder().token(tok).build()
|
||||
|
||||
self._app.add_handler(CommandHandler("start", self._cmd_start)) # type: ignore[union-attr]
|
||||
self._app.add_handler( # type: ignore[union-attr]
|
||||
self._app.add_handler(CommandHandler("start", self._cmd_start))
|
||||
self._app.add_handler(
|
||||
MessageHandler(filters.TEXT & ~filters.COMMAND, self._handle_message)
|
||||
)
|
||||
|
||||
await self._app.initialize() # type: ignore[union-attr]
|
||||
await self._app.start() # type: ignore[union-attr]
|
||||
await self._app.updater.start_polling(allowed_updates=Update.ALL_TYPES) # type: ignore[union-attr]
|
||||
await self._app.initialize()
|
||||
await self._app.start()
|
||||
await self._app.updater.start_polling(allowed_updates=Update.ALL_TYPES)
|
||||
|
||||
self._running = True
|
||||
logger.info("Telegram bot started.")
|
||||
|
||||
@@ -245,7 +245,6 @@ class VoiceLoop:
|
||||
def _transcribe(self, audio: np.ndarray) -> str:
|
||||
"""Transcribe audio using local Whisper model."""
|
||||
self._load_whisper()
|
||||
assert self._whisper_model is not None, "Whisper model failed to load"
|
||||
|
||||
sys.stdout.write(" 🧠 Transcribing...\r")
|
||||
sys.stdout.flush()
|
||||
|
||||
411
tests/infrastructure/test_vllm_backend.py
Normal file
411
tests/infrastructure/test_vllm_backend.py
Normal file
@@ -0,0 +1,411 @@
|
||||
"""Unit tests for the vLLM inference backend (issue #1281).
|
||||
|
||||
Covers:
|
||||
- vllm provider type in CascadeRouter availability check
|
||||
- _call_vllm method (mocked OpenAI client)
|
||||
- providers.yaml loads vllm-local entry
|
||||
- vLLM health check helpers in dashboard routes
|
||||
- config.py has vllm backend option
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from pathlib import Path
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
from infrastructure.router.cascade import CascadeRouter, Provider, ProviderStatus
|
||||
|
||||
|
||||
# ── Provider availability checks ────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestVllmProviderAvailability:
    """Exercise CascadeRouter._check_provider_available for type="vllm" providers."""

    def _make_vllm_provider(self, url: str = "http://localhost:8001/v1") -> Provider:
        """Build an enabled vLLM provider whose base_url is ``url``."""
        default_model = {"name": "Qwen/Qwen2.5-14B-Instruct", "default": True}
        return Provider(
            name="vllm-local",
            type="vllm",
            enabled=True,
            priority=3,
            base_url=url,
            models=[default_model],
        )

    def test_available_when_health_200(self, tmp_path):
        """A 200 response from /health marks the provider available."""
        router = CascadeRouter(config_path=tmp_path / "none.yaml")
        provider = self._make_vllm_provider()
        healthy = MagicMock()
        healthy.status_code = 200

        with patch("infrastructure.router.cascade.requests") as fake_requests:
            fake_requests.get.return_value = healthy
            available = router._check_provider_available(provider)

        assert available is True
        # The probe must hit the server root, not the /v1 API prefix.
        probed_url = fake_requests.get.call_args[0][0]
        assert probed_url.endswith("/health")
        assert "/v1" not in probed_url

    def test_unavailable_when_health_non_200(self, tmp_path):
        """Any non-200 response from /health marks the provider unavailable."""
        router = CascadeRouter(config_path=tmp_path / "none.yaml")
        provider = self._make_vllm_provider()
        unhealthy = MagicMock()
        unhealthy.status_code = 503

        with patch("infrastructure.router.cascade.requests") as fake_requests:
            fake_requests.get.return_value = unhealthy
            available = router._check_provider_available(provider)

        assert available is False

    def test_unavailable_on_connection_error(self, tmp_path):
        """A failing HTTP call marks the provider unavailable."""
        router = CascadeRouter(config_path=tmp_path / "none.yaml")
        provider = self._make_vllm_provider()

        with patch("infrastructure.router.cascade.requests") as fake_requests:
            fake_requests.get.side_effect = ConnectionError("refused")
            available = router._check_provider_available(provider)

        assert available is False

    def test_strips_v1_suffix_for_health_check(self, tmp_path):
        """The /v1 suffix is removed before /health is appended."""
        router = CascadeRouter(config_path=tmp_path / "none.yaml")
        provider = self._make_vllm_provider(url="http://localhost:8001/v1")
        healthy = MagicMock()
        healthy.status_code = 200

        with patch("infrastructure.router.cascade.requests") as fake_requests:
            fake_requests.get.return_value = healthy
            router._check_provider_available(provider)

        probed_url = fake_requests.get.call_args[0][0]
        assert probed_url == "http://localhost:8001/health"

    def test_assumes_available_when_requests_none(self, tmp_path):
        """With the requests module absent, the provider is assumed available."""
        router = CascadeRouter(config_path=tmp_path / "none.yaml")
        provider = self._make_vllm_provider()

        with patch("infrastructure.router.cascade.requests", None):
            available = router._check_provider_available(provider)

        assert available is True
|
||||
|
||||
|
||||
# ── _call_vllm method ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestCallVllm:
    """Cover CascadeRouter._call_vllm against a mocked OpenAI async client."""

    def _make_router(self, tmp_path: Path) -> CascadeRouter:
        """Create a router whose config path points inside ``tmp_path``."""
        config_path = tmp_path / "none.yaml"
        return CascadeRouter(config_path=config_path)

    def _make_provider(self, base_url: str = "http://localhost:8001") -> Provider:
        """Build an enabled vLLM provider whose base_url is ``base_url``."""
        default_model = {"name": "Qwen/Qwen2.5-14B-Instruct", "default": True}
        return Provider(
            name="vllm-local",
            type="vllm",
            enabled=True,
            priority=3,
            base_url=base_url,
            models=[default_model],
        )

    @staticmethod
    def _stub_client(content: str, model: str) -> AsyncMock:
        """Build a client mock whose chat completion yields a single choice."""
        choice = MagicMock()
        choice.message.content = content
        completion = MagicMock()
        completion.choices = [choice]
        completion.model = model

        client = AsyncMock()
        client.chat.completions.create = AsyncMock(return_value=completion)
        return client

    @pytest.mark.asyncio
    async def test_returns_content_and_model(self, tmp_path):
        """The result carries the content and model name from the API response."""
        router = self._make_router(tmp_path)
        provider = self._make_provider()
        client = self._stub_client("Hello from vLLM!", "Qwen/Qwen2.5-14B-Instruct")

        with patch("openai.AsyncOpenAI", return_value=client):
            result = await router._call_vllm(
                provider=provider,
                messages=[{"role": "user", "content": "hi"}],
                model="Qwen/Qwen2.5-14B-Instruct",
                temperature=0.7,
                max_tokens=None,
            )

        assert result["content"] == "Hello from vLLM!"
        assert result["model"] == "Qwen/Qwen2.5-14B-Instruct"

    @pytest.mark.asyncio
    async def test_appends_v1_to_base_url(self, tmp_path):
        """A base_url without /v1 reaches the OpenAI client with /v1 appended."""
        router = self._make_router(tmp_path)
        provider = self._make_provider(base_url="http://localhost:8001")
        client = self._stub_client("ok", "model")

        with patch("openai.AsyncOpenAI", return_value=client) as client_factory:
            await router._call_vllm(
                provider=provider,
                messages=[{"role": "user", "content": "hi"}],
                model="model",
                temperature=0.0,
                max_tokens=None,
            )
            _, factory_kwargs = client_factory.call_args
            assert factory_kwargs["base_url"].endswith("/v1")

    @pytest.mark.asyncio
    async def test_does_not_double_v1(self, tmp_path):
        """A base_url already ending in /v1 is passed through unchanged."""
        router = self._make_router(tmp_path)
        provider = self._make_provider(base_url="http://localhost:8001/v1")
        client = self._stub_client("ok", "model")

        with patch("openai.AsyncOpenAI", return_value=client) as client_factory:
            await router._call_vllm(
                provider=provider,
                messages=[{"role": "user", "content": "hi"}],
                model="model",
                temperature=0.0,
                max_tokens=None,
            )
            _, factory_kwargs = client_factory.call_args
            assert factory_kwargs["base_url"] == "http://localhost:8001/v1"

    @pytest.mark.asyncio
    async def test_max_tokens_passed_when_set(self, tmp_path):
        """A provided max_tokens value is forwarded to the API call."""
        router = self._make_router(tmp_path)
        provider = self._make_provider()
        client = self._stub_client("ok", "model")

        with patch("openai.AsyncOpenAI", return_value=client):
            await router._call_vllm(
                provider=provider,
                messages=[{"role": "user", "content": "hi"}],
                model="model",
                temperature=0.0,
                max_tokens=256,
            )
            sent_kwargs = client.chat.completions.create.call_args[1]
            assert sent_kwargs.get("max_tokens") == 256

    @pytest.mark.asyncio
    async def test_max_tokens_omitted_when_none(self, tmp_path):
        """With max_tokens=None the key is left out of the API call."""
        router = self._make_router(tmp_path)
        provider = self._make_provider()
        client = self._stub_client("ok", "model")

        with patch("openai.AsyncOpenAI", return_value=client):
            await router._call_vllm(
                provider=provider,
                messages=[{"role": "user", "content": "hi"}],
                model="model",
                temperature=0.0,
                max_tokens=None,
            )
            sent_kwargs = client.chat.completions.create.call_args[1]
            assert "max_tokens" not in sent_kwargs
|
||||
|
||||
|
||||
# ── providers.yaml loads vllm-local ─────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestProvidersYamlVllm:
    """Verify config/providers.yaml contains a valid vllm-local entry."""

    @staticmethod
    def _load_providers() -> list[dict]:
        """Parse config/providers.yaml and return its ``providers`` list.

        Fails with a readable assertion (rather than FileNotFoundError) when
        the file is missing, and reads it as UTF-8 so the result does not
        depend on the platform's default encoding.
        """
        config_path = Path(__file__).parents[2] / "config" / "providers.yaml"
        assert config_path.exists(), "config/providers.yaml not found"

        with config_path.open(encoding="utf-8") as f:
            config = yaml.safe_load(f) or {}  # an empty file parses to None

        return config.get("providers") or []

    @classmethod
    def _find_vllm_local(cls) -> dict | None:
        """Return the provider entry named ``vllm-local``, or None if absent."""
        return next((p for p in cls._load_providers() if p.get("name") == "vllm-local"), None)

    def test_vllm_local_entry_exists(self):
        """providers.yaml has a vllm-local provider of type vllm."""
        providers = self._load_providers()
        vllm_providers = [p for p in providers if p.get("type") == "vllm"]
        assert vllm_providers, "No provider with type=vllm found in providers.yaml"

        # .get() keeps a nameless provider entry from raising KeyError here.
        vllm_local = next((p for p in vllm_providers if p.get("name") == "vllm-local"), None)
        assert vllm_local is not None, "vllm-local provider not found in providers.yaml"

    def test_vllm_local_disabled_by_default(self):
        """vllm-local ships with ``enabled: false``."""
        vllm_local = self._find_vllm_local()
        assert vllm_local is not None
        assert vllm_local.get("enabled") is False, "vllm-local should be disabled by default"

    def test_vllm_local_has_default_model(self):
        """vllm-local declares at least one model, one of them marked default."""
        vllm_local = self._find_vllm_local()
        assert vllm_local is not None

        models = vllm_local.get("models", [])
        assert models, "vllm-local must declare at least one model"
        default_models = [m for m in models if m.get("default")]
        assert default_models, "vllm-local must have a model marked default: true"
|
||||
|
||||
|
||||
# ── config.py backend option ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestConfigVllmBackend:
    """Check that config.Settings carries the vLLM backend fields."""

    def test_vllm_is_valid_backend(self):
        """Settings built with timmy_model_backend="vllm" keeps that value."""
        from config import Settings

        configured = Settings(timmy_model_backend="vllm")
        assert configured.timmy_model_backend == "vllm"

    def test_vllm_url_default(self):
        """The default vllm_url is a plain-HTTP URL."""
        from config import Settings

        default_url = Settings().vllm_url
        assert default_url.startswith("http://")

    def test_vllm_model_default(self):
        """The default vllm_model is a non-empty value."""
        from config import Settings

        default_model = Settings().vllm_model
        assert default_model  # must not be empty
|
||||
|
||||
|
||||
# ── Health check helpers ─────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestVllmHealthCheck:
    """Test _check_vllm_sync and _check_vllm."""

    def test_sync_returns_healthy_on_200(self):
        """_check_vllm_sync returns 'healthy' when server responds 200."""
        import urllib.request

        from dashboard.routes.health import _check_vllm_sync

        # urlopen() is used as a context manager, so the mock must yield itself.
        mock_response = MagicMock()
        mock_response.status = 200
        mock_response.__enter__ = lambda s: s
        mock_response.__exit__ = MagicMock(return_value=False)

        with patch.object(urllib.request, "urlopen", return_value=mock_response):
            result = _check_vllm_sync()

        assert result.status == "healthy"
        assert result.name == "vLLM"

    def test_sync_returns_unavailable_on_connection_error(self):
        """_check_vllm_sync returns 'unavailable' when server is unreachable."""
        import urllib.error
        import urllib.request

        from dashboard.routes.health import _check_vllm_sync

        with patch.object(urllib.request, "urlopen", side_effect=urllib.error.URLError("refused")):
            result = _check_vllm_sync()

        assert result.status == "unavailable"
        assert result.name == "vLLM"

    @pytest.mark.asyncio
    async def test_async_caches_result(self, monkeypatch):
        """A second _check_vllm call within the TTL is served from the cache."""
        import dashboard.routes.health as health_module
        from dashboard.routes.health import _check_vllm

        # Start from an empty cache; monkeypatch restores the module globals
        # afterwards so the mock stored by _check_vllm cannot leak into other tests.
        monkeypatch.setattr(health_module, "_vllm_cache", None)
        monkeypatch.setattr(health_module, "_vllm_cache_ts", 0.0)

        mock_dep = MagicMock()
        mock_dep.status = "healthy"

        with patch("dashboard.routes.health._check_vllm_sync", return_value=mock_dep) as mock_fn:
            result1 = await _check_vllm()
            result2 = await _check_vllm()  # should hit cache

        assert result1 is result2  # same object returned from cache
        # Identity alone proves nothing (the mock returns the same object on
        # every call); the probe running exactly once is what shows caching.
        mock_fn.assert_called_once()

    @pytest.mark.asyncio
    async def test_async_refreshes_after_ttl(self, monkeypatch):
        """_check_vllm re-probes and replaces a cached entry older than the TTL."""
        import dashboard.routes.health as health_module
        from dashboard.routes.health import _VLLM_CACHE_TTL, _check_vllm

        # Seed a populated but expired cache entry. With an empty cache the
        # TTL comparison would be skipped and this test would prove nothing.
        stale_dep = MagicMock()
        stale_dep.status = "healthy"
        monkeypatch.setattr(health_module, "_vllm_cache", stale_dep)
        monkeypatch.setattr(
            health_module, "_vllm_cache_ts", time.monotonic() - _VLLM_CACHE_TTL - 1
        )

        fresh_dep = MagicMock()
        fresh_dep.status = "unavailable"

        with patch("dashboard.routes.health._check_vllm_sync", return_value=fresh_dep) as mock_fn:
            result = await _check_vllm()

        mock_fn.assert_called_once()
        assert result is fresh_dep
        assert health_module._vllm_cache is fresh_dep
|
||||
10
tox.ini
10
tox.ini
@@ -41,10 +41,8 @@ description = Static type checking with mypy
|
||||
commands_pre =
|
||||
deps =
|
||||
mypy>=1.0.0
|
||||
types-PyYAML
|
||||
types-requests
|
||||
commands =
|
||||
mypy src
|
||||
mypy src --ignore-missing-imports --no-error-summary
|
||||
|
||||
# ── Test Environments ────────────────────────────────────────────────────────
|
||||
|
||||
@@ -132,17 +130,13 @@ commands =
|
||||
# ── Pre-push (mirrors CI exactly) ────────────────────────────────────────────
|
||||
|
||||
[testenv:pre-push]
|
||||
description = Local gate — lint + typecheck + full CI suite (same as Gitea Actions)
|
||||
description = Local gate — lint + full CI suite (same as Gitea Actions)
|
||||
deps =
|
||||
ruff>=0.8.0
|
||||
mypy>=1.0.0
|
||||
types-PyYAML
|
||||
types-requests
|
||||
commands =
|
||||
ruff check src/ tests/
|
||||
ruff format --check src/ tests/
|
||||
bash -c 'files=$(grep -rl "<style" src/dashboard/templates/ --include="*.html" 2>/dev/null); if [ -n "$files" ]; then echo "ERROR: inline <style> blocks found — move CSS to static/css/mission-control.css:"; echo "$files"; exit 1; fi; echo "No inline CSS — OK"'
|
||||
mypy src
|
||||
mkdir -p reports
|
||||
pytest tests/ \
|
||||
--cov=src \
|
||||
|
||||
Reference in New Issue
Block a user