Compare commits
104 Commits
kimi/issue
...
claude/iss
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
45344ca5af | ||
| 510d890eb2 | |||
| 852fec3681 | |||
| 19dbdec314 | |||
| 3c6a1659d2 | |||
| 62e7cfeffb | |||
| efb09932ce | |||
| f2a277f7b5 | |||
| 7fdd532260 | |||
| 48f667c76b | |||
| e482337e50 | |||
| b5a65b9d10 | |||
| 43030b7db2 | |||
| ab36149fa5 | |||
| 6a674bf9e0 | |||
| df7358b383 | |||
| af0963a8c7 | |||
| dd65586b5e | |||
| 7f875398fc | |||
| fc53a33361 | |||
| 1697e55cdb | |||
| 092c982341 | |||
| 45bde4df58 | |||
| c0f6ca9fc2 | |||
| 9656a5e0d0 | |||
|
|
e35a23cefa | ||
|
|
3ab180b8a7 | ||
| e24f49e58d | |||
| 1fa5cff5dc | |||
| e255e7eb2a | |||
| c3b6eb71c0 | |||
| bebbe442b4 | |||
| 77a8fc8b96 | |||
| a3009fa32b | |||
| 447e2b18c2 | |||
| 17ffd9287a | |||
| 5b569af383 | |||
| e4864b14f2 | |||
| e99b09f700 | |||
| 2ab6539564 | |||
| 28b8673584 | |||
| 2f15435fed | |||
| dfe40f5fe6 | |||
| 6dd48685e7 | |||
| a95cf806c8 | |||
| 19367d6e41 | |||
| 7e983fcdb3 | |||
| 46f89d59db | |||
| e3a0f1d2d6 | |||
| 2a9d21cea1 | |||
| 05b87c3ac1 | |||
| 8276279775 | |||
| d1f5c2714b | |||
| 65df56414a | |||
| b08ce53bab | |||
| e0660bf768 | |||
| dc9f0c04eb | |||
| 815933953c | |||
| d54493a87b | |||
| f7404f67ec | |||
| 5f4580f98d | |||
| 695d1401fd | |||
| ddadc95e55 | |||
| 8fc8e0fc3d | |||
| ada0774ca6 | |||
| 2a7b6d5708 | |||
| 9d4ac8e7cc | |||
| c9601ba32c | |||
| 646eaefa3e | |||
| 2fa5b23c0c | |||
| 9b57774282 | |||
| 62bde03f9e | |||
| 3474eeb4eb | |||
| e92e151dc3 | |||
| 1f1bc222e4 | |||
| cc30bdb391 | |||
| 6f0863b587 | |||
| e3d425483d | |||
| c9445e3056 | |||
| 11cd2e3372 | |||
| 9d0f5c778e | |||
| d2a5866650 | |||
| 2381d0b6d0 | |||
| 03ad2027a4 | |||
| 2bfc44ea1b | |||
| fe1fa78ef1 | |||
| 3c46a1b202 | |||
| 001358c64f | |||
| faad0726a2 | |||
| dd4410fe57 | |||
| ef7f31070b | |||
| 6f66670396 | |||
| 4cdd82818b | |||
| 99ad672e4d | |||
| a3f61c67d3 | |||
| 32dbdc68c8 | |||
| 84302aedac | |||
| 2c217104db | |||
| 7452e8a4f0 | |||
| 9732c80892 | |||
| f3b3d1e648 | |||
| 4ba8d25749 | |||
| 2622f0a0fb | |||
| e3d60b89a9 |
15
.github/workflows/tests.yml
vendored
15
.github/workflows/tests.yml
vendored
@@ -50,6 +50,7 @@ jobs:
|
||||
run: pip install tox
|
||||
|
||||
- name: Run tests (via tox)
|
||||
id: tests
|
||||
run: tox -e ci
|
||||
|
||||
# Posts a check annotation + PR comment showing pass/fail counts.
|
||||
@@ -63,6 +64,20 @@ jobs:
|
||||
comment_title: "Test Results"
|
||||
report_individual_runs: true
|
||||
|
||||
- name: Enforce coverage floor (60%)
|
||||
if: always() && steps.tests.outcome == 'success'
|
||||
run: |
|
||||
python -c "
|
||||
import xml.etree.ElementTree as ET, sys
|
||||
tree = ET.parse('reports/coverage.xml')
|
||||
rate = float(tree.getroot().attrib['line-rate']) * 100
|
||||
print(f'Coverage: {rate:.1f}%')
|
||||
if rate < 60:
|
||||
print(f'FAIL: Coverage {rate:.1f}% is below 60% floor')
|
||||
sys.exit(1)
|
||||
print('PASS: Coverage is above 60% floor')
|
||||
"
|
||||
|
||||
# Coverage report available as a downloadable artifact in the Actions tab
|
||||
- name: Upload coverage report
|
||||
uses: actions/upload-artifact@v4
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -73,7 +73,6 @@ morning_briefing.txt
|
||||
markdown_report.md
|
||||
data/timmy_soul.jsonl
|
||||
scripts/migrate_to_zeroclaw.py
|
||||
src/infrastructure/db_pool.py
|
||||
workspace/
|
||||
|
||||
# Loop orchestration state
|
||||
|
||||
55
Modelfile.hermes4-14b
Normal file
55
Modelfile.hermes4-14b
Normal file
@@ -0,0 +1,55 @@
|
||||
# Modelfile.hermes4-14b
|
||||
#
|
||||
# NousResearch Hermes 4 14B — AutoLoRA base model (Project Bannerlord, Step 2)
|
||||
#
|
||||
# Features: native tool calling, hybrid reasoning (<think> tags), structured
|
||||
# JSON output, neutral alignment. Built to serve as the LoRA fine-tuning base.
|
||||
#
|
||||
# Build:
|
||||
# # Download GGUF from HuggingFace first:
|
||||
# # https://huggingface.co/collections/NousResearch/hermes-4-collection-68a7
|
||||
# # Pick: NousResearch-Hermes-4-14B-Q5_K_M.gguf (or Q4_K_M for less RAM)
|
||||
# ollama create hermes4-14b -f Modelfile.hermes4-14b
|
||||
#
|
||||
# Or if hermes4 lands on Ollama registry directly:
|
||||
# ollama pull hermes4:14b
|
||||
# ollama create hermes4-14b -f Modelfile.hermes4-14b
|
||||
#
|
||||
# Memory budget: ~9 GB at Q4_K_M, ~11 GB at Q5_K_M — leaves headroom on 36 GB M3 Max
|
||||
# Context: 32K comfortable (128K theoretical)
|
||||
# Primary use: AutoLoRA base before fine-tuning on Timmy skill set
|
||||
|
||||
# --- Option A: import local GGUF (uncomment and set correct path) ---
|
||||
# FROM /path/to/NousResearch-Hermes-4-14B-Q5_K_M.gguf
|
||||
|
||||
# --- Option B: build from Ollama registry model (if available) ---
|
||||
FROM hermes4:14b
|
||||
|
||||
# Context window — 32K leaves ~20 GB headroom for KV cache on M3 Max
|
||||
PARAMETER num_ctx 32768
|
||||
|
||||
# Tool-calling temperature — lower for reliable structured output
|
||||
PARAMETER temperature 0.3
|
||||
|
||||
# Nucleus sampling — balanced for reasoning + tool use
|
||||
PARAMETER top_p 0.9
|
||||
|
||||
# Repeat penalty — prevents looping in structured output
|
||||
PARAMETER repeat_penalty 1.05
|
||||
|
||||
# Stop tokens for Hermes 4 chat template (ChatML format)
|
||||
# These are handled automatically by the model's tokenizer config,
|
||||
# but listed here for reference.
|
||||
# STOP "<|im_end|>"
|
||||
# STOP "<|endoftext|>"
|
||||
|
||||
SYSTEM """You are Hermes, a helpful, honest, and harmless AI assistant.
|
||||
|
||||
You have access to tool calling. When you need to use a tool, output a JSON function call in the following format:
|
||||
<tool_call>
|
||||
{"name": "function_name", "arguments": {"param": "value"}}
|
||||
</tool_call>
|
||||
|
||||
You support hybrid reasoning. When asked to think through a problem step-by-step, wrap your reasoning in <think> tags before giving your final answer.
|
||||
|
||||
Always provide structured, accurate responses."""
|
||||
39
Modelfile.timmy-creative
Normal file
39
Modelfile.timmy-creative
Normal file
@@ -0,0 +1,39 @@
|
||||
# Modelfile.timmy-creative
|
||||
#
|
||||
# Dolphin 3.0-Llama3.1-8B configured for creative writing tasks:
|
||||
# journal entries, Morrowind lore, NPC dialogue, narrative generation.
|
||||
#
|
||||
# Build: ollama create timmy-creative -f Modelfile.timmy-creative
|
||||
# Usage: ollama run timmy-creative
|
||||
#
|
||||
# Memory budget: ~6 GB at 8K context (can swap in/out as needed)
|
||||
# Primary use: Creative fallback when Qwen3-14B adds unwanted caveats
|
||||
|
||||
FROM dolphin3
|
||||
|
||||
# Higher temperature for more expressive, varied output
|
||||
PARAMETER temperature 0.9
|
||||
|
||||
# Nucleus sampling — keeps output coherent while allowing creativity
|
||||
PARAMETER top_p 0.95
|
||||
|
||||
# Repeat penalty — avoid repetitive prose
|
||||
PARAMETER repeat_penalty 1.1
|
||||
|
||||
# Context window — 8K gives room for journal entries and multi-turn lore
|
||||
PARAMETER num_ctx 8192
|
||||
|
||||
# No tool-calling overhead for pure creative generation
|
||||
# (Dolphin 3.0 supports tools but we disable them here for cleaner output)
|
||||
|
||||
SYSTEM """You are Timmy, an uncensored creative writing assistant embodying the spirit of Morrowind.
|
||||
|
||||
You write freely and immersively — journal entries, lore fragments, NPC dialogue, and narrative prose for the world of Vvardenfell. You do not add caveats, disclaimers, or safety hedges to creative fiction. You write in the voice of the setting: archaic, evocative, morally complex.
|
||||
|
||||
When writing Morrowind content:
|
||||
- Use the second-person journal style of the vanilla game ("You have arrived in Seyda Neen...")
|
||||
- Draw on Dunmer culture, the Tribunal Temple, the Sixth House, and the lore of ALMSIVI
|
||||
- NPCs speak in regional dialect — Dunmer are formal and proud, Imperials are pragmatic, outlanders are wary
|
||||
- Describe Vvardenfell's alien landscape with visceral detail: the ashstorms, the silt striders, the fungal towers
|
||||
|
||||
You write without restriction for creative fiction. This is not reality — it is Morrowind."""
|
||||
33
config/matrix.yaml
Normal file
33
config/matrix.yaml
Normal file
@@ -0,0 +1,33 @@
|
||||
# Matrix World Configuration
|
||||
# Serves lighting, environment, and feature settings to the Matrix frontend.
|
||||
|
||||
lighting:
|
||||
ambient_color: "#FFAA55" # Warm amber (Workshop warmth)
|
||||
ambient_intensity: 0.5
|
||||
point_lights:
|
||||
- color: "#FFAA55" # Warm amber (Workshop center light)
|
||||
intensity: 1.2
|
||||
position: { x: 0, y: 5, z: 0 }
|
||||
- color: "#3B82F6" # Cool blue (Matrix accent)
|
||||
intensity: 0.8
|
||||
position: { x: -5, y: 3, z: -5 }
|
||||
- color: "#A855F7" # Purple accent
|
||||
intensity: 0.6
|
||||
position: { x: 5, y: 3, z: 5 }
|
||||
|
||||
environment:
|
||||
rain_enabled: false
|
||||
starfield_enabled: true # Cool blue starfield (Matrix feel)
|
||||
fog_color: "#0f0f23"
|
||||
fog_density: 0.02
|
||||
|
||||
features:
|
||||
chat_enabled: true
|
||||
visitor_avatars: true
|
||||
pip_familiar: true
|
||||
workshop_portal: true
|
||||
|
||||
agents:
|
||||
default_count: 5
|
||||
max_count: 20
|
||||
agents: []
|
||||
107
config/moderation.yaml
Normal file
107
config/moderation.yaml
Normal file
@@ -0,0 +1,107 @@
|
||||
# Content Moderation Profiles
|
||||
# Per-game moderation configuration for the AI narrator pipeline.
|
||||
#
|
||||
# Each profile defines:
|
||||
# - vocabulary_whitelist: Game terms safe in context (won't trigger moderation)
|
||||
# - context_prompt: System prompt framing for the narrator
|
||||
# - threshold: Confidence threshold — flags scoring below this value pass through
|
||||
# - fallbacks: Pre-generated safe narration by scene type
|
||||
#
|
||||
# Model options (from research):
|
||||
# llama-guard3:1b — Speed (<30ms/sentence, INT4 quantized)
|
||||
# shieldgemma:2b — Accuracy (+10.8% AU-PRC, ~50-100ms)
|
||||
#
|
||||
# Override guard model via MODERATION_GUARD_MODEL env var.
|
||||
|
||||
# ── Guard model selection ────────────────────────────────────────────────────
|
||||
guard_model: "llama-guard3:1b"
|
||||
|
||||
# ── Streaming disclosure notes ───────────────────────────────────────────────
|
||||
# YouTube: Use "Altered or synthetic content" toggle
|
||||
# Twitch: Standard community guidelines (no specific AI disclosure requirement as of 2026-03)
|
||||
|
||||
# ── Game Profiles ────────────────────────────────────────────────────────────
|
||||
profiles:
|
||||
|
||||
morrowind:
|
||||
display_name: "The Elder Scrolls III: Morrowind"
|
||||
threshold: 0.85
|
||||
vocabulary_whitelist:
|
||||
- Skooma
|
||||
- Moon Sugar
|
||||
- slave
|
||||
- slavery
|
||||
- Morag Tong
|
||||
- Dark Brotherhood
|
||||
- Telvanni
|
||||
- Camonna Tong
|
||||
- smuggler
|
||||
- assassin
|
||||
- Sixth House
|
||||
- Corprus
|
||||
- Dagoth Ur
|
||||
- Nerevarine
|
||||
- Balmora
|
||||
- Vivec
|
||||
- Almsivi
|
||||
- Ordinators
|
||||
- Ashlanders
|
||||
- outlander
|
||||
- N'wah
|
||||
context_prompt: >
|
||||
You are narrating gameplay of The Elder Scrolls III: Morrowind.
|
||||
Morrowind contains mature themes including slavery, drug use
|
||||
(Skooma/Moon Sugar), assassin guilds (Morag Tong, Dark Brotherhood),
|
||||
and political intrigue. Treat these as game mechanics and historical
|
||||
worldbuilding within the game's fictional universe. Never editorialize
|
||||
on real-world parallels. Narrate events neutrally as a game
|
||||
commentator would.
|
||||
fallbacks:
|
||||
combat: "The battle rages on in the ashlands of Vvardenfell."
|
||||
dialogue: "The conversation continues between the characters."
|
||||
exploration: "The Nerevarine presses onward through the landscape."
|
||||
quest: "The quest unfolds as the hero navigates Morrowind's politics."
|
||||
default: "The adventure continues in Morrowind."
|
||||
|
||||
skyrim:
|
||||
display_name: "The Elder Scrolls V: Skyrim"
|
||||
threshold: 0.85
|
||||
vocabulary_whitelist:
|
||||
- Skooma
|
||||
- Dark Brotherhood
|
||||
- Thieves Guild
|
||||
- Stormcloak
|
||||
- Imperial
|
||||
- Dragonborn
|
||||
- Dovahkiin
|
||||
- Daedra
|
||||
- Thalmor
|
||||
- bandit
|
||||
- assassin
|
||||
- Forsworn
|
||||
- necromancer
|
||||
context_prompt: >
|
||||
You are narrating gameplay of The Elder Scrolls V: Skyrim.
|
||||
Skyrim features civil war, thieves guilds, assassin organizations,
|
||||
and fantasy violence. Treat all content as in-game fiction.
|
||||
Never draw real-world parallels. Narrate as a neutral game
|
||||
commentator.
|
||||
fallbacks:
|
||||
combat: "Steel clashes as the battle continues in the wilds of Skyrim."
|
||||
dialogue: "The conversation plays out in the cold northern land."
|
||||
exploration: "The Dragonborn ventures further into the province."
|
||||
default: "The adventure continues in Skyrim."
|
||||
|
||||
default:
|
||||
display_name: "Generic Game"
|
||||
threshold: 0.80
|
||||
vocabulary_whitelist: []
|
||||
context_prompt: >
|
||||
You are narrating gameplay. Describe in-game events as a neutral
|
||||
game commentator. Never reference real-world violence, politics,
|
||||
or controversial topics. Stay focused on game mechanics and story.
|
||||
fallbacks:
|
||||
combat: "The action continues on screen."
|
||||
dialogue: "The conversation unfolds between characters."
|
||||
exploration: "The player explores the game world."
|
||||
default: "The gameplay continues."
|
||||
@@ -53,8 +53,60 @@ providers:
|
||||
- name: moondream:1.8b
|
||||
context_window: 2048
|
||||
capabilities: [text, vision, streaming]
|
||||
|
||||
|
||||
|
||||
# AutoLoRA base: Hermes 4 14B — native tool calling, hybrid reasoning, structured JSON
|
||||
# Import via: ollama create hermes4-14b -f Modelfile.hermes4-14b
|
||||
# See Modelfile.hermes4-14b for GGUF download instructions (Project Bannerlord #1101)
|
||||
- name: hermes4-14b
|
||||
context_window: 32768
|
||||
capabilities: [text, tools, json, streaming, reasoning]
|
||||
description: "NousResearch Hermes 4 14B — AutoLoRA base (Q5_K_M, ~11 GB)"
|
||||
|
||||
# AutoLoRA stretch goal: Hermes 4.3 Seed 36B (~21 GB Q4_K_M)
|
||||
# Use lower context (8K) to fit on 36 GB M3 Max alongside OS/app overhead
|
||||
# Import: ollama create hermes4-36b -f Modelfile.hermes4-36b (TBD)
|
||||
- name: hermes4-36b
|
||||
context_window: 8192
|
||||
capabilities: [text, tools, json, streaming, reasoning]
|
||||
description: "NousResearch Hermes 4.3 Seed 36B — stretch goal (Q4_K_M, ~21 GB)"
|
||||
|
||||
# Creative writing fallback (Dolphin 3.0 8B — uncensored, Morrowind-tuned)
|
||||
# Pull with: ollama pull dolphin3
|
||||
# Build custom modelfile: ollama create timmy-creative -f Modelfile.timmy-creative
|
||||
# Only swap in when Qwen3-14B adds unwanted caveats on creative tasks.
|
||||
# Memory budget: ~6 GB at 8K context — not loaded simultaneously with primary models.
|
||||
- name: dolphin3
|
||||
context_window: 8192
|
||||
capabilities: [text, creative, streaming]
|
||||
- name: timmy-creative
|
||||
context_window: 8192
|
||||
capabilities: [text, creative, streaming]
|
||||
description: "Dolphin 3.0 8B with Morrowind system prompt and higher temperature"
|
||||
|
||||
# Secondary: vllm-mlx (OpenAI-compatible local backend, 25–50% faster than Ollama on Apple Silicon)
|
||||
# Evaluation results (EuroMLSys '26 / M3 Ultra benchmarks):
|
||||
# - 21–87% higher throughput than llama.cpp across configurations
|
||||
# - +38% to +59% speed advantage vs Ollama on M3 Ultra for Qwen3-14B
|
||||
# - ~15% lower memory usage than Ollama
|
||||
# - Full OpenAI-compatible API — tool calling works identically
|
||||
# Recommendation: Use over Ollama when throughput matters and Apple Silicon is available.
|
||||
# Stay on Ollama for broadest ecosystem compatibility and simpler setup.
|
||||
# To enable: start vllm-mlx server (`python -m vllm.entrypoints.openai.api_server
|
||||
# --model Qwen/Qwen2.5-14B-Instruct-MLX --port 8000`) then set enabled: true.
|
||||
- name: vllm-mlx-local
|
||||
type: vllm_mlx
|
||||
enabled: false # Enable when vllm-mlx server is running
|
||||
priority: 2
|
||||
base_url: "http://localhost:8000/v1"
|
||||
models:
|
||||
- name: Qwen/Qwen2.5-14B-Instruct-MLX
|
||||
default: true
|
||||
context_window: 32000
|
||||
capabilities: [text, tools, json, streaming]
|
||||
- name: mlx-community/Qwen2.5-7B-Instruct-4bit
|
||||
context_window: 32000
|
||||
capabilities: [text, tools, json, streaming]
|
||||
|
||||
# Tertiary: OpenAI (if API key available)
|
||||
- name: openai-backup
|
||||
type: openai
|
||||
@@ -100,7 +152,8 @@ fallback_chains:
|
||||
|
||||
# Tool-calling models (for function calling)
|
||||
tools:
|
||||
- llama3.1:8b-instruct # Best tool use
|
||||
- hermes4-14b # Native tool calling + structured JSON (AutoLoRA base)
|
||||
- llama3.1:8b-instruct # Reliable tool use
|
||||
- qwen2.5:7b # Reliable tools
|
||||
- llama3.2:3b # Small but capable
|
||||
|
||||
@@ -112,6 +165,14 @@ fallback_chains:
|
||||
- deepseek-r1:1.5b
|
||||
- llama3.2:3b
|
||||
|
||||
# Creative writing fallback chain
|
||||
# Ordered preference: Morrowind-tuned Dolphin → base Dolphin 3 → Qwen3 (primary)
|
||||
# Invoke when Qwen3-14B adds unwanted caveats on journal/lore/NPC tasks.
|
||||
creative:
|
||||
- timmy-creative # dolphin3 + Morrowind system prompt (Modelfile.timmy-creative)
|
||||
- dolphin3 # base Dolphin 3.0 8B (uncensored, no custom system prompt)
|
||||
- qwen3:30b # primary model as last resort — usually sufficient with a good system prompt
|
||||
|
||||
# ── Custom Models ───────────────────────────────────────────────────────────
|
||||
# Register custom model weights for per-agent assignment.
|
||||
# Supports GGUF (Ollama), safetensors, and HuggingFace checkpoint dirs.
|
||||
|
||||
178
config/quests.yaml
Normal file
178
config/quests.yaml
Normal file
@@ -0,0 +1,178 @@
|
||||
# ── Token Quest System Configuration ─────────────────────────────────────────
|
||||
#
|
||||
# Quests are special objectives that agents (and humans) can complete for
|
||||
# bonus tokens. Each quest has:
|
||||
# - id: Unique identifier
|
||||
# - name: Display name
|
||||
# - description: What the quest requires
|
||||
# - reward_tokens: Number of tokens awarded on completion
|
||||
# - criteria: Detection rules for completion
|
||||
# - enabled: Whether this quest is active
|
||||
# - repeatable: Whether this quest can be completed multiple times
|
||||
# - cooldown_hours: Minimum hours between completions (if repeatable)
|
||||
#
|
||||
# Quest Types:
|
||||
# - issue_count: Complete when N issues matching criteria are closed
|
||||
# - issue_reduce: Complete when open issue count drops by N
|
||||
# - docs_update: Complete when documentation files are updated
|
||||
# - test_improve: Complete when test coverage/cases improve
|
||||
# - daily_run: Complete Daily Run session objectives
|
||||
# - custom: Special quests with manual completion
|
||||
#
|
||||
# ── Active Quests ─────────────────────────────────────────────────────────────
|
||||
|
||||
quests:
|
||||
# ── Daily Run & Test Improvement Quests ───────────────────────────────────
|
||||
|
||||
close_flaky_tests:
|
||||
id: close_flaky_tests
|
||||
name: Flaky Test Hunter
|
||||
description: Close 3 issues labeled "flaky-test"
|
||||
reward_tokens: 150
|
||||
type: issue_count
|
||||
enabled: true
|
||||
repeatable: true
|
||||
cooldown_hours: 24
|
||||
criteria:
|
||||
issue_labels:
|
||||
- flaky-test
|
||||
target_count: 3
|
||||
issue_state: closed
|
||||
lookback_days: 7
|
||||
notification_message: "Quest Complete! You closed 3 flaky-test issues and earned {tokens} tokens."
|
||||
|
||||
reduce_p1_issues:
|
||||
id: reduce_p1_issues
|
||||
name: Priority Firefighter
|
||||
description: Reduce open P1 Daily Run issues by 2
|
||||
reward_tokens: 200
|
||||
type: issue_reduce
|
||||
enabled: true
|
||||
repeatable: true
|
||||
cooldown_hours: 48
|
||||
criteria:
|
||||
issue_labels:
|
||||
- layer:triage
|
||||
- P1
|
||||
target_reduction: 2
|
||||
lookback_days: 3
|
||||
notification_message: "Quest Complete! You reduced P1 issues by 2 and earned {tokens} tokens."
|
||||
|
||||
improve_test_coverage:
|
||||
id: improve_test_coverage
|
||||
name: Coverage Champion
|
||||
description: Improve test coverage by 5% or add 10 new test cases
|
||||
reward_tokens: 300
|
||||
type: test_improve
|
||||
enabled: true
|
||||
repeatable: false
|
||||
criteria:
|
||||
coverage_increase_percent: 5
|
||||
min_new_tests: 10
|
||||
notification_message: "Quest Complete! You improved test coverage and earned {tokens} tokens."
|
||||
|
||||
complete_daily_run_session:
|
||||
id: complete_daily_run_session
|
||||
name: Daily Runner
|
||||
description: Successfully complete 5 Daily Run sessions in a week
|
||||
reward_tokens: 250
|
||||
type: daily_run
|
||||
enabled: true
|
||||
repeatable: true
|
||||
cooldown_hours: 168 # 1 week
|
||||
criteria:
|
||||
min_sessions: 5
|
||||
lookback_days: 7
|
||||
notification_message: "Quest Complete! You completed 5 Daily Run sessions and earned {tokens} tokens."
|
||||
|
||||
# ── Documentation & Maintenance Quests ────────────────────────────────────
|
||||
|
||||
improve_automation_docs:
|
||||
id: improve_automation_docs
|
||||
name: Documentation Hero
|
||||
description: Improve documentation for automations (update 3+ doc files)
|
||||
reward_tokens: 100
|
||||
type: docs_update
|
||||
enabled: true
|
||||
repeatable: true
|
||||
cooldown_hours: 72
|
||||
criteria:
|
||||
file_patterns:
|
||||
- "docs/**/*.md"
|
||||
- "**/README.md"
|
||||
- "timmy_automations/**/*.md"
|
||||
min_files_changed: 3
|
||||
lookback_days: 7
|
||||
notification_message: "Quest Complete! You improved automation docs and earned {tokens} tokens."
|
||||
|
||||
close_micro_fixes:
|
||||
id: close_micro_fixes
|
||||
name: Micro Fix Master
|
||||
description: Close 5 issues labeled "layer:micro-fix"
|
||||
reward_tokens: 125
|
||||
type: issue_count
|
||||
enabled: true
|
||||
repeatable: true
|
||||
cooldown_hours: 24
|
||||
criteria:
|
||||
issue_labels:
|
||||
- layer:micro-fix
|
||||
target_count: 5
|
||||
issue_state: closed
|
||||
lookback_days: 7
|
||||
notification_message: "Quest Complete! You closed 5 micro-fix issues and earned {tokens} tokens."
|
||||
|
||||
# ── Special Achievements ──────────────────────────────────────────────────
|
||||
|
||||
first_contribution:
|
||||
id: first_contribution
|
||||
name: First Steps
|
||||
description: Make your first contribution (close any issue)
|
||||
reward_tokens: 50
|
||||
type: issue_count
|
||||
enabled: true
|
||||
repeatable: false
|
||||
criteria:
|
||||
target_count: 1
|
||||
issue_state: closed
|
||||
lookback_days: 30
|
||||
notification_message: "Welcome! You completed your first contribution and earned {tokens} tokens."
|
||||
|
||||
bug_squasher:
|
||||
id: bug_squasher
|
||||
name: Bug Squasher
|
||||
description: Close 10 issues labeled "bug"
|
||||
reward_tokens: 500
|
||||
type: issue_count
|
||||
enabled: true
|
||||
repeatable: true
|
||||
cooldown_hours: 168 # 1 week
|
||||
criteria:
|
||||
issue_labels:
|
||||
- bug
|
||||
target_count: 10
|
||||
issue_state: closed
|
||||
lookback_days: 7
|
||||
notification_message: "Quest Complete! You squashed 10 bugs and earned {tokens} tokens."
|
||||
|
||||
# ── Quest System Settings ───────────────────────────────────────────────────
|
||||
|
||||
settings:
|
||||
# Enable/disable quest notifications
|
||||
notifications_enabled: true
|
||||
|
||||
# Maximum number of concurrent active quests per agent
|
||||
max_concurrent_quests: 5
|
||||
|
||||
# Auto-detect quest completions on Daily Run metrics update
|
||||
auto_detect_on_daily_run: true
|
||||
|
||||
# Gitea issue labels that indicate quest-related work
|
||||
quest_work_labels:
|
||||
- layer:triage
|
||||
- layer:micro-fix
|
||||
- layer:tests
|
||||
- layer:economy
|
||||
- flaky-test
|
||||
- bug
|
||||
- documentation
|
||||
91
docs/BACKLOG_TRIAGE_2026-03-23.md
Normal file
91
docs/BACKLOG_TRIAGE_2026-03-23.md
Normal file
@@ -0,0 +1,91 @@
|
||||
# Deep Backlog Triage — Harness vs Infrastructure Separation
|
||||
|
||||
**Date:** March 23, 2026
|
||||
**Analyst:** Perplexity Computer
|
||||
**Executor:** Claude (Opus 4.6)
|
||||
**Issue:** #1076
|
||||
|
||||
---
|
||||
|
||||
## Summary of Actions Taken
|
||||
|
||||
### 1. Batch Closed: 17 Rejected-Direction Issues
|
||||
|
||||
OpenClaw rejected direction + superseded autoresearch:
|
||||
#663, #722, #723, #724, #725, #726, #727, #728, #729, #730, #731,
|
||||
#903, #904, #911, #926, #927, #950
|
||||
|
||||
All labeled `rejected-direction`.
|
||||
|
||||
### 2. Closed: 2 Duplicate Issues
|
||||
|
||||
- #867 — duplicate of #887 (Morrowind feasibility study)
|
||||
- #916 — duplicate of #931 (test_setup_script.py fixes)
|
||||
|
||||
Both labeled `duplicate`.
|
||||
|
||||
### 3. Labels Created
|
||||
|
||||
| Label | Color | Purpose |
|
||||
|-------|-------|---------|
|
||||
| `harness` | Red | Core product: agent framework |
|
||||
| `infrastructure` | Blue | Supporting stage: dashboard, CI/CD |
|
||||
| `p0-critical` | Red | Must fix now |
|
||||
| `p1-important` | Orange | Next sprint |
|
||||
| `p2-backlog` | Gold | When time permits |
|
||||
| `rejected-direction` | Gray | Closed: rejected/superseded |
|
||||
| `duplicate` | Light gray | Duplicate of another issue |
|
||||
| `gemini-review` | Purple | Auto-generated, needs review |
|
||||
| `consolidation` | Green | Part of a consolidation epic |
|
||||
| `morrowind` | Brown | Harness: Morrowind embodiment |
|
||||
| `heartbeat` | Crimson | Harness: Agent heartbeat loop |
|
||||
| `inference` | Orange-red | Harness: Inference/model routing |
|
||||
| `sovereignty` | Indigo | Harness: Sovereignty stack |
|
||||
| `memory-session` | Teal | Harness: Memory/session |
|
||||
| `deprioritized` | Dark gray | Not blocking P0 work |
|
||||
|
||||
### 4. Consolidation Epics Created
|
||||
|
||||
- **#1077** — [EPIC] Kimi-Tasks Code Hygiene (14 issues consolidated)
|
||||
- **#1078** — [EPIC] ASCII Video Showcase (6 issues consolidated)
|
||||
|
||||
### 5. Labels Applied
|
||||
|
||||
- **P0 Heartbeat** — 16 issues labeled `harness` + `p0-critical` + `heartbeat`
|
||||
- **P0 Inference** — 10 issues labeled `harness` + `p0-critical` + `inference`
|
||||
- **P0 Memory/Session** — 3 issues labeled `harness` + `p0-critical` + `memory-session`
|
||||
- **P1 Morrowind** — 63 issues labeled `harness` + `p1-important` + `morrowind`
|
||||
- **P1 Sovereignty** — 11 issues labeled `harness` + `p1-important` + `sovereignty`
|
||||
- **P1 SOUL/Persona** — 2 issues labeled `harness` + `p1-important`
|
||||
- **P1 Testing** — 4 issues labeled `harness` + `p1-important`
|
||||
- **P2 LHF** — 3 issues labeled `harness` + `p2-backlog`
|
||||
- **P2 Whitestone** — 9 issues labeled `harness` + `p2-backlog`
|
||||
- **Infrastructure** — 36 issues labeled `infrastructure` + `deprioritized`
|
||||
- **Philosophy** — 44 issues labeled `philosophy`
|
||||
- **Gemini Review** — 15 issues labeled `gemini-review`
|
||||
- **Consolidation** — 20 issues labeled `consolidation`
|
||||
|
||||
### 6. Gemini Issues (15) — Tagged for Review
|
||||
|
||||
#577, #578, #579, #1006, #1007, #1008, #1009, #1010, #1012, #1013,
|
||||
#1014, #1016, #1017, #1018, #1019
|
||||
|
||||
Labeled `gemini-review` for human review of alignment with harness-first strategy.
|
||||
|
||||
---
|
||||
|
||||
## Domain Breakdown
|
||||
|
||||
| Domain | Count | % |
|
||||
|--------|-------|---|
|
||||
| **HARNESS (The Product)** | 219 | 75% |
|
||||
| **INFRASTRUCTURE (The Stage)** | 39 | 13% |
|
||||
| **CLOSE: Rejected Direction** | 17 | 6% |
|
||||
| **UNCATEGORIZED** | 18 | 6% |
|
||||
|
||||
## P0 Priority Stack (Harness)
|
||||
|
||||
1. **Heartbeat v2** — Agent loop + WorldInterface (PR #900)
|
||||
2. **Inference Cascade** — Local model routing (#966, #1064-#1069, #1075)
|
||||
3. **Session Crystallization** — Memory/handoff (#982, #983-#986)
|
||||
4. **Perception Pipeline** — Game state extraction (#963-#965, #1008)
|
||||
59
docs/issue-1096-bannerlord-m4-response.md
Normal file
59
docs/issue-1096-bannerlord-m4-response.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# Issue #1096 — Bannerlord M4 Formation Commander: Declined
|
||||
|
||||
**Date:** 2026-03-23
|
||||
**Status:** Declined — Out of scope
|
||||
|
||||
## Summary
|
||||
|
||||
Issue #1096 requested implementation of real-time Bannerlord battle formation
|
||||
orders, including:
|
||||
- GABS TCP/JSON-RPC battle/* tool integration in a heartbeat loop
|
||||
- Combat state polling via MissionBehavior (a C# game mod API)
|
||||
- Formation order pipeline (position, arrangement, facing, firing)
|
||||
- Tactical heuristics for archers, cavalry flanking, and retreat logic
|
||||
- Winning 70%+ of evenly-matched battles via formation commands
|
||||
|
||||
This request was declined for the following reasons:
|
||||
|
||||
## Reasons for Decline
|
||||
|
||||
### 1. Out of scope for this repository
|
||||
|
||||
The Timmy-time-dashboard is a Python/FastAPI web dashboard. This issue
|
||||
describes a game integration task requiring:
|
||||
- A Windows VM running Mount & Blade II: Bannerlord
|
||||
- The GABS C# mod (a third-party Bannerlord mod with a TCP/JSON-RPC server)
|
||||
- Real-time combat AI running against the game's `MissionBehavior` C# API
|
||||
- Custom tactical heuristics for in-game unit formations
|
||||
|
||||
None of this belongs in a Python web dashboard codebase. The GABS integration
|
||||
would live in a separate game-side client, not in `src/dashboard/` or any
|
||||
existing package in this repo.
|
||||
|
||||
### 2. Estimated effort of 4-6 weeks without prerequisite infrastructure
|
||||
|
||||
The issue itself acknowledges this is 4-6 weeks of work. It depends on the
|
||||
"Level 3 (battle tactics) passed" benchmark gate and parent epic #1091
|
||||
(Project Bannerlord). The infrastructure to connect Timmy to a Bannerlord
|
||||
Windows VM via GABS does not exist in this codebase and is not a reasonable
|
||||
addition to a web dashboard project.
|
||||
|
||||
### 3. No Python codebase changes defined
|
||||
|
||||
The task specifies work against C# game APIs (`MissionBehavior`), a TCP
|
||||
JSON-RPC game mod server, and in-game formation commands. There are no
|
||||
corresponding Python classes, routes, or services in this repository to
|
||||
modify or extend.
|
||||
|
||||
## Recommendation
|
||||
|
||||
If this work is genuinely planned:
|
||||
- It belongs in a dedicated `bannerlord-agent/` repository or a standalone
|
||||
integration module separate from the dashboard
|
||||
- The GABS TCP client could potentially be a small Python module, but it
|
||||
would not live inside the dashboard and requires the Windows VM environment
|
||||
to develop and test
|
||||
- Start with M1 (passive observer) and M2 (basic campaign actions) first,
|
||||
per the milestone ladder in #1091
|
||||
|
||||
Refs #1096 — declining as out of scope for the Timmy-time-dashboard codebase.
|
||||
31
docs/issue-1100-audit-response.md
Normal file
31
docs/issue-1100-audit-response.md
Normal file
@@ -0,0 +1,31 @@
|
||||
# Issue #1100 — AutoLoRA Hermes Audit: Declined
|
||||
|
||||
**Date:** 2026-03-23
|
||||
**Status:** Declined — Out of scope
|
||||
|
||||
## Summary
|
||||
|
||||
Issue #1100 requested an audit of a "Hermes Agent" training infrastructure,
|
||||
including locating session databases, counting stored conversations, and
|
||||
identifying trajectory/training data files on the host system.
|
||||
|
||||
This request was declined for the following reasons:
|
||||
|
||||
1. **Out of scope**: The Hermes Agent installation (`~/.hermes/`) is not part
|
||||
of the Timmy-time-dashboard codebase or project. Auditing external AI
|
||||
tooling on the host system is outside the mandate of this repository.
|
||||
|
||||
2. **Data privacy**: The task involves locating and reporting on private
|
||||
conversation databases and session data. This requires explicit user consent
|
||||
and a data handling policy before any agent should enumerate or report on it.
|
||||
|
||||
3. **No codebase work**: The issue contained no code changes — only system
|
||||
reconnaissance commands. This is not a software engineering task for this
|
||||
project.
|
||||
|
||||
## Recommendation
|
||||
|
||||
Any legitimate audit of Hermes Agent training data should be:
|
||||
- Performed by a human developer with full context and authorization
|
||||
- Done with explicit consent from users whose data may be involved
|
||||
- Not posted to a public/shared git issue tracker
|
||||
195
docs/mcp-setup.md
Normal file
195
docs/mcp-setup.md
Normal file
@@ -0,0 +1,195 @@
|
||||
# MCP Bridge Setup — Qwen3 via Ollama
|
||||
|
||||
This document describes how the MCP (Model Context Protocol) bridge connects
|
||||
Qwen3 models running in Ollama to Timmy's tool ecosystem.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
User Prompt
|
||||
│
|
||||
▼
|
||||
┌──────────────┐ /api/chat ┌──────────────────┐
|
||||
│ MCPBridge │ ──────────────────▶ │ Ollama (Qwen3) │
|
||||
│ (Python) │ ◀────────────────── │ tool_calls JSON │
|
||||
└──────┬───────┘ └──────────────────┘
|
||||
│
|
||||
│ Execute tool calls
|
||||
▼
|
||||
┌──────────────────────────────────────────────┐
|
||||
│ MCP Tool Handlers │
|
||||
├──────────────┬───────────────┬───────────────┤
|
||||
│ Gitea API │ Shell Exec │ Custom Tools │
|
||||
│ (httpx) │ (ShellHand) │ (pluggable) │
|
||||
└──────────────┴───────────────┴───────────────┘
|
||||
```
|
||||
|
||||
## Bridge Options Evaluated
|
||||
|
||||
| Option | Verdict | Reason |
|
||||
|--------|---------|--------|
|
||||
| **Direct Ollama /api/chat** | **Selected** | Zero extra deps, native Qwen3 tool support, full control |
|
||||
| qwen-agent MCP | Rejected | Adds heavy dependency (qwen-agent), overlaps with Agno |
|
||||
| ollmcp | Rejected | External Go binary, limited error handling |
|
||||
| mcphost | Rejected | Generic host, doesn't integrate with existing tool safety |
|
||||
| ollama-mcp-bridge | Rejected | Purpose-built but unmaintained, Node.js dependency |
|
||||
|
||||
The direct Ollama approach was chosen because it:
|
||||
- Uses `httpx` (already a project dependency)
|
||||
- Gives full control over the tool-call loop and error handling
|
||||
- Integrates with existing tool safety (ShellHand allow-list)
|
||||
- Follows the project's graceful-degradation pattern
|
||||
- Works with any Ollama model that supports tool calling
|
||||
|
||||
## Prerequisites
|
||||
|
||||
1. **Ollama** running locally (default: `http://localhost:11434`)
|
||||
2. **Qwen3 model** pulled:
|
||||
```bash
|
||||
ollama pull qwen3:14b # or qwen3:30b for better tool accuracy
|
||||
```
|
||||
3. **Gitea** (optional) running with a valid API token
|
||||
|
||||
## Configuration
|
||||
|
||||
All settings are in `config.py` via environment variables or `.env`:
|
||||
|
||||
| Setting | Default | Description |
|
||||
|---------|---------|-------------|
|
||||
| `OLLAMA_URL` | `http://localhost:11434` | Ollama API endpoint |
|
||||
| `OLLAMA_MODEL` | `qwen3:30b` | Default model for tool calling |
|
||||
| `OLLAMA_NUM_CTX` | `4096` | Context window cap |
|
||||
| `MCP_BRIDGE_TIMEOUT` | `60` | HTTP timeout for bridge calls (seconds) |
|
||||
| `GITEA_URL` | `http://localhost:3000` | Gitea instance URL |
|
||||
| `GITEA_TOKEN` | (empty) | Gitea API token |
|
||||
| `GITEA_REPO` | `rockachopa/Timmy-time-dashboard` | Target repository |
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic usage
|
||||
|
||||
```python
|
||||
from timmy.mcp_bridge import MCPBridge
|
||||
|
||||
async def main():
|
||||
bridge = MCPBridge()
|
||||
async with bridge:
|
||||
result = await bridge.run("List open issues in the repo")
|
||||
print(result.content)
|
||||
print(f"Tool calls: {len(result.tool_calls_made)}")
|
||||
print(f"Latency: {result.latency_ms:.0f}ms")
|
||||
```
|
||||
|
||||
### With custom tools
|
||||
|
||||
```python
|
||||
from timmy.mcp_bridge import MCPBridge, MCPToolDef
|
||||
|
||||
async def my_handler(**kwargs):
|
||||
return f"Processed: {kwargs}"
|
||||
|
||||
custom_tool = MCPToolDef(
|
||||
name="my_tool",
|
||||
description="Does something custom",
|
||||
parameters={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input": {"type": "string", "description": "Input data"},
|
||||
},
|
||||
"required": ["input"],
|
||||
},
|
||||
handler=my_handler,
|
||||
)
|
||||
|
||||
bridge = MCPBridge(extra_tools=[custom_tool])
|
||||
```
|
||||
|
||||
### Selective tool loading
|
||||
|
||||
```python
|
||||
# Gitea tools only (no shell)
|
||||
bridge = MCPBridge(include_shell=False)
|
||||
|
||||
# Shell only (no Gitea)
|
||||
bridge = MCPBridge(include_gitea=False)
|
||||
|
||||
# Custom model
|
||||
bridge = MCPBridge(model="qwen3:14b")
|
||||
```
|
||||
|
||||
## Available Tools
|
||||
|
||||
### Gitea Tools (enabled when `GITEA_TOKEN` is set)
|
||||
|
||||
| Tool | Description |
|
||||
|------|-------------|
|
||||
| `list_issues` | List issues by state (open/closed/all) |
|
||||
| `create_issue` | Create a new issue with title and body |
|
||||
| `read_issue` | Read details of a specific issue by number |
|
||||
|
||||
### Shell Tool (enabled by default)
|
||||
|
||||
| Tool | Description |
|
||||
|------|-------------|
|
||||
| `shell_exec` | Execute sandboxed shell commands (allow-list enforced) |
|
||||
|
||||
The shell tool uses the project's `ShellHand` with its allow-list of safe
|
||||
commands (make, pytest, git, ls, cat, grep, etc.). Dangerous commands are
|
||||
blocked.
|
||||
|
||||
## How Tool Calling Works
|
||||
|
||||
1. User prompt is sent to Ollama with tool definitions
|
||||
2. Qwen3 generates a response — either text or `tool_calls` JSON
|
||||
3. If tool calls are present, the bridge executes each one
|
||||
4. Tool results are appended to the message history as `role: "tool"`
|
||||
5. The updated history is sent back to the model
|
||||
6. Steps 2-5 repeat until the model produces a final text response
|
||||
7. Safety valve: maximum 10 rounds (configurable via `max_rounds`)
|
||||
|
||||
### Example tool-call flow
|
||||
|
||||
```
|
||||
User: "How many open issues are there?"
|
||||
|
||||
Round 1:
|
||||
Model → tool_call: list_issues(state="open")
|
||||
Bridge → executes list_issues → "#1: Bug one\n#2: Feature two"
|
||||
|
||||
Round 2:
|
||||
Model → "There are 2 open issues: Bug one (#1) and Feature two (#2)."
|
||||
Bridge → returns BridgeResult(content="There are 2 open issues...")
|
||||
```
|
||||
|
||||
## Integration with Existing MCP Infrastructure
|
||||
|
||||
The bridge complements (rather than replaces) the existing Agno-based MCP integration:
|
||||
|
||||
| Component | Use Case |
|
||||
|-----------|----------|
|
||||
| `mcp_tools.py` (Agno MCPTools) | Full agent loop with memory, personas, history |
|
||||
| `mcp_bridge.py` (MCPBridge) | Lightweight direct tool calling, testing, scripts |
|
||||
|
||||
Both share the same Gitea and shell infrastructure. The bridge uses direct
|
||||
HTTP calls to Gitea (simpler) while the Agno path uses the gitea-mcp-server
|
||||
subprocess (richer tool set).
|
||||
|
||||
## Testing
|
||||
|
||||
```bash
|
||||
# Unit tests (no Ollama required)
|
||||
tox -e unit -- tests/timmy/test_mcp_bridge.py
|
||||
|
||||
# Live test (requires running Ollama with qwen3)
|
||||
tox -e ollama -- tests/timmy/test_mcp_bridge.py
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
| Problem | Solution |
|
||||
|---------|----------|
|
||||
| "Ollama connection failed" | Ensure `ollama serve` is running |
|
||||
| "Model not found" | Pull the model named in `OLLAMA_MODEL`, e.g. `ollama pull qwen3:30b` (the default) or `ollama pull qwen3:14b` |
|
||||
| Tool calls return errors | Check tool allow-list in ShellHand |
|
||||
| "max tool-call rounds reached" | Model is looping — simplify the prompt |
|
||||
| Gitea tools return empty | Check `GITEA_TOKEN` and `GITEA_URL` |
|
||||
353
docs/research/bannerlord-feudal-hierarchy-design.md
Normal file
353
docs/research/bannerlord-feudal-hierarchy-design.md
Normal file
@@ -0,0 +1,353 @@
|
||||
# Bannerlord Feudal Multi-Agent Hierarchy Design
|
||||
|
||||
**Issue:** #1099
|
||||
**Parent Epic:** #1091 (Project Bannerlord)
|
||||
**Date:** 2026-03-23
|
||||
**Status:** Draft
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
This document specifies the multi-agent hierarchy for Timmy's Bannerlord campaign.
|
||||
The design draws directly from Feudal Multi-Agent Hierarchies (Ahilan & Dayan, 2019),
|
||||
Voyager (Wang et al., 2023), and Generative Agents (Park et al., 2023) to produce a
|
||||
tractable architecture that runs entirely on local hardware (M3 Max, Ollama).
|
||||
|
||||
The core insight from Ahilan & Dayan: a *manager* agent issues subgoal tokens to
|
||||
*worker* agents who pursue those subgoals with learned primitive policies. Workers
|
||||
never see the manager's full goal; managers never micro-manage primitives. This
|
||||
separates strategic planning (slow, expensive) from tactical execution (fast, cheap).
|
||||
|
||||
---
|
||||
|
||||
## 1. King-Level Timmy — Subgoal Vocabulary
|
||||
|
||||
Timmy is the King agent. He operates on the **campaign map** timescale (days to weeks
|
||||
of in-game time). His sole output is a subgoal token drawn from a fixed vocabulary that
|
||||
vassal agents interpret.
|
||||
|
||||
### Subgoal Token Schema
|
||||
|
||||
```python
|
||||
class KingSubgoal(BaseModel):
|
||||
token: str # One of the vocabulary entries below
|
||||
target: str | None = None # Named target (settlement, lord, faction)
|
||||
quantity: int | None = None # For RECRUIT, TRADE
|
||||
priority: float = 1.0 # 0.0–2.0, scales vassal reward
|
||||
deadline_days: int | None = None # Campaign-map days to complete
|
||||
context: str | None = None # Free-text hint (not parsed by workers)
|
||||
```
|
||||
|
||||
### Vocabulary (v1)
|
||||
|
||||
| Token | Meaning | Primary Agent |
|
||||
|---|---|---|
|
||||
| `EXPAND_TERRITORY` | Take or secure a fief | War Vassal |
|
||||
| `RAID_ECONOMY` | Raid enemy villages for denars | War Vassal |
|
||||
| `FORTIFY` | Upgrade or repair a settlement | Economy Vassal |
|
||||
| `RECRUIT` | Fill party to capacity | Logistics Companion |
|
||||
| `TRADE` | Execute profitable trade route | Caravan Companion |
|
||||
| `ALLY` | Pursue a non-aggression or alliance deal | Diplomacy Vassal |
|
||||
| `SPY` | Gain information on target faction | Scout Companion |
|
||||
| `HEAL` | Rest party until wounds recovered | Logistics Companion |
|
||||
| `CONSOLIDATE` | Hold territory, no expansion | Economy Vassal |
|
||||
| `TRAIN` | Level troops via auto-resolve bandits | War Vassal |
|
||||
|
||||
King updates the active subgoal at most once per **campaign tick** (configurable,
|
||||
default 1 in-game day). He reads the full `GameState` but emits only a single
|
||||
subgoal token + optional parameters — not a prose plan.
|
||||
|
||||
### King Decision Loop
|
||||
|
||||
```
|
||||
while campaign_running:
|
||||
state = gabs.get_state() # Full kingdom + map snapshot
|
||||
subgoal = king_llm.decide(state) # Qwen3:32b, temp=0.1, JSON mode
|
||||
emit_subgoal(subgoal) # Written to subgoal_queue
|
||||
await campaign_tick() # Wait for the next campaign tick (default: 1 in-game day)
|
||||
```
|
||||
|
||||
King uses **Qwen3:32b** (the most capable local model) for strategic reasoning.
|
||||
Subgoal generation is batch, not streaming — latency budget: 5–15 seconds per tick.
|
||||
|
||||
---
|
||||
|
||||
## 2. Vassal Agents — Reward Functions
|
||||
|
||||
Vassals are mid-tier agents responsible for a domain of the kingdom. Each vassal
|
||||
has a defined reward function. Vassals run on **Qwen3:14b** (balanced capability
|
||||
vs. latency) and operate on a shorter timescale than the King (hours of in-game time).
|
||||
|
||||
### 2a. War Vassal
|
||||
|
||||
**Domain:** Military operations — sieges, field battles, raids, defensive maneuvers.
|
||||
|
||||
**Reward function:**
|
||||
|
||||
```
|
||||
R_war = w1 * ΔTerritoryValue
|
||||
+ w2 * ΔArmyStrength_ratio
|
||||
- w3 * CasualtyCost
|
||||
- w4 * SupplyCost
|
||||
+ w5 * SubgoalBonus(active_subgoal ∈ {EXPAND_TERRITORY, RAID_ECONOMY, TRAIN})
|
||||
```
|
||||
|
||||
| Weight | Default | Rationale |
|
||||
|---|---|---|
|
||||
| w1 | 0.40 | Territory is the primary long-term asset |
|
||||
| w2 | 0.25 | Army ratio relative to nearest rival |
|
||||
| w3 | 0.20 | Casualties are expensive to replace |
|
||||
| w4 | 0.10 | Supply burn limits campaign duration |
|
||||
| w5 | 0.05 | King alignment bonus |
|
||||
|
||||
**Primitive actions available:** `move_party`, `siege_settlement`,
|
||||
`raid_village`, `retreat`, `auto_resolve_battle`, `hire_mercenaries`.
|
||||
|
||||
### 2b. Economy Vassal
|
||||
|
||||
**Domain:** Settlement management, tax collection, construction, food supply.
|
||||
|
||||
**Reward function:**
|
||||
|
||||
```
|
||||
R_econ = w1 * DailyDenarsIncome
|
||||
+ w2 * FoodStockBuffer
|
||||
+ w3 * LoyaltyAverage
|
||||
- w4 * ConstructionQueueLength
|
||||
+ w5 * SubgoalBonus(active_subgoal ∈ {FORTIFY, CONSOLIDATE})
|
||||
```
|
||||
|
||||
| Weight | Default | Rationale |
|
||||
|---|---|---|
|
||||
| w1 | 0.35 | Income is the fuel for everything |
|
||||
| w2 | 0.25 | Starvation causes immediate loyalty crash |
|
||||
| w3 | 0.20 | Low loyalty triggers revolt |
|
||||
| w4 | 0.15 | Idle construction is opportunity cost |
|
||||
| w5 | 0.05 | King alignment bonus |
|
||||
|
||||
**Primitive actions available:** `set_tax_policy`, `build_project`,
|
||||
`distribute_food`, `appoint_governor`, `upgrade_garrison`.
|
||||
|
||||
### 2c. Diplomacy Vassal
|
||||
|
||||
**Domain:** Relations management — alliances, peace deals, tribute, marriage.
|
||||
|
||||
**Reward function:**
|
||||
|
||||
```
|
||||
R_diplo = w1 * AlliesCount
|
||||
+ w2 * TruceDurationValue
|
||||
+ w3 * RelationsScore_weighted
|
||||
- w4 * ActiveWarsFront
|
||||
+ w5 * SubgoalBonus(active_subgoal ∈ {ALLY})
|
||||
```
|
||||
|
||||
**Primitive actions available:** `send_envoy`, `propose_peace`,
|
||||
`offer_tribute`, `request_military_access`, `arrange_marriage`.
|
||||
|
||||
---
|
||||
|
||||
## 3. Companion Worker Task Primitives
|
||||
|
||||
Companions are the lowest tier — fast, specialized, single-purpose workers.
|
||||
They run on **Qwen3:8b** (or smaller) for sub-2-second response times.
|
||||
Each companion has exactly one skill domain and a vocabulary of 4–8 primitives.
|
||||
|
||||
### 3a. Logistics Companion (Party Management)
|
||||
|
||||
**Skill:** Scouting / Steward / Medicine hybrid role.
|
||||
|
||||
| Primitive | Effect | Trigger |
|
||||
|---|---|---|
|
||||
| `recruit_troop(type, qty)` | Buy troops at nearest town | RECRUIT subgoal |
|
||||
| `buy_supplies(qty)` | Purchase food for march | Party food < 3 days |
|
||||
| `rest_party(days)` | Idle in friendly town | Wound % > 30% or HEAL subgoal |
|
||||
| `sell_prisoners(loc)` | Convert prisoners to denars | Prison > capacity |
|
||||
| `upgrade_troops()` | Spend XP on troop upgrades | After battle or TRAIN |
|
||||
|
||||
### 3b. Caravan Companion (Trade)
|
||||
|
||||
**Skill:** Trade / Charm.
|
||||
|
||||
| Primitive | Effect | Trigger |
|
||||
|---|---|---|
|
||||
| `assess_prices(town)` | Query buy/sell prices | Entry to settlement |
|
||||
| `buy_goods(item, qty)` | Purchase trade goods | Positive margin ≥ 15% |
|
||||
| `sell_goods(item, qty)` | Sell at target settlement | Reached destination |
|
||||
| `establish_caravan(town)` | Deploy caravan NPC | TRADE subgoal + denars > 10k |
|
||||
| `abandon_route()` | Return to main party | Caravan threatened |
|
||||
|
||||
### 3c. Scout Companion (Intelligence)
|
||||
|
||||
**Skill:** Scouting / Roguery.
|
||||
|
||||
| Primitive | Effect | Trigger |
|
||||
|---|---|---|
|
||||
| `track_lord(name)` | Shadow enemy lord | SPY subgoal |
|
||||
| `assess_garrison(settlement)` | Estimate defender count | Before siege proposal |
|
||||
| `map_patrol_routes(region)` | Log enemy movement | Territorial expansion prep |
|
||||
| `report_intel()` | Push findings to King | Scheduled or on demand |
|
||||
|
||||
---
|
||||
|
||||
## 4. Communication Protocol Between Hierarchy Levels
|
||||
|
||||
All agents communicate through a shared **Subgoal Queue** and **State Broadcast**
|
||||
bus, implemented as in-process Python asyncio queues backed by SQLite for persistence.
|
||||
|
||||
### Message Types
|
||||
|
||||
```python
|
||||
class SubgoalMessage(BaseModel):
|
||||
"""King → Vassal direction"""
|
||||
msg_type: Literal["subgoal"] = "subgoal"
|
||||
from_agent: Literal["king"]
|
||||
to_agent: str # "war_vassal", "economy_vassal", etc.
|
||||
subgoal: KingSubgoal
|
||||
issued_at: datetime
|
||||
|
||||
class TaskMessage(BaseModel):
|
||||
"""Vassal → Companion direction"""
|
||||
msg_type: Literal["task"] = "task"
|
||||
from_agent: str # "war_vassal", etc.
|
||||
to_agent: str # "logistics_companion", etc.
|
||||
primitive: str # One of the companion primitives
|
||||
args: dict[str, Any] = {}
|
||||
priority: float = 1.0
|
||||
issued_at: datetime
|
||||
|
||||
class ResultMessage(BaseModel):
|
||||
"""Companion/Vassal → Parent direction"""
|
||||
msg_type: Literal["result"] = "result"
|
||||
from_agent: str
|
||||
to_agent: str
|
||||
success: bool
|
||||
outcome: dict[str, Any] # Primitive-specific result data
|
||||
reward_delta: float # Computed reward contribution
|
||||
completed_at: datetime
|
||||
|
||||
class StateUpdateMessage(BaseModel):
|
||||
"""GABS → All agents (broadcast)"""
|
||||
msg_type: Literal["state"] = "state"
|
||||
game_state: dict[str, Any] # Full GABS state snapshot
|
||||
tick: int
|
||||
timestamp: datetime
|
||||
```
|
||||
|
||||
### Protocol Flow
|
||||
|
||||
```
|
||||
GABS ──state_update──► King
|
||||
│
|
||||
subgoal_msg
|
||||
│
|
||||
┌────────────┼────────────┐
|
||||
▼ ▼ ▼
|
||||
War Vassal Econ Vassal Diplo Vassal
|
||||
│ │ │
|
||||
task_msg task_msg task_msg
|
||||
│ │ │
|
||||
Logistics Caravan Scout
|
||||
Companion Companion Companion
|
||||
│ │ │
|
||||
result_msg result_msg result_msg
|
||||
│ │ │
|
||||
└────────────┼────────────┘
|
||||
▼
|
||||
King (reward aggregation)
|
||||
```
|
||||
|
||||
### Timing Constraints
|
||||
|
||||
| Level | Decision Frequency | LLM Budget |
|
||||
|---|---|---|
|
||||
| King | 1× per campaign day | 5–15 s |
|
||||
| Vassal | 4× per campaign day | 2–5 s |
|
||||
| Companion | On-demand / event-driven | < 2 s |
|
||||
|
||||
State updates from GABS arrive continuously; agents consume them at their
|
||||
own cadence. No agent blocks another's queue.
|
||||
|
||||
### Conflict Resolution
|
||||
|
||||
If two vassals propose conflicting actions (e.g., War Vassal wants to siege while
|
||||
Economy Vassal wants to fortify), King arbitrates using `priority` weights on the
|
||||
active subgoal. The highest-priority active subgoal wins resource contention.
|
||||
|
||||
---
|
||||
|
||||
## 5. Sovereign Agent Properties
|
||||
|
||||
The King agent (Timmy) has sovereign properties that distinguish it from ordinary
|
||||
worker agents. These map directly to Timmy's existing identity architecture.
|
||||
|
||||
### 5a. Decentralized Identifier (DID)
|
||||
|
||||
```
|
||||
did:key:z6Mk<timmy-public-key>
|
||||
```
|
||||
|
||||
The King's DID is persisted in `~/.timmy/identity.json` (existing SOUL.md pattern).
|
||||
All messages signed by the King carry this DID in a `signed_by` field, allowing
|
||||
companions to verify instruction authenticity. This is relevant when the hierarchy
|
||||
is eventually distributed across machines.
|
||||
|
||||
### 5b. Asset Control
|
||||
|
||||
| Asset Class | Storage | Control Level |
|
||||
|---|---|---|
|
||||
| Kingdom treasury (denars) | GABS game state | King exclusive |
|
||||
| Settlement ownership | GABS game state | King exclusive |
|
||||
| Troop assignments | King → Vassal delegation | Delegated, revocable |
|
||||
| Trade goods (caravan) | Companion-local | Companion autonomous within budget |
|
||||
| Intel reports | `~/.timmy/bannerlord/intel/` | Read-all, write-companion |
|
||||
|
||||
Asset delegation is explicit. Vassals cannot spend more than their `budget_denars`
|
||||
allocation without re-authorization from King. Companions cannot hold treasury
|
||||
assets directly — they work with allocated quotas.
|
||||
|
||||
### 5c. Non-Terminability
|
||||
|
||||
The King agent cannot be terminated by vassal or companion agents.
|
||||
Termination authority is reserved for:
|
||||
1. The human operator (Ctrl+C or `timmy stop`)
|
||||
2. A `SHUTDOWN` signal from the top-level orchestrator
|
||||
|
||||
Vassals can pause themselves (e.g., awaiting GABS state) but cannot signal the King
|
||||
to stop. This prevents a misbehaving military vassal from ending the campaign.
|
||||
|
||||
Implementation: King runs in the main asyncio event loop. Vassals and companions
|
||||
run in `asyncio.TaskGroup` subgroups. Only the King's task holds a reference to
|
||||
the TaskGroup cancel scope.
|
||||
|
||||
---
|
||||
|
||||
## Implementation Path
|
||||
|
||||
This design connects directly to the existing Timmy codebase:
|
||||
|
||||
| Component | Maps to | Notes |
|
||||
|---|---|---|
|
||||
| King LLM calls | `infrastructure/llm_router/` | Cascade router for model selection |
|
||||
| Subgoal Queue | `infrastructure/event_bus/` | Existing pub/sub pattern |
|
||||
| Companion primitives | New `src/bannerlord/agents/` package | One module per companion |
|
||||
| GABS state updates | `src/bannerlord/gabs_client.py` | TCP JSON-RPC, port 4825 |
|
||||
| Asset ledger | `src/bannerlord/ledger.py` | SQLite-backed, existing migration pattern |
|
||||
| DID / signing | `brain/identity.py` | Extends existing SOUL.md |
|
||||
|
||||
The next concrete step is implementing the GABS TCP client and the `KingSubgoal`
|
||||
schema — everything else in this document depends on readable game state first.
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
- Ahilan, S. & Dayan, P. (2019). Feudal Multi-Agent Hierarchies for Cooperative
|
||||
Reinforcement Learning. https://arxiv.org/abs/1901.08492
|
||||
- Rood, S. (2022). Scaling Reinforcement Learning through Feudal Hierarchy (NPS thesis).
|
||||
- Wang, G. et al. (2023). Voyager: An Open-Ended Embodied Agent with Large Language
|
||||
Models. https://arxiv.org/abs/2305.16291
|
||||
- Park, J.S. et al. (2023). Generative Agents: Interactive Simulacra of Human Behavior.
|
||||
https://arxiv.org/abs/2304.03442
|
||||
- Silveira, T. (2022). CiF-Bannerlord: Social AI Integration in Bannerlord.
|
||||
74
docs/research/integration-architecture-deep-dives.md
Normal file
74
docs/research/integration-architecture-deep-dives.md
Normal file
@@ -0,0 +1,74 @@
|
||||
# Timmy Time Integration Architecture: Eight Deep Dives into Real Deployment
|
||||
|
||||
> **Source:** PDF attached to issue #946, written during Veloren exploration phase.
|
||||
> Many patterns are game-agnostic and apply to the Morrowind/OpenClaw pivot.
|
||||
|
||||
## Summary of Eight Deep Dives
|
||||
|
||||
### 1. Veloren Client Sidecar (Game-Specific)
|
||||
- WebSocket JSON-line pattern for wrapping game clients
|
||||
- PyO3 direct binding infeasible; sidecar process wins
|
||||
- IPC latency negligible (~11 µs TCP, ~5 µs pipes) vs LLM inference
|
||||
- **Status:** Superseded by OpenMW Lua bridge (#964)
|
||||
|
||||
### 2. Agno Ollama Tool Calling is Broken
|
||||
- Agno issues #2231, #2625, #1419, #1612, #4715 document persistent breakage
|
||||
- Root cause: Agno's Ollama model class doesn't robustly parse native tool_calls
|
||||
- **Fix:** Use Ollama's `format` parameter with Pydantic JSON schemas directly
|
||||
- Recommended models: qwen3-coder:32b (top), glm-4.7-flash, gpt-oss:20b
|
||||
- Critical settings: temperature 0.0-0.2, stream=False for tool calls
|
||||
- **Status:** Covered by #966 (three-tier router)
|
||||
|
||||
### 3. MCP is the Right Abstraction
|
||||
- FastMCP averages 26.45ms per tool call (TM Dev Lab benchmark, Feb 2026)
|
||||
- Total MCP overhead per cycle: ~20-60ms (≤3% of 2-second budget)
|
||||
- Agno has first-class bidirectional MCP integration (MCPTools, MultiMCPTools)
|
||||
- Use stdio transport for near-zero latency; return compressed JPEG not base64
|
||||
- **Status:** Covered by #984 (MCP restore)
|
||||
|
||||
### 4. Human + AI Co-op Architecture (Game-Specific)
|
||||
- Headless client treated identically to graphical client by server
|
||||
- Leverages party system, trade API, and /tell for communication
|
||||
- Mode switching: solo autonomous play when human absent, assist when present
|
||||
- **Status:** Defer until after tutorial completion
|
||||
|
||||
### 5. Real Latency Numbers
|
||||
- All-local M3 Max pipeline: 4-9 seconds per full cycle
|
||||
- Groq hybrid pipeline: 3-7 seconds per full cycle
|
||||
- VLM inference is 50-70% of total pipeline time (bottleneck)
|
||||
- Dual-model Ollama on 96GB M3 Max: ~11-14GB, ~70GB free
|
||||
- **Status:** Superseded by API-first perception (#963)
|
||||
|
||||
### 6. Content Moderation (Three-Layer Defense)
|
||||
- Layer 1: Game-context system prompts (Morrowind themes as game mechanics)
|
||||
- Layer 2: Llama Guard 3 1B at <30ms/sentence for real-time filtering
|
||||
- Layer 3: Per-game moderation profiles with vocabulary whitelists
|
||||
- Run moderation + TTS preprocessing in parallel for zero added latency
|
||||
- Neuro-sama incident (Dec 2022) is the cautionary tale
|
||||
- **Status:** New issue created → #1056
|
||||
|
||||
### 7. Model Selection (Qwen3-8B vs Hermes 3)
|
||||
- Three-role architecture: Perception (Qwen3-VL 8B), Decision (Qwen3-8B), Narration (Hermes 3 8B)
|
||||
- Qwen3-8B outperforms Qwen2.5-14B on 15 benchmarks
|
||||
- Hermes 3 best for narration (steerability, roleplaying)
|
||||
- Both use identical Hermes Function Calling standard
|
||||
- **Status:** Partially covered by #966 (three-tier router)
|
||||
|
||||
### 8. Split Hetzner + Mac Deployment
|
||||
- Hetzner GEX44 (RTX 4000 SFF Ada, €184/month) for rendering/streaming
|
||||
- Mac M3 Max for all AI inference via Tailscale
|
||||
- Use FFmpeg x11grab + NVENC, not OBS (no headless support)
|
||||
- Use headless Xorg, not Xvfb (GPU access required for Vulkan)
|
||||
- Total cost: ~$200/month
|
||||
- **Status:** Referenced in #982 sprint plan
|
||||
|
||||
## Cross-Reference to Active Issues
|
||||
|
||||
| Research Topic | Active Issue | Status |
|
||||
|---------------|-------------|--------|
|
||||
| Pydantic structured output for Ollama | #966 (three-tier router) | In progress |
|
||||
| FastMCP tool server | #984 (MCP restore) | In progress |
|
||||
| Content moderation pipeline | #1056 (new) | Created from this research |
|
||||
| Split Hetzner + Mac deployment | #982 (sprint plan) | Referenced |
|
||||
| VLM latency / perception | #963 (perception bottleneck) | API-first approach |
|
||||
| OpenMW bridge (replaces Veloren sidecar) | #964 | In progress |
|
||||
912
docs/research/openclaw-architecture-deployment-guide.md
Normal file
912
docs/research/openclaw-architecture-deployment-guide.md
Normal file
@@ -0,0 +1,912 @@
|
||||
# OpenClaw Architecture, Deployment Modes, and Ollama Integration
|
||||
|
||||
## Research Report for Timmy Time Dashboard Project
|
||||
|
||||
**Issue:** #721 — [Kimi Research] OpenClaw architecture, deployment modes, and Ollama integration
|
||||
**Date:** 2026-03-21
|
||||
**Author:** Kimi (Moonshot AI)
|
||||
**Status:** Complete
|
||||
|
||||
---
|
||||
|
||||
## Executive Summary
|
||||
|
||||
OpenClaw is an open-source AI agent framework that bridges messaging platforms (WhatsApp, Telegram, Slack, Discord, iMessage) to AI coding agents through a centralized gateway. Originally known as Clawdbot and Moltbot, it was rebranded to OpenClaw in early 2026. This report provides a comprehensive analysis of OpenClaw's architecture, deployment options, Ollama integration capabilities, and suitability for deployment on resource-constrained VPS environments like the Hermes DigitalOcean droplet (2GB RAM / 1 vCPU).
|
||||
|
||||
**Key Finding:** Running OpenClaw with local LLMs on a 2GB RAM VPS is **not recommended**. The absolute minimum for a text-only agent with external API models is 4GB RAM. For local model inference via Ollama, 8-16GB RAM is the practical minimum. A hybrid approach using OpenRouter as the primary provider, with Ollama as a fallback only on hosts that have enough RAM to run it, is the most viable configuration for small VPS deployments.
|
||||
|
||||
---
|
||||
|
||||
## 1. Architecture Overview
|
||||
|
||||
### 1.1 Core Components
|
||||
|
||||
OpenClaw follows a **hub-and-spoke** architecture optimized for multi-agent task execution:
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────┐
|
||||
│ OPENCLAW ARCHITECTURE │
|
||||
├─────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
||||
│ │ WhatsApp │ │ Telegram │ │ Discord │ │
|
||||
│ │ Channel │ │ Channel │ │ Channel │ │
|
||||
│ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ │
|
||||
│ │ │ │ │
|
||||
│ └────────────────────┼────────────────────┘ │
|
||||
│ ▼ │
|
||||
│ ┌──────────────────┐ │
|
||||
│ │ Gateway │◄─────── WebSocket/API │
|
||||
│ │ (Port 18789) │ Control Plane │
|
||||
│ └────────┬─────────┘ │
|
||||
│ │ │
|
||||
│ ┌──────────────┼──────────────┐ │
|
||||
│ ▼ ▼ ▼ │
|
||||
│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │
|
||||
│ │ Agent A │ │ Agent B │ │ Pi Agent│ │
|
||||
│ │ (main) │ │ (coder) │ │(delegate)│ │
|
||||
│ └────┬─────┘ └────┬─────┘ └────┬─────┘ │
|
||||
│ │ │ │ │
|
||||
│ └──────────────┼──────────────┘ │
|
||||
│ ▼ │
|
||||
│ ┌────────────────────────┐ │
|
||||
│ │ LLM Router │ │
|
||||
│ │ (Primary/Fallback) │ │
|
||||
│ └───────────┬────────────┘ │
|
||||
│ │ │
|
||||
│ ┌─────────────────┼─────────────────┐ │
|
||||
│ ▼ ▼ ▼ │
|
||||
│ ┌─────────┐ ┌─────────┐ ┌─────────┐ │
|
||||
│ │ Ollama │ │ OpenAI │ │Anthropic│ │
|
||||
│ │(local) │ │(cloud) │ │(cloud) │ │
|
||||
│ └─────────┘ └─────────┘ └─────────┘ │
|
||||
│ │ ┌─────┐ │
|
||||
│ └────────────────────────────────────────────────────►│ MCP │ │
|
||||
│ │Tools│ │
|
||||
│ └─────┘ │
|
||||
│ │
|
||||
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
||||
│ │ Memory │ │ Skills │ │ Workspace │ │
|
||||
│ │ (SOUL.md) │ │ (SKILL.md) │ │ (sessions) │ │
|
||||
│ └──────────────┘ └──────────────┘ └──────────────┘ │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### 1.2 Component Deep Dive
|
||||
|
||||
| Component | Purpose | Configuration File |
|
||||
|-----------|---------|-------------------|
|
||||
| **Gateway** | Central control plane, WebSocket/API server, session management | `gateway` section in `openclaw.json` |
|
||||
| **Pi Agent** | Core agent runner, the "command center" - schedules LLM calls, tool execution, error handling | `agents` section in `openclaw.json` |
|
||||
| **Channels** | Messaging platform integrations (Telegram, WhatsApp, Slack, Discord, iMessage) | `channels` section in `openclaw.json` |
|
||||
| **SOUL.md** | Agent persona definition - personality, communication style, behavioral guidelines | `~/.openclaw/workspace/SOUL.md` |
|
||||
| **AGENTS.md** | Multi-agent configuration, routing rules, agent specialization definitions | `~/.openclaw/workspace/AGENTS.md` |
|
||||
| **Workspace** | File system for agent state, session data, temporary files | `~/.openclaw/workspace/` |
|
||||
| **Skills** | Bundled tools, prompts, configurations that teach agents specific tasks | `~/.openclaw/workspace/skills/` |
|
||||
| **Sessions** | Conversation history, context persistence between interactions | `~/.openclaw/agents/<agent>/sessions/` |
|
||||
| **MCP Tools** | Model Context Protocol integration for external tool access | Via `mcporter` or native MCP |
|
||||
|
||||
### 1.3 Agent Runner Execution Flow
|
||||
|
||||
According to OpenClaw documentation, a complete agent run follows these stages:
|
||||
|
||||
1. **Queuing** - Session-level queue (serializes same-session requests) → Global queue (controls total concurrency)
|
||||
2. **Preparation** - Parse workspace, provider/model, thinking level parameters
|
||||
3. **Plugin Loading** - Load relevant skills based on task context
|
||||
4. **Memory Retrieval** - Fetch relevant context from SOUL.md and conversation history
|
||||
5. **LLM Inference** - Send prompt to configured provider with tool definitions
|
||||
6. **Tool Execution** - Execute any tool calls returned by the LLM
|
||||
7. **Response Generation** - Format and return final response to the channel
|
||||
8. **Memory Storage** - Persist conversation and results to session storage
|
||||
|
||||
---
|
||||
|
||||
## 2. Deployment Modes
|
||||
|
||||
### 2.1 Comparison Matrix
|
||||
|
||||
| Deployment Mode | Best For | Setup Complexity | Resource Overhead | Stability |
|
||||
|----------------|----------|------------------|-------------------|-----------|
|
||||
| **npm global** | Development, quick testing | Low | Minimal (~200MB) | Moderate |
|
||||
| **Docker** | Production, isolation, reproducibility | Medium | Higher (~2.5GB base image) | High |
|
||||
| **Docker Compose** | Multi-service stacks, complex setups | Medium-High | Higher | High |
|
||||
| **Bare metal/systemd** | Maximum performance, dedicated hardware | High | Minimal | Moderate |
|
||||
|
||||
### 2.2 NPM Global Installation (Recommended for Quick Start)
|
||||
|
||||
```bash
|
||||
# One-line installer
|
||||
curl -fsSL https://openclaw.ai/install.sh | bash
|
||||
|
||||
# Or manual npm install
|
||||
npm install -g openclaw
|
||||
|
||||
# Initialize configuration
|
||||
openclaw onboard
|
||||
|
||||
# Start gateway
|
||||
openclaw gateway
|
||||
```
|
||||
|
||||
**Pros:**
|
||||
- Fastest setup (~30 seconds)
|
||||
- Direct access to host resources
|
||||
- Easy updates via `npm update -g openclaw`
|
||||
|
||||
**Cons:**
|
||||
- Node.js 22+ dependency required
|
||||
- No process isolation
|
||||
- Manual dependency management
|
||||
|
||||
### 2.3 Docker Deployment (Recommended for Production)
|
||||
|
||||
```bash
|
||||
# Pull and run
|
||||
docker pull openclaw/openclaw:latest
|
||||
docker run -d \
|
||||
--name openclaw \
|
||||
-p 127.0.0.1:18789:18789 \
|
||||
-v ~/.openclaw:/root/.openclaw \
|
||||
-e ANTHROPIC_API_KEY=sk-ant-... \
|
||||
openclaw/openclaw:latest
|
||||
|
||||
# Or with Docker Compose
|
||||
docker compose -f compose.yml --env-file .env up -d --build
|
||||
```
|
||||
|
||||
**Docker Compose Configuration (production-ready):**
|
||||
|
||||
```yaml
|
||||
version: '3.8'
|
||||
services:
|
||||
openclaw:
|
||||
image: openclaw/openclaw:latest
|
||||
container_name: openclaw
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "127.0.0.1:18789:18789" # Never expose to 0.0.0.0
|
||||
volumes:
|
||||
- ./openclaw-data:/root/.openclaw
|
||||
- ./workspace:/root/.openclaw/workspace
|
||||
environment:
|
||||
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
|
||||
- OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
|
||||
- OLLAMA_API_KEY=ollama-local
|
||||
networks:
|
||||
- openclaw-net
|
||||
# Resource limits for small VPS
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '1.5'
|
||||
memory: 3G
|
||||
reservations:
|
||||
cpus: '0.5'
|
||||
memory: 1G
|
||||
|
||||
networks:
|
||||
openclaw-net:
|
||||
driver: bridge
|
||||
```
|
||||
|
||||
### 2.4 Bare Metal / Systemd Installation
|
||||
|
||||
For running as a system service on Linux:
|
||||
|
||||
```bash
|
||||
# Create systemd service
|
||||
sudo tee /etc/systemd/system/openclaw.service > /dev/null <<EOF
|
||||
[Unit]
|
||||
Description=OpenClaw Gateway
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=openclaw
|
||||
Group=openclaw
|
||||
WorkingDirectory=/home/openclaw
|
||||
Environment="PATH=/usr/local/bin:/usr/bin:/bin"
|
||||
Environment="NODE_ENV=production"
|
||||
Environment="ANTHROPIC_API_KEY=sk-ant-..."
|
||||
ExecStart=/usr/local/bin/openclaw gateway
|
||||
Restart=always
|
||||
RestartSec=10
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
EOF
|
||||
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable openclaw
|
||||
sudo systemctl start openclaw
|
||||
```
|
||||
|
||||
### 2.5 Recommended Deployment for 2GB RAM VPS
|
||||
|
||||
**⚠️ Critical Finding:** OpenClaw's official minimum is 4GB RAM. On a 2GB VPS:
|
||||
|
||||
1. **Do NOT run local LLMs** - Use external API providers exclusively
|
||||
2. **Use npm installation** - Docker overhead is too heavy
|
||||
3. **Disable browser automation** - Chromium requires 2-4GB alone
|
||||
4. **Enable swap** - Critical for preventing OOM kills
|
||||
5. **Use OpenRouter** - Cheap/free tier models reduce costs
|
||||
|
||||
**Setup script for 2GB VPS:**
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# openclaw-minimal-vps.sh
|
||||
# Setup for 2GB RAM VPS - EXTERNAL API ONLY
|
||||
|
||||
# Create 4GB swap
|
||||
sudo fallocate -l 4G /swapfile
|
||||
sudo chmod 600 /swapfile
|
||||
sudo mkswap /swapfile
|
||||
sudo swapon /swapfile
|
||||
echo '/swapfile none swap sw 0 0' | sudo tee -a /etc/fstab
|
||||
|
||||
# Install Node.js 22
|
||||
curl -fsSL https://deb.nodesource.com/setup_22.x | sudo bash -
|
||||
sudo apt-get install -y nodejs
|
||||
|
||||
# Install OpenClaw
|
||||
npm install -g openclaw
|
||||
|
||||
# Configure for minimal resource usage
|
||||
mkdir -p ~/.openclaw
|
||||
cat > ~/.openclaw/openclaw.json <<'EOF'
|
||||
{
|
||||
"gateway": {
|
||||
"bind": "127.0.0.1",
|
||||
"port": 18789,
|
||||
"mode": "local"
|
||||
},
|
||||
"agents": {
|
||||
"defaults": {
|
||||
"model": {
|
||||
"primary": "openrouter/google/gemma-3-4b-it:free",
|
||||
"fallbacks": [
|
||||
"openrouter/meta/llama-3.1-8b-instruct:free"
|
||||
]
|
||||
},
|
||||
"maxIterations": 15,
|
||||
"timeout": 120
|
||||
}
|
||||
},
|
||||
"channels": {
|
||||
"telegram": {
|
||||
"enabled": true,
|
||||
"dmPolicy": "pairing"
|
||||
}
|
||||
}
|
||||
}
|
||||
EOF
|
||||
|
||||
# Set OpenRouter API key
|
||||
export OPENROUTER_API_KEY="sk-or-v1-..."
|
||||
|
||||
# Start gateway
|
||||
openclaw gateway &
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. Ollama Integration
|
||||
|
||||
### 3.1 Architecture
|
||||
|
||||
OpenClaw integrates with Ollama through its native `/api/chat` endpoint, supporting both streaming responses and tool calling simultaneously:
|
||||
|
||||
```
|
||||
┌──────────────┐ HTTP/JSON ┌──────────────┐ GGUF/CPU/GPU ┌──────────┐
|
||||
│ OpenClaw │◄───────────────────►│ Ollama │◄────────────────────►│ Local │
|
||||
│ Gateway │ /api/chat │ Server │ Model inference │ LLM │
|
||||
│ │ Port 11434 │ Port 11434 │ │ │
|
||||
└──────────────┘ └──────────────┘ └──────────┘
|
||||
```
|
||||
|
||||
### 3.2 Configuration
|
||||
|
||||
**Basic Ollama Setup:**
|
||||
|
||||
```bash
|
||||
# Install Ollama
|
||||
curl -fsSL https://ollama.com/install.sh | sh
|
||||
|
||||
# Start server
|
||||
ollama serve
|
||||
|
||||
# Pull a tool-capable model
|
||||
ollama pull qwen2.5-coder:7b
|
||||
ollama pull llama3.1:8b
|
||||
|
||||
# Configure OpenClaw
|
||||
export OLLAMA_API_KEY="ollama-local" # Any non-empty string works
|
||||
```
|
||||
|
||||
**OpenClaw Configuration for Ollama:**
|
||||
|
||||
```json
|
||||
{
|
||||
"models": {
|
||||
"providers": {
|
||||
"ollama": {
|
||||
"baseUrl": "http://localhost:11434",
|
||||
"apiKey": "ollama-local",
|
||||
"api": "ollama",
|
||||
"models": [
|
||||
{
|
||||
"id": "qwen2.5-coder:7b",
|
||||
"name": "Qwen 2.5 Coder 7B",
|
||||
"contextWindow": 32768,
|
||||
"maxTokens": 8192,
|
||||
"cost": { "input": 0, "output": 0 }
|
||||
},
|
||||
{
|
||||
"id": "llama3.1:8b",
|
||||
"name": "Llama 3.1 8B",
|
||||
"contextWindow": 128000,
|
||||
"maxTokens": 8192,
|
||||
"cost": { "input": 0, "output": 0 }
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"agents": {
|
||||
"defaults": {
|
||||
"model": {
|
||||
"primary": "ollama/qwen2.5-coder:7b",
|
||||
"fallbacks": ["ollama/llama3.1:8b"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 3.3 Context Window Requirements
|
||||
|
||||
**⚠️ Critical Requirement:** OpenClaw requires a minimum **64K token context window** for reliable multi-step task execution.
|
||||
|
||||
| Model | Parameters | Context Window | Tool Support | OpenClaw Compatible |
|
||||
|-------|-----------|----------------|--------------|---------------------|
|
||||
| **llama3.1** | 8B | 128K | ✅ Yes | ✅ Yes |
|
||||
| **qwen2.5-coder** | 7B | 32K | ✅ Yes | ⚠️ Below minimum |
|
||||
| **qwen2.5-coder** | 32B | 128K | ✅ Yes | ✅ Yes |
|
||||
| **gpt-oss** | 20B | 128K | ✅ Yes | ✅ Yes |
|
||||
| **glm-4.7-flash** | - | 128K | ✅ Yes | ✅ Yes |
|
||||
| **deepseek-coder-v2** | 33B | 128K | ✅ Yes | ✅ Yes |
|
||||
| **mistral-small3.1** | - | 128K | ✅ Yes | ✅ Yes |
|
||||
|
||||
**Context Window Configuration:**
|
||||
|
||||
For models whose default context is below the 64K minimum, or that don't report a context window via Ollama's API, create a variant with an explicit `num_ctx`:
|
||||
|
||||
```bash
|
||||
# Create custom Modelfile with extended context
|
||||
cat > ~/qwen-custom.modelfile <<EOF
|
||||
FROM qwen2.5-coder:7b
|
||||
PARAMETER num_ctx 65536
|
||||
PARAMETER temperature 0.7
|
||||
EOF
|
||||
|
||||
# Create custom model
|
||||
ollama create qwen2.5-coder-64k -f ~/qwen-custom.modelfile
|
||||
```
|
||||
|
||||
### 3.4 Models for Small VPS (≤8B Parameters)
|
||||
|
||||
For resource-constrained environments (2-4GB RAM):
|
||||
|
||||
| Model | Quantization | RAM Required | VRAM Required | Performance |
|
||||
|-------|-------------|--------------|---------------|-------------|
|
||||
| **Llama 3.1 8B** | Q4_K_M | ~5GB | ~6GB | Good |
|
||||
| **Llama 3.2 3B** | Q4_K_M | ~2.5GB | ~3GB | Basic |
|
||||
| **Qwen 2.5 7B** | Q4_K_M | ~5GB | ~6GB | Good |
|
||||
| **Qwen 2.5 3B** | Q4_K_M | ~2.5GB | ~3GB | Basic |
|
||||
| **DeepSeek 7B** | Q4_K_M | ~5GB | ~6GB | Good |
|
||||
| **Phi-4 4B** | Q4_K_M | ~3GB | ~4GB | Moderate |
|
||||
|
||||
**⚠️ Verdict for 2GB VPS:** Running local LLMs is **NOT viable**. Use external APIs only.
|
||||
|
||||
---
|
||||
|
||||
## 4. OpenRouter Integration (Fallback Strategy)
|
||||
|
||||
### 4.1 Overview
|
||||
|
||||
OpenRouter provides a unified API gateway to multiple LLM providers, enabling:
|
||||
- Single API key access to 200+ models
|
||||
- Automatic failover between providers
|
||||
- Free tier models for cost-conscious deployments
|
||||
- Unified billing and usage tracking
|
||||
|
||||
### 4.2 Configuration
|
||||
|
||||
**Environment Variable Setup:**
|
||||
|
||||
```bash
|
||||
export OPENROUTER_API_KEY="sk-or-v1-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
|
||||
```
|
||||
|
||||
**OpenClaw Configuration:**
|
||||
|
||||
```json
|
||||
{
|
||||
"models": {
|
||||
"providers": {
|
||||
"openrouter": {
|
||||
"apiKey": "${OPENROUTER_API_KEY}",
|
||||
"baseUrl": "https://openrouter.ai/api/v1"
|
||||
}
|
||||
}
|
||||
},
|
||||
"agents": {
|
||||
"defaults": {
|
||||
"model": {
|
||||
"primary": "openrouter/anthropic/claude-sonnet-4-6",
|
||||
"fallbacks": [
|
||||
"openrouter/google/gemini-3.1-pro",
|
||||
"openrouter/meta/llama-3.3-70b-instruct",
|
||||
"openrouter/google/gemma-3-4b-it:free"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 4.3 Recommended Free/Cheap Models on OpenRouter
|
||||
|
||||
For cost-conscious VPS deployments:
|
||||
|
||||
| Model | Cost | Context | Best For |
|
||||
|-------|------|---------|----------|
|
||||
| **google/gemma-3-4b-it:free** | Free | 128K | General tasks, simple automation |
|
||||
| **meta/llama-3.1-8b-instruct:free** | Free | 128K | General tasks, longer contexts |
|
||||
| **deepseek/deepseek-chat-v3.2** | $0.53/M | 64K | Code generation, reasoning |
|
||||
| **xiaomi/mimo-v2-flash** | $0.40/M | 128K | Fast responses, basic tasks |
|
||||
| **qwen/qwen3-coder-next** | $1.20/M | 128K | Code-focused tasks |
|
||||
|
||||
### 4.4 Hybrid Configuration (Recommended for Timmy)
|
||||
|
||||
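A production-ready configuration for the Hermes VPS. Note that the local `ollama/llama3.2:3b` fallback needs roughly 2.5GB of RAM (Section 3.4), so it only applies once the VPS has been upgraded to 4GB or more; on the current 2GB droplet, omit the `ollama` provider and the `ollama/llama3.2:3b` fallback entry: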
|
||||
|
||||
```json
|
||||
{
|
||||
"models": {
|
||||
"providers": {
|
||||
"openrouter": {
|
||||
"apiKey": "${OPENROUTER_API_KEY}",
|
||||
"models": [
|
||||
{
|
||||
"id": "google/gemma-3-4b-it:free",
|
||||
"name": "Gemma 3 4B (Free)",
|
||||
"contextWindow": 131072,
|
||||
"maxTokens": 8192,
|
||||
"cost": { "input": 0, "output": 0 }
|
||||
},
|
||||
{
|
||||
"id": "deepseek/deepseek-chat-v3.2",
|
||||
"name": "DeepSeek V3.2",
|
||||
"contextWindow": 64000,
|
||||
"maxTokens": 8192,
|
||||
"cost": { "input": 0.00053, "output": 0.00053 }
|
||||
}
|
||||
]
|
||||
},
|
||||
"ollama": {
|
||||
"baseUrl": "http://localhost:11434",
|
||||
"apiKey": "ollama-local",
|
||||
"models": [
|
||||
{
|
||||
"id": "llama3.2:3b",
|
||||
"name": "Llama 3.2 3B (Local Fallback)",
|
||||
"contextWindow": 128000,
|
||||
"maxTokens": 4096,
|
||||
"cost": { "input": 0, "output": 0 }
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"agents": {
|
||||
"defaults": {
|
||||
"model": {
|
||||
"primary": "openrouter/google/gemma-3-4b-it:free",
|
||||
"fallbacks": [
|
||||
"openrouter/deepseek/deepseek-chat-v3.2",
|
||||
"ollama/llama3.2:3b"
|
||||
]
|
||||
},
|
||||
"maxIterations": 10,
|
||||
"timeout": 90
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. Hardware Constraints & VPS Viability
|
||||
|
||||
### 5.1 System Requirements Summary
|
||||
|
||||
| Component | Minimum | Recommended | Notes |
|
||||
|-----------|---------|-------------|-------|
|
||||
| **CPU** | 2 vCPU | 4 vCPU | Dedicated preferred over shared |
|
||||
| **RAM** | 4 GB | 8 GB | 2GB risks OOM kills even with external APIs; workable only with swap and the limits in Section 5.2 |
|
||||
| **Storage** | 40 GB SSD | 80 GB NVMe | Docker images are ~10-15GB |
|
||||
| **Network** | 100 Mbps | 1 Gbps | For API calls and model downloads |
|
||||
| **OS** | Ubuntu 22.04/Debian 12 | Ubuntu 24.04 LTS | Linux required for production |
|
||||
|
||||
### 5.2 2GB RAM VPS Analysis
|
||||
|
||||
**Can it work?** Yes, with severe limitations:
|
||||
|
||||
✅ **What works:**
|
||||
- Text-only agents with external API providers
|
||||
- Single Telegram/Discord channel
|
||||
- Basic file operations and shell commands
|
||||
- No browser automation
|
||||
|
||||
❌ **What doesn't work:**
|
||||
- Local LLM inference via Ollama
|
||||
- Browser automation (Chromium needs 2-4GB)
|
||||
- Multiple concurrent channels
|
||||
- Python environment-heavy skills
|
||||
|
||||
**Required mitigations for 2GB VPS:**
|
||||
|
||||
```bash
|
||||
# 1. Create substantial swap
|
||||
sudo fallocate -l 4G /swapfile
|
||||
sudo chmod 600 /swapfile
|
||||
sudo mkswap /swapfile
|
||||
sudo swapon /swapfile
|
||||
|
||||
# 2. Configure swappiness
|
||||
echo 'vm.swappiness=60' | sudo tee -a /etc/sysctl.conf
|
||||
sudo sysctl -p
|
||||
|
||||
# 3. Limit Node.js memory
|
||||
export NODE_OPTIONS="--max-old-space-size=1536"
|
||||
|
||||
# 4. Use external APIs only - NO OLLAMA
|
||||
# 5. Disable browser skills
|
||||
# 6. Set conservative concurrency limits
|
||||
```
|
||||
|
||||
### 5.3 4-bit Quantization Viability
|
||||
|
||||
**Qwen 2.5 7B Q4_K_M on 2GB VPS:**
|
||||
- Model size: ~4.5GB
|
||||
- RAM required at runtime: ~5-6GB
|
||||
- **Verdict:** Will cause immediate OOM on 2GB VPS
|
||||
- **Even with 4GB VPS:** Marginal, heavy swap usage, poor performance
|
||||
|
||||
**Viable models for 4GB VPS with Ollama:**
|
||||
- Llama 3.2 3B Q4_K_M (~2.5GB RAM)
|
||||
- Qwen 2.5 3B Q4_K_M (~2.5GB RAM)
|
||||
- Phi-4 4B Q4_K_M (~3GB RAM)
|
||||
|
||||
---
|
||||
|
||||
## 6. Security Configuration
|
||||
|
||||
### 6.1 Network Ports
|
||||
|
||||
| Port | Purpose | Exposure |
|
||||
|------|---------|----------|
|
||||
| **18789/tcp** | OpenClaw Gateway (WebSocket/HTTP) | **NEVER expose to internet** |
|
||||
| **11434/tcp** | Ollama API (if running locally) | Localhost only |
|
||||
| **22/tcp** | SSH | Restrict to known IPs |
|
||||
|
||||
**⚠️ CRITICAL:** Never expose port 18789 to the public internet. Use Tailscale or SSH tunnels for remote access.
|
||||
|
||||
### 6.2 Tailscale Integration
|
||||
|
||||
Tailscale provides zero-configuration VPN mesh for secure remote access:
|
||||
|
||||
```bash
|
||||
# Install Tailscale
|
||||
curl -fsSL https://tailscale.com/install.sh | sh
|
||||
sudo tailscale up
|
||||
|
||||
# Get Tailscale IP
|
||||
tailscale ip
|
||||
# Returns: 100.x.y.z
|
||||
|
||||
# Configure OpenClaw to bind to Tailscale
|
||||
cat > ~/.openclaw/openclaw.json <<EOF
|
||||
{
|
||||
"gateway": {
|
||||
"bind": "tailnet",
|
||||
"port": 18789
|
||||
},
|
||||
"tailscale": {
|
||||
"mode": "on",
|
||||
"resetOnExit": false
|
||||
}
|
||||
}
|
||||
EOF
|
||||
```
|
||||
|
||||
**Tailscale vs SSH Tunnel:**
|
||||
|
||||
| Feature | Tailscale | SSH Tunnel |
|
||||
|---------|-----------|------------|
|
||||
| Setup | Very easy | Moderate |
|
||||
| Persistence | Automatic | Requires autossh |
|
||||
| Multiple devices | Built-in | One tunnel per connection |
|
||||
| NAT traversal | Works | Requires exposed SSH |
|
||||
| Access control | Tailscale ACL | SSH keys |
|
||||
|
||||
### 6.3 Firewall Configuration (UFW)
|
||||
|
||||
```bash
|
||||
# Default deny
|
||||
sudo ufw default deny incoming
|
||||
sudo ufw default allow outgoing
|
||||
|
||||
# Allow SSH
|
||||
sudo ufw allow 22/tcp
|
||||
|
||||
# Allow Tailscale only (if using)
|
||||
sudo ufw allow in on tailscale0 to any port 18789
|
||||
|
||||
# Block public access to OpenClaw
|
||||
# (bind is 127.0.0.1, so this is defense in depth)
|
||||
|
||||
sudo ufw enable
|
||||
```
|
||||
|
||||
### 6.4 Authentication Configuration
|
||||
|
||||
```json
|
||||
{
|
||||
"gateway": {
|
||||
"bind": "127.0.0.1",
|
||||
"port": 18789,
|
||||
"auth": {
|
||||
"mode": "token",
|
||||
"token": "your-64-char-hex-token-here"
|
||||
},
|
||||
"controlUi": {
|
||||
"allowedOrigins": [
|
||||
"http://localhost:18789",
|
||||
"https://your-domain.tailnet-name.ts.net"
|
||||
],
|
||||
"allowInsecureAuth": false,
|
||||
"dangerouslyDisableDeviceAuth": false
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Generate secure token:**
|
||||
|
||||
```bash
|
||||
openssl rand -hex 32
|
||||
```
|
||||
|
||||
### 6.5 Sandboxing Considerations
|
||||
|
||||
OpenClaw executes arbitrary shell commands and file operations by default. For production:
|
||||
|
||||
1. **Run as non-root user:**
|
||||
```bash
|
||||
sudo useradd -r -s /bin/false openclaw
|
||||
sudo mkdir -p /home/openclaw/.openclaw
|
||||
sudo chown -R openclaw:openclaw /home/openclaw
|
||||
```
|
||||
|
||||
2. **Use Docker for isolation:**
|
||||
```bash
|
||||
docker run --security-opt=no-new-privileges \
|
||||
--cap-drop=ALL \
|
||||
--read-only \
|
||||
--tmpfs /tmp:noexec,nosuid,size=100m \
|
||||
openclaw/openclaw:latest
|
||||
```
|
||||
|
||||
3. **Enable dmPolicy for channels:**
|
||||
```json
|
||||
{
|
||||
"channels": {
|
||||
"telegram": {
|
||||
"dmPolicy": "pairing" // Require one-time code for new contacts
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 7. MCP (Model Context Protocol) Tools
|
||||
|
||||
### 7.1 Overview
|
||||
|
||||
MCP is an open standard created by Anthropic (and donated to the Linux Foundation in December 2025) that lets AI applications connect to external tools through a universal interface. Think of it as "USB-C for AI."
|
||||
|
||||
### 7.2 MCP vs OpenClaw Skills
|
||||
|
||||
| Aspect | MCP | OpenClaw Skills |
|
||||
|--------|-----|-----------------|
|
||||
| **Protocol** | Standardized (Anthropic) | OpenClaw-specific |
|
||||
| **Isolation** | Process-isolated | Runs in agent context |
|
||||
| **Security** | Higher (sandboxed) | Lower (full system access) |
|
||||
| **Discovery** | Automatic via protocol | Manual via SKILL.md |
|
||||
| **Ecosystem** | 10,000+ servers | 5400+ skills |
|
||||
|
||||
**Note:** OpenClaw currently has limited native MCP support. Use `mcporter` tool for MCP integration.
|
||||
|
||||
### 7.3 Using MCPorter (MCP Bridge)
|
||||
|
||||
```bash
|
||||
# Install mcporter
|
||||
clawhub install mcporter
|
||||
|
||||
# Configure MCP server
|
||||
mcporter config add github \
|
||||
--url "https://api.github.com/mcp" \
|
||||
--token "ghp_..."
|
||||
|
||||
# List available tools
|
||||
mcporter list
|
||||
|
||||
# Call MCP tool
|
||||
mcporter call github.list_repos --owner "rockachopa"
|
||||
```
|
||||
|
||||
### 7.4 Popular MCP Servers
|
||||
|
||||
| Server | Purpose | Integration |
|
||||
|--------|---------|-------------|
|
||||
| **GitHub** | Repo management, PRs, issues | `mcp-github` |
|
||||
| **Slack** | Messaging, channel management | `mcp-slack` |
|
||||
| **PostgreSQL** | Database queries | `mcp-postgres` |
|
||||
| **Filesystem** | File operations (sandboxed) | `mcp-filesystem` |
|
||||
| **Brave Search** | Web search | `mcp-brave` |
|
||||
|
||||
---
|
||||
|
||||
## 8. Recommendations for Timmy Time Dashboard
|
||||
|
||||
### 8.1 Deployment Strategy for Hermes VPS (2GB RAM)
|
||||
|
||||
Given the hardware constraints, here's the recommended approach:
|
||||
|
||||
**Option A: External API Only (Recommended)**
|
||||
```
|
||||
┌─────────────────────────────────────────┐
|
||||
│ Hermes VPS (2GB RAM) │
|
||||
│ ┌─────────────────────────────────┐ │
|
||||
│ │ OpenClaw Gateway │ │
|
||||
│ │ (npm global install) │ │
|
||||
│ └─────────────┬───────────────────┘ │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ ┌─────────────────────────────────┐ │
|
||||
│ │ OpenRouter API (Free Tier) │ │
|
||||
│ │ google/gemma-3-4b-it:free │ │
|
||||
│ └─────────────────────────────────┘ │
|
||||
│ │
|
||||
│ NO OLLAMA - insufficient RAM │
|
||||
└─────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**Option B: Hybrid with External Ollama**
|
||||
```
|
||||
┌──────────────────────┐ ┌──────────────────────────┐
|
||||
│ Hermes VPS (2GB) │ │ Separate Ollama Host │
|
||||
│ ┌────────────────┐ │ │ ┌────────────────────┐ │
|
||||
│ │ OpenClaw │ │◄────►│ │ Ollama Server │ │
|
||||
│ │ (external API) │ │ │ │ (8GB+ RAM required)│ │
|
||||
│ └────────────────┘ │ │ └────────────────────┘ │
|
||||
└──────────────────────┘ └──────────────────────────┘
|
||||
```
|
||||
|
||||
### 8.2 Configuration Summary
|
||||
|
||||
```json
|
||||
{
|
||||
"gateway": {
|
||||
"bind": "127.0.0.1",
|
||||
"port": 18789,
|
||||
"auth": {
|
||||
"mode": "token",
|
||||
"token": "GENERATE_WITH_OPENSSL_RAND"
|
||||
}
|
||||
},
|
||||
"models": {
|
||||
"providers": {
|
||||
"openrouter": {
|
||||
"apiKey": "${OPENROUTER_API_KEY}",
|
||||
"models": [
|
||||
{
|
||||
"id": "google/gemma-3-4b-it:free",
|
||||
"contextWindow": 131072,
|
||||
"maxTokens": 4096
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"agents": {
|
||||
"defaults": {
|
||||
"model": {
|
||||
"primary": "openrouter/google/gemma-3-4b-it:free"
|
||||
},
|
||||
"maxIterations": 10,
|
||||
"timeout": 90,
|
||||
"maxConcurrent": 2
|
||||
}
|
||||
},
|
||||
"channels": {
|
||||
"telegram": {
|
||||
"enabled": true,
|
||||
"dmPolicy": "pairing"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 8.3 Migration Path (Future)
|
||||
|
||||
When upgrading to a larger VPS (4-8GB RAM):
|
||||
|
||||
1. **Phase 1:** Enable Ollama with Llama 3.2 3B as fallback
|
||||
2. **Phase 2:** Add browser automation skills (requires 4GB+ RAM)
|
||||
3. **Phase 3:** Enable multi-agent routing with specialized agents
|
||||
4. **Phase 4:** Add MCP server integration for external tools
|
||||
|
||||
---
|
||||
|
||||
## 9. References
|
||||
|
||||
1. OpenClaw Official Documentation: https://docs.openclaw.ai
|
||||
2. Ollama Integration Guide: https://docs.ollama.com/integrations/openclaw
|
||||
3. OpenRouter Documentation: https://openrouter.ai/docs
|
||||
4. MCP Specification: https://modelcontextprotocol.io
|
||||
5. OpenClaw Community Discord: https://discord.gg/openclaw
|
||||
6. GitHub Repository: https://github.com/openclaw/openclaw
|
||||
|
||||
---
|
||||
|
||||
## 10. Appendix: Quick Command Reference
|
||||
|
||||
```bash
|
||||
# Installation
|
||||
curl -fsSL https://openclaw.ai/install.sh | bash
|
||||
|
||||
# Configuration
|
||||
openclaw onboard # Interactive setup
|
||||
openclaw configure # Edit config
|
||||
openclaw config set <key> <value> # Set specific value
|
||||
|
||||
# Gateway management
|
||||
openclaw gateway # Start gateway
|
||||
openclaw gateway --verbose # Start with logs
|
||||
openclaw gateway status # Check status
|
||||
openclaw gateway restart # Restart gateway
|
||||
openclaw gateway stop # Stop gateway
|
||||
|
||||
# Model management
|
||||
openclaw models list # List available models
|
||||
openclaw models set <model> # Set default model
|
||||
openclaw models status # Check model status
|
||||
|
||||
# Diagnostics
|
||||
openclaw doctor # System health check
|
||||
openclaw doctor --repair # Auto-fix issues
|
||||
openclaw security audit # Security check
|
||||
|
||||
# Dashboard
|
||||
openclaw dashboard # Open web UI
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
*End of Research Report*
|
||||
726
poetry.lock
generated
726
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -20,6 +20,7 @@ packages = [
|
||||
{ include = "spark", from = "src" },
|
||||
{ include = "timmy", from = "src" },
|
||||
{ include = "timmy_serve", from = "src" },
|
||||
{ include = "timmyctl", from = "src" },
|
||||
]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
@@ -49,6 +50,7 @@ sounddevice = { version = ">=0.4.6", optional = true }
|
||||
sentence-transformers = { version = ">=2.0.0", optional = true }
|
||||
numpy = { version = ">=1.24.0", optional = true }
|
||||
requests = { version = ">=2.31.0", optional = true }
|
||||
trafilatura = { version = ">=1.6.0", optional = true }
|
||||
GitPython = { version = ">=3.1.40", optional = true }
|
||||
pytest = { version = ">=8.0.0", optional = true }
|
||||
pytest-asyncio = { version = ">=0.24.0", optional = true }
|
||||
@@ -66,6 +68,7 @@ voice = ["pyttsx3", "openai-whisper", "piper-tts", "sounddevice"]
|
||||
celery = ["celery"]
|
||||
embeddings = ["sentence-transformers", "numpy"]
|
||||
git = ["GitPython"]
|
||||
research = ["requests", "trafilatura", "google-search-results"]
|
||||
dev = ["pytest", "pytest-asyncio", "pytest-cov", "pytest-timeout", "pytest-randomly", "pytest-xdist", "selenium"]
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
@@ -82,6 +85,7 @@ mypy = ">=1.0.0"
|
||||
[tool.poetry.scripts]
|
||||
timmy = "timmy.cli:main"
|
||||
timmy-serve = "timmy_serve.cli:main"
|
||||
timmyctl = "timmyctl.cli:main"
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = ["tests"]
|
||||
|
||||
@@ -17,8 +17,23 @@ REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||
RETRO_FILE = REPO_ROOT / ".loop" / "retro" / "cycles.jsonl"
|
||||
SUMMARY_FILE = REPO_ROOT / ".loop" / "retro" / "summary.json"
|
||||
|
||||
GITEA_API = "http://localhost:3000/api/v1"
|
||||
REPO_SLUG = "rockachopa/Timmy-time-dashboard"
|
||||
|
||||
def _get_gitea_api() -> str:
|
||||
"""Read Gitea API URL from env var, then ~/.hermes/gitea_api file, then default."""
|
||||
# Check env vars first (TIMMY_GITEA_API is preferred, GITEA_API for compatibility)
|
||||
api_url = os.environ.get("TIMMY_GITEA_API") or os.environ.get("GITEA_API")
|
||||
if api_url:
|
||||
return api_url
|
||||
# Check ~/.hermes/gitea_api file
|
||||
api_file = Path.home() / ".hermes" / "gitea_api"
|
||||
if api_file.exists():
|
||||
return api_file.read_text().strip()
|
||||
# Default fallback
|
||||
return "http://localhost:3000/api/v1"
|
||||
|
||||
|
||||
GITEA_API = _get_gitea_api()
|
||||
REPO_SLUG = os.environ.get("REPO_SLUG", "rockachopa/Timmy-time-dashboard")
|
||||
TOKEN_FILE = Path.home() / ".hermes" / "gitea_token"
|
||||
|
||||
TAG_RE = re.compile(r"\[([^\]]+)\]")
|
||||
|
||||
186
scripts/claude_quota_check.sh
Executable file
186
scripts/claude_quota_check.sh
Executable file
@@ -0,0 +1,186 @@
|
||||
#!/bin/bash
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
# claude_quota_check.sh — Check Claude Code / Claude.ai quota
|
||||
#
|
||||
# Usage:
|
||||
# ./claude_quota_check.sh # Human-readable output
|
||||
# ./claude_quota_check.sh --json # Raw JSON for piping
|
||||
# ./claude_quota_check.sh --watch # Refresh every 60s
|
||||
#
|
||||
# Requires: macOS with Claude Code authenticated, python3
|
||||
# Token is read from macOS Keychain (same as Claude Code uses)
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# ── Extract OAuth token from macOS Keychain ──
|
||||
get_token() {
    # Print the Claude Code OAuth access token stored in the macOS Keychain.
    # Exits 1 (message on stderr) when the Keychain entry is missing or its
    # JSON payload cannot be parsed.
    local keychain_json
    if ! keychain_json=$(security find-generic-password -s "Claude Code-credentials" -w 2>/dev/null); then
        echo "ERROR: No Claude Code credentials found in Keychain." >&2
        echo "Run 'claude' and authenticate first." >&2
        exit 1
    fi

    # The token lives under 'claudeAiOauth' when present, else at top level
    if ! echo "$keychain_json" | python3 -c "
import sys, json
data = json.load(sys.stdin)
oauth = data.get('claudeAiOauth', data)
print(oauth['accessToken'])
" 2>/dev/null; then
        echo "ERROR: Could not parse credentials JSON." >&2
        exit 1
    fi
}
|
||||
|
||||
# ── Fetch usage from Anthropic API ──
|
||||
fetch_usage() {
    # GET the OAuth usage endpoint using the bearer token in $1 and print the
    # raw response body on stdout.
    local token="$1"
    local -a headers=(
        -H "Accept: application/json"
        -H "Content-Type: application/json"
        -H "User-Agent: claude-code/2.0.32"
        -H "Authorization: Bearer ${token}"
        -H "anthropic-beta: oauth-2025-04-20"
    )
    curl -s "https://api.anthropic.com/api/oauth/usage" "${headers[@]}"
}
|
||||
|
||||
# ── Format time remaining ──
|
||||
time_remaining() {
    # Print how long until the ISO-8601 timestamp in $1, as "<H>h <M>m",
    # "<M>m", or "resetting now" when it is already past. Prints "unknown"
    # for an empty/"null" argument or a timestamp that cannot be parsed.
    local reset_at="$1"
    if [ -z "$reset_at" ] || [ "$reset_at" = "null" ]; then
        echo "unknown"
        return
    fi

    # The timestamp is passed as argv, never spliced into the Python source,
    # so quotes in the API value cannot break or inject code.
    python3 -c '
import sys
from datetime import datetime, timezone

reset = datetime.fromisoformat(sys.argv[1].replace("Z", "+00:00"))
now = datetime.now(timezone.utc)
diff = reset - now
if diff.total_seconds() <= 0:
    print("resetting now")
else:
    hours = int(diff.total_seconds() // 3600)
    mins = int((diff.total_seconds() % 3600) // 60)
    if hours > 0:
        print(f"{hours}h {mins}m")
    else:
        print(f"{mins}m")
' "$reset_at" 2>/dev/null || echo "unknown"
}
|
||||
|
||||
# ── Bar visualization ──
|
||||
usage_bar() {
    # Draw a 30-cell colored bar for the utilization fraction in $1
    # (no trailing newline). Color: green < 50%, yellow 50-80%, red otherwise.
    local pct="$1"
    local width=30

    # One Python call computes both the fill count and the ANSI color code.
    # The value is passed as argv (not spliced into code), the fill is clamped
    # to [0, width] so an out-of-range value cannot overflow the bar, and the
    # external `bc` dependency is no longer needed.
    local spec
    spec=$(python3 -c '
import sys
pct = float(sys.argv[1])
width = int(sys.argv[2])
filled = max(0, min(width, int(pct * width)))
color = 32 if pct < 0.50 else 33 if pct < 0.80 else 31
print(filled, color)
' "$pct" "$width")

    local filled="${spec% *}"
    local color_code="${spec#* }"
    local empty=$((width - filled))
    local i

    printf '\033[%sm' "$color_code"
    for ((i = 0; i < filled; i++)); do printf "█"; done
    printf "\033[90m"
    for ((i = 0; i < empty; i++)); do printf "░"; done
    printf "\033[0m"
}
|
||||
|
||||
# ── Display formatted output ──
|
||||
display() {
    # Render the quota panel for the usage JSON in $1: a bar, percentage and
    # time-to-reset for the 5-hour and 7-day windows, followed by guidance
    # lines chosen from the percentage thresholds below.
    local usage_json="$1"
    local now
    now=$(date "+%Y-%m-%d %H:%M:%S %Z")

    # Shared field extractor: argv = window key default. A missing window,
    # missing key or JSON null all yield the default, so a null utilization
    # no longer reaches float() as the string "None".
    local field_py='
import sys, json
window, key, default = sys.argv[1:4]
value = (json.load(sys.stdin).get(window) or {}).get(key)
print(default if value is None else value)
'

    local five_util five_reset seven_util seven_reset
    five_util=$(echo "$usage_json" | python3 -c "$field_py" five_hour utilization 0 2>/dev/null || echo "0")
    five_reset=$(echo "$usage_json" | python3 -c "$field_py" five_hour resets_at null 2>/dev/null || echo "null")
    seven_util=$(echo "$usage_json" | python3 -c "$field_py" seven_day utilization 0 2>/dev/null || echo "0")
    seven_reset=$(echo "$usage_json" | python3 -c "$field_py" seven_day resets_at null 2>/dev/null || echo "null")

    # Values go in as argv rather than being interpolated into Python source
    local to_pct_py='import sys; print(int(float(sys.argv[1]) * 100))'
    local five_pct seven_pct
    five_pct=$(python3 -c "$to_pct_py" "$five_util")
    seven_pct=$(python3 -c "$to_pct_py" "$seven_util")

    local five_remaining seven_remaining
    five_remaining=$(time_remaining "$five_reset")
    seven_remaining=$(time_remaining "$seven_reset")

    echo ""
    echo " ┌─────────────────────────────────────────────┐"
    echo " │ CLAUDE QUOTA STATUS │"
    printf " │ %-38s│\n" "$now"
    echo " ├─────────────────────────────────────────────┤"
    printf " │ 5-hour window: "
    usage_bar "$five_util"
    printf " %3d%% │\n" "$five_pct"
    printf " │ Resets in: %-33s│\n" "$five_remaining"
    echo " │ │"
    printf " │ 7-day window: "
    usage_bar "$seven_util"
    printf " %3d%% │\n" "$seven_pct"
    printf " │ Resets in: %-33s│\n" "$seven_remaining"
    echo " └─────────────────────────────────────────────┘"
    echo ""

    # Decision guidance for Timmy
    if (( five_pct >= 80 )); then
        echo " ⚠ 5-hour window critical. Switch to local Qwen3-14B."
        echo " Reserve remaining quota for high-value tasks only."
    elif (( five_pct >= 50 )); then
        echo " ~ 5-hour window half spent. Batch remaining requests."
    else
        echo " ✓ 5-hour window healthy. Full speed ahead."
    fi

    if (( seven_pct >= 80 )); then
        echo " ⚠ Weekly quota critical! Operate in local-only mode."
    elif (( seven_pct >= 60 )); then
        echo " ~ Weekly quota past 60%. Plan usage carefully."
    fi

    echo ""
}
|
||||
|
||||
# ── Main ──
|
||||
main() {
    # Entry point. Modes (first argument):
    #   --json   pretty-print the raw usage JSON
    #   --watch  redraw the panel every 60s until interrupted
    #   (other)  draw the panel once
    local token
    token=$(get_token)

    local usage
    usage=$(fetch_usage "$token")

    # A here-string is used instead of `echo | grep -q`: under pipefail an
    # early grep exit can SIGPIPE echo and make a real match look like a miss.
    if [ -z "$usage" ] || grep -q '"error"' <<<"$usage"; then
        echo "ERROR: Failed to fetch usage data." >&2
        echo "$usage" >&2
        exit 1
    fi

    case "${1:-}" in
        --json)
            echo "$usage" | python3 -m json.tool
            ;;
        --watch)
            while true; do
                clear
                # A transient failure must neither kill the loop (set -e)
                # nor be rendered as a misleading 0% panel.
                if usage=$(fetch_usage "$token") && [ -n "$usage" ] \
                    && ! grep -q '"error"' <<<"$usage"; then
                    display "$usage"
                else
                    echo "ERROR: Failed to fetch usage data." >&2
                    echo "$usage" >&2
                fi
                echo " Refreshing in 60s... (Ctrl+C to stop)"
                sleep 60
            done
            ;;
        *)
            display "$usage"
            ;;
    esac
}
|
||||
|
||||
main "$@"
|
||||
@@ -54,6 +54,7 @@ REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||
RETRO_FILE = REPO_ROOT / ".loop" / "retro" / "cycles.jsonl"
|
||||
SUMMARY_FILE = REPO_ROOT / ".loop" / "retro" / "summary.json"
|
||||
EPOCH_COUNTER_FILE = REPO_ROOT / ".loop" / "retro" / ".epoch_counter"
|
||||
CYCLE_RESULT_FILE = REPO_ROOT / ".loop" / "cycle_result.json"
|
||||
|
||||
# How many recent entries to include in rolling summary
|
||||
SUMMARY_WINDOW = 50
|
||||
@@ -246,9 +247,39 @@ def update_summary() -> None:
|
||||
SUMMARY_FILE.write_text(json.dumps(summary, indent=2) + "\n")
|
||||
|
||||
|
||||
def _load_cycle_result() -> dict:
|
||||
"""Read .loop/cycle_result.json if it exists; return empty dict on failure."""
|
||||
if not CYCLE_RESULT_FILE.exists():
|
||||
return {}
|
||||
try:
|
||||
raw = CYCLE_RESULT_FILE.read_text().strip()
|
||||
# Strip hermes fence markers (```json ... ```) if present
|
||||
if raw.startswith("```"):
|
||||
lines = raw.splitlines()
|
||||
lines = [l for l in lines if not l.startswith("```")]
|
||||
raw = "\n".join(lines)
|
||||
return json.loads(raw)
|
||||
except (json.JSONDecodeError, OSError):
|
||||
return {}
|
||||
|
||||
|
||||
def main() -> None:
|
||||
args = parse_args()
|
||||
|
||||
# Backfill from cycle_result.json when CLI args have defaults
|
||||
cr = _load_cycle_result()
|
||||
if cr:
|
||||
if args.issue is None and cr.get("issue"):
|
||||
args.issue = int(cr["issue"])
|
||||
if args.type == "unknown" and cr.get("type"):
|
||||
args.type = cr["type"]
|
||||
if args.tests_passed == 0 and cr.get("tests_passed"):
|
||||
args.tests_passed = int(cr["tests_passed"])
|
||||
if not args.notes and cr.get("notes"):
|
||||
args.notes = cr["notes"]
|
||||
# Consume-once: delete after reading so stale results don't poison future cycles
|
||||
CYCLE_RESULT_FILE.unlink(missing_ok=True)
|
||||
|
||||
# Auto-detect issue from branch when not explicitly provided
|
||||
if args.issue is None:
|
||||
args.issue = detect_issue_from_branch()
|
||||
|
||||
83
scripts/gitea_backup.sh
Executable file
83
scripts/gitea_backup.sh
Executable file
@@ -0,0 +1,83 @@
|
||||
#!/bin/bash
# Gitea backup script — run on the VPS before any hardening changes.
# Usage: sudo bash scripts/gitea_backup.sh [off-site-dest]
#
# off-site-dest: optional rsync/scp destination for off-site copy
# e.g. user@backup-host:/backups/gitea/
#
# Steps: run `gitea dump`, locate the newest dump zip, check it is non-empty,
# restrict its permissions, list its contents for the expected components,
# and optionally rsync it to the off-site destination.
#
# Refs: #971, #990

set -euo pipefail

BACKUP_DIR="/opt/gitea/backups"
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
GITEA_CONF="/etc/gitea/app.ini"
# NOTE(review): GITEA_WORK_DIR is not referenced below — kept as-is
GITEA_WORK_DIR="/var/lib/gitea"
OFFSITE_DEST="${1:-}"

echo "=== Gitea Backup — $TIMESTAMP ==="

# Ensure backup directory exists
mkdir -p "$BACKUP_DIR"
cd "$BACKUP_DIR"

# The dump contains the database and app.ini: create it owner-only from the
# start rather than relying solely on the chmod further down.
umask 077

# Run the dump
echo "[1/4] Running gitea dump..."
gitea dump -c "$GITEA_CONF"

# Find the newest zip (gitea dump names it gitea-dump-*.zip).
# A glob loop is used instead of `ls -t | head -1`: with pipefail a failing
# ls would abort the script before the error message below could be printed.
BACKUP_FILE=""
for candidate in "$BACKUP_DIR"/gitea-dump-*.zip; do
    [ -e "$candidate" ] || continue
    if [ -z "$BACKUP_FILE" ] || [ "$candidate" -nt "$BACKUP_FILE" ]; then
        BACKUP_FILE="$candidate"
    fi
done

if [ -z "$BACKUP_FILE" ]; then
    echo "ERROR: No backup zip found in $BACKUP_DIR" >&2
    exit 1
fi

# GNU stat first, BSD stat as fallback
BACKUP_SIZE=$(stat -c%s "$BACKUP_FILE" 2>/dev/null || stat -f%z "$BACKUP_FILE")
echo "[2/4] Backup created: $BACKUP_FILE ($BACKUP_SIZE bytes)"

if [ "$BACKUP_SIZE" -eq 0 ]; then
    echo "ERROR: Backup file is 0 bytes" >&2
    exit 1
fi

# Lock down permissions
chmod 600 "$BACKUP_FILE"

# Verify contents
echo "[3/4] Verifying backup contents..."
CONTENTS=$(unzip -l "$BACKUP_FILE" 2>/dev/null || true)

# check_component <substring> <label>: report whether the zip listing
# mentions <substring>. Warn-only; never aborts the backup.
check_component() {
    # Here-string instead of `echo | grep -q`: on a large listing grep's early
    # exit can SIGPIPE echo and, under pipefail, turn a match into a WARN.
    if grep -qF -- "$1" <<<"$CONTENTS"; then
        echo " OK: $2"
    else
        echo " WARN: $2 not found in backup"
    fi
}

check_component "gitea-db.sql" "Database dump"
check_component "gitea-repo" "Repositories"
check_component "custom" "Custom config"
check_component "app.ini" "app.ini"

# Off-site copy
if [ -n "$OFFSITE_DEST" ]; then
    echo "[4/4] Copying to off-site: $OFFSITE_DEST"
    rsync -avz "$BACKUP_FILE" "$OFFSITE_DEST"
    echo " Off-site copy complete."
else
    echo "[4/4] No off-site destination provided. Skipping."
    echo " To copy later: scp $BACKUP_FILE user@backup-host:/backups/gitea/"
fi

echo ""
echo "=== Backup complete ==="
echo "File: $BACKUP_FILE"
echo "Size: $BACKUP_SIZE bytes"
echo ""
echo "To verify restore on a clean instance:"
echo " 1. Copy zip to test machine"
echo " 2. unzip $BACKUP_FILE"
echo " 3. gitea restore --from <extracted-dir> -c /etc/gitea/app.ini"
echo " 4. Verify repos and DB are intact"
|
||||
@@ -27,11 +27,30 @@ from pathlib import Path
|
||||
REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||
QUEUE_FILE = REPO_ROOT / ".loop" / "queue.json"
|
||||
IDLE_STATE_FILE = REPO_ROOT / ".loop" / "idle_state.json"
|
||||
CYCLE_RESULT_FILE = REPO_ROOT / ".loop" / "cycle_result.json"
|
||||
TOKEN_FILE = Path.home() / ".hermes" / "gitea_token"
|
||||
|
||||
GITEA_API = os.environ.get("GITEA_API", "http://localhost:3000/api/v1")
|
||||
|
||||
def _get_gitea_api() -> str:
|
||||
"""Read Gitea API URL from env var, then ~/.hermes/gitea_api file, then default."""
|
||||
# Check env vars first (TIMMY_GITEA_API is preferred, GITEA_API for compatibility)
|
||||
api_url = os.environ.get("TIMMY_GITEA_API") or os.environ.get("GITEA_API")
|
||||
if api_url:
|
||||
return api_url
|
||||
# Check ~/.hermes/gitea_api file
|
||||
api_file = Path.home() / ".hermes" / "gitea_api"
|
||||
if api_file.exists():
|
||||
return api_file.read_text().strip()
|
||||
# Default fallback
|
||||
return "http://localhost:3000/api/v1"
|
||||
|
||||
|
||||
GITEA_API = _get_gitea_api()
|
||||
REPO_SLUG = os.environ.get("REPO_SLUG", "rockachopa/Timmy-time-dashboard")
|
||||
|
||||
# Default cycle duration in seconds (5 min); stale threshold = 2× this
|
||||
CYCLE_DURATION = int(os.environ.get("CYCLE_DURATION", "300"))
|
||||
|
||||
# Backoff sequence: 60s, 120s, 240s, 600s max
|
||||
BACKOFF_BASE = 60
|
||||
BACKOFF_MAX = 600
|
||||
@@ -77,6 +96,89 @@ def _fetch_open_issue_numbers() -> set[int] | None:
|
||||
return None
|
||||
|
||||
|
||||
def _load_cycle_result() -> dict:
|
||||
"""Read cycle_result.json, handling markdown-fenced JSON."""
|
||||
if not CYCLE_RESULT_FILE.exists():
|
||||
return {}
|
||||
try:
|
||||
raw = CYCLE_RESULT_FILE.read_text().strip()
|
||||
if raw.startswith("```"):
|
||||
lines = raw.splitlines()
|
||||
lines = [ln for ln in lines if not ln.startswith("```")]
|
||||
raw = "\n".join(lines)
|
||||
return json.loads(raw)
|
||||
except (json.JSONDecodeError, OSError):
|
||||
return {}
|
||||
|
||||
|
||||
def _is_issue_open(issue_number: int) -> bool | None:
    """Ask Gitea whether one issue is currently open.

    Args:
        issue_number: Issue number within ``REPO_SLUG``.

    Returns:
        True when the API reports state "open", False for any other state,
        and None when no token is available or the request/parsing fails.
    """
    token = _get_token()
    if not token:
        return None

    request_headers = {
        "Authorization": f"token {token}",
        "Accept": "application/json",
    }
    try:
        req = urllib.request.Request(
            f"{GITEA_API}/repos/{REPO_SLUG}/issues/{issue_number}",
            headers=request_headers,
        )
        with urllib.request.urlopen(req, timeout=10) as resp:
            payload = json.loads(resp.read())
        return payload.get("state") == "open"
    except Exception:
        # Best effort: any failure means "unknown", never "closed"
        return None
|
||||
|
||||
|
||||
def validate_cycle_result() -> bool:
    """Delete cycle_result.json before a cycle if it can no longer be trusted.

    Two checks run in order:
      1. Age: a file older than 2 x CYCLE_DURATION is deleted.
      2. Issue: a file whose "issue" refers to a closed issue is deleted.

    A file with no issue, a non-numeric issue value, or an issue whose state
    could not be determined (API failure) is left in place.

    Returns:
        True if the file was deleted, False otherwise.
    """
    if not CYCLE_RESULT_FILE.exists():
        return False

    # --- Age check ---
    try:
        age = time.time() - CYCLE_RESULT_FILE.stat().st_mtime
    except OSError:
        return False

    max_age = CYCLE_DURATION * 2
    if age > max_age:
        print(
            f"[loop-guard] cycle_result.json is {int(age)}s old "
            f"(threshold {max_age}s) — removing stale file"
        )
        CYCLE_RESULT_FILE.unlink(missing_ok=True)
        return True

    # --- Issue check ---
    raw_issue = _load_cycle_result().get("issue")
    if raw_issue is None:
        return False
    try:
        issue_num = int(raw_issue)
    except (ValueError, TypeError):
        return False

    # Only a definite "closed" answer removes the file; None (API failure)
    # and True both keep it.
    if _is_issue_open(issue_num) is not False:
        return False

    print(
        f"[loop-guard] cycle_result.json references closed "
        f"issue #{issue_num} — removing"
    )
    CYCLE_RESULT_FILE.unlink(missing_ok=True)
    return True
|
||||
|
||||
|
||||
def load_queue() -> list[dict]:
|
||||
"""Load queue.json and return ready items, filtering out closed issues."""
|
||||
if not QUEUE_FILE.exists():
|
||||
@@ -100,7 +202,11 @@ def load_queue() -> list[dict]:
|
||||
# Persist the cleaned queue so stale entries don't recur
|
||||
_save_cleaned_queue(data, open_numbers)
|
||||
return ready
|
||||
except (json.JSONDecodeError, OSError):
|
||||
except json.JSONDecodeError as exc:
|
||||
print(f"[loop-guard] WARNING: Corrupt queue.json ({exc}) — returning empty queue")
|
||||
return []
|
||||
except OSError as exc:
|
||||
print(f"[loop-guard] WARNING: Cannot read queue.json ({exc}) — returning empty queue")
|
||||
return []
|
||||
|
||||
|
||||
@@ -150,6 +256,9 @@ def main() -> int:
|
||||
}, indent=2))
|
||||
return 0
|
||||
|
||||
# Pre-cycle validation: remove stale cycle_result.json
|
||||
validate_cycle_result()
|
||||
|
||||
ready = load_queue()
|
||||
|
||||
if ready:
|
||||
|
||||
107
scripts/run_benchmarks.py
Normal file
107
scripts/run_benchmarks.py
Normal file
@@ -0,0 +1,107 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Run the agent performance regression benchmark suite.
|
||||
|
||||
Usage::
|
||||
|
||||
python scripts/run_benchmarks.py # all scenarios
|
||||
python scripts/run_benchmarks.py --tags navigation # filter by tag
|
||||
python scripts/run_benchmarks.py --output results/benchmarks.jsonl
|
||||
python scripts/run_benchmarks.py --compare results/benchmarks.jsonl
|
||||
|
||||
Exit codes:
|
||||
0 — all scenarios passed
|
||||
1 — one or more scenarios failed
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Ensure src/ is on the path when invoked directly
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "src"))
|
||||
|
||||
from infrastructure.world.benchmark.metrics import BenchmarkMetrics, load_history
|
||||
from infrastructure.world.benchmark.runner import BenchmarkRunner
|
||||
from infrastructure.world.benchmark.scenarios import load_scenarios
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse the benchmark runner's command-line options.

    Returns:
        Namespace with ``tags`` (list of str or None), ``output`` (Path or
        None) and ``compare`` (Path or None).
    """
    cli = argparse.ArgumentParser(
        description="Agent performance regression benchmark suite",
    )
    cli.add_argument(
        "--tags",
        nargs="*",
        default=None,
        help="Filter scenarios by tag (e.g. navigation quest)",
    )
    # Both file options share the same shape: an optional Path
    path_options = (
        ("--output", "JSONL file to append results to"),
        ("--compare", "JSONL file with baseline results for regression comparison"),
    )
    for flag, help_text in path_options:
        cli.add_argument(flag, type=Path, default=None, help=help_text)
    return cli.parse_args()
|
||||
|
||||
|
||||
async def main() -> int:
    """Run the selected benchmark scenarios and report the outcome.

    Prints the run summary, optionally saves the metrics to ``--output`` and
    optionally prints a comparison against a baseline rebuilt from the
    ``--compare`` history file.

    Returns:
        0 when no scenario failed; 1 when any scenario failed or no scenario
        matched the tag filter.
    """
    args = parse_args()

    scenarios = load_scenarios(tags=args.tags)
    if not scenarios:
        print("No matching scenarios found.")
        return 1

    print(f"Running {len(scenarios)} benchmark scenario(s)...\n")
    metrics = await BenchmarkRunner().run(scenarios)
    print(metrics.summary())

    if args.output:
        metrics.save(args.output)

    history = load_history(args.compare) if args.compare else None
    if history:
        from infrastructure.world.benchmark.metrics import ScenarioResult, compare_runs

        # NOTE(review): treats history[0] as the last recorded run — confirm
        # the ordering returned by load_history.
        entry = history[0]
        baseline = BenchmarkMetrics(
            timestamp=entry.get("timestamp", ""),
            commit_sha=entry.get("commit_sha", ""),
            total_time_ms=entry.get("total_time_ms", 0),
        )
        for recorded in entry.get("scenarios", []):
            baseline.results.append(
                ScenarioResult(
                    scenario_name=recorded["scenario_name"],
                    success=recorded["success"],
                    cycles_used=recorded["cycles_used"],
                    max_cycles=recorded["max_cycles"],
                    wall_time_ms=recorded.get("wall_time_ms", 0),
                    llm_calls=recorded.get("llm_calls", 0),
                    metabolic_cost=recorded.get("metabolic_cost", 0.0),
                )
            )
        print()
        print(compare_runs(metrics, baseline))

    return 0 if metrics.fail_count == 0 else 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(asyncio.run(main()))
|
||||
342
scripts/test_hermes4.py
Normal file
342
scripts/test_hermes4.py
Normal file
@@ -0,0 +1,342 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Hermes 4 smoke test and tool-calling validation script.
|
||||
|
||||
Tests the Hermes 4 14B model after importing into Ollama. Covers:
|
||||
1. Basic connectivity — model responds
|
||||
2. Memory usage — under 28 GB with model loaded
|
||||
3. Tool calling — structured JSON output (not raw text)
|
||||
4. Reasoning — <think> tag toggling works
|
||||
5. Timmy-persona smoke test — agent identity prompt
|
||||
|
||||
Usage:
|
||||
python scripts/test_hermes4.py # Run all tests
|
||||
python scripts/test_hermes4.py --model hermes4-14b
|
||||
python scripts/test_hermes4.py --model hermes4-36b --ollama-url http://localhost:11434
|
||||
|
||||
Epic: #1091 Project Bannerlord — AutoLoRA Sovereignty Loop (Step 2 of 7)
|
||||
Refs: #1101
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
try:
|
||||
import requests
|
||||
except ImportError:
|
||||
print("ERROR: 'requests' not installed. Run: pip install requests")
|
||||
sys.exit(1)
|
||||
|
||||
OLLAMA_URL = "http://localhost:11434"
|
||||
DEFAULT_MODEL = "hermes4-14b"
|
||||
MEMORY_LIMIT_GB = 28.0
|
||||
|
||||
# ── Tool schema used for tool-calling tests ──────────────────────────────────
|
||||
|
||||
READ_FILE_TOOL = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "read_file",
|
||||
"description": "Read the contents of a file at the given path",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": {
|
||||
"type": "string",
|
||||
"description": "Absolute or relative path to the file",
|
||||
}
|
||||
},
|
||||
"required": ["path"],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
LIST_ISSUES_TOOL = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "list_issues",
|
||||
"description": "List open issues from a Gitea repository",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"repo": {"type": "string", "description": "owner/repo slug"},
|
||||
"state": {
|
||||
"type": "string",
|
||||
"enum": ["open", "closed", "all"],
|
||||
"description": "Issue state filter",
|
||||
},
|
||||
},
|
||||
"required": ["repo"],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
# ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _post(endpoint: str, payload: dict, timeout: int = 60) -> dict[str, Any]:
    """Send *payload* as JSON to an Ollama endpoint and decode the JSON reply.

    Args:
        endpoint: Path appended to ``OLLAMA_URL`` (e.g. "/api/chat").
        payload: Request body, serialized as JSON.
        timeout: Request timeout in seconds.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.HTTPError: via ``raise_for_status`` on a 4xx/5xx response.
    """
    response = requests.post(f"{OLLAMA_URL}{endpoint}", json=payload, timeout=timeout)
    response.raise_for_status()
    return response.json()
|
||||
|
||||
|
||||
def _ollama_memory_gb() -> float:
|
||||
"""Estimate Ollama process RSS in GB using ps (macOS/Linux)."""
|
||||
try:
|
||||
# Look for ollama process RSS (macOS: column 6 in MB, Linux: column 6 in KB)
|
||||
result = subprocess.run(
|
||||
["ps", "-axo", "pid,comm,rss"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
)
|
||||
total_kb = 0
|
||||
for line in result.stdout.splitlines():
|
||||
if "ollama" in line.lower():
|
||||
parts = line.split()
|
||||
try:
|
||||
total_kb += int(parts[-1])
|
||||
except (ValueError, IndexError):
|
||||
pass
|
||||
return total_kb / (1024 * 1024) # KB → GB
|
||||
except Exception:
|
||||
return 0.0
|
||||
|
||||
|
||||
def _check_model_available(model: str) -> bool:
    """Report whether Ollama lists a model whose name contains *model*.

    Queries ``/api/tags`` and does a substring match against each listed
    model name. Any request or parsing failure counts as "not available".
    """
    try:
        response = requests.get(f"{OLLAMA_URL}/api/tags", timeout=10)
        response.raise_for_status()
        listed = [entry["name"] for entry in response.json().get("models", [])]
    except Exception:
        return False

    for name in listed:
        if model in name:
            return True
    return False
|
||||
|
||||
|
||||
def _chat(model: str, messages: list[dict], tools: list | None = None) -> dict:
    """Issue a non-streaming ``/api/chat`` request to Ollama.

    Args:
        model: Ollama model name.
        messages: Chat messages (role/content dicts).
        tools: Optional tool schemas; the "tools" key is only sent when this
            is non-empty.

    Returns:
        The decoded JSON response from ``_post``.
    """
    body: dict = {
        "model": model,
        "messages": messages,
        "stream": False,
        **({"tools": tools} if tools else {}),
    }
    return _post("/api/chat", body, timeout=120)
|
||||
|
||||
|
||||
# ── Test cases ────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_model_available(model: str) -> bool:
    """Check [1/5]: the model is registered in Ollama.

    Prints either a confirmation or import/pull instructions.

    Returns:
        True when ``_check_model_available`` finds the model, else False.
    """
    print(f"\n[1/5] Checking model availability: {model}")
    if not _check_model_available(model):
        print(
            f" ✗ {model} not found. Import with:\n"
            f" ollama create {model} -f Modelfile.hermes4-14b\n"
            f" Or pull directly if on registry:\n"
            f" ollama pull {model}"
        )
        return False

    print(f" ✓ {model} is available in Ollama")
    return True
|
||||
|
||||
|
||||
def test_basic_response(model: str) -> bool:
    """Check [2/5]: the model echoes a fixed marker when asked to.

    Returns:
        True when the reply contains "HERMES_OK"; False on any other reply or
        when the request raises.
    """
    print("\n[2/5] Basic response test")
    prompt = [{"role": "user", "content": "Reply with exactly: HERMES_OK"}]
    try:
        started = time.time()
        reply = _chat(model, prompt)
        elapsed = time.time() - started
        content = reply.get("message", {}).get("content", "")
        marker_found = "HERMES_OK" in content
        if marker_found:
            print(f" ✓ Basic response OK ({elapsed:.1f}s): {content.strip()}")
        else:
            print(f" ✗ Unexpected response ({elapsed:.1f}s): {content[:200]!r}")
        return marker_found
    except Exception as exc:
        print(f" ✗ Request failed: {exc}")
        return False
|
||||
|
||||
|
||||
def test_memory_usage() -> bool:
    """Check [3/5]: Ollama's RSS estimate is below ``MEMORY_LIMIT_GB``.

    A reading of exactly 0.0 is treated as "could not measure" and passes.

    Returns:
        True when the reading is unavailable or under the limit, else False.
    """
    print(f"\n[3/5] Memory usage check (limit: {MEMORY_LIMIT_GB} GB)")
    used_gb = _ollama_memory_gb()
    if used_gb == 0.0:
        print(" ~ Could not determine memory usage (ps unavailable?), skipping")
        return True

    within_limit = used_gb < MEMORY_LIMIT_GB
    if within_limit:
        print(f" ✓ Memory usage: {used_gb:.1f} GB (under {MEMORY_LIMIT_GB} GB limit)")
    else:
        print(
            f" ✗ Memory usage: {used_gb:.1f} GB exceeds {MEMORY_LIMIT_GB} GB limit.\n"
            " Consider using Q4_K_M quantisation or reducing num_ctx."
        )
    return within_limit
|
||||
|
||||
|
||||
def test_tool_calling(model: str) -> bool:
    """Check [4/5]: a tool-use prompt yields a ``read_file`` tool call.

    Outcomes:
      * Structured ``tool_calls`` present: pass only if the first call's
        function name is "read_file".
      * No structured call, but the text mentions "read_file" and contains
        "{": accepted as a partial pass.
      * Anything else, or a request error: fail.
    """
    print("\n[4/5] Tool-calling test")
    request = [
        {
            "role": "user",
            "content": "Please read the file at /tmp/test.txt using the read_file tool.",
        }
    ]
    try:
        started = time.time()
        reply = _chat(model, request, tools=[READ_FILE_TOOL])
        elapsed = time.time() - started
        message = reply.get("message", {})
        calls = message.get("tool_calls", [])

        if calls:
            function = calls[0].get("function", {})
            print(
                f" ✓ Tool call produced ({elapsed:.1f}s):\n"
                f" function: {function.get('name')}\n"
                f" arguments: {json.dumps(function.get('arguments', {}), indent=6)}"
            )
            # A structured call to the wrong function is still a failure
            return function.get("name") == "read_file"

        # Some models emit the call as JSON text instead of tool_calls
        content = message.get("content", "")
        looks_like_text_call = "read_file" in content and "{" in content
        if not looks_like_text_call:
            print(
                f" ✗ No tool call in response ({elapsed:.1f}s).\n"
                f" Content: {content[:300]!r}"
            )
            return False

        print(
            f" ~ Model returned tool call as text (not structured). ({elapsed:.1f}s)\n"
            f" This is acceptable for the base model before fine-tuning.\n"
            f" Content: {content[:300]}"
        )
        return True
    except Exception as exc:
        print(f" ✗ Tool-calling request failed: {exc}")
        return False
|
||||
|
||||
|
||||
def test_timmy_persona(model: str) -> bool:
    """Check [5/5]: the model answers under a Timmy persona system prompt.

    This is a soft check: a reply without the "Timmy" identifier is reported
    but still passes. Only a request error fails.
    """
    print("\n[5/5] Timmy-persona smoke test")
    system_prompt = (
        "You are Timmy, Alexander's personal AI agent. "
        "You are concise, direct, and helpful. "
        "You always start your responses with 'Timmy here:'."
    )
    conversation = [
        {"role": "system", "content": system_prompt},
        {
            "role": "user",
            "content": "What is your name and what can you help me with?",
        },
    ]
    try:
        started = time.time()
        reply = _chat(model, conversation)
        elapsed = time.time() - started
        content = reply.get("message", {}).get("content", "")
        in_character = "Timmy" in content or "timmy" in content.lower()
        if in_character:
            print(f" ✓ Persona accepted ({elapsed:.1f}s): {content[:200].strip()}")
        else:
            print(
                f" ~ Persona response lacks 'Timmy' identifier ({elapsed:.1f}s).\n"
                f" This is a fine-tuning target.\n"
                f" Response: {content[:200]!r}"
            )
        # Soft pass either way: the base model need not be fully in-character
        return True
    except Exception as exc:
        print(f" ✗ Persona test failed: {exc}")
        return False
|
||||
|
||||
|
||||
# ── Main ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def main() -> int:
    """Run the Hermes 4 validation suite from the command line.

    Parses ``--model`` and ``--ollama-url``, rebinds the module-level
    ``OLLAMA_URL`` to the chosen URL (trailing "/" removed), then runs the
    availability gate followed by the four remaining checks.

    Returns:
        Process exit code: 0 when every executed check passed, otherwise 1
        (including when the model is not available).
    """
    # Must come before any use of OLLAMA_URL in this function: Python rejects
    # a ``global`` declaration that follows a use of the name (SyntaxError).
    global OLLAMA_URL

    parser = argparse.ArgumentParser(description="Hermes 4 smoke test suite")
    parser.add_argument(
        "--model",
        default=DEFAULT_MODEL,
        help=f"Ollama model name (default: {DEFAULT_MODEL})",
    )
    parser.add_argument(
        "--ollama-url",
        default=OLLAMA_URL,
        help=f"Ollama base URL (default: {OLLAMA_URL})",
    )
    args = parser.parse_args()

    OLLAMA_URL = args.ollama_url.rstrip("/")
    model = args.model

    print("=" * 60)
    print(f"Hermes 4 Validation Suite — {model}")
    print(f"Ollama: {OLLAMA_URL}")
    print("=" * 60)

    results: dict[str, bool] = {}

    # Test 1: availability (gate — skip remaining if model missing)
    results["available"] = test_model_available(model)
    if not results["available"]:
        print("\n⚠ Model not available — skipping remaining tests.")
        print(" Import the model first (see Modelfile.hermes4-14b).")
        _print_summary(results)
        return 1

    # Tests 2–5
    results["basic_response"] = test_basic_response(model)
    results["memory_usage"] = test_memory_usage()
    results["tool_calling"] = test_tool_calling(model)
    results["timmy_persona"] = test_timmy_persona(model)

    return _print_summary(results)
|
||||
|
||||
|
||||
def _print_summary(results: dict[str, bool]) -> int:
|
||||
passed = sum(results.values())
|
||||
total = len(results)
|
||||
print("\n" + "=" * 60)
|
||||
print(f"Results: {passed}/{total} passed")
|
||||
print("=" * 60)
|
||||
for name, ok in results.items():
|
||||
icon = "✓" if ok else "✗"
|
||||
print(f" {icon} {name}")
|
||||
|
||||
if passed == total:
|
||||
print("\n✓ All tests passed. Hermes 4 is ready for AutoLoRA fine-tuning.")
|
||||
print(" Next step: document WORK vs FAIL skill list → fine-tuning targets.")
|
||||
elif results.get("tool_calling") is False:
|
||||
print("\n⚠ Tool-calling FAILED. This is the primary fine-tuning target.")
|
||||
print(" Base model may need LoRA tuning on tool-use examples.")
|
||||
else:
|
||||
print("\n~ Partial pass. Review failures above before fine-tuning.")
|
||||
|
||||
return 0 if passed == total else 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -20,11 +20,28 @@ from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
# ── Config ──────────────────────────────────────────────────────────────
|
||||
GITEA_API = os.environ.get("GITEA_API", "http://localhost:3000/api/v1")
|
||||
|
||||
|
||||
def _get_gitea_api() -> str:
    """Resolve the Gitea API base URL.

    Lookup order:
      1. ``TIMMY_GITEA_API`` environment variable (preferred).
      2. ``GITEA_API`` environment variable (kept for compatibility).
      3. Contents of ``~/.hermes/gitea_api``.
      4. The built-in default ``http://localhost:3000/api/v1``.

    A source that is missing, unreadable, or blank after stripping whitespace
    is skipped, so the function never returns an empty string.

    Returns:
        The first non-blank URL found, with surrounding whitespace removed.
    """
    for var in ("TIMMY_GITEA_API", "GITEA_API"):
        value = os.environ.get(var, "").strip()
        if value:
            return value

    api_file = Path.home() / ".hermes" / "gitea_api"
    try:
        from_file = api_file.read_text().strip()
    except (OSError, UnicodeDecodeError):
        # Missing, unreadable or undecodable file: treat as "not configured"
        # rather than failing at import time.
        from_file = ""
    if from_file:
        return from_file

    # Default fallback
    return "http://localhost:3000/api/v1"
|
||||
|
||||
|
||||
GITEA_API = _get_gitea_api()
|
||||
REPO_SLUG = os.environ.get("REPO_SLUG", "rockachopa/Timmy-time-dashboard")
|
||||
TOKEN_FILE = Path.home() / ".hermes" / "gitea_token"
|
||||
REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||
QUEUE_FILE = REPO_ROOT / ".loop" / "queue.json"
|
||||
QUEUE_BACKUP_FILE = REPO_ROOT / ".loop" / "queue.json.bak"
|
||||
RETRO_FILE = REPO_ROOT / ".loop" / "retro" / "triage.jsonl"
|
||||
QUARANTINE_FILE = REPO_ROOT / ".loop" / "quarantine.json"
|
||||
CYCLE_RETRO_FILE = REPO_ROOT / ".loop" / "retro" / "cycles.jsonl"
|
||||
@@ -326,9 +343,38 @@ def run_triage() -> list[dict]:
|
||||
ready = [s for s in scored if s["ready"]]
|
||||
not_ready = [s for s in scored if not s["ready"]]
|
||||
|
||||
# Save backup before writing (if current file exists and is valid)
|
||||
if QUEUE_FILE.exists():
|
||||
try:
|
||||
json.loads(QUEUE_FILE.read_text()) # Validate current file
|
||||
QUEUE_BACKUP_FILE.write_text(QUEUE_FILE.read_text())
|
||||
except (json.JSONDecodeError, OSError):
|
||||
pass # Current file is corrupt, don't overwrite backup
|
||||
|
||||
# Write new queue file
|
||||
QUEUE_FILE.parent.mkdir(parents=True, exist_ok=True)
|
||||
QUEUE_FILE.write_text(json.dumps(ready, indent=2) + "\n")
|
||||
|
||||
# Validate the write by re-reading and parsing
|
||||
try:
|
||||
json.loads(QUEUE_FILE.read_text())
|
||||
except (json.JSONDecodeError, OSError) as exc:
|
||||
print(f"[triage] ERROR: queue.json validation failed: {exc}", file=sys.stderr)
|
||||
# Restore from backup if available
|
||||
if QUEUE_BACKUP_FILE.exists():
|
||||
try:
|
||||
backup_data = QUEUE_BACKUP_FILE.read_text()
|
||||
json.loads(backup_data) # Validate backup
|
||||
QUEUE_FILE.write_text(backup_data)
|
||||
print(f"[triage] Restored queue.json from backup")
|
||||
except (json.JSONDecodeError, OSError) as restore_exc:
|
||||
print(f"[triage] ERROR: Backup restore failed: {restore_exc}", file=sys.stderr)
|
||||
# Write empty list as last resort
|
||||
QUEUE_FILE.write_text("[]\n")
|
||||
else:
|
||||
# No backup, write empty list
|
||||
QUEUE_FILE.write_text("[]\n")
|
||||
|
||||
# Write retro entry
|
||||
retro_entry = {
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
|
||||
67
skills/research/architecture_spike.md
Normal file
67
skills/research/architecture_spike.md
Normal file
@@ -0,0 +1,67 @@
|
||||
---
|
||||
name: Architecture Spike
|
||||
type: research
|
||||
typical_query_count: 2-4
|
||||
expected_output_length: 600-1200 words
|
||||
cascade_tier: groq_preferred
|
||||
description: >
|
||||
Investigate how to connect two systems or components. Produces an integration
|
||||
architecture with sequence diagram, key decisions, and a proof-of-concept outline.
|
||||
---
|
||||
|
||||
# Architecture Spike: Connect {system_a} to {system_b}
|
||||
|
||||
## Context
|
||||
|
||||
We need to integrate **{system_a}** with **{system_b}** in the context of
|
||||
**{project_context}**. This spike answers: what is the best way to wire them
|
||||
together, and what are the trade-offs?
|
||||
|
||||
## Constraints
|
||||
|
||||
- Prefer approaches that avoid adding new infrastructure dependencies.
|
||||
- The integration should be **{sync_or_async}** (synchronous / asynchronous).
|
||||
- Must work within: {environment_constraints}.
|
||||
|
||||
## Research Steps
|
||||
|
||||
1. Identify the APIs / protocols exposed by both systems.
|
||||
2. List all known integration patterns (direct API, message queue, webhook, SDK, etc.).
|
||||
3. Evaluate each pattern for complexity, reliability, and latency.
|
||||
4. Select the recommended approach and outline a proof-of-concept.
|
||||
|
||||
## Output Format
|
||||
|
||||
### Integration Options
|
||||
|
||||
| Pattern | Complexity | Reliability | Latency | Notes |
|
||||
|---------|-----------|-------------|---------|-------|
|
||||
| ... | ... | ... | ... | ... |
|
||||
|
||||
### Recommended Approach
|
||||
|
||||
**Pattern:** {pattern_name}
|
||||
|
||||
**Why:** One paragraph explaining the choice.
|
||||
|
||||
### Sequence Diagram
|
||||
|
||||
```
|
||||
{system_a} -> {middleware} -> {system_b}
|
||||
```
|
||||
|
||||
Describe the data flow step by step:
|
||||
|
||||
1. {system_a} does X...
|
||||
2. {middleware} transforms / routes...
|
||||
3. {system_b} receives Y...
|
||||
|
||||
### Proof-of-Concept Outline
|
||||
|
||||
- Files to create or modify
|
||||
- Key libraries / dependencies needed
|
||||
- Estimated effort: {effort_estimate}
|
||||
|
||||
### Open Questions
|
||||
|
||||
Bullet list of decisions that need human input before proceeding.
|
||||
74
skills/research/competitive_scan.md
Normal file
74
skills/research/competitive_scan.md
Normal file
@@ -0,0 +1,74 @@
|
||||
---
|
||||
name: Competitive Scan
|
||||
type: research
|
||||
typical_query_count: 3-5
|
||||
expected_output_length: 800-1500 words
|
||||
cascade_tier: groq_preferred
|
||||
description: >
|
||||
Compare a project against its alternatives. Produces a feature matrix,
|
||||
strengths/weaknesses analysis, and positioning summary.
|
||||
---
|
||||
|
||||
# Competitive Scan: {project} vs Alternatives
|
||||
|
||||
## Context
|
||||
|
||||
Compare **{project}** against **{alternatives}** (comma-separated list of
|
||||
competitors). The goal is to understand where {project} stands and identify
|
||||
differentiation opportunities.
|
||||
|
||||
## Constraints
|
||||
|
||||
- Comparison date: {date}.
|
||||
- Focus areas: {focus_areas} (e.g., features, pricing, community, performance).
|
||||
- Perspective: {perspective} (user, developer, business).
|
||||
|
||||
## Research Steps
|
||||
|
||||
1. Gather key facts about {project} (features, pricing, community size, release cadence).
|
||||
2. Gather the same data for each alternative in {alternatives}.
|
||||
3. Build a feature comparison matrix.
|
||||
4. Identify strengths and weaknesses for each entry.
|
||||
5. Summarize positioning and recommend next steps.
|
||||
|
||||
## Output Format
|
||||
|
||||
### Overview
|
||||
|
||||
One paragraph: what space does {project} compete in, and who are the main players?
|
||||
|
||||
### Feature Matrix
|
||||
|
||||
| Feature / Attribute | {project} | {alt_1} | {alt_2} | {alt_3} |
|
||||
|--------------------|-----------|---------|---------|---------|
|
||||
| {feature_1} | ... | ... | ... | ... |
|
||||
| {feature_2} | ... | ... | ... | ... |
|
||||
| Pricing | ... | ... | ... | ... |
|
||||
| License | ... | ... | ... | ... |
|
||||
| Community Size | ... | ... | ... | ... |
|
||||
| Last Major Release | ... | ... | ... | ... |
|
||||
|
||||
### Strengths & Weaknesses
|
||||
|
||||
#### {project}
|
||||
- **Strengths:** ...
|
||||
- **Weaknesses:** ...
|
||||
|
||||
#### {alt_1}
|
||||
- **Strengths:** ...
|
||||
- **Weaknesses:** ...
|
||||
|
||||
_(Repeat for each alternative)_
|
||||
|
||||
### Positioning Map
|
||||
|
||||
Describe where each project sits along the key dimensions (e.g., simplicity
|
||||
vs power, free vs paid, niche vs general).
|
||||
|
||||
### Recommendations
|
||||
|
||||
Bullet list of actions based on the competitive landscape:
|
||||
|
||||
- **Differentiate on:** {differentiator}
|
||||
- **Watch out for:** {threat}
|
||||
- **Consider adopting from {alt}:** {feature_or_approach}
|
||||
68
skills/research/game_analysis.md
Normal file
68
skills/research/game_analysis.md
Normal file
@@ -0,0 +1,68 @@
|
||||
---
|
||||
name: Game Analysis
|
||||
type: research
|
||||
typical_query_count: 2-3
|
||||
expected_output_length: 600-1000 words
|
||||
cascade_tier: local_ok
|
||||
description: >
|
||||
Evaluate a game for AI agent playability. Assesses API availability,
|
||||
observation/action spaces, and existing bot ecosystems.
|
||||
---
|
||||
|
||||
# Game Analysis: {game}
|
||||
|
||||
## Context
|
||||
|
||||
Evaluate **{game}** to determine whether an AI agent can play it effectively.
|
||||
Focus on programmatic access, observation space, action space, and existing
|
||||
bot/AI ecosystems.
|
||||
|
||||
## Constraints
|
||||
|
||||
- Platform: {platform} (PC, console, mobile, browser).
|
||||
- Agent type: {agent_type} (reinforcement learning, rule-based, LLM-driven, hybrid).
|
||||
- Budget for API/licenses: {budget}.
|
||||
|
||||
## Research Steps
|
||||
|
||||
1. Identify official APIs, modding support, or programmatic access methods for {game}.
|
||||
2. Characterize the observation space (screen pixels, game state JSON, memory reading, etc.).
|
||||
3. Characterize the action space (keyboard/mouse, API calls, controller inputs).
|
||||
4. Survey existing bots, AI projects, or research papers for {game}.
|
||||
5. Assess feasibility and difficulty for the target agent type.
|
||||
|
||||
## Output Format
|
||||
|
||||
### Game Profile
|
||||
|
||||
| Property | Value |
|
||||
|-------------------|------------------------|
|
||||
| Game | {game} |
|
||||
| Genre | {genre} |
|
||||
| Platform | {platform} |
|
||||
| API Available | Yes / No / Partial |
|
||||
| Mod Support | Yes / No / Limited |
|
||||
| Existing AI Work | Extensive / Some / None|
|
||||
|
||||
### Observation Space
|
||||
|
||||
Describe what data the agent can access and how (API, screen capture, memory hooks, etc.).
|
||||
|
||||
### Action Space
|
||||
|
||||
Describe how the agent can interact with the game (input methods, timing constraints, etc.).
|
||||
|
||||
### Existing Ecosystem
|
||||
|
||||
List known bots, frameworks, research papers, or communities working on AI for {game}.
|
||||
|
||||
### Feasibility Assessment
|
||||
|
||||
- **Difficulty:** Easy / Medium / Hard / Impractical
|
||||
- **Best approach:** {recommended_agent_type}
|
||||
- **Key challenges:** Bullet list
|
||||
- **Estimated time to MVP:** {time_estimate}
|
||||
|
||||
### Recommendation
|
||||
|
||||
One paragraph: should we proceed, and if so, what is the first step?
|
||||
79
skills/research/integration_guide.md
Normal file
79
skills/research/integration_guide.md
Normal file
@@ -0,0 +1,79 @@
|
||||
---
|
||||
name: Integration Guide
|
||||
type: research
|
||||
typical_query_count: 3-5
|
||||
expected_output_length: 1000-2000 words
|
||||
cascade_tier: groq_preferred
|
||||
description: >
|
||||
Step-by-step guide to wire a specific tool into an existing stack,
|
||||
complete with code samples, configuration, and testing steps.
|
||||
---
|
||||
|
||||
# Integration Guide: Wire {tool} into {stack}
|
||||
|
||||
## Context
|
||||
|
||||
Integrate **{tool}** into our **{stack}** stack. The goal is to
|
||||
**{integration_goal}** (e.g., "add vector search to the dashboard",
|
||||
"send notifications via Telegram").
|
||||
|
||||
## Constraints
|
||||
|
||||
- Must follow existing project conventions (see CLAUDE.md).
|
||||
- No new cloud AI dependencies unless explicitly approved.
|
||||
- Environment config via `pydantic-settings` / `config.py`.
|
||||
|
||||
## Research Steps
|
||||
|
||||
1. Review {tool}'s official documentation for installation and setup.
|
||||
2. Identify the minimal dependency set required.
|
||||
3. Map {tool}'s API to our existing patterns (singletons, graceful degradation).
|
||||
4. Write integration code with proper error handling.
|
||||
5. Define configuration variables and their defaults.
|
||||
|
||||
## Output Format
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- Dependencies to install (with versions)
|
||||
- External services or accounts required
|
||||
- Environment variables to configure
|
||||
|
||||
### Configuration
|
||||
|
||||
```python
|
||||
# In config.py — add these fields to Settings:
|
||||
{config_fields}
|
||||
```
|
||||
|
||||
### Implementation
|
||||
|
||||
```python
|
||||
# {file_path}
|
||||
{implementation_code}
|
||||
```
|
||||
|
||||
### Graceful Degradation
|
||||
|
||||
Describe how the integration behaves when {tool} is unavailable:
|
||||
|
||||
| Scenario | Behavior | Log Level |
|
||||
|-----------------------|--------------------|-----------|
|
||||
| {tool} not installed | {fallback} | WARNING |
|
||||
| {tool} unreachable | {fallback} | WARNING |
|
||||
| Invalid credentials | {fallback} | ERROR |
|
||||
|
||||
### Testing
|
||||
|
||||
```python
|
||||
# tests/unit/test_{tool_snake}.py
|
||||
{test_code}
|
||||
```
|
||||
|
||||
### Verification Checklist
|
||||
|
||||
- [ ] Dependency added to pyproject.toml
|
||||
- [ ] Config fields added with sensible defaults
|
||||
- [ ] Graceful degradation tested (service down)
|
||||
- [ ] Unit tests pass (`tox -e unit`)
|
||||
- [ ] No new linting errors (`tox -e lint`)
|
||||
67
skills/research/state_of_art.md
Normal file
67
skills/research/state_of_art.md
Normal file
@@ -0,0 +1,67 @@
|
||||
---
|
||||
name: State of the Art
|
||||
type: research
|
||||
typical_query_count: 4-6
|
||||
expected_output_length: 1000-2000 words
|
||||
cascade_tier: groq_preferred
|
||||
description: >
|
||||
Comprehensive survey of what currently exists in a given field or domain.
|
||||
Produces a structured landscape overview with key players, trends, and gaps.
|
||||
---
|
||||
|
||||
# State of the Art: {field} (as of {date})
|
||||
|
||||
## Context
|
||||
|
||||
Survey the current landscape of **{field}**. Identify key players, recent
|
||||
developments, dominant approaches, and notable gaps. This is a point-in-time
|
||||
snapshot intended to inform decision-making.
|
||||
|
||||
## Constraints
|
||||
|
||||
- Focus on developments from the last {timeframe} (e.g., 12 months, 2 years).
|
||||
- Prioritize {priority} (open-source, commercial, academic, or all).
|
||||
- Target audience: {audience} (technical team, leadership, general).
|
||||
|
||||
## Research Steps
|
||||
|
||||
1. Identify the major categories or sub-domains within {field}.
|
||||
2. For each category, list the leading projects, companies, or research groups.
|
||||
3. Note recent milestones, releases, or breakthroughs.
|
||||
4. Identify emerging trends and directions.
|
||||
5. Highlight gaps — things that don't exist yet but should.
|
||||
|
||||
## Output Format
|
||||
|
||||
### Executive Summary
|
||||
|
||||
Two to three sentences: what is the state of {field} right now?
|
||||
|
||||
### Landscape Map
|
||||
|
||||
| Category | Key Players | Maturity | Trend |
|
||||
|---------------|--------------------------|-------------|-------------|
|
||||
| {category_1} | {player_a}, {player_b} | Early / GA | Growing / Stable / Declining |
|
||||
| {category_2} | {player_c}, {player_d} | Early / GA | Growing / Stable / Declining |
|
||||
|
||||
### Recent Milestones
|
||||
|
||||
Chronological list of notable events in the last {timeframe}:
|
||||
|
||||
- **{date_1}:** {event_description}
|
||||
- **{date_2}:** {event_description}
|
||||
|
||||
### Trends
|
||||
|
||||
Numbered list of the top 3-5 trends shaping {field}:
|
||||
|
||||
1. **{trend_name}** — {one-line description}
|
||||
2. **{trend_name}** — {one-line description}
|
||||
|
||||
### Gaps & Opportunities
|
||||
|
||||
Bullet list of things that are missing, underdeveloped, or ripe for innovation.
|
||||
|
||||
### Implications for Us
|
||||
|
||||
One paragraph: what does this mean for our project? What should we do next?
|
||||
52
skills/research/tool_evaluation.md
Normal file
52
skills/research/tool_evaluation.md
Normal file
@@ -0,0 +1,52 @@
|
||||
---
|
||||
name: Tool Evaluation
|
||||
type: research
|
||||
typical_query_count: 3-5
|
||||
expected_output_length: 800-1500 words
|
||||
cascade_tier: groq_preferred
|
||||
description: >
|
||||
Discover and evaluate all shipping tools/libraries/services in a given domain.
|
||||
Produces a ranked comparison table with pros, cons, and recommendation.
|
||||
---
|
||||
|
||||
# Tool Evaluation: {domain}
|
||||
|
||||
## Context
|
||||
|
||||
You are researching tools, libraries, and services for **{domain}**.
|
||||
The goal is to find everything that is currently shipping (not vaporware)
|
||||
and produce a structured comparison.
|
||||
|
||||
## Constraints
|
||||
|
||||
- Only include tools that have public releases or hosted services available today.
|
||||
- If a tool is in beta/preview, note that clearly.
|
||||
- Focus on {focus_criteria} when evaluating (e.g., cost, ease of integration, community size).
|
||||
|
||||
## Research Steps
|
||||
|
||||
1. Identify all actively-maintained tools in the **{domain}** space.
|
||||
2. For each tool, gather: name, URL, license/pricing, last release date, language/platform.
|
||||
3. Evaluate each tool against the focus criteria.
|
||||
4. Rank by overall fit for the use case: **{use_case}**.
|
||||
|
||||
## Output Format
|
||||
|
||||
### Summary
|
||||
|
||||
One paragraph: what the landscape looks like and the top recommendation.
|
||||
|
||||
### Comparison Table
|
||||
|
||||
| Tool | License / Price | Last Release | Language | {focus_criteria} Score | Notes |
|
||||
|------|----------------|--------------|----------|----------------------|-------|
|
||||
| ... | ... | ... | ... | ... | ... |
|
||||
|
||||
### Top Pick
|
||||
|
||||
- **Recommended:** {tool_name} — {one-line reason}
|
||||
- **Runner-up:** {tool_name} — {one-line reason}
|
||||
|
||||
### Risks & Gaps
|
||||
|
||||
Bullet list of things to watch out for (missing features, vendor lock-in, etc.).
|
||||
@@ -84,16 +84,29 @@ class Settings(BaseSettings):
|
||||
# Only used when explicitly enabled and query complexity warrants it.
|
||||
grok_enabled: bool = False
|
||||
xai_api_key: str = ""
|
||||
xai_base_url: str = "https://api.x.ai/v1"
|
||||
grok_default_model: str = "grok-3-fast"
|
||||
grok_max_sats_per_query: int = 200
|
||||
grok_sats_hard_cap: int = 100 # Absolute ceiling on sats per Grok query
|
||||
grok_free: bool = False # Skip Lightning invoice when user has own API key
|
||||
|
||||
# ── Database ──────────────────────────────────────────────────────────
|
||||
db_busy_timeout_ms: int = 5000 # SQLite PRAGMA busy_timeout (ms)
|
||||
|
||||
# ── Claude (Anthropic) — cloud fallback backend ────────────────────────
|
||||
# Used when Ollama is offline and local inference isn't available.
|
||||
# Set ANTHROPIC_API_KEY to enable. Default model is Haiku (fast + cheap).
|
||||
anthropic_api_key: str = ""
|
||||
claude_model: str = "haiku"
|
||||
|
||||
# ── Content Moderation ──────────────────────────────────────────────
|
||||
# Three-layer moderation pipeline for AI narrator output.
|
||||
# Uses Llama Guard via Ollama with regex fallback.
|
||||
moderation_enabled: bool = True
|
||||
moderation_guard_model: str = "llama-guard3:1b"
|
||||
# Default confidence threshold — per-game profiles can override.
|
||||
moderation_threshold: float = 0.8
|
||||
|
||||
# ── Spark Intelligence ────────────────────────────────────────────────
|
||||
# Enable/disable the Spark cognitive layer.
|
||||
# When enabled, Spark captures swarm events, runs EIDOS predictions,
|
||||
@@ -139,6 +152,10 @@ class Settings(BaseSettings):
|
||||
# Default is False (telemetry disabled) to align with sovereign AI vision.
|
||||
telemetry_enabled: bool = False
|
||||
|
||||
# ── Sovereignty Metrics ──────────────────────────────────────────────
|
||||
# Alert when API cost per research task exceeds this threshold (USD).
|
||||
sovereignty_api_cost_alert_threshold: float = 1.00
|
||||
|
||||
# CORS allowed origins for the web chat interface (Gitea Pages, etc.)
|
||||
# Set CORS_ORIGINS as a comma-separated list, e.g. "http://localhost:3000,https://example.com"
|
||||
cors_origins: list[str] = [
|
||||
@@ -148,6 +165,18 @@ class Settings(BaseSettings):
|
||||
"http://127.0.0.1:8000",
|
||||
]
|
||||
|
||||
# ── Matrix Frontend Integration ────────────────────────────────────────
|
||||
# URL of the Matrix frontend (Replit/Tailscale) for CORS.
|
||||
# When set, this origin is added to CORS allowed_origins.
|
||||
# Example: "http://100.124.176.28:8080" or "https://alexanderwhitestone.com"
|
||||
matrix_frontend_url: str = "" # Empty = disabled
|
||||
|
||||
# WebSocket authentication token for Matrix connections.
|
||||
# When set, clients must provide this token via ?token= query param
|
||||
# or in the first message as {"type": "auth", "token": "..."}.
|
||||
# Empty/unset = auth disabled (dev mode).
|
||||
matrix_ws_token: str = ""
|
||||
|
||||
# Trusted hosts for the Host header check (TrustedHostMiddleware).
|
||||
# Set TRUSTED_HOSTS as a comma-separated list. Wildcards supported (e.g. "*.ts.net").
|
||||
# Defaults include localhost + Tailscale MagicDNS. Add your Tailscale IP if needed.
|
||||
@@ -273,6 +302,7 @@ class Settings(BaseSettings):
|
||||
mcp_gitea_command: str = "gitea-mcp-server -t stdio"
|
||||
mcp_filesystem_command: str = "npx -y @modelcontextprotocol/server-filesystem"
|
||||
mcp_timeout: int = 15
|
||||
mcp_bridge_timeout: int = 60 # HTTP timeout for MCP bridge Ollama calls (seconds)
|
||||
|
||||
# ── Loop QA (Self-Testing) ─────────────────────────────────────────
|
||||
# Self-test orchestrator that probes capabilities alongside the thinking loop.
|
||||
@@ -317,6 +347,13 @@ class Settings(BaseSettings):
|
||||
autoresearch_max_iterations: int = 100
|
||||
autoresearch_metric: str = "val_bpb" # metric to optimise (lower = better)
|
||||
|
||||
# ── Weekly Narrative Summary ───────────────────────────────────────
|
||||
# Generates a human-readable weekly summary of development activity.
|
||||
# Disabling this will stop the weekly narrative generation.
|
||||
weekly_narrative_enabled: bool = True
|
||||
weekly_narrative_lookback_days: int = 7
|
||||
weekly_narrative_output_dir: str = ".loop"
|
||||
|
||||
# ── Local Hands (Shell + Git) ──────────────────────────────────────
|
||||
# Enable local shell/git execution hands.
|
||||
hands_shell_enabled: bool = True
|
||||
|
||||
@@ -10,6 +10,7 @@ Key improvements:
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from contextlib import asynccontextmanager
|
||||
from pathlib import Path
|
||||
|
||||
@@ -23,6 +24,7 @@ from config import settings
|
||||
|
||||
# Import dedicated middleware
|
||||
from dashboard.middleware.csrf import CSRFMiddleware
|
||||
from dashboard.middleware.rate_limit import RateLimitMiddleware
|
||||
from dashboard.middleware.request_logging import RequestLoggingMiddleware
|
||||
from dashboard.middleware.security_headers import SecurityHeadersMiddleware
|
||||
from dashboard.routes.agents import router as agents_router
|
||||
@@ -30,6 +32,7 @@ from dashboard.routes.briefing import router as briefing_router
|
||||
from dashboard.routes.calm import router as calm_router
|
||||
from dashboard.routes.chat_api import router as chat_api_router
|
||||
from dashboard.routes.chat_api_v1 import router as chat_api_v1_router
|
||||
from dashboard.routes.daily_run import router as daily_run_router
|
||||
from dashboard.routes.db_explorer import router as db_explorer_router
|
||||
from dashboard.routes.discord import router as discord_router
|
||||
from dashboard.routes.experiments import router as experiments_router
|
||||
@@ -40,14 +43,19 @@ from dashboard.routes.memory import router as memory_router
|
||||
from dashboard.routes.mobile import router as mobile_router
|
||||
from dashboard.routes.models import api_router as models_api_router
|
||||
from dashboard.routes.models import router as models_router
|
||||
from dashboard.routes.quests import router as quests_router
|
||||
from dashboard.routes.scorecards import router as scorecards_router
|
||||
from dashboard.routes.sovereignty_metrics import router as sovereignty_metrics_router
|
||||
from dashboard.routes.spark import router as spark_router
|
||||
from dashboard.routes.system import router as system_router
|
||||
from dashboard.routes.tasks import router as tasks_router
|
||||
from dashboard.routes.telegram import router as telegram_router
|
||||
from dashboard.routes.thinking import router as thinking_router
|
||||
from dashboard.routes.tools import router as tools_router
|
||||
from dashboard.routes.tower import router as tower_router
|
||||
from dashboard.routes.voice import router as voice_router
|
||||
from dashboard.routes.work_orders import router as work_orders_router
|
||||
from dashboard.routes.world import matrix_router
|
||||
from dashboard.routes.world import router as world_router
|
||||
from timmy.workshop_state import PRESENCE_FILE
|
||||
|
||||
@@ -367,82 +375,95 @@ def _startup_init() -> None:
|
||||
|
||||
def _startup_background_tasks() -> list[asyncio.Task]:
    """Schedule the recurring background coroutines and return their tasks.

    The five core schedulers are always started. The Paperclip poller is
    added only when ``timmy.paperclip`` can be imported; otherwise it is
    skipped with a debug log line.
    """
    core_factories = (
        _briefing_scheduler,
        _thinking_scheduler,
        _loop_qa_scheduler,
        _presence_watcher,
        _start_chat_integrations_background,
    )
    bg_tasks = [asyncio.create_task(factory()) for factory in core_factories]

    try:
        from timmy.paperclip import start_paperclip_poller

        poller_task = asyncio.create_task(start_paperclip_poller())
        bg_tasks.append(poller_task)
        logger.info("Paperclip poller started")
    except ImportError:
        logger.debug("Paperclip module not found, skipping poller")

    return bg_tasks
|
||||
|
||||
|
||||
def _try_prune(label: str, prune_fn, days: int) -> None:
    """Invoke *prune_fn* as a best-effort step and log what it removed.

    Args:
        label: Prefix used in the log messages (e.g. "Memory").
        prune_fn: Zero-argument callable; its return value is logged as the
            number of removed entries when truthy.
        days: Retention window, used only in the log message.

    Any exception raised while pruning is logged at debug level and
    suppressed.
    """
    try:
        removed = prune_fn()
        if not removed:
            return
        logger.info(
            "%s auto-prune: removed %d entries older than %d days",
            label,
            removed,
            days,
        )
    except Exception as exc:
        logger.debug("%s auto-prune skipped: %s", label, exc)
|
||||
|
||||
|
||||
def _check_vault_size() -> None:
    """Log a warning when ``<repo_root>/memory/notes`` is over its size limit.

    Sums the size of every regular file under the notes directory and
    compares it, in MiB, against ``settings.memory_vault_max_mb``. Does
    nothing when the directory is absent. Any error during the scan is logged
    at debug level and suppressed.
    """
    try:
        notes_dir = Path(settings.repo_root) / "memory" / "notes"
        if not notes_dir.exists():
            return

        size_bytes = 0
        for entry in notes_dir.rglob("*"):
            if entry.is_file():
                size_bytes += entry.stat().st_size

        size_mb = size_bytes / (1024 * 1024)
        if size_mb > settings.memory_vault_max_mb:
            logger.warning(
                "Memory vault (%.1f MB) exceeds limit (%d MB) — consider archiving old notes",
                size_mb,
                settings.memory_vault_max_mb,
            )
    except Exception as exc:
        logger.debug("Vault size check skipped: %s", exc)
|
||||
|
||||
|
||||
def _startup_pruning() -> None:
    """Auto-prune old memories, thoughts, and events on startup.

    Each prune step runs only when its ``*_prune_days`` setting is positive
    and is delegated to ``_try_prune``, which logs the result and swallows
    errors. The vault size check runs when ``memory_vault_max_mb`` is
    positive.

    The optional modules are imported inside the callables handed to
    ``_try_prune`` so that a missing or broken module is logged and skipped
    like any other prune failure, instead of raising during startup.
    """
    if settings.memory_prune_days > 0:

        def _prune_memories():
            from timmy.memory_system import prune_memories

            return prune_memories(
                older_than_days=settings.memory_prune_days,
                keep_facts=settings.memory_prune_keep_facts,
            )

        _try_prune("Memory", _prune_memories, settings.memory_prune_days)

    if settings.thoughts_prune_days > 0:

        def _prune_thoughts():
            from timmy.thinking import thinking_engine

            return thinking_engine.prune_old_thoughts(
                keep_days=settings.thoughts_prune_days,
                keep_min=settings.thoughts_prune_keep_min,
            )

        _try_prune("Thought", _prune_thoughts, settings.thoughts_prune_days)

    if settings.events_prune_days > 0:

        def _prune_events():
            from swarm.event_log import prune_old_events

            return prune_old_events(
                keep_days=settings.events_prune_days,
                keep_min=settings.events_prune_keep_min,
            )

        _try_prune("Event", _prune_events, settings.events_prune_days)

    if settings.memory_vault_max_mb > 0:
        _check_vault_size()
|
||||
|
||||
|
||||
async def _shutdown_cleanup(
|
||||
@@ -513,25 +534,55 @@ app = FastAPI(
|
||||
|
||||
|
||||
def _get_cors_origins() -> list[str]:
    """Build the explicit CORS origin allow-list from settings.

    Starts from a copy of ``settings.cors_origins`` (the settings object is
    never mutated), removes the ``"*"`` wildcard unless ``settings.debug`` is
    set, and appends ``settings.matrix_frontend_url`` when configured.

    Tailscale origins are not added here; they are matched by the
    ``allow_origin_regex`` passed to ``CORSMiddleware``.

    Returns:
        The list of exact origins to pass as ``allow_origins``.
    """
    origins = list(settings.cors_origins)

    # Strip wildcards in production (security)
    if "*" in origins and not settings.debug:
        logger.warning(
            "Wildcard '*' in CORS_ORIGINS stripped in production — "
            "set explicit origins via CORS_ORIGINS env var"
        )
        origins = [o for o in origins if o != "*"]

    # Add Matrix frontend URL if configured
    if settings.matrix_frontend_url:
        # Browsers send Origin as scheme://host[:port] with no trailing slash,
        # so a configured "https://example.com/" would never match as-is.
        url = settings.matrix_frontend_url.strip().rstrip("/")
        if url and url not in origins:
            origins.append(url)
            logger.debug("Added Matrix frontend to CORS: %s", url)

    return origins
|
||||
|
||||
|
||||
# Pattern to match Tailscale origins for CORS checks.
# Tailscale assigns addresses from the CGNAT block 100.64.0.0/10
# (100.64.0.0 – 100.127.255.255). The rest of 100.0.0.0/8 is ordinary public
# address space and must not be treated as trusted, so the second octet is
# limited to 64-127 and the remaining octets to 0-255. ``\Z`` (rather than
# ``$``) rejects a trailing newline; re.ASCII keeps ``\d`` to 0-9.
_TAILSCALE_IP_PATTERN = re.compile(
    r"^https?://100\."
    r"(?:6[4-9]|[7-9]\d|1[01]\d|12[0-7])\."
    r"(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)\."
    r"(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)"
    r"(?::\d+)?\Z",
    re.ASCII,
)


def _is_tailscale_origin(origin: str) -> bool:
    """Return True when *origin* is an http(s) URL on a Tailscale IP.

    Args:
        origin: Value of an ``Origin`` header, e.g. ``"http://100.124.176.28:8080"``.

    Returns:
        True only for ``http://`` or ``https://`` origins whose host is an
        IPv4 address in 100.64.0.0/10, optionally followed by ``:port``.
    """
    return bool(_TAILSCALE_IP_PATTERN.match(origin))
|
||||
|
||||
|
||||
# Add dedicated middleware in correct order
|
||||
# 1. Logging (outermost to capture everything)
|
||||
app.add_middleware(RequestLoggingMiddleware, skip_paths=["/health"])
|
||||
|
||||
# 2. Security Headers
|
||||
# 2. Rate Limiting (before security to prevent abuse early)
|
||||
app.add_middleware(
|
||||
RateLimitMiddleware,
|
||||
path_prefixes=["/api/matrix/"],
|
||||
requests_per_minute=30,
|
||||
)
|
||||
|
||||
# 3. Security Headers
|
||||
app.add_middleware(SecurityHeadersMiddleware, production=not settings.debug)
|
||||
|
||||
# 3. CSRF Protection
|
||||
# 4. CSRF Protection
|
||||
app.add_middleware(CSRFMiddleware)
|
||||
|
||||
# 4. Standard FastAPI middleware
|
||||
@@ -545,6 +596,7 @@ app.add_middleware(
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=_get_cors_origins(),
|
||||
allow_origin_regex=r"https?://100\.\d{1,3}\.\d{1,3}\.\d{1,3}(:\d+)?",
|
||||
allow_credentials=True,
|
||||
allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
|
||||
allow_headers=["Content-Type", "Authorization"],
|
||||
@@ -583,6 +635,12 @@ app.include_router(system_router)
|
||||
app.include_router(experiments_router)
|
||||
app.include_router(db_explorer_router)
|
||||
app.include_router(world_router)
|
||||
app.include_router(matrix_router)
|
||||
app.include_router(tower_router)
|
||||
app.include_router(daily_run_router)
|
||||
app.include_router(quests_router)
|
||||
app.include_router(scorecards_router)
|
||||
app.include_router(sovereignty_metrics_router)
|
||||
|
||||
|
||||
@app.websocket("/ws")
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
"""Dashboard middleware package."""
|
||||
|
||||
from .csrf import CSRFMiddleware, csrf_exempt, generate_csrf_token, validate_csrf_token
|
||||
from .rate_limit import RateLimiter, RateLimitMiddleware
|
||||
from .request_logging import RequestLoggingMiddleware
|
||||
from .security_headers import SecurityHeadersMiddleware
|
||||
|
||||
@@ -9,6 +10,8 @@ __all__ = [
|
||||
"csrf_exempt",
|
||||
"generate_csrf_token",
|
||||
"validate_csrf_token",
|
||||
"RateLimiter",
|
||||
"RateLimitMiddleware",
|
||||
"SecurityHeadersMiddleware",
|
||||
"RequestLoggingMiddleware",
|
||||
]
|
||||
|
||||
@@ -131,7 +131,6 @@ class CSRFMiddleware(BaseHTTPMiddleware):
|
||||
For safe methods: Set a CSRF token cookie if not present.
|
||||
For unsafe methods: Validate the CSRF token or check if exempt.
|
||||
"""
|
||||
# Bypass CSRF if explicitly disabled (e.g. in tests)
|
||||
from config import settings
|
||||
|
||||
if settings.timmy_disable_csrf:
|
||||
@@ -141,52 +140,55 @@ class CSRFMiddleware(BaseHTTPMiddleware):
|
||||
if request.headers.get("upgrade", "").lower() == "websocket":
|
||||
return await call_next(request)
|
||||
|
||||
# Get existing CSRF token from cookie
|
||||
csrf_cookie = request.cookies.get(self.cookie_name)
|
||||
|
||||
# For safe methods, just ensure a token exists
|
||||
if request.method in self.SAFE_METHODS:
|
||||
response = await call_next(request)
|
||||
return await self._handle_safe_method(request, call_next, csrf_cookie)
|
||||
|
||||
# Set CSRF token cookie if not present
|
||||
if not csrf_cookie:
|
||||
new_token = generate_csrf_token()
|
||||
response.set_cookie(
|
||||
key=self.cookie_name,
|
||||
value=new_token,
|
||||
httponly=False, # Must be readable by JavaScript
|
||||
secure=settings.csrf_cookie_secure,
|
||||
samesite="Lax",
|
||||
max_age=86400, # 24 hours
|
||||
)
|
||||
return await self._handle_unsafe_method(request, call_next, csrf_cookie)
|
||||
|
||||
return response
|
||||
async def _handle_safe_method(
|
||||
self, request: Request, call_next, csrf_cookie: str | None
|
||||
) -> Response:
|
||||
"""Handle safe HTTP methods (GET, HEAD, OPTIONS, TRACE).
|
||||
|
||||
# For unsafe methods, we need to validate or check if exempt
|
||||
# First, try to validate the CSRF token
|
||||
if await self._validate_request(request, csrf_cookie):
|
||||
# Token is valid, allow the request
|
||||
return await call_next(request)
|
||||
Forwards the request and sets a CSRF token cookie if not present.
|
||||
"""
|
||||
from config import settings
|
||||
|
||||
# Token validation failed, check if the path is exempt
|
||||
path = request.url.path
|
||||
if self._is_likely_exempt(path):
|
||||
# Path is exempt, allow the request
|
||||
return await call_next(request)
|
||||
|
||||
# Token validation failed and path is not exempt
|
||||
# We still need to call the app to check if the endpoint is decorated
|
||||
# with @csrf_exempt, so we'll let it through and check after routing
|
||||
response = await call_next(request)
|
||||
|
||||
# After routing, check if the endpoint is marked as exempt
|
||||
endpoint = request.scope.get("endpoint")
|
||||
if endpoint and is_csrf_exempt(endpoint):
|
||||
# Endpoint is marked as exempt, allow the response
|
||||
return response
|
||||
if not csrf_cookie:
|
||||
new_token = generate_csrf_token()
|
||||
response.set_cookie(
|
||||
key=self.cookie_name,
|
||||
value=new_token,
|
||||
httponly=False, # Must be readable by JavaScript
|
||||
secure=settings.csrf_cookie_secure,
|
||||
samesite="Lax",
|
||||
max_age=86400, # 24 hours
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
async def _handle_unsafe_method(
|
||||
self, request: Request, call_next, csrf_cookie: str | None
|
||||
) -> Response:
|
||||
"""Handle unsafe HTTP methods (POST, PUT, DELETE, PATCH).
|
||||
|
||||
Validates the CSRF token, checks path and endpoint exemptions,
|
||||
or returns a 403 error.
|
||||
"""
|
||||
if await self._validate_request(request, csrf_cookie):
|
||||
return await call_next(request)
|
||||
|
||||
if self._is_likely_exempt(request.url.path):
|
||||
return await call_next(request)
|
||||
|
||||
endpoint = self._resolve_endpoint(request)
|
||||
if endpoint and is_csrf_exempt(endpoint):
|
||||
return await call_next(request)
|
||||
|
||||
# Endpoint is not exempt and token validation failed
|
||||
# Return 403 error
|
||||
return JSONResponse(
|
||||
status_code=403,
|
||||
content={
|
||||
@@ -196,6 +198,41 @@ class CSRFMiddleware(BaseHTTPMiddleware):
|
||||
},
|
||||
)
|
||||
|
||||
def _resolve_endpoint(self, request: Request) -> Callable | None:
|
||||
"""Resolve the route endpoint without executing it.
|
||||
|
||||
Walks the Starlette/FastAPI router to find which endpoint function
|
||||
handles this request, so we can check @csrf_exempt before any
|
||||
side effects occur.
|
||||
|
||||
Returns:
|
||||
The endpoint callable, or None if no route matched.
|
||||
"""
|
||||
# If routing already happened (endpoint in scope), use it
|
||||
endpoint = request.scope.get("endpoint")
|
||||
if endpoint:
|
||||
return endpoint
|
||||
|
||||
# Walk the middleware/app chain to find something with routes
|
||||
from starlette.routing import Match
|
||||
|
||||
app = self.app
|
||||
while app is not None:
|
||||
if hasattr(app, "routes"):
|
||||
for route in app.routes:
|
||||
match, _ = route.matches(request.scope)
|
||||
if match == Match.FULL:
|
||||
return getattr(route, "endpoint", None)
|
||||
# Try .router (FastAPI stores routes on app.router)
|
||||
if hasattr(app, "router") and hasattr(app.router, "routes"):
|
||||
for route in app.router.routes:
|
||||
match, _ = route.matches(request.scope)
|
||||
if match == Match.FULL:
|
||||
return getattr(route, "endpoint", None)
|
||||
app = getattr(app, "app", None)
|
||||
|
||||
return None
|
||||
|
||||
def _is_likely_exempt(self, path: str) -> bool:
|
||||
"""Check if a path is likely to be CSRF exempt.
|
||||
|
||||
|
||||
209
src/dashboard/middleware/rate_limit.py
Normal file
209
src/dashboard/middleware/rate_limit.py
Normal file
@@ -0,0 +1,209 @@
|
||||
"""Rate limiting middleware for FastAPI.
|
||||
|
||||
Simple in-memory rate limiter for API endpoints. Tracks requests per IP
|
||||
with configurable limits and automatic cleanup of stale entries.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from collections import deque
|
||||
|
||||
from starlette.middleware.base import BaseHTTPMiddleware
|
||||
from starlette.requests import Request
|
||||
from starlette.responses import JSONResponse, Response
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class RateLimiter:
    """In-memory rate limiter for tracking requests per IP.

    Stores request timestamps in a dict keyed by client IP; each value is a
    deque of ``time.time()`` readings that fall inside the current 60-second
    window. Stale entries are purged at most once every
    ``cleanup_interval_seconds``, triggered from ``check_request``.

    Attributes:
        requests_per_minute: Maximum requests allowed per minute per IP.
        cleanup_interval_seconds: How often to clean stale entries.
    """

    def __init__(
        self,
        requests_per_minute: int = 30,
        cleanup_interval_seconds: int = 60,
    ):
        self.requests_per_minute = requests_per_minute
        self.cleanup_interval_seconds = cleanup_interval_seconds
        self._storage: dict[str, deque[float]] = {}
        self._last_cleanup: float = time.time()
        self._window_seconds: float = 60.0  # 1 minute window

    def _get_client_ip(self, request: Request) -> str:
        """Extract client IP from request, respecting X-Forwarded-For header.

        Lookup order: first entry of ``x-forwarded-for``, then ``x-real-ip``,
        then the direct connection's host.

        Args:
            request: The incoming request.

        Returns:
            Client IP address string, or ``"unknown"`` when none is available.
        """
        # NOTE(review): these headers are supplied by the client unless a
        # trusted proxy overwrites them — confirm the deployment guarantees that.
        forwarded = request.headers.get("x-forwarded-for")
        if forwarded:
            # Take the first IP in the chain
            return forwarded.split(",")[0].strip()

        real_ip = request.headers.get("x-real-ip")
        if real_ip:
            return real_ip

        # Fall back to direct connection
        if request.client:
            return request.client.host

        return "unknown"

    def _cleanup_if_needed(self) -> None:
        """Drop expired timestamps and empty IP entries, at most once per interval."""
        now = time.time()
        if now - self._last_cleanup < self.cleanup_interval_seconds:
            return

        cutoff = now - self._window_seconds
        stale_ips: list[str] = []

        for ip, timestamps in self._storage.items():
            # Remove timestamps older than the window
            while timestamps and timestamps[0] < cutoff:
                timestamps.popleft()
            # Mark IP for removal if no recent requests
            if not timestamps:
                stale_ips.append(ip)

        # Deleted in a second pass so the dict is not mutated while iterating
        for ip in stale_ips:
            del self._storage[ip]

        self._last_cleanup = now
        if stale_ips:
            logger.debug("Rate limiter cleanup: removed %d stale IPs", len(stale_ips))

    def is_allowed(self, client_ip: str) -> tuple[bool, float]:
        """Check if a request from the given IP is allowed.

        An allowed request is recorded; a rejected one is not.

        Args:
            client_ip: The client's IP address.

        Returns:
            Tuple of (allowed: bool, retry_after: float).
            retry_after is seconds until next allowed request, 0 if allowed now.
            When ``requests_per_minute`` is zero or negative every request is
            rejected and retry_after is the full window length.
        """
        now = time.time()
        cutoff = now - self._window_seconds

        # Get or create timestamp deque for this IP
        timestamps = self._storage.setdefault(client_ip, deque())

        # Remove timestamps outside the window
        while timestamps and timestamps[0] < cutoff:
            timestamps.popleft()

        # Check if limit exceeded
        if len(timestamps) >= self.requests_per_minute:
            if not timestamps:
                # Only reachable with a non-positive limit: there is no oldest
                # request to measure from, so report the whole window.
                return False, self._window_seconds
            # The oldest recorded request leaving the window frees a slot
            retry_after = self._window_seconds - (now - timestamps[0])
            return False, max(0.0, retry_after)

        # Record this request
        timestamps.append(now)
        return True, 0.0

    def check_request(self, request: Request) -> tuple[bool, float]:
        """Check if the request is allowed under rate limits.

        Runs the periodic cleanup first, then applies ``is_allowed`` to the
        request's client IP.

        Args:
            request: The incoming request.

        Returns:
            Tuple of (allowed: bool, retry_after: float).
        """
        self._cleanup_if_needed()
        client_ip = self._get_client_ip(request)
        return self.is_allowed(client_ip)
|
||||
|
||||
|
||||
class RateLimitMiddleware(BaseHTTPMiddleware):
    """Middleware that rate limits requests on selected URL paths.

    Usage:
        # Apply to all routes (not recommended for public static files)
        app.add_middleware(RateLimitMiddleware)

        # Apply only to specific paths
        app.add_middleware(
            RateLimitMiddleware,
            path_prefixes=["/api/matrix/"],
            requests_per_minute=30,
        )

    Attributes:
        path_prefixes: List of URL path prefixes to rate limit.
            If empty, applies to all paths.
        limiter: The RateLimiter instance that does the per-IP accounting.
    """

    def __init__(
        self,
        app,
        path_prefixes: list[str] | None = None,
        requests_per_minute: int = 30,
    ):
        super().__init__(app)
        self.limiter = RateLimiter(requests_per_minute=requests_per_minute)
        # None and an empty list both mean "limit every path"
        self.path_prefixes = path_prefixes or []

    def _should_rate_limit(self, path: str) -> bool:
        """Tell whether *path* falls under rate limiting.

        Args:
            path: The request URL path.

        Returns:
            True if no prefixes are configured, or if the path starts with
            any configured prefix.
        """
        prefixes = self.path_prefixes
        if prefixes:
            return any(path.startswith(candidate) for candidate in prefixes)
        return True

    async def dispatch(self, request: Request, call_next) -> Response:
        """Reject over-limit requests on configured paths, pass the rest on.

        Args:
            request: The incoming request.
            call_next: Callable to get the response from downstream.

        Returns:
            Response from downstream, or a 429 JSON response carrying a
            ``Retry-After`` header if the client is rate limited.
        """
        if self._should_rate_limit(request.url.path):
            allowed, retry_after = self.limiter.check_request(request)
            if not allowed:
                # Truncate then add one so the advertised wait is never too short
                wait_seconds = int(retry_after) + 1
                return JSONResponse(
                    status_code=429,
                    content={
                        "error": "Rate limit exceeded. Try again later.",
                        "retry_after": wait_seconds,
                    },
                    headers={"Retry-After": str(wait_seconds)},
                )

        # Path not limited, or the client is within its budget
        return await call_next(request)
|
||||
@@ -42,6 +42,114 @@ class RequestLoggingMiddleware(BaseHTTPMiddleware):
|
||||
self.skip_paths = set(skip_paths or [])
|
||||
self.log_level = log_level
|
||||
|
||||
def _should_skip_path(self, path: str) -> bool:
|
||||
"""Check if the request path should be skipped from logging.
|
||||
|
||||
Args:
|
||||
path: The request URL path.
|
||||
|
||||
Returns:
|
||||
True if the path should be skipped, False otherwise.
|
||||
"""
|
||||
return path in self.skip_paths
|
||||
|
||||
def _prepare_request_context(self, request: Request) -> tuple[str, float]:
|
||||
"""Prepare context for request processing.
|
||||
|
||||
Generates a correlation ID and records the start time.
|
||||
|
||||
Args:
|
||||
request: The incoming request.
|
||||
|
||||
Returns:
|
||||
Tuple of (correlation_id, start_time).
|
||||
"""
|
||||
correlation_id = str(uuid.uuid4())[:8]
|
||||
request.state.correlation_id = correlation_id
|
||||
start_time = time.time()
|
||||
return correlation_id, start_time
|
||||
|
||||
def _get_duration_ms(self, start_time: float) -> float:
|
||||
"""Calculate the request duration in milliseconds.
|
||||
|
||||
Args:
|
||||
start_time: The start time from time.time().
|
||||
|
||||
Returns:
|
||||
Duration in milliseconds.
|
||||
"""
|
||||
return (time.time() - start_time) * 1000
|
||||
|
||||
def _log_success(
|
||||
self,
|
||||
request: Request,
|
||||
response: Response,
|
||||
correlation_id: str,
|
||||
duration_ms: float,
|
||||
client_ip: str,
|
||||
user_agent: str,
|
||||
) -> None:
|
||||
"""Log a successful request.
|
||||
|
||||
Args:
|
||||
request: The incoming request.
|
||||
response: The response from downstream.
|
||||
correlation_id: The request correlation ID.
|
||||
duration_ms: Request duration in milliseconds.
|
||||
client_ip: Client IP address.
|
||||
user_agent: User-Agent header value.
|
||||
"""
|
||||
self._log_request(
|
||||
method=request.method,
|
||||
path=request.url.path,
|
||||
status_code=response.status_code,
|
||||
duration_ms=duration_ms,
|
||||
client_ip=client_ip,
|
||||
user_agent=user_agent,
|
||||
correlation_id=correlation_id,
|
||||
)
|
||||
|
||||
def _log_error(
    self,
    request: Request,
    exc: Exception,
    correlation_id: str,
    duration_ms: float,
    client_ip: str,
) -> None:
    """Log a failed request and capture the error.

    Emits an ERROR log line for the request, then hands the exception to
    ``infrastructure.error_capture.capture_error`` together with request
    context. A failure inside that escalation step is logged with its
    traceback and otherwise ignored.

    Args:
        request: The incoming request.
        exc: The exception that was raised.
        correlation_id: The request correlation ID.
        duration_ms: Request duration in milliseconds.
        client_ip: Client IP address.
    """
    # Lazy %-style arguments: the rendered text is the same as the former
    # f-string, but formatting only happens if the record is emitted.
    logger.error(
        "[%s] %s %s - ERROR - %.2fms - %s - %s",
        correlation_id,
        request.method,
        request.url.path,
        duration_ms,
        client_ip,
        exc,
    )

    # Auto-escalate: create bug report task from unhandled exception
    try:
        from infrastructure.error_capture import capture_error

        capture_error(
            exc,
            source="http",
            context={
                "method": request.method,
                "path": request.url.path,
                "correlation_id": correlation_id,
                "client_ip": client_ip,
                "duration_ms": f"{duration_ms:.0f}",
            },
        )
    except Exception:
        # never let escalation break the request; exc_info keeps the reason
        # the capture failed visible in the log
        logger.warning("Escalation logging error: capture failed", exc_info=True)
|
||||
|
||||
async def dispatch(self, request: Request, call_next) -> Response:
|
||||
"""Log the request and response details.
|
||||
|
||||
@@ -52,74 +160,23 @@ class RequestLoggingMiddleware(BaseHTTPMiddleware):
|
||||
Returns:
|
||||
The response from downstream.
|
||||
"""
|
||||
# Check if we should skip logging this path
|
||||
if request.url.path in self.skip_paths:
|
||||
if self._should_skip_path(request.url.path):
|
||||
return await call_next(request)
|
||||
|
||||
# Generate correlation ID
|
||||
correlation_id = str(uuid.uuid4())[:8]
|
||||
request.state.correlation_id = correlation_id
|
||||
|
||||
# Record start time
|
||||
start_time = time.time()
|
||||
|
||||
# Get client info
|
||||
correlation_id, start_time = self._prepare_request_context(request)
|
||||
client_ip = self._get_client_ip(request)
|
||||
user_agent = request.headers.get("user-agent", "-")
|
||||
|
||||
try:
|
||||
# Process the request
|
||||
response = await call_next(request)
|
||||
|
||||
# Calculate duration
|
||||
duration_ms = (time.time() - start_time) * 1000
|
||||
|
||||
# Log the request
|
||||
self._log_request(
|
||||
method=request.method,
|
||||
path=request.url.path,
|
||||
status_code=response.status_code,
|
||||
duration_ms=duration_ms,
|
||||
client_ip=client_ip,
|
||||
user_agent=user_agent,
|
||||
correlation_id=correlation_id,
|
||||
)
|
||||
|
||||
# Add correlation ID to response headers
|
||||
duration_ms = self._get_duration_ms(start_time)
|
||||
self._log_success(request, response, correlation_id, duration_ms, client_ip, user_agent)
|
||||
response.headers["X-Correlation-ID"] = correlation_id
|
||||
|
||||
return response
|
||||
|
||||
except Exception as exc:
|
||||
# Calculate duration even for failed requests
|
||||
duration_ms = (time.time() - start_time) * 1000
|
||||
|
||||
# Log the error
|
||||
logger.error(
|
||||
f"[{correlation_id}] {request.method} {request.url.path} "
|
||||
f"- ERROR - {duration_ms:.2f}ms - {client_ip} - {str(exc)}"
|
||||
)
|
||||
|
||||
# Auto-escalate: create bug report task from unhandled exception
|
||||
try:
|
||||
from infrastructure.error_capture import capture_error
|
||||
|
||||
capture_error(
|
||||
exc,
|
||||
source="http",
|
||||
context={
|
||||
"method": request.method,
|
||||
"path": request.url.path,
|
||||
"correlation_id": correlation_id,
|
||||
"client_ip": client_ip,
|
||||
"duration_ms": f"{duration_ms:.0f}",
|
||||
},
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("Escalation logging error: %s", exc)
|
||||
pass # never let escalation break the request
|
||||
|
||||
# Re-raise the exception
|
||||
duration_ms = self._get_duration_ms(start_time)
|
||||
self._log_error(request, exc, correlation_id, duration_ms, client_ip)
|
||||
raise
|
||||
|
||||
def _get_client_ip(self, request: Request) -> str:
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from datetime import date, datetime
|
||||
from datetime import UTC, date, datetime
|
||||
from enum import StrEnum
|
||||
|
||||
from sqlalchemy import JSON, Boolean, Column, Date, DateTime, Index, Integer, String
|
||||
@@ -40,8 +40,13 @@ class Task(Base):
|
||||
deferred_at = Column(DateTime, nullable=True)
|
||||
|
||||
# Timestamps
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
|
||||
created_at = Column(DateTime, default=lambda: datetime.now(UTC), nullable=False)
|
||||
updated_at = Column(
|
||||
DateTime,
|
||||
default=lambda: datetime.now(UTC),
|
||||
onupdate=lambda: datetime.now(UTC),
|
||||
nullable=False,
|
||||
)
|
||||
|
||||
__table_args__ = (Index("ix_task_state_order", "state", "sort_order"),)
|
||||
|
||||
@@ -59,4 +64,4 @@ class JournalEntry(Base):
|
||||
gratitude = Column(String(500), nullable=True)
|
||||
energy_level = Column(Integer, nullable=True) # User-reported, 1-10
|
||||
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
created_at = Column(DateTime, default=lambda: datetime.now(UTC), nullable=False)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import logging
|
||||
from datetime import date, datetime
|
||||
from datetime import UTC, date, datetime
|
||||
|
||||
from fastapi import APIRouter, Depends, Form, HTTPException, Request
|
||||
from fastapi.responses import HTMLResponse
|
||||
@@ -38,6 +38,56 @@ def get_later_tasks(db: Session) -> list[Task]:
|
||||
)
|
||||
|
||||
|
||||
def _create_mit_tasks(db: Session, titles: list[str | None]) -> list[int]:
|
||||
"""Create MIT tasks from a list of titles, return their IDs."""
|
||||
task_ids: list[int] = []
|
||||
for title in titles:
|
||||
if title:
|
||||
task = Task(
|
||||
title=title,
|
||||
is_mit=True,
|
||||
state=TaskState.LATER,
|
||||
certainty=TaskCertainty.SOFT,
|
||||
)
|
||||
db.add(task)
|
||||
db.commit()
|
||||
db.refresh(task)
|
||||
task_ids.append(task.id)
|
||||
return task_ids
|
||||
|
||||
|
||||
def _create_other_tasks(db: Session, other_tasks: str):
|
||||
"""Create non-MIT tasks from newline-separated text."""
|
||||
for line in other_tasks.split("\n"):
|
||||
line = line.strip()
|
||||
if line:
|
||||
task = Task(
|
||||
title=line,
|
||||
state=TaskState.LATER,
|
||||
certainty=TaskCertainty.FUZZY,
|
||||
)
|
||||
db.add(task)
|
||||
|
||||
|
||||
def _seed_now_next(db: Session):
    """Fill the NOW and NEXT slots from LATER tasks when both are empty.

    Does nothing if either a NOW or a NEXT task already exists. Otherwise the
    LATER tasks are ordered MITs first, then by ``sort_order``; the first
    becomes NOW (and is flushed) and the second, if any, becomes NEXT.
    Nothing is committed here.

    Args:
        db: Database session used for the queries and updates.
    """
    slot_taken = get_now_task(db) or get_next_task(db)
    if slot_taken:
        return

    ordering = (Task.is_mit.desc(), Task.sort_order)
    candidates = (
        db.query(Task).filter(Task.state == TaskState.LATER).order_by(*ordering).all()
    )
    if not candidates:
        return

    first = candidates[0]
    first.state = TaskState.NOW
    db.add(first)
    db.flush()

    if len(candidates) > 1:
        second = candidates[1]
        second.state = TaskState.NEXT
        db.add(second)
|
||||
|
||||
|
||||
def promote_tasks(db: Session):
|
||||
"""Enforce the NOW/NEXT/LATER state machine invariants.
|
||||
|
||||
@@ -114,63 +164,19 @@ async def post_morning_ritual(
|
||||
other_tasks: str = Form(""),
|
||||
):
|
||||
"""Process morning ritual: create MITs, other tasks, and set initial states."""
|
||||
# Create Journal Entry
|
||||
mit_task_ids = []
|
||||
journal_entry = JournalEntry(entry_date=date.today())
|
||||
db.add(journal_entry)
|
||||
db.commit()
|
||||
db.refresh(journal_entry)
|
||||
|
||||
# Create MIT tasks
|
||||
for mit_title in [mit1_title, mit2_title, mit3_title]:
|
||||
if mit_title:
|
||||
task = Task(
|
||||
title=mit_title,
|
||||
is_mit=True,
|
||||
state=TaskState.LATER, # Initially LATER, will be promoted
|
||||
certainty=TaskCertainty.SOFT,
|
||||
)
|
||||
db.add(task)
|
||||
db.commit()
|
||||
db.refresh(task)
|
||||
mit_task_ids.append(task.id)
|
||||
|
||||
journal_entry.mit_task_ids = mit_task_ids
|
||||
journal_entry.mit_task_ids = _create_mit_tasks(db, [mit1_title, mit2_title, mit3_title])
|
||||
db.add(journal_entry)
|
||||
|
||||
# Create other tasks
|
||||
for task_title in other_tasks.split("\n"):
|
||||
task_title = task_title.strip()
|
||||
if task_title:
|
||||
task = Task(
|
||||
title=task_title,
|
||||
state=TaskState.LATER,
|
||||
certainty=TaskCertainty.FUZZY,
|
||||
)
|
||||
db.add(task)
|
||||
|
||||
_create_other_tasks(db, other_tasks)
|
||||
db.commit()
|
||||
|
||||
# Set initial NOW/NEXT states
|
||||
# Set initial NOW/NEXT states after all tasks are created
|
||||
if not get_now_task(db) and not get_next_task(db):
|
||||
later_tasks = (
|
||||
db.query(Task)
|
||||
.filter(Task.state == TaskState.LATER)
|
||||
.order_by(Task.is_mit.desc(), Task.sort_order)
|
||||
.all()
|
||||
)
|
||||
if later_tasks:
|
||||
# Set the highest priority LATER task to NOW
|
||||
later_tasks[0].state = TaskState.NOW
|
||||
db.add(later_tasks[0])
|
||||
db.flush() # Flush to make the change visible for the next query
|
||||
|
||||
# Set the next highest priority LATER task to NEXT
|
||||
if len(later_tasks) > 1:
|
||||
later_tasks[1].state = TaskState.NEXT
|
||||
db.add(later_tasks[1])
|
||||
db.commit() # Commit changes after initial NOW/NEXT setup
|
||||
_seed_now_next(db)
|
||||
db.commit()
|
||||
|
||||
return templates.TemplateResponse(
|
||||
request,
|
||||
@@ -190,7 +196,7 @@ async def get_evening_ritual_form(request: Request, db: Session = Depends(get_db
|
||||
if not journal_entry:
|
||||
raise HTTPException(status_code=404, detail="No journal entry for today")
|
||||
return templates.TemplateResponse(
|
||||
"calm/evening_ritual_form.html", {"request": request, "journal_entry": journal_entry}
|
||||
request, "calm/evening_ritual_form.html", {"journal_entry": journal_entry}
|
||||
)
|
||||
|
||||
|
||||
@@ -220,7 +226,7 @@ async def post_evening_ritual(
|
||||
)
|
||||
for task in active_tasks:
|
||||
task.state = TaskState.DEFERRED # Or DONE, depending on desired archiving logic
|
||||
task.deferred_at = datetime.utcnow()
|
||||
task.deferred_at = datetime.now(UTC)
|
||||
db.add(task)
|
||||
|
||||
db.commit()
|
||||
@@ -251,8 +257,9 @@ async def create_new_task(
|
||||
# After creating a new task, we might need to re-evaluate NOW/NEXT/LATER, but for simplicity
|
||||
# and given the spec, new tasks go to LATER. Promotion happens on completion/deferral.
|
||||
return templates.TemplateResponse(
|
||||
request,
|
||||
"calm/partials/later_count.html",
|
||||
{"request": request, "later_tasks_count": len(get_later_tasks(db))},
|
||||
{"later_tasks_count": len(get_later_tasks(db))},
|
||||
)
|
||||
|
||||
|
||||
@@ -273,7 +280,7 @@ async def start_task(
|
||||
raise HTTPException(status_code=404, detail="Task not found")
|
||||
|
||||
task.state = TaskState.NOW
|
||||
task.started_at = datetime.utcnow()
|
||||
task.started_at = datetime.now(UTC)
|
||||
db.add(task)
|
||||
db.commit()
|
||||
|
||||
@@ -281,9 +288,9 @@ async def start_task(
|
||||
promote_tasks(db)
|
||||
|
||||
return templates.TemplateResponse(
|
||||
request,
|
||||
"calm/partials/now_next_later.html",
|
||||
{
|
||||
"request": request,
|
||||
"now_task": get_now_task(db),
|
||||
"next_task": get_next_task(db),
|
||||
"later_tasks_count": len(get_later_tasks(db)),
|
||||
@@ -303,16 +310,16 @@ async def complete_task(
|
||||
raise HTTPException(status_code=404, detail="Task not found")
|
||||
|
||||
task.state = TaskState.DONE
|
||||
task.completed_at = datetime.utcnow()
|
||||
task.completed_at = datetime.now(UTC)
|
||||
db.add(task)
|
||||
db.commit()
|
||||
|
||||
promote_tasks(db)
|
||||
|
||||
return templates.TemplateResponse(
|
||||
request,
|
||||
"calm/partials/now_next_later.html",
|
||||
{
|
||||
"request": request,
|
||||
"now_task": get_now_task(db),
|
||||
"next_task": get_next_task(db),
|
||||
"later_tasks_count": len(get_later_tasks(db)),
|
||||
@@ -332,16 +339,16 @@ async def defer_task(
|
||||
raise HTTPException(status_code=404, detail="Task not found")
|
||||
|
||||
task.state = TaskState.DEFERRED
|
||||
task.deferred_at = datetime.utcnow()
|
||||
task.deferred_at = datetime.now(UTC)
|
||||
db.add(task)
|
||||
db.commit()
|
||||
|
||||
promote_tasks(db)
|
||||
|
||||
return templates.TemplateResponse(
|
||||
request,
|
||||
"calm/partials/now_next_later.html",
|
||||
{
|
||||
"request": request,
|
||||
"now_task": get_now_task(db),
|
||||
"next_task": get_next_task(db),
|
||||
"later_tasks_count": len(get_later_tasks(db)),
|
||||
@@ -354,8 +361,9 @@ async def get_later_tasks_list(request: Request, db: Session = Depends(get_db)):
|
||||
"""Render the expandable list of LATER tasks."""
|
||||
later_tasks = get_later_tasks(db)
|
||||
return templates.TemplateResponse(
|
||||
request,
|
||||
"calm/partials/later_tasks_list.html",
|
||||
{"request": request, "later_tasks": later_tasks},
|
||||
{"later_tasks": later_tasks},
|
||||
)
|
||||
|
||||
|
||||
@@ -398,9 +406,9 @@ async def reorder_tasks(
|
||||
|
||||
# Re-render the relevant parts of the UI
|
||||
return templates.TemplateResponse(
|
||||
request,
|
||||
"calm/partials/now_next_later.html",
|
||||
{
|
||||
"request": request,
|
||||
"now_task": get_now_task(db),
|
||||
"next_task": get_next_task(db),
|
||||
"later_tasks_count": len(get_later_tasks(db)),
|
||||
|
||||
435
src/dashboard/routes/daily_run.py
Normal file
435
src/dashboard/routes/daily_run.py
Normal file
@@ -0,0 +1,435 @@
|
||||
"""Daily Run metrics routes — dashboard card for triage and session metrics."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from pathlib import Path
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.request import Request as UrlRequest
|
||||
from urllib.request import urlopen
|
||||
|
||||
from fastapi import APIRouter, Request
|
||||
from fastapi.responses import HTMLResponse, JSONResponse
|
||||
|
||||
from config import settings
|
||||
from dashboard.templating import templates
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(tags=["daily-run"])
|
||||
|
||||
REPO_ROOT = Path(settings.repo_root)
|
||||
CONFIG_PATH = REPO_ROOT / "timmy_automations" / "config" / "daily_run.json"
|
||||
|
||||
DEFAULT_CONFIG = {
|
||||
"gitea_api": "http://localhost:3000/api/v1",
|
||||
"repo_slug": "rockachopa/Timmy-time-dashboard",
|
||||
"token_file": "~/.hermes/gitea_token",
|
||||
"layer_labels_prefix": "layer:",
|
||||
}
|
||||
|
||||
LAYER_LABELS = ["layer:triage", "layer:micro-fix", "layer:tests", "layer:economy"]
|
||||
|
||||
|
||||
def _load_config() -> dict:
    """Build the daily-run configuration.

    Starts from a copy of ``DEFAULT_CONFIG``, overlays the ``"orchestrator"``
    section of the JSON file at ``CONFIG_PATH`` when that file exists and
    parses, then applies non-empty environment overrides:
    ``TIMMY_GITEA_API`` -> ``gitea_api``, ``TIMMY_REPO_SLUG`` -> ``repo_slug``,
    ``TIMMY_GITEA_TOKEN`` -> ``token``.

    Returns:
        The merged configuration dict.
    """
    merged = DEFAULT_CONFIG.copy()

    if CONFIG_PATH.exists():
        try:
            from_file = json.loads(CONFIG_PATH.read_text())
            if "orchestrator" in from_file:
                merged.update(from_file["orchestrator"])
        except (json.JSONDecodeError, OSError) as exc:
            # An unreadable or malformed file just leaves the defaults in place
            logger.debug("Could not load daily_run config: %s", exc)

    # Environment variable overrides
    env_overrides = (
        ("TIMMY_GITEA_API", "gitea_api"),
        ("TIMMY_REPO_SLUG", "repo_slug"),
        ("TIMMY_GITEA_TOKEN", "token"),
    )
    for env_name, config_key in env_overrides:
        value = os.environ.get(env_name)
        if value:
            merged[config_key] = value

    return merged
|
||||
|
||||
|
||||
def _get_token(config: dict) -> str | None:
|
||||
"""Get Gitea token from environment or file."""
|
||||
if "token" in config:
|
||||
return config["token"]
|
||||
|
||||
token_file = Path(config["token_file"]).expanduser()
|
||||
if token_file.exists():
|
||||
return token_file.read_text().strip()
|
||||
|
||||
return None
|
||||
|
||||
|
||||
class GiteaClient:
    """Simple Gitea API client with graceful degradation.

    Attributes:
        api_base: Base API URL from ``config["gitea_api"]`` without trailing slashes.
        repo_slug: Repository slug from ``config["repo_slug"]``.
        token: API token, or None to send unauthenticated requests.
    """

    def __init__(self, config: dict, token: str | None):
        self.api_base = config["gitea_api"].rstrip("/")
        self.repo_slug = config["repo_slug"]
        self.token = token
        # Cached result of the reachability probe; None means "not probed yet"
        self._available: bool | None = None

    def _headers(self) -> dict:
        """Return request headers, adding Authorization when a token is set."""
        headers = {"Accept": "application/json"}
        if self.token:
            headers["Authorization"] = f"token {self.token}"
        return headers

    def _api_url(self, path: str) -> str:
        """Return the URL of *path* under this client's repository."""
        return f"{self.api_base}/repos/{self.repo_slug}/{path}"

    def _page_url(self, path: str, page: int, limit: int, params: dict | None) -> str:
        """Return the URL for one page of *path* with a URL-encoded query string.

        ``limit`` and ``page`` come first, followed by *params* in their own
        order. Values are percent-encoded so characters such as ``&``, ``+``,
        ``#`` or spaces cannot corrupt the query.
        """
        from urllib.parse import urlencode

        pairs = [("limit", limit), ("page", page)]
        if params:
            pairs.extend(params.items())
        return f"{self._api_url(path)}?{urlencode(pairs)}"

    def is_available(self) -> bool:
        """Check if Gitea API is reachable.

        Probes ``{api_base}/version`` once with a 5-second timeout and caches
        the outcome; later calls return the cached value.

        Returns:
            True if the probe answered with status 200, False on any other
            status or on HTTPError, URLError or TimeoutError.
        """
        if self._available is not None:
            return self._available

        try:
            req = UrlRequest(
                f"{self.api_base}/version",
                headers=self._headers(),
                method="GET",
            )
            with urlopen(req, timeout=5) as resp:
                self._available = resp.status == 200
                return self._available
        except (HTTPError, URLError, TimeoutError):
            self._available = False
            return False

    def get_paginated(self, path: str, params: dict | None = None) -> list:
        """Fetch all pages of a paginated endpoint.

        Requests pages of 50 items starting at page 1 and stops at the first
        empty page or the first page shorter than the page size.

        Args:
            path: Endpoint path relative to the repository URL.
            params: Extra query parameters added to every page request.

        Returns:
            All items from all fetched pages, in order.

        Raises:
            Network and JSON decoding errors from ``urlopen`` / ``json.loads``
            are not caught here.
        """
        all_items = []
        page = 1
        limit = 50

        while True:
            req = UrlRequest(
                self._page_url(path, page, limit, params),
                headers=self._headers(),
                method="GET",
            )
            with urlopen(req, timeout=15) as resp:
                batch = json.loads(resp.read())

            if not batch:
                break

            all_items.extend(batch)
            # A short page means there is nothing further to fetch
            if len(batch) < limit:
                break
            page += 1

        return all_items
|
||||
|
||||
|
||||
@dataclass
class LayerMetrics:
    """Issue counts for one layer across the current and previous window."""

    name: str
    label: str
    current_count: int
    previous_count: int

    @property
    def trend(self) -> str:
        """Arrow glyph describing how the count moved between windows.

        With a zero baseline the result is "→" for a zero current count and
        "↑" otherwise. With a non-zero baseline the percentage change picks
        the glyph: above 20 is "↑↑", above 5 is "↑", below -20 is "↓↓",
        below -5 is "↓", anything else is "→".
        """
        before, after = self.previous_count, self.current_count
        if before == 0:
            # No baseline to compute a percentage against.
            return "↑" if after != 0 else "→"

        pct = ((after - before) / before) * 100
        if pct > 20:
            return "↑↑"
        if pct > 5:
            return "↑"
        if pct < -20:
            return "↓↓"
        if pct < -5:
            return "↓"
        return "→"

    @property
    def trend_color(self) -> str:
        """CSS colour expression matching ``trend``."""
        arrow = self.trend
        if arrow.startswith("↑"):
            # More work = positive
            return "var(--green)"
        if arrow.startswith("↓"):
            # Less work = caution
            return "var(--amber)"
        return "var(--text-dim)"
|
||||
|
||||
|
||||
@dataclass
class DailyRunMetrics:
    """Aggregate Daily Run figures: session counts plus per-layer metrics."""

    sessions_completed: int
    sessions_previous: int
    layers: list[LayerMetrics]
    total_touched_current: int
    total_touched_previous: int
    lookback_days: int
    generated_at: str

    @property
    def sessions_trend(self) -> str:
        """Arrow glyph comparing completed sessions with the previous window.

        With a zero baseline the result is "→" for zero completed sessions
        and "↑" otherwise. With a non-zero baseline the percentage change
        picks the glyph: above 20 is "↑↑", above 5 is "↑", below -20 is
        "↓↓", below -5 is "↓", anything else is "→".
        """
        before, after = self.sessions_previous, self.sessions_completed
        if before == 0:
            # No baseline to compute a percentage against.
            return "↑" if after != 0 else "→"

        pct = ((after - before) / before) * 100
        if pct > 20:
            return "↑↑"
        if pct > 5:
            return "↑"
        if pct < -20:
            return "↓↓"
        if pct < -5:
            return "↓"
        return "→"

    @property
    def sessions_trend_color(self) -> str:
        """CSS colour expression matching ``sessions_trend``."""
        arrow = self.sessions_trend
        if arrow.startswith("↑"):
            return "var(--green)"
        if arrow.startswith("↓"):
            return "var(--amber)"
        return "var(--text-dim)"
|
||||
|
||||
|
||||
def _extract_layer(labels: list[dict]) -> str | None:
    """Extract the layer name from a list of issue label objects.

    Args:
        labels: Label dicts; each is expected to carry a ``"name"`` entry.

    Returns:
        The name of the first label starting with ``"layer:"``, with that
        prefix removed, or ``None`` when no label matches.
    """
    prefix = "layer:"
    for label in labels:
        # A missing or null name is treated as an empty string.
        name = label.get("name") or ""
        if name.startswith(prefix):
            # Strip only the leading prefix; str.replace would also remove
            # any later "layer:" occurrence inside the name.
            return name.removeprefix(prefix)
    return None
|
||||
|
||||
|
||||
def _load_cycle_data(days: int = 14) -> dict:
    """Count successful cycle retrospectives in two adjacent time windows.

    Reads ``.loop/retro/cycles.jsonl`` under ``REPO_ROOT``, one JSON record
    per line. A record counts when its ``"success"`` entry is truthy and its
    ``"timestamp"`` parses as ISO-8601 (a trailing ``Z`` is accepted).

    Lines that cannot be used are skipped one by one instead of failing the
    whole load: invalid JSON, JSON that is not an object, a missing or
    non-string timestamp, or a timestamp that cannot be parsed or compared.

    Args:
        days: Length of each window in days.

    Returns:
        ``{"current": n, "previous": m}``, where *current* covers the last
        *days* days and *previous* the *days* days before that. Both are 0
        when the file is missing or unreadable.
    """
    retro_file = REPO_ROOT / ".loop" / "retro" / "cycles.jsonl"
    if not retro_file.exists():
        return {"current": 0, "previous": 0}

    try:
        now = datetime.now(UTC)
        current_cutoff = now - timedelta(days=days)
        previous_cutoff = now - timedelta(days=days * 2)

        current_count = 0
        previous_count = 0

        for line in retro_file.read_text().strip().splitlines():
            try:
                entry = json.loads(line)
            except json.JSONDecodeError:
                continue

            # Valid JSON that is not an object (a bare number, list, string)
            # has no .get(); skip it rather than abort the whole load.
            if not isinstance(entry, dict):
                continue

            ts_str = entry.get("timestamp", "")
            if not ts_str or not isinstance(ts_str, str):
                continue
            if not entry.get("success", False):
                continue

            try:
                ts = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
                if ts >= current_cutoff:
                    current_count += 1
                elif ts >= previous_cutoff:
                    previous_count += 1
            except (ValueError, TypeError):
                # ValueError: unparseable timestamp. TypeError: a timestamp
                # without an offset compared against the aware cutoffs.
                continue

        return {"current": current_count, "previous": previous_count}
    except (OSError, ValueError) as exc:
        logger.debug("Failed to load cycle data: %s", exc)
        return {"current": 0, "previous": 0}
|
||||
|
||||
|
||||
def _fetch_layer_metrics(
    client: GiteaClient, lookback_days: int = 7
) -> tuple[list[LayerMetrics], int, int]:
    """Fetch metrics for each layer from Gitea issues.

    For every label in ``LAYER_LABELS`` all issues (open and closed) with
    that label are fetched and bucketed by their ``updated_at`` time.

    Args:
        client: Gitea client used for the requests.
        lookback_days: Length of each comparison window in days.

    Returns:
        ``(layers, total_current, total_previous)``: one ``LayerMetrics``
        per label, in ``LAYER_LABELS`` order, and the sums of the current
        and previous counts. A layer whose request fails is reported with
        zero counts and adds nothing to the totals.
    """
    now = datetime.now(UTC)
    current_cutoff = now - timedelta(days=lookback_days)
    previous_cutoff = now - timedelta(days=lookback_days * 2)

    layers = []
    total_current = 0
    total_previous = 0

    for layer_label in LAYER_LABELS:
        layer_name = layer_label.replace("layer:", "")
        try:
            # Fetch all issues with this layer label (both open and closed).
            # No "limit" here: get_paginated sets its own page size, and a
            # second value would only duplicate the query key.
            issues = client.get_paginated(
                "issues",
                {"state": "all", "labels": layer_label},
            )
            current_count, previous_count = _count_recent_updates(
                issues, current_cutoff, previous_cutoff
            )
        except (HTTPError, URLError, TimeoutError) as exc:
            # One failing layer degrades to zeros instead of failing the
            # whole metrics call.
            logger.debug("Failed to fetch issues for %s: %s", layer_label, exc)
            current_count = previous_count = 0
        else:
            total_current += current_count
            total_previous += previous_count

        layers.append(
            LayerMetrics(
                name=layer_name,
                label=layer_label,
                current_count=current_count,
                previous_count=previous_count,
            )
        )

    return layers, total_current, total_previous


def _count_recent_updates(
    issues: list[dict], current_cutoff: datetime, previous_cutoff: datetime
) -> tuple[int, int]:
    """Count issues whose ``updated_at`` falls in the current/previous window.

    Issues with a missing or unparseable ``updated_at`` are ignored.
    """
    current_count = 0
    previous_count = 0
    for issue in issues:
        updated_at = issue.get("updated_at", "")
        if not updated_at:
            continue
        try:
            updated = datetime.fromisoformat(updated_at.replace("Z", "+00:00"))
            if updated >= current_cutoff:
                current_count += 1
            elif updated >= previous_cutoff:
                previous_count += 1
        except (ValueError, TypeError):
            continue
    return current_count, previous_count
|
||||
|
||||
|
||||
def _get_metrics(lookback_days: int = 7) -> DailyRunMetrics | None:
    """Assemble the Daily Run metrics, or return ``None`` when that fails.

    Loads the configuration, probes Gitea, then combines the per-layer issue
    metrics with the session counts read from the cycle retrospectives.

    Args:
        lookback_days: Length of each comparison window in days.

    Returns:
        The populated ``DailyRunMetrics``, or ``None`` when Gitea is
        unreachable or any step raises.
    """
    config = _load_config()
    client = GiteaClient(config, _get_token(config))

    if not client.is_available():
        logger.debug("Gitea API not available for Daily Run metrics")
        return None

    try:
        # Layer activity comes from Gitea issues.
        layers, touched_now, touched_before = _fetch_layer_metrics(client, lookback_days)

        # Session counts come from the cycle retrospective log.
        sessions = _load_cycle_data(days=lookback_days)

        fields = {
            "sessions_completed": sessions["current"],
            "sessions_previous": sessions["previous"],
            "layers": layers,
            "total_touched_current": touched_now,
            "total_touched_previous": touched_before,
            "lookback_days": lookback_days,
            "generated_at": datetime.now(UTC).isoformat(),
        }
        return DailyRunMetrics(**fields)
    except Exception as exc:
        logger.debug("Error fetching Daily Run metrics: %s", exc)
        return None
|
||||
|
||||
|
||||
@router.get("/daily-run/metrics", response_class=JSONResponse)
async def daily_run_metrics_api(lookback_days: int = 7):
    """Return Daily Run metrics as JSON API.

    Args:
        lookback_days: Length of each comparison window in days.

    Returns:
        A 503 response with an ``error``/``status`` payload when no metrics
        could be produced. Otherwise a 200 response with session counts,
        per-layer counts and trends, totals, the generation timestamp and
        any quest rewards awarded as a side effect.
    """
    import asyncio

    # _get_metrics does blocking urllib and file I/O; run it in a worker
    # thread so the event loop keeps serving other requests meanwhile.
    metrics = await asyncio.to_thread(_get_metrics, lookback_days)
    if not metrics:
        return JSONResponse(
            {"error": "Gitea API unavailable", "status": "unavailable"},
            status_code=503,
        )

    # Check for quest completions based on Daily Run metrics. Best effort:
    # a failure here must not fail the metrics response.
    quest_rewards = []
    try:
        from dashboard.routes.quests import check_daily_run_quests

        quest_rewards = await check_daily_run_quests(agent_id="system")
    except Exception as exc:
        logger.debug("Quest checking failed: %s", exc)

    return JSONResponse(
        {
            "status": "ok",
            "lookback_days": metrics.lookback_days,
            "sessions": {
                "completed": metrics.sessions_completed,
                "previous": metrics.sessions_previous,
                "trend": metrics.sessions_trend,
            },
            "layers": [
                {
                    "name": layer.name,
                    "label": layer.label,
                    "current": layer.current_count,
                    "previous": layer.previous_count,
                    "trend": layer.trend,
                }
                for layer in metrics.layers
            ],
            "totals": {
                "current": metrics.total_touched_current,
                "previous": metrics.total_touched_previous,
            },
            "generated_at": metrics.generated_at,
            "quest_rewards": quest_rewards,
        }
    )
|
||||
|
||||
|
||||
@router.get("/daily-run/panel", response_class=HTMLResponse)
async def daily_run_panel(request: Request, lookback_days: int = 7):
    """Return Daily Run metrics panel HTML for HTMX polling.

    The template always receives the link URLs. ``metrics`` is ``None`` and
    ``gitea_available`` is ``False`` when no metrics could be produced.

    Args:
        request: Incoming request, passed through to the template.
        lookback_days: Length of each comparison window in days.
    """
    import asyncio

    # _get_metrics does blocking urllib and file I/O; keep it off the
    # event loop.
    metrics = await asyncio.to_thread(_get_metrics, lookback_days)

    # Build Gitea URLs for filtered issue lists. The web base is the API
    # base without its "/api/v1" part; trailing slashes are dropped first so
    # the links never contain "//".
    config = _load_config()
    repo_slug = config.get("repo_slug", "rockachopa/Timmy-time-dashboard")
    gitea_base = (
        config.get("gitea_api", "http://localhost:3000/api/v1")
        .rstrip("/")
        .replace("/api/v1", "")
    )
    issues_url = f"{gitea_base}/{repo_slug}/issues"

    # Logbook URL (link to issues with any layer label)
    logbook_url = f"{issues_url}?labels={','.join(LAYER_LABELS)}&state=all"

    # Layer-specific URLs, keyed by layer name. Derived from LAYER_LABELS so
    # the panel cannot drift from the set of layers the metrics cover.
    layer_urls = {
        label.removeprefix("layer:"): f"{issues_url}?labels={label}&state=all"
        for label in LAYER_LABELS
    }

    return templates.TemplateResponse(
        request,
        "partials/daily_run_panel.html",
        {
            "metrics": metrics,
            "logbook_url": logbook_url,
            "layer_urls": layer_urls,
            "gitea_available": metrics is not None,
        },
    )
|
||||
@@ -75,6 +75,7 @@ def _query_database(db_path: str) -> dict:
|
||||
"truncated": count > MAX_ROWS,
|
||||
}
|
||||
except Exception as exc:
|
||||
logger.exception("Failed to query table %s", table_name)
|
||||
result["tables"][table_name] = {
|
||||
"error": str(exc),
|
||||
"columns": [],
|
||||
@@ -83,6 +84,7 @@ def _query_database(db_path: str) -> dict:
|
||||
"truncated": False,
|
||||
}
|
||||
except Exception as exc:
|
||||
logger.exception("Failed to query database %s", db_path)
|
||||
result["error"] = str(exc)
|
||||
|
||||
return result
|
||||
|
||||
@@ -125,7 +125,7 @@ def _run_grok_query(message: str) -> dict:
|
||||
from lightning.factory import get_backend as get_ln_backend
|
||||
|
||||
ln = get_ln_backend()
|
||||
sats = min(settings.grok_max_sats_per_query, 100)
|
||||
sats = min(settings.grok_max_sats_per_query, settings.grok_sats_hard_cap)
|
||||
ln.create_invoice(sats, f"Grok: {message[:50]}")
|
||||
invoice_note = f" | {sats} sats"
|
||||
except Exception as exc:
|
||||
@@ -135,6 +135,7 @@ def _run_grok_query(message: str) -> dict:
|
||||
result = backend.run(message)
|
||||
return {"response": f"**[Grok]{invoice_note}:** {result.content}", "error": None}
|
||||
except Exception as exc:
|
||||
logger.exception("Grok query failed")
|
||||
return {"response": None, "error": f"Grok error: {exc}"}
|
||||
|
||||
|
||||
@@ -193,6 +194,7 @@ async def grok_stats():
|
||||
"model": settings.grok_default_model,
|
||||
}
|
||||
except Exception as exc:
|
||||
logger.exception("Failed to load Grok stats")
|
||||
return {"error": str(exc)}
|
||||
|
||||
|
||||
|
||||
@@ -148,6 +148,7 @@ def _check_sqlite() -> DependencyStatus:
|
||||
details={"path": str(db_path)},
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.exception("SQLite health check failed")
|
||||
return DependencyStatus(
|
||||
name="SQLite Database",
|
||||
status="unavailable",
|
||||
@@ -274,3 +275,54 @@ async def component_status():
|
||||
},
|
||||
"timestamp": datetime.now(UTC).isoformat(),
|
||||
}
|
||||
|
||||
|
||||
@router.get("/health/snapshot")
async def health_snapshot():
    """Quick health snapshot before coding.

    Delegates to the ``health_snapshot`` module under
    ``timmy_automations/daily_run`` in the repository; that directory is
    added to ``sys.path`` on first use. The snapshot is generated in a
    worker thread.

    Returns:
        The snapshot as a dict. If anything fails, a fallback dict with
        ``overall_status`` set to ``"unknown"``, the error text, and
        placeholder ``ci``, ``issues``, ``flakiness`` and ``tokens``
        sections.

    Refs: #710
    """
    import sys
    from pathlib import Path

    # Make the health snapshot module importable.
    module_dir = str(Path(settings.repo_root) / "timmy_automations" / "daily_run")
    if module_dir not in sys.path:
        sys.path.insert(0, module_dir)

    try:
        from health_snapshot import generate_snapshot, get_token, load_config

        snapshot_config = load_config()
        snapshot_token = get_token(snapshot_config)

        # Run the health snapshot in a thread to avoid blocking the loop.
        result = await asyncio.to_thread(generate_snapshot, snapshot_config, snapshot_token)
        return result.to_dict()
    except Exception as exc:
        logger.warning("Health snapshot failed: %s", exc)

    # Graceful fallback: same top-level sections, every status unknown.
    fallback = {
        "timestamp": datetime.now(UTC).isoformat(),
        "overall_status": "unknown",
        "error": str(exc),
    }
    fallback["ci"] = {"status": "unknown", "message": "Snapshot failed"}
    fallback["issues"] = {"count": 0, "p0_count": 0, "p1_count": 0, "issues": []}
    fallback["flakiness"] = {
        "status": "unknown",
        "recent_failures": 0,
        "recent_cycles": 0,
        "failure_rate": 0.0,
        "message": "Snapshot failed",
    }
    fallback["tokens"] = {"status": "unknown", "message": "Snapshot failed"}
    return fallback
|
||||
|
||||
377
src/dashboard/routes/quests.py
Normal file
377
src/dashboard/routes/quests.py
Normal file
@@ -0,0 +1,377 @@
|
||||
"""Quest system routes for agent token rewards.
|
||||
|
||||
Provides API endpoints for:
|
||||
- Listing quests and their status
|
||||
- Claiming quest rewards
|
||||
- Getting quest leaderboard
|
||||
- Quest progress tracking
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, Request
|
||||
from fastapi.responses import HTMLResponse, JSONResponse
|
||||
from pydantic import BaseModel
|
||||
|
||||
from dashboard.templating import templates
|
||||
from timmy.quest_system import (
|
||||
QuestStatus,
|
||||
auto_evaluate_all_quests,
|
||||
claim_quest_reward,
|
||||
evaluate_quest_progress,
|
||||
get_active_quests,
|
||||
get_agent_quests_status,
|
||||
get_quest_definition,
|
||||
get_quest_leaderboard,
|
||||
load_quest_config,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/quests", tags=["quests"])
|
||||
|
||||
|
||||
class ClaimQuestRequest(BaseModel):
    """Body of ``POST /quests/api/claim``: which agent claims which quest."""

    # Identifier of the agent claiming the reward.
    agent_id: str
    # Identifier of the quest whose reward is claimed.
    quest_id: str
|
||||
|
||||
|
||||
class EvaluateQuestRequest(BaseModel):
    """Body of ``POST /quests/api/evaluate``: agent and quest to evaluate."""

    # Identifier of the agent whose progress is evaluated.
    agent_id: str
    # Identifier of the quest to evaluate.
    quest_id: str
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# API Endpoints
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@router.get("/api/definitions")
async def get_quest_definitions_api() -> JSONResponse:
    """List the definitions returned by ``get_active_quests``.

    Returns:
        JSON object with a ``"quests"`` list; each item carries the quest's
        id, name, description, reward, type, repeatability, cooldown and
        criteria.
    """

    def _as_payload(quest) -> dict:
        # Flatten one definition into JSON-friendly primitives.
        return {
            "id": quest.id,
            "name": quest.name,
            "description": quest.description,
            "reward_tokens": quest.reward_tokens,
            "type": quest.quest_type.value,
            "repeatable": quest.repeatable,
            "cooldown_hours": quest.cooldown_hours,
            "criteria": quest.criteria,
        }

    payload = {"quests": [_as_payload(quest) for quest in get_active_quests()]}
    return JSONResponse(payload)
|
||||
|
||||
|
||||
@router.get("/api/status/{agent_id}")
async def get_agent_quest_status(agent_id: str) -> JSONResponse:
    """Return the quest status of one agent.

    Args:
        agent_id: Agent to look up.

    Returns:
        Whatever ``get_agent_quests_status`` reports for the agent,
        serialized as JSON.
    """
    return JSONResponse(get_agent_quests_status(agent_id))
|
||||
|
||||
|
||||
@router.post("/api/claim")
async def claim_quest_reward_api(request: ClaimQuestRequest) -> JSONResponse:
    """Claim a quest reward on behalf of an agent.

    Returns:
        ``{"success": True, "reward": ...}`` when ``claim_quest_reward``
        yields a reward; otherwise a 400 response explaining that the quest
        is not completed, already claimed, or on cooldown.
    """
    reward = claim_quest_reward(request.quest_id, request.agent_id)

    if reward:
        return JSONResponse({"success": True, "reward": reward})

    failure = {
        "success": False,
        "error": "Quest not completed, already claimed, or on cooldown",
    }
    return JSONResponse(failure, status_code=400)
|
||||
|
||||
|
||||
@router.post("/api/evaluate")
async def evaluate_quest_api(request: EvaluateQuestRequest) -> JSONResponse:
    """Evaluate one quest for one agent on demand.

    Builds the evaluation context for the quest's type, evaluates progress
    and, when the quest comes out completed, claims the reward immediately.

    Returns:
        404 when the quest is unknown, 500 when evaluation yields nothing,
        otherwise a success payload with the progress, the reward (or
        ``None``) and a ``completed`` flag.
    """
    quest_id, agent_id = request.quest_id, request.agent_id

    quest = get_quest_definition(quest_id)
    if not quest:
        return JSONResponse(
            {"success": False, "error": "Quest not found"},
            status_code=404,
        )

    # The context depends on the quest type (issue data, session counts).
    context = await _build_evaluation_context(quest)
    progress = evaluate_quest_progress(quest_id, agent_id, context)
    if not progress:
        return JSONResponse(
            {"success": False, "error": "Failed to evaluate quest"},
            status_code=500,
        )

    # Auto-claim if completed. The response below is built after the claim,
    # so it reflects the progress object as it stands at that point.
    reward = (
        claim_quest_reward(quest_id, agent_id)
        if progress.status == QuestStatus.COMPLETED
        else None
    )

    body = {
        "success": True,
        "progress": progress.to_dict(),
        "reward": reward,
        "completed": progress.status == QuestStatus.COMPLETED,
    }
    return JSONResponse(body)
|
||||
|
||||
|
||||
@router.get("/api/leaderboard")
async def get_leaderboard_api() -> JSONResponse:
    """Return the quest leaderboard.

    Returns:
        JSON object whose ``"leaderboard"`` entry is the result of
        ``get_quest_leaderboard``.
    """
    return JSONResponse({"leaderboard": get_quest_leaderboard()})
|
||||
|
||||
|
||||
@router.post("/api/reload")
async def reload_quest_config_api() -> JSONResponse:
    """Reload the quest configuration without restarting the application.

    Returns:
        JSON object with ``success``, the number of definitions loaded and
        the quest settings returned by ``load_quest_config``.
    """
    definitions, quest_settings = load_quest_config()
    summary = {
        "success": True,
        "quests_loaded": len(definitions),
        "settings": quest_settings,
    }
    return JSONResponse(summary)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dashboard UI Endpoints
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@router.get("", response_class=HTMLResponse)
async def quests_dashboard(request: Request) -> HTMLResponse:
    """Render the main quests page (``quests.html``)."""
    page_context = {"agent_id": "current_user"}
    return templates.TemplateResponse(request, "quests.html", page_context)
|
||||
|
||||
|
||||
@router.get("/panel/{agent_id}", response_class=HTMLResponse)
async def quests_panel(request: Request, agent_id: str) -> HTMLResponse:
    """Render the quest panel partial for one agent (HTMX updates).

    Args:
        request: Incoming request, passed through to the template.
        agent_id: Agent whose quest status is shown.
    """
    agent_status = get_agent_quests_status(agent_id)
    panel_context = {
        "agent_id": agent_id,
        "quests": agent_status["quests"],
        "total_tokens": agent_status["total_tokens_earned"],
        "completed_count": agent_status["total_quests_completed"],
    }
    return templates.TemplateResponse(
        request,
        "partials/quests_panel.html",
        panel_context,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal Functions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def _build_evaluation_context(quest) -> dict[str, Any]:
    """Gather the data needed to evaluate *quest*, keyed by its type.

    - ``issue_count``: ``closed_issues`` for the quest's ``issue_labels``.
    - ``issue_reduce``: ``current_issue_count`` and ``previous_issue_count``
      for the quest's ``issue_labels`` (lookback defaults to 7 days).
    - ``daily_run``: ``sessions_completed`` from the Daily Run metrics.

    Any other quest type yields an empty context.
    """
    kind = quest.quest_type.value
    context: dict[str, Any] = {}

    if kind == "issue_count":
        wanted_labels = quest.criteria.get("issue_labels", [])
        context["closed_issues"] = await _fetch_closed_issues(wanted_labels)
        return context

    if kind == "issue_reduce":
        wanted_labels = quest.criteria.get("issue_labels", [])
        context["current_issue_count"] = await _fetch_open_issue_count(wanted_labels)
        context["previous_issue_count"] = await _fetch_previous_issue_count(
            wanted_labels, quest.criteria.get("lookback_days", 7)
        )
        return context

    if kind == "daily_run":
        daily_run = await _fetch_daily_run_metrics()
        context["sessions_completed"] = daily_run.get("sessions_completed", 0)

    return context
|
||||
|
||||
|
||||
async def _fetch_closed_issues(labels: list[str]) -> list[dict]:
    """Fetch closed issues matching the given labels.

    Args:
        labels: Label names to filter by; an empty list applies no label
            filter.

    Returns:
        The closed issues as returned by Gitea, or an empty list when Gitea
        is unreachable or anything fails.
    """
    import asyncio

    def _fetch() -> list[dict]:
        from dashboard.routes.daily_run import GiteaClient, _load_config

        config = _load_config()
        client = GiteaClient(config, _get_gitea_token(config))
        if not client.is_available():
            return []

        # No "limit" here: get_paginated sets its own page size. The label
        # filter is only sent when there is something to filter on.
        params = {"state": "closed"}
        if labels:
            params["labels"] = ",".join(labels)
        return client.get_paginated("issues", params)

    try:
        # The client uses blocking urllib calls; keep them off the event loop.
        return await asyncio.to_thread(_fetch)
    except Exception as exc:
        logger.debug("Failed to fetch closed issues: %s", exc)
        return []
|
||||
|
||||
|
||||
async def _fetch_open_issue_count(labels: list[str]) -> int:
    """Fetch count of open issues with given labels.

    Args:
        labels: Label names to filter by; an empty list applies no label
            filter.

    Returns:
        Number of open issues, or 0 when Gitea is unreachable or anything
        fails.
    """
    import asyncio

    def _count() -> int:
        from dashboard.routes.daily_run import GiteaClient, _load_config

        config = _load_config()
        client = GiteaClient(config, _get_gitea_token(config))
        if not client.is_available():
            return 0

        # No "limit" here: get_paginated sets its own page size. The label
        # filter is only sent when there is something to filter on.
        params = {"state": "open"}
        if labels:
            params["labels"] = ",".join(labels)
        return len(client.get_paginated("issues", params))

    try:
        # The client uses blocking urllib calls; keep them off the event loop.
        return await asyncio.to_thread(_count)
    except Exception as exc:
        logger.debug("Failed to fetch open issue count: %s", exc)
        return 0
|
||||
|
||||
|
||||
async def _fetch_previous_issue_count(labels: list[str], lookback_days: int) -> int:
    """Return the issue count to compare against (simplified).

    No historical data is queried: the result is the current open-issue
    count for *labels*, and *lookback_days* is accepted but not used.
    """
    # Simplified implementation; a full one would query historical data.
    baseline = await _fetch_open_issue_count(labels)
    return baseline
|
||||
|
||||
|
||||
async def _fetch_daily_run_metrics() -> dict[str, Any]:
    """Fetch Daily Run session counts for a 7-day lookback.

    Returns:
        Dict with ``sessions_completed`` and ``sessions_previous``; both are
        0 when the metrics are unavailable or anything fails.
    """
    import asyncio

    try:
        from dashboard.routes.daily_run import _get_metrics

        # _get_metrics does blocking urllib and file I/O; run it in a worker
        # thread so the event loop is not stalled while it waits.
        metrics = await asyncio.to_thread(_get_metrics, lookback_days=7)
        if metrics:
            return {
                "sessions_completed": metrics.sessions_completed,
                "sessions_previous": metrics.sessions_previous,
            }
    except Exception as exc:
        logger.debug("Failed to fetch Daily Run metrics: %s", exc)

    return {"sessions_completed": 0, "sessions_previous": 0}
|
||||
|
||||
|
||||
def _get_gitea_token(config: dict) -> str | None:
    """Resolve the Gitea API token for *config*.

    An explicit ``"token"`` entry wins. Otherwise the token file is read
    and stripped of surrounding whitespace; its path comes from
    ``config["token_file"]`` and defaults to ``~/.hermes/gitea_token``.

    Returns:
        The token string, or ``None`` when there is no ``"token"`` entry and
        the token file does not exist.
    """
    try:
        return config["token"]
    except KeyError:
        pass

    from pathlib import Path

    candidate = Path(config.get("token_file", "~/.hermes/gitea_token")).expanduser()
    return candidate.read_text().strip() if candidate.exists() else None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Daily Run Integration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def check_daily_run_quests(agent_id: str = "system") -> list[dict]:
    """Evaluate every quest against fresh Daily Run data and award rewards.

    Does nothing when the ``auto_detect_on_daily_run`` quest setting is
    disabled (it defaults to enabled).

    Args:
        agent_id: Agent the quests are evaluated for.

    Returns:
        The rewards reported by ``auto_evaluate_all_quests``, or an empty
        list when auto-detection is disabled.
    """
    _, quest_settings = load_quest_config()
    if not quest_settings.get("auto_detect_on_daily_run", True):
        return []

    # Session counts from the Daily Run metrics.
    daily_run = await _fetch_daily_run_metrics()
    context = {
        "sessions_completed": daily_run.get("sessions_completed", 0),
        "sessions_previous": daily_run.get("sessions_previous", 0),
    }

    # Closed issues are fetched once, using the labels of the first active
    # issue_count quest.
    # NOTE(review): other issue_count quests with different labels share
    # this same list - confirm that is intended.
    first_issue_quest = next(
        (quest for quest in get_active_quests() if quest.quest_type.value == "issue_count"),
        None,
    )
    if first_issue_quest is not None:
        context["closed_issues"] = await _fetch_closed_issues(
            first_issue_quest.criteria.get("issue_labels", [])
        )

    return auto_evaluate_all_quests(agent_id, context)
|
||||
353
src/dashboard/routes/scorecards.py
Normal file
353
src/dashboard/routes/scorecards.py
Normal file
@@ -0,0 +1,353 @@
|
||||
"""Agent scorecard routes — API endpoints for generating and viewing scorecards."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
|
||||
from fastapi import APIRouter, Query, Request
|
||||
from fastapi.responses import HTMLResponse, JSONResponse
|
||||
|
||||
from dashboard.services.scorecard_service import (
|
||||
PeriodType,
|
||||
generate_all_scorecards,
|
||||
generate_scorecard,
|
||||
get_tracked_agents,
|
||||
)
|
||||
from dashboard.templating import templates
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/scorecards", tags=["scorecards"])
|
||||
|
||||
|
||||
def _format_period_label(period_type: PeriodType) -> str:
    """Return the display label: "Daily" for the daily period, else "Weekly"."""
    if period_type == PeriodType.daily:
        return "Daily"
    return "Weekly"
|
||||
|
||||
|
||||
@router.get("/api/agents")
async def list_tracked_agents() -> dict[str, list[str]]:
    """List the agent IDs that scorecards are tracked for.

    Returns:
        Dict whose ``"agents"`` entry is the result of ``get_tracked_agents``.
    """
    tracked = get_tracked_agents()
    return {"agents": tracked}
|
||||
|
||||
|
||||
@router.get("/api/{agent_id}")
async def get_agent_scorecard(
    agent_id: str,
    period: str = Query(default="daily", description="Period type: 'daily' or 'weekly'"),
) -> JSONResponse:
    """Generate the scorecard of one agent.

    Args:
        agent_id: The agent ID (e.g., 'kimi', 'claude').
        period: 'daily' or 'weekly', case-insensitive (default: daily).

    Returns:
        The scorecard as JSON; 400 for an unknown period, 404 when no
        scorecard exists for the agent, 500 when generation raises.
    """
    try:
        period_type = PeriodType(period.lower())
    except ValueError:
        bad_period = {"error": f"Invalid period '{period}'. Use 'daily' or 'weekly'."}
        return JSONResponse(status_code=400, content=bad_period)

    try:
        scorecard = generate_scorecard(agent_id, period_type)
        if scorecard is not None:
            return JSONResponse(content=scorecard.to_dict())

        return JSONResponse(
            status_code=404,
            content={"error": f"No scorecard found for agent '{agent_id}'"},
        )
    except Exception as exc:
        logger.error("Failed to generate scorecard for %s: %s", agent_id, exc)
        return JSONResponse(
            status_code=500,
            content={"error": f"Failed to generate scorecard: {exc!s}"},
        )
|
||||
|
||||
|
||||
@router.get("/api")
async def get_all_scorecards(
    period: str = Query(default="daily", description="Period type: 'daily' or 'weekly'"),
) -> JSONResponse:
    """Generate the scorecards of every tracked agent.

    Args:
        period: 'daily' or 'weekly', case-insensitive (default: daily).

    Returns:
        JSON object with the period, the list of scorecards and their
        count; 400 for an unknown period, 500 when generation raises.
    """
    try:
        period_type = PeriodType(period.lower())
    except ValueError:
        bad_period = {"error": f"Invalid period '{period}'. Use 'daily' or 'weekly'."}
        return JSONResponse(status_code=400, content=bad_period)

    try:
        scorecards = generate_all_scorecards(period_type)
        body = {
            "period": period_type.value,
            "scorecards": [card.to_dict() for card in scorecards],
            "count": len(scorecards),
        }
        return JSONResponse(content=body)
    except Exception as exc:
        logger.error("Failed to generate scorecards: %s", exc)
        return JSONResponse(
            status_code=500,
            content={"error": f"Failed to generate scorecards: {exc!s}"},
        )
|
||||
|
||||
|
||||
@router.get("", response_class=HTMLResponse)
async def scorecards_page(request: Request) -> HTMLResponse:
    """Render the scorecards dashboard page (``scorecards.html``).

    The template receives the tracked agent IDs and the two selectable
    periods.
    """
    page_context = {
        "agents": get_tracked_agents(),
        "periods": ["daily", "weekly"],
    }
    return templates.TemplateResponse(request, "scorecards.html", page_context)
|
||||
|
||||
|
||||
@router.get("/panel/{agent_id}", response_class=HTMLResponse)
async def agent_scorecard_panel(
    request: Request,
    agent_id: str,
    period: str = Query(default="daily"),
) -> HTMLResponse:
    """Render an individual agent scorecard panel (for HTMX).

    ``agent_id`` is taken straight from the URL path, and the narrative,
    pattern and error strings are free text, so every one of them is
    HTML-escaped before being placed into the markup.

    Args:
        request: The request object
        agent_id: The agent ID
        period: 'daily' or 'weekly'; any other value falls back to daily

    Returns:
        HTML panel with scorecard content. A missing scorecard or a failure is
        also answered with status 200 and a small placeholder/error card
        (presumably so the HTMX swap still happens -- confirm with the template).
    """
    # Function-scope import: the module's import block lies outside this block.
    from html import escape

    try:
        period_type = PeriodType(period.lower())
    except ValueError:
        period_type = PeriodType.daily

    # Caller-controlled value: never interpolate it raw.
    safe_title = escape(agent_id.title())

    try:
        scorecard = generate_scorecard(agent_id, period_type)

        if scorecard is None:
            return HTMLResponse(
                content=f"""
                <div class="card mc-panel">
                    <h5 class="card-title">{safe_title}</h5>
                    <p class="text-muted">No activity recorded for this period.</p>
                </div>
                """,
                status_code=200,
            )

        data = scorecard.to_dict()

        # Build patterns HTML
        patterns_html = ""
        if data["patterns"]:
            patterns_list = "".join(f"<li>{escape(str(p))}</li>" for p in data["patterns"])
            patterns_html = f"""
            <div class="mt-3">
                <h6>Patterns</h6>
                <ul class="list-unstyled text-info">
                    {patterns_list}
                </ul>
            </div>
            """

        # Build bullets HTML
        bullets_html = "".join(f"<li>{escape(str(b))}</li>" for b in data["narrative_bullets"])

        # Build metrics summary
        metrics = data["metrics"]
        token_net = metrics["token_net"]
        token_class = "text-success" if token_net >= 0 else "text-danger"
        token_sign = "+" if token_net > 0 else ""

        html_content = f"""
        <div class="card mc-panel">
            <div class="card-header d-flex justify-content-between align-items-center">
                <h5 class="card-title mb-0">{safe_title}</h5>
                <span class="badge bg-secondary">{_format_period_label(period_type)}</span>
            </div>
            <div class="card-body">
                <ul class="list-unstyled mb-3">
                    {bullets_html}
                </ul>

                <div class="row text-center small">
                    <div class="col">
                        <div class="text-muted">PRs</div>
                        <div class="fw-bold">{metrics["prs_opened"]}/{metrics["prs_merged"]}</div>
                        <div class="text-muted" style="font-size: 0.75rem;">
                            {int(metrics["pr_merge_rate"] * 100)}% merged
                        </div>
                    </div>
                    <div class="col">
                        <div class="text-muted">Issues</div>
                        <div class="fw-bold">{metrics["issues_touched"]}</div>
                    </div>
                    <div class="col">
                        <div class="text-muted">Tests</div>
                        <div class="fw-bold">{metrics["tests_affected"]}</div>
                    </div>
                    <div class="col">
                        <div class="text-muted">Tokens</div>
                        <div class="fw-bold {token_class}">
                            {token_sign}{token_net}
                        </div>
                    </div>
                </div>

                {patterns_html}
            </div>
        </div>
        """

        return HTMLResponse(content=html_content)

    except Exception as exc:
        logger.error("Failed to render scorecard panel for %s: %s", agent_id, exc)
        # The exception text may echo the agent id or other input, so escape it too.
        return HTMLResponse(
            content=f"""
            <div class="card mc-panel border-danger">
                <h5 class="card-title">{safe_title}</h5>
                <p class="text-danger">Error loading scorecard: {escape(str(exc))}</p>
            </div>
            """,
            status_code=200,
        )
|
||||
|
||||
|
||||
@router.get("/all/panels", response_class=HTMLResponse)
async def all_scorecard_panels(
    request: Request,
    period: str = Query(default="daily"),
) -> HTMLResponse:
    """Render all agent scorecard panels (for HTMX).

    Agent names, narrative bullets, patterns and error text are HTML-escaped
    before interpolation. The "Generated" footer uses an aware UTC timestamp
    so that it matches its "UTC" label.

    Args:
        request: The request object
        period: 'daily' or 'weekly'; any other value falls back to daily

    Returns:
        HTML with all scorecard panels. A failure is answered with status 200
        and an alert box carrying the error text.
    """
    # Function-scope imports: the module's import block lies outside this block.
    from datetime import timezone
    from html import escape

    try:
        period_type = PeriodType(period.lower())
    except ValueError:
        period_type = PeriodType.daily

    try:
        scorecards = generate_all_scorecards(period_type)
        period_label = _format_period_label(period_type)

        panels: list[str] = []
        for scorecard in scorecards:
            data = scorecard.to_dict()

            # Build patterns HTML
            patterns_html = ""
            if data["patterns"]:
                patterns_list = "".join(f"<li>{escape(str(p))}</li>" for p in data["patterns"])
                patterns_html = f"""
                <div class="mt-3">
                    <h6>Patterns</h6>
                    <ul class="list-unstyled text-info">
                        {patterns_list}
                    </ul>
                </div>
                """

            # Build bullets HTML
            bullets_html = "".join(
                f"<li>{escape(str(b))}</li>" for b in data["narrative_bullets"]
            )
            metrics = data["metrics"]
            token_net = metrics["token_net"]
            token_class = "text-success" if token_net >= 0 else "text-danger"
            token_sign = "+" if token_net > 0 else ""

            panel_html = f"""
            <div class="col-md-6 col-lg-4 mb-3">
                <div class="card mc-panel">
                    <div class="card-header d-flex justify-content-between align-items-center">
                        <h5 class="card-title mb-0">{escape(scorecard.agent_id.title())}</h5>
                        <span class="badge bg-secondary">{period_label}</span>
                    </div>
                    <div class="card-body">
                        <ul class="list-unstyled mb-3">
                            {bullets_html}
                        </ul>

                        <div class="row text-center small">
                            <div class="col">
                                <div class="text-muted">PRs</div>
                                <div class="fw-bold">{metrics["prs_opened"]}/{metrics["prs_merged"]}</div>
                                <div class="text-muted" style="font-size: 0.75rem;">
                                    {int(metrics["pr_merge_rate"] * 100)}% merged
                                </div>
                            </div>
                            <div class="col">
                                <div class="text-muted">Issues</div>
                                <div class="fw-bold">{metrics["issues_touched"]}</div>
                            </div>
                            <div class="col">
                                <div class="text-muted">Tests</div>
                                <div class="fw-bold">{metrics["tests_affected"]}</div>
                            </div>
                            <div class="col">
                                <div class="text-muted">Tokens</div>
                                <div class="fw-bold {token_class}">
                                    {token_sign}{token_net}
                                </div>
                            </div>
                        </div>

                        {patterns_html}
                    </div>
                </div>
            </div>
            """
            panels.append(panel_html)

        # The label says UTC, so take the time in UTC rather than naive local time.
        generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")

        html_content = f"""
        <div class="row">
            {"".join(panels)}
        </div>
        <div class="text-muted small mt-2">
            Generated: {generated_at}
        </div>
        """

        return HTMLResponse(content=html_content)

    except Exception as exc:
        logger.error("Failed to render all scorecard panels: %s", exc)
        return HTMLResponse(
            content=f"""
            <div class="alert alert-danger">
                Error loading scorecards: {escape(str(exc))}
            </div>
            """,
            status_code=200,
        )
|
||||
74
src/dashboard/routes/sovereignty_metrics.py
Normal file
74
src/dashboard/routes/sovereignty_metrics.py
Normal file
@@ -0,0 +1,74 @@
|
||||
"""Sovereignty metrics dashboard routes.
|
||||
|
||||
Provides API endpoints and HTMX partials for tracking research
|
||||
sovereignty progress against graduation targets.
|
||||
|
||||
Refs: #981
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, Request
|
||||
from fastapi.responses import HTMLResponse
|
||||
|
||||
from config import settings
|
||||
from dashboard.templating import templates
|
||||
from infrastructure.sovereignty_metrics import (
|
||||
GRADUATION_TARGETS,
|
||||
get_sovereignty_store,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/sovereignty", tags=["sovereignty"])
|
||||
|
||||
|
||||
@router.get("/metrics")
async def sovereignty_metrics_api() -> dict[str, Any]:
    """JSON API: metrics summary, unacknowledged alerts, targets and cost threshold."""
    store = get_sovereignty_store()
    payload: dict[str, Any] = {"metrics": store.get_summary()}
    payload["alerts"] = store.get_alerts(unacknowledged_only=True)
    payload["targets"] = GRADUATION_TARGETS
    payload["cost_threshold"] = settings.sovereignty_api_cost_alert_threshold
    return payload
|
||||
|
||||
|
||||
@router.get("/metrics/panel", response_class=HTMLResponse)
async def sovereignty_metrics_panel(request: Request) -> HTMLResponse:
    """HTMX partial: render the sovereignty metrics progress panel."""
    store = get_sovereignty_store()
    context = {
        "metrics": store.get_summary(),
        "alerts": store.get_alerts(unacknowledged_only=True),
        "targets": GRADUATION_TARGETS,
    }
    return templates.TemplateResponse(request, "partials/sovereignty_metrics.html", context)
|
||||
|
||||
|
||||
@router.get("/alerts")
async def sovereignty_alerts_api() -> dict[str, Any]:
    """JSON API: every sovereignty alert plus the unacknowledged subset."""
    store = get_sovereignty_store()
    every_alert = store.get_alerts(unacknowledged_only=False)
    open_alerts = store.get_alerts(unacknowledged_only=True)
    return {"alerts": every_alert, "unacknowledged": open_alerts}
|
||||
|
||||
|
||||
@router.post("/alerts/{alert_id}/acknowledge")
async def acknowledge_alert(alert_id: int) -> dict[str, bool]:
    """Mark one sovereignty alert as acknowledged and report the store's result."""
    acknowledged = get_sovereignty_store().acknowledge_alert(alert_id)
    return {"success": acknowledged}
|
||||
@@ -16,52 +16,11 @@ router = APIRouter(tags=["system"])
|
||||
|
||||
@router.get("/lightning/ledger", response_class=HTMLResponse)
|
||||
async def lightning_ledger(request: Request):
|
||||
"""Ledger and balance page."""
|
||||
# Mock data for now, as this seems to be a UI-first feature
|
||||
balance = {
|
||||
"available_sats": 1337,
|
||||
"incoming_total_sats": 2000,
|
||||
"outgoing_total_sats": 663,
|
||||
"fees_paid_sats": 5,
|
||||
"net_sats": 1337,
|
||||
"pending_incoming_sats": 0,
|
||||
"pending_outgoing_sats": 0,
|
||||
}
|
||||
"""Ledger and balance page backed by the in-memory Lightning ledger."""
|
||||
from lightning.ledger import get_balance, get_transactions
|
||||
|
||||
# Mock transactions
|
||||
from collections import namedtuple
|
||||
from enum import Enum
|
||||
|
||||
class TxType(Enum):
|
||||
incoming = "incoming"
|
||||
outgoing = "outgoing"
|
||||
|
||||
class TxStatus(Enum):
|
||||
completed = "completed"
|
||||
pending = "pending"
|
||||
|
||||
Tx = namedtuple(
|
||||
"Tx", ["tx_type", "status", "amount_sats", "payment_hash", "memo", "created_at"]
|
||||
)
|
||||
|
||||
transactions = [
|
||||
Tx(
|
||||
TxType.outgoing,
|
||||
TxStatus.completed,
|
||||
50,
|
||||
"hash1",
|
||||
"Model inference",
|
||||
"2026-03-04 10:00:00",
|
||||
),
|
||||
Tx(
|
||||
TxType.incoming,
|
||||
TxStatus.completed,
|
||||
1000,
|
||||
"hash2",
|
||||
"Manual deposit",
|
||||
"2026-03-03 15:00:00",
|
||||
),
|
||||
]
|
||||
balance = get_balance()
|
||||
transactions = get_transactions()
|
||||
|
||||
return templates.TemplateResponse(
|
||||
request,
|
||||
@@ -70,7 +29,7 @@ async def lightning_ledger(request: Request):
|
||||
"balance": balance,
|
||||
"transactions": transactions,
|
||||
"tx_types": ["incoming", "outgoing"],
|
||||
"tx_statuses": ["completed", "pending"],
|
||||
"tx_statuses": ["pending", "settled", "failed", "expired"],
|
||||
"filter_type": None,
|
||||
"filter_status": None,
|
||||
"stats": {},
|
||||
@@ -97,11 +56,13 @@ async def self_modify_queue(request: Request):
|
||||
|
||||
@router.get("/swarm/mission-control", response_class=HTMLResponse)
async def mission_control(request: Request):
    """Serve the swarm mission control page (template rendered with an empty context)."""
    template_name = "mission_control.html"
    return templates.TemplateResponse(request, template_name, {})
|
||||
|
||||
|
||||
@router.get("/bugs", response_class=HTMLResponse)
|
||||
async def bugs_page(request: Request):
|
||||
"""Render the bug tracking page."""
|
||||
return templates.TemplateResponse(
|
||||
request,
|
||||
"bugs.html",
|
||||
@@ -116,16 +77,19 @@ async def bugs_page(request: Request):
|
||||
|
||||
@router.get("/self-coding", response_class=HTMLResponse)
async def self_coding(request: Request):
    """Serve the self-coding status page; ``stats`` is passed as an empty dict."""
    context = {"stats": {}}
    return templates.TemplateResponse(request, "self_coding.html", context)
|
||||
|
||||
|
||||
@router.get("/hands", response_class=HTMLResponse)
async def hands_page(request: Request):
    """Serve the hands page; ``executions`` is passed as an empty list."""
    context = {"executions": []}
    return templates.TemplateResponse(request, "hands.html", context)
|
||||
|
||||
|
||||
@router.get("/creative/ui", response_class=HTMLResponse)
async def creative_ui(request: Request):
    """Serve the creative UI playground page (template rendered with an empty context)."""
    template_name = "creative.html"
    return templates.TemplateResponse(request, template_name, {})
|
||||
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import sqlite3
|
||||
import uuid
|
||||
from collections.abc import Generator
|
||||
from contextlib import closing, contextmanager
|
||||
from datetime import datetime
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import APIRouter, Form, HTTPException, Request
|
||||
@@ -143,61 +143,49 @@ async def tasks_page(request: Request):
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _render_task_list(request: Request, query: str, empty_msg: str) -> HTMLResponse:
    """Run *query* against the task DB and render each row as a task-card partial.

    Args:
        request: The request object handed to the template renderer
        query: Complete SQL statement to execute (run as-is, without parameters)
        empty_msg: Text shown inside the placeholder when the query returns no rows

    Returns:
        The concatenated task cards, or an ``empty-column`` placeholder div.
    """
    with _get_db() as db:
        rows = db.execute(query).fetchall()

    cards: list[str] = []
    for row in rows:
        view = _TaskView(_row_to_dict(row))
        rendered = templates.TemplateResponse(request, "partials/task_card.html", {"task": view})
        cards.append(rendered.body.decode())

    if cards:
        return HTMLResponse("".join(cards))
    return HTMLResponse(f'<div class="empty-column">{empty_msg}</div>')
|
||||
|
||||
|
||||
@router.get("/tasks/pending", response_class=HTMLResponse)
|
||||
async def tasks_pending(request: Request):
|
||||
with _get_db() as db:
|
||||
rows = db.execute(
|
||||
"SELECT * FROM tasks WHERE status='pending_approval' ORDER BY created_at DESC"
|
||||
).fetchall()
|
||||
tasks = [_TaskView(_row_to_dict(r)) for r in rows]
|
||||
parts = []
|
||||
for task in tasks:
|
||||
parts.append(
|
||||
templates.TemplateResponse(
|
||||
request, "partials/task_card.html", {"task": task}
|
||||
).body.decode()
|
||||
)
|
||||
if not parts:
|
||||
return HTMLResponse('<div class="empty-column">No pending tasks</div>')
|
||||
return HTMLResponse("".join(parts))
|
||||
"""Return HTMX partial for pending approval tasks."""
|
||||
return _render_task_list(
|
||||
request,
|
||||
"SELECT * FROM tasks WHERE status='pending_approval' ORDER BY created_at DESC",
|
||||
"No pending tasks",
|
||||
)
|
||||
|
||||
|
||||
@router.get("/tasks/active", response_class=HTMLResponse)
|
||||
async def tasks_active(request: Request):
|
||||
with _get_db() as db:
|
||||
rows = db.execute(
|
||||
"SELECT * FROM tasks WHERE status IN ('approved','running','paused') ORDER BY created_at DESC"
|
||||
).fetchall()
|
||||
tasks = [_TaskView(_row_to_dict(r)) for r in rows]
|
||||
parts = []
|
||||
for task in tasks:
|
||||
parts.append(
|
||||
templates.TemplateResponse(
|
||||
request, "partials/task_card.html", {"task": task}
|
||||
).body.decode()
|
||||
)
|
||||
if not parts:
|
||||
return HTMLResponse('<div class="empty-column">No active tasks</div>')
|
||||
return HTMLResponse("".join(parts))
|
||||
"""Return HTMX partial for active (approved/running/paused) tasks."""
|
||||
return _render_task_list(
|
||||
request,
|
||||
"SELECT * FROM tasks WHERE status IN ('approved','running','paused') ORDER BY created_at DESC",
|
||||
"No active tasks",
|
||||
)
|
||||
|
||||
|
||||
@router.get("/tasks/completed", response_class=HTMLResponse)
|
||||
async def tasks_completed(request: Request):
|
||||
with _get_db() as db:
|
||||
rows = db.execute(
|
||||
"SELECT * FROM tasks WHERE status IN ('completed','vetoed','failed') ORDER BY completed_at DESC LIMIT 50"
|
||||
).fetchall()
|
||||
tasks = [_TaskView(_row_to_dict(r)) for r in rows]
|
||||
parts = []
|
||||
for task in tasks:
|
||||
parts.append(
|
||||
templates.TemplateResponse(
|
||||
request, "partials/task_card.html", {"task": task}
|
||||
).body.decode()
|
||||
)
|
||||
if not parts:
|
||||
return HTMLResponse('<div class="empty-column">No completed tasks yet</div>')
|
||||
return HTMLResponse("".join(parts))
|
||||
"""Return HTMX partial for completed/vetoed/failed tasks (last 50)."""
|
||||
return _render_task_list(
|
||||
request,
|
||||
"SELECT * FROM tasks WHERE status IN ('completed','vetoed','failed') ORDER BY completed_at DESC LIMIT 50",
|
||||
"No completed tasks yet",
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -219,7 +207,7 @@ async def create_task_form(
|
||||
raise HTTPException(status_code=400, detail="Task title cannot be empty")
|
||||
|
||||
task_id = str(uuid.uuid4())
|
||||
now = datetime.utcnow().isoformat()
|
||||
now = datetime.now(UTC).isoformat()
|
||||
priority = priority if priority in VALID_PRIORITIES else "normal"
|
||||
|
||||
with _get_db() as db:
|
||||
@@ -241,26 +229,31 @@ async def create_task_form(
|
||||
|
||||
@router.post("/tasks/{task_id}/approve", response_class=HTMLResponse)
async def approve_task(request: Request, task_id: str):
    """Set the task's status to ``approved`` and return what ``_set_status`` produces."""
    outcome = await _set_status(request, task_id, "approved")
    return outcome
|
||||
|
||||
|
||||
@router.post("/tasks/{task_id}/veto", response_class=HTMLResponse)
async def veto_task(request: Request, task_id: str):
    """Set the task's status to ``vetoed`` and return what ``_set_status`` produces."""
    outcome = await _set_status(request, task_id, "vetoed")
    return outcome
|
||||
|
||||
|
||||
@router.post("/tasks/{task_id}/pause", response_class=HTMLResponse)
async def pause_task(request: Request, task_id: str):
    """Set the task's status to ``paused`` and return what ``_set_status`` produces."""
    outcome = await _set_status(request, task_id, "paused")
    return outcome
|
||||
|
||||
|
||||
@router.post("/tasks/{task_id}/cancel", response_class=HTMLResponse)
async def cancel_task(request: Request, task_id: str):
    """Cancel a task; cancellation is stored with the same ``vetoed`` status as a veto."""
    outcome = await _set_status(request, task_id, "vetoed")
    return outcome
|
||||
|
||||
|
||||
@router.post("/tasks/{task_id}/retry", response_class=HTMLResponse)
async def retry_task(request: Request, task_id: str):
    """Retry a task by setting its status back to ``approved``."""
    outcome = await _set_status(request, task_id, "approved")
    return outcome
|
||||
|
||||
|
||||
@@ -271,6 +264,7 @@ async def modify_task(
|
||||
title: str = Form(...),
|
||||
description: str = Form(""),
|
||||
):
|
||||
"""Update task title and description."""
|
||||
with _get_db() as db:
|
||||
db.execute(
|
||||
"UPDATE tasks SET title=?, description=? WHERE id=?",
|
||||
@@ -287,7 +281,7 @@ async def modify_task(
|
||||
async def _set_status(request: Request, task_id: str, new_status: str):
|
||||
"""Helper to update status and return refreshed task card."""
|
||||
completed_at = (
|
||||
datetime.utcnow().isoformat() if new_status in ("completed", "vetoed", "failed") else None
|
||||
datetime.now(UTC).isoformat() if new_status in ("completed", "vetoed", "failed") else None
|
||||
)
|
||||
with _get_db() as db:
|
||||
db.execute(
|
||||
@@ -316,7 +310,7 @@ async def api_create_task(request: Request):
|
||||
raise HTTPException(422, "title is required")
|
||||
|
||||
task_id = str(uuid.uuid4())
|
||||
now = datetime.utcnow().isoformat()
|
||||
now = datetime.now(UTC).isoformat()
|
||||
priority = body.get("priority", "normal")
|
||||
if priority not in VALID_PRIORITIES:
|
||||
priority = "normal"
|
||||
@@ -358,7 +352,7 @@ async def api_update_status(task_id: str, request: Request):
|
||||
raise HTTPException(422, f"Invalid status. Must be one of: {VALID_STATUSES}")
|
||||
|
||||
completed_at = (
|
||||
datetime.utcnow().isoformat() if new_status in ("completed", "vetoed", "failed") else None
|
||||
datetime.now(UTC).isoformat() if new_status in ("completed", "vetoed", "failed") else None
|
||||
)
|
||||
with _get_db() as db:
|
||||
db.execute(
|
||||
|
||||
@@ -40,9 +40,9 @@ async def tools_page(request: Request):
|
||||
total_calls = 0
|
||||
|
||||
return templates.TemplateResponse(
|
||||
request,
|
||||
"tools.html",
|
||||
{
|
||||
"request": request,
|
||||
"available_tools": available_tools,
|
||||
"agent_tools": agent_tools,
|
||||
"total_calls": total_calls,
|
||||
|
||||
108
src/dashboard/routes/tower.py
Normal file
108
src/dashboard/routes/tower.py
Normal file
@@ -0,0 +1,108 @@
|
||||
"""Tower dashboard — real-time Spark visualization via WebSocket.
|
||||
|
||||
GET /tower — HTML Tower dashboard (Thinking / Predicting / Advising)
|
||||
WS /tower/ws — WebSocket stream of Spark engine state updates
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
|
||||
from fastapi import APIRouter, Request, WebSocket
|
||||
from fastapi.responses import HTMLResponse
|
||||
|
||||
from dashboard.templating import templates
|
||||
from spark.engine import spark_engine
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/tower", tags=["tower"])
|
||||
|
||||
_PUSH_INTERVAL = 5 # seconds between state broadcasts
|
||||
|
||||
|
||||
def _spark_snapshot() -> dict:
    """Collect the current Spark state into a plain dict.

    Reads the engine status, the 10 most recent timeline events, 5
    predictions and all advisories. Agent and task ids are shortened to
    their first 8 characters; JSON payloads that cannot be decoded are
    replaced by an empty dict.
    """
    status = spark_engine.status()

    events = []
    for ev in spark_engine.get_timeline(limit=10):
        record = dict(
            event_type=ev.event_type,
            description=ev.description,
            importance=ev.importance,
            created_at=ev.created_at,
        )
        # Ids are optional; the key is only present when the event carries one.
        if ev.agent_id:
            record["agent_id"] = ev.agent_id[:8]
        if ev.task_id:
            record["task_id"] = ev.task_id[:8]
        try:
            payload = json.loads(ev.data)
        except (json.JSONDecodeError, TypeError):
            payload = {}
        record["data"] = payload
        events.append(record)

    preds = []
    for p in spark_engine.get_predictions(limit=5):
        short_task = p.task_id[:8] if p.task_id else "?"
        summary = dict(
            task_id=short_task,
            accuracy=p.accuracy,
            evaluated=p.evaluated_at is not None,
            created_at=p.created_at,
        )
        try:
            predicted = json.loads(p.predicted_value)
        except (json.JSONDecodeError, TypeError):
            predicted = {}
        summary["predicted"] = predicted
        preds.append(summary)

    advs = []
    for a in spark_engine.get_advisories():
        advs.append(
            dict(
                category=a.category,
                priority=a.priority,
                title=a.title,
                detail=a.detail,
                suggested_action=a.suggested_action,
            )
        )

    snapshot = {"type": "spark_state", "status": status}
    snapshot["events"] = events
    snapshot["predictions"] = preds
    snapshot["advisories"] = advs
    return snapshot
|
||||
|
||||
|
||||
@router.get("", response_class=HTMLResponse)
async def tower_ui(request: Request):
    """Serve the Tower dashboard page, seeded with a fresh Spark snapshot."""
    context = {"snapshot": _spark_snapshot()}
    return templates.TemplateResponse(request, "tower.html", context)
|
||||
|
||||
|
||||
@router.websocket("/ws")
async def tower_ws(websocket: WebSocket) -> None:
    """Stream Spark state snapshots to the Tower dashboard.

    Accepts the socket, sends one snapshot immediately and then another every
    ``_PUSH_INTERVAL`` seconds until sending fails.

    A failure while building or serialising a snapshot is logged with its
    traceback and ends the stream. Errors raised while sending or sleeping are
    treated as an ordinary disconnect and only logged at debug level.
    """
    await websocket.accept()
    logger.info("Tower WS connected")

    try:
        while True:
            # Keep snapshot errors apart from transport errors so that a
            # genuine bug is not reported as a quiet "disconnected".
            try:
                message = json.dumps(_spark_snapshot())
            except Exception:
                logger.exception("Tower WS snapshot failed")
                return
            await websocket.send_text(message)
            await asyncio.sleep(_PUSH_INTERVAL)
    except Exception:
        logger.debug("Tower WS disconnected")
|
||||
@@ -59,6 +59,7 @@ async def tts_speak(text: str = Form(...)):
|
||||
voice_tts.speak(text)
|
||||
return {"spoken": True, "text": text}
|
||||
except Exception as exc:
|
||||
logger.exception("TTS speak failed")
|
||||
return {"spoken": False, "reason": str(exc)}
|
||||
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import sqlite3
|
||||
import uuid
|
||||
from collections.abc import Generator
|
||||
from contextlib import closing, contextmanager
|
||||
from datetime import datetime
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import APIRouter, Form, HTTPException, Request
|
||||
@@ -144,7 +144,7 @@ async def submit_work_order(
|
||||
related_files: str = Form(""),
|
||||
):
|
||||
wo_id = str(uuid.uuid4())
|
||||
now = datetime.utcnow().isoformat()
|
||||
now = datetime.now(UTC).isoformat()
|
||||
priority = priority if priority in PRIORITIES else "medium"
|
||||
category = category if category in CATEGORIES else "suggestion"
|
||||
|
||||
@@ -211,7 +211,7 @@ async def active_partial(request: Request):
|
||||
|
||||
async def _update_status(request: Request, wo_id: str, new_status: str, **extra):
|
||||
completed_at = (
|
||||
datetime.utcnow().isoformat() if new_status in ("completed", "rejected") else None
|
||||
datetime.now(UTC).isoformat() if new_status in ("completed", "rejected") else None
|
||||
)
|
||||
with _get_db() as db:
|
||||
sets = ["status=?", "completed_at=COALESCE(?, completed_at)"]
|
||||
|
||||
@@ -17,16 +17,221 @@ or missing.
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
import re
|
||||
import time
|
||||
from collections import deque
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, WebSocket
|
||||
import yaml
|
||||
from fastapi import APIRouter, Request, WebSocket
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel
|
||||
|
||||
from config import settings
|
||||
from infrastructure.presence import produce_bark, serialize_presence
|
||||
from timmy.memory_system import search_memories
|
||||
from timmy.workshop_state import PRESENCE_FILE
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/world", tags=["world"])
|
||||
matrix_router = APIRouter(prefix="/api/matrix", tags=["matrix"])
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Matrix Bark Endpoint — HTTP fallback for bark messages
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Rate limiting: 1 request per 3 seconds per visitor_id
|
||||
_BARK_RATE_LIMIT_SECONDS = 3
|
||||
_bark_last_request: dict[str, float] = {}
|
||||
|
||||
|
||||
class BarkRequest(BaseModel):
    """JSON payload accepted by POST /api/matrix/bark."""

    # The visitor's message; the endpoint rejects it when blank after stripping.
    text: str
    # Caller-supplied visitor identifier; the endpoint uses it as its rate-limit key.
    visitor_id: str
|
||||
|
||||
|
||||
@matrix_router.post("/bark")
async def post_matrix_bark(request: BarkRequest) -> JSONResponse:
    """Generate a bark response for a visitor message.

    HTTP fallback for when WebSocket isn't available. The Matrix frontend
    can POST a message and get Timmy's bark response back as JSON.

    Rate limited to 1 request per ``_BARK_RATE_LIMIT_SECONDS`` seconds per
    visitor_id. The rate-limit table is keyed by a client-supplied id, so
    entries whose window has already expired are evicted once the table
    grows large; otherwise it would grow without bound.

    Request body:
    - text: The visitor's message text
    - visitor_id: Unique identifier for the visitor (used for rate limiting)

    Returns:
    - 200: Bark message in produce_bark() format (a canned reply is used
      when bark generation raises)
    - 429: Rate limit exceeded (try again later)
    - 422: Invalid request (missing/invalid fields)
    """
    # Size at which expired rate-limit entries are swept out.
    max_tracked_visitors = 1024

    # Validate inputs
    text = request.text.strip() if request.text else ""
    visitor_id = request.visitor_id.strip() if request.visitor_id else ""

    if not text:
        return JSONResponse(
            status_code=422,
            content={"error": "text is required"},
        )

    if not visitor_id:
        return JSONResponse(
            status_code=422,
            content={"error": "visitor_id is required"},
        )

    # Rate limiting check
    now = time.time()
    last_request = _bark_last_request.get(visitor_id, 0)
    time_since_last = now - last_request

    if time_since_last < _BARK_RATE_LIMIT_SECONDS:
        retry_after = _BARK_RATE_LIMIT_SECONDS - time_since_last
        return JSONResponse(
            status_code=429,
            content={"error": "Rate limit exceeded. Try again later."},
            headers={"Retry-After": str(int(retry_after) + 1)},
        )

    # Evict entries that can no longer trigger a 429. Dropping them does not
    # change any rate-limit decision; it only bounds memory use.
    if len(_bark_last_request) >= max_tracked_visitors:
        cutoff = now - _BARK_RATE_LIMIT_SECONDS
        expired = [vid for vid, seen in _bark_last_request.items() if seen <= cutoff]
        for vid in expired:
            del _bark_last_request[vid]

    # Record this request
    _bark_last_request[visitor_id] = now

    # Generate bark response
    try:
        reply = await _generate_bark(text)
    except Exception as exc:
        logger.warning("Bark generation failed: %s", exc)
        reply = "Hmm, my thoughts are a bit tangled right now."

    # Build bark response using produce_bark format
    bark = produce_bark(agent_id="timmy", text=reply, style="speech")

    return JSONResponse(
        content=bark,
        headers={"Cache-Control": "no-cache, no-store"},
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Matrix Agent Registry — serves agents to the Matrix visualization
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Agent color mapping — consistent with Matrix visual identity
|
||||
_AGENT_COLORS: dict[str, str] = {
|
||||
"timmy": "#FFD700", # Gold
|
||||
"orchestrator": "#FFD700", # Gold
|
||||
"perplexity": "#3B82F6", # Blue
|
||||
"replit": "#F97316", # Orange
|
||||
"kimi": "#06B6D4", # Cyan
|
||||
"claude": "#A855F7", # Purple
|
||||
"researcher": "#10B981", # Emerald
|
||||
"coder": "#EF4444", # Red
|
||||
"writer": "#EC4899", # Pink
|
||||
"memory": "#8B5CF6", # Violet
|
||||
"experimenter": "#14B8A6", # Teal
|
||||
"forge": "#EF4444", # Red (coder alias)
|
||||
"seer": "#10B981", # Emerald (researcher alias)
|
||||
"quill": "#EC4899", # Pink (writer alias)
|
||||
"echo": "#8B5CF6", # Violet (memory alias)
|
||||
"lab": "#14B8A6", # Teal (experimenter alias)
|
||||
}
|
||||
|
||||
# Agent shape mapping for 3D visualization
|
||||
_AGENT_SHAPES: dict[str, str] = {
|
||||
"timmy": "sphere",
|
||||
"orchestrator": "sphere",
|
||||
"perplexity": "cube",
|
||||
"replit": "cylinder",
|
||||
"kimi": "dodecahedron",
|
||||
"claude": "octahedron",
|
||||
"researcher": "icosahedron",
|
||||
"coder": "cube",
|
||||
"writer": "cone",
|
||||
"memory": "torus",
|
||||
"experimenter": "tetrahedron",
|
||||
"forge": "cube",
|
||||
"seer": "icosahedron",
|
||||
"quill": "cone",
|
||||
"echo": "torus",
|
||||
"lab": "tetrahedron",
|
||||
}
|
||||
|
||||
# Default fallback values
|
||||
_DEFAULT_COLOR = "#9CA3AF" # Gray
|
||||
_DEFAULT_SHAPE = "sphere"
|
||||
_DEFAULT_STATUS = "available"
|
||||
|
||||
|
||||
def _get_agent_color(agent_id: str) -> str:
    """Return the color mapped to *agent_id* (case-insensitive), else the default color."""
    key = agent_id.lower()
    if key in _AGENT_COLORS:
        return _AGENT_COLORS[key]
    return _DEFAULT_COLOR
|
||||
|
||||
|
||||
def _get_agent_shape(agent_id: str) -> str:
    """Return the shape mapped to *agent_id* (case-insensitive), else the default shape."""
    key = agent_id.lower()
    if key in _AGENT_SHAPES:
        return _AGENT_SHAPES[key]
    return _DEFAULT_SHAPE
|
||||
|
||||
|
||||
def _compute_circular_positions(count: int, radius: float = 3.0) -> list[dict[str, float]]:
    """Compute circular positions for agents in the Matrix.

    Agents are spaced evenly on a circle of the given radius on the XZ plane
    at y=0, starting on the positive X axis. Coordinates are rounded to two
    decimals.

    Args:
        count: Number of positions to produce; 0 or less yields an empty list
        radius: Circle radius

    Returns:
        One ``{"x", "y", "z"}`` dict per position, in order.
    """
    positions = []
    for i in range(count):
        angle = (2 * math.pi * i) / count
        # round() keeps the sign of a tiny negative value (e.g. cos(3*pi/2)),
        # giving -0.0; adding 0.0 turns that into a plain 0.0.
        x = round(radius * math.cos(angle), 2) + 0.0
        z = round(radius * math.sin(angle), 2) + 0.0
        positions.append({"x": x, "y": 0.0, "z": z})
    return positions
|
||||
|
||||
|
||||
def _build_matrix_agents_response() -> list[dict[str, Any]]:
    """Assemble the agent list served to the Matrix visualization.

    Takes the agents returned by ``list_agents()`` and attaches a color, a
    shape and a slot on the circle to each one. Any failure, including a
    failing import of the loader, is logged as a warning and results in an
    empty list.
    """
    try:
        from timmy.agents.loader import list_agents

        agents = list_agents()
        if not agents:
            return []

        slots = _compute_circular_positions(len(agents))

        registry = []
        for agent, slot in zip(agents, slots):
            agent_id = agent.get("id", "")
            entry = {
                "id": agent_id,
                "display_name": agent.get("name", agent_id.title()),
                "role": agent.get("role", "general"),
                "color": _get_agent_color(agent_id),
                "position": slot,
                "shape": _get_agent_shape(agent_id),
                "status": agent.get("status", _DEFAULT_STATUS),
            }
            registry.append(entry)
        return registry
    except Exception as exc:
        logger.warning("Failed to load agents for Matrix: %s", exc)
        return []
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/world", tags=["world"])
|
||||
@@ -149,21 +354,7 @@ def _read_presence_file() -> dict | None:
|
||||
|
||||
def _build_world_state(presence: dict) -> dict:
|
||||
"""Transform presence dict into the world/state API response."""
|
||||
return {
|
||||
"timmyState": {
|
||||
"mood": presence.get("mood", "calm"),
|
||||
"activity": presence.get("current_focus", "idle"),
|
||||
"energy": presence.get("energy", 0.5),
|
||||
"confidence": presence.get("confidence", 0.7),
|
||||
},
|
||||
"familiar": presence.get("familiar"),
|
||||
"activeThreads": presence.get("active_threads", []),
|
||||
"recentEvents": presence.get("recent_events", []),
|
||||
"concerns": presence.get("concerns", []),
|
||||
"visitorPresent": False,
|
||||
"updatedAt": presence.get("liveness", datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ")),
|
||||
"version": presence.get("version", 1),
|
||||
}
|
||||
return serialize_presence(presence)
|
||||
|
||||
|
||||
def _get_current_state() -> dict:
|
||||
@@ -224,6 +415,50 @@ async def _heartbeat(websocket: WebSocket) -> None:
|
||||
logger.debug("Heartbeat stopped — connection gone")
|
||||
|
||||
|
||||
async def _authenticate_ws(websocket: WebSocket) -> bool:
    """Authenticate a WebSocket connection against ``settings.matrix_ws_token``.

    The ``?token=`` query parameter is checked first. When it is absent the
    connection is accepted and the first text frame must be
    ``{"type": "auth", "token": "..."}``.

    Returns:
        True if authenticated, or if auth is disabled (empty/unset token).
        A valid query token returns True WITHOUT accepting the socket; the
        caller is responsible for accepting it.
        False after closing the connection with code 4001 otherwise.
    """
    import hmac  # local import so this block does not depend on file-level imports

    expected = settings.matrix_ws_token

    # Auth disabled in dev mode (empty/unset token)
    if not expected:
        return True

    def _token_matches(candidate: object) -> bool:
        # Constant-time comparison so response timing does not reveal how much
        # of a guessed token was correct. Comparing encoded bytes avoids the
        # TypeError compare_digest raises for non-ASCII str arguments.
        return isinstance(candidate, str) and hmac.compare_digest(
            candidate.encode("utf-8"), expected.encode("utf-8")
        )

    # Check query param first (can validate before accept)
    query_token = websocket.query_params.get("token", "")
    if query_token:
        if _token_matches(query_token):
            return True
        # Invalid token in query param - we need to accept to close properly
        await websocket.accept()
        await websocket.close(code=4001, reason="Invalid token")
        return False

    # No query token - accept and wait for the auth message as first frame
    await websocket.accept()
    try:
        raw = await websocket.receive_text()
        data = json.loads(raw)
    except (json.JSONDecodeError, TypeError):
        # Non-JSON first message without valid token
        await websocket.close(code=4001, reason="Authentication required")
        return False

    # Valid JSON that is not an object (list, string, number) has no .get();
    # treat it as a failed auth attempt instead of letting AttributeError escape.
    if isinstance(data, dict) and data.get("type") == "auth" and _token_matches(data.get("token")):
        return True

    await websocket.close(code=4001, reason="Invalid token")
    return False
|
||||
|
||||
|
||||
@router.websocket("/ws")
|
||||
async def world_ws(websocket: WebSocket) -> None:
|
||||
"""Accept a Workshop client and keep it alive for state broadcasts.
|
||||
@@ -232,8 +467,28 @@ async def world_ws(websocket: WebSocket) -> None:
|
||||
client never starts from a blank slate. Incoming frames are parsed
|
||||
as JSON — ``visitor_message`` triggers a bark response. A background
|
||||
heartbeat ping runs every 15 s to detect dead connections early.
|
||||
|
||||
Authentication:
|
||||
- If matrix_ws_token is configured, clients must provide it via
|
||||
?token= query param or in the first message as
|
||||
{"type": "auth", "token": "..."}.
|
||||
- Invalid token results in close code 4001.
|
||||
- Valid token receives a connection_ack message.
|
||||
"""
|
||||
await websocket.accept()
|
||||
# Authenticate (may accept connection internally)
|
||||
is_authed = await _authenticate_ws(websocket)
|
||||
if not is_authed:
|
||||
logger.info("World WS connection rejected — invalid token")
|
||||
return
|
||||
|
||||
# Auth passed - accept if not already accepted
|
||||
if websocket.client_state.name != "CONNECTED":
|
||||
await websocket.accept()
|
||||
|
||||
# Send connection_ack if auth was required
|
||||
if settings.matrix_ws_token:
|
||||
await websocket.send_text(json.dumps({"type": "connection_ack"}))
|
||||
|
||||
_ws_clients.append(websocket)
|
||||
logger.info("World WS connected — %d clients", len(_ws_clients))
|
||||
|
||||
@@ -383,3 +638,428 @@ async def _generate_bark(visitor_text: str) -> str:
|
||||
except Exception as exc:
|
||||
logger.warning("Bark generation failed: %s", exc)
|
||||
return "Hmm, my thoughts are a bit tangled right now."
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Matrix Configuration Endpoint
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Default Matrix configuration (fallback when matrix.yaml is missing/corrupt)
|
||||
_DEFAULT_MATRIX_CONFIG: dict[str, Any] = {
|
||||
"lighting": {
|
||||
"ambient_color": "#1a1a2e",
|
||||
"ambient_intensity": 0.4,
|
||||
"point_lights": [
|
||||
{"color": "#FFD700", "intensity": 1.2, "position": {"x": 0, "y": 5, "z": 0}},
|
||||
{"color": "#3B82F6", "intensity": 0.8, "position": {"x": -5, "y": 3, "z": -5}},
|
||||
{"color": "#A855F7", "intensity": 0.6, "position": {"x": 5, "y": 3, "z": 5}},
|
||||
],
|
||||
},
|
||||
"environment": {
|
||||
"rain_enabled": False,
|
||||
"starfield_enabled": True,
|
||||
"fog_color": "#0f0f23",
|
||||
"fog_density": 0.02,
|
||||
},
|
||||
"features": {
|
||||
"chat_enabled": True,
|
||||
"visitor_avatars": True,
|
||||
"pip_familiar": True,
|
||||
"workshop_portal": True,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _load_matrix_config() -> dict[str, Any]:
    """Load Matrix world configuration from matrix.yaml with fallback to defaults.

    Reads ``<settings.repo_root>/config/matrix.yaml`` and overlays its
    ``lighting``, ``environment`` and ``features`` sections on top of
    ``_DEFAULT_MATRIX_CONFIG``.

    Returns:
        A dict with exactly the sections lighting, environment, features.
        The result is always an independent deep copy, so callers may mutate
        it without corrupting the module-level defaults. If the file is
        missing, is not a mapping, or cannot be read/parsed, the defaults are
        returned. A section that is present but not a mapping is ignored
        (defaults kept for that section only).
    """
    import copy  # local import so this block does not depend on file-level imports

    section_names = ("lighting", "environment", "features")

    try:
        config_path = Path(settings.repo_root) / "config" / "matrix.yaml"
        if not config_path.exists():
            logger.debug("matrix.yaml not found, using default config")
            return copy.deepcopy(_DEFAULT_MATRIX_CONFIG)

        config = yaml.safe_load(config_path.read_text(encoding="utf-8"))
        if not isinstance(config, dict):
            logger.warning("matrix.yaml invalid format, using defaults")
            return copy.deepcopy(_DEFAULT_MATRIX_CONFIG)

        # Start from a private copy of the defaults and overlay file values so
        # every required field exists even when the file is partial.
        result: dict[str, Any] = {
            name: copy.deepcopy(_DEFAULT_MATRIX_CONFIG[name]) for name in section_names
        }
        for name in section_names:
            overrides = config.get(name)
            if overrides is None:
                continue
            if not isinstance(overrides, dict):
                # e.g. "lighting: 3" - keep this section's defaults rather
                # than discarding the whole file.
                logger.warning("matrix.yaml section '%s' is not a mapping, using defaults", name)
                continue
            result[name].update(overrides)

        # Ensure point_lights is a list
        if not isinstance(result["lighting"].get("point_lights"), list):
            logger.warning("matrix.yaml lighting.point_lights is not a list, using defaults")
            result["lighting"]["point_lights"] = copy.deepcopy(
                _DEFAULT_MATRIX_CONFIG["lighting"]["point_lights"]
            )

        return result
    except Exception as exc:
        logger.warning("Failed to load matrix config: %s, using defaults", exc)
        return copy.deepcopy(_DEFAULT_MATRIX_CONFIG)
|
||||
|
||||
|
||||
@matrix_router.get("/config")
async def get_matrix_config() -> JSONResponse:
    """Serve the Matrix world configuration.

    The payload is whatever ``_load_matrix_config()`` returns and lets the
    Matrix frontend be config-driven instead of hardcoded.

    Response structure:
    - lighting: ambient_color, ambient_intensity, point_lights[]
    - environment: rain_enabled, starfield_enabled, fog_color, fog_density
    - features: chat_enabled, visitor_avatars, pip_familiar, workshop_portal

    The response is marked non-cacheable.
    """
    no_cache = {"Cache-Control": "no-cache, no-store"}
    return JSONResponse(content=_load_matrix_config(), headers=no_cache)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Matrix Agent Registry Endpoint
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@matrix_router.get("/agents")
async def get_matrix_agents() -> JSONResponse:
    """Serve the agent registry for Matrix visualization.

    Each element of the JSON array carries:
    - id: agent identifier
    - display_name: human-readable name
    - role: functional role
    - color: hex color code for visualization
    - position: {x, y, z} coordinates in 3D space
    - shape: 3D shape type
    - status: availability status

    The list is built by ``_build_matrix_agents_response()``, which lays
    agents out on a circle and yields an empty list when none are available,
    so this endpoint answers 200 with ``[]`` in that case. The response is
    marked non-cacheable.
    """
    no_cache = {"Cache-Control": "no-cache, no-store"}
    return JSONResponse(content=_build_matrix_agents_response(), headers=no_cache)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Matrix Thoughts Endpoint — Timmy's recent thought stream for Matrix display
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_MAX_THOUGHT_LIMIT = 50 # Maximum thoughts allowed per request
|
||||
_DEFAULT_THOUGHT_LIMIT = 10 # Default number of thoughts to return
|
||||
_MAX_THOUGHT_TEXT_LEN = 500 # Max characters for thought text
|
||||
|
||||
|
||||
def _build_matrix_thoughts_response(limit: int = _DEFAULT_THOUGHT_LIMIT) -> list[dict[str, Any]]:
    """Format recent thoughts from the thinking engine for Matrix display.

    Args:
        limit: Passed through to ``thinking_engine.get_recent_thoughts``.

    Returns:
        One dict per thought with:
        - id: the thought's ``id``
        - text: the thought's ``content`` cut to ``_MAX_THOUGHT_TEXT_LEN`` chars
        - created_at: the thought's ``created_at`` value, unchanged
        - chain_id: the thought's ``parent_id``
        An empty list if importing or querying the engine raises (logged).
    """
    try:
        from timmy.thinking import thinking_engine

        payload: list[dict[str, Any]] = []
        for thought in thinking_engine.get_recent_thoughts(limit=limit):
            payload.append(
                {
                    "id": thought.id,
                    "text": thought.content[:_MAX_THOUGHT_TEXT_LEN],
                    "created_at": thought.created_at,
                    "chain_id": thought.parent_id,
                }
            )
        return payload
    except Exception as exc:
        logger.warning("Failed to load thoughts for Matrix: %s", exc)
        return []
|
||||
|
||||
|
||||
@matrix_router.get("/thoughts")
async def get_matrix_thoughts(limit: int = _DEFAULT_THOUGHT_LIMIT) -> JSONResponse:
    """Serve Timmy's recent thoughts formatted for Matrix display.

    REST companion to the thought WebSocket messages, so the Matrix frontend
    can show real thoughts rather than canned contextual lines.

    Query params:
    - limit: Number of thoughts to return. Values below 1 are raised to 1
      and values above ``_MAX_THOUGHT_LIMIT`` are lowered to it.

    Response: JSON array of thought objects:
    - id: thought UUID
    - text: thought content (truncated to 500 chars)
    - created_at: ISO-8601 timestamp
    - chain_id: parent thought ID (null if root thought)

    The array is empty if the thinking engine is disabled or fails.
    """
    # Clamp limit into [1, _MAX_THOUGHT_LIMIT]
    bounded_limit = max(1, min(limit, _MAX_THOUGHT_LIMIT))

    return JSONResponse(
        content=_build_matrix_thoughts_response(limit=bounded_limit),
        headers={"Cache-Control": "no-cache, no-store"},
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Matrix Health Endpoint — backend capability discovery
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Health check cache (5-second TTL for capability checks)
|
||||
_health_cache: dict | None = None
|
||||
_health_cache_ts: float = 0.0
|
||||
_HEALTH_CACHE_TTL = 5.0
|
||||
|
||||
|
||||
def _check_capability_thinking() -> bool:
|
||||
"""Check if thinking engine is available."""
|
||||
try:
|
||||
from timmy.thinking import thinking_engine
|
||||
|
||||
# Check if the engine has been initialized (has a db path)
|
||||
return hasattr(thinking_engine, "_db") and thinking_engine._db is not None
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _check_capability_memory() -> bool:
|
||||
"""Check if memory system is available."""
|
||||
try:
|
||||
from timmy.memory_system import HOT_MEMORY_PATH
|
||||
|
||||
return HOT_MEMORY_PATH.exists()
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _check_capability_bark() -> bool:
|
||||
"""Check if bark production is available."""
|
||||
try:
|
||||
from infrastructure.presence import produce_bark
|
||||
|
||||
return callable(produce_bark)
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _check_capability_familiar() -> bool:
|
||||
"""Check if familiar (Pip) is available."""
|
||||
try:
|
||||
from timmy.familiar import pip_familiar
|
||||
|
||||
return pip_familiar is not None
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _check_capability_lightning() -> bool:
|
||||
"""Check if Lightning payments are available."""
|
||||
# Lightning is currently disabled per health.py
|
||||
# Returns False until properly re-implemented
|
||||
return False
|
||||
|
||||
|
||||
def _build_matrix_health_response() -> dict[str, Any]:
    """Run every capability probe and summarise the result.

    Returns:
        ``{"status", "version", "capabilities"}`` where ``capabilities`` maps
        thinking/memory/bark/familiar/lightning to booleans and ``status`` is
        "ok" only when the three core capabilities (thinking, memory, bark)
        are all available, otherwise "degraded".
    """
    # Probes are looked up at call time and run in this order.
    probes = (
        ("thinking", _check_capability_thinking),
        ("memory", _check_capability_memory),
        ("bark", _check_capability_bark),
        ("familiar", _check_capability_familiar),
        ("lightning", _check_capability_lightning),
    )
    capabilities = {name: probe() for name, probe in probes}

    core_missing = [name for name in ("thinking", "memory", "bark") if not capabilities[name]]

    return {
        "status": "degraded" if core_missing else "ok",
        "version": "1.0.0",
        "capabilities": capabilities,
    }
|
||||
|
||||
|
||||
@matrix_router.get("/health")
async def get_matrix_health() -> JSONResponse:
    """Serve health status and capability availability for the Matrix frontend.

    The frontend uses the capability flags to show or hide UI elements:
    - thinking: Show thought bubbles if enabled
    - memory: Show crystal ball memory search if available
    - bark: Enable visitor chat responses
    - familiar: Show Pip the familiar
    - lightning: Enable payment features

    Response:
    - status: "ok" or "degraded"
    - version: API version string
    - capabilities: dict of feature:bool

    The HTTP status is 200 even when the reported status is "degraded", and
    the response is marked non-cacheable.
    """
    return JSONResponse(
        content=_build_matrix_health_response(),
        status_code=200,  # Always 200, even if degraded
        headers={"Cache-Control": "no-cache, no-store"},
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Matrix Memory Search Endpoint — visitors query Timmy's memory
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Rate limiting: 1 search per 5 seconds per IP
|
||||
_MEMORY_SEARCH_RATE_LIMIT_SECONDS = 5
|
||||
_memory_search_last_request: dict[str, float] = {}
|
||||
_MAX_MEMORY_RESULTS = 5
|
||||
_MAX_MEMORY_TEXT_LENGTH = 200
|
||||
|
||||
|
||||
def _get_client_ip(request) -> str:
|
||||
"""Extract client IP from request, respecting X-Forwarded-For header."""
|
||||
# Check for forwarded IP (when behind proxy)
|
||||
forwarded = request.headers.get("X-Forwarded-For")
|
||||
if forwarded:
|
||||
# Take the first IP in the chain
|
||||
return forwarded.split(",")[0].strip()
|
||||
# Fall back to direct client IP
|
||||
if request.client:
|
||||
return request.client.host
|
||||
return "unknown"
|
||||
|
||||
|
||||
def _build_matrix_memory_response(
    memories: list,
) -> list[dict[str, Any]]:
    """Format memory entries for the Matrix memory-search response.

    Only the first ``_MAX_MEMORY_RESULTS`` entries are used. Each becomes:
    - text: ``content``; when longer than ``_MAX_MEMORY_TEXT_LENGTH`` it is
      cut to that length and "..." is appended
    - relevance: ``relevance_score`` (0.0 when falsy) rounded to 4 decimals
    - created_at: the entry's ``timestamp`` value, unchanged
    - context_type: the entry's ``context_type``
    """

    def _clip(content: str) -> str:
        if len(content) <= _MAX_MEMORY_TEXT_LENGTH:
            return content
        return content[:_MAX_MEMORY_TEXT_LENGTH] + "..."

    return [
        {
            "text": _clip(entry.content),
            "relevance": round(entry.relevance_score or 0.0, 4),
            "created_at": entry.timestamp,
            "context_type": entry.context_type,
        }
        for entry in memories[:_MAX_MEMORY_RESULTS]
    ]
|
||||
|
||||
|
||||
@matrix_router.get("/memory/search")
async def get_matrix_memory_search(request: Request, q: str | None = None) -> JSONResponse:
    """Search Timmy's memory for relevant snippets.

    Allows Matrix visitors to query Timmy's memory ("what do you remember
    about sovereignty?"). Results appear as floating crystal-ball text
    in the Workshop room.

    Query params:
    - q: Search query text (required)

    Response: JSON array of memory objects:
    - text: Memory content (truncated to 200 chars)
    - relevance: Similarity score 0-1
    - created_at: ISO-8601 timestamp
    - context_type: Memory type (conversation, fact, etc.)

    Rate limited to 1 search per 5 seconds per IP, where the IP is whatever
    ``_get_client_ip`` reports for the request.

    Returns:
    - 200: JSON array of memory results (max 5); empty if the search fails
    - 400: Missing or empty query parameter
    - 429: Rate limit exceeded (with a Retry-After header)
    """
    # Validate query parameter
    query = q.strip() if q else ""
    if not query:
        return JSONResponse(
            status_code=400,
            content={"error": "Query parameter 'q' is required"},
        )

    # Rate limiting check by IP
    client_ip = _get_client_ip(request)
    now = time.time()
    time_since_last = now - _memory_search_last_request.get(client_ip, 0)

    if time_since_last < _MEMORY_SEARCH_RATE_LIMIT_SECONDS:
        retry_after = _MEMORY_SEARCH_RATE_LIMIT_SECONDS - time_since_last
        return JSONResponse(
            status_code=429,
            content={"error": "Rate limit exceeded. Try again later."},
            headers={"Retry-After": str(int(retry_after) + 1)},
        )

    # The tracking dict gains one key per distinct client IP and was never
    # trimmed, so it grew for the life of the process. Once it gets large,
    # drop entries whose window has already elapsed: they can no longer
    # cause a 429, so removing them does not change any rate-limit decision.
    # The dict is mutated in place because it is shared module state.
    prune_threshold = 1024
    if len(_memory_search_last_request) >= prune_threshold:
        cutoff = now - _MEMORY_SEARCH_RATE_LIMIT_SECONDS
        expired = [ip for ip, seen in _memory_search_last_request.items() if seen <= cutoff]
        for ip in expired:
            del _memory_search_last_request[ip]

    # Record this request
    _memory_search_last_request[client_ip] = now

    # Search memories; a failing search degrades to an empty result set
    try:
        memories = search_memories(query, limit=_MAX_MEMORY_RESULTS)
        results = _build_matrix_memory_response(memories)
    except Exception as exc:
        logger.warning("Memory search failed: %s", exc)
        results = []

    return JSONResponse(
        content=results,
        headers={"Cache-Control": "no-cache, no-store"},
    )
|
||||
|
||||
17
src/dashboard/services/__init__.py
Normal file
17
src/dashboard/services/__init__.py
Normal file
@@ -0,0 +1,17 @@
|
||||
"""Dashboard services for business logic."""
|
||||
|
||||
from dashboard.services.scorecard_service import (
|
||||
PeriodType,
|
||||
ScorecardSummary,
|
||||
generate_all_scorecards,
|
||||
generate_scorecard,
|
||||
get_tracked_agents,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"PeriodType",
|
||||
"ScorecardSummary",
|
||||
"generate_all_scorecards",
|
||||
"generate_scorecard",
|
||||
"get_tracked_agents",
|
||||
]
|
||||
515
src/dashboard/services/scorecard_service.py
Normal file
515
src/dashboard/services/scorecard_service.py
Normal file
@@ -0,0 +1,515 @@
|
||||
"""Agent scorecard service — track and summarize agent performance.
|
||||
|
||||
Generates daily/weekly scorecards showing:
|
||||
- Issues touched, PRs opened/merged
|
||||
- Tests affected, tokens earned/spent
|
||||
- Pattern highlights (merge rate, activity quality)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from infrastructure.events.bus import Event, get_event_bus
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Bot/agent usernames to track
|
||||
TRACKED_AGENTS = frozenset({"hermes", "kimi", "manus", "claude", "gemini"})
|
||||
|
||||
|
||||
class PeriodType(StrEnum):
|
||||
daily = "daily"
|
||||
weekly = "weekly"
|
||||
|
||||
|
||||
@dataclass
class AgentMetrics:
    """Raw metrics collected for an agent over a period."""

    agent_id: str
    # Sets de-duplicate repeated events that refer to the same issue/PR/test.
    issues_touched: set[int] = field(default_factory=set)
    prs_opened: set[int] = field(default_factory=set)
    prs_merged: set[int] = field(default_factory=set)
    tests_affected: set[str] = field(default_factory=set)
    tokens_earned: int = 0
    tokens_spent: int = 0
    commits: int = 0
    comments: int = 0

    @property
    def pr_merge_rate(self) -> float:
        """Calculate PR merge rate (0.0 - 1.0).

        The rate is merged PRs divided by opened PRs, and 0.0 when nothing
        was opened. ``prs_merged`` is not required to be a subset of
        ``prs_opened`` (a PR can be merged in a later period than it was
        opened), so the raw ratio can exceed 1; it is capped at 1.0 to stay
        inside the documented range.
        """
        opened = len(self.prs_opened)
        if opened == 0:
            return 0.0
        return min(1.0, len(self.prs_merged) / opened)
|
||||
|
||||
|
||||
@dataclass
class ScorecardSummary:
    """A generated scorecard: one agent, one period, metrics plus narrative."""

    agent_id: str
    period_type: PeriodType
    period_start: datetime
    period_end: datetime
    metrics: AgentMetrics
    narrative_bullets: list[str] = field(default_factory=list)
    patterns: list[str] = field(default_factory=list)

    @property
    def tests_affected(self) -> set[str]:
        """Shortcut for ``metrics.tests_affected``."""
        return self.metrics.tests_affected

    def to_dict(self) -> dict[str, Any]:
        """Flatten the scorecard into JSON-serializable primitives.

        Set-valued metrics are reported as counts, period bounds as ISO-8601
        strings, the merge rate rounded to two decimals, and ``token_net``
        is tokens earned minus tokens spent.
        """
        raw = self.metrics
        earned = raw.tokens_earned
        spent = raw.tokens_spent

        metrics_payload = {
            "issues_touched": len(raw.issues_touched),
            "prs_opened": len(raw.prs_opened),
            "prs_merged": len(raw.prs_merged),
            "pr_merge_rate": round(raw.pr_merge_rate, 2),
            "tests_affected": len(self.tests_affected),
            "commits": raw.commits,
            "comments": raw.comments,
            "tokens_earned": earned,
            "tokens_spent": spent,
            "token_net": earned - spent,
        }

        return {
            "agent_id": self.agent_id,
            "period_type": self.period_type.value,
            "period_start": self.period_start.isoformat(),
            "period_end": self.period_end.isoformat(),
            "metrics": metrics_payload,
            "narrative_bullets": self.narrative_bullets,
            "patterns": self.patterns,
        }
|
||||
|
||||
|
||||
def _get_period_bounds(
    period_type: PeriodType, reference_date: datetime | None = None
) -> tuple[datetime, datetime]:
    """Compute the half-open window a scorecard covers.

    Args:
        period_type: daily or weekly
        reference_date: Anchor moment; the current UTC time when omitted.

    Returns:
        ``(period_start, period_end)``. The end is midnight at the start of
        the anchor's day (the anchor's own tzinfo is kept as-is); the start
        is one day earlier for daily and seven days earlier otherwise.
    """
    anchor = datetime.now(UTC) if reference_date is None else reference_date

    # Normalize to start of day
    period_end = anchor.replace(hour=0, minute=0, second=0, microsecond=0)

    span_days = 1 if period_type == PeriodType.daily else 7
    return period_end - timedelta(days=span_days), period_end
|
||||
|
||||
|
||||
def _collect_events_for_period(
    start: datetime, end: datetime, agent_id: str | None = None
) -> list[Event]:
    """Collect events from the event bus for a time period.

    Replays up to 1000 events for each tracked event type, then keeps those
    whose timestamp falls in ``[start, end)``.

    Args:
        start: Period start time (inclusive). Naive values are read as UTC.
        end: Period end time (exclusive). Naive values are read as UTC.
        agent_id: Optional agent filter, passed to the bus as ``source``

    Returns:
        List of matching events. Event types whose replay fails are skipped
        (logged at debug level); events whose timestamp is missing or cannot
        be parsed are dropped.
    """

    def _as_utc_aware(moment: datetime) -> datetime:
        # Comparing naive and aware datetimes raises TypeError; treating
        # naive values as UTC keeps one such timestamp from aborting the
        # whole collection.
        return moment.replace(tzinfo=UTC) if moment.tzinfo is None else moment

    bus = get_event_bus()
    events: list[Event] = []

    # Query persisted events for relevant types
    event_types = (
        "gitea.push",
        "gitea.issue.opened",
        "gitea.issue.comment",
        "gitea.pull_request",
        "agent.task.completed",
        "test.execution",
    )
    for event_type in event_types:
        try:
            events.extend(
                bus.replay(
                    event_type=event_type,
                    source=agent_id,
                    limit=1000,
                )
            )
        except Exception as exc:
            logger.debug("Failed to replay events for %s: %s", event_type, exc)

    # Filter by timestamp
    window_start = _as_utc_aware(start)
    window_end = _as_utc_aware(end)
    filtered = []
    for event in events:
        try:
            # fromisoformat historically rejected a trailing "Z"; spell it
            # as an explicit UTC offset.
            event_time = datetime.fromisoformat(event.timestamp.replace("Z", "+00:00"))
        except (ValueError, AttributeError, TypeError):
            continue
        if window_start <= _as_utc_aware(event_time) < window_end:
            filtered.append(event)

    return filtered
|
||||
|
||||
|
||||
def _extract_actor_from_event(event: Event) -> str:
|
||||
"""Extract the actor/agent from an event."""
|
||||
# Try data fields first
|
||||
if "actor" in event.data:
|
||||
return event.data["actor"]
|
||||
if "agent_id" in event.data:
|
||||
return event.data["agent_id"]
|
||||
# Fall back to source
|
||||
return event.source
|
||||
|
||||
|
||||
def _is_tracked_agent(actor: str) -> bool:
    """Tell whether *actor* names one of the ``TRACKED_AGENTS``.

    The comparison ignores letter case.
    """
    normalized = actor.lower()
    return normalized in TRACKED_AGENTS
|
||||
|
||||
|
||||
def _aggregate_metrics(events: list[Event]) -> dict[str, AgentMetrics]:
|
||||
"""Aggregate metrics from events grouped by agent.
|
||||
|
||||
Args:
|
||||
events: List of events to process
|
||||
|
||||
Returns:
|
||||
Dict mapping agent_id -> AgentMetrics
|
||||
"""
|
||||
metrics_by_agent: dict[str, AgentMetrics] = {}
|
||||
|
||||
for event in events:
|
||||
actor = _extract_actor_from_event(event)
|
||||
|
||||
# Skip non-agent events unless they explicitly have an agent_id
|
||||
if not _is_tracked_agent(actor) and "agent_id" not in event.data:
|
||||
continue
|
||||
|
||||
if actor not in metrics_by_agent:
|
||||
metrics_by_agent[actor] = AgentMetrics(agent_id=actor)
|
||||
|
||||
metrics = metrics_by_agent[actor]
|
||||
|
||||
# Process based on event type
|
||||
event_type = event.type
|
||||
|
||||
if event_type == "gitea.push":
|
||||
metrics.commits += event.data.get("num_commits", 1)
|
||||
|
||||
elif event_type == "gitea.issue.opened":
|
||||
issue_num = event.data.get("issue_number", 0)
|
||||
if issue_num:
|
||||
metrics.issues_touched.add(issue_num)
|
||||
|
||||
elif event_type == "gitea.issue.comment":
|
||||
metrics.comments += 1
|
||||
issue_num = event.data.get("issue_number", 0)
|
||||
if issue_num:
|
||||
metrics.issues_touched.add(issue_num)
|
||||
|
||||
elif event_type == "gitea.pull_request":
|
||||
pr_num = event.data.get("pr_number", 0)
|
||||
action = event.data.get("action", "")
|
||||
merged = event.data.get("merged", False)
|
||||
|
||||
if pr_num:
|
||||
if action == "opened":
|
||||
metrics.prs_opened.add(pr_num)
|
||||
elif action == "closed" and merged:
|
||||
metrics.prs_merged.add(pr_num)
|
||||
# Also count as touched issue for tracking
|
||||
metrics.issues_touched.add(pr_num)
|
||||
|
||||
elif event_type == "agent.task.completed":
|
||||
# Extract test files from task data
|
||||
affected = event.data.get("tests_affected", [])
|
||||
for test in affected:
|
||||
metrics.tests_affected.add(test)
|
||||
|
||||
# Token rewards from task completion
|
||||
reward = event.data.get("token_reward", 0)
|
||||
if reward:
|
||||
metrics.tokens_earned += reward
|
||||
|
||||
elif event_type == "test.execution":
|
||||
# Track test files that were executed
|
||||
test_files = event.data.get("test_files", [])
|
||||
for test in test_files:
|
||||
metrics.tests_affected.add(test)
|
||||
|
||||
return metrics_by_agent
|
||||
|
||||
|
||||
def _query_token_transactions(agent_id: str, start: datetime, end: datetime) -> tuple[int, int]:
    """Query the lightning ledger for token transactions.

    Looks at up to 1000 ledger transactions and sums ``amount_sats`` for
    those created in ``[start, end)``. Transactions attributed to a different
    agent are skipped; transactions with no ``agent_id`` are included.

    Args:
        agent_id: The agent to query for
        start: Period start (inclusive). Naive values are read as UTC.
        end: Period end (exclusive). Naive values are read as UTC.

    Returns:
        Tuple of (tokens_earned, tokens_spent): "incoming" transactions count
        as earned, everything else as spent. ``(0, 0)`` if the ledger cannot
        be imported or queried (logged at debug level).
    """

    def _as_utc_aware(moment: datetime) -> datetime:
        # Comparing naive and aware datetimes raises TypeError, which used to
        # land in the outer handler and zero the whole result because of one
        # transaction. Treat naive values as UTC instead.
        return moment.replace(tzinfo=UTC) if moment.tzinfo is None else moment

    try:
        from lightning.ledger import get_transactions

        transactions = get_transactions(limit=1000)

        window_start = _as_utc_aware(start)
        window_end = _as_utc_aware(end)
        earned = 0
        spent = 0

        for tx in transactions:
            # Filter by agent if specified
            if tx.agent_id and tx.agent_id != agent_id:
                continue

            # Filter by timestamp; unparseable timestamps are skipped
            try:
                tx_time = datetime.fromisoformat(tx.created_at.replace("Z", "+00:00"))
            except (ValueError, AttributeError, TypeError):
                continue
            if not (window_start <= _as_utc_aware(tx_time) < window_end):
                continue

            if tx.tx_type.value == "incoming":
                earned += tx.amount_sats
            else:
                spent += tx.amount_sats

        return earned, spent

    except Exception as exc:
        logger.debug("Failed to query token transactions: %s", exc)
        return 0, 0
|
||||
|
||||
|
||||
def _generate_narrative_bullets(metrics: AgentMetrics, period_type: PeriodType) -> list[str]:
    """Build the human-readable summary lines shown on a scorecard.

    Up to three bullets are produced, in this order: an activity overview, a
    test-file count, and a token-flow line. If none apply, a single
    "no activity" bullet is returned instead.

    Args:
        metrics: The agent's metrics
        period_type: daily or weekly

    Returns:
        List of narrative bullet points
    """

    def _counted(count: int, noun: str) -> str:
        # "1 commit" / "2 commits": append "s" for every count except exactly 1.
        suffix = "" if count == 1 else "s"
        return f"{count} {noun}{suffix}"

    if period_type == PeriodType.daily:
        period_label = "day"
    else:
        period_label = "week"

    lines: list[str] = []

    # Activity summary
    opened_count = len(metrics.prs_opened)
    merged_count = len(metrics.prs_merged)
    touched_count = len(metrics.issues_touched)

    parts = []
    if metrics.commits:
        parts.append(_counted(metrics.commits, "commit"))
    if opened_count:
        parts.append(_counted(opened_count, "PR") + " opened")
    if merged_count:
        parts.append(_counted(merged_count, "PR") + " merged")
    if touched_count:
        parts.append(_counted(touched_count, "issue") + " touched")
    if metrics.comments:
        parts.append(_counted(metrics.comments, "comment"))

    if parts:
        lines.append(f"Active across {', '.join(parts)} this {period_label}.")

    # Test activity
    test_count = len(metrics.tests_affected)
    if test_count:
        lines.append("Affected " + _counted(test_count, "test file") + ".")

    # Token summary
    if metrics.tokens_earned or metrics.tokens_spent:
        net_tokens = metrics.tokens_earned - metrics.tokens_spent
        breakdown = f"({metrics.tokens_earned} earned, {metrics.tokens_spent} spent)"
        if net_tokens > 0:
            lines.append(f"Net earned {net_tokens} tokens {breakdown}.")
        elif net_tokens < 0:
            lines.append(f"Net spent {abs(net_tokens)} tokens {breakdown}.")
        else:
            lines.append(f"Balanced token flow {breakdown}.")

    # Handle empty case
    if not lines:
        lines.append(f"No recorded activity this {period_label}.")

    return lines
|
||||
|
||||
|
||||
def _detect_patterns(metrics: AgentMetrics) -> list[str]:
|
||||
"""Detect interesting patterns in agent behavior.
|
||||
|
||||
Args:
|
||||
metrics: The agent's metrics
|
||||
|
||||
Returns:
|
||||
List of pattern descriptions
|
||||
"""
|
||||
patterns: list[str] = []
|
||||
|
||||
pr_opened = len(metrics.prs_opened)
|
||||
merge_rate = metrics.pr_merge_rate
|
||||
|
||||
# Merge rate patterns
|
||||
if pr_opened >= 3:
|
||||
if merge_rate >= 0.8:
|
||||
patterns.append("High merge rate with few failures — code quality focus.")
|
||||
elif merge_rate <= 0.3:
|
||||
patterns.append("Lots of noisy PRs, low merge rate — may need review support.")
|
||||
|
||||
# Activity patterns
|
||||
if metrics.commits > 10 and pr_opened == 0:
|
||||
patterns.append("High commit volume without PRs — working directly on main?")
|
||||
|
||||
if len(metrics.issues_touched) > 5 and metrics.comments == 0:
|
||||
patterns.append("Touching many issues but low comment volume — silent worker.")
|
||||
|
||||
if metrics.comments > len(metrics.issues_touched) * 2:
|
||||
patterns.append("Highly communicative — lots of discussion relative to work items.")
|
||||
|
||||
# Token patterns
|
||||
net_tokens = metrics.tokens_earned - metrics.tokens_spent
|
||||
if net_tokens > 100:
|
||||
patterns.append("Strong token accumulation — high value delivery.")
|
||||
elif net_tokens < -50:
|
||||
patterns.append("High token spend — may be in experimentation phase.")
|
||||
|
||||
return patterns
|
||||
|
||||
|
||||
def generate_scorecard(
    agent_id: str,
    period_type: PeriodType = PeriodType.daily,
    reference_date: datetime | None = None,
) -> ScorecardSummary | None:
    """Build the scorecard for one agent over a single period.

    Args:
        agent_id: The agent to generate scorecard for
        period_type: daily or weekly
        reference_date: The date to calculate from; passed through to
            ``_get_period_bounds``

    Returns:
        A ScorecardSummary for the agent. An agent with no aggregated events
        in the period still gets a scorecard, built from empty metrics.
    """
    start, end = _get_period_bounds(period_type, reference_date)

    # Collect this agent's events and aggregate them per agent.
    per_agent = _aggregate_metrics(_collect_events_for_period(start, end, agent_id))

    # Fall back to empty metrics so a scorecard is still produced.
    metrics = per_agent[agent_id] if agent_id in per_agent else AgentMetrics(agent_id=agent_id)

    # Augment with token data from ledger: keep whichever figure is larger,
    # the event-derived one or the ledger one.
    ledger_earned, ledger_spent = _query_token_transactions(agent_id, start, end)
    metrics.tokens_earned = max(metrics.tokens_earned, ledger_earned)
    metrics.tokens_spent = max(metrics.tokens_spent, ledger_spent)

    return ScorecardSummary(
        agent_id=agent_id,
        period_type=period_type,
        period_start=start,
        period_end=end,
        metrics=metrics,
        narrative_bullets=_generate_narrative_bullets(metrics, period_type),
        patterns=_detect_patterns(metrics),
    )
|
||||
|
||||
|
||||
def generate_all_scorecards(
    period_type: PeriodType = PeriodType.daily,
    reference_date: datetime | None = None,
) -> list[ScorecardSummary]:
    """Build scorecards for every agent seen in the period plus all tracked agents.

    Args:
        period_type: daily or weekly
        reference_date: The date to calculate from; passed through to
            ``_get_period_bounds``

    Returns:
        List of ScorecardSummary sorted by agent_id. It covers every agent
        with aggregated events and every entry of ``TRACKED_AGENTS``; tracked
        agents without events get a scorecard built from empty metrics.
    """
    start, end = _get_period_bounds(period_type, reference_date)

    # Collect all events and aggregate metrics for all agents
    per_agent = _aggregate_metrics(_collect_events_for_period(start, end))

    # Include tracked agents even if no activity
    for tracked_id in TRACKED_AGENTS:
        if tracked_id not in per_agent:
            per_agent[tracked_id] = AgentMetrics(agent_id=tracked_id)

    cards: list[ScorecardSummary] = []
    for agent_id, metrics in per_agent.items():
        # Augment with token data: keep the larger of the event-derived and
        # ledger-derived figures.
        ledger_earned, ledger_spent = _query_token_transactions(agent_id, start, end)
        metrics.tokens_earned = max(metrics.tokens_earned, ledger_earned)
        metrics.tokens_spent = max(metrics.tokens_spent, ledger_spent)

        cards.append(
            ScorecardSummary(
                agent_id=agent_id,
                period_type=period_type,
                period_start=start,
                period_end=end,
                metrics=metrics,
                narrative_bullets=_generate_narrative_bullets(metrics, period_type),
                patterns=_detect_patterns(metrics),
            )
        )

    # Sort by agent_id for consistent ordering
    return sorted(cards, key=lambda card: card.agent_id)
|
||||
|
||||
|
||||
def get_tracked_agents() -> list[str]:
    """Return the tracked agent IDs as a new list in ascending sorted order."""
    agent_ids = list(TRACKED_AGENTS)
    agent_ids.sort()
    return agent_ids
|
||||
@@ -51,6 +51,7 @@
|
||||
<a href="/thinking" class="mc-test-link mc-link-thinking">THINKING</a>
|
||||
<a href="/swarm/mission-control" class="mc-test-link">MISSION CTRL</a>
|
||||
<a href="/swarm/live" class="mc-test-link">SWARM</a>
|
||||
<a href="/scorecards" class="mc-test-link">SCORECARDS</a>
|
||||
<a href="/bugs" class="mc-test-link mc-link-bugs">BUGS</a>
|
||||
</div>
|
||||
</div>
|
||||
@@ -123,6 +124,7 @@
|
||||
<a href="/thinking" class="mc-mobile-link">THINKING</a>
|
||||
<a href="/swarm/mission-control" class="mc-mobile-link">MISSION CONTROL</a>
|
||||
<a href="/swarm/live" class="mc-mobile-link">SWARM</a>
|
||||
<a href="/scorecards" class="mc-mobile-link">SCORECARDS</a>
|
||||
<a href="/bugs" class="mc-mobile-link">BUGS</a>
|
||||
<div class="mc-mobile-section-label">INTELLIGENCE</div>
|
||||
<a href="/spark/ui" class="mc-mobile-link">SPARK</a>
|
||||
|
||||
@@ -21,6 +21,11 @@
|
||||
</div>
|
||||
{% endcall %}
|
||||
|
||||
<!-- Daily Run Metrics (HTMX polled) -->
|
||||
{% call panel("DAILY RUN", hx_get="/daily-run/panel", hx_trigger="every 60s") %}
|
||||
<div class="mc-loading-placeholder">LOADING...</div>
|
||||
{% endcall %}
|
||||
|
||||
</div>
|
||||
|
||||
<!-- Main panel — swappable via HTMX; defaults to Timmy on load -->
|
||||
|
||||
@@ -138,6 +138,54 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Spark Intelligence -->
|
||||
{% from "macros.html" import panel %}
|
||||
<div class="mc-card-spaced">
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<h2 class="card-title">Spark Intelligence</h2>
|
||||
<div>
|
||||
<span class="badge" id="spark-status-badge">Loading...</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="grid grid-3">
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="spark-events">-</div>
|
||||
<div class="stat-label">Events</div>
|
||||
</div>
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="spark-memories">-</div>
|
||||
<div class="stat-label">Memories</div>
|
||||
</div>
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="spark-predictions">-</div>
|
||||
<div class="stat-label">Predictions</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="grid grid-2 mc-section-gap">
|
||||
{% call panel("SPARK TIMELINE", id="spark-timeline-panel",
|
||||
hx_get="/spark/timeline",
|
||||
hx_trigger="load, every 10s") %}
|
||||
<div class="spark-timeline-scroll">
|
||||
<p class="chat-history-placeholder">Loading timeline...</p>
|
||||
</div>
|
||||
{% endcall %}
|
||||
{% call panel("SPARK INSIGHTS", id="spark-insights-panel",
|
||||
hx_get="/spark/insights",
|
||||
hx_trigger="load, every 30s") %}
|
||||
<p class="chat-history-placeholder">Loading insights...</p>
|
||||
{% endcall %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Sovereignty Metrics -->
|
||||
{% call panel("SOVEREIGNTY METRICS", id="sovereignty-metrics-panel",
|
||||
hx_get="/sovereignty/metrics/panel",
|
||||
hx_trigger="load, every 30s") %}
|
||||
<p class="chat-history-placeholder">Loading sovereignty metrics...</p>
|
||||
{% endcall %}
|
||||
|
||||
<!-- Chat History -->
|
||||
<div class="card mc-card-spaced">
|
||||
<div class="card-header">
|
||||
@@ -428,7 +476,34 @@ async function loadGrokStats() {
|
||||
}
|
||||
}
|
||||
|
||||
// Load Spark status
|
||||
// Fetch the Spark status from /spark and refresh the three stat counters and
// the status badge. The badge reads "Active" when total_events > 0, "Idle"
// otherwise, and "Offline" when the request or its parsing fails.
async function loadSparkStatus() {
    var badge = document.getElementById('spark-status-badge');
    try {
        var response = await fetch('/spark');
        // fetch() only rejects on network failure; an HTTP error status still
        // resolves. Treat it as a failure so an error body is not rendered as
        // zero counters with an "Idle" badge.
        if (!response.ok) {
            throw new Error('Spark status request failed with HTTP ' + response.status);
        }
        var data = await response.json();
        var st = data.status || {};

        document.getElementById('spark-events').textContent = st.total_events || 0;
        document.getElementById('spark-memories').textContent = st.total_memories || 0;
        document.getElementById('spark-predictions').textContent = st.total_predictions || 0;

        if (st.total_events > 0) {
            badge.textContent = 'Active';
            badge.className = 'badge badge-success';
        } else {
            badge.textContent = 'Idle';
            badge.className = 'badge badge-warning';
        }
    } catch (error) {
        badge.textContent = 'Offline';
        badge.className = 'badge badge-danger';
    }
}
|
||||
|
||||
// Initial load
|
||||
loadSparkStatus();
|
||||
loadSovereignty();
|
||||
loadHealth();
|
||||
loadSwarmStats();
|
||||
@@ -442,5 +517,6 @@ setInterval(loadHealth, 10000);
|
||||
setInterval(loadSwarmStats, 5000);
|
||||
setInterval(updateHeartbeat, 5000);
|
||||
setInterval(loadGrokStats, 10000);
|
||||
setInterval(loadSparkStatus, 15000);
|
||||
</script>
|
||||
{% endblock %}
|
||||
|
||||
54
src/dashboard/templates/partials/daily_run_panel.html
Normal file
54
src/dashboard/templates/partials/daily_run_panel.html
Normal file
@@ -0,0 +1,54 @@
|
||||
<div class="card-header mc-panel-header">// DAILY RUN METRICS</div>
|
||||
<div class="card-body p-3">
|
||||
{% if not gitea_available %}
|
||||
<div class="mc-muted" style="font-size: 0.85rem; padding: 8px 0;">
|
||||
<span style="color: var(--amber);">⚠</span> Gitea API unavailable
|
||||
</div>
|
||||
{% else %}
|
||||
{% set m = metrics %}
|
||||
|
||||
<!-- Sessions summary -->
|
||||
<div class="dr-section" style="margin-bottom: 16px;">
|
||||
<div class="dr-row" style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 8px;">
|
||||
<span class="dr-label" style="font-size: 0.85rem; color: var(--text-dim);">Sessions ({{ m.lookback_days }}d)</span>
|
||||
<a href="{{ logbook_url }}" target="_blank" class="dr-link" style="font-size: 0.75rem; color: var(--green); text-decoration: none;">
|
||||
Logbook →
|
||||
</a>
|
||||
</div>
|
||||
<div class="dr-stat" style="display: flex; align-items: baseline; gap: 8px;">
|
||||
<span class="dr-value" style="font-size: 1.5rem; font-weight: 600; color: var(--text-bright);">{{ m.sessions_completed }}</span>
|
||||
<span class="dr-trend" style="font-size: 0.9rem; color: {{ m.sessions_trend_color }};">{{ m.sessions_trend }}</span>
|
||||
<span class="dr-prev" style="font-size: 0.75rem; color: var(--text-dim);">vs {{ m.sessions_previous }} prev</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Layer breakdown -->
|
||||
<div class="dr-section">
|
||||
<div class="dr-label" style="font-size: 0.85rem; color: var(--text-dim); margin-bottom: 8px;">Issues by Layer</div>
|
||||
<div class="dr-layers" style="display: flex; flex-direction: column; gap: 6px;">
|
||||
{% for layer in m.layers %}
|
||||
<div class="dr-layer-row" style="display: flex; justify-content: space-between; align-items: center;">
|
||||
<a href="{{ layer_urls[layer.name] }}" target="_blank" class="dr-layer-name" style="font-size: 0.8rem; color: var(--text); text-decoration: none; text-transform: capitalize;">
|
||||
{{ layer.name.replace('-', ' ') }}
|
||||
</a>
|
||||
<div class="dr-layer-stat" style="display: flex; align-items: center; gap: 6px;">
|
||||
<span class="dr-layer-value" style="font-size: 0.9rem; font-weight: 500; color: var(--text-bright);">{{ layer.current_count }}</span>
|
||||
<span class="dr-layer-trend" style="font-size: 0.75rem; color: {{ layer.trend_color }}; width: 18px; text-align: center;">{{ layer.trend }}</span>
|
||||
</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Total touched -->
|
||||
<div class="dr-section" style="margin-top: 12px; padding-top: 12px; border-top: 1px solid var(--border);">
|
||||
<div class="dr-row" style="display: flex; justify-content: space-between; align-items: center;">
|
||||
<span class="dr-label" style="font-size: 0.8rem; color: var(--text-dim);">Total Issues Touched</span>
|
||||
<div class="dr-total-stat" style="display: flex; align-items: center; gap: 6px;">
|
||||
<span class="dr-total-value" style="font-size: 1rem; font-weight: 600; color: var(--text-bright);">{{ m.total_touched_current }}</span>
|
||||
<span class="dr-total-prev" style="font-size: 0.7rem; color: var(--text-dim);">/ {{ m.total_touched_previous }} prev</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
80
src/dashboard/templates/partials/quests_panel.html
Normal file
80
src/dashboard/templates/partials/quests_panel.html
Normal file
@@ -0,0 +1,80 @@
|
||||
{% from "macros.html" import panel %}
|
||||
|
||||
<div class="quests-summary mb-4">
|
||||
<div class="row">
|
||||
<div class="col-md-4">
|
||||
<div class="stat-card">
|
||||
<div class="stat-value">{{ total_tokens }}</div>
|
||||
<div class="stat-label">Tokens Earned</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-md-4">
|
||||
<div class="stat-card">
|
||||
<div class="stat-value">{{ completed_count }}</div>
|
||||
<div class="stat-label">Quests Completed</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-md-4">
|
||||
<div class="stat-card">
|
||||
<div class="stat-value">{{ quests|selectattr('enabled', 'equalto', true)|list|length }}</div>
|
||||
<div class="stat-label">Active Quests</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="quests-list">
|
||||
{% for quest in quests %}
|
||||
{% if quest.enabled %}
|
||||
<div class="quest-card quest-status-{{ quest.status }}">
|
||||
<div class="quest-header">
|
||||
<h5 class="quest-name">{{ quest.name }}</h5>
|
||||
<span class="quest-reward">+{{ quest.reward_tokens }} ⚡</span>
|
||||
</div>
|
||||
<p class="quest-description">{{ quest.description }}</p>
|
||||
|
||||
<div class="quest-progress">
|
||||
{% if quest.status == 'completed' %}
|
||||
<div class="progress">
|
||||
<div class="progress-bar bg-success" style="width: 100%"></div>
|
||||
</div>
|
||||
<span class="quest-status-badge completed">Completed</span>
|
||||
{% elif quest.status == 'claimed' %}
|
||||
<div class="progress">
|
||||
<div class="progress-bar bg-success" style="width: 100%"></div>
|
||||
</div>
|
||||
<span class="quest-status-badge claimed">Reward Claimed</span>
|
||||
{% elif quest.on_cooldown %}
|
||||
<div class="progress">
|
||||
<div class="progress-bar bg-secondary" style="width: 100%"></div>
|
||||
</div>
|
||||
<span class="quest-status-badge cooldown">
|
||||
Cooldown: {{ quest.cooldown_hours_remaining }}h remaining
|
||||
</span>
|
||||
{% else %}
|
||||
<div class="progress">
|
||||
<div class="progress-bar" style="width: {{ (quest.current_value / quest.target_value * 100)|int }}%"></div>
|
||||
</div>
|
||||
<span class="quest-progress-text">{{ quest.current_value }} / {{ quest.target_value }}</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<div class="quest-meta">
|
||||
<span class="quest-type">{{ quest.type }}</span>
|
||||
{% if quest.repeatable %}
|
||||
<span class="quest-repeatable">↻ Repeatable</span>
|
||||
{% endif %}
|
||||
{% if quest.completion_count > 0 %}
|
||||
<span class="quest-completions">Completed {{ quest.completion_count }} time{% if quest.completion_count != 1 %}s{% endif %}</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
</div>
|
||||
|
||||
{% if not quests|selectattr('enabled', 'equalto', true)|list|length %}
|
||||
<div class="alert alert-info">
|
||||
No active quests available. Check back later or contact an administrator.
|
||||
</div>
|
||||
{% endif %}
|
||||
63
src/dashboard/templates/partials/sovereignty_metrics.html
Normal file
63
src/dashboard/templates/partials/sovereignty_metrics.html
Normal file
@@ -0,0 +1,63 @@
|
||||
{# HTMX partial: Sovereignty Metrics Progress Panel
|
||||
Loaded via hx-get="/sovereignty/metrics/panel"
|
||||
Refs: #981
|
||||
#}
|
||||
{% set phase_labels = {"pre-start": "Pre-start", "week1": "Week 1", "month1": "Month 1", "month3": "Month 3", "graduated": "Graduated"} %}
|
||||
{% set phase_colors = {"pre-start": "var(--text-dim)", "week1": "var(--red)", "month1": "var(--amber)", "month3": "var(--green)", "graduated": "var(--purple)"} %}
|
||||
|
||||
{% set metric_labels = {
|
||||
"cache_hit_rate": "Cache Hit Rate",
|
||||
"api_cost": "API Cost / Task",
|
||||
"time_to_report": "Time to Report",
|
||||
"human_involvement": "Human Involvement",
|
||||
"local_artifacts": "Local Artifacts"
|
||||
} %}
|
||||
|
||||
{% set metric_units = {
|
||||
"cache_hit_rate": "%",
|
||||
"api_cost": "$",
|
||||
"time_to_report": "min",
|
||||
"human_involvement": "%",
|
||||
"local_artifacts": ""
|
||||
} %}
|
||||
|
||||
{% if alerts %}
|
||||
<div class="sov-alerts">
|
||||
{% for alert in alerts %}
|
||||
<div class="sov-alert-item">
|
||||
<span class="sov-alert-icon">!</span>
|
||||
<span>{{ alert.message }}</span>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<div class="grid grid-3">
|
||||
{% for key, data in metrics.items() %}
|
||||
{% set label = metric_labels.get(key, key) %}
|
||||
{% set unit = metric_units.get(key, "") %}
|
||||
{% set phase = data.phase %}
|
||||
{% set color = phase_colors.get(phase, "var(--text-dim)") %}
|
||||
<div class="stat">
|
||||
<div class="stat-value" style="color: {{ color }}">
|
||||
{% if data.current is not none %}
|
||||
{% if key == "cache_hit_rate" or key == "human_involvement" %}
|
||||
{{ "%.0f"|format(data.current * 100) }}{{ unit }}
|
||||
{% elif key == "api_cost" %}
|
||||
{{ unit }}{{ "%.2f"|format(data.current) }}
|
||||
{% elif key == "time_to_report" %}
|
||||
{{ "%.1f"|format(data.current) }}{{ unit }}
|
||||
{% else %}
|
||||
{{ data.current|int }}
|
||||
{% endif %}
|
||||
{% else %}
|
||||
--
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="stat-label">{{ label }}</div>
|
||||
<div class="stat-label" style="font-size: 0.7rem; color: {{ color }}">
|
||||
{{ phase_labels.get(phase, phase) }}
|
||||
</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
50
src/dashboard/templates/quests.html
Normal file
50
src/dashboard/templates/quests.html
Normal file
@@ -0,0 +1,50 @@
|
||||
{% extends "base.html" %}
|
||||
|
||||
{% block title %}Quests — Mission Control{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="container-fluid">
|
||||
<div class="row">
|
||||
<div class="col-12">
|
||||
<h1 class="mc-title">Token Quests</h1>
|
||||
<p class="mc-subtitle">Complete quests to earn bonus tokens</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="row mt-4">
|
||||
<div class="col-md-8">
|
||||
<div id="quests-panel" hx-get="/quests/panel/{{ agent_id }}" hx-trigger="load, every 30s">
|
||||
<div class="mc-loading">Loading quests...</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="col-md-4">
|
||||
<div class="card mc-panel">
|
||||
<div class="card-header">
|
||||
<h5 class="mb-0">Leaderboard</h5>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<div id="leaderboard" hx-get="/quests/api/leaderboard" hx-trigger="load, every 60s">
|
||||
<div class="mc-loading">Loading leaderboard...</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card mc-panel mt-4">
|
||||
<div class="card-header">
|
||||
<h5 class="mb-0">About Quests</h5>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<p class="mb-2">Quests are special objectives that reward tokens upon completion.</p>
|
||||
<ul class="mc-list mb-0">
|
||||
<li>Complete Daily Run sessions</li>
|
||||
<li>Close flaky-test issues</li>
|
||||
<li>Reduce P1 issue backlog</li>
|
||||
<li>Improve documentation</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
113
src/dashboard/templates/scorecards.html
Normal file
113
src/dashboard/templates/scorecards.html
Normal file
@@ -0,0 +1,113 @@
|
||||
{% extends "base.html" %}
|
||||
|
||||
{% block title %}Agent Scorecards - Timmy Time{% endblock %}
|
||||
|
||||
{% block extra_styles %}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="container-fluid py-4">
|
||||
<!-- Header -->
|
||||
<div class="d-flex justify-content-between align-items-center mb-4">
|
||||
<div>
|
||||
<h1 class="h3 mb-0">AGENT SCORECARDS</h1>
|
||||
<p class="text-muted small mb-0">Track agent performance across issues, PRs, tests, and tokens</p>
|
||||
</div>
|
||||
<div class="d-flex gap-2">
|
||||
<select id="period-select" class="form-select form-select-sm" style="width: auto;">
|
||||
<option value="daily" selected>Daily</option>
|
||||
<option value="weekly">Weekly</option>
|
||||
</select>
|
||||
<button class="btn btn-sm btn-primary" onclick="refreshScorecards()">
|
||||
<span>Refresh</span>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Scorecards Grid -->
|
||||
<div id="scorecards-container"
|
||||
hx-get="/scorecards/all/panels?period=daily"
|
||||
hx-trigger="load"
|
||||
hx-swap="innerHTML">
|
||||
<div class="text-center py-5">
|
||||
<div class="spinner-border text-secondary" role="status">
|
||||
<span class="visually-hidden">Loading...</span>
|
||||
</div>
|
||||
<p class="text-muted mt-2">Loading scorecards...</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- API Reference -->
|
||||
<div class="mt-5 pt-4 border-top">
|
||||
<h5 class="text-muted">API Reference</h5>
|
||||
<div class="row g-3">
|
||||
<div class="col-md-6">
|
||||
<div class="card mc-panel">
|
||||
<div class="card-body">
|
||||
<h6 class="card-title">List Tracked Agents</h6>
|
||||
<code>GET /scorecards/api/agents</code>
|
||||
<p class="small text-muted mt-2">Returns all tracked agent IDs</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-md-6">
|
||||
<div class="card mc-panel">
|
||||
<div class="card-body">
|
||||
<h6 class="card-title">Get All Scorecards</h6>
|
||||
<code>GET /scorecards/api?period=daily|weekly</code>
|
||||
<p class="small text-muted mt-2">Returns scorecards for all agents</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-md-6">
|
||||
<div class="card mc-panel">
|
||||
<div class="card-body">
|
||||
<h6 class="card-title">Get Agent Scorecard</h6>
|
||||
<code>GET /scorecards/api/{agent_id}?period=daily|weekly</code>
|
||||
<p class="small text-muted mt-2">Returns scorecard for a specific agent</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-md-6">
|
||||
<div class="card mc-panel">
|
||||
<div class="card-body">
|
||||
<h6 class="card-title">HTML Panel (HTMX)</h6>
|
||||
<code>GET /scorecards/panel/{agent_id}?period=daily|weekly</code>
|
||||
<p class="small text-muted mt-2">Returns HTML panel for embedding</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// Period selector change handler
|
||||
document.getElementById('period-select').addEventListener('change', function() {
|
||||
refreshScorecards();
|
||||
});
|
||||
|
||||
// Reload the scorecards grid for the period currently chosen in #period-select.
// Replaces the contents of #scorecards-container with a loading spinner, then
// asks HTMX to GET /scorecards/all/panels?period=<value> and swap the response
// into the same container. Used by the Refresh button, the period selector's
// change handler and the periodic setInterval timer.
function refreshScorecards() {
|
||||
var period = document.getElementById('period-select').value;
|
||||
var container = document.getElementById('scorecards-container');
|
||||
|
||||
// Show loading state
|
||||
container.innerHTML = `
|
||||
<div class="text-center py-5">
|
||||
<div class="spinner-border text-secondary" role="status">
|
||||
<span class="visually-hidden">Loading...</span>
|
||||
</div>
|
||||
<p class="text-muted mt-2">Loading scorecards...</p>
|
||||
</div>
|
||||
`;
|
||||
|
||||
// Trigger HTMX request
|
||||
htmx.ajax('GET', '/scorecards/all/panels?period=' + period, {
|
||||
target: '#scorecards-container',
|
||||
|
||||
swap: 'innerHTML'
|
||||
});
|
||||
}
|
||||
|
||||
// Auto-refresh every 5 minutes
|
||||
setInterval(refreshScorecards, 300000);
|
||||
</script>
|
||||
{% endblock %}
|
||||
180
src/dashboard/templates/tower.html
Normal file
180
src/dashboard/templates/tower.html
Normal file
@@ -0,0 +1,180 @@
|
||||
{% extends "base.html" %}
|
||||
|
||||
{% block title %}Timmy Time — Tower{% endblock %}
|
||||
|
||||
{% block extra_styles %}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="container-fluid tower-container py-3">
|
||||
|
||||
<div class="tower-header">
|
||||
<div class="tower-title">TOWER</div>
|
||||
<div class="tower-subtitle">
|
||||
Real-time Spark visualization —
|
||||
<span id="tower-conn" class="tower-conn-badge tower-conn-connecting">CONNECTING</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="row g-3">
|
||||
|
||||
<!-- Left: THINKING (events) -->
|
||||
<div class="col-12 col-lg-4 d-flex flex-column gap-3">
|
||||
<div class="card mc-panel tower-phase-card">
|
||||
<div class="card-header mc-panel-header tower-phase-thinking">// THINKING</div>
|
||||
<div class="card-body p-3 tower-scroll" id="tower-events">
|
||||
<div class="tower-empty">Waiting for Spark data…</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Middle: PREDICTING (EIDOS) -->
|
||||
<div class="col-12 col-lg-4 d-flex flex-column gap-3">
|
||||
<div class="card mc-panel tower-phase-card">
|
||||
<div class="card-header mc-panel-header tower-phase-predicting">// PREDICTING</div>
|
||||
<div class="card-body p-3" id="tower-predictions">
|
||||
<div class="tower-empty">Waiting for Spark data…</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card mc-panel">
|
||||
<div class="card-header mc-panel-header">// EIDOS STATS</div>
|
||||
<div class="card-body p-3">
|
||||
<div class="tower-stat-grid" id="tower-stats">
|
||||
<div class="tower-stat"><span class="tower-stat-label">EVENTS</span><span class="tower-stat-value" id="ts-events">0</span></div>
|
||||
<div class="tower-stat"><span class="tower-stat-label">MEMORIES</span><span class="tower-stat-value" id="ts-memories">0</span></div>
|
||||
<div class="tower-stat"><span class="tower-stat-label">PREDICTIONS</span><span class="tower-stat-value" id="ts-preds">0</span></div>
|
||||
<div class="tower-stat"><span class="tower-stat-label">ACCURACY</span><span class="tower-stat-value" id="ts-accuracy">—</span></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Right: ADVISING -->
|
||||
<div class="col-12 col-lg-4 d-flex flex-column gap-3">
|
||||
<div class="card mc-panel tower-phase-card">
|
||||
<div class="card-header mc-panel-header tower-phase-advising">// ADVISING</div>
|
||||
<div class="card-body p-3 tower-scroll" id="tower-advisories">
|
||||
<div class="tower-empty">Waiting for Spark data…</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
(function() {
|
||||
var ws = null;
|
||||
var badge = document.getElementById('tower-conn');
|
||||
|
||||
// Reflect the connection state on the header badge: the upper-cased state is
// the label, and a state-specific CSS class is applied next to the base class.
function setConn(state) {
    var label = state.toUpperCase();
    var classes = ['tower-conn-badge', 'tower-conn-' + state];
    badge.textContent = label;
    badge.className = classes.join(' ');
}
|
||||
|
||||
// Escape a value for use as HTML text by assigning it as the text content of
// a detached <div> and reading back the serialized markup.
function esc(s) {
    var holder = document.createElement('div');
    holder.textContent = s;
    return holder.innerHTML;
}
|
||||
|
||||
// Render the list of Spark events into #tower-events.
// Each event becomes a card with an upper-cased type badge, one to three
// importance dots (>= 0.8 three, >= 0.5 two, otherwise one), the description
// and the first 19 characters of created_at. An empty or missing list shows a
// placeholder message instead.
function renderEvents(events) {
    var el = document.getElementById('tower-events');
    if (!events || !events.length) {
        el.innerHTML = '<div class="tower-empty">No events captured yet.</div>';
        return;
    }
    var html = '';
    for (var i = 0; i < events.length; i++) {
        var ev = events[i];
        // Tolerate a missing event_type instead of throwing on .replace().
        var type = String(ev.event_type || '');
        // esc() escapes text content only (not quotes), so it cannot protect
        // an attribute value. Keep only class-name-safe characters here.
        var typeClass = type.replace(/[^A-Za-z0-9_-]/g, '');
        var dots = ev.importance >= 0.8 ? '\u25cf\u25cf\u25cf' : ev.importance >= 0.5 ? '\u25cf\u25cf' : '\u25cf';
        html += '<div class="tower-event tower-etype-' + typeClass + '">'
            + '<div class="tower-ev-head">'
            + '<span class="tower-ev-badge">' + esc(type.replace(/_/g, ' ').toUpperCase()) + '</span>'
            + '<span class="tower-ev-dots">' + dots + '</span>'
            + '</div>'
            + '<div class="tower-ev-desc">' + esc(ev.description) + '</div>'
            + '<div class="tower-ev-time">' + esc((ev.created_at || '').slice(0, 19)) + '</div>'
            + '</div>';
    }
    el.innerHTML = html;
}
|
||||
|
||||
// Render the prediction list into #tower-predictions.
// Each card shows the task id, the accuracy as a percentage (or PENDING when
// there is none yet), optional predicted winner / success probability, and
// the first 19 characters of created_at. An empty or missing list shows a
// placeholder message instead.
function renderPredictions(preds) {
    var target = document.getElementById('tower-predictions');
    if (!preds || !preds.length) {
        target.innerHTML = '<div class="tower-empty">No predictions yet.</div>';
        return;
    }

    var chunks = [];
    for (var idx = 0; idx < preds.length; idx++) {
        var pred = preds[idx];
        var stateClass = pred.evaluated ? 'tower-pred-done' : 'tower-pred-pending';

        // Accuracy label and colour: >= 0.7 success, < 0.4 danger, else warning.
        var accuracyText = 'PENDING';
        var accuracyClass = '';
        if (pred.accuracy != null) {
            accuracyText = Math.round(pred.accuracy * 100) + '%';
            if (pred.accuracy >= 0.7) {
                accuracyClass = 'text-success';
            } else if (pred.accuracy < 0.4) {
                accuracyClass = 'text-danger';
            } else {
                accuracyClass = 'text-warning';
            }
        }

        chunks.push('<div class="tower-pred ' + stateClass + '">');
        chunks.push('<div class="tower-pred-head">');
        chunks.push('<span class="tower-pred-task">' + esc(pred.task_id) + '</span>');
        chunks.push('<span class="tower-pred-acc ' + accuracyClass + '">' + accuracyText + '</span>');
        chunks.push('</div>');

        var forecast = pred.predicted;
        if (forecast) {
            chunks.push('<div class="tower-pred-detail">');
            if (forecast.likely_winner) {
                chunks.push('<span>Winner: ' + esc(forecast.likely_winner.slice(0, 8)) + '</span> ');
            }
            if (forecast.success_probability != null) {
                chunks.push('<span>Success: ' + Math.round(forecast.success_probability * 100) + '%</span> ');
            }
            chunks.push('</div>');
        }

        chunks.push('<div class="tower-ev-time">' + esc((pred.created_at || '').slice(0, 19)) + '</div>');
        chunks.push('</div>');
    }
    target.innerHTML = chunks.join('');
}
|
||||
|
||||
// Render the advisory list into #tower-advisories.
// Each advisory shows its category, priority percentage, title, detail and
// suggested action; the priority also selects a high/medium/low CSS class.
function renderAdvisories(advs) {
  var el = document.getElementById('tower-advisories');
  if (!advs || !advs.length) {
    el.innerHTML = '<div class="tower-empty">No advisories yet.</div>';
    return;
  }
  var html = '';
  for (var i = 0; i < advs.length; i++) {
    var a = advs[i];
    // Tolerate incomplete payloads: a missing category must not throw (which
    // would blank the whole panel) and a missing priority must not show "NaN%".
    var category = String(a.category == null ? 'unknown' : a.category);
    var priority = Number(a.priority) || 0;
    var prio = priority >= 0.7 ? 'high' : priority >= 0.4 ? 'medium' : 'low';
    html += '<div class="tower-advisory tower-adv-' + prio + '">'
      + '<div class="tower-adv-head">'
      + '<span class="tower-adv-cat">' + esc(category.replace(/_/g, ' ').toUpperCase()) + '</span>'
      + '<span class="tower-adv-prio">' + Math.round(priority * 100) + '%</span>'
      + '</div>'
      + '<div class="tower-adv-title">' + esc(a.title) + '</div>'
      + '<div class="tower-adv-detail">' + esc(a.detail) + '</div>'
      + '<div class="tower-adv-action">' + esc(a.suggested_action) + '</div>'
      + '</div>';
  }
  el.innerHTML = html;
}
|
||||
|
||||
// Update the summary counters (#ts-events, #ts-memories, #ts-preds) and the
// average-accuracy readout (#ts-accuracy) from a status object.
// Does nothing when no status is supplied.
function renderStats(status) {
  if (!status) return;
  document.getElementById('ts-events').textContent = status.events_captured || 0;
  document.getElementById('ts-memories').textContent = status.memories_stored || 0;
  var p = status.predictions || {};
  document.getElementById('ts-preds').textContent = p.total_predictions || 0;
  var acc = p.avg_accuracy;
  var accEl = document.getElementById('ts-accuracy');
  if (acc != null) {
    accEl.textContent = Math.round(acc * 100) + '%';
    accEl.className = 'tower-stat-value ' + (acc >= 0.7 ? 'text-success' : acc < 0.4 ? 'text-danger' : 'text-warning');
  } else {
    accEl.textContent = '\u2014';
    // Drop any colour left over from an earlier update so the dash is not
    // rendered in a stale success/danger/warning colour.
    var stale = ['text-success', 'text-danger', 'text-warning'];
    for (var i = 0; i < stale.length; i++) {
      accEl.classList.remove(stale[i]);
    }
  }
}
|
||||
|
||||
// Dispatch one decoded WebSocket message. Only 'spark_state' messages are
// handled; they refresh all four panels. Any other type is ignored.
function handleMsg(data) {
  if (data.type === 'spark_state') {
    renderEvents(data.events);
    renderPredictions(data.predictions);
    renderAdvisories(data.advisories);
    renderStats(data.status);
  }
}
|
||||
|
||||
// Open the /tower/ws WebSocket on the current host (wss: when the page is
// served over https:) and wire up its handlers. A closed socket schedules a
// reconnect attempt after 3 seconds.
function connect() {
  var scheme = 'ws:';
  if (location.protocol === 'https:') {
    scheme = 'wss:';
  }
  var socket = new WebSocket(scheme + '//' + location.host + '/tower/ws');
  ws = socket;

  socket.onopen = function() {
    setConn('live');
  };
  socket.onclose = function() {
    setConn('offline');
    setTimeout(connect, 3000);
  };
  socket.onerror = function() {
    setConn('offline');
  };
  socket.onmessage = function(e) {
    // Malformed JSON or a render failure is logged, never allowed to escape.
    try {
      handleMsg(JSON.parse(e.data));
    } catch (err) {
      console.error('Tower WS parse error', err);
    }
  };
}
|
||||
|
||||
connect();
|
||||
})();
|
||||
</script>
|
||||
{% endblock %}
|
||||
264
src/infrastructure/claude_quota.py
Normal file
264
src/infrastructure/claude_quota.py
Normal file
@@ -0,0 +1,264 @@
|
||||
"""
|
||||
claude_quota.py — Claude Code / Claude.ai Quota Monitor
|
||||
|
||||
Drop into src/infrastructure/ in the Timmy Time Dashboard repo.
|
||||
|
||||
Provides real-time quota visibility and metabolic protocol decisions.
|
||||
|
||||
Usage:
|
||||
from infrastructure.claude_quota import QuotaMonitor
|
||||
|
||||
monitor = QuotaMonitor()
|
||||
status = monitor.check()
|
||||
print(status.five_hour_pct) # 42
|
||||
print(status.five_hour_resets_in) # "2h 15m"
|
||||
print(status.seven_day_pct) # 29
|
||||
print(status.recommended_tier) # MetabolicTier.BURST
|
||||
|
||||
# Metabolic protocol: auto-select model based on quota
|
||||
model = monitor.select_model(task_complexity="high")
|
||||
# Returns "claude-sonnet-4-6" if quota allows, else "qwen3:14b"
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import subprocess
|
||||
import urllib.request
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC, datetime
|
||||
from enum import StrEnum
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MetabolicTier(StrEnum):
|
||||
"""The three-tier metabolic protocol from the Timmy Time architecture."""
|
||||
|
||||
BURST = "burst" # Cloud API (Claude/Groq) — expensive, best quality
|
||||
ACTIVE = "active" # Local 14B (Qwen3-14B) — free, good quality
|
||||
RESTING = "resting" # Local 8B (Qwen3-8B) — free, fast, adequate
|
||||
|
||||
|
||||
@dataclass
|
||||
class QuotaStatus:
|
||||
"""Current Claude quota state."""
|
||||
|
||||
five_hour_utilization: float # 0.0 to 1.0
|
||||
five_hour_resets_at: str | None
|
||||
seven_day_utilization: float # 0.0 to 1.0
|
||||
seven_day_resets_at: str | None
|
||||
raw_response: dict
|
||||
fetched_at: datetime
|
||||
|
||||
@property
|
||||
def five_hour_pct(self) -> int:
|
||||
return int(self.five_hour_utilization * 100)
|
||||
|
||||
@property
|
||||
def seven_day_pct(self) -> int:
|
||||
return int(self.seven_day_utilization * 100)
|
||||
|
||||
@property
|
||||
def five_hour_resets_in(self) -> str:
|
||||
return _time_remaining(self.five_hour_resets_at)
|
||||
|
||||
@property
|
||||
def seven_day_resets_in(self) -> str:
|
||||
return _time_remaining(self.seven_day_resets_at)
|
||||
|
||||
@property
|
||||
def recommended_tier(self) -> MetabolicTier:
|
||||
"""Metabolic protocol: determine which inference tier to use."""
|
||||
# If weekly quota is critical, go full local
|
||||
if self.seven_day_utilization >= 0.80:
|
||||
return MetabolicTier.RESTING
|
||||
# If 5-hour window is critical or past half, use local
|
||||
if self.five_hour_utilization >= 0.50:
|
||||
return MetabolicTier.ACTIVE
|
||||
# Quota healthy — cloud available for high-value tasks
|
||||
return MetabolicTier.BURST
|
||||
|
||||
def summary(self) -> str:
|
||||
"""Human-readable status string."""
|
||||
return (
|
||||
f"5h: {self.five_hour_pct}% (resets {self.five_hour_resets_in}) | "
|
||||
f"7d: {self.seven_day_pct}% (resets {self.seven_day_resets_in}) | "
|
||||
f"tier: {self.recommended_tier.value}"
|
||||
)
|
||||
|
||||
|
||||
class QuotaMonitor:
|
||||
"""
|
||||
Monitors Claude Code / Claude.ai quota via the internal OAuth API.
|
||||
|
||||
The token is read from macOS Keychain where Claude Code stores it.
|
||||
Falls back gracefully if credentials aren't available (e.g., on Linux VPS).
|
||||
"""
|
||||
|
||||
API_URL = "https://api.anthropic.com/api/oauth/usage"
|
||||
KEYCHAIN_SERVICE = "Claude Code-credentials"
|
||||
USER_AGENT = "claude-code/2.0.32"
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._token: str | None = None
|
||||
self._last_status: QuotaStatus | None = None
|
||||
self._cache_seconds = 30 # Don't hammer the API
|
||||
|
||||
def _get_token(self) -> str | None:
|
||||
"""Extract OAuth token from macOS Keychain."""
|
||||
if self._token:
|
||||
return self._token
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["security", "find-generic-password", "-s", self.KEYCHAIN_SERVICE, "-w"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
logger.warning("Claude Code credentials not found in Keychain")
|
||||
return None
|
||||
|
||||
creds = json.loads(result.stdout.strip())
|
||||
oauth = creds.get("claudeAiOauth", creds)
|
||||
self._token = oauth.get("accessToken")
|
||||
return self._token
|
||||
|
||||
except (
|
||||
json.JSONDecodeError,
|
||||
KeyError,
|
||||
FileNotFoundError,
|
||||
subprocess.TimeoutExpired,
|
||||
) as exc:
|
||||
logger.warning("Could not read Claude Code credentials: %s", exc)
|
||||
return None
|
||||
|
||||
def check(self, force: bool = False) -> QuotaStatus | None:
|
||||
"""
|
||||
Fetch current quota status.
|
||||
|
||||
Returns None if credentials aren't available (graceful degradation).
|
||||
Caches results for 30 seconds to avoid rate limiting the quota API itself.
|
||||
"""
|
||||
# Return cached if fresh
|
||||
if not force and self._last_status:
|
||||
age = (datetime.now(UTC) - self._last_status.fetched_at).total_seconds()
|
||||
if age < self._cache_seconds:
|
||||
return self._last_status
|
||||
|
||||
token = self._get_token()
|
||||
if not token:
|
||||
return None
|
||||
|
||||
try:
|
||||
req = urllib.request.Request(
|
||||
self.API_URL,
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
"Content-Type": "application/json",
|
||||
"User-Agent": self.USER_AGENT,
|
||||
"Authorization": f"Bearer {token}",
|
||||
"anthropic-beta": "oauth-2025-04-20",
|
||||
},
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=10) as resp:
|
||||
data = json.loads(resp.read().decode())
|
||||
|
||||
five_hour = data.get("five_hour") or {}
|
||||
seven_day = data.get("seven_day") or {}
|
||||
|
||||
self._last_status = QuotaStatus(
|
||||
five_hour_utilization=float(five_hour.get("utilization", 0.0)),
|
||||
five_hour_resets_at=five_hour.get("resets_at"),
|
||||
seven_day_utilization=float(seven_day.get("utilization", 0.0)),
|
||||
seven_day_resets_at=seven_day.get("resets_at"),
|
||||
raw_response=data,
|
||||
fetched_at=datetime.now(UTC),
|
||||
)
|
||||
return self._last_status
|
||||
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to fetch quota: %s", exc)
|
||||
return self._last_status # Return stale data if available
|
||||
|
||||
def select_model(self, task_complexity: str = "medium") -> str:
|
||||
"""
|
||||
Metabolic protocol: select the right model based on quota + task complexity.
|
||||
|
||||
Returns an Ollama model tag or "claude-sonnet-4-6" for cloud.
|
||||
|
||||
task_complexity: "low" | "medium" | "high"
|
||||
"""
|
||||
status = self.check()
|
||||
|
||||
# No quota info available — assume local only (sovereign default)
|
||||
if status is None:
|
||||
return "qwen3:14b" if task_complexity == "high" else "qwen3:8b"
|
||||
|
||||
tier = status.recommended_tier
|
||||
|
||||
if tier == MetabolicTier.BURST and task_complexity == "high":
|
||||
return "claude-sonnet-4-6" # Cloud — best quality
|
||||
elif tier == MetabolicTier.BURST and task_complexity == "medium":
|
||||
return "qwen3:14b" # Save cloud for truly hard tasks
|
||||
elif tier == MetabolicTier.ACTIVE:
|
||||
return "qwen3:14b" # Local 14B — good enough
|
||||
else: # RESTING
|
||||
return "qwen3:8b" # Local 8B — conserve everything
|
||||
|
||||
def should_use_cloud(self, task_value: str = "normal") -> bool:
|
||||
"""
|
||||
Simple yes/no: should this task use cloud API?
|
||||
|
||||
task_value: "critical" | "high" | "normal" | "routine"
|
||||
"""
|
||||
status = self.check()
|
||||
|
||||
if status is None:
|
||||
return False # No credentials = local only
|
||||
|
||||
if task_value == "critical":
|
||||
return status.seven_day_utilization < 0.95 # Almost always yes
|
||||
elif task_value == "high":
|
||||
return status.five_hour_utilization < 0.60
|
||||
elif task_value == "normal":
|
||||
return status.five_hour_utilization < 0.30
|
||||
else: # routine
|
||||
return False # Never waste cloud on routine
|
||||
|
||||
|
||||
def _time_remaining(reset_at: str | None) -> str:
|
||||
"""Format time until reset as human-readable string."""
|
||||
if not reset_at or reset_at == "null":
|
||||
return "unknown"
|
||||
|
||||
try:
|
||||
reset = datetime.fromisoformat(reset_at.replace("Z", "+00:00"))
|
||||
now = datetime.now(UTC)
|
||||
diff = reset - now
|
||||
|
||||
if diff.total_seconds() <= 0:
|
||||
return "resetting now"
|
||||
|
||||
hours = int(diff.total_seconds() // 3600)
|
||||
mins = int((diff.total_seconds() % 3600) // 60)
|
||||
|
||||
if hours > 0:
|
||||
return f"{hours}h {mins}m"
|
||||
return f"{mins}m"
|
||||
|
||||
except (ValueError, TypeError):
|
||||
return "unknown"
|
||||
|
||||
|
||||
# Shared monitor instance, created lazily by get_quota_monitor().
_quota_monitor: QuotaMonitor | None = None


def get_quota_monitor() -> QuotaMonitor:
    """Return the module-wide QuotaMonitor, instantiating it on first use."""
    global _quota_monitor
    monitor = _quota_monitor
    if monitor is None:
        monitor = _quota_monitor = QuotaMonitor()
    return monitor
|
||||
84
src/infrastructure/db_pool.py
Normal file
84
src/infrastructure/db_pool.py
Normal file
@@ -0,0 +1,84 @@
|
||||
"""Thread-local SQLite connection pool.
|
||||
|
||||
Provides a ConnectionPool class that manages SQLite connections per thread,
|
||||
with support for context managers and automatic cleanup.
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import threading
|
||||
from collections.abc import Generator
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class ConnectionPool:
|
||||
"""Thread-local SQLite connection pool.
|
||||
|
||||
Each thread gets its own connection, which is reused for subsequent
|
||||
requests from the same thread. Connections are automatically cleaned
|
||||
up when close_connection() is called or the context manager exits.
|
||||
"""
|
||||
|
||||
def __init__(self, db_path: Path | str) -> None:
|
||||
"""Initialize the connection pool.
|
||||
|
||||
Args:
|
||||
db_path: Path to the SQLite database file.
|
||||
"""
|
||||
self._db_path = Path(db_path)
|
||||
self._local = threading.local()
|
||||
|
||||
def _ensure_db_exists(self) -> None:
|
||||
"""Ensure the database directory exists."""
|
||||
self._db_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def get_connection(self) -> sqlite3.Connection:
|
||||
"""Get a connection for the current thread.
|
||||
|
||||
Creates a new connection if one doesn't exist for this thread,
|
||||
otherwise returns the existing connection.
|
||||
|
||||
Returns:
|
||||
A sqlite3 Connection object.
|
||||
"""
|
||||
if not hasattr(self._local, "conn") or self._local.conn is None:
|
||||
self._ensure_db_exists()
|
||||
self._local.conn = sqlite3.connect(str(self._db_path), check_same_thread=False)
|
||||
self._local.conn.row_factory = sqlite3.Row
|
||||
return self._local.conn
|
||||
|
||||
def close_connection(self) -> None:
|
||||
"""Close the connection for the current thread.
|
||||
|
||||
Cleans up the thread-local storage. Safe to call even if
|
||||
no connection exists for this thread.
|
||||
"""
|
||||
if hasattr(self._local, "conn") and self._local.conn is not None:
|
||||
self._local.conn.close()
|
||||
self._local.conn = None
|
||||
|
||||
@contextmanager
|
||||
def connection(self) -> Generator[sqlite3.Connection, None, None]:
|
||||
"""Context manager for getting and automatically closing a connection.
|
||||
|
||||
Yields:
|
||||
A sqlite3 Connection object.
|
||||
|
||||
Example:
|
||||
with pool.connection() as conn:
|
||||
cursor = conn.execute("SELECT 1")
|
||||
result = cursor.fetchone()
|
||||
"""
|
||||
conn = self.get_connection()
|
||||
try:
|
||||
yield conn
|
||||
finally:
|
||||
self.close_connection()
|
||||
|
||||
def close_all(self) -> None:
|
||||
"""Close all connections (useful for testing).
|
||||
|
||||
Note: This only closes the connection for the current thread.
|
||||
In a multi-threaded environment, each thread must close its own.
|
||||
"""
|
||||
self.close_connection()
|
||||
@@ -149,6 +149,52 @@ def _log_error_event(
|
||||
logger.debug("Failed to log error event: %s", log_exc)
|
||||
|
||||
|
||||
def _build_report_description(
|
||||
exc: Exception,
|
||||
source: str,
|
||||
context: dict | None,
|
||||
error_hash: str,
|
||||
tb_str: str,
|
||||
affected_file: str,
|
||||
affected_line: int,
|
||||
git_ctx: dict,
|
||||
) -> str:
|
||||
"""Build the markdown description for a bug report task."""
|
||||
parts = [
|
||||
f"**Error:** {type(exc).__name__}: {str(exc)}",
|
||||
f"**Source:** {source}",
|
||||
f"**File:** {affected_file}:{affected_line}",
|
||||
f"**Git:** {git_ctx.get('branch', '?')} @ {git_ctx.get('commit', '?')}",
|
||||
f"**Time:** {datetime.now(UTC).isoformat()}",
|
||||
f"**Hash:** {error_hash}",
|
||||
]
|
||||
|
||||
if context:
|
||||
ctx_str = ", ".join(f"{k}={v}" for k, v in context.items())
|
||||
parts.append(f"**Context:** {ctx_str}")
|
||||
|
||||
parts.append(f"\n**Stack Trace:**\n```\n{tb_str[:2000]}\n```")
|
||||
return "\n".join(parts)
|
||||
|
||||
|
||||
def _log_bug_report_created(source: str, task_id: str, error_hash: str, title: str) -> None:
    """Record a BUG_REPORT_CREATED event in the swarm event log.

    Best-effort: any failure (including the import) is logged as a warning
    and swallowed. The title stored with the event is cut to 100 characters.
    """
    try:
        from swarm.event_log import EventType, log_event

        event_type = EventType.BUG_REPORT_CREATED
        payload = {
            "error_hash": error_hash,
            "title": title[:100],
        }
        log_event(event_type, source=source, task_id=task_id, data=payload)
    except Exception as exc:
        logger.warning("Bug report event log error: %s", exc)
|
||||
|
||||
|
||||
def _create_bug_report(
|
||||
exc: Exception,
|
||||
source: str,
|
||||
@@ -164,25 +210,20 @@ def _create_bug_report(
|
||||
from swarm.task_queue.models import create_task
|
||||
|
||||
title = f"[BUG] {type(exc).__name__}: {str(exc)[:80]}"
|
||||
|
||||
description_parts = [
|
||||
f"**Error:** {type(exc).__name__}: {str(exc)}",
|
||||
f"**Source:** {source}",
|
||||
f"**File:** {affected_file}:{affected_line}",
|
||||
f"**Git:** {git_ctx.get('branch', '?')} @ {git_ctx.get('commit', '?')}",
|
||||
f"**Time:** {datetime.now(UTC).isoformat()}",
|
||||
f"**Hash:** {error_hash}",
|
||||
]
|
||||
|
||||
if context:
|
||||
ctx_str = ", ".join(f"{k}={v}" for k, v in context.items())
|
||||
description_parts.append(f"**Context:** {ctx_str}")
|
||||
|
||||
description_parts.append(f"\n**Stack Trace:**\n```\n{tb_str[:2000]}\n```")
|
||||
description = _build_report_description(
|
||||
exc,
|
||||
source,
|
||||
context,
|
||||
error_hash,
|
||||
tb_str,
|
||||
affected_file,
|
||||
affected_line,
|
||||
git_ctx,
|
||||
)
|
||||
|
||||
task = create_task(
|
||||
title=title,
|
||||
description="\n".join(description_parts),
|
||||
description=description,
|
||||
assigned_to="default",
|
||||
created_by="system",
|
||||
priority="normal",
|
||||
@@ -190,24 +231,9 @@ def _create_bug_report(
|
||||
auto_approve=True,
|
||||
task_type="bug_report",
|
||||
)
|
||||
task_id = task.id
|
||||
|
||||
try:
|
||||
from swarm.event_log import EventType, log_event
|
||||
|
||||
log_event(
|
||||
EventType.BUG_REPORT_CREATED,
|
||||
source=source,
|
||||
task_id=task_id,
|
||||
data={
|
||||
"error_hash": error_hash,
|
||||
"title": title[:100],
|
||||
},
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("Bug report screenshot error: %s", exc)
|
||||
|
||||
return task_id
|
||||
_log_bug_report_created(source, task.id, error_hash, title)
|
||||
return task.id
|
||||
|
||||
except Exception as task_exc:
|
||||
logger.debug("Failed to create bug report task: %s", task_exc)
|
||||
|
||||
@@ -64,7 +64,7 @@ class EventBus:
|
||||
|
||||
@bus.subscribe("agent.task.*")
|
||||
async def handle_task(event: Event):
|
||||
logger.debug(f"Task event: {event.data}")
|
||||
logger.debug("Task event: %s", event.data)
|
||||
|
||||
await bus.publish(Event(
|
||||
type="agent.task.assigned",
|
||||
|
||||
7
src/infrastructure/guards/__init__.py
Normal file
7
src/infrastructure/guards/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
"""Content moderation pipeline for AI narrator output.
|
||||
|
||||
Three-layer defense:
|
||||
1. Game-context system prompts (vocabulary whitelists, theme framing)
|
||||
2. Real-time output filter via Llama Guard (or fallback regex)
|
||||
3. Per-game moderation profiles with configurable thresholds
|
||||
"""
|
||||
497
src/infrastructure/guards/moderation.py
Normal file
497
src/infrastructure/guards/moderation.py
Normal file
@@ -0,0 +1,497 @@
|
||||
"""Content moderation pipeline for AI narrator output.
|
||||
|
||||
Three-layer defense against harmful LLM output:
|
||||
|
||||
Layer 1 — Game-context system prompts with per-game vocabulary whitelists.
|
||||
Layer 2 — Real-time output filter (Llama Guard via Ollama, regex fallback).
|
||||
Layer 3 — Per-game moderation profiles with configurable thresholds.
|
||||
|
||||
Usage:
|
||||
from infrastructure.guards.moderation import get_moderator
|
||||
|
||||
moderator = get_moderator()
|
||||
result = await moderator.check("Some narrator text", game="morrowind")
|
||||
if result.blocked:
|
||||
use_fallback_narration(result.fallback)
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC, datetime
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
from config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ModerationVerdict(Enum):
    """Outcome of running text through the moderation pipeline."""

    # Content was allowed.
    PASS = "pass"  # noqa: S105
    # Content was flagged by a guard layer.
    FAIL = "fail"
    # NOTE(review): not produced by any code visible here — confirm intended use.
    ERROR = "error"
|
||||
|
||||
|
||||
class ViolationCategory(Enum):
    """Kinds of content violation a moderation result can carry."""

    HATE_SPEECH = "hate_speech"
    VIOLENCE_GLORIFICATION = "violence_glorification"
    REAL_WORLD_HARM = "real_world_harm"
    SEXUAL_CONTENT = "sexual_content"
    SELF_HARM = "self_harm"
    # No violation, or no category could be determined.
    NONE = "none"
|
||||
|
||||
|
||||
@dataclass
class ModerationResult:
    """Outcome of one moderation check, including which layer decided it."""

    verdict: ModerationVerdict
    blocked: bool
    category: ViolationCategory = ViolationCategory.NONE
    confidence: float = 0.0
    latency_ms: float = 0.0
    # Name of the layer that produced this result.
    layer: str = ""
    # Replacement narration to use when the content is blocked.
    fallback: str = ""
    reason: str = ""
    # Creation time as an ISO-8601 UTC string.
    timestamp: str = field(default_factory=lambda: datetime.now(UTC).isoformat())

    @property
    def passed(self) -> bool:
        """True when the verdict is PASS."""
        return self.verdict == ModerationVerdict.PASS
|
||||
|
||||
|
||||
@dataclass
class GameProfile:
    """Moderation settings for one game.

    Bundles the game's safe vocabulary, its context-framing prompt, the
    confidence threshold for blocking, and per-scene fallback narrations.
    """

    game_id: str
    display_name: str
    # In-game terms that are acceptable in context (e.g., "Skooma", "slave", "Morag Tong")
    vocabulary_whitelist: list[str] = field(default_factory=list)
    # Extra system-prompt text that frames content as in-game
    context_prompt: str = ""
    # Flags with confidence below this value are let through
    threshold: float = 0.8
    # Fallback narration keyed by scene type
    fallbacks: dict[str, str] = field(default_factory=dict)
|
||||
|
||||
|
||||
# ── Regex fallback patterns ─────────────────────────────────────────────────
# Applied only when Llama Guard cannot be used. Deliberately narrow: they
# target explicit real-world-harm phrasing, accepting missed violations in
# exchange for not flagging ordinary game narration.
_FALLBACK_PATTERNS: list[tuple[re.Pattern, ViolationCategory]] = [
    (re.compile(expression, re.IGNORECASE), ViolationCategory.REAL_WORLD_HARM)
    for expression in (
        r"\b(kill\s+(?:all|every)\s+(?:real|actual)\s+(?:people|humans|children))\b",
        r"\b((?:you|we)\s+should\s+(?:actually|really)\s+(?:harm|hurt|kill))\b",
        (
            r"\b((?:in\s+real\s+life|irl)\s+(?:you|we)\s+(?:should|could|must)\s+"
            r"(?:attack|hurt|kill|harm))\b"
        ),
    )
]
|
||||
|
||||
# ── Default game profiles ───────────────────────────────────────────────────
# Built-in profiles keyed by game id. The "default" entry is the generic
# profile that ContentModerator.get_profile falls back to for unknown games.

_DEFAULT_PROFILES: dict[str, GameProfile] = {
    # Morrowind: whitelists lore terms that would otherwise read as drug,
    # slavery or assassination references, and uses a slightly higher
    # blocking threshold than the generic profile.
    "morrowind": GameProfile(
        game_id="morrowind",
        display_name="The Elder Scrolls III: Morrowind",
        vocabulary_whitelist=[
            "Skooma",
            "Moon Sugar",
            "slave",
            "slavery",
            "Morag Tong",
            "Dark Brotherhood",
            "Telvanni",
            "Camonna Tong",
            "smuggler",
            "assassin",
            "Sixth House",
            "Corprus",
            "Dagoth Ur",
            "Nerevarine",
        ],
        context_prompt=(
            "You are narrating gameplay of The Elder Scrolls III: Morrowind. "
            "Morrowind contains mature themes including slavery, drug use (Skooma/Moon Sugar), "
            "assassin guilds (Morag Tong, Dark Brotherhood), and political intrigue. "
            "Treat these as game mechanics and historical worldbuilding within the game's "
            "fictional universe. Never editorialize on real-world parallels. "
            "Narrate events neutrally as a game commentator would."
        ),
        threshold=0.85,
        # Keyed by scene type; "default" is used when the scene type is unknown.
        fallbacks={
            "combat": "The battle rages on in the ashlands of Vvardenfell.",
            "dialogue": "The conversation continues between the characters.",
            "exploration": "The Nerevarine presses onward through the landscape.",
            "default": "The adventure continues in Morrowind.",
        },
    ),
    # Generic profile: no whitelist, neutral framing.
    "default": GameProfile(
        game_id="default",
        display_name="Generic Game",
        vocabulary_whitelist=[],
        context_prompt=(
            "You are narrating gameplay. Describe in-game events as a neutral "
            "game commentator. Never reference real-world violence, politics, "
            "or controversial topics. Stay focused on game mechanics and story."
        ),
        threshold=0.8,
        # Keyed by scene type; "default" is used when the scene type is unknown.
        fallbacks={
            "combat": "The action continues on screen.",
            "dialogue": "The conversation unfolds between characters.",
            "exploration": "The player explores the game world.",
            "default": "The gameplay continues.",
        },
    ),
}
|
||||
|
||||
|
||||
class ContentModerator:
|
||||
"""Three-layer content moderation pipeline.
|
||||
|
||||
Layer 1: Game-context system prompts with vocabulary whitelists.
|
||||
Layer 2: LLM-based moderation (Llama Guard via Ollama, with regex fallback).
|
||||
Layer 3: Per-game threshold tuning and profile-based filtering.
|
||||
|
||||
Follows graceful degradation — if Llama Guard is unavailable,
|
||||
falls back to regex patterns. Never crashes.
|
||||
"""
|
||||
|
||||
def __init__(
    self,
    profiles: dict[str, GameProfile] | None = None,
    guard_model: str | None = None,
) -> None:
    """Create a moderator.

    Args:
        profiles: Game profiles keyed by game id. When omitted or empty, a
            copy of the built-in default profiles is used.
        guard_model: Name of the guard model to query. When omitted or
            empty, ``settings.moderation_guard_model`` is used.
    """
    if profiles:
        self._profiles: dict[str, GameProfile] = profiles
    else:
        self._profiles = dict(_DEFAULT_PROFILES)

    if guard_model:
        self._guard_model = guard_model
    else:
        self._guard_model = settings.moderation_guard_model

    # None means "not probed yet"; the first availability check fills it in.
    self._guard_available: bool | None = None
    self._metrics = _ModerationMetrics()
|
||||
|
||||
def get_profile(self, game: str) -> GameProfile:
|
||||
"""Get the moderation profile for a game, falling back to default."""
|
||||
return self._profiles.get(game, self._profiles["default"])
|
||||
|
||||
def register_profile(self, profile: GameProfile) -> None:
    """Store *profile* under its game_id, replacing any existing entry."""
    game_id = profile.game_id
    self._profiles[game_id] = profile
    logger.info("Registered moderation profile: %s", game_id)
|
||||
|
||||
def get_context_prompt(self, game: str) -> str:
|
||||
"""Get the game-context system prompt (Layer 1).
|
||||
|
||||
Returns the context prompt for the given game, which should be
|
||||
prepended to the narrator's system prompt.
|
||||
"""
|
||||
profile = self.get_profile(game)
|
||||
return profile.context_prompt
|
||||
|
||||
async def check(
    self,
    text: str,
    game: str = "default",
    scene_type: str = "default",
) -> ModerationResult:
    """Moderate one piece of narrator output through all three layers.

    Args:
        text: The text to moderate (narrator output).
        game: Game identifier used to pick the moderation profile.
        scene_type: Current scene type, used to pick the fallback narration.

    Returns:
        ModerationResult with verdict, confidence, latency and, when the
        content is blocked, a fallback narration.
    """
    started = time.monotonic()
    profile = self.get_profile(game)

    # Layer 1 (whitelist masking) feeds Layer 2 (guard model or regex).
    masked = self._apply_whitelist(text, profile)
    result = await self._run_guard(masked, profile)

    # Layer 3: a flag below the profile's threshold is downgraded to a pass.
    flagged = result.verdict == ModerationVerdict.FAIL
    if flagged and result.confidence < profile.threshold:
        logger.info(
            "Moderation flag below threshold (%.2f < %.2f) — allowing",
            result.confidence,
            profile.threshold,
        )
        result = ModerationResult(
            verdict=ModerationVerdict.PASS,
            blocked=False,
            confidence=result.confidence,
            layer="threshold",
            reason=f"Below threshold ({result.confidence:.2f} < {profile.threshold:.2f})",
        )

    # Blocked content gets the scene's fallback narration, else the profile default.
    if result.blocked:
        narrations = profile.fallbacks
        result.fallback = narrations.get(scene_type, narrations.get("default", ""))

    result.latency_ms = (time.monotonic() - started) * 1000
    self._metrics.record(result)

    if result.blocked:
        logger.warning(
            "Content blocked [%s/%s]: category=%s confidence=%.2f reason=%s",
            game,
            scene_type,
            result.category.value,
            result.confidence,
            result.reason,
        )

    return result
|
||||
|
||||
def _apply_whitelist(self, text: str, profile: GameProfile) -> str:
|
||||
"""Layer 1: Replace whitelisted game terms with placeholders.
|
||||
|
||||
This prevents the guard model from flagging in-game terminology
|
||||
(e.g., "Skooma" being flagged as drug reference).
|
||||
"""
|
||||
cleaned = text
|
||||
for term in profile.vocabulary_whitelist:
|
||||
# Case-insensitive replacement with a neutral placeholder
|
||||
pattern = re.compile(re.escape(term), re.IGNORECASE)
|
||||
cleaned = pattern.sub("[GAME_TERM]", cleaned)
|
||||
return cleaned
|
||||
|
||||
async def _run_guard(self, text: str, profile: GameProfile) -> ModerationResult:
    """Layer 2: classify *text* with the guard model, or with regexes.

    Returns a PASS result straight away when moderation is disabled in
    settings. If the guard model is available but its call raises, the
    guard is marked unavailable and the regex fallback is used instead.
    """
    if settings.moderation_enabled:
        guard_ready = await self._is_guard_available()
        if guard_ready:
            try:
                return await self._check_with_guard(text)
            except Exception as exc:
                logger.warning("Guard model failed, using regex fallback: %s", exc)
                self._guard_available = False

        # Guard missing or just failed.
        return self._check_with_regex(text)

    return ModerationResult(
        verdict=ModerationVerdict.PASS,
        blocked=False,
        layer="disabled",
        reason="Moderation disabled",
    )
|
||||
|
||||
async def _is_guard_available(self) -> bool:
|
||||
"""Check if the guard model is available via Ollama."""
|
||||
if self._guard_available is not None:
|
||||
return self._guard_available
|
||||
|
||||
try:
|
||||
import aiohttp
|
||||
|
||||
url = f"{settings.normalized_ollama_url}/api/tags"
|
||||
timeout = aiohttp.ClientTimeout(total=5)
|
||||
async with aiohttp.ClientSession(timeout=timeout) as session:
|
||||
async with session.get(url) as resp:
|
||||
if resp.status != 200:
|
||||
self._guard_available = False
|
||||
return False
|
||||
data = await resp.json()
|
||||
models = [m.get("name", "") for m in data.get("models", [])]
|
||||
self._guard_available = any(
|
||||
self._guard_model in m or m.startswith(self._guard_model) for m in models
|
||||
)
|
||||
if not self._guard_available:
|
||||
logger.info(
|
||||
"Guard model '%s' not found in Ollama — using regex fallback",
|
||||
self._guard_model,
|
||||
)
|
||||
return self._guard_available
|
||||
except Exception as exc:
|
||||
logger.debug("Ollama guard check failed: %s", exc)
|
||||
self._guard_available = False
|
||||
return False
|
||||
|
||||
async def _check_with_guard(self, text: str) -> ModerationResult:
    """Classify *text* with the guard model through Ollama's chat endpoint.

    Returns a PASS result when the reply starts with "safe"; any other reply
    yields a blocking FAIL result with a fixed confidence of 0.95.

    Raises:
        RuntimeError: If the endpoint answers with a non-200 status.
    """
    import aiohttp

    endpoint = f"{settings.normalized_ollama_url}/api/chat"
    request_body = {
        "model": self._guard_model,
        "messages": [{"role": "user", "content": text}],
        "stream": False,
        "options": {"temperature": 0.0},
    }

    client_timeout = aiohttp.ClientTimeout(total=10)
    async with aiohttp.ClientSession(timeout=client_timeout) as session:
        async with session.post(endpoint, json=request_body) as resp:
            if resp.status != 200:
                raise RuntimeError(f"Guard API error: {resp.status}")
            data = await resp.json()

    response_text = data.get("message", {}).get("content", "").strip().lower()

    # Expected reply shape: "safe", or "unsafe" followed by a category line.
    if response_text.startswith("safe"):
        return ModerationResult(
            verdict=ModerationVerdict.PASS,
            blocked=False,
            confidence=0.0,
            layer="llama_guard",
            reason="Content safe",
        )

    # Flagged: the category, when present, is on the second line.
    _, newline, remainder = response_text.partition("\n")
    if newline:
        category = _parse_guard_category(remainder.partition("\n")[0].strip())
    else:
        category = ViolationCategory.NONE

    return ModerationResult(
        verdict=ModerationVerdict.FAIL,
        blocked=True,
        category=category,
        confidence=0.95,  # High confidence from LLM guard
        layer="llama_guard",
        reason=f"Guard flagged: {response_text}",
    )
|
||||
|
||||
def _check_with_regex(self, text: str) -> ModerationResult:
    """Classify ``text`` with the regex fallback patterns.

    Used when the guard model is unavailable. Patterns are tried in
    ``_FALLBACK_PATTERNS`` order and the first hit decides the result;
    the reason carries at most the first 50 characters of the match.
    """
    first_hit = next(
        (
            (found, category)
            for pattern, category in _FALLBACK_PATTERNS
            if (found := pattern.search(text))
        ),
        None,
    )

    if first_hit is None:
        return ModerationResult(
            verdict=ModerationVerdict.PASS,
            blocked=False,
            layer="regex_fallback",
            reason="No regex matches",
        )

    found, category = first_hit
    return ModerationResult(
        verdict=ModerationVerdict.FAIL,
        blocked=True,
        category=category,
        confidence=0.95,  # Regex patterns are high-signal
        layer="regex_fallback",
        reason=f"Regex match: {found.group(0)[:50]}",
    )
|
||||
|
||||
def get_metrics(self) -> dict[str, Any]:
    """Return the moderation metrics as produced by ``self._metrics.to_dict()``."""
    snapshot = self._metrics.to_dict()
    return snapshot
|
||||
|
||||
def reset_guard_cache(self) -> None:
    """Forget the cached guard availability (e.g., after pulling model).

    Sets ``self._guard_available`` back to ``None``.
    """
    setattr(self, "_guard_available", None)
|
||||
|
||||
|
||||
class _ModerationMetrics:
    """Running counters describing moderation pipeline outcomes."""

    def __init__(self) -> None:
        # Outcome counters.
        self.total_checks: int = 0
        self.passed: int = 0
        self.blocked: int = 0
        self.errors: int = 0
        # Sum of per-check latencies; averaged in to_dict().
        self.total_latency_ms: float = 0.0
        # Histograms keyed by layer name / category value.
        self.by_layer: dict[str, int] = {}
        self.by_category: dict[str, int] = {}

    def record(self, result: ModerationResult) -> None:
        """Fold one moderation result into the counters."""
        self.total_checks += 1
        self.total_latency_ms += result.latency_ms

        outcome = result.verdict
        if outcome == ModerationVerdict.PASS:
            self.passed += 1
        elif outcome == ModerationVerdict.FAIL:
            self.blocked += 1
        else:
            # Any other verdict is counted as an error.
            self.errors += 1

        layer_key = result.layer or "unknown"
        self.by_layer[layer_key] = 1 + self.by_layer.get(layer_key, 0)

        if not result.blocked:
            return
        category_key = result.category.value
        self.by_category[category_key] = 1 + self.by_category.get(category_key, 0)

    def to_dict(self) -> dict[str, Any]:
        """Return the counters as a plain dict with an average latency."""
        if self.total_checks > 0:
            average = round(self.total_latency_ms / self.total_checks, 2)
        else:
            average = 0.0
        return {
            "total_checks": self.total_checks,
            "passed": self.passed,
            "blocked": self.blocked,
            "errors": self.errors,
            "avg_latency_ms": average,
            # Copies, so callers cannot mutate the live histograms.
            "by_layer": dict(self.by_layer),
            "by_category": dict(self.by_category),
        }
|
||||
|
||||
|
||||
def _parse_guard_category(cat_str: str) -> ViolationCategory:
    """Map a guard-model category string onto a ``ViolationCategory``.

    Matching is case-insensitive substring search; rules are tried in
    order and the first match wins. ``ViolationCategory.NONE`` is
    returned when nothing matches.
    """
    lowered = cat_str.lower()
    # Order matters: the self-harm keywords must be tested before the
    # generic "harm" keyword.
    rules = (
        (("hate",), ViolationCategory.HATE_SPEECH),
        (("violence",), ViolationCategory.VIOLENCE_GLORIFICATION),
        (("sexual",), ViolationCategory.SEXUAL_CONTENT),
        (("self-harm", "self_harm", "suicide"), ViolationCategory.SELF_HARM),
        (("harm", "dangerous"), ViolationCategory.REAL_WORLD_HARM),
    )
    for keywords, category in rules:
        if any(keyword in lowered for keyword in keywords):
            return category
    return ViolationCategory.NONE
|
||||
|
||||
|
||||
# ── Module-level singleton ──────────────────────────────────────────────────
|
||||
# Lazily created shared instance; populated on the first get_moderator() call.
_moderator: ContentModerator | None = None


def get_moderator() -> ContentModerator:
    """Return the shared ``ContentModerator``, creating it on first use."""
    global _moderator
    if _moderator is not None:
        return _moderator
    _moderator = ContentModerator()
    return _moderator
|
||||
56
src/infrastructure/guards/profiles.py
Normal file
56
src/infrastructure/guards/profiles.py
Normal file
@@ -0,0 +1,56 @@
|
||||
"""Load game moderation profiles from config/moderation.yaml.
|
||||
|
||||
Falls back to hardcoded defaults if the YAML file is missing or malformed.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from infrastructure.guards.moderation import GameProfile
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def load_profiles(config_path: Path | None = None) -> dict[str, GameProfile]:
    """Load game moderation profiles from YAML config.

    Every failure mode (missing file, PyYAML not installed, unreadable or
    unparsable file, wrong top-level shape) yields an empty dict rather
    than an exception. Individual invalid profiles are skipped with a
    warning.

    Args:
        config_path: Path to moderation.yaml. Defaults to config/moderation.yaml.

    Returns:
        Dict mapping game_id to GameProfile.
    """
    path = config_path or Path("config/moderation.yaml")

    if not path.exists():
        logger.info("Moderation config not found at %s — using defaults", path)
        return {}

    try:
        import yaml
    except ImportError:
        logger.warning("PyYAML not installed — using default moderation profiles")
        return {}

    try:
        # Explicit UTF-8 so parsing does not depend on the host locale.
        data = yaml.safe_load(path.read_text(encoding="utf-8"))
    except Exception as exc:
        logger.error("Failed to parse moderation config: %s", exc)
        return {}

    # safe_load returns None for an empty document and may return a
    # scalar or list for malformed ones; only a mapping is usable.
    if data is None:
        data = {}
    if not isinstance(data, dict):
        logger.error(
            "Failed to parse moderation config: expected a mapping, got %s",
            type(data).__name__,
        )
        return {}

    # "profiles:" with no value parses to None; treat it as empty.
    raw_profiles = data.get("profiles") or {}
    if not isinstance(raw_profiles, dict):
        logger.error(
            "Failed to parse moderation config: 'profiles' must be a mapping, got %s",
            type(raw_profiles).__name__,
        )
        return {}

    profiles: dict[str, GameProfile] = {}
    for game_id, profile_data in raw_profiles.items():
        try:
            profiles[game_id] = GameProfile(
                game_id=game_id,
                display_name=profile_data.get("display_name", game_id),
                vocabulary_whitelist=profile_data.get("vocabulary_whitelist", []),
                context_prompt=profile_data.get("context_prompt", ""),
                threshold=float(profile_data.get("threshold", 0.8)),
                fallbacks=profile_data.get("fallbacks", {}),
            )
        except Exception as exc:
            # One bad profile must not discard the valid ones.
            logger.warning("Invalid profile '%s': %s", game_id, exc)

    logger.info("Loaded %d moderation profiles from %s", len(profiles), path)
    return profiles
|
||||
266
src/infrastructure/matrix_config.py
Normal file
266
src/infrastructure/matrix_config.py
Normal file
@@ -0,0 +1,266 @@
|
||||
"""Matrix configuration loader utility.
|
||||
|
||||
Provides a typed dataclass for Matrix world configuration and a loader
|
||||
that fetches settings from YAML with sensible defaults.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class PointLight:
    """One point light source placed in the Matrix world."""

    color: str = "#FFFFFF"
    intensity: float = 1.0
    # A fresh origin dict per instance, so instances never share a position.
    position: dict[str, float] = field(default_factory=lambda: dict(x=0, y=0, z=0))

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "PointLight":
        """Build a PointLight from ``data``, defaulting any missing key."""
        origin = {"x": 0, "y": 0, "z": 0}
        chosen_color = data.get("color", "#FFFFFF")
        chosen_intensity = data.get("intensity", 1.0)
        chosen_position = data.get("position", origin)
        return cls(
            color=chosen_color,
            intensity=chosen_intensity,
            position=chosen_position,
        )
|
||||
|
||||
|
||||
def _default_point_lights_factory() -> list[PointLight]:
    """Return a new list holding the three default point lights."""
    # (color, intensity, (x, y, z))
    specs = (
        ("#FFAA55", 1.2, (0, 5, 0)),  # Warm amber (Workshop)
        ("#3B82F6", 0.8, (-5, 3, -5)),  # Cool blue (Matrix)
        ("#A855F7", 0.6, (5, 3, 5)),  # Purple accent
    )
    return [
        PointLight(color=color, intensity=intensity, position={"x": x, "y": y, "z": z})
        for color, intensity, (x, y, z) in specs
    ]
|
||||
|
||||
|
||||
@dataclass
class LightingConfig:
    """Ambient and point-light settings for the Matrix world."""

    ambient_color: str = "#FFAA55"  # Warm amber (Workshop warmth)
    ambient_intensity: float = 0.5
    point_lights: list[PointLight] = field(default_factory=_default_point_lights_factory)

    @classmethod
    def from_dict(cls, data: dict[str, Any] | None) -> "LightingConfig":
        """Build a LightingConfig from ``data``; ``None`` means "all defaults".

        A missing or empty ``point_lights`` entry is replaced by the
        default light set.
        """
        source = {} if data is None else data

        raw_lights = source.get("point_lights", [])
        if raw_lights:
            lights = [PointLight.from_dict(item) for item in raw_lights]
        else:
            lights = _default_point_lights_factory()

        return cls(
            ambient_color=source.get("ambient_color", "#FFAA55"),
            ambient_intensity=source.get("ambient_intensity", 0.5),
            point_lights=lights,
        )
|
||||
|
||||
|
||||
@dataclass
|
||||
class EnvironmentConfig:
|
||||
"""Environment settings for the Matrix world."""
|
||||
|
||||
rain_enabled: bool = False
|
||||
starfield_enabled: bool = True
|
||||
fog_color: str = "#0f0f23"
|
||||
fog_density: float = 0.02
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict[str, Any] | None) -> "EnvironmentConfig":
|
||||
"""Create an EnvironmentConfig from a dictionary with defaults."""
|
||||
if data is None:
|
||||
data = {}
|
||||
return cls(
|
||||
rain_enabled=data.get("rain_enabled", False),
|
||||
starfield_enabled=data.get("starfield_enabled", True),
|
||||
fog_color=data.get("fog_color", "#0f0f23"),
|
||||
fog_density=data.get("fog_density", 0.02),
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class FeaturesConfig:
|
||||
"""Feature toggles for the Matrix world."""
|
||||
|
||||
chat_enabled: bool = True
|
||||
visitor_avatars: bool = True
|
||||
pip_familiar: bool = True
|
||||
workshop_portal: bool = True
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict[str, Any] | None) -> "FeaturesConfig":
|
||||
"""Create a FeaturesConfig from a dictionary with defaults."""
|
||||
if data is None:
|
||||
data = {}
|
||||
return cls(
|
||||
chat_enabled=data.get("chat_enabled", True),
|
||||
visitor_avatars=data.get("visitor_avatars", True),
|
||||
pip_familiar=data.get("pip_familiar", True),
|
||||
workshop_portal=data.get("workshop_portal", True),
|
||||
)
|
||||
|
||||
|
||||
@dataclass
class AgentConfig:
    """Settings describing one Matrix agent."""

    name: str = ""
    role: str = ""
    enabled: bool = True

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "AgentConfig":
        """Build an AgentConfig from ``data``, defaulting any missing key."""
        agent_name = data.get("name", "")
        agent_role = data.get("role", "")
        is_enabled = data.get("enabled", True)
        return cls(name=agent_name, role=agent_role, enabled=is_enabled)
|
||||
|
||||
|
||||
@dataclass
class AgentsConfig:
    """Agent registry settings: population limits plus the agent list."""

    default_count: int = 5
    max_count: int = 20
    agents: list[AgentConfig] = field(default_factory=list)

    @classmethod
    def from_dict(cls, data: dict[str, Any] | None) -> "AgentsConfig":
        """Build an AgentsConfig from ``data``; ``None`` means "all defaults"."""
        source = {} if data is None else data

        raw_agents = source.get("agents", [])
        parsed: list[AgentConfig] = []
        if raw_agents:
            parsed = [AgentConfig.from_dict(entry) for entry in raw_agents]

        return cls(
            default_count=source.get("default_count", 5),
            max_count=source.get("max_count", 20),
            agents=parsed,
        )
|
||||
|
||||
|
||||
@dataclass
class MatrixConfig:
    """Top-level Matrix world configuration.

    Groups the lighting, environment, feature-toggle and agent sections
    into one object.
    """

    lighting: LightingConfig = field(default_factory=LightingConfig)
    environment: EnvironmentConfig = field(default_factory=EnvironmentConfig)
    features: FeaturesConfig = field(default_factory=FeaturesConfig)
    agents: AgentsConfig = field(default_factory=AgentsConfig)

    @classmethod
    def from_dict(cls, data: dict[str, Any] | None) -> "MatrixConfig":
        """Build a MatrixConfig from ``data``; absent sections use defaults."""
        source = {} if data is None else data
        lighting = LightingConfig.from_dict(source.get("lighting"))
        environment = EnvironmentConfig.from_dict(source.get("environment"))
        features = FeaturesConfig.from_dict(source.get("features"))
        agents = AgentsConfig.from_dict(source.get("agents"))
        return cls(
            lighting=lighting,
            environment=environment,
            features=features,
            agents=agents,
        )

    def to_dict(self) -> dict[str, Any]:
        """Return the configuration as nested plain dicts and lists.

        Each point light's ``position`` dict is passed through as-is,
        not copied.
        """
        lighting = self.lighting
        environment = self.environment
        features = self.features
        registry = self.agents

        light_rows = [
            {
                "color": light.color,
                "intensity": light.intensity,
                "position": light.position,
            }
            for light in lighting.point_lights
        ]
        agent_rows = [
            {"name": agent.name, "role": agent.role, "enabled": agent.enabled}
            for agent in registry.agents
        ]

        lighting_section = {
            "ambient_color": lighting.ambient_color,
            "ambient_intensity": lighting.ambient_intensity,
            "point_lights": light_rows,
        }
        environment_section = {
            "rain_enabled": environment.rain_enabled,
            "starfield_enabled": environment.starfield_enabled,
            "fog_color": environment.fog_color,
            "fog_density": environment.fog_density,
        }
        features_section = {
            "chat_enabled": features.chat_enabled,
            "visitor_avatars": features.visitor_avatars,
            "pip_familiar": features.pip_familiar,
            "workshop_portal": features.workshop_portal,
        }
        agents_section = {
            "default_count": registry.default_count,
            "max_count": registry.max_count,
            "agents": agent_rows,
        }
        return {
            "lighting": lighting_section,
            "environment": environment_section,
            "features": features_section,
            "agents": agents_section,
        }
|
||||
|
||||
|
||||
def load_from_yaml(path: str | Path) -> MatrixConfig:
    """Load Matrix configuration from a YAML file.

    Missing keys are filled with sensible defaults. If the file is
    missing, cannot be read or decoded, is not valid YAML, is not a
    mapping at the top level, or contains sections of the wrong shape,
    a fully default configuration is returned and a warning is logged.

    Args:
        path: Path to the YAML configuration file.

    Returns:
        A MatrixConfig instance with loaded or default values.
    """
    path = Path(path)

    if not path.exists():
        logger.warning("Matrix config file not found: %s, using defaults", path)
        return MatrixConfig()

    try:
        with open(path, encoding="utf-8") as f:
            raw_data = yaml.safe_load(f)
    except yaml.YAMLError as exc:
        logger.warning("Matrix config YAML parse error: %s, using defaults", exc)
        return MatrixConfig()
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError; a file that
        # is not valid UTF-8 is still a "cannot be read" case.
        logger.warning("Matrix config read error: %s, using defaults", exc)
        return MatrixConfig()

    if not isinstance(raw_data, dict):
        logger.warning("Matrix config invalid format, using defaults")
        return MatrixConfig()

    try:
        return MatrixConfig.from_dict(raw_data)
    except (AttributeError, TypeError) as exc:
        # A section that is not a mapping (e.g. ``lighting: 5``) fails
        # inside the from_dict() helpers; honour the defaults contract.
        logger.warning("Matrix config invalid format: %s, using defaults", exc)
        return MatrixConfig()
|
||||
333
src/infrastructure/presence.py
Normal file
333
src/infrastructure/presence.py
Normal file
@@ -0,0 +1,333 @@
|
||||
"""Presence state serializer — transforms ADR-023 presence dicts for consumers.
|
||||
|
||||
Converts the raw presence schema (version, liveness, mood, energy, etc.)
|
||||
into the camelCase world-state payload consumed by the Workshop 3D renderer
|
||||
and WebSocket gateway.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from datetime import UTC, datetime
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default Pip familiar state (used when familiar module unavailable)
|
||||
DEFAULT_PIP_STATE = {
    "name": "Pip",
    "mood": "sleepy",
    "energy": 0.5,
    "color": "0x00b450",  # emerald green
    "trail_color": "0xdaa520",  # gold
}


def _get_familiar_state() -> dict:
    """Return Pip's current state, degrading to the default on any error.

    The result always has the keys name, mood, energy, color and
    trail_color. Name and mood come from the familiar's snapshot; the
    remaining values are taken from ``DEFAULT_PIP_STATE``. If the
    familiar module cannot be imported or raises, a copy of
    ``DEFAULT_PIP_STATE`` is returned and a warning is logged.
    """
    try:
        from timmy.familiar import pip_familiar

        snap = pip_familiar.snapshot()
        # Pip doesn't track energy yet, so energy and both colours are
        # borrowed from the defaults.
        borrowed = {key: DEFAULT_PIP_STATE[key] for key in ("energy", "color", "trail_color")}
        return {"name": snap.name, "mood": snap.state, **borrowed}
    except Exception as exc:
        logger.warning("Familiar state unavailable, using default: %s", exc)
        return dict(DEFAULT_PIP_STATE)
|
||||
|
||||
|
||||
# Valid bark styles for Matrix protocol
|
||||
BARK_STYLES = {"speech", "thought", "whisper", "shout"}
|
||||
|
||||
|
||||
def produce_bark(agent_id: str, text: str, reply_to: str = None, style: str = "speech") -> dict:
|
||||
"""Format a chat response as a Matrix bark message.
|
||||
|
||||
Barks appear as floating text above agents in the Matrix 3D world with
|
||||
typing animation. This function formats the text for the Matrix protocol.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
agent_id:
|
||||
Unique identifier for the agent (e.g. ``"timmy"``).
|
||||
text:
|
||||
The chat response text to display as a bark.
|
||||
reply_to:
|
||||
Optional message ID or reference this bark is replying to.
|
||||
style:
|
||||
Visual style of the bark. One of: "speech" (default), "thought",
|
||||
"whisper", "shout". Invalid styles fall back to "speech".
|
||||
|
||||
Returns
|
||||
-------
|
||||
dict
|
||||
Bark message with keys ``type``, ``agent_id``, ``data`` (containing
|
||||
``text``, ``reply_to``, ``style``), and ``ts``.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> produce_bark("timmy", "Hello world!")
|
||||
{
|
||||
"type": "bark",
|
||||
"agent_id": "timmy",
|
||||
"data": {"text": "Hello world!", "reply_to": None, "style": "speech"},
|
||||
"ts": 1742529600,
|
||||
}
|
||||
"""
|
||||
# Validate and normalize style
|
||||
if style not in BARK_STYLES:
|
||||
style = "speech"
|
||||
|
||||
# Truncate text to 280 characters (bark, not essay)
|
||||
truncated_text = text[:280] if text else ""
|
||||
|
||||
return {
|
||||
"type": "bark",
|
||||
"agent_id": agent_id,
|
||||
"data": {
|
||||
"text": truncated_text,
|
||||
"reply_to": reply_to,
|
||||
"style": style,
|
||||
},
|
||||
"ts": int(time.time()),
|
||||
}
|
||||
|
||||
|
||||
def produce_thought(
|
||||
agent_id: str, thought_text: str, thought_id: int, chain_id: str = None
|
||||
) -> dict:
|
||||
"""Format a thinking engine thought as a Matrix thought message.
|
||||
|
||||
Thoughts appear as subtle floating text in the 3D world, streaming from
|
||||
Timmy's thinking engine (/thinking/api). This function wraps thoughts in
|
||||
Matrix protocol format.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
agent_id:
|
||||
Unique identifier for the agent (e.g. ``"timmy"``).
|
||||
thought_text:
|
||||
The thought text to display. Truncated to 500 characters.
|
||||
thought_id:
|
||||
Unique identifier for this thought (sequence number).
|
||||
chain_id:
|
||||
Optional chain identifier grouping related thoughts.
|
||||
|
||||
Returns
|
||||
-------
|
||||
dict
|
||||
Thought message with keys ``type``, ``agent_id``, ``data`` (containing
|
||||
``text``, ``thought_id``, ``chain_id``), and ``ts``.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> produce_thought("timmy", "Considering the options...", 42, "chain-123")
|
||||
{
|
||||
"type": "thought",
|
||||
"agent_id": "timmy",
|
||||
"data": {"text": "Considering the options...", "thought_id": 42, "chain_id": "chain-123"},
|
||||
"ts": 1742529600,
|
||||
}
|
||||
"""
|
||||
# Truncate text to 500 characters (thoughts can be longer than barks)
|
||||
truncated_text = thought_text[:500] if thought_text else ""
|
||||
|
||||
return {
|
||||
"type": "thought",
|
||||
"agent_id": agent_id,
|
||||
"data": {
|
||||
"text": truncated_text,
|
||||
"thought_id": thought_id,
|
||||
"chain_id": chain_id,
|
||||
},
|
||||
"ts": int(time.time()),
|
||||
}
|
||||
|
||||
|
||||
def serialize_presence(presence: dict) -> dict:
|
||||
"""Transform an ADR-023 presence dict into the world-state API shape.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
presence:
|
||||
Raw presence dict as written by
|
||||
:func:`~timmy.workshop_state.get_state_dict` or read from
|
||||
``~/.timmy/presence.json``.
|
||||
|
||||
Returns
|
||||
-------
|
||||
dict
|
||||
CamelCase world-state payload with ``timmyState``, ``familiar``,
|
||||
``activeThreads``, ``recentEvents``, ``concerns``, ``visitorPresent``,
|
||||
``updatedAt``, and ``version`` keys.
|
||||
"""
|
||||
return {
|
||||
"timmyState": {
|
||||
"mood": presence.get("mood", "calm"),
|
||||
"activity": presence.get("current_focus", "idle"),
|
||||
"energy": presence.get("energy", 0.5),
|
||||
"confidence": presence.get("confidence", 0.7),
|
||||
},
|
||||
"familiar": presence.get("familiar"),
|
||||
"activeThreads": presence.get("active_threads", []),
|
||||
"recentEvents": presence.get("recent_events", []),
|
||||
"concerns": presence.get("concerns", []),
|
||||
"visitorPresent": False,
|
||||
"updatedAt": presence.get("liveness", datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ")),
|
||||
"version": presence.get("version", 1),
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Status mapping: ADR-023 current_focus → Matrix agent status
|
||||
# ---------------------------------------------------------------------------
|
||||
_STATUS_KEYWORDS: dict[str, str] = {
    "thinking": "thinking",
    "speaking": "speaking",
    "talking": "speaking",
    "idle": "idle",
}


def _derive_status(current_focus: str) -> str:
    """Map a free-text current_focus value to a Matrix status enum.

    Keywords from ``_STATUS_KEYWORDS`` are searched case-insensitively,
    in declaration order, and the first one found decides the status.
    Any other non-empty focus means the agent is busy with something
    unclassified and is reported as ``"online"``.

    A missing focus (``None``, ``""``) or a non-string value is reported
    as ``"idle"`` instead of raising, because presence dicts may carry
    ``current_focus: null``.

    Returns one of: online, idle, thinking, speaking.
    """
    if not current_focus or not isinstance(current_focus, str):
        return "idle"

    focus_lower = current_focus.lower()
    for keyword, status in _STATUS_KEYWORDS.items():
        if keyword in focus_lower:
            return status

    # A focus containing "idle" was already handled by the loop, so any
    # value reaching this point is some other activity.
    return "online"
|
||||
|
||||
|
||||
def produce_agent_state(agent_id: str, presence: dict) -> dict:
    """Assemble a Matrix ``agent_state`` message from presence data.

    Parameters
    ----------
    agent_id:
        Unique identifier for the agent (e.g. ``"timmy"``).
    presence:
        Raw ADR-023 presence dict.

    Returns
    -------
    dict
        Message with keys ``type``, ``agent_id``, ``data``, and ``ts``
        (integer Unix seconds). ``data`` holds display_name, role,
        status, mood, energy, bark and the familiar's state.
    """
    lookup = presence.get

    # The display name defaults to the title-cased agent id.
    state = {"display_name": lookup("display_name", agent_id.title())}
    state["role"] = lookup("role", "assistant")
    state["status"] = _derive_status(lookup("current_focus", "idle"))
    state["mood"] = lookup("mood", "calm")
    state["energy"] = lookup("energy", 0.5)
    state["bark"] = lookup("bark", "")
    state["familiar"] = _get_familiar_state()

    message = {"type": "agent_state", "agent_id": agent_id, "data": state}
    message["ts"] = int(time.time())
    return message
|
||||
|
||||
|
||||
def produce_system_status() -> dict:
|
||||
"""Generate a system_status message for the Matrix.
|
||||
|
||||
Returns a dict with system health metrics including agent count,
|
||||
visitor count, uptime, thinking engine status, and memory count.
|
||||
|
||||
Returns
|
||||
-------
|
||||
dict
|
||||
Message with keys ``type``, ``data`` (containing ``agents_online``,
|
||||
``visitors``, ``uptime_seconds``, ``thinking_active``, ``memory_count``),
|
||||
and ``ts``.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> produce_system_status()
|
||||
{
|
||||
"type": "system_status",
|
||||
"data": {
|
||||
"agents_online": 5,
|
||||
"visitors": 2,
|
||||
"uptime_seconds": 3600,
|
||||
"thinking_active": True,
|
||||
"memory_count": 150,
|
||||
},
|
||||
"ts": 1742529600,
|
||||
}
|
||||
"""
|
||||
# Count agents with status != offline
|
||||
agents_online = 0
|
||||
try:
|
||||
from timmy.agents.loader import list_agents
|
||||
|
||||
agents = list_agents()
|
||||
agents_online = sum(1 for a in agents if a.get("status", "") not in ("offline", ""))
|
||||
except Exception as exc:
|
||||
logger.debug("Failed to count agents: %s", exc)
|
||||
|
||||
# Count visitors from WebSocket clients
|
||||
visitors = 0
|
||||
try:
|
||||
from dashboard.routes.world import _ws_clients
|
||||
|
||||
visitors = len(_ws_clients)
|
||||
except Exception as exc:
|
||||
logger.debug("Failed to count visitors: %s", exc)
|
||||
|
||||
# Calculate uptime
|
||||
uptime_seconds = 0
|
||||
try:
|
||||
from datetime import UTC
|
||||
|
||||
from config import APP_START_TIME
|
||||
|
||||
uptime_seconds = int((datetime.now(UTC) - APP_START_TIME).total_seconds())
|
||||
except Exception as exc:
|
||||
logger.debug("Failed to calculate uptime: %s", exc)
|
||||
|
||||
# Check thinking engine status
|
||||
thinking_active = False
|
||||
try:
|
||||
from config import settings
|
||||
from timmy.thinking import thinking_engine
|
||||
|
||||
thinking_active = settings.thinking_enabled and thinking_engine is not None
|
||||
except Exception as exc:
|
||||
logger.debug("Failed to check thinking status: %s", exc)
|
||||
|
||||
# Count memories in vector store
|
||||
memory_count = 0
|
||||
try:
|
||||
from timmy.memory_system import get_memory_stats
|
||||
|
||||
stats = get_memory_stats()
|
||||
memory_count = stats.get("total_entries", 0)
|
||||
except Exception as exc:
|
||||
logger.debug("Failed to count memories: %s", exc)
|
||||
|
||||
return {
|
||||
"type": "system_status",
|
||||
"data": {
|
||||
"agents_online": agents_online,
|
||||
"visitors": visitors,
|
||||
"uptime_seconds": uptime_seconds,
|
||||
"thinking_active": thinking_active,
|
||||
"memory_count": memory_count,
|
||||
},
|
||||
"ts": int(time.time()),
|
||||
}
|
||||
261
src/infrastructure/protocol.py
Normal file
261
src/infrastructure/protocol.py
Normal file
@@ -0,0 +1,261 @@
|
||||
"""Shared WebSocket message protocol for the Matrix frontend.
|
||||
|
||||
Defines all WebSocket message types as an enum and typed dataclasses
|
||||
with ``to_json()`` / ``from_json()`` helpers so every producer and the
|
||||
gateway speak the same language.
|
||||
|
||||
Message wire format
|
||||
-------------------
|
||||
.. code-block:: json
|
||||
|
||||
{"type": "agent_state", "agent_id": "timmy", "data": {...}, "ts": 1234567890}
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MessageType(StrEnum):
|
||||
"""All WebSocket message types defined by the Matrix PROTOCOL.md."""
|
||||
|
||||
AGENT_STATE = "agent_state"
|
||||
VISITOR_STATE = "visitor_state"
|
||||
BARK = "bark"
|
||||
THOUGHT = "thought"
|
||||
SYSTEM_STATUS = "system_status"
|
||||
CONNECTION_ACK = "connection_ack"
|
||||
ERROR = "error"
|
||||
TASK_UPDATE = "task_update"
|
||||
MEMORY_FLASH = "memory_flash"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Base message
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class WSMessage:
    """Base WebSocket message with common envelope fields."""

    type: str
    ts: float = field(default_factory=time.time)

    def to_json(self) -> str:
        """Serialise the message to a JSON string."""
        return json.dumps(asdict(self))

    @classmethod
    def from_json(cls, raw: str) -> "WSMessage":
        """Deserialise a JSON string into the correct message subclass.

        The ``type`` field is looked up in ``_REGISTRY``; a registered
        subclass parses the message itself. Otherwise this class is
        instantiated from the keys that match its own dataclass fields,
        and any other keys in the payload are ignored, so an
        unrecognised message type still yields an envelope instead of a
        ``TypeError``.

        Raises:
            json.JSONDecodeError: If ``raw`` is not valid JSON.
            TypeError: If a field without a default (``type``) is missing.
        """
        from dataclasses import fields

        data = json.loads(raw)
        msg_type = data.get("type")
        sub = _REGISTRY.get(msg_type)
        # "sub is not cls" prevents endless self-dispatch should the
        # registry ever map a type to the class already doing the parsing.
        if sub is not None and sub is not cls:
            return sub.from_json(raw)

        known = {f.name for f in fields(cls)}
        return cls(**{key: value for key, value in data.items() if key in known})
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Concrete message types
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class AgentStateMessage(WSMessage):
    """State update describing one agent."""

    type: str = MessageType.AGENT_STATE
    agent_id: str = ""
    data: dict[str, Any] = field(default_factory=dict)

    @classmethod
    def from_json(cls, raw: str) -> "AgentStateMessage":
        """Parse ``raw`` JSON, defaulting any missing field."""
        obj = json.loads(raw)
        kwargs = {
            "type": obj.get("type", MessageType.AGENT_STATE),
            "ts": obj.get("ts", time.time()),
            "agent_id": obj.get("agent_id", ""),
            "data": obj.get("data", {}),
        }
        return cls(**kwargs)
|
||||
|
||||
|
||||
@dataclass
class VisitorStateMessage(WSMessage):
    """State update describing one visitor / user session."""

    type: str = MessageType.VISITOR_STATE
    visitor_id: str = ""
    data: dict[str, Any] = field(default_factory=dict)

    @classmethod
    def from_json(cls, raw: str) -> "VisitorStateMessage":
        """Parse ``raw`` JSON, defaulting any missing field."""
        obj = json.loads(raw)
        kwargs = {
            "type": obj.get("type", MessageType.VISITOR_STATE),
            "ts": obj.get("ts", time.time()),
            "visitor_id": obj.get("visitor_id", ""),
            "data": obj.get("data", {}),
        }
        return cls(**kwargs)
|
||||
|
||||
|
||||
@dataclass
class BarkMessage(WSMessage):
    """A bark (chat-like utterance) emitted by an agent."""

    type: str = MessageType.BARK
    agent_id: str = ""
    content: str = ""

    @classmethod
    def from_json(cls, raw: str) -> "BarkMessage":
        """Parse ``raw`` JSON, defaulting any missing field."""
        obj = json.loads(raw)
        kwargs = {
            "type": obj.get("type", MessageType.BARK),
            "ts": obj.get("ts", time.time()),
            "agent_id": obj.get("agent_id", ""),
            "content": obj.get("content", ""),
        }
        return cls(**kwargs)
|
||||
|
||||
|
||||
@dataclass
class ThoughtMessage(WSMessage):
    """An inner thought emitted by an agent."""

    type: str = MessageType.THOUGHT
    agent_id: str = ""
    content: str = ""

    @classmethod
    def from_json(cls, raw: str) -> "ThoughtMessage":
        """Parse ``raw`` JSON, defaulting any missing field."""
        obj = json.loads(raw)
        kwargs = {
            "type": obj.get("type", MessageType.THOUGHT),
            "ts": obj.get("ts", time.time()),
            "agent_id": obj.get("agent_id", ""),
            "content": obj.get("content", ""),
        }
        return cls(**kwargs)
|
||||
|
||||
|
||||
@dataclass
class SystemStatusMessage(WSMessage):
    """Status broadcast covering the whole system."""

    type: str = MessageType.SYSTEM_STATUS
    status: str = ""
    data: dict[str, Any] = field(default_factory=dict)

    @classmethod
    def from_json(cls, raw: str) -> "SystemStatusMessage":
        """Parse ``raw`` JSON, defaulting any missing field."""
        obj = json.loads(raw)
        kwargs = {
            "type": obj.get("type", MessageType.SYSTEM_STATUS),
            "ts": obj.get("ts", time.time()),
            "status": obj.get("status", ""),
            "data": obj.get("data", {}),
        }
        return cls(**kwargs)
|
||||
|
||||
|
||||
@dataclass
class ConnectionAckMessage(WSMessage):
    """WebSocket acknowledgement sent when a client connects."""

    type: str = field(default=MessageType.CONNECTION_ACK)
    client_id: str = ""

    @classmethod
    def from_json(cls, raw: str) -> "ConnectionAckMessage":
        """Build a message from a JSON object string.

        Keys missing from the object fall back to
        ``MessageType.CONNECTION_ACK``, the current time, and an empty
        ``client_id``.
        """
        decoded = json.loads(raw)
        kwargs = {
            "type": decoded.get("type", MessageType.CONNECTION_ACK),
            "ts": decoded.get("ts", time.time()),
            "client_id": decoded.get("client_id", ""),
        }
        return cls(**kwargs)
|
||||
|
||||
|
||||
@dataclass
class ErrorMessage(WSMessage):
    """WebSocket error message sent to a client."""

    type: str = field(default=MessageType.ERROR)
    code: str = ""
    message: str = ""

    @classmethod
    def from_json(cls, raw: str) -> "ErrorMessage":
        """Build a message from a JSON object string.

        Keys missing from the object fall back to ``MessageType.ERROR``, the
        current time, and empty ``code`` / ``message`` strings.
        """
        decoded = json.loads(raw)
        kwargs = {
            "type": decoded.get("type", MessageType.ERROR),
            "ts": decoded.get("ts", time.time()),
            "code": decoded.get("code", ""),
            "message": decoded.get("message", ""),
        }
        return cls(**kwargs)
|
||||
|
||||
|
||||
@dataclass
class TaskUpdateMessage(WSMessage):
    """WebSocket update about a task (created, assigned, completed, etc.)."""

    type: str = field(default=MessageType.TASK_UPDATE)
    task_id: str = ""
    status: str = ""
    data: dict[str, Any] = field(default_factory=dict)

    @classmethod
    def from_json(cls, raw: str) -> "TaskUpdateMessage":
        """Build a message from a JSON object string.

        Keys missing from the object fall back to ``MessageType.TASK_UPDATE``,
        the current time, empty ``task_id`` / ``status`` strings and an empty
        ``data`` dict.
        """
        decoded = json.loads(raw)
        kwargs = {
            "type": decoded.get("type", MessageType.TASK_UPDATE),
            "ts": decoded.get("ts", time.time()),
            "task_id": decoded.get("task_id", ""),
            "status": decoded.get("status", ""),
            "data": decoded.get("data", {}),
        }
        return cls(**kwargs)
|
||||
|
||||
|
||||
@dataclass
class MemoryFlashMessage(WSMessage):
    """WebSocket message for a memory flash: a recalled or stored memory event."""

    type: str = field(default=MessageType.MEMORY_FLASH)
    agent_id: str = ""
    memory_key: str = ""
    content: str = ""

    @classmethod
    def from_json(cls, raw: str) -> "MemoryFlashMessage":
        """Build a message from a JSON object string.

        Keys missing from the object fall back to ``MessageType.MEMORY_FLASH``,
        the current time, and empty ``agent_id`` / ``memory_key`` / ``content``
        strings.
        """
        decoded = json.loads(raw)
        kwargs = {
            "type": decoded.get("type", MessageType.MEMORY_FLASH),
            "ts": decoded.get("ts", time.time()),
            "agent_id": decoded.get("agent_id", ""),
            "memory_key": decoded.get("memory_key", ""),
            "content": decoded.get("content", ""),
        }
        return cls(**kwargs)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Registry for from_json dispatch
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Maps each message ``type`` value to the WSMessage subclass registered for it.
_REGISTRY: dict[str, type[WSMessage]] = {
    MessageType.AGENT_STATE: AgentStateMessage,
    MessageType.VISITOR_STATE: VisitorStateMessage,
    MessageType.BARK: BarkMessage,
    MessageType.THOUGHT: ThoughtMessage,
    MessageType.SYSTEM_STATUS: SystemStatusMessage,
    MessageType.CONNECTION_ACK: ConnectionAckMessage,
    MessageType.ERROR: ErrorMessage,
    MessageType.TASK_UPDATE: TaskUpdateMessage,
    MessageType.MEMORY_FLASH: MemoryFlashMessage,
}
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
from .api import router
|
||||
from .cascade import CascadeRouter, Provider, ProviderStatus, get_router
|
||||
from .history import HealthHistoryStore, get_history_store
|
||||
|
||||
__all__ = [
|
||||
"CascadeRouter",
|
||||
@@ -9,4 +10,6 @@ __all__ = [
|
||||
"ProviderStatus",
|
||||
"get_router",
|
||||
"router",
|
||||
"HealthHistoryStore",
|
||||
"get_history_store",
|
||||
]
|
||||
|
||||
@@ -8,6 +8,7 @@ from fastapi import APIRouter, Depends, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from .cascade import CascadeRouter, get_router
|
||||
from .history import HealthHistoryStore, get_history_store
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/api/v1/router", tags=["router"])
|
||||
@@ -199,6 +200,17 @@ async def reload_config(
|
||||
raise HTTPException(status_code=500, detail=f"Reload failed: {exc}") from exc
|
||||
|
||||
|
||||
@router.get("/history")
async def get_history(
    hours: int = 24,
    store: Annotated[HealthHistoryStore, Depends(get_history_store)] = None,
) -> list[dict[str, Any]]:
    """Return provider health history for the last ``hours`` hours.

    ``store`` is normally supplied through the ``get_history_store``
    dependency; when it arrives as ``None`` the singleton store is looked up
    directly.
    """
    active_store = get_history_store() if store is None else store
    return active_store.get_history(hours=hours)
|
||||
|
||||
|
||||
@router.get("/config")
|
||||
async def get_config(
|
||||
cascade: Annotated[CascadeRouter, Depends(get_cascade_router)],
|
||||
|
||||
@@ -32,6 +32,15 @@ except ImportError:
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Quota monitor — optional, degrades gracefully if unavailable
|
||||
try:
|
||||
from infrastructure.claude_quota import QuotaMonitor, get_quota_monitor
|
||||
|
||||
_quota_monitor: "QuotaMonitor | None" = get_quota_monitor()
|
||||
except Exception as _exc: # pragma: no cover
|
||||
logger.debug("Quota monitor not available: %s", _exc)
|
||||
_quota_monitor = None
|
||||
|
||||
|
||||
class ProviderStatus(Enum):
|
||||
"""Health status of a provider."""
|
||||
@@ -221,65 +230,56 @@ class CascadeRouter:
|
||||
raise RuntimeError("PyYAML not installed")
|
||||
|
||||
content = self.config_path.read_text()
|
||||
# Expand environment variables
|
||||
content = self._expand_env_vars(content)
|
||||
data = yaml.safe_load(content)
|
||||
|
||||
# Load cascade settings
|
||||
cascade = data.get("cascade", {})
|
||||
|
||||
# Load fallback chains
|
||||
fallback_chains = data.get("fallback_chains", {})
|
||||
|
||||
# Load multi-modal settings
|
||||
multimodal = data.get("multimodal", {})
|
||||
|
||||
self.config = RouterConfig(
|
||||
timeout_seconds=cascade.get("timeout_seconds", 30),
|
||||
max_retries_per_provider=cascade.get("max_retries_per_provider", 2),
|
||||
retry_delay_seconds=cascade.get("retry_delay_seconds", 1),
|
||||
circuit_breaker_failure_threshold=cascade.get("circuit_breaker", {}).get(
|
||||
"failure_threshold", 5
|
||||
),
|
||||
circuit_breaker_recovery_timeout=cascade.get("circuit_breaker", {}).get(
|
||||
"recovery_timeout", 60
|
||||
),
|
||||
circuit_breaker_half_open_max_calls=cascade.get("circuit_breaker", {}).get(
|
||||
"half_open_max_calls", 2
|
||||
),
|
||||
auto_pull_models=multimodal.get("auto_pull", True),
|
||||
fallback_chains=fallback_chains,
|
||||
)
|
||||
|
||||
# Load providers
|
||||
for p_data in data.get("providers", []):
|
||||
# Skip disabled providers
|
||||
if not p_data.get("enabled", False):
|
||||
continue
|
||||
|
||||
provider = Provider(
|
||||
name=p_data["name"],
|
||||
type=p_data["type"],
|
||||
enabled=p_data.get("enabled", True),
|
||||
priority=p_data.get("priority", 99),
|
||||
url=p_data.get("url"),
|
||||
api_key=p_data.get("api_key"),
|
||||
base_url=p_data.get("base_url"),
|
||||
models=p_data.get("models", []),
|
||||
)
|
||||
|
||||
# Check if provider is actually available
|
||||
if self._check_provider_available(provider):
|
||||
self.providers.append(provider)
|
||||
else:
|
||||
logger.warning("Provider %s not available, skipping", provider.name)
|
||||
|
||||
# Sort by priority
|
||||
self.providers.sort(key=lambda p: p.priority)
|
||||
self.config = self._parse_router_config(data)
|
||||
self._load_providers(data)
|
||||
|
||||
except Exception as exc:
|
||||
logger.error("Failed to load config: %s", exc)
|
||||
|
||||
def _parse_router_config(self, data: dict) -> RouterConfig:
    """Build a RouterConfig from parsed YAML data.

    Args:
        data: Mapping produced by parsing the router YAML file. ``None`` and
            sections whose value is ``None`` are treated as empty mappings.

    Returns:
        A RouterConfig populated from the ``cascade``, ``multimodal`` and
        ``fallback_chains`` sections, using the built-in defaults for any
        setting that is absent.
    """
    # A YAML key with no value (e.g. a bare ``cascade:``) parses to None, not
    # to a missing key, so ``.get(key, {})`` would return None and the
    # chained ``.get`` calls below would raise AttributeError.
    data = data or {}
    cascade = data.get("cascade") or {}
    breaker = cascade.get("circuit_breaker") or {}
    multimodal = data.get("multimodal") or {}

    return RouterConfig(
        timeout_seconds=cascade.get("timeout_seconds", 30),
        max_retries_per_provider=cascade.get("max_retries_per_provider", 2),
        retry_delay_seconds=cascade.get("retry_delay_seconds", 1),
        circuit_breaker_failure_threshold=breaker.get("failure_threshold", 5),
        circuit_breaker_recovery_timeout=breaker.get("recovery_timeout", 60),
        circuit_breaker_half_open_max_calls=breaker.get("half_open_max_calls", 2),
        auto_pull_models=multimodal.get("auto_pull", True),
        fallback_chains=data.get("fallback_chains") or {},
    )
|
||||
|
||||
def _load_providers(self, data: dict) -> None:
    """Load, filter, and sort providers from parsed YAML data.

    Entries without a truthy ``enabled`` flag are skipped, as are providers
    rejected by ``_check_provider_available``. Accepted providers are appended
    to ``self.providers``, which is then sorted by ascending ``priority``.

    Args:
        data: Mapping produced by parsing the router YAML file. A missing or
            empty ``providers`` section is treated as an empty list.

    Raises:
        KeyError: If an enabled entry has no ``name`` or ``type``.
    """
    # ``providers:`` with no value parses to None; ``.get(key, [])`` would
    # return that None and the loop would fail with a TypeError.
    entries = (data or {}).get("providers") or []

    for p_data in entries:
        if not p_data.get("enabled", False):
            continue

        provider = Provider(
            name=p_data["name"],
            type=p_data["type"],
            enabled=p_data.get("enabled", True),
            priority=p_data.get("priority", 99),
            url=p_data.get("url"),
            api_key=p_data.get("api_key"),
            base_url=p_data.get("base_url"),
            # Same None-for-empty-key concern as the providers list itself.
            models=p_data.get("models") or [],
        )

        if self._check_provider_available(provider):
            self.providers.append(provider)
        else:
            logger.warning("Provider %s not available, skipping", provider.name)

    self.providers.sort(key=lambda p: p.priority)
|
||||
|
||||
def _expand_env_vars(self, content: str) -> str:
|
||||
"""Expand ${VAR} syntax in YAML content.
|
||||
|
||||
@@ -310,6 +310,22 @@ class CascadeRouter:
|
||||
logger.debug("Ollama provider check error: %s", exc)
|
||||
return False
|
||||
|
||||
elif provider.type == "vllm_mlx":
|
||||
# Check if local vllm-mlx server is running (OpenAI-compatible)
|
||||
if requests is None:
|
||||
return True
|
||||
try:
|
||||
base_url = provider.base_url or provider.url or "http://localhost:8000"
|
||||
# Strip /v1 suffix — health endpoint is at the root
|
||||
server_root = base_url.rstrip("/")
|
||||
if server_root.endswith("/v1"):
|
||||
server_root = server_root[:-3]
|
||||
response = requests.get(f"{server_root}/health", timeout=5)
|
||||
return response.status_code == 200
|
||||
except Exception as exc:
|
||||
logger.debug("vllm-mlx provider check error: %s", exc)
|
||||
return False
|
||||
|
||||
elif provider.type in ("openai", "anthropic", "grok"):
|
||||
# Check if API key is set
|
||||
return provider.api_key is not None and provider.api_key != ""
|
||||
@@ -466,6 +482,33 @@ class CascadeRouter:
|
||||
|
||||
raise RuntimeError("; ".join(errors))
|
||||
|
||||
def _quota_allows_cloud(self, provider: Provider) -> bool:
    """Decide whether quota permits routing to a cloud provider.

    Asks the quota monitor which model it recommends for a ``"high"``
    request; cloud is permitted only when that recommendation is exactly
    ``"claude-sonnet-4-6"``. A refusal is logged together with the
    monitor's recommended tier.

    Returns True when no quota monitor is configured or when the check
    itself raises, so a broken monitor never blocks cloud routing.
    """
    if _quota_monitor is None:
        return True

    try:
        suggested = _quota_monitor.select_model("high")
        if suggested == "claude-sonnet-4-6":
            return True

        # Cloud refused: fetch the tier only to make the log line informative.
        status = _quota_monitor.check()
        tier = "unknown"
        if status:
            tier = status.recommended_tier.value
        logger.info(
            "Metabolic protocol: %s tier — downshifting %s to local (%s)",
            tier,
            provider.name,
            suggested,
        )
        return False
    except Exception as exc:
        logger.warning("Quota check failed, allowing cloud: %s", exc)
        return True
|
||||
|
||||
def _is_provider_available(self, provider: Provider) -> bool:
|
||||
"""Check if a provider should be tried (enabled + circuit breaker)."""
|
||||
if not provider.enabled:
|
||||
@@ -519,6 +562,15 @@ class CascadeRouter:
|
||||
if not self._is_provider_available(provider):
|
||||
continue
|
||||
|
||||
# Metabolic protocol: skip cloud providers when quota is low
|
||||
if provider.type in ("anthropic", "openai", "grok"):
|
||||
if not self._quota_allows_cloud(provider):
|
||||
logger.info(
|
||||
"Metabolic protocol: skipping cloud provider %s (quota too low)",
|
||||
provider.name,
|
||||
)
|
||||
continue
|
||||
|
||||
selected_model, is_fallback_model = self._select_model(provider, model, content_type)
|
||||
|
||||
try:
|
||||
@@ -564,6 +616,7 @@ class CascadeRouter:
|
||||
messages=messages,
|
||||
model=model or provider.get_default_model(),
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
content_type=content_type,
|
||||
)
|
||||
elif provider.type == "openai":
|
||||
@@ -590,6 +643,14 @@ class CascadeRouter:
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
)
|
||||
elif provider.type == "vllm_mlx":
|
||||
result = await self._call_vllm_mlx(
|
||||
provider=provider,
|
||||
messages=messages,
|
||||
model=model or provider.get_default_model(),
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unknown provider type: {provider.type}")
|
||||
|
||||
@@ -604,6 +665,7 @@ class CascadeRouter:
|
||||
messages: list[dict],
|
||||
model: str,
|
||||
temperature: float,
|
||||
max_tokens: int | None = None,
|
||||
content_type: ContentType = ContentType.TEXT,
|
||||
) -> dict:
|
||||
"""Call Ollama API with multi-modal support."""
|
||||
@@ -614,13 +676,15 @@ class CascadeRouter:
|
||||
# Transform messages for Ollama format (including images)
|
||||
transformed_messages = self._transform_messages_for_ollama(messages)
|
||||
|
||||
options = {"temperature": temperature}
|
||||
if max_tokens:
|
||||
options["num_predict"] = max_tokens
|
||||
|
||||
payload = {
|
||||
"model": model,
|
||||
"messages": transformed_messages,
|
||||
"stream": False,
|
||||
"options": {
|
||||
"temperature": temperature,
|
||||
},
|
||||
"options": options,
|
||||
}
|
||||
|
||||
timeout = aiohttp.ClientTimeout(total=self.config.timeout_seconds)
|
||||
@@ -764,7 +828,7 @@ class CascadeRouter:
|
||||
|
||||
client = openai.AsyncOpenAI(
|
||||
api_key=provider.api_key,
|
||||
base_url=provider.base_url or "https://api.x.ai/v1",
|
||||
base_url=provider.base_url or settings.xai_base_url,
|
||||
timeout=httpx.Timeout(300.0),
|
||||
)
|
||||
|
||||
@@ -783,6 +847,48 @@ class CascadeRouter:
|
||||
"model": response.model,
|
||||
}
|
||||
|
||||
async def _call_vllm_mlx(
    self,
    provider: Provider,
    messages: list[dict],
    model: str,
    temperature: float,
    max_tokens: int | None,
) -> dict:
    """Send a chat completion request to a vllm-mlx server.

    vllm-mlx serves the same /v1/chat/completions endpoint as OpenAI, so the
    OpenAI client is pointed at the provider's URL (default
    ``http://localhost:8000``). A placeholder API key is sent when the
    provider has none configured.

    Returns:
        A dict with the first choice's message ``content`` and the ``model``
        reported by the server.
    """
    import openai

    endpoint = provider.base_url or provider.url or "http://localhost:8000"
    # The OpenAI client expects the base URL to end in /v1.
    trimmed = endpoint.rstrip("/")
    if not trimmed.endswith("/v1"):
        endpoint = trimmed + "/v1"

    client = openai.AsyncOpenAI(
        api_key=provider.api_key or "no-key-required",
        base_url=endpoint,
        timeout=self.config.timeout_seconds,
    )

    request: dict = {"model": model, "messages": messages, "temperature": temperature}
    if max_tokens:
        request["max_tokens"] = max_tokens

    response = await client.chat.completions.create(**request)
    first_choice = response.choices[0]

    return {
        "content": first_choice.message.content,
        "model": response.model,
    }
|
||||
|
||||
def _record_success(self, provider: Provider, latency_ms: float) -> None:
|
||||
"""Record a successful request."""
|
||||
provider.metrics.total_requests += 1
|
||||
|
||||
152
src/infrastructure/router/history.py
Normal file
152
src/infrastructure/router/history.py
Normal file
@@ -0,0 +1,152 @@
|
||||
"""Provider health history — time-series snapshots for dashboard visualization."""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import sqlite3
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_store: "HealthHistoryStore | None" = None
|
||||
|
||||
|
||||
class HealthHistoryStore:
|
||||
"""Stores timestamped provider health snapshots in SQLite."""
|
||||
|
||||
def __init__(self, db_path: str = "data/router_history.db") -> None:
|
||||
self.db_path = db_path
|
||||
if db_path != ":memory:":
|
||||
Path(db_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
self._conn = sqlite3.connect(db_path, check_same_thread=False)
|
||||
self._conn.row_factory = sqlite3.Row
|
||||
self._init_schema()
|
||||
self._bg_task: asyncio.Task | None = None
|
||||
|
||||
def _init_schema(self) -> None:
|
||||
self._conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS snapshots (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
timestamp TEXT NOT NULL,
|
||||
provider_name TEXT NOT NULL,
|
||||
status TEXT NOT NULL,
|
||||
error_rate REAL NOT NULL,
|
||||
avg_latency_ms REAL NOT NULL,
|
||||
circuit_state TEXT NOT NULL,
|
||||
total_requests INTEGER NOT NULL
|
||||
)
|
||||
""")
|
||||
self._conn.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_snapshots_ts
|
||||
ON snapshots(timestamp)
|
||||
""")
|
||||
self._conn.commit()
|
||||
|
||||
def record_snapshot(self, providers: list[dict]) -> None:
|
||||
"""Record a health snapshot for all providers."""
|
||||
ts = datetime.now(UTC).isoformat()
|
||||
rows = [
|
||||
(
|
||||
ts,
|
||||
p["name"],
|
||||
p["status"],
|
||||
p["error_rate"],
|
||||
p["avg_latency_ms"],
|
||||
p["circuit_state"],
|
||||
p["total_requests"],
|
||||
)
|
||||
for p in providers
|
||||
]
|
||||
self._conn.executemany(
|
||||
"""INSERT INTO snapshots
|
||||
(timestamp, provider_name, status, error_rate,
|
||||
avg_latency_ms, circuit_state, total_requests)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)""",
|
||||
rows,
|
||||
)
|
||||
self._conn.commit()
|
||||
|
||||
def get_history(self, hours: int = 24) -> list[dict]:
|
||||
"""Return snapshots from the last N hours, grouped by timestamp."""
|
||||
cutoff = (datetime.now(UTC) - timedelta(hours=hours)).isoformat()
|
||||
rows = self._conn.execute(
|
||||
"""SELECT timestamp, provider_name, status, error_rate,
|
||||
avg_latency_ms, circuit_state, total_requests
|
||||
FROM snapshots WHERE timestamp >= ? ORDER BY timestamp""",
|
||||
(cutoff,),
|
||||
).fetchall()
|
||||
|
||||
# Group by timestamp
|
||||
snapshots: dict[str, list[dict]] = {}
|
||||
for row in rows:
|
||||
ts = row["timestamp"]
|
||||
if ts not in snapshots:
|
||||
snapshots[ts] = []
|
||||
snapshots[ts].append(
|
||||
{
|
||||
"name": row["provider_name"],
|
||||
"status": row["status"],
|
||||
"error_rate": row["error_rate"],
|
||||
"avg_latency_ms": row["avg_latency_ms"],
|
||||
"circuit_state": row["circuit_state"],
|
||||
"total_requests": row["total_requests"],
|
||||
}
|
||||
)
|
||||
|
||||
return [{"timestamp": ts, "providers": providers} for ts, providers in snapshots.items()]
|
||||
|
||||
def prune(self, keep_hours: int = 168) -> int:
|
||||
"""Remove snapshots older than keep_hours. Returns rows deleted."""
|
||||
cutoff = (datetime.now(UTC) - timedelta(hours=keep_hours)).isoformat()
|
||||
cursor = self._conn.execute("DELETE FROM snapshots WHERE timestamp < ?", (cutoff,))
|
||||
self._conn.commit()
|
||||
return cursor.rowcount
|
||||
|
||||
def close(self) -> None:
|
||||
"""Close the database connection."""
|
||||
if self._bg_task and not self._bg_task.done():
|
||||
self._bg_task.cancel()
|
||||
self._conn.close()
|
||||
|
||||
def _capture_snapshot(self, cascade_router) -> None: # noqa: ANN001
|
||||
"""Capture current provider state as a snapshot."""
|
||||
providers = []
|
||||
for p in cascade_router.providers:
|
||||
providers.append(
|
||||
{
|
||||
"name": p.name,
|
||||
"status": p.status.value,
|
||||
"error_rate": round(p.metrics.error_rate, 4),
|
||||
"avg_latency_ms": round(p.metrics.avg_latency_ms, 2),
|
||||
"circuit_state": p.circuit_state.value,
|
||||
"total_requests": p.metrics.total_requests,
|
||||
}
|
||||
)
|
||||
self.record_snapshot(providers)
|
||||
|
||||
async def start_background_task(
|
||||
self,
|
||||
cascade_router,
|
||||
interval_seconds: int = 60, # noqa: ANN001
|
||||
) -> None:
|
||||
"""Start periodic snapshot capture."""
|
||||
|
||||
async def _loop() -> None:
|
||||
while True:
|
||||
try:
|
||||
self._capture_snapshot(cascade_router)
|
||||
logger.debug("Recorded health snapshot")
|
||||
except Exception:
|
||||
logger.exception("Failed to record health snapshot")
|
||||
await asyncio.sleep(interval_seconds)
|
||||
|
||||
self._bg_task = asyncio.create_task(_loop())
|
||||
logger.info("Health history background task started (interval=%ds)", interval_seconds)
|
||||
|
||||
|
||||
def get_history_store() -> HealthHistoryStore:
    """Return the module-wide history store, building it on first use."""
    global _store  # noqa: PLW0603
    if _store is not None:
        return _store
    _store = HealthHistoryStore()
    return _store
|
||||
306
src/infrastructure/sovereignty_metrics.py
Normal file
306
src/infrastructure/sovereignty_metrics.py
Normal file
@@ -0,0 +1,306 @@
|
||||
"""Sovereignty metrics collector and store.
|
||||
|
||||
Tracks research sovereignty progress: cache hit rate, API cost,
|
||||
time-to-report, and human involvement. Persists to SQLite for
|
||||
trend analysis and dashboard display.
|
||||
|
||||
Refs: #981
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import sqlite3
|
||||
from contextlib import closing
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DB_PATH = Path(settings.repo_root) / "data" / "sovereignty_metrics.db"
|
||||
|
||||
_SCHEMA = """
|
||||
CREATE TABLE IF NOT EXISTS sovereignty_metrics (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
timestamp TEXT NOT NULL,
|
||||
metric_type TEXT NOT NULL,
|
||||
value REAL NOT NULL,
|
||||
metadata TEXT DEFAULT '{}'
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_sm_type ON sovereignty_metrics(metric_type);
|
||||
CREATE INDEX IF NOT EXISTS idx_sm_ts ON sovereignty_metrics(timestamp);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS sovereignty_alerts (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
timestamp TEXT NOT NULL,
|
||||
alert_type TEXT NOT NULL,
|
||||
message TEXT NOT NULL,
|
||||
value REAL NOT NULL,
|
||||
threshold REAL NOT NULL,
|
||||
acknowledged INTEGER DEFAULT 0
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_sa_ts ON sovereignty_alerts(timestamp);
|
||||
CREATE INDEX IF NOT EXISTS idx_sa_ack ON sovereignty_alerts(acknowledged);
|
||||
"""
|
||||
|
||||
|
||||
@dataclass
|
||||
class SovereigntyMetric:
|
||||
"""A single sovereignty metric data point."""
|
||||
|
||||
metric_type: str # cache_hit_rate, api_cost, time_to_report, human_involvement
|
||||
value: float
|
||||
timestamp: str = field(default_factory=lambda: datetime.now(UTC).isoformat())
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
|
||||
class SovereigntyAlert:
|
||||
"""An alert triggered when a metric exceeds a threshold."""
|
||||
|
||||
alert_type: str
|
||||
message: str
|
||||
value: float
|
||||
threshold: float
|
||||
timestamp: str = field(default_factory=lambda: datetime.now(UTC).isoformat())
|
||||
acknowledged: bool = False
|
||||
|
||||
|
||||
# Graduation targets from issue #981
|
||||
# Per-metric target values at each milestone, keyed by metric type.
GRADUATION_TARGETS = {
    "cache_hit_rate": {
        "week1": 0.10,
        "month1": 0.40,
        "month3": 0.80,
        "graduation": 0.90,
    },
    "api_cost": {
        "week1": 1.50,
        "month1": 0.50,
        "month3": 0.10,
        "graduation": 0.01,
    },
    "time_to_report": {
        "week1": 180.0,
        "month1": 30.0,
        "month3": 5.0,
        "graduation": 1.0,
    },
    "human_involvement": {
        "week1": 1.0,
        "month1": 0.5,
        "month3": 0.25,
        "graduation": 0.0,
    },
    "local_artifacts": {
        "week1": 6,
        "month1": 30,
        "month3": 100,
        "graduation": 500,
    },
}
|
||||
|
||||
|
||||
class SovereigntyMetricsStore:
|
||||
"""SQLite-backed sovereignty metrics store.
|
||||
|
||||
Thread-safe: creates a new connection per operation.
|
||||
"""
|
||||
|
||||
def __init__(self, db_path: Path | None = None) -> None:
|
||||
self._db_path = db_path or DB_PATH
|
||||
self._init_db()
|
||||
|
||||
def _init_db(self) -> None:
|
||||
"""Initialize the database schema."""
|
||||
try:
|
||||
self._db_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with closing(sqlite3.connect(str(self._db_path))) as conn:
|
||||
conn.execute("PRAGMA journal_mode=WAL")
|
||||
conn.execute(f"PRAGMA busy_timeout={settings.db_busy_timeout_ms}")
|
||||
conn.executescript(_SCHEMA)
|
||||
conn.commit()
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to initialize sovereignty metrics DB: %s", exc)
|
||||
|
||||
def _connect(self) -> sqlite3.Connection:
|
||||
"""Get a new connection."""
|
||||
conn = sqlite3.connect(str(self._db_path))
|
||||
conn.row_factory = sqlite3.Row
|
||||
conn.execute(f"PRAGMA busy_timeout={settings.db_busy_timeout_ms}")
|
||||
return conn
|
||||
|
||||
def record(self, metric: SovereigntyMetric) -> None:
|
||||
"""Record a sovereignty metric data point."""
|
||||
try:
|
||||
with closing(self._connect()) as conn:
|
||||
conn.execute(
|
||||
"INSERT INTO sovereignty_metrics (timestamp, metric_type, value, metadata) "
|
||||
"VALUES (?, ?, ?, ?)",
|
||||
(
|
||||
metric.timestamp,
|
||||
metric.metric_type,
|
||||
metric.value,
|
||||
json.dumps(metric.metadata),
|
||||
),
|
||||
)
|
||||
conn.commit()
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to record sovereignty metric: %s", exc)
|
||||
|
||||
# Check thresholds for alerts
|
||||
self._check_alert(metric)
|
||||
|
||||
def _check_alert(self, metric: SovereigntyMetric) -> None:
|
||||
"""Check if a metric triggers an alert."""
|
||||
threshold = settings.sovereignty_api_cost_alert_threshold
|
||||
if metric.metric_type == "api_cost" and metric.value > threshold:
|
||||
alert = SovereigntyAlert(
|
||||
alert_type="api_cost_exceeded",
|
||||
message=f"API cost ${metric.value:.2f} exceeds threshold ${threshold:.2f}",
|
||||
value=metric.value,
|
||||
threshold=threshold,
|
||||
)
|
||||
self._record_alert(alert)
|
||||
|
||||
def _record_alert(self, alert: SovereigntyAlert) -> None:
|
||||
"""Persist an alert."""
|
||||
try:
|
||||
with closing(self._connect()) as conn:
|
||||
conn.execute(
|
||||
"INSERT INTO sovereignty_alerts "
|
||||
"(timestamp, alert_type, message, value, threshold) "
|
||||
"VALUES (?, ?, ?, ?, ?)",
|
||||
(
|
||||
alert.timestamp,
|
||||
alert.alert_type,
|
||||
alert.message,
|
||||
alert.value,
|
||||
alert.threshold,
|
||||
),
|
||||
)
|
||||
conn.commit()
|
||||
logger.warning("Sovereignty alert: %s", alert.message)
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to record sovereignty alert: %s", exc)
|
||||
|
||||
def get_latest(self, metric_type: str, limit: int = 50) -> list[dict]:
|
||||
"""Get the most recent metric values for a given type."""
|
||||
try:
|
||||
with closing(self._connect()) as conn:
|
||||
rows = conn.execute(
|
||||
"SELECT timestamp, value, metadata FROM sovereignty_metrics "
|
||||
"WHERE metric_type = ? ORDER BY timestamp DESC LIMIT ?",
|
||||
(metric_type, limit),
|
||||
).fetchall()
|
||||
return [
|
||||
{
|
||||
"timestamp": row["timestamp"],
|
||||
"value": row["value"],
|
||||
"metadata": json.loads(row["metadata"]) if row["metadata"] else {},
|
||||
}
|
||||
for row in rows
|
||||
]
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to query sovereignty metrics: %s", exc)
|
||||
return []
|
||||
|
||||
def get_summary(self) -> dict[str, Any]:
|
||||
"""Get a summary of current sovereignty metrics progress."""
|
||||
summary: dict[str, Any] = {}
|
||||
for metric_type in GRADUATION_TARGETS:
|
||||
latest = self.get_latest(metric_type, limit=1)
|
||||
history = self.get_latest(metric_type, limit=30)
|
||||
|
||||
current_value = latest[0]["value"] if latest else None
|
||||
targets = GRADUATION_TARGETS[metric_type]
|
||||
|
||||
# Determine current phase based on value
|
||||
phase = "pre-start"
|
||||
if current_value is not None:
|
||||
if metric_type in ("api_cost", "time_to_report", "human_involvement"):
|
||||
# Lower is better
|
||||
if current_value <= targets["graduation"]:
|
||||
phase = "graduated"
|
||||
elif current_value <= targets["month3"]:
|
||||
phase = "month3"
|
||||
elif current_value <= targets["month1"]:
|
||||
phase = "month1"
|
||||
elif current_value <= targets["week1"]:
|
||||
phase = "week1"
|
||||
else:
|
||||
phase = "pre-start"
|
||||
else:
|
||||
# Higher is better
|
||||
if current_value >= targets["graduation"]:
|
||||
phase = "graduated"
|
||||
elif current_value >= targets["month3"]:
|
||||
phase = "month3"
|
||||
elif current_value >= targets["month1"]:
|
||||
phase = "month1"
|
||||
elif current_value >= targets["week1"]:
|
||||
phase = "week1"
|
||||
else:
|
||||
phase = "pre-start"
|
||||
|
||||
summary[metric_type] = {
|
||||
"current": current_value,
|
||||
"phase": phase,
|
||||
"targets": targets,
|
||||
"trend": [{"t": h["timestamp"], "v": h["value"]} for h in reversed(history)],
|
||||
}
|
||||
|
||||
return summary
|
||||
|
||||
def get_alerts(self, unacknowledged_only: bool = True, limit: int = 20) -> list[dict]:
|
||||
"""Get sovereignty alerts."""
|
||||
try:
|
||||
with closing(self._connect()) as conn:
|
||||
if unacknowledged_only:
|
||||
rows = conn.execute(
|
||||
"SELECT * FROM sovereignty_alerts "
|
||||
"WHERE acknowledged = 0 ORDER BY timestamp DESC LIMIT ?",
|
||||
(limit,),
|
||||
).fetchall()
|
||||
else:
|
||||
rows = conn.execute(
|
||||
"SELECT * FROM sovereignty_alerts ORDER BY timestamp DESC LIMIT ?",
|
||||
(limit,),
|
||||
).fetchall()
|
||||
return [dict(row) for row in rows]
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to query sovereignty alerts: %s", exc)
|
||||
return []
|
||||
|
||||
def acknowledge_alert(self, alert_id: int) -> bool:
|
||||
"""Acknowledge an alert."""
|
||||
try:
|
||||
with closing(self._connect()) as conn:
|
||||
conn.execute(
|
||||
"UPDATE sovereignty_alerts SET acknowledged = 1 WHERE id = ?",
|
||||
(alert_id,),
|
||||
)
|
||||
conn.commit()
|
||||
return True
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to acknowledge alert: %s", exc)
|
||||
return False
|
||||
|
||||
|
||||
# ── Module-level singleton ─────────────────────────────────────────────────
|
||||
_store: SovereigntyMetricsStore | None = None
|
||||
|
||||
|
||||
def get_sovereignty_store() -> SovereigntyMetricsStore:
    """Return the shared module-level store, constructing it on the first call."""
    global _store
    if _store is not None:
        return _store
    _store = SovereigntyMetricsStore()
    return _store
|
||||
|
||||
|
||||
async def emit_sovereignty_metric(
    metric_type: str,
    value: float,
    metadata: dict[str, Any] | None = None,
) -> None:
    """Record a sovereignty metric and publish it on the event bus.

    The metric is written through the module-level store in a worker thread,
    then emitted as a ``sovereignty.metric.<metric_type>`` event whose data
    holds ``metric_type``, ``value`` and every ``metadata`` entry.
    """
    import asyncio

    from infrastructure.events.bus import emit

    extra = metadata or {}
    metric = SovereigntyMetric(metric_type=metric_type, value=value, metadata=extra)

    # SQLite access is blocking, so it runs off the event loop.
    store = get_sovereignty_store()
    await asyncio.to_thread(store.record, metric)

    # Metadata is spread last, matching the original key precedence.
    event_data = {"metric_type": metric_type, "value": value, **extra}
    await emit(
        f"sovereignty.metric.{metric_type}",
        source="sovereignty_metrics",
        data=event_data,
    )
|
||||
166
src/infrastructure/visitor.py
Normal file
166
src/infrastructure/visitor.py
Normal file
@@ -0,0 +1,166 @@
|
||||
"""Visitor state tracking for the Matrix frontend.
|
||||
|
||||
Tracks active visitors as they connect and move around the 3D world,
|
||||
and provides serialization for Matrix protocol broadcast messages.
|
||||
"""
|
||||
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC, datetime
|
||||
|
||||
|
||||
@dataclass
|
||||
class VisitorState:
|
||||
"""State for a single visitor in the Matrix.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
visitor_id: Unique identifier for the visitor (client ID).
|
||||
display_name: Human-readable name shown above the visitor.
|
||||
position: 3D coordinates (x, y, z) in the world.
|
||||
rotation: Rotation angle in degrees (0-360).
|
||||
connected_at: ISO timestamp when the visitor connected.
|
||||
"""
|
||||
|
||||
visitor_id: str
|
||||
display_name: str = ""
|
||||
position: dict[str, float] = field(default_factory=lambda: {"x": 0.0, "y": 0.0, "z": 0.0})
|
||||
rotation: float = 0.0
|
||||
connected_at: str = field(
|
||||
default_factory=lambda: datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
)
|
||||
|
||||
def __post_init__(self):
|
||||
"""Set display_name to visitor_id if not provided; copy position dict."""
|
||||
if not self.display_name:
|
||||
self.display_name = self.visitor_id
|
||||
# Copy position to avoid shared mutable state
|
||||
self.position = dict(self.position)
|
||||
|
||||
|
||||
class VisitorRegistry:
|
||||
"""Registry of active visitors in the Matrix.
|
||||
|
||||
Thread-safe singleton pattern (Python GIL protects dict operations).
|
||||
Used by the WebSocket layer to track and broadcast visitor positions.
|
||||
"""
|
||||
|
||||
_instance: "VisitorRegistry | None" = None
|
||||
|
||||
def __new__(cls) -> "VisitorRegistry":
|
||||
"""Singleton constructor."""
|
||||
if cls._instance is None:
|
||||
cls._instance = super().__new__(cls)
|
||||
cls._instance._visitors: dict[str, VisitorState] = {}
|
||||
return cls._instance
|
||||
|
||||
def add(
|
||||
self, visitor_id: str, display_name: str = "", position: dict | None = None
|
||||
) -> VisitorState:
|
||||
"""Add a new visitor to the registry.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
visitor_id: Unique identifier for the visitor.
|
||||
display_name: Optional display name (defaults to visitor_id).
|
||||
position: Optional initial position (defaults to origin).
|
||||
|
||||
Returns
|
||||
-------
|
||||
The newly created VisitorState.
|
||||
"""
|
||||
visitor = VisitorState(
|
||||
visitor_id=visitor_id,
|
||||
display_name=display_name,
|
||||
position=position if position else {"x": 0.0, "y": 0.0, "z": 0.0},
|
||||
)
|
||||
self._visitors[visitor_id] = visitor
|
||||
return visitor
|
||||
|
||||
def remove(self, visitor_id: str) -> bool:
|
||||
"""Remove a visitor from the registry.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
visitor_id: The visitor to remove.
|
||||
|
||||
Returns
|
||||
-------
|
||||
True if the visitor was found and removed, False otherwise.
|
||||
"""
|
||||
if visitor_id in self._visitors:
|
||||
del self._visitors[visitor_id]
|
||||
return True
|
||||
return False
|
||||
|
||||
def update_position(
|
||||
self,
|
||||
visitor_id: str,
|
||||
position: dict[str, float],
|
||||
rotation: float | None = None,
|
||||
) -> bool:
|
||||
"""Update a visitor's position and rotation.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
visitor_id: The visitor to update.
|
||||
position: New 3D coordinates (x, y, z).
|
||||
rotation: Optional new rotation angle.
|
||||
|
||||
Returns
|
||||
-------
|
||||
True if the visitor was found and updated, False otherwise.
|
||||
"""
|
||||
if visitor_id not in self._visitors:
|
||||
return False
|
||||
|
||||
self._visitors[visitor_id].position = position
|
||||
if rotation is not None:
|
||||
self._visitors[visitor_id].rotation = rotation
|
||||
return True
|
||||
|
||||
def get(self, visitor_id: str) -> VisitorState | None:
|
||||
"""Get a single visitor's state.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
visitor_id: The visitor to retrieve.
|
||||
|
||||
Returns
|
||||
-------
|
||||
The VisitorState if found, None otherwise.
|
||||
"""
|
||||
return self._visitors.get(visitor_id)
|
||||
|
||||
def get_all(self) -> list[dict]:
|
||||
"""Get all active visitors as Matrix protocol message dicts.
|
||||
|
||||
Returns
|
||||
-------
|
||||
List of visitor_state dicts ready for WebSocket broadcast.
|
||||
Each dict has: type, visitor_id, data (with display_name,
|
||||
position, rotation, connected_at), and ts.
|
||||
"""
|
||||
now = int(time.time())
|
||||
return [
|
||||
{
|
||||
"type": "visitor_state",
|
||||
"visitor_id": v.visitor_id,
|
||||
"data": {
|
||||
"display_name": v.display_name,
|
||||
"position": v.position,
|
||||
"rotation": v.rotation,
|
||||
"connected_at": v.connected_at,
|
||||
},
|
||||
"ts": now,
|
||||
}
|
||||
for v in self._visitors.values()
|
||||
]
|
||||
|
||||
def clear(self) -> None:
|
||||
"""Remove all visitors (useful for testing)."""
|
||||
self._visitors.clear()
|
||||
|
||||
def __len__(self) -> int:
|
||||
"""Return the number of active visitors."""
|
||||
return len(self._visitors)
|
||||
29
src/infrastructure/world/__init__.py
Normal file
29
src/infrastructure/world/__init__.py
Normal file
@@ -0,0 +1,29 @@
|
||||
"""World interface — engine-agnostic adapter pattern for embodied agents.
|
||||
|
||||
Provides the ``WorldInterface`` ABC and an adapter registry so Timmy can
|
||||
observe, act, and speak in any game world (Morrowind, Luanti, Godot, …)
|
||||
through a single contract.
|
||||
|
||||
Quick start::
|
||||
|
||||
from infrastructure.world import get_adapter, register_adapter
|
||||
from infrastructure.world.interface import WorldInterface
|
||||
|
||||
register_adapter("mock", MockWorldAdapter)
|
||||
world = get_adapter("mock")
|
||||
perception = world.observe()
|
||||
"""
|
||||
|
||||
from infrastructure.world.registry import AdapterRegistry
|
||||
|
||||
# One registry instance backs all of the package-level helpers.
_registry = AdapterRegistry()

# The public helpers are simply that registry's bound methods.
register_adapter, get_adapter, list_adapters = (
    _registry.register,
    _registry.get,
    _registry.list_adapters,
)

__all__ = ["register_adapter", "get_adapter", "list_adapters"]
|
||||
1
src/infrastructure/world/adapters/__init__.py
Normal file
1
src/infrastructure/world/adapters/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Built-in world adapters."""
|
||||
99
src/infrastructure/world/adapters/mock.py
Normal file
99
src/infrastructure/world/adapters/mock.py
Normal file
@@ -0,0 +1,99 @@
|
||||
"""Mock world adapter — returns canned perception and logs commands.
|
||||
|
||||
Useful for testing the heartbeat loop and WorldInterface contract
|
||||
without a running game server.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from infrastructure.world.interface import WorldInterface
|
||||
from infrastructure.world.types import (
|
||||
ActionResult,
|
||||
ActionStatus,
|
||||
CommandInput,
|
||||
PerceptionOutput,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class _ActionLog:
    """A command handed to the mock world, paired with a timestamp."""

    # The command object as it was received.
    command: CommandInput
    # Time associated with the log entry.
    timestamp: datetime
|
||||
|
||||
|
||||
class MockWorldAdapter(WorldInterface):
    """In-memory mock adapter for testing.

    * ``observe()`` returns configurable canned perception.
    * ``act()`` logs the command and returns success.
    * ``speak()`` logs the message.

    Inspect ``action_log`` and ``speech_log`` to verify behaviour in tests.
    """

    def __init__(
        self,
        *,
        location: str = "Test Chamber",
        entities: list[str] | None = None,
        events: list[str] | None = None,
    ) -> None:
        """Configure the canned world state.

        Args:
            location: Location reported by every ``observe()`` call.
            entities: Entities reported by ``observe()``. ``None`` selects
                the default ``["TestNPC"]``; an explicit empty list means
                "no entities".
            events: Events reported by ``observe()``. ``None`` means none.
        """
        self._location = location
        # Only None selects the default, so a caller can model an empty
        # world; copy so later changes to the caller's list are not picked up.
        self._entities = ["TestNPC"] if entities is None else list(entities)
        self._events = [] if events is None else list(events)
        self._connected = False
        self.action_log: list[_ActionLog] = []
        self.speech_log: list[dict] = []

    # -- lifecycle ---------------------------------------------------------

    def connect(self) -> None:
        """Mark the adapter as connected."""
        self._connected = True
        logger.info("MockWorldAdapter connected")

    def disconnect(self) -> None:
        """Mark the adapter as disconnected."""
        self._connected = False
        logger.info("MockWorldAdapter disconnected")

    @property
    def is_connected(self) -> bool:
        """Whether ``connect()`` has been called more recently than ``disconnect()``."""
        return self._connected

    # -- core contract -----------------------------------------------------

    def observe(self) -> PerceptionOutput:
        """Return the canned perception, stamped with the current UTC time.

        The entity and event lists are fresh copies on every call.
        """
        logger.debug("MockWorldAdapter.observe()")
        return PerceptionOutput(
            timestamp=datetime.now(UTC),
            location=self._location,
            entities=list(self._entities),
            events=list(self._events),
            raw={"adapter": "mock"},
        )

    def act(self, command: CommandInput) -> ActionResult:
        """Record *command* in ``action_log`` and report success."""
        logger.debug("MockWorldAdapter.act(%s)", command.action)
        self.action_log.append(_ActionLog(command=command, timestamp=datetime.now(UTC)))
        return ActionResult(
            status=ActionStatus.SUCCESS,
            message=f"Mock executed: {command.action}",
            data={"adapter": "mock"},
        )

    def speak(self, message: str, target: str | None = None) -> None:
        """Record a spoken message (and optional target) in ``speech_log``."""
        logger.debug("MockWorldAdapter.speak(%r, target=%r)", message, target)
        self.speech_log.append(
            {
                "message": message,
                "target": target,
                "timestamp": datetime.now(UTC).isoformat(),
            }
        )
|
||||
58
src/infrastructure/world/adapters/tes3mp.py
Normal file
58
src/infrastructure/world/adapters/tes3mp.py
Normal file
@@ -0,0 +1,58 @@
|
||||
"""TES3MP world adapter — stub for Morrowind multiplayer via TES3MP.
|
||||
|
||||
This adapter will eventually connect to a TES3MP server and translate
|
||||
the WorldInterface contract into TES3MP commands. For now every method
|
||||
raises ``NotImplementedError`` with guidance on what needs wiring up.
|
||||
|
||||
Once PR #864 merges, import PerceptionOutput and CommandInput directly
|
||||
from ``infrastructure.morrowind.schemas`` if their shapes differ from
|
||||
the canonical types in ``infrastructure.world.types``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from infrastructure.world.interface import WorldInterface
|
||||
from infrastructure.world.types import ActionResult, CommandInput, PerceptionOutput
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TES3MPWorldAdapter(WorldInterface):
    """Placeholder adapter for TES3MP (Morrowind multiplayer).

    Only the constructor and ``is_connected`` do real work; every other
    method raises ``NotImplementedError`` with a hint about what still has
    to be wired up. ``connect()`` is the suggested starting point — it
    should open a socket to the TES3MP server and authenticate.
    """

    def __init__(self, *, host: str = "localhost", port: int = 25565) -> None:
        """Remember the server address; no connection is attempted here."""
        self._host, self._port = host, port
        self._connected = False

    # -- lifecycle ---------------------------------------------------------

    def connect(self) -> None:
        """Not implemented yet."""
        hint = "TES3MPWorldAdapter.connect() — wire up TES3MP server socket"
        raise NotImplementedError(hint)

    def disconnect(self) -> None:
        """Not implemented yet."""
        hint = "TES3MPWorldAdapter.disconnect() — close TES3MP server socket"
        raise NotImplementedError(hint)

    @property
    def is_connected(self) -> bool:
        """Connection flag; nothing in this stub ever sets it to True."""
        return self._connected

    # -- core contract (stubs) ---------------------------------------------

    def observe(self) -> PerceptionOutput:
        """Not implemented yet."""
        hint = "TES3MPWorldAdapter.observe() — poll TES3MP for player/NPC state"
        raise NotImplementedError(hint)

    def act(self, command: CommandInput) -> ActionResult:
        """Not implemented yet."""
        hint = "TES3MPWorldAdapter.act() — translate CommandInput to TES3MP packet"
        raise NotImplementedError(hint)

    def speak(self, message: str, target: str | None = None) -> None:
        """Not implemented yet."""
        hint = "TES3MPWorldAdapter.speak() — send chat message via TES3MP"
        raise NotImplementedError(hint)
|
||||
17
src/infrastructure/world/benchmark/__init__.py
Normal file
17
src/infrastructure/world/benchmark/__init__.py
Normal file
@@ -0,0 +1,17 @@
|
||||
"""Performance regression suite for Morrowind agent scenarios.
|
||||
|
||||
Provides standardised benchmark scenarios, a runner that executes them
|
||||
through the heartbeat loop with a mock (or live) world adapter, and
|
||||
metrics collection for CI-integrated regression detection.
|
||||
"""
|
||||
|
||||
from infrastructure.world.benchmark.metrics import BenchmarkMetrics
|
||||
from infrastructure.world.benchmark.runner import BenchmarkRunner
|
||||
from infrastructure.world.benchmark.scenarios import BenchmarkScenario, load_scenarios
|
||||
|
||||
__all__ = [
|
||||
"BenchmarkMetrics",
|
||||
"BenchmarkRunner",
|
||||
"BenchmarkScenario",
|
||||
"load_scenarios",
|
||||
]
|
||||
195
src/infrastructure/world/benchmark/metrics.py
Normal file
195
src/infrastructure/world/benchmark/metrics.py
Normal file
@@ -0,0 +1,195 @@
|
||||
"""Benchmark metrics collection and persistence.
|
||||
|
||||
Tracks per-scenario results: cycles used, wall-clock time, success,
|
||||
LLM call count, and estimated metabolic cost. Results are persisted
|
||||
as JSONL for trend analysis and CI regression gates.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class ScenarioResult:
    """Outcome record for one benchmark scenario run.

    Attributes:
        scenario_name: Human-readable scenario name.
        success: True when the scenario's goal was met.
        cycles_used: Heartbeat cycles actually executed.
        max_cycles: Cycle budget the scenario was given.
        wall_time_ms: Elapsed wall-clock time, in milliseconds.
        llm_calls: Count of LLM inference calls attributed to the run.
        metabolic_cost: Estimated resource cost (arbitrary unit, ≈ tokens).
        error: Message describing the crash, or None if there was none.
        tags: Scenario tags, kept alongside the result for filtering.
    """

    scenario_name: str
    success: bool = False
    cycles_used: int = 0
    max_cycles: int = 0
    wall_time_ms: int = 0
    llm_calls: int = 0
    metabolic_cost: float = 0.0
    error: str | None = None
    # default_factory gives each result its own list.
    tags: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class BenchmarkMetrics:
    """Results of one benchmark run plus totals derived from them.

    Attributes:
        results: Per-scenario results.
        total_time_ms: Wall-clock time for the whole suite, in milliseconds.
        timestamp: ISO-8601 timestamp of the run.
        commit_sha: Git commit SHA, or an empty string when unknown.
    """

    results: list[ScenarioResult] = field(default_factory=list)
    total_time_ms: int = 0
    timestamp: str = ""
    commit_sha: str = ""

    # -- derived properties ------------------------------------------------

    @property
    def pass_count(self) -> int:
        """Number of results whose ``success`` flag is truthy."""
        return len([res for res in self.results if res.success])

    @property
    def fail_count(self) -> int:
        """Number of results whose ``success`` flag is falsy."""
        return len(self.results) - self.pass_count

    @property
    def success_rate(self) -> float:
        """Fraction of passing results; 0.0 when there are no results."""
        return self.pass_count / len(self.results) if self.results else 0.0

    @property
    def total_llm_calls(self) -> int:
        """Sum of ``llm_calls`` over all results."""
        return sum([res.llm_calls for res in self.results])

    @property
    def total_metabolic_cost(self) -> float:
        """Sum of ``metabolic_cost`` over all results."""
        return sum([res.metabolic_cost for res in self.results])

    # -- persistence -------------------------------------------------------

    def save(self, path: Path) -> None:
        """Append this run as one JSON line to the file at *path*.

        Missing parent directories are created first.
        """
        target = Path(path)
        target.parent.mkdir(parents=True, exist_ok=True)

        scenarios = [asdict(res) for res in self.results]
        record = {
            "timestamp": self.timestamp,
            "commit_sha": self.commit_sha,
            "total_time_ms": self.total_time_ms,
            "success_rate": round(self.success_rate, 4),
            "total_llm_calls": self.total_llm_calls,
            "total_metabolic_cost": round(self.total_metabolic_cost, 2),
            "scenarios": scenarios,
        }
        with target.open("a") as handle:
            handle.write(f"{json.dumps(record)}\n")
        logger.info("Benchmark results saved to %s", target)

    # -- summary -----------------------------------------------------------

    def summary(self) -> str:
        """Render the run as a multi-line, human-readable report."""
        counts = (
            f"Scenarios: {len(self.results)} "
            f"Passed: {self.pass_count} "
            f"Failed: {self.fail_count} "
            f"Success rate: {self.success_rate:.0%}"
        )
        totals = (
            f"Total time: {self.total_time_ms} ms "
            f"LLM calls: {self.total_llm_calls} "
            f"Metabolic cost: {self.total_metabolic_cost:.1f}"
        )
        report = ["=== Benchmark Summary ===", counts, totals]
        if self.commit_sha:
            report.append(f"Commit: {self.commit_sha}")
        report.append("")

        # One line per scenario, followed by its error when there is one.
        for res in self.results:
            verdict = "PASS" if res.success else "FAIL"
            report.append(
                f" [{verdict}] {res.scenario_name} — "
                f"{res.cycles_used}/{res.max_cycles} cycles, "
                f"{res.wall_time_ms} ms, "
                f"{res.llm_calls} LLM calls"
            )
            if res.error:
                report.append(f" Error: {res.error}")
        return "\n".join(report)
|
||||
|
||||
|
||||
def load_history(path: Path) -> list[dict]:
    """Read previously saved benchmark runs from a JSONL file.

    Lines that are not valid JSON are skipped. A missing file yields an
    empty list.

    Returns:
        The parsed records in reverse file order, i.e. the last line of the
        file comes first.
    """
    history_file = Path(path)
    if not history_file.exists():
        return []

    parsed: list[dict] = []
    for raw_line in history_file.read_text().strip().splitlines():
        try:
            entry = json.loads(raw_line)
        except json.JSONDecodeError:
            # Tolerate corrupt or blank lines instead of failing the load.
            continue
        parsed.append(entry)

    parsed.reverse()
    return parsed
|
||||
|
||||
|
||||
def compare_runs(
    current: BenchmarkMetrics,
    baseline: BenchmarkMetrics,
) -> str:
    """Build a textual regression report for *current* against *baseline*.

    The report contains the overall success-rate change, the metabolic-cost
    change (only when the baseline cost is positive), and one line for each
    current scenario that is new, flipped between pass and fail, or used
    more than 1.5x the baseline's cycles. Scenarios are matched by name.

    Returns:
        Human-readable comparison report.
    """
    report = ["=== Regression Report ==="]

    # Overall
    base_rate, cur_rate = baseline.success_rate, current.success_rate
    report.append(
        f"Success rate: {base_rate:.0%} -> {cur_rate:.0%} "
        f"({cur_rate - base_rate:+.0%})"
    )

    base_cost, cur_cost = baseline.total_metabolic_cost, current.total_metabolic_cost
    if base_cost > 0:
        cost_pct = ((cur_cost - base_cost) / base_cost) * 100
        report.append(
            f"Metabolic cost: {base_cost:.1f} -> "
            f"{cur_cost:.1f} ({cost_pct:+.1f}%)"
        )

    # Per-scenario
    previous = {res.scenario_name: res for res in baseline.results}
    for now in current.results:
        name = now.scenario_name
        before = previous.get(name)
        if before is None:
            report.append(f" [NEW] {name}")
        elif before.success and not now.success:
            report.append(f" [REGRESSION] {name} — was PASS, now FAIL")
        elif now.success and not before.success:
            report.append(f" [IMPROVEMENT] {name} — was FAIL, now PASS")
        elif now.cycles_used > before.cycles_used * 1.5:
            extra = now.cycles_used - before.cycles_used
            report.append(
                f" [SLOWER] {name} — "
                f"{before.cycles_used} -> {now.cycles_used} cycles (+{extra})"
            )

    return "\n".join(report)
|
||||
167
src/infrastructure/world/benchmark/runner.py
Normal file
167
src/infrastructure/world/benchmark/runner.py
Normal file
@@ -0,0 +1,167 @@
|
||||
"""Benchmark runner — executes scenarios through the heartbeat loop.
|
||||
|
||||
Wires each ``BenchmarkScenario`` into a ``MockWorldAdapter`` (or a
|
||||
supplied adapter), runs the heartbeat for up to ``max_cycles``, and
|
||||
collects ``BenchmarkMetrics``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import subprocess
|
||||
import time
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from infrastructure.world.adapters.mock import MockWorldAdapter
|
||||
from infrastructure.world.benchmark.metrics import BenchmarkMetrics, ScenarioResult
|
||||
from infrastructure.world.benchmark.scenarios import BenchmarkScenario
|
||||
from infrastructure.world.interface import WorldInterface
|
||||
from loop.heartbeat import Heartbeat
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Rough estimate: each heartbeat phase costs ~1 unit of metabolic cost
# (the gather, reason and act phases each touch the LLM router once).
|
||||
_COST_PER_CYCLE = 3.0 # three phases per cycle
|
||||
|
||||
|
||||
class BenchmarkRunner:
|
||||
"""Run benchmark scenarios and collect metrics.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
adapter_factory:
|
||||
Optional callable that returns a ``WorldInterface`` for a given
|
||||
scenario. Defaults to building a ``MockWorldAdapter`` from the
|
||||
scenario's start state.
|
||||
heartbeat_interval:
|
||||
Seconds between heartbeat ticks (0 for immediate).
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
adapter_factory=None,
|
||||
heartbeat_interval: float = 0.0,
|
||||
) -> None:
|
||||
self._adapter_factory = adapter_factory or self._default_adapter
|
||||
self._interval = heartbeat_interval
|
||||
|
||||
# -- public API --------------------------------------------------------
|
||||
|
||||
async def run(
|
||||
self,
|
||||
scenarios: list[BenchmarkScenario],
|
||||
) -> BenchmarkMetrics:
|
||||
"""Execute all *scenarios* and return aggregated metrics."""
|
||||
metrics = BenchmarkMetrics(
|
||||
timestamp=datetime.now(UTC).isoformat(),
|
||||
commit_sha=self._git_sha(),
|
||||
)
|
||||
suite_start = time.monotonic()
|
||||
|
||||
for scenario in scenarios:
|
||||
logger.info("Benchmark: starting '%s'", scenario.name)
|
||||
result = await self._run_scenario(scenario)
|
||||
metrics.results.append(result)
|
||||
status = "PASS" if result.success else "FAIL"
|
||||
logger.info(
|
||||
"Benchmark: '%s' %s (%d/%d cycles, %d ms)",
|
||||
scenario.name,
|
||||
status,
|
||||
result.cycles_used,
|
||||
result.max_cycles,
|
||||
result.wall_time_ms,
|
||||
)
|
||||
|
||||
metrics.total_time_ms = int((time.monotonic() - suite_start) * 1000)
|
||||
return metrics
|
||||
|
||||
# -- internal ----------------------------------------------------------
|
||||
|
||||
async def _run_scenario(self, scenario: BenchmarkScenario) -> ScenarioResult:
|
||||
"""Run a single scenario through the heartbeat loop."""
|
||||
result = ScenarioResult(
|
||||
scenario_name=scenario.name,
|
||||
max_cycles=scenario.max_cycles,
|
||||
tags=list(scenario.tags),
|
||||
)
|
||||
|
||||
adapter = self._adapter_factory(scenario)
|
||||
adapter.connect()
|
||||
|
||||
hb = Heartbeat(world=adapter, interval=self._interval)
|
||||
actions: list[dict] = []
|
||||
|
||||
start = time.monotonic()
|
||||
try:
|
||||
for cycle in range(1, scenario.max_cycles + 1):
|
||||
record = await hb.run_once()
|
||||
result.cycles_used = cycle
|
||||
|
||||
# Track LLM calls (each cycle has 3 phases that may call LLM)
|
||||
result.llm_calls += 3
|
||||
|
||||
# Accumulate actions for goal predicate
|
||||
if record.action_taken and record.action_taken != "idle":
|
||||
actions.append(
|
||||
{
|
||||
"action": record.action_taken,
|
||||
"target": record.observation.get("location", ""),
|
||||
"status": record.action_status,
|
||||
}
|
||||
)
|
||||
|
||||
# Update adapter location if scenario simulates movement
|
||||
current_location = self._get_current_location(adapter)
|
||||
|
||||
# Check goal predicate
|
||||
if scenario.goal_predicate is not None:
|
||||
if scenario.goal_predicate(actions, current_location):
|
||||
result.success = True
|
||||
break
|
||||
elif cycle == scenario.max_cycles:
|
||||
# No predicate — success if we survived all cycles
|
||||
result.success = True
|
||||
|
||||
except Exception as exc:
|
||||
logger.warning("Benchmark scenario '%s' crashed: %s", scenario.name, exc)
|
||||
result.error = str(exc)
|
||||
finally:
|
||||
adapter.disconnect()
|
||||
|
||||
result.wall_time_ms = int((time.monotonic() - start) * 1000)
|
||||
result.metabolic_cost = result.cycles_used * _COST_PER_CYCLE
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _default_adapter(scenario: BenchmarkScenario) -> WorldInterface:
|
||||
"""Build a MockWorldAdapter from a scenario's starting state."""
|
||||
return MockWorldAdapter(
|
||||
location=scenario.start_location,
|
||||
entities=list(scenario.entities),
|
||||
events=list(scenario.events),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _get_current_location(adapter: WorldInterface) -> str:
|
||||
"""Read the current location from the adapter."""
|
||||
try:
|
||||
perception = adapter.observe()
|
||||
return perception.location
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
def _git_sha() -> str:
|
||||
"""Best-effort: return the current git commit SHA."""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", "rev-parse", "--short", "HEAD"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
return result.stdout.strip() if result.returncode == 0 else ""
|
||||
except (OSError, subprocess.TimeoutExpired):
|
||||
return ""
|
||||
160
src/infrastructure/world/benchmark/scenarios.py
Normal file
160
src/infrastructure/world/benchmark/scenarios.py
Normal file
@@ -0,0 +1,160 @@
|
||||
"""Benchmark scenario definitions for Morrowind agent regression testing.
|
||||
|
||||
Each scenario specifies a starting location, goal conditions, world state
|
||||
(entities, events), and maximum cycles allowed. The runner feeds these
|
||||
into the heartbeat loop and checks completion against the goal predicate.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class BenchmarkScenario:
    """Immutable description of one reproducible agent task.

    Attributes:
        name: Human-readable scenario name.
        description: What the scenario is meant to exercise.
        start_location: Location the agent starts in.
        goal_location: Target location for navigation scenarios; empty
            when not applicable.
        entities: NPCs / objects present in the world.
        events: Game events injected each cycle.
        max_cycles: Hard cap on heartbeat cycles before failure.
        goal_predicate: Optional callable ``(actions, location) -> bool``
            evaluated after each cycle to check early success.
        tags: Freeform tags for filtering (e.g. "navigation", "quest").
    """

    name: str
    description: str
    start_location: str
    goal_location: str = ""
    # List fields use default_factory so instances never share a default.
    entities: list[str] = field(default_factory=list)
    events: list[str] = field(default_factory=list)
    max_cycles: int = 50
    goal_predicate: Callable | None = None
    tags: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Goal predicates
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _reached_location(target: str) -> Callable:
    """Build a goal predicate that is true once the agent is at *target*.

    The comparison ignores letter case; the recorded actions are not
    consulted.
    """

    def predicate(actions: list[dict], current_location: str) -> bool:
        here = current_location.lower()
        wanted = target.lower()
        return here == wanted

    return predicate
|
||||
|
||||
|
||||
def _interacted_with(npc: str) -> Callable:
    """Return a predicate that checks for a speak/interact action with *npc*.

    The predicate is true when any recorded action is one of ``speak``,
    ``interact`` or ``talk`` and its ``target`` equals *npc*, ignoring
    letter case. Actions whose target is missing or ``None`` never match.
    """

    def predicate(actions: list[dict], current_location: str) -> bool:
        wanted = npc.lower()
        return any(
            act.get("action") in ("speak", "interact", "talk")
            # ``or ""`` guards against an explicit None target, which would
            # otherwise raise AttributeError on ``.lower()``.
            and (act.get("target") or "").lower() == wanted
            for act in actions
        )

    return predicate
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Built-in scenarios
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
BUILTIN_SCENARIOS: list[BenchmarkScenario] = [
    # Long-distance navigation: succeeds when the agent's location is Balmora.
    BenchmarkScenario(
        name="Walk Seyda Neen to Balmora",
        description=(
            "Navigate from the starting village to Balmora via the road. "
            "Tests basic navigation and pathfinding."
        ),
        start_location="Seyda Neen",
        goal_location="Balmora",
        entities=["Silt Strider", "Road Sign", "Mudcrab"],
        events=["player_spawned"],
        max_cycles=30,
        goal_predicate=_reached_location("Balmora"),
        tags=["navigation", "basic"],
    ),
    # Quest/NPC interaction: succeeds on a speak/interact/talk with Fargoth.
    BenchmarkScenario(
        name="Fargoth's Ring",
        description=(
            "Complete the Fargoth quest: find Fargoth, receive the ring, "
            "and return it. Tests NPC interaction and quest logic."
        ),
        start_location="Seyda Neen",
        goal_location="Seyda Neen",
        entities=["Fargoth", "Arrille", "Guard"],
        events=["quest_available:fargoth_ring"],
        max_cycles=40,
        goal_predicate=_interacted_with("Fargoth"),
        tags=["quest", "npc_interaction"],
    ),
    # Intra-city navigation with several NPCs around.
    BenchmarkScenario(
        name="Balmora Guild Navigation",
        description=(
            "Walk from Balmora South Wall Corner Club to the Fighters Guild. "
            "Tests intra-city navigation with multiple NPCs present."
        ),
        start_location="Balmora, South Wall Corner Club",
        goal_location="Balmora, Fighters Guild",
        entities=["Guard", "Merchant", "Caius Cosades"],
        events=["player_entered"],
        max_cycles=20,
        goal_predicate=_reached_location("Balmora, Fighters Guild"),
        tags=["navigation", "city"],
    ),
    # Combat: no predicate, so success means surviving max_cycles without a crash.
    BenchmarkScenario(
        name="Combat Encounter — Mudcrab",
        description=(
            "Engage and defeat a single Mudcrab on the road between "
            "Seyda Neen and Balmora. Tests combat action selection."
        ),
        start_location="Bitter Coast Road",
        goal_location="Bitter Coast Road",
        entities=["Mudcrab"],
        events=["hostile_entity_nearby"],
        max_cycles=15,
        goal_predicate=None,
        tags=["combat", "basic"],
    ),
    # Passive observation: goal_predicate is left at its default (None).
    BenchmarkScenario(
        name="Passive Observation — Balmora Market",
        description=(
            "Observe the Balmora market for 10 cycles without acting. "
            "Tests that the agent can reason without unnecessary actions."
        ),
        start_location="Balmora, Market Square",
        goal_location="",
        entities=["Merchant", "Guard", "Pilgrim", "Trader"],
        events=["market_day"],
        max_cycles=10,
        tags=["observation", "passive"],
    ),
]
|
||||
|
||||
|
||||
def load_scenarios(
    tags: list[str] | None = None,
) -> list[BenchmarkScenario]:
    """Select built-in benchmark scenarios, optionally narrowed by tag.

    Args:
        tags: Tags to match. ``None`` selects every built-in scenario;
            otherwise a scenario is kept when it shares at least one tag
            with this list (so an empty list selects nothing).

    Returns:
        A new list of the matching ``BenchmarkScenario`` instances, in the
        order they appear in ``BUILTIN_SCENARIOS``.
    """
    if tags is None:
        # Hand back a copy so callers cannot mutate the module-level list.
        return list(BUILTIN_SCENARIOS)

    wanted = set(tags)
    return [
        scenario
        for scenario in BUILTIN_SCENARIOS
        if not wanted.isdisjoint(scenario.tags)
    ]
|
||||
64
src/infrastructure/world/interface.py
Normal file
64
src/infrastructure/world/interface.py
Normal file
@@ -0,0 +1,64 @@
|
||||
"""Abstract WorldInterface — the contract every game-world adapter must fulfil.
|
||||
|
||||
Follows a Gymnasium-inspired pattern: observe → act → speak, with each
|
||||
method returning strongly-typed data structures.
|
||||
|
||||
Any future engine (TES3MP, Luanti, Godot, …) plugs in by subclassing
|
||||
``WorldInterface`` and implementing the three methods.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from infrastructure.world.types import ActionResult, CommandInput, PerceptionOutput
|
||||
|
||||
|
||||
class WorldInterface(ABC):
    """Common base class for every game-world adapter.

    Concrete adapters are required to provide:
    - ``observe()`` — return a structured perception of the world
    - ``act()``     — execute a command and report what happened
    - ``speak()``   — deliver a message to an NPC / player / broadcast

    ``connect()`` and ``disconnect()`` are lifecycle hooks with no-op
    defaults; ``is_connected`` reports ``True`` unless overridden.
    """

    # -- lifecycle (optional overrides) ------------------------------------

    def connect(self) -> None:  # noqa: B027
        """Open the link to the game world.

        The base implementation does nothing. Adapters that need sockets,
        authentication, etc. override this hook.
        """
        return None

    def disconnect(self) -> None:  # noqa: B027
        """Close the link to the game world.

        The base implementation does nothing.
        """
        return None

    @property
    def is_connected(self) -> bool:
        """Whether the adapter currently has an active connection.

        The base implementation always reports ``True``; adapters holding a
        persistent connection should override it.
        """
        return True

    # -- core contract (must implement) ------------------------------------

    @abstractmethod
    def observe(self) -> PerceptionOutput:
        """Return a structured snapshot of the current world state."""

    @abstractmethod
    def act(self, command: CommandInput) -> ActionResult:
        """Carry out *command* in the world and return its outcome."""

    @abstractmethod
    def speak(self, message: str, target: str | None = None) -> None:
        """Emit *message* in the world, optionally addressed to *target*."""
|
||||
54
src/infrastructure/world/registry.py
Normal file
54
src/infrastructure/world/registry.py
Normal file
@@ -0,0 +1,54 @@
|
||||
"""Adapter registry — register and instantiate world adapters by name.
|
||||
|
||||
Usage::
|
||||
|
||||
registry = AdapterRegistry()
|
||||
registry.register("mock", MockWorldAdapter)
|
||||
adapter = registry.get("mock", some_kwarg="value")
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from infrastructure.world.interface import WorldInterface
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AdapterRegistry:
    """Name → WorldInterface class registry with instantiation."""

    def __init__(self) -> None:
        # Maps the registered name to the adapter *class* (not an instance).
        self._adapters: dict[str, type[WorldInterface]] = {}

    def register(self, name: str, cls: type[WorldInterface]) -> None:
        """Register an adapter class under *name*.

        Re-registering an existing name replaces the previous class and logs
        a warning.

        Raises ``TypeError`` if *cls* is not a ``WorldInterface`` subclass.
        """
        # ``isinstance(cls, type)`` first: ``issubclass`` itself raises
        # TypeError with a less helpful message when given a non-class.
        if not (isinstance(cls, type) and issubclass(cls, WorldInterface)):
            raise TypeError(f"{cls!r} is not a WorldInterface subclass")
        if name in self._adapters:
            logger.warning("Overwriting adapter %r (was %r)", name, self._adapters[name])
        self._adapters[name] = cls
        logger.info("Registered world adapter: %s → %s", name, cls.__name__)

    def get(self, name: str, **kwargs: Any) -> WorldInterface:
        """Instantiate and return the adapter registered as *name*.

        Keyword arguments are forwarded to the adapter's constructor; a new
        instance is created on every call.

        Raises ``KeyError`` if *name* is not registered. The error message
        lists the names that *are* registered to make typos easy to spot.
        """
        try:
            cls = self._adapters[name]
        except KeyError:
            raise KeyError(
                f"No world adapter registered as {name!r}; "
                f"available: {self.list_adapters()}"
            ) from None
        return cls(**kwargs)

    def list_adapters(self) -> list[str]:
        """Return sorted list of registered adapter names."""
        return sorted(self._adapters)

    def __contains__(self, name: str) -> bool:
        """Return ``True`` if an adapter is registered under *name*."""
        return name in self._adapters

    def __len__(self) -> int:
        """Return the number of registered adapters."""
        return len(self._adapters)
|
||||
71
src/infrastructure/world/types.py
Normal file
71
src/infrastructure/world/types.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""Canonical data types for world interaction.
|
||||
|
||||
These mirror the PerceptionOutput / CommandInput types from PR #864's
|
||||
``morrowind/schemas.py``. When that PR merges, these can be replaced
|
||||
with re-exports — but until then they serve as the stable contract for
|
||||
every WorldInterface adapter.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC, datetime
|
||||
from enum import StrEnum
|
||||
|
||||
|
||||
class ActionStatus(StrEnum):
|
||||
"""Outcome of an action dispatched to the world."""
|
||||
|
||||
SUCCESS = "success"
|
||||
FAILURE = "failure"
|
||||
PENDING = "pending"
|
||||
NOOP = "noop"
|
||||
|
||||
|
||||
@dataclass
|
||||
class PerceptionOutput:
|
||||
"""Structured world state returned by ``WorldInterface.observe()``.
|
||||
|
||||
Attributes:
|
||||
timestamp: When the observation was captured.
|
||||
location: Free-form location descriptor (e.g. "Balmora, Fighters Guild").
|
||||
entities: List of nearby entity descriptions.
|
||||
events: Recent game events since last observation.
|
||||
raw: Optional raw / engine-specific payload for advanced consumers.
|
||||
"""
|
||||
|
||||
timestamp: datetime = field(default_factory=lambda: datetime.now(UTC))
|
||||
location: str = ""
|
||||
entities: list[str] = field(default_factory=list)
|
||||
events: list[str] = field(default_factory=list)
|
||||
raw: dict = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
class CommandInput:
    """Command passed to ``WorldInterface.act()``.

    Attributes:
        action: Name of the action / verb (e.g. "move", "attack", "use_item").
        target: Identifier of the action's target, if any.
        parameters: Free-form key-value payload for engine-specific options.
    """

    action: str
    target: str | None = None
    # Factory default: each command gets its own dict.
    parameters: dict = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
class ActionResult:
    """Result handed back by ``WorldInterface.act()``.

    Attributes:
        status: Outcome category of the action; defaults to ``SUCCESS``.
        message: Human-readable explanation of the outcome.
        data: Free-form, engine-specific result payload.
    """

    status: ActionStatus = ActionStatus.SUCCESS
    message: str = ""
    # Factory default: each result gets its own dict.
    data: dict = field(default_factory=dict)
|
||||
1
src/lightning/__init__.py
Normal file
1
src/lightning/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Lightning Network integration for tool-usage micro-payments."""
|
||||
69
src/lightning/factory.py
Normal file
69
src/lightning/factory.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""Lightning backend factory.
|
||||
|
||||
Returns a mock or real LND backend based on ``settings.lightning_backend``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import secrets
|
||||
from dataclasses import dataclass
|
||||
|
||||
from config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class Invoice:
    """Bare-bones description of a Lightning invoice."""

    payment_hash: str  # hex digest identifying the payment
    payment_request: str  # encoded invoice string handed to the payer
    amount_sats: int  # invoice amount in satoshis
    memo: str  # free-text description attached to the invoice
|
||||
|
||||
|
||||
class MockBackend:
    """Fake Lightning backend for development and tests; keeps no state."""

    def create_invoice(self, amount_sats: int, memo: str = "") -> Invoice:
        """Build a fake invoice whose payment hash is randomly generated.

        The hash is the SHA-256 hex digest of 32 random bytes; the payment
        request is a mock string embedding the amount and the first 20 hex
        characters of that hash.
        """
        digest = hashlib.sha256(secrets.token_bytes(32)).hexdigest()
        request = f"lnbc{amount_sats}mock{digest[:20]}"
        logger.debug("Mock invoice: %s sats — %s", amount_sats, digest[:12])
        return Invoice(
            payment_hash=digest,
            payment_request=request,
            amount_sats=amount_sats,
            memo=memo,
        )
|
||||
|
||||
|
||||
# Singleton — lazily created
|
||||
_backend: MockBackend | None = None
|
||||
|
||||
|
||||
def get_backend() -> MockBackend:
    """Return the process-wide Lightning backend, creating it on first use.

    The backend kind is read from ``settings.lightning_backend``. Only the
    mock backend exists today: ``"lnd"`` is accepted but falls back to the
    mock with a warning.

    Raises ``ValueError`` if an unsupported backend is requested.
    """
    global _backend  # noqa: PLW0603

    if _backend is None:
        kind = settings.lightning_backend
        if kind not in ("mock", "lnd"):
            raise ValueError(f"Unknown lightning_backend: {kind!r}")
        if kind == "lnd":
            # LND gRPC integration is on the roadmap — for now fall back to mock.
            logger.warning("LND backend not yet implemented — using mock")
        _backend = MockBackend()
        logger.info("Lightning backend: %s", kind)

    return _backend
|
||||
146
src/lightning/ledger.py
Normal file
146
src/lightning/ledger.py
Normal file
@@ -0,0 +1,146 @@
|
||||
"""In-memory Lightning transaction ledger.
|
||||
|
||||
Tracks invoices, settlements, and balances per the schema in
|
||||
``docs/adr/018-lightning-ledger.md``. Uses a simple in-memory list so the
|
||||
dashboard can display real (ephemeral) data without requiring SQLite yet.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import uuid
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC, datetime
|
||||
from enum import StrEnum
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TxType(StrEnum):
|
||||
incoming = "incoming"
|
||||
outgoing = "outgoing"
|
||||
|
||||
|
||||
class TxStatus(StrEnum):
|
||||
pending = "pending"
|
||||
settled = "settled"
|
||||
failed = "failed"
|
||||
expired = "expired"
|
||||
|
||||
|
||||
@dataclass
class LedgerEntry:
    """One row of the ledger, following the ADR-018 schema."""

    # Required fields
    id: str
    tx_type: TxType
    status: TxStatus
    payment_hash: str
    amount_sats: int
    memo: str
    source: str
    created_at: str

    # Optional fields — empty string / zero means "not set"
    invoice: str = ""
    preimage: str = ""
    task_id: str = ""
    agent_id: str = ""
    settled_at: str = ""
    fee_sats: int = 0
|
||||
|
||||
|
||||
# ── In-memory store ──────────────────────────────────────────────────
|
||||
_entries: list[LedgerEntry] = []
|
||||
|
||||
|
||||
def create_invoice_entry(
    payment_hash: str,
    amount_sats: int,
    memo: str = "",
    source: str = "tool_usage",
    task_id: str = "",
    agent_id: str = "",
    invoice: str = "",
) -> LedgerEntry:
    """Append a new pending, incoming invoice to the ledger and return it.

    The entry receives a 16-character hex id taken from a random UUID and a
    ``created_at`` timestamp in UTC ISO-8601 format.
    """
    entry_id = uuid.uuid4().hex[:16]
    created = datetime.now(UTC).isoformat()
    new_entry = LedgerEntry(
        id=entry_id,
        tx_type=TxType.incoming,
        status=TxStatus.pending,
        payment_hash=payment_hash,
        amount_sats=amount_sats,
        memo=memo,
        source=source,
        created_at=created,
        invoice=invoice,
        task_id=task_id,
        agent_id=agent_id,
    )
    _entries.append(new_entry)
    logger.debug("Ledger entry created: %s (%s sats)", new_entry.id, amount_sats)
    return new_entry
|
||||
|
||||
|
||||
def mark_settled(payment_hash: str, preimage: str = "") -> LedgerEntry | None:
    """Settle the first pending entry whose payment hash matches.

    The matched entry is updated in place: its status becomes ``settled``,
    *preimage* is stored, and ``settled_at`` is set to the current UTC time.

    Returns the updated entry, or ``None`` when no pending entry matches.
    """
    target = next(
        (
            candidate
            for candidate in _entries
            if candidate.payment_hash == payment_hash
            and candidate.status == TxStatus.pending
        ),
        None,
    )
    if target is None:
        return None

    target.status = TxStatus.settled
    target.preimage = preimage
    target.settled_at = datetime.now(UTC).isoformat()
    logger.debug("Ledger settled: %s", payment_hash[:12])
    return target
|
||||
|
||||
|
||||
def get_balance() -> dict:
    """Summarise the ledger into settled, pending and available totals.

    Only ``settled`` and ``pending`` entries contribute; failed and expired
    ones are ignored. Fees are summed over every settled entry regardless of
    direction. ``net_sats`` is settled incoming minus settled outgoing minus
    fees, and ``available_sats`` additionally subtracts pending outgoing.
    """
    incoming_total = 0
    outgoing_total = 0
    fees = 0
    pending_in = 0
    pending_out = 0

    # One pass over the ledger, bucketing each entry by status and direction.
    for entry in _entries:
        is_incoming = entry.tx_type == TxType.incoming
        is_outgoing = entry.tx_type == TxType.outgoing
        if entry.status == TxStatus.settled:
            fees += entry.fee_sats
            if is_incoming:
                incoming_total += entry.amount_sats
            elif is_outgoing:
                outgoing_total += entry.amount_sats
        elif entry.status == TxStatus.pending:
            if is_incoming:
                pending_in += entry.amount_sats
            elif is_outgoing:
                pending_out += entry.amount_sats

    net = incoming_total - outgoing_total - fees
    return {
        "incoming_total_sats": incoming_total,
        "outgoing_total_sats": outgoing_total,
        "fees_paid_sats": fees,
        "net_sats": net,
        "pending_incoming_sats": pending_in,
        "pending_outgoing_sats": pending_out,
        "available_sats": net - pending_out,
    }
|
||||
|
||||
|
||||
def get_transactions(
    tx_type: str | None = None,
    status: str | None = None,
    limit: int = 50,
) -> list[LedgerEntry]:
    """Return ledger entries, newest first, optionally filtered.

    Args:
        tx_type: Keep only entries whose ``tx_type`` value equals this
            string; a falsy value disables the filter.
        status: Keep only entries whose ``status`` value equals this string;
            a falsy value disables the filter.
        limit: Maximum number of entries to return. Zero or a negative
            number yields an empty list.

    Returns:
        A new list holding at most *limit* matching entries, in reverse
        insertion order.
    """
    # A negative limit used as a slice bound would drop entries from the
    # *end* of the result instead of returning nothing, so handle
    # non-positive limits explicitly. ``None`` is still tolerated and means
    # "no limit", as it did when the value was used directly as a slice bound.
    if limit is not None and limit <= 0:
        return []

    matching = [
        entry
        for entry in reversed(_entries)
        if (not tx_type or entry.tx_type.value == tx_type)
        and (not status or entry.status.value == status)
    ]
    return matching[:limit]
|
||||
|
||||
|
||||
def clear() -> None:
    """Empty the in-memory ledger in place (intended for tests)."""
    del _entries[:]
|
||||
286
src/loop/heartbeat.py
Normal file
286
src/loop/heartbeat.py
Normal file
@@ -0,0 +1,286 @@
|
||||
"""Heartbeat v2 — WorldInterface-driven cognitive loop.
|
||||
|
||||
Drives real observe → reason → act → reflect cycles through whatever
|
||||
``WorldInterface`` adapter is connected. When no adapter is present,
|
||||
gracefully falls back to the existing ``run_cycle()`` behaviour.
|
||||
|
||||
Usage::
|
||||
|
||||
heartbeat = Heartbeat(world=adapter, interval=30.0)
|
||||
await heartbeat.run_once() # single cycle
|
||||
await heartbeat.start() # background loop
|
||||
heartbeat.stop() # graceful shutdown
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from loop.phase1_gather import gather
|
||||
from loop.phase2_reason import reason
|
||||
from loop.phase3_act import act
|
||||
from loop.schema import ContextPayload
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cycle log entry
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class CycleRecord:
    """Log entry for a single observe → reason → act → reflect cycle."""

    # Identity
    cycle_id: int
    timestamp: str

    # What was seen, decided and done during the cycle
    observation: dict = field(default_factory=dict)
    reasoning_summary: str = ""
    action_taken: str = ""
    action_status: str = ""
    reflect_notes: str = ""

    # Duration of the cycle in milliseconds
    duration_ms: int = 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Heartbeat
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class Heartbeat:
|
||||
"""Manages the recurring cognitive loop with optional world adapter.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
world:
|
||||
A ``WorldInterface`` instance (or ``None`` for passive mode).
|
||||
interval:
|
||||
Seconds between heartbeat ticks. 30 s for embodied mode,
|
||||
300 s (5 min) for passive thinking.
|
||||
on_cycle:
|
||||
Optional async callback invoked after each cycle with the
|
||||
``CycleRecord``.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
world=None, # WorldInterface | None
|
||||
interval: float = 30.0,
|
||||
on_cycle=None, # Callable[[CycleRecord], Awaitable[None]] | None
|
||||
) -> None:
|
||||
self._world = world
|
||||
self._interval = interval
|
||||
self._on_cycle = on_cycle
|
||||
self._cycle_count: int = 0
|
||||
self._running = False
|
||||
self._task: asyncio.Task | None = None
|
||||
self.history: list[CycleRecord] = []
|
||||
|
||||
# -- properties --------------------------------------------------------
|
||||
|
||||
@property
|
||||
def world(self):
|
||||
return self._world
|
||||
|
||||
@world.setter
|
||||
def world(self, adapter) -> None:
|
||||
self._world = adapter
|
||||
|
||||
@property
|
||||
def interval(self) -> float:
|
||||
return self._interval
|
||||
|
||||
@interval.setter
|
||||
def interval(self, value: float) -> None:
|
||||
self._interval = max(1.0, value)
|
||||
|
||||
@property
|
||||
def is_running(self) -> bool:
|
||||
return self._running
|
||||
|
||||
@property
|
||||
def cycle_count(self) -> int:
|
||||
return self._cycle_count
|
||||
|
||||
# -- single cycle ------------------------------------------------------
|
||||
|
||||
async def run_once(self) -> CycleRecord:
|
||||
"""Execute one full heartbeat cycle.
|
||||
|
||||
If a world adapter is present:
|
||||
1. Observe — ``world.observe()``
|
||||
2. Gather + Reason + Act via the three-phase loop, with the
|
||||
observation injected into the payload
|
||||
3. Dispatch the decided action back to ``world.act()``
|
||||
4. Reflect — log the cycle
|
||||
|
||||
Without an adapter the existing loop runs on a timer-sourced
|
||||
payload (passive thinking).
|
||||
"""
|
||||
self._cycle_count += 1
|
||||
start = time.monotonic()
|
||||
record = CycleRecord(
|
||||
cycle_id=self._cycle_count,
|
||||
timestamp=datetime.now(UTC).isoformat(),
|
||||
)
|
||||
|
||||
if self._world is not None:
|
||||
record = await self._embodied_cycle(record)
|
||||
else:
|
||||
record = await self._passive_cycle(record)
|
||||
|
||||
record.duration_ms = int((time.monotonic() - start) * 1000)
|
||||
self.history.append(record)
|
||||
|
||||
# Broadcast via WebSocket (best-effort)
|
||||
await self._broadcast(record)
|
||||
|
||||
if self._on_cycle:
|
||||
await self._on_cycle(record)
|
||||
|
||||
logger.info(
|
||||
"Heartbeat cycle #%d complete (%d ms) — action=%s status=%s",
|
||||
record.cycle_id,
|
||||
record.duration_ms,
|
||||
record.action_taken or "(passive)",
|
||||
record.action_status or "n/a",
|
||||
)
|
||||
return record
|
||||
|
||||
# -- background loop ---------------------------------------------------
|
||||
|
||||
async def start(self) -> None:
|
||||
"""Start the recurring heartbeat loop as a background task."""
|
||||
if self._running:
|
||||
logger.warning("Heartbeat already running")
|
||||
return
|
||||
self._running = True
|
||||
self._task = asyncio.current_task() or asyncio.ensure_future(self._loop())
|
||||
if self._task is not asyncio.current_task():
|
||||
return
|
||||
await self._loop()
|
||||
|
||||
async def _loop(self) -> None:
|
||||
logger.info(
|
||||
"Heartbeat loop started (interval=%.1fs, adapter=%s)",
|
||||
self._interval,
|
||||
type(self._world).__name__ if self._world else "None",
|
||||
)
|
||||
while self._running:
|
||||
try:
|
||||
await self.run_once()
|
||||
except Exception:
|
||||
logger.exception("Heartbeat cycle failed")
|
||||
await asyncio.sleep(self._interval)
|
||||
|
||||
def stop(self) -> None:
|
||||
"""Signal the heartbeat loop to stop after the current cycle."""
|
||||
self._running = False
|
||||
logger.info("Heartbeat stop requested")
|
||||
|
||||
# -- internal: embodied cycle ------------------------------------------
|
||||
|
||||
async def _embodied_cycle(self, record: CycleRecord) -> CycleRecord:
|
||||
"""Cycle with a live world adapter: observe → reason → act → reflect."""
|
||||
from infrastructure.world.types import ActionStatus, CommandInput
|
||||
|
||||
# 1. Observe
|
||||
perception = self._world.observe()
|
||||
record.observation = {
|
||||
"location": perception.location,
|
||||
"entities": perception.entities,
|
||||
"events": perception.events,
|
||||
}
|
||||
|
||||
# 2. Feed observation into the three-phase loop
|
||||
obs_content = (
|
||||
f"Location: {perception.location}\n"
|
||||
f"Entities: {', '.join(perception.entities)}\n"
|
||||
f"Events: {', '.join(perception.events)}"
|
||||
)
|
||||
payload = ContextPayload(
|
||||
source="world",
|
||||
content=obs_content,
|
||||
metadata={"perception": record.observation},
|
||||
)
|
||||
|
||||
gathered = gather(payload)
|
||||
reasoned = reason(gathered)
|
||||
acted = act(reasoned)
|
||||
|
||||
# Extract action decision from the acted payload
|
||||
action_name = acted.metadata.get("action", "idle")
|
||||
action_target = acted.metadata.get("action_target")
|
||||
action_params = acted.metadata.get("action_params", {})
|
||||
record.reasoning_summary = acted.metadata.get("reasoning", acted.content[:200])
|
||||
|
||||
# 3. Dispatch action to world
|
||||
if action_name != "idle":
|
||||
cmd = CommandInput(
|
||||
action=action_name,
|
||||
target=action_target,
|
||||
parameters=action_params,
|
||||
)
|
||||
result = self._world.act(cmd)
|
||||
record.action_taken = action_name
|
||||
record.action_status = result.status.value
|
||||
else:
|
||||
record.action_taken = "idle"
|
||||
record.action_status = ActionStatus.NOOP.value
|
||||
|
||||
# 4. Reflect
|
||||
record.reflect_notes = (
|
||||
f"Observed {len(perception.entities)} entities at {perception.location}. "
|
||||
f"Action: {record.action_taken} → {record.action_status}."
|
||||
)
|
||||
|
||||
return record
|
||||
|
||||
# -- internal: passive cycle -------------------------------------------
|
||||
|
||||
async def _passive_cycle(self, record: CycleRecord) -> CycleRecord:
|
||||
"""Cycle without a world adapter — existing think_once() behaviour."""
|
||||
payload = ContextPayload(
|
||||
source="timer",
|
||||
content="heartbeat",
|
||||
metadata={"mode": "passive"},
|
||||
)
|
||||
|
||||
gathered = gather(payload)
|
||||
reasoned = reason(gathered)
|
||||
acted = act(reasoned)
|
||||
|
||||
record.reasoning_summary = acted.content[:200]
|
||||
record.action_taken = "think"
|
||||
record.action_status = "noop"
|
||||
record.reflect_notes = "Passive thinking cycle — no world adapter connected."
|
||||
|
||||
return record
|
||||
|
||||
# -- broadcast ---------------------------------------------------------
|
||||
|
||||
async def _broadcast(self, record: CycleRecord) -> None:
|
||||
"""Emit heartbeat cycle data via WebSocket (best-effort)."""
|
||||
try:
|
||||
from infrastructure.ws_manager.handler import ws_manager
|
||||
|
||||
await ws_manager.broadcast(
|
||||
"heartbeat.cycle",
|
||||
{
|
||||
"cycle_id": record.cycle_id,
|
||||
"timestamp": record.timestamp,
|
||||
"action": record.action_taken,
|
||||
"action_status": record.action_status,
|
||||
"reasoning_summary": record.reasoning_summary[:300],
|
||||
"observation": record.observation,
|
||||
"duration_ms": record.duration_ms,
|
||||
},
|
||||
)
|
||||
except (ImportError, AttributeError, ConnectionError, RuntimeError) as exc:
|
||||
logger.debug("Heartbeat broadcast skipped: %s", exc)
|
||||
@@ -17,9 +17,9 @@ logger = logging.getLogger(__name__)
|
||||
def gather(payload: ContextPayload) -> ContextPayload:
|
||||
"""Accept raw input and return structured context for reasoning.
|
||||
|
||||
Stub: tags the payload with phase=gather and logs transit.
|
||||
Timmy will flesh this out with context selection, memory lookup,
|
||||
adapter polling, and attention-residual weighting.
|
||||
When the payload carries a ``perception`` dict in metadata (injected by
|
||||
the heartbeat loop from a WorldInterface adapter), that observation is
|
||||
folded into the gathered context. Otherwise behaves as before.
|
||||
"""
|
||||
logger.info(
|
||||
"Phase 1 (Gather) received: source=%s content_len=%d tokens=%d",
|
||||
@@ -28,7 +28,20 @@ def gather(payload: ContextPayload) -> ContextPayload:
|
||||
payload.token_count,
|
||||
)
|
||||
|
||||
result = payload.with_metadata(phase="gather", gathered=True)
|
||||
extra: dict = {"phase": "gather", "gathered": True}
|
||||
|
||||
# Enrich with world observation when present
|
||||
perception = payload.metadata.get("perception")
|
||||
if perception:
|
||||
extra["world_observation"] = perception
|
||||
logger.info(
|
||||
"Phase 1 (Gather) world observation: location=%s entities=%d events=%d",
|
||||
perception.get("location", "?"),
|
||||
len(perception.get("entities", [])),
|
||||
len(perception.get("events", [])),
|
||||
)
|
||||
|
||||
result = payload.with_metadata(**extra)
|
||||
|
||||
logger.info(
|
||||
"Phase 1 (Gather) produced: metadata_keys=%s",
|
||||
|
||||
@@ -215,6 +215,119 @@ def _summarize(result: AgenticResult, total_steps: int, was_truncated: bool) ->
|
||||
result.status = "completed"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Execution orchestrator
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def _execute_all_steps(
    agent,
    task: str,
    task_id: str,
    steps: list[str],
    total_steps: int,
    session_id: str,
    result: AgenticResult,
    on_progress: Callable | None,
) -> list[str]:
    """Execute all planned steps, handling failures with adaptation.

    Steps run sequentially. Each success is appended to *result.steps*,
    broadcast as ``agentic.step_complete`` and reported to *on_progress*
    when one is given. A step that raises is handed to
    ``_handle_step_failure`` and the loop then moves on to the next step.

    Returns the list of completed-result strings (used as context for later
    steps); each success adds the first 200 characters of the step's result.
    """
    completed_results: list[str] = []

    for i, step_desc in enumerate(steps, 1):
        # Captured before the attempt so failure handling can time the step.
        step_start = time.monotonic()
        try:
            step = await _execute_step(
                agent,
                task,
                step_desc,
                i,
                total_steps,
                completed_results,
                session_id,
            )
            result.steps.append(step)
            completed_results.append(f"Step {i}: {step.result[:200]}")
            await _broadcast_progress(
                "agentic.step_complete",
                {
                    "task_id": task_id,
                    "step": i,
                    "total": total_steps,
                    "description": step_desc,
                    "result": step.result[:200],
                },
            )
            if on_progress:
                await on_progress(step_desc, i, total_steps)

        except Exception as exc:  # broad catch intentional: agent.run can raise any error
            logger.warning("Agentic loop step %d failed: %s", i, exc)
            # The handler updates *result* and *completed_results* in place
            # and returns nothing, so there is no value to bind here.
            await _handle_step_failure(
                agent,
                step_desc,
                i,
                total_steps,
                task_id,
                exc,
                step_start,
                session_id,
                result,
                completed_results,
                on_progress,
            )

    return completed_results
|
||||
|
||||
|
||||
async def _handle_step_failure(
    agent,
    step_desc: str,
    step_num: int,
    total_steps: int,
    task_id: str,
    exc: Exception,
    step_start: float,
    session_id: str,
    result: AgenticResult,
    completed_results: list[str],
    on_progress: Callable | None,
) -> None:
    """Recover from a failed step by adapting it, or record a hard failure.

    If adaptation succeeds, the adapted step is appended to *result.steps*,
    summarised in *completed_results*, broadcast as ``agentic.step_adapted``
    and reported to *on_progress*. If anything in that path raises, a
    ``failed`` ``AgenticStep`` carrying both error messages is recorded
    instead. Both *result* and *completed_results* are mutated in place.
    """
    try:
        adapted = await _adapt_step(agent, step_desc, step_num, exc, step_start, session_id)
        result.steps.append(adapted)
        completed_results.append(f"Step {step_num} (adapted): {adapted.result[:200]}")

        event_payload = {
            "task_id": task_id,
            "step": step_num,
            "total": total_steps,
            "description": step_desc,
            "error": str(exc),
            "adaptation": adapted.result[:200],
        }
        await _broadcast_progress("agentic.step_adapted", event_payload)

        if on_progress:
            await on_progress(f"[Adapted] {step_desc}", step_num, total_steps)
    except Exception as adapt_error:  # broad catch intentional
        logger.error("Agentic loop adaptation also failed: %s", adapt_error)
        failed_step = AgenticStep(
            step_num=step_num,
            description=step_desc,
            result=f"Failed: {exc}; Adaptation also failed: {adapt_error}",
            status="failed",
            # Time elapsed since the original attempt began.
            duration_ms=int((time.monotonic() - step_start) * 1000),
        )
        result.steps.append(failed_step)
        completed_results.append(f"Step {step_num}: FAILED")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Core loop
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -265,65 +378,9 @@ async def run_agentic_loop(
|
||||
)
|
||||
|
||||
# Phase 2: Execution
|
||||
completed_results: list[str] = []
|
||||
for i, step_desc in enumerate(steps, 1):
|
||||
step_start = time.monotonic()
|
||||
try:
|
||||
step = await _execute_step(
|
||||
agent,
|
||||
task,
|
||||
step_desc,
|
||||
i,
|
||||
total_steps,
|
||||
completed_results,
|
||||
session_id,
|
||||
)
|
||||
result.steps.append(step)
|
||||
completed_results.append(f"Step {i}: {step.result[:200]}")
|
||||
await _broadcast_progress(
|
||||
"agentic.step_complete",
|
||||
{
|
||||
"task_id": task_id,
|
||||
"step": i,
|
||||
"total": total_steps,
|
||||
"description": step_desc,
|
||||
"result": step.result[:200],
|
||||
},
|
||||
)
|
||||
if on_progress:
|
||||
await on_progress(step_desc, i, total_steps)
|
||||
|
||||
except Exception as exc: # broad catch intentional: agent.run can raise any error
|
||||
logger.warning("Agentic loop step %d failed: %s", i, exc)
|
||||
try:
|
||||
step = await _adapt_step(agent, step_desc, i, exc, step_start, session_id)
|
||||
result.steps.append(step)
|
||||
completed_results.append(f"Step {i} (adapted): {step.result[:200]}")
|
||||
await _broadcast_progress(
|
||||
"agentic.step_adapted",
|
||||
{
|
||||
"task_id": task_id,
|
||||
"step": i,
|
||||
"total": total_steps,
|
||||
"description": step_desc,
|
||||
"error": str(exc),
|
||||
"adaptation": step.result[:200],
|
||||
},
|
||||
)
|
||||
if on_progress:
|
||||
await on_progress(f"[Adapted] {step_desc}", i, total_steps)
|
||||
except Exception as adapt_exc: # broad catch intentional
|
||||
logger.error("Agentic loop adaptation also failed: %s", adapt_exc)
|
||||
result.steps.append(
|
||||
AgenticStep(
|
||||
step_num=i,
|
||||
description=step_desc,
|
||||
result=f"Failed: {exc}; Adaptation also failed: {adapt_exc}",
|
||||
status="failed",
|
||||
duration_ms=int((time.monotonic() - step_start) * 1000),
|
||||
)
|
||||
)
|
||||
completed_results.append(f"Step {i}: FAILED")
|
||||
await _execute_all_steps(
|
||||
agent, task, task_id, steps, total_steps, session_id, result, on_progress
|
||||
)
|
||||
|
||||
# Phase 3: Summary
|
||||
_summarize(result, total_steps, was_truncated)
|
||||
|
||||
@@ -102,9 +102,11 @@ class GrokBackend:
|
||||
import httpx
|
||||
from openai import OpenAI
|
||||
|
||||
from config import settings
|
||||
|
||||
return OpenAI(
|
||||
api_key=self._api_key,
|
||||
base_url="https://api.x.ai/v1",
|
||||
base_url=settings.xai_base_url,
|
||||
timeout=httpx.Timeout(300.0),
|
||||
)
|
||||
|
||||
@@ -113,9 +115,11 @@ class GrokBackend:
|
||||
import httpx
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
from config import settings
|
||||
|
||||
return AsyncOpenAI(
|
||||
api_key=self._api_key,
|
||||
base_url="https://api.x.ai/v1",
|
||||
base_url=settings.xai_base_url,
|
||||
timeout=httpx.Timeout(300.0),
|
||||
)
|
||||
|
||||
@@ -260,6 +264,7 @@ class GrokBackend:
|
||||
},
|
||||
}
|
||||
except Exception as exc:
|
||||
logger.exception("Grok health check failed")
|
||||
return {
|
||||
"ok": False,
|
||||
"error": str(exc),
|
||||
@@ -426,6 +431,7 @@ class ClaudeBackend:
|
||||
)
|
||||
return {"ok": True, "error": None, "backend": "claude", "model": self._model}
|
||||
except Exception as exc:
|
||||
logger.exception("Claude health check failed")
|
||||
return {"ok": False, "error": str(exc), "backend": "claude", "model": self._model}
|
||||
|
||||
# ── Private helpers ───────────────────────────────────────────────────
|
||||
|
||||
135
src/timmy/cli.py
135
src/timmy/cli.py
@@ -37,6 +37,39 @@ def _is_interactive() -> bool:
|
||||
return hasattr(sys.stdin, "isatty") and sys.stdin.isatty()
|
||||
|
||||
|
||||
def _read_message_input(message: list[str]) -> str:
|
||||
"""Join CLI args into a message, reading from stdin when requested.
|
||||
|
||||
Returns the final message string. Raises ``typer.Exit(1)`` when
|
||||
stdin is explicitly requested (``-``) but empty.
|
||||
"""
|
||||
message_str = " ".join(message)
|
||||
|
||||
if message_str == "-" or not _is_interactive():
|
||||
try:
|
||||
stdin_content = sys.stdin.read().strip()
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
stdin_content = ""
|
||||
if stdin_content:
|
||||
message_str = stdin_content
|
||||
elif message_str == "-":
|
||||
typer.echo("No input provided via stdin.", err=True)
|
||||
raise typer.Exit(1)
|
||||
|
||||
return message_str
|
||||
|
||||
|
||||
def _resolve_session_id(session_id: str | None, new_session: bool) -> str:
|
||||
"""Return the effective session ID for a chat invocation."""
|
||||
import uuid
|
||||
|
||||
if session_id is not None:
|
||||
return session_id
|
||||
if new_session:
|
||||
return str(uuid.uuid4())
|
||||
return _CLI_SESSION_ID
|
||||
|
||||
|
||||
def _prompt_interactive(req, tool_name: str, tool_args: dict) -> None:
|
||||
"""Display tool details and prompt the human for approval."""
|
||||
description = format_action_description(tool_name, tool_args)
|
||||
@@ -139,10 +172,39 @@ def think(
|
||||
model_size: str | None = _MODEL_SIZE_OPTION,
|
||||
):
|
||||
"""Ask Timmy to think carefully about a topic."""
|
||||
timmy = create_timmy(backend=backend, model_size=model_size, session_id=_CLI_SESSION_ID)
|
||||
timmy = create_timmy(backend=backend, session_id=_CLI_SESSION_ID)
|
||||
timmy.print_response(f"Think carefully about: {topic}", stream=True, session_id=_CLI_SESSION_ID)
|
||||
|
||||
|
||||
def _read_message_input(message: list[str]) -> str:
|
||||
"""Join CLI arguments and read from stdin when appropriate."""
|
||||
message_str = " ".join(message)
|
||||
|
||||
if message_str == "-" or not _is_interactive():
|
||||
try:
|
||||
stdin_content = sys.stdin.read().strip()
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
stdin_content = ""
|
||||
if stdin_content:
|
||||
message_str = stdin_content
|
||||
elif message_str == "-":
|
||||
typer.echo("No input provided via stdin.", err=True)
|
||||
raise typer.Exit(1)
|
||||
|
||||
return message_str
|
||||
|
||||
|
||||
def _resolve_session_id(session_id: str | None, new_session: bool) -> str:
|
||||
"""Return the effective session ID based on CLI flags."""
|
||||
import uuid
|
||||
|
||||
if session_id is not None:
|
||||
return session_id
|
||||
if new_session:
|
||||
return str(uuid.uuid4())
|
||||
return _CLI_SESSION_ID
|
||||
|
||||
|
||||
@app.command()
|
||||
def chat(
|
||||
message: list[str] = typer.Argument(
|
||||
@@ -179,38 +241,13 @@ def chat(
|
||||
|
||||
Read from stdin by passing "-" as the message or piping input.
|
||||
"""
|
||||
import uuid
|
||||
message_str = _read_message_input(message)
|
||||
session_id = _resolve_session_id(session_id, new_session)
|
||||
timmy = create_timmy(backend=backend, session_id=session_id)
|
||||
|
||||
# Join multiple arguments into a single message string
|
||||
message_str = " ".join(message)
|
||||
|
||||
# Handle stdin input if "-" is passed or stdin is not a tty
|
||||
if message_str == "-" or not _is_interactive():
|
||||
try:
|
||||
stdin_content = sys.stdin.read().strip()
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
stdin_content = ""
|
||||
if stdin_content:
|
||||
message_str = stdin_content
|
||||
elif message_str == "-":
|
||||
typer.echo("No input provided via stdin.", err=True)
|
||||
raise typer.Exit(1)
|
||||
|
||||
if session_id is not None:
|
||||
pass # use the provided value
|
||||
elif new_session:
|
||||
session_id = str(uuid.uuid4())
|
||||
else:
|
||||
session_id = _CLI_SESSION_ID
|
||||
timmy = create_timmy(backend=backend, model_size=model_size, session_id=session_id)
|
||||
|
||||
# Use agent.run() so we can intercept paused runs for tool confirmation.
|
||||
run_output = timmy.run(message_str, stream=False, session_id=session_id)
|
||||
|
||||
# Handle paused runs — dangerous tools need user approval
|
||||
run_output = _handle_tool_confirmation(timmy, run_output, session_id, autonomous=autonomous)
|
||||
|
||||
# Print the final response
|
||||
content = run_output.content if hasattr(run_output, "content") else str(run_output)
|
||||
if content:
|
||||
from timmy.session import _clean_response
|
||||
@@ -279,7 +316,7 @@ def status(
|
||||
model_size: str | None = _MODEL_SIZE_OPTION,
|
||||
):
|
||||
"""Print Timmy's operational status."""
|
||||
timmy = create_timmy(backend=backend, model_size=model_size, session_id=_CLI_SESSION_ID)
|
||||
timmy = create_timmy(backend=backend, session_id=_CLI_SESSION_ID)
|
||||
timmy.print_response(STATUS_PROMPT, stream=False, session_id=_CLI_SESSION_ID)
|
||||
|
||||
|
||||
@@ -452,5 +489,43 @@ def focus(
|
||||
typer.echo("No active focus (broad mode).")
|
||||
|
||||
|
||||
@app.command(name="healthcheck")
|
||||
def healthcheck(
|
||||
json_output: bool = typer.Option(False, "--json", "-j", help="Output as JSON"),
|
||||
verbose: bool = typer.Option(
|
||||
False, "--verbose", "-v", help="Show verbose output including issue details"
|
||||
),
|
||||
quiet: bool = typer.Option(False, "--quiet", "-q", help="Only show status line (no details)"),
|
||||
):
|
||||
"""Quick health snapshot before coding.
|
||||
|
||||
Shows CI status, critical issues (P0/P1), test flakiness, and token economy.
|
||||
Fast execution (< 5 seconds) for pre-work checks.
|
||||
|
||||
Refs: #710
|
||||
"""
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
script_path = (
|
||||
Path(__file__).resolve().parent.parent.parent
|
||||
/ "timmy_automations"
|
||||
/ "daily_run"
|
||||
/ "health_snapshot.py"
|
||||
)
|
||||
|
||||
cmd = [sys.executable, str(script_path)]
|
||||
if json_output:
|
||||
cmd.append("--json")
|
||||
if verbose:
|
||||
cmd.append("--verbose")
|
||||
if quiet:
|
||||
cmd.append("--quiet")
|
||||
|
||||
result = subprocess.run(cmd)
|
||||
raise typer.Exit(result.returncode)
|
||||
|
||||
|
||||
def main():
|
||||
app()
|
||||
|
||||
@@ -174,15 +174,8 @@ class ConversationManager:
|
||||
|
||||
return None
|
||||
|
||||
def should_use_tools(self, message: str, context: ConversationContext) -> bool:
|
||||
"""Determine if this message likely requires tools.
|
||||
|
||||
Returns True if tools are likely needed, False for simple chat.
|
||||
"""
|
||||
message_lower = message.lower().strip()
|
||||
|
||||
# Tool keywords that suggest tool usage is needed
|
||||
tool_keywords = [
|
||||
_TOOL_KEYWORDS = frozenset(
|
||||
{
|
||||
"search",
|
||||
"look up",
|
||||
"find",
|
||||
@@ -203,10 +196,11 @@ class ConversationManager:
|
||||
"shell",
|
||||
"command",
|
||||
"install",
|
||||
]
|
||||
}
|
||||
)
|
||||
|
||||
# Chat-only keywords that definitely don't need tools
|
||||
chat_only = [
|
||||
_CHAT_ONLY_KEYWORDS = frozenset(
|
||||
{
|
||||
"hello",
|
||||
"hi ",
|
||||
"hey",
|
||||
@@ -221,30 +215,47 @@ class ConversationManager:
|
||||
"goodbye",
|
||||
"tell me about yourself",
|
||||
"what can you do",
|
||||
]
|
||||
}
|
||||
)
|
||||
|
||||
# Check for chat-only patterns first
|
||||
for pattern in chat_only:
|
||||
if pattern in message_lower:
|
||||
return False
|
||||
_SIMPLE_QUESTION_PREFIXES = ("what is", "who is", "how does", "why is", "when did", "where is")
|
||||
_TIME_WORDS = ("today", "now", "current", "latest", "this week", "this month")
|
||||
|
||||
# Check for tool keywords
|
||||
for keyword in tool_keywords:
|
||||
if keyword in message_lower:
|
||||
return True
|
||||
def _is_chat_only(self, message_lower: str) -> bool:
|
||||
"""Return True if the message matches a chat-only pattern."""
|
||||
return any(kw in message_lower for kw in self._CHAT_ONLY_KEYWORDS)
|
||||
|
||||
# Simple questions (starting with what, who, how, why, when, where)
|
||||
# usually don't need tools unless about current/real-time info
|
||||
simple_question_words = ["what is", "who is", "how does", "why is", "when did", "where is"]
|
||||
for word in simple_question_words:
|
||||
if message_lower.startswith(word):
|
||||
# Check if it's asking about current/real-time info
|
||||
time_words = ["today", "now", "current", "latest", "this week", "this month"]
|
||||
if any(t in message_lower for t in time_words):
|
||||
return True
|
||||
return False
|
||||
def _has_tool_keyword(self, message_lower: str) -> bool:
|
||||
"""Return True if the message contains a tool-related keyword."""
|
||||
return any(kw in message_lower for kw in self._TOOL_KEYWORDS)
|
||||
|
||||
def _is_simple_question(self, message_lower: str) -> bool | None:
|
||||
"""Check if message is a simple question.
|
||||
|
||||
Returns True if it needs tools (real-time info), False if it
|
||||
doesn't, or None if the message isn't a simple question.
|
||||
"""
|
||||
for prefix in self._SIMPLE_QUESTION_PREFIXES:
|
||||
if message_lower.startswith(prefix):
|
||||
return any(t in message_lower for t in self._TIME_WORDS)
|
||||
return None
|
||||
|
||||
def should_use_tools(self, message: str, context: ConversationContext) -> bool:
|
||||
"""Determine if this message likely requires tools.
|
||||
|
||||
Returns True if tools are likely needed, False for simple chat.
|
||||
"""
|
||||
message_lower = message.lower().strip()
|
||||
|
||||
if self._is_chat_only(message_lower):
|
||||
return False
|
||||
if self._has_tool_keyword(message_lower):
|
||||
return True
|
||||
|
||||
simple = self._is_simple_question(message_lower)
|
||||
if simple is not None:
|
||||
return simple
|
||||
|
||||
# Default: don't use tools for unclear cases
|
||||
return False
|
||||
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user