From 9146bcb4b2fc7e244c01233ddabb0939ad550a58 Mon Sep 17 00:00:00 2001 From: Google AI Agent Date: Sun, 5 Apr 2026 21:32:56 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20Sovereign=20Efficiency=20=E2=80=94=20Lo?= =?UTF-8?q?cal-First=20&=20Cost=20Optimization=20(#226)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Google AI Agent Co-committed-by: Google AI Agent --- COST_SAVING.md | 41 +++++++++++++++++++++++++++++++++++++++++ config.yaml | 10 +++++----- 2 files changed, 46 insertions(+), 5 deletions(-) create mode 100644 COST_SAVING.md diff --git a/COST_SAVING.md b/COST_SAVING.md new file mode 100644 index 00000000..c66880cf --- /dev/null +++ b/COST_SAVING.md @@ -0,0 +1,41 @@ + +# Sovereign Efficiency: Local-First & Cost Saving Guide + +This guide outlines the strategy for eliminating waste and optimizing flow within the Timmy Foundation ecosystem. + +## 1. Smart Model Routing (SMR) +**Goal:** Use the right tool for the job. Don't use a 14B or 70B model to say "Hello" or "Task complete." + +- **Action:** Enable `smart_model_routing` in `config.yaml`. +- **Logic:** + - Simple acknowledgments and status updates -> **Gemma 2B / Phi-3 Mini** (Local). + - Complex reasoning and coding -> **Hermes 14B / Llama 3 70B** (Local). + - Fortress-grade synthesis -> **Claude 3.5 Sonnet / Gemini 1.5 Pro** (Cloud - Emergency Only). + +## 2. Context Compression +**Goal:** Keep the KV cache lean. Long sessions shouldn't slow down the "Thought Stream." + +- **Action:** Enable `compression` in `config.yaml`. +- **Threshold:** Set to `0.5` to trigger summarization when the context is half full. +- **Protect Last N:** Keep the last 20 turns in raw format for immediate coherence. + +## 3. Parallel Symbolic Execution (PSE) Optimization +**Goal:** Reduce redundant reasoning cycles in The Nexus. + +- **Action:** The Nexus now uses **Adaptive Reasoning Frequency**. If the world stability is high (>0.9), reasoning cycles are halved. +- **Benefit:** Reduces CPU/GPU load on the local harness, leaving more headroom for inference. + +## 4. L402 Cost Transparency +**Goal:** Treat compute as a finite resource. + +- **Action:** Use the **Sovereign Health HUD** in The Nexus to monitor L402 challenges. +- **Metric:** Track "Sats per Thought" to identify which agents are "token-heavy." + +## 5. Waste Elimination (Ghost Triage) +**Goal:** Remove stale state. + +- **Action:** Run the `triage_sprint.ts` script weekly to assign or archive stale issues. +- **Action:** Use `hermes --flush-memories` to clear outdated context that no longer serves the current mission. + +--- +*Sovereignty is not just about ownership; it is about stewardship of resources.* diff --git a/config.yaml b/config.yaml index 347c8377..373cdeee 100644 --- a/config.yaml +++ b/config.yaml @@ -34,7 +34,7 @@ checkpoints: enabled: true max_snapshots: 50 compression: - enabled: false + enabled: true threshold: 0.5 target_ratio: 0.2 protect_last_n: 20 @@ -42,13 +42,13 @@ compression: summary_provider: '' summary_base_url: '' smart_model_routing: - enabled: false + enabled: true max_simple_chars: 200 max_simple_words: 35 cheap_model: - provider: '' - model: '' - base_url: '' + provider: 'ollama' + model: 'gemma2:2b' + base_url: 'http://localhost:11434/v1' api_key: '' auxiliary: vision: