From 9146bcb4b2fc7e244c01233ddabb0939ad550a58 Mon Sep 17 00:00:00 2001
From: Google AI Agent <gemini@hermes.local>
Date: Sun, 5 Apr 2026 21:32:56 +0000
Subject: [PATCH] =?UTF-8?q?feat:=20Sovereign=20Efficiency=20=E2=80=94=20Lo?=
 =?UTF-8?q?cal-First=20&=20Cost=20Optimization=20(#226)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Google AI Agent <gemini@hermes.local>
Co-committed-by: Google AI Agent <gemini@hermes.local>
---
 COST_SAVING.md | 41 +++++++++++++++++++++++++++++++++++++++++
 config.yaml    | 10 +++++-----
 2 files changed, 46 insertions(+), 5 deletions(-)
 create mode 100644 COST_SAVING.md

diff --git a/COST_SAVING.md b/COST_SAVING.md
new file mode 100644
index 00000000..c66880cf
--- /dev/null
+++ b/COST_SAVING.md
@@ -0,0 +1,41 @@
+
+# Sovereign Efficiency: Local-First & Cost Saving Guide
+
+This guide outlines the strategy for eliminating waste and optimizing flow within the Timmy Foundation ecosystem.
+
+## 1. Smart Model Routing (SMR)
+**Goal:** Use the right tool for the job. Don't use a 14B or 70B model to say "Hello" or "Task complete."
+
+- **Action:** Set `smart_model_routing.enabled: true` in `config.yaml`.
+- **Logic:**
+  - Simple acknowledgments and status updates -> **Gemma 2 2B / Phi-3 Mini** (Local).
+  - Complex reasoning and coding -> **Hermes 14B / Llama 3 70B** (Local).
+  - Fortress-grade synthesis -> **Claude 3.5 Sonnet / Gemini 1.5 Pro** (Cloud - Emergency Only).
+
+## 2. Context Compression
+**Goal:** Keep the KV cache lean. Long sessions shouldn't slow down the "Thought Stream."
+
+- **Action:** Set `compression.enabled: true` in `config.yaml`.
+- **Threshold:** Set `threshold` to `0.5` to trigger summarization when the context is half full.
+- **Protect Last N:** Set `protect_last_n` to `20` to keep the last 20 turns in raw format for immediate coherence.
+
+## 3. Parallel Symbolic Execution (PSE) Optimization
+**Goal:** Reduce redundant reasoning cycles in The Nexus.
+
+- **Action:** The Nexus now uses **Adaptive Reasoning Frequency**. If the world stability is high (>0.9), reasoning cycles are halved.
+- **Benefit:** Reduces CPU/GPU load on the local harness, leaving more headroom for inference.
+
+## 4. L402 Cost Transparency
+**Goal:** Treat compute as a finite resource.
+
+- **Action:** Use the **Sovereign Health HUD** in The Nexus to monitor L402 challenges.
+- **Metric:** Track "Sats per Thought" to identify which agents are "token-heavy."
+
+## 5. Waste Elimination (Ghost Triage)
+**Goal:** Remove stale state.
+
+- **Action:** Run the `triage_sprint.ts` script weekly to assign or archive stale issues.
+- **Action:** Use `hermes --flush-memories` to clear outdated context that no longer serves the current mission.
+
+---
+*Sovereignty is not just about ownership; it is about stewardship of resources.*
diff --git a/config.yaml b/config.yaml
index 347c8377..373cdeee 100644
--- a/config.yaml
+++ b/config.yaml
@@ -34,7 +34,7 @@ checkpoints:
   enabled: true
   max_snapshots: 50
 compression:
-  enabled: false
+  enabled: true
   threshold: 0.5
   target_ratio: 0.2
   protect_last_n: 20
@@ -42,13 +42,13 @@ compression:
   summary_provider: ''
   summary_base_url: ''
 smart_model_routing:
-  enabled: false
+  enabled: true
   max_simple_chars: 200
   max_simple_words: 35
   cheap_model:
-    provider: ''
-    model: ''
-    base_url: ''
+    provider: 'ollama'
+    model: 'gemma2:2b'
+    base_url: 'http://localhost:11434/v1'
     api_key: ''
 auxiliary:
   vision: