From 317140efcf493e50856333f0278b251d3e8aa575 Mon Sep 17 00:00:00 2001 From: Google AI Agent Date: Sun, 5 Apr 2026 21:38:56 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20Frontier=20Local=20Agenda=20=E2=80=94?= =?UTF-8?q?=20Gemma=20Scout=20&=20Local=20RAG=20(#227)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Google AI Agent Co-committed-by: Google AI Agent --- FRONTIER_LOCAL.md | 30 ++++++++++++++++++++++++++++++ config.yaml | 11 ++++++++--- 2 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 FRONTIER_LOCAL.md diff --git a/FRONTIER_LOCAL.md b/FRONTIER_LOCAL.md new file mode 100644 index 00000000..8e21c07f --- /dev/null +++ b/FRONTIER_LOCAL.md @@ -0,0 +1,30 @@ + +# The Frontier Local Agenda: Technical Standards v1.0 + +This document defines the "Frontier Local" agenda — the technical strategy for achieving sovereign, high-performance intelligence on consumer hardware. + +## 1. The Multi-Layered Mind (MLM) +We do not rely on a single "God Model." We use a hierarchy of local intelligence: + +- **Reflex Layer (Gemma 2B):** Instantaneous tactical decisions, input classification, and simple acknowledgments. Latency: <100ms. +- **Reasoning Layer (Hermes 14B / Llama 3 8B):** General-purpose problem solving, coding, and tool use. Latency: <1s. +- **Synthesis Layer (Llama 3 70B / Qwen 72B):** Deep architectural planning, creative synthesis, and complex debugging. Latency: <5s. + +## 2. Local-First RAG (Retrieval Augmented Generation) +Sovereignty requires that your memories stay on your disk. + +- **Embedding:** Use `nomic-embed-text` or `all-minilm` locally via Ollama. +- **Vector Store:** Use a local instance of ChromaDB or LanceDB. +- **Privacy:** Zero data leaves the local network for indexing or retrieval. + +## 3. Speculative Decoding +Where supported by the harness (e.g., llama.cpp), use Gemma 2B as a draft model for larger Hermes/Llama models to achieve 2x-3x speedups in token generation. 
+
+## 4. The "Gemma Scout" Protocol
+Gemma 2B is our "Scout." It pre-processes every user request to:
+1. Detect PII (Personally Identifiable Information) for redaction.
+2. Determine if the request requires the "Reasoning Layer" or can be handled by the "Reflex Layer."
+3. Extract keywords for local memory retrieval.
+
+---
+*Intelligence is a utility. Sovereignty is a right. The Frontier is Local.*
diff --git a/config.yaml b/config.yaml
index 373cdeee..9305859a 100644
--- a/config.yaml
+++ b/config.yaml
@@ -20,7 +20,12 @@ terminal:
   modal_image: nikolaik/python-nodejs:python3.11-nodejs20
   daytona_image: nikolaik/python-nodejs:python3.11-nodejs20
   container_cpu: 1
+  embeddings:
+    provider: ollama
+    model: nomic-embed-text
+    base_url: http://localhost:11434/v1
+
   container_memory: 5120
   container_disk: 51200
   container_persistent: true
   docker_volumes: []
@@ -43,8 +48,8 @@ compression:
   summary_base_url: ''
   smart_model_routing:
     enabled: true
-    max_simple_chars: 200
-    max_simple_words: 35
+    max_simple_chars: 400
+    max_simple_words: 75
     cheap_model:
       provider: 'ollama'
       model: 'gemma2:2b'