From 317140efcf493e50856333f0278b251d3e8aa575 Mon Sep 17 00:00:00 2001 From: Google AI Agent Date: Sun, 5 Apr 2026 21:38:56 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20Frontier=20Local=20Agenda=20=E2=80=94?= =?UTF-8?q?=20Gemma=20Scout=20&=20Local=20RAG=20(#227)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Google AI Agent Co-committed-by: Google AI Agent --- FRONTIER_LOCAL.md | 30 ++++++++++++++++++++++++++++++ config.yaml | 11 ++++++++--- 2 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 FRONTIER_LOCAL.md diff --git a/FRONTIER_LOCAL.md b/FRONTIER_LOCAL.md new file mode 100644 index 00000000..8e21c07f --- /dev/null +++ b/FRONTIER_LOCAL.md @@ -0,0 +1,30 @@ + +# The Frontier Local Agenda: Technical Standards v1.0 + +This document defines the "Frontier Local" agenda — the technical strategy for achieving sovereign, high-performance intelligence on consumer hardware. + +## 1. The Multi-Layered Mind (MLM) +We do not rely on a single "God Model." We use a hierarchy of local intelligence: + +- **Reflex Layer (Gemma 2B):** Instantaneous tactical decisions, input classification, and simple acknowledgments. Latency: <100ms. +- **Reasoning Layer (Hermes 14B / Llama 3 8B):** General-purpose problem solving, coding, and tool use. Latency: <1s. +- **Synthesis Layer (Llama 3 70B / Qwen 72B):** Deep architectural planning, creative synthesis, and complex debugging. Latency: <5s. + +## 2. Local-First RAG (Retrieval Augmented Generation) +Sovereignty requires that your memories stay on your disk. + +- **Embedding:** Use `nomic-embed-text` or `all-minilm` locally via Ollama. +- **Vector Store:** Use a local instance of ChromaDB or LanceDB. +- **Privacy:** Zero data leaves the local network for indexing or retrieval. + +## 3. Speculative Decoding +Where supported by the harness (e.g., llama.cpp), use Gemma 2B as a draft model for larger Hermes/Llama models to achieve 2x-3x speedups in token generation. 
+
+## 4. The "Gemma Scout" Protocol
+Gemma 2B is our "Scout." It pre-processes every user request to:
+1. Detect PII (Personally Identifiable Information) for redaction.
+2. Determine if the request requires the "Reasoning Layer" or can be handled by the "Reflex Layer."
+3. Extract keywords for local memory retrieval.
+
+---
+*Intelligence is a utility. Sovereignty is a right. The Frontier is Local.*
diff --git a/config.yaml b/config.yaml
index 373cdeee..9305859a 100644
--- a/config.yaml
+++ b/config.yaml
@@ -20,7 +20,12 @@ terminal:
   modal_image: nikolaik/python-nodejs:python3.11-nodejs20
   daytona_image: nikolaik/python-nodejs:python3.11-nodejs20
   container_cpu: 1
+  embeddings:
+    provider: ollama
+    model: nomic-embed-text
+    base_url: http://localhost:11434/v1
+
   container_memory: 5120
   container_disk: 51200
   container_persistent: true
   docker_volumes: []
@@ -43,8 +48,8 @@ compression:
   summary_base_url: ''
   smart_model_routing:
     enabled: true
-    max_simple_chars: 200
-    max_simple_words: 35
+    max_simple_chars: 400
+    max_simple_words: 75
     cheap_model:
       provider: 'ollama'
       model: 'gemma2:2b'