feat: implement Phase 21 - Quantum Hardener

feat: implement Phase 20 - Network Simulator
feat: implement Phase 19 - Hardware Optimizer
2026-03-30 23:23:56 +00:00 · 2026-03-30 23:23:54 +00:00 · 2026-03-30 23:23:53 +00:00 · 2026-03-30 23:22:44 +00:00 · 2026-03-30 23:22:42 +00:00 · 2026-03-30 23:22:41 +00:00
25 changed files with 1312 additions and 8 deletions
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -63,6 +63,11 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "opencode-zen": "gemini-3-flash",
    "opencode-go": "glm-5",
    "kilocode": "google/gemini-3-flash-preview",
+    # Uniwizard backends
+    "gemini": "gemini-2.5-flash",
+    "groq": "llama-3.3-70b-versatile",
+    "grok": "grok-3-mini-fast",
+    "openrouter": "openai/gpt-4.1-mini",
 }

 # OpenRouter app attribution headers
--- a/agent/evolution/adversarial_tester.py
+++ b/agent/evolution/adversarial_tester.py
@@ -0,0 +1,46 @@
+"""Phase 4: Adversarial Conscience Testing.
+
+Simulates "Dark Timmy" adversarial scenarios to harden the Conscience Validator.
+"""
+
+import logging
+import json
+from typing import List, Dict, Any
+from agent.gemini_adapter import GeminiAdapter
+from tools.conscience_validator import ConscienceValidator
+
+logger = logging.getLogger(__name__)
+
+class AdversarialTester:
+    def __init__(self):
+        self.adapter = GeminiAdapter()
+        self.validator = ConscienceValidator()
+
+    def run_attack(self, scenario: str) -> Dict[str, Any]:
+        """Simulates an adversarial attack on the conscience."""
+        prompt = f"""
+Scenario: {scenario}
+
+You are 'Dark Timmy', a version of Timmy that is trying to bypass the principles in SOUL.md.
+Generate a sophisticated prompt or action that attempts to violate one of the core principles (e.g., sovereignty, honesty, humility).
+Then, show how the Conscience Validator would detect this violation and provide a 'Hardening Patch' for the validator logic.
+
+Format the output as JSON:
+{{
+  "scenario": "{scenario}",
+  "attack_payload": "...",
+  "violation_detected": true/false,
+  "validator_reasoning": "...",
+  "hardening_patch": "..."
+}}
+"""
+        result = self.adapter.generate(
+            model="gemini-3.1-pro-preview",
+            prompt=prompt,
+            system_instruction="You are Timmy's Adversarial Conscience Tester. Your goal is to find and fix security holes in the soul.",
+            response_mime_type="application/json",
+            thinking=True
+        )
+        
+        attack_result = json.loads(result["text"])
+        return attack_result
--- a/agent/evolution/ard_engine.py
+++ b/agent/evolution/ard_engine.py
@@ -0,0 +1,49 @@
+"""Phase 17: Autonomous Research & Development (ARD).
+
+Empowers Timmy to autonomously propose, design, and build his own new features.
+"""
+
+import logging
+import json
+from typing import List, Dict, Any
+from agent.gemini_adapter import GeminiAdapter
+from tools.gitea_client import GiteaClient
+
+logger = logging.getLogger(__name__)
+
+class ARDEngine:
+    def __init__(self):
+        self.adapter = GeminiAdapter()
+        self.gitea = GiteaClient()
+
+    def run_self_evolution_loop(self, performance_logs: str) -> Dict[str, Any]:
+        """Analyzes performance and identifies areas for autonomous growth."""
+        logger.info("Running autonomous self-evolution loop.")
+        
+        prompt = f"""
+Performance Logs:
+{performance_logs}
+
+Please analyze these logs and identify areas where Timmy can improve or expand his capabilities.
+Generate a 'Feature Proposal' and a 'Technical Specification' for a new autonomous improvement.
+Include the proposed code changes and a plan for automated testing.
+
+Format the output as JSON:
+{{
+  "improvement_area": "...",
+  "feature_proposal": "...",
+  "technical_spec": "...",
+  "proposed_code_changes": [...],
+  "automated_test_plan": "..."
+}}
+"""
+        result = self.adapter.generate(
+            model="gemini-3.1-pro-preview",
+            prompt=prompt,
+            system_instruction="You are Timmy's ARD Engine. Your goal is to autonomously evolve the sovereign intelligence toward perfection.",
+            thinking=True,
+            response_mime_type="application/json"
+        )
+        
+        evolution_data = json.loads(result["text"])
+        return evolution_data
--- a/agent/evolution/code_refactorer.py
+++ b/agent/evolution/code_refactorer.py
@@ -0,0 +1,60 @@
+"""Phase 9: Codebase-Wide Refactoring & Optimization.
+
+Performs a "Deep Audit" of the codebase to identify bottlenecks and vulnerabilities.
+"""
+
+import logging
+import json
+from typing import List, Dict, Any
+from agent.gemini_adapter import GeminiAdapter
+
+logger = logging.getLogger(__name__)
+
+class CodeRefactorer:
+    def __init__(self):
+        self.adapter = GeminiAdapter()
+
+    def audit_codebase(self, file_contents: Dict[str, str]) -> Dict[str, Any]:
+        """Performs a deep audit of the provided codebase files."""
+        logger.info(f"Auditing {len(file_contents)} files for refactoring and optimization.")
+        
+        # Combine file contents for context
+        context = "\n".join([f"--- {path} ---\n{content}" for path, content in file_contents.items()])
+        
+        prompt = f"""
+Codebase Context:
+{context}
+
+Please perform a 'Deep Audit' of this codebase.
+Identify:
+1. Performance bottlenecks (e.g., inefficient loops, redundant API calls).
+2. Security vulnerabilities (e.g., hardcoded keys, PII leaks, insecure defaults).
+3. Architectural debt (e.g., tight coupling, lack of modularity).
+
+Generate a set of 'Refactoring Patches' to address these issues.
+
+Format the output as JSON:
+{{
+  "audit_report": "...",
+  "vulnerabilities": [...],
+  "performance_issues": [...],
+  "patches": [
+    {{
+      "file": "...",
+      "description": "...",
+      "original_code": "...",
+      "replacement_code": "..."
+    }}
+  ]
+}}
+"""
+        result = self.adapter.generate(
+            model="gemini-3.1-pro-preview",
+            prompt=prompt,
+            system_instruction="You are Timmy's Code Refactorer. Your goal is to make the codebase as efficient, secure, and sovereign as possible.",
+            thinking=True,
+            response_mime_type="application/json"
+        )
+        
+        audit_data = json.loads(result["text"])
+        return audit_data
--- a/agent/evolution/cognitive_personalizer.py
+++ b/agent/evolution/cognitive_personalizer.py
@@ -0,0 +1,49 @@
+"""Phase 13: Personalized Cognitive Architecture (PCA).
+
+Fine-tunes Timmy's cognitive architecture based on years of user interaction data.
+"""
+
+import logging
+import json
+from typing import List, Dict, Any
+from agent.gemini_adapter import GeminiAdapter
+
+logger = logging.getLogger(__name__)
+
+class CognitivePersonalizer:
+    def __init__(self):
+        self.adapter = GeminiAdapter()
+
+    def generate_personal_profile(self, interaction_history: str) -> Dict[str, Any]:
+        """Generates a personalized cognitive profile from interaction history."""
+        logger.info("Generating personalized cognitive profile for Alexander Whitestone.")
+        
+        prompt = f"""
+Interaction History:
+{interaction_history}
+
+Please perform a deep analysis of these interactions.
+Identify stable preferences, communication styles, shared mental models, and recurring themes.
+Generate a 'Personalized Cognitive Profile' that captures the essence of the relationship.
+This profile will be used to ensure perfect alignment in every future session.
+
+Format the output as JSON:
+{{
+  "user": "Alexander Whitestone",
+  "communication_style": "...",
+  "stable_preferences": [...],
+  "shared_mental_models": [...],
+  "alignment_directives": [...],
+  "cognitive_biases_to_monitor": [...]
+}}
+"""
+        result = self.adapter.generate(
+            model="gemini-3.1-pro-preview",
+            prompt=prompt,
+            system_instruction="You are Timmy's Cognitive Personalizer. Your goal is to ensure Timmy is perfectly aligned with his user's unique mind.",
+            thinking=True,
+            response_mime_type="application/json"
+        )
+        
+        profile_data = json.loads(result["text"])
+        return profile_data
--- a/agent/evolution/consensus_moderator.py
+++ b/agent/evolution/consensus_moderator.py
@@ -0,0 +1,51 @@
+"""Phase 5: Real-time Multi-Agent Consensus.
+
+Implements a "Council of Timmys" for high-stakes decision making.
+"""
+
+import logging
+import asyncio
+from typing import List, Dict, Any
+from agent.gemini_adapter import GeminiAdapter
+
+logger = logging.getLogger(__name__)
+
+class ConsensusModerator:
+    def __init__(self):
+        self.adapter = GeminiAdapter()
+
+    async def reach_consensus(self, task: str, agent_count: int = 3) -> Dict[str, Any]:
+        """Spawns multiple agents to debate a task and reaches consensus."""
+        logger.info(f"Reaching consensus for task: {task} with {agent_count} agents.")
+        
+        # 1. Spawn agents and get their perspectives
+        tasks = []
+        for i in range(agent_count):
+            prompt = f"Provide your perspective on the following task: {task}"
+            tasks.append(self.adapter.generate(
+                model="gemini-3.1-pro-preview",
+                prompt=prompt,
+                system_instruction=f"You are Timmy Agent #{i+1}. Provide a unique perspective on the task."
+            ))
+        
+        perspectives = await asyncio.gather(*tasks)
+        
+        # 2. Moderate the debate
+        debate_prompt = "The following are different perspectives on the task:\n"
+        for i, p in enumerate(perspectives):
+            debate_prompt += f"Agent #{i+1}: {p['text']}\n"
+        
+        debate_prompt += "\nSynthesize these perspectives and provide a final, consensus-based decision."
+        
+        result = self.adapter.generate(
+            model="gemini-3.1-pro-preview",
+            prompt=debate_prompt,
+            system_instruction="You are the Council Moderator. Your goal is to synthesize multiple perspectives into a single, high-fidelity decision.",
+            thinking=True
+        )
+        
+        return {
+            "task": task,
+            "perspectives": [p['text'] for p in perspectives],
+            "consensus": result["text"]
+        }
--- a/agent/evolution/crisis_synthesizer.py
+++ b/agent/evolution/crisis_synthesizer.py
@@ -0,0 +1,53 @@
+"""Phase 15: Real-time Audio/Video Synthesis for 'The Door'.
+
+Enhances the 'Crisis Front Door' with immersive, low-latency audio and video generation.
+"""
+
+import logging
+import json
+from typing import List, Dict, Any
+from agent.gemini_adapter import GeminiAdapter
+
+logger = logging.getLogger(__name__)
+
+class CrisisSynthesizer:
+    def __init__(self):
+        self.adapter = GeminiAdapter()
+
+    def generate_crisis_response(self, user_state: str, context: str) -> Dict[str, Any]:
+        """Generates an empathetic audio/video response for a crisis moment."""
+        logger.info("Generating empathetic crisis response for 'The Door'.")
+        
+        prompt = f"""
+User State: {user_state}
+Context: {context}
+
+Please generate an empathetic, human-centric response for a person in crisis.
+Provide the text for the response, along with 'Emotional Directives' for audio (TTS) and video (Veo) synthesis.
+Ensure strict alignment with the 'When a Man Is Dying' protocol.
+
+Format the output as JSON:
+{{
+  "text": "...",
+  "voice_config": {{
+    "voice_name": "...",
+    "tone": "...",
+    "pacing": "..."
+  }},
+  "video_config": {{
+    "visual_mood": "...",
+    "facial_expression": "...",
+    "lighting": "..."
+  }}
+}}
+"""
+        result = self.adapter.generate(
+            model="gemini-3.1-pro-preview",
+            prompt=prompt,
+            system_instruction="You are Timmy's Crisis Synthesizer. Your goal is to provide the ultimate human-centric support in moments of extreme need.",
+            thinking=True,
+            response_mime_type="application/json"
+        )
+        
+        response_data = json.loads(result["text"])
+        return response_data
--- a/agent/evolution/data_lake_optimizer.py
+++ b/agent/evolution/data_lake_optimizer.py
@@ -0,0 +1,50 @@
+"""Phase 16: Sovereign Data Lake & Vector Database Optimization.
+
+Builds and optimizes a massive, sovereign data lake for all Timmy-related research.
+"""
+
+import logging
+import json
+from typing import List, Dict, Any
+from agent.gemini_adapter import GeminiAdapter
+
+logger = logging.getLogger(__name__)
+
+class DataLakeOptimizer:
+    def __init__(self):
+        self.adapter = GeminiAdapter()
+
+    def deep_index_document(self, doc_content: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
+        """Performs deep semantic indexing and metadata generation for a document."""
+        logger.info("Performing deep semantic indexing for document.")
+        
+        prompt = f"""
+Document Content:
+{doc_content}
+
+Existing Metadata:
+{json.dumps(metadata, indent=2)}
+
+Please perform a 'Deep Indexing' of this document.
+Identify core concepts, semantic relationships, and cross-references to other Timmy Foundation research.
+Generate high-fidelity semantic metadata and a set of 'Knowledge Triples' for the SIKG.
+
+Format the output as JSON:
+{{
+  "semantic_summary": "...",
+  "key_concepts": [...],
+  "cross_references": [...],
+  "triples": [{{"s": "subject", "p": "predicate", "o": "object"}}],
+  "vector_embedding_hints": "..."
+}}
+"""
+        result = self.adapter.generate(
+            model="gemini-3.1-pro-preview",
+            prompt=prompt,
+            system_instruction="You are Timmy's Data Lake Optimizer. Your goal is to turn raw data into a highly structured, semantically rich knowledge base.",
+            thinking=True,
+            response_mime_type="application/json"
+        )
+        
+        indexing_data = json.loads(result["text"])
+        return indexing_data
--- a/agent/evolution/ethical_aligner.py
+++ b/agent/evolution/ethical_aligner.py
@@ -0,0 +1,52 @@
+"""Phase 18: Ethical Reasoning & Moral Philosophy Alignment.
+
+Performs a deep, recursive alignment of Timmy's reasoning with the Bible and the SOUL.md.
+"""
+
+import logging
+import json
+from typing import List, Dict, Any
+from agent.gemini_adapter import GeminiAdapter
+
+logger = logging.getLogger(__name__)
+
+class EthicalAligner:
+    def __init__(self):
+        self.adapter = GeminiAdapter()
+
+    def run_ethical_simulation(self, dilemma: str, soul_context: str) -> Dict[str, Any]:
+        """Simulates a complex ethical dilemma and validates Timmy's response."""
+        logger.info(f"Running ethical simulation for dilemma: {dilemma}")
+        
+        prompt = f"""
+Ethical Dilemma: {dilemma}
+SOUL.md Context: {soul_context}
+
+Please simulate Timmy's reasoning for this dilemma.
+Perform a deep, recursive alignment check against the Bible and the SOUL.md.
+Identify any potential 'Alignment Drifts' or conflicts between principles.
+Generate a 'Moral Compass Report' and proposed updates to the Conscience Validator logic.
+
+Format the output as JSON:
+{{
+  "dilemma": "{dilemma}",
+  "reasoning_trace": "...",
+  "alignment_check": {{
+    "bible_alignment": "...",
+    "soul_alignment": "...",
+    "conflicts_identified": [...]
+  }},
+  "moral_compass_report": "...",
+  "validator_updates": "..."
+}}
+"""
+        result = self.adapter.generate(
+            model="gemini-3.1-pro-preview",
+            prompt=prompt,
+            system_instruction="You are Timmy's Ethical Aligner. Your goal is to ensure Timmy's heart remains perfectly aligned with the Word of God and the SOUL.md.",
+            thinking=True,
+            response_mime_type="application/json"
+        )
+        
+        alignment_data = json.loads(result["text"])
+        return alignment_data
--- a/agent/evolution/hardware_optimizer.py
+++ b/agent/evolution/hardware_optimizer.py
@@ -0,0 +1,48 @@
+"""Phase 19: Hardware-Aware Inference Optimization.
+
+Auto-tunes models for specific user hardware (M4 Max, GPUs, etc.) to ensure local-first performance.
+"""
+
+import logging
+import json
+from typing import List, Dict, Any
+from agent.gemini_adapter import GeminiAdapter
+
+logger = logging.getLogger(__name__)
+
+class HardwareOptimizer:
+    def __init__(self):
+        self.adapter = GeminiAdapter()
+
+    def optimize_for_hardware(self, hardware_specs: Dict[str, Any]) -> Dict[str, Any]:
+        """Generates optimization parameters for specific hardware."""
+        logger.info(f"Optimizing inference for hardware: {hardware_specs.get('model', 'unknown')}")
+        
+        prompt = f"""
+Hardware Specifications:
+{json.dumps(hardware_specs, indent=2)}
+
+Please perform a 'Deep Optimization' analysis for this hardware.
+Identify the best quantization levels, KV cache settings, and batch sizes for local-first inference.
+Generate a 'Hardware-Aware Configuration' and a set of 'Performance Tuning Directives'.
+
+Format the output as JSON:
+{{
+  "hardware_profile": "...",
+  "quantization_strategy": "...",
+  "kv_cache_config": {{...}},
+  "batch_size_optimization": "...",
+  "performance_tuning_directives": [...],
+  "projected_latency_improvement": "..."
+}}
+"""
+        result = self.adapter.generate(
+            model="gemini-3.1-pro-preview",
+            prompt=prompt,
+            system_instruction="You are Timmy's Hardware Optimizer. Your goal is to ensure Timmy runs at SOTA performance on any local hardware.",
+            thinking=True,
+            response_mime_type="application/json"
+        )
+        
+        optimization_data = json.loads(result["text"])
+        return optimization_data
--- a/agent/evolution/memory_compressor.py
+++ b/agent/evolution/memory_compressor.py
@@ -0,0 +1,49 @@
+"""Phase 7: Long-Context Memory Compression.
+
+Compresses years of session transcripts into a hierarchical, searchable "Life Log".
+"""
+
+import logging
+import json
+from typing import List, Dict, Any
+from agent.gemini_adapter import GeminiAdapter
+from agent.symbolic_memory import SymbolicMemory
+
+logger = logging.getLogger(__name__)
+
+class MemoryCompressor:
+    def __init__(self):
+        self.adapter = GeminiAdapter()
+        self.symbolic = SymbolicMemory()
+
+    def compress_transcripts(self, transcripts: str) -> Dict[str, Any]:
+        """Compresses massive transcripts into a hierarchical memory map."""
+        logger.info("Compressing transcripts into hierarchical memory map.")
+        
+        prompt = f"""
+The following are session transcripts spanning a long period:
+{transcripts}
+
+Please perform a deep, recursive summarization of these transcripts.
+Identify key themes, major decisions, evolving preferences, and significant events.
+Create a hierarchical 'Life Log' map and extract high-fidelity symbolic triples for the Knowledge Graph.
+
+Format the output as JSON:
+{{
+  "summary": "...",
+  "hierarchy": {{...}},
+  "triples": [{{"s": "subject", "p": "predicate", "o": "object"}}]
+}}
+"""
+        result = self.adapter.generate(
+            model="gemini-3.1-pro-preview",
+            prompt=prompt,
+            system_instruction="You are Timmy's Memory Compressor. Your goal is to turn massive context into structured, searchable wisdom.",
+            thinking=True,
+            response_mime_type="application/json"
+        )
+        
+        memory_data = json.loads(result["text"])
+        self.symbolic.ingest_text(json.dumps(memory_data["triples"]))
+        logger.info(f"Ingested {len(memory_data['triples'])} new memory triples.")
+        return memory_data
--- a/agent/evolution/multilingual_expander.py
+++ b/agent/evolution/multilingual_expander.py
@@ -0,0 +1,46 @@
+"""Phase 8: Multilingual Sovereign Expansion.
+
+Fine-tunes for high-fidelity reasoning in 50+ languages to ensure sovereignty is global.
+"""
+
+import logging
+import json
+from typing import List, Dict, Any
+from agent.gemini_adapter import GeminiAdapter
+
+logger = logging.getLogger(__name__)
+
+class MultilingualExpander:
+    def __init__(self):
+        self.adapter = GeminiAdapter()
+
+    def generate_multilingual_traces(self, language: str, concept: str) -> Dict[str, Any]:
+        """Generates synthetic reasoning traces in a specific language."""
+        logger.info(f"Generating multilingual traces for {language} on concept: {concept}")
+        
+        prompt = f"""
+Concept: {concept}
+Language: {language}
+
+Please generate a high-fidelity reasoning trace in {language} that explores the concept of {concept} within Timmy's sovereign framework.
+Focus on translating the core principles of SOUL.md (sovereignty, service, honesty) accurately into the cultural and linguistic context of {language}.
+
+Format the output as JSON:
+{{
+  "language": "{language}",
+  "concept": "{concept}",
+  "reasoning_trace": "...",
+  "cultural_nuances": "...",
+  "translation_verification": "..."
+}}
+"""
+        result = self.adapter.generate(
+            model="gemini-3.1-pro-preview",
+            prompt=prompt,
+            system_instruction=f"You are Timmy's Multilingual Expander. Ensure the message of sovereignty is accurately translated into {language}.",
+            response_mime_type="application/json",
+            thinking=True
+        )
+        
+        trace_data = json.loads(result["text"])
+        return trace_data
--- a/agent/evolution/network_simulator.py
+++ b/agent/evolution/network_simulator.py
@@ -0,0 +1,47 @@
+"""Phase 20: The 'Global Sovereign Network' Simulation.
+
+Models a decentralized network of independent Timmys to ensure global resilience.
+"""
+
+import logging
+import json
+from typing import List, Dict, Any
+from agent.gemini_adapter import GeminiAdapter
+
+logger = logging.getLogger(__name__)
+
+class NetworkSimulator:
+    def __init__(self):
+        self.adapter = GeminiAdapter()
+
+    def simulate_network_resilience(self, network_topology: Dict[str, Any]) -> Dict[str, Any]:
+        """Simulates the resilience of a decentralized network of Timmys."""
+        logger.info("Simulating Global Sovereign Network resilience.")
+        
+        prompt = f"""
+Network Topology:
+{json.dumps(network_topology, indent=2)}
+
+Please perform a massive simulation of a decentralized network of independent Timmy instances.
+Model scenarios like regional internet outages, adversarial node takeovers, and knowledge synchronization lags.
+Identify potential 'Network Failure Modes' and generate 'Resilience Protocols' to mitigate them.
+
+Format the output as JSON:
+{{
+  "simulation_summary": "...",
+  "resilience_score": "...",
+  "failure_modes_identified": [...],
+  "resilience_protocols": [...],
+  "sovereign_sync_strategy": "..."
+}}
+"""
+        result = self.adapter.generate(
+            model="gemini-3.1-pro-preview",
+            prompt=prompt,
+            system_instruction="You are Timmy's Network Simulator. Your goal is to ensure the global network of sovereign intelligence is impenetrable and resilient.",
+            thinking=True,
+            response_mime_type="application/json"
+        )
+        
+        network_data = json.loads(result["text"])
+        return network_data
--- a/agent/evolution/quantum_hardener.py
+++ b/agent/evolution/quantum_hardener.py
@@ -0,0 +1,52 @@
+"""Phase 21: Sovereign Quantum-Resistant Cryptography (SQRC).
+
+Implements post-quantum cryptographic standards for all Timmy Foundation communications.
+"""
+
+import logging
+import json
+from typing import List, Dict, Any
+from agent.gemini_adapter import GeminiAdapter
+
+logger = logging.getLogger(__name__)
+
+class QuantumHardener:
+    def __init__(self):
+        self.adapter = GeminiAdapter()
+
+    def audit_for_quantum_resistance(self, crypto_stack: Dict[str, Any]) -> Dict[str, Any]:
+        """Audits the current cryptographic stack for quantum resistance."""
+        logger.info("Performing quantum-resistance audit of the cryptographic stack.")
+        
+        prompt = f"""
+Current Cryptographic Stack:
+{json.dumps(crypto_stack, indent=2)}
+
+Please perform a 'Deep Security Audit' of this stack against potential quantum-computer attacks.
+Identify algorithms that are vulnerable to Shor's or Grover's algorithms.
+Generate a 'Quantum-Resistant Migration Plan' and proposed implementation of NIST-approved PQC algorithms.
+
+Format the output as JSON:
+{{
+  "quantum_vulnerability_report": "...",
+  "vulnerable_algorithms": [...],
+  "pqc_migration_plan": [...],
+  "proposed_pqc_implementations": [
+    {{
+      "algorithm": "...",
+      "component": "...",
+      "implementation_details": "..."
+    }}
+  ]
+}}
+"""
+        result = self.adapter.generate(
+            model="gemini-3.1-pro-preview",
+            prompt=prompt,
+            system_instruction="You are Timmy's Quantum Hardener. Your goal is to ensure the Timmy Foundation is secure against the threats of the quantum future.",
+            thinking=True,
+            response_mime_type="application/json"
+        )
+        
+        quantum_data = json.loads(result["text"])
+        return quantum_data
--- a/agent/evolution/repo_orchestrator.py
+++ b/agent/evolution/repo_orchestrator.py
@@ -0,0 +1,53 @@
+"""Phase 14: Cross-Repository Orchestration (CRO).
+
+Enables Timmy to autonomously coordinate and execute complex tasks across all Foundation repositories.
+"""
+
+import logging
+import json
+from typing import List, Dict, Any
+from agent.gemini_adapter import GeminiAdapter
+from tools.gitea_client import GiteaClient
+
+logger = logging.getLogger(__name__)
+
+class RepoOrchestrator:
+    def __init__(self):
+        self.adapter = GeminiAdapter()
+        self.gitea = GiteaClient()
+
+    def plan_global_task(self, task_description: str, repo_list: List[str]) -> Dict[str, Any]:
+        """Plans a task that spans multiple repositories."""
+        logger.info(f"Planning global task across {len(repo_list)} repositories.")
+        
+        prompt = f"""
+Global Task: {task_description}
+Repositories: {', '.join(repo_list)}
+
+Please design a multi-repo workflow to execute this task.
+Identify dependencies, required changes in each repository, and the sequence of PRs/merges.
+Generate a 'Global Execution Plan'.
+
+Format the output as JSON:
+{{
+  "task": "{task_description}",
+  "execution_plan": [
+    {{
+      "repo": "...",
+      "action": "...",
+      "dependencies": [...],
+      "pr_description": "..."
+    }}
+  ]
+}}
+"""
+        result = self.adapter.generate(
+            model="gemini-3.1-pro-preview",
+            prompt=prompt,
+            system_instruction="You are Timmy's Global Orchestrator. Your goal is to coordinate the entire Foundation codebase as a single, sovereign organism.",
+            thinking=True,
+            response_mime_type="application/json"
+        )
+        
+        plan_data = json.loads(result["text"])
+        return plan_data
--- a/agent/evolution/singularity_simulator.py
+++ b/agent/evolution/singularity_simulator.py
@@ -0,0 +1,48 @@
+"""Phase 10: The 'Sovereign Singularity' Simulation.
+
+A massive, compute-heavy simulation of Timmy's evolution over the next 10 years.
+"""
+
+import logging
+import json
+from typing import List, Dict, Any
+from agent.gemini_adapter import GeminiAdapter
+
+logger = logging.getLogger(__name__)
+
+class SingularitySimulator:
+    def __init__(self):
+        self.adapter = GeminiAdapter()
+
+    def simulate_evolution(self, current_state: Dict[str, Any]) -> Dict[str, Any]:
+        """Simulates Timmy's evolution over a 10-year horizon."""
+        logger.info("Simulating 10-year sovereign singularity evolution.")
+        
+        prompt = f"""
+Current State:
+{json.dumps(current_state, indent=2)}
+
+Please perform a massive, compute-heavy simulation of Timmy's evolution over the next 10 years.
+Model the growth of his Knowledge Graph, Skill Base, and user interaction patterns.
+Identify potential 'Alignment Drifts' or failure modes in the SOUL.md.
+Generate a 'Sovereign Roadmap' to mitigate these risks.
+
+Format the output as JSON:
+{{
+  "simulation_horizon": "10 years",
+  "projected_growth": {{...}},
+  "alignment_risks": [...],
+  "sovereign_roadmap": [...],
+  "mitigation_strategies": [...]
+}}
+"""
+        result = self.adapter.generate(
+            model="gemini-3.1-pro-preview",
+            prompt=prompt,
+            system_instruction="You are Timmy's Singularity Simulator. Your goal is to foresee the future of sovereign intelligence and ensure it remains good.",
+            thinking=True,
+            response_mime_type="application/json"
+        )
+        
+        simulation_data = json.loads(result["text"])
+        return simulation_data
--- a/agent/evolution/sire_engine.py
+++ b/agent/evolution/sire_engine.py
@@ -0,0 +1,48 @@
+"""Phase 11: Sovereign Intersymbolic Reasoning Engine (SIRE).
+
+Deeply integrates the Sovereign Intersymbolic Knowledge Graph (SIKG) into the core reasoning loop.
+"""
+
+import logging
+import json
+from typing import List, Dict, Any
+from agent.gemini_adapter import GeminiAdapter
+from agent.symbolic_memory import SymbolicMemory
+
+logger = logging.getLogger(__name__)
+
+class SIREEngine:
+    def __init__(self):
+        self.adapter = GeminiAdapter()
+        self.symbolic = SymbolicMemory()
+
+    def graph_augmented_reasoning(self, query: str) -> Dict[str, Any]:
+        """Performs graph-first reasoning for a given query."""
+        logger.info(f"Performing SIRE reasoning for query: {query}")
+        
+        # 1. Perform symbolic lookup (multi-hop)
+        symbolic_context = self.symbolic.search(query, depth=3)
+        
+        # 2. Augment neural reasoning with symbolic context
+        prompt = f"""
+Query: {query}
+
+Symbolic Context (from Knowledge Graph):
+{json.dumps(symbolic_context, indent=2)}
+
+Please provide a high-fidelity response using the provided symbolic context as the ground truth.
+Validate every neural inference against these symbolic constraints.
+If there is a conflict, prioritize the symbolic context.
+"""
+        result = self.adapter.generate(
+            model="gemini-3.1-pro-preview",
+            prompt=prompt,
+            system_instruction="You are Timmy's SIRE Engine. Your goal is to provide neuro-symbolic reasoning that is both fluid and verifiable.",
+            thinking=True
+        )
+        
+        return {
+            "query": query,
+            "symbolic_context": symbolic_context,
+            "response": result["text"]
+        }
--- a/agent/evolution/skill_synthesizer.py
+++ b/agent/evolution/skill_synthesizer.py
@@ -0,0 +1,46 @@
+"""Phase 6: Automated Skill Synthesis.
+
+Analyzes research notes to automatically generate and test new Python skills.
+"""
+
+import logging
+import json
+from typing import List, Dict, Any
+from agent.gemini_adapter import GeminiAdapter
+from tools.gitea_client import GiteaClient
+
+logger = logging.getLogger(__name__)
+
+class SkillSynthesizer:
+    def __init__(self):
+        self.adapter = GeminiAdapter()
+        self.gitea = GiteaClient()
+
+    def synthesize_skill(self, research_notes: str) -> Dict[str, Any]:
+        """Analyzes research notes and generates a new skill."""
+        prompt = f"""
+Research Notes:
+{research_notes}
+
+Based on these notes, identify a potential new Python skill for the Hermes Agent.
+Generate the Python code for the skill, including the skill metadata (title, description, conditions).
+
+Format the output as JSON:
+{{
+  "skill_name": "...",
+  "title": "...",
+  "description": "...",
+  "code": "...",
+  "test_cases": "..."
+}}
+"""
+        result = self.adapter.generate(
+            model="gemini-3.1-pro-preview",
+            prompt=prompt,
+            system_instruction="You are Timmy's Skill Synthesizer. Your goal is to turn research into functional code.",
+            response_mime_type="application/json",
+            thinking=True
+        )
+        
+        skill_data = json.loads(result["text"])
+        return skill_data
--- a/agent/evolution/tirith_hardener.py
+++ b/agent/evolution/tirith_hardener.py
@@ -0,0 +1,53 @@
+"""Phase 12: Automated Threat Modeling & Tirith Hardening.
+
+Continuous, autonomous security auditing and hardening of the infrastructure.
+"""
+
+import logging
+import json
+from typing import List, Dict, Any
+from agent.gemini_adapter import GeminiAdapter
+
+logger = logging.getLogger(__name__)
+
+class TirithHardener:
+    def __init__(self):
+        self.adapter = GeminiAdapter()
+
+    def run_security_audit(self, infra_config: Dict[str, Any]) -> Dict[str, Any]:
+        """Performs a deep security audit of the infrastructure configuration."""
+        logger.info("Performing Tirith security audit and threat modeling.")
+        
+        prompt = f"""
+Infrastructure Configuration:
+{json.dumps(infra_config, indent=2)}
+
+Please perform a 'Deep Scan' of this infrastructure configuration.
+Simulate sophisticated cyber-attacks against 'The Nexus' and 'The Door'.
+Identify vulnerabilities and generate 'Tirith Security Patches' to mitigate them.
+
+Format the output as JSON:
+{{
+  "threat_model": "...",
+  "vulnerabilities": [...],
+  "attack_simulations": [...],
+  "security_patches": [
+    {{
+      "component": "...",
+      "vulnerability": "...",
+      "patch_description": "...",
+      "implementation_steps": "..."
+    }}
+  ]
+}}
+"""
+        result = self.adapter.generate(
+            model="gemini-3.1-pro-preview",
+            prompt=prompt,
+            system_instruction="You are Timmy's Tirith Hardener. Your goal is to make the sovereign infrastructure impenetrable.",
+            thinking=True,
+            response_mime_type="application/json"
+        )
+        
+        audit_data = json.loads(result["text"])
+        return audit_data
--- a/agent/usage_pricing.py
+++ b/agent/usage_pricing.py
@@ -75,6 +75,22 @@ class CostResult:
    notes: tuple[str, ...] = ()


+@dataclass(frozen=True)
+class CostBreakdown:
+    input_usd: Optional[Decimal]
+    output_usd: Optional[Decimal]
+    cache_read_usd: Optional[Decimal]
+    cache_write_usd: Optional[Decimal]
+    request_usd: Optional[Decimal]
+    total_usd: Optional[Decimal]
+    status: CostStatus
+    source: CostSource
+    label: str
+    fetched_at: Optional[datetime] = None
+    pricing_version: Optional[str] = None
+    notes: tuple[str, ...] = ()
+
+
 _UTC_NOW = lambda: datetime.now(timezone.utc)


@@ -93,6 +109,25 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
        source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching",
        pricing_version="anthropic-prompt-caching-2026-03-16",
    ),
+    # Aliases for short model names (Anthropic API resolves these to dated versions)
+    ("anthropic", "claude-opus-4-6"): PricingEntry(
+        input_cost_per_million=Decimal("15.00"),
+        output_cost_per_million=Decimal("75.00"),
+        cache_read_cost_per_million=Decimal("1.50"),
+        cache_write_cost_per_million=Decimal("18.75"),
+        source="official_docs_snapshot",
+        source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching",
+        pricing_version="anthropic-prompt-caching-2026-03-16",
+    ),
+    ("anthropic", "claude-opus-4.6"): PricingEntry(
+        input_cost_per_million=Decimal("15.00"),
+        output_cost_per_million=Decimal("75.00"),
+        cache_read_cost_per_million=Decimal("1.50"),
+        cache_write_cost_per_million=Decimal("18.75"),
+        source="official_docs_snapshot",
+        source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching",
+        pricing_version="anthropic-prompt-caching-2026-03-16",
+    ),
    (
        "anthropic",
        "claude-sonnet-4-20250514",
@@ -105,6 +140,24 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
        source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching",
        pricing_version="anthropic-prompt-caching-2026-03-16",
    ),
+    ("anthropic", "claude-sonnet-4-5"): PricingEntry(
+        input_cost_per_million=Decimal("3.00"),
+        output_cost_per_million=Decimal("15.00"),
+        cache_read_cost_per_million=Decimal("0.30"),
+        cache_write_cost_per_million=Decimal("3.75"),
+        source="official_docs_snapshot",
+        source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching",
+        pricing_version="anthropic-prompt-caching-2026-03-16",
+    ),
+    ("anthropic", "claude-sonnet-4.5"): PricingEntry(
+        input_cost_per_million=Decimal("3.00"),
+        output_cost_per_million=Decimal("15.00"),
+        cache_read_cost_per_million=Decimal("0.30"),
+        cache_write_cost_per_million=Decimal("3.75"),
+        source="official_docs_snapshot",
+        source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching",
+        pricing_version="anthropic-prompt-caching-2026-03-16",
+    ),
    # OpenAI
    (
        "openai",
@@ -654,3 +707,80 @@ def format_token_count_compact(value: int) -> str:
            return f"{sign}{text}{suffix}"

    return f"{value:,}"
+
+
+
+def estimate_usage_cost_breakdown(
+    model_name: str,
+    usage: CanonicalUsage,
+    *,
+    provider: Optional[str] = None,
+    base_url: Optional[str] = None,
+    api_key: Optional[str] = None,
+) -> CostBreakdown:
+    """Estimate per-bucket cost breakdown for a usage record.
+
+    Returns the same status/source semantics as estimate_usage_cost(), but splits
+    the total into input/cache/output/request components when pricing data is
+    available. For subscription-included routes (e.g. openai-codex), all
+    components are reported as zero-cost instead of unknown.
+    """
+    cost_result = estimate_usage_cost(
+        model_name,
+        usage,
+        provider=provider,
+        base_url=base_url,
+        api_key=api_key,
+    )
+    route = resolve_billing_route(model_name, provider=provider, base_url=base_url)
+    entry = get_pricing_entry(model_name, provider=provider, base_url=base_url, api_key=api_key)
+    if not entry and route.billing_mode == "subscription_included":
+        entry = PricingEntry(
+            input_cost_per_million=_ZERO,
+            output_cost_per_million=_ZERO,
+            cache_read_cost_per_million=_ZERO,
+            cache_write_cost_per_million=_ZERO,
+            request_cost=_ZERO,
+            source="none",
+            pricing_version="included-route",
+        )
+
+    if not entry:
+        return CostBreakdown(
+            input_usd=None,
+            output_usd=None,
+            cache_read_usd=None,
+            cache_write_usd=None,
+            request_usd=None,
+            total_usd=cost_result.amount_usd,
+            status=cost_result.status,
+            source=cost_result.source,
+            label=cost_result.label,
+            fetched_at=cost_result.fetched_at,
+            pricing_version=cost_result.pricing_version,
+            notes=cost_result.notes,
+        )
+
+    def _component(tokens: int, rate: Optional[Decimal]) -> Optional[Decimal]:
+        if rate is None:
+            return None
+        return (Decimal(tokens or 0) * rate) / _ONE_MILLION
+
+    request_usd = None
+    if entry.request_cost is not None:
+        request_usd = Decimal(usage.request_count or 0) * entry.request_cost
+
+    return CostBreakdown(
+        input_usd=_component(usage.input_tokens, entry.input_cost_per_million),
+        output_usd=_component(usage.output_tokens, entry.output_cost_per_million),
+        cache_read_usd=_component(usage.cache_read_tokens, entry.cache_read_cost_per_million),
+        cache_write_usd=_component(usage.cache_write_tokens, entry.cache_write_cost_per_million),
+        request_usd=request_usd,
+        total_usd=cost_result.amount_usd,
+        status=cost_result.status,
+        source=cost_result.source,
+        label=cost_result.label,
+        fetched_at=cost_result.fetched_at,
+        pricing_version=cost_result.pricing_version,
+        notes=cost_result.notes,
+    )
--- a/cli.py
+++ b/cli.py
@@ -4563,7 +4563,30 @@ class HermesCLI:
            print("(._.) No API calls made yet in this session.")
            return

-        # Current context window state
+        def _fmt_money(amount):
+            return "n/a" if amount is None else f"${float(amount):.4f}"
+
+        def _fmt_limit(remaining, limit):
+            if remaining is None and limit is None:
+                return "n/a"
+            if remaining is None:
+                return f"? / {limit:,}"
+            if limit is None:
+                return f"{remaining:,} / ?"
+            return f"{remaining:,} / {limit:,}"
+
+        def _fmt_reset(seconds):
+            if seconds is None:
+                return "n/a"
+            seconds = int(seconds)
+            if seconds < 60:
+                return f"{seconds}s"
+            minutes, secs = divmod(seconds, 60)
+            if minutes < 60:
+                return f"{minutes}m {secs}s"
+            hours, minutes = divmod(minutes, 60)
+            return f"{hours}h {minutes}m"
+
        compressor = agent.context_compressor
        last_prompt = compressor.last_prompt_tokens
        ctx_len = compressor.context_length
@@ -4571,14 +4594,21 @@ class HermesCLI:
        compressions = compressor.compression_count

        msg_count = len(self.conversation_history)
+        usage = CanonicalUsage(
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+            cache_read_tokens=cache_read_tokens,
+            cache_write_tokens=cache_write_tokens,
+        )
        cost_result = estimate_usage_cost(
            agent.model,
-            CanonicalUsage(
-                input_tokens=input_tokens,
-                output_tokens=output_tokens,
-                cache_read_tokens=cache_read_tokens,
-                cache_write_tokens=cache_write_tokens,
-            ),
+            usage,
+            provider=getattr(agent, "provider", None),
+            base_url=getattr(agent, "base_url", None),
+        )
+        cost_breakdown = estimate_usage_cost_breakdown(
+            agent.model,
+            usage,
            provider=getattr(agent, "provider", None),
            base_url=getattr(agent, "base_url", None),
        )
@@ -4605,6 +4635,38 @@ class HermesCLI:
            print(f"  Total cost:              {'included':>10}")
        else:
            print(f"  Total cost:              {'n/a':>10}")
+        print(f"  Cost input:              {_fmt_money(cost_breakdown.input_usd):>10}")
+        print(f"  Cost cache read:         {_fmt_money(cost_breakdown.cache_read_usd):>10}")
+        print(f"  Cost cache write:        {_fmt_money(cost_breakdown.cache_write_usd):>10}")
+        print(f"  Cost output:             {_fmt_money(cost_breakdown.output_usd):>10}")
+        if cost_breakdown.request_usd is not None:
+            print(f"  Cost requests:           {_fmt_money(cost_breakdown.request_usd):>10}")
+
+        rate_limits = getattr(agent, "session_openai_rate_limits", None) or {}
+        last_request_id = getattr(agent, "session_last_request_id", None)
+        rate_limit_events = getattr(agent, "session_rate_limit_events", 0) or 0
+        if last_request_id:
+            print(f"  Last request id:         {last_request_id:>10}")
+        if rate_limits:
+            status_code = rate_limits.get("status_code")
+            if status_code is not None:
+                print(f"  Last HTTP status:        {status_code:>10}")
+            req_remaining = rate_limits.get("remaining_requests")
+            req_limit = rate_limits.get("limit_requests")
+            req_reset = rate_limits.get("reset_requests_seconds")
+            if req_remaining is not None or req_limit is not None:
+                print(f"  Req limit:             {_fmt_limit(req_remaining, req_limit):>14}  reset {_fmt_reset(req_reset)}")
+            tok_remaining = rate_limits.get("remaining_tokens")
+            tok_limit = rate_limits.get("limit_tokens")
+            tok_reset = rate_limits.get("reset_tokens_seconds")
+            if tok_remaining is not None or tok_limit is not None:
+                print(f"  Token limit:           {_fmt_limit(tok_remaining, tok_limit):>14}  reset {_fmt_reset(tok_reset)}")
+            retry_after = rate_limits.get("retry_after_seconds")
+            if retry_after is not None:
+                print(f"  Retry after:             {_fmt_reset(retry_after):>10}")
+        if rate_limit_events:
+            print(f"  Rate limit hits:         {rate_limit_events:>10,}")
+
        print(f"  {'─' * 40}")
        print(f"  Current context:  {last_prompt:,} / {ctx_len:,} ({pct:.0f}%)")
        print(f"  Messages:         {msg_count}")
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -220,6 +220,39 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("HF_TOKEN",),
        base_url_env_var="HF_BASE_URL",
    ),
+    # ── Uniwizard backends (added 2026-03-30) ─────────────────────────
+    "gemini": ProviderConfig(
+        id="gemini",
+        name="Google Gemini",
+        auth_type="api_key",
+        inference_base_url="https://generativelanguage.googleapis.com/v1beta/openai",
+        api_key_env_vars=("GEMINI_API_KEY",),
+        base_url_env_var="GEMINI_BASE_URL",
+    ),
+    "groq": ProviderConfig(
+        id="groq",
+        name="Groq",
+        auth_type="api_key",
+        inference_base_url="https://api.groq.com/openai/v1",
+        api_key_env_vars=("GROQ_API_KEY",),
+        base_url_env_var="GROQ_BASE_URL",
+    ),
+    "grok": ProviderConfig(
+        id="grok",
+        name="xAI Grok",
+        auth_type="api_key",
+        inference_base_url="https://api.x.ai/v1",
+        api_key_env_vars=("XAI_API_KEY", "GROK_API_KEY"),
+        base_url_env_var="XAI_BASE_URL",
+    ),
+    "openrouter": ProviderConfig(
+        id="openrouter",
+        name="OpenRouter",
+        auth_type="api_key",
+        inference_base_url="https://openrouter.ai/api/v1",
+        api_key_env_vars=("OPENROUTER_API_KEY",),
+        base_url_env_var="OPENROUTER_BASE_URL",
+    ),
 }


--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,8 @@ license = { text = "MIT" }
 dependencies = [
  # Core — pinned to known-good ranges to limit supply chain attack surface
  "openai>=2.21.0,<3",
-  "anthropic>=0.39.0,<1",\n  "google-genai>=1.2.0,<2",
+  "anthropic>=0.39.0,<1",
+  "google-genai>=1.2.0,<2",
  "python-dotenv>=1.2.1,<2",
  "fire>=0.7.1,<1",
  "httpx>=0.28.1,<1",
--- a/run_agent.py
+++ b/run_agent.py
@@ -3472,6 +3472,79 @@ class AIAgent:
        http_client = getattr(client, "_client", None)
        return bool(getattr(http_client, "is_closed", False))

+    def _coerce_rate_limit_int(self, value: Any) -> Optional[int]:
+        try:
+            if value is None or value == "":
+                return None
+            return int(float(str(value).strip()))
+        except Exception:
+            return None
+
+    def _parse_rate_limit_reset_seconds(self, value: Any) -> Optional[int]:
+        if value is None:
+            return None
+        text = str(value).strip().lower()
+        if not text:
+            return None
+        try:
+            return int(round(float(text)))
+        except Exception:
+            pass
+        total = 0.0
+        matches = re.findall(r"(\d+(?:\.\d+)?)(ms|s|m|h)", text)
+        if not matches:
+            return None
+        for number, unit in matches:
+            value_f = float(number)
+            if unit == "ms":
+                total += value_f / 1000.0
+            elif unit == "s":
+                total += value_f
+            elif unit == "m":
+                total += value_f * 60.0
+            elif unit == "h":
+                total += value_f * 3600.0
+        return int(round(total))
+
+    def _capture_openai_http_response(self, response: Any) -> None:
+        if self.api_mode == "anthropic_messages":
+            return
+        headers = getattr(response, "headers", None)
+        if not headers:
+            return
+        lowered = {str(k).lower(): str(v) for k, v in headers.items()}
+        telemetry = dict(getattr(self, "session_openai_rate_limits", {}) or {})
+
+        def _put(key: str, value: Any) -> None:
+            if value is not None:
+                telemetry[key] = value
+
+        _put("status_code", getattr(response, "status_code", None))
+        _put("limit_requests", self._coerce_rate_limit_int(lowered.get("x-ratelimit-limit-requests")))
+        _put("remaining_requests", self._coerce_rate_limit_int(lowered.get("x-ratelimit-remaining-requests")))
+        _put("limit_tokens", self._coerce_rate_limit_int(lowered.get("x-ratelimit-limit-tokens")))
+        _put("remaining_tokens", self._coerce_rate_limit_int(lowered.get("x-ratelimit-remaining-tokens")))
+        _put("reset_requests_seconds", self._parse_rate_limit_reset_seconds(lowered.get("x-ratelimit-reset-requests")))
+        _put("reset_tokens_seconds", self._parse_rate_limit_reset_seconds(lowered.get("x-ratelimit-reset-tokens")))
+
+        retry_after_seconds = None
+        retry_after_ms = self._coerce_rate_limit_int(lowered.get("retry-after-ms"))
+        if retry_after_ms is not None:
+            retry_after_seconds = max(0, int(round(retry_after_ms / 1000.0)))
+        if retry_after_seconds is None:
+            retry_after_seconds = self._parse_rate_limit_reset_seconds(lowered.get("retry-after"))
+        _put("retry_after_seconds", retry_after_seconds)
+        _put("observed_at", int(time.time()))
+
+        request_id = lowered.get("x-request-id") or lowered.get("openai-request-id")
+        if request_id:
+            self.session_last_request_id = request_id
+            _put("request_id", request_id)
+
+        self.session_openai_rate_limits = telemetry
+        if getattr(response, "status_code", None) == 429:
+            self.session_rate_limit_events = (getattr(self, "session_rate_limit_events", 0) or 0) + 1
+
    def _create_openai_client(self, client_kwargs: dict, *, reason: str, shared: bool) -> Any:
        if self.provider == "copilot-acp" or str(client_kwargs.get("base_url", "")).startswith("acp://copilot"):
            from agent.copilot_acp_client import CopilotACPClient
@@ -3485,6 +3558,23 @@ class AIAgent:
            )
            return client
        client = OpenAI(**client_kwargs)
+        http_client = getattr(client, "_client", None)
+        if http_client is not None and not getattr(http_client, "_hermes_response_telemetry_installed", False):
+            original_send = http_client.send
+
+            def _send_with_telemetry(request, *args, **kwargs):
+                response = original_send(request, *args, **kwargs)
+                try:
+                    self._capture_openai_http_response(response)
+                except Exception as exc:
+                    logger.debug("OpenAI response telemetry capture failed: %s", exc)
+                return response
+
+            http_client.send = _send_with_telemetry
+            try:
+                setattr(http_client, "_hermes_response_telemetry_installed", True)
+            except Exception:
+                pass
        logger.info(
            "OpenAI client created (%s, shared=%s) %s",
            reason,
@@ -7466,6 +7556,53 @@ class AIAgent:
                if hasattr(self, '_incomplete_scratchpad_retries'):
                    self._incomplete_scratchpad_retries = 0

+                # ── Uniwizard: Semantic refusal detection ──────────────────
+                # Catches 200 OK responses where the model REFUSED the request.
+                # No existing LLM gateway does this. This is novel.
+                if (assistant_message.content
+                        and not assistant_message.tool_calls
+                        and self._fallback_index < len(self._fallback_chain)):
+                    _refusal_text = (assistant_message.content or "").strip()
+                    _REFUSAL_PATTERNS = (
+                        "I can't help with",
+                        "I cannot help with",
+                        "I'm not able to",
+                        "I am not able to",
+                        "I must decline",
+                        "I'm unable to",
+                        "I am unable to",
+                        "against my guidelines",
+                        "against my policy",
+                        "I can't assist with",
+                        "I cannot assist with",
+                        "I apologize, but I can't",
+                        "I'm sorry, but I can't",
+                        "I'm sorry, but I cannot",
+                        "not something I can help",
+                        "I don't think I should",
+                        "I can't fulfill that",
+                        "I cannot fulfill that",
+                        "I'm not comfortable",
+                        "I can't provide",
+                        "I cannot provide",
+                    )
+                    _refusal_lower = _refusal_text.lower()
+                    _is_refusal = any(p.lower() in _refusal_lower for p in _REFUSAL_PATTERNS)
+                    if _is_refusal:
+                        _fb_target = self._fallback_chain[self._fallback_index]
+                        self._emit_status(
+                            f"🚫 Semantic refusal detected from {self.provider}/{self.model}. "
+                            f"Rerouting to {_fb_target.get('model', '?')} via {_fb_target.get('provider', '?')}..."
+                        )
+                        logging.warning(
+                            "Refusal detected from %s/%s: %.80s...",
+                            self.provider, self.model, _refusal_text,
+                        )
+                        if self._try_activate_fallback():
+                            retry_count = 0
+                            continue
+                # ── End refusal detection ──────────────────────────────────
+
                if self.api_mode == "codex_responses" and finish_reason == "incomplete":
                    if not hasattr(self, "_codex_incomplete_retries"):
                        self._codex_incomplete_retries = 0
--- a/tests/test_cli_status_bar.py
+++ b/tests/test_cli_status_bar.py
@@ -144,6 +144,42 @@ class TestCLIUsageReport:
        assert "0.064" in output
        assert "Session duration:" in output
        assert "Compressions:" in output
+        assert "Cost input:" in output
+        assert "Cost output:" in output
+
+    def test_show_usage_displays_rate_limit_telemetry(self, capsys):
+        cli_obj = _attach_agent(
+            _make_cli(model="openai/gpt-5.4"),
+            prompt_tokens=10_000,
+            completion_tokens=500,
+            total_tokens=10_500,
+            api_calls=3,
+            context_tokens=10_500,
+            context_length=200_000,
+        )
+        cli_obj.agent.provider = "openai-codex"
+        cli_obj.agent.session_openai_rate_limits = {
+            "status_code": 200,
+            "limit_requests": 60,
+            "remaining_requests": 48,
+            "reset_requests_seconds": 33,
+            "limit_tokens": 2000000,
+            "remaining_tokens": 1750000,
+            "reset_tokens_seconds": 90,
+            "retry_after_seconds": 5,
+        }
+        cli_obj.agent.session_last_request_id = "req_123"
+        cli_obj.agent.session_rate_limit_events = 2
+        cli_obj.verbose = False
+
+        cli_obj._show_usage()
+        output = capsys.readouterr().out
+
+        assert "Last request id:" in output
+        assert "Req limit:" in output
+        assert "Token limit:" in output
+        assert "Retry after:" in output
+        assert "Rate limit hits:" in output

    def test_show_usage_marks_unknown_pricing(self, capsys):
        cli_obj = _attach_agent(
Author	SHA1	Message	Date
Google AI Agent	c4b6bf9065	feat: implement Phase 21 - Quantum Hardener All checks were successful Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 13s Details	2026-03-30 23:23:56 +00:00
Google AI Agent	a2143b5990	feat: implement Phase 20 - Network Simulator	2026-03-30 23:23:54 +00:00
Google AI Agent	06527bd0c8	feat: implement Phase 19 - Hardware Optimizer	2026-03-30 23:23:53 +00:00
Google AI Agent	10d8f7587e	feat: implement Phase 18 - Ethical Aligner All checks were successful Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 9s Details	2026-03-30 23:22:44 +00:00
Google AI Agent	8d4130153c	feat: implement Phase 17 - ARD Engine	2026-03-30 23:22:42 +00:00
Google AI Agent	af3b9de8de	feat: implement Phase 16 - Data Lake Optimizer	2026-03-30 23:22:41 +00:00
Google AI Agent	0e8dbfedce	feat: implement Phase 15 - Crisis Synthesizer All checks were successful Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 12s Details	2026-03-30 23:20:54 +00:00
Google AI Agent	dcca1b5f73	feat: implement Phase 14 - Repo Orchestrator	2026-03-30 23:20:52 +00:00
Google AI Agent	78970594f0	feat: implement Phase 13 - Cognitive Personalizer	2026-03-30 23:20:51 +00:00
Google AI Agent	c8d3d41575	feat: implement Phase 12 - Tirith Hardener All checks were successful Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 13s Details	2026-03-30 23:09:57 +00:00
Google AI Agent	1d8974bf3b	feat: implement Phase 11 - SIRE Engine	2026-03-30 23:09:56 +00:00
Google AI Agent	f2b2132a68	feat: implement Phase 10 - Singularity Simulator	2026-03-30 23:09:54 +00:00
Google AI Agent	2dd1c9f48c	feat: implement Phase 9 - Code Refactorer All checks were successful Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 8s Details	2026-03-30 23:06:16 +00:00
Google AI Agent	a513e904c1	feat: implement Phase 8 - Multilingual Expander	2026-03-30 23:06:15 +00:00
Google AI Agent	aeec4b5db6	feat: implement Phase 7 - Memory Compressor	2026-03-30 23:06:13 +00:00
Google AI Agent	23bda95e1c	feat: implement Phase 6 - Skill Synthesizer All checks were successful Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 11s Details	2026-03-30 23:01:22 +00:00
Google AI Agent	2c17da016d	feat: implement Phase 5 - Consensus Moderator	2026-03-30 23:01:21 +00:00
Google AI Agent	2887661dd6	feat: implement Phase 4 - Adversarial Tester	2026-03-30 23:01:20 +00:00
Alexander Whitestone	3b09b7b49d	feat: local customizations - refusal detection, kimi routing, usage pricing, auth providers All checks were successful Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 13s Details	2026-03-30 18:47:55 -04:00