WIP: Gemini Code progress on #1006
Automated salvage commit — agent session ended (exit 124). Work in progress, may need continuation.
This commit is contained in:
@@ -341,6 +341,17 @@ class Settings(BaseSettings):
|
||||
vassal_stuck_threshold_minutes: int = 120 # minutes before agent issue is "stuck"
vassal_idle_threshold_minutes: int = 30 # minutes before agent is "idle"

# ── Consensus (Multi-Model Voting) ───────────────────────────────
# When enabled, critical decisions are verified by multiple models.
consensus_enabled: bool = False
# Models to use for consensus voting.
# NOTE: mutable list default is safe here — this class derives from
# pydantic BaseSettings, which copies field defaults per instance.
consensus_models: list[str] = [
    "qwen3:14b",
    "llama3.1:8b-instruct",
]
# Model to use for escalation when consensus fails.
# NOTE(review): consumed by ConsensusEngine._escalate as create_timmy's
# `backend=` argument — confirm a model id is valid there.
consensus_escalation_model: str = "grok-3-fast"

# ── Paperclip AI — orchestration bridge ────────────────────────────
# URL where the Paperclip server listens.
# For VPS deployment behind nginx, use the public domain.
|
||||
|
||||
@@ -24,6 +24,7 @@ from agno.models.ollama import Ollama
|
||||
from config import check_ollama_model_available, settings
|
||||
from timmy.prompts import get_system_prompt
|
||||
from timmy.tools import create_full_toolkit
|
||||
from timmy.consensus import ConsensusEngine
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from timmy.backends import ClaudeBackend, GrokBackend
|
||||
@@ -31,7 +32,7 @@ if TYPE_CHECKING:
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Union type for callers that want to hint the return type.
# (Backends are quoted forward refs; some are only imported under TYPE_CHECKING.)
TimmyAgent = Union[Agent, "GrokBackend", "ClaudeBackend", "ConsensusEngine"]
|
||||
|
||||
# Models known to be too small for reliable tool calling.
|
||||
# These hallucinate tool calls as text, invoke tools randomly,
|
||||
@@ -269,6 +270,47 @@ def _create_ollama_agent(
|
||||
return agent
|
||||
|
||||
|
||||
def create_consensus_timmy(
    db_file: str = "timmy.db",
    *,
    skip_mcp: bool = False,
    session_id: str = "unknown",
) -> ConsensusEngine:
    """Instantiate the consensus engine with multiple Ollama agents.

    Builds one Ollama-backed agent per entry in settings.consensus_models,
    skipping models that cannot be resolved locally, and wraps the
    resulting agents in a ConsensusEngine.
    """
    agents = []
    for requested in settings.consensus_models:
        # Resolve the requested model, falling back (and auto-pulling) if needed.
        resolved_name, is_fallback = _resolve_model_with_fallback(
            requested_model=requested,
            require_vision=False,
            auto_pull=True,
        )

        # Skip unavailable models rather than aborting the whole engine.
        if not _check_model_available(resolved_name):
            logger.error(
                "Ollama unreachable and no local models available for consensus. "
                "Start Ollama with 'ollama serve' or use --backend claude explicitly."
            )
            continue

        if is_fallback:
            logger.info("Using fallback model %s (requested was unavailable)", resolved_name)

        # Tool support varies by model; build tools/prompt accordingly.
        supports_tools = _model_supports_tools(resolved_name)
        agents.append(
            _create_ollama_agent(
                db_file=db_file,
                model_name=resolved_name,
                tools_list=_build_tools_list(supports_tools, skip_mcp, resolved_name),
                full_prompt=_build_prompt(supports_tools, session_id),
                use_tools=supports_tools,
            )
        )

    return ConsensusEngine(agents)
|
||||
|
||||
|
||||
def create_timmy(
|
||||
db_file: str = "timmy.db",
|
||||
backend: str | None = None,
|
||||
@@ -289,6 +331,13 @@ def create_timmy(
|
||||
Returns an Agno Agent or backend-specific agent — all expose
|
||||
print_response(message, stream).
|
||||
"""
|
||||
if settings.consensus_enabled:
|
||||
return create_consensus_timmy(
|
||||
db_file=db_file,
|
||||
skip_mcp=skip_mcp,
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
resolved = _resolve_backend(backend)
|
||||
|
||||
if resolved == "claude":
|
||||
|
||||
93
src/timmy/consensus.py
Normal file
93
src/timmy/consensus.py
Normal file
@@ -0,0 +1,93 @@
|
||||
"""Consensus engine for multi-model decision making."""
|
||||
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, List
|
||||
|
||||
from config import settings
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from timmy.agent import TimmyAgent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ConsensusEngine:
    """Queries multiple models and compares their outputs for consensus.

    Each configured agent is asked the same question; if all answers match
    exactly, the first answer is returned. Otherwise the question is
    escalated to a single higher-tier model.

    NOTE(review): consensus is exact string equality between model outputs,
    which free-form LLM responses will rarely satisfy — confirm this is the
    intended bar (vs. e.g. a semantic comparison).
    """

    def __init__(self, models: List["TimmyAgent"]):
        """
        Args:
            models: A list of agent instances to be used for consensus.
        """
        self.models = models

    def run(self, message: str) -> str:
        """
        Runs the consensus check by querying all models and comparing their outputs.

        Args:
            message: The input message to be sent to the models.

        Returns:
            The response from the primary model if there is a consensus,
            otherwise the response from the escalation model.
        """
        if not self.models:
            return "No models configured for consensus."

        # A failed query is recorded as None so indices line up with models;
        # None never counts toward agreement (see _check_consensus).
        responses: list[str | None] = []
        for model in self.models:
            try:
                result = model.run(message, stream=False)
                responses.append(result.content)
            except Exception:
                # logger.exception captures the traceback; lazy %-args avoid
                # formatting cost when the level is disabled.
                logger.exception("Error querying model %r", model)
                responses.append(None)

        if self._check_consensus(responses):
            logger.info("Consensus reached.")
            # TODO: Add metric tracking for consensus success
            return responses[0]

        logger.warning("Consensus not reached. Escalating to a higher-tier model.")
        # TODO: Add metric tracking for consensus failure
        return self._escalate(message)

    def _escalate(self, message: str) -> str:
        """
        Escalates to a higher-tier model to get a final response.

        Args:
            message: The input message to be sent to the escalation model.

        Returns:
            The response from the escalation model, or a fixed error string
            if the escalation model cannot be created or queried.
        """
        # Local import breaks the circular dependency with timmy.agent.
        from timmy.agent import create_timmy

        try:
            # NOTE(review): create_timmy's `backend` argument normally names a
            # backend ("claude"/"grok"); consensus_escalation_model holds a
            # model id ("grok-3-fast") — confirm this mapping is intended.
            escalation_model = create_timmy(backend=settings.consensus_escalation_model)
            response = escalation_model.run(message, stream=False)
            return response.content
        except Exception:
            logger.exception("Error querying escalation model")
            return "Error during escalation."

    def _check_consensus(self, responses: List["str | None"]) -> bool:
        """
        Checks if the responses from the models are in agreement.

        A None entry marks a model that failed to answer; any None breaks
        consensus (previously two failed models — None == None — counted as
        agreement and run() would return None instead of a str).

        Args:
            responses: A list of responses from the models (None = failure).

        Returns:
            True if at least two responses exist and all are equal and
            non-None, False otherwise.
        """
        if len(responses) < 2:
            return False

        first = responses[0]
        if first is None:
            return False
        # None != first also rejects any later failed response.
        return all(resp == first for resp in responses[1:])
|
||||
Reference in New Issue
Block a user