WIP: Gemini Code progress on #1006

Automated salvage commit — the agent session timed out (exit code 124, the `timeout` utility's termination status).
Work in progress; may need continuation.
This commit is contained in:
Alexander Whitestone
2026-03-23 14:51:26 -04:00
parent 7e03985368
commit 72cf3bb74d
3 changed files with 154 additions and 1 deletions

View File

@@ -341,6 +341,17 @@ class Settings(BaseSettings):
vassal_stuck_threshold_minutes: int = 120 # minutes before agent issue is "stuck"
vassal_idle_threshold_minutes: int = 30 # minutes before agent is "idle"
# ── Consensus (Multi-Model Voting) ───────────────────────────────
# When enabled, critical decisions are verified by multiple models.
consensus_enabled: bool = False
# Models to use for consensus voting.
# NOTE(review): each name is resolved with a fallback and skipped when not
# available locally — see create_consensus_timmy in timmy/agent.py.
consensus_models: list[str] = [
"qwen3:14b",
"llama3.1:8b-instruct",
]
# Model to use for escalation when consensus fails.
# Passed as the `backend` argument to create_timmy() when models disagree.
consensus_escalation_model: str = "grok-3-fast"
# ── Paperclip AI — orchestration bridge ────────────────────────────
# URL where the Paperclip server listens.
# For VPS deployment behind nginx, use the public domain.

View File

@@ -24,6 +24,7 @@ from agno.models.ollama import Ollama
from config import check_ollama_model_available, settings
from timmy.prompts import get_system_prompt
from timmy.tools import create_full_toolkit
from timmy.consensus import ConsensusEngine
if TYPE_CHECKING:
from timmy.backends import ClaudeBackend, GrokBackend
@@ -31,7 +32,7 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
# Union type for callers that want to hint the return type.
TimmyAgent = Union[Agent, "GrokBackend", "ClaudeBackend"]
TimmyAgent = Union[Agent, "GrokBackend", "ClaudeBackend", "ConsensusEngine"]
# Models known to be too small for reliable tool calling.
# These hallucinate tool calls as text, invoke tools randomly,
@@ -269,6 +270,47 @@ def _create_ollama_agent(
return agent
def create_consensus_timmy(
    db_file: str = "timmy.db",
    *,
    skip_mcp: bool = False,
    session_id: str = "unknown",
) -> ConsensusEngine:
    """Build a ConsensusEngine backed by one Ollama agent per configured model.

    Each name in ``settings.consensus_models`` is resolved (with fallback and
    auto-pull); names that still resolve to no locally available model are
    logged and skipped, so the engine may hold fewer agents than configured.

    Args:
        db_file: Path of the SQLite database handed to each agent.
        skip_mcp: When True, MCP tools are excluded from each agent's toolkit.
        session_id: Session identifier woven into each agent's system prompt.

    Returns:
        A ConsensusEngine wrapping the successfully created agents (possibly
        an empty engine if no model was available).
    """
    agents = []
    for requested in settings.consensus_models:
        resolved_name, fell_back = _resolve_model_with_fallback(
            requested_model=requested,
            require_vision=False,
            auto_pull=True,
        )
        if not _check_model_available(resolved_name):
            logger.error(
                "Ollama unreachable and no local models available for consensus. "
                "Start Ollama with 'ollama serve' or use --backend claude explicitly."
            )
            continue
        if fell_back:
            logger.info("Using fallback model %s (requested was unavailable)", resolved_name)
        supports_tools = _model_supports_tools(resolved_name)
        agents.append(
            _create_ollama_agent(
                db_file=db_file,
                model_name=resolved_name,
                tools_list=_build_tools_list(supports_tools, skip_mcp, resolved_name),
                full_prompt=_build_prompt(supports_tools, session_id),
                use_tools=supports_tools,
            )
        )
    return ConsensusEngine(agents)
def create_timmy(
db_file: str = "timmy.db",
backend: str | None = None,
@@ -289,6 +331,13 @@ def create_timmy(
Returns an Agno Agent or backend-specific agent — all expose
print_response(message, stream).
"""
if settings.consensus_enabled:
return create_consensus_timmy(
db_file=db_file,
skip_mcp=skip_mcp,
session_id=session_id,
)
resolved = _resolve_backend(backend)
if resolved == "claude":

93
src/timmy/consensus.py Normal file
View File

@@ -0,0 +1,93 @@
"""Consensus engine for multi-model decision making."""
import logging
from typing import TYPE_CHECKING, List
from config import settings
if TYPE_CHECKING:
from timmy.agent import TimmyAgent
logger = logging.getLogger(__name__)
class ConsensusEngine:
"""Queries multiple models and compares their outputs for consensus."""
def __init__(self, models: List["TimmyAgent"]):
"""
Args:
models: A list of agent instances to be used for consensus.
"""
self.models = models
def run(self, message: str) -> str:
"""
Runs the consensus check by querying all models and comparing their outputs.
Args:
message: The input message to be sent to the models.
Returns:
The response from the primary model if there is a consensus,
otherwise the response from the escalation model.
"""
if not self.models:
return "No models configured for consensus."
responses = []
for model in self.models:
try:
response = model.run(message, stream=False)
responses.append(response.content)
except Exception as e:
logger.error(f"Error querying model {model}: {e}")
responses.append(None)
if self._check_consensus(responses):
logger.info("Consensus reached.")
# TODO: Add metric tracking for consensus success
return responses[0]
else:
logger.warning("Consensus not reached. Escalating to a higher-tier model.")
# TODO: Add metric tracking for consensus failure
return self._escalate(message)
def _escalate(self, message: str) -> str:
"""
Escalates to a higher-tier model to get a final response.
Args:
message: The input message to be sent to the escalation model.
Returns:
The response from the escalation model.
"""
from timmy.agent import create_timmy
try:
escalation_model = create_timmy(backend=settings.consensus_escalation_model)
response = escalation_model.run(message, stream=False)
return response.content
except Exception as e:
logger.error(f"Error querying escalation model: {e}")
return "Error during escalation."
def _check_consensus(self, responses: List[str]) -> bool:
"""
Checks if the responses from the models are in agreement.
Args:
responses: A list of responses from the models.
Returns:
True if the responses are in agreement, False otherwise.
"""
if not responses or len(responses) < 2:
return False
first_response = responses[0]
for response in responses[1:]:
if response != first_response:
return False
return True