WIP: Gemini Code progress on #1006
Automated salvage commit — agent session ended (exit 124). Work in progress, may need continuation.
This commit is contained in:
@@ -341,6 +341,17 @@ class Settings(BaseSettings):
|
||||
vassal_stuck_threshold_minutes: int = 120 # minutes before agent issue is "stuck"
vassal_idle_threshold_minutes: int = 30 # minutes before agent is "idle"

# ── Consensus (Multi-Model Voting) ───────────────────────────────
# When enabled, critical decisions are verified by multiple models.
consensus_enabled: bool = False
# Models to use for consensus voting.
# NOTE: mutable list default is safe here — this class derives from
# pydantic BaseSettings, which copies field defaults per instance.
consensus_models: list[str] = [
    "qwen3:14b",
    "llama3.1:8b-instruct",
]
# Model to use for escalation when consensus fails.
# NOTE(review): consumed by ConsensusEngine._escalate as create_timmy's
# `backend=` argument — confirm a model id is valid there.
consensus_escalation_model: str = "grok-3-fast"

# ── Paperclip AI — orchestration bridge ────────────────────────────
# URL where the Paperclip server listens.
# For VPS deployment behind nginx, use the public domain.
|
||||
|
||||
@@ -24,6 +24,7 @@ from agno.models.ollama import Ollama
|
||||
from config import check_ollama_model_available, settings
|
||||
from timmy.prompts import get_system_prompt
|
||||
from timmy.tools import create_full_toolkit
|
||||
from timmy.consensus import ConsensusEngine
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from timmy.backends import ClaudeBackend, GrokBackend
|
||||
@@ -31,7 +32,7 @@ if TYPE_CHECKING:
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Union type for callers that want to hint the return type.
# (Backends are quoted forward refs; some are only imported under TYPE_CHECKING.)
TimmyAgent = Union[Agent, "GrokBackend", "ClaudeBackend", "ConsensusEngine"]
|
||||
|
||||
# Models known to be too small for reliable tool calling.
|
||||
# These hallucinate tool calls as text, invoke tools randomly,
|
||||
@@ -269,6 +270,47 @@ def _create_ollama_agent(
|
||||
return agent
|
||||
|
||||
|
||||
def create_consensus_timmy(
    db_file: str = "timmy.db",
    *,
    skip_mcp: bool = False,
    session_id: str = "unknown",
) -> ConsensusEngine:
    """Instantiate the consensus engine with multiple Ollama agents.

    Builds one Ollama-backed agent per entry in settings.consensus_models,
    skipping models that cannot be resolved locally, and wraps the
    resulting agents in a ConsensusEngine.
    """
    agents = []
    for requested in settings.consensus_models:
        # Resolve the requested model, falling back (and auto-pulling) if needed.
        resolved_name, is_fallback = _resolve_model_with_fallback(
            requested_model=requested,
            require_vision=False,
            auto_pull=True,
        )

        # Skip unavailable models rather than aborting the whole engine.
        if not _check_model_available(resolved_name):
            logger.error(
                "Ollama unreachable and no local models available for consensus. "
                "Start Ollama with 'ollama serve' or use --backend claude explicitly."
            )
            continue

        if is_fallback:
            logger.info("Using fallback model %s (requested was unavailable)", resolved_name)

        # Tool support varies by model; build tools/prompt accordingly.
        supports_tools = _model_supports_tools(resolved_name)
        agents.append(
            _create_ollama_agent(
                db_file=db_file,
                model_name=resolved_name,
                tools_list=_build_tools_list(supports_tools, skip_mcp, resolved_name),
                full_prompt=_build_prompt(supports_tools, session_id),
                use_tools=supports_tools,
            )
        )

    return ConsensusEngine(agents)
|
||||
|
||||
|
||||
def create_timmy(
|
||||
db_file: str = "timmy.db",
|
||||
backend: str | None = None,
|
||||
@@ -289,6 +331,13 @@ def create_timmy(
|
||||
Returns an Agno Agent or backend-specific agent — all expose
|
||||
print_response(message, stream).
|
||||
"""
|
||||
if settings.consensus_enabled:
|
||||
return create_consensus_timmy(
|
||||
db_file=db_file,
|
||||
skip_mcp=skip_mcp,
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
resolved = _resolve_backend(backend)
|
||||
|
||||
if resolved == "claude":
|
||||
|
||||
93
src/timmy/consensus.py
Normal file
93
src/timmy/consensus.py
Normal file
@@ -0,0 +1,93 @@
|
||||
"""Consensus engine for multi-model decision making."""
|
||||
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, List
|
||||
|
||||
from config import settings
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from timmy.agent import TimmyAgent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ConsensusEngine:
    """Queries multiple models and compares their outputs for consensus.

    Each configured agent is asked the same question; if all answers match
    exactly, the first answer is returned. Otherwise the question is
    escalated to a single higher-tier model.

    NOTE(review): consensus is exact string equality between model outputs,
    which free-form LLM responses will rarely satisfy — confirm this is the
    intended bar (vs. e.g. a semantic comparison).
    """

    def __init__(self, models: List["TimmyAgent"]):
        """
        Args:
            models: A list of agent instances to be used for consensus.
        """
        self.models = models

    def run(self, message: str) -> str:
        """
        Runs the consensus check by querying all models and comparing their outputs.

        Args:
            message: The input message to be sent to the models.

        Returns:
            The response from the primary model if there is a consensus,
            otherwise the response from the escalation model.
        """
        if not self.models:
            return "No models configured for consensus."

        # A failed query is recorded as None so indices line up with models;
        # None never counts toward agreement (see _check_consensus).
        responses: list[str | None] = []
        for model in self.models:
            try:
                result = model.run(message, stream=False)
                responses.append(result.content)
            except Exception:
                # logger.exception captures the traceback; lazy %-args avoid
                # formatting cost when the level is disabled.
                logger.exception("Error querying model %r", model)
                responses.append(None)

        if self._check_consensus(responses):
            logger.info("Consensus reached.")
            # TODO: Add metric tracking for consensus success
            return responses[0]

        logger.warning("Consensus not reached. Escalating to a higher-tier model.")
        # TODO: Add metric tracking for consensus failure
        return self._escalate(message)

    def _escalate(self, message: str) -> str:
        """
        Escalates to a higher-tier model to get a final response.

        Args:
            message: The input message to be sent to the escalation model.

        Returns:
            The response from the escalation model, or a fixed error string
            if the escalation model cannot be created or queried.
        """
        # Local import breaks the circular dependency with timmy.agent.
        from timmy.agent import create_timmy

        try:
            # NOTE(review): create_timmy's `backend` argument normally names a
            # backend ("claude"/"grok"); consensus_escalation_model holds a
            # model id ("grok-3-fast") — confirm this mapping is intended.
            escalation_model = create_timmy(backend=settings.consensus_escalation_model)
            response = escalation_model.run(message, stream=False)
            return response.content
        except Exception:
            logger.exception("Error querying escalation model")
            return "Error during escalation."

    def _check_consensus(self, responses: List["str | None"]) -> bool:
        """
        Checks if the responses from the models are in agreement.

        A None entry marks a model that failed to answer; any None breaks
        consensus (previously two failed models — None == None — counted as
        agreement and run() would return None instead of a str).

        Args:
            responses: A list of responses from the models (None = failure).

        Returns:
            True if at least two responses exist and all are equal and
            non-None, False otherwise.
        """
        if len(responses) < 2:
            return False

        first = responses[0]
        if first is None:
            return False
        # None != first also rejects any later failed response.
        return all(resp == first for resp in responses[1:])
|
||||
Reference in New Issue
Block a user