# tests/timmy/test_prompts.py

from timmy.prompts import STATUS_PROMPT, SYSTEM_PROMPT, get_system_prompt


def test_system_prompt_not_empty():
    """SYSTEM_PROMPT must contain something other than whitespace."""
    meaningful_text = SYSTEM_PROMPT.strip()
    assert meaningful_text


def test_system_prompt_no_persona_identity():
    """SYSTEM_PROMPT must be free of persona identity phrases.

    The comparison is case-insensitive: the prompt is lower-cased before
    each banned phrase is looked up.
    """
    lowered = SYSTEM_PROMPT.lower()
    banned_phrases = ("sir, affirmative", "christian", "bitcoin")
    for phrase in banned_phrases:
        assert phrase not in lowered


def test_system_prompt_references_local():
    """SYSTEM_PROMPT must mention "local" (matched case-insensitively)."""
    lowered = SYSTEM_PROMPT.lower()
    assert "local" in lowered


def test_system_prompt_is_multiline():
    """SYSTEM_PROMPT must contain at least one newline character."""
    newline = "\n"
    assert newline in SYSTEM_PROMPT


def test_status_prompt_not_empty():
    """STATUS_PROMPT must contain something other than whitespace."""
    meaningful_text = STATUS_PROMPT.strip()
    assert meaningful_text


def test_status_prompt_no_persona():
    """STATUS_PROMPT must not contain the persona name (case-sensitive check)."""
    persona_name = "Timmy"
    assert persona_name not in STATUS_PROMPT


def test_prompts_are_distinct():
    """The system prompt and the status prompt must not be the same text."""
    system_text, status_text = SYSTEM_PROMPT, STATUS_PROMPT
    assert system_text != status_text


def test_get_system_prompt_injects_model_name():
    """The rendered prompt must carry a real model name, not the template slot.

    Renders the prompt with tools disabled, then checks that the literal
    "{model_name}" placeholder is gone and that one of the expected model
    family names appears in its place.
    """
    rendered = get_system_prompt(tools_enabled=False)
    # The placeholder must have been substituted.
    assert "{model_name}" not in rendered
    # NOTE(review): the accepted names are hard-coded here; presumably they
    # mirror the configured model — confirm against the settings module.
    assert any(family in rendered for family in ("llama3.1", "qwen"))


def test_full_prompt_brevity_first():
    """Full prompt should front-load brevity instructions before other content.

    Locates the first occurrence of the "BREVITY", "TOOL USAGE" and "MEMORY"
    markers in the tools-enabled prompt and requires the brevity marker to
    appear before the other two.
    """
    prompt = get_system_prompt(tools_enabled=True)
    brevity_pos = prompt.find("BREVITY")
    tool_pos = prompt.find("TOOL USAGE")
    memory_pos = prompt.find("MEMORY")
    # str.find returns -1 for a missing marker. Check presence of every
    # section first so a missing section is reported as missing rather than
    # as a misleading "must come before" ordering failure.
    assert brevity_pos != -1, "Full prompt must contain BREVITY section"
    assert tool_pos != -1, "Full prompt must contain TOOL USAGE section"
    assert memory_pos != -1, "Full prompt must contain MEMORY section"
    # Brevity section must appear before tools and memory
    assert brevity_pos < tool_pos, "Brevity must come before tool usage"
    assert brevity_pos < memory_pos, "Brevity must come before memory"


def test_full_prompt_no_markdown_headers():
    """Reject markdown headers (## / ###) anywhere in the full prompt.

    Headers in the prompt would teach the model to answer in markdown, so
    every line is stripped of surrounding whitespace and checked for a
    leading "## " or "### ".
    """
    full_prompt = get_system_prompt(tools_enabled=True)
    for text in (raw.strip() for raw in full_prompt.splitlines()):
        is_header = text.startswith("## ")
        assert not is_header, f"Full prompt uses markdown header: {text!r}"
        is_sub_header = text.startswith("### ")
        assert not is_sub_header, (
            f"Full prompt uses markdown sub-header: {text!r}"
        )


def test_full_prompt_plain_text_brevity():
    """The tools-enabled prompt must mention "plain text" (any letter case)."""
    full_prompt = get_system_prompt(tools_enabled=True)
    lowered = full_prompt.lower()
    assert "plain text" in lowered


def test_lite_prompt_brevity():
    """The tools-disabled (lite) prompt must also ask for brevity.

    Checked case-insensitively: the prompt has to contain "brief" and at
    least one of "plain text" or "not markdown".
    """
    lite_prompt = get_system_prompt(tools_enabled=False).lower()
    assert "brief" in lite_prompt
    assert any(phrase in lite_prompt for phrase in ("plain text", "not markdown"))
feat: code quality audit + autoresearch integration + infra hardening (#150) 2026-03-08 12:50:44 -04:00			`from timmy.prompts import STATUS_PROMPT, SYSTEM_PROMPT, get_system_prompt`
feat: scaffold Timmy Time Mission Control (v1.0.0 Genesis) - src/timmy/ — Agno agent wrapper (llama3.2 via Ollama, SQLite memory, TIMMY_SYSTEM_PROMPT) - src/dashboard/ — FastAPI + HTMX + Jinja2 Mission Control UI - /health + /health/status (Ollama ping, HTMX 30s poll) - /agents list + /agents/timmy/chat (HTMX form submission) - static/style.css — dark terminal mission-control aesthetic - tests/ — 27 pytest tests (prompts, agent config, dashboard routes); no Ollama required - pyproject.toml — hatchling build, pytest configured with pythonpath=src https://claude.ai/code/session_01M4L3R98N5fgXFZRvV8X9b6 2026-02-19 19:05:01 +00:00

			`def test_system_prompt_not_empty():`
Add pre-commit hook enforcing 30s test suite time limit (#132) 2026-03-05 19:45:38 -05:00			`assert SYSTEM_PROMPT.strip()`
feat: scaffold Timmy Time Mission Control (v1.0.0 Genesis) - src/timmy/ — Agno agent wrapper (llama3.2 via Ollama, SQLite memory, TIMMY_SYSTEM_PROMPT) - src/dashboard/ — FastAPI + HTMX + Jinja2 Mission Control UI - /health + /health/status (Ollama ping, HTMX 30s poll) - /agents list + /agents/timmy/chat (HTMX form submission) - static/style.css — dark terminal mission-control aesthetic - tests/ — 27 pytest tests (prompts, agent config, dashboard routes); no Ollama required - pyproject.toml — hatchling build, pytest configured with pythonpath=src https://claude.ai/code/session_01M4L3R98N5fgXFZRvV8X9b6 2026-02-19 19:05:01 +00:00

Claude/remove persona system f vgt m (#126) * Remove persona system, identity, and all Timmy references Strip the codebase to pure orchestration logic: - Delete TIMMY_IDENTITY.md and memory/self/identity.md - Gut brain/identity.py to no-op stubs (empty returns) - Remove all system prompts reinforcing Timmy's character, faith, sovereignty, sign-off ("Sir, affirmative"), and agent roster - Replace identity-laden prompts with generic local-AI-assistant prompts - Remove "You work for Timmy" from all sub-agent system prompts - Rename PersonaTools → AgentTools, PERSONA_TOOLKITS → AGENT_TOOLKITS - Replace "timmy" agent ID with "orchestrator" across routes, marketplace, tools catalog, and orchestrator class - Strip Timmy references from config comments, templates, telegram bot, chat API, and dashboard UI - Delete tests/brain/test_identity.py entirely - Fix all test assertions that checked for persona identity content 729 tests pass (2 pre-existing failures in test_calm.py unrelated). https://claude.ai/code/session_01LjQGUE6nk9W9674zaxrYxy * Add Taskosaur (PM + AI task execution) to docker-compose Spins up Taskosaur alongside the dashboard on `docker compose up`: - postgres:16-alpine (port 5432, Taskosaur DB) - redis:7-alpine (Bull queue backend) - taskosaur (ports 3000 API / 3001 UI) - dashboard now depends_on taskosaur healthy - TASKOSAUR_API_URL injected into dashboard environment Dashboard can reach Taskosaur at http://taskosaur:3000/api on the internal network. Frontend UI accessible at http://localhost:3001. https://claude.ai/code/session_01LjQGUE6nk9W9674zaxrYxy --------- Co-authored-by: Claude <noreply@anthropic.com> 2026-03-04 12:00:49 -05:00			`def test_system_prompt_no_persona_identity():`
			`"""System prompt should NOT contain persona identity references."""`
Add pre-commit hook enforcing 30s test suite time limit (#132) 2026-03-05 19:45:38 -05:00			`prompt = SYSTEM_PROMPT.lower()`
Claude/remove persona system f vgt m (#126) * Remove persona system, identity, and all Timmy references Strip the codebase to pure orchestration logic: - Delete TIMMY_IDENTITY.md and memory/self/identity.md - Gut brain/identity.py to no-op stubs (empty returns) - Remove all system prompts reinforcing Timmy's character, faith, sovereignty, sign-off ("Sir, affirmative"), and agent roster - Replace identity-laden prompts with generic local-AI-assistant prompts - Remove "You work for Timmy" from all sub-agent system prompts - Rename PersonaTools → AgentTools, PERSONA_TOOLKITS → AGENT_TOOLKITS - Replace "timmy" agent ID with "orchestrator" across routes, marketplace, tools catalog, and orchestrator class - Strip Timmy references from config comments, templates, telegram bot, chat API, and dashboard UI - Delete tests/brain/test_identity.py entirely - Fix all test assertions that checked for persona identity content 729 tests pass (2 pre-existing failures in test_calm.py unrelated). https://claude.ai/code/session_01LjQGUE6nk9W9674zaxrYxy * Add Taskosaur (PM + AI task execution) to docker-compose Spins up Taskosaur alongside the dashboard on `docker compose up`: - postgres:16-alpine (port 5432, Taskosaur DB) - redis:7-alpine (Bull queue backend) - taskosaur (ports 3000 API / 3001 UI) - dashboard now depends_on taskosaur healthy - TASKOSAUR_API_URL injected into dashboard environment Dashboard can reach Taskosaur at http://taskosaur:3000/api on the internal network. Frontend UI accessible at http://localhost:3001. https://claude.ai/code/session_01LjQGUE6nk9W9674zaxrYxy --------- Co-authored-by: Claude <noreply@anthropic.com> 2026-03-04 12:00:49 -05:00			`assert "sir, affirmative" not in prompt`
			`assert "christian" not in prompt`
			`assert "bitcoin" not in prompt`
feat: scaffold Timmy Time Mission Control (v1.0.0 Genesis) - src/timmy/ — Agno agent wrapper (llama3.2 via Ollama, SQLite memory, TIMMY_SYSTEM_PROMPT) - src/dashboard/ — FastAPI + HTMX + Jinja2 Mission Control UI - /health + /health/status (Ollama ping, HTMX 30s poll) - /agents list + /agents/timmy/chat (HTMX form submission) - static/style.css — dark terminal mission-control aesthetic - tests/ — 27 pytest tests (prompts, agent config, dashboard routes); no Ollama required - pyproject.toml — hatchling build, pytest configured with pythonpath=src https://claude.ai/code/session_01M4L3R98N5fgXFZRvV8X9b6 2026-02-19 19:05:01 +00:00

			`def test_system_prompt_references_local():`
Add pre-commit hook enforcing 30s test suite time limit (#132) 2026-03-05 19:45:38 -05:00			`assert "local" in SYSTEM_PROMPT.lower()`
feat: scaffold Timmy Time Mission Control (v1.0.0 Genesis) - src/timmy/ — Agno agent wrapper (llama3.2 via Ollama, SQLite memory, TIMMY_SYSTEM_PROMPT) - src/dashboard/ — FastAPI + HTMX + Jinja2 Mission Control UI - /health + /health/status (Ollama ping, HTMX 30s poll) - /agents list + /agents/timmy/chat (HTMX form submission) - static/style.css — dark terminal mission-control aesthetic - tests/ — 27 pytest tests (prompts, agent config, dashboard routes); no Ollama required - pyproject.toml — hatchling build, pytest configured with pythonpath=src https://claude.ai/code/session_01M4L3R98N5fgXFZRvV8X9b6 2026-02-19 19:05:01 +00:00

			`def test_system_prompt_is_multiline():`
Add pre-commit hook enforcing 30s test suite time limit (#132) 2026-03-05 19:45:38 -05:00			`assert "\n" in SYSTEM_PROMPT`
feat: scaffold Timmy Time Mission Control (v1.0.0 Genesis) - src/timmy/ — Agno agent wrapper (llama3.2 via Ollama, SQLite memory, TIMMY_SYSTEM_PROMPT) - src/dashboard/ — FastAPI + HTMX + Jinja2 Mission Control UI - /health + /health/status (Ollama ping, HTMX 30s poll) - /agents list + /agents/timmy/chat (HTMX form submission) - static/style.css — dark terminal mission-control aesthetic - tests/ — 27 pytest tests (prompts, agent config, dashboard routes); no Ollama required - pyproject.toml — hatchling build, pytest configured with pythonpath=src https://claude.ai/code/session_01M4L3R98N5fgXFZRvV8X9b6 2026-02-19 19:05:01 +00:00

			`def test_status_prompt_not_empty():`
Add pre-commit hook enforcing 30s test suite time limit (#132) 2026-03-05 19:45:38 -05:00			`assert STATUS_PROMPT.strip()`
feat: scaffold Timmy Time Mission Control (v1.0.0 Genesis) - src/timmy/ — Agno agent wrapper (llama3.2 via Ollama, SQLite memory, TIMMY_SYSTEM_PROMPT) - src/dashboard/ — FastAPI + HTMX + Jinja2 Mission Control UI - /health + /health/status (Ollama ping, HTMX 30s poll) - /agents list + /agents/timmy/chat (HTMX form submission) - static/style.css — dark terminal mission-control aesthetic - tests/ — 27 pytest tests (prompts, agent config, dashboard routes); no Ollama required - pyproject.toml — hatchling build, pytest configured with pythonpath=src https://claude.ai/code/session_01M4L3R98N5fgXFZRvV8X9b6 2026-02-19 19:05:01 +00:00

Claude/remove persona system f vgt m (#126) * Remove persona system, identity, and all Timmy references Strip the codebase to pure orchestration logic: - Delete TIMMY_IDENTITY.md and memory/self/identity.md - Gut brain/identity.py to no-op stubs (empty returns) - Remove all system prompts reinforcing Timmy's character, faith, sovereignty, sign-off ("Sir, affirmative"), and agent roster - Replace identity-laden prompts with generic local-AI-assistant prompts - Remove "You work for Timmy" from all sub-agent system prompts - Rename PersonaTools → AgentTools, PERSONA_TOOLKITS → AGENT_TOOLKITS - Replace "timmy" agent ID with "orchestrator" across routes, marketplace, tools catalog, and orchestrator class - Strip Timmy references from config comments, templates, telegram bot, chat API, and dashboard UI - Delete tests/brain/test_identity.py entirely - Fix all test assertions that checked for persona identity content 729 tests pass (2 pre-existing failures in test_calm.py unrelated). https://claude.ai/code/session_01LjQGUE6nk9W9674zaxrYxy * Add Taskosaur (PM + AI task execution) to docker-compose Spins up Taskosaur alongside the dashboard on `docker compose up`: - postgres:16-alpine (port 5432, Taskosaur DB) - redis:7-alpine (Bull queue backend) - taskosaur (ports 3000 API / 3001 UI) - dashboard now depends_on taskosaur healthy - TASKOSAUR_API_URL injected into dashboard environment Dashboard can reach Taskosaur at http://taskosaur:3000/api on the internal network. Frontend UI accessible at http://localhost:3001. https://claude.ai/code/session_01LjQGUE6nk9W9674zaxrYxy --------- Co-authored-by: Claude <noreply@anthropic.com> 2026-03-04 12:00:49 -05:00			`def test_status_prompt_no_persona():`
			`"""Status prompt should not reference a persona."""`
Add pre-commit hook enforcing 30s test suite time limit (#132) 2026-03-05 19:45:38 -05:00			`assert "Timmy" not in STATUS_PROMPT`
feat: scaffold Timmy Time Mission Control (v1.0.0 Genesis) - src/timmy/ — Agno agent wrapper (llama3.2 via Ollama, SQLite memory, TIMMY_SYSTEM_PROMPT) - src/dashboard/ — FastAPI + HTMX + Jinja2 Mission Control UI - /health + /health/status (Ollama ping, HTMX 30s poll) - /agents list + /agents/timmy/chat (HTMX form submission) - static/style.css — dark terminal mission-control aesthetic - tests/ — 27 pytest tests (prompts, agent config, dashboard routes); no Ollama required - pyproject.toml — hatchling build, pytest configured with pythonpath=src https://claude.ai/code/session_01M4L3R98N5fgXFZRvV8X9b6 2026-02-19 19:05:01 +00:00

			`def test_prompts_are_distinct():`
Add pre-commit hook enforcing 30s test suite time limit (#132) 2026-03-05 19:45:38 -05:00			`assert SYSTEM_PROMPT != STATUS_PROMPT`
feat: Timmy fixes and improvements (#72) * test: remove hardcoded sleeps, add pytest-timeout - Replace fixed time.sleep() calls with intelligent polling or WebDriverWait - Add pytest-timeout dependency and --timeout=30 to prevent hangs - Fixes test flakiness and improves test suite speed * feat: add Aider AI tool to Forge's toolkit - Add Aider tool that calls local Ollama (qwen2.5:14b) for AI coding assist - Register tool in Forge's code toolkit - Add functional tests for the Aider tool * config: add opencode.json with local Ollama provider for sovereign AI * feat: Timmy fixes and improvements ## Bug Fixes - Fix read_file path resolution: add ~ expansion, proper relative path handling - Add repo_root to config.py with auto-detection from .git location - Fix hardcoded llama3.2 - now dynamic from settings.ollama_model ## Timmy's Requests - Add communication protocol to AGENTS.md (read context first, explain changes) - Create DECISIONS.md for architectural decision documentation - Add reasoning guidance to system prompts (step-by-step, state uncertainty) - Update tests to reflect correct model name (llama3.1:8b-instruct) ## Testing - All 177 dashboard tests pass - All 32 prompt/tool tests pass --------- Co-authored-by: Alexander Payne <apayne@MM.local> 2026-02-26 23:39:13 -05:00

			`def test_get_system_prompt_injects_model_name():`
			`"""System prompt should inject actual model name from config."""`
			`prompt = get_system_prompt(tools_enabled=False)`
Claude/remove persona system f vgt m (#126) * Remove persona system, identity, and all Timmy references Strip the codebase to pure orchestration logic: - Delete TIMMY_IDENTITY.md and memory/self/identity.md - Gut brain/identity.py to no-op stubs (empty returns) - Remove all system prompts reinforcing Timmy's character, faith, sovereignty, sign-off ("Sir, affirmative"), and agent roster - Replace identity-laden prompts with generic local-AI-assistant prompts - Remove "You work for Timmy" from all sub-agent system prompts - Rename PersonaTools → AgentTools, PERSONA_TOOLKITS → AGENT_TOOLKITS - Replace "timmy" agent ID with "orchestrator" across routes, marketplace, tools catalog, and orchestrator class - Strip Timmy references from config comments, templates, telegram bot, chat API, and dashboard UI - Delete tests/brain/test_identity.py entirely - Fix all test assertions that checked for persona identity content 729 tests pass (2 pre-existing failures in test_calm.py unrelated). https://claude.ai/code/session_01LjQGUE6nk9W9674zaxrYxy * Add Taskosaur (PM + AI task execution) to docker-compose Spins up Taskosaur alongside the dashboard on `docker compose up`: - postgres:16-alpine (port 5432, Taskosaur DB) - redis:7-alpine (Bull queue backend) - taskosaur (ports 3000 API / 3001 UI) - dashboard now depends_on taskosaur healthy - TASKOSAUR_API_URL injected into dashboard environment Dashboard can reach Taskosaur at http://taskosaur:3000/api on the internal network. Frontend UI accessible at http://localhost:3001. https://claude.ai/code/session_01LjQGUE6nk9W9674zaxrYxy --------- Co-authored-by: Claude <noreply@anthropic.com> 2026-03-04 12:00:49 -05:00			`# Should contain the model name from settings, not the placeholder`
			`assert "{model_name}" not in prompt`
			`assert "llama3.1" in prompt or "qwen" in prompt`
fix: brevity tuning — plain text prompts, markdown=False, front-loaded brevity Closes #71: Timmy was responding with elaborate markdown formatting (tables, headers, emoji, bullet lists) for simple questions. Root causes fixed: 1. Agno Agent markdown=True flag explicitly told the model to format responses as markdown. Set to False in both agent.py and agents/base.py. 2. SYSTEM_PROMPT_FULL used ## and ### markdown headers, bold (**), and numbered lists — teaching by example that markdown is expected. Rewritten to plain text with labeled sections. 3. Brevity instructions were buried at the bottom of the full prompt. Moved to immediately after the opening line as 'VOICE AND BREVITY' with explicit override priority. 4. Orchestrator prompt in agents.yaml was silent on response style. Added 'Voice: brief, plain, direct' with concrete examples. The full prompt is now 41 lines shorter (124 → 83). The prompt itself practices the brevity it preaches. SOUL.md alignment: - 'Brevity is a kindness' — now front-loaded in both base and agent prompt - 'I do not fill silence with noise' — explicit in both tiers - 'I speak plainly. I prefer short sentences.' — structural enforcement 4 new tests guard against regression: - test_full_prompt_brevity_first: brevity section before tools/memory - test_full_prompt_no_markdown_headers: no ## or ### in prompt text - test_full_prompt_plain_text_brevity: 'plain text' instruction present - test_lite_prompt_brevity: lite tier also instructs brevity 2026-03-14 17:15:56 -04:00

			`def test_full_prompt_brevity_first():`
			`"""Full prompt should front-load brevity instructions before other content."""`
			`prompt = get_system_prompt(tools_enabled=True)`
			`brevity_pos = prompt.find("BREVITY")`
			`tool_pos = prompt.find("TOOL USAGE")`
			`memory_pos = prompt.find("MEMORY")`
			`# Brevity section must appear before tools and memory`
			`assert brevity_pos != -1, "Full prompt must contain BREVITY section"`
			`assert brevity_pos < tool_pos, "Brevity must come before tool usage"`
			`assert brevity_pos < memory_pos, "Brevity must come before memory"`


			`def test_full_prompt_no_markdown_headers():`
			`"""Full prompt should not use markdown headers (## / ###) that teach`
			`the model to respond in markdown."""`
			`prompt = get_system_prompt(tools_enabled=True)`
			`for line in prompt.splitlines():`
			`stripped = line.strip()`
			`assert not stripped.startswith("## "), f"Full prompt uses markdown header: {stripped!r}"`
			`assert not stripped.startswith("### "), (`
			`f"Full prompt uses markdown sub-header: {stripped!r}"`
			`)`


			`def test_full_prompt_plain_text_brevity():`
			`"""Full prompt should explicitly instruct plain text output."""`
			`prompt = get_system_prompt(tools_enabled=True).lower()`
			`assert "plain text" in prompt`


			`def test_lite_prompt_brevity():`
			`"""Lite prompt should also instruct brevity."""`
			`prompt = get_system_prompt(tools_enabled=False).lower()`
			`assert "brief" in prompt`
			`assert "plain text" in prompt or "not markdown" in prompt`