# timmy-config/tests/test_sovereignty_enforcement.py

"""Sovereignty enforcement tests.

These tests implement the acceptance criteria from issue #94:
  [p0] Cut cloud inheritance from active harness config and cron

Every test in this file catches a specific way that cloud
dependency can creep back into the active config. If any test
fails, Timmy is phoning home.

These tests are designed to be run in CI and to BLOCK any commit
that reintroduces cloud defaults.
"""

from __future__ import annotations

import json
from pathlib import Path

import yaml
import pytest

# Repository root: the directory above the one that contains this file.
REPO_ROOT = Path(__file__).parent.parent
# Files under test, addressed relative to the repository root.
CONFIG_PATH = REPO_ROOT.joinpath("config.yaml")
CRON_PATH = REPO_ROOT.joinpath("cron", "jobs.json")

# Hostnames of hosted inference APIs. None of these may show up in a
# default or fallback base_url.
CLOUD_URLS = [
    "generativelanguage.googleapis.com",
    "api.openai.com",
    "chatgpt.com",
    "api.anthropic.com",
    "openrouter.ai",
]

# Lower-case name fragments that mark a model as cloud-hosted. They are
# matched as substrings of the lower-cased model name.
CLOUD_MODELS = [
    "gpt-4",
    "gpt-5",
    "gpt-4o",
    "claude",
    "gemini",
]


@pytest.fixture
def config():
    """Load the harness configuration from ``config.yaml``.

    Returns:
        The document parsed by ``yaml.safe_load``.

    The file is decoded as UTF-8 explicitly. Without an explicit encoding
    ``Path.read_text`` falls back to the locale's preferred encoding, so the
    same file could parse differently (or fail to decode) from one machine
    to the next.
    """
    return yaml.safe_load(CONFIG_PATH.read_text(encoding="utf-8"))


@pytest.fixture
def cron_jobs():
    """Load the cron job definitions from ``cron/jobs.json``.

    Returns:
        If the top-level JSON value is an object, its ``"jobs"`` entry when
        present, otherwise the object itself. Any other top-level value
        (e.g. an array) is returned unchanged.

    The file is decoded as UTF-8 explicitly rather than with the locale's
    preferred encoding, so the result does not depend on the host machine.
    """
    data = json.loads(CRON_PATH.read_text(encoding="utf-8"))
    if isinstance(data, dict):
        return data.get("jobs", data)
    return data


# ── Config defaults ──────────────────────────────────────────────────

class TestDefaultModelIsLocal:
    """The default model must point to localhost.

    Covers the ``model`` section of config.yaml: the model name, the
    endpoint it is served from, and the provider used to reach it.
    """

    @staticmethod
    def _is_local_url(url):
        """Return True when the host of *url* is this machine.

        The host is extracted with ``urlsplit`` instead of searching the raw
        string, so a cloud URL that merely mentions "localhost" in its path,
        query or as a sub-label (``localhost.example.com``) is rejected.

        Args:
            url: The base URL to inspect; may omit the scheme.

        Returns:
            True for ``localhost``, ``*.localhost`` and loopback IP
            addresses; False otherwise, including for non-string or empty
            input and for values that cannot be parsed.
        """
        from ipaddress import ip_address
        from urllib.parse import urlsplit

        if not isinstance(url, str) or not url.strip():
            return False
        candidate = url.strip()
        # Without "//" a value like "localhost:11434/v1" is parsed with
        # "localhost" as the scheme and no host at all.
        if "://" not in candidate:
            candidate = "//" + candidate
        try:
            host = (urlsplit(candidate).hostname or "").rstrip(".")
        except ValueError:
            return False
        if host == "localhost" or host.endswith(".localhost"):
            return True
        try:
            return ip_address(host).is_loopback
        except ValueError:
            # Not an IP literal and not a localhost name.
            return False

    def test_default_model_is_not_cloud(self, config):
        """model.default must not contain any known cloud model fragment."""
        model = config["model"]["default"]
        lowered = model.lower()
        for cloud in CLOUD_MODELS:
            assert cloud not in lowered, \
                f"Default model '{model}' looks like a cloud model"

    def test_default_base_url_is_localhost(self, config):
        """model.base_url must resolve to localhost or a loopback address."""
        base_url = config["model"]["base_url"]
        assert self._is_local_url(base_url), \
            f"Default base_url '{base_url}' is not local"

    def test_default_provider_is_local(self, config):
        """model.provider must be 'custom', 'ollama' or 'local'."""
        provider = config["model"]["provider"]
        assert provider in ("custom", "ollama", "local"), \
            f"Default provider '{provider}' may route to cloud"


class TestFallbackIsLocal:
    """The fallback model must also be local — this is the #94 fix.

    Covers the optional ``fallback_model`` section of config.yaml. A missing
    or null section, or a missing/null field inside it, is treated as
    "nothing configured" rather than crashing the test.
    """

    @staticmethod
    def _is_local_url(url):
        """Return True when the host of *url* is this machine.

        The host is extracted with ``urlsplit`` instead of searching the raw
        string, so a cloud URL that merely mentions "localhost" in its path,
        query or as a sub-label (``localhost.example.com``) is rejected.

        Args:
            url: The base URL to inspect; may omit the scheme.

        Returns:
            True for ``localhost``, ``*.localhost`` and loopback IP
            addresses; False otherwise, including for non-string or empty
            input and for values that cannot be parsed.
        """
        from ipaddress import ip_address
        from urllib.parse import urlsplit

        if not isinstance(url, str) or not url.strip():
            return False
        candidate = url.strip()
        # Without "//" a value like "localhost:11434/v1" is parsed with
        # "localhost" as the scheme and no host at all.
        if "://" not in candidate:
            candidate = "//" + candidate
        try:
            host = (urlsplit(candidate).hostname or "").rstrip(".")
        except ValueError:
            return False
        if host == "localhost" or host.endswith(".localhost"):
            return True
        try:
            return ip_address(host).is_loopback
        except ValueError:
            # Not an IP literal and not a localhost name.
            return False

    def test_fallback_base_url_is_localhost(self, config):
        """fallback_model.base_url, when set, must point to localhost."""
        fb = config.get("fallback_model") or {}
        base_url = fb.get("base_url") or ""
        if base_url:
            assert self._is_local_url(base_url), \
                f"Fallback base_url '{base_url}' is not local — cloud leak!"

    def test_fallback_has_no_cloud_url(self, config):
        """fallback_model.base_url must not name any cloud API host."""
        fb = config.get("fallback_model") or {}
        # Hostnames are case-insensitive, so compare in lower case.
        base_url = (fb.get("base_url") or "").lower()
        for cloud_url in CLOUD_URLS:
            assert cloud_url not in base_url, \
                f"Fallback model routes to cloud: {cloud_url}"

    def test_fallback_model_name_is_local(self, config):
        """fallback_model.model should not be a cloud model name."""
        fb = config.get("fallback_model") or {}
        model = fb.get("model") or ""
        lowered = model.lower()
        for cloud in CLOUD_MODELS:
            assert cloud not in lowered, \
                f"Fallback model name '{model}' looks like cloud"


# ── Cron jobs ────────────────────────────────────────────────────────

class TestCronSovereignty:
    """Enabled cron jobs must never inherit cloud defaults."""

    @staticmethod
    def _enabled_jobs(cron_jobs):
        """Yield ``(name, job)`` for every enabled job mapping in *cron_jobs*.

        Entries that are not dicts, and jobs whose ``enabled`` flag is
        missing or falsy, are skipped. *name* is the job's ``name``, falling
        back to its ``id`` and then to ``"?"``; it is only used to label
        assertion failures.
        """
        for job in cron_jobs:
            if isinstance(job, dict) and job.get("enabled", False):
                yield job.get("name", job.get("id", "?")), job

    def test_enabled_crons_have_explicit_model(self, cron_jobs):
        """Every enabled cron job must have a non-null model field.

        When model is null, the job inherits from config.yaml's default.
        Even if the default is local today, a future edit could change it.
        Explicit is always safer than implicit.
        """
        for name, job in self._enabled_jobs(cron_jobs):
            model = job.get("model")
            assert model is not None and model != "", \
                f"Enabled cron job '{name}' has null model — will inherit default"

    def test_enabled_crons_have_explicit_provider(self, cron_jobs):
        """Every enabled cron job must have a non-null provider field."""
        for name, job in self._enabled_jobs(cron_jobs):
            provider = job.get("provider")
            assert provider is not None and provider != "", \
                f"Enabled cron job '{name}' has null provider — will inherit default"

    def test_no_enabled_cron_uses_cloud_url(self, cron_jobs):
        """No enabled cron job should have a cloud base_url."""
        for name, job in self._enabled_jobs(cron_jobs):
            # A missing or null base_url counts as empty. Hostnames are
            # case-insensitive, so compare in lower case.
            base_url = (job.get("base_url") or "").lower()
            for cloud_url in CLOUD_URLS:
                assert cloud_url not in base_url, \
                    f"Cron '{name}' routes to cloud: {cloud_url}"


# ── Custom providers ─────────────────────────────────────────────────

class TestCustomProviders:
    """Cloud providers can exist but must not be the default path.

    Covers the ``custom_providers`` list of config.yaml. A missing or null
    list is treated as empty, and a provider without a ``base_url`` is
    treated as not local.
    """

    @staticmethod
    def _is_local_url(url):
        """Return True when the host of *url* is this machine.

        The host is extracted with ``urlsplit`` instead of searching the raw
        string, so a cloud URL that merely mentions "localhost" in its path,
        query or as a sub-label (``localhost.example.com``) is rejected.

        Args:
            url: The base URL to inspect; may omit the scheme.

        Returns:
            True for ``localhost``, ``*.localhost`` and loopback IP
            addresses; False otherwise, including for non-string or empty
            input and for values that cannot be parsed.
        """
        from ipaddress import ip_address
        from urllib.parse import urlsplit

        if not isinstance(url, str) or not url.strip():
            return False
        candidate = url.strip()
        # Without "//" a value like "localhost:11434/v1" is parsed with
        # "localhost" as the scheme and no host at all.
        if "://" not in candidate:
            candidate = "//" + candidate
        try:
            host = (urlsplit(candidate).hostname or "").rstrip(".")
        except ValueError:
            return False
        if host == "localhost" or host.endswith(".localhost"):
            return True
        try:
            return ip_address(host).is_loopback
        except ValueError:
            # Not an IP literal and not a localhost name.
            return False

    def test_local_provider_exists(self, config):
        """At least one custom provider must be local."""
        providers = config.get("custom_providers") or []
        has_local = any(
            self._is_local_url(p.get("base_url")) for p in providers
        )
        assert has_local, "No local custom provider defined"

    def test_first_provider_is_local(self, config):
        """The first custom_provider should be the local one.

        Hermes resolves 'custom' provider by scanning the list in order.
        If a cloud provider is listed first, it becomes the implicit default.
        An empty list passes here; test_local_provider_exists covers it.
        """
        providers = config.get("custom_providers") or []
        if providers:
            first = providers[0]
            base_url = first.get("base_url") or ""
            assert self._is_local_url(base_url), \
                f"First custom_provider '{first.get('name')}' is not local"


# ── TTS/STT ──────────────────────────────────────────────────────────

class TestVoiceSovereignty:
    """Voice services should prefer local providers.

    A ``tts`` or ``stt`` section that is missing or null is read as an empty
    mapping, so both shapes produce an assertion result rather than an
    AttributeError.
    """

    def test_tts_default_is_local(self, config):
        """tts.provider must be 'edge', 'neutts' or 'local'.

        An unset provider fails: unlike STT, the empty string is not in the
        accepted set.
        """
        # NOTE(review): 'edge' is accepted as local here; confirm that
        # provider does not reach a hosted endpoint.
        tts_section = config.get("tts") or {}
        tts_provider = tts_section.get("provider", "")
        assert tts_provider in ("edge", "neutts", "local"), \
            f"TTS provider '{tts_provider}' may use cloud"

    def test_stt_default_is_local(self, config):
        """stt.provider must be 'local', 'whisper' or unset."""
        stt_section = config.get("stt") or {}
        # A null provider means the same as a missing one: unset.
        stt_provider = stt_section.get("provider") or ""
        assert stt_provider in ("local", "whisper", ""), \
            f"STT provider '{stt_provider}' may use cloud"


# ── Anthropic Ban ────────────────────────────────────────────────────

class TestAnthropicBan:
    """Anthropic is permanently banned from this system.

    Not deprecated. Not discouraged. Banned. Any reference to Anthropic
    as a provider, model, or API endpoint in active wizard configs,
    playbooks, or fallback chains is a hard failure.

    Detection is a case-insensitive substring search of each file's text
    for every entry in BANNED_PATTERNS.
    """

    BANNED_PATTERNS = [
        "provider: anthropic",
        "provider: \"anthropic\"",
        # Single-quoted YAML and JSON spellings of the same setting.
        "provider: 'anthropic'",
        "\"provider\": \"anthropic\"",
        "anthropic/claude",
        "claude-opus",
        "claude-sonnet",
        "claude-haiku",
        "api.anthropic.com",
    ]

    ACTIVE_CONFIG_DIRS = [
        "wizards",
        "playbooks",
    ]

    ACTIVE_CONFIG_FILES = [
        "fallback-portfolios.yaml",
        "config.yaml",
    ]

    # File patterns treated as configuration inside an active directory.
    _CONFIG_GLOBS = ("*.yaml", "*.yml", "*.json")

    def _config_files_in(self, dir_path):
        """Return every config file below *dir_path*, searched recursively.

        Files are grouped by pattern in _CONFIG_GLOBS order.
        """
        found = []
        for glob in self._CONFIG_GLOBS:
            found.extend(dir_path.rglob(glob))
        return found

    def _assert_no_banned_patterns(self, path):
        """Fail if the text of *path* contains any banned pattern.

        The file is decoded as UTF-8 with undecodable bytes replaced. Every
        pattern is plain ASCII, so replacement cannot hide a match and a
        stray byte cannot abort the scan.
        """
        content = path.read_text(encoding="utf-8", errors="replace").lower()
        for pattern in self.BANNED_PATTERNS:
            assert pattern.lower() not in content, \
                f"BANNED: {path.name} contains \"{pattern}\". Anthropic is permanently banned."

    def _scan_active_configs(self):
        """Collect all active config files for scanning.

        Returns:
            Config files found under each existing ACTIVE_CONFIG_DIRS
            directory, followed by each existing ACTIVE_CONFIG_FILES entry.
        """
        files = []
        for dir_name in self.ACTIVE_CONFIG_DIRS:
            dir_path = REPO_ROOT / dir_name
            if dir_path.exists():
                files.extend(self._config_files_in(dir_path))
        for fname in self.ACTIVE_CONFIG_FILES:
            fpath = REPO_ROOT / fname
            if fpath.exists():
                files.append(fpath)
        return files

    def test_no_anthropic_in_wizard_configs(self):
        """No wizard config may reference Anthropic as a provider or model.

        Scans .yaml, .yml and .json files, matching _scan_active_configs.
        """
        wizard_dir = REPO_ROOT / "wizards"
        if not wizard_dir.exists():
            pytest.skip("No wizards directory")
        for config_file in self._config_files_in(wizard_dir):
            self._assert_no_banned_patterns(config_file)

    def test_no_anthropic_in_playbooks(self):
        """No playbook may reference Anthropic models.

        Scans .yaml, .yml and .json files, matching _scan_active_configs.
        """
        playbook_dir = REPO_ROOT / "playbooks"
        if not playbook_dir.exists():
            pytest.skip("No playbooks directory")
        for pb_file in self._config_files_in(playbook_dir):
            self._assert_no_banned_patterns(pb_file)

    def test_no_anthropic_in_fallback_chain(self):
        """Fallback portfolios must not include Anthropic."""
        fb_path = REPO_ROOT / "fallback-portfolios.yaml"
        if not fb_path.exists():
            pytest.skip("No fallback-portfolios.yaml")
        self._assert_no_banned_patterns(fb_path)

    def test_no_anthropic_api_key_in_bootstrap(self):
        """Wizard bootstrap must not require ANTHROPIC_API_KEY."""
        bootstrap_path = REPO_ROOT / "hermes-sovereign" / "wizard-bootstrap" / "wizard_bootstrap.py"
        if not bootstrap_path.exists():
            pytest.skip("No wizard_bootstrap.py")
        content = bootstrap_path.read_text(encoding="utf-8", errors="replace")
        assert "ANTHROPIC_API_KEY" not in content, \
            "BANNED: wizard_bootstrap.py still checks for ANTHROPIC_API_KEY"
        assert "ANTHROPIC_TOKEN" not in content, \
            "BANNED: wizard_bootstrap.py still checks for ANTHROPIC_TOKEN"
        assert "\"anthropic\"" not in content.lower(), \
            "BANNED: wizard_bootstrap.py still lists anthropic as a dependency"