Merge branch 'main' into perplexity/fleet-behaviour-hardening

2026-04-10 09:37:42 +00:00
parent 41044d36ae fa9e83ac95
commit b172d23b98
9 changed files with 1113 additions and 77 deletions
--- a/scripts/architecture_linter.py
+++ b/scripts/architecture_linter.py
@@ -9,7 +9,7 @@ import re
 SOVEREIGN_RULES = [
    (r"https?://(api\.openai\.com|api\.anthropic\.com)", "CRITICAL: External cloud API detected. Use local custom_provider instead."),
    (r"provider: (openai|anthropic)", "WARNING: Direct cloud provider used. Ensure fallback_model is configured."),
-    (r"api_key: ['"][^'"\s]{10,}['"]", "SECURITY: Hardcoded API key detected. Use environment variables.")
+    (r"api_key:\s*['\"][A-Za-z0-9_\-]{16,}['\"]", "SECURITY: Hardcoded API key detected. Use environment variables.")
 ]

 def lint_file(path):
--- a/scripts/architecture_linter_v2.py
+++ b/scripts/architecture_linter_v2.py
@@ -5,122 +5,233 @@ Part of the Gemini Sovereign Governance System.

 Enforces architectural boundaries, security, and documentation standards
 across the Timmy Foundation fleet.
+
+Refs: #437 — repo-aware, test-backed, CI-enforced.
 """

+import argparse
 import os
 import re
 import sys
-import argparse
 from pathlib import Path

 # --- CONFIGURATION ---
+
 SOVEREIGN_KEYWORDS = ["mempalace", "sovereign_store", "tirith", "bezalel", "nexus"]
-IP_REGEX = r'\b(?:\d{1,3}\.){3}\d{1,3}\b'
-API_KEY_REGEX = r'(?:api_key|secret|token|password|auth_token)\s*[:=]\s*["\'][a-zA-Z0-9_\-]{20,}["\']'
+
+# Public IPv4 literals: the lookahead skips 127.x, 10.x, 172.16-31.x and 192.168.x; 0.x (e.g. 0.0.0.0) is filtered in check_hardcoded_ips.
+IP_REGEX = r'\b(?!(?:127|10|192\.168|172\.(?:1[6-9]|2\d|3[01]))\.)' \
+           r'(?:\d{1,3}\.){3}\d{1,3}\b'
+
+# API key / secret patterns — catches sk-, sk-ant-, AKIA, ghp_, glpat- prefixes and quoted api_key/secret/token assignments.
+API_KEY_PATTERNS = [
+    r'sk-[A-Za-z0-9]{20,}',               # OpenAI-style
+    r'sk-ant-[A-Za-z0-9\-]{20,}',          # Anthropic
+    r'AKIA[A-Z0-9]{16}',                    # AWS access key
+    r'ghp_[A-Za-z0-9]{36}',                # GitHub PAT
+    r'glpat-[A-Za-z0-9\-]{20,}',           # GitLab PAT
+    r'(?:api[_-]?key|secret|token)\s*[:=]\s*["\'][A-Za-z0-9_\-]{16,}["\']',
+]
+
+# Sovereignty rules (carried from v1)
+SOVEREIGN_RULES = [
+    (r'https?://api\.openai\.com', 'External cloud API: api.openai.com. Use local custom_provider.'),
+    (r'https?://api\.anthropic\.com', 'External cloud API: api.anthropic.com. Use local custom_provider.'),
+    (r'provider:\s*(?:openai|anthropic)\b', 'Direct cloud provider. Ensure fallback_model is configured.'),
+]
+
+# File extensions to scan
+SCAN_EXTENSIONS = {'.py', '.ts', '.tsx', '.js', '.yaml', '.yml', '.json', '.env', '.sh', '.cfg', '.toml'}
+SKIP_DIRS = {'.git', 'node_modules', '__pycache__', '.venv', 'venv', '.tox', '.eggs'}
+
+
+class LinterResult:
+    """Structured result container for programmatic access."""
+
+    def __init__(self, repo_path: str, repo_name: str):
+        self.repo_path = repo_path
+        self.repo_name = repo_name
+        self.errors: list[str] = []
+        self.warnings: list[str] = []
+
+    @property
+    def passed(self) -> bool:
+        return len(self.errors) == 0
+
+    @property
+    def violation_count(self) -> int:
+        return len(self.errors)
+
+    def summary(self) -> str:
+        lines = [f"--- Architecture Linter v2: {self.repo_name} ---"]
+        for w in self.warnings:
+            lines.append(f"  [W] {w}")
+        for e in self.errors:
+            lines.append(f"  [E] {e}")
+        status = "PASSED" if self.passed else f"FAILED ({self.violation_count} violations)"
+        lines.append(f"\nResult: {status}")
+        return '\n'.join(lines)
+

 class Linter:
    def __init__(self, repo_path: str):
        self.repo_path = Path(repo_path).resolve()
+        if not self.repo_path.is_dir():
+            raise FileNotFoundError(f"Repository path does not exist: {self.repo_path}")
        self.repo_name = self.repo_path.name
-        self.errors = []
+        self.result = LinterResult(str(self.repo_path), self.repo_name)

-    def log_error(self, message: str, file: str = None, line: int = None):
-        loc = f"{file}:{line}" if file and line else (file if file else "General")
-        self.errors.append(f"[{loc}] {message}")
+    # --- helpers ---
+
+    def _scan_files(self, extensions=None):
+        """Yield (Path, content) for each file whose name ends with one of *extensions* (default: SCAN_EXTENSIONS)."""
+        exts = tuple(extensions or SCAN_EXTENSIONS)  # str.endswith accepts a tuple, not a set
+        for root, dirs, files in os.walk(self.repo_path):
+            dirs[:] = [d for d in dirs if d not in SKIP_DIRS]  # in-place prune so os.walk never descends into them
+            for fname in files:
+                if fname.endswith(exts):  # not Path.suffix: Path('.env').suffix == '', so .env files were never scanned
+                    if fname == '.env.example':
+                        continue
+                    fpath = Path(root) / fname
+                    try:
+                        content = fpath.read_text(errors='ignore')
+                    except Exception:  # best-effort scan: unreadable files are skipped
+                        continue
+                    yield fpath, content
+
+    def _line_no(self, content: str, offset: int) -> int:
+        return content.count('\n', 0, offset) + 1
+
+    # --- checks ---

    def check_sidecar_boundary(self):
-        """Rule 1: No sovereign code in hermes-agent (sidecar boundary)"""
-        if self.repo_name == "hermes-agent":
-            for root, _, files in os.walk(self.repo_path):
-                if "node_modules" in root or ".git" in root:
-                    continue
-                for file in files:
-                    if file.endswith((".py", ".ts", ".js", ".tsx")):
-                        path = Path(root) / file
-                        content = path.read_text(errors="ignore")
-                        for kw in SOVEREIGN_KEYWORDS:
-                            if kw in content.lower():
-                                # Exception: imports or comments might be okay, but we're strict for now
-                                self.log_error(f"Sovereign keyword '{kw}' found in hermes-agent. Violates sidecar boundary.", str(path.relative_to(self.repo_path)))
+        """No sovereign code in hermes-agent (sidecar boundary)."""
+        if self.repo_name != 'hermes-agent':
+            return
+        for fpath, content in self._scan_files():
+            for kw in SOVEREIGN_KEYWORDS:
+                if kw in content.lower():
+                    rel = str(fpath.relative_to(self.repo_path))
+                    self.result.errors.append(
+                        f"Sovereign keyword '{kw}' in hermes-agent violates sidecar boundary. [{rel}]"
+                    )

    def check_hardcoded_ips(self):
-        """Rule 2: No hardcoded IPs (use domain names)"""
-        for root, _, files in os.walk(self.repo_path):
-            if "node_modules" in root or ".git" in root:
-                continue
-            for file in files:
-                if file.endswith((".py", ".ts", ".js", ".tsx", ".yaml", ".yml", ".json")):
-                    path = Path(root) / file
-                    content = path.read_text(errors="ignore")
-                    matches = re.finditer(IP_REGEX, content)
-                    for match in matches:
-                        ip = match.group()
-                        if ip in ["127.0.0.1", "0.0.0.0"]:
-                            continue
-                        line_no = content.count('\n', 0, match.start()) + 1
-                        self.log_error(f"Hardcoded IP address '{ip}' found. Use domain names or environment variables.", str(path.relative_to(self.repo_path)), line_no)
+        """No hardcoded public IPs — use DNS or env vars."""
+        for fpath, content in self._scan_files():
+            for m in re.finditer(IP_REGEX, content):
+                ip = m.group()
+                # skip private ranges already handled by lookahead, and 0.0.0.0
+                if ip.startswith('0.'):
+                    continue
+                line = self._line_no(content, m.start())
+                rel = str(fpath.relative_to(self.repo_path))
+                self.result.errors.append(
+                    f"Hardcoded IP '{ip}'. Use DNS or env vars. [{rel}:{line}]"
+                )

    def check_api_keys(self):
-        """Rule 3: No cloud API keys committed to repos"""
-        for root, _, files in os.walk(self.repo_path):
-            if "node_modules" in root or ".git" in root:
-                continue
-            for file in files:
-                if file.endswith((".py", ".ts", ".js", ".tsx", ".yaml", ".yml", ".json", ".env")):
-                    if file == ".env.example":
-                        continue
-                    path = Path(root) / file
-                    content = path.read_text(errors="ignore")
-                    matches = re.finditer(API_KEY_REGEX, content, re.IGNORECASE)
-                    for match in matches:
-                        line_no = content.count('\n', 0, match.start()) + 1
-                        self.log_error("Potential API key or secret found in code.", str(path.relative_to(self.repo_path)), line_no)
+        """No cloud API keys / secrets committed."""
+        for fpath, content in self._scan_files():
+            for pattern in API_KEY_PATTERNS:
+                for m in re.finditer(pattern, content, re.IGNORECASE):
+                    line = self._line_no(content, m.start())
+                    rel = str(fpath.relative_to(self.repo_path))
+                    self.result.errors.append(
+                        f"Potential secret / API key detected. [{rel}:{line}]"
+                    )
+
+    def check_sovereignty_rules(self):
+        """V1 sovereignty rules: no direct cloud API endpoints or providers."""
+        for fpath, content in self._scan_files({'.py', '.ts', '.tsx', '.js', '.yaml', '.yml'}):
+            for pattern, msg in SOVEREIGN_RULES:
+                for m in re.finditer(pattern, content):
+                    line = self._line_no(content, m.start())
+                    rel = str(fpath.relative_to(self.repo_path))
+                    self.result.errors.append(f"{msg} [{rel}:{line}]")

    def check_soul_canonical(self):
-        """Rule 4: SOUL.md exists and is canonical in exactly one location"""
-        soul_path = self.repo_path / "SOUL.md"
-        if self.repo_name == "timmy-config":
+        """SOUL.md must exist exactly in timmy-config root."""
+        soul_path = self.repo_path / 'SOUL.md'
+        if self.repo_name == 'timmy-config':
            if not soul_path.exists():
-                self.log_error("SOUL.md is missing from the canonical location (timmy-config root).")
+                self.result.errors.append(
+                    'SOUL.md missing from canonical location (timmy-config root).'
+                )
        else:
            if soul_path.exists():
-                self.log_error("SOUL.md found in non-canonical repo. It should only live in timmy-config.")
+                self.result.errors.append(
+                    'SOUL.md found in non-canonical repo. Must live only in timmy-config.'
+                )

    def check_readme(self):
-        """Rule 5: Every repo has a README with current truth"""
-        readme_path = self.repo_path / "README.md"
-        if not readme_path.exists():
-            self.log_error("README.md is missing.")
+        """Every repo must have a substantive README."""
+        readme = self.repo_path / 'README.md'
+        if not readme.exists():
+            self.result.errors.append('README.md is missing.')
        else:
-            content = readme_path.read_text(errors="ignore")
+            content = readme.read_text(errors='ignore')
            if len(content.strip()) < 50:
-                self.log_error("README.md is too short or empty. Provide current truth about the repo.")
+                self.result.warnings.append(
+                    'README.md is very short (<50 chars). Provide current truth about the repo.'
+                )

-    def run(self):
-        print(f"--- Gemini Linter: Auditing {self.repo_name} ---")
+    # --- runner ---
+
+    def run(self) -> LinterResult:
+        """Execute all checks and return the result."""
        self.check_sidecar_boundary()
        self.check_hardcoded_ips()
        self.check_api_keys()
+        self.check_sovereignty_rules()
        self.check_soul_canonical()
        self.check_readme()
+        return self.result

-        if self.errors:
-            print(f"\n[FAILURE] Found {len(self.errors)} architectural violations:")
-            for err in self.errors:
-                print(f"  - {err}")
-            return False
-        else:
-            print("\n[SUCCESS] Architecture is sound. Sovereignty maintained.")
-            return True

 def main():
-    parser = argparse.ArgumentParser(description="Gemini Architecture Linter v2")
-    parser.add_argument("repo_path", nargs="?", default=".", help="Path to the repository to lint")
+    parser = argparse.ArgumentParser(
+        description='Gemini Architecture Linter v2 — repo-aware sovereignty gate.'
+    )
+    parser.add_argument(
+        'repo_path', nargs='?', default='.',
+        help='Path to the repository to lint (default: cwd).',
+    )
+    parser.add_argument(
+        '--repo', dest='repo_flag', default=None,
+        help='Explicit repo path (alias for positional arg).',
+    )
+    parser.add_argument(
+        '--json', dest='json_output', action='store_true',
+        help='Emit machine-readable JSON instead of human text.',
+    )
    args = parser.parse_args()

-    linter = Linter(args.repo_path)
-    success = linter.run()
-    sys.exit(0 if success else 1)
+    path = args.repo_flag if args.repo_flag else args.repo_path

-if __name__ == "__main__":
+    try:
+        linter = Linter(path)
+    except FileNotFoundError as exc:
+        print(f"ERROR: {exc}", file=sys.stderr)
+        sys.exit(2)
+
+    result = linter.run()
+
+    if args.json_output:
+        import json as _json
+        out = {
+            'repo': result.repo_name,
+            'passed': result.passed,
+            'violation_count': result.violation_count,
+            'errors': result.errors,
+            'warnings': result.warnings,
+        }
+        print(_json.dumps(out, indent=2))
+    else:
+        print(result.summary())
+
+    sys.exit(0 if result.passed else 1)
+
+
+if __name__ == '__main__':
    main()
--- a/scripts/test_harness.sh
+++ b/scripts/test_harness.sh
@@ -0,0 +1,195 @@
+#!/usr/bin/env bash
+# test_harness.sh — Common CLI safety/test harness for the scripts/ suite
+# Usage: ./scripts/test_harness.sh [--verbose] [--ci] [directory]
+#
+# Discovers .sh, .py, and .yaml files in the target directory and validates them:
+#   - .sh  : runs shellcheck (or SKIPS if unavailable)
+#   - .py  : runs python3 -m py_compile
+#   - .yaml: validates with python3 yaml.safe_load
+#
+# Exit codes: 0 = all pass, 1 = any fail (or target dir / python3 missing), 2 = unknown option
+
+set -euo pipefail
+
+# --- Defaults ---
+VERBOSE=0
+CI_MODE=0
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+TARGET_DIR="${SCRIPT_DIR}"
+
+# --- Colors (disabled in CI) ---
+RED=""
+GREEN=""
+YELLOW=""
+CYAN=""
+RESET=""
+if [[ -t 1 && "${CI:-}" != "true" ]]; then
+    RED=$'\033[0;31m'
+    GREEN=$'\033[0;32m'
+    YELLOW=$'\033[0;33m'
+    CYAN=$'\033[0;36m'
+    RESET=$'\033[0m'
+fi
+
+# --- Argument parsing ---
+while [[ $# -gt 0 ]]; do  # Parse flags; the last non-flag argument becomes TARGET_DIR.
+    case "$1" in
+        --verbose|-v) VERBOSE=1; shift ;;
+        --ci)         CI_MODE=1; RED="" GREEN="" YELLOW="" CYAN="" RESET=""; shift ;;  # colors were chosen above, before --ci was parsed, so clear them here
+        -*)           echo "Unknown option: $1" >&2; exit 2 ;;
+        *)            TARGET_DIR="$1"; shift ;;
+    esac
+done
+
+# --- Counters ---
+PASS=0
+FAIL=0
+SKIP=0
+TOTAL=0
+
+# --- Helpers ---
+log_verbose() {
+    if [[ "${VERBOSE}" -eq 1 ]]; then
+        echo "  ${CYAN}[DEBUG]${RESET} $*"
+    fi
+}
+
+record_pass() {  # Count one passing file and print it; $1 = display path.
+    PASS=$((PASS + 1))  # plain assignment returns 0; ((PASS++)) returns 1 when PASS is 0 and aborts under set -e
+    TOTAL=$((TOTAL + 1))
+    echo "${GREEN}PASS${RESET}  $1"
+}
+
+record_fail() {  # Count one failing file and print it; $1 = display path, $2 = optional tool output.
+    FAIL=$((FAIL + 1))  # plain assignment returns 0; ((FAIL++)) returns 1 when FAIL is 0 and aborts under set -e
+    TOTAL=$((TOTAL + 1))
+    echo "${RED}FAIL${RESET}  $1"
+    if [[ -n "${2:-}" ]]; then
+        echo "        ${2}"
+    fi
+}
+
+record_skip() {  # Count one skipped file and print it; $1 = display path, $2 = reason.
+    SKIP=$((SKIP + 1))  # plain assignment returns 0; ((SKIP++)) returns 1 when SKIP is 0 and aborts under set -e
+    TOTAL=$((TOTAL + 1))
+    echo "${YELLOW}SKIP${RESET}  $1 — $2"
+}
+
+# --- Checkers ---
+check_shell_file() {
+    local file="$1"
+    local rel="${file#${TARGET_DIR}/}"
+    if command -v shellcheck &>/dev/null; then
+        log_verbose "Running shellcheck on ${rel}"
+        local output
+        if output=$(shellcheck -x -S warning "${file}" 2>&1); then
+            record_pass "${rel}"
+        else
+            record_fail "${rel}" "${output}"
+        fi
+    else
+        record_skip "${rel}" "shellcheck not installed"
+    fi
+}
+
+check_python_file() {
+    local file="$1"
+    local rel="${file#${TARGET_DIR}/}"
+    log_verbose "Running py_compile on ${rel}"
+    local output
+    if output=$(python3 -m py_compile "${file}" 2>&1); then
+        record_pass "${rel}"
+    else
+        record_fail "${rel}" "${output}"
+    fi
+}
+
+check_yaml_file() {  # Parse one YAML file with yaml.safe_load and record PASS/FAIL; $1 = file path.
+    local file="$1"
+    local rel="${file#${TARGET_DIR}/}"
+    log_verbose "Validating YAML: ${rel}"
+    local output  # captures parser stderr/stdout for record_fail
+    if output=$(python3 -c 'import sys, yaml; yaml.safe_load(open(sys.argv[1]))' "${file}" 2>&1); then  # path via argv: a quote in the filename can no longer break or inject into the Python source
+        record_pass "${rel}"
+    else
+        record_fail "${rel}" "${output}"
+    fi
+}
+
+# --- Main ---
+echo ""
+echo "=== scripts/ test harness ==="
+echo "Target: ${TARGET_DIR}"
+echo ""
+
+if [[ ! -d "${TARGET_DIR}" ]]; then
+    echo "Error: target directory '${TARGET_DIR}' not found" >&2
+    exit 1
+fi
+
+# Check python3 availability
+if ! command -v python3 &>/dev/null; then
+    echo "${RED}Error: python3 is required but not found${RESET}" >&2
+    exit 1
+fi
+
+# Check PyYAML availability
+if ! python3 -c "import yaml" 2>/dev/null; then
+    echo "${YELLOW}Warning: PyYAML not installed — YAML checks will be skipped${RESET}" >&2
+    YAML_AVAILABLE=0
+else
+    YAML_AVAILABLE=1
+fi
+
+# Discover and check .sh files
+sh_files=()
+while IFS= read -r -d '' f; do
+    sh_files+=("$f")
+done < <(find "${TARGET_DIR}" -maxdepth 1 -name "*.sh" ! -name "test_harness.sh" ! -name "test_runner.sh" -print0 | sort -z)
+
+for f in "${sh_files[@]:-}"; do
+    [[ -n "$f" ]] && check_shell_file "$f"
+done
+
+# Discover and check .py files
+py_files=()
+while IFS= read -r -d '' f; do
+    py_files+=("$f")
+done < <(find "${TARGET_DIR}" -maxdepth 1 -name "*.py" -print0 | sort -z)
+
+for f in "${py_files[@]:-}"; do
+    [[ -n "$f" ]] && check_python_file "$f"
+done
+
+# Discover and check .yaml files in target dir
+yaml_files=()
+while IFS= read -r -d '' f; do
+    yaml_files+=("$f")
+done < <(find "${TARGET_DIR}" -maxdepth 1 -name "*.yaml" -print0 | sort -z)
+
+if [[ "${YAML_AVAILABLE}" -eq 1 ]]; then
+    for f in "${yaml_files[@]:-}"; do
+        [[ -n "$f" ]] && check_yaml_file "$f"
+    done
+else
+    for f in "${yaml_files[@]:-}"; do
+        [[ -n "$f" ]] && record_skip "${f#${TARGET_DIR}/}" "PyYAML not installed"
+    done
+fi
+
+# --- Summary ---
+echo ""
+echo "=== Results ==="
+echo "  ${GREEN}PASS${RESET}: ${PASS}"
+echo "  ${RED}FAIL${RESET}: ${FAIL}"
+echo "  ${YELLOW}SKIP${RESET}: ${SKIP}"
+echo "  Total: ${TOTAL}"
+echo ""
+
+if [[ "${FAIL}" -gt 0 ]]; then
+    echo "${RED}FAILED${RESET} — ${FAIL} file(s) did not pass validation."
+    exit 1
+else
+    echo "${GREEN}ALL CLEAR${RESET} — all checked files passed."
+    exit 0
+fi
--- a/scripts/test_runner.sh
+++ b/scripts/test_runner.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+# test_runner.sh — Convenience wrapper for test_harness.sh
+# Runs the test harness with sensible defaults for local development.
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+exec "${SCRIPT_DIR}/test_harness.sh" --verbose "$@"