ci: add pytest workflow for #190

test: define CI configuration acceptance for #190
Merge pull request 'feat: Knowledge store staleness detector (closes #179)' (#185) from feat/179-staleness-check into main
2026-04-15 11:29:23 -04:00 · 2026-04-15 11:26:08 -04:00 · 2026-04-15 06:09:14 +00:00 · 2026-04-15 04:00:46 +00:00 · 2026-04-15 04:00:12 +00:00 · 2026-04-14 19:02:41 +00:00
13 changed files with 585 additions and 298 deletions
--- a/.gitea/workflows/test.yml
+++ b/.gitea/workflows/test.yml
@@ -0,0 +1,22 @@
 # CI workflow: runs the test suite on every pull request and on pushes to main.
 name: Test
 on:
  pull_request:
  push:
    branches: [main]
 jobs:
  pytest:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      # Test dependencies come from requirements.txt at the repository root.
      - name: Install test dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
      # The test command itself lives in the `test` make target.
      - name: Run test suite
        run: |
          make test
--- a/4
+++ b/4
@@ -0,0 +1,4 @@
 # `make test` runs pytest (verbose) on tests/test_ci_config.py and every scripts/test_*.py.
 .PHONY: test
 test:
 	python3 -m pytest tests/test_ci_config.py scripts/test_*.py -v
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1 @@
 # Test runner; constrained to the pytest 8.x series.
 pytest>=8,<9
--- a/scripts/knowledge_staleness_check.py
+++ b/scripts/knowledge_staleness_check.py
@@ -0,0 +1,131 @@
 #!/usr/bin/env python3
 """
 Knowledge Store Staleness Detector — Detect stale knowledge entries by comparing source file hashes.
 Usage:
    python3 scripts/knowledge_staleness_check.py --index knowledge/index.json
    python3 scripts/knowledge_staleness_check.py --index knowledge/index.json --json
    python3 scripts/knowledge_staleness_check.py --index knowledge/index.json --fix
 """
 import argparse
 import hashlib
 import json
 import os
 import sys
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import Dict, List, Any, Optional
 def compute_file_hash(filepath: str) -> Optional[str]:
    """Compute the SHA-256 hash of a file's contents.

    The file is read in fixed-size chunks so that large sources are never
    loaded into memory in one piece.

    Args:
        filepath: Path of the file to hash.

    Returns:
        The digest formatted as ``"sha256:<hex>"``, or ``None`` when the path
        does not exist, is a directory, has a non-directory parent component,
        or cannot be opened because of permissions.
    """
    digest = hashlib.sha256()
    try:
        with open(filepath, "rb") as f:
            # iter() with a sentinel stops at the first empty read, i.e. EOF.
            for chunk in iter(lambda: f.read(65536), b""):
                digest.update(chunk)
    except (FileNotFoundError, IsADirectoryError, NotADirectoryError, PermissionError):
        return None
    return "sha256:" + digest.hexdigest()
 def check_staleness(index_path: str, repo_root: str = ".") -> List[Dict[str, Any]]:
    """Classify every fact in a knowledge index by comparing source hashes.

    Args:
        index_path: Path to the JSON index; its ``"facts"`` list is inspected.
        repo_root: Directory each entry's ``source_file`` is resolved against.

    Returns:
        One dict per fact, in index order: a copy of the entry plus
        - status: "fresh" | "stale" | "missing_source" | "no_hash" | "no_source"
        - current_hash: hash computed now (None when no source could be read)
        The entry's own ``source_hash`` is the stored hash it is compared to.
    """
    # JSON text is UTF-8; do not depend on the locale's default encoding.
    with open(index_path, encoding="utf-8") as f:
        data = json.load(f)
    results = []
    for entry in data.get("facts", []):
        source_file = entry.get("source_file")
        if not source_file:
            # Nothing to hash: the entry does not name a source at all.
            results.append({**entry, "status": "no_source", "current_hash": None})
            continue
        current_hash = compute_file_hash(os.path.join(repo_root, source_file))
        stored_hash = entry.get("source_hash")
        if current_hash is None:
            status = "missing_source"
        elif not stored_hash:
            status = "no_hash"
        elif current_hash != stored_hash:
            status = "stale"
        else:
            status = "fresh"
        results.append({**entry, "status": status, "current_hash": current_hash})
    return results
 def fix_hashes(index_path: str, repo_root: str = ".") -> int:
    """Fill in ``source_hash`` for index entries that lack one.

    Entries that already have a hash, name no ``source_file``, or whose source
    cannot be hashed are left as they are.

    Args:
        index_path: Path to the JSON index; updated in place when needed.
        repo_root: Directory each entry's ``source_file`` is resolved against.

    Returns:
        Number of entries that received a hash.
    """
    # JSON text is UTF-8; do not depend on the locale's default encoding.
    with open(index_path, encoding="utf-8") as f:
        data = json.load(f)
    fixed = 0
    for entry in data.get("facts", []):
        source_file = entry.get("source_file")
        if entry.get("source_hash") or not source_file:
            continue
        h = compute_file_hash(os.path.join(repo_root, source_file))
        if h:
            entry["source_hash"] = h
            fixed += 1
    # Only rewrite the index when something changed, so a no-op run does not
    # reformat the file or touch its modification time.
    if fixed:
        with open(index_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
    return fixed
 def main():
    """CLI entry point: report index staleness, or backfill hashes with --fix.

    With --fix the index is updated and a count is printed. Otherwise the
    check results are printed either as JSON (--json) or as a short
    human-readable report listing every entry that is not fresh.
    """
    parser = argparse.ArgumentParser(description="Check knowledge store staleness")
    parser.add_argument("--index", required=True, help="Path to knowledge/index.json")
    parser.add_argument("--repo", default=".", help="Repo root for source file resolution")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--fix", action="store_true", help="Add hashes to entries missing them")
    args = parser.parse_args()
    if args.fix:
        fixed = fix_hashes(args.index, args.repo)
        print(f"Fixed {fixed} entries with missing hashes.")
        return
    results = check_staleness(args.index, args.repo)
    if args.json:
        print(json.dumps(results, indent=2))
        return
    # Everything that is not "fresh" counts as an issue in the report.
    issues = [r for r in results if r["status"] != "fresh"]
    print("Knowledge Store Staleness Check")
    print(f"  Total entries: {len(results)}")
    print(f"  Fresh: {len(results) - len(issues)}")
    print(f"  Stale/Issues: {len(issues)}")
    print()
    if not issues:
        print("All entries are fresh!")
        return
    print("Issues found:")
    for r in issues:
        fact = r.get("fact")
        # A null or non-string fact cannot be sliced; fall back / stringify first.
        fact = "?" if fact is None else str(fact)[:60]
        source = r.get("source_file", "?")
        print(f"  [{r['status']}] {source}: {fact}")
 # Run the CLI only when this file is executed as a script, not when it is imported.
 if __name__ == "__main__":
    main()
--- a/scripts/session_reader.py
+++ b/scripts/session_reader.py
@@ -1,298 +0,0 @@
 #!/usr/bin/env python3
 """
 Hermes Session JSONL Transcript Parser
 Parses JSONL session transcripts and extracts structured data.
 Part of the compounding-intelligence harvester pipeline.
 """
 import json
 import re
 import sys
 import os
 from datetime import datetime
 from pathlib import Path
 from typing import Dict, List, Optional, Any
 from dataclasses import dataclass, asdict
@dataclass
 class SessionSummary:
    """Structured summary of a Hermes session transcript."""
    # Stem of the transcript file name.
    session_id: str
    # "model" value taken from assistant messages; "unknown" if none carried one.
    model: str
    # Repository detected from message content; "unknown" if nothing matched.
    repo: str
    # One of "success", "partial", "failure" or "unknown".
    outcome: str
    # Number of transcript lines that parsed as JSON.
    message_count: int
    # Total number of tool-call records across all messages.
    tool_calls: int
    # Elapsed time rendered as "<minutes>m", or "unknown" if it cannot be computed.
    duration_estimate: str
    # Unique tool names seen, in first-seen order (at most 10).
    key_actions: List[str]
    # Unique error messages collected while parsing (at most 5).
    errors_encountered: List[str]
    # First and last "timestamp" values seen in the transcript, unparsed.
    start_time: Optional[str] = None
    end_time: Optional[str] = None
    # Rough size estimate: whitespace-separated words in message content.
    total_tokens_estimate: int = 0
    # Message counts by "role".
    user_messages: int = 0
    assistant_messages: int = 0
    tool_outputs: int = 0
 # Timestamp formats tried, in order, when estimating session duration.
 _TIMESTAMP_FORMATS = (
    "%Y-%m-%dT%H:%M:%S.%fZ",
    "%Y-%m-%dT%H:%M:%SZ",
    "%Y-%m-%d %H:%M:%S",
 )

 # Repo patterns, tried in order; a capture group, when present, holds the repo.
 # The character class is [^/\s] (no slash, no whitespace): in a raw string a
 # doubled backslash would instead exclude a literal backslash and the letter "s".
 _REPO_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"(?:the-nexus|compounding-intelligence|timmy-config|hermes-agent)",
        r"(?:forge\.alexanderwhitestone\.com/([^/\s]+/[^/\s]+))",
        r"(?:github\.com/([^/\s]+/[^/\s]+))",
        r"(?:Timmy_Foundation/([^/\s]+))",
    )
 )

 def _parse_timestamp(value):
    """Parse a timestamp with the first known format that fits, else return None."""
    if not isinstance(value, str):
        return None
    for fmt in _TIMESTAMP_FORMATS:
        try:
            return datetime.strptime(value, fmt)
        except ValueError:
            continue
    return None

 def _estimate_duration(start_time, end_time):
    """Return the time between two timestamps as "<minutes>m", or "unknown"."""
    start_dt = _parse_timestamp(start_time)
    end_dt = _parse_timestamp(end_time)
    if start_dt is None or end_dt is None:
        return "unknown"
    minutes = (end_dt - start_dt).total_seconds() / 60
    return f"{minutes:.0f}m"

 def _tool_call_name(tool_call):
    """Return the tool name of one tool-call record, or None if it has none."""
    if not isinstance(tool_call, dict):
        return None
    function = tool_call.get("function")
    if isinstance(function, dict):
        name = function.get("name")
    elif isinstance(function, str):
        # Some transcripts store the tool name directly instead of {"name": ...}.
        name = function
    else:
        name = None
    return str(name) if name else None

 def _count_words(content):
    """Count whitespace-separated words in string or list-of-parts content."""
    if isinstance(content, str):
        return len(content.split())
    if isinstance(content, list):
        return sum(
            len(part["text"].split())
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        )
    return 0

 def _find_repo(text):
    """Return the repo named by the first matching pattern in text, or None."""
    for pattern in _REPO_PATTERNS:
        match = pattern.search(text)
        if match:
            return match.group(1) if match.groups() else match.group(0)
    return None

 def _classify_outcome(messages, errors):
    """Classify a session as "failure", "partial", "success" or "unknown"."""
    if errors:
        fatal = any(
            "405" in e or "permission" in e.lower() or "authentication" in e.lower()
            for e in errors
        )
        return "failure" if fatal else "partial"
    if messages:
        last_msg = messages[-1]
        if last_msg.get("role") == "assistant":
            content = last_msg.get("content", "")
            if isinstance(content, str):
                lowered = content.lower()
                success_indicators = ("done", "completed", "success", "merged", "pushed")
                if any(indicator in lowered for indicator in success_indicators):
                    return "success"
    return "unknown"

 def parse_jsonl_session(file_path: str) -> SessionSummary:
    """
    Parse a Hermes session JSONL transcript and extract structured data.

    Blank lines are skipped. Lines that are not valid JSON, or that are valid
    JSON but not an object, are recorded as errors and otherwise ignored.
    Error messages from tool entries flagged ``is_error`` are kept, truncated
    to 200 characters.

    Args:
        file_path: Path to the JSONL session file
    Returns:
        SessionSummary with extracted data. If the file does not exist, a
        summary with outcome "failure" and a single "File not found" error.
    """
    session_id = Path(file_path).stem
    messages = []
    model = "unknown"
    repo = "unknown"
    tool_calls_count = 0
    key_actions = []
    errors = []
    start_time = None
    end_time = None
    total_tokens = 0
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            for line_num, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError as e:
                    errors.append(f"Line {line_num}: Invalid JSON - {e}")
                    continue
                if not isinstance(entry, dict):
                    # Every field lookup below needs a JSON object.
                    errors.append(f"Line {line_num}: Expected a JSON object")
                    continue
                messages.append(entry)
                # The most recent assistant message that names a model wins.
                if entry.get("role") == "assistant" and entry.get("model"):
                    model = entry["model"]
                # First timestamp seen is the start, last one seen is the end.
                if entry.get("timestamp"):
                    ts = entry["timestamp"]
                    if start_time is None:
                        start_time = ts
                    end_time = ts
                # Count tool calls and remember each distinct tool name once.
                tool_calls = entry.get("tool_calls")
                if isinstance(tool_calls, list):
                    tool_calls_count += len(tool_calls)
                    for tc in tool_calls:
                        action = _tool_call_name(tc)
                        if action and action not in key_actions:
                            key_actions.append(action)
                content = entry.get("content", "")
                total_tokens += _count_words(content)
                # The most recent message that mentions a repo wins.
                if content:
                    found = _find_repo(str(content))
                    if found is not None:
                        repo = found
                if entry.get("role") == "tool" and entry.get("is_error"):
                    error_msg = entry.get("content", "Unknown error")
                    if isinstance(error_msg, str):
                        # Truncate long messages instead of dropping them.
                        errors.append(error_msg[:200])
    except FileNotFoundError:
        return SessionSummary(
            session_id=session_id,
            model="unknown",
            repo="unknown",
            outcome="failure",
            message_count=0,
            tool_calls=0,
            duration_estimate="0m",
            key_actions=[],
            errors_encountered=[f"File not found: {file_path}"]
        )
    return SessionSummary(
        session_id=session_id,
        model=model,
        repo=repo,
        outcome=_classify_outcome(messages, errors),
        message_count=len(messages),
        tool_calls=tool_calls_count,
        duration_estimate=_estimate_duration(start_time, end_time),
        # key_actions is already unique; keep the first 10.
        key_actions=key_actions[:10],
        # Unique errors in first-seen order; keep the first 5.
        errors_encountered=list(dict.fromkeys(errors))[:5],
        start_time=start_time,
        end_time=end_time,
        total_tokens_estimate=total_tokens,
        user_messages=sum(1 for m in messages if m.get("role") == "user"),
        assistant_messages=sum(1 for m in messages if m.get("role") == "assistant"),
        tool_outputs=sum(1 for m in messages if m.get("role") == "tool")
    )
 def process_session_directory(directory_path: str, output_file: Optional[str] = None) -> List[SessionSummary]:
    """
    Summarise every ``session_*.jsonl`` transcript found directly in a directory.

    Progress and problems are reported on stderr.

    Args:
        directory_path: Directory to scan for session JSONL files
        output_file: If given, the summaries are also written there as JSON
    Returns:
        Summaries in file-name order; an empty list when the directory is
        missing or holds no matching files
    """
    root = Path(directory_path)
    if not root.exists():
        print(f"Error: Directory {directory_path} does not exist", file=sys.stderr)
        return []
    session_paths = sorted(root.glob("session_*.jsonl"))
    if not session_paths:
        print(f"Warning: No session_*.jsonl files found in {directory_path}", file=sys.stderr)
        return []
    summaries = []
    for session_path in session_paths:
        print(f"Processing {session_path.name}...", file=sys.stderr)
        summaries.append(parse_jsonl_session(str(session_path)))
    if output_file:
        with open(output_file, 'w', encoding='utf-8') as out:
            json.dump([asdict(item) for item in summaries], out, indent=2)
        print(f"Wrote {len(summaries)} summaries to {output_file}", file=sys.stderr)
    return summaries
 def main():
    """Command-line front end: summarise one transcript file or a directory of them.

    The summary is written to --output when given, otherwise printed to stdout
    as JSON. Exits with status 1 when the path is neither a file nor a directory.
    """
    import argparse
    cli = argparse.ArgumentParser(description="Parse Hermes session JSONL transcripts")
    cli.add_argument("path", help="Path to JSONL file or directory of session files")
    cli.add_argument("-o", "--output", help="Output JSON file (default: stdout)")
    cli.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
    args = cli.parse_args()
    target = Path(args.path)
    if target.is_file():
        summary = parse_jsonl_session(str(target))
        if not args.output:
            print(json.dumps(asdict(summary), indent=2))
            return
        with open(args.output, 'w') as out:
            json.dump(asdict(summary), out, indent=2)
        print(f"Wrote summary to {args.output}", file=sys.stderr)
        return
    if target.is_dir():
        # process_session_directory writes the output file itself when one is given.
        summaries = process_session_directory(str(target), args.output)
        if not args.output:
            print(json.dumps([asdict(item) for item in summaries], indent=2))
        return
    print(f"Error: {args.path} is not a file or directory", file=sys.stderr)
    sys.exit(1)
 # Run the CLI only when this file is executed as a script, not when it is imported.
 if __name__ == "__main__":
    main()
--- a/scripts/test_harvest_prompt_comprehensive.py
+++ b/scripts/test_harvest_prompt_comprehensive.py
@@ -0,0 +1,212 @@
 #!/usr/bin/env python3
 """
 Comprehensive test script for knowledge extraction prompt.
 Validates prompt structure, requirements, and consistency.
 """
 import json
 import re
 from pathlib import Path
 def test_prompt_structure():
    """Check the harvest prompt's sections, categories, output fields and size.

    The prompt is read from templates/harvest-prompt.md relative to the
    current working directory.

    Returns:
        (passed, message): passed is False on the first failed check, with
        message naming it.

    NOTE(review): the outcome is returned rather than asserted, so a False
    result does not by itself fail this function when pytest collects it —
    confirm that is intended.
    """
    prompt_path = Path("templates/harvest-prompt.md")
    if not prompt_path.exists():
        return False, "harvest-prompt.md not found"
    # Read as UTF-8 explicitly so the result does not depend on the locale.
    content = prompt_path.read_text(encoding="utf-8")
    lowered = content.lower()
    # Section names are matched case-insensitively.
    required_sections = [
        "System Prompt",
        "Instructions",
        "Categories",
        "Output Format",
        "Confidence Scoring",
        "Constraints",
        "Example",
    ]
    for section in required_sections:
        if section.lower() not in lowered:
            return False, f"Missing required section: {section}"
    # Categories and output fields are matched case-sensitively.
    for category in ["fact", "pitfall", "pattern", "tool-quirk", "question"]:
        if category not in content:
            return False, f"Missing required category: {category}"
    for field in ["fact", "category", "repo", "confidence"]:
        if field not in content:
            return False, f"Missing required output field: {field}"
    # Check prompt size (should be ~1k tokens, roughly 4k chars)
    if len(content) > 5000:
        return False, f"Prompt too large: {len(content)} chars (max ~5000)"
    if len(content) < 1000:
        return False, f"Prompt too small: {len(content)} chars (min ~1000)"
    return True, "Prompt structure is valid"
 def test_confidence_scoring():
    """Check that the prompt defines every confidence band and its description.

    The prompt is read from templates/harvest-prompt.md relative to the
    current working directory.

    Returns:
        (passed, message): passed is False when the prompt file is missing or
        a band / description is absent, with message naming the problem.
    """
    prompt_path = Path("templates/harvest-prompt.md")
    # Report a missing prompt as a normal failure instead of raising.
    if not prompt_path.exists():
        return False, "harvest-prompt.md not found"
    # Read as UTF-8 explicitly so the result does not depend on the locale.
    content = prompt_path.read_text(encoding="utf-8")
    lowered = content.lower()
    confidence_levels = [
        ("0.9-1.0", "explicitly stated"),
        ("0.7-0.8", "clearly implied"),
        ("0.5-0.6", "suggested"),
        ("0.3-0.4", "inferred"),
        ("0.1-0.2", "speculative"),
    ]
    for level, description in confidence_levels:
        if level not in content:
            return False, f"Missing confidence level: {level}"
        # Descriptions are matched case-insensitively.
        if description.lower() not in lowered:
            return False, f"Missing confidence description: {description}"
    return True, "Confidence scoring is properly defined"
 def test_example_quality():
    """Check that the prompt contains a JSON example covering every category.

    The prompt is read from templates/harvest-prompt.md relative to the
    current working directory.

    Returns:
        (passed, message): passed is False when the prompt file is missing,
        has no example, has no JSON example with a "knowledge" key, or the
        example omits a category.
    """
    prompt_path = Path("templates/harvest-prompt.md")
    # Report a missing prompt as a normal failure instead of raising.
    if not prompt_path.exists():
        return False, "harvest-prompt.md not found"
    # Read as UTF-8 explicitly so the result does not depend on the locale.
    content = prompt_path.read_text(encoding="utf-8")
    example_start = content.lower().find("example")
    if example_start == -1:
        return False, "No examples provided"
    # Only text from the first mention of "example" onwards is searched.
    example_section = content[example_start:]
    json_match = re.search(r'\{[\s\S]*"knowledge"[\s\S]*\}', example_section)
    if not json_match:
        return False, "No JSON example found"
    example_json = json_match.group(0)
    for category in ["fact", "pitfall", "pattern", "tool-quirk", "question"]:
        if category not in example_json:
            return False, f"Example missing category: {category}"
    return True, "Examples are clear and complete"
 def test_constraint_coverage():
    """Check that the prompt mentions every required constraint phrase.

    The prompt is read from templates/harvest-prompt.md relative to the
    current working directory; phrases are matched case-insensitively.

    Returns:
        (passed, message): passed is False when the prompt file is missing or
        a phrase is absent, with message naming the problem.
    """
    prompt_path = Path("templates/harvest-prompt.md")
    # Report a missing prompt as a normal failure instead of raising.
    if not prompt_path.exists():
        return False, "harvest-prompt.md not found"
    # Read as UTF-8 explicitly so the result does not depend on the locale.
    lowered = prompt_path.read_text(encoding="utf-8").lower()
    required_constraints = [
        "No hallucination",
        "only extract",
        "explicitly",
        "partial",
        "failed sessions",
        "1k tokens",
    ]
    for constraint in required_constraints:
        if constraint.lower() not in lowered:
            return False, f"Missing constraint: {constraint}"
    return True, "Constraints cover all requirements"
 def test_test_sessions():
    """Check that at least five test session files exist and contain valid JSONL.

    Files are looked up as test_sessions/*.jsonl relative to the current
    working directory and checked in file-name order, so the first reported
    problem is the same on every run.

    Returns:
        (passed, message): passed is False when the directory is missing,
        fewer than five files are found, or a line is not valid JSON.
    """
    test_sessions_dir = Path("test_sessions")
    if not test_sessions_dir.exists():
        return False, "test_sessions directory not found"
    session_files = sorted(test_sessions_dir.glob("*.jsonl"))
    if len(session_files) < 5:
        return False, f"Only {len(session_files)} test sessions found, need 5"
    for session_file in session_files:
        # Read as UTF-8 explicitly so the result does not depend on the locale.
        content = session_file.read_text(encoding="utf-8")
        # Split on "\n" only: other line separators may legally appear inside JSON strings.
        lines = content.strip().split("\n")
        for i, line in enumerate(lines, 1):
            try:
                json.loads(line)
            except json.JSONDecodeError as e:
                return False, f"Invalid JSON in {session_file.name}, line {i}: {e}"
    return True, f"Found {len(session_files)} valid test sessions"
 def run_all_tests():
    """Run every harvest-prompt check, print a report, and return the outcome.

    A check that raises is recorded as failed, with the exception text as its
    message.

    Returns:
        (all_passed, results): results holds one dict per check with the keys
        "test", "passed" and "message".
    """
    suite = (
        ("Prompt Structure", test_prompt_structure),
        ("Confidence Scoring", test_confidence_scoring),
        ("Example Quality", test_example_quality),
        ("Constraint Coverage", test_constraint_coverage),
        ("Test Sessions", test_test_sessions),
    )
    results = []
    for label, check in suite:
        try:
            passed, message = check()
        except Exception as exc:
            passed, message = False, f"Error: {str(exc)}"
        results.append({"test": label, "passed": passed, "message": message})
    all_passed = all(item["passed"] for item in results)
    # Report: banner, one entry per check, then the overall verdict.
    rule = "=" * 60
    print(rule)
    print("HARVEST PROMPT TEST RESULTS")
    print(rule)
    for item in results:
        if item["passed"]:
            status = "✓ PASS"
        else:
            status = "✗ FAIL"
        print(f"{status}: {item['test']}")
        print(f"  {item['message']}")
        print()
    print(rule)
    print("ALL TESTS PASSED!" if all_passed else "SOME TESTS FAILED!")
    print(rule)
    return all_passed, results
 # Script mode: run all checks, save the results as JSON, and exit 0 on success or 1 on failure.
 if __name__ == "__main__":
    # Imported here because only script mode needs them.
    import sys
    from datetime import datetime, timezone
    all_passed, results = run_all_tests()
    # Save results to file, stamped with the actual run time in UTC
    # (same "YYYY-MM-DDTHH:MM:SSZ" layout as before).
    with open("test_results.json", "w", encoding="utf-8") as f:
        json.dump({
            "all_passed": all_passed,
            "results": results,
            "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        }, f, indent=2)
    print("Results saved to test_results.json")
    # sys.exit is always available; the bare exit() helper is only added by the site module.
    sys.exit(0 if all_passed else 1)
--- a/scripts/test_knowledge_staleness.py
+++ b/scripts/test_knowledge_staleness.py
@@ -0,0 +1,129 @@
 #!/usr/bin/env python3
 """Tests for scripts/knowledge_staleness_check.py — 8 tests."""
 import json
 import os
 import sys
 import tempfile
 # Put this script's directory at the front of sys.path.
 sys.path.insert(0, os.path.dirname(__file__) or ".")
 import importlib.util
 # Load knowledge_staleness_check.py directly from its file path under the module name "ks".
 spec = importlib.util.spec_from_file_location("ks", os.path.join(os.path.dirname(__file__) or ".", "knowledge_staleness_check.py"))
 mod = importlib.util.module_from_spec(spec)
 spec.loader.exec_module(mod)
 # Bind the functions under test to module-level names used by the tests below.
 check_staleness = mod.check_staleness
 fix_hashes = mod.fix_hashes
 compute_file_hash = mod.compute_file_hash
 def test_fresh_entry():
    """An entry whose stored hash matches its source file is reported as fresh."""
    with tempfile.TemporaryDirectory() as workdir:
        source_path = os.path.join(workdir, "source.py")
        with open(source_path, "w") as handle:
            handle.write("print('hello')")
        entry = {
            "fact": "hello",
            "source_file": "source.py",
            "source_hash": compute_file_hash(source_path),
        }
        index_path = os.path.join(workdir, "index.json")
        with open(index_path, "w") as handle:
            handle.write(json.dumps({"facts": [entry]}))
        report = check_staleness(index_path, workdir)
        assert report[0]["status"] == "fresh"
        print("PASS: test_fresh_entry")
 def test_stale_entry():
    """An entry whose stored hash differs from the source's hash is reported as stale."""
    with tempfile.TemporaryDirectory() as workdir:
        source_path = os.path.join(workdir, "source.py")
        index_path = os.path.join(workdir, "index.json")
        with open(source_path, "w") as handle:
            handle.write("original content")
        entry = {"fact": "old", "source_file": "source.py", "source_hash": "sha256:wrong"}
        with open(index_path, "w") as handle:
            handle.write(json.dumps({"facts": [entry]}))
        # Rewrite the source after the index has been recorded.
        with open(source_path, "w") as handle:
            handle.write("modified content")
        report = check_staleness(index_path, workdir)
        assert report[0]["status"] == "stale"
        print("PASS: test_stale_entry")
 def test_missing_source():
    """An entry pointing at a file that does not exist is reported as missing_source."""
    with tempfile.TemporaryDirectory() as workdir:
        entry = {"fact": "gone", "source_file": "nonexistent.py", "source_hash": "sha256:abc"}
        index_path = os.path.join(workdir, "index.json")
        with open(index_path, "w") as handle:
            handle.write(json.dumps({"facts": [entry]}))
        report = check_staleness(index_path, workdir)
        assert report[0]["status"] == "missing_source"
        print("PASS: test_missing_source")
 def test_no_hash():
    """An entry with a source but no stored hash is reported as no_hash, with a computed hash."""
    with tempfile.TemporaryDirectory() as workdir:
        source_path = os.path.join(workdir, "source.py")
        with open(source_path, "w") as handle:
            handle.write("content")
        entry = {"fact": "no hash", "source_file": "source.py"}
        index_path = os.path.join(workdir, "index.json")
        with open(index_path, "w") as handle:
            handle.write(json.dumps({"facts": [entry]}))
        first = check_staleness(index_path, workdir)[0]
        assert first["status"] == "no_hash"
        assert first["current_hash"].startswith("sha256:")
        print("PASS: test_no_hash")
 def test_no_source_field():
    """An entry without a source_file is reported as no_source."""
    with tempfile.TemporaryDirectory() as workdir:
        index_path = os.path.join(workdir, "index.json")
        payload = {"facts": [{"fact": "orphan"}]}
        with open(index_path, "w") as handle:
            handle.write(json.dumps(payload))
        report = check_staleness(index_path, workdir)
        assert report[0]["status"] == "no_source"
        print("PASS: test_no_source_field")
 def test_fix_hashes():
    """fix_hashes adds a hash to an entry that lacks one and reports one fix."""
    with tempfile.TemporaryDirectory() as workdir:
        source_path = os.path.join(workdir, "source.py")
        with open(source_path, "w") as handle:
            handle.write("content for hashing")
        entry = {"fact": "needs hash", "source_file": "source.py"}
        index_path = os.path.join(workdir, "index.json")
        with open(index_path, "w") as handle:
            handle.write(json.dumps({"facts": [entry]}))
        count = fix_hashes(index_path, workdir)
        assert count == 1
        # Re-read the index to confirm the hash was persisted.
        with open(index_path) as handle:
            stored = json.load(handle)
        assert stored["facts"][0]["source_hash"].startswith("sha256:")
        print("PASS: test_fix_hashes")
 def test_empty_index():
    """An index with no facts yields an empty result list."""
    with tempfile.TemporaryDirectory() as workdir:
        index_path = os.path.join(workdir, "index.json")
        with open(index_path, "w") as handle:
            handle.write(json.dumps({"facts": []}))
        report = check_staleness(index_path, workdir)
        assert report == []
        print("PASS: test_empty_index")
 def test_compute_hash_nonexistent():
    """Hashing a path that does not exist yields None."""
    missing_path = "/nonexistent/path/file.py"
    assert compute_file_hash(missing_path) is None
    print("PASS: test_compute_hash_nonexistent")
 def run_all():
    """Run the eight staleness tests in order, then print a summary line."""
    ordered_tests = (
        test_fresh_entry,
        test_stale_entry,
        test_missing_source,
        test_no_hash,
        test_no_source_field,
        test_fix_hashes,
        test_empty_index,
        test_compute_hash_nonexistent,
    )
    for test in ordered_tests:
        test()
    print("\nAll 8 tests passed!")
 # Allow running the tests directly as a script, without a test runner.
 if __name__ == "__main__":
    run_all()
--- a/test_sessions/session_failure.jsonl
+++ b/test_sessions/session_failure.jsonl
@@ -0,0 +1,18 @@
 {"role": "user", "content": "Deploy the application to production"}
 {"role": "assistant", "content": "I'll deploy the application to production.", "tool_calls": [{"function": "terminal", "arguments": {"command": "ssh deploy@prod-server 'cd /app && git pull && docker-compose up -d'"}}]}
 {"role": "tool", "content": "ssh: connect to host prod-server port 22: Connection refused"}
 {"role": "assistant", "content": "Connection refused. Let me check the server status.", "tool_calls": [{"function": "terminal", "arguments": {"command": "ping -c 3 prod-server"}}]}
 {"role": "tool", "content": "PING prod-server (192.168.1.100): 56 data bytes\nRequest timeout for icmp_seq 0\nRequest timeout for icmp_seq 1\nRequest timeout for icmp_seq 2\n--- prod-server ping statistics ---\n3 packets transmitted, 0 packets received, 100.0% packet loss"}
 {"role": "assistant", "content": "The server appears to be down. Let me check if there's an alternative deployment method."}
 {"role": "user", "content": "Try using the VPS instead"}
 {"role": "assistant", "content": "I'll try deploying to the VPS.", "tool_calls": [{"function": "terminal", "arguments": {"command": "ssh deploy@vps-server 'cd /app && git pull && docker-compose up -d'"}}]}
 {"role": "tool", "content": "Permission denied (publickey)."}
 {"role": "assistant", "content": "Permission denied. The SSH key might not be set up correctly. Let me check the SSH configuration.", "tool_calls": [{"function": "terminal", "arguments": {"command": "cat ~/.ssh/config"}}]}
 {"role": "tool", "content": "Host vps-server\n    HostName 167.99.126.228\n    User deploy\n    IdentityFile ~/.ssh/id_rsa_vps"}
 {"role": "assistant", "content": "The SSH config looks correct. Let me check if the key exists.", "tool_calls": [{"function": "terminal", "arguments": {"command": "ls -la ~/.ssh/id_rsa_vps"}}]}
 {"role": "tool", "content": "ls: /Users/user/.ssh/id_rsa_vps: No such file or directory"}
 {"role": "assistant", "content": "The SSH key doesn't exist. This is a pitfall - we need to generate the SSH key first."}
 {"role": "user", "content": "Generate a new SSH key for the VPS"}
 {"role": "assistant", "content": "I'll generate a new SSH key.", "tool_calls": [{"function": "terminal", "arguments": {"command": "ssh-keygen -t rsa -b 4096 -f ~/.ssh/id_rsa_vps -N ''"}}]}
 {"role": "tool", "content": "Generating public/private rsa key pair.\nYour identification has been saved in /Users/user/.ssh/id_rsa_vps\nYour public key has been saved in /Users/user/.ssh/id_rsa_vps.pub\nThe key fingerprint is:\nSHA256:abc123... user@machine"}
 {"role": "assistant", "content": "SSH key generated. Now I need to add it to the VPS."}
--- a/test_sessions/session_partial.jsonl
+++ b/test_sessions/session_partial.jsonl
@@ -0,0 +1,14 @@
 {"role": "user", "content": "Check the Gitea API for open issues"}
 {"role": "assistant", "content": "I'll check the Gitea API for open issues.", "tool_calls": [{"function": "terminal", "arguments": {"command": "curl -H 'Authorization: token $GITEA_TOKEN' https://forge.alexanderwhitestone.com/api/v1/repos/Timmy_Foundation/compounding-intelligence/issues?state=open"}}]}
 {"role": "tool", "content": "curl: (6) Could not resolve host: forge.alexanderwhitestone.com"}
 {"role": "assistant", "content": "DNS resolution failed. Let me check the network connectivity.", "tool_calls": [{"function": "terminal", "arguments": {"command": "nslookup forge.alexanderwhitestone.com"}}]}
 {"role": "tool", "content": "Server:\t\t8.8.8.8\nAddress:\t8.8.8.8#53\n\n** server can't find forge.alexanderwhitestone.com: NXDOMAIN"}
 {"role": "assistant", "content": "The domain doesn't resolve. This might be a DNS issue. Let me try using the IP address directly.", "tool_calls": [{"function": "terminal", "arguments": {"command": "curl -H 'Authorization: token $GITEA_TOKEN' https://167.99.126.228/api/v1/repos/Timmy_Foundation/compounding-intelligence/issues?state=open"}}]}
 {"role": "tool", "content": "curl: (60) SSL certificate problem: self signed certificate"}
 {"role": "assistant", "content": "SSL certificate issue. The server is using a self-signed certificate. Let me try with -k flag to skip SSL verification.", "tool_calls": [{"function": "terminal", "arguments": {"command": "curl -k -H 'Authorization: token $GITEA_TOKEN' https://167.99.126.228/api/v1/repos/Timmy_Foundation/compounding-intelligence/issues?state=open"}}]}
 {"role": "tool", "content": "[]"}
 {"role": "assistant", "content": "No open issues found. The API returned an empty array."}
 {"role": "user", "content": "Check if there are any pull requests"}
 {"role": "assistant", "content": "Let me check for pull requests.", "tool_calls": [{"function": "terminal", "arguments": {"command": "curl -k -H 'Authorization: token $GITEA_TOKEN' https://167.99.126.228/api/v1/repos/Timmy_Foundation/compounding-intelligence/pulls?state=open"}}]}
 {"role": "tool", "content": "[{\"id\": 20, \"title\": \"feat: add harvester.py + session_reader.py — session knowledge extractor (closes #8)\", \"state\": \"open\", \"user\": {\"login\": \"Rockachopa\"}, \"created_at\": \"2026-04-14T17:28:20Z\"}]"}
 {"role": "assistant", "content": "Found 1 open pull request: PR #20 - \"feat: add harvester.py + session_reader.py — session knowledge extractor (closes #8)\" by Rockachopa."}
--- a/test_sessions/session_patterns.jsonl
+++ b/test_sessions/session_patterns.jsonl
@@ -0,0 +1,10 @@
 {"role": "user", "content": "How do I deploy a new version of the application?"}
 {"role": "assistant", "content": "Here's the deployment process:\n1. Run tests: pytest\n2. Build Docker image: docker build -t app:latest .\n3. Push to registry: docker push registry.example.com/app:latest\n4. Update production: ssh prod 'cd /app && docker-compose pull && docker-compose up -d'\n5. Verify: curl https://app.example.com/health"}
 {"role": "user", "content": "What if tests fail?"}
 {"role": "assistant", "content": "If tests fail:\n1. Check the error logs: pytest -v\n2. Fix the failing tests\n3. Re-run tests: pytest\n4. If still failing, check recent changes: git log --oneline -10\n5. Consider reverting: git revert HEAD"}
 {"role": "user", "content": "What if the Docker build fails?"}
 {"role": "assistant", "content": "If Docker build fails:\n1. Check Dockerfile syntax: docker build --no-cache -t app:latest .\n2. Check for missing dependencies: cat requirements.txt\n3. Check disk space: df -h\n4. Clean up Docker: docker system prune -f\n5. Try building with verbose output: docker build --progress=plain -t app:latest ."}
 {"role": "user", "content": "What if the push to registry fails?"}
 {"role": "assistant", "content": "If push to registry fails:\n1. Check registry credentials: docker login registry.example.com\n2. Check registry space: curl https://registry.example.com/v2/_catalog\n3. Check image size: docker images app:latest\n4. Try tagging with version: docker tag app:latest registry.example.com/app:v1.0.0\n5. Push with version tag: docker push registry.example.com/app:v1.0.0"}
 {"role": "user", "content": "What if the production update fails?"}
 {"role": "assistant", "content": "If production update fails:\n1. Check SSH connection: ssh prod 'echo connected'\n2. Check disk space on prod: ssh prod 'df -h'\n3. Check Docker status: ssh prod 'docker ps -a'\n4. Check logs: ssh prod 'docker-compose logs --tail=100'\n5. Rollback: ssh prod 'cd /app && git checkout HEAD~1 && docker-compose up -d'"}
--- a/test_sessions/session_questions.jsonl
+++ b/test_sessions/session_questions.jsonl
@@ -0,0 +1,12 @@
 {"role": "user", "content": "I need to implement a new feature for the application"}
 {"role": "assistant", "content": "I can help you implement a new feature. What feature would you like to add?"}
 {"role": "user", "content": "I want to add user authentication"}
 {"role": "assistant", "content": "User authentication is a good feature. I have a few questions:\n1. What authentication method do you prefer? (JWT, OAuth, Session-based)\n2. Do you need social login? (Google, GitHub, etc.)\n3. What user data do you need to store?\n4. Do you need role-based access control?\n5. What's your database? (PostgreSQL, MySQL, SQLite)"}
 {"role": "user", "content": "I prefer JWT, no social login, just email and password, no roles, PostgreSQL"}
 {"role": "assistant", "content": "Great choices. Here's the implementation plan:\n1. Create user model with email and password hash\n2. Create JWT token generation and validation\n3. Create login endpoint: POST /api/auth/login\n4. Create register endpoint: POST /api/auth/register\n5. Create middleware for protected routes\n6. Add password hashing with bcrypt\n7. Add rate limiting for login attempts\n8. Add password reset functionality\n9. Add email verification\n10. Test all endpoints"}
 {"role": "user", "content": "What about password reset?"}
 {"role": "assistant", "content": "For password reset:\n1. Create reset token table\n2. Create forgot password endpoint: POST /api/auth/forgot-password\n3. Send reset email with token\n4. Create reset password endpoint: POST /api/auth/reset-password\n5. Validate reset token and expiration\n6. Update password and delete token\n7. Send confirmation email"}
 {"role": "user", "content": "What about email verification?"}
 {"role": "assistant", "content": "For email verification:\n1. Add email_verified field to user model\n2. Create verification token table\n3. Send verification email on registration\n4. Create verify email endpoint: GET /api/auth/verify-email?token=...\n5. Validate token and update email_verified\n6. Add middleware to check email_verified for certain routes\n7. Add resend verification email endpoint"}
 {"role": "user", "content": "What about rate limiting?"}
 {"role": "assistant", "content": "For rate limiting:\n1. Use Redis for rate limit storage\n2. Create rate limit middleware\n3. Set limits: 5 login attempts per minute, 3 password resets per hour\n4. Return 429 Too Many Requests when limit exceeded\n5. Add X-RateLimit headers to responses\n6. Log rate limit violations\n7. Add whitelist for trusted IPs"}
--- a/test_sessions/session_success.jsonl
+++ b/test_sessions/session_success.jsonl
@@ -0,0 +1,13 @@
 {"role": "user", "content": "Clone the repo at forge.alexanderwhitestone.com/Timmy_Foundation/compounding-intelligence"}
 {"role": "assistant", "content": "I'll clone the repository for you.", "tool_calls": [{"function": "terminal", "arguments": {"command": "git clone https://forge.alexanderwhitestone.com/Timmy_Foundation/compounding-intelligence.git"}}]}
 {"role": "tool", "content": "Cloning into 'compounding-intelligence'...\nremote: Enumerating objects: 15, done.\nremote: Counting objects: 100% (15/15), done.\nremote: Compressing objects: 100% (12/12), done.\nremote: Total 15 (delta 2), reused 0 (delta 0), pack-reused 0\nReceiving objects: 100% (15/15), done.\nResolving deltas: 100% (2/2), done."}
 {"role": "assistant", "content": "Repository cloned successfully. Let me check the structure.", "tool_calls": [{"function": "terminal", "arguments": {"command": "ls -la compounding-intelligence/"}}]}
 {"role": "tool", "content": "total 8\ndrwxr-xr-x   5 user  staff  160 Apr 14 17:21 .\ndrwxr-xr-x  12 user  staff  384 Apr 14 17:21 ..\ndrwxr-xr-x  13 user  staff  416 Apr 14 17:21 .git\n-rw-r--r--   1 user  staff  1234 Apr 14 17:21 README.md\ndrwxr-xr-x   3 user  staff   96 Apr 14 17:21 knowledge\n-rw-r--r--   1 user  staff   456 Apr 14 17:21 metrics\ndrwxr-xr-x   4 user  staff  128 Apr 14 17:21 scripts\ndrwxr-xr-x   3 user  staff   96 Apr 14 17:21 templates"}
 {"role": "user", "content": "What files are in the templates directory?"}
 {"role": "assistant", "content": "Let me check the templates directory.", "tool_calls": [{"function": "terminal", "arguments": {"command": "ls -la compounding-intelligence/templates/"}}]}
 {"role": "tool", "content": "total 0\ndrwxr-xr-x  3 user  staff   96 Apr 14 17:21 .\ndrwxr-xr-x  5 user  staff  160 Apr 14 17:21 ..\n-rw-r--r--  1 user  staff    0 Apr 14 17:21 .gitkeep"}
 {"role": "assistant", "content": "The templates directory currently only contains a .gitkeep file. No templates have been added yet."}
 {"role": "user", "content": "Create a harvest-prompt.md template in the templates directory."}
 {"role": "assistant", "content": "I'll create the harvest-prompt.md template for you.", "tool_calls": [{"function": "terminal", "arguments": {"command": "cat > compounding-intelligence/templates/harvest-prompt.md << 'EOF'\n# Knowledge Extraction Prompt\n\nExtract knowledge from session transcripts.\nEOF"}}]}
 {"role": "tool", "content": ""}
 {"role": "assistant", "content": "Template created successfully. The harvest-prompt.md file is now in the templates directory."}
--- a/tests/test_ci_config.py
+++ b/tests/test_ci_config.py
@@ -0,0 +1,19 @@
 from pathlib import Path
 def test_requirements_makefile_and_workflow_exist() -> None:
    """Check that the CI scaffolding files are present in the repository.

    Paths are resolved against the repository root (the parent of the
    directory holding this test module) instead of the current working
    directory, so the result does not depend on where pytest is started.
    """
    repo_root = Path(__file__).resolve().parents[1]
    expected = ("requirements.txt", "Makefile", ".gitea/workflows/test.yml")
    # Gather every absent file so a single failure lists all of them.
    missing = [name for name in expected if not (repo_root / name).is_file()]
    assert not missing, f"missing CI files under {repo_root}: {missing}"
 def test_ci_workflow_runs_project_test_command() -> None:
    """Check that requirements, Makefile and workflow agree on how tests run.

    The three files are read relative to the repository root (the parent of
    the directory holding this test module) instead of the current working
    directory, so the result does not depend on where pytest is started.
    """
    repo_root = Path(__file__).resolve().parents[1]
    workflow = (repo_root / ".gitea/workflows/test.yml").read_text(encoding="utf-8")
    requirements = (repo_root / "requirements.txt").read_text(encoding="utf-8")
    makefile = (repo_root / "Makefile").read_text(encoding="utf-8")
    # requirements.txt has to name the test runner.
    assert "pytest" in requirements
    # The Makefile has to define a ``test`` target running the exact command.
    assert "test:" in makefile
    assert "python3 -m pytest tests/test_ci_config.py scripts/test_*.py -v" in makefile
    # The workflow has to install the dependencies and delegate to ``make test``.
    assert "pip install -r requirements.txt" in workflow
    assert "make test" in workflow
Author	SHA1	Message	Date
Alexander Whitestone	363407293d	ci: add pytest workflow for #190 All checks were successful Test / pytest (pull_request) Successful in 30s Details	2026-04-15 11:29:23 -04:00
Alexander Whitestone	4bd0a5d7ac	test: define CI configuration acceptance for #190	2026-04-15 11:26:08 -04:00
Timmy Time	e6f1b07f16	Merge pull request 'feat: Knowledge store staleness detector (closes #179 )' (#185 ) from feat/179-staleness-check into main	2026-04-15 06:09:14 +00:00
Alexander Whitestone	81c02f6709	feat: Add staleness detector tests (closes #179 )	2026-04-15 04:00:46 +00:00
Alexander Whitestone	c2c3c6a3b9	feat: Add knowledge staleness detector (closes #179 )	2026-04-15 04:00:12 +00:00
Timmy Time	8d716ff03f	Add comprehensive test script for harvest prompt validation	2026-04-14 19:02:41 +00:00
Timmy Time	920510996e	Add test session 5: Session with questions	2026-04-14 19:01:03 +00:00
Timmy Time	1fafeaf5a4	Add test session 4: Session with patterns	2026-04-14 19:01:00 +00:00
Timmy Time	36b440f998	Add test session 3: Partial session with tool quirks	2026-04-14 19:00:58 +00:00
Timmy Time	9f3caabf42	Add test session 2: Failed session with pitfalls	2026-04-14 19:00:56 +00:00
Timmy Time	a21f3a44e1	Add test session 1: Successful session	2026-04-14 18:58:05 +00:00