diff --git a/knowledge/SCHEMA.md b/knowledge/SCHEMA.md new file mode 100644 index 0000000..31b1640 --- /dev/null +++ b/knowledge/SCHEMA.md @@ -0,0 +1,114 @@ +# Knowledge File Format Specification + +**Version:** 1 +**Issue:** #10 +**Status:** Draft + +--- + +## Overview + +The knowledge system has two layers: + +1. **index.json** — Machine-readable fact index. Fast lookups by ID, category, repo, tags. +2. **Knowledge files** (YAML) — Human-readable, editable facts organized by domain. + +The harvester writes to both. The bootstrapper reads from index.json. Humans edit the YAML files directly. + +--- + +## index.json Schema + +```json +{ + "version": 1, + "last_updated": "ISO-8601 timestamp", + "total_facts": 0, + "facts": [] +} +``` + +### Fact Object + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `id` | string | yes | Unique identifier: `{domain}:{category}:{sequence}` | +| `fact` | string | yes | One-sentence description of the knowledge | +| `category` | enum | yes | One of: `fact`, `pitfall`, `pattern`, `tool-quirk`, `question` | +| `domain` | string | yes | Where this applies: repo name, `global`, or agent name | +| `confidence` | float | yes | 0.0–1.0. How certain is this knowledge? | +| `tags` | string[] | no | Searchable labels | +| `source_count` | int | no | How many sessions confirmed this fact | +| `first_seen` | date | no | ISO-8601 date first extracted | +| `last_confirmed` | date | no | ISO-8601 date last seen in a session | +| `expires` | date | no | Optional. 
After this date, fact is stale | +| `related` | string[] | no | IDs of related facts | + +### ID Format: `{domain}:{category}:{sequence}` + +### Categories + +| Category | Definition | +|----------|------------| +| `fact` | Concrete, verifiable information | +| `pitfall` | Errors, wrong assumptions, time-wasters | +| `pattern` | Successful sequences of actions | +| `tool-quirk` | Environment-specific behaviors | +| `question` | Identified but unanswered | + +### Confidence Scoring + +| Range | Meaning | +|-------|---------| +| 0.9–1.0 | Explicitly stated and verified | +| 0.7–0.89 | Clearly implied by multiple data points | +| 0.5–0.69 | Suggested but not fully verified | +| 0.3–0.49 | Inferred from limited data | +| 0.0–0.29 | Speculative or uncertain | + +--- + +## Directory Structure + +``` +knowledge/ +├── index.json # Machine-readable fact index +├── SCHEMA.md # This file +├── global/ # Cross-repo knowledge +│ ├── pitfalls.yaml +│ ├── patterns.yaml +│ └── tool-quirks.yaml +├── repos/ # Per-repo knowledge +│ ├── {repo-name}.yaml +│ └── ... +└── agents/ # Agent-type knowledge + └── {agent-type}.yaml +``` + +## YAML File Format + +YAML files use frontmatter for metadata, then markdown sections with fact entries: + +```yaml +--- +domain: global +category: tool-quirk +version: 1 +last_updated: "2026-04-13" +--- + +# Title + +## Section + +- id: global:tool-quirk:001 + fact: "Description" + confidence: 0.95 + tags: [tag1, tag2] + source_count: 5 + first_seen: "2026-03-27" +``` + +## Validation + +Run `python scripts/validate_knowledge.py` to validate index.json. diff --git a/knowledge/global/pitfalls.yaml b/knowledge/global/pitfalls.yaml new file mode 100644 index 0000000..2f93613 --- /dev/null +++ b/knowledge/global/pitfalls.yaml @@ -0,0 +1,80 @@ +--- +domain: global +category: pitfall +version: 1 +last_updated: "2026-04-13" +--- + +# Pitfalls (Global) + +# Cross-repo traps that waste time across the fleet. 
+ +## Git & Forge + +- id: global:pitfall:001 + fact: "Branch protection requires 1 approval on main - API merges fail with 405 without it" + confidence: 0.95 + tags: [git, merge, branch-protection, gitea] + source_count: 12 + first_seen: "2026-04-05" + last_confirmed: "2026-04-13" + related: [the-nexus:pitfall:001] + +- id: global:pitfall:002 + fact: "Never use --no-verify on git commits - it bypasses all hooks including safety checks" + confidence: 0.95 + tags: [git, hooks, safety] + source_count: 5 + first_seen: "2026-03-28" + last_confirmed: "2026-04-13" + +- id: global:pitfall:003 + fact: "Gitea PR creation workaround needed on the-nexus - direct API call fails, use alternative endpoint" + confidence: 0.9 + tags: [gitea, pr, api, workaround] + source_count: 4 + first_seen: "2026-04-06" + last_confirmed: "2026-04-12" + +## Agent Operations + +- id: global:pitfall:004 + fact: "Anthropic is BANNED from fallback chain - if fallback triggers to Anthropic, something is wrong" + confidence: 0.95 + tags: [provider, anthropic, fallback] + source_count: 7 + first_seen: "2026-03-30" + last_confirmed: "2026-04-13" + +- id: global:pitfall:005 + fact: "Telegram tokens expired - don't assume Telegram notifications work without checking" + confidence: 0.85 + tags: [telegram, notifications, token] + source_count: 3 + first_seen: "2026-04-02" + +- id: global:pitfall:006 + fact: "Multiple gateways = 'cannot schedule futures' error - only one gateway process should run" + confidence: 0.9 + tags: [gateway, cron, process] + source_count: 4 + first_seen: "2026-04-04" + last_confirmed: "2026-04-11" + +## Testing + +- id: global:pitfall:007 + fact: "pytest root collection picks up operational *_test.py scripts - restrict to tests/ directory" + confidence: 0.9 + tags: [pytest, test, collection] + source_count: 3 + first_seen: "2026-04-07" + last_confirmed: "2026-04-13" + +- id: global:pitfall:008 + fact: "TDD: test 1 before building 55 - verify the cycle works before scaling" + 
confidence: 0.95 + tags: [tdd, testing, methodology] + source_count: 8 + first_seen: "2026-03-25" + last_confirmed: "2026-04-13" diff --git a/knowledge/global/tool-quirks.yaml b/knowledge/global/tool-quirks.yaml new file mode 100644 index 0000000..8deacba --- /dev/null +++ b/knowledge/global/tool-quirks.yaml @@ -0,0 +1,71 @@ +--- +domain: global +category: tool-quirk +version: 1 +last_updated: "2026-04-13" +--- + +# Tool Quirks (Global) + +## Authentication + +- id: global:tool-quirk:001 + fact: "Gitea token stored at ~/.config/gitea/token, not env var GITEA_TOKEN" + confidence: 0.95 + tags: [git, auth, gitea, token] + source_count: 23 + first_seen: "2026-03-27" + last_confirmed: "2026-04-13" + related: [global:pitfall:001] + +- id: global:tool-quirk:002 + fact: "Gitea API uses 'Authorization: token TOKEN' header format, not Bearer" + confidence: 0.9 + tags: [git, api, gitea] + source_count: 8 + first_seen: "2026-03-28" + last_confirmed: "2026-04-12" + +- id: global:tool-quirk:003 + fact: "Gitea Issues API type=issues param does NOT filter PRs - use truthiness check on pull_request field" + confidence: 0.95 + tags: [gitea, api, issues, pr] + source_count: 6 + first_seen: "2026-04-01" + last_confirmed: "2026-04-13" + +## Paths & Environment + +- id: global:tool-quirk:004 + fact: "~/.hermes is the default hermes home - check get_hermes_home() not the path literal" + confidence: 0.9 + tags: [paths, hermes, env] + source_count: 10 + first_seen: "2026-03-30" + last_confirmed: "2026-04-13" + related: [hermes-agent:pitfall:005] + +- id: global:tool-quirk:005 + fact: "Ansible vault-encrypted vars in YAML require vault_inline_vars plugin" + confidence: 0.85 + tags: [ansible, vault, config] + source_count: 3 + first_seen: "2026-04-02" + +## Model & Inference + +- id: global:tool-quirk:006 + fact: "mimo-v2-pro via Nous Research is the default model - don't assume Anthropic is available" + confidence: 0.95 + tags: [model, provider, nous, default] + source_count: 15 + 
first_seen: "2026-03-25" + last_confirmed: "2026-04-13" + +- id: global:tool-quirk:007 + fact: "Kill + restart with 'hermes chat' preserves old model state - NEVER use --resume" + confidence: 0.95 + tags: [hermes, model, restart, session] + source_count: 8 + first_seen: "2026-03-29" + last_confirmed: "2026-04-12" diff --git a/knowledge/index.json b/knowledge/index.json index dd3e0d4..7134070 100644 --- a/knowledge/index.json +++ b/knowledge/index.json @@ -1,6 +1,472 @@ { "version": 1, "last_updated": "2026-04-13T20:00:00Z", - "total_facts": 0, - "facts": [] + "total_facts": 29, + "facts": [ + { + "id": "hermes-agent:pitfall:001", + "fact": "deploy-crons.py leaves jobs in mixed model format", + "category": "pitfall", + "domain": "hermes-agent", + "confidence": 0.95, + "tags": [ + "cron", + "deploy", + "model", + "config" + ], + "source_count": 5, + "first_seen": "2026-04-08", + "last_confirmed": "2026-04-13", + "related": [ + "hermes-agent:pitfall:002", + "hermes-agent:pitfall:003" + ] + }, + { + "id": "hermes-agent:pitfall:002", + "fact": "deploy-crons.py --deploy doesn't set legacy skill field from skills list", + "category": "pitfall", + "domain": "hermes-agent", + "confidence": 0.9, + "tags": [ + "cron", + "deploy", + "skills" + ], + "source_count": 3, + "first_seen": "2026-04-09", + "last_confirmed": "2026-04-13", + "related": [ + "hermes-agent:pitfall:001" + ] + }, + { + "id": "hermes-agent:pitfall:003", + "fact": "Cron jobs with blank fallback_model fields trigger spurious gateway warnings", + "category": "pitfall", + "domain": "hermes-agent", + "confidence": 0.9, + "tags": [ + "cron", + "model", + "fallback" + ], + "source_count": 4, + "first_seen": "2026-04-07", + "last_confirmed": "2026-04-12", + "related": [ + "hermes-agent:pitfall:001" + ] + }, + { + "id": "hermes-agent:pitfall:004", + "fact": "model-watchdog.py checks first provider line, not model.provider - causes false drift alarms", + "category": "pitfall", + "domain": "hermes-agent", + 
"confidence": 0.9, + "tags": [ + "watchdog", + "model", + "config" + ], + "source_count": 3, + "first_seen": "2026-04-08", + "last_confirmed": "2026-04-13" + }, + { + "id": "hermes-agent:pitfall:005", + "fact": "10+ files read HERMES_HOME directly instead of get_hermes_home()", + "category": "pitfall", + "domain": "hermes-agent", + "confidence": 0.85, + "tags": [ + "paths", + "env", + "hermes-home" + ], + "source_count": 6, + "first_seen": "2026-04-06", + "last_confirmed": "2026-04-12", + "related": [ + "global:pitfall:002" + ] + }, + { + "id": "hermes-agent:pitfall:006", + "fact": "get_hermes_home() doesn't expand tilde when HERMES_HOME=~/... is set", + "category": "pitfall", + "domain": "hermes-agent", + "confidence": 0.8, + "tags": [ + "paths", + "env", + "bug" + ], + "source_count": 2, + "first_seen": "2026-04-05" + }, + { + "id": "hermes-agent:pitfall:007", + "fact": "vps-agent-dispatch reports OK while remote hermes binary path is broken", + "category": "pitfall", + "domain": "hermes-agent", + "confidence": 0.9, + "tags": [ + "ssh", + "dispatch", + "vps" + ], + "source_count": 4, + "first_seen": "2026-04-07", + "last_confirmed": "2026-04-11" + }, + { + "id": "hermes-agent:pitfall:008", + "fact": "nightwatch-health-monitor SSH check fails on cloud-model-only deployments", + "category": "pitfall", + "domain": "hermes-agent", + "confidence": 0.85, + "tags": [ + "ssh", + "health", + "cloud" + ], + "source_count": 2, + "first_seen": "2026-04-10" + }, + { + "id": "the-nexus:pitfall:001", + "fact": "Merges fail with HTTP 405 due to branch protection", + "category": "pitfall", + "domain": "the-nexus", + "confidence": 0.95, + "tags": [ + "git", + "merge", + "branch-protection", + "gitea" + ], + "source_count": 12, + "first_seen": "2026-04-05", + "last_confirmed": "2026-04-13", + "related": [ + "global:pitfall:001" + ] + }, + { + "id": "the-nexus:pitfall:002", + "fact": "ThreadingHTTPServer required for multi-user bridge - standard HTTPServer blocks on concurrent 
requests", + "category": "pitfall", + "domain": "the-nexus", + "confidence": 0.95, + "tags": [ + "server", + "concurrency", + "bridge" + ], + "source_count": 5, + "first_seen": "2026-04-10", + "last_confirmed": "2026-04-13" + }, + { + "id": "the-nexus:pitfall:003", + "fact": "ChatLog.log() crashes on message persistence when index.html has orphaned button tags", + "category": "pitfall", + "domain": "the-nexus", + "confidence": 0.9, + "tags": [ + "html", + "crash", + "chatlog" + ], + "source_count": 3, + "first_seen": "2026-04-12", + "last_confirmed": "2026-04-13" + }, + { + "id": "the-nexus:pitfall:004", + "fact": "Three.js LOD not implemented - local hardware struggles with full scene", + "category": "pitfall", + "domain": "the-nexus", + "confidence": 0.85, + "tags": [ + "threejs", + "performance", + "lod" + ], + "source_count": 4, + "first_seen": "2026-04-09", + "last_confirmed": "2026-04-13" + }, + { + "id": "the-nexus:pitfall:005", + "fact": "Duplicate content blocks appear in index.html when PR merges conflict silently", + "category": "pitfall", + "domain": "the-nexus", + "confidence": 0.8, + "tags": [ + "html", + "merge-conflict", + "duplicate" + ], + "source_count": 3, + "first_seen": "2026-04-11", + "last_confirmed": "2026-04-13" + }, + { + "id": "the-nexus:pitfall:006", + "fact": "Unified HTTP + WebSocket server required for proper URL deployment - separate servers break CORS", + "category": "pitfall", + "domain": "the-nexus", + "confidence": 0.9, + "tags": [ + "deploy", + "websocket", + "http", + "cors" + ], + "source_count": 4, + "first_seen": "2026-04-10", + "last_confirmed": "2026-04-13" + }, + { + "id": "global:tool-quirk:001", + "fact": "Gitea token stored at ~/.config/gitea/token, not env var GITEA_TOKEN", + "category": "tool-quirk", + "domain": "global", + "confidence": 0.95, + "tags": [ + "git", + "auth", + "gitea", + "token" + ], + "source_count": 23, + "first_seen": "2026-03-27", + "last_confirmed": "2026-04-13", + "related": [ + 
"global:pitfall:001" + ] + }, + { + "id": "global:tool-quirk:002", + "fact": "Gitea API uses 'Authorization: token TOKEN' header format, not Bearer", + "category": "tool-quirk", + "domain": "global", + "confidence": 0.9, + "tags": [ + "git", + "api", + "gitea" + ], + "source_count": 8, + "first_seen": "2026-03-28", + "last_confirmed": "2026-04-12" + }, + { + "id": "global:tool-quirk:003", + "fact": "Gitea Issues API type=issues param does NOT filter PRs", + "category": "tool-quirk", + "domain": "global", + "confidence": 0.95, + "tags": [ + "gitea", + "api", + "issues", + "pr" + ], + "source_count": 6, + "first_seen": "2026-04-01", + "last_confirmed": "2026-04-13" + }, + { + "id": "global:tool-quirk:004", + "fact": "~/.hermes is the default hermes home - check get_hermes_home() not the path literal", + "category": "tool-quirk", + "domain": "global", + "confidence": 0.9, + "tags": [ + "paths", + "hermes", + "env" + ], + "source_count": 10, + "first_seen": "2026-03-30", + "last_confirmed": "2026-04-13", + "related": [ + "hermes-agent:pitfall:005" + ] + }, + { + "id": "global:tool-quirk:005", + "fact": "Ansible vault-encrypted vars in YAML require vault_inline_vars plugin", + "category": "tool-quirk", + "domain": "global", + "confidence": 0.85, + "tags": [ + "ansible", + "vault", + "config" + ], + "source_count": 3, + "first_seen": "2026-04-02" + }, + { + "id": "global:tool-quirk:006", + "fact": "mimo-v2-pro via Nous Research is the default model - don't assume Anthropic is available", + "category": "tool-quirk", + "domain": "global", + "confidence": 0.95, + "tags": [ + "model", + "provider", + "nous", + "default" + ], + "source_count": 15, + "first_seen": "2026-03-25", + "last_confirmed": "2026-04-13" + }, + { + "id": "global:tool-quirk:007", + "fact": "Kill + restart with 'hermes chat' preserves old model state - NEVER use --resume", + "category": "tool-quirk", + "domain": "global", + "confidence": 0.95, + "tags": [ + "hermes", + "model", + "restart", + "session" + 
], + "source_count": 8, + "first_seen": "2026-03-29", + "last_confirmed": "2026-04-12" + }, + { + "id": "global:pitfall:001", + "fact": "Branch protection requires 1 approval on main - API merges fail with 405 without it", + "category": "pitfall", + "domain": "global", + "confidence": 0.95, + "tags": [ + "git", + "merge", + "branch-protection", + "gitea" + ], + "source_count": 12, + "first_seen": "2026-04-05", + "last_confirmed": "2026-04-13", + "related": [ + "the-nexus:pitfall:001" + ] + }, + { + "id": "global:pitfall:002", + "fact": "Never use --no-verify on git commits", + "category": "pitfall", + "domain": "global", + "confidence": 0.95, + "tags": [ + "git", + "hooks", + "safety" + ], + "source_count": 5, + "first_seen": "2026-03-28", + "last_confirmed": "2026-04-13" + }, + { + "id": "global:pitfall:003", + "fact": "Gitea PR creation workaround needed on the-nexus - direct API call fails", + "category": "pitfall", + "domain": "global", + "confidence": 0.9, + "tags": [ + "gitea", + "pr", + "api", + "workaround" + ], + "source_count": 4, + "first_seen": "2026-04-06", + "last_confirmed": "2026-04-12" + }, + { + "id": "global:pitfall:004", + "fact": "Anthropic is BANNED from fallback chain", + "category": "pitfall", + "domain": "global", + "confidence": 0.95, + "tags": [ + "provider", + "anthropic", + "fallback" + ], + "source_count": 7, + "first_seen": "2026-03-30", + "last_confirmed": "2026-04-13" + }, + { + "id": "global:pitfall:005", + "fact": "Telegram tokens expired - don't assume Telegram notifications work", + "category": "pitfall", + "domain": "global", + "confidence": 0.85, + "tags": [ + "telegram", + "notifications", + "token" + ], + "source_count": 3, + "first_seen": "2026-04-02" + }, + { + "id": "global:pitfall:006", + "fact": "Multiple gateways = 'cannot schedule futures' error - only one gateway process should run", + "category": "pitfall", + "domain": "global", + "confidence": 0.9, + "tags": [ + "gateway", + "cron", + "process" + ], + 
"source_count": 4, + "first_seen": "2026-04-04", + "last_confirmed": "2026-04-11" + }, + { + "id": "global:pitfall:007", + "fact": "pytest root collection picks up operational *_test.py scripts - restrict to tests/ directory", + "category": "pitfall", + "domain": "global", + "confidence": 0.9, + "tags": [ + "pytest", + "test", + "collection" + ], + "source_count": 3, + "first_seen": "2026-04-07", + "last_confirmed": "2026-04-13" + }, + { + "id": "global:pitfall:008", + "fact": "TDD: test 1 before building 55", + "category": "pitfall", + "domain": "global", + "confidence": 0.95, + "tags": [ + "tdd", + "testing", + "methodology" + ], + "source_count": 8, + "first_seen": "2026-03-25", + "last_confirmed": "2026-04-13" + } + ] } \ No newline at end of file diff --git a/knowledge/repos/hermes-agent.yaml b/knowledge/repos/hermes-agent.yaml new file mode 100644 index 0000000..36adc03 --- /dev/null +++ b/knowledge/repos/hermes-agent.yaml @@ -0,0 +1,80 @@ +--- +domain: hermes-agent +category: pitfall +version: 1 +last_updated: "2026-04-13" +--- + +# Pitfalls (hermes-agent) + +## Cron & Deployment + +- id: hermes-agent:pitfall:001 + fact: "deploy-crons.py leaves jobs in mixed model format - some have provider/model, some just model" + confidence: 0.95 + tags: [cron, deploy, model, config] + source_count: 5 + first_seen: "2026-04-08" + last_confirmed: "2026-04-13" + related: [hermes-agent:pitfall:002, hermes-agent:pitfall:003] + +- id: hermes-agent:pitfall:002 + fact: "deploy-crons.py --deploy doesn't set legacy skill field from skills list" + confidence: 0.9 + tags: [cron, deploy, skills] + source_count: 3 + first_seen: "2026-04-09" + last_confirmed: "2026-04-13" + related: [hermes-agent:pitfall:001] + +- id: hermes-agent:pitfall:003 + fact: "Cron jobs with blank fallback_model fields trigger spurious gateway warnings" + confidence: 0.9 + tags: [cron, model, fallback] + source_count: 4 + first_seen: "2026-04-07" + last_confirmed: "2026-04-12" + related: 
[hermes-agent:pitfall:001] + +- id: hermes-agent:pitfall:004 + fact: "model-watchdog.py checks first provider line, not model.provider - causes false drift alarms" + confidence: 0.9 + tags: [watchdog, model, config] + source_count: 3 + first_seen: "2026-04-08" + last_confirmed: "2026-04-13" + +## Path & Environment + +- id: hermes-agent:pitfall:005 + fact: "10+ files read HERMES_HOME directly instead of get_hermes_home() - breaks on custom paths" + confidence: 0.85 + tags: [paths, env, hermes-home] + source_count: 6 + first_seen: "2026-04-06" + last_confirmed: "2026-04-12" + related: [global:pitfall:002] + +- id: hermes-agent:pitfall:006 + fact: "get_hermes_home() doesn't expand tilde when HERMES_HOME=~/... is set" + confidence: 0.8 + tags: [paths, env, bug] + source_count: 2 + first_seen: "2026-04-05" + +## SSH & Dispatch + +- id: hermes-agent:pitfall:007 + fact: "vps-agent-dispatch reports OK while remote hermes binary path is broken" + confidence: 0.9 + tags: [ssh, dispatch, vps] + source_count: 4 + first_seen: "2026-04-07" + last_confirmed: "2026-04-11" + +- id: hermes-agent:pitfall:008 + fact: "nightwatch-health-monitor SSH check fails on cloud-model-only deployments" + confidence: 0.85 + tags: [ssh, health, cloud] + source_count: 2 + first_seen: "2026-04-10" diff --git a/knowledge/repos/the-nexus.yaml b/knowledge/repos/the-nexus.yaml new file mode 100644 index 0000000..e7f4ebb --- /dev/null +++ b/knowledge/repos/the-nexus.yaml @@ -0,0 +1,63 @@ +--- +domain: the-nexus +category: pitfall +version: 1 +last_updated: "2026-04-13" +--- + +# Pitfalls (the-nexus) + +## Git & Merging + +- id: the-nexus:pitfall:001 + fact: "Merges fail with HTTP 405 due to branch protection - must use merge API with 1 approval" + confidence: 0.95 + tags: [git, merge, branch-protection, gitea] + source_count: 12 + first_seen: "2026-04-05" + last_confirmed: "2026-04-13" + related: [global:pitfall:001] + +- id: the-nexus:pitfall:002 + fact: "ThreadingHTTPServer required for multi-user 
#!/usr/bin/env python3
"""Validate knowledge/index.json against the fact schema.

Checks every entry of the ``facts`` array (required fields, category,
confidence range, id format) plus index-level invariants (unique ids,
resolvable ``related`` references, ``total_facts`` matching the array length).
Exits with status 1 and a report when anything is wrong.
"""
import json
import sys
from pathlib import Path

VALID_CATEGORIES = {"fact", "pitfall", "pattern", "tool-quirk", "question"}
REQUIRED = {"id", "fact", "category", "domain", "confidence"}


def validate_fact(fact, src=""):
    """Return the schema violations found in a single fact object.

    Args:
        fact: One decoded entry of the index's ``facts`` array.
        src: Label prefixed to every message, e.g. ``"[3]"``.

    Returns:
        A list of human-readable error strings; empty when the fact is valid.
    """
    if not isinstance(fact, dict):
        # Everything below indexes by key; a non-object entry cannot be checked.
        return [f"{src}: fact must be an object"]

    # REQUIRED is a set: sort so the report order is identical on every run.
    errs = [
        f"{src}: missing '{name}'" for name in sorted(REQUIRED) if name not in fact
    ]

    if "category" in fact:
        category = fact["category"]
        # The isinstance guard keeps unhashable values (lists, dicts) from
        # raising TypeError on the set membership test.
        if not (isinstance(category, str) and category in VALID_CATEGORIES):
            errs.append(f"{src}: invalid category '{category}'")

    if "confidence" in fact:
        confidence = fact["confidence"]
        # bool is a subclass of int, so True/False must be rejected explicitly.
        is_number = isinstance(confidence, (int, float)) and not isinstance(
            confidence, bool
        )
        # NaN fails the chained comparison and is therefore rejected as well.
        if not is_number or not 0 <= confidence <= 1:
            errs.append(f"{src}: confidence must be 0.0-1.0")

    if "id" in fact:
        fact_id = fact["id"]
        parts = fact_id.split(":") if isinstance(fact_id, str) else []
        if len(parts) != 3 or not all(parts):
            errs.append(f"{src}: id must be domain:category:sequence")
        else:
            # The id embeds domain and category; they must agree with the
            # stand-alone fields or lookups by either will disagree.
            for position, field in enumerate(("domain", "category")):
                if field in fact and fact[field] != parts[position]:
                    errs.append(
                        f"{src}: id {field} '{parts[position]}' does not match "
                        f"{field} '{fact[field]}'"
                    )
    return errs


def validate_index(data):
    """Return every violation found in a decoded index.json document.

    Args:
        data: The object produced by ``json.load`` on index.json.

    Returns:
        A list of error strings; empty when the whole index is valid.
    """
    if not isinstance(data, dict):
        return ["index must be a JSON object"]
    facts = data.get("facts", [])
    if not isinstance(facts, list):
        return ["'facts' must be an array"]

    errs = []
    seen = set()
    for i, fact in enumerate(facts):
        errs.extend(validate_fact(fact, f"[{i}]"))
        fact_id = fact.get("id") if isinstance(fact, dict) else None
        if isinstance(fact_id, str):
            if fact_id in seen:
                errs.append(f"duplicate id '{fact_id}'")
            seen.add(fact_id)

    # Second pass: references can only be resolved once every id is known.
    for i, fact in enumerate(facts):
        if not isinstance(fact, dict) or "related" not in fact:
            continue
        related = fact["related"]
        if not isinstance(related, list):
            errs.append(f"[{i}]: related must be a list of fact ids")
            continue
        for ref in related:
            if not isinstance(ref, str) or ref not in seen:
                errs.append(f"[{i}]: related id '{ref}' not found in index")

    total = data.get("total_facts")
    if total is not None and total != len(facts):
        errs.append(
            f"total_facts is {total} but index contains {len(facts)} facts"
        )
    return errs


def main():
    """Validate knowledge/index.json, print a report, and exit 1 on failure."""
    idx = Path(__file__).parent.parent / "knowledge" / "index.json"
    if not idx.exists():
        print(f"FAILED: {idx} not found")
        sys.exit(1)
    try:
        # Explicit encoding: the facts contain non-ASCII text and the locale
        # default is not guaranteed to be UTF-8.
        with open(idx, encoding="utf-8") as handle:
            data = json.load(handle)
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError.
        print(f"FAILED: cannot read {idx}: {exc}")
        sys.exit(1)

    errs = validate_index(data)
    if errs:
        print(f"FAILED - {len(errs)} errors:")
        for err in errs:
            print(f" x {err}")
        sys.exit(1)
    print(f"PASSED - {len(data.get('facts', []))} facts")


if __name__ == "__main__":
    main()