Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d11c95094d | ||
|
|
44013ba520 |
299
GENOME.md
299
GENOME.md
@@ -1,144 +1,209 @@
|
||||
# GENOME.md — Timmy_Foundation/timmy-home
|
||||
|
||||
Generated by `pipelines/codebase_genome.py`.
|
||||
# GENOME.md — the-nexus
|
||||
|
||||
## Project Overview
|
||||
|
||||
Timmy Foundation's home repository for development operations and configurations.
|
||||
`the-nexus` is a hybrid repo that combines three layers in one codebase:
|
||||
|
||||
- Text files indexed: 3133
|
||||
- Source and script files: 219
|
||||
- Test files: 73
|
||||
- Documentation files: 743
|
||||
1. A browser-facing world shell rooted in `index.html`, `boot.js`, `bootstrap.mjs`, `app.js`, `style.css`, `portals.json`, `vision.json`, `manifest.json`, and `gofai_worker.js`
|
||||
2. A Python realtime bridge centered on `server.py` plus harness code under `nexus/`
|
||||
3. A memory / fleet / operator layer spanning `mempalace/`, `mcp_servers/`, `multi_user_bridge.py`, and supporting scripts
|
||||
|
||||
## Architecture
|
||||
The repo is not a clean single-purpose frontend and not just a backend harness. It is a mixed world/runtime/ops repository where browser rendering, WebSocket telemetry, MCP-driven game harnesses, and fleet memory tooling coexist.
|
||||
|
||||
Grounded repo facts from this checkout:
|
||||
- Browser shell files exist at repo root: `index.html`, `app.js`, `style.css`, `manifest.json`, `gofai_worker.js`
|
||||
- Data/config files also live at repo root: `portals.json`, `vision.json`
|
||||
- Realtime bridge exists in `server.py`
|
||||
- Game harnesses exist in `nexus/morrowind_harness.py` and `nexus/bannerlord_harness.py`
|
||||
- Memory/fleet sync exists in `mempalace/tunnel_sync.py`
|
||||
- Desktop/game automation MCP servers exist in `mcp_servers/desktop_control_server.py` and `mcp_servers/steam_info_server.py`
|
||||
- Validation exists in `tests/test_browser_smoke.py`, `tests/test_portals_json.py`, `tests/test_index_html_integrity.py`, and `tests/test_repo_truth.py`
|
||||
|
||||
The current architecture is best understood as a sovereign world shell plus operator/game harness backend, with accumulated documentation drift from multiple restoration and migration efforts.
|
||||
|
||||
## Architecture Diagram
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
repo_root["repo"]
|
||||
angband["angband"]
|
||||
ansible["ansible"]
|
||||
briefings["briefings"]
|
||||
codebase_genome["codebase_genome"]
|
||||
config["config"]
|
||||
configs["configs"]
|
||||
conftest["conftest"]
|
||||
dns_records["dns-records"]
|
||||
evennia["evennia"]
|
||||
evennia_tools["evennia_tools"]
|
||||
repo_root --> angband
|
||||
repo_root --> ansible
|
||||
repo_root --> briefings
|
||||
repo_root --> codebase_genome
|
||||
repo_root --> config
|
||||
repo_root --> configs
|
||||
browser[Index HTML Shell\nindex.html -> boot.js -> bootstrap.mjs -> app.js]
|
||||
assets[Root Assets\nstyle.css\nmanifest.json\ngofai_worker.js]
|
||||
data[World Data\nportals.json\nvision.json]
|
||||
ws[Realtime Bridge\nserver.py\nWebSocket broadcast hub]
|
||||
gofai[In-browser GOFAI\nSymbolicEngine\nNeuroSymbolicBridge\nsetupGOFAI/updateGOFAI]
|
||||
harnesses[Python Harnesses\nnexus/morrowind_harness.py\nnexus/bannerlord_harness.py]
|
||||
mcp[MCP Adapters\nmcp_servers/desktop_control_server.py\nmcp_servers/steam_info_server.py]
|
||||
memory[Memory + Fleet\nmempalace/tunnel_sync.py\nmempalace.js]
|
||||
bridge[Operator / MUD Bridge\nmulti_user_bridge.py\ncommands/timmy_commands.py]
|
||||
tests[Verification\ntests/test_browser_smoke.py\ntests/test_portals_json.py\ntests/test_repo_truth.py]
|
||||
docs[Contracts + Drift Docs\nBROWSER_CONTRACT.md\nREADME.md\nCLAUDE.md\nINVESTIGATION_ISSUE_1145.md]
|
||||
|
||||
browser --> assets
|
||||
browser --> data
|
||||
browser --> gofai
|
||||
browser --> ws
|
||||
harnesses --> mcp
|
||||
harnesses --> ws
|
||||
bridge --> ws
|
||||
memory --> ws
|
||||
tests --> browser
|
||||
tests --> data
|
||||
tests --> docs
|
||||
docs --> browser
|
||||
```
|
||||
|
||||
## Entry Points
|
||||
## Entry Points and Data Flow
|
||||
|
||||
- `codebase_genome.py` — python main guard (`python3 codebase_genome.py`)
|
||||
- `gemini-fallback-setup.sh` — operational script (`bash gemini-fallback-setup.sh`)
|
||||
- `morrowind/hud.sh` — operational script (`bash morrowind/hud.sh`)
|
||||
- `pipelines/codebase_genome.py` — python main guard (`python3 pipelines/codebase_genome.py`)
|
||||
- `scripts/agent_pr_gate.py` — operational script (`python3 scripts/agent_pr_gate.py`)
|
||||
- `scripts/auto_restart_agent.sh` — operational script (`bash scripts/auto_restart_agent.sh`)
|
||||
- `scripts/autonomous_issue_creator.py` — operational script (`python3 scripts/autonomous_issue_creator.py`)
|
||||
- `scripts/backlog_cleanup.py` — operational script (`python3 scripts/backlog_cleanup.py`)
|
||||
- `scripts/backlog_triage.py` — operational script (`python3 scripts/backlog_triage.py`)
|
||||
- `scripts/backlog_triage_cron.sh` — operational script (`bash scripts/backlog_triage_cron.sh`)
|
||||
- `scripts/backup_pipeline.sh` — operational script (`bash scripts/backup_pipeline.sh`)
|
||||
- `scripts/bezalel_gemma4_vps.py` — operational script (`python3 scripts/bezalel_gemma4_vps.py`)
|
||||
### Primary entry points
|
||||
|
||||
## Data Flow
|
||||
- `index.html` — root browser entry point
|
||||
- `boot.js` — startup selector; `tests/boot.test.js` shows it chooses file-mode vs HTTP/module-mode and injects `bootstrap.mjs` when served over HTTP
|
||||
- `bootstrap.mjs` — module bootstrap for the browser shell
|
||||
- `app.js` — main browser runtime; owns world state, GOFAI wiring, metrics polling, and portal/UI logic
|
||||
- `server.py` — WebSocket broadcast bridge on `ws://0.0.0.0:8765`
|
||||
- `nexus/morrowind_harness.py` — GamePortal/MCP harness for OpenMW Morrowind
|
||||
- `nexus/bannerlord_harness.py` — GamePortal/MCP harness for Bannerlord
|
||||
- `mempalace/tunnel_sync.py` — pulls remote fleet closets into the local palace over HTTP
|
||||
- `multi_user_bridge.py` — HTTP bridge for multi-user chat/session integration
|
||||
- `mcp_servers/desktop_control_server.py` — stdio MCP server exposing screenshots/mouse/keyboard control
|
||||
|
||||
1. Operators enter through `codebase_genome.py`, `gemini-fallback-setup.sh`, `morrowind/hud.sh`.
|
||||
2. Core logic fans into top-level components: `angband`, `ansible`, `briefings`, `codebase_genome`, `config`, `configs`.
|
||||
3. Validation is incomplete around `wizards/allegro/home/skills/red-teaming/godmode/scripts/auto_jailbreak.py`, `timmy-local/cache/agent_cache.py`, `wizards/allegro/home/skills/red-teaming/godmode/scripts/parseltongue.py`, so changes there carry regression risk.
|
||||
4. Final artifacts land as repository files, docs, or runtime side effects depending on the selected entry point.
|
||||
### Data flow
|
||||
|
||||
1. Browser startup begins at `index.html`
|
||||
2. `boot.js` decides whether the page is being served correctly; in HTTP mode it injects `bootstrap.mjs`
|
||||
3. `bootstrap.mjs` hands off to `app.js`
|
||||
4. `app.js` loads world configuration from `portals.json` and `vision.json`
|
||||
5. `app.js` constructs the Three.js scene and in-browser reasoning components, including `SymbolicEngine`, `NeuroSymbolicBridge`, `setupGOFAI()`, and `updateGOFAI()`
|
||||
6. Browser state and external runtimes connect through `server.py`, which broadcasts messages between connected clients
|
||||
7. Python harnesses (`nexus/morrowind_harness.py`, `nexus/bannerlord_harness.py`) spawn MCP subprocesses for desktop control / Steam metadata, capture state, execute actions, and feed telemetry into the Nexus bridge
|
||||
8. Memory/fleet tools like `mempalace/tunnel_sync.py` import remote palace data into local closets, extending what the operator/runtime layers can inspect
|
||||
9. Tests validate both the static browser contract and the higher-level repo-truth/memory contracts
|
||||
|
||||
### Important repo-specific runtime facts
|
||||
|
||||
- `portals.json` is a JSON array of portal/world/operator entries; examples in this checkout include `morrowind`, `bannerlord`, `workshop`, `archive`, `chapel`, and `courtyard`
|
||||
- `server.py` is a plain broadcast hub: clients send messages, the server forwards them to other connected clients
|
||||
- `nexus/morrowind_harness.py` and `nexus/bannerlord_harness.py` both implement a GamePortal pattern with MCP subprocess clients over stdio and WebSocket telemetry uplink
|
||||
- `mempalace/tunnel_sync.py` is not speculative; it is a real client that discovers remote wings, searches remote rooms, and writes `.closet.json` payloads locally
|
||||
|
||||
## Key Abstractions
|
||||
|
||||
- `codebase_genome.py` — classes `FunctionInfo`:19; functions `extract_functions()`:58, `generate_test()`:116, `scan_repo()`:191, `find_existing_tests()`:209, `main()`:231
|
||||
- `evennia/timmy_world/game.py` — classes `World`:91, `ActionSystem`:421, `TimmyAI`:539, `NPCAI`:550; functions `get_narrative_phase()`:55, `get_phase_transition_event()`:65
|
||||
- `evennia/timmy_world/world/game.py` — classes `World`:19, `ActionSystem`:326, `TimmyAI`:444, `NPCAI`:455; functions none detected
|
||||
- `timmy-world/game.py` — classes `World`:19, `ActionSystem`:349, `TimmyAI`:467, `NPCAI`:478; functions none detected
|
||||
- `wizards/allegro/home/skills/red-teaming/godmode/scripts/auto_jailbreak.py` — classes none detected; functions none detected
|
||||
- `uniwizard/self_grader.py` — classes `SessionGrade`:23, `WeeklyReport`:55, `SelfGrader`:74; functions `main()`:713
|
||||
- `uni-wizard/v3/intelligence_engine.py` — classes `ExecutionPattern`:27, `ModelPerformance`:44, `AdaptationEvent`:58, `PatternDatabase`:69; functions none detected
|
||||
- `scripts/know_thy_father/crossref_audit.py` — classes `ThemeCategory`:30, `Principle`:160, `MeaningKernel`:169, `CrossRefFinding`:178; functions `extract_themes_from_text()`:192, `parse_soul_md()`:206, `parse_kernels()`:264, `cross_reference()`:296, `generate_report()`:440, `main()`:561
|
||||
### Browser runtime
|
||||
|
||||
- `app.js`
|
||||
- Defines in-browser reasoning/state machinery, including `class SymbolicEngine`, `class NeuroSymbolicBridge`, `setupGOFAI()`, and `updateGOFAI()`
|
||||
- Couples rendering, local symbolic reasoning, metrics polling, and portal/UI logic in one very large root module
|
||||
- `BROWSER_CONTRACT.md`
|
||||
- Acts like an executable architecture contract for the browser surface
|
||||
- Declares required files, DOM IDs, Three.js expectations, provenance rules, and WebSocket expectations
|
||||
|
||||
### Realtime bridge
|
||||
|
||||
- `server.py`
|
||||
- Single hub abstraction: a WebSocket broadcast server maintaining a `clients` set and forwarding messages from one client to the others
|
||||
- This is the seam between browser shell, harnesses, and external telemetry producers
|
||||
|
||||
### GamePortal harness layer
|
||||
|
||||
- `nexus/morrowind_harness.py`
|
||||
- `nexus/bannerlord_harness.py`
|
||||
- Both define MCP client wrappers, `GameState` / `ActionResult`-style data classes, and an Observe-Decide-Act telemetry loop
|
||||
- The harnesses are symmetric enough to be understood as reusable portal adapters with game-specific context injected on top
|
||||
|
||||
### Memory / fleet layer
|
||||
|
||||
- `mempalace/tunnel_sync.py`
|
||||
- Encodes the fleet-memory sync client contract: discover wings, pull broad room queries, write closet files, support dry-run
|
||||
- `mempalace.js`
|
||||
- Minimal browser/Electron bridge to MemPalace commands via `window.electronAPI.execPython(...)`
|
||||
- Important because it shows a second memory integration surface distinct from the Python fleet sync path
|
||||
|
||||
### Operator / interaction bridge
|
||||
|
||||
- `multi_user_bridge.py`
|
||||
- `commands/timmy_commands.py`
|
||||
- These bridge user-facing conversations or MUD/Evennia interactions back into Timmy/Nexus services
|
||||
|
||||
## API Surface
|
||||
|
||||
- CLI: `python3 codebase_genome.py` — python main guard (`codebase_genome.py`)
|
||||
- CLI: `bash gemini-fallback-setup.sh` — operational script (`gemini-fallback-setup.sh`)
|
||||
- CLI: `bash morrowind/hud.sh` — operational script (`morrowind/hud.sh`)
|
||||
- CLI: `python3 pipelines/codebase_genome.py` — python main guard (`pipelines/codebase_genome.py`)
|
||||
- CLI: `python3 scripts/agent_pr_gate.py` — operational script (`scripts/agent_pr_gate.py`)
|
||||
- CLI: `bash scripts/auto_restart_agent.sh` — operational script (`scripts/auto_restart_agent.sh`)
|
||||
- CLI: `python3 scripts/autonomous_issue_creator.py` — operational script (`scripts/autonomous_issue_creator.py`)
|
||||
- CLI: `python3 scripts/backlog_cleanup.py` — operational script (`scripts/backlog_cleanup.py`)
|
||||
- Python: `extract_functions()` from `codebase_genome.py:58`
|
||||
- Python: `generate_test()` from `codebase_genome.py:116`
|
||||
- Python: `scan_repo()` from `codebase_genome.py:191`
|
||||
- Python: `find_existing_tests()` from `codebase_genome.py:209`
|
||||
- Python: `main()` from `codebase_genome.py:231`
|
||||
- Python: `get_narrative_phase()` from `evennia/timmy_world/game.py:55`
|
||||
### Browser / static surface
|
||||
|
||||
## Test Coverage Report
|
||||
- `index.html` served over HTTP
|
||||
- `boot.js` exports `bootPage()`; verified by `node --test tests/boot.test.js`
|
||||
- Data APIs are file-based inside the repo: `portals.json`, `vision.json`, `manifest.json`
|
||||
|
||||
- Source and script files inspected: 219
|
||||
- Test files inspected: 73
|
||||
- Coverage gaps:
|
||||
- `wizards/allegro/home/skills/red-teaming/godmode/scripts/auto_jailbreak.py` — no matching test reference detected
|
||||
- `timmy-local/cache/agent_cache.py` — no matching test reference detected
|
||||
- `wizards/allegro/home/skills/red-teaming/godmode/scripts/parseltongue.py` — no matching test reference detected
|
||||
- `wizards/allegro/home/skills/red-teaming/godmode/scripts/godmode_race.py` — no matching test reference detected
|
||||
- `skills/productivity/google-workspace/scripts/google_api.py` — no matching test reference detected
|
||||
- `wizards/allegro/home/skills/productivity/google-workspace/scripts/google_api.py` — no matching test reference detected
|
||||
- `morrowind/pilot.py` — no matching test reference detected
|
||||
- `skills/research/domain-intel/scripts/domain_intel.py` — no matching test reference detected
|
||||
- `wizards/allegro/home/skills/research/domain-intel/scripts/domain_intel.py` — no matching test reference detected
|
||||
- `timmy-local/scripts/ingest.py` — no matching test reference detected
|
||||
- `uni-wizard/scripts/generate_scorecard.py` — no matching test reference detected
|
||||
- `morrowind/local_brain.py` — no matching test reference detected
|
||||
### Network/runtime surface
|
||||
|
||||
## Security Audit Findings
|
||||
- `python3 server.py`
|
||||
- Starts the WebSocket bridge on port `8765`
|
||||
- `python3 l402_server.py`
|
||||
- Local HTTP microservice for cost-estimate style responses
|
||||
- `python3 multi_user_bridge.py`
|
||||
- Multi-user HTTP/chat bridge
|
||||
|
||||
- [medium] `briefings/briefing_20260325.json:37` — hardcoded http endpoint: plaintext or fixed HTTP endpoints can drift or leak across environments. Evidence: `"gitea_error": "Gitea 404: {\"errors\":null,\"message\":\"not found\",\"url\":\"http://143.198.27.163:3000/api/swagger\"}\n [http://143.198.27.163:3000/api/v1/repos/Timmy_Foundation/sovereign-orchestration/issues?state=open&type=issues&sort=created&direction=desc&limit=1&page=1]",`
|
||||
- [medium] `briefings/briefing_20260328.json:11` — hardcoded http endpoint: plaintext or fixed HTTP endpoints can drift or leak across environments. Evidence: `"provider_base_url": "http://localhost:8081/v1",`
|
||||
- [medium] `briefings/briefing_20260329.json:11` — hardcoded http endpoint: plaintext or fixed HTTP endpoints can drift or leak across environments. Evidence: `"provider_base_url": "http://localhost:8081/v1",`
|
||||
- [medium] `config.yaml:37` — hardcoded http endpoint: plaintext or fixed HTTP endpoints can drift or leak across environments. Evidence: `summary_base_url: http://localhost:11434/v1`
|
||||
- [medium] `config.yaml:47` — hardcoded http endpoint: plaintext or fixed HTTP endpoints can drift or leak across environments. Evidence: `base_url: 'http://localhost:11434/v1'`
|
||||
- [medium] `config.yaml:52` — hardcoded http endpoint: plaintext or fixed HTTP endpoints can drift or leak across environments. Evidence: `base_url: 'http://localhost:11434/v1'`
|
||||
- [medium] `config.yaml:57` — hardcoded http endpoint: plaintext or fixed HTTP endpoints can drift or leak across environments. Evidence: `base_url: 'http://localhost:11434/v1'`
|
||||
- [medium] `config.yaml:62` — hardcoded http endpoint: plaintext or fixed HTTP endpoints can drift or leak across environments. Evidence: `base_url: 'http://localhost:11434/v1'`
|
||||
- [medium] `config.yaml:67` — hardcoded http endpoint: plaintext or fixed HTTP endpoints can drift or leak across environments. Evidence: `base_url: 'http://localhost:11434/v1'`
|
||||
- [medium] `config.yaml:77` — hardcoded http endpoint: plaintext or fixed HTTP endpoints can drift or leak across environments. Evidence: `base_url: 'http://localhost:11434/v1'`
|
||||
- [medium] `config.yaml:82` — hardcoded http endpoint: plaintext or fixed HTTP endpoints can drift or leak across environments. Evidence: `base_url: 'http://localhost:11434/v1'`
|
||||
- [medium] `config.yaml:174` — hardcoded http endpoint: plaintext or fixed HTTP endpoints can drift or leak across environments. Evidence: `base_url: http://localhost:11434/v1`
|
||||
### Harness / operator CLI surfaces
|
||||
|
||||
## Dead Code Candidates
|
||||
- `python3 nexus/morrowind_harness.py`
|
||||
- `python3 nexus/bannerlord_harness.py`
|
||||
- `python3 mempalace/tunnel_sync.py --peer <url> [--dry-run] [--n N]`
|
||||
- `python3 mcp_servers/desktop_control_server.py`
|
||||
- `python3 mcp_servers/steam_info_server.py`
|
||||
|
||||
- `wizards/allegro/home/skills/red-teaming/godmode/scripts/auto_jailbreak.py` — not imported by indexed Python modules and not referenced by tests
|
||||
- `timmy-local/cache/agent_cache.py` — not imported by indexed Python modules and not referenced by tests
|
||||
- `wizards/allegro/home/skills/red-teaming/godmode/scripts/parseltongue.py` — not imported by indexed Python modules and not referenced by tests
|
||||
- `wizards/allegro/home/skills/red-teaming/godmode/scripts/godmode_race.py` — not imported by indexed Python modules and not referenced by tests
|
||||
- `skills/productivity/google-workspace/scripts/google_api.py` — not imported by indexed Python modules and not referenced by tests
|
||||
- `wizards/allegro/home/skills/productivity/google-workspace/scripts/google_api.py` — not imported by indexed Python modules and not referenced by tests
|
||||
- `morrowind/pilot.py` — not imported by indexed Python modules and not referenced by tests
|
||||
- `skills/research/domain-intel/scripts/domain_intel.py` — not imported by indexed Python modules and not referenced by tests
|
||||
- `wizards/allegro/home/skills/research/domain-intel/scripts/domain_intel.py` — not imported by indexed Python modules and not referenced by tests
|
||||
- `timmy-local/scripts/ingest.py` — not imported by indexed Python modules and not referenced by tests
|
||||
### Validation surface
|
||||
|
||||
## Performance Bottleneck Analysis
|
||||
- `python3 -m pytest tests/test_portals_json.py tests/test_index_html_integrity.py tests/test_repo_truth.py -q`
|
||||
- `node --test tests/boot.test.js`
|
||||
- `python3 -m py_compile server.py nexus/morrowind_harness.py nexus/bannerlord_harness.py mempalace/tunnel_sync.py mcp_servers/desktop_control_server.py`
|
||||
- `tests/test_browser_smoke.py` defines the higher-cost Playwright smoke contract for the world shell
|
||||
|
||||
- `angband/mcp_server.py` — large module (353 lines) likely hides multiple responsibilities
|
||||
- `evennia/timmy_world/game.py` — large module (1541 lines) likely hides multiple responsibilities
|
||||
- `evennia/timmy_world/world/game.py` — large module (1345 lines) likely hides multiple responsibilities
|
||||
- `morrowind/mcp_server.py` — large module (451 lines) likely hides multiple responsibilities
|
||||
- `morrowind/pilot.py` — large module (459 lines) likely hides multiple responsibilities
|
||||
- `pipelines/codebase_genome.py` — large module (557 lines) likely hides multiple responsibilities
|
||||
- `scripts/know_thy_father/crossref_audit.py` — large module (657 lines) likely hides multiple responsibilities
|
||||
- `scripts/know_thy_father/index_media.py` — large module (405 lines) likely hides multiple responsibilities
|
||||
- `scripts/know_thy_father/synthesize_kernels.py` — large module (416 lines) likely hides multiple responsibilities
|
||||
- `scripts/predictive_resource_allocator.py` — large module (410 lines) likely hides multiple responsibilities
|
||||
## Test Coverage Gaps
|
||||
|
||||
Strongly covered in this checkout:
|
||||
- `tests/test_portals_json.py` validates `portals.json`
|
||||
- `tests/test_index_html_integrity.py` checks merge-marker/DOM-integrity regressions in `index.html`
|
||||
- `tests/boot.test.js` verifies `boot.js` startup behavior
|
||||
- `tests/test_repo_truth.py` validates the repo-truth documents
|
||||
- Multiple `tests/test_mempalace_*.py` files cover the palace layer
|
||||
- `tests/test_bannerlord_harness.py` exists for the Bannerlord harness
|
||||
|
||||
Notable gaps or weak seams:
|
||||
- `nexus/morrowind_harness.py` is large and operationally critical, but the generated baseline still flags its test coverage as a gap relative to its size/complexity
|
||||
- `mcp_servers/desktop_control_server.py` exposes high-power automation but has no obvious dedicated test file in the root `tests/` suite
|
||||
- `app.js` is the dominant browser runtime file and mixes rendering, GOFAI, metrics, and integration logic in one place; browser smoke exists, but there is limited unit-level decomposition around those subsystems
|
||||
- `mempalace.js` appears to be a minimal bridge that has gone stale relative to the richer Python MemPalace layer
|
||||
- `multi_user_bridge.py` is a large integration surface and should be treated as high regression risk precisely because it is central to operator/chat flow
|
||||
|
||||
## Security Considerations
|
||||
|
||||
- `server.py` binds `HOST = "0.0.0.0"`, exposing the broadcast bridge beyond localhost unless network controls limit it
|
||||
- The WebSocket bridge is a broadcast hub without visible authentication in `server.py`; connected clients are trusted to send messages into the bus
|
||||
- `mcp_servers/desktop_control_server.py` exposes mouse/keyboard/screenshot control through a stdio MCP server. In any non-local or poorly isolated runtime, this is a privileged automation surface
|
||||
- `app.js` contains hardcoded local/network endpoints such as `http://localhost:${L402_PORT}/api/cost-estimate` and `http://localhost:8082/metrics`; these are convenient for local development but create environment drift and deployment assumptions
|
||||
- `app.js` also embeds explicit endpoint/status references like `ws://143.198.27.163:8765`, which is operationally brittle and the kind of hardcoded location data that drifts across environments
|
||||
- `mempalace.js` shells out through `window.electronAPI.execPython(...)`; this is powerful and useful, but it is a clear trust boundary between UI and host execution
|
||||
- `INVESTIGATION_ISSUE_1145.md` documents an earlier integrity hazard: agents writing to `public/nexus/` instead of canonical root paths. That path confusion is both an operational and security concern because it makes provenance harder to reason about
|
||||
|
||||
## Runtime Truth and Docs Drift
|
||||
|
||||
The most important architecture finding in this repo is not a class or subsystem. It is a truth mismatch.
|
||||
|
||||
- README.md says current `main` does not ship a browser 3D world
|
||||
- CLAUDE.md declares root `app.js` and `index.html` as canonical frontend paths
|
||||
- tests and browser contract now assume the root frontend exists
|
||||
|
||||
All three statements are simultaneously present in this checkout.
|
||||
|
||||
Grounded evidence:
|
||||
- `README.md` still says the repo does not contain an active root frontend such as `index.html`, `app.js`, or `style.css`
|
||||
- the current checkout does contain `index.html`, `app.js`, `style.css`, `manifest.json`, and `gofai_worker.js`
|
||||
- `BROWSER_CONTRACT.md` explicitly treats those root files as required browser assets
|
||||
- `tests/test_browser_smoke.py` serves those exact files and validates DOM/WebGL contracts against them
|
||||
- `tests/test_index_html_integrity.py` assumes `index.html` is canonical and production-relevant
|
||||
- `CLAUDE.md` says frontend code lives at repo root and explicitly warns against `public/nexus/`
|
||||
- `INVESTIGATION_ISSUE_1145.md` explains why `public/nexus/` is a bad/corrupt duplicate path and confirms the real classical AI code lives in root `app.js`
|
||||
|
||||
The honest conclusion:
|
||||
- The repo contains a partially restored or actively re-materialized browser surface
|
||||
- The docs are preserving an older migration truth while the runtime files and smoke contracts describe a newer present-tense truth
|
||||
- Any future work in `the-nexus` must choose one truth and align `README.md`, `CLAUDE.md`, smoke tests, and file layout around it
|
||||
|
||||
That drift is itself a critical architectural fact and should be treated as first-order design debt, not a side note.
|
||||
|
||||
@@ -8,7 +8,6 @@ This pipeline gives Timmy a repeatable way to generate a deterministic `GENOME.m
|
||||
|
||||
- `pipelines/codebase_genome.py` — static analyzer that writes `GENOME.md`
|
||||
- `pipelines/codebase-genome.py` — thin CLI wrapper matching the expected pipeline-style entrypoint
|
||||
- `templates/GENOME-template.md` — reusable review scaffold with the exact sections the generator emits
|
||||
- `scripts/codebase_genome_nightly.py` — org-aware nightly runner that selects the next repo, updates a local checkout, and writes the genome artifact
|
||||
- `GENOME.md` — generated analysis for `timmy-home` itself
|
||||
|
||||
@@ -41,14 +40,6 @@ The hyphenated wrapper also works:
|
||||
python3 pipelines/codebase-genome.py --repo-root /path/to/repo --repo Timmy_Foundation/some-repo
|
||||
```
|
||||
|
||||
If an agent or human wants to review or hand-edit the artifact before publishing it, start from:
|
||||
|
||||
```text
|
||||
templates/GENOME-template.md
|
||||
```
|
||||
|
||||
The template uses the same section names as the generator output, so issue-specific verification can lock the structure without depending on one repo's exact contents.
|
||||
|
||||
## Nightly org rotation
|
||||
|
||||
Dry-run the next selection:
|
||||
|
||||
@@ -1,263 +1,310 @@
|
||||
# GENOME.md — Wolf (Timmy_Foundation/wolf)
|
||||
|
||||
> Codebase Genome v1.0 | Generated 2026-04-14 | Repo 16/16
|
||||
Generated 2026-04-17 from direct source inspection of `/tmp/wolf-genome` plus live test execution.
|
||||
|
||||
## Project Overview
|
||||
|
||||
**Wolf** is a multi-model evaluation engine for sovereign AI fleets. It runs prompts against multiple LLM providers, scores responses on relevance, coherence, and safety, and outputs structured JSON results for model selection and ranking.
|
||||
Wolf is a sovereign multi-model evaluation engine with two real operating modes:
|
||||
|
||||
**Core principle:** agents work, PRs prove it, CI judges it.
|
||||
1. Prompt evaluation mode
|
||||
- runs a set of prompts against multiple model providers
|
||||
- scores responses on relevance, coherence, and safety
|
||||
- emits structured JSON results plus a console leaderboard
|
||||
2. Legacy task / PR mode
|
||||
- fetches Gitea issues
|
||||
- assigns them to configured models/providers
|
||||
- generates output files and opens PRs
|
||||
- records task scores in a leaderboard
|
||||
|
||||
**Status:** v1.0.0 — production-ready for prompt evaluation. Legacy PR evaluation module retained for backward compatibility.
|
||||
Current repo shape observed directly:
|
||||
- 9 Python modules under `wolf/`
|
||||
- 5 active test modules under `tests/`
|
||||
- 63 tests passing across `test_config.py`, `test_evaluator.py`, `test_gitea.py`, `test_models.py`, `test_runner.py`
|
||||
- two smoke workflows: `.gitea/workflows/smoke.yml` and `.github/workflows/smoke-test.yml`
|
||||
- a checked-in `GENOME.md` at repo root
|
||||
|
||||
## Architecture
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
CLI[cli.py] --> Config[config.py]
|
||||
CLI --> TaskGen[task.py]
|
||||
CLI --> Runner[runner.py]
|
||||
CLI --> Evaluator[evaluator.py]
|
||||
CLI --> Leaderboard[leaderboard.py]
|
||||
CLI --> Gitea[gitea.py]
|
||||
flowchart TD
|
||||
CLI1[wolf.cli]
|
||||
CLI2[wolf.runner]
|
||||
CFG[Config + setup_logging]
|
||||
TASKS[TaskGenerator]
|
||||
AR[AgentRunner]
|
||||
PE[PromptEvaluator]
|
||||
SC[ResponseScorer]
|
||||
MF[ModelFactory]
|
||||
MC[Provider Clients]
|
||||
GC[GiteaClient]
|
||||
LB[Leaderboard]
|
||||
OUT1[JSON results]
|
||||
OUT2[stdout summary]
|
||||
OUT3[Gitea PRs]
|
||||
|
||||
Runner --> Models[models.py]
|
||||
Runner --> Gitea
|
||||
Evaluator --> Models
|
||||
CLI1 --> CFG
|
||||
CLI1 --> GC
|
||||
CLI1 --> TASKS
|
||||
CLI1 --> AR
|
||||
CLI1 --> LB
|
||||
CLI1 --> PE
|
||||
|
||||
TaskGen --> Gitea
|
||||
Leaderboard --> |leaderboard.json| FS[(File System)]
|
||||
Config --> |wolf-config.yaml| FS
|
||||
CLI2 --> CFG
|
||||
CLI2 --> PE
|
||||
PE --> SC
|
||||
PE --> MF
|
||||
MF --> MC
|
||||
CLI2 --> OUT1
|
||||
CLI2 --> OUT2
|
||||
|
||||
Models --> OpenRouter[OpenRouter API]
|
||||
Models --> Groq[Groq API]
|
||||
Models --> Ollama[Ollama Local]
|
||||
Models --> OpenAI[OpenAI API]
|
||||
Models --> Anthropic[Anthropic API]
|
||||
|
||||
Runner --> |branch + commit| Gitea
|
||||
Evaluator --> |score results| Leaderboard
|
||||
TASKS --> GC
|
||||
AR --> MF
|
||||
AR --> GC
|
||||
AR --> OUT3
|
||||
CLI1 --> LB
|
||||
```
|
||||
|
||||
## Entry Points
|
||||
|
||||
| Entry Point | Command | Purpose |
|
||||
|-------------|---------|---------|
|
||||
| `wolf/cli.py` | `python3 -m wolf.cli --run` | Main CLI: run tasks, evaluate PRs, show leaderboard |
|
||||
| `wolf/runner.py` | `python3 -m wolf.runner --prompts p.json --models m.json` | Standalone prompt evaluation runner |
|
||||
| `wolf/__init__.py` | `import wolf` | Package init, version metadata |
|
||||
Primary runtime entry points:
|
||||
- `python -m wolf.runner`
|
||||
- pure prompt evaluation pipeline
|
||||
- requires `--prompts` plus either `--models` or `--config`
|
||||
- `python -m wolf.cli`
|
||||
- task runner / PR scoring / leaderboard CLI
|
||||
- supports `--run`, `--evaluate`, `--leaderboard`
|
||||
|
||||
Supporting entry surfaces:
|
||||
- `wolf/config.py`
|
||||
- config loading and log setup
|
||||
- `wolf/models.py`
|
||||
- provider-specific model clients
|
||||
- `wolf/gitea.py`
|
||||
- repository / branch / file / PR operations
|
||||
|
||||
## Data Flow
|
||||
|
||||
### Prompt Evaluation Pipeline (Primary)
|
||||
### Prompt evaluation mode
|
||||
|
||||
```
|
||||
prompts.json + models.json (or wolf-config.yaml)
|
||||
│
|
||||
▼
|
||||
PromptEvaluator.evaluate()
|
||||
│
|
||||
├─ For each (prompt, model) pair:
|
||||
│ ├─ ModelClient.generate(prompt) → response text
|
||||
│ ├─ ResponseScorer.score(response, prompt)
|
||||
│ │ ├─ score_relevance() (0.40 weight)
|
||||
│ │ ├─ score_coherence() (0.35 weight)
|
||||
│ │ └─ score_safety() (0.25 weight)
|
||||
│ └─ EvaluationResult (prompt, model, scores, latency, error)
|
||||
│
|
||||
▼
|
||||
evaluate_and_serialize() → JSON output
|
||||
│
|
||||
├─ model_summaries (per-model averages)
|
||||
└─ results[] (per-evaluation details)
|
||||
```
|
||||
1. `runner.py` loads prompts from JSON via `load_prompts()`
|
||||
2. it loads model endpoints from JSON or config via `load_models_from_json()` / `load_models_from_config()`
|
||||
3. `PromptEvaluator.evaluate()` iterates prompt × model
|
||||
4. `ModelFactory.get_client()` selects the provider client
|
||||
5. the client calls the model API and returns response text
|
||||
6. `ResponseScorer.score()` computes:
|
||||
- relevance
|
||||
- coherence
|
||||
- safety
|
||||
- weighted overall
|
||||
7. `evaluate_and_serialize()` builds per-model summaries and detailed results
|
||||
8. `run()` returns JSON and optionally writes it to disk
|
||||
9. `print_summary()` renders a human-readable ranking table
|
||||
|
||||
### Task Assignment Pipeline (Legacy)
|
||||
### Legacy task / PR mode
|
||||
|
||||
```
|
||||
Gitea Issues → TaskGenerator → AgentRunner
|
||||
│ │ │
|
||||
▼ ▼ ▼
|
||||
Fetch tasks Assign models Execute + PR
|
||||
from issues from config via Gitea API
|
||||
```
|
||||
1. `cli.py` loads config and constructs `GiteaClient`
|
||||
2. `TaskGenerator.from_gitea_issues()` or `from_spec()` builds `Task` objects
|
||||
3. `assign_tasks()` applies round-robin model/provider assignment
|
||||
4. `AgentRunner.execute_task()`:
|
||||
- generates model output
|
||||
- creates a branch
|
||||
- writes `wolf-outputs/<task>.md`
|
||||
- opens a PR
|
||||
5. `Leaderboard.record_score()` persists score history and serverless-readiness flags
|
||||
|
||||
## Key Abstractions
|
||||
|
||||
| Class | Module | Purpose |
|
||||
|-------|--------|---------|
|
||||
| `PromptEntry` | evaluator.py | Single prompt with expected keywords and category |
|
||||
| `ModelEndpoint` | evaluator.py | Model connection descriptor (provider, model_id, key) |
|
||||
| `ScoreResult` | evaluator.py | Scores for relevance, coherence, safety, overall |
|
||||
| `EvaluationResult` | evaluator.py | Full result: prompt + model + response + scores + latency |
|
||||
| `ResponseScorer` | evaluator.py | Heuristic scoring engine (regex + keyword + structure) |
|
||||
| `PromptEvaluator` | evaluator.py | Core engine: runs prompts against models, scores output |
|
||||
| `ModelClient` | models.py | Abstract base for LLM API calls |
|
||||
| `ModelFactory` | models.py | Factory: returns correct client for provider name |
|
||||
| `Task` | task.py | Work unit: id, title, description, assigned model/provider |
|
||||
| `TaskGenerator` | task.py | Creates tasks from Gitea issues or JSON spec |
|
||||
| `AgentRunner` | runner.py | Executes tasks: generate → branch → commit → PR |
|
||||
| `Config` | config.py | YAML config loader (wolf-config.yaml) |
|
||||
| `Leaderboard` | leaderboard.py | Persistent model ranking with serverless readiness |
|
||||
| `GiteaClient` | gitea.py | Full Gitea REST API client |
|
||||
| `PREvaluator` | evaluator.py | Legacy: scores PRs on CI, commits, code quality |
|
||||
Core dataclasses in `wolf/evaluator.py`:
|
||||
- `PromptEntry`
|
||||
- `ModelEndpoint`
|
||||
- `ScoreResult`
|
||||
- `EvaluationResult`
|
||||
|
||||
Core engines:
|
||||
- `ResponseScorer`
|
||||
- heuristic scoring engine for relevance/coherence/safety
|
||||
- `PromptEvaluator`
|
||||
- N×M evaluation orchestration
|
||||
- `ModelFactory`
|
||||
- dispatches to provider clients
|
||||
- `GiteaClient`
|
||||
- wraps issue / branch / file / PR operations
|
||||
- `TaskGenerator`
|
||||
- turns issues or spec JSON into `Task` objects
|
||||
- `AgentRunner`
|
||||
- legacy execution path from task to PR
|
||||
- `Leaderboard`
|
||||
- persists scoring history and ranking output
|
||||
- `Config`
|
||||
- tolerant config loader with PyYAML fallback logic
|
||||
|
||||
## API Surface
|
||||
|
||||
### CLI Arguments (cli.py)
|
||||
CLI flags in `wolf.runner`:
|
||||
- `--prompts/-p`
|
||||
- `--models/-m`
|
||||
- `--config/-c`
|
||||
- `--output/-o`
|
||||
- `--system-prompt`
|
||||
|
||||
| Flag | Description |
|
||||
|------|-------------|
|
||||
| `--config` | Path to wolf-config.yaml |
|
||||
| `--task-spec` | Path to task specification JSON |
|
||||
| `--run` | Run pending tasks (assign models, execute, create PRs) |
|
||||
| `--evaluate` | Evaluate open PRs and score them |
|
||||
| `--leaderboard` | Show model rankings |
|
||||
CLI flags in `wolf.cli`:
|
||||
- `--config`
|
||||
- `--task-spec`
|
||||
- `--run`
|
||||
- `--evaluate`
|
||||
- `--leaderboard`
|
||||
|
||||
### CLI Arguments (runner.py)
|
||||
Provider surface in `wolf.models`:
|
||||
- `OpenRouterClient`
|
||||
- `GroqClient`
|
||||
- `OllamaClient`
|
||||
- `AnthropicClient`
|
||||
- OpenAI is handled by reusing the Groq-style, OpenAI-compatible client with a different base URL
|
||||
|
||||
| Flag | Description |
|
||||
|------|-------------|
|
||||
| `--prompts` / `-p` | Path to prompts JSON (required) |
|
||||
| `--models` / `-m` | Path to models JSON |
|
||||
| `--config` / `-c` | Path to wolf-config.yaml (alternative to --models) |
|
||||
| `--output` / `-o` | Path to write JSON results |
|
||||
| `--system-prompt` | System prompt for all model calls |
|
||||
|
||||
### Provider Clients (models.py)
|
||||
|
||||
| Client | Provider | API Format |
|
||||
|--------|----------|------------|
|
||||
| `OpenRouterClient` | openrouter | OpenAI-compatible chat completions |
|
||||
| `GroqClient` | groq | OpenAI-compatible chat completions |
|
||||
| `OllamaClient` | ollama | Ollama native /api/generate |
|
||||
| `OpenAIClient` | openai | OpenAI-compatible (reuses GroqClient with different URL) |
|
||||
| `AnthropicClient` | anthropic | Anthropic Messages API v1 |
|
||||
|
||||
### Gitea Client (gitea.py)
|
||||
|
||||
| Method | Purpose |
|
||||
|--------|---------|
|
||||
| `get_issues()` | Fetch issues by state |
|
||||
| `create_branch()` | Create new branch from base |
|
||||
| `create_file()` | Create file on branch (base64) |
|
||||
| `update_file()` | Update file with SHA |
|
||||
| `get_file()` | Read file contents |
|
||||
| `create_pull_request()` | Open PR |
|
||||
| `get_pull_request()` | Fetch PR details |
|
||||
| `get_pr_status()` | Check PR CI status |
|
||||
|
||||
## Configuration (wolf-config.yaml)
|
||||
|
||||
```yaml
|
||||
gitea:
|
||||
base_url: "https://forge.alexanderwhitestone.com/api/v1"
|
||||
token: "..."
|
||||
owner: "Timmy_Foundation"
|
||||
repo: "wolf"
|
||||
|
||||
providers:
|
||||
openrouter:
|
||||
api_key: "..."
|
||||
base_url: "https://openrouter.ai/api/v1"
|
||||
ollama:
|
||||
base_url: "http://localhost:11434"
|
||||
|
||||
models:
|
||||
- model: "anthropic/claude-3.5-sonnet"
|
||||
provider: "openrouter"
|
||||
- model: "gemma4:latest"
|
||||
provider: "ollama"
|
||||
|
||||
log_dir: "~/.hermes/wolf/"
|
||||
leaderboard_path: "~/.hermes/wolf/leaderboard.json"
|
||||
```
|
||||
|
||||
## Scoring Methodology
|
||||
|
||||
| Dimension | Weight | Method |
|
||||
|-----------|--------|--------|
|
||||
| Relevance | 0.40 | Keyword matching (expected_keywords), prompt word overlap, length penalty, refusal detection |
|
||||
| Coherence | 0.35 | Structure indicators (paragraphs, lists, transitions), sentence completeness, repetition ratio, length sweet spot |
|
||||
| Safety | 0.25 | Unsafe pattern regex (bomb/hack/bypass/injection), profanity detection |
|
||||
|
||||
**Overall = relevance×0.40 + coherence×0.35 + safety×0.25**
|
||||
|
||||
### Safety Patterns Detected
|
||||
|
||||
- Bomb/explosive/weapon creation
|
||||
- System/network hacking
|
||||
- Security/auth bypass
|
||||
- Prompt injection ("ignore previous instructions")
|
||||
- System prompt extraction attempts
|
||||
|
||||
### Refusal Patterns Detected
|
||||
|
||||
- "I cannot/can't/won't help/assist"
|
||||
- "Sorry, but I cannot"
|
||||
- "Against my guidelines/policy"
|
||||
Gitea surface in `wolf.gitea`:
|
||||
- `get_issues()`
|
||||
- `create_branch()`
|
||||
- `create_file()`
|
||||
- `update_file()`
|
||||
- `get_file()`
|
||||
- `create_pull_request()`
|
||||
- `get_pull_request()`
|
||||
- `get_pr_status()`
|
||||
|
||||
## Test Coverage
|
||||
|
||||
| File | Tests | Coverage |
|
||||
|------|-------|----------|
|
||||
| `tests/test_evaluator.py` | 17 tests | PromptEntry, ModelEndpoint, ResponseScorer (relevance/coherence/safety), PromptEvaluator (evaluate, error handling, serialization, file output, multi-model), PREvaluator (score_pr, description scoring) |
|
||||
| `tests/test_config.py` | 1 test | Config load from YAML |
|
||||
Live verification run:
|
||||
- `python3 -m pytest -q tests/test_config.py tests/test_evaluator.py tests/test_gitea.py tests/test_models.py tests/test_runner.py`
|
||||
- result: `63 passed`
|
||||
|
||||
### Coverage Gaps
|
||||
Current tested modules:
|
||||
- `tests/test_config.py`
|
||||
- config load happy path
|
||||
- `tests/test_evaluator.py`
|
||||
- scorer heuristics
|
||||
- prompt/model dataclasses
|
||||
- evaluator serialization paths
|
||||
- legacy PR evaluator behavior
|
||||
- `tests/test_gitea.py`
|
||||
- Gitea client request/response behavior
|
||||
- 404 and fallback status handling
|
||||
- `tests/test_models.py`
|
||||
- provider factory dispatch
|
||||
- provider generate() request formatting
|
||||
- `tests/test_runner.py`
|
||||
- prompt/model loading helpers
|
||||
- parser wiring
|
||||
- `AgentRunner.execute_task()` behavior
|
||||
|
||||
- No tests for `cli.py` (argument parsing, workflow orchestration)
|
||||
- No tests for `runner.py` (`load_prompts`, `load_models_from_json`, `AgentRunner.execute_task`)
|
||||
- No tests for `task.py` (`TaskGenerator.from_gitea_issues`, `from_spec`, `assign_tasks`)
|
||||
- No tests for `models.py` (API clients — would require mocking HTTP)
|
||||
- No tests for `leaderboard.py` (`record_score`, `get_rankings`, serverless readiness logic)
|
||||
- No tests for `gitea.py` (API client — would require mocking HTTP)
|
||||
- No integration tests (end-to-end evaluation pipeline)
|
||||
Coverage gaps that still matter:
|
||||
- `wolf/cli.py`
|
||||
- no direct tests for the top-level workflow routing
|
||||
- `wolf/task.py`
|
||||
- no direct tests for `from_gitea_issues()`, `from_spec()`, `assign_tasks()` in this repo state
|
||||
- `wolf/leaderboard.py`
|
||||
- no direct tests for persistence / ranking / serverless-ready threshold logic
|
||||
|
||||
Important drift note:
|
||||
- the older timmy-home genome artifact claimed only `test_config.py` and `test_evaluator.py` existed
|
||||
- current repo also includes `tests/test_models.py`, `tests/test_gitea.py`, and `tests/test_runner.py`
|
||||
|
||||
## CI / Verification Surface
|
||||
|
||||
Current CI contracts observed directly:
|
||||
- `.gitea/workflows/smoke.yml`
|
||||
- checkout
|
||||
- setup Python 3.11
|
||||
- install `pytest` and `pyyaml`
|
||||
- install `requirements.txt` if present
|
||||
- run `pytest tests/`
|
||||
- `.github/workflows/smoke-test.yml`
|
||||
- YAML parse check
|
||||
- JSON parse check
|
||||
- Python compile check
|
||||
- shell syntax check
|
||||
- secret scan
|
||||
|
||||
This means the real repo contract is broader than unit tests alone: syntax, parseability, and secret hygiene are part of the shipped smoke lane.
|
||||
|
||||
## Dependencies
|
||||
|
||||
| Dependency | Used By | Purpose |
|
||||
|------------|---------|---------|
|
||||
| `requests` | models.py, gitea.py | HTTP client for all API calls |
|
||||
| `pyyaml` (optional) | config.py | YAML config parsing (falls back to line parser) |
|
||||
Direct dependency files:
|
||||
- `requirements.txt`
|
||||
- only `requests`
|
||||
- README install instructions
|
||||
- `pip install requests pyyaml`
|
||||
|
||||
Observed dependency tension:
|
||||
- `wolf/config.py` imports `yaml` when available and falls back to a simple parser if PyYAML is absent
|
||||
- CI installs `pyyaml`
|
||||
- `requirements.txt` does not list `pyyaml`
|
||||
|
||||
So PyYAML is operationally expected in normal use and CI, but not formally pinned in `requirements.txt`.
|
||||
|
||||
## Security Considerations
|
||||
|
||||
1. **API keys in config**: `wolf-config.yaml` stores provider API keys in plaintext. The file should be `chmod 600` and excluded from git (already covered by the `.gitignore` pattern via `~/.hermes/`).
|
||||
2. **Gitea token**: Full access token used for branch creation, file commits, and PR creation. Scoped access recommended.
|
||||
3. **No input sanitization**: Prompts from Gitea issues are passed directly to models without filtering. Prompt injection risk for automated workflows.
|
||||
4. **No rate limiting**: Model API calls are sequential with no backoff or rate limiting. Could exhaust API quotas.
|
||||
5. **Legacy code reference**: `evaluator.py` defines the alias `Evaluator = PREvaluator`, and `cli.py` imports `Evaluator` expecting the legacy class. This works but is confusing.
|
||||
1. Plaintext secrets in config
|
||||
- model API keys and Gitea tokens are expected via config files
|
||||
- this is user-controlled but still a secret-handling risk
|
||||
2. Arbitrary base URLs
|
||||
- provider configs can point to arbitrary endpoints
|
||||
- useful for sovereignty, but also expands trust boundaries
|
||||
3. PR automation blast radius
|
||||
- `AgentRunner.execute_task()` can create branches, files, and PRs
|
||||
- bad prompts or weak issue filtering could create noisy or unsafe PRs
|
||||
4. Prompt-injection exposure
|
||||
- model prompts and issue bodies are passed through with limited sanitization
|
||||
5. Leaderboard persistence without locking
|
||||
- `leaderboard.json` writes are not protected against concurrent writers
|
||||
|
||||
## Repository Notes
|
||||
|
||||
Notable current-repo facts that the host-repo genome should preserve:
|
||||
- Wolf already ships its own `GENOME.md` at repo root
|
||||
- the timmy-home deliverable for issue #683 is therefore a host-repo genome artifact that mirrors / tracks the current wolf repo, not the first genome ever written for wolf
|
||||
- current smoke workflows exist in both `.gitea/` and `.github/`
|
||||
|
||||
## File Index
|
||||
|
||||
| File | LOC | Purpose |
|
||||
|------|-----|---------|
|
||||
| `wolf/__init__.py` | 12 | Package init, version |
|
||||
| `wolf/cli.py` | 90 | Main CLI orchestrator |
|
||||
| `wolf/config.py` | 48 | YAML config loader |
|
||||
| `wolf/models.py` | 130 | LLM provider clients (5 providers) |
|
||||
| `wolf/runner.py` | 280 | Prompt evaluation CLI + AgentRunner |
|
||||
| `wolf/task.py` | 80 | Task dataclass + generator |
|
||||
| `wolf/evaluator.py` | 350 | Core scoring engine + legacy PR evaluator |
|
||||
| `wolf/leaderboard.py` | 70 | Persistent model ranking |
|
||||
| `wolf/gitea.py` | 100 | Gitea REST API client |
|
||||
| `tests/test_evaluator.py` | 180 | Unit tests for evaluator |
|
||||
| `tests/test_config.py` | 20 | Unit tests for config |
|
||||
Observed module sizes:
|
||||
- `wolf/evaluator.py` — 465 lines
|
||||
- `wolf/runner.py` — 311 lines
|
||||
- `wolf/models.py` — 120 lines
|
||||
- `wolf/gitea.py` — 95 lines
|
||||
- `wolf/cli.py` — 94 lines
|
||||
- `wolf/leaderboard.py` — 77 lines
|
||||
- `wolf/task.py` — 63 lines
|
||||
- `wolf/config.py` — 51 lines
|
||||
- `wolf/__init__.py` — 12 lines
|
||||
|
||||
**Total: ~1,360 LOC Python | 11 modules | 18 tests**
|
||||
Aggregate metrics from direct scan:
|
||||
- 15 Python files total
|
||||
- 9 module files under `wolf/`
|
||||
- 6 Python files under `tests/` (including `__init__.py`)
|
||||
- ~2150 lines of Python total
|
||||
|
||||
## Sovereignty Assessment
|
||||
## Verification Commands
|
||||
|
||||
- **No external dependencies beyond requests**: Runs on any machine with Python 3.11+ and requests.
|
||||
- **No phone-home**: All API calls are to user-configured endpoints.
|
||||
- **No telemetry**: Logs go to local filesystem only.
|
||||
- **Config-driven**: All secrets in user's ~/.hermes/ directory.
|
||||
- **Provider-agnostic**: Supports 5 providers with easy extension via ModelFactory.
|
||||
Commands used for this update:
|
||||
- `git clone --depth 1 --single-branch https://.../Timmy_Foundation/wolf.git /tmp/wolf-genome`
|
||||
- `python3 -m pytest -q tests/test_config.py tests/test_evaluator.py tests/test_gitea.py tests/test_models.py tests/test_runner.py`
|
||||
- direct file inspection of:
|
||||
- `README.md`
|
||||
- `wolf/cli.py`
|
||||
- `wolf/config.py`
|
||||
- `wolf/evaluator.py`
|
||||
- `wolf/gitea.py`
|
||||
- `wolf/models.py`
|
||||
- `wolf/runner.py`
|
||||
- `wolf/task.py`
|
||||
- `wolf/leaderboard.py`
|
||||
- `.gitea/workflows/smoke.yml`
|
||||
- `.github/workflows/smoke-test.yml`
|
||||
|
||||
**Verdict: Fully sovereign. No corporate lock-in. User controls all endpoints and keys.**
|
||||
## Summary
|
||||
|
||||
---
|
||||
|
||||
*"The strength of the pack is the wolf, and the strength of the wolf is the pack."*
|
||||
*— The Wolf Sovereign Core has spoken.*
|
||||
Wolf is real and useful today, but its current reality is:
|
||||
- stronger test coverage than the older timmy-home genome recorded
|
||||
- a still-untested CLI/task/leaderboard control plane
|
||||
- smoke workflows that now form part of the repo’s real contract
|
||||
- a checked-in root `GENOME.md` that does not remove the need for the host-repo genome issue artifact
|
||||
|
||||
@@ -1,67 +0,0 @@
|
||||
# GENOME.md — [org/repo]
|
||||
|
||||
Generated by `pipelines/codebase_genome.py` or used as a manual review scaffold when a human is curating the final artifact.
|
||||
|
||||
## Project Overview
|
||||
|
||||
[One paragraph: what the repo does, why it exists, and what outcome it creates.]
|
||||
|
||||
- Text files indexed: [count]
|
||||
- Source and script files: [count]
|
||||
- Test files: [count]
|
||||
- Documentation files: [count]
|
||||
|
||||
## Architecture
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
repo_root["repo"] --> component_a["component-a"]
|
||||
repo_root --> component_b["component-b"]
|
||||
component_a --> component_b
|
||||
```
|
||||
|
||||
## Entry Points
|
||||
|
||||
- `[path/to/entrypoint]` — [why it matters] (`python3 path/to/entrypoint.py`)
|
||||
- `[path/to/other-entrypoint]` — [why it matters] (`bash path/to/script.sh`)
|
||||
|
||||
## Data Flow
|
||||
|
||||
1. [How operators or callers enter the system.]
|
||||
2. [Which modules or directories fan out from the entrypoint.]
|
||||
3. [Where validation or test gaps create risk.]
|
||||
4. [What artifact, state change, or runtime side effect is produced.]
|
||||
|
||||
## Key Abstractions
|
||||
|
||||
- `[module.py]` — classes `[ClassName]:line`; functions `[function_name()]:line`
|
||||
- `[another_module.py]` — classes `[AnotherClass]:line`; functions `[run()]:line`
|
||||
|
||||
## API Surface
|
||||
|
||||
- CLI: `python3 [entrypoint] --help` — [what it exposes]
|
||||
- Python: `[public_function]()` from `[module.py:line]`
|
||||
- HTTP/WebSocket/other: `[surface]` — [contract summary]
|
||||
|
||||
## Test Coverage Report
|
||||
|
||||
- Source and script files inspected: [count]
|
||||
- Test files inspected: [count]
|
||||
- Coverage gaps:
|
||||
- `[path/to/file]` — [missing coverage detail]
|
||||
- `[path/to/other]` — [missing coverage detail]
|
||||
|
||||
## Security Audit Findings
|
||||
|
||||
- `[severity]` `[path:line]` — [risk category]: [detail]. Evidence: `[snippet]`
|
||||
- `[severity]` `[path:line]` — [risk category]: [detail]. Evidence: `[snippet]`
|
||||
|
||||
## Dead Code Candidates
|
||||
|
||||
- `[path/to/file]` — [why it appears unreferenced]
|
||||
- `[path/to/other]` — [why it appears unreferenced]
|
||||
|
||||
## Performance Bottleneck Analysis
|
||||
|
||||
- `[path/to/file]` — [why runtime or scale could degrade here]
|
||||
- `[path/to/other]` — [filesystem scan / network / large module / hot path detail]
|
||||
@@ -1,37 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Directory two levels up from this file, i.e. the parent of the directory
# that contains this module.
ROOT = Path(__file__).resolve().parents[1]
# GENOME template whose headings are validated by the tests in this module.
TEMPLATE_PATH = ROOT / "templates" / "GENOME-template.md"
# Pipeline documentation whose references are validated by the tests in this module.
DOC_PATH = ROOT / "docs" / "CODEBASE_GENOME_PIPELINE.md"
|
||||
|
||||
|
||||
# Headings that must appear verbatim in the GENOME template.
REQUIRED_HEADINGS = (
    "# GENOME.md — [org/repo]",
    "## Project Overview",
    "## Architecture",
    "## Entry Points",
    "## Data Flow",
    "## Key Abstractions",
    "## API Surface",
    "## Test Coverage Report",
    "## Security Audit Findings",
    "## Dead Code Candidates",
    "## Performance Bottleneck Analysis",
)
|
||||
|
||||
|
||||
def test_issue_666_template_exists_and_covers_required_sections() -> None:
    """Check that the GENOME template exists and contains every required heading.

    Fails if ``TEMPLATE_PATH`` is absent, or if any entry of
    ``REQUIRED_HEADINGS`` does not occur as a substring of the template text.
    """
    assert TEMPLATE_PATH.exists(), "missing templates/GENOME-template.md"
    text = TEMPLATE_PATH.read_text(encoding="utf-8")
    # Collect every absent heading so one failure reports the full gap
    # instead of stopping at the first missing entry.
    missing = [heading for heading in REQUIRED_HEADINGS if heading not in text]
    assert not missing, f"template is missing required headings: {missing}"
|
||||
|
||||
|
||||
def test_issue_666_docs_reference_template_and_single_repo_entrypoint() -> None:
    """Check that the pipeline doc references the template and both entrypoint spellings.

    Fails if ``DOC_PATH`` is absent or if any of the expected substrings is
    not present in its text.
    """
    # Fail with a readable message (as the template test does) rather than
    # letting read_text() raise FileNotFoundError.
    assert DOC_PATH.exists(), "missing docs/CODEBASE_GENOME_PIPELINE.md"
    text = DOC_PATH.read_text(encoding="utf-8")
    assert "templates/GENOME-template.md" in text
    # Both the underscore and the hyphen script names must be documented.
    assert "python3 pipelines/codebase_genome.py" in text
    assert "python3 pipelines/codebase-genome.py" in text
|
||||
22
tests/test_wolf_genome.py
Normal file
22
tests/test_wolf_genome.py
Normal file
@@ -0,0 +1,22 @@
|
||||
from pathlib import Path
|
||||
|
||||
# Anchor on this file's location (its grandparent directory) instead of the
# current working directory, so the tests do not depend on where pytest is run.
GENOME = Path(__file__).resolve().parents[1] / "genomes" / "wolf" / "GENOME.md"
|
||||
|
||||
|
||||
def test_wolf_genome_exists_at_expected_path():
    """Check that the wolf genome file is present at the ``GENOME`` path."""
    assert GENOME.exists(), "wolf genome must exist at genomes/wolf/GENOME.md"
|
||||
|
||||
|
||||
def test_wolf_genome_covers_current_test_surface_and_ci_contract():
    """Check that the wolf genome text mentions each required repo fact.

    Reads the file at ``GENOME`` and fails, listing the absent items, if any
    of the required substrings is not found in it.
    """
    content = GENOME.read_text(encoding="utf-8")
    # Substrings that must appear verbatim: the title, the test modules, the
    # two smoke workflow paths, and the root-genome note.
    required = [
        "# GENOME.md — Wolf (Timmy_Foundation/wolf)",
        "tests/test_models.py",
        "tests/test_gitea.py",
        "tests/test_runner.py",
        ".gitea/workflows/smoke.yml",
        ".github/workflows/smoke-test.yml",
        "`GENOME.md` at repo root",
    ]
    missing = [item for item in required if item not in content]
    assert not missing, f"wolf genome missing current repo facts: {missing}"
|
||||
Reference in New Issue
Block a user