Compare commits
5 Commits
mimo/code/
...
feat/memor
| Author | SHA1 | Date | |
|---|---|---|---|
| e7754ce101 | |||
| 2fa8b5d99b | |||
| bb856765ce | |||
| 1e110922b2 | |||
| b308e627b8 |
@@ -177,7 +177,7 @@ The rule is:
|
|||||||
- rescue good work from legacy Matrix
|
- rescue good work from legacy Matrix
|
||||||
- rebuild inside `the-nexus`
|
- rebuild inside `the-nexus`
|
||||||
- keep telemetry and durable truth flowing through the Hermes harness
|
- keep telemetry and durable truth flowing through the Hermes harness
|
||||||
- Hermes is the sole harness — no external gateway dependencies
|
- keep OpenClaw as a sidecar, not the authority
|
||||||
|
|
||||||
## Verified historical browser-world snapshot
|
## Verified historical browser-world snapshot
|
||||||
|
|
||||||
|
|||||||
13
app.js
13
app.js
@@ -1,4 +1,4 @@
|
|||||||
import ResonanceVisualizer from './nexus/components/resonance-visualizer.js';\nimport * as THREE from 'three';
|
import * as THREE from 'three';
|
||||||
import { EffectComposer } from 'three/addons/postprocessing/EffectComposer.js';
|
import { EffectComposer } from 'three/addons/postprocessing/EffectComposer.js';
|
||||||
import { RenderPass } from 'three/addons/postprocessing/RenderPass.js';
|
import { RenderPass } from 'three/addons/postprocessing/RenderPass.js';
|
||||||
import { UnrealBloomPass } from 'three/addons/postprocessing/UnrealBloomPass.js';
|
import { UnrealBloomPass } from 'three/addons/postprocessing/UnrealBloomPass.js';
|
||||||
@@ -7,7 +7,6 @@ import { SpatialMemory } from './nexus/components/spatial-memory.js';
|
|||||||
import { MemoryBirth } from './nexus/components/memory-birth.js';
|
import { MemoryBirth } from './nexus/components/memory-birth.js';
|
||||||
import { MemoryOptimizer } from './nexus/components/memory-optimizer.js';
|
import { MemoryOptimizer } from './nexus/components/memory-optimizer.js';
|
||||||
import { MemoryInspect } from './nexus/components/memory-inspect.js';
|
import { MemoryInspect } from './nexus/components/memory-inspect.js';
|
||||||
import { MemoryPulse } from './nexus/components/memory-pulse.js';
|
|
||||||
|
|
||||||
// ═══════════════════════════════════════════
|
// ═══════════════════════════════════════════
|
||||||
// NEXUS v1.1 — Portal System Update
|
// NEXUS v1.1 — Portal System Update
|
||||||
@@ -597,7 +596,7 @@ class PSELayer {
|
|||||||
|
|
||||||
let pseLayer;
|
let pseLayer;
|
||||||
|
|
||||||
let resonanceViz, metaLayer, neuroBridge, cbr, symbolicPlanner, knowledgeGraph, blackboard, symbolicEngine, calibrator;
|
let metaLayer, neuroBridge, cbr, symbolicPlanner, knowledgeGraph, blackboard, symbolicEngine, calibrator;
|
||||||
let agentFSMs = {};
|
let agentFSMs = {};
|
||||||
|
|
||||||
function setupGOFAI() {
|
function setupGOFAI() {
|
||||||
@@ -666,7 +665,7 @@ async function init() {
|
|||||||
scene = new THREE.Scene();
|
scene = new THREE.Scene();
|
||||||
scene.fog = new THREE.FogExp2(0x050510, 0.012);
|
scene.fog = new THREE.FogExp2(0x050510, 0.012);
|
||||||
|
|
||||||
setupGOFAI();\n resonanceViz = new ResonanceVisualizer(scene);
|
setupGOFAI();
|
||||||
camera = new THREE.PerspectiveCamera(65, window.innerWidth / window.innerHeight, 0.1, 1000);
|
camera = new THREE.PerspectiveCamera(65, window.innerWidth / window.innerHeight, 0.1, 1000);
|
||||||
camera.position.copy(playerPos);
|
camera.position.copy(playerPos);
|
||||||
|
|
||||||
@@ -716,7 +715,6 @@ async function init() {
|
|||||||
MemoryBirth.wrapSpatialMemory(SpatialMemory);
|
MemoryBirth.wrapSpatialMemory(SpatialMemory);
|
||||||
SpatialMemory.setCamera(camera);
|
SpatialMemory.setCamera(camera);
|
||||||
MemoryInspect.init({ onNavigate: _navigateToMemory });
|
MemoryInspect.init({ onNavigate: _navigateToMemory });
|
||||||
MemoryPulse.init(SpatialMemory);
|
|
||||||
updateLoad(90);
|
updateLoad(90);
|
||||||
|
|
||||||
loadSession();
|
loadSession();
|
||||||
@@ -1947,7 +1945,6 @@ function setupControls() {
|
|||||||
const entry = SpatialMemory.getMemoryFromMesh(hits[0].object);
|
const entry = SpatialMemory.getMemoryFromMesh(hits[0].object);
|
||||||
if (entry) {
|
if (entry) {
|
||||||
SpatialMemory.highlightMemory(entry.data.id);
|
SpatialMemory.highlightMemory(entry.data.id);
|
||||||
MemoryPulse.triggerPulse(entry.data.id);
|
|
||||||
const regionDef = SpatialMemory.REGIONS[entry.region] || SpatialMemory.REGIONS.working;
|
const regionDef = SpatialMemory.REGIONS[entry.region] || SpatialMemory.REGIONS.working;
|
||||||
MemoryInspect.show(entry.data, regionDef);
|
MemoryInspect.show(entry.data, regionDef);
|
||||||
}
|
}
|
||||||
@@ -2927,7 +2924,6 @@ function gameLoop() {
|
|||||||
if (typeof animateMemoryOrbs === 'function') {
|
if (typeof animateMemoryOrbs === 'function') {
|
||||||
SpatialMemory.update(delta);
|
SpatialMemory.update(delta);
|
||||||
MemoryBirth.update(delta);
|
MemoryBirth.update(delta);
|
||||||
MemoryPulse.update();
|
|
||||||
animateMemoryOrbs(delta);
|
animateMemoryOrbs(delta);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3650,6 +3646,3 @@ init().then(() => {
|
|||||||
connectMemPalace();
|
connectMemPalace();
|
||||||
mineMemPalaceContent();
|
mineMemPalaceContent();
|
||||||
});
|
});
|
||||||
|
|
||||||
// Memory optimization loop
|
|
||||||
setInterval(() => { console.log('Running optimization...'); }, 60000);
|
|
||||||
@@ -26,7 +26,7 @@
|
|||||||
|
|
||||||
| Term | Meaning |
|
| Term | Meaning |
|
||||||
|------|---------|
|
|------|---------|
|
||||||
| **The Robing** | ~~DEPRECATED~~ — Hermes handles all layers directly. No external gateway. |
|
| **The Robing** | OpenClaw (gateway) + Hermes (body) running together on one machine. |
|
||||||
| **Robed** | Gateway + Hermes running = fully operational wizard. |
|
| **Robed** | Gateway + Hermes running = fully operational wizard. |
|
||||||
| **Unrobed** | No gateway + Hermes = capable but invisible. |
|
| **Unrobed** | No gateway + Hermes = capable but invisible. |
|
||||||
| **Lobster** | Gateway + no Hermes = reachable but empty. **The FAILURE state.** |
|
| **Lobster** | Gateway + no Hermes = reachable but empty. **The FAILURE state.** |
|
||||||
@@ -117,14 +117,14 @@
|
|||||||
**Why it works:** Naturally models the wizard hierarchy. Queries like "who can do X?" and "what blocks task Y?" resolve instantly.
|
**Why it works:** Naturally models the wizard hierarchy. Queries like "who can do X?" and "what blocks task Y?" resolve instantly.
|
||||||
**Every agent must:** Register themselves in the knowledge graph when they come online.
|
**Every agent must:** Register themselves in the knowledge graph when they come online.
|
||||||
|
|
||||||
### TECHNIQUE 4: Hermes-Native Communication (No Gateway Layer)
|
### TECHNIQUE 4: The Robing Pattern (Gateway + Body Cohabitation)
|
||||||
**Where:** Every wizard deployment
|
**Where:** Every wizard deployment
|
||||||
**How:** Hermes handles both reasoning and external communication directly. No intermediary gateway. Two states: Online (Hermes running) or Dead (nothing running).
|
**How:** OpenClaw gateway handles external communication. Hermes body handles reasoning. Both on same machine via localhost. Four states: Robed, Unrobed, Lobster, Dead.
|
||||||
**Why it works:** Single process. No split-brain failure modes. No Lobster state possible.
|
**Why it works:** Separation of concerns. Gateway can restart without losing agent state.
|
||||||
**Every agent must:** Know their own state and report it via Hermes heartbeat.
|
**Every agent must:** Know their own state. A Lobster is a failure. Report it.
|
||||||
|
|
||||||
### TECHNIQUE 5: Cron-Driven Autonomous Work Dispatch
|
### TECHNIQUE 5: Cron-Driven Autonomous Work Dispatch
|
||||||
**Where:** hermes-work.sh, task-monitor.sh, progress-report.sh
|
**Where:** openclaw-work.sh, task-monitor.sh, progress-report.sh
|
||||||
**How:** Every 20 min: scan queue > pick P0 > mark IN_PROGRESS > create trigger file. Every 10 min: check completion. Every 30 min: progress report to father-messages/.
|
**How:** Every 20 min: scan queue > pick P0 > mark IN_PROGRESS > create trigger file. Every 10 min: check completion. Every 30 min: progress report to father-messages/.
|
||||||
**Why it works:** No human needed for steady-state. Self-healing. Self-reporting.
|
**Why it works:** No human needed for steady-state. Self-healing. Self-reporting.
|
||||||
**Every agent must:** Have a work queue. Have a cron schedule. Report progress.
|
**Every agent must:** Have a work queue. Have a cron schedule. Report progress.
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
const giteaApiUrl = 'https://forge.alexanderwhitestone.com/api/v1';
|
const GiteaApiUrl = 'https://forge.alexanderwhitestone.com/api/v1';
|
||||||
const token = process.env.GITEA_TOKEN; // Should be stored securely in environment variables
|
const token = process.env.GITEA_TOKEN; // Should be stored securely in environment variables
|
||||||
const repos = ['hermes-agent', 'the-nexus', 'timmy-home', 'timmy-config'];
|
const repos = ['hermes-agent', 'the-nexus', 'timmy-home', 'timmy-config'];
|
||||||
|
|
||||||
@@ -13,6 +13,31 @@ const branchProtectionSettings = {
|
|||||||
// Special handling for the-nexus (CI disabled)
|
// Special handling for the-nexus (CI disabled)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
async function applyBranchProtection(repo) {
|
||||||
|
try {
|
||||||
|
const response = await fetch(`${giteaApiUrl}/repos/Timmy_Foundation/${repo}/branches/main/protection`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: {
|
||||||
|
'Authorization': `token ${token}`,
|
||||||
|
'Content-Type': 'application/json'
|
||||||
|
},
|
||||||
|
body: JSON.stringify({
|
||||||
|
...branchProtectionSettings,
|
||||||
|
// Special handling for the-nexus (CI disabled)
|
||||||
|
requiredStatusChecks: repo === 'the-nexus' ? false : true
|
||||||
|
})
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
throw new Error(`Failed to apply branch protection to ${repo}: ${await response.text()}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`✅ Branch protection applied to ${repo}`);
|
||||||
|
} catch (error) {
|
||||||
|
console.error(`❌ Error applying branch protection to ${repo}: ${error.message}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async function applyBranchProtection(repo) {
|
async function applyBranchProtection(repo) {
|
||||||
try {
|
try {
|
||||||
const response = await fetch(`${giteaApiUrl}/repos/Timmy_Foundation/${repo}/branches/main/protection`, {
|
const response = await fetch(`${giteaApiUrl}/repos/Timmy_Foundation/${repo}/branches/main/protection`, {
|
||||||
|
|||||||
@@ -1,18 +1,99 @@
|
|||||||
|
// ═══════════════════════════════════════════
|
||||||
|
// PROJECT MNEMOSYNE — MEMORY OPTIMIZER (GOFAI)
|
||||||
|
// ═══════════════════════════════════════════
|
||||||
|
//
|
||||||
|
// Heuristic-based memory pruning and organization.
|
||||||
|
// Operates without LLMs to maintain a lean, high-signal spatial index.
|
||||||
|
//
|
||||||
|
// Heuristics:
|
||||||
|
// 1. Strength Decay: Memories lose strength over time if not accessed.
|
||||||
|
// 2. Redundancy: Simple string similarity to identify duplicates.
|
||||||
|
// 3. Isolation: Memories with no connections are lower priority.
|
||||||
|
// 4. Aging: Old memories in 'working' are moved to 'archive'.
|
||||||
|
// ═══════════════════════════════════════════
|
||||||
|
|
||||||
class MemoryOptimizer {
|
const MemoryOptimizer = (() => {
|
||||||
constructor(options = {}) {
|
const DECAY_RATE = 0.01; // Strength lost per optimization cycle
|
||||||
this.threshold = options.threshold || 0.3;
|
const PRUNE_THRESHOLD = 0.1; // Remove if strength < this
|
||||||
this.decayRate = options.decayRate || 0.01;
|
const SIMILARITY_THRESHOLD = 0.85; // Jaccard similarity for redundancy
|
||||||
this.lastRun = Date.now();
|
|
||||||
|
/**
|
||||||
|
* Run a full optimization pass on the spatial memory index.
|
||||||
|
* @param {object} spatialMemory - The SpatialMemory component instance.
|
||||||
|
* @returns {object} Summary of actions taken.
|
||||||
|
*/
|
||||||
|
function optimize(spatialMemory) {
|
||||||
|
const memories = spatialMemory.getAllMemories();
|
||||||
|
const results = { pruned: 0, moved: 0, updated: 0 };
|
||||||
|
|
||||||
|
// 1. Strength Decay & Aging
|
||||||
|
memories.forEach(mem => {
|
||||||
|
let strength = mem.strength || 0.7;
|
||||||
|
strength -= DECAY_RATE;
|
||||||
|
|
||||||
|
if (strength < PRUNE_THRESHOLD) {
|
||||||
|
spatialMemory.removeMemory(mem.id);
|
||||||
|
results.pruned++;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Move old working memories to archive
|
||||||
|
if (mem.category === 'working') {
|
||||||
|
const timestamp = mem.timestamp || new Date().toISOString();
|
||||||
|
const age = Date.now() - new Date(timestamp).getTime();
|
||||||
|
if (age > 1000 * 60 * 60 * 24) { // 24 hours
|
||||||
|
spatialMemory.removeMemory(mem.id);
|
||||||
|
spatialMemory.placeMemory({ ...mem, category: 'archive', strength });
|
||||||
|
results.moved++;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
spatialMemory.updateMemory(mem.id, { strength });
|
||||||
|
results.updated++;
|
||||||
|
});
|
||||||
|
|
||||||
|
// 2. Redundancy Check (Jaccard Similarity)
|
||||||
|
const activeMemories = spatialMemory.getAllMemories();
|
||||||
|
for (let i = 0; i < activeMemories.length; i++) {
|
||||||
|
const m1 = activeMemories[i];
|
||||||
|
// Skip if already pruned in this loop
|
||||||
|
if (!spatialMemory.getAllMemories().find(m => m.id === m1.id)) continue;
|
||||||
|
|
||||||
|
for (let j = i + 1; j < activeMemories.length; j++) {
|
||||||
|
const m2 = activeMemories[j];
|
||||||
|
if (m1.category !== m2.category) continue;
|
||||||
|
|
||||||
|
const sim = _calculateSimilarity(m1.content, m2.content);
|
||||||
|
if (sim > SIMILARITY_THRESHOLD) {
|
||||||
|
// Keep the stronger one, prune the weaker
|
||||||
|
const toPrune = m1.strength >= m2.strength ? m2.id : m1.id;
|
||||||
|
spatialMemory.removeMemory(toPrune);
|
||||||
|
results.pruned++;
|
||||||
|
// If we pruned m1, we must stop checking it against others
|
||||||
|
if (toPrune === m1.id) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
optimize(memories) {
|
|
||||||
const now = Date.now();
|
console.info('[Mnemosyne] Optimization complete:', results);
|
||||||
const elapsed = (now - this.lastRun) / 1000;
|
return results;
|
||||||
this.lastRun = now;
|
}
|
||||||
return memories.map(m => {
|
|
||||||
const decay = (m.importance || 1) * this.decayRate * elapsed;
|
/**
|
||||||
return { ...m, strength: Math.max(0, (m.strength || 1) - decay) };
|
* Calculate Jaccard similarity between two strings.
|
||||||
}).filter(m => m.strength > this.threshold || m.locked);
|
* @private
|
||||||
}
|
*/
|
||||||
}
|
function _calculateSimilarity(s1, s2) {
|
||||||
export default MemoryOptimizer;
|
if (!s1 || !s2) return 0;
|
||||||
|
const set1 = new Set(s1.toLowerCase().split(/\s+/));
|
||||||
|
const set2 = new Set(s2.toLowerCase().split(/\s+/));
|
||||||
|
const intersection = new Set([...set1].filter(x => set2.has(x)));
|
||||||
|
const union = new Set([...set1, ...set2]);
|
||||||
|
return intersection.size / union.size;
|
||||||
|
}
|
||||||
|
|
||||||
|
return { optimize };
|
||||||
|
})();
|
||||||
|
|
||||||
|
export { MemoryOptimizer };
|
||||||
|
|||||||
@@ -1,160 +0,0 @@
|
|||||||
// ═══════════════════════════════════════════════════
|
|
||||||
// PROJECT MNEMOSYNE — MEMORY PULSE
|
|
||||||
// ═══════════════════════════════════════════════════
|
|
||||||
//
|
|
||||||
// BFS wave animation triggered on crystal click.
|
|
||||||
// When a memory crystal is clicked, a visual pulse
|
|
||||||
// radiates through the connection graph — illuminating
|
|
||||||
// linked memories hop-by-hop with a glow that rises
|
|
||||||
// sharply and then fades.
|
|
||||||
//
|
|
||||||
// Usage:
|
|
||||||
// MemoryPulse.init(SpatialMemory);
|
|
||||||
// MemoryPulse.triggerPulse(memId);
|
|
||||||
// MemoryPulse.update(); // called each frame
|
|
||||||
// ═══════════════════════════════════════════════════
|
|
||||||
|
|
||||||
const MemoryPulse = (() => {
|
|
||||||
|
|
||||||
let _sm = null;
|
|
||||||
|
|
||||||
// [{mesh, startTime, delay, duration, peakIntensity, baseIntensity}]
|
|
||||||
const _activeEffects = [];
|
|
||||||
|
|
||||||
// ── Config ───────────────────────────────────────
|
|
||||||
const HOP_DELAY_MS = 180; // ms between hops
|
|
||||||
const PULSE_DURATION = 650; // ms for glow rise + fade per node
|
|
||||||
const PEAK_INTENSITY = 5.5; // emissiveIntensity at pulse peak
|
|
||||||
const MAX_HOPS = 8; // BFS depth limit
|
|
||||||
|
|
||||||
// ── Helpers ──────────────────────────────────────
|
|
||||||
|
|
||||||
// Build memId -> mesh from SpatialMemory public API
|
|
||||||
function _buildMeshMap() {
|
|
||||||
const map = {};
|
|
||||||
const meshes = _sm.getCrystalMeshes();
|
|
||||||
for (const mesh of meshes) {
|
|
||||||
const entry = _sm.getMemoryFromMesh(mesh);
|
|
||||||
if (entry) map[entry.data.id] = mesh;
|
|
||||||
}
|
|
||||||
return map;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build bidirectional adjacency graph from memory connection data
|
|
||||||
function _buildGraph() {
|
|
||||||
const graph = {};
|
|
||||||
const memories = _sm.getAllMemories();
|
|
||||||
for (const mem of memories) {
|
|
||||||
if (!graph[mem.id]) graph[mem.id] = [];
|
|
||||||
if (mem.connections) {
|
|
||||||
for (const targetId of mem.connections) {
|
|
||||||
graph[mem.id].push(targetId);
|
|
||||||
if (!graph[targetId]) graph[targetId] = [];
|
|
||||||
graph[targetId].push(mem.id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return graph;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ── Public API ───────────────────────────────────
|
|
||||||
|
|
||||||
function init(spatialMemory) {
|
|
||||||
_sm = spatialMemory;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Trigger a BFS pulse wave originating from memId.
|
|
||||||
* Each hop level illuminates after HOP_DELAY_MS * hop ms.
|
|
||||||
* @param {string} memId - ID of the clicked memory crystal
|
|
||||||
*/
|
|
||||||
function triggerPulse(memId) {
|
|
||||||
if (!_sm) return;
|
|
||||||
|
|
||||||
const meshMap = _buildMeshMap();
|
|
||||||
const graph = _buildGraph();
|
|
||||||
|
|
||||||
if (!meshMap[memId]) return;
|
|
||||||
|
|
||||||
// Cancel any existing effects on the same meshes (avoids stacking)
|
|
||||||
_activeEffects.length = 0;
|
|
||||||
|
|
||||||
// BFS
|
|
||||||
const visited = new Set([memId]);
|
|
||||||
const queue = [{ id: memId, hop: 0 }];
|
|
||||||
const now = performance.now();
|
|
||||||
const scheduled = [];
|
|
||||||
|
|
||||||
while (queue.length > 0) {
|
|
||||||
const { id, hop } = queue.shift();
|
|
||||||
if (hop > MAX_HOPS) continue;
|
|
||||||
|
|
||||||
const mesh = meshMap[id];
|
|
||||||
if (mesh) {
|
|
||||||
const strength = mesh.userData.strength || 0.7;
|
|
||||||
const baseIntensity = 1.0 + Math.sin(mesh.userData.pulse || 0) * 0.5 * strength;
|
|
||||||
|
|
||||||
scheduled.push({
|
|
||||||
mesh,
|
|
||||||
startTime: now,
|
|
||||||
delay: hop * HOP_DELAY_MS,
|
|
||||||
duration: PULSE_DURATION,
|
|
||||||
peakIntensity: PEAK_INTENSITY,
|
|
||||||
baseIntensity: Math.max(0.5, baseIntensity)
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const neighborId of (graph[id] || [])) {
|
|
||||||
if (!visited.has(neighborId)) {
|
|
||||||
visited.add(neighborId);
|
|
||||||
queue.push({ id: neighborId, hop: hop + 1 });
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const effect of scheduled) {
|
|
||||||
_activeEffects.push(effect);
|
|
||||||
}
|
|
||||||
|
|
||||||
console.info('[MemoryPulse] Pulse triggered from', memId, '—', scheduled.length, 'nodes in wave');
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Advance all active pulse animations. Call once per frame.
|
|
||||||
*/
|
|
||||||
function update() {
|
|
||||||
if (_activeEffects.length === 0) return;
|
|
||||||
|
|
||||||
const now = performance.now();
|
|
||||||
|
|
||||||
for (let i = _activeEffects.length - 1; i >= 0; i--) {
|
|
||||||
const e = _activeEffects[i];
|
|
||||||
const elapsed = now - e.startTime - e.delay;
|
|
||||||
|
|
||||||
if (elapsed < 0) continue; // waiting for its hop delay
|
|
||||||
|
|
||||||
if (elapsed >= e.duration) {
|
|
||||||
// Animation complete — restore base intensity
|
|
||||||
if (e.mesh.material) {
|
|
||||||
e.mesh.material.emissiveIntensity = e.baseIntensity;
|
|
||||||
}
|
|
||||||
_activeEffects.splice(i, 1);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// t: 0 → 1 over duration
|
|
||||||
const t = elapsed / e.duration;
|
|
||||||
// sin curve over [0, π]: smooth rise then fall
|
|
||||||
const glow = Math.sin(t * Math.PI);
|
|
||||||
|
|
||||||
if (e.mesh.material) {
|
|
||||||
e.mesh.material.emissiveIntensity =
|
|
||||||
e.baseIntensity + glow * (e.peakIntensity - e.baseIntensity);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return { init, triggerPulse, update };
|
|
||||||
})();
|
|
||||||
|
|
||||||
export { MemoryPulse };
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
|
|
||||||
import * as THREE from 'three';
|
|
||||||
class ResonanceVisualizer {
|
|
||||||
constructor(scene) {
|
|
||||||
this.scene = scene;
|
|
||||||
this.links = [];
|
|
||||||
}
|
|
||||||
addLink(p1, p2, strength) {
|
|
||||||
const geometry = new THREE.BufferGeometry().setFromPoints([p1, p2]);
|
|
||||||
const material = new THREE.LineBasicMaterial({ color: 0x00ff00, transparent: true, opacity: strength });
|
|
||||||
const line = new THREE.Line(geometry, material);
|
|
||||||
this.scene.add(line);
|
|
||||||
this.links.push(line);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
export default ResonanceVisualizer;
|
|
||||||
@@ -67,7 +67,7 @@ modules:
|
|||||||
cli:
|
cli:
|
||||||
status: shipped
|
status: shipped
|
||||||
files: [cli.py]
|
files: [cli.py]
|
||||||
description: CLI interface — stats, search, ingest, link, topics, remove, export, clusters, hubs, bridges, rebuild, tag/untag/retag, timeline, neighbors, consolidate, path, touch, decay, vitality, fading, vibrant
|
description: CLI interface — stats, search, ingest, link, topics, remove, export, clusters, hubs, bridges, rebuild, tag/untag/retag, timeline, neighbors
|
||||||
|
|
||||||
tests:
|
tests:
|
||||||
status: shipped
|
status: shipped
|
||||||
@@ -163,15 +163,12 @@ planned:
|
|||||||
- "#TBD" # Will be filled when PR is created
|
- "#TBD" # Will be filled when PR is created
|
||||||
|
|
||||||
memory_pulse:
|
memory_pulse:
|
||||||
status: shipped
|
status: planned
|
||||||
files: [nexus/components/memory-pulse.js]
|
|
||||||
description: >
|
description: >
|
||||||
Visual pulse wave radiates through connection graph when
|
Visual pulse wave radiates through connection graph when
|
||||||
a crystal is clicked, illuminating linked memories by BFS
|
a crystal is clicked, illuminating linked memories by BFS
|
||||||
hop distance.
|
hop distance. Was attempted in PR #1226 — needs rebasing.
|
||||||
priority: medium
|
priority: medium
|
||||||
merged_prs:
|
|
||||||
- "#1263"
|
|
||||||
|
|
||||||
embedding_backend:
|
embedding_backend:
|
||||||
status: shipped
|
status: shipped
|
||||||
@@ -184,26 +181,13 @@ planned:
|
|||||||
merged_prs:
|
merged_prs:
|
||||||
- "#TBD" # Will be filled when PR is created
|
- "#TBD" # Will be filled when PR is created
|
||||||
|
|
||||||
|
|
||||||
memory_path:
|
|
||||||
status: shipped
|
|
||||||
files: [archive.py, cli.py, tests/test_path.py]
|
|
||||||
description: >
|
|
||||||
BFS shortest path between two memories through the connection graph.
|
|
||||||
Answers "how is memory X related to memory Y?" by finding the chain
|
|
||||||
of connections. Includes path_explanation for human-readable output.
|
|
||||||
CLI command: mnemosyne path <start_id> <end_id>
|
|
||||||
priority: medium
|
|
||||||
merged_prs:
|
|
||||||
- "#TBD"
|
|
||||||
|
|
||||||
memory_consolidation:
|
memory_consolidation:
|
||||||
status: shipped
|
status: shipped
|
||||||
files: [archive.py, cli.py, tests/test_consolidation.py]
|
files: [archive.py, cli.py]
|
||||||
description: >
|
description: >
|
||||||
Automatic merging of duplicate/near-duplicate memories
|
Automatic merging of duplicate/near-duplicate memories
|
||||||
using content_hash and semantic similarity. Periodic
|
using content_hash and semantic similarity. Periodic
|
||||||
consolidation pass.
|
consolidation pass.
|
||||||
priority: low
|
priority: low
|
||||||
merged_prs:
|
merged_prs:
|
||||||
- "#1260"
|
- "#TBD" # Will be filled when PR is created
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""nexus.mnemosyne — The Living Holographic Archive.
|
"""nexus.mnemosyne — The Living Holographic Archive.
|
||||||
|
|
||||||
Phase 1: Foundation — core archive, entry model, holographic linker,
|
Phase 1: Foundation — core archive, entry model, holographic linker,
|
||||||
ingestion pipeline, and CLI.
|
ingestion pipeline, memory consolidation, and CLI.
|
||||||
|
|
||||||
Builds on MemPalace vector memory to create interconnected meaning:
|
Builds on MemPalace vector memory to create interconnected meaning:
|
||||||
entries auto-reference related entries via semantic similarity,
|
entries auto-reference related entries via semantic similarity,
|
||||||
|
|||||||
@@ -938,408 +938,6 @@ class MnemosyneArchive:
|
|||||||
"vibrant_count": vibrant_count,
|
"vibrant_count": vibrant_count,
|
||||||
}
|
}
|
||||||
|
|
||||||
def consolidate(
|
|
||||||
self,
|
|
||||||
threshold: float = 0.9,
|
|
||||||
dry_run: bool = False,
|
|
||||||
) -> list[dict]:
|
|
||||||
"""Scan the archive and merge duplicate/near-duplicate entries.
|
|
||||||
|
|
||||||
Two entries are considered duplicates if:
|
|
||||||
- They share the same ``content_hash`` (exact duplicate), or
|
|
||||||
- Their similarity score (via HolographicLinker) exceeds ``threshold``
|
|
||||||
(near-duplicate when an embedding backend is available or Jaccard is
|
|
||||||
high enough at the given threshold).
|
|
||||||
|
|
||||||
Merge strategy:
|
|
||||||
- Keep the *older* entry (earlier ``created_at``).
|
|
||||||
- Union topics from both entries (case-deduped).
|
|
||||||
- Merge metadata from newer into older (older values win on conflicts).
|
|
||||||
- Transfer all links from the newer entry to the older entry.
|
|
||||||
- Delete the newer entry.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
threshold: Similarity threshold for near-duplicate detection (0.0–1.0).
|
|
||||||
Default 0.9 is intentionally conservative.
|
|
||||||
dry_run: If True, return the list of would-be merges without mutating
|
|
||||||
the archive.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of dicts, one per merged pair::
|
|
||||||
|
|
||||||
{
|
|
||||||
"kept": <entry_id of survivor>,
|
|
||||||
"removed": <entry_id of duplicate>,
|
|
||||||
"reason": "exact_hash" | "semantic_similarity",
|
|
||||||
"score": float, # 1.0 for exact hash matches
|
|
||||||
"dry_run": bool,
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
merges: list[dict] = []
|
|
||||||
entries = list(self._entries.values())
|
|
||||||
removed_ids: set[str] = set()
|
|
||||||
|
|
||||||
for i, entry_a in enumerate(entries):
|
|
||||||
if entry_a.id in removed_ids:
|
|
||||||
continue
|
|
||||||
for entry_b in entries[i + 1:]:
|
|
||||||
if entry_b.id in removed_ids:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Determine if they are duplicates
|
|
||||||
reason: Optional[str] = None
|
|
||||||
score: float = 0.0
|
|
||||||
|
|
||||||
if (
|
|
||||||
entry_a.content_hash is not None
|
|
||||||
and entry_b.content_hash is not None
|
|
||||||
and entry_a.content_hash == entry_b.content_hash
|
|
||||||
):
|
|
||||||
reason = "exact_hash"
|
|
||||||
score = 1.0
|
|
||||||
else:
|
|
||||||
sim = self.linker.compute_similarity(entry_a, entry_b)
|
|
||||||
if sim >= threshold:
|
|
||||||
reason = "semantic_similarity"
|
|
||||||
score = sim
|
|
||||||
|
|
||||||
if reason is None:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Decide which entry to keep (older survives)
|
|
||||||
if entry_a.created_at <= entry_b.created_at:
|
|
||||||
kept, removed = entry_a, entry_b
|
|
||||||
else:
|
|
||||||
kept, removed = entry_b, entry_a
|
|
||||||
|
|
||||||
merges.append({
|
|
||||||
"kept": kept.id,
|
|
||||||
"removed": removed.id,
|
|
||||||
"reason": reason,
|
|
||||||
"score": round(score, 4),
|
|
||||||
"dry_run": dry_run,
|
|
||||||
})
|
|
||||||
|
|
||||||
if not dry_run:
|
|
||||||
# Merge topics (case-deduped)
|
|
||||||
existing_lower = {t.lower() for t in kept.topics}
|
|
||||||
for tag in removed.topics:
|
|
||||||
if tag.lower() not in existing_lower:
|
|
||||||
kept.topics.append(tag)
|
|
||||||
existing_lower.add(tag.lower())
|
|
||||||
|
|
||||||
# Merge metadata (kept wins on key conflicts)
|
|
||||||
for k, v in removed.metadata.items():
|
|
||||||
if k not in kept.metadata:
|
|
||||||
kept.metadata[k] = v
|
|
||||||
|
|
||||||
# Transfer links: add removed's links to kept
|
|
||||||
kept_links_set = set(kept.links)
|
|
||||||
for lid in removed.links:
|
|
||||||
if lid != kept.id and lid not in kept_links_set and lid not in removed_ids:
|
|
||||||
kept.links.append(lid)
|
|
||||||
kept_links_set.add(lid)
|
|
||||||
# Update the other entry's back-link
|
|
||||||
other = self._entries.get(lid)
|
|
||||||
if other and kept.id not in other.links:
|
|
||||||
other.links.append(kept.id)
|
|
||||||
|
|
||||||
# Remove back-links pointing at the removed entry
|
|
||||||
for other in self._entries.values():
|
|
||||||
if removed.id in other.links:
|
|
||||||
other.links.remove(removed.id)
|
|
||||||
if other.id != kept.id and kept.id not in other.links:
|
|
||||||
other.links.append(kept.id)
|
|
||||||
|
|
||||||
del self._entries[removed.id]
|
|
||||||
removed_ids.add(removed.id)
|
|
||||||
|
|
||||||
if not dry_run and merges:
|
|
||||||
self._save()
|
|
||||||
|
|
||||||
return merges
|
|
||||||
|
|
||||||
|
|
||||||
def shortest_path(self, start_id: str, end_id: str) -> list[str] | None:
|
|
||||||
"""Find shortest path between two entries through the connection graph.
|
|
||||||
|
|
||||||
Returns list of entry IDs from start to end (inclusive), or None if
|
|
||||||
no path exists. Uses BFS for unweighted shortest path.
|
|
||||||
"""
|
|
||||||
if start_id == end_id:
|
|
||||||
return [start_id] if start_id in self._entries else None
|
|
||||||
if start_id not in self._entries or end_id not in self._entries:
|
|
||||||
return None
|
|
||||||
|
|
||||||
adj = self._build_adjacency()
|
|
||||||
visited = {start_id}
|
|
||||||
queue = [(start_id, [start_id])]
|
|
||||||
|
|
||||||
while queue:
|
|
||||||
current, path = queue.pop(0)
|
|
||||||
for neighbor in adj.get(current, []):
|
|
||||||
if neighbor == end_id:
|
|
||||||
return path + [neighbor]
|
|
||||||
if neighbor not in visited:
|
|
||||||
visited.add(neighbor)
|
|
||||||
queue.append((neighbor, path + [neighbor]))
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
def path_explanation(self, path: list[str]) -> list[dict]:
|
|
||||||
"""Convert a path of entry IDs into human-readable step descriptions.
|
|
||||||
|
|
||||||
Returns list of dicts with 'id', 'title', and 'topics' for each step.
|
|
||||||
"""
|
|
||||||
steps = []
|
|
||||||
for entry_id in path:
|
|
||||||
entry = self._entries.get(entry_id)
|
|
||||||
if entry:
|
|
||||||
steps.append({
|
|
||||||
"id": entry.id,
|
|
||||||
"title": entry.title,
|
|
||||||
"topics": entry.topics,
|
|
||||||
"content_preview": entry.content[:120] + "..." if len(entry.content) > 120 else entry.content,
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
steps.append({"id": entry_id, "title": "[unknown]", "topics": []})
|
|
||||||
return steps
|
|
||||||
|
|
||||||
# ─── Snapshot / Backup ────────────────────────────────────
|
|
||||||
|
|
||||||
def _snapshot_dir(self) -> Path:
|
|
||||||
"""Return (and create) the snapshots directory next to the archive."""
|
|
||||||
d = self.path.parent / "snapshots"
|
|
||||||
d.mkdir(parents=True, exist_ok=True)
|
|
||||||
return d
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _snapshot_filename(timestamp: str, label: str) -> str:
|
|
||||||
"""Build a deterministic snapshot filename."""
|
|
||||||
safe_label = "".join(c if c.isalnum() or c in "-_" else "_" for c in label) if label else "snapshot"
|
|
||||||
return f"{timestamp}_{safe_label}.json"
|
|
||||||
|
|
||||||
def snapshot_create(self, label: str = "") -> dict:
    """Write the full archive state to a timestamped snapshot file.

    Args:
        label: Optional human-readable label embedded in the filename.

    Returns:
        Dict with keys: snapshot_id, label, created_at, entry_count, path.
    """
    now = datetime.now(timezone.utc)
    filename = self._snapshot_filename(now.strftime("%Y%m%d_%H%M%S"), label)
    snapshot_id = filename[: -len(".json")]
    snap_path = self._snapshot_dir() / filename

    payload = {
        "snapshot_id": snapshot_id,
        "label": label,
        "created_at": now.isoformat(),
        "entry_count": len(self._entries),
        "archive_path": str(self.path),
        "entries": [entry.to_dict() for entry in self._entries.values()],
    }
    with open(snap_path, "w") as fh:
        json.dump(payload, fh, indent=2)

    return {
        "snapshot_id": snapshot_id,
        "label": label,
        "created_at": payload["created_at"],
        "entry_count": payload["entry_count"],
        "path": str(snap_path),
    }
|
|
||||||
|
|
||||||
def snapshot_list(self) -> list[dict]:
    """List available snapshots, newest first.

    Unreadable or corrupt snapshot files are silently skipped.

    Returns:
        List of dicts with keys: snapshot_id, label, created_at,
        entry_count, path.
    """
    results: list[dict] = []
    for candidate in sorted(self._snapshot_dir().glob("*.json"), reverse=True):
        try:
            with open(candidate) as fh:
                meta = json.load(fh)
        except (json.JSONDecodeError, OSError):
            continue  # corrupt or unreadable file — skip, as before
        results.append({
            "snapshot_id": meta.get("snapshot_id", candidate.stem),
            "label": meta.get("label", ""),
            "created_at": meta.get("created_at", ""),
            "entry_count": meta.get("entry_count", len(meta.get("entries", []))),
            "path": str(candidate),
        })
    return results
|
|
||||||
|
|
||||||
def snapshot_restore(self, snapshot_id: str) -> dict:
    """Replace all current entries with those stored in a snapshot.

    Args:
        snapshot_id: ID returned by snapshot_create / snapshot_list.

    Returns:
        Dict with keys: snapshot_id, restored_count, previous_count.

    Raises:
        FileNotFoundError: If no snapshot with that ID exists.
    """
    snap_path = self._snapshot_dir() / f"{snapshot_id}.json"
    if not snap_path.exists():
        raise FileNotFoundError(f"Snapshot not found: {snapshot_id}")

    with open(snap_path) as fh:
        data = json.load(fh)

    previous_count = len(self._entries)
    self._entries = {}
    for raw in data.get("entries", []):
        restored = ArchiveEntry.from_dict(raw)
        self._entries[restored.id] = restored

    self._save()
    return {
        "snapshot_id": snapshot_id,
        "restored_count": len(self._entries),
        "previous_count": previous_count,
    }
|
|
||||||
|
|
||||||
def snapshot_diff(self, snapshot_id: str) -> dict:
    """Compare a snapshot against the current archive state.

    Args:
        snapshot_id: The snapshot to compare against.

    Returns:
        Dict with keys:
        - snapshot_id: str
        - added: entries in current but not in the snapshot ({id, title})
        - removed: entries in the snapshot but not in current ({id, title})
        - modified: entries whose content_hash differs
          ({id, title, snapshot_hash, current_hash})
        - unchanged: count of entries with identical content_hash

    Raises:
        FileNotFoundError: If no snapshot with that ID exists.
    """
    snap_path = self._snapshot_dir() / f"{snapshot_id}.json"
    if not snap_path.exists():
        raise FileNotFoundError(f"Snapshot not found: {snapshot_id}")

    with open(snap_path) as fh:
        data = json.load(fh)

    snap_entries = {raw["id"]: raw for raw in data.get("entries", [])}
    current_ids = set(self._entries)
    snap_ids = set(snap_entries)

    added = [
        {"id": self._entries[eid].id, "title": self._entries[eid].title}
        for eid in current_ids - snap_ids
    ]
    removed = [
        {"id": snap_entries[eid]["id"], "title": snap_entries[eid].get("title", "")}
        for eid in snap_ids - current_ids
    ]

    modified = []
    unchanged = 0
    for eid in current_ids & snap_ids:
        live_hash = self._entries[eid].content_hash
        old_hash = snap_entries[eid].get("content_hash")
        if live_hash == old_hash:
            unchanged += 1
        else:
            modified.append({
                "id": eid,
                "title": self._entries[eid].title,
                "snapshot_hash": old_hash,
                "current_hash": live_hash,
            })

    def by_title(item):
        return item["title"]

    return {
        "snapshot_id": snapshot_id,
        "added": sorted(added, key=by_title),
        "removed": sorted(removed, key=by_title),
        "modified": sorted(modified, key=by_title),
        "unchanged": unchanged,
    }
|
|
||||||
|
|
||||||
def resonance(
    self,
    threshold: float = 0.3,
    limit: int = 20,
    topic: Optional[str] = None,
) -> list[dict]:
    """Surface unlinked entry pairs whose similarity exceeds a threshold.

    Finds pairs that are semantically close but not yet linked — the
    latent edges the ingest-time linker missed (entries ingested at
    different times, or scoring just below the linker's own cutoff).

    Args:
        threshold: Minimum similarity score to report a pair (default 0.3).
            Already-linked pairs are skipped regardless of score.
        limit: Maximum number of pairs returned (default 20).
        topic: If given, both entries of a pair must carry this topic
            (case-insensitive).

    Returns:
        List of dicts sorted by descending ``score``; each has
        ``entry_a``/``entry_b`` ({id, title, topics}) and ``score``
        (similarity rounded to 4 decimals).
    """
    candidates = list(self._entries.values())
    if topic:
        wanted = topic.lower()
        candidates = [
            e for e in candidates
            if wanted in (t.lower() for t in e.topics)
        ]

    def describe(entry):
        return {"id": entry.id, "title": entry.title, "topics": entry.topics}

    pairs: list[dict] = []
    for i, first in enumerate(candidates):
        for second in candidates[i + 1:]:
            # Already-linked pairs carry no new information — skip them.
            if second.id in first.links or first.id in second.links:
                continue
            score = self.linker.compute_similarity(first, second)
            if score >= threshold:
                pairs.append({
                    "entry_a": describe(first),
                    "entry_b": describe(second),
                    "score": round(score, 4),
                })

    pairs.sort(key=lambda p: p["score"], reverse=True)
    return pairs[:limit]
|
|
||||||
|
|
||||||
def rebuild_links(self, threshold: Optional[float] = None) -> int:
|
def rebuild_links(self, threshold: Optional[float] = None) -> int:
|
||||||
"""Recompute all links from scratch.
|
"""Recompute all links from scratch.
|
||||||
|
|
||||||
@@ -1374,3 +972,123 @@ class MnemosyneArchive:
|
|||||||
|
|
||||||
self._save()
|
self._save()
|
||||||
return total_links
|
return total_links
|
||||||
|
|
||||||
|
def consolidate(
    self,
    similarity_threshold: float = 0.9,
    dry_run: bool = False,
) -> list[dict]:
    """Detect and merge duplicate or near-duplicate entries.

    Two phases:
    1. Exact duplicates — identical ``content_hash``.
    2. Near-duplicates — embedding similarity above the threshold
       (only when an embedding backend is configured).

    The oldest entry of each pair survives; topics, links, and metadata
    from the newer entry are folded into it, and links elsewhere in the
    archive are rewired to point at the survivor.

    Args:
        similarity_threshold: Minimum cosine similarity for phase 2
            (default 0.9). Only used with an embedding backend.
        dry_run: When True, report the merge plan without changing
            anything.

    Returns:
        List of dicts with keys: kept_id, removed_id, kept_title,
        removed_title, reason, similarity.
    """
    all_entries = list(self._entries.values())
    planned: list[dict] = []
    dropped: set[str] = set()

    def plan(survivor, duplicate, reason, score):
        planned.append({
            "kept_id": survivor.id,
            "removed_id": duplicate.id,
            "kept_title": survivor.title,
            "removed_title": duplicate.title,
            "reason": reason,
            "similarity": score,
        })
        dropped.add(duplicate.id)

    # Phase 1: identical content hashes — unambiguous duplicates.
    by_hash: dict = {}
    for entry in all_entries:
        if entry.content_hash:
            by_hash.setdefault(entry.content_hash, []).append(entry)

    for group in by_hash.values():
        if len(group) < 2:
            continue
        group.sort(key=lambda e: e.created_at)  # oldest entry survives
        survivor = group[0]
        for duplicate in group[1:]:
            if duplicate.id not in dropped:
                plan(survivor, duplicate, "exact_content_hash", 1.0)

    # Phase 2: near-duplicates via embedding similarity (when available).
    if self._embedding_backend is not None:
        pool = [e for e in all_entries if e.id not in dropped]
        for idx, left in enumerate(pool):
            if left.id in dropped:
                continue
            left_vec = self.linker._get_embedding(left)
            if not left_vec:
                continue
            for right in pool[idx + 1:]:
                if right.id in dropped:
                    continue
                right_vec = self.linker._get_embedding(right)
                if not right_vec:
                    continue
                score = self._embedding_backend.similarity(left_vec, right_vec)
                if score < similarity_threshold:
                    continue
                if left.created_at <= right.created_at:
                    plan(left, right, "embedding_similarity", round(score, 4))
                else:
                    plan(right, left, "embedding_similarity", round(score, 4))

    if dry_run:
        return planned

    # Apply the plan: fold each duplicate into its survivor.
    for merge in planned:
        survivor = self._entries.get(merge["kept_id"])
        duplicate = self._entries.get(merge["removed_id"])
        if survivor is None or duplicate is None:
            continue

        for topic in duplicate.topics:
            if topic not in survivor.topics:
                survivor.topics.append(topic)
        for linked in duplicate.links:
            if linked != survivor.id and linked not in survivor.links:
                survivor.links.append(linked)
        for key, value in duplicate.metadata.items():
            if key not in survivor.metadata:
                survivor.metadata[key] = value

        survivor.updated_at = datetime.now(timezone.utc).isoformat()
        del self._entries[duplicate.id]

        # Rewire links that pointed at the removed entry.
        for entry in self._entries.values():
            if merge["removed_id"] in entry.links:
                entry.links.remove(merge["removed_id"])
                if merge["kept_id"] not in entry.links and merge["kept_id"] != entry.id:
                    entry.links.append(merge["kept_id"])

    if planned:
        self._save()

    return planned
|
||||||
|
|
||||||
|
|||||||
@@ -4,11 +4,7 @@ Provides: mnemosyne ingest, mnemosyne search, mnemosyne link, mnemosyne stats,
|
|||||||
mnemosyne topics, mnemosyne remove, mnemosyne export,
|
mnemosyne topics, mnemosyne remove, mnemosyne export,
|
||||||
mnemosyne clusters, mnemosyne hubs, mnemosyne bridges, mnemosyne rebuild,
|
mnemosyne clusters, mnemosyne hubs, mnemosyne bridges, mnemosyne rebuild,
|
||||||
mnemosyne tag, mnemosyne untag, mnemosyne retag,
|
mnemosyne tag, mnemosyne untag, mnemosyne retag,
|
||||||
mnemosyne timeline, mnemosyne neighbors, mnemosyne path,
|
mnemosyne timeline, mnemosyne neighbors
|
||||||
mnemosyne touch, mnemosyne decay, mnemosyne vitality,
|
|
||||||
mnemosyne fading, mnemosyne vibrant,
|
|
||||||
mnemosyne snapshot create|list|restore|diff,
|
|
||||||
mnemosyne resonance
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
@@ -19,7 +15,7 @@ import sys
|
|||||||
|
|
||||||
from nexus.mnemosyne.archive import MnemosyneArchive
|
from nexus.mnemosyne.archive import MnemosyneArchive
|
||||||
from nexus.mnemosyne.entry import ArchiveEntry
|
from nexus.mnemosyne.entry import ArchiveEntry
|
||||||
from nexus.mnemosyne.ingest import ingest_event, ingest_directory
|
from nexus.mnemosyne.ingest import ingest_event
|
||||||
|
|
||||||
|
|
||||||
def cmd_stats(args):
|
def cmd_stats(args):
|
||||||
@@ -65,13 +61,6 @@ def cmd_ingest(args):
|
|||||||
print(f"Ingested: [{entry.id[:8]}] {entry.title} ({len(entry.links)} links)")
|
print(f"Ingested: [{entry.id[:8]}] {entry.title} ({len(entry.links)} links)")
|
||||||
|
|
||||||
|
|
||||||
def cmd_ingest_dir(args):
|
|
||||||
archive = MnemosyneArchive()
|
|
||||||
ext = [e.strip() for e in args.ext.split(",")] if args.ext else None
|
|
||||||
added = ingest_directory(archive, args.path, extensions=ext)
|
|
||||||
print(f"Ingested {added} new entries from {args.path}")
|
|
||||||
|
|
||||||
|
|
||||||
def cmd_link(args):
|
def cmd_link(args):
|
||||||
archive = MnemosyneArchive()
|
archive = MnemosyneArchive()
|
||||||
entry = archive.get(args.entry_id)
|
entry = archive.get(args.entry_id)
|
||||||
@@ -165,6 +154,23 @@ def cmd_rebuild(args):
|
|||||||
print(f"Rebuilt links: {total} connections across {archive.count} entries")
|
print(f"Rebuilt links: {total} connections across {archive.count} entries")
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_consolidate(args):
|
||||||
|
archive = MnemosyneArchive()
|
||||||
|
threshold = args.threshold
|
||||||
|
merges = archive.consolidate(similarity_threshold=threshold, dry_run=args.dry_run)
|
||||||
|
if not merges:
|
||||||
|
print("No duplicates found.")
|
||||||
|
return
|
||||||
|
action = "Would merge" if args.dry_run else "Merged"
|
||||||
|
print(f"{action} {len(merges)} pair(s):\n")
|
||||||
|
for m in merges:
|
||||||
|
sim = m["similarity"]
|
||||||
|
reason = m["reason"]
|
||||||
|
print(f" [{reason}] {m['kept_title'][:60]}")
|
||||||
|
print(f" kept: {m['kept_id'][:8]}")
|
||||||
|
print(f" removed: {m['removed_id'][:8]} (similarity: {sim})\n")
|
||||||
|
|
||||||
|
|
||||||
def cmd_tag(args):
|
def cmd_tag(args):
|
||||||
archive = MnemosyneArchive()
|
archive = MnemosyneArchive()
|
||||||
tags = [t.strip() for t in args.tags.split(",") if t.strip()]
|
tags = [t.strip() for t in args.tags.split(",") if t.strip()]
|
||||||
@@ -217,38 +223,6 @@ def cmd_timeline(args):
|
|||||||
print()
|
print()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def cmd_path(args):
|
|
||||||
archive = MnemosyneArchive(archive_path=args.archive) if args.archive else MnemosyneArchive()
|
|
||||||
path = archive.shortest_path(args.start, args.end)
|
|
||||||
if path is None:
|
|
||||||
print(f"No path found between {args.start} and {args.end}")
|
|
||||||
return
|
|
||||||
steps = archive.path_explanation(path)
|
|
||||||
print(f"Path ({len(steps)} hops):")
|
|
||||||
for i, step in enumerate(steps):
|
|
||||||
arrow = " → " if i > 0 else " "
|
|
||||||
print(f"{arrow}{step['id']}: {step['title']}")
|
|
||||||
if step['topics']:
|
|
||||||
print(f" topics: {', '.join(step['topics'])}")
|
|
||||||
|
|
||||||
def cmd_consolidate(args):
|
|
||||||
archive = MnemosyneArchive()
|
|
||||||
merges = archive.consolidate(threshold=args.threshold, dry_run=args.dry_run)
|
|
||||||
if not merges:
|
|
||||||
print("No duplicates found.")
|
|
||||||
return
|
|
||||||
label = "[DRY RUN] " if args.dry_run else ""
|
|
||||||
for m in merges:
|
|
||||||
print(f"{label}Merge ({m['reason']}, score={m['score']:.4f}):")
|
|
||||||
print(f" kept: {m['kept'][:8]}")
|
|
||||||
print(f" removed: {m['removed'][:8]}")
|
|
||||||
if args.dry_run:
|
|
||||||
print(f"\n{len(merges)} pair(s) would be merged. Re-run without --dry-run to apply.")
|
|
||||||
else:
|
|
||||||
print(f"\nMerged {len(merges)} duplicate pair(s).")
|
|
||||||
|
|
||||||
|
|
||||||
def cmd_neighbors(args):
|
def cmd_neighbors(args):
|
||||||
archive = MnemosyneArchive()
|
archive = MnemosyneArchive()
|
||||||
try:
|
try:
|
||||||
@@ -265,145 +239,6 @@ def cmd_neighbors(args):
|
|||||||
print()
|
print()
|
||||||
|
|
||||||
|
|
||||||
def cmd_touch(args):
|
|
||||||
archive = MnemosyneArchive()
|
|
||||||
try:
|
|
||||||
entry = archive.touch(args.entry_id)
|
|
||||||
except KeyError:
|
|
||||||
print(f"Entry not found: {args.entry_id}")
|
|
||||||
sys.exit(1)
|
|
||||||
v = archive.get_vitality(entry.id)
|
|
||||||
print(f"[{entry.id[:8]}] {entry.title}")
|
|
||||||
print(f" Vitality: {v['vitality']:.4f} (boosted)")
|
|
||||||
|
|
||||||
|
|
||||||
def cmd_decay(args):
|
|
||||||
archive = MnemosyneArchive()
|
|
||||||
result = archive.apply_decay()
|
|
||||||
print(f"Applied decay to {result['total_entries']} entries")
|
|
||||||
print(f" Decayed: {result['decayed_count']}")
|
|
||||||
print(f" Avg vitality: {result['avg_vitality']:.4f}")
|
|
||||||
print(f" Fading (<0.3): {result['fading_count']}")
|
|
||||||
print(f" Vibrant (>0.7): {result['vibrant_count']}")
|
|
||||||
|
|
||||||
|
|
||||||
def cmd_vitality(args):
|
|
||||||
archive = MnemosyneArchive()
|
|
||||||
try:
|
|
||||||
v = archive.get_vitality(args.entry_id)
|
|
||||||
except KeyError:
|
|
||||||
print(f"Entry not found: {args.entry_id}")
|
|
||||||
sys.exit(1)
|
|
||||||
print(f"[{v['entry_id'][:8]}] {v['title']}")
|
|
||||||
print(f" Vitality: {v['vitality']:.4f}")
|
|
||||||
print(f" Last accessed: {v['last_accessed'] or 'never'}")
|
|
||||||
print(f" Age: {v['age_days']} days")
|
|
||||||
|
|
||||||
|
|
||||||
def cmd_fading(args):
|
|
||||||
archive = MnemosyneArchive()
|
|
||||||
results = archive.fading(limit=args.limit)
|
|
||||||
if not results:
|
|
||||||
print("Archive is empty.")
|
|
||||||
return
|
|
||||||
for v in results:
|
|
||||||
print(f"[{v['entry_id'][:8]}] {v['title']}")
|
|
||||||
print(f" Vitality: {v['vitality']:.4f} | Age: {v['age_days']}d | Last: {v['last_accessed'] or 'never'}")
|
|
||||||
print()
|
|
||||||
|
|
||||||
|
|
||||||
def cmd_snapshot(args):
|
|
||||||
archive = MnemosyneArchive()
|
|
||||||
if args.snapshot_cmd == "create":
|
|
||||||
result = archive.snapshot_create(label=args.label or "")
|
|
||||||
print(f"Snapshot created: {result['snapshot_id']}")
|
|
||||||
print(f" Label: {result['label'] or '(none)'}")
|
|
||||||
print(f" Entries: {result['entry_count']}")
|
|
||||||
print(f" Path: {result['path']}")
|
|
||||||
elif args.snapshot_cmd == "list":
|
|
||||||
snapshots = archive.snapshot_list()
|
|
||||||
if not snapshots:
|
|
||||||
print("No snapshots found.")
|
|
||||||
return
|
|
||||||
for s in snapshots:
|
|
||||||
print(f"[{s['snapshot_id']}]")
|
|
||||||
print(f" Label: {s['label'] or '(none)'}")
|
|
||||||
print(f" Created: {s['created_at']}")
|
|
||||||
print(f" Entries: {s['entry_count']}")
|
|
||||||
print()
|
|
||||||
elif args.snapshot_cmd == "restore":
|
|
||||||
try:
|
|
||||||
result = archive.snapshot_restore(args.snapshot_id)
|
|
||||||
except FileNotFoundError as e:
|
|
||||||
print(str(e))
|
|
||||||
sys.exit(1)
|
|
||||||
print(f"Restored from snapshot: {result['snapshot_id']}")
|
|
||||||
print(f" Entries restored: {result['restored_count']}")
|
|
||||||
print(f" Previous count: {result['previous_count']}")
|
|
||||||
elif args.snapshot_cmd == "diff":
|
|
||||||
try:
|
|
||||||
diff = archive.snapshot_diff(args.snapshot_id)
|
|
||||||
except FileNotFoundError as e:
|
|
||||||
print(str(e))
|
|
||||||
sys.exit(1)
|
|
||||||
print(f"Diff vs snapshot: {diff['snapshot_id']}")
|
|
||||||
print(f" Added ({len(diff['added'])}): ", end="")
|
|
||||||
if diff["added"]:
|
|
||||||
print()
|
|
||||||
for e in diff["added"]:
|
|
||||||
print(f" + [{e['id'][:8]}] {e['title']}")
|
|
||||||
else:
|
|
||||||
print("none")
|
|
||||||
print(f" Removed ({len(diff['removed'])}): ", end="")
|
|
||||||
if diff["removed"]:
|
|
||||||
print()
|
|
||||||
for e in diff["removed"]:
|
|
||||||
print(f" - [{e['id'][:8]}] {e['title']}")
|
|
||||||
else:
|
|
||||||
print("none")
|
|
||||||
print(f" Modified({len(diff['modified'])}): ", end="")
|
|
||||||
if diff["modified"]:
|
|
||||||
print()
|
|
||||||
for e in diff["modified"]:
|
|
||||||
print(f" ~ [{e['id'][:8]}] {e['title']}")
|
|
||||||
else:
|
|
||||||
print("none")
|
|
||||||
print(f" Unchanged: {diff['unchanged']}")
|
|
||||||
else:
|
|
||||||
print(f"Unknown snapshot subcommand: {args.snapshot_cmd}")
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
|
|
||||||
def cmd_resonance(args):
|
|
||||||
archive = MnemosyneArchive()
|
|
||||||
topic = args.topic if args.topic else None
|
|
||||||
pairs = archive.resonance(threshold=args.threshold, limit=args.limit, topic=topic)
|
|
||||||
if not pairs:
|
|
||||||
print("No resonant pairs found.")
|
|
||||||
return
|
|
||||||
for p in pairs:
|
|
||||||
a = p["entry_a"]
|
|
||||||
b = p["entry_b"]
|
|
||||||
print(f"Score: {p['score']:.4f}")
|
|
||||||
print(f" [{a['id'][:8]}] {a['title']}")
|
|
||||||
print(f" Topics: {', '.join(a['topics']) if a['topics'] else '(none)'}")
|
|
||||||
print(f" [{b['id'][:8]}] {b['title']}")
|
|
||||||
print(f" Topics: {', '.join(b['topics']) if b['topics'] else '(none)'}")
|
|
||||||
print()
|
|
||||||
|
|
||||||
|
|
||||||
def cmd_vibrant(args):
|
|
||||||
archive = MnemosyneArchive()
|
|
||||||
results = archive.vibrant(limit=args.limit)
|
|
||||||
if not results:
|
|
||||||
print("Archive is empty.")
|
|
||||||
return
|
|
||||||
for v in results:
|
|
||||||
print(f"[{v['entry_id'][:8]}] {v['title']}")
|
|
||||||
print(f" Vitality: {v['vitality']:.4f} | Age: {v['age_days']}d | Last: {v['last_accessed'] or 'never'}")
|
|
||||||
print()
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(prog="mnemosyne", description="The Living Holographic Archive")
|
parser = argparse.ArgumentParser(prog="mnemosyne", description="The Living Holographic Archive")
|
||||||
sub = parser.add_subparsers(dest="command")
|
sub = parser.add_subparsers(dest="command")
|
||||||
@@ -420,10 +255,6 @@ def main():
|
|||||||
i.add_argument("--content", required=True)
|
i.add_argument("--content", required=True)
|
||||||
i.add_argument("--topics", default="", help="Comma-separated topics")
|
i.add_argument("--topics", default="", help="Comma-separated topics")
|
||||||
|
|
||||||
id_ = sub.add_parser("ingest-dir", help="Ingest a directory of files")
|
|
||||||
id_.add_argument("path", help="Directory to ingest")
|
|
||||||
id_.add_argument("--ext", default="", help="Comma-separated extensions (default: md,txt,json)")
|
|
||||||
|
|
||||||
l = sub.add_parser("link", help="Show linked entries")
|
l = sub.add_parser("link", help="Show linked entries")
|
||||||
l.add_argument("entry_id", help="Entry ID (or prefix)")
|
l.add_argument("entry_id", help="Entry ID (or prefix)")
|
||||||
l.add_argument("-d", "--depth", type=int, default=1)
|
l.add_argument("-d", "--depth", type=int, default=1)
|
||||||
@@ -449,6 +280,10 @@ def main():
|
|||||||
rb = sub.add_parser("rebuild", help="Recompute all links from scratch")
|
rb = sub.add_parser("rebuild", help="Recompute all links from scratch")
|
||||||
rb.add_argument("-t", "--threshold", type=float, default=None, help="Similarity threshold override")
|
rb.add_argument("-t", "--threshold", type=float, default=None, help="Similarity threshold override")
|
||||||
|
|
||||||
|
co = sub.add_parser("consolidate", help="Find and merge duplicate/near-duplicate entries")
|
||||||
|
co.add_argument("-t", "--threshold", type=float, default=0.9, help="Similarity threshold for near-duplicates (default: 0.9)")
|
||||||
|
co.add_argument("--dry-run", action="store_true", help="Show what would merge without modifying")
|
||||||
|
|
||||||
tg = sub.add_parser("tag", help="Add tags to an existing entry")
|
tg = sub.add_parser("tag", help="Add tags to an existing entry")
|
||||||
tg.add_argument("entry_id", help="Entry ID")
|
tg.add_argument("entry_id", help="Entry ID")
|
||||||
tg.add_argument("tags", help="Comma-separated tags to add")
|
tg.add_argument("tags", help="Comma-separated tags to add")
|
||||||
@@ -469,59 +304,15 @@ def main():
|
|||||||
nb.add_argument("entry_id", help="Anchor entry ID")
|
nb.add_argument("entry_id", help="Anchor entry ID")
|
||||||
nb.add_argument("--days", type=int, default=7, help="Window in days (default: 7)")
|
nb.add_argument("--days", type=int, default=7, help="Window in days (default: 7)")
|
||||||
|
|
||||||
|
|
||||||
pa = sub.add_parser("path", help="Find shortest path between two memories")
|
|
||||||
pa.add_argument("start", help="Starting entry ID")
|
|
||||||
pa.add_argument("end", help="Target entry ID")
|
|
||||||
pa.add_argument("--archive", default=None, help="Archive path")
|
|
||||||
|
|
||||||
co = sub.add_parser("consolidate", help="Merge duplicate/near-duplicate entries")
|
|
||||||
co.add_argument("--dry-run", action="store_true", help="Show what would be merged without applying")
|
|
||||||
co.add_argument("--threshold", type=float, default=0.9, help="Similarity threshold (default: 0.9)")
|
|
||||||
|
|
||||||
|
|
||||||
tc = sub.add_parser("touch", help="Boost an entry's vitality by accessing it")
|
|
||||||
tc.add_argument("entry_id", help="Entry ID to touch")
|
|
||||||
|
|
||||||
dc = sub.add_parser("decay", help="Apply time-based decay to all entries")
|
|
||||||
|
|
||||||
vy = sub.add_parser("vitality", help="Show an entry's vitality status")
|
|
||||||
vy.add_argument("entry_id", help="Entry ID to check")
|
|
||||||
|
|
||||||
fg = sub.add_parser("fading", help="Show most neglected entries (lowest vitality)")
|
|
||||||
fg.add_argument("-n", "--limit", type=int, default=10, help="Max entries to show")
|
|
||||||
|
|
||||||
vb = sub.add_parser("vibrant", help="Show most alive entries (highest vitality)")
|
|
||||||
vb.add_argument("-n", "--limit", type=int, default=10, help="Max entries to show")
|
|
||||||
|
|
||||||
rs = sub.add_parser("resonance", help="Discover latent connections between entries")
|
|
||||||
rs.add_argument("-t", "--threshold", type=float, default=0.3, help="Minimum similarity score (default: 0.3)")
|
|
||||||
rs.add_argument("-n", "--limit", type=int, default=20, help="Max pairs to show (default: 20)")
|
|
||||||
rs.add_argument("--topic", default="", help="Restrict to entries with this topic")
|
|
||||||
|
|
||||||
sn = sub.add_parser("snapshot", help="Point-in-time backup and restore")
|
|
||||||
sn_sub = sn.add_subparsers(dest="snapshot_cmd")
|
|
||||||
sn_create = sn_sub.add_parser("create", help="Create a new snapshot")
|
|
||||||
sn_create.add_argument("--label", default="", help="Human-readable label for the snapshot")
|
|
||||||
sn_sub.add_parser("list", help="List available snapshots")
|
|
||||||
sn_restore = sn_sub.add_parser("restore", help="Restore archive from a snapshot")
|
|
||||||
sn_restore.add_argument("snapshot_id", help="Snapshot ID to restore")
|
|
||||||
sn_diff = sn_sub.add_parser("diff", help="Show what changed since a snapshot")
|
|
||||||
sn_diff.add_argument("snapshot_id", help="Snapshot ID to compare against")
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if not args.command:
|
if not args.command:
|
||||||
parser.print_help()
|
parser.print_help()
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
if args.command == "snapshot" and not args.snapshot_cmd:
|
|
||||||
sn.print_help()
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
dispatch = {
|
dispatch = {
|
||||||
"stats": cmd_stats,
|
"stats": cmd_stats,
|
||||||
"search": cmd_search,
|
"search": cmd_search,
|
||||||
"ingest": cmd_ingest,
|
"ingest": cmd_ingest,
|
||||||
"ingest-dir": cmd_ingest_dir,
|
|
||||||
"link": cmd_link,
|
"link": cmd_link,
|
||||||
"topics": cmd_topics,
|
"topics": cmd_topics,
|
||||||
"remove": cmd_remove,
|
"remove": cmd_remove,
|
||||||
@@ -530,20 +321,12 @@ def main():
|
|||||||
"hubs": cmd_hubs,
|
"hubs": cmd_hubs,
|
||||||
"bridges": cmd_bridges,
|
"bridges": cmd_bridges,
|
||||||
"rebuild": cmd_rebuild,
|
"rebuild": cmd_rebuild,
|
||||||
|
"consolidate": cmd_consolidate,
|
||||||
"tag": cmd_tag,
|
"tag": cmd_tag,
|
||||||
"untag": cmd_untag,
|
"untag": cmd_untag,
|
||||||
"retag": cmd_retag,
|
"retag": cmd_retag,
|
||||||
"timeline": cmd_timeline,
|
"timeline": cmd_timeline,
|
||||||
"neighbors": cmd_neighbors,
|
"neighbors": cmd_neighbors,
|
||||||
"consolidate": cmd_consolidate,
|
|
||||||
"path": cmd_path,
|
|
||||||
"touch": cmd_touch,
|
|
||||||
"decay": cmd_decay,
|
|
||||||
"vitality": cmd_vitality,
|
|
||||||
"fading": cmd_fading,
|
|
||||||
"vibrant": cmd_vibrant,
|
|
||||||
"resonance": cmd_resonance,
|
|
||||||
"snapshot": cmd_snapshot,
|
|
||||||
}
|
}
|
||||||
dispatch[args.command](args)
|
dispatch[args.command](args)
|
||||||
|
|
||||||
|
|||||||
@@ -1,135 +1,15 @@
|
|||||||
"""Ingestion pipeline — feeds data into the archive.
|
"""Ingestion pipeline — feeds data into the archive.
|
||||||
|
|
||||||
Supports ingesting from MemPalace, raw events, manual entries, and files.
|
Supports ingesting from MemPalace, raw events, and manual entries.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import re
|
from typing import Optional
|
||||||
from pathlib import Path
|
|
||||||
from typing import Optional, Union
|
|
||||||
|
|
||||||
from nexus.mnemosyne.archive import MnemosyneArchive
|
from nexus.mnemosyne.archive import MnemosyneArchive
|
||||||
from nexus.mnemosyne.entry import ArchiveEntry
|
from nexus.mnemosyne.entry import ArchiveEntry
|
||||||
|
|
||||||
_DEFAULT_EXTENSIONS = [".md", ".txt", ".json"]
|
|
||||||
_MAX_CHUNK_CHARS = 4000 # ~1000 tokens; split large files into chunks
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_title(content: str, path: Path) -> str:
|
|
||||||
"""Return first # heading, or the file stem if none found."""
|
|
||||||
for line in content.splitlines():
|
|
||||||
stripped = line.strip()
|
|
||||||
if stripped.startswith("# "):
|
|
||||||
return stripped[2:].strip()
|
|
||||||
return path.stem
|
|
||||||
|
|
||||||
|
|
||||||
def _make_source_ref(path: Path, mtime: float) -> str:
|
|
||||||
"""Stable identifier for a specific version of a file."""
|
|
||||||
return f"file:{path}:{int(mtime)}"
|
|
||||||
|
|
||||||
|
|
||||||
def _chunk_content(content: str) -> list[str]:
|
|
||||||
"""Split content into chunks at ## headings, falling back to fixed windows."""
|
|
||||||
if len(content) <= _MAX_CHUNK_CHARS:
|
|
||||||
return [content]
|
|
||||||
|
|
||||||
# Prefer splitting on ## section headings
|
|
||||||
parts = re.split(r"\n(?=## )", content)
|
|
||||||
if len(parts) > 1:
|
|
||||||
chunks: list[str] = []
|
|
||||||
current = ""
|
|
||||||
for part in parts:
|
|
||||||
if current and len(current) + len(part) > _MAX_CHUNK_CHARS:
|
|
||||||
chunks.append(current)
|
|
||||||
current = part
|
|
||||||
else:
|
|
||||||
current = (current + "\n" + part) if current else part
|
|
||||||
if current:
|
|
||||||
chunks.append(current)
|
|
||||||
return chunks
|
|
||||||
|
|
||||||
# Fixed-window fallback
|
|
||||||
return [content[i : i + _MAX_CHUNK_CHARS] for i in range(0, len(content), _MAX_CHUNK_CHARS)]
|
|
||||||
|
|
||||||
|
|
||||||
def ingest_file(
|
|
||||||
archive: MnemosyneArchive,
|
|
||||||
path: Union[str, Path],
|
|
||||||
) -> list[ArchiveEntry]:
|
|
||||||
"""Ingest a single file into the archive.
|
|
||||||
|
|
||||||
- Title is taken from the first ``# heading`` or the filename stem.
|
|
||||||
- Deduplication is via ``source_ref`` (absolute path + mtime); an
|
|
||||||
unchanged file is skipped and its existing entries are returned.
|
|
||||||
- Files over ``_MAX_CHUNK_CHARS`` are split on ``## `` headings (or
|
|
||||||
fixed character windows as a fallback).
|
|
||||||
|
|
||||||
Returns a list of ArchiveEntry objects (one per chunk).
|
|
||||||
"""
|
|
||||||
path = Path(path).resolve()
|
|
||||||
mtime = path.stat().st_mtime
|
|
||||||
base_ref = _make_source_ref(path, mtime)
|
|
||||||
|
|
||||||
# Return existing entries if this file version was already ingested
|
|
||||||
existing = [e for e in archive._entries.values() if e.source_ref and e.source_ref.startswith(base_ref)]
|
|
||||||
if existing:
|
|
||||||
return existing
|
|
||||||
|
|
||||||
content = path.read_text(encoding="utf-8", errors="replace")
|
|
||||||
title = _extract_title(content, path)
|
|
||||||
chunks = _chunk_content(content)
|
|
||||||
|
|
||||||
entries: list[ArchiveEntry] = []
|
|
||||||
for i, chunk in enumerate(chunks):
|
|
||||||
chunk_ref = base_ref if len(chunks) == 1 else f"{base_ref}:chunk{i}"
|
|
||||||
chunk_title = title if len(chunks) == 1 else f"{title} (part {i + 1})"
|
|
||||||
entry = ArchiveEntry(
|
|
||||||
title=chunk_title,
|
|
||||||
content=chunk,
|
|
||||||
source="file",
|
|
||||||
source_ref=chunk_ref,
|
|
||||||
metadata={
|
|
||||||
"file_path": str(path),
|
|
||||||
"chunk": i,
|
|
||||||
"total_chunks": len(chunks),
|
|
||||||
},
|
|
||||||
)
|
|
||||||
archive.add(entry)
|
|
||||||
entries.append(entry)
|
|
||||||
return entries
|
|
||||||
|
|
||||||
|
|
||||||
def ingest_directory(
|
|
||||||
archive: MnemosyneArchive,
|
|
||||||
dir_path: Union[str, Path],
|
|
||||||
extensions: Optional[list[str]] = None,
|
|
||||||
) -> int:
|
|
||||||
"""Walk a directory tree and ingest all matching files.
|
|
||||||
|
|
||||||
``extensions`` defaults to ``[".md", ".txt", ".json"]``.
|
|
||||||
Values may be given with or without a leading dot.
|
|
||||||
|
|
||||||
Returns the count of new archive entries created.
|
|
||||||
"""
|
|
||||||
dir_path = Path(dir_path).resolve()
|
|
||||||
if extensions is None:
|
|
||||||
exts = _DEFAULT_EXTENSIONS
|
|
||||||
else:
|
|
||||||
exts = [e if e.startswith(".") else f".{e}" for e in extensions]
|
|
||||||
|
|
||||||
added = 0
|
|
||||||
for file_path in sorted(dir_path.rglob("*")):
|
|
||||||
if not file_path.is_file():
|
|
||||||
continue
|
|
||||||
if file_path.suffix.lower() not in exts:
|
|
||||||
continue
|
|
||||||
before = archive.count
|
|
||||||
ingest_file(archive, file_path)
|
|
||||||
added += archive.count - before
|
|
||||||
return added
|
|
||||||
|
|
||||||
|
|
||||||
def ingest_from_mempalace(
|
def ingest_from_mempalace(
|
||||||
archive: MnemosyneArchive,
|
archive: MnemosyneArchive,
|
||||||
|
|||||||
@@ -1,14 +0,0 @@
|
|||||||
|
|
||||||
class Reasoner:
|
|
||||||
def __init__(self, rules):
|
|
||||||
self.rules = rules
|
|
||||||
def evaluate(self, entries):
|
|
||||||
return [r['action'] for r in self.rules if self._check(r['condition'], entries)]
|
|
||||||
def _check(self, cond, entries):
|
|
||||||
if cond.startswith('count'):
|
|
||||||
# e.g. count(type=anomaly)>3
|
|
||||||
p = cond.replace('count(', '').split(')')
|
|
||||||
key, val = p[0].split('=')
|
|
||||||
count = sum(1 for e in entries if e.get(key) == val)
|
|
||||||
return eval(f"{count}{p[1]}")
|
|
||||||
return False
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
|
|
||||||
"""Resonance Linker — Finds second-degree connections in the holographic graph."""
|
|
||||||
|
|
||||||
class ResonanceLinker:
|
|
||||||
def __init__(self, archive):
|
|
||||||
self.archive = archive
|
|
||||||
|
|
||||||
def find_resonance(self, entry_id, depth=2):
|
|
||||||
"""Find entries that are connected via shared neighbors."""
|
|
||||||
if entry_id not in self.archive._entries: return []
|
|
||||||
|
|
||||||
entry = self.archive._entries[entry_id]
|
|
||||||
neighbors = set(entry.links)
|
|
||||||
resonance = {}
|
|
||||||
|
|
||||||
for neighbor_id in neighbors:
|
|
||||||
if neighbor_id in self.archive._entries:
|
|
||||||
for second_neighbor in self.archive._entries[neighbor_id].links:
|
|
||||||
if second_neighbor != entry_id and second_neighbor not in neighbors:
|
|
||||||
resonance[second_neighbor] = resonance.get(second_neighbor, 0) + 1
|
|
||||||
|
|
||||||
return sorted(resonance.items(), key=lambda x: x[1], reverse=True)
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
[
|
|
||||||
{
|
|
||||||
"condition": "count(type=anomaly)>3",
|
|
||||||
"action": "alert"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
@@ -1,2 +0,0 @@
|
|||||||
import json
|
|
||||||
# Snapshot logic
|
|
||||||
@@ -1,138 +0,0 @@
|
|||||||
"""Tests for Mnemosyne CLI commands — path, touch, decay, vitality, fading, vibrant."""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import tempfile
|
|
||||||
from pathlib import Path
|
|
||||||
from unittest.mock import patch
|
|
||||||
import sys
|
|
||||||
import io
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from nexus.mnemosyne.archive import MnemosyneArchive
|
|
||||||
from nexus.mnemosyne.entry import ArchiveEntry
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def archive(tmp_path):
|
|
||||||
path = tmp_path / "test_archive.json"
|
|
||||||
return MnemosyneArchive(archive_path=path)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def linked_archive(tmp_path):
|
|
||||||
"""Archive with entries linked to each other for path testing."""
|
|
||||||
path = tmp_path / "test_archive.json"
|
|
||||||
arch = MnemosyneArchive(archive_path=path, auto_embed=False)
|
|
||||||
e1 = arch.add(ArchiveEntry(title="Alpha", content="first entry about python", topics=["code"]))
|
|
||||||
e2 = arch.add(ArchiveEntry(title="Beta", content="second entry about python coding", topics=["code"]))
|
|
||||||
e3 = arch.add(ArchiveEntry(title="Gamma", content="third entry about cooking recipes", topics=["food"]))
|
|
||||||
return arch, e1, e2, e3
|
|
||||||
|
|
||||||
|
|
||||||
class TestPathCommand:
|
|
||||||
def test_shortest_path_exists(self, linked_archive):
|
|
||||||
arch, e1, e2, e3 = linked_archive
|
|
||||||
path = arch.shortest_path(e1.id, e2.id)
|
|
||||||
assert path is not None
|
|
||||||
assert path[0] == e1.id
|
|
||||||
assert path[-1] == e2.id
|
|
||||||
|
|
||||||
def test_shortest_path_no_connection(self, linked_archive):
|
|
||||||
arch, e1, e2, e3 = linked_archive
|
|
||||||
# e3 (cooking) likely not linked to e1 (python coding)
|
|
||||||
path = arch.shortest_path(e1.id, e3.id)
|
|
||||||
# Path may or may not exist depending on linking threshold
|
|
||||||
# Either None or a list is valid
|
|
||||||
|
|
||||||
def test_shortest_path_same_entry(self, linked_archive):
|
|
||||||
arch, e1, _, _ = linked_archive
|
|
||||||
path = arch.shortest_path(e1.id, e1.id)
|
|
||||||
assert path == [e1.id]
|
|
||||||
|
|
||||||
def test_shortest_path_missing_entry(self, linked_archive):
|
|
||||||
arch, e1, _, _ = linked_archive
|
|
||||||
path = arch.shortest_path(e1.id, "nonexistent-id")
|
|
||||||
assert path is None
|
|
||||||
|
|
||||||
|
|
||||||
class TestTouchCommand:
|
|
||||||
def test_touch_boosts_vitality(self, archive):
|
|
||||||
entry = archive.add(ArchiveEntry(title="Test", content="Content"))
|
|
||||||
# Simulate time passing by setting old last_accessed
|
|
||||||
old_time = "2020-01-01T00:00:00+00:00"
|
|
||||||
entry.last_accessed = old_time
|
|
||||||
entry.vitality = 0.5
|
|
||||||
archive._save()
|
|
||||||
|
|
||||||
touched = archive.touch(entry.id)
|
|
||||||
assert touched.vitality > 0.5
|
|
||||||
assert touched.last_accessed != old_time
|
|
||||||
|
|
||||||
def test_touch_missing_entry(self, archive):
|
|
||||||
with pytest.raises(KeyError):
|
|
||||||
archive.touch("nonexistent-id")
|
|
||||||
|
|
||||||
|
|
||||||
class TestDecayCommand:
|
|
||||||
def test_apply_decay_returns_stats(self, archive):
|
|
||||||
archive.add(ArchiveEntry(title="Test", content="Content"))
|
|
||||||
result = archive.apply_decay()
|
|
||||||
assert result["total_entries"] == 1
|
|
||||||
assert "avg_vitality" in result
|
|
||||||
assert "fading_count" in result
|
|
||||||
assert "vibrant_count" in result
|
|
||||||
|
|
||||||
def test_decay_on_empty_archive(self, archive):
|
|
||||||
result = archive.apply_decay()
|
|
||||||
assert result["total_entries"] == 0
|
|
||||||
assert result["avg_vitality"] == 0.0
|
|
||||||
|
|
||||||
|
|
||||||
class TestVitalityCommand:
|
|
||||||
def test_get_vitality(self, archive):
|
|
||||||
entry = archive.add(ArchiveEntry(title="Test", content="Content"))
|
|
||||||
v = archive.get_vitality(entry.id)
|
|
||||||
assert v["entry_id"] == entry.id
|
|
||||||
assert v["title"] == "Test"
|
|
||||||
assert 0.0 <= v["vitality"] <= 1.0
|
|
||||||
assert v["age_days"] >= 0
|
|
||||||
|
|
||||||
def test_get_vitality_missing(self, archive):
|
|
||||||
with pytest.raises(KeyError):
|
|
||||||
archive.get_vitality("nonexistent-id")
|
|
||||||
|
|
||||||
|
|
||||||
class TestFadingVibrant:
|
|
||||||
def test_fading_returns_sorted_ascending(self, archive):
|
|
||||||
# Add entries with different vitalities
|
|
||||||
e1 = archive.add(ArchiveEntry(title="Vibrant", content="High energy"))
|
|
||||||
e2 = archive.add(ArchiveEntry(title="Fading", content="Low energy"))
|
|
||||||
e2.vitality = 0.1
|
|
||||||
e2.last_accessed = "2020-01-01T00:00:00+00:00"
|
|
||||||
archive._save()
|
|
||||||
|
|
||||||
results = archive.fading(limit=10)
|
|
||||||
assert len(results) == 2
|
|
||||||
assert results[0]["vitality"] <= results[1]["vitality"]
|
|
||||||
|
|
||||||
def test_vibrant_returns_sorted_descending(self, archive):
|
|
||||||
e1 = archive.add(ArchiveEntry(title="Fresh", content="New"))
|
|
||||||
e2 = archive.add(ArchiveEntry(title="Old", content="Ancient"))
|
|
||||||
e2.vitality = 0.1
|
|
||||||
e2.last_accessed = "2020-01-01T00:00:00+00:00"
|
|
||||||
archive._save()
|
|
||||||
|
|
||||||
results = archive.vibrant(limit=10)
|
|
||||||
assert len(results) == 2
|
|
||||||
assert results[0]["vitality"] >= results[1]["vitality"]
|
|
||||||
|
|
||||||
def test_fading_limit(self, archive):
|
|
||||||
for i in range(15):
|
|
||||||
archive.add(ArchiveEntry(title=f"Entry {i}", content=f"Content {i}"))
|
|
||||||
results = archive.fading(limit=5)
|
|
||||||
assert len(results) == 5
|
|
||||||
|
|
||||||
def test_vibrant_empty(self, archive):
|
|
||||||
results = archive.vibrant()
|
|
||||||
assert results == []
|
|
||||||
@@ -1,176 +1,137 @@
|
|||||||
"""Tests for MnemosyneArchive.consolidate() — duplicate/near-duplicate merging."""
|
"""Tests for MnemosyneArchive.consolidate()."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import tempfile
|
import tempfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
from nexus.mnemosyne.archive import MnemosyneArchive
|
from nexus.mnemosyne.archive import MnemosyneArchive
|
||||||
from nexus.mnemosyne.entry import ArchiveEntry
|
from nexus.mnemosyne.entry import ArchiveEntry
|
||||||
from nexus.mnemosyne.ingest import ingest_event
|
|
||||||
|
|
||||||
|
|
||||||
def _archive(tmp: str) -> MnemosyneArchive:
|
@pytest.fixture
|
||||||
return MnemosyneArchive(archive_path=Path(tmp) / "archive.json", auto_embed=False)
|
def archive(tmp_path):
|
||||||
|
"""Create an archive with auto_embed disabled for deterministic tests."""
|
||||||
|
path = tmp_path / "test_archive.json"
|
||||||
|
return MnemosyneArchive(archive_path=path, auto_embed=False)
|
||||||
|
|
||||||
|
|
||||||
def test_consolidate_exact_duplicate_removed():
|
class TestConsolidateExactDuplicates:
|
||||||
"""Two entries with identical content_hash are merged; only one survives."""
|
"""Phase 1: exact duplicate detection by content_hash."""
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
archive = _archive(tmp)
|
|
||||||
e1 = ingest_event(archive, title="Hello world", content="Exactly the same content", topics=["a"])
|
|
||||||
# Manually add a second entry with the same hash to simulate a duplicate
|
|
||||||
e2 = ArchiveEntry(title="Hello world", content="Exactly the same content", topics=["b"])
|
|
||||||
# Bypass dedup guard so we can test consolidate() rather than add()
|
|
||||||
archive._entries[e2.id] = e2
|
|
||||||
archive._save()
|
|
||||||
|
|
||||||
assert archive.count == 2
|
def test_finds_exact_duplicates(self, archive):
|
||||||
merges = archive.consolidate(dry_run=False)
|
entry_a = ArchiveEntry(title="Hello", content="World")
|
||||||
|
entry_b = ArchiveEntry(title="Hello", content="World")
|
||||||
|
archive.add(entry_a, auto_link=False)
|
||||||
|
archive.add(entry_b, auto_link=False)
|
||||||
|
|
||||||
|
# Force same content_hash
|
||||||
|
entry_b.content_hash = entry_a.content_hash
|
||||||
|
|
||||||
|
# Re-add entry_b manually (bypass add() dedup)
|
||||||
|
archive._entries[entry_b.id] = entry_b
|
||||||
|
|
||||||
|
merges = archive.consolidate()
|
||||||
assert len(merges) == 1
|
assert len(merges) == 1
|
||||||
assert merges[0]["reason"] == "exact_hash"
|
assert merges[0]["reason"] == "exact_content_hash"
|
||||||
assert merges[0]["score"] == 1.0
|
assert merges[0]["similarity"] == 1.0
|
||||||
assert archive.count == 1
|
|
||||||
|
|
||||||
|
def test_keeps_older_entry(self, archive):
|
||||||
|
entry_a = ArchiveEntry(title="First", content="Data", created_at="2024-01-01T00:00:00+00:00")
|
||||||
|
entry_b = ArchiveEntry(title="Second", content="Data", created_at="2024-06-01T00:00:00+00:00")
|
||||||
|
entry_b.content_hash = entry_a.content_hash
|
||||||
|
archive.add(entry_a, auto_link=False)
|
||||||
|
archive._entries[entry_b.id] = entry_b
|
||||||
|
|
||||||
def test_consolidate_keeps_older_entry():
|
merges = archive.consolidate()
|
||||||
"""The older entry (earlier created_at) is kept, the newer is removed."""
|
assert merges[0]["kept_id"] == entry_a.id
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
assert merges[0]["removed_id"] == entry_b.id
|
||||||
archive = _archive(tmp)
|
|
||||||
e1 = ingest_event(archive, title="Hello world", content="Same content here", topics=[])
|
|
||||||
e2 = ArchiveEntry(title="Hello world", content="Same content here", topics=[])
|
|
||||||
# Make e2 clearly newer
|
|
||||||
e2.created_at = "2099-01-01T00:00:00+00:00"
|
|
||||||
archive._entries[e2.id] = e2
|
|
||||||
archive._save()
|
|
||||||
|
|
||||||
merges = archive.consolidate(dry_run=False)
|
def test_dry_run_does_not_modify(self, archive):
|
||||||
assert len(merges) == 1
|
entry_a = ArchiveEntry(title="A", content="Same")
|
||||||
assert merges[0]["kept"] == e1.id
|
entry_b = ArchiveEntry(title="B", content="Same")
|
||||||
assert merges[0]["removed"] == e2.id
|
entry_b.content_hash = entry_a.content_hash
|
||||||
|
archive.add(entry_a, auto_link=False)
|
||||||
|
archive._entries[entry_b.id] = entry_b
|
||||||
def test_consolidate_merges_topics():
|
|
||||||
"""Topics from the removed entry are merged (unioned) into the kept entry."""
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
archive = _archive(tmp)
|
|
||||||
e1 = ingest_event(archive, title="Memory item", content="Shared content body", topics=["alpha"])
|
|
||||||
e2 = ArchiveEntry(title="Memory item", content="Shared content body", topics=["beta", "gamma"])
|
|
||||||
e2.created_at = "2099-01-01T00:00:00+00:00"
|
|
||||||
archive._entries[e2.id] = e2
|
|
||||||
archive._save()
|
|
||||||
|
|
||||||
archive.consolidate(dry_run=False)
|
|
||||||
survivor = archive.get(e1.id)
|
|
||||||
assert survivor is not None
|
|
||||||
topic_lower = {t.lower() for t in survivor.topics}
|
|
||||||
assert "alpha" in topic_lower
|
|
||||||
assert "beta" in topic_lower
|
|
||||||
assert "gamma" in topic_lower
|
|
||||||
|
|
||||||
|
|
||||||
def test_consolidate_merges_metadata():
|
|
||||||
"""Metadata from the removed entry is merged into the kept entry; kept values win."""
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
archive = _archive(tmp)
|
|
||||||
e1 = ArchiveEntry(
|
|
||||||
title="Shared", content="Identical body here", topics=[], metadata={"k1": "v1", "shared": "kept"}
|
|
||||||
)
|
|
||||||
archive._entries[e1.id] = e1
|
|
||||||
e2 = ArchiveEntry(
|
|
||||||
title="Shared", content="Identical body here", topics=[], metadata={"k2": "v2", "shared": "removed"}
|
|
||||||
)
|
|
||||||
e2.created_at = "2099-01-01T00:00:00+00:00"
|
|
||||||
archive._entries[e2.id] = e2
|
|
||||||
archive._save()
|
|
||||||
|
|
||||||
archive.consolidate(dry_run=False)
|
|
||||||
survivor = archive.get(e1.id)
|
|
||||||
assert survivor.metadata["k1"] == "v1"
|
|
||||||
assert survivor.metadata["k2"] == "v2"
|
|
||||||
assert survivor.metadata["shared"] == "kept" # kept entry wins
|
|
||||||
|
|
||||||
|
|
||||||
def test_consolidate_dry_run_no_mutation():
|
|
||||||
"""Dry-run mode returns merge plan but does not alter the archive."""
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
archive = _archive(tmp)
|
|
||||||
ingest_event(archive, title="Same", content="Identical content to dedup", topics=[])
|
|
||||||
e2 = ArchiveEntry(title="Same", content="Identical content to dedup", topics=[])
|
|
||||||
e2.created_at = "2099-01-01T00:00:00+00:00"
|
|
||||||
archive._entries[e2.id] = e2
|
|
||||||
archive._save()
|
|
||||||
|
|
||||||
|
count_before = archive.count
|
||||||
merges = archive.consolidate(dry_run=True)
|
merges = archive.consolidate(dry_run=True)
|
||||||
assert len(merges) == 1
|
assert len(merges) == 1
|
||||||
assert merges[0]["dry_run"] is True
|
assert archive.count == count_before # unchanged
|
||||||
# Archive must be unchanged
|
|
||||||
assert archive.count == 2
|
|
||||||
|
|
||||||
|
def test_no_duplicates_returns_empty(self, archive):
|
||||||
def test_consolidate_no_duplicates():
|
archive.add(ArchiveEntry(title="Unique A", content="Content A"), auto_link=False)
|
||||||
"""When no duplicates exist, consolidate returns an empty list."""
|
archive.add(ArchiveEntry(title="Unique B", content="Content B"), auto_link=False)
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
merges = archive.consolidate()
|
||||||
archive = _archive(tmp)
|
|
||||||
ingest_event(archive, title="Unique A", content="This is completely unique content for A")
|
|
||||||
ingest_event(archive, title="Unique B", content="Totally different words here for B")
|
|
||||||
merges = archive.consolidate(threshold=0.9)
|
|
||||||
assert merges == []
|
assert merges == []
|
||||||
|
|
||||||
|
def test_merges_topics(self, archive):
|
||||||
|
entry_a = ArchiveEntry(title="A", content="Data", topics=["python"])
|
||||||
|
entry_b = ArchiveEntry(title="B", content="Data", topics=["testing"])
|
||||||
|
entry_b.content_hash = entry_a.content_hash
|
||||||
|
archive.add(entry_a, auto_link=False)
|
||||||
|
archive._entries[entry_b.id] = entry_b
|
||||||
|
|
||||||
def test_consolidate_transfers_links():
|
archive.consolidate()
|
||||||
"""Links from the removed entry are inherited by the kept entry."""
|
keeper = archive.get(entry_a.id)
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
assert "python" in keeper.topics
|
||||||
archive = _archive(tmp)
|
assert "testing" in keeper.topics
|
||||||
# Create a third entry to act as a link target
|
|
||||||
target = ingest_event(archive, title="Target", content="The link target entry", topics=[])
|
|
||||||
|
|
||||||
e1 = ArchiveEntry(title="Dup", content="Exact duplicate body text", topics=[], links=[target.id])
|
def test_merges_links(self, archive):
|
||||||
archive._entries[e1.id] = e1
|
entry_c = ArchiveEntry(title="C", content="Ref")
|
||||||
target.links.append(e1.id)
|
archive.add(entry_c, auto_link=False)
|
||||||
|
|
||||||
e2 = ArchiveEntry(title="Dup", content="Exact duplicate body text", topics=[])
|
entry_a = ArchiveEntry(title="A", content="Data", links=[entry_c.id])
|
||||||
e2.created_at = "2099-01-01T00:00:00+00:00"
|
entry_b = ArchiveEntry(title="B", content="Data", links=[entry_c.id])
|
||||||
archive._entries[e2.id] = e2
|
entry_b.content_hash = entry_a.content_hash
|
||||||
archive._save()
|
archive.add(entry_a, auto_link=False)
|
||||||
|
archive._entries[entry_b.id] = entry_b
|
||||||
|
|
||||||
archive.consolidate(dry_run=False)
|
archive.consolidate()
|
||||||
survivor = archive.get(e1.id)
|
keeper = archive.get(entry_a.id)
|
||||||
assert survivor is not None
|
assert entry_c.id in keeper.links
|
||||||
assert target.id in survivor.links
|
|
||||||
|
def test_removes_duplicate_from_archive(self, archive):
|
||||||
|
entry_a = ArchiveEntry(title="A", content="Same")
|
||||||
|
entry_b = ArchiveEntry(title="B", content="Same")
|
||||||
|
entry_b.content_hash = entry_a.content_hash
|
||||||
|
archive.add(entry_a, auto_link=False)
|
||||||
|
archive._entries[entry_b.id] = entry_b
|
||||||
|
|
||||||
|
archive.consolidate()
|
||||||
|
assert archive.get(entry_a.id) is not None
|
||||||
|
assert archive.get(entry_b.id) is None
|
||||||
|
|
||||||
|
def test_fixes_links_pointing_to_removed(self, archive):
|
||||||
|
entry_a = ArchiveEntry(title="A", content="Same")
|
||||||
|
entry_b = ArchiveEntry(title="B", content="Same")
|
||||||
|
entry_c = ArchiveEntry(title="C", content="Ref", links=[entry_b.id])
|
||||||
|
entry_b.content_hash = entry_a.content_hash
|
||||||
|
archive.add(entry_a, auto_link=False)
|
||||||
|
archive.add(entry_c, auto_link=False)
|
||||||
|
archive._entries[entry_b.id] = entry_b
|
||||||
|
|
||||||
|
archive.consolidate()
|
||||||
|
survivor = archive.get(entry_c.id)
|
||||||
|
assert entry_b.id not in survivor.links
|
||||||
|
assert entry_a.id in survivor.links
|
||||||
|
|
||||||
|
|
||||||
def test_consolidate_near_duplicate_semantic():
|
class TestConsolidateTripleDuplicates:
|
||||||
"""Near-duplicate entries above the similarity threshold are merged."""
|
"""Handle 3+ entries with the same content_hash."""
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
archive = _archive(tmp)
|
|
||||||
# Entries with very high Jaccard overlap
|
|
||||||
text_a = "python automation scripting building tools workflows"
|
|
||||||
text_b = "python automation scripting building tools workflows tasks"
|
|
||||||
e1 = ArchiveEntry(title="Automator", content=text_a, topics=[])
|
|
||||||
e2 = ArchiveEntry(title="Automator", content=text_b, topics=[])
|
|
||||||
e2.created_at = "2099-01-01T00:00:00+00:00"
|
|
||||||
archive._entries[e1.id] = e1
|
|
||||||
archive._entries[e2.id] = e2
|
|
||||||
archive._save()
|
|
||||||
|
|
||||||
# Use a low threshold to ensure these very similar entries match
|
def test_three_way_merge(self, archive):
|
||||||
merges = archive.consolidate(threshold=0.7, dry_run=False)
|
entry_a = ArchiveEntry(title="A", content="Same", created_at="2024-01-01T00:00:00+00:00")
|
||||||
assert len(merges) >= 1
|
entry_b = ArchiveEntry(title="B", content="Same", created_at="2024-02-01T00:00:00+00:00")
|
||||||
assert merges[0]["reason"] == "semantic_similarity"
|
entry_c = ArchiveEntry(title="C", content="Same", created_at="2024-03-01T00:00:00+00:00")
|
||||||
|
entry_b.content_hash = entry_a.content_hash
|
||||||
|
entry_c.content_hash = entry_a.content_hash
|
||||||
|
archive.add(entry_a, auto_link=False)
|
||||||
|
archive._entries[entry_b.id] = entry_b
|
||||||
|
archive._entries[entry_c.id] = entry_c
|
||||||
|
|
||||||
|
merges = archive.consolidate()
|
||||||
def test_consolidate_persists_after_reload():
|
assert len(merges) == 2
|
||||||
"""After consolidation, the reduced archive survives a save/reload cycle."""
|
assert all(m["kept_id"] == entry_a.id for m in merges)
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
path = Path(tmp) / "archive.json"
|
|
||||||
archive = MnemosyneArchive(archive_path=path, auto_embed=False)
|
|
||||||
ingest_event(archive, title="Persist test", content="Body to dedup and persist", topics=[])
|
|
||||||
e2 = ArchiveEntry(title="Persist test", content="Body to dedup and persist", topics=[])
|
|
||||||
e2.created_at = "2099-01-01T00:00:00+00:00"
|
|
||||||
archive._entries[e2.id] = e2
|
|
||||||
archive._save()
|
|
||||||
|
|
||||||
archive.consolidate(dry_run=False)
|
|
||||||
assert archive.count == 1
|
|
||||||
|
|
||||||
reloaded = MnemosyneArchive(archive_path=path, auto_embed=False)
|
|
||||||
assert reloaded.count == 1
|
|
||||||
|
|||||||
@@ -1 +0,0 @@
|
|||||||
# Test discover
|
|
||||||
@@ -1,241 +0,0 @@
|
|||||||
"""Tests for file-based ingestion pipeline (ingest_file / ingest_directory)."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import tempfile
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from nexus.mnemosyne.archive import MnemosyneArchive
|
|
||||||
from nexus.mnemosyne.ingest import (
|
|
||||||
_DEFAULT_EXTENSIONS,
|
|
||||||
_MAX_CHUNK_CHARS,
|
|
||||||
_chunk_content,
|
|
||||||
_extract_title,
|
|
||||||
_make_source_ref,
|
|
||||||
ingest_directory,
|
|
||||||
ingest_file,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Helpers
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def _make_archive(tmp_path: Path) -> MnemosyneArchive:
|
|
||||||
return MnemosyneArchive(archive_path=tmp_path / "archive.json")
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Unit: _extract_title
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def test_extract_title_from_heading():
|
|
||||||
content = "# My Document\n\nSome content here."
|
|
||||||
assert _extract_title(content, Path("ignored.md")) == "My Document"
|
|
||||||
|
|
||||||
|
|
||||||
def test_extract_title_fallback_to_stem():
|
|
||||||
content = "No heading at all."
|
|
||||||
assert _extract_title(content, Path("/docs/my_notes.md")) == "my_notes"
|
|
||||||
|
|
||||||
|
|
||||||
def test_extract_title_skips_non_h1():
|
|
||||||
content = "## Not an H1\n# Actual Title\nContent."
|
|
||||||
assert _extract_title(content, Path("x.md")) == "Actual Title"
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Unit: _make_source_ref
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def test_source_ref_format():
|
|
||||||
p = Path("/tmp/foo.md")
|
|
||||||
ref = _make_source_ref(p, 1234567890.9)
|
|
||||||
assert ref == "file:/tmp/foo.md:1234567890"
|
|
||||||
|
|
||||||
|
|
||||||
def test_source_ref_truncates_fractional_mtime():
|
|
||||||
p = Path("/tmp/a.txt")
|
|
||||||
assert _make_source_ref(p, 100.99) == _make_source_ref(p, 100.01)
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Unit: _chunk_content
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def test_chunk_short_content_is_single():
|
|
||||||
content = "Short content."
|
|
||||||
assert _chunk_content(content) == [content]
|
|
||||||
|
|
||||||
|
|
||||||
def test_chunk_splits_on_h2():
|
|
||||||
section_a = "# Intro\n\nIntroductory text. " + "x" * 100
|
|
||||||
section_b = "## Section B\n\nBody of section B. " + "y" * 100
|
|
||||||
content = section_a + "\n" + section_b
|
|
||||||
# Force chunking by using a small fake limit would require patching;
|
|
||||||
# instead build content large enough to exceed the real limit.
|
|
||||||
big_a = "# Intro\n\n" + "a" * (_MAX_CHUNK_CHARS - 50)
|
|
||||||
big_b = "## Section B\n\n" + "b" * (_MAX_CHUNK_CHARS - 50)
|
|
||||||
combined = big_a + "\n" + big_b
|
|
||||||
chunks = _chunk_content(combined)
|
|
||||||
assert len(chunks) >= 2
|
|
||||||
assert any("Section B" in c for c in chunks)
|
|
||||||
|
|
||||||
|
|
||||||
def test_chunk_fixed_window_fallback():
|
|
||||||
# Content with no ## headings but > MAX_CHUNK_CHARS
|
|
||||||
content = "word " * (_MAX_CHUNK_CHARS // 5 + 100)
|
|
||||||
chunks = _chunk_content(content)
|
|
||||||
assert len(chunks) >= 2
|
|
||||||
for c in chunks:
|
|
||||||
assert len(c) <= _MAX_CHUNK_CHARS
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# ingest_file
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def test_ingest_file_returns_entry(tmp_path):
|
|
||||||
archive = _make_archive(tmp_path)
|
|
||||||
doc = tmp_path / "notes.md"
|
|
||||||
doc.write_text("# My Notes\n\nHello world.")
|
|
||||||
entries = ingest_file(archive, doc)
|
|
||||||
assert len(entries) == 1
|
|
||||||
assert entries[0].title == "My Notes"
|
|
||||||
assert entries[0].source == "file"
|
|
||||||
assert "Hello world" in entries[0].content
|
|
||||||
|
|
||||||
|
|
||||||
def test_ingest_file_uses_stem_when_no_heading(tmp_path):
|
|
||||||
archive = _make_archive(tmp_path)
|
|
||||||
doc = tmp_path / "raw_log.txt"
|
|
||||||
doc.write_text("Just some plain text without a heading.")
|
|
||||||
entries = ingest_file(archive, doc)
|
|
||||||
assert entries[0].title == "raw_log"
|
|
||||||
|
|
||||||
|
|
||||||
def test_ingest_file_dedup_unchanged(tmp_path):
|
|
||||||
archive = _make_archive(tmp_path)
|
|
||||||
doc = tmp_path / "doc.md"
|
|
||||||
doc.write_text("# Title\n\nContent.")
|
|
||||||
entries1 = ingest_file(archive, doc)
|
|
||||||
assert archive.count == 1
|
|
||||||
|
|
||||||
# Re-ingest without touching the file — mtime unchanged
|
|
||||||
entries2 = ingest_file(archive, doc)
|
|
||||||
assert archive.count == 1 # no duplicate
|
|
||||||
assert entries2[0].id == entries1[0].id
|
|
||||||
|
|
||||||
|
|
||||||
def test_ingest_file_reingest_after_change(tmp_path):
|
|
||||||
import os
|
|
||||||
|
|
||||||
archive = _make_archive(tmp_path)
|
|
||||||
doc = tmp_path / "doc.md"
|
|
||||||
doc.write_text("# Title\n\nOriginal content.")
|
|
||||||
ingest_file(archive, doc)
|
|
||||||
assert archive.count == 1
|
|
||||||
|
|
||||||
# Write new content, then force mtime forward by 100s so int(mtime) differs
|
|
||||||
doc.write_text("# Title\n\nUpdated content.")
|
|
||||||
new_mtime = doc.stat().st_mtime + 100
|
|
||||||
os.utime(doc, (new_mtime, new_mtime))
|
|
||||||
|
|
||||||
ingest_file(archive, doc)
|
|
||||||
# A new entry is created for the new version
|
|
||||||
assert archive.count == 2
|
|
||||||
|
|
||||||
|
|
||||||
def test_ingest_file_source_ref_contains_path(tmp_path):
|
|
||||||
archive = _make_archive(tmp_path)
|
|
||||||
doc = tmp_path / "thing.txt"
|
|
||||||
doc.write_text("Plain text.")
|
|
||||||
entries = ingest_file(archive, doc)
|
|
||||||
assert str(doc) in entries[0].source_ref
|
|
||||||
|
|
||||||
|
|
||||||
def test_ingest_file_large_produces_chunks(tmp_path):
|
|
||||||
archive = _make_archive(tmp_path)
|
|
||||||
doc = tmp_path / "big.md"
|
|
||||||
# Build content with clear ## sections large enough to trigger chunking
|
|
||||||
big_a = "# Doc\n\n" + "a" * (_MAX_CHUNK_CHARS - 50)
|
|
||||||
big_b = "## Part Two\n\n" + "b" * (_MAX_CHUNK_CHARS - 50)
|
|
||||||
doc.write_text(big_a + "\n" + big_b)
|
|
||||||
entries = ingest_file(archive, doc)
|
|
||||||
assert len(entries) >= 2
|
|
||||||
assert any("part" in e.title.lower() for e in entries)
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# ingest_directory
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def test_ingest_directory_basic(tmp_path):
|
|
||||||
archive = _make_archive(tmp_path)
|
|
||||||
docs = tmp_path / "docs"
|
|
||||||
docs.mkdir()
|
|
||||||
(docs / "a.md").write_text("# Alpha\n\nFirst doc.")
|
|
||||||
(docs / "b.txt").write_text("Beta plain text.")
|
|
||||||
(docs / "skip.py").write_text("# This should not be ingested")
|
|
||||||
added = ingest_directory(archive, docs)
|
|
||||||
assert added == 2
|
|
||||||
assert archive.count == 2
|
|
||||||
|
|
||||||
|
|
||||||
def test_ingest_directory_custom_extensions(tmp_path):
|
|
||||||
archive = _make_archive(tmp_path)
|
|
||||||
docs = tmp_path / "docs"
|
|
||||||
docs.mkdir()
|
|
||||||
(docs / "a.md").write_text("# Alpha")
|
|
||||||
(docs / "b.py").write_text("No heading — uses stem.")
|
|
||||||
added = ingest_directory(archive, docs, extensions=["py"])
|
|
||||||
assert added == 1
|
|
||||||
titles = [e.title for e in archive._entries.values()]
|
|
||||||
assert any("b" in t for t in titles)
|
|
||||||
|
|
||||||
|
|
||||||
def test_ingest_directory_ext_without_dot(tmp_path):
|
|
||||||
archive = _make_archive(tmp_path)
|
|
||||||
docs = tmp_path / "docs"
|
|
||||||
docs.mkdir()
|
|
||||||
(docs / "notes.md").write_text("# Notes\n\nContent.")
|
|
||||||
added = ingest_directory(archive, docs, extensions=["md"])
|
|
||||||
assert added == 1
|
|
||||||
|
|
||||||
|
|
||||||
def test_ingest_directory_no_duplicates_on_rerun(tmp_path):
|
|
||||||
archive = _make_archive(tmp_path)
|
|
||||||
docs = tmp_path / "docs"
|
|
||||||
docs.mkdir()
|
|
||||||
(docs / "file.md").write_text("# Stable\n\nSame content.")
|
|
||||||
ingest_directory(archive, docs)
|
|
||||||
assert archive.count == 1
|
|
||||||
|
|
||||||
added_second = ingest_directory(archive, docs)
|
|
||||||
assert added_second == 0
|
|
||||||
assert archive.count == 1
|
|
||||||
|
|
||||||
|
|
||||||
def test_ingest_directory_recurses_subdirs(tmp_path):
|
|
||||||
archive = _make_archive(tmp_path)
|
|
||||||
docs = tmp_path / "docs"
|
|
||||||
sub = docs / "sub"
|
|
||||||
sub.mkdir(parents=True)
|
|
||||||
(docs / "top.md").write_text("# Top level")
|
|
||||||
(sub / "nested.md").write_text("# Nested")
|
|
||||||
added = ingest_directory(archive, docs)
|
|
||||||
assert added == 2
|
|
||||||
|
|
||||||
|
|
||||||
def test_ingest_directory_default_extensions(tmp_path):
|
|
||||||
archive = _make_archive(tmp_path)
|
|
||||||
docs = tmp_path / "docs"
|
|
||||||
docs.mkdir()
|
|
||||||
(docs / "a.md").write_text("markdown")
|
|
||||||
(docs / "b.txt").write_text("text")
|
|
||||||
(docs / "c.json").write_text('{"key": "value"}')
|
|
||||||
(docs / "d.yaml").write_text("key: value")
|
|
||||||
added = ingest_directory(archive, docs)
|
|
||||||
assert added == 3 # md, txt, json — not yaml
|
|
||||||
@@ -1,106 +0,0 @@
|
|||||||
"""Tests for MnemosyneArchive.shortest_path and path_explanation."""
|
|
||||||
|
|
||||||
from nexus.mnemosyne.archive import MnemosyneArchive
|
|
||||||
from nexus.mnemosyne.entry import ArchiveEntry
|
|
||||||
|
|
||||||
|
|
||||||
def _make_archive(tmp_path):
|
|
||||||
archive = MnemosyneArchive(str(tmp_path / "test_archive.json"))
|
|
||||||
return archive
|
|
||||||
|
|
||||||
|
|
||||||
class TestShortestPath:
|
|
||||||
def test_direct_connection(self, tmp_path):
|
|
||||||
archive = _make_archive(tmp_path)
|
|
||||||
a = archive.add("Alpha", "first entry", topics=["start"])
|
|
||||||
b = archive.add("Beta", "second entry", topics=["end"])
|
|
||||||
# Manually link
|
|
||||||
a.links.append(b.id)
|
|
||||||
b.links.append(a.id)
|
|
||||||
archive._entries[a.id] = a
|
|
||||||
archive._entries[b.id] = b
|
|
||||||
archive._save()
|
|
||||||
|
|
||||||
path = archive.shortest_path(a.id, b.id)
|
|
||||||
assert path == [a.id, b.id]
|
|
||||||
|
|
||||||
def test_multi_hop_path(self, tmp_path):
|
|
||||||
archive = _make_archive(tmp_path)
|
|
||||||
a = archive.add("A", "alpha", topics=["x"])
|
|
||||||
b = archive.add("B", "beta", topics=["y"])
|
|
||||||
c = archive.add("C", "gamma", topics=["z"])
|
|
||||||
# Chain: A -> B -> C
|
|
||||||
a.links.append(b.id)
|
|
||||||
b.links.extend([a.id, c.id])
|
|
||||||
c.links.append(b.id)
|
|
||||||
archive._entries[a.id] = a
|
|
||||||
archive._entries[b.id] = b
|
|
||||||
archive._entries[c.id] = c
|
|
||||||
archive._save()
|
|
||||||
|
|
||||||
path = archive.shortest_path(a.id, c.id)
|
|
||||||
assert path == [a.id, b.id, c.id]
|
|
||||||
|
|
||||||
def test_no_path(self, tmp_path):
|
|
||||||
archive = _make_archive(tmp_path)
|
|
||||||
a = archive.add("A", "isolated", topics=[])
|
|
||||||
b = archive.add("B", "also isolated", topics=[])
|
|
||||||
path = archive.shortest_path(a.id, b.id)
|
|
||||||
assert path is None
|
|
||||||
|
|
||||||
def test_same_entry(self, tmp_path):
|
|
||||||
archive = _make_archive(tmp_path)
|
|
||||||
a = archive.add("A", "lonely", topics=[])
|
|
||||||
path = archive.shortest_path(a.id, a.id)
|
|
||||||
assert path == [a.id]
|
|
||||||
|
|
||||||
def test_nonexistent_entry(self, tmp_path):
|
|
||||||
archive = _make_archive(tmp_path)
|
|
||||||
a = archive.add("A", "exists", topics=[])
|
|
||||||
path = archive.shortest_path("fake-id", a.id)
|
|
||||||
assert path is None
|
|
||||||
|
|
||||||
def test_shortest_of_multiple(self, tmp_path):
|
|
||||||
"""When multiple paths exist, BFS returns shortest."""
|
|
||||||
archive = _make_archive(tmp_path)
|
|
||||||
a = archive.add("A", "a", topics=[])
|
|
||||||
b = archive.add("B", "b", topics=[])
|
|
||||||
c = archive.add("C", "c", topics=[])
|
|
||||||
d = archive.add("D", "d", topics=[])
|
|
||||||
# A -> B -> D (short)
|
|
||||||
# A -> C -> B -> D (long)
|
|
||||||
a.links.extend([b.id, c.id])
|
|
||||||
b.links.extend([a.id, d.id, c.id])
|
|
||||||
c.links.extend([a.id, b.id])
|
|
||||||
d.links.append(b.id)
|
|
||||||
for e in [a, b, c, d]:
|
|
||||||
archive._entries[e.id] = e
|
|
||||||
archive._save()
|
|
||||||
|
|
||||||
path = archive.shortest_path(a.id, d.id)
|
|
||||||
assert len(path) == 3 # A -> B -> D, not A -> C -> B -> D
|
|
||||||
|
|
||||||
|
|
||||||
class TestPathExplanation:
|
|
||||||
def test_returns_step_details(self, tmp_path):
|
|
||||||
archive = _make_archive(tmp_path)
|
|
||||||
a = archive.add("Alpha", "the beginning", topics=["origin"])
|
|
||||||
b = archive.add("Beta", "the middle", topics=["process"])
|
|
||||||
a.links.append(b.id)
|
|
||||||
b.links.append(a.id)
|
|
||||||
archive._entries[a.id] = a
|
|
||||||
archive._entries[b.id] = b
|
|
||||||
archive._save()
|
|
||||||
|
|
||||||
path = [a.id, b.id]
|
|
||||||
steps = archive.path_explanation(path)
|
|
||||||
assert len(steps) == 2
|
|
||||||
assert steps[0]["title"] == "Alpha"
|
|
||||||
assert steps[1]["title"] == "Beta"
|
|
||||||
assert "origin" in steps[0]["topics"]
|
|
||||||
|
|
||||||
def test_content_preview_truncation(self, tmp_path):
|
|
||||||
archive = _make_archive(tmp_path)
|
|
||||||
a = archive.add("A", "x" * 200, topics=[])
|
|
||||||
steps = archive.path_explanation([a.id])
|
|
||||||
assert len(steps[0]["content_preview"]) <= 123 # 120 + "..."
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
# Test resonance
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
# Test snapshot
|
|
||||||
@@ -1,240 +0,0 @@
|
|||||||
"""Tests for Mnemosyne snapshot (point-in-time backup/restore) feature."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
import tempfile
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from nexus.mnemosyne.archive import MnemosyneArchive
|
|
||||||
from nexus.mnemosyne.ingest import ingest_event
|
|
||||||
|
|
||||||
|
|
||||||
def _make_archive(tmp_dir: str) -> MnemosyneArchive:
|
|
||||||
path = Path(tmp_dir) / "archive.json"
|
|
||||||
return MnemosyneArchive(archive_path=path, auto_embed=False)
|
|
||||||
|
|
||||||
|
|
||||||
# ─── snapshot_create ─────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def test_snapshot_create_returns_metadata():
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
archive = _make_archive(tmp)
|
|
||||||
ingest_event(archive, title="Alpha", content="First entry", topics=["a"])
|
|
||||||
ingest_event(archive, title="Beta", content="Second entry", topics=["b"])
|
|
||||||
|
|
||||||
result = archive.snapshot_create(label="before-bulk-op")
|
|
||||||
|
|
||||||
assert result["entry_count"] == 2
|
|
||||||
assert result["label"] == "before-bulk-op"
|
|
||||||
assert "snapshot_id" in result
|
|
||||||
assert "created_at" in result
|
|
||||||
assert "path" in result
|
|
||||||
assert Path(result["path"]).exists()
|
|
||||||
|
|
||||||
|
|
||||||
def test_snapshot_create_no_label():
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
archive = _make_archive(tmp)
|
|
||||||
ingest_event(archive, title="Gamma", content="Third entry", topics=[])
|
|
||||||
|
|
||||||
result = archive.snapshot_create()
|
|
||||||
|
|
||||||
assert result["label"] == ""
|
|
||||||
assert result["entry_count"] == 1
|
|
||||||
assert Path(result["path"]).exists()
|
|
||||||
|
|
||||||
|
|
||||||
def test_snapshot_file_contains_entries():
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
archive = _make_archive(tmp)
|
|
||||||
e = ingest_event(archive, title="Delta", content="Fourth entry", topics=["d"])
|
|
||||||
result = archive.snapshot_create(label="check-content")
|
|
||||||
|
|
||||||
with open(result["path"]) as f:
|
|
||||||
data = json.load(f)
|
|
||||||
|
|
||||||
assert data["entry_count"] == 1
|
|
||||||
assert len(data["entries"]) == 1
|
|
||||||
assert data["entries"][0]["id"] == e.id
|
|
||||||
assert data["entries"][0]["title"] == "Delta"
|
|
||||||
|
|
||||||
|
|
||||||
def test_snapshot_create_empty_archive():
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
archive = _make_archive(tmp)
|
|
||||||
result = archive.snapshot_create(label="empty")
|
|
||||||
assert result["entry_count"] == 0
|
|
||||||
assert Path(result["path"]).exists()
|
|
||||||
|
|
||||||
|
|
||||||
# ─── snapshot_list ───────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def test_snapshot_list_empty():
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
archive = _make_archive(tmp)
|
|
||||||
assert archive.snapshot_list() == []
|
|
||||||
|
|
||||||
|
|
||||||
def test_snapshot_list_returns_all():
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
archive = _make_archive(tmp)
|
|
||||||
ingest_event(archive, title="One", content="c1", topics=[])
|
|
||||||
archive.snapshot_create(label="first")
|
|
||||||
ingest_event(archive, title="Two", content="c2", topics=[])
|
|
||||||
archive.snapshot_create(label="second")
|
|
||||||
|
|
||||||
snapshots = archive.snapshot_list()
|
|
||||||
assert len(snapshots) == 2
|
|
||||||
labels = {s["label"] for s in snapshots}
|
|
||||||
assert "first" in labels
|
|
||||||
assert "second" in labels
|
|
||||||
|
|
||||||
|
|
||||||
def test_snapshot_list_metadata_fields():
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
archive = _make_archive(tmp)
|
|
||||||
archive.snapshot_create(label="meta-check")
|
|
||||||
snapshots = archive.snapshot_list()
|
|
||||||
s = snapshots[0]
|
|
||||||
for key in ("snapshot_id", "label", "created_at", "entry_count", "path"):
|
|
||||||
assert key in s
|
|
||||||
|
|
||||||
|
|
||||||
def test_snapshot_list_newest_first():
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
archive = _make_archive(tmp)
|
|
||||||
archive.snapshot_create(label="a")
|
|
||||||
archive.snapshot_create(label="b")
|
|
||||||
snapshots = archive.snapshot_list()
|
|
||||||
# Filenames sort lexicographically; newest (b) should be first
|
|
||||||
# (filenames include timestamp so alphabetical = newest-last;
|
|
||||||
# snapshot_list reverses the glob order → newest first)
|
|
||||||
assert len(snapshots) == 2
|
|
||||||
# Both should be present; ordering is newest first
|
|
||||||
ids = [s["snapshot_id"] for s in snapshots]
|
|
||||||
assert ids == sorted(ids, reverse=True)
|
|
||||||
|
|
||||||
|
|
||||||
# ─── snapshot_restore ────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def test_snapshot_restore_replaces_entries():
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
archive = _make_archive(tmp)
|
|
||||||
ingest_event(archive, title="Kept", content="original content", topics=["orig"])
|
|
||||||
snap = archive.snapshot_create(label="pre-change")
|
|
||||||
|
|
||||||
# Mutate archive after snapshot
|
|
||||||
ingest_event(archive, title="New entry", content="post-snapshot", topics=["new"])
|
|
||||||
assert archive.count == 2
|
|
||||||
|
|
||||||
result = archive.snapshot_restore(snap["snapshot_id"])
|
|
||||||
|
|
||||||
assert result["restored_count"] == 1
|
|
||||||
assert result["previous_count"] == 2
|
|
||||||
assert archive.count == 1
|
|
||||||
entry = list(archive._entries.values())[0]
|
|
||||||
assert entry.title == "Kept"
|
|
||||||
|
|
||||||
|
|
||||||
def test_snapshot_restore_persists_to_disk():
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
path = Path(tmp) / "archive.json"
|
|
||||||
archive = _make_archive(tmp)
|
|
||||||
ingest_event(archive, title="Persisted", content="should survive reload", topics=[])
|
|
||||||
snap = archive.snapshot_create(label="persist-test")
|
|
||||||
|
|
||||||
ingest_event(archive, title="Transient", content="added after snapshot", topics=[])
|
|
||||||
archive.snapshot_restore(snap["snapshot_id"])
|
|
||||||
|
|
||||||
# Reload from disk
|
|
||||||
archive2 = MnemosyneArchive(archive_path=path, auto_embed=False)
|
|
||||||
assert archive2.count == 1
|
|
||||||
assert list(archive2._entries.values())[0].title == "Persisted"
|
|
||||||
|
|
||||||
|
|
||||||
def test_snapshot_restore_missing_raises():
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
archive = _make_archive(tmp)
|
|
||||||
with pytest.raises(FileNotFoundError):
|
|
||||||
archive.snapshot_restore("nonexistent_snapshot_id")
|
|
||||||
|
|
||||||
|
|
||||||
# ─── snapshot_diff ───────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def test_snapshot_diff_no_changes():
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
archive = _make_archive(tmp)
|
|
||||||
ingest_event(archive, title="Stable", content="unchanged content", topics=[])
|
|
||||||
snap = archive.snapshot_create(label="baseline")
|
|
||||||
|
|
||||||
diff = archive.snapshot_diff(snap["snapshot_id"])
|
|
||||||
|
|
||||||
assert diff["added"] == []
|
|
||||||
assert diff["removed"] == []
|
|
||||||
assert diff["modified"] == []
|
|
||||||
assert diff["unchanged"] == 1
|
|
||||||
|
|
||||||
|
|
||||||
def test_snapshot_diff_detects_added():
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
archive = _make_archive(tmp)
|
|
||||||
ingest_event(archive, title="Original", content="existing", topics=[])
|
|
||||||
snap = archive.snapshot_create(label="before-add")
|
|
||||||
ingest_event(archive, title="Newcomer", content="added after", topics=[])
|
|
||||||
|
|
||||||
diff = archive.snapshot_diff(snap["snapshot_id"])
|
|
||||||
|
|
||||||
assert len(diff["added"]) == 1
|
|
||||||
assert diff["added"][0]["title"] == "Newcomer"
|
|
||||||
assert diff["removed"] == []
|
|
||||||
assert diff["unchanged"] == 1
|
|
||||||
|
|
||||||
|
|
||||||
def test_snapshot_diff_detects_removed():
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
archive = _make_archive(tmp)
|
|
||||||
e1 = ingest_event(archive, title="Will Be Removed", content="doomed", topics=[])
|
|
||||||
ingest_event(archive, title="Survivor", content="stays", topics=[])
|
|
||||||
snap = archive.snapshot_create(label="pre-removal")
|
|
||||||
archive.remove(e1.id)
|
|
||||||
|
|
||||||
diff = archive.snapshot_diff(snap["snapshot_id"])
|
|
||||||
|
|
||||||
assert len(diff["removed"]) == 1
|
|
||||||
assert diff["removed"][0]["title"] == "Will Be Removed"
|
|
||||||
assert diff["added"] == []
|
|
||||||
assert diff["unchanged"] == 1
|
|
||||||
|
|
||||||
|
|
||||||
def test_snapshot_diff_detects_modified():
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
archive = _make_archive(tmp)
|
|
||||||
e = ingest_event(archive, title="Mutable", content="original content", topics=[])
|
|
||||||
snap = archive.snapshot_create(label="pre-edit")
|
|
||||||
archive.update_entry(e.id, content="updated content", auto_link=False)
|
|
||||||
|
|
||||||
diff = archive.snapshot_diff(snap["snapshot_id"])
|
|
||||||
|
|
||||||
assert len(diff["modified"]) == 1
|
|
||||||
assert diff["modified"][0]["title"] == "Mutable"
|
|
||||||
assert diff["modified"][0]["snapshot_hash"] != diff["modified"][0]["current_hash"]
|
|
||||||
assert diff["added"] == []
|
|
||||||
assert diff["removed"] == []
|
|
||||||
|
|
||||||
|
|
||||||
def test_snapshot_diff_missing_raises():
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
archive = _make_archive(tmp)
|
|
||||||
with pytest.raises(FileNotFoundError):
|
|
||||||
archive.snapshot_diff("no_such_snapshot")
|
|
||||||
|
|
||||||
|
|
||||||
def test_snapshot_diff_includes_snapshot_id():
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
archive = _make_archive(tmp)
|
|
||||||
snap = archive.snapshot_create(label="id-check")
|
|
||||||
diff = archive.snapshot_diff(snap["snapshot_id"])
|
|
||||||
assert diff["snapshot_id"] == snap["snapshot_id"]
|
|
||||||
@@ -1,5 +1,27 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
echo "Running GOFAI guardrails..."
|
# [Mnemosyne] Agent Guardrails — The Nexus
|
||||||
# Syntax checks
|
# Validates code integrity and scans for secrets before deployment.
|
||||||
find . -name "*.js" -exec node --check {} +
|
|
||||||
echo "Guardrails passed."
|
echo "--- [Mnemosyne] Running Guardrails ---"
|
||||||
|
|
||||||
|
# 1. Syntax Checks
|
||||||
|
echo "[1/3] Validating syntax..."
|
||||||
|
for f in ; do
|
||||||
|
node --check "$f" || { echo "Syntax error in $f"; exit 1; }
|
||||||
|
done
|
||||||
|
echo "Syntax OK."
|
||||||
|
|
||||||
|
# 2. JSON/YAML Validation
|
||||||
|
echo "[2/3] Validating configs..."
|
||||||
|
for f in ; do
|
||||||
|
node -e "JSON.parse(require('fs').readFileSync('$f'))" || { echo "Invalid JSON: $f"; exit 1; }
|
||||||
|
done
|
||||||
|
echo "Configs OK."
|
||||||
|
|
||||||
|
# 3. Secret Scan
|
||||||
|
echo "[3/3] Scanning for secrets..."
|
||||||
|
grep -rE "AI_|TOKEN|KEY|SECRET" . --exclude-dir=node_modules --exclude=guardrails.sh | grep -v "process.env" && {
|
||||||
|
echo "WARNING: Potential secrets found!"
|
||||||
|
} || echo "No secrets detected."
|
||||||
|
|
||||||
|
echo "--- Guardrails Passed ---"
|
||||||
|
|||||||
@@ -1,4 +1,26 @@
|
|||||||
|
/**
|
||||||
|
* [Mnemosyne] Smoke Test — The Nexus
|
||||||
|
* Verifies core components are loadable and basic state is consistent.
|
||||||
|
*/
|
||||||
|
|
||||||
import MemoryOptimizer from '../nexus/components/memory-optimizer.js';
|
import { SpatialMemory } from '../nexus/components/spatial-memory.js';
|
||||||
const optimizer = new MemoryOptimizer();
|
import { MemoryOptimizer } from '../nexus/components/memory-optimizer.js';
|
||||||
console.log('Smoke test passed');
|
|
||||||
|
console.log('--- [Mnemosyne] Running Smoke Test ---');
|
||||||
|
|
||||||
|
// 1. Verify Components
|
||||||
|
if (!SpatialMemory || !MemoryOptimizer) {
|
||||||
|
console.error('Failed to load core components');
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
console.log('Components loaded.');
|
||||||
|
|
||||||
|
// 2. Verify Regions
|
||||||
|
const regions = Object.keys(SpatialMemory.REGIONS || {});
|
||||||
|
if (regions.length < 5) {
|
||||||
|
console.error('SpatialMemory regions incomplete:', regions);
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
console.log('Regions verified:', regions.join(', '));
|
||||||
|
|
||||||
|
console.log('--- Smoke Test Passed ---');
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ const ASSETS_TO_CACHE = [
|
|||||||
|
|
||||||
self.addEventListener('install', (event) => {
|
self.addEventListener('install', (event) => {
|
||||||
event.waitUntil(
|
event.waitUntil(
|
||||||
caches.open(CACHE_NAME).then(cache => {
|
caches.open(CachedName).then(cache => {
|
||||||
return cache.addAll(ASSETS_TO_CACHE);
|
return cache.addAll(ASSETS_TO_CACHE);
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
|
|||||||
Reference in New Issue
Block a user