Files
timmy-config/scripts/pipeline_knowledge_mine.sh

80 lines
2.3 KiB
Bash

#!/usr/bin/env bash
# pipeline_knowledge_mine.sh — Extract structured knowledge from session archives.
#
# Mines sessions for facts, skills, and patterns. Populates memory palaces.
#
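# Usage:
#   ./scripts/pipeline_knowledge_mine.sh [--max-tokens N] [--dry-run]
#   e.g. ./scripts/pipeline_knowledge_mine.sh --max-tokens 80000
#
#   --max-tokens N  Token budget for the run (default: $MAX_TOKENS, else 80000).
#   --dry-run       Log which sessions would be mined without mining them.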
# Strict mode: abort on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Environment-overridable settings; each keeps a non-empty value supplied by
# the caller and otherwise falls back to the default shown.
: "${HERMES_HOME:=$HOME/.hermes}"
: "${KNOWLEDGE_DIR:=$HOME/.timmy/knowledge}"
: "${MAX_TOKENS:=80000}"

# Session archives are read from a fixed subdirectory of HERMES_HOME.
SESSIONS_DIR="${HERMES_HOME}/sessions"

# Run state: dry-run is off until --dry-run is seen; no tokens spent yet.
DRY_RUN=false
TOKENS_USED=0
#######################################
# Parse command-line flags into the script's settings.
# Globals:
#   MAX_TOKENS (written) - set by --max-tokens N
#   DRY_RUN    (written) - set to "true" by --dry-run
# Arguments:
#   The script's own arguments. Unrecognised arguments are ignored.
# Outputs:
#   An error message on stderr when --max-tokens has no valid value.
# Returns:
#   0 on success; exits the shell with status 2 on a bad --max-tokens.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --max-tokens)
        # ${2-} keeps `set -u` from aborting with "unbound variable" when the
        # flag is the last argument. The digits-only check matters because
        # the budget is later compared arithmetically.
        if [[ ! "${2-}" =~ ^[0-9]+$ ]]; then
          printf '%s\n' \
            "error: --max-tokens requires a non-negative integer" >&2
          exit 2
        fi
        # Force base 10 so a value like 0800 is not rejected as bad octal.
        MAX_TOKENS=$((10#$2))
        shift 2
        ;;
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      *)
        shift
        ;;
    esac
  done
}
parse_args "$@"
# log MESSAGE...
# Print the arguments to stdout as a single line, prefixed with the script
# tag and the current time of day (HH:MM:SS).
log() {
  printf '[knowledge-mine %s] %s\n' "$(date '+%H:%M:%S')" "$*"
}
#######################################
# List recently modified session archives.
# Arguments:
#   $1 - directory to search (recursively)
#   $2 - maximum number of paths to print (default 30)
# Outputs:
#   Up to $2 paths of regular files named session_*.json whose mtime is
#   within the last 24 hours, one per line, in `sort` order.
# Returns:
#   0 always; a missing or partly unreadable directory just yields fewer
#   (or no) paths instead of aborting the script.
#######################################
list_recent_sessions() {
  local dir=${1-}
  local limit=${2:-30}
  [[ -d "$dir" ]] || return 0
  # `|| true`: under `set -o pipefail` a non-zero exit from find (e.g. an
  # unreadable subdirectory) would otherwise fail the whole pipeline.
  # awk, unlike head, consumes all of its input, so sort is never killed by
  # SIGPIPE on a long listing — which pipefail would also treat as fatal.
  { find "$dir" -name 'session_*.json' -type f -mtime -1 2>/dev/null || true; } \
    | sort \
    | awk -v limit="$limit" 'NR <= limit'
}

#######################################
# Count the non-empty lines of a string.
# Arguments: $1 - text to inspect
# Outputs:   the count on stdout (0 for empty text)
#######################################
count_nonempty_lines() {
  # grep -c already prints "0" when nothing matches but exits 1; `|| true`
  # absorbs that status without printing a second "0".
  grep -c '.' <<<"${1-}" || true
}

mkdir -p "$KNOWLEDGE_DIR"
# Find sessions from the last 24 hours (newline-separated list of paths)
RECENT=$(list_recent_sessions "$SESSIONS_DIR")
SESSION_COUNT=$(count_nonempty_lines "$RECENT")
log "Mining $SESSION_COUNT recent sessions"
#######################################
# Extract "action" facts from one session archive.
#
# Loads the file as JSON and scans the assistant messages in its top-level
# "messages" list for the keywords saved / created / deployed / fixed /
# merged / configured (case-insensitive). Each matching message becomes
# {session, summary (first 300 characters), type: "action"}. When at least
# one fact is found, the list is written to
# OUT_DIR/<session basename with ".json" replaced by ".facts.json">.
#
# Paths reach Python through argv instead of being pasted into the program
# text, so quotes or backslashes in a path cannot alter the code.
#
# Arguments: $1 - session JSON file, $2 - output directory
# Outputs:   the number of facts found, on stdout
# Returns:   non-zero when the file cannot be read, parsed, or written
#######################################
mine_session() {
  python3 - "$1" "$2" <<'PY'
import json
import os
import sys

session_path, out_dir = sys.argv[1], sys.argv[2]
KEYWORDS = ('saved', 'created', 'deployed', 'fixed', 'merged', 'configured')

with open(session_path, encoding='utf-8') as f:
    data = json.load(f)

session_name = os.path.basename(session_path)
facts = []
for msg in data.get('messages') or []:
    if not isinstance(msg, dict) or msg.get('role') != 'assistant':
        continue
    content = msg.get('content')
    # content may be null or structured (non-string); such messages carry
    # no text to scan, so skip them instead of crashing on .lower().
    if not isinstance(content, str):
        continue
    lowered = content.lower()
    # Look for memory saves, skill creates, config changes
    if any(kw in lowered for kw in KEYWORDS):
        facts.append({
            'session': session_name,
            'summary': content[:300],
            'type': 'action',
        })

if facts:
    outpath = os.path.join(out_dir, session_name.replace('.json', '.facts.json'))
    with open(outpath, 'w') as f:
        json.dump(facts, f, indent=2)
print(len(facts))
PY
}

EXTRACTED=0
fact_count=0
# RECENT holds one path per line. Reading it line by line (on fd 3, so that
# nothing inside the loop can consume the list) keeps paths containing
# spaces or glob characters intact.
while IFS= read -r -u 3 session_file; do
  [[ -n "$session_file" ]] || continue

  if [[ "$TOKENS_USED" -ge "$MAX_TOKENS" ]]; then
    log "Token budget exhausted. Stopping."
    break
  fi

  if [[ "$DRY_RUN" == true ]]; then
    log "DRY-RUN: Would mine $(basename "$session_file")"
    continue
  fi

  # Mining is best-effort: an unreadable or malformed session is reported
  # on stderr and skipped rather than aborting the run.
  if fact_count=$(mine_session "$session_file" "$KNOWLEDGE_DIR" 2>/dev/null); then
    # Echo the per-session count so the script's stdout stays as before.
    printf '%s\n' "$fact_count"
    # Only sessions that actually produced a facts file are counted.
    if [[ "$fact_count" =~ ^[0-9]+$ ]] && ((fact_count > 0)); then
      EXTRACTED=$((EXTRACTED + 1))
    fi
  else
    log "WARN: could not mine $(basename "$session_file")" >&2
  fi

  # Flat per-session estimate charged against the token budget.
  TOKENS_USED=$((TOKENS_USED + 2000))
done 3<<<"${RECENT:-}"
# Final report built from the run's counters (SESSION_COUNT, EXTRACTED,
# TOKENS_USED); the script then always exits with status 0.
summary="Done: $SESSION_COUNT sessions scanned, $EXTRACTED files with extractable knowledge, $TOKENS_USED tokens used."
log "$summary"
exit 0