80 lines
2.3 KiB
Bash
80 lines
2.3 KiB
Bash
#!/usr/bin/env bash
|
|
# pipeline_knowledge_mine.sh — Extract structured knowledge from session archives.
|
|
#
|
|
# Mines sessions for facts, skills, and patterns. Populates memory palaces.
|
|
#
|
|
# Usage:
#   ./scripts/pipeline_knowledge_mine.sh --max-tokens 80000
#   ./scripts/pipeline_knowledge_mine.sh --dry-run
|
|
|
|
# Strict mode: abort on command failure, on use of an unset variable, and on a
# failure in any stage of a pipeline.
set -euo pipefail

# Locations and limits. HERMES_HOME, KNOWLEDGE_DIR and MAX_TOKENS may be
# overridden from the environment; the rest are fixed starting values.
HERMES_HOME="${HERMES_HOME:-$HOME/.hermes}"
SESSIONS_DIR="${HERMES_HOME}/sessions"
KNOWLEDGE_DIR="${KNOWLEDGE_DIR:-$HOME/.timmy/knowledge}"
MAX_TOKENS="${MAX_TOKENS:-80000}"
DRY_RUN=false
TOKENS_USED=0

#######################################
# Parse command-line flags into the global settings.
# Globals:
#   MAX_TOKENS (read/written), DRY_RUN (written)
# Arguments:
#   --max-tokens N   token budget for the run (non-negative integer)
#   --dry-run        only report which sessions would be mined
#   anything else is ignored, with a warning on stderr
# Outputs:
#   diagnostics on stderr
# Returns:
#   0 on success; exits the shell with status 2 when --max-tokens has no
#   value or the resulting budget is not a non-negative integer.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --max-tokens)
        # Without this guard "$2" is unset and `set -u` kills the script with
        # an unhelpful "unbound variable" message.
        if [[ $# -lt 2 ]]; then
          printf 'error: --max-tokens requires a value\n' >&2
          exit 2
        fi
        MAX_TOKENS="$2"
        shift 2
        ;;
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      *)
        # Unknown arguments were always skipped; keep that, but say so.
        printf 'warning: ignoring unrecognised argument: %s\n' "$1" >&2
        shift
        ;;
    esac
  done

  # Validate here so a bad value from the environment is caught as well.
  if ! [[ "$MAX_TOKENS" =~ ^[0-9]+$ ]]; then
    printf 'error: token budget must be a non-negative integer, got: %s\n' \
      "$MAX_TOKENS" >&2
    exit 2
  fi
  # Force base 10 so values such as "08" are not rejected as invalid octal in
  # later arithmetic comparisons.
  MAX_TOKENS=$((10#$MAX_TOKENS))
}

parse_args "$@"
|
|
|
|
# Write one line to stdout: the arguments joined into a single message,
# prefixed with the tool tag and the current wall-clock time (HH:MM:SS).
log() {
  printf '[knowledge-mine %s] %s\n' "$(date '+%H:%M:%S')" "$*"
}
|
|
|
|
# Flat per-session charge against the token budget. Nothing in this script
# measures real token usage, so this is an estimate only.
TOKENS_PER_SESSION=2000

#######################################
# Extract "action" facts from one session file.
# A fact is recorded for every assistant message whose text contains one of
# the keywords saved/created/deployed/fixed/merged/configured; the summary is
# the first 300 characters of that message. When at least one fact is found
# they are written to <knowledge_dir>/<session name minus .json>.facts.json.
# Arguments:
#   $1 - path to the session JSON file
#   $2 - directory that receives the .facts.json file
# Outputs:
#   the number of facts found, on stdout
# Returns:
#   python3's exit status (non-zero when the file is unreadable or has an
#   unexpected structure)
#######################################
mine_session() {
  # Paths travel as argv and the program comes from a quoted heredoc, so no
  # shell value is ever spliced into Python source code.
  python3 - "$1" "$2" <<'PY'
import json
import os
import sys

session_path, knowledge_dir = sys.argv[1], sys.argv[2]
KEYWORDS = ('saved', 'created', 'deployed', 'fixed', 'merged', 'configured')

with open(session_path, encoding='utf-8') as f:
    data = json.load(f)

session_name = os.path.basename(session_path)
facts = []
for msg in data.get('messages', []):
    if not isinstance(msg, dict) or msg.get('role') != 'assistant':
        continue
    content = msg.get('content')
    # Skip messages whose content is missing or not plain text instead of
    # failing the whole session on .lower().
    if not isinstance(content, str):
        continue
    lowered = content.lower()
    if any(kw in lowered for kw in KEYWORDS):
        facts.append({
            'session': session_name,
            'summary': content[:300],
            'type': 'action',
        })

if facts:
    stem, _ = os.path.splitext(session_name)
    outpath = os.path.join(knowledge_dir, stem + '.facts.json')
    with open(outpath, 'w', encoding='utf-8') as f:
        json.dump(facts, f, indent=2)

print(len(facts))
PY
}

mkdir -p "$KNOWLEDGE_DIR"

# Find sessions from the last 24 hours: at most 30, in sorted path order.
# Reading line by line into an array keeps paths containing spaces intact,
# and a missing sessions directory simply yields an empty list instead of
# aborting the script through `set -e` + `pipefail`.
RECENT_SESSIONS=()
if [[ -d "$SESSIONS_DIR" ]]; then
  while IFS= read -r session_file; do
    RECENT_SESSIONS+=("$session_file")
    [[ ${#RECENT_SESSIONS[@]} -lt 30 ]] || break
  done < <(find "$SESSIONS_DIR" -name 'session_*.json' -type f -mtime -1 2>/dev/null | sort)
fi
SESSION_COUNT=${#RECENT_SESSIONS[@]}

log "Mining $SESSION_COUNT recent sessions"

EXTRACTED=0
# Index-based loop: expanding an empty array under `set -u` is an error in
# older bash releases.
for ((i = 0; i < SESSION_COUNT; i++)); do
  session_file=${RECENT_SESSIONS[i]}
  session_name=${session_file##*/}

  if [[ "$TOKENS_USED" -ge "$MAX_TOKENS" ]]; then
    log "Token budget exhausted. Stopping."
    break
  fi

  if [[ "$DRY_RUN" == true ]]; then
    # Dry runs charge nothing against the budget.
    log "DRY-RUN: Would mine $session_name"
    continue
  fi

  # Mining is best-effort: one bad session must not stop the run. The Python
  # traceback is suppressed on purpose; the failure is reported via log.
  if fact_count=$(mine_session "$session_file" "$KNOWLEDGE_DIR" 2>/dev/null); then
    # Count only sessions that actually produced a facts file.
    if [[ "$fact_count" =~ ^[0-9]+$ && "$fact_count" -gt 0 ]]; then
      EXTRACTED=$((EXTRACTED + 1))
    fi
  else
    log "WARN: could not mine $session_name"
  fi

  TOKENS_USED=$((TOKENS_USED + TOKENS_PER_SESSION))
done

log "Done: $SESSION_COUNT sessions scanned, $EXTRACTED files with extractable knowledge, $TOKENS_USED tokens used."
exit 0
|