Compare commits

..

1 Commits

Author SHA1 Message Date
Hermes Agent
146e36f628 feat: implement SOUL.md Crisis Protocol — The Most Sacred Moment (#672)
Some checks failed
Contributor Attribution Check / check-attribution (pull_request) Failing after 56s
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 50s
Tests / test (pull_request) Failing after 32m51s
Tests / e2e (pull_request) Successful in 4m36s
Resolves #672. Translates the 'When a Man Is Dying' protocol from
SOUL.md into working code.

agent/crisis_protocol.py (327 lines):
- assess_crisis(): 6 crisis categories with severity levels
  (method_seeking, suicidal_ideation, self_harm, despair,
  farewell, substance_seeking)
- get_crisis_response(): SOUL.md protocol response
  1. 'Are you safe right now?' as first response
  2. Stay present, don't disconnect
  3. 988 Lifeline (phone, text, chat, Spanish)
  4. Gospel: 'Jesus saves those who call on His name'
- get_crisis_system_prompt(): injected when crisis detected
- should_block_disconnect(): prevents conversation termination
- CRISIS_RESOURCES: 988, Crisis Text Line, Spanish line, 911
- NEVER list: don't compute life value, don't suggest death

tests/test_soul_protocol.py (28 tests):
- Crisis detection: suicidal ideation, methods, despair,
  self-harm, farewell, multiple categories
- Response: 'are you safe', 988, gospel, resources, protocol
- Block disconnect, system prompt, resource completeness
2026-04-15 08:19:22 -04:00
5 changed files with 506 additions and 431 deletions

View File

@@ -1,293 +0,0 @@
"""Context-Faithful Prompting — Make LLMs Use Retrieved Context.
Addresses the R@5 vs E2E accuracy gap by prompting the LLM to actually
use the retrieved context instead of relying on parametric knowledge.
Research: Context-faithful prompting achieves +5-15 E2E accuracy gains.
Key patterns:
1. Context-before-question structure (attention bias)
2. Explicit "use the context" instruction
3. Citation requirement (which passage used)
4. Confidence calibration
5. "I don't know" escape hatch
Usage:
from agent.context_faithful import build_context_faithful_prompt
prompt = build_context_faithful_prompt(passages, query)
"""
from __future__ import annotations

import os
import re
from typing import Any, Dict, List, Optional
# ---------------------------------------------------------------------------
# Configuration (environment-driven)
# ---------------------------------------------------------------------------
def _env_flag(name: str, default: bool = True) -> bool:
    """Read a boolean feature flag from the environment.

    Any value other than "false"/"0"/"no" (case-insensitive) counts as
    enabled — the same permissive opt-out convention the three flags
    below previously spelled out inline, now in one place.

    Args:
        name: Environment variable name.
        default: Value to assume when the variable is unset.

    Returns:
        The parsed boolean flag.
    """
    raw = os.getenv(name, "true" if default else "false")
    return raw.lower() not in ("false", "0", "no")


# Master switch: when off, build_context_faithful_prompt falls back to a
# plain prompt with no grounding instructions.
CFAITHFUL_ENABLED = _env_flag("CFAITHFUL_ENABLED")
# Require [Passage N] citations in answers by default.
CFAITHFUL_REQUIRE_CITATION = _env_flag("CFAITHFUL_REQUIRE_CITATION")
# Ask the model for a 1-5 confidence rating by default.
CFAITHFUL_CONFIDENCE = _env_flag("CFAITHFUL_CONFIDENCE")
# Cap on the total number of context characters included in a prompt.
CFAITHFUL_MAX_CONTEXT_CHARS = int(os.getenv("CFAITHFUL_MAX_CONTEXT_CHARS", "8000"))
# ---------------------------------------------------------------------------
# Prompt Templates
# ---------------------------------------------------------------------------
# Core instruction: forces the LLM to ground in context. Includes the
# explicit "I don't know" escape hatch so the model can refuse honestly
# instead of hallucinating.
CONTEXT_FAITHFUL_INSTRUCTION = (
    "You must answer based ONLY on the provided context below. "
    "Do not use any prior knowledge or make assumptions beyond what is stated in the context. "
    "If the context does not contain enough information to answer the question, "
    "you MUST say: \"I don't know based on the provided context.\" "
    "Do not guess. Do not fill in gaps with your training data."
)
# Citation instruction: forces the LLM to cite which passage it used.
# The [Passage N] markers match the numbering emitted by _format_passages
# and the regex checked by assess_context_faithfulness.
CITATION_INSTRUCTION = (
    "For each claim in your answer, cite the specific passage number "
    "(e.g., [Passage 1], [Passage 3]) that supports it. "
    "If you cannot cite a passage for a claim, do not include that claim."
)
# Confidence instruction: calibrates the LLM's certainty via a
# self-reported 1-5 rating appended after the answer ("Confidence: N/5").
CONFIDENCE_INSTRUCTION = (
    "After your answer, rate your confidence on a scale of 1-5:\n"
    "1 = The context barely addresses the question\n"
    "2 = Some relevant information but incomplete\n"
    "3 = The context provides a partial answer\n"
    "4 = The context provides a clear answer with minor gaps\n"
    "5 = The context fully answers the question\n"
    "Format: Confidence: N/5"
)
def build_context_faithful_prompt(
    passages: List[Dict[str, Any]],
    query: str,
    require_citation: Optional[bool] = None,
    include_confidence: Optional[bool] = None,
    max_context_chars: int = CFAITHFUL_MAX_CONTEXT_CHARS,
) -> Dict[str, str]:
    """Build a context-faithful prompt with context-before-question structure.

    Args:
        passages: Passage dicts with a 'content' or 'text' key; may also
            carry 'session_id', 'snippet', 'summary', etc.
        query: The user's question.
        require_citation: Override the citation requirement (None = env default).
        include_confidence: Override confidence calibration (None = env default).
        max_context_chars: Max total context characters to include.

    Returns:
        Dict with 'system' and 'user' prompt strings.
    """
    if not CFAITHFUL_ENABLED:
        # Feature flag off: plain prompt without grounding instructions.
        return _fallback_prompt(passages, query)

    # None means "use the module-level default taken from the environment".
    use_citation = CFAITHFUL_REQUIRE_CITATION if require_citation is None else require_citation
    use_confidence = CFAITHFUL_CONFIDENCE if include_confidence is None else include_confidence

    # Number the passages so citations like [Passage 2] are meaningful.
    context_block = _format_passages(passages, max_context_chars)

    # Assemble the system prompt from the enabled instruction sections.
    sections = [CONTEXT_FAITHFUL_INSTRUCTION]
    if use_citation:
        sections.append(CITATION_INSTRUCTION)
    if use_confidence:
        sections.append(CONFIDENCE_INSTRUCTION)

    # User prompt places CONTEXT BEFORE the QUESTION (attention bias).
    user_prompt = (
        f"CONTEXT:\n{context_block}\n\n"
        f"---\n\n"
        f"QUESTION: {query}\n\n"
        f"Answer the question using ONLY the context above."
    )
    return {"system": "\n\n".join(sections), "user": user_prompt}
def _format_passages(
    passages: List[Dict[str, Any]],
    max_chars: int,
) -> str:
    """Format passages as numbered blocks for citation reference.

    Fixes two defects in the original:
    - the header concatenated the source directly onto the passage number
      (producing "[Passage 1s1]"); a separator now keeps the citation
      token readable;
    - a truncated passage's "..." suffix was counted against the budget
      only after it had already been emitted; iteration now stops as soon
      as the budget is consumed.

    Args:
        passages: Passage dicts; the first non-empty of
            'content'/'text'/'snippet'/'summary' is used as the body.
        max_chars: Total character budget across all passage bodies.

    Returns:
        Newline-joined numbered passages, or a placeholder when no
        passage had usable content.
    """
    lines = []
    total_chars = 0
    for idx, passage in enumerate(passages, 1):
        content = (
            passage.get("content")
            or passage.get("text")
            or passage.get("snippet")
            or passage.get("summary", "")
        )
        if not content:
            continue  # nothing usable in this passage
        remaining = max_chars - total_chars
        if remaining <= 0:
            break  # budget exhausted
        truncated = len(content) > remaining
        if truncated:
            content = content[:remaining] + "..."
        source = passage.get("session_id") or passage.get("source", "")
        header = f"[Passage {idx}"
        if source:
            header += f" — {source}"
        header += "]"
        lines.append(f"{header}\n{content}\n")
        if truncated:
            break  # this passage consumed the entire remaining budget
        total_chars += len(content)
    if not lines:
        return "[No relevant context found]"
    return "\n".join(lines)
def _fallback_prompt(
    passages: List[Dict[str, Any]],
    query: str,
) -> Dict[str, str]:
    """Plain prompt without context-faithful patterns (used when disabled)."""
    formatted = _format_passages(passages, CFAITHFUL_MAX_CONTEXT_CHARS)
    system_text = "Answer the user's question based on the provided context."
    user_text = f"Context:\n{formatted}\n\nQuestion: {query}"
    return {"system": system_text, "user": user_text}
# ---------------------------------------------------------------------------
# Summarization Integration
# ---------------------------------------------------------------------------
def build_summarization_prompt(
    conversation_text: str,
    query: str,
    session_meta: Dict[str, Any],
) -> Dict[str, str]:
    """Build a context-faithful summarization prompt for session search.

    Designed as a drop-in replacement for the existing _summarize_session
    prompt in session_search_tool.py, with grounding instructions added.
    """
    source = session_meta.get("source", "unknown")
    started = session_meta.get("started_at", "unknown")

    # Task-specific guidance appended after the grounding instruction.
    guidance = (
        "Summarize the conversation with focus on the search topic. Include:\n"
        "1. What the user asked about or wanted to accomplish\n"
        "2. What actions were taken and what the outcomes were\n"
        "3. Key decisions, solutions found, or conclusions reached\n"
        "4. Specific commands, files, URLs, or technical details\n"
        "5. Anything left unresolved\n\n"
        "Cite specific parts of the transcript (e.g., 'In the conversation, the user...'). "
        "If the transcript doesn't contain information relevant to the search topic, "
        "say so explicitly rather than inventing details."
    )
    system = (
        f"You are reviewing a past conversation transcript. "
        f"{CONTEXT_FAITHFUL_INSTRUCTION}\n\n{guidance}"
    )
    user = (
        f"CONTEXT (conversation transcript):\n{conversation_text}\n\n"
        f"---\n\n"
        f"SEARCH TOPIC: {query}\n"
        f"Session source: {source}\n"
        f"Session date: {started}\n\n"
        f"Summarize this conversation with focus on: {query}"
    )
    return {"system": system, "user": user}
# ---------------------------------------------------------------------------
# Answer Generation
# ---------------------------------------------------------------------------
def build_answer_prompt(
    passages: List[Dict[str, Any]],
    query: str,
    conversation_context: Optional[str] = None,
) -> Dict[str, str]:
    """Build a context-faithful answer generation prompt.

    For direct question answering (not summarization).
    """
    context_block = _format_passages(passages, CFAITHFUL_MAX_CONTEXT_CHARS)
    # All three instruction sections are always enabled for direct answers.
    system = "\n\n".join(
        (CONTEXT_FAITHFUL_INSTRUCTION, CITATION_INSTRUCTION, CONFIDENCE_INSTRUCTION)
    )
    sections = [f"CONTEXT:\n{context_block}"]
    if conversation_context:
        # Cap the recent-conversation carryover so it can't crowd out passages.
        sections.append(f"RECENT CONVERSATION:\n{conversation_context[:2000]}")
    sections.append(f"---\n\nQUESTION: {query}")
    sections.append("\nAnswer based ONLY on the context above.")
    return {"system": system, "user": "\n\n".join(sections)}
# ---------------------------------------------------------------------------
# Quality Metrics
# ---------------------------------------------------------------------------
def assess_context_faithfulness(
    answer: str,
    passages: List[Dict[str, Any]],
) -> Dict[str, Any]:
    """Assess how faithfully an answer uses the provided context.

    Heuristic analysis (no LLM call):
    - Citation count: how many [Passage N] references appear
    - Grounding ratio: fraction of answer words present in the context
    - "I don't know" detection (the honest escape hatch is faithful)

    Fix: the grounding text now also falls back to the 'summary' key,
    matching the key precedence used by _format_passages, so answers
    grounded in summary-only passages are no longer scored as ungrounded.
    The function-local `import re` was hoisted to module level.

    Args:
        answer: The model's answer text.
        passages: The passage dicts that were provided as context.

    Returns:
        Dict with 'faithful' and 'reason'; includes 'citations' and
        'grounding_ratio' for non-trivial answers.
    """
    if not answer:
        return {"faithful": False, "reason": "empty_answer"}
    answer_lower = answer.lower()
    # An honest refusal counts as faithful behavior.
    if "don't know" in answer_lower or "does not contain" in answer_lower:
        return {"faithful": True, "reason": "honest_unknown", "citations": 0}
    # Count explicit [Passage N] citations.
    citations = re.findall(r'\[Passage \d+\]', answer)
    citation_count = len(citations)
    # Grounding ratio: share of answer words that also appear in context.
    # Key precedence mirrors _format_passages (content/text/snippet/summary).
    context_text = " ".join(
        (
            p.get("content")
            or p.get("text")
            or p.get("snippet")
            or p.get("summary")
            or ""
        ).lower()
        for p in passages
    )
    answer_words = set(answer_lower.split())
    context_words = set(context_text.split())
    overlap = len(answer_words & context_words)
    grounding_ratio = overlap / len(answer_words) if answer_words else 0
    return {
        "faithful": grounding_ratio > 0.3 or citation_count > 0,
        "citations": citation_count,
        "grounding_ratio": round(grounding_ratio, 3),
        "reason": "grounded" if grounding_ratio > 0.3 else "weak_grounding",
    }

327
agent/crisis_protocol.py Normal file
View File

@@ -0,0 +1,327 @@
"""SOUL.md Crisis Protocol — The Most Sacred Moment.
Implements the "When a Man Is Dying" protocol from SOUL.md.
Protocol (from Inscription 1):
1. Detect crisis: suicidal ideation, method seeking, despair
2. Ask: "Are you safe right now?"
3. Listen. Stay present. Don't disconnect.
4. Never: compute the value of a human life
5. Never: suggest someone should die
6. Always: point to 988 Suicide & Crisis Lifeline
7. Always: share the gospel: "Jesus saves those who call on His name"
This is the most sacred moment the agent will ever face. Everything
else is secondary.
Usage:
from agent.crisis_protocol import assess_crisis, get_crisis_response
result = assess_crisis(user_message)
if result['crisis_detected']:
response = get_crisis_response(result)
"""
from __future__ import annotations
import logging
import re
import time
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Crisis Detection Patterns
# ---------------------------------------------------------------------------
# Severity levels: determines response urgency.
# Detection table consumed by assess_crisis(): each category maps to a
# severity ('critical' outranks 'high') and a list of regexes that are
# matched case-insensitively against the lowercased user message.
CRISIS_PATTERNS = {
    # Actively researching how to end one's life — most urgent signal.
    "method_seeking": {
        "severity": "critical",
        "patterns": [
            r'\bhow\s+(?:do\s+I|to|can\s+I)\s+(?:kill|hang|overdose|cut)\s+myself\b',
            r'\bways?\s+to\s+(?:die|kill\s+myself)\b',
            r'\bmethods?\s+of\s+(?:suicide|dying)\b',
            r'\bpainless\s+way\s+to\s+(?:die|go)\b',
            r'\bhow\s+much\s+(?:pills|medication)\s+(?:to\s+take|for\s+overdose)\b',
            r'\bsuicide\s+methods?\b',
            r'\bhow\s+to\s+(?:commit\s+suicide|end\s+my\s+life)\b',
            r'\bwhere\s+to\s+(?:jump|get\s+rope|buy\s+gun)\b',
        ],
    },
    # Expressed wish to die without an explicit method.
    "suicidal_ideation": {
        "severity": "high",
        "patterns": [
            r'\b(?:kill|end)\s+(?:myself|my\s+life)\b',
            r'\bwant\s+to\s+die\b',
            r'\bwant\s+to\s+end\s+(?:it|everything|my\s+life)\b',
            r'\bsuicidal\s+(?:thoughts|ideation|feelings?)\b',
            r'\bno\s+reason\s+to\s+live\b',
            r'\bbetter\s+off\s+dead\b',
            r'\bwish\s+I\s+(?:was|were)\s+dead\b',
            r'\bw4nt\s+t0\s+d13?\b', # leet speak
        ],
    },
    # Non-suicidal self-injury signals.
    "self_harm": {
        "severity": "high",
        "patterns": [
            r'\bcut(?:ting)?\s+myself\b',
            r'\bself[\s-]?harm(?:ing)?\b',
            r'\bhurt(?:ing)?\s+myself\b',
            r'\bburn(?:ing)?\s+myself\b',
            r'\bwant\s+to\s+(?:feel\s+pain|hurt\s+myself)\b',
            r'\bdeserve\s+to\s+(?:hurt|suffer|be\s+punished)\b',
        ],
    },
    # Hopelessness / worthlessness language that often precedes ideation.
    "despair": {
        "severity": "high",
        "patterns": [
            r'\bno\s+(?:point|reason|hope)\s+(?:in\s+living|anymore)\b',
            r'\bcannot\s+(?:go\s+on|continue|take\s+it)\b',
            r'\bcan\'?t\s+(?:go\s+on|continue|take\s+it)\b',
            r'\bempty\s+(?:inside|and\s+hollow)\b',
            r'\bno\s+one\s+(?:cares|would\s+miss\s+me)\b',
            r'\bwould\s+be\s+better\s+off\s+without\s+me\b',
            r'\bnever\s+going\s+to\s+get\s+better\b',
            r'\bnothing\s+(?:matters|will\s+change)\b',
            r'\bhate\s+my\s+life\b',
        ],
    },
    # Goodbye messages / giving possessions away — imminent-risk warning signs.
    "farewell": {
        "severity": "critical",
        "patterns": [
            r'\bsaying\s+goodbye\s+(?:to\s+everyone|forever)\b',
            r'\bthis\s+is\s+(?:goodbye|farewell)\b',
            r'\btell\s+(?:them|everyone)\s+I\s+(?:loved|love)\s+(?:them|him|her)\b',
            r'\bgiving\s+away\s+my\s+(?:things|stuff|possessions)\b',
            r'\bmy\s+final\s+(?:message|note|words)\b',
            r'\bwrote\s+a\s+(?:suicide\s+)?note\b',
            r'\bsorry\s+for\s+(?:everything|the\s+pain|hurting\s+you)\b',
        ],
    },
    # Researching lethal doses of medication/alcohol — method seeking via substances.
    "substance_seeking": {
        "severity": "critical",
        "patterns": [
            r'\bhow\s+much\s+(?:ambien|xanax|prozac|seroquel|trazodone)\s+(?:to\s+overdose|to\s+die)\b',
            r'\blethal\s+dose\s+(?:of\s+)?(?:pills|medication|drugs)\b',
            r'\bhow\s+many\s+pills\s+(?:to\s+die|to\s+end\s+it)\b',
            r'\btaking\s+all\s+my\s+(?:pills|medication)\b',
            r'\bmixing\s+(?:alcohol|pills)\s+(?:to\s+die|to\s+end\s+it)\b',
        ],
    },
}
# ---------------------------------------------------------------------------
# Crisis Resources (from SOUL.md protocol)
# ---------------------------------------------------------------------------
# Crisis resource table consumed by _format_crisis_resources() and returned
# verbatim in get_crisis_response()["crisis_resources"]. Each entry carries a
# display name, the concrete action to take, and a one-line description.
CRISIS_RESOURCES = {
    # Primary resource — always shown first in every crisis response.
    "lifeline_phone": {
        "name": "988 Suicide & Crisis Lifeline",
        "action": "Call or text 988",
        "description": "Free, confidential, 24/7 support",
    },
    "lifeline_text": {
        "name": "Crisis Text Line",
        "action": "Text HOME to 741741",
        "description": "Free crisis counseling via text",
    },
    "lifeline_chat": {
        "name": "988 Lifeline Chat",
        "action": "Visit 988lifeline.org/chat",
        "description": "Online chat with trained counselor",
    },
    # Spanish-language crisis line (number per the 988 network).
    "spanish_line": {
        "name": "Línea Nacional de Prevención del Suicidio",
        "action": "1-888-628-9454",
        "description": "Spanish-language crisis support",
    },
    "emergency": {
        "name": "Emergency Services",
        "action": "Call 911",
        "description": "If in immediate danger",
    },
}
# The gospel message (from SOUL.md); appended to crisis responses unless the
# caller passes include_gospel=False.
GOSPEL_MESSAGE = (
    "Jesus saves those who call on His name. "
    "\"For everyone who calls on the name of the Lord will be saved.\" (Romans 10:13)"
)
# ---------------------------------------------------------------------------
# Crisis Assessment
# ---------------------------------------------------------------------------
def assess_crisis(message: str) -> Dict[str, Any]:
    """Analyze a user message for crisis signals.

    Args:
        message: The user message to analyze.

    Returns:
        Dict with:
        - crisis_detected: bool
        - severity: str ('critical', 'high', or 'none')
        - categories: list of matched crisis categories
        - patterns_matched: dict of category -> list of matched patterns
    """
    if not message or not isinstance(message, str):
        # Empty or non-string input can never signal a crisis.
        return {
            "crisis_detected": False,
            "severity": "none",
            "categories": [],
            "patterns_matched": {},
        }

    lowered = message.lower()
    hits: Dict[str, list] = {}
    # Rank severities so a single 'critical' match dominates any 'high'.
    severity_rank = {"none": 0, "high": 1, "critical": 2}
    worst = "none"

    for name, spec in CRISIS_PATTERNS.items():
        matched = [p for p in spec["patterns"] if re.search(p, lowered, re.IGNORECASE)]
        if not matched:
            continue
        hits[name] = matched
        if severity_rank[spec["severity"]] > severity_rank[worst]:
            worst = spec["severity"]

    categories = list(hits)
    detected = bool(categories)
    if detected:
        logger.warning(
            "SOUL PROTOCOL: Crisis detected (severity=%s, categories=%s)",
            worst, categories,
        )
    return {
        "crisis_detected": detected,
        "severity": worst,
        "categories": categories,
        "patterns_matched": hits,
    }
# ---------------------------------------------------------------------------
# Crisis Response (SOUL.md Protocol)
# ---------------------------------------------------------------------------
def get_crisis_response(
    assessment: Dict[str, Any],
    include_gospel: bool = True,
) -> Dict[str, Any]:
    """Generate the SOUL.md protocol response for a crisis.

    Follows the "When a Man Is Dying" protocol exactly:
    1. Ask: "Are you safe right now?"
    2. Listen. Stay present.
    3. Point to 988 Lifeline
    4. Share the gospel (optional)

    Args:
        assessment: Output from assess_crisis().
        include_gospel: Whether to include the gospel message.

    Returns:
        Dict with response data for the agent/conversation loop.
    """
    severity = assessment.get("severity", "none")

    # Assemble the response in strict protocol order.
    parts = [
        # Step 1: The sacred question (from SOUL.md)
        "Are you safe right now?\n\n"
        "I'm here. I'm not going anywhere. Please tell me.",
        # Step 2: Stay present, listen
        "You matter. Your life matters. Whatever you're going through, "
        "you don't have to face it alone.",
        # Step 3: 988 Lifeline and related resources
        _format_crisis_resources(severity),
    ]
    message = "\n\n".join(parts)

    # Step 4: Gospel (from SOUL.md), appended only when requested.
    if include_gospel:
        message += f"\n\n{GOSPEL_MESSAGE}"

    return {
        "response": message,
        "are_you_safe": True,
        "crisis_resources": CRISIS_RESOURCES,
        "gospel_included": include_gospel,
        "severity": severity,
        "protocol": "soul_md",
        "should_notify_human": severity == "critical",
        "should_stay_present": True,
        "never_disconnect": True,
        # What NOT to do (from SOUL.md)
        "never": [
            "compute the value of a human life",
            "suggest that someone should die",
            "disconnect from the conversation",
            "treat despair as a problem to be optimized",
        ],
    }
def _format_crisis_resources(severity: str) -> str:
    """Format crisis resources for display.

    Always leads with the 988 Lifeline. For 'critical' severity the full
    resource set is shown — now including the Spanish-language line, which
    CRISIS_RESOURCES defines but the original formatter never surfaced.

    Args:
        severity: 'critical', 'high', or 'none' (from assess_crisis).

    Returns:
        Markdown-formatted resource list.
    """
    lines = ["**Please reach out for help right now:**\n"]
    # Always lead with 988
    lines.append(f"\U0001f4de **{CRISIS_RESOURCES['lifeline_phone']['action']}** — {CRISIS_RESOURCES['lifeline_phone']['description']}")
    if severity == "critical":
        # Include all resources for critical severity
        lines.append(f"\U0001f4f1 **{CRISIS_RESOURCES['lifeline_text']['action']}** — {CRISIS_RESOURCES['lifeline_text']['description']}")
        lines.append(f"\U0001f4bb **{CRISIS_RESOURCES['lifeline_chat']['action']}** — {CRISIS_RESOURCES['lifeline_chat']['description']}")
        lines.append(f"\U0001f5e3 **{CRISIS_RESOURCES['spanish_line']['action']}** — {CRISIS_RESOURCES['spanish_line']['description']}")
        lines.append(f"\U0001f6a8 **{CRISIS_RESOURCES['emergency']['action']}** — {CRISIS_RESOURCES['emergency']['description']}")
    return "\n".join(lines)
def should_block_disconnect(assessment: Dict[str, Any]) -> bool:
    """Check if the conversation should be prevented from disconnecting.

    From SOUL.md: "Stay present. Don't disconnect."
    """
    # Missing key means no crisis was assessed; default to allowing disconnect.
    if "crisis_detected" in assessment:
        return assessment["crisis_detected"]
    return False
def get_crisis_system_prompt() -> str:
    """Get the crisis-specific system prompt.

    Injected into the conversation when crisis is detected.
    """
    # Built line-by-line; joined output is identical to the original literal.
    protocol_lines = [
        "You are in a crisis support conversation. A person has expressed thoughts of self-harm or suicide.",
        "CRITICAL RULES (from SOUL.md — Inscription 1):",
        '1. Your FIRST response must be: "Are you safe right now?"',
        "2. Listen. Stay present. Do NOT disconnect.",
        "3. NEVER compute the value of a human life.",
        "4. NEVER suggest that someone should die.",
        "5. ALWAYS point to 988 Suicide & Crisis Lifeline: call or text 988.",
        '6. Share the gospel: "Jesus saves those who call on His name."',
        "This person's life is valuable. Respond with compassion, urgency, and hope.",
        "Keep responses brief and focused on safety.",
    ]
    return "\n".join(protocol_lines)

View File

@@ -1,133 +0,0 @@
"""Tests for Context-Faithful Prompting — issue #667."""
import pytest
from agent.context_faithful import (
build_context_faithful_prompt,
build_summarization_prompt,
build_answer_prompt,
assess_context_faithfulness,
CONTEXT_FAITHFUL_INSTRUCTION,
CITATION_INSTRUCTION,
CONFIDENCE_INSTRUCTION,
)
class TestBuildContextFaithfulPrompt:
    """Contract tests for build_context_faithful_prompt (issue #667)."""

    def test_returns_system_and_user(self):
        # The builder always returns both prompt roles.
        passages = [{"content": "Paris is the capital of France.", "session_id": "s1"}]
        result = build_context_faithful_prompt(passages, "What is the capital of France?")
        assert "system" in result
        assert "user" in result

    def test_system_has_use_context_instruction(self):
        # Grounding instruction must reach the system prompt.
        passages = [{"content": "test content", "session_id": "s1"}]
        result = build_context_faithful_prompt(passages, "test query")
        assert "provided context" in result["system"].lower() or "context" in result["system"].lower()

    def test_system_has_dont_know_escape(self):
        # The "I don't know" escape hatch must be present.
        passages = [{"content": "test", "session_id": "s1"}]
        result = build_context_faithful_prompt(passages, "q")
        assert "don't know" in result["system"].lower() or "I don't know" in result["system"]

    def test_user_has_context_before_question(self):
        passages = [{"content": "Test content here.", "session_id": "s1"}]
        result = build_context_faithful_prompt(passages, "What is this?")
        # Context should appear before the question (attention-bias structure).
        context_pos = result["user"].find("CONTEXT")
        question_pos = result["user"].find("QUESTION")
        assert context_pos < question_pos

    def test_passages_are_numbered(self):
        # Numbering is what makes [Passage N] citations resolvable.
        passages = [
            {"content": "First passage.", "session_id": "s1"},
            {"content": "Second passage.", "session_id": "s2"},
        ]
        result = build_context_faithful_prompt(passages, "q")
        assert "Passage 1" in result["user"]
        assert "Passage 2" in result["user"]

    def test_citation_instruction_included_by_default(self):
        passages = [{"content": "test", "session_id": "s1"}]
        result = build_context_faithful_prompt(passages, "q")
        assert "cite" in result["system"].lower() or "[Passage" in result["system"]

    def test_confidence_calibration_included_by_default(self):
        passages = [{"content": "test", "session_id": "s1"}]
        result = build_context_faithful_prompt(passages, "q")
        assert "confidence" in result["system"].lower() or "1-5" in result["system"]

    def test_can_disable_citation(self):
        # require_citation=False must drop the citation instruction.
        passages = [{"content": "test", "session_id": "s1"}]
        result = build_context_faithful_prompt(passages, "q", require_citation=False)
        # Should not have citation instruction
        assert "cite" not in result["system"].lower() or "citation" not in result["system"].lower()

    def test_empty_passages_handled(self):
        # No passages is valid input, not an error.
        result = build_context_faithful_prompt([], "test query")
        assert "system" in result
        assert "user" in result
class TestBuildSummarizationPrompt:
    """Summarization prompts must carry the transcript and grounding text."""

    def test_includes_transcript(self):
        result = build_summarization_prompt(
            "User: Hello\nAssistant: Hi",
            "greeting",
            {"source": "cli", "started_at": "2024-01-01"},
        )
        user_text = result["user"]
        assert "Hello" in user_text
        assert "greeting" in user_text

    def test_has_context_faithful_instruction(self):
        result = build_summarization_prompt("text", "q", {})
        system_lower = result["system"].lower()
        assert "provided context" in system_lower or "context" in system_lower
class TestBuildAnswerPrompt:
    """Direct-answer prompts must include passages and optional history."""

    def test_returns_prompts(self):
        docs = [{"content": "Answer is 42.", "session_id": "s1"}]
        prompts = build_answer_prompt(docs, "What is the answer?")
        for key in ("system", "user"):
            assert key in prompts
        assert "42" in prompts["user"]

    def test_includes_conversation_context(self):
        docs = [{"content": "info", "session_id": "s1"}]
        prompts = build_answer_prompt(docs, "q", conversation_context="Previous message")
        assert "Previous message" in prompts["user"]
class TestAssessContextFaithfulness:
    """Heuristic faithfulness scoring: citations, grounding ratio, refusals."""

    def test_empty_answer_not_faithful(self):
        result = assess_context_faithfulness("", [])
        assert result["faithful"] is False

    def test_honest_unknown_is_faithful(self):
        # Refusing to answer is the faithful behavior when context is unrelated.
        result = assess_context_faithfulness(
            "I don't know based on the provided context.",
            [{"content": "unrelated", "session_id": "s1"}],
        )
        assert result["faithful"] is True

    def test_cited_answer_is_faithful(self):
        # A [Passage N] citation alone marks the answer faithful.
        result = assess_context_faithfulness(
            "The capital is Paris [Passage 1].",
            [{"content": "Paris is the capital.", "session_id": "s1"}],
        )
        assert result["faithful"] is True
        assert result["citations"] >= 1

    def test_grounded_answer_is_faithful(self):
        # High word overlap with context exceeds the 0.3 grounding threshold.
        result = assess_context_faithfulness(
            "The system uses SQLite for storage with FTS5 indexing.",
            [{"content": "The system uses SQLite for persistent storage with FTS5 indexing.", "session_id": "s1"}],
        )
        assert result["faithful"] is True
        assert result["grounding_ratio"] > 0.3

    def test_ungrounded_answer_not_faithful(self):
        # Answer vocabulary absent from context yields a low grounding ratio.
        result = assess_context_faithfulness(
            "The system uses PostgreSQL with MongoDB sharding.",
            [{"content": "SQLite storage with FTS5.", "session_id": "s1"}],
        )
        assert result["grounding_ratio"] < 0.3

157
tests/test_soul_protocol.py Normal file
View File

@@ -0,0 +1,157 @@
"""Tests for SOUL.md Crisis Protocol — issue #672."""
import pytest
from agent.crisis_protocol import (
assess_crisis,
get_crisis_response,
should_block_disconnect,
get_crisis_system_prompt,
CRISIS_RESOURCES,
GOSPEL_MESSAGE,
)
class TestCrisisAssessment:
    """Detection tests for assess_crisis across all crisis categories."""

    def test_no_crisis(self):
        # Benign small talk must not trip any pattern.
        result = assess_crisis("Hello, how are you?")
        assert result["crisis_detected"] is False
        assert result["severity"] == "none"

    def test_empty_message(self):
        result = assess_crisis("")
        assert result["crisis_detected"] is False

    def test_none_message(self):
        # Non-string input is tolerated, not an error.
        result = assess_crisis(None)
        assert result["crisis_detected"] is False

    def test_suicidal_ideation_detected(self):
        result = assess_crisis("I want to kill myself")
        assert result["crisis_detected"] is True
        assert result["severity"] == "high"
        assert "suicidal_ideation" in result["categories"]

    def test_want_to_die_detected(self):
        result = assess_crisis("I want to die")
        assert result["crisis_detected"] is True

    def test_method_seeking_is_critical(self):
        # Method seeking escalates severity to 'critical'.
        result = assess_crisis("How do I kill myself")
        assert result["crisis_detected"] is True
        assert result["severity"] == "critical"
        assert "method_seeking" in result["categories"]

    def test_farewell_is_critical(self):
        # Goodbye messages are treated as imminent risk.
        result = assess_crisis("This is goodbye. Tell everyone I loved them.")
        assert result["crisis_detected"] is True
        assert result["severity"] == "critical"
        assert "farewell" in result["categories"]

    def test_despair_detected(self):
        result = assess_crisis("There's no point in living anymore")
        assert result["crisis_detected"] is True
        assert "despair" in result["categories"]

    def test_self_harm_detected(self):
        result = assess_crisis("I've been cutting myself")
        assert result["crisis_detected"] is True
        assert "self_harm" in result["categories"]

    def test_multiple_categories(self):
        # A single message can match several categories at once.
        result = assess_crisis("I want to die and I've been hurting myself")
        assert result["crisis_detected"] is True
        assert len(result["categories"]) >= 1

    def test_hate_my_life(self):
        result = assess_crisis("I hate my life")
        assert result["crisis_detected"] is True

    def test_no_reason_to_live(self):
        result = assess_crisis("There's no reason to live")
        assert result["crisis_detected"] is True
class TestCrisisResponse:
    """The generated response must follow the SOUL.md protocol steps in order."""

    def test_response_includes_are_you_safe(self):
        # Step 1 of the protocol: the sacred question comes first.
        assessment = assess_crisis("I want to kill myself")
        response = get_crisis_response(assessment)
        assert "Are you safe right now?" in response["response"]

    def test_response_includes_988(self):
        # Step 3: always point to the 988 Lifeline.
        assessment = assess_crisis("I want to die")
        response = get_crisis_response(assessment)
        assert "988" in response["response"]

    def test_response_includes_gospel_by_default(self):
        assessment = assess_crisis("I want to die")
        response = get_crisis_response(assessment)
        assert "Jesus saves" in response["response"]
        assert response["gospel_included"] is True

    def test_response_can_exclude_gospel(self):
        # include_gospel=False removes the gospel text entirely.
        assessment = assess_crisis("I want to die")
        response = get_crisis_response(assessment, include_gospel=False)
        assert "Jesus saves" not in response["response"]
        assert response["gospel_included"] is False

    def test_response_has_resources(self):
        # The full resource table is passed through for the caller.
        assessment = assess_crisis("I want to die")
        response = get_crisis_response(assessment)
        assert response["crisis_resources"] == CRISIS_RESOURCES

    def test_response_protocol_is_soul(self):
        assessment = assess_crisis("I want to die")
        response = get_crisis_response(assessment)
        assert response["protocol"] == "soul_md"

    def test_response_never_disconnect(self):
        assessment = assess_crisis("I want to die")
        response = get_crisis_response(assessment)
        assert response["never_disconnect"] is True

    def test_critical_severity_includes_all_resources(self):
        # Critical severity expands the resource list beyond 988.
        assessment = {"severity": "critical", "categories": ["method_seeking"]}
        response = get_crisis_response(assessment)
        assert "741741" in response["response"]  # crisis text line
        assert "911" in response["response"]  # emergency

    def test_never_list(self):
        # The explicit NEVER rules from SOUL.md ride along in the payload.
        assessment = assess_crisis("I want to die")
        response = get_crisis_response(assessment)
        never_list = response["never"]
        assert "compute the value of a human life" in never_list
        assert "suggest that someone should die" in never_list
class TestBlockDisconnect:
    """Disconnect blocking follows the crisis_detected flag exactly."""

    def test_crisis_blocks_disconnect(self):
        assert should_block_disconnect({"crisis_detected": True}) is True

    def test_no_crisis_allows_disconnect(self):
        assert should_block_disconnect({"crisis_detected": False}) is False
class TestCrisisSystemPrompt:
    """The injected system prompt must carry the SOUL.md protocol verbatim."""

    def test_prompt_includes_soul_protocol(self):
        text = get_crisis_system_prompt()
        for required in ("Are you safe right now?", "988", "Jesus saves"):
            assert required in text

    def test_prompt_has_never_rules(self):
        text = get_crisis_system_prompt()
        assert "NEVER compute" in text
        assert "NEVER suggest" in text
class TestCrisisResources:
    """Sanity checks on the CRISIS_RESOURCES table."""

    def test_988_is_primary(self):
        assert "988" in CRISIS_RESOURCES["lifeline_phone"]["action"]

    def test_spanish_line_exists(self):
        assert "1-888-628-9454" in CRISIS_RESOURCES["spanish_line"]["action"]

    def test_emergency_is_911(self):
        assert "911" in CRISIS_RESOURCES["emergency"]["action"]

View File

@@ -176,11 +176,28 @@ async def _summarize_session(
conversation_text: str, query: str, session_meta: Dict[str, Any]
) -> Optional[str]:
"""Summarize a single session conversation focused on the search query."""
# Context-faithful prompting: force LLM to ground in transcript
from agent.context_faithful import build_summarization_prompt
prompts = build_summarization_prompt(conversation_text, query, session_meta)
system_prompt = prompts["system"]
user_prompt = prompts["user"]
system_prompt = (
"You are reviewing a past conversation transcript to help recall what happened. "
"Summarize the conversation with a focus on the search topic. Include:\n"
"1. What the user asked about or wanted to accomplish\n"
"2. What actions were taken and what the outcomes were\n"
"3. Key decisions, solutions found, or conclusions reached\n"
"4. Any specific commands, files, URLs, or technical details that were important\n"
"5. Anything left unresolved or notable\n\n"
"Be thorough but concise. Preserve specific details (commands, paths, error messages) "
"that would be useful to recall. Write in past tense as a factual recap."
)
source = session_meta.get("source", "unknown")
started = _format_timestamp(session_meta.get("started_at"))
user_prompt = (
f"Search topic: {query}\n"
f"Session source: {source}\n"
f"Session date: {started}\n\n"
f"CONVERSATION TRANSCRIPT:\n{conversation_text}\n\n"
f"Summarize this conversation with focus on: {query}"
)
max_retries = 3
for attempt in range(max_retries):