forked from Rockachopa/Timmy-time-dashboard
Compare commits
1 Commits
feature/mo
...
kimi/issue
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
75a6a498b4 |
@@ -39,19 +39,21 @@ _DEFAULT_DB = Path("data/thoughts.db")
|
||||
# qwen3 and other reasoning models wrap chain-of-thought in <think> tags
|
||||
_THINK_TAG_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
|
||||
|
||||
# Sensitive patterns that must never be stored as facts
|
||||
_SENSITIVE_PATTERNS = [
|
||||
"token",
|
||||
"password",
|
||||
"secret",
|
||||
"api_key",
|
||||
"apikey",
|
||||
"credential",
|
||||
".config/",
|
||||
"/token",
|
||||
"access_token",
|
||||
"private_key",
|
||||
"ssh_key",
|
||||
# Sensitive patterns that must never be stored as facts.
|
||||
# Uses compiled regexes with word boundaries so that compound technical
|
||||
# terms like "max_tokens" or "num_tokens" are NOT falsely flagged.
|
||||
_SENSITIVE_RE = [
|
||||
re.compile(r"(?<![a-z_])token(?![a-z_])", re.IGNORECASE), # "token" but not "max_tokens"
|
||||
re.compile(r"password", re.IGNORECASE),
|
||||
re.compile(r"secret", re.IGNORECASE),
|
||||
re.compile(r"api_key", re.IGNORECASE),
|
||||
re.compile(r"apikey", re.IGNORECASE),
|
||||
re.compile(r"credential", re.IGNORECASE),
|
||||
re.compile(r"\.config/"),
|
||||
re.compile(r"/token\b"),
|
||||
re.compile(r"access_token", re.IGNORECASE),
|
||||
re.compile(r"private_key", re.IGNORECASE),
|
||||
re.compile(r"ssh_key", re.IGNORECASE),
|
||||
]
|
||||
|
||||
# Meta-observation phrases to filter out from distilled facts
|
||||
@@ -548,7 +550,7 @@ class ThinkingEngine:
|
||||
fact_lower = fact.lower()
|
||||
|
||||
# Block sensitive information
|
||||
if any(pat in fact_lower for pat in _SENSITIVE_PATTERNS):
|
||||
if any(pat.search(fact) for pat in _SENSITIVE_RE):
|
||||
logger.warning("Distill: blocked sensitive fact: %s", fact[:60])
|
||||
continue
|
||||
|
||||
|
||||
@@ -1188,3 +1188,42 @@ def test_references_real_files_blocks_mixed(tmp_path):
|
||||
# Mix of real and fake files — should fail because of the fake one
|
||||
text = "Fix src/timmy/thinking.py and also src/timmy/nonexistent_module.py for the memory leak."
|
||||
assert ThinkingEngine._references_real_files(text) is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sensitive-pattern regression: max_tokens must NOT be flagged (#625)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_sensitive_patterns_allow_max_tokens():
    """Regression for #625: compound ``*_tokens`` names are not sensitive."""
    from timmy.thinking import _SENSITIVE_RE

    # Ordinary technical statements that only mention token-count fields.
    benign = (
        "The cascade router passes max_tokens to Ollama provider.",
        "max_tokens=request.max_tokens in the completion call.",
        "num_tokens defaults to 2048.",
        "total_prompt_tokens is tracked in stats.",
    )
    for fact in benign:
        # Collect the patterns that fire; none should for a benign fact.
        hits = [pat for pat in _SENSITIVE_RE if pat.search(fact)]
        assert not hits, f"False positive: {fact!r} was flagged as sensitive"
|
||||
|
||||
|
||||
def test_sensitive_patterns_still_block_real_secrets():
    """Every genuinely secret-looking fact must match some _SENSITIVE_RE entry."""
    from timmy.thinking import _SENSITIVE_RE

    # One representative sentence per kind of secret exercised here.
    secrets = (
        "The token is abc123def456.",
        "Set password to hunter2.",
        "api_key = sk-live-xyz",
        "Found credential in .env file.",
        "access_token expired yesterday.",
        "private_key stored in vault.",
    )
    for fact in secrets:
        # At least one pattern has to fire for the fact to be blocked.
        hits = [pat for pat in _SENSITIVE_RE if pat.search(fact)]
        assert hits, f"Missed secret: {fact!r} was NOT flagged as sensitive"
|
||||
|
||||
Reference in New Issue
Block a user