- Add connectors/ directory with base infrastructure - Implement SourceEvent unified schema (source/account/thread/author/timestamp/content/attachments/raw_ref/hash/consent_scope) - Create BaseConnector abstract class with checkpoint/dedup/consent gates - Implement TwitterArchiveConnector for official Twitter/X data exports - Add run_connector.py CLI entry point - Add comprehensive test suite (13 tests, all passing) - Add connectors/README.md with usage docs - Add Makefile targets: test-connectors, run-connector, connectors-help - Reference parent EPIC #194 and issue #233 This is the foundational connector pack. Future work: Discord, Slack, WhatsApp, Notion, iMessage, Google.
51 lines
1.5 KiB
Python
51 lines
1.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
connectors/__init__.py — Sovereign personal archive connector pack.
|
|
|
|
Provides:
|
|
- BaseConnector: abstract base class for all connectors
|
|
- SourceEvent: unified event schema
|
|
- compute_event_hash, validate_event: utilities
|
|
- get_connector, list_connectors: look up and list registered connectors
|
|
|
|
Connectors:
|
|
- TwitterArchiveConnector: parse official Twitter/X archive exports
|
|
(Future: Discord, Slack, WhatsApp, Notion, iMessage, Google)
|
|
"""
|
|
|
|
from .base import BaseConnector
|
|
from .schema import (
|
|
SourceEvent,
|
|
compute_event_hash,
|
|
validate_event,
|
|
CONSENT_MEMORY_ONLY,
|
|
CONSENT_BOOTSTRAP,
|
|
CONSENT_TRAINING,
|
|
)
|
|
from .twitter_archive import TwitterArchiveConnector
|
|
|
|
# Connector registry: maps each connector's public name to its class.
# Add new connectors here to make them reachable by name.
_REGISTRY: dict[str, type[BaseConnector]] = {
    "twitter_archive": TwitterArchiveConnector,
    # Planned, not yet implemented (name -> intended class):
    #   "discord_archive":  DiscordArchiveConnector
    #   "slack_archive":    SlackArchiveConnector
    #   "whatsapp_archive": WhatsAppArchiveConnector
    #   "notion_archive":   NotionArchiveConnector
    #   "imessage_archive": iMessageArchiveConnector
    #   "google_archive":   GoogleArchiveConnector
}
|
|
|
|
|
|
def get_connector(name: str) -> type[BaseConnector]:
    """Look up a connector class in the registry.

    Args:
        name: Registry key of the connector (e.g. ``"twitter_archive"``).

    Returns:
        The connector class registered under ``name``.

    Raises:
        ValueError: If ``name`` is not a registered connector; the message
            lists the names that are available.
    """
    connector_cls = _REGISTRY.get(name)
    if connector_cls is not None:
        return connector_cls
    # Fall-through: nothing registered under this name.
    raise ValueError(f"Unknown connector '{name}'. Available: {list(_REGISTRY.keys())}")
|
|
|
|
|
|
def list_connectors() -> list[str]:
    """Return the names of all registered connectors.

    Returns:
        A new list holding the registry's keys, in the registry's
        iteration order. Mutating it does not affect the registry.
    """
    # Unpacking a dict iterates its keys, producing a fresh list.
    return [*_REGISTRY]
|