#!/usr/bin/env python3 """ CODITECT Database Router (ADR-118)
Routes database queries to the correct database in the four-tier architecture. Centralizes table-to-database mapping for consistent ADR-118 compliance.
Usage: from scripts.core.db_router import get_db_for_table, get_connection, DatabaseRouter
# Simple: get path for a table
db_path = get_db_for_table('messages') # Returns sessions.db path
# Connection: get sqlite connection for a table
conn = get_connection('decisions') # Returns connection to org.db
# Multi-table: get connections for multiple tables
router = DatabaseRouter()
connections = router.get_connections_for_tables(['messages', 'decisions'])
ADR-118 Four-Tier Architecture: TIER 1 - platform.db: Component metadata (regenerable) TIER 2 - org.db: IRREPLACEABLE knowledge (decisions, learnings, errors) TIER 3 - sessions.db: REGENERABLE session data (messages, analytics) TIER 4 - projects.db: Project embeddings (regenerable) LEGACY - context.db: DEPRECATED (being migrated)
Created: 2026-01-29 ADR: ADR-118 Four-Tier Database Architecture """
import sqlite3 import logging import sys from pathlib import Path from typing import Dict, Optional, Set, Union, List from enum import Enum
Handle both module import and direct script execution
try: from scripts.core.paths import ( get_org_db_path, get_sessions_db_path, get_projects_db_path, get_context_db_path, get_context_storage_dir, ORG_DB, SESSIONS_DB, PROJECTS_DB, CONTEXT_DB, ) except ModuleNotFoundError: # Running as standalone script - add parent to path _script_dir = Path(file).resolve().parent _core_root = _script_dir.parent.parent if str(_core_root) not in sys.path: sys.path.insert(0, str(_core_root)) from scripts.core.paths import ( get_org_db_path, get_sessions_db_path, get_projects_db_path, get_context_db_path, get_context_storage_dir, ORG_DB, SESSIONS_DB, PROJECTS_DB, CONTEXT_DB, )
logger = logging.getLogger(name)
-----------------------------------------------------------------------------
Database Tier Enumeration
-----------------------------------------------------------------------------
class DatabaseTier(Enum): """ADR-118 database tiers.""" PLATFORM = "platform" # Tier 1: Component metadata ORG = "org" # Tier 2: IRREPLACEABLE knowledge SESSIONS = "sessions" # Tier 3: Regenerable session data PROJECTS = "projects" # Tier 4: Project embeddings LEGACY = "legacy" # DEPRECATED: context.db
-----------------------------------------------------------------------------
Table-to-Database Routing (ADR-118)
-----------------------------------------------------------------------------
Canonical table routing - SINGLE SOURCE OF TRUTH
TABLE_ROUTING: Dict[str, DatabaseTier] = { # ------------------------------------------------------------------------- # TIER 1: platform.db - Component Metadata (Regenerable) # ------------------------------------------------------------------------- "components": DatabaseTier.PLATFORM, "capabilities": DatabaseTier.PLATFORM, "component_search": DatabaseTier.PLATFORM, # FTS5 table "component_dependencies": DatabaseTier.PLATFORM, "component_health": DatabaseTier.PLATFORM,
# -------------------------------------------------------------------------
# TIER 2: org.db - IRREPLACEABLE Knowledge (CRITICAL BACKUP)
# -------------------------------------------------------------------------
"decisions": DatabaseTier.ORG,
"skill_learnings": DatabaseTier.ORG,
"error_solutions": DatabaseTier.ORG,
"adr_references": DatabaseTier.ORG,
"knowledge_graph": DatabaseTier.ORG,
"entity_mentions": DatabaseTier.ORG,
# ADR-151: Knowledge Graph Tables (Context Graph Evolution)
"kg_nodes": DatabaseTier.ORG, # Universal entity table (11 node types)
"kg_edges": DatabaseTier.ORG, # Typed relationships (12 edge types)
"kg_nodes_fts": DatabaseTier.ORG, # FTS5 full-text search over nodes
# -------------------------------------------------------------------------
# TIER 3: sessions.db - Regenerable Session Data
# -------------------------------------------------------------------------
"sessions": DatabaseTier.SESSIONS,
"messages": DatabaseTier.SESSIONS,
"message_search": DatabaseTier.SESSIONS, # FTS5 table
"tool_analytics": DatabaseTier.SESSIONS,
"token_economics": DatabaseTier.SESSIONS,
"activity_associations": DatabaseTier.SESSIONS,
"message_component_invocations": DatabaseTier.SESSIONS,
"code_patterns": DatabaseTier.SESSIONS,
"session_insights": DatabaseTier.SESSIONS,
"embeddings": DatabaseTier.SESSIONS,
"sync_queue": DatabaseTier.SESSIONS,
"task_tracking": DatabaseTier.SESSIONS,
"task_messages": DatabaseTier.SESSIONS,
# ADR-151 Phase 4: Call Graph Tables (Legacy + Memory)
# - call_graph_functions: MIGRATED to kg_nodes (node_type='function')
# - call_graph_edges: MIGRATED to kg_edges (edge_type='CALLS')
# - call_graph_memory: KEPT IN sessions.db (session-specific, ephemeral)
#
# Rationale for keeping call_graph_memory separate:
# 1. Ephemeral: Links functions to session messages (regenerable)
# 2. Session-scoped: Not global knowledge, specific to each session
# 3. Performance: Avoids cross-database joins for real-time memory context
# See: docs/reference/CALL-GRAPH-MIGRATION.md
"call_graph_functions": DatabaseTier.SESSIONS, # LEGACY - use kg_nodes
"call_graph_edges": DatabaseTier.SESSIONS, # LEGACY - use kg_edges
"call_graph_memory": DatabaseTier.SESSIONS, # KEPT - session-specific context
"call_graph_fts": DatabaseTier.SESSIONS, # LEGACY FTS5 table
# ADR-151 Phase 5: Context Graph Tables (Builder Service)
# Task-specific subgraph projections from knowledge graph backbone
# - context_graphs: Graph metadata and configuration
# - context_graph_nodes: Junction table linking graphs to kg_nodes (in org.db)
# - context_graph_usage: Analytics/audit for graph usage
# - context_graph_checkpoints: LangGraph checkpointer persistence
# See: scripts/context_graph/builder.py
"context_graphs": DatabaseTier.SESSIONS,
"context_graph_nodes": DatabaseTier.SESSIONS,
"context_graph_usage": DatabaseTier.SESSIONS,
"context_graph_checkpoints": DatabaseTier.SESSIONS,
# -------------------------------------------------------------------------
# TIER 4: projects.db - Project Embeddings (Regenerable)
# -------------------------------------------------------------------------
"projects": DatabaseTier.PROJECTS,
"content_hashes": DatabaseTier.PROJECTS,
"project_embeddings": DatabaseTier.PROJECTS,
"exclude_patterns": DatabaseTier.PROJECTS,
}
-----------------------------------------------------------------------------
Database Router Class
-----------------------------------------------------------------------------
class DatabaseRouter: """ Routes database operations to the correct database per ADR-118.
Provides:
- Table → Database path resolution
- Connection management with automatic routing
- Multi-database query support
NOTE: context.db is DEPRECATED - NO FALLBACK per ADR-118.
Unknown tables raise ValueError. Add new tables to TABLE_ROUTING.
Example:
router = DatabaseRouter()
# Get path for a table
path = router.get_db_path('messages') # Returns sessions.db
# Get connection for a table
conn = router.get_connection('decisions') # Connects to org.db
# Check which database a table belongs to
tier = router.get_tier('skill_learnings') # Returns DatabaseTier.ORG
"""
def __init__(self, enable_legacy_fallback: bool = False):
"""
Initialize the database router.
Args:
enable_legacy_fallback: DEPRECATED - always False per ADR-118.
context.db fallback is NOT ALLOWED.
Unknown tables raise ValueError.
"""
if enable_legacy_fallback:
logger.warning(
"enable_legacy_fallback=True is DEPRECATED per ADR-118. "
"context.db fallback is NOT ALLOWED. Ignoring flag."
)
self.enable_legacy_fallback = False # Always False per ADR-118
self._connections: Dict[DatabaseTier, sqlite3.Connection] = {}
# Database path mapping
self._db_paths: Dict[DatabaseTier, Path] = {
DatabaseTier.PLATFORM: get_context_storage_dir() / "platform.db",
DatabaseTier.ORG: get_org_db_path(),
DatabaseTier.SESSIONS: get_sessions_db_path(),
DatabaseTier.PROJECTS: get_projects_db_path(),
DatabaseTier.LEGACY: get_context_db_path(),
}
def get_tier(self, table: str) -> DatabaseTier:
"""
Get the database tier for a table.
Args:
table: Table name (e.g., 'messages', 'decisions')
Returns:
DatabaseTier enum value
Raises:
ValueError: If table not found in ADR-118 routing.
NO FALLBACK to context.db per ADR-118.
"""
tier = TABLE_ROUTING.get(table)
if tier is None:
# ADR-118: NO FALLBACK to context.db - always raise error
raise ValueError(
f"Table '{table}' not found in ADR-118 routing. "
f"Add to TABLE_ROUTING in scripts/core/db_router.py. "
f"context.db fallback is NOT ALLOWED per ADR-118."
)
return tier
def get_db_path(self, table: str) -> Path:
"""
Get the database file path for a table.
Args:
table: Table name
Returns:
Path to the database file
"""
tier = self.get_tier(table)
return self._db_paths[tier]
def get_connection(self, table: str, **kwargs) -> sqlite3.Connection:
"""
Get a database connection for a table.
Args:
table: Table name
**kwargs: Additional arguments passed to sqlite3.connect()
Returns:
sqlite3.Connection to the appropriate database
"""
db_path = self.get_db_path(table)
return sqlite3.connect(str(db_path), **kwargs)
def get_cached_connection(self, table: str) -> sqlite3.Connection:
"""
Get a cached database connection for a table.
Connections are cached per tier and reused. Use this for
multiple queries to the same database within a session.
Args:
table: Table name
Returns:
Cached sqlite3.Connection
"""
tier = self.get_tier(table)
if tier not in self._connections:
db_path = self._db_paths[tier]
self._connections[tier] = sqlite3.connect(str(db_path))
self._connections[tier].row_factory = sqlite3.Row
return self._connections[tier]
def get_connections_for_tables(
self,
tables: List[str]
) -> Dict[DatabaseTier, sqlite3.Connection]:
"""
Get connections for multiple tables (for cross-database queries).
Args:
tables: List of table names
Returns:
Dict mapping DatabaseTier to Connection for each unique tier
"""
connections = {}
for table in tables:
tier = self.get_tier(table)
if tier not in connections:
connections[tier] = self.get_cached_connection(table)
return connections
def get_tables_by_tier(self, tier: DatabaseTier) -> Set[str]:
"""
Get all tables belonging to a specific tier.
Args:
tier: DatabaseTier enum value
Returns:
Set of table names
"""
return {
table for table, t in TABLE_ROUTING.items()
if t == tier
}
def close_all(self):
"""Close all cached connections."""
for conn in self._connections.values():
conn.close()
self._connections.clear()
def __enter__(self):
"""Context manager entry."""
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit - close all connections."""
self.close_all()
return False
-----------------------------------------------------------------------------
Convenience Functions (Module-Level)
-----------------------------------------------------------------------------
Singleton router instance for convenience functions
_default_router: Optional[DatabaseRouter] = None
def _get_router() -> DatabaseRouter: """Get or create the default router instance.""" global _default_router if _default_router is None: _default_router = DatabaseRouter() return _default_router
def get_db_for_table(table: str) -> Path: """ Get the database path for a table (convenience function).
Args:
table: Table name (e.g., 'messages', 'decisions')
Returns:
Path to the appropriate database file
Example:
from scripts.core.db_router import get_db_for_table
db_path = get_db_for_table('messages') # Returns sessions.db path
"""
return _get_router().get_db_path(table)
def get_connection(table: str, **kwargs) -> sqlite3.Connection: """ Get a database connection for a table (convenience function).
Args:
table: Table name
**kwargs: Additional arguments for sqlite3.connect()
Returns:
sqlite3.Connection to the appropriate database
Example:
from scripts.core.db_router import get_connection
conn = get_connection('decisions') # Connects to org.db
cursor = conn.execute("SELECT * FROM decisions LIMIT 10")
"""
return _get_router().get_connection(table, **kwargs)
def get_tier_for_table(table: str) -> str: """ Get the tier name for a table (convenience function).
Args:
table: Table name
Returns:
Tier name as string ('platform', 'org', 'sessions', 'projects', 'legacy')
"""
return _get_router().get_tier(table).value
def is_irreplaceable_table(table: str) -> bool: """ Check if a table contains irreplaceable data (Tier 2).
Args:
table: Table name
Returns:
True if table is in org.db (CRITICAL backup required)
"""
return _get_router().get_tier(table) == DatabaseTier.ORG
def is_regenerable_table(table: str) -> bool: """ Check if a table contains regenerable data (Tier 1, 3, or 4).
Args:
table: Table name
Returns:
True if table can be regenerated from source data
"""
tier = _get_router().get_tier(table)
return tier in (DatabaseTier.PLATFORM, DatabaseTier.SESSIONS, DatabaseTier.PROJECTS)
-----------------------------------------------------------------------------
CLI for Testing
-----------------------------------------------------------------------------
if name == "main": import argparse import json
parser = argparse.ArgumentParser(
description="CODITECT Database Router (ADR-118)"
)
parser.add_argument(
"--table", "-t",
help="Get database for a specific table"
)
parser.add_argument(
"--list-tables", "-l",
action="store_true",
help="List all table routing"
)
parser.add_argument(
"--tier",
choices=["platform", "org", "sessions", "projects"],
help="List tables for a specific tier"
)
parser.add_argument(
"--json", "-j",
action="store_true",
help="Output as JSON"
)
parser.add_argument(
"--verify",
action="store_true",
help="Verify all database files exist"
)
args = parser.parse_args()
router = DatabaseRouter()
if args.table:
tier = router.get_tier(args.table)
db_path = router.get_db_path(args.table)
if args.json:
print(json.dumps({
"table": args.table,
"tier": tier.value,
"database": str(db_path),
"exists": db_path.exists(),
"irreplaceable": tier == DatabaseTier.ORG,
}))
else:
print(f"Table: {args.table}")
print(f"Tier: {tier.value} (Tier {list(DatabaseTier).index(tier) + 1})")
print(f"DB: {db_path}")
print(f"Exists: {db_path.exists()}")
if tier == DatabaseTier.ORG:
print("⚠️ IRREPLACEABLE - Requires CRITICAL backup")
elif args.list_tables:
by_tier = {}
for table, tier in sorted(TABLE_ROUTING.items()):
if tier.value not in by_tier:
by_tier[tier.value] = []
by_tier[tier.value].append(table)
if args.json:
print(json.dumps(by_tier, indent=2))
else:
print("ADR-118 Table Routing")
print("=" * 50)
for tier_name in ["platform", "org", "sessions", "projects"]:
tables = by_tier.get(tier_name, [])
tier_num = ["platform", "org", "sessions", "projects"].index(tier_name) + 1
critical = " (CRITICAL)" if tier_name == "org" else ""
print(f"\nTier {tier_num}: {tier_name}.db{critical}")
for t in sorted(tables):
print(f" - {t}")
elif args.tier:
tier_enum = DatabaseTier(args.tier)
tables = router.get_tables_by_tier(tier_enum)
if args.json:
print(json.dumps({"tier": args.tier, "tables": sorted(tables)}))
else:
print(f"Tables in {args.tier}.db:")
for t in sorted(tables):
print(f" - {t}")
elif args.verify:
print("Verifying ADR-118 database files...")
all_ok = True
for tier in DatabaseTier:
if tier == DatabaseTier.LEGACY:
continue # Skip deprecated
db_path = router._db_paths[tier]
exists = db_path.exists()
status = "✅" if exists else "❌"
print(f"{status} {tier.value}.db: {db_path}")
if not exists:
all_ok = False
if all_ok:
print("\n✅ All databases exist")
else:
print("\n❌ Some databases missing - run CODITECT-CORE-INITIAL-SETUP.py")
else:
parser.print_help()