#!/usr/bin/env python3
"""
CODITECT Learning Management System - Database Migration Script
(scripts/learning-db-migrate.py)

Migrates onboarding data from JSON to SQLite and creates the LMS schema.

Metadata:
    component_type: script
    version: 1.0.0
    audience: contributor
    status: stable
    keywords: analysis, automation, database, learning, migrate
    created: 2025-12-22
    updated: 2025-12-22
    language: python
    executable: true
    python_version: 3.10+
    dependencies: none
    modifies_files: false
    network_access: false
    requires_auth: false

Usage:
    python3 scripts/learning-db-migrate.py --all
    python3 scripts/learning-db-migrate.py --create-tables
    python3 scripts/learning-db-migrate.py --migrate-progress
    python3 scripts/learning-db-migrate.py --migrate-telemetry
    python3 scripts/learning-db-migrate.py --seed-modules
    python3 scripts/learning-db-migrate.py --verify
    python3 scripts/learning-db-migrate.py --export-to-json

Version: 1.0.0
Author: CODITECT Core Team
"""
import argparse
import json
import sqlite3
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional, Dict, List, Any

# Get script directory for path resolution (works from any cwd).
# Fix: original had Path(file), which is a NameError; __file__ is intended.
SCRIPT_DIR = Path(__file__).resolve().parent
CORE_ROOT = SCRIPT_DIR.parent

# ADR-114 & ADR-118: Use centralized path discovery.
sys.path.insert(0, str(SCRIPT_DIR / "core"))
try:
    from paths import get_org_db_path, ORG_DB
    DB_PATH = ORG_DB  # LMS data goes to org.db (Tier 2 - irreplaceable user data)
except ImportError:
    # Fallback for backward compatibility when the centralized paths module
    # is unavailable: probe the legacy PROJECTS location first, then ~/.coditect.
    _user_data = Path.home() / "PROJECTS" / ".coditect-data" / "context-storage"
    if _user_data.exists():
        ORG_DB = _user_data / "org.db"
    else:
        ORG_DB = Path.home() / ".coditect" / "context-storage" / "org.db"
    DB_PATH = ORG_DB  # Backward compatibility alias

# Source files for migration and the design doc describing the schema.
PROGRESS_JSON = CORE_ROOT / ".coditect" / "onboarding-full-progress.json"
TELEMETRY_JSONL = CORE_ROOT / ".coditect" / "telemetry" / "onboarding_events.jsonl"
SCHEMA_SQL = CORE_ROOT / "docs" / "09-research-analysis" / "LMS-DATABASE-DESIGN.md"

# Phase definitions (matches onboarding-progress-manager.py)
PHASES = {
    0: {"name": "welcome", "title": "Welcome & Assessment", "duration_min": 3},
    1: {"name": "big_picture", "title": "The Big Picture", "duration_min": 2},
    2: {"name": "commands", "title": "Commands", "duration_min": 3, "badge": "command_explorer"},
    3: {"name": "agents", "title": "Agents", "duration_min": 5, "badge": "agent_invoker"},
    4: {"name": "skills", "title": "Skills", "duration_min": 3, "badge": "skill_spotter"},
    5: {"name": "scripts", "title": "Scripts", "duration_min": 2, "badge": "script_runner"},
    6: {"name": "tools", "title": "Tools", "duration_min": 2, "badge": "tool_aware"},
    7: {"name": "workflows", "title": "Workflows", "duration_min": 3, "badge": "workflow_designer"},
    8: {"name": "cookbook", "title": "Cookbook", "duration_min": 3, "badge": "recipe_finder"},
    9: {"name": "integration", "title": "Integration", "duration_min": 2},
    10: {"name": "final_check", "title": "Final Check", "duration_min": 2, "badge": "onboarding_complete"},
}

# Badge definitions
BADGES = {
    "command_explorer": {"icon": "💻", "title": "Command Explorer", "description": "Mastered slash commands", "category": "skill"},
    "agent_invoker": {"icon": "🤖", "title": "Agent Invoker", "description": "Successfully invoked agents with Task()", "category": "skill"},
    "skill_spotter": {"icon": "⚡", "title": "Skill Spotter", "description": "Understands background automation", "category": "skill"},
    "script_runner": {"icon": "📜", "title": "Script Runner", "description": "Ran Python scripts successfully", "category": "skill"},
    "tool_aware": {"icon": "🔧", "title": "Tool Aware", "description": "Understands Claude's capabilities", "category": "skill"},
    "workflow_designer": {"icon": "🔄", "title": "Workflow Designer", "description": "Can design multi-component workflows", "category": "skill"},
    "recipe_finder": {"icon": "📖", "title": "Recipe Finder", "description": "Knows how to use the Cookbook", "category": "skill"},
    "onboarding_complete": {"icon": "🎓", "title": "CODITECT Graduate", "description": "Completed full onboarding", "category": "milestone"},
    "speed_learner": {"icon": "⚡", "title": "Speed Learner", "description": "Completed onboarding in under 20 min", "category": "achievement"},
    "perfect_score": {"icon": "💯", "title": "Perfect Score", "description": "Got 7/7 on final quiz", "category": "achievement"},
}
def get_timestamp() -> str: """Get current ISO timestamp.""" return datetime.now(timezone.utc).isoformat()
def get_db() -> sqlite3.Connection:
    """Open a connection to the LMS database at DB_PATH.

    Exits the process with status 1 if the database file does not exist;
    the printed hint tells the user to run the context indexer first.

    Returns:
        sqlite3.Connection with row_factory set to sqlite3.Row so rows
        can be accessed by column name.
    """
    if not DB_PATH.exists():
        print(f"Error: Database not found at {DB_PATH}")
        print("Run: python3 scripts/context-db.py --index")
        sys.exit(1)

    conn = sqlite3.connect(str(DB_PATH))
    conn.row_factory = sqlite3.Row
    return conn
def create_tables():
    """Create all LMS tables and indexes in the configured database.

    NOTE(review): the original docstring/message said "sessions.db (ADR-118
    Tier 3)", but DB_PATH resolves to org.db (Tier 2) at module level -
    confirm the intended tier. The message below now reports DB_PATH itself.

    Idempotent: every statement uses CREATE ... IF NOT EXISTS.
    Exits with status 1 on any SQLite error.
    """
    print(f"Creating LMS tables in {DB_PATH} (ADR-118)...")
    conn = get_db()

    # Schema DDL (mirrors docs/09-research-analysis/LMS-DATABASE-DESIGN.md)
    schema = """
    -- Learning Users Table
    CREATE TABLE IF NOT EXISTS learning_users (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        user_id TEXT UNIQUE NOT NULL,
        experience_level TEXT,
        ai_familiarity TEXT,
        learning_goal TEXT,
        learning_style TEXT,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        last_activity TEXT DEFAULT CURRENT_TIMESTAMP,
        onboarding_completed BOOLEAN DEFAULT 0,
        certification_level TEXT,
        total_time_seconds INTEGER DEFAULT 0,
        total_modules_completed INTEGER DEFAULT 0,
        total_badges_earned INTEGER DEFAULT 0
    );

    CREATE INDEX IF NOT EXISTS idx_users_user_id ON learning_users(user_id);
    CREATE INDEX IF NOT EXISTS idx_users_experience ON learning_users(experience_level);
    CREATE INDEX IF NOT EXISTS idx_users_last_activity ON learning_users(last_activity);

    -- Learning Modules Table
    CREATE TABLE IF NOT EXISTS learning_modules (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        module_key TEXT UNIQUE NOT NULL,
        module_type TEXT NOT NULL,
        category TEXT NOT NULL,
        title TEXT NOT NULL,
        description TEXT,
        difficulty_level TEXT,
        estimated_duration_minutes INTEGER,
        prerequisite_modules TEXT,
        learning_objectives TEXT,
        content_path TEXT,
        has_quiz BOOLEAN DEFAULT 0,
        has_exercise BOOLEAN DEFAULT 0,
        badge_id TEXT,
        sort_order INTEGER DEFAULT 0,
        is_active BOOLEAN DEFAULT 1,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        updated_at TEXT DEFAULT CURRENT_TIMESTAMP
    );

    CREATE INDEX IF NOT EXISTS idx_modules_type ON learning_modules(module_type);
    CREATE INDEX IF NOT EXISTS idx_modules_category ON learning_modules(category);
    CREATE INDEX IF NOT EXISTS idx_modules_difficulty ON learning_modules(difficulty_level);
    CREATE INDEX IF NOT EXISTS idx_modules_sort ON learning_modules(sort_order);

    -- Learning Progress Table
    CREATE TABLE IF NOT EXISTS learning_progress (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        user_id TEXT NOT NULL,
        module_id INTEGER NOT NULL,
        status TEXT NOT NULL,
        started_at TEXT,
        completed_at TEXT,
        time_spent_seconds INTEGER DEFAULT 0,
        quiz_score INTEGER,
        quiz_max_score INTEGER,
        exercise_completed BOOLEAN DEFAULT 0,
        review_needed BOOLEAN DEFAULT 0,
        notes TEXT,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        updated_at TEXT DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (user_id) REFERENCES learning_users(user_id) ON DELETE CASCADE,
        FOREIGN KEY (module_id) REFERENCES learning_modules(id) ON DELETE CASCADE,
        UNIQUE(user_id, module_id)
    );

    CREATE INDEX IF NOT EXISTS idx_progress_user ON learning_progress(user_id);
    CREATE INDEX IF NOT EXISTS idx_progress_module ON learning_progress(module_id);
    CREATE INDEX IF NOT EXISTS idx_progress_status ON learning_progress(status);
    CREATE INDEX IF NOT EXISTS idx_progress_completed ON learning_progress(completed_at);

    -- Learning Badges Table
    CREATE TABLE IF NOT EXISTS learning_badges (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        badge_key TEXT UNIQUE NOT NULL,
        icon TEXT NOT NULL,
        title TEXT NOT NULL,
        description TEXT,
        badge_category TEXT,
        rarity TEXT,
        criteria_type TEXT,
        criteria_data TEXT,
        sort_order INTEGER DEFAULT 0,
        is_active BOOLEAN DEFAULT 1,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP
    );

    CREATE INDEX IF NOT EXISTS idx_badges_key ON learning_badges(badge_key);
    CREATE INDEX IF NOT EXISTS idx_badges_category ON learning_badges(badge_category);

    -- Learning Achievements Table
    CREATE TABLE IF NOT EXISTS learning_achievements (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        user_id TEXT NOT NULL,
        badge_id INTEGER NOT NULL,
        earned_at TEXT DEFAULT CURRENT_TIMESTAMP,
        module_id INTEGER,
        notes TEXT,
        FOREIGN KEY (user_id) REFERENCES learning_users(user_id) ON DELETE CASCADE,
        FOREIGN KEY (badge_id) REFERENCES learning_badges(id) ON DELETE CASCADE,
        FOREIGN KEY (module_id) REFERENCES learning_modules(id) ON DELETE SET NULL,
        UNIQUE(user_id, badge_id)
    );

    CREATE INDEX IF NOT EXISTS idx_achievements_user ON learning_achievements(user_id);
    CREATE INDEX IF NOT EXISTS idx_achievements_badge ON learning_achievements(badge_id);
    CREATE INDEX IF NOT EXISTS idx_achievements_earned ON learning_achievements(earned_at);

    -- Learning Analytics Events Table
    CREATE TABLE IF NOT EXISTS learning_analytics_events (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        event_id TEXT UNIQUE NOT NULL,
        user_id TEXT NOT NULL,
        session_id TEXT,
        event_type TEXT NOT NULL,
        event_category TEXT,
        module_id INTEGER,
        badge_id INTEGER,
        event_data TEXT,
        timestamp TEXT DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (user_id) REFERENCES learning_users(user_id) ON DELETE CASCADE,
        FOREIGN KEY (module_id) REFERENCES learning_modules(id) ON DELETE SET NULL,
        FOREIGN KEY (badge_id) REFERENCES learning_badges(id) ON DELETE SET NULL
    );

    CREATE INDEX IF NOT EXISTS idx_events_user ON learning_analytics_events(user_id);
    CREATE INDEX IF NOT EXISTS idx_events_type ON learning_analytics_events(event_type);
    CREATE INDEX IF NOT EXISTS idx_events_timestamp ON learning_analytics_events(timestamp);
    CREATE INDEX IF NOT EXISTS idx_events_session ON learning_analytics_events(session_id);
    """

    try:
        conn.executescript(schema)
        conn.commit()
        print("✅ All LMS tables created successfully.")

        # Verify by listing the learning_* tables that now exist.
        tables = conn.execute("""
            SELECT name FROM sqlite_master
            WHERE type='table' AND name LIKE 'learning_%'
            ORDER BY name
        """).fetchall()
        print(f"   Created {len(tables)} tables:")
        for table in tables:
            print(f"   - {table['name']}")
    except sqlite3.Error as e:
        print(f"❌ Error creating tables: {e}")
        sys.exit(1)
    finally:
        conn.close()
def seed_modules():
    """Seed the learning_modules table with one row per onboarding phase.

    Uses INSERT OR IGNORE, so re-running is safe (existing module_keys are
    left untouched). Phase number doubles as sort_order.
    """
    print("Seeding onboarding modules...")
    conn = get_db()

    seeded = 0
    for phase_num, phase_info in PHASES.items():
        module_key = f"onboarding_phase_{phase_num}"

        # Map the phase name onto a module category; anything that does not
        # match a known keyword falls back to 'general'.
        name = phase_info['name']
        if 'command' in name:
            category = 'commands'
        elif 'agent' in name:
            category = 'agents'
        elif 'skill' in name:
            category = 'skills'
        elif 'script' in name:
            category = 'scripts'
        elif 'tool' in name:
            category = 'tools'
        elif 'workflow' in name:
            category = 'workflows'
        elif 'cookbook' in name:
            category = 'cookbook'
        else:
            category = 'general'

        try:
            conn.execute("""
                INSERT OR IGNORE INTO learning_modules
                (module_key, module_type, category, title, description,
                 difficulty_level, estimated_duration_minutes, badge_id,
                 sort_order, is_active)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                module_key,
                'onboarding',
                category,
                phase_info['title'],
                f"Phase {phase_num}: {phase_info['title']}",
                'beginner',
                phase_info['duration_min'],
                phase_info.get('badge'),  # None for phases without a badge
                phase_num,
                1
            ))
            seeded += 1
        except sqlite3.Error as e:
            print(f"  Warning: Could not seed {module_key}: {e}")

    conn.commit()
    conn.close()
    print(f"✅ Seeded {seeded} onboarding modules.")
def seed_badges():
    """Seed the learning_badges table from the BADGES definitions.

    Uses INSERT OR IGNORE, so re-running is safe. Rarity is derived from
    category: 'skill' badges are common, everything else is rare.
    """
    print("Seeding badges...")
    conn = get_db()

    seeded = 0
    # enumerate preserves BADGES insertion order, so sort_order matches the
    # original list(BADGES.keys()).index(...) values without an O(n^2) scan.
    for sort_order, (badge_key, badge_info) in enumerate(BADGES.items()):
        try:
            conn.execute("""
                INSERT OR IGNORE INTO learning_badges
                (badge_key, icon, title, description, badge_category,
                 rarity, sort_order, is_active)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                badge_key,
                badge_info['icon'],
                badge_info['title'],
                badge_info['description'],
                badge_info['category'],
                'common' if badge_info['category'] == 'skill' else 'rare',
                sort_order,
                1
            ))
            seeded += 1
        except sqlite3.Error as e:
            print(f"  Warning: Could not seed {badge_key}: {e}")

    conn.commit()
    conn.close()
    print(f"✅ Seeded {seeded} badges.")
def migrate_progress():
    """Migrate onboarding-full-progress.json into the SQLite LMS tables.

    Upserts the user profile, then per-phase progress records, then earned
    badges. No-op (with a message) if the JSON file does not exist.
    Modules and badges must already be seeded (--seed-modules) or phase
    records will be skipped with a warning.
    """
    if not PROGRESS_JSON.exists():
        print("⏭️  No progress file found. Skipping migration.")
        return

    print(f"Migrating progress from {PROGRESS_JSON}...")
    with open(PROGRESS_JSON) as f:
        progress = json.load(f)

    conn = get_db()

    # Create or update user.
    # NOTE(review): the user id is the onboarding start timestamp - confirm
    # this matches how other LMS scripts identify users.
    user_id = progress.get('started_at', 'default')
    profile = progress.get('profile', {})

    try:
        conn.execute("""
            INSERT OR REPLACE INTO learning_users
            (user_id, experience_level, ai_familiarity, learning_goal, learning_style,
             created_at, last_activity, onboarding_completed, total_time_seconds,
             total_badges_earned)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            user_id,
            profile.get('experience_level'),
            profile.get('ai_familiarity'),
            profile.get('learning_goal'),
            profile.get('learning_style'),
            progress.get('started_at'),
            progress.get('last_activity'),
            1 if progress.get('completed_at') else 0,
            progress.get('total_time_seconds', 0),
            len(progress.get('badges_earned', []))
        ))
        print(f"  ✅ Migrated user profile: {user_id}")
    except sqlite3.Error as e:
        # Without a user row, phase/badge rows would violate referential
        # intent, so bail out of the whole migration.
        print(f"  ❌ Error migrating user: {e}")
        conn.close()
        return

    # Migrate phases (keys look like "<num>_<name>", e.g. "2_commands").
    phases = progress.get('phases', {})
    migrated_phases = 0
    for phase_key, phase_data in phases.items():
        phase_num = int(phase_key.split('_')[0])
        module_key = f"onboarding_phase_{phase_num}"

        # Resolve the module row this phase maps to.
        module = conn.execute("""
            SELECT id FROM learning_modules WHERE module_key = ?
        """, (module_key,)).fetchone()
        if not module:
            print(f"  ⚠️  Module not found: {module_key}")
            continue

        module_id = module['id']
        try:
            conn.execute("""
                INSERT OR REPLACE INTO learning_progress
                (user_id, module_id, status, started_at, completed_at,
                 time_spent_seconds, quiz_score, updated_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                user_id,
                module_id,
                phase_data.get('status', 'pending'),
                # The JSON has no per-phase start time; approximate with the
                # overall start for any phase that has been touched.
                progress.get('started_at') if phase_data.get('status') != 'pending' else None,
                phase_data.get('completed_at'),
                phase_data.get('time_spent_seconds', 0),
                phase_data.get('score'),
                progress.get('last_activity')
            ))
            migrated_phases += 1
        except sqlite3.Error as e:
            print(f"  ⚠️  Error migrating phase {phase_num}: {e}")

    print(f"  ✅ Migrated {migrated_phases} phase records.")

    # Migrate earned badges into learning_achievements.
    migrated_badges = 0
    for badge_key in progress.get('badges_earned', []):
        badge = conn.execute("""
            SELECT id FROM learning_badges WHERE badge_key = ?
        """, (badge_key,)).fetchone()
        if badge:
            try:
                conn.execute("""
                    INSERT OR IGNORE INTO learning_achievements
                    (user_id, badge_id, earned_at)
                    VALUES (?, ?, ?)
                """, (user_id, badge['id'], progress.get('last_activity')))
                migrated_badges += 1
            except sqlite3.Error as e:
                print(f"  ⚠️  Error migrating badge {badge_key}: {e}")

    print(f"  ✅ Migrated {migrated_badges} badges.")

    conn.commit()
    conn.close()
    print(f"✅ Progress migration complete for user {user_id}")
def migrate_telemetry():
    """Migrate onboarding_events.jsonl into learning_analytics_events.

    Reads the JSONL file line by line; malformed lines and failed inserts
    are counted as skipped rather than aborting the migration. No-op (with
    a message) if the telemetry file does not exist.
    """
    if not TELEMETRY_JSONL.exists():
        print("⏭️  No telemetry file found. Skipping migration.")
        return

    print(f"Migrating telemetry from {TELEMETRY_JSONL}...")
    conn = get_db()

    migrated = 0
    skipped = 0
    with open(TELEMETRY_JSONL) as f:
        for line in f:
            try:
                event = json.loads(line.strip())
            except json.JSONDecodeError:
                skipped += 1
                continue

            # Build a unique event ID from timestamp, type, and a running
            # counter (distinguishes events sharing the same timestamp).
            event_id = f"{event.get('timestamp', '')}_{event.get('event_type', '')}_{migrated}"
            try:
                conn.execute("""
                    INSERT OR IGNORE INTO learning_analytics_events
                    (event_id, user_id, session_id, event_type, event_category,
                     event_data, timestamp)
                    VALUES (?, ?, ?, ?, ?, ?, ?)
                """, (
                    event_id,
                    # NOTE(review): session_id doubles as user_id here -
                    # confirm telemetry events carry no separate user id.
                    event.get('session_id', 'default'),
                    event.get('session_id'),
                    event.get('event_type'),
                    'onboarding',
                    json.dumps(event.get('data', {})),
                    event.get('timestamp')
                ))
                migrated += 1
            except sqlite3.Error:
                skipped += 1

    conn.commit()
    conn.close()
    print(f"✅ Migrated {migrated} telemetry events ({skipped} skipped).")
def verify_migration():
    """Print row counts per LMS table plus a per-user consistency check.

    Verification "fails" (warning message at the end) when the Modules or
    Badges tables are empty, or when any count query raises - both imply
    seeding/migration has not run.
    """
    print("\n📊 Migration Verification:\n")
    conn = get_db()

    checks = {
        "Users": "SELECT COUNT(*) FROM learning_users",
        "Modules": "SELECT COUNT(*) FROM learning_modules",
        "Progress Records": "SELECT COUNT(*) FROM learning_progress",
        "Badges": "SELECT COUNT(*) FROM learning_badges",
        "Achievements": "SELECT COUNT(*) FROM learning_achievements",
        "Analytics Events": "SELECT COUNT(*) FROM learning_analytics_events"
    }

    all_passed = True
    for name, sql in checks.items():
        try:
            count = conn.execute(sql).fetchone()[0]
            status = "✅" if count > 0 else "⚠️ "
            print(f"  {status} {name}: {count:,}")
            if name in ["Modules", "Badges"] and count == 0:
                all_passed = False
                print(f"     → Run --seed-modules to populate modules")
        except sqlite3.Error as e:
            print(f"  ❌ {name}: Error - {e}")
            all_passed = False

    # Check data consistency: how many progress records each user has.
    print("\n🔍 Data Consistency Checks:\n")
    user_progress = conn.execute("""
        SELECT u.user_id, COUNT(p.id) as progress_count
        FROM learning_users u
        LEFT JOIN learning_progress p ON u.user_id = p.user_id
        GROUP BY u.user_id
    """).fetchall()
    for row in user_progress:
        print(f"  User {row['user_id'][:20]}: {row['progress_count']} progress records")

    conn.close()

    if all_passed:
        print("\n✅ Migration verification passed!")
    else:
        print("\n⚠️  Migration verification found issues. Review output above.")
def export_to_json():
    """Export SQLite LMS data back to a timestamped JSON backup (rollback).

    Reconstructs the onboarding-full-progress.json structure from the first
    learning_users row plus its progress and achievement records, and writes
    it next to PROGRESS_JSON as onboarding-full-progress-backup-<ts>.json.
    """
    print("Exporting SQLite data to JSON...")
    conn = get_db()

    # Export user and progress. NOTE(review): only the first user row is
    # exported - confirm the LMS is single-user by design.
    user = conn.execute("SELECT * FROM learning_users LIMIT 1").fetchone()
    if not user:
        print("❌ No user data found to export.")
        conn.close()
        return

    user_id = user['user_id']

    # Reconstruct the progress JSON structure expected by the onboarding
    # progress manager.
    progress = {
        "version": "1.0.0",
        "started_at": user['created_at'],
        "last_activity": user['last_activity'],
        "profile": {
            "experience_level": user['experience_level'],
            "ai_familiarity": user['ai_familiarity'],
            "learning_goal": user['learning_goal'],
            "learning_style": user['learning_style']
        },
        "phases": {},
        "current_phase": 0,
        "badges_earned": [],
        "total_time_seconds": user['total_time_seconds'],
        # Exact completion time is not stored on the user row, so use "now"
        # as a stand-in when onboarding is marked complete.
        "completed_at": get_timestamp() if user['onboarding_completed'] else None
    }

    # Get onboarding progress records in phase order.
    progress_records = conn.execute("""
        SELECT p.*, m.module_key
        FROM learning_progress p
        JOIN learning_modules m ON p.module_id = m.id
        WHERE p.user_id = ? AND m.module_type = 'onboarding'
        ORDER BY m.sort_order
    """, (user_id,)).fetchall()

    for record in progress_records:
        # module_key is "onboarding_phase_<num>"; recover the phase number.
        phase_num = int(record['module_key'].split('_')[-1])
        phase_key = f"{phase_num}_{PHASES[phase_num]['name']}"
        progress['phases'][phase_key] = {
            "status": record['status'],
            "score": record['quiz_score'],
            "completed_at": record['completed_at'],
            "time_spent_seconds": record['time_spent_seconds']
        }
        if record['status'] == 'in_progress':
            progress['current_phase'] = phase_num

    # Collect earned badge keys.
    badges = conn.execute("""
        SELECT b.badge_key
        FROM learning_achievements a
        JOIN learning_badges b ON a.badge_id = b.id
        WHERE a.user_id = ?
    """, (user_id,)).fetchall()
    progress['badges_earned'] = [b['badge_key'] for b in badges]

    # Write to a timestamped backup file beside the original JSON.
    backup_path = PROGRESS_JSON.parent / f"onboarding-full-progress-backup-{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    with open(backup_path, 'w') as f:
        json.dump(progress, f, indent=2)

    print(f"✅ Progress exported to {backup_path}")
    conn.close()
def main():
    """Parse CLI flags and run the requested migration step(s).

    --all runs the full pipeline (create + seed + migrate + verify);
    individual flags run single steps; with no flags, prints help.
    """
    parser = argparse.ArgumentParser(
        description="CODITECT LMS Database Migration Script",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Complete migration (recommended)
  python3 scripts/learning-db-migrate.py --all

  # Step-by-step migration
  python3 scripts/learning-db-migrate.py --create-tables
  python3 scripts/learning-db-migrate.py --seed-modules
  python3 scripts/learning-db-migrate.py --migrate-progress
  python3 scripts/learning-db-migrate.py --verify

  # Rollback preparation
  python3 scripts/learning-db-migrate.py --export-to-json
"""
    )
    parser.add_argument('--all', action='store_true',
                        help='Run all migration steps (create + seed + migrate + verify)')
    parser.add_argument('--create-tables', action='store_true',
                        help='Create LMS tables in the LMS database (ADR-118)')
    parser.add_argument('--seed-modules', action='store_true',
                        help='Seed onboarding modules and badges')
    parser.add_argument('--migrate-progress', action='store_true',
                        help='Migrate onboarding-full-progress.json')
    parser.add_argument('--migrate-telemetry', action='store_true',
                        help='Migrate onboarding_events.jsonl')
    parser.add_argument('--verify', action='store_true',
                        help='Verify migration success')
    parser.add_argument('--export-to-json', action='store_true',
                        help='Export SQLite data to JSON (for rollback)')

    args = parser.parse_args()

    # Run all steps in dependency order if --all specified.
    if args.all:
        create_tables()
        seed_modules()
        seed_badges()
        migrate_progress()
        migrate_telemetry()
        verify_migration()
        return

    # Run individual steps (flags are not mutually exclusive).
    if args.create_tables:
        create_tables()
    if args.seed_modules:
        seed_modules()
        seed_badges()
    if args.migrate_progress:
        migrate_progress()
    if args.migrate_telemetry:
        migrate_telemetry()
    if args.verify:
        verify_migration()
    if args.export_to_json:
        export_to_json()

    # Show help if no arguments were given.
    if not any([args.all, args.create_tables, args.seed_modules,
                args.migrate_progress, args.migrate_telemetry,
                args.verify, args.export_to_json]):
        parser.print_help()
        print("\nRecommended: Run with --all for complete migration")


if __name__ == '__main__':
    main()