#!/usr/bin/env python3
"""
---
title: "Constants"
component_type: script
version: "1.0.0"
audience: contributor
status: stable
summary: "Migrate existing tasklist files to V2 work item hierarchy."
keywords: ['analysis', 'database', 'deployment', 'migrate', 'tasks']
tokens: ~500
created: 2025-12-22
updated: 2025-12-22
script_name: "migrate-tasks-to-v2.py"
language: python
executable: true
usage: "python3 scripts/migrate-tasks-to-v2.py [options]"
python_version: "3.10+"
dependencies: []
modifies_files: false
network_access: false
requires_auth: false
---

Migrate existing tasklist files to V2 work item hierarchy.

Parses markdown checkbox tasks and imports them into the work_items table
following ADR-006 Work Item Hierarchy schema.

Usage:
    python3 scripts/migrate-tasks-to-v2.py --epic E001 --source docs/05-project-planning/sprints/ORCHESTRATOR-PROJECT-PLAN.md
    python3 scripts/migrate-tasks-to-v2.py --analyze  # Show what would be imported
    python3 scripts/migrate-tasks-to-v2.py --all      # Migrate all critical blocker files
"""

import argparse
import json
import re
import sqlite3
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

# Constants
CODITECT_CORE = Path(__file__).parent.parent
V2_DIR = CODITECT_CORE / "docs" / "05-project-planning" / "v2"

# ADR-114 & ADR-118: Use centralized path discovery
sys.path.insert(0, str(Path(__file__).parent / "core"))
try:
    from paths import get_sessions_db_path, SESSIONS_DB
    CONTEXT_DB = SESSIONS_DB  # Task data goes to sessions.db (Tier 3)
except ImportError:
    # Fallback for backward compatibility
    _user_data = Path.home() / "PROJECTS" / ".coditect-data" / "context-storage"
    if _user_data.exists():
        CONTEXT_DB = _user_data / "sessions.db"
    else:
        CONTEXT_DB = CODITECT_CORE / "context-storage" / "sessions.db"

# Epic mapping from critical blockers: each epic lists the tasklist files
# whose checkbox tasks should be imported under it.
EPIC_MAPPING = {
    "E001": {
        "title": "Core Platform Autonomy",
        "files": [
            "docs/05-project-planning/sprints/ORCHESTRATOR-PROJECT-PLAN.md",
            "docs/05-project-planning/COMPONENT-ACTIVATION-TASKLIST.md",
        ]
    },
    "E002": {
        "title": "TOON Integration",
        "files": [
            "docs/03-architecture/toon-integration/TOON-TESTING-MASTER.md",
            "docs/03-architecture/toon-integration/TOON-INTEGRATION-MASTER.md",
        ]
    },
    "E003": {
        "title": "Documentation & Standards",
        "files": [
            "docs/06-implementation-guides/processes/DOCUMENTATION-IMPROVEMENT-CHECKLIST.md",
            "docs/05-project-planning/documentation-project/DOCUMENTATION-REORGANIZATION-MASTER.md",
        ]
    },
    "E004": {
        "title": "Distribution & Packaging",
        "files": [
            "distribution/coditect-core-initial-installation-packaging/docs/architecture/CICD-BUILD-ORCHESTRATION-PLAN.md",
            "distribution/coditect-core-initial-installation-packaging/docs/deployment/RELEASE-CHECKLIST.md",
            "distribution/coditect-core-initial-installation-packaging/docs/architecture/CICD-INFRASTRUCTURE-GAP-ANALYSIS.md",
        ]
    },
    "E005": {
        "title": "Executor Refactoring",
        "files": [
            "docs/05-project-planning/orchestration/executor-refactoring/TASKLIST-EXECUTOR-REFACTORING.md",
        ]
    },
    "E006": {
        "title": "Production Readiness",
        "files": [
            "docs/05-project-planning/strategic/PRODUCTION-READINESS-ROADMAP.md",
            "docs/09-research-analysis/audits/PRODUCTION-READINESS-ASSESSMENT.md",
            "docs/10-special-topics/deduplication/DEDUPLICATION-SYSTEM-MASTER.md",
        ]
    },
    "E007": {
        "title": "Training & Onboarding",
        "files": [
            "docs/11-training-certification/CODITECT-OPERATOR-PROGRESS-TRACKER.md",
            "docs/01-getting-started/quick-starts/AZ1.AI-CODITECT-1-2-3-QUICKSTART.md",
            "docs/11-training-certification/1-2-3-CODITECT-ONBOARDING-GUIDE.md",
        ]
    },
    "E008": {
        "title": "Memory Context System",
        "files": [
            "docs/10-special-topics/memory-context/MEMORY-CONTEXT-COMPLETE.md",
            "docs/05-project-planning/sprints/SPRINT-1-MEMORY-CONTEXT-PROJECT-PLAN.md",
        ]
    },
    "E009": {
        "title": "LMS Implementation",
        "files": [
            "docs/09-research-analysis/LMS-IMPLEMENTATION-ROADMAP.md",
            "docs/09-research-analysis/LMS-DATABASE-DESIGN.md",
        ]
    },
    "E010": {
        "title": "Hooks & Workflows",
        "files": [
            "docs/06-implementation-guides/hooks/HOOKS-IMPLEMENTATION-STRATEGY.md",
            "workflows/templates/incident-response.template.md",
        ]
    },
}
def parse_tasks_from_markdown(filepath: Path) -> list[dict]:
    """
    Parse markdown file for checkbox tasks.

    Returns list of task dicts with:
    - title: Task text (task-ID prefix stripped if present)
    - task_id: Extracted task identifier (e.g. "1.1.1"), or None
    - status: 'completed' or 'backlog'
    - type: 'task' (top-level) or 'subtask' (indented)
    - source_line: Line number in the source file (1-based)
    - source_file: Path relative to CODITECT_CORE
    - phase: Number from the nearest "Phase N" H2 header above, if any
    - parent_header: Nearest H3 (falling back to H2) header above
    - indent_level: Indentation level (0=top, 1=subtask, etc.)
    """
    tasks = []
    current_h2 = None
    current_h3 = None
    current_phase = None

    # Missing or unreadable files are skipped (best-effort migration),
    # reported on stdout rather than raised.
    if not filepath.exists():
        print(f" [SKIP] File not found: {filepath}")
        return []
    try:
        content = filepath.read_text(encoding='utf-8')
    except Exception as e:
        print(f" [ERROR] Cannot read {filepath}: {e}")
        return []

    lines = content.split('\n')
    for line_num, line in enumerate(lines, 1):
        # Track headings so each task records its nearest section.
        if line.startswith('## '):
            current_h2 = line[3:].strip()
            # Extract phase number if present (e.g. "Phase 2: ...").
            # NOTE: the phase persists until another "Phase N" H2 appears.
            phase_match = re.search(r'Phase\s*(\d+)', current_h2, re.I)
            if phase_match:
                current_phase = phase_match.group(1)
            current_h3 = None  # new H2 resets the H3 context
        elif line.startswith('### '):
            current_h3 = line[4:].strip()

        # Parse checkbox tasks: "- [ ] text" or "- [x] text" (any case).
        checkbox_match = re.match(r'^(\s*)- \[([ xX])\] (.+)$', line)
        if checkbox_match:
            indent = checkbox_match.group(1)
            checked = checkbox_match.group(2).lower() == 'x'
            title = checkbox_match.group(3).strip()

            # Calculate indent level (2 spaces = 1 level)
            indent_level = len(indent) // 2

            # Determine task type from nesting depth.
            task_type = 'subtask' if indent_level > 0 else 'task'

            # Extract task ID if present (e.g., "Task 1.1.1:" or "T001:")
            task_id_match = re.match(r'^(?:Task\s+)?([A-Z]?\d+(?:\.\d+)*):?\s*(.+)$', title, re.I)
            if task_id_match:
                task_id = task_id_match.group(1)
                title = task_id_match.group(2)
            else:
                task_id = None

            tasks.append({
                'title': title,
                'task_id': task_id,
                'status': 'completed' if checked else 'backlog',
                'type': task_type,
                'source_line': line_num,
                'source_file': str(filepath.relative_to(CODITECT_CORE)),
                'phase': current_phase,
                'parent_header': current_h3 or current_h2,
                'indent_level': indent_level,
            })

    return tasks
def analyze_migration(epic_id: Optional[str] = None) -> dict:
    """Analyze what would be migrated without making changes.

    Args:
        epic_id: Restrict the analysis to one epic; None means all epics
            in EPIC_MAPPING.

    Returns:
        Dict with per-epic stats under 'epics' plus 'total_tasks',
        'total_completed', and 'total_pending' aggregates.
    """
    results = {
        'epics': {},
        'total_tasks': 0,
        'total_completed': 0,
        'total_pending': 0,
    }

    epics_to_process = [epic_id] if epic_id else list(EPIC_MAPPING.keys())

    for eid in epics_to_process:
        if eid not in EPIC_MAPPING:
            print(f"[WARN] Unknown epic: {eid}")
            continue

        epic_info = EPIC_MAPPING[eid]
        epic_tasks = []
        print(f"\n{eid}: {epic_info['title']}")
        print("=" * 50)

        for source_file in epic_info['files']:
            filepath = CODITECT_CORE / source_file
            tasks = parse_tasks_from_markdown(filepath)
            pending = sum(1 for t in tasks if t['status'] == 'backlog')
            completed = sum(1 for t in tasks if t['status'] == 'completed')
            print(f" {source_file}")
            print(f" Tasks: {len(tasks)} ({completed} done, {pending} pending)")
            epic_tasks.extend(tasks)

        total_pending = sum(1 for t in epic_tasks if t['status'] == 'backlog')
        total_completed = sum(1 for t in epic_tasks if t['status'] == 'completed')
        results['epics'][eid] = {
            'title': epic_info['title'],
            'total': len(epic_tasks),
            'completed': total_completed,
            'pending': total_pending,
            'files': len(epic_info['files']),
        }
        results['total_tasks'] += len(epic_tasks)
        results['total_completed'] += total_completed
        results['total_pending'] += total_pending
        print(f" TOTAL: {len(epic_tasks)} tasks ({total_completed} done, {total_pending} pending)")

    return results
def generate_epic_tasklist(epic_id: str) -> str:
    """Generate V2 tasklist markdown for an epic.

    Args:
        epic_id: Key into EPIC_MAPPING (e.g. "E001").

    Returns:
        Markdown document grouping the epic's tasks by their nearest
        source header, followed by the list of source files.

    Raises:
        ValueError: If epic_id is not in EPIC_MAPPING.
    """
    if epic_id not in EPIC_MAPPING:
        raise ValueError(f"Unknown epic: {epic_id}")

    epic_info = EPIC_MAPPING[epic_id]
    all_tasks = []
    for source_file in epic_info['files']:
        filepath = CODITECT_CORE / source_file
        tasks = parse_tasks_from_markdown(filepath)
        all_tasks.extend(tasks)

    # Group by parent header (insertion order preserves source order).
    by_header = {}
    for task in all_tasks:
        header = task['parent_header'] or 'Uncategorized'
        if header not in by_header:
            by_header[header] = []
        by_header[header].append(task)

    # Generate markdown
    lines = [
        f"# {epic_id}: {epic_info['title']} - Task List",
        "",
        f"**Generated:** {datetime.now(timezone.utc).isoformat()}",
        f"**Total Tasks:** {len(all_tasks)}",
        f"**Completed:** {sum(1 for t in all_tasks if t['status'] == 'completed')}",
        f"**Pending:** {sum(1 for t in all_tasks if t['status'] == 'backlog')}",
        "",
        "---",
        "",
    ]

    for header, tasks in by_header.items():
        lines.append(f"## {header}")
        lines.append("")
        for task in tasks:
            checkbox = "[x]" if task['status'] == 'completed' else "[ ]"
            indent = " " * task['indent_level']
            lines.append(f"{indent}- {checkbox} {task['title']}")
        lines.append("")

    lines.extend([
        "---",
        "",
        "## Source Files",
        "",
    ])
    for source_file in epic_info['files']:
        lines.append(f"- `{source_file}`")

    return "\n".join(lines)
def insert_work_items(epic_id: str, dry_run: bool = True) -> int:
    """Insert parsed tasks into work_items table.

    Args:
        epic_id: Key into EPIC_MAPPING (e.g. "E001").
        dry_run: When True (the default), only report what would be
            inserted; no database connection is opened.

    Returns:
        Number of rows inserted (or that would be inserted, in dry-run
        mode); 0 when the database or work_items table is missing.

    Raises:
        ValueError: If epic_id is not in EPIC_MAPPING.
    """
    if epic_id not in EPIC_MAPPING:
        raise ValueError(f"Unknown epic: {epic_id}")

    epic_info = EPIC_MAPPING[epic_id]
    all_tasks = []
    for source_file in epic_info['files']:
        filepath = CODITECT_CORE / source_file
        tasks = parse_tasks_from_markdown(filepath)
        all_tasks.extend(tasks)

    if dry_run:
        print(f"[DRY RUN] Would insert {len(all_tasks)} tasks for {epic_id}")
        return len(all_tasks)

    # Connect to database
    if not CONTEXT_DB.exists():
        print(f"[ERROR] Database not found: {CONTEXT_DB}")
        return 0

    conn = sqlite3.connect(CONTEXT_DB)
    try:
        cursor = conn.cursor()

        # Check if work_items table exists
        cursor.execute("""
            SELECT name FROM sqlite_master
            WHERE type='table' AND name='work_items'
        """)
        if not cursor.fetchone():
            print("[ERROR] work_items table not found. Run init-work-item-db.py first.")
            return 0

        inserted = 0
        task_counter = 1
        for task in all_tasks:
            # IDs derive from the epic number: E001 -> T001.0001, T001.0002, ...
            # The counter advances only on success so IDs stay contiguous.
            work_item_id = f"T{epic_id[1:]}.{task_counter:04d}"
            try:
                cursor.execute("""
                    INSERT OR REPLACE INTO work_items (
                        id, type, title, status, parent_id, project_id,
                        source_file, source_line, created_at
                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                """, (
                    work_item_id,
                    task['type'],
                    task['title'],
                    task['status'],
                    epic_id,
                    "P001",
                    task['source_file'],
                    task['source_line'],
                    datetime.now(timezone.utc).isoformat()
                ))
                inserted += 1
                task_counter += 1
            except Exception as e:
                # Best-effort import: report and continue with the next task.
                print(f"[ERROR] Failed to insert task: {e}")

        conn.commit()
    finally:
        # Ensure the connection is released even if an insert batch fails.
        conn.close()

    print(f"[OK] Inserted {inserted} tasks for {epic_id}")
    return inserted
def main():
    """CLI entry point: dispatch to analyze, tasklist generation, or DB insert."""
    parser = argparse.ArgumentParser(description="Migrate tasks to V2 work item hierarchy")
    parser.add_argument("--epic", help="Epic ID to migrate (e.g., E001)")
    # NOTE(review): --source is accepted but not currently consumed below —
    # epics always use their EPIC_MAPPING file lists. Confirm before removing.
    parser.add_argument("--source", help="Source file to parse (overrides epic default)")
    parser.add_argument("--analyze", action="store_true", help="Analyze without making changes")
    parser.add_argument("--all", action="store_true", help="Process all epics")
    parser.add_argument("--generate-tasklist", action="store_true", help="Generate V2 tasklist markdown")
    parser.add_argument("--insert-db", action="store_true", help="Insert into work_items table")
    parser.add_argument("--dry-run", action="store_true", help="Don't actually insert (with --insert-db)")
    parser.add_argument("--output", help="Output file for generated tasklist")

    args = parser.parse_args()

    if args.analyze or args.all:
        print("=" * 60)
        print("V2 TASK MIGRATION ANALYSIS")
        print("=" * 60)
        results = analyze_migration(args.epic if not args.all else None)

        print("\n" + "=" * 60)
        print("SUMMARY")
        print("=" * 60)
        print(f"Total Epics: {len(results['epics'])}")
        print(f"Total Tasks: {results['total_tasks']}")
        print(f"Completed: {results['total_completed']}")
        print(f"Pending: {results['total_pending']}")

        # Save analysis
        output_path = V2_DIR / "migration-analysis.json"
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w') as f:
            json.dump(results, f, indent=2)
        print(f"\nAnalysis saved to: {output_path}")

    elif args.generate_tasklist and args.epic:
        print(f"Generating tasklist for {args.epic}...")
        content = generate_epic_tasklist(args.epic)

        if args.output:
            output_path = Path(args.output)
        else:
            # Find the epic's directory (e.g. v2/epics/E001-core-platform/);
            # fall back to a flat file under V2_DIR if none exists.
            epic_dirs = list((V2_DIR / "epics").glob(f"{args.epic}-*"))
            if epic_dirs:
                output_path = epic_dirs[0] / "TASKLIST.md"
            else:
                output_path = V2_DIR / f"{args.epic}-TASKLIST.md"

        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(content)
        print(f"Tasklist saved to: {output_path}")

    elif args.insert_db and args.epic:
        count = insert_work_items(args.epic, dry_run=args.dry_run)
        print(f"{'Would insert' if args.dry_run else 'Inserted'} {count} work items")

    else:
        parser.print_help()
        print("\nExamples:")
        print(" python3 scripts/migrate-tasks-to-v2.py --analyze")
        print(" python3 scripts/migrate-tasks-to-v2.py --all --analyze")
        print(" python3 scripts/migrate-tasks-to-v2.py --epic E001 --generate-tasklist")
        print(" python3 scripts/migrate-tasks-to-v2.py --epic E001 --insert-db --dry-run")


if __name__ == "__main__":
    main()