#!/usr/bin/env python3
"""
---
title: "Constants"
component_type: script
version: "1.0.0"
audience: contributor
status: stable
summary: "Migrate existing tasklist files to V2 work item hierarchy."
keywords: ['analysis', 'database', 'deployment', 'migrate', 'tasks']
tokens: ~500
created: 2025-12-22
updated: 2025-12-22
script_name: "migrate-tasks-to-v2.py"
language: python
executable: true
usage: "python3 scripts/migrate-tasks-to-v2.py [options]"
python_version: "3.10+"
dependencies: []
modifies_files: false
network_access: false
requires_auth: false
---

Migrate existing tasklist files to V2 work item hierarchy.

Parses markdown checkbox tasks and imports them into the work_items table
following ADR-006 Work Item Hierarchy schema.

Usage:
    python3 scripts/migrate-tasks-to-v2.py --epic E001 --source docs/05-project-planning/sprints/ORCHESTRATOR-PROJECT-PLAN.md
    python3 scripts/migrate-tasks-to-v2.py --analyze  # Show what would be imported
    python3 scripts/migrate-tasks-to-v2.py --all      # Migrate all critical blocker files
"""

import argparse
import json
import re
import sqlite3
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

# Constants
CODITECT_CORE = Path(__file__).parent.parent
V2_DIR = CODITECT_CORE / "docs" / "05-project-planning" / "v2"

# ADR-114 & ADR-118: Use centralized path discovery
sys.path.insert(0, str(Path(__file__).parent / "core"))
try:
    from paths import get_sessions_db_path, SESSIONS_DB
    CONTEXT_DB = SESSIONS_DB  # Task data goes to sessions.db (Tier 3)
except ImportError:
    # Fallback for backward compatibility
    _user_data = Path.home() / "PROJECTS" / ".coditect-data" / "context-storage"
    if _user_data.exists():
        CONTEXT_DB = _user_data / "sessions.db"
    else:
        CONTEXT_DB = CODITECT_CORE / "context-storage" / "sessions.db"

# Epic mapping from critical blockers: each epic lists the tasklist files
# whose checkbox tasks should be imported under it.
EPIC_MAPPING = {
    "E001": {
        "title": "Core Platform Autonomy",
        "files": [
            "docs/05-project-planning/sprints/ORCHESTRATOR-PROJECT-PLAN.md",
            "docs/05-project-planning/COMPONENT-ACTIVATION-TASKLIST.md",
        ]
    },
    "E002": {
        "title": "TOON Integration",
        "files": [
            "docs/03-architecture/toon-integration/TOON-TESTING-MASTER.md",
            "docs/03-architecture/toon-integration/TOON-INTEGRATION-MASTER.md",
        ]
    },
    "E003": {
        "title": "Documentation & Standards",
        "files": [
            "docs/06-implementation-guides/processes/DOCUMENTATION-IMPROVEMENT-CHECKLIST.md",
            "docs/05-project-planning/documentation-project/DOCUMENTATION-REORGANIZATION-MASTER.md",
        ]
    },
    "E004": {
        "title": "Distribution & Packaging",
        "files": [
            "distribution/coditect-core-initial-installation-packaging/docs/architecture/CICD-BUILD-ORCHESTRATION-PLAN.md",
            "distribution/coditect-core-initial-installation-packaging/docs/deployment/RELEASE-CHECKLIST.md",
            "distribution/coditect-core-initial-installation-packaging/docs/architecture/CICD-INFRASTRUCTURE-GAP-ANALYSIS.md",
        ]
    },
    "E005": {
        "title": "Executor Refactoring",
        "files": [
            "docs/05-project-planning/orchestration/executor-refactoring/TASKLIST-EXECUTOR-REFACTORING.md",
        ]
    },
    "E006": {
        "title": "Production Readiness",
        "files": [
            "docs/05-project-planning/strategic/PRODUCTION-READINESS-ROADMAP.md",
            "docs/09-research-analysis/audits/PRODUCTION-READINESS-ASSESSMENT.md",
            "docs/10-special-topics/deduplication/DEDUPLICATION-SYSTEM-MASTER.md",
        ]
    },
    "E007": {
        "title": "Training & Onboarding",
        "files": [
            "docs/11-training-certification/CODITECT-OPERATOR-PROGRESS-TRACKER.md",
            "docs/01-getting-started/quick-starts/AZ1.AI-CODITECT-1-2-3-QUICKSTART.md",
            "docs/11-training-certification/1-2-3-CODITECT-ONBOARDING-GUIDE.md",
        ]
    },
    "E008": {
        "title": "Memory Context System",
        "files": [
            "docs/10-special-topics/memory-context/MEMORY-CONTEXT-COMPLETE.md",
            "docs/05-project-planning/sprints/SPRINT-1-MEMORY-CONTEXT-PROJECT-PLAN.md",
        ]
    },
    "E009": {
        "title": "LMS Implementation",
        "files": [
            "docs/09-research-analysis/LMS-IMPLEMENTATION-ROADMAP.md",
            "docs/09-research-analysis/LMS-DATABASE-DESIGN.md",
        ]
    },
    "E010": {
        "title": "Hooks & Workflows",
        "files": [
            "docs/06-implementation-guides/hooks/HOOKS-IMPLEMENTATION-STRATEGY.md",
            "workflows/templates/incident-response.template.md",
        ]
    },
}
def parse_tasks_from_markdown(filepath: Path) -> list[dict]:
    """
    Parse markdown file for checkbox tasks.

    Returns list of task dicts with:
    - title: Task text (task-ID prefix stripped if present)
    - task_id: Extracted task identifier (e.g. "1.1.1"), or None
    - status: 'completed' or 'backlog'
    - type: 'task' (top-level) or 'subtask' (indented)
    - source_line: Line number in the source file (1-based)
    - source_file: Path relative to CODITECT_CORE
    - phase: Number from the nearest "Phase N" H2 header above, if any
    - parent_header: Nearest H3 (falling back to H2) header above
    - indent_level: Indentation level (0=top, 1=subtask, etc.)
    """
    tasks = []
    current_h2 = None
    current_h3 = None
    current_phase = None

    # Missing or unreadable files are skipped (best-effort migration),
    # reported on stdout rather than raised.
    if not filepath.exists():
        print(f" [SKIP] File not found: {filepath}")
        return []
    try:
        content = filepath.read_text(encoding='utf-8')
    except Exception as e:
        print(f" [ERROR] Cannot read {filepath}: {e}")
        return []

    lines = content.split('\n')
    for line_num, line in enumerate(lines, 1):
        # Track headings so each task records its nearest section.
        if line.startswith('## '):
            current_h2 = line[3:].strip()
            # Extract phase number if present (e.g. "Phase 2: ...").
            # NOTE: the phase persists until another "Phase N" H2 appears.
            phase_match = re.search(r'Phase\s*(\d+)', current_h2, re.I)
            if phase_match:
                current_phase = phase_match.group(1)
            current_h3 = None  # new H2 resets the H3 context
        elif line.startswith('### '):
            current_h3 = line[4:].strip()

        # Parse checkbox tasks: "- [ ] text" or "- [x] text" (any case).
        checkbox_match = re.match(r'^(\s*)- \[([ xX])\] (.+)$', line)
        if checkbox_match:
            indent = checkbox_match.group(1)
            checked = checkbox_match.group(2).lower() == 'x'
            title = checkbox_match.group(3).strip()

            # Calculate indent level (2 spaces = 1 level)
            indent_level = len(indent) // 2

            # Determine task type from nesting depth.
            task_type = 'subtask' if indent_level > 0 else 'task'

            # Extract task ID if present (e.g., "Task 1.1.1:" or "T001:")
            task_id_match = re.match(r'^(?:Task\s+)?([A-Z]?\d+(?:\.\d+)*):?\s*(.+)$', title, re.I)
            if task_id_match:
                task_id = task_id_match.group(1)
                title = task_id_match.group(2)
            else:
                task_id = None

            tasks.append({
                'title': title,
                'task_id': task_id,
                'status': 'completed' if checked else 'backlog',
                'type': task_type,
                'source_line': line_num,
                'source_file': str(filepath.relative_to(CODITECT_CORE)),
                'phase': current_phase,
                'parent_header': current_h3 or current_h2,
                'indent_level': indent_level,
            })

    return tasks
def analyze_migration(epic_id: Optional[str] = None) -> dict:
    """Analyze what would be migrated without making changes.

    Args:
        epic_id: Restrict the analysis to one epic; None means all epics
            in EPIC_MAPPING.

    Returns:
        Dict with per-epic stats under 'epics' plus 'total_tasks',
        'total_completed', and 'total_pending' aggregates.
    """
    results = {
        'epics': {},
        'total_tasks': 0,
        'total_completed': 0,
        'total_pending': 0,
    }

    epics_to_process = [epic_id] if epic_id else list(EPIC_MAPPING.keys())

    for eid in epics_to_process:
        if eid not in EPIC_MAPPING:
            print(f"[WARN] Unknown epic: {eid}")
            continue

        epic_info = EPIC_MAPPING[eid]
        epic_tasks = []
        print(f"\n{eid}: {epic_info['title']}")
        print("=" * 50)

        for source_file in epic_info['files']:
            filepath = CODITECT_CORE / source_file
            tasks = parse_tasks_from_markdown(filepath)
            pending = sum(1 for t in tasks if t['status'] == 'backlog')
            completed = sum(1 for t in tasks if t['status'] == 'completed')
            print(f" {source_file}")
            print(f" Tasks: {len(tasks)} ({completed} done, {pending} pending)")
            epic_tasks.extend(tasks)

        total_pending = sum(1 for t in epic_tasks if t['status'] == 'backlog')
        total_completed = sum(1 for t in epic_tasks if t['status'] == 'completed')
        results['epics'][eid] = {
            'title': epic_info['title'],
            'total': len(epic_tasks),
            'completed': total_completed,
            'pending': total_pending,
            'files': len(epic_info['files']),
        }
        results['total_tasks'] += len(epic_tasks)
        results['total_completed'] += total_completed
        results['total_pending'] += total_pending
        print(f" TOTAL: {len(epic_tasks)} tasks ({total_completed} done, {total_pending} pending)")

    return results
def generate_epic_tasklist(epic_id: str) -> str:
    """Generate V2 tasklist markdown for an epic.

    Args:
        epic_id: Key into EPIC_MAPPING (e.g. "E001").

    Returns:
        Markdown document grouping the epic's tasks by their nearest
        source header, followed by the list of source files.

    Raises:
        ValueError: If epic_id is not in EPIC_MAPPING.
    """
    if epic_id not in EPIC_MAPPING:
        raise ValueError(f"Unknown epic: {epic_id}")

    epic_info = EPIC_MAPPING[epic_id]
    all_tasks = []
    for source_file in epic_info['files']:
        filepath = CODITECT_CORE / source_file
        tasks = parse_tasks_from_markdown(filepath)
        all_tasks.extend(tasks)

    # Group by parent header (insertion order preserves source order).
    by_header = {}
    for task in all_tasks:
        header = task['parent_header'] or 'Uncategorized'
        if header not in by_header:
            by_header[header] = []
        by_header[header].append(task)

    # Generate markdown
    lines = [
        f"# {epic_id}: {epic_info['title']} - Task List",
        "",
        f"**Generated:** {datetime.now(timezone.utc).isoformat()}",
        f"**Total Tasks:** {len(all_tasks)}",
        f"**Completed:** {sum(1 for t in all_tasks if t['status'] == 'completed')}",
        f"**Pending:** {sum(1 for t in all_tasks if t['status'] == 'backlog')}",
        "",
        "---",
        "",
    ]

    for header, tasks in by_header.items():
        lines.append(f"## {header}")
        lines.append("")
        for task in tasks:
            checkbox = "[x]" if task['status'] == 'completed' else "[ ]"
            indent = " " * task['indent_level']
            lines.append(f"{indent}- {checkbox} {task['title']}")
        lines.append("")

    lines.extend([
        "---",
        "",
        "## Source Files",
        "",
    ])
    for source_file in epic_info['files']:
        lines.append(f"- `{source_file}`")

    return "\n".join(lines)
def insert_work_items(epic_id: str, dry_run: bool = True) -> int:
    """Insert parsed tasks into work_items table.

    Args:
        epic_id: Key into EPIC_MAPPING (e.g. "E001").
        dry_run: When True (the default), only report what would be
            inserted; no database connection is opened.

    Returns:
        Number of rows inserted (or that would be inserted, in dry-run
        mode); 0 when the database or work_items table is missing.

    Raises:
        ValueError: If epic_id is not in EPIC_MAPPING.
    """
    if epic_id not in EPIC_MAPPING:
        raise ValueError(f"Unknown epic: {epic_id}")

    epic_info = EPIC_MAPPING[epic_id]
    all_tasks = []
    for source_file in epic_info['files']:
        filepath = CODITECT_CORE / source_file
        tasks = parse_tasks_from_markdown(filepath)
        all_tasks.extend(tasks)

    if dry_run:
        print(f"[DRY RUN] Would insert {len(all_tasks)} tasks for {epic_id}")
        return len(all_tasks)

    # Connect to database
    if not CONTEXT_DB.exists():
        print(f"[ERROR] Database not found: {CONTEXT_DB}")
        return 0

    conn = sqlite3.connect(CONTEXT_DB)
    try:
        cursor = conn.cursor()

        # Check if work_items table exists
        cursor.execute("""
            SELECT name FROM sqlite_master
            WHERE type='table' AND name='work_items'
        """)
        if not cursor.fetchone():
            print("[ERROR] work_items table not found. Run init-work-item-db.py first.")
            return 0

        inserted = 0
        task_counter = 1
        for task in all_tasks:
            # IDs derive from the epic number: E001 -> T001.0001, T001.0002, ...
            # The counter advances only on success so IDs stay contiguous.
            work_item_id = f"T{epic_id[1:]}.{task_counter:04d}"
            try:
                cursor.execute("""
                    INSERT OR REPLACE INTO work_items (
                        id, type, title, status, parent_id, project_id,
                        source_file, source_line, created_at
                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                """, (
                    work_item_id,
                    task['type'],
                    task['title'],
                    task['status'],
                    epic_id,
                    "P001",
                    task['source_file'],
                    task['source_line'],
                    datetime.now(timezone.utc).isoformat()
                ))
                inserted += 1
                task_counter += 1
            except Exception as e:
                # Best-effort import: report and continue with the next task.
                print(f"[ERROR] Failed to insert task: {e}")

        conn.commit()
    finally:
        # Ensure the connection is released even if an insert batch fails.
        conn.close()

    print(f"[OK] Inserted {inserted} tasks for {epic_id}")
    return inserted
def main():
    """CLI entry point: dispatch to analyze, tasklist generation, or DB insert."""
    parser = argparse.ArgumentParser(description="Migrate tasks to V2 work item hierarchy")
    parser.add_argument("--epic", help="Epic ID to migrate (e.g., E001)")
    # NOTE(review): --source is accepted but not currently consumed below —
    # epics always use their EPIC_MAPPING file lists. Confirm before removing.
    parser.add_argument("--source", help="Source file to parse (overrides epic default)")
    parser.add_argument("--analyze", action="store_true", help="Analyze without making changes")
    parser.add_argument("--all", action="store_true", help="Process all epics")
    parser.add_argument("--generate-tasklist", action="store_true", help="Generate V2 tasklist markdown")
    parser.add_argument("--insert-db", action="store_true", help="Insert into work_items table")
    parser.add_argument("--dry-run", action="store_true", help="Don't actually insert (with --insert-db)")
    parser.add_argument("--output", help="Output file for generated tasklist")

    args = parser.parse_args()

    if args.analyze or args.all:
        print("=" * 60)
        print("V2 TASK MIGRATION ANALYSIS")
        print("=" * 60)
        results = analyze_migration(args.epic if not args.all else None)

        print("\n" + "=" * 60)
        print("SUMMARY")
        print("=" * 60)
        print(f"Total Epics: {len(results['epics'])}")
        print(f"Total Tasks: {results['total_tasks']}")
        print(f"Completed: {results['total_completed']}")
        print(f"Pending: {results['total_pending']}")

        # Save analysis
        output_path = V2_DIR / "migration-analysis.json"
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w') as f:
            json.dump(results, f, indent=2)
        print(f"\nAnalysis saved to: {output_path}")

    elif args.generate_tasklist and args.epic:
        print(f"Generating tasklist for {args.epic}...")
        content = generate_epic_tasklist(args.epic)

        if args.output:
            output_path = Path(args.output)
        else:
            # Find the epic's directory (e.g. v2/epics/E001-core-platform/);
            # fall back to a flat file under V2_DIR if none exists.
            epic_dirs = list((V2_DIR / "epics").glob(f"{args.epic}-*"))
            if epic_dirs:
                output_path = epic_dirs[0] / "TASKLIST.md"
            else:
                output_path = V2_DIR / f"{args.epic}-TASKLIST.md"

        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(content)
        print(f"Tasklist saved to: {output_path}")

    elif args.insert_db and args.epic:
        count = insert_work_items(args.epic, dry_run=args.dry_run)
        print(f"{'Would insert' if args.dry_run else 'Inserted'} {count} work items")

    else:
        parser.print_help()
        print("\nExamples:")
        print(" python3 scripts/migrate-tasks-to-v2.py --analyze")
        print(" python3 scripts/migrate-tasks-to-v2.py --all --analyze")
        print(" python3 scripts/migrate-tasks-to-v2.py --epic E001 --generate-tasklist")
        print(" python3 scripts/migrate-tasks-to-v2.py --epic E001 --insert-db --dry-run")


if __name__ == "__main__":
    main()