#!/usr/bin/env python3
"""
Map CODITECT skills to CEF (CODITECT Experience Framework) tracks.

Usage:
    python3 scripts/map-skills-to-cef.py [--update] [--report]

Examples:
    python3 scripts/map-skills-to-cef.py --report  # Generate mapping report
    python3 scripts/map-skills-to-cef.py --update  # Update skill frontmatter
"""

import argparse
import json
import re
import sys
from collections import defaultdict
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple

# CEF Track Definitions

# Registry of CEF tracks, keyed by track id. Each entry carries:
#   name          - human-readable track title (also used for the name-match bonus)
#   keywords      - lowercase terms searched for in a skill's name/description/tags
#   pilot_mapping - list of pilot track letters associated with the CEF track
#   weight        - multiplier applied to the track's raw score
CEF_TRACKS = {
    # Creation Tracks
    "C-1": {
        "name": "Architecture & Design",
        "keywords": ["architecture", "design", "system", "api", "database", "pattern"],
        "pilot_mapping": ["A"],
        "weight": 1.0,
    },
    "C-2": {
        "name": "Implementation & Development",
        "keywords": ["development", "coding", "implementation", "frontend", "backend", "component"],
        "pilot_mapping": ["A", "B", "I"],
        "weight": 1.0,
    },
    "C-3": {
        "name": "Content & Media Creation",
        "keywords": ["content", "media", "documentation", "writing", "video", "audio"],
        "pilot_mapping": ["F"],
        "weight": 1.0,
    },
    # Operations Tracks
    "O-1": {
        "name": "Infrastructure & Deployment",
        "keywords": ["infrastructure", "deployment", "devops", "cloud", "kubernetes", "docker"],
        "pilot_mapping": ["C"],
        "weight": 1.0,
    },
    "O-2": {
        "name": "Systems & Processes",
        "keywords": ["process", "workflow", "automation", "system", "operation"],
        "pilot_mapping": ["C", "K"],
        "weight": 1.0,
    },
    "O-3": {
        "name": "Management & Coordination",
        "keywords": ["management", "coordination", "project", "team", "planning"],
        "pilot_mapping": [],
        "weight": 1.0,
    },
    # Intelligence Tracks
    "I-1": {
        "name": "Analysis & Research",
        "keywords": ["analysis", "research", "investigation", "data", "query"],
        "pilot_mapping": ["J"],
        "weight": 1.0,
    },
    "I-2": {
        "name": "Decision & Strategy",
        "keywords": ["decision", "strategy", "architecture decision", "adr", "planning"],
        "pilot_mapping": ["H"],
        "weight": 1.0,
    },
    "I-3": {
        "name": "Optimization & Performance",
        "keywords": ["optimization", "performance", "efficiency", "benchmark", "profiling"],
        "pilot_mapping": ["H"],
        "weight": 1.0,
    },
    # Communication Tracks
    "CM-1": {
        "name": "Documentation & Knowledge",
        "keywords": ["documentation", "knowledge", "wiki", "technical writing"],
        "pilot_mapping": ["F"],
        "weight": 1.0,
    },
    "CM-2": {
        "name": "Teaching & Training",
        "keywords": ["teaching", "training", "mentoring", "guide", "onboarding"],
        "pilot_mapping": ["F"],
        "weight": 1.0,
    },
    "CM-3": {
        "name": "Collaboration & Community",
        "keywords": ["collaboration", "community", "communication", "team"],
        "pilot_mapping": [],
        "weight": 1.0,
    },
    # Foundation Tracks
    "F-1": {
        "name": "Quality & Testing",
        "keywords": ["testing", "quality", "qa", "validation", "verification", "test"],
        "pilot_mapping": ["E", "L"],
        "weight": 1.0,
    },
    "F-2": {
        "name": "Security & Privacy",
        "keywords": ["security", "privacy", "authentication", "encryption", "auth"],
        "pilot_mapping": ["D", "M"],
        "weight": 1.0,
    },
    "F-3": {
        "name": "Compliance & Standards",
        "keywords": ["compliance", "standards", "governance", "regulatory", "audit"],
        "pilot_mapping": [],
        "weight": 1.0,
    },
    # Growth Tracks
    "G-1": {
        "name": "Learning & Development",
        "keywords": ["learning", "development", "skill", "training", "education", "growth"],
        "pilot_mapping": ["F"],
        "weight": 1.0,
    },
    "G-2": {
        "name": "Scaling & Expansion",
        "keywords": ["scaling", "expansion", "growth", "capacity"],
        "pilot_mapping": ["H"],
        "weight": 1.0,
    },
    "G-3": {
        "name": "Evolution & Adaptation",
        "keywords": ["evolution", "adaptation", "modernization", "transformation"],
        "pilot_mapping": ["H"],
        "weight": 1.0,
    },
    # Protection Tracks
    "P-1": {
        "name": "Monitoring & Observability",
        "keywords": ["monitoring", "observability", "metrics", "logging", "alerting"],
        "pilot_mapping": ["J"],
        "weight": 1.0,
    },
    "P-2": {
        "name": "Auditing & Review",
        "keywords": ["audit", "review", "assessment", "evaluation", "inspection"],
        "pilot_mapping": [],
        "weight": 1.0,
    },
    "P-3": {
        "name": "Governance & Control",
        "keywords": ["governance", "control", "access", "rbac", "permissions"],
        "pilot_mapping": [],
        "weight": 1.0,
    },
    # Integration Tracks
    "IN-1": {
        "name": "Tool Integration",
        "keywords": ["integration", "api", "connector", "mcp", "tool"],
        "pilot_mapping": ["T"],
        "weight": 1.0,
    },
    "IN-2": {
        "name": "Workflow Automation",
        "keywords": ["automation", "workflow", "n8n", "orchestration"],
        "pilot_mapping": ["K"],
        "weight": 1.0,
    },
    "IN-3": {
        "name": "Data & Context Flow",
        "keywords": ["context", "data flow", "synchronization", "memory"],
        "pilot_mapping": ["J"],
        "weight": 1.0,
    },
    # Experience Tracks
    "E-1": {
        "name": "User Interface & Design",
        "keywords": ["ui", "interface", "design", "frontend", "component", "react"],
        "pilot_mapping": ["B", "I"],
        "weight": 1.0,
    },
    "E-2": {
        "name": "Interaction & Engagement",
        "keywords": ["interaction", "engagement", "ux", "user experience"],
        "pilot_mapping": [],
        "weight": 1.0,
    },
    "E-3": {
        "name": "Journey & Experience Design",
        "keywords": ["journey", "experience design", "service design", "touchpoint"],
        "pilot_mapping": [],
        "weight": 1.0,
    },
}

@dataclass
class SkillMapping:
    """Represents a skill's CEF mapping."""

    # Path of the SKILL.md file the mapping was derived from.
    skill_path: Path
    # Skill name taken from the file's frontmatter.
    name: str
    # Skill description taken from the frontmatter ('' when absent).
    description: str
    # Id of the highest-scoring CEF track (a key of CEF_TRACKS).
    primary_track: str
    # Ids of the next-best tracks that scored above zero.
    secondary_tracks: List[str]
    # Confidence in the primary track, in the range 0.0-1.0.
    confidence: float
    # Value of the frontmatter 'track' field, if the skill declares one.
    pilot_track: Optional[str] = None

class CEFMapper:
    """Maps skills to CEF tracks.

    Reads ``<skills_dir>/*/SKILL.md`` files, scores each skill's frontmatter
    (name, description, tags) against the keyword lists in ``CEF_TRACKS`` and
    records the best-matching track per skill in ``self.mappings``.
    """

    # Frontmatter block at the very start of a file:
    #   group 1 - opening '---' line
    #   group 2 - frontmatter text
    #   group 3 - closing '---' line (a trailing newline is optional at EOF)
    # Only spaces/tabs are allowed after the delimiters so that blank lines
    # following the frontmatter stay part of the document body.
    _FRONTMATTER_RE = re.compile(
        r'^(---[ \t]*\n)(.*?)(\n---[ \t]*(?:\n|$))', re.DOTALL
    )

    def __init__(self, skills_dir: Optional[Path] = None):
        """Create a mapper.

        Args:
            skills_dir: Directory whose sub-directories contain SKILL.md
                files. Defaults to ``Path("skills")``.
        """
        self.skills_dir = skills_dir or Path("skills")
        self.mappings: List[SkillMapping] = []
        self.track_counts: Dict[str, int] = defaultdict(int)

    @staticmethod
    def _has_term(term: str, text: str) -> bool:
        """Return True when *term* occurs in *text* at the start of a word.

        A plain substring test makes short keywords fire almost everywhere
        ("ui" in "build", "api" in "rapid", "test" in "latest"). Requiring
        that the match is not preceded by a letter or digit removes those
        false hits while still accepting suffixes ("tests", "apis") and
        separators such as '-' or '_' ("rest-api", "api_design").
        """
        return re.search(r'(?<![^\W_])' + re.escape(term), text) is not None

    @staticmethod
    def _has_key(frontmatter: str, key: str) -> bool:
        """Return True when *frontmatter* has a line defining exactly *key*."""
        pattern = r'^[ \t]*' + re.escape(key) + r'[ \t]*:'
        return re.search(pattern, frontmatter, re.MULTILINE) is not None

    def _parse_frontmatter(self, skill_path: Path) -> Dict:
        """Extract frontmatter from SKILL.md.

        This is a minimal ``key: value`` reader, not a YAML parser. Values
        are stored as strings with surrounding quotes removed. ``tags`` is
        stored as a list and may be written inline (``tags: [a, b]``) or as
        a block of ``- item`` lines.

        Args:
            skill_path: File to read (decoded as UTF-8).

        Returns:
            Parsed metadata, or ``{}`` when the file does not start with a
            ``---`` delimited frontmatter block. ``name`` and ``title`` are
            filled in from each other when only one is present.
        """
        content = skill_path.read_text(encoding="utf-8")

        match = self._FRONTMATTER_RE.match(content)
        if not match:
            return {}

        metadata: Dict = {}
        lines = match.group(2).split('\n')
        index = 0

        while index < len(lines):
            line = lines[index]
            index += 1

            if ':' not in line or line.strip().startswith('#'):
                continue

            key, value = line.split(':', 1)
            key = key.strip()
            value = value.strip()

            if key != 'tags':
                metadata[key] = value.strip('"\'')
                continue

            if value:
                # Inline form: tags: [a, "b", c]
                items = value.strip('[]').split(',')
            else:
                # Block form: consume the following '- item' lines.
                items = []
                while index < len(lines):
                    candidate = lines[index].strip()
                    if candidate and not candidate.startswith('-'):
                        break
                    index += 1
                    if candidate:
                        items.append(candidate[1:])

            cleaned = [item.strip().strip('"\'') for item in items]
            metadata['tags'] = [tag for tag in cleaned if tag]

        # Use title as fallback for name (skills should have both)
        if 'name' not in metadata and 'title' in metadata:
            metadata['name'] = metadata['title']
        if 'title' not in metadata and 'name' in metadata:
            metadata['title'] = metadata['name']

        return metadata

    def _calculate_track_scores(self, metadata: Dict) -> Dict[str, float]:
        """Calculate relevance scores for each CEF track.

        Per track the score is the sum of:
          * 3.0 for every track keyword found in the name/description,
          * 2.0 for every (tag, keyword) pair where the keyword is found
            in the tag,
          * 1.5 for every word of the track name longer than three
            characters that is found in the name/description,
        multiplied by the track's ``weight``. Matching is case-insensitive
        and terms must start a word (see ``_has_term``).

        Args:
            metadata: Parsed frontmatter; reads ``name``, ``description``
                and ``tags``.

        Returns:
            Mapping of track id to score, one entry per ``CEF_TRACKS`` key.
        """
        text = f"{metadata.get('name', '')} {metadata.get('description', '')}".lower()
        tags = [t.lower() for t in metadata.get('tags', [])]

        scores = {}

        for track_id, track_info in CEF_TRACKS.items():
            keywords = track_info['keywords']
            score = 0.0

            # Keyword matching
            score += 3.0 * sum(1 for kw in keywords if self._has_term(kw, text))

            # Tag matching
            score += 2.0 * sum(
                1 for tag in tags for kw in keywords if self._has_term(kw, tag)
            )

            # Name match bonus (skips short words such as "&" and "Flow")
            score += 1.5 * sum(
                1
                for word in track_info['name'].lower().split()
                if len(word) > 3 and self._has_term(word, text)
            )

            scores[track_id] = score * track_info['weight']

        return scores

    def _map_skill(self, skill_path: Path) -> "Optional[SkillMapping]":
        """Map a single skill to CEF tracks.

        Args:
            skill_path: SKILL.md file to analyse.

        Returns:
            A ``SkillMapping``, or ``None`` when the file has no frontmatter
            name/title or no tracks are defined. Ties are resolved in
            ``CEF_TRACKS`` definition order, so a skill that matches nothing
            is assigned the first track with a confidence of 0.
        """
        metadata = self._parse_frontmatter(skill_path)

        if not metadata.get('name'):
            return None

        scores = self._calculate_track_scores(metadata)

        if not scores:
            return None

        # Get top tracks (stable sort keeps definition order among ties)
        sorted_tracks = sorted(scores.items(), key=lambda x: x[1], reverse=True)

        primary_track, max_score = sorted_tracks[0]
        secondary_tracks = [t[0] for t in sorted_tracks[1:3] if t[1] > 0]

        # Calculate confidence: a score of 10 or more counts as fully confident
        confidence = min(max_score / 10, 1.0)

        return SkillMapping(
            skill_path=skill_path,
            name=metadata.get('name', ''),
            description=metadata.get('description', ''),
            primary_track=primary_track,
            secondary_tracks=secondary_tracks,
            confidence=confidence,
            # Existing pilot track, if the skill declares one
            pilot_track=metadata.get('track'),
        )

    def map_all_skills(self) -> None:
        """Map all skills in the skills directory.

        Appends one entry to ``self.mappings`` per mappable
        ``<skills_dir>/*/SKILL.md`` file and increments the primary track's
        counter in ``self.track_counts``. Files are processed in sorted path
        order so that reports are reproducible between runs.
        """
        skill_files = sorted(self.skills_dir.glob("*/SKILL.md"))

        print(f"Mapping {len(skill_files)} skills to CEF tracks...")

        for skill_path in skill_files:
            mapping = self._map_skill(skill_path)
            if mapping:
                self.mappings.append(mapping)
                self.track_counts[mapping.primary_track] += 1

        print(f"✅ Successfully mapped {len(self.mappings)} skills")

    def generate_report(self) -> str:
        """Generate mapping report.

        Returns:
            Markdown text with the per-track distribution, the skills grouped
            by primary track, the tracks without any skill and a short list
            of recommendations.
        """
        report = []
        report.append("# CEF Skill Mapping Report\n")
        report.append(f"Total Skills Mapped: {len(self.mappings)}\n")

        # Track distribution (one bar segment per two skills)
        report.append("\n## Track Distribution\n")
        for track_id in sorted(CEF_TRACKS):
            count = self.track_counts.get(track_id, 0)
            track_name = CEF_TRACKS[track_id]['name']
            bar = "█" * (count // 2)
            report.append(f"- **{track_id}** ({track_name}): {count} skills {bar}")

        # Skills by track
        report.append("\n## Skills by Track\n")

        skills_by_track = defaultdict(list)
        for mapping in self.mappings:
            skills_by_track[mapping.primary_track].append(mapping)

        for track_id in sorted(skills_by_track):
            track_name = CEF_TRACKS[track_id]['name']
            report.append(f"\n### {track_id}: {track_name}\n")

            for mapping in skills_by_track[track_id]:
                pilot_info = f" [PILOT: {mapping.pilot_track}]" if mapping.pilot_track else ""
                report.append(f"- {mapping.name} ({mapping.confidence:.0%}){pilot_info}")

        # Coverage analysis
        report.append("\n## Coverage Analysis\n")

        # Test the count rather than key presence so a zero-valued entry
        # still counts as empty.
        empty_tracks = [t for t in CEF_TRACKS if not self.track_counts.get(t)]
        if empty_tracks:
            report.append(f"\n**Empty Tracks ({len(empty_tracks)}):**")
            for track_id in empty_tracks:
                report.append(f"- {track_id}: {CEF_TRACKS[track_id]['name']}")

        # Recommendations (numbered consecutively even when a section is skipped)
        report.append("\n## Recommendations\n")

        item_number = 1
        if empty_tracks:
            report.append(f"{item_number}. **Develop skills for empty tracks:**")
            for track_id in empty_tracks[:3]:
                report.append(f"   - Create 3-5 skills for {track_id}: {CEF_TRACKS[track_id]['name']}")
            item_number += 1

        report.append(f"\n{item_number}. **Well-covered tracks (top 5):**")
        top_tracks = sorted(self.track_counts.items(), key=lambda x: x[1], reverse=True)[:5]
        for track_id, count in top_tracks:
            report.append(f"   - {track_id}: {count} skills")

        return "\n".join(report)

    def update_skill_files(self) -> None:
        """Update SKILL.md files with CEF track information and ensure name/title fields.

        For every entry in ``self.mappings`` whose file has a frontmatter
        block without a ``cef_track`` key, appends ``cef_track``,
        ``cef_confidence`` and (when present) ``cef_secondary`` to the
        frontmatter, inserts missing ``name``/``title`` keys at its top and
        rewrites the file as UTF-8. The document body is left untouched.
        """
        updated = 0

        for mapping in self.mappings:
            skill_path = mapping.skill_path
            content = skill_path.read_text(encoding="utf-8")

            # Parse frontmatter boundaries
            match = self._FRONTMATTER_RE.match(content)
            if not match:
                continue

            frontmatter = match.group(2)
            body = content[match.end():]

            # Only the frontmatter decides whether the file was already
            # processed; the body may legitimately mention "cef_track:".
            if self._has_key(frontmatter, 'cef_track'):
                continue

            lines = frontmatter.split('\n')

            # Add name/title if missing (exact keys, so "filename:" or
            # "subtitle:" do not count)
            if mapping.name:
                if not self._has_key(frontmatter, 'name'):
                    lines.insert(0, f"name: {mapping.name}")
                if not self._has_key(frontmatter, 'title'):
                    lines.insert(0, f"title: {mapping.name}")

            # Add CEF fields at the end of frontmatter
            lines.append(f"cef_track: {mapping.primary_track}")
            lines.append(f"cef_confidence: {mapping.confidence:.2f}")
            if mapping.secondary_tracks:
                lines.append(f"cef_secondary: [{', '.join(mapping.secondary_tracks)}]")

            new_frontmatter = '\n'.join(lines)
            new_content = f"---\n{new_frontmatter}\n---\n{body}"

            skill_path.write_text(new_content, encoding="utf-8")
            updated += 1

        print(f"✅ Updated {updated} skill files with CEF track information")

def main(argv: Optional[List[str]] = None) -> int:
    """Command-line entry point: map skills, then report and/or update.

    The report is generated when ``--report`` is given or when ``--update``
    is absent (i.e. it is the default action). With ``--output`` the report
    is written to that file, otherwise it is printed. ``--update`` rewrites
    the skill files' frontmatter after the report step.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Process exit code (always 0).
    """
    parser = argparse.ArgumentParser(description="Map skills to CEF tracks")
    parser.add_argument("--update", action="store_true", help="Update skill files")
    parser.add_argument("--report", action="store_true", help="Generate report")
    parser.add_argument("--output", type=str, help="Output file for report")

    args = parser.parse_args(argv)

    mapper = CEFMapper()
    mapper.map_all_skills()

    if args.report or not args.update:
        report = mapper.generate_report()

        if args.output:
            # The report contains non-ASCII characters (bar glyphs), so the
            # encoding must not depend on the platform locale.
            Path(args.output).write_text(report, encoding="utf-8")
            print(f"Report saved to {args.output}")
        else:
            print(report)

    if args.update:
        mapper.update_skill_files()

    return 0

# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    sys.exit(main())