#!/usr/bin/env python3
"""
Map CODITECT skills to CEF (CODITECT Experience Framework) tracks.

Usage:
    python3 scripts/map-skills-to-cef.py [--update] [--report] [--output FILE]

Examples:
    python3 scripts/map-skills-to-cef.py --report  # Generate mapping report
    python3 scripts/map-skills-to-cef.py --update  # Update skill frontmatter
"""
import argparse import json import re import sys from collections import defaultdict from dataclasses import dataclass, field from pathlib import Path from typing import Dict, List, Optional, Set, Tuple
# CEF Track Definitions
# Table of CEF tracks, keyed by track id (e.g. "C-1"). Each entry holds:
#   name          - human-readable track title; its words longer than three
#                   characters also earn a small "name match" bonus in scoring.
#   keywords      - lowercase terms looked up in a skill's name, description
#                   and tags when scoring the skill against the track.
#   pilot_mapping - list of single-letter codes; not read by the code visible
#                   in this file (presumably legacy pilot-track ids - confirm).
#   weight        - multiplier applied to the track's raw score.
CEF_TRACKS = {
    # Creation Tracks
    "C-1": {
        "name": "Architecture & Design",
        "keywords": ["architecture", "design", "system", "api", "database", "pattern"],
        "pilot_mapping": ["A"],
        "weight": 1.0,
    },
    "C-2": {
        "name": "Implementation & Development",
        "keywords": ["development", "coding", "implementation", "frontend", "backend", "component"],
        "pilot_mapping": ["A", "B", "I"],
        "weight": 1.0,
    },
    "C-3": {
        "name": "Content & Media Creation",
        "keywords": ["content", "media", "documentation", "writing", "video", "audio"],
        "pilot_mapping": ["F"],
        "weight": 1.0,
    },
    # Operations Tracks
    "O-1": {
        "name": "Infrastructure & Deployment",
        "keywords": ["infrastructure", "deployment", "devops", "cloud", "kubernetes", "docker"],
        "pilot_mapping": ["C"],
        "weight": 1.0,
    },
    "O-2": {
        "name": "Systems & Processes",
        "keywords": ["process", "workflow", "automation", "system", "operation"],
        "pilot_mapping": ["C", "K"],
        "weight": 1.0,
    },
    "O-3": {
        "name": "Management & Coordination",
        "keywords": ["management", "coordination", "project", "team", "planning"],
        "pilot_mapping": [],
        "weight": 1.0,
    },
    # Intelligence Tracks
    "I-1": {
        "name": "Analysis & Research",
        "keywords": ["analysis", "research", "investigation", "data", "query"],
        "pilot_mapping": ["J"],
        "weight": 1.0,
    },
    "I-2": {
        "name": "Decision & Strategy",
        "keywords": ["decision", "strategy", "architecture decision", "adr", "planning"],
        "pilot_mapping": ["H"],
        "weight": 1.0,
    },
    "I-3": {
        "name": "Optimization & Performance",
        "keywords": ["optimization", "performance", "efficiency", "benchmark", "profiling"],
        "pilot_mapping": ["H"],
        "weight": 1.0,
    },
    # Communication Tracks
    "CM-1": {
        "name": "Documentation & Knowledge",
        "keywords": ["documentation", "knowledge", "wiki", "technical writing"],
        "pilot_mapping": ["F"],
        "weight": 1.0,
    },
    "CM-2": {
        "name": "Teaching & Training",
        "keywords": ["teaching", "training", "mentoring", "guide", "onboarding"],
        "pilot_mapping": ["F"],
        "weight": 1.0,
    },
    "CM-3": {
        "name": "Collaboration & Community",
        "keywords": ["collaboration", "community", "communication", "team"],
        "pilot_mapping": [],
        "weight": 1.0,
    },
    # Foundation Tracks
    "F-1": {
        "name": "Quality & Testing",
        "keywords": ["testing", "quality", "qa", "validation", "verification", "test"],
        "pilot_mapping": ["E", "L"],
        "weight": 1.0,
    },
    "F-2": {
        "name": "Security & Privacy",
        "keywords": ["security", "privacy", "authentication", "encryption", "auth"],
        "pilot_mapping": ["D", "M"],
        "weight": 1.0,
    },
    "F-3": {
        "name": "Compliance & Standards",
        "keywords": ["compliance", "standards", "governance", "regulatory", "audit"],
        "pilot_mapping": [],
        "weight": 1.0,
    },
    # Growth Tracks
    "G-1": {
        "name": "Learning & Development",
        "keywords": ["learning", "development", "skill", "training", "education", "growth"],
        "pilot_mapping": ["F"],
        "weight": 1.0,
    },
    "G-2": {
        "name": "Scaling & Expansion",
        "keywords": ["scaling", "expansion", "growth", "capacity"],
        "pilot_mapping": ["H"],
        "weight": 1.0,
    },
    "G-3": {
        "name": "Evolution & Adaptation",
        "keywords": ["evolution", "adaptation", "modernization", "transformation"],
        "pilot_mapping": ["H"],
        "weight": 1.0,
    },
    # Protection Tracks
    "P-1": {
        "name": "Monitoring & Observability",
        "keywords": ["monitoring", "observability", "metrics", "logging", "alerting"],
        "pilot_mapping": ["J"],
        "weight": 1.0,
    },
    "P-2": {
        "name": "Auditing & Review",
        "keywords": ["audit", "review", "assessment", "evaluation", "inspection"],
        "pilot_mapping": [],
        "weight": 1.0,
    },
    "P-3": {
        "name": "Governance & Control",
        "keywords": ["governance", "control", "access", "rbac", "permissions"],
        "pilot_mapping": [],
        "weight": 1.0,
    },
    # Integration Tracks
    "IN-1": {
        "name": "Tool Integration",
        "keywords": ["integration", "api", "connector", "mcp", "tool"],
        "pilot_mapping": ["T"],
        "weight": 1.0,
    },
    "IN-2": {
        "name": "Workflow Automation",
        "keywords": ["automation", "workflow", "n8n", "orchestration"],
        "pilot_mapping": ["K"],
        "weight": 1.0,
    },
    "IN-3": {
        "name": "Data & Context Flow",
        "keywords": ["context", "data flow", "synchronization", "memory"],
        "pilot_mapping": ["J"],
        "weight": 1.0,
    },
    # Experience Tracks
    "E-1": {
        "name": "User Interface & Design",
        "keywords": ["ui", "interface", "design", "frontend", "component", "react"],
        "pilot_mapping": ["B", "I"],
        "weight": 1.0,
    },
    "E-2": {
        "name": "Interaction & Engagement",
        "keywords": ["interaction", "engagement", "ux", "user experience"],
        "pilot_mapping": [],
        "weight": 1.0,
    },
    "E-3": {
        "name": "Journey & Experience Design",
        "keywords": ["journey", "experience design", "service design", "touchpoint"],
        "pilot_mapping": [],
        "weight": 1.0,
    },
}
@dataclass
class SkillMapping:
    """Outcome of classifying one skill file against the CEF tracks.

    Attributes:
        skill_path: Location of the skill's SKILL.md file.
        name: Skill name taken from the file's frontmatter.
        description: Skill description taken from the frontmatter.
        primary_track: Id of the best-scoring CEF track (e.g. "C-1").
        secondary_tracks: Ids of the runner-up tracks, best first.
        confidence: Strength of the primary match, in the range 0.0-1.0.
        pilot_track: Value of the frontmatter's ``track`` field, if any.
    """

    skill_path: Path
    name: str
    description: str
    primary_track: str
    secondary_tracks: List[str]
    confidence: float
    pilot_track: Optional[str] = field(default=None)
class CEFMapper:
    """Maps skills to CEF tracks.

    Scans ``<skills_dir>/*/SKILL.md``, scores each skill's frontmatter
    (name, description, tags) against the keywords in ``CEF_TRACKS`` and
    records the best-matching track per skill. The results can be rendered
    as a Markdown report or written back into each file's frontmatter.
    """

    # Frontmatter is a block fenced by `---` lines at the very top of the
    # file. The closing fence consumes only its own line ending so that blank
    # lines separating frontmatter and body survive a rewrite.
    _FRONTMATTER_RE = re.compile(r'^---[ \t]*\n(.*?)\n---[ \t]*(?:\n|$)', re.DOTALL)

    # Values matching this pattern are safe to emit as unquoted YAML scalars.
    _PLAIN_SCALAR_RE = re.compile(r'[A-Za-z0-9][\w .()/+-]*')

    def __init__(self, skills_dir: Optional[Path] = None):
        """Create a mapper.

        Args:
            skills_dir: Directory holding one sub-directory per skill.
                Defaults to ``skills`` relative to the working directory.
        """
        self.skills_dir = Path(skills_dir) if skills_dir else Path("skills")
        self.mappings: List[SkillMapping] = []
        self.track_counts: Dict[str, int] = defaultdict(int)

    @staticmethod
    def _unquote(value: str) -> str:
        """Strip surrounding whitespace and quote characters from a scalar."""
        return value.strip().strip('"\'')

    @staticmethod
    def _has_term(text: str, term: str) -> bool:
        """Return True when *term* occurs in *text* at the start of a word.

        A plain substring test makes short keywords fire inside unrelated
        words ("ui" in "build", "api" in "rapid", "test" in "latest").
        Requiring a non-alphanumeric character (or the start of the text)
        before the term removes those hits while still matching inflected
        forms such as "tests" or "authorization". Both arguments are expected
        to be lowercase already.
        """
        return re.search(r'(?<![a-z0-9])' + re.escape(term), text) is not None

    @staticmethod
    def _has_key(frontmatter: str, key: str) -> bool:
        """Return True when *frontmatter* has a line defining exactly *key*."""
        pattern = r'^[ \t]*' + re.escape(key) + r'[ \t]*:'
        return re.search(pattern, frontmatter, re.MULTILINE) is not None

    @classmethod
    def _yaml_scalar(cls, value: str) -> str:
        """Render *value* as a YAML scalar, quoting it only when required.

        Values containing YAML-significant characters (``:``, ``#``, quotes,
        ...) are emitted as JSON strings, which are valid double-quoted YAML.
        """
        if value == value.strip() and cls._PLAIN_SCALAR_RE.fullmatch(value):
            return value
        return json.dumps(value, ensure_ascii=False)

    def _parse_frontmatter(self, skill_path: Path) -> Dict:
        """Extract frontmatter from SKILL.md.

        This is a deliberately small ``key: value`` parser, not a YAML
        implementation. Scalar values are returned as strings with
        surrounding quotes removed. ``tags`` is returned as a list and may be
        written as a block list (``- item`` lines), an inline list
        (``[a, b]``) or a comma-separated scalar.

        Args:
            skill_path: Path of the SKILL.md file to read (UTF-8).

        Returns:
            Mapping of frontmatter keys to values; empty when the file has no
            frontmatter block. ``name`` and ``title`` mirror each other when
            only one of them is present.
        """
        content = skill_path.read_text(encoding="utf-8")
        match = self._FRONTMATTER_RE.match(content)
        if not match:
            return {}

        metadata: Dict = {}
        in_tag_list = False
        for line in match.group(1).split('\n'):
            stripped = line.strip()
            if not stripped or stripped.startswith('#'):
                continue

            if stripped.startswith('-'):
                # List item: only meaningful directly under a bare `tags:` key.
                if in_tag_list:
                    tag = self._unquote(stripped[1:])
                    if tag:
                        metadata['tags'].append(tag)
                continue

            in_tag_list = False
            if ':' not in stripped:
                continue

            key, _, raw_value = stripped.partition(':')
            key = key.strip()
            raw_value = raw_value.strip()

            if key == 'tags':
                if raw_value.startswith('[') and raw_value.endswith(']'):
                    raw_value = raw_value[1:-1]
                elif not raw_value:
                    in_tag_list = True
                candidates = (self._unquote(part) for part in raw_value.split(','))
                metadata['tags'] = [tag for tag in candidates if tag]
                continue

            metadata[key] = raw_value.strip('"\'')

        # Use title as fallback for name and vice versa (skills should have both)
        if 'name' not in metadata and 'title' in metadata:
            metadata['name'] = metadata['title']
        if 'title' not in metadata and 'name' in metadata:
            metadata['title'] = metadata['name']

        return metadata

    def _calculate_track_scores(self, metadata: Dict) -> Dict[str, float]:
        """Calculate relevance scores for each CEF track.

        Per track: +3.0 for every keyword found in the skill's name or
        description, +2.0 for every (tag, keyword) pair where the keyword is
        found in the tag, and +1.5 for every word of the track's name longer
        than three characters that is found in the name/description. The sum
        is multiplied by the track's ``weight``. Terms are matched
        case-insensitively at word starts (see ``_has_term``).

        Args:
            metadata: Parsed frontmatter as returned by ``_parse_frontmatter``.

        Returns:
            Mapping of every track id in ``CEF_TRACKS`` to its score.
        """
        text = f"{metadata.get('name', '')} {metadata.get('description', '')}".lower()
        tags = [tag.lower() for tag in metadata.get('tags') or []]

        scores: Dict[str, float] = {}
        for track_id, track_info in CEF_TRACKS.items():
            keywords = track_info['keywords']
            name_words = [w for w in track_info['name'].lower().split() if len(w) > 3]

            keyword_hits = sum(self._has_term(text, kw) for kw in keywords)
            tag_hits = sum(self._has_term(tag, kw) for tag in tags for kw in keywords)
            name_hits = sum(self._has_term(text, word) for word in name_words)

            score = 3.0 * keyword_hits + 2.0 * tag_hits + 1.5 * name_hits
            scores[track_id] = score * track_info['weight']

        return scores

    def _map_skill(self, skill_path: Path) -> Optional["SkillMapping"]:
        """Map a single skill to CEF tracks.

        Args:
            skill_path: Path of the skill's SKILL.md file.

        Returns:
            The mapping, or ``None`` when the file has no usable name or when
            no track scores above zero. Without any match, picking the first
            track would only inflate its count with a 0%-confidence entry.
        """
        metadata = self._parse_frontmatter(skill_path)
        if not metadata.get('name'):
            return None

        scores = self._calculate_track_scores(metadata)
        ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
        if not ranked or ranked[0][1] <= 0:
            return None

        primary_track, max_score = ranked[0]
        secondary_tracks = [track_id for track_id, score in ranked[1:3] if score > 0]

        # A score of 10 or more counts as full confidence.
        confidence = min(max_score / 10, 1.0)

        return SkillMapping(
            skill_path=skill_path,
            name=metadata.get('name', ''),
            description=metadata.get('description', ''),
            primary_track=primary_track,
            secondary_tracks=secondary_tracks,
            confidence=confidence,
            # Existing (pre-CEF) track assignment, if the skill carries one.
            pilot_track=metadata.get('track') or None,
        )

    def map_all_skills(self) -> None:
        """Map all skills in the skills directory.

        Rebuilds ``self.mappings`` and ``self.track_counts`` from scratch, so
        calling it again does not double-count. Files are visited in sorted
        order to keep reports reproducible; unreadable files are reported on
        stderr and skipped.
        """
        self.mappings = []
        self.track_counts = defaultdict(int)

        skill_files = sorted(self.skills_dir.glob("*/SKILL.md"))
        print(f"Mapping {len(skill_files)} skills to CEF tracks...")

        for skill_path in skill_files:
            try:
                mapping = self._map_skill(skill_path)
            except (OSError, UnicodeDecodeError) as exc:
                print(f"⚠️ Skipping {skill_path}: {exc}", file=sys.stderr)
                continue
            if mapping:
                self.mappings.append(mapping)
                self.track_counts[mapping.primary_track] += 1

        print(f"✅ Successfully mapped {len(self.mappings)} skills")

    def generate_report(self) -> str:
        """Generate mapping report.

        Returns:
            A Markdown document with the per-track distribution, the skills
            grouped by primary track, the tracks without skills, and
            recommendations.
        """
        report = []
        report.append("# CEF Skill Mapping Report\n")
        report.append(f"Total Skills Mapped: {len(self.mappings)}\n")

        # Track distribution
        report.append("\n## Track Distribution\n")
        for track_id in sorted(CEF_TRACKS):
            count = self.track_counts.get(track_id, 0)
            track_name = CEF_TRACKS[track_id]['name']
            bar = "█" * (count // 2)  # one block per two skills
            report.append(f"- **{track_id}** ({track_name}): {count} skills {bar}")

        # Skills by track
        report.append("\n## Skills by Track\n")
        skills_by_track = defaultdict(list)
        for mapping in self.mappings:
            skills_by_track[mapping.primary_track].append(mapping)

        for track_id in sorted(skills_by_track):
            track_name = CEF_TRACKS[track_id]['name']
            report.append(f"\n### {track_id}: {track_name}\n")
            for mapping in skills_by_track[track_id]:
                pilot_info = f" [PILOT: {mapping.pilot_track}]" if mapping.pilot_track else ""
                report.append(f"- {mapping.name} ({mapping.confidence:.0%}){pilot_info}")

        # Coverage analysis
        report.append("\n## Coverage Analysis\n")
        # .get() avoids inserting keys into the defaultdict and treats a
        # stored zero the same as a missing entry.
        empty_tracks = [t for t in CEF_TRACKS if not self.track_counts.get(t)]
        if empty_tracks:
            report.append(f"\n**Empty Tracks ({len(empty_tracks)}):**")
            for track_id in empty_tracks:
                report.append(f"- {track_id}: {CEF_TRACKS[track_id]['name']}")

        # Recommendations (numbered dynamically so the list always starts at 1;
        # sub-bullets are indented three spaces to nest under the number)
        report.append("\n## Recommendations\n")
        step = 1
        if empty_tracks:
            report.append(f"{step}. **Develop skills for empty tracks:**")
            for track_id in empty_tracks[:3]:
                report.append(f"   - Create 3-5 skills for {track_id}: {CEF_TRACKS[track_id]['name']}")
            step += 1

        report.append(f"\n{step}. **Well-covered tracks (top 5):**")
        # Ties are broken by track id to keep the output deterministic.
        top_tracks = sorted(self.track_counts.items(), key=lambda item: (-item[1], item[0]))[:5]
        for track_id, count in top_tracks:
            report.append(f"   - {track_id}: {count} skills")

        return "\n".join(report)

    def update_skill_files(self) -> None:
        """Update SKILL.md files with CEF track information and ensure name/title fields.

        For every mapping whose file has a frontmatter block without a
        ``cef_track`` key, appends ``cef_track``, ``cef_confidence`` and (when
        present) ``cef_secondary`` to the frontmatter and prepends ``name`` /
        ``title`` if either is missing. The body of the file is left
        untouched. Files are read and written as UTF-8.
        """
        updated = 0

        for mapping in self.mappings:
            skill_path = mapping.skill_path
            content = skill_path.read_text(encoding="utf-8")

            match = self._FRONTMATTER_RE.match(content)
            if not match:
                continue

            frontmatter = match.group(1)
            # Only a real frontmatter key counts; the body may mention
            # "cef_track:" in prose or examples.
            if self._has_key(frontmatter, 'cef_track'):
                continue

            body = content[match.end():]
            lines = frontmatter.split('\n')

            # Add name/title if missing (exact keys only, so e.g. `subtitle:`
            # or `filename:` do not count as present).
            if mapping.name:
                scalar = self._yaml_scalar(mapping.name)
                if not self._has_key(frontmatter, 'name'):
                    lines.insert(0, f"name: {scalar}")
                if not self._has_key(frontmatter, 'title'):
                    lines.insert(0, f"title: {scalar}")

            # Add CEF fields at the end of frontmatter
            lines.append(f"cef_track: {mapping.primary_track}")
            lines.append(f"cef_confidence: {mapping.confidence:.2f}")
            if mapping.secondary_tracks:
                lines.append(f"cef_secondary: [{', '.join(mapping.secondary_tracks)}]")

            new_frontmatter = '\n'.join(lines)
            skill_path.write_text(f"---\n{new_frontmatter}\n---\n{body}", encoding="utf-8")
            updated += 1

        print(f"✅ Updated {updated} skill files with CEF track information")
def main(argv: Optional[List[str]] = None) -> int:
    """Command-line entry point.

    Maps every skill, then prints or saves the report and/or updates the
    skill files, depending on the flags. With no flags the report is printed.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.

    Returns:
        Process exit code (always 0; argument errors exit via argparse).
    """
    parser = argparse.ArgumentParser(description="Map skills to CEF tracks")
    parser.add_argument("--update", action="store_true", help="Update skill files")
    parser.add_argument("--report", action="store_true", help="Generate report")
    parser.add_argument("--output", type=str, help="Output file for report")
    parser.add_argument(
        "--skills-dir",
        type=Path,
        default=Path("skills"),
        help="Directory containing one sub-directory per skill (default: skills)",
    )

    args = parser.parse_args(argv)

    mapper = CEFMapper(args.skills_dir)
    mapper.map_all_skills()

    # The report is the default action; --update alone suppresses it.
    if args.report or not args.update:
        report = mapper.generate_report()
        if args.output:
            # The report contains non-ASCII characters (bar glyphs), so the
            # encoding must not depend on the platform default.
            Path(args.output).write_text(report, encoding="utf-8")
            print(f"Report saved to {args.output}")
        else:
            print(report)

    if args.update:
        mapper.update_skill_files()

    return 0
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    sys.exit(main())