
#!/usr/bin/env python3
"""Intelligent Track Mapper for CODITECT Skills.

Analyzes skill content and intelligently assigns skills to appropriate tracks
based on semantic analysis, keywords, and workflow patterns.

This ensures:

  1. All 37 tracks have relevant skills
  2. Bi-lateral mapping consistency (skill.track ↔ track.associated_skills)
  3. Semantic coherence between skill purpose and track mission

Version: 1.0.0
"""

import logging
import re
from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple

import yaml

# Module-level logger. BUGFIX: the original passed the undefined bare name
# ``name`` (NameError at import time); the convention is ``__name__``.
logger = logging.getLogger(__name__)

# Track definitions with semantic keywords for intelligent mapping

# Maps track code -> {"name", "keywords", "patterns"}.
#   keywords: substrings searched in the combined skill text (name/tags/desc).
#   patterns: glob-ish fragments matched against the skill directory name.
# BUGFIX: in the original, the inline "# Technical Tracks" comment sat on the
# same physical line as the opening brace and dead-coded the rest of that
# (collapsed) line; the dict is reconstructed here with proper formatting.
# NOTE(review): 38 entries are defined here while the module docstring says
# "37 tracks" — confirm the intended track count.
TRACK_SEMANTICS = {
    # Technical Tracks (A-N, T)
    "A": {
        "name": "Backend API",
        "keywords": ["api", "backend", "django", "rest", "graphql", "endpoint",
                     "server", "database", "orm", "license", "commerce",
                     "stripe", "subscription"],
        "patterns": ["api-design", "backend-", "database-", "securing-backend",
                     "restful-api"],
    },
    "B": {
        "name": "Frontend UI",
        "keywords": ["frontend", "react", "typescript", "dashboard", "ui", "ux",
                     "component", "interface", "user-interface"],
        "patterns": ["frontend-", "-ui-", "react-", "ui-components"],
    },
    "C": {
        "name": "DevOps Infra",
        "keywords": ["devops", "infrastructure", "docker", "kubernetes", "gcp",
                     "terraform", "cicd", "deployment", "github"],
        "patterns": ["-cicd", "-pipelines", "terraform-", "deployment-",
                     "infrastructure-"],
    },
    "D": {
        "name": "Security",
        "keywords": ["security", "auth", "authentication", "authorization",
                     "audit", "compliance", "encryption", "vulnerability",
                     "penetration"],
        "patterns": ["security-", "-security-", "securing-", "auth-", "audit-",
                     "penetration-"],
    },
    "E": {
        "name": "Testing QA",
        "keywords": ["testing", "test", "qa", "quality", "e2e",
                     "integration-test", "unit-test", "validation"],
        "patterns": ["-testing", "testing-", "-tests", "test-", "e2e-", "-qa-"],
    },
    "F": {
        "name": "Documentation",
        "keywords": ["documentation", "docs", "guide", "reference", "writing",
                     "publishing", "readme"],
        "patterns": ["documenting-", "-documentation", "-docs", "readme-"],
    },
    "G": {
        "name": "DMS Product",
        "keywords": ["dms", "document-management", "github-oauth", "product",
                     "cms", "content"],
        "patterns": ["dms-", "-dms-", "document-management*"],
    },
    "H": {
        "name": "Framework Autonomy",
        "keywords": ["framework", "autonomy", "cef", "moe", "orchestration",
                     "agent", "circuit-breaker", "retry", "self-healing"],
        "patterns": ["-orchestrator", "orchestration-", "circuit-breaker*",
                     "-retry", "moe-", "agentic-"],
    },
    "I": {
        "name": "UI Components",
        "keywords": ["component", "ui", "design-system", "atomic-design",
                     "storybook", "react-component"],
        "patterns": ["-component-", "-design", "ui-", "agui-", "a2ui-"],
    },
    "J": {
        "name": "Memory Intelligence",
        "keywords": ["memory", "context", "session", "intelligence",
                     "bi-dashboard", "analytics", "export", "checkpoint"],
        "patterns": ["memory-", "-memory", "session-", "-session", "context-"],
    },
    "K": {
        "name": "Workflow Automation",
        "keywords": ["workflow", "automation", "n8n", "scheduler", "scheduled",
                     "batch", "pipeline"],
        "patterns": ["workflow-", "-workflow*", "automating-", "scheduled-",
                     "automation-"],
    },
    "L": {
        "name": "Extended Testing",
        "keywords": ["load-testing", "chaos", "performance", "stress",
                     "scalability", "benchmark"],
        "patterns": ["load-", "chaos-", "performance-", "-performance"],
    },
    "M": {
        "name": "Extended Security",
        "keywords": ["security", "compliance", "audit", "fedramp", "hipaa",
                     "pci", "sox"],
        "patterns": ["compliance-", "-compliance", "-audit", "penetration-"],
    },
    "N": {
        "name": "GTM Launch",
        "keywords": ["gtm", "marketing", "sales", "launch", "pilot",
                     "onboarding", "customer"],
        "patterns": ["-marketing", "-sales", "launch-", "gtm-"],
    },
    "T": {
        "name": "Tools Integration",
        "keywords": ["tool", "integration", "mcp", "adk", "sdk", "cli",
                     "plugin"],
        "patterns": ["-integration", "-tools", "mcp-", "adk-", "tool-*"],
    },

    # PCF Business Tracks (O-AA)
    "O": {
        "name": "Vision & Strategy",
        "keywords": ["strategy", "vision", "planning", "swot", "business-model",
                     "roadmap"],
        "patterns": ["*-strategy", "strategic-*", "planning-*", "*-planning"],
    },
    "P": {
        "name": "Products & Services",
        "keywords": ["product", "service", "lifecycle", "pricing", "feature",
                     "roadmap"],
        "patterns": ["product-*", "*-product*", "*-service*"],
    },
    "Q": {
        "name": "Marketing & Sales",
        "keywords": ["marketing", "sales", "lead", "crm", "content-marketing",
                     "pitch"],
        "patterns": ["marketing-*", "*-marketing", "sales-*", "pitch-*",
                     "content-*"],
    },
    "R": {
        "name": "Physical Delivery",
        "keywords": ["supply-chain", "logistics", "warehouse", "shipping",
                     "delivery"],
        "patterns": ["supply-*", "logistics*", "*-delivery"],
    },
    "S": {
        "name": "Service Delivery",
        "keywords": ["service", "professional-services", "consulting",
                     "delivery", "ps"],
        "patterns": ["service-*", "*-service*", "consulting*", "professional-*"],
    },
    # NOTE(review): "PCF-T" and "AI-PCF" are the only hyphenated track codes;
    # confirm downstream code (e.g. frontmatter track parsing that splits on
    # '-') handles them correctly.
    "PCF-T": {
        "name": "Customer Service",
        "keywords": ["support", "helpdesk", "chatbot", "customer-success",
                     "ticket"],
        "patterns": ["customer-*", "support-*", "*-support", "helpdesk*"],
    },
    "U": {
        "name": "Human Capital",
        "keywords": ["hr", "recruiting", "training", "performance", "talent",
                     "onboarding"],
        "patterns": ["*-onboarding", "training-*", "*-training", "hr-*"],
    },
    "V": {
        "name": "Information Technology",
        "keywords": ["it-strategy", "enterprise", "aiops", "it-governance"],
        "patterns": ["*-it-*", "enterprise-*", "aiops*"],
    },
    "W": {
        "name": "Financial Resources",
        "keywords": ["finance", "accounting", "treasury", "tax", "budget",
                     "cost"],
        "patterns": ["*-cost*", "financial-*", "*-finance*", "budget*"],
    },
    "X": {
        "name": "Asset Management",
        "keywords": ["asset", "facilities", "maintenance", "inventory"],
        "patterns": ["*-asset*", "*-inventory", "maintenance*"],
    },
    "Y": {
        "name": "Risk & Compliance",
        "keywords": ["risk", "compliance", "continuity", "audit", "governance"],
        "patterns": ["risk-*", "*-risk", "*-compliance", "compliance-*"],
    },
    "Z": {
        "name": "External Relationships",
        "keywords": ["investor", "government", "partnership", "external",
                     "vendor"],
        "patterns": ["*-external*", "partnership*", "*-vendor*"],
    },
    "AA": {
        "name": "Business Capabilities",
        "keywords": ["process", "knowledge", "quality", "change-management",
                     "capability"],
        "patterns": ["*-process*", "knowledge-*", "*-quality*", "change-*"],
    },

    # Extension Tracks (AB-AK)
    "AB": {
        "name": "Mobile Platform",
        "keywords": ["mobile", "ios", "android", "react-native", "flutter",
                     "app"],
        "patterns": ["mobile-*", "*-mobile*", "ios-*", "android-*"],
    },
    "AC": {
        "name": "Desktop Platform",
        "keywords": ["desktop", "electron", "tauri", "cli", "desktop-app"],
        "patterns": ["desktop-*", "electron-*", "tauri-*", "*-cli-*"],
    },
    "AD": {
        "name": "AI/ML Integration",
        "keywords": ["ai", "ml", "llm", "openai", "anthropic", "mlops", "rag",
                     "model"],
        "patterns": ["ai-*", "ml-*", "llm-*", "*-ai", "*-ml", "*-llm"],
    },
    "AE": {
        "name": "Data Engineering",
        "keywords": ["data", "etl", "warehouse", "snowflake", "dbt",
                     "analytics"],
        "patterns": ["data-*", "*-data", "etl*", "*-warehouse*", "analytics*"],
    },
    "AF": {
        "name": "API Integrations",
        "keywords": ["api-gateway", "webhook", "integration", "api-management"],
        "patterns": ["*-gateway*", "webhook*", "*-integrations"],
    },
    "AG": {
        "name": "Healthcare",
        "keywords": ["healthcare", "hipaa", "fhir", "medical", "health"],
        "patterns": ["health*", "*-health*", "hipaa*", "medical*"],
    },
    "AH": {
        "name": "Finance",
        "keywords": ["finance", "fintech", "pci-dss", "sox", "trading",
                     "banking"],
        "patterns": ["fintech*", "*-trading", "banking*", "*-banking*"],
    },
    "AI-PCF": {
        "name": "Government",
        "keywords": ["government", "fedramp", "compliance", "nist",
                     "public-sector"],
        "patterns": ["government*", "fedramp*", "public-*", "*-gov"],
    },
    "AJ": {
        "name": "Localization",
        "keywords": ["i18n", "localization", "translation", "l10n",
                     "internationalization"],
        "patterns": ["i18n*", "*-i18n", "locali*", "translation*", "*-l10n"],
    },
    "AK": {
        "name": "Sustainability",
        "keywords": ["sustainability", "esg", "carbon", "reporting", "green"],
        "patterns": ["esg*", "sustainab*", "carbon*", "green-*"],
    },
}

@dataclass
class SkillAnalysis:
    """Analysis result for a single skill."""

    name: str                       # skill directory name
    path: Path                      # path to the skill's SKILL.md file
    current_track: Optional[str]    # track code from frontmatter, if present
    suggested_track: Optional[str]  # best-scoring track code, if any matched
    confidence: float               # normalized match confidence, 0.0-1.0
    reasons: List[str]              # up to three human-readable match reasons

class IntelligentTrackMapper:
    """Intelligently maps skills to tracks based on semantic analysis.

    Scans ``<project_root>/skills/*/SKILL.md``, scores each skill against
    TRACK_SEMANTICS (name patterns, keywords, frontmatter tags), and suggests
    track reassignments for legacy/unassigned skills.
    """

    def __init__(self, project_root: Optional[Path] = None):
        """Initialize the mapper.

        Args:
            project_root: Repository root; defaults to the current working
                directory. Skills are read from ``<root>/skills/``.
        """
        self.project_root = project_root or Path.cwd()
        self.skills_dir = self.project_root / "skills"
        self.analyses: List[SkillAnalysis] = []

    def analyze_all_skills(self) -> List[SkillAnalysis]:
        """Analyze all skills and suggest track mappings.

        Returns:
            One SkillAnalysis per skill directory containing a SKILL.md.
            Results are also cached on ``self.analyses``.
        """
        analyses = []

        for skill_dir in sorted(self.skills_dir.iterdir()):
            if not skill_dir.is_dir():
                continue

            skill_file = skill_dir / "SKILL.md"
            if not skill_file.exists():
                continue

            try:
                analysis = self._analyze_skill(skill_dir.name, skill_file)
                if analysis:
                    analyses.append(analysis)
            except Exception as e:
                # Best-effort scan: one unreadable skill must not abort the run.
                logger.warning(f"Failed to analyze {skill_dir.name}: {e}")

        self.analyses = analyses
        return analyses

    def _analyze_skill(self, skill_name: str, skill_file: Path) -> Optional[SkillAnalysis]:
        """Analyze a single skill file and suggest the best-matching track.

        Args:
            skill_name: Skill directory name (used for pattern matching).
            skill_file: Path to the skill's SKILL.md.

        Returns:
            A SkillAnalysis, or None when no track could be scored.
        """
        content = skill_file.read_text(encoding='utf-8')

        # Extract the currently assigned track code from frontmatter.
        current_track = None
        track_match = re.search(r'^cef_track:\s*(.+)$', content, re.MULTILINE)
        if track_match:
            # NOTE(review): splitting on '-' truncates hyphenated track codes
            # such as "PCF-T"/"AI-PCF" to "PCF"/"AI" — confirm those codes
            # never appear in frontmatter.
            current_track = track_match.group(1).strip().split('-')[0]

        name_lower = skill_name.lower()

        # Frontmatter tags. NOTE(review): this regex requires list items
        # formatted exactly " - tag" (single leading space) — confirm against
        # the actual SKILL.md frontmatter style.
        tags = []
        tags_match = re.search(r'^tags:\s*$\n((?: - .+\n)+)', content, re.MULTILINE)
        if tags_match:
            tags_text = tags_match.group(1)
            tags = [t.strip('- ') for t in tags_text.strip().split('\n') if t.strip()]

        # Single-line description field.
        description = ""
        desc_match = re.search(r'^description:\s*(.+)$', content, re.MULTILINE)
        if desc_match:
            description = desc_match.group(1).lower()

        # Single-line summary field. BUGFIX: the original also passed
        # re.DOTALL, which made ``.+`` swallow everything from "summary:" to
        # the end of the file and polluted the keyword-search text.
        summary = ""
        summary_match = re.search(r'^summary:\s*(.+)$', content, re.MULTILINE)
        if summary_match:
            summary = summary_match.group(1).lower()

        # Combined text used for keyword scoring.
        all_text = f"{name_lower} {' '.join(tags)} {description} {summary}"

        # Score each track; keep up to three human-readable reasons per track.
        track_scores = {}
        track_reasons = defaultdict(list)

        for track_code, semantics in TRACK_SEMANTICS.items():
            score = 0.0

            # Name patterns: strip the glob '*' and substring-match against
            # the hyphenated skill name (strongest signal, +3 each).
            # BUGFIX: the original also replaced '-' with ' ', so hyphenated
            # patterns could never match the hyphenated directory names.
            for pattern in semantics["patterns"]:
                pattern_clean = pattern.replace("*", "")
                if pattern_clean in name_lower:
                    score += 3.0
                    track_reasons[track_code].append(f"Name pattern match: {pattern}")

            # Keywords found anywhere in the combined text (+1 each).
            for keyword in semantics["keywords"]:
                if keyword in all_text:
                    score += 1.0
                    if len(track_reasons[track_code]) < 3:
                        track_reasons[track_code].append(f"Keyword: {keyword}")

            # Frontmatter tags overlapping keywords (+2 each).
            for tag in tags:
                tag_lower = tag.lower()
                for keyword in semantics["keywords"]:
                    if keyword in tag_lower or tag_lower in keyword:
                        score += 2.0
                        if len(track_reasons[track_code]) < 3:
                            track_reasons[track_code].append(f"Tag match: {tag}")

            track_scores[track_code] = score

        if track_scores:
            best_track = max(track_scores, key=track_scores.get)
            best_score = track_scores[best_track]

            # Normalize confidence (max realistic score is ~10+); cap at 1.0.
            confidence = min(best_score / 5.0, 1.0)

            return SkillAnalysis(
                name=skill_name,
                path=skill_file,
                current_track=current_track,
                suggested_track=best_track if best_score > 0 else None,
                confidence=confidence,
                reasons=track_reasons[best_track][:3]
            )

        return None

    def generate_reassignments(self) -> Dict[str, List[SkillAnalysis]]:
        """Generate the list of skills that should be reassigned.

        Returns:
            Mapping of suggested track code -> skills to move there.
        """
        if not self.analyses:
            self.analyze_all_skills()

        reassignments = defaultdict(list)

        for analysis in self.analyses:
            # Legacy tracks (CM, IN) are reassigned when a better match exists.
            if analysis.current_track in ['CM', 'IN']:
                if analysis.suggested_track and analysis.suggested_track != analysis.current_track:
                    reassignments[analysis.suggested_track].append(analysis)
            # Unassigned skills with any suggestion are also reassigned.
            elif not analysis.current_track and analysis.suggested_track:
                reassignments[analysis.suggested_track].append(analysis)

        return dict(reassignments)

    def print_analysis(self):
        """Print the analysis report and return the reassignment mapping."""
        if not self.analyses:
            self.analyze_all_skills()

        reassignments = self.generate_reassignments()

        print("\n" + "=" * 80)
        print("INTELLIGENT TRACK MAPPING ANALYSIS")
        print("=" * 80)

        print("\nšŸ“Š SKILL DISTRIBUTION BY CURRENT TRACK:")
        print("-" * 80)

        current_distribution = defaultdict(list)
        for analysis in self.analyses:
            track = analysis.current_track or "UNASSIGNED"
            current_distribution[track].append(analysis.name)

        for track in sorted(current_distribution.keys()):
            count = len(current_distribution[track])
            print(f" Track {track:4}: {count:3} skills")

        print("\nšŸ”„ SUGGESTED REASSIGNMENTS (from CM/IN to proper tracks):")
        print("-" * 80)

        for track_code in sorted(reassignments.keys()):
            track_name = TRACK_SEMANTICS.get(track_code, {}).get("name", track_code)
            analyses = reassignments[track_code]
            print(f"\n Track {track_code} ({track_name}): {len(analyses)} skills")
            for analysis in analyses[:5]:  # Show first 5
                print(f" • {analysis.name:40} (confidence: {analysis.confidence:.0%})")
                for reason in analysis.reasons[:2]:
                    print(f" ↳ {reason}")
            if len(analyses) > 5:
                print(f" ... and {len(analyses) - 5} more")

        print("\n" + "=" * 80)

        return reassignments

def main():
    """CLI entry point: parse flags and run analyze/suggest/apply."""
    import argparse

    parser = argparse.ArgumentParser(description="Intelligent Track Mapper")
    parser.add_argument("--analyze", action="store_true", help="Analyze all skills")
    parser.add_argument("--suggest", action="store_true", help="Show reassignment suggestions")
    parser.add_argument("--apply", action="store_true", help="Apply suggested reassignments")
    parser.add_argument("--dry-run", action="store_true", help="Preview changes without applying")

    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)

    mapper = IntelligentTrackMapper()

    if args.analyze or args.suggest or args.apply:
        mapper.analyze_all_skills()

    # Default behavior (any invocation without --apply) is to print the report.
    if args.suggest or not args.apply:
        reassignments = mapper.print_analysis()

    if args.apply:
        print("\nšŸ“ Applying reassignments...")
        # Implementation would go here
        print(" (Not yet implemented - use --dry-run to preview)")

# Standard script entry guard. BUGFIX: the original compared the undefined
# bare names ``name``/"main" instead of ``__name__``/"__main__", so the
# script would raise NameError instead of running when executed directly.
if __name__ == "__main__":
    main()