#!/usr/bin/env python3
"""CODITECT Track Registry Module

Manages bi-lateral TRACK ↔ SKILL mapping for the CODITECT Experience
Framework and Coditect Semantic Skill LLM Translator MCP.

This module provides:

  - Track-to-skill mapping (which skills belong to each track)
  - Skill-to-track mapping (which track each skill belongs to)
  - Automatic synchronization of bi-lateral mappings
  - Frontmatter updates for both skills and tracks

Version: 1.0.0
"""

import logging
import re
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple

import yaml  # NOTE(review): not referenced anywhere in this module — confirm before removing

# Use the module's import path so log records are attributed correctly.
logger = logging.getLogger(__name__)

# Track ID mapping (Track Letter/Number → Track Name)
# 38 total tracks: A-N (14) + T (1) + O-AA (13) + AB-AK (10)

# Master registry of all 38 track codes. Each value carries the display name,
# a category (technical / business / extension), and either a lifecycle
# status (technical), a PCF process id (business), or the parent tracks an
# extension inherits from.
TRACK_DEFINITIONS = {
    # PILOT Technical Tracks (A-N, T) - 15 tracks
    "A": {"name": "Backend API", "category": "technical", "status": "active"},
    "B": {"name": "Frontend UI", "category": "technical", "status": "active"},
    "C": {"name": "DevOps Infra", "category": "technical", "status": "active"},
    "D": {"name": "Security", "category": "technical", "status": "completed"},
    "E": {"name": "Testing QA", "category": "technical", "status": "completed"},
    "F": {"name": "Documentation", "category": "technical", "status": "active"},
    "G": {"name": "DMS Product", "category": "technical", "status": "planned"},
    "H": {"name": "Framework Autonomy", "category": "technical", "status": "active"},
    "I": {"name": "UI Components", "category": "technical", "status": "active"},
    "J": {"name": "Memory Intelligence", "category": "technical", "status": "active"},
    "K": {"name": "Workflow Automation", "category": "technical", "status": "planned"},
    "L": {"name": "Extended Testing", "category": "technical", "status": "planned"},
    "M": {"name": "Extended Security", "category": "technical", "status": "planned"},
    "N": {"name": "GTM Launch", "category": "technical", "status": "active"},
    "T": {"name": "Tools Integration", "category": "technical", "status": "active"},  # Note: Different from PCF Track T

    # PCF Business Process Tracks (O-AA) - 13 tracks
    "O": {"name": "Vision & Strategy", "category": "business", "pcf_id": "1.0"},
    "P": {"name": "Products & Services", "category": "business", "pcf_id": "2.0"},
    "Q": {"name": "Marketing & Sales", "category": "business", "pcf_id": "3.0"},
    "R": {"name": "Physical Delivery", "category": "business", "pcf_id": "4.0"},
    "S": {"name": "Service Delivery", "category": "business", "pcf_id": "5.0"},
    "PCF-T": {"name": "Customer Service", "category": "business", "pcf_id": "6.0"},  # PCF Track T (distinct from Tech Track T)
    "U": {"name": "Human Capital", "category": "business", "pcf_id": "7.0"},
    "V": {"name": "Information Technology", "category": "business", "pcf_id": "8.0"},
    "W": {"name": "Financial Resources", "category": "business", "pcf_id": "9.0"},
    "X": {"name": "Asset Management", "category": "business", "pcf_id": "10.0"},
    "Y": {"name": "Risk & Compliance", "category": "business", "pcf_id": "11.0"},
    "Z": {"name": "External Relationships", "category": "business", "pcf_id": "12.0"},
    "AA": {"name": "Business Capabilities", "category": "business", "pcf_id": "13.0"},

    # Extension Tracks (AB-AK) - 10 tracks
    "AB": {"name": "Mobile Platform", "category": "extension", "inherits": ["B", "C"]},
    "AC": {"name": "Desktop Platform", "category": "extension", "inherits": ["B", "C"]},
    "AD": {"name": "AI/ML Integration", "category": "extension", "inherits": ["V", "A"]},
    "AE": {"name": "Data Engineering", "category": "extension", "inherits": ["V", "W"]},
    "AF": {"name": "API Integrations", "category": "extension", "inherits": ["A", "V"]},
    "AG": {"name": "Healthcare", "category": "extension", "inherits": ["Y", "V"]},
    "AH": {"name": "Finance", "category": "extension", "inherits": ["Y", "W"]},
    "AI-PCF": {"name": "Government", "category": "extension", "inherits": ["Y", "V"]},  # Renamed to avoid collision
    "AJ": {"name": "Localization", "category": "extension", "inherits": ["B", "F"]},
    "AK": {"name": "Sustainability", "category": "extension", "inherits": ["Y", "AA"]},
}

# Legacy/internal track codes that map to main tracks.
TRACK_CODE_ALIASES = {
    "IN": "T",  # Infrastructure → Tools Integration (legacy mapping)
    "CM": "F",  # Communications/Content Management → Documentation
    "O": "O",   # Operations (can be O-1, O-2, O-3 subtracks)
    "P": "P",   # Platform
}

# Additional legacy track definitions. Each entry records the modern track
# it was folded into via 'maps_to'.
LEGACY_TRACK_DEFINITIONS = {
    "IN": {"name": "Infrastructure (Legacy)", "category": "technical",
           "status": "deprecated", "maps_to": "T"},
    "CM": {"name": "Communications Management (Legacy)", "category": "technical",
           "status": "deprecated", "maps_to": "F"},
}

@dataclass
class SkillTrackMapping:
    """Represents a skill-to-track mapping."""

    skill_name: str              # directory name of the skill
    skill_path: Path             # path to the skill's SKILL.md file
    track_code: str              # track letter(s), e.g. "G" or "AA"
    subtrack: Optional[str] = None  # e.g., "1", "2", "3"
    confidence: float = 0.0      # mapping confidence score (0.0 = unscored)

    @property
    def full_track_id(self) -> str:
        """Get full track ID like 'G-1' or 'F'."""
        if self.subtrack:
            return f"{self.track_code}-{self.subtrack}"
        return self.track_code

@dataclass
class TrackInfo:
    """Information about a track."""

    code: str                    # track code, e.g. "A", "PCF-T"
    name: str                    # human-readable track name
    category: str                # "technical" | "business" | "extension"
    status: str                  # lifecycle status, e.g. "active"
    skills: List[str] = field(default_factory=list)  # associated skill names
    file_path: Optional[Path] = None                 # backing track file, if discovered

    @property
    def skill_count(self) -> int:
        """Number of skills currently associated with this track."""
        return len(self.skills)

class TrackRegistry:
    """
    Registry for managing bi-lateral TRACK ↔ SKILL mappings.

    This class maintains the relationship between tracks and skills,
    ensuring bi-lateral consistency:
    - Skills declare their track via 'cef_track' frontmatter field
    - Tracks list their skills via 'associated_skills' frontmatter field
    """

    def __init__(self, project_root: Optional[Path] = None):
        """Initialize the registry rooted at *project_root* (default: cwd)."""
        self.project_root = project_root or Path.cwd()
        self.skills_dir = self.project_root / "skills"
        self.tracks_dir = self.project_root / "internal" / "project" / "plans" / "tracks"
        self.pilot_tracks_dir = self.project_root / "internal" / "project" / "plans" / "pilot-tracks"

        # Mappings (populated lazily by scan_skills / build_track_mappings)
        self._skill_to_track: Dict[str, SkillTrackMapping] = {}
        self._track_to_skills: Dict[str, List[str]] = {}
        self._all_track_files: Dict[str, Path] = {}  # All discovered track files

        logger.debug(f"TrackRegistry initialized: skills={self.skills_dir}, tracks={self.tracks_dir}")

    def scan_skills(self) -> Dict[str, SkillTrackMapping]:
        """
        Scan all skills and extract their track mappings.

        Returns:
            Dictionary mapping skill names to their track mappings
        """
        if not self.skills_dir.exists():
            logger.warning(f"Skills directory not found: {self.skills_dir}")
            return {}

        mappings = {}

        for skill_dir in self.skills_dir.iterdir():
            if not skill_dir.is_dir():
                continue

            skill_file = skill_dir / "SKILL.md"
            if not skill_file.exists():
                continue

            try:
                mapping = self._extract_skill_track_mapping(skill_dir.name, skill_file)
                if mapping:
                    mappings[skill_dir.name] = mapping
            except Exception as e:
                # A single malformed skill file must not abort the whole scan.
                logger.warning(f"Failed to parse skill {skill_dir.name}: {e}")

        self._skill_to_track = mappings
        logger.info(f"Scanned {len(mappings)} skills with track mappings")
        return mappings

    def _extract_skill_track_mapping(self, skill_name: str, skill_file: Path) -> Optional[SkillTrackMapping]:
        """Extract track mapping from a skill file.

        Checks both 'track' and 'cef_track' fields for bi-lateral mapping support.
        Prefers 'track' field if present, falls back to 'cef_track'.

        Returns None when the file has no frontmatter or no track field.
        """
        content = skill_file.read_text(encoding='utf-8')

        # Check for frontmatter
        if not content.startswith('---'):
            return None

        # Extract frontmatter (assumes '---\n' delimiters — TODO confirm CRLF files)
        end_match = content.find('\n---', 3)
        if end_match < 0:
            return None

        frontmatter_text = content[4:end_match]

        # Try 'track' field first (new bi-lateral mapping field)
        track_value = None
        track_match = re.search(r'^track:\s*(.+)$', frontmatter_text, re.MULTILINE)
        if track_match:
            track_value = track_match.group(1).strip()

        # Fall back to 'cef_track' if 'track' not found
        if not track_value:
            cef_match = re.search(r'^cef_track:\s*(.+)$', frontmatter_text, re.MULTILINE)
            if cef_match:
                track_value = cef_match.group(1).strip()

        if not track_value:
            return None

        # Parse track code and subtrack (e.g., "G-1" → track="G", subtrack="1").
        # Compound codes registered verbatim ("PCF-T", "AI-PCF") must NOT be
        # split on the dash — splitting them produced bogus code/subtrack pairs.
        track_code = track_value
        subtrack = None

        if '-' in track_value and track_value not in TRACK_DEFINITIONS:
            parts = track_value.split('-')
            track_code = parts[0]
            subtrack = parts[1] if len(parts) > 1 else None

        return SkillTrackMapping(
            skill_name=skill_name,
            skill_path=skill_file,
            track_code=track_code,
            subtrack=subtrack
        )

    def build_track_mappings(self) -> Dict[str, List[str]]:
        """
        Build track-to-skills mapping from skill mappings.

        Returns:
            Dictionary mapping track codes to lists of skill names
        """
        if not self._skill_to_track:
            self.scan_skills()

        track_skills: Dict[str, List[str]] = {}

        for skill_name, mapping in self._skill_to_track.items():
            track_skills.setdefault(mapping.track_code, []).append(skill_name)

        # Deduplicate and sort skills alphabetically within each track
        for track_code in track_skills:
            track_skills[track_code] = sorted(set(track_skills[track_code]))

        self._track_to_skills = track_skills
        logger.info(f"Built mappings for {len(track_skills)} tracks")
        return track_skills

    def get_track_file(self, track_code: str) -> Optional[Path]:
        """
        Find the track file for a given track code.

        Args:
            track_code: Track code (e.g., "A", "F", "AA")

        Returns:
            Path to track file if found, None otherwise
        """
        track_code_lower = track_code.lower()
        track_code_upper = track_code.upper()

        # Search directories (skip any that don't exist)
        search_dirs = [
            self.tracks_dir,
            self.pilot_tracks_dir,
            self.pilot_tracks_dir / "completed" if self.pilot_tracks_dir.exists() else None
        ]
        search_dirs = [d for d in search_dirs if d and d.exists()]

        # Search patterns for different naming conventions
        patterns = [
            # tracks/ directory patterns (lowercase with dashes)
            f"track-{track_code_lower}.md",
            f"track-{track_code_lower}-*.md",
            # pilot-tracks/ directory patterns (uppercase with dashes)
            f"TRACK-{track_code_upper}.md",
            f"TRACK-{track_code_upper}-*.md",
        ]

        for search_dir in search_dirs:
            for pattern in patterns:
                matches = list(search_dir.glob(pattern))
                if matches:
                    return matches[0]

            # Also try direct listing and filtering
            for file_path in search_dir.glob("*.md"):
                name = file_path.name.upper()
                # Match patterns like TRACK-C-*.md or TRACK-C-devops.md
                if f"TRACK-{track_code_upper}-" in name or f"TRACK-{track_code_upper}." in name:
                    return file_path

        return None

    def update_track_frontmatter(self, track_code: str, dry_run: bool = False, skills: Optional[List[str]] = None) -> bool:
        """
        Update a track file's frontmatter with associated skills.

        Args:
            track_code: Track code (e.g., "A", "F")
            dry_run: If True, don't actually write changes
            skills: Optional list of skills (if None, uses mapped skills)

        Returns:
            True if successful, False otherwise
        """
        track_file = self.get_track_file(track_code)
        if not track_file:
            logger.warning(f"Track file not found for code: {track_code}")
            return False

        # Use provided skills or look up mapped skills
        if skills is None:
            if track_code not in self._track_to_skills:
                logger.debug(f"No skills mapped to track {track_code}")
                return False
            skills = self._track_to_skills[track_code]

        try:
            content = track_file.read_text(encoding='utf-8')

            if not content.startswith('---'):
                logger.warning(f"Track file missing frontmatter: {track_file}")
                return False

            end_match = content.find('\n---', 3)
            if end_match < 0:
                logger.warning(f"Track file has invalid frontmatter: {track_file}")
                return False

            frontmatter = content[4:end_match]
            body = content[end_match + 4:]

            # Build skills list in YAML format
            skills_yaml = "\n".join([f"  - {skill}" for skill in skills])

            # Update or add associated_skills field
            if re.search(r'^associated_skills:', frontmatter, re.MULTILINE):
                # Replace existing - match key + all indented/blank lines that follow
                frontmatter = re.sub(
                    r'^associated_skills:\n(?:[ \t]+.*(?:\n|$)|\n)*',
                    f'associated_skills:\n{skills_yaml}\n',
                    frontmatter,
                    flags=re.MULTILINE
                )
            else:
                # Add new field before closing ---
                frontmatter = frontmatter.rstrip() + f"\nassociated_skills:\n{skills_yaml}"

            # Add/update track_mapping_updated timestamp
            today = datetime.now().strftime("%Y-%m-%d")
            if re.search(r'^track_mapping_updated:', frontmatter, re.MULTILINE):
                frontmatter = re.sub(
                    r'^track_mapping_updated:.*$',
                    f'track_mapping_updated: {today}',
                    frontmatter,
                    flags=re.MULTILINE
                )
            else:
                frontmatter += f"\ntrack_mapping_updated: {today}"

            new_content = f"---\n{frontmatter}\n---{body}"

            if not dry_run:
                track_file.write_text(new_content, encoding='utf-8')
                logger.info(f"Updated track {track_code}: {len(skills)} skills")
            else:
                logger.info(f"[DRY RUN] Would update track {track_code}: {len(skills)} skills")

            return True

        except Exception as e:
            logger.error(f"Failed to update track {track_code}: {e}")
            return False

    def update_all_tracks(self, dry_run: bool = False, include_empty: bool = True) -> Tuple[int, int, int]:
        """
        Update all track files with their associated skills.

        Args:
            dry_run: If True, don't actually write changes
            include_empty: If True, also update tracks with no associated skills

        Returns:
            Tuple of (success_count, fail_count, empty_count)
        """
        if not self._track_to_skills:
            self.build_track_mappings()

        # Discover all track files
        all_tracks = self.discover_all_track_files()

        success = 0
        failed = 0
        empty = 0

        # Process tracks with skills
        for track_code in sorted(self._track_to_skills.keys()):
            if self.update_track_frontmatter(track_code, dry_run=dry_run):
                success += 1
            else:
                failed += 1

        # Process tracks without skills (if include_empty)
        if include_empty:
            for track_code in sorted(all_tracks.keys()):
                if track_code not in self._track_to_skills:
                    # Update with empty skills list
                    if self.update_track_frontmatter(track_code, dry_run=dry_run, skills=[]):
                        empty += 1
                    else:
                        failed += 1

        total_tracks = len(all_tracks)
        tracks_with_skills = len(self._track_to_skills)

        logger.info(f"Track updates: {success} with skills, {empty} empty, {failed} failed (total files: {total_tracks})")
        return success, failed, empty

    def generate_track_skills_index(self) -> str:
        """
        Generate a markdown index of all tracks and their skills.

        Returns:
            Markdown content for the index
        """
        if not self._track_to_skills:
            self.build_track_mappings()

        lines = [
            "# Track-Skills Index",
            "",
            "Bi-lateral TRACK ↔ SKILL mapping for CODITECT Experience Framework.",
            "",
            f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
            f"**Total Skills:** {len(self._skill_to_track)}",
            f"**Total Tracks:** {len(self._track_to_skills)}",
            "",
            "---",
            "",
        ]

        # Group by category
        categories = {"technical": [], "business": [], "extension": []}

        for track_code in sorted(self._track_to_skills.keys()):
            track_info = TRACK_DEFINITIONS.get(track_code, {})
            category = track_info.get("category", "unknown")
            if category in categories:
                categories[category].append(track_code)

        # Output by category
        category_names = {
            "technical": "Technical Tracks (A-N, T)",
            "business": "PCF Business Tracks (O-AA)",
            "extension": "Extension Tracks (AB-AK)"
        }

        for category, track_codes in categories.items():
            if not track_codes:
                continue

            lines.extend([
                f"## {category_names.get(category, category.title())}",
                "",
                "| Track | Name | Skills |",
                "|-------|------|--------|",
            ])

            for track_code in track_codes:
                track_info = TRACK_DEFINITIONS.get(track_code, {})
                name = track_info.get("name", "Unknown")
                skills = self._track_to_skills.get(track_code, [])
                skill_count = len(skills)
                # Show at most three skills per row to keep the table readable
                skill_preview = ", ".join(skills[:3])
                if len(skills) > 3:
                    skill_preview += f", ... ({len(skills) - 3} more)"

                lines.append(f"| {track_code} | {name} | {skill_count}: {skill_preview} |")

            lines.append("")

        # Full skill listings
        lines.extend([
            "---",
            "",
            "## Detailed Skill Mappings",
            "",
        ])

        for track_code in sorted(self._track_to_skills.keys()):
            track_info = TRACK_DEFINITIONS.get(track_code, {})
            name = track_info.get("name", "Unknown")
            skills = self._track_to_skills.get(track_code, [])

            lines.extend([
                f"### Track {track_code}: {name}",
                "",
                f"**Skills ({len(skills)}):**",
                "",
            ])

            for skill in skills:
                mapping = self._skill_to_track.get(skill)
                if mapping:
                    lines.append(f"- `{skill}` → `{mapping.full_track_id}`")
                else:
                    lines.append(f"- `{skill}`")

            lines.append("")

        return "\n".join(lines)

    def save_track_skills_index(self, output_path: Optional[Path] = None, dry_run: bool = False) -> Path:
        """
        Save the track-skills index to a file.

        Args:
            output_path: Where to save the index (default: internal/project/track-skills-index.md)
            dry_run: If True, don't actually write the file

        Returns:
            Path to the saved file
        """
        if output_path is None:
            output_path = self.project_root / "internal" / "project" / "track-skills-index.md"

        content = self.generate_track_skills_index()

        if not dry_run:
            output_path.parent.mkdir(parents=True, exist_ok=True)
            output_path.write_text(content, encoding='utf-8')
            logger.info(f"Saved track-skills index: {output_path}")
        else:
            logger.info(f"[DRY RUN] Would save track-skills index: {output_path}")

        return output_path

    def _is_valid_track(self, track_code: str) -> bool:
        """Check if a track code is valid (including legacy)."""
        return (
            track_code in TRACK_DEFINITIONS or
            track_code in LEGACY_TRACK_DEFINITIONS or
            track_code in TRACK_CODE_ALIASES
        )

    def discover_all_track_files(self) -> Dict[str, Path]:
        """
        Discover all track files across all track directories.

        Returns:
            Dictionary mapping track codes to file paths
        """
        track_files = {}

        search_dirs = [
            self.tracks_dir,
            self.pilot_tracks_dir,
            self.pilot_tracks_dir / "completed" if self.pilot_tracks_dir.exists() else None
        ]
        search_dirs = [d for d in search_dirs if d and d.exists()]

        for search_dir in search_dirs:
            for file_path in search_dir.glob("*.md"):
                name = file_path.name

                # Skip non-track files
                if name in ['README.md', 'CLAUDE.md', 'CHANGELOG.md', 'MASTER-TRACK-INDEX.md',
                            'TOOLS_TRACKING_ADDITIONS_SUMMARY.md']:
                    continue

                # Extract track code from filename
                # Patterns: TRACK-A-*.md, track-a-*.md, track-aa-*.md, TRACK-A.md
                track_code = None

                # Try TRACK-X pattern (uppercase code)
                match = re.match(r'^[Tt][Rr][Aa][Cc][Kk]-([A-Z0-9]+)', name)
                if match:
                    track_code = match.group(1)
                else:
                    # Try track-x pattern (lowercase code, normalized to upper)
                    match = re.match(r'^[Tt][Rr][Aa][Cc][Kk]-([a-z0-9]+)', name)
                    if match:
                        track_code = match.group(1).upper()

                if track_code:
                    # Handle duplicates (e.g., track-f vs track-f-documentation):
                    # first file found wins
                    if track_code not in track_files:
                        track_files[track_code] = file_path
                        logger.debug(f"Found track file: {track_code} → {file_path}")

        self._all_track_files = track_files
        logger.info(f"Discovered {len(track_files)} track files")
        return track_files

    def validate_consistency(self) -> List[str]:
        """
        Validate bi-lateral mapping consistency.

        Returns:
            List of validation errors (empty if consistent)
        """
        errors = []

        if not self._skill_to_track:
            self.scan_skills()
        if not self._track_to_skills:
            self.build_track_mappings()

        # Check for skills pointing to non-existent tracks
        for skill_name, mapping in self._skill_to_track.items():
            if not self._is_valid_track(mapping.track_code):
                errors.append(f"Skill '{skill_name}' references unknown track '{mapping.track_code}'")

        # Check for orphaned skills (skills not in any track's list)
        all_tracked_skills = set()
        for skills in self._track_to_skills.values():
            all_tracked_skills.update(skills)

        for skill_name in self._skill_to_track:
            if skill_name not in all_tracked_skills:
                errors.append(f"Skill '{skill_name}' not found in any track's skill list")

        return errors

# Convenience functions for CLI integration.

def update_track_mappings(skills_dir: Optional[Path] = None, project_root: Optional[Path] = None, dry_run: bool = False, save_index: bool = True, include_empty_tracks: bool = True) -> Tuple[int, int, int]:
    """
    Main entry point for updating track mappings.

    Args:
        skills_dir: Directory containing skills (default: ./skills)
        project_root: Project root directory (default: cwd)
        dry_run: If True, don't actually write changes
        save_index: If True, save the track-skills index file
        include_empty_tracks: If True, update tracks with no skills

    Returns:
        Tuple of (success_with_skills, failed, empty_tracks)
    """
    registry = TrackRegistry(project_root=project_root)

    # Honor an explicit skills directory (previously this parameter was
    # accepted but silently ignored).
    if skills_dir is not None:
        registry.skills_dir = Path(skills_dir)

    # Discover all track files
    logger.info("Discovering all track files...")
    registry.discover_all_track_files()

    # Scan all skills
    logger.info("Scanning skills for track mappings...")
    registry.scan_skills()

    # Build track mappings
    logger.info("Building track-to-skill mappings...")
    registry.build_track_mappings()

    # Validate consistency (log-only: issues do not abort the update)
    errors = registry.validate_consistency()
    if errors:
        logger.warning("Consistency validation found issues:")
        for error in errors:
            logger.warning(f"  - {error}")

    # Update track files
    logger.info("Updating track files...")
    success, failed, empty = registry.update_all_tracks(dry_run=dry_run, include_empty=include_empty_tracks)

    # Save index
    if save_index:
        registry.save_track_skills_index(dry_run=dry_run)

    return success, failed, empty

if __name__ == "__main__":
    # Simple CLI for testing
    import argparse

    parser = argparse.ArgumentParser(description="Track Registry Manager")
    parser.add_argument("--scan", action="store_true", help="Scan skills and show mappings")
    parser.add_argument("--update", action="store_true", help="Update track files")
    parser.add_argument("--dry-run", action="store_true", help="Preview changes without writing")
    parser.add_argument("--validate", action="store_true", help="Validate consistency")
    parser.add_argument("--index", action="store_true", help="Generate and save index")
    parser.add_argument("--list-tracks", action="store_true", help="List all discovered track files")

    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)

    registry = TrackRegistry()

    if args.list_tracks:
        tracks = registry.discover_all_track_files()
        print(f"\nšŸ“ Discovered {len(tracks)} track files:\n")
        for code in sorted(tracks.keys()):
            track_info = TRACK_DEFINITIONS.get(code, {})
            name = track_info.get("name", "Unknown")
            print(f"  {code:3} - {name:30} → {tracks[code]}")
        print()

    if args.scan or args.update or args.validate or args.index:
        registry.discover_all_track_files()
        registry.scan_skills()
        registry.build_track_mappings()

        total_tracks = len(registry._all_track_files)
        mapped_tracks = len(registry._track_to_skills)
        total_skills = len(registry._skill_to_track)

        print(f"\nšŸ“Š Summary:")
        print(f"  Total track files: {total_tracks}")
        print(f"  Tracks with skills: {mapped_tracks}")
        print(f"  Empty tracks: {total_tracks - mapped_tracks}")
        print(f"  Total skills mapped: {total_skills}\n")

        print("Tracks with skills:")
        for track_code in sorted(registry._track_to_skills.keys()):
            skills = registry._track_to_skills[track_code]
            track_info = TRACK_DEFINITIONS.get(track_code, {})
            name = track_info.get("name", "Unknown")
            print(f"  Track {track_code} ({name}): {len(skills)} skills")

        if args.validate:
            errors = registry.validate_consistency()
            if errors:
                print("\nValidation errors:")
                for error in errors:
                    print(f"  āŒ {error}")
            else:
                print("\nāœ… All mappings are consistent")

        if args.update:
            success, failed, empty = registry.update_all_tracks(dry_run=args.dry_run, include_empty=True)
            print(f"\nUpdated tracks: {success} with skills, {empty} empty, {failed} failed")

        if args.index:
            path = registry.save_track_skills_index(dry_run=args.dry_run)
            print(f"\nIndex saved: {path}")