#!/usr/bin/env python3
"""
CP-10: Track Node Extractor (ADR-151)

Creates track nodes for PILOT tracks:

  - Technical Tracks: A-N (14 tracks)
  - PCF Business Tracks: O-AA (13 tracks)
  - Extension Tracks: AB-AK (10 tracks)

Total: 37 tracks

node_type: 'track'
Properties: name, domain, status, progress_percent, tier

Source: Static definition + TRACK files
Target: org.db kg_nodes table

Created: 2026-02-03
Track: J (Memory Intelligence)
Task: J.3.4.8
"""

import logging
import re
from pathlib import Path
from typing import Any, Dict, Generator, Optional, Tuple

from .base_extractor import BaseExtractor

# Module-scoped logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Track definitions per ADR-054 and CODITECT-STANDARD-TRACK-NOMENCLATURE

# Tier 1 tracks, letters A-N. Maps track letter -> {"name", "domain", "tier"}.
TECHNICAL_TRACKS = {
    "A": {"name": "Backend API", "domain": "Django, REST APIs, databases", "tier": 1},
    "B": {"name": "Frontend UI", "domain": "React, TypeScript, dashboards", "tier": 1},
    "C": {"name": "DevOps/Infrastructure", "domain": "Docker, K8s, CI/CD, Terraform", "tier": 1},
    "D": {"name": "Security", "domain": "Auth, audit, compliance", "tier": 1},
    "E": {"name": "Testing/QA", "domain": "E2E, unit tests, validation", "tier": 1},
    "F": {"name": "Documentation", "domain": "Docs, guides, API reference", "tier": 1},
    "G": {"name": "DMS Product", "domain": "Document management, GitHub OAuth", "tier": 1},
    "H": {"name": "Framework Autonomy", "domain": "MoE, CEF, orchestration", "tier": 1},
    "I": {"name": "UI Components", "domain": "React components, design system", "tier": 1},
    "J": {"name": "Memory Intelligence", "domain": "Context, sessions, /cx, /cxq", "tier": 1},
    "K": {"name": "Workflow Automation", "domain": "n8n, scheduling, automation", "tier": 1},
    "L": {"name": "Extended Testing", "domain": "Performance, load, security testing", "tier": 1},
    "M": {"name": "Extended Security", "domain": "Penetration, compliance, audit", "tier": 1},
    "N": {"name": "GTM/Launch", "domain": "Marketing, sales, onboarding", "tier": 1},
}

# Tier 2 tracks, letters O-AA. Maps track letter -> {"name", "domain", "tier"}.
PCF_BUSINESS_TRACKS = {
    "O": {"name": "Vision & Strategy", "domain": "Strategy, planning, roadmap", "tier": 2},
    "P": {"name": "Products & Services", "domain": "Product management, lifecycle", "tier": 2},
    "Q": {"name": "Marketing", "domain": "Brand, campaigns, content", "tier": 2},
    "R": {"name": "Sales", "domain": "Pipeline, CRM, deals", "tier": 2},
    "S": {"name": "Customer Service", "domain": "Support, success, retention", "tier": 2},
    "T": {"name": "Tools Integration", "domain": "MCP, ADK, integrations", "tier": 2},
    "U": {"name": "Procurement", "domain": "Vendors, purchasing, contracts", "tier": 2},
    "V": {"name": "Human Capital", "domain": "HR, recruiting, org development", "tier": 2},
    "W": {"name": "IT Management", "domain": "Infrastructure, security, support", "tier": 2},
    "X": {"name": "Financial Management", "domain": "Accounting, budgets, planning", "tier": 2},
    "Y": {"name": "Legal", "domain": "Contracts, compliance, IP", "tier": 2},
    "Z": {"name": "Risk Management", "domain": "Risk assessment, mitigation", "tier": 2},
    "AA": {"name": "Business Capabilities", "domain": "Process, knowledge, quality", "tier": 2},
}

# Tier 3 tracks, letters AB-AK. Maps track letter -> {"name", "domain", "tier"}.
EXTENSION_TRACKS = {
    "AB": {"name": "AI/ML Integration", "domain": "LLMs, RAG, MLops", "tier": 3},
    "AC": {"name": "Analytics", "domain": "Metrics, dashboards, reporting", "tier": 3},
    "AD": {"name": "Data Engineering", "domain": "ETL, pipelines, warehousing", "tier": 3},
    "AE": {"name": "Mobile", "domain": "iOS, Android, React Native", "tier": 3},
    "AF": {"name": "IoT", "domain": "Sensors, edge, embedded", "tier": 3},
    "AG": {"name": "Blockchain", "domain": "Smart contracts, Web3", "tier": 3},
    "AH": {"name": "AR/VR", "domain": "Augmented/virtual reality", "tier": 3},
    "AI": {"name": "Gaming", "domain": "Game engines, mechanics", "tier": 3},
    "AJ": {"name": "Robotics", "domain": "Automation, control systems", "tier": 3},
    "AK": {"name": "Custom", "domain": "Customer-specific tracks", "tier": 3},
}

class TrackExtractor(BaseExtractor):
    """
    Create track nodes from static definitions.

    Tracks are predefined in ADR-054 and the Track Nomenclature Standard.
    One node is produced for every entry in TECHNICAL_TRACKS,
    PCF_BUSINESS_TRACKS and EXTENSION_TRACKS. When ``track_files_dir`` is
    set, values parsed from a matching ``TRACK-{letter}-*.md`` file are
    overlaid on the default properties.
    """

    # Tier number -> node subtype. Any tier not listed maps to "extension".
    _SUBTYPE_BY_TIER = {1: "technical", 2: "business"}

    # Compiled once: _get_track_file_info runs for every track letter.
    _PROGRESS_RE = re.compile(r'progress[:\s]+(\d+(?:\.\d+)?)\s*%', re.IGNORECASE)
    _STATUS_RE = re.compile(r'status[:\s]+(active|completed|paused|deferred)', re.IGNORECASE)

    def __init__(
        self,
        target_db_path: Path,
        track_files_dir: Optional[Path] = None,
        dry_run: bool = False,
        tenant_id: Optional[str] = None,
        project_id: Optional[str] = None,
    ):
        """
        Args:
            target_db_path: Forwarded to BaseExtractor.
            track_files_dir: Optional directory searched for
                ``TRACK-{letter}-*.md`` files; None disables the lookup.
            dry_run: Forwarded to BaseExtractor.
            tenant_id: Forwarded to BaseExtractor.
            project_id: Forwarded to BaseExtractor.
        """
        super().__init__(target_db_path, dry_run, tenant_id, project_id)
        self.track_files_dir = track_files_dir

    @property
    def node_type(self) -> str:
        """Node type label for every node this extractor yields."""
        return "track"

    def extract_entities(self) -> Generator[Tuple[str, str, Optional[str], Dict[str, Any], Optional[str], Optional[str]], None, None]:
        """
        Generate track nodes from static definitions.

        Yields:
            Tuple of (node_id, name, subtype, properties, source_table, source_id).
            ``source_table`` is always None (the definitions are static) and
            ``source_id`` is the track letter.
        """
        # Later tables would win on a duplicate letter; the three tables are
        # disjoint today, so this is a plain concatenation in tier order.
        all_tracks = {**TECHNICAL_TRACKS, **PCF_BUSINESS_TRACKS, **EXTENSION_TRACKS}

        for track_letter, track_info in all_tracks.items():
            tier = track_info["tier"]

            properties = {
                "track_letter": track_letter,
                "track_name": track_info["name"],
                "domain": track_info["domain"],
                "tier": tier,
                "status": "active",  # Default status
                "progress_percent": 0.0,  # Overridden below when a TRACK file supplies it
            }

            # Returns None when no directory is configured or nothing matches,
            # so the defaults above stay in place.
            track_file_info = self._get_track_file_info(track_letter)
            if track_file_info:
                properties.update(track_file_info)

            yield (
                self.generate_node_id(track_letter),
                f"Track {track_letter}: {track_info['name']}",
                self._SUBTYPE_BY_TIER.get(tier, "extension"),
                properties,
                None,  # No source table (static definition)
                track_letter,  # Use track letter as source_id
            )

    def _get_track_file_info(self, track_letter: str) -> Optional[Dict[str, Any]]:
        """
        Try to read progress info from TRACK-{letter}-*.md files.

        This is a simplified text scan, not a YAML frontmatter parser: it
        takes the first ``progress: <number>%`` and the first
        ``status: active|completed|paused|deferred`` found anywhere in the
        file (case-insensitive).

        Args:
            track_letter: Track identifier used to build the glob pattern.

        Returns:
            Dict with ``track_file`` and, when found, ``progress_percent``
            (float) and ``status`` (lower-cased). None when no directory is
            configured, no file matches, or the file cannot be read.
        """
        directory = self.track_files_dir
        if not directory or not directory.is_dir():
            return None

        # glob() yields entries in filesystem order, which is not stable.
        # Taking the smallest path makes the choice reproducible when more
        # than one file matches the same track letter.
        track_file = min(directory.glob(f"TRACK-{track_letter}-*.md"), default=None)
        if track_file is None:
            return None

        try:
            # The fields we scan for are ASCII, so undecodable bytes elsewhere
            # in the file are replaced rather than aborting the read.
            content = track_file.read_text(encoding="utf-8", errors="replace")
        except OSError as e:
            logger.debug("Could not read track file %s: %s", track_file, e)
            return None

        info: Dict[str, Any] = {"track_file": str(track_file)}

        progress_match = self._PROGRESS_RE.search(content)
        if progress_match:
            info["progress_percent"] = float(progress_match.group(1))

        status_match = self._STATUS_RE.search(content)
        if status_match:
            info["status"] = status_match.group(1).lower()

        return info