Skip to main content

scripts-belongs-to-edge-builder

#!/usr/bin/env python3
"""
CP-19: BELONGS_TO Edge Builder (ADR-151)

Creates BELONGS_TO edges from Component nodes to Track nodes.

Edge: component:X -> track:Y
Source: Component frontmatter (track: field)
Properties: assignment_source (frontmatter, inferred)

Maps components to their PILOT tracks (A-N) based on frontmatter metadata
in agent, skill, command files.

Created: 2026-02-03
Track: J (Memory Intelligence)
Task: J.3.5.4
"""

import json
import logging
import re
from pathlib import Path
from typing import Any, Dict, Generator, Optional, Tuple

import yaml

from .base_edge_builder import BaseEdgeBuilder

# Module-level logger named after this module; `__name__` (not the undefined
# bare `name`) is required for the module to import at all.
logger = logging.getLogger(__name__)

class BelongsToEdgeBuilder(BaseEdgeBuilder):
    """
    Build BELONGS_TO edges from components to tracks.

    Parses component files (agents, skills, commands, hooks) for ``track:``
    frontmatter and creates edges to the corresponding track nodes. Files
    without a usable ``track:`` value fall back to keyword-based inference
    from their path.
    """

    # (directory under framework_root, component type used in the node ID).
    _COMPONENT_DIRS = (
        ('agents', 'agent'),
        ('skills', 'skill'),
        ('commands', 'command'),
        ('hooks', 'hook'),
    )

    # Leading YAML frontmatter block delimited by '---' lines.
    _FRONTMATTER_RE = re.compile(r'^---\s*\n(.*?)\n---', re.DOTALL)

    # Ordered (track, pattern) inference rules; the first match wins.
    # - 'doc' must not fire on 'docker', otherwise the docker rule below it
    #   could never be reached.
    # - The short keywords 'ui' and 'api' must start or end a word so they do
    #   not fire inside unrelated words such as "build", "guide" or "rapid".
    _PATH_TRACK_RULES = (
        ('D', re.compile(r'security')),
        ('E', re.compile(r'test')),
        ('F', re.compile(r'doc(?!ker)')),
        ('C', re.compile(r'devops|deploy|docker')),
        ('B', re.compile(r'frontend|react|(?<![a-z])ui|ui(?![a-z])')),
        ('A', re.compile(r'backend|django|(?<![a-z])api|api(?![a-z])')),
    )

    def __init__(
        self,
        framework_root: Path,
        target_db_path: Path,
        dry_run: bool = False,
        tenant_id: Optional[str] = None,
        validate_nodes: bool = True,
    ):
        """
        Initialize with framework root directory.

        Args:
            framework_root: Path to coditect-core root
            target_db_path: Path to org.db for kg_edges
            dry_run: If True, don't write to database
            tenant_id: Optional tenant ID
            validate_nodes: If True, verify nodes exist
        """
        super().__init__(target_db_path, dry_run, tenant_id, validate_nodes)
        self.framework_root = framework_root

    @property
    def edge_type(self) -> str:
        """Edge type label produced by this builder."""
        return "BELONGS_TO"

    @staticmethod
    def _normalize_track(raw: Any) -> Optional[str]:
        """
        Normalize a raw track value to an upper-case track identifier.

        Accepts scalar values such as "a", "Track A" or " track  b ".
        Returns None for empty values and for non-scalar values (lists,
        dicts, booleans), which cannot name a single track.
        """
        if not raw or isinstance(raw, (bool, list, tuple, dict, set)):
            return None
        track = str(raw).strip().upper()
        if track.startswith('TRACK '):
            # Re-strip: "Track  A" would otherwise leave a leading space.
            track = track[6:].strip()
        return track or None

    def _parse_frontmatter(self, content: str) -> Optional[Dict]:
        """
        Extract YAML frontmatter from markdown content.

        Returns the frontmatter mapping, or None when there is no
        frontmatter block, the YAML is invalid, or the YAML is not a
        mapping (e.g. a bare scalar or a list).
        """
        # A UTF-8 BOM before the opening '---' would defeat the anchored match.
        match = self._FRONTMATTER_RE.match(content.lstrip('\ufeff'))
        if not match:
            return None
        try:
            data = yaml.safe_load(match.group(1))
        except yaml.YAMLError:
            return None
        return data if isinstance(data, dict) else None

    def _extract_track_from_file(self, file_path: Path) -> Optional[str]:
        """
        Extract track letter from component file frontmatter.

        Returns track letter (A-N, O-AA) or None if not found or if the
        file cannot be read or parsed.
        """
        try:
            content = file_path.read_text(encoding='utf-8')
            frontmatter = self._parse_frontmatter(content)
        except Exception as e:
            # Deliberately best-effort: an unreadable or malformed file must
            # not stop the scan; the caller falls back to path inference.
            logger.debug("Could not parse %s: %s", file_path, e)
            return None

        if not frontmatter:
            return None
        return self._normalize_track(frontmatter.get('track'))

    def _infer_track_from_path(self, file_path: Path, component_type: str) -> Optional[str]:
        """
        Infer track from keywords in the file path.

        Fallback when there is no explicit ``track:`` frontmatter. Only the
        part of the path below ``framework_root`` is inspected, so directory
        names above the framework checkout cannot influence the result.

        Args:
            file_path: Path of the component file
            component_type: Component type; currently unused, kept for
                interface compatibility

        Returns:
            Track letter of the first matching rule, or None.
        """
        try:
            relative = file_path.relative_to(self.framework_root)
        except ValueError:
            # Not under framework_root: fall back to the path as given.
            relative = file_path
        path_str = str(relative).lower()

        for track, pattern in self._PATH_TRACK_RULES:
            if pattern.search(path_str):
                return track
        return None

    def _iter_component_files(self) -> Generator[Tuple[Path, str], None, None]:
        """Yield (file_path, component_type) for each candidate component file, once."""
        seen = set()
        for dir_name, component_type in self._COMPONENT_DIRS:
            dir_path = self.framework_root / dir_name
            if not dir_path.is_dir():
                continue

            # Skills may also live in sub-directories as <skill>/SKILL.md.
            patterns = ('*.md', '**/SKILL.md') if dir_name == 'skills' else ('*.md',)

            for pattern in patterns:
                # Sorted so edges are produced in a stable order across runs.
                for file_path in sorted(dir_path.glob(pattern)):
                    # Skip non-component files
                    if file_path.name.startswith('_') or file_path.name == 'README.md':
                        continue
                    # '**/SKILL.md' also matches a top-level SKILL.md that
                    # '*.md' already produced; emit each file only once.
                    if file_path in seen or not file_path.is_file():
                        continue
                    seen.add(file_path)
                    yield file_path, component_type

    def extract_edges(self) -> Generator[Tuple[str, str, Dict[str, Any]], None, None]:
        """
        Extract BELONGS_TO edges from component files.

        Scans agents/, skills/, commands/ and hooks/ directories for
        ``track:`` frontmatter, falling back to path-based inference.
        Files for which no track can be determined produce no edge.

        Yields:
            Tuple of (from_node_id, to_node_id, properties)
        """
        for file_path, component_type in self._iter_component_files():
            track = self._extract_track_from_file(file_path)
            assignment_source = 'frontmatter'

            if not track:
                track = self._infer_track_from_path(file_path, component_type)
                assignment_source = 'inferred'

            if not track:
                continue

            # Component node: component:{type}/{name}; a SKILL.md is named
            # after the directory that contains it.
            component_name = file_path.stem
            if file_path.name == 'SKILL.md':
                component_name = file_path.parent.name

            from_node = f"component:{component_type}/{component_name}"
            to_node = f"track:{track}"

            properties = {
                'assignment_source': assignment_source,
                'file_path': str(file_path.relative_to(self.framework_root)),
            }

            yield (from_node, to_node, properties)

    def extract_edges_from_kg_nodes(self) -> Generator[Tuple[str, str, Dict[str, Any]], None, None]:
        """
        Alternative: Extract track assignments from existing kg_nodes properties.

        This uses the properties already extracted during node population
        instead of re-parsing files. Rows whose properties are empty, not
        valid JSON, not a JSON object, or lack a usable ``track`` value are
        skipped individually. Any other error is logged and ends the scan.

        Yields:
            Tuple of (from_node_id, to_node_id, properties)
        """
        # NOTE(review): the connection is not closed here; presumably its
        # lifecycle is owned by BaseEdgeBuilder - confirm.
        conn = self.connect_target()

        try:
            cursor = conn.execute("""
                SELECT id, node_type, subtype, name, properties
                FROM kg_nodes
                WHERE node_type = 'component'
            """)

            # Rows are accessed by column name, so the connection is assumed
            # to use a mapping-style row factory - TODO confirm in base class.
            for row in cursor:
                properties_json = row['properties']
                if not properties_json:
                    continue

                try:
                    properties = json.loads(properties_json)
                except (json.JSONDecodeError, TypeError):
                    continue
                if not isinstance(properties, dict):
                    # Valid JSON but not an object; nothing to look up.
                    continue

                # Same normalization as the file-based path, so both sources
                # produce identical track node IDs.
                track = self._normalize_track(properties.get('track'))
                if not track:
                    continue

                edge_properties = {
                    'assignment_source': 'node_properties',
                }

                yield (row['id'], f"track:{track}", edge_properties)

        except Exception as e:
            logger.error("Error extracting BELONGS_TO edges from kg_nodes: %s", e)