scripts-belongs-to-edge-builder

#!/usr/bin/env python3
"""
CP-19: BELONGS_TO Edge Builder (ADR-151)

Creates BELONGS_TO edges from Component nodes to Track nodes.

Edge: component:X -> track:Y
Source: Component frontmatter (track: field)
Properties: assignment_source (frontmatter, inferred)

Maps components to their PILOT tracks (A-N) based on frontmatter metadata
in agent, skill, and command files.

Created: 2026-02-03
Track: J (Memory Intelligence)
Task: J.3.5.4
"""

import json
import logging
import re
from pathlib import Path
from typing import Any, Dict, Generator, Optional, Tuple

import yaml

from .base_edge_builder import BaseEdgeBuilder

# Module-level logger, named after this module so its records can be
# filtered per module. The bare `name` used previously is undefined here.
logger = logging.getLogger(__name__)

class BelongsToEdgeBuilder(BaseEdgeBuilder):
    """
    Edge builder that links component nodes to the track they belong to.

    Component markdown files (agents, skills, commands, hooks) are read for
    a ``track:`` frontmatter field, and one BELONGS_TO edge is produced per
    component pointing at the matching track node.
    """

    # NOTE(review): the `def`s that follow take `self` but sit at column 0 in
    # this copy of the file; presumably they belong inside this class body.
    # Confirm the indentation against the original source.

def __init__(
    self,
    framework_root: Path,
    target_db_path: Path,
    dry_run: bool = False,
    tenant_id: Optional[str] = None,
    validate_nodes: bool = True,
) -> None:
    """
    Initialize the builder and remember the framework root directory.

    All arguments except ``framework_root`` are forwarded positionally to
    the base class initializer; only ``framework_root`` is stored here.

    Args:
        framework_root: Path to coditect-core root; component directories
            are resolved relative to it.
        target_db_path: Path to org.db for kg_edges
        dry_run: If True, don't write to database
        tenant_id: Optional tenant ID
        validate_nodes: If True, verify nodes exist
    """
    # Passed positionally: the order must match the base initializer's
    # signature, which is defined outside this file.
    super().__init__(target_db_path, dry_run, tenant_id, validate_nodes)
    self.framework_root = framework_root

@property
def edge_type(self) -> str:
    """Edge type label produced by this builder (always ``BELONGS_TO``)."""
    return "BELONGS_TO"

def _parse_frontmatter(self, content: str) -> Optional[Dict]:
    """Extract YAML frontmatter from markdown content."""
    match = re.match(r'^---\s*\n(.*?)\n---', content, re.DOTALL)
    if match:
        try:
            return yaml.safe_load(match.group(1))
        except yaml.YAMLError:
            return None
    return None

def _extract_track_from_file(self, file_path: Path) -> Optional[str]:
    """
    Extract track letter from component file frontmatter.

    Returns track letter (A-N, O-AA) or None if not found.
    """
    try:
        content = file_path.read_text(encoding='utf-8')
        frontmatter = self._parse_frontmatter(content)
        if frontmatter:
            track = frontmatter.get('track')
            if track:
                # Normalize track letter (handle "Track A" or just "A")
                track_str = str(track).strip().upper()
                if track_str.startswith('TRACK '):
                    track_str = track_str[6:]
                return track_str
    except Exception as e:
        logger.debug(f"Could not parse {file_path}: {e}")
    return None

def _infer_track_from_path(self, file_path: Path, component_type: str) -> Optional[str]:
    """
    Infer track from file path or component type.

    Fallback when no explicit track: frontmatter.
    """
    # Track inference rules based on component type and location
    path_str = str(file_path).lower()

    if 'security' in path_str:
        return 'D'
    if 'test' in path_str:
        return 'E'
    if 'doc' in path_str:
        return 'F'
    if 'devops' in path_str or 'deploy' in path_str or 'docker' in path_str:
        return 'C'
    if 'frontend' in path_str or 'react' in path_str or 'ui' in path_str:
        return 'B'
    if 'backend' in path_str or 'api' in path_str or 'django' in path_str:
        return 'A'

    return None

def extract_edges(self) -> Generator[Tuple[str, str, Dict[str, Any]], None, None]:
    """
    Extract BELONGS_TO edges from component files.

    Scans the agents/, skills/, commands/ and hooks/ directories under the
    framework root for markdown files. The track is taken from the
    ``track:`` frontmatter field when present and otherwise inferred from
    the path; files with no resolvable track are skipped. Each file yields
    at most one edge, and files are visited in sorted order per pattern.

    Yields:
        Tuple of (from_node_id, to_node_id, properties). ``properties``
        holds ``assignment_source`` ('frontmatter' or 'inferred') and
        ``file_path`` relative to the framework root.
    """
    # (directory name, component type used in the node ID)
    component_dirs = [
        ('agents', 'agent'),
        ('skills', 'skill'),
        ('commands', 'command'),
        ('hooks', 'hook'),
    ]

    for dir_name, component_type in component_dirs:
        dir_path = self.framework_root / dir_name
        if not dir_path.is_dir():
            continue

        # Skills may also live one directory per skill as <name>/SKILL.md
        patterns = ['*.md', '**/SKILL.md'] if dir_name == 'skills' else ['*.md']

        # '**/SKILL.md' also matches a SKILL.md directly inside skills/,
        # which '*.md' has already produced; remember what was handled so
        # no file yields two edges.
        seen = set()

        for pattern in patterns:
            # Sorted so edge order does not depend on directory listing order
            for file_path in sorted(dir_path.glob(pattern)):
                if file_path in seen:
                    continue
                seen.add(file_path)

                # A directory whose name ends in .md is not a component
                if not file_path.is_file():
                    continue

                # Skip non-component files
                if file_path.name.startswith('_') or file_path.name == 'README.md':
                    continue

                track = self._extract_track_from_file(file_path)
                assignment_source = 'frontmatter'

                if not track:
                    track = self._infer_track_from_path(file_path, component_type)
                    assignment_source = 'inferred'

                if not track:
                    continue

                # Component node: component:{type}/{name}. A SKILL.md is
                # named after its containing directory.
                component_name = file_path.stem
                if file_path.name == 'SKILL.md':
                    component_name = file_path.parent.name

                from_node = f"component:{component_type}/{component_name}"
                to_node = f"track:{track}"

                properties = {
                    'assignment_source': assignment_source,
                    'file_path': str(file_path.relative_to(self.framework_root)),
                }

                yield (from_node, to_node, properties)

def extract_edges_from_kg_nodes(self) -> Generator[Tuple[str, str, Dict[str, Any]], None, None]:
    """
    Alternative: extract track assignments from existing kg_nodes properties.

    Reads the JSON ``properties`` column of every ``component`` row in
    kg_nodes instead of re-parsing files. The ``track`` property is
    normalized the same way as the frontmatter value (stringified,
    stripped, upper-cased, leading "TRACK " removed). Rows with missing or
    unparseable properties, non-object JSON, or no track are skipped.

    Yields:
        Tuple of (from_node_id, to_node_id, properties), where properties
        is ``{'assignment_source': 'node_properties'}``.
    """
    # Assumes the connection returns rows addressable by column name
    # (e.g. sqlite3.Row); TODO confirm against connect_target().
    conn = self.connect_target()

    try:
        cursor = conn.execute("""
            SELECT id, node_type, subtype, name, properties
            FROM kg_nodes
            WHERE node_type = 'component'
        """)

        for row in cursor:
            properties_json = row['properties']
            if not properties_json:
                continue

            try:
                properties = json.loads(properties_json)
            except json.JSONDecodeError:
                continue

            # Valid JSON that is not an object cannot carry a track. Skip
            # the row rather than letting .get() raise and end the scan.
            if not isinstance(properties, dict):
                continue

            track = properties.get('track')
            if not track:
                continue

            # str() first: a numeric track would otherwise fail on .upper()
            # and abort extraction for every remaining row.
            track_str = str(track).strip().upper()
            if track_str.startswith('TRACK '):
                track_str = track_str[6:]
            if not track_str:
                continue

            edge_properties = {
                'assignment_source': 'node_properties',
            }

            yield (row['id'], f"track:{track_str}", edge_properties)

    except Exception as e:
        # Best effort: log and stop yielding instead of propagating
        logger.error(f"Error extracting BELONGS_TO edges from kg_nodes: {e}")
        return