#!/usr/bin/env python3
# scripts-component-extractor
"""
CP-07: Component Node Extractor (ADR-151)

Extracts component entities from platform.db and creates kg_nodes:

- node_type: 'component'
- subtype: 'agent', 'skill', 'command', 'script', 'hook', etc.
- Properties: path, description, model, tools, track

Source: platform.db components table
Target: org.db kg_nodes table

Created: 2026-02-03
Track: J (Memory Intelligence)
Task: J.3.4.1
"""

import json
import logging
from pathlib import Path
from typing import Any, Dict, Generator, Optional, Tuple

from .base_extractor import SQLiteSourceExtractor

# Module-level logger. It is keyed on this module's import name so its records
# can be filtered and configured through the standard logging hierarchy.
logger = logging.getLogger(__name__)

class ComponentExtractor(SQLiteSourceExtractor):
    """
    Extract components from platform.db into kg_nodes.

    Every row of the ``components`` table is turned into one node tuple by
    :meth:`extract_entities`.

    Components include: agents, skills, commands, scripts, hooks, documents,
    workflows
    """

    # Maps raw ``components.type`` values to node subtypes. Only
    # ``workflow-json`` is actually rewritten; the identity entries document
    # the known types. Types missing from the map pass through unchanged.
    _SUBTYPE_MAP: Dict[str, str] = {
        "agent": "agent",
        "skill": "skill",
        "command": "command",
        "script": "script",
        "hook": "hook",
        "document": "document",
        "workflow": "workflow",
        "workflow-json": "workflow",
    }

    @property
    def node_type(self) -> str:
        """Node type given to every node this extractor produces."""
        return "component"

    def extract_entities(
        self,
    ) -> Generator[
        Tuple[str, str, Optional[str], Dict[str, Any], Optional[str], Optional[str]],
        None,
        None,
    ]:
        """
        Extract components from platform.db.

        Runs one query over the ``components`` table, ordered by type and
        then name, and lazily yields one tuple per row.

        Yields:
            Tuple of (node_id, name, subtype, properties, source_table, source_id).
            ``properties`` never contains ``None`` values, ``source_table``
            is always ``"components"`` and ``source_id`` is the row's ``id``.
        """
        conn = self.connect_source()

        # Rows are read by column name below, so the connection is assumed to
        # use a name-addressable row factory such as sqlite3.Row.
        # TODO confirm in SQLiteSourceExtractor.connect_source.
        cursor = conn.execute("""
            SELECT
                c.id,
                c.type,
                c.name,
                c.path,
                c.description,
                c.category,
                c.subcategory,
                c.status,
                c.complexity,
                c.maturity,
                c.confidence,
                c.llm_provider,
                c.llm_model,
                c.llm_temperature,
                c.llm_max_tokens,
                c.tools_list,
                c.token_budget_recommended,
                c.invocation_method,
                c.parallel_safe,
                c.created_at,
                c.updated_at
            FROM components c
            ORDER BY c.type, c.name
        """)

        for row in cursor:
            component_id = row['id']
            component_type = row['type']
            name = row['name']

            # The node id is derived from the row's primary key. Its exact
            # format is decided by generate_node_id, which is not defined in
            # this class (presumably inherited from the base extractor).
            node_id = self.generate_node_id(component_id)

            # Determine subtype from component type
            subtype = self._normalize_subtype(component_type)

            # Core descriptive columns. NULLs are stripped at the end.
            properties = {
                "path": row['path'],
                "description": row['description'],
                "category": row['category'],
                "subcategory": row['subcategory'],
                "status": row['status'],
                "complexity": row['complexity'],
                "maturity": row['maturity'],
                "confidence": row['confidence'],
            }

            # Optional columns: NULL, '' and 0 all count as "not set" and are
            # left out of the properties bag.
            if row['llm_provider']:
                properties["llm_provider"] = row['llm_provider']
            if row['llm_model']:
                properties["llm_model"] = row['llm_model']
            # Temperature is compared against None explicitly because 0.0 is a
            # valid setting that a plain truthiness test would drop.
            if row['llm_temperature'] is not None:
                properties["llm_temperature"] = row['llm_temperature']
            if row['llm_max_tokens']:
                properties["llm_max_tokens"] = row['llm_max_tokens']
            if row['tools_list']:
                # tools_list is parsed as JSON. If it is not valid JSON, the
                # raw value is kept under a separate key so nothing is lost.
                try:
                    properties["tools"] = json.loads(row['tools_list'])
                except json.JSONDecodeError:
                    properties["tools_raw"] = row['tools_list']
            if row['token_budget_recommended']:
                properties["token_budget"] = row['token_budget_recommended']
            if row['invocation_method']:
                properties["invocation_method"] = row['invocation_method']
            # 0 is a meaningful value here (False), so only NULL is skipped.
            if row['parallel_safe'] is not None:
                properties["parallel_safe"] = bool(row['parallel_safe'])

            # Clean None values
            properties = {k: v for k, v in properties.items() if v is not None}

            yield (
                node_id,
                name,
                subtype,
                properties,
                "components",  # source_table
                component_id,  # source_id
            )

    def _normalize_subtype(self, component_type: str) -> str:
        """
        Normalize component type to subtype.

        Maps: document -> document, workflow-json -> workflow, etc.
        A type with no entry in the map is returned unchanged.
        """
        return self._SUBTYPE_MAP.get(component_type, component_type)

    def get_capabilities(self, component_id: str) -> list:
        """
        Get capabilities for a component.

        Args:
            component_id: Value matched against ``capabilities.component_id``.

        Returns:
            One ``{"capability": ..., "type": ...}`` dict per matching row,
            or an empty list when there are none.
        """
        conn = self.connect_source()
        cursor = conn.execute(
            "SELECT capability, capability_type FROM capabilities WHERE component_id = ?",
            (component_id,)
        )
        return [{"capability": row[0], "type": row[1]} for row in cursor]