Skip to main content

scripts-component-extractor

#!/usr/bin/env python3
"""
CP-07: Component Node Extractor (ADR-151)

Extracts component entities from platform.db and creates kg_nodes:

- node_type: 'component'
- subtype: 'agent', 'skill', 'command', 'script', 'hook', etc.
- Properties: path, description, model, tools, track

Source: platform.db components table
Target: org.db kg_nodes table

Created: 2026-02-03
Track: J (Memory Intelligence)
Task: J.3.4.1
"""

import json
import logging
from pathlib import Path
from typing import Any, Dict, Generator, Optional, Tuple

from .base_extractor import SQLiteSourceExtractor

# Module-level logger, named after this module so log records can be
# filtered per extractor.
logger = logging.getLogger(__name__)

class ComponentExtractor(SQLiteSourceExtractor):
    """
    Extract components from platform.db into kg_nodes.

    Components include: agents, skills, commands, scripts, hooks, documents, workflows
    """

    # Mapping from the raw ``components.type`` value to the kg_nodes subtype.
    # Types not listed here are passed through unchanged.
    _SUBTYPE_MAP = {
        "agent": "agent",
        "skill": "skill",
        "command": "command",
        "script": "script",
        "hook": "hook",
        "document": "document",
        "workflow": "workflow",
        "workflow-json": "workflow",
    }

    @property
    def node_type(self) -> str:
        """Node type assigned to every entity this extractor yields."""
        return "component"

    def extract_entities(self) -> Generator[Tuple[str, str, Optional[str], Dict[str, Any], Optional[str], Optional[str]], None, None]:
        """
        Extract components from platform.db.

        Reads every row of the ``components`` table, ordered by type and
        name, and lazily yields one tuple per row.

        Rows are accessed by column name, so the connection returned by
        ``self.connect_source()`` must produce mapping-style rows
        (presumably ``sqlite3.Row`` -- confirm in the base extractor).

        Yields:
            Tuple of (node_id, name, subtype, properties, source_table, source_id)
        """
        conn = self.connect_source()

        # Only the components table is read here; capabilities are NOT
        # joined in. Use get_capabilities() to fetch them per component.
        cursor = conn.execute("""
            SELECT
                c.id,
                c.type,
                c.name,
                c.path,
                c.description,
                c.category,
                c.subcategory,
                c.status,
                c.complexity,
                c.maturity,
                c.confidence,
                c.llm_provider,
                c.llm_model,
                c.llm_temperature,
                c.llm_max_tokens,
                c.tools_list,
                c.token_budget_recommended,
                c.invocation_method,
                c.parallel_safe,
                c.created_at,
                c.updated_at
            FROM components c
            ORDER BY c.type, c.name
        """)

        for row in cursor:
            component_id = row['id']

            # The node id is derived from the component id alone; its exact
            # format is decided by generate_node_id(), which is not defined
            # in this class (presumably inherited from the base extractor).
            node_id = self.generate_node_id(component_id)

            subtype = self._normalize_subtype(row['type'])
            properties = self._build_properties(row)

            yield (
                node_id,
                row['name'],
                subtype,
                properties,
                "components",  # source_table
                component_id,  # source_id
            )

    def _build_properties(self, row: Any) -> Dict[str, Any]:
        """
        Build the properties bag for one ``components`` row.

        Args:
            row: A row from the query in extract_entities(), indexable by
                column name.

        Returns:
            Dict of property name to value, with all ``None`` values removed.
        """
        properties = {
            "path": row['path'],
            "description": row['description'],
            "category": row['category'],
            "subcategory": row['subcategory'],
            "status": row['status'],
            "complexity": row['complexity'],
            "maturity": row['maturity'],
            "confidence": row['confidence'],
        }

        # Optional LLM-related properties: only added when set.
        if row['llm_provider']:
            properties["llm_provider"] = row['llm_provider']
        if row['llm_model']:
            properties["llm_model"] = row['llm_model']
        # A temperature of 0 is a legitimate setting, so test against None
        # rather than truthiness (which would silently drop it).
        if row['llm_temperature'] is not None:
            properties["llm_temperature"] = row['llm_temperature']
        if row['llm_max_tokens']:
            properties["llm_max_tokens"] = row['llm_max_tokens']
        if row['tools_list']:
            try:
                properties["tools"] = json.loads(row['tools_list'])
            except json.JSONDecodeError:
                # Keep the unparseable value under a separate key instead of
                # losing it.
                properties["tools_raw"] = row['tools_list']
        if row['token_budget_recommended']:
            properties["token_budget"] = row['token_budget_recommended']
        if row['invocation_method']:
            properties["invocation_method"] = row['invocation_method']
        if row['parallel_safe'] is not None:
            # Coerce the stored value to a real bool.
            properties["parallel_safe"] = bool(row['parallel_safe'])

        # Drop unset base columns so the bag only carries real values.
        return {k: v for k, v in properties.items() if v is not None}

    def _normalize_subtype(self, component_type: str) -> str:
        """
        Normalize component type to subtype.

        Maps: document -> document, workflow-json -> workflow, etc.
        Types without an entry in the map are returned unchanged.
        """
        return self._SUBTYPE_MAP.get(component_type, component_type)

    def get_capabilities(self, component_id: str) -> list:
        """
        Get capabilities for a component.

        Args:
            component_id: Value matched against ``capabilities.component_id``.

        Returns:
            List of ``{"capability": ..., "type": ...}`` dicts, one per
            matching row; empty if the component has none.
        """
        conn = self.connect_source()
        cursor = conn.execute(
            "SELECT capability, capability_type FROM capabilities WHERE component_id = ?",
            (component_id,)
        )
        return [{"capability": row[0], "type": row[1]} for row in cursor]