#!/usr/bin/env python3
"""
H.1.7: Component Metadata Registry

Unified metadata registry that consolidates all component metadata sources:
- framework-registry.json (component definitions)
- capability-registry.json (capability taxonomy)
- component-activation-status.json (activation state)
- component-indexer SQLite database (FTS search, capabilities)
- agent-cards/*.json (A2A protocol metadata)

Provides a single API for querying component metadata with caching,
versioning, and integration with the discovery service.

Usage:
    python3 scripts/core/component_metadata_registry.py --stats
    python3 scripts/core/component_metadata_registry.py --get agent/orchestrator
    python3 scripts/core/component_metadata_registry.py --list agents
    python3 scripts/core/component_metadata_registry.py --search "security"
    python3 scripts/core/component_metadata_registry.py --capabilities agent/orchestrator
    python3 scripts/core/component_metadata_registry.py --refresh  # Reload from sources
"""
import argparse
import hashlib
import json
import sqlite3
import sys
from contextlib import closing
from dataclasses import dataclass, field, asdict
from datetime import datetime, timezone
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple
# =============================================================================
# Configuration
# =============================================================================
# Directory holding this script; the repository root is two levels above it.
SCRIPT_DIR = Path(__file__).parent
ROOT_DIR = SCRIPT_DIR.parent.parent

# Metadata source paths
FRAMEWORK_REGISTRY = ROOT_DIR / "config" / "framework-registry.json"
CAPABILITY_REGISTRY = ROOT_DIR / "config" / "capability-registry.json"
ACTIVATION_STATUS = ROOT_DIR / "config" / "component-activation-status.json"
AGENT_CARDS_DIR = ROOT_DIR / "config" / "agent-cards"
AGENTS_JSON_DIR = ROOT_DIR / "config" / "agents"
COMMANDS_JSON_DIR = ROOT_DIR / "config" / "commands"
SKILLS_JSON_DIR = ROOT_DIR / "config" / "skills"

# ADR-114 & ADR-118: Use centralized path discovery
import sys

# Make the sibling ``paths`` module importable when this file runs as a script.
sys.path.insert(0, str(SCRIPT_DIR))
try:
    from paths import get_platform_db_path, PLATFORM_DB
    DB_PATH = PLATFORM_DB  # Component data goes to platform.db (Tier 1)
except ImportError:
    # Fallback for backward compatibility: prefer the user data directory
    # when it exists, otherwise use the location under ~/.coditect.
    _user_data = Path.home() / "PROJECTS" / ".coditect-data" / "context-storage"
    if _user_data.exists():
        PLATFORM_DB = _user_data / "platform.db"
    else:
        PLATFORM_DB = Path.home() / ".coditect" / "context-storage" / "platform.db"
    DB_PATH = PLATFORM_DB  # Backward compatibility alias
# =============================================================================
# Enums and Data Classes
# =============================================================================
class ComponentType(Enum):
    """Kinds of component the framework knows about.

    Each value is the singular, lowercase type name used as the prefix of a
    component ID (for example ``"agent"`` in ``"agent/orchestrator"``).
    """

    AGENT = "agent"
    COMMAND = "command"
    SKILL = "skill"
    SCRIPT = "script"
    HOOK = "hook"
    WORKFLOW = "workflow"
    PROMPT = "prompt"
class ComponentStatus(Enum):
    """Operational health states a component can report."""

    OPERATIONAL = "operational"
    DEGRADED = "degraded"
    MAINTENANCE = "maintenance"
    DEPRECATED = "deprecated"
    EXPERIMENTAL = "experimental"
class ActivationState(Enum):
    """Whether a component is currently switched on or off."""

    ACTIVATED = "activated"
    DEACTIVATED = "deactivated"
@dataclass
class LLMBinding:
    """LLM configuration for a component."""

    provider: str = "anthropic-claude"
    model: str = "sonnet"
    temperature: float = 0.7
    max_tokens: int = 4096

    @classmethod
    def from_dict(cls, data: Optional[Dict]) -> "LLMBinding":
        """Build a binding from a mapping, filling missing keys with defaults.

        Args:
            data: Mapping that may contain ``provider``, ``model``,
                ``temperature`` and ``max_tokens``. ``None`` or an empty
                mapping (e.g. a JSON ``null`` binding) yields an all-default
                binding instead of raising.

        Returns:
            A new ``LLMBinding``.
        """
        if not data:
            return cls()
        return cls(
            provider=data.get("provider", "anthropic-claude"),
            model=data.get("model", "sonnet"),
            temperature=data.get("temperature", 0.7),
            max_tokens=data.get("max_tokens", 4096),
        )
@dataclass
class CapabilityInfo:
    """A single capability attached to a component, with its provenance."""

    name: str
    type: str  # primary, domain, action, tag
    confidence: float = 1.0
    source: str = "registry"  # registry, frontmatter, inferred
@dataclass
class ComponentMetadata:
    """Merged view of everything the registry knows about one component."""

    # Identity
    id: str  # e.g., "agent/orchestrator"
    name: str
    type: ComponentType
    version: str = "1.0.0"

    # Description
    description: str = ""
    category: str = ""
    subcategory: str = ""

    # Status
    status: ComponentStatus = ComponentStatus.OPERATIONAL
    activation: ActivationState = ActivationState.ACTIVATED
    activated_at: Optional[str] = None

    # LLM Configuration
    llm_binding: Optional[LLMBinding] = None

    # Capabilities
    capabilities: List[CapabilityInfo] = field(default_factory=list)
    use_cases: List[str] = field(default_factory=list)
    tags: List[str] = field(default_factory=list)

    # Invocation
    invocation_method: str = ""
    tools: List[str] = field(default_factory=list)

    # Relationships
    invokes: List[str] = field(default_factory=list)
    invoked_by: List[str] = field(default_factory=list)
    alternatives: List[str] = field(default_factory=list)
    complements: List[str] = field(default_factory=list)

    # File info
    path: str = ""
    content_hash: str = ""

    # Timestamps
    created_at: Optional[str] = None
    updated_at: Optional[str] = None
    indexed_at: Optional[str] = None

    # A2A Protocol
    a2a_schema: str = ""
    vendor_name: str = ""
    vendor_url: str = ""

    # Quality
    maturity: str = "production"
    documentation_quality: str = "partial"
    confidence: float = 0.5

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dictionary describing this component.

        Enum fields are emitted as their string values and capabilities as
        plain dictionaries. ``llm_binding`` is only present when a binding is
        set. The file hash, timestamp and A2A vendor fields are not part of
        the output.
        """
        capability_rows = []
        for cap in self.capabilities:
            capability_rows.append({
                "name": cap.name,
                "type": cap.type,
                "confidence": cap.confidence,
                "source": cap.source,
            })

        type_value = self.type.value
        status_value = self.status.value
        activation_value = self.activation.value

        payload = {
            "id": self.id,
            "name": self.name,
            "type": type_value,
            "version": self.version,
            "description": self.description,
            "category": self.category,
            "subcategory": self.subcategory,
            "status": status_value,
            "activation": activation_value,
            "activated_at": self.activated_at,
            "capabilities": capability_rows,
            "use_cases": self.use_cases,
            "tags": self.tags,
            "invocation_method": self.invocation_method,
            "tools": self.tools,
            "invokes": self.invokes,
            "invoked_by": self.invoked_by,
            "alternatives": self.alternatives,
            "complements": self.complements,
            "path": self.path,
            "maturity": self.maturity,
            "documentation_quality": self.documentation_quality,
            "confidence": self.confidence,
        }

        binding = self.llm_binding
        if not binding:
            return payload

        # The binding, when present, is appended after all other keys.
        payload["llm_binding"] = {
            "provider": binding.provider,
            "model": binding.model,
            "temperature": binding.temperature,
            "max_tokens": binding.max_tokens,
        }
        return payload
# =============================================================================
# Registry Class
# =============================================================================
class ComponentMetadataRegistry:
    """
    Unified component metadata registry.

    Consolidates metadata from multiple sources and provides
    a single API for querying component information.

    Sources are read in a fixed order by :meth:`load`: the framework registry,
    the activation status file, the capability registry, the agent cards and
    finally the SQLite database. Only the framework registry and the
    activation status file create entries; the remaining sources enrich
    components that already exist.

    Loading is best-effort: a missing source is skipped, and a source that
    cannot be read or parsed is reported on stderr without aborting the load.
    """

    # Maps relationship_type values stored in the database to the
    # ComponentMetadata list attribute that collects their targets.
    _RELATIONSHIP_ATTRS = {
        "invokes": "invokes",
        "invoked_by": "invoked_by",
        "alternative": "alternatives",
        "complement": "complements",
    }

    def __init__(self, auto_load: bool = True) -> None:
        """Initialize the registry.

        Args:
            auto_load: When true (the default), read every source immediately.
        """
        self._components: Dict[str, ComponentMetadata] = {}
        self._capability_taxonomy: Dict[str, Any] = {}
        self._by_type: Dict[ComponentType, List[str]] = {t: [] for t in ComponentType}
        self._by_capability: Dict[str, List[str]] = {}
        self._version: str = "1.0.0"
        self._loaded_at: Optional[str] = None
        self._source_hashes: Dict[str, str] = {}

        if auto_load:
            self.load()

    @staticmethod
    def _warn(message: str) -> None:
        """Report a load problem on stderr so stdout (e.g. JSON output) stays clean."""
        print(f"Warning: {message}", file=sys.stderr)

    def load(self) -> None:
        """Load metadata from all sources, replacing any previously loaded state."""
        self._components.clear()
        self._by_type = {t: [] for t in ComponentType}
        self._by_capability.clear()
        # Reset per-source state too, so a source that disappeared since the
        # previous load does not leave stale data behind. These are rebound
        # rather than cleared because earlier callers may hold the old objects.
        self._capability_taxonomy = {}
        self._source_hashes = {}
        self._version = "1.0.0"

        # Load in order of priority (later sources override earlier)
        self._load_framework_registry()
        self._load_activation_status()
        self._load_capability_taxonomy()
        self._load_agent_cards()
        self._load_from_database()

        # Build indexes
        self._build_indexes()

        self._loaded_at = datetime.now(timezone.utc).isoformat()

    def _load_framework_registry(self) -> None:
        """Load component definitions from framework-registry.json."""
        if not FRAMEWORK_REGISTRY.exists():
            return

        try:
            with open(FRAMEWORK_REGISTRY, 'r', encoding='utf-8') as f:
                data = json.load(f)

            # Short fingerprint of the parsed content, exposed via get_stats().
            self._source_hashes["framework_registry"] = hashlib.sha256(
                json.dumps(data, sort_keys=True).encode()
            ).hexdigest()[:16]

            self._version = data.get("framework_version", "1.0.0")

            for comp_type, type_data in data.get("components", {}).items():
                try:
                    component_type = ComponentType(comp_type.rstrip('s'))  # agents -> agent
                except ValueError:
                    # Section for a type this registry does not model.
                    continue

                # Handle both list and dict with categories
                if isinstance(type_data, dict):
                    for category, components in type_data.get("categories", {}).items():
                        for comp in components:
                            self._process_framework_component(comp, component_type, category)
                elif isinstance(type_data, list):
                    for comp in type_data:
                        self._process_framework_component(comp, component_type, "general")

        except (json.JSONDecodeError, IOError) as e:
            self._warn(f"Failed to load framework registry: {e}")

    def _process_framework_component(
        self, comp: Dict, component_type: ComponentType, category: str
    ) -> None:
        """Register one framework-registry entry, replacing any entry with the same ID.

        The ID is built from the type and the entry's ``id`` (falling back to
        its ``name``, then ``"unknown"``). Use cases and tags are also
        recorded as capabilities.
        """
        comp_id = f"{component_type.value}/{comp.get('id', comp.get('name', 'unknown'))}"

        llm_binding = None
        if "llm_binding" in comp:
            llm_binding = LLMBinding.from_dict(comp["llm_binding"])

        # Tools may be given as a list or as a comma-separated string.
        tools = comp.get("tools", [])
        if isinstance(tools, str):
            tools = [t.strip() for t in tools.split(",") if t.strip()]

        metadata = ComponentMetadata(
            id=comp_id,
            name=comp.get("name", comp.get("id", "unknown")),
            type=component_type,
            version=comp.get("version", "1.0.0"),
            description=comp.get("description", ""),
            category=category,
            llm_binding=llm_binding,
            use_cases=comp.get("use_cases", []),
            tags=comp.get("tags", []),
            invocation_method=comp.get("typical_invocation", ""),
            tools=tools,
        )

        # Add capabilities from use_cases and tags
        metadata.capabilities.extend(
            CapabilityInfo(name=uc, type="use_case", confidence=0.8, source="framework")
            for uc in metadata.use_cases
        )
        metadata.capabilities.extend(
            CapabilityInfo(name=tag, type="tag", confidence=0.7, source="framework")
            for tag in metadata.tags
        )

        self._components[comp_id] = metadata

    def _load_activation_status(self) -> None:
        """Apply component-activation-status.json, creating entries it alone knows about."""
        if not ACTIVATION_STATUS.exists():
            return

        try:
            with open(ACTIVATION_STATUS, 'r', encoding='utf-8') as f:
                data = json.load(f)

            self._source_hashes["activation_status"] = hashlib.sha256(
                json.dumps(data, sort_keys=True).encode()
            ).hexdigest()[:16]

            for comp in data.get("components", []):
                comp_type = comp.get("type", "")
                comp_name = comp.get("name", "")
                comp_id = f"{comp_type}/{comp_name}"
                activation = (
                    ActivationState.ACTIVATED if comp.get("activated", False)
                    else ActivationState.DEACTIVATED
                )

                existing = self._components.get(comp_id)
                if existing is not None:
                    # Update existing
                    existing.activation = activation
                    existing.activated_at = comp.get("activated_at")
                    existing.path = comp.get("path", "")
                    try:
                        existing.status = ComponentStatus(comp.get("status", "operational"))
                    except ValueError:
                        # Unrecognized status string: keep the current status.
                        pass
                    continue

                # Create new from activation status
                try:
                    component_type = ComponentType(comp_type)
                except ValueError:
                    continue

                self._components[comp_id] = ComponentMetadata(
                    id=comp_id,
                    name=comp_name,
                    type=component_type,
                    version=comp.get("version", "1.0.0"),
                    path=comp.get("path", ""),
                    activation=activation,
                    activated_at=comp.get("activated_at"),
                )

        except (json.JSONDecodeError, IOError) as e:
            self._warn(f"Failed to load activation status: {e}")

    def _load_capability_taxonomy(self) -> None:
        """Load the capability taxonomy and any per-component capabilities."""
        if not CAPABILITY_REGISTRY.exists():
            return

        try:
            with open(CAPABILITY_REGISTRY, 'r', encoding='utf-8') as f:
                data = json.load(f)

            self._capability_taxonomy = data.get("taxonomy", {})

            # Also load component capabilities if present
            for comp_id, comp_caps in data.get("components", {}).items():
                target = self._components.get(comp_id)
                if target is None:
                    continue
                target.capabilities.extend(
                    CapabilityInfo(
                        name=cap, type="primary", confidence=0.9, source="capability_registry"
                    )
                    for cap in comp_caps.get("capabilities", [])
                )

        except (json.JSONDecodeError, IOError) as e:
            self._warn(f"Failed to load capability taxonomy: {e}")

    def _load_agent_cards(self) -> None:
        """Enrich known agents from the A2A protocol agent cards.

        The card's file stem is taken as the agent name; cards for agents that
        are not already registered are ignored.
        """
        if not AGENT_CARDS_DIR.exists():
            return

        for card_path in AGENT_CARDS_DIR.glob("*.json"):
            try:
                with open(card_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
            except (json.JSONDecodeError, IOError) as e:
                # One bad card must not stop the others from loading.
                self._warn(f"Failed to load agent card {card_path.name}: {e}")
                continue

            comp = self._components.get(f"agent/{card_path.stem}")
            if comp is None:
                continue

            # Enrich existing
            if "$schema" in data:
                comp.a2a_schema = data["$schema"]

            agent_info = data.get("agent", {})
            if agent_info.get("version"):
                comp.version = agent_info["version"]

            vendor = agent_info.get("vendor", {})
            comp.vendor_name = vendor.get("name", "")
            comp.vendor_url = vendor.get("url", "")

            caps = data.get("capabilities", {})
            for skill in caps.get("skills", []):
                comp.capabilities.append(CapabilityInfo(
                    name=skill, type="skill", confidence=0.95, source="agent_card"
                ))
            for domain in caps.get("domains", []):
                comp.capabilities.append(CapabilityInfo(
                    name=domain, type="domain", confidence=0.95, source="agent_card"
                ))

            composability = data.get("composability", {})
            comp.invokes = composability.get("can_orchestrate", [])
            comp.invoked_by = composability.get("can_be_orchestrated_by", [])

    def _load_from_database(self) -> None:
        """Enrich already-registered components from the SQLite database.

        Rows whose ID is not in the registry are skipped. Any SQLite error is
        reported and ends the database pass; enrichment applied before the
        error is kept.
        """
        if not DB_PATH.exists():
            return

        try:
            # sqlite3's own context manager only manages transactions, so
            # closing() guarantees the connection is released even on error.
            with closing(sqlite3.connect(DB_PATH)) as conn:
                conn.row_factory = sqlite3.Row
                self._enrich_components_from_db(conn)
                self._load_capabilities_from_db(conn)
                self._load_relationships_from_db(conn)
        except sqlite3.Error as e:
            self._warn(f"Failed to load from database: {e}")

    def _enrich_components_from_db(self, conn: sqlite3.Connection) -> None:
        """Copy description, LLM binding, quality and tool data from ``components`` rows."""
        cursor = conn.execute("""
            SELECT id, name, type, description, path, status,
                   llm_model, llm_provider, llm_temperature, llm_max_tokens,
                   tools_list, maturity, documentation_quality, confidence,
                   content_hash, indexed_at
            FROM components
        """)

        for row in cursor:
            comp = self._components.get(row["id"])
            if comp is None:
                continue

            # Earlier sources win for description and LLM binding.
            if not comp.description and row["description"]:
                comp.description = row["description"]

            if row["llm_model"] and not comp.llm_binding:
                temperature = row["llm_temperature"]
                comp.llm_binding = LLMBinding(
                    provider=row["llm_provider"] or "anthropic-claude",
                    model=row["llm_model"],
                    # 0.0 is a legitimate temperature; only NULL falls back.
                    temperature=0.7 if temperature is None else temperature,
                    max_tokens=row["llm_max_tokens"] or 4096,
                )

            comp.content_hash = row["content_hash"] or ""
            comp.indexed_at = row["indexed_at"]
            comp.maturity = row["maturity"] or "production"
            comp.documentation_quality = row["documentation_quality"] or "partial"
            # A stored confidence of 0.0 must survive; only NULL falls back.
            confidence = row["confidence"]
            comp.confidence = 0.5 if confidence is None else confidence

            # Parse tools (stored as a JSON-encoded list)
            if row["tools_list"]:
                try:
                    tools = json.loads(row["tools_list"])
                except json.JSONDecodeError:
                    # Malformed value: keep the tools from earlier sources.
                    pass
                else:
                    comp.tools = tools if isinstance(tools, list) else []

    def _load_capabilities_from_db(self, conn: sqlite3.Connection) -> None:
        """Append rows of the ``capabilities`` table to their components."""
        cursor = conn.execute("""
            SELECT component_id, capability, capability_type
            FROM capabilities
        """)

        for row in cursor:
            comp = self._components.get(row["component_id"])
            if comp is None:
                continue
            comp.capabilities.append(CapabilityInfo(
                name=row["capability"],
                type=row["capability_type"],
                confidence=0.85,
                source="database"
            ))

    def _load_relationships_from_db(self, conn: sqlite3.Connection) -> None:
        """Append rows of ``component_relationships`` to the source component's lists."""
        cursor = conn.execute("""
            SELECT source_id, target_id, relationship_type
            FROM component_relationships
        """)

        for row in cursor:
            comp = self._components.get(row["source_id"])
            attr = self._RELATIONSHIP_ATTRS.get(row["relationship_type"])
            # Unknown sources and unrecognized relationship types are ignored.
            if comp is None or attr is None:
                continue
            getattr(comp, attr).append(row["target_id"])

    def _build_indexes(self) -> None:
        """Rebuild the by-type and by-capability lookup indexes from scratch."""
        self._by_type = {t: [] for t in ComponentType}
        self._by_capability.clear()

        for comp_id, comp in self._components.items():
            # By type
            self._by_type[comp.type].append(comp_id)

            # By capability (case-insensitive). dict.fromkeys drops repeated
            # names within one component while keeping first-seen order, so
            # each component appears at most once per capability.
            for cap_key in dict.fromkeys(cap.name.lower() for cap in comp.capabilities):
                self._by_capability.setdefault(cap_key, []).append(comp_id)

    # =========================================================================
    # Public API
    # =========================================================================

    def get(self, component_id: str) -> Optional[ComponentMetadata]:
        """Get component metadata by ID (e.g. ``"agent/orchestrator"``), or None."""
        return self._components.get(component_id)

    def get_by_name(self, name: str, component_type: Optional[ComponentType] = None) -> Optional[ComponentMetadata]:
        """Get component by name (searches all types if type not specified).

        When no type is given, types are tried in ``ComponentType`` declaration
        order and the first match is returned.
        """
        candidate_types = [component_type] if component_type else list(ComponentType)
        for t in candidate_types:
            comp = self._components.get(f"{t.value}/{name}")
            if comp is not None:
                return comp
        return None

    def list_by_type(
        self,
        component_type: ComponentType,
        activated_only: bool = False
    ) -> List[ComponentMetadata]:
        """List all components of a given type, optionally only the activated ones."""
        comp_ids = self._by_type.get(component_type, [])
        components = [self._components[cid] for cid in comp_ids if cid in self._components]

        if activated_only:
            components = [c for c in components if c.activation == ActivationState.ACTIVATED]

        return components

    def list_by_capability(self, capability: str) -> List[ComponentMetadata]:
        """List all components with a given capability (case-insensitive exact name)."""
        comp_ids = self._by_capability.get(capability.lower(), [])
        return [self._components[cid] for cid in comp_ids if cid in self._components]

    def search(self, query: str, component_type: Optional[ComponentType] = None) -> List[ComponentMetadata]:
        """Search components by case-insensitive substring.

        A component matches when the query occurs in its name, description,
        any capability name or any tag. Results keep registry order.
        """
        query_lower = query.lower()
        results = []

        for comp in self._components.values():
            if component_type and comp.type != component_type:
                continue

            if (
                query_lower in comp.name.lower()
                or query_lower in comp.description.lower()
                or any(query_lower in cap.name.lower() for cap in comp.capabilities)
                or any(query_lower in tag.lower() for tag in comp.tags)
            ):
                results.append(comp)

        return results

    def get_capabilities(self, component_id: str) -> List[CapabilityInfo]:
        """Get all capabilities for a component (empty list if the ID is unknown)."""
        comp = self._components.get(component_id)
        return comp.capabilities if comp else []

    def get_relationships(self, component_id: str) -> Dict[str, List[str]]:
        """Get all relationships for a component (empty dict if the ID is unknown)."""
        comp = self._components.get(component_id)
        if not comp:
            return {}

        return {
            "invokes": comp.invokes,
            "invoked_by": comp.invoked_by,
            "alternatives": comp.alternatives,
            "complements": comp.complements,
        }

    def get_capability_taxonomy(self) -> Dict[str, Any]:
        """Get the capability taxonomy."""
        return self._capability_taxonomy

    def get_stats(self) -> Dict[str, Any]:
        """Get registry statistics: counts by type, activation and capability."""
        by_type_counts = {t.value: len(ids) for t, ids in self._by_type.items()}

        activated = sum(1 for c in self._components.values() if c.activation == ActivationState.ACTIVATED)
        deactivated = len(self._components) - activated

        total_capabilities = sum(len(c.capabilities) for c in self._components.values())
        unique_capabilities = len(self._by_capability)

        return {
            "version": self._version,
            "loaded_at": self._loaded_at,
            "total_components": len(self._components),
            "by_type": by_type_counts,
            "activated": activated,
            "deactivated": deactivated,
            "total_capabilities": total_capabilities,
            "unique_capabilities": unique_capabilities,
            "source_hashes": self._source_hashes,
        }

    def refresh(self) -> None:
        """Reload metadata from all sources."""
        self.load()

    def export_to_json(self, output_path: Path) -> None:
        """Export registry (stats, components and taxonomy) to a JSON file."""
        data = {
            "version": self._version,
            "exported_at": datetime.now(timezone.utc).isoformat(),
            "stats": self.get_stats(),
            "components": {
                comp_id: comp.to_dict()
                for comp_id, comp in self._components.items()
            },
            "capability_taxonomy": self._capability_taxonomy,
        }

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)
            f.write('\n')
# =============================================================================
# CLI Interface
# =============================================================================
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the registry CLI."""
    parser = argparse.ArgumentParser(
        description="H.1.7: Component Metadata Registry",
        formatter_class=argparse.RawDescriptionHelpFormatter
    )

    parser.add_argument(
        "--get", "-g",
        metavar="ID",
        help="Get component by ID (e.g., agent/orchestrator)"
    )
    parser.add_argument(
        "--list", "-l",
        metavar="TYPE",
        # Singular and plural spelling of every ComponentType value.
        choices=["agent", "agents", "command", "commands", "skill", "skills",
                 "script", "scripts", "hook", "hooks", "workflow", "workflows",
                 "prompt", "prompts"],
        help="List components by type"
    )
    parser.add_argument(
        "--search", "-s",
        metavar="QUERY",
        help="Search components"
    )
    parser.add_argument(
        "--capabilities", "-c",
        metavar="ID",
        help="Get capabilities for component"
    )
    parser.add_argument(
        "--relationships", "-r",
        metavar="ID",
        help="Get relationships for component"
    )
    parser.add_argument(
        "--stats",
        action="store_true",
        help="Show registry statistics"
    )
    parser.add_argument(
        "--refresh",
        action="store_true",
        help="Reload from all sources"
    )
    parser.add_argument(
        "--export",
        metavar="PATH",
        help="Export registry to JSON file"
    )
    parser.add_argument(
        "--activated-only",
        action="store_true",
        help="Only show activated components"
    )
    parser.add_argument(
        "--json", "-j",
        action="store_true",
        help="Output as JSON"
    )
    parser.add_argument(
        "--limit",
        type=int,
        default=20,
        help="Limit results (default: 20)"
    )
    return parser


def _truncate(text: str, limit: int) -> str:
    """Return *text* cut to *limit* characters plus "..." when it is longer."""
    return text[:limit] + "..." if len(text) > limit else text


def _print_stats(stats: dict) -> None:
    """Print the human-readable statistics report."""
    print()
    print("=" * 60)
    print("COMPONENT METADATA REGISTRY STATS")
    print("=" * 60)
    print()
    print(f"Version: {stats['version']}")
    print(f"Loaded: {stats['loaded_at']}")
    print()
    print(f"Total Components: {stats['total_components']}")
    print(f" Activated: {stats['activated']}")
    print(f" Deactivated: {stats['deactivated']}")
    print()
    print("By Type:")
    for t, count in stats['by_type'].items():
        print(f" {t}: {count}")
    print()
    print(f"Total Capabilities: {stats['total_capabilities']}")
    print(f"Unique Capabilities: {stats['unique_capabilities']}")


def _print_component(comp) -> None:
    """Print the human-readable summary of a single component."""
    print()
    print(f"ID: {comp.id}")
    print(f"Name: {comp.name}")
    print(f"Type: {comp.type.value}")
    print(f"Version: {comp.version}")
    print(f"Status: {comp.status.value}")
    print(f"Activation: {comp.activation.value}")
    if comp.description:
        print(f"Description: {_truncate(comp.description, 200)}")
    if comp.llm_binding:
        print(f"LLM: {comp.llm_binding.model} ({comp.llm_binding.provider})")
    if comp.tools:
        # Only the first five tools and capabilities are shown.
        print(f"Tools: {', '.join(comp.tools[:5])}")
    if comp.capabilities:
        caps = [c.name for c in comp.capabilities[:5]]
        print(f"Capabilities: {', '.join(caps)}")
    if comp.path:
        print(f"Path: {comp.path}")


def main():
    """Run the registry CLI.

    Exactly one action is performed per invocation, chosen in this order:
    ``--refresh``, ``--export``, ``--stats``, ``--get``, ``--list``,
    ``--search``, ``--capabilities``, ``--relationships``. With no action the
    usage help is printed.
    """
    parser = _build_parser()
    args = parser.parse_args()

    # --refresh performs its own load, so skip the constructor's automatic
    # one rather than reading every source twice.
    registry = ComponentMetadataRegistry(auto_load=not args.refresh)

    if args.refresh:
        print("Refreshing registry...")
        registry.refresh()
        print("Done.")
        return

    if args.export:
        export_path = Path(args.export)
        registry.export_to_json(export_path)
        print(f"Exported to {export_path}")
        return

    if args.stats:
        stats = registry.get_stats()
        if args.json:
            print(json.dumps(stats, indent=2))
        else:
            _print_stats(stats)
        return

    if args.get:
        comp = registry.get(args.get)
        if not comp:
            print(f"Component not found: {args.get}")
        elif args.json:
            print(json.dumps(comp.to_dict(), indent=2))
        else:
            _print_component(comp)
        return

    if args.list:
        type_str = args.list.rstrip('s')  # agents -> agent
        try:
            comp_type = ComponentType(type_str)
        except ValueError:
            print(f"Invalid type: {args.list}")
            return

        components = registry.list_by_type(comp_type, activated_only=args.activated_only)
        components = components[:args.limit]

        if args.json:
            print(json.dumps([c.to_dict() for c in components], indent=2))
        else:
            print()
            print(f"Components ({comp_type.value}): {len(components)}")
            print("-" * 60)
            for comp in components:
                status_icon = "✓" if comp.activation == ActivationState.ACTIVATED else "○"
                model = f" [{comp.llm_binding.model}]" if comp.llm_binding else ""
                print(f" {status_icon} {comp.name}{model}")
        return

    if args.search:
        results = registry.search(args.search)
        results = results[:args.limit]

        if args.json:
            print(json.dumps([c.to_dict() for c in results], indent=2))
        else:
            print()
            print(f"Search results for '{args.search}': {len(results)}")
            print("-" * 60)
            for comp in results:
                print(f" [{comp.type.value}] {comp.name}")
                if comp.description:
                    print(f" {_truncate(comp.description, 80)}")
        return

    if args.capabilities:
        caps = registry.get_capabilities(args.capabilities)

        if args.json:
            print(json.dumps([
                {"name": c.name, "type": c.type, "confidence": c.confidence, "source": c.source}
                for c in caps
            ], indent=2))
        else:
            print()
            # The header reports the full count; --limit only caps the listing.
            print(f"Capabilities for {args.capabilities}: {len(caps)}")
            print("-" * 60)
            for cap in caps[:args.limit]:
                print(f" [{cap.type}] {cap.name} (conf: {cap.confidence:.2f}, src: {cap.source})")
        return

    if args.relationships:
        rels = registry.get_relationships(args.relationships)

        if args.json:
            print(json.dumps(rels, indent=2))
        else:
            print()
            print(f"Relationships for {args.relationships}")
            print("-" * 60)
            for rel_type, targets in rels.items():
                if targets:
                    print(f" {rel_type}:")
                    for t in targets[:10]:
                        print(f" - {t}")
        return

    # Default: no action requested, show usage
    parser.print_help()
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()