#!/usr/bin/env python3
"""
generate_catalog.py — Platform Catalog Generator (J.29.17)

Scans skills/*/SKILL.md, commands/*.md, agents/*.md for YAML frontmatter.
Outputs:

  1. catalog-meta.json — lightweight metadata for build-time import
  2. public/catalog/{skills,commands,agents}/*.md — full content for lazy loading
"""

import argparse
import json
import math
import os
import re
import shutil
import sys
from datetime import datetime, timezone
from pathlib import Path

# ─── YAML frontmatter parser (no PyYAML dependency) ─────────────────────────

def _coerce_scalar(val: str):
    """Convert an unquoted YAML scalar to bool, int or float when it looks like one."""
    lowered = val.lower()
    if lowered == "true":
        return True
    if lowered == "false":
        return False
    if val == "":
        return ""
    for cast in (int, float):
        try:
            return cast(val)
        except ValueError:
            continue
    return val


def parse_frontmatter(text: str) -> tuple[dict, str]:
    """Extract YAML frontmatter and body from markdown text.

    Only a small YAML subset is understood: ``key: value`` pairs, inline
    lists (``[a, b]``), ``- item`` lists (indented or not) and indented
    continuation lines, which are folded into the previous string value
    with single spaces.

    Args:
        text: Full markdown document, optionally starting with a ``---``
            fenced frontmatter block.

    Returns:
        ``(meta, body)``. ``meta`` maps each key to a ``str``, ``bool``,
        ``int``, ``float`` or ``list[str]``. ``body`` is the text after the
        closing fence with leading newlines removed. If there is no opening
        fence, or no closing fence, the result is ``({}, text)``.
    """
    if not text.startswith("---"):
        return {}, text
    end = text.find("\n---", 3)
    if end == -1:
        return {}, text

    yaml_block = text[4:end]
    body = text[end + 4:].lstrip("\n")

    # Compiled once, outside the per-line loop.
    key_value_re = re.compile(r"^([a-zA-Z_][a-zA-Z0-9_-]*)\s*:\s*(.*)")
    quote_chars = "'\""
    # `key: >` / `key: |` introduce a block scalar; the indicator itself is
    # not part of the value, the following indented lines are.
    block_indicators = {"|", ">", "|-", ">-", "|+", ">+"}

    meta: dict = {}
    current_key = None
    current_list = None

    for line in yaml_block.split("\n"):
        stripped = line.strip()
        indented = line.startswith((" ", "\t"))

        # List item. An indented "- x" only counts as a list item when the
        # key has no scalar value yet (or a list is already being built);
        # otherwise it is continuation text of a multi-line string.
        if current_key and stripped.startswith("- "):
            starts_list = meta.get(current_key) == "" or current_list is not None
            if not indented or starts_list:
                if current_list is None:
                    current_list = []
                    meta[current_key] = current_list
                current_list.append(stripped[2:].strip().strip(quote_chars))
                continue

        # Continuation line (indented): fold into the previous string value.
        if indented and current_key:
            prev = meta.get(current_key, "")
            if isinstance(prev, str):
                meta[current_key] = (prev + " " + stripped).strip()
            continue

        # Key: value
        m = key_value_re.match(line)
        if not m:
            continue
        current_key = m.group(1)
        current_list = None
        raw = m.group(2).strip()

        # A properly quoted scalar is always a string: "1.10" must not
        # become the float 1.1, and "true" must not become a bool.
        if len(raw) >= 2 and raw[0] == raw[-1] and raw[0] in quote_chars:
            meta[current_key] = raw[1:-1]
            continue

        val = raw.strip(quote_chars)
        if val in block_indicators:
            val = ""

        # Inline list [a, b, c]
        if val.startswith("[") and val.endswith("]"):
            meta[current_key] = [
                item.strip().strip(quote_chars)
                for item in val[1:-1].split(",")
                if item.strip()
            ]
        else:
            meta[current_key] = _coerce_scalar(val)

    return meta, body

def safe_str(val, default=""):
    """Render a frontmatter value as a string.

    Lists are joined with ", " (each element passed through ``str``).
    ``None`` and the empty string yield ``default``. Anything else is
    returned as ``str(val)``.
    """
    if isinstance(val, list):
        return ", ".join(map(str, val))
    is_missing = val is None or val == ""
    return default if is_missing else str(val)

def safe_list(val):
    """Return ``val`` as a list.

    A list is returned unchanged (same object), a non-empty string is
    wrapped in a one-element list, and every other value yields ``[]``.
    """
    if isinstance(val, str):
        return [val] if val else []
    return val if isinstance(val, list) else []

def safe_num(val, default=0):
    """Return ``val`` as a finite number, or ``default`` when it is not one.

    Args:
        val: An ``int``/``float``, or a string such as ``"~85"``; ``~``
            characters and surrounding whitespace are removed before the
            string is parsed with ``float``.
        default: Value returned for anything that is not a usable number.

    Returns:
        ``val`` itself for ints and finite floats, a ``float`` for numeric
        strings, otherwise ``default``.
    """
    # bool is a subclass of int; a `true`/`false` value is not a number.
    if isinstance(val, bool):
        return default
    if isinstance(val, (int, float)):
        number = val
    elif isinstance(val, str):
        try:
            number = float(val.replace("~", "").strip())
        except ValueError:
            return default
    else:
        return default
    # float() accepts "nan"/"inf", which json.dumps would emit as the
    # non-standard tokens NaN/Infinity. Only floats are checked so that
    # arbitrarily large ints are never converted.
    if isinstance(number, float) and not math.isfinite(number):
        return default
    return number

# ─── Scanners ────────────────────────────────────────────────────────────────

def scan_skills(base_dir: Path) -> list[dict]:
    """Collect catalog metadata for every skill under ``base_dir / "skills"``.

    Each immediate subdirectory that contains a ``SKILL.md`` file produces
    one entry; the subdirectory name is used as the slug. Entries are
    returned in sorted directory order.

    Args:
        base_dir: Root directory that holds the ``skills`` folder.

    Returns:
        A list of metadata dicts, empty when the ``skills`` folder is
        missing or is not a directory.
    """
    skills_dir = base_dir / "skills"
    entries = []
    # is_dir() rather than exists(): a stray file named "skills" would make
    # iterdir() raise.
    if not skills_dir.is_dir():
        return entries
    for skill_dir in sorted(skills_dir.iterdir()):
        skill_file = skill_dir / "SKILL.md"
        if not skill_dir.is_dir() or not skill_file.exists():
            continue
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        # "utf-8-sig" also drops a leading BOM, which would otherwise hide
        # the opening "---" fence from the frontmatter parser.
        text = skill_file.read_text(encoding="utf-8-sig", errors="replace")
        meta, _body = parse_frontmatter(text)
        slug = skill_dir.name
        entries.append({
            "slug": slug,
            "componentType": "skill",
            "title": safe_str(meta.get("title", meta.get("name", slug))),
            "name": safe_str(meta.get("name", slug)),
            "description": safe_str(meta.get("description", meta.get("summary", ""))),
            "summary": safe_str(meta.get("summary", meta.get("description", ""))),
            "track": safe_str(meta.get("track", "")),
            "version": safe_str(meta.get("version", "1.0.0")),
            "status": safe_str(meta.get("status", "active")),
            "audience": safe_str(meta.get("audience", "")),
            "tags": safe_list(meta.get("tags", [])),
            "qualityScore": safe_num(meta.get("quality_score"), 0),
            "tokens": safe_str(meta.get("tokens", "")),
            "created": safe_str(meta.get("created", "")),
            "updated": safe_str(meta.get("updated", "")),
            "category": safe_str(meta.get("skill_category", "")),
            "whenToUse": safe_str(meta.get("when_to_use", "")),
            "sourcePath": f"skills/{slug}/SKILL.md",
        })
    return entries

def scan_commands(base_dir: Path) -> list[dict]:
    """Collect catalog metadata for every ``*.md`` file in ``base_dir / "commands"``.

    The file stem is used as the slug. When the frontmatter does not
    provide ``command_name`` or ``invocation``, both default to
    ``/<slug>``. Entries are returned in sorted file order.

    Args:
        base_dir: Root directory that holds the ``commands`` folder.

    Returns:
        A list of metadata dicts, empty when the ``commands`` folder is
        missing or is not a directory.
    """
    commands_dir = base_dir / "commands"
    entries = []
    if not commands_dir.is_dir():
        return entries
    for cmd_file in sorted(commands_dir.glob("*.md")):
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        # "utf-8-sig" also drops a leading BOM, which would otherwise hide
        # the opening "---" fence from the frontmatter parser.
        text = cmd_file.read_text(encoding="utf-8-sig", errors="replace")
        meta, _body = parse_frontmatter(text)
        slug = cmd_file.stem
        entries.append({
            "slug": slug,
            "componentType": "command",
            "title": safe_str(meta.get("title", slug)),
            "name": safe_str(meta.get("command_name", f"/{slug}")),
            "description": safe_str(meta.get("summary", meta.get("description", ""))),
            "summary": safe_str(meta.get("summary", "")),
            "track": safe_str(meta.get("track", "")),
            "version": safe_str(meta.get("version", "1.0.0")),
            "status": safe_str(meta.get("status", "active")),
            "audience": safe_str(meta.get("audience", "")),
            "tags": safe_list(meta.get("tags", [])),
            "qualityScore": safe_num(meta.get("quality_score"), 0),
            "tokens": safe_str(meta.get("tokens", "")),
            "created": safe_str(meta.get("created", "")),
            "updated": safe_str(meta.get("updated", "")),
            "invocation": safe_str(meta.get("invocation", f"/{slug}")),
            "aliases": safe_list(meta.get("aliases", [])),
            "sourcePath": f"commands/{slug}.md",
        })
    return entries

def scan_agents(base_dir: Path) -> list[dict]:
    """Collect catalog metadata for every ``*.md`` file in ``base_dir / "agents"``.

    The file stem is used as the slug. Entries are returned in sorted
    file order.

    Args:
        base_dir: Root directory that holds the ``agents`` folder.

    Returns:
        A list of metadata dicts, empty when the ``agents`` folder is
        missing or is not a directory.
    """
    agents_dir = base_dir / "agents"
    entries = []
    if not agents_dir.is_dir():
        return entries
    for agent_file in sorted(agents_dir.glob("*.md")):
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        # "utf-8-sig" also drops a leading BOM, which would otherwise hide
        # the opening "---" fence from the frontmatter parser.
        text = agent_file.read_text(encoding="utf-8-sig", errors="replace")
        meta, _body = parse_frontmatter(text)
        slug = agent_file.stem
        entries.append({
            "slug": slug,
            "componentType": "agent",
            "title": safe_str(meta.get("title", slug)),
            "name": safe_str(meta.get("name", slug)),
            "description": safe_str(meta.get("description", meta.get("summary", ""))),
            "summary": safe_str(meta.get("summary", "")),
            "track": safe_str(meta.get("track", "")),
            "version": safe_str(meta.get("version", "1.0.0")),
            "status": safe_str(meta.get("status", "active")),
            "audience": safe_str(meta.get("audience", "")),
            "tags": safe_list(meta.get("tags", [])),
            "qualityScore": safe_num(meta.get("quality_score"), 0),
            "tokens": safe_str(meta.get("tokens", "")),
            "created": safe_str(meta.get("created", "")),
            "updated": safe_str(meta.get("updated", "")),
            "agentType": safe_str(meta.get("agent_type", "")),
            "domain": safe_list(meta.get("domain", [])),
            "model": safe_str(meta.get("model", "")),
            "tools": safe_str(meta.get("tools", "")),
            "invocationPattern": safe_str(meta.get("invocation_pattern", "")),
            "sourcePath": f"agents/{slug}.md",
        })
    return entries

# ─── Copy .md files to public/catalog/ for lazy loading ─────────────────────

def copy_content_files(base_dir: Path, dashboard_dir: Path, entries: list[dict], component_type: str) -> int:
    """Copy .md source files to public/catalog/{type}/{slug}.md.

    The YAML frontmatter is stripped, so only the markdown body is written.
    The destination directory is created if needed. Entries whose source
    file does not exist are skipped.

    Args:
        base_dir: Root that each entry's ``sourcePath`` is relative to.
        dashboard_dir: Directory that contains (or will contain)
            ``public/catalog``.
        entries: Catalog entries; each must provide ``sourcePath`` and
            ``slug``.
        component_type: Sub-folder name under ``public/catalog``.

    Returns:
        Number of files written.
    """
    dest_dir = dashboard_dir / "public" / "catalog" / component_type
    dest_dir.mkdir(parents=True, exist_ok=True)
    copied = 0
    for entry in entries:
        src = base_dir / entry["sourcePath"]
        if not src.exists():
            continue
        dst = dest_dir / f"{entry['slug']}.md"
        # Explicit encodings on both sides: relying on the platform default
        # can mis-decode UTF-8 sources or fail to encode the body on write.
        # "utf-8-sig" also drops a leading BOM so the frontmatter fence is
        # recognised.
        text = src.read_text(encoding="utf-8-sig", errors="replace")
        # Strip YAML frontmatter before copying (content only)
        _meta, body = parse_frontmatter(text)
        dst.write_text(body, encoding="utf-8")
        copied += 1
    return copied

# ─── Main ────────────────────────────────────────────────────────────────────

def main(argv: "list[str] | None" = None):
    """Generate ``catalog-meta.json`` and, unless ``--no-copy``, the content files.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``.

    Returns:
        ``0``, suitable for passing to ``sys.exit``.
    """
    parser = argparse.ArgumentParser(description="Generate platform catalog metadata")
    parser.add_argument("--base-dir", help="CODITECT core root directory")
    parser.add_argument("--output", help="Output path for catalog-meta.json")
    parser.add_argument("--dashboard-dir", help="Dashboard directory (for public/catalog/ content)")
    parser.add_argument("--no-copy", action="store_true", help="Skip copying .md content files")
    parser.add_argument("--pretty", action="store_true", help="Pretty-print JSON output")
    args = parser.parse_args(argv)

    # Auto-detect paths: without --base-dir, the root is two levels above
    # the directory that contains this script.
    script_dir = Path(__file__).resolve().parent
    base_dir = Path(args.base_dir) if args.base_dir else script_dir.parent.parent
    dashboard_dir = Path(args.dashboard_dir) if args.dashboard_dir else base_dir / "tools" / "trajectory-dashboard"
    output_path = Path(args.output) if args.output else dashboard_dir / "src" / "generated" / "catalog-meta.json"

    print(f"Scanning components in {base_dir}")
    skills = scan_skills(base_dir)
    commands = scan_commands(base_dir)
    agents = scan_agents(base_dir)

    catalog = {
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # the formatted string is unchanged.
        "generatedAt": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "counts": {
            "skills": len(skills),
            "commands": len(commands),
            "agents": len(agents),
            "total": len(skills) + len(commands) + len(agents),
        },
        "skills": skills,
        "commands": commands,
        "agents": agents,
    }

    # Write metadata JSON. ensure_ascii=False leaves non-ASCII characters
    # unescaped, so the file encoding must be explicit.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    indent = 2 if args.pretty else None
    output_path.write_text(
        json.dumps(catalog, indent=indent, ensure_ascii=False),
        encoding="utf-8",
    )
    print(f"Catalog metadata: {output_path}")
    print(f" Skills: {len(skills)}, Commands: {len(commands)}, Agents: {len(agents)}")
    print(f" Total: {catalog['counts']['total']}")

    # Copy content files for lazy loading
    if not args.no_copy:
        s = copy_content_files(base_dir, dashboard_dir, skills, "skills")
        c = copy_content_files(base_dir, dashboard_dir, commands, "commands")
        a = copy_content_files(base_dir, dashboard_dir, agents, "agents")
        print(f"Content files copied to public/catalog/: skills={s}, commands={c}, agents={a}")
    else:
        print("Skipped content file copy (--no-copy)")

    return 0

# Run only when executed as a script. The dunder names are required:
# a bare `name` is undefined and raises NameError.
if __name__ == "__main__":
    sys.exit(main())