Skip to main content

scripts-inject-frontmatter

#!/usr/bin/env python3
"""
---
title: "Inject Frontmatter"
component_type: script
version: "1.0.0"
audience: contributor
status: stable
summary: "CODITECT Frontmatter Injection Script for ADR-018 compliance"
keywords: ['frontmatter', 'automation', 'adr-018', 'standardization']
tokens: ~500
created: 2025-12-22
updated: 2025-12-22
script_name: "inject-frontmatter.py"
language: python
executable: true
usage: "python3 scripts/inject-frontmatter.py [options]"
python_version: "3.10+"
dependencies: []
modifies_files: true
network_access: false
requires_auth: false
---

CODITECT Frontmatter Injection Script

Automatically adds ADR-018 compliant YAML frontmatter to components that
are missing it.

Usage:
    python3 scripts/inject-frontmatter.py --type agent   # Process agents only
    python3 scripts/inject-frontmatter.py --type all     # Process all types
    python3 scripts/inject-frontmatter.py --dry-run      # Preview changes
    python3 scripts/inject-frontmatter.py --file PATH    # Process single file

Author: AZ1.AI INC
Version: 1.0.0
ADR: ADR-018-AGENTIC-DOCUMENTATION-STANDARD
"""

import argparse
import re
import sys
from datetime import date
from pathlib import Path
from typing import Dict, List, Optional, Tuple

class FrontmatterInjector: """Inject standardized frontmatter into components."""

COMPONENT_PATTERNS = {
'agent': ['agents/*.md'],
'command': ['commands/*.md'],
'skill': ['skills/*/SKILL.md', 'skills/*/*.md'],
'script': ['scripts/*.py'],
'hook': ['hooks/*.py', 'hooks/*.md'],
'prompt': ['prompts/*.md'],
'workflow': ['docs/workflows/*.md'],
'guide': ['docs/guides/*.md', 'docs/getting-started/*.md'],
'reference': ['docs/reference/*.md'],
# ADR-213: ADRs now in coditect-documentation; keep local path as fallback
'adr': ['../../../docs/coditect-documentation/coditect-core/adrs/*.md',
'internal/architecture/adrs/*.md']
}

# MoE role inference from keywords
MOE_KEYWORDS = {
'orchestrator': ['orchestrat', 'coordinat', 'multi-agent', 'workflow'],
'analyst': ['analy', 'research', 'investigat', 'assess', 'audit'],
'judge': ['judge', 'review', 'evaluat', 'verdict', 'council'],
'specialist': [] # default
}

# Domain inference from keywords
DOMAIN_KEYWORDS = {
'security': ['security', 'vulnerab', 'penetrat', 'audit', 'compliance'],
'development': ['develop', 'code', 'implement', 'build', 'engineer'],
'qa': ['test', 'qa', 'quality', 'validation'],
'devops': ['devops', 'deploy', 'ci/cd', 'pipeline', 'docker', 'k8s'],
'documentation': ['document', 'doc', 'guide', 'tutorial', 'reference'],
'research': ['research', 'analy', 'investigat', 'study']
}

def __init__(self, project_root: Path, dry_run: bool = False):
self.project_root = project_root
self.dry_run = dry_run
self.today = date.today().isoformat()

def has_frontmatter(self, content: str) -> bool:
"""Check if content already has YAML frontmatter."""
return content.strip().startswith('---')

def is_adr018_compliant(self, content: str) -> bool:
"""Check if frontmatter is ADR-018 compliant."""
if not self.has_frontmatter(content):
return False
# ADR-018 requires component_type field
return 'component_type:' in content[:2000]

def parse_old_frontmatter(self, content: str) -> Tuple[Dict, str]:
"""Parse old-format frontmatter and return (data, body)."""
if not content.startswith('---'):
return ({}, content)

parts = content.split('---', 2)
if len(parts) < 3:
return ({}, content)

frontmatter_text = parts[1]
body = parts[2]

# Simple YAML-like parsing
data = {}
current_key = None
current_list = None

for line in frontmatter_text.split('\n'):
line = line.rstrip()
if not line or line.startswith('#'):
continue

# Check for list item
if line.startswith(' - ') and current_key:
if current_list is None:
current_list = []
data[current_key] = current_list
current_list.append(line.strip()[2:].strip())
continue

# Check for nested key (indented)
if line.startswith(' ') and ':' in line and not line.strip().startswith('-'):
continue # Skip nested structures for now

# Check for key: value
if ':' in line and not line.startswith(' '):
current_list = None
key, _, value = line.partition(':')
key = key.strip()
value = value.strip().strip('"').strip("'")
current_key = key
if value:
data[key] = value

return (data, body)

def extract_title(self, content: str, filename: str) -> str:
"""Extract title from first heading or generate from filename."""
# Try to find first markdown heading
match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
if match:
return match.group(1).strip()

# Generate from filename
name = Path(filename).stem
# Convert kebab-case or snake_case to Title Case
name = name.replace('-', ' ').replace('_', ' ')
return name.title()

def extract_summary(self, content: str) -> str:
"""Extract summary from first paragraph after heading."""
# Skip frontmatter if present
if content.startswith('---'):
parts = content.split('---', 2)
if len(parts) >= 3:
content = parts[2]

# Find first non-heading paragraph
lines = content.strip().split('\n')
paragraph_lines = []
in_paragraph = False

for line in lines:
line = line.strip()
if not line:
if in_paragraph:
break
continue
if line.startswith('#'):
continue
if line.startswith('```'):
break
if line.startswith('|') or line.startswith('-'):
continue

in_paragraph = True
paragraph_lines.append(line)

if paragraph_lines:
summary = ' '.join(paragraph_lines)
# Truncate to ~100 chars
if len(summary) > 100:
summary = summary[:97] + '...'
return summary

return "Component description pending"

def extract_keywords(self, content: str, filename: str) -> List[str]:
"""Extract keywords from content and filename."""
keywords = set()

# From filename
name = Path(filename).stem.lower()
parts = re.split(r'[-_]', name)
keywords.update(p for p in parts if len(p) > 2)

# Common technical terms from content
content_lower = content.lower()
tech_terms = [
'api', 'database', 'frontend', 'backend', 'security', 'testing',
'deployment', 'docker', 'kubernetes', 'git', 'ci/cd', 'automation',
'validation', 'analysis', 'review', 'generation', 'optimization'
]
for term in tech_terms:
if term in content_lower:
keywords.add(term)

return sorted(list(keywords))[:5]

def infer_moe_role(self, content: str, filename: str) -> str:
"""Infer MoE role from content and filename."""
text = (content + ' ' + filename).lower()

for role, keywords in self.MOE_KEYWORDS.items():
for kw in keywords:
if kw in text:
return role

return 'specialist'

def infer_domains(self, content: str, filename: str) -> List[str]:
"""Infer domains from content and filename."""
text = (content + ' ' + filename).lower()
domains = []

for domain, keywords in self.DOMAIN_KEYWORDS.items():
for kw in keywords:
if kw in text:
domains.append(domain)
break

return domains if domains else ['development']

def infer_agent_type(self, content: str, filename: str) -> str:
"""Infer agent type from content."""
text = (content + ' ' + filename).lower()

if any(x in text for x in ['orchestrat', 'coordinat', 'multi-agent']):
return 'orchestrator'
if any(x in text for x in ['review', 'evaluat', 'audit']):
return 'reviewer'
if any(x in text for x in ['generat', 'creat', 'build']):
return 'generator'
if any(x in text for x in ['analy', 'research', 'investigat']):
return 'analyst'
if any(x in text for x in ['judge', 'verdict', 'score']):
return 'judge'

return 'specialist'

def generate_agent_frontmatter(self, content: str, filepath: Path) -> str:
"""Generate frontmatter for an agent."""
# Use old frontmatter data if available
old = getattr(self, '_old_frontmatter', {})

# Extract title - prefer old 'name' field, then heading, then filename
if 'name' in old:
title = old['name'].replace('-', ' ').title()
else:
title = self.extract_title(content, filepath.name)

# Use old description if available
if 'description' in old:
summary = old['description'][:100]
if len(old['description']) > 100:
summary = summary[:97] + '...'
else:
summary = self.extract_summary(content)

keywords = self.extract_keywords(content, filepath.name)
moe_role = self.infer_moe_role(content, filepath.name)
domains = self.infer_domains(content, filepath.name)
agent_type = self.infer_agent_type(content, filepath.name)

# Use old model info
model = old.get('model', 'sonnet')

# Use old tools if available
tools = old.get('tools', 'Read, Write, Edit, Bash, Grep, Glob, TodoWrite')

# Generate invocation pattern
agent_name = old.get('name', filepath.stem)
invocation = f"Task(subagent_type='{agent_name}', prompt='...')"

frontmatter = f"""---

title: "{title}" component_type: agent version: "1.0.0" audience: contributor status: stable summary: "{summary}" keywords: {keywords} tokens: ~2000 created: {self.today} updated: {self.today} agent_type: {agent_type} domain: {domains} moe_role: {moe_role} moe_capabilities:

  • specialized_analysis
  • task_execution invocation_pattern: "{invocation}" requires_context: true model: {model} tools: "{tools}" quality_score: 75 last_reviewed: {self.today}

""" return frontmatter

def generate_command_frontmatter(self, content: str, filepath: Path) -> str:
"""Generate frontmatter for a command."""
title = self.extract_title(content, filepath.name)
summary = self.extract_summary(content)
keywords = self.extract_keywords(content, filepath.name)

command_name = '/' + filepath.stem

frontmatter = f"""---

title: "{title}" component_type: command version: "1.0.0" audience: customer status: stable summary: "{summary}" keywords: {keywords} tokens: ~1500 created: {self.today} updated: {self.today} command_name: "{command_name}" aliases: [] usage: "{command_name} [options]" requires_confirmation: false modifies_files: false network_access: false

""" return frontmatter

def generate_skill_frontmatter(self, content: str, filepath: Path) -> str:
"""Generate frontmatter for a skill."""
title = self.extract_title(content, filepath.name)
summary = self.extract_summary(content)
keywords = self.extract_keywords(content, filepath.name)

skill_name = filepath.parent.name if filepath.name == 'SKILL.md' else filepath.stem

frontmatter = f"""---

title: "{title}" component_type: skill version: "1.0.0" audience: contributor status: stable summary: "{summary}" keywords: {keywords} tokens: ~3000 created: {self.today} updated: {self.today} skill_name: "{skill_name}" skill_category: pattern when_to_use: "When implementing {skill_name.replace('-', ' ')} patterns" when_not_to_use: "When simpler approaches suffice" composes_with: [] requires: []

""" return frontmatter

def generate_script_frontmatter(self, content: str, filepath: Path) -> str:
"""Generate frontmatter for a Python script (as docstring)."""
title = self.extract_title(content, filepath.name)
summary = self.extract_summary(content)
keywords = self.extract_keywords(content, filepath.name)

# For Python, we add to docstring, not YAML block
frontmatter = f'''"""

title: "{title}" component_type: script version: "1.0.0" audience: contributor status: stable summary: "{summary}" keywords: {keywords} tokens: ~500 created: {self.today} updated: {self.today} script_name: "{filepath.name}" language: python executable: true usage: "python3 scripts/{filepath.name} [options]" python_version: "3.10+" dependencies: [] modifies_files: false network_access: false requires_auth: false

''' return frontmatter

def generate_guide_frontmatter(self, content: str, filepath: Path) -> str:
"""Generate frontmatter for a guide."""
title = self.extract_title(content, filepath.name)
summary = self.extract_summary(content)
keywords = self.extract_keywords(content, filepath.name)

frontmatter = f"""---

title: "{title}" component_type: guide version: "1.0.0" audience: customer status: stable summary: "{summary}" keywords: {keywords} tokens: ~3000 created: {self.today} updated: {self.today} doc_type: guide when_to_read: "When learning about {filepath.stem.replace('-', ' ')}" prerequisites: [] next_steps: [] reading_time: "10 minutes"

""" return frontmatter

def generate_adr_frontmatter(self, content: str, filepath: Path) -> str:
"""Generate frontmatter for an ADR."""
title = self.extract_title(content, filepath.name)
summary = self.extract_summary(content)
keywords = self.extract_keywords(content, filepath.name)

# Extract ADR number if present
match = re.match(r'ADR-(\d+)', filepath.stem)
adr_number = match.group(1) if match else '000'

frontmatter = f"""---

""" return frontmatter

def generate_generic_frontmatter(self, content: str, filepath: Path, component_type: str) -> str:
"""Generate generic frontmatter for other types."""
title = self.extract_title(content, filepath.name)
summary = self.extract_summary(content)
keywords = self.extract_keywords(content, filepath.name)

frontmatter = f"""---

title: "{title}" component_type: {component_type} version: "1.0.0" audience: contributor status: stable summary: "{summary}" keywords: {keywords} tokens: ~1500 created: {self.today} updated: {self.today}

""" return frontmatter

def inject_frontmatter(self, filepath: Path, component_type: str) -> Tuple[bool, str]:
"""Inject frontmatter into a file."""
try:
with open(filepath, 'r', encoding='utf-8') as f:
content = f.read()

# Skip README files
if filepath.name.lower() == 'readme.md':
return (False, "Skipped README")

# Check if already ADR-018 compliant
if self.is_adr018_compliant(content):
return (False, "Already ADR-018 compliant")

# Parse old frontmatter if present
old_data = {}
body = content
if self.has_frontmatter(content):
old_data, body = self.parse_old_frontmatter(content)

# Use old data to enrich new frontmatter generation
# Store old data for generator methods to use
self._old_frontmatter = old_data

# Generate appropriate frontmatter
if component_type == 'agent':
frontmatter = self.generate_agent_frontmatter(content, filepath)
elif component_type == 'command':
frontmatter = self.generate_command_frontmatter(content, filepath)
elif component_type == 'skill':
frontmatter = self.generate_skill_frontmatter(content, filepath)
elif component_type == 'script':
frontmatter = self.generate_script_frontmatter(content, filepath)
elif component_type == 'guide':
frontmatter = self.generate_guide_frontmatter(content, filepath)
elif component_type == 'adr':
frontmatter = self.generate_adr_frontmatter(content, filepath)
else:
frontmatter = self.generate_generic_frontmatter(content, filepath, component_type)

# For Python scripts, handle differently
if component_type == 'script' and filepath.suffix == '.py':
# Check if already has docstring with frontmatter
if '---' in content[:500] and 'component_type:' in content[:1000]:
return (False, "Already has frontmatter in docstring")

# Check for shebang
if content.startswith('#!'):
lines = content.split('\n', 1)
shebang = lines[0] + '\n'
rest = lines[1] if len(lines) > 1 else ''

# Check for existing docstring
if rest.strip().startswith('"""'):
# Insert into existing docstring
new_content = shebang + frontmatter + rest.lstrip().lstrip('"""')
else:
new_content = shebang + frontmatter + '"""\n' + rest
else:
new_content = frontmatter + '"""\n' + content
else:
# Use body (stripped of old frontmatter) for markdown files
new_content = frontmatter + body.lstrip()

if not self.dry_run:
with open(filepath, 'w', encoding='utf-8') as f:
f.write(new_content)

return (True, "Frontmatter injected")

except Exception as e:
return (False, f"Error: {str(e)}")

def discover_files(self, component_type: str) -> List[Path]:
"""Discover files for a component type."""
if component_type == 'all':
files = []
for ctype, patterns in self.COMPONENT_PATTERNS.items():
for pattern in patterns:
files.extend(self.project_root.glob(pattern))
return files

patterns = self.COMPONENT_PATTERNS.get(component_type, [])
files = []
for pattern in patterns:
files.extend(self.project_root.glob(pattern))
return files

def process_type(self, component_type: str) -> Dict[str, int]:
"""Process all files of a component type."""
files = self.discover_files(component_type)
results = {"processed": 0, "skipped": 0, "errors": 0}

for filepath in sorted(files):
if filepath.name.startswith('.'):
continue

# Determine actual component type for 'all'
actual_type = component_type
if component_type == 'all':
for ctype, patterns in self.COMPONENT_PATTERNS.items():
for pattern in patterns:
if filepath.match(pattern):
actual_type = ctype
break
if actual_type != 'all':
break

success, message = self.inject_frontmatter(filepath, actual_type)

if success:
results["processed"] += 1
print(f" ✓ {filepath.relative_to(self.project_root)}")
elif "Error" in message:
results["errors"] += 1
print(f" ✗ {filepath.relative_to(self.project_root)}: {message}")
else:
results["skipped"] += 1

return results

def main():
    """CLI entry point: parse arguments and run the frontmatter injector."""
    parser = argparse.ArgumentParser(
        description="Inject ADR-018 compliant frontmatter into components",
        epilog="Part of CODITECT standardization pipeline"
    )
    parser.add_argument("--type",
                        choices=['agent', 'command', 'skill', 'script', 'hook',
                                 'prompt', 'workflow', 'guide', 'reference',
                                 'adr', 'all'],
                        default='all',
                        help="Component type to process")
    parser.add_argument("--file", type=Path, help="Process single file")
    parser.add_argument("--dry-run", action="store_true",
                        help="Preview without changes")
    args = parser.parse_args()

    # Project root is one level above the scripts/ directory this file lives in.
    script_path = Path(__file__).resolve()
    project_root = script_path.parent.parent

    injector = FrontmatterInjector(project_root, args.dry_run)

    if args.dry_run:
        print("🔍 DRY RUN - No files will be modified\n")

    if args.file:
        # Single-file mode: --type supplies the component type directly.
        success, message = injector.inject_frontmatter(args.file, args.type)
        print(f"{'✓' if success else '✗'} {args.file}: {message}")
    else:
        # Batch mode: process every discovered file of the requested type.
        print(f"🔧 Processing {args.type} components...\n")
        results = injector.process_type(args.type)

        print(f"\n📊 Results:")
        print(f"  Processed: {results['processed']}")
        print(f"  Skipped: {results['skipped']}")
        print(f"  Errors: {results['errors']}")

if name == "main": main()