#!/usr/bin/env python3
"""
---
title: "Validate Component Frontmatter"
component_type: script
version: "1.0.0"
audience: contributor
status: stable
summary: "CODITECT Component Frontmatter Validator"
keywords: ['analysis', 'component', 'frontmatter', 'git', 'review']
tokens: ~500
created: 2025-12-22
updated: 2025-12-22
script_name: "validate-component-frontmatter.py"
language: python
executable: true
usage: "python3 scripts/validate-component-frontmatter.py [options]"
python_version: "3.10+"
dependencies: []
modifies_files: false
network_access: false
requires_auth: false
---

CODITECT Component Frontmatter Validator

Validates all components against the Agentic Documentation Standard v1.1.0
as defined in ADR-018.

Usage:
    python3 scripts/validate-component-frontmatter.py               # Full audit
    python3 scripts/validate-component-frontmatter.py --strict      # Fail on L0/L1
    python3 scripts/validate-component-frontmatter.py --fix         # Auto-fix simple issues
    python3 scripts/validate-component-frontmatter.py --type agent  # Validate specific type
    python3 scripts/validate-component-frontmatter.py --json        # JSON output

NOTE(review): --fix is documented here but main() does not define that
flag yet — confirm against the intended CLI surface.

Author: AZ1.AI INC
Version: 1.0.0
ADR: ADR-018-AGENTIC-DOCUMENTATION-STANDARD
"""
import argparse
import json
import re
import sys
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional, Set
class ConformanceLevel(Enum):
    """Conformance levels per ADR-018.

    Higher value means stronger conformance; audit gates treat L2 and
    above as passing.
    """
    L0_NONE = 0       # No frontmatter - FAIL
    L1_UNIVERSAL = 1  # Universal fields only - WARN
    L2_TYPED = 2      # Type-specific fields - PASS
    L3_FULL = 3       # Full schema + MoE - EXEMPLARY
@dataclass
class ValidationResult:
    """Result of validating a single component's frontmatter.

    Populated by FrontmatterValidator.validate_component. The parsed
    `frontmatter` mapping is kept for programmatic use but deliberately
    excluded from to_dict() so JSON reports stay small.
    """
    path: str                  # path relative to the project root
    component_type: str        # declared or path-inferred component type
    level: ConformanceLevel    # final conformance verdict
    missing_fields: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)
    errors: List[str] = field(default_factory=list)
    frontmatter: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Serialize for JSON output (omits the raw frontmatter mapping)."""
        return {
            "path": self.path,
            "component_type": self.component_type,
            "level": self.level.name,
            "level_value": self.level.value,
            "missing_fields": self.missing_fields,
            "warnings": self.warnings,
            "errors": self.errors
        }
class FrontmatterValidator:
    """Validate component frontmatter against the ADR-018 schema.

    Discovers component markdown/script files under a project root,
    extracts their YAML frontmatter with a small hand-rolled parser,
    and grades each file on the ConformanceLevel scale.
    """

    # Universal required fields (all components) — missing ones force L0.
    UNIVERSAL_REQUIRED = {
        'title', 'component_type', 'version', 'audience', 'status',
        'summary', 'keywords'
    }

    # Universal recommended fields — missing ones only warn.
    UNIVERSAL_RECOMMENDED = {
        'tokens', 'created', 'updated'
    }

    # Type-specific required fields — missing ones cap the level at L1.
    TYPE_REQUIRED = {
        'agent': {'agent_type', 'domain', 'moe_role', 'invocation_pattern'},
        'command': {'command_name', 'usage'},
        'skill': {'skill_name', 'skill_category', 'when_to_use'},
        'script': {'script_name', 'language', 'usage'},
        'guide': {'doc_type', 'when_to_read'},
        'reference': {'doc_type'},
        'workflow': {'workflow_name'},
        'hook': {'hook_name', 'trigger'},
        'prompt': {'prompt_name'}
    }

    # MoE role required fields (agents only) — missing ones only warn,
    # but a complete set upgrades an L2 agent to L3.
    MOE_REQUIRED = {
        'analyst': {'analysis_domains', 'output_format'},
        'judge': {'judging_criteria', 'scoring_scale'},
        'orchestrator': {'orchestration_patterns', 'max_concurrent_agents'}
    }

    # Valid enum values.
    VALID_COMPONENT_TYPES = {
        'agent', 'command', 'skill', 'script', 'hook', 'prompt',
        'workflow', 'guide', 'reference', 'spec', 'adr', 'tutorial'
    }
    VALID_AUDIENCES = {'customer', 'contributor', 'both'}
    VALID_STATUSES = {'draft', 'experimental', 'stable', 'production', 'deprecated'}
    VALID_AGENT_TYPES = {'specialist', 'analyst', 'judge', 'orchestrator', 'reviewer', 'generator'}
    VALID_MOE_ROLES = {'analyst', 'judge', 'orchestrator', 'specialist', 'none'}

    def __init__(self, project_root: Path):
        self.project_root = project_root

    def extract_frontmatter(self, file_path: Path) -> Optional[Dict[str, Any]]:
        """Extract YAML frontmatter from a file, or None if absent/unreadable.

        Uses a deliberately small hand-rolled parser covering the subset of
        YAML these components use: scalar values, quoted strings, booleans,
        integers, '~NNN' token estimates, inline lists, and indented block
        lists. Nested mappings are NOT supported.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            # Frontmatter must start at the very first byte of the file.
            if not content.startswith('---'):
                return None

            # Find the closing '---' delimiter.
            lines = content.split('\n')
            end_idx = None
            for i, line in enumerate(lines[1:], 1):
                if line.strip() == '---':
                    end_idx = i
                    break
            if end_idx is None:
                return None

            frontmatter_text = '\n'.join(lines[1:end_idx])

            frontmatter: Dict[str, Any] = {}
            current_key = None
            current_list = None
            for line in frontmatter_text.split('\n'):
                line = line.rstrip()

                # Skip empty lines and comments.
                if not line or line.strip().startswith('#'):
                    continue

                stripped = line.strip()

                # Block-list item under the current key; accepts any indent
                # depth (generalized from the original fixed one-space form).
                if line.startswith(' ') and stripped.startswith('- ') and current_key:
                    if current_list is None:
                        current_list = []
                        frontmatter[current_key] = current_list
                    current_list.append(stripped[2:].strip())
                    continue

                # Top-level "key: value" pair.
                if ':' in line and not line.startswith(' '):
                    current_list = None
                    key, _, value = line.partition(':')
                    key = key.strip()
                    value = value.strip()
                    current_key = key
                    if value.startswith('[') and value.endswith(']'):
                        # Inline list: [a, b, c]
                        items = value[1:-1].split(',')
                        frontmatter[key] = [item.strip().strip('"\'') for item in items if item.strip()]
                    elif value.startswith('"') and value.endswith('"'):
                        frontmatter[key] = value[1:-1]
                    elif value.startswith("'") and value.endswith("'"):
                        frontmatter[key] = value[1:-1]
                    elif value.lower() in ('true', 'false'):
                        frontmatter[key] = value.lower() == 'true'
                    elif value.startswith('~'):
                        frontmatter[key] = value  # token estimate, e.g. "~500"
                    elif value.isdigit():
                        frontmatter[key] = int(value)
                    elif not value:
                        frontmatter[key] = None  # a block list may follow
                    else:
                        frontmatter[key] = value
            return frontmatter
        except Exception:
            # Best effort: unreadable or undecodable files are treated the
            # same as files without frontmatter.
            return None

    def infer_component_type(self, file_path: Path) -> str:
        """Infer component type from the file's directory path."""
        path_str = str(file_path)
        # First matching marker wins; order preserves the original
        # precedence for paths that contain several markers.
        for marker, comp_type in (
            ('/agents/', 'agent'),
            ('/commands/', 'command'),
            ('/skills/', 'skill'),
            ('/scripts/', 'script'),
            ('/hooks/', 'hook'),
            ('/prompts/', 'prompt'),
            ('/workflows/', 'workflow'),
            ('/guides/', 'guide'),
            ('/reference/', 'reference'),
            ('/adrs/', 'adr'),
        ):
            if marker in path_str:
                return comp_type
        return 'unknown'

    def validate_component(self, file_path: Path) -> ValidationResult:
        """Validate a single component's frontmatter.

        Grading: any error or missing universal field forces L0; missing
        type-specific fields cap the level at L1; a complete MoE section
        upgrades an L2 agent to L3.
        """
        inferred_type = self.infer_component_type(file_path)
        result = ValidationResult(
            path=str(file_path.relative_to(self.project_root)),
            component_type=inferred_type,
            level=ConformanceLevel.L0_NONE
        )

        # Extract frontmatter; absence is an immediate L0 failure.
        frontmatter = self.extract_frontmatter(file_path)
        if frontmatter is None:
            result.errors.append("No YAML frontmatter found")
            return result

        result.frontmatter = frontmatter
        result.level = ConformanceLevel.L1_UNIVERSAL

        # Universal required fields ('fname' avoids shadowing the imported
        # dataclasses.field).
        for fname in self.UNIVERSAL_REQUIRED:
            if fname not in frontmatter or frontmatter[fname] is None:
                result.missing_fields.append(f"universal.{fname}")

        # Universal recommended fields only warn.
        for fname in self.UNIVERSAL_RECOMMENDED:
            if fname not in frontmatter:
                result.warnings.append(f"Missing recommended field: {fname}")

        # Validate enum values.
        if 'audience' in frontmatter:
            if frontmatter['audience'] not in self.VALID_AUDIENCES:
                result.errors.append(f"Invalid audience: {frontmatter['audience']}")
        if 'status' in frontmatter:
            if frontmatter['status'] not in self.VALID_STATUSES:
                result.errors.append(f"Invalid status: {frontmatter['status']}")
        if 'component_type' in frontmatter:
            if frontmatter['component_type'] not in self.VALID_COMPONENT_TYPES:
                result.warnings.append(f"Non-standard component_type: {frontmatter['component_type']}")

        # Type-specific fields; the declared type wins over the inferred one.
        actual_type = frontmatter.get('component_type', inferred_type)
        if actual_type in self.TYPE_REQUIRED:
            type_fields = self.TYPE_REQUIRED[actual_type]
            has_all_type_fields = True
            for fname in type_fields:
                if fname not in frontmatter:
                    result.missing_fields.append(f"{actual_type}.{fname}")
                    has_all_type_fields = False
            # L2 also requires that no universal field was missing.
            if has_all_type_fields and len(result.missing_fields) == 0:
                result.level = ConformanceLevel.L2_TYPED

        # MoE fields for agents; missing ones warn rather than fail.
        if actual_type == 'agent' and 'moe_role' in frontmatter:
            moe_role = frontmatter['moe_role']
            if moe_role in self.MOE_REQUIRED:
                moe_fields = self.MOE_REQUIRED[moe_role]
                has_all_moe_fields = True
                for fname in moe_fields:
                    if fname not in frontmatter:
                        result.warnings.append(f"Missing MoE field for {moe_role}: {fname}")
                        has_all_moe_fields = False
                if has_all_moe_fields and result.level == ConformanceLevel.L2_TYPED:
                    result.level = ConformanceLevel.L3_FULL

        # Validate agent-specific enums.
        if actual_type == 'agent':
            if 'agent_type' in frontmatter:
                if frontmatter['agent_type'] not in self.VALID_AGENT_TYPES:
                    result.warnings.append(f"Non-standard agent_type: {frontmatter['agent_type']}")
            if 'moe_role' in frontmatter:
                if frontmatter['moe_role'] not in self.VALID_MOE_ROLES:
                    result.errors.append(f"Invalid moe_role: {frontmatter['moe_role']}")

        # Downgrade: errors -> L0; missing universal -> L0; missing typed -> L1.
        if result.errors:
            result.level = ConformanceLevel.L0_NONE
        elif result.missing_fields:
            if any(f.startswith('universal.') for f in result.missing_fields):
                result.level = ConformanceLevel.L0_NONE
            elif result.level == ConformanceLevel.L2_TYPED:
                result.level = ConformanceLevel.L1_UNIVERSAL
        return result

    def discover_components(self, component_type: Optional[str] = None) -> List[Path]:
        """Discover all component files to validate.

        With a component_type, only that type's glob patterns are used;
        otherwise every pattern is searched. Results are sorted and
        de-duplicated (skills patterns can overlap).
        """
        patterns = {
            'agent': ['agents/*.md'],
            'command': ['commands/*.md'],
            'skill': ['skills/*/SKILL.md', 'skills/*/*.md'],
            'script': ['scripts/*.py'],  # Python scripts
            'hook': ['hooks/*.py', 'hooks/*.md'],
            'prompt': ['prompts/*.md'],
            'workflow': ['docs/workflows/*.md'],
            'guide': ['docs/guides/*.md', 'docs/getting-started/*.md'],
            'reference': ['docs/reference/*.md']
        }
        files: List[Path] = []
        if component_type and component_type in patterns:
            for pattern in patterns[component_type]:
                files.extend(self.project_root.glob(pattern))
        else:
            for type_patterns in patterns.values():
                for pattern in type_patterns:
                    files.extend(self.project_root.glob(pattern))
        return sorted(set(files))

    def audit_all(self, component_type: Optional[str] = None) -> Dict[str, Any]:
        """Audit all components and return a summary dict.

        Returns {"summary": ..., "results": ..., "non_conforming": ...}
        where non_conforming lists every component below L2.
        """
        files = self.discover_components(component_type)

        results: List[ValidationResult] = []
        level_counts = {level: 0 for level in ConformanceLevel}
        type_counts: Dict[str, Dict[ConformanceLevel, int]] = {}

        for file_path in files:
            # Skip hidden files such as .DS_Store or editor droppings.
            if file_path.name.startswith('.'):
                continue
            result = self.validate_component(file_path)
            results.append(result)
            level_counts[result.level] += 1
            per_type = type_counts.setdefault(
                result.component_type,
                {level: 0 for level in ConformanceLevel}
            )
            per_type[result.level] += 1

        total = len(results)
        conforming = level_counts[ConformanceLevel.L2_TYPED] + level_counts[ConformanceLevel.L3_FULL]

        return {
            "summary": {
                "total_components": total,
                "conforming": conforming,
                "conformance_rate": round(conforming / total * 100, 1) if total > 0 else 0,
                "by_level": {level.name: count for level, count in level_counts.items()},
                "by_type": {
                    t: {level.name: count for level, count in counts.items()}
                    for t, counts in type_counts.items()
                }
            },
            "results": [r.to_dict() for r in results],
            "non_conforming": [
                r.to_dict() for r in results
                if r.level.value < ConformanceLevel.L2_TYPED.value
            ]
        }
def print_report(audit: Dict[str, Any], verbose: bool = False) -> int:
    """Print a human-readable audit report and return a shell exit code.

    Args:
        audit: dict produced by FrontmatterValidator.audit_all().
        verbose: when True, list missing fields for non-conforming items.

    Returns:
        0 if conformance rate >= 80%, 1 if >= 50%, else 2.
    """
    summary = audit["summary"]

    print("\n" + "=" * 70)
    print("CODITECT Component Frontmatter Audit Report")
    print("ADR-018: Agentic Documentation Standard v1.1.0")
    print("=" * 70)

    print("\n\U0001F4CA Summary")
    print(f"  Total Components: {summary['total_components']}")
    print(f"  Conforming (L2+): {summary['conforming']} ({summary['conformance_rate']}%)")

    print("\n\U0001F4C8 By Conformance Level")
    for level, count in summary['by_level'].items():
        emoji = "\U0001F534" if "L0" in level else "\U0001F7E1" if "L1" in level else "\U0001F7E2" if "L2" in level else "\u2B50"
        print(f"  {emoji} {level}: {count}")

    print("\n\U0001F4E6 By Component Type")
    for comp_type, levels in summary['by_type'].items():
        total = sum(levels.values())
        conforming = levels.get('L2_TYPED', 0) + levels.get('L3_FULL', 0)
        rate = round(conforming / total * 100) if total > 0 else 0
        status = "\u2705" if rate >= 80 else "\u26A0\uFE0F" if rate >= 50 else "\u274C"
        print(f"  {status} {comp_type}: {conforming}/{total} ({rate}%)")

    # List non-conforming components, capped at 20 entries.
    non_conforming = audit['non_conforming']
    if non_conforming:
        print(f"\n\u26A0\uFE0F Non-Conforming Components ({len(non_conforming)})")
        for item in non_conforming[:20]:
            level = item['level'].replace('_', ' ')
            print(f"  \u2192 [{level}] {item['path']}")
            if item['missing_fields'] and verbose:
                print(f"      Missing: {', '.join(item['missing_fields'][:5])}")
        if len(non_conforming) > 20:
            print(f"  ... and {len(non_conforming) - 20} more")

    print("\n" + "=" * 70)

    # Exit status recommendation based on the overall conformance rate.
    if summary['conformance_rate'] >= 80:
        print("\u2705 PASS - Conformance rate above 80%")
        return 0
    elif summary['conformance_rate'] >= 50:
        print("\u26A0\uFE0F WARN - Conformance rate below 80%")
        return 1
    else:
        print("\u274C FAIL - Conformance rate below 50%")
        return 2
def main():
    """CLI entry point: parse arguments, run the audit, and exit.

    Exit codes: 0 pass, 1 warn (or --strict below 80%), 2 fail; for
    --file, 0 when the file reaches L2+ and 1 otherwise.
    """
    parser = argparse.ArgumentParser(
        description="Validate component frontmatter against ADR-018",
        epilog="Part of CODITECT Agentic Documentation Standard"
    )
    parser.add_argument(
        "--type",
        choices=['agent', 'command', 'skill', 'script', 'hook',
                 'guide', 'reference', 'workflow'],
        help="Validate specific component type only"
    )
    parser.add_argument(
        "--strict", action="store_true",
        help="Exit with error if conformance rate below 80 percent"
    )
    parser.add_argument(
        "--json", action="store_true",
        help="Output as JSON"
    )
    parser.add_argument(
        "--verbose", "-v", action="store_true",
        help="Show detailed missing fields"
    )
    parser.add_argument(
        "--file", type=Path,
        help="Validate a single file"
    )
    args = parser.parse_args()

    # Project root is assumed to be the parent of the scripts/ directory.
    script_path = Path(__file__).resolve()
    project_root = script_path.parent.parent

    validator = FrontmatterValidator(project_root)

    if args.file:
        # Single-file validation. NOTE(review): validate_component uses
        # relative_to(project_root), so --file paths outside the project
        # root will raise — confirm intended behavior.
        result = validator.validate_component(args.file)
        if args.json:
            print(json.dumps(result.to_dict(), indent=2))
        else:
            print(f"\nFile: {result.path}")
            print(f"Type: {result.component_type}")
            print(f"Level: {result.level.name}")
            if result.missing_fields:
                print(f"Missing: {', '.join(result.missing_fields)}")
            if result.errors:
                print(f"Errors: {', '.join(result.errors)}")
            if result.warnings:
                print(f"Warnings: {', '.join(result.warnings)}")
        # L2+ counts as success for a single file.
        sys.exit(0 if result.level.value >= 2 else 1)

    # Full audit.
    audit = validator.audit_all(component_type=args.type)

    if args.json:
        print(json.dumps(audit, indent=2))
        sys.exit(0)

    exit_code = print_report(audit, verbose=args.verbose)

    if args.strict and audit['summary']['conformance_rate'] < 80:
        sys.exit(1)
    sys.exit(exit_code)


if __name__ == "__main__":
    main()