Skip to main content

scripts-inject-frontmatter

#!/usr/bin/env python3
"""
---
title: "Inject Frontmatter"
component_type: script
version: "1.0.0"
audience: contributor
status: stable
summary: "CODITECT Frontmatter Injection Script for ADR-018 compliance"
keywords: ['frontmatter', 'automation', 'adr-018', 'standardization']
tokens: ~500
created: 2025-12-22
updated: 2025-12-22
script_name: "inject-frontmatter.py"
language: python
executable: true
usage: "python3 scripts/inject-frontmatter.py [options]"
python_version: "3.10+"
dependencies: []
modifies_files: true
network_access: false
requires_auth: false
---

CODITECT Frontmatter Injection Script

Automatically adds ADR-018 compliant YAML frontmatter to components that
are missing it.

Usage:
    python3 scripts/inject-frontmatter.py --type agent   # Process agents only
    python3 scripts/inject-frontmatter.py --type all     # Process all types
    python3 scripts/inject-frontmatter.py --dry-run      # Preview changes
    python3 scripts/inject-frontmatter.py --file PATH    # Process single file

Author: AZ1.AI INC
Version: 1.0.0
ADR: ADR-018-AGENTIC-DOCUMENTATION-STANDARD
"""

import argparse
import re
import sys
from datetime import date
from pathlib import Path
from typing import Dict, List, Optional, Tuple

class FrontmatterInjector: """Inject standardized frontmatter into components."""

COMPONENT_PATTERNS = {
'agent': ['agents/*.md'],
'command': ['commands/*.md'],
'skill': ['skills/*/SKILL.md', 'skills/*/*.md'],
'script': ['scripts/*.py'],
'hook': ['hooks/*.py', 'hooks/*.md'],
'prompt': ['prompts/*.md'],
'workflow': ['docs/workflows/*.md'],
'guide': ['docs/guides/*.md', 'docs/getting-started/*.md'],
'reference': ['docs/reference/*.md'],
# ADR-213: ADRs now in coditect-documentation; keep local path as fallback
'adr': ['../../../docs/coditect-documentation/coditect-core/adrs/*.md',
'internal/architecture/adrs/*.md']
}

# MoE role inference from keywords
MOE_KEYWORDS = {
'orchestrator': ['orchestrat', 'coordinat', 'multi-agent', 'workflow'],
'analyst': ['analy', 'research', 'investigat', 'assess', 'audit'],
'judge': ['judge', 'review', 'evaluat', 'verdict', 'council'],
'specialist': [] # default
}

# Domain inference from keywords
DOMAIN_KEYWORDS = {
'security': ['security', 'vulnerab', 'penetrat', 'audit', 'compliance'],
'development': ['develop', 'code', 'implement', 'build', 'engineer'],
'qa': ['test', 'qa', 'quality', 'validation'],
'devops': ['devops', 'deploy', 'ci/cd', 'pipeline', 'docker', 'k8s'],
'documentation': ['document', 'doc', 'guide', 'tutorial', 'reference'],
'research': ['research', 'analy', 'investigat', 'study']
}

def __init__(self, project_root: Path, dry_run: bool = False):
self.project_root = project_root
self.dry_run = dry_run
self.today = date.today().isoformat()

def has_frontmatter(self, content: str) -> bool:
"""Check if content already has YAML frontmatter."""
return content.strip().startswith('---')

def is_adr018_compliant(self, content: str) -> bool:
"""Check if frontmatter is ADR-018 compliant."""
if not self.has_frontmatter(content):
return False
# ADR-018 requires component_type field
return 'component_type:' in content[:2000]

def parse_old_frontmatter(self, content: str) -> Tuple[Dict, str]:
"""Parse old-format frontmatter and return (data, body)."""
if not content.startswith('---'):
return ({}, content)

parts = content.split('---', 2)
if len(parts) < 3:
return ({}, content)

frontmatter_text = parts[1]
body = parts[2]

# Simple YAML-like parsing
data = {}
current_key = None
current_list = None

for line in frontmatter_text.split('\n'):
line = line.rstrip()
if not line or line.startswith('#'):
continue

# Check for list item
if line.startswith(' - ') and current_key:
if current_list is None:
current_list = []
data[current_key] = current_list
current_list.append(line.strip()[2:].strip())
continue

# Check for nested key (indented)
if line.startswith(' ') and ':' in line and not line.strip().startswith('-'):
continue # Skip nested structures for now

# Check for key: value
if ':' in line and not line.startswith(' '):
current_list = None
key, _, value = line.partition(':')
key = key.strip()
value = value.strip().strip('"').strip("'")
current_key = key
if value:
data[key] = value

return (data, body)

def extract_title(self, content: str, filename: str) -> str:
"""Extract title from first heading or generate from filename."""
# Try to find first markdown heading
match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
if match:
return match.group(1).strip()

# Generate from filename
name = Path(filename).stem
# Convert kebab-case or snake_case to Title Case
name = name.replace('-', ' ').replace('_', ' ')
return name.title()

def extract_summary(self, content: str) -> str:
"""Extract summary from first paragraph after heading."""
# Skip frontmatter if present
if content.startswith('---'):
parts = content.split('---', 2)
if len(parts) >= 3:
content = parts[2]

# Find first non-heading paragraph
lines = content.strip().split('\n')
paragraph_lines = []
in_paragraph = False

for line in lines:
line = line.strip()
if not line:
if in_paragraph:
break
continue
if line.startswith('#'):
continue
if line.startswith('```'):
break
if line.startswith('|') or line.startswith('-'):
continue

in_paragraph = True
paragraph_lines.append(line)

if paragraph_lines:
summary = ' '.join(paragraph_lines)
# Truncate to ~100 chars
if len(summary) > 100:
summary = summary[:97] + '...'
return summary

return "Component description pending"

def extract_keywords(self, content: str, filename: str) -> List[str]:
"""Extract keywords from content and filename."""
keywords = set()

# From filename
name = Path(filename).stem.lower()
parts = re.split(r'[-_]', name)
keywords.update(p for p in parts if len(p) > 2)

# Common technical terms from content
content_lower = content.lower()
tech_terms = [
'api', 'database', 'frontend', 'backend', 'security', 'testing',
'deployment', 'docker', 'kubernetes', 'git', 'ci/cd', 'automation',
'validation', 'analysis', 'review', 'generation', 'optimization'
]
for term in tech_terms:
if term in content_lower:
keywords.add(term)

return sorted(list(keywords))[:5]

def infer_moe_role(self, content: str, filename: str) -> str:
"""Infer MoE role from content and filename."""
text = (content + ' ' + filename).lower()

for role, keywords in self.MOE_KEYWORDS.items():
for kw in keywords:
if kw in text:
return role

return 'specialist'

def infer_domains(self, content: str, filename: str) -> List[str]:
"""Infer domains from content and filename."""
text = (content + ' ' + filename).lower()
domains = []

for domain, keywords in self.DOMAIN_KEYWORDS.items():
for kw in keywords:
if kw in text:
domains.append(domain)
break

return domains if domains else ['development']

def infer_agent_type(self, content: str, filename: str) -> str:
"""Infer agent type from content."""
text = (content + ' ' + filename).lower()

if any(x in text for x in ['orchestrat', 'coordinat', 'multi-agent']):
return 'orchestrator'
if any(x in text for x in ['review', 'evaluat', 'audit']):
return 'reviewer'
if any(x in text for x in ['generat', 'creat', 'build']):
return 'generator'
if any(x in text for x in ['analy', 'research', 'investigat']):
return 'analyst'
if any(x in text for x in ['judge', 'verdict', 'score']):
return 'judge'

return 'specialist'

def generate_agent_frontmatter(self, content: str, filepath: Path) -> str:
"""Generate frontmatter for an agent."""
# Use old frontmatter data if available
old = getattr(self, '_old_frontmatter', {})

# Extract title - prefer old 'name' field, then heading, then filename
if 'name' in old:
title = old['name'].replace('-', ' ').title()
else:
title = self.extract_title(content, filepath.name)

# Use old description if available
if 'description' in old:
summary = old['description'][:100]
if len(old['description']) > 100:
summary = summary[:97] + '...'
else:
summary = self.extract_summary(content)

keywords = self.extract_keywords(content, filepath.name)
moe_role = self.infer_moe_role(content, filepath.name)
domains = self.infer_domains(content, filepath.name)
agent_type = self.infer_agent_type(content, filepath.name)

# Use old model info
model = old.get('model', 'sonnet')

# Use old tools if available
tools = old.get('tools', 'Read, Write, Edit, Bash, Grep, Glob, TodoWrite')

# Generate invocation pattern
agent_name = old.get('name', filepath.stem)
invocation = f"Task(subagent_type='{agent_name}', prompt='...')"

frontmatter = f"""---

title: "{title}" component_type: agent version: "1.0.0" audience: contributor status: stable summary: "{summary}" keywords: {keywords} tokens: ~2000 created: {self.today} updated: {self.today} agent_type: {agent_type} domain: {domains} moe_role: {moe_role} moe_capabilities:

  • specialized_analysis
  • task_execution invocation_pattern: "{invocation}" requires_context: true model: {model} tools: "{tools}" quality_score: 75 last_reviewed: {self.today}

""" return frontmatter

def generate_command_frontmatter(self, content: str, filepath: Path) -> str:
"""Generate frontmatter for a command."""
title = self.extract_title(content, filepath.name)
summary = self.extract_summary(content)
keywords = self.extract_keywords(content, filepath.name)

command_name = '/' + filepath.stem

frontmatter = f"""---

title: "{title}" component_type: command version: "1.0.0" audience: customer status: stable summary: "{summary}" keywords: {keywords} tokens: ~1500 created: {self.today} updated: {self.today} command_name: "{command_name}" aliases: [] usage: "{command_name} [options]" requires_confirmation: false modifies_files: false network_access: false

""" return frontmatter

def generate_skill_frontmatter(self, content: str, filepath: Path) -> str:
"""Generate frontmatter for a skill."""
title = self.extract_title(content, filepath.name)
summary = self.extract_summary(content)
keywords = self.extract_keywords(content, filepath.name)

skill_name = filepath.parent.name if filepath.name == 'SKILL.md' else filepath.stem

frontmatter = f"""---

title: "{title}" component_type: skill version: "1.0.0" audience: contributor status: stable summary: "{summary}" keywords: {keywords} tokens: ~3000 created: {self.today} updated: {self.today} skill_name: "{skill_name}" skill_category: pattern when_to_use: "When implementing {skill_name.replace('-', ' ')} patterns" when_not_to_use: "When simpler approaches suffice" composes_with: [] requires: []

""" return frontmatter

def generate_script_frontmatter(self, content: str, filepath: Path) -> str:
"""Generate frontmatter for a Python script (as docstring)."""
title = self.extract_title(content, filepath.name)
summary = self.extract_summary(content)
keywords = self.extract_keywords(content, filepath.name)

# For Python, we add to docstring, not YAML block
frontmatter = f'''"""

title: "{title}" component_type: script version: "1.0.0" audience: contributor status: stable summary: "{summary}" keywords: {keywords} tokens: ~500 created: {self.today} updated: {self.today} script_name: "{filepath.name}" language: python executable: true usage: "python3 scripts/{filepath.name} [options]" python_version: "3.10+" dependencies: [] modifies_files: false network_access: false requires_auth: false

''' return frontmatter

def generate_guide_frontmatter(self, content: str, filepath: Path) -> str:
"""Generate frontmatter for a guide."""
title = self.extract_title(content, filepath.name)
summary = self.extract_summary(content)
keywords = self.extract_keywords(content, filepath.name)

frontmatter = f"""---

title: "{title}" component_type: guide version: "1.0.0" audience: customer status: stable summary: "{summary}" keywords: {keywords} tokens: ~3000 created: {self.today} updated: {self.today} doc_type: guide when_to_read: "When learning about {filepath.stem.replace('-', ' ')}" prerequisites: [] next_steps: [] reading_time: "10 minutes"

""" return frontmatter

def generate_adr_frontmatter(self, content: str, filepath: Path) -> str:
"""Generate frontmatter for an ADR."""
title = self.extract_title(content, filepath.name)
summary = self.extract_summary(content)
keywords = self.extract_keywords(content, filepath.name)

# Extract ADR number if present
match = re.match(r'ADR-(\d+)', filepath.stem)
adr_number = match.group(1) if match else '000'

frontmatter = f"""---

""" return frontmatter

def generate_generic_frontmatter(self, content: str, filepath: Path, component_type: str) -> str:
"""Generate generic frontmatter for other types."""
title = self.extract_title(content, filepath.name)
summary = self.extract_summary(content)
keywords = self.extract_keywords(content, filepath.name)

frontmatter = f"""---

title: "{title}" component_type: {component_type} version: "1.0.0" audience: contributor status: stable summary: "{summary}" keywords: {keywords} tokens: ~1500 created: {self.today} updated: {self.today}

""" return frontmatter

def inject_frontmatter(self, filepath: Path, component_type: str) -> Tuple[bool, str]:
"""Inject frontmatter into a file."""
try:
with open(filepath, 'r', encoding='utf-8') as f:
content = f.read()

# Skip README files
if filepath.name.lower() == 'readme.md':
return (False, "Skipped README")

# Check if already ADR-018 compliant
if self.is_adr018_compliant(content):
return (False, "Already ADR-018 compliant")

# Parse old frontmatter if present
old_data = {}
body = content
if self.has_frontmatter(content):
old_data, body = self.parse_old_frontmatter(content)

# Use old data to enrich new frontmatter generation
# Store old data for generator methods to use
self._old_frontmatter = old_data

# Generate appropriate frontmatter
if component_type == 'agent':
frontmatter = self.generate_agent_frontmatter(content, filepath)
elif component_type == 'command':
frontmatter = self.generate_command_frontmatter(content, filepath)
elif component_type == 'skill':
frontmatter = self.generate_skill_frontmatter(content, filepath)
elif component_type == 'script':
frontmatter = self.generate_script_frontmatter(content, filepath)
elif component_type == 'guide':
frontmatter = self.generate_guide_frontmatter(content, filepath)
elif component_type == 'adr':
frontmatter = self.generate_adr_frontmatter(content, filepath)
else:
frontmatter = self.generate_generic_frontmatter(content, filepath, component_type)

# For Python scripts, handle differently
if component_type == 'script' and filepath.suffix == '.py':
# Check if already has docstring with frontmatter
if '---' in content[:500] and 'component_type:' in content[:1000]:
return (False, "Already has frontmatter in docstring")

# Check for shebang
if content.startswith('#!'):
lines = content.split('\n', 1)
shebang = lines[0] + '\n'
rest = lines[1] if len(lines) > 1 else ''

# Check for existing docstring
if rest.strip().startswith('"""'):
# Insert into existing docstring
new_content = shebang + frontmatter + rest.lstrip().lstrip('"""')
else:
new_content = shebang + frontmatter + '"""\n' + rest
else:
new_content = frontmatter + '"""\n' + content
else:
# Use body (stripped of old frontmatter) for markdown files
new_content = frontmatter + body.lstrip()

if not self.dry_run:
with open(filepath, 'w', encoding='utf-8') as f:
f.write(new_content)

return (True, "Frontmatter injected")

except Exception as e:
return (False, f"Error: {str(e)}")

def discover_files(self, component_type: str) -> List[Path]:
"""Discover files for a component type."""
if component_type == 'all':
files = []
for ctype, patterns in self.COMPONENT_PATTERNS.items():
for pattern in patterns:
files.extend(self.project_root.glob(pattern))
return files

patterns = self.COMPONENT_PATTERNS.get(component_type, [])
files = []
for pattern in patterns:
files.extend(self.project_root.glob(pattern))
return files

def process_type(self, component_type: str) -> Dict[str, int]:
"""Process all files of a component type."""
files = self.discover_files(component_type)
results = {"processed": 0, "skipped": 0, "errors": 0}

for filepath in sorted(files):
if filepath.name.startswith('.'):
continue

# Determine actual component type for 'all'
actual_type = component_type
if component_type == 'all':
for ctype, patterns in self.COMPONENT_PATTERNS.items():
for pattern in patterns:
if filepath.match(pattern):
actual_type = ctype
break
if actual_type != 'all':
break

success, message = self.inject_frontmatter(filepath, actual_type)

if success:
results["processed"] += 1
print(f" ✓ {filepath.relative_to(self.project_root)}")
elif "Error" in message:
results["errors"] += 1
print(f" ✗ {filepath.relative_to(self.project_root)}: {message}")
else:
results["skipped"] += 1

return results

def main():
    """CLI entry point: parse arguments and run the frontmatter injector."""
    parser = argparse.ArgumentParser(
        description="Inject ADR-018 compliant frontmatter into components",
        epilog="Part of CODITECT standardization pipeline"
    )
    parser.add_argument("--type",
                        choices=['agent', 'command', 'skill', 'script', 'hook',
                                 'prompt', 'workflow', 'guide', 'reference',
                                 'adr', 'all'],
                        default='all',
                        help="Component type to process")
    parser.add_argument("--file", type=Path, help="Process single file")
    parser.add_argument("--dry-run", action="store_true",
                        help="Preview without changes")
    args = parser.parse_args()

    # Project root is one level above the scripts/ directory this file lives in.
    script_path = Path(__file__).resolve()
    project_root = script_path.parent.parent

    injector = FrontmatterInjector(project_root, args.dry_run)

    if args.dry_run:
        print("🔍 DRY RUN - No files will be modified\n")

    if args.file:
        # Single-file mode: --type supplies the component type directly.
        success, message = injector.inject_frontmatter(args.file, args.type)
        print(f"{'✓' if success else '✗'} {args.file}: {message}")
    else:
        # Batch mode: process every discovered file of the requested type.
        print(f"🔧 Processing {args.type} components...\n")
        results = injector.process_type(args.type)

        print(f"\n📊 Results:")
        print(f"  Processed: {results['processed']}")
        print(f"  Skipped: {results['skipped']}")
        print(f"  Errors: {results['errors']}")

if name == "main": main()