#!/usr/bin/env python3
"""CODITECT Agent Validation Test Suite

Comprehensive tests to verify all 130+ agents have correct configuration,
valid frontmatter, proper invocation patterns, and can be loaded by
invoke-agent.py.

Run:
    python3 scripts/tests/test_agents.py
    python3 scripts/tests/test_agents.py -v                                 # Verbose
    python3 scripts/tests/test_agents.py --agent git-workflow-orchestrator  # Single agent

Author: CODITECT Team
Version: 1.0.0
Created: 2025-12-22
"""

import argparse
import json
import os
import re
import sys
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple

# Colors for terminal output: shared Colors module (consolidates 36
# duplicate definitions across the codebase).
# Fixed: Path(file) -> Path(__file__); `file` is not a defined name.
_script_dir = Path(__file__).parent.parent  # tests/ -> scripts/
sys.path.insert(0, str(_script_dir / "core"))
from colors import Colors  # noqa: E402  (sys.path setup must precede this import)

@dataclass
class TestResult:
    """Result of a single test.

    Bug fix: the @dataclass decorator was missing, so keyword construction
    (TestResult(passed=..., message=..., ...), used throughout the suite)
    would raise TypeError.
    """
    passed: bool    # did the check succeed (may hold a truthy field value)
    message: str    # human-readable outcome description
    agent: str      # name of the agent the test ran against
    test_name: str  # machine-readable test identifier


@dataclass
class AgentValidation:
    """Validation results for a single agent."""
    name: str                # agent name (markdown file stem)
    path: str                # path to the agent markdown file
    passed: bool             # True when every test passed and no errors occurred
    tests: List[TestResult]  # individual test outcomes
    errors: List[str]        # parse/IO errors encountered
    warnings: List[str]      # non-fatal issues (e.g. missing recommended fields)

class AgentTestSuite: """Comprehensive test suite for CODITECT agents"""

# Required frontmatter fields
REQUIRED_FIELDS = ['title', 'component_type', 'version', 'status']

# Recommended frontmatter fields
RECOMMENDED_FIELDS = ['summary', 'keywords', 'invocation_pattern', 'model', 'tools']

# Valid status values
VALID_STATUSES = ['stable', 'beta', 'experimental', 'deprecated', 'draft']

# Valid model values
VALID_MODELS = ['sonnet', 'opus', 'haiku', 'claude-3', 'claude-3.5-sonnet']

# Claude Code built-in subagent types (should NOT be used in invocation_pattern for custom agents)
CLAUDE_CODE_BUILTIN_TYPES = [
'general-purpose', 'Explore', 'Plan', 'statusline-setup', 'claude-code-guide',
'codebase-pattern-finder', 'orchestrator', 'thoughts-analyzer',
'software-design-document-specialist', 'educational-content-generator',
'web-search-researcher', 'assessment-creation-agent', 'orchestrator-code-review',
'rust-qa-specialist', 'websocket-protocol-designer', 'venture-capital-business-analyst',
'actix-web-specialist', 'testing-specialist', 'database-architect',
'rust-expert-developer', 'security-specialist', 'thoughts-locator',
'multi-tenant-architect', 'skill-quality-enhancer', 'codi-qa-specialist',
'script-utility-analyzer', 'devops-engineer', 'codi-devops-engineer',
'cloud-architect-code-reviewer', 'novelty-detection-specialist',
'ai-curriculum-specialist', 'codebase-analyzer', 'research-agent',
'cloud-architect', 'software-design-architect', 'monitoring-specialist',
'project-organizer', 'codi-test-engineer', 'terminal-integration-specialist',
'coditect-adr-specialist', 'frontend-react-typescript-expert',
'business-intelligence-analyst', 'k8s-statefulset-specialist',
'codebase-locator', 'wasm-optimization-expert', 'qa-reviewer',
'foundationdb-expert', 'codi-documentation-writer', 'ai-specialist',
'competitive-market-analyst', 'adr-compliance-specialist', 'senior-architect'
]

def __init__(self, framework_root: Path, verbose: bool = False):
self.framework_root = framework_root
self.agents_dir = framework_root / "agents"
self.verbose = verbose
self.results: List[AgentValidation] = []
self.total_tests = 0
self.passed_tests = 0
self.failed_tests = 0

def log(self, message: str, level: str = "info"):
"""Log message with color"""
if level == "pass":
print(f" {Colors.GREEN}✓{Colors.RESET} {message}")
elif level == "fail":
print(f" {Colors.RED}✗{Colors.RESET} {message}")
elif level == "warn":
print(f" {Colors.YELLOW}⚠{Colors.RESET} {message}")
elif level == "info" and self.verbose:
print(f" {Colors.BLUE}ℹ{Colors.RESET} {message}")

def parse_frontmatter(self, content: str) -> Tuple[Dict, str]:
"""Parse YAML frontmatter from markdown content"""
match = re.match(r'^---\n(.*?)\n---\n(.*)$', content, re.DOTALL)

if not match:
return {}, content.strip()

frontmatter_text = match.group(1)
body = match.group(2).strip()

# Simple YAML parsing
frontmatter = {}
current_key = None
current_list = None

for line in frontmatter_text.split('\n'):
# Skip empty lines
if not line.strip():
continue

# Check for list item
if line.startswith(' - ') and current_key:
if current_list is None:
current_list = []
current_list.append(line.strip()[2:])
frontmatter[current_key] = current_list
continue

# Check for key-value pair
if ':' in line and not line.startswith(' '):
# Save previous list if exists
if current_list is not None:
frontmatter[current_key] = current_list
current_list = None

key, value = line.split(':', 1)
key = key.strip()
value = value.strip().strip('"').strip("'")

if value:
frontmatter[key] = value
current_key = None
else:
current_key = key
current_list = None

return frontmatter, body

def test_frontmatter_required_fields(self, frontmatter: Dict, agent_name: str) -> List[TestResult]:
"""Test that required frontmatter fields are present"""
results = []

for field in self.REQUIRED_FIELDS:
passed = field in frontmatter and frontmatter[field]
results.append(TestResult(
passed=passed,
message=f"Has required field '{field}'" if passed else f"Missing required field '{field}'",
agent=agent_name,
test_name=f"required_field_{field}"
))

return results

def test_frontmatter_recommended_fields(self, frontmatter: Dict, agent_name: str) -> List[TestResult]:
"""Test that recommended frontmatter fields are present"""
results = []

for field in self.RECOMMENDED_FIELDS:
passed = field in frontmatter and frontmatter[field]
results.append(TestResult(
passed=passed,
message=f"Has recommended field '{field}'" if passed else f"Missing recommended field '{field}'",
agent=agent_name,
test_name=f"recommended_field_{field}"
))

return results

def test_component_type(self, frontmatter: Dict, agent_name: str) -> TestResult:
"""Test that component_type is 'agent'"""
component_type = frontmatter.get('component_type', '')
passed = component_type == 'agent'
return TestResult(
passed=passed,
message=f"component_type is 'agent'" if passed else f"component_type is '{component_type}', expected 'agent'",
agent=agent_name,
test_name="component_type_agent"
)

def test_valid_status(self, frontmatter: Dict, agent_name: str) -> TestResult:
"""Test that status is a valid value"""
status = frontmatter.get('status', '')
passed = status in self.VALID_STATUSES
return TestResult(
passed=passed,
message=f"Status '{status}' is valid" if passed else f"Status '{status}' is invalid (valid: {self.VALID_STATUSES})",
agent=agent_name,
test_name="valid_status"
)

def test_valid_model(self, frontmatter: Dict, agent_name: str) -> TestResult:
"""Test that model is a valid value if specified"""
model = frontmatter.get('model', 'sonnet') # Default is sonnet
passed = model in self.VALID_MODELS
return TestResult(
passed=passed,
message=f"Model '{model}' is valid" if passed else f"Model '{model}' is invalid (valid: {self.VALID_MODELS})",
agent=agent_name,
test_name="valid_model"
)

def test_invocation_pattern(self, frontmatter: Dict, agent_name: str) -> TestResult:
"""Test that invocation_pattern uses correct format"""
pattern = frontmatter.get('invocation_pattern', '')

if not pattern:
return TestResult(
passed=False,
message="Missing invocation_pattern",
agent=agent_name,
test_name="invocation_pattern"
)

# Check for INCORRECT pattern: Task(subagent_type='agent-name', ...)
# This is wrong because custom agents are NOT Claude Code built-in types
incorrect_pattern = re.search(r"subagent_type=['\"]" + re.escape(agent_name) + r"['\"]", pattern)

if incorrect_pattern:
return TestResult(
passed=False,
message=f"INCORRECT invocation pattern! Uses '{agent_name}' as subagent_type, but custom agents must use 'general-purpose'",
agent=agent_name,
test_name="invocation_pattern"
)

# Check for CORRECT pattern: /agent or general-purpose
correct_patterns = [
f"/agent {agent_name}",
"subagent_type='general-purpose'",
'subagent_type="general-purpose"',
]

has_correct = any(p in pattern for p in correct_patterns)

if has_correct:
return TestResult(
passed=True,
message="Invocation pattern uses correct '/agent' or 'general-purpose' format",
agent=agent_name,
test_name="invocation_pattern"
)
else:
return TestResult(
passed=False,
message=f"Invocation pattern may be incorrect: {pattern[:80]}...",
agent=agent_name,
test_name="invocation_pattern"
)

def test_has_system_prompt(self, body: str, agent_name: str) -> TestResult:
"""Test that agent has a system prompt (body content)"""
# Check that body has substantial content (more than just headers)
lines = [l for l in body.split('\n') if l.strip() and not l.startswith('#')]
passed = len(lines) >= 5 # At least 5 non-header lines

return TestResult(
passed=passed,
message=f"Has system prompt ({len(lines)} content lines)" if passed else f"System prompt too short ({len(lines)} lines)",
agent=agent_name,
test_name="has_system_prompt"
)

def test_no_placeholder_content(self, content: str, agent_name: str) -> TestResult:
"""Test that agent doesn't have placeholder content"""
# Precise placeholder patterns to avoid false positives
# Note: [Additional...] is legitimate example content, not a placeholder
placeholders = [
'TODO:', 'FIXME:', 'XXX:',
'[INSERT ', '[INSERT]', '[INSERT:',
'[ADD HERE', '[ADD YOUR', '[ADD THE',
'[PLACEHOLDER', '[TBD]', '[TBD ',
'lorem ipsum', '<!-- TODO', '<!-- FIXME'
]

found = []
content_lower = content.lower()
for p in placeholders:
if p.lower() in content_lower:
found.append(p)

passed = len(found) == 0
return TestResult(
passed=passed,
message="No placeholder content found" if passed else f"Found placeholders: {found}",
agent=agent_name,
test_name="no_placeholders"
)

def test_title_matches_filename(self, frontmatter: Dict, agent_name: str) -> TestResult:
"""Test that title somewhat matches the filename"""
title = frontmatter.get('title', '').lower().replace(' ', '-').replace('_', '-')
agent_lower = agent_name.lower()

# Check if there's reasonable overlap
title_words = set(title.split('-'))
agent_words = set(agent_lower.split('-'))
overlap = title_words & agent_words

passed = len(overlap) >= 1 or title in agent_lower or agent_lower in title
return TestResult(
passed=passed,
message="Title matches filename" if passed else f"Title '{frontmatter.get('title')}' doesn't match filename '{agent_name}'",
agent=agent_name,
test_name="title_matches_filename"
)

def validate_agent(self, agent_path: Path) -> AgentValidation:
"""Run all validation tests on a single agent"""
agent_name = agent_path.stem
tests = []
errors = []
warnings = []

try:
content = agent_path.read_text(encoding='utf-8')
frontmatter, body = self.parse_frontmatter(content)

# Run all tests
tests.extend(self.test_frontmatter_required_fields(frontmatter, agent_name))
tests.append(self.test_component_type(frontmatter, agent_name))
tests.append(self.test_valid_status(frontmatter, agent_name))
tests.append(self.test_valid_model(frontmatter, agent_name))
tests.append(self.test_invocation_pattern(frontmatter, agent_name))
tests.append(self.test_has_system_prompt(body, agent_name))
tests.append(self.test_no_placeholder_content(content, agent_name))
tests.append(self.test_title_matches_filename(frontmatter, agent_name))

# Recommended field tests (warnings only)
for result in self.test_frontmatter_recommended_fields(frontmatter, agent_name):
if not result.passed:
warnings.append(result.message)

except Exception as e:
errors.append(f"Failed to parse agent: {str(e)}")
tests.append(TestResult(
passed=False,
message=f"Parse error: {str(e)}",
agent=agent_name,
test_name="parse_agent"
))

passed = all(t.passed for t in tests) and len(errors) == 0
return AgentValidation(
name=agent_name,
path=str(agent_path),
passed=passed,
tests=tests,
errors=errors,
warnings=warnings
)

def run_all_tests(self, single_agent: Optional[str] = None) -> bool:
"""Run tests on all agents or a single agent"""
print(f"\n{Colors.BOLD}CODITECT Agent Validation Test Suite{Colors.RESET}")
print("=" * 50)

# Find all agent files
if single_agent:
agent_files = [self.agents_dir / f"{single_agent}.md"]
if not agent_files[0].exists():
print(f"{Colors.RED}Agent not found: {single_agent}{Colors.RESET}")
return False
else:
agent_files = sorted([
f for f in self.agents_dir.glob("*.md")
if f.name != "README.md"
])

print(f"\nTesting {len(agent_files)} agents...\n")

# Run tests on each agent
for agent_path in agent_files:
validation = self.validate_agent(agent_path)
self.results.append(validation)

# Print results
status = f"{Colors.GREEN}PASS{Colors.RESET}" if validation.passed else f"{Colors.RED}FAIL{Colors.RESET}"
print(f"{validation.name}: {status}")

for test in validation.tests:
self.total_tests += 1
if test.passed:
self.passed_tests += 1
if self.verbose:
self.log(test.message, "pass")
else:
self.failed_tests += 1
self.log(test.message, "fail")

for error in validation.errors:
self.log(error, "fail")

if self.verbose:
for warning in validation.warnings:
self.log(warning, "warn")

# Print summary
self.print_summary()

return self.failed_tests == 0

def print_summary(self):
"""Print test summary"""
print("\n" + "=" * 50)
print(f"{Colors.BOLD}Test Summary{Colors.RESET}")
print("=" * 50)

total_agents = len(self.results)
passed_agents = sum(1 for r in self.results if r.passed)
failed_agents = total_agents - passed_agents

print(f"\nAgents: {passed_agents}/{total_agents} passed")
print(f"Tests: {self.passed_tests}/{self.total_tests} passed")

if failed_agents > 0:
print(f"\n{Colors.RED}Failed Agents:{Colors.RESET}")
for result in self.results:
if not result.passed:
print(f" - {result.name}")
for test in result.tests:
if not test.passed:
print(f" {Colors.RED}✗{Colors.RESET} {test.message}")

# Overall result
if failed_agents == 0:
print(f"\n{Colors.GREEN}{Colors.BOLD}All agents passed validation!{Colors.RESET}")
else:
print(f"\n{Colors.RED}{Colors.BOLD}{failed_agents} agent(s) failed validation{Colors.RESET}")

def export_results(self, output_path: Path):
"""Export results to JSON"""
data = {
"timestamp": datetime.now().isoformat(),
"summary": {
"total_agents": len(self.results),
"passed_agents": sum(1 for r in self.results if r.passed),
"failed_agents": sum(1 for r in self.results if not r.passed),
"total_tests": self.total_tests,
"passed_tests": self.passed_tests,
"failed_tests": self.failed_tests
},
"agents": [
{
"name": r.name,
"path": r.path,
"passed": r.passed,
"tests": [
{"name": t.test_name, "passed": t.passed, "message": t.message}
for t in r.tests
],
"errors": r.errors,
"warnings": r.warnings
}
for r in self.results
]
}

with open(output_path, 'w') as f:
json.dump(data, f, indent=2)
print(f"\nResults exported to: {output_path}")

def main():
    """CLI entry point: parse arguments, locate the framework root, run suite.

    Exits with status 0 when all agents pass, 1 otherwise.
    """
    parser = argparse.ArgumentParser(description="CODITECT Agent Validation Test Suite")
    parser.add_argument('-v', '--verbose', action='store_true', help='Verbose output')
    parser.add_argument('--agent', type=str, help='Test single agent by name')
    parser.add_argument('--export', type=str, help='Export results to JSON file')
    args = parser.parse_args()

    # Find framework root: scripts/tests/ -> scripts -> coditect-core
    script_path = Path(__file__).resolve()
    framework_root = script_path.parent.parent.parent

    # Verify framework root before doing any work.
    if not (framework_root / "agents").exists():
        print(f"Error: Could not find agents directory at {framework_root / 'agents'}")
        sys.exit(1)

    # Run tests
    suite = AgentTestSuite(framework_root, verbose=args.verbose)
    success = suite.run_all_tests(single_agent=args.agent)

    # Export if requested
    if args.export:
        suite.export_results(Path(args.export))

    sys.exit(0 if success else 1)

# Fixed: `if name == "main"` is a NameError (and would never be true);
# the standard script guard compares __name__ against "__main__".
if __name__ == "__main__":
    main()