#!/usr/bin/env python3
"""CODITECT Agent QA Grader.

Grades agents against CODITECT-STANDARD-AGENTS.md criteria.

Weights: File Format (20%), YAML Frontmatter (40%),
Instruction Quality (30%), Documentation (10%).
Bonus: Content Quality (10% bonus, capped at 100).

Usage:
    python3 scripts/qa/grade-agents.py [path] [--json output.json] [--verbose]

ADR-161: Component Quality Assurance Framework
"""

import argparse
import json
import os
import re
import sys
from pathlib import Path

# Add this script's directory to the import path so qa_common resolves
# when the script is run directly (not as a package module).
sys.path.insert(0, os.path.dirname(__file__))
from qa_common import (
    parse_frontmatter, count_words, content_quality_score, grade_from_score,
    compute_weighted_score, aggregate_results, output_results,
    parse_tools_field, validate_tools, VALID_TOOLS, VALID_MODELS,
)

# Repository root (two directory levels above scripts/qa/) and the
# default directory of agent definitions to grade.
CODITECT_CORE = Path(__file__).resolve().parents[2]
AGENTS_DIR = CODITECT_CORE / "agents"

def grade_agent(filepath):
    """Grade a single agent file against the CODITECT agent standard.

    Args:
        filepath: Path (str) to the agent's ``.md`` file.

    Returns:
        dict with per-check 0/1 scores, per-category weighted scores,
        the base total, the bonus-enhanced total (capped at 100), a
        letter grade, the body word count, and the agent's track.
    """
    filename = os.path.basename(filepath)
    agent_name = filename.replace('.md', '')

    with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
        content = f.read()

    fm, body = parse_frontmatter(content)
    body_lower = body.lower()
    word_count = count_words(body)
    scores = {}

    # A. FILE FORMAT (20%) — kebab-case filename, name field matches it.
    scores['A1_filename_format'] = 1 if re.match(r'^[a-z0-9]+(-[a-z0-9]+)*\.md$', filename) else 0
    scores['A2_name_matches_file'] = 1 if str(fm.get('name', '')) == agent_name else 0
    scores['A3_name_length'] = 1 if len(agent_name) <= 64 else 0

    # B. YAML FRONTMATTER (40%) — required fields present and valid.
    scores['B1_name_present'] = 1 if fm.get('name') else 0
    desc = fm.get('description', '')
    scores['B2_description_present'] = 1 if desc and len(str(desc).strip()) > 0 else 0
    tools_list = parse_tools_field(fm.get('tools', ''))
    scores['B3_tools_valid'] = 1 if validate_tools(tools_list) else 0
    scores['B4_model_valid'] = 1 if str(fm.get('model', '')).lower() in VALID_MODELS else 0
    scores['B5_desc_length_ok'] = 1 if len(str(desc)) <= 1024 else 0

    # C. INSTRUCTION QUALITY (30%) — expected sections and examples.
    # C1 only scans the first 500 chars: the role statement must open the body.
    scores['C1_role_statement'] = 1 if re.search(r'you are\s+(a|an|the)\s+', body_lower[:500]) else 0
    scores['C2_responsibilities'] = 1 if re.search(r'##\s*(core\s+)?responsibilities', body_lower) else 0
    scores['C3_capabilities'] = 1 if re.search(r'##\s*capabilities', body_lower) and re.search(r'^###\s+', body, re.MULTILINE) else 0
    # C4 is case-sensitive on purpose: looks for literal Task(...) invocations.
    scores['C4_invocation_examples'] = 1 if re.search(r'Task\(', body) else 0
    scores['C5_limitations'] = 1 if re.search(r'##\s*(limitations|when\s+not\s+to\s+use)', body_lower) else 0
    scores['C6_integration'] = 1 if re.search(r'##\s*(integration|related\s+components)', body_lower) else 0

    # D. DOCUMENTATION (10%) — heading structure, code fences, depth.
    scores['D1_markdown_headings'] = 1 if re.search(r'^#\s+', body, re.MULTILINE) and re.search(r'^##\s+', body, re.MULTILINE) else 0
    scores['D2_code_examples'] = 1 if re.search(r'```\w+', body) else 0
    scores['D3_content_depth'] = 1 if word_count >= 200 else 0

    # E. CONTENT QUALITY (bonus 10%) — not part of the base score.
    cq = content_quality_score(body)
    scores['E1_specificity'] = 1 if cq['specificity'] >= 0.02 else 0
    scores['E2_code_runnable'] = 1 if cq['code_quality'] == 1 else 0
    scores['E3_anti_patterns'] = 1 if re.search(r'##\s*anti.?patterns', body_lower) else 0
    scores['E4_success_criteria'] = 1 if re.search(r'##\s*success\s+(output|criteria)', body_lower) else 0
    scores['E5_not_stale'] = 1 if fm.get('updated') else 0

    # Compute weighted base score across the four graded categories.
    categories = [
        ('A_file_format', 20, ['A1_filename_format', 'A2_name_matches_file', 'A3_name_length']),
        ('B_frontmatter', 40, ['B1_name_present', 'B2_description_present', 'B3_tools_valid', 'B4_model_valid', 'B5_desc_length_ok']),
        ('C_instruction_quality', 30, ['C1_role_statement', 'C2_responsibilities', 'C3_capabilities', 'C4_invocation_examples', 'C5_limitations', 'C6_integration']),
        ('D_documentation', 10, ['D1_markdown_headings', 'D2_code_examples', 'D3_content_depth']),
    ]
    total_base, category_scores = compute_weighted_score(scores, categories)

    # Bonus: E checks add up to 10 points, with the total capped at 100.
    e_checks = ['E1_specificity', 'E2_code_runnable', 'E3_anti_patterns', 'E4_success_criteria', 'E5_not_stale']
    e_score = sum(scores[k] for k in e_checks) / len(e_checks) * 10
    total_enhanced = min(100, total_base + e_score)
    category_scores['E_content_quality'] = round(e_score, 1)

    return {
        'name': agent_name,
        'scores': scores,
        'category_scores': category_scores,
        'total_base': total_base,
        'total_enhanced': round(total_enhanced, 1),
        'grade': grade_from_score(total_base),  # grade reflects base, not bonus
        'word_count': word_count,
        'track': fm.get('track', 'N/A'),
    }

def main():
    """CLI entry point: grade one agent file or every agent in a directory.

    Writes a summary to stdout, optionally dumps full JSON via ``--json``,
    and with ``--verbose`` prints the 10 best- and worst-scoring agents.
    """
    parser = argparse.ArgumentParser(description='Grade CODITECT agents')
    parser.add_argument('path', nargs='?', default=str(AGENTS_DIR),
                        help='Agent file or directory')
    parser.add_argument('--json', dest='json_output', help='Output JSON to file')
    parser.add_argument('--verbose', action='store_true', help='Show detailed output')
    args = parser.parse_args()

    target = Path(args.path)
    if target.is_file():
        agent_files = [target]
    else:
        # README.md lives alongside the agents but is not itself an agent.
        agent_files = sorted(f for f in target.glob('*.md') if f.name != 'README.md')

    results = []
    errors = []
    for filepath in agent_files:
        try:
            results.append(grade_agent(str(filepath)))
        except Exception as e:
            # Best-effort: record the failure and keep grading the rest.
            errors.append({'file': filepath.name, 'error': str(e)})

    data = aggregate_results(results, 'agents')
    data['errors'] = errors

    if args.json_output:
        output_results(data, args.json_output, 'json')
    output_results(data, format='summary')

    if args.verbose:
        sorted_results = sorted(results, key=lambda x: x['total_base'], reverse=True)
        print("\nTOP 10:")
        for r in sorted_results[:10]:
            print(f" {r['grade']} {r['total_base']:5.1f}% | {r['name']}")
        print("\nBOTTOM 10:")
        for r in sorted_results[-10:]:
            print(f" {r['grade']} {r['total_base']:5.1f}% | {r['name']}")

if __name__ == '__main__':
    main()