#!/usr/bin/env python3
"""CODITECT Unified QA Grader (Orchestrator).

Discovers all components by type and runs type-specific graders.
Produces a unified JSON report and a markdown dashboard.

Usage:
    python3 scripts/qa/grade-all.py [--type agents|skills|...]
                                    [--json output.json]
                                    [--report output.md]
                                    [--verbose]

ADR-161: Component Quality Assurance Framework
"""

import os
import sys
import json
import argparse
import importlib.util
from pathlib import Path
from datetime import datetime, timezone

# Make sibling grader modules (qa_common, grade-*.py) importable.
sys.path.insert(0, os.path.dirname(__file__))
from qa_common import grade_from_score

# Repository root (two levels above scripts/qa/) and this QA directory.
CODITECT_CORE = Path(__file__).resolve().parents[2]
QA_DIR = Path(__file__).resolve().parent

# Map type names to grader modules and discovery paths.
# 'pattern' is descriptive metadata; run_grader() implements the actual
# per-type discovery logic.
COMPONENT_TYPES = {
    'agents': {
        'module': 'grade-agents',
        'dir': CODITECT_CORE / 'agents',
        'pattern': '.md',
        'exclude': ['README.md'],
    },
    'skills': {
        'module': 'grade-skills',
        'dir': CODITECT_CORE / 'skills',
        'pattern': 'dirs',
    },
    'commands': {
        'module': 'grade-commands',
        'dir': CODITECT_CORE / 'commands',
        'pattern': '.md',
        'exclude': ['README.md'],
    },
    'hooks': {
        'module': 'grade-hooks',
        'dir': CODITECT_CORE / 'hooks',
        'pattern': 'mixed',
    },
    'scripts': {
        'module': 'grade-scripts',
        'dir': CODITECT_CORE / 'scripts',
        'pattern': 'recursive',
    },
    'workflows': {
        'module': 'grade-workflows',
        'dir': CODITECT_CORE / 'workflows',
        'pattern': '*.md',
    },
    'tools': {
        'module': 'grade-tools',
        'dir': CODITECT_CORE / 'tools',
        'pattern': 'dirs',
    },
}

def load_grader_module(module_name):
    """Dynamically load a grader module from QA_DIR by file stem.

    Args:
        module_name: Grader file stem, e.g. 'grade-agents'. Hyphens are
            converted to underscores for the loaded module's name.

    Returns:
        The executed module object, or None if QA_DIR/<module_name>.py
        does not exist.
    """
    module_path = QA_DIR / f"{module_name}.py"
    if not module_path.exists():
        return None
    spec = importlib.util.spec_from_file_location(
        module_name.replace('-', '_'), module_path)
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)
    return mod

def run_grader(comp_type, config):
    """Run a type-specific grader and return aggregated results.

    Discovers components under config['dir'] using per-type rules, calls
    the matching grade_* function from the dynamically loaded grader
    module, and aggregates individual results.

    Args:
        comp_type: Component type key (a key of COMPONENT_TYPES).
        config: The COMPONENT_TYPES entry; 'module' and 'dir' are used.

    Returns:
        Dict with 'component_type', 'summary', 'components',
        'attribute_pass_rates', and 'errors' keys. Per-component grading
        failures are collected in 'errors' rather than raised.
    """
    module_name = config['module']
    mod = load_grader_module(module_name)
    if mod is None:
        return {
            'component_type': comp_type,
            'summary': {'total_components': 0, 'average_score': 0,
                        'grade_distribution': {}, 'errors': 1},
            'errors': [f'Module {module_name}.py not found'],
            'components': [],
            'attribute_pass_rates': {},
        }

    # Each grader's main() prints to stdout; we call its grade functions
    # directly instead.
    comp_dir = config['dir']
    if not comp_dir.exists():
        return {
            'component_type': comp_type,
            'summary': {'total_components': 0, 'average_score': 0,
                        'grade_distribution': {}, 'errors': 0},
            'errors': [],
            'components': [],
            'attribute_pass_rates': {},
        }

    results = []
    errors = []

    if comp_type == 'agents':
        # One .md file per agent; README is documentation, not an agent.
        files = sorted(f for f in comp_dir.glob('*.md') if f.name != 'README.md')
        for f in files:
            try:
                results.append(mod.grade_agent(str(f)))
            except Exception as e:
                errors.append({'file': f.name, 'error': str(e)})

    elif comp_type == 'skills':
        # A skill is a directory containing a SKILL.md manifest.
        dirs = sorted(d for d in comp_dir.iterdir()
                      if d.is_dir() and (d / 'SKILL.md').exists())
        for d in dirs:
            try:
                r = mod.grade_skill(d)
                if r:
                    results.append(r)
            except Exception as e:
                errors.append({'file': d.name, 'error': str(e)})

    elif comp_type == 'commands':
        files = sorted(f for f in comp_dir.glob('*.md') if f.name != 'README.md')
        for f in files:
            try:
                results.append(mod.grade_command(str(f)))
            except Exception as e:
                errors.append({'file': f.name, 'error': str(e)})

    elif comp_type == 'hooks':
        # Hooks may be Python, shell, or markdown; skip dunder/readme files.
        files = sorted(
            f for f in comp_dir.iterdir()
            if f.is_file() and f.suffix in {'.py', '.sh', '.md'}
            and f.name != 'README.md' and f.name != '__init__.py'
            and not f.name.startswith('__')
        )
        for f in files:
            try:
                results.append(mod.grade_hook(str(f)))
            except Exception as e:
                errors.append({'file': f.name, 'error': str(e)})

    elif comp_type == 'scripts':
        # Recursive discovery, excluding subtrees graded elsewhere or
        # not considered standalone scripts.
        skip_dirs = {'__pycache__', 'agents', 'qa', 'core',
                     'moe_classifier', 'context-storage'}
        files = sorted(
            f for f in comp_dir.rglob('*')
            if f.is_file() and f.suffix in {'.py', '.sh'}
            and f.name != '__init__.py'
            and not any(skip in f.relative_to(comp_dir).parts for skip in skip_dirs)
        )
        for f in files:
            try:
                results.append(mod.grade_script(str(f)))
            except Exception as e:
                errors.append({'file': f.name, 'error': str(e)})

    elif comp_type == 'workflows':
        # Union of nested *.workflow.md files and top-level *.md files;
        # set() dedupes files matched by both globs.
        files = sorted(set(
            list(comp_dir.rglob('*.workflow.md')) +
            [f for f in comp_dir.glob('*.md') if f.name != 'README.md']
        ))
        for f in files:
            try:
                results.append(mod.grade_workflow(str(f)))
            except Exception as e:
                errors.append({'file': f.name, 'error': str(e)})

    elif comp_type == 'tools':
        # A tool is any subdirectory (grader decides whether it's gradeable).
        dirs = sorted(d for d in comp_dir.iterdir()
                      if d.is_dir() and d.name != '__pycache__')
        for d in dirs:
            try:
                r = mod.grade_tool(d)
                if r:
                    results.append(r)
            except Exception as e:
                errors.append({'file': d.name, 'error': str(e)})

    # Aggregate individual component results into summary statistics.
    from qa_common import aggregate_results
    data = aggregate_results(results, comp_type)
    data['errors'] = errors
    return data

def generate_markdown_report(all_data, output_path=None):
    """Generate a markdown dashboard report from grading results.

    Args:
        all_data: Mapping of component type -> result dict as produced by
            run_grader().
        output_path: Optional file path; when given the report is written
            there, otherwise it is printed to stdout.

    Returns:
        The full report as a single markdown string.
    """
    from datetime import timezone  # local import: keeps this edit self-contained

    lines = []
    lines.append("# CODITECT Component QA Dashboard")
    lines.append("")
    # Use an aware UTC timestamp so the 'UTC' label in the output is accurate.
    lines.append(f"Generated: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}")
    lines.append("Framework: ADR-161 Component Quality Assurance Framework")
    lines.append("")

    # Overall summary: component-count-weighted average across all types.
    total_components = 0
    total_weighted_score = 0.0
    type_summaries = []

    for comp_type, data in all_data.items():
        summary = data.get('summary', {})
        count = summary.get('total_components', 0)
        avg = summary.get('average_score', 0)
        total_components += count
        total_weighted_score += avg * count
        type_summaries.append((comp_type, count, avg,
                               summary.get('grade_distribution', {})))

    overall_avg = total_weighted_score / total_components if total_components > 0 else 0

    lines.append("## Overall Health")
    lines.append("")
    lines.append("| Metric | Value |")
    lines.append("|--------|-------|")
    lines.append(f"| Total Components | {total_components} |")
    lines.append(f"| Overall Average | {overall_avg:.1f}% ({grade_from_score(overall_avg)}) |")
    lines.append(f"| Component Types | {len(all_data)} |")
    lines.append("")

    # Per-type summary table, best average first.
    lines.append("## Score by Component Type")
    lines.append("")
    lines.append("| Type | Count | Avg Score | Grade | A | B | C | D | F |")
    lines.append("|------|-------|-----------|-------|---|---|---|---|---|")

    for comp_type, count, avg, dist in sorted(type_summaries, key=lambda x: x[2], reverse=True):
        grade = grade_from_score(avg)
        a = dist.get('A', 0)
        b = dist.get('B', 0)
        c = dist.get('C', 0)
        d = dist.get('D', 0)
        f = dist.get('F', 0)
        lines.append(f"| **{comp_type}** | {count} | {avg:.1f}% | {grade} | {a} | {b} | {c} | {d} | {f} |")

    lines.append("")

    # Bottom performers across all types.
    lines.append("## Bottom 20 Components (Across All Types)")
    lines.append("")
    lines.append("| Type | Component | Score | Grade |")
    lines.append("|------|-----------|-------|-------|")

    all_components = []
    for comp_type, data in all_data.items():
        for comp in data.get('components', []):
            all_components.append({
                'type': comp_type,
                'name': comp['name'],
                'score': comp['total_base'],
                'grade': comp['grade'],
            })

    worst = sorted(all_components, key=lambda x: x['score'])[:20]
    for c in worst:
        lines.append(f"| {c['type']} | {c['name']} | {c['score']:.1f}% | {c['grade']} |")

    lines.append("")

    # Attribute pass rates per type, worst-passing attributes first.
    lines.append("## Attribute Pass Rates")
    lines.append("")
    for comp_type, data in sorted(all_data.items()):
        attr_rates = data.get('attribute_pass_rates', {})
        if not attr_rates:
            continue
        lines.append(f"### {comp_type.title()}")
        lines.append("")
        lines.append("| Attribute | Pass Rate | Passed | Failed |")
        lines.append("|-----------|-----------|--------|--------|")
        for attr, info in sorted(attr_rates.items(), key=lambda x: x[1]['rate']):
            lines.append(f"| {attr} | {info['rate']:.1f}% | {info['passed']} | {info['failed']} |")
        lines.append("")

    # Per-component grading errors, if any occurred.
    total_errors = sum(len(data.get('errors', [])) for data in all_data.values())
    if total_errors > 0:
        lines.append(f"## Errors ({total_errors})")
        lines.append("")
        for comp_type, data in all_data.items():
            for err in data.get('errors', []):
                lines.append(f"- **{comp_type}** `{err.get('file', 'unknown')}`: {err.get('error', 'unknown')}")
        lines.append("")

    report = '\n'.join(lines)

    if output_path:
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(report)
        print(f"Report written to: {output_path}")
    else:
        print(report)

    return report

def main():
    """CLI entry point: grade selected component types and emit reports."""
    parser = argparse.ArgumentParser(description='CODITECT Unified QA Grader')
    parser.add_argument('--type', dest='comp_type',
                        choices=list(COMPONENT_TYPES.keys()),
                        help='Grade only one component type')
    parser.add_argument('--json', dest='json_output', help='Output JSON to file')
    parser.add_argument('--report', dest='report_output',
                        help='Output markdown report to file')
    parser.add_argument('--verbose', action='store_true')
    args = parser.parse_args()

    types_to_grade = [args.comp_type] if args.comp_type else list(COMPONENT_TYPES.keys())

    all_data = {}
    for comp_type in types_to_grade:
        config = COMPONENT_TYPES[comp_type]
        print(f"Grading {comp_type}...", end=' ', flush=True)
        data = run_grader(comp_type, config)
        all_data[comp_type] = data
        summary = data.get('summary', {})
        print(f"{summary.get('total_components', 0)} components, "
              f"avg {summary.get('average_score', 0):.1f}%")

    # Console summary.
    print(f"\n{'='*60}")
    print("CODITECT UNIFIED QA REPORT")
    print(f"{'='*60}")
    total = sum(d['summary']['total_components'] for d in all_data.values())
    print(f"\nTotal Components Graded: {total}")

    for comp_type in sorted(all_data.keys()):
        s = all_data[comp_type]['summary']
        grade = grade_from_score(s['average_score'])
        print(f"  {comp_type:12s}: {s['total_components']:4d} components | "
              f"avg {s['average_score']:5.1f}% ({grade})")

    # Optional JSON output.
    if args.json_output:
        with open(args.json_output, 'w') as f:
            json.dump(all_data, f, indent=2)
        print(f"\nJSON written to: {args.json_output}")

    # Optional markdown report.
    if args.report_output:
        generate_markdown_report(all_data, args.report_output)

    # Verbose mode: show best and worst components across all types.
    if args.verbose:
        all_components = []
        for comp_type, data in all_data.items():
            for comp in data.get('components', []):
                all_components.append({
                    'type': comp_type,
                    'name': comp['name'],
                    'score': comp['total_base'],
                    'grade': comp['grade'],
                })

        sorted_all = sorted(all_components, key=lambda x: x['score'], reverse=True)
        print("\nTOP 10 (ALL TYPES):")
        for c in sorted_all[:10]:
            print(f"  {c['grade']} {c['score']:5.1f}% | [{c['type']}] {c['name']}")
        print("\nBOTTOM 10 (ALL TYPES):")
        for c in sorted_all[-10:]:
            print(f"  {c['grade']} {c['score']:5.1f}% | [{c['type']}] {c['name']}")

if __name__ == '__main__':
    main()