#!/usr/bin/env python3
"""CODITECT Hook QA Grader.

Grades hooks against CODITECT-STANDARD-HOOKS.md criteria.

Weights: Structure (20%), Security (30%), Performance (20%),
Integration (15%), Documentation (15%)

Usage:
    python3 scripts/qa/grade-hooks.py [path] [--json output.json] [--verbose]

ADR-161: Component Quality Assurance Framework
"""

import argparse
import json
import os
import re
import sys
from pathlib import Path

# Make the sibling qa_common module importable when this file runs as a script.
sys.path.insert(0, os.path.dirname(__file__))
from qa_common import (
    parse_frontmatter,
    count_words,
    grade_from_score,
    compute_weighted_score,
    aggregate_results,
    output_results,
)

# Repo root is two directories above this file (scripts/qa/ -> root).
CODITECT_CORE = Path(__file__).resolve().parents[2]
HOOKS_DIR = CODITECT_CORE / "hooks"

def grade_hook(filepath):
    """Grade a single hook file against the CODITECT hook standard.

    Args:
        filepath: Path (str) to a ``.py``, ``.sh``, or ``.md`` hook file.

    Returns:
        dict with per-criterion binary scores, weighted category scores,
        the overall percentage (``total_base``), a letter grade, word
        count, file type, and the hook's declared track (if any).
    """
    filename = os.path.basename(filepath)
    hook_name = filename.rsplit('.', 1)[0]
    ext = filepath.rsplit('.', 1)[-1] if '.' in filepath else ''

    with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
        content = f.read()

    # Hooks can be .py, .sh, or .md
    is_python = ext == 'py'
    is_shell = ext == 'sh'
    is_markdown = ext == 'md'

    # For Python/Shell hooks, frontmatter is in docstring YAML
    if is_python or is_shell:
        fm, body = _extract_hook_frontmatter(content)
    else:
        fm, body = parse_frontmatter(content)

    body_lower = body.lower() if body else content.lower()
    word_count = count_words(body if body else content)
    scores = {}

    # A. STRUCTURE (20%)
    if is_python:
        scores['A1_shebang'] = 1 if content.startswith('#!/usr/bin/env python3') or content.startswith('#!/usr/bin/python3') else 0
    elif is_shell:
        scores['A1_shebang'] = 1 if content.startswith('#!/bin/bash') or content.startswith('#!/usr/bin/env bash') or content.startswith('#!/bin/sh') else 0
    else:
        scores['A1_shebang'] = 1  # Markdown hooks don't need shebang

    scores['A2_exit_codes'] = 1 if re.search(r'(sys\.exit|exit\s*\(|return\s+\d|exit_code)', content) else 0
    if is_python:
        scores['A3_error_handling'] = 1 if re.search(r'(try:|except\s+\w+)', content) else 0
    elif is_shell:
        scores['A3_error_handling'] = 1 if re.search(r'(set\s+-e|trap\s+|if\s+\[)', content) else 0
    else:
        scores['A3_error_handling'] = 1 if re.search(r'error.handling|fallback|exception', body_lower) else 0
    scores['A4_function_structure'] = 1 if re.search(r'(def\s+\w+|function\s+\w+|\w+\(\)\s*\{)', content) else 0

    # B. SECURITY (30%)
    scores['B1_input_validation'] = 1 if re.search(r'(json\.loads?|json_data|validate|sanitize)', content) else 0
    # No command injection: check for dangerous subprocess patterns
    has_shell_true = bool(re.search(r'shell\s*=\s*True', content))
    has_eval = bool(re.search(r'\beval\s*\(', content))
    has_exec = bool(re.search(r'\bexec\s*\(', content))
    scores['B2_no_injection'] = 0 if (has_shell_true or has_eval or has_exec) else 1
    scores['B3_no_hardcoded_secrets'] = 0 if re.search(r'(password\s*=\s*["\'][^"\']+|api_key\s*=\s*["\'][^"\']+|secret\s*=\s*["\'][^"\']+)', content) else 1
    scores['B4_path_validation'] = 1 if re.search(r'(os\.path\.|Path\(|pathlib|realpath|abspath)', content) or is_markdown else 0

    # C. PERFORMANCE (20%)
    scores['C1_no_blocking_io'] = 1 if not re.search(r'(time\.sleep\s*\(\s*[5-9]|time\.sleep\s*\(\s*\d{2,})', content) else 0
    # Lazy imports: check if heavy modules are imported inside functions vs top-level
    heavy_imports = re.findall(r'^import\s+(requests|pandas|numpy|scipy|tensorflow|torch)', content, re.MULTILINE)
    scores['C2_lazy_imports'] = 0 if heavy_imports else 1
    # Size proxy for execution speed
    line_count = len(content.split('\n'))
    scores['C3_reasonable_size'] = 1 if line_count <= 500 else 0

    # D. INTEGRATION (15%)
    scores['D1_stdin_json'] = 1 if re.search(r'(sys\.stdin|stdin|json\.load|read_input|input_data)', content) else (1 if is_markdown else 0)
    scores['D2_stdout_output'] = 1 if re.search(r'(print\(|sys\.stdout|json\.dumps|output|echo)', content) else (1 if is_markdown else 0)
    scores['D3_config_documented'] = 1 if re.search(r'(settings\.json|configuration|config|event_trigger|hook_name)', content) or fm.get('event_trigger') or fm.get('hook_name') else 0

    # E. DOCUMENTATION (15%)
    if is_python:
        scores['E1_header_comment'] = 1 if re.search(r'""".*?"""', content, re.DOTALL) else 0
    elif is_shell:
        scores['E1_header_comment'] = 1 if re.search(r'^#\s+\w+', content, re.MULTILINE) else 0
    else:
        scores['E1_header_comment'] = 1 if re.search(r'^#\s+', body, re.MULTILINE) else 0
    scores['E2_inline_comments'] = 1 if len(re.findall(r'#\s+\w+', content)) >= 3 else 0
    scores['E3_usage_documented'] = 1 if fm.get('usage') or re.search(r'usage:|example|##\s*usage', body_lower if body else content.lower()) else 0

    categories = [
        ('A_structure', 20, ['A1_shebang', 'A2_exit_codes', 'A3_error_handling', 'A4_function_structure']),
        ('B_security', 30, ['B1_input_validation', 'B2_no_injection', 'B3_no_hardcoded_secrets', 'B4_path_validation']),
        ('C_performance', 20, ['C1_no_blocking_io', 'C2_lazy_imports', 'C3_reasonable_size']),
        ('D_integration', 15, ['D1_stdin_json', 'D2_stdout_output', 'D3_config_documented']),
        ('E_documentation', 15, ['E1_header_comment', 'E2_inline_comments', 'E3_usage_documented']),
    ]
    total_base, category_scores = compute_weighted_score(scores, categories)

    return {
        'name': hook_name,
        'scores': scores,
        'category_scores': category_scores,
        'total_base': total_base,
        'grade': grade_from_score(total_base),
        'word_count': word_count,
        'file_type': ext,
        'track': fm.get('track', 'N/A'),
    }

def _extract_hook_frontmatter(content): """Extract YAML frontmatter from Python/Shell docstring.""" # Python: look for YAML in triple-quote docstring match = re.search(r'""".?---\s\n(.?)\n---\s\n(.*?)"""', content, re.DOTALL) if match: import yaml try: fm = yaml.safe_load(match.group(1)) body = match.group(2) return fm if fm else {}, body except Exception: pass

# Shell: look for YAML in comment block
match = re.search(r'^#\s*---\s*\n((?:#.*\n)*?)#\s*---', content, re.MULTILINE)
if match:
import yaml
yaml_text = re.sub(r'^#\s?', '', match.group(1), flags=re.MULTILINE)
try:
fm = yaml.safe_load(yaml_text)
return fm if fm else {}, content
except Exception:
pass

return {}, content

def main():
    """CLI entry point: grade one hook file or every hook in a directory."""
    parser = argparse.ArgumentParser(description='Grade CODITECT hooks')
    parser.add_argument('path', nargs='?', default=str(HOOKS_DIR), help='Hook file or directory')
    parser.add_argument('--json', dest='json_output', help='Output JSON to file')
    parser.add_argument('--verbose', action='store_true')
    args = parser.parse_args()

    target = Path(args.path)
    if target.is_file():
        hook_files = [target]
    else:
        # Grade every .py/.sh/.md hook, skipping README and dunder/private files.
        hook_files = sorted([
            f for f in target.iterdir()
            if f.is_file() and f.suffix in {'.py', '.sh', '.md'}
            and f.name != 'README.md' and f.name != '__init__.py'
            and not f.name.startswith('__')
        ])

    results = []
    errors = []
    for filepath in hook_files:
        try:
            results.append(grade_hook(str(filepath)))
        except Exception as e:
            # One unreadable hook should not abort the whole run; record it.
            errors.append({'file': filepath.name, 'error': str(e)})

    data = aggregate_results(results, 'hooks')
    data['errors'] = errors

    if args.json_output:
        output_results(data, args.json_output, 'json')
    output_results(data, format='summary')

    if args.verbose:
        sorted_results = sorted(results, key=lambda x: x['total_base'], reverse=True)
        print(f"\nTOP 10:")
        for r in sorted_results[:10]:
            print(f"  {r['grade']} {r['total_base']:5.1f}% | {r['name']} ({r['file_type']})")
        print(f"\nBOTTOM 10:")
        for r in sorted_results[-10:]:
            print(f"  {r['grade']} {r['total_base']:5.1f}% | {r['name']} ({r['file_type']})")

if __name__ == '__main__':
    main()