#!/usr/bin/env python3
"""Test script for Judge Persona Loader (H.3.1.8).

Comprehensive tests for:
- Persona loading - single and batch
- Schema validation - required fields and constraints
- Prompt template rendering - with context injection
- Weight normalization - across personas
- Model routing - configuration priority
- Diversity verification - model family requirements
- Verdict mapping - persona-specific to unified
- Environment variable overrides
"""

import json
import os
import sys
import tempfile
import unittest
from pathlib import Path
from typing import Dict, List
from unittest.mock import MagicMock, patch

# Add module path so `core.persona_loader` resolves when run as a script.
sys.path.insert(0, str(Path(__file__).parent))

from core.persona_loader import (
    PersonaLoader,
    PersonaRegistry,
    PersonaConfig,
    JudgePersona,
    EvaluationDimension,
    ModelRouting,
    Demographics,
    EvaluationStyle,
    RedFlag,
    Verdict,
    PersonaNotFoundError,
    PersonaValidationError,
    load_persona,
    get_prompt_template,
    validate_persona_schema,
    verify_panel_diversity,
    get_default_loader,
)
class TestPersonaLoading(unittest.TestCase):
    """Tests for persona loading functionality."""

    @classmethod
    def setUpClass(cls):
        """Set up test fixtures."""
        cls.loader = PersonaLoader()

    def test_loader_initialization(self):
        """Test that PersonaLoader initializes successfully."""
        self.assertIsNotNone(self.loader)
        self.assertIsNotNone(self.loader.registry)
        self.assertIsNotNone(self.loader.configuration)

    def test_get_all_personas(self):
        """Test loading all personas."""
        personas = self.loader.get_all_personas()
        self.assertIsInstance(personas, list)
        self.assertGreaterEqual(len(personas), 5)

    def test_get_all_personas_enabled_only(self):
        """Test that enabled_only filter works."""
        all_personas = self.loader.get_all_personas(enabled_only=False)
        enabled_personas = self.loader.get_all_personas(enabled_only=True)
        # All should be enabled by default
        self.assertEqual(len(all_personas), len(enabled_personas))

    def test_get_persona_by_id(self):
        """Test loading a specific persona by ID."""
        persona = self.loader.get_persona("technical_architect")
        self.assertIsInstance(persona, JudgePersona)
        self.assertEqual(persona.persona_id, "technical_architect")
        self.assertEqual(persona.demographics.name, "Marcus Rivera")

    def test_get_persona_not_found(self):
        """Test that PersonaNotFoundError is raised for unknown persona."""
        with self.assertRaises(PersonaNotFoundError) as ctx:
            self.loader.get_persona("nonexistent_persona")
        # Error message should name the missing persona and list alternatives.
        self.assertIn("nonexistent_persona", str(ctx.exception))
        self.assertIn("Available", str(ctx.exception))

    def test_all_expected_personas_exist(self):
        """Test that all 5 core personas exist."""
        expected_ids = [
            "technical_architect",
            "compliance_auditor",
            "security_analyst",
            "domain_expert_healthcare",
            "qa_evaluator",
        ]
        for persona_id in expected_ids:
            persona = self.loader.get_persona(persona_id)
            self.assertIsNotNone(persona)
            self.assertEqual(persona.persona_id, persona_id)

    def test_persona_demographics(self):
        """Test that persona demographics are properly loaded."""
        persona = self.loader.get_persona("compliance_auditor")
        self.assertEqual(persona.demographics.name, "Dr. Patricia Okonkwo")
        self.assertEqual(persona.demographics.experience_years, 18)
        self.assertIn("CISSP", persona.demographics.credentials)
        self.assertIn("HCISPP", persona.demographics.credentials)
class TestSchemaValidation(unittest.TestCase):
    """Tests for persona schema validation."""

    @classmethod
    def setUpClass(cls):
        """Set up a shared loader for all validation tests."""
        cls.loader = PersonaLoader()

    def test_validate_valid_persona(self):
        """Test that valid personas pass validation."""
        persona = self.loader.get_persona("technical_architect")
        is_valid, errors = self.loader.validate_persona_schema(persona)
        self.assertTrue(is_valid)
        self.assertEqual(len(errors), 0)

    def test_validate_all_personas(self):
        """Test validation of all loaded personas."""
        results = self.loader.validate_all_personas()
        self.assertIsInstance(results, dict)
        # All personas should be valid
        for persona_id, (is_valid, errors) in results.items():
            self.assertTrue(is_valid, f"{persona_id} failed validation: {errors}")

    def test_validate_required_fields(self):
        """Test that validation catches missing required fields."""
        # Create a minimal invalid persona: empty/zero values for every
        # constrained field so validation has something to reject.
        invalid_persona = JudgePersona(
            persona_id="",  # Empty - should fail
            version="1.0.0",
            enabled=True,
            demographics=Demographics(
                name="",  # Empty - should fail
                title="Test",
                experience_years=10,
                credentials=[],
                background="Test",
            ),
            expertise={},
            evaluation_style=EvaluationStyle(
                strictness="",  # Empty - should fail
                focus="Test",
            ),
            model_routing=ModelRouting(
                primary_model="",  # Empty - should fail
                backup_model="test",
                model_family="test",
            ),
            weight=0,  # Invalid - should fail
            trigger_conditions=[],  # Empty - should fail
            evaluation_dimensions=[],  # Empty - should fail
            output_schema={},
        )
        is_valid, errors = self.loader.validate_persona_schema(invalid_persona)
        self.assertFalse(is_valid)
        self.assertGreater(len(errors), 0)

    def test_validate_weight_range(self):
        """Test that weight validation works."""
        persona = self.loader.get_persona("technical_architect")
        # Weights must be in the half-open range (0, 1].
        self.assertGreater(persona.weight, 0)
        self.assertLessEqual(persona.weight, 1)
class TestPromptTemplateGeneration(unittest.TestCase):
    """Tests for prompt template generation."""

    @classmethod
    def setUpClass(cls):
        """Set up a shared loader and a small code artifact to evaluate."""
        cls.loader = PersonaLoader()
        cls.sample_artifact = """
def calculate_risk_score(patient_data):
    # Risk calculation logic
    score = patient_data.get('age', 0) * 0.1
    return score
"""

    def test_generate_prompt_basic(self):
        """Test basic prompt generation."""
        persona = self.loader.get_persona("technical_architect")
        prompt = self.loader.get_prompt_template(persona, self.sample_artifact)
        self.assertIsInstance(prompt, str)
        self.assertGreater(len(prompt), 100)
        # Should contain persona name
        self.assertIn("Marcus Rivera", prompt)
        # Should contain artifact
        self.assertIn("calculate_risk_score", prompt)

    def test_generate_prompt_with_context(self):
        """Test prompt generation with context."""
        persona = self.loader.get_persona("technical_architect")
        context = {
            "adrs": "ADR-001: Use event sourcing for state management",
            "requirements": "Must handle 1000 concurrent users",
            "tech_stack": "Python, Django, PostgreSQL",
        }
        prompt = self.loader.get_prompt_template(persona, self.sample_artifact, context)
        # Should contain context sections
        self.assertIn("ADR-001", prompt)
        self.assertIn("1000 concurrent users", prompt)
        self.assertIn("PostgreSQL", prompt)

    def test_generate_prompt_compliance_auditor(self):
        """Test prompt generation for compliance auditor."""
        persona = self.loader.get_persona("compliance_auditor")
        context = {
            "compliance_requirements": "HIPAA Security Rule 164.312",
            "frameworks": "HIPAA, SOC2",
        }
        prompt = self.loader.get_prompt_template(persona, self.sample_artifact, context)
        # Should include compliance-specific elements
        self.assertIn("Dr. Patricia Okonkwo", prompt)
        self.assertIn("164.312", prompt)
        self.assertIn("COMPLIANT", prompt)  # In output schema

    def test_generate_prompt_security_analyst(self):
        """Test prompt generation for security analyst."""
        persona = self.loader.get_persona("security_analyst")
        context = {
            "security_requirements": "Must prevent SQL injection",
            "tech_stack": "Python, SQLAlchemy",
        }
        prompt = self.loader.get_prompt_template(persona, self.sample_artifact, context)
        # Should include security-specific elements
        self.assertIn("James Nakamura", prompt)
        self.assertIn("ADVERSARIAL", prompt)
        self.assertIn("OWASP", prompt)

    def test_generate_prompt_healthcare_domain(self):
        """Test prompt generation for healthcare domain expert."""
        persona = self.loader.get_persona("domain_expert_healthcare")
        context = {
            "clinical_requirements": "Must support ICD-10 coding",
            "clinical_standards": "HL7 FHIR R4",
        }
        prompt = self.loader.get_prompt_template(persona, self.sample_artifact, context)
        # Should include healthcare-specific elements
        self.assertIn("Dr. Elena Vasquez", prompt)
        self.assertIn("ICD-10", prompt)
        self.assertIn("CLINICALLY", prompt)

    def test_generate_prompt_qa_evaluator(self):
        """Test prompt generation for QA evaluator."""
        persona = self.loader.get_persona("qa_evaluator")
        context = {
            "existing_tests": "Unit tests: 45%, Integration: 20%",
            "requirements": "80% code coverage required",
        }
        prompt = self.loader.get_prompt_template(persona, self.sample_artifact, context)
        # Should include QA-specific elements
        self.assertIn("Priya Sharma", prompt)
        self.assertIn("METHODICAL", prompt)
        self.assertIn("coverage", prompt.lower())

    def test_prompt_contains_evaluation_dimensions(self):
        """Test that prompt contains evaluation dimensions."""
        persona = self.loader.get_persona("technical_architect")
        prompt = self.loader.get_prompt_template(persona, self.sample_artifact)
        # Should contain dimension names
        self.assertIn("Architectural Soundness", prompt)
        self.assertIn("Design Pattern", prompt)
        self.assertIn("Error Handling", prompt)

    def test_prompt_contains_red_flags(self):
        """Test that prompt contains red flags section."""
        persona = self.loader.get_persona("technical_architect")
        prompt = self.loader.get_prompt_template(persona, self.sample_artifact)
        # Should contain red flag patterns
        self.assertIn("God classes", prompt)
        self.assertIn("Tight coupling", prompt)
class TestModelRouting(unittest.TestCase):
    """Tests for model routing functionality."""

    @classmethod
    def setUpClass(cls):
        """Set up a shared loader for routing tests."""
        cls.loader = PersonaLoader()

    def test_get_model_for_persona(self):
        """Test getting model for a persona."""
        model = self.loader.get_model_for_persona("technical_architect")
        self.assertIsInstance(model, str)
        self.assertGreater(len(model), 0)

    def test_get_backup_model(self):
        """Test getting backup model."""
        primary = self.loader.get_model_for_persona("technical_architect", use_backup=False)
        backup = self.loader.get_model_for_persona("technical_architect", use_backup=True)
        # Primary and backup should be different (usually)
        self.assertIsInstance(backup, str)

    def test_runtime_override(self):
        """Test runtime model override."""
        # An explicit override should win over any configured model.
        override = "custom-model-v1"
        model = self.loader.get_model_for_persona(
            "technical_architect",
            override_model=override,
        )
        self.assertEqual(model, override)

    def test_environment_variable_override(self):
        """Test environment variable model override."""
        env_model = "env-override-model"
        env_key = "CODITECT_JUDGE_MODEL_TECHNICAL_ARCHITECT"
        with patch.dict(os.environ, {env_key: env_model}):
            # Need to reload to pick up env var
            # NOTE(review): no reload() is actually called here — presumably
            # the loader reads the env var lazily per call; confirm against
            # get_model_for_persona's implementation.
            model = self.loader.get_model_for_persona("technical_architect")
            self.assertEqual(model, env_model)

    def test_get_all_model_mappings(self):
        """Test getting all model mappings."""
        mappings = self.loader.get_all_model_mappings()
        self.assertIsInstance(mappings, dict)
        self.assertIn("technical_architect", mappings)
        ta_mapping = mappings["technical_architect"]
        self.assertIn("primary_model", ta_mapping)
        self.assertIn("backup_model", ta_mapping)
        self.assertIn("source", ta_mapping)

    def test_model_families_diverse(self):
        """Test that model families are diverse."""
        families = set()
        for persona in self.loader.get_all_personas():
            families.add(persona.model_routing.model_family)
        # Should have at least 3 different families
        self.assertGreaterEqual(len(families), 3)
class TestDiversityVerification(unittest.TestCase):
    """Tests for panel diversity verification."""

    @classmethod
    def setUpClass(cls):
        """Set up a shared loader for diversity tests."""
        cls.loader = PersonaLoader()

    def test_verify_panel_diversity_all_personas(self):
        """Test diversity verification with all personas."""
        is_valid, details = self.loader.verify_panel_diversity()
        self.assertIsInstance(is_valid, bool)
        self.assertIsInstance(details, dict)
        self.assertIn("num_families", details)
        self.assertIn("min_required", details)
        self.assertIn("family_weights", details)

    def test_verify_panel_diversity_meets_requirements(self):
        """Test that default panel meets diversity requirements."""
        is_valid, details = self.loader.verify_panel_diversity()
        self.assertTrue(is_valid, f"Panel diversity check failed: {details}")
        self.assertTrue(details["meets_min_families"])
        self.assertTrue(details["meets_max_weight"])

    def test_verify_panel_diversity_specific_personas(self):
        """Test diversity verification with specific personas."""
        is_valid, details = self.loader.verify_panel_diversity(
            persona_ids=["technical_architect", "compliance_auditor", "security_analyst"]
        )
        self.assertIsInstance(is_valid, bool)
        # With 3 personas from different families, should be valid
        self.assertGreaterEqual(details["num_families"], 2)

    def test_family_weight_distribution(self):
        """Test that no single family dominates."""
        is_valid, details = self.loader.verify_panel_diversity()
        max_weight = details["max_family_weight"]
        max_allowed = details["max_allowed_weight"]
        self.assertLessEqual(max_weight, max_allowed)
class TestWeightNormalization(unittest.TestCase):
    """Tests for persona weight normalization."""

    @classmethod
    def setUpClass(cls):
        """Set up a shared loader for weight tests."""
        cls.loader = PersonaLoader()

    def test_weights_sum_approximately_one(self):
        """Test that persona weights sum to approximately 1."""
        personas = self.loader.get_all_personas()
        total_weight = sum(p.weight for p in personas)
        # Should sum to 1.0 (with small tolerance for floating point)
        self.assertAlmostEqual(total_weight, 1.0, places=2)

    def test_individual_weights_valid(self):
        """Test that individual weights are valid."""
        personas = self.loader.get_all_personas()
        for persona in personas:
            # Each weight must lie in (0, 1].
            self.assertGreater(persona.weight, 0)
            self.assertLessEqual(persona.weight, 1)
class TestVerdictMapping(unittest.TestCase):
    """Tests for verdict mapping functionality."""

    @classmethod
    def setUpClass(cls):
        """Set up a shared loader for verdict-mapping tests."""
        cls.loader = PersonaLoader()

    # NOTE(review): all three tests map persona-specific verdict strings via
    # "technical_architect" — presumably the consensus mapping is global, not
    # per-persona; confirm against map_verdict_to_consensus.

    def test_map_pass_verdicts(self):
        """Test mapping of pass-equivalent verdicts."""
        pass_verdicts = ["PASS", "COMPLIANT", "SECURE", "CLINICALLY_SAFE", "ADEQUATELY_TESTED"]
        for verdict_str in pass_verdicts:
            result = self.loader.map_verdict_to_consensus("technical_architect", verdict_str)
            self.assertEqual(result, Verdict.PASS, f"{verdict_str} should map to PASS")

    def test_map_fail_verdicts(self):
        """Test mapping of fail-equivalent verdicts."""
        fail_verdicts = ["FAIL", "NON_COMPLIANT", "VULNERABLE", "CLINICALLY_UNSAFE", "INSUFFICIENT_TESTING"]
        for verdict_str in fail_verdicts:
            result = self.loader.map_verdict_to_consensus("technical_architect", verdict_str)
            self.assertEqual(result, Verdict.FAIL, f"{verdict_str} should map to FAIL")

    def test_map_conditional_verdicts(self):
        """Test mapping of conditional verdicts."""
        conditional_verdicts = ["CONDITIONAL_PASS", "PARTIALLY_COMPLIANT", "NEEDS_HARDENING", "SAFETY_CONCERNS", "TESTING_GAPS"]
        for verdict_str in conditional_verdicts:
            result = self.loader.map_verdict_to_consensus("technical_architect", verdict_str)
            self.assertEqual(result, Verdict.CONDITIONAL, f"{verdict_str} should map to CONDITIONAL")
class TestTriggerConditions(unittest.TestCase):
    """Tests for trigger condition matching."""

    @classmethod
    def setUpClass(cls):
        """Set up a shared loader for trigger tests."""
        cls.loader = PersonaLoader()

    def test_get_personas_for_artifact_hipaa(self):
        """Test getting personas for HIPAA-tagged artifact."""
        personas = self.loader.get_personas_for_artifact(["HIPAA", "healthcare"])
        persona_ids = [p.persona_id for p in personas]
        # Should include compliance auditor and healthcare domain expert
        self.assertIn("compliance_auditor", persona_ids)
        self.assertIn("domain_expert_healthcare", persona_ids)

    def test_get_personas_for_artifact_code(self):
        """Test getting personas for code artifact."""
        personas = self.loader.get_personas_for_artifact(["code", "python"])
        persona_ids = [p.persona_id for p in personas]
        # Technical architect, security, and QA should trigger on all code
        self.assertIn("technical_architect", persona_ids)
        self.assertIn("security_analyst", persona_ids)
        self.assertIn("qa_evaluator", persona_ids)

    def test_get_personas_for_artifact_api(self):
        """Test getting personas for API artifact."""
        personas = self.loader.get_personas_for_artifact(["API", "endpoint"])
        # Should get security analyst at minimum
        persona_ids = [p.persona_id for p in personas]
        self.assertIn("security_analyst", persona_ids)
class TestConvenienceFunctions(unittest.TestCase):
    """Tests for module-level convenience functions."""

    def test_load_persona_function(self):
        """Test the load_persona convenience function."""
        persona = load_persona("technical_architect")
        self.assertIsInstance(persona, JudgePersona)
        self.assertEqual(persona.persona_id, "technical_architect")

    def test_get_prompt_template_function(self):
        """Test the get_prompt_template convenience function."""
        persona = load_persona("technical_architect")
        prompt = get_prompt_template(persona, "def test(): pass")
        self.assertIsInstance(prompt, str)
        self.assertIn("Marcus Rivera", prompt)

    def test_validate_persona_schema_function(self):
        """Test the validate_persona_schema convenience function."""
        persona = load_persona("technical_architect")
        is_valid, errors = validate_persona_schema(persona)
        self.assertTrue(is_valid)

    def test_verify_panel_diversity_function(self):
        """Test the verify_panel_diversity convenience function."""
        is_valid, details = verify_panel_diversity()
        self.assertIsInstance(is_valid, bool)
        self.assertIsInstance(details, dict)

    def test_get_default_loader_function(self):
        """Test the get_default_loader convenience function."""
        loader1 = get_default_loader()
        loader2 = get_default_loader()
        # Should return the same cached instance
        self.assertIs(loader1, loader2)
class TestConfigurationAccess(unittest.TestCase):
    """Tests for configuration access."""

    @classmethod
    def setUpClass(cls):
        """Set up a shared loader for configuration tests."""
        cls.loader = PersonaLoader()

    def test_configuration_access(self):
        """Test accessing global configuration."""
        config = self.loader.configuration
        self.assertIsInstance(config, PersonaConfig)
        self.assertEqual(config.approval_threshold, 0.67)
        self.assertEqual(config.confidence_floor, 0.60)
        self.assertEqual(config.max_debate_rounds, 3)

    def test_registry_access(self):
        """Test accessing full registry."""
        registry = self.loader.registry
        self.assertIsInstance(registry, PersonaRegistry)
        self.assertIsNotNone(registry.version)
        self.assertIsNotNone(registry.consensus_mapping)
class TestReload(unittest.TestCase):
    """Tests for configuration reload."""

    def test_reload_config(self):
        """Test reloading configuration."""
        loader = PersonaLoader()
        initial_count = len(loader.get_all_personas())
        # Reload should not change count (same file)
        loader.reload()
        after_count = len(loader.get_all_personas())
        self.assertEqual(initial_count, after_count)
class TestEvaluationDimensions(unittest.TestCase):
    """Tests for evaluation dimension parsing."""

    @classmethod
    def setUpClass(cls):
        """Set up a shared loader for dimension tests."""
        cls.loader = PersonaLoader()

    def test_dimensions_loaded(self):
        """Test that evaluation dimensions are loaded."""
        persona = self.loader.get_persona("technical_architect")
        self.assertGreater(len(persona.evaluation_dimensions), 0)

    def test_dimension_attributes(self):
        """Test that dimensions have required attributes."""
        persona = self.loader.get_persona("technical_architect")
        for dim in persona.evaluation_dimensions:
            self.assertIsInstance(dim, EvaluationDimension)
            self.assertIsNotNone(dim.id)
            self.assertIsNotNone(dim.name)
            self.assertIsNotNone(dim.weight)

    def test_dimension_weights_valid(self):
        """Test that dimension weights are valid."""
        persona = self.loader.get_persona("technical_architect")
        total_weight = sum(dim.weight for dim in persona.evaluation_dimensions)
        # Should sum to approximately 1.0
        self.assertAlmostEqual(total_weight, 1.0, places=1)
def run_tests():
    """Run all tests.

    Builds a suite from every test class in this module, runs it with a
    verbose text runner, and returns a process exit code: 0 on success,
    1 if any test failed or errored.
    """
    # Create test suite
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()

    # Add all test classes
    test_classes = [
        TestPersonaLoading,
        TestSchemaValidation,
        TestPromptTemplateGeneration,
        TestModelRouting,
        TestDiversityVerification,
        TestWeightNormalization,
        TestVerdictMapping,
        TestTriggerConditions,
        TestConvenienceFunctions,
        TestConfigurationAccess,
        TestReload,
        TestEvaluationDimensions,
    ]
    for test_class in test_classes:
        tests = loader.loadTestsFromTestCase(test_class)
        suite.addTests(tests)

    # Run tests with verbosity
    runner = unittest.TextTestRunner(verbosity=2)
    result = runner.run(suite)

    # Return exit code
    return 0 if result.wasSuccessful() else 1
# Script entry point: run the suite and propagate the result as the
# process exit status. (Fixed: `name == "main"` -> `__name__ == "__main__"`.)
if __name__ == "__main__":
    exit_code = run_tests()
    sys.exit(exit_code)