
#!/usr/bin/env python3
"""ADR-151 Context Graph Performance Tests (J.25.7.12)

Performance benchmarks for the context graph system ensuring:

  • Full build pipeline completes in <100ms for 128-node graphs
  • Seed selection completes in <20ms
  • BFS expansion completes in <50ms
  • Pruning completes in <30ms
  • Serialization completes in <10ms

Run:
    python3 scripts/context_graph/test_performance.py
    python3 scripts/context_graph/test_performance.py -v        # verbose
    python3 scripts/context_graph/test_performance.py --runs 5  # more iterations

Created: 2026-02-05
Author: Claude (Opus 4.6)
Track: J (Memory Intelligence)
Task: J.25.7.12
"""

import json
import statistics
import sys
import time
import unittest
from pathlib import Path
from typing import Any, Dict, List

Set up path for imports

_script_dir = Path(file).resolve().parent _coditect_root = _script_dir.parent.parent if str(_coditect_root) not in sys.path: sys.path.insert(0, str(_coditect_root))

# Number of benchmark iterations per test (configurable via the --runs CLI flag)
BENCHMARK_RUNS = 3

class TestPerformance(unittest.TestCase): """Performance benchmarks for context graph pipeline."""

@classmethod
def setUpClass(cls):
"""Set up test fixtures and verify database availability."""
from scripts.context_graph import ContextGraphBuilder
from scripts.core.paths import get_org_db_path, get_sessions_db_path

cls.org_db_path = get_org_db_path()
cls.sessions_db_path = get_sessions_db_path()

if not cls.org_db_path.exists():
raise unittest.SkipTest(f"org.db not found at {cls.org_db_path}")

# Warm up: verify KG has sufficient data
builder = ContextGraphBuilder()
stats = builder.get_stats()
builder.close()

cls.total_nodes = stats.get("total_nodes", 0)
cls.total_edges = stats.get("total_edges", 0)

if cls.total_nodes < 100:
raise unittest.SkipTest(
f"Insufficient KG data ({cls.total_nodes} nodes). "
"Need at least 100 for meaningful benchmarks."
)

def _benchmark(self, fn, runs=None) -> Dict[str, float]:
"""Run a function multiple times and return timing statistics.

Returns dict with: min_ms, max_ms, mean_ms, median_ms, p95_ms
"""
runs = runs or BENCHMARK_RUNS
timings = []

for _ in range(runs):
start = time.perf_counter()
result = fn()
elapsed_ms = (time.perf_counter() - start) * 1000
timings.append(elapsed_ms)

return {
"min_ms": min(timings),
"max_ms": max(timings),
"mean_ms": statistics.mean(timings),
"median_ms": statistics.median(timings),
"p95_ms": sorted(timings)[int(len(timings) * 0.95)] if len(timings) >= 2 else max(timings),
"runs": runs,
"timings": timings,
}

# =========================================================================
# Full Pipeline Benchmarks
# =========================================================================

def test_full_build_128_nodes_under_100ms(self):
"""J.25.7.12: Full build pipeline <100ms for 128-node graph."""
from scripts.context_graph import ContextGraphBuilder

def build_128():
with ContextGraphBuilder() as builder:
graph = builder.build(
task_description="find decisions about database architecture",
seed_strategy="semantic",
token_budget=8000,
max_nodes=128,
max_depth=3,
persist=False,
)
return graph

stats = self._benchmark(build_128)
median = stats["median_ms"]

self.assertLess(
median, 100,
f"Full 128-node build median {median:.1f}ms exceeds 100ms target. "
f"Stats: min={stats['min_ms']:.1f}ms, max={stats['max_ms']:.1f}ms"
)

def test_full_build_64_nodes_under_60ms(self):
"""Full build pipeline <60ms for 64-node graph."""
from scripts.context_graph import ContextGraphBuilder

def build_64():
with ContextGraphBuilder() as builder:
graph = builder.build(
task_description="find error solutions for Python",
seed_strategy="semantic",
token_budget=4000,
max_nodes=64,
max_depth=3,
persist=False,
)
return graph

stats = self._benchmark(build_64)
median = stats["median_ms"]

self.assertLess(
median, 60,
f"Full 64-node build median {median:.1f}ms exceeds 60ms target. "
f"Stats: min={stats['min_ms']:.1f}ms, max={stats['max_ms']:.1f}ms"
)

def test_full_build_policy_first_under_100ms(self):
"""Policy-first strategy build <100ms."""
from scripts.context_graph import ContextGraphBuilder

def build_policy():
with ContextGraphBuilder() as builder:
graph = builder.build(
task_description="find ADR governance decisions",
seed_strategy="policy_first",
token_budget=4000,
max_nodes=128,
persist=False,
)
return graph

stats = self._benchmark(build_policy)
median = stats["median_ms"]

self.assertLess(
median, 100,
f"Policy-first build median {median:.1f}ms exceeds 100ms target. "
f"Stats: min={stats['min_ms']:.1f}ms, max={stats['max_ms']:.1f}ms"
)

# =========================================================================
# Component-Level Benchmarks
# =========================================================================

def test_seed_selection_under_20ms(self):
"""Seed selection alone completes in <20ms."""
from scripts.context_graph.algorithms import select_seed_nodes
import sqlite3

conn = sqlite3.connect(str(self.org_db_path))
conn.row_factory = sqlite3.Row

def select_seeds():
return select_seed_nodes(
conn=conn,
task_description="find security patterns and error solutions",
strategy="semantic",
max_seeds=5,
)

try:
stats = self._benchmark(select_seeds)
median = stats["median_ms"]

self.assertLess(
median, 20,
f"Seed selection median {median:.1f}ms exceeds 20ms target. "
f"Stats: min={stats['min_ms']:.1f}ms, max={stats['max_ms']:.1f}ms"
)
finally:
conn.close()

def test_bfs_expansion_under_50ms(self):
"""BFS expansion from seeds completes in <50ms."""
from scripts.context_graph.algorithms import select_seed_nodes, bfs_expand
import sqlite3

conn = sqlite3.connect(str(self.org_db_path))
conn.row_factory = sqlite3.Row

try:
# Pre-compute seeds (not part of BFS benchmark)
seeds = select_seed_nodes(
conn=conn,
task_description="find component relationships",
strategy="semantic",
max_seeds=5,
)

if not seeds:
self.skipTest("No seeds found for BFS benchmark")

def expand():
return bfs_expand(
conn=conn,
seed_nodes=seeds,
max_depth=3,
max_nodes=256,
relevance_threshold=0.15,
decay_factor=0.8,
)

stats = self._benchmark(expand)
median = stats["median_ms"]

self.assertLess(
median, 50,
f"BFS expansion median {median:.1f}ms exceeds 50ms target. "
f"Stats: min={stats['min_ms']:.1f}ms, max={stats['max_ms']:.1f}ms"
)
finally:
conn.close()

def test_pruning_under_30ms(self):
"""Pruning a 256-node graph to 128 nodes completes in <30ms."""
from scripts.context_graph import ContextGraphBuilder
from scripts.context_graph.pruning import prune_graph

# Build a larger graph to prune
with ContextGraphBuilder() as builder:
graph = builder.build(
task_description="find all components and decisions",
seed_strategy="semantic",
token_budget=50000, # Large budget to get many nodes
max_nodes=256,
persist=False,
)

if graph.node_count < 20:
self.skipTest(f"Graph too small ({graph.node_count} nodes) for pruning benchmark")

def prune():
return prune_graph(
graph=graph,
token_budget=2000,
relevance_threshold=0.3,
max_depth=3,
preserve_seeds=True,
remove_disconnected=True,
)

stats = self._benchmark(prune)
median = stats["median_ms"]

self.assertLess(
median, 30,
f"Pruning median {median:.1f}ms exceeds 30ms target. "
f"Stats: min={stats['min_ms']:.1f}ms, max={stats['max_ms']:.1f}ms"
)

def test_serialization_under_10ms(self):
"""Serialization to all formats completes in <10ms each."""
from scripts.context_graph import ContextGraphBuilder

with ContextGraphBuilder() as builder:
graph = builder.build(
task_description="find decisions",
token_budget=4000,
max_nodes=128,
persist=False,
)

for fmt in ["markdown", "json", "text"]:
def serialize(f=fmt):
return builder.serialize_for_context(graph, format=f)

stats = self._benchmark(serialize)
median = stats["median_ms"]

self.assertLess(
median, 10,
f"Serialization ({fmt}) median {median:.1f}ms exceeds 10ms target. "
f"Stats: min={stats['min_ms']:.1f}ms, max={stats['max_ms']:.1f}ms"
)

# =========================================================================
# Persistence Benchmarks
# =========================================================================

def test_persist_and_load_under_50ms(self):
"""Persist + load round-trip <50ms."""
from scripts.context_graph import ContextGraphBuilder

with ContextGraphBuilder() as builder:
graph = builder.build(
task_description="perf test persistence",
token_budget=2000,
max_nodes=64,
persist=False,
)

def persist_and_load():
# Persist
builder._persist_graph(graph, session_id="perf-test")
# Load back
loaded = builder.load_graph(graph.id)
return loaded

stats = self._benchmark(persist_and_load)
median = stats["median_ms"]

self.assertLess(
median, 50,
f"Persist+load median {median:.1f}ms exceeds 50ms target. "
f"Stats: min={stats['min_ms']:.1f}ms, max={stats['max_ms']:.1f}ms"
)

# =========================================================================
# Scaling Benchmarks
# =========================================================================

def test_build_time_scales_linearly(self):
"""Build time scales roughly linearly with max_nodes."""
from scripts.context_graph import ContextGraphBuilder

sizes = [32, 64, 128]
times = {}

for size in sizes:
def build(n=size):
with ContextGraphBuilder() as builder:
graph = builder.build(
task_description="find decisions about infrastructure",
seed_strategy="semantic",
token_budget=size * 100,
max_nodes=n,
persist=False,
)
return graph

stats = self._benchmark(build, runs=2)
times[size] = stats["median_ms"]

# Verify 128-node build isn't more than 4x the 32-node build
# (linear would be 4x, allowing some overhead)
ratio = times[128] / times[32] if times[32] > 0 else float('inf')
self.assertLess(
ratio, 5.0,
f"128-node build ({times[128]:.1f}ms) is {ratio:.1f}x slower than "
f"32-node build ({times[32]:.1f}ms). Expected <5x for near-linear scaling."
)

class TestPerformanceReport(unittest.TestCase):
    """Generate a comprehensive performance report."""

    @classmethod
    def setUpClass(cls):
        """Skip the report when the knowledge-graph database is absent."""
        from scripts.core.paths import get_org_db_path
        cls.org_db_path = get_org_db_path()
        if not cls.org_db_path.exists():
            raise unittest.SkipTest("org.db not found")

    def test_generate_report(self):
        """Generate a human-readable performance summary.

        Times full builds at 32/64/128 nodes, prints a formatted table,
        and asserts the primary 128-node <100ms target.
        """
        from scripts.context_graph import ContextGraphBuilder

        results = {}

        with ContextGraphBuilder() as builder:
            stats = builder.get_stats()

            # Full build at different sizes
            for max_nodes in [32, 64, 128]:
                start = time.perf_counter()
                graph = builder.build(
                    task_description="find security decisions and error solutions",
                    seed_strategy="semantic",
                    token_budget=max_nodes * 100,
                    max_nodes=max_nodes,
                    persist=False,
                )
                elapsed = (time.perf_counter() - start) * 1000
                results[f"build_{max_nodes}"] = {
                    "time_ms": round(elapsed, 1),
                    "actual_nodes": graph.node_count,
                    "actual_edges": graph.edge_count,
                    "tokens": graph.tokens_estimated,
                }

        # Print report
        report_lines = [
            "",
            "=" * 60,
            "CONTEXT GRAPH PERFORMANCE REPORT",
            "=" * 60,
            f"Knowledge Graph: {stats['total_nodes']:,} nodes, {stats['total_edges']:,} edges",
            "",
            f"{'Build Size':<15} {'Time (ms)':<12} {'Nodes':<8} {'Edges':<8} {'Tokens':<10} {'Status':<10}",
            "-" * 60,
        ]

        targets = {32: 40, 64: 60, 128: 100}
        for max_nodes in [32, 64, 128]:
            key = f"build_{max_nodes}"
            r = results[key]
            target = targets[max_nodes]
            status = "PASS" if r["time_ms"] < target else "FAIL"
            report_lines.append(
                f"{max_nodes:<15} {r['time_ms']:<12.1f} {r['actual_nodes']:<8} "
                f"{r['actual_edges']:<8} {r['tokens']:<10} {status:<10}"
            )

        report_lines.extend([
            "-" * 60,
            "",
            "Targets: 32-node <40ms, 64-node <60ms, 128-node <100ms",
            "=" * 60,
        ])

        report = "\n".join(report_lines)
        print(report)

        # Assert primary target
        self.assertLess(
            results["build_128"]["time_ms"], 100,
            "128-node build exceeded 100ms target"
        )

def run_tests(verbosity: int = 2) -> bool:
    """Run all performance tests.

    Args:
        verbosity: unittest TextTestRunner verbosity level.

    Returns:
        True when every test passed or was skipped.
    """
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()

    suite.addTests(loader.loadTestsFromTestCase(TestPerformance))
    suite.addTests(loader.loadTestsFromTestCase(TestPerformanceReport))

    runner = unittest.TextTestRunner(verbosity=verbosity)
    result = runner.run(suite)
    return result.wasSuccessful()

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="ADR-151 Context Graph Performance Tests (J.25.7.12)"
    )
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="Verbose test output"
    )
    parser.add_argument(
        "--runs",
        type=int,
        default=3,
        help="Number of benchmark iterations per test (default: 3)"
    )
    args = parser.parse_args()

    # Propagate the CLI iteration count to the module-level setting
    # read by TestPerformance._benchmark.
    BENCHMARK_RUNS = args.runs

    verbosity = 2 if args.verbose else 1
    success = run_tests(verbosity)
    sys.exit(0 if success else 1)