#!/usr/bin/env python3 """ ADR-151 Context Graph Pruning Strategies (Phase 5: CP-38, CP-39)
Implements pruning strategies to fit context graphs within token budgets while preserving the most relevant information.
Strategies: - prune_by_token_budget(): Remove lowest-relevance nodes until budget met - prune_by_relevance_threshold(): Remove nodes below relevance threshold - prune_edges(): Remove edges to pruned nodes - prune_disconnected(): Remove nodes not connected to seeds
Created: 2026-02-03 Author: Claude (Opus 4.5) Track: J (Memory Intelligence) Task: J.25.2.5 (CP-38, CP-39) """
import logging from typing import Dict, List, Optional, Set
from scripts.context_graph.algorithms import ContextGraph, GraphNode, GraphEdge
logger = logging.getLogger(name)
=============================================================================
CP-38: Token Budget Pruning
=============================================================================
def prune_by_token_budget( graph: ContextGraph, token_budget: int = 4000, preserve_seeds: bool = True, min_nodes: int = 5, ) -> ContextGraph: """ CP-38: Prune graph to fit within token budget.
Removes lowest-relevance nodes until estimated token count
is within budget, while preserving seed nodes.
Args:
graph: ContextGraph to prune
token_budget: Maximum tokens allowed
preserve_seeds: If True, never remove seed nodes
min_nodes: Minimum nodes to keep regardless of budget
Returns:
Pruned ContextGraph (modified in place)
"""
current_tokens = graph.tokens_estimated
if current_tokens <= token_budget:
logger.info(f"Graph already within budget: {current_tokens}/{token_budget} tokens")
return graph
logger.info(f"Pruning graph from {current_tokens} to {token_budget} tokens")
# Sort nodes by relevance (lowest first for removal)
nodes_sorted = sorted(
graph.nodes.values(),
key=lambda n: (n.is_seed if preserve_seeds else False, n.relevance_score)
)
# Remove nodes until within budget
removed_count = 0
for node in nodes_sorted:
if len(graph.nodes) <= min_nodes:
logger.warning(f"Reached minimum node count: {min_nodes}")
break
if preserve_seeds and node.is_seed:
continue
if graph.tokens_estimated <= token_budget:
break
# Remove node
del graph.nodes[node.id]
removed_count += 1
# Prune edges to removed nodes
graph = prune_edges(graph)
logger.info(f"Removed {removed_count} nodes, now {graph.tokens_estimated}/{token_budget} tokens")
return graph
def prune_by_token_budget_priority( graph: ContextGraph, token_budget: int = 4000, preserve_seeds: bool = True, type_priorities: Optional[Dict[str, int]] = None, ) -> ContextGraph: """ Advanced token budget pruning with node type priorities.
Removes nodes based on combination of relevance score and node type priority.
Lower priority types are removed first.
Args:
graph: ContextGraph to prune
token_budget: Maximum tokens allowed
preserve_seeds: If True, never remove seed nodes
type_priorities: Dict mapping node_type to priority (higher = keep)
Returns:
Pruned ContextGraph
"""
if type_priorities is None:
type_priorities = {
"decision": 100,
"adr": 95,
"policy": 90,
"error_solution": 85,
"skill_learning": 80,
"component": 70,
"function": 50,
"file": 40,
"session": 30,
"track": 60,
}
default_priority = 25
current_tokens = graph.tokens_estimated
if current_tokens <= token_budget:
return graph
logger.info(f"Priority-based pruning from {current_tokens} to {token_budget} tokens")
# Calculate composite score for each node
def composite_score(node: GraphNode) -> float:
type_score = type_priorities.get(node.node_type, default_priority) / 100.0
return node.relevance_score * 0.6 + type_score * 0.4
# Sort by composite score (lowest first for removal)
nodes_sorted = sorted(
graph.nodes.values(),
key=lambda n: (n.is_seed if preserve_seeds else False, composite_score(n))
)
removed_count = 0
for node in nodes_sorted:
if preserve_seeds and node.is_seed:
continue
if graph.tokens_estimated <= token_budget:
break
del graph.nodes[node.id]
removed_count += 1
graph = prune_edges(graph)
logger.info(f"Priority pruning removed {removed_count} nodes")
return graph
=============================================================================
CP-39: Relevance Threshold Pruning
=============================================================================
def prune_by_relevance_threshold( graph: ContextGraph, threshold: float = 0.3, preserve_seeds: bool = True, ) -> ContextGraph: """ CP-39: Prune nodes below relevance threshold.
Args:
graph: ContextGraph to prune
threshold: Minimum relevance score to keep (0.0-1.0)
preserve_seeds: If True, never remove seed nodes
Returns:
Pruned ContextGraph
"""
initial_count = graph.node_count
# Find nodes to remove
to_remove = []
for node_id, node in graph.nodes.items():
if preserve_seeds and node.is_seed:
continue
if node.relevance_score < threshold:
to_remove.append(node_id)
# Remove nodes
for node_id in to_remove:
del graph.nodes[node_id]
# Prune edges
graph = prune_edges(graph)
removed_count = initial_count - graph.node_count
logger.info(f"Threshold pruning (>{threshold}) removed {removed_count} nodes")
return graph
def prune_by_depth( graph: ContextGraph, max_depth: int = 2, preserve_seeds: bool = True, ) -> ContextGraph: """ Prune nodes beyond a certain depth from seeds.
Args:
graph: ContextGraph to prune
max_depth: Maximum depth to keep (0 = seeds only)
preserve_seeds: If True, never remove seed nodes
Returns:
Pruned ContextGraph
"""
initial_count = graph.node_count
# Find nodes to remove
to_remove = []
for node_id, node in graph.nodes.items():
if preserve_seeds and node.is_seed:
continue
if node.depth > max_depth:
to_remove.append(node_id)
# Remove nodes
for node_id in to_remove:
del graph.nodes[node_id]
# Prune edges
graph = prune_edges(graph)
removed_count = initial_count - graph.node_count
logger.info(f"Depth pruning (>{max_depth}) removed {removed_count} nodes")
return graph
=============================================================================
Edge and Connectivity Pruning
=============================================================================
def prune_edges(graph: ContextGraph) -> ContextGraph: """ Remove edges that reference nodes no longer in the graph.
Args:
graph: ContextGraph with potentially orphaned edges
Returns:
ContextGraph with clean edges
"""
node_ids = set(graph.nodes.keys())
initial_edge_count = len(graph.edges)
# Filter edges to only those with both endpoints present
graph.edges = [
edge for edge in graph.edges
if edge.from_node in node_ids and edge.to_node in node_ids
]
removed = initial_edge_count - len(graph.edges)
if removed > 0:
logger.debug(f"Removed {removed} orphaned edges")
return graph
def prune_disconnected( graph: ContextGraph, preserve_seeds: bool = True, ) -> ContextGraph: """ Remove nodes not connected to any seed node.
Uses BFS from seeds to find connected components.
Args:
graph: ContextGraph to prune
preserve_seeds: If True, seeds are always kept
Returns:
ContextGraph with only connected nodes
"""
if not graph.seed_nodes:
logger.warning("No seed nodes defined, skipping connectivity pruning")
return graph
initial_count = graph.node_count
# Build adjacency list
adjacency: Dict[str, Set[str]] = {node_id: set() for node_id in graph.nodes}
for edge in graph.edges:
if edge.from_node in adjacency and edge.to_node in adjacency:
adjacency[edge.from_node].add(edge.to_node)
adjacency[edge.to_node].add(edge.from_node)
# BFS from seeds to find connected nodes
connected: Set[str] = set()
queue = list(graph.seed_nodes)
while queue:
current = queue.pop(0)
if current in connected or current not in graph.nodes:
continue
connected.add(current)
queue.extend(adjacency.get(current, set()))
# Remove disconnected nodes
to_remove = [
node_id for node_id in graph.nodes
if node_id not in connected
]
for node_id in to_remove:
if preserve_seeds and graph.nodes[node_id].is_seed:
continue
del graph.nodes[node_id]
graph = prune_edges(graph)
removed_count = initial_count - graph.node_count
if removed_count > 0:
logger.info(f"Disconnected pruning removed {removed_count} nodes")
return graph
=============================================================================
Combined Pruning Pipeline
=============================================================================
def prune_graph( graph: ContextGraph, token_budget: int = 4000, relevance_threshold: float = 0.3, max_depth: Optional[int] = None, preserve_seeds: bool = True, remove_disconnected: bool = True, ) -> ContextGraph: """ Apply full pruning pipeline to context graph.
Order of operations:
1. Prune by depth (if specified)
2. Prune by relevance threshold
3. Prune disconnected nodes
4. Prune by token budget
Args:
graph: ContextGraph to prune
token_budget: Maximum tokens allowed
relevance_threshold: Minimum relevance score (0.0-1.0)
max_depth: Maximum depth from seeds (None = no limit)
preserve_seeds: If True, never remove seed nodes
remove_disconnected: If True, remove disconnected nodes
Returns:
Fully pruned ContextGraph
"""
initial_nodes = graph.node_count
initial_edges = graph.edge_count
logger.info(f"Starting pruning pipeline: {initial_nodes} nodes, {initial_edges} edges")
# 1. Depth pruning
if max_depth is not None:
graph = prune_by_depth(graph, max_depth, preserve_seeds)
# 2. Relevance threshold pruning
if relevance_threshold > 0:
graph = prune_by_relevance_threshold(graph, relevance_threshold, preserve_seeds)
# 3. Connectivity pruning
if remove_disconnected:
graph = prune_disconnected(graph, preserve_seeds)
# 4. Token budget pruning
if token_budget > 0:
graph = prune_by_token_budget(graph, token_budget, preserve_seeds)
logger.info(
f"Pruning complete: {initial_nodes} -> {graph.node_count} nodes, "
f"{initial_edges} -> {graph.edge_count} edges, "
f"{graph.tokens_estimated} tokens"
)
return graph