#!/usr/bin/env python3
"""
Context Optimizer - CODITECT Adapter

Wraps the compaction and observation masking utilities from
Agent-Skills-for-Context-Engineering for CODITECT framework integration.

Usage:
    python3 scripts/context-engineering/context_optimizer.py --compact --file messages.json
    python3 scripts/context-engineering/context_optimizer.py --mask --file observations.json
    python3 scripts/context-engineering/context_optimizer.py --budget 100000 --usage 85000 --check

Source: external/Agent-Skills-for-Context-Engineering/skills/context-optimization/scripts/
"""
import argparse
import json
import re
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Optional
# Add external module to path.
# The script lives in <repo>/scripts/context-engineering/, so three parents up
# is the repository root; resolve() keeps that true for a relative __file__.
EXTERNAL_PATH = (
    Path(__file__).resolve().parent.parent.parent
    / "external"
    / "Agent-Skills-for-Context-Engineering"
)
sys.path.insert(0, str(EXTERNAL_PATH / "skills" / "context-optimization" / "scripts"))

# The external utilities are optional: when they cannot be imported the
# optimizer falls back to its built-in heuristics instead of failing.
try:
    from compaction import (
        ObservationStore,
        ContextBudget,
        estimate_token_count,
        estimate_message_tokens,
        categorize_messages,
        summarize_content,
        design_stable_prompt,
        calculate_cache_metrics,
    )
    EXTERNAL_AVAILABLE = True
except ImportError:
    EXTERNAL_AVAILABLE = False
    print("Warning: External module not available. Using fallback implementation.", file=sys.stderr)
class CoditechContextOptimizer:
    """CODITECT-integrated context optimizer.

    Delegates to the external compaction / observation-masking utilities when
    they were importable (``EXTERNAL_AVAILABLE``) and degrades to simple
    built-in heuristics otherwise.

    Note:
        ``mask_observations`` rewrites the ``content`` of the observation
        dicts it is given, and ``compact_context`` rewrites the dicts returned
        by ``categorize_messages``. ``optimize`` relies on the masking
        mutation to measure token savings.
    """

    # Categories that are passed through untouched (never summarized).
    _PROTECTED_CATEGORIES = ("system_prompt", "tool_definition")
    # Compressible categories, in compaction priority order (highest impact first).
    _COMPACTION_PRIORITY = ("tool_output", "conversation", "retrieved_document", "other")
    # Content patterns that make a prompt prefix unstable, with the advice to emit.
    _UNSTABLE_PATTERNS = (
        (re.compile(r'\d{4}-\d{2}-\d{2}'), "Remove timestamps from stable sections"),
        (re.compile(r'Session \d+'), "Stabilize session identifiers"),
    )

    def __init__(self, total_limit: int = 100000):
        """Create an optimizer for a context window of ``total_limit`` tokens."""
        self.total_limit = total_limit
        self.observation_store = ObservationStore() if EXTERNAL_AVAILABLE else None
        self.budget = ContextBudget(total_limit) if EXTERNAL_AVAILABLE else None

        # Optimization targets from skills
        self.targets = {
            "compaction_savings": (0.50, 0.70),  # 50-70% target
            "masking_savings": (0.60, 0.80),     # 60-80% target
            "cache_hit_rate": 0.70,              # 70%+ target
            "quality_threshold": 0.95,           # 95% quality preservation
        }

    def optimize(self, messages: List[Dict], strategy: str = "auto") -> Dict:
        """
        Apply a context optimization strategy.

        Args:
            messages: List of context messages
            strategy: Optimization strategy ("compaction", "masking", "cache",
                "auto"). "auto" is resolved to one concrete strategy via
                ``_select_strategy``; an unrecognized value applies nothing.

        Returns:
            Optimization results with before/after metrics
        """
        before_tokens = self._estimate_tokens(messages)

        if strategy == "auto":
            strategy = self._select_strategy(messages, before_tokens)

        result = {
            "strategy": strategy,
            "before_tokens": before_tokens,
            "optimizations_applied": [],
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }

        if strategy == "compaction":
            result["compacted_messages"] = self._apply_compaction(messages)
            result["optimizations_applied"].append("compaction")
        elif strategy == "masking":
            masked = self._apply_masking(messages)
            result["masked_count"] = masked["count"]
            result["masked_refs"] = masked["refs"]
            result["optimizations_applied"].append("masking")
        elif strategy == "cache":
            result["cache_optimization"] = self._optimize_cache(messages)
            result["optimizations_applied"].append("cache")

        # Calculate savings. Masking edits the message dicts in place, so
        # re-estimating ``messages`` reflects it even without a new list.
        optimized_messages = result.get("compacted_messages", messages)
        after_tokens = self._estimate_tokens(optimized_messages)
        tokens_saved = before_tokens - after_tokens
        result["after_tokens"] = after_tokens
        result["tokens_saved"] = tokens_saved
        result["savings_percent"] = tokens_saved / before_tokens if before_tokens > 0 else 0

        return result

    def compact_context(self, messages: List[Dict], preserve_recent: int = 5) -> Dict:
        """
        Compact context by summarizing older messages.

        Messages are grouped by category. Protected categories (system prompt,
        tool definitions) are emitted first and untouched; within each
        compressible category all but the last ``preserve_recent`` messages
        are replaced by summaries. The output is ordered by category, not by
        original position.

        Args:
            messages: List of context messages
            preserve_recent: Number of recent messages to preserve per
                category (values below 0 are treated as 0)

        Returns:
            Dict with the compacted ``messages``, per-message ``summaries``,
            and ``original_count`` / ``compacted_count``. Without the external
            module the messages are returned unchanged.
        """
        if not EXTERNAL_AVAILABLE:
            return {
                "messages": messages,
                "summary": None,
                "summaries": [],
                "original_count": len(messages),
                "compacted_count": len(messages),
            }

        categories = categorize_messages(messages)
        keep_recent = max(preserve_recent, 0)

        compacted: List[Dict] = []
        summaries: List[Dict] = []

        # Never compress these, but do keep them in the output.
        for category in self._PROTECTED_CATEGORIES:
            compacted.extend(categories.get(category, []))

        for category in self._COMPACTION_PRIORITY:
            cat_messages = categories.get(category, [])
            # Explicit split index: a ``[:-n]`` slice would select nothing
            # for n == 0 instead of everything.
            split = max(len(cat_messages) - keep_recent, 0)

            for msg in cat_messages[:split]:
                original = msg.get("content", "")
                summary = summarize_content(original, category)
                summaries.append({
                    "category": category,
                    "original_tokens": estimate_token_count(original),
                    "summary_tokens": estimate_token_count(summary),
                    "summary": summary,
                })
                msg["content"] = summary
                msg["compacted"] = True

            compacted.extend(cat_messages)

        # Pass through any category this method does not know about rather
        # than silently dropping its messages.
        # NOTE(review): assumes categorize_messages returns a dict of lists.
        known = set(self._PROTECTED_CATEGORIES) | set(self._COMPACTION_PRIORITY)
        for category, cat_messages in categories.items():
            if category not in known:
                compacted.extend(cat_messages)

        return {
            "messages": compacted,
            "summaries": summaries,
            "original_count": len(messages),
            "compacted_count": len(compacted),
        }

    def mask_observations(self, observations: List[Dict], max_length: int = 500,
                          turns_threshold: int = 3) -> Dict:
        """
        Mask verbose observations with references.

        An observation is masked only when its string content is longer than
        ``max_length``, its ``turn_age`` is at least ``turns_threshold``, and
        it is flagged neither ``critical`` nor ``most_recent``. Masked dicts
        are modified in place (``content``, ``masked``, ``ref_id``).

        Args:
            observations: List of observation messages
            max_length: Maximum length before masking
            turns_threshold: Mask observations at least this many turns old

        Returns:
            Dict with ``observations``, the new ``refs``, their ``count`` and
            the estimated ``tokens_saved``. Without an observation store the
            input is returned unmasked.
        """
        if self.observation_store is None:
            return {"observations": observations, "refs": [], "count": 0, "tokens_saved": 0}

        refs = []
        tokens_saved = 0

        for obs in observations:
            content = obs.get("content", "")

            should_mask = (
                isinstance(content, str)
                and len(content) > max_length
                and obs.get("turn_age", 0) >= turns_threshold
                and not obs.get("critical", False)
                and not obs.get("most_recent", False)
            )
            if not should_mask:
                continue

            masked_content, ref_id = self.observation_store.mask(content, max_length)
            # Measure the saving here: the original text is not kept on the dict.
            tokens_saved += estimate_token_count(content) - estimate_token_count(masked_content)
            obs["content"] = masked_content
            obs["masked"] = True
            obs["ref_id"] = ref_id
            refs.append(ref_id)

        return {
            "observations": list(observations),
            "refs": refs,
            "count": len(refs),
            "tokens_saved": tokens_saved,
        }

    def retrieve_observation(self, ref_id: str) -> Optional[str]:
        """Retrieve a masked observation by reference ID (None without a store)."""
        if self.observation_store is None:
            return None
        return self.observation_store.retrieve(ref_id)

    def check_budget(self, current_usage: int, metrics: Optional[Dict] = None) -> Dict:
        """
        Check if optimization should be triggered based on budget.

        Args:
            current_usage: Current token usage
            metrics: Optional metrics for degradation detection (only used by
                the external budget implementation)

        Returns:
            Budget status with optimization recommendation
        """
        if self.budget is not None:
            should_optimize, reasons = self.budget.should_optimize(current_usage, metrics)
            usage = self.budget.get_usage()
        else:
            # Fallback heuristic: optimize above 80% utilization.
            utilization = self._utilization(current_usage)
            should_optimize = utilization > 0.8
            reasons = [("high_utilization", utilization)] if should_optimize else []
            usage = {"utilization_ratio": utilization}

        return {
            "should_optimize": should_optimize,
            "reasons": reasons,
            "usage": usage,
            "recommendation": self._get_optimization_recommendation(reasons),
        }

    def _utilization(self, tokens: int) -> float:
        """Return ``tokens`` as a fraction of the limit; inf for a non-positive limit."""
        if self.total_limit <= 0:
            return float("inf")
        return tokens / self.total_limit

    def _estimate_tokens(self, messages: List[Dict]) -> int:
        """Estimate tokens for message list."""
        if EXTERNAL_AVAILABLE:
            return estimate_message_tokens(messages)
        # Fallback: ~4 characters per token plus a fixed 10 per message.
        return sum(len(m.get("content", "")) // 4 + 10 for m in messages)

    def _select_strategy(self, messages: List[Dict], current_tokens: int) -> str:
        """Select optimization strategy based on context analysis."""
        if self._utilization(current_tokens) < 0.7:
            return "cache"  # Just optimize cache ordering

        # Analyze what dominates context
        if EXTERNAL_AVAILABLE:
            categories = categorize_messages(messages)
            tool_output_tokens = sum(
                estimate_token_count(m.get("content", ""))
                for m in categories.get("tool_output", [])
            )
            if tool_output_tokens > current_tokens * 0.5:
                return "masking"  # Tool outputs dominate

        return "compaction"  # General compaction

    def _apply_compaction(self, messages: List[Dict]) -> List[Dict]:
        """Apply compaction strategy and return the compacted message list."""
        return self.compact_context(messages)["messages"]

    def _apply_masking(self, messages: List[Dict]) -> Dict:
        """Apply masking strategy to messages whose role is "tool"."""
        observations = [m for m in messages if m.get("role") == "tool"]
        result = self.mask_observations(observations)
        return {"count": result["count"], "refs": result["refs"]}

    def _optimize_cache(self, messages: List[Dict]) -> Dict:
        """Apply cache optimization strategy.

        Scans message content for patterns that change between requests and
        returns each matching recommendation once.
        """
        if not EXTERNAL_AVAILABLE:
            return {"recommendations": ["Stable prompt ordering"]}

        recommendations = []
        for msg in messages:
            content = msg.get("content", "")
            if not isinstance(content, str):
                continue
            for pattern, advice in self._UNSTABLE_PATTERNS:
                if advice not in recommendations and pattern.search(content):
                    recommendations.append(advice)

        return {
            "recommendations": recommendations or ["Cache ordering is optimal"],
            "stable_content_ratio": 0.8,  # Placeholder
        }

    def _get_optimization_recommendation(self, reasons: List) -> str:
        """Get optimization recommendation based on ``(reason_type, value)`` pairs."""
        if not reasons:
            return "Context usage is healthy - continue monitoring"

        reason_types = [r[0] for r in reasons]

        if "attention_degradation" in reason_types:
            return "Apply observation masking to reduce mid-context bloat"
        if "quality_degradation" in reason_types:
            return "Review context for poisoning and apply compaction"
        return "Apply compaction and masking to reduce utilization"
def main():
    """Command-line entry point for the CODITECT context optimizer.

    Modes:
        * ``--check --usage N``: report budget status and exit with code 0.
        * ``--compact`` / ``--mask``: run one strategy on the loaded messages.
        * neither: run ``optimize`` with automatic strategy selection.

    Messages are read as a JSON list from ``--file`` or, when stdin is not a
    terminal, from stdin. With no input the help text is printed and the
    process exits with code 1. Invalid arguments or unreadable input end the
    process through ``parser.error``.
    """
    parser = argparse.ArgumentParser(
        description="CODITECT Context Optimizer",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    python3 context_optimizer.py --compact --file messages.json
    python3 context_optimizer.py --mask --file observations.json
    python3 context_optimizer.py --budget 100000 --usage 85000 --check
        """
    )

    parser.add_argument("--compact", "-c", action="store_true", help="Apply compaction")
    parser.add_argument("--mask", "-m", action="store_true", help="Apply observation masking")
    parser.add_argument("--file", "-f", help="JSON file with messages/observations")
    parser.add_argument("--budget", "-b", type=int, default=100000, help="Context token budget")
    parser.add_argument("--usage", "-u", type=int, help="Current token usage for budget check")
    parser.add_argument("--check", action="store_true", help="Check budget status only")
    parser.add_argument("--json", "-j", action="store_true", help="Output JSON format")

    args = parser.parse_args()

    optimizer = CoditechContextOptimizer(total_limit=args.budget)

    # Budget check mode
    if args.check:
        # Compare against None so that a usage of 0 is still a valid check.
        if args.usage is None:
            parser.error("--check requires --usage")
        result = optimizer.check_budget(args.usage)
        if args.json:
            print(json.dumps(result, indent=2))
        else:
            print(f"\nBudget Check:")
            print(f"  Should Optimize: {result['should_optimize']}")
            print(f"  Recommendation: {result['recommendation']}")
        sys.exit(0)

    # Load messages from file, or from stdin when it is piped
    try:
        if args.file:
            with open(args.file, 'r', encoding="utf-8") as f:
                messages = json.load(f)
        elif not sys.stdin.isatty():
            messages = json.load(sys.stdin)
        else:
            parser.print_help()
            sys.exit(1)
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and text decoding failures.
        parser.error(f"could not load messages: {exc}")

    if not isinstance(messages, list):
        parser.error("input JSON must be a list of message objects")

    # Apply optimization
    if args.compact:
        result = optimizer.compact_context(messages)
    elif args.mask:
        result = optimizer.mask_observations(messages)
    else:
        result = optimizer.optimize(messages)

    # Output
    if args.json:
        # Don't include full message content in JSON output
        if "messages" in result:
            result["message_count"] = len(result["messages"])
            del result["messages"]
        print(json.dumps(result, indent=2))
    else:
        print(f"\n{'='*60}")
        print("CODITECT Context Optimization")
        print(f"{'='*60}")

        if "before_tokens" in result:
            print(f"\nBefore: {result['before_tokens']} tokens")
            print(f"After:  {result['after_tokens']} tokens")
            print(f"Saved:  {result['tokens_saved']} tokens ({result['savings_percent']*100:.1f}%)")

        if "optimizations_applied" in result:
            print(f"\nStrategies Applied: {', '.join(result['optimizations_applied'])}")

        if "summaries" in result:
            print(f"\nCompaction Summaries: {len(result['summaries'])}")

        if "refs" in result:
            print(f"\nMasked Observations: {result.get('count', len(result['refs']))}")

        print(f"\n{'='*60}\n")
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()