Skip to main content

Agent Skills Framework Extension

Memory Optimization Patterns Skill

When to Use This Skill

Use this skill when implementing memory optimization patterns in your codebase.

How to Use This Skill

  1. Review the patterns and examples below
  2. Apply the relevant patterns to your implementation
  3. Follow the best practices outlined in this skill

Token reduction, context compression, intelligent summarization, and caching for efficient AI operations.

Core Capabilities

  1. Token Management - Counting, budgeting, tracking
  2. Context Compression - Smart reduction techniques
  3. Summarization - Multi-level content condensing
  4. Caching - Intelligent result reuse
  5. Incremental Loading - Load only what's needed
  6. Priority Pruning - Remove least important content

Token Budget Manager

# scripts/token-budget.py
from dataclasses import dataclass
from typing import List, Dict, Optional
from enum import Enum

class Priority(Enum):
    """Relative importance of a piece of context content.

    Higher values indicate content that should be retained longer
    when the token budget forces pruning.
    """

    CRITICAL = 3  # highest retention priority
    HIGH = 2
    MEDIUM = 1
    LOW = 0  # first candidate for removal

@dataclass
class TokenizedContent:
    """A piece of content together with its token-accounting metadata."""

    content: str              # the (possibly compressed) text itself
    token_count: int          # token count of `content`
    priority: Priority        # retention priority for pruning decisions
    source: str               # label of where the content came from
    compressed: bool = False  # True when the text was shortened to fit

class TokenBudgetManager:
    """Manage token budgets across context components.

    Tracks per-category token reservations and a list of tokenized
    content pieces, compressing or pruning content so the total stays
    within ``max_tokens``.
    """

    def __init__(self, max_tokens: int):
        self.max_tokens = max_tokens
        # Tokens reserved per category name.
        self.allocated: Dict[str, int] = {}
        # Content pieces currently held against the budget.
        self.contents: List[TokenizedContent] = []

    def allocate(self, category: str, tokens: int) -> bool:
        """Reserve `tokens` for `category`; return False if it would overflow.

        Re-allocating an existing category replaces its previous
        reservation instead of double-counting it.
        """
        # Exclude this category's current reservation so re-allocation
        # is not counted twice against the budget.
        other = sum(self.allocated.values()) - self.allocated.get(category, 0)
        if other + tokens > self.max_tokens:
            return False

        self.allocated[category] = tokens
        return True

    def add_content(
        self,
        content: str,
        priority: Priority,
        source: str,
        compress_if_needed: bool = True
    ) -> bool:
        """Add content; compress it (or prune lower-priority items) if over budget."""
        tokens = self._count_tokens(content)
        was_compressed = False

        current_total = sum(c.token_count for c in self.contents)
        overflow = current_total + tokens - self.max_tokens
        if overflow > 0:
            if compress_if_needed:
                content, tokens = self._compress(content, tokens)
                # Record the compression directly; the original re-counted
                # the already-compressed text, so the flag was always False.
                was_compressed = True
            else:
                # Free only the actual overflow, not the full size of the
                # incoming content.
                self._prune_by_priority(overflow)

        tokenized = TokenizedContent(
            content=content,
            token_count=tokens,
            priority=priority,
            source=source,
            compressed=was_compressed
        )

        self.contents.append(tokenized)
        return True

    def _count_tokens(self, text: str) -> int:
        """Approximate token count as words * 1.3 (use tiktoken in production)."""
        # int() so the return value matches the declared type; the
        # original returned a float here.
        return int(len(text.split()) * 1.3)

    def _compress(self, content: str, original_tokens: int) -> tuple[str, int]:
        """Naive compression: keep the first 70% of the words plus ellipsis."""
        words = content.split()
        target_words = int(len(words) * 0.7)
        compressed = ' '.join(words[:target_words]) + "..."

        return compressed, self._count_tokens(compressed)

    def _prune_by_priority(self, needed_tokens: int):
        """Remove lowest-priority content until `needed_tokens` are freed.

        CRITICAL items are never removed; pruning stops as soon as only
        CRITICAL content remains.
        """
        self.contents.sort(key=lambda c: c.priority.value)

        freed = 0
        while freed < needed_tokens and self.contents:
            if self.contents[0].priority is Priority.CRITICAL:
                break
            removed = self.contents.pop(0)
            freed += removed.token_count

    def get_usage_report(self) -> str:
        """Return a human-readable report of token usage, overall and per source."""
        total = sum(c.token_count for c in self.contents)
        by_source: Dict[str, int] = {}

        for item in self.contents:
            by_source[item.source] = by_source.get(item.source, 0) + item.token_count

        report = f"Token Usage: {total}/{self.max_tokens} ({total/self.max_tokens*100:.1f}%)\n\n"
        report += "By Source:\n"
        # Largest consumers first.
        for source, tokens in sorted(by_source.items(), key=lambda x: x[1], reverse=True):
            report += f" {source}: {tokens} tokens\n"

        return report

# Usage
budget = TokenBudgetManager(max_tokens=100000)

# Prior-session context: worth keeping, but prunable under pressure.
budget.add_content(
    "Long context from previous session...",
    priority=Priority.HIGH,
    source="session_history",
)

# Core documentation must survive any pruning pass.
budget.add_content(
    "Critical project documentation",
    priority=Priority.CRITICAL,
    source="documentation",
)

print(budget.get_usage_report())

Intelligent Summarization

# scripts/intelligent-summarization.py
from dataclasses import dataclass
from typing import List
import re

@dataclass
class SummaryLevel:
    """Configuration for one summarization intensity level."""

    name: str                  # level identifier, e.g. 'moderate'
    target_compression: float  # target ratio of content kept, 0.0 - 1.0
    preserve_code: bool        # keep code blocks in the output
    preserve_examples: bool    # keep example sections in the output

# Preset summarization levels, ordered from most to least aggressive.
SUMMARY_LEVELS = {
    'extreme': SummaryLevel('extreme', 0.1, False, False),
    'aggressive': SummaryLevel('aggressive', 0.3, False, True),
    'moderate': SummaryLevel('moderate', 0.5, True, True),
    'light': SummaryLevel('light', 0.7, True, True),
}

class IntelligentSummarizer:
    """Multi-level intelligent summarization.

    Splits a document into prose paragraphs, code blocks, and example
    sections, keeps the highest-scoring paragraphs, and optionally
    re-attaches examples and code per the chosen ``SummaryLevel``.
    """

    def summarize(self, text: str, level: str = 'moderate') -> str:
        """Summarize `text` at the named level; unknown levels fall back to 'moderate'."""
        config = SUMMARY_LEVELS.get(level, SUMMARY_LEVELS['moderate'])

        # Extract components.
        code_blocks = self._extract_code_blocks(text)
        examples = self._extract_examples(text)
        paragraphs = self._extract_paragraphs(text)

        # Summarize paragraphs to the configured ratio.
        summarized_paras = self._summarize_paragraphs(
            paragraphs,
            target_ratio=config.target_compression
        )

        # Reassemble.
        result = []
        result.extend(summarized_paras)

        if config.preserve_examples and examples:
            result.append("\n## Examples\n")
            result.extend(examples[:2])  # Top 2 examples

        if config.preserve_code and code_blocks:
            result.append("\n## Code\n")
            result.extend(code_blocks[:3])  # Top 3 code blocks

        return '\n'.join(result)

    def _extract_code_blocks(self, text: str) -> List[str]:
        """Return fenced code blocks, re-wrapped in plain ``` fences."""
        pattern = r'```[\w]*\n(.*?)\n```'
        matches = re.findall(pattern, text, re.DOTALL)
        return [f"```\n{m}\n```" for m in matches]

    def _extract_examples(self, text: str) -> List[str]:
        """Return sections whose heading line contains the word 'example'."""
        lines = text.split('\n')
        examples = []
        in_example = False
        current = []

        for line in lines:
            if 'example' in line.lower() and line.startswith('#'):
                in_example = True
                current = [line]
            elif in_example and line.startswith('#'):
                # Any other heading ends the current example section.
                examples.append('\n'.join(current))
                in_example = False
                current = []
            elif in_example:
                current.append(line)

        if current:
            examples.append('\n'.join(current))

        return examples

    def _extract_paragraphs(self, text: str) -> List[str]:
        """Return substantial (> 50 char) prose paragraphs, with code blocks removed."""
        # Remove code blocks so they are not double-counted as prose.
        text_without_code = re.sub(r'```.*?```', '', text, flags=re.DOTALL)

        paragraphs = text_without_code.split('\n\n')
        return [p.strip() for p in paragraphs if p.strip() and len(p.strip()) > 50]

    def _summarize_paragraphs(
        self,
        paragraphs: List[str],
        target_ratio: float
    ) -> List[str]:
        """Keep the top paragraphs by importance, preserving document order.

        At least one paragraph is kept whenever any exist.
        """
        if not paragraphs:
            return []
        target_count = max(1, int(len(paragraphs) * target_ratio))

        # Score paragraphs by importance, remembering their position.
        scored = [
            (self._importance_score(para), idx)
            for idx, para in enumerate(paragraphs)
        ]
        scored.sort(reverse=True, key=lambda x: x[0])

        # Re-sort the selected indices so the summary reads in the
        # original document order (the previous implementation emitted
        # paragraphs in score order, scrambling the document flow).
        keep = sorted(idx for _, idx in scored[:target_count])
        return [paragraphs[i] for i in keep]

    def _importance_score(self, paragraph: str) -> float:
        """Heuristic importance score based on keywords, length, and numeric data."""
        score = 0.0

        # Keywords that indicate importance (lowercased once, not per keyword).
        lowered = paragraph.lower()
        important_words = ['must', 'critical', 'important', 'required', 'always', 'never']
        for word in important_words:
            if word in lowered:
                score += 0.2

        # Length (moderate length preferred).
        word_count = len(paragraph.split())
        if 50 < word_count < 200:
            score += 0.3

        # Has numbers/data.
        if re.search(r'\d+', paragraph):
            score += 0.1

        return score

# Usage
summarizer = IntelligentSummarizer()

# NOTE(review): this sample document was garbled in the original source
# (the inner code fence swallowed the closing quotes and the final calls
# were fused onto one line); reconstructed here — confirm against the
# original document.
long_doc = """
# Documentation

This is a comprehensive guide...

## Example 1

Here's an example...

```python
def example():
    pass
```

More text...
"""

# Different levels
extreme = summarizer.summarize(long_doc, level='extreme')
moderate = summarizer.summarize(long_doc, level='moderate')
light = summarizer.summarize(long_doc, level='light')


## Intelligent Caching System

```typescript
// scripts/intelligent-cache.ts

/** Metadata wrapper around a single cached value. */
interface CacheEntry<T> {
  key: string;
  value: T;
  timestamp: Date;   // last time the entry was set or read
  hitCount: number;  // number of successful gets
  tokenSize: number; // token cost counted against the cache budget
  priority: number;  // higher values survive eviction longer
}

/**
 * Token-budgeted cache with priority-weighted, recency-aware eviction.
 */
class IntelligentCache<T> {
  private cache = new Map<string, CacheEntry<T>>();
  private maxTokens: number;
  private currentTokens: number = 0;

  constructor(maxTokens: number) {
    this.maxTokens = maxTokens;
  }

  /** Insert or replace an entry, evicting low-value entries to make room. */
  set(key: string, value: T, tokenSize: number, priority: number = 1): void {
    // Replacing an existing key must release its old token cost first,
    // otherwise currentTokens drifts upward on every overwrite.
    const existing = this.cache.get(key);
    if (existing) {
      this.cache.delete(key);
      this.currentTokens -= existing.tokenSize;
    }

    // Evict until the new entry fits. Stop when the cache is empty:
    // the original looped forever when tokenSize alone exceeded
    // maxTokens, because evictLRU() returns without changing anything.
    while (this.currentTokens + tokenSize > this.maxTokens && this.cache.size > 0) {
      this.evictLRU();
    }

    const entry: CacheEntry<T> = {
      key,
      value,
      timestamp: new Date(),
      hitCount: 0,
      tokenSize,
      priority,
    };

    this.cache.set(key, entry);
    this.currentTokens += tokenSize;
  }

  /** Return the cached value (refreshing its stats), or undefined on a miss. */
  get(key: string): T | undefined {
    const entry = this.cache.get(key);
    if (!entry) return undefined;

    // Update stats so the entry scores better against eviction.
    entry.hitCount++;
    entry.timestamp = new Date();

    return entry.value;
  }

  /** Evict the entry with the lowest (recency + hits) * priority score. */
  private evictLRU(): void {
    if (this.cache.size === 0) return;

    // Calculate eviction score (lower = evict first).
    let lowestScore = Infinity;
    let evictKey: string | null = null;

    for (const [key, entry] of this.cache) {
      // Score based on: recency, hit count, priority.
      const ageMinutes = (Date.now() - entry.timestamp.getTime()) / 60000;
      const recencyScore = 1 / (1 + ageMinutes);
      const hitScore = Math.log(1 + entry.hitCount);

      const score = (recencyScore + hitScore) * entry.priority;

      if (score < lowestScore) {
        lowestScore = score;
        evictKey = key;
      }
    }

    // Compare against null explicitly: "" is a legal Map key but falsy.
    if (evictKey !== null) {
      const entry = this.cache.get(evictKey)!;
      this.cache.delete(evictKey);
      this.currentTokens -= entry.tokenSize;
    }
  }

  /** Human-readable summary of cache occupancy and hit statistics. */
  getStats(): string {
    const entries = Array.from(this.cache.values());
    const totalHits = entries.reduce((sum, e) => sum + e.hitCount, 0);

    return `Cache Stats:
Entries: ${this.cache.size}
Tokens: ${this.currentTokens}/${this.maxTokens}
Total Hits: ${totalHits}
Hit Rate: ${entries.length > 0 ? (totalHits / entries.length).toFixed(2) : 0}`;
  }
}

// Usage
const cache = new IntelligentCache<string>(50000);

cache.set('research:rust-async', 'Rust async research results...', 5000, 2);
cache.set('code:api-client', 'API client implementation...', 3000, 3);

const result = cache.get('research:rust-async');
console.log(cache.getStats());

Usage Examples

Token Budget Management

Apply memory-optimization-patterns skill to manage 100K token budget with priority-based allocation

Intelligent Summarization

Apply memory-optimization-patterns skill to summarize documentation with code preservation at moderate level

Smart Caching

Apply memory-optimization-patterns skill to cache research results with LRU eviction

Integration Points

  • memory-context-patterns - Context management
  • session-analysis-patterns - Session compression
  • prompt-analysis-patterns - Prompt optimization

Success Output

When successful, this skill MUST output:

✅ SKILL COMPLETE: memory-optimization-patterns

Completed:
- [x] Token budget allocated across {category_count} categories
- [x] Content compressed from {original_tokens} to {compressed_tokens} tokens ({reduction_pct}% reduction)
- [x] {pruned_count} low-priority items pruned to fit budget
- [x] Cache configured with {max_tokens} token limit, {entry_count} entries
- [x] Summarization applied at {level} level (compression ratio: {compression_ratio})

Outputs:
- Token usage report: {total_tokens}/{max_tokens} ({usage_pct}%)
- Compressed content: {output_path} ({final_size} tokens)
- Cache statistics: {hit_count} hits, {eviction_count} evictions
- Memory savings: {savings_tokens} tokens ({savings_pct}% reduction)

Completion Checklist

Before marking this skill as complete, verify:

  • Token counting function validated against tiktoken library
  • Budget allocations sum to ≤ max_tokens (no over-allocation)
  • Compression preserves critical content (CRITICAL priority items)
  • Pruning removed only LOW/MEDIUM priority items, not CRITICAL
  • Cache eviction policy tested (LRU with priority weighting)
  • Summarization level appropriate for use case (not over-compressed)
  • Code blocks and examples preserved when preserve_code=True
  • Final token usage under budget threshold (< 100% allocation)
  • Usage report generated and contains accurate statistics

Failure Indicators

This skill has FAILED if:

  • ❌ Token count calculation returns negative or implausible values
  • ❌ Budget allocation exceeds max_tokens (over-provisioned)
  • ❌ Compression removes CRITICAL content
  • ❌ Pruning deletes all content (nothing survives budget cuts)
  • ❌ Cache eviction fails (infinite loop or exception)
  • ❌ Summarization produces empty output
  • ❌ Code blocks lost when preserve_code=True
  • ❌ Final content exceeds max_tokens budget
  • ❌ Cache hit rate = 0% (cache not functioning)

When NOT to Use

Do NOT use this skill when:

  • Token budget not constrained (unlimited context window)
  • Content already optimized and minimal
  • Real-time streaming required (compression adds latency)
  • All content is CRITICAL priority (nothing to prune)
  • Simple content (< 1000 tokens, no optimization needed)
  • User needs full detailed output (no summarization acceptable)
  • Token cost negligible (budget not a concern)

Use alternative skills:

  • For session context → memory-context-patterns
  • For prompt engineering → prompt-analysis-patterns
  • For content generation → content-optimization-patterns
  • For caching strategy → caching-strategies

Anti-Patterns (Avoid)

| Anti-Pattern | Problem | Solution |
| --- | --- | --- |
| Aggressive compression on all content | Loss of critical information | Use priority-based compression |
| No baseline measurement | Can't measure savings | Count tokens before and after |
| Hardcoded token limits | Inflexible across models | Use configurable max_tokens |
| Ignoring token counting accuracy | Inaccurate budgets | Use tiktoken for precise counts |
| Over-caching everything | Memory bloat | Set cache size limits with LRU eviction |
| Extreme summarization (0.1 ratio) | Content becomes useless | Use moderate levels (0.5-0.7) |
| Not preserving structure | Lost readability | Keep headings, examples, code blocks |
| Cache without eviction policy | Unbounded growth | Implement LRU or priority-based eviction |

Principles

This skill embodies:

  • #1 Recycle → Extend → Re-Use → Create - Reuse cached results, extend with compression
  • #2 First Principles - Understand token economics (cost, limits, latency)
  • #3 Keep It Simple - Start with light compression (0.7), escalate only if needed
  • #4 Separation of Concerns - Separate counting, budgeting, compression, caching
  • #5 Eliminate Ambiguity - Explicit token counts and budgets, not vague "too large"
  • #8 No Assumptions - Measure actual token usage with tiktoken
  • #9 Progressive Disclosure - Multi-level summarization (light → moderate → aggressive)

Full Standard: CODITECT-STANDARD-AUTOMATION.md