Agent Skills Framework Extension
Performance Profiling Skill
When to Use This Skill
Use this skill when implementing performance profiling patterns in your codebase.
How to Use This Skill
- Review the patterns and examples below
- Apply the relevant patterns to your implementation
- Follow the best practices outlined in this skill
CPU/memory profiling, flame graphs, bottleneck analysis, and optimization strategies.
Core Capabilities
- CPU Profiling - py-spy, perf, Node.js profiler
- Memory Analysis - Heap snapshots, leak detection
- Flame Graphs - Visual performance analysis
- Database Profiling - Slow query analysis
- Continuous Profiling - Production monitoring
Node.js Profiling
// src/profiling/profiler.ts
import v8 from 'v8';
import { writeFileSync } from 'fs';
import { Session } from 'inspector';
export class NodeProfiler {
  private session: Session;

  constructor() {
    // One inspector session per profiler instance, attached to this process.
    this.session = new Session();
    this.session.connect();
  }

  /**
   * Run `fn` under the V8 CPU profiler and write the resulting
   * .cpuprofile JSON to `filename`.
   *
   * Profiling is best-effort: the promise resolves with fn's result even
   * if stopping the profiler or writing the file fails (the error is logged).
   */
  async cpuProfile<T>(fn: () => Promise<T>, filename: string): Promise<T> {
    this.session.post('Profiler.enable');
    this.session.post('Profiler.start');
    const result = await fn();
    return new Promise((resolve) => {
      this.session.post('Profiler.stop', (err, { profile }) => {
        if (!err && profile) {
          writeFileSync(filename, JSON.stringify(profile));
          console.log(`CPU profile saved to ${filename}`);
        } else if (err) {
          // Don't swallow the failure silently; the caller still gets `result`.
          console.error('Profiler.stop failed:', err);
        }
        this.session.post('Profiler.disable');
        resolve(result);
      });
    });
  }

  /**
   * Take a V8 heap snapshot and write it to `filename`.
   * The snapshot arrives as a stream of chunk events that are concatenated.
   */
  async heapSnapshot(filename: string): Promise<void> {
    const chunks: Buffer[] = [];
    const onChunk = (m: { params: { chunk: string } }) => {
      chunks.push(Buffer.from(m.params.chunk));
    };
    return new Promise((resolve) => {
      this.session.on('HeapProfiler.addHeapSnapshotChunk', onChunk);
      this.session.post('HeapProfiler.takeHeapSnapshot', null, () => {
        // Detach the listener so repeated snapshots don't accumulate
        // handlers (and stale chunks) from earlier calls.
        this.session.removeListener('HeapProfiler.addHeapSnapshotChunk', onChunk);
        writeFileSync(filename, Buffer.concat(chunks));
        console.log(`Heap snapshot saved to ${filename}`);
        resolve();
      });
    });
  }

  /** Current process memory usage, rounded to whole megabytes. */
  getMemoryUsage(): {
    heapUsed: number;
    heapTotal: number;
    external: number;
    rss: number;
  } {
    const usage = process.memoryUsage();
    return {
      heapUsed: Math.round(usage.heapUsed / 1024 / 1024),
      heapTotal: Math.round(usage.heapTotal / 1024 / 1024),
      external: Math.round(usage.external / 1024 / 1024),
      rss: Math.round(usage.rss / 1024 / 1024),
    };
  }

  /** Raw V8 heap statistics (bytes, not MB). */
  getHeapStatistics(): v8.HeapStatistics {
    return v8.getHeapStatistics();
  }

  /**
   * Disconnect the underlying inspector session. Call when the profiler
   * is no longer needed so the session does not keep resources alive.
   */
  disconnect(): void {
    this.session.disconnect();
  }
}
// Express middleware for profiling endpoints
import { Router, Request, Response } from 'express';
// Shared profiler instance backing every profiling endpoint.
const profiler = new NodeProfiler();

export const profilingRouter = Router();

// Current process memory usage, in MB.
profilingRouter.get('/profile/memory', (_req: Request, res: Response) => {
  res.json(profiler.getMemoryUsage());
});

// Raw V8 heap statistics.
profilingRouter.get('/profile/heap', (_req: Request, res: Response) => {
  res.json(profiler.getHeapStatistics());
});

// Kick off a CPU profile; the filename is timestamped to avoid overwrites.
profilingRouter.post('/profile/cpu/start', async (_req: Request, res: Response) => {
  const filename = `profiles/cpu-${Date.now()}.cpuprofile`;
  // Start profiling (would need async control)
  res.json({ message: 'Profiling started', filename });
});
Python Profiling
# src/profiling/python_profiler.py
import cProfile
import pstats
import io
import tracemalloc
import linecache
from functools import wraps
from typing import Callable, TypeVar, Any
import time
T = TypeVar('T')


class Profiler:
    """Performance profiling utilities for Python applications."""

    @staticmethod
    def cpu_profile(func: Callable[..., T]) -> Callable[..., T]:
        """Decorator: run the wrapped call under cProfile and print the
        top 20 entries sorted by cumulative time."""
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            profiler = cProfile.Profile()
            profiler.enable()
            try:
                result = func(*args, **kwargs)
            finally:
                # Always stop profiling, even if the wrapped call raises.
                profiler.disable()
            # Print stats
            stream = io.StringIO()
            stats = pstats.Stats(profiler, stream=stream)
            stats.sort_stats('cumulative')
            stats.print_stats(20)
            print(stream.getvalue())
            return result
        return wrapper

    @staticmethod
    def memory_profile(func: Callable[..., T]) -> Callable[..., T]:
        """Decorator: trace allocations made by the wrapped call and print
        the 10 largest, grouped by source line."""
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            tracemalloc.start()
            try:
                result = func(*args, **kwargs)
                snapshot = tracemalloc.take_snapshot()
            finally:
                # Always stop tracing so later calls start from a clean state.
                tracemalloc.stop()
            top_stats = snapshot.statistics('lineno')
            print("\n[ Memory Usage - Top 10 ]")
            for stat in top_stats[:10]:
                print(stat)
            return result
        return wrapper

    @staticmethod
    def timing(func: Callable[..., T]) -> Callable[..., T]:
        """Decorator: print the wall-clock duration of each call."""
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            start = time.perf_counter()
            result = func(*args, **kwargs)
            end = time.perf_counter()
            print(f"{func.__name__} took {end - start:.4f}s")
            return result
        return wrapper
class MemoryTracker:
    """Track memory allocations in a context.

    On exit, prints the ``top_n`` largest allocation diffs (grouped by
    source line) between the entry and exit snapshots.
    """

    def __init__(self, top_n: int = 10):
        self.top_n = top_n          # how many diff entries to print on exit
        self._snapshot_start = None  # snapshot taken in __enter__

    def __enter__(self):
        tracemalloc.start()
        self._snapshot_start = tracemalloc.take_snapshot()
        return self

    def __exit__(self, *args):
        # Compare against the entry snapshot before stopping the tracer.
        snapshot_end = tracemalloc.take_snapshot()
        top_stats = snapshot_end.compare_to(self._snapshot_start, 'lineno')
        print(f"\n[ Memory Diff - Top {self.top_n} ]")
        for stat in top_stats[:self.top_n]:
            print(stat)
        tracemalloc.stop()

    def display_top(self, snapshot, key_type='lineno', limit=10):
        """Display top memory consumers in ``snapshot``, skipping importlib
        bootstrap frames and frames with no known source file."""
        snapshot = snapshot.filter_traces((
            tracemalloc.Filter(False, "<frozen importlib._bootstrap>"),
            tracemalloc.Filter(False, "<unknown>"),
        ))
        top_stats = snapshot.statistics(key_type)
        print(f"\nTop {limit} lines:")
        for index, stat in enumerate(top_stats[:limit], 1):
            frame = stat.traceback[0]
            print(f"#{index}: {frame.filename}:{frame.lineno}: "
                  f"{stat.size / 1024:.1f} KiB")
            # Show the offending source line when it is available.
            line = linecache.getline(frame.filename, frame.lineno).strip()
            if line:
                print(f"    {line}")
# Usage
@Profiler.timing
@Profiler.cpu_profile
def expensive_operation():
    # Some CPU-intensive work
    result = sum(i ** 2 for i in range(1000000))
    return result


with MemoryTracker(top_n=5):
    # Memory-intensive operations
    data = [i for i in range(1000000)]
Database Query Profiling
// src/profiling/query-profiler.ts
/** Record of a single profiled database query. */
interface QueryProfile {
  query: string;      // SQL text as executed
  params: unknown[];  // bound parameter values
  duration: number;   // wall-clock time in milliseconds
  rows: number;       // rows returned/affected (0 when unknown)
  timestamp: Date;    // when the query completed
  explain?: unknown;  // EXPLAIN ANALYZE rows, captured for slow queries only
}
export class QueryProfiler {
  private profiles: QueryProfile[] = [];
  private slowQueryThreshold = 100; // ms

  /**
   * Execute `query` through `pool`, timing it and recording a profile.
   * Queries slower than the threshold also get an EXPLAIN ANALYZE capture.
   *
   * NOTE(review): `EXPLAIN ANALYZE` re-executes the statement, so slow
   * data-modifying queries run twice — confirm this is acceptable before
   * enabling against production writes.
   */
  async profileQuery<T>(
    pool: Pool,
    query: string,
    params: unknown[] = []
  ): Promise<{ rows: T[]; profile: QueryProfile }> {
    const start = performance.now();
    const result = await pool.query(query, params);
    const duration = performance.now() - start;

    const profile: QueryProfile = {
      query,
      params,
      duration,
      rows: result.rowCount ?? 0,
      timestamp: new Date(),
    };

    // Get EXPLAIN for slow queries
    if (duration > this.slowQueryThreshold) {
      try {
        const explain = await pool.query(`EXPLAIN ANALYZE ${query}`, params);
        profile.explain = explain.rows;
        console.warn(`Slow query (${duration.toFixed(2)}ms):`, query);
      } catch {
        // EXPLAIN failed (e.g., for INSERT)
      }
    }

    this.profiles.push(profile);
    return { rows: result.rows, profile };
  }

  /** Profiles whose duration exceeded `threshold` milliseconds. */
  getSlowQueries(threshold = 100): QueryProfile[] {
    return this.profiles.filter(p => p.duration > threshold);
  }

  /**
   * Aggregate statistics over all recorded profiles. Returns zeroed
   * statistics when nothing has been profiled yet (the previous
   * implementation divided by zero, yielding NaN avgDuration).
   */
  getStatistics(): {
    total: number;
    avgDuration: number;
    slowQueries: number;
    topQueries: Array<{ query: string; count: number; avgDuration: number }>;
  } {
    const total = this.profiles.length;
    if (total === 0) {
      return { total: 0, avgDuration: 0, slowQueries: 0, topQueries: [] };
    }
    const avgDuration = this.profiles.reduce((sum, p) => sum + p.duration, 0) / total;
    const slowQueries = this.profiles.filter(p => p.duration > this.slowQueryThreshold).length;

    // Group by query pattern so repeated queries with different params
    // are aggregated together.
    const queryGroups = new Map<string, { count: number; totalDuration: number }>();
    for (const profile of this.profiles) {
      const pattern = this.normalizeQuery(profile.query);
      const group = queryGroups.get(pattern) ?? { count: 0, totalDuration: 0 };
      group.count++;
      group.totalDuration += profile.duration;
      queryGroups.set(pattern, group);
    }

    const topQueries = Array.from(queryGroups.entries())
      .map(([query, stats]) => ({
        query,
        count: stats.count,
        avgDuration: stats.totalDuration / stats.count,
      }))
      .sort((a, b) => b.avgDuration - a.avgDuration)
      .slice(0, 10);

    return { total, avgDuration, slowQueries, topQueries };
  }

  /** Collapse `$1`-style placeholders and whitespace so equivalent
   *  queries group together. */
  private normalizeQuery(query: string): string {
    // Replace parameter values with placeholders
    return query.replace(/\$\d+/g, '?').replace(/\s+/g, ' ').trim();
  }
}
Continuous Profiling (Production)
// src/profiling/continuous-profiler.ts
import pprof from '@datadog/pprof';
import { S3Client, PutObjectCommand } from '@aws-sdk/client-s3';
/** Settings for background CPU/heap profile collection. */
interface ProfileConfig {
  serviceName: string;    // logical service identifier used in storage keys
  environment: string;    // e.g. "production", "staging"
  sampleRate: number;     // Hz — NOTE(review): not consumed by ContinuousProfiler as written
  duration: number;       // seconds each CPU profile covers
  uploadInterval: number; // seconds between collection cycles
}
export class ContinuousProfiler {
  private isRunning = false;
  private s3Client: S3Client;

  constructor(
    private readonly config: ProfileConfig,
    private readonly bucket: string
  ) {
    // Default client: region/credentials are resolved from the environment.
    this.s3Client = new S3Client({});
  }

  /**
   * Start the background CPU and heap collection loops.
   * Idempotent: calling start() while already running is a no-op.
   * The loops run until stop() flips the flag.
   */
  async start(): Promise<void> {
    if (this.isRunning) return;
    this.isRunning = true;
    console.log('Starting continuous profiler');

    // Fire-and-forget: each loop catches its own errors, so these promises
    // never reject. `void` marks them as intentionally unawaited (the
    // originals were floating promises).
    // CPU profiling loop
    void this.profileLoop('cpu', async () => {
      const profile = await pprof.time.profile({
        durationMillis: this.config.duration * 1000,
        sourceMapper: undefined,
      });
      return pprof.encode(profile);
    });

    // Heap profiling loop
    void this.profileLoop('heap', async () => {
      const profile = await pprof.heap.profile(undefined, undefined);
      return pprof.encode(profile);
    });
  }

  /** Collect → upload → sleep loop; per-cycle failures are logged, not fatal. */
  private async profileLoop(
    type: 'cpu' | 'heap',
    collectFn: () => Promise<Buffer>
  ): Promise<void> {
    while (this.isRunning) {
      try {
        const profile = await collectFn();
        await this.uploadProfile(type, profile);
      } catch (error) {
        console.error(`${type} profiling error:`, error);
      }
      await this.sleep(this.config.uploadInterval * 1000);
    }
  }

  /** Upload one encoded profile to S3 under a timestamped key. */
  private async uploadProfile(type: string, data: Buffer): Promise<void> {
    const timestamp = new Date().toISOString();
    const key = `profiles/${this.config.serviceName}/${this.config.environment}/${type}/${timestamp}.pb.gz`;
    await this.s3Client.send(new PutObjectCommand({
      Bucket: this.bucket,
      Key: key,
      Body: data,
      ContentType: 'application/octet-stream',
      Metadata: {
        service: this.config.serviceName,
        environment: this.config.environment,
        type: type,
      },
    }));
  }

  /**
   * Signal the loops to exit. Loops only observe the flag after their
   * current sleep, so shutdown may lag by up to `uploadInterval` seconds.
   */
  stop(): void {
    this.isRunning = false;
    console.log('Stopping continuous profiler');
  }

  private sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}
Usage Examples
Profile Node.js Application
Apply performance-profiling skill to add CPU and memory profiling endpoints to Express API
Analyze Slow Queries
Apply performance-profiling skill to implement database query profiling with EXPLAIN analysis
Setup Continuous Profiling
Apply performance-profiling skill to configure production continuous profiling with S3 storage
Integration Points
- monitoring-observability - Profile metrics and dashboards
- database-schema-optimization - Query optimization
- caching-strategies - Cache hit/miss analysis
Success Output
When successful, this skill MUST output:
✅ SKILL COMPLETE: performance-profiling
Completed:
- [x] CPU profiling completed ({duration}s, {samples} samples)
- [x] Memory analysis done (heap: {heap_mb}MB, top {top_n} allocations identified)
- [x] Slow queries detected ({slow_count} queries > {threshold}ms)
- [x] Flame graph generated and saved to {flame_graph_path}
- [x] Performance bottlenecks identified: {bottleneck_list}
Outputs:
- CPU profile: {cpu_profile_path} ({profile_size}KB)
- Heap snapshot: {heap_snapshot_path} ({snapshot_size}MB)
- Query analysis: {slow_query_count} slow queries documented
- Optimization recommendations: {recommendation_count} actionable items
Completion Checklist
Before marking this skill as complete, verify:
- Profiler successfully started and stopped without errors
- Profile data saved to correct file path with valid format
- Memory usage captured before and after operation
- Top memory consumers identified (at least top 10)
- Slow query threshold configured appropriately for workload
- EXPLAIN ANALYZE captured for queries exceeding threshold
- Statistics calculated (total queries, avg duration, percentiles)
- Continuous profiling (if enabled) successfully uploading to storage
- Profile output is readable and contains actionable data
Failure Indicators
This skill has FAILED if:
- ❌ Profiler crashes or hangs during execution
- ❌ Profile file empty or corrupted (0 bytes or invalid JSON)
- ❌ Memory tracking not started (tracemalloc.start() failed)
- ❌ Heap snapshot exceeds available disk space
- ❌ Query profiling returns empty result set
- ❌ EXPLAIN ANALYZE fails for all slow queries
- ❌ Performance overhead > 20% (profiling slows system excessively)
- ❌ Continuous profiler unable to upload profiles (S3 errors)
- ❌ Profile data shows no variation (all functions 0ms - invalid)
When NOT to Use
Do NOT use this skill when:
- Development environment only (no performance issues observed)
- Single function optimization needed (use simple timing decorator)
- Real-time production monitoring required (use monitoring-observability-patterns)
- Application already instrumented with APM (Datadog, New Relic)
- Profiling overhead unacceptable (latency-sensitive APIs)
- No performance bottleneck suspected (premature optimization)
- Load testing needed (use load-testing-patterns instead)
Use alternative skills:
- For production monitoring → monitoring-observability-patterns
- For load testing → load-testing-patterns
- For database optimization → database-schema-optimization
- For simple benchmarking → Built-in `time` module or the `@timing` decorator
Anti-Patterns (Avoid)
| Anti-Pattern | Problem | Solution |
|---|---|---|
| Profiling in production without limit | Performance degradation | Use sampling (1-10% of requests) |
| Not cleaning up profiles | Disk space exhaustion | Set retention policy, delete old profiles |
| Profiling entire application | Noise overwhelms signal | Profile specific code paths/endpoints |
| Ignoring profiler overhead | Inaccurate measurements | Disable profiler for baseline measurements |
| Manual profile analysis only | Missing patterns | Use automated analysis tools (flame graphs) |
| No comparison baseline | Can't measure improvement | Profile before and after optimization |
| Hardcoded file paths | Profile overwrites | Use timestamps in filenames |
| Profiling without load | Doesn't reflect real usage | Profile under realistic load conditions |
Principles
This skill embodies:
- #2 First Principles - Understand what to measure before profiling (CPU, memory, I/O)
- #3 Keep It Simple - Start with simple timing, escalate to full profiling only if needed
- #4 Separation of Concerns - Separate profiling, analysis, and optimization phases
- #5 Eliminate Ambiguity - Explicit metrics (ms, MB, queries/sec) not vague "slow"
- #6 Clear, Understandable, Explainable - Flame graphs visualize complex call stacks
- #8 No Assumptions - Measure actual performance, don't guess bottlenecks
- #10 Research When in Doubt - Use proven profilers (py-spy, perf, Node.js Inspector)
Full Standard: CODITECT-STANDARD-AUTOMATION.md