Skip to main content

Agent Skills Framework Extension

Performance Profiling Skill

When to Use This Skill

Use this skill when implementing performance profiling patterns in your codebase.

How to Use This Skill

  1. Review the patterns and examples below
  2. Apply the relevant patterns to your implementation
  3. Follow the best practices outlined in this skill

CPU/memory profiling, flame graphs, bottleneck analysis, and optimization strategies.

Core Capabilities

  1. CPU Profiling - py-spy, perf, Node.js profiler
  2. Memory Analysis - Heap snapshots, leak detection
  3. Flame Graphs - Visual performance analysis
  4. Database Profiling - Slow query analysis
  5. Continuous Profiling - Production monitoring

Node.js Profiling

// src/profiling/profiler.ts
import v8 from 'v8';
import { writeFileSync } from 'fs';
import { Session } from 'inspector';

/**
 * Wraps the Node.js Inspector protocol to collect CPU profiles and heap
 * snapshots for the current process, plus simple memory accessors.
 */
export class NodeProfiler {
  private session: Session;

  constructor() {
    // One inspector session per profiler instance; connect() attaches
    // to the current process.
    this.session = new Session();
    this.session.connect();
  }

  /**
   * Runs `fn` under the V8 CPU profiler and writes the resulting
   * .cpuprofile JSON to `filename`.
   *
   * @param fn - Async workload to profile.
   * @param filename - Destination path for the profile JSON.
   * @returns The value resolved by `fn`.
   */
  async cpuProfile<T>(fn: () => Promise<T>, filename: string): Promise<T> {
    this.session.post('Profiler.enable');
    this.session.post('Profiler.start');

    const result = await fn();

    return new Promise((resolve) => {
      // NOTE: do not destructure the second callback arg directly — it is
      // undefined when `err` is set, and `(err, { profile })` would throw.
      this.session.post('Profiler.stop', (err, params) => {
        if (!err && params?.profile) {
          writeFileSync(filename, JSON.stringify(params.profile));
          console.log(`CPU profile saved to ${filename}`);
        } else if (err) {
          // Surface profiler failures instead of dropping them silently.
          console.error('Profiler.stop failed:', err);
        }
        this.session.post('Profiler.disable');
        resolve(result);
      });
    });
  }

  /**
   * Writes a V8 heap snapshot to `filename`. Snapshot data arrives in
   * chunks via 'HeapProfiler.addHeapSnapshotChunk' events.
   *
   * @param filename - Destination path for the .heapsnapshot file.
   * @throws Rejects if the inspector reports an error taking the snapshot.
   */
  async heapSnapshot(filename: string): Promise<void> {
    const chunks: Buffer[] = [];

    return new Promise((resolve, reject) => {
      const onChunk = (m: { params: { chunk: string } }) => {
        chunks.push(Buffer.from(m.params.chunk));
      };
      this.session.on('HeapProfiler.addHeapSnapshotChunk', onChunk);

      this.session.post('HeapProfiler.takeHeapSnapshot', null, (err) => {
        // Detach the listener so repeated snapshots don't stack handlers.
        this.session.removeListener('HeapProfiler.addHeapSnapshotChunk', onChunk);
        if (err) {
          reject(err);
          return;
        }
        writeFileSync(filename, Buffer.concat(chunks));
        console.log(`Heap snapshot saved to ${filename}`);
        resolve();
      });
    });
  }

  /**
   * Current process memory usage, each figure rounded to whole MB.
   */
  getMemoryUsage(): {
    heapUsed: number;
    heapTotal: number;
    external: number;
    rss: number;
  } {
    const usage = process.memoryUsage();
    return {
      heapUsed: Math.round(usage.heapUsed / 1024 / 1024),
      heapTotal: Math.round(usage.heapTotal / 1024 / 1024),
      external: Math.round(usage.external / 1024 / 1024),
      rss: Math.round(usage.rss / 1024 / 1024),
    };
  }

  /** Raw V8 heap statistics (bytes), straight from the v8 module. */
  getHeapStatistics(): v8.HeapStatistics {
    return v8.getHeapStatistics();
  }
}

// Express middleware for profiling endpoints
import { Router, Request, Response } from 'express';

// Single shared profiler instance backing every profiling route.
const profiler = new NodeProfiler();

export const profilingRouter = Router();

// Current process memory usage (MB).
profilingRouter.get('/profile/memory', (_req: Request, res: Response) => {
  res.json(profiler.getMemoryUsage());
});

// Raw V8 heap statistics.
profilingRouter.get('/profile/heap', (_req: Request, res: Response) => {
  res.json(profiler.getHeapStatistics());
});

// Kick off a CPU profile; timestamped filename avoids overwrites.
profilingRouter.post('/profile/cpu/start', async (_req: Request, res: Response) => {
  const filename = `profiles/cpu-${Date.now()}.cpuprofile`;
  // Start profiling (would need async control)
  res.json({ message: 'Profiling started', filename });
});

Python Profiling

# src/profiling/python_profiler.py
import cProfile
import pstats
import io
import tracemalloc
import linecache
from functools import wraps
from typing import Callable, TypeVar, Any
import time

T = TypeVar('T')

class Profiler:
    """Performance profiling utilities for Python applications."""

    @staticmethod
    def cpu_profile(func: Callable[..., T]) -> Callable[..., T]:
        """Decorator for CPU profiling.

        Runs ``func`` under :mod:`cProfile` and prints the top 20 entries
        sorted by cumulative time. The wrapped function's return value is
        passed through unchanged.
        """
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            profiler = cProfile.Profile()
            profiler.enable()
            try:
                result = func(*args, **kwargs)
            finally:
                # Always disable the profiler, even if func raises, so a
                # failed call does not leave profiling active process-wide.
                profiler.disable()

            # Print stats
            stream = io.StringIO()
            stats = pstats.Stats(profiler, stream=stream)
            stats.sort_stats('cumulative')
            stats.print_stats(20)
            print(stream.getvalue())

            return result
        return wrapper

    @staticmethod
    def memory_profile(func: Callable[..., T]) -> Callable[..., T]:
        """Decorator for memory profiling.

        Traces allocations made while ``func`` runs via :mod:`tracemalloc`
        and prints the top 10 allocation sites by line number.
        """
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            tracemalloc.start()
            try:
                result = func(*args, **kwargs)

                snapshot = tracemalloc.take_snapshot()
                top_stats = snapshot.statistics('lineno')

                print("\n[ Memory Usage - Top 10 ]")
                for stat in top_stats[:10]:
                    print(stat)
            finally:
                # Stop tracing even on failure; leaving tracemalloc running
                # slows the whole interpreter.
                tracemalloc.stop()
            return result
        return wrapper

    @staticmethod
    def timing(func: Callable[..., T]) -> Callable[..., T]:
        """Decorator for timing function execution.

        Prints wall-clock time (perf_counter) on successful completion.
        """
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            start = time.perf_counter()
            result = func(*args, **kwargs)
            end = time.perf_counter()
            print(f"{func.__name__} took {end - start:.4f}s")
            return result
        return wrapper


class MemoryTracker:
    """Context manager that reports memory allocated inside its body."""

    def __init__(self, top_n: int = 10):
        # Number of allocation sites to report when the context exits.
        self.top_n = top_n
        self._snapshot_start = None

    def __enter__(self):
        # Begin tracing and record the baseline snapshot for the diff.
        tracemalloc.start()
        self._snapshot_start = tracemalloc.take_snapshot()
        return self

    def __exit__(self, *args):
        # Diff current allocations against the baseline and print the
        # biggest growers, then shut tracing down.
        after = tracemalloc.take_snapshot()
        diff = after.compare_to(self._snapshot_start, 'lineno')

        print(f"\n[ Memory Diff - Top {self.top_n} ]")
        for entry in diff[:self.top_n]:
            print(entry)

        tracemalloc.stop()

    def display_top(self, snapshot, key_type='lineno', limit=10):
        """Display top memory consumers."""
        # Drop import machinery and unknown frames before ranking.
        filters = (
            tracemalloc.Filter(False, "<frozen importlib._bootstrap>"),
            tracemalloc.Filter(False, "<unknown>"),
        )
        ranked = snapshot.filter_traces(filters).statistics(key_type)

        print(f"\nTop {limit} lines:")
        for index, stat in enumerate(ranked[:limit], 1):
            frame = stat.traceback[0]
            print(f"#{index}: {frame.filename}:{frame.lineno}: "
                  f"{stat.size / 1024:.1f} KiB")
            line = linecache.getline(frame.filename, frame.lineno).strip()
            if line:
                print(f" {line}")


# Usage
@Profiler.timing
@Profiler.cpu_profile
def expensive_operation():
    # Some CPU-intensive work
    return sum(i ** 2 for i in range(1000000))

with MemoryTracker(top_n=5):
    # Memory-intensive operations
    data = [i for i in range(1000000)]

Database Query Profiling

// src/profiling/query-profiler.ts
// One recorded query execution.
interface QueryProfile {
  query: string;
  params: unknown[];
  duration: number; // wall-clock ms
  rows: number;
  timestamp: Date;
  explain?: unknown; // EXPLAIN ANALYZE rows, captured only for slow queries
}

/**
 * Records timing and row counts for every query routed through it, and
 * captures EXPLAIN ANALYZE output for queries over the slow threshold.
 */
export class QueryProfiler {
  private profiles: QueryProfile[] = [];
  private slowQueryThreshold = 100; // ms

  /**
   * Executes `query` through `pool`, recording duration and row count.
   *
   * @returns The result rows plus the recorded profile entry.
   */
  async profileQuery<T>(
    pool: Pool,
    query: string,
    params: unknown[] = []
  ): Promise<{ rows: T[]; profile: QueryProfile }> {
    const start = performance.now();

    const result = await pool.query(query, params);

    const duration = performance.now() - start;

    const profile: QueryProfile = {
      query,
      params,
      duration,
      rows: result.rowCount ?? 0,
      timestamp: new Date(),
    };

    // Get EXPLAIN for slow queries.
    // WARNING: EXPLAIN ANALYZE *re-executes* the statement — avoid
    // profiling mutating queries this way against production data.
    if (duration > this.slowQueryThreshold) {
      try {
        const explain = await pool.query(`EXPLAIN ANALYZE ${query}`, params);
        profile.explain = explain.rows;
        console.warn(`Slow query (${duration.toFixed(2)}ms):`, query);
      } catch {
        // EXPLAIN failed (e.g., for INSERT)
      }
    }

    this.profiles.push(profile);
    return { rows: result.rows, profile };
  }

  /** Profiles whose duration exceeded `threshold` ms. */
  getSlowQueries(threshold = 100): QueryProfile[] {
    return this.profiles.filter(p => p.duration > threshold);
  }

  /**
   * Aggregate statistics over every recorded query, including the ten
   * slowest query patterns by average duration.
   */
  getStatistics(): {
    total: number;
    avgDuration: number;
    slowQueries: number;
    topQueries: Array<{ query: string; count: number; avgDuration: number }>;
  } {
    const total = this.profiles.length;
    // Guard the empty case: dividing by zero would report NaN avgDuration.
    if (total === 0) {
      return { total: 0, avgDuration: 0, slowQueries: 0, topQueries: [] };
    }

    const avgDuration = this.profiles.reduce((sum, p) => sum + p.duration, 0) / total;
    const slowQueries = this.profiles.filter(p => p.duration > this.slowQueryThreshold).length;

    // Group by query pattern
    const queryGroups = new Map<string, { count: number; totalDuration: number }>();
    for (const profile of this.profiles) {
      const pattern = this.normalizeQuery(profile.query);
      const group = queryGroups.get(pattern) ?? { count: 0, totalDuration: 0 };
      group.count++;
      group.totalDuration += profile.duration;
      queryGroups.set(pattern, group);
    }

    const topQueries = Array.from(queryGroups.entries())
      .map(([query, stats]) => ({
        query,
        count: stats.count,
        avgDuration: stats.totalDuration / stats.count,
      }))
      .sort((a, b) => b.avgDuration - a.avgDuration)
      .slice(0, 10);

    return { total, avgDuration, slowQueries, topQueries };
  }

  /** Collapses $n placeholders and whitespace so equivalent queries group. */
  private normalizeQuery(query: string): string {
    // Replace parameter values with placeholders
    return query.replace(/\$\d+/g, '?').replace(/\s+/g, ' ').trim();
  }
}

Continuous Profiling (Production)

// src/profiling/continuous-profiler.ts
import pprof from '@datadog/pprof';
import { S3Client, PutObjectCommand } from '@aws-sdk/client-s3';

interface ProfileConfig {
  serviceName: string;
  environment: string;
  sampleRate: number; // Hz
  duration: number; // seconds
  uploadInterval: number; // seconds
}

/**
 * Runs background CPU and heap profiling loops and uploads each encoded
 * profile to S3 under a service/environment/type/timestamp key.
 */
export class ContinuousProfiler {
  private isRunning = false;
  private s3Client: S3Client;

  constructor(
    private readonly config: ProfileConfig,
    private readonly bucket: string
  ) {
    this.s3Client = new S3Client({});
  }

  /**
   * Starts both profiling loops. Idempotent: a second call while running
   * is a no-op. The loops run as fire-and-forget background tasks.
   */
  async start(): Promise<void> {
    if (this.isRunning) return;
    this.isRunning = true;

    console.log('Starting continuous profiler');

    // CPU profiling loop. `void` marks the promise as intentionally
    // fire-and-forget; errors are caught inside profileLoop.
    void this.profileLoop('cpu', async () => {
      const profile = await pprof.time.profile({
        durationMillis: this.config.duration * 1000,
        sourceMapper: undefined,
      });
      return pprof.encode(profile);
    });

    // Heap profiling loop (same fire-and-forget pattern).
    void this.profileLoop('heap', async () => {
      const profile = await pprof.heap.profile(undefined, undefined);
      return pprof.encode(profile);
    });
  }

  /**
   * Collect-and-upload loop; one iteration per uploadInterval. Failures
   * are logged and the loop continues rather than dying.
   */
  private async profileLoop(
    type: 'cpu' | 'heap',
    collectFn: () => Promise<Buffer>
  ): Promise<void> {
    while (this.isRunning) {
      try {
        const profile = await collectFn();
        await this.uploadProfile(type, profile);
      } catch (error) {
        console.error(`${type} profiling error:`, error);
      }

      await this.sleep(this.config.uploadInterval * 1000);
    }
  }

  /** Uploads one encoded profile; key embeds service/env/type/timestamp. */
  private async uploadProfile(type: string, data: Buffer): Promise<void> {
    const timestamp = new Date().toISOString();
    const key = `profiles/${this.config.serviceName}/${this.config.environment}/${type}/${timestamp}.pb.gz`;

    await this.s3Client.send(new PutObjectCommand({
      Bucket: this.bucket,
      Key: key,
      Body: data,
      ContentType: 'application/octet-stream',
      Metadata: {
        service: this.config.serviceName,
        environment: this.config.environment,
        type: type,
      },
    }));
  }

  /**
   * Signals both loops to exit. Note: a loop currently sleeping will only
   * observe the flag after its sleep completes (up to uploadInterval s).
   */
  stop(): void {
    this.isRunning = false;
    console.log('Stopping continuous profiler');
  }

  private sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}

Usage Examples

Profile Node.js Application

Apply performance-profiling skill to add CPU and memory profiling endpoints to Express API

Analyze Slow Queries

Apply performance-profiling skill to implement database query profiling with EXPLAIN analysis

Setup Continuous Profiling

Apply performance-profiling skill to configure production continuous profiling with S3 storage

Integration Points

  • monitoring-observability - Profile metrics and dashboards
  • database-schema-optimization - Query optimization
  • caching-strategies - Cache hit/miss analysis

Success Output

When successful, this skill MUST output:

✅ SKILL COMPLETE: performance-profiling

Completed:
- [x] CPU profiling completed ({duration}s, {samples} samples)
- [x] Memory analysis done (heap: {heap_mb}MB, top {top_n} allocations identified)
- [x] Slow queries detected ({slow_count} queries > {threshold}ms)
- [x] Flame graph generated and saved to {flame_graph_path}
- [x] Performance bottlenecks identified: {bottleneck_list}

Outputs:
- CPU profile: {cpu_profile_path} ({profile_size}KB)
- Heap snapshot: {heap_snapshot_path} ({snapshot_size}MB)
- Query analysis: {slow_query_count} slow queries documented
- Optimization recommendations: {recommendation_count} actionable items

Completion Checklist

Before marking this skill as complete, verify:

  • Profiler successfully started and stopped without errors
  • Profile data saved to correct file path with valid format
  • Memory usage captured before and after operation
  • Top memory consumers identified (at least top 10)
  • Slow query threshold configured appropriately for workload
  • EXPLAIN ANALYZE captured for queries exceeding threshold
  • Statistics calculated (total queries, avg duration, percentiles)
  • Continuous profiling (if enabled) successfully uploading to storage
  • Profile output is readable and contains actionable data

Failure Indicators

This skill has FAILED if:

  • ❌ Profiler crashes or hangs during execution
  • ❌ Profile file empty or corrupted (0 bytes or invalid JSON)
  • ❌ Memory tracking not started (tracemalloc.start() failed)
  • ❌ Heap snapshot exceeds available disk space
  • ❌ Query profiling returns empty result set
  • ❌ EXPLAIN ANALYZE fails for all slow queries
  • ❌ Performance overhead > 20% (profiling slows system excessively)
  • ❌ Continuous profiler unable to upload profiles (S3 errors)
  • ❌ Profile data shows no variation (all functions 0ms - invalid)

When NOT to Use

Do NOT use this skill when:

  • Development environment only (no performance issues observed)
  • Single function optimization needed (use simple timing decorator)
  • Real-time production monitoring required (use monitoring-observability-patterns)
  • Application already instrumented with APM (Datadog, New Relic)
  • Profiling overhead unacceptable (latency-sensitive APIs)
  • No performance bottleneck suspected (premature optimization)
  • Load testing needed (use load-testing-patterns instead)

Use alternative skills:

  • For production monitoring → monitoring-observability-patterns
  • For load testing → load-testing-patterns
  • For database optimization → database-schema-optimization
  • For simple benchmarking → Built-in time module or @timing decorator

Anti-Patterns (Avoid)

| Anti-Pattern | Problem | Solution |
| --- | --- | --- |
| Profiling in production without limit | Performance degradation | Use sampling (1-10% of requests) |
| Not cleaning up profiles | Disk space exhaustion | Set retention policy, delete old profiles |
| Profiling entire application | Noise overwhelms signal | Profile specific code paths/endpoints |
| Ignoring profiler overhead | Inaccurate measurements | Disable profiler for baseline measurements |
| Manual profile analysis only | Missing patterns | Use automated analysis tools (flame graphs) |
| No comparison baseline | Can't measure improvement | Profile before and after optimization |
| Hardcoded file paths | Profile overwrites | Use timestamps in filenames |
| Profiling without load | Doesn't reflect real usage | Profile under realistic load conditions |

Principles

This skill embodies:

  • #2 First Principles - Understand what to measure before profiling (CPU, memory, I/O)
  • #3 Keep It Simple - Start with simple timing, escalate to full profiling only if needed
  • #4 Separation of Concerns - Separate profiling, analysis, and optimization phases
  • #5 Eliminate Ambiguity - Explicit metrics (ms, MB, queries/sec) not vague "slow"
  • #6 Clear, Understandable, Explainable - Flame graphs visualize complex call stacks
  • #8 No Assumptions - Measure actual performance, don't guess bottlenecks
  • #10 Research When in Doubt - Use proven profilers (py-spy, perf, Node.js Inspector)

Full Standard: CODITECT-STANDARD-AUTOMATION.md