Agent Skills Framework Extension
Performance Profiling Skill
When to Use This Skill
Use this skill when implementing performance profiling patterns in your codebase.
How to Use This Skill
- Review the patterns and examples below
- Apply the relevant patterns to your implementation
- Follow the best practices outlined in this skill
CPU/memory profiling, flame graphs, bottleneck analysis, and optimization strategies.
Core Capabilities
- CPU Profiling - py-spy, perf, Node.js profiler
- Memory Analysis - Heap snapshots, leak detection
- Flame Graphs - Visual performance analysis
- Database Profiling - Slow query analysis
- Continuous Profiling - Production monitoring
Node.js Profiling
// src/profiling/profiler.ts
import v8 from 'v8';
import { writeFileSync } from 'fs';
import { Session } from 'inspector';
export class NodeProfiler {
  private session: Session;

  constructor() {
    // One inspector session per profiler instance, attached to this process.
    this.session = new Session();
    this.session.connect();
  }

  /**
   * Run `fn` under the V8 CPU profiler and write the resulting
   * .cpuprofile JSON to `filename`.
   *
   * Profiling is best-effort: the promise resolves with fn's result even
   * if stopping the profiler or writing the file fails (the error is logged).
   */
  async cpuProfile<T>(fn: () => Promise<T>, filename: string): Promise<T> {
    this.session.post('Profiler.enable');
    this.session.post('Profiler.start');
    const result = await fn();
    return new Promise((resolve) => {
      this.session.post('Profiler.stop', (err, { profile }) => {
        if (!err && profile) {
          writeFileSync(filename, JSON.stringify(profile));
          console.log(`CPU profile saved to ${filename}`);
        } else if (err) {
          // Don't swallow the failure silently; the caller still gets `result`.
          console.error('Profiler.stop failed:', err);
        }
        this.session.post('Profiler.disable');
        resolve(result);
      });
    });
  }

  /**
   * Take a V8 heap snapshot and write it to `filename`.
   * The snapshot arrives as a stream of chunk events that are concatenated.
   */
  async heapSnapshot(filename: string): Promise<void> {
    const chunks: Buffer[] = [];
    const onChunk = (m: { params: { chunk: string } }) => {
      chunks.push(Buffer.from(m.params.chunk));
    };
    return new Promise((resolve) => {
      this.session.on('HeapProfiler.addHeapSnapshotChunk', onChunk);
      this.session.post('HeapProfiler.takeHeapSnapshot', null, () => {
        // Detach the listener so repeated snapshots don't accumulate
        // handlers (and stale chunks) from earlier calls.
        this.session.removeListener('HeapProfiler.addHeapSnapshotChunk', onChunk);
        writeFileSync(filename, Buffer.concat(chunks));
        console.log(`Heap snapshot saved to ${filename}`);
        resolve();
      });
    });
  }

  /** Current process memory usage, rounded to whole megabytes. */
  getMemoryUsage(): {
    heapUsed: number;
    heapTotal: number;
    external: number;
    rss: number;
  } {
    const usage = process.memoryUsage();
    return {
      heapUsed: Math.round(usage.heapUsed / 1024 / 1024),
      heapTotal: Math.round(usage.heapTotal / 1024 / 1024),
      external: Math.round(usage.external / 1024 / 1024),
      rss: Math.round(usage.rss / 1024 / 1024),
    };
  }

  /** Raw V8 heap statistics (bytes, not MB). */
  getHeapStatistics(): v8.HeapStatistics {
    return v8.getHeapStatistics();
  }

  /**
   * Disconnect the underlying inspector session. Call when the profiler
   * is no longer needed so the session does not keep resources alive.
   */
  disconnect(): void {
    this.session.disconnect();
  }
}
// Express middleware for profiling endpoints
import { Router, Request, Response } from 'express';
// Shared profiler instance backing every profiling endpoint.
const profiler = new NodeProfiler();

export const profilingRouter = Router();

// Current process memory usage, in MB.
profilingRouter.get('/profile/memory', (_req: Request, res: Response) => {
  res.json(profiler.getMemoryUsage());
});

// Raw V8 heap statistics.
profilingRouter.get('/profile/heap', (_req: Request, res: Response) => {
  res.json(profiler.getHeapStatistics());
});

// Kick off a CPU profile; the filename is timestamped to avoid overwrites.
profilingRouter.post('/profile/cpu/start', async (_req: Request, res: Response) => {
  const filename = `profiles/cpu-${Date.now()}.cpuprofile`;
  // Start profiling (would need async control)
  res.json({ message: 'Profiling started', filename });
});
Python Profiling
# src/profiling/python_profiler.py
import cProfile
import pstats
import io
import tracemalloc
import linecache
from functools import wraps
from typing import Callable, TypeVar, Any
import time
T = TypeVar('T')


class Profiler:
    """Performance profiling utilities for Python applications."""

    @staticmethod
    def cpu_profile(func: Callable[..., T]) -> Callable[..., T]:
        """Decorator: run the wrapped call under cProfile and print the
        top 20 entries sorted by cumulative time."""
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            profiler = cProfile.Profile()
            profiler.enable()
            try:
                result = func(*args, **kwargs)
            finally:
                # Always stop profiling, even if the wrapped call raises.
                profiler.disable()
            # Print stats
            stream = io.StringIO()
            stats = pstats.Stats(profiler, stream=stream)
            stats.sort_stats('cumulative')
            stats.print_stats(20)
            print(stream.getvalue())
            return result
        return wrapper

    @staticmethod
    def memory_profile(func: Callable[..., T]) -> Callable[..., T]:
        """Decorator: trace allocations made by the wrapped call and print
        the 10 largest, grouped by source line."""
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            tracemalloc.start()
            try:
                result = func(*args, **kwargs)
                snapshot = tracemalloc.take_snapshot()
            finally:
                # Always stop tracing so later calls start from a clean state.
                tracemalloc.stop()
            top_stats = snapshot.statistics('lineno')
            print("\n[ Memory Usage - Top 10 ]")
            for stat in top_stats[:10]:
                print(stat)
            return result
        return wrapper

    @staticmethod
    def timing(func: Callable[..., T]) -> Callable[..., T]:
        """Decorator: print the wall-clock duration of each call."""
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            start = time.perf_counter()
            result = func(*args, **kwargs)
            end = time.perf_counter()
            print(f"{func.__name__} took {end - start:.4f}s")
            return result
        return wrapper
class MemoryTracker:
    """Track memory allocations in a context.

    On exit, prints the ``top_n`` largest allocation diffs (grouped by
    source line) between the entry and exit snapshots.
    """

    def __init__(self, top_n: int = 10):
        self.top_n = top_n          # how many diff entries to print on exit
        self._snapshot_start = None  # snapshot taken in __enter__

    def __enter__(self):
        tracemalloc.start()
        self._snapshot_start = tracemalloc.take_snapshot()
        return self

    def __exit__(self, *args):
        # Compare against the entry snapshot before stopping the tracer.
        snapshot_end = tracemalloc.take_snapshot()
        top_stats = snapshot_end.compare_to(self._snapshot_start, 'lineno')
        print(f"\n[ Memory Diff - Top {self.top_n} ]")
        for stat in top_stats[:self.top_n]:
            print(stat)
        tracemalloc.stop()

    def display_top(self, snapshot, key_type='lineno', limit=10):
        """Display top memory consumers in ``snapshot``, skipping importlib
        bootstrap frames and frames with no known source file."""
        snapshot = snapshot.filter_traces((
            tracemalloc.Filter(False, "<frozen importlib._bootstrap>"),
            tracemalloc.Filter(False, "<unknown>"),
        ))
        top_stats = snapshot.statistics(key_type)
        print(f"\nTop {limit} lines:")
        for index, stat in enumerate(top_stats[:limit], 1):
            frame = stat.traceback[0]
            print(f"#{index}: {frame.filename}:{frame.lineno}: "
                  f"{stat.size / 1024:.1f} KiB")
            # Show the offending source line when it is available.
            line = linecache.getline(frame.filename, frame.lineno).strip()
            if line:
                print(f"    {line}")
# Usage
@Profiler.timing
@Profiler.cpu_profile
def expensive_operation():
    # Some CPU-intensive work
    result = sum(i ** 2 for i in range(1000000))
    return result


with MemoryTracker(top_n=5):
    # Memory-intensive operations
    data = [i for i in range(1000000)]
Database Query Profiling
// src/profiling/query-profiler.ts
/** Record of a single profiled database query. */
interface QueryProfile {
  query: string;      // SQL text as executed
  params: unknown[];  // bound parameter values
  duration: number;   // wall-clock time in milliseconds
  rows: number;       // rows returned/affected (0 when unknown)
  timestamp: Date;    // when the query completed
  explain?: unknown;  // EXPLAIN ANALYZE rows, captured for slow queries only
}
export class QueryProfiler {
  private profiles: QueryProfile[] = [];
  private slowQueryThreshold = 100; // ms

  /**
   * Execute `query` through `pool`, timing it and recording a profile.
   * Queries slower than the threshold also get an EXPLAIN ANALYZE capture.
   *
   * NOTE(review): `EXPLAIN ANALYZE` re-executes the statement, so slow
   * data-modifying queries run twice — confirm this is acceptable before
   * enabling against production writes.
   */
  async profileQuery<T>(
    pool: Pool,
    query: string,
    params: unknown[] = []
  ): Promise<{ rows: T[]; profile: QueryProfile }> {
    const start = performance.now();
    const result = await pool.query(query, params);
    const duration = performance.now() - start;

    const profile: QueryProfile = {
      query,
      params,
      duration,
      rows: result.rowCount ?? 0,
      timestamp: new Date(),
    };

    // Get EXPLAIN for slow queries
    if (duration > this.slowQueryThreshold) {
      try {
        const explain = await pool.query(`EXPLAIN ANALYZE ${query}`, params);
        profile.explain = explain.rows;
        console.warn(`Slow query (${duration.toFixed(2)}ms):`, query);
      } catch {
        // EXPLAIN failed (e.g., for INSERT)
      }
    }

    this.profiles.push(profile);
    return { rows: result.rows, profile };
  }

  /** Profiles whose duration exceeded `threshold` milliseconds. */
  getSlowQueries(threshold = 100): QueryProfile[] {
    return this.profiles.filter(p => p.duration > threshold);
  }

  /**
   * Aggregate statistics over all recorded profiles. Returns zeroed
   * statistics when nothing has been profiled yet (the previous
   * implementation divided by zero, yielding NaN avgDuration).
   */
  getStatistics(): {
    total: number;
    avgDuration: number;
    slowQueries: number;
    topQueries: Array<{ query: string; count: number; avgDuration: number }>;
  } {
    const total = this.profiles.length;
    if (total === 0) {
      return { total: 0, avgDuration: 0, slowQueries: 0, topQueries: [] };
    }
    const avgDuration = this.profiles.reduce((sum, p) => sum + p.duration, 0) / total;
    const slowQueries = this.profiles.filter(p => p.duration > this.slowQueryThreshold).length;

    // Group by query pattern so repeated queries with different params
    // are aggregated together.
    const queryGroups = new Map<string, { count: number; totalDuration: number }>();
    for (const profile of this.profiles) {
      const pattern = this.normalizeQuery(profile.query);
      const group = queryGroups.get(pattern) ?? { count: 0, totalDuration: 0 };
      group.count++;
      group.totalDuration += profile.duration;
      queryGroups.set(pattern, group);
    }

    const topQueries = Array.from(queryGroups.entries())
      .map(([query, stats]) => ({
        query,
        count: stats.count,
        avgDuration: stats.totalDuration / stats.count,
      }))
      .sort((a, b) => b.avgDuration - a.avgDuration)
      .slice(0, 10);

    return { total, avgDuration, slowQueries, topQueries };
  }

  /** Collapse `$1`-style placeholders and whitespace so equivalent
   *  queries group together. */
  private normalizeQuery(query: string): string {
    // Replace parameter values with placeholders
    return query.replace(/\$\d+/g, '?').replace(/\s+/g, ' ').trim();
  }
}
Continuous Profiling (Production)
// src/profiling/continuous-profiler.ts
import pprof from '@datadog/pprof';
import { S3Client, PutObjectCommand } from '@aws-sdk/client-s3';
/** Settings for background CPU/heap profile collection. */
interface ProfileConfig {
  serviceName: string;    // logical service identifier used in storage keys
  environment: string;    // e.g. "production", "staging"
  sampleRate: number;     // Hz — NOTE(review): not consumed by ContinuousProfiler as written
  duration: number;       // seconds each CPU profile covers
  uploadInterval: number; // seconds between collection cycles
}
export class ContinuousProfiler {
  private isRunning = false;
  private s3Client: S3Client;

  constructor(
    private readonly config: ProfileConfig,
    private readonly bucket: string
  ) {
    // Default client: region/credentials are resolved from the environment.
    this.s3Client = new S3Client({});
  }

  /**
   * Start the background CPU and heap collection loops.
   * Idempotent: calling start() while already running is a no-op.
   * The loops run until stop() flips the flag.
   */
  async start(): Promise<void> {
    if (this.isRunning) return;
    this.isRunning = true;
    console.log('Starting continuous profiler');

    // Fire-and-forget: each loop catches its own errors, so these promises
    // never reject. `void` marks them as intentionally unawaited (the
    // originals were floating promises).
    // CPU profiling loop
    void this.profileLoop('cpu', async () => {
      const profile = await pprof.time.profile({
        durationMillis: this.config.duration * 1000,
        sourceMapper: undefined,
      });
      return pprof.encode(profile);
    });

    // Heap profiling loop
    void this.profileLoop('heap', async () => {
      const profile = await pprof.heap.profile(undefined, undefined);
      return pprof.encode(profile);
    });
  }

  /** Collect → upload → sleep loop; per-cycle failures are logged, not fatal. */
  private async profileLoop(
    type: 'cpu' | 'heap',
    collectFn: () => Promise<Buffer>
  ): Promise<void> {
    while (this.isRunning) {
      try {
        const profile = await collectFn();
        await this.uploadProfile(type, profile);
      } catch (error) {
        console.error(`${type} profiling error:`, error);
      }
      await this.sleep(this.config.uploadInterval * 1000);
    }
  }

  /** Upload one encoded profile to S3 under a timestamped key. */
  private async uploadProfile(type: string, data: Buffer): Promise<void> {
    const timestamp = new Date().toISOString();
    const key = `profiles/${this.config.serviceName}/${this.config.environment}/${type}/${timestamp}.pb.gz`;
    await this.s3Client.send(new PutObjectCommand({
      Bucket: this.bucket,
      Key: key,
      Body: data,
      ContentType: 'application/octet-stream',
      Metadata: {
        service: this.config.serviceName,
        environment: this.config.environment,
        type: type,
      },
    }));
  }

  /**
   * Signal the loops to exit. Loops only observe the flag after their
   * current sleep, so shutdown may lag by up to `uploadInterval` seconds.
   */
  stop(): void {
    this.isRunning = false;
    console.log('Stopping continuous profiler');
  }

  private sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}
Usage Examples
Profile Node.js Application
Apply performance-profiling skill to add CPU and memory profiling endpoints to Express API
Analyze Slow Queries
Apply performance-profiling skill to implement database query profiling with EXPLAIN analysis
Setup Continuous Profiling
Apply performance-profiling skill to configure production continuous profiling with S3 storage
Integration Points
- monitoring-observability - Profile metrics and dashboards
- database-schema-optimization - Query optimization
- caching-strategies - Cache hit/miss analysis
Success Output
When successful, this skill MUST output:
✅ SKILL COMPLETE: performance-profiling
Completed:
- [x] CPU profiling completed ({duration}s, {samples} samples)
- [x] Memory analysis done (heap: {heap_mb}MB, top {top_n} allocations identified)
- [x] Slow queries detected ({slow_count} queries > {threshold}ms)
- [x] Flame graph generated and saved to {flame_graph_path}
- [x] Performance bottlenecks identified: {bottleneck_list}
Outputs:
- CPU profile: {cpu_profile_path} ({profile_size}KB)
- Heap snapshot: {heap_snapshot_path} ({snapshot_size}MB)
- Query analysis: {slow_query_count} slow queries documented
- Optimization recommendations: {recommendation_count} actionable items
Completion Checklist
Before marking this skill as complete, verify:
- Profiler successfully started and stopped without errors
- Profile data saved to correct file path with valid format
- Memory usage captured before and after operation
- Top memory consumers identified (at least top 10)
- Slow query threshold configured appropriately for workload
- EXPLAIN ANALYZE captured for queries exceeding threshold
- Statistics calculated (total queries, avg duration, percentiles)
- Continuous profiling (if enabled) successfully uploading to storage
- Profile output is readable and contains actionable data
Failure Indicators
This skill has FAILED if:
- ❌ Profiler crashes or hangs during execution
- ❌ Profile file empty or corrupted (0 bytes or invalid JSON)
- ❌ Memory tracking not started (tracemalloc.start() failed)
- ❌ Heap snapshot exceeds available disk space
- ❌ Query profiling returns empty result set
- ❌ EXPLAIN ANALYZE fails for all slow queries
- ❌ Performance overhead > 20% (profiling slows system excessively)
- ❌ Continuous profiler unable to upload profiles (S3 errors)
- ❌ Profile data shows no variation (all functions 0ms - invalid)
When NOT to Use
Do NOT use this skill when:
- Development environment only (no performance issues observed)
- Single function optimization needed (use simple timing decorator)
- Real-time production monitoring required (use monitoring-observability-patterns)
- Application already instrumented with APM (Datadog, New Relic)
- Profiling overhead unacceptable (latency-sensitive APIs)
- No performance bottleneck suspected (premature optimization)
- Load testing needed (use load-testing-patterns instead)
Use alternative skills:
- For production monitoring → monitoring-observability-patterns
- For load testing → load-testing-patterns
- For database optimization → database-schema-optimization
- For simple benchmarking → Built-in `time` module or the `@timing` decorator
Anti-Patterns (Avoid)
| Anti-Pattern | Problem | Solution |
|---|---|---|
| Profiling in production without limit | Performance degradation | Use sampling (1-10% of requests) |
| Not cleaning up profiles | Disk space exhaustion | Set retention policy, delete old profiles |
| Profiling entire application | Noise overwhelms signal | Profile specific code paths/endpoints |
| Ignoring profiler overhead | Inaccurate measurements | Disable profiler for baseline measurements |
| Manual profile analysis only | Missing patterns | Use automated analysis tools (flame graphs) |
| No comparison baseline | Can't measure improvement | Profile before and after optimization |
| Hardcoded file paths | Profile overwrites | Use timestamps in filenames |
| Profiling without load | Doesn't reflect real usage | Profile under realistic load conditions |
Principles
This skill embodies:
- #2 First Principles - Understand what to measure before profiling (CPU, memory, I/O)
- #3 Keep It Simple - Start with simple timing, escalate to full profiling only if needed
- #4 Separation of Concerns - Separate profiling, analysis, and optimization phases
- #5 Eliminate Ambiguity - Explicit metrics (ms, MB, queries/sec) not vague "slow"
- #6 Clear, Understandable, Explainable - Flame graphs visualize complex call stacks
- #8 No Assumptions - Measure actual performance, don't guess bottlenecks
- #10 Research When in Doubt - Use proven profilers (py-spy, perf, Node.js Inspector)
Full Standard: CODITECT-STANDARD-AUTOMATION.md