017 Metrics Schema
Status
Context
The current situation requires a decision because:
- Requirement 1
- Constraint 2
- Need 3
Accepted | YYYY-MM-DD
-- Initialize extensions CREATE EXTENSION IF NOT EXISTS timescaledb; CREATE EXTENSION IF NOT EXISTS btree_gin;
-- Metrics Schema Definition CREATE SCHEMA metrics;
-- Raw Metrics Table CREATE TABLE metrics.raw_metrics ( id BIGSERIAL PRIMARY KEY, metric_name TEXT NOT NULL, value DOUBLE PRECISION NOT NULL, timestamp TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP, tags JSONB NOT NULL DEFAULT '{}',
-- Constraints
CONSTRAINT valid_metric_name CHECK (metric_name ~ '^[a-zA-Z0-9_\.]+$'),
CONSTRAINT valid_value CHECK (NOT isnan(value) AND NOT isinf(value))
);
-- Create hypertable for time-series optimization SELECT create_hypertable('metrics.raw_metrics', 'timestamp');
-- Aggregated Metrics Table CREATE TABLE metrics.aggregated_metrics ( metric_name TEXT NOT NULL, window_size TEXT NOT NULL, timestamp TIMESTAMPTZ NOT NULL, tags JSONB NOT NULL DEFAULT '{}', min_value DOUBLE PRECISION NOT NULL, max_value DOUBLE PRECISION NOT NULL, avg_value DOUBLE PRECISION NOT NULL, sum_value DOUBLE PRECISION NOT NULL, count INTEGER NOT NULL, p50_value DOUBLE PRECISION, p90_value DOUBLE PRECISION, p95_value DOUBLE PRECISION, p99_value DOUBLE PRECISION,
-- Constraints
CONSTRAINT valid_window_size CHECK (window_size IN ('5m', '1h', '24h')),
CONSTRAINT valid_aggregate_values CHECK (
min_value <= max_value AND
avg_value BETWEEN min_value AND max_value AND
count > 0
),
PRIMARY KEY (metric_name, window_size, timestamp, tags)
);
-- Create hypertable for aggregated metrics SELECT create_hypertable('metrics.aggregated_metrics', 'timestamp');
-- System Metrics Table CREATE TABLE metrics.system_metrics ( timestamp TIMESTAMPTZ NOT NULL, metric_group TEXT NOT NULL, metrics JSONB NOT NULL,
PRIMARY KEY (timestamp, metric_group)
);
-- Create hypertable for system metrics SELECT create_hypertable('metrics.system_metrics', 'timestamp');
-- Pipeline Metrics Table CREATE TABLE metrics.pipeline_metrics ( timestamp TIMESTAMPTZ NOT NULL, pipeline_id UUID NOT NULL, stage TEXT NOT NULL, duration_ms INTEGER NOT NULL, input_size INTEGER NOT NULL, output_size INTEGER NOT NULL, error_count INTEGER NOT NULL DEFAULT 0, metadata JSONB NOT NULL DEFAULT '{}',
PRIMARY KEY (timestamp, pipeline_id, stage)
);
-- Create hypertable for pipeline metrics SELECT create_hypertable('metrics.pipeline_metrics', 'timestamp');
-- Vector Store Metrics Table CREATE TABLE metrics.vector_metrics ( timestamp TIMESTAMPTZ NOT NULL, operation TEXT NOT NULL, vector_count INTEGER NOT NULL, duration_ms INTEGER NOT NULL, index_size BIGINT NOT NULL, metadata JSONB NOT NULL DEFAULT '{}',
PRIMARY KEY (timestamp, operation)
);
-- Create hypertable for vector metrics SELECT create_hypertable('metrics.vector_metrics', 'timestamp');
-- Continuous Aggregates for Different Windows
-- 5-minute aggregates CREATE MATERIALIZED VIEW metrics.aggregate_5m WITH (timescaledb.continuous) AS SELECT time_bucket('5 minutes', timestamp) AS bucket, metric_name, tags, MIN(value) as min_value, MAX(value) as max_value, AVG(value) as avg_value, SUM(value) as sum_value, COUNT(*) as count, percentile_cont(0.50) WITHIN GROUP (ORDER BY value) as p50_value, percentile_cont(0.90) WITHIN GROUP (ORDER BY value) as p90_value, percentile_cont(0.95) WITHIN GROUP (ORDER BY value) as p95_value, percentile_cont(0.99) WITHIN GROUP (ORDER BY value) as p99_value FROM metrics.raw_metrics GROUP BY bucket, metric_name, tags WITH NO DATA;
-- 1-hour aggregates CREATE MATERIALIZED VIEW metrics.aggregate_1h WITH (timescaledb.continuous) AS SELECT time_bucket('1 hour', timestamp) AS bucket, metric_name, tags, MIN(value) as min_value, MAX(value) as max_value, AVG(value) as avg_value, SUM(value) as sum_value, COUNT(*) as count, percentile_cont(0.50) WITHIN GROUP (ORDER BY value) as p50_value, percentile_cont(0.90) WITHIN GROUP (ORDER BY value) as p90_value, percentile_cont(0.95) WITHIN GROUP (ORDER BY value) as p95_value, percentile_cont(0.99) WITHIN GROUP (ORDER BY value) as p99_value FROM metrics.raw_metrics GROUP BY bucket, metric_name, tags WITH NO DATA;
-- 24-hour aggregates CREATE MATERIALIZED VIEW metrics.aggregate_24h WITH (timescaledb.continuous) AS SELECT time_bucket('1 day', timestamp) AS bucket, metric_name, tags, MIN(value) as min_value, MAX(value) as max_value, AVG(value) as avg_value, SUM(value) as sum_value, COUNT(*) as count, percentile_cont(0.50) WITHIN GROUP (ORDER BY value) as p50_value, percentile_cont(0.90) WITHIN GROUP (ORDER BY value) as p90_value, percentile_cont(0.95) WITHIN GROUP (ORDER BY value) as p95_value, percentile_cont(0.99) WITHIN GROUP (ORDER BY value) as p99_value FROM metrics.raw_metrics GROUP BY bucket, metric_name, tags WITH NO DATA;
-- Add retention policies SELECT add_retention_policy('metrics.raw_metrics', INTERVAL '7 days'); SELECT add_retention_policy('metrics.aggregated_metrics', INTERVAL '90 days'); SELECT add_retention_policy('metrics.system_metrics', INTERVAL '30 days'); SELECT add_retention_policy('metrics.pipeline_metrics', INTERVAL '30 days'); SELECT add_retention_policy('metrics.vector_metrics', INTERVAL '30 days');
-- Create refresh policies for continuous aggregates SELECT add_continuous_aggregate_policy('metrics.aggregate_5m', start_offset => INTERVAL '1 hour', end_offset => INTERVAL '1 minute', schedule_interval => INTERVAL '5 minutes');
SELECT add_continuous_aggregate_policy('metrics.aggregate_1h', start_offset => INTERVAL '1 day', end_offset => INTERVAL '1 hour', schedule_interval => INTERVAL '1 hour');
SELECT add_continuous_aggregate_policy('metrics.aggregate_24h', start_offset => INTERVAL '7 days', end_offset => INTERVAL '1 day', schedule_interval => INTERVAL '1 day');
-- Create indices for efficient querying CREATE INDEX idx_raw_metrics_metric_name ON metrics.raw_metrics (metric_name, timestamp DESC); CREATE INDEX idx_raw_metrics_tags ON metrics.raw_metrics USING GIN (tags);
CREATE INDEX idx_aggregated_metrics_lookup ON metrics.aggregated_metrics ( metric_name, window_size, timestamp DESC ); CREATE INDEX idx_aggregated_metrics_tags ON metrics.aggregated_metrics USING GIN (tags);
CREATE INDEX idx_system_metrics_group ON metrics.system_metrics (metric_group, timestamp DESC); CREATE INDEX idx_system_metrics_metrics ON metrics.system_metrics USING GIN (metrics);
CREATE INDEX idx_pipeline_metrics_pipeline ON metrics.pipeline_metrics (pipeline_id, timestamp DESC); CREATE INDEX idx_pipeline_metrics_stage ON metrics.pipeline_metrics (stage, timestamp DESC); CREATE INDEX idx_pipeline_metrics_metadata ON metrics.pipeline_metrics USING GIN (metadata);
CREATE INDEX idx_vector_metrics_operation ON metrics.vector_metrics (operation, timestamp DESC); CREATE INDEX idx_vector_metrics_metadata ON metrics.vector_metrics USING GIN (metadata);
-- Create views for common queries CREATE VIEW metrics.recent_errors AS SELECT * FROM metrics.pipeline_metrics WHERE error_count > 0 AND timestamp >= NOW() - INTERVAL '24 hours' ORDER BY timestamp DESC;
CREATE VIEW metrics.system_health AS SELECT m.timestamp, m.metric_group, m.metrics->>'cpu_usage' as cpu_usage, m.metrics->>'memory_usage' as memory_usage, m.metrics->>'disk_usage' as disk_usage, m.metrics->>'network_in' as network_in, m.metrics->>'network_out' as network_out FROM metrics.system_metrics m WHERE timestamp >= NOW() - INTERVAL '1 hour' ORDER BY timestamp DESC;
CREATE VIEW metrics.vector_performance AS SELECT date_trunc('minute', timestamp) as minute, operation, AVG(duration_ms) as avg_duration, SUM(vector_count) as total_vectors, MAX(index_size) as index_size FROM metrics.vector_metrics WHERE timestamp >= NOW() - INTERVAL '24 hours' GROUP BY minute, operation ORDER BY minute DESC;
-- Create functions for common metrics queries CREATE OR REPLACE FUNCTION metrics.get_metric_statistics( p_metric_name TEXT, p_window_size TEXT, p_start_time TIMESTAMPTZ, p_end_time TIMESTAMPTZ, p_tags JSONB DEFAULT NULL ) RETURNS TABLE ( timestamp TIMESTAMPTZ, min_value DOUBLE PRECISION, max_value DOUBLE PRECISION, avg_value DOUBLE PRECISION, p95_value DOUBLE PRECISION, count INTEGER ) AS $$ BEGIN RETURN QUERY SELECT a.timestamp, a.min_value, a.max_value, a.avg_value, a.p95_value, a.count FROM metrics.aggregated_metrics a WHERE a.metric_name = p_metric_name AND a.window_size = p_window_size AND a.timestamp BETWEEN p_start_time AND p_end_time AND (p_tags IS NULL OR a.tags @> p_tags) ORDER BY a.timestamp DESC; END; $$ LANGUAGE plpgsql;
-- Create metrics summary function CREATE OR REPLACE FUNCTION metrics.get_system_summary( p_interval INTERVAL DEFAULT INTERVAL '1 hour' ) RETURNS TABLE ( metric_name TEXT, current_value DOUBLE PRECISION, min_value DOUBLE PRECISION, max_value DOUBLE PRECISION, avg_value DOUBLE PRECISION ) AS $$ BEGIN RETURN QUERY WITH recent_metrics AS ( SELECT metric_name, value, ROW_NUMBER() OVER (PARTITION BY metric_name ORDER BY timestamp DESC) as rn FROM metrics.raw_metrics WHERE timestamp >= NOW() - p_interval ) SELECT r.metric_name, MAX(CASE WHEN rn = 1 THEN value END) as current_value, MIN(value) as min_value, MAX(value) as max_value, AVG(value) as avg_value FROM recent_metrics r GROUP BY r.metric_name; END; $$ LANGUAGE plpgsql;