Sequence Diagram: Heartbeat Renewal Flow
Purpose: Periodic heartbeat mechanism to maintain active seat reservations and prevent zombie sessions.
Actors:
- CODITECT Client (background heartbeat thread)
- License API (Django on GKE)
- Redis (session TTL renewal)
- PostgreSQL (session activity logging)
Flow: Automatic heartbeat every 5 minutes with TTL renewal and failure handling
Mermaid Sequence Diagram
Step-by-Step Breakdown
1. Client Heartbeat Thread (Background)
Client-side: Heartbeat background thread:
# Client-side: Heartbeat manager
import threading
import time
import logging
from typing import Optional
logger = logging.getLogger(__name__)
class HeartbeatManager:
    """
    Manages periodic heartbeat to maintain seat reservation.

    Features:
    - Background thread for non-blocking heartbeats
    - Automatic retry on transient failures
    - Graceful shutdown on permanent failures

    The pause between heartbeats is an interruptible ``threading.Event``
    wait, so ``stop()`` and the terminal failure handlers end the loop
    immediately instead of after a full heartbeat interval.
    """

    # Endpoint that renews the session TTL on the license server.
    HEARTBEAT_URL = 'https://api.coditect.ai/api/v1/license/seat/heartbeat'

    def __init__(
        self,
        jwt_token: str,
        license_key: str,
        session_id: str,
        heartbeat_interval: int = 300  # 5 minutes
    ):
        """
        Store credentials and heartbeat settings; no thread is started here.

        Args:
            jwt_token: Bearer token sent in the Authorization header.
            license_key: License key sent with every heartbeat.
            session_id: Seat session to keep alive.
            heartbeat_interval: Seconds to wait between heartbeats.
        """
        self.jwt_token = jwt_token
        self.license_key = license_key
        self.session_id = session_id
        self.heartbeat_interval = heartbeat_interval
        self.running = False
        self.thread: Optional[threading.Thread] = None
        self.failure_count = 0
        self.max_failures = 3  # Stop after 3 consecutive failures
        # Set whenever the loop must end; wakes the loop out of its wait.
        self._stop_event = threading.Event()

    def start(self):
        """
        Start heartbeat background thread.

        Does nothing (besides logging a warning) if already running.
        """
        if self.running:
            logger.warning("Heartbeat already running")
            return
        # Clear a stop signal left over from a previous run.
        self._stop_event.clear()
        self.running = True
        self.thread = threading.Thread(
            target=self._heartbeat_loop,
            daemon=True,  # Exit when main thread exits
            name="HeartbeatThread"
        )
        self.thread.start()
        logger.info(f"Heartbeat started (interval: {self.heartbeat_interval}s)")

    def stop(self):
        """
        Stop heartbeat thread gracefully.

        Waits up to 5 seconds for the thread to finish an in-flight
        heartbeat request.
        """
        if not self.running:
            return
        logger.info("Stopping heartbeat...")
        self.running = False
        self._stop_event.set()  # Wake the loop if it is waiting
        worker = self.thread
        # A thread cannot join itself (RuntimeError), so skip the join when
        # stop() is invoked from the heartbeat thread.
        if (
            worker
            and worker.is_alive()
            and worker is not threading.current_thread()
        ):
            worker.join(timeout=5)
        logger.info("Heartbeat stopped")

    def _heartbeat_loop(self):
        """
        Main heartbeat loop (runs in background thread).

        Sends a heartbeat, tracks consecutive failures, and triggers
        ``_handle_heartbeat_failure`` once ``max_failures`` is reached.
        """
        while self.running:
            try:
                # Send heartbeat
                success = self._send_heartbeat()
                if not self.running:
                    # stop() was called, or a terminal handler (license
                    # invalid / seat lost) already ran during the request;
                    # do not count the failure or handle it a second time.
                    break
                if success:
                    self.failure_count = 0
                    logger.debug("Heartbeat successful")
                else:
                    self.failure_count += 1
                    logger.warning(f"Heartbeat failed ({self.failure_count}/{self.max_failures})")
                    if self.failure_count >= self.max_failures:
                        logger.error("Max heartbeat failures reached - stopping CODITECT")
                        self._handle_heartbeat_failure()
                        break
            except Exception as e:
                logger.exception(f"Heartbeat error: {e}")
                self.failure_count += 1
                if self.failure_count >= self.max_failures:
                    self._handle_heartbeat_failure()
                    break
            # Sleep until next heartbeat; returns True as soon as a stop
            # is requested.
            if self._stop_event.wait(self.heartbeat_interval):
                break

    def _send_heartbeat(self) -> bool:
        """
        Send heartbeat to license API.

        A 404 triggers a seat re-acquisition attempt and a 403 triggers the
        license-invalid shutdown path.

        Returns:
            True if the heartbeat succeeded or the seat was re-acquired
            after a 404, False otherwise
        """
        import requests
        try:
            response = requests.post(
                self.HEARTBEAT_URL,
                headers={
                    'Authorization': f'Bearer {self.jwt_token}',
                    'Content-Type': 'application/json'
                },
                json={
                    'license_key': self.license_key,
                    'session_id': self.session_id
                },
                timeout=30  # 30 second timeout
            )
            if response.status_code == 200:
                return True
            elif response.status_code == 404:
                # Session not found - lost seat
                logger.error("Session not found - seat lost")
                # A successful re-acquisition means the seat is held again,
                # so it must not be counted as a failed heartbeat.
                return self._handle_session_lost()
            elif response.status_code == 403:
                # License expired/invalid
                logger.error("License expired or invalid")
                self._handle_license_invalid()
                return False
            else:
                # Unexpected error
                logger.error(f"Heartbeat failed: {response.status_code} {response.text}")
                return False
        except requests.exceptions.Timeout:
            logger.warning("Heartbeat timeout")
            return False
        except requests.exceptions.ConnectionError:
            logger.warning("Heartbeat connection error (network issue)")
            return False
        except Exception as e:
            logger.exception(f"Heartbeat exception: {e}")
            return False

    def _handle_heartbeat_failure(self):
        """
        Handle max heartbeat failures.

        Actions:
        - Stop CODITECT
        - Show error dialog
        - Clean up resources
        """
        logger.critical("Heartbeat failed - shutting down CODITECT")
        # Stop background processes
        self.running = False
        self._stop_event.set()
        # Show error dialog to user
        from .ui import show_error_dialog
        show_error_dialog(
            title="Connection Lost",
            message="Unable to maintain connection to license server.\n"
                    "CODITECT will now exit.\n\n"
                    "Please check your network connection and try again.",
            exit_after=True
        )

    def _handle_session_lost(self) -> bool:
        """
        Handle session not found (seat lost).

        Possible causes:
        - Session TTL expired (heartbeat missed)
        - Session released by admin
        - Redis data loss (rare)

        Actions:
        - Attempt to re-acquire seat
        - If fails, show error and exit

        Returns:
            True if a new seat was acquired (``session_id`` is updated),
            False if re-acquisition failed and shutdown was triggered
        """
        logger.warning("Session lost - attempting to re-acquire seat")
        try:
            # Try to re-acquire seat
            from .license_client import acquire_seat_with_retry
            result = acquire_seat_with_retry(
                jwt_token=self.jwt_token,
                license_key=self.license_key,
                user_email=self._get_user_email(),
                max_retries=1  # Only try once
            )
            if result:
                # Update session ID
                self.session_id = result['session_id']
                self.failure_count = 0
                logger.info("Seat re-acquired successfully")
                return True
            # Failed to re-acquire
            logger.error("Failed to re-acquire seat")
            self._handle_heartbeat_failure()
        except Exception as e:
            logger.exception(f"Re-acquisition failed: {e}")
            self._handle_heartbeat_failure()
        return False

    def _handle_license_invalid(self):
        """
        Handle license expired or invalid.

        Actions:
        - Stop CODITECT immediately
        - Show expiry/invalid message
        - Offer renewal link
        """
        logger.error("License invalid - stopping CODITECT")
        self.running = False
        self._stop_event.set()
        from .ui import show_error_dialog
        show_error_dialog(
            title="License Expired",
            message="Your CODITECT license has expired.\n\n"
                    "Please renew your license at:\n"
                    "https://coditect.ai/renew",
            exit_after=True,
            show_renew_button=True
        )

    def _get_user_email(self) -> str:
        """Get user email from license cache.

        Reads ``~/.coditect/license.json``; falls back to
        ``'unknown@example.com'`` when the file has no ``user_email`` key.
        Errors opening or parsing the file propagate to the caller.
        """
        import json
        import os
        license_cache_path = os.path.expanduser('~/.coditect/license.json')
        with open(license_cache_path, 'r', encoding='utf-8') as f:
            license_data = json.load(f)
        return license_data.get('user_email', 'unknown@example.com')
2. Server-Side Heartbeat Endpoint
Django REST Framework heartbeat endpoint:
# Server-side: Heartbeat endpoint
from rest_framework import viewsets, status
from rest_framework.decorators import action
from rest_framework.response import Response
from rest_framework.permissions import IsAuthenticated
from rest_framework import serializers
import redis
from django.utils import timezone
# Request/Response Serializers
class HeartbeatRequestSerializer(serializers.Serializer):
    """Validate the body of a heartbeat request.

    Both fields are required strings of at most 255 characters.
    """

    # Key of the license whose seat set the session is counted in.
    license_key = serializers.CharField(max_length=255)
    # Identifier of the seat session whose TTL should be renewed.
    session_id = serializers.CharField(max_length=255)
class HeartbeatResponseSerializer(serializers.Serializer):
    """Describe the payload returned for a successful heartbeat."""

    # Whether the heartbeat was accepted.
    success = serializers.BooleanField()
    # Number of sessions currently holding a seat on the license.
    active_seats = serializers.IntegerField()
    # Delay before the client should send its next heartbeat, in seconds.
    next_heartbeat = serializers.IntegerField()
from rest_framework.decorators import api_view, permission_classes
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from rest_framework import status
from django.conf import settings
from django.utils import timezone
import logging
from apps.licenses.models import License, Session
logger = logging.getLogger(__name__)
# TTL applied to the session key and the license's session set on every
# heartbeat; one minute longer than the heartbeat interval.
SESSION_TTL_SECONDS = 360  # 6 minutes
# Interval the client is told to wait before sending the next heartbeat.
HEARTBEAT_INTERVAL_SECONDS = 300  # 5 minutes

# Renews the session atomically. Returns 0 when the session key no longer
# exists (so an expired session is never re-added to the seat set), 1 on
# success.
_RENEW_SESSION_LUA = """
local session_key = KEYS[1]
local sessions_set_key = KEYS[2]
local session_id = ARGV[1]
local ttl = tonumber(ARGV[2])
-- Renew session TTL; EXPIRE returns 0 when the key does not exist
if redis.call('EXPIRE', session_key, ttl) == 0 then
    return 0
end
-- Make sure the session is counted (SADD is a no-op if already present)
redis.call('SADD', sessions_set_key, session_id)
-- Renew sessions set TTL after SADD so a freshly created set gets a TTL too
redis.call('EXPIRE', sessions_set_key, ttl)
return 1
"""

# Prometheus collectors must be created once per process: constructing a
# collector with an already-registered name raises ValueError, so they
# cannot be created inside the view on every request.
from prometheus_client import Counter, Gauge

HEARTBEAT_SUCCESS = Counter(
    'heartbeat_success_total',
    'Total successful heartbeats',
    ['license_key']
)
ACTIVE_SESSIONS = Gauge(
    'active_sessions',
    'Active sessions',
    ['license_key']
)


@api_view(['POST'])
@permission_classes([IsAuthenticated])
def heartbeat(request):
    """
    Renew session TTL via heartbeat.

    Process:
    1. Verify the session exists in Redis and renew its TTL (6 minutes)
       in a single atomic Lua script
    2. Update last_heartbeat_at in PostgreSQL
    3. Validate license still active
    4. Read the active seat count and update metrics
    5. Return active seat count

    Returns:
        200 OK: Heartbeat successful
        400 Bad Request: Request body failed validation
        404 Not Found: Session not found (expired) or license not found
        403 Forbidden: License expired/invalid
    """
    # Validate request data
    serializer = HeartbeatRequestSerializer(data=request.data)
    if not serializer.is_valid():
        return Response(
            {"detail": "Invalid request data", "errors": serializer.errors},
            status=status.HTTP_400_BAD_REQUEST
        )
    license_key = serializer.validated_data['license_key']
    session_id = serializer.validated_data['session_id']
    # NOTE(review): nothing here checks that session_id belongs to
    # license_key or to request.user - confirm this is enforced elsewhere.
    redis_client = redis.StrictRedis(
        host=settings.REDIS_HOST,
        port=6379,
        db=0,
        decode_responses=True
    )
    session_key = f"session:{session_id}"
    sessions_set_key = f"license:{license_key}:sessions"

    # Step 1: Verify session exists and renew TTLs (atomic operation).
    # Doing the existence check inside the script closes the window in
    # which the session could expire between a separate EXISTS and EXPIRE.
    renewed = redis_client.eval(
        _RENEW_SESSION_LUA,
        2,  # Number of keys
        session_key,
        sessions_set_key,
        session_id,
        SESSION_TTL_SECONDS
    )
    if not renewed:
        # Session expired or never existed
        logger.warning(f"Session not found: {session_id}")
        return Response(
            {"detail": "Session not found (may have expired)"},
            status=status.HTTP_404_NOT_FOUND
        )

    # Step 2: Update last_heartbeat_at in PostgreSQL
    Session.objects.filter(
        session_id=session_id
    ).update(
        last_heartbeat_at=timezone.now()
    )

    # Step 3: Validate license still active
    try:
        license_obj = License.objects.get(license_key=license_key)
    except License.DoesNotExist:
        return Response(
            {"detail": "License not found"},
            status=status.HTTP_404_NOT_FOUND
        )
    if not license_obj.is_active or license_obj.is_expired:
        # License became invalid - inform client to shutdown
        logger.warning(f"License inactive during heartbeat: {license_key}")
        return Response(
            {"detail": "License expired or inactive"},
            status=status.HTTP_403_FORBIDDEN
        )

    # Step 4: Get active seat count and update metrics
    active_count = redis_client.scard(sessions_set_key)
    HEARTBEAT_SUCCESS.labels(license_key=license_key).inc()
    ACTIVE_SESSIONS.labels(license_key=license_key).set(active_count)

    # Step 5: Return success
    return Response(
        {
            'success': True,
            'active_seats': active_count,
            'next_heartbeat': HEARTBEAT_INTERVAL_SECONDS
        },
        status=status.HTTP_200_OK
    )
Error Scenarios
Heartbeat Timeout (Network Issue)
Session Expired (Missed Heartbeats)
License Expired During Session
Performance Characteristics
Heartbeat Latency:
| Step | Operation | Latency | Notes |
|---|---|---|---|
| 1 | Network round-trip | ~100ms | Varies by location |
| 2 | Redis EXISTS | ~1ms | In-memory lookup |
| 3 | Redis Lua script (EXPIRE) | ~2ms | Atomic operation |
| 4 | PostgreSQL UPDATE | ~10ms | Indexed update |
| 5 | License validation | ~5ms | Cached or indexed |
| 6 | Redis SCARD | ~1ms | Set cardinality |
| 7 | Metrics logging | ~1ms | Prometheus counter |
Total: ~120ms (typical)
Resource Usage:
Heartbeat Overhead (per client):
- CPU: <0.1% (background thread)
- Memory: ~1 MB (thread stack)
- Network: ~500 bytes/5 min = 0.002 KB/s
- Redis: 2 keys per session (~200 bytes)
- PostgreSQL: 1 row per session (~500 bytes)
For 1000 active sessions:
- Network: 2 KB/s aggregate
- Redis: 200 KB memory
- PostgreSQL: 500 KB storage
Heartbeat Interval Tuning:
| Interval | TTL | Tolerance | Trade-offs |
|---|---|---|---|
| 5 min | 6 min | 1 min | ✅ Default (good balance) |
| 2 min | 3 min | 1 min | ⚠️ More network traffic |
| 10 min | 12 min | 2 min | ⚠️ Longer zombie cleanup |
| 1 min | 2 min | 1 min | ❌ Excessive traffic |
Recommendation: 5-minute heartbeat with 6-minute TTL (20% grace period)
Offline Mode Handling
What happens when the network is unavailable?
# Client-side: Offline mode detection
class HeartbeatManager:
    """Offline-mode excerpt: heartbeat sending with a cached-license fallback."""

    def _send_heartbeat(self) -> bool:
        """
        Send a heartbeat, falling back to offline mode on network failure.

        Returns:
            True if the server answered 200, or if the network is
            unavailable but the cached license token is still valid;
            False otherwise
        """
        import requests
        try:
            response = requests.post(...)
            return response.status_code == 200
        except requests.exceptions.ConnectionError:
            # Network unavailable - enter offline mode
            logger.warning("Network unavailable - entering offline mode")
            # Check if license token still valid
            if self._is_license_token_valid():
                logger.info("License token still valid - continuing offline")
                # Reset failure count - don't count network issues as failures
                self.failure_count = 0
                return True  # Treat as success (offline mode)
            else:
                logger.error("License token expired - cannot continue offline")
                return False  # Token expired - must exit

    def _is_license_token_valid(self) -> bool:
        """
        Check if cached license token is still valid.

        Allows offline operation until token expires (24h default).
        Reads ``valid_until`` (ISO 8601) from ``~/.coditect/license.json``.
        A timestamp without a UTC offset is interpreted as UTC.

        Returns:
            True if ``valid_until`` lies in the future; False if it has
            passed or the cache file is missing, unreadable or malformed
        """
        import json
        import os
        from datetime import datetime
        from datetime import timezone
        license_cache_path = os.path.expanduser('~/.coditect/license.json')
        try:
            with open(license_cache_path, 'r', encoding='utf-8') as f:
                license_data = json.load(f)
            raw_valid_until = license_data['valid_until']
            # datetime.fromisoformat() rejects a trailing "Z" before
            # Python 3.11; normalise it to an explicit UTC offset.
            if isinstance(raw_valid_until, str) and raw_valid_until.endswith('Z'):
                raw_valid_until = raw_valid_until[:-1] + '+00:00'
            valid_until = datetime.fromisoformat(raw_valid_until)
        except (OSError, KeyError, TypeError, ValueError):
            # Missing/unreadable file, missing key, wrong JSON shape or an
            # unparsable timestamp: treat the token as not valid.
            return False
        # Compare timezone-aware values; comparing naive and aware
        # datetimes raises TypeError.
        if valid_until.tzinfo is None:
            valid_until = valid_until.replace(tzinfo=timezone.utc)
        return datetime.now(timezone.utc) < valid_until
Offline Mode Flow:
Network Available:
├─ Heartbeat success → Continue normally
└─ Heartbeat fails (403, or 404 when the seat cannot be re-acquired) → Exit CODITECT
Network Unavailable:
├─ License token valid → Continue in offline mode
│ └─ Show "Offline mode" indicator in UI
└─ License token expired → Exit CODITECT
└─ Show "Network required" error
Related Documentation
- ADR-004: Session Management (TTL strategy)
- ADR-011: Zombie Session Cleanup
- 02-seat-acquisition-flow.md: Initial seat acquisition
- 04-seat-release-flow.md: Explicit seat release
- 05-zombie-session-cleanup-flow.md: Automatic cleanup
Last Updated: 2025-11-30 | Diagram Type: Sequence (Mermaid) | Scope: Core licensing flow - Heartbeat renewal