Skip to main content

Sequence Diagram: Heartbeat Renewal Flow

Purpose: Periodic heartbeat mechanism to maintain active seat reservations and prevent zombie sessions.

Actors:

  • CODITECT Client (background heartbeat thread)
  • License API (Django on GKE)
  • Redis (session TTL renewal)
  • PostgreSQL (session activity logging)

Flow: Automatic heartbeat every 5 minutes with TTL renewal and failure handling


Mermaid Sequence Diagram


Step-by-Step Breakdown

1. Client Heartbeat Thread (Background)

Client-side heartbeat manager (runs in a background thread):

# Client-side: Heartbeat manager
import threading
import time
import logging
from typing import Optional

logger = logging.getLogger(__name__)

class HeartbeatManager:
    """
    Manages periodic heartbeat to maintain seat reservation.

    Features:
    - Background thread for non-blocking heartbeats
    - Automatic retry on transient failures
    - Graceful shutdown on permanent failures
    - Interruptible wait between heartbeats, so stop() returns promptly
      instead of waiting out a full heartbeat interval
    """

    # Endpoint that renews the session TTL on the license server.
    HEARTBEAT_URL = 'https://api.coditect.ai/api/v1/license/seat/heartbeat'
    # Seconds to wait for the license server before giving up on one heartbeat.
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(
        self,
        jwt_token: str,
        license_key: str,
        session_id: str,
        heartbeat_interval: int = 300,  # 5 minutes
    ):
        """
        Store credentials and initialise (but do not start) the heartbeat.

        Args:
            jwt_token: Bearer token sent in the Authorization header.
            license_key: License key sent in the heartbeat payload.
            session_id: Session whose TTL the heartbeat renews.
            heartbeat_interval: Seconds between heartbeats.
        """
        self.jwt_token = jwt_token
        self.license_key = license_key
        self.session_id = session_id
        self.heartbeat_interval = heartbeat_interval

        self.running = False
        self.thread: Optional[threading.Thread] = None
        self.failure_count = 0
        self.max_failures = 3  # Stop after 3 consecutive failures

        # Set whenever the loop must stop; waiting on it (instead of
        # time.sleep) lets the loop wake up immediately on shutdown.
        self._stop_event = threading.Event()

    def start(self):
        """
        Start heartbeat background thread.

        Does nothing (apart from logging a warning) if already running.
        """
        if self.running:
            logger.warning("Heartbeat already running")
            return

        self._stop_event.clear()
        self.running = True
        self.thread = threading.Thread(
            target=self._heartbeat_loop,
            daemon=True,  # Exit when main thread exits
            name="HeartbeatThread",
        )
        self.thread.start()

        logger.info(f"Heartbeat started (interval: {self.heartbeat_interval}s)")

    def stop(self):
        """
        Stop heartbeat thread gracefully.

        Wakes the loop out of its inter-heartbeat wait and joins the thread
        for up to 5 seconds (an in-flight HTTP request may outlive that).
        """
        if not self.running:
            return

        logger.info("Stopping heartbeat...")
        self._signal_stop()

        worker = self.thread
        # A thread cannot join itself (RuntimeError), so skip the join when
        # stop() is invoked from the heartbeat thread.
        if (
            worker
            and worker.is_alive()
            and worker is not threading.current_thread()
        ):
            worker.join(timeout=5)

        logger.info("Heartbeat stopped")

    def _signal_stop(self):
        """Mark the manager as stopped and wake the loop if it is waiting."""
        self.running = False
        self._stop_event.set()

    def _heartbeat_loop(self):
        """
        Main heartbeat loop (runs in background thread).

        Sends a heartbeat, tracks consecutive failures, and waits one
        interval between attempts. Exits when the manager is stopped or
        when max_failures consecutive failures have been recorded.
        """
        while self.running:
            try:
                success = self._send_heartbeat()
            except Exception as e:
                # Anything unexpected counts as one failed heartbeat.
                logger.exception(f"Heartbeat error: {e}")
                success = False

            if not self.running:
                # A handler (license invalid / seat lost) or stop() already
                # shut the manager down; do not report a second failure.
                break

            if success:
                self.failure_count = 0
                logger.debug("Heartbeat successful")
            else:
                self.failure_count += 1
                logger.warning(f"Heartbeat failed ({self.failure_count}/{self.max_failures})")

                if self.failure_count >= self.max_failures:
                    logger.error("Max heartbeat failures reached - stopping CODITECT")
                    self._handle_heartbeat_failure()
                    break

            # Wait until the next heartbeat; returns True as soon as a stop
            # is signalled, so shutdown does not wait out the interval.
            if self._stop_event.wait(self.heartbeat_interval):
                break

    def _send_heartbeat(self) -> bool:
        """
        Send heartbeat to license API.

        Returns:
            True if the heartbeat succeeded, or if the seat was lost (404)
            but re-acquired; False otherwise.
        """
        import requests

        try:
            response = requests.post(
                self.HEARTBEAT_URL,
                headers={
                    'Authorization': f'Bearer {self.jwt_token}',
                    'Content-Type': 'application/json'
                },
                json={
                    'license_key': self.license_key,
                    'session_id': self.session_id
                },
                timeout=self.REQUEST_TIMEOUT_SECONDS
            )

            if response.status_code == 200:
                return True

            elif response.status_code == 404:
                # Session not found - lost seat
                logger.error("Session not found - seat lost")
                # A successful re-acquisition keeps the reservation alive,
                # so it must not be counted as a failed heartbeat.
                return bool(self._handle_session_lost())

            elif response.status_code == 403:
                # License expired/invalid
                logger.error("License expired or invalid")
                self._handle_license_invalid()
                return False

            else:
                # Unexpected error
                logger.error(f"Heartbeat failed: {response.status_code} {response.text}")
                return False

        except requests.exceptions.Timeout:
            logger.warning("Heartbeat timeout")
            return False

        except requests.exceptions.ConnectionError:
            logger.warning("Heartbeat connection error (network issue)")
            return False

        except Exception as e:
            logger.exception(f"Heartbeat exception: {e}")
            return False

    def _handle_heartbeat_failure(self):
        """
        Handle max heartbeat failures.

        Actions:
        - Stop the heartbeat loop
        - Show error dialog (requested with exit_after=True)
        """
        logger.critical("Heartbeat failed - shutting down CODITECT")

        # Stop background processes
        self._signal_stop()

        # Show error dialog to user
        from .ui import show_error_dialog

        show_error_dialog(
            title="Connection Lost",
            message="Unable to maintain connection to license server.\n"
                    "CODITECT will now exit.\n\n"
                    "Please check your network connection and try again.",
            exit_after=True
        )

    def _handle_session_lost(self) -> bool:
        """
        Handle session not found (seat lost).

        Possible causes:
        - Session TTL expired (heartbeat missed)
        - Session released by admin
        - Redis data loss (rare)

        Actions:
        - Attempt to re-acquire seat
        - If fails, show error and exit

        Returns:
            True if a seat was re-acquired (session_id is updated and the
            failure counter reset), False otherwise.
        """
        logger.warning("Session lost - attempting to re-acquire seat")

        try:
            # Try to re-acquire seat
            from .license_client import acquire_seat_with_retry

            result = acquire_seat_with_retry(
                jwt_token=self.jwt_token,
                license_key=self.license_key,
                user_email=self._get_user_email(),
                max_retries=1  # Only try once
            )

            if result:
                # Update session ID
                self.session_id = result['session_id']
                self.failure_count = 0
                logger.info("Seat re-acquired successfully")
                return True

            # Failed to re-acquire
            logger.error("Failed to re-acquire seat")

        except Exception as e:
            logger.exception(f"Re-acquisition failed: {e}")

        self._handle_heartbeat_failure()
        return False

    def _handle_license_invalid(self):
        """
        Handle license expired or invalid.

        Actions:
        - Stop the heartbeat loop immediately
        - Show expiry/invalid message
        - Offer renewal link
        """
        logger.error("License invalid - stopping CODITECT")

        self._signal_stop()

        from .ui import show_error_dialog

        show_error_dialog(
            title="License Expired",
            message="Your CODITECT license has expired.\n\n"
                    "Please renew your license at:\n"
                    "https://coditect.ai/renew",
            exit_after=True,
            show_renew_button=True
        )

    def _get_user_email(self) -> str:
        """
        Get user email from license cache (~/.coditect/license.json).

        Returns:
            The cached 'user_email', or 'unknown@example.com' when the key
            is absent.

        Raises:
            OSError: If the cache file cannot be opened.
            ValueError: If the cache file is not valid JSON.
        """
        import json
        import os

        license_cache_path = os.path.expanduser('~/.coditect/license.json')

        with open(license_cache_path, 'r', encoding='utf-8') as f:
            license_data = json.load(f)
        return license_data.get('user_email', 'unknown@example.com')

2. Server-Side Heartbeat Endpoint

Django REST Framework heartbeat endpoint:

# Server-side: Heartbeat endpoint
from rest_framework import viewsets, status
from rest_framework.decorators import action
from rest_framework.response import Response
from rest_framework.permissions import IsAuthenticated
from rest_framework import serializers
import redis
from django.utils import timezone

# Request/Response Serializers
class HeartbeatRequestSerializer(serializers.Serializer):
    """Validates the JSON body a client posts to the heartbeat endpoint."""

    # License the session was acquired under.
    license_key = serializers.CharField(max_length=255)

    # Session whose TTL should be renewed.
    session_id = serializers.CharField(max_length=255)

class HeartbeatResponseSerializer(serializers.Serializer):
    """Describes the payload of a successful heartbeat response."""

    # Whether the heartbeat was accepted.
    success = serializers.BooleanField()

    # Number of seats currently in use for the license.
    active_seats = serializers.IntegerField()

    # Seconds until the client should send its next heartbeat.
    next_heartbeat = serializers.IntegerField()

from rest_framework.decorators import api_view, permission_classes
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from rest_framework import status
from django.conf import settings
from django.utils import timezone
import logging

from apps.licenses.models import License, Session

logger = logging.getLogger(__name__)

from prometheus_client import Counter, Gauge

# Seconds a session survives in Redis without a heartbeat (6 minutes).
_SESSION_TTL_SECONDS = 360
# Seconds the client is told to wait before its next heartbeat (5 minutes).
_NEXT_HEARTBEAT_SECONDS = 300

# Metrics are created once at import time. prometheus_client registers each
# metric name in a global registry and raises ValueError when the same name
# is registered again, so constructing them inside the view would fail from
# the second request onward.
# NOTE(review): labelling by license_key is high-cardinality and exposes the
# key on the metrics endpoint - confirm this is intended.
_HEARTBEAT_SUCCESS = Counter(
    'heartbeat_success_total',
    'Total successful heartbeats',
    ['license_key']
)
_ACTIVE_SESSIONS = Gauge(
    'active_sessions',
    'Active sessions',
    ['license_key']
)

# Atomically renews a session. Returns 0 when the session key no longer
# exists (nothing is modified), 1 when the TTLs were renewed.
_RENEW_SESSION_LUA = """
local session_key = KEYS[1]
local sessions_set_key = KEYS[2]
local session_id = ARGV[1]
local ttl = tonumber(ARGV[2])

-- EXPIRE returns 0 when the key does not exist: the session is gone,
-- so do not touch the sessions set
if redis.call('EXPIRE', session_key, ttl) == 0 then
    return 0
end

-- Make sure the session is in the set (no-op if already a member).
-- Done before EXPIRE so that a set re-created here still gets a TTL.
redis.call('SADD', sessions_set_key, session_id)
redis.call('EXPIRE', sessions_set_key, ttl)

return 1
"""


@api_view(['POST'])
@permission_classes([IsAuthenticated])
def heartbeat(request):
    """
    Renew session TTL via heartbeat.

    Process:
    1. Verify session exists in Redis and renew its TTL (6 minutes) in one
       atomic Lua script
    2. Update last_heartbeat_at in PostgreSQL
    3. Validate license still active
    4. Record metrics and return active seat count

    Returns:
        200 OK: Heartbeat successful
        400 Bad Request: Request body failed validation
        404 Not Found: Session not found (expired) or license not found
        403 Forbidden: License expired/invalid
    """
    # Validate request data
    serializer = HeartbeatRequestSerializer(data=request.data)
    if not serializer.is_valid():
        return Response(
            {"detail": "Invalid request data", "errors": serializer.errors},
            status=status.HTTP_400_BAD_REQUEST
        )

    license_key = serializer.validated_data['license_key']
    session_id = serializer.validated_data['session_id']

    redis_client = redis.StrictRedis(
        host=settings.REDIS_HOST,
        port=6379,
        db=0,
        decode_responses=True
    )

    session_key = f"session:{session_id}"
    sessions_set_key = f"license:{license_key}:sessions"

    # Step 1: Verify the session exists and renew its TTL atomically.
    # Checking existence inside the script closes the window in which the
    # session could expire between a separate EXISTS call and the renewal.
    # NOTE(review): nothing here checks that session_id belongs to
    # license_key or to request.user - confirm that is enforced elsewhere.
    renewed = redis_client.eval(
        _RENEW_SESSION_LUA,
        2,  # Number of keys
        session_key,
        sessions_set_key,
        session_id,
        _SESSION_TTL_SECONDS
    )

    if not renewed:
        # Session expired or never existed
        logger.warning(f"Session not found: {session_id}")

        return Response(
            {"detail": "Session not found (may have expired)"},
            status=status.HTTP_404_NOT_FOUND
        )

    # Step 2: Update last_heartbeat_at in PostgreSQL
    Session.objects.filter(
        session_id=session_id
    ).update(
        last_heartbeat_at=timezone.now()
    )

    # Step 3: Validate license still active
    try:
        license_obj = License.objects.get(license_key=license_key)
    except License.DoesNotExist:
        return Response(
            {"detail": "License not found"},
            status=status.HTTP_404_NOT_FOUND
        )

    if not license_obj.is_active or license_obj.is_expired:
        # License became invalid - inform client to shutdown
        logger.warning(f"License inactive during heartbeat: {license_key}")

        return Response(
            {"detail": "License expired or inactive"},
            status=status.HTTP_403_FORBIDDEN
        )

    # Step 4: Get active seat count and update metrics
    active_count = redis_client.scard(sessions_set_key)

    _HEARTBEAT_SUCCESS.labels(license_key=license_key).inc()
    _ACTIVE_SESSIONS.labels(license_key=license_key).set(active_count)

    # Step 5: Return success
    return Response(
        {
            'success': True,
            'active_seats': active_count,
            'next_heartbeat': _NEXT_HEARTBEAT_SECONDS
        },
        status=status.HTTP_200_OK
    )

Error Scenarios

Heartbeat Timeout (Network Issue)

Session Expired (Missed Heartbeats)

License Expired During Session


Performance Characteristics

Heartbeat Latency:

| Step | Operation | Latency | Notes |
|------|-----------|---------|-------|
| 1 | Network round-trip | ~100ms | Varies by location |
| 2 | Redis EXISTS | ~1ms | In-memory lookup |
| 3 | Redis Lua script (EXPIRE) | ~2ms | Atomic operation |
| 4 | PostgreSQL UPDATE | ~10ms | Indexed update |
| 5 | License validation | ~5ms | Cached or indexed |
| 6 | Redis SCARD | ~1ms | Set cardinality |
| 7 | Metrics logging | ~1ms | Prometheus counter |

Total: ~120ms (typical)

Resource Usage:

Heartbeat Overhead (per client):
- CPU: <0.1% (background thread)
- Memory: ~1 MB (thread stack)
- Network: ~500 bytes/5 min = 0.002 KB/s
- Redis: 2 keys per session (~200 bytes)
- PostgreSQL: 1 row per session (~500 bytes)

For 1000 active sessions:
- Network: 2 KB/s aggregate
- Redis: 200 KB memory
- PostgreSQL: 500 KB storage

Heartbeat Interval Tuning:

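| Interval | TTL | Tolerance | Trade-offs |
|----------|-----|-----------|------------|
| 5 min | 6 min | 1 min | ✅ Default (good balance) |
| 2 min | 3 min | 1 min | ⚠️ More network traffic |
| 10 min | 12 min | 2 min | ⚠️ Longer zombie cleanup |
| 1 min | 2 min | 1 min | ❌ Excessive traffic |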

Recommendation: 5-minute heartbeat with 6-minute TTL (20% grace period)


Offline Mode Handling

What happens when network is unavailable?

# Client-side: Offline mode detection
class HeartbeatManager:
    """Offline-mode excerpt: heartbeat handling when the network is down."""

    def _send_heartbeat(self) -> bool:
        """
        Send a heartbeat, falling back to offline mode on connection errors.

        Returns:
            True if the server answered 200, or if the network is
            unavailable but the cached license token is still valid;
            False otherwise.
        """
        import requests

        try:
            # Request arguments are elided in this excerpt.
            response = requests.post(...)
            return response.status_code == 200

        except requests.exceptions.ConnectionError:
            # Network unavailable - enter offline mode
            logger.warning("Network unavailable - entering offline mode")

            # Check if license token still valid
            if self._is_license_token_valid():
                logger.info("License token still valid - continuing offline")
                # Reset failure count - don't count network issues as failures
                self.failure_count = 0
                return True  # Treat as success (offline mode)
            else:
                logger.error("License token expired - cannot continue offline")
                return False  # Token expired - must exit

    def _is_license_token_valid(self) -> bool:
        """
        Check if cached license token is still valid.

        Reads 'valid_until' (ISO 8601) from ~/.coditect/license.json and
        compares it with the current UTC time. Allows offline operation
        until the token expires.

        Returns:
            True if 'valid_until' lies in the future; False if it has
            passed or the cache is missing, unreadable or malformed.
        """
        import json
        import os
        from datetime import datetime, timezone

        license_cache_path = os.path.expanduser('~/.coditect/license.json')

        try:
            with open(license_cache_path, 'r', encoding='utf-8') as f:
                license_data = json.load(f)

            raw_valid_until = license_data['valid_until']
            # fromisoformat() only accepts a trailing 'Z' from Python 3.11;
            # normalise it to an explicit UTC offset for older versions.
            if isinstance(raw_valid_until, str) and raw_valid_until.endswith('Z'):
                raw_valid_until = raw_valid_until[:-1] + '+00:00'

            valid_until = datetime.fromisoformat(raw_valid_until)

        except (OSError, KeyError, TypeError, ValueError):
            # Missing/unreadable file, invalid JSON, wrong shape or bad date.
            return False

        if valid_until.tzinfo is None:
            # Naive timestamps are interpreted as UTC, matching the previous
            # comparison against datetime.utcnow().
            valid_until = valid_until.replace(tzinfo=timezone.utc)

        # Compare aware datetimes: mixing naive and aware raises TypeError.
        return datetime.now(timezone.utc) < valid_until

Offline Mode Flow:

Network Available:
├─ Heartbeat success → Continue normally
└─ Heartbeat fails (403, or 404 when the seat cannot be re-acquired) → Exit CODITECT

Network Unavailable:
├─ License token valid → Continue in offline mode
│ └─ Show "Offline mode" indicator in UI
└─ License token expired → Exit CODITECT
└─ Show "Network required" error

  • ADR-004: Session Management (TTL strategy)
  • ADR-011: Zombie Session Cleanup
  • 02-seat-acquisition-flow.md: Initial seat acquisition
  • 04-seat-release-flow.md: Explicit seat release
  • 05-zombie-session-cleanup-flow.md: Automatic cleanup

Last Updated: 2025-11-30 | Diagram Type: Sequence (Mermaid) | Scope: Core licensing flow - Heartbeat renewal