scripts-dashboard-healthcheck
#!/usr/bin/env python3
"""
---
title: "Add core to path for unified logger"
component_type: script
version: "1.0.0"
audience: contributor
status: stable
summary: "Dashboard Health Check and Server Manager"
keywords: ['dashboard', 'healthcheck']
tokens: ~500
created: 2025-12-22
updated: 2025-12-22
script_name: "dashboard-healthcheck.py"
language: python
executable: true
usage: "python3 scripts/dashboard-healthcheck.py [options]"
python_version: "3.10+"
dependencies: []
modifies_files: false
network_access: false
requires_auth: false
---

Dashboard Health Check and Server Manager
Checks if the dashboard HTTP server is running and starts it if needed. Prevents duplicate processes and ensures dashboard availability.
Features:
- Port availability check
- Process detection and cleanup
- Automatic server start
- Graceful shutdown of duplicates
- Comprehensive logging per ADR-0001

Author: AZ1.AI INC (Hal Casteel)
License: MIT
Framework: CODITECT
"""

import logging
import os
import signal
import socket
import subprocess
import sys
import time
from pathlib import Path
from typing import Optional, List, Tuple

# Add core to path for unified logger.
# SCRIPT_DIR is the directory containing this script; its "core" subdirectory
# is put first on sys.path so the unified_logger import below resolves there.
SCRIPT_DIR = Path(__file__).parent
sys.path.insert(0, str(SCRIPT_DIR / "core"))

from unified_logger import setup_unified_logger

# ============================================================================
# CONFIGURATION
# ============================================================================

# Port used when --port is not given on the command line.
DEFAULT_PORT = 8080

# Directory served by the dashboard HTTP server: four levels above this
# script's directory, then MEMORY-CONTEXT/dashboard.
DASHBOARD_PATH = (
    Path(__file__).parent.parent.parent.parent / "MEMORY-CONTEXT" / "dashboard"
)


# ============================================================================
# LOGGING SETUP
# ============================================================================

def setup_logging() -> logging.Logger:
    """Configure unified logging per ADR-0001 standards.

    Ensures a ``logs`` directory exists next to the script directory's parent
    and hands ``logs/dashboard-healthcheck.log`` to ``setup_unified_logger``.

    Returns:
        The logger produced by ``setup_unified_logger`` (INFO on the console,
        DEBUG in the log file, 5000-line cap passed as ``max_lines``).
    """
    logs_root = SCRIPT_DIR.parent / "logs"
    logs_root.mkdir(parents=True, exist_ok=True)

    return setup_unified_logger(
        component="dashboard-healthcheck",
        log_file=logs_root / "dashboard-healthcheck.log",
        max_lines=5000,
        console_level=logging.INFO,
        file_level=logging.DEBUG,
    )


# ============================================================================
# PORT AND PROCESS MANAGEMENT
# ============================================================================

def is_port_in_use(port: int) -> bool:
    """Check if a port is already in use.

    Probes by trying to bind a fresh IPv4 TCP socket to ``('localhost', port)``.
    A bind that fails with ``OSError`` is taken to mean the port is occupied.

    Args:
        port: TCP port number to probe.

    Returns:
        True if the bind failed, False if it succeeded.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.bind(('localhost', port))
    except OSError:
        return True
    else:
        return False
    finally:
        # Always release the probe socket so it never holds the port itself.
        probe.close()

def get_process_on_port(port: int) -> Optional[Tuple[int, str]]:
    """
    Find the process ID and command of the process listening on a TCP port.

    ``lsof`` is restricted to sockets in the LISTEN state. Without that
    filter it also reports processes that merely hold a connection to the
    port (for example a browser tab), and such a client could be mistaken
    for the port's owner.

    Args:
        port: TCP port number to inspect.

    Returns:
        Tuple of (pid, command), or None if no listening process was found
        or the lookup itself failed (``lsof`` missing, timeout, unparsable
        output). ``command`` is "unknown" when ``ps`` cannot report it.
    """
    try:
        # -t: print PIDs only; -nP: skip host/service name lookups;
        # -sTCP:LISTEN: ignore established client connections.
        lsof_result = subprocess.run(
            ['lsof', '-nP', f'-iTCP:{port}', '-sTCP:LISTEN', '-t'],
            capture_output=True,
            text=True,
            timeout=5,
        )
        pids = lsof_result.stdout.split()
        if lsof_result.returncode != 0 or not pids:
            return None

        # Several PIDs may be listed (e.g. forked workers); use the first.
        pid = int(pids[0])

        # Get command for this PID
        ps_result = subprocess.run(
            ['ps', '-p', str(pid), '-o', 'command='],
            capture_output=True,
            text=True,
            timeout=5,
        )
        command = ps_result.stdout.strip() if ps_result.returncode == 0 else "unknown"
        return (pid, command)
    except (OSError, subprocess.SubprocessError, ValueError):
        # Best effort: a missing tool, a timeout or odd output means "unknown".
        return None

def _pid_exists(pid: int) -> bool:
    """Return True if a process with this PID currently exists."""
    try:
        # Signal 0 delivers nothing; it only performs the existence check.
        os.kill(pid, 0)
    except ProcessLookupError:
        return False
    except PermissionError:
        # The process exists but is owned by another user.
        return True
    return True


def _wait_for_exit(pid: int, timeout: float, interval: float = 0.1) -> bool:
    """Poll until the PID disappears; return False if it outlives ``timeout``."""
    deadline = time.monotonic() + timeout
    while _pid_exists(pid):
        if time.monotonic() >= deadline:
            return False
        time.sleep(interval)
    return True


def kill_process(pid: int, logger: logging.Logger) -> bool:
    """
    Gracefully kill a process by PID.

    Tries SIGTERM first and waits up to one second for the process to exit,
    then falls back to SIGKILL. Signals are sent with ``os.kill`` so that a
    delivery failure (for example permission denied) is reported as a
    failure rather than silently ignored.

    Args:
        pid: Process ID to terminate. Must be positive; zero and negative
            values address process groups and are refused.
        logger: Logger that receives progress and error messages.

    Returns:
        True if the process is gone (or was already gone), or if SIGKILL was
        delivered. False if the PID is invalid or a signal could not be sent.
    """
    if pid <= 0:
        logger.error(f"Failed to kill process {pid}: invalid PID")
        return False

    try:
        # Try graceful shutdown first
        logger.info(f"Sending SIGTERM to process {pid}")
        try:
            os.kill(pid, signal.SIGTERM)
        except ProcessLookupError:
            pass  # Already gone; the wait below confirms it immediately.

        # Wait briefly for graceful shutdown
        if _wait_for_exit(pid, 1.0):
            logger.info(f"ā Process {pid} terminated gracefully")
            return True

        # Still running, force kill
        logger.warning(f"Process {pid} didn't terminate, sending SIGKILL")
        try:
            os.kill(pid, signal.SIGKILL)
        except ProcessLookupError:
            return True  # Exited between the last check and the signal.

        # Give the kernel a moment to tear the process down.
        _wait_for_exit(pid, 0.5)
        return True

    except Exception as e:
        logger.error(f"Failed to kill process {pid}: {e}")
        return False

def find_all_dashboard_servers() -> List[int]:
    """Find all running Python HTTP servers (dashboard instances).

    Asks ``pgrep -f`` for every process whose command line matches
    ``python.*http.server``.

    Returns:
        The matching PIDs, or an empty list when nothing matches or the
        lookup fails for any reason.
    """
    pgrep_cmd = ['pgrep', '-f', 'python.*http.server']
    try:
        completed = subprocess.run(
            pgrep_cmd,
            capture_output=True,
            text=True,
            timeout=5,
        )
        listing = completed.stdout.strip()
        if completed.returncode != 0 or not listing:
            return []
        return list(map(int, listing.split('\n')))
    except Exception:
        return []


# ============================================================================
# SERVER MANAGEMENT
# ============================================================================

def start_dashboard_server(port: int, logger: logging.Logger) -> bool:
    """Start the dashboard HTTP server in background.

    Launches ``python -m http.server <port>`` with ``DASHBOARD_PATH`` as its
    working directory, detached into its own session, then waits one second
    and verifies the launch.

    Args:
        port: TCP port the server should listen on.
        logger: Logger that receives progress and error messages.

    Returns:
        True if the spawned process is still alive and the port is in use
        after the startup delay. False if the dashboard directory is missing,
        the process exited, the port is still free, or an exception occurred.
    """
    try:
        if not DASHBOARD_PATH.exists():
            logger.error(f"Dashboard directory not found: {DASHBOARD_PATH}")
            return False

        logger.info(f"Starting dashboard server on port {port}")
        logger.info(f"Directory: {DASHBOARD_PATH}")

        # Start server in background
        process = subprocess.Popen(
            [sys.executable, '-m', 'http.server', str(port)],
            cwd=DASHBOARD_PATH,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            start_new_session=True,  # Detach from parent
        )

        # Give it a moment to start
        time.sleep(1)

        # A busy port alone does not prove that *our* server is the one
        # holding it: if the child already exited, something else owns it.
        exit_code = process.poll()
        if exit_code is not None:
            logger.error(f"Server process exited during startup (exit code: {exit_code})")
            return False

        # Verify it started
        if is_port_in_use(port):
            logger.info(f"ā Dashboard server started successfully (PID: {process.pid})")
            logger.info(f"š Dashboard available at: http://localhost:{port}")
            return True

        logger.error("Server failed to start (port not in use)")
        return False

    except Exception as e:
        logger.error(f"Failed to start dashboard server: {e}", exc_info=True)
        return False


# ============================================================================
# HEALTH CHECK
# ============================================================================

def perform_healthcheck(port: int = DEFAULT_PORT, auto_start: bool = True) -> bool:
    """
    Perform complete dashboard health check.

    If the port is occupied by an ``http.server`` / ``SimpleHTTPServer``
    process the dashboard is considered up. If another process holds the port
    and ``auto_start`` is set, that process is killed and a new server is
    started. If the port is free, a server is started when ``auto_start`` is
    set.

    Args:
        port: Port to check (default 8080)
        auto_start: Start server if not running (default True)

    Returns:
        True if dashboard is available, False otherwise
    """
    logger = setup_logging()

    rule = "=" * 60
    logger.info(rule)
    logger.info("Dashboard Health Check")
    logger.info(rule)
    logger.info(f"Checking port: {port}")
    logger.info(f"Auto-start: {auto_start}")
    logger.info("")

    if not is_port_in_use(port):
        logger.info(f"Port {port} is available")
    else:
        logger.info(f"ā Port {port} is in use")

        # Identify whoever is holding the port.
        occupant = get_process_on_port(port)
        if not occupant:
            logger.warning("ā ļø Port in use but process not identifiable")
            return False

        pid, command = occupant
        logger.info(f" Process: {pid}")
        logger.info(f" Command: {command}")

        # A dashboard server already on the port means there is nothing to do.
        if 'http.server' in command or 'SimpleHTTPServer' in command:
            logger.info("ā Dashboard server is running")
            logger.info(f"š Dashboard available at: http://localhost:{port}")
            return True

        logger.warning(f"ā ļø Port {port} occupied by non-dashboard process")
        logger.warning(f" Command: {command}")

        if not auto_start:
            return False

        logger.info("Attempting to reclaim port...")
        if not kill_process(pid, logger):
            logger.error("ā Failed to reclaim port")
            return False
        logger.info("ā Port reclaimed")
        # Fall through to start the server on the reclaimed port.

    # The port is free (or was just reclaimed): start the server if requested.
    if not auto_start:
        logger.info("ā Dashboard server is not running (auto-start disabled)")
        return False

    logger.info("")
    logger.info("Starting dashboard server...")
    return start_dashboard_server(port, logger)

def cleanup_duplicate_servers(port: int, logger: logging.Logger):
    """Kill any duplicate dashboard servers except the one on specified port.

    Every PID returned by ``find_all_dashboard_servers`` is stopped via
    ``kill_process`` unless it is the PID that ``get_process_on_port`` reports
    for ``port``. When no process is found on ``port``, all of them are stopped.

    Args:
        port: Port whose current owner must be left running.
        logger: Logger that receives progress messages.
    """
    server_pids = find_all_dashboard_servers()
    if not server_pids:
        return

    # Work out which PID (if any) is the legitimate server on our port.
    keeper = get_process_on_port(port)
    keep_pid = keeper[0] if keeper else None

    extras = list(filter(lambda candidate: candidate != keep_pid, server_pids))
    if not extras:
        return

    logger.info(f"Found {len(extras)} duplicate server(s)")
    for extra_pid in extras:
        logger.info(f"Stopping duplicate server: {extra_pid}")
        kill_process(extra_pid, logger)


# ============================================================================
# MAIN EXECUTION
# ============================================================================

def main():
    """Main entry point.

    Parses ``--port``, ``--no-start`` and ``--cleanup``, optionally removes
    duplicate servers, then runs the health check.

    Returns:
        Process exit code: 0 when the dashboard is healthy, 1 when it is not
        accessible or a fatal error occurred, 130 when interrupted by
        Ctrl-C.
    """
    import argparse

    parser = argparse.ArgumentParser(description='Dashboard health check and server manager')
    parser.add_argument('--port', type=int, default=DEFAULT_PORT, help='Port to check/use (default: 8080)')
    parser.add_argument('--no-start', action='store_true', help='Do not start server if not running')
    parser.add_argument('--cleanup', action='store_true', help='Clean up duplicate servers')

    args = parser.parse_args()

    logger = setup_logging()

    try:
        # Clean up duplicates if requested
        if args.cleanup:
            cleanup_duplicate_servers(args.port, logger)

        # Perform health check
        healthy = perform_healthcheck(args.port, auto_start=not args.no_start)

        logger.info("")
        logger.info("="*60)
        if healthy:
            # The message must be a single-line literal; a line break inside
            # the quotes is a syntax error.
            logger.info("ā Dashboard is healthy and accessible")
            return 0
        else:
            logger.error("ā Dashboard is not accessible")
            return 1

    except KeyboardInterrupt:
        logger.warning("\n\nā ļø Health check interrupted")
        return 130  # Conventional exit status for termination by SIGINT.
    except Exception as e:
        logger.error(f"Fatal error: {e}", exc_info=True)
        return 1

# Run the health check only when executed as a script, and propagate
# main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())