Skip to main content

Edge Deployment Guide

On-Premise, Air-Gapped, and Hybrid Cloud Patterns

Document ID: C8-EDGE-DEPLOYMENT
Version: 1.0
Category: Technical Deep Dive


Deployment Topology Options

┌─────────────────────────────────────────────────────────────┐
│ DEPLOYMENT SPECTRUM │
├─────────────────────────────────────────────────────────────┤
│ │
│ CLOUD-NATIVE HYBRID AIR-GAPPED │
│ ──────────── ────── ────────── │
│ • Full SaaS • Cloud LLM • Local LLM │
│ • Managed services • Local data • Local data │
│ • Internet required • VPN/Direct • No internet │
│ • Lowest ops burden • Moderate ops • Highest ops │
│ • Fast iteration • Balanced • Slow updates │
│ │
│ ◄─────────────────────────────────────────────────────────►│
│ Convenience Control/Security │
└─────────────────────────────────────────────────────────────┘

On-Premise Architecture

Basic On-Premise Stack

# docker-compose.yml for on-premise deployment
# NOTE(review): passwords below are hard-coded for illustration only — in
# production inject them via Docker secrets or an .env file, and keep
# DATABASE_URL in sync with POSTGRES_PASSWORD.
version: '3.8'

services:
  # Local LLM server (OpenAI-compatible API served by vLLM).
  llm-server:
    image: vllm/vllm-openai:latest
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    environment:
      - MODEL_NAME=meta-llama/Llama-3.1-70B-Instruct
    volumes:
      - ./models:/models
    ports:
      - "8000:8000"

  # Vector database for embeddings / retrieval.
  vector-db:
    image: qdrant/qdrant:latest
    volumes:
      - ./qdrant_data:/qdrant/storage
    ports:
      - "6333:6333"

  # Agent orchestrator, built from local source.
  orchestrator:
    build: ./orchestrator
    environment:
      - LLM_ENDPOINT=http://llm-server:8000/v1
      - VECTOR_DB_URL=http://vector-db:6333
      - DATABASE_URL=postgresql://postgres:password@postgres:5432/agents
    depends_on:
      - llm-server
      - vector-db
      - postgres

  # PostgreSQL for durable agent state.
  postgres:
    image: postgres:15
    volumes:
      - ./pg_data:/var/lib/postgresql/data
    environment:
      - POSTGRES_PASSWORD=password
      - POSTGRES_DB=agents

  # Redis for caching.
  redis:
    image: redis:7-alpine
    volumes:
      - ./redis_data:/data

Hardware Requirements

| Component | Minimum | Recommended | Enterprise |
|---|---|---|---|
| LLM Server (70B) | 2x A100 40GB | 4x A100 80GB | 8x H100 |
| LLM Server (7B) | 1x RTX 4090 | 2x A100 40GB | 4x A100 |
| Vector DB | 32GB RAM, 500GB SSD | 64GB RAM, 1TB NVMe | 128GB RAM, 2TB NVMe |
| Orchestrator | 8 cores, 32GB RAM | 16 cores, 64GB RAM | 32 cores, 128GB RAM |
| Network | 10 Gbps internal | 25 Gbps internal | 100 Gbps internal |

Air-Gapped Deployment

Architecture

┌─────────────────────────────────────────────────────────────┐
│ AIR-GAPPED NETWORK │
│ ┌─────────────────────────────────────────────────────┐ │
│ │ SECURE ENCLAVE │ │
│ │ ┌───────────┐ ┌───────────┐ ┌───────────┐ │ │
│ │ │ Agent │ │ LLM │ │ Vector │ │ │
│ │ │Orchestrat │ │ Server │ │ DB │ │ │
│ │ └─────┬─────┘ └─────┬─────┘ └─────┬─────┘ │ │
│ │ │ │ │ │ │
│ │ ┌─────▼──────────────▼──────────────▼─────┐ │ │
│ │ │ Internal Network │ │ │
│ │ └─────────────────┬───────────────────────┘ │ │
│ └────────────────────┼────────────────────────────────┘ │
│ │ │
│ ┌────────────────────▼────────────────────────────────┐ │
│ │ Enterprise Applications │ │
│ │ (ERP, CRM, Document Management, etc.) │ │
│ └─────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────┘

│ (Physical media only)

┌────────▼────────┐
│ DATA DIODE │ (One-way transfer for updates)
└────────┬────────┘

┌────────▼────────────────────────────────────────────────────┐
│ INTERNET CONNECTED │
│ (Update preparation zone) │
└─────────────────────────────────────────────────────────────┘

Update Process

# update_package_creator.py - Run in connected environment

import hashlib
import json
import shutil
import tarfile
from datetime import datetime, timezone
from pathlib import Path

class AirGappedUpdatePackage:
    """Builds a checksummed, signed update bundle for transfer into an
    air-gapped environment.

    Run in the internet-connected preparation zone; the resulting tarball
    plus signature is carried across the data diode / physical media.
    """

    def __init__(self, version: str):
        self.version = version
        # Staging directory that becomes the tarball's contents.
        self.package_dir = Path(f"update_package_{version}")
        self.package_dir.mkdir(exist_ok=True)
        self.manifest = {
            'version': version,
            # Timezone-aware UTC timestamp; datetime.utcnow() is deprecated
            # and produces a naive datetime.
            'created': datetime.now(timezone.utc).isoformat(),
            'components': []
        }

    def _add_component(self, src_path: str, dest_rel: str, entry: dict) -> None:
        """Copy *src_path* into the staging dir at *dest_rel*, checksum it,
        and record *entry* (plus path/checksum) in the manifest."""
        dest = self.package_dir / dest_rel
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copytree(src_path, dest)
        entry['path'] = dest_rel
        entry['checksum'] = self.compute_checksum(dest)
        self.manifest['components'].append(entry)

    def add_model_weights(self, model_path: str, model_name: str):
        """Add model weights to update package."""
        self._add_component(model_path, f'models/{model_name}',
                            {'type': 'model', 'name': model_name})

    def add_application_code(self, code_path: str):
        """Add application code to update package."""
        self._add_component(code_path, 'application', {'type': 'application'})

    def add_knowledge_base(self, kb_path: str):
        """Add knowledge base export to update package."""
        self._add_component(kb_path, 'knowledge_base',
                            {'type': 'knowledge_base'})

    def finalize(self) -> tuple:
        """Write the manifest, create the tarball, sign it.

        Returns:
            (archive_path, signature). NOTE(review): the original annotated
            the return as ``Path`` but actually returned a 2-tuple; the
            annotation is corrected here.
        """
        # Write manifest
        manifest_path = self.package_dir / 'manifest.json'
        with open(manifest_path, 'w') as f:
            json.dump(self.manifest, f, indent=2)

        # Create tarball
        archive_path = f"update_{self.version}.tar.gz"
        with tarfile.open(archive_path, 'w:gz') as tar:
            tar.add(self.package_dir, arcname=self.version)

        # Sign package — sign_package is not defined in this snippet;
        # presumably supplied by a subclass/mixin. TODO confirm.
        signature = self.sign_package(archive_path)

        return Path(archive_path), signature

    def compute_checksum(self, path: Path) -> str:
        """Compute SHA-256 checksum over every file under *path*.

        sorted() makes the digest deterministic across filesystems and
        platforms.
        """
        sha256 = hashlib.sha256()
        for file in sorted(path.rglob('*')):
            if file.is_file():
                sha256.update(file.read_bytes())
        return sha256.hexdigest()

Air-Gap Deployment Script

# deploy_air_gapped.py - Run in air-gapped environment

class AirGappedDeployer:
    """Applies a prepared update package inside the air-gapped environment.

    This class only drives the sequence; the verify/extract/deploy helper
    methods (and the SecurityError/IntegrityError/DeploymentError
    exceptions) are expected to be defined elsewhere in the project.
    """

    def __init__(self, package_path: str, signature_path: str):
        # Archive transferred over physical media, plus detached signature.
        self.package_path = package_path
        self.signature_path = signature_path

    def deploy(self):
        """Verify, stage, and roll out the update; roll back on failure."""
        # Refuse anything whose signature does not check out.
        if not self.verify_signature():
            raise SecurityError("Package signature verification failed")

        # Unpack into a staging area.
        staging_dir = self.extract_package()

        # Every component must match its manifest checksum before we
        # touch the running system.
        manifest = self.load_manifest(staging_dir)
        for entry in manifest['components']:
            if not self.verify_checksum(staging_dir, entry):
                raise IntegrityError(f"Checksum mismatch: {entry['path']}")

        # Quiesce services and snapshot current state so we can roll back.
        self.stop_services()
        self.create_backup()

        # Roll out each verified component, then bring services back up.
        for entry in manifest['components']:
            self.deploy_component(staging_dir, entry)
        self.start_services()

        # Post-deploy verification; restore the backup on failure.
        if not self.health_check():
            self.rollback()
            raise DeploymentError("Health check failed, rolled back")

        print(f"Successfully deployed version {manifest['version']}")

Hybrid Cloud Architecture

Pattern: Cloud LLM + Local Data

┌─────────────────────────────────────────────────────────────┐
│ CLOUD │
│ ┌─────────────────────────────────────────────────────┐ │
│ │ LLM API (Claude/GPT) │ │
│ └───────────────────────┬─────────────────────────────┘ │
└──────────────────────────┼──────────────────────────────────┘
│ HTTPS (prompts only,
│ no raw data)
┌──────────────────────────┼──────────────────────────────────┐
│ ON-PREMISE │
│ ┌───────────────────────▼─────────────────────────────┐ │
│ │ AGENT ORCHESTRATOR │ │
│ │ • Constructs prompts locally │ │
│ │ • Masks/anonymizes sensitive data │ │
│ │ • Routes to cloud LLM │ │
│ │ • Processes responses locally │ │
│ └───────────┬───────────────────────┬─────────────────┘ │
│ │ │ │
│ ┌───────────▼───────────┐ ┌────────▼────────────────┐ │
│ │ LOCAL DATA STORE │ │ LOCAL VECTOR DB │ │
│ │ (Sensitive data │ │ (Embeddings only, │ │
│ │ stays here) │ │ no raw text) │ │
│ └───────────────────────┘ └─────────────────────────┘ │
└─────────────────────────────────────────────────────────────┘

Data Masking Implementation

import re
from typing import Dict, List, Tuple

class DataMasker:
    """Replaces PII in text with reversible placeholder tokens before text
    leaves the premises, and restores originals on the way back.

    Mappings accumulate across calls on the same instance — that is what
    lets unmask() restore responses produced from earlier mask() calls.
    """

    def __init__(self):
        # data_type -> (regex, display replacement). The replacement string
        # is informational only; masking uses numbered tokens so that
        # values can be restored later.
        self.patterns = {
            'ssn': (r'\b\d{3}-\d{2}-\d{4}\b', 'XXX-XX-XXXX'),
            'email': (r'\b[\w.-]+@[\w.-]+\.\w+\b', 'email@masked.com'),
            'phone': (r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b', 'XXX-XXX-XXXX'),
            'credit_card': (r'\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b', 'XXXX-XXXX-XXXX-XXXX'),
            'name': None  # Handled by NER
        }
        self.masked_values = {}  # token -> original value
        self.reverse_map = {}    # original value -> token

    def mask(self, text: str) -> Tuple[str, Dict]:
        """Mask sensitive data before sending to cloud LLM.

        Returns:
            (masked_text, token -> original-value mapping).
        """
        masked_text = text

        for data_type, pattern_tuple in self.patterns.items():
            if not pattern_tuple:
                continue
            pattern, _replacement = pattern_tuple
            # Bug fix: scan the *current* text, not the untouched input —
            # otherwise values already replaced by an earlier pattern are
            # "found" again and the replace silently does nothing.
            for match in re.findall(pattern, masked_text):
                token = self.reverse_map.get(match)
                if token is None:
                    # Bug fix: one token per distinct value. The original
                    # minted a fresh token for every occurrence even though
                    # replace() had already consumed all of them.
                    token = f"[MASKED_{data_type.upper()}_{len(self.masked_values)}]"
                    self.masked_values[token] = match
                    self.reverse_map[match] = token
                masked_text = masked_text.replace(match, token)

        # Named entity masking using NER
        masked_text = self.mask_named_entities(masked_text)

        return masked_text, self.masked_values

    def mask_named_entities(self, text: str) -> str:
        """Placeholder NER pass; currently returns text unchanged.

        TODO: plug in a local NER model to mask person/organization names.
        (The original called this method without ever defining it, which
        raised AttributeError on every mask() call.)
        """
        return text

    def unmask(self, text: str) -> str:
        """Restore masked values in LLM response."""
        restored = text
        for token, original in self.masked_values.items():
            restored = restored.replace(token, original)
        return restored

class HybridCloudAgent:
    """Agent that keeps raw data on-premise and sends only masked prompts
    to a cloud-hosted LLM."""

    def __init__(self, cloud_llm_client, local_data_store):
        # Remote model client and on-prem store are injected by the caller.
        self.cloud_llm = cloud_llm_client
        self.local_data = local_data_store
        self.masker = DataMasker()

    async def process(self, task: str, context_ids: List[str]):
        """Run *task* against locally-held context via the cloud LLM.

        Only masked text crosses the network boundary; the unmasked result
        is persisted on-premise. Assumes local_data.retrieve returns a
        string — TODO confirm against the store implementation.
        """
        # Pull sensitive context from the on-prem store (never sent raw).
        raw_context = self.local_data.retrieve(context_ids)

        # Strip PII before anything leaves the premises.
        safe_context, _mapping = self.masker.mask(raw_context)
        safe_task, _ = self.masker.mask(task)

        # Only masked content reaches the cloud model.
        prompt = f"""
Context: {safe_context}

Task: {safe_task}
"""

        masked_reply = await self.cloud_llm.complete(prompt)

        # Re-insert the original values locally.
        result = self.masker.unmask(masked_reply)

        # Persist the final answer on-premise.
        self.local_data.store_result(task, result)

        return result

Model Selection for Edge

Local Model Options

| Model | Size | VRAM Required | Quality | Speed |
|---|---|---|---|---|
| Llama 3.1 8B | 16GB | 16GB | Good | Fast |
| Llama 3.1 70B | 140GB | 80GB (quantized: 40GB) | Excellent | Medium |
| Mistral 7B | 14GB | 14GB | Good | Fast |
| Mixtral 8x7B | 90GB | 48GB (quantized) | Very Good | Medium |
| Phi-3 Medium | 14GB | 14GB | Good | Fast |

Quantization for Edge

# quantize_model.py

from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

def quantize_for_edge(model_name: str, output_dir: str, bits: int = 4):
    """Quantize a causal LM to *bits*-bit GPTQ weights for edge deployment.

    Args:
        model_name: Hugging Face hub id or local path of the source model.
        output_dir: Directory that receives the quantized weights and
            tokenizer files.
        bits: Quantization bit-width (default 4).

    Bug fix vs. the original: the full fp16 model was loaded via
    AutoModelForCausalLM and then never used — for a 70B model that is
    ~140GB of wasted memory — and it referenced ``torch`` without
    importing it. Only the tokenizer and the GPTQ loader are needed.
    """
    # Tokenizer is saved alongside the quantized weights.
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Quantization config: group_size=128 / desc_act=False is the common
    # speed-oriented GPTQ setup.
    quantize_config = BaseQuantizeConfig(
        bits=bits,
        group_size=128,
        desc_act=False
    )

    # Quantize
    quantized_model = AutoGPTQForCausalLM.from_pretrained(
        model_name,
        quantize_config
    )
    # NOTE(review): auto-gptq normally also requires a calibration step
    # (quantized_model.quantize(examples)) before save_quantized — confirm
    # against the auto-gptq docs for the pinned version.

    # Save
    quantized_model.save_quantized(output_dir)
    tokenizer.save_pretrained(output_dir)

    print(f"Quantized model saved to {output_dir}")
    # Rough estimate: quantized weights are ~bits/16 of the fp16 footprint.
    print(f"Size reduction: ~{100 - (bits/16)*100:.0f}%")

Security Considerations

Network Segmentation

┌─────────────────────────────────────────────────────────────┐
│ DMZ │
│ ┌───────────────────────────────────────────────────────┐ │
│ │ API Gateway (only if hybrid) │ │
│ └───────────────────────────────────────────────────────┘ │
└─────────────────────────────┬───────────────────────────────┘
│ (Firewall)
┌─────────────────────────────▼───────────────────────────────┐
│ APPLICATION ZONE │
│ ┌───────────────────────────────────────────────────────┐ │
│ │ Agent Orchestrator, Application Servers │ │
│ └───────────────────────────────────────────────────────┘ │
└─────────────────────────────┬───────────────────────────────┘
│ (Firewall)
┌─────────────────────────────▼───────────────────────────────┐
│ DATA ZONE │
│ ┌───────────────────────────────────────────────────────┐ │
│ │ LLM Server, Vector DB, PostgreSQL │ │
│ └───────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────┘

Document maintained by CODITECT Infrastructure Team