Agent Skills Framework Extension (Optional)
Infrastructure as Code Skill
When to Use This Skill
Use this skill when provisioning or managing cloud infrastructure declaratively — Terraform modules, remote state, multi-environment configuration, or Ansible automation.
How to Use This Skill
- Review the patterns and examples below
- Apply the relevant patterns to your implementation
- Follow the best practices outlined in this skill
Terraform provisioning, Ansible automation, and reproducible infrastructure management for production-grade cloud deployments.
Core Capabilities
- Terraform Modules - Reusable, composable infrastructure components
- State Management - Remote state, locking, workspace strategies
- Multi-Environment - DRY configuration across environments
- Ansible Automation - Configuration management, provisioning
- Drift Detection - Infrastructure compliance monitoring
Terraform Module Structure
infrastructure/
├── modules/
│ ├── networking/
│ │ ├── main.tf
│ │ ├── variables.tf
│ │ ├── outputs.tf
│ │ └── README.md
│ ├── compute/
│ ├── database/
│ └── security/
├── environments/
│ ├── dev/
│ ├── staging/
│ └── production/
└── shared/
├── versions.tf
└── providers.tf
Networking Module
# modules/networking/main.tf
#
# Networking foundation for one environment: a custom-mode VPC, three
# subnets (proxy-only load-balancer subnet, general private subnet, GKE
# subnet), a Cloud Router + NAT for private egress, and a baseline
# firewall rule allowing all internal traffic within the VPC CIDR.

terraform {
  required_version = ">= 1.5.0"

  required_providers {
    google = {
      source  = "hashicorp/google"
      version = "~> 5.0"
    }
  }
}

variable "project_id" {
  description = "GCP project ID"
  type        = string
}

variable "region" {
  description = "Primary region"
  type        = string
  default     = "us-central1"
}

variable "environment" {
  description = "Environment name"
  type        = string

  validation {
    condition     = contains(["dev", "staging", "production"], var.environment)
    error_message = "Environment must be dev, staging, or production."
  }
}

variable "vpc_cidr" {
  description = "VPC CIDR block"
  type        = string
  default     = "10.0.0.0/16"

  # Fail fast on malformed input instead of erroring deep inside cidrsubnet().
  validation {
    condition     = can(cidrhost(var.vpc_cidr, 0))
    error_message = "vpc_cidr must be a valid IPv4 CIDR block (e.g. 10.0.0.0/16)."
  }
}

locals {
  name_prefix = "${var.project_id}-${var.environment}"

  # Carve three /20s (for a /16 input) out of the VPC range. The "public"
  # subnet is a proxy-only subnet reserved for regional managed load
  # balancers; the other two are ordinary private subnets.
  subnets = {
    public = {
      cidr    = cidrsubnet(var.vpc_cidr, 4, 0)
      purpose = "REGIONAL_MANAGED_PROXY"
      role    = "ACTIVE"
    }
    private = {
      cidr    = cidrsubnet(var.vpc_cidr, 4, 1)
      purpose = "PRIVATE"
      role    = null
    }
    gke = {
      cidr    = cidrsubnet(var.vpc_cidr, 4, 2)
      purpose = "PRIVATE"
      role    = null
    }
  }
}

resource "google_compute_network" "vpc" {
  name                    = "${local.name_prefix}-vpc"
  project                 = var.project_id
  auto_create_subnetworks = false
  routing_mode            = "REGIONAL"
}

resource "google_compute_subnetwork" "subnets" {
  for_each = local.subnets

  name          = "${local.name_prefix}-${each.key}"
  project       = var.project_id
  region        = var.region
  network       = google_compute_network.vpc.id
  ip_cidr_range = each.value.cidr
  purpose       = each.value.purpose
  role          = each.value.role

  # Private Google Access and VPC flow logs are only supported on regular
  # (PRIVATE) subnets; setting either on a proxy-only subnet is rejected
  # by the API, so both are gated on the subnet purpose.
  private_ip_google_access = each.value.purpose == "PRIVATE"

  dynamic "log_config" {
    for_each = each.value.purpose == "PRIVATE" ? [1] : []
    content {
      aggregation_interval = "INTERVAL_5_SEC"
      flow_sampling        = 0.5
      metadata             = "INCLUDE_ALL_METADATA"
    }
  }
}

# Router + NAT so instances without external IPs can still reach the
# internet for package installs, image pulls, etc.
resource "google_compute_router" "router" {
  name    = "${local.name_prefix}-router"
  project = var.project_id
  region  = var.region
  network = google_compute_network.vpc.id
}

resource "google_compute_router_nat" "nat" {
  name                               = "${local.name_prefix}-nat"
  project                            = var.project_id
  router                             = google_compute_router.router.name
  region                             = var.region
  nat_ip_allocate_option             = "AUTO_ONLY"
  source_subnetwork_ip_ranges_to_nat = "ALL_SUBNETWORKS_ALL_IP_RANGES"

  log_config {
    enable = true
    filter = "ERRORS_ONLY"
  }
}

# Allow unrestricted TCP/UDP/ICMP between hosts inside the VPC CIDR.
# External ingress is intentionally left to dedicated, per-service rules.
resource "google_compute_firewall" "allow_internal" {
  name    = "${local.name_prefix}-allow-internal"
  project = var.project_id
  network = google_compute_network.vpc.name

  allow {
    protocol = "tcp"
    ports    = ["0-65535"]
  }

  allow {
    protocol = "udp"
    ports    = ["0-65535"]
  }

  allow {
    protocol = "icmp"
  }

  source_ranges = [var.vpc_cidr]
}

output "vpc_id" {
  description = "VPC ID"
  value       = google_compute_network.vpc.id
}

output "vpc_name" {
  description = "VPC name"
  value       = google_compute_network.vpc.name
}

output "subnet_ids" {
  description = "Subnet IDs by name"
  value       = { for k, v in google_compute_subnetwork.subnets : k => v.id }
}

output "subnet_cidrs" {
  description = "Subnet CIDR ranges by name"
  value       = { for k, v in google_compute_subnetwork.subnets : k => v.ip_cidr_range }
}
Database Module
# modules/database/main.tf
#
# Cloud SQL PostgreSQL instance with private-IP-only connectivity,
# automated backups + PITR, audit-oriented database flags, and an app
# user whose generated password is stored in Secret Manager.
#
# NOTE: the caller must have private services access (a service
# networking peering) already established on the supplied VPC, or
# instance creation will fail.

terraform {
  required_version = ">= 1.5.0"

  # Declare every provider this module instantiates resources from.
  # random_password requires hashicorp/random; leaving it undeclared
  # relies on implicit provider resolution and breaks version pinning.
  required_providers {
    google = {
      source  = "hashicorp/google"
      version = "~> 5.0"
    }
    random = {
      source  = "hashicorp/random"
      version = "~> 3.5"
    }
  }
}

variable "name" {
  description = "Database instance name"
  type        = string
}

variable "project_id" {
  description = "GCP project ID"
  type        = string
}

variable "region" {
  description = "Database region"
  type        = string
}

variable "environment" {
  description = "Environment"
  type        = string
}

variable "vpc_network" {
  description = "VPC network self link"
  type        = string
}

variable "tier" {
  description = "Cloud SQL tier"
  type        = string
  default     = "db-custom-2-4096"
}

variable "disk_size" {
  description = "Disk size in GB"
  type        = number
  default     = 100
}

variable "high_availability" {
  description = "Enable high availability"
  type        = bool
  default     = true
}

locals {
  instance_name = "${var.name}-${var.environment}"
}

resource "google_sql_database_instance" "main" {
  name             = local.instance_name
  project          = var.project_id
  region           = var.region
  database_version = "POSTGRES_15"

  # Only production instances are protected from accidental destroy.
  deletion_protection = var.environment == "production"

  settings {
    tier              = var.tier
    disk_size         = var.disk_size
    disk_type         = "PD_SSD"
    disk_autoresize   = true
    availability_type = var.high_availability ? "REGIONAL" : "ZONAL"

    backup_configuration {
      enabled                        = true
      point_in_time_recovery_enabled = true
      start_time                     = "03:00"
      transaction_log_retention_days = 7

      backup_retention_settings {
        retained_backups = 30
        retention_unit   = "COUNT"
      }
    }

    ip_configuration {
      # Private IP only: no public address is allocated.
      ipv4_enabled    = false
      private_network = var.vpc_network
      # require_ssl is deprecated in google provider 5.x in favor of
      # ssl_mode; kept here for compatibility with existing state.
      require_ssl = true
    }

    # Connection/checkpoint logging for auditability.
    database_flags {
      name  = "log_checkpoints"
      value = "on"
    }

    database_flags {
      name  = "log_connections"
      value = "on"
    }

    database_flags {
      name  = "log_disconnections"
      value = "on"
    }

    maintenance_window {
      day          = 7 # Sunday
      hour         = 4
      update_track = "stable"
    }

    insights_config {
      query_insights_enabled  = true
      record_application_tags = true
      record_client_address   = true
    }
  }
}

resource "google_sql_database" "database" {
  name     = var.name
  project  = var.project_id
  instance = google_sql_database_instance.main.name
}

# Generated once and persisted in state; never hardcode credentials.
resource "random_password" "db_password" {
  length  = 32
  special = true
}

resource "google_sql_user" "user" {
  name     = "app"
  project  = var.project_id
  instance = google_sql_database_instance.main.name
  password = random_password.db_password.result
}

# Applications read the credential from Secret Manager, not from
# Terraform outputs, keeping the secret out of plan/apply logs.
resource "google_secret_manager_secret" "db_password" {
  project   = var.project_id
  secret_id = "${local.instance_name}-db-password"

  replication {
    auto {}
  }
}

resource "google_secret_manager_secret_version" "db_password" {
  secret      = google_secret_manager_secret.db_password.id
  secret_data = random_password.db_password.result
}

output "instance_name" {
  description = "Database instance name"
  value       = google_sql_database_instance.main.name
}

output "connection_name" {
  description = "Database connection name"
  value       = google_sql_database_instance.main.connection_name
}

output "private_ip" {
  description = "Private IP address"
  value       = google_sql_database_instance.main.private_ip_address
}

output "database_name" {
  description = "Database name"
  value       = google_sql_database.database.name
}

output "password_secret_id" {
  description = "Secret Manager ID for database password"
  value       = google_secret_manager_secret.db_password.id
}
Environment Configuration
# environments/production/main.tf
#
# Production root module: wires the networking, database, and GKE modules
# together and establishes the private services access peering that
# Cloud SQL private IP requires.

terraform {
  required_version = ">= 1.5.0"

  backend "gcs" {
    bucket = "company-terraform-state"
    prefix = "production"
  }
}

provider "google" {
  project = var.project_id
  region  = var.region
}

variable "project_id" {
  description = "GCP project ID"
  type        = string
}

variable "region" {
  description = "Primary region"
  type        = string
  default     = "us-central1"
}

module "networking" {
  source = "../../modules/networking"

  project_id  = var.project_id
  region      = var.region
  environment = "production"
  vpc_cidr    = "10.0.0.0/16"
}

# Private services access: Cloud SQL with private IP needs an internal
# range reserved for VPC peering and a service networking connection to
# Google's producer network. Without these, the database module fails
# to provision.
resource "google_compute_global_address" "private_service_range" {
  name          = "private-service-range"
  project       = var.project_id
  purpose       = "VPC_PEERING"
  address_type  = "INTERNAL"
  prefix_length = 16
  network       = module.networking.vpc_id
}

resource "google_service_networking_connection" "private_vpc" {
  network                 = module.networking.vpc_id
  service                 = "servicenetworking.googleapis.com"
  reserved_peering_ranges = [google_compute_global_address.private_service_range.name]
}

module "database" {
  source = "../../modules/database"

  name              = "api"
  project_id        = var.project_id
  region            = var.region
  environment       = "production"
  vpc_network       = module.networking.vpc_id
  tier              = "db-custom-4-8192"
  disk_size         = 500
  high_availability = true

  # Cloud SQL cannot attach a private IP until the peering exists; the
  # vpc_network reference alone does not express this ordering.
  depends_on = [google_service_networking_connection.private_vpc]
}

module "gke" {
  source = "../../modules/gke"

  name        = "api-cluster"
  project_id  = var.project_id
  region      = var.region
  environment = "production"
  vpc_network = module.networking.vpc_name
  subnet      = module.networking.subnet_ids["gke"]
  min_nodes   = 3
  max_nodes   = 10

  # No explicit depends_on: the input expressions already reference
  # module.networking outputs, which establishes ordering implicitly.
}
Terragrunt DRY Configuration
# terragrunt.hcl (root)
#
# Shared Terragrunt configuration inherited by every environment via an
# include block: remote-state backend, generated provider, common inputs.

# Remote state lives in GCS, keyed by each environment's path relative to
# this file, so every environment gets an isolated state prefix for free.
remote_state {
  backend = "gcs"

  generate = {
    path      = "backend.tf"
    if_exists = "overwrite_terragrunt"
  }

  config = {
    bucket   = "company-terraform-state"
    prefix   = "${path_relative_to_include()}/terraform.tfstate"
    project  = "company-project"
    location = "US"
  }
}

# Emit provider.tf into each module's working directory so individual
# environments never repeat provider wiring.
generate "provider" {
  path      = "provider.tf"
  if_exists = "overwrite_terragrunt"
  contents  = <<EOF
provider "google" {
  project = var.project_id
  region  = var.region
}
EOF
}

# Defaults merged into every child's inputs; children may override any key.
inputs = {
  project_id = "company-project"
  region     = "us-central1"
}
# environments/production/terragrunt.hcl
#
# Production instantiation of the networking module. Backend and provider
# configuration are inherited from the root terragrunt.hcl via the include.
include "root" {
  path = find_in_parent_folders()
}

terraform {
  # The double slash marks the repository root for Terragrunt, so relative
  # paths inside the module still resolve after it is copied to the cache.
  source = "../../modules//networking"
}

inputs = {
  environment = "production"
  vpc_cidr    = "10.0.0.0/16"
}
Ansible Playbook
# playbooks/configure-servers.yml
---
# Configures application servers: base roles, required packages, file
# descriptor limits, and the rendered application config (with restart).
- name: Configure application servers
  hosts: app_servers
  become: true
  vars_files:
    # The per-environment vars file is selected by deploy_env.
    # NOTE: the variable is deliberately NOT named "environment" —
    # that is a reserved play keyword in Ansible and must not be
    # shadowed by a user variable.
    - "vars/{{ deploy_env }}.yml"
    - "vars/secrets.yml"
  roles:
    - role: common
    - role: docker
    - role: monitoring
    - role: application
  tasks:
    - name: Ensure required packages
      ansible.builtin.apt:
        name:
          - curl
          - htop
          - vim
          - jq
        state: present
        update_cache: true

    - name: Configure system limits
      # pam_limits is shipped in the community.general collection, not in
      # ansible.builtin; the fully qualified name avoids resolution errors.
      community.general.pam_limits:
        domain: '*'
        limit_type: "{{ item.type }}"
        limit_item: "{{ item.item }}"
        value: "{{ item.value }}"
      loop:
        - { type: soft, item: nofile, value: 65536 }
        - { type: hard, item: nofile, value: 65536 }

    - name: Deploy application configuration
      ansible.builtin.template:
        src: templates/app-config.j2
        dest: /etc/app/config.yaml
        owner: app
        group: app
        mode: '0640'
      notify: restart application

  handlers:
    - name: restart application
      ansible.builtin.systemd:
        name: app
        state: restarted
        daemon_reload: true
Drift Detection
#!/bin/bash
# scripts/drift-detection.sh
#
# Detects infrastructure drift in every environment by running
# `terraform plan -detailed-exitcode` (0 = clean, 2 = drift, other = error).
# Optionally posts a Slack alert when SLACK_WEBHOOK is set.
# Exits non-zero if any environment drifted, so CI jobs fail visibly.
set -euo pipefail

ENVIRONMENTS=("dev" "staging" "production")
drift_found=0

for env in "${ENVIRONMENTS[@]}"; do
  echo "Checking drift for $env..."
  pushd "environments/$env" >/dev/null

  terraform init -input=false -backend=true

  # `terraform plan` refreshes state as part of planning, so a separate
  # (deprecated, state-mutating) `terraform refresh` is unnecessary.
  # Capture the exit code explicitly; -detailed-exitcode uses 2 for drift.
  set +e
  terraform plan -detailed-exitcode -input=false -out=drift.tfplan
  plan_exit=$?
  set -e

  case "$plan_exit" in
    0)
      echo "✅ No drift detected in $env"
      ;;
    2)
      echo "⚠️ Drift detected in $env"
      terraform show drift.tfplan
      # Guarded expansion: under `set -u`, a bare $SLACK_WEBHOOK would
      # abort the script whenever the variable is unset.
      if [ -n "${SLACK_WEBHOOK:-}" ]; then
        curl -fsS -X POST "$SLACK_WEBHOOK" \
          -H 'Content-Type: application/json' \
          -d "{\"text\": \"Infrastructure drift detected in $env environment\"}"
      fi
      drift_found=1
      ;;
    *)
      echo "❌ Error checking drift in $env"
      exit 1
      ;;
  esac

  rm -f drift.tfplan
  popd >/dev/null
done

# Non-zero exit when drift was found anywhere, so schedulers/CI can alert.
exit "$drift_found"
Usage Examples
Create VPC Module
Apply infrastructure-as-code skill to create a Terraform VPC module with public/private subnets, NAT gateway, and firewall rules
Multi-Environment Setup
Apply infrastructure-as-code skill to configure Terragrunt for dev/staging/production with DRY configuration
Database Provisioning
Apply infrastructure-as-code skill to create a Cloud SQL PostgreSQL module with HA, backups, and secrets management
Success Output
When successful, this skill MUST output:
✅ SKILL COMPLETE: infrastructure-as-code
Completed:
- [x] Terraform modules created (networking, database, compute)
- [x] Module variables defined with validation
- [x] Environment configurations created (dev, staging, production)
- [x] Remote state configured (GCS/S3)
- [x] Terraform plan executed successfully
- [x] Resources provisioned without errors
- [x] Outputs documented and exported
- [x] Drift detection configured
Outputs:
- Terraform modules: modules/networking/, modules/database/, modules/compute/
- Environment configs: environments/dev/, environments/staging/, environments/production/
- State backend: gs://company-terraform-state or s3://company-terraform-state
- Resources provisioned: 15 resources created, 0 errors
- Drift detection: Scheduled daily via CI/CD
Completion Checklist
Before marking this skill as complete, verify:
- All required modules created with main.tf, variables.tf, outputs.tf
- Variables include validation rules and descriptions
- Remote state backend configured (GCS/S3)
- State locking enabled (native with GCS backends; a DynamoDB table is only needed for S3)
- Environment-specific configurations created
- Terraform plan runs without errors
- Resources provisioned successfully (terraform apply)
- Outputs documented and exported
- Drift detection script configured
- Documentation includes usage examples
Failure Indicators
This skill has FAILED if:
- ❌ Terraform modules missing required files (main.tf, variables.tf, outputs.tf)
- ❌ Variables lack validation or descriptions
- ❌ Remote state not configured (local state only)
- ❌ Terraform plan produces errors
- ❌ Resources fail to provision (terraform apply errors)
- ❌ Outputs not defined or exported
- ❌ No drift detection configured
- ❌ Secrets hardcoded in Terraform files
When NOT to Use
Do NOT use this skill when:
- Quick prototype or dev environment (manual setup faster)
- Cloud provider has native templates (CloudFormation, ARM templates may be better)
- Using managed platforms (Heroku, Vercel, Netlify handle infrastructure)
- Existing Ansible/Chef setup (coordinate before introducing Terraform)
- Team lacks Terraform expertise (training required first)
- Stateless applications without infrastructure needs
- Legacy systems with manual configuration dependencies
Anti-Patterns (Avoid)
| Anti-Pattern | Problem | Solution |
|---|---|---|
| Hardcoding secrets | Security vulnerability | Use Secret Manager/Vault references |
| No remote state | Team conflicts | Always use GCS/S3 backend |
| Missing state locking | Concurrent apply errors | Enable locking mechanism |
| No variable validation | Invalid values accepted | Add validation rules |
| Monolithic modules | Hard to reuse | Create composable modules |
| No drift detection | Infrastructure diverges | Schedule regular drift checks |
| Skipping terraform plan | Unexpected changes | Always plan before apply |
Principles
This skill embodies:
- #1 Recycle → Extend → Re-Use - Reusable modules across environments
- #3 Separation of Concerns - Modules by logical infrastructure component
- #7 Automation First - Declarative infrastructure, no manual changes
- #8 No Assumptions - Variable validation ensures correctness
Full Standard: CODITECT-STANDARD-AUTOMATION.md
Integration Points
- container-orchestration - GKE cluster provisioning
- monitoring-observability - Monitoring infrastructure setup
- cicd-pipeline-design - Terraform in CI/CD pipelines
- compliance-frameworks - Infrastructure compliance