Skip to main content

Agent Skills Framework Extension

Cloud Architecture Review Skill

When to Use This Skill

Use this skill when implementing cloud architecture review patterns in your codebase.

How to Use This Skill

  1. Review the patterns and examples below
  2. Apply the relevant patterns to your implementation
  3. Follow the best practices outlined in this skill

Cloud-native code review, IaC validation, cost optimization, and architecture best practices for AWS, GCP, and Azure.

Core Capabilities

  1. IaC Review - Terraform, CloudFormation, Pulumi validation
  2. Cost Optimization - Resource sizing and cost analysis
  3. Security Validation - Cloud security best practices
  4. Architecture Patterns - Multi-cloud design patterns
  5. Compliance Checks - Regulatory and policy validation

Terraform Review Automation

#!/usr/bin/env python3
"""
Automated Terraform code review with best practices validation.
Checks security, cost optimization, and architectural patterns.
"""

import re
import json
from pathlib import Path
from typing import List, Dict, Optional
from dataclasses import dataclass
from enum import Enum

class Severity(Enum):
    """Finding severity levels, declared from most to least urgent."""

    CRITICAL = "critical"
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
    INFO = "info"

@dataclass
class ReviewFinding:
    """A single issue produced by the Terraform review.

    Attributes mirror what the report emitters consume: where the issue
    lives (file/line), how bad it is, and what to do about it.
    """

    severity: Severity
    category: str
    message: str
    file_path: str
    # None when the finding applies to the whole file rather than one line.
    line_number: Optional[int]
    suggestion: str
    # Free-text cost estimate; only populated by cost-optimization rules.
    cost_impact: Optional[str] = None

class TerraformReviewer:
    """Reviews all Terraform (``*.tf``) files under a root directory.

    Runs four rule families -- security, cost optimization, best
    practices, and naming conventions -- collecting results as
    :class:`ReviewFinding` records, then renders them as JSON or
    Markdown via :meth:`generate_report`.
    """

    # Explicit severity ordering for sorting and report sections.
    # NOTE: sorting by ``Severity.value`` (as the original code did)
    # compares the *strings* alphabetically, which wrongly orders
    # "info" ahead of "low" and "medium".
    _SEVERITY_ORDER = [
        Severity.CRITICAL,
        Severity.HIGH,
        Severity.MEDIUM,
        Severity.LOW,
        Severity.INFO,
    ]
    _SEVERITY_RANK = {sev: rank for rank, sev in enumerate(_SEVERITY_ORDER)}

    def __init__(self, root_path: Path):
        """
        Args:
            root_path: Directory scanned recursively for ``*.tf`` files.
        """
        self.root_path = root_path
        self.findings: List[ReviewFinding] = []

    def review_all(self) -> List[ReviewFinding]:
        """Execute the complete Terraform review.

        Returns:
            Findings sorted from most to least severe.

        Safe to call repeatedly: previous findings are discarded first
        (the original implementation accumulated duplicates on each call).
        """
        self.findings = []
        for tf_file in self.root_path.rglob("*.tf"):
            content = tf_file.read_text()
            self._review_security(tf_file, content)
            self._review_cost_optimization(tf_file, content)
            self._review_best_practices(tf_file, content)
            self._review_resource_naming(tf_file, content)

        # BUG FIX: was ``key=lambda f: f.severity.value`` -- an
        # alphabetical sort that misplaced INFO findings.
        return sorted(self.findings, key=lambda f: self._SEVERITY_RANK[f.severity])

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _line_of(content: str, offset: int) -> int:
        """Return the 1-based line number of character *offset* in *content*."""
        return content[:offset].count('\n') + 1

    def _relpath(self, file_path: Path) -> str:
        """File path relative to the review root, as stored on findings."""
        return str(file_path.relative_to(self.root_path))

    def _review_security(self, file_path: Path, content: str):
        """Check for security anti-patterns (open ingress, public DBs, etc.)."""
        rules = [
            {
                # NOTE(review): this pattern matches from_port/to_port = 0,
                # not an explicit 0.0.0.0/0 CIDR as the message implies --
                # confirm the intended rule.
                'pattern': r'ingress\s*{\s*from_port\s*=\s*0\s*to_port\s*=\s*0',
                'severity': Severity.CRITICAL,
                'message': 'Security group allows all traffic (0.0.0.0/0)',
                'suggestion': 'Restrict ingress to specific ports and CIDR blocks'
            },
            {
                'pattern': r'publicly_accessible\s*=\s*true',
                'severity': Severity.HIGH,
                'message': 'Database configured as publicly accessible',
                'suggestion': 'Set publicly_accessible = false and use VPC peering or VPN'
            },
            {
                'pattern': r'enabled\s*=\s*false.*encryption',
                'severity': Severity.HIGH,
                'message': 'Encryption disabled',
                'suggestion': 'Enable encryption at rest and in transit'
            },
            {
                'pattern': r'versioning\s*{\s*enabled\s*=\s*false',
                'severity': Severity.MEDIUM,
                'message': 'S3 versioning disabled',
                'suggestion': 'Enable versioning for disaster recovery'
            },
            {
                'pattern': r'backup_retention_period\s*=\s*0',
                'severity': Severity.HIGH,
                'message': 'Backup retention disabled',
                'suggestion': 'Set backup_retention_period >= 7 days'
            }
        ]

        for rule in rules:
            for match in re.finditer(rule['pattern'], content, re.IGNORECASE):
                self.findings.append(ReviewFinding(
                    severity=rule['severity'],
                    category='security',
                    message=rule['message'],
                    file_path=self._relpath(file_path),
                    line_number=self._line_of(content, match.start()),
                    suggestion=rule['suggestion']
                ))

    def _review_cost_optimization(self, file_path: Path, content: str):
        """Check for cost optimization opportunities (oversized resources, etc.)."""
        rules = [
            {
                'pattern': r'instance_type\s*=\s*"[a-z0-9]+\.(?:16|24|32)xlarge"',
                'severity': Severity.MEDIUM,
                'message': 'Very large instance size detected',
                'suggestion': 'Consider right-sizing or using auto-scaling',
                'cost_impact': 'High - oversized instances can cost $5K+/month'
            },
            {
                'pattern': r'storage_type\s*=\s*"io1"',
                'severity': Severity.LOW,
                'message': 'Provisioned IOPS storage (io1) in use',
                'suggestion': 'Evaluate if gp3 would meet requirements at lower cost',
                'cost_impact': 'Medium - io1 costs 3-4x more than gp3'
            },
            {
                'pattern': r'create_before_destroy\s*=\s*false',
                'severity': Severity.LOW,
                'message': 'create_before_destroy disabled',
                'suggestion': 'Enable to reduce downtime during updates',
                'cost_impact': None
            },
            {
                # Flags desired_capacity values of 10 or more.
                'pattern': r'desired_capacity\s*=\s*\d{2,}',
                'severity': Severity.MEDIUM,
                'message': 'High desired capacity in auto-scaling group',
                'suggestion': 'Review if this many instances are required',
                'cost_impact': 'High - each additional instance adds cost'
            }
        ]

        for rule in rules:
            # Unlike the security rules, these match case-sensitively
            # (Terraform argument names are lowercase anyway).
            for match in re.finditer(rule['pattern'], content):
                self.findings.append(ReviewFinding(
                    severity=rule['severity'],
                    category='cost-optimization',
                    message=rule['message'],
                    file_path=self._relpath(file_path),
                    line_number=self._line_of(content, match.start()),
                    suggestion=rule['suggestion'],
                    cost_impact=rule.get('cost_impact')
                ))

    def _review_best_practices(self, file_path: Path, content: str):
        """Check for Terraform best practices: version pinning, hard-coded
        IPs, and untagged resources."""
        # Missing provider version constraints (file-level finding, no line).
        if 'terraform' in content and 'required_providers' not in content:
            self.findings.append(ReviewFinding(
                severity=Severity.MEDIUM,
                category='best-practices',
                message='Missing required_providers block',
                file_path=self._relpath(file_path),
                line_number=None,
                suggestion='Add required_providers block with version constraints'
            ))

        # Hard-coded IP addresses.  NOTE: this loose pattern also matches
        # invalid dotted quads (e.g. 999.999.999.999) and version strings;
        # acceptable for a LOW-severity heuristic.
        ip_pattern = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
        for match in re.finditer(ip_pattern, content):
            ip = match.group()
            # Skip localhost and the common wildcard default.
            if ip not in ('0.0.0.0', '127.0.0.1'):
                self.findings.append(ReviewFinding(
                    severity=Severity.LOW,
                    category='best-practices',
                    message=f'Hard-coded IP address: {ip}',
                    file_path=self._relpath(file_path),
                    line_number=self._line_of(content, match.start()),
                    suggestion='Move IP addresses to variables or data sources'
                ))

        # Resources without a tags argument anywhere in their block.
        resource_pattern = r'resource\s+"[^"]+"\s+"[^"]+"\s*{'
        for match in re.finditer(resource_pattern, content):
            block_content = self._extract_block(content[match.end():])
            if 'tags' not in block_content:
                self.findings.append(ReviewFinding(
                    severity=Severity.LOW,
                    category='best-practices',
                    message='Resource missing tags',
                    file_path=self._relpath(file_path),
                    line_number=self._line_of(content, match.start()),
                    suggestion='Add tags for cost tracking, environment, owner, etc.'
                ))

    def _review_resource_naming(self, file_path: Path, content: str):
        """Validate resource naming conventions (lowercase snake_case)."""
        pattern = r'resource\s+"([^"]+)"\s+"([^"]+)"'

        for match in re.finditer(pattern, content):
            resource_name = match.group(2)
            if not re.match(r'^[a-z][a-z0-9_]*$', resource_name):
                self.findings.append(ReviewFinding(
                    severity=Severity.LOW,
                    category='naming',
                    message=f'Resource name "{resource_name}" violates naming convention',
                    file_path=self._relpath(file_path),
                    line_number=self._line_of(content, match.start()),
                    suggestion='Use lowercase letters, numbers, and underscores only'
                ))

    def _extract_block(self, content: str) -> str:
        """Extract the body of an HCL block.

        *content* starts just after the opening brace (depth begins at 1
        because the caller's regex already consumed it); returns the text
        up to and including the matching closing brace.
        """
        depth = 1
        i = 0
        while i < len(content) and depth > 0:
            if content[i] == '{':
                depth += 1
            elif content[i] == '}':
                depth -= 1
            i += 1
        return content[:i]

    def generate_report(self, output_format: str = 'json') -> str:
        """Render the collected findings.

        Args:
            output_format: ``'json'`` or ``'markdown'``.

        Raises:
            ValueError: For any other format string.
        """
        if output_format == 'json':
            return json.dumps([
                {
                    'severity': f.severity.value,
                    'category': f.category,
                    'message': f.message,
                    'file': f.file_path,
                    'line': f.line_number,
                    'suggestion': f.suggestion,
                    'cost_impact': f.cost_impact
                }
                for f in self.findings
            ], indent=2)

        elif output_format == 'markdown':
            report = "# Terraform Review Report\n\n"

            # Group findings by severity string for sectioned output.
            by_severity: Dict[str, List[ReviewFinding]] = {}
            for finding in self.findings:
                by_severity.setdefault(finding.severity.value, []).append(finding)

            # Emit sections in true severity order, not alphabetically.
            for severity in (sev.value for sev in self._SEVERITY_ORDER):
                if severity in by_severity:
                    report += f"## {severity.upper()} ({len(by_severity[severity])})\n\n"

                    for finding in by_severity[severity]:
                        report += f"### {finding.message}\n\n"
                        report += f"- **File:** `{finding.file_path}`"
                        if finding.line_number:
                            report += f" (Line {finding.line_number})"
                        report += "\n"
                        report += f"- **Category:** {finding.category}\n"
                        report += f"- **Suggestion:** {finding.suggestion}\n"
                        if finding.cost_impact:
                            report += f"- **Cost Impact:** {finding.cost_impact}\n"
                        report += "\n"

            return report

        else:
            raise ValueError(f"Unsupported format: {output_format}")

# CLI usage
if __name__ == '__main__':
    import sys

    # argv[1]: directory to scan (default: cwd); argv[2]: report format.
    target = Path(sys.argv[1]) if len(sys.argv) > 1 else Path.cwd()
    report_format = sys.argv[2] if len(sys.argv) > 2 else 'markdown'

    reviewer = TerraformReviewer(target)
    reviewer.review_all()
    print(reviewer.generate_report(report_format))

    # Non-zero exit when critical/high findings exist, for CI gating.
    severities = [f.severity for f in reviewer.findings]
    critical_count = severities.count(Severity.CRITICAL)
    high_count = severities.count(Severity.HIGH)

    if critical_count > 0 or high_count > 0:
        print(f"\n❌ Found {critical_count} critical and {high_count} high severity issues", file=sys.stderr)
        sys.exit(1)
    else:
        print("\n✅ No critical or high severity issues found")

GCP Cost Analysis Script

#!/usr/bin/env python3
"""
GCP cost analysis and optimization recommendations.
Requires: google-cloud-billing, google-cloud-recommender
"""

from google.cloud import billing_v1, recommender_v1
from datetime import datetime, timedelta
import json
from typing import List, Dict

class GCPCostAnalyzer:
    """Analyzes GCP spend for a project and produces a Markdown report.

    Combines (stubbed) billing breakdowns with live recommendations from
    the GCP Recommender API.  The per-service/per-region cost methods are
    placeholders; a production implementation would query the BigQuery
    billing export tables.
    """

    def __init__(self, project_id: str, billing_account_id: str):
        """
        Args:
            project_id: GCP project to analyze.
            billing_account_id: Billing account (currently unused by the
                stubbed cost queries, kept for the production path).
        """
        self.project_id = project_id
        self.billing_account_id = billing_account_id
        self.billing_client = billing_v1.CloudBillingClient()
        self.recommender_client = recommender_v1.RecommenderClient()

    def analyze_costs(self, days: int = 30) -> Dict:
        """Analyze costs for the past *days* days.

        Returns a dict with period, totals (actual + projected monthly),
        per-service and per-region breakdowns, recommendations, and the
        summed potential savings.
        """
        # NOTE(review): datetime.now() is timezone-naive; GCP billing data
        # is normally reported in UTC -- confirm before comparing timestamps.
        end_date = datetime.now()
        start_date = end_date - timedelta(days=days)

        # Billing data (simplified - actual implementation needs BigQuery).
        costs_by_service = self._get_costs_by_service(start_date, end_date)
        costs_by_region = self._get_costs_by_region(start_date, end_date)

        recommendations = self._get_cost_recommendations()

        total_cost = sum(costs_by_service.values())
        # Guard: days=0 would otherwise raise ZeroDivisionError.
        projected_monthly = (total_cost / days) * 30 if days else 0.0

        return {
            'period': {
                'start': start_date.isoformat(),
                'end': end_date.isoformat(),
                'days': days
            },
            'totals': {
                'actual': round(total_cost, 2),
                'projected_monthly': round(projected_monthly, 2)
            },
            'by_service': costs_by_service,
            'by_region': costs_by_region,
            'recommendations': recommendations,
            'potential_savings': sum(r['savings'] for r in recommendations)
        }

    def _get_cost_recommendations(self) -> List[Dict]:
        """Get cost optimization recommendations from GCP Recommender.

        Each recommender type is fetched best-effort: failures are logged
        as warnings and skipped rather than aborting the whole analysis.
        """
        recommendations = []

        recommender_types = [
            'google.compute.commitment.UsageCommitmentRecommender',
            'google.compute.instance.MachineTypeRecommender',
            'google.compute.disk.IdleResourceRecommender',
            'google.iam.policy.Recommender'
        ]

        for recommender_type in recommender_types:
            parent = f"projects/{self.project_id}/locations/global/recommenders/{recommender_type}"

            try:
                for recommendation in self.recommender_client.list_recommendations(parent=parent):
                    recommendations.append({
                        'type': recommender_type.split('.')[-1],
                        'description': recommendation.description,
                        'priority': recommendation.priority,
                        'savings': self._extract_savings(recommendation),
                        'resource': recommendation.name
                    })
            except Exception as e:
                # Deliberate best-effort: some recommenders may be disabled
                # or unauthorized for this project.
                print(f"Warning: Could not fetch {recommender_type}: {e}")

        return recommendations

    def _extract_savings(self, recommendation) -> float:
        """Extract estimated monthly savings from a recommendation.

        Simplified stub -- the real parse depends on the recommendation's
        impact type; returns 0.0 until implemented.
        """
        return 0.0

    def _get_costs_by_service(self, start_date, end_date) -> Dict[str, float]:
        """Get costs grouped by GCP service.

        Stub data; in production, query the BigQuery billing export table
        filtered to [start_date, end_date].
        """
        return {
            'Compute Engine': 1234.56,
            'Cloud Storage': 234.12,
            'Cloud SQL': 567.89,
            'Cloud Functions': 45.67,
            'BigQuery': 123.45
        }

    def _get_costs_by_region(self, start_date, end_date) -> Dict[str, float]:
        """Get costs grouped by region (stub; see _get_costs_by_service)."""
        return {
            'us-central1': 1456.78,
            'us-east1': 234.56,
            'europe-west1': 123.45
        }

    def generate_report(self) -> str:
        """Generate the cost analysis report in Markdown."""
        analysis = self.analyze_costs()

        report = f"""# GCP Cost Analysis Report

## Summary

- **Period:** {analysis['period']['days']} days ({analysis['period']['start'][:10]} to {analysis['period']['end'][:10]})
- **Total Cost:** ${analysis['totals']['actual']:,.2f}
- **Projected Monthly:** ${analysis['totals']['projected_monthly']:,.2f}
- **Potential Savings:** ${analysis['potential_savings']:,.2f}/month

## Costs by Service

"""
        # Guard: zero total spend would otherwise raise ZeroDivisionError
        # when computing percentages.
        total = analysis['totals']['actual']
        for service, cost in sorted(analysis['by_service'].items(), key=lambda x: x[1], reverse=True):
            pct = (cost / total) * 100 if total else 0.0
            report += f"- **{service}:** ${cost:,.2f} ({pct:.1f}%)\n"

        report += "\n## Costs by Region\n\n"
        for region, cost in sorted(analysis['by_region'].items(), key=lambda x: x[1], reverse=True):
            report += f"- **{region}:** ${cost:,.2f}\n"

        report += "\n## Optimization Recommendations\n\n"
        for i, rec in enumerate(analysis['recommendations'], 1):
            report += f"{i}. **{rec['type']}** (Priority: {rec['priority']})\n"
            report += f"   - {rec['description']}\n"
            report += f"   - Estimated savings: ${rec['savings']:,.2f}/month\n\n"

        return report

# Usage
if __name__ == '__main__':
    import sys

    # argv[1]: project id, argv[2]: billing account id (with demo defaults).
    project_id = sys.argv[1] if len(sys.argv) > 1 else 'my-project'
    billing_account = sys.argv[2] if len(sys.argv) > 2 else 'my-billing-account'

    analyzer = GCPCostAnalyzer(project_id, billing_account)
    print(analyzer.generate_report())

Multi-Cloud Architecture Pattern

# multi-cloud-architecture-checklist.yaml
# Architecture review checklist for multi-cloud deployments

infrastructure:
  networking:
    - vpc_design: "Isolated VPCs per environment with peering"
    - subnet_strategy: "Public/private subnet separation"
    - cross_cloud_connectivity: "VPN or dedicated interconnect"
    - dns_strategy: "Cloud DNS with failover"

  compute:
    - instance_sizing: "Right-sized with auto-scaling"
    - availability: "Multi-AZ deployment"
    - disaster_recovery: "Cross-region replication"
    - container_orchestration: "Kubernetes (GKE/EKS/AKS)"

  storage:
    - data_classification: "Hot/warm/cold tiers"
    - backup_strategy: "Automated with retention policy"
    - encryption: "At rest and in transit"
    - replication: "Geo-redundant for critical data"

security:
  - identity_management: "Centralized IAM with MFA"
  - network_security: "Security groups and NACLs"
  - secrets_management: "Cloud-native secret managers"
  - compliance: "SOC2, HIPAA, GDPR as needed"

cost_optimization:
  - reserved_instances: "For predictable workloads"
  - spot_instances: "For fault-tolerant workloads"
  - auto_scaling: "Scale down during off-peak"
  - storage_lifecycle: "Auto-archive cold data"
  - monitoring: "Cost alerts and budgets"

observability:
  - logging: "Centralized log aggregation"
  - monitoring: "Prometheus + Grafana or cloud-native"
  - tracing: "Distributed tracing (Jaeger/OpenTelemetry)"
  - alerting: "Multi-channel notifications"

deployment:
  - ci_cd: "Automated pipelines with testing"
  - infrastructure_as_code: "Terraform or Pulumi"
  - configuration_management: "Ansible or cloud-native"
  - rollback_strategy: "Automated rollback on failure"

Usage Examples

Review Terraform Code

Apply cloud-architecture-review skill to analyze Terraform files for security, cost optimization, and best practices

GCP Cost Analysis

Apply cloud-architecture-review skill to generate cost analysis report with optimization recommendations for GCP project

Multi-Cloud Architecture Validation

Apply cloud-architecture-review skill to validate multi-cloud architecture against best practices checklist

Integration Points

  • security-audit-patterns - Security validation
  • compliance-validation - Regulatory compliance
  • orchestration-patterns - Multi-agent review coordination

Success Output

When successful, this skill MUST output:

✅ SKILL COMPLETE: cloud-architecture-review

Completed:
- [x] Terraform/IaC files reviewed for security and best practices
- [x] Cost optimization opportunities identified
- [x] Architecture patterns validated against cloud best practices
- [x] Compliance checks executed (security groups, encryption, backups)

Outputs:
- Terraform review report (JSON/Markdown) with severity classifications
- Cost analysis with projected savings
- Security findings with remediation steps
- Multi-cloud architecture validation checklist

Completion Checklist

Before marking this skill as complete, verify:

  • All Terraform/CloudFormation/Pulumi files scanned
  • Security anti-patterns detected (public access, missing encryption, etc.)
  • Cost optimization analysis run with savings estimates
  • Resource naming conventions validated
  • Cross-cutting concerns checked (tags, backups, versioning)
  • Architecture checklist completed for networking, compute, storage, security
  • Review report generated in requested format (JSON/Markdown)
  • Critical/high severity findings flagged for immediate action

Failure Indicators

This skill has FAILED if:

  • ❌ No IaC files found or unable to parse Terraform/YAML
  • ❌ Review report missing severity classifications
  • ❌ Cost analysis returns zero findings in production infrastructure
  • ❌ Security checks skip critical patterns (public databases, unencrypted storage)
  • ❌ No recommendations provided despite findings
  • ❌ Architecture checklist not completed for multi-cloud deployments
  • ❌ Script exits with errors before completing scan

When NOT to Use

Do NOT use this skill when:

  • Infrastructure is not defined as code (manual console configurations)
  • No cloud resources exist yet (use cloud-architecture-design instead)
  • Infrastructure is managed by third-party platform (Heroku, Vercel)
  • Review scope is single service/resource (use targeted review instead)
  • No access to IaC repository or cloud credentials for validation
  • Team lacks authority to implement recommendations
  • Infrastructure is ephemeral/temporary (dev/test sandboxes)
  • Compliance requirements are unknown (define requirements first)

Anti-Patterns (Avoid)

| Anti-Pattern | Problem | Solution |
| --- | --- | --- |
| Reviewing without context | Misses business requirements | Understand workload, SLAs, compliance needs first |
| Ignoring cost implications | Recommendations may be too expensive | Include cost-benefit analysis in all suggestions |
| Security-only focus | Misses performance, reliability issues | Review across all architecture pillars |
| Generic recommendations | Not actionable | Provide specific Terraform code changes |
| Skipping baseline metrics | Can't measure improvement | Document current state before recommending changes |
| One-time review | Drift occurs over time | Automate reviews in CI/CD pipeline |
| Missing compliance mapping | Findings don't align with requirements | Map findings to SOC2/HIPAA/GDPR controls |
| No prioritization | Team overwhelmed by findings | Classify by severity and business impact |

Principles

This skill embodies:

  • #1 First Principles - Understand WHY infrastructure exists before recommending changes
  • #3 Separation of Concerns - Review security, cost, architecture independently
  • #5 Eliminate Ambiguity - Specific findings with file:line references
  • #6 Clear, Understandable, Explainable - Recommendations with implementation steps
  • #8 No Assumptions - Verify actual configuration, don't assume defaults
  • #9 Keep It Simple - Recommend simplest architecture meeting requirements

Full Standard: CODITECT-STANDARD-AUTOMATION.md