Agent Skills Framework Extension
Cloud Architecture Review Skill
When to Use This Skill
Use this skill when reviewing cloud infrastructure code (Terraform, CloudFormation, Pulumi) for security issues, cost optimization opportunities, and architecture best practices across AWS, GCP, and Azure.
How to Use This Skill
- Review the patterns and examples below
- Apply the relevant patterns to your implementation
- Follow the best practices outlined in this skill
Cloud-native code review, IaC validation, cost optimization, and architecture best practices for AWS, GCP, and Azure.
Core Capabilities
- IaC Review - Terraform, CloudFormation, Pulumi validation
- Cost Optimization - Resource sizing and cost analysis
- Security Validation - Cloud security best practices
- Architecture Patterns - Multi-cloud design patterns
- Compliance Checks - Regulatory and policy validation
Terraform Review Automation
#!/usr/bin/env python3
"""
Automated Terraform code review with best practices validation.
Checks security, cost optimization, and architectural patterns.
"""
import re
import json
from pathlib import Path
from typing import List, Dict, Optional
from dataclasses import dataclass
from enum import Enum
class Severity(Enum):
    """Finding severity levels, from most to least urgent.

    Note: the enum VALUES are plain strings, so sorting findings by
    ``severity.value`` orders them alphabetically, not by urgency.
    """
    CRITICAL = "critical"
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
    INFO = "info"
@dataclass
class ReviewFinding:
    """A single issue discovered during a Terraform review."""
    severity: Severity          # how urgent the issue is
    category: str               # e.g. 'security', 'cost-optimization', 'best-practices', 'naming'
    message: str                # human-readable description of the problem
    file_path: str              # offending file, relative to the review root
    line_number: Optional[int]  # 1-based line of the match; None for file-level findings
    suggestion: str             # recommended remediation
    cost_impact: Optional[str] = None  # free-text cost note, set only by cost checks
class TerraformReviewer:
    """Scans ``*.tf`` files under a root directory and collects ReviewFindings."""

    def __init__(self, root_path: Path):
        """
        Args:
            root_path: Directory that is searched recursively for .tf files.
        """
        self.root_path = root_path
        self.findings: List[ReviewFinding] = []  # accumulated across all review passes
def review_all(self) -> List[ReviewFinding]:
"""Execute complete Terraform review."""
tf_files = list(self.root_path.rglob("*.tf"))
for tf_file in tf_files:
content = tf_file.read_text()
self._review_security(tf_file, content)
self._review_cost_optimization(tf_file, content)
self._review_best_practices(tf_file, content)
self._review_resource_naming(tf_file, content)
return sorted(self.findings, key=lambda f: f.severity.value)
def _review_security(self, file_path: Path, content: str):
"""Check for security anti-patterns."""
rules = [
{
'pattern': r'ingress\s*{\s*from_port\s*=\s*0\s*to_port\s*=\s*0',
'severity': Severity.CRITICAL,
'message': 'Security group allows all traffic (0.0.0.0/0)',
'suggestion': 'Restrict ingress to specific ports and CIDR blocks'
},
{
'pattern': r'publicly_accessible\s*=\s*true',
'severity': Severity.HIGH,
'message': 'Database configured as publicly accessible',
'suggestion': 'Set publicly_accessible = false and use VPC peering or VPN'
},
{
'pattern': r'enabled\s*=\s*false.*encryption',
'severity': Severity.HIGH,
'message': 'Encryption disabled',
'suggestion': 'Enable encryption at rest and in transit'
},
{
'pattern': r'versioning\s*{\s*enabled\s*=\s*false',
'severity': Severity.MEDIUM,
'message': 'S3 versioning disabled',
'suggestion': 'Enable versioning for disaster recovery'
},
{
'pattern': r'backup_retention_period\s*=\s*0',
'severity': Severity.HIGH,
'message': 'Backup retention disabled',
'suggestion': 'Set backup_retention_period >= 7 days'
}
]
for rule in rules:
for match in re.finditer(rule['pattern'], content, re.IGNORECASE):
line_num = content[:match.start()].count('\n') + 1
self.findings.append(ReviewFinding(
severity=rule['severity'],
category='security',
message=rule['message'],
file_path=str(file_path.relative_to(self.root_path)),
line_number=line_num,
suggestion=rule['suggestion']
))
def _review_cost_optimization(self, file_path: Path, content: str):
"""Check for cost optimization opportunities."""
rules = [
{
'pattern': r'instance_type\s*=\s*"[a-z0-9]+\.(?:16|24|32)xlarge"',
'severity': Severity.MEDIUM,
'message': 'Very large instance size detected',
'suggestion': 'Consider right-sizing or using auto-scaling',
'cost_impact': 'High - oversized instances can cost $5K+/month'
},
{
'pattern': r'storage_type\s*=\s*"io1"',
'severity': Severity.LOW,
'message': 'Provisioned IOPS storage (io1) in use',
'suggestion': 'Evaluate if gp3 would meet requirements at lower cost',
'cost_impact': 'Medium - io1 costs 3-4x more than gp3'
},
{
'pattern': r'create_before_destroy\s*=\s*false',
'severity': Severity.LOW,
'message': 'create_before_destroy disabled',
'suggestion': 'Enable to reduce downtime during updates',
'cost_impact': None
},
{
'pattern': r'desired_capacity\s*=\s*\d{2,}',
'severity': Severity.MEDIUM,
'message': 'High desired capacity in auto-scaling group',
'suggestion': 'Review if this many instances are required',
'cost_impact': 'High - each additional instance adds cost'
}
]
for rule in rules:
for match in re.finditer(rule['pattern'], content):
line_num = content[:match.start()].count('\n') + 1
self.findings.append(ReviewFinding(
severity=rule['severity'],
category='cost-optimization',
message=rule['message'],
file_path=str(file_path.relative_to(self.root_path)),
line_number=line_num,
suggestion=rule['suggestion'],
cost_impact=rule.get('cost_impact')
))
def _review_best_practices(self, file_path: Path, content: str):
"""Check for Terraform best practices."""
# Check for missing required providers
if 'terraform' in content and 'required_providers' not in content:
self.findings.append(ReviewFinding(
severity=Severity.MEDIUM,
category='best-practices',
message='Missing required_providers block',
file_path=str(file_path.relative_to(self.root_path)),
line_number=None,
suggestion='Add required_providers block with version constraints'
))
# Check for hard-coded values
ip_pattern = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
for match in re.finditer(ip_pattern, content):
# Skip localhost and common defaults
ip = match.group()
if ip not in ['0.0.0.0', '127.0.0.1']:
line_num = content[:match.start()].count('\n') + 1
self.findings.append(ReviewFinding(
severity=Severity.LOW,
category='best-practices',
message=f'Hard-coded IP address: {ip}',
file_path=str(file_path.relative_to(self.root_path)),
line_number=line_num,
suggestion='Move IP addresses to variables or data sources'
))
# Check for missing tags
resource_pattern = r'resource\s+"[^"]+"\s+"[^"]+"\s*{'
for match in re.finditer(resource_pattern, content):
block_start = match.end()
block_content = self._extract_block(content[block_start:])
if 'tags' not in block_content:
line_num = content[:match.start()].count('\n') + 1
self.findings.append(ReviewFinding(
severity=Severity.LOW,
category='best-practices',
message='Resource missing tags',
file_path=str(file_path.relative_to(self.root_path)),
line_number=line_num,
suggestion='Add tags for cost tracking, environment, owner, etc.'
))
def _review_resource_naming(self, file_path: Path, content: str):
"""Validate resource naming conventions."""
# Extract resource names
pattern = r'resource\s+"([^"]+)"\s+"([^"]+)"'
for match in re.finditer(pattern, content):
resource_type = match.group(1)
resource_name = match.group(2)
# Check naming conventions
if not re.match(r'^[a-z][a-z0-9_]*$', resource_name):
line_num = content[:match.start()].count('\n') + 1
self.findings.append(ReviewFinding(
severity=Severity.LOW,
category='naming',
message=f'Resource name "{resource_name}" violates naming convention',
file_path=str(file_path.relative_to(self.root_path)),
line_number=line_num,
suggestion='Use lowercase letters, numbers, and underscores only'
))
def _extract_block(self, content: str) -> str:
"""Extract HCL block content."""
depth = 1
i = 0
while i < len(content) and depth > 0:
if content[i] == '{':
depth += 1
elif content[i] == '}':
depth -= 1
i += 1
return content[:i]
def generate_report(self, output_format: str = 'json') -> str:
"""Generate review report in specified format."""
if output_format == 'json':
return json.dumps([
{
'severity': f.severity.value,
'category': f.category,
'message': f.message,
'file': f.file_path,
'line': f.line_number,
'suggestion': f.suggestion,
'cost_impact': f.cost_impact
}
for f in self.findings
], indent=2)
elif output_format == 'markdown':
report = "# Terraform Review Report\n\n"
# Group by severity
by_severity = {}
for finding in self.findings:
sev = finding.severity.value
if sev not in by_severity:
by_severity[sev] = []
by_severity[sev].append(finding)
# Order: critical, high, medium, low, info
for severity in ['critical', 'high', 'medium', 'low', 'info']:
if severity in by_severity:
report += f"## {severity.upper()} ({len(by_severity[severity])})\n\n"
for finding in by_severity[severity]:
report += f"### {finding.message}\n\n"
report += f"- **File:** `{finding.file_path}`"
if finding.line_number:
report += f" (Line {finding.line_number})"
report += "\n"
report += f"- **Category:** {finding.category}\n"
report += f"- **Suggestion:** {finding.suggestion}\n"
if finding.cost_impact:
report += f"- **Cost Impact:** {finding.cost_impact}\n"
report += "\n"
return report
else:
raise ValueError(f"Unsupported format: {output_format}")
# CLI usage: terraform_review.py [ROOT_DIR] [json|markdown]
if __name__ == '__main__':
    import sys

    path = Path(sys.argv[1]) if len(sys.argv) > 1 else Path.cwd()
    output_format = sys.argv[2] if len(sys.argv) > 2 else 'markdown'
    reviewer = TerraformReviewer(path)
    reviewer.review_all()
    # The report itself is the only thing written to stdout so it can be
    # piped or redirected safely.
    print(reviewer.generate_report(output_format))
    # Exit non-zero when critical/high findings exist (CI gating).
    critical_count = sum(1 for f in reviewer.findings if f.severity == Severity.CRITICAL)
    high_count = sum(1 for f in reviewer.findings if f.severity == Severity.HIGH)
    if critical_count > 0 or high_count > 0:
        print(f"\n❌ Found {critical_count} critical and {high_count} high severity issues", file=sys.stderr)
        sys.exit(1)
    else:
        # BUG FIX: the status line now goes to stderr like the failure path;
        # appending it to stdout corrupted machine-readable (JSON) reports.
        print("\n✅ No critical or high severity issues found", file=sys.stderr)
GCP Cost Analysis Script
#!/usr/bin/env python3
"""
GCP cost analysis and optimization recommendations.
Requires: google-cloud-billing, google-cloud-recommender
"""
from google.cloud import billing_v1, recommender_v1
from datetime import datetime, timedelta
import json
from typing import List, Dict
class GCPCostAnalyzer:
    """Summarizes GCP spend and optimization recommendations for a project."""

    def __init__(self, project_id: str, billing_account_id: str):
        """
        Args:
            project_id: GCP project to analyze.
            billing_account_id: Billing account the project is attached to.
        """
        self.project_id = project_id
        self.billing_account_id = billing_account_id
        # Client construction requires google-cloud-billing /
        # google-cloud-recommender and valid application credentials.
        self.billing_client = billing_v1.CloudBillingClient()
        self.recommender_client = recommender_v1.RecommenderClient()
def analyze_costs(self, days: int = 30) -> Dict:
"""Analyze costs for the past N days."""
end_date = datetime.now()
start_date = end_date - timedelta(days=days)
# Get billing data (simplified - actual implementation needs BigQuery)
costs_by_service = self._get_costs_by_service(start_date, end_date)
costs_by_region = self._get_costs_by_region(start_date, end_date)
# Get recommendations
recommendations = self._get_cost_recommendations()
# Calculate metrics
total_cost = sum(costs_by_service.values())
projected_monthly = (total_cost / days) * 30
return {
'period': {
'start': start_date.isoformat(),
'end': end_date.isoformat(),
'days': days
},
'totals': {
'actual': round(total_cost, 2),
'projected_monthly': round(projected_monthly, 2)
},
'by_service': costs_by_service,
'by_region': costs_by_region,
'recommendations': recommendations,
'potential_savings': sum(r['savings'] for r in recommendations)
}
def _get_cost_recommendations(self) -> List[Dict]:
"""Get cost optimization recommendations from GCP Recommender."""
recommendations = []
# Recommender types to check
recommender_types = [
'google.compute.commitment.UsageCommitmentRecommender',
'google.compute.instance.MachineTypeRecommender',
'google.compute.disk.IdleResourceRecommender',
'google.iam.policy.Recommender'
]
for recommender_type in recommender_types:
parent = f"projects/{self.project_id}/locations/global/recommenders/{recommender_type}"
try:
for recommendation in self.recommender_client.list_recommendations(parent=parent):
recommendations.append({
'type': recommender_type.split('.')[-1],
'description': recommendation.description,
'priority': recommendation.priority,
'savings': self._extract_savings(recommendation),
'resource': recommendation.name
})
except Exception as e:
print(f"Warning: Could not fetch {recommender_type}: {e}")
return recommendations
    def _extract_savings(self, recommendation) -> float:
        """Extract estimated savings from recommendation.

        Placeholder: always returns 0.0. The real implementation would read
        the recommendation's cost-impact field - presumably its primary
        impact metadata, which varies by recommender type; confirm against
        the Recommender API before relying on these figures.
        """
        # Parse recommendation impact for cost savings
        # This is simplified - actual implementation depends on recommendation type
        return 0.0
def _get_costs_by_service(self, start_date, end_date) -> Dict[str, float]:
"""Get costs grouped by GCP service."""
# In production, query BigQuery billing export table
# Simplified example:
return {
'Compute Engine': 1234.56,
'Cloud Storage': 234.12,
'Cloud SQL': 567.89,
'Cloud Functions': 45.67,
'BigQuery': 123.45
}
def _get_costs_by_region(self, start_date, end_date) -> Dict[str, float]:
"""Get costs grouped by region."""
# In production, query BigQuery billing export table
return {
'us-central1': 1456.78,
'us-east1': 234.56,
'europe-west1': 123.45
}
def generate_report(self) -> str:
"""Generate cost analysis report in Markdown."""
analysis = self.analyze_costs()
report = f"""# GCP Cost Analysis Report
## Summary
- **Period:** {analysis['period']['days']} days ({analysis['period']['start'][:10]} to {analysis['period']['end'][:10]})
- **Total Cost:** ${analysis['totals']['actual']:,.2f}
- **Projected Monthly:** ${analysis['totals']['projected_monthly']:,.2f}
- **Potential Savings:** ${analysis['potential_savings']:,.2f}/month
## Costs by Service
"""
for service, cost in sorted(analysis['by_service'].items(), key=lambda x: x[1], reverse=True):
pct = (cost / analysis['totals']['actual']) * 100
report += f"- **{service}:** ${cost:,.2f} ({pct:.1f}%)\n"
report += "\n## Costs by Region\n\n"
for region, cost in sorted(analysis['by_region'].items(), key=lambda x: x[1], reverse=True):
report += f"- **{region}:** ${cost:,.2f}\n"
report += "\n## Optimization Recommendations\n\n"
for i, rec in enumerate(analysis['recommendations'], 1):
report += f"{i}. **{rec['type']}** (Priority: {rec['priority']})\n"
report += f" - {rec['description']}\n"
report += f" - Estimated savings: ${rec['savings']:,.2f}/month\n\n"
return report
# Usage: gcp_cost_analysis.py [PROJECT_ID] [BILLING_ACCOUNT_ID]
if __name__ == '__main__':
    import sys
    # Positional arguments are optional; placeholder defaults are used when
    # omitted (the report will then fail at client/API calls without valid
    # credentials for a real project).
    project_id = sys.argv[1] if len(sys.argv) > 1 else 'my-project'
    billing_account = sys.argv[2] if len(sys.argv) > 2 else 'my-billing-account'
    analyzer = GCPCostAnalyzer(project_id, billing_account)
    print(analyzer.generate_report())
Multi-Cloud Architecture Pattern
# multi-cloud-architecture-checklist.yaml
# Architecture review checklist for multi-cloud deployments.
# Each list item pairs a review topic with the expected practice; reviewers
# confirm the deployed architecture matches the quoted expectation.
# NOTE(review): the original indentation was lost in extraction; the grouping
# below (networking/compute/storage under `infrastructure`, the remaining
# sections top-level) is reconstructed - verify against the canonical file.

# Core infrastructure concerns.
infrastructure:
  networking:
    - vpc_design: "Isolated VPCs per environment with peering"
    - subnet_strategy: "Public/private subnet separation"
    - cross_cloud_connectivity: "VPN or dedicated interconnect"
    - dns_strategy: "Cloud DNS with failover"
  compute:
    - instance_sizing: "Right-sized with auto-scaling"
    - availability: "Multi-AZ deployment"
    - disaster_recovery: "Cross-region replication"
    - container_orchestration: "Kubernetes (GKE/EKS/AKS)"
  storage:
    - data_classification: "Hot/warm/cold tiers"
    - backup_strategy: "Automated with retention policy"
    - encryption: "At rest and in transit"
    - replication: "Geo-redundant for critical data"

# Identity, network, and data-protection controls.
security:
  - identity_management: "Centralized IAM with MFA"
  - network_security: "Security groups and NACLs"
  - secrets_management: "Cloud-native secret managers"
  - compliance: "SOC2, HIPAA, GDPR as needed"

# Levers for reducing spend without losing capability.
cost_optimization:
  - reserved_instances: "For predictable workloads"
  - spot_instances: "For fault-tolerant workloads"
  - auto_scaling: "Scale down during off-peak"
  - storage_lifecycle: "Auto-archive cold data"
  - monitoring: "Cost alerts and budgets"

# Logging, metrics, tracing, and alerting expectations.
observability:
  - logging: "Centralized log aggregation"
  - monitoring: "Prometheus + Grafana or cloud-native"
  - tracing: "Distributed tracing (Jaeger/OpenTelemetry)"
  - alerting: "Multi-channel notifications"

# Release and change-management practices.
deployment:
  - ci_cd: "Automated pipelines with testing"
  - infrastructure_as_code: "Terraform or Pulumi"
  - configuration_management: "Ansible or cloud-native"
  - rollback_strategy: "Automated rollback on failure"
Usage Examples
Review Terraform Code
Apply cloud-architecture-review skill to analyze Terraform files for security, cost optimization, and best practices
GCP Cost Analysis
Apply cloud-architecture-review skill to generate cost analysis report with optimization recommendations for GCP project
Multi-Cloud Architecture Validation
Apply cloud-architecture-review skill to validate multi-cloud architecture against best practices checklist
Integration Points
- security-audit-patterns - Security validation
- compliance-validation - Regulatory compliance
- orchestration-patterns - Multi-agent review coordination
Success Output
When successful, this skill MUST output:
✅ SKILL COMPLETE: cloud-architecture-review
Completed:
- [x] Terraform/IaC files reviewed for security and best practices
- [x] Cost optimization opportunities identified
- [x] Architecture patterns validated against cloud best practices
- [x] Compliance checks executed (security groups, encryption, backups)
Outputs:
- Terraform review report (JSON/Markdown) with severity classifications
- Cost analysis with projected savings
- Security findings with remediation steps
- Multi-cloud architecture validation checklist
Completion Checklist
Before marking this skill as complete, verify:
- All Terraform/CloudFormation/Pulumi files scanned
- Security anti-patterns detected (public access, missing encryption, etc.)
- Cost optimization analysis run with savings estimates
- Resource naming conventions validated
- Cross-cutting concerns checked (tags, backups, versioning)
- Architecture checklist completed for networking, compute, storage, security
- Review report generated in requested format (JSON/Markdown)
- Critical/high severity findings flagged for immediate action
Failure Indicators
This skill has FAILED if:
- ❌ No IaC files found or unable to parse Terraform/YAML
- ❌ Review report missing severity classifications
- ❌ Cost analysis returns zero findings in production infrastructure
- ❌ Security checks skip critical patterns (public databases, unencrypted storage)
- ❌ No recommendations provided despite findings
- ❌ Architecture checklist not completed for multi-cloud deployments
- ❌ Script exits with errors before completing scan
When NOT to Use
Do NOT use this skill when:
- Infrastructure is not defined as code (manual console configurations)
- No cloud resources exist yet (use cloud-architecture-design instead)
- Infrastructure is managed by third-party platform (Heroku, Vercel)
- Review scope is single service/resource (use targeted review instead)
- No access to IaC repository or cloud credentials for validation
- Team lacks authority to implement recommendations
- Infrastructure is ephemeral/temporary (dev/test sandboxes)
- Compliance requirements are unknown (define requirements first)
Anti-Patterns (Avoid)
| Anti-Pattern | Problem | Solution |
|---|---|---|
| Reviewing without context | Misses business requirements | Understand workload, SLAs, compliance needs first |
| Ignoring cost implications | Recommendations may be too expensive | Include cost-benefit analysis in all suggestions |
| Security-only focus | Misses performance, reliability issues | Review across all architecture pillars |
| Generic recommendations | Not actionable | Provide specific Terraform code changes |
| Skipping baseline metrics | Can't measure improvement | Document current state before recommending changes |
| One-time review | Drift occurs over time | Automate reviews in CI/CD pipeline |
| Missing compliance mapping | Findings don't align with requirements | Map findings to SOC2/HIPAA/GDPR controls |
| No prioritization | Team overwhelmed by findings | Classify by severity and business impact |
Principles
This skill embodies:
- #1 First Principles - Understand WHY infrastructure exists before recommending changes
- #3 Separation of Concerns - Review security, cost, architecture independently
- #5 Eliminate Ambiguity - Specific findings with file:line references
- #6 Clear, Understandable, Explainable - Recommendations with implementation steps
- #8 No Assumptions - Verify actual configuration, don't assume defaults
- #9 Keep It Simple - Recommend simplest architecture meeting requirements
Full Standard: CODITECT-STANDARD-AUTOMATION.md