#!/usr/bin/env python3
"""
CODITECT Cost Estimation System (ADR-075)

Provides cost estimation for LLM token usage based on model pricing.
Loads pricing from config/model-pricing.json.
"""

import json
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Optional, TYPE_CHECKING

if TYPE_CHECKING:
    from scripts.core.usage_tracking import UsageSummary

# Config path relative to the coditect-core root, which is taken to be the
# directory three levels above this file.
CONFIG_PATH = Path(__file__).parent.parent.parent / "config" / "model-pricing.json"

@dataclass
class ModelPricing:
    """USD price points for one model, each quoted per 1M tokens.

    Attributes:
        input_price: Cost in USD of 1M input tokens.
        output_price: Cost in USD of 1M output tokens.
        cached_price: Cost in USD of 1M cached tokens; 0.0 unless specified.
    """

    input_price: float
    output_price: float
    cached_price: float = 0.0

# Cached pricing data. None means the config has not been loaded yet; an
# empty dict is a valid loaded state (no pricing entries were found).
_PRICING_CACHE: Optional[Dict[str, ModelPricing]] = None

def _load_pricing() -> Dict[str, ModelPricing]:
    """Load the pricing configuration from the JSON file, caching the result.

    The file at ``CONFIG_PATH`` is parsed on the first call only; later calls
    return the cached dictionary until ``_PRICING_CACHE`` is reset to None.
    When the config file does not exist, an empty table is returned (and
    cached) instead of raising.

    Returns:
        Dictionary mapping model IDs to ModelPricing instances. A model entry
        that omits a price falls back to 1.0 for input/output and 0.0 for
        cached tokens.

    Raises:
        json.JSONDecodeError: If the config file exists but is not valid JSON.
    """
    global _PRICING_CACHE

    if _PRICING_CACHE is not None:
        return _PRICING_CACHE

    models = {}
    if CONFIG_PATH.exists():
        # JSON is UTF-8 by specification; do not rely on the locale default
        # encoding, which differs across platforms.
        data = json.loads(CONFIG_PATH.read_text(encoding="utf-8"))
        # `or {}` also covers an explicit `"models": null` in the config.
        models = data.get("models") or {}

    pricing = {
        model_id: ModelPricing(
            input_price=config.get("input_price", 1.0),
            output_price=config.get("output_price", 1.0),
            cached_price=config.get("cached_price", 0.0),
        )
        for model_id, config in models.items()
    }

    _PRICING_CACHE = pricing
    return pricing

def get_model_pricing(model: str) -> Optional[ModelPricing]:
    """Look up the pricing entry for one model.

    Args:
        model: Model identifier (e.g., "claude-sonnet-4-5")

    Returns:
        The matching ModelPricing, or None when the loaded pricing table has
        no entry for ``model``.
    """
    return _load_pricing().get(model)

def estimate_cost(summary: "UsageSummary") -> Dict[str, float]:
    """Estimate the USD cost of the usage recorded in a summary.

    Each entry of ``summary.model_usage`` is priced with the loaded pricing
    table, looked up by the entry's ``model`` attribute. Models without a
    pricing entry are charged a flat $1 per 1M tokens over input plus output
    tokens; cached tokens are not counted in that fallback.

    Args:
        summary: UsageSummary with token usage data

    Returns:
        {
            "total_estimated_usd": 0.42,
            "by_model": {
                "anthropic:claude-sonnet-4-5": 0.25,
                ...
            }
        }
        Per-model values are rounded to 4 decimals; the total is the rounded
        sum of the unrounded per-model costs.
    """
    per_million = 1_000_000
    price_table = _load_pricing()
    costs_by_key = {}
    running_total = 0.0

    for key, usage in summary.model_usage.items():
        rates = price_table.get(usage.model)

        if not rates:
            # Unknown model: default $1 per 1M tokens total
            spend = (usage.input_tokens + usage.output_tokens) / per_million
        else:
            spend = (
                (usage.input_tokens / per_million) * rates.input_price
                + (usage.output_tokens / per_million) * rates.output_price
                + (usage.cached_tokens / per_million) * rates.cached_price
            )

        costs_by_key[key] = round(spend, 4)
        running_total += spend

    return {
        "total_estimated_usd": round(running_total, 4),
        "by_model": costs_by_key,
    }

def format_cost_report(summary: "UsageSummary") -> str:
    """Render a plain-text cost report for a usage summary.

    The report has a header with the session id and start time, a totals
    section (calls, token counts, estimated cost), and one line per entry of
    ``summary.model_usage`` with its token count, call count and cost.

    Args:
        summary: UsageSummary with token usage data

    Returns:
        The report as a single newline-joined string.
    """
    cost = estimate_cost(summary)
    per_model_costs = cost['by_model']

    header = [
        "Token Usage Cost Report",
        "=" * 50,
        f"Session: {summary.session_id}",
        f"Started: {summary.started_at.isoformat()}",
        "",
        "Totals:",
        f"  Calls: {summary.total_calls:,}",
        f"  Input Tokens: {summary.total_input_tokens:,}",
        f"  Output Tokens: {summary.total_output_tokens:,}",
        f"  Total Tokens: {summary.total_tokens:,}",
        f"  Estimated Cost: ${cost['total_estimated_usd']:.4f}",
        "",
        "By Model:",
    ]

    breakdown = []
    for key, usage in summary.model_usage.items():
        # A key missing from the estimate is shown as zero cost.
        model_cost = per_model_costs.get(key, 0)
        breakdown.append(
            f"  {key}: {usage.total_tokens:,} tokens "
            f"({usage.num_calls} calls) - ${model_cost:.4f}"
        )

    return "\n".join(header + breakdown)

def clear_pricing_cache() -> None:
    """Reset the module-level pricing cache to None.

    Intended for tests or after the pricing config changes, so that the next
    pricing lookup reloads instead of reusing the cached table.
    """
    global _PRICING_CACHE
    _PRICING_CACHE = None