#!/usr/bin/env python3
"""Coverage Gap Detector - K.5.7

Identifies untested code paths, generates focused test suggestions, and
tracks coverage improvements over time.

Usage:
    python3 scripts/coverage-gap-detector.py [--coverage-file coverage.json]
    python3 scripts/coverage-gap-detector.py --threshold 80

Track: K (Workflow Automation)
Agent: coverage-gap-detector
Command: /coverage-gaps
"""
import argparse
import json
import re
import subprocess
import sys
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any
@dataclass
class CoverageGap:
    """An identified coverage gap.

    One record per gap produced by identify_gaps(); consumed by
    generate_report() and by main() when emitting JSON output.
    """

    file: str             # source file path exactly as it appears in the coverage data
    line_start: int       # first uncovered line (0 for function/branch gaps)
    line_end: int         # last uncovered line (0 for function/branch gaps)
    function_name: str    # populated only for gap_type == "function"
    gap_type: str         # function, branch, line
    complexity: str       # low, medium, high
    suggested_test: str   # human-readable suggestion shown in the report
def run_command(cmd: list[str]) -> tuple[int, str, str]:
    """Execute *cmd* as a subprocess and capture its output.

    Returns a ``(returncode, stdout, stderr)`` triple. Any failure to
    launch or complete the command (including the 120-second timeout)
    is reported as ``(1, "", <error message>)`` rather than raised.
    """
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
    except Exception as exc:  # launch failure, timeout, etc.
        return 1, "", str(exc)
    return proc.returncode, proc.stdout, proc.stderr
def parse_coverage_json(coverage_path: Path) -> dict[str, Any]:
    """Load a JSON coverage report (Istanbul/nyc or coverage.py format).

    Returns the decoded dictionary, or an empty dict (with a message on
    stderr) when the file is missing or is not valid JSON.
    """
    try:
        return json.loads(coverage_path.read_text())
    except Exception as e:
        print(f"Error parsing coverage file: {e}", file=sys.stderr)
        return {}
def parse_lcov(lcov_path: Path) -> dict[str, dict[str, Any]]:
    """Parse an LCOV trace file into per-file coverage dictionaries.

    Each entry maps a source path to::

        {"lines": {line_no: hits},
         "functions": {name: {"line": int, "hits": int}},
         "branches": {"line:block:branch": hits}}

    Records lacking an ``end_of_record`` terminator are discarded; a
    parse error yields the partial result plus a message on stderr.
    """
    result: dict[str, dict[str, Any]] = {}
    active_file: str | None = None
    record: dict[str, Any] = {"lines": {}, "functions": {}, "branches": {}}
    try:
        with open(lcov_path) as handle:
            for raw in handle:
                entry = raw.strip()
                if entry.startswith("SF:"):
                    # New source-file record; reset accumulators.
                    active_file = entry[3:]
                    record = {"lines": {}, "functions": {}, "branches": {}}
                elif entry.startswith("FN:"):
                    # FN:<start line>,<function name>
                    fields = entry[3:].split(",")
                    if len(fields) >= 2:
                        record["functions"][fields[1]] = {"line": int(fields[0]), "hits": 0}
                elif entry.startswith("FNDA:"):
                    # FNDA:<execution count>,<function name>
                    fields = entry[5:].split(",")
                    if len(fields) >= 2 and fields[1] in record["functions"]:
                        record["functions"][fields[1]]["hits"] = int(fields[0])
                elif entry.startswith("DA:"):
                    # DA:<line number>,<execution count>
                    fields = entry[3:].split(",")
                    if len(fields) >= 2:
                        record["lines"][int(fields[0])] = int(fields[1])
                elif entry.startswith("BRDA:"):
                    # BRDA:<line>,<block>,<branch>,<taken> ("-" means never evaluated)
                    fields = entry[5:].split(",")
                    if len(fields) >= 4:
                        branch_key = f"{fields[0]}:{fields[1]}:{fields[2]}"
                        record["branches"][branch_key] = int(fields[3]) if fields[3] != "-" else 0
                elif entry == "end_of_record" and active_file:
                    result[active_file] = record
                    active_file = None
    except Exception as e:
        print(f"Error parsing LCOV file: {e}", file=sys.stderr)
    return result
def find_coverage_file() -> Path | None: """Find coverage file in common locations.""" common_paths = [ "coverage/coverage-final.json", "coverage.json", ".coverage.json", "coverage/lcov.info", "lcov.info", ".coverage", "htmlcov/coverage.json", ]
for path in common_paths:
p = Path(path)
if p.exists():
return p
return None
def identify_gaps(coverage_data: dict[str, Any], threshold: float = 80) -> list[CoverageGap]:
    """Identify coverage gaps from parsed coverage data.

    Supports coverage.py JSON (per-file data under a top-level "files"
    key, with "executed_lines"/"missing_lines" lists), Istanbul/nyc JSON,
    and the dict produced by parse_lcov().

    Args:
        coverage_data: Parsed coverage report.
        threshold: Kept for interface compatibility; gap detection itself
            is threshold-independent.

    Returns:
        Gaps sorted high complexity first, then medium, then by file.
    """
    gaps: list[CoverageGap] = []

    # coverage.py JSON nests per-file entries under "files"; Istanbul/nyc
    # and parse_lcov() output map file paths at the top level. (The old
    # check for a "coverage" key never matched coverage.py reports.)
    files = coverage_data.get("files")
    file_coverage = files if isinstance(files, dict) else coverage_data

    for file_path, data in file_coverage.items():
        if not isinstance(data, dict):
            continue
        # Skip test files and generated/vendored directories.
        if "test" in file_path.lower() or "node_modules" in file_path or "__pycache__" in file_path:
            continue

        # Normalize line coverage into a {line: hits} dict or hit list.
        if "lines" in data:
            lines = data["lines"]
        elif "l" in data:
            lines = data["l"]
        elif "s" in data:  # Istanbul statement coverage
            lines = data["s"]
        elif "executed_lines" in data or "missing_lines" in data:
            # coverage.py format: explicit executed/missing line lists.
            lines = {ln: 1 for ln in data.get("executed_lines", [])}
            lines.update({ln: 0 for ln in data.get("missing_lines", [])})
        else:
            continue

        # Sort before grouping — dict key order is not guaranteed ascending.
        if isinstance(lines, dict):
            uncovered_lines = sorted(int(k) for k, v in lines.items() if v == 0)
        elif isinstance(lines, list):
            uncovered_lines = [i + 1 for i, v in enumerate(lines) if v == 0]
        else:
            uncovered_lines = []

        if uncovered_lines:
            # Group consecutive uncovered lines into (start, end) ranges.
            ranges = []
            start = end = uncovered_lines[0]
            for line in uncovered_lines[1:]:
                if line == end + 1:
                    end = line
                else:
                    ranges.append((start, end))
                    start = end = line
            ranges.append((start, end))

            # Only ranges of 3+ uncovered lines become gaps.
            for start, end in ranges:
                if end - start >= 2:
                    span = end - start
                    complexity = "high" if span > 20 else "medium" if span > 10 else "low"
                    gaps.append(CoverageGap(
                        file=file_path,
                        line_start=start,
                        line_end=end,
                        function_name="",  # would need AST parsing to determine
                        gap_type="line",
                        complexity=complexity,
                        suggested_test=f"Add tests covering lines {start}-{end}",
                    ))

        # Functions that were never called. (Previously skipped whenever a
        # file had full line coverage, masking function/branch gaps.)
        functions = data.get("functions", data.get("f", {}))
        if isinstance(functions, dict):
            for func_name, hits in functions.items():
                hit_count = hits if isinstance(hits, int) else hits.get("hits", 0)
                if hit_count == 0:
                    gaps.append(CoverageGap(
                        file=file_path,
                        line_start=0,
                        line_end=0,
                        function_name=str(func_name),
                        gap_type="function",
                        complexity="medium",
                        suggested_test=f"Add test for function '{func_name}'",
                    ))

        # Aggregate branch misses; only flag files with more than 3.
        branches = data.get("branches", data.get("b", {}))
        uncovered_branches = 0
        if isinstance(branches, dict):
            for hits in branches.values():
                if isinstance(hits, list):
                    uncovered_branches += sum(1 for h in hits if h == 0)
                elif hits == 0:
                    uncovered_branches += 1
        if uncovered_branches > 3:
            gaps.append(CoverageGap(
                file=file_path,
                line_start=0,
                line_end=0,
                function_name="",
                gap_type="branch",
                complexity="high" if uncovered_branches > 10 else "medium",
                suggested_test=f"Add tests for {uncovered_branches} uncovered branches",
            ))

    return sorted(gaps, key=lambda g: (g.complexity != "high", g.complexity != "medium", g.file))
def calculate_coverage_stats(coverage_data: dict[str, Any]) -> dict[str, float]:
    """Calculate overall line/function/branch coverage percentages.

    Accepts coverage.py JSON (per-file entries under a top-level "files"
    key, with "executed_lines"/"missing_lines" lists), Istanbul/nyc JSON,
    or the dict produced by parse_lcov(). Each percentage is 0 when no
    items of that kind were recorded.
    """
    total_lines = covered_lines = 0
    total_functions = covered_functions = 0
    total_branches = covered_branches = 0

    # coverage.py JSON nests per-file entries under "files"; previously the
    # wrapper dict itself was iterated, producing all-zero statistics.
    files = coverage_data.get("files")
    file_coverage = files if isinstance(files, dict) else coverage_data

    for data in file_coverage.values():
        if not isinstance(data, dict):
            continue

        # Lines: {line: hits} mapping, or coverage.py executed/missing lists.
        lines = data.get("lines", data.get("l", data.get("s", {})))
        if isinstance(lines, dict) and lines:
            total_lines += len(lines)
            covered_lines += sum(1 for v in lines.values() if v > 0)
        elif "executed_lines" in data or "missing_lines" in data:
            executed = len(data.get("executed_lines", []))
            missing = len(data.get("missing_lines", []))
            total_lines += executed + missing
            covered_lines += executed

        # Functions: hits may be a bare int or {"hits": int} (LCOV form).
        functions = data.get("functions", data.get("f", {}))
        if isinstance(functions, dict):
            total_functions += len(functions)
            for hits in functions.values():
                if isinstance(hits, int) and hits > 0:
                    covered_functions += 1
                elif isinstance(hits, dict) and hits.get("hits", 0) > 0:
                    covered_functions += 1

        # Branches: Istanbul uses lists of per-branch hits; LCOV uses ints.
        branches = data.get("branches", data.get("b", {}))
        if isinstance(branches, dict):
            for hits in branches.values():
                if isinstance(hits, list):
                    total_branches += len(hits)
                    covered_branches += sum(1 for h in hits if h > 0)
                else:
                    total_branches += 1
                    if hits > 0:
                        covered_branches += 1

    return {
        "line_coverage": (covered_lines / total_lines * 100) if total_lines > 0 else 0,
        "function_coverage": (covered_functions / total_functions * 100) if total_functions > 0 else 0,
        "branch_coverage": (covered_branches / total_branches * 100) if total_branches > 0 else 0,
        "total_lines": total_lines,
        "covered_lines": covered_lines,
    }
def generate_report(gaps: list[CoverageGap], stats: dict[str, float], threshold: float) -> str:
    """Render the coverage gap analysis as a Markdown report.

    Args:
        gaps: Prioritized gaps from identify_gaps().
        stats: Aggregate metrics from calculate_coverage_stats().
        threshold: Target coverage percentage used for status markers.

    Returns:
        The complete Markdown document as a single string.
    """
    lines = [
        "# Coverage Gap Analysis Report",
        "",
        f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"Threshold: {threshold}%",
        "",
        "---",
        "",
        "## Coverage Summary",
        "",
        "| Metric | Coverage | Status |",
        "|--------|----------|--------|",
    ]
    for metric, value in [
        ("Line Coverage", stats.get("line_coverage", 0)),
        ("Function Coverage", stats.get("function_coverage", 0)),
        ("Branch Coverage", stats.get("branch_coverage", 0)),
    ]:
        # PASS at/above threshold, WARN within 80% of it, FAIL below that.
        # (The status strings were previously empty, leaving the column blank.)
        status = "PASS" if value >= threshold else "WARN" if value >= threshold * 0.8 else "FAIL"
        lines.append(f"| {metric} | {value:.1f}% | {status} |")
    lines.extend([
        "",
        f"**Total Lines:** {stats.get('total_lines', 0):,}",
        f"**Covered Lines:** {stats.get('covered_lines', 0):,}",
        "",
    ])
    if not gaps:
        lines.extend([
            "## No Significant Gaps Found",
            "",
            f"Coverage meets the {threshold}% threshold across all files.",
        ])
    else:
        # Group gaps by file for the per-file section below.
        by_file: dict[str, list[CoverageGap]] = {}
        for gap in gaps:
            by_file.setdefault(gap.file, []).append(gap)
        lines.extend([
            f"## Coverage Gaps ({len(gaps)} total)",
            "",
        ])
        # Summary table of the 15 highest-priority gaps.
        lines.extend([
            "### Priority Gaps",
            "",
            "| File | Type | Lines | Complexity | Action |",
            "|------|------|-------|------------|--------|",
        ])
        for gap in gaps[:15]:
            file_short = gap.file.split("/")[-1] if "/" in gap.file else gap.file
            line_range = f"{gap.line_start}-{gap.line_end}" if gap.line_start else "-"
            # Truncate long suggestions; add "..." only when actually truncated.
            action = gap.suggested_test
            if len(action) > 40:
                action = action[:40] + "..."
            lines.append(
                f"| `{file_short}` | {gap.gap_type} | {line_range} | "
                f"{gap.complexity} | {action} |"
            )
        lines.append("")
        # Detailed per-file breakdown (first 10 files, 5 gaps each).
        lines.extend([
            "### Gaps by File",
            "",
        ])
        for file_path, file_gaps in sorted(by_file.items())[:10]:
            file_short = file_path.split("/")[-2:] if "/" in file_path else [file_path]
            lines.extend([
                f"#### `{'/'.join(file_short)}`",
                "",
            ])
            for gap in file_gaps[:5]:
                # Complexity markers were previously all empty strings.
                marker = {"high": "[HIGH]", "medium": "[MED]", "low": "[LOW]"}[gap.complexity]
                if gap.gap_type == "function":
                    lines.append(f"- {marker} Function `{gap.function_name}` not tested")
                elif gap.gap_type == "branch":
                    lines.append(f"- {marker} {gap.suggested_test}")
                else:
                    lines.append(f"- {marker} Lines {gap.line_start}-{gap.line_end} uncovered")
            if len(file_gaps) > 5:
                lines.append(f"- ...and {len(file_gaps) - 5} more gaps")
            lines.append("")
        # Actionable recommendations.
        lines.extend([
            "## Recommendations",
            "",
            "### High Priority (Complexity: High)",
            "",
        ])
        high_gaps = [g for g in gaps if g.complexity == "high"]
        for gap in high_gaps[:5]:
            lines.append(f"1. **{gap.file}**: {gap.suggested_test}")
        lines.extend([
            "",
            "### Quick Wins (Complexity: Low)",
            "",
        ])
        low_gaps = [g for g in gaps if g.complexity == "low"]
        for gap in low_gaps[:5]:
            lines.append(f"- {gap.file}: {gap.suggested_test}")
        lines.append("")
    lines.extend([
        "---",
        "*Generated by CODITECT Coverage Gap Detector*",
    ])
    return "\n".join(lines)
def main():
    """CLI entry point: locate, parse, and analyze a coverage report, then emit it."""
    parser = argparse.ArgumentParser(
        description="Identify coverage gaps and suggest tests"
    )
    parser.add_argument(
        "--coverage-file", "-c", type=str, default=None,
        help="Path to coverage file (JSON or LCOV)"
    )
    parser.add_argument(
        "--threshold", "-t", type=float, default=80,
        help="Coverage threshold percentage (default: 80)"
    )
    parser.add_argument(
        "--output", "-o", type=str, default=None,
        help="Output file path (default: stdout)"
    )
    parser.add_argument(
        "--json", action="store_true",
        help="Output as JSON instead of Markdown"
    )
    opts = parser.parse_args()

    # Resolve the coverage file: explicit flag wins, then known locations.
    coverage_path = Path(opts.coverage_file) if opts.coverage_file else find_coverage_file()

    if not coverage_path or not coverage_path.exists():
        print("No coverage file found. Run your test suite with coverage first.", file=sys.stderr)
        print("Common commands:", file=sys.stderr)
        print(" pytest --cov --cov-report=json", file=sys.stderr)
        print(" npm test -- --coverage", file=sys.stderr)
        sys.exit(1)

    print(f"Analyzing coverage from: {coverage_path}", file=sys.stderr)

    # LCOV traces are detected by extension/name; everything else is JSON.
    is_lcov = coverage_path.suffix == ".info" or "lcov" in coverage_path.name
    coverage_data = parse_lcov(coverage_path) if is_lcov else parse_coverage_json(coverage_path)

    if not coverage_data:
        print("Failed to parse coverage data.", file=sys.stderr)
        sys.exit(1)

    gaps = identify_gaps(coverage_data, opts.threshold)
    stats = calculate_coverage_stats(coverage_data)
    print(f"Found {len(gaps)} coverage gaps.", file=sys.stderr)

    if opts.json:
        output = json.dumps({
            "stats": stats,
            "gaps": [
                {
                    "file": g.file,
                    "line_start": g.line_start,
                    "line_end": g.line_end,
                    "type": g.gap_type,
                    "complexity": g.complexity,
                    "suggestion": g.suggested_test,
                }
                for g in gaps
            ],
        }, indent=2)
    else:
        output = generate_report(gaps, stats, opts.threshold)

    if opts.output:
        Path(opts.output).write_text(output)
        print(f"Report written to: {opts.output}", file=sys.stderr)
    else:
        print(output)
# Fix: the dunder underscores of __name__/"__main__" had been stripped,
# which made this guard raise NameError at import time.
if __name__ == "__main__":
    main()