#!/usr/bin/env python3
"""Coverage Gap Detector - K.5.7

Identifies untested code paths, generates focused test suggestions, and
tracks coverage improvements over time.

Usage:
    python3 scripts/coverage-gap-detector.py [--coverage-file coverage.json]
    python3 scripts/coverage-gap-detector.py --threshold 80

Track: K (Workflow Automation)
Agent: coverage-gap-detector
Command: /coverage-gaps
"""
import argparse
import json
import re
import subprocess
import sys
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any
@dataclass
class CoverageGap:
    """An identified coverage gap.

    One record per gap produced by identify_gaps(); consumed by
    generate_report() and by main() when emitting JSON output.
    """

    file: str             # source file path exactly as it appears in the coverage data
    line_start: int       # first uncovered line (0 for function/branch gaps)
    line_end: int         # last uncovered line (0 for function/branch gaps)
    function_name: str    # populated only for gap_type == "function"
    gap_type: str         # function, branch, line
    complexity: str       # low, medium, high
    suggested_test: str   # human-readable suggestion shown in the report
def run_command(cmd: list[str]) -> tuple[int, str, str]:
    """Execute *cmd* as a subprocess and capture its output.

    Returns a ``(returncode, stdout, stderr)`` triple. Any failure to
    launch or complete the command (including the 120-second timeout)
    is reported as ``(1, "", <error message>)`` rather than raised.
    """
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
    except Exception as exc:  # launch failure, timeout, etc.
        return 1, "", str(exc)
    return proc.returncode, proc.stdout, proc.stderr
def parse_coverage_json(coverage_path: Path) -> dict[str, Any]:
    """Load a JSON coverage report (Istanbul/nyc or coverage.py format).

    Returns the decoded dictionary, or an empty dict (with a message on
    stderr) when the file is missing or is not valid JSON.
    """
    try:
        return json.loads(coverage_path.read_text())
    except Exception as e:
        print(f"Error parsing coverage file: {e}", file=sys.stderr)
        return {}
def parse_lcov(lcov_path: Path) -> dict[str, dict[str, Any]]:
    """Parse an LCOV trace file into per-file coverage dictionaries.

    Each entry maps a source path to::

        {"lines": {line_no: hits},
         "functions": {name: {"line": int, "hits": int}},
         "branches": {"line:block:branch": hits}}

    Records lacking an ``end_of_record`` terminator are discarded; a
    parse error yields the partial result plus a message on stderr.
    """
    result: dict[str, dict[str, Any]] = {}
    active_file: str | None = None
    record: dict[str, Any] = {"lines": {}, "functions": {}, "branches": {}}
    try:
        with open(lcov_path) as handle:
            for raw in handle:
                entry = raw.strip()
                if entry.startswith("SF:"):
                    # New source-file record; reset accumulators.
                    active_file = entry[3:]
                    record = {"lines": {}, "functions": {}, "branches": {}}
                elif entry.startswith("FN:"):
                    # FN:<start line>,<function name>
                    fields = entry[3:].split(",")
                    if len(fields) >= 2:
                        record["functions"][fields[1]] = {"line": int(fields[0]), "hits": 0}
                elif entry.startswith("FNDA:"):
                    # FNDA:<execution count>,<function name>
                    fields = entry[5:].split(",")
                    if len(fields) >= 2 and fields[1] in record["functions"]:
                        record["functions"][fields[1]]["hits"] = int(fields[0])
                elif entry.startswith("DA:"):
                    # DA:<line number>,<execution count>
                    fields = entry[3:].split(",")
                    if len(fields) >= 2:
                        record["lines"][int(fields[0])] = int(fields[1])
                elif entry.startswith("BRDA:"):
                    # BRDA:<line>,<block>,<branch>,<taken> ("-" means never evaluated)
                    fields = entry[5:].split(",")
                    if len(fields) >= 4:
                        branch_key = f"{fields[0]}:{fields[1]}:{fields[2]}"
                        record["branches"][branch_key] = int(fields[3]) if fields[3] != "-" else 0
                elif entry == "end_of_record" and active_file:
                    result[active_file] = record
                    active_file = None
    except Exception as e:
        print(f"Error parsing LCOV file: {e}", file=sys.stderr)
    return result
def find_coverage_file() -> Path | None: """Find coverage file in common locations.""" common_paths = [ "coverage/coverage-final.json", "coverage.json", ".coverage.json", "coverage/lcov.info", "lcov.info", ".coverage", "htmlcov/coverage.json", ]
for path in common_paths:
p = Path(path)
if p.exists():
return p
return None
def identify_gaps(coverage_data: dict[str, Any], threshold: float = 80) -> list[CoverageGap]:
    """Identify coverage gaps from parsed coverage data.

    Supports coverage.py JSON (per-file data under a top-level "files"
    key, with "executed_lines"/"missing_lines" lists), Istanbul/nyc JSON,
    and the dict produced by parse_lcov().

    Args:
        coverage_data: Parsed coverage report.
        threshold: Kept for interface compatibility; gap detection itself
            is threshold-independent.

    Returns:
        Gaps sorted high complexity first, then medium, then by file.
    """
    gaps: list[CoverageGap] = []

    # coverage.py JSON nests per-file entries under "files"; Istanbul/nyc
    # and parse_lcov() output map file paths at the top level. (The old
    # check for a "coverage" key never matched coverage.py reports.)
    files = coverage_data.get("files")
    file_coverage = files if isinstance(files, dict) else coverage_data

    for file_path, data in file_coverage.items():
        if not isinstance(data, dict):
            continue
        # Skip test files and generated/vendored directories.
        if "test" in file_path.lower() or "node_modules" in file_path or "__pycache__" in file_path:
            continue

        # Normalize line coverage into a {line: hits} dict or hit list.
        if "lines" in data:
            lines = data["lines"]
        elif "l" in data:
            lines = data["l"]
        elif "s" in data:  # Istanbul statement coverage
            lines = data["s"]
        elif "executed_lines" in data or "missing_lines" in data:
            # coverage.py format: explicit executed/missing line lists.
            lines = {ln: 1 for ln in data.get("executed_lines", [])}
            lines.update({ln: 0 for ln in data.get("missing_lines", [])})
        else:
            continue

        # Sort before grouping — dict key order is not guaranteed ascending.
        if isinstance(lines, dict):
            uncovered_lines = sorted(int(k) for k, v in lines.items() if v == 0)
        elif isinstance(lines, list):
            uncovered_lines = [i + 1 for i, v in enumerate(lines) if v == 0]
        else:
            uncovered_lines = []

        if uncovered_lines:
            # Group consecutive uncovered lines into (start, end) ranges.
            ranges = []
            start = end = uncovered_lines[0]
            for line in uncovered_lines[1:]:
                if line == end + 1:
                    end = line
                else:
                    ranges.append((start, end))
                    start = end = line
            ranges.append((start, end))

            # Only ranges of 3+ uncovered lines become gaps.
            for start, end in ranges:
                if end - start >= 2:
                    span = end - start
                    complexity = "high" if span > 20 else "medium" if span > 10 else "low"
                    gaps.append(CoverageGap(
                        file=file_path,
                        line_start=start,
                        line_end=end,
                        function_name="",  # would need AST parsing to determine
                        gap_type="line",
                        complexity=complexity,
                        suggested_test=f"Add tests covering lines {start}-{end}",
                    ))

        # Functions that were never called. (Previously skipped whenever a
        # file had full line coverage, masking function/branch gaps.)
        functions = data.get("functions", data.get("f", {}))
        if isinstance(functions, dict):
            for func_name, hits in functions.items():
                hit_count = hits if isinstance(hits, int) else hits.get("hits", 0)
                if hit_count == 0:
                    gaps.append(CoverageGap(
                        file=file_path,
                        line_start=0,
                        line_end=0,
                        function_name=str(func_name),
                        gap_type="function",
                        complexity="medium",
                        suggested_test=f"Add test for function '{func_name}'",
                    ))

        # Aggregate branch misses; only flag files with more than 3.
        branches = data.get("branches", data.get("b", {}))
        uncovered_branches = 0
        if isinstance(branches, dict):
            for hits in branches.values():
                if isinstance(hits, list):
                    uncovered_branches += sum(1 for h in hits if h == 0)
                elif hits == 0:
                    uncovered_branches += 1
        if uncovered_branches > 3:
            gaps.append(CoverageGap(
                file=file_path,
                line_start=0,
                line_end=0,
                function_name="",
                gap_type="branch",
                complexity="high" if uncovered_branches > 10 else "medium",
                suggested_test=f"Add tests for {uncovered_branches} uncovered branches",
            ))

    return sorted(gaps, key=lambda g: (g.complexity != "high", g.complexity != "medium", g.file))
def calculate_coverage_stats(coverage_data: dict[str, Any]) -> dict[str, float]:
    """Calculate overall line/function/branch coverage percentages.

    Accepts coverage.py JSON (per-file entries under a top-level "files"
    key, with "executed_lines"/"missing_lines" lists), Istanbul/nyc JSON,
    or the dict produced by parse_lcov(). Each percentage is 0 when no
    items of that kind were recorded.
    """
    total_lines = covered_lines = 0
    total_functions = covered_functions = 0
    total_branches = covered_branches = 0

    # coverage.py JSON nests per-file entries under "files"; previously the
    # wrapper dict itself was iterated, producing all-zero statistics.
    files = coverage_data.get("files")
    file_coverage = files if isinstance(files, dict) else coverage_data

    for data in file_coverage.values():
        if not isinstance(data, dict):
            continue

        # Lines: {line: hits} mapping, or coverage.py executed/missing lists.
        lines = data.get("lines", data.get("l", data.get("s", {})))
        if isinstance(lines, dict) and lines:
            total_lines += len(lines)
            covered_lines += sum(1 for v in lines.values() if v > 0)
        elif "executed_lines" in data or "missing_lines" in data:
            executed = len(data.get("executed_lines", []))
            missing = len(data.get("missing_lines", []))
            total_lines += executed + missing
            covered_lines += executed

        # Functions: hits may be a bare int or {"hits": int} (LCOV form).
        functions = data.get("functions", data.get("f", {}))
        if isinstance(functions, dict):
            total_functions += len(functions)
            for hits in functions.values():
                if isinstance(hits, int) and hits > 0:
                    covered_functions += 1
                elif isinstance(hits, dict) and hits.get("hits", 0) > 0:
                    covered_functions += 1

        # Branches: Istanbul uses lists of per-branch hits; LCOV uses ints.
        branches = data.get("branches", data.get("b", {}))
        if isinstance(branches, dict):
            for hits in branches.values():
                if isinstance(hits, list):
                    total_branches += len(hits)
                    covered_branches += sum(1 for h in hits if h > 0)
                else:
                    total_branches += 1
                    if hits > 0:
                        covered_branches += 1

    return {
        "line_coverage": (covered_lines / total_lines * 100) if total_lines > 0 else 0,
        "function_coverage": (covered_functions / total_functions * 100) if total_functions > 0 else 0,
        "branch_coverage": (covered_branches / total_branches * 100) if total_branches > 0 else 0,
        "total_lines": total_lines,
        "covered_lines": covered_lines,
    }
def generate_report(gaps: list[CoverageGap], stats: dict[str, float], threshold: float) -> str:
    """Render the coverage gap analysis as a Markdown report.

    Args:
        gaps: Prioritized gaps from identify_gaps().
        stats: Aggregate metrics from calculate_coverage_stats().
        threshold: Target coverage percentage used for status markers.

    Returns:
        The complete Markdown document as a single string.
    """
    lines = [
        "# Coverage Gap Analysis Report",
        "",
        f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"Threshold: {threshold}%",
        "",
        "---",
        "",
        "## Coverage Summary",
        "",
        "| Metric | Coverage | Status |",
        "|--------|----------|--------|",
    ]
    for metric, value in [
        ("Line Coverage", stats.get("line_coverage", 0)),
        ("Function Coverage", stats.get("function_coverage", 0)),
        ("Branch Coverage", stats.get("branch_coverage", 0)),
    ]:
        # PASS at/above threshold, WARN within 80% of it, FAIL below that.
        # (The status strings were previously empty, leaving the column blank.)
        status = "PASS" if value >= threshold else "WARN" if value >= threshold * 0.8 else "FAIL"
        lines.append(f"| {metric} | {value:.1f}% | {status} |")
    lines.extend([
        "",
        f"**Total Lines:** {stats.get('total_lines', 0):,}",
        f"**Covered Lines:** {stats.get('covered_lines', 0):,}",
        "",
    ])
    if not gaps:
        lines.extend([
            "## No Significant Gaps Found",
            "",
            f"Coverage meets the {threshold}% threshold across all files.",
        ])
    else:
        # Group gaps by file for the per-file section below.
        by_file: dict[str, list[CoverageGap]] = {}
        for gap in gaps:
            by_file.setdefault(gap.file, []).append(gap)
        lines.extend([
            f"## Coverage Gaps ({len(gaps)} total)",
            "",
        ])
        # Summary table of the 15 highest-priority gaps.
        lines.extend([
            "### Priority Gaps",
            "",
            "| File | Type | Lines | Complexity | Action |",
            "|------|------|-------|------------|--------|",
        ])
        for gap in gaps[:15]:
            file_short = gap.file.split("/")[-1] if "/" in gap.file else gap.file
            line_range = f"{gap.line_start}-{gap.line_end}" if gap.line_start else "-"
            # Truncate long suggestions; add "..." only when actually truncated.
            action = gap.suggested_test
            if len(action) > 40:
                action = action[:40] + "..."
            lines.append(
                f"| `{file_short}` | {gap.gap_type} | {line_range} | "
                f"{gap.complexity} | {action} |"
            )
        lines.append("")
        # Detailed per-file breakdown (first 10 files, 5 gaps each).
        lines.extend([
            "### Gaps by File",
            "",
        ])
        for file_path, file_gaps in sorted(by_file.items())[:10]:
            file_short = file_path.split("/")[-2:] if "/" in file_path else [file_path]
            lines.extend([
                f"#### `{'/'.join(file_short)}`",
                "",
            ])
            for gap in file_gaps[:5]:
                # Complexity markers were previously all empty strings.
                marker = {"high": "[HIGH]", "medium": "[MED]", "low": "[LOW]"}[gap.complexity]
                if gap.gap_type == "function":
                    lines.append(f"- {marker} Function `{gap.function_name}` not tested")
                elif gap.gap_type == "branch":
                    lines.append(f"- {marker} {gap.suggested_test}")
                else:
                    lines.append(f"- {marker} Lines {gap.line_start}-{gap.line_end} uncovered")
            if len(file_gaps) > 5:
                lines.append(f"- ...and {len(file_gaps) - 5} more gaps")
            lines.append("")
        # Actionable recommendations.
        lines.extend([
            "## Recommendations",
            "",
            "### High Priority (Complexity: High)",
            "",
        ])
        high_gaps = [g for g in gaps if g.complexity == "high"]
        for gap in high_gaps[:5]:
            lines.append(f"1. **{gap.file}**: {gap.suggested_test}")
        lines.extend([
            "",
            "### Quick Wins (Complexity: Low)",
            "",
        ])
        low_gaps = [g for g in gaps if g.complexity == "low"]
        for gap in low_gaps[:5]:
            lines.append(f"- {gap.file}: {gap.suggested_test}")
        lines.append("")
    lines.extend([
        "---",
        "*Generated by CODITECT Coverage Gap Detector*",
    ])
    return "\n".join(lines)
def main():
    """CLI entry point: locate, parse, and analyze a coverage report, then emit it."""
    parser = argparse.ArgumentParser(
        description="Identify coverage gaps and suggest tests"
    )
    parser.add_argument(
        "--coverage-file", "-c", type=str, default=None,
        help="Path to coverage file (JSON or LCOV)"
    )
    parser.add_argument(
        "--threshold", "-t", type=float, default=80,
        help="Coverage threshold percentage (default: 80)"
    )
    parser.add_argument(
        "--output", "-o", type=str, default=None,
        help="Output file path (default: stdout)"
    )
    parser.add_argument(
        "--json", action="store_true",
        help="Output as JSON instead of Markdown"
    )
    opts = parser.parse_args()

    # Resolve the coverage file: explicit flag wins, then known locations.
    coverage_path = Path(opts.coverage_file) if opts.coverage_file else find_coverage_file()

    if not coverage_path or not coverage_path.exists():
        print("No coverage file found. Run your test suite with coverage first.", file=sys.stderr)
        print("Common commands:", file=sys.stderr)
        print(" pytest --cov --cov-report=json", file=sys.stderr)
        print(" npm test -- --coverage", file=sys.stderr)
        sys.exit(1)

    print(f"Analyzing coverage from: {coverage_path}", file=sys.stderr)

    # LCOV traces are detected by extension/name; everything else is JSON.
    is_lcov = coverage_path.suffix == ".info" or "lcov" in coverage_path.name
    coverage_data = parse_lcov(coverage_path) if is_lcov else parse_coverage_json(coverage_path)

    if not coverage_data:
        print("Failed to parse coverage data.", file=sys.stderr)
        sys.exit(1)

    gaps = identify_gaps(coverage_data, opts.threshold)
    stats = calculate_coverage_stats(coverage_data)
    print(f"Found {len(gaps)} coverage gaps.", file=sys.stderr)

    if opts.json:
        output = json.dumps({
            "stats": stats,
            "gaps": [
                {
                    "file": g.file,
                    "line_start": g.line_start,
                    "line_end": g.line_end,
                    "type": g.gap_type,
                    "complexity": g.complexity,
                    "suggestion": g.suggested_test,
                }
                for g in gaps
            ],
        }, indent=2)
    else:
        output = generate_report(gaps, stats, opts.threshold)

    if opts.output:
        Path(opts.output).write_text(output)
        print(f"Report written to: {opts.output}", file=sys.stderr)
    else:
        print(output)
# Fix: the dunder underscores of __name__/"__main__" had been stripped,
# which made this guard raise NameError at import time.
if __name__ == "__main__":
    main()