#!/usr/bin/env python3
"""
Commit Bug Scanner - K.5.4

Proactively scans commits for potential bugs, anti-patterns, and code smells.
Analyzes recent commits and flags high-risk changes.

Usage:
    python3 scripts/commit-bug-scanner.py [--commits 10] [--branch main]
    python3 scripts/commit-bug-scanner.py --since 2024-01-01

Track: K (Workflow Automation)
Agent: commit-bug-scanner
Command: /bug-scan
"""
import argparse
import json
import re
import subprocess
import sys
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any
@dataclass
class BugPattern:
    """Describe one regex-based check that flags a potentially buggy line."""

    # Short identifier reported alongside each finding.
    name: str
    # Regular-expression source that is searched for in the scanned text.
    pattern: str
    # Severity label: "high", "medium" or "low".
    severity: str
    # Human-readable explanation of what the check detects.
    description: str
    # Language names the check applies to; "*" means every language.
    languages: list[str]
# Common bug patterns to scan for
# Built-in checks. Each entry is
# BugPattern(name, regex, severity, description, languages).
BUG_PATTERNS = [
    # Security issues
    BugPattern(
        "hardcoded_secret",
        # A credential-like name assigned a non-empty quoted literal.
        r"(?:password|secret|api_key|token)\s*=\s*['\"][^'\"]+['\"]",
        "high",
        "Possible hardcoded secret or credential",
        ["python", "javascript", "typescript"],
    ),
    BugPattern(
        "sql_injection",
        # execute(... %s, any format(...) call, or an f-string containing
        # SELECT followed by a "{" interpolation.
        r"execute\([^)]*%s|format\([^)]*\)|f['\"].*SELECT.*\{",
        "high",
        "Potential SQL injection vulnerability",
        ["python"],
    ),
    BugPattern(
        "eval_usage",
        r"\beval\s*\(",
        "high",
        "Dangerous eval() usage",
        ["python", "javascript"],
    ),
    # Error handling
    BugPattern("bare_except", r"except\s*:", "medium", "Bare except catches all exceptions", ["python"]),
    BugPattern("empty_catch", r"catch\s*\([^)]*\)\s*{\s*}", "medium", "Empty catch block swallows errors", ["javascript", "typescript"]),
    BugPattern("ignored_error", r"catch\s*\([^)]*\)\s*{\s*//", "medium", "Caught error might be ignored", ["javascript", "typescript"]),
    # Logic issues
    BugPattern("todo_fixme", r"(?:TODO|FIXME|XXX|HACK|BUG):", "low", "Unresolved TODO/FIXME comment", ["*"]),
    BugPattern("debug_print", r"(?:console\.log|print\(|debugger)", "low", "Debug statement left in code", ["*"]),
    BugPattern("commented_code", r"^\s*#.*(?:def |class |import |from )", "low", "Commented out code", ["python"]),
    # Performance issues
    # NOTE(review): the first alternative of n_plus_one and the whole of
    # sync_in_async contain "\n", so they can only match when searched against
    # text spanning more than one line. The "|" in n_plus_one is top-level, so
    # ".filter(" or ".query(" alone also matches. Both are kept unchanged here.
    BugPattern("n_plus_one", r"for\s+.*in\s+.*:\s*\n\s*.*\.get\(|\.filter\(|\.query\(",
               "medium", "Potential N+1 query pattern", ["python"]),
    BugPattern("sync_in_async", r"async\s+def.*\n.*(?:time\.sleep|requests\.)",
               "medium", "Synchronous call in async function", ["python"]),
    # Type issues
    BugPattern("none_comparison", r"==\s*None|!=\s*None", "low", "Use 'is None' instead of '== None'", ["python"]),
    BugPattern(
        "type_coercion",
        # The lookarounds keep strict "===" / "!==" from being reported as
        # loose equality.
        r"(?<![=!])==(?!=)\s*['\"]|['\"]\s*==(?!=)",
        "low",
        "Loose equality with string",
        ["javascript"],
    ),
]
def run_command(cmd: list[str]) -> tuple[int, str, str]:
    """Execute *cmd* as a subprocess and report its outcome.

    Args:
        cmd: Program and arguments, passed to ``subprocess.run`` as a list.

    Returns:
        ``(returncode, stdout, stderr)`` with both streams decoded as text.
        If launching or waiting for the process raises (including the
        60-second timeout), the result is ``(1, "", <exception text>)``.
    """
    try:
        completed = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
    except Exception as exc:
        # Best-effort contract: callers only ever inspect the return code.
        return 1, "", str(exc)
    return completed.returncode, completed.stdout, completed.stderr
def get_commits(count: int = 10, since: str | None = None, branch: str = "HEAD") -> list[dict[str, Any]]:
    """Fetch recent commits from ``git log``.

    Args:
        count: Maximum number of commits to return.
        since: Optional value for git's ``--since`` filter.
        branch: Revision to start the log from.

    Returns:
        One dict per commit with the keys ``sha``, ``author``, ``message``
        and ``date``. Returns an empty list when ``git log`` fails; the
        failure is reported on stderr.
    """
    # Fields are separated by the ASCII unit separator (%x1f in git's format
    # language) rather than "|", which can legitimately appear in author
    # names and commit subjects and would shift the parsed fields.
    separator = "\x1f"
    cmd = ["git", "log", branch, f"-{count}", "--pretty=format:%H%x1f%an%x1f%s%x1f%ai"]
    if since:
        cmd.append(f"--since={since}")

    code, stdout, stderr = run_command(cmd)
    if code != 0:
        print(f"git log failed: {stderr.strip()}", file=sys.stderr)
        return []

    commits = []
    for line in stdout.split("\n"):
        if not line:
            continue
        parts = line.split(separator, 3)
        # Skip anything that is not a complete four-field record.
        if len(parts) == 4:
            commits.append({
                "sha": parts[0],
                "author": parts[1],
                "message": parts[2],
                "date": parts[3],
            })
    return commits
def get_commit_diff(sha: str) -> str:
    """Return the patch text ``git show`` prints for commit *sha*.

    The commit header is suppressed with an empty pretty format and colour is
    disabled. An empty string is returned when the command exits non-zero.
    """
    status, patch, _ = run_command(["git", "show", sha, "--pretty=format:", "--no-color"])
    if status != 0:
        return ""
    return patch
def get_file_extension(diff_line: str) -> str:
    """Extract the file extension from a ``+++ a/...`` or ``+++ b/...`` diff header.

    Args:
        diff_line: Diff text; it may span several lines, and the first line
            that looks like a ``+++`` header is used.

    Returns:
        The extension without its leading dot, or ``""`` when no header is
        found or the file name has no suffix.
    """
    # The "+" characters must be escaped: a bare "^+++" is not a valid regex.
    match = re.search(r"^\+\+\+ [ab]/(.+)$", diff_line, re.MULTILINE)
    if match is None:
        return ""
    return Path(match.group(1)).suffix.lstrip(".")
# Maps a file extension (lower-case, without the dot) to the language name
# used in a pattern's ``languages`` list.
_EXTENSION_LANGUAGES = {
    "py": "python",
    "pyi": "python",
    "js": "javascript",
    "jsx": "javascript",
    "mjs": "javascript",
    "cjs": "javascript",
    "ts": "typescript",
    "tsx": "typescript",
}


def _iter_added_lines(diff: str):
    """Yield ``(file_path, line_number, text)`` for every added line in *diff*."""
    current_file = ""
    line_num = 0
    previous = ""
    for line in diff.split("\n"):
        if line.startswith("+++ ") and previous.startswith("--- "):
            # A "+++ " line is a file header only when it directly follows the
            # "--- " header; otherwise it is an added line whose own text
            # starts with "++ ".
            # NOTE(review): diff tools may append a tab (and a timestamp)
            # after the path, so everything from the first tab is dropped.
            path = line[4:].split("\t", 1)[0]
            if path.startswith(("a/", "b/")):
                path = path[2:]
            current_file = path
            line_num = 0
        elif line.startswith("@@"):
            # The first "+<digits>" in the hunk header is the new-file start line.
            match = re.search(r"\+(\d+)", line)
            if match:
                line_num = int(match.group(1))
        elif line.startswith("+"):
            yield current_file, line_num, line[1:]
            line_num += 1
        elif not line.startswith(("-", "\\")):
            # Context line. Removed lines and "\ No newline at end of file"
            # markers do not exist in the new file, so they do not advance
            # the counter.
            line_num += 1
        previous = line


def scan_diff_for_bugs(diff: str, patterns: list[BugPattern]) -> list[dict[str, Any]]:
    """Scan the added lines of a unified diff for bug patterns.

    Args:
        diff: Unified diff text, e.g. the output of ``git show``.
        patterns: Checks to apply. A check runs on a line when its
            ``languages`` contains ``"*"``, the language name derived from the
            file extension, or the raw extension itself.

    Returns:
        One dict per match with the keys ``pattern``, ``severity``,
        ``description``, ``file``, ``line`` (line number in the new file) and
        ``content`` (the added line, truncated to 100 characters and
        stripped). Findings are ordered as the added lines appear in the diff.

    Each added line is searched on its own, case-insensitively, so a regex
    that requires a newline cannot match here.
    """
    findings = []
    for file_path, line_num, content in _iter_added_lines(diff):
        ext = Path(file_path).suffix.lstrip(".").lower() if file_path else ""
        # Patterns name languages ("python"), not extensions ("py"); translate
        # before comparing so language-specific checks actually apply.
        language = _EXTENSION_LANGUAGES.get(ext, ext)

        for pattern in patterns:
            languages = pattern.languages
            if "*" not in languages and language not in languages and ext not in languages:
                continue

            if re.search(pattern.pattern, content, re.IGNORECASE):
                findings.append({
                    "pattern": pattern.name,
                    "severity": pattern.severity,
                    "description": pattern.description,
                    "file": file_path,
                    "line": line_num,
                    "content": content[:100].strip(),
                })

    return findings
def calculate_risk_score(findings: list[dict[str, Any]]) -> int:
    """Add up the severity weights of *findings*.

    A "high" finding counts 10, "medium" 5 and "low" 1; any other severity
    value contributes nothing.
    """
    weights = {"high": 10, "medium": 5, "low": 1}
    total = 0
    for finding in findings:
        total += weights.get(finding["severity"], 0)
    return total
def generate_report(commits_analysis: list[dict[str, Any]]) -> str:
    """Render the scan results as a Markdown report.

    Args:
        commits_analysis: One dict per scanned commit. The keys read here are
            ``sha``, ``author``, ``message``, ``findings`` and ``risk_score``.

    Returns:
        The report text: a header with totals, then one section per commit
        that has findings (highest risk score first), then a per-pattern
        summary table, or a single "nothing found" line when no commit has
        findings.
    """
    finding_total = 0
    high_risk_total = 0
    for entry in commits_analysis:
        finding_total += len(entry["findings"])
        if entry["risk_score"] >= 10:
            high_risk_total += 1

    generated_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    report = [
        "# Commit Bug Scan Report",
        "",
        f"**Commits Scanned:** {len(commits_analysis)}",
        f"**Total Findings:** {finding_total}",
        f"**High Risk Commits:** {high_risk_total}",
        f"**Generated:** {generated_at}",
        "",
        "---",
        "",
    ]

    flagged = [entry for entry in commits_analysis if entry["findings"]]
    if not flagged:
        report.extend(["No potential bugs detected in scanned commits.", ""])
    else:
        # Per-severity heading prefix; every prefix is currently empty.
        markers = {"high": "", "medium": "", "low": ""}

        # Stable descending sort: equally risky commits keep their input order.
        flagged.sort(key=lambda entry: entry["risk_score"], reverse=True)
        for entry in flagged:
            score = entry["risk_score"]
            if score >= 10:
                label = "HIGH"
            elif score >= 5:
                label = "MEDIUM"
            else:
                label = "LOW"

            report.append(f"## [{label}] {entry['sha'][:8]}: {entry['message'][:60]}")
            report.append("")
            report.append(f"**Author:** {entry['author']} | **Risk Score:** {score}")
            report.append("")

            # Bucket the findings; dict order fixes the high -> medium -> low output.
            buckets = {"high": [], "medium": [], "low": []}
            for item in entry["findings"]:
                buckets[item["severity"]].append(item)

            for level, items in buckets.items():
                if not items:
                    continue
                report.append(f"### {markers[level]} {level.title()} Severity")
                report.append("")
                for item in items:
                    snippet = item["content"]
                    report.append(f"- **{item['pattern']}** in `{item['file']}:{item['line']}`")
                    report.append(f" - {item['description']}")
                    if len(snippet) > 80:
                        report.append(f" - `{snippet[:80]}...`")
                    else:
                        report.append(f" - `{snippet}`")
                report.append("")

        # Count how often each pattern fired across all commits.
        tally: dict[str, int] = {}
        for entry in commits_analysis:
            for item in entry["findings"]:
                key = item["pattern"]
                if key in tally:
                    tally[key] += 1
                else:
                    tally[key] = 1

        if tally:
            report.extend([
                "## Pattern Summary",
                "",
                "| Pattern | Count | Severity |",
                "|---------|-------|----------|",
            ])
            # Rows follow the definition order of BUG_PATTERNS.
            for known in BUG_PATTERNS:
                if known.name in tally:
                    report.append(f"| {known.name} | {tally[known.name]} | {known.severity} |")
            report.append("")

    report.extend([
        "---",
        "*Generated by CODITECT Commit Bug Scanner*",
    ])
    return "\n".join(report)
def main() -> None:
    """Command-line entry point: scan commits and emit a report.

    Parses the arguments, loads the requested commits, scans each commit's
    diff with the patterns at or above ``--min-severity``, and writes the
    result as Markdown (default) or JSON to ``--output`` or stdout. Progress
    messages go to stderr. Exits with status 0 when no commits are found.
    """
    parser = argparse.ArgumentParser(
        description="Scan commits for potential bugs and anti-patterns"
    )
    parser.add_argument(
        "--commits", "-n",
        type=int,
        default=10,
        help="Number of commits to scan (default: 10)"
    )
    parser.add_argument(
        "--since", "-s",
        type=str,
        default=None,
        help="Scan commits since date (YYYY-MM-DD)"
    )
    parser.add_argument(
        "--branch", "-b",
        type=str,
        default="HEAD",
        help="Branch to scan (default: HEAD)"
    )
    parser.add_argument(
        "--output", "-o",
        type=str,
        default=None,
        help="Output file path (default: stdout)"
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Output as JSON instead of Markdown"
    )
    parser.add_argument(
        "--min-severity",
        choices=["low", "medium", "high"],
        default="low",
        help="Minimum severity to report (default: low)"
    )

    args = parser.parse_args()

    # Keep only the patterns at or above the requested severity.
    severity_levels = {"low": 0, "medium": 1, "high": 2}
    min_level = severity_levels[args.min_severity]
    filtered_patterns = [p for p in BUG_PATTERNS if severity_levels[p.severity] >= min_level]

    print(f"Scanning {args.commits} commits...", file=sys.stderr)

    commits = get_commits(args.commits, args.since, args.branch)
    if not commits:
        print("No commits found to scan.", file=sys.stderr)
        sys.exit(0)

    commits_analysis = []
    for commit in commits:
        diff = get_commit_diff(commit["sha"])
        findings = scan_diff_for_bugs(diff, filtered_patterns)
        commits_analysis.append({
            **commit,
            "findings": findings,
            "risk_score": calculate_risk_score(findings)
        })

    if args.json:
        output = json.dumps(commits_analysis, indent=2, default=str)
    else:
        output = generate_report(commits_analysis)

    if args.output:
        # Explicit UTF-8: author names, commit messages and code snippets may
        # contain characters the platform's default encoding cannot represent.
        Path(args.output).write_text(output, encoding="utf-8")
        print(f"Report written to: {args.output}", file=sys.stderr)
    else:
        print(output)
# Run the scanner only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()