#!/usr/bin/env python3
"""

title: "Fix Md040 Code Fence Language"
component_type: script
version: "1.0.0"
audience: contributor
status: stable
summary: "MD040 Code Fence Language Auto-Fixer (Non-Breaking)"
keywords: ['code', 'docker', 'fence', 'fix', 'git']
tokens: ~500
created: 2025-12-22
updated: 2025-12-22
script_name: "fix-md040-code-fence-language.py"
language: python
executable: true
usage: "python3 scripts/fix-md040-code-fence-language.py [options]"
python_version: "3.10+"
dependencies: []
modifies_files: false
network_access: false
requires_auth: false

MD040 Code Fence Language Auto-Fixer (Non-Breaking)

Detects code blocks without language specifiers and suggests appropriate languages based on content heuristics.

Safety Features:

  • Dry-run mode (preview changes before applying)
  • Automatic backups before modifications
  • Git working tree validation
  • Rollback capability
  • Before/after validation reports

Usage:
    # DRY RUN: Preview changes (recommended first step)
    python3 scripts/fix-md040-code-fence-language.py --detect --dry-run

    # Detect and generate suggestions
    python3 scripts/fix-md040-code-fence-language.py --detect

    # Auto-apply high-confidence suggestions with backup
    python3 scripts/fix-md040-code-fence-language.py --auto-apply --min-confidence 90 --backup

    # Rollback changes
    python3 scripts/fix-md040-code-fence-language.py --rollback backup-TIMESTAMP

"""

import argparse
import json
import re
import shutil
import subprocess
from dataclasses import dataclass, asdict
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Tuple, Optional

@dataclass
class CodeBlock:
    """Represents a code block without language specifier.

    One instance is produced per untagged fenced block found in a Markdown
    file; instances are serialised with ``dataclasses.asdict`` when the
    suggestions file is written.
    """

    # Path of the Markdown file containing the block.
    file_path: str
    # Line number of the block's opening fence.
    line_number: int
    # Leading excerpt of the block's content.
    content: str
    # Language identifier proposed for the fence.
    suggested_language: str
    # Confidence in the suggestion, 0-100.
    confidence: int
    # Human-readable explanation of why the language was suggested.
    reasoning: str

class LanguageDetector:
    """Intelligent language detection based on code content.

    Each language maps to a list of ``(regex, confidence)`` pairs. A language's
    score is the highest confidence among its matching patterns, and the
    language with the highest score wins (first declared wins on ties).
    """

    # Languages whose keywords are case-insensitive by definition. Every other
    # language is matched case-sensitively: matching everything with
    # re.IGNORECASE made ``from os import path`` hit the Dockerfile ``FROM``
    # rule (95) and outrank the Python import rule (90).
    CASE_INSENSITIVE_LANGUAGES = frozenset({'sql', 'html'})

    def __init__(self):
        self.patterns = {
            'bash': [
                (r'^#!/bin/(ba)?sh', 95),
                (r'^\s*(export|source|alias)\s+', 85),
                (r'\$\{?[A-Z_]+\}?', 70),
                (r'(echo|cd|ls|mkdir|rm|mv|cp)\s+', 65),
            ],
            'python': [
                (r'^#!/usr/bin/(env\s+)?python', 95),
                (r'^\s*(import|from)\s+\w+', 90),
                (r'^\s*def\s+\w+\(', 85),
                (r'^\s*class\s+\w+[:(]', 85),
                (r'(print|range|len|str|int|list|dict)\(', 75),
            ],
            'javascript': [
                (r'^\s*(const|let|var)\s+\w+\s*=', 85),
                (r'^\s*function\s+\w+\(', 85),
                (r'^\s*(async|await)\s+', 80),
                (r'(console\.(log|error|warn)|require\(|module\.exports)', 80),
                (r'=>\s*{', 75),
            ],
            'typescript': [
                (r'^\s*interface\s+\w+\s*{', 90),
                (r'^\s*type\s+\w+\s*=', 85),
                (r':\s*(string|number|boolean|any|void)', 80),
                (r'(React\.|useState|useEffect)', 75),
            ],
            'json': [
                (r'^\s*\{[\s\n]*"[\w-]+":', 95),
                (r'^\s*\[[\s\n]*\{', 90),
                (r'"[\w-]+"\s*:\s*(".*?"|[\d.]+|true|false|null)', 85),
            ],
            'yaml': [
                (r'^\s*[\w-]+:\s*([\w-]+|"[^"]*")?\s*$', 85),
                (r'^\s*-\s+[\w-]+:', 80),
                (r'^\s*---\s*$', 90),
            ],
            'markdown': [
                (r'^#+\s+', 85),
                (r'\[.*?\]\(.*?\)', 75),
                (r'^>\s+', 70),
            ],
            'sql': [
                (r'^\s*(SELECT|INSERT|UPDATE|DELETE|CREATE|ALTER|DROP)\s+', 90),
                (r'^\s*(FROM|WHERE|JOIN|GROUP BY|ORDER BY)\s+', 85),
            ],
            'html': [
                (r'^\s*<(!DOCTYPE|html|head|body|div|span|p|a)', 90),
                (r'<\w+(\s+[\w-]+="[^"]*")*>', 80),
            ],
            'css': [
                (r'^\s*[\w.-]+\s*\{', 85),
                (r'^\s*[\w-]+\s*:\s*[^;]+;', 80),
            ],
            'rust': [
                (r'^\s*fn\s+\w+\(', 90),
                (r'^\s*(pub\s+)?(struct|enum|impl)\s+', 85),
                (r'(let|mut|&str|Vec<)', 75),
            ],
            'go': [
                (r'^\s*func\s+(\w+\s*)?\(', 90),
                (r'^\s*package\s+\w+', 95),
                (r'(import|var|const|type|interface)\s+', 80),
            ],
            'java': [
                (r'^\s*(public|private|protected)\s+(class|interface|enum)', 90),
                (r'^\s*(public|private|protected)\s+\w+\s+\w+\(', 85),
                (r'(System\.out\.|import\s+java\.)', 80),
            ],
            'c': [
                (r'^\s*#include\s+<[\w./]+>', 90),
                (r'^\s*(int|void|char|float|double)\s+\w+\(', 85),
            ],
            'cpp': [
                (r'^\s*#include\s+<[\w./]+>', 85),
                (r'(std::|using namespace|template<)', 90),
            ],
            'dockerfile': [
                (r'^\s*FROM\s+[\w/:.-]+', 95),
                (r'^\s*(RUN|CMD|ENTRYPOINT|COPY|ADD|WORKDIR|ENV)\s+', 90),
            ],
            'makefile': [
                (r'^\w+:\s*$', 85),
                (r'^\t', 75),
                (r'\$\([A-Z_]+\)', 70),
            ],
            'xml': [
                (r'^\s*<\?xml', 95),
                (r'<\w+(\s+[\w:]+="[^"]*")*>', 80),
            ],
            'toml': [
                (r'^\s*\[[\w.-]+\]', 90),
                (r'^\s*[\w-]+\s*=\s*', 80),
            ],
            'ini': [
                (r'^\s*\[[\w\s]+\]', 85),
                (r'^\s*[\w]+\s*=\s*', 75),
            ],
        }

    def detect(self, code: str) -> Tuple[str, int, str]:
        """Detect language from code content.

        Args:
            code: Raw text of the code block (may span several lines).

        Returns:
            ``(language, confidence, reasoning)``. When no pattern matches the
            result is ``('text', 50, ...)`` so callers always get a usable tag.
        """
        scores = {}
        reasons = {}

        for lang, patterns in self.patterns.items():
            flags = re.MULTILINE
            if lang in self.CASE_INSENSITIVE_LANGUAGES:
                flags |= re.IGNORECASE

            max_score = 0
            matching_patterns = []

            for pattern, confidence in patterns:
                if re.search(pattern, code, flags):
                    max_score = max(max_score, confidence)
                    matching_patterns.append(f"{pattern[:30]}...")

            if max_score > 0:
                scores[lang] = max_score
                # Only the first two matches are reported to keep it short.
                reasons[lang] = f"Matched patterns: {', '.join(matching_patterns[:2])}"

        # No matches - use 'text' as safe default
        if not scores:
            return 'text', 50, 'No specific patterns detected, using generic text'

        # max() keeps the first maximum, so ties go to the language declared
        # earliest in self.patterns.
        best_lang, best_score = max(scores.items(), key=lambda item: item[1])

        return best_lang, best_score, reasons[best_lang]

class SafetyManager:
    """Manages safety features: backups, git checks, rollbacks."""

    @staticmethod
    def _backup_path(backup_dir: Path, file_path: Path) -> Path:
        """Return where ``file_path`` is stored inside ``backup_dir``.

        The file's path is re-rooted under the backup directory by dropping
        its anchor (``/`` or a drive), preserving the directory structure.
        """
        return backup_dir / file_path.relative_to(file_path.anchor)

    @staticmethod
    def check_git_clean(repo_root: Path) -> bool:
        """Check if git working tree is clean.

        Returns:
            True only when ``git status --porcelain`` succeeds and prints
            nothing; False when the tree is dirty, git fails, or git cannot
            be executed at all.
        """
        try:
            result = subprocess.run(
                ['git', 'status', '--porcelain'],
                cwd=repo_root,
                capture_output=True,
                text=True
            )
        except (OSError, ValueError, subprocess.SubprocessError):
            return False
        return result.returncode == 0 and not result.stdout.strip()

    @staticmethod
    def create_backup(files: List[Path], backup_dir: Path) -> bool:
        """Create backup of files before modification.

        Args:
            files: Files to copy into the backup.
            backup_dir: Directory receiving the copies and ``manifest.json``.

        Returns:
            True when every file and the manifest were written, False (after
            printing the error) otherwise.
        """
        try:
            backup_dir.mkdir(parents=True, exist_ok=True)

            for file_path in files:
                # Preserve directory structure in backup
                backup_file = SafetyManager._backup_path(backup_dir, file_path)
                backup_file.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(file_path, backup_file)

            # Save manifest
            manifest = {
                'timestamp': datetime.now().isoformat(),
                'files': [str(f) for f in files]
            }
            (backup_dir / 'manifest.json').write_text(
                json.dumps(manifest, indent=2), encoding='utf-8'
            )

            return True
        except (OSError, ValueError) as e:
            print(f"Backup failed: {e}")
            return False

    @staticmethod
    def rollback(backup_dir: Path) -> bool:
        """Restore files from backup.

        Args:
            backup_dir: Directory previously filled by ``create_backup``.

        Returns:
            True only when every file listed in the manifest was restored.
            A missing manifest, a missing backup copy, or an I/O error yields
            False; files that could be restored are still restored.
        """
        try:
            manifest_file = backup_dir / 'manifest.json'
            if not manifest_file.exists():
                print(f"Error: No manifest found in {backup_dir}")
                return False

            manifest = json.loads(manifest_file.read_text(encoding='utf-8'))

            all_restored = True
            for file_str in manifest['files']:
                source = Path(file_str)
                backup_file = SafetyManager._backup_path(backup_dir, source)

                if not backup_file.exists():
                    # Previously skipped silently while still reporting
                    # success; surface it so a partial rollback is visible.
                    print(f" ✗ Missing backup for {source}")
                    all_restored = False
                    continue

                # The target directory may have been removed since the backup.
                source.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(backup_file, source)
                print(f" ✓ Restored {source}")

            return all_restored
        except (OSError, ValueError, KeyError, TypeError) as e:
            print(f"Rollback failed: {e}")
            return False

class MD040Fixer:
    """Fixes MD040 violations by adding language specifiers to code blocks."""

    # Opening backtick fence, matched against a stripped line: three or more
    # backticks followed by an info string that itself contains no backtick.
    _FENCE_OPEN = re.compile(r'^(`{3,})([^`]*)$')

    def __init__(self, repo_root: Path, dry_run: bool = False):
        """Store the repository root and mode, and build the detector.

        Args:
            repo_root: Directory that scans and reported paths are relative to.
            dry_run: When True, ``apply_fix`` only reports what it would do.
        """
        self.repo_root = repo_root
        self.dry_run = dry_run
        self.detector = LanguageDetector()
        self.code_block_pattern = re.compile(r'^```\s*$', re.MULTILINE)

    @staticmethod
    def _is_closing_fence(line: str, min_length: int) -> bool:
        """Return True if ``line`` is a bare fence of at least ``min_length`` backticks."""
        stripped = line.strip()
        return len(stripped) >= min_length and set(stripped) == {'`'}

    @classmethod
    def _untagged_blocks(cls, lines: List[str]) -> List[Tuple[int, str]]:
        """Return ``(fence_line_number, content)`` for every untagged block.

        Fences are tracked as open/close pairs so the closing fence of a
        block that already has a language is never mistaken for the opening
        fence of an untagged one. Line numbers are 1-indexed. An unclosed
        block runs to the end of ``lines``.
        """
        found = []
        total = len(lines)
        i = 0

        while i < total:
            opening = cls._FENCE_OPEN.match(lines[i].strip())
            if opening is None:
                i += 1
                continue

            fence_length = len(opening.group(1))
            info_string = opening.group(2).strip()
            fence_line = i + 1  # Line number (1-indexed)
            i += 1

            # Collect code block content up to the matching closing fence
            body = []
            while i < total and not cls._is_closing_fence(lines[i], fence_length):
                body.append(lines[i])
                i += 1
            i += 1  # step over the closing fence

            if not info_string:
                found.append((fence_line, '\n'.join(body)))

        return found

    def find_violations(self, file_path: Path) -> List["CodeBlock"]:
        """Find all code blocks without language specifiers in a file.

        Args:
            file_path: Markdown file to inspect; must live under ``repo_root``
                for the relative path in the result to be computable.

        Returns:
            One ``CodeBlock`` per untagged fenced block, with ``line_number``
            set to the 1-indexed line of the opening fence and ``content``
            truncated to its first 200 characters. An unreadable file yields
            an empty list after printing the error.
        """
        violations = []

        try:
            content = file_path.read_text(encoding='utf-8')
        except (OSError, UnicodeError) as e:
            print(f"Error reading {file_path}: {e}")
            return violations

        for fence_line, code_content in self._untagged_blocks(content.split('\n')):
            # Detection sees the whole block; only the stored excerpt is cut.
            lang, confidence, reasoning = self.detector.detect(code_content)

            violations.append(CodeBlock(
                file_path=str(file_path.relative_to(self.repo_root)),
                line_number=fence_line,
                content=code_content[:200],  # First 200 chars
                suggested_language=lang,
                confidence=confidence,
                reasoning=reasoning
            ))

        return violations

    def scan_repository(self, pattern: str = "**/*.md") -> List["CodeBlock"]:
        """Scan repository for MD040 violations.

        Args:
            pattern: Glob pattern, relative to ``repo_root``, selecting files.

        Returns:
            Violations from every matching regular file, in sorted path order
            so repeated runs produce the same output.
        """
        all_violations = []

        for md_file in sorted(self.repo_root.glob(pattern)):
            if md_file.is_file():
                all_violations.extend(self.find_violations(md_file))

        return all_violations

    def apply_fix(self, file_path: Path, line_number: int, language: str) -> bool:
        """Apply fix to a specific code block.

        Args:
            file_path: Markdown file to modify.
            line_number: 1-indexed line of the opening fence, as reported by
                ``find_violations``.
            language: Language identifier to append to the fence.

        Returns:
            True when the fence was tagged (or, in dry-run mode, could be).
            False when ``line_number`` is not the opening fence of an untagged
            block or the file cannot be read or written.
        """
        try:
            content = file_path.read_text(encoding='utf-8')
            lines = content.split('\n')

            # Validate against the parsed fence structure so a stale or wrong
            # line number can never tag a closing fence.
            opening_lines = {number for number, _ in self._untagged_blocks(lines)}
            if line_number not in opening_lines:
                return False

            if self.dry_run:
                print(f" [DRY RUN] Would fix {file_path}:{line_number} → {language}")
                return True

            # line_number is 1-indexed. Append to the existing fence rather
            # than overwriting the line, so indentation (fences nested in
            # lists) and longer fences are preserved.
            idx = line_number - 1
            lines[idx] = f'{lines[idx].rstrip()}{language}'

            # Write back
            file_path.write_text('\n'.join(lines), encoding='utf-8')
            return True
        except (OSError, UnicodeError) as e:
            if self.dry_run:
                print(f" [DRY RUN] Cannot fix {file_path}:{line_number}: {e}")
            else:
                print(f"Error applying fix to {file_path}:{line_number}: {e}")

        return False

def _backup_before_fixing(repo_root: Path, relative_paths) -> Optional[Path]:
    """Back up the files about to be modified into a timestamped directory.

    Args:
        repo_root: Repository root; the backup lands in ``<repo_root>/backups``.
        relative_paths: Paths (relative to ``repo_root``) of affected files;
            duplicates are collapsed.

    Returns:
        The backup directory on success, or None when the backup failed.
    """
    affected_files = sorted({repo_root / path for path in relative_paths})
    timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
    backup_dir = repo_root / 'backups' / f'md040-backup-{timestamp}'

    print(f"\nCreating backup of {len(affected_files)} files...")
    if not SafetyManager.create_backup(affected_files, backup_dir):
        print("❌ Backup failed - aborting")
        return None

    print(f"✅ Backup created: {backup_dir}")
    return backup_dir


def main(argv: Optional[List[str]] = None) -> int:
    """Command-line entry point.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]`` when None.

    Returns:
        0 on success (including when only help is printed), 1 when a rollback,
        git-cleanliness check, backup, or suggestions-file load fails.
    """
    parser = argparse.ArgumentParser(
        description='Fix MD040 code fence language violations (Non-Breaking)',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
Examples:
  # Dry run (safe preview)
  python3 %(prog)s --detect --dry-run

  # Detect and generate suggestions
  python3 %(prog)s --detect

  # Auto-apply high-confidence with backup
  python3 %(prog)s --auto-apply --min-confidence 90 --backup

  # Rollback changes
  python3 %(prog)s --rollback backups/md040-backup-20251206-143022
        '''
    )

    parser.add_argument('--detect', action='store_true',
                        help='Detect violations and generate suggestions')
    parser.add_argument('--apply', type=str, metavar='FILE',
                        help='Apply fixes from suggestions file')
    parser.add_argument('--auto-apply', action='store_true',
                        help='Automatically apply high-confidence suggestions')
    parser.add_argument('--rollback', type=str, metavar='BACKUP_DIR',
                        help='Rollback changes from backup directory')
    parser.add_argument('--dry-run', action='store_true',
                        help='Preview changes without modifying files')
    parser.add_argument('--backup', action='store_true',
                        help='Create backup before applying fixes')
    parser.add_argument('--require-clean-git', action='store_true',
                        help='Require clean git working tree before applying')
    parser.add_argument('--min-confidence', type=int, default=80,
                        help='Minimum confidence for auto-apply (default: 80)')
    parser.add_argument('--output', type=str, default='md040-suggestions.json',
                        help='Output file for suggestions (default: md040-suggestions.json)')
    parser.add_argument('--repo-root', type=str, default='.',
                        help='Repository root directory (default: current directory)')

    args = parser.parse_args(argv)
    repo_root = Path(args.repo_root).resolve()

    # Handle rollback
    if args.rollback:
        backup_dir = Path(args.rollback)
        print(f"Rolling back changes from {backup_dir}...")

        if SafetyManager.rollback(backup_dir):
            print("\n✅ Rollback completed successfully")
            return 0

        print("\n❌ Rollback failed")
        return 1

    # Safety check: git working tree
    if args.require_clean_git and not SafetyManager.check_git_clean(repo_root):
        print("❌ Error: Git working tree is not clean")
        print(" Commit or stash changes first, or remove --require-clean-git flag")
        return 1

    fixer = MD040Fixer(repo_root, dry_run=args.dry_run)

    if args.detect or args.auto_apply:
        print("Scanning repository for MD040 violations...")
        violations = fixer.scan_repository()

        print(f"\nFound {len(violations)} code blocks without language specifiers")

        # Group by confidence
        high_confidence = [v for v in violations if v.confidence >= 90]
        medium_confidence = [v for v in violations if 70 <= v.confidence < 90]
        low_confidence = [v for v in violations if v.confidence < 70]

        print(f" High confidence (≥90%): {len(high_confidence)}")
        print(f" Medium confidence (70-89%): {len(medium_confidence)}")
        print(f" Low confidence (<70%): {len(low_confidence)}")

        # Save suggestions
        output_file = repo_root / args.output
        with output_file.open('w', encoding='utf-8') as f:
            json.dump([asdict(v) for v in violations], f, indent=2)

        print(f"\nSuggestions saved to: {output_file}")

        if args.auto_apply:
            auto_apply_violations = [v for v in violations if v.confidence >= args.min_confidence]
            backup_dir = None

            if args.dry_run:
                print(f"\n[DRY RUN] Would apply {len(auto_apply_violations)} fixes with confidence ≥{args.min_confidence}%...")
            else:
                # Create backup if requested
                if args.backup:
                    backup_dir = _backup_before_fixing(
                        repo_root, (v.file_path for v in auto_apply_violations)
                    )
                    if backup_dir is None:
                        return 1

                print(f"\nApplying {len(auto_apply_violations)} fixes with confidence ≥{args.min_confidence}%...")

            applied = 0
            for violation in auto_apply_violations:
                file_path = repo_root / violation.file_path
                if fixer.apply_fix(file_path, violation.line_number, violation.suggested_language):
                    applied += 1

            if args.dry_run:
                print(f"\n[DRY RUN] Would apply {applied}/{len(auto_apply_violations)} fixes")
            else:
                print(f"\n✅ Applied {applied}/{len(auto_apply_violations)} fixes successfully")

                if backup_dir is not None:
                    print(f"\n💡 To rollback: python3 {__file__} --rollback {backup_dir}")

    elif args.apply:
        suggestions_file = Path(args.apply)

        if not suggestions_file.exists():
            print(f"❌ Error: Suggestions file not found: {suggestions_file}")
            return 1

        try:
            with suggestions_file.open('r', encoding='utf-8') as f:
                suggestions = json.load(f)
        except (OSError, ValueError) as e:
            # A malformed or unreadable file used to abort with a traceback.
            print(f"❌ Error: Could not read suggestions file {suggestions_file}: {e}")
            return 1

        # Create backup if requested
        if args.backup and not args.dry_run:
            backup_dir = _backup_before_fixing(
                repo_root, (s['file_path'] for s in suggestions)
            )
            if backup_dir is None:
                return 1

        print(f"\nApplying {len(suggestions)} fixes from {suggestions_file}...")

        applied = 0
        for suggestion in suggestions:
            file_path = repo_root / suggestion['file_path']
            if fixer.apply_fix(file_path, suggestion['line_number'], suggestion['suggested_language']):
                applied += 1

        if args.dry_run:
            print(f"\n[DRY RUN] Would apply {applied}/{len(suggestions)} fixes")
        else:
            print(f"\n✅ Applied {applied}/{len(suggestions)} fixes successfully")

    else:
        parser.print_help()

    return 0

# Run the CLI only when executed as a script, not when imported. The exit
# status is whatever main() returns (None maps to a zero exit status).
if __name__ == '__main__':
    raise SystemExit(main())