# scripts-fix-md036-emphasis-to-heading
#!/usr/bin/env python3
"""
title: "Fix Md036 Emphasis To Heading"
component_type: script
version: "1.0.0"
audience: contributor
status: stable
summary: "MD036 Emphasis-to-Heading Converter (Non-Breaking, High-Confidence Only)"
keywords: ['api', 'emphasis', 'fix', 'git', 'heading']
tokens: ~500
created: 2025-12-22
updated: 2025-12-22
script_name: "fix-md036-emphasis-to-heading.py"
language: python
executable: true
usage: "python3 scripts/fix-md036-emphasis-to-heading.py [options]"
python_version: "3.10+"
dependencies: []
modifies_files: false
network_access: false
requires_auth: false
MD036 Emphasis-to-Heading Converter (Non-Breaking, High-Confidence Only)
Detects emphasis (bold/italic) used as headings and converts to proper markdown headings. Only suggests high-confidence conversions to avoid breaking valid emphasis.
Safety Features:
- Dry-run mode (preview changes before applying)
- Conservative detection (only obvious heading patterns)
- Automatic backups before modifications
- Manual review mode (generates suggestions for approval)
- Rollback capability
Usage:
# DRY RUN: Preview high-confidence suggestions
python3 scripts/fix-md036-emphasis-to-heading.py --detect --dry-run
# Detect and generate suggestions for manual review
python3 scripts/fix-md036-emphasis-to-heading.py --detect --min-confidence 80
# Auto-apply only very high-confidence suggestions (≥90%)
python3 scripts/fix-md036-emphasis-to-heading.py --auto-apply --min-confidence 90 --backup
# Rollback changes
python3 scripts/fix-md036-emphasis-to-heading.py --rollback backup-TIMESTAMP
"""
import re import json import argparse import shutil import subprocess from pathlib import Path from datetime import datetime from typing import List, Tuple from dataclasses import dataclass, asdict
@dataclass
class EmphasisHeading:
    """A single line where emphasis appears to be standing in for a heading.

    Field order is part of the interface (instances may be built
    positionally), and the field names become the JSON keys when a record
    is serialised with ``dataclasses.asdict``.
    """

    # Path of the markdown file the line was found in.
    file_path: str
    # 1-based line number of the emphasised line within that file.
    line_number: int
    # The emphasised line as found in the file.
    emphasis_text: str
    # Heading depth proposed for the replacement (number of '#' characters).
    suggested_heading_level: int
    # Confidence score for the suggestion, as an integer percentage.
    confidence: int
    # Human-readable explanation of how the confidence was reached.
    reasoning: str
class SafetyManager:
    """Safety helpers: git cleanliness check, file backups and rollback.

    Every member is static; the class only serves as a namespace.
    """

    # Name of the JSON file, stored in the backup directory, that lists the
    # original location of every backed-up file.
    MANIFEST_NAME = 'manifest.json'

    @staticmethod
    def _path_inside_backup(original: Path, backup_dir: Path) -> Path:
        """Return the location of *original*'s copy inside *backup_dir*.

        The path's anchor (filesystem root or drive; empty for a relative
        path) is stripped so the remaining directory structure can be
        recreated below the backup directory. Both ``create_backup`` and
        ``rollback`` must use this same mapping to find each other's files.
        """
        return backup_dir / original.relative_to(original.anchor)

    @staticmethod
    def check_git_clean(repo_root: Path) -> bool:
        """Return True when ``git status --porcelain`` succeeds with no output.

        Any failure to run git (not installed, not a repository, undecodable
        output) is reported as "not clean" rather than raised.
        """
        try:
            result = subprocess.run(
                ['git', 'status', '--porcelain'],
                cwd=repo_root,
                capture_output=True,
                text=True,
                check=False,
            )
        except (OSError, ValueError, subprocess.SubprocessError):
            return False
        return result.returncode == 0 and not result.stdout.strip()

    @staticmethod
    def create_backup(files: List[Path], backup_dir: Path) -> bool:
        """Copy *files* into *backup_dir* and write a manifest describing them.

        Args:
            files: Files to back up.
            backup_dir: Directory to create (if needed) and fill.

        Returns:
            True when every file was copied and the manifest written;
            False (after printing the error) otherwise.
        """
        try:
            backup_dir.mkdir(parents=True, exist_ok=True)

            for file_path in files:
                backup_file = SafetyManager._path_inside_backup(file_path, backup_dir)
                backup_file.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(file_path, backup_file)

            manifest = {
                'timestamp': datetime.now().isoformat(),
                'files': [str(f) for f in files],
            }
            (backup_dir / SafetyManager.MANIFEST_NAME).write_text(
                json.dumps(manifest, indent=2), encoding='utf-8'
            )
            return True
        except (OSError, ValueError) as e:
            print(f"Backup failed: {e}")
            return False

    @staticmethod
    def rollback(backup_dir: Path) -> bool:
        """Restore every file listed in the manifest of *backup_dir*.

        Files whose backup copy is missing are reported and skipped; the
        remaining files are still restored.

        Returns:
            True only when the manifest was readable and every listed file
            was restored. False when the manifest is missing or invalid,
            a copy failed, or at least one backup copy was missing.
        """
        manifest_file = backup_dir / SafetyManager.MANIFEST_NAME
        if not manifest_file.exists():
            print(f"Error: No manifest found in {backup_dir}")
            return False

        missing = []
        try:
            manifest = json.loads(manifest_file.read_text(encoding='utf-8'))

            for file_str in manifest['files']:
                source = Path(file_str)
                backup_file = SafetyManager._path_inside_backup(source, backup_dir)

                if not backup_file.exists():
                    # Previously skipped silently, which let an incomplete
                    # rollback be reported as a success.
                    missing.append(source)
                    print(f" ✗ No backup copy found for {source}")
                    continue

                # The original directory may have been removed since the backup.
                source.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(backup_file, source)
                print(f" ✓ Restored {source}")
        except (OSError, ValueError, KeyError, TypeError) as e:
            print(f"Rollback failed: {e}")
            return False

        return not missing
class EmphasisDetector:
    """Scores how likely an emphasised line is really meant as a heading.

    Keywords are matched as whole words (with an optional plural "s"),
    not as raw substrings, so that e.g. "Rapid" does not count as "API"
    and "departure" does not count as "Part".
    """

    # Common heading patterns
    HEADING_PATTERNS = [
        'Overview', 'Introduction', 'Setup', 'Installation', 'Configuration',
        'Usage', 'Examples', 'API', 'Reference', 'Documentation',
        'Features', 'Requirements', 'Prerequisites', 'Getting Started',
        'Quick Start', 'Tutorial', 'Guide', 'Troubleshooting',
        'FAQ', 'Contributing', 'License', 'Credits', 'Acknowledgments',
        'Summary', 'Conclusion', 'Next Steps', 'Resources',
        'Phase', 'Step', 'Stage', 'Section', 'Chapter', 'Part'
    ]

    # (heading level, keywords that select it); checked in this order.
    _LEVEL_KEYWORDS = (
        (2, ('phase', 'part', 'chapter')),
        (3, ('step', 'stage', 'section')),
    )

    @staticmethod
    def _has_word(phrase: str, text: str) -> bool:
        """Return True if *phrase* occurs in *text* as whole word(s).

        Matching is case-insensitive and tolerates a trailing plural "s"
        ("Step" matches "Steps", but not "Stepping" or "footstep").
        """
        return re.search(rf'\b{re.escape(phrase)}s?\b', text, re.IGNORECASE) is not None

    def detect(self, text: str, next_line: str, prev_line: str) -> Tuple[int, int, str]:
        """Decide whether emphasised *text* should be converted to a heading.

        Args:
            text: The text inside the emphasis markers.
            next_line: The line following the emphasised line ('' at end of file).
            prev_line: The line preceding the emphasised line ('' at start of file).

        Returns:
            ``(heading_level, confidence, reasoning)`` where *confidence* is an
            integer from 0 to 100 and *reasoning* lists the indicators that
            contributed to it, separated by '; '.
        """
        text = text.strip()
        confidence = 0
        reasons = []

        # A blank line after the emphasis is the strongest indicator.
        if not next_line.strip():
            confidence += 40
            reasons.append("Followed by blank line")

        if not prev_line.strip():
            confidence += 20
            reasons.append("Preceded by blank line")

        # Only the first matching pattern is counted and reported.
        matched = next(
            (p for p in self.HEADING_PATTERNS if self._has_word(p, text)),
            None,
        )
        if matched is not None:
            confidence += 30
            reasons.append(f"Matches heading pattern: '{matched}'")

        if len(text) < 60:
            confidence += 10
            reasons.append("Short and concise")

        # Keyword groups take precedence over the trailing-colon rule;
        # h2 is the default when nothing more specific applies.
        heading_level = 2
        for level, keywords in self._LEVEL_KEYWORDS:
            if any(self._has_word(word, text) for word in keywords):
                heading_level = level
                break
        else:
            if text.endswith(':'):
                heading_level = 4

        reasoning = '; '.join(reasons) if reasons else "No strong indicators"
        return heading_level, min(confidence, 100), reasoning
class MD036Fixer:
    """Finds and fixes MD036 violations (emphasis used as a heading).

    Only lines that consist of exactly one bold or italic span are
    considered. Lines inside fenced code blocks, horizontal rules such as
    ``***`` and lines holding several emphasis spans are ignored.
    """

    # Bold (groups 1-2) or italic (groups 3-4) spanning a whole line.
    EMPHASIS_PATTERN = re.compile(r'^\s*(\*\*|__)(.*?)\1\s*$|^\s*(\*|_)(.*?)\3\s*$')
    # A code-fence line: a run of 3+ backticks or tildes, then the rest of the line.
    _FENCE_PATTERN = re.compile(r'^\s*(`{3,}|~{3,})(.*)$')
    # Line terminators, captured so they can be written back unchanged.
    _EOL_PATTERN = re.compile(r'(\r\n|\n|\r)')
    # Directory names that are never scanned, at any depth.
    _SKIPPED_DIRS = frozenset({'node_modules'})
    # Directories with this prefix hold copies made by this tool's --backup option.
    _BACKUP_DIR_PREFIX = 'md036-backup-'

    def __init__(self, repo_root: Path, dry_run: bool = False):
        """Create a fixer rooted at *repo_root*.

        Args:
            repo_root: Directory that is scanned and that reported paths are
                made relative to.
            dry_run: When True, ``apply_fix`` only prints what it would do.
        """
        self.repo_root = repo_root
        self.dry_run = dry_run
        self.detector = EmphasisDetector()

        # Match bold or italic on its own line
        self.emphasis_pattern = self.EMPHASIS_PATTERN

    def _emphasis_text(self, line: str) -> str:
        """Return the text of a line that is one clean emphasis span, else ''.

        The regex alone also accepts ``**a** and **b**`` (two spans),
        ``***`` / ``* * *`` / ``___`` (horizontal rules) and spans padded with
        spaces. Those are rejected here by requiring that the inner text is
        non-empty, has no surrounding whitespace and does not contain the
        marker character itself.
        """
        match = self.emphasis_pattern.match(line)
        if not match:
            return ''

        if match.group(1) is not None:
            marker, inner = match.group(1), match.group(2)
        else:
            marker, inner = match.group(3), match.group(4)

        if not inner or inner != inner.strip():
            return ''
        if marker[0] in inner:
            return ''
        return inner

    @classmethod
    def _fence_mask(cls, lines: List[str]) -> List[bool]:
        """Return, per line, whether it belongs to a fenced code block.

        Fence lines themselves count as part of the block. A fence is closed
        by a run of the same character that is at least as long as the opener
        and has nothing after it; an unclosed fence extends to the end.
        """
        mask = []
        opener = ''
        for line in lines:
            found = cls._FENCE_PATTERN.match(line)
            if opener:
                mask.append(True)
                closes = (
                    found is not None
                    and found.group(1)[0] == opener[0]
                    and len(found.group(1)) >= len(opener)
                    and not found.group(2).strip()
                )
                if closes:
                    opener = ''
            elif found and not (found.group(1)[0] == '`' and '`' in found.group(2)):
                # A backtick run followed by more backticks on the same line
                # is inline code, not a fence opener.
                mask.append(True)
                opener = found.group(1)
            else:
                mask.append(False)
        return mask

    def _is_excluded(self, md_file: Path) -> bool:
        """Return True when *md_file* lies in a directory that is not scanned."""
        directories = md_file.relative_to(self.repo_root).parts[:-1]
        return any(
            name in self._SKIPPED_DIRS or name.startswith(self._BACKUP_DIR_PREFIX)
            for name in directories
        )

    def find_violations(self, file_path: Path) -> List['EmphasisHeading']:
        """Find emphasis-as-heading candidates in one markdown file.

        Args:
            file_path: File to inspect; expected to be located below
                ``self.repo_root``.

        Returns:
            One ``EmphasisHeading`` per candidate line with a confidence above
            zero. Read or decode errors are printed and end the scan of this
            file; candidates found before the error are still returned.
        """
        violations = []

        try:
            lines = file_path.read_text(encoding='utf-8').split('\n')
            in_code = self._fence_mask(lines)
            last = len(lines) - 1

            for i, line in enumerate(lines):
                if in_code[i]:
                    continue

                text = self._emphasis_text(line)
                if not text:
                    continue

                # Get context
                next_line = lines[i + 1] if i < last else ''
                prev_line = lines[i - 1] if i > 0 else ''

                level, confidence, reasoning = self.detector.detect(text, next_line, prev_line)
                if confidence > 0:
                    violations.append(EmphasisHeading(
                        file_path=str(file_path.relative_to(self.repo_root)),
                        line_number=i + 1,
                        emphasis_text=line.strip(),
                        suggested_heading_level=level,
                        confidence=confidence,
                        reasoning=reasoning
                    ))

        except (OSError, ValueError) as e:
            print(f"Error processing {file_path}: {e}")

        return violations

    def apply_fix(self, file_path: Path, line_number: int, heading_level: int) -> bool:
        """Replace the emphasis on one line with a markdown heading.

        Args:
            file_path: File to modify.
            line_number: 1-based number of the emphasised line.
            heading_level: Heading depth; clamped to the range 1-6.

        Returns:
            True when the line was converted (or, in dry-run mode, always).
            False when the line does not exist, is not a single emphasis
            span, or the file could not be read or written. The file is only
            rewritten when a line actually changed, and the original line
            terminators are preserved.
        """
        if self.dry_run:
            print(f" [DRY RUN] Would fix {file_path}:{line_number} → h{heading_level}")
            return True

        try:
            # newline='' disables newline translation so CRLF files stay CRLF.
            with file_path.open('r', encoding='utf-8', newline='') as handle:
                pieces = self._EOL_PATTERN.split(handle.read())

            # pieces alternates line text and terminator:
            # [line1, eol1, line2, eol2, ..., lineN]
            slot = 2 * (line_number - 1)
            if line_number < 1 or slot >= len(pieces):
                return False

            text = self._emphasis_text(pieces[slot])
            if not text:
                return False

            level = min(max(heading_level, 1), 6)
            pieces[slot] = f"{'#' * level} {text}"

            with file_path.open('w', encoding='utf-8', newline='') as handle:
                handle.write(''.join(pieces))
            return True

        except (OSError, ValueError) as e:
            print(f"Error applying fix to {file_path}:{line_number}: {e}")
            return False

    def scan_repository(self, pattern: str = "**/*.md") -> List['EmphasisHeading']:
        """Scan the repository for MD036 candidates.

        Args:
            pattern: Glob pattern, relative to ``self.repo_root``.

        Returns:
            Candidates from all matching files, in sorted path order. Files
            below any ``node_modules`` directory or any ``md036-backup-*``
            directory are skipped.
        """
        all_violations = []

        for md_file in sorted(self.repo_root.glob(pattern)):
            if not md_file.is_file() or self._is_excluded(md_file):
                continue
            all_violations.extend(self.find_violations(md_file))

        return all_violations
def main():
    """Command-line entry point: run rollback, detection or auto-apply.

    Modes, checked in this order:
      * ``--rollback DIR``: restore files from a backup directory and stop.
      * ``--detect`` / ``--auto-apply``: scan ``--repo-root``, print a
        confidence breakdown and write every finding to ``--output`` as
        JSON. With ``--auto-apply``, findings at or above
        ``--min-confidence`` are then converted (or only listed with
        ``--dry-run``), after an optional ``--backup``.
      * neither: print the help text.

    Raises:
        SystemExit: With status 1 when the rollback fails or a requested
            backup cannot be created, so callers and CI can detect it.
    """
    parser = argparse.ArgumentParser(
        description='Fix MD036 emphasis-as-heading violations (Non-Breaking)',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
Examples:
  Detect and preview high-confidence suggestions
    python3 %(prog)s --detect --min-confidence 80 --dry-run

  Generate suggestions for manual review
    python3 %(prog)s --detect --min-confidence 70

  Auto-apply very high-confidence suggestions with backup
    python3 %(prog)s --auto-apply --min-confidence 90 --backup

  Rollback changes
    python3 %(prog)s --rollback backups/md036-backup-20251206-143022
'''
    )

    parser.add_argument('--detect', action='store_true',
                        help='Detect violations and generate suggestions')
    parser.add_argument('--auto-apply', action='store_true',
                        help='Automatically apply high-confidence suggestions')
    parser.add_argument('--rollback', type=str, metavar='BACKUP_DIR',
                        help='Rollback changes from backup directory')
    parser.add_argument('--dry-run', action='store_true',
                        help='Preview changes without modifying files')
    parser.add_argument('--backup', action='store_true',
                        help='Create backup before applying fixes')
    parser.add_argument('--min-confidence', type=int, default=80,
                        help='Minimum confidence threshold (default: 80)')
    parser.add_argument('--output', type=str, default='md036-suggestions.json',
                        help='Output file for suggestions (default: md036-suggestions.json)')
    parser.add_argument('--repo-root', type=str, default='.',
                        help='Repository root directory (default: current directory)')

    args = parser.parse_args()
    repo_root = Path(args.repo_root).resolve()

    # Handle rollback
    if args.rollback:
        backup_dir = Path(args.rollback)
        print(f"Rolling back changes from {backup_dir}...")
        if not SafetyManager.rollback(backup_dir):
            print("\n❌ Rollback failed")
            # A failed rollback must not look like success to the shell.
            raise SystemExit(1)
        print("\n✅ Rollback completed successfully")
        return

    if not (args.detect or args.auto_apply):
        parser.print_help()
        return

    fixer = MD036Fixer(repo_root, dry_run=args.dry_run)

    print("Scanning repository for MD036 violations (emphasis as heading)...")
    violations = fixer.scan_repository()

    print(f"\nFound {len(violations)} potential emphasis-as-heading instances")

    # Break the findings down by confidence for the summary
    high_conf = [v for v in violations if v.confidence >= 90]
    medium_conf = [v for v in violations if 70 <= v.confidence < 90]
    low_conf = [v for v in violations if v.confidence < 70]

    print(f" Very high confidence (≥90%): {len(high_conf)}")
    print(f" High confidence (70-89%): {len(medium_conf)}")
    print(f" Medium confidence (<70%): {len(low_conf)}")

    # Save suggestions (all of them, regardless of --min-confidence)
    output_file = repo_root / args.output
    with output_file.open('w', encoding='utf-8') as f:
        json.dump([asdict(v) for v in violations], f, indent=2)
    print(f"\nSuggestions saved to: {output_file}")

    if not args.auto_apply:
        return

    apply_violations = [v for v in violations if v.confidence >= args.min_confidence]

    if not apply_violations:
        print(f"\n⚠️ No violations with confidence ≥{args.min_confidence}%")
        return

    # Create backup if requested; nothing is modified in a dry run,
    # so no backup is needed there.
    backup_dir = None
    if args.backup and not args.dry_run:
        affected_files = sorted({repo_root / v.file_path for v in apply_violations})
        timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
        backup_dir = repo_root / 'backups' / f'md036-backup-{timestamp}'

        print(f"\nCreating backup of {len(affected_files)} files...")
        if not SafetyManager.create_backup(affected_files, backup_dir):
            print("❌ Backup failed - aborting")
            raise SystemExit(1)
        print(f"✅ Backup created: {backup_dir}")

    if args.dry_run:
        print(f"\n[DRY RUN] Would apply {len(apply_violations)} fixes...")
    else:
        print(f"\nApplying {len(apply_violations)} fixes with confidence ≥{args.min_confidence}%...")

    applied = 0
    for violation in apply_violations:
        file_path = repo_root / violation.file_path
        if fixer.apply_fix(file_path, violation.line_number, violation.suggested_heading_level):
            applied += 1

    if args.dry_run:
        print(f"\n[DRY RUN] Would apply {applied} fixes")
    else:
        print(f"\n✅ Applied {applied} fixes successfully")
        if backup_dir is not None:
            print(f"\n💡 To rollback: python3 {__file__} --rollback {backup_dir}")
# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()