# scripts-fix-md036-emphasis-to-heading

#!/usr/bin/env python3
"""

title: "Fix Md036 Emphasis To Heading"
component_type: script
version: "1.0.0"
audience: contributor
status: stable
summary: "MD036 Emphasis-to-Heading Converter (Non-Breaking, High-Confidence Only)"
keywords: ['api', 'emphasis', 'fix', 'git', 'heading']
tokens: ~500
created: 2025-12-22
updated: 2025-12-22
script_name: "fix-md036-emphasis-to-heading.py"
language: python
executable: true
usage: "python3 scripts/fix-md036-emphasis-to-heading.py [options]"
python_version: "3.10+"
dependencies: []
modifies_files: false
network_access: false
requires_auth: false

MD036 Emphasis-to-Heading Converter (Non-Breaking, High-Confidence Only)

Detects emphasis (bold/italic) used as headings and converts to proper markdown headings. Only suggests high-confidence conversions to avoid breaking valid emphasis.

Safety Features:

- Dry-run mode (preview changes before applying)
- Conservative detection (only obvious heading patterns)
- Automatic backups before modifications
- Manual review mode (generates suggestions for approval)
- Rollback capability

Usage:
    # DRY RUN: Preview high-confidence suggestions
    python3 scripts/fix-md036-emphasis-to-heading.py --detect --dry-run

    # Detect and generate suggestions for manual review
    python3 scripts/fix-md036-emphasis-to-heading.py --detect --min-confidence 80

    # Auto-apply only very high-confidence suggestions (≥90%)
    python3 scripts/fix-md036-emphasis-to-heading.py --auto-apply --min-confidence 90 --backup

    # Rollback changes
    python3 scripts/fix-md036-emphasis-to-heading.py --rollback backup-TIMESTAMP

"""

import argparse
import json
import re
import shutil
import subprocess
from dataclasses import asdict, dataclass
from datetime import datetime
from pathlib import Path
from typing import List, Tuple

@dataclass
class EmphasisHeading:
    """Represents an emphasis-as-heading violation.

    One instance describes a single markdown line that consists solely of
    bold/italic text and may really be intended as a heading.
    """

    # Path of the markdown file, stored relative to the repository root.
    file_path: str
    # 1-based line number of the emphasized line within the file.
    line_number: int
    # The offending line with surrounding whitespace stripped.
    emphasis_text: str
    # Number of leading '#' characters proposed for the replacement heading.
    suggested_heading_level: int
    # Heuristic score; the detector caps it at 100.
    confidence: int
    # Human-readable, '; '-joined list of the indicators that fired.
    reasoning: str

class SafetyManager:
    """Manages safety features: backups, git checks, rollbacks."""

    @staticmethod
    def _relative_to_anchor(path: Path) -> Path:
        """Return *path* with its root/drive anchor removed.

        Backups mirror the full path of each file underneath the backup
        directory, so an absolute path such as ``/repo/docs/a.md`` is stored
        as ``<backup_dir>/repo/docs/a.md``. Relative paths are returned
        unchanged.
        """
        return path.relative_to(path.anchor) if path.anchor else path

    @staticmethod
    def check_git_clean(repo_root: Path) -> bool:
        """Check if git working tree is clean.

        Args:
            repo_root: Directory in which ``git status --porcelain`` is run.

        Returns:
            True only when git exits with status 0 and prints nothing.
            False when the tree is dirty, git fails, or git cannot be run.
        """
        try:
            result = subprocess.run(
                ['git', 'status', '--porcelain'],
                cwd=repo_root,
                capture_output=True,
                text=True,
            )
        except (OSError, subprocess.SubprocessError):
            # git missing, repo_root not a directory, etc.
            return False
        return result.returncode == 0 and not result.stdout.strip()

    @staticmethod
    def create_backup(files: List[Path], backup_dir: Path) -> bool:
        """Create backup of files before modification.

        Each file is copied (with metadata) to ``backup_dir`` under its own
        anchor-stripped path, and a ``manifest.json`` listing the original
        paths is written so that :meth:`rollback` can restore them.

        Args:
            files: Files to copy.
            backup_dir: Destination directory; created if necessary.

        Returns:
            True on success, False (after printing the error) on failure.
        """
        try:
            backup_dir.mkdir(parents=True, exist_ok=True)

            for file_path in files:
                backup_file = backup_dir / SafetyManager._relative_to_anchor(file_path)
                backup_file.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(file_path, backup_file)

            manifest = {
                'timestamp': datetime.now().isoformat(),
                'files': [str(f) for f in files],
            }
            # Explicit encoding: file names may contain non-ASCII characters
            # and the locale default is not guaranteed to round-trip them.
            (backup_dir / 'manifest.json').write_text(
                json.dumps(manifest, indent=2), encoding='utf-8'
            )
            return True
        except (OSError, ValueError, TypeError) as e:
            print(f"Backup failed: {e}")
            return False

    @staticmethod
    def rollback(backup_dir: Path) -> bool:
        """Restore files from backup.

        Args:
            backup_dir: Directory previously populated by :meth:`create_backup`.

        Returns:
            True only when every file listed in the manifest was restored.
            False when the manifest is missing or unreadable, when a copy
            fails, or when any listed file has no backup copy.
        """
        try:
            manifest_file = backup_dir / 'manifest.json'
            if not manifest_file.exists():
                print(f"Error: No manifest found in {backup_dir}")
                return False

            manifest = json.loads(manifest_file.read_text(encoding='utf-8'))

            all_restored = True
            for file_str in manifest['files']:
                source = Path(file_str)
                backup_file = backup_dir / SafetyManager._relative_to_anchor(source)

                if not backup_file.exists():
                    # Do not claim success for a file we could not restore.
                    print(f"  ✗ No backup copy found for {source}")
                    all_restored = False
                    continue

                shutil.copy2(backup_file, source)
                print(f"  ✓ Restored {source}")

            return all_restored
        except (OSError, ValueError, KeyError, TypeError) as e:
            print(f"Rollback failed: {e}")
            return False

class EmphasisDetector:
    """Detects emphasis used as headings with conservative confidence scoring."""

    # Common heading patterns
    HEADING_PATTERNS = [
        'Overview', 'Introduction', 'Setup', 'Installation', 'Configuration',
        'Usage', 'Examples', 'API', 'Reference', 'Documentation',
        'Features', 'Requirements', 'Prerequisites', 'Getting Started',
        'Quick Start', 'Tutorial', 'Guide', 'Troubleshooting',
        'FAQ', 'Contributing', 'License', 'Credits', 'Acknowledgments',
        'Summary', 'Conclusion', 'Next Steps', 'Resources',
        'Phase', 'Step', 'Stage', 'Section', 'Chapter', 'Part',
    ]

    # Keywords that select the suggested heading level.
    _LEVEL_2_WORDS = ('phase', 'part', 'chapter')
    _LEVEL_3_WORDS = ('step', 'stage', 'section')

    @staticmethod
    def _has_word(lowered_text: str, lowered_phrase: str) -> bool:
        """Return True if *lowered_phrase* occurs as a whole word/phrase.

        A plain substring test would let 'api' match "capital" and 'part'
        match "department". Word boundaries prevent that; an optional
        trailing "s" keeps simple plurals ("Steps", "APIs") matching.
        """
        return re.search(rf'\b{re.escape(lowered_phrase)}s?\b', lowered_text) is not None

    def detect(self, text: str, next_line: str, prev_line: str) -> Tuple[int, int, str]:
        """
        Detect if emphasized text should be a heading.

        Scoring: +40 when the following line is blank, +20 when the preceding
        line is blank, +30 when the text contains one of HEADING_PATTERNS as
        a whole word, +10 when the text is shorter than 60 characters.

        Args:
            text: The emphasized text without its ``*``/``_`` markers.
            next_line: The line after the emphasized line ('' at end of file).
            prev_line: The line before the emphasized line ('' at start of file).

        Returns:
            (heading_level, confidence, reasoning)
        """
        text = text.strip()
        lowered = text.lower()
        confidence = 0
        reasons = []

        # A blank line after the emphasis is the strongest indicator.
        if not next_line.strip():
            confidence += 40
            reasons.append("Followed by blank line")

        if not prev_line.strip():
            confidence += 20
            reasons.append("Preceded by blank line")

        # Only the first matching pattern is reported and scored.
        for pattern in self.HEADING_PATTERNS:
            if self._has_word(lowered, pattern.lower()):
                confidence += 30
                reasons.append(f"Matches heading pattern: '{pattern}'")
                break

        if len(text) < 60:
            confidence += 10
            reasons.append("Short and concise")

        # Determine heading level based on text patterns (default h2).
        heading_level = 2
        if any(self._has_word(lowered, word) for word in self._LEVEL_2_WORDS):
            heading_level = 2
        elif any(self._has_word(lowered, word) for word in self._LEVEL_3_WORDS):
            heading_level = 3
        elif text.endswith(':'):
            heading_level = 4

        reasoning = '; '.join(reasons) if reasons else "No strong indicators"

        return heading_level, min(confidence, 100), reasoning

class MD036Fixer:
    """Fixes MD036 violations (emphasis used as heading)."""

    # Directory names that are never scanned, at any depth.
    EXCLUDED_DIR_NAMES = frozenset({'node_modules', '.git'})
    # Top-level directory where this tool stores its own backups; scanning it
    # would let a later run rewrite the backup copies themselves.
    BACKUP_ROOT_NAME = 'backups'

    # Matches a line that is exactly one bold (group 2) or italic (group 4)
    # span. The text must start and end with a non-space character and must
    # not contain the delimiter itself, so thematic breaks ('***', '* * *',
    # '___') and lines holding several emphasis spans are not matched.
    # Spans whose text begins with '*' or '_' (nested emphasis) are skipped
    # on purpose: this tool prefers missing a case over a wrong rewrite.
    EMPHASIS_PATTERN = re.compile(
        r'^\s*(\*\*|__)(?![\s*_])((?:(?!\1).)+?)(?<!\s)\1\s*$'
        r'|^\s*(\*|_)(?![\s*_])((?:(?!\3).)+?)(?<!\s)\3\s*$'
    )

    def __init__(self, repo_root: Path, dry_run: bool = False):
        """
        Args:
            repo_root: Directory that is scanned; reported paths are relative to it.
            dry_run: When True, apply_fix only prints what it would do.
        """
        self.repo_root = repo_root
        self.dry_run = dry_run
        self.detector = EmphasisDetector()
        # Match bold or italic on its own line
        self.emphasis_pattern = self.EMPHASIS_PATTERN

    def _iter_emphasis_lines(self, lines: List[str]) -> List[Tuple[int, str]]:
        """Return (0-based index, emphasized text) for emphasis-only lines.

        Lines inside fenced code blocks are skipped. Fence tracking is
        deliberately simple: a line starting with ``` or ~~~ opens a fence,
        and the next line starting with the same three characters closes it.
        """
        found = []
        fence = None
        for index, line in enumerate(lines):
            stripped = line.strip()
            if fence is not None:
                if stripped.startswith(fence):
                    fence = None
                continue
            if stripped.startswith(('```', '~~~')):
                fence = stripped[:3]
                continue

            match = self.emphasis_pattern.match(line)
            if not match:
                continue
            text = match.group(2) or match.group(4)
            if text and text.strip():
                found.append((index, text))
        return found

    def _iter_markdown_files(self, pattern: str) -> List[Path]:
        """Return the files under repo_root matching *pattern*, minus excluded trees."""
        selected = []
        # Sorted so that reports and fixes are produced in a stable order.
        for candidate in sorted(self.repo_root.glob(pattern)):
            if not candidate.is_file():
                continue
            parent_parts = candidate.relative_to(self.repo_root).parts[:-1]
            if parent_parts and parent_parts[0] == self.BACKUP_ROOT_NAME:
                continue
            if self.EXCLUDED_DIR_NAMES.intersection(parent_parts):
                continue
            selected.append(candidate)
        return selected

    def find_violations(self, file_path: Path) -> 'List[EmphasisHeading]':
        """Find emphasis-as-heading violations in a file.

        Every emphasis-only line outside fenced code is scored by the
        detector using its neighbouring lines; lines scoring above 0 are
        returned. Read/decoding errors are printed and yield an empty list.
        """
        violations = []

        try:
            lines = file_path.read_text(encoding='utf-8').split('\n')
            relative_name = str(file_path.relative_to(self.repo_root))

            for i, text in self._iter_emphasis_lines(lines):
                # Missing neighbours (file start/end) count as blank lines.
                next_line = lines[i + 1] if i + 1 < len(lines) else ''
                prev_line = lines[i - 1] if i > 0 else ''

                level, confidence, reasoning = self.detector.detect(text, next_line, prev_line)

                if confidence > 0:
                    violations.append(EmphasisHeading(
                        file_path=relative_name,
                        line_number=i + 1,
                        emphasis_text=lines[i].strip(),
                        suggested_heading_level=level,
                        confidence=confidence,
                        reasoning=reasoning,
                    ))
        except (OSError, ValueError) as e:
            # ValueError covers UnicodeDecodeError and relative_to() failures.
            print(f"Error processing {file_path}: {e}")

        return violations

    def apply_fix(self, file_path: Path, line_number: int, heading_level: int) -> bool:
        """Apply fix to convert emphasis to heading.

        Args:
            file_path: Markdown file to modify.
            line_number: 1-based line number of the emphasized line.
            heading_level: Number of '#' characters to emit (1-6).

        Returns:
            True when the line was rewritten (or would be, in dry-run mode).
            False when the level is invalid, the line does not exist, the
            line is not an emphasis-only line, or an I/O error occurred.
        """
        if not 1 <= heading_level <= 6:
            print(f"Error applying fix to {file_path}:{line_number}: invalid heading level {heading_level}")
            return False

        if self.dry_run:
            print(f"  [DRY RUN] Would fix {file_path}:{line_number} → h{heading_level}")
            return True

        try:
            # newline='' disables newline translation so CRLF files stay CRLF.
            with file_path.open('r', encoding='utf-8', newline='') as handle:
                lines = handle.read().split('\n')
            idx = line_number - 1

            if 0 <= idx < len(lines):
                original = lines[idx]
                match = self.emphasis_pattern.match(original)
                if match:
                    text = (match.group(2) or match.group(4)).strip()
                    # Keep the '\r' of a CRLF line ending on the rewritten line.
                    carriage_return = '\r' if original.endswith('\r') else ''
                    lines[idx] = f"{'#' * heading_level} {text}{carriage_return}"

                    with file_path.open('w', encoding='utf-8', newline='') as handle:
                        handle.write('\n'.join(lines))
                    return True

        except (OSError, ValueError) as e:
            print(f"Error applying fix to {file_path}:{line_number}: {e}")

        return False

    def scan_repository(self, pattern: str = "**/*.md") -> 'List[EmphasisHeading]':
        """Scan repository for MD036 violations.

        Files under any ``node_modules`` or ``.git`` directory, and under the
        top-level ``backups`` directory, are skipped.
        """
        all_violations = []

        for md_file in self._iter_markdown_files(pattern):
            all_violations.extend(self.find_violations(md_file))

        return all_violations

def main():
    """Command-line entry point.

    Modes:
        --rollback DIR  restore files from a backup directory and exit.
        --detect        scan, print a confidence summary and save the
                        suggestions that reach --min-confidence as JSON.
        --auto-apply    as --detect, then rewrite those suggestions in place
                        (optionally after --backup; --dry-run only previews).

    With none of these flags the help text is printed.

    Raises:
        SystemExit: with status 1 when a rollback or a requested backup
            fails, so that calling scripts can detect the failure.
    """
    parser = argparse.ArgumentParser(
        description='Fix MD036 emphasis-as-heading violations (Non-Breaking)',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
Examples:
  # Detect and preview high-confidence suggestions
  python3 %(prog)s --detect --min-confidence 80 --dry-run

  # Generate suggestions for manual review
  python3 %(prog)s --detect --min-confidence 70

  # Auto-apply very high-confidence suggestions with backup
  python3 %(prog)s --auto-apply --min-confidence 90 --backup

  # Rollback changes
  python3 %(prog)s --rollback backups/md036-backup-20251206-143022
        '''
    )

    parser.add_argument('--detect', action='store_true',
                        help='Detect violations and generate suggestions')
    parser.add_argument('--auto-apply', action='store_true',
                        help='Automatically apply high-confidence suggestions')
    parser.add_argument('--rollback', type=str, metavar='BACKUP_DIR',
                        help='Rollback changes from backup directory')
    parser.add_argument('--dry-run', action='store_true',
                        help='Preview changes without modifying files')
    parser.add_argument('--backup', action='store_true',
                        help='Create backup before applying fixes')
    parser.add_argument('--min-confidence', type=int, default=80,
                        help='Minimum confidence threshold (default: 80)')
    parser.add_argument('--output', type=str, default='md036-suggestions.json',
                        help='Output file for suggestions (default: md036-suggestions.json)')
    parser.add_argument('--repo-root', type=str, default='.',
                        help='Repository root directory (default: current directory)')

    args = parser.parse_args()
    repo_root = Path(args.repo_root).resolve()

    # Handle rollback
    if args.rollback:
        backup_dir = Path(args.rollback)
        print(f"Rolling back changes from {backup_dir}...")

        if SafetyManager.rollback(backup_dir):
            print("\n✅ Rollback completed successfully")
            return

        print("\n❌ Rollback failed")
        raise SystemExit(1)

    if not (args.detect or args.auto_apply):
        parser.print_help()
        return

    fixer = MD036Fixer(repo_root, dry_run=args.dry_run)

    print("Scanning repository for MD036 violations (emphasis as heading)...")
    violations = fixer.scan_repository()

    print(f"\nFound {len(violations)} potential emphasis-as-heading instances")

    # Summarise by confidence tier (all findings, before thresholding).
    high_conf = [v for v in violations if v.confidence >= 90]
    medium_conf = [v for v in violations if 70 <= v.confidence < 90]
    low_conf = [v for v in violations if v.confidence < 70]

    print(f"  Very high confidence (≥90%): {len(high_conf)}")
    print(f"  High confidence (70-89%): {len(medium_conf)}")
    print(f"  Medium confidence (<70%): {len(low_conf)}")

    # --min-confidence applies to both modes: only suggestions that reach the
    # threshold are saved for review and considered for auto-apply.
    selected = [v for v in violations if v.confidence >= args.min_confidence]

    # Save suggestions
    output_file = repo_root / args.output
    with output_file.open('w', encoding='utf-8') as f:
        json.dump([asdict(v) for v in selected], f, indent=2)

    print(f"\n{len(selected)} suggestions with confidence ≥{args.min_confidence}%")
    print(f"Suggestions saved to: {output_file}")

    if not args.auto_apply:
        return

    if not selected:
        print(f"\n⚠️  No violations with confidence ≥{args.min_confidence}%")
        return

    # Create backup if requested
    backup_dir = None
    if args.backup and not args.dry_run:
        affected_files = sorted({repo_root / v.file_path for v in selected})
        timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
        backup_dir = repo_root / 'backups' / f'md036-backup-{timestamp}'

        print(f"\nCreating backup of {len(affected_files)} files...")
        if not SafetyManager.create_backup(affected_files, backup_dir):
            # Never modify files when the requested safety net is missing.
            print("❌ Backup failed - aborting")
            raise SystemExit(1)

        print(f"✅ Backup created: {backup_dir}")

    if args.dry_run:
        print(f"\n[DRY RUN] Would apply {len(selected)} fixes...")
    else:
        print(f"\nApplying {len(selected)} fixes with confidence ≥{args.min_confidence}%...")

    # Each fix replaces exactly one line, so the recorded line numbers stay
    # valid while several fixes are applied to the same file.
    applied = 0
    for violation in selected:
        file_path = repo_root / violation.file_path
        if fixer.apply_fix(file_path, violation.line_number, violation.suggested_heading_level):
            applied += 1

    if args.dry_run:
        print(f"\n[DRY RUN] Would apply {applied} fixes")
    else:
        print(f"\n✅ Applied {applied} fixes successfully")

        if backup_dir is not None:
            print(f"\n💡 To rollback: python3 {__file__} --rollback {backup_dir}")

# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

#!/usr/bin/env python3 """​

# Detect and preview high-confidence suggestions

# Generate suggestions for manual review

# Auto-apply very high-confidence suggestions with backup

# Rollback changes

#!/usr/bin/env python3 """