#!/usr/bin/env python3
"""AM.2.5: Update cross-repository references after lowercase migration.

Scans all repositories for references to renamed files and updates them.
Handles inter-submodule links, import statements, and documentation
references.

Usage:
    python3 scripts/lowercase-migration/update-cross-repo-references.py
    python3 scripts/lowercase-migration/update-cross-repo-references.py --dry-run
    python3 scripts/lowercase-migration/update-cross-repo-references.py --reports-dir path/to/reports
"""
import os
import sys
import json
import re
from pathlib import Path
from datetime import datetime
from collections import defaultdict

# File types that may contain references
REFERENCE_FILE_TYPES = {
    '.md', '.txt', '.rst',        # Documentation
    '.py', '.js', '.ts', '.tsx',  # Code
    '.json', '.yaml', '.yml',     # Config
    '.html', '.css', '.scss',     # Web
    '.sh', '.bash', '.zsh',       # Shell
    '.toml', '.ini', '.cfg',      # Config
}

# Directories to skip while scanning
SKIP_DIRS = {
    '.git', 'node_modules', 'dist', 'build', '.venv', 'venv',
    '__pycache__',  # restored: the dunder underscores were stripped to 'pycache'
    '.pytest_cache', '.mypy_cache', 'target', '.next', '.nuxt', 'coverage',
}
def load_all_renames(reports_dir: Path) -> dict:
    """Load all rename mappings from migration reports.

    Reads every ``*-migration-report.json`` in *reports_dir* and builds a
    mapping of old reference -> new reference. For each successful rename
    both the submodule-qualified path and (when it actually changed) the
    bare filename are recorded, so callers can match either form.
    Unreadable reports produce a warning instead of aborting the run.
    """
    mapping = {}  # old reference -> new reference
    for report_path in reports_dir.glob('*-migration-report.json'):
        try:
            report = json.loads(report_path.read_text())
            submodule = Path(report.get('submodule', ''))
            for entry in report.get('results', []):
                if not entry.get('success'):
                    continue
                old_rel = entry.get('old_path', '')
                new_rel = entry.get('new_path', '')
                if not (old_rel and new_rel):
                    continue
                # Submodule-qualified path form.
                mapping[str(submodule / old_rel)] = str(submodule / new_rel)
                # Bare-filename form, for simpler references.
                old_name = Path(old_rel).name
                new_name = Path(new_rel).name
                if old_name != new_name:
                    mapping[old_name] = new_name
        except (json.JSONDecodeError, IOError) as e:
            print(f"Warning: Could not load {report_path}: {e}")
    return mapping
def find_reference_files(root_path: Path) -> list:
    """Find all files under *root_path* that might contain references.

    A candidate is any regular file whose (lowercased) extension is listed
    in REFERENCE_FILE_TYPES and which has no path component named in
    SKIP_DIRS (e.g. .git, node_modules).
    """
    return [
        candidate
        for candidate in root_path.rglob('*')
        if candidate.is_file()
        and not any(part in SKIP_DIRS for part in candidate.parts)
        and candidate.suffix.lower() in REFERENCE_FILE_TYPES
    ]
def update_file_references(file_path: Path, renames: dict, dry_run: bool = False) -> dict:
    """Update references to renamed files in a single text file.

    Args:
        file_path: File to scan and (unless dry_run) rewrite in place.
        renames: Mapping of old reference -> new reference (submodule paths
            and/or bare filenames, as produced by load_all_renames).
        dry_run: When True, report what would change without writing.

    Returns:
        dict with 'file' (str path), 'updates' (list of per-pattern change
        records: old, new, pattern, count) and 'error' (None on success,
        message string on read/write failure).
    """
    result = {
        'file': str(file_path),
        'updates': [],
        'error': None
    }
    try:
        content = file_path.read_text(encoding='utf-8')
        new_content = content
        changes = []
        for old, new in renames.items():
            if old == new:
                continue
            old_escaped = re.escape(old)
            # BUG FIX: `new` was interpolated raw into the replacement
            # template; a backslash (e.g. Windows str(Path) output) would be
            # parsed as a group reference and raise re.error. Double the
            # backslashes so the replacement is literal.
            new_escaped = new.replace('\\', '\\\\')
            patterns = [
                # Pattern 1: exact reference wrapped in quotes or brackets
                (rf'(["\'\[\(])({old_escaped})(["\'\]\)])', rf'\1{new_escaped}\3'),
                # Pattern 2: markdown link target
                (rf'(\[.*?\]\()({old_escaped})(\))', rf'\1{new_escaped}\3'),
                # Pattern 3: import statements
                (rf'(from\s+|import\s+)({old_escaped})', rf'\1{new_escaped}'),
                # Pattern 4: path segment delimited by '/' or string start/end
                (rf'(/|^)({old_escaped})(/|$)', rf'\1{new_escaped}\3'),
            ]
            for pattern, replacement in patterns:
                # subn substitutes and counts in one pass (the original
                # scanned twice with findall + sub).
                new_content, count = re.subn(pattern, replacement, new_content)
                if count:
                    changes.append({
                        'old': old,
                        'new': new,
                        'pattern': pattern[:50],
                        'count': count
                    })
        if changes:
            result['updates'] = changes
            # Only touch the file when something actually changed.
            if not dry_run and new_content != content:
                file_path.write_text(new_content, encoding='utf-8')
    except UnicodeDecodeError:
        result['error'] = 'Binary file or encoding error'
    except IOError as e:
        result['error'] = str(e)
    return result
def generate_update_report(results: list, output_path: Path):
    """Generate a detailed JSON update report and write it to *output_path*.

    Args:
        results: Per-file result dicts from update_file_references.
        output_path: Destination for the indented JSON report.

    Returns:
        The report dict (summary counters, per-file updates, errors).
    """
    from datetime import timezone  # local: module header imports only `datetime`

    report = {
        # datetime.utcnow() is deprecated (3.12+); an aware UTC timestamp
        # with the '+00:00' offset rewritten as 'Z' yields the same string.
        'generated_at': datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'),
        'summary': {
            'files_scanned': len(results),
            'files_updated': 0,
            'total_updates': 0,
            'errors': 0
        },
        'by_file': [],
        'errors': []
    }
    for r in results:
        if r['updates']:
            report['summary']['files_updated'] += 1
            report['summary']['total_updates'] += sum(u['count'] for u in r['updates'])
            report['by_file'].append({
                'file': r['file'],
                'updates': r['updates']
            })
        if r['error']:
            report['summary']['errors'] += 1
            report['errors'].append({
                'file': r['file'],
                'error': r['error']
            })
    with open(output_path, 'w') as f:
        json.dump(report, f, indent=2)
    return report
def main():
    """CLI entry point: locate reports, rewrite references, emit a summary."""
    import argparse
    parser = argparse.ArgumentParser(description='Update cross-repository references')
    parser.add_argument('--root', '-r', default=None, help='Root repository path')
    parser.add_argument('--reports-dir', help='Directory containing migration reports')
    parser.add_argument('--dry-run', '-n', action='store_true', help='Preview without changes')
    parser.add_argument('--output', '-o', help='Output report path')
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
    args = parser.parse_args()

    # Resolve the repository root: explicit flag, otherwise walk up from
    # this script's location until a .gitmodules file appears.
    if args.root:
        root_path = Path(args.root).resolve()
    else:
        root_path = Path(__file__).resolve().parent.parent.parent.parent.parent
        if not (root_path / '.gitmodules').exists():
            root_path = root_path.parent

    # Resolve the directory holding the migration reports.
    if args.reports_dir:
        reports_dir = Path(args.reports_dir)
    else:
        reports_dir = root_path / 'context-storage' / 'lowercase-migration' / 'reports'
    if not reports_dir.exists():
        print(f"Error: Reports directory not found: {reports_dir}")
        print("Run migrations first to generate reports.")
        sys.exit(1)

    banner = "=" * 70
    print(banner)
    print("CROSS-REPOSITORY REFERENCE UPDATER")
    print(banner)
    print(f"Root path: {root_path}")
    print(f"Reports directory: {reports_dir}")
    print(f"Dry run: {args.dry_run}")
    print()

    # Gather the rename mappings; nothing to do if the reports are empty.
    print("Loading rename mappings from migration reports...")
    renames = load_all_renames(reports_dir)
    print(f"Found {len(renames)} rename mappings")
    if not renames:
        print("No renames found. Nothing to update.")
        return

    print("\nScanning for reference files...")
    ref_files = find_reference_files(root_path)
    print(f"Found {len(ref_files)} files to scan")

    # Rewrite references file by file, with periodic progress output.
    print("\nUpdating references...")
    results = []
    updated_count = 0
    for i, file_path in enumerate(ref_files, 1):
        outcome = update_file_references(file_path, renames, args.dry_run)
        results.append(outcome)
        if outcome['updates']:
            updated_count += 1
            if args.verbose:
                print(f" [{i}/{len(ref_files)}] ✓ {file_path.relative_to(root_path)}")
                for u in outcome['updates']:
                    print(f" {u['old']} → {u['new']} ({u['count']}x)")
        if i % 1000 == 0:
            print(f" Processed {i}/{len(ref_files)} files...")

    # Write the JSON report next to the other migration artifacts by default.
    if args.output:
        output_path = Path(args.output)
    else:
        output_path = root_path / 'context-storage' / 'lowercase-migration' / 'cross-reference-update-report.json'
    output_path.parent.mkdir(parents=True, exist_ok=True)
    report = generate_update_report(results, output_path)

    print("\n" + banner)
    print("SUMMARY")
    print(banner)
    summary = report['summary']
    print(f"Files scanned: {summary['files_scanned']}")
    print(f"Files updated: {summary['files_updated']}")
    print(f"Total updates: {summary['total_updates']}")
    print(f"Errors: {summary['errors']}")
    print(f"\nReport saved to: {output_path}")
    if args.dry_run:
        print("\n[DRY RUN] No files were actually modified.")
# Restore the mangled dunder names: `if name == 'main'` raises NameError
# the moment the script runs.
if __name__ == '__main__':
    main()