#!/usr/bin/env python3
"""AM.2.5: Update cross-repository references after lowercase migration.

Scans all repositories for references to renamed files and updates them.
Handles inter-submodule links, import statements, and documentation
references.

Usage:
    python3 scripts/lowercase-migration/update-cross-repo-references.py
    python3 scripts/lowercase-migration/update-cross-repo-references.py --dry-run
    python3 scripts/lowercase-migration/update-cross-repo-references.py --reports-dir path/to/reports
"""
import os
import sys
import json
import re
from pathlib import Path
from datetime import datetime
from collections import defaultdict

# File types that may contain references
REFERENCE_FILE_TYPES = {
    '.md', '.txt', '.rst',        # Documentation
    '.py', '.js', '.ts', '.tsx',  # Code
    '.json', '.yaml', '.yml',     # Config
    '.html', '.css', '.scss',     # Web
    '.sh', '.bash', '.zsh',       # Shell
    '.toml', '.ini', '.cfg',      # Config
}

# Directories to skip while scanning
SKIP_DIRS = {
    '.git', 'node_modules', 'dist', 'build', '.venv', 'venv',
    '__pycache__',  # restored: the dunder underscores were stripped to 'pycache'
    '.pytest_cache', '.mypy_cache', 'target', '.next', '.nuxt', 'coverage',
}
def load_all_renames(reports_dir: Path) -> dict:
    """Load all rename mappings from migration reports.

    Reads every ``*-migration-report.json`` in *reports_dir* and builds a
    mapping of old reference -> new reference. For each successful rename
    both the submodule-qualified path and (when it actually changed) the
    bare filename are recorded, so callers can match either form.
    Unreadable reports produce a warning instead of aborting the run.
    """
    mapping = {}  # old reference -> new reference
    for report_path in reports_dir.glob('*-migration-report.json'):
        try:
            report = json.loads(report_path.read_text())
            submodule = Path(report.get('submodule', ''))
            for entry in report.get('results', []):
                if not entry.get('success'):
                    continue
                old_rel = entry.get('old_path', '')
                new_rel = entry.get('new_path', '')
                if not (old_rel and new_rel):
                    continue
                # Submodule-qualified path form.
                mapping[str(submodule / old_rel)] = str(submodule / new_rel)
                # Bare-filename form, for simpler references.
                old_name = Path(old_rel).name
                new_name = Path(new_rel).name
                if old_name != new_name:
                    mapping[old_name] = new_name
        except (json.JSONDecodeError, IOError) as e:
            print(f"Warning: Could not load {report_path}: {e}")
    return mapping
def find_reference_files(root_path: Path) -> list:
    """Find all files under *root_path* that might contain references.

    A candidate is any regular file whose (lowercased) extension is listed
    in REFERENCE_FILE_TYPES and which has no path component named in
    SKIP_DIRS (e.g. .git, node_modules).
    """
    return [
        candidate
        for candidate in root_path.rglob('*')
        if candidate.is_file()
        and not any(part in SKIP_DIRS for part in candidate.parts)
        and candidate.suffix.lower() in REFERENCE_FILE_TYPES
    ]
def update_file_references(file_path: Path, renames: dict, dry_run: bool = False) -> dict:
    """Update references to renamed files in a single text file.

    Args:
        file_path: File to scan and (unless dry_run) rewrite in place.
        renames: Mapping of old reference -> new reference (submodule paths
            and/or bare filenames, as produced by load_all_renames).
        dry_run: When True, report what would change without writing.

    Returns:
        dict with 'file' (str path), 'updates' (list of per-pattern change
        records: old, new, pattern, count) and 'error' (None on success,
        message string on read/write failure).
    """
    result = {
        'file': str(file_path),
        'updates': [],
        'error': None
    }
    try:
        content = file_path.read_text(encoding='utf-8')
        new_content = content
        changes = []
        for old, new in renames.items():
            if old == new:
                continue
            old_escaped = re.escape(old)
            # BUG FIX: `new` was interpolated raw into the replacement
            # template; a backslash (e.g. Windows str(Path) output) would be
            # parsed as a group reference and raise re.error. Double the
            # backslashes so the replacement is literal.
            new_escaped = new.replace('\\', '\\\\')
            patterns = [
                # Pattern 1: exact reference wrapped in quotes or brackets
                (rf'(["\'\[\(])({old_escaped})(["\'\]\)])', rf'\1{new_escaped}\3'),
                # Pattern 2: markdown link target
                (rf'(\[.*?\]\()({old_escaped})(\))', rf'\1{new_escaped}\3'),
                # Pattern 3: import statements
                (rf'(from\s+|import\s+)({old_escaped})', rf'\1{new_escaped}'),
                # Pattern 4: path segment delimited by '/' or string start/end
                (rf'(/|^)({old_escaped})(/|$)', rf'\1{new_escaped}\3'),
            ]
            for pattern, replacement in patterns:
                # subn substitutes and counts in one pass (the original
                # scanned twice with findall + sub).
                new_content, count = re.subn(pattern, replacement, new_content)
                if count:
                    changes.append({
                        'old': old,
                        'new': new,
                        'pattern': pattern[:50],
                        'count': count
                    })
        if changes:
            result['updates'] = changes
            # Only touch the file when something actually changed.
            if not dry_run and new_content != content:
                file_path.write_text(new_content, encoding='utf-8')
    except UnicodeDecodeError:
        result['error'] = 'Binary file or encoding error'
    except IOError as e:
        result['error'] = str(e)
    return result
def generate_update_report(results: list, output_path: Path):
    """Generate a detailed JSON update report and write it to *output_path*.

    Args:
        results: Per-file result dicts from update_file_references.
        output_path: Destination for the indented JSON report.

    Returns:
        The report dict (summary counters, per-file updates, errors).
    """
    from datetime import timezone  # local: module header imports only `datetime`

    report = {
        # datetime.utcnow() is deprecated (3.12+); an aware UTC timestamp
        # with the '+00:00' offset rewritten as 'Z' yields the same string.
        'generated_at': datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'),
        'summary': {
            'files_scanned': len(results),
            'files_updated': 0,
            'total_updates': 0,
            'errors': 0
        },
        'by_file': [],
        'errors': []
    }
    for r in results:
        if r['updates']:
            report['summary']['files_updated'] += 1
            report['summary']['total_updates'] += sum(u['count'] for u in r['updates'])
            report['by_file'].append({
                'file': r['file'],
                'updates': r['updates']
            })
        if r['error']:
            report['summary']['errors'] += 1
            report['errors'].append({
                'file': r['file'],
                'error': r['error']
            })
    with open(output_path, 'w') as f:
        json.dump(report, f, indent=2)
    return report
def main():
    """CLI entry point: locate reports, rewrite references, emit a summary."""
    import argparse
    parser = argparse.ArgumentParser(description='Update cross-repository references')
    parser.add_argument('--root', '-r', default=None, help='Root repository path')
    parser.add_argument('--reports-dir', help='Directory containing migration reports')
    parser.add_argument('--dry-run', '-n', action='store_true', help='Preview without changes')
    parser.add_argument('--output', '-o', help='Output report path')
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
    args = parser.parse_args()

    # Resolve the repository root: explicit flag, otherwise walk up from
    # this script's location until a .gitmodules file appears.
    if args.root:
        root_path = Path(args.root).resolve()
    else:
        root_path = Path(__file__).resolve().parent.parent.parent.parent.parent
        if not (root_path / '.gitmodules').exists():
            root_path = root_path.parent

    # Resolve the directory holding the migration reports.
    if args.reports_dir:
        reports_dir = Path(args.reports_dir)
    else:
        reports_dir = root_path / 'context-storage' / 'lowercase-migration' / 'reports'
    if not reports_dir.exists():
        print(f"Error: Reports directory not found: {reports_dir}")
        print("Run migrations first to generate reports.")
        sys.exit(1)

    banner = "=" * 70
    print(banner)
    print("CROSS-REPOSITORY REFERENCE UPDATER")
    print(banner)
    print(f"Root path: {root_path}")
    print(f"Reports directory: {reports_dir}")
    print(f"Dry run: {args.dry_run}")
    print()

    # Gather the rename mappings; nothing to do if the reports are empty.
    print("Loading rename mappings from migration reports...")
    renames = load_all_renames(reports_dir)
    print(f"Found {len(renames)} rename mappings")
    if not renames:
        print("No renames found. Nothing to update.")
        return

    print("\nScanning for reference files...")
    ref_files = find_reference_files(root_path)
    print(f"Found {len(ref_files)} files to scan")

    # Rewrite references file by file, with periodic progress output.
    print("\nUpdating references...")
    results = []
    updated_count = 0
    for i, file_path in enumerate(ref_files, 1):
        outcome = update_file_references(file_path, renames, args.dry_run)
        results.append(outcome)
        if outcome['updates']:
            updated_count += 1
            if args.verbose:
                print(f" [{i}/{len(ref_files)}] ✓ {file_path.relative_to(root_path)}")
                for u in outcome['updates']:
                    print(f" {u['old']} → {u['new']} ({u['count']}x)")
        if i % 1000 == 0:
            print(f" Processed {i}/{len(ref_files)} files...")

    # Write the JSON report next to the other migration artifacts by default.
    if args.output:
        output_path = Path(args.output)
    else:
        output_path = root_path / 'context-storage' / 'lowercase-migration' / 'cross-reference-update-report.json'
    output_path.parent.mkdir(parents=True, exist_ok=True)
    report = generate_update_report(results, output_path)

    print("\n" + banner)
    print("SUMMARY")
    print(banner)
    summary = report['summary']
    print(f"Files scanned: {summary['files_scanned']}")
    print(f"Files updated: {summary['files_updated']}")
    print(f"Total updates: {summary['total_updates']}")
    print(f"Errors: {summary['errors']}")
    print(f"\nReport saved to: {output_path}")
    if args.dry_run:
        print("\n[DRY RUN] No files were actually modified.")
# Restore the mangled dunder names: `if name == 'main'` raises NameError
# the moment the script runs.
if __name__ == '__main__':
    main()