#!/usr/bin/env python3
"""
AM.2.2: Inventory uppercase files across ALL submodules.

Scans the entire coditect-rollout-master repository including all 74 submodules
and generates a comprehensive inventory of files and directories that need to
be renamed.

Usage:
    python3 scripts/lowercase-migration/inventory-all-submodules.py
    python3 scripts/lowercase-migration/inventory-all-submodules.py --output inventory-full.json
"""
import fnmatch
import json
import os
import re
import subprocess
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path
# Directories to skip. Entries are directory *names*, not paths.
# '__pycache__' must carry its double underscores to match the directories
# Python actually creates. '*.egg-info' is a glob-style entry: it only has an
# effect where the consumer applies pattern matching rather than exact
# set membership.
SKIP_DIRS = {
    '.git', 'node_modules', 'dist', 'build', '.venv', 'venv',
    '__pycache__', '.pytest_cache', '.mypy_cache', 'target',
    '.next', '.nuxt', 'coverage', '.tox', 'eggs', '*.egg-info'
}

# File extensions to skip (compared against the lowercased file suffix).
SKIP_EXTENSIONS = {'.pyc', '.pyo', '.so', '.dll', '.exe', '.bin'}
def has_uppercase(name: str) -> bool:
    """Tell whether the stem of *name* contains any uppercase character.

    Only the stem (the name without its final suffix) is inspected, so an
    uppercase extension on its own does not count.
    """
    for ch in Path(name).stem:
        if ch.isupper():
            return True
    return False
def to_lowercase_kebab(name: str) -> str:
    """Return *name* rewritten as lowercase-kebab-case.

    The stem is split at camelCase / acronym boundaries, lowercased, and has
    runs of underscores, whitespace and hyphens collapsed to single hyphens.
    The suffix is kept but lowercased.
    """
    parsed = Path(name)
    kebab = parsed.stem

    # Word-boundary passes: "fooBar" -> "foo-Bar", then "XMLFile" -> "XML-File".
    for boundary in (r'([a-z])([A-Z])', r'([A-Z]+)([A-Z][a-z])'):
        kebab = re.sub(boundary, r'\1-\2', kebab)

    kebab = kebab.lower()

    # Underscores/whitespace become hyphens, then hyphen runs are squeezed.
    kebab = re.sub(r'[_\s]+', '-', kebab)
    kebab = re.sub(r'-+', '-', kebab)

    # No hyphen may lead or trail the stem.
    return kebab.strip('-') + parsed.suffix.lower()
def get_git_tracked_files(repo_path: Path) -> set:
    """Return the set of paths that git tracks under *repo_path*.

    Runs ``git ls-files`` with *repo_path* as the working directory, so the
    returned paths are relative to it and use forward slashes.

    Args:
        repo_path: Directory in which to run git.

    Returns:
        A set of tracked path strings. The set is empty when nothing is
        tracked or when the listing cannot be obtained (git exits non-zero,
        git is not installed, or the directory does not exist).
    """
    try:
        result = subprocess.run(
            # -z gives NUL-separated, unquoted paths. Without it git wraps
            # names containing non-ASCII or special characters in quotes
            # with escapes, and they would never equal the on-disk names.
            ['git', 'ls-files', '-z'],
            cwd=repo_path,
            capture_output=True,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing git executable and a missing/invalid cwd.
        return set()

    # Decode the same way the OS layer decodes names returned by os.walk.
    return {os.fsdecode(entry) for entry in result.stdout.split(b'\0') if entry}
def find_submodules(root_path: Path) -> list:
    """Find the git submodules declared in ``<root_path>/.gitmodules``.

    Args:
        root_path: Repository root that may contain a ``.gitmodules`` file.

    Returns:
        A list with one dict per declared submodule whose path exists on
        disk, each holding ``path`` (as written in ``.gitmodules``),
        ``full_path`` (string of ``root_path / path``) and ``name`` (last
        path component). The list is empty when there is no ``.gitmodules``.
    """
    gitmodules_path = root_path / '.gitmodules'
    if not gitmodules_path.exists():
        return []

    with open(gitmodules_path, encoding='utf-8') as f:
        content = f.read()

    # Anchor to whole lines so that "path=" appearing inside a URL, a comment
    # or another key (e.g. "somepath = ...") is not mistaken for a submodule
    # path. [ \t] is used instead of \s so a match cannot span lines.
    path_line = re.compile(
        r'^[ \t]*path[ \t]*=[ \t]*(.+)$', re.MULTILINE | re.IGNORECASE
    )

    submodules = []
    for match in path_line.finditer(content):
        path = match.group(1).strip()
        if not path:
            # A blank value would resolve to root_path itself.
            continue
        full_path = root_path / path
        if full_path.exists():
            submodules.append({
                'path': path,
                'full_path': str(full_path),
                'name': Path(path).name
            })
    return submodules
def _is_skipped_dir(name: str) -> bool:
    """Return True if a directory called *name* must not be scanned."""
    if name.startswith('.') or name in SKIP_DIRS:
        return True
    # SKIP_DIRS may hold glob-style entries (e.g. '*.egg-info') that exact
    # membership can never match; fnmatchcase keeps matching case-sensitive
    # on every platform.
    return any(fnmatch.fnmatchcase(name, pattern) for pattern in SKIP_DIRS)


def scan_directory(dir_path: Path, git_tracked: set = None) -> dict:
    """Scan *dir_path* recursively for files and directories with uppercase names.

    Hidden directories and directories matching ``SKIP_DIRS`` are not
    descended into; files whose lowercased suffix is in ``SKIP_EXTENSIONS``
    are ignored.

    Args:
        dir_path: Directory to walk.
        git_tracked: Optional set of tracked paths relative to *dir_path*
            (forward-slash separated). When ``None``, every reported file is
            marked as tracked.

    Returns:
        A dict with ``files`` (entries with ``path``, ``name``, ``new_name``,
        ``git_tracked``), ``directories`` (entries with ``path``, ``name``,
        ``new_name``, ``depth``; deepest first), and the counters
        ``file_count``, ``dir_count`` and ``tracked_files``.
    """
    files_to_rename = []
    dirs_to_rename = []

    for root, dirs, files in os.walk(dir_path):
        # Prune in place so os.walk does not descend into skipped directories.
        dirs[:] = [d for d in dirs if not _is_skipped_dir(d)]

        rel_root = Path(root).relative_to(dir_path)

        # Check directories
        for d in dirs:
            if not has_uppercase(d):
                continue
            # Joining onto Path('.') yields the bare name, so the top level
            # needs no special case.
            rel_path = str(rel_root / d)
            dirs_to_rename.append({
                'path': rel_path,
                'name': d,
                'new_name': to_lowercase_kebab(d),
                'depth': len(Path(rel_path).parts)
            })

        # Check files
        for f in files:
            if Path(f).suffix.lower() in SKIP_EXTENSIONS:
                continue
            if not has_uppercase(f):
                continue

            rel = rel_root / f
            # git reports paths with forward slashes on every platform, so
            # compare using the POSIX form rather than the OS-native one.
            is_tracked = True
            if git_tracked is not None:
                is_tracked = rel.as_posix() in git_tracked

            files_to_rename.append({
                'path': str(rel),
                'name': f,
                'new_name': to_lowercase_kebab(f),
                'git_tracked': is_tracked
            })

    # Sort directories by depth (deepest first for safe renaming)
    dirs_to_rename.sort(key=lambda x: x['depth'], reverse=True)

    return {
        'files': files_to_rename,
        'directories': dirs_to_rename,
        'file_count': len(files_to_rename),
        'dir_count': len(dirs_to_rename),
        'tracked_files': sum(1 for f in files_to_rename if f['git_tracked'])
    }
def _outside_submodules(entries: list, submodule_paths: list) -> list:
    """Return the entries whose 'path' is neither a submodule path nor inside one."""
    exact = {p.rstrip('/') for p in submodule_paths}
    # Require a path separator after the prefix so that submodule "a/core"
    # does not also swallow the unrelated sibling "a/core-extra".
    nested = tuple(p + '/' for p in exact)
    kept = []
    for entry in entries:
        posix = Path(entry['path']).as_posix()
        if posix in exact or posix.startswith(nested):
            continue
        kept.append(entry)
    return kept


def main():
    """Build the uppercase-name inventory for the root repo and every submodule.

    Parses ``--output`` / ``--root``, scans the root repository (minus
    anything located inside a submodule) and each submodule found in
    ``.gitmodules``, writes the result as JSON to
    ``<root>/context-storage/lowercase-migration/<output>`` and prints a
    summary to stdout.

    Returns:
        The inventory dictionary that was written to disk.
    """
    import argparse

    parser = argparse.ArgumentParser(description='Inventory uppercase files across all submodules')
    parser.add_argument('--output', '-o', default='inventory-all-submodules.json',
                        help='Output file path')
    parser.add_argument('--root', '-r', default=None,
                        help='Root repository path (default: auto-detect)')
    args = parser.parse_args()

    # Find root path
    if args.root:
        root_path = Path(args.root)
    else:
        # Auto-detect: go up from script location
        script_path = Path(__file__).resolve()
        root_path = script_path.parent.parent.parent.parent.parent  # coditect-rollout-master
        if not (root_path / '.gitmodules').exists():
            # Try one more level up
            root_path = root_path.parent

    print("=" * 70)
    print("UPPERCASE FILE INVENTORY - ALL SUBMODULES")
    print("=" * 70)
    print(f"Root path: {root_path}")
    print(f"Output: {args.output}")
    print()

    # Find all submodules
    submodules = find_submodules(root_path)
    print(f"Found {len(submodules)} submodules")
    print()

    # Inventory structure
    inventory = {
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # tzinfo is dropped again so the text keeps the "...Z" form.
        'generated_at': datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + 'Z',
        'root_path': str(root_path),
        'root': None,
        'submodules': {},
        'summary': {
            'total_files': 0,
            'total_dirs': 0,
            'total_tracked': 0,
            'by_category': defaultdict(lambda: {'files': 0, 'dirs': 0, 'repos': 0})
        }
    }

    # Scan root directory (excluding submodules)
    print("Scanning root directory...")
    root_tracked = get_git_tracked_files(root_path)
    root_scan = scan_directory(root_path, root_tracked)

    # Filter out submodule paths from root scan, then refresh every counter
    # so none of them still describes the unfiltered lists.
    submod_paths = [s['path'] for s in submodules]
    root_scan['files'] = _outside_submodules(root_scan['files'], submod_paths)
    root_scan['directories'] = _outside_submodules(root_scan['directories'], submod_paths)
    root_scan['file_count'] = len(root_scan['files'])
    root_scan['dir_count'] = len(root_scan['directories'])
    root_scan['tracked_files'] = sum(
        1 for f in root_scan['files'] if f.get('git_tracked', True)
    )

    inventory['root'] = root_scan
    inventory['summary']['total_files'] += root_scan['file_count']
    inventory['summary']['total_dirs'] += root_scan['dir_count']
    # Root files are part of total_files, so they belong in total_tracked too.
    inventory['summary']['total_tracked'] += root_scan['tracked_files']

    print(f" Root: {root_scan['file_count']} files, {root_scan['dir_count']} dirs")

    # Scan each submodule
    print("\nScanning submodules...")
    for submod in submodules:
        submod_path = Path(submod['full_path'])

        # Determine category: the second component of the submodule path,
        # or 'root' for a single-component path.
        parts = Path(submod['path']).parts
        category = parts[1] if len(parts) > 1 else 'root'

        # Get git-tracked files for this submodule
        tracked = get_git_tracked_files(submod_path)

        # Scan
        scan = scan_directory(submod_path, tracked)
        scan['category'] = category
        scan['path'] = submod['path']

        # Key by basename; if another submodule already took that basename,
        # fall back to the full path instead of overwriting its entry.
        key = submod['name']
        if key in inventory['submodules']:
            key = submod['path']
        inventory['submodules'][key] = scan

        inventory['summary']['total_files'] += scan['file_count']
        inventory['summary']['total_dirs'] += scan['dir_count']
        inventory['summary']['total_tracked'] += scan['tracked_files']
        inventory['summary']['by_category'][category]['files'] += scan['file_count']
        inventory['summary']['by_category'][category]['dirs'] += scan['dir_count']
        inventory['summary']['by_category'][category]['repos'] += 1

        if scan['file_count'] > 0 or scan['dir_count'] > 0:
            print(f" {submod['name']}: {scan['file_count']} files, {scan['dir_count']} dirs")

    # Convert defaultdict to regular dict for JSON serialization
    inventory['summary']['by_category'] = dict(inventory['summary']['by_category'])

    # Save inventory
    output_path = root_path / 'context-storage' / 'lowercase-migration' / args.output
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(inventory, f, indent=2)

    print("\n" + "=" * 70)
    print("SUMMARY")
    print("=" * 70)
    print(f"Total files to rename: {inventory['summary']['total_files']}")
    print(f"Total directories to rename: {inventory['summary']['total_dirs']}")
    print(f"Git-tracked files: {inventory['summary']['total_tracked']}")
    print(f"\nInventory saved to: {output_path}")

    print("\nBy Category:")
    for cat, stats in sorted(inventory['summary']['by_category'].items(),
                             key=lambda x: x[1]['files'], reverse=True):
        print(f" {cat}: {stats['files']} files, {stats['dirs']} dirs ({stats['repos']} repos)")

    return inventory
# Run the inventory only when executed as a script, not when imported.
if __name__ == '__main__':
    main()