Skip to main content

scripts-codebase-inventory

#!/usr/bin/env python3
"""
CODITECT Codebase Inventory Tool
Copyright © 2025 AZ1.AI INC - All Rights Reserved

Generates comprehensive folder and file inventories for project analysis.

Usage:
    python3 codebase-inventory.py [path]
    python3 codebase-inventory.py . --format json
    python3 codebase-inventory.py docs/ --format markdown --output inventory.md

Options:
    path              Target directory (default: current)
    --format          Output format: table, json, markdown (default: table)
    --output          Write to file instead of stdout
    --depth           Maximum directory depth (default: 3)
    --exclude         Patterns to exclude (can be repeated)
    --include-hidden  Include hidden files and directories
"""

import argparse
import json
import os
import sys
from collections import defaultdict
from datetime import datetime
from pathlib import Path
from typing import Any, Optional

def count_lines(file_path: Path) -> int:
    """Return the number of lines in *file_path*, or 0 if it cannot be read.

    The file is opened as UTF-8 text with undecodable bytes ignored, so
    non-text content does not raise a decoding error. Any exception raised
    while opening or iterating the file results in a count of 0.
    """
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as handle:
            total = 0
            for _line in handle:
                total += 1
            return total
    except Exception:
        return 0

def get_file_size_str(size_bytes: int) -> str:
    """Format a byte count as a short human-readable string.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (TB) is reached, then rendered with one decimal place followed by
    the unit, e.g. ``"1.5 KB"``.
    """
    units = ('B', 'KB', 'MB', 'GB', 'TB')
    value = size_bytes
    position = 0
    # Stop at the last unit so anything larger is still reported in TB.
    while position < len(units) - 1 and not (value < 1024):
        value = value / 1024
        position += 1
    return f"{value:.1f} {units[position]}"

def should_exclude(path: Path, exclude_patterns: list[str]) -> bool:
    """Return True if *path* matches any of *exclude_patterns*.

    Each pattern is applied with ``Path.match``, i.e. as a glob anchored at
    the right-hand end of the path: ``"build"`` matches an entry named
    exactly ``build``, ``"*.pyc"`` matches by extension, and
    ``"docs/internal"`` matches the trailing two components.

    Patterns are deliberately NOT treated as raw substrings of the full path.
    Substring matching made ``"build"`` exclude ``rebuild.py`` and made
    ``"dist"`` exclude ``distance.py``; it also excluded every entry whenever
    a parent directory of the scan root happened to contain the text.
    Use an explicit glob such as ``"*build*"`` to get substring behaviour.

    Args:
        path: Path of the entry being considered.
        exclude_patterns: Glob-style patterns; empty strings are ignored.

    Returns:
        True when at least one pattern matches, otherwise False.
    """
    for pattern in exclude_patterns:
        if not pattern:
            # An empty pattern cannot sensibly match anything.
            continue
        try:
            if path.match(pattern):
                return True
        except ValueError:
            # Path.match rejects patterns that reduce to nothing (e.g. ".").
            continue
    return False

# Extensions whose files are opened and have their lines counted.
_TEXT_EXTENSIONS = frozenset({
    '.md', '.py', '.js', '.ts', '.json', '.yaml', '.yml',
    '.txt', '.html', '.css', '.sh', '.bash',
})


def scan_directory(
    root: Path,
    max_depth: int = 3,
    current_depth: int = 0,
    exclude_patterns: Optional[list[str]] = None,
    include_hidden: bool = False
) -> dict[str, Any]:
    """Recursively scan *root* and collect file/directory statistics.

    Args:
        root: Directory to scan.
        max_depth: A subdirectory is descended into only while
            ``current_depth < max_depth``. Deeper subdirectories are still
            counted in ``dir_count`` but their contents are not visited, so
            they do not contribute to any total.
        current_depth: Depth of *root* relative to the initial call; callers
            normally leave this at 0.
        exclude_patterns: Patterns passed to ``should_exclude`` for every
            entry; ``None`` means no exclusions.
        include_hidden: When False, entries whose name starts with ``.`` are
            skipped.

    Returns:
        A dict with keys ``path``, ``name``, ``type`` (always
        ``"directory"``), ``file_count``, ``dir_count``, ``total_size``,
        ``total_lines``, ``file_types`` (extension -> count, a plain dict)
        and ``children`` (one nested result per scanned subdirectory).
        Counts and sizes include everything aggregated from the children.
        If *root* is missing, is not a directory, or cannot be listed, the
        zero-valued result is returned.
    """
    exclude_patterns = exclude_patterns or []

    file_types = defaultdict(int)
    result = {
        "path": str(root),
        "name": root.name or str(root),
        "type": "directory",
        "file_count": 0,
        "dir_count": 0,
        "total_size": 0,
        "total_lines": 0,
        "file_types": {},
        "children": []
    }

    # Listing is the only step whose failure should empty the whole result.
    try:
        entries = sorted(root.iterdir()) if root.is_dir() else []
    except OSError:
        return result

    for item in entries:
        # Skip hidden files unless requested
        if not include_hidden and item.name.startswith('.'):
            continue

        # Skip excluded patterns
        if should_exclude(item, exclude_patterns):
            continue

        # Classify the entry; a failure here must only skip this entry, not
        # abandon the remaining siblings.
        try:
            is_file = item.is_file()
            is_dir = not is_file and item.is_dir()
        except OSError:
            continue

        if is_file:
            result["file_count"] += 1
            try:
                result["total_size"] += item.stat().st_size
            except OSError:
                # Size unavailable: the file is still counted, with 0 bytes.
                pass

            # Track by extension
            ext = item.suffix.lower() or "(no extension)"
            file_types[ext] += 1

            # Count lines for text files
            if ext in _TEXT_EXTENSIONS:
                result["total_lines"] += count_lines(item)

        elif is_dir:
            result["dir_count"] += 1

            # Recurse if not at max depth
            if current_depth < max_depth:
                child = scan_directory(
                    item,
                    max_depth=max_depth,
                    current_depth=current_depth + 1,
                    exclude_patterns=exclude_patterns,
                    include_hidden=include_hidden
                )
                result["children"].append(child)

                # Aggregate child stats
                result["file_count"] += child["file_count"]
                result["dir_count"] += child["dir_count"]
                result["total_size"] += child["total_size"]
                result["total_lines"] += child["total_lines"]

                for ext, count in child["file_types"].items():
                    file_types[ext] += count

    # Plain dict on every return path keeps the result JSON-friendly and
    # avoids accidental key creation by consumers.
    result["file_types"] = dict(file_types)

    return result

def format_table(inventory: dict[str, Any], show_children: bool = True) -> str:
    """Render an inventory dict as a plain-text report.

    The report has a banner with the scanned path, a summary section, the
    15 most frequent file types, and (when *show_children* is true and the
    inventory has children) one row per child directory ordered by file
    count, highest first. A generation timestamp closes the report.
    """
    banner = "=" * 80
    report = [
        banner,
        f"CODEBASE INVENTORY: {inventory['path']}",
        banner,
        "",
        # Summary
        "## Summary",
        f" Total Files: {inventory['file_count']:,}",
        f" Total Directories: {inventory['dir_count']:,}",
        f" Total Size: {get_file_size_str(inventory['total_size'])}",
        f" Total Lines: {inventory['total_lines']:,}",
        "",
        # File types
        "## File Types",
        f" {'Extension':<15} {'Count':>10}",
        f" {'-' * 15} {'-' * 10}",
    ]

    ranked_types = sorted(
        inventory["file_types"].items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
    report.extend(f" {ext:<15} {count:>10,}" for ext, count in ranked_types[:15])
    report.append("")

    # Children (top-level directories)
    subdirs = inventory.get("children")
    if show_children and subdirs:
        report.append("## Directory Breakdown")
        report.append(f" {'Directory':<30} {'Files':>10} {'Size':>12} {'Lines':>10}")
        report.append(f" {'-' * 30} {'-' * 10} {'-' * 12} {'-' * 10}")

        ranked_dirs = sorted(subdirs, key=lambda entry: entry["file_count"], reverse=True)
        for entry in ranked_dirs:
            name_col = f"{entry['name']:<30}"
            files_col = f"{entry['file_count']:>10,}"
            size_col = f"{get_file_size_str(entry['total_size']):>12}"
            lines_col = f"{entry['total_lines']:>10,}"
            report.append(f" {name_col} {files_col} {size_col} {lines_col}")

    report.append("")
    report.append(f"Generated: {datetime.now().isoformat()}")

    return "\n".join(report)

def _md_cell(value: Any) -> str:
    """Return *value* as text that is safe inside a Markdown table cell."""
    # An unescaped "|" would be parsed as a column separator and break the row.
    return str(value).replace("|", "\\|")


def format_markdown(inventory: dict[str, Any]) -> str:
    """Format an inventory dict as a Markdown document.

    The document contains a title with the scanned path, a generation
    timestamp, a summary table, a table of the 15 most frequent file types,
    and (when the inventory has children) a per-directory table ordered by
    file count, highest first.

    Extension and directory names are taken from the file system, so any
    ``|`` in them is escaped to keep the table rows intact.

    Args:
        inventory: Result dict as produced by ``scan_directory``.

    Returns:
        The Markdown text, lines joined with ``"\\n"``.
    """
    lines = []
    lines.append(f"# Codebase Inventory: {inventory['path']}")
    lines.append("")
    lines.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    lines.append("")
    lines.append("---")
    lines.append("")

    # Summary
    lines.append("## Summary")
    lines.append("")
    lines.append("| Metric | Value |")
    lines.append("|--------|-------|")
    lines.append(f"| Total Files | {inventory['file_count']:,} |")
    lines.append(f"| Total Directories | {inventory['dir_count']:,} |")
    lines.append(f"| Total Size | {get_file_size_str(inventory['total_size'])} |")
    lines.append(f"| Total Lines | {inventory['total_lines']:,} |")
    lines.append("")

    # File types
    lines.append("## File Types")
    lines.append("")
    lines.append("| Extension | Count |")
    lines.append("|-----------|-------|")
    for ext, count in sorted(inventory["file_types"].items(), key=lambda x: -x[1])[:15]:
        lines.append(f"| {_md_cell(ext)} | {count:,} |")
    lines.append("")

    # Directory breakdown
    if inventory.get("children"):
        lines.append("## Directory Breakdown")
        lines.append("")
        lines.append("| Directory | Files | Size | Lines |")
        lines.append("|-----------|-------|------|-------|")

        for child in sorted(inventory["children"], key=lambda x: -x["file_count"]):
            lines.append(
                f"| {_md_cell(child['name'])} | "
                f"{child['file_count']:,} | "
                f"{get_file_size_str(child['total_size'])} | "
                f"{child['total_lines']:,} |"
            )

    return "\n".join(lines)

def main(argv: Optional[list[str]] = None) -> int:
    """Parse command-line arguments, scan the target and emit the inventory.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the normal command-line behaviour.

    Returns:
        Process exit code: 0 on success, 1 if the target path does not
        exist, is not a directory, or the output file cannot be written.
    """
    parser = argparse.ArgumentParser(
        description="CODITECT Codebase Inventory Tool",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # The module docstring carries the usage examples shown under --help.
        epilog=__doc__
    )
    parser.add_argument(
        "path",
        nargs="?",
        default=".",
        help="Target directory (default: current)"
    )
    parser.add_argument(
        "--format", "-f",
        choices=["table", "json", "markdown"],
        default="table",
        help="Output format"
    )
    parser.add_argument(
        "--output", "-o",
        help="Write to file instead of stdout"
    )
    parser.add_argument(
        "--depth", "-d",
        type=int,
        default=3,
        help="Maximum directory depth"
    )
    parser.add_argument(
        "--exclude", "-e",
        action="append",
        default=[],
        help="Patterns to exclude (can be repeated)"
    )
    parser.add_argument(
        "--include-hidden",
        action="store_true",
        help="Include hidden files and directories"
    )

    args = parser.parse_args(argv)

    target = Path(args.path).resolve()
    if not target.exists():
        print(f"Error: Path does not exist: {target}", file=sys.stderr)
        return 1
    if not target.is_dir():
        # Scanning a non-directory would silently report an empty inventory.
        print(f"Error: Path is not a directory: {target}", file=sys.stderr)
        return 1

    # User patterns first, then the default exclusions. A fresh list is built
    # so the parsed arguments are not mutated.
    exclude_patterns = list(args.exclude or [])
    exclude_patterns.extend([
        "node_modules",
        "__pycache__",
        ".git",
        "venv",
        ".venv",
        "dist",
        "build"
    ])

    # Scan directory
    inventory = scan_directory(
        target,
        max_depth=args.depth,
        exclude_patterns=exclude_patterns,
        include_hidden=args.include_hidden
    )

    # Format output
    if args.format == "json":
        output = json.dumps(inventory, indent=2)
    elif args.format == "markdown":
        output = format_markdown(inventory)
    else:
        output = format_table(inventory)

    # Write output
    if args.output:
        try:
            # Explicit UTF-8 so the result does not depend on the platform's
            # default encoding.
            Path(args.output).write_text(output, encoding="utf-8")
        except OSError as exc:
            print(f"Error: Could not write {args.output}: {exc}", file=sys.stderr)
            return 1
        print(f"Inventory written to: {args.output}")
    else:
        print(output)

    return 0

# Run the CLI only when executed as a script, and propagate main()'s return
# value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())