# scripts-batch-update-component-counts

#!/usr/bin/env python3
"""
title: "Patterns to replace - (pattern, replacement)"
component_type: script
version: "1.0.0"
audience: contributor
status: stable
summary: "Batch update documentation files to use generic language for component counts."
keywords: ['analysis', 'batch', 'component', 'counts', 'git']
tokens: ~500
created: 2025-12-22
updated: 2025-12-22
script_name: "batch-update-component-counts.py"
language: python
executable: true
usage: "python3 scripts/batch-update-component-counts.py [options]"
python_version: "3.10+"
dependencies: []
modifies_files: false
network_access: false
requires_auth: false

Batch update documentation files to use generic language for component counts.

This script replaces hardcoded component numbers with:

- Generic language (e.g., "all agents" instead of "81 agents")
- References to config/component-counts.json as the single source of truth

Usage:
    python3 scripts/batch-update-component-counts.py [--dry-run] [--verbose]

Options:
    --dry-run    Show what would be changed without modifying files
    --verbose    Show detailed replacement information
"""

import json
import re
import sys
from datetime import datetime, timezone
from pathlib import Path

# Patterns to replace - (pattern, replacement)
# These replace specific count mentions with generic language.
#
# Each entry is a (regex, replacement) pair. process_file() applies the
# entries sequentially, in list order, with re.IGNORECASE, and every entry
# operates on the text produced by the entries before it.
#
# NOTE(review): because of that ordering, a narrower pattern listed after a
# broader one only sees what the broader one left untouched. For example
# "(52 agents)" is rewritten to "(all agents)" by the exact-count pattern
# before the parenthesised pattern runs. Confirm this precedence is intended
# before reordering anything.
REPLACEMENT_PATTERNS = [
    # Exact count patterns with agents/commands/skills/scripts (expanded range)
    (r'\b8[1-6]\s+agents?\b', 'all agents'),
    (r'\b7[78]\s+agents?\b', 'all agents'),
    (r'\b6[3-6]\s+agents?\b', 'all agents'),
    (r'\b5[0-4]\s+agents?\b', 'all agents'),
    (r'\b4[6-9]\s+agents?\b', 'all agents'),
    (r'\b11[0-2]\s+commands?\b', 'all commands'),
    (r'\b10[0-5]\s+commands?\b', 'all commands'),
    (r'\b9[0-6]\s+commands?\b', 'all commands'),
    (r'\b8[1-6]\s+commands?\b', 'all commands'),
    (r'\b7[2-4]\s+commands?\b', 'all commands'),
    (r'\b5[4-8]\s+skills?\b', 'all skills'),
    (r'\b3[0-4]\s+skills?\b', 'all skills'),
    (r'\b2[6-9]\s+skills?\b', 'all skills'),
    (r'\b10[3-9]\s+scripts?\b', 'all scripts'),
    (r'\b9[2-8]\s+scripts?\b', 'all scripts'),
    (r'\b35[0-9]\s+components?\b', 'all components'),
    (r'\b34[0-3]\s+components?\b', 'all components'),
    (r'\b33[0-8]\s+components?\b', 'all components'),
    (r'\b30[1-5]\s+components?\b', 'all components'),
    (r'\b29[0-0]\s+components?\b', 'all components'),

    # Bold formatting patterns (expanded)
    (r'\*\*\d+\s+Specialized\s+Agents?\*\*', '**All Specialized Agents**'),
    (r'\*\*\d+\s+Slash\s+Commands?\*\*', '**All Slash Commands**'),
    (r'\*\*\d+\s+Production\s+Skills?\*\*', '**All Production Skills**'),
    (r'\*\*\d+\s+specialized\s+agents?\*\*', '**all specialized agents**'),
    (r'\*\*\d+\s+slash\s+commands?\*\*', '**all slash commands**'),

    # Hyphenated patterns (e.g., "78-agent")
    (r'\b\d{2}-agent\b', 'multi-agent'),
    (r'\b\d{3}-command\b', 'multi-command'),
    (r'\b\d{2}-skill\b', 'multi-skill'),

    # Plus notation patterns (e.g., "50+ agents")
    (r'\b[4-8][0-9]\+\s+agents?\b', 'all agents'),
    (r'\b[7-9][0-9]\+\s+commands?\b', 'all commands'),
    (r'\b1[0-1][0-9]\+\s+commands?\b', 'all commands'),
    (r'\b[2-5][0-9]\+\s+skills?\b', 'all skills'),
    (r'\b[2-3][0-9][0-9]\+\s+components?\b', 'all components'),

    # Count in parentheses patterns
    (r'\(\d{2}\s+agents?\)', '(see config/component-counts.json)'),
    (r'\(\d{2,3}\s+commands?\)', '(see config/component-counts.json)'),
    (r'\(\d{2}\s+skills?\)', '(see config/component-counts.json)'),

    # Directory comment patterns like "# 52 specialized AI agents"
    (r'#\s+\d{2}\s+specialized\s+AI\s+agents', '# All specialized AI agents'),
    (r'#\s+\d{2,3}\s+slash\s+commands', '# All slash commands'),

    # Inline patterns like "52 specialized AI agents"
    (r'\b\d{2}\s+specialized\s+AI\s+agents\b', 'all specialized AI agents'),
    (r'\b\d{2,3}\s+slash\s+commands\b', 'all slash commands'),
    (r'\b\d{2}\s+production\s+skills\b', 'all production skills'),

    # Summary patterns like "(52 agents, 81 commands, 26 skills)"
    (
        r'\(\d{2}\s+agents?,\s+\d{2,3}\s+commands?,\s+\d{2}\s+skills?\s*(?:accessible)?\)',
        '(see config/component-counts.json for current counts)',
    ),

    # Patterns in table cells like "| 52 specialized AI agents |"
    (r'\|\s*\d{2}\s+specialized\s+AI\s+agents\s*\|', '| All specialized AI agents |'),
]

# Files/directories to skip (historical, test data, or point-in-time snapshots).
# Entries are plain substrings (not globs or regexes): should_process_file()
# rejects any path that contains one of them.
SKIP_PATTERNS = [
    'config/component-counts.json',  # The source of truth itself
    'scripts/update-component-counts.py',  # The script that generates it
    'scripts/batch-update-component-counts.py',  # This script
    '.git/',
    '__pycache__/',
    'node_modules/',
    'CHANGELOG',  # Historical records should stay as-is
    'checkpoints/',  # Historical checkpoints
    'CHECKPOINTS/',  # Historical checkpoints (uppercase)
    'exports-archive/',
    'batches/',  # Historical batch summaries (e.g., CLAUDE-4.5-UPDATE-BATCH-*)
    'commit-messages/',  # Historical commit messages
    'MEMORY-CONTEXT/',  # Session history
    'session-exports/',  # Session exports
    'GAP-ANALYSIS.md',  # Analysis of specific point in time
    'STATUS-REPORT',  # Point-in-time status reports
    'COMPLETION-REPORT',  # Historical completion reports
    'GRADING-REPORT',  # Historical grading reports
    'CODITECT-FRAMEWORK-ANALYSIS.md',  # Point-in-time analysis
    'COMPONENT-VALIDATION-WORKFLOW.md',  # Workflow with fixed scope
    '.test.js',  # Test files (100 components is test data)
    '.test.ts',  # Test files
    'RegistryManager.test',  # Test files
    'ComponentLoader.test',  # Test files
    'tools/component-viewer/docs/',  # Component viewer docs (performance specs)
    'config/framework-registry.json',  # Has descriptions, not inventory counts
    'config/scripts/',  # Script configs with descriptions
    'config/agent-llm-bindings',  # Cost estimate examples
    # Additional historical/technical files
    'docs/12-implementation-summaries/',  # Historical implementation records
    'docs/09-research-analysis/anthropic-research/',  # Research documents (point-in-time)
    'docs/09-research-analysis/completion-reports/',  # Historical completion reports
    'docs/09-research-analysis/audits/',  # Point-in-time audit reports
    'docs/05-project-planning/reports/',  # Historical project reports
    'docs/04-technical-specifications/testing/',  # Test reports with fixed data
    'COMPONENT-ACTIVATION-RUNTIME-INTEGRATION.md',  # Technical spec with code examples
    'VALIDATION-RESULTS.md',  # Point-in-time validation results
]

# File extensions to process.
# Compared against the lower-cased file suffix, so every entry must be
# lowercase and include the leading dot.
INCLUDE_EXTENSIONS = ['.md', '.txt', '.rst']

def should_process_file(file_path: Path) -> bool:
    """Check if file should be processed.

    A file qualifies when none of the SKIP_PATTERNS substrings occurs in its
    path and its suffix, compared case-insensitively, is listed in
    INCLUDE_EXTENSIONS.

    Args:
        file_path: Path of the candidate file.

    Returns:
        True if the file should be scanned for replacements, False otherwise.
    """
    # SKIP_PATTERNS entries are written with forward slashes. as_posix()
    # keeps the substring test working on Windows, where str(path) would
    # use backslashes and directory patterns such as '.git/' would never match.
    path_str = file_path.as_posix()

    # Skip patterns
    if any(pattern in path_str for pattern in SKIP_PATTERNS):
        return False

    # Check extension
    return file_path.suffix.lower() in INCLUDE_EXTENSIONS

def process_file(file_path: Path, dry_run: bool = False, verbose: bool = False) -> dict:
    """Process a single file and return change statistics.

    Every (pattern, replacement) pair in REPLACEMENT_PATTERNS is applied in
    order, case-insensitively, each one to the text produced by the previous
    pair. The file is rewritten only when the text changed and ``dry_run`` is
    false.

    Args:
        file_path: File to scan; read and written as UTF-8.
        dry_run: When true, collect statistics but never write the file.
        verbose: When true, print one line per replacement for changed files.

    Returns:
        A dict with the keys ``'file'`` (the path as a string), ``'changes'``
        (number of matches replaced) and ``'replacements'`` (one dict per
        match with ``'original'``, ``'replacement'`` and ``'line'``). Line
        numbers refer to the text as it stood when that pattern ran, i.e.
        after all earlier patterns had been applied.

        I/O and decoding errors are reported on stdout instead of being
        raised; the statistics gathered up to that point are returned.
    """
    stats = {
        'file': str(file_path),
        'changes': 0,
        'replacements': [],
    }

    try:
        # newline='' turns off newline translation so that the file's own
        # line endings (LF or CRLF) survive the read/write round trip.
        with file_path.open('r', encoding='utf-8', newline='') as handle:
            content = handle.read()
    except (OSError, UnicodeError) as e:
        print(f"  ⚠️  Error processing {file_path}: {e}")
        return stats

    original_content = content

    for pattern, replacement in REPLACEMENT_PATTERNS:
        # Compile once and reuse the same object for both the scan and the
        # substitution.
        compiled = re.compile(pattern, re.IGNORECASE)
        matches = list(compiled.finditer(content))
        if not matches:
            continue

        for match in matches:
            stats['replacements'].append({
                'original': match.group(),
                'replacement': replacement,
                'line': content[:match.start()].count('\n') + 1,
            })
        stats['changes'] += len(matches)

        content = compiled.sub(replacement, content)

    if content == original_content:
        return stats

    if verbose:
        print(f"\n  📝 {file_path.name}")
        for r in stats['replacements']:
            print(f"     Line {r['line']}: '{r['original']}' → '{r['replacement']}'")

    if not dry_run:
        try:
            with file_path.open('w', encoding='utf-8', newline='') as handle:
                handle.write(content)
        except (OSError, UnicodeError) as e:
            print(f"  ⚠️  Error processing {file_path}: {e}")

    return stats

def main():
    """Scan the repository's documentation and replace hardcoded component counts.

    Flags are read directly from ``sys.argv``:
        --dry-run  Report what would change without modifying any file.
        --verbose  Print every individual replacement.

    The repository root is taken to be the parent of the directory that
    contains this script. If ``config/component-counts.json`` does not exist
    under that root, an error is printed and the process exits with status 1.

    Returns:
        The process exit status, which is always 0 once the scan has run.
    """
    dry_run = '--dry-run' in sys.argv
    verbose = '--verbose' in sys.argv

    # Find repo root (where config/component-counts.json should be)
    script_dir = Path(__file__).parent
    repo_root = script_dir.parent

    # Verify we're in the right place
    counts_file = repo_root / 'config' / 'component-counts.json'
    if not counts_file.exists():
        print(f"❌ Error: config/component-counts.json not found at {counts_file}")
        print("   Please run from coditect-core repository root")
        sys.exit(1)

    # Load current counts for reference (explicit encoding so the result does
    # not depend on the platform's locale default)
    with open(counts_file, encoding='utf-8') as f:
        counts = json.load(f)

    print("=" * 60)
    print("CODITECT Component Count Batch Updater")
    print("=" * 60)
    print(f"\nSingle Source of Truth: {counts_file}")
    print(f"Current counts: agents={counts['counts']['agents']}, "
          f"commands={counts['counts']['commands']}, "
          f"skills={counts['counts']['skills']}, "
          f"scripts={counts['counts']['scripts']}")

    if dry_run:
        print("\n🔍 DRY RUN MODE - No files will be modified")

    print("\n" + "-" * 60)
    print("Scanning documentation files...")
    print("-" * 60)

    # Directories to scan for documentation
    scan_dirs = [
        repo_root / 'docs',
        repo_root / 'CODITECT-CORE-STANDARDS',
        repo_root / 'distribution',
        repo_root / 'skills',
        repo_root / 'commands',
        repo_root / 'agents',
        repo_root / 'hooks',
        repo_root / 'lib',
        repo_root / 'tools',
    ]

    # A set de-duplicates in O(1) per path; the files are sorted before
    # processing, so collection order does not matter.
    files_to_process: set[Path] = set()

    # Scan all directories
    for scan_dir in scan_dirs:
        if scan_dir.exists():
            files_to_process.update(
                file_path
                for file_path in scan_dir.rglob('*')
                if file_path.is_file() and should_process_file(file_path)
            )

    # Also check root-level markdown files
    files_to_process.update(
        md_file for md_file in repo_root.glob('*.md') if should_process_file(md_file)
    )

    # Also check CLAUDE.md and README.md files anywhere
    for file_name in ('CLAUDE.md', 'README.md'):
        files_to_process.update(
            found for found in repo_root.rglob(file_name) if should_process_file(found)
        )

    print(f"Found {len(files_to_process)} files to check\n")

    # Process files
    total_changes = 0
    files_modified = 0
    all_stats = []

    for file_path in sorted(files_to_process):
        stats = process_file(file_path, dry_run, verbose)
        all_stats.append(stats)
        if stats['changes'] > 0:
            total_changes += stats['changes']
            files_modified += 1
            if not verbose:
                print(f"  📝 {file_path.relative_to(repo_root)}: {stats['changes']} changes")

    print("\n" + "=" * 60)
    print("SUMMARY")
    print("=" * 60)
    print(f"Files scanned:  {len(files_to_process)}")
    print(f"Files modified: {files_modified}")
    print(f"Total changes:  {total_changes}")

    if dry_run and total_changes > 0:
        print("\n💡 To apply these changes, run without --dry-run:")
        print("   python3 scripts/batch-update-component-counts.py")
    elif total_changes > 0:
        print("\n✅ All changes applied successfully!")
        print("   Remember to commit the changes:")
        print("   git add docs/ && git commit -m 'refactor(docs): Replace hardcoded counts with generic language'")
    else:
        print("\n✅ No hardcoded counts found - files already use generic language")

    # total_changes is a non-negative counter, so the former
    # "0 if total_changes >= 0 else 1" could only ever yield 0.
    return 0

# Run main() only when executed as a script, and use its return value as the
# process exit status.
if __name__ == '__main__':
    sys.exit(main())

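# Extraction residue: a heading index that repeats the file header and the
# section comments. Kept as comments so the file still parses.
#
#   #!/usr/bin/env python3 """
#   Patterns to replace - (pattern, replacement)
#   These replace specific count mentions with generic language
#   Files/directories to skip (historical, test data, or point-in-time snapshots)
#   File extensions to process
#   #!/usr/bin/env python3 """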