#!/usr/bin/env python3
# scripts-git-staging-manager
"""
Comprehensive Git Staging Manager

Ensures ALL modified files are staged for commit, including:
- Checkpoint files and indexes
- MANIFEST.json files (dashboard dependencies)
- Dedup state (global_hashes.json, checkpoint_index.json, unique_messages.jsonl)
- Session files and exports
- README and documentation
- Any other modified tracked files

Provides detailed logging for every file staged (or skipped) with success/error tracking.

Author: AZ1.AI INC (Hal Casteel)
Framework: CODITECT
License: MIT
"""
import logging
import subprocess
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, Iterator, List, Optional, Set, Tuple
@dataclass
class GitStagingResult:
    """Outcome of a single git staging run."""

    # Overall verdict for the run (required, no default)
    success: bool

    # Paths recorded as staged
    files_staged: List[str] = field(default_factory=list)

    # Paths that could not be staged and were skipped
    files_skipped: List[str] = field(default_factory=list)

    # Human-readable error messages collected during the run
    errors: List[str] = field(default_factory=list)

    # Number of changed paths detected before staging started
    total_files: int = 0
class GitStagingManager:
    """
    Manages comprehensive git staging with detailed logging.

    Ensures no files are missed during commit preparation.

    Every git command is executed with ``cwd`` set to ``repo_root`` and a
    timeout. Status output is read in the NUL-delimited ``-z`` form so that
    renames and paths containing special characters are parsed exactly.
    """

    # Critical files that MUST be staged if modified
    CRITICAL_PATHS = [
        # Checkpoint system
        "MEMORY-CONTEXT/checkpoints/",
        "MEMORY-CONTEXT/dedup_state/checkpoint_index.json",
        # Dashboard indexes (CRITICAL - don't miss!)
        "MEMORY-CONTEXT/messages/MANIFEST.json",
        "MEMORY-CONTEXT/messages/by-checkpoint/MANIFEST.json",
        # Dedup state
        "MEMORY-CONTEXT/dedup_state/global_hashes.json",
        "MEMORY-CONTEXT/dedup_state/unique_messages.jsonl",
        # Session and message storage
        "MEMORY-CONTEXT/messages/",
        "MEMORY-CONTEXT/sessions/",
        "MEMORY-CONTEXT/backups/",
        # Documentation
        "README.md",
        "CLAUDE.md",
        ".coditect/",
    ]

    def __init__(self, repo_root: Path, logger: logging.Logger):
        """
        Initialize git staging manager.

        Args:
            repo_root: Repository root directory
            logger: Logger instance
        """
        self.repo_root = Path(repo_root)
        self.logger = logger

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _run_git(self, args: List[str], timeout: int) -> subprocess.CompletedProcess:
        """Run ``git <args>`` in the repository root and capture text output.

        Raises:
            subprocess.TimeoutExpired: If the command exceeds ``timeout`` seconds.
            OSError: If the git executable cannot be started.
        """
        return subprocess.run(
            ["git", *args],
            capture_output=True,
            text=True,
            cwd=self.repo_root,
            timeout=timeout,
        )

    def _log_banner(self, title: str, leading_blank: bool = True) -> None:
        """Log ``title`` framed by two separator rules."""
        rule = "=" * 60
        self.logger.info(("\n" + rule) if leading_blank else rule)
        self.logger.info(title)
        self.logger.info(rule)

    @staticmethod
    def _iter_status_entries(output: str) -> Iterator[Tuple[str, str]]:
        """Yield ``(status, path)`` pairs from ``git status --porcelain -z`` output."""
        fields = iter(output.split("\0"))
        for entry in fields:
            # A real entry is "XY <path>"; this also drops the empty field
            # that follows the final NUL terminator.
            if len(entry) < 4:
                continue
            status, path = entry[:2], entry[3:]
            if "R" in status or "C" in status:
                # In -z mode a rename/copy is followed by a separate field
                # holding the ORIGINAL path; consume it so it is not
                # mistaken for a status entry of its own.
                next(fields, None)
            yield status, path

    @staticmethod
    def _parse_status_output(output: str) -> Tuple[Set[str], Set[str], Set[str]]:
        """Classify ``git status --porcelain -z`` output.

        Returns:
            Tuple of (modified_files, untracked_files, deleted_files). For a
            rename or copy only the new path is reported (as modified).
        """
        modified_files: Set[str] = set()
        untracked_files: Set[str] = set()
        deleted_files: Set[str] = set()
        for status, path in GitStagingManager._iter_status_entries(output):
            if "?" in status:
                untracked_files.add(path)
            elif "D" in status:
                deleted_files.add(path)
            else:
                modified_files.add(path)
        return modified_files, untracked_files, deleted_files

    @staticmethod
    def _has_unstaged_changes(output: str) -> bool:
        """Return True if any status entry still differs from the index.

        The second status column describes the working tree; a blank there
        means everything for that path is already staged. Untracked entries
        ("??") count as unstaged.
        """
        return any(
            status[1] != " "
            for status, _ in GitStagingManager._iter_status_entries(output)
        )

    def _collect_status(self) -> Optional[Tuple[Set[str], Set[str], Set[str]]]:
        """Run git status and classify the result.

        Returns:
            Tuple of (modified, untracked, deleted), or None when git status
            could not be run or reported failure, so callers can tell a
            failure apart from a clean repository.
        """
        self.logger.debug("Detecting modified files with git status")
        try:
            # -z: NUL-delimited, unquoted paths for reliable parsing
            result = self._run_git(["status", "--porcelain", "-z"], timeout=10)
        except subprocess.TimeoutExpired:
            self.logger.error("Git status command timed out")
            return None
        except Exception as e:
            # Deliberately broad: detection is best-effort and must not crash the caller
            self.logger.error(f"Failed to detect modified files: {e}")
            return None

        if result.returncode != 0:
            self.logger.error(f"Git status failed: {result.stderr}")
            return None

        modified_files, untracked_files, deleted_files = self._parse_status_output(result.stdout)

        self.logger.info(f"✓ Detected {len(modified_files)} modified, {len(untracked_files)} untracked, {len(deleted_files)} deleted files")
        self.logger.debug(f" Modified: {', '.join(sorted(modified_files)[:5])}..." if modified_files else " Modified: none")
        self.logger.debug(f" Untracked: {', '.join(sorted(untracked_files)[:5])}..." if untracked_files else " Untracked: none")
        self.logger.debug(f" Deleted: {', '.join(sorted(deleted_files)[:5])}..." if deleted_files else " Deleted: none")
        return modified_files, untracked_files, deleted_files

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def get_modified_files(self) -> Tuple[Set[str], Set[str], Set[str]]:
        """
        Get all modified, untracked, and deleted files.

        Returns:
            Tuple of (modified_files, untracked_files, deleted_files).
            Three empty sets are returned when git status fails or times
            out (the failure is logged).
        """
        status = self._collect_status()
        if status is None:
            return set(), set(), set()
        return status

    def verify_critical_files_staged(self, staged_files: Set[str]) -> List[str]:
        """
        Verify all critical paths are staged if they were modified.

        A critical path is reported only when no staged file starts with it
        AND git still shows unstaged or untracked changes beneath it. Paths
        whose changes are already fully in the index are not reported.

        Args:
            staged_files: Set of files that were staged

        Returns:
            List of critical files that are missing from staging
        """
        self.logger.debug("Verifying critical files are staged")
        missing_critical = []

        for critical_path in self.CRITICAL_PATHS:
            # Already covered by something we staged
            if any(f.startswith(critical_path) for f in staged_files):
                continue

            try:
                result = self._run_git(
                    ["status", "--porcelain", "-z", "--", critical_path],
                    timeout=5,
                )
            except Exception as e:
                self.logger.debug(f"Could not check {critical_path}: {e}")
                continue

            if self._has_unstaged_changes(result.stdout):
                # Critical file was modified but not staged!
                missing_critical.append(critical_path)
                self.logger.warning(f"⚠️ Critical file modified but not staged: {critical_path}")

        if missing_critical:
            self.logger.error(f"❌ {len(missing_critical)} critical files not staged:")
            for path in missing_critical:
                self.logger.error(f" - {path}")
        else:
            self.logger.info("✓ All critical files verified staged")

        return missing_critical

    def stage_all_changes(self, include_untracked: bool = True) -> "GitStagingResult":
        """
        Stage all changes comprehensively with detailed logging.

        Steps: detect changes, ``git add -u`` for tracked files, add each
        untracked path individually (optional), re-stage any critical path
        that is still unstaged, then compare the index with what was
        expected.

        Args:
            include_untracked: Whether to stage untracked files

        Returns:
            GitStagingResult with detailed success/failure information.
            ``success`` is False when git status cannot be read, when
            ``git add -u`` fails, or when a critical path cannot be staged.
        """
        result = GitStagingResult(success=False)
        self._log_banner("Starting comprehensive git staging", leading_blank=False)

        # Step 1: Detect all modified files
        status = self._collect_status()
        if status is None:
            # A failed status must not be mistaken for a clean repository
            error_msg = "Could not determine repository state (git status failed)"
            self.logger.error(f"❌ {error_msg}")
            result.errors.append(error_msg)
            return result

        modified, untracked, deleted = status
        result.total_files = len(modified) + len(untracked) + len(deleted)
        if result.total_files == 0:
            self.logger.info("✓ No files to stage (repository is clean)")
            result.success = True
            return result

        self.logger.info(f"\n📋 Files to stage: {result.total_files} total")
        self.logger.info(f" • Modified/Added: {len(modified)}")
        self.logger.info(f" • Untracked: {len(untracked)}")
        self.logger.info(f" • Deleted: {len(deleted)}")

        # Step 2: Stage modified and deleted files (tracked files)
        if (modified or deleted) and not self._stage_tracked(modified, deleted, result):
            return result

        # Step 3: Stage untracked files (if requested)
        if include_untracked and untracked:
            self._stage_untracked(untracked, result)

        # Step 4: Verify critical files
        self._log_banner("Verifying critical files are staged")
        missing_critical = self.verify_critical_files_staged(set(result.files_staged))
        if missing_critical:
            self._recover_critical(missing_critical, result)

        # Step 5: Final verification against the index
        self._log_banner("Final staging verification")
        self._verify_index(result)

        # Determine overall success
        result.success = not result.errors
        self._log_summary(result)
        return result

    # ------------------------------------------------------------------
    # stage_all_changes steps
    # ------------------------------------------------------------------

    def _stage_tracked(self, modified: Set[str], deleted: Set[str], result: "GitStagingResult") -> bool:
        """Stage tracked changes with ``git add -u``; return False on failure."""
        self._log_banner("Staging modified and deleted files")
        try:
            # Use git add -u to stage all tracked file changes
            self.logger.debug("Running: git add -u")
            cmd_result = self._run_git(["add", "-u"], timeout=30)
        except subprocess.TimeoutExpired:
            error_msg = "git add -u timed out (>30s)"
            self.logger.error(f"❌ {error_msg}")
            result.errors.append(error_msg)
            return False
        except Exception as e:
            error_msg = f"Failed to stage tracked files: {e}"
            self.logger.error(f"❌ {error_msg}")
            result.errors.append(error_msg)
            return False

        if cmd_result.returncode != 0:
            error_msg = f"git add -u failed: {cmd_result.stderr}"
            self.logger.error(f"❌ {error_msg}")
            result.errors.append(error_msg)
            return False

        result.files_staged.extend(modified)
        result.files_staged.extend(deleted)
        self.logger.info(f"✓ Staged {len(modified) + len(deleted)} tracked file changes")
        # Log each file for audit trail
        for filename in sorted(modified):
            self.logger.debug(f" ✓ Staged (modified): {filename}")
        for filename in sorted(deleted):
            self.logger.debug(f" ✓ Staged (deleted): {filename}")
        return True

    def _stage_untracked(self, untracked: Set[str], result: "GitStagingResult") -> None:
        """Add each untracked path individually; failures are skipped, not fatal."""
        self._log_banner("Staging untracked files")
        staged_count = 0
        for filename in sorted(untracked):
            try:
                self.logger.debug(f"Staging: {filename}")
                # "--" stops a path that starts with "-" being read as an option
                cmd_result = self._run_git(["add", "--", filename], timeout=5)
            except Exception as e:
                result.files_skipped.append(filename)
                self.logger.warning(f" ⚠️ Failed to stage {filename}: {e}")
                continue

            if cmd_result.returncode == 0:
                result.files_staged.append(filename)
                staged_count += 1
                self.logger.debug(f" ✓ Staged (untracked): {filename}")
            else:
                result.files_skipped.append(filename)
                self.logger.warning(f" ⚠️ Skipped: {filename} ({cmd_result.stderr.strip()})")
        self.logger.info(f"✓ Staged {staged_count} untracked files")

    def _recover_critical(self, missing_critical: List[str], result: "GitStagingResult") -> None:
        """Explicitly stage critical paths that were left unstaged; failures become errors."""
        self.logger.warning(f"⚠️ Attempting to stage {len(missing_critical)} missing critical files")
        for critical_path in missing_critical:
            try:
                cmd_result = self._run_git(["add", "--", critical_path], timeout=5)
            except Exception as e:
                error_msg = f"Exception staging critical file {critical_path}: {e}"
                result.errors.append(error_msg)
                self.logger.error(f" ❌ {error_msg}")
                continue

            if cmd_result.returncode == 0:
                result.files_staged.append(critical_path)
                self.logger.info(f" ✓ Recovered and staged: {critical_path}")
            else:
                error_msg = f"Failed to stage critical file {critical_path}: {cmd_result.stderr}"
                result.errors.append(error_msg)
                self.logger.error(f" ❌ {error_msg}")

    def _verify_index(self, result: "GitStagingResult") -> None:
        """Compare the index with ``result.files_staged``; only logs warnings."""
        try:
            verify_result = self._run_git(
                ["diff", "--name-only", "--cached", "-z"],
                timeout=10,
            )
        except Exception as e:
            self.logger.warning(f"⚠️ Verification failed: {e}")
            return

        if verify_result.returncode != 0:
            self.logger.warning(f"⚠️ Could not verify staging: {verify_result.stderr}")
            return

        actually_staged = {p for p in verify_result.stdout.split("\0") if p}
        self.logger.info(f"✓ Git reports {len(actually_staged)} files staged")

        # Check for discrepancies. Expected entries ending in "/" are
        # directories (collapsed untracked dirs, critical path prefixes):
        # the index lists the files inside them, so match those by prefix.
        expected_staged = set(result.files_staged)
        dir_prefixes = tuple(p for p in expected_staged if p.endswith("/"))
        extra = {
            p for p in actually_staged
            if p not in expected_staged and not p.startswith(dir_prefixes)
        }
        missing = {
            p for p in expected_staged
            if p not in actually_staged
            and not (p.endswith("/") and any(a.startswith(p) for a in actually_staged))
        }
        if extra:
            self.logger.warning(f"⚠️ {len(extra)} extra files staged: {', '.join(sorted(extra)[:5])}")
        if missing:
            self.logger.warning(f"⚠️ {len(missing)} expected files not staged: {', '.join(sorted(missing)[:5])}")

    def _log_summary(self, result: "GitStagingResult") -> None:
        """Log the final status, counts and any collected errors."""
        self._log_banner("Git Staging Summary")
        self.logger.info(f"Status: {'✅ SUCCESS' if result.success else '❌ FAILED'}")
        self.logger.info(f"Files staged: {len(result.files_staged)}")
        self.logger.info(f"Files skipped: {len(result.files_skipped)}")
        self.logger.info(f"Errors: {len(result.errors)}")
        if result.errors:
            self.logger.error("\nErrors encountered:")
            for error in result.errors:
                self.logger.error(f" • {error}")
if __name__ == "__main__":
    # Test git staging manager: stages tracked changes only and exits with
    # status 0 on success, 1 on failure.
    import sys

    # Setup basic logging for testing
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s - %(levelname)s - %(message)s'
    )
    logger = logging.getLogger(__name__)

    # Get repo root (assume we're in scripts/core/)
    # NOTE(review): parents[4] is four levels above this file's directory,
    # which is deeper than a plain "<repo>/scripts/core/" layout would need.
    # Confirm against the actual checkout layout.
    repo_root = Path(__file__).resolve().parents[4]
    logger.info(f"Testing in repository: {repo_root}")

    # Create staging manager
    manager = GitStagingManager(repo_root, logger)

    # Test staging
    result = manager.stage_all_changes(include_untracked=False)

    # Exit with appropriate code
    sys.exit(0 if result.success else 1)