Skip to main content

scripts-git-staging-manager

#!/usr/bin/env python3
"""
Comprehensive Git Staging Manager

Ensures ALL modified files are staged for commit, including:

  • Checkpoint files and indexes
  • MANIFEST.json files (dashboard dependencies)
  • Dedup state (global_hashes.json, checkpoint_index.json, unique_messages.jsonl)
  • Session files and exports
  • README and documentation
  • Any other modified tracked files

Provides detailed logging for every file staged (or skipped) with success/error tracking.

Author: AZ1.AI INC (Hal Casteel)
Framework: CODITECT
License: MIT
"""

import logging
import subprocess
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple

@dataclass
class GitStagingResult:
    """Result of git staging operation.

    Attributes:
        success: True when the staging run finished without recording errors.
        files_staged: Paths that were handed to git and staged successfully.
        files_skipped: Paths whose ``git add`` failed or raised.
        errors: Human-readable error messages collected during the run.
        total_files: Number of modified + untracked + deleted paths detected.
    """

    success: bool
    # default_factory gives every result its own list (no shared mutable default)
    files_staged: List[str] = field(default_factory=list)
    files_skipped: List[str] = field(default_factory=list)
    errors: List[str] = field(default_factory=list)
    total_files: int = 0

class GitStagingManager:
    """
    Manages comprehensive git staging with detailed logging.

    Ensures no files are missed during commit preparation.

    All git commands run with ``cwd=repo_root`` and a timeout. Status output is
    requested in the NUL-delimited porcelain format (``-z``) so that paths with
    spaces, quotes, non-ASCII characters and rename entries parse reliably.
    """

    # Critical files that MUST be staged if modified
    CRITICAL_PATHS = [
        # Checkpoint system
        "MEMORY-CONTEXT/checkpoints/",
        "MEMORY-CONTEXT/dedup_state/checkpoint_index.json",

        # Dashboard indexes (CRITICAL - don't miss!)
        "MEMORY-CONTEXT/messages/MANIFEST.json",
        "MEMORY-CONTEXT/messages/by-checkpoint/MANIFEST.json",

        # Dedup state
        "MEMORY-CONTEXT/dedup_state/global_hashes.json",
        "MEMORY-CONTEXT/dedup_state/unique_messages.jsonl",

        # Session and message storage
        "MEMORY-CONTEXT/messages/",
        "MEMORY-CONTEXT/sessions/",
        "MEMORY-CONTEXT/backups/",

        # Documentation
        "README.md",
        "CLAUDE.md",
        ".coditect/",
    ]

    # Failures a git subprocess call can realistically produce: missing binary
    # or working directory (OSError), timeout (SubprocessError), undecodable
    # output (UnicodeDecodeError, a ValueError).
    _GIT_ERRORS = (OSError, subprocess.SubprocessError, ValueError)

    def __init__(self, repo_root: Path, logger: logging.Logger):
        """
        Initialize git staging manager.

        Args:
            repo_root: Repository root directory
            logger: Logger instance
        """
        self.repo_root = Path(repo_root)
        self.logger = logger

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _git(self, *args: str, timeout: float) -> subprocess.CompletedProcess:
        """Run ``git <args>`` in the repository and return the completed process."""
        return subprocess.run(
            ["git", *args],
            capture_output=True,
            text=True,
            cwd=self.repo_root,
            timeout=timeout,
        )

    def _banner(self, title: str, leading_newline: bool = True) -> None:
        """Log a section header framed by separator lines."""
        rule = "=" * 60
        self.logger.info("\n" + rule if leading_newline else rule)
        self.logger.info(title)
        self.logger.info(rule)

    @staticmethod
    def _porcelain_entries(output: str) -> List[Tuple[str, str]]:
        """Split ``git status --porcelain -z`` output into (status, path) pairs."""
        entries: List[Tuple[str, str]] = []
        fields = iter(output.split("\0"))
        for entry in fields:
            # A valid entry is "XY <path>": two status chars, a space, a path.
            if len(entry) < 4 or entry[2] != " ":
                continue
            status, path = entry[:2], entry[3:]
            if "R" in status or "C" in status:
                # With -z, rename/copy entries are "XY <to>\0<from>\0";
                # consume the source path so it is not read as an entry.
                next(fields, None)
            entries.append((status, path))
        return entries

    @staticmethod
    def _parse_porcelain(output: str) -> Tuple[Set[str], Set[str], Set[str]]:
        """Classify ``git status --porcelain -z`` output.

        Returns:
            Tuple of (modified_files, untracked_files, deleted_files). Renamed
            or copied entries are recorded under their new path.
        """
        modified: Set[str] = set()
        untracked: Set[str] = set()
        deleted: Set[str] = set()
        for status, path in GitStagingManager._porcelain_entries(output):
            if "?" in status:
                untracked.add(path)
            elif "D" in status:
                deleted.add(path)
            else:
                modified.add(path)
        return modified, untracked, deleted

    @staticmethod
    def _has_unstaged_changes(output: str) -> bool:
        """Return True if any porcelain entry has a change outside the index.

        The second status column describes the working tree; a space there
        means the entry is fully staged. Untracked entries ("??") count as
        unstaged.
        """
        return any(
            status[1] != " "
            for status, _ in GitStagingManager._porcelain_entries(output)
        )

    @staticmethod
    def _covers(staged_path: str, critical_path: str) -> bool:
        """Return True if ``staged_path`` satisfies ``critical_path``."""
        if critical_path.endswith("/"):
            # Directory: any staged file beneath it counts.
            if staged_path.startswith(critical_path):
                return True
        elif staged_path == critical_path:
            # File: exact match only ("README.md.bak" must not match "README.md").
            return True
        # git reports a wholly untracked directory as one "dir/" entry; staging
        # that entry stages everything beneath it.
        return staged_path.endswith("/") and critical_path.startswith(staged_path)

    def _collect_status(self) -> Optional[Tuple[Set[str], Set[str], Set[str]]]:
        """Run git status and classify the result; return None on failure."""
        self.logger.debug("Detecting modified files with git status")

        try:
            proc = self._git("status", "--porcelain", "-z", timeout=10)
        except subprocess.TimeoutExpired:
            self.logger.error("Git status command timed out")
            return None
        except self._GIT_ERRORS as e:
            self.logger.error(f"Failed to detect modified files: {e}")
            return None

        if proc.returncode != 0:
            self.logger.error(f"Git status failed: {proc.stderr}")
            return None

        modified_files, untracked_files, deleted_files = self._parse_porcelain(proc.stdout)

        self.logger.info(f"✓ Detected {len(modified_files)} modified, {len(untracked_files)} untracked, {len(deleted_files)} deleted files")
        self.logger.debug(f" Modified: {', '.join(sorted(modified_files)[:5])}..." if modified_files else " Modified: none")
        self.logger.debug(f" Untracked: {', '.join(sorted(untracked_files)[:5])}..." if untracked_files else " Untracked: none")
        self.logger.debug(f" Deleted: {', '.join(sorted(deleted_files)[:5])}..." if deleted_files else " Deleted: none")

        return modified_files, untracked_files, deleted_files

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def get_modified_files(self) -> Tuple[Set[str], Set[str], Set[str]]:
        """
        Get all modified, untracked, and deleted files.

        Never raises for git failures: if git status cannot be run or exits
        non-zero, the error is logged and three empty sets are returned.

        Returns:
            Tuple of (modified_files, untracked_files, deleted_files)
        """
        status = self._collect_status()
        if status is None:
            return set(), set(), set()
        return status

    def verify_critical_files_staged(self, staged_files: Set[str]) -> List[str]:
        """
        Verify all critical paths are staged if they were modified.

        A critical path with no matching entry in ``staged_files`` is checked
        against git; it is reported only when git shows changes for it that
        are not yet in the index.

        Args:
            staged_files: Set of files that were staged

        Returns:
            List of critical files that are missing from staging
        """
        self.logger.debug("Verifying critical files are staged")

        missing_critical: List[str] = []

        for critical_path in self.CRITICAL_PATHS:
            if any(self._covers(staged, critical_path) for staged in staged_files):
                continue

            try:
                # "--" keeps the path from ever being read as an option.
                proc = self._git(
                    "status", "--porcelain", "-z", "--", critical_path, timeout=5
                )
            except self._GIT_ERRORS as e:
                self.logger.debug(f"Could not check {critical_path}: {e}")
                continue

            if self._has_unstaged_changes(proc.stdout):
                # Critical file was modified but not staged!
                missing_critical.append(critical_path)
                self.logger.warning(f"⚠️ Critical file modified but not staged: {critical_path}")

        if missing_critical:
            self.logger.error(f"❌ {len(missing_critical)} critical files not staged:")
            for path in missing_critical:
                self.logger.error(f" - {path}")
        else:
            self.logger.info("✓ All critical files verified staged")

        return missing_critical

    def stage_all_changes(self, include_untracked: bool = True) -> "GitStagingResult":
        """
        Stage all changes comprehensively with detailed logging.

        Steps: detect changes, ``git add -u`` for tracked files, ``git add``
        per untracked path (optional), verify and recover critical paths, then
        cross-check the index. A failing git status is reported as an error
        rather than as a clean repository.

        Args:
            include_untracked: Whether to stage untracked files

        Returns:
            GitStagingResult with detailed success/failure information
        """
        result = GitStagingResult(success=False)

        self._banner("Starting comprehensive git staging", leading_newline=False)

        # Step 1: Detect all modified files
        status = self._collect_status()
        if status is None:
            # Without a status we cannot tell "clean" from "broken"; fail loudly.
            error_msg = "Could not determine repository status (git status failed)"
            self.logger.error(f"❌ {error_msg}")
            result.errors.append(error_msg)
            return result

        modified, untracked, deleted = status
        result.total_files = len(modified) + len(untracked) + len(deleted)

        if result.total_files == 0:
            self.logger.info("✓ No files to stage (repository is clean)")
            result.success = True
            return result

        self.logger.info(f"\n📋 Files to stage: {result.total_files} total")
        self.logger.info(f" • Modified/Added: {len(modified)}")
        self.logger.info(f" • Untracked: {len(untracked)}")
        self.logger.info(f" • Deleted: {len(deleted)}")

        # Step 2: Stage modified and deleted files (tracked files)
        if (modified or deleted) and not self._stage_tracked(modified, deleted, result):
            return result

        # Step 3: Stage untracked files (if requested)
        if include_untracked and untracked:
            self._stage_untracked(untracked, result)

        # Step 4: Verify critical files
        self._banner("Verifying critical files are staged")
        missing_critical = self.verify_critical_files_staged(set(result.files_staged))
        if missing_critical:
            self._recover_critical(missing_critical, result)

        # Step 5: Final verification against the index
        self._banner("Final staging verification")
        self._verify_index(result)

        # Determine overall success
        result.success = not result.errors

        self._log_summary(result)
        return result

    # ------------------------------------------------------------------
    # Staging steps
    # ------------------------------------------------------------------

    def _stage_tracked(self, modified: Set[str], deleted: Set[str],
                       result: "GitStagingResult") -> bool:
        """Stage all tracked changes with ``git add -u``; return False on failure."""
        self._banner("Staging modified and deleted files")

        try:
            # Use git add -u to stage all tracked file changes
            self.logger.debug("Running: git add -u")
            proc = self._git("add", "-u", timeout=30)
        except subprocess.TimeoutExpired:
            error_msg = "git add -u timed out (>30s)"
        except self._GIT_ERRORS as e:
            error_msg = f"Failed to stage tracked files: {e}"
        else:
            if proc.returncode == 0:
                # Sorted so the recorded order is deterministic.
                result.files_staged.extend(sorted(modified))
                result.files_staged.extend(sorted(deleted))
                self.logger.info(f"✓ Staged {len(modified) + len(deleted)} tracked file changes")

                # Log each file for audit trail
                for filename in sorted(modified):
                    self.logger.debug(f" ✓ Staged (modified): {filename}")
                for filename in sorted(deleted):
                    self.logger.debug(f" ✓ Staged (deleted): {filename}")
                return True
            error_msg = f"git add -u failed: {proc.stderr}"

        self.logger.error(f"❌ {error_msg}")
        result.errors.append(error_msg)
        return False

    def _stage_untracked(self, untracked: Set[str], result: "GitStagingResult") -> None:
        """Stage each untracked path individually so failures are per-file."""
        self._banner("Staging untracked files")

        staged_count = 0
        for filename in sorted(untracked):
            try:
                self.logger.debug(f"Staging: {filename}")
                proc = self._git("add", "--", filename, timeout=5)
            except self._GIT_ERRORS as e:
                result.files_skipped.append(filename)
                self.logger.warning(f" ⚠️ Failed to stage {filename}: {e}")
                continue

            if proc.returncode == 0:
                result.files_staged.append(filename)
                staged_count += 1
                self.logger.debug(f" ✓ Staged (untracked): {filename}")
            else:
                result.files_skipped.append(filename)
                self.logger.warning(f" ⚠️ Skipped: {filename} ({proc.stderr.strip()})")

        self.logger.info(f"✓ Staged {staged_count} untracked files")

    def _recover_critical(self, missing_critical: List[str],
                          result: "GitStagingResult") -> None:
        """Explicitly ``git add`` critical paths that were left unstaged."""
        self.logger.warning(f"⚠️ Attempting to stage {len(missing_critical)} missing critical files")

        for critical_path in missing_critical:
            try:
                proc = self._git("add", "--", critical_path, timeout=5)
            except self._GIT_ERRORS as e:
                error_msg = f"Exception staging critical file {critical_path}: {e}"
                result.errors.append(error_msg)
                self.logger.error(f" ❌ {error_msg}")
                continue

            if proc.returncode == 0:
                result.files_staged.append(critical_path)
                self.logger.info(f" ✓ Recovered and staged: {critical_path}")
            else:
                error_msg = f"Failed to stage critical file {critical_path}: {proc.stderr}"
                result.errors.append(error_msg)
                self.logger.error(f" ❌ {error_msg}")

    def _verify_index(self, result: "GitStagingResult") -> None:
        """Compare the index against what this run recorded; warn on mismatch."""
        try:
            proc = self._git("diff", "--name-only", "--cached", "-z", timeout=10)
        except self._GIT_ERRORS as e:
            self.logger.warning(f"⚠️ Verification failed: {e}")
            return

        if proc.returncode != 0:
            self.logger.warning(f"⚠️ Could not verify staging: {proc.stderr}")
            return

        actually_staged = {path for path in proc.stdout.split("\0") if path}
        self.logger.info(f"✓ Git reports {len(actually_staged)} files staged")

        # Recorded entries may be directories ("dir/") while git lists the
        # individual files beneath them, so compare directory-aware.
        expected_staged = set(result.files_staged)
        expected_dirs = tuple(p for p in expected_staged if p.endswith("/"))

        extra = {
            path for path in actually_staged
            if path not in expected_staged and not path.startswith(expected_dirs)
        }
        missing = {
            path for path in expected_staged
            if path not in actually_staged
            and not (path.endswith("/")
                     and any(a.startswith(path) for a in actually_staged))
        }

        if extra:
            self.logger.warning(f"⚠️ {len(extra)} extra files staged: {', '.join(sorted(extra)[:5])}")
        if missing:
            self.logger.warning(f"⚠️ {len(missing)} expected files not staged: {', '.join(sorted(missing)[:5])}")

    def _log_summary(self, result: "GitStagingResult") -> None:
        """Log the final status, counts, and any collected errors."""
        self._banner("Git Staging Summary")
        self.logger.info(f"Status: {'✅ SUCCESS' if result.success else '❌ FAILED'}")
        self.logger.info(f"Files staged: {len(result.files_staged)}")
        self.logger.info(f"Files skipped: {len(result.files_skipped)}")
        self.logger.info(f"Errors: {len(result.errors)}")

        if result.errors:
            self.logger.error("\nErrors encountered:")
            for error in result.errors:
                self.logger.error(f" • {error}")

def main() -> int:
    """Test git staging manager.

    Configures DEBUG logging, stages tracked changes only
    (``include_untracked=False``), and returns a process exit code:
    0 when staging succeeded, 1 otherwise.
    """
    # Setup basic logging for testing
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s - %(levelname)s - %(message)s'
    )
    logger = logging.getLogger(__name__)

    # Get repo root (assume we're in scripts/core/)
    # NOTE(review): parents[4] is four directories above this file's folder,
    # which is further up than "scripts/core/" alone implies — confirm it
    # matches the intended repository layout.
    repo_root = Path(__file__).resolve().parents[4]
    logger.info(f"Testing in repository: {repo_root}")

    # Create staging manager
    manager = GitStagingManager(repo_root, logger)

    # Test staging
    result = manager.stage_all_changes(include_untracked=False)

    return 0 if result.success else 1


if __name__ == "__main__":
    import sys

    # Exit with appropriate code
    sys.exit(main())