#!/usr/bin/env python3
"""
Migrate Legacy Session Logs to Project-Scoped Directories (J.27.4.1)

Scans legacy flat session logs (SESSION-LOG-*.md) in the session-logs root
and attributes them to projects by parsing content for task IDs and path hints.

Task ID → Project mapping:
    A.*.*, B.*.*, C.*.*, etc. → PILOT (standard track IDs)
    FPA.*.*                   → CUST-avivatec-fpa
    H.0.*                     → PILOT (ad-hoc framework tasks)

Path hint detection:
    "/PROJECTS/coditect-rollout-master/" → PILOT
    "/PROJECTS/coditect-jv-avivatec/"    → CUST-avivatec-fpa

Usage:
    # Preview migration
    python3 migrate-legacy-session-logs.py --dry-run

    # Execute migration
    python3 migrate-legacy-session-logs.py --migrate

    # Verbose output
    python3 migrate-legacy-session-logs.py --migrate --verbose

Author: CODITECT Team
Version: 1.0.0
Created: 2026-02-08
ADR: ADR-155 (Project-Scoped Session Logs)
Task: J.27.4.1
"""
import argparse import json import os import re import shutil import sys from collections import Counter from pathlib import Path from typing import Dict, List, Optional, Tuple
# Project mapping rules: task-ID prefix → project ID.
# The "default" entry is the fallback project; it can never collide with a
# parsed prefix because prefixes are upper-case letters only.
TASK_ID_PROJECT_MAP = {
    # Standard CODITECT tracks (A-N, O-AK) → PILOT
    "default": "PILOT",
    # Customer-specific prefixes
    "FPA": "CUST-avivatec-fpa",
}

# Path fragments → project mapping
PATH_HINT_MAP = {
    "coditect-rollout-master": "PILOT",
    "coditect-jv-avivatec": "CUST-avivatec-fpa",
}

# Standard track letters (all map to PILOT)
STANDARD_TRACK_LETTERS = set("ABCDEFGHIJKLMNOPQRSTUVWXYZ")

# Task ID pattern: letter(s).number.number[.number]
# The separators are escaped so only a literal "." is accepted. An unescaped
# "." matches any character, which would count strings such as "AB1234" or
# "ID 12 34" as task IDs and distort project attribution.
TASK_ID_PATTERN = re.compile(
    r'\b([A-Z]{1,3})\.(\d+)\.(\d+)(?:\.(\d+))?\b'
)
def get_session_logs_dir() -> Path:
    """Return the base directory that holds session logs.

    ``~/PROJECTS/.coditect-data`` is preferred when that directory exists;
    otherwise ``~/.coditect`` is used. Only the parent data directory is
    probed; the returned ``session-logs`` path is not checked for existence.
    """
    home = Path.home()
    shared_root = home / "PROJECTS" / ".coditect-data"
    base = shared_root if shared_root.exists() else home / ".coditect"
    return base / "session-logs"
def get_machine_uuid() -> str:
    """Get machine UUID from machine-id.json.

    Looks in ``~/PROJECTS/.coditect-data`` first, then in ``~/.coditect``.
    A candidate file that is missing, unreadable, not valid UTF-8 JSON, not a
    JSON object, or whose ``machine_uuid`` is not a non-empty string is
    skipped and the next candidate is tried. A readable JSON object without
    a ``machine_uuid`` key yields ``"unknown-machine"`` immediately.

    Returns:
        The machine UUID string, or ``"unknown-machine"`` when no candidate
        provides a usable value.
    """
    home = Path.home()
    candidates = [
        home / "PROJECTS" / ".coditect-data" / "machine-id.json",
        home / ".coditect" / "machine-id.json",
    ]
    for candidate in candidates:
        try:
            with open(candidate, encoding="utf-8") as f:
                data = json.load(f)
        except (ValueError, OSError):
            # ValueError covers JSONDecodeError and UnicodeDecodeError;
            # OSError covers a missing or unreadable file.
            continue
        if not isinstance(data, dict):
            # e.g. a JSON list or scalar: there is no key to look up.
            continue
        machine_uuid = data.get("machine_uuid", "unknown-machine")
        # The value is used as a directory name by the migration, so only a
        # non-empty string is acceptable (a JSON null or "" is rejected).
        if isinstance(machine_uuid, str) and machine_uuid:
            return machine_uuid
    return "unknown-machine"
def find_legacy_logs(logs_dir: Path) -> List[Path]:
    """
    Collect the legacy flat session logs that sit directly in ``logs_dir``.

    Entries matching ``SESSION-LOG-*.md`` are returned in sorted order.
    Symlinks are left out (J.27.1 already created symlinks for PILOT logs),
    as is anything whose parent is not ``logs_dir`` itself.

    Args:
        logs_dir: Session-logs root directory to scan (non-recursive)

    Returns:
        List of Path objects for real (non-symlink) legacy log files
    """
    return [
        entry
        for entry in sorted(logs_dir.glob("SESSION-LOG-*.md"))
        if not entry.is_symlink() and entry.parent == logs_dir
    ]
def parse_task_ids(content: str) -> Counter:
    """
    Tally the letter prefix of every task ID found in the log text.

    Each match of TASK_ID_PATTERN contributes one count to its first
    capture group (the letter prefix).

    Args:
        content: Full text of a session log

    Returns:
        Counter of prefix → count (e.g., {"A": 15, "FPA": 3})
    """
    return Counter(
        found.group(1) for found in TASK_ID_PATTERN.finditer(content)
    )
def parse_path_hints(content: str) -> Counter:
    """
    Count how often each known project path fragment occurs in the log text.

    Every fragment in PATH_HINT_MAP is counted with ``str.count`` and the
    total is credited to the project that fragment maps to. Projects with no
    occurrences do not appear in the result.

    Args:
        content: Full text of a session log

    Returns:
        Counter of project_id → count
    """
    hits = Counter()
    occurrences = (
        (project_id, content.count(fragment))
        for fragment, project_id in PATH_HINT_MAP.items()
    )
    for project_id, seen in occurrences:
        if seen <= 0:
            # Keep zero-count projects out of the Counter entirely.
            continue
        hits[project_id] += seen
    return hits
def attribute_log(file_path: Path, verbose: bool = False) -> Optional[str]:
    """
    Decide which project a legacy log belongs to.

    The file is read as UTF-8 (undecodable bytes are replaced) and scored:
    task-ID prefixes that are explicitly mapped, or that are single standard
    track letters, add 2 points per occurrence; any other prefix adds 1 point
    per occurrence to the default project; each path-hint occurrence adds
    3 points. The project with the highest score wins.

    Args:
        file_path: Path to the log file
        verbose: Print attribution reasoning

    Returns:
        Project ID, or None if the file cannot be read or nothing scored
    """
    try:
        text = file_path.read_text(encoding='utf-8', errors='replace')
    except IOError:
        return None

    prefix_counts = parse_task_ids(text)
    hint_counts = parse_path_hints(text)

    scores: Counter = Counter()

    # Task ID scoring: pick the target project and weight, then accumulate.
    for prefix, hits in prefix_counts.items():
        if prefix in TASK_ID_PROJECT_MAP:
            target, weight = TASK_ID_PROJECT_MAP[prefix], 2
        elif len(prefix) == 1 and prefix in STANDARD_TRACK_LETTERS:
            target, weight = "PILOT", 2
        else:
            # Unknown multi-letter prefix: still counts toward the default,
            # but at half the weight of a recognised prefix.
            target, weight = TASK_ID_PROJECT_MAP["default"], 1
        scores[target] += hits * weight

    # Path hints are explicit project references, so they weigh the most.
    for project_id, hits in hint_counts.items():
        scores[project_id] += hits * 3

    if verbose and (prefix_counts or hint_counts):
        print(f" Task IDs: {dict(prefix_counts)}")
        print(f" Path hints: {dict(hint_counts)}")
        print(f" Scores: {dict(scores)}")

    if not scores:
        return None

    best_project, _best_score = scores.most_common(1)[0]
    return best_project
def migrate_logs(
    dry_run: bool = True,
    verbose: bool = False,
    default_project: str = "PILOT",
) -> Tuple[int, int, int]:
    """
    Move legacy flat logs into ``projects/<project>/<machine_uuid>/``.

    Each legacy log is attributed to a project (falling back to
    ``default_project``), then moved unless a file of the same name already
    exists at the destination. Progress and a final summary are printed.

    Args:
        dry_run: Preview without moving
        verbose: Detailed output
        default_project: Fallback project for unattributable logs

    Returns:
        (migrated, skipped, errors) counts; in dry-run mode ``migrated``
        counts the files that would have been moved
    """
    logs_dir = get_session_logs_dir()
    machine_uuid = get_machine_uuid()

    rule = "=" * 60
    print(rule)
    print("Legacy Session Log Migration (J.27.4.1)")
    print(rule)
    print(f"Source: {logs_dir}/SESSION-LOG-*.md (flat)")
    print(f"Target: {logs_dir}/projects/{{project}}/{machine_uuid}/")
    print(f"Machine: {machine_uuid}")
    if dry_run:
        print("[DRY-RUN MODE]")
    print()

    pending = find_legacy_logs(logs_dir)
    print(f"Found {len(pending)} legacy flat log files (non-symlink)")
    if not pending:
        print("No legacy logs to migrate. All logs are already project-scoped or symlinked.")
        return 0, 0, 0

    moved = already_present = failed = 0

    for source in pending:
        print(f"\n {source.name}:")

        # Attribute to a project, falling back to the caller's default.
        detected = attribute_log(source, verbose=verbose)
        if detected:
            project_id = detected
            print(f" Attribution: {project_id}")
        else:
            project_id = default_project
            print(f" Attribution: {project_id} (default - no task IDs or paths found)")

        target_dir = logs_dir / "projects" / project_id / machine_uuid
        target = target_dir / source.name
        shown_target = f"projects/{project_id}/{machine_uuid}/{source.name}"

        # Never overwrite a log that is already at the destination.
        if target.exists():
            print(" SKIP: Destination already exists")
            already_present += 1
            continue

        if dry_run:
            print(f" [DRY-RUN] Would move to: {shown_target}")
            moved += 1
            continue

        try:
            target_dir.mkdir(parents=True, exist_ok=True)
            shutil.move(str(source), str(target))
            print(f" Moved to: {shown_target}")
            moved += 1
        except (IOError, OSError) as exc:
            print(f" ERROR: {exc}")
            failed += 1

    heading = "[DRY-RUN] Summary:" if dry_run else "Summary:"
    print(f"\n{heading}")
    print(f" Migrated: {moved}")
    print(f" Skipped: {already_present} (already exist)")
    print(f" Errors: {failed}")

    return moved, already_present, failed
def main():
    """Parse command-line options, run the migration, and exit.

    The run is a dry run unless ``--migrate`` is given; ``--dry-run`` takes
    precedence when both flags are passed. The process exits with status 1
    if any file failed to move, otherwise 0.
    """
    parser = argparse.ArgumentParser(
        description="Migrate legacy session logs to project-scoped directories (J.27.4.1)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    # Preview migration
    python3 migrate-legacy-session-logs.py --dry-run

    # Execute migration
    python3 migrate-legacy-session-logs.py --migrate

    # Verbose with custom default project
    python3 migrate-legacy-session-logs.py --migrate --verbose --default-project PILOT
"""
    )

    # Boolean switches share the same action, so register them from a table.
    switches = (
        (("--dry-run",), "Preview migration without moving files (default)"),
        (("--migrate",), "Execute the migration"),
        (("--verbose", "-v"), "Detailed attribution output"),
    )
    for names, help_text in switches:
        parser.add_argument(*names, action="store_true", help=help_text)
    parser.add_argument(
        "--default-project",
        type=str,
        default="PILOT",
        help="Default project for unattributable logs (default: PILOT)",
    )

    options = parser.parse_args()

    # Default to dry-run if --migrate was not requested.
    preview_only = options.dry_run or not options.migrate

    _migrated, _skipped, errors = migrate_logs(
        dry_run=preview_only,
        verbose=options.verbose,
        default_project=options.default_project,
    )

    sys.exit(int(errors > 0))
# Run the CLI only when executed as a script, not when imported.
# The interpreter sets the module global ``__name__`` to "__main__" for the
# top-level script; a bare ``name`` is undefined and raises NameError.
if __name__ == "__main__":
    main()