#!/usr/bin/env python3
"""
Migrate Legacy Session Logs to Project-Scoped Directories (J.27.4.1)

Scans legacy flat session logs (SESSION-LOG-*.md) in the session-logs root
and attributes them to projects by parsing content for task IDs and path hints.

Task ID -> Project mapping:
    A.., B.., C.., etc. -> PILOT (standard track IDs)
    FPA..               -> CUST-avivatec-fpa
    H.0.*               -> PILOT (ad-hoc framework tasks)

Path hint detection:
    "/PROJECTS/coditect-rollout-master/" -> PILOT
    "/PROJECTS/coditect-jv-avivatec/"    -> CUST-avivatec-fpa

Usage:
    # Preview migration
    python3 migrate-legacy-session-logs.py --dry-run

    # Execute migration
    python3 migrate-legacy-session-logs.py --migrate

    # Verbose output
    python3 migrate-legacy-session-logs.py --migrate --verbose

Author: CODITECT Team
Version: 1.0.0
Created: 2026-02-08
ADR: ADR-155 (Project-Scoped Session Logs)
Task: J.27.4.1
"""

import argparse
import json
import os
import re
import shutil
import sys
from collections import Counter
from pathlib import Path
from typing import Dict, List, Optional, Tuple

# Project mapping rules.
TASK_ID_PROJECT_MAP = {
    # Standard CODITECT tracks (A-N, O-AK) -> PILOT
    "default": "PILOT",
    # Customer-specific prefixes
    "FPA": "CUST-avivatec-fpa",
}

# Path fragments -> project mapping.
PATH_HINT_MAP = {
    "coditect-rollout-master": "PILOT",
    "coditect-jv-avivatec": "CUST-avivatec-fpa",
}

# Standard track letters (all map to PILOT).
STANDARD_TRACK_LETTERS = set("ABCDEFGHIJKLMNOPQRSTUVWXYZ")

# Task ID pattern: letter(s).number.number[.number]
# Dots are escaped so "A.1.2" matches but "Ax1y2" does not.
TASK_ID_PATTERN = re.compile(
    r'\b([A-Z]{1,3})\.(\d+)\.(\d+)(?:\.(\d+))?\b'
)

def get_session_logs_dir() -> Path:
    """Get session logs base directory.

    Prefers ~/PROJECTS/.coditect-data/session-logs when the
    .coditect-data directory exists; otherwise falls back to
    ~/.coditect/session-logs.

    Returns:
        Path to the session-logs base directory (may not exist yet).
    """
    user_data = Path.home() / "PROJECTS" / ".coditect-data"
    if user_data.exists():
        return user_data / "session-logs"
    return Path.home() / ".coditect" / "session-logs"

def get_machine_uuid() -> str:
    """Get machine UUID from machine-id.json.

    Checks the two known machine-id.json locations in priority order.
    Unreadable or malformed files are skipped (best-effort).

    Returns:
        The "machine_uuid" value, or "unknown-machine" if unavailable.
    """
    for candidate in [
        Path.home() / "PROJECTS" / ".coditect-data" / "machine-id.json",
        Path.home() / ".coditect" / "machine-id.json",
    ]:
        if candidate.exists():
            try:
                with open(candidate) as f:
                    return json.load(f).get("machine_uuid", "unknown-machine")
            except (json.JSONDecodeError, IOError):
                # Corrupt/unreadable file: fall through to next candidate.
                pass
    return "unknown-machine"

def find_legacy_logs(logs_dir: Path) -> List[Path]:
    """
    Find legacy flat session logs (not symlinks, not in projects/).

    Args:
        logs_dir: Session-logs root directory to scan.

    Returns:
        List of Path objects for real (non-symlink) legacy log files
    """
    results = []
    for f in sorted(logs_dir.glob("SESSION-LOG-*.md")):
        # Skip symlinks (J.27.1 already created symlinks for PILOT logs)
        if f.is_symlink():
            continue
        # Skip if in a subdirectory (defensive; this glob is non-recursive)
        if f.parent != logs_dir:
            continue
        results.append(f)
    return results

def parse_task_ids(content: str) -> Counter:
    """
    Extract task ID prefixes from log content and count occurrences.

    Args:
        content: Raw log text to scan with TASK_ID_PATTERN.

    Returns:
        Counter of prefix -> count (e.g., {"A": 15, "FPA": 3})
    """
    prefixes: Counter = Counter()
    for match in TASK_ID_PATTERN.finditer(content):
        prefix = match.group(1)
        prefixes[prefix] += 1
    return prefixes

def parse_path_hints(content: str) -> Counter:
    """
    Detect project path fragments in log content.

    Args:
        content: Raw log text searched for PATH_HINT_MAP fragments.

    Returns:
        Counter of project_id -> count
    """
    projects: Counter = Counter()
    for fragment, project_id in PATH_HINT_MAP.items():
        count = content.count(fragment)
        if count > 0:
            projects[project_id] += count
    return projects

def attribute_log(file_path: Path, verbose: bool = False) -> Optional[str]:
    """
    Determine which project a legacy log belongs to.

    Uses task IDs and path hints to attribute the log.

    Args:
        file_path: Path to the log file
        verbose: Print attribution reasoning

    Returns:
        Project ID or None if cannot determine
    """
    try:
        content = file_path.read_text(encoding='utf-8', errors='replace')
    except IOError:
        return None

    # Parse task IDs
    task_prefixes = parse_task_ids(content)

    # Parse path hints
    path_projects = parse_path_hints(content)

    # Score projects
    project_scores: Counter = Counter()

    # Task ID scoring
    for prefix, count in task_prefixes.items():
        if prefix in TASK_ID_PROJECT_MAP:
            project_scores[TASK_ID_PROJECT_MAP[prefix]] += count * 2
        elif len(prefix) == 1 and prefix in STANDARD_TRACK_LETTERS:
            project_scores["PILOT"] += count * 2
        else:
            # Unknown multi-letter prefix, still count toward default
            project_scores[TASK_ID_PROJECT_MAP["default"]] += count

    # Path hint scoring (weighted higher - explicit project references)
    for project_id, count in path_projects.items():
        project_scores[project_id] += count * 3

    if verbose and (task_prefixes or path_projects):
        print(f"    Task IDs: {dict(task_prefixes)}")
        print(f"    Path hints: {dict(path_projects)}")
        print(f"    Scores: {dict(project_scores)}")

    if not project_scores:
        return None

    # Return highest-scoring project
    winner = project_scores.most_common(1)[0]
    return winner[0]

def migrate_logs(
    dry_run: bool = True,
    verbose: bool = False,
    default_project: str = "PILOT",
) -> Tuple[int, int, int]:
    """
    Migrate legacy flat logs to project-scoped directories.

    Args:
        dry_run: Preview without moving
        verbose: Detailed output
        default_project: Fallback project for unattributable logs

    Returns:
        (migrated, skipped, errors) counts
    """
    logs_dir = get_session_logs_dir()
    machine_uuid = get_machine_uuid()

    print("=" * 60)
    print("Legacy Session Log Migration (J.27.4.1)")
    print("=" * 60)
    print(f"Source: {logs_dir}/SESSION-LOG-*.md (flat)")
    print(f"Target: {logs_dir}/projects/{{project}}/{machine_uuid}/")
    print(f"Machine: {machine_uuid}")
    if dry_run:
        print("[DRY-RUN MODE]")
    print()

    legacy_logs = find_legacy_logs(logs_dir)
    print(f"Found {len(legacy_logs)} legacy flat log files (non-symlink)")

    if not legacy_logs:
        print("No legacy logs to migrate. All logs are already project-scoped or symlinked.")
        return 0, 0, 0

    migrated = 0
    skipped = 0
    errors = 0

    for log_file in legacy_logs:
        print(f"\n  {log_file.name}:")

        # Attribute to project; fall back to default_project when the
        # content gives no signal (no task IDs, no path hints).
        project_id = attribute_log(log_file, verbose=verbose)
        if not project_id:
            project_id = default_project
            print(f"    Attribution: {project_id} (default - no task IDs or paths found)")
        else:
            print(f"    Attribution: {project_id}")

        # Determine destination
        dest_dir = logs_dir / "projects" / project_id / machine_uuid
        dest_file = dest_dir / log_file.name

        if dest_file.exists():
            print(f"    SKIP: Destination already exists")
            skipped += 1
            continue

        if dry_run:
            print(f"    [DRY-RUN] Would move to: projects/{project_id}/{machine_uuid}/{log_file.name}")
            migrated += 1
        else:
            try:
                dest_dir.mkdir(parents=True, exist_ok=True)
                shutil.move(str(log_file), str(dest_file))
                print(f"    Moved to: projects/{project_id}/{machine_uuid}/{log_file.name}")
                migrated += 1
            except (IOError, OSError) as e:
                print(f"    ERROR: {e}")
                errors += 1

    print(f"\n{'[DRY-RUN] ' if dry_run else ''}Summary:")
    print(f"  Migrated: {migrated}")
    print(f"  Skipped: {skipped} (already exist)")
    print(f"  Errors: {errors}")

    return migrated, skipped, errors

def main():
    """CLI entry point: parse arguments, run the migration, set exit code."""
    parser = argparse.ArgumentParser(
        description="Migrate legacy session logs to project-scoped directories (J.27.4.1)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Preview migration
  python3 migrate-legacy-session-logs.py --dry-run

  # Execute migration
  python3 migrate-legacy-session-logs.py --migrate

  # Verbose with custom default project
  python3 migrate-legacy-session-logs.py --migrate --verbose --default-project PILOT
"""
    )
    parser.add_argument("--dry-run", action="store_true",
                        help="Preview migration without moving files (default)")
    parser.add_argument("--migrate", action="store_true",
                        help="Execute the migration")
    parser.add_argument("--verbose", "-v", action="store_true",
                        help="Detailed attribution output")
    parser.add_argument("--default-project", type=str, default="PILOT",
                        help="Default project for unattributable logs (default: PILOT)")

    args = parser.parse_args()

    # Default to dry-run if neither specified
    if not args.migrate:
        args.dry_run = True

    migrated, skipped, errors = migrate_logs(
        dry_run=args.dry_run,
        verbose=args.verbose,
        default_project=args.default_project,
    )

    # Non-zero exit only when at least one move failed.
    sys.exit(1 if errors > 0 else 0)


if __name__ == "__main__":
    main()