#!/usr/bin/env python3
"""
CODITECT Path Discovery Module (ADR-114, ADR-118)

Provides consistent path discovery for all CODITECT scripts.
Handles configurable PROJECTS location for customer flexibility.

DEPRECATION NOTICE (ADR-118):
    The legacy context.db is DEPRECATED and will be removed in v3.0.0.
    Use the four-tier database architecture instead:
    - get_org_db_path()      → org.db      (TIER 2: decisions, skill_learnings - CRITICAL)
    - get_sessions_db_path() → sessions.db (TIER 3: messages, tool_analytics - regenerable)
    - get_projects_db_path() → projects.db (TIER 4: project metadata - regenerable)

    get_context_db_path() now emits DeprecationWarning when called.

Usage:
    from scripts.core.paths import (
        # Four-tier database paths (ADR-118) - USE THESE
        get_org_db_path,           # TIER 2: irreplaceable knowledge
        get_sessions_db_path,      # TIER 3: regenerable session data
        get_projects_db_path,      # TIER 4: project-specific data
        get_messaging_db_path,     # ADR-160: inter-session coordination

        # Directory discovery (ADR-114)
        discover_projects_dir,
        get_user_data_dir,
        get_framework_dir,

        # Documentation paths (ADR-213)
        get_docs_dir,              # coditect-documentation root
        get_docs_core_dir,         # coditect-core docs within coditect-documentation
        get_tracks_dir,            # TRACK files directory
        get_adrs_dir,              # ADR directory
        get_analysis_dir,          # Analysis documents directory

        # Module-level constants
        PROJECTS_DIR,
        USER_DATA_LOC,
        FRAMEWORK_LOC,
        DOCS_DIR,
        TRACKS_DIR,
        ADRS_DIR,
        ANALYSIS_DIR,
    )

Created: 2026-01-25
Updated: 2026-02-19
ADR: ADR-114 (User Data Separation), ADR-118 (Four-Tier Database Architecture),
     ADR-160 (Inter-Session Messaging), ADR-213 (Documentation Externalization)
"""

import json
import os
import re
import subprocess
import sys
import warnings
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple

# Home directory of the current user; used as the base for the default
# locations resolved further down in this module.
HOME = Path.home()

# -----------------------------------------------------------------------------
# Framework Location (Platform-Specific, Protected)
# -----------------------------------------------------------------------------

def get_framework_dir() -> Path:
    """
    Get the protected framework installation location.

    Platform-specific:
    - macOS: ~/Library/Application Support/CODITECT/core/
    - Linux: $XDG_DATA_HOME/coditect/core/ (default ~/.local/share/coditect/core/)
    - Windows: %LOCALAPPDATA%\\CODITECT\\core\\ (default ~/AppData/Local/CODITECT/core/)

    An environment variable that is set but empty is treated as unset, so
    the result is never built from an empty (relative) base path.

    Returns:
        Path: The framework directory. Its existence is not checked here.
    """
    if sys.platform == "darwin":
        return HOME / "Library" / "Application Support" / "CODITECT" / "core"

    if sys.platform == "win32":
        # `or` rather than a .get() default: LOCALAPPDATA="" must also fall back.
        local_app_data = os.environ.get("LOCALAPPDATA") or str(HOME / "AppData" / "Local")
        return Path(local_app_data) / "CODITECT" / "core"

    # Linux and other Unix-like systems. The XDG Base Directory spec says an
    # empty XDG_DATA_HOME is equivalent to an unset one.
    xdg_data = os.environ.get("XDG_DATA_HOME") or str(HOME / ".local" / "share")
    return Path(xdg_data) / "coditect" / "core"

# -----------------------------------------------------------------------------
# PROJECTS Directory Discovery (ADR-114)
# -----------------------------------------------------------------------------

def discover_projects_dir() -> Path:
    """
    Discover the PROJECTS directory location.

    CODITECT customers can choose their own PROJECTS directory.
    Discovery priority:

    1. Environment variable (highest priority): $CODITECT_PROJECTS
    2. Config file: ~/.coditect/config/config.json → projects_dir
    3. Symlink discovery: first well-known directory under the home
       directory that contains a `.coditect` symlink
    4. Default fallback: ~/PROJECTS

    A config file that is unreadable, not valid JSON, not a JSON object, or
    whose `projects_dir` is not a non-empty string is ignored and discovery
    continues with the next step.

    Returns:
        Path: The discovered PROJECTS directory (may not exist yet)
    """
    # 1. Environment variable (highest priority). It is honoured even when
    # the directory does not exist yet, because it may be created later.
    if env_projects := os.environ.get("CODITECT_PROJECTS"):
        return Path(env_projects).expanduser()

    # 2. Config file
    config_path = HOME / ".coditect" / "config" / "config.json"
    if config_path.exists():
        try:
            with open(config_path, encoding="utf-8") as f:
                config = json.load(f)
        except (ValueError, OSError):
            # ValueError covers JSONDecodeError and UnicodeDecodeError.
            config = None
        if isinstance(config, dict):
            projects_dir = config.get("projects_dir")
            if isinstance(projects_dir, str) and projects_dir:
                return Path(projects_dir).expanduser()

    # 3. Symlink discovery - find where the .coditect symlink lives.
    # These are common directory names customers might use.
    candidate_names = (
        "PROJECTS",
        "Projects",
        "projects",
        "Dev",
        "dev",
        "Development",
        "development",
        "Code",
        "code",
        "src",
        "workspace",
        "Workspace",
    )
    for name in candidate_names:
        candidate = HOME / name
        if candidate.exists() and (candidate / ".coditect").is_symlink():
            return candidate

    # 4. Default fallback
    return HOME / "PROJECTS"

def get_user_data_dir() -> Path:
    """
    Get the user data directory location.

    User data lives in `.coditect-data/` directly under the directory
    returned by discover_projects_dir(). It is kept apart from the
    framework (core/) in order to:
    - Allow atomic framework updates without affecting user data
    - Enable dedicated GCS backups for user data
    - Prevent accidental git commits of user data

    Returns:
        Path: The user data directory (may not exist yet)
    """
    projects_root = discover_projects_dir()
    return projects_root / ".coditect-data"

def get_context_storage_dir() -> Path:
    """
    Get the context storage directory.

    This is the `context-storage/` folder inside the user data directory.
    It holds customer data (ADR-118 Four-Tier):
    - org.db (TIER 2: skill_learnings, decisions, error_solutions) - CRITICAL
    - sessions.db (TIER 3: messages, tool_analytics, token_economics) - Regenerable
    - context.db (LEGACY: being migrated to org.db + sessions.db)
    - platform.db (TIER 1: component index) - Regenerable
    - unified_messages.jsonl (source of truth for sessions.db)

    Returns:
        Path: The context storage directory
    """
    user_data = get_user_data_dir()
    return user_data / "context-storage"

# -----------------------------------------------------------------------------
# ADR-118: Four-Tier Database Paths
# -----------------------------------------------------------------------------

def get_org_db_path() -> Path:
    """
    Get the organization database path (ADR-118 TIER 2).

    org.db contains IRREPLACEABLE accumulated knowledge:
    - skill_learnings (758K+ rows)
    - decisions (1.8K+ rows)
    - error_solutions (475+ rows)

    BACKUP: CRITICAL - Daily GFS retention

    Returns:
        Path: Location of org.db inside the context storage directory
    """
    storage = get_context_storage_dir()
    return storage / "org.db"

def get_sessions_db_path() -> Path:
    """
    Get the sessions database path (ADR-118 TIER 3).

    sessions.db contains REGENERABLE session data:
    - messages (251K+ rows)
    - tool_analytics (8.2M+ rows)
    - token_economics (17.8M+ rows)
    - code_patterns, session_insights, embeddings

    BACKUP: Optional (regenerable from unified_messages.jsonl via /cx)

    Returns:
        Path: Location of sessions.db inside the context storage directory
    """
    storage = get_context_storage_dir()
    return storage / "sessions.db"

def get_context_db_path() -> Path:
    """
    Get the legacy context database path.

    .. deprecated:: 2.0.0
        context.db is deprecated and will be removed in v3.0.0.
        Use :func:`get_org_db_path` for irreplaceable data (decisions,
        skill_learnings) or :func:`get_sessions_db_path` for regenerable
        data (messages, tool_analytics). See ADR-118 for migration details.

    context.db is being split into:
    - org.db (TIER 2: irreplaceable)
    - sessions.db (TIER 3: regenerable)

    Every call emits a DeprecationWarning attributed to the caller
    (stacklevel=2) before the path is returned.

    Returns:
        Path: The context.db file path (legacy)
    """
    message = (
        "get_context_db_path() is deprecated. "
        "Use get_org_db_path() for decisions/skill_learnings (TIER 2) "
        "or get_sessions_db_path() for messages/tool_analytics (TIER 3). "
        "See ADR-118 for migration details."
    )
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    legacy_db = get_context_storage_dir() / "context.db"
    return legacy_db

def get_projects_db_path() -> Path:
    """
    Get the projects database path (ADR-118 TIER 4).

    projects.db contains REGENERABLE project data:
    - projects (registered project metadata)
    - content_hashes (file change detection)
    - project_embeddings (semantic search vectors)
    - exclude_patterns (per-project ignore rules)

    BACKUP: Optional (regenerable via /cx --index-project)

    Returns:
        Path: Location of projects.db inside the context storage directory
    """
    storage = get_context_storage_dir()
    return storage / "projects.db"

def get_messaging_db_path() -> Path:
    """
    Get the inter-session messaging database path (ADR-160).

    messaging.db is a dedicated, lightweight (<1 MB) database for
    inter-session coordination. It is intentionally separate from
    sessions.db (18+ GB) to avoid SQLITE_BUSY contention.

    Contains:
    - session_registry: Active LLM session tracking
    - inter_session_messages: Pub/sub message channels
    - file_locks: Advisory file lock tracking

    BACKUP: Not needed (ephemeral coordination data, auto-expires via TTL)

    Returns:
        Path: Location of messaging.db inside the context storage directory
    """
    storage = get_context_storage_dir()
    return storage / "messaging.db"

def get_session_logs_dir() -> Path:
    """
    Get the session logs directory.

    This is `session-logs/` inside the user data directory.

    Note: Session logs may be symlinked to a git repo for /sync-logs workflow.

    Returns:
        Path: The session logs directory (or symlink)
    """
    user_data = get_user_data_dir()
    return user_data / "session-logs"

def get_session_log_sync_dir(project_id: Optional[str] = None) -> Path:
    """
    Get the SSOT session log directory for sync operations (ADR-155).

    With a project, the result is
    `<session-logs>/projects/{project_id}/{machine_uuid}/`; without one it
    is `<session-logs>/{machine_uuid}/`. When no machine UUID is available
    the literal "unknown-machine" is used in its place.

    Args:
        project_id: Optional project ID (auto-detected via
            discover_project() if not provided)

    Returns:
        Path: The canonical session log sync directory
    """
    logs_root = get_session_logs_dir()
    machine_uuid = get_machine_uuid() or "unknown-machine"

    resolved_project = discover_project() if project_id is None else project_id

    if not resolved_project:
        return logs_root / machine_uuid
    return logs_root / "projects" / resolved_project / machine_uuid

def get_rollout_master_dir() -> Path:
    """
    Get the coditect-rollout-master directory.

    Discovery:
    1. The directory three levels above the framework dir, if it contains
       a .gitmodules file
    2. The nearest ancestor of the CWD (the filesystem root itself is not
       inspected) that has both .gitmodules and submodules/core/
    3. Default: <PROJECTS dir>/coditect-rollout-master

    Returns:
        Path: The rollout-master directory
    """
    # If the framework sits at submodules/core/coditect-core inside
    # rollout-master, rollout-master is three levels up.
    above_framework = get_framework_dir().parent.parent.parent
    if (above_framework / ".gitmodules").exists():
        return above_framework

    # Otherwise search upward from the current working directory.
    here = Path.cwd().resolve()
    for directory in (here, *here.parents):
        if directory == directory.parent:
            # Reached the filesystem root; stop without inspecting it.
            break
        has_gitmodules = (directory / ".gitmodules").exists()
        if has_gitmodules and (directory / "submodules" / "core").exists():
            return directory

    # Default
    return discover_projects_dir() / "coditect-rollout-master"

def get_docs_dir() -> Path:
    """
    Get the coditect-documentation directory (ADR-213).

    The documentary content SSOT. Contains ADRs, analysis, research,
    project plans, guides, standards, and all submodule documentation.

    Resolution:
    1. submodules/docs/coditect-documentation inside rollout-master, when
       that directory exists
    2. Otherwise the framework directory itself (legacy location)

    Returns:
        Path: The documentation directory
    """
    rollout_master = get_rollout_master_dir()
    externalized = rollout_master / "submodules" / "docs" / "coditect-documentation"
    if not externalized.exists():
        # Legacy layout: documentation still lives with coditect-core.
        return get_framework_dir()
    return externalized

def get_docs_core_dir() -> Path:
    """
    Get the coditect-core documentation directory within coditect-documentation.

    This is where coditect-core's documentary content (ADRs, analysis, etc.)
    lives after externalization (ADR-213). If the docs directory has no
    `coditect-core/` child, the docs directory itself is returned.

    Returns:
        Path: The coditect-core docs directory
    """
    docs_root = get_docs_dir()
    nested = docs_root / "coditect-core"
    # Without the nested folder we are already pointing at the framework.
    return nested if nested.exists() else docs_root

def get_tracks_dir() -> Path:
    """
    Get the TRACK files directory (ADR-213).

    TRACK files are the SSOT for task management. After externalization,
    they live in coditect-documentation/coditect-core/project/plans/tracks/.
    When that directory does not exist, the legacy location
    <framework>/internal/project/plans/tracks is returned instead.

    Returns:
        Path: The tracks directory
    """
    relative = Path("project") / "plans" / "tracks"

    externalized = get_docs_core_dir() / relative
    if externalized.exists():
        return externalized

    # Legacy location inside coditect-core.
    return get_framework_dir() / "internal" / relative

def get_adrs_dir() -> Path:
    """
    Get the ADR directory (ADR-213).

    After externalization, ADRs live in coditect-documentation/coditect-core/adrs/.
    When that directory does not exist, the legacy location
    <framework>/internal/architecture/adrs is returned instead.

    Returns:
        Path: The ADRs directory
    """
    externalized = get_docs_core_dir() / "adrs"
    if not externalized.exists():
        # Legacy location inside coditect-core.
        return get_framework_dir() / "internal" / "architecture" / "adrs"
    return externalized

def get_analysis_dir() -> Path:
    """
    Get the analysis documents directory (ADR-213).

    After externalization, analysis documents live in
    coditect-documentation/coditect-core/analysis/. When that directory
    does not exist, the legacy location <framework>/internal/analysis is
    returned instead.

    Returns:
        Path: The analysis directory
    """
    externalized = get_docs_core_dir() / "analysis"
    if not externalized.exists():
        # Legacy location inside coditect-core.
        return get_framework_dir() / "internal" / "analysis"
    return externalized

def get_diagrams_dir() -> Path:
    """
    Get the diagrams directory (ADR-213 centralization).

    C4 architecture diagrams and mermaid sources, centralized in
    coditect-documentation/coditect-core/diagrams/. When that directory
    does not exist, the legacy location <rollout-master>/diagrams is
    returned instead.

    Returns:
        Path: The diagrams directory
    """
    centralized = get_docs_core_dir() / "diagrams"
    if not centralized.exists():
        # Legacy location at the rollout-master root.
        return get_rollout_master_dir() / "diagrams"
    return centralized

def get_machine_id_path() -> Path:
    """
    Get the machine-id.json file path.

    Returns:
        Path: Location of machine-id.json inside the user data directory
    """
    user_data = get_user_data_dir()
    return user_data / "machine-id.json"

def get_machine_uuid() -> Optional[str]:
    """
    Get the machine UUID from machine-id.json.

    The lookup is best-effort: a missing or unreadable file, invalid JSON,
    a JSON document that is not an object, or a `machine_uuid` value that
    is not a string all yield None instead of raising.

    Returns:
        Optional[str]: The machine UUID or None if not found
    """
    try:
        with open(get_machine_id_path(), encoding="utf-8") as f:
            data = json.load(f)
    except (ValueError, OSError):
        # OSError covers a missing file; ValueError covers JSONDecodeError
        # and UnicodeDecodeError.
        return None

    if not isinstance(data, dict):
        return None

    machine_uuid = data.get("machine_uuid")
    # Callers join this value into a filesystem path, so only hand back strings.
    return machine_uuid if isinstance(machine_uuid, str) else None

# -----------------------------------------------------------------------------
# Project Discovery Helpers (Activity Recency + LLM Lock-Out)
# -----------------------------------------------------------------------------

def _get_process_cwd(pid: int) -> Optional[Path]:
    """
    Get working directory for a process using lsof.

    Runs `lsof -a -d cwd -p <pid>` (3 second timeout) and reads the NAME
    column of the first row whose FD column is `cwd`.

    Args:
        pid: Process ID

    Returns:
        Resolved path to the process CWD, or None if detection fails
        (lsof missing, timeout, OS error, or no matching row)
    """
    try:
        result = subprocess.run(
            ["lsof", "-a", "-d", "cwd", "-p", str(pid)],
            capture_output=True,
            text=True,
            timeout=3,
        )
        for line in result.stdout.splitlines()[1:]:  # Skip header
            # NAME is the 9th and last column. Capping the split at 8 keeps
            # directory names with spaces (e.g. "Application Support")
            # intact instead of truncating them at the first space.
            parts = line.split(None, 8)
            if len(parts) >= 9 and parts[3] == "cwd":
                return Path(parts[8]).resolve()
        return None
    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
        return None

def _get_active_llm_cwds() -> Dict[str, List[Path]]:
    """
    Get current working directories for all active LLM processes.

    For every tool listed in CLIToolDetector.CLI_TOOLS, `pgrep -x <binary>`
    finds the running processes and _get_process_cwd() resolves each one's
    working directory. The current process and up to ten of its ancestors
    are skipped to prevent self-locking.

    Returns:
        Dict mapping LLM name to a de-duplicated list of CWD paths. Tools
        with no detected CWD are omitted. Empty dict when the detector
        module cannot be imported (graceful degradation).
    """
    try:
        from scripts.core.cli_tool_detector import CLIToolDetector
    except ImportError:
        return {}

    # PIDs belonging to our own process tree; never treated as "another" LLM.
    own_tree = set()
    try:
        current = os.getpid()
        own_tree.add(current)
        hops = 0
        while hops < 10:  # Max 10 levels up the process tree
            hops += 1
            ps_out = subprocess.run(
                ['ps', '-o', 'ppid=', '-p', str(current)],
                capture_output=True, text=True, timeout=2
            )
            parent_text = ps_out.stdout.strip()
            if not parent_text:
                break
            parent = int(parent_text)
            if parent <= 1:
                break
            own_tree.add(parent)
            current = parent
    except (subprocess.TimeoutExpired, ValueError, OSError):
        pass

    detector = CLIToolDetector()
    active: Dict[str, List[Path]] = {}

    for llm_name, tool_config in detector.CLI_TOOLS.items():
        binary = tool_config["binary"]

        try:
            pgrep_out = subprocess.run(
                ['pgrep', '-x', binary],
                capture_output=True,
                text=True,
                timeout=3
            )
            if pgrep_out.returncode != 0:
                continue

            # Keyed by the string form so each directory appears once,
            # in first-seen order.
            unique_cwds: Dict[str, Path] = {}
            for raw in pgrep_out.stdout.strip().split('\n'):
                token = raw.strip()
                if not token:
                    continue
                try:
                    proc_id = int(token)
                except ValueError:
                    continue

                if proc_id in own_tree:
                    continue

                proc_cwd = _get_process_cwd(proc_id)
                if proc_cwd:
                    unique_cwds.setdefault(str(proc_cwd), proc_cwd)

            if unique_cwds:
                active[llm_name] = list(unique_cwds.values())

        except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
            continue

    return active

def _get_project_activity_scores(project_ids: List[str]) -> Dict[str, datetime]:
    """
    Get most recent activity timestamp for each project from sessions.db.

    Queries the activity_feed table for MAX(occurred_at) per project. The
    connection is always closed, including when a query fails.

    Args:
        project_ids: List of project IDs to score

    Returns:
        Dict mapping project_id to most recent activity datetime. Projects
        with no rows or an unparseable timestamp are omitted.
        Empty dict if project_ids is empty, sessions.db or the
        activity_feed table is missing, or the query fails.
    """
    import sqlite3
    from contextlib import closing

    if not project_ids:
        return {}

    sessions_db = get_sessions_db_path()
    if not sessions_db.exists():
        return {}

    try:
        # closing() guarantees conn.close() on every exit path; sqlite3's own
        # context manager only manages transactions.
        with closing(sqlite3.connect(str(sessions_db))) as conn:
            cursor = conn.cursor()

            # Check if activity_feed table exists
            cursor.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name='activity_feed'"
            )
            if not cursor.fetchone():
                return {}

            # Only "?" markers are interpolated; the IDs are bound as parameters.
            placeholders = ','.join('?' * len(project_ids))
            cursor.execute(
                f"SELECT project_id, MAX(occurred_at) as last_activity "
                f"FROM activity_feed WHERE project_id IN ({placeholders}) "
                f"GROUP BY project_id",
                project_ids
            )
            rows = cursor.fetchall()
    except sqlite3.Error:
        return {}

    scores: Dict[str, datetime] = {}
    for project_id, timestamp_str in rows:
        if not timestamp_str:
            continue
        try:
            # Rewrite a trailing "Z" as an explicit UTC offset before parsing.
            ts = timestamp_str.replace('Z', '+00:00')
            scores[project_id] = datetime.fromisoformat(ts)
        except (ValueError, AttributeError):
            # Unparseable or non-string timestamp: leave this project unscored.
            pass

    return scores

def _filter_locked_projects(
    projects: List[Tuple[str, str]],
    active_llm_cwds: Dict[str, List[Path]]
) -> List[Tuple[str, str]]:
    """
    Remove projects that are locked by an active LLM session.

    A project is "locked" if any active LLM process has a CWD that is the
    project's plan_location directory or lies beneath it. Containment is
    decided on whole path components, so a CWD under `/x/proj-2` does not
    lock a project located at `/x/proj`.

    Args:
        projects: List of (project_id, plan_location) tuples. Relative
            plan locations are resolved against discover_projects_dir().
        active_llm_cwds: Active LLM working directories

    Returns:
        Filtered project list. Returns original list if all would be
        filtered out (safety: never returns empty).
    """
    if not active_llm_cwds:
        return projects

    # Path() keeps this tolerant of callers that pass plain strings.
    all_llm_cwds = {
        Path(cwd) for cwds in active_llm_cwds.values() for cwd in cwds
    }

    unlocked = []
    for project_id, plan_loc in projects:
        plan_path = Path(plan_loc).expanduser()
        if not plan_path.is_absolute():
            plan_path = discover_projects_dir() / plan_loc
        plan_path = plan_path.resolve()

        is_locked = any(
            llm_cwd == plan_path or plan_path in llm_cwd.parents
            for llm_cwd in all_llm_cwds
        )

        if not is_locked:
            unlocked.append((project_id, plan_loc))

    # Safety: never return empty list
    return unlocked if unlocked else projects

def _get_bus_locked_project_ids() -> set:
    """
    Get project IDs with active LLM sessions via messaging.db (H.13.6.3).

    Uses the session registry exposed by the session message bus as a
    faster, more accurate alternative to pgrep-based process detection.
    Best-effort: if the bus module cannot be imported, or anything goes
    wrong while querying it, an empty set is returned.

    Returns:
        Set of project_ids that have active sessions (excluding current PID).
    """
    try:
        from scripts.core.session_message_bus import get_session_message_bus
    except ImportError:
        return set()

    try:
        own_pid = os.getpid()
        active_sessions = get_session_message_bus().list_sessions(active_only=True)
        return {
            session.project_id
            for session in active_sessions
            if session.pid and session.pid != own_pid and session.project_id
        }
    except Exception:
        # Deliberate catch-all: lock detection must never break discovery.
        return set()

def _rank_by_activity(
    projects: List[Tuple[str, str]],
    activity_scores: Dict[str, datetime]
) -> List[Tuple[str, str]]:
    """
    Rank projects by most recent activity (newest first).

    Projects without activity scores are placed at the end, in their
    original order. Timezone-naive and timezone-aware timestamps may be
    mixed: naive values are compared as if they were UTC, which avoids the
    TypeError Python raises when ordering naive against aware datetimes.

    Args:
        projects: List of (project_id, plan_location) tuples
        activity_scores: Dict mapping project_id to last activity datetime

    Returns:
        Projects sorted by activity recency (most recent first). The input
        list is returned unchanged when activity_scores is empty.
    """
    from datetime import timezone

    if not activity_scores:
        return projects

    def _comparable(ts: datetime) -> datetime:
        # NOTE(review): naive timestamps are assumed to be UTC - confirm
        # against whatever writes activity_feed.occurred_at.
        return ts.replace(tzinfo=timezone.utc) if ts.utcoffset() is None else ts

    with_activity = [p for p in projects if p[0] in activity_scores]
    without_activity = [p for p in projects if p[0] not in activity_scores]

    # list.sort is stable, so equally recent projects keep their input order.
    with_activity.sort(
        key=lambda p: _comparable(activity_scores[p[0]]), reverse=True
    )
    return with_activity + without_activity

def _path_is_within(path: Path, root: Path) -> bool:
    """Return True if *path* is *root* itself or lies anywhere beneath it."""
    return path == root or root in path.parents


def _load_active_projects(org_db: Path) -> List[Tuple[str, str]]:
    """Read (project_id, plan_location) rows of active projects from org.db.

    Returns an empty list when the `projects` table is missing or any
    SQLite error occurs. The connection is closed on every path.
    """
    import sqlite3
    from contextlib import closing

    try:
        with closing(sqlite3.connect(str(org_db))) as conn:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name='projects'"
            )
            if not cursor.fetchone():
                return []
            cursor.execute(
                "SELECT project_id, plan_location FROM projects WHERE status = 'active'"
            )
            return cursor.fetchall()
    except sqlite3.Error:
        return []


def _project_id_from_markers(start: Path) -> Optional[str]:
    """Walk up from *start* looking for a CLAUDE.md that names a project.

    Only the first 2000 characters of each CLAUDE.md are inspected. A
    `project_id:` line wins over a `codename:` line in the same file. The
    filesystem root itself is not inspected.
    """
    patterns = (
        r'^project_id:\s*["\']?([^"\'\n]+)',
        r'^codename:\s*["\']?([^"\'\n]+)',
    )
    check_path = start
    while check_path != check_path.parent:
        claude_md = check_path / 'CLAUDE.md'
        if claude_md.exists():
            try:
                with open(claude_md) as f:
                    content = f.read(2000)  # Enough to cover the frontmatter
            except (OSError, UnicodeDecodeError):
                content = ""
            for pattern in patterns:
                match = re.search(pattern, content, re.MULTILINE)
                if match:
                    return match.group(1).strip()
        check_path = check_path.parent
    return None


def discover_project(cwd: Optional[str] = None) -> Optional[str]:
    """
    Discover the active project from the current working directory.

    ADR-144: Multi-Project Registry Architecture

    Resolution priority:
    1. $CODITECT_PROJECT environment variable (explicit override)
    2. Project registry (org.db): active projects whose plan_location is
       the CWD or one of its ancestors
       - Single match: return immediately
       - Multiple matches: filter by LLM lock-out, rank by activity recency
       - Fallback: the match with the longest plan_location path
    3. The nearest CLAUDE.md at or above the CWD that declares
       `project_id:` or `codename:`

    Multi-match disambiguation (when multiple projects match CWD):
    1. Filter out projects where another LLM session is actively working
       (session registry first, then process detection if still ambiguous)
    2. Rank remaining by most recent activity in sessions.db
    3. Fall back to longest prefix match if no activity data

    Path matching works on whole path components, so a CWD inside
    `/x/proj-2` does not match a project located at `/x/proj`.

    Args:
        cwd: Optional working directory to check (defaults to os.getcwd())

    Returns:
        Optional[str]: The project_id if found, None otherwise
    """
    # 1. Environment variable (explicit override)
    if env_project := os.environ.get('CODITECT_PROJECT'):
        return env_project

    cwd_path = Path(os.getcwd() if cwd is None else cwd).resolve()

    # 2. Query project registry (org.db)
    org_db = get_org_db_path()
    if org_db.exists():
        # (project_id, plan_location, length of the resolved plan path)
        matches: List[Tuple[str, str, int]] = []

        for project_id, plan_loc in _load_active_projects(org_db):
            if not plan_loc:
                continue

            # Resolve plan_location to an absolute path
            plan_path = Path(plan_loc).expanduser()
            if not plan_path.is_absolute():
                plan_path = discover_projects_dir() / plan_loc
            plan_path = plan_path.resolve()

            if _path_is_within(cwd_path, plan_path):
                matches.append((project_id, plan_loc, len(str(plan_path))))

        # Fast path: single match
        if len(matches) == 1:
            return matches[0][0]

        # Multiple matches: activity recency + LLM lock-out
        if len(matches) > 1:
            candidates = [(m[0], m[1]) for m in matches]

            # Step 1a: Filter via messaging.db session registry (H.13.6.3).
            # The filter is ignored if it would eliminate every candidate.
            bus_locked = _get_bus_locked_project_ids()
            if bus_locked:
                remaining = [c for c in candidates if c[0] not in bus_locked]
                if remaining:
                    candidates = remaining

            # Step 1b: Filter via pgrep process detection (fallback)
            if len(candidates) > 1:
                active_llm_cwds = _get_active_llm_cwds()
                if active_llm_cwds:
                    candidates = _filter_locked_projects(
                        candidates, active_llm_cwds
                    )

            # Step 2: Rank by activity recency
            activity_scores = _get_project_activity_scores(
                [c[0] for c in candidates]
            )
            if activity_scores:
                return _rank_by_activity(candidates, activity_scores)[0][0]

            # Fallback: longest prefix match.
            # NOTE(review): this considers every match, including projects
            # removed by the lock-out filters above - confirm that is intended.
            return max(matches, key=lambda m: m[2])[0]

    # 3. Fallback: project markers in the directory hierarchy
    return _project_id_from_markers(cwd_path)

def get_current_project() -> Optional[str]:
    """
    Get the current active project ID (ADR-156).

    This is the primary entry point for determining project context; use
    it for all project-scoped operations. It delegates to
    discover_project() with no arguments, so the project is resolved for
    the process's current working directory.

    Returns:
        Optional[str]: The project_id if found, None otherwise

    Example:
        project = get_current_project()
        if project:
            print(f"Working on: {project}")
    """
    active_project = discover_project()
    return active_project

def get_project_scope(project_id: Optional[str] = None) -> str:
    """
    Determine data scope based on project (ADR-156).

    Scope determines data isolation level:
    - 'global': Visible to all projects (platform decisions)
    - 'project': Visible to same project only
    - 'customer': Visible to same tenant only (CUST-* projects)

    Resolution order:
    1. No project at all -> 'global'
    2. project_id starting with 'CUST-' -> 'customer'
    3. `scope` column of the project's row in org.db:
       'platform' -> 'global', 'customer' -> 'customer', any other
       non-empty value -> 'project'
    4. Otherwise (no database, no row, empty scope, SQLite error) -> 'project'

    Args:
        project_id: Optional project ID (auto-detected if not provided)

    Returns:
        str: The data scope ('global', 'project', or 'customer')
    """
    import sqlite3
    from contextlib import closing

    if project_id is None:
        project_id = get_current_project()

    if not project_id:
        return 'global'  # No project = global scope

    # Customer projects (CUST-*) get customer scope for isolation
    if project_id.startswith('CUST-'):
        return 'customer'

    # Check project registry for explicit scope
    org_db = get_org_db_path()
    if org_db.exists():
        row = None
        try:
            # closing() releases the connection even when the query raises
            # (for example when the projects table does not exist).
            with closing(sqlite3.connect(str(org_db))) as conn:
                row = conn.execute(
                    "SELECT scope FROM projects WHERE project_id = ?",
                    (project_id,)
                ).fetchone()
        except sqlite3.Error:
            pass

        if row and row[0]:
            registry_to_data_scope = {'platform': 'global', 'customer': 'customer'}
            return registry_to_data_scope.get(row[0], 'project')

    # Default: project scope
    return 'project'

def set_project_env(project_id: str) -> None:
    """
    Set the CODITECT_PROJECT environment variable.

    Because discover_project() checks this variable first, it overrides
    project auto-detection for the rest of the current process.

    Args:
        project_id: The project ID to set

    Example:
        set_project_env('CUST-avivatec-fpa')
        # Now all operations will use this project
    """
    os.environ.update({'CODITECT_PROJECT': project_id})

def clear_project_env() -> None:
    """
    Clear the CODITECT_PROJECT environment variable.

    After calling this, project will be auto-detected from cwd. Calling it
    when the variable is not set is a no-op.
    """
    os.environ.pop('CODITECT_PROJECT', None)

def get_project_session_log_dir(project_id: Optional[str] = None) -> Path:
    """
    Get the session log directory for a project (ADR-155).

    With a project, the result is
    `<session-logs>/projects/{project_id}/{machine_uuid}`; without one it
    is the legacy `<session-logs>/{machine_uuid}`. When no machine UUID is
    available the literal "unknown-machine" is used in its place.

    Args:
        project_id: Optional project ID (auto-detected via
            discover_project() if not provided)

    Returns:
        Path: The session log directory (project-scoped or machine-scoped)
    """
    machine_uuid = get_machine_uuid() or "unknown-machine"

    resolved_project = discover_project() if project_id is None else project_id

    if not resolved_project:
        # Legacy machine-scoped path
        return get_session_logs_dir() / machine_uuid

    # Project-scoped path
    return get_session_logs_dir() / "projects" / resolved_project / machine_uuid

def get_backups_dir() -> Path:
    """
    Get the local backups staging directory.

    Returns:
        Path: The `backups/` directory inside the user data directory
    """
    user_data = get_user_data_dir()
    return user_data / "backups"

# -----------------------------------------------------------------------------
# ADR-120: Customer Extensions Directory
# -----------------------------------------------------------------------------

def get_extensions_dir() -> Path:
    """
    Get the customer extensions directory (ADR-120).

    Customer extensions are stored in `extensions/` inside the user data
    directory. This includes:
    - agents/    Custom agent definitions
    - skills/    Custom skills
    - hooks/     Custom hooks
    - commands/  Custom commands
    - scripts/   Custom scripts
    - tools/     Custom tools
    - config/    Extension configuration

    Note: Customer extensions are loaded AFTER framework components.
    Customer components are prefixed with 'customer/' in the component index.

    Returns:
        Path: The extensions directory
    """
    user_data = get_user_data_dir()
    return user_data / "extensions"

# -----------------------------------------------------------------------------
# Backward Compatibility
# -----------------------------------------------------------------------------

def get_legacy_paths() -> dict:
    """
    Get legacy paths that may still exist in old installations.

    Used by migration scripts to detect and migrate old data. Three of the
    entries sit inside the framework directory; `coditect_base_data` is a
    `data` folder next to it.

    Returns:
        dict: Mapping of legacy path names to Path objects
    """
    framework = get_framework_dir()

    legacy = {}
    legacy["framework_machine_id"] = framework / "machine-id.json"
    legacy["framework_session_logs"] = framework / "session-logs"
    legacy["framework_context_storage"] = framework / "context-storage"
    # Sibling of the framework dir, e.g. ~/Library/.../CODITECT/data/
    legacy["coditect_base_data"] = framework.parent / "data"
    return legacy

def check_migration_needed() -> bool:
    """
    Check if user data migration is needed.

    Migration is needed when user data still exists in the old location
    (framework/) while the new location (the user data directory) does not
    exist. Legacy entries that are symlinks do not count as old data.

    Returns:
        bool: True if migration is needed
    """
    legacy = get_legacy_paths()
    tracked = (
        "framework_machine_id",
        "framework_session_logs",
        "framework_context_storage",
    )

    # Real (non-symlink) data left behind in the framework directory?
    has_legacy_data = any(
        legacy[key].exists() and not legacy[key].is_symlink()
        for key in tracked
    )

    return has_legacy_data and not get_user_data_dir().exists()

# -----------------------------------------------------------------------------
# Module-Level Constants (Computed Once)
# -----------------------------------------------------------------------------

# These are computed once at import time for convenience. They do not track
# later changes to the environment or config file; call the corresponding
# function when an up-to-date value is needed.
FRAMEWORK_LOC = get_framework_dir()
PROJECTS_DIR = discover_projects_dir()
USER_DATA_LOC = get_user_data_dir()
CONTEXT_STORAGE = get_context_storage_dir()
SESSION_LOGS = get_session_logs_dir()
MACHINE_ID_FILE = get_machine_id_path()
BACKUPS_DIR = get_backups_dir()

# ADR-120: Customer Extensions
EXTENSIONS_DIR = get_extensions_dir()

# ADR-213: Documentation Externalization
DOCS_DIR = get_docs_dir()
DOCS_CORE_DIR = get_docs_core_dir()
TRACKS_DIR = get_tracks_dir()
ADRS_DIR = get_adrs_dir()
ANALYSIS_DIR = get_analysis_dir()

# ADR-118: Four-Tier Database Paths
ORG_DB = get_org_db_path()
SESSIONS_DB = get_sessions_db_path()
PROJECTS_DB = get_projects_db_path()
MESSAGING_DB = get_messaging_db_path()
# Legacy, deprecated. Built from CONTEXT_STORAGE instead of calling
# get_context_db_path(), so that merely importing this module does not emit
# a DeprecationWarning; the path is the same one that function returns.
CONTEXT_DB = CONTEXT_STORAGE / "context.db"

# -----------------------------------------------------------------------------
# CLI for Testing
# -----------------------------------------------------------------------------

# Command-line entry point: prints the discovered paths (plain text or JSON),
# or reports via the exit status whether a data migration is needed.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="CODITECT Path Discovery (ADR-114)")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--check-migration", action="store_true", help="Check if migration needed")
    args = parser.parse_args()

    paths = {
        "framework": str(FRAMEWORK_LOC),
        "projects_dir": str(PROJECTS_DIR),
        "user_data": str(USER_DATA_LOC),
        "context_storage": str(CONTEXT_STORAGE),
        "session_logs": str(SESSION_LOGS),
        "machine_id": str(MACHINE_ID_FILE),
        "backups": str(BACKUPS_DIR),
        "docs": str(DOCS_DIR),
        "docs_core": str(DOCS_CORE_DIR),
        "tracks": str(TRACKS_DIR),
        "adrs": str(ADRS_DIR),
        "analysis": str(ANALYSIS_DIR),
    }

    # --check-migration takes precedence over --json and always exits:
    # status 1 when migration is needed, 0 otherwise.
    if args.check_migration:
        if check_migration_needed():
            print("Migration needed: User data exists in old location")
            sys.exit(1)
        else:
            print("No migration needed")
            sys.exit(0)

    if args.json:
        print(json.dumps(paths, indent=2))
    else:
        print("CODITECT Path Discovery (ADR-114)")
        print("=" * 50)
        print(f"Framework:       {FRAMEWORK_LOC}")
        print(f"PROJECTS Dir:    {PROJECTS_DIR}")
        print(f"User Data:       {USER_DATA_LOC}")
        print(f"Context Storage: {CONTEXT_STORAGE}")
        print(f"Session Logs:    {SESSION_LOGS}")
        print(f"Machine ID:      {MACHINE_ID_FILE}")
        print(f"Backups:         {BACKUPS_DIR}")
        print(f"Docs:            {DOCS_DIR}")
        print(f"Docs (core):     {DOCS_CORE_DIR}")
        print(f"Tracks:          {TRACKS_DIR}")
        print(f"ADRs:            {ADRS_DIR}")
        print(f"Analysis:        {ANALYSIS_DIR}")
        print()
        print(f"Framework exists:  {FRAMEWORK_LOC.exists()}")
        print(f"User data exists:  {USER_DATA_LOC.exists()}")
        print(f"Migration needed:  {check_migration_needed()}")