scripts-cli-tool-detector

#!/usr/bin/env python3
"""

title: "CLI Tool Detector"
component_type: script
version: "1.0.0"
audience: contributor
status: stable
summary: "Auto-detect installed LLM CLI tools and session locations"
keywords: ['cli', 'detector', 'claude', 'codex', 'gemini', 'llm']
tokens: ~400
created: 2026-01-28
updated: 2026-01-28
script_name: "cli_tool_detector.py"
language: python
executable: true
usage: "from scripts.core.cli_tool_detector import CLIToolDetector"
python_version: "3.10+"
dependencies: []
modifies_files: false
network_access: false
requires_auth: false

CLI Tool Detector for CODITECT /sx command.

Auto-discovers installed LLM CLI tools (Claude, Codex, Gemini) and their session locations. Modeled after ProviderDetector pattern (ADR-073).

Features:

- Auto-detect installed CLI tools via binary check
- Resolve session paths with environment variable overrides
- Cache detection results for performance
- Graceful handling when tools are uninstalled
- Multi-tenant configuration awareness

Track: J.13 (Memory - Generic Session Export)
Task: J.13.1.1
"""

from __future__ import annotations

import glob
import json
import os
import shutil
import subprocess
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional, Tuple

@dataclass
class ToolStatus:
    """Status of an LLM CLI tool installation.

    Attributes:
        name: Tool identifier (a key of ``CLIToolDetector.CLI_TOOLS``).
        installed: Whether the tool's binary was found.
        version: First line of the version command's output, if obtained.
        binary_path: Location of the binary, if found.
        session_paths: Session directories/files that exist on disk.
        config_path: The tool's config file, if it exists.
        last_checked: When this status was produced.
        error: Description of a failed version check, if any.
    """

    name: str
    installed: bool
    version: Optional[str] = None
    binary_path: Optional[Path] = None
    # default_factory gives every instance its own list.
    session_paths: List[Path] = field(default_factory=list)
    config_path: Optional[Path] = None
    last_checked: Optional[datetime] = None
    error: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for serialization.

        Paths are rendered as strings and ``last_checked`` as an ISO-8601
        string so the result can be passed straight to ``json.dumps``.
        """
        return {
            "name": self.name,
            "installed": self.installed,
            "version": self.version,
            "binary_path": str(self.binary_path) if self.binary_path else None,
            "session_paths": [str(p) for p in self.session_paths],
            "config_path": str(self.config_path) if self.config_path else None,
            "last_checked": self.last_checked.isoformat() if self.last_checked else None,
            "error": self.error,
        }

class CLIToolDetector:
    """
    Detects installed LLM CLI tools and session locations.
    Adapts to tenant/team/project/user configuration.

    Modeled after ProviderDetector (ADR-073) for consistent detection patterns.
    """

    # CLI tool definitions with session paths and metadata
    CLI_TOOLS: Dict[str, Dict[str, Any]] = {
        "claude": {
            "binary": "claude",
            "session_paths": [
                "~/.claude/projects/",           # Project sessions (UUID directories)
            ],
            "export_paths": [
                "~/Claude-Exports/",             # Default export location
                "~/Downloads/",                  # Alternative export location
            ],
            "config_path": "~/.claude/settings.json",
            "version_cmd": ["claude", "--version"],
            "session_pattern": "**/*.jsonl",    # Claude stores in subdirs (project hash)
            "export_pattern": "claude-export-*.txt",
            "env_overrides": {
                "session": "CLAUDE_SESSION_DIR",
                "export": "CLAUDE_EXPORT_DIR",
            }
        },
        "codex": {
            "binary": "codex",
            "session_paths": [
                "~/.codex/sessions/",            # Rich sessions (date hierarchy)
            ],
            "export_paths": [
                "~/.codex/history.jsonl",        # Flat history file
            ],
            "config_path": "~/.codex/config.json",
            "version_cmd": ["codex", "--version"],
            "session_pattern": "**/*.jsonl",
            "env_overrides": {
                "session": "CODEX_SESSION_DIR",
                "history": "CODEX_HISTORY_PATH",
            }
        },
        "gemini": {
            "binary": "gemini",
            "session_paths": [
                "~/.gemini/tmp/",                # Session dirs (workdir hash subdirs)
            ],
            "export_paths": [
                "~/.gemini/tmp/",                # Logs also under tmp/<hash>/
            ],
            "config_path": "~/.gemini/settings.json",
            "version_cmd": ["gemini", "--version"],
            "session_pattern": "*/chats/session-*.json",  # JSON, not JSONL
            "export_pattern": "*/logs.json",
            "env_overrides": {
                "session": "GEMINI_CLI_SESSIONS_PATH",
                "history": "GEMINI_CLI_HISTORY_PATH",
            }
        },
        "kimi": {
            "binary": "kimi",
            "session_paths": [
                "~/.kimi/sessions/",             # Session directories
            ],
            "export_paths": [
                "~/.kimi/user-history/",         # User input history
            ],
            "config_path": "~/.kimi/kimi.json",
            "version_cmd": ["kimi", "--version"],
            "session_pattern": "**/*.jsonl",    # Nested in UUID dirs
            "env_overrides": {
                "session": "KIMI_SESSION_DIR",
            }
        },
    }

    def __init__(self, cache_ttl_seconds: int = 300):
        """
        Initialize CLIToolDetector.

        Args:
            cache_ttl_seconds: How long to cache detection results (default 5 minutes)
        """
        self._cache: Dict[str, ToolStatus] = {}
        self._cache_ttl = cache_ttl_seconds
        self._last_full_scan: Optional[datetime] = None

    def detect_installed_tools(self, force_refresh: bool = False) -> "Dict[str, ToolStatus]":
        """
        Auto-detect which CLI tools are installed and active.

        Args:
            force_refresh: Force re-detection even if cache is valid

        Returns:
            Dict mapping tool names to their ToolStatus
        """
        now = datetime.now()

        # Serve from cache while the last full scan is younger than the TTL.
        if not force_refresh and self._last_full_scan and self._cache:
            cache_age = (now - self._last_full_scan).total_seconds()
            if cache_age < self._cache_ttl:
                return self._cache

        results: Dict[str, ToolStatus] = {
            tool_name: self._detect_single_tool(tool_name, tool_config)
            for tool_name, tool_config in self.CLI_TOOLS.items()
        }
        self._cache.update(results)
        self._last_full_scan = now
        return results

    def _detect_single_tool(self, tool_name: str, tool_config: Dict[str, Any]) -> "ToolStatus":
        """Detect a single CLI tool's installation status.

        The tool counts as installed when its binary is found on PATH. The
        version lookup is best-effort: a failure is recorded in ``error``
        and never raised.
        """
        status = ToolStatus(
            name=tool_name,
            installed=False,
            last_checked=datetime.now()
        )

        binary_path = shutil.which(tool_config["binary"])
        if binary_path:
            status.installed = True
            status.binary_path = Path(binary_path)

            try:
                result = subprocess.run(
                    tool_config["version_cmd"],
                    capture_output=True,
                    text=True,
                    timeout=5
                )
            except (subprocess.TimeoutExpired, OSError) as e:
                # OSError also covers FileNotFoundError.
                status.error = f"Version check failed: {e}"
            else:
                if result.returncode == 0:
                    # Version is the first output line; blank output means unknown.
                    lines = result.stdout.strip().splitlines()
                    status.version = lines[0].strip() if lines else None
                else:
                    status.error = f"Version check failed: exit code {result.returncode}"

        # Resolve session paths (respecting env overrides)
        status.session_paths = self._resolve_paths(
            tool_config.get("session_paths", []),
            tool_config.get("env_overrides", {}).get("session")
        )

        # Only report a config path that actually exists.
        config_path = tool_config.get("config_path")
        if config_path:
            resolved = Path(os.path.expanduser(config_path))
            if resolved.exists():
                status.config_path = resolved

        return status

    def _resolve_paths(
        self,
        paths: List[str],
        env_override: Optional[str] = None
    ) -> List[Path]:
        """
        Resolve path list with environment variable override.

        Args:
            paths: List of path strings (may contain ~)
            env_override: Environment variable name for override

        Returns:
            List of resolved Path objects that exist. When the override
            variable is set and points at an existing path, that path is
            the only entry; otherwise the existing defaults are returned.
        """
        if env_override:
            env_value = os.environ.get(env_override)
            if env_value:
                env_path = Path(os.path.expanduser(env_value))
                if env_path.exists():
                    return [env_path]  # Env override takes precedence

        candidates = (Path(os.path.expanduser(path_str)) for path_str in paths)
        return [path for path in candidates if path.exists()]

    @staticmethod
    def _safe_stat(path: Path) -> Optional[os.stat_result]:
        """Return ``path.stat()``, or None if the path cannot be stat'ed."""
        try:
            return path.stat()
        except OSError:
            # The file may disappear between listing and stat.
            return None

    def _iter_session_files(self, llm: str) -> Iterator[Tuple[Path, os.stat_result, bool]]:
        """Yield ``(path, stat, is_direct)`` for every session file of ``llm``.

        ``is_direct`` is True when a session location is itself a file rather
        than a directory searched with the tool's session pattern.
        """
        session_locations = self.get_session_locations(llm)
        pattern = self.get_session_pattern(llm)

        for location in session_locations:
            if location.is_dir():
                for session_file in location.glob(pattern):
                    if session_file.is_file():
                        info = self._safe_stat(session_file)
                        if info is not None:
                            yield session_file, info, False
            elif location.is_file():
                info = self._safe_stat(location)
                if info is not None:
                    yield location, info, True

    def get_session_locations(self, llm: str) -> List[Path]:
        """
        Get session file locations for an LLM.

        Args:
            llm: LLM name (a key of CLI_TOOLS, e.g. claude, codex, gemini, kimi)

        Returns:
            List of existing session directories/files

        Raises:
            ValueError: If LLM is not recognized
        """
        if llm not in self.CLI_TOOLS:
            raise ValueError(f"Unknown LLM: {llm}. Supported: {list(self.CLI_TOOLS.keys())}")

        # Ensure we have current detection
        status = self.detect_installed_tools().get(llm)
        if status and status.session_paths:
            return status.session_paths

        # Fallback to default resolution
        tool_config = self.CLI_TOOLS[llm]
        return self._resolve_paths(
            tool_config.get("session_paths", []),
            tool_config.get("env_overrides", {}).get("session")
        )

    def get_export_locations(self, llm: str) -> List[Path]:
        """
        Get export file locations for an LLM.

        Args:
            llm: LLM name (a key of CLI_TOOLS, e.g. claude, codex, gemini, kimi)

        Returns:
            List of existing export directories/files

        Raises:
            ValueError: If LLM is not recognized
        """
        if llm not in self.CLI_TOOLS:
            raise ValueError(f"Unknown LLM: {llm}")

        tool_config = self.CLI_TOOLS[llm]
        overrides = tool_config.get("env_overrides", {})
        # Some tools declare their export override under "history" rather
        # than "export"; honor whichever one is configured.
        env_override = overrides.get("export") or overrides.get("history")
        return self._resolve_paths(tool_config.get("export_paths", []), env_override)

    def get_session_pattern(self, llm: str) -> str:
        """Get the glob pattern for session files.

        Raises:
            ValueError: If LLM is not recognized
        """
        if llm not in self.CLI_TOOLS:
            raise ValueError(f"Unknown LLM: {llm}")
        return self.CLI_TOOLS[llm].get("session_pattern", "*.jsonl")

    def get_active_session(self, llm: str) -> Optional[Path]:
        """
        Get the currently active session for an LLM (if detectable).

        This is heuristic-based - finds most recently modified session file.

        Args:
            llm: LLM name

        Returns:
            Path to most recent session file, or None
        """
        most_recent: Optional[Path] = None
        most_recent_mtime: float = 0

        for session_file, info, _ in self._iter_session_files(llm):
            if info.st_mtime > most_recent_mtime:
                most_recent = session_file
                most_recent_mtime = info.st_mtime

        return most_recent

    def find_session_by_id(self, llm: str, session_id: str) -> Optional[Path]:
        """
        Find a session file by its ID.

        Args:
            llm: LLM name
            session_id: Session UUID or identifier

        Returns:
            Path to session file if found, else None. An empty ID never
            matches.
        """
        session_locations = self.get_session_locations(llm)

        if not session_id:
            # ``location / ""`` is the session root itself and "**" matches
            # everything, so an empty ID must not be looked up.
            return None

        # Escape glob metacharacters so the ID is matched literally.
        pattern = f"*{glob.escape(session_id)}*"

        for location in session_locations:
            if not location.is_dir():
                continue

            if llm == "claude":
                # UUID-named entry, with or without the .jsonl extension
                for candidate in (location / session_id, location / f"{session_id}.jsonl"):
                    if candidate.exists():
                        return candidate
                # Scan subdirectories (project hash dirs)
                for project_dir in location.iterdir():
                    if project_dir.is_dir():
                        session_file = project_dir / f"{session_id}.jsonl"
                        if session_file.exists():
                            return session_file
            else:
                # Prefer a top-level match, then search nested directories,
                # since the other tools keep sessions in subdirectories.
                # Sorting makes the chosen match deterministic.
                matches = sorted(location.glob(pattern)) or sorted(location.rglob(pattern))
                if matches:
                    return matches[0]

        return None

    def list_sessions(
        self,
        llm: str,
        limit: int = 50,
        after_date: Optional[datetime] = None
    ) -> List[Dict[str, Any]]:
        """
        List available sessions for an LLM.

        Args:
            llm: LLM name
            limit: Maximum number of sessions to return
            after_date: Only return sessions modified after this date
                (naive datetimes are interpreted as local time)

        Returns:
            List of session metadata dicts, most recently modified first
        """
        # Compare as POSIX timestamps so both naive and timezone-aware
        # ``after_date`` values work.
        cutoff = after_date.timestamp() if after_date is not None else None
        sessions: List[Dict[str, Any]] = []

        for session_file, info, is_direct in self._iter_session_files(llm):
            if cutoff is not None and info.st_mtime < cutoff:
                continue

            sessions.append({
                "path": session_file,
                # A location that is itself a file (e.g., history.jsonl)
                # is reported under the fixed id "history".
                "session_id": "history" if is_direct else session_file.stem,
                "modified": datetime.fromtimestamp(info.st_mtime),
                "size": info.st_size,
                "llm": llm
            })

        # Sort by modification time (most recent first)
        sessions.sort(key=lambda entry: entry["modified"], reverse=True)

        return sessions[:limit]

    def get_detected_llm(self) -> Optional[str]:
        """
        Get the currently active/detected LLM based on environment.

        Checks in order: CODITECT_LLM env var (only if that tool is
        installed), then the installed tool with the most recently
        modified session file.

        Returns:
            LLM name or None if none detected
        """
        tools = self.detect_installed_tools()

        # Check env var first
        env_llm = os.environ.get("CODITECT_LLM")
        if env_llm and env_llm in self.CLI_TOOLS:
            env_status = tools.get(env_llm)
            if env_status is not None and env_status.installed:
                return env_llm

        # Find most recently used
        most_recent_llm: Optional[str] = None
        most_recent_time: float = 0

        for llm_name, status in tools.items():
            if not status.installed:
                continue
            active = self.get_active_session(llm_name)
            if active is None:
                continue
            info = self._safe_stat(active)
            if info is not None and info.st_mtime > most_recent_time:
                most_recent_time = info.st_mtime
                most_recent_llm = llm_name

        return most_recent_llm

    def to_dict(self) -> Dict[str, Any]:
        """Export detector state as dictionary."""
        tools = self.detect_installed_tools()
        return {
            "detected_tools": {name: status.to_dict() for name, status in tools.items()},
            "active_llm": self.get_detected_llm(),
            "last_scan": self._last_full_scan.isoformat() if self._last_full_scan else None
        }

# Singleton instance for module-level use
_detector: Optional[CLIToolDetector] = None


def get_detector() -> CLIToolDetector:
    """Get or create singleton CLIToolDetector instance.

    The instance is created on first call with default settings and reused
    by every later call.
    """
    global _detector
    if _detector is None:
        _detector = CLIToolDetector()
    return _detector

def detect_llm() -> Optional[str]:
    """Convenience function to detect current LLM.

    Returns:
        Whatever ``get_detected_llm()`` returns on the shared detector:
        an LLM name, or None if none is detected.
    """
    return get_detector().get_detected_llm()

def list_installed_tools() -> List[str]:
    """Convenience function to list installed LLM CLI tools.

    Returns:
        Names of the tools whose status reports ``installed``, taken from
        the shared detector.
    """
    tools = get_detector().detect_installed_tools()
    return [name for name, status in tools.items() if status.installed]

if __name__ == "__main__":
    # CLI for testing
    import argparse

    parser = argparse.ArgumentParser(description="CLI Tool Detector")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--llm", help="Check specific LLM")
    parser.add_argument("--sessions", action="store_true", help="List sessions")
    args = parser.parse_args()

    # --sessions needs a tool to list; fail loudly instead of ignoring it.
    if args.sessions and not args.llm:
        parser.error("--sessions requires --llm")

    detector = CLIToolDetector()

    if args.llm:
        if args.sessions:
            try:
                sessions = detector.list_sessions(args.llm)
            except ValueError as exc:
                # Unknown tool name: report a usage error, not a traceback.
                parser.error(str(exc))
            if args.json:
                # Path and datetime values are not JSON-serializable as-is.
                print(json.dumps([{**s, "path": str(s["path"]), "modified": s["modified"].isoformat()} for s in sessions], indent=2))
            else:
                for s in sessions:
                    print(f"{s['session_id']}: {s['path']} ({s['modified']})")
        else:
            tools = detector.detect_installed_tools()
            status = tools.get(args.llm)
            if args.json:
                print(json.dumps(status.to_dict() if status else {}, indent=2))
            else:
                print(f"{args.llm}: {'installed' if status and status.installed else 'not installed'}")
    else:
        if args.json:
            print(json.dumps(detector.to_dict(), indent=2))
        else:
            tools = detector.detect_installed_tools()
            print("Detected LLM CLI Tools:")
            for name, status in tools.items():
                marker = "✓" if status.installed else "✗"
                version = f" ({status.version})" if status.version else ""
                print(f"  {marker} {name}{version}")

            active = detector.get_detected_llm()
            if active:
                print(f"\nActive LLM: {active}")

#!/usr/bin/env python3 """​

# Singleton instance for module-level use

#!/usr/bin/env python3 """