Skip to main content

scripts-cli-tool-detector

#!/usr/bin/env python3
"""
title: "CLI Tool Detector"
component_type: script
version: "1.0.0"
audience: contributor
status: stable
summary: "Auto-detect installed LLM CLI tools and session locations"
keywords: ['cli', 'detector', 'claude', 'codex', 'gemini', 'llm']
tokens: ~400
created: 2026-01-28
updated: 2026-01-28
script_name: "cli_tool_detector.py"
language: python
executable: true
usage: "from scripts.core.cli_tool_detector import CLIToolDetector"
python_version: "3.10+"
dependencies: []
modifies_files: false
network_access: false
requires_auth: false

CLI Tool Detector for CODITECT /sx command.

Auto-discovers installed LLM CLI tools (Claude, Codex, Gemini, Kimi) and
their session locations. Modeled after the ProviderDetector pattern (ADR-073).

Features:

- Auto-detect installed CLI tools via binary check
- Resolve session paths with environment variable overrides
- Cache detection results for performance
- Graceful handling when tools are uninstalled
- Multi-tenant configuration awareness

Track: J.13 (Memory - Generic Session Export)
Task: J.13.1.1
"""

from __future__ import annotations

import json
import os
import shutil
import subprocess
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional

@dataclass
class ToolStatus:
    """Status of an LLM CLI tool installation."""

    name: str  # tool key, e.g. "claude"
    installed: bool  # True when the tool's binary was found
    version: Optional[str] = None  # version text reported by the tool, if any
    binary_path: Optional[Path] = None  # location of the binary, if found
    session_paths: List[Path] = field(default_factory=list)  # session locations
    config_path: Optional[Path] = None  # tool config file, if recorded
    last_checked: Optional[datetime] = None  # when this status was produced
    error: Optional[str] = None  # description of a failed check, if any

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict view of this status.

        Paths are rendered with ``str()``, ``last_checked`` as an ISO-8601
        string; unset optional values are emitted as ``None``.
        """
        binary = str(self.binary_path) if self.binary_path else None
        config = str(self.config_path) if self.config_path else None
        checked = self.last_checked.isoformat() if self.last_checked else None

        payload: Dict[str, Any] = {}
        payload["name"] = self.name
        payload["installed"] = self.installed
        payload["version"] = self.version
        payload["binary_path"] = binary
        payload["session_paths"] = list(map(str, self.session_paths))
        payload["config_path"] = config
        payload["last_checked"] = checked
        payload["error"] = self.error
        return payload

class CLIToolDetector:
    """
    Detects installed LLM CLI tools and resolves their session locations.

    Tool metadata lives in the ``CLI_TOOLS`` table; session and export paths
    can be overridden per tool through the environment variables named there.
    Full-scan results are cached for ``cache_ttl_seconds``.

    Modeled after ProviderDetector (ADR-073) for consistent detection patterns.
    """

    # CLI tool definitions with session paths and metadata.
    #   binary          - executable name looked up on PATH
    #   session_paths   - default session locations (``~`` is expanded)
    #   export_paths    - default export/history locations
    #   config_path     - tool configuration file
    #   version_cmd     - argv used to query the tool version
    #   session_pattern - glob (relative to a session path) matching session files
    #   export_pattern  - glob for export files (not consumed by this class)
    #   env_overrides   - environment variable names that override the defaults
    CLI_TOOLS: Dict[str, Dict[str, Any]] = {
        "claude": {
            "binary": "claude",
            "session_paths": [
                "~/.claude/projects/",  # Project sessions (UUID directories)
            ],
            "export_paths": [
                "~/Claude-Exports/",  # Default export location
                "~/Downloads/",  # Alternative export location
            ],
            "config_path": "~/.claude/settings.json",
            "version_cmd": ["claude", "--version"],
            "session_pattern": "**/*.jsonl",  # Claude stores in subdirs (project hash)
            "export_pattern": "claude-export-*.txt",
            "env_overrides": {
                "session": "CLAUDE_SESSION_DIR",
                "export": "CLAUDE_EXPORT_DIR",
            },
        },
        "codex": {
            "binary": "codex",
            "session_paths": [
                "~/.codex/sessions/",  # Rich sessions (date hierarchy)
            ],
            "export_paths": [
                "~/.codex/history.jsonl",  # Flat history file
            ],
            "config_path": "~/.codex/config.json",
            "version_cmd": ["codex", "--version"],
            "session_pattern": "**/*.jsonl",
            "env_overrides": {
                "session": "CODEX_SESSION_DIR",
                "history": "CODEX_HISTORY_PATH",
            },
        },
        "gemini": {
            "binary": "gemini",
            "session_paths": [
                "~/.gemini/tmp/",  # Session dirs (workdir hash subdirs)
            ],
            "export_paths": [
                "~/.gemini/tmp/",  # Logs also under tmp/<hash>/
            ],
            "config_path": "~/.gemini/settings.json",
            "version_cmd": ["gemini", "--version"],
            "session_pattern": "*/chats/session-*.json",  # JSON, not JSONL
            "export_pattern": "*/logs.json",
            "env_overrides": {
                "session": "GEMINI_CLI_SESSIONS_PATH",
                "history": "GEMINI_CLI_HISTORY_PATH",
            },
        },
        "kimi": {
            "binary": "kimi",
            "session_paths": [
                "~/.kimi/sessions/",  # Session directories
            ],
            "export_paths": [
                "~/.kimi/user-history/",  # User input history
            ],
            "config_path": "~/.kimi/kimi.json",
            "version_cmd": ["kimi", "--version"],
            "session_pattern": "**/*.jsonl",  # Nested in UUID dirs
            "env_overrides": {
                "session": "KIMI_SESSION_DIR",
            },
        },
    }

    def __init__(self, cache_ttl_seconds: int = 300):
        """
        Initialize CLIToolDetector.

        Args:
            cache_ttl_seconds: How long to cache detection results (default 5 minutes)
        """
        self._cache: Dict[str, ToolStatus] = {}
        self._cache_ttl = cache_ttl_seconds
        self._last_full_scan: Optional[datetime] = None

    def detect_installed_tools(self, force_refresh: bool = False) -> Dict[str, ToolStatus]:
        """
        Auto-detect which CLI tools are installed.

        Args:
            force_refresh: Force re-detection even if cache is valid

        Returns:
            Dict mapping tool names to their ToolStatus. The dict is a copy,
            so callers may modify it without corrupting the cache.
        """
        now = datetime.now()

        # Serve from cache while the last full scan is younger than the TTL.
        if not force_refresh and self._last_full_scan and self._cache:
            cache_age = (now - self._last_full_scan).total_seconds()
            if cache_age < self._cache_ttl:
                return dict(self._cache)

        results: Dict[str, ToolStatus] = {
            tool_name: self._detect_single_tool(tool_name, tool_config)
            for tool_name, tool_config in self.CLI_TOOLS.items()
        }

        self._cache.update(results)
        self._last_full_scan = now
        return results

    def _detect_single_tool(self, tool_name: str, tool_config: Dict[str, Any]) -> ToolStatus:
        """
        Detect a single CLI tool's installation status.

        Args:
            tool_name: Key of the tool in ``CLI_TOOLS``
            tool_config: The tool's entry from ``CLI_TOOLS``

        Returns:
            ToolStatus with ``installed`` reflecting whether the binary is on
            PATH. A failed version probe is recorded in ``error`` and never
            raised.
        """
        status = ToolStatus(
            name=tool_name,
            installed=False,
            last_checked=datetime.now(),
        )

        # Check if binary exists
        binary_path = shutil.which(tool_config["binary"])

        if binary_path:
            status.installed = True
            status.binary_path = Path(binary_path)

            # Try to get version (best effort)
            try:
                result = subprocess.run(
                    tool_config["version_cmd"],
                    capture_output=True,
                    text=True,
                    timeout=5,
                )
            except (subprocess.TimeoutExpired, OSError) as e:
                # OSError also covers FileNotFoundError.
                status.error = f"Version check failed: {e}"
            else:
                if result.returncode == 0:
                    # Extract version from output (first line usually)
                    lines = result.stdout.strip().splitlines()
                    status.version = lines[0].strip() if lines else None
                else:
                    status.error = f"Version check failed: exit code {result.returncode}"

        # Resolve session paths (respecting env overrides). Done whether or not
        # the binary was found, so leftover session data stays discoverable.
        status.session_paths = self._resolve_paths(
            tool_config.get("session_paths", []),
            tool_config.get("env_overrides", {}).get("session"),
        )

        # Resolve config path; only recorded when the file actually exists.
        config_path = tool_config.get("config_path")
        if config_path:
            resolved = Path(os.path.expanduser(config_path))
            if resolved.exists():
                status.config_path = resolved

        return status

    def _resolve_paths(
        self,
        paths: List[str],
        env_override: Optional[str] = None
    ) -> List[Path]:
        """
        Resolve path list with environment variable override.

        Args:
            paths: List of path strings (may contain ~)
            env_override: Environment variable name for override

        Returns:
            List of resolved Path objects that exist. When the override
            variable is set and points at an existing path, that path alone is
            returned; otherwise the existing default paths are returned.
        """
        # Check env override first; it takes precedence over the defaults.
        if env_override:
            env_value = os.environ.get(env_override)
            if env_value:
                env_path = Path(os.path.expanduser(env_value))
                if env_path.exists():
                    return [env_path]

        # Resolve default paths, keeping only those present on disk.
        candidates = (Path(os.path.expanduser(path_str)) for path_str in paths)
        return [path for path in candidates if path.exists()]

    @staticmethod
    def _safe_stat(path: Path) -> Optional[os.stat_result]:
        """Return ``path.stat()``, or None if the path vanished or is unreadable."""
        try:
            return path.stat()
        except OSError:
            return None

    def get_session_locations(self, llm: str) -> List[Path]:
        """
        Get session file locations for an LLM.

        Args:
            llm: LLM name (one of the keys of ``CLI_TOOLS``)

        Returns:
            List of existing session directories/files

        Raises:
            ValueError: If LLM is not recognized
        """
        if llm not in self.CLI_TOOLS:
            raise ValueError(f"Unknown LLM: {llm}. Supported: {list(self.CLI_TOOLS.keys())}")

        # Ensure we have current detection
        status = self.detect_installed_tools().get(llm)
        if status and status.session_paths:
            return status.session_paths

        # Fallback to default resolution (paths may have appeared since the scan)
        tool_config = self.CLI_TOOLS[llm]
        return self._resolve_paths(
            tool_config.get("session_paths", []),
            tool_config.get("env_overrides", {}).get("session"),
        )

    def get_export_locations(self, llm: str) -> List[Path]:
        """
        Get export file locations for an LLM.

        Args:
            llm: LLM name (one of the keys of ``CLI_TOOLS``)

        Returns:
            List of existing export directories/files

        Raises:
            ValueError: If LLM is not recognized
        """
        if llm not in self.CLI_TOOLS:
            raise ValueError(f"Unknown LLM: {llm}")

        tool_config = self.CLI_TOOLS[llm]
        overrides = tool_config.get("env_overrides", {})
        # Some tools declare their export override under "history" rather than
        # "export" (codex, gemini); honour whichever one is configured.
        env_name = overrides.get("export") or overrides.get("history")
        return self._resolve_paths(tool_config.get("export_paths", []), env_name)

    def get_session_pattern(self, llm: str) -> str:
        """
        Get the glob pattern for session files.

        Args:
            llm: LLM name (one of the keys of ``CLI_TOOLS``)

        Returns:
            The tool's ``session_pattern``, or ``"*.jsonl"`` if none is defined

        Raises:
            ValueError: If LLM is not recognized
        """
        if llm not in self.CLI_TOOLS:
            raise ValueError(f"Unknown LLM: {llm}")
        return self.CLI_TOOLS[llm].get("session_pattern", "*.jsonl")

    def get_active_session(self, llm: str) -> Optional[Path]:
        """
        Get the currently active session for an LLM (if detectable).

        This is heuristic-based - finds most recently modified session file.

        Args:
            llm: LLM name

        Returns:
            Path to most recent session file, or None

        Raises:
            ValueError: If LLM is not recognized
        """
        pattern = self.get_session_pattern(llm)

        most_recent: Optional[Path] = None
        most_recent_mtime: float = 0

        for location in self.get_session_locations(llm):
            if location.is_dir():
                # Search for session files
                candidates = [f for f in location.glob(pattern) if f.is_file()]
            elif location.is_file():
                # Direct file reference (e.g., history.jsonl)
                candidates = [location]
            else:
                continue

            for candidate in candidates:
                # A file can disappear between listing and stat; skip it.
                stat = self._safe_stat(candidate)
                if stat is not None and stat.st_mtime > most_recent_mtime:
                    most_recent = candidate
                    most_recent_mtime = stat.st_mtime

        return most_recent

    def find_session_by_id(self, llm: str, session_id: str) -> Optional[Path]:
        """
        Find a session file by its ID.

        For Claude the ID is looked up as ``<id>``, ``<id>.jsonl`` and
        ``<project-dir>/<id>.jsonl``. For other tools, top-level entries whose
        name contains the ID are tried first, then files matching the tool's
        session pattern whose relative path has a component containing the ID.

        Args:
            llm: LLM name
            session_id: Session UUID or identifier

        Returns:
            Path to session file if found, otherwise None. IDs that are empty
            or contain path separators never match.

        Raises:
            ValueError: If LLM is not recognized
        """
        session_locations = self.get_session_locations(llm)

        # An empty ID would match the location itself; separators or dot
        # components would let the lookup escape the session directory.
        separators = [sep for sep in (os.sep, os.altsep, "/") if sep]
        if (
            not session_id
            or session_id in (".", "..")
            or any(sep in session_id for sep in separators)
        ):
            return None

        pattern = self.get_session_pattern(llm)

        for location in session_locations:
            if not location.is_dir():
                continue

            # Claude: UUID directory structure
            if llm == "claude":
                # UUID-named entry, with or without the .jsonl extension
                for candidate in (location / session_id, location / f"{session_id}.jsonl"):
                    if candidate.exists():
                        return candidate
                # Scan subdirectories (project hash dirs)
                for project_dir in location.iterdir():
                    if project_dir.is_dir():
                        session_file = project_dir / f"{session_id}.jsonl"
                        if session_file.exists():
                            return session_file
                continue

            # Generic matching. Substring tests are used instead of building a
            # glob from the ID, so glob metacharacters in the ID are literal.
            top_level = sorted(
                entry for entry in location.iterdir() if session_id in entry.name
            )
            if top_level:
                return top_level[0]

            # Session files are nested for these tools, so also search the
            # session pattern and match the ID against every path component.
            nested = sorted(
                f
                for f in location.glob(pattern)
                if f.is_file()
                and any(session_id in part for part in f.relative_to(location).parts)
            )
            if nested:
                return nested[0]

        return None

    def list_sessions(
        self,
        llm: str,
        limit: int = 50,
        after_date: Optional[datetime] = None
    ) -> List[Dict[str, Any]]:
        """
        List available sessions for an LLM.

        Args:
            llm: LLM name
            limit: Maximum number of sessions to return (negative values are
                treated as 0)
            after_date: Only return sessions modified at or after this date;
                may be naive (local time) or timezone-aware

        Returns:
            List of session metadata dicts with keys ``path``, ``session_id``,
            ``modified`` (naive local datetime), ``size`` and ``llm``, sorted
            most recent first

        Raises:
            ValueError: If LLM is not recognized
        """
        pattern = self.get_session_pattern(llm)
        tz = after_date.tzinfo if after_date is not None else None
        sessions: List[Dict[str, Any]] = []

        for location in self.get_session_locations(llm):
            if location.is_dir():
                entries = [(f, f.stem) for f in location.glob(pattern) if f.is_file()]
            elif location.is_file():
                # Single file (e.g., history.jsonl)
                entries = [(location, "history")]
            else:
                continue

            for path, session_id in entries:
                # A file can disappear between listing and stat; skip it.
                stat = self._safe_stat(path)
                if stat is None:
                    continue

                mtime = datetime.fromtimestamp(stat.st_mtime)
                if after_date is not None:
                    # Compare in after_date's own timezone so naive and aware
                    # cut-offs both work without raising TypeError.
                    comparable = (
                        datetime.fromtimestamp(stat.st_mtime, tz=tz) if tz is not None else mtime
                    )
                    if comparable < after_date:
                        continue

                sessions.append({
                    "path": path,
                    "session_id": session_id,
                    "modified": mtime,
                    "size": stat.st_size,
                    "llm": llm,
                })

        # Sort by modification time (most recent first)
        sessions.sort(key=lambda x: x["modified"], reverse=True)

        return sessions[:max(limit, 0)]

    def get_detected_llm(self) -> Optional[str]:
        """
        Get the currently active/detected LLM based on environment.

        Checks in order: CODITECT_LLM env var (if it names an installed tool),
        then the installed tool with the most recently modified session file.

        Returns:
            LLM name or None if none detected
        """
        tools = self.detect_installed_tools()

        # Check env var first
        env_llm = os.environ.get("CODITECT_LLM")
        if env_llm and env_llm in self.CLI_TOOLS:
            env_status = tools.get(env_llm)
            if env_status is not None and env_status.installed:
                return env_llm

        # Find most recently used
        most_recent_llm: Optional[str] = None
        most_recent_time: float = 0

        for llm_name, status in tools.items():
            if not status.installed:
                continue
            active = self.get_active_session(llm_name)
            if not active:
                continue
            stat = self._safe_stat(active)
            if stat is not None and stat.st_mtime > most_recent_time:
                most_recent_time = stat.st_mtime
                most_recent_llm = llm_name

        return most_recent_llm

    def to_dict(self) -> Dict[str, Any]:
        """
        Export detector state as dictionary.

        Returns:
            Dict with ``detected_tools`` (tool name -> ToolStatus dict),
            ``active_llm`` and ``last_scan`` (ISO-8601 string or None)
        """
        tools = self.detect_installed_tools()
        return {
            "detected_tools": {name: status.to_dict() for name, status in tools.items()},
            "active_llm": self.get_detected_llm(),
            "last_scan": self._last_full_scan.isoformat() if self._last_full_scan else None,
        }

# Singleton instance for module-level use (created lazily by get_detector()).
_detector: Optional[CLIToolDetector] = None


def get_detector() -> CLIToolDetector:
    """Return the shared CLIToolDetector, creating it on first call."""
    global _detector
    if _detector is not None:
        return _detector
    _detector = CLIToolDetector()
    return _detector

def detect_llm() -> Optional[str]:
    """Convenience wrapper: ask the shared detector for the current LLM.

    Returns:
        Whatever ``get_detector().get_detected_llm()`` returns (an LLM name
        or None).
    """
    shared = get_detector()
    return shared.get_detected_llm()

def list_installed_tools() -> List[str]:
    """Convenience wrapper: names of the LLM CLI tools reported as installed.

    Returns:
        Tool names, in detection order, whose status has ``installed`` set.
    """
    statuses = get_detector().detect_installed_tools()
    installed_names: List[str] = []
    for tool_name, tool_status in statuses.items():
        if tool_status.installed:
            installed_names.append(tool_name)
    return installed_names

if __name__ == "__main__":
    # CLI for testing: prints detection results as text or JSON.
    import argparse

    parser = argparse.ArgumentParser(description="CLI Tool Detector")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--llm", help="Check specific LLM")
    parser.add_argument("--sessions", action="store_true", help="List sessions")
    args = parser.parse_args()

    # --sessions only makes sense for a specific tool; fail loudly instead of
    # silently ignoring the flag.
    if args.sessions and not args.llm:
        parser.error("--sessions requires --llm")

    detector = CLIToolDetector()

    if args.llm:
        if args.sessions:
            try:
                sessions = detector.list_sessions(args.llm)
            except ValueError as exc:
                # Unknown LLM name: report a usage error rather than a traceback.
                parser.error(str(exc))
            if args.json:
                # Path and datetime values are not JSON-serializable as-is.
                print(json.dumps([{**s, "path": str(s["path"]), "modified": s["modified"].isoformat()} for s in sessions], indent=2))
            else:
                for s in sessions:
                    print(f"{s['session_id']}: {s['path']} ({s['modified']})")
        else:
            tools = detector.detect_installed_tools()
            status = tools.get(args.llm)
            if args.json:
                print(json.dumps(status.to_dict() if status else {}, indent=2))
            else:
                print(f"{args.llm}: {'installed' if status and status.installed else 'not installed'}")
    else:
        if args.json:
            print(json.dumps(detector.to_dict(), indent=2))
        else:
            tools = detector.detect_installed_tools()
            print("Detected LLM CLI Tools:")
            for name, status in tools.items():
                marker = "✓" if status.installed else "✗"
                version = f" ({status.version})" if status.version else ""
                print(f" {marker} {name}{version}")

            active = detector.get_detected_llm()
            if active:
                print(f"\nActive LLM: {active}")