Skip to main content

scripts-extractor-factory

#!/usr/bin/env python3
"""
title: "Extractor Factory"
component_type: script
version: "1.0.0"
audience: contributor
status: stable
summary: "Factory for creating LLM session extractors"
keywords: ['factory', 'extractor', 'claude', 'codex', 'gemini', 'registry']
tokens: ~200
created: 2026-01-28
updated: 2026-01-28
script_name: "extractor_factory.py"
language: python
executable: true
usage: "from scripts.core.extractor_factory import ExtractorFactory"
python_version: "3.10+"
dependencies: []
modifies_files: false
network_access: false
requires_auth: false

Extractor Factory for CODITECT /sx command.

Creates appropriate SessionExtractor instances based on LLM type or auto-detection.

Track: J.13 (Memory - Generic Session Export)
Task: J.13.2.4
"""

from __future__ import annotations

import importlib
import sys
from pathlib import Path
from typing import Dict, List, Optional, Type

# Add parent paths for imports.
# The bare imports used by this module (``core.session_extractor`` and the
# ``extractors.*`` modules) resolve only when the directory one level above
# this file's directory is on sys.path; the directory above that is added as
# well (presumably the repository root, so ``scripts.core...`` imports work).
_script_dir = Path(__file__).resolve().parent
_scripts_dir = _script_dir.parent
_coditect_root = _scripts_dir.parent
# Each path is inserted at the front only if it is not already present.
# Because the root is inserted first, ``_scripts_dir`` ends up ahead of
# ``_coditect_root`` when both are newly added.
if str(_coditect_root) not in sys.path:
    sys.path.insert(0, str(_coditect_root))
if str(_scripts_dir) not in sys.path:
    sys.path.insert(0, str(_scripts_dir))

from core.session_extractor import SessionExtractor

class ExtractorFactory:
    """
    Factory for creating LLM session extractors.

    Supports:
    - Explicit extractor creation by LLM name
    - Auto-detection based on source file format
    - Custom extractor registration

    Built-in extractors are imported lazily on first use. A built-in whose
    module cannot be imported is skipped rather than treated as fatal; the
    failure is remembered and reported in the ``ValueError`` raised when that
    LLM is later requested by name.
    """

    # Built-in extractors: LLM identifier -> (module path, class name).
    # Insertion order is also the order in which detect_extractor() probes.
    _BUILTIN_EXTRACTORS = {
        "claude": ("extractors.claude_extractor", "ClaudeExtractor"),
        "codex": ("extractors.codex_extractor", "CodexExtractor"),
        "gemini": ("extractors.gemini_extractor", "GeminiExtractor"),
        "kimi": ("extractors.kimi_extractor", "KimiExtractor"),
    }

    # Registry of available extractors
    _extractors: Dict[str, Type[SessionExtractor]] = {}
    # Why a built-in could not be loaded, keyed by LLM identifier
    _import_errors: Dict[str, str] = {}
    _initialized: bool = False

    @staticmethod
    def _normalize(llm: str) -> str:
        """Return the registry key for ``llm`` (trimmed and lower-cased)."""
        return llm.strip().lower()

    @classmethod
    def _ensure_initialized(cls) -> None:
        """Lazy initialization of extractors.

        Imports every built-in extractor once. Missing or broken built-ins
        are skipped (best effort) and the reason is stored in
        ``_import_errors`` so it can be reported later instead of being lost.
        """
        if cls._initialized:
            return

        for llm, (module_name, class_name) in cls._BUILTIN_EXTRACTORS.items():
            try:
                module = importlib.import_module(module_name)
            except ImportError as exc:
                cls._import_errors[llm] = f"{type(exc).__name__}: {exc}"
                continue

            extractor_cls = getattr(module, class_name, None)
            if extractor_cls is None:
                # Equivalent of the ImportError that
                # ``from module import Name`` raises for a missing name.
                cls._import_errors[llm] = (
                    f"ImportError: cannot import name {class_name!r} "
                    f"from {module_name!r}"
                )
                continue

            cls._extractors[llm] = extractor_cls

        cls._initialized = True

    @classmethod
    def get_extractor(cls, llm: str) -> SessionExtractor:
        """
        Get extractor instance for specified LLM.

        Args:
            llm: LLM identifier (claude, codex, gemini, kimi, or any name
                added through ``register``); matched case-insensitively,
                ignoring surrounding whitespace

        Returns:
            SessionExtractor instance (a new instance on every call)

        Raises:
            ValueError: If LLM is not supported
        """
        cls._ensure_initialized()

        key = cls._normalize(llm)
        if key not in cls._extractors:
            message = f"Unknown LLM: {llm}. Supported: {list(cls._extractors.keys())}"
            import_error = cls._import_errors.get(key)
            if import_error is not None:
                # A known built-in that failed to load: say why.
                message += (
                    f" (built-in extractor for '{key}' failed to import: "
                    f"{import_error})"
                )
            raise ValueError(message)

        return cls._extractors[key]()

    @classmethod
    def detect_extractor(cls, source: Path) -> Optional[SessionExtractor]:
        """
        Auto-detect appropriate extractor for source.

        Instantiates each registered extractor in registration order and
        returns the first one whose ``can_extract(source)`` is truthy.

        Args:
            source: Path to session file

        Returns:
            SessionExtractor instance if detected, None otherwise
        """
        cls._ensure_initialized()

        for extractor_cls in cls._extractors.values():
            extractor = extractor_cls()
            if extractor.can_extract(source):
                return extractor

        return None

    @classmethod
    def get_or_detect(cls, source: Path, llm: Optional[str] = None) -> SessionExtractor:
        """
        Get extractor by LLM name or auto-detect from source.

        Args:
            source: Path to session file
            llm: Optional LLM identifier; when given (and non-empty) it takes
                precedence and ``source`` is not inspected

        Returns:
            SessionExtractor instance

        Raises:
            ValueError: If no extractor found
        """
        if llm:
            return cls.get_extractor(llm)

        extractor = cls.detect_extractor(source)
        if extractor is None:
            raise ValueError(f"Could not detect LLM for source: {source}")

        return extractor

    @classmethod
    def register(cls, llm: str, extractor_cls: Type[SessionExtractor]) -> None:
        """
        Register a custom extractor.

        Replaces any extractor already registered under the same identifier.

        Args:
            llm: LLM identifier
            extractor_cls: SessionExtractor subclass
        """
        # Initialize first so a later lazy init cannot overwrite this entry.
        cls._ensure_initialized()
        key = cls._normalize(llm)
        cls._extractors[key] = extractor_cls
        # The identifier is usable now; a stale import failure no longer applies.
        cls._import_errors.pop(key, None)

    @classmethod
    def list_supported(cls) -> List[str]:
        """List supported LLM identifiers."""
        cls._ensure_initialized()
        return list(cls._extractors.keys())

    @classmethod
    def is_supported(cls, llm: str) -> bool:
        """Check if LLM is supported."""
        cls._ensure_initialized()
        return cls._normalize(llm) in cls._extractors

# Convenience functions

def get_extractor(llm: str) -> SessionExtractor:
    """Get extractor for specified LLM.

    Module-level shortcut that delegates to
    ``ExtractorFactory.get_extractor``.
    """
    factory = ExtractorFactory
    return factory.get_extractor(llm)

def detect_extractor(source: Path) -> Optional[SessionExtractor]:
    """Auto-detect extractor for source.

    Module-level shortcut that delegates to
    ``ExtractorFactory.detect_extractor``.
    """
    detected = ExtractorFactory.detect_extractor(source)
    return detected

def get_or_detect(source: Path, llm: Optional[str] = None) -> SessionExtractor:
    """Get or detect extractor.

    Module-level shortcut that delegates to
    ``ExtractorFactory.get_or_detect`` with the same arguments.
    """
    resolved = ExtractorFactory.get_or_detect(source, llm)
    return resolved

def list_supported_llms() -> List[str]:
    """List supported LLMs.

    Module-level shortcut that delegates to
    ``ExtractorFactory.list_supported``.
    """
    supported = ExtractorFactory.list_supported()
    return supported

def _main() -> None:
    """Command-line entry point.

    ``--list`` prints every supported LLM identifier; ``--detect FILE``
    prints the detected extractor's ``llm_name`` or a not-detected notice.
    With neither option a short usage hint is printed.
    """
    # Imported here so importing this module as a library does not pull in
    # argparse.
    import argparse

    parser = argparse.ArgumentParser(description="Extractor Factory")
    parser.add_argument("--list", action="store_true", help="List supported LLMs")
    parser.add_argument("--detect", help="Detect LLM for file")
    args = parser.parse_args()

    if args.list:
        print("Supported LLMs:")
        for llm in ExtractorFactory.list_supported():
            print(f" - {llm}")

    elif args.detect:
        source = Path(args.detect)
        extractor = ExtractorFactory.detect_extractor(source)
        if extractor:
            print(f"Detected: {extractor.llm_name}")
        else:
            print("Could not detect LLM type")

    else:
        print("Use --list to see supported LLMs")
        print("Use --detect <file> to detect LLM type")


if __name__ == "__main__":
    _main()