scripts-extractor-factory

#!/usr/bin/env python3
"""

title: "Extractor Factory"
component_type: script
version: "1.0.0"
audience: contributor
status: stable
summary: "Factory for creating LLM session extractors"
keywords: ['factory', 'extractor', 'claude', 'codex', 'gemini', 'registry']
tokens: ~200
created: 2026-01-28
updated: 2026-01-28
script_name: "extractor_factory.py"
language: python
executable: true
usage: "from scripts.core.extractor_factory import ExtractorFactory"
python_version: "3.10+"
dependencies: []
modifies_files: false
network_access: false
requires_auth: false

Extractor Factory for CODITECT /sx command.

Creates appropriate SessionExtractor instances based on LLM type or auto-detection.

Track: J.13 (Memory - Generic Session Export)
Task: J.13.2.4
"""

from __future__ import annotations

import importlib
import logging
import sys
from pathlib import Path
from typing import Dict, List, Optional, Type

# Add parent paths for imports

# Resolve the directory holding this file, its parent, and its grandparent,
# then put the latter two at the front of sys.path (only when absent) so the
# top-level imports below (e.g. ``core.session_extractor``) can be resolved
# regardless of the current working directory.
_script_dir = Path(__file__).resolve().parent
_scripts_dir = _script_dir.parent
_coditect_root = _scripts_dir.parent
if str(_coditect_root) not in sys.path:
    sys.path.insert(0, str(_coditect_root))
# Inserted last so that, when both are added, _scripts_dir ends up ahead of
# _coditect_root on the search path.
if str(_scripts_dir) not in sys.path:
    sys.path.insert(0, str(_scripts_dir))

from core.session_extractor import SessionExtractor

class ExtractorFactory:
    """
    Factory for creating LLM session extractors.

    Supports:
    - Explicit extractor creation by LLM name
    - Auto-detection based on source file format
    - Custom extractor registration

    The registry is class-level state: it is populated lazily on first use
    and shared by every caller of the class methods below. LLM identifiers
    are matched case-insensitively (they are lower-cased before lookup).
    """

    # Built-in extractors as (identifier, module path, class name). Each is
    # imported on first use; entries whose import fails are skipped.
    _BUILTIN_EXTRACTORS = (
        ("claude", "extractors.claude_extractor", "ClaudeExtractor"),
        ("codex", "extractors.codex_extractor", "CodexExtractor"),
        ("gemini", "extractors.gemini_extractor", "GeminiExtractor"),
        ("kimi", "extractors.kimi_extractor", "KimiExtractor"),
    )

    # Registry of available extractors (lower-cased identifier -> class)
    _extractors: Dict[str, Type[SessionExtractor]] = {}
    _initialized: bool = False

    @classmethod
    def _ensure_initialized(cls) -> None:
        """Lazily import the built-in extractors into the registry.

        Runs at most once. A built-in whose module (or class within the
        module) cannot be imported is skipped rather than treated as an
        error, so the factory still works with whichever extractors are
        available; the skip is reported at DEBUG level for diagnosis.
        """
        if cls._initialized:
            return

        for llm, module_name, class_name in cls._BUILTIN_EXTRACTORS:
            try:
                module = importlib.import_module(module_name)
                extractor_cls = getattr(module, class_name)
            except (ImportError, AttributeError) as exc:
                # Best-effort: a missing optional extractor must not break
                # the factory, but leave a trace instead of failing silently.
                logging.getLogger(__name__).debug(
                    "Extractor %r unavailable (%s.%s): %s",
                    llm, module_name, class_name, exc,
                )
                continue
            cls._extractors[llm] = extractor_cls

        cls._initialized = True

    @classmethod
    def get_extractor(cls, llm: str) -> SessionExtractor:
        """
        Get extractor instance for specified LLM.

        Args:
            llm: LLM identifier, case-insensitive. Built-ins are claude,
                codex, gemini and kimi (when importable), plus any
                identifier added through ``register``.

        Returns:
            A new SessionExtractor instance (constructed with no arguments)

        Raises:
            ValueError: If LLM is not supported
        """
        cls._ensure_initialized()

        try:
            extractor_cls = cls._extractors[llm.lower()]
        except KeyError:
            raise ValueError(
                f"Unknown LLM: {llm}. Supported: {list(cls._extractors)}"
            ) from None

        return extractor_cls()

    @classmethod
    def detect_extractor(cls, source: Path) -> Optional[SessionExtractor]:
        """
        Auto-detect appropriate extractor for source.

        Each registered extractor class is instantiated with no arguments
        and asked ``can_extract(source)``; the first one (in registration
        order) that answers truthy is returned.

        Args:
            source: Path to session file

        Returns:
            SessionExtractor instance if detected, None otherwise
        """
        cls._ensure_initialized()

        for extractor_cls in cls._extractors.values():
            extractor = extractor_cls()
            if extractor.can_extract(source):
                return extractor

        return None

    @classmethod
    def get_or_detect(cls, source: Path, llm: Optional[str] = None) -> SessionExtractor:
        """
        Get extractor by LLM name or auto-detect from source.

        Args:
            source: Path to session file
            llm: Optional LLM identifier; when given (and non-empty) it
                takes precedence and ``source`` is not inspected

        Returns:
            SessionExtractor instance

        Raises:
            ValueError: If ``llm`` is unknown, or if no ``llm`` was given
                and no registered extractor accepts ``source``
        """
        if llm:
            return cls.get_extractor(llm)

        extractor = cls.detect_extractor(source)
        if extractor is None:
            raise ValueError(f"Could not detect LLM for source: {source}")

        return extractor

    @classmethod
    def register(cls, llm: str, extractor_cls: Type[SessionExtractor]) -> None:
        """
        Register a custom extractor.

        Built-ins are loaded first, so registering under an existing
        identifier replaces that entry instead of being overwritten by a
        later lazy initialization.

        Args:
            llm: LLM identifier (stored lower-cased)
            extractor_cls: SessionExtractor subclass
        """
        cls._ensure_initialized()
        cls._extractors[llm.lower()] = extractor_cls

    @classmethod
    def list_supported(cls) -> List[str]:
        """List supported LLM identifiers, in registration order."""
        cls._ensure_initialized()
        return list(cls._extractors)

    @classmethod
    def is_supported(cls, llm: str) -> bool:
        """Check if LLM is supported (case-insensitive)."""
        cls._ensure_initialized()
        return llm.lower() in cls._extractors

# Convenience functions

def get_extractor(llm: str) -> SessionExtractor:
    """Return an extractor for the named LLM.

    Module-level shortcut that delegates to ``ExtractorFactory.get_extractor``.
    """
    extractor = ExtractorFactory.get_extractor(llm)
    return extractor

def detect_extractor(source: Path) -> Optional[SessionExtractor]:
    """Return an auto-detected extractor for ``source``, or None.

    Module-level shortcut that delegates to
    ``ExtractorFactory.detect_extractor``.
    """
    detected = ExtractorFactory.detect_extractor(source)
    return detected

def get_or_detect(source: Path, llm: Optional[str] = None) -> SessionExtractor:
    """Return an extractor chosen by ``llm`` name or detected from ``source``.

    Module-level shortcut that delegates to ``ExtractorFactory.get_or_detect``.
    """
    extractor = ExtractorFactory.get_or_detect(source, llm)
    return extractor

def list_supported_llms() -> List[str]:
    """Return the identifiers of all supported LLMs.

    Module-level shortcut that delegates to ``ExtractorFactory.list_supported``.
    """
    supported = ExtractorFactory.list_supported()
    return supported

def main(argv: Optional[List[str]] = None) -> None:
    """Command-line entry point for inspecting the extractor registry.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what running the file as a
            script does.

    Options:
        --list            Print every supported LLM identifier.
        --detect <file>   Print the ``llm_name`` of the extractor detected
                          for the file, or a message when none matches.

    With neither option, a short usage hint is printed.
    """
    # Imported here because argparse is only needed when run as a CLI.
    import argparse

    parser = argparse.ArgumentParser(description="Extractor Factory")
    parser.add_argument("--list", action="store_true", help="List supported LLMs")
    parser.add_argument("--detect", help="Detect LLM for file")
    args = parser.parse_args(argv)

    if args.list:
        print("Supported LLMs:")
        for llm in ExtractorFactory.list_supported():
            print(f"  - {llm}")

    elif args.detect:
        source = Path(args.detect)
        extractor = ExtractorFactory.detect_extractor(source)
        if extractor:
            print(f"Detected: {extractor.llm_name}")
        else:
            print("Could not detect LLM type")

    else:
        print("Use --list to see supported LLMs")
        print("Use --detect <file> to detect LLM type")


if __name__ == "__main__":
    main()

#!/usr/bin/env python3 """​

Add parent paths for imports

Convenience functions

#!/usr/bin/env python3 """