scripts-extractor-factory
#!/usr/bin/env python3
"""
title: "Extractor Factory"
component_type: script
version: "1.0.0"
audience: contributor
status: stable
summary: "Factory for creating LLM session extractors"
keywords: ['factory', 'extractor', 'claude', 'codex', 'gemini', 'registry']
tokens: ~200
created: 2026-01-28
updated: 2026-01-28
script_name: "extractor_factory.py"
language: python
executable: true
usage: "from scripts.core.extractor_factory import ExtractorFactory"
python_version: "3.10+"
dependencies: []
modifies_files: false
network_access: false
requires_auth: false

Extractor Factory for CODITECT /sx command.

Creates appropriate SessionExtractor instances based on LLM type or auto-detection.

Track: J.13 (Memory - Generic Session Export)
Task: J.13.2.4
"""
from future import annotations
import sys from pathlib import Path from typing import Dict, List, Optional, Type
# Add parent paths for imports.
# The directory layout is derived from this file's own location (__file__),
# so the result does not depend on the current working directory.
_script_dir = Path(__file__).resolve().parent
_scripts_dir = _script_dir.parent
_coditect_root = _scripts_dir.parent

# Each directory is prepended at most once. _scripts_dir is handled last so
# that, when both are inserted, it ends up ahead of _coditect_root on sys.path.
if str(_coditect_root) not in sys.path:
    sys.path.insert(0, str(_coditect_root))
if str(_scripts_dir) not in sys.path:
    sys.path.insert(0, str(_scripts_dir))
from core.session_extractor import SessionExtractor
class ExtractorFactory:
    """
    Factory for creating LLM session extractors.

    Supports:
    - Explicit extractor creation by LLM name
    - Auto-detection based on source file format
    - Custom extractor registration

    Every method is a classmethod working on a class-level registry that maps
    lower-cased LLM identifiers to extractor classes. Built-in extractors are
    imported lazily on first use; one whose module cannot be imported is left
    out of the registry instead of raising.
    """

    # Built-in extractors as (llm identifier, module path, class name).
    # The order here is the order detect_extractor() probes them in.
    _BUILTIN_EXTRACTORS = (
        ("claude", "extractors.claude_extractor", "ClaudeExtractor"),
        ("codex", "extractors.codex_extractor", "CodexExtractor"),
        ("gemini", "extractors.gemini_extractor", "GeminiExtractor"),
        ("kimi", "extractors.kimi_extractor", "KimiExtractor"),
    )

    # Registry of available extractors
    _extractors: Dict[str, Type[SessionExtractor]] = {}
    _initialized: bool = False

    @classmethod
    def _ensure_initialized(cls) -> None:
        """Lazy initialization of extractors (runs once per class)."""
        if cls._initialized:
            return

        # Function-scope imports, matching how the extractor modules
        # themselves are only loaded on first use.
        import importlib
        import logging

        logger = logging.getLogger(__name__)

        for llm, module_path, class_name in cls._BUILTIN_EXTRACTORS:
            try:
                module = importlib.import_module(module_path)
            except ImportError as exc:
                # Best effort: a missing extractor must not break the others,
                # but leave a trace so "Unknown LLM" errors can be diagnosed.
                logger.debug("Extractor %r unavailable: %s", llm, exc)
                continue

            extractor_cls = getattr(module, class_name, None)
            if extractor_cls is None:
                logger.debug(
                    "Extractor %r unavailable: %s has no attribute %s",
                    llm, module_path, class_name,
                )
                continue

            # setdefault, not assignment: the registry dict is shared with
            # subclasses while the _initialized flag is set per class, so this
            # method can run again after a custom extractor was registered
            # under a built-in name. Never clobber such an entry.
            cls._extractors.setdefault(llm, extractor_cls)

        cls._initialized = True

    @classmethod
    def get_extractor(cls, llm: str) -> SessionExtractor:
        """
        Get extractor instance for specified LLM.

        Args:
            llm: LLM identifier, matched case-insensitively (claude, codex,
                gemini, kimi, or any identifier passed to register())

        Returns:
            A newly constructed SessionExtractor instance

        Raises:
            ValueError: If LLM is not supported
        """
        cls._ensure_initialized()

        llm_lower = llm.lower()
        if llm_lower not in cls._extractors:
            raise ValueError(f"Unknown LLM: {llm}. Supported: {list(cls._extractors.keys())}")

        return cls._extractors[llm_lower]()

    @classmethod
    def detect_extractor(cls, source: Path) -> Optional[SessionExtractor]:
        """
        Auto-detect appropriate extractor for source.

        Each registered extractor class is instantiated in registration order
        and asked whether it can handle the source; the first one that says
        yes is returned.

        Args:
            source: Path to session file

        Returns:
            SessionExtractor instance if detected, None otherwise
        """
        cls._ensure_initialized()

        for extractor_cls in cls._extractors.values():
            extractor = extractor_cls()
            if extractor.can_extract(source):
                return extractor

        return None

    @classmethod
    def get_or_detect(cls, source: Path, llm: Optional[str] = None) -> SessionExtractor:
        """
        Get extractor by LLM name or auto-detect from source.

        Args:
            source: Path to session file
            llm: Optional LLM identifier; when given (and non-empty) it takes
                precedence and the source is not inspected

        Returns:
            SessionExtractor instance

        Raises:
            ValueError: If no extractor found
        """
        if llm:
            return cls.get_extractor(llm)

        extractor = cls.detect_extractor(source)
        if extractor is None:
            raise ValueError(f"Could not detect LLM for source: {source}")

        return extractor

    @classmethod
    def register(cls, llm: str, extractor_cls: Type[SessionExtractor]) -> None:
        """
        Register a custom extractor.

        An existing entry with the same (lower-cased) identifier is replaced.

        Args:
            llm: LLM identifier
            extractor_cls: SessionExtractor subclass
        """
        # Initialize first so the built-ins are loaded before the custom entry
        # is stored.
        cls._ensure_initialized()
        cls._extractors[llm.lower()] = extractor_cls

    @classmethod
    def list_supported(cls) -> List[str]:
        """List supported LLM identifiers."""
        cls._ensure_initialized()
        return list(cls._extractors.keys())

    @classmethod
    def is_supported(cls, llm: str) -> bool:
        """Check if LLM is supported."""
        cls._ensure_initialized()
        return llm.lower() in cls._extractors
# Convenience functions
def get_extractor(llm: str) -> SessionExtractor:
    """
    Module-level shortcut for ExtractorFactory.get_extractor().

    Args:
        llm: LLM identifier

    Returns:
        The extractor produced by ExtractorFactory.get_extractor(llm)
    """
    extractor = ExtractorFactory.get_extractor(llm)
    return extractor
def detect_extractor(source: Path) -> Optional[SessionExtractor]:
    """
    Module-level shortcut for ExtractorFactory.detect_extractor().

    Args:
        source: Path to session file

    Returns:
        The result of ExtractorFactory.detect_extractor(source), which may
        be None
    """
    detected = ExtractorFactory.detect_extractor(source)
    return detected
def get_or_detect(source: Path, llm: Optional[str] = None) -> SessionExtractor:
    """
    Module-level shortcut for ExtractorFactory.get_or_detect().

    Args:
        source: Path to session file
        llm: Optional LLM identifier

    Returns:
        The extractor produced by ExtractorFactory.get_or_detect(source, llm)
    """
    extractor = ExtractorFactory.get_or_detect(source, llm)
    return extractor
def list_supported_llms() -> List[str]:
    """
    Module-level shortcut for ExtractorFactory.list_supported().

    Returns:
        The list returned by ExtractorFactory.list_supported()
    """
    supported = ExtractorFactory.list_supported()
    return supported
# Command-line entry point: runs only when the file is executed as a script,
# never on import.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Extractor Factory")
    parser.add_argument("--list", action="store_true", help="List supported LLMs")
    parser.add_argument("--detect", help="Detect LLM for file")
    args = parser.parse_args()

    if args.list:
        # --list wins over --detect when both are given.
        print("Supported LLMs:")
        for llm in ExtractorFactory.list_supported():
            print(f" - {llm}")
    elif args.detect:
        source = Path(args.detect)
        extractor = ExtractorFactory.detect_extractor(source)
        # detect_extractor() signals "no match" with None; test for exactly
        # that rather than relying on the extractor object's truthiness.
        if extractor is not None:
            print(f"Detected: {extractor.llm_name}")
        else:
            print("Could not detect LLM type")
    else:
        # No option given: print a short usage hint.
        print("Use --list to see supported LLMs")
        print("Use --detect <file> to detect LLM type")