# scripts-base

""" Base Type Expert Agent

Abstract base class for all Type Expert agents. Each expert specializes in
one document type and can:

- Deeply analyze whether a document truly belongs to its type
- Identify what's missing to satisfy all analysts
- Generate contextually appropriate content enhancements
- Explain its reasoning for audit trails
"""

from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple
from pathlib import Path
import re
import sys

# Put the directory two levels above this file on the import path before the
# project import below (presumably so the `core` package resolves when this
# file is run from its own folder -- confirm against the repository layout).
sys.path.insert(0, str(Path(__file__).parent.parent))
from core.models import Document, AnalystVote, ClassificationResult

@dataclass
class TypeAnalysis:
    """Result of a Type Expert's analysis.

    Every field is required (none has a default), and the positional order
    of the generated ``__init__`` follows the declaration order below.
    """

    # Core assessment
    is_this_type: bool                # True if document truly belongs to this type
    confidence: float                 # Expert's confidence in assessment (0-1)

    # Reasoning
    evidence_for: List[str]           # Evidence supporting this type
    evidence_against: List[str]       # Evidence against this type
    semantic_purpose: str             # What the document is really trying to do

    # Recommendations
    missing_signals: List[str]        # Signals needed to satisfy analysts
    recommended_changes: List[Dict]   # Specific content changes

    # Analyst targeting
    analysts_to_sway: Dict[str, str]  # {analyst_name: reason_they_voted_wrong}

    # Metadata
    expert_type: str                  # Which type expert made this analysis

@dataclass
class ContentEnhancement:
    """A specific content enhancement recommendation.

    Every field is required (none has a default), and the positional order
    of the generated ``__init__`` follows the declaration order below.
    """

    signal_type: str                         # e.g., 'prerequisites', 'api_table'
    content: str                             # The actual content to add
    insertion_point: str                     # 'after_frontmatter', 'before_first_h2', 'end', etc.
    reason: str                              # Why this enhancement helps
    expected_analyst_boost: Dict[str, float]  # {analyst: expected_confidence_boost}
    priority: int                            # 1-5, higher = more important

class TypeExpert(ABC):
    """
    Abstract base class for Type Expert agents.

    Each Type Expert is a specialist that deeply understands one document type
    and can analyze documents to determine if they truly belong to that type.

    Subclasses override the class-level configuration below and implement
    ``analyze`` and ``generate_enhancements``. The remaining methods are
    helpers for inspecting analyst votes and markdown content; none of them
    reads instance state.
    """

    # Subclasses must define these
    expert_type: str = "base"

    # NOTE: the three containers below are class-level objects shared by every
    # subclass that does not rebind them; override by assignment in the
    # subclass rather than mutating them in place.

    # Semantic indicators that strongly suggest this type
    strong_indicators: List[str] = []

    # Structural patterns typical of this type
    structural_patterns: List[Tuple[str, float]] = []

    # What each analyst looks for in this type
    analyst_expectations: Dict[str, List[str]] = {}

    @abstractmethod
    def analyze(
        self,
        document: Document,
        analyst_votes: List[AnalystVote]
    ) -> TypeAnalysis:
        """
        Deeply analyze whether document belongs to this type.

        Args:
            document: The document to analyze
            analyst_votes: Votes from all analysts for context

        Returns:
            TypeAnalysis with assessment and recommendations
        """

    @abstractmethod
    def generate_enhancements(
        self,
        document: Document,
        analysis: TypeAnalysis
    ) -> List[ContentEnhancement]:
        """
        Generate specific content enhancements to improve classification.

        Args:
            document: The document to enhance
            analysis: Previous analysis results

        Returns:
            List of ContentEnhancement recommendations
        """

    def get_analyst_vote(
        self,
        votes: List[AnalystVote],
        analyst_name: str
    ) -> Optional[AnalystVote]:
        """
        Get a specific analyst's vote.

        Args:
            votes: Votes to search; each is matched on its ``agent`` attribute
            analyst_name: Name of the analyst to look up

        Returns:
            The first vote whose ``agent`` equals ``analyst_name``, or None
            when no vote matches
        """
        return next((vote for vote in votes if vote.agent == analyst_name), None)

    def count_votes_for_type(
        self,
        votes: List[AnalystVote],
        doc_type: str
    ) -> Tuple[int, float]:
        """
        Count votes for a specific type and average their confidence.

        Args:
            votes: Votes to inspect
            doc_type: Classification value to count

        Returns:
            ``(count, mean_confidence)`` over the votes whose
            ``classification`` equals ``doc_type``; ``(0, 0.0)`` when there
            are none
        """
        confidences = [v.confidence for v in votes if v.classification == doc_type]
        if not confidences:
            # Avoid dividing by zero when nobody voted for this type.
            return 0, 0.0
        return len(confidences), sum(confidences) / len(confidences)

    def identify_disagreeing_analysts(
        self,
        votes: List[AnalystVote],
        expected_type: str
    ) -> Dict[str, AnalystVote]:
        """
        Find analysts that didn't vote for the expected type.

        Args:
            votes: Votes to inspect
            expected_type: Classification the votes are compared against

        Returns:
            Mapping of ``vote.agent`` to the vote, for every vote whose
            ``classification`` differs from ``expected_type``. If an agent
            appears more than once, its last disagreeing vote is kept.
        """
        return {
            vote.agent: vote
            for vote in votes
            if vote.classification != expected_type
        }

    def extract_headings(self, content: str) -> List[Tuple[int, str]]:
        """
        Extract all markdown ATX headings with their levels.

        Args:
            content: Markdown text to scan

        Returns:
            ``(level, text)`` tuples in document order, where ``level`` is the
            number of leading ``#`` characters (1-6) and ``text`` is stripped
        """
        # The separator is restricted to spaces/tabs: with ``\s+`` an empty
        # heading line such as "##" would swallow the newline and report the
        # *following* line as the heading text.
        return [
            (len(match.group(1)), match.group(2).strip())
            for match in re.finditer(r'^(#{1,6})[ \t]+(.+)$', content, re.MULTILINE)
        ]

    def extract_code_blocks(self, content: str) -> List[Tuple[str, str]]:
        """
        Extract fenced code blocks with their languages.

        Args:
            content: Markdown text to scan

        Returns:
            ``(language, code)`` tuples in document order. ``language`` is the
            first whitespace-delimited token of the fence's info string, or
            ``'text'`` when the fence has none; ``code`` is everything between
            the opening fence line and the closing fence.
        """
        blocks = []
        # The info string may contain anything except a newline or backtick.
        # Requiring ``\w*`` here rejected openers such as "```c++" or
        # '```python title="x"', after which the closing fence was treated as
        # an opener and the prose between blocks was returned as code.
        for match in re.finditer(r'```([^\n`]*)\n(.*?)```', content, re.DOTALL):
            info_tokens = match.group(1).split()
            lang = info_tokens[0] if info_tokens else 'text'
            blocks.append((lang, match.group(2)))
        return blocks

    def has_tables(self, content: str) -> bool:
        """
        Check if content has markdown tables.

        This is a loose heuristic: any single line containing at least three
        ``|`` characters counts as a table.
        """
        return re.search(r'\|.*\|.*\|', content) is not None

    def has_mermaid(self, content: str) -> bool:
        """Check if content has mermaid diagrams (case-insensitive fence match)."""
        return '```mermaid' in content.lower()

    def has_checkboxes(self, content: str) -> bool:
        """
        Check if content has task checkboxes.

        Matches ``[ ]``, ``[x]`` or ``[X]`` anywhere in the text; the match is
        not anchored to list items.
        """
        return re.search(r'\[[ x]\]', content, re.I) is not None

    def extract_frontmatter_type(self, document: Document) -> Optional[str]:
        """
        Get the type from frontmatter if present.

        Returns:
            The frontmatter ``type`` value when truthy, otherwise the
            ``component_type`` value (None when that key is absent)
        """
        frontmatter = document.frontmatter
        return frontmatter.get('type') or frontmatter.get('component_type')

    def analyze_semantic_purpose(self, document: Document) -> str:
        """
        Analyze what the document is semantically trying to accomplish.

        The text searched is ``document.body`` when truthy, otherwise
        ``document.content``. Each keyword group below is searched
        case-insensitively as a plain substring pattern.

        Args:
            document: The document to inspect

        Returns:
            The descriptions of all matching purposes joined with ``"; "`` in
            the order listed below, or ``"general documentation"`` when none
            match (including when the document has no text).
        """
        # Fall back to an empty string so a document whose body and content
        # are both None yields the default answer instead of a TypeError.
        text = document.body or document.content or ""

        # (keyword pattern, purpose description), checked in this order.
        purpose_indicators = (
            # Teaching/instructional
            (r'how to|learn|understand|tutorial|step by step',
             "teaching users how to do something"),
            # Reference/documentation
            (r'api|reference|specification|schema|configuration',
             "providing technical reference information"),
            # Process/workflow
            (r'workflow|pipeline|process|phase|stage',
             "defining a process or workflow"),
            # Decision
            (r'decision|context|consequences|tradeoff',
             "documenting an architectural decision"),
            # Agent/role
            (r'you are|responsible for|capabilities|role',
             "defining an AI agent's role and capabilities"),
            # Command
            (r'invocation|usage|command|arguments',
             "documenting a command interface"),
        )

        purposes = [
            description
            for pattern, description in purpose_indicators
            if re.search(pattern, text, re.I)
        ]
        if purposes:
            return "; ".join(purposes)
        return "general documentation"