scripts-guide-expert

""" Guide Type Expert

Specializes in understanding what makes a document a "guide" vs other types. Guides are instructional documents that teach users how to accomplish tasks.

Key characteristics:

- Step-by-step instructions
- Prerequisites section
- Practical examples
- Troubleshooting section
- Clear learning objectives
"""

import re
import sys
from pathlib import Path
from typing import Dict, List, Optional

# Put the parent of this file's directory on the import path before the
# project imports below, so that `core.models` can be resolved.
sys.path.insert(0, str(Path(__file__).parent.parent))

from core.models import Document, AnalystVote
from .base import TypeExpert, TypeAnalysis, ContentEnhancement

class GuideExpert(TypeExpert): """Expert in identifying and enhancing guide documents."""

# NOTE(review): in this copy of the file everything below sits at column 0, so
# as written these names are module-level rather than members of GuideExpert.
# The functions below take `self` and read `self.expert_type` and
# `self.strong_indicators`, so they are presumably meant to be indented into
# the class body -- confirm against the upstream file.

# Document-type label; `analyze` passes it through as `expert_type` on the
# TypeAnalysis it returns.
expert_type = "guide"

# Strong semantic indicators of a guide.
# `analyze` runs every pattern over the document text with
# re.search(..., re.I) and records one supporting-evidence entry per pattern
# that matches anywhere in the text.
strong_indicators = [
    r'how to\b',                       # "how to" ending on a word boundary
    r'step\s*\d',                      # "step 1", "step1", "Step  2", ...
    r'tutorial',
    r'learn\s+(?:how|to)',             # "learn how", "learn to"
    r'getting\s+started',
    r'walkthrough',
    r'follow\s+(?:these|the)\s+steps',
]

# What each analyst looks for in guides.
# Keys are analyst names: 'content', 'semantic', 'pattern' and 'metadata' are
# the names `analyze` matches when it builds `analysts_to_sway`, and
# 'structural' appears in the boosts set by `generate_enhancements`.
# Values are human-readable descriptions only. No code visible in this file
# reads this mapping -- presumably it is consumed by the base class or by
# callers (TODO confirm).
analyst_expectations = {
    'metadata': [
        "type: guide in frontmatter",
        "component_type: guide",
        "audience field",
    ],
    'content': [
        "## Prerequisites section",
        "## Step N headers",
        "## Quick Start section",
        "## Troubleshooting section",
    ],
    'structural': [
        "Path contains /guides/ or /getting-started/",
        "Numbered sections",
        "Progressive structure",
    ],
    'semantic': [
        "Instructional intent",
        "Teaching language",
        "Action-oriented content",
    ],
    'pattern': [
        "Filename matches guide patterns",
        "Title suggests instruction",
    ],
}

def analyze(
    self,
    document: Document,
    analyst_votes: List[AnalystVote]
) -> TypeAnalysis:
    """Analyze if document is truly a guide.

    Collects evidence for and against the "guide" classification from the
    document text and its level-2 headings, turns it into a confidence
    score, and reports which guide sections are still missing.

    Scoring: each supporting item adds 0.15 (capped at 0.98) and each
    counter item subtracts 0.10. The document counts as a guide when it has
    at least two supporting items and the unclamped score is above 0.5.
    The returned confidence is floored at 0.

    Args:
        document: Document to classify. Its ``body`` is used when truthy,
            otherwise its ``content``.
        analyst_votes: Votes passed to ``identify_disagreeing_analysts``
            to find analysts that did not vote 'guide'.

    Returns:
        A TypeAnalysis carrying the verdict, confidence, both evidence
        lists, the missing section signals, and a hint per disagreeing
        analyst. ``recommended_changes`` is left empty here.
    """
    content = document.body or document.content
    headings = self.extract_headings(content)
    # Assumes extract_headings yields (level, text) pairs -- TODO confirm.
    # Only level-2 headings are inspected.
    h2_texts = [h[1].lower() for h in headings if h[0] == 2]

    def h2_mentions(*needles: str) -> bool:
        """Return True if any level-2 heading contains any of ``needles``."""
        return any(needle in text for text in h2_texts for needle in needles)

    # Compute every heading signal once and reuse it for both the evidence
    # and the missing-signal report, so the two can never disagree.
    # 'prereq' also covers 'prerequisites'; previously a "Prereqs" heading
    # counted as evidence yet was still reported as missing.
    has_steps = h2_mentions('step')
    has_prereqs = h2_mentions('prereq')
    has_troubleshooting = h2_mentions('troubleshoot')
    has_quick_start = h2_mentions('quick start', 'getting started')
    has_next_steps = h2_mentions('next')

    # Gather evidence: strong textual indicators first.
    evidence_for = [
        f"Contains guide indicator: '{indicator}'"
        for indicator in self.strong_indicators
        if re.search(indicator, content, re.I)
    ]
    evidence_against = []

    # Structural evidence from headings.
    if has_steps:
        evidence_for.append("Has step-based structure")
    if has_prereqs:
        evidence_for.append("Has prerequisites section")
    if has_troubleshooting:
        evidence_for.append("Has troubleshooting section")
    if has_quick_start:
        evidence_for.append("Has quick start section")

    # Check for code examples (guides usually have them)
    code_blocks = self.extract_code_blocks(content)
    if len(code_blocks) >= 2:
        evidence_for.append(f"Has {len(code_blocks)} code examples")

    # Check for numbered lists (step-by-step)
    if re.search(r'^\d+\.\s+', content, re.MULTILINE):
        evidence_for.append("Uses numbered lists for steps")

    # Evidence against being a guide
    if self.has_mermaid(content) and not has_steps:
        evidence_against.append("Has diagrams but no steps - might be workflow")

    # Check if it's really a reference
    if re.search(r'##\s*API\s*Reference', content, re.I):
        evidence_against.append("Has API Reference section - might be reference doc")

    # Check if it's really an ADR: three or more headings naming ADR sections.
    adr_sections = {'status', 'context', 'decision', 'consequences'}
    adr_matches = sum(1 for h in h2_texts if any(s in h for s in adr_sections))
    if adr_matches >= 3:
        evidence_against.append("Has ADR structure - might be architecture decision")

    # Calculate confidence
    confidence = min(0.98, len(evidence_for) * 0.15)
    if evidence_against:
        confidence -= len(evidence_against) * 0.1

    is_guide = len(evidence_for) >= 2 and confidence > 0.5

    # Hint for each disagreeing analyst; analysts without a hint are skipped.
    sway_hints = {
        'content': "Needs more guide section headers (Prerequisites, Step N, Troubleshooting)",
        'semantic': "Needs more instructional language and learning objectives",
        'pattern': "Filename or path doesn't match guide conventions",
        'metadata': "Frontmatter needs type: guide",
    }
    disagreeing = self.identify_disagreeing_analysts(analyst_votes, 'guide')
    analysts_to_sway = {
        analyst_name: sway_hints[analyst_name]
        for analyst_name, _vote in disagreeing.items()
        if analyst_name in sway_hints
    }

    # Identify missing signals
    missing = []
    if not has_prereqs:
        missing.append('prerequisites')
    if not has_steps:
        missing.append('step_headers')
    if not has_troubleshooting:
        missing.append('troubleshooting')
    if not has_next_steps:
        missing.append('next_steps')

    return TypeAnalysis(
        is_this_type=is_guide,
        confidence=max(0, confidence),
        evidence_for=evidence_for,
        evidence_against=evidence_against,
        semantic_purpose=self.analyze_semantic_purpose(document),
        missing_signals=missing,
        recommended_changes=[],  # Filled by generate_enhancements
        analysts_to_sway=analysts_to_sway,
        expert_type=self.expert_type
    )

def generate_enhancements(
    self,
    document: Document,
    analysis: TypeAnalysis
) -> List[ContentEnhancement]:
    """Build one content enhancement per guide section reported missing.

    For each of 'prerequisites', 'step_headers', 'troubleshooting' and
    'next_steps' found in ``analysis.missing_signals``, the matching
    section text is generated from the document and wrapped in a
    ContentEnhancement with its insertion point, reason, expected analyst
    boosts and priority.

    Args:
        document: Document to enhance. Its ``body`` is used when truthy,
            otherwise its ``content``; the frontmatter 'title' falls back
            to 'this topic'.
        analysis: Analysis whose ``missing_signals`` selects the sections.

    Returns:
        The enhancements in the fixed order prerequisites, step headers,
        troubleshooting, next steps. Empty when nothing is missing.
    """
    body_text = document.body or document.content
    doc_title = document.frontmatter.get('title', 'this topic')
    proposals = []

    # Prerequisites, inferred from the tools and skills the text mentions.
    if 'prerequisites' in analysis.missing_signals:
        inferred_prereqs = self._infer_prerequisites(body_text)
        proposals.append(ContentEnhancement(
            signal_type='prerequisites',
            content=self._generate_prerequisites_section(inferred_prereqs),
            insertion_point='after_frontmatter',
            reason="Guides need prerequisites to set expectations",
            expected_analyst_boost={'content': 0.15, 'semantic': 0.10},
            priority=1
        ))

    # Step headers, taken from numbered items in the text when available.
    if 'step_headers' in analysis.missing_signals:
        inferred_steps = self._infer_steps(body_text, doc_title)
        proposals.append(ContentEnhancement(
            signal_type='step_headers',
            content=self._generate_step_section(inferred_steps),
            insertion_point='after_overview',
            reason="Guides need numbered steps for clarity",
            expected_analyst_boost={'content': 0.20, 'structural': 0.10},
            priority=1
        ))

    # Troubleshooting, keyed off failure-related wording in the text.
    if 'troubleshooting' in analysis.missing_signals:
        likely_issues = self._infer_common_issues(body_text, doc_title)
        proposals.append(ContentEnhancement(
            signal_type='troubleshooting',
            content=self._generate_troubleshooting_section(likely_issues),
            insertion_point='before_end',
            reason="Guides should help users resolve common issues",
            expected_analyst_boost={'content': 0.10, 'semantic': 0.05},
            priority=2
        ))

    # Next steps, a fixed closing section.
    if 'next_steps' in analysis.missing_signals:
        proposals.append(ContentEnhancement(
            signal_type='next_steps',
            content=self._generate_next_steps(document),
            insertion_point='end',
            reason="Guides should point to next learning resources",
            expected_analyst_boost={'content': 0.10, 'structural': 0.05},
            priority=3
        ))

    return proposals

def _infer_prerequisites(self, content: str) -> List[str]:
    """Infer prerequisites from document content."""
    prereqs = []

    # Look for tool mentions
    if re.search(r'python|pip|python3', content, re.I):
        prereqs.append("Python 3.10+ installed")
    if re.search(r'node|npm|yarn', content, re.I):
        prereqs.append("Node.js 18+ installed")
    if re.search(r'git\b|clone|repository', content, re.I):
        prereqs.append("Git configured")
    if re.search(r'docker|container', content, re.I):
        prereqs.append("Docker installed")
    if re.search(r'claude|anthropic|api', content, re.I):
        prereqs.append("Claude API access configured")
    if re.search(r'gcloud|google cloud|gcp', content, re.I):
        prereqs.append("Google Cloud SDK installed")

    # Look for knowledge prerequisites
    if re.search(r'yaml|json', content, re.I):
        prereqs.append("Basic understanding of YAML/JSON")
    if re.search(r'command\s*line|terminal|bash', content, re.I):
        prereqs.append("Familiarity with command line")

    if not prereqs:
        prereqs = ["Basic understanding of the topic", "Access to required resources"]

    return prereqs

def _generate_prerequisites_section(self, prereqs: List[str]) -> str:
    """Generate a prerequisites section."""
    items = "\n".join(f"- [ ] {p}" for p in prereqs)
    return f"""

Prerequisites

Before starting, ensure you have: {items}

Verify setup:

# Run verification checks as needed

"""

def _infer_steps(self, content: str, title: str) -> List[str]:
    """Infer logical steps from content."""
    # Look for existing numbered items or action verbs
    steps = []

    # Check for existing numbered content
    numbered = re.findall(r'^\d+\.\s+(.+)$', content, re.MULTILINE)
    if numbered and len(numbered) >= 3:
        steps = numbered[:5]
    else:
        # Generate generic steps based on content analysis
        steps = [
            "Set up your environment",
            "Configure the necessary settings",
            "Execute the main process",
            "Verify the results",
        ]

    return steps

def _generate_step_section(self, steps: List[str]) -> str:
    """Generate step-by-step section."""
    step_content = "\n## Quick Start\n\n"
    for i, step in enumerate(steps, 1):
        step_content += f"### Step {i}: {step.strip()}\n\n"
        step_content += f"[Details for step {i}]\n\n"
    return step_content

def _infer_common_issues(self, content: str, title: str) -> List[Dict]:
    """Infer common issues from content."""
    issues = []

    # Look for error patterns
    if re.search(r'error|fail|exception', content, re.I):
        issues.append({
            'problem': 'Unexpected error occurs',
            'solution': 'Check logs for specific error message and verify prerequisites'
        })

    if re.search(r'permission|access|auth', content, re.I):
        issues.append({
            'problem': 'Permission or access denied',
            'solution': 'Verify credentials and access permissions are correctly configured'
        })

    if re.search(r'not found|missing', content, re.I):
        issues.append({
            'problem': 'Resource not found',
            'solution': 'Ensure all required files and dependencies are in place'
        })

    if not issues:
        issues = [
            {'problem': 'Setup not working as expected', 'solution': 'Verify all prerequisites are met'},
            {'problem': 'Unexpected behavior', 'solution': 'Check configuration settings'}
        ]

    return issues

def _generate_troubleshooting_section(self, issues: List[Dict]) -> str:
    """Generate troubleshooting section."""
    content = "\n## Troubleshooting\n\n"
    for issue in issues:
        content += f"### {issue['problem']}\n"
        content += f"**Problem:** {issue['problem']}\n"
        content += f"**Solution:** {issue['solution']}\n\n"
    return content

def _generate_next_steps(self, document: Document) -> str:
    """Generate next steps section."""
    return """

Next Steps

After completing this guide:

Practice: Apply these concepts in your own project
Explore: Check out related documentation
Deepen: Review advanced topics and best practices """

Prerequisites​

Next Steps​

Prerequisites

Next Steps