Skip to main content

scripts-guide-expert

""" Guide Type Expert

Specializes in understanding what makes a document a "guide" vs other types.
Guides are instructional documents that teach users how to accomplish tasks.

Key characteristics:

- Step-by-step instructions
- Prerequisites section
- Practical examples
- Troubleshooting section
- Clear learning objectives
"""

import re
import sys
from pathlib import Path
from typing import Dict, List, Optional

# Put the directory two levels above this file at the front of sys.path
# before the project imports below run (presumably so that `core.models`
# resolves when this module is loaded outside its package -- confirm).
# `__file__` is required here: a bare `file` name does not exist in Python 3.
sys.path.insert(0, str(Path(__file__).parent.parent))

from core.models import Document, AnalystVote
from .base import TypeExpert, TypeAnalysis, ContentEnhancement

class GuideExpert(TypeExpert):
    """Expert in identifying and enhancing guide documents.

    ``analyze`` gathers heuristic evidence (indicator phrases, H2 section
    names, code blocks, numbered lists) for and against a document being a
    guide, and records which guide sections are missing.
    ``generate_enhancements`` turns each missing section into a
    ``ContentEnhancement`` holding generated Markdown.

    The helpers ``extract_headings``, ``extract_code_blocks``,
    ``has_mermaid``, ``analyze_semantic_purpose`` and
    ``identify_disagreeing_analysts`` are not defined here; they are
    presumably inherited from ``TypeExpert``.
    """

    expert_type = "guide"

    # Strong semantic indicators of a guide. Each entry is a regex searched
    # case-insensitively anywhere in the document text.
    strong_indicators = [
        r'how to\b',
        r'step\s*\d',
        r'tutorial',
        r'learn\s+(?:how|to)',
        r'getting\s+started',
        r'walkthrough',
        r'follow\s+(?:these|the)\s+steps',
    ]

    # What each analyst looks for in guides
    analyst_expectations = {
        'metadata': [
            "type: guide in frontmatter",
            "component_type: guide",
            "audience field",
        ],
        'content': [
            "## Prerequisites section",
            "## Step N headers",
            "## Quick Start section",
            "## Troubleshooting section",
        ],
        'structural': [
            "Path contains /guides/ or /getting-started/",
            "Numbered sections",
            "Progressive structure",
        ],
        'semantic': [
            "Instructional intent",
            "Teaching language",
            "Action-oriented content",
        ],
        'pattern': [
            "Filename matches guide patterns",
            "Title suggests instruction",
        ],
    }

    # Advice recorded for each disagreeing analyst. Analysts that are not
    # listed here (e.g. 'structural') get no entry.
    _sway_reasons = {
        'content': "Needs more guide section headers (Prerequisites, Step N, Troubleshooting)",
        'semantic': "Needs more instructional language and learning objectives",
        'pattern': "Filename or path doesn't match guide conventions",
        'metadata': "Frontmatter needs type: guide",
    }

    # (regex, prerequisite) pairs, evaluated in order, case-insensitively.
    # Short tokens are anchored with \b so they do not fire inside unrelated
    # words ("pipeline", "nodes", "digit", "rapid", ...).
    _prerequisite_rules = (
        # Tool mentions
        (r'python|\bpip3?\b', "Python 3.10+ installed"),
        (r'\bnode(?:\.?js)?\b|\bnpm\b|\byarn\b', "Node.js 18+ installed"),
        (r'\bgit\b|clone|repository', "Git configured"),
        (r'docker|container', "Docker installed"),
        (r'claude|anthropic|\bapis?\b', "Claude API access configured"),
        (r'gcloud|google cloud|\bgcp\b', "Google Cloud SDK installed"),
        # Knowledge prerequisites
        (r'yaml|json', "Basic understanding of YAML/JSON"),
        (r'command\s*line|terminal|bash', "Familiarity with command line"),
    )

    # (regex, problem, solution) triples, evaluated in order,
    # case-insensitively.
    _issue_rules = (
        (
            r'error|fail|exception',
            'Unexpected error occurs',
            'Check logs for specific error message and verify prerequisites',
        ),
        (
            r'permission|access|auth',
            'Permission or access denied',
            'Verify credentials and access permissions are correctly configured',
        ),
        (
            r'not found|missing',
            'Resource not found',
            'Ensure all required files and dependencies are in place',
        ),
    )

    def analyze(
        self,
        document: Document,
        analyst_votes: List[AnalystVote]
    ) -> TypeAnalysis:
        """Analyze if document is truly a guide.

        Args:
            document: Document to inspect; ``body`` is used when truthy,
                otherwise ``content``.
            analyst_votes: Votes passed to ``identify_disagreeing_analysts``.

        Returns:
            A ``TypeAnalysis`` carrying the verdict, a confidence clamped at
            0.0 from below, the evidence lists, the missing guide sections
            and advice for analysts that disagreed.
        """
        # Fall back to an empty string so the regex searches below never
        # receive None.
        content = document.body or document.content or ""
        headings = self.extract_headings(content)
        h2_texts = [h[1].lower() for h in headings if h[0] == 2]

        def has_h2(*needles: str) -> bool:
            """Return True if any H2 heading contains any of the needles."""
            return any(needle in text for text in h2_texts for needle in needles)

        # Computed once so the evidence and the missing-signal report below
        # always agree with each other.
        has_steps = has_h2('step')
        has_prereqs = has_h2('prereq')  # also covers 'prerequisites'
        has_troubleshooting = has_h2('troubleshoot')
        has_quick_start = has_h2('quick start', 'getting started')
        has_next_steps = has_h2('next')

        # Gather evidence
        evidence_for = [
            f"Contains guide indicator: '{indicator}'"
            for indicator in self.strong_indicators
            if re.search(indicator, content, re.I)
        ]
        evidence_against = []

        # Check structure
        if has_steps:
            evidence_for.append("Has step-based structure")
        if has_prereqs:
            evidence_for.append("Has prerequisites section")
        if has_troubleshooting:
            evidence_for.append("Has troubleshooting section")
        if has_quick_start:
            evidence_for.append("Has quick start section")

        # Check for code examples (guides usually have them)
        code_blocks = self.extract_code_blocks(content)
        if len(code_blocks) >= 2:
            evidence_for.append(f"Has {len(code_blocks)} code examples")

        # Check for numbered lists (step-by-step)
        if re.search(r'^\d+\.\s+', content, re.MULTILINE):
            evidence_for.append("Uses numbered lists for steps")

        # Evidence against being a guide
        if self.has_mermaid(content) and not has_steps:
            evidence_against.append("Has diagrams but no steps - might be workflow")

        # Check if it's really a reference
        if re.search(r'##\s*API\s*Reference', content, re.I):
            evidence_against.append("Has API Reference section - might be reference doc")

        # Check if it's really an ADR
        adr_sections = {'status', 'context', 'decision', 'consequences'}
        adr_matches = sum(
            1 for text in h2_texts if any(section in text for section in adr_sections)
        )
        if adr_matches >= 3:
            evidence_against.append("Has ADR structure - might be architecture decision")

        # Confidence: 0.15 per supporting item (capped at 0.98), minus 0.1
        # per counter-indication.
        confidence = min(0.98, len(evidence_for) * 0.15)
        confidence -= len(evidence_against) * 0.1

        is_guide = len(evidence_for) >= 2 and confidence > 0.5

        # Identify disagreeing analysts and what would change their vote
        disagreeing = self.identify_disagreeing_analysts(analyst_votes, 'guide')
        analysts_to_sway = {
            analyst_name: self._sway_reasons[analyst_name]
            for analyst_name in disagreeing
            if analyst_name in self._sway_reasons
        }

        # Identify missing signals
        missing = []
        if not has_prereqs:
            missing.append('prerequisites')
        if not has_steps:
            missing.append('step_headers')
        if not has_troubleshooting:
            missing.append('troubleshooting')
        if not has_next_steps:
            missing.append('next_steps')

        return TypeAnalysis(
            is_this_type=is_guide,
            confidence=max(0.0, confidence),
            evidence_for=evidence_for,
            evidence_against=evidence_against,
            semantic_purpose=self.analyze_semantic_purpose(document),
            missing_signals=missing,
            recommended_changes=[],  # Filled by generate_enhancements
            analysts_to_sway=analysts_to_sway,
            expert_type=self.expert_type
        )

    def generate_enhancements(
        self,
        document: Document,
        analysis: TypeAnalysis
    ) -> List[ContentEnhancement]:
        """Generate contextual guide enhancements.

        Args:
            document: Document whose text is mined for context.
            analysis: Result of ``analyze``; only ``missing_signals`` is read.

        Returns:
            One ``ContentEnhancement`` per missing signal, in the order
            prerequisites, step headers, troubleshooting, next steps.
        """
        enhancements = []
        content = document.body or document.content or ""
        # Tolerate a document without frontmatter.
        title = (document.frontmatter or {}).get('title', 'this topic')
        missing = analysis.missing_signals

        # Generate prerequisites if missing
        if 'prerequisites' in missing:
            # Try to infer prerequisites from content
            prereqs = self._infer_prerequisites(content)
            enhancements.append(ContentEnhancement(
                signal_type='prerequisites',
                content=self._generate_prerequisites_section(prereqs),
                insertion_point='after_frontmatter',
                reason="Guides need prerequisites to set expectations",
                expected_analyst_boost={'content': 0.15, 'semantic': 0.10},
                priority=1
            ))

        # Generate step headers if missing
        if 'step_headers' in missing:
            # Analyze content to create contextual steps
            steps = self._infer_steps(content, title)
            enhancements.append(ContentEnhancement(
                signal_type='step_headers',
                content=self._generate_step_section(steps),
                insertion_point='after_overview',
                reason="Guides need numbered steps for clarity",
                expected_analyst_boost={'content': 0.20, 'structural': 0.10},
                priority=1
            ))

        # Generate troubleshooting if missing
        if 'troubleshooting' in missing:
            # Generate contextual troubleshooting
            issues = self._infer_common_issues(content, title)
            enhancements.append(ContentEnhancement(
                signal_type='troubleshooting',
                content=self._generate_troubleshooting_section(issues),
                insertion_point='before_end',
                reason="Guides should help users resolve common issues",
                expected_analyst_boost={'content': 0.10, 'semantic': 0.05},
                priority=2
            ))

        # Generate next steps if missing
        if 'next_steps' in missing:
            enhancements.append(ContentEnhancement(
                signal_type='next_steps',
                content=self._generate_next_steps(document),
                insertion_point='end',
                reason="Guides should point to next learning resources",
                expected_analyst_boost={'content': 0.10, 'structural': 0.05},
                priority=3
            ))

        return enhancements

    def _infer_prerequisites(self, content: str) -> List[str]:
        """Infer prerequisites from document content.

        Returns the prerequisite of every rule in ``_prerequisite_rules``
        whose pattern occurs in ``content``, in rule order, or two generic
        prerequisites when nothing matches.
        """
        prereqs = [
            prerequisite
            for pattern, prerequisite in self._prerequisite_rules
            if re.search(pattern, content, re.I)
        ]
        if not prereqs:
            prereqs = ["Basic understanding of the topic", "Access to required resources"]
        return prereqs

    def _generate_prerequisites_section(self, prereqs: List[str]) -> str:
        """Generate a prerequisites section.

        The section is Markdown: an H2 heading (the level ``analyze``
        inspects), a task-list checklist with one item per prerequisite, and
        a fenced placeholder for verification commands.
        """
        items = "\n".join(f"- [ ] {p}" for p in prereqs)
        return (
            "\n## Prerequisites\n\n"
            "Before starting, ensure you have:\n\n"
            f"{items}\n\n"
            "Verify setup:\n\n"
            # Fenced so the '#' line is shown as a shell comment instead of
            # being rendered as a top-level heading.
            "```bash\n"
            "# Run verification checks as needed\n"
            "```\n\n"
        )

    def _infer_steps(self, content: str, title: str) -> List[str]:
        """Infer logical steps from content.

        If the content already has at least three numbered list items, the
        first five are reused as step titles; otherwise four generic steps
        are returned. ``title`` is currently unused.
        """
        # Check for existing numbered content
        numbered = re.findall(r'^\d+\.\s+(.+)$', content, re.MULTILINE)
        if len(numbered) >= 3:
            return numbered[:5]

        # Generic fallback steps
        return [
            "Set up your environment",
            "Configure the necessary settings",
            "Execute the main process",
            "Verify the results",
        ]

    def _generate_step_section(self, steps: List[str]) -> str:
        """Generate step-by-step section.

        Emits a "Quick Start" H2 followed by one "Step N" H3 per step, each
        with a placeholder line to be filled in.
        """
        parts = ["\n## Quick Start\n\n"]
        parts.extend(
            f"### Step {i}: {step.strip()}\n\n[Details for step {i}]\n\n"
            for i, step in enumerate(steps, 1)
        )
        return "".join(parts)

    def _infer_common_issues(self, content: str, title: str) -> List[Dict[str, str]]:
        """Infer common issues from content.

        Returns one ``{'problem', 'solution'}`` dict for every rule in
        ``_issue_rules`` whose pattern occurs in ``content``, or two generic
        issues when nothing matches. ``title`` is currently unused.
        """
        issues = [
            {'problem': problem, 'solution': solution}
            for pattern, problem, solution in self._issue_rules
            if re.search(pattern, content, re.I)
        ]
        if not issues:
            issues = [
                {'problem': 'Setup not working as expected', 'solution': 'Verify all prerequisites are met'},
                {'problem': 'Unexpected behavior', 'solution': 'Check configuration settings'}
            ]
        return issues

    def _generate_troubleshooting_section(self, issues: List[Dict]) -> str:
        """Generate troubleshooting section.

        Emits a "Troubleshooting" H2 followed by one H3 per issue with its
        problem and solution; each issue dict must provide the keys
        ``'problem'`` and ``'solution'``.
        """
        parts = ["\n## Troubleshooting\n\n"]
        parts.extend(
            f"### {issue['problem']}\n"
            f"**Problem:** {issue['problem']}\n"
            f"**Solution:** {issue['solution']}\n\n"
            for issue in issues
        )
        return "".join(parts)

    def _generate_next_steps(self, document: Document) -> str:
        """Generate next steps section.

        The text is static (``document`` is currently unused) and starts
        with an H2 heading, the level ``analyze`` inspects when it looks for
        a "next" section.
        """
        return (
            "\n## Next Steps\n\n"
            "After completing this guide:\n\n"
            "1. Practice: Apply these concepts in your own project\n"
            "2. Explore: Check out related documentation\n"
            "3. Deepen: Review advanced topics and best practices\n"
        )