# scripts-workflow-expert
"""Workflow Type Expert.

Specializes in understanding what makes a document a "workflow" document.
Workflows define processes, pipelines, and multi-phase operations.

Key characteristics:
- Phase/stage structure
- Process diagrams (Mermaid)
- Sequential steps with dependencies
- Input/output definitions
- Quality gates or checkpoints
"""
import re
import sys
from pathlib import Path
from typing import Dict, List

# Put the parent of this file's directory first on sys.path before the
# project imports below run. Presumably this is what lets the absolute
# `core.models` import resolve when the package is not installed -- confirm
# against the project layout.
sys.path.insert(0, str(Path(__file__).parent.parent))

from core.models import Document, AnalystVote  # noqa: E402
from .base import TypeExpert, TypeAnalysis, ContentEnhancement  # noqa: E402
class WorkflowExpert(TypeExpert):
    """Expert in identifying and enhancing workflow documents.

    `analyze` collects textual and structural evidence that a document is a
    workflow and reports which workflow signals are absent.
    `generate_enhancements` turns those absent signals into Markdown snippets
    (phase headings, a Mermaid flowchart, a task checklist) that can be
    inserted into the document.
    """

    expert_type = "workflow"

    # Regex patterns searched case-insensitively in the document text; every
    # pattern that matches contributes one piece of supporting evidence.
    strong_indicators = [
        r'workflow',
        r'pipeline',
        r'phase\s*\d',
        r'stage\s*\d',
        r'process\s*flow',
        r'sequenceDiagram',
        r'flowchart',
        r'graph\s+(TD|LR|TB|RL)',
    ]

    # What each analyst category is expected to look for in a workflow.
    analyst_expectations = {
        'metadata': ["type: workflow in frontmatter"],
        'content': ["## Phase sections", "Mermaid diagrams", "Checkboxes/checklists"],
        'structural': ["Sequential structure", "Path contains /workflows/"],
        'semantic': ["Process-oriented language", "Dependency descriptions"],
        'pattern': ["Filename suggests workflow", "WORKFLOW in name"],
    }

    # Scoring parameters used by `analyze`.
    _EVIDENCE_WEIGHT = 0.15            # confidence gained per supporting item
    _COUNTER_EVIDENCE_PENALTY = 0.1    # confidence lost per opposing item
    _CONFIDENCE_CAP = 0.98             # upper bound before penalties apply
    _CONFIDENCE_THRESHOLD = 0.5        # must be exceeded to classify
    _MIN_EVIDENCE = 2                  # minimum supporting items to classify

    def analyze(
        self,
        document: Document,
        analyst_votes: List[AnalystVote]
    ) -> TypeAnalysis:
        """Analyze if document is truly a workflow.

        Args:
            document: Document to inspect. Its ``body`` is used when truthy,
                otherwise its ``content``; its ``path`` is checked for a
                ``/workflow`` segment.
            analyst_votes: Votes passed to ``identify_disagreeing_analysts``
                to find analysts that did not pick ``'workflow'``.

        Returns:
            A ``TypeAnalysis`` carrying the verdict, a confidence clamped to
            be non-negative, the evidence lists, and the missing signals
            (any of ``'phases'``, ``'diagram'``, ``'checklist'``).
        """
        # Fall back to an empty string so a document without any text does
        # not break the regex search or the helper calls below.
        content = document.body or document.content or ""
        content_lower = content.lower()

        headings = self.extract_headings(content)
        h2_texts = [h[1].lower() for h in headings if h[0] == 2]

        # Each signal is evaluated once and reused for both the evidence
        # list and the missing-signal list.
        has_phase_heading = any('phase' in h for h in h2_texts)
        has_stage_heading = any('stage' in h for h in h2_texts)
        has_diagram = self.has_mermaid(content)
        has_checklist = self.has_checkboxes(content)

        # Strong indicators: one evidence entry per matching pattern.
        evidence_for = [
            f"Contains workflow indicator: '{indicator}'"
            for indicator in self.strong_indicators
            if re.search(indicator, content, re.I)
        ]
        if has_phase_heading:
            evidence_for.append("Has phase-based structure")
        if has_stage_heading:
            evidence_for.append("Has stage-based structure")
        if has_diagram:
            evidence_for.append("Contains Mermaid diagrams")
        if has_checklist:
            evidence_for.append("Has task checklists")
        if '/workflow' in str(document.path).lower():
            evidence_for.append("Located in workflows directory")

        # "Step" headings combined with how-to wording point at a guide.
        evidence_against = []
        if 'how to' in content_lower and any('step' in h for h in h2_texts):
            evidence_against.append(
                "Step structure with how-to language - might be guide"
            )

        confidence = min(
            self._CONFIDENCE_CAP,
            len(evidence_for) * self._EVIDENCE_WEIGHT,
        )
        if evidence_against:
            confidence -= len(evidence_against) * self._COUNTER_EVIDENCE_PENALTY

        # The verdict uses the unclamped confidence; only the reported value
        # is floored at zero.
        is_workflow = (
            len(evidence_for) >= self._MIN_EVIDENCE
            and confidence > self._CONFIDENCE_THRESHOLD
        )

        # Identify what's missing
        missing = []
        if not (has_phase_heading or has_stage_heading):
            missing.append('phases')
        if not has_diagram:
            missing.append('diagram')
        if not has_checklist:
            missing.append('checklist')

        disagreeing = self.identify_disagreeing_analysts(analyst_votes, 'workflow')
        analysts_to_sway = {
            name: "Needs more workflow signals to classify as workflow"
            for name in disagreeing
        }

        return TypeAnalysis(
            is_this_type=is_workflow,
            confidence=max(0, confidence),
            evidence_for=evidence_for,
            evidence_against=evidence_against,
            semantic_purpose=self.analyze_semantic_purpose(document),
            missing_signals=missing,
            recommended_changes=[],
            analysts_to_sway=analysts_to_sway,
            expert_type=self.expert_type
        )

    def generate_enhancements(
        self,
        document: Document,
        analysis: TypeAnalysis
    ) -> List[ContentEnhancement]:
        """Generate contextual workflow enhancements.

        Args:
            document: Document being enhanced; its frontmatter ``title`` is
                passed to the snippet generators (default ``'Process'``).
            analysis: Result of `analyze`; one enhancement is produced for
                each of ``'phases'``, ``'diagram'`` and ``'checklist'`` found
                in ``analysis.missing_signals``.

        Returns:
            The enhancements, in the order phases, diagram, checklist.
        """
        enhancements = []
        # Tolerate documents whose frontmatter is None or empty.
        frontmatter = document.frontmatter or {}
        title = frontmatter.get('title', 'Process')

        if 'phases' in analysis.missing_signals:
            enhancements.append(ContentEnhancement(
                signal_type='phases',
                content=self._generate_phases(title),
                insertion_point='after_overview',
                reason="Workflows need phase-based structure",
                expected_analyst_boost={'content': 0.20, 'structural': 0.15},
                priority=1
            ))

        if 'diagram' in analysis.missing_signals:
            enhancements.append(ContentEnhancement(
                signal_type='diagram',
                content=self._generate_diagram(title),
                insertion_point='after_phases',
                reason="Workflows should include process diagrams",
                expected_analyst_boost={'content': 0.15, 'semantic': 0.10},
                priority=1
            ))

        if 'checklist' in analysis.missing_signals:
            enhancements.append(ContentEnhancement(
                signal_type='checklist',
                content=self._generate_checklist(title),
                insertion_point='before_end',
                reason="Workflows benefit from completion checklists",
                expected_analyst_boost={'content': 0.10, 'pattern': 0.05},
                priority=2
            ))

        return enhancements

    def _generate_phases(self, title: str) -> str:
        """Return a Markdown section describing four standard phases.

        The section opens with a level-2 heading containing "Phases", which
        is the heading level and keyword `analyze` inspects when deciding
        whether phases are missing.

        Args:
            title: Document title. Currently unused; accepted so that all
                snippet generators share one signature.
        """
        phases = (
            ("Initialization", "Set up prerequisites and validate inputs."),
            ("Execution", "Execute the main workflow steps."),
            ("Validation", "Verify outputs and check quality gates."),
            ("Completion", "Finalize the process and generate reports."),
        )
        lines = ["", "## Workflow Phases", ""]
        for number, (name, summary) in enumerate(phases, start=1):
            lines += [f"### Phase {number}: {name}", "", summary, ""]
        return "\n".join(lines)

    def _generate_diagram(self, title: str) -> str:
        """Return a Markdown section holding a Mermaid flowchart of the phases.

        NOTE(review): `has_mermaid` is defined outside this class; this
        assumes it recognises a fenced ``mermaid`` code block -- confirm.

        Args:
            title: Document title. Currently unused; accepted so that all
                snippet generators share one signature.
        """
        lines = [
            "",
            "## Process Flow",
            "",
            "```mermaid",
            "flowchart TD",
            '    A["Start"] --> B["Phase 1: Initialization"]',
            '    B --> C["Phase 2: Execution"]',
            '    C --> D["Phase 3: Validation"]',
            '    D --> E{"Quality gates passed?"}',
            '    E -->|Yes| F["Phase 4: Completion"]',
            '    E -->|No| C',
            "```",
            "",
        ]
        return "\n".join(lines)

    def _generate_checklist(self, title: str) -> str:
        """Return a Markdown section with an unchecked completion checklist.

        NOTE(review): `has_checkboxes` is defined outside this class; this
        assumes it recognises ``- [ ]`` task-list items -- confirm.

        Args:
            title: Document title. Currently unused; accepted so that all
                snippet generators share one signature.
        """
        items = (
            "Prerequisites verified",
            "All phases completed",
            "Outputs validated",
            "Quality gates passed",
            "Documentation updated",
        )
        lines = ["", "## Completion Checklist", ""]
        lines += [f"- [ ] {item}" for item in items]
        lines.append("")
        return "\n".join(lines)