# scripts-report-expert

"""
Report Expert - Type Expert for Report Documents

Identifies report files - documents that summarize findings, status,
analysis results, or project progress.

Key signals:
- REPORT/STATUS/SUMMARY in filename
- Executive summary sections
- Findings/results sections
- Metrics and statistics
- Recommendations
- Date-stamped content
"""

import re
import sys
from pathlib import Path
from typing import Dict, List

# Put this file's grandparent directory on sys.path before importing `core`
# (presumably that is where the `core` package lives - confirm against the
# repository layout).
sys.path.insert(0, str(Path(__file__).parent.parent))
from core.models import Document, AnalystVote

from .base import TypeExpert, TypeAnalysis, ContentEnhancement

class ReportExpert(TypeExpert):
    """Expert for identifying report documents.

    Scores a document on filename keywords, report-style section headings,
    reporting phrases, numeric metrics, data tables and period language, and
    proposes template content for whichever report signals are missing.
    """

    EXPERT_TYPE = 'report'

    # Report-specific sections (matched as lower-cased markdown headings)
    REPORT_SECTIONS = [
        'executive summary',
        'summary',
        'findings',
        'results',
        'analysis',
        'conclusions',
        'recommendations',
        'metrics',
        'statistics',
        'status',
        'progress',
        'assessment',
        'evaluation',
        'observations',
        'key takeaways',
    ]

    # Report patterns (searched in the lower-cased content)
    REPORT_PATTERNS = [
        r'as\s+of\s+\w+\s+\d+',  # "As of December 28"
        r'report\s+(date|period)',
        r'(total|count|percentage|ratio):\s*\d+',
        r'\d+%\s+(complete|done|finished)',
        r'status:\s*(complete|pending|in progress)',
        r'(increased|decreased)\s+by\s+\d+',
        r'compared\s+to\s+(last|previous)',
        r'(weekly|monthly|quarterly|annual)\s+report',
    ]

    def analyze(self, document: Document, analyst_votes: List[AnalystVote]) -> TypeAnalysis:
        """Analyze if document is a report.

        Args:
            document: Document to classify; its ``content`` and ``path``
                attributes are read.
            analyst_votes: Earlier analyst votes; ``classification`` and
                ``agent`` are read from each.

        Returns:
            A TypeAnalysis holding the verdict, the confidence score, the
            evidence for and against, the missing report signals, and a
            mapping of analysts whose non-report vote should be swayed.
        """
        content = document.content
        content_lower = content.lower()
        evidence_for = []
        evidence_against = []
        missing_signals = []

        # Check filename (keywords are compared against the upper-cased stem)
        doc_path = Path(document.path)
        filename = doc_path.stem.upper()
        is_report_file = any(
            keyword in filename
            for keyword in ('REPORT', 'STATUS', 'SUMMARY', 'ANALYSIS', 'ASSESSMENT')
        )
        if is_report_file:
            evidence_for.append(f"Filename indicates report: {doc_path.name}")

        # Check for date in filename (common for reports)
        if re.search(r'\d{4}[-_]\d{2}[-_]\d{2}', filename):
            evidence_for.append("Filename has date stamp")

        # Check for report sections; every hit is counted, but only the
        # first four are listed as evidence to keep the list short.
        section_count = 0
        for section in self.REPORT_SECTIONS:
            if re.search(rf'#+\s*{section}', content_lower):
                section_count += 1
                if section_count <= 4:
                    evidence_for.append(f"Has report section: '{section}'")

        # Check for report patterns; only the first three are listed.
        pattern_count = 0
        for pattern in self.REPORT_PATTERNS:
            if re.search(pattern, content_lower):
                pattern_count += 1
                if pattern_count <= 3:
                    evidence_for.append(f"Has report pattern: '{pattern[:30]}'")

        # Check for metrics/statistics (total number of matches, not patterns)
        metric_patterns = [
            r'\d+\s*%',  # Percentages
            r'\d+\s*/\s*\d+',  # Ratios
            r'total:\s*\d+',  # Totals
            r'\$[\d,]+',  # Dollar amounts
        ]
        metric_count = sum(len(re.findall(p, content, re.IGNORECASE)) for p in metric_patterns)
        if metric_count > 5:
            evidence_for.append(f"Contains {metric_count} metrics/statistics")

        # Check for tables with data (pipe-delimited rows containing a digit)
        table_rows = len(re.findall(r'^\|.*\d+.*\|$', content, re.MULTILINE))
        if table_rows > 3:
            evidence_for.append(f"Has data tables with {table_rows} rows")

        # Check for temporal language
        if re.search(r'(this\s+week|this\s+month|last\s+quarter|year\s+to\s+date)', content_lower):
            evidence_for.append("Uses temporal/period language")

        # Evidence against
        if re.search(r'#+\s*(step\s+\d|how\s+to|tutorial)', content_lower):
            evidence_against.append("Has tutorial sections - might be guide")
        if re.search(r'you\s+are\s+(a|an)\s+\w+\s+agent', content_lower):
            evidence_against.append("Has agent persona - might be agent doc")
        if re.search(r'```[\w]*\n[\s\S]{300,}?\n```', content):
            evidence_against.append("Has substantial code blocks - might be reference")

        # Missing signals
        if section_count < 2:
            missing_signals.append('report_sections')
        if metric_count < 3:
            missing_signals.append('metrics')
        # "Executive Summary" counts as a summary heading: it is the heading
        # that _generate_summary() inserts to satisfy this very signal.
        if not re.search(
            r'#+\s*(executive\s+summary|summary|conclusions|findings)', content_lower
        ):
            missing_signals.append('summary_section')

        # Calculate confidence
        confidence = self._calculate_confidence(
            is_report_file, evidence_for, evidence_against,
            section_count, metric_count
        )

        # A report-like filename lowers the confidence bar from 0.6 to 0.4.
        is_report = confidence > 0.6 or (is_report_file and confidence > 0.4)

        # Determine which analysts to sway (only when the verdict is "report")
        analysts_to_sway = {}
        if is_report:
            for vote in analyst_votes:
                if vote.classification != 'report':
                    analysts_to_sway[vote.agent] = f"Document is report, not {vote.classification}"

        return TypeAnalysis(
            is_this_type=is_report,
            confidence=confidence,
            evidence_for=evidence_for,
            evidence_against=evidence_against,
            semantic_purpose="Summarize findings, status, or analysis results" if is_report else "Unknown",
            missing_signals=missing_signals,
            recommended_changes=[],
            analysts_to_sway=analysts_to_sway,
            expert_type=self.EXPERT_TYPE
        )

    def _calculate_confidence(
        self,
        is_report_file: bool,
        evidence_for: List[str],
        evidence_against: List[str],
        section_count: int,
        metric_count: int
    ) -> float:
        """Calculate confidence score.

        Args:
            is_report_file: Whether the filename contained a report keyword.
            evidence_for: Supporting evidence strings (only the count is used).
            evidence_against: Counter-evidence strings (only the count is used).
            section_count: Number of report section headings found.
            metric_count: Number of metric matches found.

        Returns:
            A score clamped to the range [0.0, 0.98].
        """
        # Filename is strong signal
        base = 0.55 if is_report_file else 0.15

        # Sections are important (0.05 each, capped at 0.2)
        base += min(0.2, section_count * 0.05)

        # Metrics indicate report (0.02 each, capped at 0.15)
        base += min(0.15, metric_count * 0.02)

        # Other evidence (0.02 each, capped at 0.1)
        base += min(0.1, len(evidence_for) * 0.02)

        # Subtract for counter-evidence (0.12 each, uncapped)
        base -= len(evidence_against) * 0.12

        return max(0.0, min(0.98, base))

    def generate_enhancements(
        self,
        document: Document,
        analysis: TypeAnalysis
    ) -> List[ContentEnhancement]:
        """Generate enhancements for report documents.

        Args:
            document: The analyzed document (not read by this method).
            analysis: Result of analyze(); its ``missing_signals`` drive the
                output.

        Returns:
            One ContentEnhancement per recognized missing signal, in the
            order the signals appear; unrecognized signals are ignored.
        """
        enhancements = []

        for signal in analysis.missing_signals:
            if signal == 'report_sections':
                enhancements.append(ContentEnhancement(
                    signal_type='report_sections',
                    content=self._generate_report_structure(),
                    insertion_point='after_title',
                    reason='Reports need standard sections like Summary, Findings, Recommendations',
                    expected_analyst_boost={'structural': 0.2, 'content': 0.15},
                    priority=1
                ))
            elif signal == 'metrics':
                enhancements.append(ContentEnhancement(
                    signal_type='metrics',
                    content=self._generate_metrics_section(),
                    insertion_point='after_summary',
                    reason='Reports should include quantitative metrics',
                    expected_analyst_boost={'content': 0.15, 'pattern': 0.1},
                    priority=1
                ))
            elif signal == 'summary_section':
                enhancements.append(ContentEnhancement(
                    signal_type='summary_section',
                    content=self._generate_summary(),
                    insertion_point='after_title',
                    reason='Reports need executive summary or conclusions',
                    expected_analyst_boost={'structural': 0.15, 'semantic': 0.1},
                    priority=1
                ))

        return enhancements

    def _generate_report_structure(self) -> str:
        """Generate report structure.

        Every section is emitted as a markdown heading so that the heading
        regex used by analyze() recognizes the inserted sections.
        """
        return '''## Executive Summary

[Brief overview of key findings and recommendations]

## Findings

### Finding 1

[Description and supporting data]

### Finding 2

[Description and supporting data]

## Recommendations

- [Recommendation 1]
- [Recommendation 2]

## Conclusions

[Summary of conclusions and next steps]
'''

    def _generate_metrics_section(self) -> str:
        """Generate metrics section.

        The placeholder values are laid out as a pipe-delimited markdown
        table, the row format that analyze() looks for when counting data
        table rows.
        """
        return '''## Key Metrics

| Metric | Value | Change |
|--------|-------|--------|
| Total Items | 0 | - |
| Completion Rate | 0% | - |
| Success Rate | 0% | - |

'''

    def _generate_summary(self) -> str:
        """Generate summary section."""
        return '''## Executive Summary

**Report Period:** [Date range]
**Status:** [Overall status]

### Key Highlights

- [Highlight 1]
- [Highlight 2]
- [Highlight 3]

### Action Items

- [Action 1]
- [Action 2]

'''

# Rendered-heading residue left over from extraction (not code):
# Findings
# Finding 1
# Finding 2
# Recommendations
# Conclusions
# Key Highlights
# Action Items