# scripts-report-expert
"""
Report Expert - Type Expert for Report Documents

Identifies report files - documents that summarize findings, status,
analysis results, or project progress.

Key signals:
- REPORT/STATUS/SUMMARY/ANALYSIS/ASSESSMENT in filename
- Executive summary sections
- Findings/results sections
- Metrics and statistics
- Recommendations
- Date-stamped content
"""
import re
import sys
from pathlib import Path
from typing import Dict, List

# Put the directory two levels above this file on sys.path before the
# absolute `core.models` import below (presumably the project root; verify
# against the repository layout). This must run before that import.
sys.path.insert(0, str(Path(__file__).parent.parent))

from core.models import Document, AnalystVote
from .base import TypeExpert, TypeAnalysis, ContentEnhancement
class ReportExpert(TypeExpert):
    """Expert for identifying report documents.

    ``analyze`` scores a document from its filename, report-style section
    headings, reporting phrases, numeric metrics, data tables and period
    language, and subtracts for tutorial, agent-persona and large code-block
    signals. ``generate_enhancements`` proposes boilerplate content for each
    report signal that ``analyze`` found missing.
    """

    EXPERT_TYPE = 'report'

    # Report-specific sections
    REPORT_SECTIONS = [
        'executive summary',
        'summary',
        'findings',
        'results',
        'analysis',
        'conclusions',
        'recommendations',
        'metrics',
        'statistics',
        'status',
        'progress',
        'assessment',
        'evaluation',
        'observations',
        'key takeaways',
    ]

    # Report patterns
    REPORT_PATTERNS = [
        r'as\s+of\s+\w+\s+\d+',  # "As of December 28"
        r'report\s+(date|period)',
        r'(total|count|percentage|ratio):\s*\d+',
        r'\d+%\s+(complete|done|finished)',
        r'status:\s*(complete|pending|in progress)',
        r'(increased|decreased)\s+by\s+\d+',
        r'compared\s+to\s+(last|previous)',
        r'(weekly|monthly|quarterly|annual)\s+report',
    ]

    # Headings that satisfy the 'summary_section' signal. "executive summary"
    # is spelled out because a bare `#+\s*summary` cannot match a heading in
    # which another word sits between the '#' markers and "summary".
    _SUMMARY_HEADING_RE = re.compile(
        r'#+\s*(executive\s+summary|summary|conclusions|findings)'
    )

    # One fenced code block: opening fence (with any info string), body,
    # closing fence. Matching block by block keeps the length check from
    # spanning the prose between two separate small blocks.
    _CODE_FENCE_RE = re.compile(r'```[^\n]*\n([\s\S]*?)\n?```')

    # Minimum body length (characters) for a code block to count as substantial.
    _SUBSTANTIAL_CODE_CHARS = 300

    @classmethod
    def _has_substantial_code_block(cls, content: str) -> bool:
        """Return True if any single fenced code block body is long enough."""
        return any(
            len(match.group(1)) >= cls._SUBSTANTIAL_CODE_CHARS
            for match in cls._CODE_FENCE_RE.finditer(content)
        )

    def analyze(self, document: Document, analyst_votes: List[AnalystVote]) -> TypeAnalysis:
        """Analyze if document is a report.

        Args:
            document: Document to classify; its ``content`` and ``path``
                attributes are read.
            analyst_votes: Votes from other analysts; each vote's
                ``classification`` and ``agent`` attributes are read.

        Returns:
            A ``TypeAnalysis`` holding the verdict, the confidence score, the
            supporting and opposing evidence, the missing signals, and a
            per-agent message for every analyst whose vote disagrees with a
            positive verdict.
        """
        content = document.content
        content_lower = content.lower()
        evidence_for: List[str] = []
        evidence_against: List[str] = []
        missing_signals: List[str] = []

        # Check filename
        doc_path = Path(document.path)
        filename = doc_path.stem.upper()
        is_report_file = any(
            keyword in filename
            for keyword in ('REPORT', 'STATUS', 'SUMMARY', 'ANALYSIS', 'ASSESSMENT')
        )
        if is_report_file:
            evidence_for.append(f"Filename indicates report: {doc_path.name}")

        # Check for date in filename (common for reports)
        if re.search(r'\d{4}[-_]\d{2}[-_]\d{2}', filename):
            evidence_for.append("Filename has date stamp")

        # Check for report sections
        section_count = 0
        for section in self.REPORT_SECTIONS:
            if re.search(rf'#+\s*{section}', content_lower):
                section_count += 1
                # Every match is counted, but only the first four are listed.
                if section_count <= 4:
                    evidence_for.append(f"Has report section: '{section}'")

        # Check for report patterns
        pattern_count = 0
        for pattern in self.REPORT_PATTERNS:
            if re.search(pattern, content_lower):
                pattern_count += 1
                # Only the first three matching patterns are listed.
                if pattern_count <= 3:
                    evidence_for.append(f"Has report pattern: '{pattern[:30]}'")

        # Check for metrics/statistics
        metric_patterns = [
            r'\d+\s*%',        # Percentages
            r'\d+\s*/\s*\d+',  # Ratios
            r'total:\s*\d+',   # Totals
            r'\$[\d,]+',       # Dollar amounts
        ]
        metric_count = sum(
            len(re.findall(p, content, re.IGNORECASE)) for p in metric_patterns
        )
        if metric_count > 5:
            evidence_for.append(f"Contains {metric_count} metrics/statistics")

        # Check for tables with data
        table_rows = len(re.findall(r'^\|.*\d+.*\|$', content, re.MULTILINE))
        if table_rows > 3:
            evidence_for.append(f"Has data tables with {table_rows} rows")

        # Check for temporal language
        if re.search(r'(this\s+week|this\s+month|last\s+quarter|year\s+to\s+date)', content_lower):
            evidence_for.append("Uses temporal/period language")

        # Evidence against
        if re.search(r'#+\s*(step\s+\d|how\s+to|tutorial)', content_lower):
            evidence_against.append("Has tutorial sections - might be guide")
        if re.search(r'you\s+are\s+(a|an)\s+\w+\s+agent', content_lower):
            evidence_against.append("Has agent persona - might be agent doc")
        if self._has_substantial_code_block(content):
            evidence_against.append("Has substantial code blocks - might be reference")

        # Missing signals
        if section_count < 2:
            missing_signals.append('report_sections')
        if metric_count < 3:
            missing_signals.append('metrics')
        if not self._SUMMARY_HEADING_RE.search(content_lower):
            missing_signals.append('summary_section')

        # Calculate confidence
        confidence = self._calculate_confidence(
            is_report_file, evidence_for, evidence_against,
            section_count, metric_count
        )
        # A report-like filename lowers the bar from 0.6 to 0.4.
        is_report = confidence > 0.6 or (is_report_file and confidence > 0.4)

        # Determine which analysts to sway
        analysts_to_sway: Dict[str, str] = {}
        if is_report:
            for vote in analyst_votes:
                if vote.classification != 'report':
                    analysts_to_sway[vote.agent] = f"Document is report, not {vote.classification}"

        return TypeAnalysis(
            is_this_type=is_report,
            confidence=confidence,
            evidence_for=evidence_for,
            evidence_against=evidence_against,
            semantic_purpose="Summarize findings, status, or analysis results" if is_report else "Unknown",
            missing_signals=missing_signals,
            recommended_changes=[],
            analysts_to_sway=analysts_to_sway,
            expert_type=self.EXPERT_TYPE
        )

    def _calculate_confidence(
        self,
        is_report_file: bool,
        evidence_for: List[str],
        evidence_against: List[str],
        section_count: int,
        metric_count: int
    ) -> float:
        """Calculate confidence score.

        Args:
            is_report_file: Whether the filename contained a report keyword.
            evidence_for: Supporting evidence; only its length is used.
            evidence_against: Counter-evidence; only its length is used.
            section_count: Number of report section headings found.
            metric_count: Number of metric-like matches found.

        Returns:
            A score clamped to the range ``[0.0, 0.98]``.
        """
        # Filename is strong signal
        base = 0.55 if is_report_file else 0.15
        # Sections are important (0.05 each, capped at 0.2)
        base += min(0.2, section_count * 0.05)
        # Metrics indicate report (0.02 each, capped at 0.15)
        base += min(0.15, metric_count * 0.02)
        # Other evidence (0.02 per item, capped at 0.1)
        base += min(0.1, len(evidence_for) * 0.02)
        # Subtract for counter-evidence (0.12 per item, uncapped)
        base -= len(evidence_against) * 0.12
        return max(0.0, min(0.98, base))

    def generate_enhancements(
        self,
        document: Document,
        analysis: TypeAnalysis
    ) -> List[ContentEnhancement]:
        """Generate enhancements for report documents.

        Args:
            document: The analyzed document (not read by this method).
            analysis: Result of ``analyze``; its ``missing_signals`` decide
                which enhancements are produced.

        Returns:
            One ``ContentEnhancement`` per recognized missing signal, in the
            order the signals appear in ``analysis.missing_signals``.
        """
        # NOTE(review): when both 'report_sections' and 'summary_section' are
        # missing, both enhancements open with an "## Executive Summary"
        # heading and target 'after_title' - confirm the consumer
        # de-duplicates them.
        enhancements = []
        for signal in analysis.missing_signals:
            if signal == 'report_sections':
                enhancements.append(ContentEnhancement(
                    signal_type='report_sections',
                    content=self._generate_report_structure(),
                    insertion_point='after_title',
                    reason='Reports need standard sections like Summary, Findings, Recommendations',
                    expected_analyst_boost={'structural': 0.2, 'content': 0.15},
                    priority=1
                ))
            elif signal == 'metrics':
                enhancements.append(ContentEnhancement(
                    signal_type='metrics',
                    content=self._generate_metrics_section(),
                    insertion_point='after_summary',
                    reason='Reports should include quantitative metrics',
                    expected_analyst_boost={'content': 0.15, 'pattern': 0.1},
                    priority=1
                ))
            elif signal == 'summary_section':
                enhancements.append(ContentEnhancement(
                    signal_type='summary_section',
                    content=self._generate_summary(),
                    insertion_point='after_title',
                    reason='Reports need executive summary or conclusions',
                    expected_analyst_boost={'structural': 0.15, 'semantic': 0.1},
                    priority=1
                ))
        return enhancements

    def _generate_report_structure(self) -> str:
        """Generate report structure.

        Every section carries a Markdown heading marker so that the inserted
        text is recognized by the heading checks in ``analyze``.
        """
        return '''## Executive Summary

[Brief overview of key findings and recommendations]

## Findings

### Finding 1

[Description and supporting data]

### Finding 2

[Description and supporting data]

## Recommendations

- [Recommendation 1]
- [Recommendation 2]

## Conclusions

[Summary of conclusions and next steps]
'''

    def _generate_metrics_section(self) -> str:
        """Generate metrics section as a placeholder Markdown table."""
        return '''## Key Metrics
| Metric | Value | Change |
|---|---|---|
| Total Items | 0 | - |
| Completion Rate | 0% | - |
| Success Rate | 0% | - |
'''

    def _generate_summary(self) -> str:
        """Generate summary section with placeholder highlights and actions."""
        return '''## Executive Summary
Report Period: [Date range] Status: [Overall status]
Key Highlights
- [Highlight 1]
- [Highlight 2]
- [Highlight 3]
Action Items
- [Action 1]
- [Action 2]
'''