#!/usr/bin/env python3
"""
Meeting Analyzer Script

Batch process meeting notes and transcripts to extract decisions, action items,
and strategic context.

Usage:
    python3 meeting-analyzer.py <input> [options]
    python3 meeting-analyzer.py path/to/meeting.md --format full
    python3 meeting-analyzer.py path/to/meetings/ --batch --output reports/

Standard: CODITECT-STANDARD-MEETING-ANALYSIS v1.0.0
Version: 1.0.0
Created: 2026-01-13
"""

import argparse
import json
import os
import re
import sys
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Any

import yaml

# ADR-114: Add coditect lib path (framework installation)

sys.path.insert(0, str(Path.home() / ".coditect" / "lib"))

try:
    from coditect_utils import load_config, setup_logging
except ImportError:
    # Fallback if coditect_utils not available: provide minimal stand-ins so
    # the script still runs outside a full framework installation.
    def load_config():
        """Return an empty configuration mapping."""
        return {}

    def setup_logging():
        """Configure INFO-level logging and return this module's logger."""
        import logging

        logging.basicConfig(level=logging.INFO)
        # Use the module's dunder name; a bare `name` is undefined here.
        return logging.getLogger(__name__)

logger = setup_logging()

class MeetingAnalyzer:
    """Analyzes meeting notes and extracts structured information.

    Extraction is purely heuristic: the text is split into sentence-like
    chunks and each chunk is tested against the regex lists below. Results
    are plain dicts/lists so they can be dumped to JSON or rendered to
    Markdown by the ``generate_*`` methods.
    """

    MEETING_TYPES = ['strategic', 'operational', 'technical', 'sales', 'advisory', 'legal']

    DECISION_PATTERNS = [
        r'(?:we |they )?decided (?:to |that )',
        r'the decision (?:is|was) ',
        r'(?:we |they )?agreed (?:to |that )',
        r"(?:we're|we are) going (?:to |with )",
        r'the plan is ',
        r'(?:we |they )?will ',
        r"(?:we |they )?won't ",
        r"let's do ",
        r'final (?:answer|decision) is ',
    ]

    ACTION_PATTERNS = [
        r'(\w+) will ',
        r'(\w+) should ',
        r'(\w+) needs? to ',
        r'action:? ',
        r'todo:? ',
        r'follow up (?:on |with )',
        r'send .+ to ',
        r'schedule ',
        r'by (?:end of |next |\d)',
    ]

    # Keyword regexes per meeting type. Insertion order matters: on a tie the
    # first type listed wins (see _classify_meeting). Stems such as
    # "negotiat" carry a trailing \w* so that inflected forms
    # ("negotiating", "recommended", "consultant") actually match; wrapped
    # in \b...\b alone, a bare stem can never match a real word.
    _TYPE_KEYWORDS = {
        'strategic': re.compile(r'\b(strategy|roadmap|priorities|direction|planning|board)\b'),
        'operational': re.compile(r'\b(standup|status|blockers?|progress|sync|check-in)\b'),
        'technical': re.compile(r'\b(architecture|design|code|api|implementation|review)\b'),
        'sales': re.compile(r'\b(customer|proposal|negotiat\w*|contract|revenue|deal)\b'),
        'advisory': re.compile(r'\b(advisor\w*|mentor\w*|guidance|recommend\w*|consult\w*)\b'),
        'legal': re.compile(r'\b(legal|compliance|terms|agreement|contract|liability)\b'),
    }

    # Sentence-ish splitter shared by decision and action-item extraction.
    _SENTENCE_SPLIT_RE = re.compile(r'[.!?]\s+')

    # "January 5, 2026" / "Jan 5 2026". The leading \b stops month names from
    # matching inside longer words (e.g. "dismay 12, 2024").
    _CONTENT_DATE_RE = re.compile(
        r'\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?'
        r'|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)'
        r'\s+\d{1,2},?\s+\d{4}',
        re.I,
    )

    # "<Name> will/should/needs to ..." at the start of any line of a chunk,
    # optionally preceded by a Markdown bullet.
    _OWNER_RE = re.compile(r'^\s*(?:[-*+]\s+)?(\w+)\s+(?:will|should|needs? to)\b', re.I | re.M)

    _DEADLINE_RE = re.compile(r'by (\w+ \d+|\d{4}-\d{2}-\d{2}|end of \w+|next \w+)', re.I)

    def __init__(self, config: Optional[Dict] = None):
        """Create an analyzer.

        Args:
            config: Optional configuration mapping. When omitted (``None``)
                the module-level ``load_config()`` is used. An explicitly
                passed empty dict is kept as-is.
        """
        self.config = config if config is not None else load_config()
        self.analysis_date = datetime.now().strftime('%Y-%m-%d')

    # ------------------------------------------------------------------
    # Analysis
    # ------------------------------------------------------------------

    def analyze_file(self, file_path: str, meeting_type: Optional[str] = None) -> Dict[str, Any]:
        """Analyze a single meeting notes file.

        Args:
            file_path: Path of a UTF-8 text/Markdown file.
            meeting_type: Optional override for the detected meeting type.

        Returns:
            Dict with keys ``metadata``, ``decisions``, ``action_items``,
            ``participants``, ``topics``, ``analysis_date`` and
            ``source_file``.

        Raises:
            OSError: If the file cannot be opened.
            UnicodeDecodeError: If the file is not valid UTF-8.
        """
        logger.info(f"Analyzing: {file_path}")

        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        metadata = self._extract_metadata(content, file_path)

        # An explicit override always wins; otherwise classify from content.
        if meeting_type:
            metadata['type'] = meeting_type
        elif not metadata.get('type'):
            metadata['type'] = self._classify_meeting(content)

        return {
            'metadata': metadata,
            'decisions': self._extract_decisions(content),
            'action_items': self._extract_action_items(content),
            'participants': self._extract_participants(content),
            'topics': self._extract_topics(content),
            'analysis_date': self.analysis_date,
            'source_file': str(file_path),
        }

    def _extract_metadata(self, content: str, file_path: str) -> Dict[str, str]:
        """Extract meeting metadata from content or filename.

        The date comes from a ``YYYY-MM-DD`` / ``YYYY_MM_DD`` fragment in the
        filename; a "Month D, YYYY" date found in the content overrides it
        (and is kept in its original textual form). The title is the first
        level-1 Markdown heading, falling back to a title-cased filename.
        """
        metadata = {}

        filename = Path(file_path).stem
        date_match = re.search(r'(\d{4}[-_]\d{2}[-_]\d{2})', filename)
        if date_match:
            metadata['date'] = date_match.group(1).replace('_', '-')

        date_in_content = self._CONTENT_DATE_RE.search(content)
        if date_in_content:
            metadata['date'] = date_in_content.group()

        title_match = re.search(r'^#\s+(.+)$', content, re.M)
        if title_match:
            metadata['title'] = title_match.group(1)
        else:
            metadata['title'] = filename.replace('-', ' ').replace('_', ' ').title()

        return metadata

    def _classify_meeting(self, content: str) -> str:
        """Classify meeting type by counting type-specific keywords.

        Returns the type with the most keyword hits (first listed type wins
        ties), or ``'operational'`` when nothing matches.
        """
        content_lower = content.lower()
        scores = {
            meeting_type: len(pattern.findall(content_lower))
            for meeting_type, pattern in self._TYPE_KEYWORDS.items()
        }
        return max(scores, key=scores.get) if any(scores.values()) else 'operational'

    def _extract_decisions(self, content: str) -> List[Dict[str, Any]]:
        """Extract decisions from meeting content.

        Each sentence-like chunk that matches any entry of
        ``DECISION_PATTERNS`` yields one record; only the first matching
        pattern is recorded so a chunk is never reported twice.
        """
        id_prefix = f'D-{self.analysis_date.replace("-", "")}'
        decisions = []

        for sentence in self._SENTENCE_SPLIT_RE.split(content):
            matched = next(
                (p for p in self.DECISION_PATTERNS if re.search(p, sentence, re.I)),
                None,
            )
            if matched is None:
                continue
            decisions.append({
                'id': f'{id_prefix}-{len(decisions) + 1:03d}',
                'statement': sentence.strip()[:500],  # Limit length
                'confidence': 'medium',
                'extracted_pattern': matched,
            })

        return decisions

    def _extract_action_items(self, content: str) -> List[Dict[str, Any]]:
        """Extract action items from meeting content.

        A chunk is an action item when it matches any ``ACTION_PATTERNS``
        entry. The owner is the word preceding will/should/needs to at the
        start of a line (``'UNASSIGNED'`` otherwise); the deadline is taken
        from a "by ..." phrase (``'TBD'`` otherwise).
        """
        id_prefix = f'AI-{self.analysis_date.replace("-", "")}'
        action_items = []

        for sentence in self._SENTENCE_SPLIT_RE.split(content):
            if not any(re.search(p, sentence, re.I) for p in self.ACTION_PATTERNS):
                continue

            owner_match = self._OWNER_RE.search(sentence)
            deadline_match = self._DEADLINE_RE.search(sentence)

            action_items.append({
                'id': f'{id_prefix}-{len(action_items) + 1:03d}',
                'action': sentence.strip()[:300],
                'owner': owner_match.group(1) if owner_match else 'UNASSIGNED',
                'deadline': deadline_match.group(1) if deadline_match else 'TBD',
                'priority': 'medium',
                'status': 'pending',
            })

        return action_items

    def _extract_participants(self, content: str) -> List[str]:
        """Extract two-word participant names from speaker labels.

        Recognizes ``**First Last**:`` anywhere and ``First Last:`` at the
        start of a line. The result is sorted so output is deterministic.
        """
        names = set(re.findall(r'\*\*(\w+ \w+)\*\*:', content))
        speakers = set(re.findall(r'^(\w+ \w+):', content, re.M))
        return sorted(names | speakers)

    def _extract_topics(self, content: str) -> List[Dict[str, str]]:
        """Extract main topics from level-2/3 Markdown headings (first 10)."""
        headers = re.findall(r'^#{2,3}\s+(.+)$', content, re.M)
        return [{'topic': header, 'type': 'header_detected'} for header in headers[:10]]

    # ------------------------------------------------------------------
    # Rendering helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _clip(text: str, limit: int) -> str:
        """Collapse whitespace and truncate to ``limit`` chars, adding '...' only if cut."""
        flat = ' '.join(str(text).split())
        if len(flat) <= limit:
            return flat
        return flat[:limit].rstrip() + '...'

    @classmethod
    def _table_cell(cls, text: str, limit: int) -> str:
        """Render text safely inside a one-line Markdown table cell."""
        # Newlines are already collapsed by _clip; a raw pipe would end the cell.
        return cls._clip(text, limit).replace('|', '\\|')

    # ------------------------------------------------------------------
    # Report generation
    # ------------------------------------------------------------------

    def generate_summary(self, analysis: Dict[str, Any]) -> str:
        """Generate a Markdown summary from an ``analyze_file`` result.

        Shows up to 5 decision highlights, up to 10 decisions and up to 15
        action items.
        """
        metadata = analysis['metadata']
        decisions = analysis['decisions']
        action_items = analysis['action_items']

        lines = [
            f"# Meeting Summary: {metadata.get('title', 'Untitled Meeting')}",
            "",
            f"**Date:** {metadata.get('date', 'Unknown')} | **Type:** {metadata.get('type', 'Unknown')}",
            "",
            "## Key Outcomes",
            "",
        ]

        if decisions:
            lines.extend(f"- {self._clip(dec['statement'], 100)}" for dec in decisions[:5])
        else:
            lines.append("- No explicit decisions captured")

        lines += [
            "",
            "## Decisions Made",
            "",
            "| ID | Decision | Confidence |",
            "|----|---------|-----------|",
        ]
        lines.extend(
            f"| {dec['id']} | {self._table_cell(dec['statement'], 80)} | {dec['confidence']} |"
            for dec in decisions[:10]
        )

        lines += [
            "",
            "## Action Items",
            "",
            "| ID | Action | Owner | Deadline | Status |",
            "|----|--------|-------|----------|--------|",
        ]
        lines.extend(
            f"| {ai['id']} | {self._table_cell(ai['action'], 60)} | {ai['owner']} "
            f"| {ai['deadline']} | {ai['status']} |"
            for ai in action_items[:15]
        )

        lines += ["", "---", f"*Analysis generated: {self.analysis_date}*", ""]
        return "\n".join(lines)

    def generate_full_analysis(self, analysis: Dict[str, Any]) -> str:
        """Generate the full Markdown analysis document with YAML front matter.

        Lists up to 10 participants, the first 3 decisions as an executive
        summary, then every decision and every action item.
        """
        metadata = analysis['metadata']
        decisions = analysis['decisions']

        # Inside a single-quoted YAML scalar a quote is escaped by doubling it.
        yaml_title = f"Meeting Analysis: {metadata.get('title', 'Untitled')}".replace("'", "''")

        lines = [
            "---",
            f"title: '{yaml_title}'",
            "type: meeting-analysis",
            "version: 1.0.0",
            f"created: '{self.analysis_date}'",
            "status: draft",
            "---",
            "",
            f"# Meeting Analysis: {metadata.get('title', 'Untitled Meeting')}",
            "",
            "## Meeting Metadata",
            "",
            "| Field | Value |",
            "|-------|-------|",
            f"| Meeting ID | MTG-{self.analysis_date.replace('-', '')}-001 |",
            f"| Date | {metadata.get('date', 'Unknown')} |",
            f"| Type | {metadata.get('type', 'Unknown')} |",
            "",
            "## Participants",
            "",
            "| Name | Role |",
            "|------|------|",
        ]
        lines.extend(f"| {p} | Participant |" for p in analysis['participants'][:10])

        lines += ["", "---", "", "## Executive Summary", ""]
        if decisions:
            lines.extend(f"- {self._clip(dec['statement'], 150)}" for dec in decisions[:3])
        else:
            lines.append("- No explicit decisions captured")

        lines += ["", "---", "", "## Decisions Made", ""]
        for dec in decisions:
            lines += [
                f"### {dec['id']}: Decision",
                "",
                f"**Statement:** {dec['statement']}",
                "",
                f"**Confidence:** {dec['confidence']}",
                "",
            ]

        lines += [
            "## Action Items",
            "",
            "| ID | Action | Owner | Deadline | Priority | Status |",
            "|----|--------|-------|----------|----------|--------|",
        ]
        lines.extend(
            f"| {ai['id']} | {self._table_cell(ai['action'], 50)} | {ai['owner']} "
            f"| {ai['deadline']} | {ai['priority']} | {ai['status']} |"
            for ai in analysis['action_items']
        )

        lines += [
            "",
            "---",
            "",
            f"**Generated:** {self.analysis_date}",
            "**Standard:** CODITECT-STANDARD-MEETING-ANALYSIS v1.0.0",
            "",
        ]
        return "\n".join(lines)

def main():
    """Command-line entry point.

    Parses arguments, analyzes either one file or (with ``--batch``) every
    ``*.md`` file directly inside a directory, and writes one report per
    input via ``_write_output``. Exits with status 1 when the input is
    missing, when a directory is given without ``--batch``, or when any file
    in a batch could not be processed.
    """
    parser = argparse.ArgumentParser(
        description='Analyze meeting notes and extract decisions/action items',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python3 meeting-analyzer.py notes.md
  python3 meeting-analyzer.py notes.md --format full --output analysis.md
  python3 meeting-analyzer.py meetings/ --batch
""",
    )

    parser.add_argument('input', help='Path to meeting notes file or directory')
    parser.add_argument('--format', choices=['summary', 'full', 'json'], default='summary',
                        help='Output format (default: summary)')
    parser.add_argument('--output', '-o', help='Output file path')
    parser.add_argument('--type', choices=MeetingAnalyzer.MEETING_TYPES,
                        help='Override meeting type classification')
    parser.add_argument('--batch', action='store_true',
                        help='Process all .md files in directory')
    parser.add_argument('--sync-tasks', action='store_true',
                        help='Sync action items to PILOT plan')

    args = parser.parse_args()

    if args.sync_tasks:
        # The flag is accepted but nothing in this script acts on it; say so
        # rather than letting the user assume a sync happened.
        logger.warning("--sync-tasks is not implemented in this script; ignoring")

    analyzer = MeetingAnalyzer()
    input_path = Path(args.input)
    # Keep the output extension consistent with the chosen format in both modes.
    extension = '.json' if args.format == 'json' else '.md'

    if args.batch and input_path.is_dir():
        # Batch processing. Skip "<name>-analysis.md" files that sit next to
        # their "<name>.md" source: those are reports from an earlier run and
        # would otherwise be analyzed again.
        marker = '-analysis'
        files = [
            f for f in sorted(input_path.glob('*.md'))
            if not (f.stem.endswith(marker)
                    and f.with_name(f.stem[:-len(marker)] + '.md').exists())
        ]
        logger.info(f"Processing {len(files)} files...")

        failures = 0
        for f in files:
            output_name = f.stem + '-analysis' + extension
            output_path = Path(args.output) / output_name if args.output else f.parent / output_name
            try:
                analysis = analyzer.analyze_file(str(f), args.type)
                _write_output(analyzer, analysis, args.format, str(output_path))
            except (OSError, UnicodeDecodeError) as exc:
                # One unreadable file should not abort the rest of the batch.
                failures += 1
                logger.error(f"Failed to process {f}: {exc}")

        if failures:
            logger.error(f"{failures} of {len(files)} files could not be processed")
            sys.exit(1)
    else:
        # Single file
        if not input_path.exists():
            logger.error(f"File not found: {input_path}")
            sys.exit(1)
        if input_path.is_dir():
            logger.error(f"{input_path} is a directory; use --batch to process it")
            sys.exit(1)

        analysis = analyzer.analyze_file(str(input_path), args.type)
        output_path = args.output or str(input_path.with_suffix('.analysis' + extension))

        _write_output(analyzer, analysis, args.format, output_path)

    logger.info("Analysis complete!")

def _write_output(analyzer: MeetingAnalyzer, analysis: Dict, fmt: str, output_path: str):
    """Write analysis output to file.

    Args:
        analyzer: Analyzer used to render the Markdown formats.
        analysis: Result dict from ``MeetingAnalyzer.analyze_file``.
        fmt: ``'json'``, ``'full'``, or anything else for the summary format.
        output_path: Destination file; missing parent directories are created.
    """
    if fmt == 'json':
        content = json.dumps(analysis, indent=2, default=str)
    elif fmt == 'full':
        content = analyzer.generate_full_analysis(analysis)
    else:
        content = analyzer.generate_summary(analysis)

    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(content)

    logger.info(f"Output written to: {output_path}")
    print(f"✅ Analysis saved: {output_path}")
    print(f"   Decisions: {len(analysis['decisions'])}")
    print(f"   Action Items: {len(analysis['action_items'])}")

# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

# NOTE: Stray heading fragments from a Markdown rendering of this file used to follow here; they only duplicated report-template text and were not valid Python.