#!/usr/bin/env python3
"""
Meeting Analyzer Script

Batch process meeting notes and transcripts to extract decisions, action items,
and strategic context.

Usage:
    python3 meeting-analyzer.py <input> [--format summary|full|json] [--output PATH] [--type TYPE] [--batch]

Standard: CODITECT-STANDARD-MEETING-ANALYSIS v1.0.0
Version: 1.0.0
Created: 2026-01-13
"""
import argparse
import json
import os
import re
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional

import yaml
# ADR-114: Add coditect lib path (framework installation)
sys.path.insert(0, str(Path.home() / ".coditect" / "lib"))

try:
    from coditect_utils import load_config, setup_logging
except ImportError:
    # Fallback if coditect_utils not available: keep the script usable with
    # an empty configuration and standard-library logging.
    def load_config():
        """Return an empty configuration dict."""
        return {}

    def setup_logging():
        """Configure root logging at INFO level and return this module's logger."""
        import logging

        logging.basicConfig(level=logging.INFO)
        return logging.getLogger(__name__)

logger = setup_logging()
class MeetingAnalyzer:
    """Heuristically extract structured information from meeting notes.

    The note text is split into sentence-like chunks.  Chunks matching
    ``DECISION_PATTERNS`` become decisions and chunks matching
    ``ACTION_PATTERNS`` become action items.  Metadata, participants and
    topics are pulled out with further regular expressions, and the result
    can be rendered as a short Markdown summary or a fuller Markdown report.
    """

    MEETING_TYPES = ['strategic', 'operational', 'technical', 'sales', 'advisory', 'legal']

    # A chunk matching any of these (case-insensitively) is recorded as a decision.
    DECISION_PATTERNS = [
        r'(?:we |they )?decided (?:to |that )',
        r'the decision (?:is|was) ',
        r'(?:we |they )?agreed (?:to |that )',
        r"(?:we're|we are) going (?:to |with )",
        r'the plan is ',
        r'(?:we |they )?will ',
        r"(?:we |they )?won't ",
        r"let's do ",
        r'final (?:answer|decision) is ',
    ]

    # A chunk matching any of these (case-insensitively) is recorded as an action item.
    ACTION_PATTERNS = [
        r'(\w+) will ',
        r'(\w+) should ',
        r'(\w+) needs? to ',
        r'action:? ',
        r'todo:? ',
        r'follow up (?:on |with )',
        r'send .+ to ',
        r'schedule ',
        r'by (?:end of |next |\d)',
    ]

    # Keyword regex per meeting type, applied to lower-cased content.  Stems
    # carry ``\w*`` so that e.g. "negotiation" and "recommended" are counted;
    # a bare stem between ``\b`` anchors only matches the stem as a whole word.
    # Insertion order matters: it is the tie-break order in _classify_meeting.
    _TYPE_KEYWORDS = {
        'strategic': r'\b(strategy|roadmap|priorities|direction|planning|board)\b',
        'operational': r'\b(standup|status|blockers?|progress|sync|check-in)\b',
        'technical': r'\b(architecture|design|code|api|implementation|review)\b',
        'sales': r'\b(customer|proposal|negotiat\w*|contract|revenue|deal)\b',
        'advisory': r'\b(advisor|mentor|guidance|recommend\w*|consult\w*)\b',
        'legal': r'\b(legal|compliance|terms|agreement|contract|liability)\b',
    }

    # Leading Markdown list markers ("- ", "* ", "1. ", "[ ] ") that would
    # otherwise hide the subject at the start of a chunk.
    _LIST_MARKER = re.compile(r'^(?:[-*+]\s+|\d+[.)]\s+|\[[ xX]\]\s+)+')

    def __init__(self, config: Optional[Dict] = None):
        """Store the configuration and the analysis date.

        Args:
            config: Configuration mapping.  When omitted or empty, the module's
                ``load_config()`` is called instead.
        """
        self.config = config or load_config()
        # Used in generated IDs and report footers (YYYY-MM-DD, local time).
        self.analysis_date = datetime.now().strftime('%Y-%m-%d')

    def analyze_file(self, file_path: str, meeting_type: Optional[str] = None) -> Dict[str, Any]:
        """Analyze a single meeting notes file.

        Args:
            file_path: Path of a UTF-8 text file containing the notes.
            meeting_type: Meeting type to record; when omitted the type is
                classified from the content.

        Returns:
            Dict with keys ``metadata``, ``decisions``, ``action_items``,
            ``participants``, ``topics``, ``analysis_date`` and ``source_file``.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        logger.info("Analyzing: %s", file_path)

        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        metadata = self._extract_metadata(content, file_path)

        # An explicit type wins; otherwise classify unless metadata has one.
        if meeting_type:
            metadata['type'] = meeting_type
        elif not metadata.get('type'):
            metadata['type'] = self._classify_meeting(content)

        return {
            'metadata': metadata,
            'decisions': self._extract_decisions(content),
            'action_items': self._extract_action_items(content),
            'participants': self._extract_participants(content),
            'topics': self._extract_topics(content),
            'analysis_date': self.analysis_date,
            'source_file': str(file_path),
        }

    def _extract_metadata(self, content: str, file_path: str) -> Dict[str, str]:
        """Extract meeting metadata from content or filename.

        Returns:
            Dict that always has ``title`` and has ``date`` when one was found.
        """
        metadata = {}

        # Date from the filename (YYYY-MM-DD or YYYY_MM_DD), normalised to dashes.
        filename = Path(file_path).stem
        date_match = re.search(r'(\d{4}[-_]\d{2}[-_]\d{2})', filename)
        if date_match:
            metadata['date'] = date_match.group(1).replace('_', '-')

        # A "Month D, YYYY" date in the body overrides the filename date and
        # is stored exactly as written.
        date_in_content = re.search(
            r'(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{1,2},?\s+\d{4}',
            content,
            re.I,
        )
        if date_in_content:
            metadata['date'] = date_in_content.group()

        # Title from the first level-1 heading, else derived from the filename.
        title_match = re.search(r'^#\s+(.+)$', content, re.M)
        if title_match:
            metadata['title'] = title_match.group(1)
        else:
            metadata['title'] = filename.replace('-', ' ').replace('_', ' ').title()

        return metadata

    def _classify_meeting(self, content: str) -> str:
        """Classify the meeting type by counting type-specific keywords.

        Returns:
            The type with the most keyword hits (ties go to the type listed
            first), or ``'operational'`` when no keyword occurs at all.
        """
        content_lower = content.lower()
        scores = {
            kind: len(re.findall(pattern, content_lower))
            for kind, pattern in self._TYPE_KEYWORDS.items()
        }
        return max(scores, key=scores.get) if any(scores.values()) else 'operational'

    def _extract_decisions(self, content: str) -> List[Dict[str, Any]]:
        """Extract decisions from meeting content.

        Returns:
            One dict per matching chunk with ``id``, ``statement`` (at most
            500 characters), ``confidence`` and ``extracted_pattern``.
        """
        decisions = []
        date_tag = self.analysis_date.replace('-', '')

        # Split into sentence-like chunks at ., ! or ? followed by whitespace.
        for sentence in re.split(r'[.!?]\s+', content):
            # Only the first matching pattern is recorded, so a chunk yields
            # at most one decision.
            pattern = next(
                (p for p in self.DECISION_PATTERNS if re.search(p, sentence, re.I)),
                None,
            )
            if pattern is None:
                continue
            decisions.append({
                'id': f'D-{date_tag}-{len(decisions) + 1:03d}',
                'statement': sentence.strip()[:500],  # Limit length
                'confidence': 'medium',
                'extracted_pattern': pattern,
            })

        return decisions

    def _extract_action_items(self, content: str) -> List[Dict[str, Any]]:
        """Extract action items from meeting content.

        Returns:
            One dict per matching chunk with ``id``, ``action`` (at most 300
            characters), ``owner`` (``'UNASSIGNED'`` when none is detected),
            ``deadline`` (``'TBD'`` when none is detected), ``priority`` and
            ``status``.
        """
        action_items = []
        date_tag = self.analysis_date.replace('-', '')

        for sentence in re.split(r'[.!?]\s+', content):
            if not any(re.search(p, sentence, re.I) for p in self.ACTION_PATTERNS):
                continue

            # The owner is the word that opens the chunk; strip whitespace and
            # list markers first so "- John will ..." still yields "John".
            subject = self._LIST_MARKER.sub('', sentence.lstrip())
            owner_match = re.search(r'^(\w+)\s+(?:will|should|needs? to)', subject, re.I)
            owner = owner_match.group(1) if owner_match else 'UNASSIGNED'

            deadline_match = re.search(
                r'by (\w+ \d+|\d{4}-\d{2}-\d{2}|end of \w+|next \w+)', sentence, re.I
            )
            deadline = deadline_match.group(1) if deadline_match else 'TBD'

            action_items.append({
                'id': f'AI-{date_tag}-{len(action_items) + 1:03d}',
                'action': sentence.strip()[:300],
                'owner': owner,
                'deadline': deadline,
                'priority': 'medium',
                'status': 'pending',
            })

        return action_items

    def _extract_participants(self, content: str) -> List[str]:
        """Extract two-word participant names from speaker labels.

        Recognises ``**First Last**:`` anywhere and ``First Last:`` at the
        start of a line.

        Returns:
            Unique names in sorted order, so repeated runs give the same output.
        """
        names = set(re.findall(r'\*\*(\w+ \w+)\*\*:', content))
        speakers = set(re.findall(r'^(\w+ \w+):', content, re.M))
        return sorted(names | speakers)

    def _extract_topics(self, content: str) -> List[Dict[str, str]]:
        """Extract main topics from level-2 and level-3 Markdown headings.

        Returns:
            At most ten dicts with ``topic`` and ``type``.
        """
        headers = re.findall(r'^#{2,3}\s+(.+)$', content, re.M)
        # Limit to the first 10 topics.
        return [{'topic': header, 'type': 'header_detected'} for header in headers[:10]]

    @staticmethod
    def _shorten(text: str, limit: int) -> str:
        """Collapse whitespace to single spaces and cut to ``limit`` characters.

        An ellipsis is appended only when text was actually cut off.
        """
        flat = ' '.join(text.split())
        return flat if len(flat) <= limit else flat[:limit] + '...'

    @classmethod
    def _table_cell(cls, text: str, limit: int) -> str:
        """Return ``text`` as a single-line Markdown table cell.

        Newlines would end the table row and ``|`` would start a new column,
        so whitespace is collapsed and pipes are escaped.
        """
        return cls._shorten(text, limit).replace('|', '\\|')

    def generate_summary(self, analysis: Dict[str, Any]) -> str:
        """Generate a Markdown summary from an analysis dict.

        Args:
            analysis: Dict with ``metadata``, ``decisions`` and
                ``action_items`` as returned by ``analyze_file``.

        Returns:
            Markdown text listing up to 5 key outcomes, 10 decisions and
            15 action items.
        """
        metadata = analysis['metadata']
        decisions = analysis['decisions']
        action_items = analysis['action_items']

        lines = [
            f"# Meeting Summary: {metadata.get('title', 'Untitled Meeting')}",
            "",
            f"**Date:** {metadata.get('date', 'Unknown')} | **Type:** {metadata.get('type', 'Unknown')}",
            "",
            "## Key Outcomes",
            "",
        ]

        # Decision highlights.
        lines.extend(f"- {self._shorten(dec['statement'], 100)}" for dec in decisions[:5])
        if not decisions:
            lines.append("- No explicit decisions captured")

        lines += [
            "",
            "## Decisions Made",
            "",
            "| ID | Decision | Confidence |",
            "|----|---------|-----------|",
        ]
        lines.extend(
            f"| {dec['id']} | {self._table_cell(dec['statement'], 80)} | {dec['confidence']} |"
            for dec in decisions[:10]
        )

        lines += [
            "",
            "## Action Items",
            "",
            "| ID | Action | Owner | Deadline | Status |",
            "|----|--------|-------|----------|--------|",
        ]
        lines.extend(
            f"| {ai['id']} | {self._table_cell(ai['action'], 60)} | {ai['owner']} | {ai['deadline']} | {ai['status']} |"
            for ai in action_items[:15]
        )

        lines += ["", "---", f"*Analysis generated: {self.analysis_date}*"]
        return "\n".join(lines) + "\n"

    def generate_full_analysis(self, analysis: Dict[str, Any]) -> str:
        """Generate the full Markdown analysis document.

        Args:
            analysis: Dict with ``metadata``, ``participants``, ``decisions``
                and ``action_items`` as returned by ``analyze_file``.

        Returns:
            Markdown text with YAML front matter, metadata and participant
            tables, an executive summary, all decisions and all action items.
        """
        metadata = analysis['metadata']
        date_tag = self.analysis_date.replace('-', '')
        # Inside a single-quoted YAML scalar a quote is escaped by doubling it.
        front_title = metadata.get('title', 'Untitled').replace("'", "''")

        lines = [
            "---",
            f"title: 'Meeting Analysis: {front_title}'",
            "type: meeting-analysis",
            "version: 1.0.0",
            f"created: '{self.analysis_date}'",
            "status: draft",
            "---",
            "",
            f"# Meeting Analysis: {metadata.get('title', 'Untitled Meeting')}",
            "",
            "## Meeting Metadata",
            "",
            "| Field | Value |",
            "|---|---|",
            f"| Meeting ID | MTG-{date_tag}-001 |",
            f"| Date | {metadata.get('date', 'Unknown')} |",
            f"| Type | {metadata.get('type', 'Unknown')} |",
            "",
            "## Participants",
            "",
            "| Name | Role |",
            "|---|---|",
        ]
        lines.extend(f"| {p} | Participant |" for p in analysis['participants'][:10])

        lines += ["", "---", "", "## Executive Summary", ""]
        lines.extend(f"- {self._shorten(dec['statement'], 150)}" for dec in analysis['decisions'][:3])

        lines += ["", "---", "", "## Decisions Made", ""]
        for dec in analysis['decisions']:
            lines += [
                f"### {dec['id']}: Decision",
                "",
                f"**Statement:** {dec['statement']}",
                "",
                f"**Confidence:** {dec['confidence']}",
                "",
            ]

        lines += [
            "## Action Items",
            "",
            "| ID | Action | Owner | Deadline | Priority | Status |",
            "|----|--------|-------|----------|----------|--------|",
        ]
        lines.extend(
            f"| {ai['id']} | {self._table_cell(ai['action'], 50)} | {ai['owner']} | {ai['deadline']} | {ai['priority']} | {ai['status']} |"
            for ai in analysis['action_items']
        )

        lines += [
            "",
            "---",
            "",
            f"**Generated:** {self.analysis_date}",
            "**Standard:** CODITECT-STANDARD-MEETING-ANALYSIS v1.0.0",
        ]
        return "\n".join(lines) + "\n"
def main():
    """Command-line entry point: parse arguments, analyze, write the output.

    With ``--batch`` and a directory input, every ``*.md`` file directly in
    that directory is analyzed and written to ``<stem>-analysis.<ext>`` (in
    ``--output`` when given, else beside the source file).  Otherwise the
    input is treated as a single file and written to ``--output`` or to
    ``<input stem>.analysis.<ext>``.  ``<ext>`` is ``json`` for
    ``--format json`` and ``md`` for the other formats.

    Exits with status 1 when the input does not exist, or is a directory
    given without ``--batch``.
    """
    parser = argparse.ArgumentParser(
        description='Analyze meeting notes and extract decisions/action items',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python3 meeting-analyzer.py notes.md
  python3 meeting-analyzer.py notes.md --format full --output analysis.md
  python3 meeting-analyzer.py meetings/ --batch
""",
    )
    parser.add_argument('input', help='Path to meeting notes file or directory')
    parser.add_argument('--format', choices=['summary', 'full', 'json'], default='summary',
                        help='Output format (default: summary)')
    parser.add_argument('--output', '-o', help='Output file path')
    parser.add_argument('--type', choices=MeetingAnalyzer.MEETING_TYPES,
                        help='Override meeting type classification')
    parser.add_argument('--batch', action='store_true',
                        help='Process all .md files in directory')
    parser.add_argument('--sync-tasks', action='store_true',
                        help='Sync action items to PILOT plan')

    args = parser.parse_args()

    if args.sync_tasks:
        # The flag is accepted but nothing in this script acts on it; say so
        # instead of silently ignoring the request.
        logger.warning("--sync-tasks is not implemented; action items were not synced")

    analyzer = MeetingAnalyzer()
    input_path = Path(args.input)
    # Keep the file extension consistent with the content in both modes.
    extension = '.json' if args.format == 'json' else '.md'

    if args.batch and input_path.is_dir():
        # Batch processing; sorted so the processing order is reproducible.
        files = sorted(input_path.glob('*.md'))
        logger.info("Processing %d files...", len(files))

        for f in files:
            analysis = analyzer.analyze_file(str(f), args.type)
            output_name = f.stem + '-analysis' + extension
            output_dir = Path(args.output) if args.output else f.parent
            _write_output(analyzer, analysis, args.format, str(output_dir / output_name))
    else:
        # Single file
        if not input_path.exists():
            logger.error(f"File not found: {input_path}")
            sys.exit(1)
        if input_path.is_dir():
            logger.error(f"Input is a directory (use --batch): {input_path}")
            sys.exit(1)

        analysis = analyzer.analyze_file(str(input_path), args.type)
        output_path = args.output or str(input_path.with_suffix('.analysis' + extension))
        _write_output(analyzer, analysis, args.format, output_path)

    logger.info("Analysis complete!")
def _write_output(analyzer: MeetingAnalyzer, analysis: Dict, fmt: str, output_path: str):
    """Render ``analysis`` in the requested format and save it to ``output_path``.

    Args:
        analyzer: Analyzer whose report generators are used for non-JSON output.
        analysis: Analysis dict; must contain ``decisions`` and ``action_items``.
        fmt: ``'json'`` for a JSON dump, ``'full'`` for the full report;
            any other value produces the summary.
        output_path: Destination file; missing parent directories are created.
    """
    if fmt == 'json':
        # default=str stringifies any value json cannot encode natively.
        rendered = json.dumps(analysis, indent=2, default=str)
    else:
        render = analyzer.generate_full_analysis if fmt == 'full' else analyzer.generate_summary
        rendered = render(analysis)

    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    with destination.open('w', encoding='utf-8') as handle:
        handle.write(rendered)

    logger.info(f"Output written to: {output_path}")
    print(f"✅ Analysis saved: {output_path}")
    print(f" Decisions: {len(analysis['decisions'])}")
    print(f" Action Items: {len(analysis['action_items'])}")
# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()