Agent Skills Framework Extension
Thoughts Analysis Patterns Skill
When to Use This Skill
Use this skill when implementing thoughts analysis patterns in your codebase.
How to Use This Skill
- Review the patterns and examples below
- Apply the relevant patterns to your implementation
- Follow the best practices outlined in this skill
Research document analysis, insight extraction, pattern recognition, and knowledge synthesis from complex texts.
Core Capabilities
- Structure Analysis - Document organization and flow
- Insight Extraction - Key findings and conclusions
- Pattern Recognition - Recurring themes and concepts
- Concept Mapping - Entity relationships
- Argument Analysis - Claims, evidence, reasoning
- Knowledge Synthesis - Cross-document integration
Document Structure Analyzer
# scripts/document-analyzer.py
from dataclasses import dataclass
from typing import List, Dict, Optional
import re
@dataclass
class DocumentSection:
    """One node of the parsed document tree: a header plus its body text."""
    level: int                            # header depth (1-6); 0 for the synthetic root
    title: str                            # header text without the leading '#'s
    content: str                          # body text belonging directly to this section
    subsections: List['DocumentSection']  # child sections nested under this header
    keywords: List[str]                   # filled in by DocumentAnalyzer._extract_keywords
    insights: List[str]                   # filled in by DocumentAnalyzer._extract_insights


class DocumentAnalyzer:
    """Analyze research document structure and content.

    Parses a Markdown-style document ('#' headers) into a DocumentSection
    tree, then annotates every section with its most frequent keywords and
    with sentences that look like key insights.
    """

    # Phrases that typically introduce a conclusion or key finding.
    INSIGHT_INDICATORS = [
        'therefore', 'thus', 'in conclusion', 'key finding',
        'importantly', 'notably', 'it follows that'
    ]

    # Very common words that make poor keywords.
    STOPWORDS = {'this', 'that', 'with', 'from', 'have', 'will', 'been'}

    def analyze(self, document: str) -> DocumentSection:
        """Analyze a complete document and return the annotated root section."""
        root = self._parse_structure(document)
        self._extract_keywords(root)
        self._extract_insights(root)
        return root

    def _parse_structure(self, text: str) -> DocumentSection:
        """Parse '#{1,6}'-style headers into a hierarchy of sections.

        Non-header lines accumulate as the content of the most recently
        opened section (or of the root, before any header is seen).
        """
        root = DocumentSection(level=0, title="Root", content="",
                               subsections=[], keywords=[], insights=[])
        stack = [root]  # stack[-1] is the section currently receiving content
        current_content: List[str] = []
        for line in text.split('\n'):
            header_match = re.match(r'^(#{1,6})\s+(.+)$', line)
            if not header_match:
                current_content.append(line)
                continue
            # Flush accumulated body text into the section it belongs to.
            if current_content:
                stack[-1].content = '\n'.join(current_content)
                current_content = []
            level = len(header_match.group(1))
            section = DocumentSection(level=level,
                                      title=header_match.group(2).strip(),
                                      content="", subsections=[],
                                      keywords=[], insights=[])
            # Pop back up so the new section attaches to its true parent
            # (root stays at the bottom because its level is 0).
            while len(stack) > level:
                stack.pop()
            stack[-1].subsections.append(section)
            stack.append(section)
        # Flush any trailing content after the last header.
        if current_content:
            stack[-1].content = '\n'.join(current_content)
        return root

    def _extract_keywords(self, section: DocumentSection) -> None:
        """Attach the 10 most frequent non-stopword terms to each section (recursive)."""
        text = f"{section.title} {section.content}".lower()
        words = re.findall(r'\b\w{4,}\b', text)  # ignore tokens shorter than 4 chars
        freq: Dict[str, int] = {}
        for word in words:
            if word not in self.STOPWORDS:
                freq[word] = freq.get(word, 0) + 1
        section.keywords = sorted(freq, key=lambda w: freq[w], reverse=True)[:10]
        for subsection in section.subsections:
            self._extract_keywords(subsection)

    def _extract_insights(self, section: DocumentSection) -> None:
        """Collect up to 3 sentences per section containing an insight indicator.

        Indicators are matched on word boundaries so that e.g. 'thus' does
        not fire inside 'enthusiasm' (the original substring test did).
        """
        indicator_re = re.compile(
            r'\b(?:' + '|'.join(re.escape(ind) for ind in self.INSIGHT_INDICATORS) + r')\b')
        sentences = section.content.lower().split('.')
        insights = [s.strip() for s in sentences if indicator_re.search(s)]
        section.insights = insights[:3]  # top 3 only
        for subsection in section.subsections:
            self._extract_insights(subsection)

    def generate_summary(self, section: DocumentSection, depth: int = 2) -> str:
        """Render a hierarchical Markdown summary down to ``depth`` header levels."""
        lines: List[str] = []
        if section.level > 0:  # the synthetic root has no heading of its own
            indent = ' ' * (section.level - 1)
            lines.append(f"{indent}{'#' * section.level} {section.title}")
            if section.keywords:
                lines.append(f"{indent}Keywords: {', '.join(section.keywords[:5])}")
            if section.insights:
                lines.append(f"{indent}Insights:")
                for insight in section.insights:
                    # Add an ellipsis only when the insight is actually truncated.
                    suffix = '...' if len(insight) > 100 else ''
                    lines.append(f"{indent} - {insight[:100]}{suffix}")
        if section.level < depth:
            for subsection in section.subsections:
                lines.extend(self.generate_summary(subsection, depth).split('\n'))
        return '\n'.join(lines)
# Usage example: analyze a small research note and print its summary.
sample_document = """
# Machine Learning Research
## Neural Networks
Deep learning has revolutionized AI. Therefore, we focus on neural architectures.
### CNNs
Convolutional networks excel at vision tasks.
### RNNs
Recurrent networks handle sequences well.
## Conclusion
Notably, ensemble methods outperform single models.
"""
document_analyzer = DocumentAnalyzer()
analysis_root = document_analyzer.analyze(sample_document)
print(document_analyzer.generate_summary(analysis_root))
Insight Extraction Engine
# scripts/insight-extraction.py
from dataclasses import dataclass
from typing import List, Dict
import re
@dataclass
class Insight:
    """A single extracted insight with its supporting context."""
    text: str            # the insight fragment (regex capture group)
    category: str        # 'finding', 'conclusion', 'recommendation', 'observation'
    confidence: float    # heuristic confidence in [0.0, 1.0]
    evidence: List[str]  # surrounding sentences from the source text
    location: str        # character offset of the match, e.g. "char 42"


class InsightExtractor:
    """Extract categorized insights from research documents.

    Each category is keyed to regex cues. Patterns are anchored on word
    boundaries, tolerate a comma after single-word cues ("Therefore, ..."),
    and capture only up to the end of the current sentence — the original
    unanchored ``(.+)`` patterns missed the common "Therefore," form and
    greedily captured everything to the end of the line.
    """

    INSIGHT_PATTERNS = {
        'finding': [
            r'\bwe found that ([^.!?\n]+)',
            r'\bresults show ([^.!?\n]+)',
            r'\banalysis reveals ([^.!?\n]+)',
        ],
        'conclusion': [
            r'\btherefore,?\s+([^.!?\n]+)',
            r'\bin conclusion,?\s+([^.!?\n]+)',
            r'\bthus,?\s+([^.!?\n]+)',
        ],
        'recommendation': [
            r'\bwe recommend ([^.!?\n]+)',
            r'\bshould ([^.!?\n]+)',
            r'\bit is advised ([^.!?\n]+)',
        ],
        'observation': [
            r'\bnotably,?\s+([^.!?\n]+)',
            r'\binterestingly,?\s+([^.!?\n]+)',
            r'\bit appears ([^.!?\n]+)',
        ],
    }

    def extract(self, text: str) -> List[Insight]:
        """Extract all insights found in ``text``, in pattern order."""
        insights: List[Insight] = []
        for category, patterns in self.INSIGHT_PATTERNS.items():
            for pattern in patterns:
                for match in re.finditer(pattern, text, re.IGNORECASE):
                    insights.append(Insight(
                        text=match.group(1).strip(),
                        category=category,
                        confidence=0.8,  # fixed heuristic; no calibration data available
                        evidence=self._extract_evidence(text, match.start()),
                        location=f"char {match.start()}",
                    ))
        return insights

    def _extract_evidence(self, text: str, position: int) -> List[str]:
        """Return up to two non-trivial sentences around ``position`` as evidence."""
        # Look at a 400-char window centered on the match.
        start = max(0, position - 200)
        end = min(len(text), position + 200)
        context = text[start:end]
        sentences = context.split('.')
        return [s.strip() for s in sentences if len(s.strip()) > 20][:2]

    def generate_report(self, insights: List[Insight]) -> str:
        """Render a Markdown report of the insights grouped by category."""
        by_category: Dict[str, List[Insight]] = {}
        for insight in insights:
            by_category.setdefault(insight.category, []).append(insight)
        report = "# Extracted Insights\n\n"
        for category, category_insights in by_category.items():
            report += f"## {category.title()}s\n\n"
            for i, insight in enumerate(category_insights, 1):
                report += f"{i}. {insight.text}\n"
                if insight.evidence:
                    report += f" Evidence: {insight.evidence[0][:100]}...\n"
            report += "\n"
        return report
# Usage example: pull categorized insights out of a short passage.
sample_text = """
Our analysis reveals that neural networks outperform traditional methods by 30%.
Therefore, we recommend adopting deep learning for production systems.
Interestingly, smaller models often generalize better than larger ones.
"""
insight_extractor = InsightExtractor()
extracted = insight_extractor.extract(sample_text)
print(insight_extractor.generate_report(extracted))
Pattern Recognition System
# scripts/pattern-recognition.py
from dataclasses import dataclass
from typing import List, Dict, Set
from collections import Counter
import re
@dataclass
class Pattern:
    """A recurring pattern detected across one or more documents."""
    type: str            # 'concept', 'relationship', or 'trend'
    description: str     # human-readable description of the pattern
    frequency: int       # occurrence / document count for the pattern
    examples: List[str]  # short excerpts illustrating the pattern (up to 3)
    confidence: float    # heuristic confidence in [0.0, 1.0]


class PatternRecognizer:
    """Recognize concept, relationship, and trend patterns across documents.

    Duplicate relationship/trend hits are aggregated into one Pattern with a
    real frequency count instead of one frequency-1 Pattern per raw match
    (single-occurrence "patterns" are spurious by this skill's own checklist).
    """

    # (regex, label) pairs describing entity relationships.
    RELATIONSHIP_REGEXES = [
        (r'(\w+) causes (\w+)', 'causal'),
        (r'(\w+) enables (\w+)', 'enabling'),
        (r'(\w+) requires (\w+)', 'dependency'),
    ]

    # Words that signal a temporal / progression trend.
    TREND_INDICATORS = [
        'increasing', 'decreasing', 'growing', 'declining',
        'rising', 'falling', 'improving', 'worsening'
    ]

    def recognize(self, documents: List[str]) -> List[Pattern]:
        """Return all detected patterns, most frequent first."""
        patterns: List[Pattern] = []
        patterns.extend(self._find_concept_patterns(documents))
        patterns.extend(self._find_relationship_patterns(documents))
        patterns.extend(self._find_trend_patterns(documents))
        return sorted(patterns, key=lambda p: p.frequency, reverse=True)

    def _find_concept_patterns(self, documents: List[str]) -> List[Pattern]:
        """Find capitalized phrases that recur across the corpus."""
        all_phrases: List[str] = []
        for doc in documents:
            # Simplified noun-phrase heuristic: runs of Capitalized words.
            all_phrases.extend(re.findall(r'\b[A-Z][a-z]+(?: [A-Z][a-z]+)*\b', doc))
        freq = Counter(all_phrases)
        patterns = []
        for phrase, count in freq.most_common(10):
            if count >= 2:  # require recurrence to avoid spurious one-offs
                examples = [doc[:100] for doc in documents if phrase in doc][:3]
                patterns.append(Pattern(
                    type='concept',
                    description=f"Recurring concept: {phrase}",
                    frequency=count,
                    examples=examples,
                    confidence=min(count / len(documents), 1.0),
                ))
        return patterns

    def _find_relationship_patterns(self, documents: List[str]) -> List[Pattern]:
        """Find 'X causes/enables/requires Y' statements, aggregated by frequency."""
        example_map: Dict[str, List[str]] = {}  # description -> example excerpts
        counts: Counter = Counter()             # description -> occurrence count
        for doc in documents:
            for regex, rel_type in self.RELATIONSHIP_REGEXES:
                for subject, obj in re.findall(regex, doc, re.IGNORECASE):
                    description = f"{subject} {rel_type} {obj}"
                    counts[description] += 1
                    examples = example_map.setdefault(description, [])
                    if len(examples) < 3:
                        examples.append(doc[:100])
        return [
            Pattern(type='relationship', description=description,
                    frequency=counts[description], examples=examples,
                    confidence=0.7)
            for description, examples in example_map.items()
        ]

    def _find_trend_patterns(self, documents: List[str]) -> List[Pattern]:
        """Find trend indicators; one Pattern per indicator with a document count."""
        patterns = []
        for indicator in self.TREND_INDICATORS:
            matching = [doc for doc in documents if indicator in doc.lower()]
            if matching:
                patterns.append(Pattern(
                    type='trend',
                    description=f"Trend: {indicator}",
                    frequency=len(matching),  # number of documents showing the trend
                    examples=[self._extract_context(doc, indicator)
                              for doc in matching[:3]],
                    confidence=0.6,
                ))
        return patterns

    def _extract_context(self, text: str, keyword: str) -> str:
        """Return up to 150 chars of text surrounding the first hit of ``keyword``."""
        index = text.lower().find(keyword)
        if index == -1:
            return ""
        start = max(0, index - 50)
        end = min(len(text), index + 100)
        return text[start:end]
# Usage example: surface recurring concepts, relationships, and trends.
corpus = [
    "Machine Learning enables better predictions. ML improves over time.",
    "Deep Learning causes significant improvements. ML requires large datasets.",
]
pattern_recognizer = PatternRecognizer()
for found_pattern in pattern_recognizer.recognize(corpus):
    print(f"{found_pattern.type}: {found_pattern.description} (freq={found_pattern.frequency})")
Usage Examples
Document Analysis
Apply thoughts-analysis-patterns skill to analyze research paper structure and extract insights
Insight Extraction
Apply thoughts-analysis-patterns skill to extract findings and recommendations from technical report
Pattern Recognition
Apply thoughts-analysis-patterns skill to identify recurring concepts across multiple documents
Success Output
When successful, this skill MUST output:
✅ SKILL COMPLETE: thoughts-analysis-patterns
Completed:
- [x] Document structure parsed and analyzed (headers, sections, subsections)
- [x] Keywords extracted from each section (top 10 per section)
- [x] Insights identified with categories (findings, conclusions, recommendations)
- [x] Patterns recognized across multiple documents (concepts, relationships, trends)
- [x] Summary report generated with hierarchical structure
Outputs:
- Document analysis report with structure tree
- Insight extraction report by category
- Pattern recognition report with frequency counts
- Knowledge synthesis across documents
- Recommendations for further analysis
Completion Checklist
Before marking this skill as complete, verify:
- Document structure correctly identifies all headers (H1-H6)
- Keywords extracted are relevant and domain-specific
- Insights categorized correctly (finding vs. conclusion vs. recommendation)
- Patterns identified appear in multiple documents (not spurious)
- Evidence provided for each insight (surrounding sentences)
- Confidence scores assigned to insights (0.0-1.0)
- Summary report is hierarchical and readable
- Cross-document patterns are validated (not false positives)
Failure Indicators
This skill has FAILED if:
- ❌ Document structure parsing misses sections or headers
- ❌ Keywords extracted are generic (stopwords like "this", "that")
- ❌ Insights misclassified (conclusion labeled as finding)
- ❌ Patterns identified are spurious (appear only once)
- ❌ No evidence provided for insights (context missing)
- ❌ Confidence scores not calibrated (all HIGH or all LOW)
- ❌ Summary report is unstructured or unreadable
- ❌ Cross-document analysis missed obvious connections
When NOT to Use
Do NOT use this skill when:
- Documents are structured data (CSV, JSON) - use data-analysis patterns instead
- Real-time analysis required (use streaming-analysis patterns)
- Documents are code files (use code-analysis-patterns skill)
- Documents are images/PDFs without OCR (use document-extraction skill first)
- Single-sentence analysis (use NLP patterns, not document analysis)
- Translation needed (use language-translation skill)
- Sentiment analysis required (use sentiment-analysis-patterns skill)
Use these alternatives instead:
- Structured data: data-analysis-patterns skill
- Code analysis: code-analysis-patterns skill
- Sentiment: sentiment-analysis-patterns skill
Anti-Patterns (Avoid)
| Anti-Pattern | Problem | Solution |
|---|---|---|
| No stopword filtering | Generic keywords extracted | Filter common words (this, that, with, from) |
| Single-document patterns | False pattern detection | Require patterns appear in 2+ documents |
| No confidence scoring | All insights treated equally | Assign confidence based on evidence quality |
| Regex-only extraction | Misses semantic insights | Combine regex with semantic analysis |
| No evidence tracking | Insights lack context | Extract surrounding sentences as evidence |
| Hardcoded categories | Inflexible categorization | Use configurable category patterns |
| No validation step | Spurious insights reported | Validate insights against source text |
Principles
This skill embodies:
- #2 Progressive Disclosure - Start with structure, then keywords, then insights, then patterns
- #5 Eliminate Ambiguity - Clear categorization (finding vs. conclusion vs. recommendation)
- #6 Clear, Understandable, Explainable - Provide evidence for every insight
- #7 Verification Protocol - Validate patterns appear in multiple documents
- #8 No Assumptions - Assign confidence scores, don't assume all insights are equal
Full Standard: CODITECT-STANDARD-AUTOMATION.md
Integration Points
- research-patterns - Technical research
- educational-content-patterns - Content generation
- competitive-analysis - Market insight extraction