Skip to main content

Agent Skills Framework Extension

Thoughts Analysis Patterns Skill

When to Use This Skill

Use this skill when implementing thoughts-analysis patterns in your codebase.

How to Use This Skill

  1. Review the patterns and examples below
  2. Apply the relevant patterns to your implementation
  3. Follow the best practices outlined in this skill

Research document analysis, insight extraction, pattern recognition, and knowledge synthesis from complex texts.

Core Capabilities

  1. Structure Analysis - Document organization and flow
  2. Insight Extraction - Key findings and conclusions
  3. Pattern Recognition - Recurring themes and concepts
  4. Concept Mapping - Entity relationships
  5. Argument Analysis - Claims, evidence, reasoning
  6. Knowledge Synthesis - Cross-document integration

Document Structure Analyzer

# scripts/document-analyzer.py
from dataclasses import dataclass
from typing import List, Dict, Optional
import re

@dataclass
class DocumentSection:
    """One node in a document's heading hierarchy."""

    level: int                            # heading depth; 0 for the synthetic root
    title: str                            # heading text
    content: str                          # body text directly under this heading
    subsections: List['DocumentSection']  # nested child sections
    keywords: List[str]                   # populated by DocumentAnalyzer._extract_keywords
    insights: List[str]                   # populated by DocumentAnalyzer._extract_insights

class DocumentAnalyzer:
    """Analyze the structure and content of a markdown-style research document.

    Typical flow: ``analyze`` parses the ``#`` heading hierarchy into a tree
    of ``DocumentSection`` nodes, then annotates every node with its most
    frequent keywords and with sentences that look like insights.
    """

    # Words too common to be useful as keywords.
    _STOPWORDS = {'this', 'that', 'with', 'from', 'have', 'will', 'been'}

    # Phrases that typically introduce a conclusion or key finding.
    _INSIGHT_INDICATORS = (
        'therefore', 'thus', 'in conclusion', 'key finding',
        'importantly', 'notably', 'it follows that',
    )

    def analyze(self, document: str) -> DocumentSection:
        """Parse *document* and annotate the resulting section tree.

        Returns the synthetic root section (level 0) whose subsections
        mirror the document's heading hierarchy.
        """
        root = self._parse_structure(document)
        self._extract_keywords(root)
        self._extract_insights(root)
        return root

    def _parse_structure(self, text: str) -> DocumentSection:
        """Build the section tree from markdown ``#``..``######`` headings.

        Non-heading lines accumulate into the content of the most recently
        opened section (or the root, before the first heading).
        """
        root = DocumentSection(
            level=0,
            title="Root",
            content="",
            subsections=[],
            keywords=[],
            insights=[],
        )

        stack = [root]          # chain of currently open sections, outermost first
        current_content = []    # pending body lines belonging to stack[-1]

        for line in text.split('\n'):
            header_match = re.match(r'^(#{1,6})\s+(.+)$', line)
            if not header_match:
                current_content.append(line)
                continue

            # A new heading closes the pending content of the current section.
            if current_content:
                stack[-1].content = '\n'.join(current_content)
                current_content = []

            level = len(header_match.group(1))
            title = header_match.group(2).strip()
            section = DocumentSection(
                level=level,
                title=title,
                content="",
                subsections=[],
                keywords=[],
                insights=[],
            )

            # Pop until the stack top is a strict ancestor. Comparing section
            # levels (rather than stack depth) keeps same-level headings as
            # siblings even when the document skips levels (e.g. # -> ###).
            while stack[-1].level >= level:
                stack.pop()

            stack[-1].subsections.append(section)
            stack.append(section)

        # Flush trailing content after the last heading.
        if current_content:
            stack[-1].content = '\n'.join(current_content)

        return root

    def _extract_keywords(self, section: DocumentSection) -> None:
        """Attach the 10 most frequent words (4+ chars, non-stopword) per section."""
        text = f"{section.title} {section.content}".lower()
        words = re.findall(r'\b\w{4,}\b', text)

        freq: Dict[str, int] = {}
        for word in words:
            if word not in self._STOPWORDS:
                freq[word] = freq.get(word, 0) + 1

        # Stable sort preserves first-seen order among equal frequencies.
        section.keywords = sorted(freq, key=freq.get, reverse=True)[:10]

        for subsection in section.subsections:
            self._extract_keywords(subsection)

    def _extract_insights(self, section: DocumentSection) -> None:
        """Attach up to 3 sentences containing insight indicator phrases."""
        sentences = section.content.lower().split('.')
        insights = [
            sent.strip()
            for sent in sentences
            if any(ind in sent for ind in self._INSIGHT_INDICATORS)
        ]
        section.insights = insights[:3]  # keep the first three only

        for subsection in section.subsections:
            self._extract_insights(subsection)

    def generate_summary(self, section: DocumentSection, depth: int = 2) -> str:
        """Render a hierarchical text summary down to *depth* heading levels."""
        lines: List[str] = []

        # The synthetic root (level 0) renders no header of its own; all
        # indent-dependent output is guarded here so `indent` is always bound.
        if section.level > 0:
            indent = ' ' * (section.level - 1)
            lines.append(f"{indent}{'#' * section.level} {section.title}")

            if section.keywords:
                lines.append(f"{indent}Keywords: {', '.join(section.keywords[:5])}")

            if section.insights:
                lines.append(f"{indent}Insights:")
                for insight in section.insights:
                    lines.append(f"{indent} - {insight[:100]}...")

        if section.level < depth:
            for subsection in section.subsections:
                lines.extend(self.generate_summary(subsection, depth).split('\n'))

        return '\n'.join(lines)

# Usage example: analyze a small sample document and print its summary.
analyzer = DocumentAnalyzer()

sample_document = """
# Machine Learning Research

## Neural Networks

Deep learning has revolutionized AI. Therefore, we focus on neural architectures.

### CNNs

Convolutional networks excel at vision tasks.

### RNNs

Recurrent networks handle sequences well.

## Conclusion

Notably, ensemble methods outperform single models.
"""

analysis_root = analyzer.analyze(sample_document)
print(analyzer.generate_summary(analysis_root))

Insight Extraction Engine

# scripts/insight-extraction.py
from dataclasses import dataclass
from typing import List, Dict
import re

@dataclass
class Insight:
    """A single extracted insight plus its supporting context."""

    text: str            # the matched insight phrase
    category: str        # one of: 'finding', 'conclusion', 'recommendation', 'observation'
    confidence: float    # heuristic confidence score in [0.0, 1.0]
    evidence: List[str]  # nearby sentences supporting the insight
    location: str        # where in the source text the match occurred

class InsightExtractor:
    """Extract categorized insights from research documents using cue phrases."""

    # Regexes keyed by insight category. The leading \b prevents a cue word
    # from matching inside a longer word (e.g. 'thus' inside 'enthusiasm').
    INSIGHT_PATTERNS = {
        'finding': [
            r'\bwe found that (.+)',
            r'\bresults show (.+)',
            r'\banalysis reveals (.+)',
        ],
        'conclusion': [
            r'\btherefore (.+)',
            r'\bin conclusion (.+)',
            r'\bthus (.+)',
        ],
        'recommendation': [
            r'\bwe recommend (.+)',
            r'\bshould (.+)',
            r'\bit is advised (.+)',
        ],
        'observation': [
            r'\bnotably (.+)',
            r'\binterestingly (.+)',
            r'\bit appears (.+)',
        ],
    }

    def extract(self, text: str) -> List[Insight]:
        """Return every insight matched by the category patterns in *text*.

        Matching is case-insensitive. Each match records surrounding
        sentences as evidence and its character offset as location.
        """
        insights: List[Insight] = []

        for category, patterns in self.INSIGHT_PATTERNS.items():
            for pattern in patterns:
                for match in re.finditer(pattern, text, re.IGNORECASE):
                    insights.append(Insight(
                        text=match.group(1).strip(),
                        category=category,
                        confidence=0.8,  # fixed heuristic; not calibrated per-match
                        evidence=self._extract_evidence(text, match.start()),
                        location=f"char {match.start()}",
                    ))

        return insights

    def _extract_evidence(self, text: str, position: int) -> List[str]:
        """Return up to two substantial sentences near *position*.

        Looks at a window of 200 characters before and after the match and
        keeps sentences longer than 20 characters.
        """
        start = max(0, position - 200)
        end = min(len(text), position + 200)

        sentences = text[start:end].split('.')
        return [s.strip() for s in sentences if len(s.strip()) > 20][:2]

    def generate_report(self, insights: List[Insight]) -> str:
        """Render a markdown report of insights grouped by category."""
        by_category: Dict[str, List[Insight]] = {}
        for insight in insights:
            by_category.setdefault(insight.category, []).append(insight)

        report = "# Extracted Insights\n\n"

        for category, category_insights in by_category.items():
            report += f"## {category.title()}s\n\n"
            for i, insight in enumerate(category_insights, 1):
                report += f"{i}. {insight.text}\n"
                if insight.evidence:
                    report += f" Evidence: {insight.evidence[0][:100]}...\n"
            report += "\n"

        return report

# Usage example: extract insights from a short passage and print the report.
extractor = InsightExtractor()

sample_text = """
Our analysis reveals that neural networks outperform traditional methods by 30%.
Therefore, we recommend adopting deep learning for production systems.
Interestingly, smaller models often generalize better than larger ones.
"""

extracted = extractor.extract(sample_text)
print(extractor.generate_report(extracted))

Pattern Recognition System

# scripts/pattern-recognition.py
from dataclasses import dataclass
from typing import List, Dict, Set
from collections import Counter
import re

@dataclass
class Pattern:
    """A pattern detected across one or more documents."""

    type: str            # one of: 'concept', 'relationship', 'trend'
    description: str     # human-readable summary of the pattern
    frequency: int       # number of occurrences observed
    examples: List[str]  # text snippets illustrating the pattern
    confidence: float    # heuristic confidence score in [0.0, 1.0]

class PatternRecognizer:
    """Recognize recurring concepts, relationships, and trends across documents."""

    def recognize(self, documents: List[str]) -> List[Pattern]:
        """Run every detector and return patterns sorted by frequency (desc)."""
        patterns: List[Pattern] = []

        patterns.extend(self._find_concept_patterns(documents))
        patterns.extend(self._find_relationship_patterns(documents))
        patterns.extend(self._find_trend_patterns(documents))

        return sorted(patterns, key=lambda p: p.frequency, reverse=True)

    def _find_concept_patterns(self, documents: List[str]) -> List[Pattern]:
        """Find capitalized phrases that recur across the corpus.

        Uses a simplified noun-phrase heuristic: runs of capitalized words.
        Only phrases appearing at least twice are reported.
        """
        all_phrases: List[str] = []
        for doc in documents:
            all_phrases.extend(re.findall(r'\b[A-Z][a-z]+(?: [A-Z][a-z]+)*\b', doc))

        freq = Counter(all_phrases)

        patterns: List[Pattern] = []
        for phrase, count in freq.most_common(10):
            if count < 2:  # a single occurrence is not a pattern
                continue
            examples = [doc[:100] for doc in documents if phrase in doc][:3]
            patterns.append(Pattern(
                type='concept',
                description=f"Recurring concept: {phrase}",
                frequency=count,
                examples=examples,
                # More occurrences relative to corpus size -> higher confidence.
                confidence=min(count / len(documents), 1.0),
            ))

        return patterns

    def _find_relationship_patterns(self, documents: List[str]) -> List[Pattern]:
        """Find "X causes/enables/requires Y" relations, aggregated by pair.

        Duplicate relations are merged so ``frequency`` reflects the number
        of occurrences instead of always being 1.
        """
        relationship_regexes = [
            (r'(\w+) causes (\w+)', 'causal'),
            (r'(\w+) enables (\w+)', 'enabling'),
            (r'(\w+) requires (\w+)', 'dependency'),
        ]

        counts: Counter = Counter()
        examples: Dict[str, List[str]] = {}
        for doc in documents:
            for regex, rel_type in relationship_regexes:
                for subject, obj in re.findall(regex, doc, re.IGNORECASE):
                    description = f"{subject} {rel_type} {obj}"
                    counts[description] += 1
                    snippets = examples.setdefault(description, [])
                    if len(snippets) < 3:
                        snippets.append(doc[:100])

        return [
            Pattern(
                type='relationship',
                description=description,
                frequency=count,
                examples=examples[description],
                confidence=0.7,
            )
            for description, count in counts.items()
        ]

    def _find_trend_patterns(self, documents: List[str]) -> List[Pattern]:
        """Find temporal/progression cues, aggregated per indicator word."""
        trend_indicators = [
            'increasing', 'decreasing', 'growing', 'declining',
            'rising', 'falling', 'improving', 'worsening',
        ]

        counts: Counter = Counter()
        examples: Dict[str, List[str]] = {}
        for doc in documents:
            for indicator in trend_indicators:
                if indicator in doc.lower():
                    counts[indicator] += 1
                    snippets = examples.setdefault(indicator, [])
                    if len(snippets) < 3:
                        snippets.append(self._extract_context(doc, indicator))

        return [
            Pattern(
                type='trend',
                description=f"Trend: {indicator}",
                frequency=count,
                examples=examples[indicator],
                confidence=0.6,
            )
            for indicator, count in counts.items()
        ]

    def _extract_context(self, text: str, keyword: str) -> str:
        """Return a short window of text around the first hit of *keyword*."""
        index = text.lower().find(keyword)
        if index == -1:
            return ""

        start = max(0, index - 50)
        end = min(len(text), index + 100)
        return text[start:end]

# Usage example: recognize patterns across a tiny two-document corpus.
recognizer = PatternRecognizer()

corpus = [
    "Machine Learning enables better predictions. ML improves over time.",
    "Deep Learning causes significant improvements. ML requires large datasets.",
]

for pattern in recognizer.recognize(corpus):
    print(f"{pattern.type}: {pattern.description} (freq={pattern.frequency})")

Usage Examples

Document Analysis

Apply thoughts-analysis-patterns skill to analyze research paper structure and extract insights

Insight Extraction

Apply thoughts-analysis-patterns skill to extract findings and recommendations from technical report

Pattern Recognition

Apply thoughts-analysis-patterns skill to identify recurring concepts across multiple documents

Success Output

When successful, this skill MUST output:

✅ SKILL COMPLETE: thoughts-analysis-patterns

Completed:
- [x] Document structure parsed and analyzed (headers, sections, subsections)
- [x] Keywords extracted from each section (top 10 per section)
- [x] Insights identified with categories (findings, conclusions, recommendations)
- [x] Patterns recognized across multiple documents (concepts, relationships, trends)
- [x] Summary report generated with hierarchical structure

Outputs:
- Document analysis report with structure tree
- Insight extraction report by category
- Pattern recognition report with frequency counts
- Knowledge synthesis across documents
- Recommendations for further analysis

Completion Checklist

Before marking this skill as complete, verify:

  • Document structure correctly identifies all headers (H1-H6)
  • Keywords extracted are relevant and domain-specific
  • Insights categorized correctly (finding vs. conclusion vs. recommendation)
  • Patterns identified appear in multiple documents (not spurious)
  • Evidence provided for each insight (surrounding sentences)
  • Confidence scores assigned to insights (0.0-1.0)
  • Summary report is hierarchical and readable
  • Cross-document patterns are validated (not false positives)

Failure Indicators

This skill has FAILED if:

  • ❌ Document structure parsing misses sections or headers
  • ❌ Keywords extracted are generic (stopwords like "this", "that")
  • ❌ Insights misclassified (conclusion labeled as finding)
  • ❌ Patterns identified are spurious (appear only once)
  • ❌ No evidence provided for insights (context missing)
  • ❌ Confidence scores not calibrated (all HIGH or all LOW)
  • ❌ Summary report is unstructured or unreadable
  • ❌ Cross-document analysis missed obvious connections

When NOT to Use

Do NOT use this skill when:

  • Documents are structured data (CSV, JSON) - use data-analysis patterns instead
  • Real-time analysis required (use streaming-analysis patterns)
  • Documents are code files (use code-analysis-patterns skill)
  • Documents are images/PDFs without OCR (use document-extraction skill first)
  • Single-sentence analysis (use NLP patterns, not document analysis)
  • Translation needed (use language-translation skill)
  • Sentiment analysis required (use sentiment-analysis-patterns skill)

Use these alternatives instead:

  • Structured data: data-analysis-patterns skill
  • Code analysis: code-analysis-patterns skill
  • Sentiment: sentiment-analysis-patterns skill

Anti-Patterns (Avoid)

| Anti-Pattern | Problem | Solution |
|---|---|---|
| No stopword filtering | Generic keywords extracted | Filter common words (this, that, with, from) |
| Single-document patterns | False pattern detection | Require patterns appear in 2+ documents |
| No confidence scoring | All insights treated equally | Assign confidence based on evidence quality |
| Regex-only extraction | Misses semantic insights | Combine regex with semantic analysis |
| No evidence tracking | Insights lack context | Extract surrounding sentences as evidence |
| Hardcoded categories | Inflexible categorization | Use configurable category patterns |
| No validation step | Spurious insights reported | Validate insights against source text |

Principles

This skill embodies:

  • #2 Progressive Disclosure - Start with structure, then keywords, then insights, then patterns
  • #5 Eliminate Ambiguity - Clear categorization (finding vs. conclusion vs. recommendation)
  • #6 Clear, Understandable, Explainable - Provide evidence for every insight
  • #7 Verification Protocol - Validate patterns appear in multiple documents
  • #8 No Assumptions - Assign confidence scores, don't assume all insights are equal

Full Standard: CODITECT-STANDARD-AUTOMATION.md

Integration Points

  • research-patterns - Technical research
  • educational-content-patterns - Content generation
  • competitive-analysis - Market insight extraction