#!/usr/bin/env python3
"""Generate YAML manifests for all research staging directories per ADR-207.

This script scans analyze-new-artifacts/ directories and generates manifests
conforming to config/schemas/research-manifest-v1.schema.json.

Author: Claude (Sonnet 4.5)
Created: 2026-02-16
"""

import os
import sys
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Tuple
# Categorization from ADR-207: maps each staging directory name to the
# research category used in its generated manifest.
CATEGORIZATION = {
    # technology-evaluation (19)
    "coditect-abacus.ai-research-analyisis": "technology-evaluation",
    "coditect-agent-zero-research": "technology-evaluation",
    "CODITECT-claude-code-eval-loops": "technology-evaluation",
    "CODITECT-clawdrop-skils-research": "technology-evaluation",
    "coditect-codex.research": "technology-evaluation",
    "coditect-copilotkit-research": "technology-evaluation",
    "CODITECT-docker-registry-research": "technology-evaluation",
    "CODITECT-docusaurus-search-research": "technology-evaluation",
    "coditect-gemini-api-context-url": "technology-evaluation",
    "coditect-google-cloud-workstation-research": "technology-evaluation",
    "CODITECT-kimi-2.5-research": "technology-evaluation",
    "coditect-microsoft-fabric-research": "technology-evaluation",
    "coditect-motia-backend-research": "technology-evaluation",
    "coditect-openclaw-research": "technology-evaluation",
    "CODITECT-OPENCODE-RESEARCH": "technology-evaluation",
    "coditect-paperbanana-research": "technology-evaluation",
    "coditect-plugins-research": "technology-evaluation",
    "CODITECT-unified-studio": "technology-evaluation",
    "coditect-video-content-extraction-research": "technology-evaluation",

    # academic (13)
    "coditect-agentic-paradigms-for-llm‑enabled-healthcare-communication": "academic",
    "coditect-ai-applicability-to-occupations": "academic",
    "coditect-ai-guardrails-research": "academic",
    "coditect-ai-research-pdfs": "academic",
    "coditect-ambiguity-and-intent-research": "academic",
    "coditect-anthropic-constitution": "academic",
    "CODITECT-parallel‑agent-reinforcement-learning-research": "academic",
    "coditect-prompt-repetition-research": "academic",
    "coditect-recursive-large-language-models": "academic",
    "coditect-scaling-agent-systems": "academic",
    "coditect-society-of-mind-minsky": "academic",
    "coditect-universal-document-object-model": "academic",
    "CODITECT-yann-lecun-research": "academic",

    # competitive-intelligence (16)
    "coditect-a16z-youtube-analysis-research": "competitive-intelligence",
    "coditect-agentiic-enterprise-systems": "competitive-intelligence",
    "CODITECT-AI-HOUSE-DAVOS-TRANSCRIPTS": "competitive-intelligence",
    "coditect-all-in-podcast-transcripts": "competitive-intelligence",
    "coditect-anthropic-cowork-impact-competitive-impact": "competitive-intelligence",
    "coditect-davos-economic-forum-2026": "competitive-intelligence",
    "CODITECT-dylan-davis-research": "competitive-intelligence",
    "coditect-eric-schmidt-ai": "competitive-intelligence",
    "coditect-ibm-ai-research": "competitive-intelligence",
    "coditect-lex-clips-podcast-transcripts": "competitive-intelligence",
    "coditect-mckinsey-ai-hype-cycle-research": "competitive-intelligence",
    "coditect-mckinsey-research": "competitive-intelligence",
    "CODITECT-moltbot-research": "competitive-intelligence",
    "coditect-nate-b-jones-youtube-transcript-analysis": "competitive-intelligence",
    "CODITECT-palantir-research": "competitive-intelligence",
    "coditect-third-golden-age-of-software": "competitive-intelligence",

    # business-market (14)
    "CODITECT-Avivatec-Project": "business-market",
    "coditect-avivatect-fp-and-a-research": "business-market",
    "CODITECT-az1-entity-analysis": "business-market",
    "coditect-canada-entrepreneurs": "business-market",
    "coditect-financial-model-2026-02-04": "business-market",
    "CODITECT-founders-handbook": "business-market",
    "CODITECT-hi-tech-ventures-research": "business-market",
    "coditect-legal-contracts": "business-market",
    "coditect-product-market-fit": "business-market",
    "coditect-roblox-market-research": "business-market",
    "CODITECT-RUNWAY-mentor-deck-research": "business-market",
    "coditect-tiny-seed-playbook": "business-market",
    "coditect-use-cases": "business-market",
    "coditect-value-proposition": "business-market",

    # domain (6)
    "coditect-ai-risk-management-framework": "domain",
    "coditect-bioscience-workorders-research": "domain",
    "coditect-c3pao-accreditation-research": "domain",
    "CODITECT-LIMS-sample-receiving-process": "domain",
    "coditect-regulatory-frameworks-research": "domain",
    "coditect-zero-trust-security-agentic-ai-research": "domain",

    # process-internal (28)
    "CODITECT-ambiguity-and-intent-research-DUPLICATE": "process-internal",
    "CODITECT-analysis-workspace": "process-internal",
    "CODITECT-analyze-ui-images": "process-internal",
    "coditect-bookmarks-buku": "process-internal",
    "coditect-browser-analysis": "process-internal",
    "coditect-browser-screenshots": "process-internal",
    "CODITECT-consequence-aware-automous-execution": "process-internal",
    "coditect-conflict-avoidance-strategies": "process-internal",
    "coditect-context-graph-research": "process-internal",
    "coditect-continual-learning-analysis": "process-internal",
    "coditect-core-process-framework": "process-internal",
    "coditect-decision-rights-and-responsibility-frameworks": "process-internal",
    "coditect-internal-meeting-note-analysis": "process-internal",
    "coditect-license-management": "process-internal",
    "coditect-master-prompt-research": "process-internal",
    "coditect-method-for-analysis-large-file-sets": "process-internal",
    "CODITECT-moe-judges-research": "process-internal",
    "coditect-new-installation-errors-troubleshoot": "process-internal",
    "coditect-new-project-ai-initiation-process": "process-internal",
    "coditect-prd-user-intention-research": "process-internal",
    "coditect-profile-images": "process-internal",
    "coditect-ralph-wiggum-technique": "process-internal",
    "CODITECT-screenshots-for-analysis": "process-internal",
    "coditect-second-brain-research": "process-internal",
    "CODITECT-SKILLS-cross-functional-llm-research": "process-internal",
    "coditect-system-prompt": "process-internal",
    "coditect-udom-research-autonomous-orchestration": "process-internal",
    "coditect-ui-ux-agent-design-research": "process-internal",
}

def strip_prefix(dirname: str) -> str:
    """Strip the coditect-/CODITECT- prefix and lowercase for use as a topic.

    Args:
        dirname: Staging directory name, e.g. "CODITECT-foo-research".

    Returns:
        Lowercased topic with the prefix and any surrounding hyphens removed.
    """
    topic = dirname
    if topic.lower().startswith("coditect-"):
        topic = topic[9:]
    elif topic.lower().startswith("coditect"):
        # Prefix without the trailing hyphen (e.g. "coditectfoo").
        topic = topic[8:]
    return topic.lower().strip("-")

def detect_input_type(dir_path: Path) -> str:
    """Detect the input source type from top-level file extensions.

    Args:
        dir_path: Staging directory to inspect.

    Returns:
        One of "pdf", "transcript", "url", "git-repository", "multi-source".
    """
    # Collect recognized extensions from non-hidden, top-level files only.
    extensions = set()
    for item in dir_path.iterdir():
        if item.is_file() and not item.name.startswith("."):
            ext = item.suffix.lower()
            if ext in (".pdf", ".txt", ".md", ".docx", ".mp4", ".json"):
                extensions.add(ext)

    # A single uniform extension maps to a specific source type.
    if ".pdf" in extensions and len(extensions) == 1:
        return "pdf"
    elif ".txt" in extensions and len(extensions) == 1:
        return "transcript"
    elif ".md" in extensions and len(extensions) == 1:
        return "url"
    elif ".git" in [f.name for f in dir_path.iterdir() if f.is_dir()]:
        return "git-repository"
    else:
        return "multi-source"

def detect_artifacts(dir_path: Path) -> List[Dict]:
    """Detect artifacts in the staging directory.

    Args:
        dir_path: Staging directory to inspect.

    Returns:
        List of artifact dicts ({"type", "staging_path", "status"}), with a
        "raw-input" entry always first.
    """
    artifacts = []

    # Check for pipeline artifacts directory
    artifacts_dir = dir_path / "artifacts"
    if artifacts_dir.exists() and artifacts_dir.is_dir():
        # Known pipeline-generated artifact files mapped to manifest types.
        artifact_types = {
            "executive-summary.md": "executive-summary",
            "assessment.md": "assessment",
            "sdd.md": "sdd",
            "tdd.md": "tdd",
            "c4-architecture.md": "c4-diagram",
            "glossary.md": "glossary",
            "1-2-3-detailed-quick-start.md": "quick-start",
            "mermaid-diagrams.md": "mermaid-diagrams",
            "coditect-impact.md": "coditect-impact",
        }

        for filename, artifact_type in artifact_types.items():
            artifact_path = artifacts_dir / filename
            if artifact_path.exists():
                artifacts.append({
                    "type": artifact_type,
                    # FIX: record the actual file path (was the literal
                    # placeholder "artifacts/(unknown)").
                    "staging_path": f"artifacts/{filename}",
                    "status": "staging"
                })

        # Check for ADRs
        adr_dir = artifacts_dir / "adrs"
        if adr_dir.exists():
            adr_count = len(list(adr_dir.glob("ADR-*.md")))
            if adr_count > 0:
                artifacts.append({
                    "type": "adr",
                    "staging_path": f"artifacts/adrs/ ({adr_count} files)",
                    "status": "staging"
                })

    # Always add raw-input artifact as the first entry.
    artifacts.insert(0, {
        "type": "raw-input",
        "staging_path": ".",
        "status": "staging"
    })

    # Check for promoted artifacts in internal/analysis/
    # This would require checking the promoted location, skipping for now.
    # (The fallback below is defensive; insert() above guarantees non-empty.)
    return artifacts if artifacts else [{"type": "raw-input", "staging_path": ".", "status": "staging"}]

def get_recommendation(dir_path: Path, category: str) -> Tuple[str, str]:
    """Determine recommendation and confidence based on artifacts.

    Args:
        dir_path: Staging directory to inspect.
        category: Assigned category (currently unused; kept for callers).

    Returns:
        (recommendation, confidence) tuple, e.g. ("MONITOR", "LOW").
    """
    artifacts_dir = dir_path / "artifacts"

    # Pipeline outputs (summary/assessment) justify higher confidence.
    if artifacts_dir.exists() and artifacts_dir.is_dir():
        has_exec_summary = (artifacts_dir / "executive-summary.md").exists()
        has_assessment = (artifacts_dir / "assessment.md").exists()

        if has_exec_summary or has_assessment:
            return ("MONITOR", "MEDIUM")

    # Default for raw inputs
    return ("MONITOR", "LOW")

def estimate_date(dirname: str) -> str:
    """Estimate the research date for a directory. Defaults to 2026-01-15.

    Args:
        dirname: Staging directory name (currently unused).

    Returns:
        ISO date string used across this batch of manifests.
    """
    # Could parse dates from directory names or file mtimes; for now,
    # use the batch default.
    return "2026-01-15"

def generate_yaml(dirname: str, category: str, dir_path: Path) -> str:
    """Generate the YAML manifest text for one research directory.

    Args:
        dirname: Staging directory name (with coditect-/CODITECT- prefix).
        category: Category from CATEGORIZATION.
        dir_path: Path to the staging directory on disk.

    Returns:
        Manifest content as a string, ready to write to a .yaml file.
    """
    topic = strip_prefix(dirname)
    date_conducted = estimate_date(dirname)
    # Use the estimated date so research_id stays consistent with the
    # manifest filename written by main().
    research_id = f"{date_conducted}-{topic}"
    input_type = detect_input_type(dir_path)
    artifacts = detect_artifacts(dir_path)
    recommendation, confidence = get_recommendation(dir_path, category)

    # Build YAML manually (no pyyaml dependency). Children are indented
    # two spaces per level so the output is valid block-style YAML.
    yaml_lines = [
        "# Research Manifest (ADR-207)",
        f"# Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        "",
        "metadata:",
        f'  research_id: "{research_id}"',
        f'  topic: "{dirname}"',
        f"  category: {category}",
        f"  date_conducted: {date_conducted}",
    ]

    # Add pipeline_version only when pipeline artifacts exist.
    artifacts_dir = dir_path / "artifacts"
    if artifacts_dir.exists():
        yaml_lines.append('  pipeline_version: "1.0"')

    yaml_lines.extend([
        f"  recommendation: {recommendation}",
        f"  confidence: {confidence}",
        "",
        "input_sources:",
        f"  type: {input_type}",
        "  primary:",
        f'    url: "file://analyze-new-artifacts/{dirname}/"',
        "",
        "artifacts:",
    ])

    for artifact in artifacts:
        yaml_lines.append(f"  - type: {artifact['type']}")
        if "staging_path" in artifact:
            yaml_lines.append(f'    staging_path: "{artifact["staging_path"]}"')
        yaml_lines.append(f"    status: {artifact['status']}")

    # Tags: the category plus the first hyphen-delimited word of the topic.
    tags = [category, topic.split("-")[0] if "-" in topic else topic]
    yaml_lines.extend([
        "",
        "tags:",
    ])
    for tag in tags[:3]:  # Limit to 3 tags
        yaml_lines.append(f"  - {tag}")

    yaml_lines.append("")

    return "\n".join(yaml_lines)

def main() -> int:
    """Generate manifests for every categorized staging directory.

    Returns:
        0 if all manifests generated cleanly, 1 if any directory errored.
    """
    # Paths: this script lives one directory below the repo root.
    # FIX: was Path(file) (a NameError); the dunder is __file__.
    base_dir = Path(__file__).parent.parent
    staging_dir = base_dir / "analyze-new-artifacts"
    manifest_dir = base_dir / "internal" / "research" / "manifests"

    if not staging_dir.exists():
        print(f"ERROR: Staging directory not found: {staging_dir}")
        sys.exit(1)

    # Create manifest directory
    manifest_dir.mkdir(parents=True, exist_ok=True)

    # Generate manifests
    generated = 0
    skipped = 0
    errors = 0

    print(f"Generating manifests for {len(CATEGORIZATION)} directories...")
    print(f"Output: {manifest_dir}")
    print()

    for dirname, category in sorted(CATEGORIZATION.items()):
        dir_path = staging_dir / dirname

        if not dir_path.exists():
            print(f"SKIP: {dirname} (not found)")
            skipped += 1
            continue

        try:
            yaml_content = generate_yaml(dirname, category, dir_path)
            topic = strip_prefix(dirname)
            manifest_filename = f"2026-01-15-{topic}.yaml"
            manifest_path = manifest_dir / manifest_filename

            with open(manifest_path, "w") as f:
                f.write(yaml_content)

            print(f"OK: {manifest_filename} ({category})")
            generated += 1

        except Exception as e:
            # Best-effort batch: report the failure and keep going.
            print(f"ERROR: {dirname} - {e}")
            errors += 1

    print()
    print("=" * 60)
    print(f"Generated: {generated}")
    print(f"Skipped: {skipped}")
    print(f"Errors: {errors}")
    print(f"Total: {len(CATEGORIZATION)}")
    print()

    # Category breakdown
    print("By Category:")
    category_counts = {}
    for category in CATEGORIZATION.values():
        category_counts[category] = category_counts.get(category, 0) + 1

    for category, count in sorted(category_counts.items()):
        print(f"  {category}: {count}")

    return 0 if errors == 0 else 1

# FIX: was `if name == "main"` — the scrape stripped the dunders; without
# them this raises NameError (and would never match) when run as a script.
if __name__ == "__main__":
    sys.exit(main())