# scripts-generate-publication-pdf
#!/usr/bin/env python3
"""
title: "CODITECT branding constants"
component_type: script
version: "1.0.0"
audience: contributor
status: stable
summary: "CODITECT Publication PDF Generator"
keywords: ['generate', 'generation', 'pdf', 'publication']
tokens: ~500
created: 2025-12-22
updated: 2025-12-22
script_name: "generate-publication-pdf.py"
language: python
executable: true
usage: "python3 scripts/generate-publication-pdf.py [options]"
python_version: "3.10+"
dependencies: []
modifies_files: false
network_access: false
requires_auth: false

CODITECT Publication PDF Generator
Professional-grade PDF generation from Markdown documents with:
- Mermaid diagram rendering
- Consistent CODITECT branding
- Headers, footers, page numbers
- Table of contents generation
- Cover page support

Copyright 2025 AZ1.AI Inc. All rights reserved.
Author: Hal Casteel, Founder/CEO/CTO

Usage:
python3 scripts/generate-publication-pdf.py <input.md> [options]
python3 scripts/generate-publication-pdf.py --batch

Examples:
    # Single file
    python3 scripts/generate-publication-pdf.py docs/00-coditect-introduction/WHAT-IS-CODITECT-CORE.md
# With cover page
python3 scripts/generate-publication-pdf.py docs/ARCHITECTURE.md --cover --title "System Architecture"
# Batch processing
python3 scripts/generate-publication-pdf.py --batch docs/00-coditect-introduction/ --output pdf-output/
# Custom output
python3 scripts/generate-publication-pdf.py input.md --output my-document.pdf
"""
import argparse
import html
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
# CODITECT branding constants
CODITECT_VERSION = "1.7.2"
ORGANIZATION = "AZ1.AI Inc."
AUTHOR = "Hal Casteel, Founder/CEO/CTO"
COPYRIGHT_YEAR = 2025

# Get script directory and project root
SCRIPT_DIR = Path(__file__).parent.resolve()
PROJECT_ROOT = SCRIPT_DIR.parent
CONFIG_PATH = PROJECT_ROOT / "docs" / "99-publishing" / "config" / "pdf-generation-config.json"
TEMPLATES_DIR = PROJECT_ROOT / "docs" / "99-publishing" / "templates"


class CoditectPdfGenerator:
    """Professional PDF generator with CODITECT branding.

    Converts a Markdown document into a PDF by pre-rendering Mermaid
    diagrams, optionally prepending a cover page and a table of contents,
    and then trying three back ends in order: the ``md2pdf_mermaid`` Python
    package, ``md-to-pdf`` via ``npx``, and ``pandoc``.
    """

    def __init__(self, config_path: Optional[Path] = None):
        """Initialize the generator with configuration.

        Args:
            config_path: JSON configuration file; ``CONFIG_PATH`` is used
                when omitted.
        """
        self.config = self._load_config(config_path or CONFIG_PATH)
        self.temp_dir = None

    def _load_config(self, config_path: Path) -> Dict[str, Any]:
        """Load configuration from a JSON file.

        Falls back to :meth:`_default_config` (with a printed warning) when
        the file does not exist. A file that exists but is not valid JSON
        still raises ``json.JSONDecodeError``.
        """
        if not config_path.exists():
            print(f"Warning: Config not found at {config_path}, using defaults")
            return self._default_config()
        with open(config_path, "r", encoding="utf-8") as f:
            return json.load(f)

    def _default_config(self) -> Dict[str, Any]:
        """Return the built-in default configuration."""
        return {
            "defaults": {
                "page_size": "A4",
                "include_cover": True,
                "include_toc": True,
                "include_header": True,
                "include_footer": True,
                "page_numbers": True,
            },
            "branding": {
                "organization": ORGANIZATION,
                "author": AUTHOR,
                "copyright_year": COPYRIGHT_YEAR,
            },
            "mermaid": {
                "enabled": True,
                "theme": "default",
                "output_format": "png",
                "scale": 2,
            },
        }

    def _check_dependencies(self) -> bool:
        """Check if required tools are installed.

        Returns:
            ``True`` when both the ``md2pdf_mermaid`` package and the
            ``mmdc`` CLI are available, or when something is missing but
            ``md-to-pdf`` / ``npx`` is on PATH as an alternative;
            ``False`` otherwise.
        """
        dependencies = {
            "md2pdf-mermaid": "pip install md2pdf-mermaid",
            "mmdc": "npm install -g @mermaid-js/mermaid-cli",  # mermaid-cli
        }

        missing = []
        for tool, install_cmd in dependencies.items():
            if tool == "md2pdf-mermaid":
                # Python package: probe by importing it.
                try:
                    import md2pdf_mermaid  # noqa: F401
                except ImportError:
                    missing.append((tool, install_cmd))
            elif shutil.which(tool) is None:
                # CLI tool: probe PATH.
                missing.append((tool, install_cmd))

        if not missing:
            return True

        print("Missing dependencies:")
        for tool, cmd in missing:
            print(f" - {tool}: Install with '{cmd}'")

        # Try alternative approach with md-to-pdf
        if shutil.which("md-to-pdf") or shutil.which("npx"):
            print("\nAlternative: Using md-to-pdf (npx md-to-pdf)")
            return True
        return False

    def _extract_frontmatter(self, content: str) -> tuple:
        """Split leading YAML frontmatter from markdown content.

        Args:
            content: Full markdown text.

        Returns:
            ``(frontmatter, body)``. ``frontmatter`` is always a dict; it is
            empty when there is no frontmatter, when it cannot be parsed, or
            when it parses to something other than a mapping. ``body`` is
            the text after the closing ``---`` (or the whole content when no
            frontmatter block is found).
        """
        frontmatter: Dict[str, Any] = {}
        body = content

        if not content.startswith("---"):
            return frontmatter, body

        parts = content.split("---", 2)
        if len(parts) < 3:
            return frontmatter, body

        try:
            import yaml
        except ImportError:
            yaml = None

        if yaml is None:
            # PyYAML unavailable: parse simple "key: value" pairs only.
            for line in parts[1].strip().split("\n"):
                if ":" in line:
                    key, value = line.split(":", 1)
                    frontmatter[key.strip()] = value.strip().strip("\"'")
        else:
            try:
                loaded = yaml.safe_load(parts[1])
            except yaml.YAMLError as exc:
                # Best effort: a malformed header must not abort generation,
                # but the user should know the metadata was ignored.
                print(f" Warning: Could not parse frontmatter: {exc}")
                loaded = None
            # Callers use .get(), so a scalar or list header is discarded.
            if isinstance(loaded, dict):
                frontmatter = loaded

        body = parts[2]
        return frontmatter, body

    def _extract_title(self, content: str) -> str:
        """Return the text of the first H1 heading, or "Untitled Document"."""
        match = re.search(r"^#\s+(.+)$", content, re.MULTILINE)
        if match:
            return match.group(1).strip()
        return "Untitled Document"

    def _generate_toc(self, content: str) -> str:
        """Generate a Markdown table of contents from H2-H4 headings.

        Lines inside fenced code blocks are ignored so that shell comments
        such as ``## step`` are not mistaken for headings.

        Returns:
            A "Table of Contents" section followed by a horizontal rule.
        """
        toc_lines = ["## Table of Contents\n"]
        heading_re = re.compile(r"(#{2,4})\s+(.+)$")

        in_fence = False
        for line in content.split("\n"):
            if line.lstrip().startswith(("```", "~~~")):
                in_fence = not in_fence
                continue
            if in_fence:
                continue
            match = heading_re.match(line)
            if not match:
                continue

            level_markers, heading = match.group(1), match.group(2).strip()
            level = len(level_markers) - 2  # H2 = 0, H3 = 1, H4 = 2
            # Two spaces per level: a nested "- " item must be indented to
            # the parent's content column to render as a sub-list.
            indent = "  " * level

            # Create anchor link
            anchor = re.sub(r"[^\w\s-]", "", heading.lower())
            anchor = re.sub(r"\s+", "-", anchor)
            toc_lines.append(f"{indent}- [{heading}](#{anchor})")

        return "\n".join(toc_lines) + "\n\n---\n\n"

    def _render_mermaid_diagrams(self, content: str, output_dir: Path) -> str:
        """Pre-render Mermaid code blocks to PNG images.

        Each ```` ```mermaid ```` block is written to
        ``output_dir/mermaid-diagrams/diagram-N.mmd``, rendered with
        mermaid-cli, and replaced in the returned text by a Markdown image
        link relative to ``output_dir``. Blocks that fail to render are left
        untouched.

        Args:
            content: Markdown text.
            output_dir: Directory that will also hold the processed markdown.

        Returns:
            The markdown with successfully rendered blocks replaced, or the
            input unchanged when there are no diagrams or no mermaid-cli.
        """
        mermaid_pattern = r"```mermaid\n(.*?)```"
        matches = list(re.finditer(mermaid_pattern, content, re.DOTALL))
        if not matches:
            return content

        print(f" Found {len(matches)} Mermaid diagram(s)")

        # Build the command as an argument list (no shell) so paths with
        # spaces or shell metacharacters are passed through verbatim.
        mmdc_path = shutil.which("mmdc")
        if mmdc_path:
            mmdc_cmd = [mmdc_path]
        else:
            npx_path = shutil.which("npx")
            if not npx_path:
                print(" Warning: mermaid-cli not found, diagrams may not render")
                return content
            mmdc_cmd = [npx_path, "mmdc"]

        diagrams_dir = output_dir / "mermaid-diagrams"
        diagrams_dir.mkdir(parents=True, exist_ok=True)

        new_content = content
        for i, match in enumerate(matches):
            diagram_code = match.group(1)
            input_file = diagrams_dir / f"diagram-{i}.mmd"
            output_file = diagrams_dir / f"diagram-{i}.png"

            with open(input_file, "w", encoding="utf-8") as f:
                f.write(diagram_code)

            try:
                subprocess.run(
                    [*mmdc_cmd, "-i", str(input_file), "-o", str(output_file),
                     "-b", "white", "-s", "2"],
                    check=True,
                    capture_output=True,
                )
            except (subprocess.CalledProcessError, OSError) as e:
                print(f" Warning: Failed to render diagram {i+1}: {e}")
                continue

            # Replace exactly one occurrence so identical diagrams each get
            # their own image, in document order.
            relative_path = output_file.relative_to(output_dir)
            replacement = f"![Diagram {i + 1}]({relative_path.as_posix()})"
            new_content = new_content.replace(match.group(0), replacement, 1)
            print(f" Rendered diagram {i+1}")

        return new_content

    def _generate_cover_page(self, title: str, subtitle: str = "",
                             version: str = CODITECT_VERSION,
                             classification: str = "Technical Documentation") -> str:
        """Generate cover page HTML.

        NOTE(review): the original cover template markup was not recoverable
        from this file. This is a minimal reconstruction built from the
        values the method already computed (logo path, date) and its
        parameters; confirm the element classes against coditect-theme.css.

        Args:
            title: Document title.
            subtitle: Optional subtitle; omitted from the page when empty.
            version: Version string shown on the cover.
            classification: Classification label shown on the cover.

        Returns:
            An HTML fragment ending in a page break, followed by a blank
            line so the Markdown that follows is parsed normally.
        """
        logo_path = PROJECT_ROOT / "docs" / "00-coditect-introduction" / "assets" / "images" / "coditect-logo.png"
        date_str = datetime.now().strftime("%B %d, %Y")

        # All text is escaped: titles come from user documents and may
        # contain "&" or "<".
        lines = ['<div class="cover-page">']
        if logo_path.exists():
            lines.append(f'<img class="cover-logo" src="{logo_path.as_uri()}" alt="CODITECT logo">')
        lines.append(f'<h1 class="cover-title">{html.escape(title)}</h1>')
        if subtitle:
            lines.append(f'<h2 class="cover-subtitle">{html.escape(subtitle)}</h2>')
        lines.extend([
            f'<p class="cover-classification">{html.escape(classification)}</p>',
            f'<p class="cover-version">Version {html.escape(str(version))}</p>',
            f'<p class="cover-date">{date_str}</p>',
            f'<p class="cover-author">{html.escape(AUTHOR)}</p>',
            f'<p class="cover-copyright">Copyright &copy; {COPYRIGHT_YEAR} {html.escape(ORGANIZATION)}. All rights reserved.</p>',
            '</div>',
            '<div style="page-break-after: always;"></div>',
        ])
        # No blank lines inside the fragment: a blank line would end the
        # HTML block in Markdown and the rest would be parsed as text.
        return "\n".join(lines) + "\n\n"

    def generate_pdf(self, input_path: Path, output_path: Optional[Path] = None,
                     include_cover: bool = True, include_toc: bool = True,
                     title: Optional[str] = None, subtitle: str = "",
                     doc_type: str = "technical") -> Path:
        """Generate a publication-grade PDF from markdown.

        Args:
            input_path: Markdown file to convert.
            output_path: Destination PDF; defaults to
                ``<input dir>/pdf-output/<input stem>.pdf``.
            include_cover: Prepend the cover page.
            include_toc: Prepend a table of contents.
            title: Title override; otherwise the frontmatter ``title`` or
                the first H1 is used.
            subtitle: Subtitle for the cover page.
            doc_type: Key into the config's ``document_types`` section, used
                to look up the cover classification.

        Returns:
            The path of the generated PDF.

        Raises:
            FileNotFoundError: ``input_path`` does not exist.
            RuntimeError: None of the PDF back ends succeeded.
        """
        if not input_path.exists():
            raise FileNotFoundError(f"Input file not found: {input_path}")

        with open(input_path, "r", encoding="utf-8") as f:
            content = f.read()

        frontmatter, body = self._extract_frontmatter(content)
        doc_title = title or frontmatter.get("title") or self._extract_title(body)

        print(f"Generating PDF: {doc_title}")

        if output_path is None:
            output_path = input_path.parent / "pdf-output" / (input_path.stem + ".pdf")
        # Also covers a caller-supplied path whose directory does not exist.
        output_path.parent.mkdir(parents=True, exist_ok=True)

        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)

            # Diagram images are written under temp_path and referenced
            # relatively, so processed.md must live in temp_path as well.
            processed_content = self._render_mermaid_diagrams(body, temp_path)

            sections = []
            if include_cover:
                doc_config = self.config.get("document_types", {}).get(doc_type, {})
                classification = doc_config.get("classification", "Technical Documentation")
                sections.append(
                    self._generate_cover_page(doc_title, subtitle, classification=classification)
                )
            if include_toc:
                sections.append(self._generate_toc(processed_content))
            sections.append(processed_content)
            final_content = "".join(sections)

            processed_md = temp_path / "processed.md"
            with open(processed_md, "w", encoding="utf-8") as f:
                f.write(final_content)

            # Try each back end in order until one succeeds.
            success = (
                self._generate_with_md2pdf(processed_md, output_path)
                or self._generate_with_md_to_pdf(processed_md, output_path)
                or self._generate_with_pandoc(processed_md, output_path)
            )

            if not success:
                raise RuntimeError("Failed to generate PDF with any available tool")

            print(f" Created: {output_path}")
            return output_path

    def _generate_with_md2pdf(self, input_path: Path, output_path: Path) -> bool:
        """Generate PDF using the md2pdf-mermaid Python package.

        Returns:
            ``True`` on success; ``False`` when the package is not installed
            or the conversion raises.
        """
        try:
            from md2pdf_mermaid import md2pdf
        except ImportError:
            return False

        try:
            css_path = TEMPLATES_DIR / "coditect-theme.css"
            css = css_path.read_text(encoding="utf-8") if css_path.exists() else None

            md2pdf(
                str(output_path),
                md_file_path=str(input_path),
                css=css,
                base_url=str(input_path.parent)
            )
            return True
        except Exception as e:
            # Any back-end failure falls through to the next generator.
            print(f" md2pdf-mermaid failed: {e}")
            return False

    def _generate_with_md_to_pdf(self, input_path: Path, output_path: Path) -> bool:
        """Generate PDF using md-to-pdf (npm package via npx).

        Returns:
            ``True`` when md-to-pdf produced a PDF that was moved to
            ``output_path``; ``False`` otherwise.
        """
        npx_path = shutil.which("npx")
        if not npx_path:
            return False

        try:
            css_path = TEMPLATES_DIR / "coditect-theme.css"

            cmd = [
                npx_path, "md-to-pdf",
                str(input_path),
                "--pdf-options", json.dumps({
                    "format": "A4",
                    "margin": {"top": "25mm", "right": "20mm", "bottom": "25mm", "left": "20mm"},
                    "displayHeaderFooter": True,
                    "headerTemplate": "<div></div>",
                    "footerTemplate": f'<div style="width:100%;font-size:8pt;text-align:center;color:#666;">Copyright © {COPYRIGHT_YEAR} {ORGANIZATION} | Page <span class="pageNumber"></span> of <span class="totalPages"></span></div>'
                })
            ]

            if css_path.exists():
                cmd.extend(["--stylesheet", str(css_path)])

            result = subprocess.run(cmd, capture_output=True, text=True)

            # md-to-pdf outputs to same directory with .pdf extension
            temp_output = input_path.with_suffix(".pdf")
            if temp_output.exists():
                shutil.move(str(temp_output), str(output_path))
                return True

            if result.returncode != 0:
                print(f" md-to-pdf failed (exit {result.returncode}): {result.stderr.strip()}")
            return False
        except Exception as e:
            print(f" md-to-pdf failed: {e}")
            return False

    def _generate_with_pandoc(self, input_path: Path, output_path: Path) -> bool:
        """Generate PDF using Pandoc (fallback).

        Returns:
            ``True`` on success; ``False`` when pandoc is not on PATH or the
            conversion fails.
        """
        if not shutil.which("pandoc"):
            return False

        try:
            css_path = TEMPLATES_DIR / "coditect-theme.css"

            # NOTE(review): "geometry" is a LaTeX template variable; with the
            # wkhtmltopdf engine pandoc documents margin-top/-bottom/-left/
            # -right instead. Confirm whether the margin here takes effect.
            cmd = [
                "pandoc",
                str(input_path),
                "-o", str(output_path),
                "--pdf-engine=wkhtmltopdf",
                "-V", "geometry:margin=25mm"
            ]

            if css_path.exists():
                cmd.extend(["--css", str(css_path)])

            subprocess.run(cmd, check=True, capture_output=True)
            return True
        except Exception as e:
            print(f" Pandoc failed: {e}")
            return False

    def batch_generate(self, input_dir: Path, output_dir: Optional[Path] = None,
                       pattern: str = "*.md", **kwargs) -> List[Path]:
        """Generate PDFs for all markdown files in a directory.

        Args:
            input_dir: Directory to scan (non-recursive).
            output_dir: Destination directory; defaults to
                ``input_dir/pdf-output``.
            pattern: Glob pattern selecting the input files.
            **kwargs: Forwarded to :meth:`generate_pdf`.

        Returns:
            Paths of the PDFs that were generated. Files that fail are
            reported and skipped.

        Raises:
            ValueError: ``input_dir`` is not a directory.
        """
        if not input_dir.is_dir():
            raise ValueError(f"Not a directory: {input_dir}")

        if output_dir is None:
            output_dir = input_dir / "pdf-output"
        output_dir.mkdir(parents=True, exist_ok=True)

        # Exclude README.md from batch processing; sort for a stable order.
        md_files = sorted(
            f for f in input_dir.glob(pattern) if f.name.upper() != "README.MD"
        )

        print(f"Found {len(md_files)} markdown files in {input_dir}")

        generated = []
        for md_file in md_files:
            try:
                output_path = output_dir / (md_file.stem + ".pdf")
                generated.append(self.generate_pdf(md_file, output_path, **kwargs))
            except Exception as e:
                # One bad document must not stop the rest of the batch.
                print(f" Error processing {md_file.name}: {e}")

        print(f"\nGenerated {len(generated)} PDFs in {output_dir}")
        return generated
def main():
    """Main entry point.

    Parses the command line, then either checks dependencies
    (``--check-deps``), converts a directory (``--batch``) or converts a
    single markdown file. Exits with status 0 on success and 1 on any
    error or when no input is given.
    """
    parser = argparse.ArgumentParser(
        description="CODITECT Publication PDF Generator",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Generate single PDF
  python3 scripts/generate-publication-pdf.py docs/ARCHITECTURE.md

  # Generate with cover page and custom title
  python3 scripts/generate-publication-pdf.py docs/README.md --cover --title "User Guide"

  # Batch generate all markdown files
  python3 scripts/generate-publication-pdf.py --batch docs/00-coditect-introduction/

  # Specify output path
  python3 scripts/generate-publication-pdf.py input.md --output my-doc.pdf

Copyright 2025 AZ1.AI Inc. All rights reserved.
        """
    )

    parser.add_argument("input", nargs="?", help="Input markdown file or directory (with --batch)")
    parser.add_argument("--batch", action="store_true", help="Process all markdown files in directory")
    parser.add_argument("--output", "-o", help="Output PDF path or directory")
    # --cover / --toc default to True, so passing them changes nothing; they
    # are kept for command-line compatibility. Use --no-cover / --no-toc to
    # switch the sections off.
    parser.add_argument("--cover", action="store_true", default=True, help="Include cover page (default: True)")
    parser.add_argument("--no-cover", action="store_true", help="Exclude cover page")
    parser.add_argument("--toc", action="store_true", default=True, help="Include table of contents (default: True)")
    parser.add_argument("--no-toc", action="store_true", help="Exclude table of contents")
    parser.add_argument("--title", help="Document title (extracted from markdown if not provided)")
    parser.add_argument("--subtitle", default="", help="Document subtitle")
    parser.add_argument("--type", default="technical",
                        choices=["technical", "architecture", "user_guide", "reference", "internal"],
                        help="Document type for classification")
    parser.add_argument("--check-deps", action="store_true", help="Check dependencies and exit")

    args = parser.parse_args()

    generator = CoditectPdfGenerator()

    # Check dependencies
    if args.check_deps:
        if generator._check_dependencies():
            print("All dependencies are installed.")
            sys.exit(0)
        print("\nInstall missing dependencies and try again.")
        sys.exit(1)

    if not args.input:
        parser.print_help()
        sys.exit(1)

    input_path = Path(args.input).resolve()

    # Handle options
    include_cover = args.cover and not args.no_cover
    include_toc = args.toc and not args.no_toc

    try:
        if args.batch:
            output_dir = Path(args.output) if args.output else None
            generator.batch_generate(
                input_path,
                output_dir=output_dir,
                include_cover=include_cover,
                include_toc=include_toc,
                doc_type=args.type
            )
        else:
            output_path = Path(args.output) if args.output else None
            generator.generate_pdf(
                input_path,
                output_path=output_path,
                include_cover=include_cover,
                include_toc=include_toc,
                title=args.title,
                subtitle=args.subtitle,
                doc_type=args.type
            )

        print("\nPDF generation complete!")
        sys.exit(0)

    except Exception as e:
        # Top-level boundary: report any failure (including a missing input
        # file) and exit non-zero. SystemExit is not an Exception, so the
        # sys.exit(0) above passes through untouched.
        print(f"Error: {e}")
        sys.exit(1)
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()