scripts-generate-publication-pdf

#!/usr/bin/env python3
"""

title: "CODITECT branding constants"
component_type: script
version: "1.0.0"
audience: contributor
status: stable
summary: "CODITECT Publication PDF Generator"
keywords: ['generate', 'generation', 'pdf', 'publication']
tokens: ~500
created: 2025-12-22
updated: 2025-12-22
script_name: "generate-publication-pdf.py"
language: python
executable: true
usage: "python3 scripts/generate-publication-pdf.py [options]"
python_version: "3.10+"
dependencies: []
modifies_files: false
network_access: false
requires_auth: false

CODITECT Publication PDF Generator

Professional-grade PDF generation from Markdown documents with:

- Mermaid diagram rendering
- Consistent CODITECT branding
- Headers, footers, page numbers
- Table of contents generation
- Cover page support

Usage:
    python3 scripts/generate-publication-pdf.py <input.md> [options]
    python3 scripts/generate-publication-pdf.py --batch <input-dir> [options]

Examples:
    # Single file
    python3 scripts/generate-publication-pdf.py docs/00-coditect-introduction/WHAT-IS-CODITECT-CORE.md

# With cover page
python3 scripts/generate-publication-pdf.py docs/ARCHITECTURE.md --cover --title "System Architecture"

# Batch processing
python3 scripts/generate-publication-pdf.py --batch docs/00-coditect-introduction/ --output pdf-output/

# Custom output
python3 scripts/generate-publication-pdf.py input.md --output my-document.pdf

"""

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, Any, List

# CODITECT branding constants
CODITECT_VERSION = "1.7.2"
ORGANIZATION = "AZ1.AI Inc."
AUTHOR = "Hal Casteel, Founder/CEO/CTO"
COPYRIGHT_YEAR = 2025

# Get script directory and project root
# (__file__ is this script; the project root is the directory above it.)
SCRIPT_DIR = Path(__file__).parent.resolve()
PROJECT_ROOT = SCRIPT_DIR.parent
# JSON configuration file read by CoditectPdfGenerator when no path is given.
CONFIG_PATH = PROJECT_ROOT / "docs" / "99-publishing" / "config" / "pdf-generation-config.json"
# Directory searched for the "coditect-theme.css" stylesheet.
TEMPLATES_DIR = PROJECT_ROOT / "docs" / "99-publishing" / "templates"

class CoditectPdfGenerator:
    """Generate publication-grade PDFs that carry CODITECT branding."""

def __init__(self, config_path: Optional[Path] = None):
    """Set up the generator and load its settings.

    Args:
        config_path: JSON configuration file to load. When omitted (or
            otherwise falsy), the module-level ``CONFIG_PATH`` is used.
    """
    chosen_path = config_path if config_path else CONFIG_PATH
    self.config = self._load_config(chosen_path)
    # Starts out as None; nothing in this constructor populates it.
    self.temp_dir = None

def _load_config(self, config_path: Path) -> Dict[str, Any]:
    """Load configuration from JSON file."""
    if config_path.exists():
        with open(config_path, "r") as f:
            return json.load(f)
    else:
        print(f"Warning: Config not found at {config_path}, using defaults")
        return self._default_config()

def _default_config(self) -> Dict[str, Any]:
    """Build the built-in configuration used when no config file is found.

    Returns:
        A new dictionary with three sections: ``"defaults"`` (page layout
        switches), ``"branding"`` (taken from the module-level branding
        constants) and ``"mermaid"`` (diagram rendering settings).
    """
    layout_section = dict(
        page_size="A4",
        include_cover=True,
        include_toc=True,
        include_header=True,
        include_footer=True,
        page_numbers=True,
    )
    branding_section = dict(
        organization=ORGANIZATION,
        author=AUTHOR,
        copyright_year=COPYRIGHT_YEAR,
    )
    mermaid_section = dict(
        enabled=True,
        theme="default",
        output_format="png",
        scale=2,
    )
    return {
        "defaults": layout_section,
        "branding": branding_section,
        "mermaid": mermaid_section,
    }

def _check_dependencies(self) -> bool:
    """Check if required tools are installed."""
    dependencies = {
        "md2pdf-mermaid": "pip install md2pdf-mermaid",
        "mmdc": "npm install -g @mermaid-js/mermaid-cli"  # mermaid-cli
    }

    missing = []
    for tool, install_cmd in dependencies.items():
        if tool == "md2pdf-mermaid":
            # Check Python package
            try:
                import md2pdf_mermaid
            except ImportError:
                missing.append((tool, install_cmd))
        else:
            # Check CLI tool
            if shutil.which(tool) is None:
                missing.append((tool, install_cmd))

    if missing:
        print("Missing dependencies:")
        for tool, cmd in missing:
            print(f"  - {tool}: Install with '{cmd}'")

        # Try alternative approach with md-to-pdf
        if shutil.which("md-to-pdf") or shutil.which("npx"):
            print("\nAlternative: Using md-to-pdf (npx md-to-pdf)")
            return True

        return False

    return True

def _extract_frontmatter(self, content: str) -> tuple:
    """Extract YAML frontmatter from markdown content."""
    frontmatter = {}
    body = content

    # Check for YAML frontmatter
    if content.startswith("---"):
        parts = content.split("---", 2)
        if len(parts) >= 3:
            try:
                import yaml
                frontmatter = yaml.safe_load(parts[1]) or {}
            except ImportError:
                # Parse simple key: value pairs
                for line in parts[1].strip().split("\n"):
                    if ":" in line:
                        key, value = line.split(":", 1)
                        frontmatter[key.strip()] = value.strip()
            except Exception:
                pass
            body = parts[2]

    return frontmatter, body

def _extract_title(self, content: str) -> str:
    """Extract title from markdown content."""
    # Try to find first H1
    match = re.search(r"^#\s+(.+)$", content, re.MULTILINE)
    if match:
        return match.group(1).strip()
    return "Untitled Document"

def _generate_toc(self, content: str) -> str:
    """Generate table of contents from markdown headings."""
    toc_lines = ["## Table of Contents\n"]
    headings = re.findall(r"^(#{2,4})\s+(.+)$", content, re.MULTILINE)

    for level_markers, heading in headings:
        level = len(level_markers) - 2  # H2 = 0, H3 = 1, H4 = 2
        indent = "  " * level
        # Create anchor link
        anchor = re.sub(r"[^\w\s-]", "", heading.lower())
        anchor = re.sub(r"\s+", "-", anchor)
        toc_lines.append(f"{indent}- [{heading}](#{anchor})")

    return "\n".join(toc_lines) + "\n\n---\n\n"

def _render_mermaid_diagrams(self, content: str, output_dir: Path) -> str:
    """Pre-render Mermaid diagrams to images."""
    mermaid_pattern = r"```mermaid\n(.*?)```"
    matches = list(re.finditer(mermaid_pattern, content, re.DOTALL))

    if not matches:
        return content

    print(f"  Found {len(matches)} Mermaid diagram(s)")

    # Check for mermaid-cli
    mmdc_path = shutil.which("mmdc")
    if not mmdc_path:
        # Try npx
        if shutil.which("npx"):
            mmdc_path = "npx mmdc"
        else:
            print("  Warning: mermaid-cli not found, diagrams may not render")
            return content

    # Create temp directory for diagrams
    diagrams_dir = output_dir / "mermaid-diagrams"
    diagrams_dir.mkdir(parents=True, exist_ok=True)

    new_content = content
    for i, match in enumerate(matches):
        diagram_code = match.group(1)
        input_file = diagrams_dir / f"diagram-{i}.mmd"
        output_file = diagrams_dir / f"diagram-{i}.png"

        # Write diagram code
        with open(input_file, "w") as f:
            f.write(diagram_code)

        # Render diagram
        try:
            cmd = f"{mmdc_path} -i {input_file} -o {output_file} -b white -s 2"
            subprocess.run(cmd, shell=True, check=True, capture_output=True)

            # Replace mermaid block with image
            relative_path = output_file.relative_to(output_dir)
            replacement = f"![Diagram {i+1}]({relative_path})"
            new_content = new_content.replace(match.group(0), replacement)
            print(f"  Rendered diagram {i+1}")
        except subprocess.CalledProcessError as e:
            print(f"  Warning: Failed to render diagram {i+1}: {e}")

    return new_content

def _generate_cover_page(self, title: str, subtitle: str = "",
                         version: str = CODITECT_VERSION,
                         classification: str = "Technical Documentation") -> str:
    """Build the cover-page text that is prepended to the document.

    Args:
        title: Document title shown on the cover.
        subtitle: Optional subtitle line.
        version: Version string shown on the cover.
        classification: Document classification label.

    Returns:
        The cover block containing the title, subtitle, author,
        organization, the current local date and the version.
    """
    # NOTE(review): neither `logo_path` nor `classification` is referenced by
    # the template below -- presumably the cover markup that used them was
    # lost from this copy of the file; confirm against the original template.
    logo_path = PROJECT_ROOT / "docs" / "00-coditect-introduction" / "assets" / "images" / "coditect-logo.png"

    date_str = datetime.now().strftime("%B %d, %Y")

    cover_html = f"""

CODITECT

{title}

{subtitle}

Author: {AUTHOR}

Organization: {ORGANIZATION}

Date: {date_str}

Version {version}

"""
    return cover_html

def generate_pdf(self, input_path: Path, output_path: Optional[Path] = None,
                 include_cover: bool = True, include_toc: bool = True,
                 title: Optional[str] = None, subtitle: str = "",
                 doc_type: str = "technical") -> Path:
    """Generate a publication-grade PDF from a markdown file.

    The document is assembled (cover page, table of contents, body with
    pre-rendered Mermaid diagrams) in a temporary directory and then
    converted by the first backend that succeeds: md2pdf-mermaid, md-to-pdf,
    then Pandoc.

    Args:
        input_path: Markdown file to convert.
        output_path: Destination PDF. Defaults to
            ``<input dir>/pdf-output/<input stem>.pdf``. Missing parent
            directories are created in both cases.
        include_cover: Prepend the generated cover page.
        include_toc: Insert a generated table of contents.
        title: Title override; otherwise the frontmatter ``title`` or the
            first H1 of the body is used.
        subtitle: Subtitle passed to the cover page.
        doc_type: Key into the ``"document_types"`` config section, used to
            look up the cover-page classification.

    Returns:
        The path of the generated PDF.

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
        RuntimeError: If every conversion backend fails.
    """
    if not input_path.exists():
        raise FileNotFoundError(f"Input file not found: {input_path}")

    # Read markdown content
    with open(input_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Extract frontmatter and title
    frontmatter, body = self._extract_frontmatter(content)
    doc_title = title or frontmatter.get("title") or self._extract_title(body)

    print(f"Generating PDF: {doc_title}")

    # Determine output path
    if output_path is None:
        output_path = input_path.parent / "pdf-output" / (input_path.stem + ".pdf")
    # An explicitly supplied destination may point into a directory that
    # does not exist yet; none of the backends creates it for us.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # All intermediate files live in a temp directory that is removed on exit.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)

        # Pre-render Mermaid diagrams
        processed_content = self._render_mermaid_diagrams(body, temp_path)

        # Build final document: cover, then TOC, then the main content.
        sections = []
        if include_cover:
            doc_config = self.config.get("document_types", {}).get(doc_type, {})
            classification = doc_config.get("classification", "Technical Documentation")
            sections.append(
                self._generate_cover_page(doc_title, subtitle, classification=classification)
            )
        if include_toc:
            sections.append(self._generate_toc(processed_content))
        sections.append(processed_content)

        # Write processed markdown
        processed_md = temp_path / "processed.md"
        with open(processed_md, "w", encoding="utf-8") as f:
            f.write("".join(sections))

        # Try each backend in order of preference; any() stops at the
        # first one that reports success.
        backends = (
            self._generate_with_md2pdf,
            self._generate_with_md_to_pdf,
            self._generate_with_pandoc,
        )
        if not any(backend(processed_md, output_path) for backend in backends):
            raise RuntimeError("Failed to generate PDF with any available tool")

    print(f"  Created: {output_path}")
    return output_path

def _generate_with_md2pdf(self, input_path: Path, output_path: Path) -> bool:
    """Generate PDF using md2pdf-mermaid Python package."""
    try:
        from md2pdf_mermaid import md2pdf

        css_path = TEMPLATES_DIR / "coditect-theme.css"
        css = css_path.read_text() if css_path.exists() else None

        md2pdf(
            str(output_path),
            md_file_path=str(input_path),
            css=css,
            base_url=str(input_path.parent)
        )
        return True
    except ImportError:
        return False
    except Exception as e:
        print(f"  md2pdf-mermaid failed: {e}")
        return False

def _generate_with_md_to_pdf(self, input_path: Path, output_path: Path) -> bool:
    """Generate PDF using md-to-pdf (npm package via npx)."""
    if not shutil.which("npx"):
        return False

    try:
        css_path = TEMPLATES_DIR / "coditect-theme.css"

        cmd = [
            "npx", "md-to-pdf",
            str(input_path),
            "--pdf-options", json.dumps({
                "format": "A4",
                "margin": {"top": "25mm", "right": "20mm", "bottom": "25mm", "left": "20mm"},
                "displayHeaderFooter": True,
                "headerTemplate": "<div></div>",
                "footerTemplate": f'<div style="width:100%;font-size:8pt;text-align:center;color:#666;">Copyright © {COPYRIGHT_YEAR} {ORGANIZATION} | Page <span class="pageNumber"></span> of <span class="totalPages"></span></div>'
            })
        ]

        if css_path.exists():
            cmd.extend(["--stylesheet", str(css_path)])

        result = subprocess.run(cmd, capture_output=True, text=True)

        # md-to-pdf outputs to same directory with .pdf extension
        temp_output = input_path.with_suffix(".pdf")
        if temp_output.exists():
            shutil.move(str(temp_output), str(output_path))
            return True

        return False
    except Exception as e:
        print(f"  md-to-pdf failed: {e}")
        return False

def _generate_with_pandoc(self, input_path: Path, output_path: Path) -> bool:
    """Generate PDF using Pandoc (fallback)."""
    if not shutil.which("pandoc"):
        return False

    try:
        css_path = TEMPLATES_DIR / "coditect-theme.css"

        cmd = [
            "pandoc",
            str(input_path),
            "-o", str(output_path),
            "--pdf-engine=wkhtmltopdf",
            "-V", "geometry:margin=25mm"
        ]

        if css_path.exists():
            cmd.extend(["--css", str(css_path)])

        subprocess.run(cmd, check=True, capture_output=True)
        return True
    except Exception as e:
        print(f"  Pandoc failed: {e}")
        return False

def batch_generate(self, input_dir: Path, output_dir: Optional[Path] = None,
                   pattern: str = "*.md", **kwargs) -> List[Path]:
    """Generate PDFs for all markdown files in a directory.

    Files are processed in sorted order so that runs are reproducible. A
    failure on one file is reported and does not stop the batch.

    Args:
        input_dir: Directory to search (non-recursively unless ``pattern``
            says otherwise).
        output_dir: Directory for the PDFs. Defaults to
            ``<input_dir>/pdf-output``; created if missing.
        pattern: Glob pattern selecting the input files.
        **kwargs: Forwarded unchanged to ``generate_pdf`` for every file.

    Returns:
        The paths returned by ``generate_pdf`` for the files that succeeded,
        in processing order.

    Raises:
        ValueError: If ``input_dir`` is not a directory.
    """
    if not input_dir.is_dir():
        raise ValueError(f"Not a directory: {input_dir}")

    if output_dir is None:
        output_dir = input_dir / "pdf-output"

    output_dir.mkdir(parents=True, exist_ok=True)

    # glob() yields entries in arbitrary filesystem order; sort them, skip
    # directories that happen to match, and exclude README.md.
    md_files = [
        f for f in sorted(input_dir.glob(pattern))
        if f.is_file() and f.name.upper() != "README.MD"
    ]

    print(f"Found {len(md_files)} markdown files in {input_dir}")

    generated = []
    for md_file in md_files:
        try:
            output_path = output_dir / (md_file.stem + ".pdf")
            result = self.generate_pdf(md_file, output_path, **kwargs)
            generated.append(result)
        except Exception as e:
            # Best effort: report and carry on with the remaining files.
            print(f"  Error processing {md_file.name}: {e}")

    print(f"\nGenerated {len(generated)} PDFs in {output_dir}")
    return generated

def main():
    """Main entry point: parse command-line arguments and run the generator.

    Exits with status 0 on success and 1 on any error, on a failed
    ``--check-deps`` run, or when no input path is given.
    """
    parser = argparse.ArgumentParser(
        description="CODITECT Publication PDF Generator",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Generate single PDF
  python3 scripts/generate-publication-pdf.py docs/ARCHITECTURE.md

  # Generate with cover page and custom title
  python3 scripts/generate-publication-pdf.py docs/README.md --cover --title "User Guide"

  # Batch generate all markdown files
  python3 scripts/generate-publication-pdf.py --batch docs/00-coditect-introduction/

  # Specify output path
  python3 scripts/generate-publication-pdf.py input.md --output my-doc.pdf
""",
    )

    parser.add_argument("input", nargs="?", help="Input markdown file or directory (with --batch)")
    parser.add_argument("--batch", action="store_true", help="Process all markdown files in directory")
    parser.add_argument("--output", "-o", help="Output PDF path or directory")
    parser.add_argument("--cover", action="store_true", default=True, help="Include cover page (default: True)")
    parser.add_argument("--no-cover", action="store_true", help="Exclude cover page")
    parser.add_argument("--toc", action="store_true", default=True, help="Include table of contents (default: True)")
    parser.add_argument("--no-toc", action="store_true", help="Exclude table of contents")
    parser.add_argument("--title", help="Document title (extracted from markdown if not provided)")
    parser.add_argument("--subtitle", default="", help="Document subtitle")
    parser.add_argument("--type", default="technical",
                        choices=["technical", "architecture", "user_guide", "reference", "internal"],
                        help="Document type for classification")
    parser.add_argument("--check-deps", action="store_true", help="Check dependencies and exit")

    args = parser.parse_args()

    generator = CoditectPdfGenerator()

    # Check dependencies
    if args.check_deps:
        if generator._check_dependencies():
            print("All dependencies are installed.")
            sys.exit(0)
        else:
            print("\nInstall missing dependencies and try again.")
            sys.exit(1)

    if not args.input:
        parser.print_help()
        sys.exit(1)

    input_path = Path(args.input).resolve()

    # --cover/--toc default to True, so only the --no-* flags change anything.
    include_cover = args.cover and not args.no_cover
    include_toc = args.toc and not args.no_toc

    try:
        if args.batch:
            output_dir = Path(args.output) if args.output else None
            generator.batch_generate(
                input_path,
                output_dir=output_dir,
                include_cover=include_cover,
                include_toc=include_toc,
                doc_type=args.type
            )
        else:
            output_path = Path(args.output) if args.output else None
            generator.generate_pdf(
                input_path,
                output_path=output_path,
                include_cover=include_cover,
                include_toc=include_toc,
                title=args.title,
                subtitle=args.subtitle,
                doc_type=args.type
            )
    except Exception as e:
        # Top-level boundary: report on stderr and signal failure through the
        # exit status (covers FileNotFoundError, ValueError, RuntimeError, ...).
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    print("\nPDF generation complete!")
    sys.exit(0)

# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()

#!/usr/bin/env python3 """​