#!/usr/bin/env python3
"""
Create PDF using Playwright's native PDF printing (creates vectors!).
"""
import asyncio
from pathlib import Path
from PIL import Image
from playwright.async_api import async_playwright
def _build_page_html(webp_file, svg_file, page_width, page_height):
    """Return an HTML document that stacks one page's layers for printing.

    Args:
        webp_file: Path of the raster background; skipped if it does not exist.
        svg_file: Path of the SVG layer; skipped if it does not exist.
        page_width: Page width in CSS pixels.
        page_height: Page height in CSS pixels.

    Returns:
        The HTML source as a string. The WebP layer is emitted first and the
        SVG layer second, so the SVG is painted on top.
    """
    # NOTE(review): the markup inside the original string literals was lost
    # (the literals were empty/broken), so this template is a reconstruction:
    # both layers are absolutely positioned <img> elements filling the page.
    # Confirm against the intended markup -- in particular whether the SVG
    # should be embedded via <img>, <object>, or inline.
    layers = []
    if webp_file.exists():
        # as_uri() percent-encodes the path, so it is safe inside the attribute.
        layers.append(
            f'<img class="layer" src="{webp_file.absolute().as_uri()}">\n'
        )
    if svg_file.exists():
        layers.append(
            f'<img class="layer" src="{svg_file.absolute().as_uri()}">\n'
        )

    head = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<style>
html, body {{ margin: 0; padding: 0; }}
body {{ position: relative; width: {page_width}px; height: {page_height}px; overflow: hidden; }}
.layer {{ position: absolute; top: 0; left: 0; width: {page_width}px; height: {page_height}px; }}
</style>
</head>
<body>
"""
    tail = """</body>
</html>
"""
    return head + "".join(layers) + tail


async def main(total_pages=340):
    """Render every page to a single-page PDF with Chromium, then merge them.

    For each page number an HTML wrapper is written into ``pdf_pages/``,
    loaded in headless Chromium and printed to PDF via Playwright. The
    per-page PDFs are then merged with PyPDF2 into
    ``Invitation_to_Cybersecurity.pdf`` next to this script, and the
    intermediate files are removed.

    Args:
        total_pages: Number of pages to render, numbered from 1. Defaults to
            340, the value that was previously hard-coded.

    Raises:
        ImportError: If PyPDF2 is not installed (raised before any rendering).
        Whatever ``Image.open`` raises if the sample WebP used for sizing is
        missing or unreadable.
    """
    # Import up front so a missing dependency fails before the long render
    # loop rather than after it.
    from PyPDF2 import PdfMerger

    script_dir = Path(__file__).parent
    svg_dir = script_dir / "svg_layers"
    webp_dir = script_dir / "webp_highres"
    pdf_dir = script_dir / "pdf_pages"
    pdf_dir.mkdir(exist_ok=True)

    print("Creating vector PDFs using Playwright Print-to-PDF...")
    print()

    # Page dimensions are taken from one sample image and applied to all pages.
    sample_webp = webp_dir / "page_0020_3.webp"
    with Image.open(sample_webp) as img:
        page_width, page_height = img.size

    # Convert pixels to inches (assuming 96 DPI)
    width_inches = page_width / 96
    height_inches = page_height / 96

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()

            # Generate PDF for each page
            for page_num in range(1, total_pages + 1):
                if page_num % 10 == 0:
                    print(f" Creating PDF: {page_num}/{total_pages}...")

                svg_file = svg_dir / f"page_{page_num:04d}.svg"
                webp_file = webp_dir / f"page_{page_num:04d}_3.webp"
                html = _build_page_html(
                    webp_file, svg_file, page_width, page_height
                )

                # Chromium needs a real file to navigate to; it is removed
                # again as soon as the page has been printed (or has failed).
                html_file = pdf_dir / f"page_{page_num:04d}.html"
                html_file.write_text(html, encoding="utf-8")
                try:
                    # as_uri() yields a correctly escaped file:// URL even for
                    # paths containing spaces or other special characters.
                    await page.goto(html_file.absolute().as_uri())
                    await page.wait_for_load_state('networkidle')

                    pdf_file = pdf_dir / f"page_{page_num:04d}.pdf"
                    await page.pdf(
                        path=str(pdf_file),
                        width=f"{width_inches}in",
                        height=f"{height_inches}in",
                        print_background=True,
                        margin={'top': '0', 'bottom': '0', 'left': '0', 'right': '0'},
                    )
                finally:
                    html_file.unlink()
        finally:
            await browser.close()

    print()
    print("Merging individual PDFs...")

    # Zero-padded file names make lexicographic order equal page order.
    pdf_files = sorted(pdf_dir.glob("page_*.pdf"))
    output_pdf = script_dir / "Invitation_to_Cybersecurity.pdf"

    merger = PdfMerger()
    try:
        for i, pdf_file in enumerate(pdf_files, 1):
            if i % 50 == 0:
                print(f" Merging: {i}/{len(pdf_files)}...")
            merger.append(str(pdf_file))
        merger.write(str(output_pdf))
    finally:
        merger.close()

    # Cleanup individual PDFs
    print("\nCleaning up...")
    for pdf_file in pdf_files:
        pdf_file.unlink()
    try:
        pdf_dir.rmdir()
    except OSError as exc:
        # The merged PDF already exists; a non-empty work directory should not
        # abort the run before the summary is printed.
        print(f" Could not remove {pdf_dir}: {exc}")

    file_size = output_pdf.stat().st_size / 1024 / 1024
    print()
    print("✓ Vector PDF created!")
    print(f" Location: {output_pdf}")
    print(f" Pages: {len(pdf_files)}")
    print(f" Size: {file_size:.1f} MB")
    print()
    print("Text should be vector with embedded fonts!")
if __name__ == "__main__":
    # Run the async entry point only when executed as a script, not on import.
    asyncio.run(main())