"""Build one EPUB per volume from a Cardboard Translation Blogger JSON feed.

The script downloads every post carrying a given label, extracts the volume
and chapter number from each post title, and writes one EPUB file per volume
with a generated cover image.

It expects three files in the working directory: ``OreichalkonVF.ttf`` (cover
font), ``cover.jpg`` (cover background) and ``description.txt`` (book blurb).
"""

from collections.abc import Iterable
from dataclasses import dataclass
from io import BytesIO
import argparse
import io
import json
import math
import re
import time
import urllib.parse
import urllib.request

from ebooklib import epub
from ebooklib.epub import IMAGE_MEDIA_TYPES
from PIL import Image, ImageDraw, ImageFont

URL_ROOT = "https://www.cardboardtranslation.com/feeds/posts/default/-"
ENTRIES_PER_FETCH = 100

# Everything after the first "] " in the post title.
TITLE_REGEX = re.compile(r"(?<=] ).*")
# "[Vol. 3]" -> num == "3". The group name is required by process_orig_entry.
VOLUME_STRING_REGEX = re.compile(r"\[Vol\.\s+(?P<num>\d+)\]")
# "...] Chapter 12.5: Title" -> num == "12.5".
CHAPTER_REGEX = re.compile(r".*\] Chapter (?P<num>\d+(\.\d)?)(?::.*)?")

# Pixel position of the "Volume N" caption drawn onto the cover image.
IMAGE_TEXT_XY = (411, 27)
VOL_FONT = ImageFont.truetype("OreichalkonVF.ttf", 120)
VOL_FONT.set_variation_by_name("Medium")

# Volume numbers seen so far; filled in as a side effect of process_orig_entry.
volumes: set[int] = set()


@dataclass
class Entry:
    """A single feed post reduced to the fields needed to build a chapter."""

    title: str
    volume: int
    # Kept as text because chapter numbers may be fractional (e.g. "12.5").
    chapter: str
    content: str


entries: list[Entry] = []

parser = argparse.ArgumentParser()
# The name is used to build the feed URL, so it cannot be omitted.
parser.add_argument("-n", "--name", help="The book to download", required=True)
parser.add_argument(
    "-a", "--author", help="The author name to add to the metadata", default=None
)


def process_orig_entry(entry: dict) -> Entry | None:
    """Convert a raw feed entry into an ``Entry``.

    Returns ``None`` when the post title does not contain a title part, a
    ``[Vol. N]`` tag and a ``Chapter N`` marker, so that unrelated posts under
    the same label are skipped instead of aborting the whole run.

    Side effect: records the volume number in the module-level ``volumes`` set.
    """
    orig_title = entry["title"]["$t"]

    title_match = TITLE_REGEX.search(orig_title)
    volume_match = VOLUME_STRING_REGEX.search(orig_title)
    chapter_match = CHAPTER_REGEX.search(orig_title)
    if title_match is None or volume_match is None or chapter_match is None:
        # NOTE(review): create_chapter has a "Prologue" branch, but a title
        # without "Chapter <n>" never gets this far - confirm how prologue
        # posts are titled.
        return None

    entry_vol = int(volume_match.group("num"))
    volumes.add(entry_vol)
    return Entry(
        title_match.group(0),
        entry_vol,
        chapter_match.group("num"),
        entry["content"]["$t"],
    )


def root_fetch(url: str) -> int:
    """Return how many feed pages are needed to download every entry."""
    with urllib.request.urlopen(url) as response:
        feed = json.loads(response.read())

    entries_count = feed["feed"]["openSearch$totalResults"]["$t"]
    feeds_to_fetch = math.ceil(int(entries_count) / ENTRIES_PER_FETCH)
    print(f"Fetching {feeds_to_fetch} feeds")
    return feeds_to_fetch


def fetch_entries(root: str, count: int) -> list[Entry]:
    """Download ``count`` feed pages from ``root`` and parse their entries.

    Entries whose title cannot be parsed are dropped. The result keeps the
    order in which the feed returned the entries.
    """
    collected: list[Entry] = []
    for page in range(count):
        start_index = page * ENTRIES_PER_FETCH + 1
        end_index = start_index + ENTRIES_PER_FETCH - 1
        print(f"Fetching Entries {start_index} to {end_index}")

        url = f"{root}&start-index={start_index}"
        with urllib.request.urlopen(url) as response:
            feed_content = json.loads(response.read())

        # A page without posts has no "entry" key at all.
        for raw_entry in feed_content["feed"].get("entry", []):
            parsed = process_orig_entry(raw_entry)
            if parsed is not None:
                collected.append(parsed)
    return collected


def normalize_content(orig_content: str, vol_num: int) -> str:
    """Clean up a post body and rebuild its leading heading block.

    NOTE(review): every separator literal in this function is a run of
    newlines in the copy of the file this was rebuilt from. They look like
    markup that was lost in extraction (``replace("\\n\\n", "\\n\\n")`` is a
    no-op as written). The values are reproduced exactly; confirm them against
    the original source. The nesting of the ``paras[1]`` update under ``else``
    was inferred the same way and should be confirmed too.
    """
    normalized = orig_content.replace("\n\n\n", "")
    paras = normalized.split("\n\n", maxsplit=2)

    if "Chapter 0" in paras[0]:
        # Drop the leading "Chapter 0" block and promote the next one.
        paras.pop(0)
        if not paras:
            return ""
        first_para = paras[0].replace("\n\n", "\n\n") + "\n\n"
    elif "Volume 3" in paras[0] and vol_num == 3:
        # Hard-coded replacement heading for the opening post of volume 3.
        first_para = "\n\nChapter 1: A New Journey, Starting with New Clothes\n\n"
        if len(paras) > 1:
            paras.pop(1)
    else:
        first_para = (
            paras[0].replace("\n\n", "\n\n").replace(f"[Vol. {vol_num}] ", "")
            + "\n\n"
        )
        if len(paras) > 1:
            paras[1] = paras[1] + "\n\n"

    paras[0] = first_para
    return "".join(paras)


def create_cover(vol_num: int) -> io.BytesIO:
    """Draw "Volume N" onto ``cover.jpg`` and return the result as PNG bytes."""
    with Image.open("cover.jpg") as im:
        drw = ImageDraw.Draw(im)
        drw.text(
            IMAGE_TEXT_XY,
            f"Volume {vol_num}",
            font=VOL_FONT,
            fill=(0, 0, 0, 255),
        )
        io_file = io.BytesIO()
        im.save(io_file, format="PNG")
    return io_file


def create_chapter(entry: Entry) -> epub.EpubHtml:
    """Build the EPUB XHTML item for one chapter entry."""
    is_prologue = "Prologue" in entry.chapter
    chap_name = "Prologue" if is_prologue else f"Chapter {entry.chapter}"
    # Dots are not wanted in ids / file stems, e.g. "12.5" -> "ch12_5".
    slug = f"ch{entry.chapter.replace('.', '_')}"

    ht = epub.EpubHtml(
        uid=slug,
        title=chap_name,
        file_name=f"{slug}.xhtml",
        lang="en",
    )
    ht.set_content(normalize_content(entry.content, entry.volume))
    return ht


def create_volumes(
    vol_count: int,
    entries: list[Entry],
    book_name: str,
    author: str | None,
    volume_numbers: Iterable[int] | None = None,
):
    """Write one ``<book_name>_Vol_<n>.epub`` file per volume.

    Args:
        vol_count: Number of volumes; used as ``1..vol_count`` when
            ``volume_numbers`` is not given.
        entries: All parsed entries, in feed order.
        book_name: Series title; also used (spaces replaced by underscores)
            for the output file names and identifiers.
        author: Optional original author added as the primary creator.
        volume_numbers: Explicit volume numbers to build. Use this when the
            discovered volumes are not a contiguous ``1..n`` range.
    """
    if volume_numbers is None:
        volume_numbers = range(1, vol_count + 1)
    volume_list = list(volume_numbers)
    if not volume_list:
        return

    name_filesafe = book_name.replace(" ", "_")
    # The description is identical for every volume, so read it once.
    with open("description.txt", "r", encoding="utf-8") as f:
        description = f.read()

    for v in volume_list:
        # Reversed because the entries arrive in the opposite of reading order.
        vol_entries = [e for e in reversed(entries) if e.volume == v]
        if not vol_entries:
            print(f"Skipping Volume {v}: no entries found")
            continue
        print(f"Creating Volume {v}")

        book = epub.EpubBook()
        book.set_identifier(f"{name_filesafe}_Vol_{v}")
        book.set_title(f"{book_name}, Volume {v}")
        # Language code rather than a language name, matching the chapters.
        book.set_language("en")
        cover_io = create_cover(v)
        book.set_cover("cover.png", cover_io.getvalue())

        if author:
            book.add_author(author)
            # Only emit the role refinement when the "#creator" element exists.
            book.add_metadata(
                None,
                "meta",
                "aut",
                {"property": "role", "scheme": "marc:relators", "refines": "#creator"},
            )
        book.add_metadata("DC", "creator", "Cardboard Translation", {"id": "cardboard"})
        book.add_metadata("DC", "creator", "dish", {"id": "dish"})
        book.add_metadata("DC", "publisher", "Cardboard Translation")
        book.add_metadata(
            None,
            "meta",
            "trl",
            {"property": "role", "scheme": "marc:relators", "refines": "#cardboard"},
        )
        book.add_metadata(
            None,
            "meta",
            "aut",
            {"property": "role", "scheme": "marc:relators", "refines": "#cardboard"},
        )
        book.add_metadata(
            None,
            "meta",
            "com",
            {"property": "role", "scheme": "marc:relators", "refines": "#dish"},
        )
        book.add_metadata("DC", "description", description)

        # Series Metadata
        book.add_metadata(
            None, "meta", book_name, {"property": "belongs-to-collection", "id": "c01"}
        )
        book.add_metadata(
            None, "meta", "series", {"refines": "#c01", "property": "collection-type"}
        )
        book.add_metadata(
            None, "meta", str(v), {"refines": "#c01", "property": "group-position"}
        )

        book.spine = ["nav"]
        for ch in vol_entries:
            chap: epub.EpubHtml = create_chapter(ch)
            book.add_item(chap)
            book.toc.append(chap)
            book.spine.append(chap)

        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())
        epub.write_epub(f"{name_filesafe}_Vol_{v}.epub", book)


def main():
    """Parse the command line, download the feed and write the EPUB files."""
    args = parser.parse_args()
    translation_name = urllib.parse.quote(args.name)
    book_root = (
        f"{URL_ROOT}/{translation_name}?alt=json&max-results={ENTRIES_PER_FETCH}"
    )
    count_to_fetch = root_fetch(book_root)
    entries = fetch_entries(book_root, count_to_fetch)
    # Build exactly the volumes that were found, even if numbers are skipped.
    create_volumes(
        len(volumes),
        entries,
        args.name,
        args.author,
        volume_numbers=sorted(volumes),
    )


if __name__ == "__main__":
    main()