# A very janky downloader for Cardboard Translations ebooks
1from io import BytesIO
2from ebooklib.epub import IMAGE_MEDIA_TYPES
3from dataclasses import dataclass
4from ebooklib import epub
5
6import argparse
7import json
8import math
9import io
10import re
11import time
12import urllib.request
13import urllib.parse
14
15from PIL import ImageFont, Image, ImageDraw
16
# Blogger JSON feed root for the translation site; the book's label is
# appended as a path segment.
URL_ROOT = "https://www.cardboardtranslation.com/feeds/posts/default/-"
# Page size for feed pagination (Blogger's max-results parameter).
ENTRIES_PER_FETCH = 100
# Everything after "] " in a post title, i.e. the title without the
# "[Vol. N]" prefix.
TITLE_REGEX = re.compile(r"(?<=] ).*")
# Captures the volume number from the "[Vol. N]" title prefix.
VOLUME_STRING_REGEX = re.compile(r"\[Vol.\s+(?P<num>\d)\]")
# Captures the chapter number, possibly fractional (e.g. "11.5"),
# with an optional ": subtitle" tail.
CHAPTER_REGEX = re.compile(r".*\] Chapter (?P<num>\d+(\.\d)?)(?::.*)?")
# Pixel position on the cover image where the "Volume N" text is drawn.
IMAGE_TEXT_XY = (411, 27)
# Variable font for the cover text; loaded once at import time.
# NOTE(review): requires OreichalkonVF.ttf in the working directory — confirm.
VOL_FONT = ImageFont.truetype("OreichalkonVF.ttf", 120)
VOL_FONT.set_variation_by_name("Medium")

# Volume numbers seen while parsing; filled by process_orig_entry.
volumes: set[int] = set()
27
28
@dataclass
class Entry:
    """One parsed blog post: a single chapter of some volume."""

    # Chapter title with the "[Vol. N] " prefix stripped.
    title: str
    # Volume this chapter belongs to.
    volume: int
    # Chapter number as captured from the title. Kept as a string because it
    # may be fractional ("11.5") and create_chapter() calls .replace()/`in`
    # on it — the previous `int` annotation was simply wrong.
    chapter: str
    # Raw HTML body of the post.
    content: str
35
36
# Accumulates every parsed chapter across all fetched feed pages.
entries: list[Entry] = []
38
# Command-line interface. The book name is mandatory: without it there is
# no feed URL to build (previously a missing -n crashed later with a
# TypeError inside urllib.parse.quote).
parser = argparse.ArgumentParser()
parser.add_argument("-n", "--name", help="The book to download", required=True)
parser.add_argument(
    "-a", "--author", help="The author name to add to the metadata", default=None
)
44
45
def process_orig_entry(entry: dict) -> Entry | None:
    """Parse one raw Blogger feed entry into an Entry.

    Returns None when the post title does not follow the expected
    "[Vol. N] Chapter M" shape (announcements, prologues, etc.).
    Side effect: records the volume number in the module-level `volumes` set.
    """
    orig_title = entry["title"]["$t"]
    title_match = TITLE_REGEX.search(orig_title)
    vol_match = VOLUME_STRING_REGEX.search(orig_title)
    chap_match = CHAPTER_REGEX.search(orig_title)
    # Previously only the title regex was guarded (by a bare `except:`);
    # a title with no volume/chapter marker crashed with AttributeError.
    if title_match is None or vol_match is None or chap_match is None:
        return None

    entry_title = title_match.group(0)
    entry_vol = int(vol_match.group("num"))
    entry_chap = chap_match.group("num")  # kept as str (may be "11.5")
    volumes.add(entry_vol)
    content = entry["content"]["$t"]

    return Entry(entry_title, entry_vol, entry_chap, content)
58
59
def root_fetch(url: str) -> int:
    """Query the feed once to learn the total post count and return the
    number of paged fetches needed to cover it."""
    with urllib.request.urlopen(url) as resp:
        feed = json.load(resp)
    total_entries = int(feed["feed"]["openSearch$totalResults"]["$t"])
    feeds_to_fetch = math.ceil(total_entries / ENTRIES_PER_FETCH)
    print(f"Fetching {feeds_to_fetch} feeds")
    return feeds_to_fetch
68
69
def fetch_entries(root: str, count: int) -> list[Entry]:
    """Fetch `count` pages of the feed and parse every post into an Entry.

    root: feed URL with alt=json and max-results already set.
    count: number of pages to request (as computed by root_fetch).
    """
    entries: list[Entry] = []
    for page in range(count):
        start_index = (page * ENTRIES_PER_FETCH) + 1
        # Use the constant instead of a hard-coded 100 so the message stays
        # correct if ENTRIES_PER_FETCH ever changes.
        print(f"Fetching Entries {start_index} to {start_index + ENTRIES_PER_FETCH}")
        url = f"{root}&start-index={start_index}"
        with urllib.request.urlopen(url) as r:
            feed_content = json.load(r)
        # Blogger omits the "entry" key entirely when a page is empty;
        # previously that raised KeyError.
        for raw_entry in feed_content["feed"].get("entry", []):
            ent = process_orig_entry(raw_entry)
            if ent is not None:
                entries.append(ent)

    return entries
85
86
def normalize_content(orig_content: str, vol_num: int) -> str:
    """Clean a chapter's raw HTML: drop empty paragraphs and turn the
    first (title) paragraph into an <h2> heading.

    orig_content: raw post HTML from the Blogger feed.
    vol_num: volume number, used to strip the "[Vol. N] " title prefix.
    """
    normalized = orig_content.replace("<p><br /></p>", "")
    # Split off the first two paragraphs; the remainder stays intact.
    paras = normalized.split("</p>", maxsplit=2)
    if "Chapter 0" in paras[0]:
        # "Chapter 0" posts carry the real title in the second paragraph.
        paras.pop(0)
        first_para = paras[0].replace("<p>", "<h2>") + "</h2>"
    elif "Volume 3" in paras[0] and vol_num == 3:
        # Special case: volume 3's opening post has no usable title paragraph.
        first_para = "<h2>Chapter 1: A New Journey, Starting with New Clothes</h2>"
        paras.pop(1)
    else:
        first_para = (
            paras[0].replace("<p>", "<h2>").replace(f"[Vol. {vol_num}] ", "") + "</h2>"
        )
    # split() removed "</p>" only from the paragraphs it cut; after a pop()
    # the second element may already end with "</p>", and unconditionally
    # appending another one (as before) produced invalid XHTML.
    if len(paras) > 1 and not paras[1].endswith("</p>"):
        paras[1] = paras[1] + "</p>"
    paras[0] = first_para
    return "".join(paras)
104
105
def create_cover(vol_num: int) -> io.BytesIO:
    """Render "Volume N" onto the base cover art and return the result
    as an in-memory PNG buffer."""
    buffer = io.BytesIO()
    with Image.open("cover.jpg") as cover:
        canvas = ImageDraw.Draw(cover)
        canvas.text(
            IMAGE_TEXT_XY,
            f"Volume {vol_num}",
            font=VOL_FONT,
            fill=(0, 0, 0, 255),
        )
        cover.save(buffer, format="PNG")
    return buffer
118
119
def create_chapter(entry: Entry) -> epub.EpubHtml:
    """Build an EpubHtml chapter document from a parsed Entry.

    entry.chapter is a string such as "12" or "12.5"; dots are replaced
    with underscores to keep the uid and file name safe.
    """
    # The original computed an `is_prologue` flag that was never used;
    # the name is decided directly instead.
    chap_name = (
        "Prologue" if "Prologue" in entry.chapter else f"Chapter {entry.chapter}"
    )
    chap_id = f"ch{entry.chapter.replace('.', '_')}"
    ht = epub.EpubHtml(
        uid=chap_id,
        title=chap_name,
        file_name=f"{chap_id}.xhtml",
        lang="en",
    )
    ht.set_content(normalize_content(entry.content, entry.volume))

    return ht
134
135
def create_volumes(
    vol_count: int, entries: list[Entry], book_name: str, author: str | None
):
    """Assemble and write one EPUB file per volume.

    vol_count: number of volumes; volumes 1..vol_count are generated.
    entries: all parsed chapters, in feed (newest-first) order.
    book_name: human-readable series name; also used for file names.
    author: optional original-author name for the metadata.
    """
    for v in range(1, vol_count + 1):
        print(f"Creating Volume {v}")
        vol_entries = [e for e in entries if e.volume == v]
        # The feed is newest-first; reverse to get reading order.
        vol_entries.reverse()
        name_filesafe = book_name.replace(" ", "_")
        book = epub.EpubBook()
        book.set_identifier(f"{name_filesafe}_Vol_{v}")
        book.set_title(f"{book_name}, Volume {v}")
        # dc:language must be an RFC 5646 tag ("en"), not a display name;
        # "English" is rejected by epubcheck.
        book.set_language("en")
        cover_io = create_cover(v)
        book.set_cover("cover.png", cover_io.getvalue())
        if author:
            book.add_author(author)
        book.add_metadata("DC", "creator", "Cardboard Translation", {"id": "cardboard"})
        book.add_metadata("DC", "creator", "dish", {"id": "dish"})
        book.add_metadata("DC", "publisher", "Cardboard Translation")
        # MARC relator refinements: author / translator / compiler roles.
        book.add_metadata(
            None,
            "meta",
            "aut",
            {"property": "role", "scheme": "marc:relators", "refines": "#creator"},
        )
        book.add_metadata(
            None,
            "meta",
            "trl",
            {"property": "role", "scheme": "marc:relators", "refines": "#cardboard"},
        )
        book.add_metadata(
            None,
            "meta",
            "aut",
            {"property": "role", "scheme": "marc:relators", "refines": "#cardboard"},
        )
        book.add_metadata(
            None,
            "meta",
            "com",
            {"property": "role", "scheme": "marc:relators", "refines": "#dish"},
        )
        # Explicit encoding: the platform default may not be UTF-8.
        with open("description.txt", "r", encoding="utf-8") as f:
            book.add_metadata("DC", "description", f.read())
        # Series Metadata (EPUB 3 belongs-to-collection).
        book.add_metadata(
            None, "meta", book_name, {"property": "belongs-to-collection", "id": "c01"}
        )
        book.add_metadata(
            None, "meta", "series", {"refines": "#c01", "property": "collection-type"}
        )
        book.add_metadata(
            None, "meta", str(v), {"refines": "#c01", "property": "group-position"}
        )
        book.spine = ["nav"]

        for ch in vol_entries:
            chap: epub.EpubHtml = create_chapter(ch)
            book.add_item(chap)
            book.toc.append(chap)
            book.spine.append(chap)

        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())

        epub.write_epub(f"{name_filesafe}_Vol_{v}.epub", book)
203
204
def main():
    """Entry point: parse CLI args, fetch the whole feed, and write EPUBs."""
    args = parser.parse_args()
    # Fail early with a usage message instead of a TypeError from quote(None)
    # when -n/--name was not supplied.
    if not args.name:
        parser.error("the book name is required (-n/--name)")
    # The name becomes part of the URL path, so it must be percent-escaped.
    translation_name = urllib.parse.quote(args.name)
    book_root = (
        f"{URL_ROOT}/{translation_name}?alt=json&max-results={ENTRIES_PER_FETCH}"
    )
    count_to_fetch = root_fetch(book_root)
    entries = fetch_entries(book_root, count_to_fetch)
    vol_count = len(volumes)
    create_volumes(vol_count, entries, args.name, args.author)
215
216
# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()