A very janky downloader for Cardboard Translations ebooks
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 218 lines 6.9 kB view raw
"""Download a translated series from cardboardtranslation.com and build EPUBs.

The script pages through the site's JSON post feed for one label (the book
name), groups the posts by the volume number found in each post title, and
writes one ``<Book_Name>_Vol_<n>.epub`` file per volume.

Files read from the current working directory:

* ``OreichalkonVF.ttf`` - font used to stamp the volume number on the cover
  (loaded at import time).
* ``cover.jpg`` - base cover image.
* ``description.txt`` - text stored as the book description.
"""
from io import BytesIO
from ebooklib.epub import IMAGE_MEDIA_TYPES
from dataclasses import dataclass
from ebooklib import epub

import argparse
import json
import math
import io
import re
import time
import urllib.request
import urllib.parse

from PIL import ImageFont, Image, ImageDraw

URL_ROOT = "https://www.cardboardtranslation.com/feeds/posts/default/-"
ENTRIES_PER_FETCH = 100
# Everything after the first "] " in a post title.
TITLE_REGEX = re.compile(r"(?<=] ).*")
# `\d+` (not `\d`) so that volumes 10 and above are recognised.
VOLUME_STRING_REGEX = re.compile(r"\[Vol.\s+(?P<num>\d+)\]")
# `\.\d+` so that a chapter such as "1.25" is not truncated to "1.2".
CHAPTER_REGEX = re.compile(r".*\] Chapter (?P<num>\d+(?:\.\d+)?)(?::.*)?")
# Pixel position at which the "Volume N" text is drawn on the cover.
IMAGE_TEXT_XY = (411, 27)
VOL_FONT = ImageFont.truetype("OreichalkonVF.ttf", 120)
VOL_FONT.set_variation_by_name("Medium")

# Volume numbers seen so far; filled in by process_orig_entry().
volumes: set[int] = set()


@dataclass
class Entry:
    """One feed post, reduced to the fields needed to build a chapter."""

    # Post title with the leading "[...] " prefix removed.
    title: str
    # Volume number parsed from the "[Vol. N]" prefix.
    volume: int
    # Chapter identifier as captured by CHAPTER_REGEX, e.g. "12" or "12.5".
    # Kept as a string: it is used to build file names and ids.
    chapter: str
    # Raw HTML body of the post.
    content: str


entries: list[Entry] = []

parser = argparse.ArgumentParser()
# Required: main() URL-quotes this value, which fails on None.
parser.add_argument("-n", "--name", help="The book to download", required=True)
parser.add_argument(
    "-a", "--author", help="The author name to add to the metadata", default=None
)


def process_orig_entry(entry: dict) -> Entry | None:
    """Convert one raw feed entry into an ``Entry``.

    Reads ``entry["title"]["$t"]`` and ``entry["content"]["$t"]``. As a side
    effect the parsed volume number is added to the module-level ``volumes``
    set.

    Returns:
        The parsed ``Entry``, or ``None`` when the title does not have the
        expected ``[Vol. N] Chapter M`` shape.
    """
    orig_title = entry["title"]["$t"]

    title_match = TITLE_REGEX.search(orig_title)
    if title_match is None:
        # No "[...] " prefix at all: not a chapter post.
        return None

    vol_match = VOLUME_STRING_REGEX.search(orig_title)
    chap_match = CHAPTER_REGEX.search(orig_title)
    if vol_match is None or chap_match is None:
        # Report instead of crashing on a post that carries the label but
        # is not a numbered chapter.
        print(f"Skipping entry with unrecognised title: {orig_title}")
        return None

    entry_vol = int(vol_match.group("num"))
    volumes.add(entry_vol)

    return Entry(
        title_match.group(0),
        entry_vol,
        chap_match.group("num"),
        entry["content"]["$t"],
    )


def root_fetch(url: str) -> int:
    """Return how many feed pages must be fetched to cover every post.

    Fetches ``url``, reads the feed's ``openSearch$totalResults`` value and
    divides it by ``ENTRIES_PER_FETCH``, rounding up.
    """
    with urllib.request.urlopen(url) as r:
        feed = json.loads(r.read())

    entries_count = feed["feed"]["openSearch$totalResults"]["$t"]
    feeds_to_fetch = math.ceil(int(entries_count) / ENTRIES_PER_FETCH)
    print(f"Fetching {feeds_to_fetch} feeds")

    return feeds_to_fetch


def fetch_entries(root: str, count: int) -> list[Entry]:
    """Fetch ``count`` feed pages and return every entry that could be parsed.

    Args:
        root: Feed URL that already contains a query string; the
            ``start-index`` parameter is appended to it.
        count: Number of pages of ``ENTRIES_PER_FETCH`` posts to request.

    Returns:
        Parsed entries in the order the feed returned them.
    """
    entries: list[Entry] = []
    for i in range(count):
        # The feed's start-index is 1-based.
        start_index = (i * ENTRIES_PER_FETCH) + 1
        last_index = start_index + ENTRIES_PER_FETCH - 1
        print(f"Fetching Entries {start_index} to {last_index}")
        url = f"{root}&start-index={start_index}"
        with urllib.request.urlopen(url) as r:
            feed_content = json.loads(r.read())
        # A page without posts has no "entry" key.
        for raw_entry in feed_content["feed"].get("entry", []):
            ent = process_orig_entry(raw_entry)
            if ent is not None:
                entries.append(ent)

    return entries


def normalize_content(orig_content: str, vol_num: int) -> str:
    """Turn the leading paragraph of a post into an ``<h2>`` chapter heading.

    Empty ``<p><br /></p>`` paragraphs are removed first. The content is then
    split on its first two ``</p>`` tags and handled in one of three ways:

    * first paragraph contains "Chapter 0": that paragraph is dropped and
      the second paragraph becomes the heading;
    * first paragraph contains "Volume 3" and ``vol_num`` is 3: a fixed
      heading is used and the second paragraph is dropped;
    * otherwise: the first paragraph becomes the heading, with the
      ``[Vol. N] `` prefix removed.

    Content that does not contain enough paragraphs for the selected case is
    returned with only the empty paragraphs removed.
    """
    normalized = orig_content.replace("<p><br /></p>", "")
    # pieces[0] and pieces[1] lose their closing "</p>" in the split; the
    # final piece is the untouched remainder of the document.
    pieces = normalized.split("</p>", maxsplit=2)
    if len(pieces) == 1:
        # No closed paragraph, so there is nothing to promote to a heading.
        return normalized

    first = pieces[0]

    if "Chapter 0" in first:
        if len(pieces) < 3:
            return normalized
        heading = pieces[1].replace("<p>", "<h2>") + "</h2>"
        # The remainder still has all of its own closing tags.
        return heading + pieces[2]

    if "Volume 3" in first and vol_num == 3:
        # HACK: book-specific heading for the opening post of volume 3.
        if len(pieces) < 3:
            return normalized
        heading = "<h2>Chapter 1: A New Journey, Starting with New Clothes</h2>"
        return heading + pieces[2]

    heading = first.replace("<p>", "<h2>").replace(f"[Vol. {vol_num}] ", "") + "</h2>"
    if len(pieces) == 2:
        return heading + pieces[1]
    # Restore the closing tag the split removed from the second paragraph.
    return heading + pieces[1] + "</p>" + pieces[2]


def create_cover(vol_num: int) -> io.BytesIO:
    """Draw "Volume N" onto ``cover.jpg`` and return the result as PNG bytes.

    The text is drawn in black at ``IMAGE_TEXT_XY`` using ``VOL_FONT``.

    Returns:
        An in-memory buffer holding the PNG-encoded cover.
    """
    io_file = io.BytesIO()
    with Image.open("cover.jpg") as im:
        drw = ImageDraw.Draw(im)
        drw.text(
            IMAGE_TEXT_XY,
            f"Volume {vol_num}",
            font=VOL_FONT,
            fill=(0, 0, 0, 255),
        )
        im.save(io_file, format="PNG")
    return io_file


def create_chapter(entry: Entry) -> epub.EpubHtml:
    """Build the EPUB HTML item for one chapter.

    The item id and file name are derived from the chapter identifier with
    dots replaced by underscores (``12.5`` -> ``ch12_5``).
    """
    # NOTE(review): CHAPTER_REGEX only captures numeric identifiers, so the
    # "Prologue" case is not reachable for entries built by
    # process_orig_entry(); it is kept for entries constructed elsewhere.
    if "Prologue" in entry.chapter:
        chap_name = "Prologue"
    else:
        chap_name = f"Chapter {entry.chapter}"

    chap_id = f"ch{entry.chapter.replace('.', '_')}"
    ht = epub.EpubHtml(
        uid=chap_id,
        title=chap_name,
        file_name=f"{chap_id}.xhtml",
        lang="en",
    )
    ht.set_content(normalize_content(entry.content, entry.volume))

    return ht


def _add_role(book: epub.EpubBook, role: str, target_id: str) -> None:
    """Attach a MARC relator role to the metadata element with id ``target_id``."""
    book.add_metadata(
        None,
        "meta",
        role,
        {"property": "role", "scheme": "marc:relators", "refines": f"#{target_id}"},
    )


def create_volumes(
    vol_count: int, entries: list[Entry], book_name: str, author: str | None
):
    """Write one EPUB file per volume into the current directory.

    Args:
        vol_count: Highest volume number to build; volumes ``1..vol_count``
            are attempted and those without any entries are skipped.
        entries: All parsed entries, in feed order. Each volume's entries
            are reversed before being added to the book.
        book_name: Series title; also used (spaces replaced by ``_``) for
            the identifier and the output file name.
        author: Optional original author to record as a creator.
    """
    name_filesafe = book_name.replace(" ", "_")

    for v in range(1, vol_count + 1):
        vol_entries = [e for e in entries if e.volume == v]
        if not vol_entries:
            print(f"Skipping Volume {v}: no chapters found")
            continue
        print(f"Creating Volume {v}")
        vol_entries.reverse()

        book = epub.EpubBook()
        book.set_identifier(f"{name_filesafe}_Vol_{v}")
        book.set_title(f"{book_name}, Volume {v}")
        # Language tag rather than a language name; matches the chapters.
        book.set_language("en")
        cover_io = create_cover(v)
        book.set_cover("cover.png", cover_io.getvalue())

        if author:
            book.add_author(author)
            # add_author() registers the creator under the id "creator";
            # only refine it when it was actually added.
            _add_role(book, "aut", "creator")
        book.add_metadata("DC", "creator", "Cardboard Translation", {"id": "cardboard"})
        book.add_metadata("DC", "creator", "dish", {"id": "dish"})
        book.add_metadata("DC", "publisher", "Cardboard Translation")
        _add_role(book, "trl", "cardboard")
        _add_role(book, "aut", "cardboard")
        _add_role(book, "com", "dish")

        with open("description.txt", "r", encoding="utf-8") as f:
            book.add_metadata("DC", "description", f.read())

        # Series Metadata
        book.add_metadata(
            None, "meta", book_name, {"property": "belongs-to-collection", "id": "c01"}
        )
        book.add_metadata(
            None, "meta", "series", {"refines": "#c01", "property": "collection-type"}
        )
        book.add_metadata(
            None, "meta", str(v), {"refines": "#c01", "property": "group-position"}
        )
        book.spine = ["nav"]

        for ch in vol_entries:
            chap: epub.EpubHtml = create_chapter(ch)
            book.add_item(chap)
            book.toc.append(chap)
            book.spine.append(chap)

        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())

        epub.write_epub(f"{name_filesafe}_Vol_{v}.epub", book)


def main():
    """Parse the command line, download the feed and write the EPUB files."""
    args = parser.parse_args()
    translation_name = urllib.parse.quote(args.name)
    book_root = (
        f"{URL_ROOT}/{translation_name}?alt=json&max-results={ENTRIES_PER_FETCH}"
    )
    count_to_fetch = root_fetch(book_root)
    entries = fetch_entries(book_root, count_to_fetch)
    # Use the highest volume number, not the number of distinct volumes, so
    # that a feed holding e.g. only volumes 2 and 3 still builds volume 3.
    vol_count = max(volumes, default=0)
    create_volumes(vol_count, entries, args.name, args.author)


if __name__ == "__main__":
    main()