# A very janky downloader for Cardboard Translations ebooks
1from io import BytesIO
2from ebooklib.epub import IMAGE_MEDIA_TYPES
3from dataclasses import dataclass
4from ebooklib import epub
5
6import argparse
7import json
8import math
9import io
10import re
11import time
12import urllib.request
13import urllib.parse
14
15from PIL import ImageFont, Image, ImageDraw
16
# Blogger JSON feed root for the translation site; the book's label is
# appended as a path segment.
URL_ROOT = "https://www.cardboardtranslation.com/feeds/posts/default/-"
# Page size for feed pagination (Blogger's max-results parameter).
ENTRIES_PER_FETCH = 100
# Everything after "] " in a post title, i.e. the title without the
# "[Vol. N]" prefix.
TITLE_REGEX = re.compile(r"(?<=] ).*")
# Captures the volume number from the "[Vol. N]" title prefix.
VOLUME_STRING_REGEX = re.compile(r"\[Vol.\s+(?P<num>\d)\]")
# Captures the chapter number, possibly fractional (e.g. "11.5"),
# with an optional ": subtitle" tail.
CHAPTER_REGEX = re.compile(r".*\] Chapter (?P<num>\d+(\.\d)?)(?::.*)?")
# Pixel position on the cover image where the "Volume N" text is drawn.
IMAGE_TEXT_XY = (411, 27)
# Variable font for the cover text; loaded once at import time.
# NOTE(review): requires OreichalkonVF.ttf in the working directory — confirm.
VOL_FONT = ImageFont.truetype("OreichalkonVF.ttf", 120)
VOL_FONT.set_variation_by_name("Medium")

# Volume numbers seen while parsing; filled by process_orig_entry.
volumes: set[int] = set()
27
28
@dataclass
class Entry:
    """One parsed blog post: a single chapter of some volume."""

    # Chapter title with the "[Vol. N] " prefix stripped.
    title: str
    # Volume this chapter belongs to.
    volume: int
    # Chapter number as captured from the title. Kept as a string because it
    # may be fractional ("11.5") and create_chapter() calls .replace()/`in`
    # on it — the previous `int` annotation was simply wrong.
    chapter: str
    # Raw HTML body of the post.
    content: str
35
36
# Accumulates every parsed chapter across all fetched feed pages.
entries: list[Entry] = []
38
# Command-line interface. The book name is mandatory: without it there is
# no feed URL to build (previously a missing -n crashed later with a
# TypeError inside urllib.parse.quote).
parser = argparse.ArgumentParser()
parser.add_argument("-n", "--name", help="The book to download", required=True)
parser.add_argument(
    "-a", "--author", help="The author name to add to the metadata", default=None
)
44
45
def process_orig_entry(entry: dict) -> Entry | None:
    """Parse one raw Blogger feed entry into an Entry.

    Returns None when the post title does not follow the expected
    "[Vol. N] Chapter M" shape (announcements, prologues, etc.).
    Side effect: records the volume number in the module-level `volumes` set.
    """
    orig_title = entry["title"]["$t"]
    title_match = TITLE_REGEX.search(orig_title)
    vol_match = VOLUME_STRING_REGEX.search(orig_title)
    chap_match = CHAPTER_REGEX.search(orig_title)
    # Previously only the title regex was guarded (by a bare `except:`);
    # a title with no volume/chapter marker crashed with AttributeError.
    if title_match is None or vol_match is None or chap_match is None:
        return None

    entry_title = title_match.group(0)
    entry_vol = int(vol_match.group("num"))
    entry_chap = chap_match.group("num")  # kept as str (may be "11.5")
    volumes.add(entry_vol)
    content = entry["content"]["$t"]

    return Entry(entry_title, entry_vol, entry_chap, content)
58
59
def root_fetch(url: str) -> int:
    """Query the feed once to learn the total post count and return the
    number of paged fetches needed to cover it."""
    with urllib.request.urlopen(url) as resp:
        feed = json.load(resp)
    total_entries = int(feed["feed"]["openSearch$totalResults"]["$t"])
    feeds_to_fetch = math.ceil(total_entries / ENTRIES_PER_FETCH)
    print(f"Fetching {feeds_to_fetch} feeds")
    return feeds_to_fetch
68
69
def fetch_entries(root: str, count: int) -> list[Entry]:
    """Fetch `count` pages of the feed and parse every post into an Entry.

    root: feed URL with alt=json and max-results already set.
    count: number of pages to request (as computed by root_fetch).
    """
    entries: list[Entry] = []
    for page in range(count):
        start_index = (page * ENTRIES_PER_FETCH) + 1
        # Use the constant instead of a hard-coded 100 so the message stays
        # correct if ENTRIES_PER_FETCH ever changes.
        print(f"Fetching Entries {start_index} to {start_index + ENTRIES_PER_FETCH}")
        url = f"{root}&start-index={start_index}"
        with urllib.request.urlopen(url) as r:
            feed_content = json.load(r)
        # Blogger omits the "entry" key entirely when a page is empty;
        # previously that raised KeyError.
        for raw_entry in feed_content["feed"].get("entry", []):
            ent = process_orig_entry(raw_entry)
            if ent is not None:
                entries.append(ent)

    return entries
85
86
def normalize_content(orig_content: str, vol_num: int) -> str:
    """Clean a chapter's raw HTML: drop empty paragraphs and turn the
    first (title) paragraph into an <h2> heading.

    orig_content: raw post HTML from the Blogger feed.
    vol_num: volume number, used to strip the "[Vol. N] " title prefix.
    """
    normalized = orig_content.replace("<p><br /></p>", "")
    # Split off the first two paragraphs; the remainder stays intact.
    paras = normalized.split("</p>", maxsplit=2)
    if "Chapter 0" in paras[0]:
        # "Chapter 0" posts carry the real title in the second paragraph.
        paras.pop(0)
        first_para = paras[0].replace("<p>", "<h2>") + "</h2>"
    elif "Volume 3" in paras[0] and vol_num == 3:
        # Special case: volume 3's opening post has no usable title paragraph.
        first_para = "<h2>Chapter 1: A New Journey, Starting with New Clothes</h2>"
        paras.pop(1)
    else:
        first_para = (
            paras[0].replace("<p>", "<h2>").replace(f"[Vol. {vol_num}] ", "") + "</h2>"
        )
    # split() removed "</p>" only from the paragraphs it cut; after a pop()
    # the second element may already end with "</p>", and unconditionally
    # appending another one (as before) produced invalid XHTML.
    if len(paras) > 1 and not paras[1].endswith("</p>"):
        paras[1] = paras[1] + "</p>"
    paras[0] = first_para
    return "".join(paras)
104
105
def create_cover(vol_num: int) -> io.BytesIO:
    """Render "Volume N" onto the base cover art and return the result
    as an in-memory PNG buffer."""
    buffer = io.BytesIO()
    with Image.open("cover.jpg") as cover:
        canvas = ImageDraw.Draw(cover)
        canvas.text(
            IMAGE_TEXT_XY,
            f"Volume {vol_num}",
            font=VOL_FONT,
            fill=(0, 0, 0, 255),
        )
        cover.save(buffer, format="PNG")
    return buffer
118
119
def create_chapter(entry: Entry) -> epub.EpubHtml:
    """Build an EpubHtml chapter document from a parsed Entry.

    entry.chapter is a string such as "12" or "12.5"; dots are replaced
    with underscores to keep the uid and file name safe.
    """
    # The original computed an `is_prologue` flag that was never used;
    # the name is decided directly instead.
    chap_name = (
        "Prologue" if "Prologue" in entry.chapter else f"Chapter {entry.chapter}"
    )
    chap_id = f"ch{entry.chapter.replace('.', '_')}"
    ht = epub.EpubHtml(
        uid=chap_id,
        title=chap_name,
        file_name=f"{chap_id}.xhtml",
        lang="en",
    )
    ht.set_content(normalize_content(entry.content, entry.volume))

    return ht
134
135
def create_volumes(
    vol_count: int, entries: list[Entry], book_name: str, author: str | None
):
    """Assemble and write one EPUB file per volume.

    vol_count: number of volumes; volumes 1..vol_count are generated.
    entries: all parsed chapters, in feed (newest-first) order.
    book_name: human-readable series name; also used for file names.
    author: optional original-author name for the metadata.
    """
    for v in range(1, vol_count + 1):
        print(f"Creating Volume {v}")
        vol_entries = [e for e in entries if e.volume == v]
        # The feed is newest-first; reverse to get reading order.
        vol_entries.reverse()
        name_filesafe = book_name.replace(" ", "_")
        book = epub.EpubBook()
        book.set_identifier(f"{name_filesafe}_Vol_{v}")
        book.set_title(f"{book_name}, Volume {v}")
        # dc:language must be an RFC 5646 tag ("en"), not a display name;
        # "English" is rejected by epubcheck.
        book.set_language("en")
        cover_io = create_cover(v)
        book.set_cover("cover.png", cover_io.getvalue())
        if author:
            book.add_author(author)
        book.add_metadata("DC", "creator", "Cardboard Translation", {"id": "cardboard"})
        book.add_metadata("DC", "creator", "dish", {"id": "dish"})
        book.add_metadata("DC", "publisher", "Cardboard Translation")
        # MARC relator refinements: author / translator / compiler roles.
        book.add_metadata(
            None,
            "meta",
            "aut",
            {"property": "role", "scheme": "marc:relators", "refines": "#creator"},
        )
        book.add_metadata(
            None,
            "meta",
            "trl",
            {"property": "role", "scheme": "marc:relators", "refines": "#cardboard"},
        )
        book.add_metadata(
            None,
            "meta",
            "aut",
            {"property": "role", "scheme": "marc:relators", "refines": "#cardboard"},
        )
        book.add_metadata(
            None,
            "meta",
            "com",
            {"property": "role", "scheme": "marc:relators", "refines": "#dish"},
        )
        # Explicit encoding: the platform default may not be UTF-8.
        with open("description.txt", "r", encoding="utf-8") as f:
            book.add_metadata("DC", "description", f.read())
        # Series Metadata (EPUB 3 belongs-to-collection).
        book.add_metadata(
            None, "meta", book_name, {"property": "belongs-to-collection", "id": "c01"}
        )
        book.add_metadata(
            None, "meta", "series", {"refines": "#c01", "property": "collection-type"}
        )
        book.add_metadata(
            None, "meta", str(v), {"refines": "#c01", "property": "group-position"}
        )
        book.spine = ["nav"]

        for ch in vol_entries:
            chap: epub.EpubHtml = create_chapter(ch)
            book.add_item(chap)
            book.toc.append(chap)
            book.spine.append(chap)

        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())

        epub.write_epub(f"{name_filesafe}_Vol_{v}.epub", book)
203
204
def main():
    """Entry point: parse CLI args, fetch the whole feed, and write EPUBs."""
    args = parser.parse_args()
    # Fail early with a usage message instead of a TypeError from quote(None)
    # when -n/--name was not supplied.
    if not args.name:
        parser.error("the book name is required (-n/--name)")
    # The name becomes part of the URL path, so it must be percent-escaped.
    translation_name = urllib.parse.quote(args.name)
    book_root = (
        f"{URL_ROOT}/{translation_name}?alt=json&max-results={ENTRIES_PER_FETCH}"
    )
    count_to_fetch = root_fetch(book_root)
    entries = fetch_entries(book_root, count_to_fetch)
    vol_count = len(volumes)
    create_volumes(vol_count, entries, args.name, args.author)
215
216
# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()