A fork of https://github.com/crosspoint-reader/crosspoint-reader

fix: increase loadable epub size (#1638)

## Summary

* **What is the goal of this PR?** Raise the out-of-memory (OOM) ceiling hit when loading large epubs
* **What changes are included?** Switched the `std::vector` containers used while parsing epubs to `std::deque`, which allocates in small blocks rather than one contiguous buffer and can therefore make use of more memory (see the sketch below)

## Additional Context

* Increases the loadable epub size from 2000+ chapter/ToC entries to 5000+ chapter/ToC entries
* Same approach as #1574, but without the complicated parts

---

### AI Usage

While CrossPoint doesn't restrict the use of AI tools in contributions, please be transparent about their usage, as it helps set the right context for reviewers.

Did you use AI tools to help write this code? _**NO**_

### Testing

| Build | Book | Load time |
|-------|------|-----------|
| 83cd96bc2fd90e5f5a8b0ceaed2022a16a017a1c (baseline) | 1000.epub | ~30 sec |
| 83cd96bc2fd90e5f5a8b0ceaed2022a16a017a1c (baseline) | 5000.epub | crash |
| PR | 1000.epub | ~29 sec |
| PR | 5000.epub | ~2 min 20 sec |

=> No loading-time regressions for books that loaded before; 5000.epub goes from crashing to loading in ~2 min 20 sec

[tested_epubs.zip](https://github.com/user-attachments/files/26645243/tested_epubs.zip)

Authored by CSCMe, committed by GitHub (9bc5111c, 9c11f3e4)

**+15 -14** across 5 files
**lib/Epub/Epub/BookMetadataCache.cpp** (+8 -8)

```diff
@@ -4,7 +4,7 @@
 #include <Serialization.h>
 #include <ZipFile.h>
 
-#include <vector>
+#include <deque>
 
 #include "FsHelpers.h"
 
@@ -50,7 +50,7 @@
 
   if (spineCount >= LARGE_SPINE_THRESHOLD) {
     spineHrefIndex.clear();
-    spineHrefIndex.reserve(spineCount);
+    spineHrefIndex.resize(spineCount);
     spineFile.seek(0);
     for (int i = 0; i < spineCount; i++) {
       auto entry = readSpineEntry(spineFile);
@@ -58,7 +58,7 @@
       idx.hrefHash = fnvHash64(entry.href);
       idx.hrefLen = static_cast<uint16_t>(entry.href.size());
       idx.spineIndex = static_cast<int16_t>(i);
-      spineHrefIndex.push_back(idx);
+      spineHrefIndex[i] = idx;
     }
     std::sort(spineHrefIndex.begin(), spineHrefIndex.end(),
               [](const SpineHrefIndexEntry& a, const SpineHrefIndexEntry& b) {
@@ -153,7 +153,7 @@
   // Loop through spines from spine file matching up TOC indexes, calculating cumulative size and writing to book.bin
 
   // Build spineIndex->tocIndex mapping in one pass (O(n) instead of O(n*m))
-  std::vector<int16_t> spineToTocIndex(spineCount, -1);
+  std::deque<int16_t> spineToTocIndex(spineCount, -1);
   tocFile.seek(0);
   for (int j = 0; j < tocCount; j++) {
     auto tocEntry = readTocEntry(tocFile);
@@ -181,14 +181,14 @@
   // This is O(n*log(m)) instead of O(n*m) while avoiding memory exhaustion.
   // See: https://github.com/crosspoint-reader/crosspoint-reader/issues/134
 
-  std::vector<uint32_t> spineSizes;
+  std::deque<uint32_t> spineSizes;
   bool useBatchSizes = false;
 
   if (spineCount >= LARGE_SPINE_THRESHOLD) {
     LOG_DBG("BMC", "Using batch size lookup for %d spine items", spineCount);
 
-    std::vector<ZipFile::SizeTarget> targets;
-    targets.reserve(spineCount);
+    std::deque<ZipFile::SizeTarget> targets;
+    targets.resize(spineCount);
 
     spineFile.seek(0);
     for (int i = 0; i < spineCount; i++) {
@@ -199,7 +199,7 @@
       t.hash = ZipFile::fnvHash64(path.c_str(), path.size());
       t.len = static_cast<uint16_t>(path.size());
       t.index = static_cast<uint16_t>(i);
-      targets.push_back(t);
+      targets[i] = t;
     }
 
     std::sort(targets.begin(), targets.end(), [](const ZipFile::SizeTarget& a, const ZipFile::SizeTarget& b) {
```
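One consequence of the swap is visible in the hunks above: `std::deque` has no `reserve()`, so the pre-allocation becomes `resize()` and the fill loop assigns by index instead of calling `push_back()`. A minimal sketch of that pattern (generic, not code from this PR):

```cpp
#include <deque>

std::deque<int> entries;
entries.resize(4);  // default-constructs all 4 slots up front (deque has no reserve())
for (int i = 0; i < 4; i++) {
  entries[i] = i * i;  // assign in place; push_back() would grow past size 4
}
```

The later `std::sort(spineHrefIndex.begin(), ...)` call keeps working unchanged, since `std::deque` iterators are random-access just like `std::vector`'s.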
**lib/Epub/Epub/BookMetadataCache.h** (+2 -2)

```diff
@@ -3,6 +3,6 @@
 #include <HalStorage.h>
 
 #include <algorithm>
+#include <deque>
 #include <string>
-#include <vector>
 
@@ -61,7 +61,7 @@
     uint16_t hrefLen;  // length for collision reduction
     int16_t spineIndex;
   };
-  std::vector<SpineHrefIndexEntry> spineHrefIndex;
+  std::deque<SpineHrefIndexEntry> spineHrefIndex;
   bool useSpineHrefIndex = false;
 
   static constexpr uint16_t LARGE_SPINE_THRESHOLD = 400;
```
**lib/Epub/Epub/parsers/ContentOpfParser.h** (+2 -1)

```diff
@@ -2,6 +2,7 @@
 #include <Print.h>
 
 #include <algorithm>
+#include <deque>
 #include <vector>
 
 #include "Epub.h"
@@ -37,7 +38,7 @@
     uint16_t idLen;       // length for collision reduction
     uint32_t fileOffset;  // offset in .items.bin
   };
-  std::vector<ItemIndexEntry> itemIndex;
+  std::deque<ItemIndexEntry> itemIndex;
   bool useItemIndex = false;
 
   static constexpr uint16_t LARGE_SPINE_THRESHOLD = 400;
```
**lib/ZipFile/ZipFile.cpp** (+1 -1)

```diff
@@ -295,7 +295,7 @@
   return true;
 }
 
-int ZipFile::fillUncompressedSizes(std::vector<SizeTarget>& targets, std::vector<uint32_t>& sizes) {
+int ZipFile::fillUncompressedSizes(std::deque<SizeTarget>& targets, std::deque<uint32_t>& sizes) {
   if (targets.empty()) {
     return 0;
   }
```
**lib/ZipFile/ZipFile.h** (+2 -2)

```diff
@@ -1,9 +1,9 @@
 #pragma once
 #include <HalStorage.h>
 
+#include <deque>
 #include <string>
 #include <unordered_map>
-#include <vector>
 
 class ZipFile {
  public:
@@ -64,7 +64,7 @@
   // Batch lookup: scan ZIP central dir once and fill sizes for matching targets.
   // targets must be sorted by (hash, len). sizes[target.index] receives uncompressedSize.
   // Returns number of targets matched.
-  int fillUncompressedSizes(std::vector<SizeTarget>& targets, std::vector<uint32_t>& sizes);
+  int fillUncompressedSizes(std::deque<SizeTarget>& targets, std::deque<uint32_t>& sizes);
   // Due to the memory required to run each of these, it is recommended to not preopen the zip file for multiple
   // These functions will open and close the zip as needed
   uint8_t* readFileToMemory(const char* filename, size_t* size = nullptr, bool trailingNullByte = false);
```
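
For reference, a hypothetical caller of the updated batch API, following the contract in the header comment above (`targets` sorted by `(hash, len)`, `sizes[target.index]` receives `uncompressedSize`). The comparator and the surrounding function are illustrative assumptions, not code from the repository:

```cpp
#include <ZipFile.h>

#include <algorithm>
#include <cstdint>
#include <deque>

// Hypothetical usage sketch of the deque-based signature.
void lookupSpineSizes(ZipFile& zip, std::deque<ZipFile::SizeTarget>& targets) {
  // The header requires targets sorted by (hash, len); this comparator is an
  // assumed implementation of that ordering.
  std::sort(targets.begin(), targets.end(),
            [](const ZipFile::SizeTarget& a, const ZipFile::SizeTarget& b) {
              return a.hash != b.hash ? a.hash < b.hash : a.len < b.len;
            });

  // sizes[target.index] is filled with the uncompressed size of each match.
  std::deque<uint32_t> sizes(targets.size(), 0);
  int matched = zip.fillUncompressedSizes(targets, sizes);
  (void)matched;  // number of targets found in the ZIP central directory
}
```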