A fork of https://github.com/crosspoint-reader/crosspoint-reader
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

fix: Switch to XPath map for paragraph-level syncing in KOSync (#1686)

Switch KOReader sync progress mapping from chapter matching to
XPath-based mapping.

- resolves KOReader positions using real XHTML ancestry paths
- supports paragraph-based upload mapping with text offsets
where needed
- passes the current paragraph index into sync so uploads map
back to KOReader more accurately

No HTTP client changes are included. No reader-state or resume-flow
changes are included.

---------

Co-authored-by: jpirnay <jens@pirnay.com>

authored by

Justin Mitchell
jpirnay
and committed by
GitHub
302dea1e e8645ed9

+1050 -110
+99 -12
lib/Epub/Epub/Section.cpp
··· 10 10 #include "parsers/ChapterHtmlSlimParser.h" 11 11 12 12 namespace { 13 - constexpr uint8_t SECTION_FILE_VERSION = 19; 13 + constexpr uint8_t SECTION_FILE_VERSION = 20; 14 14 constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint8_t) + 15 15 sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(bool) + sizeof(bool) + 16 - sizeof(uint8_t) + sizeof(uint32_t) + sizeof(uint32_t); 16 + sizeof(uint8_t) + sizeof(uint32_t) + sizeof(uint32_t) + sizeof(uint32_t); 17 + 18 + struct PageLutEntry { 19 + uint32_t fileOffset; 20 + uint16_t paragraphIndex; 21 + }; 17 22 } // namespace 18 23 19 24 uint32_t Section::onPageComplete(std::unique_ptr<Page> page) { ··· 44 49 static_assert(HEADER_SIZE == sizeof(SECTION_FILE_VERSION) + sizeof(fontId) + sizeof(lineCompression) + 45 50 sizeof(extraParagraphSpacing) + sizeof(paragraphAlignment) + sizeof(viewportWidth) + 46 51 sizeof(viewportHeight) + sizeof(pageCount) + sizeof(hyphenationEnabled) + 47 - sizeof(embeddedStyle) + sizeof(imageRendering) + sizeof(uint32_t) + sizeof(uint32_t), 52 + sizeof(embeddedStyle) + sizeof(imageRendering) + sizeof(uint32_t) + 53 + sizeof(uint32_t) + sizeof(uint32_t), 48 54 "Header size mismatch"); 49 55 serialization::writePod(file, SECTION_FILE_VERSION); 50 56 serialization::writePod(file, fontId); ··· 59 65 serialization::writePod(file, pageCount); // Placeholder for page count (will be initially 0, patched later) 60 66 serialization::writePod(file, static_cast<uint32_t>(0)); // Placeholder for LUT offset (patched later) 61 67 serialization::writePod(file, static_cast<uint32_t>(0)); // Placeholder for anchor map offset (patched later) 68 + serialization::writePod(file, static_cast<uint32_t>(0)); // Placeholder for paragraph LUT offset (patched later) 62 69 } 63 70 64 71 bool Section::loadSectionFile(const int fontId, const float lineCompression, const bool extraParagraphSpacing, ··· 190 197 } 191 198 writeSectionFileHeader(fontId, 
lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth, 192 199 viewportHeight, hyphenationEnabled, embeddedStyle, imageRendering); 193 - std::vector<uint32_t> lut = {}; 200 + std::vector<PageLutEntry> lut = {}; 194 201 195 202 // Derive the content base directory and image cache path prefix for the parser 196 203 size_t lastSlash = localPath.find_last_of('/'); ··· 210 217 ChapterHtmlSlimParser visitor( 211 218 epub, tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth, 212 219 viewportHeight, hyphenationEnabled, 213 - [this, &lut](std::unique_ptr<Page> page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, 220 + [this, &lut](std::unique_ptr<Page> page, const uint16_t paragraphIndex) { 221 + lut.push_back({this->onPageComplete(std::move(page)), paragraphIndex}); 222 + }, 214 223 embeddedStyle, contentBase, imageBasePath, imageRendering, popupFn, cssParser); 215 224 Hyphenator::setPreferredLanguage(epub->getLanguage()); 216 225 success = visitor.parseAndBuildPages(); ··· 230 239 const uint32_t lutOffset = file.position(); 231 240 bool hasFailedLutRecords = false; 232 241 // Write LUT 233 - for (const uint32_t& pos : lut) { 234 - if (pos == 0) { 242 + for (const auto& entry : lut) { 243 + if (entry.fileOffset == 0) { 235 244 hasFailedLutRecords = true; 236 245 break; 237 246 } 238 - serialization::writePod(file, pos); 247 + serialization::writePod(file, entry.fileOffset); 239 248 } 240 249 241 250 if (hasFailedLutRecords) { ··· 255 264 serialization::writePod(file, page); 256 265 } 257 266 258 - // Patch header with final pageCount, lutOffset, and anchorMapOffset 259 - file.seek(HEADER_SIZE - sizeof(uint32_t) * 2 - sizeof(pageCount)); 267 + const uint32_t paragraphLutOffset = file.position(); 268 + serialization::writePod(file, static_cast<uint16_t>(lut.size())); 269 + for (const auto& entry : lut) { 270 + serialization::writePod(file, entry.paragraphIndex); 271 + } 272 + 273 + // 
Patch header with final pageCount, lutOffset, anchorMapOffset, and paragraphLutOffset 274 + file.seek(HEADER_SIZE - sizeof(uint32_t) * 3 - sizeof(pageCount)); 260 275 serialization::writePod(file, pageCount); 261 276 serialization::writePod(file, lutOffset); 262 277 serialization::writePod(file, anchorMapOffset); 278 + serialization::writePod(file, paragraphLutOffset); 263 279 // Explicit close() required: member variable persists beyond function scope 264 280 file.close(); 265 281 if (cssParser) { ··· 273 289 return nullptr; 274 290 } 275 291 276 - file.seek(HEADER_SIZE - sizeof(uint32_t) * 2); 292 + file.seek(HEADER_SIZE - sizeof(uint32_t) * 3); 277 293 uint32_t lutOffset; 278 294 serialization::readPod(file, lutOffset); 279 295 file.seek(lutOffset + sizeof(uint32_t) * currentPage); ··· 294 310 } 295 311 296 312 const uint32_t fileSize = f.size(); 297 - f.seek(HEADER_SIZE - sizeof(uint32_t)); 313 + f.seek(HEADER_SIZE - sizeof(uint32_t) * 2); 298 314 uint32_t anchorMapOffset; 299 315 serialization::readPod(f, anchorMapOffset); 300 316 if (anchorMapOffset == 0 || anchorMapOffset >= fileSize) { ··· 316 332 317 333 return std::nullopt; 318 334 } 335 + 336 + std::optional<uint16_t> Section::getPageForParagraphIndex(const uint16_t pIndex) const { 337 + FsFile f; 338 + if (!Storage.openFileForRead("SCT", filePath, f)) { 339 + return std::nullopt; 340 + } 341 + 342 + const uint32_t fileSize = f.size(); 343 + f.seek(HEADER_SIZE - sizeof(uint32_t)); 344 + uint32_t paragraphLutOffset; 345 + serialization::readPod(f, paragraphLutOffset); 346 + if (paragraphLutOffset == 0 || paragraphLutOffset >= fileSize) { 347 + return std::nullopt; 348 + } 349 + 350 + f.seek(paragraphLutOffset); 351 + uint16_t count; 352 + serialization::readPod(f, count); 353 + if (count == 0) { 354 + return std::nullopt; 355 + } 356 + 357 + const uint32_t lutEnd = paragraphLutOffset + sizeof(uint16_t) + count * sizeof(uint16_t); 358 + if (lutEnd > fileSize) { 359 + return std::nullopt; 360 + } 361 + 362 
+ uint16_t resultPage = count - 1; 363 + for (uint16_t i = 0; i < count; i++) { 364 + uint16_t pagePIdx; 365 + serialization::readPod(f, pagePIdx); 366 + if (pagePIdx >= pIndex) { 367 + resultPage = i; 368 + break; 369 + } 370 + } 371 + 372 + return resultPage; 373 + } 374 + 375 + std::optional<uint16_t> Section::getParagraphIndexForPage(const uint16_t page) const { 376 + FsFile f; 377 + if (!Storage.openFileForRead("SCT", filePath, f)) { 378 + return std::nullopt; 379 + } 380 + 381 + const uint32_t fileSize = f.size(); 382 + f.seek(HEADER_SIZE - sizeof(uint32_t)); 383 + uint32_t paragraphLutOffset; 384 + serialization::readPod(f, paragraphLutOffset); 385 + if (paragraphLutOffset == 0 || paragraphLutOffset >= fileSize) { 386 + return std::nullopt; 387 + } 388 + 389 + f.seek(paragraphLutOffset); 390 + uint16_t count; 391 + serialization::readPod(f, count); 392 + if (count == 0 || page >= count) { 393 + return std::nullopt; 394 + } 395 + 396 + const uint32_t entryEnd = paragraphLutOffset + sizeof(uint16_t) + (page + 1) * sizeof(uint16_t); 397 + if (entryEnd > fileSize) { 398 + return std::nullopt; 399 + } 400 + 401 + f.seek(paragraphLutOffset + sizeof(uint16_t) + page * sizeof(uint16_t)); 402 + uint16_t pIdx; 403 + serialization::readPod(f, pIdx); 404 + return pIdx; 405 + }
+6
lib/Epub/Epub/Section.h
··· 42 42 43 43 // Look up the page number for an anchor id from the section cache file. 44 44 std::optional<uint16_t> getPageForAnchor(const std::string& anchor) const; 45 + 46 + // Look up the page number for a synthetic paragraph index from XPath p[N]. 47 + std::optional<uint16_t> getPageForParagraphIndex(uint16_t pIndex) const; 48 + 49 + // Look up the synthetic paragraph index for the given rendered page. 50 + std::optional<uint16_t> getParagraphIndexForPage(uint16_t page) const; 45 51 };
+7 -3
lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp
··· 163 163 return; 164 164 } 165 165 166 + if (strcmp(name, "p") == 0) { 167 + self->xpathParagraphIndex++; 168 + } 169 + 166 170 // Extract class, style, and id attributes 167 171 std::string classAttr; 168 172 std::string styleAttr; ··· 428 432 // Create page for image - only break if image won't fit remaining space 429 433 if (self->currentPage && !self->currentPage->elements.empty() && 430 434 (self->currentPageNextY + displayHeight > self->viewportHeight)) { 431 - self->completePageFn(std::move(self->currentPage)); 435 + self->completePageFn(std::move(self->currentPage), self->xpathParagraphIndex); 432 436 self->completedPageCount++; 433 437 self->currentPage.reset(new Page()); 434 438 if (!self->currentPage) { ··· 1066 1070 anchorData.push_back({std::move(pendingAnchorId), static_cast<uint16_t>(completedPageCount)}); 1067 1071 pendingAnchorId.clear(); 1068 1072 } 1069 - completePageFn(std::move(currentPage)); 1073 + completePageFn(std::move(currentPage), xpathParagraphIndex); 1070 1074 completedPageCount++; 1071 1075 currentPage.reset(); 1072 1076 currentTextBlock.reset(); ··· 1084 1088 } 1085 1089 1086 1090 if (currentPageNextY + lineHeight > viewportHeight) { 1087 - completePageFn(std::move(currentPage)); 1091 + completePageFn(std::move(currentPage), xpathParagraphIndex); 1088 1092 completedPageCount++; 1089 1093 currentPage.reset(new Page()); 1090 1094 currentPageNextY = 0;
+3 -2
lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h
··· 25 25 std::shared_ptr<Epub> epub; 26 26 const std::string& filepath; 27 27 GfxRenderer& renderer; 28 - std::function<void(std::unique_ptr<Page>)> completePageFn; 28 + std::function<void(std::unique_ptr<Page>, uint16_t)> completePageFn; 29 29 std::function<void()> popupFn; // Popup callback 30 30 int depth = 0; 31 31 int skipUntilDepth = INT_MAX; ··· 74 74 int completedPageCount = 0; 75 75 std::vector<std::pair<std::string, uint16_t>> anchorData; 76 76 std::string pendingAnchorId; // deferred until after previous text block is flushed 77 + uint16_t xpathParagraphIndex = 0; 77 78 78 79 // Footnote link tracking 79 80 bool insideFootnoteLink = false; ··· 99 100 const int fontId, const float lineCompression, const bool extraParagraphSpacing, 100 101 const uint8_t paragraphAlignment, const uint16_t viewportWidth, 101 102 const uint16_t viewportHeight, const bool hyphenationEnabled, 102 - const std::function<void(std::unique_ptr<Page>)>& completePageFn, 103 + const std::function<void(std::unique_ptr<Page>, uint16_t)>& completePageFn, 103 104 const bool embeddedStyle, const std::string& contentBase, 104 105 const std::string& imageBasePath, const uint8_t imageRendering = 0, 105 106 const std::function<void()>& popupFn = nullptr, const CssParser* cssParser = nullptr)
+563
lib/KOReaderSync/ChapterXPathResolver.cpp
··· 1 + #include "ChapterXPathResolver.h" 2 + 3 + #include <Logging.h> 4 + #include <Print.h> 5 + #include <Utf8.h> 6 + #include <XmlParserUtils.h> 7 + #include <expat.h> 8 + 9 + #include <algorithm> 10 + #include <cmath> 11 + #include <cstring> 12 + #include <string> 13 + #include <utility> 14 + #include <vector> 15 + 16 + namespace { 17 + std::string stripPrefix(const XML_Char* name) { 18 + if (!name) { 19 + return ""; 20 + } 21 + 22 + const char* local = std::strrchr(name, ':'); 23 + return local ? std::string(local + 1) : std::string(name); 24 + } 25 + 26 + struct NameCounter { 27 + std::string name; 28 + int count; 29 + }; 30 + 31 + struct ParentState { 32 + std::vector<NameCounter> children; 33 + 34 + int nextIndex(const std::string& name) { 35 + for (auto& child : children) { 36 + if (child.name == name) { 37 + child.count++; 38 + return child.count; 39 + } 40 + } 41 + 42 + children.push_back({name, 1}); 43 + return 1; 44 + } 45 + }; 46 + 47 + struct PathSegment { 48 + std::string name; 49 + int index; 50 + }; 51 + 52 + std::string buildParagraphXPath(const int spineIndex, const std::vector<PathSegment>& path, const int charOffset) { 53 + std::string xpath = "/body/DocFragment[" + std::to_string(spineIndex + 1) + "]/body"; 54 + for (const auto& segment : path) { 55 + xpath += "/" + segment.name + "[" + std::to_string(segment.index) + "]"; 56 + } 57 + if (charOffset > 0) { 58 + xpath += "/text()." 
+ std::to_string(charOffset); 59 + } 60 + return xpath; 61 + } 62 + 63 + size_t countUtf8Codepoints(const XML_Char* data, const int len) { 64 + if (!data || len <= 0) { 65 + return 0; 66 + } 67 + 68 + size_t count = 0; 69 + const unsigned char* ptr = reinterpret_cast<const unsigned char*>(data); 70 + const unsigned char* end = ptr + len; 71 + while (ptr < end) { 72 + utf8NextCodepoint(&ptr); 73 + count++; 74 + } 75 + 76 + return count; 77 + } 78 + 79 + class ParagraphTextCounter final : public Print { 80 + public: 81 + ParagraphTextCounter() { 82 + parser = XML_ParserCreate(nullptr); 83 + if (!parser) { 84 + LOG_ERR("KOX", "Failed to create XML parser"); 85 + return; 86 + } 87 + 88 + XML_SetUserData(parser, this); 89 + XML_SetElementHandler(parser, &ParagraphTextCounter::startElement, &ParagraphTextCounter::endElement); 90 + XML_SetCharacterDataHandler(parser, &ParagraphTextCounter::characterData); 91 + } 92 + 93 + ~ParagraphTextCounter() override { destroyXmlParser(parser); } 94 + 95 + bool ok() const { return parser != nullptr && parseOk; } 96 + 97 + bool finish() { 98 + if (!parser || !parseOk || stopped) { 99 + return parseOk; 100 + } 101 + 102 + if (XML_Parse(parser, "", 0, XML_TRUE) == XML_STATUS_ERROR) { 103 + LOG_ERR("KOX", "Final XML parse error: %s", XML_ErrorString(XML_GetErrorCode(parser))); 104 + parseOk = false; 105 + } 106 + return parseOk; 107 + } 108 + 109 + size_t write(uint8_t c) override { return write(&c, 1); } 110 + 111 + size_t write(const uint8_t* buffer, size_t size) override { 112 + if (!parser || !parseOk || stopped) { 113 + return size; 114 + } 115 + 116 + if (XML_Parse(parser, reinterpret_cast<const char*>(buffer), static_cast<int>(size), XML_FALSE) != XML_STATUS_OK) { 117 + const enum XML_Error error = XML_GetErrorCode(parser); 118 + if (error != XML_ERROR_ABORTED) { 119 + LOG_ERR("KOX", "XML parse error: %s", XML_ErrorString(error)); 120 + parseOk = false; 121 + } 122 + } 123 + 124 + return size; 125 + } 126 + 127 + size_t 
totalVisibleChars() const { return visibleChars; } 128 + 129 + private: 130 + static void XMLCALL startElement(void* userData, const XML_Char* name, const XML_Char**) { 131 + auto* self = static_cast<ParagraphTextCounter*>(userData); 132 + self->onStartElement(name); 133 + } 134 + 135 + static void XMLCALL endElement(void* userData, const XML_Char* name) { 136 + auto* self = static_cast<ParagraphTextCounter*>(userData); 137 + self->onEndElement(name); 138 + } 139 + 140 + static void XMLCALL characterData(void* userData, const XML_Char* data, const int len) { 141 + auto* self = static_cast<ParagraphTextCounter*>(userData); 142 + self->onCharacterData(data, len); 143 + } 144 + 145 + void onStartElement(const XML_Char* rawName) { 146 + const std::string name = stripPrefix(rawName); 147 + 148 + if (!insideBody) { 149 + if (name == "body") { 150 + insideBody = true; 151 + bodyDepth = depth; 152 + } 153 + depth++; 154 + return; 155 + } 156 + 157 + if (name == "p") { 158 + paragraphDepth++; 159 + } 160 + depth++; 161 + } 162 + 163 + void onEndElement(const XML_Char* rawName) { 164 + const std::string name = stripPrefix(rawName); 165 + 166 + depth--; 167 + if (!insideBody) { 168 + return; 169 + } 170 + 171 + if (depth == bodyDepth && name == "body") { 172 + insideBody = false; 173 + return; 174 + } 175 + 176 + if (name == "p" && paragraphDepth > 0) { 177 + paragraphDepth--; 178 + } 179 + } 180 + 181 + void onCharacterData(const XML_Char* data, const int len) { 182 + if (!insideBody || paragraphDepth <= 0 || len <= 0) { 183 + return; 184 + } 185 + 186 + visibleChars += countUtf8Codepoints(data, len); 187 + } 188 + 189 + private: 190 + XML_Parser parser = nullptr; 191 + bool parseOk = true; 192 + bool insideBody = false; 193 + bool stopped = false; 194 + int depth = 0; 195 + int bodyDepth = -1; 196 + int paragraphDepth = 0; 197 + size_t visibleChars = 0; 198 + }; 199 + 200 + class XPathParagraphResolver final : public Print { 201 + public: 202 + explicit 
XPathParagraphResolver(const int targetParagraph) : targetParagraph(targetParagraph) { 203 + parser = XML_ParserCreate(nullptr); 204 + if (!parser) { 205 + LOG_ERR("KOX", "Failed to create XML parser"); 206 + return; 207 + } 208 + 209 + XML_SetUserData(parser, this); 210 + XML_SetElementHandler(parser, &XPathParagraphResolver::startElement, &XPathParagraphResolver::endElement); 211 + } 212 + 213 + ~XPathParagraphResolver() override { destroyXmlParser(parser); } 214 + 215 + bool ok() const { return parser != nullptr && parseOk; } 216 + 217 + bool finish() { 218 + if (!parser || !parseOk || stopped) { 219 + return parseOk; 220 + } 221 + 222 + if (XML_Parse(parser, "", 0, XML_TRUE) == XML_STATUS_ERROR) { 223 + LOG_ERR("KOX", "Final XML parse error: %s", XML_ErrorString(XML_GetErrorCode(parser))); 224 + parseOk = false; 225 + } 226 + return parseOk; 227 + } 228 + 229 + bool hasMatch() const { return !xpath.empty(); } 230 + const std::string& getXPath() const { return xpath; } 231 + 232 + size_t write(uint8_t c) override { return write(&c, 1); } 233 + 234 + size_t write(const uint8_t* buffer, size_t size) override { 235 + if (!parser || !parseOk || stopped) { 236 + return size; 237 + } 238 + 239 + if (XML_Parse(parser, reinterpret_cast<const char*>(buffer), static_cast<int>(size), XML_FALSE) != XML_STATUS_OK) { 240 + const enum XML_Error error = XML_GetErrorCode(parser); 241 + if (error != XML_ERROR_ABORTED) { 242 + LOG_ERR("KOX", "XML parse error: %s", XML_ErrorString(error)); 243 + parseOk = false; 244 + } 245 + } 246 + 247 + return size; 248 + } 249 + 250 + int spineIndex = 0; 251 + 252 + private: 253 + static void XMLCALL startElement(void* userData, const XML_Char* name, const XML_Char**) { 254 + auto* self = static_cast<XPathParagraphResolver*>(userData); 255 + self->onStartElement(name); 256 + } 257 + 258 + static void XMLCALL endElement(void* userData, const XML_Char* name) { 259 + auto* self = static_cast<XPathParagraphResolver*>(userData); 260 + 
self->onEndElement(name); 261 + } 262 + 263 + void onStartElement(const XML_Char* rawName) { 264 + const std::string name = stripPrefix(rawName); 265 + 266 + if (!insideBody) { 267 + if (name == "body") { 268 + insideBody = true; 269 + bodyDepth = depth; 270 + parentStates.emplace_back(); 271 + } 272 + depth++; 273 + return; 274 + } 275 + 276 + const int siblingIndex = parentStates.back().nextIndex(name); 277 + path.push_back({name, siblingIndex}); 278 + parentStates.emplace_back(); 279 + 280 + if (name == "p") { 281 + paragraphCount++; 282 + if (paragraphCount == targetParagraph) { 283 + xpath = buildParagraphXPath(spineIndex, path, 0); 284 + stopped = true; 285 + XML_StopParser(parser, XML_FALSE); 286 + } 287 + } 288 + 289 + depth++; 290 + } 291 + 292 + void onEndElement(const XML_Char* rawName) { 293 + const std::string name = stripPrefix(rawName); 294 + 295 + depth--; 296 + if (!insideBody) { 297 + return; 298 + } 299 + 300 + if (depth == bodyDepth && name == "body") { 301 + insideBody = false; 302 + parentStates.clear(); 303 + path.clear(); 304 + return; 305 + } 306 + 307 + if (!path.empty()) { 308 + path.pop_back(); 309 + } 310 + if (!parentStates.empty()) { 311 + parentStates.pop_back(); 312 + } 313 + } 314 + 315 + XML_Parser parser = nullptr; 316 + const int targetParagraph; 317 + bool parseOk = true; 318 + bool insideBody = false; 319 + bool stopped = false; 320 + int depth = 0; 321 + int bodyDepth = -1; 322 + int paragraphCount = 0; 323 + std::vector<ParentState> parentStates; 324 + std::vector<PathSegment> path; 325 + std::string xpath; 326 + }; 327 + 328 + class XPathProgressResolver final : public Print { 329 + public: 330 + explicit XPathProgressResolver(const size_t targetVisibleChar) : targetVisibleChar(targetVisibleChar) { 331 + parser = XML_ParserCreate(nullptr); 332 + if (!parser) { 333 + LOG_ERR("KOX", "Failed to create XML parser"); 334 + return; 335 + } 336 + 337 + XML_SetUserData(parser, this); 338 + XML_SetElementHandler(parser, 
&XPathProgressResolver::startElement, &XPathProgressResolver::endElement); 339 + XML_SetCharacterDataHandler(parser, &XPathProgressResolver::characterData); 340 + } 341 + 342 + ~XPathProgressResolver() override { destroyXmlParser(parser); } 343 + 344 + bool ok() const { return parser != nullptr && parseOk; } 345 + 346 + bool finish() { 347 + if (!parser || !parseOk || stopped) { 348 + return parseOk; 349 + } 350 + 351 + if (XML_Parse(parser, "", 0, XML_TRUE) == XML_STATUS_ERROR) { 352 + LOG_ERR("KOX", "Final XML parse error: %s", XML_ErrorString(XML_GetErrorCode(parser))); 353 + parseOk = false; 354 + } 355 + return parseOk; 356 + } 357 + 358 + bool hasMatch() const { return !xpath.empty(); } 359 + const std::string& getXPath() const { return xpath; } 360 + 361 + size_t write(uint8_t c) override { return write(&c, 1); } 362 + 363 + size_t write(const uint8_t* buffer, size_t size) override { 364 + if (!parser || !parseOk || stopped) { 365 + return size; 366 + } 367 + 368 + if (XML_Parse(parser, reinterpret_cast<const char*>(buffer), static_cast<int>(size), XML_FALSE) != XML_STATUS_OK) { 369 + const enum XML_Error error = XML_GetErrorCode(parser); 370 + if (error != XML_ERROR_ABORTED) { 371 + LOG_ERR("KOX", "XML parse error: %s", XML_ErrorString(error)); 372 + parseOk = false; 373 + } 374 + } 375 + 376 + return size; 377 + } 378 + 379 + int spineIndex = 0; 380 + 381 + private: 382 + static void XMLCALL startElement(void* userData, const XML_Char* name, const XML_Char**) { 383 + auto* self = static_cast<XPathProgressResolver*>(userData); 384 + self->onStartElement(name); 385 + } 386 + 387 + static void XMLCALL endElement(void* userData, const XML_Char* name) { 388 + auto* self = static_cast<XPathProgressResolver*>(userData); 389 + self->onEndElement(name); 390 + } 391 + 392 + static void XMLCALL characterData(void* userData, const XML_Char* data, const int len) { 393 + auto* self = static_cast<XPathProgressResolver*>(userData); 394 + self->onCharacterData(data, len); 
395 + } 396 + 397 + void onStartElement(const XML_Char* rawName) { 398 + const std::string name = stripPrefix(rawName); 399 + 400 + if (!insideBody) { 401 + if (name == "body") { 402 + insideBody = true; 403 + bodyDepth = depth; 404 + parentStates.emplace_back(); 405 + } 406 + depth++; 407 + return; 408 + } 409 + 410 + const int siblingIndex = parentStates.back().nextIndex(name); 411 + path.push_back({name, siblingIndex}); 412 + parentStates.emplace_back(); 413 + 414 + if (name == "p") { 415 + paragraphDepth++; 416 + paragraphVisibleChars = 0; 417 + } 418 + 419 + depth++; 420 + } 421 + 422 + void onEndElement(const XML_Char* rawName) { 423 + const std::string name = stripPrefix(rawName); 424 + 425 + depth--; 426 + if (!insideBody) { 427 + return; 428 + } 429 + 430 + if (depth == bodyDepth && name == "body") { 431 + insideBody = false; 432 + parentStates.clear(); 433 + path.clear(); 434 + return; 435 + } 436 + 437 + if (name == "p" && paragraphDepth > 0) { 438 + paragraphDepth--; 439 + paragraphVisibleChars = 0; 440 + } 441 + 442 + if (!path.empty()) { 443 + path.pop_back(); 444 + } 445 + if (!parentStates.empty()) { 446 + parentStates.pop_back(); 447 + } 448 + } 449 + 450 + void onCharacterData(const XML_Char* data, const int len) { 451 + if (!insideBody || paragraphDepth <= 0 || len <= 0 || stopped) { 452 + return; 453 + } 454 + 455 + const size_t codepointCount = countUtf8Codepoints(data, len); 456 + const size_t nextVisibleChars = visibleChars + codepointCount; 457 + if (targetVisibleChar <= nextVisibleChars) { 458 + const size_t delta = targetVisibleChar - visibleChars; 459 + const int charOffset = static_cast<int>(paragraphVisibleChars + delta); 460 + xpath = buildParagraphXPath(spineIndex, path, std::max(1, charOffset)); 461 + stopped = true; 462 + XML_StopParser(parser, XML_FALSE); 463 + return; 464 + } 465 + 466 + visibleChars = nextVisibleChars; 467 + paragraphVisibleChars += codepointCount; 468 + } 469 + 470 + XML_Parser parser = nullptr; 471 + const 
size_t targetVisibleChar; 472 + bool parseOk = true; 473 + bool insideBody = false; 474 + bool stopped = false; 475 + int depth = 0; 476 + int bodyDepth = -1; 477 + int paragraphDepth = 0; 478 + size_t visibleChars = 0; 479 + size_t paragraphVisibleChars = 0; 480 + std::vector<ParentState> parentStates; 481 + std::vector<PathSegment> path; 482 + std::string xpath; 483 + }; 484 + } // namespace 485 + 486 + std::string ChapterXPathResolver::findXPathForParagraph(const std::shared_ptr<Epub>& epub, const int spineIndex, 487 + const uint16_t paragraphIndex) { 488 + if (!epub || paragraphIndex == 0 || spineIndex < 0 || spineIndex >= epub->getSpineItemsCount()) { 489 + return ""; 490 + } 491 + 492 + const auto href = epub->getSpineItem(spineIndex).href; 493 + if (href.empty()) { 494 + return ""; 495 + } 496 + 497 + XPathParagraphResolver resolver(paragraphIndex); 498 + if (!resolver.ok()) { 499 + return ""; 500 + } 501 + 502 + resolver.spineIndex = spineIndex; 503 + if (!epub->readItemContentsToStream(href, resolver, 1024) || !resolver.finish()) { 504 + return ""; 505 + } 506 + 507 + if (resolver.hasMatch()) { 508 + LOG_DBG("KOX", "Resolved paragraph %u in spine %d -> %s", paragraphIndex, spineIndex, resolver.getXPath().c_str()); 509 + return resolver.getXPath(); 510 + } 511 + 512 + LOG_DBG("KOX", "Paragraph %u not found in spine %d", paragraphIndex, spineIndex); 513 + return ""; 514 + } 515 + 516 + std::string ChapterXPathResolver::findXPathForProgress(const std::shared_ptr<Epub>& epub, const int spineIndex, 517 + const float intraSpineProgress) { 518 + if (!epub || spineIndex < 0 || spineIndex >= epub->getSpineItemsCount()) { 519 + return ""; 520 + } 521 + 522 + const auto href = epub->getSpineItem(spineIndex).href; 523 + if (href.empty()) { 524 + return ""; 525 + } 526 + 527 + if (!(intraSpineProgress > 0.0f)) { 528 + return "/body/DocFragment[" + std::to_string(spineIndex + 1) + "]/body"; 529 + } 530 + 531 + ParagraphTextCounter counter; 532 + if (!counter.ok() || 
!epub->readItemContentsToStream(href, counter, 1024) || !counter.finish()) { 533 + return ""; 534 + } 535 + 536 + const size_t totalVisibleChars = counter.totalVisibleChars(); 537 + if (totalVisibleChars == 0) { 538 + return ""; 539 + } 540 + 541 + const float clamped = std::max(0.0f, std::min(1.0f, intraSpineProgress)); 542 + const size_t targetVisibleChar = 543 + std::max<size_t>(1, std::min(totalVisibleChars, static_cast<size_t>(std::ceil(clamped * totalVisibleChars)))); 544 + 545 + XPathProgressResolver resolver(targetVisibleChar); 546 + if (!resolver.ok()) { 547 + return ""; 548 + } 549 + 550 + resolver.spineIndex = spineIndex; 551 + if (!epub->readItemContentsToStream(href, resolver, 1024) || !resolver.finish()) { 552 + return ""; 553 + } 554 + 555 + if (resolver.hasMatch()) { 556 + LOG_DBG("KOX", "Resolved progress %.3f in spine %d -> %s", intraSpineProgress, spineIndex, 557 + resolver.getXPath().c_str()); 558 + return resolver.getXPath(); 559 + } 560 + 561 + LOG_DBG("KOX", "Could not resolve progress %.3f in spine %d", intraSpineProgress, spineIndex); 562 + return ""; 563 + }
+30
lib/KOReaderSync/ChapterXPathResolver.h
··· 1 + #pragma once 2 + 3 + #include <Epub.h> 4 + 5 + #include <cstdint> 6 + #include <memory> 7 + #include <string> 8 + 9 + class ChapterXPathResolver { 10 + public: 11 + /** 12 + * Resolve the Nth paragraph in a spine item to its real XHTML ancestry path. 13 + * 14 + * Returns a KOReader-compatible path like: 15 + * /body/DocFragment[8]/body/div[2]/section[1]/p[4] 16 + * 17 + * An empty string means parsing failed or the paragraph index was not found. 18 + */ 19 + static std::string findXPathForParagraph(const std::shared_ptr<Epub>& epub, int spineIndex, uint16_t paragraphIndex); 20 + 21 + /** 22 + * Resolve intra-spine progress to a real XHTML ancestry path plus text offset. 23 + * 24 + * Returns a KOReader-compatible path like: 25 + * /body/DocFragment[8]/body/div[2]/section[1]/p[4]/text().96 26 + * 27 + * An empty string means parsing failed or the location could not be resolved. 28 + */ 29 + static std::string findXPathForProgress(const std::shared_ptr<Epub>& epub, int spineIndex, float intraSpineProgress); 30 + };
+257 -76
lib/KOReaderSync/ProgressMapper.cpp
··· 2 2 3 3 #include <Logging.h> 4 4 5 + #include <algorithm> 5 6 #include <cmath> 7 + #include <cstring> 6 8 7 - KOReaderPosition ProgressMapper::toKOReader(const std::shared_ptr<Epub>& epub, const CrossPointPosition& pos) { 8 - KOReaderPosition result; 9 + #include "ChapterXPathResolver.h" 10 + #include "Epub/htmlEntities.h" 11 + #include "Utf8.h" 9 12 10 - // Calculate page progress within current spine item 11 - float intraSpineProgress = 0.0f; 12 - if (pos.totalPages > 0) { 13 - intraSpineProgress = static_cast<float>(pos.pageNumber) / static_cast<float>(pos.totalPages); 13 + namespace { 14 + int parseIndex(const std::string& xpath, const char* prefix, bool last = false) { 15 + const size_t prefixLen = strlen(prefix); 16 + const size_t pos = last ? xpath.rfind(prefix) : xpath.find(prefix); 17 + if (pos == std::string::npos) return -1; 18 + const size_t numStart = pos + prefixLen; 19 + const size_t numEnd = xpath.find(']', numStart); 20 + if (numEnd == std::string::npos || numEnd == numStart) return -1; 21 + int val = 0; 22 + for (size_t i = numStart; i < numEnd; i++) { 23 + if (xpath[i] < '0' || xpath[i] > '9') return -1; 24 + val = val * 10 + (xpath[i] - '0'); 14 25 } 26 + return val; 27 + } 15 28 16 - // Calculate overall book progress (0.0-1.0) 17 - result.percentage = epub->calculateProgress(pos.spineIndex, intraSpineProgress); 29 + int parseCharOffset(const std::string& xpath) { 30 + const size_t textPos = xpath.rfind("text()"); 31 + if (textPos == std::string::npos) return 0; 32 + const size_t dotPos = xpath.find('.', textPos); 33 + if (dotPos == std::string::npos || dotPos + 1 >= xpath.size()) return 0; 34 + int val = 0; 35 + for (size_t i = dotPos + 1; i < xpath.size(); i++) { 36 + if (xpath[i] < '0' || xpath[i] > '9') return 0; 37 + val = val * 10 + (xpath[i] - '0'); 38 + } 39 + return val; 40 + } 18 41 19 - // Generate XPath with estimated paragraph position based on page 20 - result.xpath = generateXPath(pos.spineIndex, pos.pageNumber, 
pos.totalPages); 42 + class ParagraphStreamer final : public Print { 43 + size_t bytesWritten = 0; 44 + bool globalInTag = false; 45 + bool globalInEntity = false; 46 + enum { IDLE, SAW_LT, SAW_LT_P } pState = IDLE; 47 + static constexpr size_t MAX_ENTITY_SIZE = 16; 48 + char entityBuffer[MAX_ENTITY_SIZE] = {}; 49 + size_t entityLen = 0; 21 50 22 - // Get chapter info for logging 23 - const int tocIndex = epub->getTocIndexForSpineIndex(pos.spineIndex); 24 - const std::string chapterName = (tocIndex >= 0) ? epub->getTocItem(tocIndex).title : "unknown"; 51 + // Forward mode: count paragraphs at a byte offset 52 + size_t fwdTarget; 53 + int fwdResult = 0; 54 + bool fwdCaptured = false; 25 55 26 - LOG_DBG("ProgressMapper", "CrossPoint -> KOReader: chapter='%s', page=%d/%d -> %.2f%% at %s", chapterName.c_str(), 27 - pos.pageNumber, pos.totalPages, result.percentage * 100, result.xpath.c_str()); 56 + // Reverse mode: find position of Nth paragraph + char offset 57 + int revParagraph; 58 + int revChar; 59 + int pCount = 0; 60 + bool revPFound = false; 61 + bool revDone = false; 62 + int revVisChars = 0; // Visible chars counted WITHIN target paragraph 63 + size_t totalVisChars = 0; // Total visible chars in entire file 64 + size_t targetVisChars = 0; // Visible chars from start of file to target position 28 65 29 - return result; 30 - } 66 + void onP() { 67 + pCount++; 68 + if (!revPFound && revParagraph > 0 && pCount >= revParagraph) { 69 + revPFound = true; 70 + revVisChars = 0; 71 + if (revChar <= 0) { 72 + targetVisChars = totalVisChars; 73 + revDone = true; 74 + } 75 + } 76 + } 31 77 32 - CrossPointPosition ProgressMapper::toCrossPoint(const std::shared_ptr<Epub>& epub, const KOReaderPosition& koPos, 33 - int currentSpineIndex, int totalPagesInCurrentSpine) { 34 - CrossPointPosition result; 35 - result.spineIndex = 0; 36 - result.pageNumber = 0; 37 - result.totalPages = 0; 38 - 39 - const size_t bookSize = epub->getBookSize(); 40 - if (bookSize == 0) { 41 - return 
result; 78 + void onVisibleCodepoint() { 79 + totalVisChars++; 80 + if (revPFound && !revDone) { 81 + revVisChars++; 82 + if (revVisChars >= revChar) { 83 + targetVisChars = totalVisChars; 84 + revDone = true; 85 + } 86 + } 42 87 } 43 88 44 - // Use percentage-based lookup for both spine and page positioning 45 - // XPath parsing is unreliable since CrossPoint doesn't preserve detailed HTML structure 46 - const size_t targetBytes = static_cast<size_t>(bookSize * koPos.percentage); 89 + void onVisibleText(const char* text) { 90 + if (!text) { 91 + return; 92 + } 47 93 48 - // Find the spine item that contains this byte position 49 - const int spineCount = epub->getSpineItemsCount(); 50 - bool spineFound = false; 51 - for (int i = 0; i < spineCount; i++) { 52 - const size_t cumulativeSize = epub->getCumulativeSpineItemSize(i); 53 - if (cumulativeSize >= targetBytes) { 54 - result.spineIndex = i; 55 - spineFound = true; 56 - break; 94 + const unsigned char* ptr = reinterpret_cast<const unsigned char*>(text); 95 + while (*ptr != 0) { 96 + utf8NextCodepoint(&ptr); 97 + onVisibleCodepoint(); 57 98 } 58 99 } 59 100 60 - // If no spine item was found (e.g., targetBytes beyond last cumulative size), 61 - // default to the last spine item so we map to the end of the book instead of the beginning. 62 - if (!spineFound && spineCount > 0) { 63 - result.spineIndex = spineCount - 1; 101 + void flushEntityAsLiteral() { 102 + for (size_t i = 0; i < entityLen; i++) { 103 + onVisibleCodepoint(); 104 + } 64 105 } 65 106 66 - // Estimate page number within the spine item using percentage 67 - if (result.spineIndex < epub->getSpineItemsCount()) { 68 - const size_t prevCumSize = (result.spineIndex > 0) ? 
epub->getCumulativeSpineItemSize(result.spineIndex - 1) : 0; 69 - const size_t currentCumSize = epub->getCumulativeSpineItemSize(result.spineIndex); 70 - const size_t spineSize = currentCumSize - prevCumSize; 107 + void finishEntity() { 108 + entityBuffer[entityLen] = '\0'; 109 + const char* resolved = lookupHtmlEntity(entityBuffer, entityLen); 110 + if (resolved) { 111 + onVisibleText(resolved); 112 + } else { 113 + flushEntityAsLiteral(); 114 + } 115 + globalInEntity = false; 116 + entityLen = 0; 117 + } 71 118 72 - int estimatedTotalPages = 0; 119 + public: 120 + explicit ParagraphStreamer(size_t targetByte) : fwdTarget(targetByte), revParagraph(0), revChar(0) {} 121 + ParagraphStreamer(int paragraph, int charOff) : fwdTarget(SIZE_MAX), revParagraph(paragraph), revChar(charOff) {} 73 122 74 - // If we are in the same spine, use the known total pages 75 - if (result.spineIndex == currentSpineIndex && totalPagesInCurrentSpine > 0) { 76 - estimatedTotalPages = totalPagesInCurrentSpine; 123 + size_t write(uint8_t c) override { 124 + if (!fwdCaptured && bytesWritten >= fwdTarget) { 125 + fwdResult = pCount; 126 + fwdCaptured = true; 77 127 } 78 - // Otherwise try to estimate based on density from current spine 79 - else if (currentSpineIndex >= 0 && currentSpineIndex < epub->getSpineItemsCount() && totalPagesInCurrentSpine > 0) { 80 - const size_t prevCurrCumSize = 81 - (currentSpineIndex > 0) ? 
epub->getCumulativeSpineItemSize(currentSpineIndex - 1) : 0; 82 - const size_t currCumSize = epub->getCumulativeSpineItemSize(currentSpineIndex); 83 - const size_t currSpineSize = currCumSize - prevCurrCumSize; 128 + bytesWritten++; 129 + 130 + if (globalInEntity) { 131 + if (entityLen + 1 < MAX_ENTITY_SIZE) { 132 + entityBuffer[entityLen++] = static_cast<char>(c); 133 + } else { 134 + flushEntityAsLiteral(); 135 + globalInEntity = false; 136 + entityLen = 0; 137 + } 84 138 85 - if (currSpineSize > 0) { 86 - float ratio = static_cast<float>(spineSize) / static_cast<float>(currSpineSize); 87 - estimatedTotalPages = static_cast<int>(totalPagesInCurrentSpine * ratio); 88 - if (estimatedTotalPages < 1) estimatedTotalPages = 1; 139 + if (globalInEntity) { 140 + if (c == ';') { 141 + finishEntity(); 142 + } else if (c == '<' || c == ' ' || c == '\t' || c == '\n' || c == '\r') { 143 + flushEntityAsLiteral(); 144 + globalInEntity = false; 145 + entityLen = 0; 146 + } 147 + } 148 + } else if (c == '<') { 149 + globalInTag = true; 150 + } else if (c == '>') { 151 + globalInTag = false; 152 + } else if (!globalInTag) { 153 + if (c == '&') { 154 + globalInEntity = true; 155 + entityBuffer[0] = '&'; 156 + entityLen = 1; 157 + } else { 158 + const bool startsCodepoint = (c & 0xC0) != 0x80; 159 + if (startsCodepoint) { 160 + onVisibleCodepoint(); 161 + } 89 162 } 90 163 } 91 164 92 - result.totalPages = estimatedTotalPages; 165 + // Paragraph detection 166 + switch (pState) { 167 + case IDLE: 168 + if (c == '<') pState = SAW_LT; 169 + break; 170 + case SAW_LT: 171 + pState = (c == 'p' || c == 'P') ? SAW_LT_P : ((c == '<') ? SAW_LT : IDLE); 172 + break; 173 + case SAW_LT_P: 174 + if (c == '>' || c == '/' || c == ' ' || c == '\t' || c == '\n' || c == '\r') onP(); 175 + pState = (c == '<') ? 
SAW_LT : IDLE; 176 + break; 177 + } 178 + return 1; 179 + } 180 + 181 + size_t write(const uint8_t* buffer, size_t size) override { 182 + for (size_t i = 0; i < size; i++) write(buffer[i]); 183 + return size; 184 + } 185 + 186 + public: 187 + int paragraphCount() const { return fwdCaptured ? fwdResult : pCount; } 188 + size_t totalBytes() const { return bytesWritten; } 189 + bool found() const { return revDone || revPFound; } 190 + float progress() const { 191 + return totalVisChars > 0 ? static_cast<float>(targetVisChars) / static_cast<float>(totalVisChars) : 0.0f; 192 + } 193 + }; 194 + 195 + bool streamSpine(const std::shared_ptr<Epub>& epub, int spineIndex, ParagraphStreamer& s) { 196 + const auto href = epub->getSpineItem(spineIndex).href; 197 + return !href.empty() && epub->readItemContentsToStream(href, s, 1024); 198 + } 199 + } // namespace 93 200 94 - if (spineSize > 0 && estimatedTotalPages > 0) { 95 - const size_t bytesIntoSpine = (targetBytes > prevCumSize) ? (targetBytes - prevCumSize) : 0; 96 - const float intraSpineProgress = static_cast<float>(bytesIntoSpine) / static_cast<float>(spineSize); 97 - const float clampedProgress = std::max(0.0f, std::min(1.0f, intraSpineProgress)); 98 - result.pageNumber = static_cast<int>(clampedProgress * estimatedTotalPages); 99 - result.pageNumber = std::max(0, std::min(result.pageNumber, estimatedTotalPages - 1)); 201 + KOReaderPosition ProgressMapper::toKOReader(const std::shared_ptr<Epub>& epub, const CrossPointPosition& pos) { 202 + KOReaderPosition result; 203 + float intra = (pos.totalPages > 0) ? 
static_cast<float>(pos.pageNumber) / static_cast<float>(pos.totalPages) : 0.0f; 204 + result.percentage = epub->calculateProgress(pos.spineIndex, intra); 205 + if (pos.hasParagraphIndex && pos.paragraphIndex > 0) { 206 + result.xpath = ChapterXPathResolver::findXPathForParagraph(epub, pos.spineIndex, pos.paragraphIndex); 207 + } else { 208 + result.xpath = ChapterXPathResolver::findXPathForProgress(epub, pos.spineIndex, intra); 209 + } 210 + if (result.xpath.empty()) { 211 + result.xpath = generateXPath(epub, pos.spineIndex, intra); 212 + } 213 + LOG_DBG("PM", "-> KO: spine=%d page=%d/%d %.2f%% %s", pos.spineIndex, pos.pageNumber, pos.totalPages, 214 + result.percentage * 100, result.xpath.c_str()); 215 + return result; 216 + } 217 + 218 + CrossPointPosition ProgressMapper::toCrossPoint(const std::shared_ptr<Epub>& epub, const KOReaderPosition& koPos, 219 + int currentSpineIndex, int totalPagesInCurrentSpine) { 220 + CrossPointPosition result{}; 221 + const size_t bookSize = epub->getBookSize(); 222 + if (bookSize == 0) return result; 223 + 224 + const int spineCount = epub->getSpineItemsCount(); 225 + const float clampedPercentage = std::max(0.0f, std::min(1.0f, koPos.percentage)); 226 + const size_t targetBytes = static_cast<size_t>(static_cast<float>(bookSize) * clampedPercentage); 227 + 228 + const int docFrag = parseIndex(koPos.xpath, "/body/DocFragment["); 229 + const int xpathP = parseIndex(koPos.xpath, "/p[", true); 230 + const int xpathChar = parseCharOffset(koPos.xpath); 231 + const int xpathSpine = (docFrag >= 1) ? 
(docFrag - 1) : -1; 232 + if (xpathP > 0) { 233 + result.paragraphIndex = static_cast<uint16_t>(xpathP); 234 + result.hasParagraphIndex = true; 235 + } 236 + 237 + if (xpathSpine >= 0 && xpathSpine < spineCount) { 238 + result.spineIndex = xpathSpine; 239 + } else { 240 + for (int i = 0; i < spineCount; i++) { 241 + if (epub->getCumulativeSpineItemSize(i) >= targetBytes) { 242 + result.spineIndex = i; 243 + break; 244 + } 100 245 } 101 246 } 247 + if (result.spineIndex >= spineCount) return result; 248 + 249 + const size_t prevCum = (result.spineIndex > 0) ? epub->getCumulativeSpineItemSize(result.spineIndex - 1) : 0; 250 + const size_t spineSize = epub->getCumulativeSpineItemSize(result.spineIndex) - prevCum; 102 251 103 - LOG_DBG("ProgressMapper", "KOReader -> CrossPoint: %.2f%% at %s -> spine=%d, page=%d", koPos.percentage * 100, 104 - koPos.xpath.c_str(), result.spineIndex, result.pageNumber); 252 + if (result.spineIndex == currentSpineIndex && totalPagesInCurrentSpine > 0) { 253 + result.totalPages = totalPagesInCurrentSpine; 254 + } else if (currentSpineIndex >= 0 && currentSpineIndex < spineCount && totalPagesInCurrentSpine > 0) { 255 + const size_t pc = (currentSpineIndex > 0) ? epub->getCumulativeSpineItemSize(currentSpineIndex - 1) : 0; 256 + const size_t cs = epub->getCumulativeSpineItemSize(currentSpineIndex) - pc; 257 + if (cs > 0) 258 + result.totalPages = std::max( 259 + 1, static_cast<int>(totalPagesInCurrentSpine * static_cast<float>(spineSize) / static_cast<float>(cs))); 260 + } 261 + if (spineSize == 0 || result.totalPages == 0) return result; 262 + 263 + float intra = 0.0f; 264 + if (xpathP > 0) { 265 + ParagraphStreamer s(xpathP, xpathChar); 266 + if (streamSpine(epub, result.spineIndex, s) && s.found()) { 267 + intra = s.progress(); 268 + LOG_DBG("PM", "XPath p[%d]+%d -> %.1f%%", xpathP, xpathChar, intra * 100); 269 + } 270 + } 271 + if (intra <= 0.0f) { 272 + const size_t bytesIn = (targetBytes > prevCum) ? 
(targetBytes - prevCum) : 0; 273 + intra = std::max(0.0f, std::min(1.0f, static_cast<float>(bytesIn) / static_cast<float>(spineSize))); 274 + } 105 275 276 + result.pageNumber = std::max(0, std::min(static_cast<int>(intra * result.totalPages), result.totalPages - 1)); 277 + LOG_DBG("PM", "<- KO: %.2f%% %s -> spine=%d page=%d/%d", koPos.percentage * 100, koPos.xpath.c_str(), 278 + result.spineIndex, result.pageNumber, result.totalPages); 106 279 return result; 107 280 } 108 281 109 - std::string ProgressMapper::generateXPath(int spineIndex, int pageNumber, int totalPages) { 110 - // Use 0-based DocFragment indices for KOReader 111 - // Use a simple xpath pointing to the DocFragment - KOReader will use the percentage for fine positioning within it 112 - // Avoid specifying paragraph numbers as they may not exist in the target document 113 - return "/body/DocFragment[" + std::to_string(spineIndex) + "]/body"; 282 + std::string ProgressMapper::generateXPath(const std::shared_ptr<Epub>& epub, int spineIndex, float intra) { 283 + const std::string base = "/body/DocFragment[" + std::to_string(spineIndex + 1) + "]/body"; 284 + if (intra <= 0.0f) return base; 285 + 286 + size_t spineSize = 0; 287 + const auto href = epub->getSpineItem(spineIndex).href; 288 + if (href.empty() || !epub->getItemSize(href, &spineSize) || spineSize == 0) return base; 289 + 290 + ParagraphStreamer s(static_cast<size_t>(spineSize * std::min(intra, 1.0f))); 291 + if (!streamSpine(epub, spineIndex, s)) return base; 292 + 293 + const int p = s.paragraphCount(); 294 + return (p > 0) ? base + "/p[" + std::to_string(p) + "]" : base; 114 295 }
+10 -7
lib/KOReaderSync/ProgressMapper.h
··· 8 8 * CrossPoint position representation. 9 9 */ 10 10 struct CrossPointPosition { 11 - int spineIndex; // Current spine item (chapter) index 12 - int pageNumber; // Current page within the spine item 13 - int totalPages; // Total pages in the current spine item 11 + int spineIndex; // Current spine item (chapter) index 12 + int pageNumber; // Current page within the spine item 13 + int totalPages; // Total pages in the current spine item 14 + uint16_t paragraphIndex = 0; // 1-based synthetic paragraph index from XPath p[N] 15 + bool hasParagraphIndex = false; // True when paragraphIndex was resolved from XPath 14 16 }; 15 17 16 18 /** ··· 59 61 60 62 private: 61 63 /** 62 - * Generate XPath for KOReader compatibility. 63 - * Format: /body/DocFragment[spineIndex+1]/body 64 - * Since CrossPoint doesn't preserve HTML structure, we rely on percentage for positioning. 64 + * Generate a fallback XPath by streaming the spine item's XHTML and resolving 65 + * a paragraph position from intra-spine progress. 66 + * Produces /body/DocFragment[N]/body/p[M], or /body/DocFragment[N]/body when 67 + * no paragraph can be resolved; no text() offset is emitted. 65 68 */ 66 - static std::string generateXPath(int spineIndex, int pageNumber, int totalPages); 69 + static std::string generateXPath(const std::shared_ptr<Epub>& epub, int spineIndex, float intraSpineProgress); 67 };
+40 -7
src/activities/reader/EpubReaderActivity.cpp
··· 10 10 #include <Logging.h> 11 11 #include <esp_system.h> 12 12 13 + #include <limits> 14 + 13 15 #include "CrossPointSettings.h" 14 16 #include "CrossPointState.h" 15 17 #include "EpubReaderChapterSelectionActivity.h" ··· 63 65 if (dataSize == 4 || dataSize == 6) { 64 66 currentSpineIndex = data[0] + (data[1] << 8); 65 67 nextPageNumber = data[2] + (data[3] << 8); 68 + if (nextPageNumber == UINT16_MAX) { 69 + // UINT16_MAX is an in-memory navigation sentinel for "open previous 70 + // chapter on its last page". It should never be treated as persisted 71 + // resume state after sleep or reopen. 72 + LOG_DBG("ERS", "Ignoring stale last-page sentinel from progress cache"); 73 + nextPageNumber = 0; 74 + } 66 75 cachedSpineIndex = currentSpineIndex; 67 76 LOG_DBG("ERS", "Loaded cache: %d, %d", currentSpineIndex, nextPageNumber); 68 77 } ··· 186 195 onGoHome(); 187 196 } else { 188 197 currentSpineIndex = epub->getSpineItemsCount() - 1; 189 - nextPageNumber = UINT16_MAX; 198 + nextPageNumber = 0; 199 + pendingPageJump = std::numeric_limits<uint16_t>::max(); 190 200 requestUpdate(); 191 201 } 192 202 return; ··· 390 400 } 391 401 case EpubReaderMenuActivity::MenuAction::SYNC: { 392 402 if (KOREADER_STORE.hasCredentials()) { 393 - const int currentPage = section ? section->currentPage : 0; 394 - const int totalPages = section ? section->pageCount : 0; 403 + const int currentPage = section ? section->currentPage : nextPageNumber; 404 + const int totalPages = section ? section->pageCount : cachedChapterTotalPageCount; 405 + std::optional<uint16_t> paragraphIndex; 406 + if (section && currentPage >= 0 && currentPage < section->pageCount) { 407 + const uint16_t paragraphPage = 408 + currentPage > 0 ? 
static_cast<uint16_t>(currentPage - 1) : static_cast<uint16_t>(currentPage); 409 + if (const auto pIdx = section->getParagraphIndexForPage(paragraphPage)) { 410 + paragraphIndex = *pIdx; 411 + } 412 + } 395 413 startActivityForResult( 396 414 std::make_unique<KOReaderSyncActivity>(renderer, mappedInput, epub, epub->getPath(), currentSpineIndex, 397 - currentPage, totalPages), 415 + currentPage, totalPages, paragraphIndex), 398 416 [this](const ActivityResult& result) { 399 417 if (!result.isCancelled) { 400 418 const auto& sync = std::get<SyncResult>(result.data); ··· 402 420 RenderLock lock(*this); 403 421 currentSpineIndex = sync.spineIndex; 404 422 nextPageNumber = sync.page; 423 + cachedChapterTotalPageCount = 0; // Prevent rescaling sync page 424 + pendingPageJump.reset(); 425 + saveProgress(currentSpineIndex, nextPageNumber, 0); 405 426 section.reset(); 406 427 } 407 428 } ··· 484 505 // We don't want to delete the section mid-render, so grab the semaphore 485 506 { 486 507 RenderLock lock(*this); 487 - nextPageNumber = UINT16_MAX; 508 + nextPageNumber = 0; 509 + pendingPageJump = std::numeric_limits<uint16_t>::max(); 488 510 currentSpineIndex--; 489 511 section.reset(); 490 512 } ··· 566 588 LOG_DBG("ERS", "Cache found, skipping build..."); 567 589 } 568 590 569 - if (nextPageNumber == UINT16_MAX) { 570 - section->currentPage = section->pageCount - 1; 591 + if (pendingPageJump.has_value()) { 592 + if (*pendingPageJump >= section->pageCount && section->pageCount > 0) { 593 + section->currentPage = section->pageCount - 1; 594 + } else { 595 + section->currentPage = *pendingPageJump; 596 + } 597 + pendingPageJump.reset(); 571 598 } else { 572 599 section->currentPage = nextPageNumber; 600 + if (section->currentPage < 0) { 601 + section->currentPage = 0; 602 + } else if (section->currentPage >= section->pageCount && section->pageCount > 0) { 603 + LOG_DBG("ERS", "Clamping cached page %d to %d", section->currentPage, section->pageCount - 1); 604 + 
section->currentPage = section->pageCount - 1; 605 + } 573 606 } 574 607 575 608 if (!pendingAnchor.empty()) {
+3
src/activities/reader/EpubReaderActivity.h
··· 3 3 #include <Epub/FootnoteEntry.h> 4 4 #include <Epub/Section.h> 5 5 6 + #include <optional> 7 + 6 8 #include "EpubReaderMenuActivity.h" 7 9 #include "activities/Activity.h" 8 10 ··· 11 13 std::unique_ptr<Section> section = nullptr; 12 14 int currentSpineIndex = 0; 13 15 int nextPageNumber = 0; 16 + std::optional<uint16_t> pendingPageJump; 14 17 // Set when navigating to a footnote href with a fragment (e.g. #note1). 15 18 // Cleared on the next render after the new section loads and resolves it to a page. 16 19 std::string pendingAnchor;
+27 -2
src/activities/reader/KOReaderSyncActivity.cpp
··· 6 6 #include <WiFi.h> 7 7 #include <esp_sntp.h> 8 8 9 + #include "Epub/Section.h" 9 10 #include "KOReaderCredentialStore.h" 10 11 #include "KOReaderDocumentId.h" 11 12 #include "MappedInputManager.h" ··· 14 15 #include "fontIds.h" 15 16 16 17 namespace { 18 + CrossPointPosition makeLocalPositionWithParagraph(const int spineIndex, const int page, const int totalPages, 19 + const std::optional<uint16_t>& paragraphIndex) { 20 + CrossPointPosition pos = {spineIndex, page, totalPages}; 21 + if (paragraphIndex.has_value()) { 22 + pos.paragraphIndex = *paragraphIndex; 23 + pos.hasParagraphIndex = true; 24 + } 25 + return pos; 26 + } 27 + 17 28 void syncTimeWithNTP() { 18 29 // Stop SNTP if already running (can't reconfigure while running) 19 30 if (esp_sntp_enabled()) { ··· 135 146 KOReaderPosition koPos = {remoteProgress.progress, remoteProgress.percentage}; 136 147 remotePosition = ProgressMapper::toCrossPoint(epub, koPos, currentSpineIndex, totalPagesInSpine); 137 148 149 + // If XPath carried a paragraph index, refine the page using the section cache's 150 + // per-page paragraph LUT instead of anchor matching. 
151 + if (remotePosition.hasParagraphIndex) { 152 + Section tempSection(epub, remotePosition.spineIndex, renderer); 153 + const auto paragraphPage = tempSection.getPageForParagraphIndex(remotePosition.paragraphIndex); 154 + if (paragraphPage.has_value()) { 155 + LOG_DBG("KOSync", "Paragraph %u resolved to page %d (was %d)", remotePosition.paragraphIndex, *paragraphPage, 156 + remotePosition.pageNumber); 157 + remotePosition.pageNumber = *paragraphPage; 158 + } 159 + } 160 + 138 161 // Calculate local progress in KOReader format (for display) 139 - CrossPointPosition localPos = {currentSpineIndex, currentPage, totalPagesInSpine}; 162 + CrossPointPosition localPos = 163 + makeLocalPositionWithParagraph(currentSpineIndex, currentPage, totalPagesInSpine, currentParagraphIndex); 140 164 localProgress = ProgressMapper::toKOReader(epub, localPos); 141 165 142 166 { ··· 162 186 requestUpdateAndWait(); 163 187 164 188 // Convert current position to KOReader format 165 - CrossPointPosition localPos = {currentSpineIndex, currentPage, totalPagesInSpine}; 189 + CrossPointPosition localPos = 190 + makeLocalPositionWithParagraph(currentSpineIndex, currentPage, totalPagesInSpine, currentParagraphIndex); 166 191 KOReaderPosition koPos = ProgressMapper::toKOReader(epub, localPos); 167 192 168 193 KOReaderProgress progress;
+5 -1
src/activities/reader/KOReaderSyncActivity.h
··· 3 3 4 4 #include <functional> 5 5 #include <memory> 6 + #include <optional> 6 7 7 8 #include "KOReaderSyncClient.h" 8 9 #include "ProgressMapper.h" ··· 22 23 public: 23 24 explicit KOReaderSyncActivity(GfxRenderer& renderer, MappedInputManager& mappedInput, 24 25 const std::shared_ptr<Epub>& epub, const std::string& epubPath, int currentSpineIndex, 25 - int currentPage, int totalPagesInSpine) 26 + int currentPage, int totalPagesInSpine, 27 + std::optional<uint16_t> currentParagraphIndex = std::nullopt) 26 28 : Activity("KOReaderSync", renderer, mappedInput), 27 29 epub(epub), 28 30 epubPath(epubPath), 29 31 currentSpineIndex(currentSpineIndex), 30 32 currentPage(currentPage), 31 33 totalPagesInSpine(totalPagesInSpine), 34 + currentParagraphIndex(currentParagraphIndex), 32 35 remoteProgress{}, 33 36 remotePosition{}, 34 37 localProgress{} {} ··· 58 61 int currentSpineIndex; 59 62 int currentPage; 60 63 int totalPagesInSpine; 64 + std::optional<uint16_t> currentParagraphIndex; 61 65 62 66 State state = WIFI_SELECTION; 63 67 std::string statusMessage;