A fork of https://github.com/crosspoint-reader/crosspoint-reader
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at records-reader 295 lines 10 kB view raw
1#include "ProgressMapper.h" 2 3#include <Logging.h> 4 5#include <algorithm> 6#include <cmath> 7#include <cstring> 8 9#include "ChapterXPathResolver.h" 10#include "Epub/htmlEntities.h" 11#include "Utf8.h" 12 13namespace { 14int parseIndex(const std::string& xpath, const char* prefix, bool last = false) { 15 const size_t prefixLen = strlen(prefix); 16 const size_t pos = last ? xpath.rfind(prefix) : xpath.find(prefix); 17 if (pos == std::string::npos) return -1; 18 const size_t numStart = pos + prefixLen; 19 const size_t numEnd = xpath.find(']', numStart); 20 if (numEnd == std::string::npos || numEnd == numStart) return -1; 21 int val = 0; 22 for (size_t i = numStart; i < numEnd; i++) { 23 if (xpath[i] < '0' || xpath[i] > '9') return -1; 24 val = val * 10 + (xpath[i] - '0'); 25 } 26 return val; 27} 28 29int parseCharOffset(const std::string& xpath) { 30 const size_t textPos = xpath.rfind("text()"); 31 if (textPos == std::string::npos) return 0; 32 const size_t dotPos = xpath.find('.', textPos); 33 if (dotPos == std::string::npos || dotPos + 1 >= xpath.size()) return 0; 34 int val = 0; 35 for (size_t i = dotPos + 1; i < xpath.size(); i++) { 36 if (xpath[i] < '0' || xpath[i] > '9') return 0; 37 val = val * 10 + (xpath[i] - '0'); 38 } 39 return val; 40} 41 42class ParagraphStreamer final : public Print { 43 size_t bytesWritten = 0; 44 bool globalInTag = false; 45 bool globalInEntity = false; 46 enum { IDLE, SAW_LT, SAW_LT_P } pState = IDLE; 47 static constexpr size_t MAX_ENTITY_SIZE = 16; 48 char entityBuffer[MAX_ENTITY_SIZE] = {}; 49 size_t entityLen = 0; 50 51 // Forward mode: count paragraphs at a byte offset 52 size_t fwdTarget; 53 int fwdResult = 0; 54 bool fwdCaptured = false; 55 56 // Reverse mode: find position of Nth paragraph + char offset 57 int revParagraph; 58 int revChar; 59 int pCount = 0; 60 bool revPFound = false; 61 bool revDone = false; 62 int revVisChars = 0; // Visible chars counted WITHIN target paragraph 63 size_t totalVisChars = 0; // Total visible chars in entire file 64 size_t targetVisChars = 0; // Visible chars from start of file to target position 65 66 void onP() { 67 pCount++; 68 if (!revPFound && revParagraph > 0 && pCount >= revParagraph) { 69 revPFound = true; 70 revVisChars = 0; 71 if (revChar <= 0) { 72 targetVisChars = totalVisChars; 73 revDone = true; 74 } 75 } 76 } 77 78 void onVisibleCodepoint() { 79 totalVisChars++; 80 if (revPFound && !revDone) { 81 revVisChars++; 82 if (revVisChars >= revChar) { 83 targetVisChars = totalVisChars; 84 revDone = true; 85 } 86 } 87 } 88 89 void onVisibleText(const char* text) { 90 if (!text) { 91 return; 92 } 93 94 const unsigned char* ptr = reinterpret_cast<const unsigned char*>(text); 95 while (*ptr != 0) { 96 utf8NextCodepoint(&ptr); 97 onVisibleCodepoint(); 98 } 99 } 100 101 void flushEntityAsLiteral() { 102 for (size_t i = 0; i < entityLen; i++) { 103 onVisibleCodepoint(); 104 } 105 } 106 107 void finishEntity() { 108 entityBuffer[entityLen] = '\0'; 109 const char* resolved = lookupHtmlEntity(entityBuffer, entityLen); 110 if (resolved) { 111 onVisibleText(resolved); 112 } else { 113 flushEntityAsLiteral(); 114 } 115 globalInEntity = false; 116 entityLen = 0; 117 } 118 119 public: 120 explicit ParagraphStreamer(size_t targetByte) : fwdTarget(targetByte), revParagraph(0), revChar(0) {} 121 ParagraphStreamer(int paragraph, int charOff) : fwdTarget(SIZE_MAX), revParagraph(paragraph), revChar(charOff) {} 122 123 size_t write(uint8_t c) override { 124 if (!fwdCaptured && bytesWritten >= fwdTarget) { 125 fwdResult = pCount; 126 fwdCaptured = true; 127 } 128 bytesWritten++; 129 130 if (globalInEntity) { 131 if (entityLen + 1 < MAX_ENTITY_SIZE) { 132 entityBuffer[entityLen++] = static_cast<char>(c); 133 } else { 134 flushEntityAsLiteral(); 135 globalInEntity = false; 136 entityLen = 0; 137 } 138 139 if (globalInEntity) { 140 if (c == ';') { 141 finishEntity(); 142 } else if (c == '<' || c == ' ' || c == '\t' || c == '\n' || c == '\r') { 143 flushEntityAsLiteral(); 144 globalInEntity = false; 145 entityLen = 0; 146 } 147 } 148 } else if (c == '<') { 149 globalInTag = true; 150 } else if (c == '>') { 151 globalInTag = false; 152 } else if (!globalInTag) { 153 if (c == '&') { 154 globalInEntity = true; 155 entityBuffer[0] = '&'; 156 entityLen = 1; 157 } else { 158 const bool startsCodepoint = (c & 0xC0) != 0x80; 159 if (startsCodepoint) { 160 onVisibleCodepoint(); 161 } 162 } 163 } 164 165 // Paragraph detection 166 switch (pState) { 167 case IDLE: 168 if (c == '<') pState = SAW_LT; 169 break; 170 case SAW_LT: 171 pState = (c == 'p' || c == 'P') ? SAW_LT_P : ((c == '<') ? SAW_LT : IDLE); 172 break; 173 case SAW_LT_P: 174 if (c == '>' || c == '/' || c == ' ' || c == '\t' || c == '\n' || c == '\r') onP(); 175 pState = (c == '<') ? SAW_LT : IDLE; 176 break; 177 } 178 return 1; 179 } 180 181 size_t write(const uint8_t* buffer, size_t size) override { 182 for (size_t i = 0; i < size; i++) write(buffer[i]); 183 return size; 184 } 185 186 public: 187 int paragraphCount() const { return fwdCaptured ? fwdResult : pCount; } 188 size_t totalBytes() const { return bytesWritten; } 189 bool found() const { return revDone || revPFound; } 190 float progress() const { 191 return totalVisChars > 0 ? static_cast<float>(targetVisChars) / static_cast<float>(totalVisChars) : 0.0f; 192 } 193}; 194 195bool streamSpine(const std::shared_ptr<Epub>& epub, int spineIndex, ParagraphStreamer& s) { 196 const auto href = epub->getSpineItem(spineIndex).href; 197 return !href.empty() && epub->readItemContentsToStream(href, s, 1024); 198} 199} // namespace 200 201KOReaderPosition ProgressMapper::toKOReader(const std::shared_ptr<Epub>& epub, const CrossPointPosition& pos) { 202 KOReaderPosition result; 203 float intra = (pos.totalPages > 0) ? static_cast<float>(pos.pageNumber) / static_cast<float>(pos.totalPages) : 0.0f; 204 result.percentage = epub->calculateProgress(pos.spineIndex, intra); 205 if (pos.hasParagraphIndex && pos.paragraphIndex > 0) { 206 result.xpath = ChapterXPathResolver::findXPathForParagraph(epub, pos.spineIndex, pos.paragraphIndex); 207 } else { 208 result.xpath = ChapterXPathResolver::findXPathForProgress(epub, pos.spineIndex, intra); 209 } 210 if (result.xpath.empty()) { 211 result.xpath = generateXPath(epub, pos.spineIndex, intra); 212 } 213 LOG_DBG("PM", "-> KO: spine=%d page=%d/%d %.2f%% %s", pos.spineIndex, pos.pageNumber, pos.totalPages, 214 result.percentage * 100, result.xpath.c_str()); 215 return result; 216} 217 218CrossPointPosition ProgressMapper::toCrossPoint(const std::shared_ptr<Epub>& epub, const KOReaderPosition& koPos, 219 int currentSpineIndex, int totalPagesInCurrentSpine) { 220 CrossPointPosition result{}; 221 const size_t bookSize = epub->getBookSize(); 222 if (bookSize == 0) return result; 223 224 const int spineCount = epub->getSpineItemsCount(); 225 const float clampedPercentage = std::max(0.0f, std::min(1.0f, koPos.percentage)); 226 const size_t targetBytes = static_cast<size_t>(static_cast<float>(bookSize) * clampedPercentage); 227 228 const int docFrag = parseIndex(koPos.xpath, "/body/DocFragment["); 229 const int xpathP = parseIndex(koPos.xpath, "/p[", true); 230 const int xpathChar = parseCharOffset(koPos.xpath); 231 const int xpathSpine = (docFrag >= 1) ? (docFrag - 1) : -1; 232 if (xpathP > 0) { 233 result.paragraphIndex = static_cast<uint16_t>(xpathP); 234 result.hasParagraphIndex = true; 235 } 236 237 if (xpathSpine >= 0 && xpathSpine < spineCount) { 238 result.spineIndex = xpathSpine; 239 } else { 240 for (int i = 0; i < spineCount; i++) { 241 if (epub->getCumulativeSpineItemSize(i) >= targetBytes) { 242 result.spineIndex = i; 243 break; 244 } 245 } 246 } 247 if (result.spineIndex >= spineCount) return result; 248 249 const size_t prevCum = (result.spineIndex > 0) ? epub->getCumulativeSpineItemSize(result.spineIndex - 1) : 0; 250 const size_t spineSize = epub->getCumulativeSpineItemSize(result.spineIndex) - prevCum; 251 252 if (result.spineIndex == currentSpineIndex && totalPagesInCurrentSpine > 0) { 253 result.totalPages = totalPagesInCurrentSpine; 254 } else if (currentSpineIndex >= 0 && currentSpineIndex < spineCount && totalPagesInCurrentSpine > 0) { 255 const size_t pc = (currentSpineIndex > 0) ? epub->getCumulativeSpineItemSize(currentSpineIndex - 1) : 0; 256 const size_t cs = epub->getCumulativeSpineItemSize(currentSpineIndex) - pc; 257 if (cs > 0) 258 result.totalPages = std::max( 259 1, static_cast<int>(totalPagesInCurrentSpine * static_cast<float>(spineSize) / static_cast<float>(cs))); 260 } 261 if (spineSize == 0 || result.totalPages == 0) return result; 262 263 float intra = 0.0f; 264 if (xpathP > 0) { 265 ParagraphStreamer s(xpathP, xpathChar); 266 if (streamSpine(epub, result.spineIndex, s) && s.found()) { 267 intra = s.progress(); 268 LOG_DBG("PM", "XPath p[%d]+%d -> %.1f%%", xpathP, xpathChar, intra * 100); 269 } 270 } 271 if (intra <= 0.0f) { 272 const size_t bytesIn = (targetBytes > prevCum) ? (targetBytes - prevCum) : 0; 273 intra = std::max(0.0f, std::min(1.0f, static_cast<float>(bytesIn) / static_cast<float>(spineSize))); 274 } 275 276 result.pageNumber = std::max(0, std::min(static_cast<int>(intra * result.totalPages), result.totalPages - 1)); 277 LOG_DBG("PM", "<- KO: %.2f%% %s -> spine=%d page=%d/%d", koPos.percentage * 100, koPos.xpath.c_str(), 278 result.spineIndex, result.pageNumber, result.totalPages); 279 return result; 280} 281 282std::string ProgressMapper::generateXPath(const std::shared_ptr<Epub>& epub, int spineIndex, float intra) { 283 const std::string base = "/body/DocFragment[" + std::to_string(spineIndex + 1) + "]/body"; 284 if (intra <= 0.0f) return base; 285 286 size_t spineSize = 0; 287 const auto href = epub->getSpineItem(spineIndex).href; 288 if (href.empty() || !epub->getItemSize(href, &spineSize) || spineSize == 0) return base; 289 290 ParagraphStreamer s(static_cast<size_t>(spineSize * std::min(intra, 1.0f))); 291 if (!streamSpine(epub, spineIndex, s)) return base; 292 293 const int p = s.paragraphCount(); 294 return (p > 0) ? base + "/p[" + std::to_string(p) + "]" : base; 295}