// A fork of https://github.com/crosspoint-reader/crosspoint-reader
#include "ProgressMapper.h"

#include <Logging.h>

#include <algorithm>
#include <climits>
#include <cmath>
#include <cstring>

#include "ChapterXPathResolver.h"
#include "Epub/htmlEntities.h"
#include "Utf8.h"
12
13namespace {
// Parses the decimal number found between `prefix` and the next ']' in `xpath`,
// e.g. parseIndex("/body/DocFragment[12]/body", "/body/DocFragment[") == 12.
//
// xpath:  string to search.
// prefix: NUL-terminated text that must immediately precede the number.
// last:   when true the LAST occurrence of `prefix` is used, otherwise the first.
//
// Returns the parsed value, or -1 when the prefix or the closing ']' is missing, the
// bracket is empty, a non-digit appears inside it, or the number does not fit in an int.
int parseIndex(const std::string& xpath, const char* prefix, bool last = false) {
  const size_t pos = last ? xpath.rfind(prefix) : xpath.find(prefix);
  if (pos == std::string::npos) return -1;

  const size_t numStart = pos + strlen(prefix);
  const size_t numEnd = xpath.find(']', numStart);
  if (numEnd == std::string::npos || numEnd == numStart) return -1;

  int val = 0;
  for (size_t i = numStart; i < numEnd; i++) {
    const char ch = xpath[i];
    if (ch < '0' || ch > '9') return -1;
    const int digit = ch - '0';
    // Reject before multiplying: signed overflow is undefined behaviour and the
    // xpath originates outside this file.
    if (val > (INT_MAX - digit) / 10) return -1;
    val = val * 10 + digit;
  }
  return val;
}
28
// Parses the character offset that follows the last "text()" step of `xpath`, i.e. the
// digits after the first '.' found at or after that step ("…/text().17" -> 17).
//
// Returns 0 when there is no "text()" step, no '.' after it, nothing after the '.',
// any non-digit in the remainder of the string, or a number too large for an int.
int parseCharOffset(const std::string& xpath) {
  const size_t textPos = xpath.rfind("text()");
  if (textPos == std::string::npos) return 0;

  const size_t dotPos = xpath.find('.', textPos);
  if (dotPos == std::string::npos || dotPos + 1 >= xpath.size()) return 0;

  int val = 0;
  for (size_t i = dotPos + 1; i < xpath.size(); i++) {
    const char ch = xpath[i];
    if (ch < '0' || ch > '9') return 0;
    const int digit = ch - '0';
    // Treat an offset that would overflow like any other malformed offset; signed
    // overflow is undefined behaviour.
    if (val > (INT_MAX - digit) / 10) return 0;
    val = val * 10 + digit;
  }
  return val;
}
41
42class ParagraphStreamer final : public Print {
43 size_t bytesWritten = 0;
44 bool globalInTag = false;
45 bool globalInEntity = false;
46 enum { IDLE, SAW_LT, SAW_LT_P } pState = IDLE;
47 static constexpr size_t MAX_ENTITY_SIZE = 16;
48 char entityBuffer[MAX_ENTITY_SIZE] = {};
49 size_t entityLen = 0;
50
51 // Forward mode: count paragraphs at a byte offset
52 size_t fwdTarget;
53 int fwdResult = 0;
54 bool fwdCaptured = false;
55
56 // Reverse mode: find position of Nth paragraph + char offset
57 int revParagraph;
58 int revChar;
59 int pCount = 0;
60 bool revPFound = false;
61 bool revDone = false;
62 int revVisChars = 0; // Visible chars counted WITHIN target paragraph
63 size_t totalVisChars = 0; // Total visible chars in entire file
64 size_t targetVisChars = 0; // Visible chars from start of file to target position
65
66 void onP() {
67 pCount++;
68 if (!revPFound && revParagraph > 0 && pCount >= revParagraph) {
69 revPFound = true;
70 revVisChars = 0;
71 if (revChar <= 0) {
72 targetVisChars = totalVisChars;
73 revDone = true;
74 }
75 }
76 }
77
78 void onVisibleCodepoint() {
79 totalVisChars++;
80 if (revPFound && !revDone) {
81 revVisChars++;
82 if (revVisChars >= revChar) {
83 targetVisChars = totalVisChars;
84 revDone = true;
85 }
86 }
87 }
88
89 void onVisibleText(const char* text) {
90 if (!text) {
91 return;
92 }
93
94 const unsigned char* ptr = reinterpret_cast<const unsigned char*>(text);
95 while (*ptr != 0) {
96 utf8NextCodepoint(&ptr);
97 onVisibleCodepoint();
98 }
99 }
100
101 void flushEntityAsLiteral() {
102 for (size_t i = 0; i < entityLen; i++) {
103 onVisibleCodepoint();
104 }
105 }
106
107 void finishEntity() {
108 entityBuffer[entityLen] = '\0';
109 const char* resolved = lookupHtmlEntity(entityBuffer, entityLen);
110 if (resolved) {
111 onVisibleText(resolved);
112 } else {
113 flushEntityAsLiteral();
114 }
115 globalInEntity = false;
116 entityLen = 0;
117 }
118
119 public:
120 explicit ParagraphStreamer(size_t targetByte) : fwdTarget(targetByte), revParagraph(0), revChar(0) {}
121 ParagraphStreamer(int paragraph, int charOff) : fwdTarget(SIZE_MAX), revParagraph(paragraph), revChar(charOff) {}
122
123 size_t write(uint8_t c) override {
124 if (!fwdCaptured && bytesWritten >= fwdTarget) {
125 fwdResult = pCount;
126 fwdCaptured = true;
127 }
128 bytesWritten++;
129
130 if (globalInEntity) {
131 if (entityLen + 1 < MAX_ENTITY_SIZE) {
132 entityBuffer[entityLen++] = static_cast<char>(c);
133 } else {
134 flushEntityAsLiteral();
135 globalInEntity = false;
136 entityLen = 0;
137 }
138
139 if (globalInEntity) {
140 if (c == ';') {
141 finishEntity();
142 } else if (c == '<' || c == ' ' || c == '\t' || c == '\n' || c == '\r') {
143 flushEntityAsLiteral();
144 globalInEntity = false;
145 entityLen = 0;
146 }
147 }
148 } else if (c == '<') {
149 globalInTag = true;
150 } else if (c == '>') {
151 globalInTag = false;
152 } else if (!globalInTag) {
153 if (c == '&') {
154 globalInEntity = true;
155 entityBuffer[0] = '&';
156 entityLen = 1;
157 } else {
158 const bool startsCodepoint = (c & 0xC0) != 0x80;
159 if (startsCodepoint) {
160 onVisibleCodepoint();
161 }
162 }
163 }
164
165 // Paragraph detection
166 switch (pState) {
167 case IDLE:
168 if (c == '<') pState = SAW_LT;
169 break;
170 case SAW_LT:
171 pState = (c == 'p' || c == 'P') ? SAW_LT_P : ((c == '<') ? SAW_LT : IDLE);
172 break;
173 case SAW_LT_P:
174 if (c == '>' || c == '/' || c == ' ' || c == '\t' || c == '\n' || c == '\r') onP();
175 pState = (c == '<') ? SAW_LT : IDLE;
176 break;
177 }
178 return 1;
179 }
180
181 size_t write(const uint8_t* buffer, size_t size) override {
182 for (size_t i = 0; i < size; i++) write(buffer[i]);
183 return size;
184 }
185
186 public:
187 int paragraphCount() const { return fwdCaptured ? fwdResult : pCount; }
188 size_t totalBytes() const { return bytesWritten; }
189 bool found() const { return revDone || revPFound; }
190 float progress() const {
191 return totalVisChars > 0 ? static_cast<float>(targetVisChars) / static_cast<float>(totalVisChars) : 0.0f;
192 }
193};
194
195bool streamSpine(const std::shared_ptr<Epub>& epub, int spineIndex, ParagraphStreamer& s) {
196 const auto href = epub->getSpineItem(spineIndex).href;
197 return !href.empty() && epub->readItemContentsToStream(href, s, 1024);
198}
199} // namespace
200
201KOReaderPosition ProgressMapper::toKOReader(const std::shared_ptr<Epub>& epub, const CrossPointPosition& pos) {
202 KOReaderPosition result;
203 float intra = (pos.totalPages > 0) ? static_cast<float>(pos.pageNumber) / static_cast<float>(pos.totalPages) : 0.0f;
204 result.percentage = epub->calculateProgress(pos.spineIndex, intra);
205 if (pos.hasParagraphIndex && pos.paragraphIndex > 0) {
206 result.xpath = ChapterXPathResolver::findXPathForParagraph(epub, pos.spineIndex, pos.paragraphIndex);
207 } else {
208 result.xpath = ChapterXPathResolver::findXPathForProgress(epub, pos.spineIndex, intra);
209 }
210 if (result.xpath.empty()) {
211 result.xpath = generateXPath(epub, pos.spineIndex, intra);
212 }
213 LOG_DBG("PM", "-> KO: spine=%d page=%d/%d %.2f%% %s", pos.spineIndex, pos.pageNumber, pos.totalPages,
214 result.percentage * 100, result.xpath.c_str());
215 return result;
216}
217
218CrossPointPosition ProgressMapper::toCrossPoint(const std::shared_ptr<Epub>& epub, const KOReaderPosition& koPos,
219 int currentSpineIndex, int totalPagesInCurrentSpine) {
220 CrossPointPosition result{};
221 const size_t bookSize = epub->getBookSize();
222 if (bookSize == 0) return result;
223
224 const int spineCount = epub->getSpineItemsCount();
225 const float clampedPercentage = std::max(0.0f, std::min(1.0f, koPos.percentage));
226 const size_t targetBytes = static_cast<size_t>(static_cast<float>(bookSize) * clampedPercentage);
227
228 const int docFrag = parseIndex(koPos.xpath, "/body/DocFragment[");
229 const int xpathP = parseIndex(koPos.xpath, "/p[", true);
230 const int xpathChar = parseCharOffset(koPos.xpath);
231 const int xpathSpine = (docFrag >= 1) ? (docFrag - 1) : -1;
232 if (xpathP > 0) {
233 result.paragraphIndex = static_cast<uint16_t>(xpathP);
234 result.hasParagraphIndex = true;
235 }
236
237 if (xpathSpine >= 0 && xpathSpine < spineCount) {
238 result.spineIndex = xpathSpine;
239 } else {
240 for (int i = 0; i < spineCount; i++) {
241 if (epub->getCumulativeSpineItemSize(i) >= targetBytes) {
242 result.spineIndex = i;
243 break;
244 }
245 }
246 }
247 if (result.spineIndex >= spineCount) return result;
248
249 const size_t prevCum = (result.spineIndex > 0) ? epub->getCumulativeSpineItemSize(result.spineIndex - 1) : 0;
250 const size_t spineSize = epub->getCumulativeSpineItemSize(result.spineIndex) - prevCum;
251
252 if (result.spineIndex == currentSpineIndex && totalPagesInCurrentSpine > 0) {
253 result.totalPages = totalPagesInCurrentSpine;
254 } else if (currentSpineIndex >= 0 && currentSpineIndex < spineCount && totalPagesInCurrentSpine > 0) {
255 const size_t pc = (currentSpineIndex > 0) ? epub->getCumulativeSpineItemSize(currentSpineIndex - 1) : 0;
256 const size_t cs = epub->getCumulativeSpineItemSize(currentSpineIndex) - pc;
257 if (cs > 0)
258 result.totalPages = std::max(
259 1, static_cast<int>(totalPagesInCurrentSpine * static_cast<float>(spineSize) / static_cast<float>(cs)));
260 }
261 if (spineSize == 0 || result.totalPages == 0) return result;
262
263 float intra = 0.0f;
264 if (xpathP > 0) {
265 ParagraphStreamer s(xpathP, xpathChar);
266 if (streamSpine(epub, result.spineIndex, s) && s.found()) {
267 intra = s.progress();
268 LOG_DBG("PM", "XPath p[%d]+%d -> %.1f%%", xpathP, xpathChar, intra * 100);
269 }
270 }
271 if (intra <= 0.0f) {
272 const size_t bytesIn = (targetBytes > prevCum) ? (targetBytes - prevCum) : 0;
273 intra = std::max(0.0f, std::min(1.0f, static_cast<float>(bytesIn) / static_cast<float>(spineSize)));
274 }
275
276 result.pageNumber = std::max(0, std::min(static_cast<int>(intra * result.totalPages), result.totalPages - 1));
277 LOG_DBG("PM", "<- KO: %.2f%% %s -> spine=%d page=%d/%d", koPos.percentage * 100, koPos.xpath.c_str(),
278 result.spineIndex, result.pageNumber, result.totalPages);
279 return result;
280}
281
282std::string ProgressMapper::generateXPath(const std::shared_ptr<Epub>& epub, int spineIndex, float intra) {
283 const std::string base = "/body/DocFragment[" + std::to_string(spineIndex + 1) + "]/body";
284 if (intra <= 0.0f) return base;
285
286 size_t spineSize = 0;
287 const auto href = epub->getSpineItem(spineIndex).href;
288 if (href.empty() || !epub->getItemSize(href, &spineSize) || spineSize == 0) return base;
289
290 ParagraphStreamer s(static_cast<size_t>(spineSize * std::min(intra, 1.0f)));
291 if (!streamSpine(epub, spineIndex, s)) return base;
292
293 const int p = s.paragraphCount();
294 return (p > 0) ? base + "/p[" + std::to_string(p) + "]" : base;
295}