fix: Fix hyphenation and rendering of decomposed characters (#1037)

+29 -5

lib/EpdFont/EpdFont.cpp

··· 17 17 18 18 int cursorX = startX; 19 19 const int cursorY = startY; 20 + int lastBaseX = startX; 21 + int lastBaseAdvance = 0; 22 + int lastBaseTop = 0; 23 + bool hasBaseGlyph = false; 24 + constexpr int MIN_COMBINING_GAP_PX = 1; 20 25 uint32_t cp; 21 26 while ((cp = utf8NextCodepoint(reinterpret_cast<const uint8_t**>(&string)))) { 22 27 const EpdGlyph* glyph = getGlyph(cp); ··· 30 35 continue; 31 36 } 32 37 33 - *minX = std::min(*minX, cursorX + glyph->left); 34 - *maxX = std::max(*maxX, cursorX + glyph->left + glyph->width); 35 - *minY = std::min(*minY, cursorY + glyph->top - glyph->height); 36 - *maxY = std::max(*maxY, cursorY + glyph->top); 37 - cursorX += glyph->advanceX; 38 + const bool isCombining = utf8IsCombiningMark(cp); 39 + int raiseBy = 0; 40 + if (isCombining && hasBaseGlyph) { 41 + const int currentGap = glyph->top - glyph->height - lastBaseTop; 42 + if (currentGap < MIN_COMBINING_GAP_PX) { 43 + raiseBy = MIN_COMBINING_GAP_PX - currentGap; 44 + } 45 + } 46 + 47 + const int glyphBaseX = (isCombining && hasBaseGlyph) ? (lastBaseX + lastBaseAdvance / 2) : cursorX; 48 + const int glyphBaseY = cursorY - raiseBy; 49 + 50 + *minX = std::min(*minX, glyphBaseX + glyph->left); 51 + *maxX = std::max(*maxX, glyphBaseX + glyph->left + glyph->width); 52 + *minY = std::min(*minY, glyphBaseY + glyph->top - glyph->height); 53 + *maxY = std::max(*maxY, glyphBaseY + glyph->top); 54 + 55 + if (!isCombining) { 56 + lastBaseX = cursorX; 57 + lastBaseAdvance = glyph->advanceX; 58 + lastBaseTop = glyph->top; 59 + hasBaseGlyph = true; 60 + cursorX += glyph->advanceX; 61 + } 38 62 } 39 63 } 40 64

+24 -9

lib/Epub/Epub/ParsedText.cpp

··· 378 378 words.insert(insertWordIt, remainder); 379 379 wordStyles.insert(insertStyleIt, style); 380 380 381 - // The remainder inherits whatever continuation status the original word had with the word after it. 382 - // Find the continues entry for the original word and insert the remainder's entry after it. 381 + // Continuation flag handling after splitting a word into prefix + remainder. 382 + // 383 + // The prefix keeps the original word's continuation flag so that no-break-space groups 384 + // stay linked. The remainder always gets continues=false because it starts on the next 385 + // line and is not attached to the prefix. 386 + // 387 + // Example: "200 Quadratkilometer" produces tokens: 388 + // [0] "200" continues=false 389 + // [1] " " continues=true 390 + // [2] "Quadratkilometer" continues=true <-- the word being split 391 + // 392 + // After splitting "Quadratkilometer" at "Quadrat-" / "kilometer": 393 + // [0] "200" continues=false 394 + // [1] " " continues=true 395 + // [2] "Quadrat-" continues=true (KEPT — still attached to the no-break group) 396 + // [3] "kilometer" continues=false (NEW — starts fresh on the next line) 397 + // 398 + // This lets the backtracking loop keep the entire prefix group ("200 Quadrat-") on one 399 + // line, while "kilometer" moves to the next line. 383 400 auto continuesIt = wordContinues.begin(); 384 401 std::advance(continuesIt, wordIndex); 385 - const bool originalContinuedToNext = *continuesIt; 386 - // The original word (now prefix) does NOT continue to remainder (hyphen separates them) 387 - *continuesIt = false; 402 + // *continuesIt is intentionally left unchanged — the prefix keeps its original attachment. 
388 403 const auto insertContinuesIt = std::next(continuesIt); 389 - wordContinues.insert(insertContinuesIt, originalContinuedToNext); 404 + wordContinues.insert(insertContinuesIt, false); 390 405 391 - // Keep the indexed vector in sync if provided 406 + // Keep the indexed vector in sync if provided. 392 407 if (continuesVec) { 393 - (*continuesVec)[wordIndex] = false; 394 - continuesVec->insert(continuesVec->begin() + wordIndex + 1, originalContinuedToNext); 408 + // (*continuesVec)[wordIndex] stays unchanged — prefix keeps its attachment. 409 + continuesVec->insert(continuesVec->begin() + wordIndex + 1, false); 395 410 } 396 411 397 412 // Update cached widths to reflect the new prefix/remainder pairing.

+207

lib/Epub/Epub/hyphenation/HyphenationCommon.cpp

··· 174 174 while (*ptr != 0) { 175 175 const unsigned char* current = ptr; 176 176 const uint32_t cp = utf8NextCodepoint(&ptr); 177 + // If this is a combining diacritic (e.g., U+0301 = acute) and there's 178 + // a previous base character that can be composed into a single 179 + // precomposed Unicode scalar (Latin-1 / Latin-Extended), do that 180 + // composition here. This provides lightweight NFC-like behavior for 181 + // common Western European diacritics (acute, grave, circumflex, tilde, 182 + // diaeresis, cedilla) without pulling in a full Unicode normalization 183 + // library. 184 + if (!cps.empty()) { 185 + uint32_t prev = cps.back().value; 186 + uint32_t composed = 0; 187 + switch (cp) { 188 + case 0x0300: // grave 189 + switch (prev) { 190 + case 0x0041: 191 + composed = 0x00C0; 192 + break; // A -> À 193 + case 0x0061: 194 + composed = 0x00E0; 195 + break; // a -> à 196 + case 0x0045: 197 + composed = 0x00C8; 198 + break; // E -> È 199 + case 0x0065: 200 + composed = 0x00E8; 201 + break; // e -> è 202 + case 0x0049: 203 + composed = 0x00CC; 204 + break; // I -> Ì 205 + case 0x0069: 206 + composed = 0x00EC; 207 + break; // i -> ì 208 + case 0x004F: 209 + composed = 0x00D2; 210 + break; // O -> Ò 211 + case 0x006F: 212 + composed = 0x00F2; 213 + break; // o -> ò 214 + case 0x0055: 215 + composed = 0x00D9; 216 + break; // U -> Ù 217 + case 0x0075: 218 + composed = 0x00F9; 219 + break; // u -> ù 220 + default: 221 + break; 222 + } 223 + break; 224 + case 0x0301: // acute 225 + switch (prev) { 226 + case 0x0041: 227 + composed = 0x00C1; 228 + break; // A -> Á 229 + case 0x0061: 230 + composed = 0x00E1; 231 + break; // a -> á 232 + case 0x0045: 233 + composed = 0x00C9; 234 + break; // E -> É 235 + case 0x0065: 236 + composed = 0x00E9; 237 + break; // e -> é 238 + case 0x0049: 239 + composed = 0x00CD; 240 + break; // I -> Í 241 + case 0x0069: 242 + composed = 0x00ED; 243 + break; // i -> í 244 + case 0x004F: 245 + composed = 0x00D3; 246 + break; // O -> Ó 
247 + case 0x006F: 248 + composed = 0x00F3; 249 + break; // o -> ó 250 + case 0x0055: 251 + composed = 0x00DA; 252 + break; // U -> Ú 253 + case 0x0075: 254 + composed = 0x00FA; 255 + break; // u -> ú 256 + case 0x0059: 257 + composed = 0x00DD; 258 + break; // Y -> Ý 259 + case 0x0079: 260 + composed = 0x00FD; 261 + break; // y -> ý 262 + default: 263 + break; 264 + } 265 + break; 266 + case 0x0302: // circumflex 267 + switch (prev) { 268 + case 0x0041: 269 + composed = 0x00C2; 270 + break; // A -> Â 271 + case 0x0061: 272 + composed = 0x00E2; 273 + break; // a -> â 274 + case 0x0045: 275 + composed = 0x00CA; 276 + break; // E -> Ê 277 + case 0x0065: 278 + composed = 0x00EA; 279 + break; // e -> ê 280 + case 0x0049: 281 + composed = 0x00CE; 282 + break; // I -> Î 283 + case 0x0069: 284 + composed = 0x00EE; 285 + break; // i -> î 286 + case 0x004F: 287 + composed = 0x00D4; 288 + break; // O -> Ô 289 + case 0x006F: 290 + composed = 0x00F4; 291 + break; // o -> ô 292 + case 0x0055: 293 + composed = 0x00DB; 294 + break; // U -> Û 295 + case 0x0075: 296 + composed = 0x00FB; 297 + break; // u -> û 298 + default: 299 + break; 300 + } 301 + break; 302 + case 0x0303: // tilde 303 + switch (prev) { 304 + case 0x0041: 305 + composed = 0x00C3; 306 + break; // A -> Ã 307 + case 0x0061: 308 + composed = 0x00E3; 309 + break; // a -> ã 310 + case 0x004E: 311 + composed = 0x00D1; 312 + break; // N -> Ñ 313 + case 0x006E: 314 + composed = 0x00F1; 315 + break; // n -> ñ 316 + default: 317 + break; 318 + } 319 + break; 320 + case 0x0308: // diaeresis/umlaut 321 + switch (prev) { 322 + case 0x0041: 323 + composed = 0x00C4; 324 + break; // A -> Ä 325 + case 0x0061: 326 + composed = 0x00E4; 327 + break; // a -> ä 328 + case 0x0045: 329 + composed = 0x00CB; 330 + break; // E -> Ë 331 + case 0x0065: 332 + composed = 0x00EB; 333 + break; // e -> ë 334 + case 0x0049: 335 + composed = 0x00CF; 336 + break; // I -> Ï 337 + case 0x0069: 338 + composed = 0x00EF; 339 + break; // i -> ï 340 + case 
0x004F: 341 + composed = 0x00D6; 342 + break; // O -> Ö 343 + case 0x006F: 344 + composed = 0x00F6; 345 + break; // o -> ö 346 + case 0x0055: 347 + composed = 0x00DC; 348 + break; // U -> Ü 349 + case 0x0075: 350 + composed = 0x00FC; 351 + break; // u -> ü 352 + case 0x0059: 353 + composed = 0x0178; 354 + break; // Y -> Ÿ 355 + case 0x0079: 356 + composed = 0x00FF; 357 + break; // y -> ÿ 358 + default: 359 + break; 360 + } 361 + break; 362 + case 0x0327: // cedilla 363 + switch (prev) { 364 + case 0x0043: 365 + composed = 0x00C7; 366 + break; // C -> Ç 367 + case 0x0063: 368 + composed = 0x00E7; 369 + break; // c -> ç 370 + default: 371 + break; 372 + } 373 + break; 374 + default: 375 + break; 376 + } 377 + 378 + if (composed != 0) { 379 + cps.back().value = composed; 380 + continue; // skip pushing the combining mark itself 381 + } 382 + } 383 + 177 384 cps.push_back({cp, static_cast<size_t>(current - base)}); 178 385 } 179 386

+49 -1

lib/Epub/Epub/hyphenation/Hyphenator.cpp

··· 1 1 #include "Hyphenator.h" 2 2 3 + #include <algorithm> 3 4 #include <vector> 4 5 5 6 #include "HyphenationCommon.h" 7 + #include "LanguageHyphenator.h" 6 8 #include "LanguageRegistry.h" 7 9 8 10 const LanguageHyphenator* Hyphenator::cachedHyphenator_ = nullptr; ··· 32 34 } 33 35 34 36 // Builds a vector of break information from explicit hyphen markers in the given codepoints. 37 + // Only hyphens that appear between two alphabetic characters are considered valid breaks. 38 + // 39 + // Example: "US-Satellitensystems" (cps: U, S, -, S, a, t, ...) 40 + // -> finds '-' at index 2 with alphabetic neighbors 'S' and 'S' 41 + // -> returns one BreakInfo at the byte offset of 'S' (the char after '-'), 42 + // with requiresInsertedHyphen=false because '-' is already visible. 43 + // 44 + // Example: "Satel\u00ADliten" (soft-hyphen between 'l' and 'l') 45 + // -> returns one BreakInfo with requiresInsertedHyphen=true (soft-hyphen 46 + // is invisible and needs a visible '-' when the break is used). 35 47 std::vector<Hyphenator::BreakInfo> buildExplicitBreakInfos(const std::vector<CodepointInfo>& cps) { 36 48 std::vector<Hyphenator::BreakInfo> breaks; 37 49 38 - // Scan every codepoint looking for explicit/soft hyphen markers that are surrounded by letters. 39 50 for (size_t i = 1; i + 1 < cps.size(); ++i) { 40 51 const uint32_t cp = cps[i].value; 41 52 if (!isExplicitHyphen(cp) || !isAlphabetic(cps[i - 1].value) || !isAlphabetic(cps[i + 1].value)) { ··· 63 74 // Explicit hyphen markers (soft or hard) take precedence over language breaks. 64 75 auto explicitBreakInfos = buildExplicitBreakInfos(cps); 65 76 if (!explicitBreakInfos.empty()) { 77 + // When a word contains explicit hyphens we also run Liang patterns on each alphabetic 78 + // segment between them. Without this, "US-Satellitensystems" would only offer one split 79 + // point (after "US-"), making it impossible to break mid-"Satellitensystems" even when 80 + // "US-Satelliten-" would fit on the line. 
81 + // 82 + // Example: "US-Satellitensystems" 83 + // Segments: ["US", "Satellitensystems"] 84 + // Explicit break: after "US-" -> @3 (no inserted hyphen) 85 + // Pattern breaks on "Satellitensystems" -> @5 Sa|tel (+hyphen) 86 + // @8 Satel|li (+hyphen) 87 + // @10 Satelli|ten (+hyphen) 88 + // @13 Satelliten|sys (+hyphen) 89 + // @16 Satellitensys|tems (+hyphen) 90 + // Result: 6 sorted break points; the line-breaker picks the widest prefix that fits. 91 + if (hyphenator) { 92 + size_t segStart = 0; 93 + for (size_t i = 0; i <= cps.size(); ++i) { 94 + const bool atEnd = (i == cps.size()); 95 + const bool atHyphen = !atEnd && isExplicitHyphen(cps[i].value); 96 + if (atEnd || atHyphen) { 97 + if (i > segStart) { 98 + std::vector<CodepointInfo> segment(cps.begin() + segStart, cps.begin() + i); 99 + auto segIndexes = hyphenator->breakIndexes(segment); 100 + for (const size_t idx : segIndexes) { 101 + const size_t cpIdx = segStart + idx; 102 + if (cpIdx < cps.size()) { 103 + explicitBreakInfos.push_back({cps[cpIdx].byteOffset, true}); 104 + } 105 + } 106 + } 107 + segStart = i + 1; 108 + } 109 + } 110 + // Merge explicit and pattern breaks into ascending byte-offset order. 111 + std::sort(explicitBreakInfos.begin(), explicitBreakInfos.end(), 112 + [](const BreakInfo& a, const BreakInfo& b) { return a.byteOffset < b.byteOffset; }); 113 + } 66 114 return explicitBreakInfos; 67 115 } 68 116

+17 -4

lib/Epub/Epub/hyphenation/Hyphenator.h

··· 9 9 class Hyphenator { 10 10 public: 11 11 struct BreakInfo { 12 - size_t byteOffset; 13 - bool requiresInsertedHyphen; 12 + size_t byteOffset; // Byte position inside the UTF-8 word where a break may occur. 13 + bool requiresInsertedHyphen; // true = a visible '-' must be rendered at the break (pattern/fallback breaks). 14 + // false = the word already contains a hyphen at this position (explicit '-'). 14 15 }; 15 - // Returns byte offsets where the word may be hyphenated. When includeFallback is true, all positions obeying the 16 - // minimum prefix/suffix constraints are returned even if no language-specific rule matches. 16 + 17 + // Returns byte offsets where the word may be hyphenated. 18 + // 19 + // Break sources (in priority order): 20 + // 1. Explicit hyphens already present in the word (e.g. '-' or soft-hyphen U+00AD). 21 + // When found, language patterns are additionally run on each alphabetic segment 22 + // between hyphens so compound words can break within their parts. 23 + // Example: "US-Satellitensystems" yields breaks after "US-" (no inserted hyphen) 24 + // plus pattern breaks inside "Satellitensystems" (Sa|tel|li|ten|sys|tems). 25 + // 2. Language-specific Liang patterns (e.g. German de_patterns). 26 + // Example: "Quadratkilometer" -> Qua|drat|ki|lo|me|ter. 27 + // 3. Fallback every-N-chars splitting (only when includeFallback is true AND no 28 + // pattern breaks were found). Used as a last resort to prevent a single oversized 29 + // word from overflowing the page width. 17 30 static std::vector<BreakInfo> breakOffsets(const std::string& word, bool includeFallback); 18 31 19 32 // Provide a publication-level language hint (e.g. "en", "en-US", "ru") used to select hyphenation rules.

+38 -6

lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp

··· 594 594 continue; 595 595 } 596 596 597 - // Detect U+00A0 (non-breaking space): UTF-8 encoding is 0xC2 0xA0 598 - // Render a visible space without allowing a line break around it. 597 + // Detect U+00A0 (non-breaking space, UTF-8: 0xC2 0xA0) or 598 + // U+202F (narrow no-break space, UTF-8: 0xE2 0x80 0xAF). 599 + // 600 + // Both are rendered as a visible space but must never allow a line break around them. 601 + // We split the no-break space into its own word token and link the surrounding words 602 + // with continuation flags so the layout engine treats them as an indivisible group. 603 + // 604 + // Example: "200 Quadratkilometer" or "200 Quadratkilometer" 605 + // Input bytes: "200\xC2\xA0Quadratkilometer" (or 0xE2 0x80 0xAF for U+202F) 606 + // Tokens produced: 607 + // [0] "200" continues=false 608 + // [1] " " continues=true (attaches to "200", no gap) 609 + // [2] "Quadratkilometer" continues=true (attaches to " ", no gap) 610 + // 611 + // The continuation flags prevent the line-breaker from inserting a line break 612 + // between "200" and "Quadratkilometer". However, "Quadratkilometer" is now a 613 + // standalone word for hyphenation purposes, so Liang patterns can produce 614 + // "200 Quadrat-" / "kilometer" instead of the unusable "200" / "Quadratkilometer". 599 615 if (static_cast<uint8_t>(s[i]) == 0xC2 && i + 1 < len && static_cast<uint8_t>(s[i + 1]) == 0xA0) { 600 - // Flush any pending text so style is applied correctly. 601 616 if (self->partWordBufferIndex > 0) { 602 617 self->flushPartWordBuffer(); 603 618 } 604 619 605 - // Add a standalone space that attaches to the previous word. 606 620 self->partWordBuffer[0] = ' '; 607 621 self->partWordBuffer[1] = '\0'; 608 622 self->partWordBufferIndex = 1; 609 623 self->nextWordContinues = true; // Attach space to previous word (no break). 610 624 self->flushPartWordBuffer(); 611 625 612 - // Ensure the next real word attaches to this space (no break). 
613 - self->nextWordContinues = true; 626 + self->nextWordContinues = true; // Next real word attaches to this space (no break). 614 627 615 628 i++; // Skip the second byte (0xA0) 629 + continue; 630 + } 631 + 632 + // U+202F (narrow no-break space) — identical logic to U+00A0 above. 633 + if (static_cast<uint8_t>(s[i]) == 0xE2 && i + 2 < len && static_cast<uint8_t>(s[i + 1]) == 0x80 && 634 + static_cast<uint8_t>(s[i + 2]) == 0xAF) { 635 + if (self->partWordBufferIndex > 0) { 636 + self->flushPartWordBuffer(); 637 + } 638 + 639 + self->partWordBuffer[0] = ' '; 640 + self->partWordBuffer[1] = '\0'; 641 + self->partWordBufferIndex = 1; 642 + self->nextWordContinues = true; 643 + self->flushPartWordBuffer(); 644 + 645 + self->nextWordContinues = true; 646 + 647 + i += 2; // Skip the remaining two bytes (0x80 0xAF) 616 648 continue; 617 649 } 618 650

+87 -4

lib/GfxRenderer/GfxRenderer.cpp

··· 157 157 } 158 158 } 159 159 160 - if constexpr (rotation == TextRotation::Rotated90CW) { 161 - *cursorY -= glyph->advanceX; 162 - } else { 163 - *cursorX += glyph->advanceX; 160 + if (!utf8IsCombiningMark(cp)) { 161 + if constexpr (rotation == TextRotation::Rotated90CW) { 162 + *cursorY -= glyph->advanceX; 163 + } else { 164 + *cursorX += glyph->advanceX; 165 + } 164 166 } 165 167 } 166 168 ··· 212 214 const EpdFontFamily::Style style) const { 213 215 int yPos = y + getFontAscenderSize(fontId); 214 216 int xpos = x; 217 + int lastBaseX = x; 218 + int lastBaseY = yPos; 219 + int lastBaseAdvance = 0; 220 + int lastBaseTop = 0; 221 + bool hasBaseGlyph = false; 215 222 216 223 // cannot draw a NULL / empty string 217 224 if (text == nullptr || *text == '\0') { ··· 224 231 return; 225 232 } 226 233 const auto& font = fontIt->second; 234 + constexpr int MIN_COMBINING_GAP_PX = 1; 227 235 228 236 uint32_t cp; 229 237 while ((cp = utf8NextCodepoint(reinterpret_cast<const uint8_t**>(&text)))) { 238 + if (utf8IsCombiningMark(cp) && hasBaseGlyph) { 239 + const EpdGlyph* combiningGlyph = font.getGlyph(cp, style); 240 + if (!combiningGlyph) { 241 + combiningGlyph = font.getGlyph(REPLACEMENT_GLYPH, style); 242 + } 243 + 244 + int raiseBy = 0; 245 + if (combiningGlyph) { 246 + const int currentGap = combiningGlyph->top - combiningGlyph->height - lastBaseTop; 247 + if (currentGap < MIN_COMBINING_GAP_PX) { 248 + raiseBy = MIN_COMBINING_GAP_PX - currentGap; 249 + } 250 + } 251 + 252 + int combiningX = lastBaseX + lastBaseAdvance / 2; 253 + int combiningY = lastBaseY - raiseBy; 254 + renderChar(font, cp, &combiningX, &combiningY, black, style); 255 + continue; 256 + } 257 + 258 + const EpdGlyph* glyph = font.getGlyph(cp, style); 259 + if (!glyph) { 260 + glyph = font.getGlyph(REPLACEMENT_GLYPH, style); 261 + } 262 + 263 + if (!utf8IsCombiningMark(cp)) { 264 + lastBaseX = xpos; 265 + lastBaseY = yPos; 266 + lastBaseAdvance = glyph ? glyph->advanceX : 0; 267 + lastBaseTop = glyph ? 
glyph->top : 0; 268 + hasBaseGlyph = true; 269 + } 270 + 230 271 renderChar(font, cp, &xpos, &yPos, black, style); 231 272 } 232 273 } ··· 864 905 int width = 0; 865 906 const auto& font = fontIt->second; 866 907 while ((cp = utf8NextCodepoint(reinterpret_cast<const uint8_t**>(&text)))) { 908 + if (utf8IsCombiningMark(cp)) { 909 + continue; 910 + } 867 911 const EpdGlyph* glyph = font.getGlyph(cp, style); 868 912 if (!glyph) glyph = font.getGlyph(REPLACEMENT_GLYPH, style); 869 913 if (glyph) width += glyph->advanceX; ··· 917 961 918 962 int xPos = x; 919 963 int yPos = y; 964 + int lastBaseX = x; 965 + int lastBaseY = y; 966 + int lastBaseAdvance = 0; 967 + int lastBaseTop = 0; 968 + bool hasBaseGlyph = false; 969 + constexpr int MIN_COMBINING_GAP_PX = 1; 920 970 921 971 uint32_t cp; 922 972 while ((cp = utf8NextCodepoint(reinterpret_cast<const uint8_t**>(&text)))) { 973 + if (utf8IsCombiningMark(cp) && hasBaseGlyph) { 974 + const EpdGlyph* combiningGlyph = font.getGlyph(cp, style); 975 + if (!combiningGlyph) { 976 + combiningGlyph = font.getGlyph(REPLACEMENT_GLYPH, style); 977 + } 978 + 979 + int raiseBy = 0; 980 + if (combiningGlyph) { 981 + const int currentGap = combiningGlyph->top - combiningGlyph->height - lastBaseTop; 982 + if (currentGap < MIN_COMBINING_GAP_PX) { 983 + raiseBy = MIN_COMBINING_GAP_PX - currentGap; 984 + } 985 + } 986 + 987 + int combiningX = lastBaseX - raiseBy; 988 + int combiningY = lastBaseY - lastBaseAdvance / 2; 989 + renderCharImpl<TextRotation::Rotated90CW>(*this, renderMode, font, cp, &combiningX, &combiningY, black, style); 990 + continue; 991 + } 992 + 993 + const EpdGlyph* glyph = font.getGlyph(cp, style); 994 + if (!glyph) { 995 + glyph = font.getGlyph(REPLACEMENT_GLYPH, style); 996 + } 997 + 998 + if (!utf8IsCombiningMark(cp)) { 999 + lastBaseX = xPos; 1000 + lastBaseY = yPos; 1001 + lastBaseAdvance = glyph ? glyph->advanceX : 0; 1002 + lastBaseTop = glyph ? 
glyph->top : 0; 1003 + hasBaseGlyph = true; 1004 + } 1005 + 923 1006 renderCharImpl<TextRotation::Rotated90CW>(*this, renderMode, font, cp, &xPos, &yPos, black, style); 924 1007 } 925 1008 }

+8

lib/Utf8/Utf8.h

size_t utf8RemoveLastChar(std::string& str);
// Truncate string by removing N UTF-8 codepoints from the end.
void utf8TruncateChars(std::string& str, size_t numChars);

// Returns true when `cp` falls inside one of the Unicode blocks reserved for
// combining diacritical marks, i.e. marks that should not advance the cursor.
//
// This is a block-range test only: it does not consult the Unicode
// General_Category, so combining characters that live outside these blocks
// (for example script-specific vowel signs) are reported as non-combining.
//
// cp: a decoded Unicode scalar value.
// Returns: true if cp is in any of the ranges listed below, false otherwise.
constexpr bool utf8IsCombiningMark(const uint32_t cp) {
  return (cp >= 0x0300 && cp <= 0x036F)     // Combining Diacritical Marks
         || (cp >= 0x1AB0 && cp <= 0x1AFF)  // Combining Diacritical Marks Extended
         || (cp >= 0x1DC0 && cp <= 0x1DFF)  // Combining Diacritical Marks Supplement
         || (cp >= 0x20D0 && cp <= 0x20FF)  // Combining Diacritical Marks for Symbols
         || (cp >= 0xFE20 && cp <= 0xFE2F); // Combining Half Marks
}

Configure Feed

Configure Feed