A fork of https://github.com/crosspoint-reader/crosspoint-reader
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

perf: Improve large CSS files handling (#779)

## Summary

Closes #766. Thank you for the help @bramschulting!

**What is the goal of this PR?**
- First and foremost, fix issue #766.
- While working on that, I realized the current CSS parsing/loading
code could be improved dramatically for large files, and that there
were still additional performance improvements to be made, even for
EPUBs with small CSS.

**What changes are included?**
- Stream CSS parsing and reuse normalization buffers to cut allocations
- Add rule limits and selector validation to release rules and free up
memory when needed
- Skip CSS parsing/loading entirely when "Book's Embedded Style" is off

## Additional Context

- My test EPUB has been updated
[here](https://github.com/jdk2pq/css-test-epub) to include a very large
CSS file to test this out

---

### AI Usage

While CrossPoint doesn't restrict the use of AI tools when contributing,
please be transparent about their usage, as it
helps set the right context for reviewers.

Did you use AI tools to help write this code? _**YES**_, Codex

authored by

Jake Kenneally and committed by
GitHub
46c2109f 5816ab2a

+367 -292
+8 -25
lib/Epub/Epub.cpp
··· 208 208 return true; 209 209 } 210 210 211 - std::string Epub::getCssRulesCache() const { return cachePath + "/css_rules.cache"; } 212 - 213 - bool Epub::loadCssRulesFromCache() const { 214 - FsFile cssCacheFile; 215 - if (Storage.openFileForRead("EBP", getCssRulesCache(), cssCacheFile)) { 216 - if (cssParser->loadFromCache(cssCacheFile)) { 217 - cssCacheFile.close(); 218 - LOG_DBG("EBP", "Loaded CSS rules from cache"); 219 - return true; 220 - } 221 - cssCacheFile.close(); 222 - LOG_DBG("EBP", "CSS cache invalid, reparsing"); 223 - } 224 - return false; 225 - } 226 - 227 211 void Epub::parseCssFiles() const { 228 212 if (cssFiles.empty()) { 229 213 LOG_DBG("EBP", "No CSS files to parse, but CssParser created for inline styles"); 230 214 } 231 215 232 - // Try to load from CSS cache first 233 - if (!loadCssRulesFromCache()) { 234 - // Cache miss - parse CSS files 216 + // See if we have a cached version of the CSS rules 217 + if (!cssParser->hasCache()) { 218 + // No cache yet - parse CSS files 235 219 for (const auto& cssPath : cssFiles) { 236 220 LOG_DBG("EBP", "Parsing CSS file: %s", cssPath.c_str()); 237 221 ··· 262 246 } 263 247 264 248 // Save to cache for next time 265 - FsFile cssCacheFile; 266 - if (Storage.openFileForWrite("EBP", getCssRulesCache(), cssCacheFile)) { 267 - cssParser->saveToCache(cssCacheFile); 268 - cssCacheFile.close(); 249 + if (!cssParser->saveToCache()) { 250 + LOG_ERR("EBP", "Failed to save CSS rules to cache"); 269 251 } 252 + cssParser->clear(); 270 253 271 254 LOG_DBG("EBP", "Loaded %zu CSS style rules from %zu files", cssParser->ruleCount(), cssFiles.size()); 272 255 } ··· 279 262 // Initialize spine/TOC cache 280 263 bookMetadataCache.reset(new BookMetadataCache(cachePath)); 281 264 // Always create CssParser - needed for inline style parsing even without CSS files 282 - cssParser.reset(new CssParser()); 265 + cssParser.reset(new CssParser(cachePath)); 283 266 284 267 // Try to load existing cache first 285 268 if 
(bookMetadataCache->load()) { 286 - if (!skipLoadingCss && !loadCssRulesFromCache()) { 269 + if (!skipLoadingCss && !cssParser->hasCache()) { 287 270 LOG_DBG("EBP", "Warning: CSS rules cache not found, attempting to parse CSS files"); 288 271 // to get CSS file list 289 272 if (!parseContentOpf(bookMetadataCache->coreMetadata)) {
+1 -3
lib/Epub/Epub.h
··· 35 35 bool parseTocNcxFile() const; 36 36 bool parseTocNavFile() const; 37 37 void parseCssFiles() const; 38 - std::string getCssRulesCache() const; 39 - bool loadCssRulesFromCache() const; 40 38 41 39 public: 42 40 explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) { ··· 73 71 74 72 size_t getBookSize() const; 75 73 float calculateProgress(int currentSpineIndex, float currentSpineRead) const; 76 - const CssParser* getCssParser() const { return cssParser.get(); } 74 + CssParser* getCssParser() const { return cssParser.get(); } 77 75 };
+16 -1
lib/Epub/Epub/Section.cpp
··· 181 181 viewportHeight, hyphenationEnabled, embeddedStyle); 182 182 std::vector<uint32_t> lut = {}; 183 183 184 + CssParser* cssParser = nullptr; 185 + if (embeddedStyle) { 186 + cssParser = epub->getCssParser(); 187 + if (cssParser) { 188 + if (!cssParser->loadFromCache()) { 189 + LOG_ERR("SCT", "Failed to load CSS from cache"); 190 + } 191 + } 192 + } 184 193 ChapterHtmlSlimParser visitor( 185 194 tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth, 186 195 viewportHeight, hyphenationEnabled, 187 196 [this, &lut](std::unique_ptr<Page> page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, 188 - embeddedStyle, popupFn, embeddedStyle ? epub->getCssParser() : nullptr); 197 + embeddedStyle, popupFn, cssParser); 189 198 Hyphenator::setPreferredLanguage(epub->getLanguage()); 190 199 success = visitor.parseAndBuildPages(); 191 200 ··· 194 203 LOG_ERR("SCT", "Failed to parse XML and build pages"); 195 204 file.close(); 196 205 Storage.remove(filePath.c_str()); 206 + if (cssParser) { 207 + cssParser->clear(); 208 + } 197 209 return false; 198 210 } 199 211 ··· 220 232 serialization::writePod(file, pageCount); 221 233 serialization::writePod(file, lutOffset); 222 234 file.close(); 235 + if (cssParser) { 236 + cssParser->clear(); 237 + } 223 238 return true; 224 239 } 225 240
+320 -253
lib/Epub/Epub/css/CssParser.cpp
··· 1 1 #include "CssParser.h" 2 2 3 + #include <Arduino.h> 3 4 #include <Logging.h> 4 5 5 6 #include <algorithm> 7 + #include <array> 6 8 #include <cctype> 9 + #include <string_view> 7 10 8 11 namespace { 9 12 10 - // Buffer size for reading CSS files 11 - constexpr size_t READ_BUFFER_SIZE = 512; 12 - 13 - // Maximum CSS file size we'll process (prevent memory issues) 14 - constexpr size_t MAX_CSS_SIZE = 64 * 1024; 15 - 16 - // Check if character is CSS whitespace 17 - bool isCssWhitespace(const char c) { return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f'; } 18 - 19 - // Read entire file into string (with size limit) 20 - std::string readFileContent(FsFile& file) { 21 - std::string content; 22 - content.reserve(std::min(static_cast<size_t>(file.size()), MAX_CSS_SIZE)); 23 - 24 - char buffer[READ_BUFFER_SIZE]; 25 - while (file.available() && content.size() < MAX_CSS_SIZE) { 26 - const int bytesRead = file.read(buffer, sizeof(buffer)); 27 - if (bytesRead <= 0) break; 28 - content.append(buffer, bytesRead); 29 - } 30 - return content; 31 - } 32 - 33 - // Remove CSS comments (/* ... 
*/) from content 34 - std::string stripComments(const std::string& css) { 35 - std::string result; 36 - result.reserve(css.size()); 37 - 38 - size_t pos = 0; 39 - while (pos < css.size()) { 40 - // Look for start of comment 41 - if (pos + 1 < css.size() && css[pos] == '/' && css[pos + 1] == '*') { 42 - // Find end of comment 43 - const size_t endPos = css.find("*/", pos + 2); 44 - if (endPos == std::string::npos) { 45 - // Unterminated comment - skip rest of file 46 - break; 47 - } 48 - pos = endPos + 2; 49 - } else { 50 - result.push_back(css[pos]); 51 - ++pos; 52 - } 53 - } 54 - return result; 55 - } 56 - 57 - // Skip @-rules (like @media, @import, @font-face) 58 - // Returns position after the @-rule 59 - size_t skipAtRule(const std::string& css, const size_t start) { 60 - // Find the end - either semicolon (simple @-rule) or matching brace 61 - size_t pos = start + 1; // Skip the '@' 62 - 63 - // Skip identifier 64 - while (pos < css.size() && (std::isalnum(css[pos]) || css[pos] == '-')) { 65 - ++pos; 66 - } 13 + // Stack-allocated string buffer to avoid heap reallocations during parsing 14 + // Provides string-like interface with fixed capacity 15 + struct StackBuffer { 16 + static constexpr size_t CAPACITY = 1024; 17 + char data[CAPACITY]; 18 + size_t len = 0; 67 19 68 - // Look for { or ; 69 - int braceDepth = 0; 70 - while (pos < css.size()) { 71 - const char c = css[pos]; 72 - if (c == '{') { 73 - ++braceDepth; 74 - } else if (c == '}') { 75 - --braceDepth; 76 - if (braceDepth == 0) { 77 - return pos + 1; 78 - } 79 - } else if (c == ';' && braceDepth == 0) { 80 - return pos + 1; 20 + void push_back(char c) { 21 + if (len < CAPACITY - 1) { 22 + data[len++] = c; 81 23 } 82 - ++pos; 83 24 } 84 - return css.size(); 85 - } 86 25 87 - // Extract next rule from CSS content 88 - // Returns true if a rule was found, with selector and body filled 89 - bool extractNextRule(const std::string& css, size_t& pos, std::string& selector, std::string& body) { 90 - 
selector.clear(); 91 - body.clear(); 92 - 93 - // Skip whitespace and @-rules until we find a regular rule 94 - while (pos < css.size()) { 95 - // Skip whitespace 96 - while (pos < css.size() && isCssWhitespace(css[pos])) { 97 - ++pos; 98 - } 99 - 100 - if (pos >= css.size()) return false; 26 + void clear() { len = 0; } 27 + bool empty() const { return len == 0; } 28 + size_t size() const { return len; } 101 29 102 - // Handle @-rules iteratively (avoids recursion/stack overflow) 103 - if (css[pos] == '@') { 104 - pos = skipAtRule(css, pos); 105 - continue; // Try again after skipping the @-rule 106 - } 30 + // Get string view of current content (zero-copy) 31 + std::string_view view() const { return std::string_view(data, len); } 107 32 108 - break; // Found start of a regular rule 109 - } 33 + // Convert to string for passing to functions (single allocation) 34 + std::string str() const { return std::string(data, len); } 35 + }; 110 36 111 - if (pos >= css.size()) return false; 37 + // Buffer size for reading CSS files 38 + constexpr size_t READ_BUFFER_SIZE = 512; 112 39 113 - // Find opening brace 114 - const size_t bracePos = css.find('{', pos); 115 - if (bracePos == std::string::npos) return false; 40 + // Maximum number of CSS rules to store in the selector map 41 + // Prevents unbounded memory growth from pathological CSS files 42 + constexpr size_t MAX_RULES = 1500; 116 43 117 - // Extract selector (everything before the brace) 118 - selector = css.substr(pos, bracePos - pos); 44 + // Minimum free heap required to apply CSS during rendering 45 + // If below this threshold, we skip CSS to avoid display artifacts. 
46 + constexpr size_t MIN_FREE_HEAP_FOR_CSS = 48 * 1024; 119 47 120 - // Find matching closing brace 121 - int depth = 1; 122 - const size_t bodyStart = bracePos + 1; 123 - size_t bodyEnd = bodyStart; 48 + // Maximum length for a single selector string 49 + // Prevents parsing of extremely long or malformed selectors 50 + constexpr size_t MAX_SELECTOR_LENGTH = 256; 124 51 125 - while (bodyEnd < css.size() && depth > 0) { 126 - if (css[bodyEnd] == '{') 127 - ++depth; 128 - else if (css[bodyEnd] == '}') 129 - --depth; 130 - ++bodyEnd; 131 - } 132 - 133 - // Extract body (between braces) 134 - if (bodyEnd > bodyStart) { 135 - body = css.substr(bodyStart, bodyEnd - bodyStart - 1); 136 - } 137 - 138 - pos = bodyEnd; 139 - return true; 140 - } 52 + // Check if character is CSS whitespace 53 + bool isCssWhitespace(const char c) { return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f'; } 141 54 142 55 } // anonymous namespace 143 56 ··· 165 78 result.pop_back(); 166 79 } 167 80 return result; 81 + } 82 + 83 + void CssParser::normalizedInto(const std::string& s, std::string& out) { 84 + out.clear(); 85 + out.reserve(s.size()); 86 + 87 + bool inSpace = true; // Start true to skip leading space 88 + for (const char c : s) { 89 + if (isCssWhitespace(c)) { 90 + if (!inSpace) { 91 + out.push_back(' '); 92 + inSpace = true; 93 + } 94 + } else { 95 + out.push_back(static_cast<char>(std::tolower(static_cast<unsigned char>(c)))); 96 + inSpace = false; 97 + } 98 + } 99 + 100 + if (!out.empty() && out.back() == ' ') { 101 + out.pop_back(); 102 + } 168 103 } 169 104 170 105 std::vector<std::string> CssParser::splitOnChar(const std::string& s, const char delimiter) { ··· 290 225 291 226 return CssLength{numericValue, unit}; 292 227 } 228 + // Declaration parsing 293 229 294 - int8_t CssParser::interpretSpacing(const std::string& val) { 295 - const std::string v = normalized(val); 296 - if (v.empty()) return 0; 230 + void CssParser::parseDeclarationIntoStyle(const 
std::string& decl, CssStyle& style, std::string& propNameBuf, 231 + std::string& propValueBuf) { 232 + const size_t colonPos = decl.find(':'); 233 + if (colonPos == std::string::npos || colonPos == 0) return; 297 234 298 - // For spacing, we convert to "lines" (discrete units for e-ink) 299 - // 1em ≈ 1 line, percentages based on ~30 lines per page 235 + normalizedInto(decl.substr(0, colonPos), propNameBuf); 236 + normalizedInto(decl.substr(colonPos + 1), propValueBuf); 300 237 301 - float multiplier = 0.0f; 302 - size_t unitStart = v.size(); 238 + if (propNameBuf.empty() || propValueBuf.empty()) return; 303 239 304 - for (size_t i = 0; i < v.size(); ++i) { 305 - const char c = v[i]; 306 - if (!std::isdigit(c) && c != '.' && c != '-' && c != '+') { 307 - unitStart = i; 308 - break; 240 + if (propNameBuf == "text-align") { 241 + style.textAlign = interpretAlignment(propValueBuf); 242 + style.defined.textAlign = 1; 243 + } else if (propNameBuf == "font-style") { 244 + style.fontStyle = interpretFontStyle(propValueBuf); 245 + style.defined.fontStyle = 1; 246 + } else if (propNameBuf == "font-weight") { 247 + style.fontWeight = interpretFontWeight(propValueBuf); 248 + style.defined.fontWeight = 1; 249 + } else if (propNameBuf == "text-decoration" || propNameBuf == "text-decoration-line") { 250 + style.textDecoration = interpretDecoration(propValueBuf); 251 + style.defined.textDecoration = 1; 252 + } else if (propNameBuf == "text-indent") { 253 + style.textIndent = interpretLength(propValueBuf); 254 + style.defined.textIndent = 1; 255 + } else if (propNameBuf == "margin-top") { 256 + style.marginTop = interpretLength(propValueBuf); 257 + style.defined.marginTop = 1; 258 + } else if (propNameBuf == "margin-bottom") { 259 + style.marginBottom = interpretLength(propValueBuf); 260 + style.defined.marginBottom = 1; 261 + } else if (propNameBuf == "margin-left") { 262 + style.marginLeft = interpretLength(propValueBuf); 263 + style.defined.marginLeft = 1; 264 + } else if 
(propNameBuf == "margin-right") { 265 + style.marginRight = interpretLength(propValueBuf); 266 + style.defined.marginRight = 1; 267 + } else if (propNameBuf == "margin") { 268 + const auto values = splitWhitespace(propValueBuf); 269 + if (!values.empty()) { 270 + style.marginTop = interpretLength(values[0]); 271 + style.marginRight = values.size() >= 2 ? interpretLength(values[1]) : style.marginTop; 272 + style.marginBottom = values.size() >= 3 ? interpretLength(values[2]) : style.marginTop; 273 + style.marginLeft = values.size() >= 4 ? interpretLength(values[3]) : style.marginRight; 274 + style.defined.marginTop = style.defined.marginRight = style.defined.marginBottom = style.defined.marginLeft = 1; 275 + } 276 + } else if (propNameBuf == "padding-top") { 277 + style.paddingTop = interpretLength(propValueBuf); 278 + style.defined.paddingTop = 1; 279 + } else if (propNameBuf == "padding-bottom") { 280 + style.paddingBottom = interpretLength(propValueBuf); 281 + style.defined.paddingBottom = 1; 282 + } else if (propNameBuf == "padding-left") { 283 + style.paddingLeft = interpretLength(propValueBuf); 284 + style.defined.paddingLeft = 1; 285 + } else if (propNameBuf == "padding-right") { 286 + style.paddingRight = interpretLength(propValueBuf); 287 + style.defined.paddingRight = 1; 288 + } else if (propNameBuf == "padding") { 289 + const auto values = splitWhitespace(propValueBuf); 290 + if (!values.empty()) { 291 + style.paddingTop = interpretLength(values[0]); 292 + style.paddingRight = values.size() >= 2 ? interpretLength(values[1]) : style.paddingTop; 293 + style.paddingBottom = values.size() >= 3 ? interpretLength(values[2]) : style.paddingTop; 294 + style.paddingLeft = values.size() >= 4 ? 
interpretLength(values[3]) : style.paddingRight; 295 + style.defined.paddingTop = style.defined.paddingRight = style.defined.paddingBottom = style.defined.paddingLeft = 296 + 1; 309 297 } 310 298 } 311 - 312 - const std::string numPart = v.substr(0, unitStart); 313 - const std::string unitPart = v.substr(unitStart); 314 - 315 - if (unitPart == "em" || unitPart == "rem") { 316 - multiplier = 1.0f; // 1em = 1 line 317 - } else if (unitPart == "%") { 318 - multiplier = 0.3f; // ~30 lines per page, so 10% = 3 lines 319 - } else { 320 - return 0; // Unsupported unit for spacing 321 - } 322 - 323 - char* endPtr = nullptr; 324 - const float numericValue = std::strtof(numPart.c_str(), &endPtr); 325 - 326 - if (endPtr == numPart.c_str()) return 0; 327 - 328 - int lines = static_cast<int>(numericValue * multiplier); 329 - 330 - // Clamp to reasonable range (0-2 lines) 331 - if (lines < 0) lines = 0; 332 - if (lines > 2) lines = 2; 333 - 334 - return static_cast<int8_t>(lines); 335 299 } 336 300 337 - // Declaration parsing 338 - 339 301 CssStyle CssParser::parseDeclarations(const std::string& declBlock) { 340 302 CssStyle style; 303 + std::string propNameBuf; 304 + std::string propValueBuf; 341 305 342 - // Split declarations by semicolon 343 - const auto declarations = splitOnChar(declBlock, ';'); 344 - 345 - for (const auto& decl : declarations) { 346 - // Find colon separator 347 - const size_t colonPos = decl.find(':'); 348 - if (colonPos == std::string::npos || colonPos == 0) continue; 349 - 350 - std::string propName = normalized(decl.substr(0, colonPos)); 351 - std::string propValue = normalized(decl.substr(colonPos + 1)); 352 - 353 - if (propName.empty() || propValue.empty()) continue; 354 - 355 - // Match property and set value 356 - if (propName == "text-align") { 357 - style.textAlign = interpretAlignment(propValue); 358 - style.defined.textAlign = 1; 359 - } else if (propName == "font-style") { 360 - style.fontStyle = interpretFontStyle(propValue); 361 - 
style.defined.fontStyle = 1; 362 - } else if (propName == "font-weight") { 363 - style.fontWeight = interpretFontWeight(propValue); 364 - style.defined.fontWeight = 1; 365 - } else if (propName == "text-decoration" || propName == "text-decoration-line") { 366 - style.textDecoration = interpretDecoration(propValue); 367 - style.defined.textDecoration = 1; 368 - } else if (propName == "text-indent") { 369 - style.textIndent = interpretLength(propValue); 370 - style.defined.textIndent = 1; 371 - } else if (propName == "margin-top") { 372 - style.marginTop = interpretLength(propValue); 373 - style.defined.marginTop = 1; 374 - } else if (propName == "margin-bottom") { 375 - style.marginBottom = interpretLength(propValue); 376 - style.defined.marginBottom = 1; 377 - } else if (propName == "margin-left") { 378 - style.marginLeft = interpretLength(propValue); 379 - style.defined.marginLeft = 1; 380 - } else if (propName == "margin-right") { 381 - style.marginRight = interpretLength(propValue); 382 - style.defined.marginRight = 1; 383 - } else if (propName == "margin") { 384 - // Shorthand: 1-4 values for top, right, bottom, left 385 - const auto values = splitWhitespace(propValue); 386 - if (!values.empty()) { 387 - style.marginTop = interpretLength(values[0]); 388 - style.marginRight = values.size() >= 2 ? interpretLength(values[1]) : style.marginTop; 389 - style.marginBottom = values.size() >= 3 ? interpretLength(values[2]) : style.marginTop; 390 - style.marginLeft = values.size() >= 4 ? 
interpretLength(values[3]) : style.marginRight; 391 - style.defined.marginTop = style.defined.marginRight = style.defined.marginBottom = style.defined.marginLeft = 1; 306 + size_t start = 0; 307 + for (size_t i = 0; i <= declBlock.size(); ++i) { 308 + if (i == declBlock.size() || declBlock[i] == ';') { 309 + if (i > start) { 310 + const size_t len = i - start; 311 + std::string decl = declBlock.substr(start, len); 312 + if (!decl.empty()) { 313 + parseDeclarationIntoStyle(decl, style, propNameBuf, propValueBuf); 314 + } 392 315 } 393 - } else if (propName == "padding-top") { 394 - style.paddingTop = interpretLength(propValue); 395 - style.defined.paddingTop = 1; 396 - } else if (propName == "padding-bottom") { 397 - style.paddingBottom = interpretLength(propValue); 398 - style.defined.paddingBottom = 1; 399 - } else if (propName == "padding-left") { 400 - style.paddingLeft = interpretLength(propValue); 401 - style.defined.paddingLeft = 1; 402 - } else if (propName == "padding-right") { 403 - style.paddingRight = interpretLength(propValue); 404 - style.defined.paddingRight = 1; 405 - } else if (propName == "padding") { 406 - // Shorthand: 1-4 values for top, right, bottom, left 407 - const auto values = splitWhitespace(propValue); 408 - if (!values.empty()) { 409 - style.paddingTop = interpretLength(values[0]); 410 - style.paddingRight = values.size() >= 2 ? interpretLength(values[1]) : style.paddingTop; 411 - style.paddingBottom = values.size() >= 3 ? interpretLength(values[2]) : style.paddingTop; 412 - style.paddingLeft = values.size() >= 4 ? 
interpretLength(values[3]) : style.paddingRight; 413 - style.defined.paddingTop = style.defined.paddingRight = style.defined.paddingBottom = 414 - style.defined.paddingLeft = 1; 415 - } 316 + start = i + 1; 416 317 } 417 318 } 418 319 ··· 421 322 422 323 // Rule processing 423 324 424 - void CssParser::processRuleBlock(const std::string& selectorGroup, const std::string& declarations) { 425 - const CssStyle style = parseDeclarations(declarations); 426 - 427 - // Only store if any properties were set 428 - if (!style.defined.anySet()) return; 325 + void CssParser::processRuleBlockWithStyle(const std::string& selectorGroup, const CssStyle& style) { 326 + // Check if we've reached the rule limit before processing 327 + if (rulesBySelector_.size() >= MAX_RULES) { 328 + LOG_DBG("CSS", "Reached max rules limit (%zu), stopping CSS parsing", MAX_RULES); 329 + return; 330 + } 429 331 430 332 // Handle comma-separated selectors 431 333 const auto selectors = splitOnChar(selectorGroup, ','); 432 334 433 335 for (const auto& sel : selectors) { 336 + // Validate selector length before processing 337 + if (sel.size() > MAX_SELECTOR_LENGTH) { 338 + LOG_DBG("CSS", "Selector too long (%zu > %zu), skipping", sel.size(), MAX_SELECTOR_LENGTH); 339 + continue; 340 + } 341 + 434 342 // Normalize the selector 435 343 std::string key = normalized(sel); 436 344 if (key.empty()) continue; 345 + 346 + // Skip if this would exceed the rule limit 347 + if (rulesBySelector_.size() >= MAX_RULES) { 348 + LOG_DBG("CSS", "Reached max rules limit, stopping selector processing"); 349 + return; 350 + } 437 351 438 352 // Store or merge with existing 439 353 auto it = rulesBySelector_.find(key); ··· 453 367 return false; 454 368 } 455 369 456 - // Read file content 457 - const std::string content = readFileContent(source); 458 - if (content.empty()) { 459 - return true; // Empty file is valid 460 - } 370 + size_t totalRead = 0; 371 + 372 + // Use stack-allocated buffers for parsing to avoid heap 
reallocations 373 + StackBuffer selector; 374 + StackBuffer declBuffer; 375 + // Keep these as std::string since they're passed by reference to parseDeclarationIntoStyle 376 + std::string propNameBuf; 377 + std::string propValueBuf; 378 + 379 + bool inComment = false; 380 + bool maybeSlash = false; 381 + bool prevStar = false; 461 382 462 - // Remove comments 463 - const std::string cleaned = stripComments(content); 383 + bool inAtRule = false; 384 + int atDepth = 0; 385 + 386 + int bodyDepth = 0; 387 + bool skippingRule = false; 388 + CssStyle currentStyle; 464 389 465 - // Parse rules 466 - size_t pos = 0; 467 - std::string selector, body; 390 + auto handleChar = [&](const char c) { 391 + if (inAtRule) { 392 + if (c == '{') { 393 + ++atDepth; 394 + } else if (c == '}') { 395 + if (atDepth > 0) --atDepth; 396 + if (atDepth == 0) inAtRule = false; 397 + } else if (c == ';' && atDepth == 0) { 398 + inAtRule = false; 399 + } 400 + return; 401 + } 468 402 469 - while (extractNextRule(cleaned, pos, selector, body)) { 470 - processRuleBlock(selector, body); 403 + if (bodyDepth == 0) { 404 + if (selector.empty() && isCssWhitespace(c)) { 405 + return; 406 + } 407 + if (c == '@' && selector.empty()) { 408 + inAtRule = true; 409 + atDepth = 0; 410 + return; 411 + } 412 + if (c == '{') { 413 + bodyDepth = 1; 414 + currentStyle = CssStyle{}; 415 + declBuffer.clear(); 416 + if (selector.size() > MAX_SELECTOR_LENGTH * 4) { 417 + skippingRule = true; 418 + } 419 + return; 420 + } 421 + selector.push_back(c); 422 + return; 423 + } 424 + 425 + // bodyDepth > 0 426 + if (c == '{') { 427 + ++bodyDepth; 428 + return; 429 + } 430 + if (c == '}') { 431 + --bodyDepth; 432 + if (bodyDepth == 0) { 433 + if (!skippingRule && !declBuffer.empty()) { 434 + parseDeclarationIntoStyle(declBuffer.str(), currentStyle, propNameBuf, propValueBuf); 435 + } 436 + if (!skippingRule) { 437 + processRuleBlockWithStyle(selector.str(), currentStyle); 438 + } 439 + selector.clear(); 440 + 
declBuffer.clear(); 441 + skippingRule = false; 442 + return; 443 + } 444 + return; 445 + } 446 + if (bodyDepth > 1) { 447 + return; 448 + } 449 + if (!skippingRule) { 450 + if (c == ';') { 451 + if (!declBuffer.empty()) { 452 + parseDeclarationIntoStyle(declBuffer.str(), currentStyle, propNameBuf, propValueBuf); 453 + declBuffer.clear(); 454 + } 455 + } else { 456 + declBuffer.push_back(c); 457 + } 458 + } 459 + }; 460 + 461 + char buffer[READ_BUFFER_SIZE]; 462 + while (source.available()) { 463 + int bytesRead = source.read(buffer, sizeof(buffer)); 464 + if (bytesRead <= 0) break; 465 + 466 + totalRead += static_cast<size_t>(bytesRead); 467 + 468 + for (int i = 0; i < bytesRead; ++i) { 469 + const char c = buffer[i]; 470 + 471 + if (inComment) { 472 + if (prevStar && c == '/') { 473 + inComment = false; 474 + prevStar = false; 475 + continue; 476 + } 477 + prevStar = c == '*'; 478 + continue; 479 + } 480 + 481 + if (maybeSlash) { 482 + if (c == '*') { 483 + inComment = true; 484 + maybeSlash = false; 485 + prevStar = false; 486 + continue; 487 + } 488 + handleChar('/'); 489 + maybeSlash = false; 490 + // fall through to process current char 491 + } 492 + 493 + if (c == '/') { 494 + maybeSlash = true; 495 + continue; 496 + } 497 + 498 + handleChar(c); 499 + } 471 500 } 472 501 473 - LOG_DBG("CSS", "Parsed %zu rules", rulesBySelector_.size()); 502 + if (maybeSlash) { 503 + handleChar('/'); 504 + } 505 + 506 + LOG_DBG("CSS", "Parsed %zu rules from %zu bytes", rulesBySelector_.size(), totalRead); 474 507 return true; 475 508 } 476 509 477 510 // Style resolution 478 511 479 512 CssStyle CssParser::resolveStyle(const std::string& tagName, const std::string& classAttr) const { 513 + static bool lowHeapWarningLogged = false; 514 + if (ESP.getFreeHeap() < MIN_FREE_HEAP_FOR_CSS) { 515 + if (!lowHeapWarningLogged) { 516 + lowHeapWarningLogged = true; 517 + LOG_DBG("CSS", "Warning: low heap (%u bytes) below MIN_FREE_HEAP_FOR_CSS (%u), returning empty style", 518 + 
ESP.getFreeHeap(), static_cast<unsigned>(MIN_FREE_HEAP_FOR_CSS)); 519 + } 520 + return CssStyle{}; 521 + } 480 522 CssStyle result; 481 523 const std::string tag = normalized(tagName); 482 524 ··· 521 563 522 564 // Cache format version - increment when format changes 523 565 constexpr uint8_t CSS_CACHE_VERSION = 2; 566 + constexpr char rulesCache[] = "/css_rules.cache"; 524 567 525 - bool CssParser::saveToCache(FsFile& file) const { 526 - if (!file) { 568 + bool CssParser::hasCache() const { return Storage.exists((cachePath + rulesCache).c_str()); } 569 + 570 + bool CssParser::saveToCache() const { 571 + if (cachePath.empty()) { 572 + return false; 573 + } 574 + 575 + FsFile file; 576 + if (!Storage.openFileForWrite("CSS", cachePath + rulesCache, file)) { 527 577 return false; 528 578 } 529 579 ··· 583 633 } 584 634 585 635 LOG_DBG("CSS", "Saved %u rules to cache", ruleCount); 636 + file.close(); 586 637 return true; 587 638 } 588 639 589 - bool CssParser::loadFromCache(FsFile& file) { 590 - if (!file) { 640 + bool CssParser::loadFromCache() { 641 + if (cachePath.empty()) { 642 + return false; 643 + } 644 + 645 + FsFile file; 646 + if (!Storage.openFileForRead("CSS", cachePath + rulesCache, file)) { 591 647 return false; 592 648 } 593 649 ··· 598 654 uint8_t version = 0; 599 655 if (file.read(&version, 1) != 1 || version != CSS_CACHE_VERSION) { 600 656 LOG_DBG("CSS", "Cache version mismatch (got %u, expected %u)", version, CSS_CACHE_VERSION); 657 + file.close(); 601 658 return false; 602 659 } 603 660 604 661 // Read rule count 605 662 uint16_t ruleCount = 0; 606 663 if (file.read(&ruleCount, sizeof(ruleCount)) != sizeof(ruleCount)) { 664 + file.close(); 607 665 return false; 608 666 } 609 667 ··· 613 671 uint16_t selectorLen = 0; 614 672 if (file.read(&selectorLen, sizeof(selectorLen)) != sizeof(selectorLen)) { 615 673 rulesBySelector_.clear(); 674 + file.close(); 616 675 return false; 617 676 } 618 677 ··· 620 679 selector.resize(selectorLen); 621 680 if 
(file.read(&selector[0], selectorLen) != selectorLen) { 622 681 rulesBySelector_.clear(); 682 + file.close(); 623 683 return false; 624 684 } 625 685 ··· 629 689 630 690 if (file.read(&enumVal, 1) != 1) { 631 691 rulesBySelector_.clear(); 692 + file.close(); 632 693 return false; 633 694 } 634 695 style.textAlign = static_cast<CssTextAlign>(enumVal); 635 696 636 697 if (file.read(&enumVal, 1) != 1) { 637 698 rulesBySelector_.clear(); 699 + file.close(); 638 700 return false; 639 701 } 640 702 style.fontStyle = static_cast<CssFontStyle>(enumVal); 641 703 642 704 if (file.read(&enumVal, 1) != 1) { 643 705 rulesBySelector_.clear(); 706 + file.close(); 644 707 return false; 645 708 } 646 709 style.fontWeight = static_cast<CssFontWeight>(enumVal); 647 710 648 711 if (file.read(&enumVal, 1) != 1) { 649 712 rulesBySelector_.clear(); 713 + file.close(); 650 714 return false; 651 715 } 652 716 style.textDecoration = static_cast<CssTextDecoration>(enumVal); ··· 668 732 !readLength(style.marginLeft) || !readLength(style.marginRight) || !readLength(style.paddingTop) || 669 733 !readLength(style.paddingBottom) || !readLength(style.paddingLeft) || !readLength(style.paddingRight)) { 670 734 rulesBySelector_.clear(); 735 + file.close(); 671 736 return false; 672 737 } 673 738 ··· 675 740 uint16_t definedBits = 0; 676 741 if (file.read(&definedBits, sizeof(definedBits)) != sizeof(definedBits)) { 677 742 rulesBySelector_.clear(); 743 + file.close(); 678 744 return false; 679 745 } 680 746 style.defined.textAlign = (definedBits & 1 << 0) != 0; ··· 695 761 } 696 762 697 763 LOG_DBG("CSS", "Loaded %u rules from cache", ruleCount); 764 + file.close(); 698 765 return true; 699 766 }
+15 -7
lib/Epub/Epub/css/CssParser.h
··· 4 4 5 5 #include <string> 6 6 #include <unordered_map> 7 + #include <utility> 7 8 #include <vector> 8 9 9 10 #include "CssStyle.h" ··· 29 30 */ 30 31 class CssParser { 31 32 public: 32 - CssParser() = default; 33 + explicit CssParser(std::string cachePath) : cachePath(std::move(cachePath)) {} 33 34 ~CssParser() = default; 34 35 35 36 // Non-copyable ··· 77 78 void clear() { rulesBySelector_.clear(); } 78 79 79 80 /** 81 + * Check if CSS rules cache file exists 82 + */ 83 + bool hasCache() const; 84 + 85 + /** 80 86 * Save parsed CSS rules to a cache file. 81 - * @param file Open file handle to write to 82 87 * @return true if cache was written successfully 83 88 */ 84 - bool saveToCache(FsFile& file) const; 89 + bool saveToCache() const; 85 90 86 91 /** 87 92 * Load CSS rules from a cache file. 88 93 * Clears any existing rules before loading. 89 - * @param file Open file handle to read from 90 94 * @return true if cache was loaded successfully 91 95 */ 92 - bool loadFromCache(FsFile& file); 96 + bool loadFromCache(); 93 97 94 98 private: 95 99 // Storage: maps normalized selector -> style properties 96 100 std::unordered_map<std::string, CssStyle> rulesBySelector_; 97 101 102 + std::string cachePath; 103 + 98 104 // Internal parsing helpers 99 - void processRuleBlock(const std::string& selectorGroup, const std::string& declarations); 105 + void processRuleBlockWithStyle(const std::string& selectorGroup, const CssStyle& style); 100 106 static CssStyle parseDeclarations(const std::string& declBlock); 107 + static void parseDeclarationIntoStyle(const std::string& decl, CssStyle& style, std::string& propNameBuf, 108 + std::string& propValueBuf); 101 109 102 110 // Individual property value parsers 103 111 static CssTextAlign interpretAlignment(const std::string& val); ··· 105 113 static CssFontWeight interpretFontWeight(const std::string& val); 106 114 static CssTextDecoration interpretDecoration(const std::string& val); 107 115 static CssLength 
interpretLength(const std::string& val); 108 - static int8_t interpretSpacing(const std::string& val); 109 116 110 117 // String utilities 111 118 static std::string normalized(const std::string& s); 119 + static void normalizedInto(const std::string& s, std::string& out); 112 120 static std::vector<std::string> splitOnChar(const std::string& s, char delimiter); 113 121 static std::vector<std::string> splitWhitespace(const std::string& s); 114 122 };
+4 -1
platformio.ini
··· 27 27 # https://libexpat.github.io/doc/api/latest/#XML_GE 28 28 -DXML_GE=0 29 29 -DXML_CONTEXT_BYTES=1024 30 - -std=c++2a 30 + -std=gnu++2a 31 31 # Enable UTF-8 long file names in SdFat 32 32 -DUSE_UTF8_LONG_NAMES=1 33 + 34 + build_unflags = 35 + -std=gnu++11 33 36 34 37 ; Board configuration 35 38 board_build.flash_mode = dio
+1 -1
src/RecentBooksStore.cpp
··· 88 88 // If epub, try to load the metadata for title/author and cover 89 89 if (StringUtils::checkFileExtension(lastBookFileName, ".epub")) { 90 90 Epub epub(path, "/.crosspoint"); 91 - epub.load(false); 91 + epub.load(false, true); 92 92 return RecentBook{path, epub.getTitle(), epub.getAuthor(), epub.getThumbBmpPath()}; 93 93 } else if (StringUtils::checkFileExtension(lastBookFileName, ".xtch") || 94 94 StringUtils::checkFileExtension(lastBookFileName, ".xtc")) {
+2 -1
src/activities/reader/ReaderActivity.cpp
··· 2 2 3 3 #include <HalStorage.h> 4 4 5 + #include "CrossPointSettings.h" 5 6 #include "Epub.h" 6 7 #include "EpubReaderActivity.h" 7 8 #include "Txt.h" ··· 35 36 } 36 37 37 38 auto epub = std::unique_ptr<Epub>(new Epub(path, "/.crosspoint")); 38 - if (epub->load()) { 39 + if (epub->load(true, SETTINGS.embeddedStyle == 0)) { 39 40 return epub; 40 41 } 41 42