A fork of https://github.com/crosspoint-reader/crosspoint-reader
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

perf: Eliminate per-pixel overheads in image rendering (#1293)

## Summary

Replace per-pixel getRenderMode() + rotateCoordinates() + bounds checks
with a DirectPixelWriter struct that pre-computes orientation and render
mode state once per row. Use bitwise ops instead of division/modulo for
cache pixel packing. Skip PNG cache allocation when buffer exceeds 48KB
(framebuffer size) since PNG decode is fast enough that caching provides
minimal benefit, and the large buffer competes with the 44KB PNG decoder
for heap.

## Additional Context
Measured improvements on ESP32-C3 @ 160MHz:
- JPEG decode: 5-7% faster (1:1 scale)
- PNG decode: 15-20% faster (1:1 scale)
- Cache renders: 3-6% faster across both formats
- Eliminates "Failed to allocate cache buffer" errors for large PNGs

---

### AI Usage

While CrossPoint doesn't have restrictions on AI tools in contributing,
please be transparent about their usage as it
helps set the right context for reviewers.

Did you use AI tools to help write this code? _**< PARTIALLY >**_

authored by

martin brook and committed by
GitHub
1df543d4 63961625

+215 -32
+9 -5
lib/Epub/Epub/blocks/ImageBlock.cpp
··· 4 4 #include <Logging.h> 5 5 #include <Serialization.h> 6 6 7 - #include "../converters/DitherUtils.h" 7 + #include "../converters/DirectPixelWriter.h" 8 8 #include "../converters/ImageDecoderFactory.h" 9 9 10 10 // Cache file format: ··· 66 66 return false; 67 67 } 68 68 69 + DirectPixelWriter pw; 70 + pw.init(renderer); 71 + 69 72 for (int row = 0; row < cachedHeight; row++) { 70 73 if (cacheFile.read(rowBuffer, bytesPerRow) != bytesPerRow) { 71 74 LOG_ERR("IMG", "Cache read error at row %d", row); ··· 74 77 return false; 75 78 } 76 79 77 - int destY = y + row; 80 + const int destY = y + row; 81 + pw.beginRow(destY); 78 82 for (int col = 0; col < cachedWidth; col++) { 79 - int byteIdx = col / 4; 80 - int bitShift = 6 - (col % 4) * 2; // MSB first within byte 83 + const int byteIdx = col >> 2; // col / 4 84 + const int bitShift = 6 - (col & 3) * 2; // MSB first within byte 81 85 uint8_t pixelValue = (rowBuffer[byteIdx] >> bitShift) & 0x03; 82 86 83 - drawPixelWithRenderMode(renderer, x + col, destY, pixelValue); 87 + pw.writePixel(x + col, pixelValue); 84 88 } 85 89 } 86 90
+156
lib/Epub/Epub/converters/DirectPixelWriter.h
··· 1 + #pragma once 2 + 3 + #include <GfxRenderer.h> 4 + #include <HalDisplay.h> 5 + #include <stdint.h> 6 + 7 + // Direct framebuffer writer that eliminates per-pixel overhead from the image 8 + // rendering hot path. Pre-computes orientation transform as linear coefficients 9 + // and caches render-mode state so the inner loop is: one multiply, one add, 10 + // one shift, and one AND per pixel — no branches, no method calls. 11 + // 12 + // Caller is responsible for ensuring (outX, outY) are within screen bounds. 13 + // ImageBlock::render() already validates this before entering the pixel loop, 14 + // and the JPEG/PNG callbacks pre-clamp destination ranges to screen bounds. 15 + struct DirectPixelWriter { 16 + uint8_t* fb; 17 + GfxRenderer::RenderMode mode; 18 + 19 + // Orientation is collapsed into a linear transform: 20 + // phyX = phyXBase + x * phyXStepX + y * phyXStepY 21 + // phyY = phyYBase + x * phyYStepX + y * phyYStepY 22 + int phyXBase, phyYBase; 23 + int phyXStepX, phyYStepX; // per logical-X step 24 + int phyXStepY, phyYStepY; // per logical-Y step 25 + 26 + // Row-precomputed: the Y-dependent portion of the physical coords 27 + int rowPhyXBase, rowPhyYBase; 28 + 29 + void init(GfxRenderer& renderer) { 30 + fb = renderer.getFrameBuffer(); 31 + mode = renderer.getRenderMode(); 32 + 33 + switch (renderer.getOrientation()) { 34 + case GfxRenderer::Portrait: 35 + // phyX = y, phyY = (DISPLAY_HEIGHT-1) - x 36 + phyXBase = 0; 37 + phyYBase = HalDisplay::DISPLAY_HEIGHT - 1; 38 + phyXStepX = 0; 39 + phyYStepX = -1; 40 + phyXStepY = 1; 41 + phyYStepY = 0; 42 + break; 43 + case GfxRenderer::LandscapeClockwise: 44 + // phyX = (DISPLAY_WIDTH-1) - x, phyY = (DISPLAY_HEIGHT-1) - y 45 + phyXBase = HalDisplay::DISPLAY_WIDTH - 1; 46 + phyYBase = HalDisplay::DISPLAY_HEIGHT - 1; 47 + phyXStepX = -1; 48 + phyYStepX = 0; 49 + phyXStepY = 0; 50 + phyYStepY = -1; 51 + break; 52 + case GfxRenderer::PortraitInverted: 53 + // phyX = (DISPLAY_WIDTH-1) - y, phyY = x 54 + phyXBase = HalDisplay::DISPLAY_WIDTH - 1; 55 + phyYBase = 0; 56 + phyXStepX = 0; 57 + phyYStepX = 1; 58 + phyXStepY = -1; 59 + phyYStepY = 0; 60 + break; 61 + case GfxRenderer::LandscapeCounterClockwise: 62 + // phyX = x, phyY = y 63 + phyXBase = 0; 64 + phyYBase = 0; 65 + phyXStepX = 1; 66 + phyYStepX = 0; 67 + phyXStepY = 0; 68 + phyYStepY = 1; 69 + break; 70 + default: 71 + // Fallback to LandscapeCounterClockwise (identity transform) 72 + phyXBase = 0; 73 + phyYBase = 0; 74 + phyXStepX = 1; 75 + phyYStepX = 0; 76 + phyXStepY = 0; 77 + phyYStepY = 1; 78 + break; 79 + } 80 + } 81 + 82 + // Call once per row before the column loop. 83 + // Pre-computes the Y-dependent portion so writePixel() only needs the X part. 84 + inline void beginRow(int logicalY) { 85 + rowPhyXBase = phyXBase + logicalY * phyXStepY; 86 + rowPhyYBase = phyYBase + logicalY * phyYStepY; 87 + } 88 + 89 + // Write a single 2-bit dithered pixel value to the framebuffer. 90 + // Must be called after beginRow() for the current row. 91 + // No bounds checking — caller guarantees coordinates are valid. 92 + inline void writePixel(int logicalX, uint8_t pixelValue) const { 93 + // Determine whether to draw based on render mode 94 + bool draw; 95 + bool state; 96 + switch (mode) { 97 + case GfxRenderer::BW: 98 + draw = (pixelValue < 3); 99 + state = true; 100 + break; 101 + case GfxRenderer::GRAYSCALE_MSB: 102 + draw = (pixelValue == 1 || pixelValue == 2); 103 + state = false; 104 + break; 105 + case GfxRenderer::GRAYSCALE_LSB: 106 + draw = (pixelValue == 1); 107 + state = false; 108 + break; 109 + default: 110 + return; 111 + } 112 + 113 + if (!draw) return; 114 + 115 + const int phyX = rowPhyXBase + logicalX * phyXStepX; 116 + const int phyY = rowPhyYBase + logicalX * phyYStepX; 117 + 118 + const uint16_t byteIndex = phyY * HalDisplay::DISPLAY_WIDTH_BYTES + (phyX >> 3); 119 + const uint8_t bitMask = 1 << (7 - (phyX & 7)); 120 + 121 + if (state) { 122 + fb[byteIndex] &= ~bitMask; // Clear bit (draw black) 123 + } else { 124 + fb[byteIndex] |= bitMask; // Set bit (draw white) 125 + } 126 + } 127 + }; 128 + 129 + // Direct cache writer that eliminates per-pixel overhead from PixelCache::setPixel(). 130 + // Pre-computes row pointer so the inner loop is just byte index + bit manipulation. 131 + // 132 + // Caller guarantees coordinates are within cache bounds. 133 + struct DirectCacheWriter { 134 + uint8_t* buffer; 135 + int bytesPerRow; 136 + int originX; 137 + uint8_t* rowPtr; // Pre-computed for current row 138 + 139 + void init(uint8_t* cacheBuffer, int cacheBytesPerRow, int cacheOriginX) { 140 + buffer = cacheBuffer; 141 + bytesPerRow = cacheBytesPerRow; 142 + originX = cacheOriginX; 143 + rowPtr = nullptr; 144 + } 145 + 146 + // Call once per row before the column loop. 147 + inline void beginRow(int screenY, int cacheOriginY) { rowPtr = buffer + (screenY - cacheOriginY) * bytesPerRow; } 148 + 149 + // Write a 2-bit pixel value. No bounds checking. 150 + inline void writePixel(int screenX, uint8_t value) const { 151 + const int localX = screenX - originX; 152 + const int byteIdx = localX >> 2; // localX / 4 153 + const int bitShift = 6 - (localX & 3) * 2; // MSB first: pixel 0 at bits 6-7 154 + rowPtr[byteIdx] = (rowPtr[byteIdx] & ~(0x03 << bitShift)) | ((value & 0x03) << bitShift); 155 + } 156 + };
-13
lib/Epub/Epub/converters/DitherUtils.h
··· 1 1 #pragma once 2 2 3 - #include <GfxRenderer.h> 4 3 #include <stdint.h> 5 4 6 5 // 4x4 Bayer matrix for ordered dithering ··· 26 25 if (adjusted < 192) return 2; 27 26 return 3; 28 27 } 29 - 30 - // Draw a pixel respecting the current render mode for grayscale support 31 - inline void drawPixelWithRenderMode(GfxRenderer& renderer, int x, int y, uint8_t pixelValue) { 32 - GfxRenderer::RenderMode renderMode = renderer.getRenderMode(); 33 - if (renderMode == GfxRenderer::BW && pixelValue < 3) { 34 - renderer.drawPixel(x, y, true); 35 - } else if (renderMode == GfxRenderer::GRAYSCALE_MSB && (pixelValue == 1 || pixelValue == 2)) { 36 - renderer.drawPixel(x, y, false); 37 - } else if (renderMode == GfxRenderer::GRAYSCALE_LSB && pixelValue == 1) { 38 - renderer.drawPixel(x, y, false); 39 - } 40 - }
+26 -10
lib/Epub/Epub/converters/JpegToFramebufferConverter.cpp
··· 9 9 #include <cstdlib> 10 10 #include <new> 11 11 12 + #include "DirectPixelWriter.h" 12 13 #include "DitherUtils.h" 13 14 #include "PixelCache.h" 14 15 ··· 167 168 168 169 if (dstYStart >= dstYEnd || dstXStart >= dstXEnd) return 1; 169 170 171 + // Pre-compute orientation and render-mode state once per callback invocation 172 + DirectPixelWriter pw; 173 + pw.init(renderer); 174 + 175 + DirectCacheWriter cw; 176 + if (caching) { 177 + cw.init(ctx->cache.buffer, ctx->cache.bytesPerRow, ctx->cache.originX); 178 + } 179 + 170 180 // === 1:1 fast path: no scaling math === 171 181 if (fineScaleFP == FP_ONE) { 172 182 for (int dstY = dstYStart; dstY < dstYEnd; dstY++) { 173 183 const int outY = cfgY + dstY; 184 + pw.beginRow(outY); 185 + if (caching) cw.beginRow(outY, ctx->config->y); 174 186 const uint8_t* row = &pixels[(dstY - blockY) * stride]; 175 187 for (int dstX = dstXStart; dstX < dstXEnd; dstX++) { 176 188 const int outX = cfgX + dstX; ··· 182 194 dithered = gray / 85; 183 195 if (dithered > 3) dithered = 3; 184 196 } 185 - drawPixelWithRenderMode(renderer, outX, outY, dithered); 186 - if (caching) ctx->cache.setPixel(outX, outY, dithered); 197 + pw.writePixel(outX, dithered); 198 + if (caching) cw.writePixel(outX, dithered); 187 199 } 188 200 } 189 201 return 1; ··· 203 215 204 216 for (int dstY = dstYStart; dstY < dstYEnd; dstY++) { 205 217 const int outY = cfgY + dstY; 218 + pw.beginRow(outY); 219 + if (caching) cw.beginRow(outY, ctx->config->y); 206 220 const int32_t srcFyFP = dstY * invScaleFP; 207 221 const int32_t fy = srcFyFP & FP_MASK; 208 222 const int32_t fyInv = FP_ONE - fy; ··· 239 253 dithered = gray / 85; 240 254 if (dithered > 3) dithered = 3; 241 255 } 242 - drawPixelWithRenderMode(renderer, outX, outY, dithered); 243 - if (caching) ctx->cache.setPixel(outX, outY, dithered); 256 + pw.writePixel(outX, dithered); 257 + if (caching) cw.writePixel(outX, dithered); 244 258 } 245 259 246 260 // Interior (no X boundary checks — lx0 and lx0+1 guaranteed in bounds) ··· 262 276 dithered = gray / 85; 263 277 if (dithered > 3) dithered = 3; 264 278 } 265 - drawPixelWithRenderMode(renderer, outX, outY, dithered); 266 - if (caching) ctx->cache.setPixel(outX, outY, dithered); 279 + pw.writePixel(outX, dithered); 280 + if (caching) cw.writePixel(outX, dithered); 267 281 } 268 282 269 283 // Right edge (with X boundary clamping) ··· 288 302 dithered = gray / 85; 289 303 if (dithered > 3) dithered = 3; 290 304 } 291 - drawPixelWithRenderMode(renderer, outX, outY, dithered); 292 - if (caching) ctx->cache.setPixel(outX, outY, dithered); 305 + pw.writePixel(outX, dithered); 306 + if (caching) cw.writePixel(outX, dithered); 293 307 } 294 308 } 295 309 return 1; ··· 298 312 // === Nearest-neighbor (downscale: fineScale < 1.0) === 299 313 for (int dstY = dstYStart; dstY < dstYEnd; dstY++) { 300 314 const int outY = cfgY + dstY; 315 + pw.beginRow(outY); 316 + if (caching) cw.beginRow(outY, ctx->config->y); 301 317 const int32_t srcFyFP = dstY * invScaleFP; 302 318 int ly = (srcFyFP >> FP_SHIFT) - blockY; 303 319 if (ly < 0) ly = 0; ··· 319 335 dithered = gray / 85; 320 336 if (dithered > 3) dithered = 3; 321 337 } 322 - drawPixelWithRenderMode(renderer, outX, outY, dithered); 323 - if (caching) ctx->cache.setPixel(outX, outY, dithered); 338 + pw.writePixel(outX, dithered); 339 + if (caching) cw.writePixel(outX, dithered); 324 340 } 325 341 } 326 342
+24 -4
lib/Epub/Epub/converters/PngToFramebufferConverter.cpp
··· 9 9 #include <cstdlib> 10 10 #include <new> 11 11 12 + #include "DirectPixelWriter.h" 12 13 #include "DitherUtils.h" 13 14 #include "PixelCache.h" 14 15 ··· 207 208 bool useDithering = ctx->config->useDithering; 208 209 bool caching = ctx->caching; 209 210 211 + // Pre-compute orientation and render-mode state once per row 212 + DirectPixelWriter pw; 213 + pw.init(*ctx->renderer); 214 + pw.beginRow(outY); 215 + 216 + DirectCacheWriter cw; 217 + if (caching) { 218 + cw.init(ctx->cache.buffer, ctx->cache.bytesPerRow, ctx->cache.originX); 219 + cw.beginRow(outY, ctx->config->y); 220 + } 221 + 210 222 int srcX = 0; 211 223 int error = 0; 212 224 ··· 222 234 ditheredGray = gray / 85; 223 235 if (ditheredGray > 3) ditheredGray = 3; 224 236 } 225 - drawPixelWithRenderMode(*ctx->renderer, outX, outY, ditheredGray); 226 - if (caching) ctx->cache.setPixel(outX, outY, ditheredGray); 237 + pw.writePixel(outX, ditheredGray); 238 + if (caching) cw.writePixel(outX, ditheredGray); 227 239 } 228 240 229 241 // Bresenham-style stepping: advance srcX based on ratio srcWidth/dstWidth ··· 356 368 return false; 357 369 } 358 370 359 - // Allocate cache buffer using SCALED dimensions 371 + // Allocate cache buffer using SCALED dimensions. 372 + // PNG decode is fast enough (~135ms for 400x600) that caching provides minimal benefit 373 + // for larger images, while the cache buffer competes with the 44KB PNG decoder for heap. 374 + // Skip caching when the buffer would exceed the framebuffer size (48KB). 375 + static constexpr size_t PNG_MAX_CACHE_BYTES = 48000; 360 376 ctx.caching = !config.cachePath.empty(); 361 377 if (ctx.caching) { 362 - if (!ctx.cache.allocate(ctx.dstWidth, ctx.dstHeight, config.x, config.y)) { 378 + size_t cacheSize = (size_t)((ctx.dstWidth + 3) / 4) * ctx.dstHeight; 379 + if (cacheSize > PNG_MAX_CACHE_BYTES) { 380 + LOG_DBG("PNG", "Skipping cache: %zu bytes exceeds PNG limit (%zu)", cacheSize, PNG_MAX_CACHE_BYTES); 381 + ctx.caching = false; 382 + } else if (!ctx.cache.allocate(ctx.dstWidth, ctx.dstHeight, config.x, config.y)) { 363 383 LOG_ERR("PNG", "Failed to allocate cache buffer, continuing without caching"); 364 384 ctx.caching = false; 365 385 }