A fork of https://github.com/crosspoint-reader/crosspoint-reader
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

perf: optimize drawPixel() (#748)

## Summary

Ref https://github.com/crosspoint-reader/crosspoint-reader/pull/737

This PR further reduce ~25ms from rendering time, testing inside the
Setting screen:

```
master:
[68440] [GFX] Time = 73 ms from clearScreen to displayBuffer

PR:
[97806] [GFX] Time = 47 ms from clearScreen to displayBuffer
```

And in extreme case (fill the entire screen with black or gray color):

```
master:
[1125] [ ] Test fillRectDither drawn in 327 ms
[1347] [ ] Test fillRect drawn in 222 ms

PR:
[1334] [ ] Test fillRectDither drawn in 225 ms
[1455] [ ] Test fillRect drawn in 121 ms
```

Note that
https://github.com/crosspoint-reader/crosspoint-reader/pull/737 is NOT
applied on top of this PR. But with 2 of them combined, it should reduce
from 47ms --> 42ms

## Details

This PR based on the fact that function calls are costly if the function
is small enough. For example, this simple call:

```
int rotatedX = 0;
int rotatedY = 0;
rotateCoordinates(x, y, &rotatedX, &rotatedY);
```

Generated assembly code:

<img width="771" height="215" alt="image"
src="https://github.com/user-attachments/assets/37991659-3304-41c3-a3b2-fb967da53f82"
/>

This adds ~10 instructions just to prepare the registers prior to the
function call, plus some more instructions for the function's
epilogue/prologue. Inlining it removing all of these:

<img width="1471" height="832" alt="image"
src="https://github.com/user-attachments/assets/b67a22ee-93ba-4017-88ed-c973e28ec914"
/>

Of course, this optimization is not magic. It's only beneficial under 3
conditions:
- The function is small, not in size, but in terms of effective
instructions. For example, the `rotateCoordinates` is simply a jump
table, where each branch is just 3-4 inst
- The function has multiple input arguments, which requires some move to
put it onto the correct place
- The function is called very frequently (i.e. critical path)

---

### AI Usage

While CrossPoint doesn't have restrictions on AI tools in contributing,
please be transparent about their usage as it
helps set the right context for reviewers.

Did you use AI tools to help write this code? **NO**

authored by

Xuan-Son Nguyen and committed by
GitHub
a87eacc6 1caad578

+52 -55
+48 -52
lib/GfxRenderer/GfxRenderer.cpp
··· 2 2 3 3 #include <Utf8.h> 4 4 5 + void GfxRenderer::begin() { 6 + frameBuffer = display.getFrameBuffer(); 7 + if (!frameBuffer) { 8 + Serial.printf("[%lu] [GFX] !! No framebuffer\n", millis()); 9 + assert(false); 10 + } 11 + } 12 + 5 13 void GfxRenderer::insertFont(const int fontId, EpdFontFamily font) { fontMap.insert({fontId, font}); } 6 14 7 - void GfxRenderer::rotateCoordinates(const int x, const int y, int* rotatedX, int* rotatedY) const { 15 + // Translate logical (x,y) coordinates to physical panel coordinates based on current orientation 16 + // This should always be inlined for better performance 17 + static inline void rotateCoordinates(const GfxRenderer::Orientation orientation, const int x, const int y, int* phyX, 18 + int* phyY) { 8 19 switch (orientation) { 9 - case Portrait: { 20 + case GfxRenderer::Portrait: { 10 21 // Logical portrait (480x800) → panel (800x480) 11 22 // Rotation: 90 degrees clockwise 12 - *rotatedX = y; 13 - *rotatedY = HalDisplay::DISPLAY_HEIGHT - 1 - x; 23 + *phyX = y; 24 + *phyY = HalDisplay::DISPLAY_HEIGHT - 1 - x; 14 25 break; 15 26 } 16 - case LandscapeClockwise: { 27 + case GfxRenderer::LandscapeClockwise: { 17 28 // Logical landscape (800x480) rotated 180 degrees (swap top/bottom and left/right) 18 - *rotatedX = HalDisplay::DISPLAY_WIDTH - 1 - x; 19 - *rotatedY = HalDisplay::DISPLAY_HEIGHT - 1 - y; 29 + *phyX = HalDisplay::DISPLAY_WIDTH - 1 - x; 30 + *phyY = HalDisplay::DISPLAY_HEIGHT - 1 - y; 20 31 break; 21 32 } 22 - case PortraitInverted: { 33 + case GfxRenderer::PortraitInverted: { 23 34 // Logical portrait (480x800) → panel (800x480) 24 35 // Rotation: 90 degrees counter-clockwise 25 - *rotatedX = HalDisplay::DISPLAY_WIDTH - 1 - y; 26 - *rotatedY = x; 36 + *phyX = HalDisplay::DISPLAY_WIDTH - 1 - y; 37 + *phyY = x; 27 38 break; 28 39 } 29 - case LandscapeCounterClockwise: { 40 + case GfxRenderer::LandscapeCounterClockwise: { 30 41 // Logical landscape (800x480) aligned with panel orientation 31 - *rotatedX = x; 32 - *rotatedY = y; 42 + *phyX = x; 43 + *phyY = y; 33 44 break; 34 45 } 35 46 } 36 47 } 37 48 49 + // IMPORTANT: This function is in critical rendering path and is called for every pixel. Please keep it as simple and 50 + // efficient as possible. 38 51 void GfxRenderer::drawPixel(const int x, const int y, const bool state) const { 39 - uint8_t* frameBuffer = display.getFrameBuffer(); 40 - 41 - // Early return if no framebuffer is set 42 - if (!frameBuffer) { 43 - Serial.printf("[%lu] [GFX] !! No framebuffer\n", millis()); 44 - return; 45 - } 52 + int phyX = 0; 53 + int phyY = 0; 46 54 47 - int rotatedX = 0; 48 - int rotatedY = 0; 49 - rotateCoordinates(x, y, &rotatedX, &rotatedY); 55 + // Note: this call should be inlined for better performance 56 + rotateCoordinates(orientation, x, y, &phyX, &phyY); 50 57 51 58 // Bounds checking against physical panel dimensions 52 - if (rotatedX < 0 || rotatedX >= HalDisplay::DISPLAY_WIDTH || rotatedY < 0 || rotatedY >= HalDisplay::DISPLAY_HEIGHT) { 53 - Serial.printf("[%lu] [GFX] !! Outside range (%d, %d) -> (%d, %d)\n", millis(), x, y, rotatedX, rotatedY); 59 + if (phyX < 0 || phyX >= HalDisplay::DISPLAY_WIDTH || phyY < 0 || phyY >= HalDisplay::DISPLAY_HEIGHT) { 60 + Serial.printf("[%lu] [GFX] !! Outside range (%d, %d) -> (%d, %d)\n", millis(), x, y, phyX, phyY); 54 61 return; 55 62 } 56 63 57 64 // Calculate byte position and bit position 58 - const uint16_t byteIndex = rotatedY * HalDisplay::DISPLAY_WIDTH_BYTES + (rotatedX / 8); 59 - const uint8_t bitPosition = 7 - (rotatedX % 8); // MSB first 65 + const uint16_t byteIndex = phyY * HalDisplay::DISPLAY_WIDTH_BYTES + (phyX / 8); 66 + const uint8_t bitPosition = 7 - (phyX % 8); // MSB first 60 67 61 68 if (state) { 62 69 frameBuffer[byteIndex] &= ~(1 << bitPosition); // Clear bit ··· 376 383 void GfxRenderer::drawImage(const uint8_t bitmap[], const int x, const int y, const int width, const int height) const { 377 384 int rotatedX = 0; 378 385 int rotatedY = 0; 379 - rotateCoordinates(x, y, &rotatedX, &rotatedY); 386 + rotateCoordinates(orientation, x, y, &rotatedX, &rotatedY); 380 387 // Rotate origin corner 381 388 switch (orientation) { 382 389 case Portrait: ··· 632 639 free(nodeX); 633 640 } 634 641 635 - void GfxRenderer::clearScreen(const uint8_t color) const { display.clearScreen(color); } 642 + // For performance measurement (using static to allow "const" methods) 643 + static unsigned long start_ms = 0; 644 + 645 + void GfxRenderer::clearScreen(const uint8_t color) const { 646 + start_ms = millis(); 647 + display.clearScreen(color); 648 + } 636 649 637 650 void GfxRenderer::invertScreen() const { 638 - uint8_t* buffer = display.getFrameBuffer(); 639 - if (!buffer) { 640 - Serial.printf("[%lu] [GFX] !! No framebuffer in invertScreen\n", millis()); 641 - return; 642 - } 643 651 for (int i = 0; i < HalDisplay::BUFFER_SIZE; i++) { 644 - buffer[i] = ~buffer[i]; 652 + frameBuffer[i] = ~frameBuffer[i]; 645 653 } 646 654 } 647 655 648 656 void GfxRenderer::displayBuffer(const HalDisplay::RefreshMode refreshMode) const { 657 + auto elapsed = millis() - start_ms; 658 + Serial.printf("[%lu] [GFX] Time = %lu ms from clearScreen to displayBuffer\n", millis(), elapsed); 649 659 display.displayBuffer(refreshMode, fadingFix); 650 660 } 651 661 ··· 829 839 } 830 840 } 831 841 832 - uint8_t* GfxRenderer::getFrameBuffer() const { return display.getFrameBuffer(); } 842 + uint8_t* GfxRenderer::getFrameBuffer() const { return frameBuffer; } 833 843 834 844 size_t GfxRenderer::getBufferSize() { return HalDisplay::BUFFER_SIZE; } 835 845 836 846 // unused 837 847 // void GfxRenderer::grayscaleRevert() const { display.grayscaleRevert(); } 838 848 839 - void GfxRenderer::copyGrayscaleLsbBuffers() const { display.copyGrayscaleLsbBuffers(display.getFrameBuffer()); } 849 + void GfxRenderer::copyGrayscaleLsbBuffers() const { display.copyGrayscaleLsbBuffers(frameBuffer); } 840 850 841 - void GfxRenderer::copyGrayscaleMsbBuffers() const { display.copyGrayscaleMsbBuffers(display.getFrameBuffer()); } 851 + void GfxRenderer::copyGrayscaleMsbBuffers() const { display.copyGrayscaleMsbBuffers(frameBuffer); } 842 852 843 853 void GfxRenderer::displayGrayBuffer() const { display.displayGrayBuffer(fadingFix); } 844 854 ··· 858 868 * Returns true if buffer was stored successfully, false if allocation failed. 859 869 */ 860 870 bool GfxRenderer::storeBwBuffer() { 861 - const uint8_t* frameBuffer = display.getFrameBuffer(); 862 - if (!frameBuffer) { 863 - Serial.printf("[%lu] [GFX] !! No framebuffer in storeBwBuffer\n", millis()); 864 - return false; 865 - } 866 - 867 871 // Allocate and copy each chunk 868 872 for (size_t i = 0; i < BW_BUFFER_NUM_CHUNKS; i++) { 869 873 // Check if any chunks are already allocated ··· 913 917 return; 914 918 } 915 919 916 - uint8_t* frameBuffer = display.getFrameBuffer(); 917 - if (!frameBuffer) { 918 - Serial.printf("[%lu] [GFX] !! No framebuffer in restoreBwBuffer\n", millis()); 919 - freeBwBufferChunks(); 920 - return; 921 - } 922 - 923 920 for (size_t i = 0; i < BW_BUFFER_NUM_CHUNKS; i++) { 924 921 // Check if chunk is missing 925 922 if (!bwBufferChunks[i]) { ··· 943 940 * Use this when BW buffer was re-rendered instead of stored/restored. 944 941 */ 945 942 void GfxRenderer::cleanupGrayscaleWithFrameBuffer() const { 946 - uint8_t* frameBuffer = display.getFrameBuffer(); 947 943 if (frameBuffer) { 948 944 display.cleanupGrayscaleBuffers(frameBuffer); 949 945 }
+3 -3
lib/GfxRenderer/GfxRenderer.h
··· 33 33 RenderMode renderMode; 34 34 Orientation orientation; 35 35 bool fadingFix; 36 + uint8_t* frameBuffer = nullptr; 36 37 uint8_t* bwBufferChunks[BW_BUFFER_NUM_CHUNKS] = {nullptr}; 37 38 std::map<int, EpdFontFamily> fontMap; 38 39 void renderChar(const EpdFontFamily& fontFamily, uint32_t cp, int* x, const int* y, bool pixelState, 39 40 EpdFontFamily::Style style) const; 40 41 void freeBwBufferChunks(); 41 - void rotateCoordinates(int x, int y, int* rotatedX, int* rotatedY) const; 42 42 template <Color color> 43 43 void drawPixelDither(int x, int y) const; 44 44 template <Color color> ··· 55 55 static constexpr int VIEWABLE_MARGIN_LEFT = 3; 56 56 57 57 // Setup 58 + void begin(); // must be called right after display.begin() 58 59 void insertFont(int fontId, EpdFontFamily font); 59 60 60 61 // Orientation control (affects logical width/height and coordinate transforms) ··· 72 73 // void displayWindow(int x, int y, int width, int height) const; 73 74 void invertScreen() const; 74 75 void clearScreen(uint8_t color = 0xFF) const; 76 + void getOrientedViewableTRBL(int* outTop, int* outRight, int* outBottom, int* outLeft) const; 75 77 76 78 // Drawing 77 79 void drawPixel(int x, int y, bool state = true) const; ··· 125 127 // Low level functions 126 128 uint8_t* getFrameBuffer() const; 127 129 static size_t getBufferSize(); 128 - void grayscaleRevert() const; 129 - void getOrientedViewableTRBL(int* outTop, int* outRight, int* outBottom, int* outLeft) const; 130 130 };
+1
src/main.cpp
··· 253 253 254 254 void setupDisplayAndFonts() { 255 255 display.begin(); 256 + renderer.begin(); 256 257 Serial.printf("[%lu] [ ] Display initialized\n", millis()); 257 258 renderer.insertFont(BOOKERLY_14_FONT_ID, bookerly14FontFamily); 258 259 #ifndef OMIT_FONTS