From b96a950d9e6b4ae45896fc2140e7be6d469ae7e3 Mon Sep 17 00:00:00 2001 From: Damian Schneider Date: Fri, 25 Jul 2025 23:49:32 +0200 Subject: [PATCH 01/11] Improved heap and PSRAM handling - Segment `allocateData()` uses more elaborate DRAM checking to reduce fragmentation and allow for larger setups to run on low heap - Segment data allocation fails if minimum contiguous block size runs low to keep the UI working - Increased `MAX_SEGMENT_DATA` to account for better segment data handling - Memory allocation functions try to keep enough DRAM for segment data - Added constant `PSRAM_THRESHOLD` to improve PSRAM usage - Increase MIN_HEAP_SIZE to reduce risk of breaking UI due to low memory for JSON response - ESP32 makes use of IRAM (no 8bit access) for pixelbuffers, freeing up to 50kB of RAM - Fix to properly get available heap on all platforms: added function `getFreeHeapSize()` - Bugfix for effects that divide by SEGLEN: don't run FX in service() if segment is not active - Syntax fix in AR: calloc() uses (numelements, size) as arguments --- usermods/audioreactive/audio_reactive.cpp | 4 +- wled00/FX.h | 17 +++-- wled00/FX_fcn.cpp | 91 +++++++++++++++++------ wled00/cfg.cpp | 2 +- wled00/const.h | 14 +++- wled00/fcn_declare.h | 11 ++- wled00/json.cpp | 2 +- wled00/util.cpp | 74 +++++++++++++----- wled00/wled.cpp | 22 +++--- wled00/wled_server.cpp | 2 +- wled00/ws.cpp | 8 +- 11 files changed, 178 insertions(+), 69 deletions(-) diff --git a/usermods/audioreactive/audio_reactive.cpp b/usermods/audioreactive/audio_reactive.cpp index 06268560a7..25b8135209 100644 --- a/usermods/audioreactive/audio_reactive.cpp +++ b/usermods/audioreactive/audio_reactive.cpp @@ -224,8 +224,8 @@ void FFTcode(void * parameter) DEBUGSR_PRINT("FFT started on core: "); DEBUGSR_PRINTLN(xPortGetCoreID()); // allocate FFT buffers on first call - if (vReal == nullptr) vReal = (float*) calloc(sizeof(float), samplesFFT); - if (vImag == nullptr) vImag = (float*) calloc(sizeof(float), samplesFFT); + if
(vReal == nullptr) vReal = (float*) calloc(samplesFFT, sizeof(float)); + if (vImag == nullptr) vImag = (float*) calloc(samplesFFT, sizeof(float)); if ((vReal == nullptr) || (vImag == nullptr)) { // something went wrong if (vReal) free(vReal); vReal = nullptr; diff --git a/wled00/FX.h b/wled00/FX.h index 097c857caf..1f35d12d5b 100644 --- a/wled00/FX.h +++ b/wled00/FX.h @@ -88,18 +88,17 @@ extern byte realtimeMode; // used in getMappedPixelIndex() #endif #define FPS_CALC_SHIFT 7 // bit shift for fixed point math -/* each segment uses 82 bytes of SRAM memory, so if you're application fails because of - insufficient memory, decreasing MAX_NUM_SEGMENTS may help */ +// heap memory limit for effects data, pixel buffers try to reserve it if PSRAM is available #ifdef ESP8266 #define MAX_NUM_SEGMENTS 16 /* How much data bytes all segments combined may allocate */ - #define MAX_SEGMENT_DATA 5120 + #define MAX_SEGMENT_DATA (MAX_NUM_SEGMENTS*640) // 10k by default #elif defined(CONFIG_IDF_TARGET_ESP32S2) #define MAX_NUM_SEGMENTS 20 - #define MAX_SEGMENT_DATA (MAX_NUM_SEGMENTS*512) // 10k by default (S2 is short on free RAM) + #define MAX_SEGMENT_DATA (MAX_NUM_SEGMENTS*1024) // 20k by default (S2 is short on free RAM) #else #define MAX_NUM_SEGMENTS 32 // warning: going beyond 32 may consume too much RAM for stable operation - #define MAX_SEGMENT_DATA (MAX_NUM_SEGMENTS*1280) // 40k by default + #define MAX_SEGMENT_DATA (MAX_NUM_SEGMENTS*2560) // 80k by default #endif /* How much data bytes each segment should max allocate to leave enough space for other segments, @@ -600,8 +599,12 @@ class Segment { , _t(nullptr) { DEBUGFX_PRINTF_P(PSTR("-- Creating segment: %p [%d,%d:%d,%d]\n"), this, (int)start, (int)stop, (int)startY, (int)stopY); - // allocate render buffer (always entire segment) - pixels = static_cast(d_calloc(sizeof(uint32_t), length())); // error handling is also done in isActive() + // allocate render buffer (always entire segment), prefer PSRAM if DRAM is running low. 
Note: impact on FPS with PSRAM buffer is low (~2% with QSPI PSRAM) + #ifdef CONFIG_IDF_TARGET_ESP32 + pixels = static_cast(pixelbuffer_malloc(sizeof(uint32_t) * length())); + #else + pixels = static_cast(p_calloc(length(), sizeof(uint32_t))); // prefer PSRAM. note: error handling is also done in isActive() + #endif if (!pixels) { DEBUGFX_PRINTLN(F("!!! Not enough RAM for pixel buffer !!!")); extern byte errorFlag; diff --git a/wled00/FX_fcn.cpp b/wled00/FX_fcn.cpp index a479b655a6..da9449fb65 100755 --- a/wled00/FX_fcn.cpp +++ b/wled00/FX_fcn.cpp @@ -70,6 +70,12 @@ Segment::Segment(const Segment &orig) { if (orig.data) { if (allocateData(orig._dataLen)) memcpy(data, orig.data, orig._dataLen); } if (orig.pixels) { pixels = static_cast(d_malloc(sizeof(uint32_t) * orig.length())); + +// pixels = static_cast(heap_caps_malloc(orig.length()* sizeof(uint32_t), MALLOC_CAP_32BIT | MALLOC_CAP_INTERNAL)); // use this for ESP32 +//pixels = static_cast(heap_caps_malloc(sizeof(uint32_t) * orig.length(), MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT)); +//pixels = static_cast(heap_caps_malloc(sizeof(uint32_t) * orig.length(), MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT)); + + if (pixels) memcpy(pixels, orig.pixels, sizeof(uint32_t) * orig.length()); else { DEBUG_PRINTLN(F("!!! Not enough RAM for pixel buffer !!!")); @@ -111,6 +117,10 @@ Segment& Segment::operator= (const Segment &orig) { if (orig.data) { if (allocateData(orig._dataLen)) memcpy(data, orig.data, orig._dataLen); } if (orig.pixels) { pixels = static_cast(d_malloc(sizeof(uint32_t) * orig.length())); + //TODO: also need to put this in 32bit memory on ESP32, maybe make that a function... + //pixels = static_cast(heap_caps_malloc(sizeof(uint32_t) * orig.length(), MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT)); + //pixels = static_cast(heap_caps_malloc(sizeof(uint32_t) * orig.length(), MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT)); + if (pixels) memcpy(pixels, orig.pixels, sizeof(uint32_t) * orig.length()); else { DEBUG_PRINTLN(F("!!! 
Not enough RAM for pixel buffer !!!")); @@ -143,13 +153,22 @@ Segment& Segment::operator= (Segment &&orig) noexcept { // allocates effect data buffer on heap and initialises (erases) it bool Segment::allocateData(size_t len) { - if (len == 0) return false; // nothing to do - if (data && _dataLen >= len) { // already allocated enough (reduce fragmentation) + if (len == 0) return false; // nothing to do + if (data && _dataLen >= len) { // already allocated enough (reduce fragmentation) if (call == 0) { - //DEBUG_PRINTF_P(PSTR("-- Clearing data (%d): %p\n"), len, this); - memset(data, 0, len); // erase buffer if called during effect initialisation + if(checkHeapHealth()) { + //DEBUG_PRINTF_P(PSTR("-- Clearing data (%d): %p\n"), len, this); + memset(data, 0, len); // erase buffer if called during effect initialisation + return true; // no need to reallocate + } + else { + d_free(data); // free data and try to allocate again + data = nullptr; + Segment::addUsedSegmentData(-_dataLen); // subtract buffer size + } } - return true; + else + return true; } //DEBUG_PRINTF_P(PSTR("-- Allocating data (%d): %p\n"), len, this); if (Segment::getUsedSegmentData() + len - _dataLen > MAX_SEGMENT_DATA) { @@ -158,23 +177,29 @@ bool Segment::allocateData(size_t len) { errorFlag = ERR_NORAM; return false; } - // prefer DRAM over SPI RAM on ESP32 since it is slow + // prefer DRAM over PSRAM for speed if (data) { data = (byte*)d_realloc_malloc(data, len); // realloc with malloc fallback - if (!data) { - data = nullptr; + if (data == nullptr) { // allocation failed Segment::addUsedSegmentData(-_dataLen); // subtract original buffer size _dataLen = 0; // reset data length + return false; } } else data = (byte*)d_malloc(len); if (data) { - memset(data, 0, len); // erase buffer - Segment::addUsedSegmentData(len - _dataLen); - _dataLen = len; - //DEBUG_PRINTF_P(PSTR("--- Allocated data (%p): %d/%d -> %p\n"), this, len, Segment::getUsedSegmentData(), data); - return true; + 
if(!checkHeapHealth()) { + d_free(data); + data = nullptr; + } + else { + memset(data, 0, len); // erase buffer + Segment::addUsedSegmentData(len); + _dataLen = len; + //DEBUG_PRINTF_P(PSTR("--- Allocated data (%p): %d/%d -> %p\n"), this, len, Segment::getUsedSegmentData(), data); + return true; + } } // allocation failed DEBUG_PRINTLN(F("!!! Allocation failed. !!!")); @@ -205,7 +230,11 @@ void Segment::deallocateData() { void Segment::resetIfRequired() { if (!reset || !isActive()) return; //DEBUG_PRINTF_P(PSTR("-- Segment reset: %p\n"), this); - if (data && _dataLen > 0) memset(data, 0, _dataLen); // prevent heap fragmentation (just erase buffer instead of deallocateData()) + if (data && _dataLen > 0) { + if(_dataLen > FAIR_DATA_PER_SEG) deallocateData(); // do not keep large allocations + else memset(data, 0, _dataLen); // can prevent heap fragmentation + DEBUG_PRINTF_P(PSTR("-- Segment %p reset, data cleared\n"), this); + } if (pixels) for (size_t i = 0; i < length(); i++) pixels[i] = BLACK; // clear pixel buffer next_time = 0; step = 0; call = 0; aux0 = 0; aux1 = 0; reset = false; @@ -454,16 +483,26 @@ void Segment::setGeometry(uint16_t i1, uint16_t i2, uint8_t grp, uint8_t spc, ui stop = 0; return; } - // re-allocate FX render buffer + // allocate FX render buffer if (length() != oldLength) { - if (pixels) d_free(pixels); // using realloc on large buffers can cause additional fragmentation instead of reducing it - pixels = static_cast(d_malloc(sizeof(uint32_t) * length())); + if (pixels) free(pixels); // note: using realloc can block larger heap segments + #ifdef ARDUINO_ARCH_ESP32 + pixels = static_cast(pixelbuffer_malloc(izeof(uint32_t) * length()); + #else + pixels = static_cast(p_malloc(sizeof(uint32_t) * length())); + #endif + + if(!checkHeapHealth()) { + d_free(pixels); + pixels = nullptr; + } if (!pixels) { DEBUG_PRINTLN(F("!!! 
Not enough RAM for pixel buffer !!!")); errorFlag = ERR_NORAM_PX; stop = 0; return; } + } refreshLightCapabilities(); } @@ -1198,7 +1237,7 @@ void WS2812FX::finalizeInit() { bus->begin(); bus->setBrightness(bri); } - DEBUG_PRINTF_P(PSTR("Heap after buses: %d\n"), ESP.getFreeHeap()); + DEBUG_PRINTF_P(PSTR("Heap after buses: %d\n"), getFreeHeapSize()); Segment::maxWidth = _length; Segment::maxHeight = 1; @@ -1210,11 +1249,17 @@ void WS2812FX::finalizeInit() { deserializeMap(); // (re)load default ledmap (will also setUpMatrix() if ledmap does not exist) // allocate frame buffer after matrix has been set up (gaps!) - if (_pixels) d_free(_pixels); // using realloc on large buffers can cause additional fragmentation instead of reducing it - _pixels = static_cast(d_malloc(getLengthTotal() * sizeof(uint32_t))); + if (_pixels) d_free(_pixels); +#ifdef ARDUINO_ARCH_ESP32 + _pixels = static_cast(pixelbuffer_malloc(getLengthTotal() * sizeof(uint32_t), true)); // use 32bit RAM (IRAM) or PSRAM on ESP32 +#elif !defined(ESP8266) + // use PSRAM on S2 and S3 if available (C3 defaults to DRAM). 
Note: there is no measurable perfomance impact between PSRAM and DRAM on S2/S3 with QSPI PSRAM + _pixels = static_cast(heap_caps_malloc_prefer(size, 2, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT)); // prefer PSRAM if it exists +#else + _pixels = static_cast(malloc(getLengthTotal() * sizeof(uint32_t))); // ESP8266 does not support advanced allocation API +#endif DEBUG_PRINTF_P(PSTR("strip buffer size: %uB\n"), getLengthTotal() * sizeof(uint32_t)); - - DEBUG_PRINTF_P(PSTR("Heap after strip init: %uB\n"), ESP.getFreeHeap()); + DEBUG_PRINTF_P(PSTR("Heap after strip init: %uB\n"), getFreeHeapSize()); } void WS2812FX::service() { @@ -1258,7 +1303,7 @@ void WS2812FX::service() { // if segment is in transition and no old segment exists we don't need to run the old mode // (blendSegments() takes care of On/Off transitions and clipping) Segment *segO = seg.getOldSegment(); - if (segO && (seg.mode != segO->mode || blendingStyle != BLEND_STYLE_FADE)) { + if (segO && (seg.mode != segO->mode || blendingStyle != BLEND_STYLE_FADE) && segO->isActive()) { Segment::modeBlend(true); // set semaphore for beginDraw() to blend colors and palette segO->beginDraw(prog); // set up palette & colors (also sets draw dimensions), parent segment has transition progress _currentSegment = segO; // set current segment diff --git a/wled00/cfg.cpp b/wled00/cfg.cpp index fb67e578e0..ac6fedaeb9 100644 --- a/wled00/cfg.cpp +++ b/wled00/cfg.cpp @@ -201,7 +201,7 @@ bool deserializeConfig(JsonObject doc, bool fromFS) { } #endif - DEBUG_PRINTF_P(PSTR("Heap before buses: %d\n"), ESP.getFreeHeap()); + DEBUG_PRINTF_P(PSTR("Heap before buses: %d\n"), getFreeHeapSize()); JsonArray ins = hw_led["ins"]; if (!ins.isNull()) { int s = 0; // bus iterator diff --git a/wled00/const.h b/wled00/const.h index 1abf245396..932ccee947 100644 --- a/wled00/const.h +++ b/wled00/const.h @@ -546,8 +546,18 @@ static_assert(WLED_MAX_BUSSES <= 32, "WLED_MAX_BUSSES exceeds hard limit"); #endif #endif 
-// minimum heap size required to process web requests -#define MIN_HEAP_SIZE 8192 +// minimum heap size required to process web requests: try to keep free heap above this value +#define MIN_HEAP_SIZE (12*1024) + +// threshold for PSRAM use: if heap is running low, requests above PSRAM_THRESHOLD will be allocated in PSRAM +// if heap is plenty, requests below PSRAM_THRESHOLD will be allocated in DRAM for speed +#if defined(CONFIG_IDF_TARGET_ESP32S3) + #define PSRAM_THRESHOLD 8192 +#elif defined(CONFIG_IDF_TARGET_ESP32) + #define PSRAM_THRESHOLD 4096 +#else + #define PSRAM_THRESHOLD 2048 // S2 does not have a lot of RAM, C3 and ESP8266 do not support PSRAM: the value is not used +#endif // Web server limits #ifdef ESP8266 diff --git a/wled00/fcn_declare.h b/wled00/fcn_declare.h index d19f89b27d..3959c4ef0a 100644 --- a/wled00/fcn_declare.h +++ b/wled00/fcn_declare.h @@ -550,7 +550,10 @@ inline uint8_t hw_random8() { return HW_RND_REGISTER; }; inline uint8_t hw_random8(uint32_t upperlimit) { return (hw_random8() * upperlimit) >> 8; }; // input range 0-255 inline uint8_t hw_random8(uint32_t lowerlimit, uint32_t upperlimit) { uint32_t range = upperlimit - lowerlimit; return lowerlimit + hw_random8(range); }; // input range 0-255 -// PSRAM allocation wrappers +// memory allocation wrappers +#ifdef CONFIG_IDF_TARGET_ESP32 +void *pixelbuffer_malloc(size_t size, bool enforcePSRAM = false); // prefer IRAM for pixel buffers if possible +#endif #if !defined(ESP8266) && !defined(CONFIG_IDF_TARGET_ESP32C3) extern "C" { void *p_malloc(size_t); // prefer PSRAM over DRAM @@ -579,6 +582,12 @@ extern "C" { #define d_realloc_malloc realloc_malloc #define d_free free #endif +bool checkHeapHealth(unsigned minFreeBlockSize = MIN_HEAP_SIZE); // checks heap fragmentation: returns true if contiguous free memory is larger than minFreeBlockSize +#ifndef ESP8266 +inline unsigned getFreeHeapSize() { return heap_caps_get_free_size(MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); } // returns free heap 
(ESP.getFreeHeap() can include other memory types) +#else +inline unsigned getFreeHeapSize() { return ESP.getFreeHeap(); } // returns free heap +#endif // RAII guard class for the JSON Buffer lock // Modeled after std::lock_guard diff --git a/wled00/json.cpp b/wled00/json.cpp index 4414681023..511f1aa7d3 100644 --- a/wled00/json.cpp +++ b/wled00/json.cpp @@ -829,7 +829,7 @@ void serializeInfo(JsonObject root) root[F("lwip")] = LWIP_VERSION_MAJOR; #endif - root[F("freeheap")] = ESP.getFreeHeap(); + root[F("freeheap")] = getFreeHeapSize(); #if defined(ARDUINO_ARCH_ESP32) if (psramFound()) root[F("psram")] = ESP.getFreePsram(); #endif diff --git a/wled00/util.cpp b/wled00/util.cpp index 6ff7b05dfc..50427cdc51 100644 --- a/wled00/util.cpp +++ b/wled00/util.cpp @@ -619,23 +619,47 @@ int32_t hw_random(int32_t lowerlimit, int32_t upperlimit) { return hw_random(diff) + lowerlimit; } +#ifdef CONFIG_IDF_TARGET_ESP32 +// ESP32 has 200kb 32bit accessible IRAM (usually ~50kB free) that can be used for pixel buffers WARNING: must not be accessed byte-wise! 
+// try if the buffer fits into 32bit accessible RAM, otherwise use PSRAM or DRAM +void *pixelbuffer_malloc(size_t size, bool enforcePSRAM) { + uint32_t availableDRAM = heap_caps_get_free_size(MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL); + void *buffer = nullptr; + buffer = static_cast(heap_caps_malloc(size * sizeof(uint32_t), MALLOC_CAP_INTERNAL | MALLOC_CAP_32BIT)); // try to allocate in 32bit DRAM region + if(heap_caps_get_free_size(MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL) < availableDRAM) { // buffer did not fit into 32bit DRAM region + free(_pixels); // free DRAM buffer + if(enforcePSRAM && psramSafe && psramFound()) + buffer = static_cast(heap_caps_malloc(size * sizeof(uint32_t), MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT)); // allocate in PSRAM + else + buffer = p_malloc(size * sizeof(uint32_t)); // use PSRAM or DRAM depending on availability + } + if(buffer) + memset(buffer, 0, size * sizeof(uint32_t)); + return buffer; +} +#endif + #if !defined(ESP8266) && !defined(CONFIG_IDF_TARGET_ESP32C3) // ESP8266 does not support PSRAM, ESP32-C3 does not have PSRAM // p_x prefer PSRAM, d_x prefer DRAM void *p_malloc(size_t size) { - int caps1 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; - int caps2 = MALLOC_CAP_DEFAULT | MALLOC_CAP_8BIT; + int caps1 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; + int caps2 = MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT; if (psramSafe) { - if (heap_caps_get_free_size(caps2) > 3*MIN_HEAP_SIZE && size < 512) std::swap(caps1, caps2); // use DRAM for small alloactions & when heap is plenty + if (heap_caps_get_free_size(caps2) > (2*MIN_HEAP_SIZE + MAX_SEGMENT_DATA) && size < PSRAM_THRESHOLD) { + std::swap(caps1, caps2); // use DRAM for small allocations & when DRAM is plenty + } return heap_caps_malloc_prefer(size, 2, caps1, caps2); // otherwise prefer PSRAM if it exists } return heap_caps_malloc(size, caps2); } void *p_realloc(void *ptr, size_t size) { - int caps1 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; - int caps2 = MALLOC_CAP_DEFAULT | MALLOC_CAP_8BIT; + int caps1 = 
MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; + int caps2 = MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT; if (psramSafe) { - if (heap_caps_get_free_size(caps2) > 3*MIN_HEAP_SIZE && size < 512) std::swap(caps1, caps2); // use DRAM for small alloactions & when heap is plenty + if (heap_caps_get_free_size(caps2) > (2*MIN_HEAP_SIZE + MAX_SEGMENT_DATA) && size < PSRAM_THRESHOLD){ + std::swap(caps1, caps2); // use DRAM for small allocations & when DRAM is plenty + } return heap_caps_realloc_prefer(ptr, size, 2, caps1, caps2); // otherwise prefer PSRAM if it exists } return heap_caps_realloc(ptr, size, caps2); @@ -650,30 +674,32 @@ void *p_realloc_malloc(void *ptr, size_t size) { } void *p_calloc(size_t count, size_t size) { - int caps1 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; - int caps2 = MALLOC_CAP_DEFAULT | MALLOC_CAP_8BIT; + int caps1 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; + int caps2 = MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT; if (psramSafe) { - if (heap_caps_get_free_size(caps2) > 3*MIN_HEAP_SIZE && size < 512) std::swap(caps1, caps2); // use DRAM for small alloactions & when heap is plenty + if (heap_caps_get_free_size(caps2) > (2*MIN_HEAP_SIZE + MAX_SEGMENT_DATA) && size < PSRAM_THRESHOLD) { + std::swap(caps1, caps2); // use DRAM for small allocations & when DRAM is plenty + } return heap_caps_calloc_prefer(count, size, 2, caps1, caps2); // otherwise prefer PSRAM if it exists } return heap_caps_calloc(count, size, caps2); } void *d_malloc(size_t size) { - int caps1 = MALLOC_CAP_DEFAULT | MALLOC_CAP_8BIT; - int caps2 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; + int caps1 = MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT; + int caps2 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; if (psramSafe) { - if (heap_caps_get_largest_free_block(caps1) < 3*MIN_HEAP_SIZE && size > MIN_HEAP_SIZE) std::swap(caps1, caps2); // prefer PSRAM for large alloactions & when DRAM is low + if (heap_caps_get_largest_free_block(caps1) < 2*MIN_HEAP_SIZE && size > PSRAM_THRESHOLD) std::swap(caps1, caps2); // prefer PSRAM for large 
allocations & when DRAM is low return heap_caps_malloc_prefer(size, 2, caps1, caps2); // otherwise prefer DRAM } return heap_caps_malloc(size, caps1); } void *d_realloc(void *ptr, size_t size) { - int caps1 = MALLOC_CAP_DEFAULT | MALLOC_CAP_8BIT; - int caps2 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; + int caps1 = MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT; + int caps2 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; if (psramSafe) { - if (heap_caps_get_largest_free_block(caps1) < 3*MIN_HEAP_SIZE && size > MIN_HEAP_SIZE) std::swap(caps1, caps2); // prefer PSRAM for large alloactions & when DRAM is low + if (heap_caps_get_largest_free_block(caps1) < 2*MIN_HEAP_SIZE && size > PSRAM_THRESHOLD) std::swap(caps1, caps2); // prefer PSRAM for large allocations & when DRAM is low return heap_caps_realloc_prefer(ptr, size, 2, caps1, caps2); // otherwise prefer DRAM } return heap_caps_realloc(ptr, size, caps1); @@ -688,10 +714,10 @@ void *d_realloc_malloc(void *ptr, size_t size) { } void *d_calloc(size_t count, size_t size) { - int caps1 = MALLOC_CAP_DEFAULT | MALLOC_CAP_8BIT; - int caps2 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; + int caps1 = MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT; + int caps2 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; if (psramSafe) { - if (size > MIN_HEAP_SIZE) std::swap(caps1, caps2); // prefer PSRAM for large alloactions + if (heap_caps_get_largest_free_block(caps1) < 3*MIN_HEAP_SIZE && size > PSRAM_THRESHOLD) std::swap(caps1, caps2); // prefer PSRAM for large allocations & when DRAM is low return heap_caps_calloc_prefer(count, size, 2, caps1, caps2); // otherwise prefer DRAM } return heap_caps_calloc(count, size, caps1); @@ -706,6 +732,18 @@ void *realloc_malloc(void *ptr, size_t size) { } #endif +// check heap fragmentation, if there is not enough contiguous heap memory available the UI can stop working +bool checkHeapHealth(unsigned minFreeBlockSize) { + #if defined(ARDUINO_ARCH_ESP32) + if (heap_caps_get_largest_free_block(MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT) > 
minFreeBlockSize) + return true; + #else + if (ESP.getMaxFreeBlockSize() > minFreeBlockSize) + return true; + #endif + return false; +} + /* * Fixed point integer based Perlin noise functions by @dedehai * Note: optimized for speed and to mimic fastled inoise functions, not for accuracy or best randomness diff --git a/wled00/wled.cpp b/wled00/wled.cpp index c372d22abd..8943fae489 100644 --- a/wled00/wled.cpp +++ b/wled00/wled.cpp @@ -168,7 +168,11 @@ void WLED::loop() // reconnect WiFi to clear stale allocations if heap gets too low if (millis() - heapTime > 15000) { - uint32_t heap = ESP.getFreeHeap(); + #ifdef ARDUINO_ARCH_ESP32 + uint32_t heap = heap_caps_get_largest_free_block(MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL); + #else + uint32_t heap = ESP.getMaxFreeBlockSize(); // ESP8266 does not support advanced allocation API + #endif if (heap < MIN_HEAP_SIZE && lastHeap < MIN_HEAP_SIZE) { DEBUG_PRINTF_P(PSTR("Heap too low! %u\n"), heap); forceReconnect = true; @@ -240,7 +244,7 @@ void WLED::loop() DEBUG_PRINTLN(F("---DEBUG INFO---")); DEBUG_PRINTF_P(PSTR("Runtime: %lu\n"), millis()); DEBUG_PRINTF_P(PSTR("Unix time: %u,%03u\n"), toki.getTime().sec, toki.getTime().ms); - DEBUG_PRINTF_P(PSTR("Free heap: %u\n"), ESP.getFreeHeap()); + DEBUG_PRINTF_P(PSTR("Free heap: %u\n"), getFreeHeapSize()); #if defined(ARDUINO_ARCH_ESP32) if (psramFound()) { DEBUG_PRINTF_P(PSTR("PSRAM: %dkB/%dkB\n"), ESP.getFreePsram()/1024, ESP.getPsramSize()/1024); @@ -366,7 +370,7 @@ void WLED::setup() DEBUG_PRINTF_P(PSTR("esp8266 @ %u MHz.\nCore: %s\n"), ESP.getCpuFreqMHz(), ESP.getCoreVersion()); DEBUG_PRINTF_P(PSTR("FLASH: %u MB\n"), (ESP.getFlashChipSize()/1024)/1024); #endif - DEBUG_PRINTF_P(PSTR("heap %u\n"), ESP.getFreeHeap()); + DEBUG_PRINTF_P(PSTR("heap %u\n"), getFreeHeapSize()); #if defined(ARDUINO_ARCH_ESP32) // BOARD_HAS_PSRAM also means that a compiler flag "-mfix-esp32-psram-cache-issue" was used and so PSRAM is safe to use on rev.1 ESP32 @@ -394,7 +398,7 @@ void WLED::setup() 
PinManager::allocatePin(2, true, PinOwner::DMX); #endif - DEBUG_PRINTF_P(PSTR("heap %u\n"), ESP.getFreeHeap()); + DEBUG_PRINTF_P(PSTR("heap %u\n"), getFreeHeapSize()); bool fsinit = false; DEBUGFS_PRINTLN(F("Mount FS")); @@ -424,7 +428,7 @@ void WLED::setup() DEBUG_PRINTLN(F("Reading config")); bool needsCfgSave = deserializeConfigFromFS(); - DEBUG_PRINTF_P(PSTR("heap %u\n"), ESP.getFreeHeap()); + DEBUG_PRINTF_P(PSTR("heap %u\n"), getFreeHeapSize()); #if defined(STATUSLED) && STATUSLED>=0 if (!PinManager::isPinAllocated(STATUSLED)) { @@ -436,12 +440,12 @@ void WLED::setup() DEBUG_PRINTLN(F("Initializing strip")); beginStrip(); - DEBUG_PRINTF_P(PSTR("heap %u\n"), ESP.getFreeHeap()); + DEBUG_PRINTF_P(PSTR("heap %u\n"), getFreeHeapSize()); DEBUG_PRINTLN(F("Usermods setup")); userSetup(); UsermodManager::setup(); - DEBUG_PRINTF_P(PSTR("heap %u\n"), ESP.getFreeHeap()); + DEBUG_PRINTF_P(PSTR("heap %u\n"), getFreeHeapSize()); if (needsCfgSave) serializeConfigToFS(); // usermods required new parameters; need to wait for strip to be initialised #4752 @@ -506,13 +510,13 @@ void WLED::setup() // HTTP server page init DEBUG_PRINTLN(F("initServer")); initServer(); - DEBUG_PRINTF_P(PSTR("heap %u\n"), ESP.getFreeHeap()); + DEBUG_PRINTF_P(PSTR("heap %u\n"), getFreeHeapSize()); #ifndef WLED_DISABLE_INFRARED // init IR DEBUG_PRINTLN(F("initIR")); initIR(); - DEBUG_PRINTF_P(PSTR("heap %u\n"), ESP.getFreeHeap()); + DEBUG_PRINTF_P(PSTR("heap %u\n"), getFreeHeapSize()); #endif // Seed FastLED random functions with an esp random value, which already works properly at this point. 
diff --git a/wled00/wled_server.cpp b/wled00/wled_server.cpp index 4434a2f3e5..27343f2099 100644 --- a/wled00/wled_server.cpp +++ b/wled00/wled_server.cpp @@ -366,7 +366,7 @@ void initServer() }); server.on(F("/freeheap"), HTTP_GET, [](AsyncWebServerRequest *request){ - request->send(200, FPSTR(CONTENT_TYPE_PLAIN), (String)ESP.getFreeHeap()); + request->send(200, FPSTR(CONTENT_TYPE_PLAIN), (String)getFreeHeapSize()); }); #ifdef WLED_ENABLE_USERMOD_PAGE diff --git a/wled00/ws.cpp b/wled00/ws.cpp index 45640b68ce..4522e1815f 100644 --- a/wled00/ws.cpp +++ b/wled00/ws.cpp @@ -124,8 +124,8 @@ void sendDataWs(AsyncWebSocketClient * client) DEBUG_PRINTF_P(PSTR("JSON buffer size: %u for WS request (%u).\n"), pDoc->memoryUsage(), len); // the following may no longer be necessary as heap management has been fixed by @willmmiles in AWS - size_t heap1 = ESP.getFreeHeap(); - DEBUG_PRINTF_P(PSTR("heap %u\n"), ESP.getFreeHeap()); + size_t heap1 = getFreeHeapSize(); + DEBUG_PRINTF_P(PSTR("heap %u\n"), getFreeHeapSize()); #ifdef ESP8266 if (len>heap1) { DEBUG_PRINTLN(F("Out of memory (WS)!")); @@ -134,8 +134,8 @@ void sendDataWs(AsyncWebSocketClient * client) #endif AsyncWebSocketBuffer buffer(len); #ifdef ESP8266 - size_t heap2 = ESP.getFreeHeap(); - DEBUG_PRINTF_P(PSTR("heap %u\n"), ESP.getFreeHeap()); + size_t heap2 = getFreeHeapSize(); + DEBUG_PRINTF_P(PSTR("heap %u\n"), getFreeHeapSize()); #else size_t heap2 = 0; // ESP32 variants do not have the same issue and will work without checking heap allocation #endif From 516aa62164e24daa976c6ac6ce27931380a2f6bf Mon Sep 17 00:00:00 2001 From: Damian Schneider Date: Sat, 26 Jul 2025 23:03:15 +0200 Subject: [PATCH 02/11] Added new functions for allocation and heap checking - added `allocate_buffer()` function that can be used to allocate large buffers: takes parameters to set preferred ram location, including 32bit accessible RAM on ESP32. 
Returns null if heap runs low or switches to PSRAM - getFreeHeapSize() and getContiguousFreeHeap() helper functions for all platforms to correctly report free useable heap - updated some constants - updated segment data allocation to free the data if it is large --- wled00/FX.h | 10 +--- wled00/FX_fcn.cpp | 90 ++++++++++------------------- wled00/const.h | 11 ++-- wled00/fcn_declare.h | 14 ++++- wled00/json.cpp | 4 +- wled00/util.cpp | 133 ++++++++++++++++++++++++++----------------- wled00/wled.cpp | 12 ++-- 7 files changed, 140 insertions(+), 134 deletions(-) diff --git a/wled00/FX.h b/wled00/FX.h index 1f35d12d5b..986c2457b2 100644 --- a/wled00/FX.h +++ b/wled00/FX.h @@ -98,7 +98,7 @@ extern byte realtimeMode; // used in getMappedPixelIndex() #define MAX_SEGMENT_DATA (MAX_NUM_SEGMENTS*1024) // 20k by default (S2 is short on free RAM) #else #define MAX_NUM_SEGMENTS 32 // warning: going beyond 32 may consume too much RAM for stable operation - #define MAX_SEGMENT_DATA (MAX_NUM_SEGMENTS*2560) // 80k by default + #define MAX_SEGMENT_DATA (MAX_NUM_SEGMENTS*1920) // 60k by default #endif /* How much data bytes each segment should max allocate to leave enough space for other segments, @@ -599,12 +599,8 @@ class Segment { , _t(nullptr) { DEBUGFX_PRINTF_P(PSTR("-- Creating segment: %p [%d,%d:%d,%d]\n"), this, (int)start, (int)stop, (int)startY, (int)stopY); - // allocate render buffer (always entire segment), prefer PSRAM if DRAM is running low. Note: impact on FPS with PSRAM buffer is low (~2% with QSPI PSRAM) - #ifdef CONFIG_IDF_TARGET_ESP32 - pixels = static_cast(pixelbuffer_malloc(sizeof(uint32_t) * length())); - #else - pixels = static_cast(p_calloc(length(), sizeof(uint32_t))); // prefer PSRAM. note: error handling is also done in isActive() - #endif + // allocate render buffer (always entire segment), prefer PSRAM if DRAM is running low. 
Note: impact on FPS with PSRAM buffer is low (<2% with QSPI PSRAM) + pixels = static_cast(allocate_buffer(length() * sizeof(uint32_t), BFRALLOC_PREFER_PSRAM | BFRALLOC_NOBYTEACCESS | BFRALLOC_CLEAR)); if (!pixels) { DEBUGFX_PRINTLN(F("!!! Not enough RAM for pixel buffer !!!")); extern byte errorFlag; diff --git a/wled00/FX_fcn.cpp b/wled00/FX_fcn.cpp index da9449fb65..f89a5e1da2 100755 --- a/wled00/FX_fcn.cpp +++ b/wled00/FX_fcn.cpp @@ -69,14 +69,9 @@ Segment::Segment(const Segment &orig) { if (orig.name) { name = static_cast(d_malloc(strlen(orig.name)+1)); if (name) strcpy(name, orig.name); } if (orig.data) { if (allocateData(orig._dataLen)) memcpy(data, orig.data, orig._dataLen); } if (orig.pixels) { - pixels = static_cast(d_malloc(sizeof(uint32_t) * orig.length())); - -// pixels = static_cast(heap_caps_malloc(orig.length()* sizeof(uint32_t), MALLOC_CAP_32BIT | MALLOC_CAP_INTERNAL)); // use this for ESP32 -//pixels = static_cast(heap_caps_malloc(sizeof(uint32_t) * orig.length(), MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT)); -//pixels = static_cast(heap_caps_malloc(sizeof(uint32_t) * orig.length(), MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT)); - - - if (pixels) memcpy(pixels, orig.pixels, sizeof(uint32_t) * orig.length()); + // allocate pixel buffer: prefer PSRAM if DRAM is running low + pixels = static_cast(allocate_buffer(orig.length() * sizeof(uint32_t), BFRALLOC_PREFER_PSRAM | BFRALLOC_NOBYTEACCESS)); + if (pixels) memcpy(pixels, orig.pixels, orig.length() * sizeof(uint32_t)); else { DEBUG_PRINTLN(F("!!! 
Not enough RAM for pixel buffer !!!")); errorFlag = ERR_NORAM_PX; @@ -116,12 +111,9 @@ Segment& Segment::operator= (const Segment &orig) { if (orig.name) { name = static_cast(d_malloc(strlen(orig.name)+1)); if (name) strcpy(name, orig.name); } if (orig.data) { if (allocateData(orig._dataLen)) memcpy(data, orig.data, orig._dataLen); } if (orig.pixels) { - pixels = static_cast(d_malloc(sizeof(uint32_t) * orig.length())); - //TODO: also need to put this in 32bit memory on ESP32, maybe make that a function... - //pixels = static_cast(heap_caps_malloc(sizeof(uint32_t) * orig.length(), MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT)); - //pixels = static_cast(heap_caps_malloc(sizeof(uint32_t) * orig.length(), MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT)); - - if (pixels) memcpy(pixels, orig.pixels, sizeof(uint32_t) * orig.length()); + // allocate pixel buffer: prefer PSRAM if DRAM is running low + pixels = static_cast(allocate_buffer(orig.length() * sizeof(uint32_t), BFRALLOC_PREFER_PSRAM | BFRALLOC_NOBYTEACCESS)); + if (pixels) memcpy(pixels, orig.pixels, orig.length() * sizeof(uint32_t)); else { DEBUG_PRINTLN(F("!!! 
Not enough RAM for pixel buffer !!!")); errorFlag = ERR_NORAM_PX; @@ -156,50 +148,41 @@ bool Segment::allocateData(size_t len) { if (len == 0) return false; // nothing to do if (data && _dataLen >= len) { // already allocated enough (reduce fragmentation) if (call == 0) { - if(checkHeapHealth()) { + if(_dataLen < FAIR_DATA_PER_SEG) { // segment data is small //DEBUG_PRINTF_P(PSTR("-- Clearing data (%d): %p\n"), len, this); memset(data, 0, len); // erase buffer if called during effect initialisation return true; // no need to reallocate } - else { - d_free(data); // free data and try to allocate again - data = nullptr; - Segment::addUsedSegmentData(-_dataLen); // subtract buffer size - } } else return true; } //DEBUG_PRINTF_P(PSTR("-- Allocating data (%d): %p\n"), len, this); - if (Segment::getUsedSegmentData() + len - _dataLen > MAX_SEGMENT_DATA) { - // not enough memory - DEBUG_PRINTF_P(PSTR("!!! Not enough RAM: %d/%d !!!\n"), len, Segment::getUsedSegmentData()); - errorFlag = ERR_NORAM; - return false; + // limit to MAX_SEGMENT_DATA if there is no PSRAM, otherwise prefer functionality over speed + #if defined(ARDUINO_ARCH_ESP32) + if(!(psramFound() && psramSafe)) + #endif + { + if (Segment::getUsedSegmentData() + len - _dataLen > MAX_SEGMENT_DATA) { + // not enough memory + DEBUG_PRINTF_P(PSTR("!!! 
Not enough RAM: %d/%d !!!\n"), len, Segment::getUsedSegmentData()); + errorFlag = ERR_NORAM; + return false; + } } // prefer DRAM over PSRAM for speed if (data) { - data = (byte*)d_realloc_malloc(data, len); // realloc with malloc fallback - if (data == nullptr) { // allocation failed - Segment::addUsedSegmentData(-_dataLen); // subtract original buffer size - _dataLen = 0; // reset data length - return false; - } + d_free(data); // free data and try to allocate again (segment buffer may be blocking contiguous heap) + Segment::addUsedSegmentData(-_dataLen); // subtract buffer size } - else data = (byte*)d_malloc(len); + + data = static_cast(allocate_buffer(len, BFRALLOC_PREFER_DRAM | BFRALLOC_CLEAR)); if (data) { - if(!checkHeapHealth()) { - d_free(data); - data = nullptr; - } - else { - memset(data, 0, len); // erase buffer - Segment::addUsedSegmentData(len); - _dataLen = len; - //DEBUG_PRINTF_P(PSTR("--- Allocated data (%p): %d/%d -> %p\n"), this, len, Segment::getUsedSegmentData(), data); - return true; - } + Segment::addUsedSegmentData(len); + _dataLen = len; + //DEBUG_PRINTF_P(PSTR("--- Allocated data (%p): %d/%d -> %p\n"), this, len, Segment::getUsedSegmentData(), data); + return true; } // allocation failed DEBUG_PRINTLN(F("!!! Allocation failed. !!!")); @@ -486,16 +469,7 @@ void Segment::setGeometry(uint16_t i1, uint16_t i2, uint8_t grp, uint8_t spc, ui // allocate FX render buffer if (length() != oldLength) { if (pixels) free(pixels); // note: using realloc can block larger heap segments - #ifdef ARDUINO_ARCH_ESP32 - pixels = static_cast(pixelbuffer_malloc(izeof(uint32_t) * length()); - #else - pixels = static_cast(p_malloc(sizeof(uint32_t) * length())); - #endif - - if(!checkHeapHealth()) { - d_free(pixels); - pixels = nullptr; - } + pixels = static_cast(allocate_buffer(length() * sizeof(uint32_t), BFRALLOC_PREFER_PSRAM | BFRALLOC_NOBYTEACCESS)); if (!pixels) { DEBUG_PRINTLN(F("!!! 
Not enough RAM for pixel buffer !!!")); errorFlag = ERR_NORAM_PX; @@ -1250,14 +1224,8 @@ void WS2812FX::finalizeInit() { // allocate frame buffer after matrix has been set up (gaps!) if (_pixels) d_free(_pixels); -#ifdef ARDUINO_ARCH_ESP32 - _pixels = static_cast(pixelbuffer_malloc(getLengthTotal() * sizeof(uint32_t), true)); // use 32bit RAM (IRAM) or PSRAM on ESP32 -#elif !defined(ESP8266) - // use PSRAM on S2 and S3 if available (C3 defaults to DRAM). Note: there is no measurable perfomance impact between PSRAM and DRAM on S2/S3 with QSPI PSRAM - _pixels = static_cast(heap_caps_malloc_prefer(size, 2, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT)); // prefer PSRAM if it exists -#else - _pixels = static_cast(malloc(getLengthTotal() * sizeof(uint32_t))); // ESP8266 does not support advanced allocation API -#endif + // use PSRAM if available: there is no measurable perfomance impact between PSRAM and DRAM on S2/S3 with QSPI PSRAM for this buffer + _pixels = static_cast(allocate_buffer(getLengthTotal() * sizeof(uint32_t), BFRALLOC_ENFORCE_PSRAM | BFRALLOC_NOBYTEACCESS | BFRALLOC_CLEAR)); DEBUG_PRINTF_P(PSTR("strip buffer size: %uB\n"), getLengthTotal() * sizeof(uint32_t)); DEBUG_PRINTF_P(PSTR("Heap after strip init: %uB\n"), getFreeHeapSize()); } diff --git a/wled00/const.h b/wled00/const.h index 932ccee947..43cb239735 100644 --- a/wled00/const.h +++ b/wled00/const.h @@ -547,16 +547,19 @@ static_assert(WLED_MAX_BUSSES <= 32, "WLED_MAX_BUSSES exceeds hard limit"); #endif // minimum heap size required to process web requests: try to keep free heap above this value -#define MIN_HEAP_SIZE (12*1024) - +#ifdef ESP8266 + #define MIN_HEAP_SIZE (8*1024) +#else + #define MIN_HEAP_SIZE (12*1024) +#endif // threshold for PSRAM use: if heap is running low, requests above PSRAM_THRESHOLD will be allocated in PSRAM // if heap is plenty, requests below PSRAM_THRESHOLD will be allocated in DRAM for speed #if defined(CONFIG_IDF_TARGET_ESP32S3) - #define 
PSRAM_THRESHOLD 8192 + #define PSRAM_THRESHOLD 5120 #elif defined(CONFIG_IDF_TARGET_ESP32) #define PSRAM_THRESHOLD 4096 #else - #define PSRAM_THRESHOLD 2048 // S2 does not have a lot of RAM, C3 and ESP8266 do not support PSRAM: the value is not used + #define PSRAM_THRESHOLD 1024 // S2 does not have a lot of RAM. C3 and ESP8266 do not support PSRAM: the value is not used #endif // Web server limits diff --git a/wled00/fcn_declare.h b/wled00/fcn_declare.h index 3959c4ef0a..8c674c1484 100644 --- a/wled00/fcn_declare.h +++ b/wled00/fcn_declare.h @@ -582,12 +582,20 @@ extern "C" { #define d_realloc_malloc realloc_malloc #define d_free free #endif -bool checkHeapHealth(unsigned minFreeBlockSize = MIN_HEAP_SIZE); // checks heap fragmentation: returns true if contiguous free memory is larger than minFreeBlockSize #ifndef ESP8266 -inline unsigned getFreeHeapSize() { return heap_caps_get_free_size(MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); } // returns free heap (ESP.getFreeHeap() can include other memory types) +inline size_t getFreeHeapSize() { return heap_caps_get_free_size(MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); } // returns free heap (ESP.getFreeHeap() can include other memory types) +inline size_t getContiguousFreeHeap() { return heap_caps_get_largest_free_block(MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); } // returns largest contiguous free block #else -inline unsigned getFreeHeapSize() { return ESP.getFreeHeap(); } // returns free heap +inline size_t getFreeHeapSize() { return ESP.getFreeHeap(); } // returns free heap +inline size_t getContiguousFreeHeap() { return ESP.getMaxFreeBlockSize(); } // returns largest contiguous free block #endif +#define BFRALLOC_NOBYTEACCESS (1 << 0) // ESP32 has 32bit accessible DRAM (usually ~50kB free) that must not be byte-accessed +#define BFRALLOC_PREFER_DRAM (1 << 1) // prefer DRAM over PSRAM +#define BFRALLOC_ENFORCE_DRAM (1 << 2) // use DRAM only, no PSRAM +#define BFRALLOC_PREFER_PSRAM (1 << 3) // prefer PSRAM over DRAM +#define 
BFRALLOC_ENFORCE_PSRAM (1 << 4) // use PSRAM if available, otherwise fall back to DRAM +#define BFRALLOC_CLEAR (1 << 5) // clear allocated buffer after allocation +void *allocate_buffer(size_t size, uint32_t type); // RAII guard class for the JSON Buffer lock // Modeled after std::lock_guard diff --git a/wled00/json.cpp b/wled00/json.cpp index 511f1aa7d3..1e219bbfbb 100644 --- a/wled00/json.cpp +++ b/wled00/json.cpp @@ -812,7 +812,7 @@ void serializeInfo(JsonObject root) root[F("clock")] = ESP.getCpuFreqMHz(); root[F("flash")] = (ESP.getFlashChipSize()/1024)/1024; #ifdef WLED_DEBUG - root[F("maxalloc")] = ESP.getMaxAllocHeap(); + root[F("maxalloc")] = getContiguousFreeHeap(); root[F("resetReason0")] = (int)rtc_get_reset_reason(0); root[F("resetReason1")] = (int)rtc_get_reset_reason(1); #endif @@ -823,7 +823,7 @@ void serializeInfo(JsonObject root) root[F("clock")] = ESP.getCpuFreqMHz(); root[F("flash")] = (ESP.getFlashChipSize()/1024)/1024; #ifdef WLED_DEBUG - root[F("maxalloc")] = ESP.getMaxFreeBlockSize(); + root[F("maxalloc")] = getContiguousFreeHeap(); root[F("resetReason")] = (int)ESP.getResetInfoPtr()->reason; #endif root[F("lwip")] = LWIP_VERSION_MAJOR; diff --git a/wled00/util.cpp b/wled00/util.cpp index 50427cdc51..7f5b37c308 100644 --- a/wled00/util.cpp +++ b/wled00/util.cpp @@ -619,33 +619,13 @@ int32_t hw_random(int32_t lowerlimit, int32_t upperlimit) { return hw_random(diff) + lowerlimit; } -#ifdef CONFIG_IDF_TARGET_ESP32 -// ESP32 has 200kb 32bit accessible IRAM (usually ~50kB free) that can be used for pixel buffers WARNING: must not be accessed byte-wise! 
-// try if the buffer fits into 32bit accessible RAM, otherwise use PSRAM or DRAM -void *pixelbuffer_malloc(size_t size, bool enforcePSRAM) { - uint32_t availableDRAM = heap_caps_get_free_size(MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL); - void *buffer = nullptr; - buffer = static_cast(heap_caps_malloc(size * sizeof(uint32_t), MALLOC_CAP_INTERNAL | MALLOC_CAP_32BIT)); // try to allocate in 32bit DRAM region - if(heap_caps_get_free_size(MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL) < availableDRAM) { // buffer did not fit into 32bit DRAM region - free(_pixels); // free DRAM buffer - if(enforcePSRAM && psramSafe && psramFound()) - buffer = static_cast(heap_caps_malloc(size * sizeof(uint32_t), MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT)); // allocate in PSRAM - else - buffer = p_malloc(size * sizeof(uint32_t)); // use PSRAM or DRAM depending on availability - } - if(buffer) - memset(buffer, 0, size * sizeof(uint32_t)); - return buffer; -} -#endif - #if !defined(ESP8266) && !defined(CONFIG_IDF_TARGET_ESP32C3) // ESP8266 does not support PSRAM, ESP32-C3 does not have PSRAM // p_x prefer PSRAM, d_x prefer DRAM void *p_malloc(size_t size) { int caps1 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; int caps2 = MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT; if (psramSafe) { - if (heap_caps_get_free_size(caps2) > (2*MIN_HEAP_SIZE + MAX_SEGMENT_DATA) && size < PSRAM_THRESHOLD) { + if (getContiguousFreeHeap() > (2*MIN_HEAP_SIZE + MAX_SEGMENT_DATA/2) && size < PSRAM_THRESHOLD) { std::swap(caps1, caps2); // use DRAM for small allocations & when DRAM is plenty } return heap_caps_malloc_prefer(size, 2, caps1, caps2); // otherwise prefer PSRAM if it exists @@ -657,7 +637,7 @@ void *p_realloc(void *ptr, size_t size) { int caps1 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; int caps2 = MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT; if (psramSafe) { - if (heap_caps_get_free_size(caps2) > (2*MIN_HEAP_SIZE + MAX_SEGMENT_DATA) && size < PSRAM_THRESHOLD){ + if (getContiguousFreeHeap() > (2*MIN_HEAP_SIZE + MAX_SEGMENT_DATA/2) && size < 
PSRAM_THRESHOLD) { std::swap(caps1, caps2); // use DRAM for small allocations & when DRAM is plenty } return heap_caps_realloc_prefer(ptr, size, 2, caps1, caps2); // otherwise prefer PSRAM if it exists @@ -665,19 +645,11 @@ void *p_realloc(void *ptr, size_t size) { return heap_caps_realloc(ptr, size, caps2); } -// realloc with malloc fallback, original buffer is freed if realloc fails but not copied! -void *p_realloc_malloc(void *ptr, size_t size) { - void *newbuf = p_realloc(ptr, size); // try realloc first - if (newbuf) return newbuf; // realloc successful - p_free(ptr); // free old buffer if realloc failed - return p_malloc(size); // fallback to malloc -} - void *p_calloc(size_t count, size_t size) { int caps1 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; int caps2 = MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT; if (psramSafe) { - if (heap_caps_get_free_size(caps2) > (2*MIN_HEAP_SIZE + MAX_SEGMENT_DATA) && size < PSRAM_THRESHOLD) { + if (getContiguousFreeHeap() > (2*MIN_HEAP_SIZE + MAX_SEGMENT_DATA/2) && size < PSRAM_THRESHOLD) { std::swap(caps1, caps2); // use DRAM for small allocations & when DRAM is plenty } return heap_caps_calloc_prefer(count, size, 2, caps1, caps2); // otherwise prefer PSRAM if it exists @@ -685,11 +657,19 @@ void *p_calloc(size_t count, size_t size) { return heap_caps_calloc(count, size, caps2); } +// realloc with malloc fallback, original buffer is freed if realloc fails but not copied! 
+void *p_realloc_malloc(void *ptr, size_t size) { + void *newbuf = p_realloc(ptr, size); // try realloc first + if (newbuf) return newbuf; // realloc successful + p_free(ptr); // free old buffer if realloc failed + return p_malloc(size); // fallback to malloc +} + void *d_malloc(size_t size) { int caps1 = MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT; int caps2 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; if (psramSafe) { - if (heap_caps_get_largest_free_block(caps1) < 2*MIN_HEAP_SIZE && size > PSRAM_THRESHOLD) std::swap(caps1, caps2); // prefer PSRAM for large allocations & when DRAM is low + if (getContiguousFreeHeap() < 2*MIN_HEAP_SIZE && size > PSRAM_THRESHOLD) std::swap(caps1, caps2); // prefer PSRAM for large allocations & when DRAM is low return heap_caps_malloc_prefer(size, 2, caps1, caps2); // otherwise prefer DRAM } return heap_caps_malloc(size, caps1); @@ -699,29 +679,29 @@ void *d_realloc(void *ptr, size_t size) { int caps1 = MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT; int caps2 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; if (psramSafe) { - if (heap_caps_get_largest_free_block(caps1) < 2*MIN_HEAP_SIZE && size > PSRAM_THRESHOLD) std::swap(caps1, caps2); // prefer PSRAM for large allocations & when DRAM is low + if (getContiguousFreeHeap() < 2*MIN_HEAP_SIZE && size > PSRAM_THRESHOLD) std::swap(caps1, caps2); // prefer PSRAM for large allocations & when DRAM is low return heap_caps_realloc_prefer(ptr, size, 2, caps1, caps2); // otherwise prefer DRAM } return heap_caps_realloc(ptr, size, caps1); } -// realloc with malloc fallback, original buffer is freed if realloc fails but not copied! 
-void *d_realloc_malloc(void *ptr, size_t size) { - void *newbuf = d_realloc(ptr, size); // try realloc first - if (newbuf) return newbuf; // realloc successful - d_free(ptr); // free old buffer if realloc failed - return d_malloc(size); // fallback to malloc -} - void *d_calloc(size_t count, size_t size) { int caps1 = MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT; int caps2 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; if (psramSafe) { - if (heap_caps_get_largest_free_block(caps1) < 3*MIN_HEAP_SIZE && size > PSRAM_THRESHOLD) std::swap(caps1, caps2); // prefer PSRAM for large allocations & when DRAM is low + if (getContiguousFreeHeap() < 3*MIN_HEAP_SIZE && size > PSRAM_THRESHOLD) std::swap(caps1, caps2); // prefer PSRAM for large allocations & when DRAM is low return heap_caps_calloc_prefer(count, size, 2, caps1, caps2); // otherwise prefer DRAM } return heap_caps_calloc(count, size, caps1); } + +// realloc with malloc fallback, original buffer is freed if realloc fails but not copied! +void *d_realloc_malloc(void *ptr, size_t size) { + void *newbuf = d_realloc(ptr, size); // try realloc first + if (newbuf) return newbuf; // realloc successful + d_free(ptr); // free old buffer if realloc failed + return d_malloc(size); // fallback to malloc +} #else // ESP8266 & ESP32-C3 // realloc with malloc fallback, original buffer is freed if realloc fails but not copied! 
void *realloc_malloc(void *ptr, size_t size) { @@ -732,16 +712,67 @@ void *realloc_malloc(void *ptr, size_t size) { } #endif -// check heap fragmentation, if there is not enough contiguous heap memory available the UI can stop working -bool checkHeapHealth(unsigned minFreeBlockSize) { - #if defined(ARDUINO_ARCH_ESP32) - if (heap_caps_get_largest_free_block(MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT) > minFreeBlockSize) - return true; - #else - if (ESP.getMaxFreeBlockSize() > minFreeBlockSize) - return true; + +// allocation function for large buffers like pixel-buffers and segment data +// ensures that a contiguous block of MIN_HEAP_SIZE remains to keep the UI working, otherwise returns nullptr +// if multiple conflicting types are defined, the lowest bits take priority +void *allocate_buffer(size_t size, uint32_t type) { + void *buffer = nullptr; + #if defined(ESP8266) || defined(CONFIG_IDF_TARGET_ESP32C3) // ESP8266 does not support PSRAM, ESP32-C3 does not have PSRAM + buffer = d_malloc(size); + #else + #ifdef CONFIG_IDF_TARGET_ESP32 + // only classic ESP32 has this memory type. 
Using it frees up normal DRAM for other purposes + if(type & BFRALLOC_NOBYTEACCESS) { + buffer = static_cast(heap_caps_malloc(size, MALLOC_CAP_INTERNAL | MALLOC_CAP_32BIT)); // try to allocate in 32bit DRAM region + if((uintptr_t)buffer < SOC_DRAM_HIGH) { + // buffer did not fit into 32bit DRAM region (located at SOC_IRAM_LOW) and was allocated in 8bit DRAM memory or is nullptr + // if PSRAM is available and a PSRAM type is set, free the memory and try again + if(psramSafe && psramFound() && (type & (BFRALLOC_PREFER_PSRAM | BFRALLOC_ENFORCE_PSRAM))) { + free(buffer); + buffer = nullptr; + } + } + if(buffer) + type = type & BFRALLOC_CLEAR; // we have a valid buffer, reset any additional flags except BFRALLOC_CLEAR + } #endif - return false; + if(psramSafe && psramFound()) { + if(type & BFRALLOC_PREFER_DRAM) { + buffer = d_malloc(size); + } + else if(type & BFRALLOC_ENFORCE_DRAM) { + buffer = heap_caps_malloc(size, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); // use DRAM only + } + else if(type & BFRALLOC_PREFER_PSRAM) { + buffer = p_malloc(size); // try to allocate in PSRAM + } + else if(type & BFRALLOC_ENFORCE_PSRAM) { + buffer = heap_caps_malloc(size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); // use PSRAM if available + } else { + buffer = p_malloc(size); // use PSRAM or DRAM depending on availability + } + } + else { + #ifdef CONFIG_IDF_TARGET_ESP32 + if(!buffer) + buffer = d_malloc(size); // no PSRAM available, use DRAM if not already allocated above + #else + buffer = d_malloc(size); // no PSRAM available, use DRAM + #endif + } + #endif + if(buffer) { + // check if there is enough heap left for the UI to work + if(getFreeHeapSize() < MIN_HEAP_SIZE) + { + free(buffer); // free allocated buffer + return nullptr; // return nullptr to indicate failure + } + if(type & BFRALLOC_CLEAR) + memset(buffer, 0, size); // clear allocated buffer + } + return buffer; } /* diff --git a/wled00/wled.cpp b/wled00/wled.cpp index 8943fae489..e1f9c8f435 100644 --- a/wled00/wled.cpp +++ 
b/wled00/wled.cpp @@ -168,11 +168,7 @@ void WLED::loop() // reconnect WiFi to clear stale allocations if heap gets too low if (millis() - heapTime > 15000) { - #ifdef ARDUINO_ARCH_ESP32 - uint32_t heap = heap_caps_get_largest_free_block(MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL); - #else - uint32_t heap = ESP.getMaxFreeBlockSize(); // ESP8266 does not support advanced allocation API - #endif + uint32_t heap = getFreeHeapSize(); if (heap < MIN_HEAP_SIZE && lastHeap < MIN_HEAP_SIZE) { DEBUG_PRINTF_P(PSTR("Heap too low! %u\n"), heap); forceReconnect = true; @@ -244,7 +240,11 @@ void WLED::loop() DEBUG_PRINTLN(F("---DEBUG INFO---")); DEBUG_PRINTF_P(PSTR("Runtime: %lu\n"), millis()); DEBUG_PRINTF_P(PSTR("Unix time: %u,%03u\n"), toki.getTime().sec, toki.getTime().ms); - DEBUG_PRINTF_P(PSTR("Free heap: %u\n"), getFreeHeapSize()); + DEBUG_PRINTF_P(PSTR("Free heap/contiguous: %u/%u\n"), getFreeHeapSize(), getContiguousFreeHeap()); + #if defined(CONFIG_IDF_TARGET_ESP32) + int dram32_free = heap_caps_get_free_size(MALLOC_CAP_32BIT|MALLOC_CAP_INTERNAL) - getFreeHeapSize(); + DEBUG_PRINTF_P(PSTR("Free 32bit-heap: %d\n"), dram32_free); + #endif #if defined(ARDUINO_ARCH_ESP32) if (psramFound()) { DEBUG_PRINTF_P(PSTR("PSRAM: %dkB/%dkB\n"), ESP.getFreePsram()/1024, ESP.getPsramSize()/1024); From 48fbb1d11fe7e8481ba5e4f53ca7182d33d972f4 Mon Sep 17 00:00:00 2001 From: Damian Schneider Date: Sun, 27 Jul 2025 20:21:47 +0200 Subject: [PATCH 03/11] code formatting, removed now unused function declaration --- wled00/FX_fcn.cpp | 6 +++--- wled00/fcn_declare.h | 15 ++++++--------- wled00/util.cpp | 28 ++++++++++++++-------------- 3 files changed, 23 insertions(+), 26 deletions(-) diff --git a/wled00/FX_fcn.cpp b/wled00/FX_fcn.cpp index f89a5e1da2..8e5504fa26 100755 --- a/wled00/FX_fcn.cpp +++ b/wled00/FX_fcn.cpp @@ -148,7 +148,7 @@ bool Segment::allocateData(size_t len) { if (len == 0) return false; // nothing to do if (data && _dataLen >= len) { // already allocated enough (reduce 
fragmentation) if (call == 0) { - if(_dataLen < FAIR_DATA_PER_SEG) { // segment data is small + if (_dataLen < FAIR_DATA_PER_SEG) { // segment data is small //DEBUG_PRINTF_P(PSTR("-- Clearing data (%d): %p\n"), len, this); memset(data, 0, len); // erase buffer if called during effect initialisation return true; // no need to reallocate @@ -160,7 +160,7 @@ bool Segment::allocateData(size_t len) { //DEBUG_PRINTF_P(PSTR("-- Allocating data (%d): %p\n"), len, this); // limit to MAX_SEGMENT_DATA if there is no PSRAM, otherwise prefer functionality over speed #if defined(ARDUINO_ARCH_ESP32) - if(!(psramFound() && psramSafe)) + if (!(psramFound() && psramSafe)) #endif { if (Segment::getUsedSegmentData() + len - _dataLen > MAX_SEGMENT_DATA) { @@ -214,7 +214,7 @@ void Segment::resetIfRequired() { if (!reset || !isActive()) return; //DEBUG_PRINTF_P(PSTR("-- Segment reset: %p\n"), this); if (data && _dataLen > 0) { - if(_dataLen > FAIR_DATA_PER_SEG) deallocateData(); // do not keep large allocations + if (_dataLen > FAIR_DATA_PER_SEG) deallocateData(); // do not keep large allocations else memset(data, 0, _dataLen); // can prevent heap fragmentation DEBUG_PRINTF_P(PSTR("-- Segment %p reset, data cleared\n"), this); } diff --git a/wled00/fcn_declare.h b/wled00/fcn_declare.h index 8c674c1484..53579cd14f 100644 --- a/wled00/fcn_declare.h +++ b/wled00/fcn_declare.h @@ -551,9 +551,6 @@ inline uint8_t hw_random8(uint32_t upperlimit) { return (hw_random8() * upperlim inline uint8_t hw_random8(uint32_t lowerlimit, uint32_t upperlimit) { uint32_t range = upperlimit - lowerlimit; return lowerlimit + hw_random8(range); }; // input range 0-255 // memory allocation wrappers -#ifdef CONFIG_IDF_TARGET_ESP32 -void *pixelbuffer_malloc(size_t size, bool enforcePSRAM = false); // prefer IRAM for pixel buffers if possible -#endif #if !defined(ESP8266) && !defined(CONFIG_IDF_TARGET_ESP32C3) extern "C" { void *p_malloc(size_t); // prefer PSRAM over DRAM @@ -589,12 +586,12 @@ inline size_t 
getContiguousFreeHeap() { return heap_caps_get_largest_free_block( inline size_t getFreeHeapSize() { return ESP.getFreeHeap(); } // returns free heap inline size_t getContiguousFreeHeap() { return ESP.getMaxFreeBlockSize(); } // returns largest contiguous free block #endif -#define BFRALLOC_NOBYTEACCESS (1 << 0) // ESP32 has 32bit accessible DRAM (usually ~50kB free) that must not be byte-accessed -#define BFRALLOC_PREFER_DRAM (1 << 1) // prefer DRAM over PSRAM -#define BFRALLOC_ENFORCE_DRAM (1 << 2) // use DRAM only, no PSRAM -#define BFRALLOC_PREFER_PSRAM (1 << 3) // prefer PSRAM over DRAM -#define BFRALLOC_ENFORCE_PSRAM (1 << 4) // use PSRAM if available, otherwise fall back to DRAM -#define BFRALLOC_CLEAR (1 << 5) // clear allocated buffer after allocation +#define BFRALLOC_NOBYTEACCESS (1 << 0) // ESP32 has 32bit accessible DRAM (usually ~50kB free) that must not be byte-accessed +#define BFRALLOC_PREFER_DRAM (1 << 1) // prefer DRAM over PSRAM +#define BFRALLOC_ENFORCE_DRAM (1 << 2) // use DRAM only, no PSRAM +#define BFRALLOC_PREFER_PSRAM (1 << 3) // prefer PSRAM over DRAM +#define BFRALLOC_ENFORCE_PSRAM (1 << 4) // use PSRAM if available, otherwise fall back to DRAM +#define BFRALLOC_CLEAR (1 << 5) // clear allocated buffer after allocation void *allocate_buffer(size_t size, uint32_t type); // RAII guard class for the JSON Buffer lock diff --git a/wled00/util.cpp b/wled00/util.cpp index 7f5b37c308..1ad132f071 100644 --- a/wled00/util.cpp +++ b/wled00/util.cpp @@ -723,31 +723,31 @@ void *allocate_buffer(size_t size, uint32_t type) { #else #ifdef CONFIG_IDF_TARGET_ESP32 // only classic ESP32 has this memory type. 
Using it frees up normal DRAM for other purposes - if(type & BFRALLOC_NOBYTEACCESS) { + if (type & BFRALLOC_NOBYTEACCESS) { buffer = static_cast(heap_caps_malloc(size, MALLOC_CAP_INTERNAL | MALLOC_CAP_32BIT)); // try to allocate in 32bit DRAM region - if((uintptr_t)buffer < SOC_DRAM_HIGH) { + if ((uintptr_t)buffer < SOC_DRAM_HIGH) { // buffer did not fit into 32bit DRAM region (located at SOC_IRAM_LOW) and was allocated in 8bit DRAM memory or is nullptr // if PSRAM is available and a PSRAM type is set, free the memory and try again - if(psramSafe && psramFound() && (type & (BFRALLOC_PREFER_PSRAM | BFRALLOC_ENFORCE_PSRAM))) { + if (psramSafe && psramFound() && (type & (BFRALLOC_PREFER_PSRAM | BFRALLOC_ENFORCE_PSRAM))) { free(buffer); buffer = nullptr; } } - if(buffer) + if (buffer) type = type & BFRALLOC_CLEAR; // we have a valid buffer, reset any additional flags except BFRALLOC_CLEAR } #endif - if(psramSafe && psramFound()) { - if(type & BFRALLOC_PREFER_DRAM) { + if (psramSafe && psramFound()) { + if (type & BFRALLOC_PREFER_DRAM) { buffer = d_malloc(size); } - else if(type & BFRALLOC_ENFORCE_DRAM) { + else if (type & BFRALLOC_ENFORCE_DRAM) { buffer = heap_caps_malloc(size, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); // use DRAM only } - else if(type & BFRALLOC_PREFER_PSRAM) { + else if (type & BFRALLOC_PREFER_PSRAM) { buffer = p_malloc(size); // try to allocate in PSRAM } - else if(type & BFRALLOC_ENFORCE_PSRAM) { + else if (type & BFRALLOC_ENFORCE_PSRAM) { buffer = heap_caps_malloc(size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); // use PSRAM if available } else { buffer = p_malloc(size); // use PSRAM or DRAM depending on availability @@ -755,21 +755,21 @@ void *allocate_buffer(size_t size, uint32_t type) { } else { #ifdef CONFIG_IDF_TARGET_ESP32 - if(!buffer) - buffer = d_malloc(size); // no PSRAM available, use DRAM if not already allocated above + if (!buffer) + buffer = d_malloc(size); // no PSRAM available, use DRAM if not already allocated above #else buffer = 
d_malloc(size); // no PSRAM available, use DRAM #endif } #endif - if(buffer) { + if (buffer) { // check if there is enough heap left for the UI to work - if(getFreeHeapSize() < MIN_HEAP_SIZE) + if (getFreeHeapSize() < MIN_HEAP_SIZE) { free(buffer); // free allocated buffer return nullptr; // return nullptr to indicate failure } - if(type & BFRALLOC_CLEAR) + if (type & BFRALLOC_CLEAR) memset(buffer, 0, size); // clear allocated buffer } return buffer; From 9bd9d321bcd6a298f47db3eb22192a198d4d3588 Mon Sep 17 00:00:00 2001 From: Damian Schneider Date: Wed, 30 Jul 2025 05:51:28 +0200 Subject: [PATCH 04/11] Fixes and clearer comments - Bugfix: `allocate_buffer` did double allocations because of a fallback else that should not be there: if no type is defined, should not allocate anything - Bugfix2: same place: checked for minheap instead of min contiguous heap - updated comments to make the intent of each memory type clearer --- wled00/FX_fcn.cpp | 6 +++--- wled00/const.h | 2 +- wled00/util.cpp | 36 ++++++++++++++++++------------------ 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/wled00/FX_fcn.cpp b/wled00/FX_fcn.cpp index 8e5504fa26..1eea7b09a7 100755 --- a/wled00/FX_fcn.cpp +++ b/wled00/FX_fcn.cpp @@ -165,18 +165,18 @@ bool Segment::allocateData(size_t len) { { if (Segment::getUsedSegmentData() + len - _dataLen > MAX_SEGMENT_DATA) { // not enough memory - DEBUG_PRINTF_P(PSTR("!!! 
Not enough RAM: %d/%d !!!\n"), len, Segment::getUsedSegmentData()); + DEBUG_PRINTF_P(PSTR("SegmentData limit reached: %d/%d\n"), len, Segment::getUsedSegmentData()); errorFlag = ERR_NORAM; return false; } } - // prefer DRAM over PSRAM for speed + if (data) { d_free(data); // free data and try to allocate again (segment buffer may be blocking contiguous heap) Segment::addUsedSegmentData(-_dataLen); // subtract buffer size } - data = static_cast(allocate_buffer(len, BFRALLOC_PREFER_DRAM | BFRALLOC_CLEAR)); + data = static_cast(allocate_buffer(len, BFRALLOC_PREFER_DRAM | BFRALLOC_CLEAR)); // prefer DRAM over PSRAM for speed if (data) { Segment::addUsedSegmentData(len); diff --git a/wled00/const.h b/wled00/const.h index 43cb239735..9e6a29fce0 100644 --- a/wled00/const.h +++ b/wled00/const.h @@ -550,7 +550,7 @@ static_assert(WLED_MAX_BUSSES <= 32, "WLED_MAX_BUSSES exceeds hard limit"); #ifdef ESP8266 #define MIN_HEAP_SIZE (8*1024) #else - #define MIN_HEAP_SIZE (12*1024) + #define MIN_HEAP_SIZE (9*1024) #endif // threshold for PSRAM use: if heap is running low, requests above PSRAM_THRESHOLD will be allocated in PSRAM // if heap is plenty, requests below PSRAM_THRESHOLD will be allocated in DRAM for speed diff --git a/wled00/util.cpp b/wled00/util.cpp index 1ad132f071..53d47ab5d2 100644 --- a/wled00/util.cpp +++ b/wled00/util.cpp @@ -715,59 +715,59 @@ void *realloc_malloc(void *ptr, size_t size) { // allocation function for large buffers like pixel-buffers and segment data // ensures that a contiguous block of MIN_HEAP_SIZE remains to keep the UI working, otherwise returns nullptr -// if multiple conflicting types are defined, the lowest bits take priority +// if multiple conflicting types are defined, the lowest bits of "type" take priority (see fcn_declare.h for types) void *allocate_buffer(size_t size, uint32_t type) { void *buffer = nullptr; + #if defined(ESP8266) || defined(CONFIG_IDF_TARGET_ESP32C3) // ESP8266 does not support PSRAM, ESP32-C3 does not have PSRAM 
buffer = d_malloc(size); #else #ifdef CONFIG_IDF_TARGET_ESP32 - // only classic ESP32 has this memory type. Using it frees up normal DRAM for other purposes + // only classic ESP32 has "32bit accessible only" aka IRAM type. Using it frees up normal DRAM for other purposes + // this memory region is used for IRAM_ATTR functions, whatever is left is unused and can be used for pixel buffers + // prefer this type over PSRAM as it is slightly faster, except for _pixels where it is on-par as PSRAM-caching does a good job for mostly sequential access if (type & BFRALLOC_NOBYTEACCESS) { buffer = static_cast(heap_caps_malloc(size, MALLOC_CAP_INTERNAL | MALLOC_CAP_32BIT)); // try to allocate in 32bit DRAM region if ((uintptr_t)buffer < SOC_DRAM_HIGH) { - // buffer did not fit into 32bit DRAM region (located at SOC_IRAM_LOW) and was allocated in 8bit DRAM memory or is nullptr - // if PSRAM is available and a PSRAM type is set, free the memory and try again + // buffer was allocated in normal DRAM and did not fit into 32bit DRAM region located at SOC_IRAM_LOW (or is nullptr) + // if PSRAM is available and a PSRAM type is set as an option, free the DRAM memory and continue below if (psramSafe && psramFound() && (type & (BFRALLOC_PREFER_PSRAM | BFRALLOC_ENFORCE_PSRAM))) { free(buffer); buffer = nullptr; } } if (buffer) - type = type & BFRALLOC_CLEAR; // we have a valid buffer, reset any additional flags except BFRALLOC_CLEAR + type = type & BFRALLOC_CLEAR; // we have a valid buffer, clear any additional flags except BFRALLOC_CLEAR } #endif if (psramSafe && psramFound()) { if (type & BFRALLOC_PREFER_DRAM) { - buffer = d_malloc(size); + buffer = d_malloc(size); // allocate in DRAM if enough free heap is available, PSRAM as fallback } else if (type & BFRALLOC_ENFORCE_DRAM) { - buffer = heap_caps_malloc(size, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); // use DRAM only + buffer = heap_caps_malloc(size, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); // use DRAM only, otherwise return nullptr } 
else if (type & BFRALLOC_PREFER_PSRAM) { - buffer = p_malloc(size); // try to allocate in PSRAM + buffer = p_malloc(size); // prefer PSRAM: uses DRAM if vast amounts are available to optimize speed } else if (type & BFRALLOC_ENFORCE_PSRAM) { - buffer = heap_caps_malloc(size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); // use PSRAM if available - } else { - buffer = p_malloc(size); // use PSRAM or DRAM depending on availability + buffer = heap_caps_malloc(size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); // use PSRAM only, otherwise return nullptr } } else { #ifdef CONFIG_IDF_TARGET_ESP32 if (!buffer) - buffer = d_malloc(size); // no PSRAM available, use DRAM if not already allocated above + buffer = d_malloc(size); // no PSRAM available, use DRAM if not already allocated to 32bit-only region #else - buffer = d_malloc(size); // no PSRAM available, use DRAM + buffer = d_malloc(size); // no PSRAM available, use DRAM #endif } #endif if (buffer) { - // check if there is enough heap left for the UI to work - if (getFreeHeapSize() < MIN_HEAP_SIZE) - { - free(buffer); // free allocated buffer - return nullptr; // return nullptr to indicate failure + // limit check: leave enough free heap for UI and other tasks + if (getContiguousFreeHeap() < MIN_HEAP_SIZE) { + free(buffer); // free allocated buffer + return nullptr; } if (type & BFRALLOC_CLEAR) memset(buffer, 0, size); // clear allocated buffer From 39870f874f51dfd163f0ae8a52df3ea5f4d811e7 Mon Sep 17 00:00:00 2001 From: Damian Schneider Date: Fri, 29 Aug 2025 17:01:42 +0200 Subject: [PATCH 05/11] removed "psramsafe", updated allocation functions - replaced "psramsafe" variable with its #ifdef: BOARD_HAS_PSRAM and made accommodating changes - added some compile-time checks to handle invalid env. 
definitions - updated all allocation functions and some of the logic behind them - added use of fast RTC-Memory where available - increased MIN_HEAP_SIZE for all systems (improved stability in tests) - updated memory calculation in web-UI to account for required segment buffer - added UI alerts if buffer allocation fails - made getUsedSegmentData() non-private (used in buffer alloc function) - changed MAX_SEGMENT_DATA - added more detailed memory log to DEBUG output - added debug output to buffer alloc function --- wled00/FX.h | 10 +- wled00/FX_fcn.cpp | 61 ++++---- wled00/bus_manager.cpp | 46 +++--- wled00/const.h | 14 +- wled00/data/settings_leds.htm | 3 +- wled00/fcn_declare.h | 45 +++--- wled00/file.cpp | 4 +- wled00/json.cpp | 4 +- wled00/util.cpp | 284 ++++++++++++++++++---------------- wled00/wled.cpp | 48 ++++-- wled00/wled.h | 16 +- 11 files changed, 281 insertions(+), 254 deletions(-) diff --git a/wled00/FX.h b/wled00/FX.h index 986c2457b2..5e30b52126 100644 --- a/wled00/FX.h +++ b/wled00/FX.h @@ -92,18 +92,18 @@ extern byte realtimeMode; // used in getMappedPixelIndex() #ifdef ESP8266 #define MAX_NUM_SEGMENTS 16 /* How much data bytes all segments combined may allocate */ - #define MAX_SEGMENT_DATA (MAX_NUM_SEGMENTS*640) // 10k by default + #define MAX_SEGMENT_DATA (6*1024) // 6k by default #elif defined(CONFIG_IDF_TARGET_ESP32S2) #define MAX_NUM_SEGMENTS 20 - #define MAX_SEGMENT_DATA (MAX_NUM_SEGMENTS*1024) // 20k by default (S2 is short on free RAM) + #define MAX_SEGMENT_DATA (20*1024) // 20k by default (S2 is short on free RAM), limit does not apply if PSRAM is available #else #define MAX_NUM_SEGMENTS 32 // warning: going beyond 32 may consume too much RAM for stable operation - #define MAX_SEGMENT_DATA (MAX_NUM_SEGMENTS*1920) // 60k by default + #define MAX_SEGMENT_DATA (64*1024) // 64k by default, limit does not apply if PSRAM is available #endif /* How much data bytes each segment should max allocate to leave enough space for other segments, 
assuming each segment uses the same amount of data. 256 for ESP8266, 640 for ESP32. */ -#define FAIR_DATA_PER_SEG (MAX_SEGMENT_DATA / WS2812FX::getMaxSegments()) +#define FAIR_DATA_PER_SEG (MAX_SEGMENT_DATA / MAX_NUM_SEGMENTS) #define MIN_SHOW_DELAY (_frametime < 16 ? 8 : 15) @@ -532,7 +532,6 @@ class Segment { protected: - inline static unsigned getUsedSegmentData() { return Segment::_usedSegmentData; } inline static void addUsedSegmentData(int len) { Segment::_usedSegmentData += len; } inline uint32_t *getPixels() const { return pixels; } @@ -671,6 +670,7 @@ class Segment { inline uint16_t dataSize() const { return _dataLen; } bool allocateData(size_t len); // allocates effect data buffer in heap and clears it void deallocateData(); // deallocates (frees) effect data buffer from heap + inline static unsigned getUsedSegmentData() { return Segment::_usedSegmentData; } /** * Flags that before the next effect is calculated, * the internal segment state should be reset. diff --git a/wled00/FX_fcn.cpp b/wled00/FX_fcn.cpp index 2422cc9f7a..6ad597d736 100755 --- a/wled00/FX_fcn.cpp +++ b/wled00/FX_fcn.cpp @@ -68,10 +68,10 @@ Segment::Segment(const Segment &orig) { if (!stop) return; // nothing to do if segment is inactive/invalid if (orig.pixels) { // allocate pixel buffer: prefer IRAM/PSRAM - pixels = static_cast(d_malloc(sizeof(uint32_t) * orig.length())); + pixels = static_cast(allocate_buffer(orig.length() * sizeof(uint32_t), BFRALLOC_PREFER_PSRAM | BFRALLOC_NOBYTEACCESS)); if (pixels) { memcpy(pixels, orig.pixels, sizeof(uint32_t) * orig.length()); - if (orig.name) { name = static_cast(d_malloc(strlen(orig.name)+1)); if (name) strcpy(name, orig.name); } + if (orig.name) { name = static_cast(allocate_buffer(strlen(orig.name)+1, BFRALLOC_PREFER_PSRAM)); if (name) strcpy(name, orig.name); } if (orig.data) { if (allocateData(orig._dataLen)) memcpy(data, orig.data, orig._dataLen); } } else { DEBUGFX_PRINTLN(F("!!! 
Not enough RAM for pixel buffer !!!")); @@ -97,10 +97,10 @@ Segment& Segment::operator= (const Segment &orig) { //DEBUG_PRINTF_P(PSTR("-- Copying segment: %p -> %p\n"), &orig, this); if (this != &orig) { // clean destination - if (name) { d_free(name); name = nullptr; } + if (name) { p_free(name); name = nullptr; } if (_t) stopTransition(); // also erases _t deallocateData(); - d_free(pixels); + p_free(pixels); // copy source memcpy((void*)this, (void*)&orig, sizeof(Segment)); // erase pointers to allocated data @@ -111,10 +111,10 @@ Segment& Segment::operator= (const Segment &orig) { // copy source data if (orig.pixels) { // allocate pixel buffer: prefer IRAM/PSRAM - pixels = static_cast(d_malloc(sizeof(uint32_t) * orig.length())); + pixels = static_cast(allocate_buffer(orig.length() * sizeof(uint32_t), BFRALLOC_PREFER_PSRAM | BFRALLOC_NOBYTEACCESS)); if (pixels) { memcpy(pixels, orig.pixels, sizeof(uint32_t) * orig.length()); - if (orig.name) { name = static_cast(d_malloc(strlen(orig.name)+1)); if (name) strcpy(name, orig.name); } + if (orig.name) { name = static_cast(allocate_buffer(strlen(orig.name)+1, BFRALLOC_PREFER_PSRAM)); if (name) strcpy(name, orig.name); } if (orig.data) { if (allocateData(orig._dataLen)) memcpy(data, orig.data, orig._dataLen); } } else { DEBUG_PRINTLN(F("!!! 
Not enough RAM for pixel buffer !!!")); @@ -130,10 +130,10 @@ Segment& Segment::operator= (const Segment &orig) { Segment& Segment::operator= (Segment &&orig) noexcept { //DEBUG_PRINTF_P(PSTR("-- Moving segment: %p -> %p\n"), &orig, this); if (this != &orig) { - if (name) { d_free(name); name = nullptr; } // free old name + if (name) { p_free(name); name = nullptr; } // free old name if (_t) stopTransition(); // also erases _t deallocateData(); // free old runtime data - d_free(pixels); // free old pixel buffer + p_free(pixels); // free old pixel buffer // move source data memcpy((void*)this, (void*)&orig, sizeof(Segment)); orig.name = nullptr; @@ -161,24 +161,21 @@ bool Segment::allocateData(size_t len) { } //DEBUG_PRINTF_P(PSTR("-- Allocating data (%d): %p\n"), len, this); // limit to MAX_SEGMENT_DATA if there is no PSRAM, otherwise prefer functionality over speed - #if defined(ARDUINO_ARCH_ESP32) - if (!(psramFound() && psramSafe)) - #endif - { - if (Segment::getUsedSegmentData() + len - _dataLen > MAX_SEGMENT_DATA) { - // not enough memory - DEBUG_PRINTF_P(PSTR("SegmentData limit reached: %d/%d\n"), len, Segment::getUsedSegmentData()); - errorFlag = ERR_NORAM; - return false; - } + #ifndef BOARD_HAS_PSRAM + if (Segment::getUsedSegmentData() + len - _dataLen > MAX_SEGMENT_DATA) { + // not enough memory + DEBUG_PRINTF_P(PSTR("SegmentData limit reached: %d/%d\n"), len, Segment::getUsedSegmentData()); + errorFlag = ERR_NORAM; + return false; } + #endif if (data) { d_free(data); // free data and try to allocate again (segment buffer may be blocking contiguous heap) Segment::addUsedSegmentData(-_dataLen); // subtract buffer size } - data = static_cast(allocate_buffer(len, BFRALLOC_PREFER_DRAM | BFRALLOC_CLEAR)); // prefer DRAM over PSRAM for speed + data = static_cast(allocate_buffer(len, BFRALLOC_PREFER_DRAM | BFRALLOC_CLEAR)); // prefer DRAM over PSRAM for speed if (data) { Segment::addUsedSegmentData(len); @@ -476,7 +473,7 @@ void Segment::setGeometry(uint16_t i1, 
uint16_t i2, uint8_t grp, uint8_t spc, ui if (length() != oldLength) { // allocate render buffer (always entire segment), prefer IRAM/PSRAM. Note: impact on FPS with PSRAM buffer is low (<2% with QSPI PSRAM) on S2/S3 p_free(pixels); - pixels = static_cast(d_malloc(sizeof(uint32_t) * length())); + pixels = static_cast(allocate_buffer(length() * sizeof(uint32_t), BFRALLOC_PREFER_PSRAM | BFRALLOC_NOBYTEACCESS)); if (!pixels) { DEBUGFX_PRINTLN(F("!!! Not enough RAM for pixel buffer !!!")); deallocateData(); @@ -591,8 +588,8 @@ Segment &Segment::setName(const char *newName) { if (newName) { const int newLen = min(strlen(newName), (size_t)WLED_MAX_SEGNAME_LEN); if (newLen) { - if (name) d_free(name); // free old name - name = static_cast(d_malloc(newLen+1)); + if (name) p_free(name); // free old name + name = static_cast(allocate_buffer(newLen+1, BFRALLOC_PREFER_PSRAM)); if (mode == FX_MODE_2DSCROLLTEXT) startTransition(strip.getTransition(), true); // if the name changes in scrolling text mode, we need to copy the segment for blending if (name) strlcpy(name, newName, newLen+1); return *this; @@ -1187,7 +1184,10 @@ void WS2812FX::finalizeInit() { mem += bus.memUsage(Bus::isDigital(bus.type) && !Bus::is2Pin(bus.type) ? digitalCount++ : 0); // includes global buffer if (mem <= MAX_LED_MEMORY) { if (BusManager::add(bus) == -1) break; - } else DEBUG_PRINTF_P(PSTR("Out of LED memory! Bus %d (%d) #%u not created."), (int)bus.type, (int)bus.count, digitalCount); + } else { + errorFlag = ERR_NORAM_PX; // alert UI + DEBUG_PRINTF_P(PSTR("Out of LED memory! Bus %d (%d) #%u not created."), (int)bus.type, (int)bus.count, digitalCount); + } } busConfigs.clear(); busConfigs.shrink_to_fit(); @@ -1218,8 +1218,9 @@ void WS2812FX::finalizeInit() { deserializeMap(); // (re)load default ledmap (will also setUpMatrix() if ledmap does not exist) // allocate frame buffer after matrix has been set up (gaps!) 
- d_free(_pixels); // using realloc on large buffers can cause additional fragmentation instead of reducing it - _pixels = static_cast(d_malloc(getLengthTotal() * sizeof(uint32_t))); + p_free(_pixels); // using realloc on large buffers can cause additional fragmentation instead of reducing it + // use PSRAM if available: there is no measurable perfomance impact between PSRAM and DRAM on S2/S3 with QSPI PSRAM for this buffer + _pixels = static_cast(allocate_buffer(getLengthTotal() * sizeof(uint32_t), BFRALLOC_ENFORCE_PSRAM | BFRALLOC_NOBYTEACCESS | BFRALLOC_CLEAR)); DEBUG_PRINTF_P(PSTR("strip buffer size: %uB\n"), getLengthTotal() * sizeof(uint32_t)); DEBUG_PRINTF_P(PSTR("Heap after strip init: %uB\n"), getFreeHeapSize()); } @@ -1621,7 +1622,11 @@ static uint8_t estimateCurrentAndLimitBri(uint8_t brightness, uint32_t *pixels) } void WS2812FX::show() { - if (!_pixels) return; // no pixels allocated, nothing to show + if (!_pixels) { + DEBUGFX_PRINTLN(F("Error: no _pixels!")); + errorFlag = ERR_NORAM; + return; // no pixels allocated, nothing to show + } unsigned long showNow = millis(); size_t diff = showNow - _lastShow; @@ -1631,7 +1636,7 @@ void WS2812FX::show() { // we need to keep track of each pixel's CCT when blending segments (if CCT is present) // and then set appropriate CCT from that pixel during paint (see below). 
if ((hasCCTBus() || correctWB) && !cctFromRgb) - _pixelCCT = static_cast(d_malloc(totalLen * sizeof(uint8_t))); // allocate CCT buffer if necessary + _pixelCCT = static_cast(allocate_buffer(totalLen * sizeof(uint8_t), BFRALLOC_PREFER_PSRAM)); // allocate CCT buffer if necessary, prefer PSRAM if (_pixelCCT) memset(_pixelCCT, 127, totalLen); // set neutral (50:50) CCT if (realtimeMode == REALTIME_MODE_INACTIVE || useMainSegmentOnly || realtimeOverride > REALTIME_OVERRIDE_NONE) { @@ -1665,7 +1670,7 @@ void WS2812FX::show() { } Bus::setCCT(oldCCT); // restore old CCT for ABL adjustments - d_free(_pixelCCT); + p_free(_pixelCCT); _pixelCCT = nullptr; // some buses send asynchronously and this method will return before diff --git a/wled00/bus_manager.cpp b/wled00/bus_manager.cpp index f513029680..44ed6e0d3f 100644 --- a/wled00/bus_manager.cpp +++ b/wled00/bus_manager.cpp @@ -38,35 +38,29 @@ uint32_t colorBalanceFromKelvin(uint16_t kelvin, uint32_t rgb); uint8_t realtimeBroadcast(uint8_t type, IPAddress client, uint16_t length, const byte *buffer, uint8_t bri=255, bool isRGBW=false); //util.cpp -// PSRAM allocation wrappers -#if !defined(ESP8266) && !defined(CONFIG_IDF_TARGET_ESP32C3) +// memory allocation wrappers extern "C" { - void *p_malloc(size_t); // prefer PSRAM over DRAM - void *p_calloc(size_t, size_t); // prefer PSRAM over DRAM - void *p_realloc(void *, size_t); // prefer PSRAM over DRAM - void *p_realloc_malloc(void *ptr, size_t size); // realloc with malloc fallback, prefer PSRAM over DRAM - inline void p_free(void *ptr) { heap_caps_free(ptr); } - void *d_malloc(size_t); // prefer DRAM over PSRAM - void *d_calloc(size_t, size_t); // prefer DRAM over PSRAM - void *d_realloc(void *, size_t); // prefer DRAM over PSRAM - void *d_realloc_malloc(void *ptr, size_t size); // realloc with malloc fallback, prefer DRAM over PSRAM + // prefer DRAM over PSRAM (if available) in d_ alloc functions + void *d_malloc(size_t); + void *d_calloc(size_t, size_t); + void 
*d_realloc_malloc(void *ptr, size_t size); + #ifndef ESP8266 inline void d_free(void *ptr) { heap_caps_free(ptr); } + #else + inline void d_free(void *ptr) { free(ptr); } + #endif + #if defined(BOARD_HAS_PSRAM) + // prefer PSRAM over DRAM in p_ alloc functions + void *p_malloc(size_t); + void *p_calloc(size_t, size_t); + void *p_realloc_malloc(void *ptr, size_t size); + inline void p_free(void *ptr) { heap_caps_free(ptr); } + #else + #define p_malloc d_malloc + #define p_calloc d_calloc + #define p_free d_free + #endif } -#else -extern "C" { - void *realloc_malloc(void *ptr, size_t size); -} -#define p_malloc malloc -#define p_calloc calloc -#define p_realloc realloc -#define p_realloc_malloc realloc_malloc -#define p_free free -#define d_malloc malloc -#define d_calloc calloc -#define d_realloc realloc -#define d_realloc_malloc realloc_malloc -#define d_free free -#endif //color mangling macros #define RGBW32(r,g,b,w) (uint32_t((byte(w) << 24) | (byte(r) << 16) | (byte(g) << 8) | (byte(b)))) diff --git a/wled00/const.h b/wled00/const.h index 9e6a29fce0..7a7ca976c6 100644 --- a/wled00/const.h +++ b/wled00/const.h @@ -548,18 +548,18 @@ static_assert(WLED_MAX_BUSSES <= 32, "WLED_MAX_BUSSES exceeds hard limit"); // minimum heap size required to process web requests: try to keep free heap above this value #ifdef ESP8266 - #define MIN_HEAP_SIZE (8*1024) -#else #define MIN_HEAP_SIZE (9*1024) +#else + #define MIN_HEAP_SIZE (15*1024) // WLED allocation functions (util.cpp) try to keep this much contiguous heap free for other tasks #endif -// threshold for PSRAM use: if heap is running low, requests above PSRAM_THRESHOLD will be allocated in PSRAM -// if heap is plenty, requests below PSRAM_THRESHOLD will be allocated in DRAM for speed +// threshold for PSRAM use: if heap is running low, requests to allocate_buffer(prefer DRAM) above PSRAM_THRESHOLD may be put in PSRAM +// if heap is depleted, PSRAM will be used regardless of threshold #if defined(CONFIG_IDF_TARGET_ESP32S3) 
- #define PSRAM_THRESHOLD 5120 + #define PSRAM_THRESHOLD (12*1024) // S3 has plenty of DRAM #elif defined(CONFIG_IDF_TARGET_ESP32) - #define PSRAM_THRESHOLD 4096 + #define PSRAM_THRESHOLD (5*1024) #else - #define PSRAM_THRESHOLD 1024 // S2 does not have a lot of RAM. C3 and ESP8266 do not support PSRAM: the value is not used + #define PSRAM_THRESHOLD (2*1024) // S2 does not have a lot of RAM. C3 and ESP8266 do not support PSRAM: the value is not used #endif // Web server limits diff --git a/wled00/data/settings_leds.htm b/wled00/data/settings_leds.htm index 928da11753..6424815cc2 100644 --- a/wled00/data/settings_leds.htm +++ b/wled00/data/settings_leds.htm @@ -195,7 +195,6 @@ if (isAna(t)) return 5; // analog let len = parseInt(d.getElementsByName("LC"+n)[0].value); len += parseInt(d.getElementsByName("SL"+n)[0].value); // skipped LEDs are allocated too - let dbl = 0; let ch = 3*hasRGB(t) + hasW(t) + hasCCT(t); let mul = 1; if (isDig(t)) { @@ -207,7 +206,7 @@ mul = 2; } } - return len * ch * mul + dbl; + return len * ch * mul + len * 4; // add 4 bytes per LED for segment buffer (TODO: how to account for global buffer?) 
} function UI(change=false) diff --git a/wled00/fcn_declare.h b/wled00/fcn_declare.h index 16c81f42d1..e9ea5c4a73 100644 --- a/wled00/fcn_declare.h +++ b/wled00/fcn_declare.h @@ -560,34 +560,29 @@ inline uint8_t hw_random8(uint32_t upperlimit) { return (hw_random8() * upperlim inline uint8_t hw_random8(uint32_t lowerlimit, uint32_t upperlimit) { uint32_t range = upperlimit - lowerlimit; return lowerlimit + hw_random8(range); }; // input range 0-255 // memory allocation wrappers -#if !defined(ESP8266) && !defined(CONFIG_IDF_TARGET_ESP32C3) extern "C" { - void *p_malloc(size_t); // prefer PSRAM over DRAM - void *p_calloc(size_t, size_t); // prefer PSRAM over DRAM - void *p_realloc(void *, size_t); // prefer PSRAM over DRAM - void *p_realloc_malloc(void *ptr, size_t size); // realloc with malloc fallback, prefer PSRAM over DRAM - inline void p_free(void *ptr) { heap_caps_free(ptr); } - void *d_malloc(size_t); // prefer DRAM over PSRAM - void *d_calloc(size_t, size_t); // prefer DRAM over PSRAM - void *d_realloc(void *, size_t); // prefer DRAM over PSRAM - void *d_realloc_malloc(void *ptr, size_t size); // realloc with malloc fallback, prefer DRAM over PSRAM + // prefer DRAM in d_xalloc functions, PSRAM as fallback + void *d_malloc(size_t); + void *d_calloc(size_t, size_t); + void *d_realloc_malloc(void *ptr, size_t size); + #ifndef ESP8266 inline void d_free(void *ptr) { heap_caps_free(ptr); } + #else + inline void d_free(void *ptr) { free(ptr); } + #endif + #if defined(BOARD_HAS_PSRAM) + // prefer PSRAM in p_xalloc functions, DRAM as fallback + void *p_malloc(size_t); + void *p_calloc(size_t, size_t); + void *p_realloc_malloc(void *ptr, size_t size); + inline void p_free(void *ptr) { heap_caps_free(ptr); } + #else + #define p_malloc d_malloc + #define p_calloc d_calloc + #define p_realloc d_realloc + #define p_free d_free + #endif } -#else -extern "C" { - void *realloc_malloc(void *ptr, size_t size); -} -#define p_malloc malloc -#define p_calloc calloc -#define 
p_realloc realloc -#define p_realloc_malloc realloc_malloc -#define p_free free -#define d_malloc malloc -#define d_calloc calloc -#define d_realloc realloc -#define d_realloc_malloc realloc_malloc -#define d_free free -#endif #ifndef ESP8266 inline size_t getFreeHeapSize() { return heap_caps_get_free_size(MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); } // returns free heap (ESP.getFreeHeap() can include other memory types) inline size_t getContiguousFreeHeap() { return heap_caps_get_largest_free_block(MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); } // returns largest contiguous free block diff --git a/wled00/file.cpp b/wled00/file.cpp index 108c41bd44..9f1dd62256 100644 --- a/wled00/file.cpp +++ b/wled00/file.cpp @@ -422,8 +422,8 @@ bool handleFileRead(AsyncWebServerRequest* request, String path){ DEBUGFS_PRINT(F("WS FileRead: ")); DEBUGFS_PRINTLN(path); if(path.endsWith("/")) path += "index.htm"; if(path.indexOf(F("sec")) > -1) return false; - #ifdef ARDUINO_ARCH_ESP32 - if (psramSafe && psramFound() && path.endsWith(FPSTR(getPresetsFileName()))) { + #ifdef BOARD_HAS_PSRAM + if (path.endsWith(FPSTR(getPresetsFileName()))) { size_t psize; const uint8_t *presets = getPresetCache(psize); if (presets) { diff --git a/wled00/json.cpp b/wled00/json.cpp index 1e219bbfbb..fd6365246b 100644 --- a/wled00/json.cpp +++ b/wled00/json.cpp @@ -830,8 +830,8 @@ void serializeInfo(JsonObject root) #endif root[F("freeheap")] = getFreeHeapSize(); - #if defined(ARDUINO_ARCH_ESP32) - if (psramFound()) root[F("psram")] = ESP.getFreePsram(); + #if defined(BOARD_HAS_PSRAM) + root[F("psram")] = ESP.getFreePsram(); #endif root[F("uptime")] = millis()/1000 + rolloverMillis*4294967; diff --git a/wled00/util.cpp b/wled00/util.cpp index e0ef752076..f9f39397aa 100644 --- a/wled00/util.cpp +++ b/wled00/util.cpp @@ -629,98 +629,181 @@ int32_t hw_random(int32_t lowerlimit, int32_t upperlimit) { return hw_random(diff) + lowerlimit; } -#if !defined(ESP8266) && !defined(CONFIG_IDF_TARGET_ESP32C3) // ESP8266 does 
not support PSRAM, ESP32-C3 does not have PSRAM -// p_x prefer PSRAM, d_x prefer DRAM -void *p_malloc(size_t size) { - int caps1 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; - int caps2 = MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT; - if (psramSafe) { - if (getContiguousFreeHeap() > (2*MIN_HEAP_SIZE + MAX_SEGMENT_DATA/2) && size < PSRAM_THRESHOLD) { - std::swap(caps1, caps2); // use DRAM for small allocations & when DRAM is plenty - } - return heap_caps_malloc_prefer(size, 2, caps1, caps2); // otherwise prefer PSRAM if it exists - } - return heap_caps_malloc(size, caps2); -} +// PSRAM compile time checks to provide info for misconfigured env +#if defined(BOARD_HAS_PSRAM) + #if defined(IDF_TARGET_ESP32C3) || defined(ESP8266) + #error "ESP32-C3 and ESP8266 with PSRAM is not supported, please remove BOARD_HAS_PSRAM definition" + #else + // BOARD_HAS_PSRAM also means that compiler flag "-mfix-esp32-psram-cache-issue" has to be used + #warning "BOARD_HAS_PSRAM defined, make sure to use -mfix-esp32-psram-cache-issue to prevent issues on rev.1 ESP32 boards \ + see https://docs.espressif.com/projects/esp-idf/en/stable/esp32/api-guides/external-ram.html#esp32-rev-v1-0" + #endif +#else + #if !defined(IDF_TARGET_ESP32C3) && !defined(ESP8266) + #pragma message("BOARD_HAS_PSRAM not defined, not using PSRAM.") + #endif +#endif -void *p_realloc(void *ptr, size_t size) { - int caps1 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; - int caps2 = MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT; - if (psramSafe) { - if (getContiguousFreeHeap() > (2*MIN_HEAP_SIZE + MAX_SEGMENT_DATA/2) && size < PSRAM_THRESHOLD) { - std::swap(caps1, caps2); // use DRAM for small allocations & when DRAM is plenty - } - return heap_caps_realloc_prefer(ptr, size, 2, caps1, caps2); // otherwise prefer PSRAM if it exists +// memory allocation functions with minimum free heap size check +#ifdef ESP8266 +static void *validateFreeHeap(void *buffer) { + // make sure there is enough free heap left if buffer was allocated in DRAM region, free 
it if not + if (getContiguousFreeHeap() < MIN_HEAP_SIZE) { + free(buffer); + return nullptr; } - return heap_caps_realloc(ptr, size, caps2); + return buffer; } -void *p_calloc(size_t count, size_t size) { - int caps1 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; - int caps2 = MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT; - if (psramSafe) { - if (getContiguousFreeHeap() > (2*MIN_HEAP_SIZE + MAX_SEGMENT_DATA/2) && size < PSRAM_THRESHOLD) { - std::swap(caps1, caps2); // use DRAM for small allocations & when DRAM is plenty - } - return heap_caps_calloc_prefer(count, size, 2, caps1, caps2); // otherwise prefer PSRAM if it exists - } - return heap_caps_calloc(count, size, caps2); +void *d_malloc(size_t size) { + // note: using "if (getContiguousFreeHeap() > MIN_HEAP_SIZE + size)" did perform worse in tests with regards to keeping heap healthy and UI working + void *buffer = malloc(size); + return validateFreeHeap(buffer); } -// realloc with malloc fallback, original buffer is freed if realloc fails but not copied! 
-void *p_realloc_malloc(void *ptr, size_t size) { - void *newbuf = p_realloc(ptr, size); // try realloc first - if (newbuf) return newbuf; // realloc successful - p_free(ptr); // free old buffer if realloc failed - return p_malloc(size); // fallback to malloc +void *d_calloc(size_t count, size_t size) { + void *buffer = calloc(count, size); + return validateFreeHeap(buffer); } -void *d_malloc(size_t size) { - int caps1 = MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT; - int caps2 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; - if (psramSafe) { - if (getContiguousFreeHeap() < 2*MIN_HEAP_SIZE && size > PSRAM_THRESHOLD) std::swap(caps1, caps2); // prefer PSRAM for large allocations & when DRAM is low - return heap_caps_malloc_prefer(size, 2, caps1, caps2); // otherwise prefer DRAM +// realloc with malloc fallback, note: on ESPS8266 there is no safe way to ensure MIN_HEAP_SIZE during realloc()s, free buffer and allocate new one +void *realloc_malloc(void *ptr, size_t size) { + free(ptr); + return d_malloc(size); +} +#else +static void *validateFreeHeap(void *buffer) { + // make sure there is enough free heap left if buffer was allocated in DRAM region, free it if not + // TODO: between allocate and free, heap can run low (async web access), only IDF V5 allows for a pre-allocation-check of all free blocks + if ((uintptr_t)buffer > SOC_DRAM_LOW && (uintptr_t)buffer < SOC_DRAM_HIGH && getContiguousFreeHeap() < MIN_HEAP_SIZE) { + free(buffer); + return nullptr; } - return heap_caps_malloc(size, caps1); + return buffer; } -void *d_realloc(void *ptr, size_t size) { - int caps1 = MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT; - int caps2 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; - if (psramSafe) { - if (getContiguousFreeHeap() < 2*MIN_HEAP_SIZE && size > PSRAM_THRESHOLD) std::swap(caps1, caps2); // prefer PSRAM for large allocations & when DRAM is low - return heap_caps_realloc_prefer(ptr, size, 2, caps1, caps2); // otherwise prefer DRAM - } - return heap_caps_realloc(ptr, size, caps1); +void 
*d_malloc(size_t size) { + void *buffer; + #if defined(CONFIG_IDF_TARGET_ESP32C3) || defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32S3) + // the newer ESP32 variants have byte-accessible fast RTC memory that can be used as heap, access speed is on-par with DRAM + // the system does prefer normal DRAM until full, since free RTC memory is ~7.5k only, its below the minimum heap threshold and needs to be allocated explicitly + // use RTC RAM for small allocations to improve fragmentation or if DRAM is running low + if (size < 256 || getContiguousFreeHeap() < 2*MIN_HEAP_SIZE + size) + buffer = heap_caps_malloc_prefer(size, 2, MALLOC_CAP_RTCRAM, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); + else + #endif + buffer = heap_caps_malloc(size, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); // allocate in any available heap memory + buffer = validateFreeHeap(buffer); // make sure there is enough free heap left + #ifdef BOARD_HAS_PSRAM + if (!buffer) + return heap_caps_malloc(size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); // DRAM failed, use PSRAM if available + #endif + return buffer; } void *d_calloc(size_t count, size_t size) { - int caps1 = MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT; - int caps2 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; - if (psramSafe) { - if (getContiguousFreeHeap() < 3*MIN_HEAP_SIZE && size > PSRAM_THRESHOLD) std::swap(caps1, caps2); // prefer PSRAM for large allocations & when DRAM is low - return heap_caps_calloc_prefer(count, size, 2, caps1, caps2); // otherwise prefer DRAM - } - return heap_caps_calloc(count, size, caps1); + void *buffer = d_malloc(count * size); + if (buffer) memset(buffer, 0, count * size); // clear allocated buffer + return buffer; } // realloc with malloc fallback, original buffer is freed if realloc fails but not copied! 
void *d_realloc_malloc(void *ptr, size_t size) { - void *newbuf = d_realloc(ptr, size); // try realloc first - if (newbuf) return newbuf; // realloc successful - d_free(ptr); // free old buffer if realloc failed + void *buffer = heap_caps_realloc(ptr, size, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); + buffer = validateFreeHeap(buffer); + if (buffer) return buffer; // realloc successful + d_free(ptr); // free old buffer if realloc failed (or min heap was exceeded) return d_malloc(size); // fallback to malloc } -#else // ESP8266 & ESP32-C3 + +#ifdef BOARD_HAS_PSRAM +// p_xalloc: prefer PSRAM, use DRAM as fallback +void *p_malloc(size_t size) { + void *buffer = heap_caps_malloc_prefer(size, 2, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); + return validateFreeHeap(buffer); +} + +void *p_calloc(size_t count, size_t size) { + void *buffer = p_malloc(count * size); + if (buffer) memset(buffer, 0, count * size); // clear allocated buffer + return buffer; +} + // realloc with malloc fallback, original buffer is freed if realloc fails but not copied! 
-void *realloc_malloc(void *ptr, size_t size) { - void *newbuf = realloc(ptr, size); // try realloc first - if (newbuf) return newbuf; // realloc successful - free(ptr); // free old buffer if realloc failed - return malloc(size); // fallback to malloc +void *p_realloc_malloc(void *ptr, size_t size) { + void *buffer = heap_caps_realloc(ptr, size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); + if (buffer) return buffer; // realloc successful + p_free(ptr); // free old buffer if realloc failed + return p_malloc(size); // fallback to malloc } #endif +#endif + +// allocation function for buffers like pixel-buffers and segment data +// optimises the use of memory types to balance speed and heap availability, always favours DRAM if possible +// if multiple conflicting types are defined, the lowest bits of "type" take priority (see fcn_declare.h for types) +void *allocate_buffer(size_t size, uint32_t type) { + void *buffer = nullptr; + #ifdef CONFIG_IDF_TARGET_ESP32 + // only classic ESP32 has "32bit accessible only" aka IRAM type. Using it frees up normal DRAM for other purposes + // this memory region is used for IRAM_ATTR functions, whatever is left is unused and can be used for pixel buffers + // prefer this type over PSRAM as it is slightly faster, except for _pixels where it is on-par as PSRAM-caching does a good job for mostly sequential access + if (type & BFRALLOC_NOBYTEACCESS) { + // prefer 32bit region, then PSRAM, fallback to any heap. 
Note: if adding "INTERNAL"-flag this wont work + buffer = heap_caps_malloc_prefer(size, 3, MALLOC_CAP_32BIT, MALLOC_CAP_SPIRAM, MALLOC_CAP_8BIT); + buffer = validateFreeHeap(buffer); + } + else + #endif + #if !defined(BOARD_HAS_PSRAM) + buffer = d_malloc(size); + #else + if (type & BFRALLOC_PREFER_DRAM) { + if (getContiguousFreeHeap() < 3*(MIN_HEAP_SIZE/2) + size && size > PSRAM_THRESHOLD) + buffer = p_malloc(size); // prefer PSRAM for large allocations & when DRAM is low + else + buffer = d_malloc(size); // allocate in DRAM if enough free heap is available, PSRAM as fallback + } + else if (type & BFRALLOC_ENFORCE_DRAM) + buffer = heap_caps_malloc(size, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); // use DRAM only, otherwise return nullptr + else if (type & BFRALLOC_PREFER_PSRAM) { + // if DRAM is plenty, prefer it over PSRAM for speed, reserve enough DRAM for segment data: if MAX_SEGMENT_DATA is exceeded, always uses PSRAM + if (getContiguousFreeHeap() > 4*MIN_HEAP_SIZE + size + ((uint32_t)(MAX_SEGMENT_DATA - Segment::getUsedSegmentData()))) + buffer = d_malloc(size); + else + buffer = p_malloc(size); // prefer PSRAM + } + else if (type & BFRALLOC_ENFORCE_PSRAM) + buffer = heap_caps_malloc(size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); // use PSRAM only, otherwise return nullptr + buffer = validateFreeHeap(buffer); + #endif + if (buffer && (type & BFRALLOC_CLEAR)) + memset(buffer, 0, size); // clear allocated buffer + /* + #if !defined(ESP8266) && defined(WLED_DEBUG) + if (buffer) { + DEBUG_PRINTF_P(PSTR("*Buffer allocated: size:%d, address:%p"), size, (uintptr_t)buffer); + if ((uintptr_t)buffer > SOC_DRAM_LOW && (uintptr_t)buffer < SOC_DRAM_HIGH) + DEBUG_PRINTLN(F(" in DRAM")); + #ifndef CONFIG_IDF_TARGET_ESP32C3 + else if ((uintptr_t)buffer > SOC_EXTRAM_DATA_LOW && (uintptr_t)buffer < SOC_EXTRAM_DATA_HIGH) + DEBUG_PRINTLN(F(" in PSRAM")); + #endif + #ifdef CONFIG_IDF_TARGET_ESP32 + else if ((uintptr_t)buffer > SOC_IRAM_LOW && (uintptr_t)buffer < SOC_IRAM_HIGH) + 
DEBUG_PRINTLN(F(" in IRAM")); // only used on ESP32 (MALLOC_CAP_32BIT) + #else + else if ((uintptr_t)buffer > SOC_RTC_DRAM_LOW && (uintptr_t)buffer < SOC_RTC_DRAM_HIGH) + DEBUG_PRINTLN(F(" in RTCRAM")); // not available on ESP32 + #endif + else + DEBUG_PRINTLN(F(" in ???")); // unknown (check soc.h for other memory regions) + } else + DEBUG_PRINTF_P(PSTR("Buffer allocation failed: size:%d\n"), size); + #endif + */ + return buffer; +} // bootloop detection and handling // checks if the ESP reboots multiple times due to a crash or watchdog timeout @@ -848,69 +931,6 @@ void handleBootLoop() { ESP.restart(); // restart cleanly and don't wait for another crash } - -// allocation function for large buffers like pixel-buffers and segment data -// ensures that a contiguous block of MIN_HEAP_SIZE remains to keep the UI working, otherwise returns nullptr -// if multiple conflicting types are defined, the lowest bits of "type" take priority (see fcn_declare.h for types) -void *allocate_buffer(size_t size, uint32_t type) { - void *buffer = nullptr; - - #if defined(ESP8266) || defined(CONFIG_IDF_TARGET_ESP32C3) // ESP8266 does not support PSRAM, ESP32-C3 does not have PSRAM - buffer = d_malloc(size); - #else - #ifdef CONFIG_IDF_TARGET_ESP32 - // only classic ESP32 has "32bit accessible only" aka IRAM type. 
Using it frees up normal DRAM for other purposes - // this memory region is used for IRAM_ATTR functions, whatever is left is unused and can be used for pixel buffers - // prefer this type over PSRAM as it is slightly faster, except for _pixels where it is on-par as PSRAM-caching does a good job for mostly sequential access - if (type & BFRALLOC_NOBYTEACCESS) { - buffer = static_cast(heap_caps_malloc(size, MALLOC_CAP_INTERNAL | MALLOC_CAP_32BIT)); // try to allocate in 32bit DRAM region - if ((uintptr_t)buffer < SOC_DRAM_HIGH) { - // buffer was allocated in normal DRAM and did not fit into 32bit DRAM region located at SOC_IRAM_LOW (or is nullptr) - // if PSRAM is available and a PSRAM type is set as an option, free the DRAM memory and continue below - if (psramSafe && psramFound() && (type & (BFRALLOC_PREFER_PSRAM | BFRALLOC_ENFORCE_PSRAM))) { - free(buffer); - buffer = nullptr; - } - } - if (buffer) - type = type & BFRALLOC_CLEAR; // we have a valid buffer, clear any additional flags except BFRALLOC_CLEAR - } - #endif - if (psramSafe && psramFound()) { - if (type & BFRALLOC_PREFER_DRAM) { - buffer = d_malloc(size); // allocate in DRAM if enough free heap is available, PSRAM as fallback - } - else if (type & BFRALLOC_ENFORCE_DRAM) { - buffer = heap_caps_malloc(size, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); // use DRAM only, otherwise return nullptr - } - else if (type & BFRALLOC_PREFER_PSRAM) { - buffer = p_malloc(size); // prefer PSRAM: uses DRAM if vast amounts are available to optimize speed - } - else if (type & BFRALLOC_ENFORCE_PSRAM) { - buffer = heap_caps_malloc(size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); // use PSRAM only, otherwise return nullptr - } - } - else { - #ifdef CONFIG_IDF_TARGET_ESP32 - if (!buffer) - buffer = d_malloc(size); // no PSRAM available, use DRAM if not already allocated to 32bit-only region - #else - buffer = d_malloc(size); // no PSRAM available, use DRAM - #endif - } - #endif - if (buffer) { - // limit check: leave enough free heap 
for UI and other tasks - if (getContiguousFreeHeap() < MIN_HEAP_SIZE) { - free(buffer); // free allocated buffer - return nullptr; - } - if (type & BFRALLOC_CLEAR) - memset(buffer, 0, size); // clear allocated buffer - } - return buffer; -} - /* * Fixed point integer based Perlin noise functions by @dedehai * Note: optimized for speed and to mimic fastled inoise functions, not for accuracy or best randomness diff --git a/wled00/wled.cpp b/wled00/wled.cpp index a48494587b..5fe056b189 100644 --- a/wled00/wled.cpp +++ b/wled00/wled.cpp @@ -243,17 +243,37 @@ void WLED::loop() DEBUG_PRINTLN(F("---DEBUG INFO---")); DEBUG_PRINTF_P(PSTR("Runtime: %lu\n"), millis()); DEBUG_PRINTF_P(PSTR("Unix time: %u,%03u\n"), toki.getTime().sec, toki.getTime().ms); - DEBUG_PRINTF_P(PSTR("Free heap/contiguous: %u/%u\n"), getFreeHeapSize(), getContiguousFreeHeap()); + #if defined(ARDUINO_ARCH_ESP32) + DEBUG_PRINTLN(F("=== Memory Info ===")); + // Internal DRAM (standard 8-bit accessible heap) + size_t dram_free = heap_caps_get_free_size(MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL); + size_t dram_largest = heap_caps_get_largest_free_block(MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL); + DEBUG_PRINTF_P(PSTR("DRAM 8-bit: Free: %7u bytes | Largest block: %7u bytes\n"), dram_free, dram_largest); + #ifdef BOARD_HAS_PSRAM + size_t psram_free = heap_caps_get_free_size(MALLOC_CAP_SPIRAM); + size_t psram_largest = heap_caps_get_largest_free_block(MALLOC_CAP_SPIRAM); + DEBUG_PRINTF_P(PSTR("PSRAM: Free: %7u bytes | Largest block: %6u bytes\n"), psram_free, psram_largest); + #endif #if defined(CONFIG_IDF_TARGET_ESP32) - int dram32_free = heap_caps_get_free_size(MALLOC_CAP_32BIT|MALLOC_CAP_INTERNAL) - getFreeHeapSize(); - DEBUG_PRINTF_P(PSTR("Free 32bit-heap: %d\n"), dram32_free); + // 32-bit DRAM (not byte accessible, only available on ESP32) + size_t dram32_free = heap_caps_get_free_size(MALLOC_CAP_32BIT | MALLOC_CAP_INTERNAL) - dram_free; // returns all 32bit DRAM, subtract 8bit DRAM + //size_t dram32_largest = 
heap_caps_get_largest_free_block(MALLOC_CAP_32BIT | MALLOC_CAP_INTERNAL); // returns largest DRAM block -> not useful + DEBUG_PRINTF_P(PSTR("DRAM 32-bit: Free: %7u bytes | Largest block: N/A\n"), dram32_free); + #else + // Fast RTC Memory (not available on ESP32) + size_t rtcram_free = heap_caps_get_free_size(MALLOC_CAP_RTCRAM); + size_t rtcram_largest = heap_caps_get_largest_free_block(MALLOC_CAP_RTCRAM); + DEBUG_PRINTF_P(PSTR("RTC RAM: Free: %7u bytes | Largest block: %7u bytes\n"), rtcram_free, rtcram_largest); #endif - #if defined(ARDUINO_ARCH_ESP32) if (psramFound()) { DEBUG_PRINTF_P(PSTR("PSRAM: %dkB/%dkB\n"), ESP.getFreePsram()/1024, ESP.getPsramSize()/1024); - if (!psramSafe) DEBUG_PRINTLN(F("Not using PSRAM.")); + #ifndef BOARD_HAS_PSRAM + DEBUG_PRINTLN(F("BOARD_HAS_PSRAM not defined, not using PSRAM.")); + #endif } DEBUG_PRINTF_P(PSTR("TX power: %d/%d\n"), WiFi.getTxPower(), txPower); + #else // ESP8266 + DEBUG_PRINTF_P(PSTR("Free heap/contiguous: %u/%u\n"), getFreeHeapSize(), getContiguousFreeHeap()); #endif DEBUG_PRINTF_P(PSTR("Wifi state: %d\n"), WiFi.status()); #ifndef WLED_DISABLE_ESPNOW @@ -375,18 +395,14 @@ void WLED::setup() #endif DEBUG_PRINTF_P(PSTR("heap %u\n"), getFreeHeapSize()); +#if defined(BOARD_HAS_PSRAM) + // if JSON buffer allocation fails requestJsonBufferLock() will always return false preventing crashes + pDoc = new PSRAMDynamicJsonDocument(2 * JSON_BUFFER_SIZE); + DEBUG_PRINTF_P(PSTR("JSON buffer size: %ubytes\n"), (2 * JSON_BUFFER_SIZE)); + DEBUG_PRINTF_P(PSTR("PSRAM: %dkB/%dkB\n"), ESP.getFreePsram()/1024, ESP.getPsramSize()/1024); +#endif + #if defined(ARDUINO_ARCH_ESP32) - // BOARD_HAS_PSRAM also means that a compiler flag "-mfix-esp32-psram-cache-issue" was used and so PSRAM is safe to use on rev.1 ESP32 - #if !defined(BOARD_HAS_PSRAM) && !(defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32C3)) - if (psramFound() && ESP.getChipRevision() < 3) psramSafe = false; - if 
(!psramSafe) DEBUG_PRINTLN(F("Not using PSRAM.")); - #endif - pDoc = new PSRAMDynamicJsonDocument((psramSafe && psramFound() ? 2 : 1)*JSON_BUFFER_SIZE); - DEBUG_PRINTF_P(PSTR("JSON buffer allocated: %u\n"), (psramSafe && psramFound() ? 2 : 1)*JSON_BUFFER_SIZE); - // if the above fails requestJsonBufferLock() will always return false preventing crashes - if (psramFound()) { - DEBUG_PRINTF_P(PSTR("PSRAM: %dkB/%dkB\n"), ESP.getFreePsram()/1024, ESP.getPsramSize()/1024); - } DEBUG_PRINTF_P(PSTR("TX power: %d/%d\n"), WiFi.getTxPower(), txPower); #endif diff --git a/wled00/wled.h b/wled00/wled.h index 8ec6655977..4eddc828fa 100644 --- a/wled00/wled.h +++ b/wled00/wled.h @@ -167,16 +167,13 @@ // The following is a construct to enable code to compile without it. // There is a code that will still not use PSRAM though: // AsyncJsonResponse is a derived class that implements DynamicJsonDocument (AsyncJson-v6.h) -#if defined(ARDUINO_ARCH_ESP32) -extern bool psramSafe; +#if defined(BOARD_HAS_PSRAM) struct PSRAM_Allocator { void* allocate(size_t size) { - if (psramSafe && psramFound()) return ps_malloc(size); // use PSRAM if it exists - else return malloc(size); // fallback + return ps_malloc(size); // use PSRAM } void* reallocate(void* ptr, size_t new_size) { - if (psramSafe && psramFound()) return ps_realloc(ptr, new_size); // use PSRAM if it exists - else return realloc(ptr, new_size); // fallback + return ps_realloc(ptr, new_size); // use PSRAM } void deallocate(void* pointer) { free(pointer); @@ -893,8 +890,6 @@ WLED_GLOBAL byte optionType; WLED_GLOBAL bool configNeedsWrite _INIT(false); // flag to initiate saving of config WLED_GLOBAL bool doReboot _INIT(false); // flag to initiate reboot from async handlers -WLED_GLOBAL bool psramSafe _INIT(true); // is it safe to use PSRAM (on ESP32 rev.1; compiler fix used "-mfix-esp32-psram-cache-issue") - // status led #if defined(STATUSLED) WLED_GLOBAL unsigned long ledStatusLastMillis _INIT(0); @@ -968,8 +963,11 @@ WLED_GLOBAL 
int8_t spi_sclk _INIT(SPISCLKPIN); // global ArduinoJson buffer #if defined(ARDUINO_ARCH_ESP32) -WLED_GLOBAL JsonDocument *pDoc _INIT(nullptr); WLED_GLOBAL SemaphoreHandle_t jsonBufferLockMutex _INIT(xSemaphoreCreateRecursiveMutex()); +#endif +#ifdef BOARD_HAS_PSRAM +// if board has PSRAM, use it for JSON document (allocated in setup()) +WLED_GLOBAL JsonDocument *pDoc _INIT(nullptr); #else WLED_GLOBAL StaticJsonDocument gDoc; WLED_GLOBAL JsonDocument *pDoc _INIT(&gDoc); From 0328f5ca8a90aef060957df0b7c13ef5565012c8 Mon Sep 17 00:00:00 2001 From: Damian Schneider Date: Fri, 29 Aug 2025 21:06:10 +0200 Subject: [PATCH 06/11] code cleanup --- wled00/FX.h | 6 +++--- wled00/bus_manager.cpp | 2 +- wled00/fcn_declare.h | 3 +-- wled00/wled.cpp | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/wled00/FX.h b/wled00/FX.h index 5e30b52126..692927a610 100644 --- a/wled00/FX.h +++ b/wled00/FX.h @@ -94,10 +94,10 @@ extern byte realtimeMode; // used in getMappedPixelIndex() /* How much data bytes all segments combined may allocate */ #define MAX_SEGMENT_DATA (6*1024) // 6k by default #elif defined(CONFIG_IDF_TARGET_ESP32S2) - #define MAX_NUM_SEGMENTS 20 + #define MAX_NUM_SEGMENTS 32 #define MAX_SEGMENT_DATA (20*1024) // 20k by default (S2 is short on free RAM), limit does not apply if PSRAM is available #else - #define MAX_NUM_SEGMENTS 32 // warning: going beyond 32 may consume too much RAM for stable operation + #define MAX_NUM_SEGMENTS 64 #define MAX_SEGMENT_DATA (64*1024) // 64k by default, limit does not apply if PSRAM is available #endif @@ -621,7 +621,7 @@ class Segment { #endif clearName(); deallocateData(); - d_free(pixels); + p_free(pixels); } Segment& operator= (const Segment &orig); // copy assignment diff --git a/wled00/bus_manager.cpp b/wled00/bus_manager.cpp index 44ed6e0d3f..b65570178f 100644 --- a/wled00/bus_manager.cpp +++ b/wled00/bus_manager.cpp @@ -912,7 +912,7 @@ void BusManager::esp32RMTInvertIdle() { else if (lvl == RMT_IDLE_LEVEL_LOW) lvl 
= RMT_IDLE_LEVEL_HIGH; else continue; rmt_set_idle_level(ch, idle_out, lvl); - u++ + u++; } } #endif diff --git a/wled00/fcn_declare.h b/wled00/fcn_declare.h index e9ea5c4a73..c8223999c4 100644 --- a/wled00/fcn_declare.h +++ b/wled00/fcn_declare.h @@ -579,7 +579,6 @@ extern "C" { #else #define p_malloc d_malloc #define p_calloc d_calloc - #define p_realloc d_realloc #define p_free d_free #endif } @@ -594,7 +593,7 @@ inline size_t getContiguousFreeHeap() { return ESP.getMaxFreeBlockSize(); } // r #define BFRALLOC_PREFER_DRAM (1 << 1) // prefer DRAM over PSRAM #define BFRALLOC_ENFORCE_DRAM (1 << 2) // use DRAM only, no PSRAM #define BFRALLOC_PREFER_PSRAM (1 << 3) // prefer PSRAM over DRAM -#define BFRALLOC_ENFORCE_PSRAM (1 << 4) // use PSRAM if available, otherwise fall back to DRAM +#define BFRALLOC_ENFORCE_PSRAM (1 << 4) // use PSRAM if available, otherwise uses DRAM #define BFRALLOC_CLEAR (1 << 5) // clear allocated buffer after allocation void *allocate_buffer(size_t size, uint32_t type); diff --git a/wled00/wled.cpp b/wled00/wled.cpp index 5fe056b189..053fbd3fa8 100644 --- a/wled00/wled.cpp +++ b/wled00/wled.cpp @@ -171,7 +171,7 @@ void WLED::loop() // reconnect WiFi to clear stale allocations if heap gets too low if (millis() - heapTime > 15000) { - uint32_t heap = getFreeHeapSize(); + uint32_t heap = getContiguousFreeHeap(); if (heap < MIN_HEAP_SIZE && lastHeap < MIN_HEAP_SIZE) { DEBUG_PRINTF_P(PSTR("Heap too low! 
%u\n"), heap); forceReconnect = true; From b8bd2a7e06e4799e904c3a309af099981cdc2024 Mon Sep 17 00:00:00 2001 From: Damian Schneider Date: Fri, 29 Aug 2025 21:36:32 +0200 Subject: [PATCH 07/11] fix declaration and function name --- wled00/fcn_declare.h | 3 ++- wled00/util.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/wled00/fcn_declare.h b/wled00/fcn_declare.h index 4f4dd0f56d..87d16c4e96 100644 --- a/wled00/fcn_declare.h +++ b/wled00/fcn_declare.h @@ -432,7 +432,7 @@ inline uint8_t hw_random8() { return HW_RND_REGISTER; }; inline uint8_t hw_random8(uint32_t upperlimit) { return (hw_random8() * upperlimit) >> 8; }; // input range 0-255 inline uint8_t hw_random8(uint32_t lowerlimit, uint32_t upperlimit) { uint32_t range = upperlimit - lowerlimit; return lowerlimit + hw_random8(range); }; // input range 0-255 -// memory allocation wrappers +// memory allocation wrappers (util.cpp) extern "C" { // prefer DRAM in d_xalloc functions, PSRAM as fallback void *d_malloc(size_t); @@ -452,6 +452,7 @@ extern "C" { #else #define p_malloc d_malloc #define p_calloc d_calloc + #define p_realloc_malloc d_realloc_malloc #define p_free d_free #endif } diff --git a/wled00/util.cpp b/wled00/util.cpp index f9f39397aa..cfe32a2f15 100644 --- a/wled00/util.cpp +++ b/wled00/util.cpp @@ -667,7 +667,7 @@ void *d_calloc(size_t count, size_t size) { } // realloc with malloc fallback, note: on ESPS8266 there is no safe way to ensure MIN_HEAP_SIZE during realloc()s, free buffer and allocate new one -void *realloc_malloc(void *ptr, size_t size) { +void *d_realloc_malloc(void *ptr, size_t size) { free(ptr); return d_malloc(size); } From 477b13a4ddfd843c3af35dced73b1712e14de567 Mon Sep 17 00:00:00 2001 From: Damian Schneider Date: Sun, 14 Sep 2025 19:46:16 +0200 Subject: [PATCH 08/11] reduced MAX_NUM_SEGMENTS if there is no PSRAM --- wled00/FX.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/wled00/FX.h b/wled00/FX.h index 692927a610..89f33d8bbf 
100644 --- a/wled00/FX.h +++ b/wled00/FX.h @@ -97,7 +97,11 @@ extern byte realtimeMode; // used in getMappedPixelIndex() #define MAX_NUM_SEGMENTS 32 #define MAX_SEGMENT_DATA (20*1024) // 20k by default (S2 is short on free RAM), limit does not apply if PSRAM is available #else - #define MAX_NUM_SEGMENTS 64 + #ifdef BOARD_HAS_PSRAM + #define MAX_NUM_SEGMENTS 64 + #else + #define MAX_NUM_SEGMENTS 32 + #endif #define MAX_SEGMENT_DATA (64*1024) // 64k by default, limit does not apply if PSRAM is available #endif From 190262f2b583631a369dfa6193f71e3a21e458d0 Mon Sep 17 00:00:00 2001 From: Damian Schneider Date: Sun, 14 Sep 2025 19:55:00 +0200 Subject: [PATCH 09/11] using p_free() for names as it uses prefer PSRAM to be more consistent --- wled00/FX.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wled00/FX.h b/wled00/FX.h index 89f33d8bbf..0563074155 100644 --- a/wled00/FX.h +++ b/wled00/FX.h @@ -648,7 +648,7 @@ class Segment { inline uint16_t groupLength() const { return grouping + spacing; } inline uint8_t getLightCapabilities() const { return _capabilities; } inline void deactivate() { setGeometry(0,0); } - inline Segment &clearName() { d_free(name); name = nullptr; return *this; } + inline Segment &clearName() { p_free(name); name = nullptr; return *this; } inline Segment &setName(const String &name) { return setName(name.c_str()); } inline static unsigned vLength() { return Segment::_vLength; } From c665d5944e8d2223cefd3561786ae7d6c55861db Mon Sep 17 00:00:00 2001 From: Damian Schneider Date: Mon, 15 Sep 2025 20:47:37 +0200 Subject: [PATCH 10/11] revert change to free heap check, add commented code (for future use) - free heap check in main loop should not be free contiguous heap check, it leads to random strip resets if heap runs low. 
--- wled00/util.cpp | 5 +++++ wled00/wled.cpp | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/wled00/util.cpp b/wled00/util.cpp index cfe32a2f15..a2caf3e7b0 100644 --- a/wled00/util.cpp +++ b/wled00/util.cpp @@ -668,6 +668,11 @@ void *d_calloc(size_t count, size_t size) { // realloc with malloc fallback, note: on ESPS8266 there is no safe way to ensure MIN_HEAP_SIZE during realloc()s, free buffer and allocate new one void *d_realloc_malloc(void *ptr, size_t size) { + //void *buffer = realloc(ptr, size); + //buffer = validateFreeHeap(buffer); + //if (buffer) return buffer; // realloc successful + //d_free(ptr); // free old buffer if realloc failed (or min heap was exceeded) + //return d_malloc(size); // fallback to malloc free(ptr); return d_malloc(size); } diff --git a/wled00/wled.cpp b/wled00/wled.cpp index 95b48319d2..923688106d 100644 --- a/wled00/wled.cpp +++ b/wled00/wled.cpp @@ -171,7 +171,7 @@ void WLED::loop() // reconnect WiFi to clear stale allocations if heap gets too low if (millis() - heapTime > 15000) { - uint32_t heap = getContiguousFreeHeap(); + uint32_t heap = getFreeHeapSize(); if (heap < MIN_HEAP_SIZE && lastHeap < MIN_HEAP_SIZE) { DEBUG_PRINTF_P(PSTR("Heap too low! %u\n"), heap); forceReconnect = true; From 1560ce4ab7ed65930091d7c2941bf94be112ffcf Mon Sep 17 00:00:00 2001 From: Damian Schneider Date: Tue, 16 Sep 2025 19:30:19 +0200 Subject: [PATCH 11/11] use correct free function for consistency --- wled00/FX.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wled00/FX.h b/wled00/FX.h index 0563074155..9ff0cf72c7 100644 --- a/wled00/FX.h +++ b/wled00/FX.h @@ -871,8 +871,8 @@ class WS2812FX { } ~WS2812FX() { - d_free(_pixels); - d_free(_pixelCCT); // just in case + p_free(_pixels); + p_free(_pixelCCT); // just in case d_free(customMappingTable); _mode.clear(); _modeData.clear();