From f6d1f3b43326be2913f049ae1ef09538db165f27 Mon Sep 17 00:00:00 2001 From: Damian Schneider Date: Fri, 15 Aug 2025 20:43:04 +0200 Subject: [PATCH 01/14] Bootloop detection & recovery (#4793) * added boot loop detection and config backup * automatic OTA rollback if loading backup does not fix it * added new file handling functions * adding verification of json files, added config restore at bootup if broken * added function to compare contents of two files for future use (currently not used) --- wled00/cfg.cpp | 24 ++++++- wled00/fcn_declare.h | 26 +++++-- wled00/file.cpp | 153 +++++++++++++++++++++++++++++++++++++++++ wled00/util.cpp | 125 +++++++++++++++++++++++++++++++++ wled00/wled.cpp | 8 +++ wled00/wled_server.cpp | 8 ++- 6 files changed, 337 insertions(+), 7 deletions(-) diff --git a/wled00/cfg.cpp b/wled00/cfg.cpp index 767bd8f29b..fa89309a27 100644 --- a/wled00/cfg.cpp +++ b/wled00/cfg.cpp @@ -636,9 +636,30 @@ bool deserializeConfig(JsonObject doc, bool fromFS) { return (doc["sv"] | true); } - static const char s_cfg_json[] PROGMEM = "/cfg.json"; +bool backupConfig() { + return backupFile(s_cfg_json); +} + +bool restoreConfig() { + return restoreFile(s_cfg_json); +} + +bool verifyConfig() { + return validateJsonFile(s_cfg_json); +} + +// rename config file and reboot +void resetConfig() { + DEBUG_PRINTLN(F("Reset config")); + char backupname[32]; + strcpy(backupname, s_cfg_json); + strcat(backupname, ".rst.json"); + WLED_FS.rename(s_cfg_json, backupname); + doReboot = true; +} + bool deserializeConfigFromFS() { [[maybe_unused]] bool success = deserializeConfigSec(); #ifdef WLED_ADD_EEPROM_SUPPORT @@ -676,6 +697,7 @@ bool deserializeConfigFromFS() { void serializeConfig() { serializeConfigSec(); + backupConfig(); // backup before writing new config DEBUG_PRINTLN(F("Writing settings to /cfg.json...")); diff --git a/wled00/fcn_declare.h b/wled00/fcn_declare.h index 086c107db2..16e4471f31 100644 --- a/wled00/fcn_declare.h +++ b/wled00/fcn_declare.h @@ -24,6 
+24,10 @@ void handleIO(); void IRAM_ATTR touchButtonISR(); //cfg.cpp +bool backupConfig(); +bool restoreConfig(); +bool verifyConfig(); +void resetConfig(); bool deserializeConfig(JsonObject doc, bool fromFS = false); bool deserializeConfigFromFS(); bool deserializeConfigSec(); @@ -114,10 +118,15 @@ bool readObjectFromFileUsingId(const char* file, uint16_t id, JsonDocument* dest bool readObjectFromFile(const char* file, const char* key, JsonDocument* dest); void updateFSInfo(); void closeFile(); -inline bool writeObjectToFileUsingId(const String &file, uint16_t id, JsonDocument* content) { return writeObjectToFileUsingId(file.c_str(), id, content); }; -inline bool writeObjectToFile(const String &file, const char* key, JsonDocument* content) { return writeObjectToFile(file.c_str(), key, content); }; -inline bool readObjectFromFileUsingId(const String &file, uint16_t id, JsonDocument* dest) { return readObjectFromFileUsingId(file.c_str(), id, dest); }; -inline bool readObjectFromFile(const String &file, const char* key, JsonDocument* dest) { return readObjectFromFile(file.c_str(), key, dest); }; +inline bool writeObjectToFileUsingId(const String &file, uint16_t id, const JsonDocument* content) { return writeObjectToFileUsingId(file.c_str(), id, content); }; +inline bool writeObjectToFile(const String &file, const char* key, const JsonDocument* content) { return writeObjectToFile(file.c_str(), key, content); }; +inline bool readObjectFromFileUsingId(const String &file, uint16_t id, JsonDocument* dest, const JsonDocument* filter = nullptr) { return readObjectFromFileUsingId(file.c_str(), id, dest); }; +inline bool readObjectFromFile(const String &file, const char* key, JsonDocument* dest, const JsonDocument* filter = nullptr) { return readObjectFromFile(file.c_str(), key, dest); }; +bool copyFile(const char* src_path, const char* dst_path); +bool backupFile(const char* filename); +bool restoreFile(const char* filename); +bool validateJsonFile(const char* filename); 
+void dumpFilesToSerial(); //hue.cpp void handleHue(); @@ -399,6 +408,15 @@ void enumerateLedmaps(); uint8_t get_random_wheel_index(uint8_t pos); float mapf(float x, float in_min, float in_max, float out_min, float out_max); +void handleBootLoop(); // detect and handle bootloops +#ifndef ESP8266 +void bootloopCheckOTA(); // swap boot image if bootloop is detected instead of restoring config +#endif + +void handleBootLoop(); // detect and handle bootloops +#ifndef ESP8266 +void bootloopCheckOTA(); // swap boot image if bootloop is detected instead of restoring config +#endif // RAII guard class for the JSON Buffer lock // Modeled after std::lock_guard class JSONBufferGuard { diff --git a/wled00/file.cpp b/wled00/file.cpp index bc34672023..15d962cf75 100644 --- a/wled00/file.cpp +++ b/wled00/file.cpp @@ -438,3 +438,156 @@ bool handleFileRead(AsyncWebServerRequest* request, String path){ } return false; } + +// copy a file, delete destination file if incomplete to prevent corrupted files +bool copyFile(const char* src_path, const char* dst_path) { + DEBUG_PRINTF("copyFile from %s to %s\n", src_path, dst_path); + if(!WLED_FS.exists(src_path)) { + DEBUG_PRINTLN(F("file not found")); + return false; + } + + bool success = true; // is set to false on error + File src = WLED_FS.open(src_path, "r"); + File dst = WLED_FS.open(dst_path, "w"); + + if (src && dst) { + uint8_t buf[128]; // copy file in 128-byte blocks + while (src.available() > 0) { + size_t bytesRead = src.read(buf, sizeof(buf)); + if (bytesRead == 0) { + success = false; + break; // error, no data read + } + size_t bytesWritten = dst.write(buf, bytesRead); + if (bytesWritten != bytesRead) { + success = false; + break; // error, not all data written + } + } + } else { + success = false; // error, could not open files + } + if(src) src.close(); + if(dst) dst.close(); + if (!success) { + DEBUG_PRINTLN(F("copy failed")); + WLED_FS.remove(dst_path); // delete incomplete file + } + return success; +} + +// compare 
two files, return true if identical +bool compareFiles(const char* path1, const char* path2) { + DEBUG_PRINTF("compareFile %s and %s\n", path1, path2); + if (!WLED_FS.exists(path1) || !WLED_FS.exists(path2)) { + DEBUG_PRINTLN(F("file not found")); + return false; + } + + bool identical = true; // set to false on mismatch + File f1 = WLED_FS.open(path1, "r"); + File f2 = WLED_FS.open(path2, "r"); + + if (f1 && f2) { + uint8_t buf1[128], buf2[128]; + while (f1.available() > 0 || f2.available() > 0) { + size_t len1 = f1.read(buf1, sizeof(buf1)); + size_t len2 = f2.read(buf2, sizeof(buf2)); + + if (len1 != len2) { + identical = false; + break; // files differ in size or read failed + } + + if (memcmp(buf1, buf2, len1) != 0) { + identical = false; + break; // files differ in content + } + } + } else { + identical = false; // error opening files + } + + if (f1) f1.close(); + if (f2) f2.close(); + return identical; +} + +static const char s_backup_json[] PROGMEM = "/bkp."; + +bool backupFile(const char* filename) { + DEBUG_PRINTF("backup %s \n", filename); + if (!validateJsonFile(filename)) { + DEBUG_PRINTLN(F("broken file")); + return false; + } + char backupname[32]; + snprintf(backupname, sizeof(backupname), "%s%s", s_backup_json, filename + 1); // skip leading '/' in filename + + if (copyFile(filename, backupname)) { + DEBUG_PRINTLN(F("backup ok")); + return true; + } + DEBUG_PRINTLN(F("backup failed")); + return false; +} + +bool restoreFile(const char* filename) { + DEBUG_PRINTF("restore %s \n", filename); + char backupname[32]; + snprintf(backupname, sizeof(backupname), "%s%s", s_backup_json, filename + 1); // skip leading '/' in filename + + if (!WLED_FS.exists(backupname)) { + DEBUG_PRINTLN(F("no backup found")); + return false; + } + + if (!validateJsonFile(backupname)) { + DEBUG_PRINTLN(F("broken backup")); + return false; + } + + if (copyFile(backupname, filename)) { + DEBUG_PRINTLN(F("restore ok")); + return true; + } + DEBUG_PRINTLN(F("restore failed")); + 
return false; +} + +bool validateJsonFile(const char* filename) { + if (!WLED_FS.exists(filename)) return false; + File file = WLED_FS.open(filename, "r"); + if (!file) return false; + StaticJsonDocument<0> doc, filter; // https://arduinojson.org/v6/how-to/validate-json/ + bool result = deserializeJson(doc, file, DeserializationOption::Filter(filter)) == DeserializationError::Ok; + file.close(); + if (!result) { + DEBUG_PRINTF("Invalid JSON file %s\n", filename); + } else { + DEBUG_PRINTF("Valid JSON file %s\n", filename); + } + return result; +} + +// print contents of all files in root dir to Serial except wsec files +void dumpFilesToSerial() { + File rootdir = WLED_FS.open("/", "r"); + File rootfile = rootdir.openNextFile(); + while (rootfile) { + size_t len = strlen(rootfile.name()); + // skip files starting with "wsec" and dont end in .json + if (strncmp(rootfile.name(), "wsec", 4) != 0 && len >= 6 && strcmp(rootfile.name() + len - 5, ".json") == 0) { + Serial.println(rootfile.name()); + while (rootfile.available()) { + Serial.write(rootfile.read()); + } + Serial.println(); + Serial.println(); + } + rootfile.close(); + rootfile = rootdir.openNextFile(); + } +} + diff --git a/wled00/util.cpp b/wled00/util.cpp index 41e3d6c235..fa48e7546a 100644 --- a/wled00/util.cpp +++ b/wled00/util.cpp @@ -1,6 +1,12 @@ #include "wled.h" #include "fcn_declare.h" #include "const.h" +#ifdef ESP8266 +#include "user_interface.h" // for bootloop detection +#elif ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(4, 4, 0) +#include "esp32/rtc.h" // for bootloop detection +#include +#endif //helper to get int value at a position in string @@ -594,3 +600,122 @@ uint8_t get_random_wheel_index(uint8_t pos) { float mapf(float x, float in_min, float in_max, float out_min, float out_max) { return (x - in_min) * (out_max - out_min) / (in_max - in_min) + out_min; } + +// bootloop detection and handling +// checks if the ESP reboots multiple times due to a crash or watchdog timeout +// if a bootloop is 
detected: restore settings from backup, then reset settings, then switch boot image (and repeat) + +#define BOOTLOOP_THRESHOLD 5 // number of consecutive crashes to trigger bootloop detection +#define BOOTLOOP_ACTION_RESTORE 0 // default action: restore config from /cfg.bak +#define BOOTLOOP_ACTION_RESET 1 // if restore does not work, reset config (rename /cfg.json to /cfg.fault) +#define BOOTLOOP_ACTION_OTA 2 // swap the boot partition +#define BOOTLOOP_ACTION_DUMP 3 // nothing seems to help, dump files to serial and reboot (until hardware reset) +#ifdef ESP8266 +#define BOOTLOOP_INTERVAL_TICKS (5 * 160000) // time limit between crashes: ~5 seconds in RTC ticks +#define BOOT_TIME_IDX 0 // index in RTC memory for boot time +#define CRASH_COUNTER_IDX 1 // index in RTC memory for crash counter +#define ACTIONT_TRACKER_IDX 2 // index in RTC memory for boot action +#else +#define BOOTLOOP_INTERVAL_TICKS 5000 // time limit between crashes: ~5 seconds in milliseconds +// variables in RTC_NOINIT memory persist between reboots (but not on hardware reset) +RTC_NOINIT_ATTR static uint32_t bl_last_boottime; +RTC_NOINIT_ATTR static uint32_t bl_crashcounter; +RTC_NOINIT_ATTR static uint32_t bl_actiontracker; +void bootloopCheckOTA() { bl_actiontracker = BOOTLOOP_ACTION_OTA; } // swap boot image if bootloop is detected instead of restoring config +#endif + +// detect bootloop by checking the reset reason and the time since last boot +static bool detectBootLoop() { +#if !defined(ESP8266) + #if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(4, 4, 0) + uint32_t rtctime = esp_rtc_get_time_us() / 1000; // convert to milliseconds + esp_reset_reason_t reason = esp_reset_reason(); + + if (!(reason == ESP_RST_PANIC || reason == ESP_RST_WDT || reason == ESP_RST_INT_WDT || reason == ESP_RST_TASK_WDT)) { + // no crash detected, init variables + bl_crashcounter = 0; + bl_last_boottime = rtctime; + if(reason != ESP_RST_SW) + bl_actiontracker = BOOTLOOP_ACTION_RESTORE; // init action tracker if not an 
intentional reboot (e.g. from OTA or bootloop handler) + } else if (reason == ESP_RST_BROWNOUT) { + // crash due to brownout can't be detected unless using flash memory to store bootloop variables + // this is a simpler way to preemtively revert the config in case current brownout is caused by a bad choice of settings + DEBUG_PRINTLN(F("brownout detected")); + //restoreConfig(); // TODO: blindly restoring config if brownout detected is a bad idea, need a better way (if at all) + } else { + uint32_t rebootinterval = rtctime - bl_last_boottime; + bl_last_boottime = rtctime; // store current runtime for next reboot + if (rebootinterval < BOOTLOOP_INTERVAL_TICKS) { + bl_crashcounter++; + if (bl_crashcounter >= BOOTLOOP_THRESHOLD) { + DEBUG_PRINTLN(F("!BOOTLOOP DETECTED!")); + bl_crashcounter = 0; + return true; + } + } + } + #endif +#else // ESP8266 + rst_info* resetreason = system_get_rst_info(); + uint32_t bl_last_boottime; + uint32_t bl_crashcounter; + uint32_t bl_actiontracker; + uint32_t rtctime = system_get_rtc_time(); + + if (!(resetreason->reason == REASON_EXCEPTION_RST || resetreason->reason == REASON_WDT_RST)) { + // no crash detected, init variables + bl_crashcounter = 0; + ESP.rtcUserMemoryWrite(BOOT_TIME_IDX, &rtctime, sizeof(uint32_t)); + ESP.rtcUserMemoryWrite(CRASH_COUNTER_IDX, &bl_crashcounter, sizeof(uint32_t)); + if(resetreason->reason != REASON_SOFT_RESTART) { + bl_actiontracker = BOOTLOOP_ACTION_RESTORE; // init action tracker if not an intentional reboot (e.g. 
from OTA or bootloop handler) + ESP.rtcUserMemoryWrite(ACTIONT_TRACKER_IDX, &bl_actiontracker, sizeof(uint32_t)); + } + } else { + // system has crashed + ESP.rtcUserMemoryRead(BOOT_TIME_IDX, &bl_last_boottime, sizeof(uint32_t)); + ESP.rtcUserMemoryRead(CRASH_COUNTER_IDX, &bl_crashcounter, sizeof(uint32_t)); + uint32_t rebootinterval = rtctime - bl_last_boottime; + ESP.rtcUserMemoryWrite(BOOT_TIME_IDX, &rtctime, sizeof(uint32_t)); // store current ticks for next reboot + if (rebootinterval < BOOTLOOP_INTERVAL_TICKS) { + bl_crashcounter++; + ESP.rtcUserMemoryWrite(CRASH_COUNTER_IDX, &bl_crashcounter, sizeof(uint32_t)); + if (bl_crashcounter >= BOOTLOOP_THRESHOLD) { + DEBUG_PRINTLN(F("BOOTLOOP DETECTED")); + bl_crashcounter = 0; + ESP.rtcUserMemoryWrite(CRASH_COUNTER_IDX, &bl_crashcounter, sizeof(uint32_t)); + return true; + } + } + } +#endif + return false; // no bootloop detected +} + +void handleBootLoop() { + DEBUG_PRINTLN(F("checking for bootloop")); + if (!detectBootLoop()) return; // no bootloop detected +#ifdef ESP8266 + uint32_t bl_actiontracker; + ESP.rtcUserMemoryRead(ACTIONT_TRACKER_IDX, &bl_actiontracker, sizeof(uint32_t)); +#endif + if (bl_actiontracker == BOOTLOOP_ACTION_RESTORE) { + restoreConfig(); // note: if this fails, could reset immediately. instead just let things play out and save a few lines of code + bl_actiontracker = BOOTLOOP_ACTION_RESET; // reset config if it keeps bootlooping + } else if (bl_actiontracker == BOOTLOOP_ACTION_RESET) { + resetConfig(); + bl_actiontracker = BOOTLOOP_ACTION_OTA; // swap boot partition if it keeps bootlooping. 
On ESP8266 this is the same as BOOTLOOP_ACTION_NONE + } +#ifndef ESP8266 + else if (bl_actiontracker == BOOTLOOP_ACTION_OTA) { + if(Update.canRollBack()) { + DEBUG_PRINTLN(F("Swapping boot partition...")); + Update.rollBack(); // swap boot partition + } + bl_actiontracker = BOOTLOOP_ACTION_DUMP; // out of options + } + #endif + else + dumpFilesToSerial(); + ESP.restart(); // restart cleanly and don't wait for another crash +} diff --git a/wled00/wled.cpp b/wled00/wled.cpp index 0505095168..51f97e6f1c 100644 --- a/wled00/wled.cpp +++ b/wled00/wled.cpp @@ -403,6 +403,9 @@ void WLED::setup() DEBUGFS_PRINTLN(F("FS failed!")); errorFlag = ERR_FS_BEGIN; } + + handleBootLoop(); // check for bootloop and take action (requires WLED_FS) + #ifdef WLED_ADD_EEPROM_SUPPORT else deEEP(); #else @@ -418,6 +421,11 @@ void WLED::setup() WLED_SET_AP_SSID(); // otherwise it is empty on first boot until config is saved multiWiFi.push_back(WiFiConfig(CLIENT_SSID,CLIENT_PASS)); // initialise vector with default WiFi + if(!verifyConfig()) { + if(!restoreConfig()) { + resetConfig(); + } + } DEBUG_PRINTLN(F("Reading config")); bool needsCfgSave = deserializeConfigFromFS(); DEBUG_PRINTF_P(PSTR("heap %u\n"), ESP.getFreeHeap()); diff --git a/wled00/wled_server.cpp b/wled00/wled_server.cpp index e8cbb41ae5..70d6cde4f7 100644 --- a/wled00/wled_server.cpp +++ b/wled00/wled_server.cpp @@ -386,7 +386,10 @@ void initServer() if (Update.hasError()) { serveMessage(request, 500, F("Update failed!"), F("Please check your file and retry!"), 254); } else { - serveMessage(request, 200, F("Update successful!"), F("Rebooting..."), 131); + serveMessage(request, 200, F("Update successful!"), FPSTR(s_rebooting), 131); + #ifndef ESP8266 + bootloopCheckOTA(); // let the bootloop-checker know there was an OTA update + #endif doReboot = true; } },[](AsyncWebServerRequest *request, String filename, size_t index, uint8_t *data, size_t len, bool final){ @@ -399,8 +402,9 @@ void initServer() 
UsermodManager::onUpdateBegin(true); // notify usermods that update is about to begin (some may require task de-init) lastEditTime = millis(); // make sure PIN does not lock during update strip.suspend(); - #ifdef ESP8266 + backupConfig(); // backup current config in case the update ends badly strip.resetSegments(); // free as much memory as you can + #ifdef ESP8266 Update.runAsync(true); #endif Update.begin((ESP.getFreeSketchSpace() - 0x1000) & 0xFFFFF000); From 693f3b0b049eeb6eed3636bca30623815eb14a4d Mon Sep 17 00:00:00 2001 From: Damian Schneider Date: Sat, 16 Aug 2025 08:45:41 +0200 Subject: [PATCH 02/14] add IDF V3 support for bootloop detection --- wled00/util.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/wled00/util.cpp b/wled00/util.cpp index fa48e7546a..61abdd5361 100644 --- a/wled00/util.cpp +++ b/wled00/util.cpp @@ -3,9 +3,13 @@ #include "const.h" #ifdef ESP8266 #include "user_interface.h" // for bootloop detection -#elif ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(4, 4, 0) -#include "esp32/rtc.h" // for bootloop detection +#else #include +#if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(4, 4, 0) + #include "esp32/rtc.h" // for bootloop detection +#elif ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(3, 3, 0) + #include "soc/rtc.h" +#endif #endif @@ -628,7 +632,12 @@ void bootloopCheckOTA() { bl_actiontracker = BOOTLOOP_ACTION_OTA; } // swap boot static bool detectBootLoop() { #if !defined(ESP8266) #if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(4, 4, 0) - uint32_t rtctime = esp_rtc_get_time_us() / 1000; // convert to milliseconds + uint32_t rtctime = esp_rtc_get_time_us() / 1000; // convert to milliseconds + #elif ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(3, 3, 0) + uint64_t rtc_ticks = rtc_time_get(); + uint32_t rtctime = rtc_time_slowclk_to_us(rtc_ticks, rtc_clk_slow_freq_get_hz()) / 1000; // convert to milliseconds + #endif + esp_reset_reason_t reason = esp_reset_reason(); if (!(reason == ESP_RST_PANIC || reason == ESP_RST_WDT || 
reason == ESP_RST_INT_WDT || reason == ESP_RST_TASK_WDT)) { @@ -654,7 +663,6 @@ static bool detectBootLoop() { } } } - #endif #else // ESP8266 rst_info* resetreason = system_get_rst_info(); uint32_t bl_last_boottime; From 501b6e7de5e66eaacfd559060f840a98bd13d967 Mon Sep 17 00:00:00 2001 From: Damian Schneider Date: Sat, 16 Aug 2025 09:11:52 +0200 Subject: [PATCH 03/14] fix compile error brought in from upstream --- wled00/wled_server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wled00/wled_server.cpp b/wled00/wled_server.cpp index 70d6cde4f7..13169f66ec 100644 --- a/wled00/wled_server.cpp +++ b/wled00/wled_server.cpp @@ -386,7 +386,7 @@ void initServer() if (Update.hasError()) { serveMessage(request, 500, F("Update failed!"), F("Please check your file and retry!"), 254); } else { - serveMessage(request, 200, F("Update successful!"), FPSTR(s_rebooting), 131); + serveMessage(request, 200, F("Update successful!"), F("Rebooting..."), 131); #ifndef ESP8266 bootloopCheckOTA(); // let the bootloop-checker know there was an OTA update #endif From 79762f45b2aa9de5b00e42011b1b40ecd49ec9db Mon Sep 17 00:00:00 2001 From: Will Miles Date: Tue, 19 Aug 2025 19:01:33 -0400 Subject: [PATCH 04/14] Fix bootloop if config missing/reset Can't reset the config if there's nothing to reset! 
--- wled00/cfg.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/wled00/cfg.cpp b/wled00/cfg.cpp index fa89309a27..ae9ffe3595 100644 --- a/wled00/cfg.cpp +++ b/wled00/cfg.cpp @@ -651,13 +651,16 @@ bool verifyConfig() { } // rename config file and reboot +// if the file doesn't exist, such as after a reset, do nothing void resetConfig() { - DEBUG_PRINTLN(F("Reset config")); - char backupname[32]; - strcpy(backupname, s_cfg_json); - strcat(backupname, ".rst.json"); - WLED_FS.rename(s_cfg_json, backupname); - doReboot = true; + if (WLED_FS.exists(s_cfg_json)) { + DEBUG_PRINTLN(F("Reset config")); + char backupname[32]; + strcpy(backupname, s_cfg_json); + strcat(backupname, ".rst.json"); + WLED_FS.rename(s_cfg_json, backupname); + doReboot = true; + } } bool deserializeConfigFromFS() { From 4cdaa57dcef11c2a3786300f177f10f00305c03c Mon Sep 17 00:00:00 2001 From: Will Miles Date: Tue, 19 Aug 2025 19:05:05 -0400 Subject: [PATCH 05/14] ESP8266: Commit ACTIONT_TRACKER --- wled00/util.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/wled00/util.cpp b/wled00/util.cpp index 61abdd5361..eb90dd9692 100644 --- a/wled00/util.cpp +++ b/wled00/util.cpp @@ -725,5 +725,8 @@ void handleBootLoop() { #endif else dumpFilesToSerial(); +#ifdef ESP8266 + ESP.rtcUserMemoryWrite(ACTIONT_TRACKER_IDX, &bl_actiontracker, sizeof(uint32_t)); +#endif ESP.restart(); // restart cleanly and don't wait for another crash } From 66573be21264b307b32c53b85f4f83245dd84868 Mon Sep 17 00:00:00 2001 From: Will Miles Date: Tue, 19 Aug 2025 19:55:39 -0400 Subject: [PATCH 06/14] Use consistent naming for backups and reset cfgs Use 'rst.cfg.json' instead of 'cfg.json.rst.json' for configs that were reset. 
--- wled00/cfg.cpp | 5 ++--- wled00/util.cpp | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/wled00/cfg.cpp b/wled00/cfg.cpp index ae9ffe3595..90470d2caf 100644 --- a/wled00/cfg.cpp +++ b/wled00/cfg.cpp @@ -651,13 +651,12 @@ bool verifyConfig() { } // rename config file and reboot -// if the file doesn't exist, such as after a reset, do nothing +// if the cfg file doesn't exist, such as after a reset, do nothing void resetConfig() { if (WLED_FS.exists(s_cfg_json)) { DEBUG_PRINTLN(F("Reset config")); char backupname[32]; - strcpy(backupname, s_cfg_json); - strcat(backupname, ".rst.json"); + snprintf_P(backupname, sizeof(backupname), PSTR("/rst.%s"), &s_cfg_json[1]); WLED_FS.rename(s_cfg_json, backupname); doReboot = true; } diff --git a/wled00/util.cpp b/wled00/util.cpp index eb90dd9692..90e8fb7444 100644 --- a/wled00/util.cpp +++ b/wled00/util.cpp @@ -610,8 +610,8 @@ float mapf(float x, float in_min, float in_max, float out_min, float out_max) { // if a bootloop is detected: restore settings from backup, then reset settings, then switch boot image (and repeat) #define BOOTLOOP_THRESHOLD 5 // number of consecutive crashes to trigger bootloop detection -#define BOOTLOOP_ACTION_RESTORE 0 // default action: restore config from /cfg.bak -#define BOOTLOOP_ACTION_RESET 1 // if restore does not work, reset config (rename /cfg.json to /cfg.fault) +#define BOOTLOOP_ACTION_RESTORE 0 // default action: restore config from /bak.cfg.json +#define BOOTLOOP_ACTION_RESET 1 // if restore does not work, reset config (rename /cfg.json to /rst.cfg.json) #define BOOTLOOP_ACTION_OTA 2 // swap the boot partition #define BOOTLOOP_ACTION_DUMP 3 // nothing seems to help, dump files to serial and reboot (until hardware reset) #ifdef ESP8266 From ed496fb426b527fc0f70ecda5cdff54b512c0aae Mon Sep 17 00:00:00 2001 From: Will Miles Date: Tue, 19 Aug 2025 19:56:26 -0400 Subject: [PATCH 07/14] Add a little more PSTR to bootloop handling --- wled00/file.cpp | 10 +++++----- 1 
file changed, 5 insertions(+), 5 deletions(-) diff --git a/wled00/file.cpp b/wled00/file.cpp index 15d962cf75..efd50123c7 100644 --- a/wled00/file.cpp +++ b/wled00/file.cpp @@ -514,7 +514,7 @@ bool compareFiles(const char* path1, const char* path2) { return identical; } -static const char s_backup_json[] PROGMEM = "/bkp."; +static const char s_backup_fmt[] PROGMEM = "/bkp.%s"; bool backupFile(const char* filename) { DEBUG_PRINTF("backup %s \n", filename); @@ -523,7 +523,7 @@ bool backupFile(const char* filename) { return false; } char backupname[32]; - snprintf(backupname, sizeof(backupname), "%s%s", s_backup_json, filename + 1); // skip leading '/' in filename + snprintf_P(backupname, sizeof(backupname), s_backup_fmt, filename + 1); // skip leading '/' in filename if (copyFile(filename, backupname)) { DEBUG_PRINTLN(F("backup ok")); @@ -536,7 +536,7 @@ bool backupFile(const char* filename) { bool restoreFile(const char* filename) { DEBUG_PRINTF("restore %s \n", filename); char backupname[32]; - snprintf(backupname, sizeof(backupname), "%s%s", s_backup_json, filename + 1); // skip leading '/' in filename + snprintf_P(backupname, sizeof(backupname), s_backup_fmt, filename + 1); // skip leading '/' in filename if (!WLED_FS.exists(backupname)) { DEBUG_PRINTLN(F("no backup found")); @@ -564,9 +564,9 @@ bool validateJsonFile(const char* filename) { bool result = deserializeJson(doc, file, DeserializationOption::Filter(filter)) == DeserializationError::Ok; file.close(); if (!result) { - DEBUG_PRINTF("Invalid JSON file %s\n", filename); + DEBUG_PRINTF_P(PSTR("Invalid JSON file %s\n"), filename); } else { - DEBUG_PRINTF("Valid JSON file %s\n", filename); + DEBUG_PRINTF_P(PSTR("Valid JSON file %s\n"), filename); } return result; } From e82b519c927ce5f74dbaac60321798cd9a3c4372 Mon Sep 17 00:00:00 2001 From: Will Miles Date: Wed, 20 Aug 2025 10:22:42 -0400 Subject: [PATCH 08/14] Use direct references to RTC RAM on ESP8266 ESP8266 RTC RAM requires 32-bit accesses, but there's 
no need to jump through a bunch of functions for it. Use references to simplify access and harmonize the implementation with ESP32. --- wled00/util.cpp | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/wled00/util.cpp b/wled00/util.cpp index 90e8fb7444..f72717dc8f 100644 --- a/wled00/util.cpp +++ b/wled00/util.cpp @@ -614,11 +614,15 @@ float mapf(float x, float in_min, float in_max, float out_min, float out_max) { #define BOOTLOOP_ACTION_RESET 1 // if restore does not work, reset config (rename /cfg.json to /rst.cfg.json) #define BOOTLOOP_ACTION_OTA 2 // swap the boot partition #define BOOTLOOP_ACTION_DUMP 3 // nothing seems to help, dump files to serial and reboot (until hardware reset) + #ifdef ESP8266 #define BOOTLOOP_INTERVAL_TICKS (5 * 160000) // time limit between crashes: ~5 seconds in RTC ticks -#define BOOT_TIME_IDX 0 // index in RTC memory for boot time -#define CRASH_COUNTER_IDX 1 // index in RTC memory for crash counter -#define ACTIONT_TRACKER_IDX 2 // index in RTC memory for boot action +// Place variables in RTC memory via references, since RTC memory is not exposed via the linker in the Non-OS SDK +// Use an offset of 32 as there's some hints that the first 128 bytes of "user" memory are used by the OTA system +// Ref: https://github.com/esp8266/Arduino/blob/78d0d0aceacc1553f45ad8154592b0af22d1eede/cores/esp8266/Esp.cpp#L168 +static volatile uint32_t& bl_last_boottime = *(RTC_USER_MEM + 32); +static volatile uint32_t& bl_crashcounter = *(RTC_USER_MEM + 33); +static volatile uint32_t& bl_actiontracker = *(RTC_USER_MEM + 34); #else #define BOOTLOOP_INTERVAL_TICKS 5000 // time limit between crashes: ~5 seconds in milliseconds // variables in RTC_NOINIT memory persist between reboots (but not on hardware reset) @@ -665,33 +669,24 @@ static bool detectBootLoop() { } #else // ESP8266 rst_info* resetreason = system_get_rst_info(); - uint32_t bl_last_boottime; - uint32_t bl_crashcounter; - uint32_t 
bl_actiontracker; uint32_t rtctime = system_get_rtc_time(); if (!(resetreason->reason == REASON_EXCEPTION_RST || resetreason->reason == REASON_WDT_RST)) { // no crash detected, init variables bl_crashcounter = 0; - ESP.rtcUserMemoryWrite(BOOT_TIME_IDX, &rtctime, sizeof(uint32_t)); - ESP.rtcUserMemoryWrite(CRASH_COUNTER_IDX, &bl_crashcounter, sizeof(uint32_t)); + bl_last_boottime = rtctime; if(resetreason->reason != REASON_SOFT_RESTART) { bl_actiontracker = BOOTLOOP_ACTION_RESTORE; // init action tracker if not an intentional reboot (e.g. from OTA or bootloop handler) - ESP.rtcUserMemoryWrite(ACTIONT_TRACKER_IDX, &bl_actiontracker, sizeof(uint32_t)); } } else { // system has crashed - ESP.rtcUserMemoryRead(BOOT_TIME_IDX, &bl_last_boottime, sizeof(uint32_t)); - ESP.rtcUserMemoryRead(CRASH_COUNTER_IDX, &bl_crashcounter, sizeof(uint32_t)); uint32_t rebootinterval = rtctime - bl_last_boottime; - ESP.rtcUserMemoryWrite(BOOT_TIME_IDX, &rtctime, sizeof(uint32_t)); // store current ticks for next reboot + bl_last_boottime = rtctime; if (rebootinterval < BOOTLOOP_INTERVAL_TICKS) { bl_crashcounter++; - ESP.rtcUserMemoryWrite(CRASH_COUNTER_IDX, &bl_crashcounter, sizeof(uint32_t)); if (bl_crashcounter >= BOOTLOOP_THRESHOLD) { DEBUG_PRINTLN(F("BOOTLOOP DETECTED")); bl_crashcounter = 0; - ESP.rtcUserMemoryWrite(CRASH_COUNTER_IDX, &bl_crashcounter, sizeof(uint32_t)); return true; } } @@ -701,12 +696,9 @@ static bool detectBootLoop() { } void handleBootLoop() { - DEBUG_PRINTLN(F("checking for bootloop")); + DEBUG_PRINTF_P(PSTR("checking for bootloop: time %d, counter %d, action %d\n"), bl_last_boottime, bl_crashcounter, bl_actiontracker); if (!detectBootLoop()) return; // no bootloop detected -#ifdef ESP8266 - uint32_t bl_actiontracker; - ESP.rtcUserMemoryRead(ACTIONT_TRACKER_IDX, &bl_actiontracker, sizeof(uint32_t)); -#endif + if (bl_actiontracker == BOOTLOOP_ACTION_RESTORE) { restoreConfig(); // note: if this fails, could reset immediately. 
instead just let things play out and save a few lines of code bl_actiontracker = BOOTLOOP_ACTION_RESET; // reset config if it keeps bootlooping @@ -725,8 +717,6 @@ void handleBootLoop() { #endif else dumpFilesToSerial(); -#ifdef ESP8266 - ESP.rtcUserMemoryWrite(ACTIONT_TRACKER_IDX, &bl_actiontracker, sizeof(uint32_t)); -#endif - ESP.restart(); // restart cleanly and don't wait for another crash + + ESP.restart(); // restart cleanly and don't wait for another crash } From 69263c198a102f1ea50c01c6c30a31469b39a651 Mon Sep 17 00:00:00 2001 From: Will Miles Date: Wed, 20 Aug 2025 11:32:32 -0400 Subject: [PATCH 09/14] Isolate platform differences in bootloop check Separate the platform-specific code from the logic, so any future changes can be made in fewer places. --- wled00/util.cpp | 172 ++++++++++++++++++++++++++---------------------- 1 file changed, 94 insertions(+), 78 deletions(-) diff --git a/wled00/util.cpp b/wled00/util.cpp index f72717dc8f..51998fab79 100644 --- a/wled00/util.cpp +++ b/wled00/util.cpp @@ -609,114 +609,130 @@ float mapf(float x, float in_min, float in_max, float out_min, float out_max) { // checks if the ESP reboots multiple times due to a crash or watchdog timeout // if a bootloop is detected: restore settings from backup, then reset settings, then switch boot image (and repeat) -#define BOOTLOOP_THRESHOLD 5 // number of consecutive crashes to trigger bootloop detection -#define BOOTLOOP_ACTION_RESTORE 0 // default action: restore config from /bak.cfg.json -#define BOOTLOOP_ACTION_RESET 1 // if restore does not work, reset config (rename /cfg.json to /rst.cfg.json) -#define BOOTLOOP_ACTION_OTA 2 // swap the boot partition -#define BOOTLOOP_ACTION_DUMP 3 // nothing seems to help, dump files to serial and reboot (until hardware reset) +#define BOOTLOOP_INTERVAL_MILLIS 5000 // time limit between crashes: 5 seconds +#define BOOTLOOP_THRESHOLD 5 // number of consecutive crashes to trigger bootloop detection +#define BOOTLOOP_ACTION_RESTORE 0 // 
default action: restore config from /bkp.cfg.json +#define BOOTLOOP_ACTION_RESET 1 // if restore does not work, reset config (rename /cfg.json to /rst.cfg.json) +#define BOOTLOOP_ACTION_OTA 2 // swap the boot partition +#define BOOTLOOP_ACTION_DUMP 3 // nothing seems to help, dump files to serial and reboot (until hardware reset) + +// Platform-agnostic abstraction +enum class ResetReason { + Power, + Software, + Crash, + Brownout +}; #ifdef ESP8266 -#define BOOTLOOP_INTERVAL_TICKS (5 * 160000) // time limit between crashes: ~5 seconds in RTC ticks // Place variables in RTC memory via references, since RTC memory is not exposed via the linker in the Non-OS SDK // Use an offset of 32 as there's some hints that the first 128 bytes of "user" memory are used by the OTA system // Ref: https://github.com/esp8266/Arduino/blob/78d0d0aceacc1553f45ad8154592b0af22d1eede/cores/esp8266/Esp.cpp#L168 static volatile uint32_t& bl_last_boottime = *(RTC_USER_MEM + 32); static volatile uint32_t& bl_crashcounter = *(RTC_USER_MEM + 33); static volatile uint32_t& bl_actiontracker = *(RTC_USER_MEM + 34); + +static inline ResetReason rebootReason() { + rst_info* resetreason = system_get_rst_info(); + if (resetreason->reason == REASON_EXCEPTION_RST || resetreason->reason == REASON_WDT_RST) return ResetReason::Crash; + if (resetreason->reason == REASON_SOFT_RESTART) return ResetReason::Software; + return ResetReason::Power; +} + +static inline uint32_t getRtcMillis() { return system_get_rtc_time() / 160; }; // rtc ticks ~160000Hz + #else -#define BOOTLOOP_INTERVAL_TICKS 5000 // time limit between crashes: ~5 seconds in milliseconds // variables in RTC_NOINIT memory persist between reboots (but not on hardware reset) RTC_NOINIT_ATTR static uint32_t bl_last_boottime; RTC_NOINIT_ATTR static uint32_t bl_crashcounter; RTC_NOINIT_ATTR static uint32_t bl_actiontracker; + +static inline ResetReason rebootReason() { + esp_reset_reason_t reason = esp_reset_reason(); + if (reason == ESP_RST_BROWNOUT) 
return ResetReason::Brownout; + if (reason == ESP_RST_SW) return ResetReason::Software; + if (reason == ESP_RST_PANIC || reason == ESP_RST_WDT || reason == ESP_RST_INT_WDT || reason == ESP_RST_TASK_WDT) return ResetReason::Crash; + return ResetReason::Power; +} + +#if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(4, 4, 0) +static inline uint32_t getRtcMillis() { return esp_rtc_get_time_us() / 1000; } +#elif ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(3, 3, 0) +static inline uint32_t getRtcMillis() { return rtc_time_slowclk_to_us(rtc_time_get(), rtc_clk_slow_freq_get_hz()) / 1000; } +#endif + void bootloopCheckOTA() { bl_actiontracker = BOOTLOOP_ACTION_OTA; } // swap boot image if bootloop is detected instead of restoring config + #endif // detect bootloop by checking the reset reason and the time since last boot static bool detectBootLoop() { -#if !defined(ESP8266) - #if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(4, 4, 0) - uint32_t rtctime = esp_rtc_get_time_us() / 1000; // convert to milliseconds - #elif ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(3, 3, 0) - uint64_t rtc_ticks = rtc_time_get(); - uint32_t rtctime = rtc_time_slowclk_to_us(rtc_ticks, rtc_clk_slow_freq_get_hz()) / 1000; // convert to milliseconds - #endif - - esp_reset_reason_t reason = esp_reset_reason(); + uint32_t rtctime = getRtcMillis(); + bool result = false; - if (!(reason == ESP_RST_PANIC || reason == ESP_RST_WDT || reason == ESP_RST_INT_WDT || reason == ESP_RST_TASK_WDT)) { - // no crash detected, init variables - bl_crashcounter = 0; - bl_last_boottime = rtctime; - if(reason != ESP_RST_SW) - bl_actiontracker = BOOTLOOP_ACTION_RESTORE; // init action tracker if not an intentional reboot (e.g. 
from OTA or bootloop handler) - } else if (reason == ESP_RST_BROWNOUT) { - // crash due to brownout can't be detected unless using flash memory to store bootloop variables - // this is a simpler way to preemtively revert the config in case current brownout is caused by a bad choice of settings - DEBUG_PRINTLN(F("brownout detected")); - //restoreConfig(); // TODO: blindly restoring config if brownout detected is a bad idea, need a better way (if at all) - } else { - uint32_t rebootinterval = rtctime - bl_last_boottime; - bl_last_boottime = rtctime; // store current runtime for next reboot - if (rebootinterval < BOOTLOOP_INTERVAL_TICKS) { - bl_crashcounter++; - if (bl_crashcounter >= BOOTLOOP_THRESHOLD) { - DEBUG_PRINTLN(F("!BOOTLOOP DETECTED!")); - bl_crashcounter = 0; - return true; - } - } - } -#else // ESP8266 - rst_info* resetreason = system_get_rst_info(); - uint32_t rtctime = system_get_rtc_time(); - - if (!(resetreason->reason == REASON_EXCEPTION_RST || resetreason->reason == REASON_WDT_RST)) { - // no crash detected, init variables - bl_crashcounter = 0; - bl_last_boottime = rtctime; - if(resetreason->reason != REASON_SOFT_RESTART) { + switch(rebootReason()) { + case ResetReason::Power: bl_actiontracker = BOOTLOOP_ACTION_RESTORE; // init action tracker if not an intentional reboot (e.g. 
from OTA or bootloop handler) - } - } else { - // system has crashed - uint32_t rebootinterval = rtctime - bl_last_boottime; - bl_last_boottime = rtctime; - if (rebootinterval < BOOTLOOP_INTERVAL_TICKS) { - bl_crashcounter++; - if (bl_crashcounter >= BOOTLOOP_THRESHOLD) { - DEBUG_PRINTLN(F("BOOTLOOP DETECTED")); - bl_crashcounter = 0; - return true; + // fall through + case ResetReason::Software: + // no crash detected, reset counter + bl_crashcounter = 0; + break; + + case ResetReason::Crash: + { + uint32_t rebootinterval = rtctime - bl_last_boottime; + if (rebootinterval < BOOTLOOP_INTERVAL_MILLIS) { + bl_crashcounter++; + if (bl_crashcounter >= BOOTLOOP_THRESHOLD) { + DEBUG_PRINTLN(F("!BOOTLOOP DETECTED!")); + bl_crashcounter = 0; + result = true; + } } + break; } + + case ResetReason::Brownout: + // crash due to brownout can't be detected unless using flash memory to store bootloop variables + DEBUG_PRINTLN(F("brownout detected")); + //restoreConfig(); // TODO: blindly restoring config if brownout detected is a bad idea, need a better way (if at all) + break; } -#endif - return false; // no bootloop detected + + bl_last_boottime = rtctime; // store current runtime for next reboot + + return result; } void handleBootLoop() { DEBUG_PRINTF_P(PSTR("checking for bootloop: time %d, counter %d, action %d\n"), bl_last_boottime, bl_crashcounter, bl_actiontracker); if (!detectBootLoop()) return; // no bootloop detected - if (bl_actiontracker == BOOTLOOP_ACTION_RESTORE) { - restoreConfig(); // note: if this fails, could reset immediately. instead just let things play out and save a few lines of code - bl_actiontracker = BOOTLOOP_ACTION_RESET; // reset config if it keeps bootlooping - } else if (bl_actiontracker == BOOTLOOP_ACTION_RESET) { - resetConfig(); - bl_actiontracker = BOOTLOOP_ACTION_OTA; // swap boot partition if it keeps bootlooping. 
On ESP8266 this is the same as BOOTLOOP_ACTION_NONE - } + switch(bl_actiontracker) { + case BOOTLOOP_ACTION_RESTORE: + restoreConfig(); + ++bl_actiontracker; + break; + case BOOTLOOP_ACTION_RESET: + resetConfig(); + ++bl_actiontracker; + break; + case BOOTLOOP_ACTION_OTA: #ifndef ESP8266 - else if (bl_actiontracker == BOOTLOOP_ACTION_OTA) { - if(Update.canRollBack()) { - DEBUG_PRINTLN(F("Swapping boot partition...")); - Update.rollBack(); // swap boot partition - } - bl_actiontracker = BOOTLOOP_ACTION_DUMP; // out of options + if(Update.canRollBack()) { + DEBUG_PRINTLN(F("Swapping boot partition...")); + Update.rollBack(); // swap boot partition + } + ++bl_actiontracker; + break; +#else + // fall through +#endif + case BOOTLOOP_ACTION_DUMP: + dumpFilesToSerial(); + break; } - #endif - else - dumpFilesToSerial(); - ESP.restart(); // restart cleanly and don't wait for another crash + ESP.restart(); // restart cleanly and don't wait for another crash } From 430a82af85ce84ebb429c6077c87559fd6ca835d Mon Sep 17 00:00:00 2001 From: Will Miles Date: Tue, 26 Aug 2025 21:00:54 -0400 Subject: [PATCH 10/14] Bootloop: Include soft wdt on ESP8266 --- wled00/util.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/wled00/util.cpp b/wled00/util.cpp index 51998fab79..2446c61a5e 100644 --- a/wled00/util.cpp +++ b/wled00/util.cpp @@ -633,9 +633,13 @@ static volatile uint32_t& bl_crashcounter = *(RTC_USER_MEM + 33); static volatile uint32_t& bl_actiontracker = *(RTC_USER_MEM + 34); static inline ResetReason rebootReason() { - rst_info* resetreason = system_get_rst_info(); - if (resetreason->reason == REASON_EXCEPTION_RST || resetreason->reason == REASON_WDT_RST) return ResetReason::Crash; - if (resetreason->reason == REASON_SOFT_RESTART) return ResetReason::Software; + uint32_t resetReason = system_get_rst_info()->reason; + if (resetReason == REASON_EXCEPTION_RST + || resetReason == REASON_WDT_RST + || resetReason == REASON_SOFT_WDT_RST) + return 
ResetReason::Crash; + if (resetReason == REASON_SOFT_RESTART) + return ResetReason::Software; return ResetReason::Power; } From ce5f6d7019bb7cb99d8feef8900f54ce639bac9a Mon Sep 17 00:00:00 2001 From: Will Miles Date: Thu, 28 Aug 2025 21:10:20 -0400 Subject: [PATCH 11/14] Reset crash counter after long interval Don't treat consecutive but infrequent crashes as bootloops. The bootloop recovery actions only make sense when there is no opportunity for a user to reconfigure their system. Suggested by @coderabbitai --- wled00/util.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/wled00/util.cpp b/wled00/util.cpp index 2446c61a5e..0abe02814d 100644 --- a/wled00/util.cpp +++ b/wled00/util.cpp @@ -693,7 +693,11 @@ static bool detectBootLoop() { bl_crashcounter = 0; result = true; } - } + } else { + // Reset counter on long intervals to track only consecutive short-interval crashes + bl_crashcounter = 0; + // TODO: crash reporting goes here + } break; } From d935975ec1828afaca34aa185e900cf24dd45eed Mon Sep 17 00:00:00 2001 From: Will Miles Date: Thu, 28 Aug 2025 21:17:12 -0400 Subject: [PATCH 12/14] Increase boot loop timeout Any repeating crash that prevents a human from logging in and fixing the config should be treated as a boot loop. Increase the detection timeout, so anything that's fast enough to preclude a user fix will trigger the recovery behaviour. 
--- wled00/util.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wled00/util.cpp b/wled00/util.cpp index 0abe02814d..9e113fb43e 100644 --- a/wled00/util.cpp +++ b/wled00/util.cpp @@ -609,7 +609,7 @@ float mapf(float x, float in_min, float in_max, float out_min, float out_max) { // checks if the ESP reboots multiple times due to a crash or watchdog timeout // if a bootloop is detected: restore settings from backup, then reset settings, then switch boot image (and repeat) -#define BOOTLOOP_INTERVAL_MILLIS 5000 // time limit between crashes: 5 seconds +#define BOOTLOOP_INTERVAL_MILLIS 120000 // time limit between crashes: 120 seconds (2 minutes) #define BOOTLOOP_THRESHOLD 5 // number of consecutive crashes to trigger bootloop detection #define BOOTLOOP_ACTION_RESTORE 0 // default action: restore config from /bkp.cfg.json #define BOOTLOOP_ACTION_RESET 1 // if restore does not work, reset config (rename /cfg.json to /rst.cfg.json) From d12bf77831a8dd743da6e768e063e5e517eadcfe Mon Sep 17 00:00:00 2001 From: Damian Schneider Date: Thu, 9 Oct 2025 22:06:17 +0200 Subject: [PATCH 13/14] add out of bound check for action tracker In my test on ESP32 the tracker started out as a huge number; this brings it back on track --- wled00/util.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/wled00/util.cpp b/wled00/util.cpp index 9e113fb43e..301b037b70 100644 --- a/wled00/util.cpp +++ b/wled00/util.cpp @@ -632,8 +632,8 @@ static volatile uint32_t& bl_last_boottime = *(RTC_USER_MEM + 32); static volatile uint32_t& bl_crashcounter = *(RTC_USER_MEM + 33); static volatile uint32_t& bl_actiontracker = *(RTC_USER_MEM + 34); -static inline ResetReason rebootReason() { - uint32_t resetReason = system_get_rst_info()->reason; +static inline ResetReason rebootReason() { + uint32_t resetReason = system_get_rst_info()->reason; if (resetReason == REASON_EXCEPTION_RST || resetReason == REASON_WDT_RST || resetReason == REASON_SOFT_WDT_RST) @@
-674,7 +674,7 @@ static bool detectBootLoop() { uint32_t rtctime = getRtcMillis(); bool result = false; - switch(rebootReason()) { + switch(rebootReason()) { case ResetReason::Power: bl_actiontracker = BOOTLOOP_ACTION_RESTORE; // init action tracker if not an intentional reboot (e.g. from OTA or bootloop handler) // fall through @@ -691,23 +691,24 @@ static bool detectBootLoop() { if (bl_crashcounter >= BOOTLOOP_THRESHOLD) { DEBUG_PRINTLN(F("!BOOTLOOP DETECTED!")); bl_crashcounter = 0; + if(bl_actiontracker > BOOTLOOP_ACTION_DUMP) bl_actiontracker = BOOTLOOP_ACTION_RESTORE; // reset action tracker if out of bounds result = true; } } else { // Reset counter on long intervals to track only consecutive short-interval crashes bl_crashcounter = 0; // TODO: crash reporting goes here - } + } break; } - + case ResetReason::Brownout: // crash due to brownout can't be detected unless using flash memory to store bootloop variables DEBUG_PRINTLN(F("brownout detected")); //restoreConfig(); // TODO: blindly restoring config if brownout detected is a bad idea, need a better way (if at all) break; } - + bl_last_boottime = rtctime; // store current runtime for next reboot return result; From 7d0a33805822484b95a5b28b46f688fe75435ac3 Mon Sep 17 00:00:00 2001 From: Damian Schneider Date: Sat, 8 Nov 2025 20:27:00 +0100 Subject: [PATCH 14/14] re-adding bootloop handling to ota_update --- wled00/ota_update.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/wled00/ota_update.cpp b/wled00/ota_update.cpp index b678def1c9..2f3d6145e6 100644 --- a/wled00/ota_update.cpp +++ b/wled00/ota_update.cpp @@ -73,6 +73,9 @@ static void endOTA(AsyncWebServerRequest *request) { // If the upload is incomplete, Update.end(false) should error out. if (Update.end(context->uploadComplete)) { // Update successful! 
+ #ifndef ESP8266 + bootloopCheckOTA(); // let the bootloop-checker know there was an OTA update + #endif doReboot = true; context->needsRestart = false; } @@ -109,6 +112,7 @@ static bool beginOTA(AsyncWebServerRequest *request, UpdateContext* context) strip.suspend(); strip.resetSegments(); // free as much memory as you can context->needsRestart = true; + backupConfig(); // backup current config in case the update ends badly DEBUG_PRINTF_P(PSTR("OTA Update Start, %x --> %x\n"), (uintptr_t)request,(uintptr_t) context);