ESPHome 2026.3.0
Loading...
Searching...
No Matches
crash_handler.cpp
Go to the documentation of this file.
1#ifdef USE_RP2040
4#ifdef USE_RP2040_CRASH_HANDLER
6#include "crash_handler.h"
7#include "esphome/core/log.h"
8
9#include <cinttypes>
10#include <hardware/regs/addressmap.h>
11#include <hardware/structs/watchdog.h>
12#include <hardware/watchdog.h>
13
14// Cortex-M0+ exception frame offsets (words)
15// When a fault occurs, the CPU pushes: R0, R1, R2, R3, R12, LR, PC, xPSR
// Word indices into the hardware-stacked exception frame
// {R0, R1, R2, R3, R12, LR, PC, xPSR}.
static constexpr uint32_t EF_LR = 5u;  // Link register saved at exception entry
static constexpr uint32_t EF_PC = 6u;  // Program counter saved at exception entry
18
19// Version encoded in the magic value: upper 16 bits are sentinel (0xDEAD),
20// lower 16 bits are the version number. This avoids using a separate scratch
21// register for versioning (we only have 8 total). Future firmware reads the
22// sentinel to confirm it's crash data, then the version to know the layout.
// Upper half-word: fixed 0xDEAD sentinel marking scratch[0] as crash data.
static constexpr uint32_t CRASH_MAGIC_SENTINEL = 0xDEADu << 16;
// Lower half-word: layout version of the scratch-register record.
static constexpr uint32_t CRASH_DATA_VERSION = 1u;
// Complete magic word written to scratch[0] for a version-1 record.
static constexpr uint32_t CRASH_MAGIC_V1 = CRASH_MAGIC_SENTINEL | CRASH_DATA_VERSION;
26
27// We only have 8 scratch registers (32 bytes) that survive watchdog reboot.
28// Use them for the most important data, then scan the stack for code addresses.
29//
30// Scratch register layout:
31// [0] = versioned magic (upper 16 bits = 0xDEAD sentinel, lower 16 bits = version)
32// [1] = PC (program counter at fault)
33// [2] = LR (link register from exception frame)
34// [3] = SP (stack pointer at fault)
35// [4..7] = up to 4 additional code addresses found by scanning the stack
36// (return addresses from callers, giving a deeper backtrace)
37
38// Flash is mapped at XIP_BASE (0x10000000). We use a conservative upper bound
39// to keep false positives low during stack scanning. Wider ranges would match
40// more stale data on the stack that happens to look like code addresses.
41#if defined(PICO_RP2350)
42static constexpr uint32_t FLASH_SCAN_END = XIP_BASE + 0x400000; // 4MB — RP2350 typical max
43#else
44static constexpr uint32_t FLASH_SCAN_END = XIP_BASE + 0x200000; // 2MB — RP2040 typical max
45#endif
46
47static inline bool is_code_addr(uint32_t val) {
48 uint32_t cleared = val & ~1u; // Clear Thumb bit
49 return cleared >= XIP_BASE && cleared < FLASH_SCAN_END;
50}
51
// Number of extra code addresses kept per crash; they occupy scratch[4..7].
static constexpr size_t MAX_BACKTRACE = 4u;
53
54namespace esphome::rp2040 {
55
// Log tag passed to ESP_LOGE by the crash reporter.
static const char *const TAG = "rp2040.crash";
57
58// Placed in .noinit so BSS zero-init cannot race with crash_handler_read_and_clear().
59// The valid field is explicitly cleared in crash_handler_read_and_clear() instead.
60static struct CrashData {
61 bool valid;
62 uint32_t pc;
63 uint32_t lr;
64 uint32_t sp;
65 uint32_t backtrace[MAX_BACKTRACE];
66 uint8_t backtrace_count;
67} s_crash_data __attribute__((section(".noinit")));
68
69bool crash_handler_has_data() { return s_crash_data.valid; }
70
72 s_crash_data.valid = false;
73 uint32_t magic = watchdog_hw->scratch[0];
74 if ((magic & 0xFFFF0000) == CRASH_MAGIC_SENTINEL && (magic & 0xFFFF) == CRASH_DATA_VERSION) {
75 s_crash_data.valid = true;
76 s_crash_data.pc = watchdog_hw->scratch[1];
77 s_crash_data.lr = watchdog_hw->scratch[2];
78 s_crash_data.sp = watchdog_hw->scratch[3];
79 s_crash_data.backtrace_count = 0;
80 for (size_t i = 0; i < MAX_BACKTRACE; i++) {
81 uint32_t addr = watchdog_hw->scratch[4 + i];
82 if (addr == 0)
83 break;
84 s_crash_data.backtrace[i] = addr;
85 s_crash_data.backtrace_count++;
86 }
87 }
88 // Clear scratch registers regardless
89 for (int i = 0; i < 8; i++) {
90 watchdog_hw->scratch[i] = 0;
91 }
92}
93
94// Intentionally uses separate ESP_LOGE calls per line instead of combining into
95// one multi-line log message. This ensures each address appears as its own line
96// on the serial console (miniterm), making it possible to see partial output if
97// the device crashes again during boot, and allowing the CLI's process_stacktrace
98// to match and decode each address individually.
100 if (!s_crash_data.valid)
101 return;
102
103 ESP_LOGE(TAG, "*** CRASH DETECTED ON PREVIOUS BOOT ***");
104 ESP_LOGE(TAG, " PC: 0x%08" PRIX32 " (fault location)", s_crash_data.pc);
105 ESP_LOGE(TAG, " LR: 0x%08" PRIX32 " (return address)", s_crash_data.lr);
106 ESP_LOGE(TAG, " SP: 0x%08" PRIX32, s_crash_data.sp);
107 for (uint8_t i = 0; i < s_crash_data.backtrace_count; i++) {
108 ESP_LOGE(TAG, " BT%d: 0x%08" PRIX32 " (stack backtrace)", i, s_crash_data.backtrace[i]);
109 }
110 // Build addr2line hint with all captured addresses for easy copy-paste
111 char hint[160];
112 int pos = snprintf(hint, sizeof(hint), "Use: addr2line -pfiaC -e firmware.elf 0x%08" PRIX32 " 0x%08" PRIX32,
113 s_crash_data.pc, s_crash_data.lr);
114 for (uint8_t i = 0; i < s_crash_data.backtrace_count && pos < (int) sizeof(hint) - 12; i++) {
115 pos += snprintf(hint + pos, sizeof(hint) - pos, " 0x%08" PRIX32, s_crash_data.backtrace[i]);
116 }
117 ESP_LOGE(TAG, "%s", hint);
118}
119
120} // namespace esphome::rp2040
121
122// --- HardFault handler ---
123// Overrides the weak isr_hardfault from arduino-pico's crt0.S.
124// On Cortex-M0+, the CPU pushes {R0,R1,R2,R3,R12,LR,PC,xPSR} onto the
125// active stack (MSP or PSP). We determine which stack was active,
126// extract key registers, store them in watchdog scratch registers
127// (which survive watchdog reboot), then trigger a reboot.
128
129// Check if a pointer falls within SRAM (valid for stack access).
130// SRAM_BASE and SRAM_END are chip-specific SDK defines:
131// RP2040: 0x20000000 - 0x20042000 (264KB)
132// RP2350: 0x20000000 - 0x20082000 (520KB)
133static inline bool is_valid_sram_ptr(const uint32_t *ptr) {
134 auto addr = reinterpret_cast<uintptr_t>(ptr);
135 // Exception frame is 8 words (32 bytes), so frame+7 must also be in SRAM.
136 // Check alignment (must be word-aligned) and that the full frame fits.
137 return (addr % 4 == 0) && addr >= SRAM_BASE && (addr + 32) <= SRAM_END;
138}
139
140// C handler called from the asm wrapper with the exception frame pointer.
141static void __attribute__((used, noreturn)) hard_fault_handler_c(uint32_t *frame, uint32_t /*exc_return*/) {
142 // watchdog_reboot() overwrites scratch[4]-[7], so we must call it first
143 // then write ALL our data after. The 10ms timeout gives us plenty of time.
144 watchdog_reboot(0, 0, 10);
145
146 // Validate frame pointer before dereferencing. If the HardFault was caused
147 // by a stacking error or corrupted SP, frame may be invalid. Write a minimal
148 // crash marker so we at least know a crash occurred.
149 if (!is_valid_sram_ptr(frame)) {
150 watchdog_hw->scratch[0] = CRASH_MAGIC_V1;
151 watchdog_hw->scratch[1] = 0; // PC unknown
152 watchdog_hw->scratch[2] = 0; // LR unknown
153 watchdog_hw->scratch[3] = reinterpret_cast<uintptr_t>(frame); // Record the bad SP for diagnosis
154 for (uint32_t i = 0; i < MAX_BACKTRACE; i++) {
155 watchdog_hw->scratch[4 + i] = 0;
156 }
157 while (true) {
158 __asm volatile("nop");
159 }
160 }
161
162 // Pre-fault SP: the exception frame is 8 words pushed onto the stack,
163 // so the SP before the fault was frame + 8 words. If xPSR bit 9 is set,
164 // the hardware pushed an extra alignment word to maintain 8-byte stack
165 // alignment (ARMv6-M/ARMv7-M spec), so add 1 more word.
166 static constexpr uint32_t EF_XPSR = 7;
167 uint32_t extra_align = (frame[EF_XPSR] & (1u << 9)) ? 1 : 0;
169 uint32_t pre_fault_sp = reinterpret_cast<uintptr_t>(post_frame);
170
171 // Write key registers
172 watchdog_hw->scratch[0] = CRASH_MAGIC_V1;
173 watchdog_hw->scratch[1] = frame[EF_PC];
174 watchdog_hw->scratch[2] = frame[EF_LR];
175 watchdog_hw->scratch[3] = pre_fault_sp;
176
177 // Scan stack for code addresses to build a deeper backtrace.
178 // The exception frame is 8 words (32 bytes) at 'frame', plus an optional
179 // alignment word. Walk up to 64 words looking for return addresses.
181 // SRAM_END is chip-specific: 0x20042000 (RP2040) or 0x20082000 (RP2350)
182 uint32_t *stack_top = reinterpret_cast<uint32_t *>(SRAM_END);
183 // Scan up to 64 words (256 bytes) — covers typical nested call frames
184 // without scanning too much stale stack data that could produce false positives.
186
187 for (uint32_t *p = scan_start; p < stack_top && p < scan_start + 64 && bt_count < MAX_BACKTRACE; p++) {
188 uint32_t val = *p;
189 // Check if this looks like a code address in flash
190 // Skip if it's the same as PC or LR we already saved
191 if (is_code_addr(val) && val != frame[EF_PC] && val != frame[EF_LR]) {
192 watchdog_hw->scratch[4 + bt_count] = val;
193 bt_count++;
194 }
195 }
196 // Zero remaining slots
197 for (uint32_t i = bt_count; i < MAX_BACKTRACE; i++) {
198 watchdog_hw->scratch[4 + i] = 0;
199 }
200
201 while (true) {
202 __asm volatile("nop");
203 }
204}
205
206// Naked asm wrapper - Cortex-M0+ compatible (no ITE/conditional execution).
207// Determines active stack pointer and branches to C handler.
208// Uses literal pool (.word) for addresses since M0+ has limited immediate encoding.
209//
210// Based on the standard Cortex-M0+ HardFault handler pattern described in:
211// - ARM Application Note AN209: "Using Cortex-M3/M4/M7 Fault Exceptions"
212// (adapted for M0+ which lacks conditional execution instructions)
213// - Memfault: "How to debug a HardFault on an ARM Cortex-M MCU"
214// https://interrupt.memfault.com/blog/cortex-m-hardfault-debug
215// - EEVblog Forum: "Cortex-M0+ Hard Fault handler porting"
216// https://www.eevblog.com/forum/microcontrollers/cortex-m0-hard-fault-handler-porting/
217//
218// The key M0+ adaptation: replaces ITE/MRSEQ/MRSNE (Cortex-M3+) with
219// MOVS+TST+BEQ branch sequence, and uses a literal pool for the C handler address.
220extern "C" void __attribute__((naked, used)) isr_hardfault() {
221 __asm volatile("movs r0, #4 \n" // Prepare bit 2 mask
222 "mov r1, lr \n" // r1 = EXC_RETURN
223 "tst r1, r0 \n" // Test bit 2
224 "beq 1f \n" // If 0, was using MSP
225 "mrs r0, psp \n" // Bit 2 set = PSP was active
226 "b 2f \n"
227 "1: \n"
228 "mrs r0, msp \n" // Bit 2 clear = MSP was active
229 "2: \n"
230 // r0 = exception frame pointer, r1 = EXC_RETURN (still in r1)
231 "ldr r2, 3f \n" // Load C handler address from literal pool
232 "bx r2 \n" // Branch to handler (r0=frame, r1=exc_return)
233 ".align 2 \n"
234 "3: .word %c0 \n" // Literal pool: address of C handler
235 :
236 : "i"(hard_fault_handler_c));
237}
238
239#endif // USE_RP2040_CRASH_HANDLER
240#endif // USE_RP2040
struct @65::@66 __attribute__
mopeka_std_values val[3]
void crash_handler_log()
Log crash data if a crash was detected on previous boot.
bool crash_handler_has_data()
Returns true if crash data was found this boot.
void crash_handler_read_and_clear()
Read crash data from watchdog scratch registers and clear them.
size_t size_t pos
Definition helpers.h:929
uint32_t * scan_start
static void uint32_t
uint32_t * stack_top
uint32_t bt_count
uint32_t extra_align
uint32_t pre_fault_sp
uint32_t * post_frame