Skip to content

Commit ae8761f

Browse files
committed
drivers: nrf_mram: verify writes by detecting Bus faults
Fix the case where MRAM writes get corrupted under some conditions. After each MRAM_WORD_SIZE write, a read of the written/erased MRAM word is performed while bus faults are masked, to avoid halting the system. If an error is detected in the BFSR status bits, the write is retried up to CONFIG_NRF_MRAM_MAX_RETRIES (default 20) times. Signed-off-by: Riadh Ghaddab <riadh.ghaddab@nordicsemi.no>
1 parent 09a2768 commit ae8761f

2 files changed

Lines changed: 180 additions & 44 deletions

File tree

drivers/flash/Kconfig.nrf_mram

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ config SOC_FLASH_NRF_MRAM
88
bool "Nordic Semiconductor flash driver for MRAM"
99
default y
1010
depends on DT_HAS_NORDIC_MRAM_ENABLED
11-
select NRFX_MRAMC
11+
depends on HAS_NRFX
1212
select FLASH_HAS_DRIVER_ENABLED
1313
select FLASH_HAS_PAGE_LAYOUT
1414
select FLASH_HAS_NO_EXPLICIT_ERASE
@@ -21,3 +21,15 @@ config SOC_FLASH_NRF_MRAM
2121
Note that MRAM words are auto-erased when written to, but writing to a
2222
pre-erased area is faster. Hence, the erase API is not required, but
2323
it can be used to amortize write performance for some use cases.
24+
25+
config NRF_MRAM_MAX_RETRIES
26+
int "Maximum number of retries for MRAM write/erase operations"
27+
default 20
28+
range 0 255
29+
depends on SOC_FLASH_NRF_MRAM
30+
help
31+
This option sets the maximum number of retry attempts for MRAM
32+
write and erase operations when verification fails. Each retry
33+
attempts to rewrite or re-erase the data to ensure reliability.
34+
Higher values increase reliability but may impact performance
35+
in case of persistent failures.

drivers/flash/soc_flash_nrf_mram.c

Lines changed: 167 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
#include <zephyr/logging/log.h>
1111
#include <zephyr/sys/barrier.h>
1212
#include <zephyr/cache.h>
13-
#include <nrfx_mramc.h>
1413
#include <ironside/se/versions.h>
1514
#if defined(CONFIG_MRAM_LATENCY)
1615
#include "../soc/nordic/common/mram_latency.h"
@@ -32,6 +31,13 @@ LOG_MODULE_REGISTER(flash_nrf_mram, CONFIG_FLASH_LOG_LEVEL);
3231
#define WRITE_BLOCK_SIZE DT_INST_PROP_OR(0, write_block_size, MRAM_WORD_SIZE)
3332
#define ERASE_BLOCK_SIZE DT_INST_PROP_OR(0, erase_block_size, WRITE_BLOCK_SIZE)
3433

34+
#if (WRITE_BLOCK_SIZE & MRAM_WORD_MASK)
35+
#warning "write-block-size is not a multiple of 16 bytes. \
36+
This may lead to data corruption in corner cases where partial writes to MRAM words are needed. \
37+
If a partial write fails and a read-modify-write is needed for recovery, the read may return \
38+
corrupted data due to the previous corrupted write, which can cause the recovery to fail as well."
39+
#endif
40+
3541
#define ERASE_VALUE 0xff
3642

3743
#define SOC_NRF_MRAM_BANK_11_OFFSET (MRAM_SIZE / 2)
@@ -45,8 +51,123 @@ BUILD_ASSERT((ERASE_BLOCK_SIZE % WRITE_BLOCK_SIZE) == 0,
4551

4652
struct nrf_mram_data_t {
4753
uint8_t ironside_se_ver;
54+
struct k_mutex nrf_mram_mutex;
4855
};
4956

57+
/**
58+
* Safely probes one MRAM word to detect a BusFault.
59+
*
60+
* The function temporarily masks fault handling and ignores BusFault escalation
61+
* while reading one aligned MRAM word. It then checks and clears BFSR status
62+
* bits to determine whether the read faulted.
63+
*
64+
* @param addr Address to probe. The read is performed on the 16-byte-aligned
65+
* base address that contains this location.
66+
*
67+
* @retval 0 Read completed without BusFault.
68+
* @retval non-zero BusFault was observed during the read.
69+
*/
70+
uint32_t nrf_mram_safe_read_word(uint32_t addr)
71+
{
72+
uint8_t rbuf[MRAM_WORD_SIZE];
73+
uint32_t faulted;
74+
unsigned int irq_lock_key = irq_lock();
75+
76+
/* Clear any pre-existing BusFault status bits (write-1-to-clear) */
77+
SCB->CFSR = SCB_CFSR_BUSFAULTSR_Msk; /* 0x0000FF00 — entire BFSR byte */
78+
79+
__set_FAULTMASK(1);
80+
SCB->CCR |= SCB_CCR_BFHFNMIGN_Msk;
81+
82+
/* Ensure that all operations are in effect before doing the risky read */
83+
barrier_dsync_fence_full();
84+
barrier_isync_fence_full();
85+
86+
/* Read operation that may fault */
87+
memcpy(rbuf, (void *)(addr & ~MRAM_WORD_MASK), MRAM_WORD_SIZE);
88+
89+
/* DSB ensures the load and any resulting fault status write complete */
90+
barrier_dsync_fence_full();
91+
92+
/* Read fault status BEFORE re-enabling faults */
93+
faulted = (SCB->CFSR & SCB_CFSR_BUSFAULTSR_Msk) != 0;
94+
95+
/* Clear whatever was recorded so we don't leave stale status */
96+
SCB->CFSR = SCB_CFSR_BUSFAULTSR_Msk;
97+
98+
__set_FAULTMASK(0);
99+
SCB->CCR &= ~SCB_CCR_BFHFNMIGN_Msk;
100+
101+
barrier_dsync_fence_full();
102+
barrier_isync_fence_full();
103+
104+
irq_unlock(irq_lock_key);
105+
106+
return faulted; /* 0 = read succeeded, non-zero = bus fault occurred */
107+
}
108+
109+
static int nrf_mram_write_and_verify(uint32_t addr, const void *data, size_t len)
110+
{
111+
uint8_t retries = CONFIG_NRF_MRAM_MAX_RETRIES;
112+
#if (WRITE_BLOCK_SIZE & MRAM_WORD_MASK)
113+
uint8_t rbuf[MRAM_WORD_SIZE];
114+
115+
if (len % MRAM_WORD_SIZE) {
116+
/* For partial word writes, we need to read the existing data first to avoid
117+
* overwriting the unwritten bytes in the same word.
118+
*/
119+
memcpy(rbuf + len, (void *)(addr + len), MRAM_WORD_SIZE - len);
120+
memcpy(rbuf, data, len);
121+
data = rbuf;
122+
len = MRAM_WORD_SIZE;
123+
}
124+
#endif
125+
126+
while (retries--) {
127+
memcpy((void *)addr, data, len);
128+
if (!nrf_mram_safe_read_word(addr)) {
129+
return 0;
130+
}
131+
LOG_ERR("MRAM write verification failed at address 0x%x, retrying... (%u retries "
132+
"left)",
133+
addr, retries);
134+
}
135+
136+
return -EIO;
137+
}
138+
139+
static int nrf_mram_erase_and_verify(uint32_t addr, size_t len)
140+
{
141+
uint8_t retries = CONFIG_NRF_MRAM_MAX_RETRIES;
142+
#if (WRITE_BLOCK_SIZE & MRAM_WORD_MASK)
143+
uint8_t rbuf[MRAM_WORD_SIZE];
144+
145+
if (len % MRAM_WORD_SIZE) {
146+
/* For partial word writes, we need to read the existing data first to avoid
147+
* overwriting the unwritten bytes in the same word.
148+
*/
149+
memcpy(rbuf, (void *)(addr + len), MRAM_WORD_SIZE - len);
150+
}
151+
#endif
152+
153+
while (retries--) {
154+
memset((void *)addr, ERASE_VALUE, len);
155+
#if (WRITE_BLOCK_SIZE & MRAM_WORD_MASK)
156+
if (len % MRAM_WORD_SIZE) {
157+
memcpy((void *)addr, rbuf, MRAM_WORD_SIZE - len);
158+
}
159+
#endif
160+
if (!nrf_mram_safe_read_word(addr)) {
161+
return 0;
162+
}
163+
LOG_ERR("MRAM erase verification failed at address 0x%x, retrying... (%u retries "
164+
"left)",
165+
addr, retries);
166+
}
167+
168+
return -EIO;
169+
}
170+
50171
#ifdef CONFIG_MRAM_LATENCY
51172
static inline bool nrf_mram_ready(uint32_t addr, uint8_t ironside_se_ver)
52173
{
@@ -55,9 +176,9 @@ static inline bool nrf_mram_ready(uint32_t addr, uint8_t ironside_se_ver)
55176
}
56177

57178
if (addr < SOC_NRF_MRAM_BANK_11_ADDRESS) {
58-
return nrf_mramc_ready_get(NRF_MRAMC110);
179+
return (bool)NRF_MRAMC110->READY;
59180
} else {
60-
return nrf_mramc_ready_get(NRF_MRAMC111);
181+
return (bool)NRF_MRAMC111->READY;
61182
}
62183
}
63184
#else
@@ -86,7 +207,7 @@ static uintptr_t validate_and_map_addr(off_t offset, size_t len, bool must_align
86207

87208
const uintptr_t addr = MRAM_START + offset;
88209

89-
if (WRITE_BLOCK_SIZE > 1 && must_align &&
210+
if ((WRITE_BLOCK_SIZE > 1) && must_align &&
90211
unlikely((addr % WRITE_BLOCK_SIZE) != 0 || (len % WRITE_BLOCK_SIZE) != 0)) {
91212
LOG_ERR("invalid alignment: %p:%zu", (void *)addr, len);
92213
return 0;
@@ -95,33 +216,6 @@ static uintptr_t validate_and_map_addr(off_t offset, size_t len, bool must_align
95216
return addr;
96217
}
97218

98-
/**
99-
* @param[in] addr_end Last modified MRAM address (not inclusive).
100-
*/
101-
static void commit_changes(uintptr_t addr_end)
102-
{
103-
/* Barrier following our last write. */
104-
barrier_dmem_fence_full();
105-
106-
if ((WRITE_BLOCK_SIZE & MRAM_WORD_MASK) == 0 || (addr_end & MRAM_WORD_MASK) == 0) {
107-
/* Our last operation was MRAM word-aligned, so we're done.
108-
* Note: if WRITE_BLOCK_SIZE is a multiple of MRAM_WORD_SIZE,
109-
* then this was already checked in validate_and_map_addr().
110-
*/
111-
return;
112-
}
113-
114-
/* Get the most significant byte (MSB) of the last MRAM word we were modifying.
115-
* Writing to this byte makes the MRAM controller commit other pending writes to that word.
116-
*/
117-
addr_end |= MRAM_WORD_MASK;
118-
119-
/* Issue a dummy write, since we didn't have anything to write here.
120-
* Doing this lets us finalize our changes before we exit the driver API.
121-
*/
122-
sys_write8(sys_read8(addr_end), addr_end);
123-
}
124-
125219
static int nrf_mram_read(const struct device *dev, off_t offset, void *data, size_t len)
126220
{
127221
ARG_UNUSED(dev);
@@ -143,6 +237,7 @@ static int nrf_mram_write(const struct device *dev, off_t offset, const void *da
143237
{
144238
struct nrf_mram_data_t *nrf_mram_data = dev->data;
145239
uint8_t ironside_se_ver = nrf_mram_data->ironside_se_ver;
240+
int ret = 0;
146241

147242
const uintptr_t addr = validate_and_map_addr(offset, len, true);
148243

@@ -152,40 +247,54 @@ static int nrf_mram_write(const struct device *dev, off_t offset, const void *da
152247

153248
LOG_DBG("write: %p:%zu", (void *)addr, len);
154249

250+
k_mutex_lock(&nrf_mram_data->nrf_mram_mutex, K_FOREVER);
251+
155252
if (ironside_se_ver >= IRONSIDE_SE_SUPPORT_READY_VER) {
156253
#if defined(CONFIG_MRAM_LATENCY)
157254
mram_no_latency_sync_request();
158255
#endif
159256
}
257+
160258
for (uint32_t i = 0; i < (len / MRAM_WORD_SIZE); i++) {
161259
while (!nrf_mram_ready(addr + (i * MRAM_WORD_SIZE), ironside_se_ver)) {
162260
/* Wait until MRAM controller is ready */
163261
}
164-
memcpy((void *)(addr + (i * MRAM_WORD_SIZE)),
165-
(void *)((uintptr_t)data + (i * MRAM_WORD_SIZE)), MRAM_WORD_SIZE);
262+
ret = nrf_mram_write_and_verify(addr + (i * MRAM_WORD_SIZE),
263+
(void *)((uintptr_t)data + (i * MRAM_WORD_SIZE)),
264+
MRAM_WORD_SIZE);
265+
if (ret) {
266+
goto unlock;
267+
}
166268
}
167-
269+
#if (WRITE_BLOCK_SIZE & MRAM_WORD_MASK)
168270
if (len % MRAM_WORD_SIZE) {
169271
while (!nrf_mram_ready(addr + (len & ~MRAM_WORD_MASK), ironside_se_ver)) {
170272
/* Wait until MRAM controller is ready */
171273
}
172-
memcpy((void *)(addr + (len & ~MRAM_WORD_MASK)),
173-
(void *)((uintptr_t)data + (len & ~MRAM_WORD_MASK)), len & MRAM_WORD_MASK);
274+
ret = nrf_mram_write_and_verify(addr + (len & ~MRAM_WORD_MASK),
275+
(void *)((uintptr_t)data + (len & ~MRAM_WORD_MASK)),
276+
len & MRAM_WORD_MASK);
277+
if (ret) {
278+
goto unlock;
279+
}
174280
}
175-
commit_changes(addr + len);
281+
#endif
176282
if (ironside_se_ver >= IRONSIDE_SE_SUPPORT_READY_VER) {
177283
#if defined(CONFIG_MRAM_LATENCY)
178284
mram_no_latency_sync_release();
179285
#endif
180286
}
181287

182-
return 0;
288+
unlock:
289+
k_mutex_unlock(&nrf_mram_data->nrf_mram_mutex);
290+
return ret;
183291
}
184292

185293
static int nrf_mram_erase(const struct device *dev, off_t offset, size_t size)
186294
{
187295
struct nrf_mram_data_t *nrf_mram_data = dev->data;
188296
uint8_t ironside_se_ver = nrf_mram_data->ironside_se_ver;
297+
int ret = 0;
189298

190299
const uintptr_t addr = validate_and_map_addr(offset, size, true);
191300

@@ -195,6 +304,8 @@ static int nrf_mram_erase(const struct device *dev, off_t offset, size_t size)
195304

196305
LOG_DBG("erase: %p:%zu", (void *)addr, size);
197306

307+
k_mutex_lock(&nrf_mram_data->nrf_mram_mutex, K_FOREVER);
308+
198309
/* Ensure that the mramc banks are powered on */
199310
if (ironside_se_ver >= IRONSIDE_SE_SUPPORT_READY_VER) {
200311
#if defined(CONFIG_MRAM_LATENCY)
@@ -205,23 +316,34 @@ static int nrf_mram_erase(const struct device *dev, off_t offset, size_t size)
205316
while (!nrf_mram_ready(addr + (i * MRAM_WORD_SIZE), ironside_se_ver)) {
206317
/* Wait until MRAM controller is ready */
207318
}
208-
memset((void *)(addr + (i * MRAM_WORD_SIZE)), ERASE_VALUE, MRAM_WORD_SIZE);
319+
ret = nrf_mram_erase_and_verify(addr + (i * MRAM_WORD_SIZE), MRAM_WORD_SIZE);
320+
if (ret) {
321+
goto unlock;
322+
}
209323
}
324+
325+
#if (WRITE_BLOCK_SIZE & MRAM_WORD_MASK)
210326
if (size % MRAM_WORD_SIZE) {
211327
while (!nrf_mram_ready(addr + (size & ~MRAM_WORD_MASK), ironside_se_ver)) {
212328
/* Wait until MRAM controller is ready */
213329
}
214-
memset((void *)(addr + (size & ~MRAM_WORD_MASK)), ERASE_VALUE,
215-
size & MRAM_WORD_MASK);
330+
ret = nrf_mram_erase_and_verify(addr + (size & ~MRAM_WORD_MASK),
331+
size & MRAM_WORD_MASK);
332+
if (ret) {
333+
goto unlock;
334+
}
216335
}
217-
commit_changes(addr + size);
336+
#endif
337+
218338
if (ironside_se_ver >= IRONSIDE_SE_SUPPORT_READY_VER) {
219339
#if defined(CONFIG_MRAM_LATENCY)
220340
mram_no_latency_sync_release();
221341
#endif
222342
}
223343

224-
return 0;
344+
unlock:
345+
k_mutex_unlock(&nrf_mram_data->nrf_mram_mutex);
346+
return ret;
225347
}
226348

227349
static int nrf_mram_get_size(const struct device *dev, uint64_t *size)
@@ -285,6 +407,8 @@ static int nrf_mram_init(const struct device *dev)
285407
#endif
286408
LOG_DBG("Ironside SE version: %u", nrf_mram_data->ironside_se_ver);
287409

410+
k_mutex_init(&nrf_mram_data->nrf_mram_mutex);
411+
288412
return 0;
289413
}
290414

0 commit comments

Comments
 (0)