From a6df18e60a78d7ad5600ea920fa0b1063692f5a9 Mon Sep 17 00:00:00 2001 From: Clive Blackledge Date: Wed, 15 Oct 2025 04:08:06 -0700 Subject: [PATCH] Guarding PhoneAPI node-info staging with mutex to prevent BLE future foot-gun (#8354) * Eliminating foot-gun and placing Phone NodeInfo into a mutex * Swapping over to concurrency::Lock from mutex --- src/mesh/PhoneAPI.cpp | 103 ++++++++++++++++++++++----------- src/mesh/PhoneAPI.h | 3 + src/nimble/NimbleBluetooth.cpp | 2 +- 3 files changed, 72 insertions(+), 36 deletions(-) diff --git a/src/mesh/PhoneAPI.cpp b/src/mesh/PhoneAPI.cpp index 9eeadf5a2..36bd577e1 100644 --- a/src/mesh/PhoneAPI.cpp +++ b/src/mesh/PhoneAPI.cpp @@ -15,6 +15,7 @@ #include "Router.h" #include "SPILock.h" #include "TypeConversions.h" +#include "concurrency/LockGuard.h" #include "main.h" #include "xmodem.h" @@ -71,8 +72,12 @@ void PhoneAPI::handleStartConfig() LOG_DEBUG("Got %d files in manifest", filesManifest.size()); LOG_INFO("Start API client config"); - nodeInfoForPhone.num = 0; // Don't keep returning old nodeinfos - nodeInfoQueue.clear(); + // Protect against concurrent BLE callbacks: they run in NimBLE's FreeRTOS task and also touch nodeInfoQueue. + { + concurrency::LockGuard guard(&nodeInfoMutex); + nodeInfoForPhone = {}; + nodeInfoQueue.clear(); + } resetReadIndex(); } @@ -94,8 +99,12 @@ void PhoneAPI::close() onConnectionChanged(false); fromRadioScratch = {}; toRadioScratch = {}; - nodeInfoForPhone = {}; - nodeInfoQueue.clear(); + // Clear cached node info under lock because NimBLE callbacks can still be draining it. + { + concurrency::LockGuard guard(&nodeInfoMutex); + nodeInfoForPhone = {}; + nodeInfoQueue.clear(); + } packetForPhone = NULL; filesManifest.clear(); fromRadioNum = 0; @@ -241,13 +250,20 @@ size_t PhoneAPI::getFromRadio(uint8_t *buf) LOG_DEBUG("Send My NodeInfo"); auto us = nodeDB->readNextMeshNode(readIndex); if (us) { - nodeInfoForPhone = TypeConversions::ConvertToNodeInfo(us); - nodeInfoForPhone.has_hops_away = false; - nodeInfoForPhone.is_favorite = true; + auto info = TypeConversions::ConvertToNodeInfo(us); + info.has_hops_away = false; + info.is_favorite = true; + { + concurrency::LockGuard guard(&nodeInfoMutex); + nodeInfoForPhone = info; + } fromRadioScratch.which_payload_variant = meshtastic_FromRadio_node_info_tag; - fromRadioScratch.node_info = nodeInfoForPhone; + fromRadioScratch.node_info = info; // Should allow us to resume sending NodeInfo in STATE_SEND_OTHER_NODEINFOS - nodeInfoForPhone.num = 0; + { + concurrency::LockGuard guard(&nodeInfoMutex); + nodeInfoForPhone.num = 0; + } } if (config_nonce == SPECIAL_NONCE_ONLY_NODES) { // If client only wants node info, jump directly to sending nodes @@ -434,23 +450,30 @@ size_t PhoneAPI::getFromRadio(uint8_t *buf) case STATE_SEND_OTHER_NODEINFOS: { LOG_DEBUG("Send known nodes"); - if (nodeInfoForPhone.num == 0 && !nodeInfoQueue.empty()) { - // Serve the next cached node without re-reading from the DB iterator. - nodeInfoForPhone = nodeInfoQueue.front(); - nodeInfoQueue.pop_front(); + meshtastic_NodeInfo infoToSend = {}; + { + concurrency::LockGuard guard(&nodeInfoMutex); + if (nodeInfoForPhone.num == 0 && !nodeInfoQueue.empty()) { + // Serve the next cached node without re-reading from the DB iterator. + nodeInfoForPhone = nodeInfoQueue.front(); + nodeInfoQueue.pop_front(); + } + infoToSend = nodeInfoForPhone; + if (infoToSend.num != 0) + nodeInfoForPhone = {}; } - if (nodeInfoForPhone.num != 0) { + if (infoToSend.num != 0) { // Just in case we stored a different user.id in the past, but should never happen going forward - sprintf(nodeInfoForPhone.user.id, "!%08x", nodeInfoForPhone.num); - LOG_DEBUG("nodeinfo: num=0x%x, lastseen=%u, id=%s, name=%s", nodeInfoForPhone.num, nodeInfoForPhone.last_heard, - nodeInfoForPhone.user.id, nodeInfoForPhone.user.long_name); + sprintf(infoToSend.user.id, "!%08x", infoToSend.num); + LOG_DEBUG("nodeinfo: num=0x%x, lastseen=%u, id=%s, name=%s", infoToSend.num, infoToSend.last_heard, + infoToSend.user.id, infoToSend.user.long_name); fromRadioScratch.which_payload_variant = meshtastic_FromRadio_node_info_tag; - fromRadioScratch.node_info = nodeInfoForPhone; - nodeInfoForPhone = {}; + fromRadioScratch.node_info = infoToSend; prefetchNodeInfos(); } else { LOG_DEBUG("Done sending nodeinfo"); + concurrency::LockGuard guard(&nodeInfoMutex); nodeInfoQueue.clear(); state = STATE_SEND_FILEMANIFEST; // Go ahead and send that ID right now @@ -559,20 +582,23 @@ void PhoneAPI::prefetchNodeInfos() { bool added = false; // Keep the queue topped up so BLE reads stay responsive even if DB fetches take a moment. - while (nodeInfoQueue.size() < kNodePrefetchDepth) { - auto nextNode = nodeDB->readNextMeshNode(readIndex); - if (!nextNode) - break; + { + concurrency::LockGuard guard(&nodeInfoMutex); + while (nodeInfoQueue.size() < kNodePrefetchDepth) { + auto nextNode = nodeDB->readNextMeshNode(readIndex); + if (!nextNode) + break; - auto info = TypeConversions::ConvertToNodeInfo(nextNode); - bool isUs = info.num == nodeDB->getNodeNum(); - info.hops_away = isUs ? 0 : info.hops_away; - info.last_heard = isUs ? getValidTime(RTCQualityFromNet) : info.last_heard; - info.snr = isUs ? 0 : info.snr; - info.via_mqtt = isUs ? false : info.via_mqtt; - info.is_favorite = info.is_favorite || isUs; - nodeInfoQueue.push_back(info); - added = true; + auto info = TypeConversions::ConvertToNodeInfo(nextNode); + bool isUs = info.num == nodeDB->getNodeNum(); + info.hops_away = isUs ? 0 : info.hops_away; + info.last_heard = isUs ? getValidTime(RTCQualityFromNet) : info.last_heard; + info.snr = isUs ? 0 : info.snr; + info.via_mqtt = isUs ? false : info.via_mqtt; + info.is_favorite = info.is_favorite || isUs; + nodeInfoQueue.push_back(info); + added = true; + } } if (added) @@ -614,10 +640,17 @@ bool PhoneAPI::available() case STATE_SEND_COMPLETE_ID: return true; - case STATE_SEND_OTHER_NODEINFOS: - if (nodeInfoQueue.empty()) - prefetchNodeInfos(); + case STATE_SEND_OTHER_NODEINFOS: { + concurrency::LockGuard guard(&nodeInfoMutex); + if (nodeInfoQueue.empty()) { + // Drop the lock before prefetching; prefetchNodeInfos() will re-acquire it. + goto PREFETCH_NODEINFO; + } + } return true; // Always say we have something, because we might need to advance our state machine + PREFETCH_NODEINFO: + prefetchNodeInfos(); + return true; case STATE_SEND_PACKETS: { if (!queueStatusPacketForPhone) queueStatusPacketForPhone = service->getQueueStatusForPhone(); diff --git a/src/mesh/PhoneAPI.h b/src/mesh/PhoneAPI.h index 692fdd0b9..a8d0faa28 100644 --- a/src/mesh/PhoneAPI.h +++ b/src/mesh/PhoneAPI.h @@ -1,6 +1,7 @@ #pragma once #include "Observer.h" +#include "concurrency/Lock.h" #include "mesh-pb-constants.h" #include "meshtastic/portnums.pb.h" #include @@ -84,6 +85,8 @@ class PhoneAPI std::deque nodeInfoQueue; // Tunable size of the node info cache so we can keep BLE reads non-blocking. static constexpr size_t kNodePrefetchDepth = 4; + // Protect nodeInfoForPhone + nodeInfoQueue because NimBLE callbacks run in a separate FreeRTOS task. + concurrency::Lock nodeInfoMutex; meshtastic_ToRadio toRadioScratch = { 0}; // this is a static scratch object, any data must be copied elsewhere before returning diff --git a/src/nimble/NimbleBluetooth.cpp b/src/nimble/NimbleBluetooth.cpp index 4b0c33609..eb1d909f1 100644 --- a/src/nimble/NimbleBluetooth.cpp +++ b/src/nimble/NimbleBluetooth.cpp @@ -139,7 +139,7 @@ class NimbleBluetoothFromRadioCallback : public NimBLECharacteristicCallbacks { bluetoothPhoneAPI->phoneWants = true; bluetoothPhoneAPI->setIntervalFromNow(0); - std::lock_guard guard(bluetoothPhoneAPI->nimble_mutex); + std::lock_guard guard(bluetoothPhoneAPI->nimble_mutex); // BLE callbacks run in NimBLE task if (!bluetoothPhoneAPI->hasChecked) { // Fetch payload on demand; prefetch keeps this fast for the first read.