Fixing handshake stalls for Android devices

This commit is contained in:
Clive Blackledge 2025-10-14 21:23:14 -07:00
parent 1314e5d105
commit 4d8518f433
3 changed files with 95 additions and 12 deletions

View File

@ -33,7 +33,6 @@
// Flag to indicate a heartbeat was received and we should send queue status
bool heartbeatReceived = false;
PhoneAPI::PhoneAPI()
{
lastContactMsec = millis();
@ -74,6 +73,9 @@ void PhoneAPI::handleStartConfig()
nodeInfoForPhone.num = 0; // Don't keep returning old nodeinfos
nodeInfoQueue.clear();
resetReadIndex();
configStartMsec = millis();
configHandshakeRestarted = false;
onConfigHandshakeStarted();
}
void PhoneAPI::close()
@ -101,8 +103,12 @@ void PhoneAPI::close()
fromRadioNum = 0;
config_nonce = 0;
config_state = 0;
// Reset duplicate filter so each new connection starts clean
std::fill(std::begin(recentToRadioPacketIds), std::end(recentToRadioPacketIds), 0);
pauseBluetoothLogging = false;
heartbeatReceived = false;
configStartMsec = 0;
configHandshakeRestarted = false;
}
}
@ -757,7 +763,35 @@ int PhoneAPI::onNotify(uint32_t newValue)
onNowHasData(newValue);
} else {
LOG_DEBUG("Client not yet interested in packets (state=%d)", state);
checkConfigHandshakeTimeout();
}
return timeout ? -1 : 0; // If we timed out, MeshService should stop iterating through observers as we just removed one
}
/// True while a config handshake is still pending: a start time has been recorded,
/// the state machine is still in STATE_SEND_MY_INFO, and no restart has been forced yet.
bool PhoneAPI::isConfigHandshakeActive() const
{
    if (configHandshakeRestarted)
        return false;
    if (state != STATE_SEND_MY_INFO)
        return false;
    return configStartMsec != 0;
}
/// Milliseconds elapsed since the handshake start time was recorded, or 0 when no
/// start time is recorded (configStartMsec == 0).
uint32_t PhoneAPI::getConfigHandshakeElapsedMs() const
{
    return configStartMsec != 0 ? millis() - configStartMsec : 0;
}
bool PhoneAPI::checkConfigHandshakeTimeout()
{
if (!isConfigHandshakeActive())
return false;
uint32_t elapsedMs = getConfigHandshakeElapsedMs();
if (elapsedMs > kConfigHandshakeTimeoutMs) {
LOG_WARN("Config handshake stuck in state=%d for %u ms, forcing transport restart", state, elapsedMs);
configHandshakeRestarted = true;
onConfigHandshakeTimeout();
return true;
}
return false;
}

View File

@ -91,6 +91,8 @@ class PhoneAPI
/// Use to ensure that clients don't get confused about old messages from the radio
uint32_t config_nonce = 0;
uint32_t readIndex = 0;
/// millis() timestamp recorded by handleStartConfig(); 0 means no config handshake is being timed
uint32_t configStartMsec = 0;
/// Set when checkConfigHandshakeTimeout() fires so the timeout hook is not invoked again for the same handshake
bool configHandshakeRestarted = false;
std::vector<meshtastic_FileInfo> filesManifest = {};
@ -135,6 +137,7 @@ class PhoneAPI
/// True unless the state machine is in STATE_SEND_NOTHING
bool isConnected() { return state != STATE_SEND_NOTHING; }
protected:
/// How long (in ms) a config handshake may stay pending before checkConfigHandshakeTimeout() fires
static constexpr uint32_t kConfigHandshakeTimeoutMs = 3000;
/// Our fromradio packet while it is being assembled
meshtastic_FromRadio fromRadioScratch = {};
@ -144,12 +147,20 @@ class PhoneAPI
/// Hookable to find out when connection changes
virtual void onConnectionChanged(bool connected) {}
/// Invoked by checkConfigHandshakeTimeout() when the config handshake has stalled longer than
/// kConfigHandshakeTimeoutMs, so a transport can drop its link (e.g. BLE). Default is a no-op.
virtual void onConfigHandshakeTimeout() {}
/// Invoked by handleStartConfig() each time a config handshake begins. Default is a no-op.
virtual void onConfigHandshakeStarted() {}
/// If we haven't heard from the other side in a while then say not connected. Returns true if timeout occurred
bool checkConnectionTimeout();
/// Check the current underlying physical link to see if the client is currently connected
virtual bool checkIsConnected() = 0;
/// If an active config handshake has exceeded kConfigHandshakeTimeoutMs, call onConfigHandshakeTimeout().
/// Returns true if the timeout fired
bool checkConfigHandshakeTimeout();
/// True while a started handshake is still in STATE_SEND_MY_INFO and no timeout has fired for it
bool isConfigHandshakeActive() const;
/// Milliseconds since the handshake start was recorded, or 0 if no start time is recorded
uint32_t getConfigHandshakeElapsedMs() const;
/**
* Subclasses can use this as a hook to provide custom notifications for their transport (i.e. bluetooth notifies)
*/

View File

@ -54,22 +54,39 @@ class BluetoothPhoneAPI : public PhoneAPI, public concurrency::OSThread
protected:
/// Thread body for the Bluetooth phone API. While holding nimble_mutex it passes every queued
/// to-radio packet to handleToRadio() and, if the phone wants data that has not been fetched yet,
/// prefetches it with getFromRadio(). It then runs the config-handshake timeout check and, while a
/// handshake is still pending and nothing was scheduled immediately, re-arms the interval for the
/// time remaining until the timeout.
/// NOTE(review): this listing carries both pre-change and post-change statements (two lock_guard
/// declarations, two drain loops, two return statements) — confirm which lines are live before editing.
virtual int32_t runOnce() override
{
std::lock_guard<std::mutex> guard(nimble_mutex);
if (queue_size > 0) {
for (uint8_t i = 0; i < queue_size; i++) {
handleToRadio(nimble_queue.at(i).data(), nimble_queue.at(i).length());
// Tracks whether a setIntervalFromNow(0) was already requested in this pass.
bool scheduledImmediate = false;
{
// Inner scope limits how long nimble_mutex is held.
std::lock_guard<std::mutex> guard(nimble_mutex);
if (queue_size > 0) {
for (uint8_t i = 0; i < queue_size; i++) {
handleToRadio(nimble_queue.at(i).data(), nimble_queue.at(i).length());
}
LOG_DEBUG("Queue_size %u", queue_size);
queue_size = 0;
// Reset our timer so any newly queued work is handled right away.
setIntervalFromNow(0);
scheduledImmediate = true;
}
if (!hasChecked && phoneWants) {
// Pull fresh data while we're outside of the NimBLE callback context.
numBytes = getFromRadio(fromRadioBytes);
hasChecked = true;
// Make sure we wake immediately to publish the prefetched data.
setIntervalFromNow(0);
scheduledImmediate = true;
}
LOG_DEBUG("Queue_size %u", queue_size);
queue_size = 0;
}
if (!hasChecked && phoneWants) {
// Pull fresh data while we're outside of the NimBLE callback context.
numBytes = getFromRadio(fromRadioBytes);
hasChecked = true;
bool timedOut = checkConfigHandshakeTimeout();
if (!timedOut && !scheduledImmediate && isConfigHandshakeActive()) {
uint32_t elapsed = getConfigHandshakeElapsedMs();
// If the deadline has already been reached, re-run after 1 ms instead of computing a zero/underflowed delay.
uint32_t remaining = elapsed >= kConfigHandshakeTimeoutMs ? 1 : (kConfigHandshakeTimeoutMs - elapsed);
// Keep nudging the thread while the config handshake is in flight.
setIntervalFromNow(remaining);
}
// the run is triggered via NimbleBluetoothToRadioCallback and NimbleBluetoothFromRadioCallback
return INT32_MAX;
return RUN_SAME;
}
/**
* Subclasses can use this as a hook to provide custom notifications for their transport (i.e. bluetooth notifies)
@ -92,6 +109,27 @@ class BluetoothPhoneAPI : public PhoneAPI, public concurrency::OSThread
#endif
}
/// Handshake-start hook: request the next run kConfigHandshakeTimeoutMs from now via setIntervalFromNow().
virtual void onConfigHandshakeStarted() override
{
    setIntervalFromNow(kConfigHandshakeTimeoutMs);
}
/// Handshake-timeout hook: ask the BLE server to disconnect every peer it currently reports.
/// Does nothing beyond logging when there is no server or no peers; failed disconnects are logged.
virtual void onConfigHandshakeTimeout() override
{
    LOG_WARN("Config handshake stalled; restarting BLE connection");
    if (bleServer) {
        const auto connections = bleServer->getPeerDevices();
        if (connections.empty()) {
            LOG_WARN("No BLE peers to disconnect during restart");
        } else {
            for (auto connHandle : connections) {
                const int status = bleServer->disconnect(connHandle);
                if (status != 0) {
                    LOG_WARN("Failed to disconnect BLE handle %u (rc=%d)", connHandle, status);
                }
            }
        }
    }
}
/// Report whether the underlying physical link currently has a client:
/// true only when a BLE server exists and it reports at least one connection.
virtual bool checkIsConnected()
{
    if (!bleServer)
        return false;
    return bleServer->getConnectedCount() > 0;
}
};