diff --git a/llarp/consensus/reachability_testing.cpp b/llarp/consensus/reachability_testing.cpp index 7b08c6a31..52c53d7b0 100644 --- a/llarp/consensus/reachability_testing.cpp +++ b/llarp/consensus/reachability_testing.cpp @@ -114,7 +114,7 @@ namespace llarp::consensus } std::vector> - reachability_testing::get_failing(AbstractRouter*, const time_point_t& now) + reachability_testing::get_failing(const time_point_t& now) { // Our failing_queue puts the oldest retest times at the top, so pop them off into our result // until the top node should be retested sometime in the future @@ -148,4 +148,10 @@ namespace llarp::consensus failing_queue.emplace(pk, steady_clock::now() + next_test_in, previous_failures + 1); } + void + reachability_testing::remove_node_from_failing(const RouterID& pk) + { + failing.erase(pk); + } + } // namespace llarp::consensus diff --git a/llarp/consensus/reachability_testing.hpp b/llarp/consensus/reachability_testing.hpp index 1109500a9..ecea88039 100644 --- a/llarp/consensus/reachability_testing.hpp +++ b/llarp/consensus/reachability_testing.hpp @@ -123,7 +123,7 @@ namespace llarp::consensus // Removes and returns up to MAX_RETESTS_PER_TICK nodes that are due to be tested (i.e. // next-testing-time <= now). Returns [snrecord, #previous-failures] for each. std::vector> - get_failing(AbstractRouter* router, const time_point_t& now = clock_t::now()); + get_failing(const time_point_t& now = clock_t::now()); // Adds a bad node pubkey to the failing list, to be re-tested soon (with a backoff depending on // `failures`; see TESTING_BACKOFF). `previous_failures` should be the number of previous @@ -132,6 +132,10 @@ namespace llarp::consensus void add_failing_node(const RouterID& pk, int previous_failures = 0); + /// removes the public key from the failing set + void + remove_node_from_failing(const RouterID& pk); + // Called when this router receives an incomming session void incoming_ping(const time_point_t& now = clock_t::now()); diff --git a/llarp/router/router.cpp b/llarp/router/router.cpp index 403959ed0..f66eb0616 100644 --- a/llarp/router/router.cpp +++ b/llarp/router/router.cpp @@ -1197,13 +1197,21 @@ namespace llarp // dont run tests if we are decommissioned if (LooksDecommissioned()) return; - auto tests = m_routerTesting.get_failing(this); + auto tests = m_routerTesting.get_failing(); if (auto maybe = m_routerTesting.next_random(this)) { tests.emplace_back(*maybe, 0); } for (const auto& [router, fails] : tests) { + if (not SessionToRouterAllowed(router)) + { + LogDebug( + router, + " is no longer a registered service node so we remove it from the testing list"); + m_routerTesting.remove_node_from_failing(router); + continue; + } LogDebug("Establishing session to ", router, " for SN testing"); // try to make a session to this random router // this will do a dht lookup if needed @@ -1222,16 +1230,23 @@ namespace llarp previous_fails + 1, " consecutive failures)"); } - else if (previous_fails > 0) - LogInfo( - "Successful SN connection test to ", - router, - " after ", - previous_fails, - " failures"); else - LogDebug("Successful SN connection test to ", router); - + { + m_routerTesting.remove_node_from_failing(router); + if (previous_fails > 0) + { + LogInfo( + "Successful SN connection test to ", + router, + " after ", + previous_fails, + " failures"); + } + else + { + LogDebug("Successful SN connection test to ", router); + } + } if (rpc) { // inform as needed