* get_failing does not need an AbstractRouter as a parameter, so we remove it

* add remove_node_from_failing to remove a node by pubkey from the failing set
* if a router is deregistered we remove it from the failing set so we don't retest it
* remove a router from the failing set if we get a test success
pull/1659/head
Jeff Becker 3 years ago
parent d88ed4eee0
commit f3deabdb96
No known key found for this signature in database
GPG Key ID: F357B3B42F6F9B05

@ -114,7 +114,7 @@ namespace llarp::consensus
}
std::vector<std::pair<RouterID, int>>
reachability_testing::get_failing(AbstractRouter*, const time_point_t& now)
reachability_testing::get_failing(const time_point_t& now)
{
// Our failing_queue puts the oldest retest times at the top, so pop them off into our result
// until the top node should be retested sometime in the future
@ -148,4 +148,10 @@ namespace llarp::consensus
failing_queue.emplace(pk, steady_clock::now() + next_test_in, previous_failures + 1);
}
// Removes the router identified by `pk` from the `failing` set.
//
// `pk` is the public key (RouterID) of the node to drop. Nothing is returned; the result of
// `erase` is discarded, so callers are not told whether `pk` was actually present.
//
// NOTE(review): only `failing` is modified here. An entry for `pk` that was already pushed
// onto `failing_queue` by add_failing_node is not touched by this function -- confirm that
// get_failing copes with such leftover queue entries, otherwise the node may still be retested.
void
reachability_testing::remove_node_from_failing(const RouterID& pk)
{
// presumably `failing` is a standard associative container, in which case erasing a key
// that is absent is a harmless no-op -- TODO confirm against the member declaration
failing.erase(pk);
}
} // namespace llarp::consensus

@ -123,7 +123,7 @@ namespace llarp::consensus
// Removes and returns up to MAX_RETESTS_PER_TICK nodes that are due to be tested (i.e.
// next-testing-time <= now). Returns [snrecord, #previous-failures] for each.
std::vector<std::pair<RouterID, int>>
get_failing(AbstractRouter* router, const time_point_t& now = clock_t::now());
get_failing(const time_point_t& now = clock_t::now());
// Adds a bad node pubkey to the failing list, to be re-tested soon (with a backoff depending on
// `failures`; see TESTING_BACKOFF). `previous_failures` should be the number of previous
@ -132,6 +132,10 @@ namespace llarp::consensus
void
add_failing_node(const RouterID& pk, int previous_failures = 0);
/// Removes the public key `pk` from the failing set. The router calls this when a node being
/// tested is no longer a registered service node, and when a connection test to it succeeds.
void
remove_node_from_failing(const RouterID& pk);
// Called when this router receives an incoming session
void
incoming_ping(const time_point_t& now = clock_t::now());

@ -1197,13 +1197,21 @@ namespace llarp
// dont run tests if we are decommissioned
if (LooksDecommissioned())
return;
auto tests = m_routerTesting.get_failing(this);
auto tests = m_routerTesting.get_failing();
if (auto maybe = m_routerTesting.next_random(this))
{
tests.emplace_back(*maybe, 0);
}
for (const auto& [router, fails] : tests)
{
if (not SessionToRouterAllowed(router))
{
LogDebug(
router,
" is no longer a registered service node so we remove it from the testing list");
m_routerTesting.remove_node_from_failing(router);
continue;
}
LogDebug("Establishing session to ", router, " for SN testing");
// try to make a session to this random router
// this will do a dht lookup if needed
@ -1222,16 +1230,23 @@ namespace llarp
previous_fails + 1,
" consecutive failures)");
}
else if (previous_fails > 0)
LogInfo(
"Successful SN connection test to ",
router,
" after ",
previous_fails,
" failures");
else
LogDebug("Successful SN connection test to ", router);
{
m_routerTesting.remove_node_from_failing(router);
if (previous_fails > 0)
{
LogInfo(
"Successful SN connection test to ",
router,
" after ",
previous_fails,
" failures");
}
else
{
LogDebug("Successful SN connection test to ", router);
}
}
if (rpc)
{
// inform as needed

Loading…
Cancel
Save