// lokinet/llarp/consensus/reachability_testing.hpp

#pragma once

#include <llarp/router_id.hpp>
#include <llarp/util/time.hpp>

#include <chrono>
#include <cstddef>
#include <functional>
#include <optional>
#include <queue>
#include <random>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

// Forward declaration: this header only ever names Router through a pointer parameter, so the
// full definition is not needed here.
namespace llarp { struct Router; }  // namespace llarp

namespace llarp::consensus
{
  namespace detail
  {
    // Monotonic clock used for reachability-test scheduling, and its time-point type.
    using clock_t = std::chrono::steady_clock;
    using time_point_t = std::chrono::time_point<clock_t>;

    // Comparator for tuple-like values (std::tuple, std::pair, std::array): returns true when the
    // Nth element of `lhs` is greater than the Nth element of `rhs`, as decided by std::greater.
    // Used as the Compare of a std::priority_queue this puts the element with the *smallest* Nth
    // value at top().
    template <typename T, std::size_t N>
    struct nth_greater
    {
      // Fail early with a readable message rather than deep inside std::tuple_element.
      static_assert(
          N < std::tuple_size_v<T>, "nth_greater: element index N is out of range for T");

      constexpr bool
      operator()(const T& lhs, const T& rhs) const
      {
        return std::greater<std::tuple_element_t<N, T>>{}(std::get<N>(lhs), std::get<N>(rhs));
      }
    };

    // State about tests/pings arriving at this node.  All fields are value-initialised: the time
    // points start at the clock's epoch and the flag starts false.
    // NOTE(review): the field meanings below are inferred from their names; confirm against the
    // code that updates them.
    struct incoming_test_state
    {
      // Presumably when we were last tested (pinged) by another node.
      time_point_t last_test{};
      // Presumably when we last logged a complaint about not being tested.
      time_point_t last_whine{};
      // Presumably whether the previous check found us untested for too long.
      bool was_failing = false;
    };

  }  // namespace detail
  using time_point_t = detail::time_point_t;
  using clock_t = detail::clock_t;

  // How often we tick the timer to check whether we need to do any tests.
  //
  // `inline` gives every translation unit that includes this header the same single object
  // instead of a private internal-linkage copy.  The value is spelled without the `ms` literal so
  // that this line does not depend on a chrono-literals using-directive being in scope.
  inline constexpr std::chrono::milliseconds REACHABILITY_TESTING_TIMER_INTERVAL{50};

  class reachability_testing
  {
   public:
    // Distribution for the seconds between node tests: we throw in some randomness to avoid
    // potential clustering of tests.  (Note that there is some granularity here as the test timer
    // only runs every REACHABILITY_TESTING_TIMER_INTERVAL).
    std::normal_distribution<float> TESTING_INTERVAL{10.0, 3.0};

    // The linear backoff after each consecutive test failure before we re-test.  Specifically we
    // schedule the next re-test for (TESTING_BACKOFF*previous_failures) + TESTING_INTERVAL(rng).
    inline static constexpr auto TESTING_BACKOFF = 10s;

    // The upper bound for the re-test interval.
    inline static constexpr auto TESTING_BACKOFF_MAX = 2min;

    // The maximum number of nodes that we will re-test at once (i.e. per TESTING_TIMING_INTERVAL);
    // mainly intended to throttle ourselves if, for instance, our own connectivity loss makes us
    // accumulate tons of nodes to test all at once.  (Despite the random intervals, this can happen
    // if we also get decommissioned during which we can't test at all but still have lots of
    // failing nodes we want to test right away when we get recommissioned).
    inline static constexpr int MAX_RETESTS_PER_TICK = 4;

    // Maximum time without a ping before we start whining about it.
    //
    // We have a probability of about 0.368* of *not* getting pinged within a ping interval (10s),
    // and so the probability of not getting a ping for 2 minutes (i.e. 12 test spans) just because
    // we haven't been selected is extremely small (0.0000061).  It also coincides nicely with
    // blockchain time (i.e. two minutes) and our max testing backoff.
    //
    // * = approx value of ((n-1)/n)^n for non-tiny values of n
    inline static constexpr auto MAX_TIME_WITHOUT_PING = 2min;

    // How often we whine in the logs about being unreachable
    inline static constexpr auto WHINING_INTERVAL = 2min;

   private:
    // Queue of pubkeys of service nodes to test; we pop off the back of this until the queue
    // empties then we refill it with a shuffled list of all pubkeys then pull off of it until it is
    // empty again, etc.
    std::vector<RouterID> testing_queue;

    // The next time for a general test
    time_point_t next_general_test = time_point_t::min();

    // When we started, so that we know not to hold off on whining about no pings for a while.
    const time_point_t startup = clock_t::now();

    // Pubkeys, next test times, and sequential failure counts of service nodes that are currently
    // in "failed" status along with the last time they failed; we retest them first after 10s then
    // back off linearly by an additional 10s up to a max testing interval of 2m30s, until we get a
    // successful response.
    using FailingPK = std::tuple<RouterID, time_point_t, int>;
    std::priority_queue<FailingPK, std::vector<FailingPK>, detail::nth_greater<FailingPK, 1>>
        failing_queue;
    std::unordered_set<RouterID> failing;

    // Track the last time *this node* was tested by other network nodes; used to detect and warn
    // about possible network issues.
    detail::incoming_test_state last;

   public:
    // If it is time to perform another random test, this returns the next node to test from the
    // testing queue and returns it, also updating the timer for the next test.  If it is not yet
    // time, or if the queue is empty and cannot current be replenished, returns std::nullopt.  If
    // the queue empties then this builds a new one by shuffling current public keys in the swarm's
    // "all nodes" then starts using the new queue for this an subsequent calls.
    //
    // `requeue` is mainly for internal use: if false it avoids rebuilding the queue if we run
    // out (and instead just return nullopt).
    std::optional<RouterID>
    next_random(Router* router, const time_point_t& now = clock_t::now(), bool requeue = true);

    // Removes and returns up to MAX_RETESTS_PER_TICK nodes that are due to be tested (i.e.
    // next-testing-time <= now).  Returns [snrecord, #previous-failures] for each.
    std::vector<std::pair<RouterID, int>>
    get_failing(const time_point_t& now = clock_t::now());

    // Adds a bad node pubkey to the failing list, to be re-tested soon (with a backoff depending on
    // `failures`; see TESTING_BACKOFF).  `previous_failures` should be the number of previous
    // failures *before* this one, i.e. 0 for a random general test; or the failure count returned
    // by `get_failing` for repeated failures.
    void
    add_failing_node(const RouterID& pk, int previous_failures = 0);

    /// removes the public key from the failing set
    void
    remove_node_from_failing(const RouterID& pk);

    // Called when this router receives an incomming session
    void
    incoming_ping(const time_point_t& now = clock_t::now());

    // Check whether we received incoming pings recently
    void
    check_incoming_tests(const time_point_t& now = clock_t::now());
  };

}  // namespace llarp::consensus