Oxend error ping + unfunded tracking

Currently (from a recent PR) we aren't pinging oxend if not active, but
that behaviour ended up being quite wrong because lokinet needs to ping
even when decommissioned or deregistered (when decommissioned we need
the ping to get commissioned again, and if not registered we need the
ping to get past the "lokinet isn't pinging" nag screen to prepare a
registration).

This considerably revises the pinging behaviour:

- We ping oxend *unless* there is a specific error with our connections
  (i.e. we *should* be establishing peer connections but don't have any)
- If we do have such an error, we send a new oxend "error" ping to
  report the error to oxend and get oxend to hold off on sending uptime
  proofs.

Along the way this also changes how we handle the current node state:
instead of just tracking deregistered/decommissioned, we now track three
states:

- LooksRegistered -- which means the SN is known to the network (but not
  necessarily active or fully staked)
- LooksFunded -- which means it is known *and* is fully funded, but not
  necessarily active
- LooksDecommissioned -- which means it is known, funded, and not
  currently active (which implies decommissioned).

The funded (or more precisely, unfunded) state is now tracked in
rc_lookup_handler in a "greenlist" -- i.e. SNs that are so new ("green")
that they aren't even fully staked or active yet.
pull/2015/head
Jason Rhinelander 2 years ago
parent bd869b3b07
commit c5e787b8cb

@ -199,14 +199,12 @@ namespace llarp
virtual bool virtual bool
IsServiceNode() const = 0; IsServiceNode() const = 0;
virtual bool /// Called to determine if we're in a bad state (which gets reported to our oxend) that should
IsActiveServiceNode() const = 0; /// prevent uptime proofs from going out to the network (so that the error state gets noticed).
/// Currently this means we require a decent number of peers whenever we are fully staked
/// If we are running as a service node and appear active, i.e. registered and not /// (active or decommed).
/// decommissioned, we should *not* ping core if we know of too few peers, to indicate to core virtual std::optional<std::string>
/// we are not in a good state. OxendErrorState() const = 0;
virtual bool
ShouldPingOxen() const = 0;
virtual bool virtual bool
StartRpcServer() = 0; StartRpcServer() = 0;
@ -315,7 +313,9 @@ namespace llarp
/// set router's service node whitelist /// set router's service node whitelist
virtual void virtual void
SetRouterWhitelist( SetRouterWhitelist(
const std::vector<RouterID>& whitelist, const std::vector<RouterID>& greylist) = 0; const std::vector<RouterID>& whitelist,
const std::vector<RouterID>& greylist,
const std::vector<RouterID>& unfundedlist) = 0;
virtual std::unordered_set<RouterID> virtual std::unordered_set<RouterID>
GetRouterWhitelist() const = 0; GetRouterWhitelist() const = 0;

@ -34,7 +34,9 @@ namespace llarp
virtual void virtual void
SetRouterWhitelist( SetRouterWhitelist(
const std::vector<RouterID>& whitelist, const std::vector<RouterID>& greylist) = 0; const std::vector<RouterID>& whitelist,
const std::vector<RouterID>& greylist,
const std::vector<RouterID>& greenlist) = 0;
virtual void virtual void
GetRC(const RouterID& router, RCRequestCallback callback, bool forceLookup = false) = 0; GetRC(const RouterID& router, RCRequestCallback callback, bool forceLookup = false) = 0;
@ -48,6 +50,12 @@ namespace llarp
virtual bool virtual bool
IsGreylisted(const RouterID& remote) const = 0; IsGreylisted(const RouterID& remote) const = 0;
virtual bool
IsGreenlisted(const RouterID& remote) const = 0;
virtual bool
IsRegistered(const RouterID& remote) const = 0;
virtual bool virtual bool
CheckRC(const RouterContact& rc) const = 0; CheckRC(const RouterContact& rc) const = 0;

@ -32,26 +32,28 @@ namespace llarp
whitelistRouters.erase(router); whitelistRouters.erase(router);
} }
static void
loadColourList(std::unordered_set<RouterID>& beigelist, const std::vector<RouterID>& new_beige)
{
beigelist.clear();
beigelist.insert(new_beige.begin(), new_beige.end());
}
void void
RCLookupHandler::SetRouterWhitelist( RCLookupHandler::SetRouterWhitelist(
const std::vector<RouterID>& whitelist, const std::vector<RouterID>& greylist) const std::vector<RouterID>& whitelist,
const std::vector<RouterID>& greylist,
const std::vector<RouterID>& greenlist)
{ {
if (whitelist.empty()) if (whitelist.empty())
return; return;
util::Lock l(_mutex); util::Lock l(_mutex);
whitelistRouters.clear(); loadColourList(whitelistRouters, whitelist);
greylistRouters.clear(); loadColourList(greylistRouters, greylist);
for (auto& router : whitelist) loadColourList(greenlistRouters, greenlist);
{
whitelistRouters.emplace(router);
}
for (auto& router : greylist)
{
greylistRouters.emplace(router);
}
LogInfo("lokinet service node list now has ", whitelistRouters.size(), " routers"); LogInfo("lokinet service node list now has ", whitelistRouters.size(), " active routers");
} }
bool bool
@ -140,6 +142,20 @@ namespace llarp
return greylistRouters.count(remote); return greylistRouters.count(remote);
} }
/// Returns true when `remote` is present in `greenlistRouters`. The lookup is done while
/// holding `_mutex` through a util::Lock.
bool
RCLookupHandler::IsGreenlisted(const RouterID& remote) const
{
  util::Lock lock{_mutex};
  const bool onGreenlist = greenlistRouters.find(remote) != greenlistRouters.end();
  return onGreenlist;
}
/// Returns true when `remote` is present in any of `whitelistRouters`, `greylistRouters`, or
/// `greenlistRouters`. All three lookups happen under a single util::Lock on `_mutex`.
bool
RCLookupHandler::IsRegistered(const RouterID& remote) const
{
  util::Lock lock{_mutex};
  // Check the lists in white -> grey -> green order, stopping at the first hit.
  if (whitelistRouters.count(remote) != 0)
    return true;
  if (greylistRouters.count(remote) != 0)
    return true;
  return greenlistRouters.count(remote) != 0;
}
bool bool
RCLookupHandler::PathIsAllowed(const RouterID& remote) const RCLookupHandler::PathIsAllowed(const RouterID& remote) const
{ {

@ -42,8 +42,11 @@ namespace llarp
void void
SetRouterWhitelist( SetRouterWhitelist(
const std::vector<RouterID>& whitelist, const std::vector<RouterID>& greylist) override const std::vector<RouterID>& whitelist,
EXCLUDES(_mutex); const std::vector<RouterID>& greylist,
const std::vector<RouterID>& greenlist
) override EXCLUDES(_mutex);
bool bool
HaveReceivedWhitelist() const override; HaveReceivedWhitelist() const override;
@ -61,6 +64,16 @@ namespace llarp
bool bool
IsGreylisted(const RouterID& remote) const override EXCLUDES(_mutex); IsGreylisted(const RouterID& remote) const override EXCLUDES(_mutex);
// "greenlist" = new routers (i.e. "green") that aren't fully funded yet
bool
IsGreenlisted(const RouterID& remote) const override EXCLUDES(_mutex);
// registered just means that there is at least an operator stake, but doesn't require the node
// be fully funded, active, or not decommed. (In other words: it is any of the white, grey, or
// green list).
bool
IsRegistered(const RouterID& remote) const override EXCLUDES(_mutex);
bool bool
CheckRC(const RouterContact& rc) const override; CheckRC(const RouterContact& rc) const override;
@ -134,8 +147,12 @@ namespace llarp
bool useWhitelist = false; bool useWhitelist = false;
bool isServiceNode = false; bool isServiceNode = false;
// whitelist = active routers
std::unordered_set<RouterID> whitelistRouters GUARDED_BY(_mutex); std::unordered_set<RouterID> whitelistRouters GUARDED_BY(_mutex);
// greylist = fully funded, but decommissioned routers
std::unordered_set<RouterID> greylistRouters GUARDED_BY(_mutex); std::unordered_set<RouterID> greylistRouters GUARDED_BY(_mutex);
// greenlist = registered but not fully-staked routers
std::unordered_set<RouterID> greenlistRouters GUARDED_BY(_mutex);
using TimePoint = std::chrono::steady_clock::time_point; using TimePoint = std::chrono::steady_clock::time_point;
std::unordered_map<RouterID, TimePoint> _routerLookupTimes; std::unordered_map<RouterID, TimePoint> _routerLookupTimes;

@ -471,16 +471,14 @@ namespace llarp
return nodedb()->NumLoaded() < KnownPeerWarningThreshold; return nodedb()->NumLoaded() < KnownPeerWarningThreshold;
} }
bool std::optional<std::string>
Router::IsActiveServiceNode() const Router::OxendErrorState() const
{ {
return IsServiceNode() and not(LooksDeregistered() or LooksDecommissioned()); // If we're in the white or gray list then we *should* be establishing connections to other
} // routers, so if we have almost no peers then something is almost certainly wrong.
if (LooksFunded() and TooFewPeers())
bool return "too few peer connections; lokinet is not adequately connected to the network";
Router::ShouldPingOxen() const return std::nullopt;
{
return IsActiveServiceNode() and not TooFewPeers();
} }
void void
@ -508,10 +506,17 @@ namespace llarp
} }
bool bool
Router::LooksDeregistered() const Router::LooksFunded() const
{ {
return IsServiceNode() and whitelistRouters and _rcLookupHandler.HaveReceivedWhitelist() return IsServiceNode() and whitelistRouters and _rcLookupHandler.HaveReceivedWhitelist()
and not _rcLookupHandler.SessionIsAllowed(pubkey()); and _rcLookupHandler.SessionIsAllowed(pubkey());
}
bool
Router::LooksRegistered() const
{
return IsServiceNode() and whitelistRouters and _rcLookupHandler.HaveReceivedWhitelist()
and _rcLookupHandler.IsRegistered(pubkey());
} }
bool bool
@ -1061,12 +1066,16 @@ namespace llarp
if (now >= m_NextDecommissionWarn) if (now >= m_NextDecommissionWarn)
{ {
constexpr auto DecommissionWarnInterval = 5min; constexpr auto DecommissionWarnInterval = 5min;
if (auto dereg = LooksDeregistered(); dereg or decom) if (auto registered = LooksRegistered(), funded = LooksFunded();
not(registered and funded and not decom))
{ {
// complain about being deregistered // complain about being deregistered/decommed/unfunded
LogError( log::error(
"We are running as a service node but we seem to be ", logcat,
dereg ? "deregistered" : "decommissioned"); "We are running as a service node but we seem to be {}",
not registered ? "deregistered"
: decom ? "decommissioned"
: "not fully staked");
m_NextDecommissionWarn = now + DecommissionWarnInterval; m_NextDecommissionWarn = now + DecommissionWarnInterval;
} }
else if (isSvcNode and TooFewPeers()) else if (isSvcNode and TooFewPeers())
@ -1081,7 +1090,7 @@ namespace llarp
// if we need more sessions to routers and we are not a service node kicked from the network // if we need more sessions to routers and we are not a service node kicked from the network
// we shall connect out to others // we shall connect out to others
if (connected < connectToNum and not LooksDeregistered()) if (connected < connectToNum and LooksFunded())
{ {
size_t dlt = connectToNum - connected; size_t dlt = connectToNum - connected;
LogDebug("connecting to ", dlt, " random routers to keep alive"); LogDebug("connecting to ", dlt, " random routers to keep alive");
@ -1233,9 +1242,11 @@ namespace llarp
void void
Router::SetRouterWhitelist( Router::SetRouterWhitelist(
const std::vector<RouterID>& whitelist, const std::vector<RouterID>& greylist) const std::vector<RouterID>& whitelist,
const std::vector<RouterID>& greylist,
const std::vector<RouterID>& unfundedlist)
{ {
_rcLookupHandler.SetRouterWhitelist(whitelist, greylist); _rcLookupHandler.SetRouterWhitelist(whitelist, greylist, unfundedlist);
} }
bool bool

@ -143,7 +143,9 @@ namespace llarp
void void
SetRouterWhitelist( SetRouterWhitelist(
const std::vector<RouterID>& whitelist, const std::vector<RouterID>& greylist) override; const std::vector<RouterID>& whitelist,
const std::vector<RouterID>& greylist,
const std::vector<RouterID>& unfunded) override;
std::unordered_set<RouterID> std::unordered_set<RouterID>
GetRouterWhitelist() const override GetRouterWhitelist() const override
@ -203,9 +205,16 @@ namespace llarp
bool bool
LooksDecommissioned() const; LooksDecommissioned() const;
/// return true if we look like we are a deregistered service node /// return true if we look like we are a registered, fully-staked service node (either active or
/// decommissioned). This condition determines when we are allowed to (and attempt to) connect
/// to other peers when running as a service node.
bool bool
LooksDeregistered() const; LooksFunded() const;
/// return true if we are a registered service node; note that this only requires a partial stake,
/// and does not imply that this service node is *active* or fully funded.
bool
LooksRegistered() const;
/// return true if we look like we are allowed and able to test other routers /// return true if we look like we are allowed and able to test other routers
bool bool
@ -378,12 +387,8 @@ namespace llarp
bool bool
IsServiceNode() const override; IsServiceNode() const override;
/// return true if service node *and* not deregistered or decommissioned std::optional<std::string>
bool OxendErrorState() const override;
IsActiveServiceNode() const override;
bool
ShouldPingOxen() const override;
void void
Close(); Close();
@ -556,8 +561,11 @@ namespace llarp
bool m_isServiceNode = false; bool m_isServiceNode = false;
// Delay warning about being decommed/dereged until we've had enough time to sync up with oxend
static constexpr auto DECOMM_WARNING_STARTUP_DELAY = 15s;
llarp_time_t m_LastStatsReport = 0s; llarp_time_t m_LastStatsReport = 0s;
llarp_time_t m_NextDecommissionWarn = 0s; llarp_time_t m_NextDecommissionWarn = time_now_ms() + DECOMM_WARNING_STARTUP_DELAY;
std::shared_ptr<llarp::KeyManager> m_keyManager; std::shared_ptr<llarp::KeyManager> m_keyManager;
std::shared_ptr<PeerDb> m_peerDb; std::shared_ptr<PeerDb> m_peerDb;

@ -174,25 +174,27 @@ namespace llarp
auto makePingRequest = [self = shared_from_this()]() { auto makePingRequest = [self = shared_from_this()]() {
// send a ping // send a ping
PubKey pk{}; PubKey pk{};
bool should_ping = false; auto r = self->m_Router.lock();
if (auto r = self->m_Router.lock()) if (not r)
{ return; // router has gone away, maybe shutting down?
pk = r->pubkey();
should_ping = r->ShouldPingOxen(); pk = r->pubkey();
}
if (should_ping) nlohmann::json payload = {
{ {"pubkey_ed25519", oxenc::to_hex(pk.begin(), pk.end())},
nlohmann::json payload = { {"version", {VERSION[0], VERSION[1], VERSION[2]}}};
{"pubkey_ed25519", oxenc::to_hex(pk.begin(), pk.end())},
{"version", {VERSION[0], VERSION[1], VERSION[2]}}}; if (auto err = r->OxendErrorState())
self->Request( payload["error"] = *err;
"admin.lokinet_ping",
[](bool success, std::vector<std::string> data) { self->Request(
(void)data; "admin.lokinet_ping",
LogDebug("Received response for ping. Successful: ", success); [](bool success, std::vector<std::string> data) {
}, (void)data;
payload.dump()); LogDebug("Received response for ping. Successful: ", success);
} },
payload.dump());
// subscribe to block updates // subscribe to block updates
self->Request("sub.block", [](bool success, std::vector<std::string> data) { self->Request("sub.block", [](bool success, std::vector<std::string> data) {
if (data.empty() or not success) if (data.empty() or not success)
@ -216,18 +218,13 @@ namespace llarp
LokidRpcClient::HandleNewServiceNodeList(const nlohmann::json& j) LokidRpcClient::HandleNewServiceNodeList(const nlohmann::json& j)
{ {
std::unordered_map<RouterID, PubKey> keymap; std::unordered_map<RouterID, PubKey> keymap;
std::vector<RouterID> activeNodeList, nonActiveNodeList; std::vector<RouterID> activeNodeList, decommNodeList, unfundedNodeList;
if (not j.is_array()) if (not j.is_array())
throw std::runtime_error{ throw std::runtime_error{
"Invalid service node list: expected array of service node states"}; "Invalid service node list: expected array of service node states"};
for (auto& snode : j) for (auto& snode : j)
{ {
// Skip unstaked snodes:
if (const auto funded_itr = snode.find("funded"); funded_itr == snode.end()
or not funded_itr->is_boolean() or not funded_itr->get<bool>())
continue;
const auto ed_itr = snode.find("pubkey_ed25519"); const auto ed_itr = snode.find("pubkey_ed25519");
if (ed_itr == snode.end() or not ed_itr->is_string()) if (ed_itr == snode.end() or not ed_itr->is_string())
continue; continue;
@ -238,6 +235,10 @@ namespace llarp
if (active_itr == snode.end() or not active_itr->is_boolean()) if (active_itr == snode.end() or not active_itr->is_boolean())
continue; continue;
const bool active = active_itr->get<bool>(); const bool active = active_itr->get<bool>();
const auto funded_itr = snode.find("funded");
if (funded_itr == snode.end() or not funded_itr->is_boolean())
continue;
const bool funded = funded_itr->get<bool>();
RouterID rid; RouterID rid;
PubKey pk; PubKey pk;
@ -246,7 +247,10 @@ namespace llarp
continue; continue;
keymap[rid] = pk; keymap[rid] = pk;
(active ? activeNodeList : nonActiveNodeList).push_back(std::move(rid)); (active ? activeNodeList
: funded ? decommNodeList
: unfundedNodeList)
.push_back(std::move(rid));
} }
if (activeNodeList.empty()) if (activeNodeList.empty())
@ -254,17 +258,19 @@ namespace llarp
LogWarn("got empty service node list, ignoring."); LogWarn("got empty service node list, ignoring.");
return; return;
} }
// inform router about the new list // inform router about the new list
if (auto router = m_Router.lock()) if (auto router = m_Router.lock())
{ {
auto& loop = router->loop(); auto& loop = router->loop();
loop->call([this, loop->call([this,
active = std::move(activeNodeList), active = std::move(activeNodeList),
inactive = std::move(nonActiveNodeList), decomm = std::move(decommNodeList),
unfunded = std::move(unfundedNodeList),
keymap = std::move(keymap), keymap = std::move(keymap),
router = std::move(router)]() mutable { router = std::move(router)]() mutable {
m_KeyMap = std::move(keymap); m_KeyMap = std::move(keymap);
router->SetRouterWhitelist(active, inactive); router->SetRouterWhitelist(active, decomm, unfunded);
}); });
} }
else else

Loading…
Cancel
Save