From 6952e8f7054654b2ea34e5e6608ebff9a2a5a6e9 Mon Sep 17 00:00:00 2001 From: Thomas Winget Date: Fri, 17 Nov 2023 02:41:42 -0500 Subject: [PATCH 1/7] Add command to fetch RCs from remote node This command will be called periodically by clients to maintain a list of RCs of active relay nodes. It will require another command (future commit) to fetch the RouterIDs from many nodes and reconcile those so we have some notion of good-ness of the RCs we're getting; if we get what seems to be a bad set of RCs (this concept not yet implemented), we will choose a different relay to fetch RCs from. These are left as TODOs for now. --- external/oxen-encoding | 2 +- llarp/link/link_manager.cpp | 117 ++++++++++++++++++++++++++++++++++++ llarp/link/link_manager.hpp | 6 ++ llarp/messages/rc.hpp | 28 +++++++++ llarp/nodedb.cpp | 61 ++++++++++++++++++- llarp/nodedb.hpp | 58 ++++++++++++++++-- llarp/router/router.cpp | 7 +++ llarp/router/router.hpp | 5 ++ 8 files changed, 276 insertions(+), 8 deletions(-) create mode 100644 llarp/messages/rc.hpp diff --git a/external/oxen-encoding b/external/oxen-encoding index a7de63756..f6172d58d 160000 --- a/external/oxen-encoding +++ b/external/oxen-encoding @@ -1 +1 @@ -Subproject commit a7de63756dcc5c31cb899a4b810e6434b1a7c01c +Subproject commit f6172d58d3358473a4c98d96270058a32e166d5f diff --git a/llarp/link/link_manager.cpp b/llarp/link/link_manager.cpp index 6f75b3d8d..a42d98b92 100644 --- a/llarp/link/link_manager.cpp +++ b/llarp/link/link_manager.cpp @@ -6,10 +6,13 @@ #include #include #include +#include #include #include #include +#include + #include #include @@ -427,6 +430,120 @@ namespace llarp } } + void + LinkManager::fetch_rcs( + const RouterID& source, rc_time since, const std::vector& explicit_ids) + { + send_control_message( + source, + "fetch_rcs", + RCFetchMessage::serialize(since, explicit_ids), + [this, source = source](oxen::quic::message m) { + if (m.timed_out) + { + // TODO: keep track of this failure for relay quality metrics? + log::info(link_cat, "RC Fetch to {} timed out", source); + return; + } + if (not m) + { + log::info(link_cat, "RC Fetch to {} returned error.", source); + return; + } + + try + { + oxenc::bt_dict_consumer btdc{m.body()}; + btdc.required("rcs"); + auto btlc = btdc.consume_list_consumer(); + auto timestamp = rc_time{std::chrono::seconds{btdc.require("time")}}; + + std::vector rcs; + while (not btlc.is_finished()) + { + // TODO: maybe make RemoteRC constructor throw a bespoke exception type + // and catch it below so we know what about parsing failed? + rcs.emplace_back(btlc.consume_dict_consumer()); + } + + node_db->ingest_rcs(source, std::move(rcs), timestamp); + } + catch (const std::exception& e) + { + // TODO: Inform NodeDB of failure (perhaps just a call to rotate_rc_source()) + log::info(link_cat, "Failed to parse RC Fetch response from {}", source); + return; + } + }); + } + + void + LinkManager::handle_fetch_rcs(oxen::quic::message m) + { + // this handler should not be registered for clients + assert(_router.is_service_node()); + + const auto& rcs = node_db->get_rcs(); + const auto now = + std::chrono::time_point_cast(std::chrono::system_clock::now()); + try + { + oxenc::bt_dict_consumer btdc{m.body()}; + + btdc.required("explicit_ids"); + auto explicit_ids = btdc.consume_list>(); + auto since_time = rc_time{std::chrono::seconds{btdc.require("since")}}; + + if (explicit_ids.size() > (rcs.size() / 4)) + { + log::info( + link_cat, "Remote requested too many relay IDs (greater than 1/4 of what we have)."); + m.respond( + serialize_response({{messages::STATUS_KEY, RCFetchMessage::INVALID_REQUEST}})); + return; + } + + std::unordered_set explicit_relays; + for (auto& sv : explicit_ids) + { + if (sv.size() != RouterID::SIZE) + { + m.respond(serialize_response( + {{messages::STATUS_KEY, RCFetchMessage::INVALID_REQUEST}})); + return; + } + explicit_relays.emplace(reinterpret_cast(sv.data())); + } + + oxenc::bt_dict_producer resp; + + { + auto btlp = resp.append_list("rcs"); + + const auto& last_time = node_db->get_last_rc_update_times(); + + // if since_time isn't epoch start, subtract a bit for buffer + if (since_time != decltype(since_time)::min()) + since_time -= 5s; + + for (const auto& [_, rc] : rcs) + { + if (last_time.at(rc.router_id()) > since_time or explicit_relays.count(rc.router_id())) + btlp.append_encoded(rc.view()); + } + } + + resp.append("time", now.time_since_epoch().count()); + + m.respond(std::move(resp).str(), false); + } + catch (const std::exception& e) + { + log::info(link_cat, "Exception handling RC Fetch request: {}", e.what()); + m.respond(messages::ERROR_RESPONSE); + } + } + bool LinkManager::have_connection_to(const RouterID& remote, bool client_only) const { diff --git a/llarp/link/link_manager.hpp b/llarp/link/link_manager.hpp index 9967123cc..219cd7ca8 100644 --- a/llarp/link/link_manager.hpp +++ b/llarp/link/link_manager.hpp @@ -226,6 +226,12 @@ namespace llarp void handle_gossip_rc(oxen::quic::message m); + void + fetch_rcs(const RouterID& source, rc_time since, const std::vector& explicit_ids); + + void + handle_fetch_rcs(oxen::quic::message m); + bool have_connection_to(const RouterID& remote, bool client_only = false) const; diff --git a/llarp/messages/rc.hpp b/llarp/messages/rc.hpp new file mode 100644 index 000000000..3aafc5e4c --- /dev/null +++ b/llarp/messages/rc.hpp @@ -0,0 +1,28 @@ +#pragma once + +#include "common.hpp" + +namespace llarp::RCFetchMessage +{ + inline constexpr auto INVALID_REQUEST = "Invalid relay ID requested."sv; + + inline static std::string + serialize(std::chrono::system_clock::time_point since, const std::vector& explicit_ids) + { + oxenc::bt_dict_producer btdp; + + try + { + btdp.append("since", since.time_since_epoch() / 1s); + auto id_list = btdp.append_list("explicit_ids"); + for (const auto& rid : explicit_ids) + id_list.append(rid.ToView()); + } + catch (...) + { + log::error(link_cat, "Error: RCFetchMessage failed to bt encode contents!"); + } + + return std::move(btdp).str(); + } +} // namespace llarp::RCFetchMessage diff --git a/llarp/nodedb.cpp b/llarp/nodedb.cpp index 092bebb6d..9fbf8b4ad 100644 --- a/llarp/nodedb.cpp +++ b/llarp/nodedb.cpp @@ -112,6 +112,60 @@ namespace llarp bootstraps.emplace(rc.router_id(), rc); } + void + NodeDB::rotate_rc_source() + {} + + // TODO: trust model + void + NodeDB::ingest_rcs(RouterID source, std::vector rcs, rc_time timestamp) + { + (void)source; + + // TODO: if we don't currently have a "trusted" relay we've been fetching from, + // this will be a full list of RCs. We need to first check if it aligns closely + // with our trusted RouterID list, then replace our RCs with the incoming set. + + for (auto& rc : rcs) + put_rc_if_newer(std::move(rc), timestamp); + + // TODO: if we have a "trusted" relay we've been fetching from, this will be + // an incremental update to the RC list, so *after* insertion we check if the + // RCs' RouterIDs closely match our trusted RouterID list. + + last_rc_update_relay_timestamp = timestamp; + } + + // TODO: trust model + void + NodeDB::ingest_router_ids(RouterID source, std::vector ids) + { + router_id_fetch_responses[source] = std::move(ids); + + router_id_response_count++; + if (router_id_response_count == router_id_fetch_sources.size()) + { + // TODO: reconcile all the responses + } + } + + void + NodeDB::update_rcs() + { + std::vector needed; + + const auto now = + std::chrono::time_point_cast(std::chrono::system_clock::now()); + for (const auto& [rid, rc] : known_rcs) + { + if (now - rc.timestamp() > RouterContact::OUTDATED_AGE) + needed.push_back(rid); + } + + router.link_manager().fetch_rcs( + rc_fetch_source, last_rc_update_relay_timestamp, std::move(needed)); + } + void NodeDB::set_router_whitelist( const std::vector& whitelist, @@ -277,13 +331,14 @@ namespace llarp } bool - NodeDB::put_rc(RemoteRC rc) + NodeDB::put_rc(RemoteRC rc, rc_time now) { const auto& rid = rc.router_id(); if (not want_rc(rid)) return false; known_rcs.erase(rid); known_rcs.emplace(rid, std::move(rc)); + last_rc_update_times[rid] = now; return true; } @@ -294,12 +349,12 @@ namespace llarp } bool - NodeDB::put_rc_if_newer(RemoteRC rc) + NodeDB::put_rc_if_newer(RemoteRC rc, rc_time now) { auto itr = known_rcs.find(rc.router_id()); if (itr == known_rcs.end() or itr->second.other_is_newer(rc)) { - return put_rc(std::move(rc)); + return put_rc(std::move(rc), now); } return false; } diff --git a/llarp/nodedb.hpp b/llarp/nodedb.hpp index e126004b8..58eea3b33 100644 --- a/llarp/nodedb.hpp +++ b/llarp/nodedb.hpp @@ -26,7 +26,7 @@ namespace llarp { std::unordered_map known_rcs; - const Router& router; + Router& router; const fs::path m_Root; const std::function)> disk; @@ -42,19 +42,31 @@ namespace llarp std::unordered_map bootstraps; + // Router lists for snodes // whitelist = active routers std::unordered_set router_whitelist; // greylist = fully funded, but decommissioned routers std::unordered_set router_greylist; // greenlist = registered but not fully-staked routers std::unordered_set router_greenlist; - // all registered relays (snodes) std::unordered_set registered_routers; + std::unordered_map last_rc_update_times; + + // Router list for clients + std::unordered_set client_known_rcs; // only ever use to specific edges as path first-hops std::unordered_set pinned_edges; + // rc update info + RouterID rc_fetch_source; + rc_time last_rc_update_relay_timestamp; + std::unordered_set router_id_fetch_sources; + std::unordered_map> router_id_fetch_responses; + // process responses once all are received (or failed/timed out) + size_t router_id_response_count{0}; + bool want_rc(const RouterID& rid) const; @@ -80,6 +92,38 @@ namespace llarp return registered_routers; } + const std::unordered_map& + get_rcs() const + { + return known_rcs; + } + + const std::unordered_map& + get_last_rc_update_times() const + { + return last_rc_update_times; + } + + // If we receive a set of RCs from our current RC source relay, we consider + // that relay to be a bad source of RCs and we randomly choose a new one. + // + // When using a new RC fetch relay, we first re-fetch the full RC list and, if + // that aligns with our RouterID list, we go back to periodic updates from that relay. + // + // This will respect edge-pinning and attempt to use a relay we already have + // a connection with. + void + rotate_rc_source(); + + void + ingest_rcs(RouterID source, std::vector rcs, rc_time timestamp); + + void + ingest_router_ids(RouterID source, std::vector ids); + + void + update_rcs(); + void set_router_whitelist( const std::vector& whitelist, @@ -225,12 +269,18 @@ namespace llarp /// put (or replace) the RC if we consider it valid (want_rc). returns true if put. bool - put_rc(RemoteRC rc); + put_rc( + RemoteRC rc, + rc_time now = + std::chrono::time_point_cast(std::chrono::system_clock::now())); /// if we consider it valid (want_rc), /// put this rc into the cache if it is not there or is newer than the one there already /// returns true if the rc was inserted bool - put_rc_if_newer(RemoteRC rc); + put_rc_if_newer( + RemoteRC rc, + rc_time now = + std::chrono::time_point_cast(std::chrono::system_clock::now())); }; } // namespace llarp diff --git a/llarp/router/router.cpp b/llarp/router/router.cpp index 8eb2c33c8..1ac96bb7d 100644 --- a/llarp/router/router.cpp +++ b/llarp/router/router.cpp @@ -835,6 +835,13 @@ namespace llarp next_rc_gossip = now_timepoint + RouterContact::STALE_AGE - random_delta; } + // (client-only) periodically fetch updated RCs + if (now_timepoint - last_rc_fetch > RC_UPDATE_INTERVAL) + { + node_db()->update_rcs(); + last_rc_fetch = now_timepoint; + } + // remove RCs for nodes that are no longer allowed by network policy node_db()->RemoveIf([&](const RemoteRC& rc) -> bool { // don't purge bootstrap nodes from nodedb diff --git a/llarp/router/router.hpp b/llarp/router/router.hpp index edf7cabad..cf2e60f7f 100644 --- a/llarp/router/router.hpp +++ b/llarp/router/router.hpp @@ -57,6 +57,8 @@ namespace llarp static constexpr size_t INTROSET_STORAGE_REDUNDANCY = (INTROSET_RELAY_REDUNDANCY * INTROSET_REQS_PER_RELAY); + static const std::chrono::seconds RC_UPDATE_INTERVAL = 5min; + struct Contacts; struct Router : std::enable_shared_from_this @@ -129,6 +131,9 @@ namespace llarp std::chrono::system_clock::time_point next_rc_gossip{ std::chrono::system_clock::time_point::min()}; + std::chrono::system_clock::time_point last_rc_fetch{ + std::chrono::system_clock::time_point::min()}; + // should we be sending padded messages every interval? bool send_padding = false; From c30a4dd44aafdc1045faa0a8ac7c865d2da05f1a Mon Sep 17 00:00:00 2001 From: Thomas Winget Date: Fri, 24 Nov 2023 19:40:51 -0500 Subject: [PATCH 2/7] Implement fetch RouterIDs method and usage Periodically clients will fetch the set of RouterIDs for all relays on the network. It will request this list from a number (12, currently) of relays, but as we are likely to be requesting from more relays than we want to have edge connections, this request will itself be relayed to the target source via one of our edges. As we can't trust our edge to do this honestly, the responses are signed by the source relay. TODO: the responses from all (12) relays are collected, then processed together. The reconciliation of their responses is not yet implemented. TODO: the source selection for this method obviously requires sources to begin with, but this is the method by which we learn of those...bootstrapping is still a bit in-progress, and will need to be finished for this. TODO: make Router call this periodically, as with RC fetching. --- llarp/link/link_manager.cpp | 116 +++++++++++++++++++++++++++++++++++ llarp/link/link_manager.hpp | 6 ++ llarp/messages/router_id.hpp | 17 +++++ llarp/nodedb.cpp | 88 +++++++++++++++++++++++++- llarp/nodedb.hpp | 12 +++- 5 files changed, 235 insertions(+), 4 deletions(-) create mode 100644 llarp/messages/router_id.hpp diff --git a/llarp/link/link_manager.cpp b/llarp/link/link_manager.cpp index a42d98b92..71fd79e38 100644 --- a/llarp/link/link_manager.cpp +++ b/llarp/link/link_manager.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -14,6 +15,7 @@ #include #include +#include #include namespace llarp @@ -544,6 +546,120 @@ namespace llarp } } + void + LinkManager::fetch_router_ids(const RouterID& source) + { + if (ep.conns.empty()) + { + log::debug(link_cat, "Not attempting to fetch Router IDs: not connected to any relays."); + return; + } + // TODO: randomize? Also, keep track of successful responses and drop this edge + // if not many come back successfully. + RouterID edge = ep.conns.begin()->first; + send_control_message( + edge, + "fetch_router_ids"s, + RouterIDFetch::serialize(source), + [this, source = source, edge = std::move(edge)](oxen::quic::message m) { + if (not m) + { + log::info( + link_cat, + "Error fetching RouterIDs from source \"{}\" via edge \"{}\"", + source, + edge); + node_db->ingest_router_ids(edge, {}); // empty response == failure + return; + } + try + { + oxenc::bt_dict_consumer btdc{m.body()}; + btdc.required("routers"); + auto router_id_strings = btdc.consume_list>(); + btdc.require_signature("signature", [&edge](ustring_view msg, ustring_view sig) { + if (sig.size() != 64) + throw std::runtime_error{"Invalid signature: not 64 bytes"}; + if (not crypto::verify(edge, msg, sig)) + throw std::runtime_error{ + "Failed to verify signature for fetch RouterIDs response."}; + }); + std::vector router_ids; + for (const auto& s : router_id_strings) + { + if (s.size() != RouterID::SIZE) + { + log::warning(link_cat, "Got bad RouterID from edge \"{}\".", edge); + return; + } + router_ids.emplace_back(s.data()); + } + node_db->ingest_router_ids(edge, std::move(router_ids)); + return; + } + catch (const std::exception& e) + { + log::info(link_cat, "Error handling fetch RouterIDs response: {}", e.what()); + } + node_db->ingest_router_ids(edge, {}); // empty response == failure + }); + } + + void + LinkManager::handle_fetch_router_ids(oxen::quic::message m) + { + try + { + oxenc::bt_dict_consumer btdc{m.body()}; + + auto source = btdc.require("source"); + + // if bad request, silently fail + if (source.size() != RouterID::SIZE) + return; + + const auto source_rid = RouterID{reinterpret_cast(source.data())}; + const auto our_rid = RouterID{router().pubkey()}; + + if (source_rid == our_rid) + { + oxenc::bt_dict_producer btdp; + { + auto btlp = btdp.append_list("routers"); + for (const auto& relay : node_db->whitelist()) + { + btlp.append(relay.ToView()); + } + } + btdp.append_signature("signature", [this](ustring_view to_sign) { + std::array sig; + + if (!crypto::sign(const_cast(sig.data()), _router.identity(), to_sign)) + throw std::runtime_error{"Failed to sign fetch RouterIDs response"}; + + return sig; + }); + m.respond(std::move(btdp).str()); + return; + } + + send_control_message( + source_rid, + "fetch_router_ids"s, + m.body_str(), + [source_rid = std::move(source_rid), + orig_mess = std::move(m)](oxen::quic::message m) mutable { + if (not m.timed_out) + orig_mess.respond(m.body_str()); + // on timeout, just silently drop (as original requester will just time out anyway) + }); + } + catch (const std::exception& e) + { + log::info(link_cat, "Error fulfilling fetch RouterIDs request: {}", e.what()); + } + } + bool LinkManager::have_connection_to(const RouterID& remote, bool client_only) const { diff --git a/llarp/link/link_manager.hpp b/llarp/link/link_manager.hpp index 219cd7ca8..899b25af4 100644 --- a/llarp/link/link_manager.hpp +++ b/llarp/link/link_manager.hpp @@ -232,6 +232,12 @@ namespace llarp void handle_fetch_rcs(oxen::quic::message m); + void + fetch_router_ids(const RouterID& source); + + void + handle_fetch_router_ids(oxen::quic::message m); + bool have_connection_to(const RouterID& remote, bool client_only = false) const; diff --git a/llarp/messages/router_id.hpp b/llarp/messages/router_id.hpp new file mode 100644 index 000000000..dbdd897de --- /dev/null +++ b/llarp/messages/router_id.hpp @@ -0,0 +1,17 @@ +#pragma once + +#include "common.hpp" + +namespace llarp::RouterIDFetch +{ + inline constexpr auto INVALID_REQUEST = "Invalid relay ID requested to relay response from."sv; + + inline static std::string + serialize(const RouterID& source) + { + // serialize_response is a bit weird here, and perhaps could have a sister function + // with the same purpose but as a request, but...it works. + return messages::serialize_response({{"source", source.ToView()}}); + } + +} // namespace llarp::RouterIDFetch diff --git a/llarp/nodedb.cpp b/llarp/nodedb.cpp index 9fbf8b4ad..3b5e6784c 100644 --- a/llarp/nodedb.cpp +++ b/llarp/nodedb.cpp @@ -114,7 +114,27 @@ namespace llarp void NodeDB::rotate_rc_source() - {} + { + auto conn_count = router.link_manager().get_num_connected(); + if (conn_count == 0) + { + // not connected to any nodes yet, so no sensible source + return; + } + RemoteRC new_source{}; + router.link_manager().get_random_connected(new_source); + if (conn_count == 1) + { + // only one connection, use it + rc_fetch_source = new_source.router_id(); + } + + while (new_source.router_id() == rc_fetch_source) + { + router.link_manager().get_random_connected(new_source); + } + rc_fetch_source = new_source.router_id(); + } // TODO: trust model void @@ -145,7 +165,16 @@ namespace llarp router_id_response_count++; if (router_id_response_count == router_id_fetch_sources.size()) { - // TODO: reconcile all the responses + // TODO: reconcile all the responses, for now just insert all + for (const auto& [rid, responses] : router_id_fetch_responses) + { + // TODO: empty == failure, handle that case + for (const auto& response : responses) + { + client_known_routers.insert(std::move(response)); + } + } + router_id_fetch_in_progress = false; } } @@ -166,6 +195,61 @@ namespace llarp rc_fetch_source, last_rc_update_relay_timestamp, std::move(needed)); } + void + NodeDB::fetch_router_ids() + { + if (router_id_fetch_in_progress) + return; + if (router_id_fetch_sources.empty()) + select_router_id_sources({}); + + // if we *still* don't have fetch sources, we can't exactly fetch... + if (router_id_fetch_sources.empty()) + { + log::info(logcat, "Attempting to fetch RouterIDs, but have no source from which to do so."); + return; + } + + router_id_fetch_in_progress = true; + router_id_response_count = 0; + router_id_fetch_responses.clear(); + for (const auto& rid : router_id_fetch_sources) + router.link_manager().fetch_router_ids(rid); + } + + void + NodeDB::select_router_id_sources(std::unordered_set excluded) + { + // TODO: bootstrapping should be finished before this is called, so this + // shouldn't happen; need to make sure that's the case. + if (client_known_routers.empty()) + return; + + // keep using any we've been using, but remove `excluded` ones + for (const auto& r : excluded) + router_id_fetch_sources.erase(r); + + // only know so many routers, so no need to randomize + if (client_known_routers.size() <= (ROUTER_ID_SOURCE_COUNT + excluded.size())) + { + for (const auto& r : client_known_routers) + { + if (excluded.count(r)) + continue; + router_id_fetch_sources.insert(r); + } + } + + // select at random until we have chosen enough + while (router_id_fetch_sources.size() < ROUTER_ID_SOURCE_COUNT) + { + RouterID r; + std::sample(client_known_routers.begin(), client_known_routers.end(), &r, 1, csrng); + if (excluded.count(r) == 0) + router_id_fetch_sources.insert(r); + } + } + void NodeDB::set_router_whitelist( const std::vector& whitelist, diff --git a/llarp/nodedb.hpp b/llarp/nodedb.hpp index 58eea3b33..8146c9c71 100644 --- a/llarp/nodedb.hpp +++ b/llarp/nodedb.hpp @@ -54,7 +54,7 @@ namespace llarp std::unordered_map last_rc_update_times; // Router list for clients - std::unordered_set client_known_rcs; + std::unordered_set client_known_routers; // only ever use to specific edges as path first-hops std::unordered_set pinned_edges; @@ -62,10 +62,12 @@ namespace llarp // rc update info RouterID rc_fetch_source; rc_time last_rc_update_relay_timestamp; + static constexpr auto ROUTER_ID_SOURCE_COUNT = 12; std::unordered_set router_id_fetch_sources; std::unordered_map> router_id_fetch_responses; // process responses once all are received (or failed/timed out) size_t router_id_response_count{0}; + bool router_id_fetch_in_progress{false}; bool want_rc(const RouterID& rid) const; @@ -104,7 +106,7 @@ namespace llarp return last_rc_update_times; } - // If we receive a set of RCs from our current RC source relay, we consider + // If we receive a bad set of RCs from our current RC source relay, we consider // that relay to be a bad source of RCs and we randomly choose a new one. // // When using a new RC fetch relay, we first re-fetch the full RC list and, if @@ -124,6 +126,12 @@ namespace llarp void update_rcs(); + void + fetch_router_ids(); + + void + select_router_id_sources(std::unordered_set excluded); + void set_router_whitelist( const std::vector& whitelist, From b353fd4095ad67f7123597981ac5d354d477c32a Mon Sep 17 00:00:00 2001 From: Thomas Winget Date: Mon, 27 Nov 2023 11:31:43 -0500 Subject: [PATCH 3/7] Minor RC load/store/prune fixups --- llarp/nodedb.cpp | 111 ++++++++++++++++++-------------- llarp/nodedb.hpp | 7 +- llarp/router_contact_remote.cpp | 18 +++--- 3 files changed, 75 insertions(+), 61 deletions(-) diff --git a/llarp/nodedb.cpp b/llarp/nodedb.cpp index 3b5e6784c..d8e16e49d 100644 --- a/llarp/nodedb.cpp +++ b/llarp/nodedb.cpp @@ -109,7 +109,9 @@ namespace llarp { bootstraps.clear(); // this function really shouldn't be called more than once, but... for (const auto& rc : rcs) + { bootstraps.emplace(rc.router_id(), rc); + } } void @@ -315,57 +317,56 @@ namespace llarp if (m_Root.empty()) return; - router.loop()->call([this]() { - std::set purge; + std::set purge; - for (const char& ch : skiplist_subdirs) - { - if (!ch) - continue; - std::string p; - p += ch; - fs::path sub = m_Root / p; - - llarp::util::IterDir(sub, [&](const fs::path& f) -> bool { - // skip files that are not suffixed with .signed - if (not(fs::is_regular_file(f) and f.extension() == RC_FILE_EXT)) - return true; - - RemoteRC rc{}; - - if (not rc.read(f)) - { - // try loading it, purge it if it is junk - purge.emplace(f); - return true; - } - - if (rc.is_expired(time_now_ms())) - { - // rc expired dont load it and purge it later - purge.emplace(f); - return true; - } - - // validate signature and purge known_rcs with invalid signatures - // load ones with valid signatures - if (rc.verify()) - known_rcs.emplace(rc.router_id(), rc); - else - purge.emplace(f); + const auto now = time_now_ms(); + + for (const char& ch : skiplist_subdirs) + { + if (!ch) + continue; + std::string p; + p += ch; + fs::path sub = m_Root / p; + llarp::util::IterDir(sub, [&](const fs::path& f) -> bool { + // skip files that are not suffixed with .signed + if (not(fs::is_regular_file(f) and f.extension() == RC_FILE_EXT)) return true; - }); - } - if (not purge.empty()) - { - log::warning(logcat, "removing {} invalid RCs from disk", purge.size()); + RemoteRC rc{}; - for (const auto& fpath : purge) - fs::remove(fpath); - } - }); + if (not rc.read(f)) + { + // try loading it, purge it if it is junk + purge.emplace(f); + return true; + } + + if (rc.is_expired(now)) + { + // rc expired dont load it and purge it later + purge.emplace(f); + return true; + } + + known_rcs.emplace(rc.router_id(), rc); + // TODO: the list of relays should be maintained and stored separately from + // the RCs, as we keep older RCs around in case we go offline and need to + // bootstrap, but they shouldn't be in the "good relays" list. + client_known_routers.insert(rc.router_id()); + + return true; + }); + } + + if (not purge.empty()) + { + log::warning(logcat, "removing {} invalid RCs from disk", purge.size()); + + for (const auto& fpath : purge) + fs::remove(fpath); + } } void @@ -407,11 +408,21 @@ namespace llarp } void - NodeDB::remove_stale_rcs(std::unordered_set keep, llarp_time_t cutoff) + NodeDB::remove_stale_rcs() { - (void)keep; - (void)cutoff; - // TODO: handling of "stale" is pending change, removing here for now. + auto cutoff_time = + std::chrono::time_point_cast(std::chrono::system_clock::now()); + cutoff_time -= router.is_service_node() ? RouterContact::OUTDATED_AGE : RouterContact::LIFETIME; + for (auto itr = known_rcs.begin(); itr != known_rcs.end();) + { + if (cutoff_time > itr->second.timestamp()) + { + log::info(logcat, "Pruning RC for {}, as it is too old to keep.", itr->first); + known_rcs.erase(itr); + continue; + } + itr++; + } } bool diff --git a/llarp/nodedb.hpp b/llarp/nodedb.hpp index 8146c9c71..a411df4f1 100644 --- a/llarp/nodedb.hpp +++ b/llarp/nodedb.hpp @@ -271,9 +271,12 @@ namespace llarp }); } - /// remove rcs that are not in keep and have been inserted before cutoff + /// remove rcs that are older than we want to keep. For relays, this is when + /// they become "outdated" (i.e. 12hrs). Clients will hang on to them until + /// they are fully "expired" (i.e. 30 days), as the client may go offline for + /// some time and can still try to use those RCs to re-learn the network. void - remove_stale_rcs(std::unordered_set keep, llarp_time_t cutoff); + remove_stale_rcs(); /// put (or replace) the RC if we consider it valid (want_rc). returns true if put. bool diff --git a/llarp/router_contact_remote.cpp b/llarp/router_contact_remote.cpp index 8cb127d4c..0d7eeb05b 100644 --- a/llarp/router_contact_remote.cpp +++ b/llarp/router_contact_remote.cpp @@ -52,8 +52,8 @@ namespace llarp if (sig.size() != 64) throw std::runtime_error{"Invalid signature: not 64 bytes"}; - if (is_expired(time_now_ms()) and reject_expired) - throw std::runtime_error{"Unable to verify expired RemoteRC!"}; + if (reject_expired and is_expired(time_now_ms())) + throw std::runtime_error{"Rejecting expired RemoteRC!"}; // TODO: revisit if this is needed; detail from previous implementation const auto* net = net::Platform::Default_ptr(); @@ -79,19 +79,19 @@ namespace llarp try { util::file_to_buffer(fname, buf.data(), MAX_RC_SIZE); + + oxenc::bt_dict_consumer btdc{buf}; + bt_load(btdc); + bt_verify(btdc); + + _payload = buf; } catch (const std::exception& e) { - log::error(logcat, "Failed to read RC from {}: {}", fname, e.what()); + log::error(logcat, "Failed to read or validate RC from {}: {}", fname, e.what()); return false; } - oxenc::bt_dict_consumer btdc{buf}; - bt_load(btdc); - bt_verify(btdc); - - _payload = buf; - return true; } From 14b9aa20eed459aa840e20b78eb22a5429bcbb1f Mon Sep 17 00:00:00 2001 From: Thomas Winget Date: Mon, 27 Nov 2023 11:46:14 -0500 Subject: [PATCH 4/7] (client) periodically fetch RouterIDs from peers --- llarp/nodedb.cpp | 2 +- llarp/nodedb.hpp | 2 +- llarp/router/router.cpp | 18 ++++++++++++++---- llarp/router/router.hpp | 3 +++ 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/llarp/nodedb.cpp b/llarp/nodedb.cpp index d8e16e49d..d285b7900 100644 --- a/llarp/nodedb.cpp +++ b/llarp/nodedb.cpp @@ -181,7 +181,7 @@ namespace llarp } void - NodeDB::update_rcs() + NodeDB::fetch_rcs() { std::vector needed; diff --git a/llarp/nodedb.hpp b/llarp/nodedb.hpp index a411df4f1..c1d49b3ea 100644 --- a/llarp/nodedb.hpp +++ b/llarp/nodedb.hpp @@ -124,7 +124,7 @@ namespace llarp ingest_router_ids(RouterID source, std::vector ids); void - update_rcs(); + fetch_rcs(); void fetch_router_ids(); diff --git a/llarp/router/router.cpp b/llarp/router/router.cpp index 1ac96bb7d..7a23a3748 100644 --- a/llarp/router/router.cpp +++ b/llarp/router/router.cpp @@ -835,11 +835,21 @@ namespace llarp next_rc_gossip = now_timepoint + RouterContact::STALE_AGE - random_delta; } - // (client-only) periodically fetch updated RCs - if (now_timepoint - last_rc_fetch > RC_UPDATE_INTERVAL) + if (not is_snode) { - node_db()->update_rcs(); - last_rc_fetch = now_timepoint; + // (client-only) periodically fetch updated RCs + if (now_timepoint - last_rc_fetch > RC_UPDATE_INTERVAL) + { + node_db()->fetch_rcs(); + last_rc_fetch = now_timepoint; + } + + // (client-only) periodically fetch updated RouterID list + if (now_timepoint - last_routerid_fetch > ROUTERID_UPDATE_INTERVAL) + { + node_db()->fetch_router_ids(); + last_routerid_fetch = now_timepoint; + } } // remove RCs for nodes that are no longer allowed by network policy diff --git a/llarp/router/router.hpp b/llarp/router/router.hpp index cf2e60f7f..cdd6da7ab 100644 --- a/llarp/router/router.hpp +++ b/llarp/router/router.hpp @@ -58,6 +58,7 @@ namespace llarp (INTROSET_RELAY_REDUNDANCY * INTROSET_REQS_PER_RELAY); static const std::chrono::seconds RC_UPDATE_INTERVAL = 5min; + static const std::chrono::seconds ROUTERID_UPDATE_INTERVAL = 1h; struct Contacts; @@ -133,6 +134,8 @@ namespace llarp std::chrono::system_clock::time_point last_rc_fetch{ std::chrono::system_clock::time_point::min()}; + std::chrono::system_clock::time_point last_routerid_fetch{ + std::chrono::system_clock::time_point::min()}; // should we be sending padded messages every interval? bool send_padding = false; From dab1e06892f70583ae6ee077ca9551ac7d759474 Mon Sep 17 00:00:00 2001 From: Thomas Winget Date: Mon, 27 Nov 2023 13:42:39 -0500 Subject: [PATCH 5/7] FindRouterMessage is gone, also clang-format --- llarp/link/link_manager.cpp | 6 ++--- llarp/messages/dht.hpp | 45 ------------------------------------- 2 files changed, 2 insertions(+), 49 deletions(-) diff --git a/llarp/link/link_manager.cpp b/llarp/link/link_manager.cpp index 71fd79e38..e555e0697 100644 --- a/llarp/link/link_manager.cpp +++ b/llarp/link/link_manager.cpp @@ -500,8 +500,7 @@ namespace llarp { log::info( link_cat, "Remote requested too many relay IDs (greater than 1/4 of what we have)."); - m.respond( - serialize_response({{messages::STATUS_KEY, RCFetchMessage::INVALID_REQUEST}})); + m.respond(serialize_response({{messages::STATUS_KEY, RCFetchMessage::INVALID_REQUEST}})); return; } @@ -510,8 +509,7 @@ namespace llarp { if (sv.size() != RouterID::SIZE) { - m.respond(serialize_response( - {{messages::STATUS_KEY, RCFetchMessage::INVALID_REQUEST}})); + m.respond(serialize_response({{messages::STATUS_KEY, RCFetchMessage::INVALID_REQUEST}})); return; } explicit_relays.emplace(reinterpret_cast(sv.data())); diff --git a/llarp/messages/dht.hpp b/llarp/messages/dht.hpp index e86d1f153..2a0e38805 100644 --- a/llarp/messages/dht.hpp +++ b/llarp/messages/dht.hpp @@ -4,51 +4,6 @@ namespace llarp { - namespace FindRouterMessage - { - inline auto RETRY_EXP = "RETRY AS EXPLORATORY"sv; - inline auto RETRY_ITER = "RETRY AS ITERATIVE"sv; - inline auto RETRY_NEW = "RETRY WITH NEW RECIPIENT"sv; - - inline static std::string - serialize(const RouterID& rid, bool is_iterative, bool is_exploratory) - { - oxenc::bt_dict_producer btdp; - - try - { - btdp.append("E", is_exploratory ? 1 : 0); - btdp.append("I", is_iterative ? 1 : 0); - btdp.append("K", rid.ToView()); - } - catch (...) - { - log::error(link_cat, "Error: FindRouterMessage failed to bt encode contents!"); - } - - return std::move(btdp).str(); - } - - inline static std::string - serialize(const std::string& rid, bool is_iterative, bool is_exploratory) - { - oxenc::bt_dict_producer btdp; - - try - { - btdp.append("E", is_exploratory ? 1 : 0); - btdp.append("I", is_iterative ? 1 : 0); - btdp.append("K", std::move(rid)); - } - catch (...) - { - log::error(link_cat, "Error: FindRouterMessage failed to bt encode contents!"); - } - - return std::move(btdp).str(); - } - } // namespace FindRouterMessage - namespace FindIntroMessage { inline auto NOT_FOUND = "NOT FOUND"sv; From 70d2052cd4e5785abcfdfdcd93569218670a5b32 Mon Sep 17 00:00:00 2001 From: Thomas Winget Date: Mon, 27 Nov 2023 14:05:11 -0500 Subject: [PATCH 6/7] minor fixups --- llarp/link/link_manager.cpp | 24 ++++++++++++------------ llarp/messages/rc.hpp | 8 +++++--- llarp/nodedb.cpp | 2 +- llarp/nodedb.hpp | 2 +- 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/llarp/link/link_manager.cpp b/llarp/link/link_manager.cpp index e555e0697..77f699740 100644 --- a/llarp/link/link_manager.cpp +++ b/llarp/link/link_manager.cpp @@ -447,18 +447,18 @@ namespace llarp log::info(link_cat, "RC Fetch to {} timed out", source); return; } - if (not m) - { - log::info(link_cat, "RC Fetch to {} returned error.", source); - return; - } - try { oxenc::bt_dict_consumer btdc{m.body()}; - btdc.required("rcs"); - auto btlc = btdc.consume_list_consumer(); - auto timestamp = rc_time{std::chrono::seconds{btdc.require("time")}}; + if (not m) + { + auto reason = btdc.require(messages::STATUS_KEY); + log::info(link_cat, "RC Fetch to {} returned error: {}", source, reason); + return; + } + + auto btlc = btdc.require("rcs"sv); + auto timestamp = rc_time{std::chrono::seconds{btdc.require("time"sv)}}; std::vector rcs; while (not btlc.is_finished()) @@ -473,7 +473,7 @@ namespace llarp catch (const std::exception& e) { // TODO: Inform NodeDB of failure (perhaps just a call to rotate_rc_source()) - log::info(link_cat, "Failed to parse RC Fetch response from {}", source); + log::info(link_cat, "Failed to parse RC Fetch response from {}: {}", source, e.what()); return; } }); @@ -518,7 +518,7 @@ namespace llarp oxenc::bt_dict_producer resp; { - auto btlp = resp.append_list("rcs"); + auto rc_bt_list = resp.append_list("rcs"); const auto& last_time = node_db->get_last_rc_update_times(); @@ -529,7 +529,7 @@ namespace llarp for (const auto& [_, rc] : rcs) { if (last_time.at(rc.router_id()) > since_time or explicit_relays.count(rc.router_id())) - btlp.append_encoded(rc.view()); + rc_bt_list.append_encoded(rc.view()); } } diff --git a/llarp/messages/rc.hpp b/llarp/messages/rc.hpp index 3aafc5e4c..d9eb2ab82 100644 --- a/llarp/messages/rc.hpp +++ b/llarp/messages/rc.hpp @@ -14,9 +14,11 @@ namespace llarp::RCFetchMessage try { btdp.append("since", since.time_since_epoch() / 1s); - auto id_list = btdp.append_list("explicit_ids"); - for (const auto& rid : explicit_ids) - id_list.append(rid.ToView()); + { + auto id_list = btdp.append_list("explicit_ids"); + for (const auto& rid : explicit_ids) + id_list.append(rid.ToView()); + } } catch (...) { diff --git a/llarp/nodedb.cpp b/llarp/nodedb.cpp index d285b7900..f5e7018fc 100644 --- a/llarp/nodedb.cpp +++ b/llarp/nodedb.cpp @@ -203,7 +203,7 @@ namespace llarp if (router_id_fetch_in_progress) return; if (router_id_fetch_sources.empty()) - select_router_id_sources({}); + select_router_id_sources(); // if we *still* don't have fetch sources, we can't exactly fetch... if (router_id_fetch_sources.empty()) diff --git a/llarp/nodedb.hpp b/llarp/nodedb.hpp index c1d49b3ea..f4c3b629d 100644 --- a/llarp/nodedb.hpp +++ b/llarp/nodedb.hpp @@ -130,7 +130,7 @@ namespace llarp fetch_router_ids(); void - select_router_id_sources(std::unordered_set excluded); + select_router_id_sources(std::unordered_set excluded = {}); void set_router_whitelist( From 2b2d8c51a139d7001912dc59d5ae03a6e30f8bde Mon Sep 17 00:00:00 2001 From: Thomas Winget Date: Mon, 27 Nov 2023 19:07:29 -0500 Subject: [PATCH 7/7] rework rotate_rc_source function, just for 'normal operation' now --- llarp/nodedb.cpp | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/llarp/nodedb.cpp b/llarp/nodedb.cpp index f5e7018fc..7861b0041 100644 --- a/llarp/nodedb.cpp +++ b/llarp/nodedb.cpp @@ -114,23 +114,52 @@ namespace llarp } } + /// Called in normal operation when the relay we fetched RCs from gives either a "bad" + /// response or a timeout. Attempts to switch to a new relay as our RC source, using + /// existing connections if possible, and respecting pinned edges. void NodeDB::rotate_rc_source() { auto conn_count = router.link_manager().get_num_connected(); + + // This function makes no sense to be called if we have no connections... if (conn_count == 0) - { - // not connected to any nodes yet, so no sensible source - return; - } + throw std::runtime_error{"Called rotate_rc_source with no connections, does not make sense!"}; + + // We should not be in this function if client_known_routers isn't populated + if (client_known_routers.size() <= 1) + throw std::runtime_error{"Cannot rotate RC source without RC source(s) to rotate to!"}; + RemoteRC new_source{}; router.link_manager().get_random_connected(new_source); if (conn_count == 1) { - // only one connection, use it - rc_fetch_source = new_source.router_id(); + // if we only have one connection, it must be current rc fetch source + assert(new_source.router_id() == rc_fetch_source); + + if (pinned_edges.size() == 1) + { + // only one pinned edge set, use it even though it gave unsatisfactory RCs + assert(rc_fetch_source == *(pinned_edges.begin())); + log::warning( + logcat, + "Single pinned edge {} gave bad RC response; still using it despite this.", + rc_fetch_source); + return; + } + + // only one connection, choose a new relay to connect to for rc fetching + + RouterID r = rc_fetch_source; + while (r == rc_fetch_source) + { + std::sample(client_known_routers.begin(), client_known_routers.end(), &r, 1, csrng); + } + rc_fetch_source = std::move(r); + return; } + // choose one of our other existing connections to use as the RC fetch source while (new_source.router_id() == rc_fetch_source) { router.link_manager().get_random_connected(new_source);