adding kademlia explicitly

macdev
quadrismegistus 4 years ago
parent e9885c5360
commit 73432aea41

@ -5,6 +5,13 @@ DEFAULT_SCREEN='feed'
HORIZONTAL = False
WINDOW_SIZE = (1136,640) if HORIZONTAL else (640,1136)
# monkeypatching the things that asyncio needs
import subprocess
subprocess.PIPE = -1 # noqa
subprocess.STDOUT = -2 # noqa
subprocess.DEVNULL = -3 # noqa
import asyncio
import os
os.environ['KIVY_EVENTLOOP'] = 'async'
@ -45,7 +52,8 @@ import sys
sys.path.append("..") # Adds higher directory to python modules path.
from p2p import p2p,crypto,api
from kivy.event import EventDispatcher
import threading,asyncio
import threading,asyncio,sys
Window.size = WINDOW_SIZE
@ -152,6 +160,12 @@ class MainApp(MDApp):
def __init__(self, **kwargs):
super().__init__(**kwargs)
# start looping
# self.log('PATH',sys.path)
# sys.path.append('./p2p')
self.event_loop_worker = None
self.loop=asyncio.get_event_loop()

@ -1,11 +1,21 @@
import os,time
import os,time,sys,logging
from pathlib import Path
import asyncio
# handler = logging.StreamHandler()
# formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
# handler.setFormatter(formatter)
# logger = logging.getLogger(__file__)
# logger.addHandler(handler)
# logger.setLevel(logging.DEBUG)
sys.path.append('../p2p')
# logger.info(os.getcwd(), sys.path)
try:
from .crypto import *
from .p2p import *
from .kad import *
except ImportError:
except ModuleNotFoundError:
from crypto import *
from p2p import *
from kad import KadServer
@ -23,7 +33,7 @@ UPLOAD_DIR = 'uploads/'
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'}
# PORT_SPEAK = 8468
PORT_LISTEN = 8468
PORT_LISTEN = 5637
# Api Functions
from threading import Thread
@ -61,28 +71,28 @@ class Api(object):
# self.selfless.start()
# connect?
self._node=self.connect()
#self._node=self.connect()
pass
@property
def node(self):
if not hasattr(self,'_node'):
self._node=self.connect()
return self._node
# @property
# def node(self):
# if not hasattr(self,'_node'):
# self._node=self.connect()
# return self._node
def connect(self,port=PORT_LISTEN):
self.log('connecting...')
async def _connect():
return await _getdb(self,port)
return asyncio.run(_connect())
# def connect(self,port=PORT_LISTEN):
# self.log('connecting...')
# async def _connect():
# return await _getdb(self,port)
# return asyncio.run(_connect())
def get(self,key_or_keys):
async def _get():
# self.log('async _get()',self.node)
#node=await _getdb(self,PORT_LISTEN+1)
node=self.node
node=await _getdb(self)
#node=self.node
if type(key_or_keys) in {list,tuple,dict}:
keys = key_or_keys
@ -93,7 +103,7 @@ class Api(object):
key = key_or_keys
res = await node.get(key)
# node.stop()
node.stop()
return res
return asyncio.run(_get())
@ -112,16 +122,16 @@ class Api(object):
return None if res is None else json.loads(res)
def set(self,key_or_keys,value_or_values):
async def _set():
async def _go():
# self.log('async _set()',self.node)
# node=self.node
#node=await _getdb(self,PORT_LISTEN+1)
node=self.node
node=await _getdb(self)
# node=self.node
if type(key_or_keys) in {list,tuple,dict}:
keys = key_or_keys
values = value_or_values
self.log('# keys and values?',len(keys),len(values))
assert len(keys)==len(values)
res = await asyncio.gather(*[node.set(key,value) for key,value in zip(keys,values)])
# self.log('RES?',res)
@ -130,13 +140,38 @@ class Api(object):
value = value_or_values
res = await node.set(key,value) #'this is a test')
#node.stop()
node.stop()
return res
# loop=asyncio.get_event_loop()
# loop.create_task(_set())
return asyncio.run(_set(), debug=True)
# async def _set(key,value):
# import asyncio
# from kademlia.network import Server
# # Create a node and start listening on port 5678
# node = Server()
# await node.listen(5678)
# # Bootstrap the node by connecting to other known nodes, in this case
# # replace 123.123.123.123 with the IP of another node and optionally
# # give as many ip/port combos as you can for other nodes.
# await node.bootstrap(NODES_PRIME)
# # set a value for the key "my-key" on the network
# await node.set(key, value)
# # get the value associated with "my-key" from the network
# result = await node.get(key)
# print(result)
# return result
res= asyncio.run(_go(), debug=True)#
# res= asyncio.run(_set(key_or_keys,value_or_values), debug=True)#
print('res = ',res)
return res
def set_json(self,key,value):
value_json = jsonify(value)
@ -292,11 +327,11 @@ class Api(object):
self.log('Api.post() got data back from set_json():',res)
# ## add to channels
# self.append_json('/posts/channel/earth', post_id)
self.append_json('/posts/channel/earth', post_id)
# ## add to user
# un=data.get('author')
# if un: self.append_json('/posts/author/'+un, post_id)
un=data.get('author')
if un: self.append_json('/posts/author/'+un, post_id)
if res:
return {'success':'Posted! %s' % post_id, 'post_id':post_id}
@ -469,22 +504,68 @@ def bytes_from_file(filename,chunksize=8192):
# bufsize = io.DEFAULT_BUFFER_SIZE
# return bufsize
def test_api():
api = Api()
# api.set(['a','b','c'],[1,2,3])
api.set_json('whattttt',{'aaaaa':12222})
def test_basic():
import asyncio
from kademlia.network import Server
def test():
#api = Api()
# not working!
#api.set_json('my key',{'a':'value'})
# direct node
node = asyncio.run(_getdb(None, port=8368))
print(node)
#async def set(node,key,value):
# await node.set(key,value)
key='blah blah'
value='blee blee'
res = asyncio.run(node.set(key,value))
print(res)
async def run():
# Create a node and start listening on port 5678
node = Server()
await node.listen(5678)
# Bootstrap the node by connecting to other known nodes, in this case
# replace 123.123.123.123 with the IP of another node and optionally
# give as many ip/port combos as you can for other nodes.
await node.bootstrap(NODES_PRIME)
# set a value for the key "my-key" on the network
await node.set("my-key", "my awesome value")
# get the value associated with "my-key" from the network
result = await node.get("my-key")
print(result)
return result
res = asyncio.run(run())
print('res = ',res)
# res = asyncio.run(node.set(key,value))
# print(res)
def test_provided_eg():
    """
    Smoke-test the stock kademlia usage example: start a node, bootstrap it
    against NODES_PRIME, store one key and print what comes back.
    """
    import asyncio
    from kademlia.network import Server

    async def _demo():
        # Bring up a fresh node listening on port 5678.
        server = Server()
        await server.listen(5678)

        # Join the network through the already-known peers.
        await server.bootstrap(NODES_PRIME)

        # Store a value under "my-key" ...
        await server.set("my-key", "my awesome value")

        # ... and read it back from the network.
        fetched = await server.get("my-key")
        print(fetched)

    asyncio.run(_demo())
if __name__=='__main__':
test()
test_api()

@ -0,0 +1,5 @@
"""
Kademlia is a Python implementation of the Kademlia protocol which
utilizes the asyncio library.
"""
__version__ = "2.2.1"

@ -0,0 +1,183 @@
from collections import Counter
import logging
from kademlia.node import Node, NodeHeap
from kademlia.utils import gather_dict
log = logging.getLogger(__name__) # pylint: disable=invalid-name
# pylint: disable=too-few-public-methods
class SpiderCrawl:
    """
    Base crawler that walks the network looking for a given 160-bit key.

    Subclasses provide ``find`` (which RPC to issue each round) and
    ``_nodes_found`` (how to digest one round of responses).
    """

    def __init__(self, protocol, node, peers, ksize, alpha):
        """
        Set up a new crawler.

        Args:
            protocol: A :class:`~kademlia.protocol.KademliaProtocol` instance.
            node: A :class:`~kademlia.node.Node` representing the key we're
                  looking for
            peers: A list of :class:`~kademlia.node.Node` instances that
                   provide the entry point for the network
            ksize: The value for k based on the paper
            alpha: The value for alpha based on the paper
        """
        self.protocol = protocol
        self.ksize = ksize
        self.alpha = alpha
        self.node = node
        # Heap of the closest peers seen so far, capped at ksize entries.
        self.nearest = NodeHeap(self.node, self.ksize)
        self.last_ids_crawled = []
        log.info("creating spider with peers: %s", peers)
        self.nearest.push(peers)

    async def _find(self, rpcmethod):
        """
        Run one crawl round and hand the responses to ``_nodes_found``.

        Args:
            rpcmethod: The protocol's call_find_value or call_find_node.

        The process:
          1. Query up to ALPHA of the nearest peers that have not been
             contacted yet; results are merged into the nearest list.
          2. The nearest list remembers who has been contacted and exposes
             only the KSIZE closest entries.
          3. If the nearest list did not change since the previous round,
             query every peer that has not been contacted yet.
          4. Repeat until every peer in the nearest list has been contacted.
        """
        log.info("crawling network with nearest: %s", str(tuple(self.nearest)))

        current_ids = self.nearest.get_ids()
        if current_ids == self.last_ids_crawled:
            # No progress since last round: widen the query to everybody.
            batch_size = len(self.nearest)
        else:
            batch_size = self.alpha
        self.last_ids_crawled = current_ids

        pending = {}
        for candidate in self.nearest.get_uncontacted()[:batch_size]:
            pending[candidate.id] = rpcmethod(candidate, self.node)
            self.nearest.mark_contacted(candidate)

        responses = await gather_dict(pending)
        return await self._nodes_found(responses)

    async def _nodes_found(self, responses):
        """Process one round of responses; must be provided by subclasses."""
        raise NotImplementedError
class ValueSpiderCrawl(SpiderCrawl):
    """Crawler that looks for the value stored under a key."""

    def __init__(self, protocol, node, peers, ksize, alpha):
        """Same arguments as :class:`SpiderCrawl`."""
        super().__init__(protocol, node, peers, ksize, alpha)
        # Remember the single nearest node that did NOT have the value
        # (section 2.3 of the paper) so the value can be cached there.
        self.nearest_without_value = NodeHeap(self.node, 1)

    async def find(self):
        """
        Find either the closest nodes or the value requested.
        """
        return await self._find(self.protocol.call_find_value)

    async def _nodes_found(self, responses):
        """
        Handle the result of one iteration of ``_find``.

        Returns the value when any peer had it, ``None`` once every known
        peer has been contacted without success, and otherwise starts the
        next crawl round.
        """
        unresponsive = []
        values = []
        for peer_id, raw in responses.items():
            reply = RPCFindResponse(raw)
            if not reply.happened():
                unresponsive.append(peer_id)
                continue
            if reply.has_value():
                values.append(reply.get_value())
                continue
            # Peer answered with closer nodes instead of the value.
            without_value = self.nearest.get_node(peer_id)
            self.nearest_without_value.push(without_value)
            self.nearest.push(reply.get_node_list())
        self.nearest.remove(unresponsive)

        if values:
            return await self._handle_found_values(values)
        if self.nearest.have_contacted_all():
            # not found!
            return None
        return await self.find()

    async def _handle_found_values(self, values):
        """
        Pick the most common of the returned values (warning when peers
        disagree) and ask the nearest node that did *not* have the value
        to store it.
        """
        tally = Counter(values)
        if len(tally) != 1:
            log.warning("Got multiple values for key %i: %s",
                        self.node.long_id, str(values))
        winner = tally.most_common(1)[0][0]

        cache_target = self.nearest_without_value.popleft()
        if cache_target:
            await self.protocol.call_store(cache_target, self.node.id, winner)
        return winner
class NodeSpiderCrawl(SpiderCrawl):
    """Crawler that looks for the nodes closest to a key."""

    async def find(self):
        """
        Find the closest nodes.
        """
        return await self._find(self.protocol.call_find_node)

    async def _nodes_found(self, responses):
        """
        Handle the result of one iteration of ``_find``.

        Returns the list of nearest nodes once all of them have been
        contacted; otherwise starts the next crawl round.
        """
        unresponsive = []
        for peer_id, raw in responses.items():
            reply = RPCFindResponse(raw)
            if reply.happened():
                self.nearest.push(reply.get_node_list())
            else:
                unresponsive.append(peer_id)
        self.nearest.remove(unresponsive)

        if not self.nearest.have_contacted_all():
            return await self.find()
        return list(self.nearest)
class RPCFindResponse:
    """Thin wrapper around the raw result of a find RPC."""

    def __init__(self, response):
        """
        Args:
            response: A tuple of (<response received>, <value>) where
                      <value> is a list of tuples when the key was not
                      found, or a dictionary of {'value': v} where v is
                      the value desired
        """
        self.response = response

    def happened(self):
        """
        Did the other host actually respond?
        """
        received = self.response[0]
        return received

    def has_value(self):
        """True when the payload is a dict, i.e. carries the value."""
        payload = self.response[1]
        return isinstance(payload, dict)

    def get_value(self):
        """Return the value carried by the payload."""
        payload = self.response[1]
        return payload['value']

    def get_node_list(self):
        """
        Return the nodes listed in the payload as Node objects.  A missing
        or empty payload gives an empty list.
        """
        entries = self.response[1]
        if not entries:
            return []
        return [Node(*entry) for entry in entries]

@ -0,0 +1,261 @@
"""
Package for interacting on the network at a high level.
"""
import random
import pickle
import asyncio
import logging
from kademlia.protocol import KademliaProtocol
from kademlia.utils import digest
from kademlia.storage import ForgetfulStorage
from kademlia.node import Node
from kademlia.crawling import ValueSpiderCrawl
from kademlia.crawling import NodeSpiderCrawl
log = logging.getLogger(__name__) # pylint: disable=invalid-name
# pylint: disable=too-many-instance-attributes
class Server:
    """
    High level view of a node instance.  This is the object that should be
    created to start listening as an active node on the network.
    """

    protocol_class = KademliaProtocol

    def __init__(self, ksize=20, alpha=3, node_id=None, storage=None):
        """
        Create a server instance.  Nothing is bound to the network until
        :meth:`listen` is awaited.

        Args:
            ksize (int): The k parameter from the paper
            alpha (int): The alpha parameter from the paper
            node_id: The id for this node on the network.
            storage: An instance that implements the interface
                     :class:`~kademlia.storage.IStorage`
        """
        self.ksize = ksize
        self.alpha = alpha
        self.storage = storage or ForgetfulStorage()
        self.node = Node(node_id or digest(random.getrandbits(255)))
        self.transport = None
        self.protocol = None
        # Handles of the pending call_later timers, so stop() can cancel them.
        self.refresh_loop = None
        self.save_state_loop = None

    def stop(self):
        """Close the transport and cancel any scheduled refresh/save timers."""
        if self.transport is not None:
            self.transport.close()

        if self.refresh_loop:
            self.refresh_loop.cancel()

        if self.save_state_loop:
            self.save_state_loop.cancel()

    def _create_protocol(self):
        """Build the protocol instance handed to the datagram endpoint."""
        return self.protocol_class(self.node, self.storage, self.ksize)

    async def listen(self, port, interface='0.0.0.0'):
        """
        Start listening on the given port.

        Provide interface="::" to accept ipv6 address
        """
        loop = asyncio.get_event_loop()
        listen = loop.create_datagram_endpoint(self._create_protocol,
                                               local_addr=(interface, port))
        log.info("Node %i listening on %s:%i",
                 self.node.long_id, interface, port)
        self.transport, self.protocol = await listen
        # finally, schedule refreshing table
        self.refresh_table()

    def refresh_table(self):
        """Kick off a table refresh now and reschedule itself in an hour."""
        log.debug("Refreshing routing table")
        asyncio.ensure_future(self._refresh_table())
        loop = asyncio.get_event_loop()
        self.refresh_loop = loop.call_later(3600, self.refresh_table)

    async def _refresh_table(self):
        """
        Refresh buckets that haven't had any lookups in the last hour
        (per section 2.3 of the paper).
        """
        results = []
        for node_id in self.protocol.get_refresh_ids():
            node = Node(node_id)
            nearest = self.protocol.router.find_neighbors(node, self.alpha)
            spider = NodeSpiderCrawl(self.protocol, node, nearest,
                                     self.ksize, self.alpha)
            results.append(spider.find())

        # do our crawling
        await asyncio.gather(*results)

        # now republish keys older than one hour
        for dkey, value in self.storage.iter_older_than(3600):
            await self.set_digest(dkey, value)

    def bootstrappable_neighbors(self):
        """
        Get a :class:`list` of (ip, port) :class:`tuple` pairs suitable for
        use as an argument to the bootstrap method.

        The server should have been bootstrapped
        already - this is just a utility for getting some neighbors and then
        storing them if this server is going down for a while.  When it comes
        back up, the list of nodes can be used to bootstrap.
        """
        neighbors = self.protocol.router.find_neighbors(self.node)
        return [tuple(n)[-2:] for n in neighbors]

    async def bootstrap(self, addrs):
        """
        Bootstrap the server by connecting to other known nodes in the network.

        Args:
            addrs: A `list` of (ip, port) `tuple` pairs.  Note that only IP
                   addresses are acceptable - hostnames will cause an error.

        Returns:
            The result of a node crawl for this server's own id, seeded with
            the contacts that answered the ping.
        """
        log.debug("Attempting to bootstrap node with %i initial contacts",
                  len(addrs))
        cos = list(map(self.bootstrap_node, addrs))
        gathered = await asyncio.gather(*cos)
        nodes = [node for node in gathered if node is not None]
        spider = NodeSpiderCrawl(self.protocol, self.node, nodes,
                                 self.ksize, self.alpha)
        return await spider.find()

    async def bootstrap_node(self, addr):
        """Ping ``addr``; return a Node for it, or None if it did not answer."""
        result = await self.protocol.ping(addr, self.node.id)
        return Node(result[1], addr[0], addr[1]) if result[0] else None

    async def get(self, key):
        """
        Get a key if the network has it.

        Returns:
            :class:`None` if not found, the value otherwise.
        """
        log.info("Looking up key %s", key)
        dkey = digest(key)
        # if this node has it, return it.  Look it up only once: a second
        # lookup is not guaranteed to give the same answer as the first.
        local_value = self.storage.get(dkey)
        if local_value is not None:
            return local_value
        node = Node(dkey)
        nearest = self.protocol.router.find_neighbors(node)
        if not nearest:
            log.warning("There are no known neighbors to get key %s", key)
            return None
        spider = ValueSpiderCrawl(self.protocol, node, nearest,
                                  self.ksize, self.alpha)
        return await spider.find()

    async def set(self, key, value):
        """
        Set the given string key to the given value in the network.

        Raises:
            TypeError: if ``value`` is not an int, float, bool, str or bytes.
        """
        if not check_dht_value_type(value):
            raise TypeError(
                "Value must be of type int, float, bool, str, or bytes"
            )
        log.info("setting '%s' = '%s' on network", key, value)
        dkey = digest(key)
        return await self.set_digest(dkey, value)

    async def set_digest(self, dkey, value):
        """
        Set the given SHA1 digest key (bytes) to the given value in the
        network.

        Returns:
            True if at least one store call succeeded, False otherwise
            (including when no neighbor is known or none responded).
        """
        node = Node(dkey)

        nearest = self.protocol.router.find_neighbors(node)
        if not nearest:
            log.warning("There are no known neighbors to set key %s",
                        dkey.hex())
            return False

        spider = NodeSpiderCrawl(self.protocol, node, nearest,
                                 self.ksize, self.alpha)
        nodes = await spider.find()
        if not nodes:
            # Every candidate dropped out during the crawl; max() below
            # would raise ValueError on the empty list.
            log.warning("No responsive nodes found to set key %s",
                        dkey.hex())
            return False
        log.info("setting '%s' on %s", dkey.hex(), list(map(str, nodes)))

        # if this node is close too, then store here as well
        biggest = max(n.distance_to(node) for n in nodes)
        if self.node.distance_to(node) < biggest:
            self.storage[dkey] = value
        results = [self.protocol.call_store(n, dkey, value) for n in nodes]
        # return true only if at least one store call succeeded
        return any(await asyncio.gather(*results))

    def save_state(self, fname):
        """
        Save the state of this node (the alpha/ksize/id/immediate neighbors)
        to a cache file with the given fname.  Nothing is written when no
        neighbors are known.
        """
        log.info("Saving state to %s", fname)
        data = {
            'ksize': self.ksize,
            'alpha': self.alpha,
            'id': self.node.id,
            'neighbors': self.bootstrappable_neighbors()
        }
        if not data['neighbors']:
            log.warning("No known neighbors, so not writing to cache.")
            return
        with open(fname, 'wb') as file:
            pickle.dump(data, file)

    @classmethod
    async def load_state(cls, fname, port, interface='0.0.0.0'):
        """
        Load the state of this node (the alpha/ksize/id/immediate neighbors)
        from a cache file with the given fname and then bootstrap the node
        (using the given port/interface to start listening/bootstrapping).

        NOTE(security): the cache file is unpickled, which can execute
        arbitrary code - only load files written by :meth:`save_state` from
        a trusted location.
        """
        log.info("Loading state from %s", fname)
        with open(fname, 'rb') as file:
            data = pickle.load(file)
        # Build through cls so subclasses get an instance of their own type.
        svr = cls(data['ksize'], data['alpha'], data['id'])
        await svr.listen(port, interface)
        if data['neighbors']:
            await svr.bootstrap(data['neighbors'])
        return svr

    def save_state_regularly(self, fname, frequency=600):
        """
        Save the state of node with a given regularity to the given
        filename.

        Args:
            fname: File name to save regularly to
            frequency: Frequency in seconds that the state should be saved.
                        By default, 10 minutes.
        """
        self.save_state(fname)
        loop = asyncio.get_event_loop()
        self.save_state_loop = loop.call_later(frequency,
                                               self.save_state_regularly,
                                               fname,
                                               frequency)
def check_dht_value_type(value):
    """
    Tell whether ``value`` has one of the exact types that may be placed
    in the dht: int, float, bool, str or bytes (subclasses do not count).
    """
    allowed_types = (int, float, bool, str, bytes)
    actual = type(value)
    return any(actual is allowed for allowed in allowed_types)

@ -0,0 +1,127 @@
from operator import itemgetter
import heapq
class Node:
    """
    Small value object for a network participant: always an ID, and
    optionally the IP and port where that participant can be reached.

    Mostly a low level construct used by the router; it is not normally
    instantiated directly by application code.
    """

    def __init__(self, node_id, ip=None, port=None):
        """
        Create a Node instance.

        Args:
            node_id: The node's id as bytes (``.hex()`` is called on it)
            ip (string): Optional IP address where this Node lives
            port (int): Optional port for this Node (set when IP is set)
        """
        self.id = node_id  # pylint: disable=invalid-name
        self.ip = ip  # pylint: disable=invalid-name
        self.port = port
        # Integer form of the id, used for XOR distance arithmetic.
        self.long_id = int(node_id.hex(), 16)

    def same_home_as(self, node):
        """True when both nodes share the same IP and port."""
        return (self.ip, self.port) == (node.ip, node.port)

    def distance_to(self, node):
        """
        Get the XOR distance between this node and another.
        """
        return self.long_id ^ node.long_id

    def __iter__(self):
        """
        Enables use of Node as a tuple - i.e., tuple(node) works.
        """
        fields = (self.id, self.ip, self.port)
        return iter(fields)

    def __repr__(self):
        fields = [self.long_id, self.ip, self.port]
        return repr(fields)

    def __str__(self):
        return "{}:{}".format(self.ip, self.port)
class NodeHeap:
    """
    A heap of nodes ordered by distance to a given node.
    """

    def __init__(self, node, maxsize):
        """
        Constructor.

        @param node: The node to measure all distances from.
        @param maxsize: The maximum number of nodes this heap exposes.
        """
        self.node = node
        self.heap = []
        self.contacted = set()
        self.maxsize = maxsize

    def remove(self, peers):
        """
        Remove a list of peer ids from this heap.  The heap only exposes
        its ``maxsize`` closest entries but may hold more internally, so
        removing nodes does not necessarily shrink the visible size:
        previously hidden entries can become visible instead.
        """
        doomed = set(peers)
        if not doomed:
            return
        survivors = []
        for entry in self.heap:
            if entry[1].id not in doomed:
                heapq.heappush(survivors, entry)
        self.heap = survivors

    def get_node(self, node_id):
        """Return the stored node with the given id, or None."""
        matches = (node for _, node in self.heap if node.id == node_id)
        return next(matches, None)

    def have_contacted_all(self):
        """True when no visible node is left uncontacted."""
        return not self.get_uncontacted()

    def get_ids(self):
        """Ids of the visible nodes, closest first."""
        return [node.id for node in self]

    def mark_contacted(self, node):
        """Record that ``node`` has been queried."""
        self.contacted.add(node.id)

    def popleft(self):
        """Pop and return the closest node, or None when the heap is empty."""
        if not self:
            return None
        _, closest = heapq.heappop(self.heap)
        return closest

    def push(self, nodes):
        """
        Push nodes onto heap, skipping ids that are already present.

        @param nodes: This can be a single item or a C{list}.
        """
        batch = nodes if isinstance(nodes, list) else [nodes]
        for candidate in batch:
            if candidate in self:
                continue
            entry = (self.node.distance_to(candidate), candidate)
            heapq.heappush(self.heap, entry)

    def __len__(self):
        # Visible size: never more than maxsize.
        return min(len(self.heap), self.maxsize)

    def __iter__(self):
        # Iterate over the maxsize closest nodes, nearest first.
        closest = heapq.nsmallest(self.maxsize, self.heap)
        return (node for _, node in closest)

    def __contains__(self, node):
        return any(node.id == other.id for _, other in self.heap)

    def get_uncontacted(self):
        """Visible nodes that have not been marked as contacted."""
        return [node for node in self if node.id not in self.contacted]

@ -0,0 +1,177 @@
import random
import asyncio
import logging
from rpcudp.protocol import RPCProtocol
from kademlia.node import Node
from kademlia.routing import RoutingTable
from kademlia.utils import digest
log = logging.getLogger(__name__) # pylint: disable=invalid-name
#### PROXY PROTOCOL
class ProxyDatagramProtocol(asyncio.DatagramProtocol):
    """
    Datagram protocol that relays each sender's datagrams to a fixed
    remote address, using one RemoteDatagramProtocol per sender address.
    """

    def __init__(self, remote_address):
        self.remote_address = remote_address
        # sender address -> RemoteDatagramProtocol relaying for that sender
        self.remotes = {}
        super().__init__()

    def connection_made(self, transport):
        """Keep the transport so replies can be sent back to senders."""
        self.transport = transport

    def datagram_received(self, data, addr):
        """
        Forward ``data`` through the relay for ``addr``, creating that relay
        (and its datagram endpoint) on the first datagram from ``addr``.
        """
        if addr not in self.remotes:
            loop = asyncio.get_event_loop()
            # The relay sends this first datagram itself once connected.
            self.remotes[addr] = RemoteDatagramProtocol(self, addr, data)
            endpoint = loop.create_datagram_endpoint(
                lambda: self.remotes[addr], remote_addr=self.remote_address)
            asyncio.ensure_future(endpoint)
            return
        relay = self.remotes[addr]
        relay.transport.sendto(data)
class RemoteDatagramProtocol(asyncio.DatagramProtocol):
    """
    Relay for one sender behind a ProxyDatagramProtocol: it sends the
    sender's first datagram once connected and passes every datagram it
    receives back to that sender through the proxy's transport.
    """

    def __init__(self, proxy, addr, data):
        """
        Args:
            proxy: The owning proxy; its ``transport`` and ``remotes`` are used.
            addr: Address of the original sender this relay serves.
            data: First datagram, sent as soon as the connection is made.
        """
        self.proxy = proxy
        self.addr = addr
        self.data = data
        super().__init__()

    def connection_made(self, transport):
        """Store the transport and flush the initial datagram."""
        self.transport = transport
        self.transport.sendto(self.data)

    def datagram_received(self, data, _):
        """Pass a reply back to the original sender via the proxy."""
        self.proxy.transport.sendto(data, self.addr)

    def connection_lost(self, exc):
        """Drop this relay from the proxy's table."""
        # The key is self.addr (the original read the non-existent
        # self.attr and raised AttributeError).  The default keeps a
        # repeated or late call from raising KeyError.
        self.proxy.remotes.pop(self.addr, None)
#####
class KademliaProtocol(RPCProtocol):
    """
    RPC protocol of a node: ``rpc_*`` methods answer incoming requests,
    ``call_*`` methods issue requests and keep the routing table current.
    """

    def __init__(self, source_node, storage, ksize):
        super().__init__()
        self.router = RoutingTable(self, ksize, source_node)
        self.storage = storage
        self.source_node = source_node

    def get_refresh_ids(self):
        """
        Get ids to search for to keep old buckets up to date: one random
        20-byte id drawn from the range of every lonely bucket.
        """
        return [
            random.randint(*bucket.range).to_bytes(20, byteorder='big')
            for bucket in self.router.lonely_buckets()
        ]

    def _register_sender(self, sender, nodeid):
        """Build a Node for the RPC sender and welcome it if unseen."""
        source = Node(nodeid, sender[0], sender[1])
        self.welcome_if_new(source)
        return source

    def rpc_stun(self, sender):  # pylint: disable=no-self-use
        """Echo the sender's address back to it."""
        return sender

    def rpc_ping(self, sender, nodeid):
        """Answer a ping with this node's id."""
        self._register_sender(sender, nodeid)
        return self.source_node.id

    def rpc_store(self, sender, nodeid, key, value):
        """Store ``value`` under ``key`` locally; always returns True."""
        self._register_sender(sender, nodeid)
        log.debug("got a store request from %s, storing '%s'='%s'",
                  sender, key.hex(), value)
        self.storage[key] = value
        return True

    def rpc_find_node(self, sender, nodeid, key):
        """Return the known neighbors of ``key`` as tuples, sender excluded."""
        log.info("finding neighbors of %i in local table",
                 int(nodeid.hex(), 16))
        source = self._register_sender(sender, nodeid)
        target = Node(key)
        closest = self.router.find_neighbors(target, exclude=source)
        return [tuple(neighbor) for neighbor in closest]

    def rpc_find_value(self, sender, nodeid, key):
        """
        Return {'value': v} when ``key`` is stored locally, otherwise the
        same answer as ``rpc_find_node``.
        """
        self._register_sender(sender, nodeid)
        stored = self.storage.get(key, None)
        if stored is not None:
            return {'value': stored}
        return self.rpc_find_node(sender, nodeid, key)

    async def call_find_node(self, node_to_ask, node_to_find):
        """Ask ``node_to_ask`` for the nodes closest to ``node_to_find``."""
        target = (node_to_ask.ip, node_to_ask.port)
        reply = await self.find_node(target, self.source_node.id,
                                     node_to_find.id)
        return self.handle_call_response(reply, node_to_ask)

    async def call_find_value(self, node_to_ask, node_to_find):
        """Ask ``node_to_ask`` for the value keyed by ``node_to_find``."""
        target = (node_to_ask.ip, node_to_ask.port)
        reply = await self.find_value(target, self.source_node.id,
                                      node_to_find.id)
        return self.handle_call_response(reply, node_to_ask)

    async def call_ping(self, node_to_ask):
        """Ping ``node_to_ask``."""
        target = (node_to_ask.ip, node_to_ask.port)
        reply = await self.ping(target, self.source_node.id)
        return self.handle_call_response(reply, node_to_ask)

    async def call_store(self, node_to_ask, key, value):
        """Ask ``node_to_ask`` to store ``value`` under ``key``."""
        target = (node_to_ask.ip, node_to_ask.port)
        reply = await self.store(target, self.source_node.id, key, value)
        return self.handle_call_response(reply, node_to_ask)

    def welcome_if_new(self, node):
        """
        Given a new node, send it all the keys/values it should be storing,
        then add it to the routing table.

        @param node: A new node that just joined (or that we just found out
        about).

        Process:
        For each key in storage, get k closest nodes.  If newnode is closer
        than the furthest in that list, and the node for this server
        is closer than the closest in that list, then store the key/value
        on the new node (per section 2.5 of the paper)
        """
        if not self.router.is_new_node(node):
            return

        log.info("never seen %s before, adding to router", node)
        for key, value in self.storage:
            keynode = Node(digest(key))
            neighbors = self.router.find_neighbors(keynode)
            if neighbors:
                furthest = neighbors[-1].distance_to(keynode)
                new_node_close = node.distance_to(keynode) < furthest
                closest = neighbors[0].distance_to(keynode)
                this_closest = self.source_node.distance_to(keynode) < closest
                should_store = new_node_close and this_closest
            else:
                # No neighbors known for this key: always hand it over.
                should_store = True
            if should_store:
                asyncio.ensure_future(self.call_store(node, key, value))
        self.router.add_contact(node)

    def handle_call_response(self, result, node):
        """
        If we get a response, add the node to the routing table.  If
        we get no response, make sure it's removed from the routing table.
        The result is returned unchanged either way.
        """
        responded = result[0]
        if responded:
            log.info("got successful response from %s", node)
            self.welcome_if_new(node)
        else:
            log.warning("no response from %s, removing from router", node)
            self.router.remove_contact(node)
        return result

@ -0,0 +1,199 @@
import heapq
import time
import operator
import asyncio
from itertools import chain
from collections import OrderedDict
from kademlia.utils import shared_prefix, bytes_to_bit_string
# EXCLUDE_PORTS = {5637}
EXCLUDE_PORTS = {}
class KBucket:
    """
    One bucket of the routing table: at most ``ksize`` live nodes whose ids
    fall in an inclusive range, plus a bounded list of replacement nodes.
    """

    def __init__(self, rangeLower, rangeUpper, ksize, replacementNodeFactor=5):
        self.range = (rangeLower, rangeUpper)
        self.nodes = OrderedDict()
        self.replacement_nodes = OrderedDict()
        self.touch_last_updated()
        self.ksize = ksize
        self.max_replacement_nodes = self.ksize * replacementNodeFactor

    def touch_last_updated(self):
        """Stamp the bucket with the current monotonic time."""
        self.last_updated = time.monotonic()

    def get_nodes(self):
        """Live nodes as a new list, in insertion order."""
        return list(self.nodes.values())

    def split(self):
        """
        Split the range at its midpoint and return the two new buckets,
        with live and replacement nodes redistributed between them.
        """
        lower, upper = self.range
        midpoint = (lower + upper) // 2
        low_half = KBucket(lower, midpoint, self.ksize)
        high_half = KBucket(midpoint + 1, upper, self.ksize)
        everyone = chain(self.nodes.values(), self.replacement_nodes.values())
        for member in everyone:
            if member.long_id <= midpoint:
                low_half.add_node(member)
            else:
                high_half.add_node(member)
        return (low_half, high_half)

    def remove_node(self, node):
        """
        Forget ``node``.  When a live node is removed and a replacement is
        available, the most recently added replacement takes its place.
        """
        self.replacement_nodes.pop(node.id, None)

        if node.id not in self.nodes:
            return
        del self.nodes[node.id]
        if self.replacement_nodes:
            promoted_id, promoted = self.replacement_nodes.popitem()
            self.nodes[promoted_id] = promoted

    def has_in_range(self, node):
        """True when the node's id lies within this bucket's range."""
        lower, upper = self.range
        return lower <= node.long_id <= upper

    def is_new_node(self, node):
        """True when the node is not among the live nodes."""
        return node.id not in self.nodes

    def add_node(self, node):
        """
        Add a C{Node} to the C{KBucket}.  Return True if successful,
        False if the bucket is full.

        If the bucket is full, keep track of node in a replacement list,
        per section 4.1 of the paper.
        """
        if node.id in self.nodes:
            # Known node: re-insert so it moves to the most-recent end.
            self.nodes.pop(node.id)
            self.nodes[node.id] = node
            return True

        if len(self) < self.ksize:
            self.nodes[node.id] = node
            return True

        # Bucket full: park the node as a (most recent) replacement and
        # drop the oldest replacements beyond the cap.
        self.replacement_nodes.pop(node.id, None)
        self.replacement_nodes[node.id] = node
        while len(self.replacement_nodes) > self.max_replacement_nodes:
            self.replacement_nodes.popitem(last=False)
        return False

    def depth(self):
        """Length of the bit prefix shared by all live node ids."""
        bit_strings = [bytes_to_bit_string(n.id) for n in self.nodes.values()]
        return len(shared_prefix(bit_strings))

    def head(self):
        """First live node in insertion order."""
        return list(self.nodes.values())[0]

    def __getitem__(self, node_id):
        return self.nodes.get(node_id)

    def __len__(self):
        return len(self.nodes)
class TableTraverser:
    """
    Iterator over the nodes of a routing table, starting with the bucket
    that ``startNode`` falls into and then fanning out to the buckets on
    its left and right.
    """

    def __init__(self, table, startNode):
        """
        Args:
            table: The routing table; its ``get_bucket_for`` and ``buckets``
                   are used.
            startNode: Node whose bucket is visited first (that bucket is
                       also marked as freshly updated).
        """
        index = table.get_bucket_for(startNode)
        table.buckets[index].touch_last_updated()
        self.current_nodes = table.buckets[index].get_nodes()
        self.left_buckets = table.buckets[:index]
        self.right_buckets = table.buckets[(index + 1):]
        # True when the next bucket should preferably come from the left.
        self.left = True

    def __iter__(self):
        return self

    def __next__(self):
        """
        Pop an item from the left subtree, then right, then left, etc.

        Sides alternate while both have buckets left; once one side is
        exhausted the remaining buckets of the other side are still
        visited, so every node in the table is eventually yielded.
        """
        while not self.current_nodes:
            # Take from the left on a left turn, or whenever the right
            # side has nothing left to offer.
            take_left = bool(self.left_buckets) and (
                self.left or not self.right_buckets)
            if take_left:
                self.current_nodes = self.left_buckets.pop().get_nodes()
                self.left = False
            elif self.right_buckets:
                self.current_nodes = self.right_buckets.pop(0).get_nodes()
                self.left = True
            else:
                raise StopIteration
        return self.current_nodes.pop()
class RoutingTable:
    """
    Ordered list of KBuckets that together cover the id space from
    0 to 2 ** 160.
    """

    def __init__(self, protocol, ksize, node):
        """
        @param protocol: Protocol used to ping the head of a full bucket.
        @param ksize: Maximum number of live nodes per bucket.
        @param node: The node that represents this server.  It won't
        be added to the routing table, but will be needed later to
        determine which buckets to split or not.
        """
        self.node = node
        self.protocol = protocol
        self.ksize = ksize
        self.flush()

    def flush(self):
        """Reset the table to a single bucket covering the whole id space."""
        self.buckets = [KBucket(0, 2 ** 160, self.ksize)]

    def split_bucket(self, index):
        """Replace the bucket at ``index`` with its two halves."""
        one, two = self.buckets[index].split()
        self.buckets[index] = one
        self.buckets.insert(index + 1, two)

    def lonely_buckets(self):
        """
        Get all of the buckets that haven't been updated in over
        an hour.
        """
        hrago = time.monotonic() - 3600
        return [b for b in self.buckets if b.last_updated < hrago]

    def remove_contact(self, node):
        """Remove ``node`` from the bucket it falls into."""
        index = self.get_bucket_for(node)
        self.buckets[index].remove_node(node)

    def is_new_node(self, node):
        """True when ``node`` is not a live member of its bucket."""
        index = self.get_bucket_for(node)
        return self.buckets[index].is_new_node(node)

    def add_contact(self, node):
        """
        Add ``node`` to its bucket, splitting the bucket when it is full
        and splitting is allowed; otherwise ping the bucket's head.
        """
        index = self.get_bucket_for(node)
        bucket = self.buckets[index]

        # this will succeed unless the bucket is full
        if bucket.add_node(node):
            return

        # Per section 4.2 of paper, split if the bucket has the node
        # in its range or if the depth is not congruent to 0 mod 5
        if bucket.has_in_range(self.node) or bucket.depth() % 5 != 0:
            self.split_bucket(index)
            self.add_contact(node)
        else:
            asyncio.ensure_future(self.protocol.call_ping(bucket.head()))

    def get_bucket_for(self, node):
        """
        Get the index of the bucket that the given node would fall into.
        """
        for index, bucket in enumerate(self.buckets):
            # Bucket ranges are inclusive at the top (see KBucket.split and
            # KBucket.has_in_range), so an id equal to the upper bound
            # belongs to this bucket, not the next one.
            if node.long_id <= bucket.range[1]:
                return index
        # we should never be here, but make linter happy
        return None

    def find_neighbors(self, node, k=None, exclude=None, exclude_ports=EXCLUDE_PORTS):
        """
        Return up to ``k`` known nodes closest to ``node``, nearest first.

        @param node: The node to measure distances from; a neighbor with
        the same id is never returned.
        @param k: Maximum number of neighbors (defaults to ksize).
        @param exclude: Optional node; neighbors living at the same
        ip/port are skipped.
        @param exclude_ports: Optional collection of ports; neighbors
        listening on any of them are skipped.  None or an empty
        collection disables the filter.
        """
        k = k or self.ksize
        nodes = []
        for neighbor in TableTraverser(self, node):
            if neighbor.id == node.id:
                continue
            if exclude is not None and neighbor.same_home_as(exclude):
                continue
            if exclude_ports and neighbor.port in exclude_ports:
                continue
            heapq.heappush(nodes, (node.distance_to(neighbor), neighbor))
            if len(nodes) == k:
                break

        return list(map(operator.itemgetter(1), heapq.nsmallest(k, nodes)))

@ -0,0 +1,94 @@
import time
from itertools import takewhile
import operator
from collections import OrderedDict
from abc import abstractmethod, ABC
class IStorage(ABC):
    """
    Interface for the local key/value store of this node.

    Whatever type is stored through item assignment must come back
    unchanged from the read methods.
    """

    @abstractmethod
    def __setitem__(self, key, value):
        """
        Store C{value} under C{key}.
        """

    @abstractmethod
    def __getitem__(self, key):
        """
        Return the value stored under C{key}; raise C{KeyError} when
        the key does not exist.
        """

    @abstractmethod
    def get(self, key, default=None):
        """
        Return the value stored under C{key}, or C{default} when the
        key is not found.
        """

    @abstractmethod
    def iter_older_than(self, seconds_old):
        """
        Return an iterator over (key, value) tuples for items older
        than the given C{seconds_old}.
        """

    @abstractmethod
    def __iter__(self):
        """
        Return the iterator for this storage; it should yield
        (key, value) tuples.
        """
class ForgetfulStorage(IStorage):
    """
    In-memory storage that forgets entries once they are older than a
    time-to-live.

    Entries live in an C{OrderedDict} mapping key -> (birthday, value),
    where birthday is a C{time.monotonic()} reading.  Writes always
    re-insert at the end, so the dict is ordered oldest first.
    """

    def __init__(self, ttl=604800):
        """
        By default, max age is a week.

        @param ttl: Maximum age of an entry, in seconds.
        """
        self.data = OrderedDict()
        self.ttl = ttl

    def __setitem__(self, key, value):
        """
        Store C{value} under C{key} with a fresh birthday, then cull.
        """
        # Delete first so the key moves to the newest end of the dict.
        if key in self.data:
            del self.data[key]
        self.data[key] = (time.monotonic(), value)
        self.cull()

    def cull(self):
        """
        Drop every entry that is at least C{self.ttl} seconds old.
        """
        # iter_older_than returns a list, so popping while looping is safe;
        # expired entries are always at the front of the ordered dict.
        for _ in self.iter_older_than(self.ttl):
            self.data.popitem(last=False)

    def get(self, key, default=None):
        """
        Return the value for C{key}, or C{default} if it is missing or
        has expired.
        """
        self.cull()
        # Look the key up directly after a single cull.  Going through
        # self[key] would cull a second time, and an entry expiring between
        # the two culls would surface as KeyError instead of the default.
        try:
            return self.data[key][1]
        except KeyError:
            return default

    def __getitem__(self, key):
        """
        Return the value for C{key}; raise C{KeyError} if it is missing
        or has expired.
        """
        self.cull()
        return self.data[key][1]

    def __repr__(self):
        self.cull()
        return repr(self.data)

    def iter_older_than(self, seconds_old):
        """
        Return a list of (key, value) tuples for the entries that are at
        least C{seconds_old} seconds old, oldest first.
        """
        min_birthday = time.monotonic() - seconds_old
        # Entries are ordered oldest first, so stop at the first young one.
        matches = takewhile(lambda r: min_birthday >= r[1],
                            self._triple_iter())
        return list(map(operator.itemgetter(0, 2), matches))

    def _triple_iter(self):
        """
        Iterate over (key, birthday, value) triples in storage order.
        """
        ikeys = self.data.keys()
        ibirthday = map(operator.itemgetter(0), self.data.values())
        ivalues = map(operator.itemgetter(1), self.data.values())
        return zip(ikeys, ibirthday, ivalues)

    def __iter__(self):
        """
        Cull, then iterate over the remaining (key, value) pairs.
        """
        self.cull()
        ikeys = self.data.keys()
        ivalues = map(operator.itemgetter(1), self.data.values())
        return zip(ikeys, ivalues)

@ -0,0 +1,3 @@
"""
Tests live here.
"""

@ -0,0 +1,57 @@
import random
import hashlib
from struct import pack
import pytest
from kademlia.network import Server
from kademlia.node import Node
from kademlia.routing import RoutingTable
# pytest.yield_fixture is deprecated; a plain fixture may yield directly.
@pytest.fixture
def bootstrap_node(event_loop):
    """
    Start a Server listening on port 8468 for the duration of a test and
    yield its (host, port) address; the server is stopped afterwards.
    """
    server = Server()
    event_loop.run_until_complete(server.listen(8468))

    try:
        yield ('127.0.0.1', 8468)
    finally:
        server.stop()
# pylint: disable=redefined-outer-name
@pytest.fixture()
def mknode():
    """
    Fixture handing tests a factory function that builds Node objects.
    """
    def _mknode(node_id=None, ip_addy=None, port=None, intid=None):
        """
        Build a node.  An explicit intid is packed into the id; without
        any usable id, the sha1 digest of random bits is used instead.
        """
        if intid is not None:
            node_id = pack('>l', intid)
        if not node_id:
            seed = str(random.getrandbits(255)).encode()
            node_id = hashlib.sha1(seed).digest()
        return Node(node_id, ip_addy, port)

    return _mknode
# pylint: disable=too-few-public-methods
class FakeProtocol:  # pylint: disable=too-few-public-methods
    """
    Minimal protocol stand-in: just a routing table, a dict used as
    storage, and the source id.
    """

    def __init__(self, source_id, ksize=20):
        own_node = Node(source_id)
        self.router = RoutingTable(self, ksize, own_node)
        self.storage = {}
        self.source_id = source_id
# pylint: disable=too-few-public-methods
class FakeServer:
    """
    Minimal server stand-in wrapping a FakeProtocol and exposing that
    protocol's router directly.
    """

    def __init__(self, node_id):
        self.id = node_id  # pylint: disable=invalid-name
        protocol = FakeProtocol(self.id)
        self.protocol = protocol
        self.router = protocol.router
@pytest.fixture
def fake_server(mknode):
    """
    Fixture providing a FakeServer whose id comes from a freshly made node.
    """
    node = mknode()
    return FakeServer(node.id)

@ -0,0 +1,26 @@
from glob import glob
import pycodestyle
from pylint import epylint as lint
class LintError(Exception):
    """Raised by the lint tests when a checker reports problems."""
class TestCodeLinting:
    """
    Run pylint and pycodestyle over the kademlia package and fail when
    either reports anything.
    """

    # pylint: disable=no-self-use
    def test_pylint(self):
        """Any non-blank pylint output counts as a failure."""
        stdout, _ = lint.py_run('kademlia', return_std=True)
        report = stdout.read()
        if report.strip():
            raise LintError(report)

    # pylint: disable=no-self-use
    def test_pep8(self):
        """Any pycodestyle error in kademlia/**/*.py counts as a failure."""
        sources = glob('kademlia/**/*.py', recursive=True)
        checker = pycodestyle.StyleGuide()
        outcome = checker.check_files(sources)
        if outcome.total_errors > 0:
            raise LintError("Code style errors found.")

@ -0,0 +1,54 @@
import random
import hashlib
from kademlia.node import Node, NodeHeap
class TestNode:
    """Checks for Node.long_id and Node.distance_to."""

    def test_long_id(self):  # pylint: disable=no-self-use
        """long_id is the node id bytes read as one big integer."""
        seed = str(random.getrandbits(255)).encode()
        rid = hashlib.sha1(seed).digest()
        node = Node(rid)
        assert node.long_id == int(rid.hex(), 16)

    def test_distance_calculation(self):  # pylint: disable=no-self-use
        """distance_to is the XOR of the two ids taken as integers."""
        hash_one = hashlib.sha1(str(random.getrandbits(255)).encode())
        hash_two = hashlib.sha1(str(random.getrandbits(255)).encode())

        expected = int(hash_one.hexdigest(), 16) ^ int(hash_two.hexdigest(), 16)
        first = Node(hash_one.digest())
        second = Node(hash_two.digest())
        assert first.distance_to(second) == expected
class TestNodeHeap:
    """Checks for NodeHeap size limiting, iteration order and removal."""

    def test_max_size(self, mknode):  # pylint: disable=no-self-use
        """A heap built with max size 3 exposes 3 nodes after 10 pushes."""
        heap = NodeHeap(mknode(intid=0), 3)
        assert not heap

        for digit in range(10):
            heap.push(mknode(intid=digit))

        assert len(heap) == 3
        assert len(list(heap)) == 3

    def test_iteration(self, mknode):  # pylint: disable=no-self-use
        """Iterating yields ids 0, 1, 2, ... and never more than 5 nodes."""
        heap = NodeHeap(mknode(intid=0), 5)
        for candidate in [mknode(intid=x) for x in range(10)]:
            heap.push(candidate)

        for index, node in enumerate(heap):
            assert index == node.long_id
            assert index < 5

    def test_remove(self, mknode):  # pylint: disable=no-self-use
        """After removing ids 0 and 1, iteration starts at id 2."""
        heap = NodeHeap(mknode(intid=0), 5)
        nodes = [mknode(intid=x) for x in range(10)]
        for candidate in nodes:
            heap.push(candidate)

        removed_ids = [nodes[0].id, nodes[1].id]
        heap.remove(removed_ids)
        assert len(list(heap)) == 5
        for index, node in enumerate(heap):
            assert index + 2 == node.long_id
            assert index < 5

@ -0,0 +1,121 @@
from random import shuffle
from kademlia.routing import KBucket, TableTraverser
class TestKBucket:
    """Checks for KBucket splitting, insertion, removal and ranges."""

    def test_split(self, mknode):  # pylint: disable=no-self-use
        """Splitting [0, 10] gives (0, 5) and (6, 10), one node in each."""
        bucket = KBucket(0, 10, 5)
        for intid in (5, 6):
            bucket.add_node(mknode(intid=intid))

        lower, upper = bucket.split()
        assert len(lower) == 1
        assert lower.range == (0, 5)
        assert len(upper) == 1
        assert upper.range == (6, 10)

    def test_split_no_overlap(self):  # pylint: disable=no-self-use
        """The halves of a split are adjacent and do not overlap."""
        lower, upper = KBucket(0, 2 ** 160, 20).split()
        assert (upper.range[0] - lower.range[1]) == 1

    def test_add_node(self, mknode):  # pylint: disable=no-self-use
        """add_node reports success until the bucket is full."""
        # when full, return false
        small = KBucket(0, 10, 2)
        assert small.add_node(mknode()) is True
        assert small.add_node(mknode()) is True
        assert small.add_node(mknode()) is False
        assert len(small) == 2

        # make sure when a node is double added it's put at the end
        bucket = KBucket(0, 10, 3)
        nodes = [mknode(), mknode(), mknode()]
        for node in nodes:
            bucket.add_node(node)
        for index, stored in enumerate(bucket.get_nodes()):
            assert stored == nodes[index]

    def test_remove_node(self, mknode):  # pylint: disable=no-self-use
        """Removed nodes are backfilled from the replacement nodes."""
        k = 3
        bucket = KBucket(0, 10, k)
        nodes = [mknode() for _ in range(10)]
        for node in nodes:
            bucket.add_node(node)

        spares = bucket.replacement_nodes
        assert list(bucket.nodes.values()) == nodes[:k]
        assert list(spares.values()) == nodes[k:]

        # drop the newest node: only the replacement list shrinks
        newest = nodes.pop()
        bucket.remove_node(newest)
        assert list(bucket.nodes.values()) == nodes[:k]
        assert list(spares.values()) == nodes[k:]

        # drop the oldest node: the last replacement is promoted
        oldest = nodes.pop(0)
        bucket.remove_node(oldest)
        assert list(bucket.nodes.values()) == nodes[:k-1] + nodes[-1:]
        assert list(spares.values()) == nodes[k-1:-1]

        shuffle(nodes)
        for node in nodes:
            bucket.remove_node(node)
        assert not bucket
        assert not spares

    def test_in_range(self, mknode):  # pylint: disable=no-self-use
        """has_in_range includes both ends of the bucket's range."""
        bucket = KBucket(0, 10, 10)
        for intid, expected in ((5, True), (11, False), (10, True), (0, True)):
            assert bucket.has_in_range(mknode(intid=intid)) is expected

    def test_replacement_factor(self, mknode):  # pylint: disable=no-self-use
        """The replacement list holds at most k * factor nodes."""
        k = 3
        factor = 2
        bucket = KBucket(0, 10, k, replacementNodeFactor=factor)
        nodes = [mknode() for _ in range(10)]
        for node in nodes:
            bucket.add_node(node)

        spares = list(bucket.replacement_nodes.values())
        assert len(spares) == k * factor
        assert spares == nodes[k + 1:]
        assert nodes[k] not in spares
# pylint: disable=too-few-public-methods
class TestRoutingTable:
    """Checks for RoutingTable.add_contact."""

    # pylint: disable=no-self-use
    def test_add_contact(self, fake_server, mknode):
        """One added contact lands in the single initial bucket."""
        router = fake_server.router
        router.add_contact(mknode())
        assert len(router.buckets) == 1
        assert len(router.buckets[0].nodes) == 1
# pylint: disable=too-few-public-methods
class TestTableTraverser:
    """Checks the order in which TableTraverser yields nodes."""

    # pylint: disable=no-self-use
    def test_iteration(self, fake_server, mknode):
        """
        Make 10 nodes and 5 buckets, adding two nodes to each bucket in
        order, so the buckets are: [node0, node1], [node2, node3],
        [node4, node5], [node6, node7], [node8, node9].
        Test the traversal result starting from node4.
        """
        nodes = [mknode(intid=x) for x in range(10)]

        buckets = []
        for low in range(0, 10, 2):
            bucket = KBucket(low, low + 1, 2)
            bucket.add_node(nodes[low])
            bucket.add_node(nodes[low + 1])
            buckets.append(bucket)

        # replace router's bucket with our test buckets
        fake_server.router.buckets = buckets

        # expected nodes order
        expected_order = (5, 4, 3, 2, 7, 6, 1, 0, 9, 8)
        expected_nodes = [nodes[i] for i in expected_order]

        table_traverser = TableTraverser(fake_server.router, nodes[4])
        for index, node in enumerate(table_traverser):
            assert node == expected_nodes[index]

@ -0,0 +1,62 @@
import asyncio
import pytest
from kademlia.network import Server
from kademlia.protocol import KademliaProtocol
@pytest.mark.asyncio
async def test_storing(bootstrap_node):
    """
    A second server bootstrapped against the bootstrap node can set a
    key and read the same value back.
    """
    server = Server()
    await server.listen(bootstrap_node[1] + 1)
    # Stop the server even when a step or the assertion fails, so the
    # listening port is not left bound for later tests.
    try:
        await server.bootstrap([bootstrap_node])
        await server.set('key', 'value')
        result = await server.get('key')
        assert result == 'value'
    finally:
        server.stop()
class TestSwappableProtocol:
    """
    Checks that Server builds its protocol from the protocol_class
    attribute when listen() is called.

    Every server here listens on port 8469, so each one is stopped in a
    finally block: a failed assertion must not leave the port bound for
    the next server.
    """

    def test_default_protocol(self):  # pylint: disable=no-self-use
        """
        An ordinary Server object will initially not have a protocol, but will
        have a KademliaProtocol object as its protocol after its listen()
        method is called.
        """
        loop = asyncio.get_event_loop()
        server = Server()
        assert server.protocol is None
        loop.run_until_complete(server.listen(8469))
        try:
            assert isinstance(server.protocol, KademliaProtocol)
        finally:
            server.stop()

    def test_custom_protocol(self):  # pylint: disable=no-self-use
        """
        A subclass of Server which overrides the protocol_class attribute will
        have an instance of that class as its protocol after its listen()
        method is called.
        """

        # Make a custom Protocol and Server to go with it.
        class CoconutProtocol(KademliaProtocol):
            pass

        class HuskServer(Server):
            protocol_class = CoconutProtocol

        # An ordinary server does NOT have a CoconutProtocol as its protocol...
        loop = asyncio.get_event_loop()
        server = Server()
        loop.run_until_complete(server.listen(8469))
        try:
            assert not isinstance(server.protocol, CoconutProtocol)
        finally:
            server.stop()

        # ...but our custom server does.
        husk_server = HuskServer()
        loop.run_until_complete(husk_server.listen(8469))
        try:
            assert isinstance(husk_server.protocol, CoconutProtocol)
        finally:
            husk_server.stop()

@ -0,0 +1,27 @@
from kademlia.storage import ForgetfulStorage
class TestForgetfulStorage:
    """
    Checks for ForgetfulStorage.

    The class name starts with ``Test`` so that pytest's default class
    discovery pattern picks these tests up.
    """

    def test_storing(self):  # pylint: disable=no-self-use
        """A stored value can be read back before it expires."""
        storage = ForgetfulStorage(10)
        storage['one'] = 'two'
        assert storage['one'] == 'two'

    def test_forgetting(self):  # pylint: disable=no-self-use
        """With a ttl of 0 a value is forgotten immediately."""
        storage = ForgetfulStorage(0)
        storage['one'] = 'two'
        assert storage.get('one') is None

    def test_iter(self):  # pylint: disable=no-self-use
        """Iterating the storage yields (key, value) pairs."""
        storage = ForgetfulStorage(10)
        storage['one'] = 'two'
        for key, value in storage:
            assert key == 'one'
            assert value == 'two'

    def test_iter_old(self):  # pylint: disable=no-self-use
        """iter_older_than yields (key, value) pairs."""
        storage = ForgetfulStorage(10)
        storage['one'] = 'two'
        for key, value in storage.iter_older_than(0):
            assert key == 'one'
            assert value == 'two'


# Backward-compatible alias for the previous class name.  It does not match
# the default ``Test*`` pattern, so the tests are not collected twice.
ForgetfulStorageTest = TestForgetfulStorage

@ -0,0 +1,25 @@
import hashlib
from kademlia.utils import digest, shared_prefix
class TestUtils:
    """Checks for the digest and shared_prefix helpers."""

    def test_digest(self):  # pylint: disable=no-self-use
        """digest is the sha1 of the utf8-encoded string form."""
        expected = hashlib.sha1(b'1').digest()
        assert expected == digest(1)

        expected = hashlib.sha1(b'another').digest()
        assert expected == digest('another')

    def test_shared_prefix(self):  # pylint: disable=no-self-use
        """shared_prefix returns the longest prefix common to all args."""
        cases = (
            (['prefix', 'prefixasdf', 'prefix', 'prefixxxx'], 'prefix'),
            (['p', 'prefixasdf', 'prefix', 'prefixxxx'], 'p'),
            (['one', 'two'], ''),
            (['hi'], 'hi'),
        )
        for args, expected in cases:
            assert shared_prefix(args) == expected

@ -0,0 +1,41 @@
"""
General catchall for functions that don't make sense as methods.
"""
import hashlib
import operator
import asyncio
async def gather_dict(dic):
    """
    Await every awaitable stored as a value in C{dic} concurrently and
    return a new dict mapping the same keys to the awaited results.
    """
    keys = list(dic.keys())
    pending = [dic[key] for key in keys]
    finished = await asyncio.gather(*pending)
    return {key: outcome for key, outcome in zip(keys, finished)}
def digest(string):
    """
    Return the raw sha1 digest of C{string}.

    Bytes are hashed as they are; anything else is converted with
    C{str()} and encoded as utf8 first.
    """
    if isinstance(string, bytes):
        payload = string
    else:
        payload = str(string).encode('utf8')
    return hashlib.sha1(payload).digest()
def shared_prefix(args):
    """
    Find the shared prefix between the strings.

    For instance:

        shared_prefix(['blahblah', 'blahwhat'])

    returns 'blah'.

    An empty C{args} has no prefix to share and yields ''.
    """
    if not args:
        # min() over no lengths would raise ValueError.
        return ''
    first = args[0]
    # zip stops at the shortest string, so the shortest length never has
    # to be recomputed on each step.
    for i, column in enumerate(zip(*args)):
        if len(set(column)) != 1:
            return first[:i]
    # Every compared position matched: the prefix is the shortest string.
    return first[:min(map(len, args))]
def bytes_to_bit_string(bites):
    """
    Render each byte of C{bites} as eight binary digits and concatenate
    them into one string.
    """
    pieces = []
    for bite in bites:
        # bin() gives '0b...'; drop the prefix and left-pad to 8 digits.
        pieces.append(bin(bite)[2:].zfill(8))
    return "".join(pieces)
Loading…
Cancel
Save