mirror of
https://github.com/varun-r-mallya/py-libp2p.git
synced 2026-02-12 16:10:57 +00:00
Merge branch 'master' into feature/porting-to-trio
This commit is contained in:
@ -1,173 +0,0 @@
|
||||
from collections import Counter
|
||||
import logging
|
||||
|
||||
from .kad_peerinfo import KadPeerHeap, create_kad_peerinfo
|
||||
from .utils import gather_dict
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SpiderCrawl:
|
||||
"""Crawl the network and look for given 160-bit keys."""
|
||||
|
||||
def __init__(self, protocol, node, peers, ksize, alpha):
|
||||
"""
|
||||
Create a new C{SpiderCrawl}er.
|
||||
|
||||
Args:
|
||||
protocol: A :class:`~kademlia.protocol.KademliaProtocol` instance.
|
||||
node: A :class:`~kademlia.node.Node` representing the key we're
|
||||
looking for
|
||||
peers: A list of :class:`~kademlia.node.Node` instances that
|
||||
provide the entry point for the network
|
||||
ksize: The value for k based on the paper
|
||||
alpha: The value for alpha based on the paper
|
||||
"""
|
||||
self.protocol = protocol
|
||||
self.ksize = ksize
|
||||
self.alpha = alpha
|
||||
self.node = node
|
||||
self.nearest = KadPeerHeap(self.node, self.ksize)
|
||||
self.last_ids_crawled = []
|
||||
log.info("creating spider with peers: %s", peers)
|
||||
self.nearest.push(peers)
|
||||
|
||||
async def _find(self, rpcmethod):
|
||||
"""
|
||||
Get either a value or list of nodes.
|
||||
|
||||
Args:
|
||||
rpcmethod: The protocol's callfindValue or call_find_node.
|
||||
|
||||
The process:
|
||||
1. calls find_* to current ALPHA nearest not already queried nodes,
|
||||
adding results to current nearest list of k nodes.
|
||||
2. current nearest list needs to keep track of who has been queried
|
||||
already sort by nearest, keep KSIZE
|
||||
3. if list is same as last time, next call should be to everyone not
|
||||
yet queried
|
||||
4. repeat, unless nearest list has all been queried, then ur done
|
||||
"""
|
||||
log.info("crawling network with nearest: %s", str(tuple(self.nearest)))
|
||||
count = self.alpha
|
||||
if self.nearest.get_ids() == self.last_ids_crawled:
|
||||
count = len(self.nearest)
|
||||
self.last_ids_crawled = self.nearest.get_ids()
|
||||
|
||||
dicts = {}
|
||||
for peer in self.nearest.get_uncontacted()[:count]:
|
||||
dicts[peer.peer_id_bytes] = rpcmethod(peer, self.node)
|
||||
self.nearest.mark_contacted(peer)
|
||||
found = await gather_dict(dicts)
|
||||
return await self._nodes_found(found)
|
||||
|
||||
async def _nodes_found(self, responses):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class ValueSpiderCrawl(SpiderCrawl):
|
||||
def __init__(self, protocol, node, peers, ksize, alpha):
|
||||
SpiderCrawl.__init__(self, protocol, node, peers, ksize, alpha)
|
||||
# keep track of the single nearest node without value - per
|
||||
# section 2.3 so we can set the key there if found
|
||||
self.nearest_without_value = KadPeerHeap(self.node, 1)
|
||||
|
||||
async def find(self):
|
||||
"""Find either the closest nodes or the value requested."""
|
||||
return await self._find(self.protocol.call_find_value)
|
||||
|
||||
async def _nodes_found(self, responses):
|
||||
"""Handle the result of an iteration in _find."""
|
||||
toremove = []
|
||||
found_values = []
|
||||
for peerid, response in responses.items():
|
||||
response = RPCFindResponse(response)
|
||||
if not response.happened():
|
||||
toremove.append(peerid)
|
||||
elif response.has_value():
|
||||
found_values.append(response.get_value())
|
||||
else:
|
||||
peer = self.nearest.get_node(peerid)
|
||||
self.nearest_without_value.push(peer)
|
||||
self.nearest.push(response.get_node_list())
|
||||
self.nearest.remove(toremove)
|
||||
|
||||
if found_values:
|
||||
return await self._handle_found_values(found_values)
|
||||
if self.nearest.have_contacted_all():
|
||||
# not found!
|
||||
return None
|
||||
return await self.find()
|
||||
|
||||
async def _handle_found_values(self, values):
|
||||
"""
|
||||
We got some values!
|
||||
|
||||
Exciting. But let's make sure they're all the same or freak out
|
||||
a little bit. Also, make sure we tell the nearest node that
|
||||
*didn't* have the value to store it.
|
||||
"""
|
||||
value_counts = Counter(values)
|
||||
if len(value_counts) != 1:
|
||||
log.warning(
|
||||
"Got multiple values for key %i: %s", self.node.xor_id, str(values)
|
||||
)
|
||||
value = value_counts.most_common(1)[0][0]
|
||||
|
||||
peer = self.nearest_without_value.popleft()
|
||||
if peer:
|
||||
await self.protocol.call_store(peer, self.node.peer_id_bytes, value)
|
||||
return value
|
||||
|
||||
|
||||
class NodeSpiderCrawl(SpiderCrawl):
|
||||
async def find(self):
|
||||
"""Find the closest nodes."""
|
||||
return await self._find(self.protocol.call_find_node)
|
||||
|
||||
async def _nodes_found(self, responses):
|
||||
"""Handle the result of an iteration in _find."""
|
||||
toremove = []
|
||||
for peerid, response in responses.items():
|
||||
response = RPCFindResponse(response)
|
||||
if not response.happened():
|
||||
toremove.append(peerid)
|
||||
else:
|
||||
self.nearest.push(response.get_node_list())
|
||||
self.nearest.remove(toremove)
|
||||
|
||||
if self.nearest.have_contacted_all():
|
||||
return list(self.nearest)
|
||||
return await self.find()
|
||||
|
||||
|
||||
class RPCFindResponse:
|
||||
def __init__(self, response):
|
||||
"""
|
||||
A wrapper for the result of a RPC find.
|
||||
|
||||
Args:
|
||||
response: This will be a tuple of (<response received>, <value>)
|
||||
where <value> will be a list of tuples if not found or
|
||||
a dictionary of {'value': v} where v is the value desired
|
||||
"""
|
||||
self.response = response
|
||||
|
||||
def happened(self):
|
||||
"""Did the other host actually respond?"""
|
||||
return self.response[0]
|
||||
|
||||
def has_value(self):
|
||||
return isinstance(self.response[1], dict)
|
||||
|
||||
def get_value(self):
|
||||
return self.response[1]["value"]
|
||||
|
||||
def get_node_list(self):
|
||||
"""
|
||||
Get the node list in the response.
|
||||
|
||||
If there's no value, this should be set.
|
||||
"""
|
||||
nodelist = self.response[1] or []
|
||||
return [create_kad_peerinfo(*nodeple) for nodeple in nodelist]
|
||||
@ -1,153 +0,0 @@
|
||||
import heapq
|
||||
from operator import itemgetter
|
||||
import random
|
||||
from typing import List
|
||||
|
||||
from multiaddr import Multiaddr
|
||||
|
||||
from libp2p.peer.id import ID
|
||||
from libp2p.peer.peerinfo import PeerInfo
|
||||
|
||||
from .utils import digest
|
||||
|
||||
P_IP = "ip4"
|
||||
P_UDP = "udp"
|
||||
|
||||
|
||||
class KadPeerInfo(PeerInfo):
|
||||
def __init__(self, peer_id, addrs):
|
||||
super(KadPeerInfo, self).__init__(peer_id, addrs)
|
||||
|
||||
self.peer_id_bytes = peer_id.to_bytes()
|
||||
self.xor_id = peer_id.xor_id
|
||||
|
||||
self.addrs = addrs
|
||||
|
||||
self.ip = self.addrs[0].value_for_protocol(P_IP) if addrs else None
|
||||
self.port = int(self.addrs[0].value_for_protocol(P_UDP)) if addrs else None
|
||||
|
||||
def same_home_as(self, node):
|
||||
return sorted(self.addrs) == sorted(node.addrs)
|
||||
|
||||
def distance_to(self, node):
|
||||
"""Get the distance between this node and another."""
|
||||
return self.xor_id ^ node.xor_id
|
||||
|
||||
def __iter__(self):
|
||||
"""
|
||||
Enables use of Node as a tuple - i.e., tuple(node) works.
|
||||
"""
|
||||
return iter([self.peer_id_bytes, self.ip, self.port])
|
||||
|
||||
def __repr__(self):
|
||||
return repr([self.xor_id, self.ip, self.port, self.peer_id_bytes])
|
||||
|
||||
def __str__(self):
|
||||
return "%s:%s" % (self.ip, str(self.port))
|
||||
|
||||
def encode(self):
|
||||
return (
|
||||
str(self.peer_id_bytes)
|
||||
+ "\n"
|
||||
+ str("/ip4/" + str(self.ip) + "/udp/" + str(self.port))
|
||||
)
|
||||
|
||||
|
||||
class KadPeerHeap:
|
||||
"""A heap of peers ordered by distance to a given node."""
|
||||
|
||||
def __init__(self, node, maxsize):
|
||||
"""
|
||||
Constructor.
|
||||
|
||||
@param node: The node to measure all distnaces from.
|
||||
@param maxsize: The maximum size that this heap can grow to.
|
||||
"""
|
||||
self.node = node
|
||||
self.heap = []
|
||||
self.contacted = set()
|
||||
self.maxsize = maxsize
|
||||
|
||||
def remove(self, peers):
|
||||
"""
|
||||
Remove a list of peer ids from this heap.
|
||||
|
||||
Note that while this heap retains a constant visible size (based
|
||||
on the iterator), it's actual size may be quite a bit larger
|
||||
than what's exposed. Therefore, removal of nodes may not change
|
||||
the visible size as previously added nodes suddenly become
|
||||
visible.
|
||||
"""
|
||||
peers = set(peers)
|
||||
if not peers:
|
||||
return
|
||||
nheap = []
|
||||
for distance, node in self.heap:
|
||||
if node.peer_id_bytes not in peers:
|
||||
heapq.heappush(nheap, (distance, node))
|
||||
self.heap = nheap
|
||||
|
||||
def get_node(self, node_id):
|
||||
for _, node in self.heap:
|
||||
if node.peer_id_bytes == node_id:
|
||||
return node
|
||||
return None
|
||||
|
||||
def have_contacted_all(self):
|
||||
return len(self.get_uncontacted()) == 0
|
||||
|
||||
def get_ids(self):
|
||||
return [n.peer_id_bytes for n in self]
|
||||
|
||||
def mark_contacted(self, node):
|
||||
self.contacted.add(node.peer_id_bytes)
|
||||
|
||||
def popleft(self):
|
||||
return heapq.heappop(self.heap)[1] if self else None
|
||||
|
||||
def push(self, nodes):
|
||||
"""
|
||||
Push nodes onto heap.
|
||||
|
||||
@param nodes: This can be a single item or a C{list}.
|
||||
"""
|
||||
if not isinstance(nodes, list):
|
||||
nodes = [nodes]
|
||||
|
||||
for node in nodes:
|
||||
if node not in self:
|
||||
distance = self.node.distance_to(node)
|
||||
heapq.heappush(self.heap, (distance, node))
|
||||
|
||||
def __len__(self):
|
||||
return min(len(self.heap), self.maxsize)
|
||||
|
||||
def __iter__(self):
|
||||
nodes = heapq.nsmallest(self.maxsize, self.heap)
|
||||
return iter(map(itemgetter(1), nodes))
|
||||
|
||||
def __contains__(self, node):
|
||||
for _, other in self.heap:
|
||||
if node.peer_id_bytes == other.peer_id_bytes:
|
||||
return True
|
||||
return False
|
||||
|
||||
def get_uncontacted(self):
|
||||
return [n for n in self if n.peer_id_bytes not in self.contacted]
|
||||
|
||||
|
||||
def create_kad_peerinfo(node_id_bytes=None, sender_ip=None, sender_port=None):
|
||||
node_id = (
|
||||
ID(node_id_bytes) if node_id_bytes else ID(digest(random.getrandbits(255)))
|
||||
)
|
||||
addrs: List[Multiaddr]
|
||||
if sender_ip and sender_port:
|
||||
addrs = [
|
||||
Multiaddr(
|
||||
"/" + P_IP + "/" + str(sender_ip) + "/" + P_UDP + "/" + str(sender_port)
|
||||
)
|
||||
]
|
||||
else:
|
||||
addrs = []
|
||||
|
||||
return KadPeerInfo(node_id, addrs)
|
||||
@ -1,251 +0,0 @@
|
||||
"""Package for interacting on the network at a high level."""
|
||||
import asyncio
|
||||
import logging
|
||||
import pickle
|
||||
|
||||
from .crawling import NodeSpiderCrawl, ValueSpiderCrawl
|
||||
from .kad_peerinfo import create_kad_peerinfo
|
||||
from .protocol import KademliaProtocol
|
||||
from .storage import ForgetfulStorage
|
||||
from .utils import digest
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class KademliaServer:
|
||||
"""
|
||||
High level view of a node instance.
|
||||
|
||||
This is the object that should be created to start listening as an
|
||||
active node on the network.
|
||||
"""
|
||||
|
||||
protocol_class = KademliaProtocol
|
||||
|
||||
def __init__(self, ksize=20, alpha=3, node_id=None, storage=None):
|
||||
"""
|
||||
Create a server instance. This will start listening on the given port.
|
||||
|
||||
Args:
|
||||
ksize (int): The k parameter from the paper
|
||||
alpha (int): The alpha parameter from the paper
|
||||
node_id: The id for this node on the network.
|
||||
storage: An instance that implements
|
||||
:interface:`~kademlia.storage.IStorage`
|
||||
"""
|
||||
self.ksize = ksize
|
||||
self.alpha = alpha
|
||||
self.storage = storage or ForgetfulStorage()
|
||||
self.node = create_kad_peerinfo(node_id)
|
||||
self.transport = None
|
||||
self.protocol = None
|
||||
self.refresh_loop = None
|
||||
self.save_state_loop = None
|
||||
|
||||
def stop(self):
|
||||
if self.transport is not None:
|
||||
self.transport.close()
|
||||
|
||||
if self.refresh_loop:
|
||||
self.refresh_loop.cancel()
|
||||
|
||||
if self.save_state_loop:
|
||||
self.save_state_loop.cancel()
|
||||
|
||||
def _create_protocol(self):
|
||||
return self.protocol_class(self.node, self.storage, self.ksize)
|
||||
|
||||
async def listen(self, port=0, interface="0.0.0.0"):
|
||||
"""
|
||||
Start listening on the given port.
|
||||
|
||||
Provide interface="::" to accept ipv6 address
|
||||
"""
|
||||
loop = asyncio.get_event_loop()
|
||||
listen = loop.create_datagram_endpoint(
|
||||
self._create_protocol, local_addr=(interface, port)
|
||||
)
|
||||
self.transport, self.protocol = await listen
|
||||
socket = self.transport.get_extra_info("socket")
|
||||
self.address = socket.getsockname()
|
||||
log.info(
|
||||
"Node %i listening on %s:%i",
|
||||
self.node.xor_id,
|
||||
self.address[0],
|
||||
self.address[1],
|
||||
)
|
||||
# finally, schedule refreshing table
|
||||
self.refresh_table()
|
||||
|
||||
def refresh_table(self):
|
||||
log.debug("Refreshing routing table")
|
||||
asyncio.ensure_future(self._refresh_table())
|
||||
loop = asyncio.get_event_loop()
|
||||
self.refresh_loop = loop.call_later(3600, self.refresh_table)
|
||||
|
||||
async def _refresh_table(self):
|
||||
"""Refresh buckets that haven't had any lookups in the last hour (per
|
||||
section 2.3 of the paper)."""
|
||||
results = []
|
||||
for node_id in self.protocol.get_refresh_ids():
|
||||
node = create_kad_peerinfo(node_id)
|
||||
nearest = self.protocol.router.find_neighbors(node, self.alpha)
|
||||
spider = NodeSpiderCrawl(
|
||||
self.protocol, node, nearest, self.ksize, self.alpha
|
||||
)
|
||||
results.append(spider.find())
|
||||
|
||||
# do our crawling
|
||||
await asyncio.gather(*results)
|
||||
|
||||
# now republish keys older than one hour
|
||||
for dkey, value in self.storage.iter_older_than(3600):
|
||||
await self.set_digest(dkey, value)
|
||||
|
||||
def bootstrappable_neighbors(self):
|
||||
"""
|
||||
Get a :class:`list` of (ip, port) :class:`tuple` pairs suitable for use
|
||||
as an argument to the bootstrap method.
|
||||
|
||||
The server should have been bootstrapped
|
||||
already - this is just a utility for getting some neighbors and then
|
||||
storing them if this server is going down for a while. When it comes
|
||||
back up, the list of nodes can be used to bootstrap.
|
||||
"""
|
||||
neighbors = self.protocol.router.find_neighbors(self.node)
|
||||
return [tuple(n)[-2:] for n in neighbors]
|
||||
|
||||
async def bootstrap(self, addrs):
|
||||
"""
|
||||
Bootstrap the server by connecting to other known nodes in the network.
|
||||
|
||||
Args:
|
||||
addrs: A `list` of (ip, port) `tuple` pairs. Note that only IP
|
||||
addresses are acceptable - hostnames will cause an error.
|
||||
"""
|
||||
log.debug("Attempting to bootstrap node with %i initial contacts", len(addrs))
|
||||
cos = list(map(self.bootstrap_node, addrs))
|
||||
gathered = await asyncio.gather(*cos)
|
||||
nodes = [node for node in gathered if node is not None]
|
||||
spider = NodeSpiderCrawl(
|
||||
self.protocol, self.node, nodes, self.ksize, self.alpha
|
||||
)
|
||||
return await spider.find()
|
||||
|
||||
async def bootstrap_node(self, addr):
|
||||
result = await self.protocol.ping(addr, self.node.peer_id_bytes)
|
||||
return create_kad_peerinfo(result[1], addr[0], addr[1]) if result[0] else None
|
||||
|
||||
async def get(self, key):
|
||||
"""
|
||||
Get a key if the network has it.
|
||||
|
||||
Returns:
|
||||
:class:`None` if not found, the value otherwise.
|
||||
"""
|
||||
log.info("Looking up key %s", key)
|
||||
dkey = digest(key)
|
||||
# if this node has it, return it
|
||||
if self.storage.get(dkey) is not None:
|
||||
return self.storage.get(dkey)
|
||||
|
||||
node = create_kad_peerinfo(dkey)
|
||||
nearest = self.protocol.router.find_neighbors(node)
|
||||
if not nearest:
|
||||
log.warning("There are no known neighbors to get key %s", key)
|
||||
return None
|
||||
spider = ValueSpiderCrawl(self.protocol, node, nearest, self.ksize, self.alpha)
|
||||
return await spider.find()
|
||||
|
||||
async def set(self, key, value):
|
||||
"""Set the given string key to the given value in the network."""
|
||||
if not check_dht_value_type(value):
|
||||
raise TypeError("Value must be of type int, float, bool, str, or bytes")
|
||||
log.info("setting '%s' = '%s' on network", key, value)
|
||||
dkey = digest(key)
|
||||
return await self.set_digest(dkey, value)
|
||||
|
||||
async def provide(self, key):
|
||||
"""publish to the network that it provides for a particular key."""
|
||||
neighbors = self.protocol.router.find_neighbors(self.node)
|
||||
return [
|
||||
await self.protocol.call_add_provider(n, key, self.node.peer_id_bytes)
|
||||
for n in neighbors
|
||||
]
|
||||
|
||||
async def get_providers(self, key):
|
||||
"""get the list of providers for a key."""
|
||||
neighbors = self.protocol.router.find_neighbors(self.node)
|
||||
return [await self.protocol.call_get_providers(n, key) for n in neighbors]
|
||||
|
||||
async def set_digest(self, dkey, value):
|
||||
"""Set the given SHA1 digest key (bytes) to the given value in the
|
||||
network."""
|
||||
node = create_kad_peerinfo(dkey)
|
||||
|
||||
nearest = self.protocol.router.find_neighbors(node)
|
||||
if not nearest:
|
||||
log.warning("There are no known neighbors to set key %s", dkey.hex())
|
||||
return False
|
||||
|
||||
spider = NodeSpiderCrawl(self.protocol, node, nearest, self.ksize, self.alpha)
|
||||
nodes = await spider.find()
|
||||
log.info("setting '%s' on %s", dkey.hex(), list(map(str, nodes)))
|
||||
|
||||
# if this node is close too, then store here as well
|
||||
biggest = max([n.distance_to(node) for n in nodes])
|
||||
if self.node.distance_to(node) < biggest:
|
||||
self.storage[dkey] = value
|
||||
results = [self.protocol.call_store(n, dkey, value) for n in nodes]
|
||||
# return true only if at least one store call succeeded
|
||||
return any(await asyncio.gather(*results))
|
||||
|
||||
def save_state(self, fname):
|
||||
"""Save the state of this node (the alpha/ksize/id/immediate neighbors)
|
||||
to a cache file with the given fname."""
|
||||
log.info("Saving state to %s", fname)
|
||||
data = {
|
||||
"ksize": self.ksize,
|
||||
"alpha": self.alpha,
|
||||
"id": self.node.peer_id_bytes,
|
||||
"neighbors": self.bootstrappable_neighbors(),
|
||||
}
|
||||
if not data["neighbors"]:
|
||||
log.warning("No known neighbors, so not writing to cache.")
|
||||
return
|
||||
with open(fname, "wb") as file:
|
||||
pickle.dump(data, file)
|
||||
|
||||
@classmethod
|
||||
def load_state(cls, fname):
|
||||
"""Load the state of this node (the alpha/ksize/id/immediate neighbors)
|
||||
from a cache file with the given fname."""
|
||||
log.info("Loading state from %s", fname)
|
||||
with open(fname, "rb") as file:
|
||||
data = pickle.load(file)
|
||||
svr = KademliaServer(data["ksize"], data["alpha"], data["id"])
|
||||
if data["neighbors"]:
|
||||
svr.bootstrap(data["neighbors"])
|
||||
return svr
|
||||
|
||||
def save_state_regularly(self, fname, frequency=600):
|
||||
"""
|
||||
Save the state of node with a given regularity to the given filename.
|
||||
|
||||
Args:
|
||||
fname: File name to save retularly to
|
||||
frequency: Frequency in seconds that the state should be saved.
|
||||
By default, 10 minutes.
|
||||
"""
|
||||
self.save_state(fname)
|
||||
loop = asyncio.get_event_loop()
|
||||
self.save_state_loop = loop.call_later(
|
||||
frequency, self.save_state_regularly, fname, frequency
|
||||
)
|
||||
|
||||
|
||||
def check_dht_value_type(value):
|
||||
"""Checks to see if the type of the value is a valid type for placing in
|
||||
the dht."""
|
||||
typeset = [int, float, bool, str, bytes]
|
||||
return type(value) in typeset
|
||||
@ -1,188 +0,0 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import random
|
||||
|
||||
from rpcudp.protocol import RPCProtocol
|
||||
|
||||
from .kad_peerinfo import create_kad_peerinfo
|
||||
from .routing import RoutingTable
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class KademliaProtocol(RPCProtocol):
|
||||
"""
|
||||
There are four main RPCs in the Kademlia protocol PING, STORE, FIND_NODE,
|
||||
FIND_VALUE.
|
||||
|
||||
- PING probes if a node is still online
|
||||
- STORE instructs a node to store (key, value)
|
||||
- FIND_NODE takes a 160-bit ID and gets back
|
||||
(ip, udp_port, node_id) for k closest nodes to target
|
||||
- FIND_VALUE behaves like FIND_NODE unless a value is stored.
|
||||
"""
|
||||
|
||||
def __init__(self, source_node, storage, ksize):
|
||||
RPCProtocol.__init__(self)
|
||||
self.router = RoutingTable(self, ksize, source_node)
|
||||
self.storage = storage
|
||||
self.source_node = source_node
|
||||
|
||||
def get_refresh_ids(self):
|
||||
"""Get ids to search for to keep old buckets up to date."""
|
||||
ids = []
|
||||
for bucket in self.router.lonely_buckets():
|
||||
rid = random.randint(*bucket.range).to_bytes(20, byteorder="big")
|
||||
ids.append(rid)
|
||||
return ids
|
||||
|
||||
def rpc_stun(self, sender):
|
||||
return sender
|
||||
|
||||
def rpc_ping(self, sender, nodeid):
|
||||
source = create_kad_peerinfo(nodeid, sender[0], sender[1])
|
||||
|
||||
self.welcome_if_new(source)
|
||||
return self.source_node.peer_id_bytes
|
||||
|
||||
def rpc_store(self, sender, nodeid, key, value):
|
||||
source = create_kad_peerinfo(nodeid, sender[0], sender[1])
|
||||
|
||||
self.welcome_if_new(source)
|
||||
log.debug(
|
||||
"got a store request from %s, storing '%s'='%s'", sender, key.hex(), value
|
||||
)
|
||||
self.storage[key] = value
|
||||
return True
|
||||
|
||||
def rpc_find_node(self, sender, nodeid, key):
|
||||
log.info("finding neighbors of %i in local table", int(nodeid.hex(), 16))
|
||||
source = create_kad_peerinfo(nodeid, sender[0], sender[1])
|
||||
|
||||
self.welcome_if_new(source)
|
||||
node = create_kad_peerinfo(key)
|
||||
neighbors = self.router.find_neighbors(node, exclude=source)
|
||||
return list(map(tuple, neighbors))
|
||||
|
||||
def rpc_find_value(self, sender, nodeid, key):
|
||||
source = create_kad_peerinfo(nodeid, sender[0], sender[1])
|
||||
|
||||
self.welcome_if_new(source)
|
||||
value = self.storage.get(key, None)
|
||||
if value is None:
|
||||
return self.rpc_find_node(sender, nodeid, key)
|
||||
return {"value": value}
|
||||
|
||||
def rpc_add_provider(self, sender, nodeid, key, provider_id):
|
||||
"""rpc when receiving an add_provider call should validate received
|
||||
PeerInfo matches sender nodeid if it does, receipient must store a
|
||||
record in its datastore we store a map of content_id to peer_id (non
|
||||
xor)"""
|
||||
if nodeid == provider_id:
|
||||
log.info(
|
||||
"adding provider %s for key %s in local table", provider_id, str(key)
|
||||
)
|
||||
self.storage[key] = provider_id
|
||||
return True
|
||||
return False
|
||||
|
||||
def rpc_get_providers(self, sender, key):
|
||||
"""rpc when receiving a get_providers call should look up key in data
|
||||
store and respond with records plus a list of closer peers in its
|
||||
routing table."""
|
||||
providers = []
|
||||
record = self.storage.get(key, None)
|
||||
|
||||
if record:
|
||||
providers.append(record)
|
||||
|
||||
keynode = create_kad_peerinfo(key)
|
||||
neighbors = self.router.find_neighbors(keynode)
|
||||
for neighbor in neighbors:
|
||||
if neighbor.peer_id_bytes != record:
|
||||
providers.append(neighbor.peer_id_bytes)
|
||||
|
||||
return providers
|
||||
|
||||
async def call_find_node(self, node_to_ask, node_to_find):
|
||||
address = (node_to_ask.ip, node_to_ask.port)
|
||||
result = await self.find_node(
|
||||
address, self.source_node.peer_id_bytes, node_to_find.peer_id_bytes
|
||||
)
|
||||
return self.handle_call_response(result, node_to_ask)
|
||||
|
||||
async def call_find_value(self, node_to_ask, node_to_find):
|
||||
address = (node_to_ask.ip, node_to_ask.port)
|
||||
result = await self.find_value(
|
||||
address, self.source_node.peer_id_bytes, node_to_find.peer_id_bytes
|
||||
)
|
||||
return self.handle_call_response(result, node_to_ask)
|
||||
|
||||
async def call_ping(self, node_to_ask):
|
||||
address = (node_to_ask.ip, node_to_ask.port)
|
||||
result = await self.ping(address, self.source_node.peer_id_bytes)
|
||||
return self.handle_call_response(result, node_to_ask)
|
||||
|
||||
async def call_store(self, node_to_ask, key, value):
|
||||
address = (node_to_ask.ip, node_to_ask.port)
|
||||
result = await self.store(address, self.source_node.peer_id_bytes, key, value)
|
||||
return self.handle_call_response(result, node_to_ask)
|
||||
|
||||
async def call_add_provider(self, node_to_ask, key, provider_id):
|
||||
address = (node_to_ask.ip, node_to_ask.port)
|
||||
result = await self.add_provider(
|
||||
address, self.source_node.peer_id_bytes, key, provider_id
|
||||
)
|
||||
|
||||
return self.handle_call_response(result, node_to_ask)
|
||||
|
||||
async def call_get_providers(self, node_to_ask, key):
|
||||
address = (node_to_ask.ip, node_to_ask.port)
|
||||
result = await self.get_providers(address, key)
|
||||
return self.handle_call_response(result, node_to_ask)
|
||||
|
||||
def welcome_if_new(self, node):
|
||||
"""
|
||||
Given a new node, send it all the keys/values it should be storing,
|
||||
then add it to the routing table.
|
||||
|
||||
@param node: A new node that just joined (or that we just found out
|
||||
about).
|
||||
|
||||
Process:
|
||||
For each key in storage, get k closest nodes. If newnode is closer
|
||||
than the furtherst in that list, and the node for this server
|
||||
is closer than the closest in that list, then store the key/value
|
||||
on the new node (per section 2.5 of the paper)
|
||||
"""
|
||||
if not self.router.is_new_node(node):
|
||||
return
|
||||
|
||||
log.info("never seen %s before, adding to router", node)
|
||||
for key, value in self.storage:
|
||||
keynode = create_kad_peerinfo(key)
|
||||
neighbors = self.router.find_neighbors(keynode)
|
||||
if neighbors:
|
||||
last = neighbors[-1].distance_to(keynode)
|
||||
new_node_close = node.distance_to(keynode) < last
|
||||
first = neighbors[0].distance_to(keynode)
|
||||
this_closest = self.source_node.distance_to(keynode) < first
|
||||
if not neighbors or (new_node_close and this_closest):
|
||||
asyncio.ensure_future(self.call_store(node, key, value))
|
||||
self.router.add_contact(node)
|
||||
|
||||
def handle_call_response(self, result, node):
|
||||
"""
|
||||
If we get a response, add the node to the routing table.
|
||||
|
||||
If we get no response, make sure it's removed from the routing
|
||||
table.
|
||||
"""
|
||||
if not result[0]:
|
||||
log.warning("no response from %s, removing from router", node)
|
||||
self.router.remove_contact(node)
|
||||
return result
|
||||
|
||||
log.info("got successful response from %s", node)
|
||||
self.welcome_if_new(node)
|
||||
return result
|
||||
@ -1,184 +0,0 @@
|
||||
import asyncio
|
||||
from collections import OrderedDict
|
||||
import heapq
|
||||
import operator
|
||||
import time
|
||||
|
||||
from .utils import OrderedSet, bytes_to_bit_string, shared_prefix
|
||||
|
||||
|
||||
class KBucket:
|
||||
"""each node keeps a list of (ip, udp_port, node_id) for nodes of distance
|
||||
between 2^i and 2^(i+1) this list that every node keeps is a k-bucket each
|
||||
k-bucket implements a last seen eviction policy except that live nodes are
|
||||
never removed."""
|
||||
|
||||
def __init__(self, rangeLower, rangeUpper, ksize):
|
||||
self.range = (rangeLower, rangeUpper)
|
||||
self.nodes = OrderedDict()
|
||||
self.replacement_nodes = OrderedSet()
|
||||
self.touch_last_updated()
|
||||
self.ksize = ksize
|
||||
|
||||
def touch_last_updated(self):
|
||||
self.last_updated = time.monotonic()
|
||||
|
||||
def get_nodes(self):
|
||||
return list(self.nodes.values())
|
||||
|
||||
def split(self):
|
||||
midpoint = (self.range[0] + self.range[1]) / 2
|
||||
one = KBucket(self.range[0], midpoint, self.ksize)
|
||||
two = KBucket(midpoint + 1, self.range[1], self.ksize)
|
||||
for node in self.nodes.values():
|
||||
bucket = one if node.xor_id <= midpoint else two
|
||||
bucket.nodes[node.peer_id_bytes] = node
|
||||
return (one, two)
|
||||
|
||||
def remove_node(self, node):
|
||||
if node.peer_id_bytes not in self.nodes:
|
||||
return
|
||||
|
||||
# delete node, and see if we can add a replacement
|
||||
del self.nodes[node.peer_id_bytes]
|
||||
if self.replacement_nodes:
|
||||
newnode = self.replacement_nodes.pop()
|
||||
self.nodes[newnode.peer_id_bytes] = newnode
|
||||
|
||||
def has_in_range(self, node):
|
||||
return self.range[0] <= node.xor_id <= self.range[1]
|
||||
|
||||
def is_new_node(self, node):
|
||||
return node.peer_id_bytes not in self.nodes
|
||||
|
||||
def add_node(self, node):
|
||||
"""
|
||||
Add a C{Node} to the C{KBucket}. Return True if successful, False if
|
||||
the bucket is full.
|
||||
|
||||
If the bucket is full, keep track of node in a replacement list,
|
||||
per section 4.1 of the paper.
|
||||
"""
|
||||
if node.peer_id_bytes in self.nodes:
|
||||
del self.nodes[node.peer_id_bytes]
|
||||
self.nodes[node.peer_id_bytes] = node
|
||||
elif len(self) < self.ksize:
|
||||
self.nodes[node.peer_id_bytes] = node
|
||||
else:
|
||||
self.replacement_nodes.push(node)
|
||||
return False
|
||||
return True
|
||||
|
||||
def depth(self):
|
||||
vals = self.nodes.values()
|
||||
sprefix = shared_prefix([bytes_to_bit_string(n.peer_id_bytes) for n in vals])
|
||||
return len(sprefix)
|
||||
|
||||
def head(self):
|
||||
return list(self.nodes.values())[0]
|
||||
|
||||
def __getitem__(self, node_id):
|
||||
return self.nodes.get(node_id, None)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.nodes)
|
||||
|
||||
|
||||
class TableTraverser:
|
||||
def __init__(self, table, startNode):
|
||||
index = table.get_bucket_for(startNode)
|
||||
table.buckets[index].touch_last_updated()
|
||||
self.current_nodes = table.buckets[index].get_nodes()
|
||||
self.left_buckets = table.buckets[:index]
|
||||
self.right_buckets = table.buckets[(index + 1) :]
|
||||
self.left = True
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def __next__(self):
|
||||
"""Pop an item from the left subtree, then right, then left, etc."""
|
||||
if self.current_nodes:
|
||||
return self.current_nodes.pop()
|
||||
|
||||
if self.left and self.left_buckets:
|
||||
self.current_nodes = self.left_buckets.pop().get_nodes()
|
||||
self.left = False
|
||||
return next(self)
|
||||
|
||||
if self.right_buckets:
|
||||
self.current_nodes = self.right_buckets.pop(0).get_nodes()
|
||||
self.left = True
|
||||
return next(self)
|
||||
|
||||
raise StopIteration
|
||||
|
||||
|
||||
class RoutingTable:
|
||||
def __init__(self, protocol, ksize, node):
|
||||
"""
|
||||
@param node: The node that represents this server. It won't
|
||||
be added to the routing table, but will be needed later to
|
||||
determine which buckets to split or not.
|
||||
"""
|
||||
self.node = node
|
||||
self.protocol = protocol
|
||||
self.ksize = ksize
|
||||
self.flush()
|
||||
|
||||
def flush(self):
|
||||
self.buckets = [KBucket(0, 2 ** 160, self.ksize)]
|
||||
|
||||
def split_bucket(self, index):
|
||||
one, two = self.buckets[index].split()
|
||||
self.buckets[index] = one
|
||||
self.buckets.insert(index + 1, two)
|
||||
|
||||
def lonely_buckets(self):
|
||||
"""Get all of the buckets that haven't been updated in over an hour."""
|
||||
hrago = time.monotonic() - 3600
|
||||
return [b for b in self.buckets if b.last_updated < hrago]
|
||||
|
||||
def remove_contact(self, node):
|
||||
index = self.get_bucket_for(node)
|
||||
self.buckets[index].remove_node(node)
|
||||
|
||||
def is_new_node(self, node):
|
||||
index = self.get_bucket_for(node)
|
||||
return self.buckets[index].is_new_node(node)
|
||||
|
||||
def add_contact(self, node):
|
||||
index = self.get_bucket_for(node)
|
||||
bucket = self.buckets[index]
|
||||
|
||||
# this will succeed unless the bucket is full
|
||||
if bucket.add_node(node):
|
||||
return
|
||||
|
||||
# Per section 4.2 of paper, split if the bucket has the node
|
||||
# in its range or if the depth is not congruent to 0 mod 5
|
||||
if bucket.has_in_range(self.node) or bucket.depth() % 5 != 0:
|
||||
self.split_bucket(index)
|
||||
self.add_contact(node)
|
||||
else:
|
||||
asyncio.ensure_future(self.protocol.call_ping(bucket.head()))
|
||||
|
||||
def get_bucket_for(self, node):
|
||||
"""Get the index of the bucket that the given node would fall into."""
|
||||
for index, bucket in enumerate(self.buckets):
|
||||
if node.xor_id < bucket.range[1]:
|
||||
return index
|
||||
# we should never be here, but make linter happy
|
||||
return None
|
||||
|
||||
def find_neighbors(self, node, k=None, exclude=None):
|
||||
k = k or self.ksize
|
||||
nodes = []
|
||||
for neighbor in TableTraverser(self, node):
|
||||
notexcluded = exclude is None or not neighbor.same_home_as(exclude)
|
||||
if neighbor.peer_id_bytes != node.peer_id_bytes and notexcluded:
|
||||
heapq.heappush(nodes, (node.distance_to(neighbor), neighbor))
|
||||
if len(nodes) == k:
|
||||
break
|
||||
|
||||
return list(map(operator.itemgetter(1), heapq.nsmallest(k, nodes)))
|
||||
@ -1,78 +0,0 @@
|
||||
// Record represents a dht record that contains a value
|
||||
// for a key value pair
|
||||
message Record {
|
||||
// The key that references this record
|
||||
bytes key = 1;
|
||||
|
||||
// The actual value this record is storing
|
||||
bytes value = 2;
|
||||
|
||||
// Note: These fields were removed from the Record message
|
||||
// hash of the authors public key
|
||||
//optional string author = 3;
|
||||
// A PKI signature for the key+value+author
|
||||
//optional bytes signature = 4;
|
||||
|
||||
// Time the record was received, set by receiver
|
||||
string timeReceived = 5;
|
||||
};
|
||||
|
||||
message Message {
|
||||
enum MessageType {
|
||||
PUT_VALUE = 0;
|
||||
GET_VALUE = 1;
|
||||
ADD_PROVIDER = 2;
|
||||
GET_PROVIDERS = 3;
|
||||
FIND_NODE = 4;
|
||||
PING = 5;
|
||||
}
|
||||
|
||||
enum ConnectionType {
|
||||
// sender does not have a connection to peer, and no extra information (default)
|
||||
NOT_CONNECTED = 0;
|
||||
|
||||
// sender has a live connection to peer
|
||||
CONNECTED = 1;
|
||||
|
||||
// sender recently connected to peer
|
||||
CAN_CONNECT = 2;
|
||||
|
||||
// sender recently tried to connect to peer repeatedly but failed to connect
|
||||
// ("try" here is loose, but this should signal "made strong effort, failed")
|
||||
CANNOT_CONNECT = 3;
|
||||
}
|
||||
|
||||
message Peer {
|
||||
// ID of a given peer.
|
||||
bytes id = 1;
|
||||
|
||||
// multiaddrs for a given peer
|
||||
repeated bytes addrs = 2;
|
||||
|
||||
// used to signal the sender's connection capabilities to the peer
|
||||
ConnectionType connection = 3;
|
||||
}
|
||||
|
||||
// defines what type of message it is.
|
||||
MessageType type = 1;
|
||||
|
||||
// defines what coral cluster level this query/response belongs to.
|
||||
// in case we want to implement coral's cluster rings in the future.
|
||||
int32 clusterLevelRaw = 10; // NOT USED
|
||||
|
||||
// Used to specify the key associated with this message.
|
||||
// PUT_VALUE, GET_VALUE, ADD_PROVIDER, GET_PROVIDERS
|
||||
bytes key = 2;
|
||||
|
||||
// Used to return a value
|
||||
// PUT_VALUE, GET_VALUE
|
||||
Record record = 3;
|
||||
|
||||
// Used to return peers closer to a key in a query
|
||||
// GET_VALUE, GET_PROVIDERS, FIND_NODE
|
||||
repeated Peer closerPeers = 8;
|
||||
|
||||
// Used to return Providers
|
||||
// GET_VALUE, ADD_PROVIDER, GET_PROVIDERS
|
||||
repeated Peer providerPeers = 9;
|
||||
}
|
||||
@ -1,93 +0,0 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from collections import OrderedDict
|
||||
from itertools import takewhile
|
||||
import operator
|
||||
import time
|
||||
|
||||
|
||||
class IStorage(ABC):
|
||||
"""
|
||||
Local storage for this node.
|
||||
|
||||
IStorage implementations of get must return the same type as put in
|
||||
by set
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def __setitem__(self, key, value):
|
||||
"""Set a key to the given value."""
|
||||
|
||||
@abstractmethod
|
||||
def __getitem__(self, key):
|
||||
"""
|
||||
Get the given key.
|
||||
|
||||
If item doesn't exist, raises C{KeyError}
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def get(self, key, default=None):
|
||||
"""
|
||||
Get given key.
|
||||
|
||||
If not found, return default.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def iter_older_than(self, seconds_old):
|
||||
"""Return the an iterator over (key, value) tuples for items older than
|
||||
the given seconds_old."""
|
||||
|
||||
@abstractmethod
|
||||
def __iter__(self):
|
||||
"""Get the iterator for this storage, should yield tuple of (key,
|
||||
value)"""
|
||||
|
||||
|
||||
class ForgetfulStorage(IStorage):
|
||||
def __init__(self, ttl=604800):
|
||||
"""By default, max age is a week."""
|
||||
self.data = OrderedDict()
|
||||
self.ttl = ttl
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
if key in self.data:
|
||||
del self.data[key]
|
||||
self.data[key] = (time.monotonic(), value)
|
||||
self.cull()
|
||||
|
||||
def cull(self):
|
||||
for _, _ in self.iter_older_than(self.ttl):
|
||||
self.data.popitem(last=False)
|
||||
|
||||
def get(self, key, default=None):
|
||||
self.cull()
|
||||
if key in self.data:
|
||||
return self[key]
|
||||
return default
|
||||
|
||||
def __getitem__(self, key):
|
||||
self.cull()
|
||||
return self.data[key][1]
|
||||
|
||||
def __repr__(self):
|
||||
self.cull()
|
||||
return repr(self.data)
|
||||
|
||||
def iter_older_than(self, seconds_old):
|
||||
min_birthday = time.monotonic() - seconds_old
|
||||
zipped = self._triple_iter()
|
||||
matches = takewhile(lambda r: min_birthday >= r[1], zipped)
|
||||
return list(map(operator.itemgetter(0, 2), matches))
|
||||
|
||||
def _triple_iter(self):
|
||||
ikeys = self.data.keys()
|
||||
ibirthday = map(operator.itemgetter(0), self.data.values())
|
||||
ivalues = map(operator.itemgetter(1), self.data.values())
|
||||
return zip(ikeys, ibirthday, ivalues)
|
||||
|
||||
def __iter__(self):
|
||||
self.cull()
|
||||
ikeys = self.data.keys()
|
||||
ivalues = map(operator.itemgetter(1), self.data.values())
|
||||
return zip(ikeys, ivalues)
|
||||
@ -1,56 +0,0 @@
|
||||
"""General catchall for functions that don't make sense as methods."""
|
||||
import asyncio
|
||||
import hashlib
|
||||
import operator
|
||||
|
||||
|
||||
async def gather_dict(dic):
|
||||
cors = list(dic.values())
|
||||
results = await asyncio.gather(*cors)
|
||||
return dict(zip(dic.keys(), results))
|
||||
|
||||
|
||||
def digest(string):
|
||||
if not isinstance(string, bytes):
|
||||
string = str(string).encode("utf8")
|
||||
return hashlib.sha1(string).digest()
|
||||
|
||||
|
||||
class OrderedSet(list):
|
||||
"""
|
||||
Acts like a list in all ways, except in the behavior of the.
|
||||
|
||||
:meth:`push` method.
|
||||
"""
|
||||
|
||||
def push(self, thing):
|
||||
"""
|
||||
1. If the item exists in the list, it's removed
|
||||
2. The item is pushed to the end of the list
|
||||
"""
|
||||
if thing in self:
|
||||
self.remove(thing)
|
||||
self.append(thing)
|
||||
|
||||
|
||||
def shared_prefix(args):
|
||||
"""
|
||||
Find the shared prefix between the strings.
|
||||
|
||||
For instance:
|
||||
|
||||
sharedPrefix(['blahblah', 'blahwhat'])
|
||||
|
||||
returns 'blah'.
|
||||
"""
|
||||
i = 0
|
||||
while i < min(map(len, args)):
|
||||
if len(set(map(operator.itemgetter(i), args))) != 1:
|
||||
break
|
||||
i += 1
|
||||
return args[0][:i]
|
||||
|
||||
|
||||
def bytes_to_bit_string(bites):
|
||||
bits = [bin(bite)[2:].rjust(8, "0") for bite in bites]
|
||||
return "".join(bits)
|
||||
Reference in New Issue
Block a user