mirror of
https://github.com/varun-r-mallya/py-libp2p.git
synced 2025-12-31 20:36:24 +00:00
Kademlia DHT implementation in py-libp2p (#579)
* initialise the module * added content routing * added routing module * added peer routing * added value store * added utilities functions * added main kademlia file * fixed create_key_from_binary function * example to test kademlia dht * added protocol ID and enhanced logging for peer store size in provider and consumer nodes * refactor: specify stream type in handle_stream method and add peer in routing table * removed content routing * added default value of count for finding closest peers * added functions to find close peers * refactor: remove content routing and enhance peer discovery * added put value function * added get value function * fix: improve logging and handle key encoding in get_value method * refactor: remove ContentRouting import from __init__.py * refactor: improved basic kademlia example * added protobuf files * replaced json with protobuf * refactor: enhance peer discovery and routing logic in KadDHT * refactor: enhance Kademlia routing table to use PeerInfo objects and improve peer management * refactor: enhance peer addition logic to utilize PeerInfo objects in routing table * feat: implement content provider functionality in Kademlia DHT * refactor: update value store to use datetime for validity management * refactor: update RoutingTable initialization to include host reference * refactor: enhance KBucket and RoutingTable for improved peer management and functionality * refactor: streamline peer discovery and value storage methods in KadDHT * refactor: update KadDHT and related classes for async peer management and enhanced value storage * refactor: enhance ProviderStore initialization and improve peer routing integration * test: add tests for Kademlia DHT functionality * fix linting issues * pydocstyle issues fixed * CICD pipeline issues solved * fix: update docstring format for find_peer method * refactor: improve logging and remove unused code in DHT implementation * refactor: clean up logging and remove unused imports in DHT and test files * Refactor logging setup and improve DHT stream handling with varint length prefixes * Update bootstrap peer handling in basic_dht example and refactor peer routing to accept string addresses * Enhance peer querying in Kademlia DHT by implementing parallel queries using Trio. * Enhance peer querying by adding deduplication checks * Refactor DHT implementation to use varint for length prefixes and enhance logging for better traceability * Add base58 encoding for value storage and enhance logging in basic_dht example * Refactor Kademlia DHT to support server/client modes * Added unit tests * Refactor documentation to fixsome warning * Add unit tests and remove outdated tests * Fixed precommit errora * Refactor error handling test to raise StringParseError for invalid bootstrap addresses * Add libp2p.kad_dht to the list of subpackages in documentation * Fix expiration and republish checks to use inclusive comparison * Add __init__.py file to libp2p.kad_dht.pb package * Refactor get value and put value to run in parallel with query timeout * Refactor provider message handling to use parallel processing with timeout * Add methods for provider store in KadDHT class * Refactor KadDHT and ProviderStore methods to improve type hints and enhance parallel processing * Add documentation for libp2p.kad_dht.pb module. * Update documentation for libp2p.kad_dht package to include subpackages and correct formatting * Fix formatting in documentation for libp2p.kad_dht package by correcting the subpackage reference * Fix header formatting in libp2p.kad_dht.pb documentation * Change log level from info to debug for various logging statements. * fix CICD issues (post revamp) * fixed value store unit test * Refactored kademlia example * Refactor Kademlia example: enhance logging, improve bootstrap node connection, and streamline server address handling * removed bootstrap module * Refactor Kademlia DHT example and core modules: enhance logging, remove unused code, and improve peer handling * Added docs of kad dht example * Update server address log file path to use the script's directory * Refactor: Introduce DHTMode enum for clearer mode management * moved xor_distance function to utils.py * Enhance logging in ValueStore and KadDHT: include decoded value in debug logs and update parameter description for validity * Add handling for closest peers in GET_VALUE response when value is not found * Handled failure scenario for PUT_VALUE * Remove kademlia demo from project scripts and contributing documentation * spelling and logging --------- Co-authored-by: pacrob <5199899+pacrob@users.noreply.github.com>
This commit is contained in:
300
examples/kademlia/kademlia.py
Normal file
300
examples/kademlia/kademlia.py
Normal file
@ -0,0 +1,300 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""
|
||||
A basic example of using the Kademlia DHT implementation, with all setup logic inlined.
|
||||
This example demonstrates both value storage/retrieval and content server
|
||||
advertisement/discovery.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import secrets
|
||||
import sys
|
||||
|
||||
import base58
|
||||
from multiaddr import (
|
||||
Multiaddr,
|
||||
)
|
||||
import trio
|
||||
|
||||
from libp2p import (
|
||||
new_host,
|
||||
)
|
||||
from libp2p.abc import (
|
||||
IHost,
|
||||
)
|
||||
from libp2p.crypto.secp256k1 import (
|
||||
create_new_key_pair,
|
||||
)
|
||||
from libp2p.kad_dht.kad_dht import (
|
||||
DHTMode,
|
||||
KadDHT,
|
||||
)
|
||||
from libp2p.kad_dht.utils import (
|
||||
create_key_from_binary,
|
||||
)
|
||||
from libp2p.tools.async_service import (
|
||||
background_trio_service,
|
||||
)
|
||||
from libp2p.tools.utils import (
|
||||
info_from_p2p_addr,
|
||||
)
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
||||
handlers=[logging.StreamHandler()],
|
||||
)
|
||||
logger = logging.getLogger("kademlia-example")
|
||||
|
||||
# Configure DHT module loggers to inherit from the parent logger
|
||||
# This ensures all kademlia-example.* loggers use the same configuration
|
||||
# Get the directory where this script is located
|
||||
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
SERVER_ADDR_LOG = os.path.join(SCRIPT_DIR, "server_node_addr.txt")
|
||||
|
||||
# Set the level for all child loggers
|
||||
for module in [
|
||||
"kad_dht",
|
||||
"value_store",
|
||||
"peer_routing",
|
||||
"routing_table",
|
||||
"provider_store",
|
||||
]:
|
||||
child_logger = logging.getLogger(f"kademlia-example.{module}")
|
||||
child_logger.setLevel(logging.INFO)
|
||||
child_logger.propagate = True # Allow propagation to parent
|
||||
|
||||
# File to store node information
|
||||
bootstrap_nodes = []
|
||||
|
||||
|
||||
# function to take bootstrap_nodes as input and connects to them
|
||||
async def connect_to_bootstrap_nodes(host: IHost, bootstrap_addrs: list[str]) -> None:
|
||||
"""
|
||||
Connect to the bootstrap nodes provided in the list.
|
||||
|
||||
params: host: The host instance to connect to
|
||||
bootstrap_addrs: List of bootstrap node addresses
|
||||
|
||||
Returns
|
||||
-------
|
||||
None
|
||||
|
||||
"""
|
||||
for addr in bootstrap_addrs:
|
||||
try:
|
||||
peerInfo = info_from_p2p_addr(Multiaddr(addr))
|
||||
host.get_peerstore().add_addrs(peerInfo.peer_id, peerInfo.addrs, 3600)
|
||||
await host.connect(peerInfo)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to connect to bootstrap node {addr}: {e}")
|
||||
|
||||
|
||||
def save_server_addr(addr: str) -> None:
|
||||
"""Append the server's multiaddress to the log file."""
|
||||
try:
|
||||
with open(SERVER_ADDR_LOG, "w") as f:
|
||||
f.write(addr + "\n")
|
||||
logger.info(f"Saved server address to log: {addr}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save server address: {e}")
|
||||
|
||||
|
||||
def load_server_addrs() -> list[str]:
|
||||
"""Load all server multiaddresses from the log file."""
|
||||
if not os.path.exists(SERVER_ADDR_LOG):
|
||||
return []
|
||||
try:
|
||||
with open(SERVER_ADDR_LOG) as f:
|
||||
return [line.strip() for line in f if line.strip()]
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load server addresses: {e}")
|
||||
return []
|
||||
|
||||
|
||||
async def run_node(
|
||||
port: int, mode: str, bootstrap_addrs: list[str] | None = None
|
||||
) -> None:
|
||||
"""Run a node that serves content in the DHT with setup inlined."""
|
||||
try:
|
||||
if port <= 0:
|
||||
port = random.randint(10000, 60000)
|
||||
logger.debug(f"Using port: {port}")
|
||||
|
||||
# Convert string mode to DHTMode enum
|
||||
if mode is None or mode.upper() == "CLIENT":
|
||||
dht_mode = DHTMode.CLIENT
|
||||
elif mode.upper() == "SERVER":
|
||||
dht_mode = DHTMode.SERVER
|
||||
else:
|
||||
logger.error(f"Invalid mode: {mode}. Must be 'client' or 'server'")
|
||||
sys.exit(1)
|
||||
|
||||
# Load server addresses for client mode
|
||||
if dht_mode == DHTMode.CLIENT:
|
||||
server_addrs = load_server_addrs()
|
||||
if server_addrs:
|
||||
logger.info(f"Loaded {len(server_addrs)} server addresses from log")
|
||||
bootstrap_nodes.append(server_addrs[0]) # Use the first server address
|
||||
else:
|
||||
logger.warning("No server addresses found in log file")
|
||||
|
||||
if bootstrap_addrs:
|
||||
for addr in bootstrap_addrs:
|
||||
bootstrap_nodes.append(addr)
|
||||
|
||||
key_pair = create_new_key_pair(secrets.token_bytes(32))
|
||||
host = new_host(key_pair=key_pair)
|
||||
listen_addr = Multiaddr(f"/ip4/127.0.0.1/tcp/{port}")
|
||||
|
||||
async with host.run(listen_addrs=[listen_addr]):
|
||||
peer_id = host.get_id().pretty()
|
||||
addr_str = f"/ip4/127.0.0.1/tcp/{port}/p2p/{peer_id}"
|
||||
await connect_to_bootstrap_nodes(host, bootstrap_nodes)
|
||||
dht = KadDHT(host, dht_mode)
|
||||
# take all peer ids from the host and add them to the dht
|
||||
for peer_id in host.get_peerstore().peer_ids():
|
||||
await dht.routing_table.add_peer(peer_id)
|
||||
logger.info(f"Connected to bootstrap nodes: {host.get_connected_peers()}")
|
||||
bootstrap_cmd = f"--bootstrap {addr_str}"
|
||||
logger.info("To connect to this node, use: %s", bootstrap_cmd)
|
||||
|
||||
# Save server address in server mode
|
||||
if dht_mode == DHTMode.SERVER:
|
||||
save_server_addr(addr_str)
|
||||
|
||||
# Start the DHT service
|
||||
async with background_trio_service(dht):
|
||||
logger.info(f"DHT service started in {dht_mode.value} mode")
|
||||
val_key = create_key_from_binary(b"py-libp2p kademlia example value")
|
||||
content = b"Hello from python node "
|
||||
content_key = create_key_from_binary(content)
|
||||
|
||||
if dht_mode == DHTMode.SERVER:
|
||||
# Store a value in the DHT
|
||||
msg = "Hello message from Sumanjeet"
|
||||
val_data = msg.encode()
|
||||
await dht.put_value(val_key, val_data)
|
||||
logger.info(
|
||||
f"Stored value '{val_data.decode()}'"
|
||||
f"with key: {base58.b58encode(val_key).decode()}"
|
||||
)
|
||||
|
||||
# Advertise as content server
|
||||
success = await dht.provider_store.provide(content_key)
|
||||
if success:
|
||||
logger.info(
|
||||
"Successfully advertised as server"
|
||||
f"for content: {content_key.hex()}"
|
||||
)
|
||||
else:
|
||||
logger.warning("Failed to advertise as content server")
|
||||
|
||||
else:
|
||||
# retrieve the value
|
||||
logger.info(
|
||||
"Looking up key: %s", base58.b58encode(val_key).decode()
|
||||
)
|
||||
val_data = await dht.get_value(val_key)
|
||||
if val_data:
|
||||
try:
|
||||
logger.info(f"Retrieved value: {val_data.decode()}")
|
||||
except UnicodeDecodeError:
|
||||
logger.info(f"Retrieved value (bytes): {val_data!r}")
|
||||
else:
|
||||
logger.warning("Failed to retrieve value")
|
||||
|
||||
# Also check if we can find servers for our own content
|
||||
logger.info("Looking for servers of content: %s", content_key.hex())
|
||||
providers = await dht.provider_store.find_providers(content_key)
|
||||
if providers:
|
||||
logger.info(
|
||||
"Found %d servers for content: %s",
|
||||
len(providers),
|
||||
[p.peer_id.pretty() for p in providers],
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"No servers found for content %s", content_key.hex()
|
||||
)
|
||||
|
||||
# Keep the node running
|
||||
while True:
|
||||
logger.debug(
|
||||
"Status - Connected peers: %d,"
|
||||
"Peers in store: %d, Values in store: %d",
|
||||
len(dht.host.get_connected_peers()),
|
||||
len(dht.host.get_peerstore().peer_ids()),
|
||||
len(dht.value_store.store),
|
||||
)
|
||||
await trio.sleep(10)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Server node error: {e}", exc_info=True)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def parse_args():
|
||||
"""Parse command line arguments."""
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Kademlia DHT example with content server functionality"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--mode",
|
||||
default="server",
|
||||
help="Run as a server or client node",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--port",
|
||||
type=int,
|
||||
default=0,
|
||||
help="Port to listen on (0 for random)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--bootstrap",
|
||||
type=str,
|
||||
nargs="*",
|
||||
help=(
|
||||
"Multiaddrs of bootstrap nodes. "
|
||||
"Provide a space-separated list of addresses. "
|
||||
"This is required for client mode."
|
||||
),
|
||||
)
|
||||
# add option to use verbose logging
|
||||
parser.add_argument(
|
||||
"--verbose",
|
||||
action="store_true",
|
||||
help="Enable verbose logging",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
# Set logging level based on verbosity
|
||||
if args.verbose:
|
||||
logging.getLogger().setLevel(logging.DEBUG)
|
||||
else:
|
||||
logging.getLogger().setLevel(logging.INFO)
|
||||
|
||||
return args
|
||||
|
||||
|
||||
def main():
|
||||
"""Main entry point for the kademlia demo."""
|
||||
try:
|
||||
args = parse_args()
|
||||
logger.info(
|
||||
"Running in %s mode on port %d",
|
||||
args.mode,
|
||||
args.port,
|
||||
)
|
||||
trio.run(run_node, args.port, args.mode, args.bootstrap)
|
||||
except Exception as e:
|
||||
logger.critical(f"Script failed: {e}", exc_info=True)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user