From c1e90b9d466210d8c25f5bdb8f5fb1997db599e1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 20 Jan 2026 22:06:00 +0000 Subject: [PATCH] Add comprehensive Sphinx documentation structure and content Co-authored-by: r41k0u <76248539+r41k0u@users.noreply.github.com> --- docs/Makefile | 20 + docs/_static/.gitkeep | 0 docs/api/index.md | 471 ++++++++++++++++++++++ docs/conf.py | 106 +++++ docs/getting-started/index.md | 45 +++ docs/getting-started/installation.md | 159 ++++++++ docs/getting-started/quickstart.md | 236 +++++++++++ docs/index.md | 96 +++++ docs/make.bat | 35 ++ docs/requirements.txt | 4 + docs/user-guide/compilation.md | 529 ++++++++++++++++++++++++ docs/user-guide/decorators.md | 459 +++++++++++++++++++++ docs/user-guide/helpers.md | 574 +++++++++++++++++++++++++++ docs/user-guide/index.md | 100 +++++ docs/user-guide/maps.md | 484 ++++++++++++++++++++++ docs/user-guide/structs.md | 542 +++++++++++++++++++++++++ pyproject.toml | 8 + 17 files changed, 3868 insertions(+) create mode 100644 docs/Makefile create mode 100644 docs/_static/.gitkeep create mode 100644 docs/api/index.md create mode 100644 docs/conf.py create mode 100644 docs/getting-started/index.md create mode 100644 docs/getting-started/installation.md create mode 100644 docs/getting-started/quickstart.md create mode 100644 docs/index.md create mode 100644 docs/make.bat create mode 100644 docs/requirements.txt create mode 100644 docs/user-guide/compilation.md create mode 100644 docs/user-guide/decorators.md create mode 100644 docs/user-guide/helpers.md create mode 100644 docs/user-guide/index.md create mode 100644 docs/user-guide/maps.md create mode 100644 docs/user-guide/structs.md diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d4bb2cb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# 
from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/_static/.gitkeep b/docs/_static/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/docs/api/index.md b/docs/api/index.md new file mode 100644 index 0000000..5bf2313 --- /dev/null +++ b/docs/api/index.md @@ -0,0 +1,471 @@ +# API Reference + +This section provides detailed API documentation for all PythonBPF modules, classes, and functions. + +## Module Overview + +PythonBPF is organized into several modules: + +* `pythonbpf` - Main module with decorators and compilation functions +* `pythonbpf.maps` - BPF map types +* `pythonbpf.helper` - BPF helper functions +* `pythonbpf.structs` - Struct type handling +* `pythonbpf.codegen` - Code generation and compilation + +## Public API + +The main `pythonbpf` module exports the following public API: + +```python +from pythonbpf import ( + # Decorators + bpf, + map, + section, + bpfglobal, + struct, + + # Compilation + compile_to_ir, + compile, + BPF, + + # Utilities + trace_pipe, + trace_fields, +) +``` + +## Decorators + +```{eval-rst} +.. automodule:: pythonbpf.decorators + :members: + :undoc-members: + :show-inheritance: +``` + +### bpf + +```python +@bpf +def my_function(): + pass +``` + +Decorator to mark a function or class for BPF compilation. Any function or class decorated with `@bpf` will be processed by the PythonBPF compiler. 
+ +**See also:** {doc}`../user-guide/decorators` + +### map + +```python +@bpf +@map +def my_map() -> HashMap: + return HashMap(key=c_uint32, value=c_uint64, max_entries=1024) +``` + +Decorator to mark a function as a BPF map definition. The function must return a map type. + +**See also:** {doc}`../user-guide/maps` + +### section + +```python +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def trace_open(ctx: c_void_p) -> c_int64: + return c_int64(0) +``` + +Decorator to specify which kernel hook to attach the BPF program to. + +**Parameters:** +* `name` (str) - The section name (e.g., "tracepoint/...", "kprobe/...", "xdp") + +**See also:** {doc}`../user-guide/decorators` + +### bpfglobal + +```python +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" +``` + +Decorator to mark a function as a BPF global variable definition. + +**See also:** {doc}`../user-guide/decorators` + +### struct + +```python +@bpf +@struct +class Event: + timestamp: c_uint64 + pid: c_uint32 +``` + +Decorator to mark a class as a BPF struct definition. + +**See also:** {doc}`../user-guide/structs` + +## Compilation Functions + +```{eval-rst} +.. automodule:: pythonbpf.codegen + :members: compile_to_ir, compile, BPF + :undoc-members: + :show-inheritance: +``` + +### compile_to_ir() + +```python +def compile_to_ir( + filename: str, + output: str, + loglevel=logging.INFO +) -> None +``` + +Compile Python source to LLVM Intermediate Representation. + +**Parameters:** +* `filename` (str) - Path to the Python source file +* `output` (str) - Path for the output LLVM IR file (.ll) +* `loglevel` - Logging level (default: logging.INFO) + +**See also:** {doc}`../user-guide/compilation` + +### compile() + +```python +def compile( + filename: str = None, + output: str = None, + loglevel=logging.INFO +) -> None +``` + +Compile Python source to BPF object file. 
+ +**Parameters:** +* `filename` (str, optional) - Path to the Python source file (default: calling file) +* `output` (str, optional) - Path for the output object file (default: same name with .o extension) +* `loglevel` - Logging level (default: logging.INFO) + +**See also:** {doc}`../user-guide/compilation` + +### BPF + +```python +class BPF: + def __init__( + self, + filename: str = None, + loglevel=logging.INFO + ) + + def load(self) -> BpfObject + def attach_all(self) -> None + def load_and_attach(self) -> BpfObject +``` + +High-level interface to compile, load, and attach BPF programs. + +**Parameters:** +* `filename` (str, optional) - Path to Python source file (default: calling file) +* `loglevel` - Logging level (default: logging.INFO) + +**Methods:** +* `load()` - Load the compiled BPF program into the kernel +* `attach_all()` - Attach all BPF programs to their hooks +* `load_and_attach()` - Convenience method that loads and attaches + +**See also:** {doc}`../user-guide/compilation` + +## Utilities + +```{eval-rst} +.. automodule:: pythonbpf.utils + :members: + :undoc-members: + :show-inheritance: +``` + +### trace_pipe() + +```python +def trace_pipe() -> None +``` + +Read and display output from the kernel trace pipe. + +Blocks until interrupted with Ctrl+C. Displays BPF program output from `print()` statements. + +**See also:** {doc}`../user-guide/helpers` + +### trace_fields() + +```python +def trace_fields() -> tuple +``` + +Parse one line from the trace pipe into structured fields. + +**Returns:** Tuple of `(task, pid, cpu, flags, timestamp, message)` +* `task` (str) - Task/process name +* `pid` (int) - Process ID +* `cpu` (int) - CPU number +* `flags` (bytes) - Trace flags +* `timestamp` (float) - Timestamp in seconds +* `message` (str) - The trace message + +**See also:** {doc}`../user-guide/helpers` + +## Map Types + +```{eval-rst} +.. 
automodule:: pythonbpf.maps.maps + :members: + :undoc-members: + :show-inheritance: +``` + +### HashMap + +```python +class HashMap: + def __init__( + self, + key, + value, + max_entries: int + ) + + def lookup(self, key) + def update(self, key, value, flags=None) + def delete(self, key) +``` + +Hash map for efficient key-value storage. + +**Parameters:** +* `key` - The type of the key (ctypes type) +* `value` - The type of the value (ctypes type or struct) +* `max_entries` (int) - Maximum number of entries + +**Methods:** +* `lookup(key)` - Look up a value by key +* `update(key, value, flags=None)` - Update or insert a key-value pair +* `delete(key)` - Remove an entry from the map + +**See also:** {doc}`../user-guide/maps` + +### PerfEventArray + +```python +class PerfEventArray: + def __init__( + self, + key_size, + value_size + ) + + def output(self, data) +``` + +Perf event array for sending data to userspace. + +**Parameters:** +* `key_size` - Type for the key +* `value_size` - Type for the value + +**Methods:** +* `output(data)` - Send data to userspace + +**See also:** {doc}`../user-guide/maps` + +### RingBuffer + +```python +class RingBuffer: + def __init__(self, max_entries: int) + + def output(self, data, flags=0) + def reserve(self, size: int) + def submit(self, data, flags=0) + def discard(self, data, flags=0) +``` + +Ring buffer for efficient event delivery. + +**Parameters:** +* `max_entries` (int) - Maximum size in bytes (must be a power of 2 and a multiple of the page size) + +**Methods:** +* `output(data, flags=0)` - Send data to the ring buffer +* `reserve(size)` - Reserve space in the buffer +* `submit(data, flags=0)` - Submit previously reserved space +* `discard(data, flags=0)` - Discard previously reserved space + +**See also:** {doc}`../user-guide/maps` + +## Helper Functions + +```{eval-rst} +.. 
automodule:: pythonbpf.helper.helpers + :members: + :undoc-members: + :show-inheritance: +``` + +### Process Information + +* `pid()` - Get current process ID +* `comm()` - Get current process command name +* `uid()` - Get current user ID + +### Time + +* `ktime()` - Get current kernel time in nanoseconds + +### CPU + +* `smp_processor_id()` - Get current CPU ID + +### Memory + +* `probe_read(dst, size, src)` - Safely read kernel memory +* `probe_read_str(dst, src)` - Safely read string from kernel memory +* `deref(ptr)` - Dereference a pointer + +### Random + +* `random()` - Get pseudo-random number + +**See also:** {doc}`../user-guide/helpers` + +## Type System + +PythonBPF uses Python's `ctypes` module for type definitions: + +### Integer Types + +* `c_int8`, `c_int16`, `c_int32`, `c_int64` - Signed integers +* `c_uint8`, `c_uint16`, `c_uint32`, `c_uint64` - Unsigned integers + +### Other Types + +* `c_char`, `c_bool` - Characters and booleans +* `c_void_p` - Void pointers +* `str(N)` - Fixed-length strings + +## Examples + +### Basic Usage + +```python +from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe +from ctypes import c_void_p, c_int64 + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def hello(ctx: c_void_p) -> c_int64: + print("Hello, World!") + return c_int64(0) + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load_and_attach() +trace_pipe() +``` + +### With Maps + +```python +from pythonbpf import bpf, map, section, bpfglobal, BPF +from pythonbpf.maps import HashMap +from pythonbpf.helper import pid +from ctypes import c_void_p, c_int64, c_uint32, c_uint64 + +@bpf +@map +def counters() -> HashMap: + return HashMap(key=c_uint32, value=c_uint64, max_entries=256) + +@bpf +@section("tracepoint/syscalls/sys_enter_clone") +def count_clones(ctx: c_void_p) -> c_int64: + process_id = pid() + count = counters.lookup(process_id) + + if count: + counters.update(process_id, count + 1) + else: + 
counters.update(process_id, c_uint64(1)) + + return c_int64(0) + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load_and_attach() +``` + +### With Structs + +```python +from pythonbpf import bpf, struct, map, section, bpfglobal, BPF +from pythonbpf.maps import RingBuffer +from pythonbpf.helper import pid, ktime, comm +from ctypes import c_void_p, c_int64, c_uint32, c_uint64 + +@bpf +@struct +class Event: + timestamp: c_uint64 + pid: c_uint32 + comm: str(16) + +@bpf +@map +def events() -> RingBuffer: + return RingBuffer(max_entries=4096) + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def track_exec(ctx: c_void_p) -> c_int64: + event = Event() + event.timestamp = ktime() + event.pid = pid() + event.comm = comm() + + events.output(event) + return c_int64(0) + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load_and_attach() +``` + +## See Also + +* {doc}`../user-guide/index` - Comprehensive user guide +* {doc}`../getting-started/quickstart` - Quick start tutorial +* [GitHub Repository](https://github.com/pythonbpf/Python-BPF) - Source code and examples diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..0a39eee --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,106 @@ +# Configuration file for the Sphinx documentation builder. 
+# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +import os +import sys + +# Add the parent directory to the path so we can import pythonbpf +sys.path.insert(0, os.path.abspath('..')) + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = 'PythonBPF' +copyright = '2024, r41k0u, varun-r-mallya' +author = 'r41k0u, varun-r-mallya' +release = '0.1.8' +version = '0.1.8' + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + 'myst_parser', + 'sphinx.ext.autodoc', + 'sphinx.ext.napoleon', + 'sphinx.ext.viewcode', + 'sphinx.ext.intersphinx', + 'sphinx_copybutton', +] + +# MyST-Parser configuration +myst_enable_extensions = [ + "colon_fence", + "deflist", + "fieldlist", +] + +# Napoleon settings for Google/NumPy style docstrings +napoleon_google_docstring = True +napoleon_numpy_docstring = True +napoleon_include_init_with_doc = True +napoleon_include_private_with_doc = False +napoleon_include_special_with_doc = True +napoleon_use_admonition_for_examples = True +napoleon_use_admonition_for_notes = True +napoleon_use_admonition_for_references = False +napoleon_use_ivar = False +napoleon_use_param = True +napoleon_use_rtype = True +napoleon_type_aliases = None + +# Intersphinx mapping +intersphinx_mapping = { + 'python': ('https://docs.python.org/3', None), + 'llvmlite': ('https://llvmlite.readthedocs.io/en/latest/', None), +} + +templates_path = ['_templates'] +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# Source file suffixes +source_suffix = { + '.rst': 'restructuredtext', + '.md': 'markdown', +} + +# The master toctree document +master_doc = 'index' + +# -- Options for HTML output 
------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = 'sphinx_rtd_theme' +html_static_path = ['_static'] + +# Theme options +html_theme_options = { + 'logo_only': False, + 'display_version': True, + 'prev_next_buttons_location': 'bottom', + 'style_external_links': False, + 'vcs_pageview_mode': '', + # Toc options + 'collapse_navigation': False, + 'sticky_navigation': True, + 'navigation_depth': 4, + 'includehidden': True, + 'titles_only': False +} + +# Add any paths that contain custom static files (such as style sheets) +html_static_path = ['_static'] + +# -- Options for autodoc ----------------------------------------------------- + +autodoc_default_options = { + 'members': True, + 'member-order': 'bysource', + 'special-members': '__init__', + 'undoc-members': True, + 'exclude-members': '__weakref__' +} + +autodoc_typehints = 'description' diff --git a/docs/getting-started/index.md b/docs/getting-started/index.md new file mode 100644 index 0000000..3ba114b --- /dev/null +++ b/docs/getting-started/index.md @@ -0,0 +1,45 @@ +# Getting Started + +Welcome to PythonBPF! This section will help you get started with writing eBPF programs in Python. + +## What You'll Learn + +In this section, you'll learn how to: + +1. **Install PythonBPF** - Set up your development environment with all necessary dependencies +2. **Write Your First Program** - Create a simple BPF program to understand the basics +3. 
**Understand Core Concepts** - Learn about decorators, compilation, and program structure + +## Prerequisites + +Before you begin, make sure you have: + +* A Linux system (eBPF requires Linux kernel 4.15+) +* Python 3.10 or higher +* Root or sudo access (required for loading BPF programs) +* Basic understanding of Python programming + +## Quick Navigation + +```{toctree} +:maxdepth: 1 + +installation +quickstart +``` + +## Next Steps + +After completing the getting started guide, you can: + +* Explore the {doc}`../user-guide/index` for detailed information on features +* Check out the {doc}`../api/index` for complete API reference +* Browse the [examples directory](https://github.com/pythonbpf/Python-BPF/tree/master/examples) for more complex programs + +## Need Help? + +If you encounter any issues: + +* Check the [GitHub Issues](https://github.com/pythonbpf/Python-BPF/issues) for known problems +* Review the [README](https://github.com/pythonbpf/Python-BPF/blob/master/README.md) for additional information +* Reach out to the maintainers: [@r41k0u](https://github.com/r41k0u) and [@varun-r-mallya](https://github.com/varun-r-mallya) diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md new file mode 100644 index 0000000..d2adc6a --- /dev/null +++ b/docs/getting-started/installation.md @@ -0,0 +1,159 @@ +# Installation + +This guide will walk you through installing PythonBPF and its dependencies. 
+ +## Prerequisites + +### System Requirements + +PythonBPF requires: + +* **Linux** - eBPF is a Linux kernel feature (kernel 4.15 or higher recommended) +* **Python 3.10+** - Python 3.10 or higher is required +* **Root/sudo access** - Loading BPF programs into the kernel requires elevated privileges + +### Required System Packages + +Before installing PythonBPF, you need to install the following system packages: + +#### On Ubuntu/Debian: + +```bash +sudo apt-get update +sudo apt-get install -y bpftool clang llvm +``` + +#### On Fedora/RHEL/CentOS: + +```bash +sudo dnf install -y bpftool clang llvm +``` + +#### On Arch Linux: + +```bash +sudo pacman -S bpf clang llvm +``` + +```{note} +The `llvm` package provides `llc`, the LLVM compiler that is used to compile LLVM IR to BPF bytecode. +``` + +## Installing PythonBPF + +### From PyPI (Recommended) + +The easiest way to install PythonBPF is using pip: + +```bash +pip install pythonbpf pylibbpf +``` + +This will install: +* `pythonbpf` - The main package for writing and compiling BPF programs +* `pylibbpf` - Python bindings for libbpf, used to load and attach BPF programs + +### Development Installation + +If you want to contribute to PythonBPF or work with the latest development version: + +1. Clone the repository: + +```bash +git clone https://github.com/pythonbpf/Python-BPF.git +cd Python-BPF +``` + +2. Create and activate a virtual environment: + +```bash +python3 -m venv .venv +source .venv/bin/activate # On Windows: .venv\Scripts\activate +``` + +3. Install in development mode: + +```bash +pip install -e . +pip install pylibbpf +``` + +4. Install development dependencies: + +```bash +make install +``` + +### Installing Documentation Dependencies + +If you want to build the documentation locally: + +```bash +pip install "pythonbpf[docs]" +# Or from the repository root: +pip install -e ".[docs]" +``` + +## Generating vmlinux.py + +Some examples require access to kernel data structures. 
To use these features, you need to generate a `vmlinux.py` file: + +1. Install additional dependencies: + +```bash +pip install ctypeslib2 +``` + +2. Generate the vmlinux.py file: + +```bash +sudo tools/vmlinux-gen.py +``` + +3. Copy the generated file to your working directory or the examples directory as needed. + +```{warning} +The `vmlinux.py` file is kernel-specific. If you upgrade your kernel, you may need to regenerate this file. +``` + +## Verifying Installation + +To verify that PythonBPF is installed correctly, run: + +```bash +python3 -c "import pythonbpf; print(pythonbpf.__all__)" +``` + +You should see output similar to: + +``` +['bpf', 'map', 'section', 'bpfglobal', 'struct', 'compile_to_ir', 'compile', 'BPF', 'trace_pipe', 'trace_fields'] +``` + +## Troubleshooting + +### Permission Errors + +If you encounter permission errors when running BPF programs: + +* Make sure you're running with `sudo` or as root +* Check that `/sys/kernel/tracing/` is accessible + +### LLVM/Clang Not Found + +If you get errors about `llc` or `clang` not being found: + +* Verify they're installed: `which llc` and `which clang` +* Check your PATH environment variable includes the LLVM bin directory + +### Import Errors + +If Python can't find the `pythonbpf` module: + +* Make sure you've activated your virtual environment +* Verify installation with `pip list | grep pythonbpf` +* Try reinstalling: `pip install --force-reinstall pythonbpf` + +## Next Steps + +Now that you have PythonBPF installed, continue to the {doc}`quickstart` guide to write your first BPF program! diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md new file mode 100644 index 0000000..e11b9c4 --- /dev/null +++ b/docs/getting-started/quickstart.md @@ -0,0 +1,236 @@ +# Quick Start + +This guide will walk you through creating your first BPF program with PythonBPF. 
+ +## Your First BPF Program + +Let's create a simple "Hello World" program that prints a message every time a process is executed on your system. + +### Step 1: Create the Program + +Create a new file called `hello_world.py`: + +```python +from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe +from ctypes import c_void_p, c_int64 + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def hello_world(ctx: c_void_p) -> c_int64: + print("Hello, World!") + return c_int64(0) + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load() +b.attach_all() +trace_pipe() +``` + +### Step 2: Run the Program + +Run the program with sudo (required for BPF operations): + +```bash +sudo python3 hello_world.py +``` + +### Step 3: See it in Action + +Open another terminal and run any command: + +```bash +ls +echo "test" +date +``` + +You should see "Hello, World!" printed in the first terminal for each command executed! + +Press `Ctrl+C` to stop the program. + +## Understanding the Code + +Let's break down what each part does: + +### Imports + +```python +from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe +from ctypes import c_void_p, c_int64 +``` + +* `bpf` - Decorator to mark functions for BPF compilation +* `section` - Decorator to specify which kernel event to attach to +* `bpfglobal` - Decorator for BPF global variables +* `BPF` - Class to compile, load, and attach BPF programs +* `trace_pipe` - Utility to read kernel trace output +* `c_void_p`, `c_int64` - C types for function signatures + +### The BPF Function + +```python +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def hello_world(ctx: c_void_p) -> c_int64: + print("Hello, World!") + return c_int64(0) +``` + +* `@bpf` - Marks this function to be compiled to BPF bytecode +* `@section("tracepoint/syscalls/sys_enter_execve")` - Attaches to the execve syscall tracepoint (called when processes start) +* `ctx: c_void_p` - Context parameter (required for all BPF functions) 
+* `print()` - In BPF context, this outputs to the kernel trace buffer +* `return c_int64(0)` - BPF functions must return an integer + +### License Declaration + +```python +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" +``` + +* The Linux kernel requires BPF programs to declare a license +* Many BPF helper functions are only available to programs that declare a GPL-compatible license +* This is defined as a BPF global variable + +### Compilation and Execution + +```python +b = BPF() +b.load() +b.attach_all() +trace_pipe() +``` + +* `BPF()` - Creates a BPF object and compiles the current file +* `b.load()` - Loads the compiled BPF program into the kernel +* `b.attach_all()` - Attaches all BPF programs to their specified hooks +* `trace_pipe()` - Reads and displays output from the kernel trace buffer + +## Next Example: Tracking Process IDs + +Let's make a more interesting program that tracks which processes are being created: + +```python +from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe +from pythonbpf.helper import pid, comm +from ctypes import c_void_p, c_int64 + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def track_exec(ctx: c_void_p) -> c_int64: + process_id = pid() + process_name = comm() + print(f"Process {process_name} (PID: {process_id}) is starting") + return c_int64(0) + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load() +b.attach_all() +trace_pipe() +``` + +This program uses BPF helper functions: + +* `pid()` - Gets the current process ID +* `comm()` - Gets the current process command name + +Save it as `track_exec.py`, run it with `sudo python3 track_exec.py`, and watch processes being created! + +## Common Patterns + +### Tracepoints + +Tracepoints are predefined hooks in the kernel. 
Common ones include: + +```python +# System calls +@section("tracepoint/syscalls/sys_enter_execve") +@section("tracepoint/syscalls/sys_enter_clone") +@section("tracepoint/syscalls/sys_enter_open") + +# Scheduler events +@section("tracepoint/sched/sched_process_fork") +@section("tracepoint/sched/sched_switch") +``` + +### Kprobes + +Kprobes allow you to attach to almost any kernel function (a few are blacklisted or inlined and cannot be probed): + +```python +@section("kprobe/do_sys_open") +def trace_open(ctx: c_void_p) -> c_int64: + print("File is being opened") + return c_int64(0) +``` + +### XDP (eXpress Data Path) + +For network packet processing: + +```python +from ctypes import c_uint32 + +@section("xdp") +def xdp_pass(ctx: c_void_p) -> c_uint32: + # XDP_PASS = 2 + return c_uint32(2) +``` + +## Best Practices + +1. **Always include a LICENSE** - Required by the kernel +2. **Use type hints** - Helps PythonBPF generate correct code +3. **Return the correct type** - Match the expected return type for your program type +4. **Test incrementally** - Start simple and add complexity gradually +5. 
**Check kernel logs** - Use `dmesg` to see BPF verifier messages if loading fails + +## Common Issues + +### Program Won't Load + +If your BPF program fails to load: + +* Check `dmesg` for verifier error messages +* Ensure your LICENSE is GPL-compatible +* Verify you're using supported BPF features +* Make sure return types match function signatures + +### No Output + +If you don't see output: + +* Verify the tracepoint/kprobe is being triggered +* Check that you're running with sudo +* Ensure `/sys/kernel/tracing/trace_pipe` is accessible + +### Compilation Errors + +If compilation fails: + +* Check that `llc` is installed and in your PATH +* Verify your Python syntax is correct +* Ensure all imported types are from `ctypes` + +## Next Steps + +Now that you understand the basics, explore: + +* {doc}`../user-guide/decorators` - Learn about all available decorators +* {doc}`../user-guide/maps` - Use BPF maps for data storage and communication +* {doc}`../user-guide/structs` - Define custom data structures +* {doc}`../user-guide/helpers` - Discover all available BPF helper functions +* [Examples directory](https://github.com/pythonbpf/Python-BPF/tree/master/examples) - See more complex examples diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..0bb5488 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,96 @@ +# PythonBPF Documentation + +Welcome to **PythonBPF** - a Python frontend for writing eBPF programs without embedding C code. PythonBPF uses [llvmlite](https://github.com/numba/llvmlite) to generate LLVM IR and compiles directly to eBPF object files that can be loaded into the Linux kernel. + +```{note} +This project is under active development and not ready for production use. +``` + +## What is PythonBPF? + +PythonBPF is an LLVM IR generator for eBPF programs written in Python. 
It provides: + +* **Pure Python syntax** - Write eBPF programs in Python using familiar decorators and type annotations +* **Direct compilation** - Compile to LLVM object files without relying on BCC +* **Full eBPF features** - Support for maps, helpers, global definitions, and more +* **Integration with libbpf** - Works with [pylibbpf](https://github.com/pythonbpf/pylibbpf) for object loading and execution + +## Quick Example + +Here's a simple "Hello World" BPF program that traces process creation: + +```python +from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe +from ctypes import c_void_p, c_int64 + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def hello_world(ctx: c_void_p) -> c_int64: + print("Hello, World!") + return c_int64(0) + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load() +b.attach_all() +trace_pipe() +``` + +## Features + +* Generate eBPF programs directly from Python +* Compile to LLVM object files for kernel execution +* Built with `llvmlite` for IR generation +* Supports maps, helpers, and global definitions for BPF +* Companion project: [pylibbpf](https://github.com/pythonbpf/pylibbpf), which provides bindings for object loading + +## Table of Contents + +```{toctree} +:maxdepth: 2 +:caption: Getting Started + +getting-started/index +getting-started/installation +getting-started/quickstart +``` + +```{toctree} +:maxdepth: 2 +:caption: User Guide + +user-guide/index +user-guide/decorators +user-guide/maps +user-guide/structs +user-guide/compilation +user-guide/helpers +``` + +```{toctree} +:maxdepth: 2 +:caption: API Reference + +api/index +``` + +## Links + +* **GitHub Repository**: [pythonbpf/Python-BPF](https://github.com/pythonbpf/Python-BPF) +* **PyPI Package**: [pythonbpf](https://pypi.org/project/pythonbpf/) +* **Video Demo**: [YouTube](https://youtu.be/eMyLW8iWbks) +* **Slide Deck**: [Google 
Slides](https://docs.google.com/presentation/d/1DsWDIVrpJhM4RgOETO9VWqUtEHo3-c7XIWmNpi6sTSo/edit?usp=sharing) + +## License + +PythonBPF is licensed under the Apache License 2.0. + +## Indices and tables + +* {ref}`genindex` +* {ref}`modindex` +* {ref}`search` diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..954237b --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..9122e51 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,4 @@ +sphinx>=7.0 +myst-parser>=2.0 +sphinx-rtd-theme>=2.0 +sphinx-copybutton diff --git a/docs/user-guide/compilation.md b/docs/user-guide/compilation.md new file mode 100644 index 0000000..46b0efd --- /dev/null +++ b/docs/user-guide/compilation.md @@ -0,0 +1,529 @@ +# Compilation + +PythonBPF provides several functions and classes for compiling Python code into BPF bytecode and loading it into the kernel. + +## Overview + +The compilation process transforms Python code into executable BPF programs: + +1. **Python Source** → AST parsing +2. **AST** → LLVM IR generation (using llvmlite) +3. **LLVM IR** → BPF bytecode (using llc) +4. 
**BPF Object** → Kernel loading (using libbpf) + +## Compilation Functions + +### compile_to_ir() + +Compile Python source to LLVM Intermediate Representation. + +#### Signature + +```python +def compile_to_ir(filename: str, output: str, loglevel=logging.INFO) +``` + +#### Parameters + +* `filename` - Path to the Python source file to compile +* `output` - Path where the LLVM IR file (.ll) should be written +* `loglevel` - Logging level (default: `logging.INFO`) + +#### Usage + +```python +from pythonbpf import compile_to_ir +import logging + +# Compile to LLVM IR +compile_to_ir( + filename="my_bpf_program.py", + output="my_bpf_program.ll", + loglevel=logging.DEBUG +) +``` + +#### Output + +This function generates an `.ll` file containing LLVM IR, which is human-readable assembly-like code. This is useful for: + +* Debugging compilation issues +* Understanding code generation +* Manual optimization +* Educational purposes + +#### Example IR Output + +```llvm +; ModuleID = 'bpf_module' +source_filename = "bpf_module" +target triple = "bpf" + +define i64 @hello_world(i8* %ctx) { +entry: + ; BPF code here + ret i64 0 +} +``` + +### compile() + +Compile Python source to BPF object file. 
+ +#### Signature + +```python +def compile(filename: str = None, output: str = None, loglevel=logging.INFO) +``` + +#### Parameters + +* `filename` - Path to the Python source file (default: calling file) +* `output` - Path for the output object file (default: same name with `.o` extension) +* `loglevel` - Logging level (default: `logging.INFO`) + +#### Usage + +```python +from pythonbpf import compile +import logging + +# Compile current file +compile() + +# Compile specific file +compile(filename="my_program.py", output="my_program.o") + +# Compile with debug logging +compile(loglevel=logging.DEBUG) +``` + +#### Output + +This function generates a `.o` file containing BPF bytecode that can be: + +* Loaded into the kernel +* Inspected with `bpftool` +* Verified with the BPF verifier +* Distributed as a compiled binary + +#### Compilation Steps + +The `compile()` function performs these steps: + +1. Parse Python source to AST +2. Process decorators and find BPF functions +3. Generate LLVM IR +4. Write IR to temporary `.ll` file +5. Invoke `llc` to compile to BPF object +6. Write final `.o` file + +### BPF Class + +The `BPF` class provides a high-level interface to compile, load, and attach BPF programs. + +#### Signature + +```python +class BPF: + def __init__(self, filename: str = None, loglevel=logging.INFO) + def load(self) + def attach_all(self) + def load_and_attach(self) +``` + +#### Parameters + +* `filename` - Path to Python source file (default: calling file) +* `loglevel` - Logging level (default: `logging.INFO`) + +#### Methods + +##### __init__() + +Create a BPF object and compile the source. + +```python +from pythonbpf import BPF + +# Compile current file +b = BPF() + +# Compile specific file +b = BPF(filename="my_program.py") +``` + +##### load() + +Load the compiled BPF program into the kernel. 
+ +```python +b = BPF() +b.load() +``` + +This method: +* Loads the BPF object file into the kernel +* Creates maps +* Verifies the BPF program +* Returns a `BpfObject` instance + +##### attach_all() + +Attach all BPF programs to their specified hooks. + +```python +b = BPF() +b.load() +b.attach_all() +``` + +This method: +* Attaches tracepoints +* Attaches kprobes/kretprobes +* Attaches XDP programs +* Enables all hooks + +##### load_and_attach() + +Convenience method that loads and attaches in one call. + +```python +b = BPF() +b.load_and_attach() +``` + +Equivalent to: +```python +b = BPF() +b.load() +b.attach_all() +``` + +## Complete Example + +Here's a complete example showing the compilation workflow: + +```python +from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe +from ctypes import c_void_p, c_int64 + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def trace_exec(ctx: c_void_p) -> c_int64: + print("Process started") + return c_int64(0) + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +if __name__ == "__main__": + # Method 1: Simple compilation and loading + b = BPF() + b.load_and_attach() + trace_pipe() + + # Method 2: Step-by-step + # b = BPF() + # b.load() + # b.attach_all() + # trace_pipe() + + # Method 3: Manual compilation + # from pythonbpf import compile + # compile(filename="my_program.py", output="my_program.o") + # # Then load with pylibbpf directly +``` + +## Compilation Pipeline Details + +### AST Parsing + +The Python `ast` module parses your source code: + +```python +import ast +tree = ast.parse(source_code, filename) +``` + +The AST is then walked to find: +* Functions decorated with `@bpf` +* Classes decorated with `@struct` +* Map definitions with `@map` +* Global variables with `@bpfglobal` + +### IR Generation + +PythonBPF uses `llvmlite` to generate LLVM IR: + +```python +from llvmlite import ir + +# Create module +module = ir.Module(name='bpf_module') +module.triple = 'bpf' + +# Generate IR for each 
BPF function +# ... +``` + +Key aspects of IR generation: + +* Type conversion (Python types → LLVM types) +* Function definitions +* Map declarations +* Global variable initialization +* Debug information + +### BPF Compilation + +The LLVM IR is compiled to BPF bytecode using `llc`: + +```bash +llc -march=bpf -filetype=obj input.ll -o output.o +``` + +Compiler flags: +* `-march=bpf` - Target BPF architecture +* `-filetype=obj` - Generate object file +* `-O2` - Optimization level (sometimes used) + +### Kernel Loading + +The compiled object is loaded using `pylibbpf`: + +```python +from pylibbpf import BpfObject + +obj = BpfObject(path="program.o") +obj.load() +``` + +The kernel verifier checks: +* Memory access patterns +* Pointer usage +* Loop bounds +* Instruction count +* Helper function calls + +## Debugging Compilation + +### Logging + +Enable debug logging to see compilation details: + +```python +import logging +from pythonbpf import BPF + +b = BPF(loglevel=logging.DEBUG) +``` + +This will show: +* AST parsing details +* IR generation steps +* Compilation commands +* Loading status + +### Inspecting LLVM IR + +Generate and inspect the IR file: + +```python +from pythonbpf import compile_to_ir + +compile_to_ir("program.py", "program.ll") +``` + +Then examine `program.ll` to understand the generated code. + +### Using bpftool + +Inspect compiled objects with `bpftool`: + +```bash +# Show program info +bpftool prog show + +# Dump program instructions +bpftool prog dump xlated id + +# Dump program JIT code +bpftool prog dump jited id + +# Show maps +bpftool map show + +# Dump map contents +bpftool map dump id +``` + +### Verifier Errors + +If the kernel verifier rejects your program: + +1. Check `dmesg` for detailed error messages: + ```bash + sudo dmesg | tail -50 + ``` + +2. Common issues: + * Unbounded loops + * Invalid pointer arithmetic + * Exceeding instruction limit + * Invalid helper calls + * License incompatibility + +3. 
Solutions: + * Simplify logic + * Use bounded loops + * Check pointer operations + * Verify GPL license + +## Compilation Options + +### Optimization Levels + +While PythonBPF doesn't expose optimization flags directly, you can: + +1. Manually compile IR with specific flags: + ```bash + llc -march=bpf -O2 -filetype=obj program.ll -o program.o + ``` + +2. Modify the compilation pipeline in your code + +### Target Options + +BPF compilation targets the BPF architecture: + +* **Architecture**: `bpf` +* **Endianness**: Typically little-endian +* **Pointer size**: 64-bit + +### Debug Information + +PythonBPF automatically generates debug information (DWARF) for: + +* Function names +* Line numbers +* Variable names +* Type information + +This helps with: +* Stack traces +* Debugging with `bpftool` +* Source-level debugging + +## Working with Compiled Objects + +### Loading Pre-compiled Objects + +You can load previously compiled objects: + +```python +from pylibbpf import BpfObject + +# Load object file +obj = BpfObject(path="my_program.o") +obj.load() + +# Attach programs +# (specific attachment depends on program type) +``` + +### Distribution + +Distribute compiled BPF objects: + +1. Compile once: + ```python + from pythonbpf import compile + compile(filename="program.py", output="program.o") + ``` + +2. Ship `program.o` file + +3. Load on target systems: + ```python + from pylibbpf import BpfObject + obj = BpfObject(path="program.o") + obj.load() + ``` + +### Version Compatibility + +BPF objects are generally compatible across kernel versions, but: + +* Some features require specific kernel versions +* Helper functions may not be available on older kernels +* BTF (BPF Type Format) requirements vary + +## Best Practices + +1. **Keep compilation separate from runtime** + ```python + if __name__ == "__main__": + b = BPF() + b.load_and_attach() + # Runtime code + ``` + +2. 
**Handle compilation errors gracefully** + ```python + try: + b = BPF() + b.load() + except Exception as e: + print(f"Failed to load BPF program: {e}") + exit(1) + ``` + +3. **Use appropriate logging levels** + * `DEBUG` for development + * `INFO` for production + * `ERROR` for critical issues + +4. **Cache compiled objects** + * Compile once, load many times + * Store `.o` files for reuse + * Version your compiled objects + +5. **Test incrementally** + * Compile after each change + * Verify programs load successfully + * Test attachment before full deployment + +## Troubleshooting + +### Compilation Fails + +If compilation fails: +* Check Python syntax +* Verify all decorators are correct +* Ensure type hints are present +* Check for unsupported Python features + +### Loading Fails + +If loading fails: +* Check `dmesg` for verifier errors +* Verify LICENSE is set correctly +* Ensure helper functions are valid +* Check map definitions + +### Programs Don't Attach + +If attachment fails: +* Verify section names are correct +* Check that hooks exist on your kernel +* Ensure you have sufficient permissions +* Verify kernel version supports the feature + +## Next Steps + +* Learn about {doc}`helpers` for available BPF helper functions +* Explore {doc}`maps` for data storage +* See {doc}`decorators` for compilation markers diff --git a/docs/user-guide/decorators.md b/docs/user-guide/decorators.md new file mode 100644 index 0000000..5b9f9b6 --- /dev/null +++ b/docs/user-guide/decorators.md @@ -0,0 +1,459 @@ +# Decorators + +Decorators are the primary way to mark Python code for BPF compilation. PythonBPF provides five core decorators that control how your code is transformed into eBPF bytecode. + +## @bpf + +The `@bpf` decorator marks functions or classes for BPF compilation. 
+ +### Usage + +```python +from pythonbpf import bpf + +@bpf +def my_function(ctx): + # This function will be compiled to BPF bytecode + pass +``` + +### Description + +Any function or class decorated with `@bpf` will be processed by the PythonBPF compiler and transformed into LLVM IR, then compiled to BPF bytecode. This is the fundamental decorator that enables BPF compilation. + +### Rules + +* Must be used on top-level functions or classes +* The function must have proper type hints +* Return types must be BPF-compatible +* Only BPF-compatible operations are allowed inside + +### Example + +```python +from pythonbpf import bpf, section +from ctypes import c_void_p, c_int64 + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def trace_exec(ctx: c_void_p) -> c_int64: + print("Process started") + return c_int64(0) +``` + +## @section + +The `@section(name)` decorator specifies which kernel hook to attach the BPF program to. + +### Usage + +```python +from pythonbpf import bpf, section + +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def trace_open(ctx): + pass +``` + +### Section Types + +#### Tracepoints + +Tracepoints are stable kernel hooks defined in `/sys/kernel/tracing/events/`: + +```python +# System call tracepoints +@section("tracepoint/syscalls/sys_enter_execve") +@section("tracepoint/syscalls/sys_enter_clone") +@section("tracepoint/syscalls/sys_enter_open") +@section("tracepoint/syscalls/sys_exit_read") + +# Scheduler tracepoints +@section("tracepoint/sched/sched_process_fork") +@section("tracepoint/sched/sched_process_exit") +@section("tracepoint/sched/sched_switch") + +# Block I/O tracepoints +@section("tracepoint/block/block_rq_insert") +@section("tracepoint/block/block_rq_complete") +``` + +#### Kprobes + +Kprobes allow attaching to any kernel function: + +```python +@section("kprobe/do_sys_open") +def trace_sys_open(ctx): + pass + +@section("kprobe/__x64_sys_write") +def trace_write(ctx): + pass +``` + +#### Kretprobes + +Kretprobes 
trigger when a kernel function returns: + +```python +@section("kretprobe/do_sys_open") +def trace_open_return(ctx): + pass +``` + +#### XDP (eXpress Data Path) + +For network packet processing at the earliest point: + +```python +from ctypes import c_uint32 + +@section("xdp") +def xdp_prog(ctx: c_void_p) -> c_uint32: + # XDP_PASS = 2, XDP_DROP = 1, XDP_ABORTED = 0 + return c_uint32(2) +``` + +#### TC (Traffic Control) + +For network traffic filtering: + +```python +@section("classifier") +def tc_filter(ctx): + pass +``` + +### Finding Tracepoints + +To find available tracepoints on your system: + +```bash +# List all tracepoints +ls /sys/kernel/tracing/events/ + +# List syscall tracepoints +ls /sys/kernel/tracing/events/syscalls/ + +# View tracepoint format +cat /sys/kernel/tracing/events/syscalls/sys_enter_open/format +``` + +## @map + +The `@map` decorator marks a function as a BPF map definition. + +### Usage + +```python +from pythonbpf import bpf, map +from pythonbpf.maps import HashMap +from ctypes import c_uint32, c_uint64 + +@bpf +@map +def my_map() -> HashMap: + return HashMap(key=c_uint32, value=c_uint64, max_entries=1024) +``` + +### Description + +Maps are BPF data structures used to: + +* Store state between BPF program invocations +* Communicate data between BPF programs +* Share data with userspace + +The function must return a map type (HashMap, PerfEventArray, RingBuffer) and the return type must be annotated. 
+ +### Example + +```python +from pythonbpf import bpf, map, section +from pythonbpf.maps import HashMap +from pythonbpf.helper import pid +from ctypes import c_void_p, c_int64, c_uint32, c_uint64 + +@bpf +@map +def process_count() -> HashMap: + return HashMap(key=c_uint32, value=c_uint64, max_entries=4096) + +@bpf +@section("tracepoint/syscalls/sys_enter_clone") +def count_clones(ctx: c_void_p) -> c_int64: + process_id = pid() + count = process_count.lookup(process_id) + if count: + process_count.update(process_id, count + 1) + else: + process_count.update(process_id, c_uint64(1)) + return c_int64(0) +``` + +See {doc}`maps` for more details on available map types. + +## @struct + +The `@struct` decorator marks a class as a BPF struct definition. + +### Usage + +```python +from pythonbpf import bpf, struct +from ctypes import c_uint64, c_uint32 + +@bpf +@struct +class Event: + timestamp: c_uint64 + pid: c_uint32 + cpu: c_uint32 +``` + +### Description + +Structs allow you to define custom data types for use in BPF programs. 
They can be used: + +* As map values +* For perf event output +* In ring buffer submissions +* As local variables + +### Field Types + +Supported field types include: + +* **Integer types**: `c_int8`, `c_int16`, `c_int32`, `c_int64`, `c_uint8`, `c_uint16`, `c_uint32`, `c_uint64` +* **Pointers**: `c_void_p`, `c_char_p` +* **Fixed strings**: `str(N)` where N is the size (e.g., `str(16)`) +* **Nested structs**: Other `@struct` decorated classes + +### Example + +```python +from pythonbpf import bpf, struct, map, section +from pythonbpf.maps import RingBuffer +from pythonbpf.helper import pid, ktime, comm +from ctypes import c_void_p, c_int64, c_uint64, c_uint32 + +@bpf +@struct +class ProcessEvent: + timestamp: c_uint64 + pid: c_uint32 + comm: str(16) + +@bpf +@map +def events() -> RingBuffer: + return RingBuffer(max_entries=4096) + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def track_processes(ctx: c_void_p) -> c_int64: + event = ProcessEvent() + event.timestamp = ktime() + event.pid = pid() + event.comm = comm() + + events.output(event) + return c_int64(0) +``` + +See {doc}`structs` for more details on working with structs. + +## @bpfglobal + +The `@bpfglobal` decorator marks a function as a BPF global variable definition.
+ +### Usage + +```python +from pythonbpf import bpf, bpfglobal + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" +``` + +### Description + +BPF global variables are values that: + +* Are initialized when the program loads +* Can be read by all BPF functions +* Must be constant (cannot be modified at runtime in current implementation) + +### Common Global Variables + +#### LICENSE (Required) + +Every BPF program must declare a license: + +```python +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" +``` + +Valid licenses include: +* `"GPL"` - GNU General Public License +* `"GPL v2"` - GPL version 2 +* `"Dual BSD/GPL"` - Dual licensed +* `"Dual MIT/GPL"` - Dual licensed + +```{warning} +Many BPF features require a GPL-compatible license. Using a non-GPL license may prevent your program from loading or accessing certain kernel features. +``` + +#### Custom Global Variables + +You can define other global variables: + +```python +@bpf +@bpfglobal +def DEBUG_MODE() -> int: + return 1 + +@bpf +@bpfglobal +def MAX_EVENTS() -> int: + return 1000 +``` + +These can be referenced in your BPF functions, though modifying them at runtime is currently not supported. + +## Combining Decorators + +Decorators are often used together. 
The order matters: + +### Correct Order + +```python +@bpf # Always first +@section("...") # Section before other decorators +def my_function(): + pass + +@bpf # Always first +@map # Map/struct/bpfglobal after @bpf +def my_map(): + pass + +@bpf # Always first +@struct # Map/struct/bpfglobal after @bpf +class MyStruct: + pass + +@bpf # Always first +@bpfglobal # Map/struct/bpfglobal after @bpf +def LICENSE(): + return "GPL" +``` + +### Examples by Use Case + +#### Simple Tracepoint + +```python +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def trace_open(ctx: c_void_p) -> c_int64: + return c_int64(0) +``` + +#### Map Definition + +```python +@bpf +@map +def counters() -> HashMap: + return HashMap(key=c_uint32, value=c_uint64, max_entries=256) +``` + +#### Struct Definition + +```python +@bpf +@struct +class Event: + timestamp: c_uint64 + value: c_uint32 +``` + +#### Global Variable + +```python +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" +``` + +## Best Practices + +1. **Always use @bpf first** - It must be the outermost decorator +2. **Provide type hints** - Required for proper code generation +3. **Use descriptive names** - Makes code easier to understand and debug +4. **Keep functions simple** - BPF has restrictions on complexity +5. 
**Test incrementally** - Verify each component works before combining + +## Common Errors + +### Missing @bpf Decorator + +```python +# Wrong - missing @bpf +@section("tracepoint/syscalls/sys_enter_open") +def my_func(ctx): + pass + +# Correct +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def my_func(ctx): + pass +``` + +### Wrong Decorator Order + +```python +# Wrong - @section before @bpf +@section("tracepoint/syscalls/sys_enter_open") +@bpf +def my_func(ctx): + pass + +# Correct +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def my_func(ctx): + pass +``` + +### Missing Type Hints + +```python +# Wrong - no type hints +@bpf +def my_func(ctx): + pass + +# Correct +@bpf +def my_func(ctx: c_void_p) -> c_int64: + pass +``` + +## Next Steps + +* Learn about {doc}`maps` for data storage and communication +* Explore {doc}`structs` for defining custom data types +* Understand {doc}`compilation` to see how code is transformed +* Check out {doc}`helpers` for available BPF helper functions diff --git a/docs/user-guide/helpers.md b/docs/user-guide/helpers.md new file mode 100644 index 0000000..c19beb9 --- /dev/null +++ b/docs/user-guide/helpers.md @@ -0,0 +1,574 @@ +# Helper Functions and Utilities + +PythonBPF provides helper functions and utilities for BPF programs and userspace code. + +## BPF Helper Functions + +BPF helper functions are kernel-provided functions that BPF programs can call to interact with the system. PythonBPF exposes these through the `pythonbpf.helper` module. + +```python +from pythonbpf.helper import pid, ktime, comm +``` + +### Process and Task Information + +#### pid() + +Get the current process ID. 
+ +```python +from pythonbpf.helper import pid + +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def trace_open(ctx: c_void_p) -> c_int64: + process_id = pid() + print(f"Process {process_id} opened a file") + return c_int64(0) +``` + +**Returns:** `c_int32` - The process ID of the current task + +#### comm() + +Get the current process command name (up to 16 bytes, including the terminating NUL). + +```python +from pythonbpf.helper import comm + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def trace_exec(ctx: c_void_p) -> c_int64: + process_name = comm() + print(f"Executing: {process_name}") + return c_int64(0) +``` + +**Returns:** `str(16)` - The command name of the current task + +**Note:** The returned string is limited to 16 bytes including the terminating NUL (TASK_COMM_LEN), i.e. at most 15 visible characters; longer names are truncated. + +#### uid() + +Get the current user ID. + +```python +from pythonbpf.helper import uid + +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def trace_open(ctx: c_void_p) -> c_int64: + user_id = uid() + if user_id == 0: + print("Root user opened a file") + return c_int64(0) +``` + +**Returns:** `c_int32` - The user ID of the current task + +### Time and Timing + +#### ktime() + +Get the current kernel time in nanoseconds since system boot. + +```python +from pythonbpf.helper import ktime + +@bpf +@section("tracepoint/syscalls/sys_enter_read") +def measure_latency(ctx: c_void_p) -> c_int64: + start_time = ktime() + # Store for later comparison + return c_int64(0) +``` + +**Returns:** `c_int64` - Current time in nanoseconds + +**Use cases:** +* Measuring latency +* Timestamping events +* Rate limiting +* Timeout detection + +### CPU Information + +#### smp_processor_id() + +Get the ID of the CPU on which the BPF program is running.
+ +```python +from pythonbpf.helper import smp_processor_id + +@bpf +@section("tracepoint/sched/sched_switch") +def track_cpu(ctx: c_void_p) -> c_int64: + cpu = smp_processor_id() + print(f"Running on CPU {cpu}") + return c_int64(0) +``` + +**Returns:** `c_int32` - The current CPU ID + +**Use cases:** +* Per-CPU statistics +* Load balancing analysis +* CPU affinity tracking + +### Memory Operations + +#### probe_read() + +Safely read data from kernel memory. + +```python +from pythonbpf.helper import probe_read + +@bpf +def read_kernel_data(ctx: c_void_p) -> c_int64: + dst = c_uint64(0) + size = 8 + src = c_void_p(...) # kernel address + + result = probe_read(dst, size, src) + if result == 0: + print(f"Read value: {dst}") + return c_int64(0) +``` + +**Parameters:** +* `dst` - Destination buffer +* `size` - Number of bytes to read +* `src` - Source kernel address + +**Returns:** `c_int64` - 0 on success, negative on error + +**Safety:** This function performs bounds checking and prevents invalid memory access. + +#### probe_read_str() + +Safely read a null-terminated string from kernel memory. + +```python +from pythonbpf.helper import probe_read_str + +@bpf +def read_filename(ctx: c_void_p) -> c_int64: + filename = str(256) + src = c_void_p(...) # pointer to filename in kernel + + result = probe_read_str(filename, src) + if result > 0: + print(f"Filename: {filename}") + return c_int64(0) +``` + +**Parameters:** +* `dst` - Destination buffer (string) +* `src` - Source kernel address + +**Returns:** `c_int64` - Length of string on success, negative on error + +#### deref() + +Dereference a pointer safely. + +```python +from pythonbpf.helper import deref + +@bpf +def access_pointer(ctx: c_void_p) -> c_int64: + ptr = c_void_p(...) 
+ value = deref(ptr) + print(f"Value at pointer: {value}") + return c_int64(0) +``` + +**Parameters:** +* `ptr` - Pointer to dereference + +**Returns:** The dereferenced value or 0 if null + +### Random Numbers + +#### random() + +Generate a pseudo-random 32-bit number. + +```python +from pythonbpf.helper import random + +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def sample_events(ctx: c_void_p) -> c_int64: + # Sample 1% of events + if (random() % 100) == 0: + print("Sampled event") + return c_int64(0) +``` + +**Returns:** `c_int32` - A pseudo-random number + +**Use cases:** +* Event sampling +* Load shedding +* A/B testing +* Randomized algorithms + +### Network Helpers + +#### skb_store_bytes() + +Store bytes into a socket buffer (for network programs). + +```python +from pythonbpf.helper import skb_store_bytes + +@bpf +@section("classifier") +def modify_packet(ctx: c_void_p) -> c_int32: + offset = 14 # Skip Ethernet header + data = b"\x00\x01\x02\x03" + size = len(data) + + result = skb_store_bytes(offset, data, size) + return c_int32(0) +``` + +**Parameters:** +* `offset` - Offset in the socket buffer +* `from_buf` - Data to write +* `size` - Number of bytes to write +* `flags` - Optional flags + +**Returns:** `c_int64` - 0 on success, negative on error + +## Userspace Utilities + +PythonBPF provides utilities for working with BPF programs from Python userspace code. + +### trace_pipe() + +Read and display output from the kernel trace pipe. + +```python +from pythonbpf import trace_pipe + +# After loading and attaching BPF programs +trace_pipe() +``` + +**Description:** + +The `trace_pipe()` function reads from `/sys/kernel/tracing/trace_pipe` and displays BPF program output to stdout. This is the output from `print()` statements in BPF programs. 
+ +**Usage:** + +```python +from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe +from ctypes import c_void_p, c_int64 + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def trace_exec(ctx: c_void_p) -> c_int64: + print("Process started") # This goes to trace_pipe + return c_int64(0) + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load_and_attach() +trace_pipe() # Display BPF output +``` + +**Behavior:** + +* Blocks until Ctrl+C is pressed +* Displays output in real-time +* Shows task name, PID, CPU, timestamp, and message +* Automatically handles trace pipe access errors + +**Requirements:** + +* Root or sudo access +* Accessible `/sys/kernel/tracing/trace_pipe` + +### trace_fields() + +Parse one line from the trace pipe into structured fields. + +```python +from pythonbpf import trace_fields + +# Read and parse trace output +task, pid, cpu, flags, ts, msg = trace_fields() +print(f"Task: {task}, PID: {pid}, CPU: {cpu}, Time: {ts}, Message: {msg}") +``` + +**Returns:** Tuple of `(task, pid, cpu, flags, timestamp, message)` + +* `task` - String: Task/process name (up to 16 chars) +* `pid` - Integer: Process ID +* `cpu` - Integer: CPU number +* `flags` - Bytes: Trace flags +* `timestamp` - Float: Timestamp in seconds +* `message` - String: The actual trace message + +**Description:** + +The `trace_fields()` function reads one line from the trace pipe and parses it into individual fields. This is useful when you need programmatic access to trace data rather than just displaying it. 
+ +**Usage:** + +```python +from pythonbpf import bpf, section, bpfglobal, BPF, trace_fields +from ctypes import c_void_p, c_int64 + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def trace_exec(ctx: c_void_p) -> c_int64: + print(f"PID:{pid()}") + return c_int64(0) + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load_and_attach() + +# Process trace events +try: + while True: + task, pid, cpu, flags, ts, msg = trace_fields() + print(f"[{ts:.6f}] {task}({pid}) on CPU{cpu}: {msg}") +except KeyboardInterrupt: + print("Stopped") +``` + +**Error Handling:** + +* Raises `ValueError` if line cannot be parsed +* Skips lines about lost events +* Blocks waiting for next line + +## Helper Function Examples + +### Example 1: Latency Measurement + +```python +from pythonbpf import bpf, map, section, bpfglobal, BPF, trace_pipe +from pythonbpf.maps import HashMap +from pythonbpf.helper import pid, ktime +from ctypes import c_void_p, c_int64, c_uint32, c_uint64 + +@bpf +@map +def start_times() -> HashMap: + return HashMap(key=c_uint32, value=c_uint64, max_entries=4096) + +@bpf +@section("tracepoint/syscalls/sys_enter_read") +def read_start(ctx: c_void_p) -> c_int64: + process_id = pid() + start = ktime() + start_times.update(process_id, start) + return c_int64(0) + +@bpf +@section("tracepoint/syscalls/sys_exit_read") +def read_end(ctx: c_void_p) -> c_int64: + process_id = pid() + start = start_times.lookup(process_id) + + if start: + latency = ktime() - start + print(f"Read latency: {latency} ns") + start_times.delete(process_id) + + return c_int64(0) + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load_and_attach() +trace_pipe() +``` + +### Example 2: Process Tracking + +```python +from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe +from pythonbpf.helper import pid, comm, uid +from ctypes import c_void_p, c_int64 + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def track_exec(ctx: c_void_p) -> 
c_int64: + process_id = pid() + process_name = comm() + user_id = uid() + + print(f"User {user_id} started {process_name} (PID: {process_id})") + return c_int64(0) + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load_and_attach() +trace_pipe() +``` + +### Example 3: CPU Load Monitoring + +```python +from pythonbpf import bpf, map, section, bpfglobal, BPF +from pythonbpf.maps import HashMap +from pythonbpf.helper import smp_processor_id +from ctypes import c_void_p, c_int64, c_uint32, c_uint64 + +@bpf +@map +def cpu_counts() -> HashMap: + return HashMap(key=c_uint32, value=c_uint64, max_entries=256) + +@bpf +@section("tracepoint/sched/sched_switch") +def count_switches(ctx: c_void_p) -> c_int64: + cpu = smp_processor_id() + count = cpu_counts.lookup(cpu) + + if count: + cpu_counts.update(cpu, count + 1) + else: + cpu_counts.update(cpu, c_uint64(1)) + + return c_int64(0) + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load_and_attach() + +import time +time.sleep(5) + +# Read results +from pylibbpf import BpfMap +map_obj = BpfMap(b, cpu_counts) +for cpu, count in map_obj.items(): + print(f"CPU {cpu}: {count} context switches") +``` + +### Example 4: Event Sampling + +```python +from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe +from pythonbpf.helper import random, pid, comm +from ctypes import c_void_p, c_int64 + +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def sample_opens(ctx: c_void_p) -> c_int64: + # Sample 5% of events + if (random() % 100) < 5: + process_id = pid() + process_name = comm() + print(f"Sampled: {process_name} ({process_id}) opening file") + + return c_int64(0) + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load_and_attach() +trace_pipe() +``` + +## Best Practices + +1. **Use appropriate helpers** - Choose the right helper for your use case +2. **Handle errors** - Check return values from helpers like `probe_read()` +3. 
**Minimize overhead** - Helper calls have cost; use judiciously +4. **Sample when appropriate** - Use `random()` for high-frequency events +5. **Clean up resources** - Delete map entries when done + +## Common Patterns + +### Store-and-Compare Pattern + +```python +# Store a value +key = pid() +value = ktime() +my_map.update(key, value) + +# Later: compare +stored = my_map.lookup(key) +if stored: + difference = ktime() - stored +``` + +### Filtering Pattern + +```python +# Filter by user +user_id = uid() +if user_id == 0: # Only root + # Process event + pass +``` + +### Sampling Pattern + +```python +# Sample 1 in N events +if (random() % N) == 0: + # Process sampled event + pass +``` + +## Troubleshooting + +### Helper Not Available + +If a helper function doesn't work: +* Check your kernel version (some helpers are newer) +* Verify the helper is available with `bpftool feature` +* Ensure your LICENSE is GPL-compatible + +### Trace Pipe Access Denied + +If `trace_pipe()` fails: +* Run with sudo/root +* Check `/sys/kernel/tracing/` is accessible +* Verify tracing is enabled in kernel config + +### probe_read Failures + +If `probe_read()` returns errors: +* Ensure the source address is valid kernel memory +* Check that the size is reasonable +* Verify you're not reading from restricted areas + +## Next Steps + +* Explore {doc}`maps` for data storage with helpers +* Learn about {doc}`compilation` to understand helper implementation +* See {doc}`decorators` for marking BPF functions diff --git a/docs/user-guide/index.md b/docs/user-guide/index.md new file mode 100644 index 0000000..04d8ba4 --- /dev/null +++ b/docs/user-guide/index.md @@ -0,0 +1,100 @@ +# User Guide + +This user guide provides comprehensive documentation for all PythonBPF features. Whether you're building simple tracing tools or complex performance monitoring systems, this guide will help you master PythonBPF. 
+ +## Overview + +PythonBPF transforms Python code into eBPF bytecode that runs in the Linux kernel. It provides a Pythonic interface to eBPF features through decorators, type annotations, and familiar programming patterns. + +## Core Concepts + +### Decorators + +PythonBPF uses decorators to mark code for BPF compilation: + +* `@bpf` - Mark functions and classes for BPF compilation +* `@map` - Define BPF maps for data storage +* `@struct` - Define custom data structures +* `@section(name)` - Specify attachment points +* `@bpfglobal` - Define global variables + +### Compilation Pipeline + +Your Python code goes through several stages: + +1. **AST Parsing** - Python code is parsed into an Abstract Syntax Tree +2. **IR Generation** - The AST is transformed into LLVM IR using llvmlite +3. **BPF Compilation** - LLVM IR is compiled to BPF bytecode using `llc` +4. **Loading** - The BPF object is loaded into the kernel using libbpf +5. **Attachment** - Programs are attached to kernel hooks (tracepoints, kprobes, etc.) + +## Guide Contents + +```{toctree} +:maxdepth: 2 + +decorators +maps +structs +compilation +helpers +``` + +## Code Organization + +When writing BPF programs with PythonBPF, we recommend: + +1. **Keep BPF code in separate files** - Easier to manage and test +2. **Use type hints** - Required for proper code generation +3. **Follow naming conventions** - Use descriptive names for maps and functions +4. **Document your code** - Add comments explaining BPF-specific logic +5. 
**Test incrementally** - Verify each component works before adding complexity + +## Type System + +PythonBPF uses Python's `ctypes` module for type definitions: + +* `c_int8`, `c_int16`, `c_int32`, `c_int64` - Signed integers +* `c_uint8`, `c_uint16`, `c_uint32`, `c_uint64` - Unsigned integers +* `c_char`, `c_bool` - Characters and booleans +* `c_void_p` - Void pointers +* `str(N)` - Fixed-length strings (e.g., `str(16)` for a 16-byte string) + +## Example Structure + +A typical PythonBPF program follows this structure: + +```python +from pythonbpf import bpf, map, section, bpfglobal, BPF +from pythonbpf.maps import HashMap +from ctypes import c_void_p, c_int64, c_uint32, c_uint64 + +# Define maps +@bpf +@map +def my_map() -> HashMap: + return HashMap(key=c_uint32, value=c_uint64, max_entries=1024) + +# Define BPF function +@bpf +@section("tracepoint/...") +def my_function(ctx: c_void_p) -> c_int64: + # BPF logic here + return c_int64(0) + +# License (required) +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +# Compile, load, and run +if __name__ == "__main__": + b = BPF() + b.load_and_attach() + # Use the program... +``` + +## Next Steps + +Start with {doc}`decorators` to learn about all available decorators, then explore the other sections to master specific features. diff --git a/docs/user-guide/maps.md b/docs/user-guide/maps.md new file mode 100644 index 0000000..3eacebd --- /dev/null +++ b/docs/user-guide/maps.md @@ -0,0 +1,484 @@ +# BPF Maps + +Maps are BPF data structures that provide storage and communication mechanisms. They allow BPF programs to: + +* Store state between invocations +* Share data between multiple BPF programs +* Communicate with userspace applications + +## Map Types + +PythonBPF supports several map types, each optimized for different use cases. + +### HashMap + +Hash maps provide efficient key-value storage with O(1) lookup time. 
+ +#### Definition + +```python +from pythonbpf import bpf, map +from pythonbpf.maps import HashMap +from ctypes import c_uint32, c_uint64 + +@bpf +@map +def my_map() -> HashMap: + return HashMap( + key=c_uint32, + value=c_uint64, + max_entries=1024 + ) +``` + +#### Parameters + +* `key` - The type of the key (must be a ctypes type) +* `value` - The type of the value (must be a ctypes type or struct) +* `max_entries` - Maximum number of entries the map can hold + +#### Operations + +##### lookup(key) + +Look up a value by key. Returns the value if found, `None` otherwise. + +```python +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def trace_open(ctx: c_void_p) -> c_int64: + key = c_uint32(1) + value = my_map.lookup(key) + if value: + print(f"Found value: {value}") + return c_int64(0) +``` + +##### update(key, value, flags=None) + +Update or insert a key-value pair. + +```python +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def track_opens(ctx: c_void_p) -> c_int64: + key = pid() + count = my_map.lookup(key) + if count: + my_map.update(key, count + 1) + else: + my_map.update(key, c_uint64(1)) + return c_int64(0) +``` + +##### delete(key) + +Remove an entry from the map. 
+ +```python +@bpf +def cleanup(ctx: c_void_p) -> c_int64: + key = c_uint32(1) + my_map.delete(key) + return c_int64(0) +``` + +#### Use Cases + +* Counting events per process/CPU +* Storing timestamps for latency calculations +* Caching lookup results +* Implementing rate limiters + +#### Example: Process Counter + +```python +from pythonbpf import bpf, map, section, bpfglobal, BPF +from pythonbpf.maps import HashMap +from pythonbpf.helper import pid +from ctypes import c_void_p, c_int64, c_uint32, c_uint64 + +@bpf +@map +def process_count() -> HashMap: + return HashMap(key=c_uint32, value=c_uint64, max_entries=4096) + +@bpf +@section("tracepoint/syscalls/sys_enter_clone") +def count_processes(ctx: c_void_p) -> c_int64: + process_id = pid() + count = process_count.lookup(process_id) + + if count: + new_count = count + 1 + process_count.update(process_id, new_count) + else: + process_count.update(process_id, c_uint64(1)) + + return c_int64(0) + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +if __name__ == "__main__": + b = BPF() + b.load_and_attach() + # Access map from userspace + from pylibbpf import BpfMap + map_obj = BpfMap(b, process_count) + # Read values... +``` + +### PerfEventArray + +Perf event arrays are used to send data from BPF programs to userspace with high throughput. + +#### Definition + +```python +from pythonbpf.maps import PerfEventArray + +@bpf +@map +def events() -> PerfEventArray: + return PerfEventArray( + key_size=c_uint32, + value_size=c_uint32 + ) +``` + +#### Parameters + +* `key_size` - Type for the key (typically `c_uint32`) +* `value_size` - Type for the value (typically `c_uint32`) + +#### Operations + +##### output(data) + +Send data to userspace. The data can be a struct or basic type. 
+ +```python +@bpf +@struct +class Event: + pid: c_uint32 + timestamp: c_uint64 + +@bpf +@map +def events() -> PerfEventArray: + return PerfEventArray(key_size=c_uint32, value_size=c_uint32) + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def send_event(ctx: c_void_p) -> c_int64: + event = Event() + event.pid = pid() + event.timestamp = ktime() + events.output(event) + return c_int64(0) +``` + +#### Use Cases + +* Sending detailed event data to userspace +* Real-time monitoring and alerting +* Collecting samples for analysis +* High-throughput data collection + +#### Example: Event Logging + +```python +from pythonbpf import bpf, map, struct, section, bpfglobal, BPF +from pythonbpf.maps import PerfEventArray +from pythonbpf.helper import pid, ktime, comm +from ctypes import c_void_p, c_int64, c_uint32, c_uint64 + +@bpf +@struct +class ProcessEvent: + timestamp: c_uint64 + pid: c_uint32 + comm: str(16) + +@bpf +@map +def events() -> PerfEventArray: + return PerfEventArray(key_size=c_uint32, value_size=c_uint32) + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def log_exec(ctx: c_void_p) -> c_int64: + event = ProcessEvent() + event.timestamp = ktime() + event.pid = pid() + event.comm = comm() + events.output(event) + return c_int64(0) + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" +``` + +### RingBuffer + +Ring buffers provide efficient, ordered event delivery with lower overhead than perf event arrays. + +#### Definition + +```python +from pythonbpf.maps import RingBuffer + +@bpf +@map +def events() -> RingBuffer: + return RingBuffer(max_entries=4096) +``` + +#### Parameters + +* `max_entries` - Maximum size of the ring buffer in bytes (must be a power of 2 and a multiple of the page size) + +#### Operations + +##### output(data, flags=0) + +Send data to the ring buffer. 
+ +```python +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def log_event(ctx: c_void_p) -> c_int64: + event = Event() + event.pid = pid() + events.output(event) + return c_int64(0) +``` + +##### reserve(size) + +Reserve space in the ring buffer. Returns a pointer to the reserved space or 0 if no space available. + +```python +@bpf +def reserve_space(ctx: c_void_p) -> c_int64: + ptr = events.reserve(64) # Reserve 64 bytes + if ptr: + # Use the reserved space + events.submit(ptr) + return c_int64(0) +``` + +##### submit(data, flags=0) + +Submit previously reserved space. + +##### discard(data, flags=0) + +Discard previously reserved space without submitting. + +#### Use Cases + +* Modern event streaming (preferred over PerfEventArray) +* Lower overhead event delivery +* Ordered event processing +* Kernel 5.8+ systems + +#### Advantages over PerfEventArray + +* Lower memory overhead +* Better performance +* Simpler API +* Ordered delivery guarantees + +### BPFMapType Enum + +PythonBPF supports various BPF map types through the `BPFMapType` enum: + +```python +from pythonbpf.maps import BPFMapType + +# Common map types +BPFMapType.BPF_MAP_TYPE_HASH # Hash map +BPFMapType.BPF_MAP_TYPE_ARRAY # Array map +BPFMapType.BPF_MAP_TYPE_PERF_EVENT_ARRAY # Perf event array +BPFMapType.BPF_MAP_TYPE_RINGBUF # Ring buffer +BPFMapType.BPF_MAP_TYPE_STACK_TRACE # Stack trace storage +BPFMapType.BPF_MAP_TYPE_LRU_HASH # LRU hash map +``` + +## Using Maps with Structs + +Maps can store complex data types using structs as values: + +```python +from pythonbpf import bpf, map, struct, section +from pythonbpf.maps import HashMap +from ctypes import c_uint32, c_uint64 + +@bpf +@struct +class Stats: + count: c_uint64 + total_time: c_uint64 + max_time: c_uint64 + +@bpf +@map +def process_stats() -> HashMap: + return HashMap( + key=c_uint32, # PID as key + value=Stats, # Struct as value + max_entries=1024 + ) + +@bpf +@section("tracepoint/syscalls/sys_enter_read") +def track_stats(ctx: 
c_void_p) -> c_int64: + process_id = pid() + stats = process_stats.lookup(process_id) + + if stats: + stats.count = stats.count + 1 + process_stats.update(process_id, stats) + else: + new_stats = Stats() + new_stats.count = c_uint64(1) + new_stats.total_time = c_uint64(0) + new_stats.max_time = c_uint64(0) + process_stats.update(process_id, new_stats) + + return c_int64(0) +``` + +## Accessing Maps from Userspace + +After loading a BPF program, you can access maps from Python using `pylibbpf`: + +```python +from pythonbpf import BPF +from pylibbpf import BpfMap + +# Load BPF program +b = BPF() +b.load_and_attach() + +# Get map reference +map_obj = BpfMap(b, my_map) + +# Read all key-value pairs +for key, value in map_obj.items(): + print(f"Key: {key}, Value: {value}") + +# Get all keys +keys = list(map_obj.keys()) + +# Get all values +values = list(map_obj.values()) + +# Lookup specific key +value = map_obj[key] + +# Update from userspace +map_obj[key] = new_value + +# Delete from userspace +del map_obj[key] +``` + +## Best Practices + +1. **Choose the right map type** + * Use `HashMap` for key-value storage + * Use `RingBuffer` for event streaming (kernel 5.8+) + * Use `PerfEventArray` for older kernels + +2. **Size maps appropriately** + * Consider maximum expected entries + * Balance memory usage vs. capacity + * Use LRU maps for automatic eviction + +3. **Handle lookup failures** + * Always check if `lookup()` returns `None` + * Initialize new entries properly + +4. **Minimize map operations** + * BPF has instruction limits + * Reduce unnecessary lookups + * Batch operations when possible + +5. 
**Use structs for complex data** + * More efficient than multiple lookups + * Atomic updates of related fields + * Better cache locality + +## Common Patterns + +### Counter Pattern + +```python +count = my_map.lookup(key) +if count: + my_map.update(key, count + 1) +else: + my_map.update(key, c_uint64(1)) +``` + +### Latency Tracking + +```python +# Store start time +start = ktime() +start_map.update(key, start) + +# Later: calculate latency +start_time = start_map.lookup(key) +if start_time: + latency = ktime() - start_time + latency_map.update(key, latency) + start_map.delete(key) +``` + +### Event Sampling + +```python +# Only process every Nth event +count = counter.lookup(key) +if count and (count % 100) == 0: + events.output(data) +counter.update(key, count + 1 if count else c_uint64(1)) +``` + +## Troubleshooting + +### Map Not Found + +If you get "map not found" errors: +* Ensure the map is defined with `@bpf` and `@map` +* Check that the map name matches exactly +* Verify the BPF program loaded successfully + +### Map Full + +If updates fail due to map being full: +* Increase `max_entries` +* Use LRU maps for automatic eviction +* Add cleanup logic to delete old entries + +### Type Errors + +If you get type-related errors: +* Verify key and value types match the definition +* Check that structs are properly defined +* Ensure ctypes are used correctly + +## Next Steps + +* Learn about {doc}`structs` for defining custom value types +* Explore {doc}`helpers` for BPF helper functions +* See {doc}`compilation` to understand how maps are compiled diff --git a/docs/user-guide/structs.md b/docs/user-guide/structs.md new file mode 100644 index 0000000..2d27d74 --- /dev/null +++ b/docs/user-guide/structs.md @@ -0,0 +1,542 @@ +# BPF Structs + +Structs allow you to define custom data types for use in BPF programs. They provide a way to group related fields together and can be used as map values, event payloads, or local variables. 
+ +## Defining Structs + +Use the `@bpf` and `@struct` decorators to define a BPF struct: + +```python +from pythonbpf import bpf, struct +from ctypes import c_uint64, c_uint32 + +@bpf +@struct +class Event: + timestamp: c_uint64 + pid: c_uint32 + cpu: c_uint32 +``` + +## Field Types + +Structs support various field types from Python's `ctypes` module. + +### Integer Types + +```python +from ctypes import ( + c_int8, c_int16, c_int32, c_int64, + c_uint8, c_uint16, c_uint32, c_uint64 +) + +@bpf +@struct +class Numbers: + small_int: c_int8 # -128 to 127 + short_int: c_int16 # -32768 to 32767 + int_val: c_int32 # -2^31 to 2^31-1 + long_int: c_int64 # -2^63 to 2^63-1 + + byte: c_uint8 # 0 to 255 + word: c_uint16 # 0 to 65535 + dword: c_uint32 # 0 to 2^32-1 + qword: c_uint64 # 0 to 2^64-1 +``` + +### String Types + +Fixed-length strings are defined using `str(N)` where N is the size: + +```python +@bpf +@struct +class ProcessInfo: + name: str(16) # 16-byte string + path: str(256) # 256-byte string +``` + +```{note} +Strings in BPF are fixed-length and null-terminated. The size includes the null terminator. 
+``` + +### Pointer Types + +```python +from ctypes import c_void_p, c_char_p + +@bpf +@struct +class Pointers: + ptr: c_void_p # Generic pointer + str_ptr: c_char_p # Character pointer +``` + +### Nested Structs + +Structs can contain other structs as fields: + +```python +@bpf +@struct +class Address: + street: str(64) + city: str(32) + zip_code: c_uint32 + +@bpf +@struct +class Person: + name: str(32) + age: c_uint32 + address: Address # Nested struct +``` + +## Using Structs + +### As Local Variables + +Create and use struct instances within BPF functions: + +```python +from pythonbpf import bpf, struct, section +from pythonbpf.helper import pid, ktime, comm +from ctypes import c_void_p, c_int64, c_uint64, c_uint32 + +@bpf +@struct +class Event: + timestamp: c_uint64 + pid: c_uint32 + comm: str(16) + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def capture_event(ctx: c_void_p) -> c_int64: + # Create an instance + event = Event() + + # Set fields + event.timestamp = ktime() + event.pid = pid() + event.comm = comm() + + # Use the struct + print(f"Process {event.comm} with PID {event.pid}") + + return c_int64(0) +``` + +### As Map Values + +Use structs as values in maps for complex state storage: + +```python +from pythonbpf import bpf, struct, map, section +from pythonbpf.maps import HashMap +from ctypes import c_uint32, c_uint64 + +@bpf +@struct +class ProcessStats: + syscall_count: c_uint64 + total_time: c_uint64 + max_latency: c_uint64 + +@bpf +@map +def stats() -> HashMap: + return HashMap( + key=c_uint32, + value=ProcessStats, + max_entries=1024 + ) + +@bpf +@section("tracepoint/syscalls/sys_enter_read") +def track_syscalls(ctx: c_void_p) -> c_int64: + process_id = pid() + + # Lookup existing stats + s = stats.lookup(process_id) + + if s: + # Update existing stats + s.syscall_count = s.syscall_count + 1 + stats.update(process_id, s) + else: + # Create new stats + new_stats = ProcessStats() + new_stats.syscall_count = c_uint64(1) + 
new_stats.total_time = c_uint64(0) + new_stats.max_latency = c_uint64(0) + stats.update(process_id, new_stats) + + return c_int64(0) +``` + +### With Perf Events + +Send struct data to userspace using PerfEventArray: + +```python +from pythonbpf import bpf, struct, map, section +from pythonbpf.maps import PerfEventArray +from pythonbpf.helper import pid, ktime, comm +from ctypes import c_void_p, c_int64, c_uint32, c_uint64 + +@bpf +@struct +class ProcessEvent: + timestamp: c_uint64 + pid: c_uint32 + ppid: c_uint32 + comm: str(16) + +@bpf +@map +def events() -> PerfEventArray: + return PerfEventArray(key_size=c_uint32, value_size=c_uint32) + +@bpf +@section("tracepoint/sched/sched_process_fork") +def trace_fork(ctx: c_void_p) -> c_int64: + event = ProcessEvent() + event.timestamp = ktime() + event.pid = pid() + event.comm = comm() + + # Send to userspace + events.output(event) + + return c_int64(0) +``` + +### With Ring Buffers + +Ring buffers provide efficient event delivery: + +```python +from pythonbpf import bpf, struct, map, section +from pythonbpf.maps import RingBuffer + +@bpf +@struct +class FileEvent: + timestamp: c_uint64 + pid: c_uint32 + filename: str(256) + +@bpf +@map +def events() -> RingBuffer: + return RingBuffer(max_entries=4096) + +@bpf +@section("tracepoint/syscalls/sys_enter_openat") +def trace_open(ctx: c_void_p) -> c_int64: + event = FileEvent() + event.timestamp = ktime() + event.pid = pid() + # event.filename would be populated from ctx + + events.output(event) + + return c_int64(0) +``` + +## Field Access and Modification + +### Reading Fields + +Access struct fields using dot notation: + +```python +event = Event() +ts = event.timestamp +process_id = event.pid +``` + +### Writing Fields + +Assign values to fields: + +```python +event = Event() +event.timestamp = ktime() +event.pid = pid() +event.comm = comm() +``` + +### String Fields + +String fields have special handling: + +```python +@bpf +@struct +class Message: + text: str(64) + 
+@bpf +def example(ctx: c_void_p) -> c_int64: + msg = Message() + + # Assign string value + msg.text = "Hello from BPF" + + # Use helper to get process name + msg.text = comm() + + return c_int64(0) +``` + +## StructType Class + +PythonBPF provides a `StructType` class for working with struct metadata: + +```python +from pythonbpf.structs import StructType + +# Define a struct +@bpf +@struct +class MyStruct: + field1: c_uint64 + field2: c_uint32 + +# Access struct information (from userspace) +# This is typically used internally by the compiler +``` + +## Complex Examples + +### Network Packet Event + +```python +from pythonbpf import bpf, struct, map, section +from pythonbpf.maps import RingBuffer +from ctypes import c_void_p, c_int64, c_uint8, c_uint16, c_uint32, c_uint64 + +@bpf +@struct +class PacketEvent: + timestamp: c_uint64 + src_ip: c_uint32 + dst_ip: c_uint32 + src_port: c_uint16 + dst_port: c_uint16 + protocol: c_uint8 + length: c_uint16 + +@bpf +@map +def packets() -> RingBuffer: + return RingBuffer(max_entries=8192) + +@bpf +@section("xdp") +def capture_packets(ctx: c_void_p) -> c_uint32: + pkt = PacketEvent() + pkt.timestamp = ktime() + # Parse packet data from ctx... 
+ + packets.output(pkt) + + # XDP_PASS + return c_uint32(2) +``` + +### Process Lifecycle Tracking + +```python +@bpf +@struct +class ProcessLifecycle: + pid: c_uint32 + ppid: c_uint32 + start_time: c_uint64 + exit_time: c_uint64 + exit_code: c_int32 + comm: str(16) + +@bpf +@map +def process_info() -> HashMap: + return HashMap( + key=c_uint32, + value=ProcessLifecycle, + max_entries=4096 + ) + +@bpf +@section("tracepoint/sched/sched_process_fork") +def track_fork(ctx: c_void_p) -> c_int64: + process_id = pid() + + info = ProcessLifecycle() + info.pid = process_id + info.start_time = ktime() + info.comm = comm() + + process_info.update(process_id, info) + + return c_int64(0) + +@bpf +@section("tracepoint/sched/sched_process_exit") +def track_exit(ctx: c_void_p) -> c_int64: + process_id = pid() + + info = process_info.lookup(process_id) + if info: + info.exit_time = ktime() + process_info.update(process_id, info) + + return c_int64(0) +``` + +### Aggregated Statistics + +```python +@bpf +@struct +class FileStats: + read_count: c_uint64 + write_count: c_uint64 + total_bytes_read: c_uint64 + total_bytes_written: c_uint64 + last_access: c_uint64 + +@bpf +@map +def file_stats() -> HashMap: + return HashMap( + key=str(256), # Filename as key + value=FileStats, + max_entries=1024 + ) +``` + +## Memory Layout + +Structs in BPF follow C struct layout rules: + +* Fields are laid out in order +* Padding may be added for alignment +* Size is rounded up to alignment + +Example: + +```python +@bpf +@struct +class Aligned: + a: c_uint8 # 1 byte + # 3 bytes padding + b: c_uint32 # 4 bytes + c: c_uint64 # 8 bytes + # Total: 16 bytes +``` + +```{tip} +For optimal memory usage, order fields from largest to smallest to minimize padding. +``` + +## Best Practices + +1. **Use descriptive field names** - Makes code self-documenting +2. **Order fields by size** - Reduces padding and memory usage +3. **Use appropriate sizes** - Don't use `c_uint64` when `c_uint32` suffices +4. 
**Document complex structs** - Add comments explaining field purposes +5. **Keep structs focused** - Each struct should represent one logical entity +6. **Use fixed-size strings** - Always specify string lengths explicitly + +## Common Patterns + +### Timestamp + Data Pattern + +```python +@bpf +@struct +class TimestampedEvent: + timestamp: c_uint64 # Always first for sorting + # ... other fields +``` + +### Identification Pattern + +```python +@bpf +@struct +class Identifiable: + pid: c_uint32 + tid: c_uint32 + cpu: c_uint32 + # ... additional fields +``` + +### Stats Aggregation Pattern + +```python +@bpf +@struct +class Statistics: + count: c_uint64 + sum: c_uint64 + min: c_uint64 + max: c_uint64 + avg: c_uint64 # Computed in userspace +``` + +## Troubleshooting + +### Struct Size Issues + +If you encounter size-related errors: +* Check for excessive padding +* Verify field types are correct +* Consider reordering fields + +### Initialization Problems + +If fields aren't initialized correctly: +* Always initialize all fields explicitly +* Set default values where appropriate +* Use helper functions for dynamic values + +### Type Mismatch Errors + +If you get type errors: +* Ensure field types match assignments +* Check that imported types are from `ctypes` +* Verify nested struct definitions + +## Reading Struct Data in Userspace + +After capturing struct data, read it in Python: + +```python +import ctypes +from pylibbpf import BpfMap + +# Define matching Python class +class Event(ctypes.Structure): + _fields_ = [ + ("timestamp", ctypes.c_uint64), + ("pid", ctypes.c_uint32), + ("comm", ctypes.c_char * 16), + ] + +# Read from map +map_obj = BpfMap(b, stats) +for key, value_bytes in map_obj.items(): + value = Event.from_buffer_copy(value_bytes) + print(f"PID: {value.pid}, Comm: {value.comm.decode()}") +``` + +## Next Steps + +* Learn about {doc}`maps` for storing struct data +* Explore {doc}`helpers` for populating struct fields +* See {doc}`compilation` to 
understand how structs are compiled diff --git a/pyproject.toml b/pyproject.toml index 851906b..c548ca7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,14 @@ dependencies = [ "pylibbpf" ] +[project.optional-dependencies] +docs = [ + "sphinx>=7.0", + "myst-parser>=2.0", + "sphinx-rtd-theme>=2.0", + "sphinx-copybutton", +] + [tool.setuptools.packages.find] where = ["."] include = ["pythonbpf*"]