Compare commits

..

35 Commits

Author SHA1 Message Date
ec4a6852ec PythonBPF: Add Compilation Context to allow parallel compilation of multiple bpf programs 2026-02-21 18:59:33 +05:30
45d85c416f BCC-Examples: Fix returns in disksnoop.py 2026-01-29 22:33:07 +05:30
cb18ab67d9 docs: Add docs badge to README 2026-01-29 12:06:22 +05:30
d1872bc868 docs: Add readthedocs workflow file 2026-01-29 11:58:31 +05:30
2c2ed473d8 Merge pull request #80 from pythonbpf/copilot/create-documentation-project
Add comprehensive Sphinx documentation with MyST-Parser
2026-01-29 11:38:44 +05:30
b6ecec9889 docs: Format requirements 2026-01-29 11:37:50 +05:30
4f56f8c426 docs: Exclude README.md from toctree 2026-01-29 11:35:24 +05:30
3bff930e98 docs: Include cross-references to BCC-Examples 2026-01-29 11:31:25 +05:30
036830c200 docs: Fix API reference 2026-01-29 03:13:49 +05:30
aded125cba docs: Fix helpers and maps guide 2026-01-29 02:54:46 +05:30
581269e52b docs: Fix user-guide/compilation 2026-01-28 16:34:48 +05:30
8bfd998863 docs: Fix user-guide/bpf-structs 2026-01-28 04:12:35 +05:30
a31ef3997a docs: Fix user-guide/maps.md 2026-01-27 12:59:22 +05:30
217e760e98 docs: Fix decorators page in user-guide 2026-01-27 02:16:03 +05:30
06c81ae55e docs: fix TC section in decorators 2026-01-27 01:58:06 +05:30
9131d044dc docs: fix user-guide index 2026-01-26 08:45:56 +05:30
2840a5c101 docs: fix common issues section of quickstart 2026-01-25 13:31:21 +05:30
9ff33229a0 docs: fix type hints misconception in quickstart 2026-01-25 13:25:45 +05:30
2e95b77ceb docs: Fix quickstart and add alternative compile option 2026-01-25 13:23:09 +05:30
c6aa1077de docs: remove unnecessary quick navigation from index 2026-01-25 04:04:27 +05:30
220adaf011 docs: remove unnecessary c_int64 calls from quickstart guide 2026-01-23 04:01:31 +05:30
92162e5cb4 docs: Fix copyright year and authors in conf.py 2026-01-23 03:21:29 +05:30
e0251a05bf docs: remove redundant c_int64 calls from quickstart 2026-01-23 03:17:19 +05:30
f03c08703a docs: Add more context for vmlinux in getting-started/installation 2026-01-23 02:47:44 +05:30
4edfb18609 docs: Remove unnecessary note from getting-started/installation 2026-01-23 02:42:28 +05:30
c58483ab81 Fix documentation: correct comm() usage, XDP types, copyright year, and add uv support
Co-authored-by: r41k0u <76248539+r41k0u@users.noreply.github.com>
2026-01-21 23:10:17 +00:00
2d8c6c144c docs: Fix links and fluff in getting-started/index.md 2026-01-22 04:31:11 +05:30
c1f32a2839 docs: remove unnecessary fluff from README 2026-01-22 03:30:56 +05:30
b6d8b71308 Remove unnecessary make.bat 2026-01-22 03:28:27 +05:30
ab881772af docs: Fix features and video link in index.md 2026-01-22 03:27:10 +05:30
b03924836e docs: remove c_int64 helper usage in return statement 2026-01-22 03:17:41 +05:30
4ab1e26b92 docs: Remove 'not for production' advice 2026-01-22 03:15:13 +05:30
73f6e83445 Add .gitignore for docs build artifacts and docs README
Co-authored-by: r41k0u <76248539+r41k0u@users.noreply.github.com>
2026-01-20 22:07:36 +00:00
c1e90b9d46 Add comprehensive Sphinx documentation structure and content
Co-authored-by: r41k0u <76248539+r41k0u@users.noreply.github.com>
2026-01-20 22:06:00 +00:00
917d386d33 Initial plan 2026-01-20 21:55:17 +00:00
35 changed files with 4064 additions and 499 deletions

4
.gitignore vendored
View File

@ -10,3 +10,7 @@ __pycache__/
vmlinux.py
~*
vmlinux.h
# Documentation build artifacts
docs/_build/
docs/_templates/

21
.readthedocs.yaml Normal file
View File

@ -0,0 +1,21 @@
# Read the Docs configuration file for Sphinx
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
version: 2
# Set the OS, Python version, and other tools you might need
build:
os: ubuntu-24.04
tools:
python: "3.12"
# Build documentation in the "docs/" directory with Sphinx
sphinx:
configuration: docs/conf.py
# Optionally, but recommended, declare the Python requirements
python:
install:
- requirements: docs/requirements.txt
- method: pip
path: .

View File

@ -27,7 +27,7 @@ def trace_completion(ctx: struct_pt_regs) -> c_int64:
print(f"{data_len} {cmd_flags:x} {delta_us}\n")
start.delete(req_ptr)
return c_int64(0)
return 0 # type: ignore [return-value]
@bpf
@ -36,7 +36,7 @@ def trace_start(ctx1: struct_pt_regs) -> c_int32:
req = ctx1.di
ts = ktime()
start.update(req, ts)
return c_int32(0)
return 0 # type: ignore [return-value]
@bpf

View File

@ -19,6 +19,8 @@
<a href="https://pepy.tech/project/pythonbpf"><img src="https://pepy.tech/badge/pythonbpf" alt="Downloads"></a>
<!-- Build & CI -->
<a href="https://github.com/pythonbpf/python-bpf/actions"><img src="https://github.com/pythonbpf/python-bpf/actions/workflows/python-publish.yml/badge.svg" alt="Build Status"></a>
<!-- Documentation -->
<a href="https://python-bpf.readthedocs.io/en/latest/?badge=latest"><img src="https://readthedocs.org/projects/python-bpf/badge/?version=latest" alt="Documentation Status"></a>
<!-- Meta -->
<a href="https://github.com/pythonbpf/python-bpf/blob/main/LICENSE"><img src="https://img.shields.io/github/license/pythonbpf/python-bpf" alt="License"></a>
</p>

20
docs/Makefile Normal file
View File

@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

52
docs/README.md Normal file
View File

@ -0,0 +1,52 @@
# PythonBPF Documentation
This directory contains the Sphinx documentation for PythonBPF.
## Building the Documentation
### Prerequisites
Install the documentation dependencies:
**Using uv (recommended):**
```bash
uv pip install -r requirements.txt
# Or install the optional docs dependencies
uv pip install pythonbpf[docs]
```
**Using pip:**
```bash
pip install -r requirements.txt
# Or install the optional docs dependencies
pip install pythonbpf[docs]
```
### Build HTML Documentation
```bash
make html
```
The generated documentation will be in `_build/html/`. Open `_build/html/index.html` in a browser to view.
### Other Build Formats
```bash
make latexpdf # Build PDF documentation
make epub # Build ePub format
make clean # Clean build artifacts
```
## Documentation Structure
- `index.md` - Main landing page
- `getting-started/` - Installation and quick start guides
- `user-guide/` - Comprehensive user documentation
- `api/` - API reference documentation
- `conf.py` - Sphinx configuration
- `_static/` - Static files (images, CSS, etc.)
## Writing Documentation
Documentation is written in Markdown using [MyST-Parser](https://myst-parser.readthedocs.io/). See the existing files for examples.

0
docs/_static/.gitkeep vendored Normal file
View File

471
docs/api/index.md Normal file
View File

@ -0,0 +1,471 @@
# API Reference
This section provides detailed API documentation for all PythonBPF modules, classes, and functions.
## Module Overview
PythonBPF is organized into several modules:
* `pythonbpf` - Main module with decorators and compilation functions
* `pythonbpf.maps` - BPF map types
* `pythonbpf.helper` - BPF helper functions
* `pythonbpf.structs` - Struct type handling
* `pythonbpf.codegen` - Code generation and compilation
## Public API
The main `pythonbpf` module exports the following public API:
```python
from pythonbpf import (
# Decorators
bpf,
map,
section,
bpfglobal,
struct,
# Compilation
compile_to_ir,
compile,
BPF,
# Utilities
trace_pipe,
trace_fields,
)
```
## Decorators
```{eval-rst}
.. automodule:: pythonbpf.decorators
:members:
:undoc-members:
:show-inheritance:
```
### bpf
```python
@bpf
def my_function():
pass
```
Decorator to mark a function or class for BPF compilation. Any function or class decorated with `@bpf` will be processed by the PythonBPF compiler.
**See also:** {doc}`../user-guide/decorators`
### map
```python
@bpf
@map
def my_map() -> HashMap:
return HashMap(key=c_uint32, value=c_uint64, max_entries=1024)
```
Decorator to mark a function as a BPF map definition. The function must return a map type.
**See also:** {doc}`../user-guide/maps`
### section
```python
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def trace_open(ctx: c_void_p) -> c_int64:
return c_int64(0)
```
Decorator to specify which kernel hook to attach the BPF program to.
**Parameters:**
* `name` (str) - The section name (e.g., "tracepoint/...", "kprobe/...", "xdp")
**See also:** {doc}`../user-guide/decorators`
### bpfglobal
```python
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
```
Decorator to mark a function as a BPF global variable definition.
**See also:** {doc}`../user-guide/decorators`
### struct
```python
@bpf
@struct
class Event:
timestamp: c_uint64
pid: c_uint32
```
Decorator to mark a class as a BPF struct definition.
**See also:** {doc}`../user-guide/structs`
## Compilation Functions
```{eval-rst}
.. automodule:: pythonbpf.codegen
:members: compile_to_ir, compile, BPF
:undoc-members:
:show-inheritance:
```
### compile_to_ir()
```python
def compile_to_ir(
filename: str,
output: str,
loglevel=logging.WARNING
) -> None
```
Compile Python source to LLVM Intermediate Representation.
**Parameters:**
* `filename` (str) - Path to the Python source file
* `output` (str) - Path for the output LLVM IR file (.ll)
* `loglevel` - Logging level (default: logging.WARNING)
**See also:** {doc}`../user-guide/compilation`
### compile()
```python
def compile(
filename: str = None,
output: str = None,
loglevel=logging.WARNING
) -> None
```
Compile Python source to BPF object file.
**Parameters:**
* `filename` (str, optional) - Path to the Python source file (default: calling file)
* `output` (str, optional) - Path for the output object file (default: same name with .o extension)
* `loglevel` - Logging level (default: logging.WARNING)
**See also:** {doc}`../user-guide/compilation`
### BPF
```python
class BPF:
def __init__(
self,
filename: str = None,
loglevel=logging.WARNING
)
def load(self) -> BpfObject
def attach_all(self) -> None
def load_and_attach(self) -> BpfObject
```
High-level interface to compile, load, and attach BPF programs.
**Parameters:**
* `filename` (str, optional) - Path to Python source file (default: calling file)
* `loglevel` - Logging level (default: logging.WARNING)
**Methods:**
* `load()` - Load the compiled BPF program into the kernel
* `attach_all()` - Attach all BPF programs to their hooks
* `load_and_attach()` - Convenience method that loads and attaches
**See also:** {doc}`../user-guide/compilation`
## Utilities
```{eval-rst}
.. automodule:: pythonbpf.utils
:members:
:undoc-members:
:show-inheritance:
```
### trace_pipe()
```python
def trace_pipe() -> None
```
Read and display output from the kernel trace pipe.
Blocks until interrupted with Ctrl+C. Displays BPF program output from `print()` statements.
**See also:** {doc}`../user-guide/helpers`
### trace_fields()
```python
def trace_fields() -> tuple
```
Parse one line from the trace pipe into structured fields.
**Returns:** Tuple of `(task, pid, cpu, flags, timestamp, message)`
* `task` (str) - Task/process name
* `pid` (int) - Process ID
* `cpu` (int) - CPU number
* `flags` (bytes) - Trace flags
* `timestamp` (float) - Timestamp in seconds
* `message` (str) - The trace message
**See also:** {doc}`../user-guide/helpers`
## Map Types
```{eval-rst}
.. automodule:: pythonbpf.maps.maps
:members:
:undoc-members:
:show-inheritance:
```
### HashMap
```python
class HashMap:
def __init__(
self,
key,
value,
max_entries: int
)
def lookup(self, key)
def update(self, key, value, flags=None)
def delete(self, key)
```
Hash map for efficient key-value storage.
**Parameters:**
* `key` - The type of the key (ctypes type or struct)
* `value` - The type of the value (ctypes type or struct)
* `max_entries` (int) - Maximum number of entries
**Methods:**
* `lookup(key)` - Look up a value by key
* `update(key, value, flags=None)` - Update or insert a key-value pair
* `delete(key)` - Remove an entry from the map
**See also:** {doc}`../user-guide/maps`
### PerfEventArray
```python
class PerfEventArray:
def __init__(
self,
key_size,
value_size
)
def output(self, data)
```
Perf event array for sending data to userspace.
**Parameters:**
* `key_size` - Type for the key
* `value_size` - Type for the value
**Methods:**
* `output(data)` - Send data to userspace
**See also:** {doc}`../user-guide/maps`
### RingBuffer
```python
class RingBuffer:
def __init__(self, max_entries: int)
def output(self, data, flags=0)
def reserve(self, size: int)
def submit(self, data, flags=0)
def discard(self, data, flags=0)
```
Ring buffer for efficient event delivery.
**Parameters:**
* `max_entries` (int) - Maximum size in bytes (must be power of 2)
**Methods:**
* `output(data, flags=0)` - Send data to the ring buffer
* `reserve(size)` - Reserve space in the buffer
* `submit(data, flags=0)` - Submit previously reserved space
* `discard(data, flags=0)` - Discard previously reserved space
**See also:** {doc}`../user-guide/maps`
## Helper Functions
```{eval-rst}
.. automodule:: pythonbpf.helper.helpers
:members:
:undoc-members:
:show-inheritance:
```
### Process Information
* `pid()` - Get current process ID
* `comm(buf)` - Get current process command name (requires buffer parameter)
* `uid()` - Get current user ID
### Time
* `ktime()` - Get current kernel time in nanoseconds
### CPU
* `smp_processor_id()` - Get current CPU ID
### Memory
* `probe_read(dst, size, src)` - Safely read kernel memory
* `probe_read_str(dst, src)` - Safely read string from kernel memory
* `deref(ptr)` - Dereference a pointer
### Random
* `random()` - Get pseudo-random number
**See also:** {doc}`../user-guide/helpers`
## Type System
PythonBPF uses Python's `ctypes` module for type definitions:
### Integer Types
* `c_int8`, `c_int16`, `c_int32`, `c_int64` - Signed integers
* `c_uint8`, `c_uint16`, `c_uint32`, `c_uint64` - Unsigned integers
### Other Types
* `c_char`, `c_bool` - Characters and booleans
* `c_void_p` - Void pointers
* `str(N)` - Fixed-length strings
## Examples
### Basic Usage
```python
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load_and_attach()
trace_pipe()
```
### With Maps
```python
from pythonbpf import bpf, map, section, bpfglobal, BPF
from pythonbpf.maps import HashMap
from pythonbpf.helper import pid
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
@bpf
@map
def counters() -> HashMap:
return HashMap(key=c_uint32, value=c_uint64, max_entries=256)
@bpf
@section("tracepoint/syscalls/sys_enter_clone")
def count_clones(ctx: c_void_p) -> c_int64:
process_id = pid()
count = counters.lookup(process_id)
if count:
counters.update(process_id, count + 1)
else:
counters.update(process_id, 1)
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load_and_attach()
```
### With Structs
```python
from pythonbpf import bpf, struct, map, section, bpfglobal, BPF
from pythonbpf.maps import RingBuffer
from pythonbpf.helper import pid, ktime, comm
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
@bpf
@struct
class Event:
timestamp: c_uint64
pid: c_uint32
comm: str(16)
@bpf
@map
def events() -> RingBuffer:
return RingBuffer(max_entries=4096)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def track_exec(ctx: c_void_p) -> c_int64:
event = Event()
event.timestamp = ktime()
event.pid = pid()
comm(event.comm)
events.output(event)
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load_and_attach()
```
## See Also
* {doc}`../user-guide/index` - Comprehensive user guide
* {doc}`../getting-started/quickstart` - Quick start tutorial
* [GitHub Repository](https://github.com/pythonbpf/Python-BPF) - Source code and examples

105
docs/conf.py Normal file
View File

@ -0,0 +1,105 @@
# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
import os
import sys
# Add the parent directory to the path so we can import pythonbpf
sys.path.insert(0, os.path.abspath(".."))
# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
project = "PythonBPF"
copyright = "2026, Pragyansh Chaturvedi, Varun Mallya"
author = "Pragyansh Chaturvedi, Varun Mallya"
release = "0.1.8"
version = "0.1.8"
# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
extensions = [
"myst_parser",
"sphinx.ext.autodoc",
"sphinx.ext.napoleon",
"sphinx.ext.viewcode",
"sphinx.ext.intersphinx",
"sphinx_copybutton",
]
# MyST-Parser configuration
myst_enable_extensions = [
"colon_fence",
"deflist",
"fieldlist",
]
# Napoleon settings for Google/NumPy style docstrings
napoleon_google_docstring = True
napoleon_numpy_docstring = True
napoleon_include_init_with_doc = True
napoleon_include_private_with_doc = False
napoleon_include_special_with_doc = True
napoleon_use_admonition_for_examples = True
napoleon_use_admonition_for_notes = True
napoleon_use_admonition_for_references = False
napoleon_use_ivar = False
napoleon_use_param = True
napoleon_use_rtype = True
napoleon_type_aliases = None
# Intersphinx mapping
intersphinx_mapping = {
"python": ("https://docs.python.org/3", None),
"llvmlite": ("https://llvmlite.readthedocs.io/en/latest/", None),
}
templates_path = ["_templates"]
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
# Source file suffixes
source_suffix = {
".rst": "restructuredtext",
".md": "markdown",
}
# The master toctree document
master_doc = "index"
# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
html_theme = "sphinx_rtd_theme"
html_static_path = ["_static"]
# Theme options
html_theme_options = {
"logo_only": False,
"display_version": True,
"prev_next_buttons_location": "bottom",
"style_external_links": False,
"vcs_pageview_mode": "",
# Toc options
"collapse_navigation": False,
"sticky_navigation": True,
"navigation_depth": 4,
"includehidden": True,
"titles_only": False,
}
# -- Options for autodoc -----------------------------------------------------
autodoc_default_options = {
"members": True,
"member-order": "bysource",
"special-members": "__init__",
"undoc-members": True,
"exclude-members": "__weakref__",
}
autodoc_typehints = "description"
exclude_patterns = ["README.md"]

View File

@ -0,0 +1,35 @@
# Getting Started
Welcome to PythonBPF! This section will help you get started with writing eBPF programs in Python.
## What You'll Learn
In this section, you'll learn how to:
1. **Install PythonBPF** - Set up your development environment with all necessary dependencies
2. **Write Your First Program** - Create a simple BPF program to understand the basics
3. **Understand Core Concepts** - Learn about decorators, compilation, and program structure
## Prerequisites
Before you begin, make sure you have:
* A Linux system (eBPF requires Linux kernel 4.15+)
* Python 3.10 or higher
* Root or sudo access (required for loading BPF programs)
## Next Steps
After completing the getting started guide, you can:
* Explore the {doc}`../user-guide/index` for detailed information on features
* Check out the {doc}`../api/index`
* Browse the [examples directory](https://github.com/pythonbpf/Python-BPF/tree/master/examples) and the [BCC examples directory](https://github.com/pythonbpf/Python-BPF/tree/master/BCC-Examples)
## Need Help?
If you encounter any issues:
* Check the [GitHub Issues](https://github.com/pythonbpf/Python-BPF/issues) for known problems
* Review the [README](https://github.com/pythonbpf/Python-BPF/blob/master/README.md) for additional information
* Reach out to the maintainers: [@r41k0u](https://github.com/r41k0u) and [@varun-r-mallya](https://github.com/varun-r-mallya)

View File

@ -0,0 +1,182 @@
# Installation
This guide will walk you through installing PythonBPF and its dependencies.
## Prerequisites
### System Requirements
PythonBPF requires:
* **Linux** - eBPF is a Linux kernel feature (kernel 4.15 or higher recommended)
* **Python 3.10+** - Python 3.10 or higher is required
* **Root/sudo access** - Loading BPF programs into the kernel requires elevated privileges
### Required System Packages
Before installing PythonBPF, you need to install the following system packages:
#### On Ubuntu/Debian:
```bash
sudo apt-get update
sudo apt-get install -y bpftool clang llvm
```
#### On Fedora/RHEL/CentOS:
```bash
sudo dnf install -y bpftool clang llvm
```
#### On Arch Linux:
```bash
sudo pacman -S bpf clang llvm
```
## Installing PythonBPF
### From PyPI (Recommended)
The easiest way to install PythonBPF is using uv or pip:
**Using uv (recommended):**
```bash
uv pip install pythonbpf pylibbpf
```
**Using pip:**
```bash
pip install pythonbpf pylibbpf
```
This will install:
* `pythonbpf` - The main package for writing and compiling BPF programs
* `pylibbpf` - Python bindings for libbpf, used to load and attach BPF programs
### Development Installation
If you want to contribute to PythonBPF or work with the latest development version:
1. Clone the repository:
```bash
git clone https://github.com/pythonbpf/Python-BPF.git
cd Python-BPF
```
2. Create and activate a virtual environment:
```bash
python3 -m venv .venv
source .venv/bin/activate # On Windows: .venv\Scripts\activate
```
3. Install in development mode:
**Using uv (recommended):**
```bash
uv pip install -e .
uv pip install pylibbpf
```
**Using pip:**
```bash
pip install -e .
pip install pylibbpf
```
4. Install development dependencies:
```bash
make install
```
### Installing Documentation Dependencies
If you want to build the documentation locally:
**Using uv (recommended):**
```bash
uv pip install pythonbpf[docs]
# Or from the repository root:
uv pip install -e .[docs]
```
**Using pip:**
```bash
pip install pythonbpf[docs]
# Or from the repository root:
pip install -e .[docs]
```
## Generating vmlinux.py
`vmlinux.py` contains the running kernel's data structures and is analogous to `vmlinux.h` included in eBPF programs written in C. Some examples require access to it. To use these features, you need to generate a `vmlinux.py` file:
1. Install additional dependencies:
**Using uv (recommended):**
```bash
uv pip install ctypeslib2
```
**Using pip:**
```bash
pip install ctypeslib2
```
2. Generate the vmlinux.py file:
```bash
sudo tools/vmlinux-gen.py
```
3. Copy the generated file to your working directory or the examples directory as needed.
```{warning}
The `vmlinux.py` file is kernel-specific. If you upgrade your kernel, you may need to regenerate this file.
```
## Verifying Installation
To verify that PythonBPF is installed correctly, run:
```bash
python3 -c "import pythonbpf; print(pythonbpf.__all__)"
```
You should see output similar to:
```
['bpf', 'map', 'section', 'bpfglobal', 'struct', 'compile_to_ir', 'compile', 'BPF', 'trace_pipe', 'trace_fields']
```
## Troubleshooting
### Permission Errors
If you encounter permission errors when running BPF programs:
* Make sure you're running with `sudo` or as root
* Check that `/sys/kernel/tracing/` is accessible
### LLVM/Clang Not Found
If you get errors about `llc` or `clang` not being found:
* Verify they're installed: `which llc` and `which clang`
* Check your PATH environment variable includes the LLVM bin directory
### Import Errors
If Python can't find the `pythonbpf` module:
* Make sure you've activated your virtual environment
* Verify installation with `uv pip list | grep pythonbpf` or `pip list | grep pythonbpf`
* Try reinstalling: `uv pip install --force-reinstall pythonbpf` or `pip install --force-reinstall pythonbpf`
## Next Steps
Now that you have PythonBPF installed, continue to the {doc}`quickstart` guide to write your first BPF program!

View File

@ -0,0 +1,249 @@
# Quick Start
This guide will walk you through creating your first BPF program with PythonBPF.
## Your First BPF Program
Let's create a simple "Hello World" program that prints a message every time a process is executed on your system.
### Step 1: Create the Program
Create a new file called `hello_world.py`:
```python
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load()
b.attach_all()
trace_pipe()
```
### Step 2: Run the Program
Run the program with sudo (required for BPF operations):
```bash
sudo python3 hello_world.py
```
### Step 3: See it in Action
Open another terminal and run any command:
```bash
ls
echo "test"
date
```
You should see "Hello, World!" printed in the first terminal for each command executed!
Press `Ctrl+C` to stop the program.
## Understanding the Code
Let's break down what each part does:
### Imports
```python
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from ctypes import c_void_p, c_int64
```
* `bpf` - Decorator to mark functions for BPF compilation
* `section` - Decorator to specify which kernel event to attach to
* `bpfglobal` - Decorator for BPF global variables
* `BPF` - Class to compile, load, and attach BPF programs
* `trace_pipe` - Utility to read kernel trace output (similar to BCC)
* `c_void_p`, `c_int64` - C types for function signatures
### The BPF Function
```python
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return 0
```
* `@bpf` - Marks this function to be compiled to BPF bytecode
* `@section("tracepoint/syscalls/sys_enter_execve")` - Attaches to the execve syscall tracepoint (called when processes start)
* `ctx: c_void_p` - Context parameter (required for all BPF functions)
* `print()` - the PythonBPF API for `bpf_printk` helper function
* `return 0` - BPF functions must return an integer
### License Declaration
```python
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
```
* The Linux kernel requires BPF programs to declare a license
* Most kernel features require GPL-compatible licenses
* This is defined as a BPF global variable
### Compilation and Execution
```python
b = BPF()
b.load()
b.attach_all()
trace_pipe()
```
* `BPF()` - Creates a BPF object and compiles the current file
* `b.load()` - Loads the compiled BPF program into the kernel
* `b.attach_all()` - Attaches all BPF programs to their specified hooks
* `trace_pipe()` - Reads and displays output from the kernel trace buffer
Alternatively, you can also use the `compile()` function to compile the BPF code to an object file:
```python
from pythonbpf import compile
```
This object file can then be loaded using any other userspace library in any language.
## Next Example: Tracking Process IDs
Let's make a more interesting program that tracks which processes are being created:
```python
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from pythonbpf.helper import pid
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def track_exec(ctx: c_void_p) -> c_int64:
process_id = pid()
print(f"Process with PID: {process_id} is starting")
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load()
b.attach_all()
trace_pipe()
```
This program uses BPF helper functions:
* `pid()` - Gets the current process ID
Run it with `sudo python3 track_exec.py` and watch processes being created!
## Common Patterns
### Tracepoints
Tracepoints are predefined hooks in the kernel. Common ones include:
```python
# System calls
@section("tracepoint/syscalls/sys_enter_execve")
@section("tracepoint/syscalls/sys_enter_clone")
@section("tracepoint/syscalls/sys_enter_open")
# Scheduler events
@section("tracepoint/sched/sched_process_fork")
@section("tracepoint/sched/sched_switch")
```
### Kprobes
Kprobes allow you to attach to any kernel function:
```python
@section("kprobe/do_sys_open")
def trace_open(ctx: c_void_p) -> c_int64:
print("File is being opened")
return 0
```
### XDP (eXpress Data Path)
For network packet processing:
```python
from pythonbpf.helper import XDP_PASS
@section("xdp")
def xdp_pass(ctx: c_void_p) -> c_int64:
return XDP_PASS
```
## Best Practices
1. **Always include a LICENSE** - Required by the kernel
2. **Use type hints** - Required by PythonBPF to generate correct code
3. **Return the correct type** - Match the expected return type for your program type
4. **Test incrementally** - Start simple and add complexity gradually
5. **Check kernel logs** - Use `dmesg` to see BPF verifier messages if loading fails
## Common Issues
### Program Won't Load
If your BPF program fails to load:
* Check `dmesg` for verifier error messages
* Ensure your LICENSE is GPL-compatible
* Verify you're using supported BPF features
* Make sure return types match function signatures
### No Output
If you don't see output:
* Verify the tracepoint/kprobe is being triggered
* Check that you're running with sudo
* Ensure `/sys/kernel/tracing/trace_pipe` is accessible
### Compilation Errors
If compilation fails:
* Check that `llc` is installed and in your PATH
* Verify your Python syntax is correct
* Ensure all imported types are from `ctypes`
* In the worst case, compile object files manually using `compile_to_ir()` and `llc` to get detailed errors
### Verification Failure
If verification fails:
* Compile the object files using `compile()` function instead of loading directly
* Run `sudo check.sh check <bpf>.o` to get detailed verification output
## Next Steps
Now that you understand the basics, explore:
* {doc}`../user-guide/decorators` - Learn about all available decorators
* {doc}`../user-guide/maps` - Use BPF maps for data storage and communication
* {doc}`../user-guide/structs` - Define custom data structures
* {doc}`../user-guide/helpers` - Discover all available BPF helper functions
* [Examples directory](https://github.com/pythonbpf/Python-BPF/tree/master/examples) - See more complex examples

95
docs/index.md Normal file
View File

@ -0,0 +1,95 @@
# PythonBPF Documentation
Welcome to **PythonBPF** - a Python frontend for writing eBPF programs without embedding C code. PythonBPF uses [llvmlite](https://github.com/numba/llvmlite) to generate LLVM IR and compiles directly to eBPF object files that can be loaded into the Linux kernel.
```{note}
This project is under active development.
```
## What is PythonBPF?
PythonBPF is an LLVM IR generator for eBPF programs written in Python. It provides:
* **Pure Python syntax** - Write eBPF programs in Python using familiar decorators and type annotations
* **Direct compilation** - Compile to LLVM object files without relying on BCC
* **Full eBPF features** - Support for maps, helpers, global definitions, and more
* **Integration with libbpf** - Works with [pylibbpf](https://github.com/pythonbpf/pylibbpf) for object loading and execution
## Quick Example
Here's a simple "Hello World" BPF program that traces process creation:
```python
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load()
b.attach_all()
trace_pipe()
```
## Features
* Generate eBPF programs directly using Python syntax
* Compile to LLVM object files for kernel execution
* Built with `llvmlite` for IR generation
* Supports maps, helpers, and global definitions for BPF
* Companion project: [pylibbpf](https://github.com/pythonbpf/pylibbpf), which provides bindings for libbpf
## Table of Contents
```{toctree}
:maxdepth: 2
:caption: Getting Started
getting-started/index
getting-started/installation
getting-started/quickstart
```
```{toctree}
:maxdepth: 2
:caption: User Guide
user-guide/index
user-guide/decorators
user-guide/maps
user-guide/structs
user-guide/compilation
user-guide/helpers
```
```{toctree}
:maxdepth: 2
:caption: API Reference
api/index
```
## Links
* **GitHub Repository**: [pythonbpf/Python-BPF](https://github.com/pythonbpf/Python-BPF)
* **PyPI Package**: [pythonbpf](https://pypi.org/project/pythonbpf/)
* **Video Demo**: [YouTube](https://www.youtube.com/watch?v=eFVhLnWFxtE)
## License
PythonBPF is licensed under the Apache License 2.0.
## Indices and tables
* {ref}`genindex`
* {ref}`modindex`
* {ref}`search`

4
docs/requirements.txt Normal file
View File

@ -0,0 +1,4 @@
myst-parser>=2.0
sphinx>=7.0
sphinx-copybutton
sphinx-rtd-theme>=2.0

View File

@ -0,0 +1,432 @@
# Compilation
PythonBPF provides several functions and classes for compiling Python code into BPF bytecode and loading it into the kernel.
## Overview
The compilation process transforms Python code into executable BPF programs:
1. **Python AST** → LLVM IR generation (using llvmlite)
2. **LLVM IR** → BPF bytecode (using llc)
3. **BPF Object** → Kernel loading (using libbpf)
## Compilation Functions
### compile_to_ir()
Compile Python source to LLVM Intermediate Representation.
#### Signature
```python
def compile_to_ir(filename: str, output: str, loglevel=logging.WARNING)
```
#### Parameters
* `filename` - Path to the Python source file to compile
* `output` - Path where the LLVM IR file (.ll) should be written
* `loglevel` - Logging level (default: `logging.WARNING`)
#### Usage
```python
from pythonbpf import compile_to_ir
import logging
# Compile to LLVM IR
compile_to_ir(
filename="my_bpf_program.py",
output="my_bpf_program.ll",
loglevel=logging.DEBUG
)
```
#### Output
This function generates an `.ll` file containing LLVM IR, which is human-readable assembly-like code. This is useful for:
* Debugging compilation issues
* Understanding code generation
### compile()
Compile Python source to BPF object file.
#### Signature
```python
def compile(filename: str = None, output: str = None, loglevel=logging.WARNING)
```
#### Parameters
* `filename` - Path to the Python source file (default: calling file)
* `output` - Path for the output object file (default: same name with `.o` extension)
* `loglevel` - Logging level (default: `logging.WARNING`)
#### Usage
```python
from pythonbpf import compile
import logging
# Compile current file
compile()
# Compile specific file
compile(filename="my_program.py", output="my_program.o")
# Compile with debug logging
compile(loglevel=logging.DEBUG)
```
#### Output
This function generates a `.o` file containing BPF bytecode that can be:
* Loaded into the kernel
* Inspected with `bpftool`
* Verified with the BPF verifier
* Distributed as a compiled binary
### BPF Class
The `BPF` class provides a high-level interface to compile, load, and attach BPF programs.
#### Signature
```python
class BPF:
def __init__(self, filename: str = None, loglevel=logging.WARNING)
def load(self)
def attach_all(self)
def load_and_attach(self)
```
#### Parameters
* `filename` - Path to Python source file (default: calling file)
* `loglevel` - Logging level (default: `logging.WARNING`)
#### Methods
##### __init__()
Create a BPF object and compile the source.
```python
from pythonbpf import BPF
# Compile current file
b = BPF()
# Compile specific file
b = BPF(filename="my_program.py")
```
##### load()
Load the compiled BPF program into the kernel.
```python
b = BPF()
b.load()
```
This method:
* Loads the BPF object file into the kernel
* Creates maps
* Verifies the BPF program
* Returns a `BpfObject` instance
##### attach_all()
Attach all BPF programs to their specified hooks.
```python
b = BPF()
b.load()
b.attach_all()
```
This method:
* Attaches tracepoints
* Attaches kprobes/kretprobes
* Attaches XDP programs
* Enables all hooks
##### load_and_attach()
Convenience method that loads and attaches in one call.
```python
b = BPF()
b.load_and_attach()
```
Equivalent to:
```python
b = BPF()
b.load()
b.attach_all()
```
## Complete Example
Here's a complete example showing the compilation workflow:
```python
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def trace_exec(ctx: c_void_p) -> c_int64:
print("Process started")
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
if __name__ == "__main__":
# Method 1: Simple compilation and loading
b = BPF()
b.load_and_attach()
trace_pipe()
# Method 2: Step-by-step
# b = BPF()
# b.load()
# b.attach_all()
# trace_pipe()
# Method 3: Manual compilation
# from pythonbpf import compile
# compile(filename="my_program.py", output="my_program.o")
# # Then load with pylibbpf directly
```
## Compilation Pipeline Details
### AST Parsing
The Python `ast` module parses your source code:
```python
import ast
tree = ast.parse(source_code, filename)
```
The AST is then walked to find:
* Functions decorated with `@bpf`
* Classes decorated with `@struct`
* Map definitions with `@map`
* Global variables with `@bpfglobal`
### IR Generation
PythonBPF uses `llvmlite` to generate LLVM IR:
```python
from llvmlite import ir
# Create module
module = ir.Module(name='bpf_module')
module.triple = 'bpf'
# Generate IR for each BPF function
# ...
```
Key aspects of IR generation:
* Type conversion (Python types → LLVM types)
* Function definitions
* Map declarations
* Global variable initialization
* Debug information
### BPF Compilation
The LLVM IR is compiled to BPF bytecode using `llc`:
```bash
llc -march=bpf -filetype=obj input.ll -o output.o
```
### Kernel Loading
The compiled object is loaded using `pylibbpf`:
```python
from pylibbpf import BpfObject
obj = BpfObject(path="program.o")
obj.load()
```
## Debugging Compilation
### Logging
Enable debug logging to see compilation details:
```python
import logging
from pythonbpf import BPF
b = BPF(loglevel=logging.DEBUG)
```
This will show:
* AST parsing details
* IR generation steps
* Compilation commands
* Loading status
### Inspecting LLVM IR
Generate and inspect the IR file:
```python
from pythonbpf import compile_to_ir
compile_to_ir("program.py", "program.ll")
```
Then examine `program.ll` to understand the generated code.
### Using bpftool
Inspect compiled objects with `bpftool`:
```bash
# Show program info
bpftool prog show
# Dump program instructions
bpftool prog dump xlated id <ID>
# Dump program JIT code
bpftool prog dump jited id <ID>
# Show maps
bpftool map show
# Dump map contents
bpftool map dump id <ID>
```
### Verifier Errors
If the kernel verifier rejects your program:
* Check `dmesg` for detailed error messages:
```bash
sudo dmesg | tail -50
```
## Compilation Options
### Optimization Levels
While PythonBPF doesn't expose optimization flags directly, you can:
1. Manually compile IR with specific flags:
```bash
llc -march=bpf -O2 -filetype=obj program.ll -o program.o
```
2. Modify the compilation pipeline in your code
### Debug Information
PythonBPF automatically generates debug information (DWARF) for:
* Function names
* Variable names
* Type information
This helps with:
* Stack traces
* Debugging with `bpftool`
* Source-level debugging
## Working with Compiled Objects
### Loading Pre-compiled Objects
You can load previously compiled objects:
```python
from pylibbpf import BpfObject
# Load object file
obj = BpfObject(path="my_program.o")
obj.load()
# Attach programs
# (specific attachment depends on program type)
```
### Distribution
Distribute compiled BPF objects:
1. Compile once:
```python
from pythonbpf import compile
compile(filename="program.py", output="program.o")
```
2. Ship `program.o` file
3. Load on target systems:
```python
from pylibbpf import BpfObject
obj = BpfObject(path="program.o")
obj.load()
```
### Version Compatibility
BPF objects are generally compatible across kernel versions, but:
* Some features require specific kernel versions
* Helper functions may not be available on older kernels
* BTF (BPF Type Format) requirements vary
## Troubleshooting
### Compilation Fails
If compilation fails:
* Check Python syntax
* Verify all decorators are correct
* Ensure type hints are present
* Check for unsupported Python features
### Loading Fails
If loading fails:
* Check `dmesg` for verifier errors
* Verify LICENSE is set correctly
* Ensure helper functions are valid
* Check map definitions
### Programs Don't Attach
If attachment fails:
* Verify section names are correct
* Check that hooks exist on your kernel
* Ensure you have sufficient permissions
* Verify kernel version supports the feature
## Next Steps
* Learn about {doc}`helpers` for available BPF helper functions
* Explore {doc}`maps` for data storage
* See {doc}`decorators` for compilation markers

View File

@ -0,0 +1,448 @@
# Decorators
Decorators are the primary way to mark Python code for BPF compilation. PythonBPF provides five core decorators that control how your code is transformed into eBPF bytecode.
## @bpf
The `@bpf` decorator marks functions or classes for BPF compilation.
### Usage
```python
from pythonbpf import bpf
@bpf
def my_function(ctx):
# This function will be compiled to BPF bytecode
pass
```
### Description
Any function or class decorated with `@bpf` will be processed by the PythonBPF compiler and transformed into LLVM IR, then compiled to BPF bytecode. This is the fundamental decorator that enables BPF compilation.
### Rules
* Must be used on top-level functions or classes
* The function must have proper type hints
* Return types must be BPF-compatible
* Only BPF-compatible operations are allowed inside
### Example
```python
from pythonbpf import bpf, section
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def trace_exec(ctx: c_void_p) -> c_int64:
print("Process started")
return c_int64(0)
```
## @section
The `@section(name)` decorator specifies which kernel hook to attach the BPF program to.
### Usage
```python
from pythonbpf import bpf, section
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def trace_open(ctx):
pass
```
### Section Types
#### Tracepoints
Tracepoints are stable kernel hooks defined in `/sys/kernel/tracing/events/`:
```python
# System call tracepoints
@section("tracepoint/syscalls/sys_enter_execve")
@section("tracepoint/syscalls/sys_enter_clone")
@section("tracepoint/syscalls/sys_enter_open")
@section("tracepoint/syscalls/sys_exit_read")
# Scheduler tracepoints
@section("tracepoint/sched/sched_process_fork")
@section("tracepoint/sched/sched_process_exit")
@section("tracepoint/sched/sched_switch")
# Block I/O tracepoints
@section("tracepoint/block/block_rq_insert")
@section("tracepoint/block/block_rq_complete")
```
#### Kprobes
Kprobes allow attaching to any kernel function:
```python
@section("kprobe/do_sys_open")
def trace_sys_open(ctx):
pass
@section("kprobe/__x64_sys_write")
def trace_write(ctx):
pass
```
#### Kretprobes
Kretprobes trigger when a kernel function returns:
```python
@section("kretprobe/do_sys_open")
def trace_open_return(ctx):
pass
```
#### XDP (eXpress Data Path)
For network packet processing at the earliest point:
```python
from pythonbpf.helper import XDP_PASS
from ctypes import c_void_p, c_int64
@section("xdp")
def xdp_prog(ctx: c_void_p) -> c_int64:
# XDP_PASS, XDP_DROP, XDP_ABORTED constants available from pythonbpf.helper
return XDP_PASS
```
### Finding Tracepoints
To find available tracepoints on your system:
```bash
# List all tracepoints
ls /sys/kernel/tracing/events/
# List syscall tracepoints
ls /sys/kernel/tracing/events/syscalls/
# View tracepoint format
cat /sys/kernel/tracing/events/syscalls/sys_enter_open/format
```
## @map
The `@map` decorator marks a function as a BPF map definition.
### Usage
```python
from pythonbpf import bpf, map
from pythonbpf.maps import HashMap
from ctypes import c_uint32, c_uint64
@bpf
@map
def my_map() -> HashMap:
return HashMap(key=c_uint32, value=c_uint64, max_entries=1024)
```
### Description
Maps are BPF data structures used to:
* Store state between BPF program invocations
* Communicate data between BPF programs
* Share data with userspace
The function must return a map type (HashMap, PerfEventArray, RingBuffer) and the return type must be annotated.
### Example
```python
from pythonbpf import bpf, map, section
from pythonbpf.maps import HashMap
from pythonbpf.helper import pid
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
@bpf
@map
def process_count() -> HashMap:
return HashMap(key=c_uint32, value=c_uint64, max_entries=4096)
@bpf
@section("tracepoint/syscalls/sys_enter_clone")
def count_clones(ctx: c_void_p) -> c_int64:
process_id = pid()
count = process_count.lookup(process_id)
if count:
process_count.update(process_id, count + 1)
else:
process_count.update(process_id, c_uint64(1))
return 0
```
See {doc}`maps` for more details on available map types.
## @struct
The `@struct` decorator marks a class as a BPF struct definition.
### Usage
```python
from pythonbpf import bpf, struct
from ctypes import c_uint64, c_uint32
@bpf
@struct
class Event:
timestamp: c_uint64
pid: c_uint32
cpu: c_uint32
```
### Description
Structs allow you to define custom data types for use in BPF programs. They can be used:
* As map keys and values
* For perf event output
* In ring buffer submissions
* As local variables
### Field Types
Supported field types include:
* **Integer types**: `c_int8`, `c_int16`, `c_int32`, `c_int64`, `c_uint8`, `c_uint16`, `c_uint32`, `c_uint64`
* **Pointers**: `c_void_p`, `c_char_p`
* **Fixed strings**: `str(N)` where N is the size (e.g., `str(16)`)
* **Nested structs**: Other `@struct` decorated classes
### Example
```python
from pythonbpf import bpf, struct, map, section
from pythonbpf.maps import RingBuffer
from pythonbpf.helper import pid, ktime
from ctypes import c_void_p, c_int64, c_uint64, c_uint32
@bpf
@struct
class ProcessEvent:
timestamp: c_uint64
pid: c_uint32
comm: str(16)
@bpf
@map
def events() -> RingBuffer:
return RingBuffer(max_entries=4096)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def track_processes(ctx: c_void_p) -> c_int64:
event = ProcessEvent()
event.timestamp = ktime()
event.pid = pid()
comm(event.comm) # Fills event.comm with process name
events.output(event)
return 0
```
See {doc}`structs` for more details on working with structs.
## @bpfglobal
The `@bpfglobal` decorator marks a function as a BPF global variable definition.
### Usage
```python
from pythonbpf import bpf, bpfglobal
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
```
### Description
BPF global variables are values that:
* Are initialized when the program loads
* Can be read by all BPF functions
* Must be constant (cannot be modified at runtime in current implementation)
### Common Global Variables
#### LICENSE (Required)
Every BPF program must declare a license:
```python
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
```
Valid licenses include:
* `"GPL"` - GNU General Public License
* `"GPL v2"` - GPL version 2
* `"Dual BSD/GPL"` - Dual licensed
* `"Dual MIT/GPL"` - Dual licensed
```{warning}
Many BPF features require a GPL-compatible license. Using a non-GPL license may prevent your program from loading or accessing certain kernel features.
```
#### Custom Global Variables
You can define other global variables:
```python
@bpf
@bpfglobal
def DEBUG_MODE() -> int:
return 1
@bpf
@bpfglobal
def MAX_EVENTS() -> int:
return 1000
```
These can be referenced in your BPF functions, though modifying them at runtime is currently not supported.
## Combining Decorators
Decorators are often used together. The order matters:
### Correct Order
```python
@bpf # Always first
@section("...") # Section before other decorators
def my_function():
pass
@bpf # Always first
@map # Map/struct/bpfglobal after @bpf
def my_map():
pass
@bpf # Always first
@struct # Map/struct/bpfglobal after @bpf
class MyStruct:
pass
@bpf # Always first
@bpfglobal # Map/struct/bpfglobal after @bpf
def LICENSE():
return "GPL"
```
### Examples by Use Case
#### Simple Tracepoint
```python
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def trace_open(ctx: c_void_p) -> c_int64:
return c_int64(0)
```
#### Map Definition
```python
@bpf
@map
def counters() -> HashMap:
return HashMap(key=c_uint32, value=c_uint64, max_entries=256)
```
#### Struct Definition
```python
@bpf
@struct
class Event:
timestamp: c_uint64
value: c_uint32
```
#### Global Variable
```python
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
```
## Best Practices
1. **Always use @bpf first** - It must be the outermost decorator
2. **Provide type hints** - Required for proper code generation
3. **Test incrementally** - Verify each component works before combining
## Common Errors
### Missing @bpf Decorator
```python
# Wrong - missing @bpf
@section("tracepoint/syscalls/sys_enter_open")
def my_func(ctx):
pass
# Correct
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def my_func(ctx):
pass
```
### Wrong Decorator Order
```python
# Wrong - @section before @bpf
@section("tracepoint/syscalls/sys_enter_open")
@bpf
def my_func(ctx):
pass
# Correct
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def my_func(ctx):
pass
```
### Missing Type Hints
```python
# Wrong - no type hints
@bpf
def my_func(ctx):
pass
# Correct
@bpf
def my_func(ctx: c_void_p) -> c_int64:
pass
```
## Next Steps
* Learn about {doc}`maps` for data storage and communication
* Explore {doc}`structs` for defining custom data types
* Understand {doc}`compilation` to see how code is transformed
* Check out {doc}`helpers` for available BPF helper functions

503
docs/user-guide/helpers.md Normal file
View File

@ -0,0 +1,503 @@
# Helper Functions and Utilities
PythonBPF provides helper functions and utilities for BPF programs and userspace code.
```{note}
**Work in Progress:** PythonBPF is under active development. We are constantly adding support for more helpers, kfuncs, and map types. Check back for updates!
```
For comprehensive documentation on BPF helpers, see the [eBPF Helper Functions documentation on ebpf.io](https://ebpf.io/what-is-ebpf/#helper-calls).
## BPF Helper Functions
BPF helper functions are kernel-provided functions that BPF programs can call to interact with the system. PythonBPF exposes these through the `pythonbpf.helper` module.
```python
from pythonbpf.helper import pid, ktime, comm
```
### Process and Task Information
#### pid()
Get the current process ID.
> **Linux Kernel Helper:** `bpf_get_current_pid_tgid()`
```python
from pythonbpf.helper import pid
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def trace_open(ctx: c_void_p) -> c_int64:
process_id = pid()
print(f"Process {process_id} opened a file")
return 0
```
**Returns:** `c_int32` - The process ID of the current task
#### comm()
Get the current process command name.
> **Linux Kernel Helper:** `bpf_get_current_comm()`
**Parameters:**
* `buf` - Buffer to fill with the process command name
**Returns:** `c_int64` - 0 on success, negative on error
#### uid()
Get the current user ID.
> **Linux Kernel Helper:** `bpf_get_current_uid_gid()`
```python
from pythonbpf.helper import uid
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def trace_open(ctx: c_void_p) -> c_int64:
user_id = uid()
if user_id == 0:
print("Root user opened a file")
return 0
```
**Returns:** `c_int32` - The user ID of the current task
### Time and Timing
#### ktime()
Get the current kernel time in nanoseconds since system boot.
> **Linux Kernel Helper:** `bpf_ktime_get_ns()`
```python
from pythonbpf.helper import ktime
@bpf
@section("tracepoint/syscalls/sys_enter_read")
def measure_latency(ctx: c_void_p) -> c_int64:
start_time = ktime()
# Store for later comparison
return 0
```
**Returns:** `c_int64` - Current time in nanoseconds
**Use cases:**
* Measuring latency
* Timestamping events
* Rate limiting
* Timeout detection
### CPU Information
#### smp_processor_id()
Get the ID of the CPU on which the BPF program is running.
> **Linux Kernel Helper:** `bpf_get_smp_processor_id()`
```python
from pythonbpf.helper import smp_processor_id
@bpf
@section("tracepoint/sched/sched_switch")
def track_cpu(ctx: c_void_p) -> c_int64:
cpu = smp_processor_id()
print(f"Running on CPU {cpu}")
return 0
```
**Returns:** `c_int32` - The current CPU ID
**Use cases:**
* Per-CPU statistics
* Load balancing analysis
* CPU affinity tracking
### Memory Operations
#### probe_read()
Safely read data from kernel memory.
> **Linux Kernel Helper:** `bpf_probe_read()`
```python
from pythonbpf.helper import probe_read
@bpf
def read_kernel_data(ctx: c_void_p) -> c_int64:
dst = 0
size = 8
src = ctx # kernel address
result = probe_read(dst, size, src)
if result == 0:
print(f"Read value: {dst}")
return 0
```
**Parameters:**
* `dst` - Destination buffer
* `size` - Number of bytes to read
* `src` - Source kernel address
**Returns:** `c_int64` - 0 on success, negative on error
**Safety:** This function performs bounds checking and prevents invalid memory access.
#### probe_read_str()
Safely read a null-terminated string from kernel memory.
> **Linux Kernel Helper:** `bpf_probe_read_str()`
**Parameters:**
* `dst` - Destination buffer (string)
* `src` - Source kernel address
**Returns:** `c_int64` - Length of string on success, negative on error
### Random Numbers
#### random()
Generate a pseudo-random 32-bit number.
> **Linux Kernel Helper:** `bpf_get_prandom_u32()`
```python
from pythonbpf.helper import random
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def sample_events(ctx: c_void_p) -> c_int64:
# Sample 1% of events
if (random() % 100) == 0:
print("Sampled event")
return 0
```
**Returns:** `c_int32` - A pseudo-random number
### Network Helpers
#### skb_store_bytes()
Store bytes into a socket buffer (for network programs).
> **Linux Kernel Helper:** `bpf_skb_store_bytes()`
```python
from pythonbpf.helper import skb_store_bytes
@bpf
@section("classifier")
def modify_packet(ctx: c_void_p) -> c_int32:
offset = 14 # Skip Ethernet header
data = b"\x00\x01\x02\x03"
size = len(data)
result = skb_store_bytes(offset, data, size)
return 0
```
**Parameters:**
* `offset` - Offset in the socket buffer
* `from_buf` - Data to write
* `size` - Number of bytes to write
* `flags` - Optional flags
**Returns:** `c_int64` - 0 on success, negative on error
## Userspace Utilities
PythonBPF provides utilities for working with BPF programs from Python userspace code.
### trace_pipe()
Read and display output from the kernel trace pipe.
```python
from pythonbpf import trace_pipe
# After loading and attaching BPF programs
trace_pipe()
```
**Description:**
The `trace_pipe()` function reads from `/sys/kernel/tracing/trace_pipe` and displays BPF program output to stdout. This is the output from `print()` statements in BPF programs.
**Usage:**
```python
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def trace_exec(ctx: c_void_p) -> c_int64:
print("Process started") # This goes to trace_pipe
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load_and_attach()
trace_pipe() # Display BPF output
```
**Behavior:**
* Blocks until Ctrl+C is pressed
* Displays output in real-time
* Shows task name, PID, CPU, timestamp, and message
* Automatically handles trace pipe access errors
**Requirements:**
* Root or sudo access
* Accessible `/sys/kernel/tracing/trace_pipe`
### trace_fields()
Parse one line from the trace pipe into structured fields.
```python
from pythonbpf import trace_fields
# Read and parse trace output
task, pid, cpu, flags, ts, msg = trace_fields()
print(f"Task: {task}, PID: {pid}, CPU: {cpu}, Time: {ts}, Message: {msg}")
```
**Returns:** Tuple of `(task, pid, cpu, flags, timestamp, message)`
* `task` - String: Task/process name (up to 16 chars)
* `pid` - Integer: Process ID
* `cpu` - Integer: CPU number
* `flags` - Bytes: Trace flags
* `timestamp` - Float: Timestamp in seconds
* `message` - String: The actual trace message
**Description:**
The `trace_fields()` function reads one line from the trace pipe and parses it into individual fields. This is useful when you need programmatic access to trace data rather than just displaying it.
**Usage:**
```python
from pythonbpf import bpf, section, bpfglobal, BPF, trace_fields
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def trace_exec(ctx: c_void_p) -> c_int64:
print(f"PID:{pid()}")
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load_and_attach()
# Process trace events
try:
while True:
task, pid, cpu, flags, ts, msg = trace_fields()
print(f"[{ts:.6f}] {task}({pid}) on CPU{cpu}: {msg}")
except KeyboardInterrupt:
print("Stopped")
```
**Error Handling:**
* Raises `ValueError` if line cannot be parsed
* Skips lines about lost events
* Blocks waiting for next line
## Helper Function Examples
### Example 1: Latency Measurement
```python
from pythonbpf import bpf, map, section, bpfglobal, BPF, trace_pipe
from pythonbpf.maps import HashMap
from pythonbpf.helper import pid, ktime
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
@bpf
@map
def start_times() -> HashMap:
return HashMap(key=c_uint32, value=c_uint64, max_entries=4096)
@bpf
@section("tracepoint/syscalls/sys_enter_read")
def read_start(ctx: c_void_p) -> c_int64:
process_id = pid()
start = ktime()
start_times.update(process_id, start)
return 0
@bpf
@section("tracepoint/syscalls/sys_exit_read")
def read_end(ctx: c_void_p) -> c_int64:
process_id = pid()
start = start_times.lookup(process_id)
if start:
latency = ktime() - start
print(f"Read latency: {latency} ns")
start_times.delete(process_id)
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load_and_attach()
trace_pipe()
```
### Example 2: Process Tracking
```python
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from pythonbpf.helper import pid, uid
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def track_exec(ctx: c_void_p) -> c_int64:
process_id = pid()
user_id = uid()
print(f"User {user_id} started process (PID: {process_id})")
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load_and_attach()
trace_pipe()
```
### Example 3: CPU Load Monitoring
```python
from pythonbpf import bpf, map, section, bpfglobal, BPF
from pythonbpf.maps import HashMap
from pythonbpf.helper import smp_processor_id
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
@bpf
@map
def cpu_counts() -> HashMap:
return HashMap(key=c_uint32, value=c_uint64, max_entries=256)
@bpf
@section("tracepoint/sched/sched_switch")
def count_switches(ctx: c_void_p) -> c_int64:
cpu = smp_processor_id()
count = cpu_counts.lookup(cpu)
if count:
cpu_counts.update(cpu, count + 1)
else:
cpu_counts.update(cpu, 1)
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load_and_attach()
import time
time.sleep(5)
# Read results
from pylibbpf import BpfMap
map_obj = BpfMap(b, cpu_counts)
for cpu, count in map_obj.items():
print(f"CPU {cpu}: {count} context switches")
```
### Example 4: Event Sampling
```python
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from pythonbpf.helper import random, pid
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def sample_opens(ctx: c_void_p) -> c_int64:
# Sample 5% of events
if (random() % 100) < 5:
process_id = pid()
print(f"Sampled: PID {process_id} opening file")
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load_and_attach()
trace_pipe()
```
## Troubleshooting
### Helper Not Available
If a helper function doesn't work:
* Check your kernel version (some helpers are newer)
* Ensure your LICENSE is GPL-compatible
### Trace Pipe Access Denied
If `trace_pipe()` fails:
* Run with sudo/root
* Check `/sys/kernel/tracing/` is accessible
* Verify tracing is enabled in kernel config
## Examples
Check out these examples in the `BCC-Examples/` directory that demonstrate helper functions:
* [hello_world.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/hello_world.py) - Basic tracing with `print()`
* [sync_timing.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/sync_timing.py) - Using `ktime()` for timing measurements
* [hello_perf_output.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/hello_perf_output.py) - Using `pid()`, `ktime()`, and `comm()` with perf events
* [vfsreadlat.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/vfsreadlat.py) - Latency measurement with `ktime()` in kprobes
## Next Steps
* Explore {doc}`maps` for data storage with helpers
* Learn about {doc}`compilation` to understand helper implementation
* See {doc}`decorators` for marking BPF functions

87
docs/user-guide/index.md Normal file
View File

@ -0,0 +1,87 @@
# User Guide
This user guide provides comprehensive documentation for all PythonBPF features. Whether you're building simple tracing tools or complex performance monitoring systems, this guide will help you master PythonBPF.
## Overview
PythonBPF transforms Python code into eBPF bytecode that runs in the Linux kernel. It provides a Pythonic interface to eBPF features through decorators, type annotations, and familiar programming patterns.
## Core Concepts
### Decorators
PythonBPF uses decorators to mark code for BPF compilation:
* `@bpf` - Mark functions and classes for BPF compilation
* `@map` - Define BPF maps for data storage
* `@struct` - Define custom data structures
* `@section(name)` - Specify attachment points
* `@bpfglobal` - Define global variables
### Compilation Pipeline
Your Python code goes through several stages:
1. **IR Generation** - The Python AST is transformed into LLVM IR using llvmlite
2. **BPF Compilation** - LLVM IR is compiled to BPF bytecode using `llc`
3. **Loading** - The BPF object is loaded into the kernel using libbpf
4. **Attachment** - Programs are attached to kernel hooks (tracepoints, kprobes, etc.)
## Code Organization
When writing BPF programs with PythonBPF, we recommend:
1. **Use type hints** - Required for proper code generation
2. **Test incrementally** - Verify each component works before adding complexity
## Type System
PythonBPF uses Python's `ctypes` module for type definitions:
* `c_int8`, `c_int16`, `c_int32`, `c_int64` - Signed integers
* `c_uint8`, `c_uint16`, `c_uint32`, `c_uint64` - Unsigned integers
* `c_char`, `c_bool` - Characters and booleans
* `c_void_p` - Void pointers
* `str(N)` - Fixed-length strings (e.g., `str(16)` for 16-byte string)
## Example Structure
A typical PythonBPF program follows this structure:
```python
from pythonbpf import bpf, map, section, bpfglobal, BPF, compile
from pythonbpf.maps import HashMap
from ctypes import c_void_p, c_int64, c_uint32
# Define maps
@bpf
@map
def my_map() -> HashMap:
return HashMap(key=c_uint32, value=c_uint64, max_entries=1024)
# Define BPF function
@bpf
@section("tracepoint/...")
def my_function(ctx: c_void_p) -> c_int64:
# BPF logic here
return 0
# License (required)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Compile, load, and run
if __name__ == "__main__":
b = BPF()
b.load_and_attach()
# Use the program...
# Or, compile to an object file
compile()
```
## Next Steps
Start with {doc}`decorators` to learn about all available decorators, then explore the other sections to master specific features.

476
docs/user-guide/maps.md Normal file
View File

@ -0,0 +1,476 @@
# BPF Maps
Maps are BPF data structures that provide storage and communication mechanisms. They allow BPF programs to:
* Store state between invocations
* Share data between multiple BPF programs
* Communicate with userspace applications
```{note}
**Work in Progress:** PythonBPF is under active development. We are constantly adding support for more map types, helpers, and kfuncs. Check back for updates!
```
For comprehensive documentation on BPF maps, see the [eBPF Maps documentation on ebpf.io](https://ebpf.io/what-is-ebpf/#maps).
## Map Types
PythonBPF supports several map types, each optimized for different use cases.
### HashMap
Hash maps provide efficient key-value storage with O(1) lookup time.
> **Linux Kernel Map Type:** `BPF_MAP_TYPE_HASH`
#### Definition
```python
from pythonbpf import bpf, map
from pythonbpf.maps import HashMap
from ctypes import c_uint32, c_uint64
@bpf
@map
def my_map() -> HashMap:
return HashMap(
key=c_uint32,
value=c_uint64,
max_entries=1024
)
```
#### Parameters
* `key` - The type of the key (must be a ctypes type or struct)
* `value` - The type of the value (must be a ctypes type or struct)
* `max_entries` - Maximum number of entries the map can hold
#### Operations
##### lookup(key)
Look up a value by key. Returns the value if found, `None` otherwise.
```python
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def trace_open(ctx: c_void_p) -> c_int64:
value = my_map.lookup(1)
if value:
print(f"Found value: {value}")
return 0
```
##### update(key, value, flags=None)
Update or insert a key-value pair.
```python
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def track_opens(ctx: c_void_p) -> c_int64:
key = pid()
count = my_map.lookup(key)
if count:
my_map.update(key, count + 1)
else:
my_map.update(key, 1)
return 0
```
##### delete(key)
Remove an entry from the map.
```python
@bpf
def cleanup(ctx: c_void_p) -> c_int64:
my_map.delete(1)
return 0
```
#### Use Cases
* Counting events per process/CPU
* Storing timestamps for latency calculations
* Caching lookup results
* Implementing rate limiters
#### Example: Process Counter
```python
from pythonbpf import bpf, map, section, bpfglobal, BPF
from pythonbpf.maps import HashMap
from pythonbpf.helper import pid
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
@bpf
@map
def process_count() -> HashMap:
return HashMap(key=c_uint32, value=c_uint64, max_entries=4096)
@bpf
@section("tracepoint/syscalls/sys_enter_clone")
def count_processes(ctx: c_void_p) -> c_int64:
process_id = pid()
count = process_count.lookup(process_id)
if count:
new_count = count + 1
process_count.update(process_id, new_count)
else:
process_count.update(process_id, 1)
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
if __name__ == "__main__":
b = BPF()
b.load_and_attach()
# Access map from userspace
from pylibbpf import BpfMap
map_obj = BpfMap(b, process_count)
# Read values...
```
### PerfEventArray
Perf event arrays are used to send data from BPF programs to userspace with high throughput.
> **Linux Kernel Map Type:** `BPF_MAP_TYPE_PERF_EVENT_ARRAY`
#### Definition
```python
from pythonbpf.maps import PerfEventArray
@bpf
@map
def events() -> PerfEventArray:
return PerfEventArray(
key_size=c_uint32,
value_size=c_uint32
)
```
#### Parameters
* `key_size` - Type for the key (typically `c_uint32`)
* `value_size` - Type for the value (typically `c_uint32`)
#### Operations
##### output(data)
Send data to userspace. The data can be a struct or basic type.
```python
@bpf
@struct
class Event:
pid: c_uint32
timestamp: c_uint64
@bpf
@map
def events() -> PerfEventArray:
return PerfEventArray(key_size=c_uint32, value_size=c_uint32)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def send_event(ctx: c_void_p) -> c_int64:
event = Event()
event.pid = pid()
event.timestamp = ktime()
events.output(event)
return 0
```
#### Use Cases
* Sending detailed event data to userspace
* Real-time monitoring and alerting
* Collecting samples for analysis
* High-throughput data collection
#### Example: Event Logging
```python
from pythonbpf import bpf, map, struct, section, bpfglobal, BPF
from pythonbpf.maps import PerfEventArray
from pythonbpf.helper import pid, ktime, comm
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
@bpf
@struct
class ProcessEvent:
timestamp: c_uint64
pid: c_uint32
comm: str(16)
@bpf
@map
def events() -> PerfEventArray:
return PerfEventArray(key_size=c_uint32, value_size=c_uint32)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def log_exec(ctx: c_void_p) -> c_int64:
event = ProcessEvent()
event.timestamp = ktime()
event.pid = pid()
comm(event.comm) # Fills event.comm with process name
events.output(event)
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
```
### RingBuffer
Ring buffers provide efficient, ordered event delivery with lower overhead than perf event arrays.
> **Linux Kernel Map Type:** `BPF_MAP_TYPE_RINGBUF`
#### Definition
```python
from pythonbpf.maps import RingBuffer
@bpf
@map
def events() -> RingBuffer:
return RingBuffer(max_entries=4096)
```
#### Parameters
* `max_entries` - Maximum size of the ring buffer in bytes (must be power of 2)
#### Operations
##### output(data, flags=0)
Send data to the ring buffer.
```python
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def log_event(ctx: c_void_p) -> c_int64:
event = Event()
event.pid = pid()
events.output(event)
return 0
```
##### reserve(size)
Reserve space in the ring buffer. Returns a pointer to the reserved space or 0 if no space available.
```python
@bpf
def reserve_space(ctx: c_void_p) -> c_int64:
ptr = events.reserve(64) # Reserve 64 bytes
if ptr:
# Use the reserved space
events.submit(ptr)
return 0
```
##### submit(data, flags=0)
Submit previously reserved space.
##### discard(data, flags=0)
Discard previously reserved space without submitting.
#### Use Cases
* Modern event streaming (preferred over PerfEventArray)
* Lower overhead event delivery
* Ordered event processing
* Kernel 5.8+ systems
#### Advantages over PerfEventArray
* Lower memory overhead
* Better performance
* Simpler API
* Ordered delivery guarantees
### BPFMapType Enum
PythonBPF supports various BPF map types through the `BPFMapType` enum:
```python
from pythonbpf.maps import BPFMapType
# Common map types
BPFMapType.BPF_MAP_TYPE_HASH # Hash map
BPFMapType.BPF_MAP_TYPE_ARRAY # Array map
BPFMapType.BPF_MAP_TYPE_PERF_EVENT_ARRAY # Perf event array
BPFMapType.BPF_MAP_TYPE_RINGBUF # Ring buffer
BPFMapType.BPF_MAP_TYPE_STACK_TRACE # Stack trace storage
BPFMapType.BPF_MAP_TYPE_LRU_HASH # LRU hash map
```
## Using Maps with Structs
Maps can store complex data types using structs as values:
```python
from pythonbpf import bpf, map, struct, section
from pythonbpf.maps import HashMap
from ctypes import c_uint32, c_uint64
@bpf
@struct
class Stats:
count: c_uint64
total_time: c_uint64
max_time: c_uint64
@bpf
@map
def process_stats() -> HashMap:
return HashMap(
key=c_uint32, # PID as key
value=Stats, # Struct as value
max_entries=1024
)
@bpf
@section("tracepoint/syscalls/sys_enter_read")
def track_stats(ctx: c_void_p) -> c_int64:
process_id = pid()
stats = process_stats.lookup(process_id)
if stats:
stats.count = stats.count + 1
process_stats.update(process_id, stats)
else:
new_stats = Stats()
new_stats.count = 1
new_stats.total_time = 0
new_stats.max_time = 0
process_stats.update(process_id, new_stats)
return 0
```
## Accessing Maps from Userspace
After loading a BPF program, you can access maps from Python using `pylibbpf`:
```python
from pythonbpf import BPF
from pylibbpf import BpfMap
# Load BPF program
b = BPF()
b.load_and_attach()
# Get map reference
map_obj = BpfMap(b, my_map)
# Read all key-value pairs
for key, value in map_obj.items():
print(f"Key: {key}, Value: {value}")
# Get all keys
keys = list(map_obj.keys())
# Get all values
values = list(map_obj.values())
# Lookup specific key
value = map_obj[key]
# Update from userspace
map_obj[key] = new_value
# Delete from userspace
del map_obj[key]
```
## Common Patterns
### Counter Pattern
```python
count = my_map.lookup(key)
if count:
my_map.update(key, count + 1)
else:
my_map.update(key, 1)
```
### Latency Tracking
```python
# Store start time
start = ktime()
start_map.update(key, start)
# Later: calculate latency
start_time = start_map.lookup(key)
if start_time:
latency = ktime() - start_time
latency_map.update(key, latency)
start_map.delete(key)
```
### Event Sampling
```python
# Only process every Nth event
count = counter.lookup(key)
if count and (count % 100) == 0:
events.output(data)
counter.update(key, count + 1 if count else 1)
```
## Troubleshooting
### Map Not Found
If you get "map not found" errors:
* Ensure the map is defined with `@bpf` and `@map`
* Check that the map name matches exactly
* Verify the BPF program loaded successfully
### Map Full
If updates fail due to map being full:
* Increase `max_entries`
* Use LRU maps for automatic eviction
* Add cleanup logic to delete old entries
### Type Errors
If you get type-related errors:
* Verify key and value types match the definition
* Check that structs are properly defined
## Examples
Check out these examples in the `BCC-Examples/` directory that demonstrate map usage:
* [sync_timing.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/sync_timing.py) - HashMap for storing timestamps
* [sync_count.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/sync_count.py) - HashMap for counting events
* [hello_perf_output.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/hello_perf_output.py) - PerfEventArray for sending structs to userspace
* [sync_perf_output.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/sync_perf_output.py) - PerfEventArray with timing data
* [disksnoop.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/disksnoop.py) - HashMap for tracking disk I/O
## Next Steps
* Learn about {doc}`structs` for defining custom value types
* Explore {doc}`helpers` for BPF helper functions
* See {doc}`compilation` to understand how maps are compiled

413
docs/user-guide/structs.md Normal file
View File

@ -0,0 +1,413 @@
# BPF Structs
Structs allow you to define custom data types for use in BPF programs. They provide a way to group related fields together and can be used as map values, event payloads, or local variables.
## Defining Structs
Use the `@bpf` and `@struct` decorators to define a BPF struct:
```python
from pythonbpf import bpf, struct
from ctypes import c_uint64, c_uint32
@bpf
@struct
class Event:
timestamp: c_uint64
pid: c_uint32
cpu: c_uint32
```
## Field Types
Structs support various field types from Python's `ctypes` module.
### Integer Types
```python
from ctypes import (
c_int8, c_int16, c_int32, c_int64,
c_uint8, c_uint16, c_uint32, c_uint64
)
@bpf
@struct
class Numbers:
small_int: c_int8 # -128 to 127
short_int: c_int16 # -32768 to 32767
int_val: c_int32 # -2^31 to 2^31-1
long_int: c_int64 # -2^63 to 2^63-1
byte: c_uint8 # 0 to 255
word: c_uint16 # 0 to 65535
dword: c_uint32 # 0 to 2^32-1
qword: c_uint64 # 0 to 2^64-1
```
### String Types
Fixed-length strings are defined using `str(N)` where N is the size:
```python
@bpf
@struct
class ProcessInfo:
name: str(16) # 16-byte string
path: str(256) # 256-byte string
```
```{note}
Strings in BPF are fixed-length and null-terminated. The size includes the null terminator.
```
### Pointer Types
```python
from ctypes import c_void_p, c_char_p
@bpf
@struct
class Pointers:
ptr: c_void_p # Generic pointer
str_ptr: c_char_p # Character pointer
```
### Nested Structs
Structs can contain other structs as fields:
```python
@bpf
@struct
class Address:
street: str(64)
city: str(32)
zip_code: c_uint32
@bpf
@struct
class Person:
name: str(32)
age: c_uint32
address: Address # Nested struct
```
## Using Structs
### As Local Variables
Create and use struct instances within BPF functions:
```python
from pythonbpf import bpf, struct, section
from pythonbpf.helper import pid, ktime, comm
from ctypes import c_void_p, c_int64, c_uint64, c_uint32
@bpf
@struct
class Event:
timestamp: c_uint64
pid: c_uint32
comm: str(16)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def capture_event(ctx: c_void_p) -> c_int64:
# Create an instance
event = Event()
# Set fields
event.timestamp = ktime()
event.pid = pid()
comm(event.comm) # Fills event.comm with process name
# Use the struct
print(f"Process with PID {event.pid}")
return 0
```
### As Map Keys and Values
Use structs as keys and values in maps for complex state storage:
```python
from pythonbpf import bpf, struct, map, section
from pythonbpf.maps import HashMap
from ctypes import c_uint32, c_uint64
@bpf
@struct
class ProcessStats:
syscall_count: c_uint64
total_time: c_uint64
max_latency: c_uint64
@bpf
@map
def stats() -> HashMap:
return HashMap(
key=c_uint32,
value=ProcessStats,
max_entries=1024
)
@bpf
@section("tracepoint/syscalls/sys_enter_read")
def track_syscalls(ctx: c_void_p) -> c_int64:
process_id = pid()
# Lookup existing stats
s = stats.lookup(process_id)
if s:
# Update existing stats
s.syscall_count = s.syscall_count + 1
stats.update(process_id, s)
else:
# Create new stats
new_stats = ProcessStats()
new_stats.syscall_count = 1
new_stats.total_time = 0
new_stats.max_latency = 0
stats.update(process_id, new_stats)
return 0
```
### With Perf Events
Send struct data to userspace using PerfEventArray:
```python
from pythonbpf import bpf, struct, map, section
from pythonbpf.maps import PerfEventArray
from pythonbpf.helper import pid, ktime, comm
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
@bpf
@struct
class ProcessEvent:
timestamp: c_uint64
pid: c_uint32
ppid: c_uint32
comm: str(16)
@bpf
@map
def events() -> PerfEventArray:
return PerfEventArray(key_size=c_uint32, value_size=c_uint32)
@bpf
@section("tracepoint/sched/sched_process_fork")
def trace_fork(ctx: c_void_p) -> c_int64:
event = ProcessEvent()
event.timestamp = ktime()
event.pid = pid()
comm(event.comm) # Fills event.comm with process name
# Send to userspace
events.output(event)
return 0
```
### With Ring Buffers
```python
from pythonbpf import bpf, struct, map, section
from pythonbpf.maps import RingBuffer
@bpf
@struct
class FileEvent:
timestamp: c_uint64
pid: c_uint32
filename: str(256)
@bpf
@map
def events() -> RingBuffer:
return RingBuffer(max_entries=4096)
@bpf
@section("tracepoint/syscalls/sys_enter_openat")
def trace_open(ctx: c_void_p) -> c_int64:
event = FileEvent()
event.timestamp = ktime()
event.pid = pid()
events.output(event)
return 0
```
## Field Access and Modification
### Reading Fields
Access struct fields using dot notation:
```python
event = Event()
ts = event.timestamp
process_id = event.pid
```
### Writing Fields
Assign values to fields:
```python
event = Event()
event.timestamp = ktime()
event.pid = pid()
comm(event.comm)
```
## StructType Class
PythonBPF provides a `StructType` class for working with struct metadata:
```python
from pythonbpf.structs import StructType
# Define a struct
@bpf
@struct
class MyStruct:
field1: c_uint64
field2: c_uint32
# Access struct information (from userspace)
# This is typically used internally by the compiler
```
## Complex Examples
### Network Packet Event
```python
from pythonbpf import bpf, struct, map, section
from pythonbpf.maps import RingBuffer
from pythonbpf.helper import ktime, XDP_PASS
from ctypes import c_void_p, c_int64, c_uint8, c_uint16, c_uint32, c_uint64
@bpf
@struct
class PacketEvent:
timestamp: c_uint64
src_ip: c_uint32
dst_ip: c_uint32
src_port: c_uint16
dst_port: c_uint16
protocol: c_uint8
length: c_uint16
@bpf
@map
def packets() -> RingBuffer:
return RingBuffer(max_entries=8192)
@bpf
@section("xdp")
def capture_packets(ctx: c_void_p) -> c_int64:
pkt = PacketEvent()
pkt.timestamp = ktime()
# Parse packet data from ctx...
packets.output(pkt)
return XDP_PASS
```
### Process Lifecycle Tracking
```python
@bpf
@struct
class ProcessLifecycle:
pid: c_uint32
ppid: c_uint32
start_time: c_uint64
exit_time: c_uint64
exit_code: c_int32
comm: str(16)
@bpf
@map
def process_info() -> HashMap:
return HashMap(
key=c_uint32,
value=ProcessLifecycle,
max_entries=4096
)
@bpf
@section("tracepoint/sched/sched_process_fork")
def track_fork(ctx: c_void_p) -> c_int64:
process_id = pid()
info = ProcessLifecycle()
info.pid = process_id
info.start_time = ktime()
process_info.update(process_id, info)
return 0
@bpf
@section("tracepoint/sched/sched_process_exit")
def track_exit(ctx: c_void_p) -> c_int64:
process_id = pid()
info = process_info.lookup(process_id)
if info:
info.exit_time = ktime()
process_info.update(process_id, info)
return 0
```
## Troubleshooting
### Struct Size Issues
If you encounter size-related errors:
* Check for excessive padding
* Verify field types are correct
* Consider reordering fields
### Initialization Problems
If fields aren't initialized correctly:
* Always initialize all fields explicitly
* Set default values where appropriate
* Use helper functions for dynamic values
### Type Mismatch Errors
If you get type errors:
* Ensure field types match assignments
* Check that imported types are from `ctypes`
* Verify nested struct definitions
## Reading Struct Data in Userspace
After capturing struct data, read it in Python:
```python
from pylibbpf import BpfMap
# Read from map
map_obj = BpfMap(b, stats)
for key, value_bytes in map_obj.items():
value = Event.from_buffer_copy(value_bytes)
print(f"PID: {value.pid}, Comm: {value.comm.decode()}")
```
## Next Steps
* Learn about {doc}`maps` for storing struct data
* Explore {doc}`helpers` for populating struct fields
* See {doc}`compilation` to understand how structs are compiled

View File

@ -34,6 +34,14 @@ dependencies = [
"pylibbpf"
]
[project.optional-dependencies]
docs = [
"sphinx>=7.0",
"myst-parser>=2.0",
"sphinx-rtd-theme>=2.0",
"sphinx-copybutton",
]
[tool.setuptools.packages.find]
where = ["."]
include = ["pythonbpf*"]

View File

@ -26,9 +26,7 @@ def create_targets_and_rvals(stmt):
return stmt.targets, [stmt.value]
def handle_assign_allocation(
builder, stmt, local_sym_tab, map_sym_tab, structs_sym_tab
):
def handle_assign_allocation(compilation_context, builder, stmt, local_sym_tab):
"""Handle memory allocation for assignment statements."""
logger.info(f"Handling assignment for allocation: {ast.dump(stmt)}")
@ -59,7 +57,7 @@ def handle_assign_allocation(
# Determine type and allocate based on rval
if isinstance(rval, ast.Call):
_allocate_for_call(
builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab
builder, var_name, rval, local_sym_tab, compilation_context
)
elif isinstance(rval, ast.Constant):
_allocate_for_constant(builder, var_name, rval, local_sym_tab)
@ -71,7 +69,7 @@ def handle_assign_allocation(
elif isinstance(rval, ast.Attribute):
# Struct field-to-variable assignment (a = dat.fld)
_allocate_for_attribute(
builder, var_name, rval, local_sym_tab, structs_sym_tab
builder, var_name, rval, local_sym_tab, compilation_context
)
else:
logger.warning(
@ -79,10 +77,9 @@ def handle_assign_allocation(
)
def _allocate_for_call(
builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab
):
def _allocate_for_call(builder, var_name, rval, local_sym_tab, compilation_context):
"""Allocate memory for variable assigned from a call."""
structs_sym_tab = compilation_context.structs_sym_tab
if isinstance(rval.func, ast.Name):
call_type = rval.func.id
@ -149,7 +146,7 @@ def _allocate_for_call(
elif isinstance(rval.func, ast.Attribute):
# Map method calls - need double allocation for ptr handling
_allocate_for_map_method(
builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab
builder, var_name, rval, local_sym_tab, compilation_context
)
else:
@ -157,9 +154,11 @@ def _allocate_for_call(
def _allocate_for_map_method(
builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab
builder, var_name, rval, local_sym_tab, compilation_context
):
"""Allocate memory for variable assigned from map method (double alloc)."""
map_sym_tab = compilation_context.map_sym_tab
structs_sym_tab = compilation_context.structs_sym_tab
map_name = rval.func.value.id
method_name = rval.func.attr
@ -299,6 +298,15 @@ def allocate_temp_pool(builder, max_temps, local_sym_tab):
logger.debug(f"Allocated temp variable: {temp_name}")
def _get_alignment(tmp_type):
"""Return alignment for a given type."""
if isinstance(tmp_type, ir.PointerType):
return 8
elif isinstance(tmp_type, ir.IntType):
return tmp_type.width // 8
return 8
def _allocate_for_name(builder, var_name, rval, local_sym_tab):
"""Allocate memory for variable-to-variable assignment (b = a)."""
source_var = rval.id
@ -321,8 +329,22 @@ def _allocate_for_name(builder, var_name, rval, local_sym_tab):
)
def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, structs_sym_tab):
def _allocate_with_type(builder, var_name, ir_type):
"""Allocate memory for a variable with a specific type."""
var = builder.alloca(ir_type, name=var_name)
if isinstance(ir_type, ir.IntType):
var.align = ir_type.width // 8
elif isinstance(ir_type, ir.PointerType):
var.align = 8
return var
def _allocate_for_attribute(
builder, var_name, rval, local_sym_tab, compilation_context
):
"""Allocate memory for struct field-to-variable assignment (a = dat.fld)."""
structs_sym_tab = compilation_context.structs_sym_tab
if not isinstance(rval.value, ast.Name):
logger.warning(f"Complex attribute access not supported for {var_name}")
return
@ -455,20 +477,3 @@ def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, structs_sym_
logger.info(
f"Pre-allocated {var_name} from {struct_var}.{field_name} with type {alloc_type}"
)
def _allocate_with_type(builder, var_name, ir_type):
"""Allocate variable with appropriate alignment for type."""
var = builder.alloca(ir_type, name=var_name)
var.align = _get_alignment(ir_type)
return var
def _get_alignment(ir_type):
"""Get appropriate alignment for IR type."""
if isinstance(ir_type, ir.IntType):
return ir_type.width // 8
elif isinstance(ir_type, ir.ArrayType) and isinstance(ir_type.element, ir.IntType):
return ir_type.element.width // 8
else:
return 8 # Default: pointer size

View File

@ -12,7 +12,7 @@ logger = logging.getLogger(__name__)
def handle_struct_field_assignment(
func, module, builder, target, rval, local_sym_tab, map_sym_tab, structs_sym_tab
func, compilation_context, builder, target, rval, local_sym_tab
):
"""Handle struct field assignment (obj.field = value)."""
@ -24,7 +24,7 @@ def handle_struct_field_assignment(
return
struct_type = local_sym_tab[var_name].metadata
struct_info = structs_sym_tab[struct_type]
struct_info = compilation_context.structs_sym_tab[struct_type]
if field_name not in struct_info.fields:
logger.error(f"Field '{field_name}' not found in struct '{struct_type}'")
@ -33,9 +33,7 @@ def handle_struct_field_assignment(
# Get field pointer and evaluate value
field_ptr = struct_info.gep(builder, local_sym_tab[var_name].var, field_name)
field_type = struct_info.field_type(field_name)
val_result = eval_expr(
func, module, builder, rval, local_sym_tab, map_sym_tab, structs_sym_tab
)
val_result = eval_expr(func, compilation_context, builder, rval, local_sym_tab)
if val_result is None:
logger.error(f"Failed to evaluate value for {var_name}.{field_name}")
@ -47,14 +45,12 @@ def handle_struct_field_assignment(
if _is_char_array(field_type) and _is_i8_ptr(val_type):
_copy_string_to_char_array(
func,
module,
compilation_context,
builder,
val,
field_ptr,
field_type,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
logger.info(f"Copied string to char array {var_name}.{field_name}")
return
@ -66,14 +62,12 @@ def handle_struct_field_assignment(
def _copy_string_to_char_array(
func,
module,
compilation_context,
builder,
src_ptr,
dst_ptr,
array_type,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
):
"""Copy string (i8*) to char array ([N x i8]) using bpf_probe_read_kernel_str"""
@ -109,7 +103,7 @@ def _is_i8_ptr(ir_type):
def handle_variable_assignment(
func, module, builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab
func, compilation_context, builder, var_name, rval, local_sym_tab
):
"""Handle single named variable assignment."""
@ -120,6 +114,8 @@ def handle_variable_assignment(
var_ptr = local_sym_tab[var_name].var
var_type = local_sym_tab[var_name].ir_type
structs_sym_tab = compilation_context.structs_sym_tab
# NOTE: Special case for struct initialization
if isinstance(rval, ast.Call) and isinstance(rval.func, ast.Name):
struct_name = rval.func.id
@ -142,9 +138,7 @@ def handle_variable_assignment(
logger.info(f"Assigned char array pointer to {var_name}")
return True
val_result = eval_expr(
func, module, builder, rval, local_sym_tab, map_sym_tab, structs_sym_tab
)
val_result = eval_expr(func, compilation_context, builder, rval, local_sym_tab)
if val_result is None:
logger.error(f"Failed to evaluate value for {var_name}")
return False

View File

@ -1,5 +1,6 @@
import ast
from llvmlite import ir
from .context import CompilationContext
from .license_pass import license_processing
from .functions import func_proc
from .maps import maps_proc
@ -67,9 +68,10 @@ def find_bpf_chunks(tree):
return bpf_functions
def processor(source_code, filename, module):
def processor(source_code, filename, compilation_context):
tree = ast.parse(source_code, filename)
logger.debug(ast.dump(tree, indent=4))
module = compilation_context.module
bpf_chunks = find_bpf_chunks(tree)
for func_node in bpf_chunks:
@ -81,15 +83,18 @@ def processor(source_code, filename, module):
if vmlinux_symtab:
handler = VmlinuxHandler.initialize(vmlinux_symtab)
VmlinuxHandlerRegistry.set_handler(handler)
compilation_context.vmlinux_handler = handler
populate_global_symbol_table(tree, module)
license_processing(tree, module)
globals_processing(tree, module)
structs_sym_tab = structs_proc(tree, module, bpf_chunks)
map_sym_tab = maps_proc(tree, module, bpf_chunks, structs_sym_tab)
func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab)
populate_global_symbol_table(tree, compilation_context)
license_processing(tree, compilation_context)
globals_processing(tree, compilation_context)
structs_sym_tab = structs_proc(tree, compilation_context, bpf_chunks)
globals_list_creation(tree, module)
map_sym_tab = maps_proc(tree, compilation_context, bpf_chunks)
func_proc(tree, compilation_context, bpf_chunks)
globals_list_creation(tree, compilation_context)
return structs_sym_tab, map_sym_tab
@ -104,6 +109,8 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
module.data_layout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
module.triple = "bpf"
compilation_context = CompilationContext(module)
if not hasattr(module, "_debug_compile_unit"):
debug_generator = DebugInfoGenerator(module)
debug_generator.generate_file_metadata(filename, os.path.dirname(filename))
@ -116,7 +123,7 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
True,
)
structs_sym_tab, maps_sym_tab = processor(source, filename, module)
structs_sym_tab, maps_sym_tab = processor(source, filename, compilation_context)
wchar_size = module.add_metadata(
[

82
pythonbpf/context.py Normal file
View File

@ -0,0 +1,82 @@
from llvmlite import ir
import logging
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from pythonbpf.structs.struct_type import StructType
from pythonbpf.maps.maps_utils import MapSymbol
logger = logging.getLogger(__name__)
class ScratchPoolManager:
"""Manage the temporary helper variables in local_sym_tab"""
def __init__(self):
self._counters = {}
@property
def counter(self):
return sum(self._counters.values())
def reset(self):
self._counters.clear()
logger.debug("Scratch pool counter reset to 0")
def _get_type_name(self, ir_type):
if isinstance(ir_type, ir.PointerType):
return "ptr"
elif isinstance(ir_type, ir.IntType):
return f"i{ir_type.width}"
elif isinstance(ir_type, ir.ArrayType):
return f"[{ir_type.count}x{self._get_type_name(ir_type.element)}]"
else:
return str(ir_type).replace(" ", "")
def get_next_temp(self, local_sym_tab, expected_type=None):
# Default to i64 if no expected type provided
type_name = self._get_type_name(expected_type) if expected_type else "i64"
if type_name not in self._counters:
self._counters[type_name] = 0
counter = self._counters[type_name]
temp_name = f"__helper_temp_{type_name}_{counter}"
self._counters[type_name] += 1
if temp_name not in local_sym_tab:
raise ValueError(
f"Scratch pool exhausted or inadequate: {temp_name}. "
f"Type: {type_name} Counter: {counter}"
)
logger.debug(f"Using {temp_name} for type {type_name}")
return local_sym_tab[temp_name].var, temp_name
class CompilationContext:
"""
Holds the state for a single compilation run.
This replaces global mutable state modules.
"""
def __init__(self, module: ir.Module):
self.module = module
# Symbol tables
self.global_sym_tab: list[ir.GlobalVariable] = []
self.structs_sym_tab: dict[str, "StructType"] = {}
self.map_sym_tab: dict[str, "MapSymbol"] = {}
# Helper management
self.scratch_pool = ScratchPoolManager()
# Vmlinux handling (optional, specialized)
self.vmlinux_handler = None # Can be VmlinuxHandler instance
# Current function context (optional, if needed globally during function processing)
self.current_func = None
def reset(self):
"""Reset state between functions if necessary, though new context per compile is preferred."""
self.scratch_pool.reset()
self.current_func = None

View File

@ -9,12 +9,8 @@ class CallHandlerRegistry:
cls._handler = handler
@classmethod
def handle_call(
cls, call, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab
):
def handle_call(cls, call, compilation_context, builder, func, local_sym_tab):
"""Handle a call using the registered handler"""
if cls._handler is None:
return None
return cls._handler(
call, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab
)
return cls._handler(call, compilation_context, builder, func, local_sym_tab)

View File

@ -37,7 +37,7 @@ def _handle_name_expr(expr: ast.Name, local_sym_tab: Dict, builder: ir.IRBuilder
raise SyntaxError(f"Undefined variable {expr.id}")
def _handle_constant_expr(module, builder, expr: ast.Constant):
def _handle_constant_expr(compilation_context, builder, expr: ast.Constant):
"""Handle ast.Constant expressions."""
if isinstance(expr.value, int) or isinstance(expr.value, bool):
return ir.Constant(ir.IntType(64), int(expr.value)), ir.IntType(64)
@ -48,7 +48,9 @@ def _handle_constant_expr(module, builder, expr: ast.Constant):
str_constant = ir.Constant(str_type, bytearray(str_bytes))
# Create global variable
global_str = ir.GlobalVariable(module, str_type, name=str_name)
global_str = ir.GlobalVariable(
compilation_context.module, str_type, name=str_name
)
global_str.linkage = "internal"
global_str.global_constant = True
global_str.initializer = str_constant
@ -64,10 +66,11 @@ def _handle_attribute_expr(
func,
expr: ast.Attribute,
local_sym_tab: Dict,
structs_sym_tab: Dict,
compilation_context,
builder: ir.IRBuilder,
):
"""Handle ast.Attribute expressions for struct field access."""
structs_sym_tab = compilation_context.structs_sym_tab
if isinstance(expr.value, ast.Name):
var_name = expr.value.id
attr_name = expr.attr
@ -157,9 +160,7 @@ def _handle_deref_call(expr: ast.Call, local_sym_tab: Dict, builder: ir.IRBuilde
# ============================================================================
def get_operand_value(
func, module, operand, builder, local_sym_tab, map_sym_tab, structs_sym_tab=None
):
def get_operand_value(func, compilation_context, operand, builder, local_sym_tab):
"""Extract the value from an operand, handling variables and constants."""
logger.info(f"Getting operand value for: {ast.dump(operand)}")
if isinstance(operand, ast.Name):
@ -187,13 +188,11 @@ def get_operand_value(
raise TypeError(f"Unsupported constant type: {type(operand.value)}")
elif isinstance(operand, ast.BinOp):
res = _handle_binary_op_impl(
func, module, operand, builder, local_sym_tab, map_sym_tab, structs_sym_tab
func, compilation_context, operand, builder, local_sym_tab
)
return res
else:
res = eval_expr(
func, module, builder, operand, local_sym_tab, map_sym_tab, structs_sym_tab
)
res = eval_expr(func, compilation_context, builder, operand, local_sym_tab)
if res is None:
raise ValueError(f"Failed to evaluate call expression: {operand}")
val, _ = res
@ -205,15 +204,13 @@ def get_operand_value(
raise TypeError(f"Unsupported operand type: {type(operand)}")
def _handle_binary_op_impl(
func, module, rval, builder, local_sym_tab, map_sym_tab, structs_sym_tab=None
):
def _handle_binary_op_impl(func, compilation_context, rval, builder, local_sym_tab):
op = rval.op
left = get_operand_value(
func, module, rval.left, builder, local_sym_tab, map_sym_tab, structs_sym_tab
func, compilation_context, rval.left, builder, local_sym_tab
)
right = get_operand_value(
func, module, rval.right, builder, local_sym_tab, map_sym_tab, structs_sym_tab
func, compilation_context, rval.right, builder, local_sym_tab
)
logger.info(f"left is {left}, right is {right}, op is {op}")
@ -249,16 +246,14 @@ def _handle_binary_op_impl(
def _handle_binary_op(
func,
module,
compilation_context,
rval,
builder,
var_name,
local_sym_tab,
map_sym_tab,
structs_sym_tab=None,
):
result = _handle_binary_op_impl(
func, module, rval, builder, local_sym_tab, map_sym_tab, structs_sym_tab
func, compilation_context, rval, builder, local_sym_tab
)
if var_name and var_name in local_sym_tab:
logger.info(
@ -275,12 +270,10 @@ def _handle_binary_op(
def _handle_ctypes_call(
func,
module,
compilation_context,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab=None,
):
"""Handle ctypes type constructor calls."""
if len(expr.args) != 1:
@ -290,12 +283,10 @@ def _handle_ctypes_call(
arg = expr.args[0]
val = eval_expr(
func,
module,
compilation_context,
builder,
arg,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if val is None:
logger.info("Failed to evaluate argument to ctypes constructor")
@ -344,9 +335,7 @@ def _handle_ctypes_call(
return value, expected_type
def _handle_compare(
func, module, builder, cond, local_sym_tab, map_sym_tab, structs_sym_tab=None
):
def _handle_compare(func, compilation_context, builder, cond, local_sym_tab):
"""Handle ast.Compare expressions."""
if len(cond.ops) != 1 or len(cond.comparators) != 1:
@ -354,21 +343,17 @@ def _handle_compare(
return None
lhs = eval_expr(
func,
module,
compilation_context,
builder,
cond.left,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
rhs = eval_expr(
func,
module,
compilation_context,
builder,
cond.comparators[0],
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if lhs is None or rhs is None:
@ -382,12 +367,10 @@ def _handle_compare(
def _handle_unary_op(
func,
module,
compilation_context,
builder,
expr: ast.UnaryOp,
local_sym_tab,
map_sym_tab,
structs_sym_tab=None,
):
"""Handle ast.UnaryOp expressions."""
if not isinstance(expr.op, ast.Not) and not isinstance(expr.op, ast.USub):
@ -395,7 +378,7 @@ def _handle_unary_op(
return None
operand = get_operand_value(
func, module, expr.operand, builder, local_sym_tab, map_sym_tab, structs_sym_tab
func, compilation_context, expr.operand, builder, local_sym_tab
)
if operand is None:
logger.error("Failed to evaluate operand for unary operation")
@ -418,7 +401,7 @@ def _handle_unary_op(
# ============================================================================
def _handle_and_op(func, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab):
def _handle_and_op(func, builder, expr, local_sym_tab, compilation_context):
"""Handle `and` boolean operations."""
logger.debug(f"Handling 'and' operator with {len(expr.values)} operands")
@ -433,7 +416,7 @@ def _handle_and_op(func, builder, expr, local_sym_tab, map_sym_tab, structs_sym_
# Evaluate current operand
operand_result = eval_expr(
func, None, builder, value, local_sym_tab, map_sym_tab, structs_sym_tab
func, compilation_context, builder, value, local_sym_tab
)
if operand_result is None:
logger.error(f"Failed to evaluate operand {i} in 'and' expression")
@ -471,7 +454,7 @@ def _handle_and_op(func, builder, expr, local_sym_tab, map_sym_tab, structs_sym_
return phi, ir.IntType(1)
def _handle_or_op(func, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab):
def _handle_or_op(func, builder, expr, local_sym_tab, compilation_context):
"""Handle `or` boolean operations."""
logger.debug(f"Handling 'or' operator with {len(expr.values)} operands")
@ -486,7 +469,7 @@ def _handle_or_op(func, builder, expr, local_sym_tab, map_sym_tab, structs_sym_t
# Evaluate current operand
operand_result = eval_expr(
func, None, builder, value, local_sym_tab, map_sym_tab, structs_sym_tab
func, compilation_context, builder, value, local_sym_tab
)
if operand_result is None:
logger.error(f"Failed to evaluate operand {i} in 'or' expression")
@ -526,23 +509,17 @@ def _handle_or_op(func, builder, expr, local_sym_tab, map_sym_tab, structs_sym_t
def _handle_boolean_op(
func,
module,
compilation_context,
builder,
expr: ast.BoolOp,
local_sym_tab,
map_sym_tab,
structs_sym_tab=None,
):
"""Handle `and` and `or` boolean operations."""
if isinstance(expr.op, ast.And):
return _handle_and_op(
func, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab
)
return _handle_and_op(func, builder, expr, local_sym_tab, compilation_context)
elif isinstance(expr.op, ast.Or):
return _handle_or_op(
func, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab
)
return _handle_or_op(func, builder, expr, local_sym_tab, compilation_context)
else:
logger.error(f"Unsupported boolean operator: {type(expr.op).__name__}")
return None
@ -555,12 +532,10 @@ def _handle_boolean_op(
def _handle_vmlinux_cast(
func,
module,
compilation_context,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab=None,
):
# handle expressions such as struct_request(ctx.di) where struct_request is a vmlinux
# struct and ctx.di is a pointer to a struct but is actually represented as a c_uint64
@ -576,12 +551,10 @@ def _handle_vmlinux_cast(
# Evaluate the argument (e.g., ctx.di which is a c_uint64)
arg_result = eval_expr(
func,
module,
compilation_context,
builder,
expr.args[0],
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if arg_result is None:
@ -614,18 +587,17 @@ def _handle_vmlinux_cast(
def _handle_user_defined_struct_cast(
func,
module,
compilation_context,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
):
"""Handle user-defined struct cast expressions like iphdr(nh).
This casts a pointer/integer value to a pointer to the user-defined struct,
similar to how vmlinux struct casts work but for user-defined @struct types.
"""
structs_sym_tab = compilation_context.structs_sym_tab
if len(expr.args) != 1:
logger.info("User-defined struct cast takes exactly one argument")
return None
@ -643,12 +615,10 @@ def _handle_user_defined_struct_cast(
# an address/pointer value)
arg_result = eval_expr(
func,
module,
compilation_context,
builder,
expr.args[0],
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if arg_result is None:
@ -683,30 +653,28 @@ def _handle_user_defined_struct_cast(
def eval_expr(
func,
module,
compilation_context,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab=None,
):
structs_sym_tab = compilation_context.structs_sym_tab
logger.info(f"Evaluating expression: {ast.dump(expr)}")
if isinstance(expr, ast.Name):
return _handle_name_expr(expr, local_sym_tab, builder)
elif isinstance(expr, ast.Constant):
return _handle_constant_expr(module, builder, expr)
return _handle_constant_expr(compilation_context, builder, expr)
elif isinstance(expr, ast.Call):
if isinstance(expr.func, ast.Name) and VmlinuxHandlerRegistry.is_vmlinux_struct(
expr.func.id
):
return _handle_vmlinux_cast(
func,
module,
compilation_context,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if isinstance(expr.func, ast.Name) and expr.func.id == "deref":
return _handle_deref_call(expr, local_sym_tab, builder)
@ -714,26 +682,23 @@ def eval_expr(
if isinstance(expr.func, ast.Name) and is_ctypes(expr.func.id):
return _handle_ctypes_call(
func,
module,
compilation_context,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if isinstance(expr.func, ast.Name) and (expr.func.id in structs_sym_tab):
return _handle_user_defined_struct_cast(
func,
module,
compilation_context,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
# NOTE: Updated handle_call signature
result = CallHandlerRegistry.handle_call(
expr, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab
expr, compilation_context, builder, func, local_sym_tab
)
if result is not None:
return result
@ -742,30 +707,24 @@ def eval_expr(
return None
elif isinstance(expr, ast.Attribute):
return _handle_attribute_expr(
func, expr, local_sym_tab, structs_sym_tab, builder
func, expr, local_sym_tab, compilation_context, builder
)
elif isinstance(expr, ast.BinOp):
return _handle_binary_op(
func,
module,
compilation_context,
expr,
builder,
None,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
elif isinstance(expr, ast.Compare):
return _handle_compare(
func, module, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab
)
return _handle_compare(func, compilation_context, builder, expr, local_sym_tab)
elif isinstance(expr, ast.UnaryOp):
return _handle_unary_op(
func, module, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab
)
return _handle_unary_op(func, compilation_context, builder, expr, local_sym_tab)
elif isinstance(expr, ast.BoolOp):
return _handle_boolean_op(
func, module, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab
func, compilation_context, builder, expr, local_sym_tab
)
logger.info("Unsupported expression evaluation")
return None
@ -773,12 +732,10 @@ def eval_expr(
def handle_expr(
func,
module,
compilation_context,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
):
"""Handle expression statements in the function body."""
logger.info(f"Handling expression: {ast.dump(expr)}")
@ -786,12 +743,10 @@ def handle_expr(
if isinstance(call, ast.Call):
eval_expr(
func,
module,
compilation_context,
builder,
call,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
else:
logger.info("Unsupported expression type")

View File

@ -4,7 +4,6 @@ import logging
from pythonbpf.helper import (
HelperHandlerRegistry,
reset_scratch_pool,
)
from pythonbpf.type_deducer import ctypes_to_ir
from pythonbpf.expr import (
@ -76,36 +75,30 @@ def count_temps_in_call(call_node, local_sym_tab):
def handle_if_allocation(
module, builder, stmt, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab
compilation_context, builder, stmt, func, ret_type, local_sym_tab
):
"""Recursively handle allocations in if/else branches."""
if stmt.body:
allocate_mem(
module,
compilation_context,
builder,
stmt.body,
func,
ret_type,
map_sym_tab,
local_sym_tab,
structs_sym_tab,
)
if stmt.orelse:
allocate_mem(
module,
compilation_context,
builder,
stmt.orelse,
func,
ret_type,
map_sym_tab,
local_sym_tab,
structs_sym_tab,
)
def allocate_mem(
module, builder, body, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab
):
def allocate_mem(compilation_context, builder, body, func, ret_type, local_sym_tab):
max_temps_needed = {}
def merge_type_counts(count_dict):
@ -137,19 +130,15 @@ def allocate_mem(
# Handle allocations
if isinstance(stmt, ast.If):
handle_if_allocation(
module,
compilation_context,
builder,
stmt,
func,
ret_type,
map_sym_tab,
local_sym_tab,
structs_sym_tab,
)
elif isinstance(stmt, ast.Assign):
handle_assign_allocation(
builder, stmt, local_sym_tab, map_sym_tab, structs_sym_tab
)
handle_assign_allocation(compilation_context, builder, stmt, local_sym_tab)
allocate_temp_pool(builder, max_temps_needed, local_sym_tab)
@ -161,9 +150,7 @@ def allocate_mem(
# ============================================================================
def handle_assign(
func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_sym_tab
):
def handle_assign(func, compilation_context, builder, stmt, local_sym_tab):
"""Handle assignment statements in the function body."""
# NOTE: Support multi-target assignments (e.g.: a, b = 1, 2)
@ -175,13 +162,11 @@ def handle_assign(
var_name = target.id
result = handle_variable_assignment(
func,
module,
compilation_context,
builder,
var_name,
rval,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if not result:
logger.error(f"Failed to handle assignment to {var_name}")
@ -191,13 +176,11 @@ def handle_assign(
# NOTE: Struct field assignment case: pkt.field = value
handle_struct_field_assignment(
func,
module,
compilation_context,
builder,
target,
rval,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
continue
@ -205,18 +188,12 @@ def handle_assign(
logger.error(f"Unsupported assignment target: {ast.dump(target)}")
def handle_cond(
func, module, builder, cond, local_sym_tab, map_sym_tab, structs_sym_tab=None
):
val = eval_expr(
func, module, builder, cond, local_sym_tab, map_sym_tab, structs_sym_tab
)[0]
def handle_cond(func, compilation_context, builder, cond, local_sym_tab):
val = eval_expr(func, compilation_context, builder, cond, local_sym_tab)[0]
return convert_to_bool(builder, val)
def handle_if(
func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_sym_tab=None
):
def handle_if(func, compilation_context, builder, stmt, local_sym_tab):
"""Handle if statements in the function body."""
logger.info("Handling if statement")
# start = builder.block.parent
@ -227,9 +204,7 @@ def handle_if(
else:
else_block = None
cond = handle_cond(
func, module, builder, stmt.test, local_sym_tab, map_sym_tab, structs_sym_tab
)
cond = handle_cond(func, compilation_context, builder, stmt.test, local_sym_tab)
if else_block:
builder.cbranch(cond, then_block, else_block)
else:
@ -237,9 +212,7 @@ def handle_if(
builder.position_at_end(then_block)
for s in stmt.body:
process_stmt(
func, module, builder, s, local_sym_tab, map_sym_tab, structs_sym_tab, False
)
process_stmt(func, compilation_context, builder, s, local_sym_tab, False)
if not builder.block.is_terminated:
builder.branch(merge_block)
@ -248,12 +221,10 @@ def handle_if(
for s in stmt.orelse:
process_stmt(
func,
module,
compilation_context,
builder,
s,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
False,
)
if not builder.block.is_terminated:
@ -262,21 +233,25 @@ def handle_if(
builder.position_at_end(merge_block)
def handle_return(builder, stmt, local_sym_tab, ret_type):
def handle_return(builder, stmt, local_sym_tab, ret_type, compilation_context=None):
logger.info(f"Handling return statement: {ast.dump(stmt)}")
if stmt.value is None:
return handle_none_return(builder)
elif isinstance(stmt.value, ast.Name) and is_xdp_name(stmt.value.id):
return handle_xdp_return(stmt, builder, ret_type)
else:
# Fallback for now if ctx not passed, but caller should pass it
if compilation_context is None:
raise RuntimeError(
"CompilationContext required for return statement evaluation"
)
val = eval_expr(
func=None,
module=None,
compilation_context=compilation_context,
builder=builder,
expr=stmt.value,
local_sym_tab=local_sym_tab,
map_sym_tab={},
structs_sym_tab={},
)
logger.info(f"Evaluated return expression to {val}")
builder.ret(val[0])
@ -285,43 +260,34 @@ def handle_return(builder, stmt, local_sym_tab, ret_type):
def process_stmt(
func,
module,
compilation_context,
builder,
stmt,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
did_return,
ret_type=ir.IntType(64),
):
logger.info(f"Processing statement: {ast.dump(stmt)}")
reset_scratch_pool()
# Use context scratch pool
compilation_context.scratch_pool.reset()
if isinstance(stmt, ast.Expr):
handle_expr(
func,
module,
compilation_context,
builder,
stmt,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
elif isinstance(stmt, ast.Assign):
handle_assign(
func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_sym_tab
)
handle_assign(func, compilation_context, builder, stmt, local_sym_tab)
elif isinstance(stmt, ast.AugAssign):
raise SyntaxError("Augmented assignment not supported")
elif isinstance(stmt, ast.If):
handle_if(
func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_sym_tab
)
handle_if(func, compilation_context, builder, stmt, local_sym_tab)
elif isinstance(stmt, ast.Return):
did_return = handle_return(
builder,
stmt,
local_sym_tab,
ret_type,
builder, stmt, local_sym_tab, ret_type, compilation_context
)
return did_return
@ -332,13 +298,11 @@ def process_stmt(
def process_func_body(
module,
compilation_context,
builder,
func_node,
func,
ret_type,
map_sym_tab,
structs_sym_tab,
):
"""Process the body of a bpf function"""
# TODO: A lot. We just have print -> bpf_trace_printk for now
@ -360,6 +324,9 @@ def process_func_body(
raise TypeError(
f"Unsupported annotation type: {ast.dump(context_arg.annotation)}"
)
# Use context's handler if available, else usage of VmlinuxHandlerRegistry
# For now relying on VmlinuxHandlerRegistry which relies on codegen setting it
if VmlinuxHandlerRegistry.is_vmlinux_struct(context_type_name):
resolved_type = VmlinuxHandlerRegistry.get_struct_type(
context_type_name
@ -370,14 +337,12 @@ def process_func_body(
# pre-allocate dynamic variables
local_sym_tab = allocate_mem(
module,
compilation_context,
builder,
func_node.body,
func,
ret_type,
map_sym_tab,
local_sym_tab,
structs_sym_tab,
)
logger.info(f"Local symbol table: {local_sym_tab.keys()}")
@ -385,12 +350,10 @@ def process_func_body(
for stmt in func_node.body:
did_return = process_stmt(
func,
module,
compilation_context,
builder,
stmt,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
did_return,
ret_type,
)
@ -399,9 +362,12 @@ def process_func_body(
builder.ret(ir.Constant(ir.IntType(64), 0))
def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_tab):
def process_bpf_chunk(func_node, compilation_context, return_type):
"""Process a single BPF chunk (function) and emit corresponding LLVM IR."""
# Set current function in context (optional but good for future)
compilation_context.current_func = func_node
func_name = func_node.name
ret_type = return_type
@ -413,7 +379,7 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t
param_types.append(ir.PointerType())
func_ty = ir.FunctionType(ret_type, param_types)
func = ir.Function(module, func_ty, func_name)
func = ir.Function(compilation_context.module, func_ty, func_name)
func.linkage = "dso_local"
func.attributes.add("nounwind")
@ -433,13 +399,11 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t
builder = ir.IRBuilder(block)
process_func_body(
module,
compilation_context,
builder,
func_node,
func,
ret_type,
map_sym_tab,
structs_sym_tab,
)
return func
@ -449,23 +413,32 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t
# ============================================================================
def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab):
def func_proc(tree, compilation_context, chunks):
"""Process all functions decorated with @bpf and @bpfglobal"""
for func_node in chunks:
# Ignore structs and maps
# Check against the lists
if (
func_node.name in compilation_context.structs_sym_tab
or func_node.name in compilation_context.map_sym_tab
):
continue
# Also check decorators to be sure
decorators = [d.id for d in func_node.decorator_list if isinstance(d, ast.Name)]
if "struct" in decorators or "map" in decorators:
continue
if is_global_function(func_node):
continue
func_type = get_probe_string(func_node)
logger.info(f"Found probe_string of {func_node.name}: {func_type}")
func = process_bpf_chunk(
func_node,
module,
ctypes_to_ir(infer_return_type(func_node)),
map_sym_tab,
structs_sym_tab,
)
return_type = ctypes_to_ir(infer_return_type(func_node))
func = process_bpf_chunk(func_node, compilation_context, return_type)
logger.info(f"Generating Debug Info for Function {func_node.name}")
generate_function_debug_info(func_node, module, func)
generate_function_debug_info(func_node, compilation_context.module, func)
# TODO: WIP, for string assignment to fixed-size arrays

View File

@ -7,11 +7,11 @@ from .type_deducer import ctypes_to_ir
logger: Logger = logging.getLogger(__name__)
# TODO: this is going to be a huge fuck of a headache in the future.
global_sym_tab = []
def populate_global_symbol_table(tree, module: ir.Module):
def populate_global_symbol_table(tree, compilation_context):
"""
compilation_context: CompilationContext
"""
for node in tree.body:
if isinstance(node, ast.FunctionDef):
for dec in node.decorator_list:
@ -23,12 +23,12 @@ def populate_global_symbol_table(tree, module: ir.Module):
and isinstance(dec.args[0], ast.Constant)
and isinstance(dec.args[0].value, str)
):
global_sym_tab.append(node)
compilation_context.global_sym_tab.append(node)
elif isinstance(dec, ast.Name) and dec.id == "bpfglobal":
global_sym_tab.append(node)
compilation_context.global_sym_tab.append(node)
elif isinstance(dec, ast.Name) and dec.id == "map":
global_sym_tab.append(node)
compilation_context.global_sym_tab.append(node)
return False
@ -74,9 +74,12 @@ def emit_global(module: ir.Module, node, name):
return gvar
def globals_processing(tree, module):
def globals_processing(tree, compilation_context):
"""Process stuff decorated with @bpf and @bpfglobal except license and return the section name"""
globals_sym_tab = []
# Local tracking for duplicate checking if needed, or we can iterate context
# But for now, we process specific nodes
current_globals = []
for node in tree.body:
# Skip non-assignment and non-function nodes
@ -90,10 +93,10 @@ def globals_processing(tree, module):
continue
# Check for duplicate names
if name in globals_sym_tab:
if name in current_globals:
raise SyntaxError(f"ERROR: Global name '{name}' previously defined")
else:
globals_sym_tab.append(name)
current_globals.append(name)
if isinstance(node, ast.FunctionDef) and node.name != "LICENSE":
decorators = [
@ -108,7 +111,7 @@ def globals_processing(tree, module):
node.body[0].value, (ast.Constant, ast.Name, ast.Call)
)
):
emit_global(module, node, name)
emit_global(compilation_context.module, node, name)
else:
raise SyntaxError(f"ERROR: Invalid syntax for {name} global")
@ -137,8 +140,9 @@ def emit_llvm_compiler_used(module: ir.Module, names: list[str]):
gv.section = "llvm.metadata"
def globals_list_creation(tree, module: ir.Module):
def globals_list_creation(tree, compilation_context):
collected = ["LICENSE"]
module = compilation_context.module
for node in tree.body:
if isinstance(node, ast.FunctionDef):

View File

@ -1,5 +1,5 @@
from .helper_registry import HelperHandlerRegistry
from .helper_utils import reset_scratch_pool
from .bpf_helper_handler import (
handle_helper_call,
emit_probe_read_kernel_str_call,
@ -28,9 +28,7 @@ def _register_helper_handler():
"""Register helper call handler with the expression evaluator"""
from pythonbpf.expr.expr_pass import CallHandlerRegistry
def helper_call_handler(
call, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab
):
def helper_call_handler(call, compilation_context, builder, func, local_sym_tab):
"""Check if call is a helper and handle it"""
import ast
@ -39,17 +37,16 @@ def _register_helper_handler():
if HelperHandlerRegistry.has_handler(call.func.id):
return handle_helper_call(
call,
module,
compilation_context,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
# Check for method calls (e.g., map.lookup())
elif isinstance(call.func, ast.Attribute):
method_name = call.func.attr
map_sym_tab = compilation_context.map_sym_tab
# Handle: my_map.lookup(key)
if isinstance(call.func.value, ast.Name):
@ -58,12 +55,10 @@ def _register_helper_handler():
if HelperHandlerRegistry.has_handler(method_name):
return handle_helper_call(
call,
module,
compilation_context,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
return None
@ -76,7 +71,6 @@ _register_helper_handler()
__all__ = [
"HelperHandlerRegistry",
"reset_scratch_pool",
"handle_helper_call",
"emit_probe_read_kernel_str_call",
"emit_probe_read_kernel_call",

View File

@ -50,12 +50,10 @@ class BPFHelperID(Enum):
def bpf_ktime_get_ns_emitter(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_ktime_get_ns helper function call.
@ -77,12 +75,10 @@ def bpf_ktime_get_ns_emitter(
def bpf_get_current_cgroup_id(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_get_current_cgroup_id helper function call.
@ -104,12 +100,10 @@ def bpf_get_current_cgroup_id(
def bpf_map_lookup_elem_emitter(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_map_lookup_elem helper function call.
@ -119,7 +113,7 @@ def bpf_map_lookup_elem_emitter(
f"Map lookup expects exactly one argument (key), got {len(call.args)}"
)
key_ptr = get_or_create_ptr_from_arg(
func, module, call.args[0], builder, local_sym_tab, map_sym_tab, struct_sym_tab
func, compilation_context, call.args[0], builder, local_sym_tab
)
map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
@ -147,12 +141,10 @@ def bpf_map_lookup_elem_emitter(
def bpf_printk_emitter(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""Emit LLVM IR for bpf_printk helper function call."""
if not hasattr(func, "_fmt_counter"):
@ -165,16 +157,18 @@ def bpf_printk_emitter(
if isinstance(call.args[0], ast.JoinedStr):
args = handle_fstring_print(
call.args[0],
module,
compilation_context.module,
builder,
func,
local_sym_tab,
struct_sym_tab,
compilation_context.structs_sym_tab,
)
elif isinstance(call.args[0], ast.Constant) and isinstance(call.args[0].value, str):
# TODO: We are only supporting single arguments for now.
# In case of multiple args, the first one will be taken.
args = simple_string_print(call.args[0].value, module, builder, func)
args = simple_string_print(
call.args[0].value, compilation_context.module, builder, func
)
else:
raise NotImplementedError(
"Only simple strings or f-strings are supported in bpf_printk."
@ -203,12 +197,10 @@ def bpf_printk_emitter(
def bpf_map_update_elem_emitter(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_map_update_elem helper function call.
@ -224,10 +216,10 @@ def bpf_map_update_elem_emitter(
flags_arg = call.args[2] if len(call.args) > 2 else None
key_ptr = get_or_create_ptr_from_arg(
func, module, key_arg, builder, local_sym_tab, map_sym_tab, struct_sym_tab
func, compilation_context, key_arg, builder, local_sym_tab
)
value_ptr = get_or_create_ptr_from_arg(
func, module, value_arg, builder, local_sym_tab, map_sym_tab, struct_sym_tab
func, compilation_context, value_arg, builder, local_sym_tab
)
flags_val = get_flags_val(flags_arg, builder, local_sym_tab)
@ -262,12 +254,10 @@ def bpf_map_update_elem_emitter(
def bpf_map_delete_elem_emitter(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_map_delete_elem helper function call.
@ -278,7 +268,7 @@ def bpf_map_delete_elem_emitter(
f"Map delete expects exactly one argument (key), got {len(call.args)}"
)
key_ptr = get_or_create_ptr_from_arg(
func, module, call.args[0], builder, local_sym_tab, map_sym_tab, struct_sym_tab
func, compilation_context, call.args[0], builder, local_sym_tab
)
map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
@ -306,12 +296,10 @@ def bpf_map_delete_elem_emitter(
def bpf_get_current_comm_emitter(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_get_current_comm helper function call.
@ -327,7 +315,7 @@ def bpf_get_current_comm_emitter(
# Extract buffer pointer and size
buf_ptr, buf_size = get_buffer_ptr_and_size(
buf_arg, builder, local_sym_tab, struct_sym_tab
buf_arg, builder, local_sym_tab, compilation_context
)
# Validate it's a char array
@ -367,12 +355,10 @@ def bpf_get_current_comm_emitter(
def bpf_get_current_pid_tgid_emitter(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_get_current_pid_tgid helper function call.
@ -394,12 +380,10 @@ def bpf_get_current_pid_tgid_emitter(
def bpf_perf_event_output_handler(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_perf_event_output helper function call.
@ -412,7 +396,9 @@ def bpf_perf_event_output_handler(
data_arg = call.args[0]
ctx_ptr = func.args[0] # First argument to the function is ctx
data_ptr, size_val = get_data_ptr_and_size(data_arg, local_sym_tab, struct_sym_tab)
data_ptr, size_val = get_data_ptr_and_size(
data_arg, local_sym_tab, compilation_context.structs_sym_tab
)
# BPF_F_CURRENT_CPU is -1 in 32 bit
flags_val = ir.Constant(ir.IntType(64), 0xFFFFFFFF)
@ -445,12 +431,10 @@ def bpf_perf_event_output_handler(
def bpf_ringbuf_output_emitter(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_ringbuf_output helper function call.
@ -461,7 +445,9 @@ def bpf_ringbuf_output_emitter(
f"Ringbuf output expects exactly one argument, got {len(call.args)}"
)
data_arg = call.args[0]
data_ptr, size_val = get_data_ptr_and_size(data_arg, local_sym_tab, struct_sym_tab)
data_ptr, size_val = get_data_ptr_and_size(
data_arg, local_sym_tab, compilation_context.structs_sym_tab
)
flags_val = ir.Constant(ir.IntType(64), 0)
map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
@ -496,38 +482,32 @@ def bpf_ringbuf_output_emitter(
def handle_output_helper(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Route output helper to the appropriate emitter based on map type.
"""
match map_sym_tab[map_ptr.name].type:
match compilation_context.map_sym_tab[map_ptr.name].type:
case BPFMapType.PERF_EVENT_ARRAY:
return bpf_perf_event_output_handler(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab,
struct_sym_tab,
map_sym_tab,
)
case BPFMapType.RINGBUF:
return bpf_ringbuf_output_emitter(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab,
struct_sym_tab,
map_sym_tab,
)
case _:
logger.error("Unsupported map type for output helper.")
@ -572,12 +552,10 @@ def emit_probe_read_kernel_str_call(builder, dst_ptr, dst_size, src_ptr):
def bpf_probe_read_kernel_str_emitter(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""Emit LLVM IR for bpf_probe_read_kernel_str helper."""
@ -588,12 +566,12 @@ def bpf_probe_read_kernel_str_emitter(
# Get destination buffer (char array -> i8*)
dst_ptr, dst_size = get_or_create_ptr_from_arg(
func, module, call.args[0], builder, local_sym_tab, map_sym_tab, struct_sym_tab
func, compilation_context, call.args[0], builder, local_sym_tab
)
# Get source pointer (evaluate expression)
src_ptr, src_type = get_ptr_from_arg(
call.args[1], func, module, builder, local_sym_tab, map_sym_tab, struct_sym_tab
call.args[1], func, compilation_context, builder, local_sym_tab
)
# Emit the helper call
@ -641,12 +619,10 @@ def emit_probe_read_kernel_call(builder, dst_ptr, dst_size, src_ptr):
def bpf_probe_read_kernel_emitter(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""Emit LLVM IR for bpf_probe_read_kernel helper."""
@ -657,12 +633,12 @@ def bpf_probe_read_kernel_emitter(
# Get destination buffer (char array -> i8*)
dst_ptr, dst_size = get_or_create_ptr_from_arg(
func, module, call.args[0], builder, local_sym_tab, map_sym_tab, struct_sym_tab
func, compilation_context, call.args[0], builder, local_sym_tab
)
# Get source pointer (evaluate expression)
src_ptr, src_type = get_ptr_from_arg(
call.args[1], func, module, builder, local_sym_tab, map_sym_tab, struct_sym_tab
call.args[1], func, compilation_context, builder, local_sym_tab
)
# Emit the helper call
@ -680,12 +656,10 @@ def bpf_probe_read_kernel_emitter(
def bpf_get_prandom_u32_emitter(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_get_prandom_u32 helper function call.
@ -710,12 +684,10 @@ def bpf_get_prandom_u32_emitter(
def bpf_probe_read_emitter(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_probe_read helper function
@ -726,31 +698,25 @@ def bpf_probe_read_emitter(
return
dst_ptr = get_or_create_ptr_from_arg(
func,
module,
compilation_context,
call.args[0],
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
ir.IntType(8),
)
size_val = get_int_value_from_arg(
call.args[1],
func,
module,
compilation_context,
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
)
src_ptr = get_or_create_ptr_from_arg(
func,
module,
compilation_context,
call.args[2],
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
ir.IntType(8),
)
fn_type = ir.FunctionType(
@ -783,12 +749,10 @@ def bpf_probe_read_emitter(
def bpf_get_smp_processor_id_emitter(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_get_smp_processor_id helper function call.
@ -810,12 +774,10 @@ def bpf_get_smp_processor_id_emitter(
def bpf_get_current_uid_gid_emitter(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_get_current_uid_gid helper function call.
@ -846,12 +808,10 @@ def bpf_get_current_uid_gid_emitter(
def bpf_skb_store_bytes_emitter(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_skb_store_bytes helper function call.
@ -875,30 +835,24 @@ def bpf_skb_store_bytes_emitter(
offset_val = get_int_value_from_arg(
call.args[0],
func,
module,
compilation_context,
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
)
from_ptr = get_or_create_ptr_from_arg(
func,
module,
compilation_context,
call.args[1],
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
args_signature[2],
)
len_val = get_int_value_from_arg(
call.args[2],
func,
module,
compilation_context,
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
)
if len(call.args) == 4:
flags_val = get_flags_val(call.args[3], builder, local_sym_tab)
@ -940,12 +894,10 @@ def bpf_skb_store_bytes_emitter(
def bpf_ringbuf_reserve_emitter(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_ringbuf_reserve helper function call.
@ -960,11 +912,9 @@ def bpf_ringbuf_reserve_emitter(
size_val = get_int_value_from_arg(
call.args[0],
func,
module,
compilation_context,
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
)
map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
@ -991,12 +941,10 @@ def bpf_ringbuf_reserve_emitter(
def bpf_ringbuf_submit_emitter(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_ringbuf_submit helper function call.
@ -1013,12 +961,10 @@ def bpf_ringbuf_submit_emitter(
data_ptr = get_or_create_ptr_from_arg(
func,
module,
compilation_context,
data_arg,
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
ir.PointerType(ir.IntType(8)),
)
@ -1050,12 +996,10 @@ def bpf_ringbuf_submit_emitter(
def bpf_get_stack_emitter(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_get_stack helper function call.
@ -1068,7 +1012,7 @@ def bpf_get_stack_emitter(
buf_arg = call.args[0]
flags_arg = call.args[1] if len(call.args) == 2 else None
buf_ptr, buf_size = get_buffer_ptr_and_size(
buf_arg, builder, local_sym_tab, struct_sym_tab
buf_arg, builder, local_sym_tab, compilation_context
)
flags_val = get_flags_val(flags_arg, builder, local_sym_tab)
if isinstance(flags_val, int):
@ -1098,12 +1042,10 @@ def bpf_get_stack_emitter(
def handle_helper_call(
call,
module,
compilation_context,
builder,
func,
local_sym_tab=None,
map_sym_tab=None,
struct_sym_tab=None,
):
"""Process a BPF helper function call and emit the appropriate LLVM IR."""
@ -1117,14 +1059,14 @@ def handle_helper_call(
return handler(
call,
map_ptr,
module,
compilation_context,
builder,
func,
local_sym_tab,
struct_sym_tab,
map_sym_tab,
)
map_sym_tab = compilation_context.map_sym_tab
# Handle direct function calls (e.g., print(), ktime())
if isinstance(call.func, ast.Name):
return invoke_helper(call.func.id)

View File

@ -3,7 +3,6 @@ import logging
from llvmlite import ir
from pythonbpf.expr import (
get_operand_value,
eval_expr,
access_struct_field,
)
@ -11,56 +10,38 @@ from pythonbpf.expr import (
logger = logging.getLogger(__name__)
class ScratchPoolManager:
"""Manage the temporary helper variables in local_sym_tab"""
# NOTE: ScratchPoolManager is now in context.py
def __init__(self):
self._counters = {}
@property
def counter(self):
return sum(self._counters.values())
def get_ptr_from_arg(arg, compilation_context, builder, local_sym_tab):
"""Helper to get a pointer value from an argument."""
# This is a bit duplicative of logic in eval_expr but simplified for helpers
# We might need to handle more cases here or defer to eval_expr
def reset(self):
self._counters.clear()
logger.debug("Scratch pool counter reset to 0")
# Simple check for name
if isinstance(arg, ast.Name):
if arg.id in local_sym_tab:
sym = local_sym_tab[arg.id]
if isinstance(sym.ir_type, ir.PointerType):
return builder.load(sym.var)
# If it's an array/struct we might need GEP depending on how it was allocated
# For now assume load returns the pointer/value
return builder.load(sym.var)
def _get_type_name(self, ir_type):
if isinstance(ir_type, ir.PointerType):
return "ptr"
elif isinstance(ir_type, ir.IntType):
return f"i{ir_type.width}"
elif isinstance(ir_type, ir.ArrayType):
return f"[{ir_type.count}x{self._get_type_name(ir_type.element)}]"
else:
return str(ir_type).replace(" ", "")
def get_next_temp(self, local_sym_tab, expected_type=None):
# Default to i64 if no expected type provided
type_name = self._get_type_name(expected_type) if expected_type else "i64"
if type_name not in self._counters:
self._counters[type_name] = 0
counter = self._counters[type_name]
temp_name = f"__helper_temp_{type_name}_{counter}"
self._counters[type_name] += 1
if temp_name not in local_sym_tab:
raise ValueError(
f"Scratch pool exhausted or inadequate: {temp_name}. "
f"Type: {type_name} Counter: {counter}"
# Use eval_expr for general case
val = eval_expr(
None,
compilation_context.module,
builder,
arg,
local_sym_tab,
compilation_context.map_sym_tab,
compilation_context.structs_sym_tab,
)
if val and isinstance(val[0].type, ir.PointerType):
return val[0]
logger.debug(f"Using {temp_name} for type {type_name}")
return local_sym_tab[temp_name].var, temp_name
_temp_pool_manager = ScratchPoolManager() # Singleton instance
def reset_scratch_pool():
"""Reset the scratch pool counter"""
_temp_pool_manager.reset()
return None
# ============================================================================
@ -75,11 +56,15 @@ def get_var_ptr_from_name(var_name, local_sym_tab):
raise ValueError(f"Variable '{var_name}' not found in local symbol table")
def create_int_constant_ptr(value, builder, local_sym_tab, int_width=64):
def create_int_constant_ptr(
value, builder, compilation_context, local_sym_tab, int_width=64
):
"""Create a pointer to an integer constant."""
int_type = ir.IntType(int_width)
ptr, temp_name = _temp_pool_manager.get_next_temp(local_sym_tab, int_type)
ptr, temp_name = compilation_context.scratch_pool.get_next_temp(
local_sym_tab, int_type
)
logger.info(f"Using temp variable '{temp_name}' for int constant {value}")
const_val = ir.Constant(int_type, value)
builder.store(const_val, ptr)
@ -88,12 +73,10 @@ def create_int_constant_ptr(value, builder, local_sym_tab, int_width=64):
def get_or_create_ptr_from_arg(
func,
module,
compilation_context,
arg,
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab=None,
expected_type=None,
):
"""Extract or create pointer from the call arguments."""
@ -102,16 +85,22 @@ def get_or_create_ptr_from_arg(
sz = None
if isinstance(arg, ast.Name):
# Stack space is already allocated
ptr = get_var_ptr_from_name(arg.id, local_sym_tab)
if arg.id in local_sym_tab:
ptr = local_sym_tab[arg.id].var
else:
raise ValueError(f"Variable '{arg.id}' not found")
elif isinstance(arg, ast.Constant) and isinstance(arg.value, int):
int_width = 64 # Default to i64
if expected_type and isinstance(expected_type, ir.IntType):
int_width = expected_type.width
ptr = create_int_constant_ptr(arg.value, builder, local_sym_tab, int_width)
ptr = create_int_constant_ptr(
arg.value, builder, compilation_context, local_sym_tab, int_width
)
elif isinstance(arg, ast.Attribute):
# A struct field
struct_name = arg.value.id
field_name = arg.attr
struct_sym_tab = compilation_context.structs_sym_tab
if not local_sym_tab or struct_name not in local_sym_tab:
raise ValueError(f"Struct '{struct_name}' not found")
@ -136,7 +125,7 @@ def get_or_create_ptr_from_arg(
and field_type.element.width == 8
):
ptr, sz = get_char_array_ptr_and_size(
arg, builder, local_sym_tab, struct_sym_tab, func
arg, builder, local_sym_tab, compilation_context, func
)
if not ptr:
raise ValueError("Failed to get char array pointer from struct field")
@ -146,13 +135,15 @@ def get_or_create_ptr_from_arg(
else:
# NOTE: For any integer expression reaching this branch, it is probably a struct field or a binop
# Evaluate the expression and store the result in a temp variable
val = get_operand_value(
func, module, arg, builder, local_sym_tab, map_sym_tab, struct_sym_tab
)
val = eval_expr(func, compilation_context, builder, arg, local_sym_tab)
if val:
val = val[0]
if val is None:
raise ValueError("Failed to evaluate expression for helper arg.")
ptr, temp_name = _temp_pool_manager.get_next_temp(local_sym_tab, expected_type)
ptr, temp_name = compilation_context.scratch_pool.get_next_temp(
local_sym_tab, expected_type
)
logger.info(f"Using temp variable '{temp_name}' for expression result")
if (
isinstance(val.type, ir.IntType)
@ -188,8 +179,9 @@ def get_flags_val(arg, builder, local_sym_tab):
)
def get_data_ptr_and_size(data_arg, local_sym_tab, struct_sym_tab):
def get_data_ptr_and_size(data_arg, local_sym_tab, compilation_context):
"""Extract data pointer and size information for perf event output."""
struct_sym_tab = compilation_context.structs_sym_tab
if isinstance(data_arg, ast.Name):
data_name = data_arg.id
if local_sym_tab and data_name in local_sym_tab:
@ -213,8 +205,9 @@ def get_data_ptr_and_size(data_arg, local_sym_tab, struct_sym_tab):
)
def get_buffer_ptr_and_size(buf_arg, builder, local_sym_tab, struct_sym_tab):
def get_buffer_ptr_and_size(buf_arg, builder, local_sym_tab, compilation_context):
"""Extract buffer pointer and size from either a struct field or variable."""
struct_sym_tab = compilation_context.structs_sym_tab
# Case 1: Struct field (obj.field)
if isinstance(buf_arg, ast.Attribute):
@ -268,9 +261,10 @@ def get_buffer_ptr_and_size(buf_arg, builder, local_sym_tab, struct_sym_tab):
def get_char_array_ptr_and_size(
buf_arg, builder, local_sym_tab, struct_sym_tab, func=None
buf_arg, builder, local_sym_tab, compilation_context, func=None
):
"""Get pointer to char array and its size."""
struct_sym_tab = compilation_context.structs_sym_tab
# Struct field: obj.field
if isinstance(buf_arg, ast.Attribute) and isinstance(buf_arg.value, ast.Name):
@ -351,34 +345,10 @@ def _is_char_array(ir_type):
)
def get_ptr_from_arg(
arg, func, module, builder, local_sym_tab, map_sym_tab, struct_sym_tab
):
"""Evaluate argument and return pointer value"""
result = eval_expr(
func, module, builder, arg, local_sym_tab, map_sym_tab, struct_sym_tab
)
if not result:
raise ValueError("Failed to evaluate argument")
val, val_type = result
if not isinstance(val_type, ir.PointerType):
raise ValueError(f"Expected pointer type, got {val_type}")
return val, val_type
def get_int_value_from_arg(
arg, func, module, builder, local_sym_tab, map_sym_tab, struct_sym_tab
):
def get_int_value_from_arg(arg, func, compilation_context, builder, local_sym_tab):
"""Evaluate argument and return integer value"""
result = eval_expr(
func, module, builder, arg, local_sym_tab, map_sym_tab, struct_sym_tab
)
result = eval_expr(func, compilation_context, builder, arg, local_sym_tab)
if not result:
raise ValueError("Failed to evaluate argument")

View File

@ -23,7 +23,7 @@ def emit_license(module: ir.Module, license_str: str):
return gvar
def license_processing(tree, module):
def license_processing(tree, compilation_context):
"""Process the LICENSE function decorated with @bpf and @bpfglobal and return the section name"""
count = 0
for node in tree.body:
@ -42,12 +42,14 @@ def license_processing(tree, module):
and isinstance(node.body[0].value, ast.Constant)
and isinstance(node.body[0].value.value, str)
):
emit_license(module, node.body[0].value.value)
emit_license(
compilation_context.module, node.body[0].value.value
)
return "LICENSE"
else:
logger.info("ERROR: LICENSE() must return a string literal")
return None
raise SyntaxError(
"ERROR: LICENSE() must return a string literal"
)
else:
logger.info("ERROR: LICENSE already defined")
return None
raise SyntaxError("ERROR: Multiple LICENSE globals defined")
return None

View File

@ -12,14 +12,14 @@ from pythonbpf.expr.vmlinux_registry import VmlinuxHandlerRegistry
logger: Logger = logging.getLogger(__name__)
def maps_proc(tree, module, chunks, structs_sym_tab):
def maps_proc(tree, compilation_context, chunks):
"""Process all functions decorated with @map to find BPF maps"""
map_sym_tab = {}
map_sym_tab = compilation_context.map_sym_tab
for func_node in chunks:
if is_map(func_node):
logger.info(f"Found BPF map: {func_node.name}")
map_sym_tab[func_node.name] = process_bpf_map(
func_node, module, structs_sym_tab
func_node, compilation_context
)
return map_sym_tab
@ -51,11 +51,11 @@ def create_bpf_map(module, map_name, map_params):
return MapSymbol(type=map_params["type"], sym=map_global, params=map_params)
def _parse_map_params(rval, expected_args=None):
def _parse_map_params(rval, compilation_context, expected_args=None):
"""Parse map parameters from call arguments and keywords."""
params = {}
handler = VmlinuxHandlerRegistry.get_handler()
handler = compilation_context.vmlinux_handler
# Parse positional arguments
if expected_args:
for i, arg_name in enumerate(expected_args):
@ -83,12 +83,23 @@ def _get_vmlinux_enum(handler, name):
if handler and handler.is_vmlinux_enum(name):
return handler.get_vmlinux_enum_value(name)
# Fallback to VmlinuxHandlerRegistry if handler invalid
# This is for backward compatibility or if refactoring isn't complete
if (
VmlinuxHandlerRegistry.get_handler()
and VmlinuxHandlerRegistry.get_handler().is_vmlinux_enum(name)
):
return VmlinuxHandlerRegistry.get_handler().get_vmlinux_enum_value(name)
return None
@MapProcessorRegistry.register("RingBuffer")
def process_ringbuf_map(map_name, rval, module, structs_sym_tab):
def process_ringbuf_map(map_name, rval, compilation_context):
"""Process a BPF_RINGBUF map declaration"""
logger.info(f"Processing Ringbuf: {map_name}")
map_params = _parse_map_params(rval, expected_args=["max_entries"])
map_params = _parse_map_params(
rval, compilation_context, expected_args=["max_entries"]
)
map_params["type"] = BPFMapType.RINGBUF
# NOTE: constraints borrowed from https://docs.ebpf.io/linux/map-type/BPF_MAP_TYPE_RINGBUF/
@ -104,42 +115,62 @@ def process_ringbuf_map(map_name, rval, module, structs_sym_tab):
logger.info(f"Ringbuf map parameters: {map_params}")
map_global = create_bpf_map(module, map_name, map_params)
map_global = create_bpf_map(compilation_context.module, map_name, map_params)
create_ringbuf_debug_info(
module, map_global.sym, map_name, map_params, structs_sym_tab
compilation_context.module,
map_global.sym,
map_name,
map_params,
compilation_context.structs_sym_tab,
)
return map_global
@MapProcessorRegistry.register("HashMap")
def process_hash_map(map_name, rval, module, structs_sym_tab):
def process_hash_map(map_name, rval, compilation_context):
"""Process a BPF_HASH map declaration"""
logger.info(f"Processing HashMap: {map_name}")
map_params = _parse_map_params(rval, expected_args=["key", "value", "max_entries"])
map_params = _parse_map_params(
rval, compilation_context, expected_args=["key", "value", "max_entries"]
)
map_params["type"] = BPFMapType.HASH
logger.info(f"Map parameters: {map_params}")
map_global = create_bpf_map(module, map_name, map_params)
map_global = create_bpf_map(compilation_context.module, map_name, map_params)
# Generate debug info for BTF
create_map_debug_info(module, map_global.sym, map_name, map_params, structs_sym_tab)
create_map_debug_info(
compilation_context.module,
map_global.sym,
map_name,
map_params,
compilation_context.structs_sym_tab,
)
return map_global
@MapProcessorRegistry.register("PerfEventArray")
def process_perf_event_map(map_name, rval, module, structs_sym_tab):
def process_perf_event_map(map_name, rval, compilation_context):
"""Process a BPF_PERF_EVENT_ARRAY map declaration"""
logger.info(f"Processing PerfEventArray: {map_name}")
map_params = _parse_map_params(rval, expected_args=["key_size", "value_size"])
map_params = _parse_map_params(
rval, compilation_context, expected_args=["key_size", "value_size"]
)
map_params["type"] = BPFMapType.PERF_EVENT_ARRAY
logger.info(f"Map parameters: {map_params}")
map_global = create_bpf_map(module, map_name, map_params)
map_global = create_bpf_map(compilation_context.module, map_name, map_params)
# Generate debug info for BTF
create_map_debug_info(module, map_global.sym, map_name, map_params, structs_sym_tab)
create_map_debug_info(
compilation_context.module,
map_global.sym,
map_name,
map_params,
compilation_context.structs_sym_tab,
)
return map_global
def process_bpf_map(func_node, module, structs_sym_tab):
def process_bpf_map(func_node, compilation_context):
"""Process a BPF map (a function decorated with @map)"""
map_name = func_node.name
logger.info(f"Processing BPF map: {map_name}")
@ -158,9 +189,9 @@ def process_bpf_map(func_node, module, structs_sym_tab):
if isinstance(rval, ast.Call) and isinstance(rval.func, ast.Name):
handler = MapProcessorRegistry.get_processor(rval.func.id)
if handler:
return handler(map_name, rval, module, structs_sym_tab)
return handler(map_name, rval, compilation_context)
else:
logger.warning(f"Unknown map type {rval.func.id}, defaulting to HashMap")
return process_hash_map(map_name, rval, module)
return process_hash_map(map_name, rval, compilation_context)
else:
raise ValueError("Function under @map must return a map")

View File

@ -14,14 +14,17 @@ logger = logging.getLogger(__name__)
# Shall we just int64, int32 and uint32 similarly?
def structs_proc(tree, module, chunks):
def structs_proc(tree, compilation_context, chunks):
"""Process all class definitions to find BPF structs"""
structs_sym_tab = {}
# Use the context's symbol table
structs_sym_tab = compilation_context.structs_sym_tab
for cls_node in chunks:
if is_bpf_struct(cls_node):
logger.info(f"Found BPF struct: {cls_node.name}")
struct_info = process_bpf_struct(cls_node, module)
struct_info = process_bpf_struct(cls_node, compilation_context)
structs_sym_tab[cls_node.name] = struct_info
return structs_sym_tab
@ -32,7 +35,7 @@ def is_bpf_struct(cls_node):
)
def process_bpf_struct(cls_node, module):
def process_bpf_struct(cls_node, compilation_context):
"""Process a single BPF struct definition"""
fields = parse_struct_fields(cls_node)