90 Commits

Author SHA1 Message Date
45d85c416f BCC-Examples: Fix returns in disksnoop.py 2026-01-29 22:33:07 +05:30
cb18ab67d9 docs: Add docs badge to README 2026-01-29 12:06:22 +05:30
d1872bc868 docs: Add readthedocs workflow file 2026-01-29 11:58:31 +05:30
2c2ed473d8 Merge pull request #80 from pythonbpf/copilot/create-documentation-project
Add comprehensive Sphinx documentation with MyST-Parser
2026-01-29 11:38:44 +05:30
b6ecec9889 docs: Format requirements 2026-01-29 11:37:50 +05:30
4f56f8c426 docs: Exclude README.md from toctree 2026-01-29 11:35:24 +05:30
3bff930e98 docs: Include cross-references to BCC-Examples 2026-01-29 11:31:25 +05:30
036830c200 docs: Fix API reference 2026-01-29 03:13:49 +05:30
aded125cba docs: Fix helpers and maps guide 2026-01-29 02:54:46 +05:30
581269e52b docs: Fix user-guide/compilation 2026-01-28 16:34:48 +05:30
8bfd998863 docs: Fix user-guide/bpf-structs 2026-01-28 04:12:35 +05:30
a31ef3997a docs: Fix user-guide/maps.md 2026-01-27 12:59:22 +05:30
217e760e98 docs: Fix decorators page in user-guide 2026-01-27 02:16:03 +05:30
06c81ae55e docs: fix TC section in decorators 2026-01-27 01:58:06 +05:30
9131d044dc docs: fix user-guide index 2026-01-26 08:45:56 +05:30
2840a5c101 docs: fix common issues section of quickstart 2026-01-25 13:31:21 +05:30
9ff33229a0 docs: fix type hints misconception in quickstart 2026-01-25 13:25:45 +05:30
2e95b77ceb docs: Fix quickstart and add alternative compile option 2026-01-25 13:23:09 +05:30
c6aa1077de docs: remove unnecessary quick navigation from index 2026-01-25 04:04:27 +05:30
220adaf011 docs: remove unnecessary c_int64 calls from quickstart guide 2026-01-23 04:01:31 +05:30
92162e5cb4 docs: Fix copyright year and authors in conf.py 2026-01-23 03:21:29 +05:30
e0251a05bf docs: remove redundant c_int64 calls from quickstart 2026-01-23 03:17:19 +05:30
f03c08703a docs: Add more context for vmlinux in getting-started/installation 2026-01-23 02:47:44 +05:30
4edfb18609 docs: Remove unnecessary note from getting-started/installation 2026-01-23 02:42:28 +05:30
c58483ab81 Fix documentation: correct comm() usage, XDP types, copyright year, and add uv support
Co-authored-by: r41k0u <76248539+r41k0u@users.noreply.github.com>
2026-01-21 23:10:17 +00:00
2d8c6c144c docs: Fix links and fluff in getting-started/index.md 2026-01-22 04:31:11 +05:30
c1f32a2839 docs: remove unnecessary fluff from README 2026-01-22 03:30:56 +05:30
b6d8b71308 Remove unnecessary make.bat 2026-01-22 03:28:27 +05:30
ab881772af docs: Fix features and video link in index.md 2026-01-22 03:27:10 +05:30
b03924836e docs: remove c_int64 helper usage in return statement 2026-01-22 03:17:41 +05:30
4ab1e26b92 docs: Remove 'not for production' advice 2026-01-22 03:15:13 +05:30
73f6e83445 Add .gitignore for docs build artifacts and docs README
Co-authored-by: r41k0u <76248539+r41k0u@users.noreply.github.com>
2026-01-20 22:07:36 +00:00
c1e90b9d46 Add comprehensive Sphinx documentation structure and content
Co-authored-by: r41k0u <76248539+r41k0u@users.noreply.github.com>
2026-01-20 22:06:00 +00:00
917d386d33 Initial plan 2026-01-20 21:55:17 +00:00
ef128f3752 Merge pull request #79 from pythonbpf/dependabot/github_actions/actions-c2e7f7cad0
Bump the actions group with 2 updates
2025-12-25 17:40:36 +05:30
b92208ed0d Bump the actions group with 2 updates
Bumps the actions group with 2 updates: [actions/upload-artifact](https://github.com/actions/upload-artifact) and [actions/download-artifact](https://github.com/actions/download-artifact).


Updates `actions/upload-artifact` from 5 to 6
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/v5...v6)

Updates `actions/download-artifact` from 6 to 7
- [Release notes](https://github.com/actions/download-artifact/releases)
- [Commits](https://github.com/actions/download-artifact/compare/v6...v7)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-version: '6'
  dependency-type: direct:production
  update-type: version-update:semver-major
  dependency-group: actions
- dependency-name: actions/download-artifact
  dependency-version: '7'
  dependency-type: direct:production
  update-type: version-update:semver-major
  dependency-group: actions
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-12-15 10:24:00 +00:00
2bd8e73724 Add symbolization example 2025-12-09 00:36:50 +05:30
641f8bacbe Merge pull request #78 from pythonbpf/kfunc
move examples to the correct directory
2025-12-08 21:41:52 +05:30
749b06020d move examples to examples folder 2025-12-08 21:39:50 +05:30
0ce5add39b compact the disksnoop.py example
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-12-08 16:23:44 +05:30
d0e2360f46 Add anomaly-detection example 2025-12-02 04:01:41 +05:30
049ec55e85 bump version
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-11-30 05:55:22 +05:30
77901accf2 fix xdp test c form test
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-11-30 05:53:32 +05:30
0616a2fccb Add requirements.txt for BCC-Examples 2025-11-30 05:39:58 +05:30
526425a267 Add command to copy vmlinux.py for container-monitor 2025-11-30 05:37:15 +05:30
466ecdb6a4 Update Python package installation in README 2025-11-30 05:36:16 +05:30
752a10fa5f Add installation instructions to README
Added installation instructions and dependencies section to README.
2025-11-30 05:35:50 +05:30
3602b502f4 Add steps to run container-example in BCC-Examples/README.md 2025-11-30 05:34:08 +05:30
808db2722d Add instructions to run examples in README 2025-11-30 05:27:12 +05:30
99fc5d75cc Refactor disksnoop.py to remove logging and main block
Removed logging setup and main execution block from disksnoop.py.
2025-11-30 04:53:53 +05:30
c91e69e2f7 Add bpftool to installation dependencies 2025-11-30 04:35:15 +05:30
dc995a1448 Fix variable initialization in BPF tracepoint example 2025-11-30 04:31:39 +05:30
0fd6bea211 Fix return value in README example 2025-11-30 04:29:31 +05:30
01d234ac86 Merge pull request #77 from pythonbpf/fix-vmlinux-ir-gen
Add a web dashboard to container monitor
2025-11-28 22:12:47 +05:30
c97efb2570 change web version 2025-11-28 22:11:41 +05:30
76c982e15e Add a web dashboard 2025-11-28 21:30:41 +05:30
2543826e85 Merge pull request #75 from pythonbpf/fix-vmlinux-ir-gen
add container-monitor example
2025-11-28 21:12:45 +05:30
650744f843 beautify container monitor 2025-11-28 21:10:39 +05:30
d73c793989 format chore 2025-11-28 21:02:29 +05:30
bbe4990878 add container-monitor example 2025-11-28 21:02:29 +05:30
600993f626 add syscall monitor 2025-11-28 21:02:28 +05:30
6c55d56ef0 add file io and network stats in container monitor example 2025-11-28 21:02:28 +05:30
704b0d8cd3 Fix debug info generation of PerfEventArray maps 2025-11-28 21:02:27 +05:30
0e50079d88 Add passing test struct_pylib.py 2025-11-28 21:02:27 +05:30
d457f87410 Bump actions/checkout from 5 to 6 in the actions group
Bumps the actions group with 1 update: [actions/checkout](https://github.com/actions/checkout).


Updates `actions/checkout` from 5 to 6
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v5...v6)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-version: '6'
  dependency-type: direct:production
  update-type: version-update:semver-major
  dependency-group: actions
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-28 21:02:26 +05:30
4ea02745b3 Janitorial: Remove useless comments 2025-11-28 21:02:26 +05:30
84edddb685 Fix passing test hash_map_struct to include char array test 2025-11-28 21:02:25 +05:30
6f017a9176 Unify struct and pointer to struct handling, abstract null check in ir_ops 2025-11-28 21:02:25 +05:30
24e5829b80 format chore 2025-11-28 14:52:45 +05:30
2daedc5882 Fix debug info generation of PerfEventArray maps 2025-11-28 14:50:40 +05:30
14af7ec4dd add file_io.bpf.py to make container-monitor 2025-11-28 14:47:29 +05:30
536ea4855e add cgroup helper 2025-11-28 14:47:01 +05:30
5ba29db362 fix the c form of the xdp program 2025-11-27 23:44:44 +05:30
0ca835079d Merge pull request #74 from pythonbpf/fix-vmlinux-ir-gen
Fix some issues with vmlinux struct usage in syntax
TODO: THIS DOES NOT FIX the XDP example. 
The issue with the XDP example here is that the range is not being tracked due to multiple loads from stack which is making the verifier lose track of the range on that value.
2025-11-27 23:03:45 +05:30
de8c486461 fix: remove deref_to_depth on single depth pointers 2025-11-27 22:59:34 +05:30
f135cdbcc0 format chore 2025-11-27 14:03:12 +05:30
a8595ff1d2 feat: allocate tmp variable for pointer to vmlinux struct field access. 2025-11-27 14:02:00 +05:30
d43d3ad637 clear disksnoop output 2025-11-27 12:45:48 +05:30
9becee8f77 add expected type check 2025-11-27 12:44:12 +05:30
189526d5ca format chore 2025-11-27 12:42:10 +05:30
1593b7bcfe feat:user defined struct casting 2025-11-27 12:41:57 +05:30
127852ee9f Add passing test struct_pylib.py 2025-11-27 04:45:34 +05:30
4905649700 feat:non struct field values can be cast 2025-11-26 14:18:40 +05:30
7b7b00dbe7 add disksnoop ipynb 2025-11-25 22:51:24 +05:30
102e4ca78c add disksnoop example 2025-11-24 22:50:39 +05:30
2fd4fefbcc Merge pull request #73 from pythonbpf/dependabot/github_actions/actions-76468cb07f
Bump actions/checkout from 5 to 6 in the actions group
2025-11-24 17:15:55 +05:30
016fd5de5c Bump actions/checkout from 5 to 6 in the actions group
Bumps the actions group with 1 update: [actions/checkout](https://github.com/actions/checkout).


Updates `actions/checkout` from 5 to 6
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v5...v6)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-version: '6'
  dependency-type: direct:production
  update-type: version-update:semver-major
  dependency-group: actions
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-24 11:44:07 +00:00
8ad5fb8a3a Janitorial: Remove useless comments 2025-11-23 06:29:44 +05:30
bf9635e324 Fix passing test hash_map_struct to include char array test 2025-11-23 06:27:01 +05:30
cbe365d760 Unify struct and pointer to struct handling, abstract null check in ir_ops 2025-11-23 06:26:09 +05:30
61 changed files with 8386 additions and 174 deletions

View File

@ -12,7 +12,7 @@ jobs:
name: Format
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
- uses: actions/setup-python@v6
with:
python-version: "3.x"

View File

@ -20,7 +20,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
- uses: actions/setup-python@v6
with:
@ -33,7 +33,7 @@ jobs:
python -m build
- name: Upload distributions
uses: actions/upload-artifact@v5
uses: actions/upload-artifact@v6
with:
name: release-dists
path: dist/
@ -59,7 +59,7 @@ jobs:
steps:
- name: Retrieve release distributions
uses: actions/download-artifact@v6
uses: actions/download-artifact@v7
with:
name: release-dists
path: dist/

4
.gitignore vendored
View File

@ -10,3 +10,7 @@ __pycache__/
vmlinux.py
~*
vmlinux.h
# Documentation build artifacts
docs/_build/
docs/_templates/

21
.readthedocs.yaml Normal file
View File

@ -0,0 +1,21 @@
# Read the Docs configuration file for Sphinx
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
version: 2
# Set the OS, Python version, and other tools you might need
build:
os: ubuntu-24.04
tools:
python: "3.12"
# Build documentation in the "docs/" directory with Sphinx
sphinx:
configuration: docs/conf.py
# Optionally, but recommended, declare the Python requirements
python:
install:
- requirements: docs/requirements.txt
- method: pip
path: .

View File

@ -7,14 +7,25 @@ This folder contains examples of BCC tutorial examples that have been ported to
- You will also need `matplotlib` for vfsreadlat.py example.
- You will also need `rich` for vfsreadlat_rich.py example.
- You will also need `plotly` and `dash` for vfsreadlat_plotly.py example.
- All of these are added to `requirements.txt` file. You can install them using the following command:
```bash
pip install -r requirements.txt
```
## Usage
- You'll need root privileges to run these examples. If you are using a virtualenv, use the following command to run the scripts:
```bash
sudo <path_to_virtualenv>/bin/python3 <script_name>.py
```
- For the disksnoop and container-monitor examples, you need to generate the vmlinux.py file first. Follow the instructions in the [main README](https://github.com/pythonbpf/Python-BPF/tree/master?tab=readme-ov-file#first-generate-the-vmlinuxpy-file-for-your-kernel) to generate the vmlinux.py file.
- For vfsreadlat_plotly.py, run the following command to start the Dash server:
```bash
sudo <path_to_virtualenv>/bin/python3 vfsreadlat_plotly/bpf_program.py
```
Then open your web browser and navigate to the given URL.
- For container-monitor, you need to first copy the vmlinux.py to `container-monitor/` directory.
Then run the following command to run the example:
```bash
cp vmlinux.py container-monitor/
sudo <path_to_virtualenv>/bin/python3 container-monitor/container_monitor.py
```

View File

@ -0,0 +1,122 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "c3520e58-e50f-4bc1-8f9d-a6fecbf6e9f0",
"metadata": {},
"outputs": [],
"source": [
"from vmlinux import struct_request, struct_pt_regs\n",
"from pythonbpf import bpf, section, bpfglobal, map, BPF\n",
"from pythonbpf.helper import ktime\n",
"from pythonbpf.maps import HashMap\n",
"from ctypes import c_int64, c_uint64, c_int32\n",
"\n",
"REQ_WRITE = 1\n",
"\n",
"\n",
"@bpf\n",
"@map\n",
"def start() -> HashMap:\n",
" return HashMap(key=c_uint64, value=c_uint64, max_entries=10240)\n",
"\n",
"\n",
"@bpf\n",
"@section(\"kprobe/blk_mq_end_request\")\n",
"def trace_completion(ctx: struct_pt_regs) -> c_int64:\n",
" # Get request pointer from first argument\n",
" req_ptr = ctx.di\n",
" req = struct_request(ctx.di)\n",
" # Print: data_len, cmd_flags, latency_us\n",
" data_len = req.__data_len\n",
" cmd_flags = req.cmd_flags\n",
" # Lookup start timestamp\n",
" req_tsp = start.lookup(req_ptr)\n",
" if req_tsp:\n",
" # Calculate delta in nanoseconds\n",
" delta = ktime() - req_tsp\n",
"\n",
" # Convert to microseconds for printing\n",
" delta_us = delta // 1000\n",
"\n",
" print(f\"{data_len} {cmd_flags:x} {delta_us}\\n\")\n",
"\n",
" # Delete the entry\n",
" start.delete(req_ptr)\n",
"\n",
" return c_int64(0)\n",
"\n",
"\n",
"@bpf\n",
"@section(\"kprobe/blk_mq_start_request\")\n",
"def trace_start(ctx1: struct_pt_regs) -> c_int32:\n",
" req = ctx1.di\n",
" ts = ktime()\n",
" start.update(req, ts)\n",
" return c_int32(0)\n",
"\n",
"\n",
"@bpf\n",
"@bpfglobal\n",
"def LICENSE() -> str:\n",
" return \"GPL\"\n",
"\n",
"\n",
"b = BPF()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "97040f73-98e0-4993-94c6-125d1b42d931",
"metadata": {},
"outputs": [],
"source": [
"b.load()\n",
"b.attach_all()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b1bd4f51-fa25-42e1-877c-e48a2605189f",
"metadata": {},
"outputs": [],
"source": [
"from pythonbpf import trace_pipe"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "96b4b59b-b0db-4952-9534-7a714f685089",
"metadata": {},
"outputs": [],
"source": [
"trace_pipe()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

48
BCC-Examples/disksnoop.py Normal file
View File

@ -0,0 +1,48 @@
from ctypes import c_int32, c_int64, c_uint64
from vmlinux import struct_pt_regs, struct_request
from pythonbpf import bpf, bpfglobal, compile, map, section
from pythonbpf.helper import ktime
from pythonbpf.maps import HashMap
@bpf
@map
def start() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=10240)
@bpf
@section("kprobe/blk_mq_end_request")
def trace_completion(ctx: struct_pt_regs) -> c_int64:
req_ptr = ctx.di
req = struct_request(ctx.di)
data_len = req.__data_len
cmd_flags = req.cmd_flags
req_tsp = start.lookup(req_ptr)
if req_tsp:
delta = ktime() - req_tsp
delta_us = delta // 1000
print(f"{data_len} {cmd_flags:x} {delta_us}\n")
start.delete(req_ptr)
return 0 # type: ignore [return-value]
@bpf
@section("kprobe/blk_mq_start_request")
def trace_start(ctx1: struct_pt_regs) -> c_int32:
req = ctx1.di
ts = ktime()
start.update(req, ts)
return 0 # type: ignore [return-value]
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -0,0 +1,9 @@
# =============================================================================
# Requirements for PythonBPF BCC-Examples
# =============================================================================
dash
matplotlib
numpy
plotly
rich

View File

@ -19,6 +19,8 @@
<a href="https://pepy.tech/project/pythonbpf"><img src="https://pepy.tech/badge/pythonbpf" alt="Downloads"></a>
<!-- Build & CI -->
<a href="https://github.com/pythonbpf/python-bpf/actions"><img src="https://github.com/pythonbpf/python-bpf/actions/workflows/python-publish.yml/badge.svg" alt="Build Status"></a>
<!-- Documentation -->
<a href="https://python-bpf.readthedocs.io/en/latest/?badge=latest"><img src="https://readthedocs.org/projects/python-bpf/badge/?version=latest" alt="Documentation Status"></a>
<!-- Meta -->
<a href="https://github.com/pythonbpf/python-bpf/blob/main/LICENSE"><img src="https://img.shields.io/github/license/pythonbpf/python-bpf" alt="License"></a>
</p>
@ -40,16 +42,11 @@ Python-BPF is an LLVM IR generator for eBPF programs written in Python. It uses
---
## Try It Out!
Run
```bash
curl -s https://raw.githubusercontent.com/pythonbpf/Python-BPF/refs/heads/master/tools/setup.sh | sudo bash
```
## Installation
Dependencies:
* `bpftool`
* `clang`
* Python ≥ 3.8
@ -61,6 +58,38 @@ pip install pythonbpf pylibbpf
---
## Try It Out!
#### First, generate the vmlinux.py file for your kernel:
- Install the required dependencies:
- On Ubuntu:
```bash
sudo apt-get install bpftool clang
pip install pythonbpf pylibbpf ctypeslib2
```
- Generate the `vmlinux.py` using:
```bash
sudo tools/vmlinux-gen.py
```
- Copy this file to `BCC-Examples/`
#### Next, install requirements for BCC-Examples:
- These requirements are only required for the python notebooks, vfsreadlat and container-monitor examples.
```bash
pip install -r BCC-Examples/requirements.txt
```
- Now, follow the instructions in the [BCC-Examples/README.md](https://github.com/pythonbpf/Python-BPF/blob/master/BCC-Examples/README.md) to run the examples.
#### To spin up jupyter notebook examples:
- Run and follow the instructions on screen
```bash
curl -s https://raw.githubusercontent.com/pythonbpf/Python-BPF/refs/heads/master/tools/setup.sh | sudo bash
```
- Check the jupyter server on the web browser and run the notebooks in the `BCC-Examples/` folder.
---
## Example Usage
```python
@ -88,16 +117,15 @@ def hist() -> HashMap:
@section("tracepoint/syscalls/sys_enter_clone")
def hello(ctx: c_void_p) -> c_int64:
process_id = pid()
one = 1
prev = hist.lookup(process_id)
if prev:
previous_value = prev + 1
print(f"count: {previous_value} with {process_id}")
hist.update(process_id, previous_value)
return c_int64(0)
return 0
else:
hist.update(process_id, one)
return c_int64(0)
hist.update(process_id, 1)
return 0
@bpf

405
blazesym-example/Cargo.lock generated Normal file
View File

@ -0,0 +1,405 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "adler2"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
[[package]]
name = "anstream"
version = "0.6.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is_terminal_polyfill",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78"
[[package]]
name = "anstyle-parse"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
dependencies = [
"anstyle",
"once_cell_polyfill",
"windows-sys",
]
[[package]]
name = "anyhow"
version = "1.0.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
[[package]]
name = "bitflags"
version = "2.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
[[package]]
name = "blazesym"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ace0ab71bbe9a25cb82f6d0e513ae11aebd1a38787664475bb2ed5cbe2329736"
dependencies = [
"cpp_demangle",
"gimli",
"libc",
"memmap2",
"miniz_oxide",
"rustc-demangle",
]
[[package]]
name = "cc"
version = "1.2.46"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b97463e1064cb1b1c1384ad0a0b9c8abd0988e2a91f52606c80ef14aadb63e36"
dependencies = [
"find-msvc-tools",
"shlex",
]
[[package]]
name = "cfg-if"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "cfg_aliases"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
[[package]]
name = "clap"
version = "4.5.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c26d721170e0295f191a69bd9a1f93efcdb0aff38684b61ab5750468972e5f5"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap_builder"
version = "4.5.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75835f0c7bf681bfd05abe44e965760fea999a5286c6eb2d59883634fd02011a"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.5.49"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
[[package]]
name = "colorchoice"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
[[package]]
name = "cpp_demangle"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0667304c32ea56cb4cd6d2d7c0cfe9a2f8041229db8c033af7f8d69492429def"
dependencies = [
"cfg-if",
]
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "fallible-iterator"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
[[package]]
name = "find-msvc-tools"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844"
[[package]]
name = "gimli"
version = "0.32.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7"
dependencies = [
"fallible-iterator",
"indexmap",
"stable_deref_trait",
]
[[package]]
name = "hashbrown"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d"
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "indexmap"
version = "2.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f"
dependencies = [
"equivalent",
"hashbrown",
]
[[package]]
name = "is_terminal_polyfill"
version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
[[package]]
name = "libbpf-rs"
version = "0.24.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93edd9cd673087fa7518fd63ad6c87be2cd9b4e35034b1873f3e3258c018275b"
dependencies = [
"bitflags",
"libbpf-sys",
"libc",
"vsprintf",
]
[[package]]
name = "libbpf-sys"
version = "1.6.2+v1.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba0346fc595fa2c8e274903e8a0e3ed5e6a29183af167567f6289fd3b116881b"
dependencies = [
"cc",
"nix",
"pkg-config",
]
[[package]]
name = "libc"
version = "0.2.177"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976"
[[package]]
name = "memmap2"
version = "0.9.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490"
dependencies = [
"libc",
]
[[package]]
name = "miniz_oxide"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
dependencies = [
"adler2",
"simd-adler32",
]
[[package]]
name = "nix"
version = "0.30.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6"
dependencies = [
"bitflags",
"cfg-if",
"cfg_aliases",
"libc",
]
[[package]]
name = "once_cell_polyfill"
version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
[[package]]
name = "pkg-config"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "plain"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6"
[[package]]
name = "proc-macro2"
version = "1.0.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
dependencies = [
"unicode-ident",
]
[[package]]
name = "blazesym-example"
version = "0.1.0"
dependencies = [
"anyhow",
"blazesym",
"clap",
"libbpf-rs",
"libc",
"plain",
]
[[package]]
name = "quote"
version = "1.0.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rustc-demangle"
version = "0.1.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace"
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "simd-adler32"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
[[package]]
name = "stable_deref_trait"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "2.0.110"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a99801b5bd34ede4cf3fc688c5919368fea4e4814a4664359503e6015b280aea"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "vsprintf"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aec2f81b75ca063294776b4f7e8da71d1d5ae81c2b1b149c8d89969230265d63"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
[[package]]
name = "windows-sys"
version = "0.61.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
dependencies = [
"windows-link",
]

View File

@ -0,0 +1,14 @@
[package]
name = "blazesym-example"
version = "0.1.0"
edition = "2024"
[dependencies]
libbpf-rs = "0.24"
blazesym = "0.2.0-rc.4"
anyhow = "1.0"
clap = { version = "4.5", features = ["derive"] }
libc = "0.2"
plain = "0.2"
[build-dependencies]

View File

@ -0,0 +1,333 @@
// src/main.rs - Fixed imports and error handling
use std::mem;
use std::path::PathBuf;
use std::time::Duration;
use anyhow::{anyhow, Context, Result};
use blazesym::symbolize::{CodeInfo, Input, Symbolized, Symbolizer};
use blazesym::symbolize::source::{Source, Kernel, Process};
use clap::Parser;
use libbpf_rs::{MapCore, ObjectBuilder, RingBufferBuilder}; // Added MapCore
// Match your Python struct exactly
#[repr(C)]
#[derive(Debug, Copy, Clone)]
struct ExecEvent {
pid: i64,
cpu: i32,
timestamp: i64,
comm: [u8; 16],
kstack_sz: i64,
ustack_sz: i64,
kstack: [u8; 128], // str(128) in Python
ustack: [u8; 128], // str(128) in Python
}
unsafe impl plain::Plain for ExecEvent {}
// Define perf_event constants (not in libc on all platforms)
const PERF_TYPE_HARDWARE: u32 = 0;
const PERF_TYPE_SOFTWARE: u32 = 1;
const PERF_COUNT_HW_CPU_CYCLES: u64 = 0;
const PERF_COUNT_SW_CPU_CLOCK: u64 = 0;
#[repr(C)]
struct PerfEventAttr {
type_: u32,
size: u32,
config: u64,
sample_period_or_freq: u64,
sample_type: u64,
read_format: u64,
flags: u64,
// ... rest can be zeroed
_padding: [u64; 64],
}
#[derive(Parser, Debug)]
struct Args {
/// Path to the BPF object file
#[arg(default_value = "stack_traces.o")]
object_file: PathBuf,
/// Sampling frequency
#[arg(short, long, default_value_t = 50)]
freq: u64,
/// Use software events
#[arg(long)]
sw_event: bool,
/// Verbose output
#[arg(short, long)]
verbose: bool,
}
fn open_perf_event(cpu: i32, freq: u64, sw_event: bool) -> Result<i32> {
let mut attr: PerfEventAttr = unsafe { mem::zeroed() };
attr.size = mem::size_of::<PerfEventAttr>() as u32;
attr.type_ = if sw_event {
PERF_TYPE_SOFTWARE
} else {
PERF_TYPE_HARDWARE
};
attr.config = if sw_event {
PERF_COUNT_SW_CPU_CLOCK
} else {
PERF_COUNT_HW_CPU_CYCLES
};
// Use frequency-based sampling
attr.sample_period_or_freq = freq;
attr.flags = 1 << 10; // freq = 1, disabled = 1
let fd = unsafe {
libc::syscall(
libc::SYS_perf_event_open,
&attr as *const _,
-1, // pid = -1 (all processes)
cpu, // cpu
-1, // group_fd
0, // flags
)
};
if fd < 0 {
Err(anyhow!("Failed to open perf event on CPU {}: {}", cpu,
std::io::Error::last_os_error()))
} else {
Ok(fd as i32)
}
}
fn print_stack_trace(
addrs: &[u64],
symbolizer: &Symbolizer,
pid: u32,
is_kernel: bool,
) {
if addrs.is_empty() {
return;
}
let src = if is_kernel {
Source::Kernel(Kernel::default())
} else {
Source::Process(Process::new(pid.into()))
};
let syms = match symbolizer.symbolize(&src, Input::AbsAddr(addrs)) {
Ok(syms) => syms,
Err(e) => {
eprintln!(" Failed to symbolize: {}", e);
for addr in addrs {
println!("0x{:016x}: <no-symbol>", addr);
}
return;
}
};
for (addr, sym) in addrs.iter().zip(syms.iter()) {
match sym {
Symbolized::Sym(sym_info) => {
print!("0x{:016x}: {} @ 0x{:x}+0x{:x}",
addr, sym_info.name, sym_info.addr, sym_info.offset);
if let Some(ref code_info) = sym_info.code_info {
print_code_info(code_info);
}
println!();
// Print inlined frames
for inlined in &sym_info.inlined {
print!(" {} (inlined)", inlined.name);
if let Some(ref code_info) = inlined.code_info {
print_code_info(code_info);
}
println!();
}
}
Symbolized::Unknown(..) => {
println!("0x{:016x}: <no-symbol>", addr);
}
}
}
}
fn print_code_info(code_info: &CodeInfo) {
let path = code_info.to_path();
let path_str = path.display();
match (code_info.line, code_info.column) {
(Some(line), Some(col)) => print!(" {}:{}:{}", path_str, line, col),
(Some(line), None) => print!(" {}:{}", path_str, line),
(None, _) => print!(" {}", path_str),
}
}
fn handle_event(symbolizer: &Symbolizer, data: &[u8]) -> i32 {
let event = plain::from_bytes::<ExecEvent>(data).expect("Invalid event data");
// Extract comm string
let comm = std::str::from_utf8(&event.comm)
.unwrap_or("<unknown>")
.trim_end_matches('\0');
println!("[{:.9}] COMM: {} (pid={}) @ CPU {}",
event.timestamp as f64 / 1_000_000_000.0,
comm,
event.pid,
event.cpu);
// Handle kernel stack
if event.kstack_sz > 0 {
println!("Kernel:");
let num_frames = (event.kstack_sz / 8) as usize;
let kstack_u64 = unsafe {
std::slice::from_raw_parts(
event.kstack.as_ptr() as *const u64,
num_frames.min(16),
)
};
// Filter out zero addresses
let kstack: Vec<u64> = kstack_u64.iter()
.copied()
.take_while(|&addr| addr != 0)
.collect();
print_stack_trace(&kstack, symbolizer, 0, true);
} else {
println!("No Kernel Stack");
}
// Handle user stack
if event.ustack_sz > 0 {
println!("Userspace:");
let num_frames = (event.ustack_sz / 8) as usize;
let ustack_u64 = unsafe {
std::slice::from_raw_parts(
event.ustack.as_ptr() as *const u64,
num_frames.min(16),
)
};
// Filter out zero addresses
let ustack: Vec<u64> = ustack_u64.iter()
.copied()
.take_while(|&addr| addr != 0)
.collect();
print_stack_trace(&ustack, symbolizer, event.pid as u32, false);
} else {
println!("No Userspace Stack");
}
println!();
0
}
fn main() -> Result<()> {
let args = Args::parse();
if !args.object_file.exists() {
return Err(anyhow!("Object file not found: {:?}", args.object_file));
}
println!("Loading BPF object: {:?}", args.object_file);
// Load BPF object
let mut obj_builder = ObjectBuilder::default();
obj_builder.debug(args.verbose);
let open_obj = obj_builder
.open_file(&args.object_file)
.context("Failed to open BPF object")?;
let mut obj = open_obj.load().context("Failed to load BPF object")?;
println!("✓ BPF object loaded");
// Find the program
let prog = obj
.progs_mut()
.find(|p| p.name() == "trace_exec_enter")
.ok_or_else(|| anyhow!("Program 'trace_exec_enter' not found"))?;
println!("✓ Found program: trace_exec_enter");
// Find the map
let map = obj
.maps()
.find(|m| m.name() == "exec_events")
.ok_or_else(|| anyhow!("Map 'exec_events' not found"))?;
println!("✓ Found map: exec_events");
// Get number of CPUs
let num_cpus = libbpf_rs::num_possible_cpus()?;
println!("✓ Detected {} CPUs\n", num_cpus);
// Open perf events and attach BPF program
println!("Setting up perf events...");
let mut links = Vec::new();
for cpu in 0..num_cpus {
match open_perf_event(cpu as i32, args.freq, args.sw_event) {
Ok(perf_fd) => {
match prog.attach_perf_event(perf_fd) {
Ok(link) => {
links.push(link);
if args.verbose {
println!(" ✓ Attached to CPU {}", cpu);
}
}
Err(e) => {
eprintln!(" ✗ Failed to attach to CPU {}: {}", cpu, e);
unsafe { libc::close(perf_fd); }
}
}
}
Err(e) => {
if args.verbose {
eprintln!(" ✗ Failed to open perf event on CPU {}: {}", cpu, e);
}
}
}
}
println!("✓ Attached to {} CPUs\n", links.len());
if links.is_empty() {
return Err(anyhow!("Failed to attach to any CPU"));
}
// Initialize symbolizer
let symbolizer = Symbolizer::new();
// Set up ring buffer
let mut builder = RingBufferBuilder::new();
builder.add(&map, move |data: &[u8]| -> i32 {
handle_event(&symbolizer, data)
})?;
let ringbuf = builder.build()?;
println!("========================================");
println!("Profiling started. Press Ctrl+C to stop.");
println!("========================================\n");
// Poll for events - just keep polling until error
loop {
if let Err(e) = ringbuf.poll(Duration::from_millis(100)) {
// Any error breaks the loop (including Ctrl+C)
eprintln!("\nStopping: {}", e);
break;
}
}
println!("Done.");
Ok(())
}

View File

@ -0,0 +1,49 @@
# tests/passing_tests/ringbuf_advanced.py
from pythonbpf import bpf, map, section, bpfglobal, struct, compile
from pythonbpf.maps import RingBuffer
from pythonbpf.helper import ktime, pid, smp_processor_id, comm, get_stack
from ctypes import c_void_p, c_int32, c_int64
import logging
@bpf
@struct
class exec_event:
pid: c_int64
cpu: c_int32
timestamp: c_int64
comm: str(16) # type: ignore [valid-type]
kstack_sz: c_int64
ustack_sz: c_int64
kstack: str(128) # type: ignore [valid-type]
ustack: str(128) # type: ignore [valid-type]
@bpf
@map
def exec_events() -> RingBuffer:
return RingBuffer(max_entries=1048576)
@bpf
@section("perf_event")
def trace_exec_enter(ctx: c_void_p) -> c_int64:
evt = exec_event()
evt.pid = pid()
evt.cpu = smp_processor_id()
evt.timestamp = ktime()
comm(evt.comm)
evt.kstack_sz = get_stack(evt.kstack)
evt.ustack_sz = get_stack(evt.ustack, 256)
exec_events.output(evt)
print(f"Submitted exec_event for pid: {evt.pid}, cpu: {evt.cpu}")
return 0 # type: ignore [return-value]
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile(logging.INFO)

20
docs/Makefile Normal file
View File

@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

52
docs/README.md Normal file
View File

@ -0,0 +1,52 @@
# PythonBPF Documentation
This directory contains the Sphinx documentation for PythonBPF.
## Building the Documentation
### Prerequisites
Install the documentation dependencies:
**Using uv (recommended):**
```bash
uv pip install -r requirements.txt
# Or install the optional docs dependencies
uv pip install pythonbpf[docs]
```
**Using pip:**
```bash
pip install -r requirements.txt
# Or install the optional docs dependencies
pip install pythonbpf[docs]
```
### Build HTML Documentation
```bash
make html
```
The generated documentation will be in `_build/html/`. Open `_build/html/index.html` in a browser to view.
### Other Build Formats
```bash
make latexpdf # Build PDF documentation
make epub # Build ePub format
make clean # Clean build artifacts
```
## Documentation Structure
- `index.md` - Main landing page
- `getting-started/` - Installation and quick start guides
- `user-guide/` - Comprehensive user documentation
- `api/` - API reference documentation
- `conf.py` - Sphinx configuration
- `_static/` - Static files (images, CSS, etc.)
## Writing Documentation
Documentation is written in Markdown using [MyST-Parser](https://myst-parser.readthedocs.io/). See the existing files for examples.

0
docs/_static/.gitkeep vendored Normal file
View File

471
docs/api/index.md Normal file
View File

@ -0,0 +1,471 @@
# API Reference
This section provides detailed API documentation for all PythonBPF modules, classes, and functions.
## Module Overview
PythonBPF is organized into several modules:
* `pythonbpf` - Main module with decorators and compilation functions
* `pythonbpf.maps` - BPF map types
* `pythonbpf.helper` - BPF helper functions
* `pythonbpf.structs` - Struct type handling
* `pythonbpf.codegen` - Code generation and compilation
## Public API
The main `pythonbpf` module exports the following public API:
```python
from pythonbpf import (
# Decorators
bpf,
map,
section,
bpfglobal,
struct,
# Compilation
compile_to_ir,
compile,
BPF,
# Utilities
trace_pipe,
trace_fields,
)
```
## Decorators
```{eval-rst}
.. automodule:: pythonbpf.decorators
:members:
:undoc-members:
:show-inheritance:
```
### bpf
```python
@bpf
def my_function():
pass
```
Decorator to mark a function or class for BPF compilation. Any function or class decorated with `@bpf` will be processed by the PythonBPF compiler.
**See also:** {doc}`../user-guide/decorators`
### map
```python
@bpf
@map
def my_map() -> HashMap:
return HashMap(key=c_uint32, value=c_uint64, max_entries=1024)
```
Decorator to mark a function as a BPF map definition. The function must return a map type.
**See also:** {doc}`../user-guide/maps`
### section
```python
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def trace_open(ctx: c_void_p) -> c_int64:
return c_int64(0)
```
Decorator to specify which kernel hook to attach the BPF program to.
**Parameters:**
* `name` (str) - The section name (e.g., "tracepoint/...", "kprobe/...", "xdp")
**See also:** {doc}`../user-guide/decorators`
### bpfglobal
```python
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
```
Decorator to mark a function as a BPF global variable definition.
**See also:** {doc}`../user-guide/decorators`
### struct
```python
@bpf
@struct
class Event:
timestamp: c_uint64
pid: c_uint32
```
Decorator to mark a class as a BPF struct definition.
**See also:** {doc}`../user-guide/structs`
## Compilation Functions
```{eval-rst}
.. automodule:: pythonbpf.codegen
:members: compile_to_ir, compile, BPF
:undoc-members:
:show-inheritance:
```
### compile_to_ir()
```python
def compile_to_ir(
filename: str,
output: str,
loglevel=logging.WARNING
) -> None
```
Compile Python source to LLVM Intermediate Representation.
**Parameters:**
* `filename` (str) - Path to the Python source file
* `output` (str) - Path for the output LLVM IR file (.ll)
* `loglevel` - Logging level (default: logging.WARNING)
**See also:** {doc}`../user-guide/compilation`
### compile()
```python
def compile(
filename: str = None,
output: str = None,
loglevel=logging.WARNING
) -> None
```
Compile Python source to BPF object file.
**Parameters:**
* `filename` (str, optional) - Path to the Python source file (default: calling file)
* `output` (str, optional) - Path for the output object file (default: same name with .o extension)
* `loglevel` - Logging level (default: logging.WARNING)
**See also:** {doc}`../user-guide/compilation`
### BPF
```python
class BPF:
def __init__(
self,
filename: str = None,
loglevel=logging.WARNING
)
def load(self) -> BpfObject
def attach_all(self) -> None
def load_and_attach(self) -> BpfObject
```
High-level interface to compile, load, and attach BPF programs.
**Parameters:**
* `filename` (str, optional) - Path to Python source file (default: calling file)
* `loglevel` - Logging level (default: logging.WARNING)
**Methods:**
* `load()` - Load the compiled BPF program into the kernel
* `attach_all()` - Attach all BPF programs to their hooks
* `load_and_attach()` - Convenience method that loads and attaches
**See also:** {doc}`../user-guide/compilation`
## Utilities
```{eval-rst}
.. automodule:: pythonbpf.utils
:members:
:undoc-members:
:show-inheritance:
```
### trace_pipe()
```python
def trace_pipe() -> None
```
Read and display output from the kernel trace pipe.
Blocks until interrupted with Ctrl+C. Displays BPF program output from `print()` statements.
**See also:** {doc}`../user-guide/helpers`
### trace_fields()
```python
def trace_fields() -> tuple
```
Parse one line from the trace pipe into structured fields.
**Returns:** Tuple of `(task, pid, cpu, flags, timestamp, message)`
* `task` (str) - Task/process name
* `pid` (int) - Process ID
* `cpu` (int) - CPU number
* `flags` (bytes) - Trace flags
* `timestamp` (float) - Timestamp in seconds
* `message` (str) - The trace message
**See also:** {doc}`../user-guide/helpers`
## Map Types
```{eval-rst}
.. automodule:: pythonbpf.maps.maps
:members:
:undoc-members:
:show-inheritance:
```
### HashMap
```python
class HashMap:
def __init__(
self,
key,
value,
max_entries: int
)
def lookup(self, key)
def update(self, key, value, flags=None)
def delete(self, key)
```
Hash map for efficient key-value storage.
**Parameters:**
* `key` - The type of the key (ctypes type or struct)
* `value` - The type of the value (ctypes type or struct)
* `max_entries` (int) - Maximum number of entries
**Methods:**
* `lookup(key)` - Look up a value by key
* `update(key, value, flags=None)` - Update or insert a key-value pair
* `delete(key)` - Remove an entry from the map
**See also:** {doc}`../user-guide/maps`
### PerfEventArray
```python
class PerfEventArray:
def __init__(
self,
key_size,
value_size
)
def output(self, data)
```
Perf event array for sending data to userspace.
**Parameters:**
* `key_size` - Type for the key
* `value_size` - Type for the value
**Methods:**
* `output(data)` - Send data to userspace
**See also:** {doc}`../user-guide/maps`
### RingBuffer
```python
class RingBuffer:
def __init__(self, max_entries: int)
def output(self, data, flags=0)
def reserve(self, size: int)
def submit(self, data, flags=0)
def discard(self, data, flags=0)
```
Ring buffer for efficient event delivery.
**Parameters:**
* `max_entries` (int) - Maximum size in bytes (must be power of 2)
**Methods:**
* `output(data, flags=0)` - Send data to the ring buffer
* `reserve(size)` - Reserve space in the buffer
* `submit(data, flags=0)` - Submit previously reserved space
* `discard(data, flags=0)` - Discard previously reserved space
**See also:** {doc}`../user-guide/maps`
## Helper Functions
```{eval-rst}
.. automodule:: pythonbpf.helper.helpers
:members:
:undoc-members:
:show-inheritance:
```
### Process Information
* `pid()` - Get current process ID
* `comm(buf)` - Get current process command name (requires buffer parameter)
* `uid()` - Get current user ID
### Time
* `ktime()` - Get current kernel time in nanoseconds
### CPU
* `smp_processor_id()` - Get current CPU ID
### Memory
* `probe_read(dst, size, src)` - Safely read kernel memory
* `probe_read_str(dst, src)` - Safely read string from kernel memory
* `deref(ptr)` - Dereference a pointer
### Random
* `random()` - Get pseudo-random number
**See also:** {doc}`../user-guide/helpers`
## Type System
PythonBPF uses Python's `ctypes` module for type definitions:
### Integer Types
* `c_int8`, `c_int16`, `c_int32`, `c_int64` - Signed integers
* `c_uint8`, `c_uint16`, `c_uint32`, `c_uint64` - Unsigned integers
### Other Types
* `c_char`, `c_bool` - Characters and booleans
* `c_void_p` - Void pointers
* `str(N)` - Fixed-length strings
## Examples
### Basic Usage
```python
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load_and_attach()
trace_pipe()
```
### With Maps
```python
from pythonbpf import bpf, map, section, bpfglobal, BPF
from pythonbpf.maps import HashMap
from pythonbpf.helper import pid
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
@bpf
@map
def counters() -> HashMap:
return HashMap(key=c_uint32, value=c_uint64, max_entries=256)
@bpf
@section("tracepoint/syscalls/sys_enter_clone")
def count_clones(ctx: c_void_p) -> c_int64:
process_id = pid()
count = counters.lookup(process_id)
if count:
counters.update(process_id, count + 1)
else:
counters.update(process_id, 1)
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load_and_attach()
```
### With Structs
```python
from pythonbpf import bpf, struct, map, section, bpfglobal, BPF
from pythonbpf.maps import RingBuffer
from pythonbpf.helper import pid, ktime, comm
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
@bpf
@struct
class Event:
timestamp: c_uint64
pid: c_uint32
comm: str(16)
@bpf
@map
def events() -> RingBuffer:
return RingBuffer(max_entries=4096)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def track_exec(ctx: c_void_p) -> c_int64:
event = Event()
event.timestamp = ktime()
event.pid = pid()
comm(event.comm)
events.output(event)
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load_and_attach()
```
## See Also
* {doc}`../user-guide/index` - Comprehensive user guide
* {doc}`../getting-started/quickstart` - Quick start tutorial
* [GitHub Repository](https://github.com/pythonbpf/Python-BPF) - Source code and examples

105
docs/conf.py Normal file
View File

@ -0,0 +1,105 @@
# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
import os
import sys
# Add the parent directory to the path so we can import pythonbpf
sys.path.insert(0, os.path.abspath(".."))
# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
project = "PythonBPF"
copyright = "2026, Pragyansh Chaturvedi, Varun Mallya"
author = "Pragyansh Chaturvedi, Varun Mallya"
release = "0.1.8"
version = "0.1.8"
# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
extensions = [
"myst_parser",
"sphinx.ext.autodoc",
"sphinx.ext.napoleon",
"sphinx.ext.viewcode",
"sphinx.ext.intersphinx",
"sphinx_copybutton",
]
# MyST-Parser configuration
myst_enable_extensions = [
"colon_fence",
"deflist",
"fieldlist",
]
# Napoleon settings for Google/NumPy style docstrings
napoleon_google_docstring = True
napoleon_numpy_docstring = True
napoleon_include_init_with_doc = True
napoleon_include_private_with_doc = False
napoleon_include_special_with_doc = True
napoleon_use_admonition_for_examples = True
napoleon_use_admonition_for_notes = True
napoleon_use_admonition_for_references = False
napoleon_use_ivar = False
napoleon_use_param = True
napoleon_use_rtype = True
napoleon_type_aliases = None
# Intersphinx mapping
intersphinx_mapping = {
"python": ("https://docs.python.org/3", None),
"llvmlite": ("https://llvmlite.readthedocs.io/en/latest/", None),
}
templates_path = ["_templates"]
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
# Source file suffixes
source_suffix = {
".rst": "restructuredtext",
".md": "markdown",
}
# The master toctree document
master_doc = "index"
# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
html_theme = "sphinx_rtd_theme"
html_static_path = ["_static"]
# Theme options
html_theme_options = {
"logo_only": False,
"display_version": True,
"prev_next_buttons_location": "bottom",
"style_external_links": False,
"vcs_pageview_mode": "",
# Toc options
"collapse_navigation": False,
"sticky_navigation": True,
"navigation_depth": 4,
"includehidden": True,
"titles_only": False,
}
# -- Options for autodoc -----------------------------------------------------
autodoc_default_options = {
"members": True,
"member-order": "bysource",
"special-members": "__init__",
"undoc-members": True,
"exclude-members": "__weakref__",
}
autodoc_typehints = "description"
exclude_patterns = ["README.md"]

View File

@ -0,0 +1,35 @@
# Getting Started
Welcome to PythonBPF! This section will help you get started with writing eBPF programs in Python.
## What You'll Learn
In this section, you'll learn how to:
1. **Install PythonBPF** - Set up your development environment with all necessary dependencies
2. **Write Your First Program** - Create a simple BPF program to understand the basics
3. **Understand Core Concepts** - Learn about decorators, compilation, and program structure
## Prerequisites
Before you begin, make sure you have:
* A Linux system (eBPF requires Linux kernel 4.15+)
* Python 3.10 or higher
* Root or sudo access (required for loading BPF programs)
## Next Steps
After completing the getting started guide, you can:
* Explore the {doc}`../user-guide/index` for detailed information on features
* Check out the {doc}`../api/index`
* Browse the [examples directory](https://github.com/pythonbpf/Python-BPF/tree/master/examples) and the [BCC examples directory](https://github.com/pythonbpf/Python-BPF/tree/master/BCC-Examples)
## Need Help?
If you encounter any issues:
* Check the [GitHub Issues](https://github.com/pythonbpf/Python-BPF/issues) for known problems
* Review the [README](https://github.com/pythonbpf/Python-BPF/blob/master/README.md) for additional information
* Reach out to the maintainers: [@r41k0u](https://github.com/r41k0u) and [@varun-r-mallya](https://github.com/varun-r-mallya)

View File

@ -0,0 +1,182 @@
# Installation
This guide will walk you through installing PythonBPF and its dependencies.
## Prerequisites
### System Requirements
PythonBPF requires:
* **Linux** - eBPF is a Linux kernel feature (kernel 4.15 or higher recommended)
* **Python 3.10+** - Python 3.10 or higher is required
* **Root/sudo access** - Loading BPF programs into the kernel requires elevated privileges
### Required System Packages
Before installing PythonBPF, you need to install the following system packages:
#### On Ubuntu/Debian:
```bash
sudo apt-get update
sudo apt-get install -y bpftool clang llvm
```
#### On Fedora/RHEL/CentOS:
```bash
sudo dnf install -y bpftool clang llvm
```
#### On Arch Linux:
```bash
sudo pacman -S bpf clang llvm
```
## Installing PythonBPF
### From PyPI (Recommended)
The easiest way to install PythonBPF is using uv or pip:
**Using uv (recommended):**
```bash
uv pip install pythonbpf pylibbpf
```
**Using pip:**
```bash
pip install pythonbpf pylibbpf
```
This will install:
* `pythonbpf` - The main package for writing and compiling BPF programs
* `pylibbpf` - Python bindings for libbpf, used to load and attach BPF programs
### Development Installation
If you want to contribute to PythonBPF or work with the latest development version:
1. Clone the repository:
```bash
git clone https://github.com/pythonbpf/Python-BPF.git
cd Python-BPF
```
2. Create and activate a virtual environment:
```bash
python3 -m venv .venv
source .venv/bin/activate # On Windows: .venv\Scripts\activate
```
3. Install in development mode:
**Using uv (recommended):**
```bash
uv pip install -e .
uv pip install pylibbpf
```
**Using pip:**
```bash
pip install -e .
pip install pylibbpf
```
4. Install development dependencies:
```bash
make install
```
### Installing Documentation Dependencies
If you want to build the documentation locally:
**Using uv (recommended):**
```bash
uv pip install pythonbpf[docs]
# Or from the repository root:
uv pip install -e .[docs]
```
**Using pip:**
```bash
pip install pythonbpf[docs]
# Or from the repository root:
pip install -e .[docs]
```
## Generating vmlinux.py
`vmlinux.py` contains the running kernel's data structures and is analogous to `vmlinux.h` included in eBPF programs written in C. Some examples require access to it. To use these features, you need to generate a `vmlinux.py` file:
1. Install additional dependencies:
**Using uv (recommended):**
```bash
uv pip install ctypeslib2
```
**Using pip:**
```bash
pip install ctypeslib2
```
2. Generate the vmlinux.py file:
```bash
sudo tools/vmlinux-gen.py
```
3. Copy the generated file to your working directory or the examples directory as needed.
```{warning}
The `vmlinux.py` file is kernel-specific. If you upgrade your kernel, you may need to regenerate this file.
```
## Verifying Installation
To verify that PythonBPF is installed correctly, run:
```bash
python3 -c "import pythonbpf; print(pythonbpf.__all__)"
```
You should see output similar to:
```
['bpf', 'map', 'section', 'bpfglobal', 'struct', 'compile_to_ir', 'compile', 'BPF', 'trace_pipe', 'trace_fields']
```
## Troubleshooting
### Permission Errors
If you encounter permission errors when running BPF programs:
* Make sure you're running with `sudo` or as root
* Check that `/sys/kernel/tracing/` is accessible
### LLVM/Clang Not Found
If you get errors about `llc` or `clang` not being found:
* Verify they're installed: `which llc` and `which clang`
* Check your PATH environment variable includes the LLVM bin directory
### Import Errors
If Python can't find the `pythonbpf` module:
* Make sure you've activated your virtual environment
* Verify installation with `uv pip list | grep pythonbpf` or `pip list | grep pythonbpf`
* Try reinstalling: `uv pip install --force-reinstall pythonbpf` or `pip install --force-reinstall pythonbpf`
## Next Steps
Now that you have PythonBPF installed, continue to the {doc}`quickstart` guide to write your first BPF program!

View File

@ -0,0 +1,249 @@
# Quick Start
This guide will walk you through creating your first BPF program with PythonBPF.
## Your First BPF Program
Let's create a simple "Hello World" program that prints a message every time a process is executed on your system.
### Step 1: Create the Program
Create a new file called `hello_world.py`:
```python
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load()
b.attach_all()
trace_pipe()
```
### Step 2: Run the Program
Run the program with sudo (required for BPF operations):
```bash
sudo python3 hello_world.py
```
### Step 3: See it in Action
Open another terminal and run any command:
```bash
ls
echo "test"
date
```
You should see "Hello, World!" printed in the first terminal for each command executed!
Press `Ctrl+C` to stop the program.
## Understanding the Code
Let's break down what each part does:
### Imports
```python
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from ctypes import c_void_p, c_int64
```
* `bpf` - Decorator to mark functions for BPF compilation
* `section` - Decorator to specify which kernel event to attach to
* `bpfglobal` - Decorator for BPF global variables
* `BPF` - Class to compile, load, and attach BPF programs
* `trace_pipe` - Utility to read kernel trace output (similar to BCC)
* `c_void_p`, `c_int64` - C types for function signatures
### The BPF Function
```python
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return 0
```
* `@bpf` - Marks this function to be compiled to BPF bytecode
* `@section("tracepoint/syscalls/sys_enter_execve")` - Attaches to the execve syscall tracepoint (called when processes start)
* `ctx: c_void_p` - Context parameter (required for all BPF functions)
* `print()` - the PythonBPF API for `bpf_printk` helper function
* `return 0` - BPF functions must return an integer
### License Declaration
```python
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
```
* The Linux kernel requires BPF programs to declare a license
* Most kernel features require GPL-compatible licenses
* This is defined as a BPF global variable
### Compilation and Execution
```python
b = BPF()
b.load()
b.attach_all()
trace_pipe()
```
* `BPF()` - Creates a BPF object and compiles the current file
* `b.load()` - Loads the compiled BPF program into the kernel
* `b.attach_all()` - Attaches all BPF programs to their specified hooks
* `trace_pipe()` - Reads and displays output from the kernel trace buffer
Alternatively, you can also use the `compile()` function to compile the BPF code to an object file:
```python
from pythonbpf import compile
```
This object file can then be loaded using any other userspace library in any language.
## Next Example: Tracking Process IDs
Let's make a more interesting program that tracks which processes are being created:
```python
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from pythonbpf.helper import pid
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def track_exec(ctx: c_void_p) -> c_int64:
process_id = pid()
print(f"Process with PID: {process_id} is starting")
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load()
b.attach_all()
trace_pipe()
```
This program uses BPF helper functions:
* `pid()` - Gets the current process ID
Run it with `sudo python3 track_exec.py` and watch processes being created!
## Common Patterns
### Tracepoints
Tracepoints are predefined hooks in the kernel. Common ones include:
```python
# System calls
@section("tracepoint/syscalls/sys_enter_execve")
@section("tracepoint/syscalls/sys_enter_clone")
@section("tracepoint/syscalls/sys_enter_open")
# Scheduler events
@section("tracepoint/sched/sched_process_fork")
@section("tracepoint/sched/sched_switch")
```
### Kprobes
Kprobes allow you to attach to any kernel function:
```python
@section("kprobe/do_sys_open")
def trace_open(ctx: c_void_p) -> c_int64:
print("File is being opened")
return 0
```
### XDP (eXpress Data Path)
For network packet processing:
```python
from pythonbpf.helper import XDP_PASS
@section("xdp")
def xdp_pass(ctx: c_void_p) -> c_int64:
return XDP_PASS
```
## Best Practices
1. **Always include a LICENSE** - Required by the kernel
2. **Use type hints** - Required by PythonBPF to generate correct code
3. **Return the correct type** - Match the expected return type for your program type
4. **Test incrementally** - Start simple and add complexity gradually
5. **Check kernel logs** - Use `dmesg` to see BPF verifier messages if loading fails
## Common Issues
### Program Won't Load
If your BPF program fails to load:
* Check `dmesg` for verifier error messages
* Ensure your LICENSE is GPL-compatible
* Verify you're using supported BPF features
* Make sure return types match function signatures
### No Output
If you don't see output:
* Verify the tracepoint/kprobe is being triggered
* Check that you're running with sudo
* Ensure `/sys/kernel/tracing/trace_pipe` is accessible
### Compilation Errors
If compilation fails:
* Check that `llc` is installed and in your PATH
* Verify your Python syntax is correct
* Ensure all imported types are from `ctypes`
* In the worst case, compile object files manually using `compile_to_ir()` and `llc` to get detailed errors
### Verification Failure
If verification fails:
* Compile the object files using `compile()` function instead of loading directly
* Run `sudo check.sh check <bpf>.o` to get detailed verification output
## Next Steps
Now that you understand the basics, explore:
* {doc}`../user-guide/decorators` - Learn about all available decorators
* {doc}`../user-guide/maps` - Use BPF maps for data storage and communication
* {doc}`../user-guide/structs` - Define custom data structures
* {doc}`../user-guide/helpers` - Discover all available BPF helper functions
* [Examples directory](https://github.com/pythonbpf/Python-BPF/tree/master/examples) - See more complex examples

95
docs/index.md Normal file
View File

@ -0,0 +1,95 @@
# PythonBPF Documentation
Welcome to **PythonBPF** - a Python frontend for writing eBPF programs without embedding C code. PythonBPF uses [llvmlite](https://github.com/numba/llvmlite) to generate LLVM IR and compiles directly to eBPF object files that can be loaded into the Linux kernel.
```{note}
This project is under active development.
```
## What is PythonBPF?
PythonBPF is an LLVM IR generator for eBPF programs written in Python. It provides:
* **Pure Python syntax** - Write eBPF programs in Python using familiar decorators and type annotations
* **Direct compilation** - Compile to LLVM object files without relying on BCC
* **Full eBPF features** - Support for maps, helpers, global definitions, and more
* **Integration with libbpf** - Works with [pylibbpf](https://github.com/pythonbpf/pylibbpf) for object loading and execution
## Quick Example
Here's a simple "Hello World" BPF program that traces process creation:
```python
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load()
b.attach_all()
trace_pipe()
```
## Features
* Generate eBPF programs directly using Python syntax
* Compile to LLVM object files for kernel execution
* Built with `llvmlite` for IR generation
* Supports maps, helpers, and global definitions for BPF
* Companion project: [pylibbpf](https://github.com/pythonbpf/pylibbpf), which provides bindings for libbpf
## Table of Contents
```{toctree}
:maxdepth: 2
:caption: Getting Started
getting-started/index
getting-started/installation
getting-started/quickstart
```
```{toctree}
:maxdepth: 2
:caption: User Guide
user-guide/index
user-guide/decorators
user-guide/maps
user-guide/structs
user-guide/compilation
user-guide/helpers
```
```{toctree}
:maxdepth: 2
:caption: API Reference
api/index
```
## Links
* **GitHub Repository**: [pythonbpf/Python-BPF](https://github.com/pythonbpf/Python-BPF)
* **PyPI Package**: [pythonbpf](https://pypi.org/project/pythonbpf/)
* **Video Demo**: [YouTube](https://www.youtube.com/watch?v=eFVhLnWFxtE)
## License
PythonBPF is licensed under the Apache License 2.0.
## Indices and tables
* {ref}`genindex`
* {ref}`modindex`
* {ref}`search`

4
docs/requirements.txt Normal file
View File

@ -0,0 +1,4 @@
myst-parser>=2.0
sphinx>=7.0
sphinx-copybutton
sphinx-rtd-theme>=2.0

View File

@ -0,0 +1,432 @@
# Compilation
PythonBPF provides several functions and classes for compiling Python code into BPF bytecode and loading it into the kernel.
## Overview
The compilation process transforms Python code into executable BPF programs:
1. **Python AST** → LLVM IR generation (using llvmlite)
2. **LLVM IR** → BPF bytecode (using llc)
3. **BPF Object** → Kernel loading (using libbpf)
## Compilation Functions
### compile_to_ir()
Compile Python source to LLVM Intermediate Representation.
#### Signature
```python
def compile_to_ir(filename: str, output: str, loglevel=logging.WARNING)
```
#### Parameters
* `filename` - Path to the Python source file to compile
* `output` - Path where the LLVM IR file (.ll) should be written
* `loglevel` - Logging level (default: `logging.WARNING`)
#### Usage
```python
from pythonbpf import compile_to_ir
import logging
# Compile to LLVM IR
compile_to_ir(
filename="my_bpf_program.py",
output="my_bpf_program.ll",
loglevel=logging.DEBUG
)
```
#### Output
This function generates an `.ll` file containing LLVM IR, which is human-readable assembly-like code. This is useful for:
* Debugging compilation issues
* Understanding code generation
### compile()
Compile Python source to BPF object file.
#### Signature
```python
def compile(filename: str = None, output: str = None, loglevel=logging.WARNING)
```
#### Parameters
* `filename` - Path to the Python source file (default: calling file)
* `output` - Path for the output object file (default: same name with `.o` extension)
* `loglevel` - Logging level (default: `logging.WARNING`)
#### Usage
```python
from pythonbpf import compile
import logging
# Compile current file
compile()
# Compile specific file
compile(filename="my_program.py", output="my_program.o")
# Compile with debug logging
compile(loglevel=logging.DEBUG)
```
#### Output
This function generates a `.o` file containing BPF bytecode that can be:
* Loaded into the kernel
* Inspected with `bpftool`
* Verified with the BPF verifier
* Distributed as a compiled binary
### BPF Class
The `BPF` class provides a high-level interface to compile, load, and attach BPF programs.
#### Signature
```python
class BPF:
def __init__(self, filename: str = None, loglevel=logging.WARNING)
def load(self)
def attach_all(self)
def load_and_attach(self)
```
#### Parameters
* `filename` - Path to Python source file (default: calling file)
* `loglevel` - Logging level (default: `logging.WARNING`)
#### Methods
##### __init__()
Create a BPF object and compile the source.
```python
from pythonbpf import BPF
# Compile current file
b = BPF()
# Compile specific file
b = BPF(filename="my_program.py")
```
##### load()
Load the compiled BPF program into the kernel.
```python
b = BPF()
b.load()
```
This method:
* Loads the BPF object file into the kernel
* Creates maps
* Verifies the BPF program
* Returns a `BpfObject` instance
##### attach_all()
Attach all BPF programs to their specified hooks.
```python
b = BPF()
b.load()
b.attach_all()
```
This method:
* Attaches tracepoints
* Attaches kprobes/kretprobes
* Attaches XDP programs
* Enables all hooks
##### load_and_attach()
Convenience method that loads and attaches in one call.
```python
b = BPF()
b.load_and_attach()
```
Equivalent to:
```python
b = BPF()
b.load()
b.attach_all()
```
## Complete Example
Here's a complete example showing the compilation workflow:
```python
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def trace_exec(ctx: c_void_p) -> c_int64:
print("Process started")
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
if __name__ == "__main__":
# Method 1: Simple compilation and loading
b = BPF()
b.load_and_attach()
trace_pipe()
# Method 2: Step-by-step
# b = BPF()
# b.load()
# b.attach_all()
# trace_pipe()
# Method 3: Manual compilation
# from pythonbpf import compile
# compile(filename="my_program.py", output="my_program.o")
# # Then load with pylibbpf directly
```
## Compilation Pipeline Details
### AST Parsing
The Python `ast` module parses your source code:
```python
import ast
tree = ast.parse(source_code, filename)
```
The AST is then walked to find:
* Functions decorated with `@bpf`
* Classes decorated with `@struct`
* Map definitions with `@map`
* Global variables with `@bpfglobal`
### IR Generation
PythonBPF uses `llvmlite` to generate LLVM IR:
```python
from llvmlite import ir
# Create module
module = ir.Module(name='bpf_module')
module.triple = 'bpf'
# Generate IR for each BPF function
# ...
```
Key aspects of IR generation:
* Type conversion (Python types → LLVM types)
* Function definitions
* Map declarations
* Global variable initialization
* Debug information
### BPF Compilation
The LLVM IR is compiled to BPF bytecode using `llc`:
```bash
llc -march=bpf -filetype=obj input.ll -o output.o
```
### Kernel Loading
The compiled object is loaded using `pylibbpf`:
```python
from pylibbpf import BpfObject
obj = BpfObject(path="program.o")
obj.load()
```
## Debugging Compilation
### Logging
Enable debug logging to see compilation details:
```python
import logging
from pythonbpf import BPF
b = BPF(loglevel=logging.DEBUG)
```
This will show:
* AST parsing details
* IR generation steps
* Compilation commands
* Loading status
### Inspecting LLVM IR
Generate and inspect the IR file:
```python
from pythonbpf import compile_to_ir
compile_to_ir("program.py", "program.ll")
```
Then examine `program.ll` to understand the generated code.
### Using bpftool
Inspect compiled objects with `bpftool`:
```bash
# Show program info
bpftool prog show
# Dump program instructions
bpftool prog dump xlated id <ID>
# Dump program JIT code
bpftool prog dump jited id <ID>
# Show maps
bpftool map show
# Dump map contents
bpftool map dump id <ID>
```
### Verifier Errors
If the kernel verifier rejects your program:
* Check `dmesg` for detailed error messages:
```bash
sudo dmesg | tail -50
```
## Compilation Options
### Optimization Levels
While PythonBPF doesn't expose optimization flags directly, you can:
1. Manually compile IR with specific flags:
```bash
llc -march=bpf -O2 -filetype=obj program.ll -o program.o
```
2. Modify the compilation pipeline in your code
### Debug Information
PythonBPF automatically generates debug information (DWARF) for:
* Function names
* Variable names
* Type information
This helps with:
* Stack traces
* Debugging with `bpftool`
* Source-level debugging
## Working with Compiled Objects
### Loading Pre-compiled Objects
You can load previously compiled objects:
```python
from pylibbpf import BpfObject
# Load object file
obj = BpfObject(path="my_program.o")
obj.load()
# Attach programs
# (specific attachment depends on program type)
```
### Distribution
Distribute compiled BPF objects:
1. Compile once:
```python
from pythonbpf import compile
compile(filename="program.py", output="program.o")
```
2. Ship `program.o` file
3. Load on target systems:
```python
from pylibbpf import BpfObject
obj = BpfObject(path="program.o")
obj.load()
```
### Version Compatibility
BPF objects are generally compatible across kernel versions, but:
* Some features require specific kernel versions
* Helper functions may not be available on older kernels
* BTF (BPF Type Format) requirements vary
## Troubleshooting
### Compilation Fails
If compilation fails:
* Check Python syntax
* Verify all decorators are correct
* Ensure type hints are present
* Check for unsupported Python features
### Loading Fails
If loading fails:
* Check `dmesg` for verifier errors
* Verify LICENSE is set correctly
* Ensure helper functions are valid
* Check map definitions
### Programs Don't Attach
If attachment fails:
* Verify section names are correct
* Check that hooks exist on your kernel
* Ensure you have sufficient permissions
* Verify kernel version supports the feature
## Next Steps
* Learn about {doc}`helpers` for available BPF helper functions
* Explore {doc}`maps` for data storage
* See {doc}`decorators` for compilation markers

View File

@ -0,0 +1,448 @@
# Decorators
Decorators are the primary way to mark Python code for BPF compilation. PythonBPF provides five core decorators that control how your code is transformed into eBPF bytecode.
## @bpf
The `@bpf` decorator marks functions or classes for BPF compilation.
### Usage
```python
from pythonbpf import bpf
@bpf
def my_function(ctx):
# This function will be compiled to BPF bytecode
pass
```
### Description
Any function or class decorated with `@bpf` will be processed by the PythonBPF compiler and transformed into LLVM IR, then compiled to BPF bytecode. This is the fundamental decorator that enables BPF compilation.
### Rules
* Must be used on top-level functions or classes
* The function must have proper type hints
* Return types must be BPF-compatible
* Only BPF-compatible operations are allowed inside
### Example
```python
from pythonbpf import bpf, section
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def trace_exec(ctx: c_void_p) -> c_int64:
print("Process started")
return c_int64(0)
```
## @section
The `@section(name)` decorator specifies which kernel hook to attach the BPF program to.
### Usage
```python
from pythonbpf import bpf, section
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def trace_open(ctx):
pass
```
### Section Types
#### Tracepoints
Tracepoints are stable kernel hooks defined in `/sys/kernel/tracing/events/`:
```python
# System call tracepoints
@section("tracepoint/syscalls/sys_enter_execve")
@section("tracepoint/syscalls/sys_enter_clone")
@section("tracepoint/syscalls/sys_enter_open")
@section("tracepoint/syscalls/sys_exit_read")
# Scheduler tracepoints
@section("tracepoint/sched/sched_process_fork")
@section("tracepoint/sched/sched_process_exit")
@section("tracepoint/sched/sched_switch")
# Block I/O tracepoints
@section("tracepoint/block/block_rq_insert")
@section("tracepoint/block/block_rq_complete")
```
#### Kprobes
Kprobes allow attaching to any kernel function:
```python
@section("kprobe/do_sys_open")
def trace_sys_open(ctx):
pass
@section("kprobe/__x64_sys_write")
def trace_write(ctx):
pass
```
#### Kretprobes
Kretprobes trigger when a kernel function returns:
```python
@section("kretprobe/do_sys_open")
def trace_open_return(ctx):
pass
```
#### XDP (eXpress Data Path)
For network packet processing at the earliest point:
```python
from pythonbpf.helper import XDP_PASS
from ctypes import c_void_p, c_int64
@section("xdp")
def xdp_prog(ctx: c_void_p) -> c_int64:
# XDP_PASS, XDP_DROP, XDP_ABORTED constants available from pythonbpf.helper
return XDP_PASS
```
### Finding Tracepoints
To find available tracepoints on your system:
```bash
# List all tracepoints
ls /sys/kernel/tracing/events/
# List syscall tracepoints
ls /sys/kernel/tracing/events/syscalls/
# View tracepoint format
cat /sys/kernel/tracing/events/syscalls/sys_enter_open/format
```
## @map
The `@map` decorator marks a function as a BPF map definition.
### Usage
```python
from pythonbpf import bpf, map
from pythonbpf.maps import HashMap
from ctypes import c_uint32, c_uint64
@bpf
@map
def my_map() -> HashMap:
return HashMap(key=c_uint32, value=c_uint64, max_entries=1024)
```
### Description
Maps are BPF data structures used to:
* Store state between BPF program invocations
* Communicate data between BPF programs
* Share data with userspace
The function must return a map type (HashMap, PerfEventArray, RingBuffer) and the return type must be annotated.
### Example
```python
from pythonbpf import bpf, map, section
from pythonbpf.maps import HashMap
from pythonbpf.helper import pid
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
@bpf
@map
def process_count() -> HashMap:
return HashMap(key=c_uint32, value=c_uint64, max_entries=4096)
@bpf
@section("tracepoint/syscalls/sys_enter_clone")
def count_clones(ctx: c_void_p) -> c_int64:
process_id = pid()
count = process_count.lookup(process_id)
if count:
process_count.update(process_id, count + 1)
else:
process_count.update(process_id, c_uint64(1))
return 0
```
See {doc}`maps` for more details on available map types.
## @struct
The `@struct` decorator marks a class as a BPF struct definition.
### Usage
```python
from pythonbpf import bpf, struct
from ctypes import c_uint64, c_uint32
@bpf
@struct
class Event:
timestamp: c_uint64
pid: c_uint32
cpu: c_uint32
```
### Description
Structs allow you to define custom data types for use in BPF programs. They can be used:
* As map keys and values
* For perf event output
* In ring buffer submissions
* As local variables
### Field Types
Supported field types include:
* **Integer types**: `c_int8`, `c_int16`, `c_int32`, `c_int64`, `c_uint8`, `c_uint16`, `c_uint32`, `c_uint64`
* **Pointers**: `c_void_p`, `c_char_p`
* **Fixed strings**: `str(N)` where N is the size (e.g., `str(16)`)
* **Nested structs**: Other `@struct` decorated classes
### Example
```python
from pythonbpf import bpf, struct, map, section
from pythonbpf.maps import RingBuffer
from pythonbpf.helper import pid, ktime
from ctypes import c_void_p, c_int64, c_uint64, c_uint32
@bpf
@struct
class ProcessEvent:
timestamp: c_uint64
pid: c_uint32
comm: str(16)
@bpf
@map
def events() -> RingBuffer:
return RingBuffer(max_entries=4096)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def track_processes(ctx: c_void_p) -> c_int64:
event = ProcessEvent()
event.timestamp = ktime()
event.pid = pid()
comm(event.comm) # Fills event.comm with process name
events.output(event)
return 0
```
See {doc}`structs` for more details on working with structs.
## @bpfglobal
The `@bpfglobal` decorator marks a function as a BPF global variable definition.
### Usage
```python
from pythonbpf import bpf, bpfglobal
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
```
### Description
BPF global variables are values that:
* Are initialized when the program loads
* Can be read by all BPF functions
* Must be constant (cannot be modified at runtime in current implementation)
### Common Global Variables
#### LICENSE (Required)
Every BPF program must declare a license:
```python
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
```
Valid licenses include:
* `"GPL"` - GNU General Public License
* `"GPL v2"` - GPL version 2
* `"Dual BSD/GPL"` - Dual licensed
* `"Dual MIT/GPL"` - Dual licensed
```{warning}
Many BPF features require a GPL-compatible license. Using a non-GPL license may prevent your program from loading or accessing certain kernel features.
```
#### Custom Global Variables
You can define other global variables:
```python
@bpf
@bpfglobal
def DEBUG_MODE() -> int:
return 1
@bpf
@bpfglobal
def MAX_EVENTS() -> int:
return 1000
```
These can be referenced in your BPF functions, though modifying them at runtime is currently not supported.
## Combining Decorators
Decorators are often used together. The order matters:
### Correct Order
```python
@bpf # Always first
@section("...") # Section before other decorators
def my_function():
pass
@bpf # Always first
@map # Map/struct/bpfglobal after @bpf
def my_map():
pass
@bpf # Always first
@struct # Map/struct/bpfglobal after @bpf
class MyStruct:
pass
@bpf # Always first
@bpfglobal # Map/struct/bpfglobal after @bpf
def LICENSE():
return "GPL"
```
### Examples by Use Case
#### Simple Tracepoint
```python
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def trace_open(ctx: c_void_p) -> c_int64:
return c_int64(0)
```
#### Map Definition
```python
@bpf
@map
def counters() -> HashMap:
return HashMap(key=c_uint32, value=c_uint64, max_entries=256)
```
#### Struct Definition
```python
@bpf
@struct
class Event:
timestamp: c_uint64
value: c_uint32
```
#### Global Variable
```python
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
```
## Best Practices
1. **Always use @bpf first** - It must be the outermost decorator
2. **Provide type hints** - Required for proper code generation
3. **Test incrementally** - Verify each component works before combining
## Common Errors
### Missing @bpf Decorator
```python
# Wrong - missing @bpf
@section("tracepoint/syscalls/sys_enter_open")
def my_func(ctx):
pass
# Correct
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def my_func(ctx):
pass
```
### Wrong Decorator Order
```python
# Wrong - @section before @bpf
@section("tracepoint/syscalls/sys_enter_open")
@bpf
def my_func(ctx):
pass
# Correct
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def my_func(ctx):
pass
```
### Missing Type Hints
```python
# Wrong - no type hints
@bpf
def my_func(ctx):
pass
# Correct
@bpf
def my_func(ctx: c_void_p) -> c_int64:
pass
```
## Next Steps
* Learn about {doc}`maps` for data storage and communication
* Explore {doc}`structs` for defining custom data types
* Understand {doc}`compilation` to see how code is transformed
* Check out {doc}`helpers` for available BPF helper functions

503
docs/user-guide/helpers.md Normal file
View File

@ -0,0 +1,503 @@
# Helper Functions and Utilities
PythonBPF provides helper functions and utilities for BPF programs and userspace code.
```{note}
**Work in Progress:** PythonBPF is under active development. We are constantly adding support for more helpers, kfuncs, and map types. Check back for updates!
```
For comprehensive documentation on BPF helpers, see the [eBPF Helper Functions documentation on ebpf.io](https://ebpf.io/what-is-ebpf/#helper-calls).
## BPF Helper Functions
BPF helper functions are kernel-provided functions that BPF programs can call to interact with the system. PythonBPF exposes these through the `pythonbpf.helper` module.
```python
from pythonbpf.helper import pid, ktime, comm
```
### Process and Task Information
#### pid()
Get the current process ID.
> **Linux Kernel Helper:** `bpf_get_current_pid_tgid()`
```python
from pythonbpf.helper import pid
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def trace_open(ctx: c_void_p) -> c_int64:
process_id = pid()
print(f"Process {process_id} opened a file")
return 0
```
**Returns:** `c_int32` - The process ID of the current task
#### comm()
Get the current process command name.
> **Linux Kernel Helper:** `bpf_get_current_comm()`
**Parameters:**
* `buf` - Buffer to fill with the process command name
**Returns:** `c_int64` - 0 on success, negative on error
#### uid()
Get the current user ID.
> **Linux Kernel Helper:** `bpf_get_current_uid_gid()`
```python
from pythonbpf.helper import uid
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def trace_open(ctx: c_void_p) -> c_int64:
user_id = uid()
if user_id == 0:
print("Root user opened a file")
return 0
```
**Returns:** `c_int32` - The user ID of the current task
### Time and Timing
#### ktime()
Get the current kernel time in nanoseconds since system boot.
> **Linux Kernel Helper:** `bpf_ktime_get_ns()`
```python
from pythonbpf.helper import ktime
@bpf
@section("tracepoint/syscalls/sys_enter_read")
def measure_latency(ctx: c_void_p) -> c_int64:
start_time = ktime()
# Store for later comparison
return 0
```
**Returns:** `c_int64` - Current time in nanoseconds
**Use cases:**
* Measuring latency
* Timestamping events
* Rate limiting
* Timeout detection
### CPU Information
#### smp_processor_id()
Get the ID of the CPU on which the BPF program is running.
> **Linux Kernel Helper:** `bpf_get_smp_processor_id()`
```python
from pythonbpf.helper import smp_processor_id
@bpf
@section("tracepoint/sched/sched_switch")
def track_cpu(ctx: c_void_p) -> c_int64:
cpu = smp_processor_id()
print(f"Running on CPU {cpu}")
return 0
```
**Returns:** `c_int32` - The current CPU ID
**Use cases:**
* Per-CPU statistics
* Load balancing analysis
* CPU affinity tracking
### Memory Operations
#### probe_read()
Safely read data from kernel memory.
> **Linux Kernel Helper:** `bpf_probe_read()`
```python
from pythonbpf.helper import probe_read
@bpf
def read_kernel_data(ctx: c_void_p) -> c_int64:
dst = 0
size = 8
src = ctx # kernel address
result = probe_read(dst, size, src)
if result == 0:
print(f"Read value: {dst}")
return 0
```
**Parameters:**
* `dst` - Destination buffer
* `size` - Number of bytes to read
* `src` - Source kernel address
**Returns:** `c_int64` - 0 on success, negative on error
**Safety:** This function performs bounds checking and prevents invalid memory access.
#### probe_read_str()
Safely read a null-terminated string from kernel memory.
> **Linux Kernel Helper:** `bpf_probe_read_str()`
**Parameters:**
* `dst` - Destination buffer (string)
* `src` - Source kernel address
**Returns:** `c_int64` - Length of string on success, negative on error
### Random Numbers
#### random()
Generate a pseudo-random 32-bit number.
> **Linux Kernel Helper:** `bpf_get_prandom_u32()`
```python
from pythonbpf.helper import random
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def sample_events(ctx: c_void_p) -> c_int64:
# Sample 1% of events
if (random() % 100) == 0:
print("Sampled event")
return 0
```
**Returns:** `c_int32` - A pseudo-random number
### Network Helpers
#### skb_store_bytes()
Store bytes into a socket buffer (for network programs).
> **Linux Kernel Helper:** `bpf_skb_store_bytes()`
```python
from pythonbpf.helper import skb_store_bytes
@bpf
@section("classifier")
def modify_packet(ctx: c_void_p) -> c_int32:
offset = 14 # Skip Ethernet header
data = b"\x00\x01\x02\x03"
size = len(data)
result = skb_store_bytes(offset, data, size)
return 0
```
**Parameters:**
* `offset` - Offset in the socket buffer
* `from_buf` - Data to write
* `size` - Number of bytes to write
* `flags` - Optional flags
**Returns:** `c_int64` - 0 on success, negative on error
## Userspace Utilities
PythonBPF provides utilities for working with BPF programs from Python userspace code.
### trace_pipe()
Read and display output from the kernel trace pipe.
```python
from pythonbpf import trace_pipe
# After loading and attaching BPF programs
trace_pipe()
```
**Description:**
The `trace_pipe()` function reads from `/sys/kernel/tracing/trace_pipe` and displays BPF program output to stdout. This is the output from `print()` statements in BPF programs.
**Usage:**
```python
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def trace_exec(ctx: c_void_p) -> c_int64:
print("Process started") # This goes to trace_pipe
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load_and_attach()
trace_pipe() # Display BPF output
```
**Behavior:**
* Blocks until Ctrl+C is pressed
* Displays output in real-time
* Shows task name, PID, CPU, timestamp, and message
* Automatically handles trace pipe access errors
**Requirements:**
* Root or sudo access
* Accessible `/sys/kernel/tracing/trace_pipe`
### trace_fields()
Parse one line from the trace pipe into structured fields.
```python
from pythonbpf import trace_fields
# Read and parse trace output
task, pid, cpu, flags, ts, msg = trace_fields()
print(f"Task: {task}, PID: {pid}, CPU: {cpu}, Time: {ts}, Message: {msg}")
```
**Returns:** Tuple of `(task, pid, cpu, flags, timestamp, message)`
* `task` - String: Task/process name (up to 16 chars)
* `pid` - Integer: Process ID
* `cpu` - Integer: CPU number
* `flags` - Bytes: Trace flags
* `timestamp` - Float: Timestamp in seconds
* `message` - String: The actual trace message
**Description:**
The `trace_fields()` function reads one line from the trace pipe and parses it into individual fields. This is useful when you need programmatic access to trace data rather than just displaying it.
**Usage:**
```python
from pythonbpf import bpf, section, bpfglobal, BPF, trace_fields
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def trace_exec(ctx: c_void_p) -> c_int64:
print(f"PID:{pid()}")
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load_and_attach()
# Process trace events
try:
while True:
task, pid, cpu, flags, ts, msg = trace_fields()
print(f"[{ts:.6f}] {task}({pid}) on CPU{cpu}: {msg}")
except KeyboardInterrupt:
print("Stopped")
```
**Error Handling:**
* Raises `ValueError` if line cannot be parsed
* Skips lines about lost events
* Blocks waiting for next line
## Helper Function Examples
### Example 1: Latency Measurement
```python
from pythonbpf import bpf, map, section, bpfglobal, BPF, trace_pipe
from pythonbpf.maps import HashMap
from pythonbpf.helper import pid, ktime
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
@bpf
@map
def start_times() -> HashMap:
return HashMap(key=c_uint32, value=c_uint64, max_entries=4096)
@bpf
@section("tracepoint/syscalls/sys_enter_read")
def read_start(ctx: c_void_p) -> c_int64:
process_id = pid()
start = ktime()
start_times.update(process_id, start)
return 0
@bpf
@section("tracepoint/syscalls/sys_exit_read")
def read_end(ctx: c_void_p) -> c_int64:
process_id = pid()
start = start_times.lookup(process_id)
if start:
latency = ktime() - start
print(f"Read latency: {latency} ns")
start_times.delete(process_id)
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load_and_attach()
trace_pipe()
```
### Example 2: Process Tracking
```python
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from pythonbpf.helper import pid, uid
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def track_exec(ctx: c_void_p) -> c_int64:
process_id = pid()
user_id = uid()
print(f"User {user_id} started process (PID: {process_id})")
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load_and_attach()
trace_pipe()
```
### Example 3: CPU Load Monitoring
```python
from pythonbpf import bpf, map, section, bpfglobal, BPF
from pythonbpf.maps import HashMap
from pythonbpf.helper import smp_processor_id
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
@bpf
@map
def cpu_counts() -> HashMap:
return HashMap(key=c_uint32, value=c_uint64, max_entries=256)
@bpf
@section("tracepoint/sched/sched_switch")
def count_switches(ctx: c_void_p) -> c_int64:
cpu = smp_processor_id()
count = cpu_counts.lookup(cpu)
if count:
cpu_counts.update(cpu, count + 1)
else:
cpu_counts.update(cpu, 1)
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load_and_attach()
import time
time.sleep(5)
# Read results
from pylibbpf import BpfMap
map_obj = BpfMap(b, cpu_counts)
for cpu, count in map_obj.items():
print(f"CPU {cpu}: {count} context switches")
```
### Example 4: Event Sampling
```python
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from pythonbpf.helper import random, pid
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def sample_opens(ctx: c_void_p) -> c_int64:
# Sample 5% of events
if (random() % 100) < 5:
process_id = pid()
print(f"Sampled: PID {process_id} opening file")
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load_and_attach()
trace_pipe()
```
## Troubleshooting
### Helper Not Available
If a helper function doesn't work:
* Check your kernel version (some helpers are newer)
* Ensure your LICENSE is GPL-compatible
### Trace Pipe Access Denied
If `trace_pipe()` fails:
* Run with sudo/root
* Check `/sys/kernel/tracing/` is accessible
* Verify tracing is enabled in kernel config
## Examples
Check out these examples in the `BCC-Examples/` directory that demonstrate helper functions:
* [hello_world.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/hello_world.py) - Basic tracing with `print()`
* [sync_timing.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/sync_timing.py) - Using `ktime()` for timing measurements
* [hello_perf_output.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/hello_perf_output.py) - Using `pid()`, `ktime()`, and `comm()` with perf events
* [vfsreadlat.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/vfsreadlat.py) - Latency measurement with `ktime()` in kprobes
## Next Steps
* Explore {doc}`maps` for data storage with helpers
* Learn about {doc}`compilation` to understand helper implementation
* See {doc}`decorators` for marking BPF functions

87
docs/user-guide/index.md Normal file
View File

@ -0,0 +1,87 @@
# User Guide
This user guide provides comprehensive documentation for all PythonBPF features. Whether you're building simple tracing tools or complex performance monitoring systems, this guide will help you master PythonBPF.
## Overview
PythonBPF transforms Python code into eBPF bytecode that runs in the Linux kernel. It provides a Pythonic interface to eBPF features through decorators, type annotations, and familiar programming patterns.
## Core Concepts
### Decorators
PythonBPF uses decorators to mark code for BPF compilation:
* `@bpf` - Mark functions and classes for BPF compilation
* `@map` - Define BPF maps for data storage
* `@struct` - Define custom data structures
* `@section(name)` - Specify attachment points
* `@bpfglobal` - Define global variables
### Compilation Pipeline
Your Python code goes through several stages:
1. **IR Generation** - The Python AST is transformed into LLVM IR using llvmlite
2. **BPF Compilation** - LLVM IR is compiled to BPF bytecode using `llc`
3. **Loading** - The BPF object is loaded into the kernel using libbpf
4. **Attachment** - Programs are attached to kernel hooks (tracepoints, kprobes, etc.)
## Code Organization
When writing BPF programs with PythonBPF, we recommend:
1. **Use type hints** - Required for proper code generation
2. **Test incrementally** - Verify each component works before adding complexity
## Type System
PythonBPF uses Python's `ctypes` module for type definitions:
* `c_int8`, `c_int16`, `c_int32`, `c_int64` - Signed integers
* `c_uint8`, `c_uint16`, `c_uint32`, `c_uint64` - Unsigned integers
* `c_char`, `c_bool` - Characters and booleans
* `c_void_p` - Void pointers
* `str(N)` - Fixed-length strings (e.g., `str(16)` for 16-byte string)
## Example Structure
A typical PythonBPF program follows this structure:
```python
from pythonbpf import bpf, map, section, bpfglobal, BPF, compile
from pythonbpf.maps import HashMap
from ctypes import c_void_p, c_int64, c_uint32
# Define maps
@bpf
@map
def my_map() -> HashMap:
return HashMap(key=c_uint32, value=c_uint64, max_entries=1024)
# Define BPF function
@bpf
@section("tracepoint/...")
def my_function(ctx: c_void_p) -> c_int64:
# BPF logic here
return 0
# License (required)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Compile, load, and run
if __name__ == "__main__":
b = BPF()
b.load_and_attach()
# Use the program...
# Or, compile to an object file
compile()
```
## Next Steps
Start with {doc}`decorators` to learn about all available decorators, then explore the other sections to master specific features.

476
docs/user-guide/maps.md Normal file
View File

@ -0,0 +1,476 @@
# BPF Maps
Maps are BPF data structures that provide storage and communication mechanisms. They allow BPF programs to:
* Store state between invocations
* Share data between multiple BPF programs
* Communicate with userspace applications
```{note}
**Work in Progress:** PythonBPF is under active development. We are constantly adding support for more map types, helpers, and kfuncs. Check back for updates!
```
For comprehensive documentation on BPF maps, see the [eBPF Maps documentation on ebpf.io](https://ebpf.io/what-is-ebpf/#maps).
## Map Types
PythonBPF supports several map types, each optimized for different use cases.
### HashMap
Hash maps provide efficient key-value storage with O(1) lookup time.
> **Linux Kernel Map Type:** `BPF_MAP_TYPE_HASH`
#### Definition
```python
from pythonbpf import bpf, map
from pythonbpf.maps import HashMap
from ctypes import c_uint32, c_uint64
@bpf
@map
def my_map() -> HashMap:
return HashMap(
key=c_uint32,
value=c_uint64,
max_entries=1024
)
```
#### Parameters
* `key` - The type of the key (must be a ctypes type or struct)
* `value` - The type of the value (must be a ctypes type or struct)
* `max_entries` - Maximum number of entries the map can hold
#### Operations
##### lookup(key)
Look up a value by key. Returns the value if found, `None` otherwise.
```python
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def trace_open(ctx: c_void_p) -> c_int64:
value = my_map.lookup(1)
if value:
print(f"Found value: {value}")
return 0
```
##### update(key, value, flags=None)
Update or insert a key-value pair.
```python
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def track_opens(ctx: c_void_p) -> c_int64:
key = pid()
count = my_map.lookup(key)
if count:
my_map.update(key, count + 1)
else:
my_map.update(key, 1)
return 0
```
##### delete(key)
Remove an entry from the map.
```python
@bpf
def cleanup(ctx: c_void_p) -> c_int64:
my_map.delete(1)
return 0
```
#### Use Cases
* Counting events per process/CPU
* Storing timestamps for latency calculations
* Caching lookup results
* Implementing rate limiters
#### Example: Process Counter
```python
from pythonbpf import bpf, map, section, bpfglobal, BPF
from pythonbpf.maps import HashMap
from pythonbpf.helper import pid
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
@bpf
@map
def process_count() -> HashMap:
return HashMap(key=c_uint32, value=c_uint64, max_entries=4096)
@bpf
@section("tracepoint/syscalls/sys_enter_clone")
def count_processes(ctx: c_void_p) -> c_int64:
process_id = pid()
count = process_count.lookup(process_id)
if count:
new_count = count + 1
process_count.update(process_id, new_count)
else:
process_count.update(process_id, 1)
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
if __name__ == "__main__":
b = BPF()
b.load_and_attach()
# Access map from userspace
from pylibbpf import BpfMap
map_obj = BpfMap(b, process_count)
# Read values...
```
### PerfEventArray
Perf event arrays are used to send data from BPF programs to userspace with high throughput.
> **Linux Kernel Map Type:** `BPF_MAP_TYPE_PERF_EVENT_ARRAY`
#### Definition
```python
from pythonbpf.maps import PerfEventArray
@bpf
@map
def events() -> PerfEventArray:
return PerfEventArray(
key_size=c_uint32,
value_size=c_uint32
)
```
#### Parameters
* `key_size` - Type for the key (typically `c_uint32`)
* `value_size` - Type for the value (typically `c_uint32`)
#### Operations
##### output(data)
Send data to userspace. The data can be a struct or basic type.
```python
@bpf
@struct
class Event:
pid: c_uint32
timestamp: c_uint64
@bpf
@map
def events() -> PerfEventArray:
return PerfEventArray(key_size=c_uint32, value_size=c_uint32)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def send_event(ctx: c_void_p) -> c_int64:
event = Event()
event.pid = pid()
event.timestamp = ktime()
events.output(event)
return 0
```
#### Use Cases
* Sending detailed event data to userspace
* Real-time monitoring and alerting
* Collecting samples for analysis
* High-throughput data collection
#### Example: Event Logging
```python
from pythonbpf import bpf, map, struct, section, bpfglobal, BPF
from pythonbpf.maps import PerfEventArray
from pythonbpf.helper import pid, ktime, comm
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
@bpf
@struct
class ProcessEvent:
timestamp: c_uint64
pid: c_uint32
comm: str(16)
@bpf
@map
def events() -> PerfEventArray:
return PerfEventArray(key_size=c_uint32, value_size=c_uint32)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def log_exec(ctx: c_void_p) -> c_int64:
event = ProcessEvent()
event.timestamp = ktime()
event.pid = pid()
comm(event.comm) # Fills event.comm with process name
events.output(event)
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
```
### RingBuffer
Ring buffers provide efficient, ordered event delivery with lower overhead than perf event arrays.
> **Linux Kernel Map Type:** `BPF_MAP_TYPE_RINGBUF`
#### Definition
```python
from pythonbpf.maps import RingBuffer
@bpf
@map
def events() -> RingBuffer:
return RingBuffer(max_entries=4096)
```
#### Parameters
* `max_entries` - Maximum size of the ring buffer in bytes (must be power of 2)
#### Operations
##### output(data, flags=0)
Send data to the ring buffer.
```python
@bpf
@section("tracepoint/syscalls/sys_enter_open")
def log_event(ctx: c_void_p) -> c_int64:
event = Event()
event.pid = pid()
events.output(event)
return 0
```
##### reserve(size)
Reserve space in the ring buffer. Returns a pointer to the reserved space or 0 if no space available.
```python
@bpf
def reserve_space(ctx: c_void_p) -> c_int64:
ptr = events.reserve(64) # Reserve 64 bytes
if ptr:
# Use the reserved space
events.submit(ptr)
return 0
```
##### submit(data, flags=0)
Submit previously reserved space.
##### discard(data, flags=0)
Discard previously reserved space without submitting.
#### Use Cases
* Modern event streaming (preferred over PerfEventArray)
* Lower overhead event delivery
* Ordered event processing
* Kernel 5.8+ systems
#### Advantages over PerfEventArray
* Lower memory overhead
* Better performance
* Simpler API
* Ordered delivery guarantees
### BPFMapType Enum
PythonBPF supports various BPF map types through the `BPFMapType` enum:
```python
from pythonbpf.maps import BPFMapType
# Common map types
BPFMapType.BPF_MAP_TYPE_HASH # Hash map
BPFMapType.BPF_MAP_TYPE_ARRAY # Array map
BPFMapType.BPF_MAP_TYPE_PERF_EVENT_ARRAY # Perf event array
BPFMapType.BPF_MAP_TYPE_RINGBUF # Ring buffer
BPFMapType.BPF_MAP_TYPE_STACK_TRACE # Stack trace storage
BPFMapType.BPF_MAP_TYPE_LRU_HASH # LRU hash map
```
## Using Maps with Structs
Maps can store complex data types using structs as values:
```python
from pythonbpf import bpf, map, struct, section
from pythonbpf.maps import HashMap
from ctypes import c_uint32, c_uint64
@bpf
@struct
class Stats:
count: c_uint64
total_time: c_uint64
max_time: c_uint64
@bpf
@map
def process_stats() -> HashMap:
return HashMap(
key=c_uint32, # PID as key
value=Stats, # Struct as value
max_entries=1024
)
@bpf
@section("tracepoint/syscalls/sys_enter_read")
def track_stats(ctx: c_void_p) -> c_int64:
process_id = pid()
stats = process_stats.lookup(process_id)
if stats:
stats.count = stats.count + 1
process_stats.update(process_id, stats)
else:
new_stats = Stats()
new_stats.count = 1
new_stats.total_time = 0
new_stats.max_time = 0
process_stats.update(process_id, new_stats)
return 0
```
## Accessing Maps from Userspace
After loading a BPF program, you can access maps from Python using `pylibbpf`:
```python
from pythonbpf import BPF
from pylibbpf import BpfMap
# Load BPF program
b = BPF()
b.load_and_attach()
# Get map reference
map_obj = BpfMap(b, my_map)
# Read all key-value pairs
for key, value in map_obj.items():
print(f"Key: {key}, Value: {value}")
# Get all keys
keys = list(map_obj.keys())
# Get all values
values = list(map_obj.values())
# Lookup specific key
value = map_obj[key]
# Update from userspace
map_obj[key] = new_value
# Delete from userspace
del map_obj[key]
```
## Common Patterns
### Counter Pattern
```python
count = my_map.lookup(key)
if count:
my_map.update(key, count + 1)
else:
my_map.update(key, 1)
```
### Latency Tracking
```python
# Store start time
start = ktime()
start_map.update(key, start)
# Later: calculate latency
start_time = start_map.lookup(key)
if start_time:
latency = ktime() - start_time
latency_map.update(key, latency)
start_map.delete(key)
```
### Event Sampling
```python
# Only process every Nth event
count = counter.lookup(key)
if count and (count % 100) == 0:
events.output(data)
counter.update(key, count + 1 if count else 1)
```
## Troubleshooting
### Map Not Found
If you get "map not found" errors:
* Ensure the map is defined with `@bpf` and `@map`
* Check that the map name matches exactly
* Verify the BPF program loaded successfully
### Map Full
If updates fail due to map being full:
* Increase `max_entries`
* Use LRU maps for automatic eviction
* Add cleanup logic to delete old entries
### Type Errors
If you get type-related errors:
* Verify key and value types match the definition
* Check that structs are properly defined
## Examples
Check out these examples in the `BCC-Examples/` directory that demonstrate map usage:
* [sync_timing.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/sync_timing.py) - HashMap for storing timestamps
* [sync_count.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/sync_count.py) - HashMap for counting events
* [hello_perf_output.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/hello_perf_output.py) - PerfEventArray for sending structs to userspace
* [sync_perf_output.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/sync_perf_output.py) - PerfEventArray with timing data
* [disksnoop.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/disksnoop.py) - HashMap for tracking disk I/O
## Next Steps
* Learn about {doc}`structs` for defining custom value types
* Explore {doc}`helpers` for BPF helper functions
* See {doc}`compilation` to understand how maps are compiled

413
docs/user-guide/structs.md Normal file
View File

@ -0,0 +1,413 @@
# BPF Structs
Structs allow you to define custom data types for use in BPF programs. They provide a way to group related fields together and can be used as map values, event payloads, or local variables.
## Defining Structs
Use the `@bpf` and `@struct` decorators to define a BPF struct:
```python
from pythonbpf import bpf, struct
from ctypes import c_uint64, c_uint32
@bpf
@struct
class Event:
timestamp: c_uint64
pid: c_uint32
cpu: c_uint32
```
## Field Types
Structs support various field types from Python's `ctypes` module.
### Integer Types
```python
from ctypes import (
c_int8, c_int16, c_int32, c_int64,
c_uint8, c_uint16, c_uint32, c_uint64
)
@bpf
@struct
class Numbers:
small_int: c_int8 # -128 to 127
short_int: c_int16 # -32768 to 32767
int_val: c_int32 # -2^31 to 2^31-1
long_int: c_int64 # -2^63 to 2^63-1
byte: c_uint8 # 0 to 255
word: c_uint16 # 0 to 65535
dword: c_uint32 # 0 to 2^32-1
qword: c_uint64 # 0 to 2^64-1
```
### String Types
Fixed-length strings are defined using `str(N)` where N is the size:
```python
@bpf
@struct
class ProcessInfo:
name: str(16) # 16-byte string
path: str(256) # 256-byte string
```
```{note}
Strings in BPF are fixed-length and null-terminated. The size includes the null terminator.
```
### Pointer Types
```python
from ctypes import c_void_p, c_char_p
@bpf
@struct
class Pointers:
ptr: c_void_p # Generic pointer
str_ptr: c_char_p # Character pointer
```
### Nested Structs
Structs can contain other structs as fields:
```python
@bpf
@struct
class Address:
street: str(64)
city: str(32)
zip_code: c_uint32
@bpf
@struct
class Person:
name: str(32)
age: c_uint32
address: Address # Nested struct
```
## Using Structs
### As Local Variables
Create and use struct instances within BPF functions:
```python
from pythonbpf import bpf, struct, section
from pythonbpf.helper import pid, ktime, comm
from ctypes import c_void_p, c_int64, c_uint64, c_uint32
@bpf
@struct
class Event:
timestamp: c_uint64
pid: c_uint32
comm: str(16)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def capture_event(ctx: c_void_p) -> c_int64:
# Create an instance
event = Event()
# Set fields
event.timestamp = ktime()
event.pid = pid()
comm(event.comm) # Fills event.comm with process name
# Use the struct
print(f"Process with PID {event.pid}")
return 0
```
### As Map Keys and Values
Use structs as keys and values in maps for complex state storage:
```python
from pythonbpf import bpf, struct, map, section
from pythonbpf.maps import HashMap
from ctypes import c_uint32, c_uint64
@bpf
@struct
class ProcessStats:
syscall_count: c_uint64
total_time: c_uint64
max_latency: c_uint64
@bpf
@map
def stats() -> HashMap:
return HashMap(
key=c_uint32,
value=ProcessStats,
max_entries=1024
)
@bpf
@section("tracepoint/syscalls/sys_enter_read")
def track_syscalls(ctx: c_void_p) -> c_int64:
process_id = pid()
# Lookup existing stats
s = stats.lookup(process_id)
if s:
# Update existing stats
s.syscall_count = s.syscall_count + 1
stats.update(process_id, s)
else:
# Create new stats
new_stats = ProcessStats()
new_stats.syscall_count = 1
new_stats.total_time = 0
new_stats.max_latency = 0
stats.update(process_id, new_stats)
return 0
```
### With Perf Events
Send struct data to userspace using PerfEventArray:
```python
from pythonbpf import bpf, struct, map, section
from pythonbpf.maps import PerfEventArray
from pythonbpf.helper import pid, ktime, comm
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
@bpf
@struct
class ProcessEvent:
timestamp: c_uint64
pid: c_uint32
ppid: c_uint32
comm: str(16)
@bpf
@map
def events() -> PerfEventArray:
return PerfEventArray(key_size=c_uint32, value_size=c_uint32)
@bpf
@section("tracepoint/sched/sched_process_fork")
def trace_fork(ctx: c_void_p) -> c_int64:
event = ProcessEvent()
event.timestamp = ktime()
event.pid = pid()
comm(event.comm) # Fills event.comm with process name
# Send to userspace
events.output(event)
return 0
```
### With Ring Buffers
```python
from pythonbpf import bpf, struct, map, section
from pythonbpf.maps import RingBuffer
@bpf
@struct
class FileEvent:
timestamp: c_uint64
pid: c_uint32
filename: str(256)
@bpf
@map
def events() -> RingBuffer:
return RingBuffer(max_entries=4096)
@bpf
@section("tracepoint/syscalls/sys_enter_openat")
def trace_open(ctx: c_void_p) -> c_int64:
event = FileEvent()
event.timestamp = ktime()
event.pid = pid()
events.output(event)
return 0
```
## Field Access and Modification
### Reading Fields
Access struct fields using dot notation:
```python
event = Event()
ts = event.timestamp
process_id = event.pid
```
### Writing Fields
Assign values to fields:
```python
event = Event()
event.timestamp = ktime()
event.pid = pid()
comm(event.comm)
```
## StructType Class
PythonBPF provides a `StructType` class for working with struct metadata:
```python
from pythonbpf.structs import StructType
# Define a struct
@bpf
@struct
class MyStruct:
field1: c_uint64
field2: c_uint32
# Access struct information (from userspace)
# This is typically used internally by the compiler
```
## Complex Examples
### Network Packet Event
```python
from pythonbpf import bpf, struct, map, section
from pythonbpf.maps import RingBuffer
from pythonbpf.helper import ktime, XDP_PASS
from ctypes import c_void_p, c_int64, c_uint8, c_uint16, c_uint32, c_uint64
@bpf
@struct
class PacketEvent:
timestamp: c_uint64
src_ip: c_uint32
dst_ip: c_uint32
src_port: c_uint16
dst_port: c_uint16
protocol: c_uint8
length: c_uint16
@bpf
@map
def packets() -> RingBuffer:
return RingBuffer(max_entries=8192)
@bpf
@section("xdp")
def capture_packets(ctx: c_void_p) -> c_int64:
pkt = PacketEvent()
pkt.timestamp = ktime()
# Parse packet data from ctx...
packets.output(pkt)
return XDP_PASS
```
### Process Lifecycle Tracking
```python
@bpf
@struct
class ProcessLifecycle:
pid: c_uint32
ppid: c_uint32
start_time: c_uint64
exit_time: c_uint64
exit_code: c_int32
comm: str(16)
@bpf
@map
def process_info() -> HashMap:
return HashMap(
key=c_uint32,
value=ProcessLifecycle,
max_entries=4096
)
@bpf
@section("tracepoint/sched/sched_process_fork")
def track_fork(ctx: c_void_p) -> c_int64:
process_id = pid()
info = ProcessLifecycle()
info.pid = process_id
info.start_time = ktime()
process_info.update(process_id, info)
return 0
@bpf
@section("tracepoint/sched/sched_process_exit")
def track_exit(ctx: c_void_p) -> c_int64:
process_id = pid()
info = process_info.lookup(process_id)
if info:
info.exit_time = ktime()
process_info.update(process_id, info)
return 0
```
## Troubleshooting
### Struct Size Issues
If you encounter size-related errors:
* Check for excessive padding
* Verify field types are correct
* Consider reordering fields
### Initialization Problems
If fields aren't initialized correctly:
* Always initialize all fields explicitly
* Set default values where appropriate
* Use helper functions for dynamic values
### Type Mismatch Errors
If you get type errors:
* Ensure field types match assignments
* Check that imported types are from `ctypes`
* Verify nested struct definitions
## Reading Struct Data in Userspace
After capturing struct data, read it in Python:
```python
from pylibbpf import BpfMap
# Read from map
map_obj = BpfMap(b, stats)
for key, value_bytes in map_obj.items():
value = Event.from_buffer_copy(value_bytes)
print(f"PID: {value.pid}, Comm: {value.comm.decode()}")
```
## Next Steps
* Learn about {doc}`maps` for storing struct data
* Explore {doc}`helpers` for populating struct fields
* See {doc}`compilation` to understand how structs are compiled

View File

@ -0,0 +1,22 @@
"""
Process Anomaly Detection - Constants and Utilities
"""
import logging
logger = logging.getLogger(__name__)
MAX_SYSCALLS = 548
def comm_for_pid(pid: int) -> bytes | None:
"""Get process name from /proc."""
try:
with open(f"/proc/{pid}/comm", "rb") as f:
return f.read().strip()
except FileNotFoundError:
logger.warning(f"Process with PID {pid} not found.")
except PermissionError:
logger.warning(f"Permission denied when accessing /proc/{pid}/comm.")
except Exception as e:
logger.warning(f"Error reading /proc/{pid}/comm: {e}")
return None

View File

@ -0,0 +1,173 @@
"""
Autoencoder for Process Behavior Anomaly Detection
Uses Keras/TensorFlow to train an autoencoder on syscall patterns.
Anomalies are detected when reconstruction error exceeds threshold.
"""
import logging
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow import keras
from lib import MAX_SYSCALLS
logger = logging.getLogger(__name__)
def create_autoencoder(n_inputs: int = MAX_SYSCALLS) -> keras.Model:
"""
Create the autoencoder architecture.
Architecture: input → encoder → bottleneck → decoder → output
"""
inp = keras.Input(shape=(n_inputs,))
# Encoder
encoder = keras.layers.Dense(n_inputs)(inp)
encoder = keras.layers.ReLU()(encoder)
# Bottleneck (compressed representation)
bottleneck = keras.layers.Dense(n_inputs // 2)(encoder)
# Decoder
decoder = keras.layers.Dense(n_inputs)(bottleneck)
decoder = keras.layers.ReLU()(decoder)
output = keras.layers.Dense(n_inputs, activation="linear")(decoder)
model = keras.Model(inp, output)
model.compile(optimizer="adam", loss="mse")
return model
class AutoEncoder:
"""
Autoencoder for syscall pattern anomaly detection.
Usage:
# Training
ae = AutoEncoder('model.keras')
model, threshold = ae.train('data.csv', epochs=200)
# Inference
ae = AutoEncoder('model.keras', load=True)
_, errors, total_error = ae.predict([features])
"""
def __init__(self, filename: str, load: bool = False):
self.filename = filename
self.model = None
if load:
self._load_model()
def _load_model(self) -> None:
"""Load a trained model from disk."""
if not os.path.exists(self.filename):
raise FileNotFoundError(f"Model file not found: {self.filename}")
logger.info(f"Loading model from {self.filename}")
self.model = keras.models.load_model(self.filename)
def train(
self,
datafile: str,
epochs: int,
batch_size: int,
test_size: float = 0.1,
) -> tuple[keras.Model, float]:
"""
Train the autoencoder on collected data.
Args:
datafile: Path to CSV file with training data
epochs: Number of training epochs
batch_size: Training batch size
test_size: Fraction of data to use for validation
Returns:
Tuple of (trained model, error threshold)
"""
if not os.path.exists(datafile):
raise FileNotFoundError(f"Data file not found: {datafile}")
logger.info(f"Loading training data from {datafile}")
# Load and prepare data
df = pd.read_csv(datafile)
features = df.drop(["sample_time"], axis=1).values
logger.info(f"Loaded {len(features)} samples with {features.shape[1]} features")
# Split train/test
train_data, test_data = train_test_split(
features,
test_size=test_size,
random_state=42,
)
logger.info(f"Training set: {len(train_data)} samples")
logger.info(f"Test set: {len(test_data)} samples")
# Create and train model
self.model = create_autoencoder()
if self.model is None:
raise RuntimeError("Failed to create the autoencoder model.")
logger.info("Training autoencoder...")
self.model.fit(
train_data,
train_data,
validation_data=(test_data, test_data),
epochs=epochs,
batch_size=batch_size,
verbose=1,
)
# Save model (use .keras format for Keras 3.x compatibility)
self.model.save(self.filename)
logger.info(f"Model saved to {self.filename}")
# Calculate error threshold from test data
threshold = self._calculate_threshold(test_data)
return self.model, threshold
def _calculate_threshold(self, test_data: np.ndarray) -> float:
"""Calculate error threshold from test data."""
logger.info(f"Calculating error threshold from {len(test_data)} test samples")
if self.model is None:
raise RuntimeError("Model not loaded. Use load=True or train first.")
predictions = self.model.predict(test_data, verbose=0)
errors = np.abs(test_data - predictions).sum(axis=1)
return float(errors.max())
def predict(self, X: list | np.ndarray) -> tuple[np.ndarray, np.ndarray, float]:
"""
Run prediction and return reconstruction error.
Args:
X: Input data (list of feature vectors)
Returns:
Tuple of (reconstructed, per_feature_errors, total_error)
"""
if self.model is None:
raise RuntimeError("Model not loaded. Use load=True or train first.")
X = np.asarray(X, dtype=np.float32)
y = self.model.predict(X, verbose=0)
# Per-feature reconstruction error
errors = np.abs(X[0] - y[0])
total_error = float(errors.sum())
return y, errors, total_error

View File

@ -0,0 +1,448 @@
# Copyright 2017 Sasha Goldshtein
# Copyright 2018 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
syscall.py contains functions useful for mapping between syscall names and numbers
"""
# Syscall table for Linux x86_64, not very recent. Automatically generated from
# https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/arch/x86/entry/syscalls/syscall_64.tbl?h=linux-6.17.y
# using the following command:
#
# cat arch/x86/entry/syscalls/syscall_64.tbl \
# | awk 'BEGIN { print "syscalls = {" }
# /^[0-9]/ { print " "$1": b\""$3"\"," }
# END { print "}" }'
SYSCALLS = {
0: b"read",
1: b"write",
2: b"open",
3: b"close",
4: b"stat",
5: b"fstat",
6: b"lstat",
7: b"poll",
8: b"lseek",
9: b"mmap",
10: b"mprotect",
11: b"munmap",
12: b"brk",
13: b"rt_sigaction",
14: b"rt_sigprocmask",
15: b"rt_sigreturn",
16: b"ioctl",
17: b"pread64",
18: b"pwrite64",
19: b"readv",
20: b"writev",
21: b"access",
22: b"pipe",
23: b"select",
24: b"sched_yield",
25: b"mremap",
26: b"msync",
27: b"mincore",
28: b"madvise",
29: b"shmget",
30: b"shmat",
31: b"shmctl",
32: b"dup",
33: b"dup2",
34: b"pause",
35: b"nanosleep",
36: b"getitimer",
37: b"alarm",
38: b"setitimer",
39: b"getpid",
40: b"sendfile",
41: b"socket",
42: b"connect",
43: b"accept",
44: b"sendto",
45: b"recvfrom",
46: b"sendmsg",
47: b"recvmsg",
48: b"shutdown",
49: b"bind",
50: b"listen",
51: b"getsockname",
52: b"getpeername",
53: b"socketpair",
54: b"setsockopt",
55: b"getsockopt",
56: b"clone",
57: b"fork",
58: b"vfork",
59: b"execve",
60: b"exit",
61: b"wait4",
62: b"kill",
63: b"uname",
64: b"semget",
65: b"semop",
66: b"semctl",
67: b"shmdt",
68: b"msgget",
69: b"msgsnd",
70: b"msgrcv",
71: b"msgctl",
72: b"fcntl",
73: b"flock",
74: b"fsync",
75: b"fdatasync",
76: b"truncate",
77: b"ftruncate",
78: b"getdents",
79: b"getcwd",
80: b"chdir",
81: b"fchdir",
82: b"rename",
83: b"mkdir",
84: b"rmdir",
85: b"creat",
86: b"link",
87: b"unlink",
88: b"symlink",
89: b"readlink",
90: b"chmod",
91: b"fchmod",
92: b"chown",
93: b"fchown",
94: b"lchown",
95: b"umask",
96: b"gettimeofday",
97: b"getrlimit",
98: b"getrusage",
99: b"sysinfo",
100: b"times",
101: b"ptrace",
102: b"getuid",
103: b"syslog",
104: b"getgid",
105: b"setuid",
106: b"setgid",
107: b"geteuid",
108: b"getegid",
109: b"setpgid",
110: b"getppid",
111: b"getpgrp",
112: b"setsid",
113: b"setreuid",
114: b"setregid",
115: b"getgroups",
116: b"setgroups",
117: b"setresuid",
118: b"getresuid",
119: b"setresgid",
120: b"getresgid",
121: b"getpgid",
122: b"setfsuid",
123: b"setfsgid",
124: b"getsid",
125: b"capget",
126: b"capset",
127: b"rt_sigpending",
128: b"rt_sigtimedwait",
129: b"rt_sigqueueinfo",
130: b"rt_sigsuspend",
131: b"sigaltstack",
132: b"utime",
133: b"mknod",
134: b"uselib",
135: b"personality",
136: b"ustat",
137: b"statfs",
138: b"fstatfs",
139: b"sysfs",
140: b"getpriority",
141: b"setpriority",
142: b"sched_setparam",
143: b"sched_getparam",
144: b"sched_setscheduler",
145: b"sched_getscheduler",
146: b"sched_get_priority_max",
147: b"sched_get_priority_min",
148: b"sched_rr_get_interval",
149: b"mlock",
150: b"munlock",
151: b"mlockall",
152: b"munlockall",
153: b"vhangup",
154: b"modify_ldt",
155: b"pivot_root",
156: b"_sysctl",
157: b"prctl",
158: b"arch_prctl",
159: b"adjtimex",
160: b"setrlimit",
161: b"chroot",
162: b"sync",
163: b"acct",
164: b"settimeofday",
165: b"mount",
166: b"umount2",
167: b"swapon",
168: b"swapoff",
169: b"reboot",
170: b"sethostname",
171: b"setdomainname",
172: b"iopl",
173: b"ioperm",
174: b"create_module",
175: b"init_module",
176: b"delete_module",
177: b"get_kernel_syms",
178: b"query_module",
179: b"quotactl",
180: b"nfsservctl",
181: b"getpmsg",
182: b"putpmsg",
183: b"afs_syscall",
184: b"tuxcall",
185: b"security",
186: b"gettid",
187: b"readahead",
188: b"setxattr",
189: b"lsetxattr",
190: b"fsetxattr",
191: b"getxattr",
192: b"lgetxattr",
193: b"fgetxattr",
194: b"listxattr",
195: b"llistxattr",
196: b"flistxattr",
197: b"removexattr",
198: b"lremovexattr",
199: b"fremovexattr",
200: b"tkill",
201: b"time",
202: b"futex",
203: b"sched_setaffinity",
204: b"sched_getaffinity",
205: b"set_thread_area",
206: b"io_setup",
207: b"io_destroy",
208: b"io_getevents",
209: b"io_submit",
210: b"io_cancel",
211: b"get_thread_area",
212: b"lookup_dcookie",
213: b"epoll_create",
214: b"epoll_ctl_old",
215: b"epoll_wait_old",
216: b"remap_file_pages",
217: b"getdents64",
218: b"set_tid_address",
219: b"restart_syscall",
220: b"semtimedop",
221: b"fadvise64",
222: b"timer_create",
223: b"timer_settime",
224: b"timer_gettime",
225: b"timer_getoverrun",
226: b"timer_delete",
227: b"clock_settime",
228: b"clock_gettime",
229: b"clock_getres",
230: b"clock_nanosleep",
231: b"exit_group",
232: b"epoll_wait",
233: b"epoll_ctl",
234: b"tgkill",
235: b"utimes",
236: b"vserver",
237: b"mbind",
238: b"set_mempolicy",
239: b"get_mempolicy",
240: b"mq_open",
241: b"mq_unlink",
242: b"mq_timedsend",
243: b"mq_timedreceive",
244: b"mq_notify",
245: b"mq_getsetattr",
246: b"kexec_load",
247: b"waitid",
248: b"add_key",
249: b"request_key",
250: b"keyctl",
251: b"ioprio_set",
252: b"ioprio_get",
253: b"inotify_init",
254: b"inotify_add_watch",
255: b"inotify_rm_watch",
256: b"migrate_pages",
257: b"openat",
258: b"mkdirat",
259: b"mknodat",
260: b"fchownat",
261: b"futimesat",
262: b"newfstatat",
263: b"unlinkat",
264: b"renameat",
265: b"linkat",
266: b"symlinkat",
267: b"readlinkat",
268: b"fchmodat",
269: b"faccessat",
270: b"pselect6",
271: b"ppoll",
272: b"unshare",
273: b"set_robust_list",
274: b"get_robust_list",
275: b"splice",
276: b"tee",
277: b"sync_file_range",
278: b"vmsplice",
279: b"move_pages",
280: b"utimensat",
281: b"epoll_pwait",
282: b"signalfd",
283: b"timerfd_create",
284: b"eventfd",
285: b"fallocate",
286: b"timerfd_settime",
287: b"timerfd_gettime",
288: b"accept4",
289: b"signalfd4",
290: b"eventfd2",
291: b"epoll_create1",
292: b"dup3",
293: b"pipe2",
294: b"inotify_init1",
295: b"preadv",
296: b"pwritev",
297: b"rt_tgsigqueueinfo",
298: b"perf_event_open",
299: b"recvmmsg",
300: b"fanotify_init",
301: b"fanotify_mark",
302: b"prlimit64",
303: b"name_to_handle_at",
304: b"open_by_handle_at",
305: b"clock_adjtime",
306: b"syncfs",
307: b"sendmmsg",
308: b"setns",
309: b"getcpu",
310: b"process_vm_readv",
311: b"process_vm_writev",
312: b"kcmp",
313: b"finit_module",
314: b"sched_setattr",
315: b"sched_getattr",
316: b"renameat2",
317: b"seccomp",
318: b"getrandom",
319: b"memfd_create",
320: b"kexec_file_load",
321: b"bpf",
322: b"execveat",
323: b"userfaultfd",
324: b"membarrier",
325: b"mlock2",
326: b"copy_file_range",
327: b"preadv2",
328: b"pwritev2",
329: b"pkey_mprotect",
330: b"pkey_alloc",
331: b"pkey_free",
332: b"statx",
333: b"io_pgetevents",
334: b"rseq",
335: b"uretprobe",
424: b"pidfd_send_signal",
425: b"io_uring_setup",
426: b"io_uring_enter",
427: b"io_uring_register",
428: b"open_tree",
429: b"move_mount",
430: b"fsopen",
431: b"fsconfig",
432: b"fsmount",
433: b"fspick",
434: b"pidfd_open",
435: b"clone3",
436: b"close_range",
437: b"openat2",
438: b"pidfd_getfd",
439: b"faccessat2",
440: b"process_madvise",
441: b"epoll_pwait2",
442: b"mount_setattr",
443: b"quotactl_fd",
444: b"landlock_create_ruleset",
445: b"landlock_add_rule",
446: b"landlock_restrict_self",
447: b"memfd_secret",
448: b"process_mrelease",
449: b"futex_waitv",
450: b"set_mempolicy_home_node",
451: b"cachestat",
452: b"fchmodat2",
453: b"map_shadow_stack",
454: b"futex_wake",
455: b"futex_wait",
456: b"futex_requeue",
457: b"statmount",
458: b"listmount",
459: b"lsm_get_self_attr",
460: b"lsm_set_self_attr",
461: b"lsm_list_modules",
462: b"mseal",
463: b"setxattrat",
464: b"getxattrat",
465: b"listxattrat",
466: b"removexattrat",
467: b"open_tree_attr",
468: b"file_getattr",
469: b"file_setattr",
512: b"rt_sigaction",
513: b"rt_sigreturn",
514: b"ioctl",
515: b"readv",
516: b"writev",
517: b"recvfrom",
518: b"sendmsg",
519: b"recvmsg",
520: b"execve",
521: b"ptrace",
522: b"rt_sigpending",
523: b"rt_sigtimedwait",
524: b"rt_sigqueueinfo",
525: b"sigaltstack",
526: b"timer_create",
527: b"mq_notify",
528: b"kexec_load",
529: b"waitid",
530: b"set_robust_list",
531: b"get_robust_list",
532: b"vmsplice",
533: b"move_pages",
534: b"preadv",
535: b"pwritev",
536: b"rt_tgsigqueueinfo",
537: b"recvmmsg",
538: b"sendmmsg",
539: b"process_vm_readv",
540: b"process_vm_writev",
541: b"setsockopt",
542: b"getsockopt",
543: b"io_setup",
544: b"io_submit",
545: b"execveat",
546: b"preadv2",
547: b"pwritev2",
}

View File

@ -0,0 +1,117 @@
"""
PythonBPF eBPF Probe for Syscall Histogram Collection
"""
from vmlinux import struct_trace_event_raw_sys_enter
from pythonbpf import bpf, map, section, bpfglobal, BPF
from pythonbpf.helper import pid
from pythonbpf.maps import HashMap
from ctypes import c_int64
from lib import MAX_SYSCALLS, comm_for_pid
@bpf
@map
def histogram() -> HashMap:
return HashMap(key=c_int64, value=c_int64, max_entries=1024)
@bpf
@map
def target_pid_map() -> HashMap:
return HashMap(key=c_int64, value=c_int64, max_entries=1)
@bpf
@section("tracepoint/raw_syscalls/sys_enter")
def trace_syscall(ctx: struct_trace_event_raw_sys_enter) -> c_int64:
syscall_id = ctx.id
current_pid = pid()
target = target_pid_map.lookup(0)
if target:
if current_pid != target:
return 0 # type: ignore
if syscall_id < 0 or syscall_id >= 548:
return 0 # type: ignore
count = histogram.lookup(syscall_id)
if count:
histogram.update(syscall_id, count + 1)
else:
histogram.update(syscall_id, 1)
return 0 # type: ignore
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
ebpf_prog = BPF()
class Probe:
"""
Syscall histogram probe for a target process.
Usage:
probe = Probe(target_pid=1234)
probe.start()
histogram = probe.get_histogram()
"""
def __init__(self, target_pid: int, max_syscalls: int = MAX_SYSCALLS):
self.target_pid = target_pid
self.max_syscalls = max_syscalls
self.comm = comm_for_pid(target_pid)
if self.comm is None:
raise ValueError(f"Cannot find process with PID {target_pid}")
self._bpf = None
self._histogram_map = None
self._target_map = None
def start(self):
"""Compile, load, and attach the BPF probe."""
# Compile and load
self._bpf = ebpf_prog
self._bpf.load()
self._bpf.attach_all()
# Get map references
self._histogram_map = self._bpf["histogram"]
self._target_map = self._bpf["target_pid_map"]
# Set target PID in the map
self._target_map.update(0, self.target_pid)
return self
def get_histogram(self) -> list:
"""Read current histogram values as a list."""
if self._histogram_map is None:
raise RuntimeError("Probe not started. Call start() first.")
result = [0] * self.max_syscalls
for syscall_id in range(self.max_syscalls):
try:
count = self._histogram_map.lookup(syscall_id)
if count is not None:
result[syscall_id] = int(count)
except Exception:
pass
return result
def __getitem__(self, syscall_id: int) -> int:
"""Allow indexing: probe[syscall_id]"""
if self._histogram_map is None:
raise RuntimeError("Probe not started")
try:
count = self._histogram_map.lookup(syscall_id)
return int(count) if count is not None else 0
except Exception:
return 0

View File

@ -0,0 +1,335 @@
#!/usr/bin/env python3
"""
Process Behavior Anomaly Detection using PythonBPF and Autoencoders
Ported from evilsocket's BCC implementation to PythonBPF.
https://github.com/evilsocket/ebpf-process-anomaly-detection
Usage:
# 1.Learn normal behavior from a process
sudo python main.py --learn --pid 1234 --data normal.csv
# 2.Train the autoencoder (no sudo needed)
python main.py --train --data normal.csv --model model.h5
# 3.Monitor for anomalies
sudo python main.py --run --pid 1234 --model model.h5
"""
import argparse
import logging
import os
import sys
import time
from collections import Counter
from lib import MAX_SYSCALLS
from lib.ml import AutoEncoder
from lib.platform import SYSCALLS
from lib.probe import Probe
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)
def learn(pid: int, data_path: str, poll_interval_ms: int) -> None:
"""
Capture syscall patterns from target process.
Args:
pid: Target process ID
data_path: Path to save CSV data
poll_interval_ms: Polling interval in milliseconds
"""
if os.path.exists(data_path):
logger.error(
f"{data_path} already exists.Delete it or use a different filename."
)
sys.exit(1)
try:
probe = Probe(pid)
except ValueError as e:
logger.error(str(e))
sys.exit(1)
probe_comm = probe.comm.decode() if probe.comm else "unknown"
print(f"📊 Learning from process {pid} ({probe_comm})")
print(f"📁 Saving data to {data_path}")
print(f"⏱️ Polling interval: {poll_interval_ms}ms")
print("Press Ctrl+C to stop...\n")
probe.start()
prev_histogram = [0.0] * MAX_SYSCALLS
prev_report_time = time.time()
sample_count = 0
poll_interval_sec = poll_interval_ms / 1000.0
header = "sample_time," + ",".join(f"sys_{i}" for i in range(MAX_SYSCALLS))
with open(data_path, "w") as fp:
fp.write(header + "\n")
try:
while True:
histogram = [float(x) for x in probe.get_histogram()]
if histogram != prev_histogram:
deltas = _compute_deltas(prev_histogram, histogram)
prev_histogram = histogram.copy()
row = f"{time.time()},{','.join(map(str, deltas))}"
fp.write(row + "\n")
fp.flush()
sample_count += 1
now = time.time()
if now - prev_report_time >= 1.0:
print(f" {sample_count} samples saved...")
prev_report_time = now
time.sleep(poll_interval_sec)
except KeyboardInterrupt:
print(f"\n✅ Stopped. Saved {sample_count} samples to {data_path}")
def train(data_path: str, model_path: str, epochs: int, batch_size: int) -> None:
"""
Train autoencoder on captured data.
Args:
data_path: Path to training CSV data
model_path: Path to save trained model
epochs: Number of training epochs
batch_size: Training batch size
"""
if not os.path.exists(data_path):
logger.error(f"Data file {data_path} not found.Run --learn first.")
sys.exit(1)
print(f"🧠 Training autoencoder on {data_path}")
print(f" Epochs: {epochs}")
print(f" Batch size: {batch_size}")
print()
ae = AutoEncoder(model_path)
_, threshold = ae.train(data_path, epochs, batch_size)
print()
print("=" * 50)
print("✅ Training complete!")
print(f" Model saved to: {model_path}")
print(f" Error threshold: {threshold:.6f}")
print()
print(f"💡 Use --max-error {threshold:.4f} when running detection")
print("=" * 50)
def run(pid: int, model_path: str, max_error: float, poll_interval_ms: int) -> None:
"""
Monitor process and detect anomalies.
Args:
pid: Target process ID
model_path: Path to trained model
max_error: Anomaly detection threshold
poll_interval_ms: Polling interval in milliseconds
"""
if not os.path.exists(model_path):
logger.error(f"Model file {model_path} not found. Run --train first.")
sys.exit(1)
try:
probe = Probe(pid)
except ValueError as e:
logger.error(str(e))
sys.exit(1)
ae = AutoEncoder(model_path, load=True)
probe_comm = probe.comm.decode() if probe.comm else "unknown"
print(f"🔍 Monitoring process {pid} ({probe_comm}) for anomalies")
print(f" Error threshold: {max_error}")
print(f" Polling interval: {poll_interval_ms}ms")
print("Press Ctrl+C to stop...\n")
probe.start()
prev_histogram = [0.0] * MAX_SYSCALLS
anomaly_count = 0
check_count = 0
poll_interval_sec = poll_interval_ms / 1000.0
try:
while True:
histogram = [float(x) for x in probe.get_histogram()]
if histogram != prev_histogram:
deltas = _compute_deltas(prev_histogram, histogram)
prev_histogram = histogram.copy()
check_count += 1
_, feat_errors, total_error = ae.predict([deltas])
if total_error > max_error:
anomaly_count += 1
_report_anomaly(anomaly_count, total_error, max_error, feat_errors)
time.sleep(poll_interval_sec)
except KeyboardInterrupt:
print("\n✅ Stopped.")
print(f" Checks performed: {check_count}")
print(f" Anomalies detected: {anomaly_count}")
def _compute_deltas(prev: list[float], current: list[float]) -> list[float]:
"""Compute rate of change between two histograms."""
deltas = []
for p, c in zip(prev, current):
if c != 0.0:
delta = 1.0 - (p / c)
else:
delta = 0.0
deltas.append(delta)
return deltas
def _report_anomaly(
count: int,
total_error: float,
threshold: float,
feat_errors: list[float],
) -> None:
"""Print anomaly report with top offending syscalls."""
print(f"🚨 ANOMALY #{count} detected!")
print(f" Total error: {total_error:.4f} (threshold: {threshold})")
errors_by_syscall = {idx: err for idx, err in enumerate(feat_errors)}
top3 = Counter(errors_by_syscall).most_common(3)
print(" Top anomalous syscalls:")
for idx, err in top3:
name = SYSCALLS.get(idx, f"syscall_{idx}")
print(f"{name!r}: {err:.4f}")
print()
def parse_args() -> argparse.Namespace:
"""Parse command line arguments."""
parser = argparse.ArgumentParser(
description="Process anomaly detection with PythonBPF and Autoencoders",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Learn from a process (e.g., Firefox) for a few minutes
sudo python main.py --learn --pid $(pgrep -o firefox) --data firefox.csv
# Train the model (no sudo needed)
python main.py --train --data firefox.csv --model firefox.h5
# Monitor the same process for anomalies
sudo python main.py --run --pid $(pgrep -o firefox) --model firefox.h5
# Full workflow for nginx:
sudo python main.py --learn --pid $(pgrep -o nginx) --data nginx_normal.csv
python main.py --train --data nginx_normal.csv --model nginx.h5 --epochs 100
sudo python main.py --run --pid $(pgrep -o nginx) --model nginx.h5 --max-error 0.05
""",
)
actions = parser.add_mutually_exclusive_group()
actions.add_argument(
"--learn",
action="store_true",
help="Capture syscall patterns from a process",
)
actions.add_argument(
"--train",
action="store_true",
help="Train autoencoder on captured data",
)
actions.add_argument(
"--run",
action="store_true",
help="Monitor process for anomalies",
)
parser.add_argument(
"--pid",
type=int,
default=0,
help="Target process ID",
)
parser.add_argument(
"--data",
default="data.csv",
help="CSV file for training data (default: data.csv)",
)
parser.add_argument(
"--model",
default="model.keras",
help="Model file path (default: model.h5)",
)
parser.add_argument(
"--time",
type=int,
default=100,
help="Polling interval in milliseconds (default: 100)",
)
parser.add_argument(
"--epochs",
type=int,
default=200,
help="Training epochs (default: 200)",
)
parser.add_argument(
"--batch-size",
type=int,
default=16,
help="Training batch size (default: 16)",
)
parser.add_argument(
"--max-error",
type=float,
default=0.09,
help="Anomaly detection threshold (default: 0.09)",
)
return parser.parse_args()
def main() -> None:
"""Main entry point."""
args = parse_args()
if not any([args.learn, args.train, args.run]):
print("No action specified.Use --learn, --train, or --run.")
print("Run with --help for usage information.")
sys.exit(0)
if args.learn:
if args.pid == 0:
logger.error("--pid required for --learn")
sys.exit(1)
learn(args.pid, args.data, args.time)
elif args.train:
train(args.data, args.model, args.epochs, args.batch_size)
elif args.run:
if args.pid == 0:
logger.error("--pid required for --run")
sys.exit(1)
run(args.pid, args.model, args.max_error, args.time)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,49 @@
# Container Monitor TUI
A beautiful terminal-based container monitoring tool that combines syscall tracking, file I/O monitoring, and network traffic analysis using eBPF.
## Features
- 🎯 **Interactive Cgroup Selection** - Navigate and select cgroups with arrow keys
- 📊 **Real-time Monitoring** - Live graphs and statistics
- 🔥 **Syscall Tracking** - Total syscall count per cgroup
- 💾 **File I/O Monitoring** - Read/write operations and bytes with graphs
- 🌐 **Network Traffic** - RX/TX packets and bytes with live graphs
-**Efficient Caching** - Reduced /proc lookups for better performance
- 🎨 **Beautiful TUI** - Clean, colorful terminal interface
## Requirements
- Python 3.7+
- pythonbpf
- Root privileges (for eBPF)
## Installation
```bash
# Ensure you have pythonbpf installed
pip install pythonbpf
# Run the monitor
sudo $(which python) container_monitor.py
```
## Usage
1. **Selection Screen**: Use ↑↓ arrow keys to navigate through cgroups, press ENTER to select
2. **Monitoring Screen**: View real-time graphs and statistics, press ESC or 'b' to go back
3. **Exit**: Press 'q' at any time to quit
## Architecture
- `container_monitor.py` - Main BPF program combining all three tracers
- `data_collector.py` - Data collection, caching, and history management
- `tui. py` - Terminal user interface with selection and monitoring screens
## BPF Programs
- **vfs_read/vfs_write** - Track file I/O operations
- **__netif_receive_skb/__dev_queue_xmit** - Track network traffic
- **raw_syscalls/sys_enter** - Count all syscalls
All programs filter by cgroup ID for per-container monitoring.

View File

@ -0,0 +1,220 @@
"""Container Monitor - TUI-based cgroup monitoring combining syscall, file I/O, and network tracking."""
from pythonbpf import bpf, map, section, bpfglobal, struct, BPF
from pythonbpf.maps import HashMap
from pythonbpf.helper import get_current_cgroup_id
from ctypes import c_int32, c_uint64, c_void_p
from vmlinux import struct_pt_regs, struct_sk_buff
from data_collection import ContainerDataCollector
from tui import ContainerMonitorTUI
# ==================== BPF Structs ====================
@bpf
@struct
class read_stats:
bytes: c_uint64
ops: c_uint64
@bpf
@struct
class write_stats:
bytes: c_uint64
ops: c_uint64
@bpf
@struct
class net_stats:
rx_packets: c_uint64
tx_packets: c_uint64
rx_bytes: c_uint64
tx_bytes: c_uint64
# ==================== BPF Maps ====================
@bpf
@map
def read_map() -> HashMap:
return HashMap(key=c_uint64, value=read_stats, max_entries=1024)
@bpf
@map
def write_map() -> HashMap:
return HashMap(key=c_uint64, value=write_stats, max_entries=1024)
@bpf
@map
def net_stats_map() -> HashMap:
return HashMap(key=c_uint64, value=net_stats, max_entries=1024)
@bpf
@map
def syscall_count() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=1024)
# ==================== File I/O Tracing ====================
@bpf
@section("kprobe/vfs_read")
def trace_read(ctx: struct_pt_regs) -> c_int32:
cg = get_current_cgroup_id()
count = c_uint64(ctx.dx)
ptr = read_map.lookup(cg)
if ptr:
s = read_stats()
s.bytes = ptr.bytes + count
s.ops = ptr.ops + 1
read_map.update(cg, s)
else:
s = read_stats()
s.bytes = count
s.ops = c_uint64(1)
read_map.update(cg, s)
return c_int32(0)
@bpf
@section("kprobe/vfs_write")
def trace_write(ctx1: struct_pt_regs) -> c_int32:
cg = get_current_cgroup_id()
count = c_uint64(ctx1.dx)
ptr = write_map.lookup(cg)
if ptr:
s = write_stats()
s.bytes = ptr.bytes + count
s.ops = ptr.ops + 1
write_map.update(cg, s)
else:
s = write_stats()
s.bytes = count
s.ops = c_uint64(1)
write_map.update(cg, s)
return c_int32(0)
# ==================== Network I/O Tracing ====================
@bpf
@section("kprobe/__netif_receive_skb")
def trace_netif_rx(ctx2: struct_pt_regs) -> c_int32:
cgroup_id = get_current_cgroup_id()
skb = struct_sk_buff(ctx2.di)
pkt_len = c_uint64(skb.len)
stats_ptr = net_stats_map.lookup(cgroup_id)
if stats_ptr:
stats = net_stats()
stats.rx_packets = stats_ptr.rx_packets + 1
stats.tx_packets = stats_ptr.tx_packets
stats.rx_bytes = stats_ptr.rx_bytes + pkt_len
stats.tx_bytes = stats_ptr.tx_bytes
net_stats_map.update(cgroup_id, stats)
else:
stats = net_stats()
stats.rx_packets = c_uint64(1)
stats.tx_packets = c_uint64(0)
stats.rx_bytes = pkt_len
stats.tx_bytes = c_uint64(0)
net_stats_map.update(cgroup_id, stats)
return c_int32(0)
@bpf
@section("kprobe/__dev_queue_xmit")
def trace_dev_xmit(ctx3: struct_pt_regs) -> c_int32:
cgroup_id = get_current_cgroup_id()
skb = struct_sk_buff(ctx3.di)
pkt_len = c_uint64(skb.len)
stats_ptr = net_stats_map.lookup(cgroup_id)
if stats_ptr:
stats = net_stats()
stats.rx_packets = stats_ptr.rx_packets
stats.tx_packets = stats_ptr.tx_packets + 1
stats.rx_bytes = stats_ptr.rx_bytes
stats.tx_bytes = stats_ptr.tx_bytes + pkt_len
net_stats_map.update(cgroup_id, stats)
else:
stats = net_stats()
stats.rx_packets = c_uint64(0)
stats.tx_packets = c_uint64(1)
stats.rx_bytes = c_uint64(0)
stats.tx_bytes = pkt_len
net_stats_map.update(cgroup_id, stats)
return c_int32(0)
# ==================== Syscall Tracing ====================
@bpf
@section("tracepoint/raw_syscalls/sys_enter")
def count_syscalls(ctx: c_void_p) -> c_int32:
cgroup_id = get_current_cgroup_id()
count_ptr = syscall_count.lookup(cgroup_id)
if count_ptr:
new_count = count_ptr + c_uint64(1)
syscall_count.update(cgroup_id, new_count)
else:
syscall_count.update(cgroup_id, c_uint64(1))
return c_int32(0)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# ==================== Main ====================
if __name__ == "__main__":
print("🔥 Loading BPF programs...")
# Load and attach BPF program
b = BPF()
b.load()
b.attach_all()
# Get map references and enable struct deserialization
read_map_ref = b["read_map"]
write_map_ref = b["write_map"]
net_stats_map_ref = b["net_stats_map"]
syscall_count_ref = b["syscall_count"]
read_map_ref.set_value_struct("read_stats")
write_map_ref.set_value_struct("write_stats")
net_stats_map_ref.set_value_struct("net_stats")
print("✅ BPF programs loaded and attached")
# Setup data collector
collector = ContainerDataCollector(
read_map_ref, write_map_ref, net_stats_map_ref, syscall_count_ref
)
# Create and run TUI
tui = ContainerMonitorTUI(collector)
tui.run()

View File

@ -0,0 +1,208 @@
"""Data collection and management for container monitoring."""
import os
import time
from pathlib import Path
from typing import Dict, List, Set, Optional
from dataclasses import dataclass
from collections import deque, defaultdict
@dataclass
class CgroupInfo:
"""Information about a cgroup."""
id: int
name: str
path: str
@dataclass
class ContainerStats:
"""Statistics for a container/cgroup."""
cgroup_id: int
cgroup_name: str
# File I/O
read_ops: int = 0
read_bytes: int = 0
write_ops: int = 0
write_bytes: int = 0
# Network I/O
rx_packets: int = 0
rx_bytes: int = 0
tx_packets: int = 0
tx_bytes: int = 0
# Syscalls
syscall_count: int = 0
# Timestamp
timestamp: float = 0.0
class ContainerDataCollector:
"""Collects and manages container monitoring data from BPF."""
def __init__(
self, read_map, write_map, net_stats_map, syscall_map, history_size: int = 100
):
self.read_map = read_map
self.write_map = write_map
self.net_stats_map = net_stats_map
self.syscall_map = syscall_map
# Caching
self._cgroup_cache: Dict[int, CgroupInfo] = {}
self._cgroup_cache_time = 0
self._cache_ttl = 5.0
0 # Refresh cache every 5 seconds
# Historical data for graphing
self._history_size = history_size
self._history: Dict[int, deque] = defaultdict(
lambda: deque(maxlen=history_size)
)
def get_all_cgroups(self) -> List[CgroupInfo]:
"""Get all cgroups with caching."""
current_time = time.time()
# Use cached data if still valid
if current_time - self._cgroup_cache_time < self._cache_ttl:
return list(self._cgroup_cache.values())
# Refresh cache
self._refresh_cgroup_cache()
return list(self._cgroup_cache.values())
def _refresh_cgroup_cache(self):
"""Refresh the cgroup cache from /proc."""
cgroup_map: Dict[int, Set[str]] = defaultdict(set)
# Scan /proc to find all cgroups
for proc_dir in Path("/proc").glob("[0-9]*"):
try:
cgroup_file = proc_dir / "cgroup"
if not cgroup_file.exists():
continue
with open(cgroup_file) as f:
for line in f:
parts = line.strip().split(":")
if len(parts) >= 3:
cgroup_path = parts[2]
cgroup_mount = f"/sys/fs/cgroup{cgroup_path}"
if os.path.exists(cgroup_mount):
stat_info = os.stat(cgroup_mount)
cgroup_id = stat_info.st_ino
cgroup_map[cgroup_id].add(cgroup_path)
except (PermissionError, FileNotFoundError, OSError):
continue
# Update cache with best names
new_cache = {}
for cgroup_id, paths in cgroup_map.items():
# Pick the most descriptive path
best_path = self._get_best_cgroup_path(paths)
name = self._get_cgroup_name(best_path)
new_cache[cgroup_id] = CgroupInfo(id=cgroup_id, name=name, path=best_path)
self._cgroup_cache = new_cache
self._cgroup_cache_time = time.time()
def _get_best_cgroup_path(self, paths: Set[str]) -> str:
"""Select the most descriptive cgroup path."""
path_list = list(paths)
# Prefer paths with more components (more specific)
# Prefer paths containing docker, podman, etc.
for keyword in ["docker", "podman", "kubernetes", "k8s", "systemd"]:
for path in path_list:
if keyword in path.lower():
return path
# Return longest path (most specific)
return max(path_list, key=lambda p: (len(p.split("/")), len(p)))
def _get_cgroup_name(self, path: str) -> str:
"""Extract a friendly name from cgroup path."""
if not path or path == "/":
return "root"
# Remove leading/trailing slashes
path = path.strip("/")
# Try to extract container ID or service name
parts = path.split("/")
# For Docker: /docker/<container_id>
if "docker" in path.lower():
for i, part in enumerate(parts):
if part.lower() == "docker" and i + 1 < len(parts):
container_id = parts[i + 1][:12] # Short ID
return f"docker:{container_id}"
# For systemd services
if "system.slice" in path:
for part in parts:
if part.endswith(".service"):
return part.replace(".service", "")
# For user slices
if "user.slice" in path:
return f"user:{parts[-1]}" if parts else "user"
# Default: use last component
return parts[-1] if parts else path
def get_stats_for_cgroup(self, cgroup_id: int) -> ContainerStats:
"""Get current statistics for a specific cgroup."""
cgroup_info = self._cgroup_cache.get(cgroup_id)
cgroup_name = cgroup_info.name if cgroup_info else f"cgroup-{cgroup_id}"
stats = ContainerStats(
cgroup_id=cgroup_id, cgroup_name=cgroup_name, timestamp=time.time()
)
# Get file I/O stats
read_stat = self.read_map.lookup(cgroup_id)
if read_stat:
stats.read_ops = int(read_stat.ops)
stats.read_bytes = int(read_stat.bytes)
write_stat = self.write_map.lookup(cgroup_id)
if write_stat:
stats.write_ops = int(write_stat.ops)
stats.write_bytes = int(write_stat.bytes)
# Get network stats
net_stat = self.net_stats_map.lookup(cgroup_id)
if net_stat:
stats.rx_packets = int(net_stat.rx_packets)
stats.rx_bytes = int(net_stat.rx_bytes)
stats.tx_packets = int(net_stat.tx_packets)
stats.tx_bytes = int(net_stat.tx_bytes)
# Get syscall count
syscall_cnt = self.syscall_map.lookup(cgroup_id)
if syscall_cnt is not None:
stats.syscall_count = int(syscall_cnt)
# Add to history
self._history[cgroup_id].append(stats)
return stats
def get_history(self, cgroup_id: int) -> List[ContainerStats]:
"""Get historical statistics for graphing."""
return list(self._history[cgroup_id])
def get_cgroup_info(self, cgroup_id: int) -> Optional[CgroupInfo]:
"""Get cached cgroup information."""
return self._cgroup_cache.get(cgroup_id)

View File

@ -0,0 +1,752 @@
"""Terminal User Interface for container monitoring."""
import time
import curses
import threading
from typing import Optional, List
from data_collection import ContainerDataCollector
from web_dashboard import WebDashboard
def _safe_addstr(stdscr, y: int, x: int, text: str, *args):
"""Safely add string to screen with bounds checking."""
try:
height, width = stdscr.getmaxyx()
if 0 <= y < height and 0 <= x < width:
# Truncate text to fit
max_len = width - x - 1
if max_len > 0:
stdscr.addstr(y, x, text[:max_len], *args)
except curses.error:
pass
def _draw_fancy_header(stdscr, title: str, subtitle: str):
"""Draw a fancy header with title and subtitle."""
height, width = stdscr.getmaxyx()
# Top border
_safe_addstr(stdscr, 0, 0, "" * width, curses.color_pair(6) | curses.A_BOLD)
# Title
_safe_addstr(
stdscr,
0,
max(0, (width - len(title)) // 2),
f" {title} ",
curses.color_pair(6) | curses.A_BOLD,
)
# Subtitle
_safe_addstr(
stdscr,
1,
max(0, (width - len(subtitle)) // 2),
subtitle,
curses.color_pair(1),
)
# Bottom border
_safe_addstr(stdscr, 2, 0, "" * width, curses.color_pair(6))
def _draw_metric_box(
stdscr,
y: int,
x: int,
width: int,
label: str,
value: str,
detail: str,
color_pair: int,
):
"""Draw a fancy box for displaying a metric."""
height, _ = stdscr.getmaxyx()
if y + 4 >= height:
return
# Top border
_safe_addstr(
stdscr, y, x, "" + "" * (width - 2) + "", color_pair | curses.A_BOLD
)
# Label
_safe_addstr(stdscr, y + 1, x, "", color_pair | curses.A_BOLD)
_safe_addstr(stdscr, y + 1, x + 2, label, color_pair | curses.A_BOLD)
_safe_addstr(stdscr, y + 1, x + width - 1, "", color_pair | curses.A_BOLD)
# Value
_safe_addstr(stdscr, y + 2, x, "", color_pair | curses.A_BOLD)
_safe_addstr(stdscr, y + 2, x + 4, value, curses.color_pair(2) | curses.A_BOLD)
_safe_addstr(
stdscr,
y + 2,
min(x + width - len(detail) - 3, x + width - 2),
detail,
color_pair | curses.A_BOLD,
)
_safe_addstr(stdscr, y + 2, x + width - 1, "", color_pair | curses.A_BOLD)
# Bottom border
_safe_addstr(
stdscr, y + 3, x, "" + "" * (width - 2) + "", color_pair | curses.A_BOLD
)
def _draw_section_header(stdscr, y: int, title: str, color_pair: int):
"""Draw a section header."""
height, width = stdscr.getmaxyx()
if y >= height:
return
_safe_addstr(stdscr, y, 2, title, curses.color_pair(color_pair) | curses.A_BOLD)
_safe_addstr(
stdscr,
y,
len(title) + 3,
"" * (width - len(title) - 5),
curses.color_pair(color_pair) | curses.A_BOLD,
)
def _calculate_rates(history: List) -> dict:
"""Calculate per-second rates from history."""
if len(history) < 2:
return {
"syscalls_per_sec": 0.0,
"rx_bytes_per_sec": 0.0,
"tx_bytes_per_sec": 0.0,
"rx_pkts_per_sec": 0.0,
"tx_pkts_per_sec": 0.0,
"read_bytes_per_sec": 0.0,
"write_bytes_per_sec": 0.0,
"read_ops_per_sec": 0.0,
"write_ops_per_sec": 0.0,
}
# Calculate delta between last two samples
recent = history[-1]
previous = history[-2]
time_delta = recent.timestamp - previous.timestamp
if time_delta <= 0:
time_delta = 1.0
return {
"syscalls_per_sec": (recent.syscall_count - previous.syscall_count)
/ time_delta,
"rx_bytes_per_sec": (recent.rx_bytes - previous.rx_bytes) / time_delta,
"tx_bytes_per_sec": (recent.tx_bytes - previous.tx_bytes) / time_delta,
"rx_pkts_per_sec": (recent.rx_packets - previous.rx_packets) / time_delta,
"tx_pkts_per_sec": (recent.tx_packets - previous.tx_packets) / time_delta,
"read_bytes_per_sec": (recent.read_bytes - previous.read_bytes) / time_delta,
"write_bytes_per_sec": (recent.write_bytes - previous.write_bytes) / time_delta,
"read_ops_per_sec": (recent.read_ops - previous.read_ops) / time_delta,
"write_ops_per_sec": (recent.write_ops - previous.write_ops) / time_delta,
}
def _format_bytes(bytes_val: float) -> str:
"""Format bytes into human-readable string."""
if bytes_val < 0:
bytes_val = 0
for unit in ["B", "KB", "MB", "GB", "TB"]:
if bytes_val < 1024.0:
return f"{bytes_val:.1f}{unit}"
bytes_val /= 1024.0
return f"{bytes_val:.1f}PB"
def _draw_bar_graph_enhanced(
stdscr,
y: int,
x: int,
width: int,
height: int,
data: List[float],
color_pair: int,
):
"""Draw an enhanced bar graph with axis and scale."""
screen_height, screen_width = stdscr.getmaxyx()
if not data or width < 2 or y + height >= screen_height:
return
# Calculate statistics
max_val = max(data) if max(data) > 0 else 1
min_val = min(data)
avg_val = sum(data) / len(data)
# Take last 'width - 12' data points (leave room for Y-axis)
graph_width = max(1, width - 12)
recent_data = data[-graph_width:] if len(data) > graph_width else data
# Draw Y-axis labels (with bounds checking)
if y < screen_height:
_safe_addstr(
stdscr, y, x, f"{_format_bytes(max_val):>9}", curses.color_pair(7)
)
if y + height // 2 < screen_height:
_safe_addstr(
stdscr,
y + height // 2,
x,
f"{_format_bytes(avg_val):>9}",
curses.color_pair(7),
)
if y + height - 1 < screen_height:
_safe_addstr(
stdscr,
y + height - 1,
x,
f"{_format_bytes(min_val):>9}",
curses.color_pair(7),
)
# Draw bars
for row in range(height):
if y + row >= screen_height:
break
threshold = (height - row) / height
bar_line = ""
for val in recent_data:
normalized = val / max_val if max_val > 0 else 0
if normalized >= threshold:
bar_line += ""
elif normalized >= threshold - 0.15:
bar_line += ""
elif normalized >= threshold - 0.35:
bar_line += ""
elif normalized >= threshold - 0.5:
bar_line += ""
else:
bar_line += " "
_safe_addstr(stdscr, y + row, x + 11, bar_line, color_pair)
# Draw X-axis
if y + height < screen_height:
_safe_addstr(
stdscr,
y + height,
x + 10,
"" + "" * len(recent_data),
curses.color_pair(7),
)
_safe_addstr(
stdscr,
y + height,
x + 10 + len(recent_data),
"→ time",
curses.color_pair(7),
)
def _draw_labeled_graph(
stdscr,
y: int,
x: int,
width: int,
height: int,
label: str,
rate: str,
detail: str,
data: List[float],
color_pair: int,
description: str,
):
"""Draw a graph with labels and legend."""
screen_height, screen_width = stdscr.getmaxyx()
if y >= screen_height or y + height + 2 >= screen_height:
return
# Header with metrics
_safe_addstr(stdscr, y, x, label, curses.color_pair(1) | curses.A_BOLD)
_safe_addstr(stdscr, y, x + len(label) + 2, rate, curses.color_pair(2))
_safe_addstr(
stdscr, y, x + len(label) + len(rate) + 4, detail, curses.color_pair(7)
)
# Draw the graph
if len(data) > 1:
_draw_bar_graph_enhanced(stdscr, y + 1, x, width, height, data, color_pair)
else:
_safe_addstr(stdscr, y + 2, x + 2, "Collecting data...", curses.color_pair(7))
# Graph legend
if y + height + 1 < screen_height:
_safe_addstr(
stdscr, y + height + 1, x, f"└─ {description}", curses.color_pair(7)
)
class ContainerMonitorTUI:
"""TUI for container monitoring with cgroup selection and live graphs."""
def __init__(self, collector: ContainerDataCollector):
self.collector = collector
self.selected_cgroup: Optional[int] = None
self.current_screen = "selection" # "selection" or "monitoring"
self.selected_index = 0
self.scroll_offset = 0
self.web_dashboard = None
self.web_thread = None
def run(self):
"""Run the TUI application."""
curses.wrapper(self._main_loop)
def _main_loop(self, stdscr):
"""Main curses loop."""
# Configure curses
curses.curs_set(0) # Hide cursor
stdscr.nodelay(True) # Non-blocking input
stdscr.timeout(100) # Refresh every 100ms
# Initialize colors
curses.start_color()
curses.init_pair(1, curses.COLOR_CYAN, curses.COLOR_BLACK)
curses.init_pair(2, curses.COLOR_GREEN, curses.COLOR_BLACK)
curses.init_pair(3, curses.COLOR_YELLOW, curses.COLOR_BLACK)
curses.init_pair(4, curses.COLOR_RED, curses.COLOR_BLACK)
curses.init_pair(5, curses.COLOR_MAGENTA, curses.COLOR_BLACK)
curses.init_pair(6, curses.COLOR_WHITE, curses.COLOR_BLUE)
curses.init_pair(7, curses.COLOR_BLUE, curses.COLOR_BLACK)
curses.init_pair(8, curses.COLOR_WHITE, curses.COLOR_CYAN)
while True:
stdscr.clear()
try:
height, width = stdscr.getmaxyx()
# Check minimum terminal size
if height < 25 or width < 80:
msg = "Terminal too small! Minimum: 80x25"
stdscr.attron(curses.color_pair(4) | curses.A_BOLD)
stdscr.addstr(
height // 2, max(0, (width - len(msg)) // 2), msg[: width - 1]
)
stdscr.attroff(curses.color_pair(4) | curses.A_BOLD)
stdscr.refresh()
key = stdscr.getch()
if key == ord("q") or key == ord("Q"):
break
continue
if self.current_screen == "selection":
self._draw_selection_screen(stdscr)
elif self.current_screen == "monitoring":
self._draw_monitoring_screen(stdscr)
stdscr.refresh()
# Handle input
key = stdscr.getch()
if key != -1:
if not self._handle_input(key, stdscr):
break # Exit requested
except KeyboardInterrupt:
break
except curses.error:
# Curses error - likely terminal too small, just continue
pass
except Exception as e:
# Show error briefly
height, width = stdscr.getmaxyx()
error_msg = f"Error: {str(e)[: width - 10]}"
stdscr.addstr(0, 0, error_msg[: width - 1])
stdscr.refresh()
time.sleep(1)
def _draw_selection_screen(self, stdscr):
"""Draw the cgroup selection screen."""
height, width = stdscr.getmaxyx()
# Draw fancy header box
_draw_fancy_header(stdscr, "🐳 CONTAINER MONITOR", "Select a Cgroup to Monitor")
# Instructions
instructions = (
"↑↓: Navigate | ENTER: Select | w: Web Mode | q: Quit | r: Refresh"
)
_safe_addstr(
stdscr,
3,
max(0, (width - len(instructions)) // 2),
instructions,
curses.color_pair(3),
)
# Get cgroups
cgroups = self.collector.get_all_cgroups()
if not cgroups:
msg = "No cgroups found. Waiting for activity..."
_safe_addstr(
stdscr,
height // 2,
max(0, (width - len(msg)) // 2),
msg,
curses.color_pair(4),
)
return
# Sort cgroups by name
cgroups.sort(key=lambda c: c.name)
# Adjust selection bounds
if self.selected_index >= len(cgroups):
self.selected_index = len(cgroups) - 1
if self.selected_index < 0:
self.selected_index = 0
# Calculate visible range
list_height = max(1, height - 8)
if self.selected_index < self.scroll_offset:
self.scroll_offset = self.selected_index
elif self.selected_index >= self.scroll_offset + list_height:
self.scroll_offset = self.selected_index - list_height + 1
# Calculate max name length and ID width for alignment
max_name_len = min(50, max(len(cg.name) for cg in cgroups))
max_id_len = max(len(str(cg.id)) for cg in cgroups)
# Draw cgroup list with fancy borders
start_y = 5
_safe_addstr(
stdscr, start_y, 2, "" + "" * (width - 6) + "", curses.color_pair(1)
)
# Header row
header = f" {'CGROUP NAME':<{max_name_len}}{'ID':>{max_id_len}} "
_safe_addstr(stdscr, start_y + 1, 2, "", curses.color_pair(1))
_safe_addstr(
stdscr, start_y + 1, 3, header, curses.color_pair(1) | curses.A_BOLD
)
_safe_addstr(stdscr, start_y + 1, width - 3, "", curses.color_pair(1))
# Separator
_safe_addstr(
stdscr, start_y + 2, 2, "" + "" * (width - 6) + "", curses.color_pair(1)
)
for i in range(list_height):
idx = self.scroll_offset + i
y = start_y + 3 + i
if y >= height - 2:
break
_safe_addstr(stdscr, y, 2, "", curses.color_pair(1))
_safe_addstr(stdscr, y, width - 3, "", curses.color_pair(1))
if idx >= len(cgroups):
continue
cgroup = cgroups[idx]
# Truncate name if too long
display_name = (
cgroup.name
if len(cgroup.name) <= max_name_len
else cgroup.name[: max_name_len - 3] + "..."
)
if idx == self.selected_index:
# Highlight selected with proper alignment
line = f"{display_name:<{max_name_len}}{cgroup.id:>{max_id_len}} "
_safe_addstr(stdscr, y, 3, line, curses.color_pair(8) | curses.A_BOLD)
else:
line = f" {display_name:<{max_name_len}}{cgroup.id:>{max_id_len}} "
_safe_addstr(stdscr, y, 3, line, curses.color_pair(7))
# Bottom border
bottom_y = min(start_y + 3 + list_height, height - 3)
_safe_addstr(
stdscr, bottom_y, 2, "" + "" * (width - 6) + "", curses.color_pair(1)
)
# Footer
footer = f"Total: {len(cgroups)} cgroups"
if len(cgroups) > list_height:
footer += f" │ Showing {self.scroll_offset + 1}-{min(self.scroll_offset + list_height, len(cgroups))}"
_safe_addstr(
stdscr,
height - 2,
max(0, (width - len(footer)) // 2),
footer,
curses.color_pair(1),
)
def _draw_monitoring_screen(self, stdscr):
"""Draw the monitoring screen for selected cgroup."""
height, width = stdscr.getmaxyx()
if self.selected_cgroup is None:
return
# Get current stats
stats = self.collector.get_stats_for_cgroup(self.selected_cgroup)
history = self.collector.get_history(self.selected_cgroup)
# Draw fancy header
_draw_fancy_header(
stdscr, f"📊 {stats.cgroup_name[:40]}", "Live Performance Metrics"
)
# Instructions
instructions = "ESC/b: Back to List | w: Web Mode | q: Quit"
_safe_addstr(
stdscr,
3,
max(0, (width - len(instructions)) // 2),
instructions,
curses.color_pair(3),
)
# Calculate metrics for rate display
rates = _calculate_rates(history)
y = 5
# Syscall count in a fancy box
if y + 4 < height:
_draw_metric_box(
stdscr,
y,
2,
min(width - 4, 80),
"⚡ SYSTEM CALLS",
f"{stats.syscall_count:,}",
f"Rate: {rates['syscalls_per_sec']:.1f}/sec",
curses.color_pair(5),
)
y += 4
# Network I/O Section
if y + 8 < height:
_draw_section_header(stdscr, y, "🌐 NETWORK I/O", 1)
y += 1
# RX graph
rx_label = f"RX: {_format_bytes(stats.rx_bytes)}"
rx_rate = f"{_format_bytes(rates['rx_bytes_per_sec'])}/s"
rx_pkts = f"{stats.rx_packets:,} pkts ({rates['rx_pkts_per_sec']:.1f}/s)"
_draw_labeled_graph(
stdscr,
y,
2,
width - 4,
4,
rx_label,
rx_rate,
rx_pkts,
[s.rx_bytes for s in history],
curses.color_pair(2),
"Received Traffic (last 100 samples)",
)
y += 6
# TX graph
if y + 8 < height:
tx_label = f"TX: {_format_bytes(stats.tx_bytes)}"
tx_rate = f"{_format_bytes(rates['tx_bytes_per_sec'])}/s"
tx_pkts = f"{stats.tx_packets:,} pkts ({rates['tx_pkts_per_sec']:.1f}/s)"
_draw_labeled_graph(
stdscr,
y,
2,
width - 4,
4,
tx_label,
tx_rate,
tx_pkts,
[s.tx_bytes for s in history],
curses.color_pair(3),
"Transmitted Traffic (last 100 samples)",
)
y += 6
# File I/O Section
if y + 8 < height:
_draw_section_header(stdscr, y, "💾 FILE I/O", 1)
y += 1
# Read graph
read_label = f"READ: {_format_bytes(stats.read_bytes)}"
read_rate = f"{_format_bytes(rates['read_bytes_per_sec'])}/s"
read_ops = f"{stats.read_ops:,} ops ({rates['read_ops_per_sec']:.1f}/s)"
_draw_labeled_graph(
stdscr,
y,
2,
width - 4,
4,
read_label,
read_rate,
read_ops,
[s.read_bytes for s in history],
curses.color_pair(4),
"Read Operations (last 100 samples)",
)
y += 6
# Write graph
if y + 8 < height:
write_label = f"WRITE: {_format_bytes(stats.write_bytes)}"
write_rate = f"{_format_bytes(rates['write_bytes_per_sec'])}/s"
write_ops = f"{stats.write_ops:,} ops ({rates['write_ops_per_sec']:.1f}/s)"
_draw_labeled_graph(
stdscr,
y,
2,
width - 4,
4,
write_label,
write_rate,
write_ops,
[s.write_bytes for s in history],
curses.color_pair(5),
"Write Operations (last 100 samples)",
)
def _launch_web_mode(self, stdscr):
"""Launch web dashboard mode."""
height, width = stdscr.getmaxyx()
# Show transition message
stdscr.clear()
msg1 = "🌐 LAUNCHING WEB DASHBOARD"
_safe_addstr(
stdscr,
height // 2 - 2,
max(0, (width - len(msg1)) // 2),
msg1,
curses.color_pair(6) | curses.A_BOLD,
)
msg2 = "Server starting at http://localhost:8050"
_safe_addstr(
stdscr,
height // 2,
max(0, (width - len(msg2)) // 2),
msg2,
curses.color_pair(2),
)
msg3 = "Press 'q' to stop web server and return to TUI"
_safe_addstr(
stdscr,
height // 2 + 2,
max(0, (width - len(msg3)) // 2),
msg3,
curses.color_pair(3),
)
stdscr.refresh()
time.sleep(1)
try:
# Create and start web dashboard
self.web_dashboard = WebDashboard(
self.collector, selected_cgroup=self.selected_cgroup
)
# Start in background thread
self.web_thread = threading.Thread(
target=self.web_dashboard.run, daemon=True
)
self.web_thread.start()
time.sleep(2) # Give server time to start
# Wait for user to press 'q' to return
msg4 = "Web dashboard running at http://localhost:8050"
msg5 = "Press 'q' to return to TUI"
_safe_addstr(
stdscr,
height // 2 + 4,
max(0, (width - len(msg4)) // 2),
msg4,
curses.color_pair(1) | curses.A_BOLD,
)
_safe_addstr(
stdscr,
height // 2 + 5,
max(0, (width - len(msg5)) // 2),
msg5,
curses.color_pair(3) | curses.A_BOLD,
)
stdscr.refresh()
stdscr.nodelay(False) # Blocking mode
while True:
key = stdscr.getch()
if key == ord("q") or key == ord("Q"):
break
# Stop web server
if self.web_dashboard:
self.web_dashboard.stop()
except Exception as e:
error_msg = f"Error starting web dashboard: {str(e)}"
_safe_addstr(
stdscr,
height // 2 + 4,
max(0, (width - len(error_msg)) // 2),
error_msg,
curses.color_pair(4),
)
stdscr.refresh()
time.sleep(3)
# Restore TUI settings
stdscr.nodelay(True)
stdscr.timeout(100)
def _handle_input(self, key: int, stdscr) -> bool:
"""Handle keyboard input. Returns False to exit."""
if key == ord("q") or key == ord("Q"):
return False # Exit
if key == ord("w") or key == ord("W"):
# Launch web mode
self._launch_web_mode(stdscr)
return True
if self.current_screen == "selection":
if key == curses.KEY_UP:
self.selected_index = max(0, self.selected_index - 1)
elif key == curses.KEY_DOWN:
cgroups = self.collector.get_all_cgroups()
self.selected_index = min(len(cgroups) - 1, self.selected_index + 1)
elif key == ord("\n") or key == curses.KEY_ENTER or key == 10:
# Select cgroup
cgroups = self.collector.get_all_cgroups()
if cgroups and 0 <= self.selected_index < len(cgroups):
cgroups.sort(key=lambda c: c.name)
self.selected_cgroup = cgroups[self.selected_index].id
self.current_screen = "monitoring"
elif key == ord("r") or key == ord("R"):
# Force refresh cache
self.collector._cgroup_cache_time = 0
elif self.current_screen == "monitoring":
if key == 27 or key == ord("b") or key == ord("B"): # ESC or 'b'
self.current_screen = "selection"
self.selected_cgroup = None
return True # Continue running

View File

@ -0,0 +1,826 @@
"""Beautiful web dashboard for container monitoring using Plotly Dash."""
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from typing import Optional
from data_collection import ContainerDataCollector
class WebDashboard:
"""Beautiful web dashboard for container monitoring."""
def __init__(
self,
collector: ContainerDataCollector,
selected_cgroup: Optional[int] = None,
host: str = "0.0.0.0",
port: int = 8050,
):
self.collector = collector
self.selected_cgroup = selected_cgroup
self.host = host
self.port = port
# Suppress Dash dev tools and debug output
self.app = dash.Dash(
__name__,
title="pythonBPF Container Monitor",
suppress_callback_exceptions=True,
)
self._setup_layout()
self._setup_callbacks()
self._running = False
def _setup_layout(self):
"""Create the dashboard layout."""
self.app.layout = html.Div(
[
# Futuristic Header with pythonBPF branding
html.Div(
[
html.Div(
[
html.Div(
[
html.Span(
"python",
style={
"fontSize": "52px",
"fontWeight": "300",
"color": "#00ff88",
"fontFamily": "'Courier New', monospace",
"textShadow": "0 0 20px rgba(0,255,136,0.5)",
},
),
html.Span(
"BPF",
style={
"fontSize": "52px",
"fontWeight": "900",
"color": "#00d4ff",
"fontFamily": "'Courier New', monospace",
"textShadow": "0 0 20px rgba(0,212,255,0.5)",
},
),
],
style={"marginBottom": "5px"},
),
html.Div(
"CONTAINER PERFORMANCE MONITOR",
style={
"fontSize": "16px",
"letterSpacing": "8px",
"color": "#8899ff",
"fontWeight": "300",
"fontFamily": "'Courier New', monospace",
},
),
],
style={
"textAlign": "center",
},
),
html.Div(
id="cgroup-name",
style={
"textAlign": "center",
"color": "#00ff88",
"fontSize": "20px",
"marginTop": "15px",
"fontFamily": "'Courier New', monospace",
"fontWeight": "bold",
"textShadow": "0 0 10px rgba(0,255,136,0.3)",
},
),
],
style={
"background": "linear-gradient(135deg, #0a0e27 0%, #1a1f3a 50%, #0a0e27 100%)",
"padding": "40px 20px",
"borderRadius": "0",
"marginBottom": "0",
"boxShadow": "0 10px 40px rgba(0,212,255,0.2)",
"border": "1px solid rgba(0,212,255,0.3)",
"borderTop": "3px solid #00d4ff",
"borderBottom": "3px solid #00ff88",
"position": "relative",
"overflow": "hidden",
},
),
# Cgroup selector (if no cgroup selected)
html.Div(
[
html.Label(
"SELECT CGROUP:",
style={
"fontSize": "14px",
"fontWeight": "bold",
"color": "#00d4ff",
"marginRight": "15px",
"fontFamily": "'Courier New', monospace",
"letterSpacing": "2px",
},
),
dcc.Dropdown(
id="cgroup-selector",
style={
"width": "600px",
"display": "inline-block",
"background": "#1a1f3a",
"border": "1px solid #00d4ff",
},
),
],
id="selector-container",
style={
"textAlign": "center",
"marginTop": "30px",
"marginBottom": "30px",
"padding": "20px",
"background": "rgba(26,31,58,0.5)",
"borderRadius": "10px",
"border": "1px solid rgba(0,212,255,0.2)",
"display": "block" if self.selected_cgroup is None else "none",
},
),
# Stats cards row
html.Div(
[
self._create_stat_card(
"syscall-card", "⚡ SYSCALLS", "#00ff88"
),
self._create_stat_card("network-card", "🌐 NETWORK", "#00d4ff"),
self._create_stat_card("file-card", "💾 FILE I/O", "#ff0088"),
],
style={
"display": "flex",
"justifyContent": "space-around",
"marginBottom": "30px",
"marginTop": "30px",
"gap": "25px",
"flexWrap": "wrap",
"padding": "0 20px",
},
),
# Graphs container
html.Div(
[
# Network graphs
html.Div(
[
html.Div(
[
html.Span("🌐 ", style={"fontSize": "24px"}),
html.Span(
"NETWORK",
style={
"fontFamily": "'Courier New', monospace",
"letterSpacing": "3px",
"fontWeight": "bold",
},
),
html.Span(
" I/O",
style={
"fontFamily": "'Courier New', monospace",
"letterSpacing": "3px",
"color": "#00d4ff",
},
),
],
style={
"color": "#ffffff",
"fontSize": "20px",
"borderBottom": "2px solid #00d4ff",
"paddingBottom": "15px",
"marginBottom": "25px",
"textShadow": "0 0 10px rgba(0,212,255,0.3)",
},
),
dcc.Graph(
id="network-graph", style={"height": "400px"}
),
],
style={
"background": "linear-gradient(135deg, #0a0e27 0%, #1a1f3a 100%)",
"padding": "30px",
"borderRadius": "15px",
"boxShadow": "0 8px 32px rgba(0,212,255,0.15)",
"marginBottom": "30px",
"border": "1px solid rgba(0,212,255,0.2)",
},
),
# File I/O graphs
html.Div(
[
html.Div(
[
html.Span("💾 ", style={"fontSize": "24px"}),
html.Span(
"FILE",
style={
"fontFamily": "'Courier New', monospace",
"letterSpacing": "3px",
"fontWeight": "bold",
},
),
html.Span(
" I/O",
style={
"fontFamily": "'Courier New', monospace",
"letterSpacing": "3px",
"color": "#ff0088",
},
),
],
style={
"color": "#ffffff",
"fontSize": "20px",
"borderBottom": "2px solid #ff0088",
"paddingBottom": "15px",
"marginBottom": "25px",
"textShadow": "0 0 10px rgba(255,0,136,0.3)",
},
),
dcc.Graph(
id="file-io-graph", style={"height": "400px"}
),
],
style={
"background": "linear-gradient(135deg, #0a0e27 0%, #1a1f3a 100%)",
"padding": "30px",
"borderRadius": "15px",
"boxShadow": "0 8px 32px rgba(255,0,136,0.15)",
"marginBottom": "30px",
"border": "1px solid rgba(255,0,136,0.2)",
},
),
# Combined time series
html.Div(
[
html.Div(
[
html.Span("📈 ", style={"fontSize": "24px"}),
html.Span(
"REAL-TIME",
style={
"fontFamily": "'Courier New', monospace",
"letterSpacing": "3px",
"fontWeight": "bold",
},
),
html.Span(
" METRICS",
style={
"fontFamily": "'Courier New', monospace",
"letterSpacing": "3px",
"color": "#00ff88",
},
),
],
style={
"color": "#ffffff",
"fontSize": "20px",
"borderBottom": "2px solid #00ff88",
"paddingBottom": "15px",
"marginBottom": "25px",
"textShadow": "0 0 10px rgba(0,255,136,0.3)",
},
),
dcc.Graph(
id="timeseries-graph", style={"height": "500px"}
),
],
style={
"background": "linear-gradient(135deg, #0a0e27 0%, #1a1f3a 100%)",
"padding": "30px",
"borderRadius": "15px",
"boxShadow": "0 8px 32px rgba(0,255,136,0.15)",
"border": "1px solid rgba(0,255,136,0.2)",
},
),
],
style={"padding": "0 20px"},
),
# Footer with pythonBPF branding
html.Div(
[
html.Div(
[
html.Span(
"Powered by ",
style={"color": "#8899ff", "fontSize": "12px"},
),
html.Span(
"pythonBPF",
style={
"color": "#00d4ff",
"fontSize": "14px",
"fontWeight": "bold",
"fontFamily": "'Courier New', monospace",
},
),
html.Span(
" | eBPF Container Monitoring",
style={
"color": "#8899ff",
"fontSize": "12px",
"marginLeft": "10px",
},
),
]
)
],
style={
"textAlign": "center",
"padding": "20px",
"marginTop": "40px",
"background": "linear-gradient(135deg, #0a0e27 0%, #1a1f3a 100%)",
"borderTop": "1px solid rgba(0,212,255,0.2)",
},
),
# Auto-update interval
dcc.Interval(id="interval-component", interval=1000, n_intervals=0),
],
style={
"padding": "0",
"fontFamily": "'Segoe UI', 'Courier New', monospace",
"background": "linear-gradient(to bottom, #050813 0%, #0a0e27 100%)",
"minHeight": "100vh",
"margin": "0",
},
)
def _create_stat_card(self, card_id: str, title: str, color: str):
"""Create a statistics card with futuristic styling."""
return html.Div(
[
html.H3(
title,
style={
"color": color,
"fontSize": "16px",
"marginBottom": "20px",
"fontWeight": "bold",
"fontFamily": "'Courier New', monospace",
"letterSpacing": "2px",
"textShadow": f"0 0 10px {color}50",
},
),
html.Div(
[
html.Div(
id=f"{card_id}-value",
style={
"fontSize": "42px",
"fontWeight": "bold",
"color": "#ffffff",
"marginBottom": "10px",
"fontFamily": "'Courier New', monospace",
"textShadow": f"0 0 20px {color}40",
},
),
html.Div(
id=f"{card_id}-rate",
style={
"fontSize": "14px",
"color": "#8899ff",
"fontFamily": "'Courier New', monospace",
},
),
]
),
],
style={
"flex": "1",
"minWidth": "280px",
"background": "linear-gradient(135deg, #0a0e27 0%, #1a1f3a 100%)",
"padding": "30px",
"borderRadius": "15px",
"boxShadow": f"0 8px 32px {color}20",
"border": f"1px solid {color}40",
"borderLeft": f"4px solid {color}",
"transition": "transform 0.3s, box-shadow 0.3s",
"position": "relative",
"overflow": "hidden",
},
)
def _setup_callbacks(self):
"""Setup dashboard callbacks."""
@self.app.callback(
[Output("cgroup-selector", "options"), Output("cgroup-selector", "value")],
[Input("interval-component", "n_intervals")],
)
def update_cgroup_selector(n):
if self.selected_cgroup is not None:
return [], self.selected_cgroup
cgroups = self.collector.get_all_cgroups()
options = [
{"label": f"{cg.name} (ID: {cg.id})", "value": cg.id}
for cg in sorted(cgroups, key=lambda c: c.name)
]
value = options[0]["value"] if options else None
if value and self.selected_cgroup is None:
self.selected_cgroup = value
return options, self.selected_cgroup
@self.app.callback(
Output("cgroup-selector", "value", allow_duplicate=True),
[Input("cgroup-selector", "value")],
prevent_initial_call=True,
)
def select_cgroup(value):
if value:
self.selected_cgroup = value
return value
@self.app.callback(
[
Output("cgroup-name", "children"),
Output("syscall-card-value", "children"),
Output("syscall-card-rate", "children"),
Output("network-card-value", "children"),
Output("network-card-rate", "children"),
Output("file-card-value", "children"),
Output("file-card-rate", "children"),
Output("network-graph", "figure"),
Output("file-io-graph", "figure"),
Output("timeseries-graph", "figure"),
],
[Input("interval-component", "n_intervals")],
)
def update_dashboard(n):
if self.selected_cgroup is None:
empty_fig = self._create_empty_figure(
"Select a cgroup to begin monitoring"
)
return (
"SELECT A CGROUP TO START",
"0",
"",
"0 B",
"",
"0 B",
"",
empty_fig,
empty_fig,
empty_fig,
)
try:
stats = self.collector.get_stats_for_cgroup(self.selected_cgroup)
history = self.collector.get_history(self.selected_cgroup)
rates = self._calculate_rates(history)
return (
f"{stats.cgroup_name}",
f"{stats.syscall_count:,}",
f"{rates['syscalls_per_sec']:.1f} calls/sec",
f"{self._format_bytes(stats.rx_bytes + stats.tx_bytes)}",
f"{self._format_bytes(rates['rx_bytes_per_sec'])}/s ↑ {self._format_bytes(rates['tx_bytes_per_sec'])}/s",
f"{self._format_bytes(stats.read_bytes + stats.write_bytes)}",
f"R: {self._format_bytes(rates['read_bytes_per_sec'])}/s W: {self._format_bytes(rates['write_bytes_per_sec'])}/s",
self._create_network_graph(history),
self._create_file_io_graph(history),
self._create_timeseries_graph(history),
)
except Exception as e:
empty_fig = self._create_empty_figure(f"Error: {str(e)}")
return (
"ERROR",
"0",
str(e),
"0 B",
"",
"0 B",
"",
empty_fig,
empty_fig,
empty_fig,
)
def _create_empty_figure(self, message: str):
"""Create an empty figure with a message."""
fig = go.Figure()
fig.update_layout(
title=message,
template="plotly_dark",
paper_bgcolor="#0a0e27",
plot_bgcolor="#0a0e27",
font=dict(color="#8899ff", family="Courier New, monospace"),
)
return fig
def _create_network_graph(self, history):
"""Create network I/O graph with futuristic styling."""
if len(history) < 2:
return self._create_empty_figure("Collecting data...")
times = [i for i in range(len(history))]
rx_bytes = [s.rx_bytes for s in history]
tx_bytes = [s.tx_bytes for s in history]
fig = make_subplots(
rows=2,
cols=1,
subplot_titles=("RECEIVED (RX)", "TRANSMITTED (TX)"),
vertical_spacing=0.15,
)
fig.add_trace(
go.Scatter(
x=times,
y=rx_bytes,
mode="lines",
name="RX",
fill="tozeroy",
line=dict(color="#00d4ff", width=3, shape="spline"),
fillcolor="rgba(0, 212, 255, 0.2)",
),
row=1,
col=1,
)
fig.add_trace(
go.Scatter(
x=times,
y=tx_bytes,
mode="lines",
name="TX",
fill="tozeroy",
line=dict(color="#00ff88", width=3, shape="spline"),
fillcolor="rgba(0, 255, 136, 0.2)",
),
row=2,
col=1,
)
fig.update_xaxes(title_text="Time (samples)", row=2, col=1, color="#8899ff")
fig.update_yaxes(title_text="Bytes", row=1, col=1, color="#8899ff")
fig.update_yaxes(title_text="Bytes", row=2, col=1, color="#8899ff")
fig.update_layout(
height=400,
template="plotly_dark",
paper_bgcolor="rgba(0,0,0,0)",
plot_bgcolor="#0a0e27",
showlegend=False,
hovermode="x unified",
font=dict(family="Courier New, monospace", color="#8899ff"),
)
return fig
def _create_file_io_graph(self, history):
"""Create file I/O graph with futuristic styling."""
if len(history) < 2:
return self._create_empty_figure("Collecting data...")
times = [i for i in range(len(history))]
read_bytes = [s.read_bytes for s in history]
write_bytes = [s.write_bytes for s in history]
fig = make_subplots(
rows=2,
cols=1,
subplot_titles=("READ OPERATIONS", "WRITE OPERATIONS"),
vertical_spacing=0.15,
)
fig.add_trace(
go.Scatter(
x=times,
y=read_bytes,
mode="lines",
name="Read",
fill="tozeroy",
line=dict(color="#ff0088", width=3, shape="spline"),
fillcolor="rgba(255, 0, 136, 0.2)",
),
row=1,
col=1,
)
fig.add_trace(
go.Scatter(
x=times,
y=write_bytes,
mode="lines",
name="Write",
fill="tozeroy",
line=dict(color="#8844ff", width=3, shape="spline"),
fillcolor="rgba(136, 68, 255, 0.2)",
),
row=2,
col=1,
)
fig.update_xaxes(title_text="Time (samples)", row=2, col=1, color="#8899ff")
fig.update_yaxes(title_text="Bytes", row=1, col=1, color="#8899ff")
fig.update_yaxes(title_text="Bytes", row=2, col=1, color="#8899ff")
fig.update_layout(
height=400,
template="plotly_dark",
paper_bgcolor="rgba(0,0,0,0)",
plot_bgcolor="#0a0e27",
showlegend=False,
hovermode="x unified",
font=dict(family="Courier New, monospace", color="#8899ff"),
)
return fig
def _create_timeseries_graph(self, history):
"""Create combined time series graph with futuristic styling."""
if len(history) < 2:
return self._create_empty_figure("Collecting data...")
times = [i for i in range(len(history))]
fig = make_subplots(
rows=3,
cols=1,
subplot_titles=(
"SYSTEM CALLS",
"NETWORK TRAFFIC (Bytes)",
"FILE I/O (Bytes)",
),
vertical_spacing=0.1,
specs=[
[{"secondary_y": False}],
[{"secondary_y": True}],
[{"secondary_y": True}],
],
)
# Syscalls
fig.add_trace(
go.Scatter(
x=times,
y=[s.syscall_count for s in history],
mode="lines",
name="Syscalls",
line=dict(color="#00ff88", width=3, shape="spline"),
),
row=1,
col=1,
)
# Network
fig.add_trace(
go.Scatter(
x=times,
y=[s.rx_bytes for s in history],
mode="lines",
name="RX",
line=dict(color="#00d4ff", width=2, shape="spline"),
),
row=2,
col=1,
secondary_y=False,
)
fig.add_trace(
go.Scatter(
x=times,
y=[s.tx_bytes for s in history],
mode="lines",
name="TX",
line=dict(color="#00ff88", width=2, shape="spline", dash="dot"),
),
row=2,
col=1,
secondary_y=True,
)
# File I/O
fig.add_trace(
go.Scatter(
x=times,
y=[s.read_bytes for s in history],
mode="lines",
name="Read",
line=dict(color="#ff0088", width=2, shape="spline"),
),
row=3,
col=1,
secondary_y=False,
)
fig.add_trace(
go.Scatter(
x=times,
y=[s.write_bytes for s in history],
mode="lines",
name="Write",
line=dict(color="#8844ff", width=2, shape="spline", dash="dot"),
),
row=3,
col=1,
secondary_y=True,
)
fig.update_xaxes(title_text="Time (samples)", row=3, col=1, color="#8899ff")
fig.update_yaxes(title_text="Count", row=1, col=1, color="#8899ff")
fig.update_yaxes(
title_text="RX Bytes", row=2, col=1, secondary_y=False, color="#00d4ff"
)
fig.update_yaxes(
title_text="TX Bytes", row=2, col=1, secondary_y=True, color="#00ff88"
)
fig.update_yaxes(
title_text="Read Bytes", row=3, col=1, secondary_y=False, color="#ff0088"
)
fig.update_yaxes(
title_text="Write Bytes", row=3, col=1, secondary_y=True, color="#8844ff"
)
fig.update_layout(
height=500,
template="plotly_dark",
paper_bgcolor="rgba(0,0,0,0)",
plot_bgcolor="#0a0e27",
hovermode="x unified",
showlegend=True,
legend=dict(
orientation="h",
yanchor="bottom",
y=1.02,
xanchor="right",
x=1,
font=dict(color="#8899ff"),
),
font=dict(family="Courier New, monospace", color="#8899ff"),
)
return fig
def _calculate_rates(self, history):
"""Calculate rates from history."""
if len(history) < 2:
return {
"syscalls_per_sec": 0.0,
"rx_bytes_per_sec": 0.0,
"tx_bytes_per_sec": 0.0,
"read_bytes_per_sec": 0.0,
"write_bytes_per_sec": 0.0,
}
recent = history[-1]
previous = history[-2]
time_delta = recent.timestamp - previous.timestamp
if time_delta <= 0:
time_delta = 1.0
return {
"syscalls_per_sec": max(
0, (recent.syscall_count - previous.syscall_count) / time_delta
),
"rx_bytes_per_sec": max(
0, (recent.rx_bytes - previous.rx_bytes) / time_delta
),
"tx_bytes_per_sec": max(
0, (recent.tx_bytes - previous.tx_bytes) / time_delta
),
"read_bytes_per_sec": max(
0, (recent.read_bytes - previous.read_bytes) / time_delta
),
"write_bytes_per_sec": max(
0, (recent.write_bytes - previous.write_bytes) / time_delta
),
}
def _format_bytes(self, bytes_val: float) -> str:
"""Format bytes into human-readable string."""
if bytes_val < 0:
bytes_val = 0
for unit in ["B", "KB", "MB", "GB", "TB"]:
if bytes_val < 1024.0:
return f"{bytes_val:.2f} {unit}"
bytes_val /= 1024.0
return f"{bytes_val:.2f} PB"
def run(self):
"""Run the web dashboard."""
self._running = True
# Suppress Werkzeug logging
import logging
log = logging.getLogger("werkzeug")
log.setLevel(logging.ERROR)
self.app.run(debug=False, host=self.host, port=self.port, use_reloader=False)
def stop(self):
"""Stop the web dashboard."""
self._running = False

View File

@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "pythonbpf"
version = "0.1.7"
version = "0.1.8"
description = "Reduced Python frontend for eBPF"
authors = [
{ name = "r41k0u", email="pragyanshchaturvedi18@gmail.com" },
@ -34,6 +34,14 @@ dependencies = [
"pylibbpf"
]
[project.optional-dependencies]
docs = [
"sphinx>=7.0",
"myst-parser>=2.0",
"sphinx-rtd-theme>=2.0",
"sphinx-copybutton",
]
[tool.setuptools.packages.find]
where = ["."]
include = ["pythonbpf*"]

View File

@ -114,9 +114,22 @@ def _allocate_for_call(
# Struct constructors
elif call_type in structs_sym_tab:
struct_info = structs_sym_tab[call_type]
if len(rval.args) == 0:
# Zero-arg constructor: allocate the struct itself
var = builder.alloca(struct_info.ir_type, name=var_name)
local_sym_tab[var_name] = LocalSymbol(var, struct_info.ir_type, call_type)
local_sym_tab[var_name] = LocalSymbol(
var, struct_info.ir_type, call_type
)
logger.info(f"Pre-allocated {var_name} for struct {call_type}")
else:
# Pointer cast: allocate as pointer to struct
ptr_type = ir.PointerType(struct_info.ir_type)
var = builder.alloca(ptr_type, name=var_name)
var.align = 8
local_sym_tab[var_name] = LocalSymbol(var, ptr_type, call_type)
logger.info(
f"Pre-allocated {var_name} for struct pointer cast to {call_type}"
)
elif VmlinuxHandlerRegistry.is_vmlinux_struct(call_type):
# When calling struct_name(pointer), we're doing a cast, not construction
@ -371,6 +384,7 @@ def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, structs_sym_
f"Could not determine size for ctypes field {field_name}: {e}"
)
actual_ir_type = ir.IntType(64)
field_size_bits = 64
# Check if it's a nested vmlinux struct or complex type
elif field.type.__module__ == "vmlinux":
@ -379,23 +393,34 @@ def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, structs_sym_
field.ctype_complex_type, ctypes._Pointer
):
actual_ir_type = ir.IntType(64) # Pointer is always 64-bit
field_size_bits = 64
# For embedded structs, this is more complex - might need different handling
else:
logger.warning(
f"Field {field_name} is a nested vmlinux struct, using i64 for now"
)
actual_ir_type = ir.IntType(64)
field_size_bits = 64
else:
logger.warning(
f"Unknown field type module {field.type.__module__} for {field_name}"
)
actual_ir_type = ir.IntType(64)
field_size_bits = 64
# Allocate with the actual IR type
# Pre-allocate the tmp storage used by load_struct_field (so we don't alloca inside handler)
tmp_name = f"{struct_var}_{field_name}_tmp"
tmp_ir_type = ir.IntType(field_size_bits)
tmp_var = builder.alloca(tmp_ir_type, name=tmp_name)
tmp_var.align = tmp_ir_type.width // 8
local_sym_tab[tmp_name] = LocalSymbol(tmp_var, tmp_ir_type)
logger.info(
f"Pre-allocated temp {tmp_name} (i{field_size_bits}) for vmlinux field read {vmlinux_struct_name}.{field_name}"
)
# Allocate with the actual IR type for the destination var
var = _allocate_with_type(builder, var_name, actual_ir_type)
local_sym_tab[var_name] = LocalSymbol(
var, actual_ir_type, field
) # <-- Store Field metadata
local_sym_tab[var_name] = LocalSymbol(var, actual_ir_type, field)
logger.info(
f"Pre-allocated {var_name} as {actual_ir_type} from vmlinux struct {vmlinux_struct_name}.{field_name}"

View File

@ -174,6 +174,23 @@ def handle_variable_assignment(
f"Type mismatch: vmlinux struct pointer requires i64, got {var_type}"
)
return False
# Handle user-defined struct pointer casts
# val_type is a string (struct name), var_type is a pointer to the struct
if isinstance(val_type, str) and val_type in structs_sym_tab:
struct_info = structs_sym_tab[val_type]
expected_ptr_type = ir.PointerType(struct_info.ir_type)
# Check if var_type matches the expected pointer type
if isinstance(var_type, ir.PointerType) and var_type == expected_ptr_type:
# val is already the correct pointer type from inttoptr/bitcast
builder.store(val, var_ptr)
logger.info(f"Assigned user-defined struct pointer cast to {var_name}")
return True
else:
logger.error(
f"Type mismatch: user-defined struct pointer cast requires pointer type, got {var_type}"
)
return False
if isinstance(val_type, Field):
logger.info("Handling assignment to struct field")
# Special handling for struct_xdp_md i32 fields that are zero-extended to i64

View File

@ -25,7 +25,7 @@ import re
logger: Logger = logging.getLogger(__name__)
VERSION = "v0.1.7"
VERSION = "v0.1.8"
def finalize_module(original_str):

View File

@ -1,6 +1,6 @@
from .expr_pass import eval_expr, handle_expr, get_operand_value
from .type_normalization import convert_to_bool, get_base_type_and_depth
from .ir_ops import deref_to_depth
from .ir_ops import deref_to_depth, access_struct_field
from .call_registry import CallHandlerRegistry
from .vmlinux_registry import VmlinuxHandlerRegistry
@ -10,6 +10,7 @@ __all__ = [
"convert_to_bool",
"get_base_type_and_depth",
"deref_to_depth",
"access_struct_field",
"get_operand_value",
"CallHandlerRegistry",
"VmlinuxHandlerRegistry",

View File

@ -6,11 +6,11 @@ from typing import Dict
from pythonbpf.type_deducer import ctypes_to_ir, is_ctypes
from .call_registry import CallHandlerRegistry
from .ir_ops import deref_to_depth, access_struct_field
from .type_normalization import (
convert_to_bool,
handle_comparator,
get_base_type_and_depth,
deref_to_depth,
)
from .vmlinux_registry import VmlinuxHandlerRegistry
from ..vmlinux_parser.dependency_node import Field
@ -77,89 +77,6 @@ def _handle_attribute_expr(
logger.info(
f"Variable type: {var_type}, Variable ptr: {var_ptr}, Variable Metadata: {var_metadata}"
)
# Check if this is a pointer to a struct (from map lookup)
if (
isinstance(var_type, ir.PointerType)
and var_metadata
and isinstance(var_metadata, str)
):
if var_metadata in structs_sym_tab:
logger.info(
f"Handling pointer to struct {var_metadata} from map lookup"
)
if func is None:
raise ValueError(
f"func parameter required for null-safe pointer access to {var_name}.{attr_name}"
)
# Load the pointer value (ptr<struct>)
struct_ptr = builder.load(var_ptr)
# Create blocks for null check
null_check_block = builder.block
not_null_block = func.append_basic_block(
name=f"{var_name}_not_null"
)
merge_block = func.append_basic_block(name=f"{var_name}_merge")
# Check if pointer is null
null_ptr = ir.Constant(struct_ptr.type, None)
is_not_null = builder.icmp_signed("!=", struct_ptr, null_ptr)
logger.info(f"Inserted null check for pointer {var_name}")
builder.cbranch(is_not_null, not_null_block, merge_block)
# Not-null block: Access the field
builder.position_at_end(not_null_block)
# Get struct metadata
metadata = structs_sym_tab[var_metadata]
struct_ptr = builder.bitcast(
struct_ptr, metadata.ir_type.as_pointer()
)
if attr_name not in metadata.fields:
raise ValueError(
f"Field '{attr_name}' not found in struct '{var_metadata}'"
)
# GEP to field
field_gep = metadata.gep(builder, struct_ptr, attr_name)
# Load field value
field_val = builder.load(field_gep)
field_type = metadata.field_type(attr_name)
logger.info(
f"Loaded field {attr_name} from struct pointer, type: {field_type}"
)
# Branch to merge
not_null_after_load = builder.block
builder.branch(merge_block)
# Merge block: PHI node for the result
builder.position_at_end(merge_block)
phi = builder.phi(field_type, name=f"{var_name}_{attr_name}")
# If null, return zero/default value
if isinstance(field_type, ir.IntType):
zero_value = ir.Constant(field_type, 0)
elif isinstance(field_type, ir.PointerType):
zero_value = ir.Constant(field_type, None)
elif isinstance(field_type, ir.ArrayType):
# For arrays, we can't easily create a zero constant
# This case is tricky - for now, just use undef
zero_value = ir.Constant(field_type, ir.Undefined)
else:
zero_value = ir.Constant(field_type, ir.Undefined)
phi.add_incoming(zero_value, null_check_block)
phi.add_incoming(field_val, not_null_after_load)
logger.info(f"Created PHI node for {var_name}.{attr_name}")
return phi, field_type
if (
hasattr(var_metadata, "__module__")
and var_metadata.__module__ == "vmlinux"
@ -180,13 +97,23 @@ def _handle_attribute_expr(
)
return None
# Regular user-defined struct
metadata = structs_sym_tab.get(var_metadata)
if metadata and attr_name in metadata.fields:
gep = metadata.gep(builder, var_ptr, attr_name)
val = builder.load(gep)
field_type = metadata.field_type(attr_name)
return val, field_type
if var_metadata in structs_sym_tab:
return access_struct_field(
builder,
var_ptr,
var_type,
var_metadata,
expr.attr,
structs_sym_tab,
func,
)
else:
logger.error(f"Struct metadata for '{var_name}' not found")
else:
logger.error(f"Undefined variable '{var_name}' for attribute access")
else:
logger.error("Unsupported attribute base expression type")
return None
@ -241,6 +168,10 @@ def get_operand_value(
var_type = var.type
base_type, depth = get_base_type_and_depth(var_type)
logger.info(f"var is {var}, base_type is {base_type}, depth is {depth}")
if depth == 1:
val = builder.load(var)
return val
else:
val = deref_to_depth(func, builder, var, depth)
return val
else:
@ -618,7 +549,7 @@ def _handle_boolean_op(
# ============================================================================
# VMLinux casting
# Struct casting (including vmlinux struct casting)
# ============================================================================
@ -666,17 +597,85 @@ def _handle_vmlinux_cast(
# Cast the integer/value to a pointer to the struct
# If arg_val is an integer type, we need to inttoptr it
ptr_type = ir.PointerType()
# TODO: add a integer check here later
# TODO: add a field value type check here
# print(arg_type)
if isinstance(arg_type, Field):
if ctypes_to_ir(arg_type.type.__name__):
# Cast integer to pointer
casted_ptr = builder.inttoptr(arg_val, ptr_type)
else:
logger.error(f"Unsupported type for vmlinux cast: {arg_type}")
return None
else:
casted_ptr = builder.inttoptr(arg_val, ptr_type)
return casted_ptr, vmlinux_struct_type
def _handle_user_defined_struct_cast(
func,
module,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
):
"""Handle user-defined struct cast expressions like iphdr(nh).
This casts a pointer/integer value to a pointer to the user-defined struct,
similar to how vmlinux struct casts work but for user-defined @struct types.
"""
if len(expr.args) != 1:
logger.info("User-defined struct cast takes exactly one argument")
return None
# Get the struct name
struct_name = expr.func.id
if struct_name not in structs_sym_tab:
logger.error(f"Struct {struct_name} not found in structs_sym_tab")
return None
struct_info = structs_sym_tab[struct_name]
# Evaluate the argument (e.g.,
# an address/pointer value)
arg_result = eval_expr(
func,
module,
builder,
expr.args[0],
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if arg_result is None:
logger.info("Failed to evaluate argument to user-defined struct cast")
return None
arg_val, arg_type = arg_result
# Cast the integer/pointer value to a pointer to the struct type
# The struct pointer type is a pointer to the struct's IR type
struct_ptr_type = ir.PointerType(struct_info.ir_type)
# If arg_val is an integer type (like i64), convert to pointer using inttoptr
if isinstance(arg_val.type, ir.IntType):
casted_ptr = builder.inttoptr(arg_val, struct_ptr_type)
logger.info(f"Cast integer to pointer for struct {struct_name}")
elif isinstance(arg_val.type, ir.PointerType):
# If already a pointer, bitcast to the struct pointer type
casted_ptr = builder.bitcast(arg_val, struct_ptr_type)
logger.info(f"Bitcast pointer to struct pointer for {struct_name}")
else:
logger.error(f"Unsupported type for user-defined struct cast: {arg_val.type}")
return None
return casted_ptr, struct_name
# ============================================================================
# Expression Dispatcher
# ============================================================================
@ -722,6 +721,16 @@ def eval_expr(
map_sym_tab,
structs_sym_tab,
)
if isinstance(expr.func, ast.Name) and (expr.func.id in structs_sym_tab):
return _handle_user_defined_struct_cast(
func,
module,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
result = CallHandlerRegistry.handle_call(
expr, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab

View File

@ -17,41 +17,100 @@ def deref_to_depth(func, builder, val, target_depth):
# dereference with null check
pointee_type = cur_type.pointee
null_check_block = builder.block
not_null_block = func.append_basic_block(name=f"deref_not_null_{depth}")
merge_block = func.append_basic_block(name=f"deref_merge_{depth}")
null_ptr = ir.Constant(cur_type, None)
is_not_null = builder.icmp_signed("!=", cur_val, null_ptr)
logger.debug(f"Inserted null check for pointer at depth {depth}")
def load_op(builder, ptr):
return builder.load(ptr)
builder.cbranch(is_not_null, not_null_block, merge_block)
builder.position_at_end(not_null_block)
dereferenced_val = builder.load(cur_val)
logger.debug(f"Dereferenced to depth {depth - 1}, type: {pointee_type}")
builder.branch(merge_block)
builder.position_at_end(merge_block)
phi = builder.phi(pointee_type, name=f"deref_result_{depth}")
zero_value = (
ir.Constant(pointee_type, 0)
if isinstance(pointee_type, ir.IntType)
else ir.Constant(pointee_type, None)
cur_val = _null_checked_operation(
func, builder, cur_val, load_op, pointee_type, f"deref_{depth}"
)
phi.add_incoming(zero_value, null_check_block)
phi.add_incoming(dereferenced_val, not_null_block)
# Continue with phi result
cur_val = phi
cur_type = pointee_type
logger.debug(f"Dereferenced to depth {depth}, type: {pointee_type}")
return cur_val
def deref_struct_ptr(
func, builder, struct_ptr, struct_metadata, field_name, structs_sym_tab
def _null_checked_operation(func, builder, ptr, operation, result_type, name_prefix):
"""
Generic null-checked operation on a pointer.
"""
curr_block = builder.block
not_null_block = func.append_basic_block(name=f"{name_prefix}_not_null")
merge_block = func.append_basic_block(name=f"{name_prefix}_merge")
null_ptr = ir.Constant(ptr.type, None)
is_not_null = builder.icmp_signed("!=", ptr, null_ptr)
builder.cbranch(is_not_null, not_null_block, merge_block)
builder.position_at_end(not_null_block)
result = operation(builder, ptr)
not_null_after = builder.block
builder.branch(merge_block)
builder.position_at_end(merge_block)
phi = builder.phi(result_type, name=f"{name_prefix}_result")
if isinstance(result_type, ir.IntType):
null_val = ir.Constant(result_type, 0)
elif isinstance(result_type, ir.PointerType):
null_val = ir.Constant(result_type, None)
else:
null_val = ir.Constant(result_type, ir.Undefined)
phi.add_incoming(null_val, curr_block)
phi.add_incoming(result, not_null_after)
return phi
def access_struct_field(
builder, var_ptr, var_type, var_metadata, field_name, structs_sym_tab, func=None
):
"""Dereference a pointer to a struct type."""
return deref_to_depth(func, builder, struct_ptr, 1)
"""
Access a struct field - automatically returns value or pointer based on field type.
"""
metadata = (
structs_sym_tab.get(var_metadata)
if isinstance(var_metadata, str)
else var_metadata
)
if not metadata or field_name not in metadata.fields:
raise ValueError(f"Field '{field_name}' not found in struct")
field_type = metadata.field_type(field_name)
is_ptr_to_struct = isinstance(var_type, ir.PointerType) and isinstance(
var_metadata, str
)
# Get struct pointer
struct_ptr = builder.load(var_ptr) if is_ptr_to_struct else var_ptr
should_load = not isinstance(field_type, ir.ArrayType)
def field_access_op(builder, ptr):
typed_ptr = builder.bitcast(ptr, metadata.ir_type.as_pointer())
field_ptr = metadata.gep(builder, typed_ptr, field_name)
return builder.load(field_ptr) if should_load else field_ptr
# Handle null check for pointer-to-struct
if is_ptr_to_struct:
if func is None:
raise ValueError("func required for null-safe struct pointer access")
if should_load:
result_type = field_type
else:
result_type = field_type.as_pointer()
result = _null_checked_operation(
func,
builder,
struct_ptr,
field_access_op,
result_type,
f"field_{field_name}",
)
return result, field_type
field_ptr = metadata.gep(builder, struct_ptr, field_name)
result = builder.load(field_ptr) if should_load else field_ptr
return result, field_type

View File

@ -16,6 +16,7 @@ from .helpers import (
smp_processor_id,
uid,
skb_store_bytes,
get_current_cgroup_id,
get_stack,
XDP_DROP,
XDP_PASS,
@ -79,6 +80,7 @@ __all__ = [
"handle_helper_call",
"emit_probe_read_kernel_str_call",
"emit_probe_read_kernel_call",
"get_current_cgroup_id",
"ktime",
"pid",
"deref",

View File

@ -30,6 +30,7 @@ class BPFHelperID(Enum):
BPF_SKB_STORE_BYTES = 9
BPF_GET_CURRENT_PID_TGID = 14
BPF_GET_CURRENT_UID_GID = 15
BPF_GET_CURRENT_CGROUP_ID = 80
BPF_GET_CURRENT_COMM = 16
BPF_PERF_EVENT_OUTPUT = 25
BPF_GET_STACK = 67
@ -68,6 +69,33 @@ def bpf_ktime_get_ns_emitter(
return result, ir.IntType(64)
@HelperHandlerRegistry.register(
"get_current_cgroup_id",
param_types=[],
return_type=ir.IntType(64),
)
def bpf_get_current_cgroup_id(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_get_current_cgroup_id helper function call.
"""
# func is an arg to just have a uniform signature with other emitters
helper_id = ir.Constant(ir.IntType(64), BPFHelperID.BPF_GET_CURRENT_CGROUP_ID.value)
fn_type = ir.FunctionType(ir.IntType(64), [], var_arg=False)
fn_ptr_type = ir.PointerType(fn_type)
fn_ptr = builder.inttoptr(helper_id, fn_ptr_type)
result = builder.call(fn_ptr, [], tail=False)
return result, ir.IntType(64)
@HelperHandlerRegistry.register(
"lookup",
param_types=[ir.PointerType(ir.IntType(64))],

View File

@ -5,6 +5,7 @@ from llvmlite import ir
from pythonbpf.expr import (
get_operand_value,
eval_expr,
access_struct_field,
)
logger = logging.getLogger(__name__)
@ -135,7 +136,7 @@ def get_or_create_ptr_from_arg(
and field_type.element.width == 8
):
ptr, sz = get_char_array_ptr_and_size(
arg, builder, local_sym_tab, struct_sym_tab
arg, builder, local_sym_tab, struct_sym_tab, func
)
if not ptr:
raise ValueError("Failed to get char array pointer from struct field")
@ -266,7 +267,9 @@ def get_buffer_ptr_and_size(buf_arg, builder, local_sym_tab, struct_sym_tab):
)
def get_char_array_ptr_and_size(buf_arg, builder, local_sym_tab, struct_sym_tab):
def get_char_array_ptr_and_size(
buf_arg, builder, local_sym_tab, struct_sym_tab, func=None
):
"""Get pointer to char array and its size."""
# Struct field: obj.field
@ -277,11 +280,11 @@ def get_char_array_ptr_and_size(buf_arg, builder, local_sym_tab, struct_sym_tab)
if not (local_sym_tab and var_name in local_sym_tab):
raise ValueError(f"Variable '{var_name}' not found")
struct_type = local_sym_tab[var_name].metadata
if not (struct_sym_tab and struct_type in struct_sym_tab):
raise ValueError(f"Struct type '{struct_type}' not found")
struct_ptr, struct_type, struct_metadata = local_sym_tab[var_name]
if not (struct_sym_tab and struct_metadata in struct_sym_tab):
raise ValueError(f"Struct type '{struct_metadata}' not found")
struct_info = struct_sym_tab[struct_type]
struct_info = struct_sym_tab[struct_metadata]
if field_name not in struct_info.fields:
raise ValueError(f"Field '{field_name}' not found")
@ -292,8 +295,24 @@ def get_char_array_ptr_and_size(buf_arg, builder, local_sym_tab, struct_sym_tab)
)
return None, 0
struct_ptr = local_sym_tab[var_name].var
field_ptr = struct_info.gep(builder, struct_ptr, field_name)
# Check if char array
if not (
isinstance(field_type, ir.ArrayType)
and isinstance(field_type.element, ir.IntType)
and field_type.element.width == 8
):
logger.warning("Field is not a char array")
return None, 0
field_ptr, _ = access_struct_field(
builder,
struct_ptr,
struct_type,
struct_metadata,
field_name,
struct_sym_tab,
func,
)
# GEP to first element: [N x i8]* -> i8*
buf_ptr = builder.gep(

View File

@ -57,6 +57,11 @@ def get_stack(buf, flags=0):
return ctypes.c_int64(0)
def get_current_cgroup_id():
"""Get the current cgroup ID"""
return ctypes.c_int64(0)
XDP_ABORTED = ctypes.c_int64(0)
XDP_DROP = ctypes.c_int64(1)
XDP_PASS = ctypes.c_int64(2)

View File

@ -222,7 +222,7 @@ def _prepare_expr_args(expr, func, module, builder, local_sym_tab, struct_sym_ta
# Special case: struct field char array needs pointer to first element
if isinstance(expr, ast.Attribute):
char_array_ptr, _ = get_char_array_ptr_and_size(
expr, builder, local_sym_tab, struct_sym_tab
expr, builder, local_sym_tab, struct_sym_tab, func
)
if char_array_ptr:
return char_array_ptr

View File

@ -135,7 +135,7 @@ def process_perf_event_map(map_name, rval, module, structs_sym_tab):
logger.info(f"Map parameters: {map_params}")
map_global = create_bpf_map(module, map_name, map_params)
# Generate debug info for BTF
create_map_debug_info(module, map_global.sym, map_name, map_params)
create_map_debug_info(module, map_global.sym, map_name, map_params, structs_sym_tab)
return map_global

View File

@ -18,6 +18,10 @@ mapping = {
"c_longlong": ir.IntType(64),
"c_uint": ir.IntType(32),
"c_int": ir.IntType(32),
"c_ushort": ir.IntType(16),
"c_short": ir.IntType(16),
"c_ubyte": ir.IntType(8),
"c_byte": ir.IntType(8),
# Not so sure about this one
"str": ir.PointerType(ir.IntType(8)),
}

View File

@ -77,7 +77,7 @@ class VmlinuxHandler:
return None
def get_vmlinux_enum_value(self, name):
"""Handle vmlinux enum constants by returning LLVM IR constants"""
"""Handle vmlinux.enum constants by returning LLVM IR constants"""
if self.is_vmlinux_enum(name):
value = self.vmlinux_symtab[name].value
logger.info(f"The value of vmlinux enum {name} = {value}")
@ -119,9 +119,18 @@ class VmlinuxHandler:
# Load the struct pointer from the local variable
struct_ptr = builder.load(var_info.var)
# Determine the preallocated tmp name that assignment pass should have created
tmp_name = f"{struct_var_name}_{field_name}_tmp"
# Use bpf_probe_read_kernel for non-context struct field access
field_value = self.load_struct_field(
builder, struct_ptr, globvar_ir, field_data, struct_name
builder,
struct_ptr,
globvar_ir,
field_data,
struct_name,
local_sym_tab,
tmp_name,
)
# Return field value and field type
return field_value, field_data
@ -130,7 +139,13 @@ class VmlinuxHandler:
@staticmethod
def load_struct_field(
builder, struct_ptr_int, offset_global, field_data, struct_name=None
builder,
struct_ptr_int,
offset_global,
field_data,
struct_name=None,
local_sym_tab=None,
tmp_name: str | None = None,
):
"""
Generate LLVM IR to load a field from a regular (non-context) struct using bpf_probe_read_kernel.
@ -141,6 +156,8 @@ class VmlinuxHandler:
offset_global: Global variable containing the field offset (i64)
field_data: contains data about the field
struct_name: Name of the struct being accessed (optional)
local_sym_tab: symbol table (optional) - used to locate preallocated tmp storage
tmp_name: name of the preallocated temporary storage to use (preferred)
Returns:
The loaded value
"""
@ -203,9 +220,18 @@ class VmlinuxHandler:
else:
logger.warning("Complex vmlinux field type, using default 64 bits")
# Allocate local storage for the field value
# Use preallocated temporary storage if provided by allocation pass
local_storage_i8_ptr = None
if tmp_name and local_sym_tab and tmp_name in local_sym_tab:
# Expect the tmp to be an alloca created during allocation pass
tmp_alloca = local_sym_tab[tmp_name].var
local_storage_i8_ptr = builder.bitcast(tmp_alloca, i8_ptr_type)
else:
# Fallback: allocate inline (not ideal, but preserves behavior)
local_storage = builder.alloca(ir.IntType(int_width))
local_storage_i8_ptr = builder.bitcast(local_storage, i8_ptr_type)
logger.warning(f"Temp storage '{tmp_name}' not found. Allocating inline")
# Use bpf_probe_read_kernel to safely read the field
# This generates:
@ -219,7 +245,9 @@ class VmlinuxHandler:
)
# Load the value from local storage
value = builder.load(local_storage)
value = builder.load(
builder.bitcast(local_storage_i8_ptr, ir.PointerType(ir.IntType(int_width)))
)
# Zero-extend i32 to i64 if needed
if needs_zext:

View File

@ -1,5 +1,5 @@
BPF_CLANG := clang
CFLAGS := -emit-llvm -target bpf -c
CFLAGS := -emit-llvm -target bpf -c -D__TARGET_ARCH_x86
SRC := $(wildcard *.bpf.c)
LL := $(SRC:.bpf.c=.bpf.ll)
@ -10,7 +10,7 @@ LL0 := $(SRC:.bpf.c=.bpf.o0.ll)
all: $(LL) $(OBJ) $(LL0)
%.bpf.o: %.bpf.c
$(BPF_CLANG) -O2 -g -target bpf -c $< -o $@
$(BPF_CLANG) -O2 -D__TARGET_ARCH_x86 -g -target bpf -c $< -o $@
%.bpf.ll: %.bpf.c
$(BPF_CLANG) $(CFLAGS) -O2 -g -S $< -o $@

View File

@ -0,0 +1,66 @@
// disksnoop.bpf.c
// eBPF program (compile with: clang -O2 -g -target bpf -c disksnoop.bpf.c -o disksnoop.bpf.o)
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
char LICENSE[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, __u64);
__type(value, __u64);
__uint(max_entries, 10240);
} start_map SEC(".maps");
/* kprobe: record start timestamp keyed by request pointer */
SEC("kprobe/blk_mq_start_request")
int trace_start(struct pt_regs *ctx)
{
/* request * is first arg */
__u64 reqp = (__u64)(ctx->di);
__u64 ts = bpf_ktime_get_ns();
bpf_map_update_elem(&start_map, &reqp, &ts, BPF_ANY);
// /* optional debug:
bpf_printk("start: req=%llu ts=%llu\n", reqp, ts);
// */
return 0;
}
/* completion: compute latency and print data_len, cmd_flags, latency_us */
SEC("kprobe/blk_mq_end_request")
int trace_completion(struct pt_regs *ctx)
{
__u64 reqp = (__u64)(ctx->di);
__u64 *tsp;
__u64 now_ns;
__u64 delta_ns;
__u64 delta_us = 0;
bpf_printk("%lld", reqp);
tsp = bpf_map_lookup_elem(&start_map, &reqp);
if (!tsp)
return 0;
now_ns = bpf_ktime_get_ns();
delta_ns = now_ns - *tsp;
delta_us = delta_ns / 1000;
/* read request fields using CO-RE; needs vmlinux.h/BTF */
__u32 data_len = 0;
__u32 cmd_flags = 0;
/* __data_len is usually a 32/64-bit; use CORE read to be safe */
data_len = ( __u32 ) BPF_CORE_READ((struct request *)reqp, __data_len);
cmd_flags = ( __u32 ) BPF_CORE_READ((struct request *)reqp, cmd_flags);
/* print: "<bytes> <flags_hex> <latency_us>" */
bpf_printk("%u %x %llu\n", data_len, cmd_flags, delta_us);
/* remove from map */
bpf_map_delete_elem(&start_map, &reqp);
return 0;
}

View File

@ -0,0 +1,31 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
struct fake_iphdr {
unsigned short useless;
unsigned short tot_len;
unsigned short id;
unsigned short frag_off;
unsigned char ttl;
unsigned char protocol;
unsigned short check;
unsigned int saddr;
unsigned int daddr;
};
SEC("xdp")
int xdp_prog(struct xdp_md *ctx) {
unsigned long data = ctx->data;
unsigned long data_end = ctx->data_end;
if (data + sizeof(struct ethhdr) + sizeof(struct fake_iphdr) > data_end) {
return XDP_ABORTED;
}
struct fake_iphdr *iph = (void *)data + sizeof(struct ethhdr);
bpf_printk("%d", iph->saddr);
return XDP_PASS;
}
char _license[] SEC("license") = "GPL";

View File

@ -0,0 +1,46 @@
from vmlinux import XDP_PASS, XDP_ABORTED
from vmlinux import (
struct_xdp_md,
)
from pythonbpf import bpf, section, bpfglobal, compile, compile_to_ir, struct
from ctypes import c_int64, c_ubyte, c_ushort, c_uint32, c_void_p
@bpf
@struct
class iphdr:
useless: c_ushort
tot_len: c_ushort
id: c_ushort
frag_off: c_ushort
ttl: c_ubyte
protocol: c_ubyte
check: c_ushort
saddr: c_uint32
daddr: c_uint32
@bpf
@section("xdp")
def ip_detector(ctx: struct_xdp_md) -> c_int64:
data = c_void_p(ctx.data)
data_end = c_void_p(ctx.data_end)
if data + 34 < data_end:
hdr = data + 14
iph = iphdr(hdr)
addr = iph.saddr
print(f"ipaddress: {addr}")
else:
return c_int64(XDP_ABORTED)
return c_int64(XDP_PASS)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile_to_ir("xdp_test_1.py", "xdp_test_1.ll")
compile()

View File

@ -1,4 +1,4 @@
from pythonbpf import bpf, struct, section, bpfglobal
from pythonbpf import bpf, struct, section, bpfglobal, compile
from pythonbpf.helper import comm
from ctypes import c_void_p, c_int64
@ -26,3 +26,6 @@ def hello(ctx: c_void_p) -> c_int64:
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,6 +1,6 @@
from pythonbpf import bpf, section, struct, bpfglobal, compile, map
from pythonbpf.maps import HashMap
from pythonbpf.helper import pid
from pythonbpf.helper import pid, comm
from ctypes import c_void_p, c_int64
@ -9,6 +9,7 @@ from ctypes import c_void_p, c_int64
class val_type:
counter: c_int64
shizzle: c_int64
comm: str(16)
@bpf
@ -22,6 +23,7 @@ def last() -> HashMap:
def hello_world(ctx: c_void_p) -> c_int64:
obj = val_type()
obj.counter, obj.shizzle = 42, 96
comm(obj.comm)
t = last.lookup(obj)
if t:
print(f"Found existing entry: counter={obj.counter}, pid={t}")

View File

@ -0,0 +1,93 @@
"""
Test struct values in HashMap.
This example stores a struct in a HashMap and reads it back,
testing the new set_value_struct() functionality in pylibbpf.
"""
from pythonbpf import bpf, map, struct, section, bpfglobal, BPF
from pythonbpf.helper import ktime, smp_processor_id, pid, comm
from pythonbpf.maps import HashMap
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
import time
import os
@bpf
@struct
class task_info:
pid: c_uint64
timestamp: c_uint64
comm: str(16)
@bpf
@map
def cpu_tasks() -> HashMap:
return HashMap(key=c_uint32, value=task_info, max_entries=256)
@bpf
@section("tracepoint/sched/sched_switch")
def trace_sched_switch(ctx: c_void_p) -> c_int64:
cpu = smp_processor_id()
# Create task info struct
info = task_info()
info.pid = pid()
info.timestamp = ktime()
comm(info.comm)
# Store in map
cpu_tasks.update(cpu, info)
return 0 # type: ignore
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Compile and load
b = BPF()
b.load()
b.attach_all()
print("Testing HashMap with Struct Values")
cpu_map = b["cpu_tasks"]
cpu_map.set_value_struct("task_info") # Enable struct deserialization
print("Listening for context switches.. .\n")
num_cpus = os.cpu_count() or 16
try:
while True:
time.sleep(1)
print(f"--- Snapshot at {time.strftime('%H:%M:%S')} ---")
for cpu in range(num_cpus):
try:
info = cpu_map.lookup(cpu)
if info:
comm_str = (
bytes(info.comm).decode("utf-8", errors="ignore").rstrip("\x00")
)
ts_sec = info.timestamp / 1e9
print(
f" CPU {cpu}: PID={info.pid}, comm={comm_str}, ts={ts_sec:.3f}s"
)
except KeyError:
# No data for this CPU yet
pass
print()
except KeyboardInterrupt:
print("\nStopped")