mirror of
https://github.com/varun-r-mallya/Python-BPF.git
synced 2026-02-08 06:00:56 +00:00
Compare commits
59 Commits
c97efb2570
...
copilot/cr
| Author | SHA1 | Date | |
|---|---|---|---|
| b6ecec9889 | |||
| 4f56f8c426 | |||
| 3bff930e98 | |||
| 036830c200 | |||
| aded125cba | |||
| 581269e52b | |||
| 8bfd998863 | |||
| a31ef3997a | |||
| 217e760e98 | |||
| 06c81ae55e | |||
| 9131d044dc | |||
| 2840a5c101 | |||
| 9ff33229a0 | |||
| 2e95b77ceb | |||
| c6aa1077de | |||
| 220adaf011 | |||
| 92162e5cb4 | |||
| e0251a05bf | |||
| f03c08703a | |||
| 4edfb18609 | |||
| c58483ab81 | |||
| 2d8c6c144c | |||
| c1f32a2839 | |||
| b6d8b71308 | |||
| ab881772af | |||
| b03924836e | |||
| 4ab1e26b92 | |||
| 73f6e83445 | |||
| c1e90b9d46 | |||
| 917d386d33 | |||
| ef128f3752 | |||
| b92208ed0d | |||
| 2bd8e73724 | |||
| 641f8bacbe | |||
| 749b06020d | |||
| 0ce5add39b | |||
| d0e2360f46 | |||
| 049ec55e85 | |||
| 77901accf2 | |||
| 0616a2fccb | |||
| 526425a267 | |||
| 466ecdb6a4 | |||
| 752a10fa5f | |||
| 3602b502f4 | |||
| 808db2722d | |||
| 99fc5d75cc | |||
| c91e69e2f7 | |||
| dc995a1448 | |||
| 0fd6bea211 | |||
| 01d234ac86 | |||
| 2543826e85 | |||
| 2daedc5882 | |||
| 0ca835079d | |||
| 127852ee9f | |||
| 2fd4fefbcc | |||
| 016fd5de5c | |||
| 8ad5fb8a3a | |||
| bf9635e324 | |||
| cbe365d760 |
4
.github/workflows/python-publish.yml
vendored
4
.github/workflows/python-publish.yml
vendored
@ -33,7 +33,7 @@ jobs:
|
||||
python -m build
|
||||
|
||||
- name: Upload distributions
|
||||
uses: actions/upload-artifact@v5
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: release-dists
|
||||
path: dist/
|
||||
@ -59,7 +59,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Retrieve release distributions
|
||||
uses: actions/download-artifact@v6
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: release-dists
|
||||
path: dist/
|
||||
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@ -10,3 +10,7 @@ __pycache__/
|
||||
vmlinux.py
|
||||
~*
|
||||
vmlinux.h
|
||||
|
||||
# Documentation build artifacts
|
||||
docs/_build/
|
||||
docs/_templates/
|
||||
|
||||
@ -7,14 +7,25 @@ This folder contains examples of BCC tutorial examples that have been ported to
|
||||
- You will also need `matplotlib` for vfsreadlat.py example.
|
||||
- You will also need `rich` for vfsreadlat_rich.py example.
|
||||
- You will also need `plotly` and `dash` for vfsreadlat_plotly.py example.
|
||||
- All of these are added to `requirements.txt` file. You can install them using the following command:
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Usage
|
||||
- You'll need root privileges to run these examples. If you are using a virtualenv, use the following command to run the scripts:
|
||||
```bash
|
||||
sudo <path_to_virtualenv>/bin/python3 <script_name>.py
|
||||
```
|
||||
- For the disksnoop and container-monitor examples, you need to generate the vmlinux.py file first. Follow the instructions in the [main README](https://github.com/pythonbpf/Python-BPF/tree/master?tab=readme-ov-file#first-generate-the-vmlinuxpy-file-for-your-kernel) to generate the vmlinux.py file.
|
||||
- For vfsreadlat_plotly.py, run the following command to start the Dash server:
|
||||
```bash
|
||||
sudo <path_to_virtualenv>/bin/python3 vfsreadlat_plotly/bpf_program.py
|
||||
```
|
||||
Then open your web browser and navigate to the given URL.
|
||||
- For container-monitor, you need to first copy the vmlinux.py to `container-monitor/` directory.
|
||||
Then run the following command to run the example:
|
||||
```bash
|
||||
cp vmlinux.py container-monitor/
|
||||
sudo <path_to_virtualenv>/bin/python3 container-monitor/container_monitor.py
|
||||
```
|
||||
|
||||
@ -1,12 +1,10 @@
|
||||
from vmlinux import struct_request, struct_pt_regs
|
||||
from pythonbpf import bpf, section, bpfglobal, compile_to_ir, compile, map
|
||||
from ctypes import c_int32, c_int64, c_uint64
|
||||
|
||||
from vmlinux import struct_pt_regs, struct_request
|
||||
|
||||
from pythonbpf import bpf, bpfglobal, compile, map, section
|
||||
from pythonbpf.helper import ktime
|
||||
from pythonbpf.maps import HashMap
|
||||
import logging
|
||||
from ctypes import c_int64, c_uint64, c_int32
|
||||
|
||||
# Constants
|
||||
REQ_WRITE = 1 # from include/linux/blk_types.h
|
||||
|
||||
|
||||
@bpf
|
||||
@ -18,24 +16,15 @@ def start() -> HashMap:
|
||||
@bpf
|
||||
@section("kprobe/blk_mq_end_request")
|
||||
def trace_completion(ctx: struct_pt_regs) -> c_int64:
|
||||
# Get request pointer from first argument
|
||||
req_ptr = ctx.di
|
||||
req = struct_request(ctx.di)
|
||||
# Print: data_len, cmd_flags, latency_us
|
||||
data_len = req.__data_len
|
||||
cmd_flags = req.cmd_flags
|
||||
# Lookup start timestamp
|
||||
req_tsp = start.lookup(req_ptr)
|
||||
if req_tsp:
|
||||
# Calculate delta in nanoseconds
|
||||
delta = ktime() - req_tsp
|
||||
|
||||
# Convert to microseconds for printing
|
||||
delta_us = delta // 1000
|
||||
|
||||
print(f"{data_len} {cmd_flags:x} {delta_us}\n")
|
||||
|
||||
# Delete the entry
|
||||
start.delete(req_ptr)
|
||||
|
||||
return c_int64(0)
|
||||
@ -56,6 +45,4 @@ def LICENSE() -> str:
|
||||
return "GPL"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
compile_to_ir("disksnoop.py", "disksnoop.ll", loglevel=logging.INFO)
|
||||
compile()
|
||||
compile()
|
||||
|
||||
9
BCC-Examples/requirements.txt
Normal file
9
BCC-Examples/requirements.txt
Normal file
@ -0,0 +1,9 @@
|
||||
# =============================================================================
|
||||
# Requirements for PythonBPF BCC-Examples
|
||||
# =============================================================================
|
||||
|
||||
dash
|
||||
matplotlib
|
||||
numpy
|
||||
plotly
|
||||
rich
|
||||
46
README.md
46
README.md
@ -40,16 +40,11 @@ Python-BPF is an LLVM IR generator for eBPF programs written in Python. It uses
|
||||
|
||||
---
|
||||
|
||||
## Try It Out!
|
||||
Run
|
||||
```bash
|
||||
curl -s https://raw.githubusercontent.com/pythonbpf/Python-BPF/refs/heads/master/tools/setup.sh | sudo bash
|
||||
```
|
||||
|
||||
## Installation
|
||||
|
||||
Dependencies:
|
||||
|
||||
* `bpftool`
|
||||
* `clang`
|
||||
* Python ≥ 3.8
|
||||
|
||||
@ -61,6 +56,38 @@ pip install pythonbpf pylibbpf
|
||||
|
||||
---
|
||||
|
||||
## Try It Out!
|
||||
|
||||
#### First, generate the vmlinux.py file for your kernel:
|
||||
- Install the required dependencies:
|
||||
- On Ubuntu:
|
||||
```bash
|
||||
sudo apt-get install bpftool clang
|
||||
pip install pythonbpf pylibbpf ctypeslib2
|
||||
```
|
||||
- Generate the `vmlinux.py` using:
|
||||
```bash
|
||||
sudo tools/vmlinux-gen.py
|
||||
```
|
||||
- Copy this file to `BCC-Examples/`
|
||||
|
||||
#### Next, install requirements for BCC-Examples:
|
||||
- These packages are needed only for the Python notebooks and the vfsreadlat and container-monitor examples.
|
||||
```bash
|
||||
pip install -r BCC-Examples/requirements.txt
|
||||
```
|
||||
- Now, follow the instructions in the [BCC-Examples/README.md](https://github.com/pythonbpf/Python-BPF/blob/master/BCC-Examples/README.md) to run the examples.
|
||||
|
||||
|
||||
#### To spin up jupyter notebook examples:
|
||||
- Run and follow the instructions on screen
|
||||
```bash
|
||||
curl -s https://raw.githubusercontent.com/pythonbpf/Python-BPF/refs/heads/master/tools/setup.sh | sudo bash
|
||||
```
|
||||
- Open the Jupyter server in your web browser and run the notebooks in the `BCC-Examples/` folder.
|
||||
|
||||
---
|
||||
|
||||
## Example Usage
|
||||
|
||||
```python
|
||||
@ -88,16 +115,15 @@ def hist() -> HashMap:
|
||||
@section("tracepoint/syscalls/sys_enter_clone")
|
||||
def hello(ctx: c_void_p) -> c_int64:
|
||||
process_id = pid()
|
||||
one = 1
|
||||
prev = hist.lookup(process_id)
|
||||
if prev:
|
||||
previous_value = prev + 1
|
||||
print(f"count: {previous_value} with {process_id}")
|
||||
hist.update(process_id, previous_value)
|
||||
return c_int64(0)
|
||||
return 0
|
||||
else:
|
||||
hist.update(process_id, one)
|
||||
return c_int64(0)
|
||||
hist.update(process_id, 1)
|
||||
return 0
|
||||
|
||||
|
||||
@bpf
|
||||
|
||||
405
blazesym-example/Cargo.lock
generated
Normal file
405
blazesym-example/Cargo.lock
generated
Normal file
@ -0,0 +1,405 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "adler2"
|
||||
version = "2.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
|
||||
|
||||
[[package]]
|
||||
name = "anstream"
|
||||
version = "0.6.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"anstyle-parse",
|
||||
"anstyle-query",
|
||||
"anstyle-wincon",
|
||||
"colorchoice",
|
||||
"is_terminal_polyfill",
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle"
|
||||
version = "1.0.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78"
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-parse"
|
||||
version = "0.2.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
|
||||
dependencies = [
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-query"
|
||||
version = "1.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
|
||||
dependencies = [
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-wincon"
|
||||
version = "3.0.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"once_cell_polyfill",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.100"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "2.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
|
||||
|
||||
[[package]]
|
||||
name = "blazesym"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ace0ab71bbe9a25cb82f6d0e513ae11aebd1a38787664475bb2ed5cbe2329736"
|
||||
dependencies = [
|
||||
"cpp_demangle",
|
||||
"gimli",
|
||||
"libc",
|
||||
"memmap2",
|
||||
"miniz_oxide",
|
||||
"rustc-demangle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.2.46"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b97463e1064cb1b1c1384ad0a0b9c8abd0988e2a91f52606c80ef14aadb63e36"
|
||||
dependencies = [
|
||||
"find-msvc-tools",
|
||||
"shlex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
|
||||
|
||||
[[package]]
|
||||
name = "cfg_aliases"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "4.5.51"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4c26d721170e0295f191a69bd9a1f93efcdb0aff38684b61ab5750468972e5f5"
|
||||
dependencies = [
|
||||
"clap_builder",
|
||||
"clap_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_builder"
|
||||
version = "4.5.51"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "75835f0c7bf681bfd05abe44e965760fea999a5286c6eb2d59883634fd02011a"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anstyle",
|
||||
"clap_lex",
|
||||
"strsim",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_derive"
|
||||
version = "4.5.49"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_lex"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
|
||||
|
||||
[[package]]
|
||||
name = "colorchoice"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
|
||||
|
||||
[[package]]
|
||||
name = "cpp_demangle"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0667304c32ea56cb4cd6d2d7c0cfe9a2f8041229db8c033af7f8d69492429def"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equivalent"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
|
||||
|
||||
[[package]]
|
||||
name = "fallible-iterator"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
|
||||
|
||||
[[package]]
|
||||
name = "find-msvc-tools"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844"
|
||||
|
||||
[[package]]
|
||||
name = "gimli"
|
||||
version = "0.32.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7"
|
||||
dependencies = [
|
||||
"fallible-iterator",
|
||||
"indexmap",
|
||||
"stable_deref_trait",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d"
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "2.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f"
|
||||
dependencies = [
|
||||
"equivalent",
|
||||
"hashbrown",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "is_terminal_polyfill"
|
||||
version = "1.70.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
|
||||
|
||||
[[package]]
|
||||
name = "libbpf-rs"
|
||||
version = "0.24.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "93edd9cd673087fa7518fd63ad6c87be2cd9b4e35034b1873f3e3258c018275b"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"libbpf-sys",
|
||||
"libc",
|
||||
"vsprintf",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libbpf-sys"
|
||||
version = "1.6.2+v1.6.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ba0346fc595fa2c8e274903e8a0e3ed5e6a29183af167567f6289fd3b116881b"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"nix",
|
||||
"pkg-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.177"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976"
|
||||
|
||||
[[package]]
|
||||
name = "memmap2"
|
||||
version = "0.9.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.8.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
|
||||
dependencies = [
|
||||
"adler2",
|
||||
"simd-adler32",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nix"
|
||||
version = "0.30.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"cfg-if",
|
||||
"cfg_aliases",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell_polyfill"
|
||||
version = "1.70.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
|
||||
|
||||
[[package]]
|
||||
name = "plain"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.103"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "blazesym-example"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"blazesym",
|
||||
"clap",
|
||||
"libbpf-rs",
|
||||
"libc",
|
||||
"plain",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.42"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc-demangle"
|
||||
version = "0.1.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace"
|
||||
|
||||
[[package]]
|
||||
name = "shlex"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
||||
|
||||
[[package]]
|
||||
name = "simd-adler32"
|
||||
version = "0.3.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
|
||||
|
||||
[[package]]
|
||||
name = "stable_deref_trait"
|
||||
version = "1.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.110"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a99801b5bd34ede4cf3fc688c5919368fea4e4814a4664359503e6015b280aea"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
|
||||
|
||||
[[package]]
|
||||
name = "utf8parse"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||
|
||||
[[package]]
|
||||
name = "vsprintf"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aec2f81b75ca063294776b4f7e8da71d1d5ae81c2b1b149c8d89969230265d63"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-link"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.61.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
|
||||
dependencies = [
|
||||
"windows-link",
|
||||
]
|
||||
14
blazesym-example/Cargo.toml
Normal file
14
blazesym-example/Cargo.toml
Normal file
@ -0,0 +1,14 @@
|
||||
[package]
|
||||
name = "blazesym-example"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
libbpf-rs = "0.24"
|
||||
blazesym = "0.2.0-rc.4"
|
||||
anyhow = "1.0"
|
||||
clap = { version = "4.5", features = ["derive"] }
|
||||
libc = "0.2"
|
||||
plain = "0.2"
|
||||
|
||||
[build-dependencies]
|
||||
333
blazesym-example/src/main.rs
Normal file
333
blazesym-example/src/main.rs
Normal file
@ -0,0 +1,333 @@
|
||||
// src/main.rs - Samples stack traces through perf events and a BPF ring buffer, then symbolizes them with blazesym
|
||||
use std::mem;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use blazesym::symbolize::{CodeInfo, Input, Symbolized, Symbolizer};
|
||||
use blazesym::symbolize::source::{Source, Kernel, Process};
|
||||
use clap::Parser;
|
||||
use libbpf_rs::{MapCore, ObjectBuilder, RingBufferBuilder}; // Added MapCore
|
||||
|
||||
// Match your Python struct exactly
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
struct ExecEvent {
|
||||
pid: i64,
|
||||
cpu: i32,
|
||||
timestamp: i64,
|
||||
comm: [u8; 16],
|
||||
kstack_sz: i64,
|
||||
ustack_sz: i64,
|
||||
kstack: [u8; 128], // str(128) in Python
|
||||
ustack: [u8; 128], // str(128) in Python
|
||||
}
|
||||
|
||||
unsafe impl plain::Plain for ExecEvent {}
|
||||
|
||||
// perf_event type and config selectors, declared locally instead of being
// taken from the `libc` crate.

/// `perf_event_attr.type` value selecting a hardware event.
const PERF_TYPE_HARDWARE: u32 = 0;
/// `perf_event_attr.type` value selecting a software event.
const PERF_TYPE_SOFTWARE: u32 = 1;
/// Hardware `config`: CPU cycles.
const PERF_COUNT_HW_CPU_CYCLES: u64 = 0;
/// Software `config`: CPU clock.
const PERF_COUNT_SW_CPU_CLOCK: u64 = 0;
|
||||
|
||||
/// Leading fields of the attribute block passed to `perf_event_open`.
///
/// Only the fields this program sets are named. `open_perf_event` zeroes the
/// whole struct first, so everything else (including `_padding`) is passed as
/// zero.
#[repr(C)]
struct PerfEventAttr {
    type_: u32,
    size: u32,
    config: u64,
    // Interpreted as a period or a frequency depending on the `freq` bit in `flags`.
    sample_period_or_freq: u64,
    sample_type: u64,
    read_format: u64,
    // Packed single-bit options.
    flags: u64,
    // Stand-in for the remaining attribute fields; always left zeroed.
    _padding: [u64; 64],
}
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
struct Args {
|
||||
/// Path to the BPF object file
|
||||
#[arg(default_value = "stack_traces.o")]
|
||||
object_file: PathBuf,
|
||||
|
||||
/// Sampling frequency
|
||||
#[arg(short, long, default_value_t = 50)]
|
||||
freq: u64,
|
||||
|
||||
/// Use software events
|
||||
#[arg(long)]
|
||||
sw_event: bool,
|
||||
|
||||
/// Verbose output
|
||||
#[arg(short, long)]
|
||||
verbose: bool,
|
||||
}
|
||||
|
||||
fn open_perf_event(cpu: i32, freq: u64, sw_event: bool) -> Result<i32> {
|
||||
let mut attr: PerfEventAttr = unsafe { mem::zeroed() };
|
||||
|
||||
attr.size = mem::size_of::<PerfEventAttr>() as u32;
|
||||
attr.type_ = if sw_event {
|
||||
PERF_TYPE_SOFTWARE
|
||||
} else {
|
||||
PERF_TYPE_HARDWARE
|
||||
};
|
||||
attr.config = if sw_event {
|
||||
PERF_COUNT_SW_CPU_CLOCK
|
||||
} else {
|
||||
PERF_COUNT_HW_CPU_CYCLES
|
||||
};
|
||||
|
||||
// Use frequency-based sampling
|
||||
attr.sample_period_or_freq = freq;
|
||||
attr.flags = 1 << 10; // freq = 1, disabled = 1
|
||||
|
||||
let fd = unsafe {
|
||||
libc::syscall(
|
||||
libc::SYS_perf_event_open,
|
||||
&attr as *const _,
|
||||
-1, // pid = -1 (all processes)
|
||||
cpu, // cpu
|
||||
-1, // group_fd
|
||||
0, // flags
|
||||
)
|
||||
};
|
||||
|
||||
if fd < 0 {
|
||||
Err(anyhow!("Failed to open perf event on CPU {}: {}", cpu,
|
||||
std::io::Error::last_os_error()))
|
||||
} else {
|
||||
Ok(fd as i32)
|
||||
}
|
||||
}
|
||||
|
||||
fn print_stack_trace(
|
||||
addrs: &[u64],
|
||||
symbolizer: &Symbolizer,
|
||||
pid: u32,
|
||||
is_kernel: bool,
|
||||
) {
|
||||
if addrs.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
let src = if is_kernel {
|
||||
Source::Kernel(Kernel::default())
|
||||
} else {
|
||||
Source::Process(Process::new(pid.into()))
|
||||
};
|
||||
|
||||
let syms = match symbolizer.symbolize(&src, Input::AbsAddr(addrs)) {
|
||||
Ok(syms) => syms,
|
||||
Err(e) => {
|
||||
eprintln!(" Failed to symbolize: {}", e);
|
||||
for addr in addrs {
|
||||
println!("0x{:016x}: <no-symbol>", addr);
|
||||
}
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
for (addr, sym) in addrs.iter().zip(syms.iter()) {
|
||||
match sym {
|
||||
Symbolized::Sym(sym_info) => {
|
||||
print!("0x{:016x}: {} @ 0x{:x}+0x{:x}",
|
||||
addr, sym_info.name, sym_info.addr, sym_info.offset);
|
||||
|
||||
if let Some(ref code_info) = sym_info.code_info {
|
||||
print_code_info(code_info);
|
||||
}
|
||||
println!();
|
||||
|
||||
// Print inlined frames
|
||||
for inlined in &sym_info.inlined {
|
||||
print!(" {} (inlined)", inlined.name);
|
||||
if let Some(ref code_info) = inlined.code_info {
|
||||
print_code_info(code_info);
|
||||
}
|
||||
println!();
|
||||
}
|
||||
}
|
||||
Symbolized::Unknown(..) => {
|
||||
println!("0x{:016x}: <no-symbol>", addr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn print_code_info(code_info: &CodeInfo) {
|
||||
let path = code_info.to_path();
|
||||
let path_str = path.display();
|
||||
|
||||
match (code_info.line, code_info.column) {
|
||||
(Some(line), Some(col)) => print!(" {}:{}:{}", path_str, line, col),
|
||||
(Some(line), None) => print!(" {}:{}", path_str, line),
|
||||
(None, _) => print!(" {}", path_str),
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_event(symbolizer: &Symbolizer, data: &[u8]) -> i32 {
|
||||
let event = plain::from_bytes::<ExecEvent>(data).expect("Invalid event data");
|
||||
|
||||
// Extract comm string
|
||||
let comm = std::str::from_utf8(&event.comm)
|
||||
.unwrap_or("<unknown>")
|
||||
.trim_end_matches('\0');
|
||||
|
||||
println!("[{:.9}] COMM: {} (pid={}) @ CPU {}",
|
||||
event.timestamp as f64 / 1_000_000_000.0,
|
||||
comm,
|
||||
event.pid,
|
||||
event.cpu);
|
||||
|
||||
// Handle kernel stack
|
||||
if event.kstack_sz > 0 {
|
||||
println!("Kernel:");
|
||||
let num_frames = (event.kstack_sz / 8) as usize;
|
||||
let kstack_u64 = unsafe {
|
||||
std::slice::from_raw_parts(
|
||||
event.kstack.as_ptr() as *const u64,
|
||||
num_frames.min(16),
|
||||
)
|
||||
};
|
||||
|
||||
// Filter out zero addresses
|
||||
let kstack: Vec<u64> = kstack_u64.iter()
|
||||
.copied()
|
||||
.take_while(|&addr| addr != 0)
|
||||
.collect();
|
||||
|
||||
print_stack_trace(&kstack, symbolizer, 0, true);
|
||||
} else {
|
||||
println!("No Kernel Stack");
|
||||
}
|
||||
|
||||
// Handle user stack
|
||||
if event.ustack_sz > 0 {
|
||||
println!("Userspace:");
|
||||
let num_frames = (event.ustack_sz / 8) as usize;
|
||||
let ustack_u64 = unsafe {
|
||||
std::slice::from_raw_parts(
|
||||
event.ustack.as_ptr() as *const u64,
|
||||
num_frames.min(16),
|
||||
)
|
||||
};
|
||||
|
||||
// Filter out zero addresses
|
||||
let ustack: Vec<u64> = ustack_u64.iter()
|
||||
.copied()
|
||||
.take_while(|&addr| addr != 0)
|
||||
.collect();
|
||||
|
||||
print_stack_trace(&ustack, symbolizer, event.pid as u32, false);
|
||||
} else {
|
||||
println!("No Userspace Stack");
|
||||
}
|
||||
|
||||
println!();
|
||||
0
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let args = Args::parse();
|
||||
|
||||
if !args.object_file.exists() {
|
||||
return Err(anyhow!("Object file not found: {:?}", args.object_file));
|
||||
}
|
||||
|
||||
println!("Loading BPF object: {:?}", args.object_file);
|
||||
|
||||
// Load BPF object
|
||||
let mut obj_builder = ObjectBuilder::default();
|
||||
obj_builder.debug(args.verbose);
|
||||
|
||||
let open_obj = obj_builder
|
||||
.open_file(&args.object_file)
|
||||
.context("Failed to open BPF object")?;
|
||||
|
||||
let mut obj = open_obj.load().context("Failed to load BPF object")?;
|
||||
|
||||
println!("✓ BPF object loaded");
|
||||
|
||||
// Find the program
|
||||
let prog = obj
|
||||
.progs_mut()
|
||||
.find(|p| p.name() == "trace_exec_enter")
|
||||
.ok_or_else(|| anyhow!("Program 'trace_exec_enter' not found"))?;
|
||||
|
||||
println!("✓ Found program: trace_exec_enter");
|
||||
|
||||
// Find the map
|
||||
let map = obj
|
||||
.maps()
|
||||
.find(|m| m.name() == "exec_events")
|
||||
.ok_or_else(|| anyhow!("Map 'exec_events' not found"))?;
|
||||
|
||||
println!("✓ Found map: exec_events");
|
||||
|
||||
// Get number of CPUs
|
||||
let num_cpus = libbpf_rs::num_possible_cpus()?;
|
||||
println!("✓ Detected {} CPUs\n", num_cpus);
|
||||
|
||||
// Open perf events and attach BPF program
|
||||
println!("Setting up perf events...");
|
||||
let mut links = Vec::new();
|
||||
|
||||
for cpu in 0..num_cpus {
|
||||
match open_perf_event(cpu as i32, args.freq, args.sw_event) {
|
||||
Ok(perf_fd) => {
|
||||
match prog.attach_perf_event(perf_fd) {
|
||||
Ok(link) => {
|
||||
links.push(link);
|
||||
if args.verbose {
|
||||
println!(" ✓ Attached to CPU {}", cpu);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!(" ✗ Failed to attach to CPU {}: {}", cpu, e);
|
||||
unsafe { libc::close(perf_fd); }
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
if args.verbose {
|
||||
eprintln!(" ✗ Failed to open perf event on CPU {}: {}", cpu, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
println!("✓ Attached to {} CPUs\n", links.len());
|
||||
|
||||
if links.is_empty() {
|
||||
return Err(anyhow!("Failed to attach to any CPU"));
|
||||
}
|
||||
|
||||
// Initialize symbolizer
|
||||
let symbolizer = Symbolizer::new();
|
||||
|
||||
// Set up ring buffer
|
||||
let mut builder = RingBufferBuilder::new();
|
||||
|
||||
builder.add(&map, move |data: &[u8]| -> i32 {
|
||||
handle_event(&symbolizer, data)
|
||||
})?;
|
||||
|
||||
let ringbuf = builder.build()?;
|
||||
|
||||
println!("========================================");
|
||||
println!("Profiling started. Press Ctrl+C to stop.");
|
||||
println!("========================================\n");
|
||||
|
||||
// Poll for events - just keep polling until error
|
||||
loop {
|
||||
if let Err(e) = ringbuf.poll(Duration::from_millis(100)) {
|
||||
// Any error breaks the loop (including Ctrl+C)
|
||||
eprintln!("\nStopping: {}", e);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
println!("Done.");
|
||||
Ok(())
|
||||
}
|
||||
49
blazesym-example/stack_traces.py
Normal file
49
blazesym-example/stack_traces.py
Normal file
@ -0,0 +1,49 @@
|
||||
# blazesym-example/stack_traces.py
|
||||
from pythonbpf import bpf, map, section, bpfglobal, struct, compile
|
||||
from pythonbpf.maps import RingBuffer
|
||||
from pythonbpf.helper import ktime, pid, smp_processor_id, comm, get_stack
|
||||
from ctypes import c_void_p, c_int32, c_int64
|
||||
import logging
|
||||
|
||||
|
||||
# Record layout of one sample; trace_exec_enter fills an instance and sends it
# through the exec_events ring buffer.
@bpf
|
||||
@struct
|
||||
class exec_event:
|
||||
# Set from the pid() helper (current process ID).
pid: c_int64
|
||||
# Set from the smp_processor_id() helper (current CPU ID).
cpu: c_int32
|
||||
# Set from the ktime() helper (kernel time in nanoseconds).
timestamp: c_int64
|
||||
# 16-byte fixed-length string filled in by the comm() helper (command name).
comm: str(16) # type: ignore [valid-type]
|
||||
# Value returned by get_stack(evt.kstack).
# NOTE(review): presumably the number of bytes written, or a negative error — confirm.
kstack_sz: c_int64
|
||||
# Value returned by get_stack(evt.ustack, 256); same caveat as kstack_sz.
ustack_sz: c_int64
|
||||
# 128-byte buffer handed to get_stack() without flags.
# NOTE(review): presumably holds raw kernel return addresses (16 x 8 bytes) — confirm.
kstack: str(128) # type: ignore [valid-type]
|
||||
# 128-byte buffer handed to get_stack() with flags=256 (presumably the user stack).
ustack: str(128) # type: ignore [valid-type]
|
||||
|
||||
|
||||
# Ring buffer map that trace_exec_enter publishes exec_event records to via
# exec_events.output(evt).
@bpf
|
||||
@map
|
||||
def exec_events() -> RingBuffer:
|
||||
# 1048576 == 2**20; the API reference describes max_entries as a size in bytes
# that must be a power of 2, which would make this a 1 MiB buffer.
return RingBuffer(max_entries=1048576)
|
||||
|
||||
|
||||
# BPF program placed in the "perf_event" section: on every invocation it fills
# one exec_event and publishes it on the exec_events ring buffer.
# NOTE(review): the name mentions "exec" although the section is perf_event;
# check whether a userspace loader looks the program up by this name before renaming.
@bpf
|
||||
@section("perf_event")
|
||||
def trace_exec_enter(ctx: c_void_p) -> c_int64:
|
||||
evt = exec_event()
|
||||
evt.pid = pid()
|
||||
evt.cpu = smp_processor_id()
|
||||
evt.timestamp = ktime()
|
||||
# comm() takes the destination buffer and writes the command name into it.
comm(evt.comm)
|
||||
# First capture uses get_stack's default flags; the return value is kept as the size field.
evt.kstack_sz = get_stack(evt.kstack)
|
||||
# 256 is presumably BPF_F_USER_STACK (1 << 8), i.e. "capture the user-space
# stack" — TODO confirm against pythonbpf.helper.get_stack.
evt.ustack_sz = get_stack(evt.ustack, 256)
|
||||
exec_events.output(evt)
|
||||
# print() is PythonBPF's front end for bpf_printk, so this goes to the kernel trace pipe.
print(f"Submitted exec_event for pid: {evt.pid}, cpu: {evt.cpu}")
|
||||
return 0 # type: ignore [return-value]
|
||||
|
||||
|
||||
# License declaration, defined as a BPF global; the kernel requires BPF
# programs to declare a license.
@bpf
|
||||
@bpfglobal
|
||||
def LICENSE() -> str:
|
||||
return "GPL"
|
||||
|
||||
|
||||
compile(logging.INFO)
|
||||
20
docs/Makefile
Normal file
20
docs/Makefile
Normal file
@ -0,0 +1,20 @@
|
||||
# Minimal makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# You can set these variables from the command line, and also
|
||||
# from the environment for the first two.
|
||||
SPHINXOPTS ?=
|
||||
SPHINXBUILD ?= sphinx-build
|
||||
SOURCEDIR = .
|
||||
BUILDDIR = _build
|
||||
|
||||
# Put it first so that "make" without argument is like "make help".
|
||||
help:
|
||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
.PHONY: help Makefile
|
||||
|
||||
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||
%: Makefile
|
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
52
docs/README.md
Normal file
52
docs/README.md
Normal file
@ -0,0 +1,52 @@
|
||||
# PythonBPF Documentation
|
||||
|
||||
This directory contains the Sphinx documentation for PythonBPF.
|
||||
|
||||
## Building the Documentation
|
||||
|
||||
### Prerequisites
|
||||
|
||||
Install the documentation dependencies:
|
||||
|
||||
**Using uv (recommended):**
|
||||
```bash
|
||||
uv pip install -r requirements.txt
|
||||
# Or install the optional docs dependencies
|
||||
uv pip install "pythonbpf[docs]"
|
||||
```
|
||||
|
||||
**Using pip:**
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
# Or install the optional docs dependencies
|
||||
pip install "pythonbpf[docs]"
|
||||
```
|
||||
|
||||
### Build HTML Documentation
|
||||
|
||||
```bash
|
||||
make html
|
||||
```
|
||||
|
||||
The generated documentation will be in `_build/html/`. Open `_build/html/index.html` in a browser to view.
|
||||
|
||||
### Other Build Formats
|
||||
|
||||
```bash
|
||||
make latexpdf # Build PDF documentation
|
||||
make epub # Build ePub format
|
||||
make clean # Clean build artifacts
|
||||
```
|
||||
|
||||
## Documentation Structure
|
||||
|
||||
- `index.md` - Main landing page
|
||||
- `getting-started/` - Installation and quick start guides
|
||||
- `user-guide/` - Comprehensive user documentation
|
||||
- `api/` - API reference documentation
|
||||
- `conf.py` - Sphinx configuration
|
||||
- `_static/` - Static files (images, CSS, etc.)
|
||||
|
||||
## Writing Documentation
|
||||
|
||||
Documentation is written in Markdown using [MyST-Parser](https://myst-parser.readthedocs.io/). See the existing files for examples.
|
||||
0
docs/_static/.gitkeep
vendored
Normal file
0
docs/_static/.gitkeep
vendored
Normal file
471
docs/api/index.md
Normal file
471
docs/api/index.md
Normal file
@ -0,0 +1,471 @@
|
||||
# API Reference
|
||||
|
||||
This section provides detailed API documentation for all PythonBPF modules, classes, and functions.
|
||||
|
||||
## Module Overview
|
||||
|
||||
PythonBPF is organized into several modules:
|
||||
|
||||
* `pythonbpf` - Main module with decorators and compilation functions
|
||||
* `pythonbpf.maps` - BPF map types
|
||||
* `pythonbpf.helper` - BPF helper functions
|
||||
* `pythonbpf.structs` - Struct type handling
|
||||
* `pythonbpf.codegen` - Code generation and compilation
|
||||
|
||||
## Public API
|
||||
|
||||
The main `pythonbpf` module exports the following public API:
|
||||
|
||||
```python
|
||||
from pythonbpf import (
|
||||
# Decorators
|
||||
bpf,
|
||||
map,
|
||||
section,
|
||||
bpfglobal,
|
||||
struct,
|
||||
|
||||
# Compilation
|
||||
compile_to_ir,
|
||||
compile,
|
||||
BPF,
|
||||
|
||||
# Utilities
|
||||
trace_pipe,
|
||||
trace_fields,
|
||||
)
|
||||
```
|
||||
|
||||
## Decorators
|
||||
|
||||
```{eval-rst}
|
||||
.. automodule:: pythonbpf.decorators
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
```
|
||||
|
||||
### bpf
|
||||
|
||||
```python
|
||||
@bpf
|
||||
def my_function():
|
||||
pass
|
||||
```
|
||||
|
||||
Decorator to mark a function or class for BPF compilation. Any function or class decorated with `@bpf` will be processed by the PythonBPF compiler.
|
||||
|
||||
**See also:** {doc}`../user-guide/decorators`
|
||||
|
||||
### map
|
||||
|
||||
```python
|
||||
@bpf
|
||||
@map
|
||||
def my_map() -> HashMap:
|
||||
return HashMap(key=c_uint32, value=c_uint64, max_entries=1024)
|
||||
```
|
||||
|
||||
Decorator to mark a function as a BPF map definition. The function must return a map type.
|
||||
|
||||
**See also:** {doc}`../user-guide/maps`
|
||||
|
||||
### section
|
||||
|
||||
```python
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_open")
|
||||
def trace_open(ctx: c_void_p) -> c_int64:
|
||||
return c_int64(0)
|
||||
```
|
||||
|
||||
Decorator to specify which kernel hook to attach the BPF program to.
|
||||
|
||||
**Parameters:**
|
||||
* `name` (str) - The section name (e.g., "tracepoint/...", "kprobe/...", "xdp")
|
||||
|
||||
**See also:** {doc}`../user-guide/decorators`
|
||||
|
||||
### bpfglobal
|
||||
|
||||
```python
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def LICENSE() -> str:
|
||||
return "GPL"
|
||||
```
|
||||
|
||||
Decorator to mark a function as a BPF global variable definition.
|
||||
|
||||
**See also:** {doc}`../user-guide/decorators`
|
||||
|
||||
### struct
|
||||
|
||||
```python
|
||||
@bpf
|
||||
@struct
|
||||
class Event:
|
||||
timestamp: c_uint64
|
||||
pid: c_uint32
|
||||
```
|
||||
|
||||
Decorator to mark a class as a BPF struct definition.
|
||||
|
||||
**See also:** {doc}`../user-guide/structs`
|
||||
|
||||
## Compilation Functions
|
||||
|
||||
```{eval-rst}
|
||||
.. automodule:: pythonbpf.codegen
|
||||
:members: compile_to_ir, compile, BPF
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
```
|
||||
|
||||
### compile_to_ir()
|
||||
|
||||
```python
|
||||
def compile_to_ir(
|
||||
filename: str,
|
||||
output: str,
|
||||
loglevel=logging.WARNING
|
||||
) -> None
|
||||
```
|
||||
|
||||
Compile Python source to LLVM Intermediate Representation.
|
||||
|
||||
**Parameters:**
|
||||
* `filename` (str) - Path to the Python source file
|
||||
* `output` (str) - Path for the output LLVM IR file (.ll)
|
||||
* `loglevel` - Logging level (default: logging.WARNING)
|
||||
|
||||
**See also:** {doc}`../user-guide/compilation`
|
||||
|
||||
### compile()
|
||||
|
||||
```python
|
||||
def compile(
|
||||
filename: str = None,
|
||||
output: str = None,
|
||||
loglevel=logging.WARNING
|
||||
) -> None
|
||||
```
|
||||
|
||||
Compile Python source to BPF object file.
|
||||
|
||||
**Parameters:**
|
||||
* `filename` (str, optional) - Path to the Python source file (default: calling file)
|
||||
* `output` (str, optional) - Path for the output object file (default: same name with .o extension)
|
||||
* `loglevel` - Logging level (default: logging.WARNING)
|
||||
|
||||
**See also:** {doc}`../user-guide/compilation`
|
||||
|
||||
### BPF
|
||||
|
||||
```python
|
||||
class BPF:
|
||||
def __init__(
|
||||
self,
|
||||
filename: str = None,
|
||||
loglevel=logging.WARNING
|
||||
)
|
||||
|
||||
def load(self) -> BpfObject
|
||||
def attach_all(self) -> None
|
||||
def load_and_attach(self) -> BpfObject
|
||||
```
|
||||
|
||||
High-level interface to compile, load, and attach BPF programs.
|
||||
|
||||
**Parameters:**
|
||||
* `filename` (str, optional) - Path to Python source file (default: calling file)
|
||||
* `loglevel` - Logging level (default: logging.WARNING)
|
||||
|
||||
**Methods:**
|
||||
* `load()` - Load the compiled BPF program into the kernel
|
||||
* `attach_all()` - Attach all BPF programs to their hooks
|
||||
* `load_and_attach()` - Convenience method that loads and attaches
|
||||
|
||||
**See also:** {doc}`../user-guide/compilation`
|
||||
|
||||
## Utilities
|
||||
|
||||
```{eval-rst}
|
||||
.. automodule:: pythonbpf.utils
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
```
|
||||
|
||||
### trace_pipe()
|
||||
|
||||
```python
|
||||
def trace_pipe() -> None
|
||||
```
|
||||
|
||||
Read and display output from the kernel trace pipe.
|
||||
|
||||
Blocks until interrupted with Ctrl+C. Displays BPF program output from `print()` statements.
|
||||
|
||||
**See also:** {doc}`../user-guide/helpers`
|
||||
|
||||
### trace_fields()
|
||||
|
||||
```python
|
||||
def trace_fields() -> tuple
|
||||
```
|
||||
|
||||
Parse one line from the trace pipe into structured fields.
|
||||
|
||||
**Returns:** Tuple of `(task, pid, cpu, flags, timestamp, message)`
|
||||
* `task` (str) - Task/process name
|
||||
* `pid` (int) - Process ID
|
||||
* `cpu` (int) - CPU number
|
||||
* `flags` (bytes) - Trace flags
|
||||
* `timestamp` (float) - Timestamp in seconds
|
||||
* `message` (str) - The trace message
|
||||
|
||||
**See also:** {doc}`../user-guide/helpers`
|
||||
|
||||
## Map Types
|
||||
|
||||
```{eval-rst}
|
||||
.. automodule:: pythonbpf.maps.maps
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
```
|
||||
|
||||
### HashMap
|
||||
|
||||
```python
|
||||
class HashMap:
|
||||
def __init__(
|
||||
self,
|
||||
key,
|
||||
value,
|
||||
max_entries: int
|
||||
)
|
||||
|
||||
def lookup(self, key)
|
||||
def update(self, key, value, flags=None)
|
||||
def delete(self, key)
|
||||
```
|
||||
|
||||
Hash map for efficient key-value storage.
|
||||
|
||||
**Parameters:**
|
||||
* `key` - The type of the key (ctypes type or struct)
|
||||
* `value` - The type of the value (ctypes type or struct)
|
||||
* `max_entries` (int) - Maximum number of entries
|
||||
|
||||
**Methods:**
|
||||
* `lookup(key)` - Look up a value by key
|
||||
* `update(key, value, flags=None)` - Update or insert a key-value pair
|
||||
* `delete(key)` - Remove an entry from the map
|
||||
|
||||
**See also:** {doc}`../user-guide/maps`
|
||||
|
||||
### PerfEventArray
|
||||
|
||||
```python
|
||||
class PerfEventArray:
|
||||
def __init__(
|
||||
self,
|
||||
key_size,
|
||||
value_size
|
||||
)
|
||||
|
||||
def output(self, data)
|
||||
```
|
||||
|
||||
Perf event array for sending data to userspace.
|
||||
|
||||
**Parameters:**
|
||||
* `key_size` - Type for the key
|
||||
* `value_size` - Type for the value
|
||||
|
||||
**Methods:**
|
||||
* `output(data)` - Send data to userspace
|
||||
|
||||
**See also:** {doc}`../user-guide/maps`
|
||||
|
||||
### RingBuffer
|
||||
|
||||
```python
|
||||
class RingBuffer:
|
||||
def __init__(self, max_entries: int)
|
||||
|
||||
def output(self, data, flags=0)
|
||||
def reserve(self, size: int)
|
||||
def submit(self, data, flags=0)
|
||||
def discard(self, data, flags=0)
|
||||
```
|
||||
|
||||
Ring buffer for efficient event delivery.
|
||||
|
||||
**Parameters:**
|
||||
* `max_entries` (int) - Maximum size in bytes (must be power of 2)
|
||||
|
||||
**Methods:**
|
||||
* `output(data, flags=0)` - Send data to the ring buffer
|
||||
* `reserve(size)` - Reserve space in the buffer
|
||||
* `submit(data, flags=0)` - Submit previously reserved space
|
||||
* `discard(data, flags=0)` - Discard previously reserved space
|
||||
|
||||
**See also:** {doc}`../user-guide/maps`
|
||||
|
||||
## Helper Functions
|
||||
|
||||
```{eval-rst}
|
||||
.. automodule:: pythonbpf.helper.helpers
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
```
|
||||
|
||||
### Process Information
|
||||
|
||||
* `pid()` - Get current process ID
|
||||
* `comm(buf)` - Get current process command name (requires buffer parameter)
|
||||
* `uid()` - Get current user ID
|
||||
|
||||
### Time
|
||||
|
||||
* `ktime()` - Get current kernel time in nanoseconds
|
||||
|
||||
### CPU
|
||||
|
||||
* `smp_processor_id()` - Get current CPU ID
|
||||
|
||||
### Memory
|
||||
|
||||
* `probe_read(dst, size, src)` - Safely read kernel memory
|
||||
* `probe_read_str(dst, src)` - Safely read string from kernel memory
|
||||
* `deref(ptr)` - Dereference a pointer
|
||||
|
||||
### Random
|
||||
|
||||
* `random()` - Get pseudo-random number
|
||||
|
||||
**See also:** {doc}`../user-guide/helpers`
|
||||
|
||||
## Type System
|
||||
|
||||
PythonBPF uses Python's `ctypes` module for type definitions:
|
||||
|
||||
### Integer Types
|
||||
|
||||
* `c_int8`, `c_int16`, `c_int32`, `c_int64` - Signed integers
|
||||
* `c_uint8`, `c_uint16`, `c_uint32`, `c_uint64` - Unsigned integers
|
||||
|
||||
### Other Types
|
||||
|
||||
* `c_char`, `c_bool` - Characters and booleans
|
||||
* `c_void_p` - Void pointers
|
||||
* `str(N)` - Fixed-length strings
|
||||
|
||||
## Examples
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
|
||||
from ctypes import c_void_p, c_int64
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_execve")
|
||||
def hello(ctx: c_void_p) -> c_int64:
|
||||
print("Hello, World!")
|
||||
return 0
|
||||
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def LICENSE() -> str:
|
||||
return "GPL"
|
||||
|
||||
b = BPF()
|
||||
b.load_and_attach()
|
||||
trace_pipe()
|
||||
```
|
||||
|
||||
### With Maps
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, map, section, bpfglobal, BPF
|
||||
from pythonbpf.maps import HashMap
|
||||
from pythonbpf.helper import pid
|
||||
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
|
||||
|
||||
@bpf
|
||||
@map
|
||||
def counters() -> HashMap:
|
||||
return HashMap(key=c_uint32, value=c_uint64, max_entries=256)
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_clone")
|
||||
def count_clones(ctx: c_void_p) -> c_int64:
|
||||
process_id = pid()
|
||||
count = counters.lookup(process_id)
|
||||
|
||||
if count:
|
||||
counters.update(process_id, count + 1)
|
||||
else:
|
||||
counters.update(process_id, 1)
|
||||
|
||||
return 0
|
||||
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def LICENSE() -> str:
|
||||
return "GPL"
|
||||
|
||||
b = BPF()
|
||||
b.load_and_attach()
|
||||
```
|
||||
|
||||
### With Structs
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, struct, map, section, bpfglobal, BPF
|
||||
from pythonbpf.maps import RingBuffer
|
||||
from pythonbpf.helper import pid, ktime, comm
|
||||
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
|
||||
|
||||
@bpf
|
||||
@struct
|
||||
class Event:
|
||||
timestamp: c_uint64
|
||||
pid: c_uint32
|
||||
comm: str(16)
|
||||
|
||||
@bpf
|
||||
@map
|
||||
def events() -> RingBuffer:
|
||||
return RingBuffer(max_entries=4096)
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_execve")
|
||||
def track_exec(ctx: c_void_p) -> c_int64:
|
||||
event = Event()
|
||||
event.timestamp = ktime()
|
||||
event.pid = pid()
|
||||
comm(event.comm)
|
||||
|
||||
events.output(event)
|
||||
return 0
|
||||
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def LICENSE() -> str:
|
||||
return "GPL"
|
||||
|
||||
b = BPF()
|
||||
b.load_and_attach()
|
||||
```
|
||||
|
||||
## See Also
|
||||
|
||||
* {doc}`../user-guide/index` - Comprehensive user guide
|
||||
* {doc}`../getting-started/quickstart` - Quick start tutorial
|
||||
* [GitHub Repository](https://github.com/pythonbpf/Python-BPF) - Source code and examples
|
||||
105
docs/conf.py
Normal file
105
docs/conf.py
Normal file
@ -0,0 +1,105 @@
|
||||
# Configuration file for the Sphinx documentation builder.
|
||||
#
|
||||
# For the full list of built-in configuration values, see the documentation:
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Add the parent directory to the path so we can import pythonbpf
|
||||
sys.path.insert(0, os.path.abspath(".."))
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
|
||||
|
||||
project = "PythonBPF"
|
||||
copyright = "2026, Pragyansh Chaturvedi, Varun Mallya"
|
||||
author = "Pragyansh Chaturvedi, Varun Mallya"
|
||||
release = "0.1.8"
|
||||
version = "0.1.8"
|
||||
|
||||
# -- General configuration ---------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
|
||||
|
||||
extensions = [
|
||||
"myst_parser",
|
||||
"sphinx.ext.autodoc",
|
||||
"sphinx.ext.napoleon",
|
||||
"sphinx.ext.viewcode",
|
||||
"sphinx.ext.intersphinx",
|
||||
"sphinx_copybutton",
|
||||
]
|
||||
|
||||
# MyST-Parser configuration
|
||||
myst_enable_extensions = [
|
||||
"colon_fence",
|
||||
"deflist",
|
||||
"fieldlist",
|
||||
]
|
||||
|
||||
# Napoleon settings for Google/NumPy style docstrings
|
||||
napoleon_google_docstring = True
|
||||
napoleon_numpy_docstring = True
|
||||
napoleon_include_init_with_doc = True
|
||||
napoleon_include_private_with_doc = False
|
||||
napoleon_include_special_with_doc = True
|
||||
napoleon_use_admonition_for_examples = True
|
||||
napoleon_use_admonition_for_notes = True
|
||||
napoleon_use_admonition_for_references = False
|
||||
napoleon_use_ivar = False
|
||||
napoleon_use_param = True
|
||||
napoleon_use_rtype = True
|
||||
napoleon_type_aliases = None
|
||||
|
||||
# Intersphinx mapping
|
||||
intersphinx_mapping = {
|
||||
"python": ("https://docs.python.org/3", None),
|
||||
"llvmlite": ("https://llvmlite.readthedocs.io/en/latest/", None),
|
||||
}
|
||||
|
||||
templates_path = ["_templates"]
|
||||
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
|
||||
|
||||
# Source file suffixes
|
||||
source_suffix = {
|
||||
".rst": "restructuredtext",
|
||||
".md": "markdown",
|
||||
}
|
||||
|
||||
# The master toctree document
|
||||
master_doc = "index"
|
||||
|
||||
# -- Options for HTML output -------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
|
||||
|
||||
html_theme = "sphinx_rtd_theme"
|
||||
html_static_path = ["_static"]
|
||||
|
||||
# Theme options
|
||||
html_theme_options = {
|
||||
"logo_only": False,
|
||||
"display_version": True,
|
||||
"prev_next_buttons_location": "bottom",
|
||||
"style_external_links": False,
|
||||
"vcs_pageview_mode": "",
|
||||
# Toc options
|
||||
"collapse_navigation": False,
|
||||
"sticky_navigation": True,
|
||||
"navigation_depth": 4,
|
||||
"includehidden": True,
|
||||
"titles_only": False,
|
||||
}
|
||||
|
||||
# -- Options for autodoc -----------------------------------------------------
|
||||
|
||||
autodoc_default_options = {
|
||||
"members": True,
|
||||
"member-order": "bysource",
|
||||
"special-members": "__init__",
|
||||
"undoc-members": True,
|
||||
"exclude-members": "__weakref__",
|
||||
}
|
||||
|
||||
autodoc_typehints = "description"
|
||||
|
||||
# Keep the exclusions assigned earlier in this file and add README.md to them.
# A plain re-assignment to ["README.md"] would silently drop "_build",
# "Thumbs.db" and ".DS_Store" from the exclusion list.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "README.md"]
|
||||
35
docs/getting-started/index.md
Normal file
35
docs/getting-started/index.md
Normal file
@ -0,0 +1,35 @@
|
||||
# Getting Started
|
||||
|
||||
Welcome to PythonBPF! This section will help you get started with writing eBPF programs in Python.
|
||||
|
||||
## What You'll Learn
|
||||
|
||||
In this section, you'll learn how to:
|
||||
|
||||
1. **Install PythonBPF** - Set up your development environment with all necessary dependencies
|
||||
2. **Write Your First Program** - Create a simple BPF program to understand the basics
|
||||
3. **Understand Core Concepts** - Learn about decorators, compilation, and program structure
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Before you begin, make sure you have:
|
||||
|
||||
* A Linux system (eBPF requires Linux kernel 4.15+)
|
||||
* Python 3.10 or higher
|
||||
* Root or sudo access (required for loading BPF programs)
|
||||
|
||||
## Next Steps
|
||||
|
||||
After completing the getting started guide, you can:
|
||||
|
||||
* Explore the {doc}`../user-guide/index` for detailed information on features
|
||||
* Check out the {doc}`../api/index`
|
||||
* Browse the [examples directory](https://github.com/pythonbpf/Python-BPF/tree/master/examples) and the [BCC examples directory](https://github.com/pythonbpf/Python-BPF/tree/master/BCC-Examples)
|
||||
|
||||
## Need Help?
|
||||
|
||||
If you encounter any issues:
|
||||
|
||||
* Check the [GitHub Issues](https://github.com/pythonbpf/Python-BPF/issues) for known problems
|
||||
* Review the [README](https://github.com/pythonbpf/Python-BPF/blob/master/README.md) for additional information
|
||||
* Reach out to the maintainers: [@r41k0u](https://github.com/r41k0u) and [@varun-r-mallya](https://github.com/varun-r-mallya)
|
||||
182
docs/getting-started/installation.md
Normal file
182
docs/getting-started/installation.md
Normal file
@ -0,0 +1,182 @@
|
||||
# Installation
|
||||
|
||||
This guide will walk you through installing PythonBPF and its dependencies.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
### System Requirements
|
||||
|
||||
PythonBPF requires:
|
||||
|
||||
* **Linux** - eBPF is a Linux kernel feature (kernel 4.15 or higher recommended)
|
||||
* **Python 3.10+** - Python 3.10 or higher is required
|
||||
* **Root/sudo access** - Loading BPF programs into the kernel requires elevated privileges
|
||||
|
||||
### Required System Packages
|
||||
|
||||
Before installing PythonBPF, you need to install the following system packages:
|
||||
|
||||
#### On Ubuntu/Debian:
|
||||
|
||||
```bash
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y bpftool clang llvm
|
||||
```
|
||||
|
||||
#### On Fedora/RHEL/CentOS:
|
||||
|
||||
```bash
|
||||
sudo dnf install -y bpftool clang llvm
|
||||
```
|
||||
|
||||
#### On Arch Linux:
|
||||
|
||||
```bash
|
||||
sudo pacman -S bpf clang llvm
|
||||
```
|
||||
|
||||
## Installing PythonBPF
|
||||
|
||||
### From PyPI (Recommended)
|
||||
|
||||
The easiest way to install PythonBPF is using uv or pip:
|
||||
|
||||
**Using uv (recommended):**
|
||||
```bash
|
||||
uv pip install pythonbpf pylibbpf
|
||||
```
|
||||
|
||||
**Using pip:**
|
||||
```bash
|
||||
pip install pythonbpf pylibbpf
|
||||
```
|
||||
|
||||
This will install:
|
||||
* `pythonbpf` - The main package for writing and compiling BPF programs
|
||||
* `pylibbpf` - Python bindings for libbpf, used to load and attach BPF programs
|
||||
|
||||
### Development Installation
|
||||
|
||||
If you want to contribute to PythonBPF or work with the latest development version:
|
||||
|
||||
1. Clone the repository:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/pythonbpf/Python-BPF.git
|
||||
cd Python-BPF
|
||||
```
|
||||
|
||||
2. Create and activate a virtual environment:
|
||||
|
||||
```bash
|
||||
python3 -m venv .venv
|
||||
source .venv/bin/activate # On Windows: .venv\Scripts\activate
|
||||
```
|
||||
|
||||
3. Install in development mode:
|
||||
|
||||
**Using uv (recommended):**
|
||||
```bash
|
||||
uv pip install -e .
|
||||
uv pip install pylibbpf
|
||||
```
|
||||
|
||||
**Using pip:**
|
||||
```bash
|
||||
pip install -e .
|
||||
pip install pylibbpf
|
||||
```
|
||||
|
||||
4. Install development dependencies:
|
||||
|
||||
```bash
|
||||
make install
|
||||
```
|
||||
|
||||
### Installing Documentation Dependencies
|
||||
|
||||
If you want to build the documentation locally:
|
||||
|
||||
**Using uv (recommended):**
|
||||
```bash
|
||||
uv pip install "pythonbpf[docs]"
|
||||
# Or from the repository root:
|
||||
uv pip install -e ".[docs]"
|
||||
```
|
||||
|
||||
**Using pip:**
|
||||
```bash
|
||||
pip install "pythonbpf[docs]"
|
||||
# Or from the repository root:
|
||||
pip install -e ".[docs]"
|
||||
```
|
||||
|
||||
## Generating vmlinux.py
|
||||
|
||||
`vmlinux.py` contains the running kernel's data structures and is analogous to `vmlinux.h` included in eBPF programs written in C. Some examples require access to it. To use these features, you need to generate a `vmlinux.py` file:
|
||||
|
||||
1. Install additional dependencies:
|
||||
|
||||
**Using uv (recommended):**
|
||||
```bash
|
||||
uv pip install ctypeslib2
|
||||
```
|
||||
|
||||
**Using pip:**
|
||||
```bash
|
||||
pip install ctypeslib2
|
||||
```
|
||||
|
||||
2. Generate the vmlinux.py file:
|
||||
|
||||
```bash
|
||||
sudo tools/vmlinux-gen.py
|
||||
```
|
||||
|
||||
3. Copy the generated file to your working directory or the examples directory as needed.
|
||||
|
||||
```{warning}
|
||||
The `vmlinux.py` file is kernel-specific. If you upgrade your kernel, you may need to regenerate this file.
|
||||
```
|
||||
|
||||
## Verifying Installation
|
||||
|
||||
To verify that PythonBPF is installed correctly, run:
|
||||
|
||||
```bash
|
||||
python3 -c "import pythonbpf; print(pythonbpf.__all__)"
|
||||
```
|
||||
|
||||
You should see output similar to:
|
||||
|
||||
```
|
||||
['bpf', 'map', 'section', 'bpfglobal', 'struct', 'compile_to_ir', 'compile', 'BPF', 'trace_pipe', 'trace_fields']
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Permission Errors
|
||||
|
||||
If you encounter permission errors when running BPF programs:
|
||||
|
||||
* Make sure you're running with `sudo` or as root
|
||||
* Check that `/sys/kernel/tracing/` is accessible
|
||||
|
||||
### LLVM/Clang Not Found
|
||||
|
||||
If you get errors about `llc` or `clang` not being found:
|
||||
|
||||
* Verify they're installed: `which llc` and `which clang`
|
||||
* Check your PATH environment variable includes the LLVM bin directory
|
||||
|
||||
### Import Errors
|
||||
|
||||
If Python can't find the `pythonbpf` module:
|
||||
|
||||
* Make sure you've activated your virtual environment
|
||||
* Verify installation with `uv pip list | grep pythonbpf` or `pip list | grep pythonbpf`
|
||||
* Try reinstalling: `uv pip install --force-reinstall pythonbpf` or `pip install --force-reinstall pythonbpf`
|
||||
|
||||
## Next Steps
|
||||
|
||||
Now that you have PythonBPF installed, continue to the {doc}`quickstart` guide to write your first BPF program!
|
||||
249
docs/getting-started/quickstart.md
Normal file
249
docs/getting-started/quickstart.md
Normal file
@ -0,0 +1,249 @@
|
||||
# Quick Start
|
||||
|
||||
This guide will walk you through creating your first BPF program with PythonBPF.
|
||||
|
||||
## Your First BPF Program
|
||||
|
||||
Let's create a simple "Hello World" program that prints a message every time a process is executed on your system.
|
||||
|
||||
### Step 1: Create the Program
|
||||
|
||||
Create a new file called `hello_world.py`:
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
|
||||
from ctypes import c_void_p, c_int64
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_execve")
|
||||
def hello_world(ctx: c_void_p) -> c_int64:
|
||||
print("Hello, World!")
|
||||
return 0
|
||||
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def LICENSE() -> str:
|
||||
return "GPL"
|
||||
|
||||
b = BPF()
|
||||
b.load()
|
||||
b.attach_all()
|
||||
trace_pipe()
|
||||
```
|
||||
|
||||
### Step 2: Run the Program
|
||||
|
||||
Run the program with sudo (required for BPF operations):
|
||||
|
||||
```bash
|
||||
sudo python3 hello_world.py
|
||||
```
|
||||
|
||||
### Step 3: See it in Action
|
||||
|
||||
Open another terminal and run any command:
|
||||
|
||||
```bash
|
||||
ls
|
||||
echo "test"
|
||||
date
|
||||
```
|
||||
|
||||
You should see "Hello, World!" printed in the first terminal for each command executed!
|
||||
|
||||
Press `Ctrl+C` to stop the program.
|
||||
|
||||
## Understanding the Code
|
||||
|
||||
Let's break down what each part does:
|
||||
|
||||
### Imports
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
|
||||
from ctypes import c_void_p, c_int64
|
||||
```
|
||||
|
||||
* `bpf` - Decorator to mark functions for BPF compilation
|
||||
* `section` - Decorator to specify which kernel event to attach to
|
||||
* `bpfglobal` - Decorator for BPF global variables
|
||||
* `BPF` - Class to compile, load, and attach BPF programs
|
||||
* `trace_pipe` - Utility to read kernel trace output (similar to BCC)
|
||||
* `c_void_p`, `c_int64` - C types for function signatures
|
||||
|
||||
### The BPF Function
|
||||
|
||||
```python
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_execve")
|
||||
def hello_world(ctx: c_void_p) -> c_int64:
|
||||
print("Hello, World!")
|
||||
return 0
|
||||
```
|
||||
|
||||
* `@bpf` - Marks this function to be compiled to BPF bytecode
|
||||
* `@section("tracepoint/syscalls/sys_enter_execve")` - Attaches to the execve syscall tracepoint (called when processes start)
|
||||
* `ctx: c_void_p` - Context parameter (required for all BPF functions)
|
||||
* `print()` - PythonBPF's interface to the `bpf_printk` helper function
|
||||
* `return 0` - BPF functions must return an integer
|
||||
|
||||
### License Declaration
|
||||
|
||||
```python
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def LICENSE() -> str:
|
||||
return "GPL"
|
||||
```
|
||||
|
||||
* The Linux kernel requires BPF programs to declare a license
|
||||
* Most kernel features require GPL-compatible licenses
|
||||
* This is defined as a BPF global variable
|
||||
|
||||
### Compilation and Execution
|
||||
|
||||
```python
|
||||
b = BPF()
|
||||
b.load()
|
||||
b.attach_all()
|
||||
trace_pipe()
|
||||
```
|
||||
|
||||
* `BPF()` - Creates a BPF object and compiles the current file
|
||||
* `b.load()` - Loads the compiled BPF program into the kernel
|
||||
* `b.attach_all()` - Attaches all BPF programs to their specified hooks
|
||||
* `trace_pipe()` - Reads and displays output from the kernel trace buffer
|
||||
|
||||
Alternatively, you can also use the `compile()` function to compile the BPF code to an object file:
|
||||
|
||||
```python
|
||||
from pythonbpf import compile

compile()
|
||||
```
|
||||
|
||||
This object file can then be loaded using any other userspace library in any language.
|
||||
|
||||
## Next Example: Tracking Process IDs
|
||||
|
||||
Let's make a more interesting program that tracks which processes are being created:
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
|
||||
from pythonbpf.helper import pid
|
||||
from ctypes import c_void_p, c_int64
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_execve")
|
||||
def track_exec(ctx: c_void_p) -> c_int64:
|
||||
process_id = pid()
|
||||
print(f"Process with PID: {process_id} is starting")
|
||||
return 0
|
||||
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def LICENSE() -> str:
|
||||
return "GPL"
|
||||
|
||||
b = BPF()
|
||||
b.load()
|
||||
b.attach_all()
|
||||
trace_pipe()
|
||||
```
|
||||
|
||||
This program uses a BPF helper function:
|
||||
|
||||
* `pid()` - Gets the current process ID
|
||||
|
||||
Save it as `track_exec.py`, run it with `sudo python3 track_exec.py`, and watch processes being created!
|
||||
|
||||
## Common Patterns
|
||||
|
||||
### Tracepoints
|
||||
|
||||
Tracepoints are predefined hooks in the kernel. Common ones include:
|
||||
|
||||
```python
|
||||
# System calls
|
||||
@section("tracepoint/syscalls/sys_enter_execve")
|
||||
@section("tracepoint/syscalls/sys_enter_clone")
|
||||
@section("tracepoint/syscalls/sys_enter_open")
|
||||
|
||||
# Scheduler events
|
||||
@section("tracepoint/sched/sched_process_fork")
|
||||
@section("tracepoint/sched/sched_switch")
|
||||
```
|
||||
|
||||
### Kprobes
|
||||
|
||||
Kprobes allow you to attach to any kernel function:
|
||||
|
||||
```python
|
||||
@section("kprobe/do_sys_open")
|
||||
def trace_open(ctx: c_void_p) -> c_int64:
|
||||
print("File is being opened")
|
||||
return 0
|
||||
```
|
||||
|
||||
### XDP (eXpress Data Path)
|
||||
|
||||
For network packet processing:
|
||||
|
||||
```python
|
||||
from pythonbpf.helper import XDP_PASS
|
||||
|
||||
@section("xdp")
|
||||
def xdp_pass(ctx: c_void_p) -> c_int64:
|
||||
return XDP_PASS
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Always include a LICENSE** - Required by the kernel
|
||||
2. **Use type hints** - Required by PythonBPF to generate correct code
|
||||
3. **Return the correct type** - Match the expected return type for your program type
|
||||
4. **Test incrementally** - Start simple and add complexity gradually
|
||||
5. **Check kernel logs** - Use `dmesg` to see BPF verifier messages if loading fails
|
||||
|
||||
## Common Issues
|
||||
|
||||
### Program Won't Load
|
||||
|
||||
If your BPF program fails to load:
|
||||
|
||||
* Check `dmesg` for verifier error messages
|
||||
* Ensure your LICENSE is GPL-compatible
|
||||
* Verify you're using supported BPF features
|
||||
* Make sure return types match function signatures
|
||||
|
||||
### No Output
|
||||
|
||||
If you don't see output:
|
||||
|
||||
* Verify the tracepoint/kprobe is being triggered
|
||||
* Check that you're running with sudo
|
||||
* Ensure `/sys/kernel/tracing/trace_pipe` is accessible
|
||||
|
||||
### Compilation Errors
|
||||
|
||||
If compilation fails:
|
||||
|
||||
* Check that `llc` is installed and in your PATH
|
||||
* Verify your Python syntax is correct
|
||||
* Ensure all imported types are from `ctypes`
|
||||
* In the worst case, compile object files manually using `compile_to_ir()` and `llc` to get detailed errors
|
||||
|
||||
### Verification Failure
|
||||
|
||||
If verification fails:
|
||||
|
||||
* Compile the object files using the `compile()` function instead of loading directly
|
||||
* Run `sudo check.sh check <bpf>.o` to get detailed verification output
|
||||
|
||||
## Next Steps
|
||||
|
||||
Now that you understand the basics, explore:
|
||||
|
||||
* {doc}`../user-guide/decorators` - Learn about all available decorators
|
||||
* {doc}`../user-guide/maps` - Use BPF maps for data storage and communication
|
||||
* {doc}`../user-guide/structs` - Define custom data structures
|
||||
* {doc}`../user-guide/helpers` - Discover all available BPF helper functions
|
||||
* [Examples directory](https://github.com/pythonbpf/Python-BPF/tree/master/examples) - See more complex examples
|
||||
95
docs/index.md
Normal file
95
docs/index.md
Normal file
@ -0,0 +1,95 @@
|
||||
# PythonBPF Documentation
|
||||
|
||||
Welcome to **PythonBPF** - a Python frontend for writing eBPF programs without embedding C code. PythonBPF uses [llvmlite](https://github.com/numba/llvmlite) to generate LLVM IR and compiles directly to eBPF object files that can be loaded into the Linux kernel.
|
||||
|
||||
```{note}
|
||||
This project is under active development.
|
||||
```
|
||||
|
||||
## What is PythonBPF?
|
||||
|
||||
PythonBPF is an LLVM IR generator for eBPF programs written in Python. It provides:
|
||||
|
||||
* **Pure Python syntax** - Write eBPF programs in Python using familiar decorators and type annotations
|
||||
* **Direct compilation** - Compile to LLVM object files without relying on BCC
|
||||
* **Full eBPF features** - Support for maps, helpers, global definitions, and more
|
||||
* **Integration with libbpf** - Works with [pylibbpf](https://github.com/pythonbpf/pylibbpf) for object loading and execution
|
||||
|
||||
## Quick Example
|
||||
|
||||
Here's a simple "Hello World" BPF program that traces process creation:
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
|
||||
from ctypes import c_void_p, c_int64
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_execve")
|
||||
def hello_world(ctx: c_void_p) -> c_int64:
|
||||
print("Hello, World!")
|
||||
return 0
|
||||
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def LICENSE() -> str:
|
||||
return "GPL"
|
||||
|
||||
b = BPF()
|
||||
b.load()
|
||||
b.attach_all()
|
||||
trace_pipe()
|
||||
```
|
||||
|
||||
## Features
|
||||
|
||||
* Generate eBPF programs directly using Python syntax
|
||||
* Compile to LLVM object files for kernel execution
|
||||
* Built with `llvmlite` for IR generation
|
||||
* Supports maps, helpers, and global definitions for BPF
|
||||
* Companion project: [pylibbpf](https://github.com/pythonbpf/pylibbpf), which provides bindings for libbpf
|
||||
|
||||
## Table of Contents
|
||||
|
||||
```{toctree}
|
||||
:maxdepth: 2
|
||||
:caption: Getting Started
|
||||
|
||||
getting-started/index
|
||||
getting-started/installation
|
||||
getting-started/quickstart
|
||||
```
|
||||
|
||||
```{toctree}
|
||||
:maxdepth: 2
|
||||
:caption: User Guide
|
||||
|
||||
user-guide/index
|
||||
user-guide/decorators
|
||||
user-guide/maps
|
||||
user-guide/structs
|
||||
user-guide/compilation
|
||||
user-guide/helpers
|
||||
```
|
||||
|
||||
```{toctree}
|
||||
:maxdepth: 2
|
||||
:caption: API Reference
|
||||
|
||||
api/index
|
||||
```
|
||||
|
||||
## Links
|
||||
|
||||
* **GitHub Repository**: [pythonbpf/Python-BPF](https://github.com/pythonbpf/Python-BPF)
|
||||
* **PyPI Package**: [pythonbpf](https://pypi.org/project/pythonbpf/)
|
||||
* **Video Demo**: [YouTube](https://www.youtube.com/watch?v=eFVhLnWFxtE)
|
||||
|
||||
## License
|
||||
|
||||
PythonBPF is licensed under the Apache License 2.0.
|
||||
|
||||
## Indices and tables
|
||||
|
||||
* {ref}`genindex`
|
||||
* {ref}`modindex`
|
||||
* {ref}`search`
|
||||
4
docs/requirements.txt
Normal file
4
docs/requirements.txt
Normal file
@ -0,0 +1,4 @@
|
||||
myst-parser>=2.0
|
||||
sphinx>=7.0
|
||||
sphinx-copybutton
|
||||
sphinx-rtd-theme>=2.0
|
||||
432
docs/user-guide/compilation.md
Normal file
432
docs/user-guide/compilation.md
Normal file
@ -0,0 +1,432 @@
|
||||
# Compilation
|
||||
|
||||
PythonBPF provides several functions and classes for compiling Python code into BPF bytecode and loading it into the kernel.
|
||||
|
||||
## Overview
|
||||
|
||||
The compilation process transforms Python code into executable BPF programs:
|
||||
|
||||
1. **Python AST** → LLVM IR generation (using llvmlite)
|
||||
2. **LLVM IR** → BPF bytecode (using llc)
|
||||
3. **BPF Object** → Kernel loading (using libbpf)
|
||||
|
||||
## Compilation Functions
|
||||
|
||||
### compile_to_ir()
|
||||
|
||||
Compile Python source to LLVM Intermediate Representation.
|
||||
|
||||
#### Signature
|
||||
|
||||
```python
|
||||
def compile_to_ir(filename: str, output: str, loglevel=logging.WARNING)
|
||||
```
|
||||
|
||||
#### Parameters
|
||||
|
||||
* `filename` - Path to the Python source file to compile
|
||||
* `output` - Path where the LLVM IR file (.ll) should be written
|
||||
* `loglevel` - Logging level (default: `logging.WARNING`)
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from pythonbpf import compile_to_ir
|
||||
import logging
|
||||
|
||||
# Compile to LLVM IR
|
||||
compile_to_ir(
|
||||
filename="my_bpf_program.py",
|
||||
output="my_bpf_program.ll",
|
||||
loglevel=logging.DEBUG
|
||||
)
|
||||
```
|
||||
|
||||
#### Output
|
||||
|
||||
This function generates an `.ll` file containing LLVM IR, which is human-readable assembly-like code. This is useful for:
|
||||
|
||||
* Debugging compilation issues
|
||||
* Understanding code generation
|
||||
|
||||
### compile()
|
||||
|
||||
Compile Python source to BPF object file.
|
||||
|
||||
#### Signature
|
||||
|
||||
```python
|
||||
def compile(filename: str = None, output: str = None, loglevel=logging.WARNING)
|
||||
```
|
||||
|
||||
#### Parameters
|
||||
|
||||
* `filename` - Path to the Python source file (default: calling file)
|
||||
* `output` - Path for the output object file (default: same name with `.o` extension)
|
||||
* `loglevel` - Logging level (default: `logging.WARNING`)
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from pythonbpf import compile
|
||||
import logging
|
||||
|
||||
# Compile current file
|
||||
compile()
|
||||
|
||||
# Compile specific file
|
||||
compile(filename="my_program.py", output="my_program.o")
|
||||
|
||||
# Compile with debug logging
|
||||
compile(loglevel=logging.DEBUG)
|
||||
```
|
||||
|
||||
#### Output
|
||||
|
||||
This function generates a `.o` file containing BPF bytecode that can be:
|
||||
|
||||
* Loaded into the kernel
|
||||
* Inspected with `bpftool`
|
||||
* Verified with the BPF verifier
|
||||
* Distributed as a compiled binary
|
||||
|
||||
### BPF Class
|
||||
|
||||
The `BPF` class provides a high-level interface to compile, load, and attach BPF programs.
|
||||
|
||||
#### Signature
|
||||
|
||||
```python
|
||||
class BPF:
|
||||
def __init__(self, filename: str = None, loglevel=logging.WARNING)
|
||||
def load(self)
|
||||
def attach_all(self)
|
||||
def load_and_attach(self)
|
||||
```
|
||||
|
||||
#### Parameters
|
||||
|
||||
* `filename` - Path to Python source file (default: calling file)
|
||||
* `loglevel` - Logging level (default: `logging.WARNING`)
|
||||
|
||||
#### Methods
|
||||
|
||||
##### __init__()
|
||||
|
||||
Create a BPF object and compile the source.
|
||||
|
||||
```python
|
||||
from pythonbpf import BPF
|
||||
|
||||
# Compile current file
|
||||
b = BPF()
|
||||
|
||||
# Compile specific file
|
||||
b = BPF(filename="my_program.py")
|
||||
```
|
||||
|
||||
##### load()
|
||||
|
||||
Load the compiled BPF program into the kernel.
|
||||
|
||||
```python
|
||||
b = BPF()
|
||||
b.load()
|
||||
```
|
||||
|
||||
This method:
|
||||
* Loads the BPF object file into the kernel
|
||||
* Creates maps
|
||||
* Verifies the BPF program
|
||||
* Returns a `BpfObject` instance
|
||||
|
||||
##### attach_all()
|
||||
|
||||
Attach all BPF programs to their specified hooks.
|
||||
|
||||
```python
|
||||
b = BPF()
|
||||
b.load()
|
||||
b.attach_all()
|
||||
```
|
||||
|
||||
This method:
|
||||
* Attaches tracepoints
|
||||
* Attaches kprobes/kretprobes
|
||||
* Attaches XDP programs
|
||||
* Enables all hooks
|
||||
|
||||
##### load_and_attach()
|
||||
|
||||
Convenience method that loads and attaches in one call.
|
||||
|
||||
```python
|
||||
b = BPF()
|
||||
b.load_and_attach()
|
||||
```
|
||||
|
||||
Equivalent to:
|
||||
```python
|
||||
b = BPF()
|
||||
b.load()
|
||||
b.attach_all()
|
||||
```
|
||||
|
||||
## Complete Example
|
||||
|
||||
Here's a complete example showing the compilation workflow:
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
|
||||
from ctypes import c_void_p, c_int64
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_execve")
|
||||
def trace_exec(ctx: c_void_p) -> c_int64:
|
||||
print("Process started")
|
||||
return 0
|
||||
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def LICENSE() -> str:
|
||||
return "GPL"
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Method 1: Simple compilation and loading
|
||||
b = BPF()
|
||||
b.load_and_attach()
|
||||
trace_pipe()
|
||||
|
||||
# Method 2: Step-by-step
|
||||
# b = BPF()
|
||||
# b.load()
|
||||
# b.attach_all()
|
||||
# trace_pipe()
|
||||
|
||||
# Method 3: Manual compilation
|
||||
# from pythonbpf import compile
|
||||
# compile(filename="my_program.py", output="my_program.o")
|
||||
# # Then load with pylibbpf directly
|
||||
```
|
||||
|
||||
## Compilation Pipeline Details
|
||||
|
||||
### AST Parsing
|
||||
|
||||
The Python `ast` module parses your source code:
|
||||
|
||||
```python
|
||||
import ast
|
||||
tree = ast.parse(source_code, filename)
|
||||
```
|
||||
|
||||
The AST is then walked to find:
|
||||
* Functions decorated with `@bpf`
|
||||
* Classes decorated with `@struct`
|
||||
* Map definitions with `@map`
|
||||
* Global variables with `@bpfglobal`
|
||||
|
||||
### IR Generation
|
||||
|
||||
PythonBPF uses `llvmlite` to generate LLVM IR:
|
||||
|
||||
```python
|
||||
from llvmlite import ir
|
||||
|
||||
# Create module
|
||||
module = ir.Module(name='bpf_module')
|
||||
module.triple = 'bpf'
|
||||
|
||||
# Generate IR for each BPF function
|
||||
# ...
|
||||
```
|
||||
|
||||
Key aspects of IR generation:
|
||||
|
||||
* Type conversion (Python types → LLVM types)
|
||||
* Function definitions
|
||||
* Map declarations
|
||||
* Global variable initialization
|
||||
* Debug information
|
||||
|
||||
### BPF Compilation
|
||||
|
||||
The LLVM IR is compiled to BPF bytecode using `llc`:
|
||||
|
||||
```bash
|
||||
llc -march=bpf -filetype=obj input.ll -o output.o
|
||||
```
|
||||
|
||||
### Kernel Loading
|
||||
|
||||
The compiled object is loaded using `pylibbpf`:
|
||||
|
||||
```python
|
||||
from pylibbpf import BpfObject
|
||||
|
||||
obj = BpfObject(path="program.o")
|
||||
obj.load()
|
||||
```
|
||||
|
||||
## Debugging Compilation
|
||||
|
||||
### Logging
|
||||
|
||||
Enable debug logging to see compilation details:
|
||||
|
||||
```python
|
||||
import logging
|
||||
from pythonbpf import BPF
|
||||
|
||||
b = BPF(loglevel=logging.DEBUG)
|
||||
```
|
||||
|
||||
This will show:
|
||||
* AST parsing details
|
||||
* IR generation steps
|
||||
* Compilation commands
|
||||
* Loading status
|
||||
|
||||
### Inspecting LLVM IR
|
||||
|
||||
Generate and inspect the IR file:
|
||||
|
||||
```python
|
||||
from pythonbpf import compile_to_ir
|
||||
|
||||
compile_to_ir("program.py", "program.ll")
|
||||
```
|
||||
|
||||
Then examine `program.ll` to understand the generated code.
|
||||
|
||||
### Using bpftool
|
||||
|
||||
Inspect compiled objects with `bpftool`:
|
||||
|
||||
```bash
|
||||
# Show program info
|
||||
bpftool prog show
|
||||
|
||||
# Dump program instructions
|
||||
bpftool prog dump xlated id <ID>
|
||||
|
||||
# Dump program JIT code
|
||||
bpftool prog dump jited id <ID>
|
||||
|
||||
# Show maps
|
||||
bpftool map show
|
||||
|
||||
# Dump map contents
|
||||
bpftool map dump id <ID>
|
||||
```
|
||||
|
||||
### Verifier Errors
|
||||
|
||||
If the kernel verifier rejects your program:
|
||||
|
||||
* Check `dmesg` for detailed error messages:
|
||||
```bash
|
||||
sudo dmesg | tail -50
|
||||
```
|
||||
|
||||
## Compilation Options
|
||||
|
||||
### Optimization Levels
|
||||
|
||||
While PythonBPF doesn't expose optimization flags directly, you can:
|
||||
|
||||
1. Manually compile IR with specific flags:
|
||||
```bash
|
||||
llc -march=bpf -O2 -filetype=obj program.ll -o program.o
|
||||
```
|
||||
|
||||
2. Modify the compilation pipeline in your code
|
||||
|
||||
### Debug Information
|
||||
|
||||
PythonBPF automatically generates debug information (DWARF) for:
|
||||
|
||||
* Function names
|
||||
* Variable names
|
||||
* Type information
|
||||
|
||||
This helps with:
|
||||
* Stack traces
|
||||
* Debugging with `bpftool`
|
||||
* Source-level debugging
|
||||
|
||||
## Working with Compiled Objects
|
||||
|
||||
### Loading Pre-compiled Objects
|
||||
|
||||
You can load previously compiled objects:
|
||||
|
||||
```python
|
||||
from pylibbpf import BpfObject
|
||||
|
||||
# Load object file
|
||||
obj = BpfObject(path="my_program.o")
|
||||
obj.load()
|
||||
|
||||
# Attach programs
|
||||
# (specific attachment depends on program type)
|
||||
```
|
||||
|
||||
### Distribution
|
||||
|
||||
Distribute compiled BPF objects:
|
||||
|
||||
1. Compile once:
|
||||
```python
|
||||
from pythonbpf import compile
|
||||
compile(filename="program.py", output="program.o")
|
||||
```
|
||||
|
||||
2. Ship the `program.o` file
|
||||
|
||||
3. Load on target systems:
|
||||
```python
|
||||
from pylibbpf import BpfObject
|
||||
obj = BpfObject(path="program.o")
|
||||
obj.load()
|
||||
```
|
||||
|
||||
### Version Compatibility
|
||||
|
||||
BPF objects are generally compatible across kernel versions, but:
|
||||
|
||||
* Some features require specific kernel versions
|
||||
* Helper functions may not be available on older kernels
|
||||
* BTF (BPF Type Format) requirements vary
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Compilation Fails
|
||||
|
||||
If compilation fails:
|
||||
* Check Python syntax
|
||||
* Verify all decorators are correct
|
||||
* Ensure type hints are present
|
||||
* Check for unsupported Python features
|
||||
|
||||
### Loading Fails
|
||||
|
||||
If loading fails:
|
||||
* Check `dmesg` for verifier errors
|
||||
* Verify LICENSE is set correctly
|
||||
* Ensure helper functions are valid
|
||||
* Check map definitions
|
||||
|
||||
### Programs Don't Attach
|
||||
|
||||
If attachment fails:
|
||||
* Verify section names are correct
|
||||
* Check that hooks exist on your kernel
|
||||
* Ensure you have sufficient permissions
|
||||
* Verify kernel version supports the feature
|
||||
|
||||
## Next Steps
|
||||
|
||||
* Learn about {doc}`helpers` for available BPF helper functions
|
||||
* Explore {doc}`maps` for data storage
|
||||
* See {doc}`decorators` for compilation markers
|
||||
448
docs/user-guide/decorators.md
Normal file
448
docs/user-guide/decorators.md
Normal file
@ -0,0 +1,448 @@
|
||||
# Decorators
|
||||
|
||||
Decorators are the primary way to mark Python code for BPF compilation. PythonBPF provides five core decorators that control how your code is transformed into eBPF bytecode.
|
||||
|
||||
## @bpf
|
||||
|
||||
The `@bpf` decorator marks functions or classes for BPF compilation.
|
||||
|
||||
### Usage
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf
|
||||
|
||||
@bpf
|
||||
def my_function(ctx):
|
||||
# This function will be compiled to BPF bytecode
|
||||
pass
|
||||
```
|
||||
|
||||
### Description
|
||||
|
||||
Any function or class decorated with `@bpf` will be processed by the PythonBPF compiler and transformed into LLVM IR, then compiled to BPF bytecode. This is the fundamental decorator that enables BPF compilation.
|
||||
|
||||
### Rules
|
||||
|
||||
* Must be used on top-level functions or classes
|
||||
* The function must have proper type hints
|
||||
* Return types must be BPF-compatible
|
||||
* Only BPF-compatible operations are allowed inside
|
||||
|
||||
### Example
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, section
|
||||
from ctypes import c_void_p, c_int64
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_execve")
|
||||
def trace_exec(ctx: c_void_p) -> c_int64:
|
||||
print("Process started")
|
||||
return c_int64(0)
|
||||
```
|
||||
|
||||
## @section
|
||||
|
||||
The `@section(name)` decorator specifies which kernel hook to attach the BPF program to.
|
||||
|
||||
### Usage
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, section
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_open")
|
||||
def trace_open(ctx):
|
||||
pass
|
||||
```
|
||||
|
||||
### Section Types
|
||||
|
||||
#### Tracepoints
|
||||
|
||||
Tracepoints are stable kernel hooks defined in `/sys/kernel/tracing/events/`:
|
||||
|
||||
```python
|
||||
# System call tracepoints
|
||||
@section("tracepoint/syscalls/sys_enter_execve")
|
||||
@section("tracepoint/syscalls/sys_enter_clone")
|
||||
@section("tracepoint/syscalls/sys_enter_open")
|
||||
@section("tracepoint/syscalls/sys_exit_read")
|
||||
|
||||
# Scheduler tracepoints
|
||||
@section("tracepoint/sched/sched_process_fork")
|
||||
@section("tracepoint/sched/sched_process_exit")
|
||||
@section("tracepoint/sched/sched_switch")
|
||||
|
||||
# Block I/O tracepoints
|
||||
@section("tracepoint/block/block_rq_insert")
|
||||
@section("tracepoint/block/block_rq_complete")
|
||||
```
|
||||
|
||||
#### Kprobes
|
||||
|
||||
Kprobes allow attaching to any kernel function:
|
||||
|
||||
```python
|
||||
@section("kprobe/do_sys_open")
|
||||
def trace_sys_open(ctx):
|
||||
pass
|
||||
|
||||
@section("kprobe/__x64_sys_write")
|
||||
def trace_write(ctx):
|
||||
pass
|
||||
```
|
||||
|
||||
#### Kretprobes
|
||||
|
||||
Kretprobes trigger when a kernel function returns:
|
||||
|
||||
```python
|
||||
@section("kretprobe/do_sys_open")
|
||||
def trace_open_return(ctx):
|
||||
pass
|
||||
```
|
||||
|
||||
#### XDP (eXpress Data Path)
|
||||
|
||||
For network packet processing at the earliest point:
|
||||
|
||||
```python
|
||||
from pythonbpf.helper import XDP_PASS
|
||||
from ctypes import c_void_p, c_int64
|
||||
|
||||
@section("xdp")
|
||||
def xdp_prog(ctx: c_void_p) -> c_int64:
|
||||
# XDP_PASS, XDP_DROP, XDP_ABORTED constants available from pythonbpf.helper
|
||||
return XDP_PASS
|
||||
```
|
||||
|
||||
### Finding Tracepoints
|
||||
|
||||
To find available tracepoints on your system:
|
||||
|
||||
```bash
|
||||
# List all tracepoints
|
||||
ls /sys/kernel/tracing/events/
|
||||
|
||||
# List syscall tracepoints
|
||||
ls /sys/kernel/tracing/events/syscalls/
|
||||
|
||||
# View tracepoint format
|
||||
cat /sys/kernel/tracing/events/syscalls/sys_enter_open/format
|
||||
```
|
||||
|
||||
## @map
|
||||
|
||||
The `@map` decorator marks a function as a BPF map definition.
|
||||
|
||||
### Usage
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, map
|
||||
from pythonbpf.maps import HashMap
|
||||
from ctypes import c_uint32, c_uint64
|
||||
|
||||
@bpf
|
||||
@map
|
||||
def my_map() -> HashMap:
|
||||
return HashMap(key=c_uint32, value=c_uint64, max_entries=1024)
|
||||
```
|
||||
|
||||
### Description
|
||||
|
||||
Maps are BPF data structures used to:
|
||||
|
||||
* Store state between BPF program invocations
|
||||
* Communicate data between BPF programs
|
||||
* Share data with userspace
|
||||
|
||||
The function must return a map type (HashMap, PerfEventArray, RingBuffer) and the return type must be annotated.
|
||||
|
||||
### Example
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, map, section
|
||||
from pythonbpf.maps import HashMap
|
||||
from pythonbpf.helper import pid
|
||||
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
|
||||
|
||||
@bpf
|
||||
@map
|
||||
def process_count() -> HashMap:
|
||||
return HashMap(key=c_uint32, value=c_uint64, max_entries=4096)
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_clone")
|
||||
def count_clones(ctx: c_void_p) -> c_int64:
|
||||
process_id = pid()
|
||||
count = process_count.lookup(process_id)
|
||||
if count:
|
||||
process_count.update(process_id, count + 1)
|
||||
else:
|
||||
process_count.update(process_id, c_uint64(1))
|
||||
return 0
|
||||
```
|
||||
|
||||
See {doc}`maps` for more details on available map types.
|
||||
|
||||
## @struct
|
||||
|
||||
The `@struct` decorator marks a class as a BPF struct definition.
|
||||
|
||||
### Usage
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, struct
|
||||
from ctypes import c_uint64, c_uint32
|
||||
|
||||
@bpf
|
||||
@struct
|
||||
class Event:
|
||||
timestamp: c_uint64
|
||||
pid: c_uint32
|
||||
cpu: c_uint32
|
||||
```
|
||||
|
||||
### Description
|
||||
|
||||
Structs allow you to define custom data types for use in BPF programs. They can be used:
|
||||
|
||||
* As map keys and values
|
||||
* For perf event output
|
||||
* In ring buffer submissions
|
||||
* As local variables
|
||||
|
||||
### Field Types
|
||||
|
||||
Supported field types include:
|
||||
|
||||
* **Integer types**: `c_int8`, `c_int16`, `c_int32`, `c_int64`, `c_uint8`, `c_uint16`, `c_uint32`, `c_uint64`
|
||||
* **Pointers**: `c_void_p`, `c_char_p`
|
||||
* **Fixed strings**: `str(N)` where N is the size (e.g., `str(16)`)
|
||||
* **Nested structs**: Other `@struct` decorated classes
|
||||
|
||||
### Example
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, struct, map, section
|
||||
from pythonbpf.maps import RingBuffer
|
||||
from pythonbpf.helper import pid, ktime, comm
|
||||
from ctypes import c_void_p, c_int64, c_uint64, c_uint32
|
||||
|
||||
@bpf
|
||||
@struct
|
||||
class ProcessEvent:
|
||||
timestamp: c_uint64
|
||||
pid: c_uint32
|
||||
comm: str(16)
|
||||
|
||||
@bpf
|
||||
@map
|
||||
def events() -> RingBuffer:
|
||||
return RingBuffer(max_entries=4096)
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_execve")
|
||||
def track_processes(ctx: c_void_p) -> c_int64:
|
||||
event = ProcessEvent()
|
||||
event.timestamp = ktime()
|
||||
event.pid = pid()
|
||||
comm(event.comm) # Fills event.comm with process name
|
||||
|
||||
events.output(event)
|
||||
return 0
|
||||
```
|
||||
|
||||
See {doc}`structs` for more details on working with structs.
|
||||
|
||||
## @bpfglobal
|
||||
|
||||
The `@bpfglobal` decorator marks a function as a BPF global variable definition.
|
||||
|
||||
### Usage
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, bpfglobal
|
||||
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def LICENSE() -> str:
|
||||
return "GPL"
|
||||
```
|
||||
|
||||
### Description
|
||||
|
||||
BPF global variables are values that:
|
||||
|
||||
* Are initialized when the program loads
|
||||
* Can be read by all BPF functions
|
||||
* Must be constant (cannot be modified at runtime in the current implementation)
|
||||
|
||||
### Common Global Variables
|
||||
|
||||
#### LICENSE (Required)
|
||||
|
||||
Every BPF program must declare a license:
|
||||
|
||||
```python
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def LICENSE() -> str:
|
||||
return "GPL"
|
||||
```
|
||||
|
||||
Valid licenses include:
|
||||
* `"GPL"` - GNU General Public License
|
||||
* `"GPL v2"` - GPL version 2
|
||||
* `"Dual BSD/GPL"` - Dual licensed
|
||||
* `"Dual MIT/GPL"` - Dual licensed
|
||||
|
||||
```{warning}
|
||||
Many BPF features require a GPL-compatible license. Using a non-GPL license may prevent your program from loading or accessing certain kernel features.
|
||||
```
|
||||
|
||||
#### Custom Global Variables
|
||||
|
||||
You can define other global variables:
|
||||
|
||||
```python
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def DEBUG_MODE() -> int:
|
||||
return 1
|
||||
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def MAX_EVENTS() -> int:
|
||||
return 1000
|
||||
```
|
||||
|
||||
These can be referenced in your BPF functions, though modifying them at runtime is currently not supported.
|
||||
|
||||
## Combining Decorators
|
||||
|
||||
Decorators are often used together. The order matters:
|
||||
|
||||
### Correct Order
|
||||
|
||||
```python
|
||||
@bpf # Always first
|
||||
@section("...") # Section before other decorators
|
||||
def my_function():
|
||||
pass
|
||||
|
||||
@bpf # Always first
|
||||
@map # Map/struct/bpfglobal after @bpf
|
||||
def my_map():
|
||||
pass
|
||||
|
||||
@bpf # Always first
|
||||
@struct # Map/struct/bpfglobal after @bpf
|
||||
class MyStruct:
|
||||
pass
|
||||
|
||||
@bpf # Always first
|
||||
@bpfglobal # Map/struct/bpfglobal after @bpf
|
||||
def LICENSE():
|
||||
return "GPL"
|
||||
```
|
||||
|
||||
### Examples by Use Case
|
||||
|
||||
#### Simple Tracepoint
|
||||
|
||||
```python
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_open")
|
||||
def trace_open(ctx: c_void_p) -> c_int64:
|
||||
return c_int64(0)
|
||||
```
|
||||
|
||||
#### Map Definition
|
||||
|
||||
```python
|
||||
@bpf
|
||||
@map
|
||||
def counters() -> HashMap:
|
||||
return HashMap(key=c_uint32, value=c_uint64, max_entries=256)
|
||||
```
|
||||
|
||||
#### Struct Definition
|
||||
|
||||
```python
|
||||
@bpf
|
||||
@struct
|
||||
class Event:
|
||||
timestamp: c_uint64
|
||||
value: c_uint32
|
||||
```
|
||||
|
||||
#### Global Variable
|
||||
|
||||
```python
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def LICENSE() -> str:
|
||||
return "GPL"
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Always use @bpf first** - It must be the outermost decorator
|
||||
2. **Provide type hints** - Required for proper code generation
|
||||
3. **Test incrementally** - Verify each component works before combining
|
||||
|
||||
## Common Errors
|
||||
|
||||
### Missing @bpf Decorator
|
||||
|
||||
```python
|
||||
# Wrong - missing @bpf
|
||||
@section("tracepoint/syscalls/sys_enter_open")
|
||||
def my_func(ctx):
|
||||
pass
|
||||
|
||||
# Correct
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_open")
|
||||
def my_func(ctx):
|
||||
pass
|
||||
```
|
||||
|
||||
### Wrong Decorator Order
|
||||
|
||||
```python
|
||||
# Wrong - @section before @bpf
|
||||
@section("tracepoint/syscalls/sys_enter_open")
|
||||
@bpf
|
||||
def my_func(ctx):
|
||||
pass
|
||||
|
||||
# Correct
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_open")
|
||||
def my_func(ctx):
|
||||
pass
|
||||
```
|
||||
|
||||
### Missing Type Hints
|
||||
|
||||
```python
|
||||
# Wrong - no type hints
|
||||
@bpf
|
||||
def my_func(ctx):
|
||||
pass
|
||||
|
||||
# Correct
|
||||
@bpf
|
||||
def my_func(ctx: c_void_p) -> c_int64:
|
||||
pass
|
||||
```
|
||||
|
||||
## Next Steps
|
||||
|
||||
* Learn about {doc}`maps` for data storage and communication
|
||||
* Explore {doc}`structs` for defining custom data types
|
||||
* Understand {doc}`compilation` to see how code is transformed
|
||||
* Check out {doc}`helpers` for available BPF helper functions
|
||||
503
docs/user-guide/helpers.md
Normal file
503
docs/user-guide/helpers.md
Normal file
@ -0,0 +1,503 @@
|
||||
# Helper Functions and Utilities
|
||||
|
||||
PythonBPF provides helper functions and utilities for BPF programs and userspace code.
|
||||
|
||||
```{note}
|
||||
**Work in Progress:** PythonBPF is under active development. We are constantly adding support for more helpers, kfuncs, and map types. Check back for updates!
|
||||
```
|
||||
For comprehensive documentation on BPF helpers, see the [eBPF Helper Functions documentation on ebpf.io](https://ebpf.io/what-is-ebpf/#helper-calls).
|
||||
|
||||
## BPF Helper Functions
|
||||
|
||||
BPF helper functions are kernel-provided functions that BPF programs can call to interact with the system. PythonBPF exposes these through the `pythonbpf.helper` module.
|
||||
|
||||
```python
|
||||
from pythonbpf.helper import pid, ktime, comm
|
||||
```
|
||||
|
||||
### Process and Task Information
|
||||
|
||||
#### pid()
|
||||
|
||||
Get the current process ID.
|
||||
|
||||
> **Linux Kernel Helper:** `bpf_get_current_pid_tgid()`
|
||||
|
||||
```python
|
||||
from pythonbpf.helper import pid
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_open")
|
||||
def trace_open(ctx: c_void_p) -> c_int64:
|
||||
process_id = pid()
|
||||
print(f"Process {process_id} opened a file")
|
||||
return 0
|
||||
```
|
||||
|
||||
**Returns:** `c_int32` - The process ID of the current task
|
||||
|
||||
#### comm()
|
||||
|
||||
Get the current process command name.
|
||||
|
||||
> **Linux Kernel Helper:** `bpf_get_current_comm()`
|
||||
|
||||
**Parameters:**
|
||||
* `buf` - Buffer to fill with the process command name
|
||||
|
||||
**Returns:** `c_int64` - 0 on success, negative on error
|
||||
|
||||
#### uid()
|
||||
|
||||
Get the current user ID.
|
||||
|
||||
> **Linux Kernel Helper:** `bpf_get_current_uid_gid()`
|
||||
|
||||
```python
|
||||
from pythonbpf.helper import uid
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_open")
|
||||
def trace_open(ctx: c_void_p) -> c_int64:
|
||||
user_id = uid()
|
||||
if user_id == 0:
|
||||
print("Root user opened a file")
|
||||
return 0
|
||||
```
|
||||
|
||||
**Returns:** `c_int32` - The user ID of the current task
|
||||
|
||||
### Time and Timing
|
||||
|
||||
#### ktime()
|
||||
|
||||
Get the current kernel time in nanoseconds since system boot.
|
||||
|
||||
> **Linux Kernel Helper:** `bpf_ktime_get_ns()`
|
||||
|
||||
```python
|
||||
from pythonbpf.helper import ktime
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_read")
|
||||
def measure_latency(ctx: c_void_p) -> c_int64:
|
||||
start_time = ktime()
|
||||
# Store for later comparison
|
||||
return 0
|
||||
```
|
||||
|
||||
**Returns:** `c_int64` - Current time in nanoseconds
|
||||
|
||||
**Use cases:**
|
||||
* Measuring latency
|
||||
* Timestamping events
|
||||
* Rate limiting
|
||||
* Timeout detection
|
||||
|
||||
### CPU Information
|
||||
|
||||
#### smp_processor_id()
|
||||
|
||||
Get the ID of the CPU on which the BPF program is running.
|
||||
|
||||
> **Linux Kernel Helper:** `bpf_get_smp_processor_id()`
|
||||
|
||||
```python
|
||||
from pythonbpf.helper import smp_processor_id
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/sched/sched_switch")
|
||||
def track_cpu(ctx: c_void_p) -> c_int64:
|
||||
cpu = smp_processor_id()
|
||||
print(f"Running on CPU {cpu}")
|
||||
return 0
|
||||
```
|
||||
|
||||
**Returns:** `c_int32` - The current CPU ID
|
||||
|
||||
**Use cases:**
|
||||
* Per-CPU statistics
|
||||
* Load balancing analysis
|
||||
* CPU affinity tracking
|
||||
|
||||
### Memory Operations
|
||||
|
||||
#### probe_read()
|
||||
|
||||
Safely read data from kernel memory.
|
||||
|
||||
> **Linux Kernel Helper:** `bpf_probe_read()`
|
||||
|
||||
```python
|
||||
from pythonbpf.helper import probe_read
|
||||
|
||||
@bpf
|
||||
def read_kernel_data(ctx: c_void_p) -> c_int64:
|
||||
dst = 0
|
||||
size = 8
|
||||
src = ctx # kernel address
|
||||
|
||||
result = probe_read(dst, size, src)
|
||||
if result == 0:
|
||||
print(f"Read value: {dst}")
|
||||
return 0
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
* `dst` - Destination buffer
|
||||
* `size` - Number of bytes to read
|
||||
* `src` - Source kernel address
|
||||
|
||||
**Returns:** `c_int64` - 0 on success, negative on error
|
||||
|
||||
**Safety:** This function performs bounds checking and prevents invalid memory access.
|
||||
|
||||
#### probe_read_str()
|
||||
|
||||
Safely read a null-terminated string from kernel memory.
|
||||
|
||||
> **Linux Kernel Helper:** `bpf_probe_read_str()`
|
||||
|
||||
**Parameters:**
|
||||
* `dst` - Destination buffer (string)
|
||||
* `src` - Source kernel address
|
||||
|
||||
**Returns:** `c_int64` - Length of string on success, negative on error
|
||||
|
||||
### Random Numbers
|
||||
|
||||
#### random()
|
||||
|
||||
Generate a pseudo-random 32-bit number.
|
||||
|
||||
> **Linux Kernel Helper:** `bpf_get_prandom_u32()`
|
||||
|
||||
```python
|
||||
from pythonbpf.helper import random
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_open")
|
||||
def sample_events(ctx: c_void_p) -> c_int64:
|
||||
# Sample 1% of events
|
||||
if (random() % 100) == 0:
|
||||
print("Sampled event")
|
||||
return 0
|
||||
```
|
||||
|
||||
**Returns:** `c_int32` - A pseudo-random number
|
||||
|
||||
### Network Helpers
|
||||
|
||||
#### skb_store_bytes()
|
||||
|
||||
Store bytes into a socket buffer (for network programs).
|
||||
|
||||
> **Linux Kernel Helper:** `bpf_skb_store_bytes()`
|
||||
|
||||
```python
|
||||
from pythonbpf.helper import skb_store_bytes
|
||||
|
||||
@bpf
|
||||
@section("classifier")
|
||||
def modify_packet(ctx: c_void_p) -> c_int32:
|
||||
offset = 14 # Skip Ethernet header
|
||||
data = b"\x00\x01\x02\x03"
|
||||
size = len(data)
|
||||
|
||||
result = skb_store_bytes(offset, data, size)
|
||||
return 0
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
* `offset` - Offset in the socket buffer
|
||||
* `from_buf` - Data to write
|
||||
* `size` - Number of bytes to write
|
||||
* `flags` - Optional flags
|
||||
|
||||
**Returns:** `c_int64` - 0 on success, negative on error
|
||||
|
||||
## Userspace Utilities
|
||||
|
||||
PythonBPF provides utilities for working with BPF programs from Python userspace code.
|
||||
|
||||
### trace_pipe()
|
||||
|
||||
Read and display output from the kernel trace pipe.
|
||||
|
||||
```python
|
||||
from pythonbpf import trace_pipe
|
||||
|
||||
# After loading and attaching BPF programs
|
||||
trace_pipe()
|
||||
```
|
||||
|
||||
**Description:**
|
||||
|
||||
The `trace_pipe()` function reads from `/sys/kernel/tracing/trace_pipe` and displays BPF program output to stdout. This is the output from `print()` statements in BPF programs.
|
||||
|
||||
**Usage:**
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
|
||||
from ctypes import c_void_p, c_int64
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_execve")
|
||||
def trace_exec(ctx: c_void_p) -> c_int64:
|
||||
print("Process started") # This goes to trace_pipe
|
||||
return 0
|
||||
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def LICENSE() -> str:
|
||||
return "GPL"
|
||||
|
||||
b = BPF()
|
||||
b.load_and_attach()
|
||||
trace_pipe() # Display BPF output
|
||||
```
|
||||
|
||||
**Behavior:**
|
||||
|
||||
* Blocks until Ctrl+C is pressed
|
||||
* Displays output in real-time
|
||||
* Shows task name, PID, CPU, timestamp, and message
|
||||
* Automatically handles trace pipe access errors
|
||||
|
||||
**Requirements:**
|
||||
|
||||
* Root or sudo access
|
||||
* Accessible `/sys/kernel/tracing/trace_pipe`
|
||||
|
||||
### trace_fields()
|
||||
|
||||
Parse one line from the trace pipe into structured fields.
|
||||
|
||||
```python
|
||||
from pythonbpf import trace_fields
|
||||
|
||||
# Read and parse trace output
|
||||
task, pid, cpu, flags, ts, msg = trace_fields()
|
||||
print(f"Task: {task}, PID: {pid}, CPU: {cpu}, Time: {ts}, Message: {msg}")
|
||||
```
|
||||
|
||||
**Returns:** Tuple of `(task, pid, cpu, flags, timestamp, message)`
|
||||
|
||||
* `task` - String: Task/process name (up to 16 chars)
|
||||
* `pid` - Integer: Process ID
|
||||
* `cpu` - Integer: CPU number
|
||||
* `flags` - Bytes: Trace flags
|
||||
* `timestamp` - Float: Timestamp in seconds
|
||||
* `message` - String: The actual trace message
|
||||
|
||||
**Description:**
|
||||
|
||||
The `trace_fields()` function reads one line from the trace pipe and parses it into individual fields. This is useful when you need programmatic access to trace data rather than just displaying it.
|
||||
|
||||
**Usage:**
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, section, bpfglobal, BPF, trace_fields
|
||||
from pythonbpf.helper import pid
|
||||
from ctypes import c_void_p, c_int64
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_execve")
|
||||
def trace_exec(ctx: c_void_p) -> c_int64:
|
||||
print(f"PID:{pid()}")
|
||||
return 0
|
||||
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def LICENSE() -> str:
|
||||
return "GPL"
|
||||
|
||||
b = BPF()
|
||||
b.load_and_attach()
|
||||
|
||||
# Process trace events
|
||||
try:
|
||||
while True:
|
||||
task, pid, cpu, flags, ts, msg = trace_fields()
|
||||
print(f"[{ts:.6f}] {task}({pid}) on CPU{cpu}: {msg}")
|
||||
except KeyboardInterrupt:
|
||||
print("Stopped")
|
||||
```
|
||||
|
||||
**Error Handling:**
|
||||
|
||||
* Raises `ValueError` if the line cannot be parsed
|
||||
* Skips lines about lost events
|
||||
* Blocks waiting for next line
|
||||
|
||||
## Helper Function Examples
|
||||
|
||||
### Example 1: Latency Measurement
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, map, section, bpfglobal, BPF, trace_pipe
|
||||
from pythonbpf.maps import HashMap
|
||||
from pythonbpf.helper import pid, ktime
|
||||
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
|
||||
|
||||
@bpf
|
||||
@map
|
||||
def start_times() -> HashMap:
|
||||
return HashMap(key=c_uint32, value=c_uint64, max_entries=4096)
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_read")
|
||||
def read_start(ctx: c_void_p) -> c_int64:
|
||||
process_id = pid()
|
||||
start = ktime()
|
||||
start_times.update(process_id, start)
|
||||
return 0
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_exit_read")
|
||||
def read_end(ctx: c_void_p) -> c_int64:
|
||||
process_id = pid()
|
||||
start = start_times.lookup(process_id)
|
||||
|
||||
if start:
|
||||
latency = ktime() - start
|
||||
print(f"Read latency: {latency} ns")
|
||||
start_times.delete(process_id)
|
||||
|
||||
return 0
|
||||
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def LICENSE() -> str:
|
||||
return "GPL"
|
||||
|
||||
b = BPF()
|
||||
b.load_and_attach()
|
||||
trace_pipe()
|
||||
```
|
||||
|
||||
### Example 2: Process Tracking
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
|
||||
from pythonbpf.helper import pid, uid
|
||||
from ctypes import c_void_p, c_int64
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_execve")
|
||||
def track_exec(ctx: c_void_p) -> c_int64:
|
||||
process_id = pid()
|
||||
user_id = uid()
|
||||
|
||||
print(f"User {user_id} started process (PID: {process_id})")
|
||||
return 0
|
||||
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def LICENSE() -> str:
|
||||
return "GPL"
|
||||
|
||||
b = BPF()
|
||||
b.load_and_attach()
|
||||
trace_pipe()
|
||||
```
|
||||
|
||||
### Example 3: CPU Load Monitoring
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, map, section, bpfglobal, BPF
|
||||
from pythonbpf.maps import HashMap
|
||||
from pythonbpf.helper import smp_processor_id
|
||||
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
|
||||
|
||||
@bpf
|
||||
@map
|
||||
def cpu_counts() -> HashMap:
|
||||
return HashMap(key=c_uint32, value=c_uint64, max_entries=256)
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/sched/sched_switch")
|
||||
def count_switches(ctx: c_void_p) -> c_int64:
|
||||
cpu = smp_processor_id()
|
||||
count = cpu_counts.lookup(cpu)
|
||||
|
||||
if count:
|
||||
cpu_counts.update(cpu, count + 1)
|
||||
else:
|
||||
cpu_counts.update(cpu, 1)
|
||||
|
||||
return 0
|
||||
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def LICENSE() -> str:
|
||||
return "GPL"
|
||||
|
||||
b = BPF()
|
||||
b.load_and_attach()
|
||||
|
||||
import time
|
||||
time.sleep(5)
|
||||
|
||||
# Read results
|
||||
from pylibbpf import BpfMap
|
||||
map_obj = BpfMap(b, cpu_counts)
|
||||
for cpu, count in map_obj.items():
|
||||
print(f"CPU {cpu}: {count} context switches")
|
||||
```
|
||||
|
||||
### Example 4: Event Sampling
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
|
||||
from pythonbpf.helper import random, pid
|
||||
from ctypes import c_void_p, c_int64
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_open")
|
||||
def sample_opens(ctx: c_void_p) -> c_int64:
|
||||
# Sample 5% of events
|
||||
if (random() % 100) < 5:
|
||||
process_id = pid()
|
||||
print(f"Sampled: PID {process_id} opening file")
|
||||
|
||||
return 0
|
||||
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def LICENSE() -> str:
|
||||
return "GPL"
|
||||
|
||||
b = BPF()
|
||||
b.load_and_attach()
|
||||
trace_pipe()
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Helper Not Available
|
||||
|
||||
If a helper function doesn't work:
|
||||
* Check your kernel version (some helpers are newer)
|
||||
* Ensure your LICENSE is GPL-compatible
|
||||
|
||||
### Trace Pipe Access Denied
|
||||
|
||||
If `trace_pipe()` fails:
|
||||
* Run with sudo/root
|
||||
* Check `/sys/kernel/tracing/` is accessible
|
||||
* Verify tracing is enabled in kernel config
|
||||
|
||||
## Examples
|
||||
|
||||
Check out these examples in the `BCC-Examples/` directory that demonstrate helper functions:
|
||||
|
||||
* [hello_world.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/hello_world.py) - Basic tracing with `print()`
|
||||
* [sync_timing.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/sync_timing.py) - Using `ktime()` for timing measurements
|
||||
* [hello_perf_output.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/hello_perf_output.py) - Using `pid()`, `ktime()`, and `comm()` with perf events
|
||||
* [vfsreadlat.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/vfsreadlat.py) - Latency measurement with `ktime()` in kprobes
|
||||
|
||||
## Next Steps
|
||||
|
||||
* Explore {doc}`maps` for data storage with helpers
|
||||
* Learn about {doc}`compilation` to understand helper implementation
|
||||
* See {doc}`decorators` for marking BPF functions
|
||||
87
docs/user-guide/index.md
Normal file
87
docs/user-guide/index.md
Normal file
@ -0,0 +1,87 @@
|
||||
# User Guide
|
||||
|
||||
This user guide provides comprehensive documentation for all PythonBPF features. Whether you're building simple tracing tools or complex performance monitoring systems, this guide will help you master PythonBPF.
|
||||
|
||||
## Overview
|
||||
|
||||
PythonBPF transforms Python code into eBPF bytecode that runs in the Linux kernel. It provides a Pythonic interface to eBPF features through decorators, type annotations, and familiar programming patterns.
|
||||
|
||||
## Core Concepts
|
||||
|
||||
### Decorators
|
||||
|
||||
PythonBPF uses decorators to mark code for BPF compilation:
|
||||
|
||||
* `@bpf` - Mark functions and classes for BPF compilation
|
||||
* `@map` - Define BPF maps for data storage
|
||||
* `@struct` - Define custom data structures
|
||||
* `@section(name)` - Specify attachment points
|
||||
* `@bpfglobal` - Define global variables
|
||||
|
||||
### Compilation Pipeline
|
||||
|
||||
Your Python code goes through several stages:
|
||||
|
||||
1. **IR Generation** - The Python AST is transformed into LLVM IR using llvmlite
|
||||
2. **BPF Compilation** - LLVM IR is compiled to BPF bytecode using `llc`
|
||||
3. **Loading** - The BPF object is loaded into the kernel using libbpf
|
||||
4. **Attachment** - Programs are attached to kernel hooks (tracepoints, kprobes, etc.)
|
||||
|
||||
## Code Organization
|
||||
|
||||
When writing BPF programs with PythonBPF, we recommend:
|
||||
|
||||
1. **Use type hints** - Required for proper code generation
|
||||
2. **Test incrementally** - Verify each component works before adding complexity
|
||||
|
||||
## Type System
|
||||
|
||||
PythonBPF uses Python's `ctypes` module for type definitions:
|
||||
|
||||
* `c_int8`, `c_int16`, `c_int32`, `c_int64` - Signed integers
|
||||
* `c_uint8`, `c_uint16`, `c_uint32`, `c_uint64` - Unsigned integers
|
||||
* `c_char`, `c_bool` - Characters and booleans
|
||||
* `c_void_p` - Void pointers
|
||||
* `str(N)` - Fixed-length strings (e.g., `str(16)` for 16-byte string)
|
||||
|
||||
## Example Structure
|
||||
|
||||
A typical PythonBPF program follows this structure:
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, map, section, bpfglobal, BPF, compile
|
||||
from pythonbpf.maps import HashMap
|
||||
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
|
||||
|
||||
# Define maps
|
||||
@bpf
|
||||
@map
|
||||
def my_map() -> HashMap:
|
||||
return HashMap(key=c_uint32, value=c_uint64, max_entries=1024)
|
||||
|
||||
# Define BPF function
|
||||
@bpf
|
||||
@section("tracepoint/...")
|
||||
def my_function(ctx: c_void_p) -> c_int64:
|
||||
# BPF logic here
|
||||
return 0
|
||||
|
||||
# License (required)
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def LICENSE() -> str:
|
||||
return "GPL"
|
||||
|
||||
# Compile, load, and run
|
||||
if __name__ == "__main__":
|
||||
b = BPF()
|
||||
b.load_and_attach()
|
||||
# Use the program...
|
||||
|
||||
# Or, compile to an object file
|
||||
compile()
|
||||
```
|
||||
|
||||
## Next Steps
|
||||
|
||||
Start with {doc}`decorators` to learn about all available decorators, then explore the other sections to master specific features.
|
||||
476
docs/user-guide/maps.md
Normal file
476
docs/user-guide/maps.md
Normal file
@ -0,0 +1,476 @@
|
||||
# BPF Maps
|
||||
|
||||
Maps are BPF data structures that provide storage and communication mechanisms. They allow BPF programs to:
|
||||
|
||||
* Store state between invocations
|
||||
* Share data between multiple BPF programs
|
||||
* Communicate with userspace applications
|
||||
|
||||
```{note}
|
||||
**Work in Progress:** PythonBPF is under active development. We are constantly adding support for more map types, helpers, and kfuncs. Check back for updates!
|
||||
```
|
||||
For comprehensive documentation on BPF maps, see the [eBPF Maps documentation on ebpf.io](https://ebpf.io/what-is-ebpf/#maps).
|
||||
|
||||
## Map Types
|
||||
|
||||
PythonBPF supports several map types, each optimized for different use cases.
|
||||
|
||||
### HashMap
|
||||
|
||||
Hash maps provide efficient key-value storage with O(1) lookup time.
|
||||
|
||||
> **Linux Kernel Map Type:** `BPF_MAP_TYPE_HASH`
|
||||
|
||||
#### Definition
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, map
|
||||
from pythonbpf.maps import HashMap
|
||||
from ctypes import c_uint32, c_uint64
|
||||
|
||||
@bpf
|
||||
@map
|
||||
def my_map() -> HashMap:
|
||||
return HashMap(
|
||||
key=c_uint32,
|
||||
value=c_uint64,
|
||||
max_entries=1024
|
||||
)
|
||||
```
|
||||
|
||||
#### Parameters
|
||||
|
||||
* `key` - The type of the key (must be a ctypes type or struct)
|
||||
* `value` - The type of the value (must be a ctypes type or struct)
|
||||
* `max_entries` - Maximum number of entries the map can hold
|
||||
|
||||
#### Operations
|
||||
|
||||
##### lookup(key)
|
||||
|
||||
Look up a value by key. Returns the value if found, `None` otherwise.
|
||||
|
||||
```python
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_open")
|
||||
def trace_open(ctx: c_void_p) -> c_int64:
|
||||
value = my_map.lookup(1)
|
||||
if value:
|
||||
print(f"Found value: {value}")
|
||||
return 0
|
||||
```
|
||||
|
||||
##### update(key, value, flags=None)
|
||||
|
||||
Update or insert a key-value pair.
|
||||
|
||||
```python
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_open")
|
||||
def track_opens(ctx: c_void_p) -> c_int64:
|
||||
key = pid()
|
||||
count = my_map.lookup(key)
|
||||
if count:
|
||||
my_map.update(key, count + 1)
|
||||
else:
|
||||
my_map.update(key, 1)
|
||||
return 0
|
||||
```
|
||||
|
||||
##### delete(key)
|
||||
|
||||
Remove an entry from the map.
|
||||
|
||||
```python
|
||||
@bpf
|
||||
def cleanup(ctx: c_void_p) -> c_int64:
|
||||
my_map.delete(1)
|
||||
return 0
|
||||
```
|
||||
|
||||
#### Use Cases
|
||||
|
||||
* Counting events per process/CPU
|
||||
* Storing timestamps for latency calculations
|
||||
* Caching lookup results
|
||||
* Implementing rate limiters
|
||||
|
||||
#### Example: Process Counter
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, map, section, bpfglobal, BPF
|
||||
from pythonbpf.maps import HashMap
|
||||
from pythonbpf.helper import pid
|
||||
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
|
||||
|
||||
@bpf
|
||||
@map
|
||||
def process_count() -> HashMap:
|
||||
return HashMap(key=c_uint32, value=c_uint64, max_entries=4096)
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_clone")
|
||||
def count_processes(ctx: c_void_p) -> c_int64:
|
||||
process_id = pid()
|
||||
count = process_count.lookup(process_id)
|
||||
|
||||
if count:
|
||||
new_count = count + 1
|
||||
process_count.update(process_id, new_count)
|
||||
else:
|
||||
process_count.update(process_id, 1)
|
||||
|
||||
return 0
|
||||
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def LICENSE() -> str:
|
||||
return "GPL"
|
||||
|
||||
if __name__ == "__main__":
|
||||
b = BPF()
|
||||
b.load_and_attach()
|
||||
# Access map from userspace
|
||||
from pylibbpf import BpfMap
|
||||
map_obj = BpfMap(b, process_count)
|
||||
# Read values...
|
||||
```
|
||||
|
||||
### PerfEventArray
|
||||
|
||||
Perf event arrays are used to send data from BPF programs to userspace with high throughput.
|
||||
|
||||
> **Linux Kernel Map Type:** `BPF_MAP_TYPE_PERF_EVENT_ARRAY`
|
||||
|
||||
#### Definition
|
||||
|
||||
```python
|
||||
from pythonbpf.maps import PerfEventArray
|
||||
|
||||
@bpf
|
||||
@map
|
||||
def events() -> PerfEventArray:
|
||||
return PerfEventArray(
|
||||
key_size=c_uint32,
|
||||
value_size=c_uint32
|
||||
)
|
||||
```
|
||||
|
||||
#### Parameters
|
||||
|
||||
* `key_size` - Type for the key (typically `c_uint32`)
|
||||
* `value_size` - Type for the value (typically `c_uint32`)
|
||||
|
||||
#### Operations
|
||||
|
||||
##### output(data)
|
||||
|
||||
Send data to userspace. The data can be a struct or basic type.
|
||||
|
||||
```python
|
||||
@bpf
|
||||
@struct
|
||||
class Event:
|
||||
pid: c_uint32
|
||||
timestamp: c_uint64
|
||||
|
||||
@bpf
|
||||
@map
|
||||
def events() -> PerfEventArray:
|
||||
return PerfEventArray(key_size=c_uint32, value_size=c_uint32)
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_execve")
|
||||
def send_event(ctx: c_void_p) -> c_int64:
|
||||
event = Event()
|
||||
event.pid = pid()
|
||||
event.timestamp = ktime()
|
||||
events.output(event)
|
||||
return 0
|
||||
```
|
||||
|
||||
#### Use Cases
|
||||
|
||||
* Sending detailed event data to userspace
|
||||
* Real-time monitoring and alerting
|
||||
* Collecting samples for analysis
|
||||
* High-throughput data collection
|
||||
|
||||
#### Example: Event Logging
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, map, struct, section, bpfglobal, BPF
|
||||
from pythonbpf.maps import PerfEventArray
|
||||
from pythonbpf.helper import pid, ktime, comm
|
||||
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
|
||||
|
||||
@bpf
|
||||
@struct
|
||||
class ProcessEvent:
|
||||
timestamp: c_uint64
|
||||
pid: c_uint32
|
||||
comm: str(16)
|
||||
|
||||
@bpf
|
||||
@map
|
||||
def events() -> PerfEventArray:
|
||||
return PerfEventArray(key_size=c_uint32, value_size=c_uint32)
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_execve")
|
||||
def log_exec(ctx: c_void_p) -> c_int64:
|
||||
event = ProcessEvent()
|
||||
event.timestamp = ktime()
|
||||
event.pid = pid()
|
||||
comm(event.comm) # Fills event.comm with process name
|
||||
events.output(event)
|
||||
return 0
|
||||
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def LICENSE() -> str:
|
||||
return "GPL"
|
||||
```
|
||||
|
||||
### RingBuffer
|
||||
|
||||
Ring buffers provide efficient, ordered event delivery with lower overhead than perf event arrays.
|
||||
|
||||
> **Linux Kernel Map Type:** `BPF_MAP_TYPE_RINGBUF`
|
||||
|
||||
#### Definition
|
||||
|
||||
```python
|
||||
from pythonbpf.maps import RingBuffer
|
||||
|
||||
@bpf
|
||||
@map
|
||||
def events() -> RingBuffer:
|
||||
return RingBuffer(max_entries=4096)
|
||||
```
|
||||
|
||||
#### Parameters
|
||||
|
||||
* `max_entries` - Maximum size of the ring buffer in bytes (must be a power of 2 and a multiple of the page size)
|
||||
|
||||
#### Operations
|
||||
|
||||
##### output(data, flags=0)
|
||||
|
||||
Send data to the ring buffer.
|
||||
|
||||
```python
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_open")
|
||||
def log_event(ctx: c_void_p) -> c_int64:
|
||||
event = Event()
|
||||
event.pid = pid()
|
||||
events.output(event)
|
||||
return 0
|
||||
```
|
||||
|
||||
##### reserve(size)
|
||||
|
||||
Reserve space in the ring buffer. Returns a pointer to the reserved space, or 0 if no space is available.
|
||||
|
||||
```python
|
||||
@bpf
|
||||
def reserve_space(ctx: c_void_p) -> c_int64:
|
||||
ptr = events.reserve(64) # Reserve 64 bytes
|
||||
if ptr:
|
||||
# Use the reserved space
|
||||
events.submit(ptr)
|
||||
return 0
|
||||
```
|
||||
|
||||
##### submit(data, flags=0)
|
||||
|
||||
Submit previously reserved space.
|
||||
|
||||
##### discard(data, flags=0)
|
||||
|
||||
Discard previously reserved space without submitting.
|
||||
|
||||
#### Use Cases
|
||||
|
||||
* Modern event streaming (preferred over PerfEventArray)
|
||||
* Lower overhead event delivery
|
||||
* Ordered event processing
|
||||
* Kernel 5.8+ systems
|
||||
|
||||
#### Advantages over PerfEventArray
|
||||
|
||||
* Lower memory overhead
|
||||
* Better performance
|
||||
* Simpler API
|
||||
* Ordered delivery guarantees
|
||||
|
||||
### BPFMapType Enum
|
||||
|
||||
PythonBPF supports various BPF map types through the `BPFMapType` enum:
|
||||
|
||||
```python
|
||||
from pythonbpf.maps import BPFMapType
|
||||
|
||||
# Common map types
|
||||
BPFMapType.BPF_MAP_TYPE_HASH # Hash map
|
||||
BPFMapType.BPF_MAP_TYPE_ARRAY # Array map
|
||||
BPFMapType.BPF_MAP_TYPE_PERF_EVENT_ARRAY # Perf event array
|
||||
BPFMapType.BPF_MAP_TYPE_RINGBUF # Ring buffer
|
||||
BPFMapType.BPF_MAP_TYPE_STACK_TRACE # Stack trace storage
|
||||
BPFMapType.BPF_MAP_TYPE_LRU_HASH # LRU hash map
|
||||
```
|
||||
|
||||
## Using Maps with Structs
|
||||
|
||||
Maps can store complex data types using structs as values:
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, map, struct, section
|
||||
from pythonbpf.maps import HashMap
|
||||
from pythonbpf.helper import pid
|
||||
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
|
||||
|
||||
@bpf
|
||||
@struct
|
||||
class Stats:
|
||||
count: c_uint64
|
||||
total_time: c_uint64
|
||||
max_time: c_uint64
|
||||
|
||||
@bpf
|
||||
@map
|
||||
def process_stats() -> HashMap:
|
||||
return HashMap(
|
||||
key=c_uint32, # PID as key
|
||||
value=Stats, # Struct as value
|
||||
max_entries=1024
|
||||
)
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_read")
|
||||
def track_stats(ctx: c_void_p) -> c_int64:
|
||||
process_id = pid()
|
||||
stats = process_stats.lookup(process_id)
|
||||
|
||||
if stats:
|
||||
stats.count = stats.count + 1
|
||||
process_stats.update(process_id, stats)
|
||||
else:
|
||||
new_stats = Stats()
|
||||
new_stats.count = 1
|
||||
new_stats.total_time = 0
|
||||
new_stats.max_time = 0
|
||||
process_stats.update(process_id, new_stats)
|
||||
|
||||
return 0
|
||||
```
|
||||
|
||||
## Accessing Maps from Userspace
|
||||
|
||||
After loading a BPF program, you can access maps from Python using `pylibbpf`:
|
||||
|
||||
```python
|
||||
from pythonbpf import BPF
|
||||
from pylibbpf import BpfMap
|
||||
|
||||
# Load BPF program
|
||||
b = BPF()
|
||||
b.load_and_attach()
|
||||
|
||||
# Get map reference
|
||||
map_obj = BpfMap(b, my_map)
|
||||
|
||||
# Read all key-value pairs
|
||||
for key, value in map_obj.items():
|
||||
print(f"Key: {key}, Value: {value}")
|
||||
|
||||
# Get all keys
|
||||
keys = list(map_obj.keys())
|
||||
|
||||
# Get all values
|
||||
values = list(map_obj.values())
|
||||
|
||||
# Lookup specific key
|
||||
value = map_obj[key]
|
||||
|
||||
# Update from userspace
|
||||
map_obj[key] = new_value
|
||||
|
||||
# Delete from userspace
|
||||
del map_obj[key]
|
||||
```
|
||||
|
||||
## Common Patterns
|
||||
|
||||
### Counter Pattern
|
||||
|
||||
```python
|
||||
count = my_map.lookup(key)
|
||||
if count:
|
||||
my_map.update(key, count + 1)
|
||||
else:
|
||||
my_map.update(key, 1)
|
||||
```
|
||||
|
||||
### Latency Tracking
|
||||
|
||||
```python
|
||||
# Store start time
|
||||
start = ktime()
|
||||
start_map.update(key, start)
|
||||
|
||||
# Later: calculate latency
|
||||
start_time = start_map.lookup(key)
|
||||
if start_time:
|
||||
latency = ktime() - start_time
|
||||
latency_map.update(key, latency)
|
||||
start_map.delete(key)
|
||||
```
|
||||
|
||||
### Event Sampling
|
||||
|
||||
```python
|
||||
# Only process every Nth event
|
||||
count = counter.lookup(key)
|
||||
if count and (count % 100) == 0:
|
||||
events.output(data)
|
||||
counter.update(key, count + 1 if count else 1)
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Map Not Found
|
||||
|
||||
If you get "map not found" errors:
|
||||
* Ensure the map is defined with `@bpf` and `@map`
|
||||
* Check that the map name matches exactly
|
||||
* Verify the BPF program loaded successfully
|
||||
|
||||
### Map Full
|
||||
|
||||
If updates fail because the map is full:
|
||||
* Increase `max_entries`
|
||||
* Use LRU maps for automatic eviction
|
||||
* Add cleanup logic to delete old entries
|
||||
|
||||
### Type Errors
|
||||
|
||||
If you get type-related errors:
|
||||
* Verify key and value types match the definition
|
||||
* Check that structs are properly defined
|
||||
|
||||
## Examples
|
||||
|
||||
Check out these examples in the `BCC-Examples/` directory that demonstrate map usage:
|
||||
|
||||
* [sync_timing.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/sync_timing.py) - HashMap for storing timestamps
|
||||
* [sync_count.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/sync_count.py) - HashMap for counting events
|
||||
* [hello_perf_output.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/hello_perf_output.py) - PerfEventArray for sending structs to userspace
|
||||
* [sync_perf_output.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/sync_perf_output.py) - PerfEventArray with timing data
|
||||
* [disksnoop.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/disksnoop.py) - HashMap for tracking disk I/O
|
||||
|
||||
## Next Steps
|
||||
|
||||
* Learn about {doc}`structs` for defining custom value types
|
||||
* Explore {doc}`helpers` for BPF helper functions
|
||||
* See {doc}`compilation` to understand how maps are compiled
|
||||
413
docs/user-guide/structs.md
Normal file
413
docs/user-guide/structs.md
Normal file
@ -0,0 +1,413 @@
|
||||
# BPF Structs
|
||||
|
||||
Structs allow you to define custom data types for use in BPF programs. They provide a way to group related fields together and can be used as map values, event payloads, or local variables.
|
||||
|
||||
## Defining Structs
|
||||
|
||||
Use the `@bpf` and `@struct` decorators to define a BPF struct:
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, struct
|
||||
from ctypes import c_uint64, c_uint32
|
||||
|
||||
@bpf
|
||||
@struct
|
||||
class Event:
|
||||
timestamp: c_uint64
|
||||
pid: c_uint32
|
||||
cpu: c_uint32
|
||||
```
|
||||
|
||||
## Field Types
|
||||
|
||||
Structs support various field types from Python's `ctypes` module.
|
||||
|
||||
### Integer Types
|
||||
|
||||
```python
|
||||
from ctypes import (
|
||||
c_int8, c_int16, c_int32, c_int64,
|
||||
c_uint8, c_uint16, c_uint32, c_uint64
|
||||
)
|
||||
|
||||
@bpf
|
||||
@struct
|
||||
class Numbers:
|
||||
small_int: c_int8 # -128 to 127
|
||||
short_int: c_int16 # -32768 to 32767
|
||||
int_val: c_int32 # -2^31 to 2^31-1
|
||||
long_int: c_int64 # -2^63 to 2^63-1
|
||||
|
||||
byte: c_uint8 # 0 to 255
|
||||
word: c_uint16 # 0 to 65535
|
||||
dword: c_uint32 # 0 to 2^32-1
|
||||
qword: c_uint64 # 0 to 2^64-1
|
||||
```
|
||||
|
||||
### String Types
|
||||
|
||||
Fixed-length strings are defined using `str(N)` where N is the size:
|
||||
|
||||
```python
|
||||
@bpf
|
||||
@struct
|
||||
class ProcessInfo:
|
||||
name: str(16) # 16-byte string
|
||||
path: str(256) # 256-byte string
|
||||
```
|
||||
|
||||
```{note}
|
||||
Strings in BPF are fixed-length and null-terminated. The size includes the null terminator.
|
||||
```
|
||||
|
||||
### Pointer Types
|
||||
|
||||
```python
|
||||
from ctypes import c_void_p, c_char_p
|
||||
|
||||
@bpf
|
||||
@struct
|
||||
class Pointers:
|
||||
ptr: c_void_p # Generic pointer
|
||||
str_ptr: c_char_p # Character pointer
|
||||
```
|
||||
|
||||
### Nested Structs
|
||||
|
||||
Structs can contain other structs as fields:
|
||||
|
||||
```python
|
||||
@bpf
|
||||
@struct
|
||||
class Address:
|
||||
street: str(64)
|
||||
city: str(32)
|
||||
zip_code: c_uint32
|
||||
|
||||
@bpf
|
||||
@struct
|
||||
class Person:
|
||||
name: str(32)
|
||||
age: c_uint32
|
||||
address: Address # Nested struct
|
||||
```
|
||||
|
||||
## Using Structs
|
||||
|
||||
### As Local Variables
|
||||
|
||||
Create and use struct instances within BPF functions:
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, struct, section
|
||||
from pythonbpf.helper import pid, ktime, comm
|
||||
from ctypes import c_void_p, c_int64, c_uint64, c_uint32
|
||||
|
||||
@bpf
|
||||
@struct
|
||||
class Event:
|
||||
timestamp: c_uint64
|
||||
pid: c_uint32
|
||||
comm: str(16)
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_execve")
|
||||
def capture_event(ctx: c_void_p) -> c_int64:
|
||||
# Create an instance
|
||||
event = Event()
|
||||
|
||||
# Set fields
|
||||
event.timestamp = ktime()
|
||||
event.pid = pid()
|
||||
comm(event.comm) # Fills event.comm with process name
|
||||
|
||||
# Use the struct
|
||||
print(f"Process with PID {event.pid}")
|
||||
|
||||
return 0
|
||||
```
|
||||
|
||||
### As Map Keys and Values
|
||||
|
||||
Use structs as keys and values in maps for complex state storage:
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, struct, map, section
|
||||
from pythonbpf.maps import HashMap
from pythonbpf.helper import pid
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
|
||||
|
||||
@bpf
|
||||
@struct
|
||||
class ProcessStats:
|
||||
syscall_count: c_uint64
|
||||
total_time: c_uint64
|
||||
max_latency: c_uint64
|
||||
|
||||
@bpf
|
||||
@map
|
||||
def stats() -> HashMap:
|
||||
return HashMap(
|
||||
key=c_uint32,
|
||||
value=ProcessStats,
|
||||
max_entries=1024
|
||||
)
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_read")
|
||||
def track_syscalls(ctx: c_void_p) -> c_int64:
|
||||
process_id = pid()
|
||||
|
||||
# Lookup existing stats
|
||||
s = stats.lookup(process_id)
|
||||
|
||||
if s:
|
||||
# Update existing stats
|
||||
s.syscall_count = s.syscall_count + 1
|
||||
stats.update(process_id, s)
|
||||
else:
|
||||
# Create new stats
|
||||
new_stats = ProcessStats()
|
||||
new_stats.syscall_count = 1
|
||||
new_stats.total_time = 0
|
||||
new_stats.max_latency = 0
|
||||
stats.update(process_id, new_stats)
|
||||
|
||||
return 0
|
||||
```
|
||||
|
||||
### With Perf Events
|
||||
|
||||
Send struct data to userspace using PerfEventArray:
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, struct, map, section
|
||||
from pythonbpf.maps import PerfEventArray
|
||||
from pythonbpf.helper import pid, ktime, comm
|
||||
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
|
||||
|
||||
@bpf
|
||||
@struct
|
||||
class ProcessEvent:
|
||||
timestamp: c_uint64
|
||||
pid: c_uint32
|
||||
ppid: c_uint32
|
||||
comm: str(16)
|
||||
|
||||
@bpf
|
||||
@map
|
||||
def events() -> PerfEventArray:
|
||||
return PerfEventArray(key_size=c_uint32, value_size=c_uint32)
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/sched/sched_process_fork")
|
||||
def trace_fork(ctx: c_void_p) -> c_int64:
|
||||
event = ProcessEvent()
|
||||
event.timestamp = ktime()
|
||||
event.pid = pid()
|
||||
comm(event.comm) # Fills event.comm with process name
|
||||
|
||||
# Send to userspace
|
||||
events.output(event)
|
||||
|
||||
return 0
|
||||
```
|
||||
|
||||
### With Ring Buffers
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, struct, map, section
|
||||
from pythonbpf.maps import RingBuffer
from pythonbpf.helper import pid, ktime
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
|
||||
|
||||
@bpf
|
||||
@struct
|
||||
class FileEvent:
|
||||
timestamp: c_uint64
|
||||
pid: c_uint32
|
||||
filename: str(256)
|
||||
|
||||
@bpf
|
||||
@map
|
||||
def events() -> RingBuffer:
|
||||
return RingBuffer(max_entries=4096)
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_openat")
|
||||
def trace_open(ctx: c_void_p) -> c_int64:
|
||||
event = FileEvent()
|
||||
event.timestamp = ktime()
|
||||
event.pid = pid()
|
||||
|
||||
events.output(event)
|
||||
|
||||
return 0
|
||||
```
|
||||
|
||||
## Field Access and Modification
|
||||
|
||||
### Reading Fields
|
||||
|
||||
Access struct fields using dot notation:
|
||||
|
||||
```python
|
||||
event = Event()
|
||||
ts = event.timestamp
|
||||
process_id = event.pid
|
||||
```
|
||||
|
||||
### Writing Fields
|
||||
|
||||
Assign values to fields:
|
||||
|
||||
```python
|
||||
event = Event()
|
||||
event.timestamp = ktime()
|
||||
event.pid = pid()
|
||||
comm(event.comm)
|
||||
```
|
||||
|
||||
## StructType Class
|
||||
|
||||
PythonBPF provides a `StructType` class for working with struct metadata:
|
||||
|
||||
```python
|
||||
from pythonbpf.structs import StructType
|
||||
|
||||
# Define a struct
|
||||
@bpf
|
||||
@struct
|
||||
class MyStruct:
|
||||
field1: c_uint64
|
||||
field2: c_uint32
|
||||
|
||||
# Access struct information (from userspace)
|
||||
# This is typically used internally by the compiler
|
||||
```
|
||||
|
||||
## Complex Examples
|
||||
|
||||
### Network Packet Event
|
||||
|
||||
```python
|
||||
from pythonbpf import bpf, struct, map, section
|
||||
from pythonbpf.maps import RingBuffer
|
||||
from pythonbpf.helper import ktime, XDP_PASS
|
||||
from ctypes import c_void_p, c_int64, c_uint8, c_uint16, c_uint32, c_uint64
|
||||
|
||||
@bpf
|
||||
@struct
|
||||
class PacketEvent:
|
||||
timestamp: c_uint64
|
||||
src_ip: c_uint32
|
||||
dst_ip: c_uint32
|
||||
src_port: c_uint16
|
||||
dst_port: c_uint16
|
||||
protocol: c_uint8
|
||||
length: c_uint16
|
||||
|
||||
@bpf
|
||||
@map
|
||||
def packets() -> RingBuffer:
|
||||
return RingBuffer(max_entries=8192)
|
||||
|
||||
@bpf
|
||||
@section("xdp")
|
||||
def capture_packets(ctx: c_void_p) -> c_int64:
|
||||
pkt = PacketEvent()
|
||||
pkt.timestamp = ktime()
|
||||
# Parse packet data from ctx...
|
||||
|
||||
packets.output(pkt)
|
||||
|
||||
return XDP_PASS
|
||||
```
|
||||
|
||||
### Process Lifecycle Tracking
|
||||
|
||||
```python
|
||||
@bpf
|
||||
@struct
|
||||
class ProcessLifecycle:
|
||||
pid: c_uint32
|
||||
ppid: c_uint32
|
||||
start_time: c_uint64
|
||||
exit_time: c_uint64
|
||||
exit_code: c_int32
|
||||
comm: str(16)
|
||||
|
||||
@bpf
|
||||
@map
|
||||
def process_info() -> HashMap:
|
||||
return HashMap(
|
||||
key=c_uint32,
|
||||
value=ProcessLifecycle,
|
||||
max_entries=4096
|
||||
)
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/sched/sched_process_fork")
|
||||
def track_fork(ctx: c_void_p) -> c_int64:
|
||||
process_id = pid()
|
||||
|
||||
info = ProcessLifecycle()
|
||||
info.pid = process_id
|
||||
info.start_time = ktime()
|
||||
|
||||
process_info.update(process_id, info)
|
||||
|
||||
return 0
|
||||
|
||||
@bpf
|
||||
@section("tracepoint/sched/sched_process_exit")
|
||||
def track_exit(ctx: c_void_p) -> c_int64:
|
||||
process_id = pid()
|
||||
|
||||
info = process_info.lookup(process_id)
|
||||
if info:
|
||||
info.exit_time = ktime()
|
||||
process_info.update(process_id, info)
|
||||
|
||||
return 0
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Struct Size Issues
|
||||
|
||||
If you encounter size-related errors:
|
||||
* Check for excessive padding
|
||||
* Verify field types are correct
|
||||
* Consider reordering fields
|
||||
|
||||
### Initialization Problems
|
||||
|
||||
If fields aren't initialized correctly:
|
||||
* Always initialize all fields explicitly
|
||||
* Set default values where appropriate
|
||||
* Use helper functions for dynamic values
|
||||
|
||||
### Type Mismatch Errors
|
||||
|
||||
If you get type errors:
|
||||
* Ensure field types match assignments
|
||||
* Check that imported types are from `ctypes`
|
||||
* Verify nested struct definitions
|
||||
|
||||
## Reading Struct Data in Userspace
|
||||
|
||||
After capturing struct data, read it in Python:
|
||||
|
||||
```python
|
||||
from pylibbpf import BpfMap
|
||||
|
||||
# Read from map
|
||||
map_obj = BpfMap(b, stats)
|
||||
for key, value_bytes in map_obj.items():
|
||||
value = Event.from_buffer_copy(value_bytes)
|
||||
print(f"PID: {value.pid}, Comm: {value.comm.decode()}")
|
||||
```
|
||||
|
||||
## Next Steps
|
||||
|
||||
* Learn about {doc}`maps` for storing struct data
|
||||
* Explore {doc}`helpers` for populating struct fields
|
||||
* See {doc}`compilation` to understand how structs are compiled
|
||||
22
examples/anomaly-detection/lib/__init__.py
Normal file
22
examples/anomaly-detection/lib/__init__.py
Normal file
@ -0,0 +1,22 @@
|
||||
"""
|
||||
Process Anomaly Detection - Constants and Utilities
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
# Module-level logger; comm_for_pid() reports its read failures through it.
logger = logging.getLogger(__name__)
# Number of syscall slots tracked (ids 0..547). This is one more than the
# highest key in lib.platform.SYSCALLS.
MAX_SYSCALLS = 548
|
||||
|
||||
|
||||
def comm_for_pid(pid: int) -> bytes | None:
|
||||
"""Get process name from /proc."""
|
||||
try:
|
||||
with open(f"/proc/{pid}/comm", "rb") as f:
|
||||
return f.read().strip()
|
||||
except FileNotFoundError:
|
||||
logger.warning(f"Process with PID {pid} not found.")
|
||||
except PermissionError:
|
||||
logger.warning(f"Permission denied when accessing /proc/{pid}/comm.")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error reading /proc/{pid}/comm: {e}")
|
||||
return None
|
||||
173
examples/anomaly-detection/lib/ml.py
Normal file
173
examples/anomaly-detection/lib/ml.py
Normal file
@ -0,0 +1,173 @@
|
||||
"""
|
||||
Autoencoder for Process Behavior Anomaly Detection
|
||||
|
||||
Uses Keras/TensorFlow to train an autoencoder on syscall patterns.
|
||||
Anomalies are detected when reconstruction error exceeds threshold.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
from tensorflow import keras
|
||||
|
||||
from lib import MAX_SYSCALLS
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def create_autoencoder(n_inputs: int = MAX_SYSCALLS) -> keras.Model:
    """
    Build and compile the dense autoencoder.

    Layer stack: Input(n_inputs) -> Dense(n_inputs) -> ReLU ->
    Dense(n_inputs // 2) bottleneck -> Dense(n_inputs) -> ReLU ->
    Dense(n_inputs, linear activation).

    Args:
        n_inputs: Width of the input vector and of the reconstructed output.

    Returns:
        The model, compiled with the "adam" optimizer and "mse" loss.
    """
    layers = keras.layers

    def _dense_relu(tensor):
        # Full-width dense layer followed by a separate ReLU layer.
        hidden = layers.Dense(n_inputs)(tensor)
        return layers.ReLU()(hidden)

    source = keras.Input(shape=(n_inputs,))

    # Encoder
    encoded = _dense_relu(source)

    # Bottleneck: half the input width, no activation specified
    compressed = layers.Dense(n_inputs // 2)(encoded)

    # Decoder, then a linear projection back to the input width
    decoded = _dense_relu(compressed)
    reconstruction = layers.Dense(n_inputs, activation="linear")(decoded)

    autoencoder = keras.Model(source, reconstruction)
    autoencoder.compile(optimizer="adam", loss="mse")
    return autoencoder
|
||||
|
||||
|
||||
class AutoEncoder:
    """
    Autoencoder for syscall pattern anomaly detection.

    Wraps a Keras model stored at ``filename``. The model is either trained
    from a CSV of samples (``train``) or loaded from disk (``load=True``), and
    is then used to score feature vectors by reconstruction error
    (``predict``).

    Usage:
        # Training
        ae = AutoEncoder('model.keras')
        model, threshold = ae.train('data.csv', epochs=200, batch_size=16)

        # Inference
        ae = AutoEncoder('model.keras', load=True)
        _, errors, total_error = ae.predict([features])
    """

    def __init__(self, filename: str, load: bool = False):
        """
        Args:
            filename: Path the model is saved to by ``train`` and read from
                when ``load`` is true.
            load: Load an existing model from ``filename`` immediately.

        Raises:
            FileNotFoundError: If ``load`` is true and ``filename`` is missing.
        """
        self.filename = filename
        # Stays None until _load_model() or train() assigns a model.
        self.model = None

        if load:
            self._load_model()

    def _load_model(self) -> None:
        """Load a trained model from ``self.filename`` into ``self.model``."""
        if not os.path.exists(self.filename):
            raise FileNotFoundError(f"Model file not found: {self.filename}")

        logger.info(f"Loading model from {self.filename}")
        self.model = keras.models.load_model(self.filename)

    def train(
        self,
        datafile: str,
        epochs: int,
        batch_size: int,
        test_size: float = 0.1,
    ) -> tuple[keras.Model, float]:
        """
        Train the autoencoder on collected data and save it to ``self.filename``.

        Args:
            datafile: Path to CSV file with training data. It must contain a
                ``sample_time`` column, which is dropped; every other column
                is used as a feature.
            epochs: Number of training epochs
            batch_size: Training batch size
            test_size: Fraction of data to use for validation

        Returns:
            Tuple of (trained model, error threshold). The threshold is the
            largest per-sample reconstruction error on the held-out split.

        Raises:
            FileNotFoundError: If ``datafile`` does not exist.
        """
        if not os.path.exists(datafile):
            raise FileNotFoundError(f"Data file not found: {datafile}")

        logger.info(f"Loading training data from {datafile}")

        # Load and prepare data
        df = pd.read_csv(datafile)
        features = df.drop(["sample_time"], axis=1).values

        logger.info(f"Loaded {len(features)} samples with {features.shape[1]} features")

        # Split train/test; the fixed seed keeps the split reproducible.
        train_data, test_data = train_test_split(
            features,
            test_size=test_size,
            random_state=42,
        )

        logger.info(f"Training set: {len(train_data)} samples")
        logger.info(f"Test set: {len(test_data)} samples")

        # Create and train model. Input and target are the same array: the
        # network learns to reconstruct its own input.
        model = create_autoencoder()
        self.model = model

        logger.info("Training autoencoder...")
        model.fit(
            train_data,
            train_data,
            validation_data=(test_data, test_data),
            epochs=epochs,
            batch_size=batch_size,
            verbose=1,
        )

        # Save model (use .keras format for Keras 3.x compatibility)
        model.save(self.filename)
        logger.info(f"Model saved to {self.filename}")

        # Calculate error threshold from test data
        threshold = self._calculate_threshold(test_data)

        return model, threshold

    def _calculate_threshold(self, test_data: np.ndarray) -> float:
        """
        Calculate the error threshold from test data.

        Each sample's error is the sum of absolute differences between the
        sample and its reconstruction; the threshold is the maximum of those.

        Raises:
            RuntimeError: If no model has been loaded or trained.
        """
        logger.info(f"Calculating error threshold from {len(test_data)} test samples")

        if self.model is None:
            raise RuntimeError("Model not loaded. Use load=True or train first.")

        predictions = self.model.predict(test_data, verbose=0)
        errors = np.abs(test_data - predictions).sum(axis=1)

        return float(errors.max())

    def predict(self, X: list | np.ndarray) -> tuple[np.ndarray, np.ndarray, float]:
        """
        Run prediction and return reconstruction error.

        Args:
            X: Input data: a list/array of feature vectors, or a single 1-D
                feature vector (treated as a batch of one).

        Returns:
            Tuple of (reconstructed, per_feature_errors, total_error).
            ``reconstructed`` covers the whole batch; the two error values
            describe the FIRST sample of the batch only.

        Raises:
            RuntimeError: If no model has been loaded or trained.
            ValueError: If ``X`` contains no data.
        """
        if self.model is None:
            raise RuntimeError("Model not loaded. Use load=True or train first.")

        X = np.asarray(X, dtype=np.float32)
        if X.size == 0:
            raise ValueError("predict() requires at least one feature vector")
        if X.ndim == 1:
            # A bare feature vector becomes a batch holding that one sample.
            X = X[np.newaxis, :]

        y = self.model.predict(X, verbose=0)

        # Per-feature reconstruction error of the first sample
        errors = np.abs(X[0] - y[0])
        total_error = float(errors.sum())

        return y, errors, total_error
|
||||
448
examples/anomaly-detection/lib/platform.py
Normal file
448
examples/anomaly-detection/lib/platform.py
Normal file
@ -0,0 +1,448 @@
|
||||
# Copyright 2017 Sasha Goldshtein
|
||||
# Copyright 2018 Red Hat, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
platform.py contains the SYSCALLS table used for mapping syscall numbers to syscall names
|
||||
"""
|
||||
|
||||
# Syscall table for Linux x86_64, not very recent. Automatically generated from
|
||||
# https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/arch/x86/entry/syscalls/syscall_64.tbl?h=linux-6.17.y
|
||||
# using the following command:
|
||||
#
|
||||
# cat arch/x86/entry/syscalls/syscall_64.tbl \
|
||||
# | awk 'BEGIN { print "syscalls = {" }
|
||||
# /^[0-9]/ { print " "$1": b\""$3"\"," }
|
||||
# END { print "}" }'
|
||||
|
||||
# Maps syscall number (int) -> syscall name (bytes).
# The numbering is not contiguous: there are no entries for 336-423 or
# 470-511, and the names at 512-547 also appear at lower numbers.
# NOTE(review): 512-547 are presumably the x32 ABI rows of syscall_64.tbl --
# confirm against the kernel table before relying on that.
SYSCALLS = {
    0: b"read",
    1: b"write",
    2: b"open",
    3: b"close",
    4: b"stat",
    5: b"fstat",
    6: b"lstat",
    7: b"poll",
    8: b"lseek",
    9: b"mmap",
    10: b"mprotect",
    11: b"munmap",
    12: b"brk",
    13: b"rt_sigaction",
    14: b"rt_sigprocmask",
    15: b"rt_sigreturn",
    16: b"ioctl",
    17: b"pread64",
    18: b"pwrite64",
    19: b"readv",
    20: b"writev",
    21: b"access",
    22: b"pipe",
    23: b"select",
    24: b"sched_yield",
    25: b"mremap",
    26: b"msync",
    27: b"mincore",
    28: b"madvise",
    29: b"shmget",
    30: b"shmat",
    31: b"shmctl",
    32: b"dup",
    33: b"dup2",
    34: b"pause",
    35: b"nanosleep",
    36: b"getitimer",
    37: b"alarm",
    38: b"setitimer",
    39: b"getpid",
    40: b"sendfile",
    41: b"socket",
    42: b"connect",
    43: b"accept",
    44: b"sendto",
    45: b"recvfrom",
    46: b"sendmsg",
    47: b"recvmsg",
    48: b"shutdown",
    49: b"bind",
    50: b"listen",
    51: b"getsockname",
    52: b"getpeername",
    53: b"socketpair",
    54: b"setsockopt",
    55: b"getsockopt",
    56: b"clone",
    57: b"fork",
    58: b"vfork",
    59: b"execve",
    60: b"exit",
    61: b"wait4",
    62: b"kill",
    63: b"uname",
    64: b"semget",
    65: b"semop",
    66: b"semctl",
    67: b"shmdt",
    68: b"msgget",
    69: b"msgsnd",
    70: b"msgrcv",
    71: b"msgctl",
    72: b"fcntl",
    73: b"flock",
    74: b"fsync",
    75: b"fdatasync",
    76: b"truncate",
    77: b"ftruncate",
    78: b"getdents",
    79: b"getcwd",
    80: b"chdir",
    81: b"fchdir",
    82: b"rename",
    83: b"mkdir",
    84: b"rmdir",
    85: b"creat",
    86: b"link",
    87: b"unlink",
    88: b"symlink",
    89: b"readlink",
    90: b"chmod",
    91: b"fchmod",
    92: b"chown",
    93: b"fchown",
    94: b"lchown",
    95: b"umask",
    96: b"gettimeofday",
    97: b"getrlimit",
    98: b"getrusage",
    99: b"sysinfo",
    100: b"times",
    101: b"ptrace",
    102: b"getuid",
    103: b"syslog",
    104: b"getgid",
    105: b"setuid",
    106: b"setgid",
    107: b"geteuid",
    108: b"getegid",
    109: b"setpgid",
    110: b"getppid",
    111: b"getpgrp",
    112: b"setsid",
    113: b"setreuid",
    114: b"setregid",
    115: b"getgroups",
    116: b"setgroups",
    117: b"setresuid",
    118: b"getresuid",
    119: b"setresgid",
    120: b"getresgid",
    121: b"getpgid",
    122: b"setfsuid",
    123: b"setfsgid",
    124: b"getsid",
    125: b"capget",
    126: b"capset",
    127: b"rt_sigpending",
    128: b"rt_sigtimedwait",
    129: b"rt_sigqueueinfo",
    130: b"rt_sigsuspend",
    131: b"sigaltstack",
    132: b"utime",
    133: b"mknod",
    134: b"uselib",
    135: b"personality",
    136: b"ustat",
    137: b"statfs",
    138: b"fstatfs",
    139: b"sysfs",
    140: b"getpriority",
    141: b"setpriority",
    142: b"sched_setparam",
    143: b"sched_getparam",
    144: b"sched_setscheduler",
    145: b"sched_getscheduler",
    146: b"sched_get_priority_max",
    147: b"sched_get_priority_min",
    148: b"sched_rr_get_interval",
    149: b"mlock",
    150: b"munlock",
    151: b"mlockall",
    152: b"munlockall",
    153: b"vhangup",
    154: b"modify_ldt",
    155: b"pivot_root",
    156: b"_sysctl",
    157: b"prctl",
    158: b"arch_prctl",
    159: b"adjtimex",
    160: b"setrlimit",
    161: b"chroot",
    162: b"sync",
    163: b"acct",
    164: b"settimeofday",
    165: b"mount",
    166: b"umount2",
    167: b"swapon",
    168: b"swapoff",
    169: b"reboot",
    170: b"sethostname",
    171: b"setdomainname",
    172: b"iopl",
    173: b"ioperm",
    174: b"create_module",
    175: b"init_module",
    176: b"delete_module",
    177: b"get_kernel_syms",
    178: b"query_module",
    179: b"quotactl",
    180: b"nfsservctl",
    181: b"getpmsg",
    182: b"putpmsg",
    183: b"afs_syscall",
    184: b"tuxcall",
    185: b"security",
    186: b"gettid",
    187: b"readahead",
    188: b"setxattr",
    189: b"lsetxattr",
    190: b"fsetxattr",
    191: b"getxattr",
    192: b"lgetxattr",
    193: b"fgetxattr",
    194: b"listxattr",
    195: b"llistxattr",
    196: b"flistxattr",
    197: b"removexattr",
    198: b"lremovexattr",
    199: b"fremovexattr",
    200: b"tkill",
    201: b"time",
    202: b"futex",
    203: b"sched_setaffinity",
    204: b"sched_getaffinity",
    205: b"set_thread_area",
    206: b"io_setup",
    207: b"io_destroy",
    208: b"io_getevents",
    209: b"io_submit",
    210: b"io_cancel",
    211: b"get_thread_area",
    212: b"lookup_dcookie",
    213: b"epoll_create",
    214: b"epoll_ctl_old",
    215: b"epoll_wait_old",
    216: b"remap_file_pages",
    217: b"getdents64",
    218: b"set_tid_address",
    219: b"restart_syscall",
    220: b"semtimedop",
    221: b"fadvise64",
    222: b"timer_create",
    223: b"timer_settime",
    224: b"timer_gettime",
    225: b"timer_getoverrun",
    226: b"timer_delete",
    227: b"clock_settime",
    228: b"clock_gettime",
    229: b"clock_getres",
    230: b"clock_nanosleep",
    231: b"exit_group",
    232: b"epoll_wait",
    233: b"epoll_ctl",
    234: b"tgkill",
    235: b"utimes",
    236: b"vserver",
    237: b"mbind",
    238: b"set_mempolicy",
    239: b"get_mempolicy",
    240: b"mq_open",
    241: b"mq_unlink",
    242: b"mq_timedsend",
    243: b"mq_timedreceive",
    244: b"mq_notify",
    245: b"mq_getsetattr",
    246: b"kexec_load",
    247: b"waitid",
    248: b"add_key",
    249: b"request_key",
    250: b"keyctl",
    251: b"ioprio_set",
    252: b"ioprio_get",
    253: b"inotify_init",
    254: b"inotify_add_watch",
    255: b"inotify_rm_watch",
    256: b"migrate_pages",
    257: b"openat",
    258: b"mkdirat",
    259: b"mknodat",
    260: b"fchownat",
    261: b"futimesat",
    262: b"newfstatat",
    263: b"unlinkat",
    264: b"renameat",
    265: b"linkat",
    266: b"symlinkat",
    267: b"readlinkat",
    268: b"fchmodat",
    269: b"faccessat",
    270: b"pselect6",
    271: b"ppoll",
    272: b"unshare",
    273: b"set_robust_list",
    274: b"get_robust_list",
    275: b"splice",
    276: b"tee",
    277: b"sync_file_range",
    278: b"vmsplice",
    279: b"move_pages",
    280: b"utimensat",
    281: b"epoll_pwait",
    282: b"signalfd",
    283: b"timerfd_create",
    284: b"eventfd",
    285: b"fallocate",
    286: b"timerfd_settime",
    287: b"timerfd_gettime",
    288: b"accept4",
    289: b"signalfd4",
    290: b"eventfd2",
    291: b"epoll_create1",
    292: b"dup3",
    293: b"pipe2",
    294: b"inotify_init1",
    295: b"preadv",
    296: b"pwritev",
    297: b"rt_tgsigqueueinfo",
    298: b"perf_event_open",
    299: b"recvmmsg",
    300: b"fanotify_init",
    301: b"fanotify_mark",
    302: b"prlimit64",
    303: b"name_to_handle_at",
    304: b"open_by_handle_at",
    305: b"clock_adjtime",
    306: b"syncfs",
    307: b"sendmmsg",
    308: b"setns",
    309: b"getcpu",
    310: b"process_vm_readv",
    311: b"process_vm_writev",
    312: b"kcmp",
    313: b"finit_module",
    314: b"sched_setattr",
    315: b"sched_getattr",
    316: b"renameat2",
    317: b"seccomp",
    318: b"getrandom",
    319: b"memfd_create",
    320: b"kexec_file_load",
    321: b"bpf",
    322: b"execveat",
    323: b"userfaultfd",
    324: b"membarrier",
    325: b"mlock2",
    326: b"copy_file_range",
    327: b"preadv2",
    328: b"pwritev2",
    329: b"pkey_mprotect",
    330: b"pkey_alloc",
    331: b"pkey_free",
    332: b"statx",
    333: b"io_pgetevents",
    334: b"rseq",
    335: b"uretprobe",
    # No entries for 336-423.
    424: b"pidfd_send_signal",
    425: b"io_uring_setup",
    426: b"io_uring_enter",
    427: b"io_uring_register",
    428: b"open_tree",
    429: b"move_mount",
    430: b"fsopen",
    431: b"fsconfig",
    432: b"fsmount",
    433: b"fspick",
    434: b"pidfd_open",
    435: b"clone3",
    436: b"close_range",
    437: b"openat2",
    438: b"pidfd_getfd",
    439: b"faccessat2",
    440: b"process_madvise",
    441: b"epoll_pwait2",
    442: b"mount_setattr",
    443: b"quotactl_fd",
    444: b"landlock_create_ruleset",
    445: b"landlock_add_rule",
    446: b"landlock_restrict_self",
    447: b"memfd_secret",
    448: b"process_mrelease",
    449: b"futex_waitv",
    450: b"set_mempolicy_home_node",
    451: b"cachestat",
    452: b"fchmodat2",
    453: b"map_shadow_stack",
    454: b"futex_wake",
    455: b"futex_wait",
    456: b"futex_requeue",
    457: b"statmount",
    458: b"listmount",
    459: b"lsm_get_self_attr",
    460: b"lsm_set_self_attr",
    461: b"lsm_list_modules",
    462: b"mseal",
    463: b"setxattrat",
    464: b"getxattrat",
    465: b"listxattrat",
    466: b"removexattrat",
    467: b"open_tree_attr",
    468: b"file_getattr",
    469: b"file_setattr",
    # No entries for 470-511; the names below repeat ones listed above.
    512: b"rt_sigaction",
    513: b"rt_sigreturn",
    514: b"ioctl",
    515: b"readv",
    516: b"writev",
    517: b"recvfrom",
    518: b"sendmsg",
    519: b"recvmsg",
    520: b"execve",
    521: b"ptrace",
    522: b"rt_sigpending",
    523: b"rt_sigtimedwait",
    524: b"rt_sigqueueinfo",
    525: b"sigaltstack",
    526: b"timer_create",
    527: b"mq_notify",
    528: b"kexec_load",
    529: b"waitid",
    530: b"set_robust_list",
    531: b"get_robust_list",
    532: b"vmsplice",
    533: b"move_pages",
    534: b"preadv",
    535: b"pwritev",
    536: b"rt_tgsigqueueinfo",
    537: b"recvmmsg",
    538: b"sendmmsg",
    539: b"process_vm_readv",
    540: b"process_vm_writev",
    541: b"setsockopt",
    542: b"getsockopt",
    543: b"io_setup",
    544: b"io_submit",
    545: b"execveat",
    546: b"preadv2",
    547: b"pwritev2",
}
|
||||
117
examples/anomaly-detection/lib/probe.py
Normal file
117
examples/anomaly-detection/lib/probe.py
Normal file
@ -0,0 +1,117 @@
|
||||
"""
|
||||
PythonBPF eBPF Probe for Syscall Histogram Collection
|
||||
"""
|
||||
|
||||
from vmlinux import struct_trace_event_raw_sys_enter
|
||||
from pythonbpf import bpf, map, section, bpfglobal, BPF
|
||||
from pythonbpf.helper import pid
|
||||
from pythonbpf.maps import HashMap
|
||||
from ctypes import c_int64
|
||||
from lib import MAX_SYSCALLS, comm_for_pid
|
||||
|
||||
|
||||
# BPF hash map: syscall number -> number of times it has been seen.
# trace_syscall writes it; Probe reads it from userspace under the name
# "histogram".
@bpf
@map
def histogram() -> HashMap:
    return HashMap(key=c_int64, value=c_int64, max_entries=1024)
|
||||
|
||||
|
||||
# BPF hash map with a single slot: key 0 holds the PID to trace.
# Probe.start() writes it; trace_syscall reads it to filter events.
@bpf
@map
def target_pid_map() -> HashMap:
    return HashMap(key=c_int64, value=c_int64, max_entries=1)
|
||||
|
||||
|
||||
# Handler for the raw_syscalls/sys_enter tracepoint: increments the histogram
# counter for the syscall number, filtered to the target PID when one is set.
# NOTE(review): this body is presumably compiled to BPF from its source by
# PythonBPF, so keep the statement forms as they are -- confirm before
# restructuring.
@bpf
@section("tracepoint/raw_syscalls/sys_enter")
def trace_syscall(ctx: struct_trace_event_raw_sys_enter) -> c_int64:
    syscall_id = ctx.id
    current_pid = pid()
    # Key 0 is the slot Probe.start() fills with the target PID. If the lookup
    # yields nothing (falsy), the PID check below is skipped.
    target = target_pid_map.lookup(0)
    if target:
        if current_pid != target:
            return 0  # type: ignore
    # Ignore ids outside 0..547; 548 is the same value as lib.MAX_SYSCALLS.
    # NOTE(review): presumably a literal because the imported constant is not
    # usable inside the BPF program -- confirm.
    if syscall_id < 0 or syscall_id >= 548:
        return 0  # type: ignore
    count = histogram.lookup(syscall_id)
    if count:
        histogram.update(syscall_id, count + 1)
    else:
        # First occurrence of this syscall id (or a falsy stored count).
        histogram.update(syscall_id, 1)
    return 0  # type: ignore
|
||||
|
||||
|
||||
# BPF global holding the program's license string.
# NOTE(review): presumably what lets the kernel accept GPL-only helpers for
# this program -- confirm.
@bpf
@bpfglobal
def LICENSE() -> str:
    return "GPL"
|
||||
|
||||
|
||||
# Single module-level BPF object, created at import time; Probe.start() reuses
# it for every Probe instance.
ebpf_prog = BPF()
|
||||
|
||||
|
||||
class Probe:
    """
    Syscall histogram probe for a target process.

    Loads the module-level BPF program, tells it which PID to trace through
    ``target_pid_map``, and reads per-syscall counters back from the
    ``histogram`` map.

    Usage:
        probe = Probe(target_pid=1234)
        probe.start()
        histogram = probe.get_histogram()
    """

    def __init__(self, target_pid: int, max_syscalls: int = MAX_SYSCALLS):
        """
        Args:
            target_pid: PID of the process to trace.
            max_syscalls: Length of the list returned by ``get_histogram``.
                The BPF program itself only counts ids below 548.

        Raises:
            ValueError: If the process name for ``target_pid`` cannot be read.
        """
        self.target_pid = target_pid
        self.max_syscalls = max_syscalls
        self.comm = comm_for_pid(target_pid)

        if self.comm is None:
            raise ValueError(f"Cannot find process with PID {target_pid}")

        # Populated by start().
        self._bpf = None
        self._histogram_map = None
        self._target_map = None

    def start(self):
        """
        Compile, load, and attach the BPF probe.

        Returns:
            ``self``, so the call can be chained.
        """
        bpf_obj = ebpf_prog
        self._bpf = bpf_obj
        bpf_obj.load()

        # Write the target PID BEFORE attaching. The handler skips its PID
        # filter while slot 0 is empty, so attaching first would briefly count
        # syscalls from every process into the histogram.
        target_map = bpf_obj["target_pid_map"]
        target_map.update(0, self.target_pid)

        bpf_obj.attach_all()

        # Expose the map references only once the probe is fully attached.
        self._histogram_map = bpf_obj["histogram"]
        self._target_map = target_map

        return self

    def _read_count(self, syscall_id: int) -> int:
        """Return the counter for ``syscall_id``, or 0 if it cannot be read."""
        try:
            count = self._histogram_map.lookup(syscall_id)
            return int(count) if count is not None else 0
        except Exception:
            # Best effort: any lookup/conversion failure (presumably a key
            # that was never written) is reported as a zero count.
            return 0

    def get_histogram(self) -> list:
        """
        Read current histogram values as a list.

        Returns:
            A list of ``max_syscalls`` ints indexed by syscall number;
            counters that cannot be read are 0.

        Raises:
            RuntimeError: If ``start()`` has not been called.
        """
        if self._histogram_map is None:
            raise RuntimeError("Probe not started. Call start() first.")

        return [self._read_count(syscall_id) for syscall_id in range(self.max_syscalls)]

    def __getitem__(self, syscall_id: int) -> int:
        """
        Allow indexing: probe[syscall_id]

        Raises:
            RuntimeError: If ``start()`` has not been called.
        """
        if self._histogram_map is None:
            raise RuntimeError("Probe not started")

        return self._read_count(syscall_id)
|
||||
335
examples/anomaly-detection/main.py
Normal file
335
examples/anomaly-detection/main.py
Normal file
@ -0,0 +1,335 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Process Behavior Anomaly Detection using PythonBPF and Autoencoders
|
||||
|
||||
Ported from evilsocket's BCC implementation to PythonBPF.
|
||||
https://github.com/evilsocket/ebpf-process-anomaly-detection
|
||||
|
||||
Usage:
|
||||
# 1. Learn normal behavior from a process
|
||||
sudo python main.py --learn --pid 1234 --data normal.csv
|
||||
|
||||
# 2. Train the autoencoder (no sudo needed)
|
||||
python main.py --train --data normal.csv --model model.h5
|
||||
|
||||
# 3. Monitor for anomalies
|
||||
sudo python main.py --run --pid 1234 --model model.h5
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from collections import Counter
|
||||
|
||||
from lib import MAX_SYSCALLS
|
||||
from lib.ml import AutoEncoder
|
||||
from lib.platform import SYSCALLS
|
||||
from lib.probe import Probe
|
||||
|
||||
# Root logging setup for the CLI: INFO and above, each record prefixed with a
# timestamp, the level and the logger name.
_LOG_FORMAT = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
_LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, datefmt=_LOG_DATE_FORMAT)

# Logger for this module's own messages.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def learn(pid: int, data_path: str, poll_interval_ms: int) -> None:
    """
    Record syscall-histogram deltas of a process into a CSV file.

    Refuses to overwrite an existing file, attaches a Probe to ``pid`` and
    polls its histogram until interrupted with Ctrl+C. Whenever the
    histogram differs from the previous poll, one CSV row (timestamp plus
    one delta per syscall slot) is written and flushed.

    Args:
        pid: Target process ID
        data_path: Destination CSV file; the process exits if it exists
        poll_interval_ms: Pause between two polls, in milliseconds
    """
    if os.path.exists(data_path):
        logger.error(
            f"{data_path} already exists.Delete it or use a different filename."
        )
        sys.exit(1)

    try:
        probe = Probe(pid)
    except ValueError as err:
        logger.error(str(err))
        sys.exit(1)

    comm_name = "unknown"
    if probe.comm:
        comm_name = probe.comm.decode()

    print(f"📊 Learning from process {pid} ({comm_name})")
    print(f"📁 Saving data to {data_path}")
    print(f"⏱️ Polling interval: {poll_interval_ms}ms")
    print("Press Ctrl+C to stop...\n")

    probe.start()

    last_histogram = [0.0] * MAX_SYSCALLS
    last_report = time.time()
    saved = 0
    pause = poll_interval_ms / 1000.0

    column_names = ",".join(f"sys_{i}" for i in range(MAX_SYSCALLS))

    with open(data_path, "w") as out:
        out.write(f"sample_time,{column_names}\n")

        try:
            while True:
                current = list(map(float, probe.get_histogram()))

                # Only changed histograms produce a sample row.
                if current != last_histogram:
                    deltas = _compute_deltas(last_histogram, current)
                    last_histogram = list(current)

                    joined = ",".join(str(d) for d in deltas)
                    out.write(f"{time.time()},{joined}\n")
                    out.flush()
                    saved += 1

                # Progress line at most once per second.
                tick = time.time()
                if tick - last_report >= 1.0:
                    print(f" {saved} samples saved...")
                    last_report = tick

                time.sleep(pause)

        except KeyboardInterrupt:
            print(f"\n✅ Stopped. Saved {saved} samples to {data_path}")
|
||||
|
||||
|
||||
def train(data_path: str, model_path: str, epochs: int, batch_size: int) -> None:
    """
    Fit the autoencoder on a previously captured CSV and print a summary.

    Exits with status 1 when ``data_path`` does not exist. Otherwise builds
    an ``AutoEncoder`` for ``model_path``, trains it and prints the error
    threshold returned by training.

    Args:
        data_path: Path to training CSV data
        model_path: Path handed to the AutoEncoder for the trained model
        epochs: Number of training epochs
        batch_size: Training batch size
    """
    data_missing = not os.path.exists(data_path)
    if data_missing:
        logger.error(f"Data file {data_path} not found.Run --learn first.")
        sys.exit(1)

    for line in (
        f"🧠 Training autoencoder on {data_path}",
        f" Epochs: {epochs}",
        f" Batch size: {batch_size}",
        "",
    ):
        print(line)

    autoencoder = AutoEncoder(model_path)
    _history, threshold = autoencoder.train(data_path, epochs, batch_size)

    rule = "=" * 50
    print()
    print(rule)
    print("✅ Training complete!")
    print(f" Model saved to: {model_path}")
    print(f" Error threshold: {threshold:.6f}")
    print()
    print(f"💡 Use --max-error {threshold:.4f} when running detection")
    print(rule)
|
||||
|
||||
|
||||
def run(pid: int, model_path: str, max_error: float, poll_interval_ms: int) -> None:
    """
    Watch a process and report histogram changes the model scores as anomalous.

    Exits with status 1 when the model file is missing or the Probe rejects
    ``pid``. Otherwise polls the probe until Ctrl+C; every changed histogram
    is converted to deltas, scored by the autoencoder and reported when the
    total error exceeds ``max_error``.

    Args:
        pid: Target process ID
        model_path: Path to trained model
        max_error: Anomaly detection threshold
        poll_interval_ms: Pause between two polls, in milliseconds
    """
    if not os.path.exists(model_path):
        logger.error(f"Model file {model_path} not found. Run --train first.")
        sys.exit(1)

    try:
        probe = Probe(pid)
    except ValueError as err:
        logger.error(str(err))
        sys.exit(1)

    autoencoder = AutoEncoder(model_path, load=True)

    comm_name = "unknown"
    if probe.comm:
        comm_name = probe.comm.decode()

    print(f"🔍 Monitoring process {pid} ({comm_name}) for anomalies")
    print(f" Error threshold: {max_error}")
    print(f" Polling interval: {poll_interval_ms}ms")
    print("Press Ctrl+C to stop...\n")

    probe.start()

    last_histogram = [0.0] * MAX_SYSCALLS
    anomalies = 0
    checks = 0
    pause = poll_interval_ms / 1000.0

    try:
        while True:
            current = list(map(float, probe.get_histogram()))

            # Unchanged histograms are not scored.
            if current != last_histogram:
                deltas = _compute_deltas(last_histogram, current)
                last_histogram = list(current)
                checks += 1

                _, feat_errors, total_error = autoencoder.predict([deltas])

                if total_error > max_error:
                    anomalies += 1
                    _report_anomaly(anomalies, total_error, max_error, feat_errors)

            time.sleep(pause)

    except KeyboardInterrupt:
        print("\n✅ Stopped.")
        print(f" Checks performed: {checks}")
        print(f" Anomalies detected: {anomalies}")
|
||||
|
||||
|
||||
def _compute_deltas(prev: list[float], current: list[float]) -> list[float]:
    """Return the per-slot relative change between two histograms.

    For each pair ``(p, c)`` the result is ``1.0 - p / c``; slots whose
    current value is zero yield ``0.0`` so no division by zero occurs.
    Pairs are formed with ``zip``, so the result is as long as the shorter
    input.
    """
    return [
        1.0 - (before / after) if after != 0.0 else 0.0
        for before, after in zip(prev, current)
    ]
|
||||
|
||||
|
||||
def _report_anomaly(
    count: int,
    total_error: float,
    threshold: float,
    feat_errors: list[float],
) -> None:
    """Print one anomaly report listing the three largest per-syscall errors.

    Args:
        count: Running number of the anomaly, shown in the headline
        total_error: Overall error of the sample
        threshold: Threshold the error was compared against
        feat_errors: Per-feature errors; the position is used as syscall id
    """
    print(f"🚨 ANOMALY #{count} detected!")
    print(f" Total error: {total_error:.4f} (threshold: {threshold})")

    # Position in feat_errors is the key used for the SYSCALLS name lookup.
    worst = Counter(dict(enumerate(feat_errors))).most_common(3)

    print(" Top anomalous syscalls:")
    for syscall_idx, syscall_err in worst:
        label = SYSCALLS.get(syscall_idx, f"syscall_{syscall_idx}")
        print(f" • {label!r}: {syscall_err:.4f}")
    print()
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse command line arguments.

    Builds the parser for the three mutually exclusive actions
    (``--learn``, ``--train``, ``--run``) plus their tuning options and
    parses ``sys.argv``.

    Returns:
        The populated ``argparse.Namespace``. No action flag is required
        here; the caller decides what to do when none was given.
    """
    parser = argparse.ArgumentParser(
        description="Process anomaly detection with PythonBPF and Autoencoders",
        # Raw formatter keeps the hand-formatted examples in the epilog intact.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Learn from a process (e.g., Firefox) for a few minutes
  sudo python main.py --learn --pid $(pgrep -o firefox) --data firefox.csv

  # Train the model (no sudo needed)
  python main.py --train --data firefox.csv --model firefox.h5

  # Monitor the same process for anomalies
  sudo python main.py --run --pid $(pgrep -o firefox) --model firefox.h5

  # Full workflow for nginx:
  sudo python main.py --learn --pid $(pgrep -o nginx) --data nginx_normal.csv
  python main.py --train --data nginx_normal.csv --model nginx.h5 --epochs 100
  sudo python main.py --run --pid $(pgrep -o nginx) --model nginx.h5 --max-error 0.05
""",
    )

    # At most one action may be given on the command line.
    actions = parser.add_mutually_exclusive_group()
    actions.add_argument(
        "--learn",
        action="store_true",
        help="Capture syscall patterns from a process",
    )
    actions.add_argument(
        "--train",
        action="store_true",
        help="Train autoencoder on captured data",
    )
    actions.add_argument(
        "--run",
        action="store_true",
        help="Monitor process for anomalies",
    )

    parser.add_argument(
        "--pid",
        type=int,
        default=0,  # 0 is the "not provided" marker
        help="Target process ID",
    )
    parser.add_argument(
        "--data",
        default="data.csv",
        help="CSV file for training data (default: data.csv)",
    )
    parser.add_argument(
        "--model",
        default="model.keras",
        # Help text previously advertised model.h5, contradicting the default.
        help="Model file path (default: model.keras)",
    )
    parser.add_argument(
        "--time",
        type=int,
        default=100,
        help="Polling interval in milliseconds (default: 100)",
    )
    parser.add_argument(
        "--epochs",
        type=int,
        default=200,
        help="Training epochs (default: 200)",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=16,
        help="Training batch size (default: 16)",
    )
    parser.add_argument(
        "--max-error",
        type=float,
        default=0.09,
        help="Anomaly detection threshold (default: 0.09)",
    )

    return parser.parse_args()
|
||||
|
||||
|
||||
def main() -> None:
    """Parse the command line and dispatch to learn, train or run.

    With no action flag a short hint is printed and the process exits with
    status 0. ``--learn`` and ``--run`` need a non-zero ``--pid``; without
    it an error is logged and the process exits with status 1.
    """
    args = parse_args()

    if not (args.learn or args.train or args.run):
        print("No action specified.Use --learn, --train, or --run.")
        print("Run with --help for usage information.")
        sys.exit(0)

    # Training works on files only and needs no target process.
    if args.train:
        train(args.data, args.model, args.epochs, args.batch_size)
        return

    # Remaining actions (learn / run) attach to a process.
    if args.pid == 0:
        if args.learn:
            logger.error("--pid required for --learn")
        else:
            logger.error("--pid required for --run")
        sys.exit(1)

    if args.learn:
        learn(args.pid, args.data, args.time)
    else:
        run(args.pid, args.model, args.max_error, args.time)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "pythonbpf"
|
||||
version = "0.1.7"
|
||||
version = "0.1.8"
|
||||
description = "Reduced Python frontend for eBPF"
|
||||
authors = [
|
||||
{ name = "r41k0u", email="pragyanshchaturvedi18@gmail.com" },
|
||||
@ -34,6 +34,14 @@ dependencies = [
|
||||
"pylibbpf"
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
docs = [
|
||||
"sphinx>=7.0",
|
||||
"myst-parser>=2.0",
|
||||
"sphinx-rtd-theme>=2.0",
|
||||
"sphinx-copybutton",
|
||||
]
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
where = ["."]
|
||||
include = ["pythonbpf*"]
|
||||
|
||||
@ -25,7 +25,7 @@ import re
|
||||
|
||||
logger: Logger = logging.getLogger(__name__)
|
||||
|
||||
VERSION = "v0.1.7"
|
||||
VERSION = "v0.1.8"
|
||||
|
||||
|
||||
def finalize_module(original_str):
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
BPF_CLANG := clang
|
||||
CFLAGS := -emit-llvm -target bpf -c
|
||||
CFLAGS := -emit-llvm -target bpf -c -D__TARGET_ARCH_x86
|
||||
|
||||
SRC := $(wildcard *.bpf.c)
|
||||
LL := $(SRC:.bpf.c=.bpf.ll)
|
||||
@ -10,7 +10,7 @@ LL0 := $(SRC:.bpf.c=.bpf.o0.ll)
|
||||
all: $(LL) $(OBJ) $(LL0)
|
||||
|
||||
%.bpf.o: %.bpf.c
|
||||
$(BPF_CLANG) -O2 -g -target bpf -c $< -o $@
|
||||
$(BPF_CLANG) -O2 -D__TARGET_ARCH_x86 -g -target bpf -c $< -o $@
|
||||
|
||||
%.bpf.ll: %.bpf.c
|
||||
$(BPF_CLANG) $(CFLAGS) -O2 -g -S $< -o $@
|
||||
|
||||
@ -1,18 +1,16 @@
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/ip.h>
|
||||
#include "vmlinux.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
struct fake_iphdr {
|
||||
unsigned short useless;
|
||||
unsigned short tot_len;
|
||||
unsigned short id;
|
||||
unsigned short frag_off;
|
||||
unsigned char ttl;
|
||||
unsigned char protocol;
|
||||
unsigned short check;
|
||||
unsigned int saddr;
|
||||
unsigned int daddr;
|
||||
unsigned short useless;
|
||||
unsigned short tot_len;
|
||||
unsigned short id;
|
||||
unsigned short frag_off;
|
||||
unsigned char ttl;
|
||||
unsigned char protocol;
|
||||
unsigned short check;
|
||||
unsigned int saddr;
|
||||
unsigned int daddr;
|
||||
};
|
||||
|
||||
SEC("xdp")
|
||||
@ -25,9 +23,9 @@ int xdp_prog(struct xdp_md *ctx) {
|
||||
}
|
||||
struct fake_iphdr *iph = (void *)data + sizeof(struct ethhdr);
|
||||
|
||||
bpf_printk("%d", iph->saddr);
|
||||
bpf_printk("%d", iph->saddr);
|
||||
|
||||
return XDP_PASS;
|
||||
return XDP_PASS;
|
||||
}
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
||||
Reference in New Issue
Block a user