124 Commits

Author SHA1 Message Date
cf99b3bb9a Fix call to get_or_create_ptr_from_arg for probe_read_str 2025-11-07 19:16:48 +05:30
6c85b248ce Init sz in get_or_create_ptr_from_arg 2025-11-07 19:03:21 +05:30
b5a3494cc6 Fix typo in get_or_create_ptr_from_arg 2025-11-07 19:01:40 +05:30
be62972974 Fix ScratchPoolManager::counter 2025-11-07 19:00:57 +05:30
2f4a7d2f90 Remove get_struct_char_array_ptr in favour of get_char_array_ptr_and_size, wrap it in get_or_crate_ptr_from_arg to use in bpf_helper_handler 2025-11-07 18:54:59 +05:30
3ccd3f767e Add expected types for pointer creation of args in probe_read handler 2025-11-06 19:59:04 +05:30
2e37726922 Add signature relection for all helper handlers except print 2025-11-06 19:47:57 +05:30
5b36726b7d Make bpf_skb_store_bytes work 2025-11-05 20:02:39 +05:30
3e6cea2b67 Move get_struct_char_array_ptr from helper/printk_formatter to helper/helper_utils, enable array to ptr conversion in skb_store_bytes 2025-11-05 19:10:58 +05:30
338d4994d8 Fix count_temps_in_call to only look for Pointer args of a helper_sig 2025-11-05 17:36:37 +05:30
3078d4224d Add typed scratch space support to the bpf_skb_store_bytes helper 2025-11-04 16:09:11 +05:30
7d29790f00 Make use of new get_next_temp in helpers 2025-11-04 16:02:56 +05:30
963e2a8171 Change ScratchPoolManager to use typed scratch space 2025-11-04 14:16:44 +05:30
123a92af1d Change allocation pass to generate typed temp variables 2025-11-04 06:20:39 +05:30
752f564d3f Change count_temps_in_call to return hashmap of types 2025-11-04 05:40:22 +05:30
d8cddb9799 Add signature extraction to HelperHandlerRegistry 2025-11-04 05:19:22 +05:30
33e18f6d6d Introduce HelperSignature in HelperHandlerRegistry 2025-11-03 21:21:13 +05:30
5e371787eb Fix the number of args for skb_store_bytes by making the first arg implicit 2025-11-03 21:11:16 +05:30
67c9d9b932 Fix imports for bpf_skb_store_bytes 2025-11-02 04:33:45 +05:30
f757a32a63 Implement bpf_skb_store_bytes_emitter 2025-11-02 04:32:05 +05:30
c5de92b9d0 Add BPF_SKB_STORE_BYTES to HelperIDs 2025-11-02 04:17:15 +05:30
4efd3223cd Add passing uid_gid helper test 2025-11-02 03:47:26 +05:30
4884ed7577 Fix imports for bpf_get_current_uid_gid 2025-11-02 03:35:41 +05:30
5b7769dd38 Implement bpf_get_current_uid_gid_emitter 2025-11-02 03:34:04 +05:30
b7c1e92f05 Add BPF_GET_CURRENT_UID_GID to HelperIDs 2025-11-02 03:29:02 +05:30
8b28a927c3 Add helpful TODO to PID_TGID emitter 2025-11-02 03:27:27 +05:30
f9ee43e7ef Add passing test smp_processor_id.py for helpers 2025-11-01 14:13:52 +05:30
dabb8bf0df Fix imports for BPF_GET_SMP_PROCESSOR_ID 2025-11-01 14:07:47 +05:30
19dedede53 Implement BPF_GET_SMP_PROCESSOR_ID helper 2025-11-01 14:05:50 +05:30
82cac8f8ef Add BPF_GET_SMP_PROCESSOR_ID to HelperIDs 2025-11-01 14:02:07 +05:30
70a04f54d1 Add passing test for bpf_probe_read helper 2025-11-01 13:51:08 +05:30
ec2ea835e5 Fix imports and type issues for bpf_probe_read 2025-11-01 13:50:23 +05:30
2257c175ed Implement BPF_PROBE_READ helper 2025-11-01 13:14:50 +05:30
5bf60d69b8 Add BPF_PROBE_READ to HelperIDs 2025-11-01 12:52:15 +05:30
0006e26b08 Add passing test for bpf_get_prandom_u32 implementation 2025-10-27 01:09:27 +05:30
5cbd9a531e Add bpf_get_prandom_u32 helper 2025-10-27 01:08:56 +05:30
5c1e7103a6 Add Python notebook examples for current BCC examples 2025-10-23 00:28:45 +05:30
576fa2f106 Add interactive Python notebook for hello_world BCC Example 2025-10-22 21:58:32 +05:30
76a873cb0d Update clone-matplotlib example 2025-10-22 21:47:16 +05:30
e86c6082c9 Add BCC examples and change dir structure in setup.sh 2025-10-22 21:46:45 +05:30
cb1ad15f43 Fix examples/clone_plot to use new syntax and pylibbpf API 2025-10-22 20:54:59 +05:30
b24b3ed250 Remove TypedDict from assignment_info in favour of dataclasses 2025-10-22 20:48:56 +05:30
beaad996db Fix map access syntax in examples/xdp_pass 2025-10-22 20:07:38 +05:30
99b92e44e3 Fix exapmles/kprobes to use latest pylibbpf 2025-10-22 20:04:02 +05:30
ce7adaadb6 Fix examples/hello_world to use latest pylibbpf 2025-10-22 19:57:47 +05:30
5ac316a1ac Fix examples/binops_demo.py syntax 2025-10-22 19:52:23 +05:30
7a99d21b24 Fix typo in BCC Examples README 2025-10-22 04:49:22 +05:30
cf05e4959d Add README for BCC Examples 2025-10-22 04:45:09 +05:30
a7394ccafa bump version 2025-10-22 04:18:10 +05:30
63f378c34b Merge pull request #51 from pythonbpf/bcc_examples
Port BCC tutorial examples to PythonBPF
2025-10-22 04:14:09 +05:30
37af7d2e20 Janitorial fix format 2025-10-22 04:12:42 +05:30
77c0d131be Add permission error handling in trace_pipe 2025-10-22 04:10:31 +05:30
84fdf52658 Merge branch 'master' into bcc_examples 2025-10-22 04:09:07 +05:30
f4d903d4b5 Fix create_targets_and_rvals early returns 2025-10-22 04:06:22 +05:30
f9494c870b Fix logical fallacy in get_char_array_ptr_and_size 2025-10-22 04:01:45 +05:30
0d4ebf72b6 lint readme
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-22 03:59:53 +05:30
c65900b733 Merge pull request #62 from pythonbpf/script
make script curlable
2025-10-22 03:25:31 +05:30
711e34cae1 Add script running instruction to README.md 2025-10-22 03:25:00 +05:30
cf3f4a0002 make script curlable 2025-10-22 03:21:05 +05:30
d50157fa09 Merge pull request #61 from pythonbpf/script
Add setup script
2025-10-22 02:47:03 +05:30
ba860b5039 add setup script 2025-10-22 02:44:13 +05:30
798f07986a Add enhanced vfsreadlat BCC example with live plotly and dash graphs on browser 2025-10-21 05:36:59 +05:30
caecb8c9b0 Merge pull request #59 from pythonbpf/vmlinux-handler
LGTM
2025-10-21 05:01:14 +05:30
1a0e21eaa8 support vmlinux enum in map arguments 2025-10-21 04:59:46 +05:30
e98d5684ea Add enhanced live vfsreadlat.py monitor BCC example with rich library 2025-10-21 04:31:23 +05:30
190baf2674 support vmlinux enum in printk handler 2025-10-21 04:10:25 +05:30
c07707a9ad Add vfsreadlat.py BCC example 2025-10-21 03:56:04 +05:30
c3f3d1e564 format chore 2025-10-21 03:43:22 +05:30
e7734629a5 support binary ops with vmlinux enums 2025-10-21 03:41:21 +05:30
5955db88cf add vmlinux expressions to eval expr 2025-10-21 03:24:27 +05:30
66caa3cf1d Merge pull request #58 from pythonbpf/vmlinux-handler
finish table construction for vmlinux symbol info transfer
2025-10-20 22:19:27 +05:30
e499c29d42 float vmlinux_symtab till process_func_body 2025-10-20 22:13:38 +05:30
76d0dbfbf4 change globvar string to real global variable 2025-10-20 21:36:54 +05:30
56a2fbaf5b change globvar string to real global variable 2025-10-20 21:36:46 +05:30
3b323132f0 change equality condition 2025-10-20 21:29:04 +05:30
c9363e62a9 move field name generation to assignments dict 2025-10-20 21:27:42 +05:30
a20643f3a7 move assignemnt tablegen to ir_generation.py 2025-10-20 18:41:59 +05:30
d0fecbc03c Add sync_perf_output BCC example 2025-10-20 15:43:57 +05:30
174095973b Fix userspace calling in sys_sync 2025-10-20 15:10:12 +05:30
3273620447 Fix userspace calling in sync_timing 2025-10-20 15:08:27 +05:30
610cbe82a8 Fix userspace calling in sync_count 2025-10-20 15:04:40 +05:30
54c97e648b Fix userspace calling in hello_fields 2025-10-20 15:02:31 +05:30
dd9411b7b9 Fix userspace calling in hello_world 2025-10-20 14:59:13 +05:30
aa85d0e0ef Remove unnecessary attached var in hello_perf_output 2025-10-20 14:58:50 +05:30
eee212795f add assignment dict handling to class_handler.py 2025-10-20 04:41:00 +05:30
8da50b7068 float assignments to class_handler.py 2025-10-20 04:31:23 +05:30
e636fcaea7 add assignment info class family and change how assignments are handled 2025-10-20 04:23:52 +05:30
5bba8dce12 Complete hello_perf_output BCC example 2025-10-20 04:02:34 +05:30
8c976e46ae Fix loglevel and pylibbpf import in codegen 2025-10-20 04:00:30 +05:30
5512bf52e4 add todo on struct name generator 2025-10-18 23:29:31 +05:30
079ceaa0d6 Merge pull request #57 from pythonbpf/vmlinux-debug-info
Add debug info handling to vmlinux
* Does not add support for recursive ctypes pointer based resolution
* Still does not support unions and function pointers.
* Has the mechanism to build for function pointers added.
2025-10-18 23:10:57 +05:30
328b792e4e add function pointer detection warning as well as identify ctypes non recursion error 2025-10-18 23:09:29 +05:30
5dafa5bd0d add function pointer detection warning as well as identify ctypes non recursion error 2025-10-18 22:59:01 +05:30
33aa794718 identify error in pointer to ctypes subclass dependency fixing 2025-10-18 22:48:34 +05:30
d855e9ef2e correct mistake in null pointer. Also identify error in pointer to char debug info generation 2025-10-18 22:32:03 +05:30
de19c8fc90 rename passing test 2025-10-18 22:15:58 +05:30
dc1b243e82 correct error size calculation for arrays 2025-10-18 22:13:59 +05:30
1b4272b408 members generated with wrong size calc for arrays 2025-10-18 22:02:10 +05:30
101183c315 members generated for simple ctypes 2025-10-18 21:45:26 +05:30
3a3116253f generate members with dummy types 2025-10-18 03:53:10 +05:30
9b7aa6d8be add dependency debug info list 2025-10-18 03:27:26 +05:30
60737d9894 Improve error handling in compile, pass structs_sym_tab and maps_sym_tab to BpfProgram 2025-10-17 03:25:15 +05:30
fc55b7ecaa Add passing ptr_to_char_array test for strings 2025-10-16 23:42:03 +05:30
c143739a04 Add passing test struct_field_to_var_str for strings 2025-10-16 23:21:55 +05:30
51a1be0b0b add classifiers 2025-10-16 19:09:19 +05:30
009b11aca6 Implement bpf_probe_read_kernel_str helper, Allow i8* to i8 ArrayType conversion 2025-10-15 23:52:15 +05:30
9fc3c85b75 Add struct-field to named-var assignment of char arrays 2025-10-15 20:02:18 +05:30
fd630293f7 Remove duplicate alignment logic from allocation_pass 2025-10-15 18:14:13 +05:30
81f72a7698 Support var-to-var and var-to-struct-fld allocations 2025-10-15 18:10:04 +05:30
fb480639a5 Make char array struct fields work 2025-10-15 17:27:43 +05:30
13a804f7ac Implement bpf_get_current_comm_emitter 2025-10-15 14:03:09 +05:30
a0d954b20b Register bpf_get_current_comm_emitter for comm 2025-10-15 12:47:30 +05:30
b105c70b38 Add hello_perf_output BCC-Example skeleton 2025-10-15 12:35:59 +05:30
0a1557e318 Add sync_count BCC example to use tuple-like assignment 2025-10-14 23:33:54 +05:30
c56928bc8a Add create_targets_and_rvals, use it in handle_assign to enable tuple assignment 2025-10-14 23:30:59 +05:30
dd3fc74d09 Add support for tuple style multiiassignment in allocation_pass 2025-10-14 23:06:43 +05:30
4a79f9b9b2 Add sync_count BCC-Example 2025-10-14 18:50:46 +05:30
b676a5ebb4 Fix return in BCC-Examples 2025-10-14 17:05:05 +05:30
d7329ad3d7 Add BCC sync_timing example 2025-10-14 16:07:55 +05:30
903654daff Add hello_fields BCC Example 2025-10-14 04:42:12 +05:30
263402d137 Add trace_fields 2025-10-14 04:22:17 +05:30
37d1e1b143 Add sys_sync BCC example 2025-10-14 03:54:02 +05:30
edc33733d9 Add trace_pipe utility 2025-10-14 03:51:43 +05:30
18d62d605a Add hello_world BCC example 2025-10-13 20:11:27 +05:30
62 changed files with 4492 additions and 532 deletions

20
BCC-Examples/README.md Normal file
View File

@ -0,0 +1,20 @@
## BCC examples ported to PythonBPF
This folder contains examples of BCC tutorial examples that have been ported to use **PythonBPF**.
## Requirements
- install `pythonbpf` and `pylibbpf` using pip.
- You will also need `matplotlib` for vfsreadlat.py example.
- You will also need `rich` for vfsreadlat_rich.py example.
- You will also need `plotly` and `dash` for vfsreadlat_plotly.py example.
## Usage
- You'll need root privileges to run these examples. If you are using a virtualenv, use the following command to run the scripts:
```bash
sudo <path_to_virtualenv>/bin/python3 <script_name>.py
```
- For vfsreadlat_plotly.py, run the following command to start the Dash server:
```bash
sudo <path_to_virtualenv>/bin/python3 vfsreadlat_plotly/bpf_program.py
```
Then open your web browser and navigate to the given URL.

View File

@ -0,0 +1,83 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "28cf2e27-41e2-461c-a39c-147417141a4e",
"metadata": {},
"outputs": [],
"source": [
"from pythonbpf import bpf, section, bpfglobal, BPF, trace_fields\n",
"from ctypes import c_void_p, c_int64"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "133190e5-5a99-4585-b6e1-91224ed973c2",
"metadata": {},
"outputs": [],
"source": [
"@bpf\n",
"@section(\"tracepoint/syscalls/sys_enter_clone\")\n",
"def hello_world(ctx: c_void_p) -> c_int64:\n",
" print(\"Hello, World!\")\n",
" return 0\n",
"\n",
"\n",
"@bpf\n",
"@bpfglobal\n",
"def LICENSE() -> str:\n",
" return \"GPL\"\n",
"\n",
"\n",
"# Compile and load\n",
"b = BPF()\n",
"b.load()\n",
"b.attach_all()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d3934efb-4043-4545-ae4c-c50ec40a24fd",
"metadata": {},
"outputs": [],
"source": [
"# header\n",
"print(f\"{'TIME(s)':<18} {'COMM':<16} {'PID':<6} {'MESSAGE'}\")\n",
"\n",
"# format output\n",
"while True:\n",
" try:\n",
" (task, pid, cpu, flags, ts, msg) = trace_fields()\n",
" except ValueError:\n",
" continue\n",
" except KeyboardInterrupt:\n",
" exit()\n",
" print(f\"{ts:<18} {task:<16} {pid:<6} {msg}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -0,0 +1,34 @@
from pythonbpf import bpf, section, bpfglobal, BPF, trace_fields
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_clone")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return 0 # type: ignore [return-value]
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Compile and load
b = BPF()
b.load()
b.attach_all()
# header
print(f"{'TIME(s)':<18} {'COMM':<16} {'PID':<6} {'MESSAGE'}")
# format output
while True:
try:
(task, pid, cpu, flags, ts, msg) = trace_fields()
except ValueError:
continue
except KeyboardInterrupt:
exit()
print(f"{ts:<18} {task:<16} {pid:<6} {msg}")

View File

@ -0,0 +1,110 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "79b74928-f4b4-4320-96e3-d973997de2f4",
"metadata": {},
"outputs": [],
"source": [
"from pythonbpf import bpf, map, struct, section, bpfglobal, BPF\n",
"from pythonbpf.helper import ktime, pid, comm\n",
"from pythonbpf.maps import PerfEventArray\n",
"from ctypes import c_void_p, c_int64"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5bdb0329-ae2d-45e8-808e-5ed5b1374204",
"metadata": {},
"outputs": [],
"source": [
"@bpf\n",
"@struct\n",
"class data_t:\n",
" pid: c_int64\n",
" ts: c_int64\n",
" comm: str(16)\n",
"\n",
"\n",
"@bpf\n",
"@map\n",
"def events() -> PerfEventArray:\n",
" return PerfEventArray(key_size=c_int64, value_size=c_int64)\n",
"\n",
"\n",
"@bpf\n",
"@section(\"tracepoint/syscalls/sys_enter_clone\")\n",
"def hello(ctx: c_void_p) -> c_int64:\n",
" dataobj = data_t()\n",
" dataobj.pid, dataobj.ts = pid(), ktime()\n",
" comm(dataobj.comm)\n",
" events.output(dataobj)\n",
" return 0\n",
"\n",
"\n",
"@bpf\n",
"@bpfglobal\n",
"def LICENSE() -> str:\n",
" return \"GPL\"\n",
"\n",
"\n",
"# Compile and load\n",
"b = BPF()\n",
"b.load()\n",
"b.attach_all()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4bcc7d57-6cc4-48a3-bbd2-42ad6263afdf",
"metadata": {},
"outputs": [],
"source": [
"start = 0\n",
"\n",
"\n",
"def callback(cpu, event):\n",
" global start\n",
" if start == 0:\n",
" start = event.ts\n",
" ts = (event.ts - start) / 1e9\n",
" print(f\"[CPU {cpu}] PID: {event.pid}, TS: {ts}, COMM: {event.comm.decode()}\")\n",
"\n",
"\n",
"perf = b[\"events\"].open_perf_buffer(callback, struct_name=\"data_t\")\n",
"print(\"Starting to poll... (Ctrl+C to stop)\")\n",
"print(\"Try running: fork() or clone() system calls to trigger events\")\n",
"\n",
"try:\n",
" while True:\n",
" b[\"events\"].poll(1000)\n",
"except KeyboardInterrupt:\n",
" print(\"Stopping...\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -0,0 +1,61 @@
from pythonbpf import bpf, map, struct, section, bpfglobal, BPF
from pythonbpf.helper import ktime, pid, comm
from pythonbpf.maps import PerfEventArray
from ctypes import c_void_p, c_int64
@bpf
@struct
class data_t:
pid: c_int64
ts: c_int64
comm: str(16) # type: ignore [valid-type]
@bpf
@map
def events() -> PerfEventArray:
return PerfEventArray(key_size=c_int64, value_size=c_int64)
@bpf
@section("tracepoint/syscalls/sys_enter_clone")
def hello(ctx: c_void_p) -> c_int64:
dataobj = data_t()
dataobj.pid, dataobj.ts = pid(), ktime()
comm(dataobj.comm)
events.output(dataobj)
return 0 # type: ignore [return-value]
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Compile and load
b = BPF()
b.load()
b.attach_all()
start = 0
def callback(cpu, event):
global start
if start == 0:
start = event.ts
ts = (event.ts - start) / 1e9
print(f"[CPU {cpu}] PID: {event.pid}, TS: {ts}, COMM: {event.comm.decode()}")
perf = b["events"].open_perf_buffer(callback, struct_name="data_t")
print("Starting to poll... (Ctrl+C to stop)")
print("Try running: fork() or clone() system calls to trigger events")
try:
while True:
b["events"].poll(1000)
except KeyboardInterrupt:
print("Stopping...")

View File

@ -0,0 +1,116 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 9,
"id": "7d5d3cfb-39ba-4516-9856-b3bed47a0cef",
"metadata": {},
"outputs": [],
"source": [
"from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe\n",
"from ctypes import c_void_p, c_int64"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "cf1c87aa-e173-4156-8f2d-762225bc6d19",
"metadata": {},
"outputs": [],
"source": [
"@bpf\n",
"@section(\"tracepoint/syscalls/sys_enter_clone\")\n",
"def hello_world(ctx: c_void_p) -> c_int64:\n",
" print(\"Hello, World!\")\n",
" return 0\n",
"\n",
"\n",
"@bpf\n",
"@bpfglobal\n",
"def LICENSE() -> str:\n",
" return \"GPL\"\n",
"\n",
"\n",
"b = BPF()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bd81383d-f75a-4269-8451-3d985d85b124",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Cache2 I/O-4716 [003] ...21 8218.000492: bpf_trace_printk: count: 11 with 4716\n",
"\n",
" Cache2 I/O-4716 [003] ...21 8218.000499: bpf_trace_printk: Hello, World!\n",
"\n",
" WebExtensions-5168 [002] ...21 8219.320392: bpf_trace_printk: count: 13 with 5168\n",
"\n",
" WebExtensions-5168 [002] ...21 8219.320399: bpf_trace_printk: Hello, World!\n",
"\n",
" python-21155 [001] ...21 8220.933716: bpf_trace_printk: count: 5 with 21155\n",
"\n",
" python-21155 [001] ...21 8220.933721: bpf_trace_printk: Hello, World!\n",
"\n",
" python-21155 [002] ...21 8221.341290: bpf_trace_printk: count: 6 with 21155\n",
"\n",
" python-21155 [002] ...21 8221.341295: bpf_trace_printk: Hello, World!\n",
"\n",
" Isolated Web Co-5462 [000] ...21 8223.095033: bpf_trace_printk: count: 7 with 5462\n",
"\n",
" Isolated Web Co-5462 [000] ...21 8223.095043: bpf_trace_printk: Hello, World!\n",
"\n",
" firefox-4542 [000] ...21 8227.760067: bpf_trace_printk: count: 8 with 4542\n",
"\n",
" firefox-4542 [000] ...21 8227.760080: bpf_trace_printk: Hello, World!\n",
"\n",
" Isolated Web Co-12404 [003] ...21 8227.917086: bpf_trace_printk: count: 7 with 12404\n",
"\n",
" Isolated Web Co-12404 [003] ...21 8227.917095: bpf_trace_printk: Hello, World!\n",
"\n"
]
}
],
"source": [
"b.load()\n",
"b.attach_all()\n",
"trace_pipe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "01e1f25b-decc-425b-a1aa-a5e701082574",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -0,0 +1,23 @@
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_clone")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return 0 # type: ignore [return-value]
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Compile and load
b = BPF()
b.load()
b.attach_all()
trace_pipe()

View File

@ -0,0 +1,107 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "dcab010c-f5e9-446f-9f9f-056cc794ad14",
"metadata": {},
"outputs": [],
"source": [
"from pythonbpf import bpf, map, section, bpfglobal, BPF, trace_fields\n",
"from pythonbpf.helper import ktime\n",
"from pythonbpf.maps import HashMap\n",
"\n",
"from ctypes import c_void_p, c_int64"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "720797e8-9c81-4af6-a385-80f1ec4c0f15",
"metadata": {},
"outputs": [],
"source": [
"@bpf\n",
"@map\n",
"def last() -> HashMap:\n",
" return HashMap(key=c_int64, value=c_int64, max_entries=2)\n",
"\n",
"\n",
"@bpf\n",
"@section(\"tracepoint/syscalls/sys_enter_sync\")\n",
"def do_trace(ctx: c_void_p) -> c_int64:\n",
" ts_key, cnt_key = 0, 1\n",
" tsp, cntp = last.lookup(ts_key), last.lookup(cnt_key)\n",
" if not cntp:\n",
" last.update(cnt_key, 0)\n",
" cntp = last.lookup(cnt_key)\n",
" if tsp:\n",
" delta = ktime() - tsp\n",
" if delta < 1000000000:\n",
" time_ms = delta // 1000000\n",
" print(f\"{time_ms} {cntp}\")\n",
" last.delete(ts_key)\n",
" else:\n",
" last.update(ts_key, ktime())\n",
" last.update(cnt_key, cntp + 1)\n",
" return 0\n",
"\n",
"\n",
"@bpf\n",
"@bpfglobal\n",
"def LICENSE() -> str:\n",
" return \"GPL\"\n",
"\n",
"\n",
"# Compile and load\n",
"b = BPF()\n",
"b.load()\n",
"b.attach_all()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "78a8b82c-7c5f-43c1-9de1-cd982a0f345b",
"metadata": {},
"outputs": [],
"source": [
"print(\"Tracing for quick sync's... Ctrl-C to end\")\n",
"\n",
"# format output\n",
"start = 0\n",
"while True:\n",
" try:\n",
" task, pid, cpu, flags, ts, msg = trace_fields()\n",
" if start == 0:\n",
" start = ts\n",
" ts -= start\n",
" ms, cnt = msg.split()\n",
" print(f\"At time {ts} s: Multiple syncs detected, last {ms} ms ago. Count {cnt}\")\n",
" except KeyboardInterrupt:\n",
" exit()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -0,0 +1,58 @@
from pythonbpf import bpf, map, section, bpfglobal, BPF, trace_fields
from pythonbpf.helper import ktime
from pythonbpf.maps import HashMap
from ctypes import c_void_p, c_int64
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_int64, value=c_int64, max_entries=2)
@bpf
@section("tracepoint/syscalls/sys_enter_sync")
def do_trace(ctx: c_void_p) -> c_int64:
ts_key, cnt_key = 0, 1
tsp, cntp = last.lookup(ts_key), last.lookup(cnt_key)
if not cntp:
last.update(cnt_key, 0)
cntp = last.lookup(cnt_key)
if tsp:
delta = ktime() - tsp
if delta < 1000000000:
time_ms = delta // 1000000
print(f"{time_ms} {cntp}")
last.delete(ts_key)
else:
last.update(ts_key, ktime())
last.update(cnt_key, cntp + 1)
return 0 # type: ignore [return-value]
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Compile and load
b = BPF()
b.load()
b.attach_all()
print("Tracing for quick sync's... Ctrl-C to end")
# format output
start = 0
while True:
try:
task, pid, cpu, flags, ts, msg = trace_fields()
if start == 0:
start = ts
ts -= start
ms, cnt = msg.split()
print(f"At time {ts} s: Multiple syncs detected, last {ms} ms ago. Count {cnt}")
except KeyboardInterrupt:
exit()

View File

@ -0,0 +1,134 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "b0d1ab05-0c1f-4578-9c1b-568202b95a5c",
"metadata": {},
"outputs": [],
"source": [
"from pythonbpf import bpf, map, struct, section, bpfglobal, BPF\n",
"from pythonbpf.helper import ktime\n",
"from pythonbpf.maps import HashMap, PerfEventArray\n",
"from ctypes import c_void_p, c_int64"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "85e50d0a-f9d8-468f-8e03-f5f7128f05d8",
"metadata": {},
"outputs": [],
"source": [
"@bpf\n",
"@struct\n",
"class data_t:\n",
" ts: c_int64\n",
" ms: c_int64\n",
"\n",
"\n",
"@bpf\n",
"@map\n",
"def events() -> PerfEventArray:\n",
" return PerfEventArray(key_size=c_int64, value_size=c_int64)\n",
"\n",
"\n",
"@bpf\n",
"@map\n",
"def last() -> HashMap:\n",
" return HashMap(key=c_int64, value=c_int64, max_entries=1)\n",
"\n",
"\n",
"@bpf\n",
"@section(\"tracepoint/syscalls/sys_enter_sync\")\n",
"def do_trace(ctx: c_void_p) -> c_int64:\n",
" dat, dat.ts, key = data_t(), ktime(), 0\n",
" tsp = last.lookup(key)\n",
" if tsp:\n",
" delta = ktime() - tsp\n",
" if delta < 1000000000:\n",
" dat.ms = delta // 1000000\n",
" events.output(dat)\n",
" last.delete(key)\n",
" else:\n",
" last.update(key, ktime())\n",
" return 0\n",
"\n",
"\n",
"@bpf\n",
"@bpfglobal\n",
"def LICENSE() -> str:\n",
" return \"GPL\"\n",
"\n",
"\n",
"# Compile and load\n",
"b = BPF()\n",
"b.load()\n",
"b.attach_all()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "40bb1107-369f-4be7-9f10-37201900c16b",
"metadata": {},
"outputs": [],
"source": [
"print(\"Tracing for quick sync's... Ctrl-C to end\")\n",
"\n",
"# format output\n",
"start = 0\n",
"\n",
"\n",
"def callback(cpu, event):\n",
" global start\n",
" if start == 0:\n",
" start = event.ts\n",
" event.ts -= start\n",
" print(\n",
" f\"At time {event.ts / 1e9} s: Multiple sync detected, Last sync: {event.ms} ms ago\"\n",
" )\n",
"\n",
"\n",
"perf = b[\"events\"].open_perf_buffer(callback, struct_name=\"data_t\")\n",
"print(\"Starting to poll... (Ctrl+C to stop)\")\n",
"print(\"Try running: fork() or clone() system calls to trigger events\")\n",
"\n",
"try:\n",
" while True:\n",
" b[\"events\"].poll(1000)\n",
"except KeyboardInterrupt:\n",
" print(\"Stopping...\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "94a588d9-3a40-437c-a35b-fc40410f3eb7",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -0,0 +1,77 @@
from pythonbpf import bpf, map, struct, section, bpfglobal, BPF
from pythonbpf.helper import ktime
from pythonbpf.maps import HashMap, PerfEventArray
from ctypes import c_void_p, c_int64
@bpf
@struct
class data_t:
ts: c_int64
ms: c_int64
@bpf
@map
def events() -> PerfEventArray:
return PerfEventArray(key_size=c_int64, value_size=c_int64)
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_int64, value=c_int64, max_entries=1)
@bpf
@section("tracepoint/syscalls/sys_enter_sync")
def do_trace(ctx: c_void_p) -> c_int64:
dat, dat.ts, key = data_t(), ktime(), 0
tsp = last.lookup(key)
if tsp:
delta = ktime() - tsp
if delta < 1000000000:
dat.ms = delta // 1000000
events.output(dat)
last.delete(key)
else:
last.update(key, ktime())
return 0 # type: ignore [return-value]
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Compile and load
b = BPF()
b.load()
b.attach_all()
print("Tracing for quick sync's... Ctrl-C to end")
# format output
start = 0
def callback(cpu, event):
global start
if start == 0:
start = event.ts
event.ts -= start
print(
f"At time {event.ts / 1e9} s: Multiple sync detected, Last sync: {event.ms} ms ago"
)
perf = b["events"].open_perf_buffer(callback, struct_name="data_t")
print("Starting to poll... (Ctrl+C to stop)")
print("Try running: fork() or clone() system calls to trigger events")
try:
while True:
b["events"].poll(1000)
except KeyboardInterrupt:
print("Stopping...")

View File

@ -0,0 +1,102 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "bfe01ceb-2f27-41b3-b3ba-50ec65cfddda",
"metadata": {},
"outputs": [],
"source": [
"from pythonbpf import bpf, map, section, bpfglobal, BPF, trace_fields\n",
"from pythonbpf.helper import ktime\n",
"from pythonbpf.maps import HashMap\n",
"\n",
"from ctypes import c_void_p, c_int64"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ddb115f4-20a7-43bc-bb5b-ccbfd6031fc2",
"metadata": {},
"outputs": [],
"source": [
"@bpf\n",
"@map\n",
"def last() -> HashMap:\n",
" return HashMap(key=c_int64, value=c_int64, max_entries=1)\n",
"\n",
"\n",
"@bpf\n",
"@section(\"tracepoint/syscalls/sys_enter_sync\")\n",
"def do_trace(ctx: c_void_p) -> c_int64:\n",
" key = 0\n",
" tsp = last.lookup(key)\n",
" if tsp:\n",
" delta = ktime() - tsp\n",
" if delta < 1000000000:\n",
" time_ms = delta // 1000000\n",
" print(f\"{time_ms}\")\n",
" last.delete(key)\n",
" else:\n",
" last.update(key, ktime())\n",
" return 0\n",
"\n",
"\n",
"@bpf\n",
"@bpfglobal\n",
"def LICENSE() -> str:\n",
" return \"GPL\"\n",
"\n",
"\n",
"# Compile and load\n",
"b = BPF()\n",
"b.load()\n",
"b.attach_all()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e4f46574-9fd8-46e7-9c7b-27a36d07f200",
"metadata": {},
"outputs": [],
"source": [
"print(\"Tracing for quick sync's... Ctrl-C to end\")\n",
"\n",
"# format output\n",
"start = 0\n",
"while True:\n",
" try:\n",
" task, pid, cpu, flags, ts, ms = trace_fields()\n",
" if start == 0:\n",
" start = ts\n",
" ts -= start\n",
" print(f\"At time {ts} s: Multiple syncs detected, last {ms} ms ago\")\n",
" except KeyboardInterrupt:\n",
" exit()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -0,0 +1,53 @@
from pythonbpf import bpf, map, section, bpfglobal, BPF, trace_fields
from pythonbpf.helper import ktime
from pythonbpf.maps import HashMap
from ctypes import c_void_p, c_int64
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_int64, value=c_int64, max_entries=1)
@bpf
@section("tracepoint/syscalls/sys_enter_sync")
def do_trace(ctx: c_void_p) -> c_int64:
key = 0
tsp = last.lookup(key)
if tsp:
delta = ktime() - tsp
if delta < 1000000000:
time_ms = delta // 1000000
print(f"{time_ms}")
last.delete(key)
else:
last.update(key, ktime())
return 0 # type: ignore [return-value]
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Compile and load
b = BPF()
b.load()
b.attach_all()
print("Tracing for quick sync's... Ctrl-C to end")
# format output
start = 0
while True:
try:
task, pid, cpu, flags, ts, ms = trace_fields()
if start == 0:
start = ts
ts -= start
print(f"At time {ts} s: Multiple syncs detected, last {ms} ms ago")
except KeyboardInterrupt:
exit()

View File

@ -0,0 +1,73 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "bb49598f-b9cc-4ea8-8391-923cad513711",
"metadata": {},
"outputs": [],
"source": [
"from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe\n",
"from ctypes import c_void_p, c_int64"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5da237b0-1c7d-4ec5-8c24-696b1c1d97fa",
"metadata": {},
"outputs": [],
"source": [
"@bpf\n",
"@section(\"tracepoint/syscalls/sys_enter_sync\")\n",
"def hello_world(ctx: c_void_p) -> c_int64:\n",
" print(\"sys_sync() called\")\n",
" return 0\n",
"\n",
"\n",
"@bpf\n",
"@bpfglobal\n",
"def LICENSE() -> str:\n",
" return \"GPL\"\n",
"\n",
"\n",
"# Compile and load\n",
"b = BPF()\n",
"b.load()\n",
"b.attach_all()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e4c218ac-fe47-4fd1-a27b-c07e02f3cd05",
"metadata": {},
"outputs": [],
"source": [
"print(\"Tracing sys_sync()... Ctrl-C to end.\")\n",
"trace_pipe()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

23
BCC-Examples/sys_sync.py Normal file
View File

@ -0,0 +1,23 @@
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_sync")
def hello_world(ctx: c_void_p) -> c_int64:
print("sys_sync() called")
return c_int64(0)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Compile and load
b = BPF()
b.load()
b.attach_all()
print("Tracing sys_sync()... Ctrl-C to end.")
trace_pipe()

File diff suppressed because one or more lines are too long

127
BCC-Examples/vfsreadlat.py Normal file
View File

@ -0,0 +1,127 @@
from pythonbpf import bpf, map, struct, section, bpfglobal, BPF
from pythonbpf.helper import ktime, pid
from pythonbpf.maps import HashMap, PerfEventArray
from ctypes import c_void_p, c_uint64
import matplotlib.pyplot as plt
import numpy as np
@bpf
@struct
class latency_event:
pid: c_uint64
delta_us: c_uint64 # Latency in microseconds
@bpf
@map
def start() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=10240)
@bpf
@map
def events() -> PerfEventArray:
return PerfEventArray(key_size=c_uint64, value_size=c_uint64)
@bpf
@section("kprobe/vfs_read")
def do_entry(ctx: c_void_p) -> c_uint64:
p, ts = pid(), ktime()
start.update(p, ts)
return 0 # type: ignore [return-value]
@bpf
@section("kretprobe/vfs_read")
def do_return(ctx: c_void_p) -> c_uint64:
p = pid()
tsp = start.lookup(p)
if tsp:
delta_ns = ktime() - tsp
# Only track if latency > 1 microsecond
if delta_ns > 1000:
evt = latency_event()
evt.pid, evt.delta_us = p, delta_ns // 1000
events.output(evt)
start.delete(p)
return 0 # type: ignore [return-value]
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Load BPF
print("Loading BPF program...")
b = BPF()
b.load()
b.attach_all()
# Collect latencies
latencies = []
def callback(cpu, event):
latencies.append(event.delta_us)
b["events"].open_perf_buffer(callback, struct_name="latency_event")
print("Tracing vfs_read latency... Hit Ctrl-C to end.")
try:
while True:
b["events"].poll(1000)
if len(latencies) > 0 and len(latencies) % 1000 == 0:
print(f"Collected {len(latencies)} samples...")
except KeyboardInterrupt:
print(f"Collected {len(latencies)} samples. Generating histogram...")
# Create histogram with matplotlib
if latencies:
# Use log scale for better visualization
log_latencies = np.log2(latencies)
plt.figure(figsize=(12, 6))
# Plot 1: Linear histogram
plt.subplot(1, 2, 1)
plt.hist(latencies, bins=50, edgecolor="black", alpha=0.7)
plt.xlabel("Latency (microseconds)")
plt.ylabel("Count")
plt.title("VFS Read Latency Distribution (Linear)")
plt.grid(True, alpha=0.3)
# Plot 2: Log2 histogram (like BCC)
plt.subplot(1, 2, 2)
plt.hist(log_latencies, bins=50, edgecolor="black", alpha=0.7, color="orange")
plt.xlabel("log2(Latency in µs)")
plt.ylabel("Count")
plt.title("VFS Read Latency Distribution (Log2)")
plt.grid(True, alpha=0.3)
# Add statistics
print("Statistics:")
print(f" Count: {len(latencies)}")
print(f" Min: {min(latencies)} µs")
print(f" Max: {max(latencies)} µs")
print(f" Mean: {np.mean(latencies):.2f} µs")
print(f" Median: {np.median(latencies):.2f} µs")
print(f" P95: {np.percentile(latencies, 95):.2f} µs")
print(f" P99: {np.percentile(latencies, 99):.2f} µs")
plt.tight_layout()
plt.savefig("vfs_read_latency.png", dpi=150)
print("Histogram saved to vfs_read_latency.png")
plt.show()
else:
print("No samples collected!")

View File

@ -0,0 +1,101 @@
"""BPF program for tracing VFS read latency."""
from pythonbpf import bpf, map, struct, section, bpfglobal, BPF
from pythonbpf.helper import ktime, pid
from pythonbpf.maps import HashMap, PerfEventArray
from ctypes import c_void_p, c_uint64
import argparse
from data_collector import LatencyCollector
from dashboard import LatencyDashboard
@bpf
@struct
class latency_event:
pid: c_uint64
delta_us: c_uint64
@bpf
@map
def start() -> HashMap:
"""Map to store start timestamps by PID."""
return HashMap(key=c_uint64, value=c_uint64, max_entries=10240)
@bpf
@map
def events() -> PerfEventArray:
"""Perf event array for sending latency events to userspace."""
return PerfEventArray(key_size=c_uint64, value_size=c_uint64)
@bpf
@section("kprobe/vfs_read")
def do_entry(ctx: c_void_p) -> c_uint64:
"""Record start time when vfs_read is called."""
p, ts = pid(), ktime()
start.update(p, ts)
return 0 # type: ignore [return-value]
@bpf
@section("kretprobe/vfs_read")
def do_return(ctx: c_void_p) -> c_uint64:
"""Calculate and record latency when vfs_read returns."""
p = pid()
tsp = start.lookup(p)
if tsp:
delta_ns = ktime() - tsp
# Only track latencies > 1 microsecond
if delta_ns > 1000:
evt = latency_event()
evt.pid, evt.delta_us = p, delta_ns // 1000
events.output(evt)
start.delete(p)
return 0 # type: ignore [return-value]
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
def parse_args():
"""Parse command line arguments."""
parser = argparse.ArgumentParser(
description="Monitor VFS read latency with live dashboard"
)
parser.add_argument(
"--host", default="0.0.0.0", help="Dashboard host (default: 0.0.0.0)"
)
parser.add_argument(
"--port", type=int, default=8050, help="Dashboard port (default: 8050)"
)
parser.add_argument(
"--buffer", type=int, default=10000, help="Recent data buffer size"
)
return parser.parse_args()
args = parse_args()
# Load BPF program
print("Loading BPF program...")
b = BPF()
b.load()
b.attach_all()
print("✅ BPF program loaded and attached")
# Setup data collector
collector = LatencyCollector(b, buffer_size=args.buffer)
collector.start()
# Create and run dashboard
dashboard = LatencyDashboard(collector)
dashboard.run(host=args.host, port=args.port)

View File

@ -0,0 +1,282 @@
"""Plotly Dash dashboard for visualizing latency data."""
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
class LatencyDashboard:
"""Interactive dashboard for latency visualization."""
def __init__(self, collector, title: str = "VFS Read Latency Monitor"):
self.collector = collector
self.app = dash.Dash(__name__)
self.app.title = title
self._setup_layout()
self._setup_callbacks()
def _setup_layout(self):
"""Create dashboard layout."""
self.app.layout = html.Div(
[
html.H1(
"🔥 VFS Read Latency Dashboard",
style={
"textAlign": "center",
"color": "#2c3e50",
"marginBottom": 20,
},
),
# Stats cards
html.Div(
[
self._create_stat_card(
"total-samples", "📊 Total Samples", "#3498db"
),
self._create_stat_card(
"mean-latency", "⚡ Mean Latency", "#e74c3c"
),
self._create_stat_card(
"p99-latency", "🔥 P99 Latency", "#f39c12"
),
],
style={
"display": "flex",
"justifyContent": "space-around",
"marginBottom": 30,
},
),
# Graphs - ✅ Make sure these IDs match the callback outputs
dcc.Graph(id="dual-histogram", style={"height": "450px"}),
dcc.Graph(id="log2-buckets", style={"height": "350px"}),
dcc.Graph(id="timeseries-graph", style={"height": "300px"}),
# Auto-update
dcc.Interval(id="interval-component", interval=1000, n_intervals=0),
],
style={"padding": 20, "fontFamily": "Arial, sans-serif"},
)
def _create_stat_card(self, id_name: str, title: str, color: str):
"""Create a statistics card."""
return html.Div(
[
html.H3(title, style={"color": color}),
html.H2(id=id_name, style={"fontSize": 48, "color": "#2c3e50"}),
],
className="stat-box",
style={
"background": "white",
"padding": 20,
"borderRadius": 10,
"boxShadow": "0 4px 6px rgba(0,0,0,0.1)",
"textAlign": "center",
"flex": 1,
"margin": "0 10px",
},
)
def _setup_callbacks(self):
"""Setup dashboard callbacks."""
@self.app.callback(
[
Output("total-samples", "children"),
Output("mean-latency", "children"),
Output("p99-latency", "children"),
Output("dual-histogram", "figure"), # ✅ Match layout IDs
Output("log2-buckets", "figure"), # ✅ Match layout IDs
Output("timeseries-graph", "figure"), # ✅ Match layout IDs
],
[Input("interval-component", "n_intervals")],
)
def update_dashboard(n):
stats = self.collector.get_stats()
if stats.total == 0:
return self._empty_state()
return (
f"{stats.total:,}",
f"{stats.mean:.1f} µs",
f"{stats.p99:.1f} µs",
self._create_dual_histogram(),
self._create_log2_buckets(),
self._create_timeseries(),
)
def _empty_state(self):
"""Return empty state for dashboard."""
empty_fig = go.Figure()
empty_fig.update_layout(
title="Waiting for data... Generate some disk I/O!", template="plotly_white"
)
# ✅ Return 6 values (3 stats + 3 figures)
return "0", "0 µs", "0 µs", empty_fig, empty_fig, empty_fig
def _create_dual_histogram(self) -> go.Figure:
"""Create side-by-side linear and log2 histograms."""
latencies = self.collector.get_all_latencies()
# Create subplots
fig = make_subplots(
rows=1,
cols=2,
subplot_titles=("Linear Scale", "Log2 Scale"),
horizontal_spacing=0.12,
)
# Linear histogram
fig.add_trace(
go.Histogram(
x=latencies,
nbinsx=50,
marker_color="rgb(55, 83, 109)",
opacity=0.75,
name="Linear",
),
row=1,
col=1,
)
# Log2 histogram
log2_latencies = np.log2(latencies + 1) # +1 to avoid log2(0)
fig.add_trace(
go.Histogram(
x=log2_latencies,
nbinsx=30,
marker_color="rgb(243, 156, 18)",
opacity=0.75,
name="Log2",
),
row=1,
col=2,
)
# Update axes
fig.update_xaxes(title_text="Latency (µs)", row=1, col=1)
fig.update_xaxes(title_text="log2(Latency in µs)", row=1, col=2)
fig.update_yaxes(title_text="Count", row=1, col=1)
fig.update_yaxes(title_text="Count", row=1, col=2)
fig.update_layout(
title_text="📊 Latency Distribution (Linear vs Log2)",
template="plotly_white",
showlegend=False,
height=450,
)
return fig
def _create_log2_buckets(self) -> go.Figure:
"""Create bar chart of log2 buckets (like BCC histogram)."""
buckets = self.collector.get_histogram_buckets()
if not buckets:
fig = go.Figure()
fig.update_layout(
title="🔥 Log2 Histogram - Waiting for data...", template="plotly_white"
)
return fig
# Sort buckets
sorted_buckets = sorted(buckets.keys())
counts = [buckets[b] for b in sorted_buckets]
# Create labels (e.g., "8-16µs", "16-32µs")
labels = []
hover_text = []
for bucket in sorted_buckets:
lower = 2**bucket
upper = 2 ** (bucket + 1)
labels.append(f"{lower}-{upper}")
# Calculate percentage
total = sum(counts)
pct = (buckets[bucket] / total) * 100 if total > 0 else 0
hover_text.append(
f"Range: {lower}-{upper} µs<br>"
f"Count: {buckets[bucket]:,}<br>"
f"Percentage: {pct:.2f}%"
)
# Create bar chart
fig = go.Figure()
fig.add_trace(
go.Bar(
x=labels,
y=counts,
marker=dict(
color=counts,
colorscale="YlOrRd",
showscale=True,
colorbar=dict(title="Count"),
),
text=counts,
textposition="outside",
hovertext=hover_text,
hoverinfo="text",
)
)
fig.update_layout(
title="🔥 Log2 Histogram (BCC-style buckets)",
xaxis_title="Latency Range (µs)",
yaxis_title="Count",
template="plotly_white",
height=350,
xaxis=dict(tickangle=-45),
)
return fig
def _create_timeseries(self) -> go.Figure:
"""Create time series figure."""
recent = self.collector.get_recent_latencies()
if not recent:
fig = go.Figure()
fig.update_layout(
title="⏱️ Real-time Latency - Waiting for data...",
template="plotly_white",
)
return fig
times = [d["time"] for d in recent]
lats = [d["latency"] for d in recent]
fig = go.Figure()
fig.add_trace(
go.Scatter(
x=times,
y=lats,
mode="lines",
line=dict(color="rgb(231, 76, 60)", width=2),
fill="tozeroy",
fillcolor="rgba(231, 76, 60, 0.2)",
)
)
fig.update_layout(
title="⏱️ Real-time Latency (Last 10,000 samples)",
xaxis_title="Time (seconds)",
yaxis_title="Latency (µs)",
template="plotly_white",
height=300,
)
return fig
def run(self, host: str = "0.0.0.0", port: int = 8050, debug: bool = False):
"""Run the dashboard server."""
print(f"\n{'=' * 60}")
print(f"🚀 Dashboard running at: http://{host}:{port}")
print(" Access from your browser to see live graphs")
print(
" Generate disk I/O to see data: dd if=/dev/zero of=/tmp/test bs=1M count=100"
)
print(f"{'=' * 60}\n")
self.app.run(debug=debug, host=host, port=port)

View File

@ -0,0 +1,96 @@
"""Data collection and management."""
import threading
import time
import numpy as np
from collections import deque
from dataclasses import dataclass
from typing import List, Dict
@dataclass
class LatencyStats:
"""Statistics computed from latency data."""
total: int = 0
mean: float = 0.0
median: float = 0.0
min: float = 0.0
max: float = 0.0
p95: float = 0.0
p99: float = 0.0
@classmethod
def from_array(cls, data: np.ndarray) -> "LatencyStats":
"""Compute stats from numpy array."""
if len(data) == 0:
return cls()
return cls(
total=len(data),
mean=float(np.mean(data)),
median=float(np.median(data)),
min=float(np.min(data)),
max=float(np.max(data)),
p95=float(np.percentile(data, 95)),
p99=float(np.percentile(data, 99)),
)
class LatencyCollector:
"""Collects and manages latency data from BPF."""
def __init__(self, bpf_object, buffer_size: int = 10000):
self.bpf = bpf_object
self.all_latencies: List[float] = []
self.recent_latencies = deque(maxlen=buffer_size) # type: ignore [var-annotated]
self.start_time = time.time()
self._lock = threading.Lock()
self._poll_thread = None
def callback(self, cpu: int, event):
"""Callback for BPF events."""
with self._lock:
self.all_latencies.append(event.delta_us)
self.recent_latencies.append(
{"time": time.time() - self.start_time, "latency": event.delta_us}
)
def start(self):
"""Start collecting data."""
self.bpf["events"].open_perf_buffer(self.callback, struct_name="latency_event")
def poll_loop():
while True:
self.bpf["events"].poll(100)
self._poll_thread = threading.Thread(target=poll_loop, daemon=True)
self._poll_thread.start()
print("✅ Data collection started")
def get_all_latencies(self) -> np.ndarray:
"""Get all latencies as numpy array."""
with self._lock:
return np.array(self.all_latencies) if self.all_latencies else np.array([])
def get_recent_latencies(self) -> List[Dict]:
"""Get recent latencies with timestamps."""
with self._lock:
return list(self.recent_latencies)
def get_stats(self) -> LatencyStats:
"""Compute current statistics."""
return LatencyStats.from_array(self.get_all_latencies())
def get_histogram_buckets(self) -> Dict[int, int]:
"""Get log2 histogram buckets."""
latencies = self.get_all_latencies()
if len(latencies) == 0:
return {}
log_buckets = np.floor(np.log2(latencies + 1)).astype(int)
buckets = {} # type: ignore [var-annotated]
for bucket in log_buckets:
buckets[bucket] = buckets.get(bucket, 0) + 1
return buckets

View File

@ -0,0 +1,178 @@
from pythonbpf import bpf, map, struct, section, bpfglobal, BPF
from pythonbpf.helper import ktime, pid
from pythonbpf.maps import HashMap, PerfEventArray
from ctypes import c_void_p, c_uint64
from rich.console import Console
from rich.live import Live
from rich.table import Table
from rich.panel import Panel
from rich.layout import Layout
import numpy as np
import threading
import time
from collections import Counter
# ==================== BPF Setup ====================
@bpf
@struct
class latency_event:
pid: c_uint64
delta_us: c_uint64
@bpf
@map
def start() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=10240)
@bpf
@map
def events() -> PerfEventArray:
return PerfEventArray(key_size=c_uint64, value_size=c_uint64)
@bpf
@section("kprobe/vfs_read")
def do_entry(ctx: c_void_p) -> c_uint64:
p, ts = pid(), ktime()
start.update(p, ts)
return 0 # type: ignore [return-value]
@bpf
@section("kretprobe/vfs_read")
def do_return(ctx: c_void_p) -> c_uint64:
p = pid()
tsp = start.lookup(p)
if tsp:
delta_ns = ktime() - tsp
if delta_ns > 1000:
evt = latency_event()
evt.pid, evt.delta_us = p, delta_ns // 1000
events.output(evt)
start.delete(p)
return 0 # type: ignore [return-value]
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
console = Console()
console.print("[bold green]Loading BPF program...[/]")
b = BPF()
b.load()
b.attach_all()
# ==================== Data Collection ====================
all_latencies = []
histogram_buckets = Counter() # type: ignore [var-annotated]
def callback(cpu, event):
all_latencies.append(event.delta_us)
# Create log2 bucket
bucket = int(np.floor(np.log2(event.delta_us + 1)))
histogram_buckets[bucket] += 1
b["events"].open_perf_buffer(callback, struct_name="latency_event")
def poll_events():
while True:
b["events"].poll(100)
poll_thread = threading.Thread(target=poll_events, daemon=True)
poll_thread.start()
# ==================== Live Display ====================
def generate_display():
layout = Layout()
layout.split_column(
Layout(name="header", size=3),
Layout(name="stats", size=8),
Layout(name="histogram", size=20),
)
# Header
layout["header"].update(
Panel("[bold cyan]🔥 VFS Read Latency Monitor[/]", style="bold white on blue")
)
# Stats
if len(all_latencies) > 0:
lats = np.array(all_latencies)
stats_table = Table(show_header=False, box=None, padding=(0, 2))
stats_table.add_column(style="bold cyan")
stats_table.add_column(style="bold yellow")
stats_table.add_row("📊 Total Samples:", f"{len(lats):,}")
stats_table.add_row("⚡ Mean Latency:", f"{np.mean(lats):.2f} µs")
stats_table.add_row("📉 Min Latency:", f"{np.min(lats):.2f} µs")
stats_table.add_row("📈 Max Latency:", f"{np.max(lats):.2f} µs")
stats_table.add_row("🎯 P95 Latency:", f"{np.percentile(lats, 95):.2f} µs")
stats_table.add_row("🔥 P99 Latency:", f"{np.percentile(lats, 99):.2f} µs")
layout["stats"].update(
Panel(stats_table, title="Statistics", border_style="green")
)
else:
layout["stats"].update(
Panel("[yellow]Waiting for data...[/]", border_style="yellow")
)
# Histogram
if histogram_buckets:
hist_table = Table(title="Latency Distribution", box=None)
hist_table.add_column("Range", style="cyan", no_wrap=True)
hist_table.add_column("Count", justify="right", style="yellow")
hist_table.add_column("Distribution", style="green")
max_count = max(histogram_buckets.values())
for bucket in sorted(histogram_buckets.keys()):
count = histogram_buckets[bucket]
lower = 2**bucket
upper = 2 ** (bucket + 1)
# Create bar
bar_width = int((count / max_count) * 40)
bar = "" * bar_width
hist_table.add_row(
f"{lower:5d}-{upper:5d} µs",
f"{count:6d}",
f"[green]{bar}[/] {count / len(all_latencies) * 100:.1f}%",
)
layout["histogram"].update(Panel(hist_table, border_style="green"))
return layout
try:
with Live(generate_display(), refresh_per_second=2, console=console) as live:
while True:
time.sleep(0.5)
live.update(generate_display())
except KeyboardInterrupt:
console.print("\n[bold red]Stopping...[/]")
if all_latencies:
console.print(f"\n[bold green]✅ Collected {len(all_latencies):,} samples[/]")

View File

@ -40,6 +40,12 @@ Python-BPF is an LLVM IR generator for eBPF programs written in Python. It uses
---
## Try It Out!
Run
```bash
curl -s https://raw.githubusercontent.com/pythonbpf/Python-BPF/refs/heads/master/tools/setup.sh | sudo bash
```
## Installation
Dependencies:

View File

@ -21,17 +21,17 @@ def last() -> HashMap:
@section("tracepoint/syscalls/sys_enter_execve")
def do_trace(ctx: c_void_p) -> c_int64:
key = 0
tsp = last().lookup(key)
tsp = last.lookup(key)
if tsp:
kt = ktime()
delta = kt - tsp
if delta < 1000000000:
time_ms = delta // 1000000
print(f"Execve syscall entered within last second, last {time_ms} ms ago")
last().delete(key)
last.delete(key)
else:
kt = ktime()
last().update(key, kt)
last.update(key, kt)
return c_int64(0)

File diff suppressed because one or more lines are too long

View File

@ -3,7 +3,6 @@ import time
from pythonbpf import bpf, map, section, bpfglobal, BPF
from pythonbpf.helper import pid
from pythonbpf.maps import HashMap
from pylibbpf import BpfMap
from ctypes import c_void_p, c_int64, c_uint64, c_int32
import matplotlib.pyplot as plt
@ -26,14 +25,14 @@ def hist() -> HashMap:
def hello(ctx: c_void_p) -> c_int64:
process_id = pid()
one = 1
prev = hist().lookup(process_id)
prev = hist.lookup(process_id)
if prev:
previous_value = prev + 1
print(f"count: {previous_value} with {process_id}")
hist().update(process_id, previous_value)
hist.update(process_id, previous_value)
return c_int64(0)
else:
hist().update(process_id, one)
hist.update(process_id, one)
return c_int64(0)
@ -44,12 +43,12 @@ def LICENSE() -> str:
b = BPF()
b.load_and_attach()
hist = BpfMap(b, hist)
b.load()
b.attach_all()
print("Recording")
time.sleep(10)
counts = list(hist.values())
counts = list(b["hist"].values())
plt.hist(counts, bins=20)
plt.xlabel("Clone calls per PID")

View File

@ -1,4 +1,4 @@
from pythonbpf import bpf, section, bpfglobal, BPF
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from ctypes import c_void_p, c_int64
# Instructions to how to run this program
@ -21,10 +21,6 @@ def LICENSE() -> str:
b = BPF()
b.load_and_attach()
if b.is_loaded() and b.is_attached():
print("Successfully loaded and attached")
else:
print("Could not load successfully")
# Now cat /sys/kernel/debug/tracing/trace_pipe to see results of the execve syscall.
b.load()
b.attach_all()
trace_pipe()

View File

@ -1,4 +1,4 @@
from pythonbpf import bpf, section, bpfglobal, BPF
from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe
from ctypes import c_void_p, c_int64
@ -23,7 +23,7 @@ def LICENSE() -> str:
b = BPF()
b.load_and_attach()
while True:
print("running")
# Now cat /sys/kernel/debug/tracing/trace_pipe to see results of unlink kprobe.
b.load()
b.attach_all()
print("running")
trace_pipe()

View File

@ -23,14 +23,14 @@ def count() -> HashMap:
def hello_world(ctx: c_void_p) -> c_int64:
key = 0
one = 1
prev = count().lookup(key)
prev = count.lookup(key)
if prev:
prevval = prev + 1
print(f"count: {prevval}")
count().update(key, prevval)
count.update(key, prevval)
return XDP_PASS
else:
count().update(key, one)
count.update(key, one)
return XDP_PASS

View File

@ -4,12 +4,26 @@ build-backend = "setuptools.build_meta"
[project]
name = "pythonbpf"
version = "0.1.5"
version = "0.1.6"
description = "Reduced Python frontend for eBPF"
authors = [
{ name = "r41k0u", email="pragyanshchaturvedi18@gmail.com" },
{ name = "varun-r-mallya", email="varunrmallya@gmail.com" }
]
classifiers = [
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"Operating System :: POSIX :: Linux",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python",
"Topic :: Software Development :: Libraries :: Python Modules",
"Topic :: System :: Operating System Kernels :: Linux",
]
readme = "README.md"
license = {text = "Apache-2.0"}
requires-python = ">=3.8"

View File

@ -1,5 +1,6 @@
from .decorators import bpf, map, section, bpfglobal, struct
from .codegen import compile_to_ir, compile, BPF
from .utils import trace_pipe, trace_fields
__all__ = [
"bpf",
@ -10,4 +11,6 @@ __all__ = [
"compile_to_ir",
"compile",
"BPF",
"trace_pipe",
"trace_fields",
]

View File

@ -5,6 +5,7 @@ from llvmlite import ir
from dataclasses import dataclass
from typing import Any
from pythonbpf.helper import HelperHandlerRegistry
from .expr import VmlinuxHandlerRegistry
from pythonbpf.type_deducer import ctypes_to_ir
logger = logging.getLogger(__name__)
@ -22,44 +23,77 @@ class LocalSymbol:
yield self.metadata
def create_targets_and_rvals(stmt):
"""Create lists of targets and right-hand values from an assignment statement."""
if isinstance(stmt.targets[0], ast.Tuple):
if not isinstance(stmt.value, ast.Tuple):
logger.warning("Mismatched multi-target assignment, skipping allocation")
return [], []
targets, rvals = stmt.targets[0].elts, stmt.value.elts
if len(targets) != len(rvals):
logger.warning("length of LHS != length of RHS, skipping allocation")
return [], []
return targets, rvals
return stmt.targets, [stmt.value]
def handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab):
"""Handle memory allocation for assignment statements."""
# Validate assignment
if len(stmt.targets) != 1:
logger.warning("Multi-target assignment not supported, skipping allocation")
return
logger.info(f"Handling assignment for allocation: {ast.dump(stmt)}")
target = stmt.targets[0]
# NOTE: Support multi-target assignments (e.g.: a, b = 1, 2)
targets, rvals = create_targets_and_rvals(stmt)
# Skip non-name targets (e.g., struct field assignments)
if isinstance(target, ast.Attribute):
logger.debug(f"Struct field assignment to {target.attr}, no allocation needed")
return
for target, rval in zip(targets, rvals):
# Skip non-name targets (e.g., struct field assignments)
if isinstance(target, ast.Attribute):
logger.debug(
f"Struct field assignment to {target.attr}, no allocation needed"
)
continue
if not isinstance(target, ast.Name):
logger.warning(f"Unsupported assignment target type: {type(target).__name__}")
return
if not isinstance(target, ast.Name):
logger.warning(
f"Unsupported assignment target type: {type(target).__name__}"
)
continue
var_name = target.id
rval = stmt.value
var_name = target.id
# Skip if already allocated
if var_name in local_sym_tab:
logger.debug(f"Variable {var_name} already allocated, skipping")
return
# Skip if already allocated
if var_name in local_sym_tab:
logger.debug(f"Variable {var_name} already allocated, skipping")
continue
# Determine type and allocate based on rval
if isinstance(rval, ast.Call):
_allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab)
elif isinstance(rval, ast.Constant):
_allocate_for_constant(builder, var_name, rval, local_sym_tab)
elif isinstance(rval, ast.BinOp):
_allocate_for_binop(builder, var_name, local_sym_tab)
else:
logger.warning(
f"Unsupported assignment value type for {var_name}: {type(rval).__name__}"
)
# When allocating a variable, check if it's a vmlinux struct type
if isinstance(
stmt.value, ast.Name
) and VmlinuxHandlerRegistry.is_vmlinux_struct(stmt.value.id):
# Handle vmlinux struct allocation
# This requires more implementation
print(stmt.value)
pass
# Determine type and allocate based on rval
if isinstance(rval, ast.Call):
_allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab)
elif isinstance(rval, ast.Constant):
_allocate_for_constant(builder, var_name, rval, local_sym_tab)
elif isinstance(rval, ast.BinOp):
_allocate_for_binop(builder, var_name, local_sym_tab)
elif isinstance(rval, ast.Name):
# Variable-to-variable assignment (b = a)
_allocate_for_name(builder, var_name, rval, local_sym_tab)
elif isinstance(rval, ast.Attribute):
# Struct field-to-variable assignment (a = dat.fld)
_allocate_for_attribute(
builder, var_name, rval, local_sym_tab, structs_sym_tab
)
else:
logger.warning(
f"Unsupported assignment value type for {var_name}: {type(rval).__name__}"
)
def _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab):
@ -165,14 +199,115 @@ def _allocate_for_binop(builder, var_name, local_sym_tab):
logger.info(f"Pre-allocated {var_name} for binop result")
def _get_type_name(ir_type):
"""Get a string representation of an IR type."""
if isinstance(ir_type, ir.IntType):
return f"i{ir_type.width}"
elif isinstance(ir_type, ir.PointerType):
return "ptr"
elif isinstance(ir_type, ir.ArrayType):
return f"[{ir_type.count}x{_get_type_name(ir_type.element)}]"
else:
return str(ir_type).replace(" ", "")
def allocate_temp_pool(builder, max_temps, local_sym_tab):
"""Allocate the temporary scratch space pool for helper arguments."""
if max_temps == 0:
if not max_temps:
logger.info("No temp pool allocation needed")
return
logger.info(f"Allocating temp pool of {max_temps} variables")
for i in range(max_temps):
temp_name = f"__helper_temp_{i}"
temp_var = builder.alloca(ir.IntType(64), name=temp_name)
temp_var.align = 8
local_sym_tab[temp_name] = LocalSymbol(temp_var, ir.IntType(64))
for tmp_type, cnt in max_temps.items():
type_name = _get_type_name(tmp_type)
logger.info(f"Allocating temp pool of {cnt} variables of type {type_name}")
for i in range(cnt):
temp_name = f"__helper_temp_{type_name}_{i}"
temp_var = builder.alloca(tmp_type, name=temp_name)
temp_var.align = _get_alignment(tmp_type)
local_sym_tab[temp_name] = LocalSymbol(temp_var, tmp_type)
logger.debug(f"Allocated temp variable: {temp_name}")
def _allocate_for_name(builder, var_name, rval, local_sym_tab):
"""Allocate memory for variable-to-variable assignment (b = a)."""
source_var = rval.id
if source_var not in local_sym_tab:
logger.error(f"Source variable '{source_var}' not found in symbol table")
return
# Get type and metadata from source variable
source_symbol = local_sym_tab[source_var]
# Allocate with same type and alignment
var = _allocate_with_type(builder, var_name, source_symbol.ir_type)
local_sym_tab[var_name] = LocalSymbol(
var, source_symbol.ir_type, source_symbol.metadata
)
logger.info(
f"Pre-allocated {var_name} from {source_var} with type {source_symbol.ir_type}"
)
def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, structs_sym_tab):
"""Allocate memory for struct field-to-variable assignment (a = dat.fld)."""
if not isinstance(rval.value, ast.Name):
logger.warning(f"Complex attribute access not supported for {var_name}")
return
struct_var = rval.value.id
field_name = rval.attr
# Validate struct and field
if struct_var not in local_sym_tab:
logger.error(f"Struct variable '{struct_var}' not found")
return
struct_type = local_sym_tab[struct_var].metadata
if not struct_type or struct_type not in structs_sym_tab:
logger.error(f"Struct type '{struct_type}' not found")
return
struct_info = structs_sym_tab[struct_type]
if field_name not in struct_info.fields:
logger.error(f"Field '{field_name}' not found in struct '{struct_type}'")
return
# Get field type
field_type = struct_info.field_type(field_name)
# Special case: char array -> allocate as i8* pointer instead
if (
isinstance(field_type, ir.ArrayType)
and isinstance(field_type.element, ir.IntType)
and field_type.element.width == 8
):
alloc_type = ir.PointerType(ir.IntType(8))
logger.info(f"Allocating {var_name} as i8* (pointer to char array)")
else:
alloc_type = field_type
var = _allocate_with_type(builder, var_name, alloc_type)
local_sym_tab[var_name] = LocalSymbol(var, alloc_type)
logger.info(
f"Pre-allocated {var_name} from {struct_var}.{field_name} with type {alloc_type}"
)
def _allocate_with_type(builder, var_name, ir_type):
"""Allocate variable with appropriate alignment for type."""
var = builder.alloca(ir_type, name=var_name)
var.align = _get_alignment(ir_type)
return var
def _get_alignment(ir_type):
"""Get appropriate alignment for IR type."""
if isinstance(ir_type, ir.IntType):
return ir_type.width // 8
elif isinstance(ir_type, ir.ArrayType) and isinstance(ir_type.element, ir.IntType):
return ir_type.element.width // 8
else:
return 8 # Default: pointer size

View File

@ -2,6 +2,7 @@ import ast
import logging
from llvmlite import ir
from pythonbpf.expr import eval_expr
from pythonbpf.helper import emit_probe_read_kernel_str_call
logger = logging.getLogger(__name__)
@ -27,27 +28,82 @@ def handle_struct_field_assignment(
# Get field pointer and evaluate value
field_ptr = struct_info.gep(builder, local_sym_tab[var_name].var, field_name)
val = eval_expr(
field_type = struct_info.field_type(field_name)
val_result = eval_expr(
func, module, builder, rval, local_sym_tab, map_sym_tab, structs_sym_tab
)
if val is None:
if val_result is None:
logger.error(f"Failed to evaluate value for {var_name}.{field_name}")
return
# TODO: Handle string assignment to char array (not a priority)
field_type = struct_info.field_type(field_name)
if isinstance(field_type, ir.ArrayType) and val[1] == ir.PointerType(ir.IntType(8)):
logger.warning(
f"String to char array assignment not implemented for {var_name}.{field_name}"
val, val_type = val_result
# Special case: i8* string to [N x i8] char array
if _is_char_array(field_type) and _is_i8_ptr(val_type):
_copy_string_to_char_array(
func,
module,
builder,
val,
field_ptr,
field_type,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
logger.info(f"Copied string to char array {var_name}.{field_name}")
return
# Store the value
builder.store(val[0], field_ptr)
# Regular assignment
builder.store(val, field_ptr)
logger.info(f"Assigned to struct field {var_name}.{field_name}")
def _copy_string_to_char_array(
func,
module,
builder,
src_ptr,
dst_ptr,
array_type,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
):
"""Copy string (i8*) to char array ([N x i8]) using bpf_probe_read_kernel_str"""
array_size = array_type.count
# Get pointer to first element: [N x i8]* -> i8*
dst_i8_ptr = builder.gep(
dst_ptr,
[ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), 0)],
inbounds=True,
)
# Use the shared emitter function
emit_probe_read_kernel_str_call(builder, dst_i8_ptr, array_size, src_ptr)
def _is_char_array(ir_type):
"""Check if type is [N x i8]."""
return (
isinstance(ir_type, ir.ArrayType)
and isinstance(ir_type.element, ir.IntType)
and ir_type.element.width == 8
)
def _is_i8_ptr(ir_type):
"""Check if type is i8*."""
return (
isinstance(ir_type, ir.PointerType)
and isinstance(ir_type.pointee, ir.IntType)
and ir_type.pointee.width == 8
)
def handle_variable_assignment(
func, module, builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab
):
@ -71,6 +127,17 @@ def handle_variable_assignment(
logger.info(f"Initialized struct {struct_name} for variable {var_name}")
return True
# Special case: struct field char array -> pointer
# Handle this before eval_expr to get the pointer, not the value
if isinstance(rval, ast.Attribute) and isinstance(rval.value, ast.Name):
converted_val = _try_convert_char_array_to_ptr(
rval, var_type, builder, local_sym_tab, structs_sym_tab
)
if converted_val is not None:
builder.store(converted_val, var_ptr)
logger.info(f"Assigned char array pointer to {var_name}")
return True
val_result = eval_expr(
func, module, builder, rval, local_sym_tab, map_sym_tab, structs_sym_tab
)
@ -106,3 +173,52 @@ def handle_variable_assignment(
builder.store(val, var_ptr)
logger.info(f"Assigned value to variable {var_name}")
return True
def _try_convert_char_array_to_ptr(
rval, var_type, builder, local_sym_tab, structs_sym_tab
):
"""Try to convert char array field to i8* pointer"""
# Only convert if target is i8*
if not (
isinstance(var_type, ir.PointerType)
and isinstance(var_type.pointee, ir.IntType)
and var_type.pointee.width == 8
):
return None
struct_var = rval.value.id
field_name = rval.attr
# Validate struct
if struct_var not in local_sym_tab:
return None
struct_type = local_sym_tab[struct_var].metadata
if not struct_type or struct_type not in structs_sym_tab:
return None
struct_info = structs_sym_tab[struct_type]
if field_name not in struct_info.fields:
return None
field_type = struct_info.field_type(field_name)
# Check if it's a char array
if not (
isinstance(field_type, ir.ArrayType)
and isinstance(field_type.element, ir.IntType)
and field_type.element.width == 8
):
return None
# Get pointer to struct field
struct_ptr = local_sym_tab[struct_var].var
field_ptr = struct_info.gep(builder, struct_ptr, field_name)
# GEP to first element: [N x i8]* -> i8*
return builder.gep(
field_ptr,
[ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), 0)],
inbounds=True,
)

View File

@ -5,6 +5,8 @@ from .functions import func_proc
from .maps import maps_proc
from .structs import structs_proc
from .vmlinux_parser import vmlinux_proc
from pythonbpf.vmlinux_parser.vmlinux_exports_handler import VmlinuxHandler
from .expr import VmlinuxHandlerRegistry
from .globals_pass import (
globals_list_creation,
globals_processing,
@ -15,7 +17,7 @@ import os
import subprocess
import inspect
from pathlib import Path
from pylibbpf import BpfProgram
from pylibbpf import BpfObject
import tempfile
from logging import Logger
import logging
@ -23,7 +25,7 @@ import re
logger: Logger = logging.getLogger(__name__)
VERSION = "v0.1.5"
VERSION = "v0.1.6"
def finalize_module(original_str):
@ -55,16 +57,20 @@ def processor(source_code, filename, module):
for func_node in bpf_chunks:
logger.info(f"Found BPF function/struct: {func_node.name}")
vmlinux_proc(tree, module)
vmlinux_symtab = vmlinux_proc(tree, module)
if vmlinux_symtab:
handler = VmlinuxHandler.initialize(vmlinux_symtab)
VmlinuxHandlerRegistry.set_handler(handler)
populate_global_symbol_table(tree, module)
license_processing(tree, module)
globals_processing(tree, module)
structs_sym_tab = structs_proc(tree, module, bpf_chunks)
map_sym_tab = maps_proc(tree, module, bpf_chunks)
func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab)
globals_list_creation(tree, module)
return structs_sym_tab, map_sym_tab
def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
@ -90,7 +96,7 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
True,
)
processor(source, filename, module)
structs_sym_tab, maps_sym_tab = processor(source, filename, module)
wchar_size = module.add_metadata(
[
@ -139,7 +145,7 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
f.write(module_string)
f.write("\n")
return output
return output, structs_sym_tab, maps_sym_tab
def _run_llc(ll_file, obj_file):
@ -169,7 +175,7 @@ def _run_llc(ll_file, obj_file):
return False
def compile(loglevel=logging.INFO) -> bool:
def compile(loglevel=logging.WARNING) -> bool:
# Look one level up the stack to the caller of this function
caller_frame = inspect.stack()[1]
caller_file = Path(caller_frame.filename).resolve()
@ -177,18 +183,19 @@ def compile(loglevel=logging.INFO) -> bool:
ll_file = Path("/tmp") / caller_file.with_suffix(".ll").name
o_file = caller_file.with_suffix(".o")
success = True
success = (
compile_to_ir(str(caller_file), str(ll_file), loglevel=loglevel) and success
_, structs_sym_tab, maps_sym_tab = compile_to_ir(
str(caller_file), str(ll_file), loglevel=loglevel
)
success = _run_llc(ll_file, o_file) and success
if not _run_llc(ll_file, o_file):
logger.error("Compilation to object file failed.")
return False
logger.info(f"Object written to {o_file}")
return success
return True
def BPF(loglevel=logging.INFO) -> BpfProgram:
def BPF(loglevel=logging.WARNING) -> BpfObject:
caller_frame = inspect.stack()[1]
src = inspect.getsource(caller_frame.frame)
with tempfile.NamedTemporaryFile(
@ -201,7 +208,9 @@ def BPF(loglevel=logging.INFO) -> BpfProgram:
f.write(src)
f.flush()
source = f.name
compile_to_ir(source, str(inter.name), loglevel=loglevel)
_, structs_sym_tab, maps_sym_tab = compile_to_ir(
source, str(inter.name), loglevel=loglevel
)
_run_llc(str(inter.name), str(obj_file.name))
return BpfProgram(str(obj_file.name))
return BpfObject(str(obj_file.name), structs=structs_sym_tab)

View File

@ -81,6 +81,20 @@ class DebugInfoGenerator:
},
)
def create_array_type_vmlinux(self, type_info: Any, count: int) -> Any:
"""Create an array type of the given base type with specified count"""
base_type, type_sizing = type_info
subrange = self.module.add_debug_info("DISubrange", {"count": count})
return self.module.add_debug_info(
"DICompositeType",
{
"tag": dc.DW_TAG_array_type,
"baseType": base_type,
"size": type_sizing,
"elements": [subrange],
},
)
@staticmethod
def _compute_array_size(base_type: Any, count: int) -> int:
# Extract size from base_type if possible
@ -101,6 +115,23 @@ class DebugInfoGenerator:
},
)
def create_struct_member_vmlinux(
self, name: str, base_type_with_size: Any, offset: int
) -> Any:
"""Create a struct member with the given name, type, and offset"""
base_type, type_size = base_type_with_size
return self.module.add_debug_info(
"DIDerivedType",
{
"tag": dc.DW_TAG_member,
"name": name,
"file": self.module._file_metadata,
"baseType": base_type,
"size": type_size,
"offset": offset,
},
)
def create_struct_type(
self, members: List[Any], size: int, is_distinct: bool
) -> Any:
@ -116,6 +147,22 @@ class DebugInfoGenerator:
is_distinct=is_distinct,
)
def create_struct_type_with_name(
self, name: str, members: List[Any], size: int, is_distinct: bool
) -> Any:
"""Create a struct type with the given members and size"""
return self.module.add_debug_info(
"DICompositeType",
{
"name": name,
"tag": dc.DW_TAG_structure_type,
"file": self.module._file_metadata,
"size": size,
"elements": members,
},
is_distinct=is_distinct,
)
def create_global_var_debug_info(
self, name: str, var_type: Any, is_local: bool = False
) -> Any:

View File

@ -2,6 +2,7 @@ from .expr_pass import eval_expr, handle_expr, get_operand_value
from .type_normalization import convert_to_bool, get_base_type_and_depth
from .ir_ops import deref_to_depth
from .call_registry import CallHandlerRegistry
from .vmlinux_registry import VmlinuxHandlerRegistry
__all__ = [
"eval_expr",
@ -11,4 +12,5 @@ __all__ = [
"deref_to_depth",
"get_operand_value",
"CallHandlerRegistry",
"VmlinuxHandlerRegistry",
]

View File

@ -12,6 +12,7 @@ from .type_normalization import (
get_base_type_and_depth,
deref_to_depth,
)
from .vmlinux_registry import VmlinuxHandlerRegistry
logger: Logger = logging.getLogger(__name__)
@ -27,8 +28,12 @@ def _handle_name_expr(expr: ast.Name, local_sym_tab: Dict, builder: ir.IRBuilder
val = builder.load(var)
return val, local_sym_tab[expr.id].ir_type
else:
logger.info(f"Undefined variable {expr.id}")
return None
# Check if it's a vmlinux enum/constant
vmlinux_result = VmlinuxHandlerRegistry.handle_name(expr.id)
if vmlinux_result is not None:
return vmlinux_result
raise SyntaxError(f"Undefined variable {expr.id}")
def _handle_constant_expr(module, builder, expr: ast.Constant):
@ -74,6 +79,13 @@ def _handle_attribute_expr(
val = builder.load(gep)
field_type = metadata.field_type(attr_name)
return val, field_type
# Try vmlinux handler as fallback
vmlinux_result = VmlinuxHandlerRegistry.handle_attribute(
expr, local_sym_tab, None, builder
)
if vmlinux_result is not None:
return vmlinux_result
return None
@ -130,7 +142,12 @@ def get_operand_value(
logger.info(f"var is {var}, base_type is {base_type}, depth is {depth}")
val = deref_to_depth(func, builder, var, depth)
return val
raise ValueError(f"Undefined variable: {operand.id}")
else:
# Check if it's a vmlinux enum/constant
vmlinux_result = VmlinuxHandlerRegistry.handle_name(operand.id)
if vmlinux_result is not None:
val, _ = vmlinux_result
return val
elif isinstance(operand, ast.Constant):
if isinstance(operand.value, int):
cst = ir.Constant(ir.IntType(64), int(operand.value))
@ -332,6 +349,7 @@ def _handle_unary_op(
neg_one = ir.Constant(ir.IntType(64), -1)
result = builder.mul(operand, neg_one)
return result, ir.IntType(64)
return None
# ============================================================================

View File

@ -0,0 +1,45 @@
import ast
class VmlinuxHandlerRegistry:
"""Registry for vmlinux handler operations"""
_handler = None
@classmethod
def set_handler(cls, handler):
"""Set the vmlinux handler"""
cls._handler = handler
@classmethod
def get_handler(cls):
"""Get the vmlinux handler"""
return cls._handler
@classmethod
def handle_name(cls, name):
"""Try to handle a name as vmlinux enum/constant"""
if cls._handler is None:
return None
return cls._handler.handle_vmlinux_enum(name)
@classmethod
def handle_attribute(cls, expr, local_sym_tab, module, builder):
"""Try to handle an attribute access as vmlinux struct field"""
if cls._handler is None:
return None
if isinstance(expr.value, ast.Name):
var_name = expr.value.id
field_name = expr.attr
return cls._handler.handle_vmlinux_struct_field(
var_name, field_name, module, builder, local_sym_tab
)
return None
@classmethod
def is_vmlinux_struct(cls, name):
"""Check if a name refers to a vmlinux struct"""
if cls._handler is None:
return False
return cls._handler.is_vmlinux_struct(name)

View File

@ -12,7 +12,11 @@ from pythonbpf.assign_pass import (
handle_variable_assignment,
handle_struct_field_assignment,
)
from pythonbpf.allocation_pass import handle_assign_allocation, allocate_temp_pool
from pythonbpf.allocation_pass import (
handle_assign_allocation,
allocate_temp_pool,
create_targets_and_rvals,
)
from .return_utils import handle_none_return, handle_xdp_return, is_xdp_name
from .function_metadata import get_probe_string, is_global_function, infer_return_type
@ -29,7 +33,7 @@ logger = logging.getLogger(__name__)
def count_temps_in_call(call_node, local_sym_tab):
"""Count the number of temporary variables needed for a function call."""
count = 0
count = {}
is_helper = False
# NOTE: We exclude print calls for now
@ -39,21 +43,28 @@ def count_temps_in_call(call_node, local_sym_tab):
and call_node.func.id != "print"
):
is_helper = True
func_name = call_node.func.id
elif isinstance(call_node.func, ast.Attribute):
if HelperHandlerRegistry.has_handler(call_node.func.attr):
is_helper = True
func_name = call_node.func.attr
if not is_helper:
return 0
return {} # No temps needed
for arg in call_node.args:
for arg_idx in range(len(call_node.args)):
# NOTE: Count all non-name arguments
# For struct fields, if it is being passed as an argument,
# The struct object should already exist in the local_sym_tab
if not isinstance(arg, ast.Name) and not (
arg = call_node.args[arg_idx]
if isinstance(arg, ast.Name) or (
isinstance(arg, ast.Attribute) and arg.value.id in local_sym_tab
):
count += 1
continue
param_type = HelperHandlerRegistry.get_param_type(func_name, arg_idx)
if isinstance(param_type, ir.PointerType):
pointee_type = param_type.pointee
count[pointee_type] = count.get(pointee_type, 0) + 1
return count
@ -89,11 +100,15 @@ def handle_if_allocation(
def allocate_mem(
module, builder, body, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab
):
max_temps_needed = 0
max_temps_needed = {}
def merge_type_counts(count_dict):
nonlocal max_temps_needed
for typ, cnt in count_dict.items():
max_temps_needed[typ] = max(max_temps_needed.get(typ, 0), cnt)
def update_max_temps_for_stmt(stmt):
nonlocal max_temps_needed
temps_needed = 0
if isinstance(stmt, ast.If):
for s in stmt.body:
@ -102,10 +117,13 @@ def allocate_mem(
update_max_temps_for_stmt(s)
return
stmt_temps = {}
for node in ast.walk(stmt):
if isinstance(node, ast.Call):
temps_needed += count_temps_in_call(node, local_sym_tab)
max_temps_needed = max(max_temps_needed, temps_needed)
call_temps = count_temps_in_call(node, local_sym_tab)
for typ, cnt in call_temps.items():
stmt_temps[typ] = stmt_temps.get(typ, 0) + cnt
merge_type_counts(stmt_temps)
for stmt in body:
update_max_temps_for_stmt(stmt)
@ -140,48 +158,43 @@ def handle_assign(
):
"""Handle assignment statements in the function body."""
# TODO: Support this later
# GH #37
if len(stmt.targets) != 1:
logger.error("Multi-target assignment is not supported for now")
return
# NOTE: Support multi-target assignments (e.g.: a, b = 1, 2)
targets, rvals = create_targets_and_rvals(stmt)
target = stmt.targets[0]
rval = stmt.value
for target, rval in zip(targets, rvals):
if isinstance(target, ast.Name):
# NOTE: Simple variable assignment case: x = 5
var_name = target.id
result = handle_variable_assignment(
func,
module,
builder,
var_name,
rval,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if not result:
logger.error(f"Failed to handle assignment to {var_name}")
continue
if isinstance(target, ast.Name):
# NOTE: Simple variable assignment case: x = 5
var_name = target.id
result = handle_variable_assignment(
func,
module,
builder,
var_name,
rval,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if not result:
logger.error(f"Failed to handle assignment to {var_name}")
return
if isinstance(target, ast.Attribute):
# NOTE: Struct field assignment case: pkt.field = value
handle_struct_field_assignment(
func,
module,
builder,
target,
rval,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
continue
if isinstance(target, ast.Attribute):
# NOTE: Struct field assignment case: pkt.field = value
handle_struct_field_assignment(
func,
module,
builder,
target,
rval,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
return
# Unsupported target type
logger.error(f"Unsupported assignment target: {ast.dump(target)}")
# Unsupported target type
logger.error(f"Unsupported assignment target: {ast.dump(target)}")
def handle_cond(
@ -311,7 +324,13 @@ def process_stmt(
def process_func_body(
module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab
module,
builder,
func_node,
func,
ret_type,
map_sym_tab,
structs_sym_tab,
):
"""Process the body of a bpf function"""
# TODO: A lot. We just have print -> bpf_trace_printk for now
@ -384,7 +403,13 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t
builder = ir.IRBuilder(block)
process_func_body(
module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab
module,
builder,
func_node,
func,
ret_type,
map_sym_tab,
structs_sym_tab,
)
return func

View File

@ -1,7 +1,20 @@
from .helper_registry import HelperHandlerRegistry
from .helper_utils import reset_scratch_pool
from .bpf_helper_handler import handle_helper_call
from .helpers import ktime, pid, deref, XDP_DROP, XDP_PASS
from .bpf_helper_handler import handle_helper_call, emit_probe_read_kernel_str_call
from .helpers import (
ktime,
pid,
deref,
comm,
probe_read_str,
random,
probe_read,
smp_processor_id,
uid,
skb_store_bytes,
XDP_DROP,
XDP_PASS,
)
# Register the helper handler with expr module
@ -59,9 +72,17 @@ __all__ = [
"HelperHandlerRegistry",
"reset_scratch_pool",
"handle_helper_call",
"emit_probe_read_kernel_str_call",
"ktime",
"pid",
"deref",
"comm",
"probe_read_str",
"random",
"probe_read",
"smp_processor_id",
"uid",
"skb_store_bytes",
"XDP_DROP",
"XDP_PASS",
]

View File

@ -7,6 +7,9 @@ from .helper_utils import (
get_or_create_ptr_from_arg,
get_flags_val,
get_data_ptr_and_size,
get_buffer_ptr_and_size,
get_ptr_from_arg,
get_int_value_from_arg,
)
from .printk_formatter import simple_string_print, handle_fstring_print
@ -20,13 +23,24 @@ class BPFHelperID(Enum):
BPF_MAP_LOOKUP_ELEM = 1
BPF_MAP_UPDATE_ELEM = 2
BPF_MAP_DELETE_ELEM = 3
BPF_PROBE_READ = 4
BPF_KTIME_GET_NS = 5
BPF_PRINTK = 6
BPF_GET_PRANDOM_U32 = 7
BPF_GET_SMP_PROCESSOR_ID = 8
BPF_SKB_STORE_BYTES = 9
BPF_GET_CURRENT_PID_TGID = 14
BPF_GET_CURRENT_UID_GID = 15
BPF_GET_CURRENT_COMM = 16
BPF_PERF_EVENT_OUTPUT = 25
BPF_PROBE_READ_KERNEL_STR = 115
@HelperHandlerRegistry.register("ktime")
@HelperHandlerRegistry.register(
"ktime",
param_types=[],
return_type=ir.IntType(64),
)
def bpf_ktime_get_ns_emitter(
call,
map_ptr,
@ -49,7 +63,11 @@ def bpf_ktime_get_ns_emitter(
return result, ir.IntType(64)
@HelperHandlerRegistry.register("lookup")
@HelperHandlerRegistry.register(
"lookup",
param_types=[ir.PointerType(ir.IntType(64))],
return_type=ir.PointerType(ir.IntType(64)),
)
def bpf_map_lookup_elem_emitter(
call,
map_ptr,
@ -91,6 +109,7 @@ def bpf_map_lookup_elem_emitter(
return result, ir.PointerType()
# NOTE: This has special handling so we won't reflect the signature here.
@HelperHandlerRegistry.register("print")
def bpf_printk_emitter(
call,
@ -139,7 +158,15 @@ def bpf_printk_emitter(
return True
@HelperHandlerRegistry.register("update")
@HelperHandlerRegistry.register(
"update",
param_types=[
ir.PointerType(ir.IntType(64)),
ir.PointerType(ir.IntType(64)),
ir.IntType(64),
],
return_type=ir.PointerType(ir.IntType(64)),
)
def bpf_map_update_elem_emitter(
call,
map_ptr,
@ -194,7 +221,11 @@ def bpf_map_update_elem_emitter(
return result, None
@HelperHandlerRegistry.register("delete")
@HelperHandlerRegistry.register(
"delete",
param_types=[ir.PointerType(ir.IntType(64))],
return_type=ir.PointerType(ir.IntType(64)),
)
def bpf_map_delete_elem_emitter(
call,
map_ptr,
@ -234,7 +265,72 @@ def bpf_map_delete_elem_emitter(
return result, None
@HelperHandlerRegistry.register("pid")
@HelperHandlerRegistry.register(
"comm",
param_types=[ir.PointerType(ir.IntType(8))],
return_type=ir.IntType(64),
)
def bpf_get_current_comm_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_get_current_comm helper function call.
Accepts: comm(dataobj.field) or comm(my_buffer)
"""
if not call.args or len(call.args) != 1:
raise ValueError(
f"comm expects exactly one argument (buffer), got {len(call.args)}"
)
buf_arg = call.args[0]
# Extract buffer pointer and size
buf_ptr, buf_size = get_buffer_ptr_and_size(
buf_arg, builder, local_sym_tab, struct_sym_tab
)
# Validate it's a char array
if not isinstance(
buf_ptr.type.pointee, ir.ArrayType
) or buf_ptr.type.pointee.element != ir.IntType(8):
raise ValueError(
f"comm expects a char array buffer, got {buf_ptr.type.pointee}"
)
# Cast to void* and call helper
buf_void_ptr = builder.bitcast(buf_ptr, ir.PointerType())
fn_type = ir.FunctionType(
ir.IntType(64),
[ir.PointerType(), ir.IntType(32)],
var_arg=False,
)
fn_ptr = builder.inttoptr(
ir.Constant(ir.IntType(64), BPFHelperID.BPF_GET_CURRENT_COMM.value),
ir.PointerType(fn_type),
)
result = builder.call(
fn_ptr, [buf_void_ptr, ir.Constant(ir.IntType(32), buf_size)], tail=False
)
logger.info(f"Emitted bpf_get_current_comm with {buf_size} byte buffer")
return result, None
@HelperHandlerRegistry.register(
"pid",
param_types=[],
return_type=ir.IntType(64),
)
def bpf_get_current_pid_tgid_emitter(
call,
map_ptr,
@ -256,12 +352,17 @@ def bpf_get_current_pid_tgid_emitter(
result = builder.call(fn_ptr, [], tail=False)
# Extract the lower 32 bits (PID) using bitwise AND with 0xFFFFFFFF
# TODO: return both PID and TGID if we end up needing TGID somewhere
mask = ir.Constant(ir.IntType(64), 0xFFFFFFFF)
pid = builder.and_(result, mask)
return pid, ir.IntType(64)
@HelperHandlerRegistry.register("output")
@HelperHandlerRegistry.register(
"output",
param_types=[ir.PointerType(ir.IntType(8))],
return_type=ir.IntType(64),
)
def bpf_perf_event_output_handler(
call,
map_ptr,
@ -309,6 +410,332 @@ def bpf_perf_event_output_handler(
return result, None
def emit_probe_read_kernel_str_call(builder, dst_ptr, dst_size, src_ptr):
"""Emit LLVM IR call to bpf_probe_read_kernel_str"""
fn_type = ir.FunctionType(
ir.IntType(64),
[ir.PointerType(), ir.IntType(32), ir.PointerType()],
var_arg=False,
)
fn_ptr = builder.inttoptr(
ir.Constant(ir.IntType(64), BPFHelperID.BPF_PROBE_READ_KERNEL_STR.value),
ir.PointerType(fn_type),
)
result = builder.call(
fn_ptr,
[
builder.bitcast(dst_ptr, ir.PointerType()),
ir.Constant(ir.IntType(32), dst_size),
builder.bitcast(src_ptr, ir.PointerType()),
],
tail=False,
)
logger.info(f"Emitted bpf_probe_read_kernel_str (size={dst_size})")
return result
@HelperHandlerRegistry.register(
"probe_read_str",
param_types=[
ir.PointerType(ir.IntType(8)),
ir.PointerType(ir.IntType(8)),
],
return_type=ir.IntType(64),
)
def bpf_probe_read_kernel_str_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""Emit LLVM IR for bpf_probe_read_kernel_str helper."""
if len(call.args) != 2:
raise ValueError(
f"probe_read_str expects 2 args (dst, src), got {len(call.args)}"
)
# Get destination buffer (char array -> i8*)
dst_ptr, dst_size = get_or_create_ptr_from_arg(
func, module, call.args[0], builder, local_sym_tab, map_sym_tab, struct_sym_tab
)
# Get source pointer (evaluate expression)
src_ptr, src_type = get_ptr_from_arg(
call.args[1], func, module, builder, local_sym_tab, map_sym_tab, struct_sym_tab
)
# Emit the helper call
result = emit_probe_read_kernel_str_call(builder, dst_ptr, dst_size, src_ptr)
logger.info(f"Emitted bpf_probe_read_kernel_str (size={dst_size})")
return result, ir.IntType(64)
@HelperHandlerRegistry.register(
"random",
param_types=[],
return_type=ir.IntType(32),
)
def bpf_get_prandom_u32_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_get_prandom_u32 helper function call.
"""
helper_id = ir.Constant(ir.IntType(64), BPFHelperID.BPF_GET_PRANDOM_U32.value)
fn_type = ir.FunctionType(ir.IntType(32), [], var_arg=False)
fn_ptr_type = ir.PointerType(fn_type)
fn_ptr = builder.inttoptr(helper_id, fn_ptr_type)
result = builder.call(fn_ptr, [], tail=False)
return result, ir.IntType(32)
@HelperHandlerRegistry.register(
"probe_read",
param_types=[
ir.PointerType(ir.IntType(8)),
ir.IntType(32),
ir.PointerType(ir.IntType(8)),
],
return_type=ir.IntType(64),
)
def bpf_probe_read_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_probe_read helper function
"""
if len(call.args) != 3:
logger.warn("Expected 3 args for probe_read helper")
return
dst_ptr = get_or_create_ptr_from_arg(
func,
module,
call.args[0],
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
ir.IntType(8),
)
size_val = get_int_value_from_arg(
call.args[1],
func,
module,
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
)
src_ptr = get_or_create_ptr_from_arg(
func,
module,
call.args[2],
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
ir.IntType(8),
)
fn_type = ir.FunctionType(
ir.IntType(64),
[ir.PointerType(), ir.IntType(32), ir.PointerType()],
var_arg=False,
)
fn_ptr = builder.inttoptr(
ir.Constant(ir.IntType(64), BPFHelperID.BPF_PROBE_READ.value),
ir.PointerType(fn_type),
)
result = builder.call(
fn_ptr,
[
builder.bitcast(dst_ptr, ir.PointerType()),
builder.trunc(size_val, ir.IntType(32)),
builder.bitcast(src_ptr, ir.PointerType()),
],
tail=False,
)
logger.info(f"Emitted bpf_probe_read (size={size_val})")
return result, ir.IntType(64)
@HelperHandlerRegistry.register(
"smp_processor_id",
param_types=[],
return_type=ir.IntType(32),
)
def bpf_get_smp_processor_id_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_get_smp_processor_id helper function call.
"""
helper_id = ir.Constant(ir.IntType(64), BPFHelperID.BPF_GET_SMP_PROCESSOR_ID.value)
fn_type = ir.FunctionType(ir.IntType(32), [], var_arg=False)
fn_ptr_type = ir.PointerType(fn_type)
fn_ptr = builder.inttoptr(helper_id, fn_ptr_type)
result = builder.call(fn_ptr, [], tail=False)
logger.info("Emitted bpf_get_smp_processor_id call")
return result, ir.IntType(32)
@HelperHandlerRegistry.register(
"uid",
param_types=[],
return_type=ir.IntType(64),
)
def bpf_get_current_uid_gid_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_get_current_uid_gid helper function call.
"""
helper_id = ir.Constant(ir.IntType(64), BPFHelperID.BPF_GET_CURRENT_UID_GID.value)
fn_type = ir.FunctionType(ir.IntType(64), [], var_arg=False)
fn_ptr_type = ir.PointerType(fn_type)
fn_ptr = builder.inttoptr(helper_id, fn_ptr_type)
result = builder.call(fn_ptr, [], tail=False)
# Extract the lower 32 bits (UID) using bitwise AND with 0xFFFFFFFF
# TODO: return both UID and GID if we end up needing GID somewhere
mask = ir.Constant(ir.IntType(64), 0xFFFFFFFF)
pid = builder.and_(result, mask)
return pid, ir.IntType(64)
@HelperHandlerRegistry.register(
"skb_store_bytes",
param_types=[
ir.IntType(32),
ir.PointerType(ir.IntType(8)),
ir.IntType(32),
ir.IntType(64),
],
return_type=ir.IntType(64),
)
def bpf_skb_store_bytes_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
map_sym_tab=None,
):
"""
Emit LLVM IR for bpf_skb_store_bytes helper function call.
Expected call signature: skb_store_bytes(skb, offset, from, len, flags)
"""
args_signature = [
ir.PointerType(), # skb pointer
ir.IntType(32), # offset
ir.PointerType(), # from
ir.IntType(32), # len
ir.IntType(64), # flags
]
if len(call.args) not in (3, 4):
raise ValueError(
f"skb_store_bytes expects 3 or 4 args (offset, from, len, flags), got {len(call.args)}"
)
skb_ptr = func.args[0] # First argument to the function is skb
offset_val = get_int_value_from_arg(
call.args[0],
func,
module,
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
)
from_ptr = get_or_create_ptr_from_arg(
func,
module,
call.args[1],
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
args_signature[2],
)
len_val = get_int_value_from_arg(
call.args[2],
func,
module,
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab,
)
if len(call.args) == 4:
flags_val = get_flags_val(call.args[3], builder, local_sym_tab)
else:
flags_val = 0
flags = ir.Constant(ir.IntType(64), flags_val)
fn_type = ir.FunctionType(
ir.IntType(64),
args_signature,
var_arg=False,
)
fn_ptr = builder.inttoptr(
ir.Constant(ir.IntType(64), BPFHelperID.BPF_SKB_STORE_BYTES.value),
ir.PointerType(fn_type),
)
result = builder.call(
fn_ptr,
[
builder.bitcast(skb_ptr, ir.PointerType()),
builder.trunc(offset_val, ir.IntType(32)),
builder.bitcast(from_ptr, ir.PointerType()),
builder.trunc(len_val, ir.IntType(32)),
flags,
],
tail=False,
)
logger.info("Emitted bpf_skb_store_bytes call")
return result, ir.IntType(64)
def handle_helper_call(
call,
module,

View File

@ -1,17 +1,31 @@
from dataclasses import dataclass
from llvmlite import ir
from typing import Callable
@dataclass
class HelperSignature:
"""Signature of a BPF helper function"""
arg_types: list[ir.Type]
return_type: ir.Type
func: Callable
class HelperHandlerRegistry:
"""Registry for BPF helpers"""
_handlers: dict[str, Callable] = {}
_handlers: dict[str, HelperSignature] = {}
@classmethod
def register(cls, helper_name):
def register(cls, helper_name, param_types=None, return_type=None):
"""Decorator to register a handler function for a helper"""
def decorator(func):
cls._handlers[helper_name] = func
helper_sig = HelperSignature(
arg_types=param_types, return_type=return_type, func=func
)
cls._handlers[helper_name] = helper_sig
return func
return decorator
@ -19,9 +33,29 @@ class HelperHandlerRegistry:
@classmethod
def get_handler(cls, helper_name):
"""Get the handler function for a helper"""
return cls._handlers.get(helper_name)
handler = cls._handlers.get(helper_name)
return handler.func if handler else None
@classmethod
def has_handler(cls, helper_name):
"""Check if a handler function is registered for a helper"""
return helper_name in cls._handlers
@classmethod
def get_signature(cls, helper_name):
"""Get the signature of a helper function"""
return cls._handlers.get(helper_name)
@classmethod
def get_param_type(cls, helper_name, index):
"""Get the type of a parameter of a helper function by the index"""
signature = cls.get_signature(helper_name)
if signature and signature.arg_types and 0 <= index < len(signature.arg_types):
return signature.arg_types[index]
return None
@classmethod
def get_return_type(cls, helper_name):
"""Get the return type of a helper function"""
signature = cls.get_signature(helper_name)
return signature.return_type if signature else None

View File

@ -4,6 +4,7 @@ import logging
from llvmlite import ir
from pythonbpf.expr import (
get_operand_value,
eval_expr,
)
logger = logging.getLogger(__name__)
@ -13,26 +14,43 @@ class ScratchPoolManager:
"""Manage the temporary helper variables in local_sym_tab"""
def __init__(self):
self._counter = 0
self._counters = {}
@property
def counter(self):
return self._counter
return sum(self._counters.values())
def reset(self):
self._counter = 0
self._counters.clear()
logger.debug("Scratch pool counter reset to 0")
def get_next_temp(self, local_sym_tab):
temp_name = f"__helper_temp_{self._counter}"
self._counter += 1
def _get_type_name(self, ir_type):
if isinstance(ir_type, ir.PointerType):
return "ptr"
elif isinstance(ir_type, ir.IntType):
return f"i{ir_type.width}"
elif isinstance(ir_type, ir.ArrayType):
return f"[{ir_type.count}x{self._get_type_name(ir_type.element)}]"
else:
return str(ir_type).replace(" ", "")
def get_next_temp(self, local_sym_tab, expected_type=None):
# Default to i64 if no expected type provided
type_name = self._get_type_name(expected_type) if expected_type else "i64"
if type_name not in self._counters:
self._counters[type_name] = 0
counter = self._counters[type_name]
temp_name = f"__helper_temp_{type_name}_{counter}"
self._counters[type_name] += 1
if temp_name not in local_sym_tab:
raise ValueError(
f"Scratch pool exhausted or inadequate: {temp_name}. "
f"Current counter: {self._counter}"
f"Type: {type_name} Counter: {counter}"
)
logger.debug(f"Using {temp_name} for type {type_name}")
return local_sym_tab[temp_name].var, temp_name
@ -59,24 +77,73 @@ def get_var_ptr_from_name(var_name, local_sym_tab):
def create_int_constant_ptr(value, builder, local_sym_tab, int_width=64):
"""Create a pointer to an integer constant."""
# Default to 64-bit integer
ptr, temp_name = _temp_pool_manager.get_next_temp(local_sym_tab)
int_type = ir.IntType(int_width)
ptr, temp_name = _temp_pool_manager.get_next_temp(local_sym_tab, int_type)
logger.info(f"Using temp variable '{temp_name}' for int constant {value}")
const_val = ir.Constant(ir.IntType(int_width), value)
const_val = ir.Constant(int_type, value)
builder.store(const_val, ptr)
return ptr
def get_or_create_ptr_from_arg(
func, module, arg, builder, local_sym_tab, map_sym_tab, struct_sym_tab=None
func,
module,
arg,
builder,
local_sym_tab,
map_sym_tab,
struct_sym_tab=None,
expected_type=None,
):
"""Extract or create pointer from the call arguments."""
logger.info(f"Getting pointer from arg: {ast.dump(arg)}")
sz = None
if isinstance(arg, ast.Name):
# Stack space is already allocated
ptr = get_var_ptr_from_name(arg.id, local_sym_tab)
elif isinstance(arg, ast.Constant) and isinstance(arg.value, int):
ptr = create_int_constant_ptr(arg.value, builder, local_sym_tab)
int_width = 64 # Default to i64
if expected_type and isinstance(expected_type, ir.IntType):
int_width = expected_type.width
ptr = create_int_constant_ptr(arg.value, builder, local_sym_tab, int_width)
elif isinstance(arg, ast.Attribute):
# A struct field
struct_name = arg.value.id
field_name = arg.attr
if not local_sym_tab or struct_name not in local_sym_tab:
raise ValueError(f"Struct '{struct_name}' not found")
struct_type = local_sym_tab[struct_name].metadata
if not struct_sym_tab or struct_type not in struct_sym_tab:
raise ValueError(f"Struct type '{struct_type}' not found")
struct_info = struct_sym_tab[struct_type]
if field_name not in struct_info.fields:
raise ValueError(
f"Field '{field_name}' not found in struct '{struct_name}'"
)
field_type = struct_info.field_type(field_name)
struct_ptr = local_sym_tab[struct_name].var
# Special handling for char arrays
if (
isinstance(field_type, ir.ArrayType)
and isinstance(field_type.element, ir.IntType)
and field_type.element.width == 8
):
ptr, sz = get_char_array_ptr_and_size(
arg, builder, local_sym_tab, struct_sym_tab
)
if not ptr:
raise ValueError("Failed to get char array pointer from struct field")
else:
ptr = struct_info.gep(builder, struct_ptr, field_name)
else:
# NOTE: For any integer expression reaching this branch, it is probably a struct field or a binop
# Evaluate the expression and store the result in a temp variable
val = get_operand_value(
func, module, arg, builder, local_sym_tab, map_sym_tab, struct_sym_tab
@ -84,13 +151,20 @@ def get_or_create_ptr_from_arg(
if val is None:
raise ValueError("Failed to evaluate expression for helper arg.")
# NOTE: We assume the result is an int64 for now
# if isinstance(arg, ast.Attribute):
# return val
ptr, temp_name = _temp_pool_manager.get_next_temp(local_sym_tab)
ptr, temp_name = _temp_pool_manager.get_next_temp(local_sym_tab, expected_type)
logger.info(f"Using temp variable '{temp_name}' for expression result")
if (
isinstance(val.type, ir.IntType)
and expected_type
and val.type.width > expected_type.width
):
val = builder.trunc(val, expected_type)
builder.store(val, ptr)
# NOTE: For char arrays, also return size
if sz:
return ptr, sz
return ptr
@ -136,3 +210,160 @@ def get_data_ptr_and_size(data_arg, local_sym_tab, struct_sym_tab):
raise NotImplementedError(
"Only simple object names are supported as data in perf event output."
)
def get_buffer_ptr_and_size(buf_arg, builder, local_sym_tab, struct_sym_tab):
"""Extract buffer pointer and size from either a struct field or variable."""
# Case 1: Struct field (obj.field)
if isinstance(buf_arg, ast.Attribute):
if not isinstance(buf_arg.value, ast.Name):
raise ValueError(
"Only simple struct field access supported (e.g., obj.field)"
)
struct_name = buf_arg.value.id
field_name = buf_arg.attr
# Lookup struct
if not local_sym_tab or struct_name not in local_sym_tab:
raise ValueError(f"Struct '{struct_name}' not found")
struct_type = local_sym_tab[struct_name].metadata
if not struct_sym_tab or struct_type not in struct_sym_tab:
raise ValueError(f"Struct type '{struct_type}' not found")
struct_info = struct_sym_tab[struct_type]
# Get field pointer and type
struct_ptr = local_sym_tab[struct_name].var
field_ptr = struct_info.gep(builder, struct_ptr, field_name)
field_type = struct_info.field_type(field_name)
if not isinstance(field_type, ir.ArrayType):
raise ValueError(f"Field '{field_name}' must be an array type")
return field_ptr, field_type.count
# Case 2: Variable name
elif isinstance(buf_arg, ast.Name):
var_name = buf_arg.id
if not local_sym_tab or var_name not in local_sym_tab:
raise ValueError(f"Variable '{var_name}' not found")
var_ptr = local_sym_tab[var_name].var
var_type = local_sym_tab[var_name].ir_type
if not isinstance(var_type, ir.ArrayType):
raise ValueError(f"Variable '{var_name}' must be an array type")
return var_ptr, var_type.count
else:
raise ValueError(
"comm expects either a struct field (obj.field) or variable name"
)
def get_char_array_ptr_and_size(buf_arg, builder, local_sym_tab, struct_sym_tab):
"""Get pointer to char array and its size."""
# Struct field: obj.field
if isinstance(buf_arg, ast.Attribute) and isinstance(buf_arg.value, ast.Name):
var_name = buf_arg.value.id
field_name = buf_arg.attr
if not (local_sym_tab and var_name in local_sym_tab):
raise ValueError(f"Variable '{var_name}' not found")
struct_type = local_sym_tab[var_name].metadata
if not (struct_sym_tab and struct_type in struct_sym_tab):
raise ValueError(f"Struct type '{struct_type}' not found")
struct_info = struct_sym_tab[struct_type]
if field_name not in struct_info.fields:
raise ValueError(f"Field '{field_name}' not found")
field_type = struct_info.field_type(field_name)
if not _is_char_array(field_type):
raise ValueError("Expected char array field")
struct_ptr = local_sym_tab[var_name].var
field_ptr = struct_info.gep(builder, struct_ptr, field_name)
# GEP to first element: [N x i8]* -> i8*
buf_ptr = builder.gep(
field_ptr,
[ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), 0)],
inbounds=True,
)
return buf_ptr, field_type.count
elif isinstance(buf_arg, ast.Name):
# NOTE: We shouldn't be doing this as we can't get size info
var_name = buf_arg.id
if not (local_sym_tab and var_name in local_sym_tab):
raise ValueError(f"Variable '{var_name}' not found")
var_ptr = local_sym_tab[var_name].var
var_type = local_sym_tab[var_name].ir_type
if not isinstance(var_type, ir.PointerType) or not isinstance(
var_type.pointee, ir.IntType(8)
):
raise ValueError("Expected str ptr variable")
return var_ptr, 256 # Size unknown for str ptr, using 256 as default
else:
raise ValueError("Expected struct field or variable name")
def _is_char_array(ir_type):
"""Check if IR type is [N x i8]."""
return (
isinstance(ir_type, ir.ArrayType)
and isinstance(ir_type.element, ir.IntType)
and ir_type.element.width == 8
)
def get_ptr_from_arg(
arg, func, module, builder, local_sym_tab, map_sym_tab, struct_sym_tab
):
"""Evaluate argument and return pointer value"""
result = eval_expr(
func, module, builder, arg, local_sym_tab, map_sym_tab, struct_sym_tab
)
if not result:
raise ValueError("Failed to evaluate argument")
val, val_type = result
if not isinstance(val_type, ir.PointerType):
raise ValueError(f"Expected pointer type, got {val_type}")
return val, val_type
def get_int_value_from_arg(
arg, func, module, builder, local_sym_tab, map_sym_tab, struct_sym_tab
):
"""Evaluate argument and return integer value"""
result = eval_expr(
func, module, builder, arg, local_sym_tab, map_sym_tab, struct_sym_tab
)
if not result:
raise ValueError("Failed to evaluate argument")
val, val_type = result
if not isinstance(val_type, ir.IntType):
raise ValueError(f"Expected integer type, got {val_type}")
return val

View File

@ -2,19 +2,56 @@ import ctypes
def ktime():
"""get current ktime"""
return ctypes.c_int64(0)
def pid():
"""get current process id"""
return ctypes.c_int32(0)
def deref(ptr):
"dereference a pointer"
"""dereference a pointer"""
result = ctypes.cast(ptr, ctypes.POINTER(ctypes.c_void_p)).contents.value
return result if result is not None else 0
def comm(buf):
"""get current process command name"""
return ctypes.c_int64(0)
def probe_read_str(dst, src):
"""Safely read a null-terminated string from kernel memory"""
return ctypes.c_int64(0)
def random():
"""get a pseudorandom u32 number"""
return ctypes.c_int32(0)
def probe_read(dst, size, src):
"""Safely read data from kernel memory"""
return ctypes.c_int64(0)
def smp_processor_id():
"""get the current CPU id"""
return ctypes.c_int32(0)
def uid():
"""get current user id"""
return ctypes.c_int32(0)
def skb_store_bytes(offset, from_buf, size, flags=0):
"""store bytes into a socket buffer"""
return ctypes.c_int64(0)
XDP_ABORTED = ctypes.c_int64(0)
XDP_DROP = ctypes.c_int64(1)
XDP_PASS = ctypes.c_int64(2)

View File

@ -3,6 +3,8 @@ import logging
from llvmlite import ir
from pythonbpf.expr import eval_expr, get_base_type_and_depth, deref_to_depth
from pythonbpf.expr.vmlinux_registry import VmlinuxHandlerRegistry
from pythonbpf.helper.helper_utils import get_char_array_ptr_and_size
logger = logging.getLogger(__name__)
@ -108,6 +110,16 @@ def _process_name_in_fval(name_node, fmt_parts, exprs, local_sym_tab):
if local_sym_tab and name_node.id in local_sym_tab:
_, var_type, tmp = local_sym_tab[name_node.id]
_populate_fval(var_type, name_node, fmt_parts, exprs)
else:
# Try to resolve through vmlinux registry if not in local symbol table
result = VmlinuxHandlerRegistry.handle_name(name_node.id)
if result:
val, var_type = result
_populate_fval(var_type, name_node, fmt_parts, exprs)
else:
raise ValueError(
f"Variable '{name_node.id}' not found in symbol table or vmlinux"
)
def _process_attr_in_fval(attr_node, fmt_parts, exprs, local_sym_tab, struct_sym_tab):
@ -173,6 +185,15 @@ def _populate_fval(ftype, node, fmt_parts, exprs):
raise NotImplementedError(
f"Unsupported pointer target type in f-string: {target}"
)
elif isinstance(ftype, ir.ArrayType):
if isinstance(ftype.element, ir.IntType) and ftype.element.width == 8:
# Char array
fmt_parts.append("%s")
exprs.append(node)
else:
raise NotImplementedError(
f"Unsupported array element type in f-string: {ftype.element}"
)
else:
raise NotImplementedError(f"Unsupported field type in f-string: {ftype}")
@ -197,44 +218,54 @@ def _create_format_string_global(fmt_str, func, module, builder):
def _prepare_expr_args(expr, func, module, builder, local_sym_tab, struct_sym_tab):
"""Evaluate and prepare an expression to use as an arg for bpf_printk."""
val, _ = eval_expr(
func,
module,
builder,
expr,
local_sym_tab,
None,
struct_sym_tab,
# Special case: struct field char array needs pointer to first element
char_array_ptr, _ = get_char_array_ptr_and_size(
expr, builder, local_sym_tab, struct_sym_tab
)
if char_array_ptr:
return char_array_ptr
if val:
if isinstance(val.type, ir.PointerType):
target, depth = get_base_type_and_depth(val.type)
if isinstance(target, ir.IntType):
if target.width >= 32:
val = deref_to_depth(func, builder, val, depth)
val = builder.sext(val, ir.IntType(64))
elif target.width == 8 and depth == 1:
# NOTE: i8* is string, no need to deref
pass
# Regular expression evaluation
val, _ = eval_expr(func, module, builder, expr, local_sym_tab, None, struct_sym_tab)
else:
logger.warning(
"Only int and ptr supported in bpf_printk args. Others default to 0."
)
val = ir.Constant(ir.IntType(64), 0)
elif isinstance(val.type, ir.IntType):
if val.type.width < 64:
val = builder.sext(val, ir.IntType(64))
else:
logger.warning(
"Only int and ptr supported in bpf_printk args. Others default to 0."
)
val = ir.Constant(ir.IntType(64), 0)
return val
else:
logger.warning(
"Failed to evaluate expression for bpf_printk argument. "
"It will be converted to 0."
)
if not val:
logger.warning("Failed to evaluate expression for bpf_printk, defaulting to 0")
return ir.Constant(ir.IntType(64), 0)
# Convert value to bpf_printk compatible type
if isinstance(val.type, ir.PointerType):
return _handle_pointer_arg(val, func, builder)
elif isinstance(val.type, ir.IntType):
return _handle_int_arg(val, builder)
else:
logger.warning(f"Unsupported type {val.type} in bpf_printk, defaulting to 0")
return ir.Constant(ir.IntType(64), 0)
def _handle_pointer_arg(val, func, builder):
"""Convert pointer type for bpf_printk."""
target, depth = get_base_type_and_depth(val.type)
if not isinstance(target, ir.IntType):
logger.warning("Only int pointers supported in bpf_printk, defaulting to 0")
return ir.Constant(ir.IntType(64), 0)
# i8* is string - use as-is
if target.width == 8 and depth == 1:
return val
# Integer pointers: dereference and sign-extend to i64
if target.width >= 32:
val = deref_to_depth(func, builder, val, depth)
return builder.sext(val, ir.IntType(64))
logger.warning("Unsupported pointer width in bpf_printk, defaulting to 0")
return ir.Constant(ir.IntType(64), 0)
def _handle_int_arg(val, builder):
"""Convert integer type for bpf_printk (sign-extend to i64)."""
if val.type.width < 64:
return builder.sext(val, ir.IntType(64))
return val

View File

@ -6,6 +6,8 @@ from llvmlite import ir
from .maps_utils import MapProcessorRegistry
from .map_types import BPFMapType
from .map_debug_info import create_map_debug_info, create_ringbuf_debug_info
from pythonbpf.expr.vmlinux_registry import VmlinuxHandlerRegistry
logger: Logger = logging.getLogger(__name__)
@ -51,7 +53,7 @@ def _parse_map_params(rval, expected_args=None):
"""Parse map parameters from call arguments and keywords."""
params = {}
handler = VmlinuxHandlerRegistry.get_handler()
# Parse positional arguments
if expected_args:
for i, arg_name in enumerate(expected_args):
@ -65,7 +67,12 @@ def _parse_map_params(rval, expected_args=None):
# Parse keyword arguments (override positional)
for keyword in rval.keywords:
if isinstance(keyword.value, ast.Name):
params[keyword.arg] = keyword.value.id
name = keyword.value.id
if handler and handler.is_vmlinux_enum(name):
result = handler.get_vmlinux_enum_value(name)
params[keyword.arg] = result if result is not None else name
else:
params[keyword.arg] = name
elif isinstance(keyword.value, ast.Constant):
params[keyword.arg] = keyword.value.value

58
pythonbpf/utils.py Normal file
View File

@ -0,0 +1,58 @@
import subprocess
def trace_pipe():
"""Util to read from the trace pipe."""
try:
subprocess.run(["cat", "/sys/kernel/tracing/trace_pipe"])
except KeyboardInterrupt:
print("Tracing stopped.")
except (FileNotFoundError, PermissionError) as e:
print(f"Error accessing trace_pipe: {e}. Try running as root.")
def trace_fields():
"""Parse one line from trace_pipe into fields."""
with open("/sys/kernel/tracing/trace_pipe", "rb", buffering=0) as f:
while True:
line = f.readline().rstrip()
if not line:
continue
# Skip lost event lines
if line.startswith(b"CPU:"):
continue
# Parse BCC-style: first 16 bytes = task
task = line[:16].lstrip().decode("utf-8")
line = line[17:] # Skip past task field and space
# Find the colon that ends "pid cpu flags timestamp"
ts_end = line.find(b":")
if ts_end == -1:
raise ValueError("Cannot parse trace line")
# Split "pid [cpu] flags timestamp"
try:
parts = line[:ts_end].split()
if len(parts) < 4:
raise ValueError("Not enough fields")
pid = int(parts[0])
cpu = parts[1][1:-1] # Remove brackets from [cpu]
cpu = int(cpu)
flags = parts[2]
ts = float(parts[3])
except (ValueError, IndexError):
raise ValueError("Cannot parse trace line")
# Get message: skip ": symbol:" part
line = line[ts_end + 1 :] # Skip first ":"
sym_end = line.find(b":")
if sym_end != -1:
msg = line[sym_end + 2 :].decode("utf-8") # Skip ": " after symbol
else:
msg = line.lstrip().decode("utf-8")
return (task, pid, cpu, flags, ts, msg)

View File

@ -0,0 +1,36 @@
from enum import Enum, auto
from typing import Any, Dict, List, Optional
from dataclasses import dataclass
import llvmlite.ir as ir
from pythonbpf.vmlinux_parser.dependency_node import Field
@dataclass
class AssignmentType(Enum):
CONSTANT = auto()
STRUCT = auto()
ARRAY = auto() # probably won't be used
FUNCTION_POINTER = auto()
POINTER = auto() # again, probably won't be used
@dataclass
class FunctionSignature:
return_type: str
param_types: List[str]
varargs: bool
# Thew name of the assignment will be in the dict that uses this class
@dataclass
class AssignmentInfo:
value_type: AssignmentType
python_type: type
value: Optional[Any]
pointer_level: Optional[int]
signature: Optional[FunctionSignature] # For function pointers
# The key of the dict is the name of the field.
# Value is a tuple that contains the global variable representing that field
# along with all the information about that field as a Field type.
members: Optional[Dict[str, tuple[ir.GlobalVariable, Field]]] # For structs.

View File

@ -1,6 +1,7 @@
import logging
from functools import lru_cache
import importlib
from .dependency_handler import DependencyHandler
from .dependency_node import DependencyNode
import ctypes
@ -15,7 +16,11 @@ def get_module_symbols(module_name: str):
return [name for name in dir(imported_module)], imported_module
def process_vmlinux_class(node, llvm_module, handler: DependencyHandler):
def process_vmlinux_class(
node,
llvm_module,
handler: DependencyHandler,
):
symbols_in_module, imported_module = get_module_symbols("vmlinux")
if node.name in symbols_in_module:
vmlinux_type = getattr(imported_module, node.name)
@ -25,7 +30,10 @@ def process_vmlinux_class(node, llvm_module, handler: DependencyHandler):
def process_vmlinux_post_ast(
elem_type_class, llvm_handler, handler: DependencyHandler, processing_stack=None
elem_type_class,
llvm_handler,
handler: DependencyHandler,
processing_stack=None,
):
# Initialize processing stack on first call
if processing_stack is None:
@ -46,7 +54,7 @@ def process_vmlinux_post_ast(
logger.debug(f"Node {current_symbol_name} already processed and ready")
return True
# XXX:Check it's use. It's probably not being used.
# XXX:Check its use. It's probably not being used.
if current_symbol_name in processing_stack:
logger.debug(
f"Dependency already in processing stack for {current_symbol_name}, skipping"
@ -98,12 +106,47 @@ def process_vmlinux_post_ast(
[elem_type, elem_bitfield_size] = elem_temp_list
local_module_name = getattr(elem_type, "__module__", None)
new_dep_node.add_field(elem_name, elem_type, ready=False)
if local_module_name == ctypes.__name__:
# TODO: need to process pointer to ctype and also CFUNCTYPES here recursively. Current processing is a single dereference
new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size)
new_dep_node.set_field_ready(elem_name, is_ready=True)
logger.debug(
f"Field {elem_name} is direct ctypes type: {elem_type}"
)
# Process pointer to ctype
if isinstance(elem_type, type) and issubclass(
elem_type, ctypes._Pointer
):
# Get the pointed-to type
pointed_type = elem_type._type_
logger.debug(f"Found pointer to type: {pointed_type}")
new_dep_node.set_field_containing_type(elem_name, pointed_type)
new_dep_node.set_field_ctype_complex_type(
elem_name, ctypes._Pointer
)
new_dep_node.set_field_ready(elem_name, is_ready=True)
# Process function pointers (CFUNCTYPE)
elif hasattr(elem_type, "_restype_") and hasattr(
elem_type, "_argtypes_"
):
# This is a CFUNCTYPE or similar
logger.info(
f"Function pointer detected for {elem_name} with return type {elem_type._restype_} and arguments {elem_type._argtypes_}"
)
# Set the field as ready but mark it with special handling
new_dep_node.set_field_ctype_complex_type(
elem_name, ctypes.CFUNCTYPE
)
new_dep_node.set_field_ready(elem_name, is_ready=True)
logger.warning(
"Blindly processing CFUNCTYPE ctypes to ensure compilation. Unsupported"
)
else:
# Regular ctype
new_dep_node.set_field_ready(elem_name, is_ready=True)
logger.debug(
f"Field {elem_name} is direct ctypes type: {elem_type}"
)
elif local_module_name == "vmlinux":
new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size)
logger.debug(
@ -127,6 +170,10 @@ def process_vmlinux_post_ast(
ctype_complex_type = ctypes.Array
elif issubclass(elem_type, ctypes._Pointer):
ctype_complex_type = ctypes._Pointer
else:
raise ImportError(
"Non Array and Pointer type ctype imports not supported in current version"
)
else:
raise TypeError("Unsupported ctypes subclass")
else:
@ -188,7 +235,10 @@ def process_vmlinux_post_ast(
else str(elem_type)
)
process_vmlinux_post_ast(
elem_type, llvm_handler, handler, processing_stack
elem_type,
llvm_handler,
handler,
processing_stack,
)
new_dep_node.set_field_ready(elem_name, True)
else:
@ -199,7 +249,7 @@ def process_vmlinux_post_ast(
else:
raise ImportError("UNSUPPORTED Module")
logging.info(
logger.info(
f"{current_symbol_name} processed and handler readiness {handler.is_ready}"
)
return True

View File

@ -18,6 +18,31 @@ class Field:
value: Any = None
ready: bool = False
def __hash__(self):
"""
Create a hash based on the immutable attributes that define this field's identity.
This allows Field objects to be used as dictionary keys.
"""
# Use a tuple of the fields that uniquely identify this field
identity = (
self.name,
id(self.type), # Use id for non-hashable types
id(self.ctype_complex_type) if self.ctype_complex_type else None,
id(self.containing_type) if self.containing_type else None,
self.type_size,
self.bitfield_size,
self.offset,
self.value if self.value else None,
)
return hash(identity)
def __eq__(self, other):
"""
Define equality consistent with the hash function.
Two fields are equal if they have they are the same
"""
return self is other
def set_ready(self, is_ready: bool = True) -> None:
"""Set the readiness state of this field."""
self.ready = is_ready

View File

@ -1,9 +1,9 @@
import ast
import logging
from typing import List, Tuple, Any
import importlib
import inspect
from .assignment_info import AssignmentInfo, AssignmentType
from .dependency_handler import DependencyHandler
from .ir_gen import IRGenerator
from .class_handler import process_vmlinux_class
@ -11,7 +11,7 @@ from .class_handler import process_vmlinux_class
logger = logging.getLogger(__name__)
def detect_import_statement(tree: ast.AST) -> List[Tuple[str, ast.ImportFrom]]:
def detect_import_statement(tree: ast.AST) -> list[tuple[str, ast.ImportFrom]]:
"""
Parse AST and detect import statements from vmlinux.
@ -82,7 +82,7 @@ def vmlinux_proc(tree: ast.AST, module):
# initialise dependency handler
handler = DependencyHandler()
# initialise assignment dictionary of name to type
assignments: dict[str, tuple[type, Any]] = {}
assignments: dict[str, AssignmentInfo] = {}
if not import_statements:
logger.info("No vmlinux imports found")
@ -128,20 +128,35 @@ def vmlinux_proc(tree: ast.AST, module):
f"{imported_name} not found as ClassDef or Assign in vmlinux"
)
IRGenerator(module, handler)
IRGenerator(module, handler, assignments)
return assignments
def process_vmlinux_assign(node, module, assignments: dict[str, tuple[type, Any]]):
# Check if this is a simple assignment with a constant value
def process_vmlinux_assign(node, module, assignments: dict[str, AssignmentInfo]):
"""Process assignments from vmlinux module."""
# Only handle single-target assignments
if len(node.targets) == 1 and isinstance(node.targets[0], ast.Name):
target_name = node.targets[0].id
# Handle constant value assignments
if isinstance(node.value, ast.Constant):
assignments[target_name] = (type(node.value.value), node.value.value)
# Fixed: using proper TypedDict creation syntax with named arguments
assignments[target_name] = AssignmentInfo(
value_type=AssignmentType.CONSTANT,
python_type=type(node.value.value),
value=node.value.value,
pointer_level=None,
signature=None,
members=None,
)
logger.info(
f"Added assignment: {target_name} = {node.value.value!r} of type {type(node.value.value)}"
)
# Handle other assignment types that we may need to support
else:
raise ValueError(f"Unsupported assignment type for {target_name}")
logger.warning(
f"Unsupported assignment type for {target_name}: {ast.dump(node.value)}"
)
else:
raise ValueError("Not a simple assignment")

View File

@ -1,15 +1,161 @@
from pythonbpf.debuginfo import DebugInfoGenerator
from pythonbpf.debuginfo import DebugInfoGenerator, dwarf_constants as dc
from ..dependency_node import DependencyNode
import ctypes
import logging
from typing import List, Any, Tuple
logger = logging.getLogger(__name__)
def debug_info_generation(struct, llvm_module):
def debug_info_generation(
struct: DependencyNode,
llvm_module,
generated_debug_info: List[Tuple[DependencyNode, Any]],
) -> Any:
"""
Generate DWARF debug information for a struct defined in a DependencyNode.
Args:
struct: The dependency node containing struct information
llvm_module: The LLVM module to add debug info to
generated_debug_info: List of tuples (struct, debug_info) to track generated debug info
Returns:
The generated global variable debug info
"""
# Set up debug info generator
generator = DebugInfoGenerator(llvm_module)
# this is sample debug info generation
# i64type = generator.get_uint64_type()
struct_type = generator.create_struct_type([], 64 * 4, is_distinct=True)
# Check if debug info for this struct has already been generated
for existing_struct, debug_info in generated_debug_info:
if existing_struct.name == struct.name:
return debug_info
global_var = generator.create_global_var_debug_info(
struct.name, struct_type, is_local=False
# Process all fields and create members for the struct
members = []
for field_name, field in struct.fields.items():
# Get appropriate debug type for this field
field_type = _get_field_debug_type(
field_name, field, generator, struct, generated_debug_info
)
# Create struct member with proper offset
member = generator.create_struct_member_vmlinux(
field_name, field_type, field.offset * 8
)
members.append(member)
if struct.name.startswith("struct_"):
struct_name = struct.name.removeprefix("struct_")
else:
raise ValueError("Unions are not supported in the current version")
# Create struct type with all members
struct_type = generator.create_struct_type_with_name(
struct_name, members, struct.__sizeof__() * 8, is_distinct=True
)
return global_var
return struct_type
def _get_field_debug_type(
field_name: str,
field,
generator: DebugInfoGenerator,
parent_struct: DependencyNode,
generated_debug_info: List[Tuple[DependencyNode, Any]],
) -> tuple[Any, int]:
"""
Determine the appropriate debug type for a field based on its Python/ctypes type.
Args:
field_name: Name of the field
field: Field object containing type information
generator: DebugInfoGenerator instance
parent_struct: The parent struct containing this field
generated_debug_info: List of already generated debug info
Returns:
The debug info type for this field
"""
# Handle complex types (arrays, pointers)
if field.ctype_complex_type is not None:
if issubclass(field.ctype_complex_type, ctypes.Array):
# Handle array types
element_type, base_type_size = _get_basic_debug_type(
field.containing_type, generator
)
return generator.create_array_type_vmlinux(
(element_type, base_type_size * field.type_size), field.type_size
), field.type_size * base_type_size
elif issubclass(field.ctype_complex_type, ctypes._Pointer):
# Handle pointer types
pointee_type, _ = _get_basic_debug_type(field.containing_type, generator)
return generator.create_pointer_type(pointee_type), 64
# Handle other vmlinux types (nested structs)
if field.type.__module__ == "vmlinux":
# If it's a struct from vmlinux, check if we've already generated debug info for it
struct_name = field.type.__name__
# Look for existing debug info in the list
for existing_struct, debug_info in generated_debug_info:
if existing_struct.name == struct_name:
# Use existing debug info
return debug_info, existing_struct.__sizeof__()
# If not found, create a forward declaration
# This will be completed when the actual struct is processed
logger.warning("Forward declaration in struct created")
forward_type = generator.create_struct_type([], 0, is_distinct=True)
return forward_type, 0
# Handle basic C types
return _get_basic_debug_type(field.type, generator)
def _get_basic_debug_type(ctype, generator: DebugInfoGenerator) -> Any:
"""
Map a ctypes type to a DWARF debug type.
Args:
ctype: A ctypes type or Python type
generator: DebugInfoGenerator instance
Returns:
The corresponding debug type
"""
# Map ctypes to debug info types
if ctype == ctypes.c_char or ctype == ctypes.c_byte:
return generator.get_basic_type("char", 8, dc.DW_ATE_signed_char), 8
elif ctype == ctypes.c_ubyte or ctype == ctypes.c_uint8:
return generator.get_basic_type("unsigned char", 8, dc.DW_ATE_unsigned_char), 8
elif ctype == ctypes.c_short or ctype == ctypes.c_int16:
return generator.get_basic_type("short", 16, dc.DW_ATE_signed), 16
elif ctype == ctypes.c_ushort or ctype == ctypes.c_uint16:
return generator.get_basic_type("unsigned short", 16, dc.DW_ATE_unsigned), 16
elif ctype == ctypes.c_int or ctype == ctypes.c_int32:
return generator.get_basic_type("int", 32, dc.DW_ATE_signed), 32
elif ctype == ctypes.c_uint or ctype == ctypes.c_uint32:
return generator.get_basic_type("unsigned int", 32, dc.DW_ATE_unsigned), 32
elif ctype == ctypes.c_long:
return generator.get_basic_type("long", 64, dc.DW_ATE_signed), 64
elif ctype == ctypes.c_ulong:
return generator.get_basic_type("unsigned long", 64, dc.DW_ATE_unsigned), 64
elif ctype == ctypes.c_longlong or ctype == ctypes.c_int64:
return generator.get_basic_type("long long", 64, dc.DW_ATE_signed), 64
elif ctype == ctypes.c_ulonglong or ctype == ctypes.c_uint64:
return generator.get_basic_type(
"unsigned long long", 64, dc.DW_ATE_unsigned
), 64
elif ctype == ctypes.c_float:
return generator.get_basic_type("float", 32, dc.DW_ATE_float), 32
elif ctype == ctypes.c_double:
return generator.get_basic_type("double", 64, dc.DW_ATE_float), 64
elif ctype == ctypes.c_bool:
return generator.get_basic_type("bool", 8, dc.DW_ATE_boolean), 8
elif ctype == ctypes.c_char_p:
char_type = generator.get_basic_type("char", 8, dc.DW_ATE_signed_char), 8
return generator.create_pointer_type(char_type)
elif ctype == ctypes.c_void_p:
return generator.create_pointer_type(None), 64
else:
return generator.get_uint64_type(), 64

View File

@ -1,5 +1,7 @@
import ctypes
import logging
from ..assignment_info import AssignmentInfo, AssignmentType
from ..dependency_handler import DependencyHandler
from .debug_info_gen import debug_info_generation
from ..dependency_node import DependencyNode
@ -10,10 +12,14 @@ logger = logging.getLogger(__name__)
class IRGenerator:
# get the assignments dict and add this stuff to it.
def __init__(self, llvm_module, handler: DependencyHandler, assignment=None):
def __init__(self, llvm_module, handler: DependencyHandler, assignments):
self.llvm_module = llvm_module
self.handler: DependencyHandler = handler
self.generated: list[str] = []
self.generated_debug_info: list = []
# Use struct_name and field_name as key instead of Field object
self.generated_field_names: dict[str, dict[str, ir.GlobalVariable]] = {}
self.assignments: dict[str, AssignmentInfo] = assignments
if not handler.is_ready:
raise ImportError(
"Semantic analysis of vmlinux imports failed. Cannot generate IR"
@ -66,20 +72,61 @@ class IRGenerator:
f"Warning: Dependency {dependency} not found in handler"
)
# Actual processor logic here after dependencies are resolved
self.gen_ir(struct)
# Generate IR first to populate field names
self.generated_debug_info.append(
(struct, self.gen_ir(struct, self.generated_debug_info))
)
# Fill the assignments dictionary with struct information
if struct.name not in self.assignments:
# Create a members dictionary for AssignmentInfo
members_dict = {}
for field_name, field in struct.fields.items():
# Get the generated field name from our dictionary, or use field_name if not found
if (
struct.name in self.generated_field_names
and field_name in self.generated_field_names[struct.name]
):
field_global_variable = self.generated_field_names[struct.name][
field_name
]
members_dict[field_name] = (field_global_variable, field)
else:
raise ValueError(
f"llvm global name not found for struct field {field_name}"
)
# members_dict[field_name] = (field_name, field)
# Add struct to assignments dictionary
self.assignments[struct.name] = AssignmentInfo(
value_type=AssignmentType.STRUCT,
python_type=struct.ctype_struct,
value=None,
pointer_level=None,
signature=None,
members=members_dict,
)
logger.info(f"Added struct assignment info for {struct.name}")
self.generated.append(struct.name)
finally:
# Remove from processing stack after we're done
processing_stack.discard(struct.name)
def gen_ir(self, struct):
def gen_ir(self, struct, generated_debug_info):
# TODO: we add the btf_ama attribute by monkey patching in the end of compilation, but once llvmlite
# accepts our issue, we will resort to normal accessed attribute based attribute addition
# currently we generate all possible field accesses for CO-RE and put into the assignment table
debug_info = debug_info_generation(struct, self.llvm_module)
debug_info = debug_info_generation(
struct, self.llvm_module, generated_debug_info
)
field_index = 0
# Make sure the struct has an entry in our field names dictionary
if struct.name not in self.generated_field_names:
self.generated_field_names[struct.name] = {}
for field_name, field in struct.fields.items():
# does not take arrays and similar types into consideration yet.
if field.ctype_complex_type is not None and issubclass(
@ -89,6 +136,18 @@ class IRGenerator:
containing_type = field.containing_type
if containing_type.__module__ == ctypes.__name__:
containing_type_size = ctypes.sizeof(containing_type)
if array_size == 0:
field_co_re_name = self._struct_name_generator(
struct, field, field_index, True, 0, containing_type_size
)
globvar = ir.GlobalVariable(
self.llvm_module, ir.IntType(64), name=field_co_re_name
)
globvar.linkage = "external"
globvar.set_metadata("llvm.preserve.access.index", debug_info)
self.generated_field_names[struct.name][field_name] = globvar
field_index += 1
continue
for i in range(0, array_size):
field_co_re_name = self._struct_name_generator(
struct, field, field_index, True, i, containing_type_size
@ -98,6 +157,7 @@ class IRGenerator:
)
globvar.linkage = "external"
globvar.set_metadata("llvm.preserve.access.index", debug_info)
self.generated_field_names[struct.name][field_name] = globvar
field_index += 1
elif field.type_size is not None:
array_size = field.type_size
@ -115,6 +175,7 @@ class IRGenerator:
)
globvar.linkage = "external"
globvar.set_metadata("llvm.preserve.access.index", debug_info)
self.generated_field_names[struct.name][field_name] = globvar
field_index += 1
else:
field_co_re_name = self._struct_name_generator(
@ -126,6 +187,8 @@ class IRGenerator:
)
globvar.linkage = "external"
globvar.set_metadata("llvm.preserve.access.index", debug_info)
self.generated_field_names[struct.name][field_name] = globvar
return debug_info
def _struct_name_generator(
self,
@ -136,6 +199,7 @@ class IRGenerator:
index: int = 0,
containing_type_size: int = 0,
) -> str:
# TODO: Does not support Unions as well as recursive pointer and array type naming
if is_indexed:
name = (
"llvm."

View File

@ -0,0 +1,90 @@
import logging
from llvmlite import ir
from pythonbpf.vmlinux_parser.assignment_info import AssignmentType
logger = logging.getLogger(__name__)
class VmlinuxHandler:
"""Handler for vmlinux-related operations"""
_instance = None
@classmethod
def get_instance(cls):
"""Get the singleton instance"""
if cls._instance is None:
logger.warning("VmlinuxHandler used before initialization")
return None
return cls._instance
@classmethod
def initialize(cls, vmlinux_symtab):
"""Initialize the handler with vmlinux symbol table"""
cls._instance = cls(vmlinux_symtab)
return cls._instance
def __init__(self, vmlinux_symtab):
"""Initialize with vmlinux symbol table"""
self.vmlinux_symtab = vmlinux_symtab
logger.info(
f"VmlinuxHandler initialized with {len(vmlinux_symtab) if vmlinux_symtab else 0} symbols"
)
def is_vmlinux_enum(self, name):
"""Check if name is a vmlinux enum constant"""
return (
name in self.vmlinux_symtab
and self.vmlinux_symtab[name]["value_type"] == AssignmentType.CONSTANT
)
def is_vmlinux_struct(self, name):
"""Check if name is a vmlinux struct"""
return (
name in self.vmlinux_symtab
and self.vmlinux_symtab[name]["value_type"] == AssignmentType.STRUCT
)
def handle_vmlinux_enum(self, name):
"""Handle vmlinux enum constants by returning LLVM IR constants"""
if self.is_vmlinux_enum(name):
value = self.vmlinux_symtab[name]["value"]
logger.info(f"Resolving vmlinux enum {name} = {value}")
return ir.Constant(ir.IntType(64), value), ir.IntType(64)
return None
def get_vmlinux_enum_value(self, name):
"""Handle vmlinux enum constants by returning LLVM IR constants"""
if self.is_vmlinux_enum(name):
value = self.vmlinux_symtab[name]["value"]
logger.info(f"The value of vmlinux enum {name} = {value}")
return value
return None
def handle_vmlinux_struct(self, struct_name, module, builder):
"""Handle vmlinux struct initializations"""
if self.is_vmlinux_struct(struct_name):
# TODO: Implement core-specific struct handling
# This will be more complex and depends on the BTF information
logger.info(f"Handling vmlinux struct {struct_name}")
# Return struct type and allocated pointer
# This is a stub, actual implementation will be more complex
return None
return None
def handle_vmlinux_struct_field(
self, struct_var_name, field_name, module, builder, local_sym_tab
):
"""Handle access to vmlinux struct fields"""
# Check if it's a variable of vmlinux struct type
if struct_var_name in local_sym_tab:
var_info = local_sym_tab[struct_var_name] # noqa: F841
# Need to check if this variable is a vmlinux struct
# This will depend on how you track vmlinux struct types in your symbol table
logger.info(
f"Attempting to access field {field_name} of possible vmlinux struct {struct_var_name}"
)
# Return pointer to field and field type
return None
return None

View File

@ -19,7 +19,7 @@ struct {
SEC("tp/syscalls/sys_enter_setuid")
int handle_setuid_entry(struct trace_event_raw_sys_enter *ctx) {
struct event data = {};
struct blk_integrity_iter it = {};
// Extract UID from the syscall arguments
data.uid = (unsigned int)ctx->args[0];
data.ts = bpf_ktime_get_ns();

View File

@ -0,0 +1,28 @@
from pythonbpf import bpf, struct, section, bpfglobal
from pythonbpf.helper import comm
from ctypes import c_void_p, c_int64
@bpf
@struct
class data_t:
comm: str(16) # type: ignore [valid-type]
copp: str(16) # type: ignore [valid-type]
@bpf
@section("tracepoint/syscalls/sys_enter_clone")
def hello(ctx: c_void_p) -> c_int64:
dataobj = data_t()
comm(dataobj.comm)
strobj = dataobj.comm
dataobj.copp = strobj
print(f"clone called by comm {dataobj.copp}")
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"

View File

@ -0,0 +1,26 @@
from pythonbpf import bpf, struct, section, bpfglobal
from pythonbpf.helper import comm
from ctypes import c_void_p, c_int64
@bpf
@struct
class data_t:
comm: str(16) # type: ignore [valid-type]
@bpf
@section("tracepoint/syscalls/sys_enter_clone")
def hello(ctx: c_void_p) -> c_int64:
dataobj = data_t()
comm(dataobj.comm)
strobj = dataobj.comm
print(f"clone called by comm {strobj}")
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"

View File

@ -0,0 +1,29 @@
from pythonbpf import bpf, section, bpfglobal, compile, struct
from ctypes import c_void_p, c_int64, c_uint64, c_uint32
from pythonbpf.helper import probe_read
@bpf
@struct
class data_t:
pid: c_uint32
value: c_uint64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def test_probe_read(ctx: c_void_p) -> c_int64:
"""Test bpf_probe_read helper function"""
data = data_t()
probe_read(data.value, 8, ctx)
probe_read(data.pid, 4, ctx)
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -0,0 +1,25 @@
from pythonbpf import bpf, bpfglobal, section, BPF, trace_pipe
from ctypes import c_void_p, c_int64
from pythonbpf.helper import random
@bpf
@section("tracepoint/syscalls/sys_enter_clone")
def hello_world(ctx: c_void_p) -> c_int64:
r = random()
print(f"Hello, World!, {r}")
return 0 # type: ignore [return-value]
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Compile and load
b = BPF()
b.load()
b.attach_all()
trace_pipe()

View File

@ -0,0 +1,40 @@
from pythonbpf import bpf, section, bpfglobal, compile, struct
from ctypes import c_void_p, c_int64, c_uint32, c_uint64
from pythonbpf.helper import smp_processor_id, ktime
@bpf
@struct
class cpu_event_t:
cpu_id: c_uint32
timestamp: c_uint64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def trace_with_cpu(ctx: c_void_p) -> c_int64:
"""Test bpf_get_smp_processor_id helper function"""
# Get the current CPU ID
cpu = smp_processor_id()
# Print it
print(f"Running on CPU {cpu}")
# Use it in a struct
event = cpu_event_t()
event.cpu_id = smp_processor_id()
event.timestamp = ktime()
print(f"Event on CPU {event.cpu_id} at time {event.timestamp}")
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -0,0 +1,31 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
from pythonbpf.helper import uid, pid
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def filter_by_user(ctx: c_void_p) -> c_int64:
"""Filter events by specific user ID"""
current_uid = uid()
# Only trace root user (UID 0)
if current_uid == 0:
process_id = pid()
print(f"Root process {process_id} executed")
# Or trace specific user (e.g., UID 1000)
if current_uid == 1002:
print("User 1002 executed something")
return 0
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -0,0 +1,47 @@
import logging
from pythonbpf import bpf, section, bpfglobal, compile_to_ir, map
from pythonbpf import compile # noqa: F401
from vmlinux import TASK_COMM_LEN # noqa: F401
from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401
from ctypes import c_uint64, c_int32, c_int64
from pythonbpf.maps import HashMap
# from vmlinux import struct_uinput_device
# from vmlinux import struct_blk_integrity_iter
@bpf
@map
def mymap() -> HashMap:
return HashMap(key=c_int32, value=c_uint64, max_entries=TASK_COMM_LEN)
@bpf
@map
def mymap2() -> HashMap:
return HashMap(key=c_int32, value=c_uint64, max_entries=18)
# Instructions to how to run this program
# 1. Install PythonBPF: pip install pythonbpf
# 2. Run the program: python examples/simple_struct_test.py
# 3. Run the program with sudo: sudo tools/check.sh run examples/simple_struct_test.o
# 4. Attach object file to any network device with something like ./check.sh run examples/simple_struct_test.o tailscale0
# 5. send traffic through the device and observe effects
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: struct_trace_event_raw_sys_enter) -> c_int64:
a = 2 + TASK_COMM_LEN + TASK_COMM_LEN
print(f"Hello, World{TASK_COMM_LEN} and {a}")
return c_int64(TASK_COMM_LEN + 2)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile_to_ir("simple_struct_test.py", "simple_struct_test.ll", loglevel=logging.DEBUG)
# compile()

209
tools/setup.sh Executable file
View File

@ -0,0 +1,209 @@
#!/bin/bash
print_warning() {
echo -e "\033[1;33m$1\033[0m"
}
print_info() {
echo -e "\033[1;32m$1\033[0m"
}
if [ "$EUID" -ne 0 ]; then
echo "Please run this script with sudo."
exit 1
fi
print_warning "===================================================================="
print_warning " WARNING "
print_warning " This script will run kernel-level BPF programs. "
print_warning " BPF programs run with kernel privileges and could potentially "
print_warning " affect system stability if not used properly. "
print_warning " "
print_warning " This is a non-interactive version for curl piping. "
print_warning " The script will proceed automatically with installation. "
print_warning "===================================================================="
echo
print_info "This script will:"
echo "1. Check and install required dependencies (libelf, clang, python, bpftool)"
echo "2. Download example programs from the Python-BPF GitHub repository"
echo "3. Create a Python virtual environment with necessary packages"
echo "4. Set up a Jupyter notebook server"
echo "Starting in 5 seconds. Press Ctrl+C to cancel..."
sleep 5
WORK_DIR="/tmp/python_bpf_setup"
REAL_USER=$(logname || echo "$SUDO_USER")
echo "Creating temporary directory: $WORK_DIR"
mkdir -p "$WORK_DIR"
cd "$WORK_DIR" || exit 1
if [ -f /etc/os-release ]; then
. /etc/os-release
DISTRO=$ID
else
echo "Cannot determine Linux distribution. Exiting."
exit 1
fi
install_dependencies() {
case $DISTRO in
ubuntu|debian|pop|mint|elementary|zorin)
echo "Detected Ubuntu/Debian-based system"
apt update
# Check and install libelf
if ! dpkg -l libelf-dev >/dev/null 2>&1; then
echo "Installing libelf-dev..."
apt install -y libelf-dev
else
echo "libelf-dev is already installed."
fi
# Check and install clang
if ! command -v clang >/dev/null 2>&1; then
echo "Installing clang..."
apt install -y clang
else
echo "clang is already installed."
fi
# Check and install python
if ! command -v python3 >/dev/null 2>&1; then
echo "Installing python3..."
apt install -y python3 python3-pip python3-venv
else
echo "python3 is already installed."
fi
# Check and install bpftool
if ! command -v bpftool >/dev/null 2>&1; then
echo "Installing bpftool..."
apt install -y linux-tools-common linux-tools-generic
# If bpftool still not found, try installing linux-tools-$(uname -r)
if ! command -v bpftool >/dev/null 2>&1; then
KERNEL_VERSION=$(uname -r)
apt install -y linux-tools-$KERNEL_VERSION
fi
else
echo "bpftool is already installed."
fi
;;
arch|manjaro|endeavouros)
echo "Detected Arch-based Linux system"
# Check and install libelf
if ! pacman -Q libelf >/dev/null 2>&1; then
echo "Installing libelf..."
pacman -S --noconfirm libelf
else
echo "libelf is already installed."
fi
# Check and install clang
if ! command -v clang >/dev/null 2>&1; then
echo "Installing clang..."
pacman -S --noconfirm clang
else
echo "clang is already installed."
fi
# Check and install python
if ! command -v python3 >/dev/null 2>&1; then
echo "Installing python3..."
pacman -S --noconfirm python python-pip
else
echo "python3 is already installed."
fi
# Check and install bpftool
if ! command -v bpftool >/dev/null 2>&1; then
echo "Installing bpftool..."
pacman -S --noconfirm bpf linux-headers
else
echo "bpftool is already installed."
fi
;;
*)
echo "Unsupported distribution: $DISTRO"
echo "This script only supports Ubuntu/Debian and Arch Linux derivatives."
exit 1
;;
esac
}
echo "Checking and installing dependencies..."
install_dependencies
# Download example programs
echo "Downloading example programs from Python-BPF GitHub repository..."
mkdir -p examples
cd examples || exit 1
echo "Fetching example files list..."
FILES=$(curl -s "https://api.github.com/repos/pythonbpf/Python-BPF/contents/examples" | grep -E -o '"path": "examples/[^"]*(\.md|\.ipynb|\.py)"' | awk -F'"' '{print $4}')
BCC_EXAMPLES=$(curl -s "https://api.github.com/repos/pythonbpf/Python-BPF/contents/BCC-Examples" | grep -E -o '"path": "BCC-Examples/[^"]*(\.md|\.ipynb|\.py)"' | awk -F'"' '{print $4}')
if [ -z "$FILES" ]; then
echo "Failed to fetch file list from repository. Using fallback method..."
# Fallback to downloading common example files
EXAMPLES=(
"binops_demo.py"
"blk_request.py"
"clone-matplotlib.ipynb"
"clone_plot.py"
"hello_world.py"
"kprobes.py"
"struct_and_perf.py"
"sys_sync.py"
"xdp_pass.py"
)
for example in "${EXAMPLES[@]}"; do
echo "Downloading: $example"
curl -s -O "https://raw.githubusercontent.com/pythonbpf/Python-BPF/master/examples/$example"
done
else
mkdir examples && cd examples
for file in $FILES; do
filename=$(basename "$file")
echo "Downloading: $filename"
curl -s -o "$filename" "https://raw.githubusercontent.com/pythonbpf/Python-BPF/master/$file"
done
cd ..
mkdir BCC-Examples && cd BCC-Examples
for file in $BCC_EXAMPLES; do
filename=$(basename "$file")
echo "Downloading: $filename"
curl -s -o "$filename" "https://raw.githubusercontent.com/pythonbpf/Python-BPF/master/$file"
done
cd ..
fi
cd "$WORK_DIR" || exit 1
chown -R "$REAL_USER:$(id -gn "$REAL_USER")" .
echo "Creating Python virtual environment..."
su - "$REAL_USER" -c "cd \"$WORK_DIR\" && python3 -m venv venv"
echo "Installing Python packages..."
su - "$REAL_USER" -c "cd \"$WORK_DIR\" && source venv/bin/activate && pip install --upgrade pip && pip install jupyter pythonbpf pylibbpf matplotlib"
cat > "$WORK_DIR/start_jupyter.sh" << EOF
#!/bin/bash
cd "$WORK_DIR"
source venv/bin/activate
cd examples
sudo ../venv/bin/python -m notebook --ip=0.0.0.0 --allow-root
EOF
chmod +x "$WORK_DIR/start_jupyter.sh"
chown "$REAL_USER:$(id -gn "$REAL_USER")" "$WORK_DIR/start_jupyter.sh"
print_info "========================================================"
print_info "Setup complete! To start Jupyter Notebook, run:"
print_info "$ sudo $WORK_DIR/start_jupyter.sh"
print_info "========================================================"