60 Commits

Author SHA1 Message Date
8c2196c05c bump version
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-09-26 22:48:17 +05:30
a2f86d680d Merge pull request #4 from varun-r-mallya/type_system
Type system and strings
2025-09-26 18:27:10 +05:30
0f365be65e Add some support for strings in structs 2025-09-26 18:26:07 +05:30
4ebf0480dd tweak commit to add placeholder string 2025-09-26 04:54:01 +05:30
b9ddecd6b1 Add string as a primitve to struct defs 2025-09-26 04:44:38 +05:30
737c4d3039 Support storing and printing string type 2025-09-26 04:17:29 +05:30
da8a495da7 Fix handle_cond for new symtab convention 2025-09-26 04:05:37 +05:30
ee03ac04d0 Fix printk handler to comply with new symtab convention 2025-09-26 01:02:10 +05:30
51595f9ec2 Add types returns to bpf helpers 2025-09-26 00:28:10 +05:30
4cf284a81f provide type as weel in eval_expr 2025-09-26 00:24:10 +05:30
1517f6e052 Fix local_sym_tab accesses in expr_pass 2025-09-25 23:54:04 +05:30
95f360059b Fix local_sym_tab accesses in binary_ops 2025-09-25 23:53:04 +05:30
dad57bd340 Fix local_sym_tab accesses in bpf_helper_handler 2025-09-25 23:51:08 +05:30
529b0bde19 Fix local_sym_tab accesses in functions_pass 2025-09-25 23:49:28 +05:30
943697ac9f Pass down type info in local_sym_tab 2025-09-25 23:43:19 +05:30
ba90af9ff2 Allocate space for string consts 2025-09-25 22:24:55 +05:30
35969c4ff7 Add string example 2025-09-25 22:15:14 +05:30
9e87ee52f2 Move relevant vmlinux files to ex7.bpf.c 2025-09-25 00:10:39 +05:30
d0be8893eb Add setuid C example 2025-09-24 23:48:42 +05:30
dda05bd044 Add matplotlib example 2025-09-23 20:36:15 +05:30
28e6f97708 add support for compilation with pylibbpf
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-09-21 18:05:43 +05:30
a1bc813ec5 Small fix to enum va 2025-09-21 17:58:51 +05:30
fefd6840c8 finish perf_event_output helper integration 2025-09-21 17:50:58 +05:30
79f0949abc Fix calling conventions changed by structs 2025-09-21 16:19:12 +05:30
a1371697cc overhaul handle_helper_calls 2025-09-21 16:10:29 +05:30
3c976b88d3 pass down structs_sym_tab 2025-09-21 15:20:41 +05:30
69a86c2433 Add perf_event_output boilerplate 2025-09-21 15:14:55 +05:30
6b92a16ca1 update release for pylibbpf
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-09-21 12:31:26 +05:30
12b8bf698b Add struct field access stub - too sleepy to debug 2025-09-21 05:27:34 +05:30
0f9a4078ee Complete struct field assignment 2025-09-21 05:22:00 +05:30
36c2c0b695 Add struct malloc, add struct instantiation to example 2025-09-21 04:48:50 +05:30
63c44fa48c Pass down structs_sym_tab 2025-09-21 04:28:44 +05:30
c79dc635d7 Add process_bpf_struct 2025-09-21 04:23:54 +05:30
9fc939cb8e Add structs_pass, tweak functions_pass to respect structs 2025-09-21 03:29:05 +05:30
780f53cd3f Make bpf structs discoverable during chunk exploration 2025-09-21 03:17:23 +05:30
48c0a1f506 Add struct to init to allow inclusion 2025-09-21 03:14:29 +05:30
8e231845ef Add struct example and decorator 2025-09-21 03:01:13 +05:30
d01c7ad8ba change README accordingly 2025-09-20 04:31:11 +05:30
a124476583 big overhaul of debug info and params passed to maps 2025-09-20 04:30:08 +05:30
73862f0084 Make max_entries optional in map BTF, add PerfEventArray to execve5 2025-09-20 03:15:09 +05:30
b8fdc16b4f Add PerfEventArray class 2025-09-20 02:57:27 +05:30
4fd8bee8e7 Add IR and debug info generation for multiple MAP types 2025-09-20 02:53:49 +05:30
67fc3f9562 Add map type support to process_bpf_map 2025-09-20 02:19:17 +05:30
69d0cf2e0e Add process_perf_event_map 2025-09-20 02:10:11 +05:30
b0f18229d9 Add PID helper 2025-09-19 22:58:16 +05:30
95727e3374 init execve5.py to emulate ex6.bpf.c 2025-09-19 22:35:47 +05:30
079288265f Format integers in fstrings to display as u64 2025-09-19 22:34:19 +05:30
efd6083caf Add custom struct C example 2025-09-19 22:06:20 +05:30
4797c007a0 Define arch in C example 2025-09-19 04:22:36 +05:30
b2413644e4 Add generated vmlinux.py from ctypeslib 2025-09-19 04:16:17 +05:30
af32758048 Add vmlinux.h 2025-09-19 04:15:54 +05:30
cb11d60fcc Add barebones python skeleton for kfuncs 2025-09-19 04:15:39 +05:30
1967332175 Add kprobe and vmlinux example 2025-09-19 04:15:13 +05:30
224e6ba781 Add basic TODO.md 2025-09-18 01:51:01 +05:30
62db39db74 Add presentation and video links to README 2025-09-18 01:47:24 +05:30
cc5f720406 Support simple XDP 2025-09-13 19:58:01 +05:30
9f858bd159 Add recursive dereferencing and get example working 2025-09-13 00:12:04 +05:30
ca203a1fdd support referencing other variables inside binops 2025-09-12 23:05:52 +05:30
a09e4e1bb6 Add deref(), add delete helper, refactor pre-alloc 2025-09-12 04:26:27 +05:30
0950d0550c Add side by side view 2025-09-12 04:25:08 +05:30
31 changed files with 326234 additions and 210 deletions

View File

@ -8,6 +8,12 @@ This is an LLVM IR generator for eBPF programs in Python. We use llvmlite to gen
# DO NOT USE IN PRODUCTION. IN DEVELOPMENT. # DO NOT USE IN PRODUCTION. IN DEVELOPMENT.
## Video Demo
[Video demo for code under demo/](https://youtu.be/eMyLW8iWbks)
## Slide Deck
[Slide deck explaining the project](https://docs.google.com/presentation/d/1DsWDIVrpJhM4RgOETO9VWqUtEHo3-c7XIWmNpi6sTSo/edit?usp=sharing)
## Installation ## Installation
- Have `clang` installed. - Have `clang` installed.
- `pip install pythonbpf` - `pip install pythonbpf`
@ -24,7 +30,7 @@ from ctypes import c_void_p, c_int64, c_int32, c_uint64
@bpf @bpf
@map @map
def last() -> HashMap: def last() -> HashMap:
return HashMap(key_type=c_uint64, value_type=c_uint64, max_entries=1) return HashMap(key=c_uint64, value=c_uint64, max_entries=1)
@bpf @bpf
@section("tracepoint/syscalls/sys_enter_execve") @section("tracepoint/syscalls/sys_enter_execve")

9
TODO.md Normal file
View File

@ -0,0 +1,9 @@
## Short term
- Implement enough functionality to port the BCC tutorial examples to PythonBPF
## Long term
- Refactor the codebase to be better than a hackathon project
- Port to C++ and use actual LLVM?

46
demo/bcc.py Normal file
View File

@ -0,0 +1,46 @@
from __future__ import print_function
from bcc import BPF
from bcc.utils import printb
# load BPF program
# The embedded C source defines a BPF_HASH named `last` and a function do_trace().
# do_trace() looks up key 0; when a previous timestamp exists and less than
# 1000000000 ns have elapsed it prints the elapsed time divided by 1000000 via
# bpf_trace_printk, deletes the entry, and in all cases stores the current
# bpf_ktime_get_ns() value under key 0.
b = BPF(text="""
#include <uapi/linux/ptrace.h>
BPF_HASH(last);
int do_trace(struct pt_regs *ctx) {
u64 ts, *tsp, delta, key = 0;
// attempt to read stored timestamp
tsp = last.lookup(&key);
if (tsp != NULL) {
delta = bpf_ktime_get_ns() - *tsp;
if (delta < 1000000000) {
// output if time is less than 1 second
bpf_trace_printk("%d\\n", delta / 1000000);
}
last.delete(&key);
}
// update stored timestamp
ts = bpf_ktime_get_ns();
last.update(&key, &ts);
return 0;
}
""")
# Attach do_trace as a kprobe on the function name resolved for the "sync" syscall.
b.attach_kprobe(event=b.get_syscall_fnname("sync"), fn_name="do_trace")
print("Tracing for quick sync's... Ctrl-C to end")
# TODO
# format output
# `start` holds the timestamp of the first event so later timestamps are printed
# relative to it; 0 means "no event seen yet".
start = 0
# Read trace fields until interrupted; KeyboardInterrupt (Ctrl-C) exits the script.
while 1:
try:
(task, pid, cpu, flags, ts, ms) = b.trace_fields()
if start == 0:
start = ts
ts = ts - start
printb(b"At time %.2f s: multiple syncs detected, last %s ms ago" % (ts, ms))
except KeyboardInterrupt:
exit()

23
demo/pybpf0.py Normal file
View File

@ -0,0 +1,23 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
# Instructions on how to run this program
# 1. Install PythonBPF: pip install pythonbpf
# 2. Run the program: python demo/pybpf0.py
# 3. Run the program with sudo: sudo examples/check.sh run demo/pybpf0.o
# 4. Start up any program and watch the output
# Program placed in section "tracepoint/syscalls/sys_enter_execve":
# prints "Hello, World!" and returns c_int64(0).
# NOTE(review): print() is presumably lowered to a bpf_printk-style helper call by
# the PythonBPF compiler — confirm against the compiler sources.
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return c_int64(0)
# License declaration for this program: returns the string "GPL".
# NOTE(review): presumably the counterpart of `char LICENSE[] SEC("license") = "GPL";`
# in the C examples — confirm.
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

41
demo/pybpf1.py Normal file
View File

@ -0,0 +1,41 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from pythonbpf.helpers import XDP_PASS
from pythonbpf.maps import HashMap
from ctypes import c_void_p, c_int64
# Instructions on how to run this program
# 1. Install PythonBPF: pip install pythonbpf
# 2. Run the program: python demo/pybpf1.py
# 3. Run the program with sudo: sudo examples/check.sh run demo/pybpf1.o
# 4. Attach object file to any network device with something like ./check.sh xdp ../demo/pybpf1.o tailscale0
# 5. send traffic through the device and observe effects
# Map declaration: `count` is a HashMap with c_int64 keys, c_int64 values and
# max_entries=1. hello_world() below uses it through count().lookup/update.
@bpf
@map
def count() -> HashMap:
return HashMap(key=c_int64, value=c_int64, max_entries=1)
# Program placed in section "xdp". It keeps a single counter under key 0 in `count`:
# when the lookup yields a value, that value plus 1 is printed and written back;
# otherwise the counter is initialised to 1. Both paths return XDP_PASS.
# The constants are bound to names (key, one) before being passed to the map calls.
@bpf
@section("xdp")
def hello_world(ctx: c_void_p) -> c_int64:
key = 0
one = 1
prev = count().lookup(key)
if prev:
# entry exists: increment, report and store the new value
prevval = prev + 1
print(f"count: {prevval}")
count().update(key, prevval)
return XDP_PASS
else:
# no entry yet: start counting at 1
count().update(key, one)
return XDP_PASS
# License declaration for this program: returns the string "GPL".
# NOTE(review): presumably the counterpart of `char LICENSE[] SEC("license") = "GPL";`
# in the C examples — confirm.
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

43
demo/pybpf2.py Normal file
View File

@ -0,0 +1,43 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from pythonbpf.helpers import ktime
from pythonbpf.maps import HashMap
from ctypes import c_void_p, c_int64, c_uint64
# Instructions on how to run this program
# 1. Install PythonBPF: pip install pythonbpf
# 2. Run the program: python demo/pybpf2.py
# 3. Run the program with sudo: sudo examples/check.sh run demo/pybpf2.o
# 4. Start a Python REPL, `import os`, and then keep entering `os.sync()` to see responses.
# Map declaration: `last` is a HashMap with c_uint64 keys, c_uint64 values and
# max_entries=3. do_trace() below stores a ktime() value in it under key 0.
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
# Program placed in section "tracepoint/syscalls/sys_enter_sync".
# It looks up key 0 in `last`; when a value is found it computes
# delta = ktime() - tsp and, for delta < 1000000000, prints delta // 1000000
# labelled as milliseconds. The entry is deleted on one path and written with a
# fresh ktime() value on another. Always returns c_int64(0).
# NOTE(review): indentation is not preserved in this view, so which `if` the
# delete / else / update lines belong to cannot be confirmed here — check the
# file in the repository before relying on the exact branch structure.
@bpf
@section("tracepoint/syscalls/sys_enter_sync")
def do_trace(ctx: c_void_p) -> c_int64:
key = 0
tsp = last().lookup(key)
if tsp:
kt = ktime()
delta = (kt - tsp)
if delta < 1000000000:
time_ms = (delta // 1000000)
print(f"sync called within last second, last {time_ms} ms ago")
last().delete(key)
else:
kt = ktime()
last().update(key, kt)
return c_int64(0)
# License declaration for this program: returns the string "GPL".
# NOTE(review): presumably the counterpart of `char LICENSE[] SEC("license") = "GPL";`
# in the C examples — confirm.
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

52
demo/pybpf3.py Normal file
View File

@ -0,0 +1,52 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from pythonbpf.helpers import ktime
from pythonbpf.maps import HashMap
from ctypes import c_void_p, c_int64, c_uint64
# Instructions on how to run this program
# 1. Install PythonBPF: pip install pythonbpf
# 2. Run the program: python demo/pybpf3.py
# 3. Run the program with sudo: sudo examples/check.sh run demo/pybpf3.o
# 4. Start up any program and watch the output
# Map declaration: `last` is a HashMap with c_uint64 keys, c_uint64 values and
# max_entries=3. do_trace() below stores a ktime() value in it under key 0.
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
# Program placed in section "tracepoint/syscalls/sys_enter_execve".
# It looks up key 0 in `last`; when a value is found it computes
# delta = ktime() - tsp and, for delta < 1000000000, prints delta // 1000000
# labelled as milliseconds. The entry is deleted on one path and written with a
# fresh ktime() value on another. Always returns c_int64(0).
# NOTE(review): indentation is not preserved in this view, so which `if` the
# delete / else / update lines belong to cannot be confirmed here — check the
# file in the repository before relying on the exact branch structure.
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def do_trace(ctx: c_void_p) -> c_int64:
key = 0
tsp = last().lookup(key)
if tsp:
kt = ktime()
delta = (kt - tsp)
if delta < 1000000000:
time_ms = (delta // 1000000)
print(f"Execve syscall entered within last second, last {time_ms} ms ago")
last().delete(key)
else:
kt = ktime()
last().update(key, kt)
return c_int64(0)
# Program placed in section "tracepoint/syscalls/sys_exit_execve".
# Exercises binary operators on constants and locals: XOR (5 ^ va), AND (6 & 3)
# and addition of two locals, then prints the sum. As plain integer arithmetic
# this is 13 + 2 = 15. Always returns c_int64(0).
@bpf
@section("tracepoint/syscalls/sys_exit_execve")
def do_exit(ctx: c_void_p) -> c_int64:
va = 8
nm = 5 ^ va
al = 6 & 3
ru = (nm + al)
print(f"this is a variable {ru}")
return c_int64(0)
# License declaration for this program: returns the string "GPL".
# NOTE(review): presumably the counterpart of `char LICENSE[] SEC("license") = "GPL";`
# in the C examples — confirm.
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

56
demo/pybpf4.py Normal file
View File

@ -0,0 +1,56 @@
import time
from pythonbpf import bpf, map, section, bpfglobal, BPF
from pythonbpf.helpers import pid
from pythonbpf.maps import HashMap
from pylibbpf import *
from ctypes import c_void_p, c_int64, c_uint64, c_int32
import matplotlib.pyplot as plt
# This program attaches an eBPF tracepoint to sys_enter_clone,
# counts per-PID clone syscalls, stores them in a hash map,
# and then plots the distribution as a histogram using matplotlib.
# It provides a quick view of process creation activity over 10 seconds.
# Everything is done with Python only code and with the new pylibbpf library.
# Run `sudo /path/to/python/binary/ pybpf4.py`
# Map declaration: `hist` is a HashMap with c_int32 keys, c_uint64 values and
# max_entries=4096. hello() below keys it by the value returned from pid().
@bpf
@map
def hist() -> HashMap:
return HashMap(key=c_int32, value=c_uint64, max_entries=4096)
# Program placed in section "tracepoint/syscalls/sys_enter_clone".
# Maintains one counter per pid() value in `hist`: when an entry exists it is
# incremented, printed together with the id and written back; otherwise the
# entry is created with the value 1. Both paths return c_int64(0).
@bpf
@section("tracepoint/syscalls/sys_enter_clone")
def hello(ctx: c_void_p) -> c_int64:
process_id = pid()
one = 1
prev = hist().lookup(process_id)
if prev:
# existing entry: bump the count for this id
previous_value = prev + 1
print(f"count: {previous_value} with {process_id}")
hist().update(process_id, previous_value)
return c_int64(0)
else:
# first event for this id
hist().update(process_id, one)
return c_int64(0)
# License declaration for this program: returns the string "GPL".
# NOTE(review): presumably the counterpart of `char LICENSE[] SEC("license") = "GPL";`
# in the C examples — confirm.
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
# Userspace half of the demo: load the program, let it record for ten seconds,
# then plot the per-PID counts collected in the `hist` map.
# NOTE(review): BPF() presumably compiles the @bpf-decorated definitions above — confirm.
b = BPF()
b.load_and_attach()

# From here on `hist` names the userspace map wrapper, not the decorated function.
hist = BpfMap(b, hist)

print("Recording")
time.sleep(10)

# One histogram sample per map value (i.e. per recorded key).
counts = list(hist.values())
plt.hist(counts, bins=20)
plt.title("Syscall clone counts")
plt.xlabel("Clone calls per PID")
plt.ylabel("Frequency")
plt.show()

View File

@ -3,17 +3,10 @@
#define u64 unsigned long long #define u64 unsigned long long
#define u32 unsigned int #define u32 unsigned int
struct { SEC("xdp")
__uint(type, BPF_MAP_TYPE_HASH); int hello(struct xdp_md *ctx) {
__uint(max_entries, 1);
__type(key, u32);
__type(value, u64);
} last SEC(".maps");
SEC("tracepoint/syscalls/sys_enter_execve")
int hello(struct pt_regs *ctx) {
bpf_printk("Hello, World!\n"); bpf_printk("Hello, World!\n");
return 0; return XDP_PASS;
} }
char LICENSE[] SEC("license") = "GPL"; char LICENSE[] SEC("license") = "GPL";

25
examples/c-form/ex5.bpf.c Normal file
View File

@ -0,0 +1,25 @@
#define __TARGET_ARCH_arm64
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
// Map: key = struct request*, value = u64 timestamp
// Hash map with room for 1024 entries; trace_start() writes into it.
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, struct request *);
__type(value, u64);
__uint(max_entries, 1024);
} start SEC(".maps");
// Attach to kprobe for blk_start_request
// Records the current bpf_ktime_get_ns() value in `start`, keyed by the value of
// the `req` pointer argument. BPF_ANY is passed as the update flag. Returns 0.
SEC("kprobe/blk_start_request")
int BPF_KPROBE(trace_start, struct request *req)
{
u64 ts = bpf_ktime_get_ns();
// &req: the key is the pointer value itself, not the request it points to
bpf_map_update_elem(&start, &req, &ts, BPF_ANY);
return 0;
}
char LICENSE[] SEC("license") = "GPL";

43
examples/c-form/ex6.bpf.c Normal file
View File

@ -0,0 +1,43 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#define TASK_COMM_LEN 16
// Define output data structure
// One record per event, submitted to user space by hello() through `events`.
// The comm field is commented out, so only pid and ts are part of the record.
struct data_t {
__u32 pid; // set from bpf_get_current_pid_tgid() & 0xFFFFFFFF in hello()
__u64 ts; // set from bpf_ktime_get_ns() in hello()
// char comm[TASK_COMM_LEN];
};
// Define a perf event output map
// BPF_MAP_TYPE_PERF_EVENT_ARRAY with __u32-sized keys and values; hello() passes
// it to bpf_perf_event_output().
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u32));
} events SEC(".maps");
// Tracepoint program for sys_enter_clone: builds a data_t record (pid, timestamp)
// and submits it to user space through the `events` perf event array.
// Returns 0.
//
// ctx is taken as an opaque pointer: it is only forwarded to
// bpf_perf_event_output(), and `struct pt_regs` is neither declared by the
// headers included here nor the context type of a tracepoint program.
SEC("tracepoint/syscalls/sys_enter_clone")
int hello(void *ctx)
{
    struct data_t data;

    // Zero the whole object, including the 4 padding bytes between pid (__u32)
    // and ts (__u64). `= {}` is not required to clear padding, and the record
    // is copied out byte-for-byte by bpf_perf_event_output() below.
    __builtin_memset(&data, 0, sizeof(data));

    // Get PID (lower 32 bits of the 64-bit value returned)
    data.pid = bpf_get_current_pid_tgid() & 0xFFFFFFFF;

    // Get timestamp
    data.ts = bpf_ktime_get_ns();

    // Get current process name
    // bpf_get_current_comm(&data.comm, sizeof(data.comm));

    // Submit data to userspace via perf event
    bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
                          &data, sizeof(data));
    return 0;
}
char LICENSE[] SEC("license") = "GPL";

47
examples/c-form/ex7.bpf.c Normal file
View File

@ -0,0 +1,47 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
// Common header embedded as the first member of trace_event_raw_sys_enter below.
// NOTE(review): presumably copied from vmlinux.h so this file does not need to
// include it — confirm the layout matches the target kernel.
struct trace_entry {
short unsigned int type;
unsigned char flags;
unsigned char preempt_count;
int pid;
};
// Context type taken by handle_setuid_entry(); the handler reads args[0] only.
// NOTE(review): presumably copied from vmlinux.h — confirm the layout matches the
// target kernel.
struct trace_event_raw_sys_enter {
struct trace_entry ent;
long int id;
long unsigned int args[6];
char __data[0];
};
// Record submitted to user space by handle_setuid_entry() through `events`.
struct event {
__u32 pid; // set from bpf_get_current_pid_tgid() >> 32
__u32 uid; // set from ctx->args[0], cast to unsigned int
__u64 ts; // set from bpf_ktime_get_ns()
};
// Perf event array with int-sized keys and values; handle_setuid_entry() passes
// it to bpf_perf_event_output().
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
__uint(key_size, sizeof(int));
__uint(value_size, sizeof(int));
} events SEC(".maps");
// Program for section "tp/syscalls/sys_enter_setuid": packs the first syscall
// argument, a timestamp and the upper half of the pid/tgid value into a
// struct event and emits it on the `events` perf event array. Returns 0.
SEC("tp/syscalls/sys_enter_setuid")
int handle_setuid_entry(struct trace_event_raw_sys_enter *ctx) {
    struct event evt = {};

    // First syscall argument, narrowed to 32 bits
    evt.uid = (unsigned int)ctx->args[0];
    evt.ts = bpf_ktime_get_ns();
    // Upper 32 bits of the combined pid/tgid value
    evt.pid = bpf_get_current_pid_tgid() >> 32;

    bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &evt, sizeof(evt));
    return 0;
}
char LICENSE[] SEC("license") = "GPL";

121617
examples/c-form/vmlinux.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -22,11 +22,23 @@ case "$1" in
sudo rm -f "$PIN_PATH" sudo rm -f "$PIN_PATH"
echo "[+] Stopped" echo "[+] Stopped"
;; ;;
xdp)
    # Load "$FILE" as an XDP program, pin it at "$PIN_PATH", attach it to the
    # network interface named by the third argument, stream trace_pipe until
    # interrupted, then detach and remove the pin.
    if [ -z "${3:-}" ]; then
        # Without an interface every bpftool call below would be malformed.
        echo "Usage: $0 xdp <file.o> <interface>" >&2
        exit 1
    fi
    echo "[*] Loading and running $FILE"
    # Clear any XDP program already attached to the interface first.
    sudo bpftool net detach xdp dev "$3"
    sudo bpftool prog load "$FILE" "$PIN_PATH" type xdp
    sudo bpftool net attach xdp pinned "$PIN_PATH" dev "$3"
    echo "[+] Program loaded. Press Ctrl+C to stop"
    sudo cat /sys/kernel/debug/tracing/trace_pipe
    sudo bpftool net detach xdp dev "$3"
    # The pin is a single file; -f (as used by the stop arm) is sufficient.
    sudo rm -f "$PIN_PATH"
    echo "[+] Stopped"
    ;;
*) *)
echo "Usage: $0 <check|run|stop> <file.o>" echo "Usage: $0 <check|run|stop> <file.o>"
echo "Examples:" echo "Examples:"
echo " $0 check program.bpf.o" echo " $0 check program.bpf.o"
echo " $0 run program.bpf.o" echo " $0 run program.bpf.o"
echo " $0 xdp program.bpf.o wlp6s0"
echo " $0 stop" echo " $0 stop"
exit 1 exit 1
;; ;;

View File

@ -7,7 +7,7 @@ from pythonbpf.maps import HashMap
@bpf @bpf
@map @map
def last() -> HashMap: def last() -> HashMap:
return HashMap(key_type=c_uint64, value_type=c_uint64, max_entries=1) return HashMap(key=c_uint64, value=c_uint64, max_entries=1)
@bpf @bpf

View File

@ -1,5 +1,5 @@
from pythonbpf import bpf, map, section, bpfglobal, compile from pythonbpf import bpf, map, section, bpfglobal, compile
from pythonbpf.helpers import ktime from pythonbpf.helpers import ktime, deref
from pythonbpf.maps import HashMap from pythonbpf.maps import HashMap
from ctypes import c_void_p, c_int64, c_int32, c_uint64 from ctypes import c_void_p, c_int64, c_int32, c_uint64
@ -8,7 +8,7 @@ from ctypes import c_void_p, c_int64, c_int32, c_uint64
@bpf @bpf
@map @map
def last() -> HashMap: def last() -> HashMap:
return HashMap(key_type=c_uint64, value_type=c_uint64, max_entries=3) return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
@bpf @bpf
@ -24,37 +24,32 @@ def hello(ctx: c_void_p) -> c_int32:
def hello_again(ctx: c_void_p) -> c_int64: def hello_again(ctx: c_void_p) -> c_int64:
print("exited") print("exited")
key = 0 key = 0
delta = 0
dddelta = 0
tsp = last().lookup(key) tsp = last().lookup(key)
# if tsp: if True:
# delta = (bpf_ktime_get_ns() - tsp.value) delta = ktime()
# if delta < 1000000000: ddelta = deref(delta)
# print("execve called within last second") ttsp = deref(deref(tsp))
# last().delete(key) dddelta = ddelta - ttsp
x = 1 if dddelta < 1000000000:
y = False print("execve called within last second")
if x > 0: last().delete(key)
if x < 2:
print(f"we prevailed {x}")
else:
print(f"we did not prevail {x}")
ts = ktime() ts = ktime()
last().update(key, ts) last().update(key, ts)
st = "st" va = 8
last().update(key, ts) nm = 5 + va
al = 6 & 3
print(f"this is a variable {nm}")
keena = 2 + 1
# below breaks
# keela = keena + 1
# TODO: binops evaluate but into a random register and dont get assigned.
keema = 8 * 9
keesa = 10 - 11
keeda = 10 / 5
return c_int64(0) return c_int64(0)
@bpf @bpf
@bpfglobal @bpfglobal
def LICENSE() -> str: def LICENSE() -> str:
return "GPL" return "GPL"
compile() compile()

28
examples/execve4.py Normal file
View File

@ -0,0 +1,28 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from pythonbpf.helpers import ktime, deref
from pythonbpf.maps import HashMap
from ctypes import c_void_p, c_int64, c_int32, c_uint64
# Map declaration: `last` is a HashMap with c_uint64 keys, c_uint64 values and
# max_entries=3. Nothing in the code shown for this file reads or writes it.
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
# Program placed in section "blk_start_request": reads ktime() into `ts` (not used
# afterwards), prints "req started" and returns c_int32(0).
# NOTE(review): the C example ex5.bpf.c uses SEC("kprobe/blk_start_request");
# confirm whether omitting the "kprobe/" prefix here is intentional.
@bpf
@section("blk_start_request")
def trace_start(ctx: c_void_p) -> c_int32:
ts = ktime()
print("req started")
return c_int32(0)
# License declaration for this program: returns the string "GPL".
# NOTE(review): presumably the counterpart of `char LICENSE[] SEC("license") = "GPL";`
# in the C examples — confirm.
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

43
examples/execve5.py Normal file
View File

@ -0,0 +1,43 @@
from pythonbpf import bpf, map, struct, section, bpfglobal, compile
from pythonbpf.helpers import ktime, pid
from pythonbpf.maps import PerfEventArray
from ctypes import c_void_p, c_int64, c_int32, c_uint64
# Struct declaration used by hello() below as the record passed to events.output().
# Fields are declared through annotations only; no values are assigned here.
@bpf
@struct
class data_t:
# set from pid() in hello()
pid: c_uint64
# set from ktime() in hello()
ts: c_uint64
# declared as str(16); the assignment to it in hello() is commented out
comm: str(16)
# Map declaration: `events` is a PerfEventArray with key_size=c_int32 and
# value_size=c_int32. hello() below sends data_t records through it.
@bpf
@map
def events() -> PerfEventArray:
return PerfEventArray(key_size=c_int32, value_size=c_int32)
# Program placed in section "tracepoint/syscalls/sys_enter_clone".
# Creates a data_t, fills pid and ts from pid() and ktime(), prints a message that
# also includes a local string, and passes the struct to events.output().
# Returns c_int32(0).
# NOTE(review): `events` is used without call parentheses, unlike the
# `last().lookup(...)` form in other examples — confirm the compiler expects this.
@bpf
@section("tracepoint/syscalls/sys_enter_clone")
def hello(ctx: c_void_p) -> c_int32:
dataobj = data_t()
ts = ktime()
process_id = pid()
strobj = "hellohellohello"
dataobj.pid = process_id
dataobj.ts = ts
# comm is left unset: the assignment below is disabled
# dataobj.comm = strobj
print(f"clone called at {ts} by pid {process_id}, comm {strobj}")
events.output(dataobj)
return c_int32(0)
# License declaration for this program: returns the string "GPL".
# NOTE(review): presumably the counterpart of `char LICENSE[] SEC("license") = "GPL";`
# in the C examples — confirm.
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

203381
examples/vmlinux.py Normal file

File diff suppressed because it is too large Load Diff

View File

@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "pythonbpf" name = "pythonbpf"
version = "0.1.1" version = "0.1.3"
description = "Reduced Python frontend for eBPF" description = "Reduced Python frontend for eBPF"
authors = [ authors = [
{ name = "r41k0u", email="pragyanshchaturvedi18@gmail.com" }, { name = "r41k0u", email="pragyanshchaturvedi18@gmail.com" },
@ -16,7 +16,8 @@ requires-python = ">=3.8"
dependencies = [ dependencies = [
"llvmlite", "llvmlite",
"astpretty" "astpretty",
"pylibbpf"
] ]
[tool.setuptools.packages.find] [tool.setuptools.packages.find]

View File

@ -1,2 +1,2 @@
from .decorators import bpf, map, section, bpfglobal from .decorators import bpf, map, section, bpfglobal, struct
from .codegen import compile_to_ir, compile from .codegen import compile_to_ir, compile, BPF

View File

@ -1,35 +1,82 @@
import ast import ast
from llvmlite import ir from llvmlite import ir
def handle_binary_op(rval, module, builder, func, local_sym_tab, map_sym_tab):
def recursive_dereferencer(var, builder):
    """Load through ``var`` until a 64-bit integer value is obtained.

    ``var`` may be an i64, a pointer to i64, or a pointer to a pointer to i64.
    Each pointer level is removed with ``builder.load``; an i64 is returned
    unchanged.

    Raises:
        TypeError: if the value (at any level) has any other type.
    """
    i64 = ir.IntType(64)
    i64_ptr = ir.PointerType(i64)
    i64_ptr_ptr = ir.PointerType(i64_ptr)

    value = var
    while True:
        if value.type == i64_ptr_ptr or value.type == i64_ptr:
            # still a pointer: strip one level and re-check
            value = builder.load(value)
        elif value.type == i64:
            return value
        else:
            raise TypeError(
                f"Unsupported type for dereferencing: {value.type}")
def handle_binary_op(rval, module, builder, var_name, local_sym_tab, map_sym_tab, func):
print(module)
left = rval.left left = rval.left
right = rval.right right = rval.right
op = rval.op op = rval.op
# Handle left operand
if isinstance(left, ast.Name): if isinstance(left, ast.Name):
left = local_sym_tab[left.id] if left.id in local_sym_tab:
left = recursive_dereferencer(local_sym_tab[left.id][0], builder)
else:
raise SyntaxError(f"Undefined variable: {left.id}")
elif isinstance(left, ast.Constant): elif isinstance(left, ast.Constant):
left = ir.Constant(ir.IntType(64), left.value) left = ir.Constant(ir.IntType(64), left.value)
else: else:
print("Unsupported left operand type") raise SyntaxError("Unsupported left operand type")
if isinstance(right, ast.Name): if isinstance(right, ast.Name):
right = local_sym_tab[right.id] if right.id in local_sym_tab:
right = recursive_dereferencer(local_sym_tab[right.id][0], builder)
else:
raise SyntaxError(f"Undefined variable: {right.id}")
elif isinstance(right, ast.Constant): elif isinstance(right, ast.Constant):
right = ir.Constant(ir.IntType(64), right.value) right = ir.Constant(ir.IntType(64), right.value)
else: else:
SyntaxError("Unsupported right operand type") raise SyntaxError("Unsupported right operand type")
print(f"left is {left}, right is {right}, op is {op}")
if isinstance(op, ast.Add): if isinstance(op, ast.Add):
result = builder.add(left, right) builder.store(builder.add(left, right),
local_sym_tab[var_name][0])
elif isinstance(op, ast.Sub): elif isinstance(op, ast.Sub):
result = builder.sub(left, right) builder.store(builder.sub(left, right),
local_sym_tab[var_name][0])
elif isinstance(op, ast.Mult): elif isinstance(op, ast.Mult):
result = builder.mul(left, right) builder.store(builder.mul(left, right),
local_sym_tab[var_name][0])
elif isinstance(op, ast.Div): elif isinstance(op, ast.Div):
result = builder.sdiv(left, right) builder.store(builder.sdiv(left, right),
local_sym_tab[var_name][0])
elif isinstance(op, ast.Mod):
builder.store(builder.srem(left, right),
local_sym_tab[var_name][0])
elif isinstance(op, ast.LShift):
builder.store(builder.shl(left, right),
local_sym_tab[var_name][0])
elif isinstance(op, ast.RShift):
builder.store(builder.lshr(left, right),
local_sym_tab[var_name][0])
elif isinstance(op, ast.BitOr):
builder.store(builder.or_(left, right),
local_sym_tab[var_name][0])
elif isinstance(op, ast.BitXor):
builder.store(builder.xor(left, right),
local_sym_tab[var_name][0])
elif isinstance(op, ast.BitAnd):
builder.store(builder.and_(left, right),
local_sym_tab[var_name][0])
elif isinstance(op, ast.FloorDiv):
builder.store(builder.udiv(left, right),
local_sym_tab[var_name][0])
else: else:
result = "fuck type errors" raise SyntaxError("Unsupported binary operation")
SyntaxError("Unsupported binary operation")
return result

View File

@ -3,7 +3,7 @@ from llvmlite import ir
from .expr_pass import eval_expr from .expr_pass import eval_expr
def bpf_ktime_get_ns_emitter(call, map_ptr, module, builder, func, local_sym_tab=None): def bpf_ktime_get_ns_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, local_var_metadata=None):
""" """
Emit LLVM IR for bpf_ktime_get_ns helper function call. Emit LLVM IR for bpf_ktime_get_ns helper function call.
""" """
@ -13,10 +13,10 @@ def bpf_ktime_get_ns_emitter(call, map_ptr, module, builder, func, local_sym_tab
fn_ptr_type = ir.PointerType(fn_type) fn_ptr_type = ir.PointerType(fn_type)
fn_ptr = builder.inttoptr(helper_id, fn_ptr_type) fn_ptr = builder.inttoptr(helper_id, fn_ptr_type)
result = builder.call(fn_ptr, [], tail=False) result = builder.call(fn_ptr, [], tail=False)
return result return result, ir.IntType(64)
def bpf_map_lookup_elem_emitter(call, map_ptr, module, builder, local_sym_tab=None): def bpf_map_lookup_elem_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, struct_sym_tab=None, local_var_metadata=None):
""" """
Emit LLVM IR for bpf_map_lookup_elem helper function call. Emit LLVM IR for bpf_map_lookup_elem helper function call.
""" """
@ -27,7 +27,7 @@ def bpf_map_lookup_elem_emitter(call, map_ptr, module, builder, local_sym_tab=No
if isinstance(key_arg, ast.Name): if isinstance(key_arg, ast.Name):
key_name = key_arg.id key_name = key_arg.id
if local_sym_tab and key_name in local_sym_tab: if local_sym_tab and key_name in local_sym_tab:
key_ptr = local_sym_tab[key_name] key_ptr = local_sym_tab[key_name][0]
else: else:
raise ValueError( raise ValueError(
f"Key variable {key_name} not found in local symbol table.") f"Key variable {key_name} not found in local symbol table.")
@ -60,10 +60,10 @@ def bpf_map_lookup_elem_emitter(call, map_ptr, module, builder, local_sym_tab=No
result = builder.call(fn_ptr, [map_void_ptr, key_ptr], tail=False) result = builder.call(fn_ptr, [map_void_ptr, key_ptr], tail=False)
return result return result, ir.PointerType()
def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None): def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, local_var_metadata=None):
if not hasattr(func, "_fmt_counter"): if not hasattr(func, "_fmt_counter"):
func._fmt_counter = 0 func._fmt_counter = 0
@ -75,6 +75,7 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None)
exprs = [] exprs = []
for value in call.args[0].values: for value in call.args[0].values:
print("Value in f-string:", ast.dump(value))
if isinstance(value, ast.Constant): if isinstance(value, ast.Constant):
if isinstance(value.value, str): if isinstance(value.value, str):
fmt_parts.append(value.value) fmt_parts.append(value.value)
@ -85,10 +86,25 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None)
raise NotImplementedError( raise NotImplementedError(
"Only string and integer constants are supported in f-string.") "Only string and integer constants are supported in f-string.")
elif isinstance(value, ast.FormattedValue): elif isinstance(value, ast.FormattedValue):
# Assume int for now print("Formatted value:", ast.dump(value))
fmt_parts.append("%d") # TODO: Dirty handling here, only checks for int or str
if isinstance(value.value, ast.Name): if isinstance(value.value, ast.Name):
exprs.append(value.value) if local_sym_tab and value.value.id in local_sym_tab:
var_ptr, var_type = local_sym_tab[value.value.id]
if isinstance(var_type, ir.IntType):
fmt_parts.append("%lld")
exprs.append(value.value)
elif var_type == ir.PointerType(ir.IntType(8)):
# Case with string
fmt_parts.append("%s")
exprs.append(value.value)
else:
raise NotImplementedError(
"Only integer and pointer types are supported in formatted values.")
print("Formatted value variable:", var_ptr, var_type)
else:
raise ValueError(
f"Variable {value.value.id} not found in local symbol table.")
else: else:
raise NotImplementedError( raise NotImplementedError(
"Only simple variable names are supported in formatted values.") "Only simple variable names are supported in formatted values.")
@ -120,7 +136,8 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None)
"Warning: bpf_printk supports up to 3 arguments, extra arguments will be ignored.") "Warning: bpf_printk supports up to 3 arguments, extra arguments will be ignored.")
for expr in exprs[:3]: for expr in exprs[:3]:
val = eval_expr(func, module, builder, expr, local_sym_tab, None) val, _ = eval_expr(func, module, builder,
expr, local_sym_tab, None)
if val: if val:
if isinstance(val.type, ir.PointerType): if isinstance(val.type, ir.PointerType):
val = builder.ptrtoint(val, ir.IntType(64)) val = builder.ptrtoint(val, ir.IntType(64))
@ -136,7 +153,6 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None)
print( print(
"Warning: Failed to evaluate expression for bpf_printk argument. It will be converted to 0.") "Warning: Failed to evaluate expression for bpf_printk argument. It will be converted to 0.")
args.append(ir.Constant(ir.IntType(64), 0)) args.append(ir.Constant(ir.IntType(64), 0))
fn_type = ir.FunctionType(ir.IntType( fn_type = ir.FunctionType(ir.IntType(
64), [ir.PointerType(), ir.IntType(32)], var_arg=True) 64), [ir.PointerType(), ir.IntType(32)], var_arg=True)
fn_ptr_type = ir.PointerType(fn_type) fn_ptr_type = ir.PointerType(fn_type)
@ -172,7 +188,7 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None)
ir.IntType(32), len(fmt_str))], tail=True) ir.IntType(32), len(fmt_str))], tail=True)
def bpf_map_update_elem_emitter(call, map_ptr, module, builder, local_sym_tab=None): def bpf_map_update_elem_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, struct_sym_tab=None, local_var_metadata=None):
""" """
Emit LLVM IR for bpf_map_update_elem helper function call. Emit LLVM IR for bpf_map_update_elem helper function call.
Expected call signature: map.update(key, value, flags=0) Expected call signature: map.update(key, value, flags=0)
@ -189,7 +205,7 @@ def bpf_map_update_elem_emitter(call, map_ptr, module, builder, local_sym_tab=No
if isinstance(key_arg, ast.Name): if isinstance(key_arg, ast.Name):
key_name = key_arg.id key_name = key_arg.id
if local_sym_tab and key_name in local_sym_tab: if local_sym_tab and key_name in local_sym_tab:
key_ptr = local_sym_tab[key_name] key_ptr = local_sym_tab[key_name][0]
else: else:
raise ValueError( raise ValueError(
f"Key variable {key_name} not found in local symbol table.") f"Key variable {key_name} not found in local symbol table.")
@ -208,7 +224,7 @@ def bpf_map_update_elem_emitter(call, map_ptr, module, builder, local_sym_tab=No
if isinstance(value_arg, ast.Name): if isinstance(value_arg, ast.Name):
value_name = value_arg.id value_name = value_arg.id
if local_sym_tab and value_name in local_sym_tab: if local_sym_tab and value_name in local_sym_tab:
value_ptr = local_sym_tab[value_name] value_ptr = local_sym_tab[value_name][0]
else: else:
raise ValueError( raise ValueError(
f"Value variable {value_name} not found in local symbol table.") f"Value variable {value_name} not found in local symbol table.")
@ -231,7 +247,7 @@ def bpf_map_update_elem_emitter(call, map_ptr, module, builder, local_sym_tab=No
flags_name = flags_arg.id flags_name = flags_arg.id
if local_sym_tab and flags_name in local_sym_tab: if local_sym_tab and flags_name in local_sym_tab:
# Assume it's a stored integer value, load it # Assume it's a stored integer value, load it
flags_ptr = local_sym_tab[flags_name] flags_ptr = local_sym_tab[flags_name][0]
flags_val = builder.load(flags_ptr) flags_val = builder.load(flags_ptr)
else: else:
raise ValueError( raise ValueError(
@ -265,7 +281,131 @@ def bpf_map_update_elem_emitter(call, map_ptr, module, builder, local_sym_tab=No
result = builder.call( result = builder.call(
fn_ptr, [map_void_ptr, key_ptr, value_ptr, flags_const], tail=False) fn_ptr, [map_void_ptr, key_ptr, value_ptr, flags_const], tail=False)
return result return result, None
def bpf_map_delete_elem_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, struct_sym_tab=None, local_var_metadata=None):
    """
    Emit LLVM IR for a call to the bpf_map_delete_elem helper (helper ID 3).

    Expected call signature: map.delete(key)

    The key is either a local variable name, resolved through
    ``local_sym_tab`` (the pointer is taken from index 0 of the entry), or
    an integer constant, which is stored into a fresh 64-bit stack slot so
    that a pointer to it can be passed to the helper.

    ``module``, ``func``, ``struct_sym_tab`` and ``local_var_metadata`` are
    not read here; they are accepted so that all emitters can be invoked
    with the same argument list.

    Returns:
        A ``(result, None)`` tuple, where ``result`` is the value produced
        by ``builder.call`` for the helper (declared as returning i64).

    Raises:
        ValueError: wrong number of arguments, key variable missing from
            ``local_sym_tab``, or a resolved key pointer that is None.
        NotImplementedError: the key is neither a plain name nor an
            integer constant.
    """
    args = call.args
    if len(args) != 1:
        raise ValueError("Map delete expects exactly 1 argument (key), got "
                         f"{len(call.args)}")

    key_node = args[0]

    # Resolve the key expression to a pointer the helper can read from.
    if isinstance(key_node, ast.Name):
        name = key_node.id
        if not (local_sym_tab and name in local_sym_tab):
            raise ValueError(
                f"Key variable {name} not found in local symbol table.")
        key_ptr = local_sym_tab[name][0]
    elif isinstance(key_node, ast.Constant) and isinstance(key_node.value, int):
        # The helper takes the key by pointer, so spill the literal to the
        # stack as an i64 first.
        i64 = ir.IntType(64)
        key_ptr = builder.alloca(i64)
        key_ptr.align = i64.width // 8
        builder.store(ir.Constant(i64, key_node.value), key_ptr)
    else:
        raise NotImplementedError(
            "Only simple variable names and integer constants are supported as keys in map delete.")

    if key_ptr is None:
        raise ValueError("Key pointer is None.")

    # The helper receives the map as an untyped pointer.
    map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())

    # i64 bpf_map_delete_elem(void *map, void *key)
    helper_sig = ir.FunctionType(
        ir.IntType(64),
        [ir.PointerType(), ir.PointerType()],
        var_arg=False,
    )

    # BPF helpers are called through their numeric ID cast to a function
    # pointer; ID 3 is bpf_map_delete_elem.
    helper_id = ir.Constant(ir.IntType(64), 3)
    helper_fn = builder.inttoptr(helper_id, ir.PointerType(helper_sig))

    status = builder.call(helper_fn, [map_void_ptr, key_ptr], tail=False)
    return status, None
def bpf_get_current_pid_tgid_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, struct_sym_tab=None, local_var_metadata=None):
    """
    Emit LLVM IR for a call to the bpf_get_current_pid_tgid helper (ID 14).

    The helper takes no arguments and returns a 64-bit value; this emitter
    masks it down to its lower 32 bits before returning it.

    Only ``builder`` is used. Every other parameter exists so that this
    emitter has the same signature as the other helper emitters and can be
    invoked with the same positional argument list
    (call, map_ptr, module, builder, func, local_sym_tab, struct_sym_tab,
    local_var_metadata).

    Returns:
        A ``(value, type)`` tuple: the masked i64 value and ``ir.IntType(64)``.
    """
    i64 = ir.IntType(64)

    # BPF helpers are called through their numeric ID cast to a function
    # pointer; ID 14 is bpf_get_current_pid_tgid.
    helper_id = ir.Constant(i64, 14)
    fn_type = ir.FunctionType(i64, [], var_arg=False)
    fn_ptr = builder.inttoptr(helper_id, ir.PointerType(fn_type))
    result = builder.call(fn_ptr, [], tail=False)

    # Keep only the lower 32 bits. The kernel documents the return value as
    # (tgid << 32 | pid), so this is the kernel-side pid field.
    mask = ir.Constant(i64, 0xFFFFFFFF)
    pid = builder.and_(result, mask)
    return pid, ir.IntType(64)
def bpf_perf_event_output_handler(call, map_ptr, module, builder, func, local_sym_tab=None, struct_sym_tab=None, local_var_metadata=None):
    """
    Emit LLVM IR for a call to the bpf_perf_event_output helper (ID 25).

    Expected call signature: map.output(data)

    ``data`` must be the name of a local variable that holds a struct:
    its pointer is read from ``local_sym_tab[name][0]``, its struct type
    name from ``local_var_metadata[name]``, and the byte size passed to the
    helper from ``struct_sym_tab[type_name]["size"]``. The context pointer
    passed to the helper is the first argument of ``func``.

    ``module`` is not read; it is accepted for signature uniformity with
    the other emitters.

    Returns:
        A ``(result, None)`` tuple, where ``result`` is the value produced
        by ``builder.call`` for the helper (declared as returning i64).

    Raises:
        ValueError: wrong number of arguments, or the variable is missing
            from ``local_sym_tab``, ``local_var_metadata`` or (via its type
            name) ``struct_sym_tab``. A ``struct_sym_tab`` of None is
            treated as an empty table.
        NotImplementedError: ``data`` is not a plain variable name.
    """
    if len(call.args) != 1:
        raise ValueError("Perf event output expects exactly one argument (data), got "
                         f"{len(call.args)}")
    data_arg = call.args[0]
    ctx_ptr = func.args[0]  # First argument to the function is ctx

    if not isinstance(data_arg, ast.Name):
        raise NotImplementedError(
            "Only simple object names are supported as data in perf event output.")

    data_name = data_arg.id
    if not (local_sym_tab and data_name in local_sym_tab):
        raise ValueError(
            f"Data variable {data_name} not found in local symbol table.")
    data_ptr = local_sym_tab[data_name][0]

    # The helper needs the payload size, which is only known for structs.
    if not (local_var_metadata and data_name in local_var_metadata):
        raise ValueError(
            f"Metadata for variable {data_name} not found in local variable metadata.")
    data_type = local_var_metadata[data_name]

    # struct_sym_tab defaults to None; report that the same way as a
    # missing entry instead of failing on the membership test.
    if not struct_sym_tab or data_type not in struct_sym_tab:
        raise ValueError(
            f"Struct type {data_type} for variable {data_name} not found in struct symbol table.")
    struct_info = struct_sym_tab[data_type]
    size_val = ir.Constant(ir.IntType(64), struct_info["size"])

    # BPF_F_CURRENT_CPU: all ones in the low 32 bits of the flags word.
    flags_val = ir.Constant(ir.IntType(64), 0xFFFFFFFF)

    map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
    data_void_ptr = builder.bitcast(data_ptr, ir.PointerType())

    # i64 bpf_perf_event_output(ctx, map, flags, data, size)
    fn_type = ir.FunctionType(
        ir.IntType(64),
        [ir.PointerType(ir.IntType(8)), ir.PointerType(), ir.IntType(64),
         ir.PointerType(), ir.IntType(64)],
        var_arg=False
    )
    fn_ptr_type = ir.PointerType(fn_type)

    # BPF helpers are called through their numeric ID cast to a function
    # pointer; ID 25 is bpf_perf_event_output.
    fn_addr = ir.Constant(ir.IntType(64), 25)
    fn_ptr = builder.inttoptr(fn_addr, fn_ptr_type)

    result = builder.call(
        fn_ptr, [ctx_ptr, map_void_ptr, flags_val, data_void_ptr, size_val], tail=False)
    return result, None
helper_func_list = { helper_func_list = {
@ -273,10 +413,13 @@ helper_func_list = {
"print": bpf_printk_emitter, "print": bpf_printk_emitter,
"ktime": bpf_ktime_get_ns_emitter, "ktime": bpf_ktime_get_ns_emitter,
"update": bpf_map_update_elem_emitter, "update": bpf_map_update_elem_emitter,
"delete": bpf_map_delete_elem_emitter,
"pid": bpf_get_current_pid_tgid_emitter,
"output": bpf_perf_event_output_handler,
} }
def handle_helper_call(call, module, builder, func, local_sym_tab=None, map_sym_tab=None): def handle_helper_call(call, module, builder, func, local_sym_tab=None, map_sym_tab=None, struct_sym_tab=None, local_var_metadata=None):
if isinstance(call.func, ast.Name): if isinstance(call.func, ast.Name):
func_name = call.func.id func_name = call.func.id
if func_name in helper_func_list: if func_name in helper_func_list:
@ -293,14 +436,29 @@ def handle_helper_call(call, module, builder, func, local_sym_tab=None, map_sym_
if map_sym_tab and map_name in map_sym_tab: if map_sym_tab and map_name in map_sym_tab:
map_ptr = map_sym_tab[map_name] map_ptr = map_sym_tab[map_name]
if method_name in helper_func_list: if method_name in helper_func_list:
print(local_var_metadata)
return helper_func_list[method_name]( return helper_func_list[method_name](
call, map_ptr, module, builder, local_sym_tab) call, map_ptr, module, builder, func, local_sym_tab, struct_sym_tab, local_var_metadata)
else: else:
raise NotImplementedError( raise NotImplementedError(
f"Map method {method_name} is not implemented as a helper function.") f"Map method {method_name} is not implemented as a helper function.")
else: else:
raise ValueError( raise ValueError(
f"Map variable {map_name} not found in symbol tables.") f"Map variable {map_name} not found in symbol tables.")
elif isinstance(call.func.value, ast.Name):
obj_name = call.func.value.id
method_name = call.func.attr
if map_sym_tab and obj_name in map_sym_tab:
map_ptr = map_sym_tab[obj_name]
if method_name in helper_func_list:
return helper_func_list[method_name](
call, map_ptr, module, builder, func, local_sym_tab, struct_sym_tab, local_var_metadata)
else:
raise NotImplementedError(
f"Map method {method_name} is not implemented as a helper function.")
else:
raise ValueError(
f"Map variable {obj_name} not found in symbol tables.")
else: else:
raise NotImplementedError( raise NotImplementedError(
"Attribute not supported for map method calls.") "Attribute not supported for map method calls.")

View File

@ -3,17 +3,20 @@ from llvmlite import ir
from .license_pass import license_processing from .license_pass import license_processing
from .functions_pass import func_proc from .functions_pass import func_proc
from .maps_pass import maps_proc from .maps_pass import maps_proc
from .structs_pass import structs_proc
from .globals_pass import globals_processing from .globals_pass import globals_processing
import os import os
import subprocess import subprocess
import inspect import inspect
from pathlib import Path from pathlib import Path
from pylibbpf import BpfProgram
def find_bpf_chunks(tree): def find_bpf_chunks(tree):
"""Find all functions decorated with @bpf in the AST.""" """Find all functions decorated with @bpf in the AST."""
bpf_functions = [] bpf_functions = []
for node in ast.walk(tree): for node in ast.walk(tree):
if isinstance(node, ast.FunctionDef): if isinstance(node, ast.FunctionDef) or isinstance(node, ast.ClassDef):
for decorator in node.decorator_list: for decorator in node.decorator_list:
if isinstance(decorator, ast.Name) and decorator.id == "bpf": if isinstance(decorator, ast.Name) and decorator.id == "bpf":
bpf_functions.append(node) bpf_functions.append(node)
@ -27,10 +30,11 @@ def processor(source_code, filename, module):
bpf_chunks = find_bpf_chunks(tree) bpf_chunks = find_bpf_chunks(tree)
for func_node in bpf_chunks: for func_node in bpf_chunks:
print(f"Found BPF function: {func_node.name}") print(f"Found BPF function/struct: {func_node.name}")
structs_sym_tab = structs_proc(tree, module, bpf_chunks)
map_sym_tab = maps_proc(tree, module, bpf_chunks) map_sym_tab = maps_proc(tree, module, bpf_chunks)
func_proc(tree, module, bpf_chunks, map_sym_tab) func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab)
license_processing(tree, module) license_processing(tree, module)
globals_processing(tree, module) globals_processing(tree, module)
@ -49,7 +53,7 @@ def compile_to_ir(filename: str, output: str):
"filename": filename, "filename": filename,
"directory": os.path.dirname(filename) "directory": os.path.dirname(filename)
}) })
module._debug_compile_unit = module.add_debug_info("DICompileUnit", { # type: ignore module._debug_compile_unit = module.add_debug_info("DICompileUnit", { # type: ignore
"language": 29, # DW_LANG_C11 "language": 29, # DW_LANG_C11
"file": module._file_metadata, # type: ignore "file": module._file_metadata, # type: ignore
@ -61,7 +65,8 @@ def compile_to_ir(filename: str, output: str):
"nameTableKind": 0 "nameTableKind": 0
}, is_distinct=True) }, is_distinct=True)
module.add_named_metadata("llvm.dbg.cu", module._debug_compile_unit) # type: ignore module.add_named_metadata(
"llvm.dbg.cu", module._debug_compile_unit) # type: ignore
processor(source, filename, module) processor(source, filename, module)
@ -88,6 +93,7 @@ def compile_to_ir(filename: str, output: str):
module.add_named_metadata("llvm.ident", ["llvmlite PythonBPF v0.0.1"]) module.add_named_metadata("llvm.ident", ["llvmlite PythonBPF v0.0.1"])
print(f"IR written to {output}")
with open(output, "w") as f: with open(output, "w") as f:
f.write(f"source_filename = \"{filename}\"\n") f.write(f"source_filename = \"{filename}\"\n")
f.write(str(module)) f.write(str(module))
@ -95,6 +101,7 @@ def compile_to_ir(filename: str, output: str):
return output return output
def compile(): def compile():
# Look one level up the stack to the caller of this function # Look one level up the stack to the caller of this function
caller_frame = inspect.stack()[1] caller_frame = inspect.stack()[1]
@ -110,4 +117,19 @@ def compile():
str(ll_file), "-o", str(o_file) str(ll_file), "-o", str(o_file)
], check=True) ], check=True)
print(f"Object written to {o_file}") print(f"Object written to {o_file}, {ll_file} can be removed")
def BPF() -> BpfProgram:
    """Compile the calling script to a BPF object and wrap it in a BpfProgram.

    The file that called this function is located through the call stack,
    lowered to LLVM IR with ``compile_to_ir`` and then assembled with
    ``llc``. Both intermediate files are written to ``/tmp`` and are named
    after the calling file, with ``.ll`` and ``.o`` suffixes.

    Returns:
        ``BpfProgram`` constructed from the path of the generated object
        file.

    Raises:
        subprocess.CalledProcessError: if ``llc`` exits with a non-zero
            status (``check=True``).
    """
    # Frame 0 is this function; frame 1 is whoever called it.
    source_path = Path(inspect.stack()[1].filename).resolve()

    out_dir = Path("/tmp")
    ll_file = out_dir / source_path.with_suffix(".ll").name
    o_file = out_dir / source_path.with_suffix(".o").name

    compile_to_ir(str(source_path), str(ll_file))

    llc_cmd = [
        "llc", "-march=bpf", "-filetype=obj", "-O2",
        str(ll_file), "-o", str(o_file),
    ]
    subprocess.run(llc_cmd, check=True)

    return BpfProgram(str(o_file))

View File

@ -9,11 +9,19 @@ def bpfglobal(func):
func._is_bpfglobal = True func._is_bpfglobal = True
return func return func
def map(func):
    """Decorator that tags a function as a BPF map definition.

    Sets ``func._is_map`` to True and returns the same function object.
    """
    setattr(func, "_is_map", True)
    return func
def struct(cls):
    """Decorator that tags a class as a BPF struct definition.

    Sets ``cls._is_struct`` to True and returns the same class object.
    """
    setattr(cls, "_is_struct", True)
    return cls
def section(name: str): def section(name: str):
def wrapper(fn): def wrapper(fn):
fn._section = name fn._section = name

View File

@ -2,21 +2,21 @@ import ast
from llvmlite import ir from llvmlite import ir
def eval_expr(func, module, builder, expr, local_sym_tab, map_sym_tab): def eval_expr(func, module, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab=None, local_var_metadata=None):
print(f"Evaluating expression: {expr}") print(f"Evaluating expression: {ast.dump(expr)}")
if isinstance(expr, ast.Name): if isinstance(expr, ast.Name):
if expr.id in local_sym_tab: if expr.id in local_sym_tab:
var = local_sym_tab[expr.id] var = local_sym_tab[expr.id][0]
val = builder.load(var) val = builder.load(var)
return val return val, local_sym_tab[expr.id][1] # return value and type
else: else:
print(f"Undefined variable {expr.id}") print(f"Undefined variable {expr.id}")
return None return None
elif isinstance(expr, ast.Constant): elif isinstance(expr, ast.Constant):
if isinstance(expr.value, int): if isinstance(expr.value, int):
return ir.Constant(ir.IntType(64), expr.value) return ir.Constant(ir.IntType(64), expr.value), ir.IntType(64)
elif isinstance(expr.value, bool): elif isinstance(expr.value, bool):
return ir.Constant(ir.IntType(1), int(expr.value)) return ir.Constant(ir.IntType(1), int(expr.value)), ir.IntType(1)
else: else:
print("Unsupported constant type") print("Unsupported constant type")
return None return None
@ -25,25 +25,57 @@ def eval_expr(func, module, builder, expr, local_sym_tab, map_sym_tab):
from .bpf_helper_handler import helper_func_list, handle_helper_call from .bpf_helper_handler import helper_func_list, handle_helper_call
if isinstance(expr.func, ast.Name): if isinstance(expr.func, ast.Name):
# check for helpers first # check deref
if expr.func.id == "deref":
print(f"Handling deref {ast.dump(expr)}")
if len(expr.args) != 1:
print("deref takes exactly one argument")
return None
arg = expr.args[0]
if isinstance(arg, ast.Call) and isinstance(arg.func, ast.Name) and arg.func.id == "deref":
print("Multiple deref not supported")
return None
if isinstance(arg, ast.Name):
if arg.id in local_sym_tab:
arg = local_sym_tab[arg.id][0]
else:
print(f"Undefined variable {arg.id}")
return None
if arg is None:
print("Failed to evaluate deref argument")
return None
# Since we are handling only name case, directly take type from sym tab
val = builder.load(arg)
return val, local_sym_tab[expr.args[0].id][1]
# check for helpers
if expr.func.id in helper_func_list: if expr.func.id in helper_func_list:
return handle_helper_call( return handle_helper_call(
expr, module, builder, func, local_sym_tab, map_sym_tab) expr, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab, local_var_metadata)
elif isinstance(expr.func, ast.Attribute): elif isinstance(expr.func, ast.Attribute):
print(f"Handling method call: {ast.dump(expr.func)}")
if isinstance(expr.func.value, ast.Call) and isinstance(expr.func.value.func, ast.Name): if isinstance(expr.func.value, ast.Call) and isinstance(expr.func.value.func, ast.Name):
method_name = expr.func.attr method_name = expr.func.attr
if method_name in helper_func_list: if method_name in helper_func_list:
return handle_helper_call( return handle_helper_call(
expr, module, builder, func, local_sym_tab, map_sym_tab) expr, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab, local_var_metadata)
elif isinstance(expr.func.value, ast.Name):
obj_name = expr.func.value.id
method_name = expr.func.attr
if obj_name in map_sym_tab:
if method_name in helper_func_list:
return handle_helper_call(
expr, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab, local_var_metadata)
print("Unsupported expression evaluation") print("Unsupported expression evaluation")
return None return None
def handle_expr(func, module, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab, local_var_metadata):
    """Lower an expression statement found in a function body.

    ``expr`` is the statement node; only statements whose value is a call
    are handled, by forwarding the call to ``eval_expr`` together with the
    symbol tables. Anything else is reported on stdout and ignored.
    The result of ``eval_expr`` is discarded; this function returns None.
    """
    print(f"Handling expression: {ast.dump(expr)}")
    node = expr.value
    if not isinstance(node, ast.Call):
        print("Unsupported expression type")
        return
    eval_expr(func, module, builder, node, local_sym_tab,
              map_sym_tab, structs_sym_tab, local_var_metadata)

View File

@ -7,6 +7,8 @@ from .type_deducer import ctypes_to_ir
from .binary_ops import handle_binary_op from .binary_ops import handle_binary_op
from .expr_pass import eval_expr, handle_expr from .expr_pass import eval_expr, handle_expr
local_var_metadata = {}
def get_probe_string(func_node): def get_probe_string(func_node):
"""Extract the probe string from the decorator of the function node.""" """Extract the probe string from the decorator of the function node."""
@ -25,7 +27,7 @@ def get_probe_string(func_node):
return "helper" return "helper"
def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab): def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_sym_tab):
"""Handle assignment statements in the function body.""" """Handle assignment statements in the function body."""
if len(stmt.targets) != 1: if len(stmt.targets) != 1:
print("Unsupported multiassignment") print("Unsupported multiassignment")
@ -34,28 +36,72 @@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab):
num_types = ("c_int32", "c_int64", "c_uint32", "c_uint64") num_types = ("c_int32", "c_int64", "c_uint32", "c_uint64")
target = stmt.targets[0] target = stmt.targets[0]
if not isinstance(target, ast.Name): print(f"Handling assignment to {ast.dump(target)}")
if not isinstance(target, ast.Name) and not isinstance(target, ast.Attribute):
print("Unsupported assignment target") print("Unsupported assignment target")
return return
var_name = target.id var_name = target.id if isinstance(target, ast.Name) else target.value.id
rval = stmt.value rval = stmt.value
if isinstance(rval, ast.Constant): if isinstance(target, ast.Attribute):
# struct field assignment
field_name = target.attr
if var_name in local_sym_tab and var_name in local_var_metadata:
struct_type = local_var_metadata[var_name]
struct_info = structs_sym_tab[struct_type]
if field_name in struct_info["fields"]:
field_idx = struct_info["fields"][field_name]
struct_ptr = local_sym_tab[var_name][0]
field_ptr = builder.gep(
struct_ptr, [ir.Constant(ir.IntType(32), 0),
ir.Constant(ir.IntType(32), field_idx)],
inbounds=True)
val = eval_expr(func, module, builder, rval,
local_sym_tab, map_sym_tab, structs_sym_tab)
if isinstance(struct_info["field_types"][field_idx], ir.ArrayType) and val[1] == ir.PointerType(ir.IntType(8)):
# TODO: Figure it out, not a priority rn
# Special case for string assignment to char array
#str_len = struct_info["field_types"][field_idx].count
#assign_string_to_array(builder, field_ptr, val[0], str_len)
#print(f"Assigned to struct field {var_name}.{field_name}")
pass
if val is None:
print("Failed to evaluate struct field assignment")
return
print(field_ptr)
builder.store(val[0], field_ptr)
print(f"Assigned to struct field {var_name}.{field_name}")
return
elif isinstance(rval, ast.Constant):
if isinstance(rval.value, bool): if isinstance(rval.value, bool):
if rval.value: if rval.value:
builder.store(ir.Constant(ir.IntType(1), 1), builder.store(ir.Constant(ir.IntType(1), 1),
local_sym_tab[var_name]) local_sym_tab[var_name][0])
else: else:
builder.store(ir.Constant(ir.IntType(1), 0), builder.store(ir.Constant(ir.IntType(1), 0),
local_sym_tab[var_name]) local_sym_tab[var_name][0])
print(f"Assigned constant {rval.value} to {var_name}") print(f"Assigned constant {rval.value} to {var_name}")
elif isinstance(rval.value, int): elif isinstance(rval.value, int):
# Assume c_int64 for now # Assume c_int64 for now
# var = builder.alloca(ir.IntType(64), name=var_name) # var = builder.alloca(ir.IntType(64), name=var_name)
# var.align = 8 # var.align = 8
builder.store(ir.Constant(ir.IntType(64), rval.value), builder.store(ir.Constant(ir.IntType(64), rval.value),
local_sym_tab[var_name]) local_sym_tab[var_name][0])
# local_sym_tab[var_name] = var # local_sym_tab[var_name] = var
print(f"Assigned constant {rval.value} to {var_name}") print(f"Assigned constant {rval.value} to {var_name}")
elif isinstance(rval.value, str):
str_val = rval.value.encode('utf-8') + b'\x00'
str_const = ir.Constant(ir.ArrayType(
ir.IntType(8), len(str_val)), bytearray(str_val))
global_str = ir.GlobalVariable(
module, str_const.type, name=f"{var_name}_str")
global_str.linkage = 'internal'
global_str.global_constant = True
global_str.initializer = str_const
str_ptr = builder.bitcast(
global_str, ir.PointerType(ir.IntType(8)))
builder.store(str_ptr, local_sym_tab[var_name][0])
print(f"Assigned string constant '{rval.value}' to {var_name}")
else: else:
print("Unsupported constant type") print("Unsupported constant type")
elif isinstance(rval, ast.Call): elif isinstance(rval, ast.Call):
@ -67,7 +113,7 @@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab):
# var = builder.alloca(ir_type, name=var_name) # var = builder.alloca(ir_type, name=var_name)
# var.align = ir_type.width // 8 # var.align = ir_type.width // 8
builder.store(ir.Constant( builder.store(ir.Constant(
ir_type, rval.args[0].value), local_sym_tab[var_name]) ir_type, rval.args[0].value), local_sym_tab[var_name][0])
print(f"Assigned {call_type} constant " print(f"Assigned {call_type} constant "
f"{rval.args[0].value} to {var_name}") f"{rval.args[0].value} to {var_name}")
# local_sym_tab[var_name] = var # local_sym_tab[var_name] = var
@ -75,32 +121,57 @@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab):
# var = builder.alloca(ir.IntType(64), name=var_name) # var = builder.alloca(ir.IntType(64), name=var_name)
# var.align = 8 # var.align = 8
val = handle_helper_call( val = handle_helper_call(
rval, module, builder, None, local_sym_tab, map_sym_tab) rval, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab, local_var_metadata)
builder.store(val, local_sym_tab[var_name]) builder.store(val[0], local_sym_tab[var_name][0])
# local_sym_tab[var_name] = var # local_sym_tab[var_name] = var
print(f"Assigned constant {rval.func.id} to {var_name}") print(f"Assigned constant {rval.func.id} to {var_name}")
elif call_type == "deref" and len(rval.args) == 1:
print(f"Handling deref assignment {ast.dump(rval)}")
val = eval_expr(func, module, builder, rval,
local_sym_tab, map_sym_tab, structs_sym_tab)
if val is None:
print("Failed to evaluate deref argument")
return
print(f"Dereferenced value: {val}, storing in {var_name}")
builder.store(val[0], local_sym_tab[var_name][0])
# local_sym_tab[var_name] = var
print(f"Dereferenced and assigned to {var_name}")
elif call_type in structs_sym_tab and len(rval.args) == 0:
struct_info = structs_sym_tab[call_type]
ir_type = struct_info["type"]
# var = builder.alloca(ir_type, name=var_name)
# Null init
builder.store(ir.Constant(ir_type, None),
local_sym_tab[var_name][0])
local_var_metadata[var_name] = call_type
print(f"Assigned struct {call_type} to {var_name}")
# local_sym_tab[var_name] = var
else: else:
print(f"Unsupported assignment call type: {call_type}") print(f"Unsupported assignment call type: {call_type}")
elif isinstance(rval.func, ast.Attribute): elif isinstance(rval.func, ast.Attribute):
if isinstance(rval.func.value, ast.Call) and isinstance(rval.func.value.func, ast.Name): print(f"Assignment call attribute: {ast.dump(rval.func)}")
if isinstance(rval.func.value, ast.Name):
# TODO: probably a struct access
print(f"TODO STRUCT ACCESS {ast.dump(rval)}")
elif isinstance(rval.func.value, ast.Call) and isinstance(rval.func.value.func, ast.Name):
map_name = rval.func.value.func.id map_name = rval.func.value.func.id
method_name = rval.func.attr method_name = rval.func.attr
if map_name in map_sym_tab: if map_name in map_sym_tab:
map_ptr = map_sym_tab[map_name] map_ptr = map_sym_tab[map_name]
if method_name in helper_func_list: if method_name in helper_func_list:
val = handle_helper_call( val = handle_helper_call(
rval, module, builder, func, local_sym_tab, map_sym_tab) rval, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab, local_var_metadata)
# var = builder.alloca(ir.IntType(64), name=var_name) # var = builder.alloca(ir.IntType(64), name=var_name)
# var.align = 8 # var.align = 8
builder.store(val, local_sym_tab[var_name]) builder.store(val[0], local_sym_tab[var_name][0])
# local_sym_tab[var_name] = var # local_sym_tab[var_name] = var
else: else:
print("Unsupported assignment call structure") print("Unsupported assignment call structure")
else: else:
print("Unsupported assignment call function type") print("Unsupported assignment call function type")
elif isinstance(rval, ast.BinOp): elif isinstance(rval, ast.BinOp):
handle_binary_op(rval, module, builder, func, handle_binary_op(rval, module, builder, var_name,
local_sym_tab, map_sym_tab) local_sym_tab, map_sym_tab, func)
else: else:
print("Unsupported assignment value type") print("Unsupported assignment value type")
@ -116,20 +187,29 @@ def handle_cond(func, module, builder, cond, local_sym_tab, map_sym_tab):
return None return None
elif isinstance(cond, ast.Name): elif isinstance(cond, ast.Name):
if cond.id in local_sym_tab: if cond.id in local_sym_tab:
var = local_sym_tab[cond.id] var = local_sym_tab[cond.id][0]
val = builder.load(var) val = builder.load(var)
if val.type != ir.IntType(1):
# Convert nonzero values to true, zero to false
if isinstance(val.type, ir.PointerType):
# For pointer types, compare with null pointer
zero = ir.Constant(val.type, None)
else:
# For integer types, compare with zero
zero = ir.Constant(val.type, 0)
val = builder.icmp_signed("!=", val, zero)
return val return val
else: else:
print(f"Undefined variable {cond.id} in condition") print(f"Undefined variable {cond.id} in condition")
return None return None
elif isinstance(cond, ast.Compare): elif isinstance(cond, ast.Compare):
lhs = eval_expr(func, module, builder, cond.left, lhs = eval_expr(func, module, builder, cond.left,
local_sym_tab, map_sym_tab) local_sym_tab, map_sym_tab)[0]
if len(cond.ops) != 1 or len(cond.comparators) != 1: if len(cond.ops) != 1 or len(cond.comparators) != 1:
print("Unsupported complex comparison") print("Unsupported complex comparison")
return None return None
rhs = eval_expr(func, module, builder, rhs = eval_expr(func, module, builder,
cond.comparators[0], local_sym_tab, map_sym_tab) cond.comparators[0], local_sym_tab, map_sym_tab)[0]
op = cond.ops[0] op = cond.ops[0]
if lhs.type != rhs.type: if lhs.type != rhs.type:
@ -163,7 +243,7 @@ def handle_cond(func, module, builder, cond, local_sym_tab, map_sym_tab):
return None return None
def handle_if(func, module, builder, stmt, map_sym_tab, local_sym_tab): def handle_if(func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_sym_tab=None):
"""Handle if statements in the function body.""" """Handle if statements in the function body."""
print("Handling if statement") print("Handling if statement")
start = builder.block.parent start = builder.block.parent
@ -184,7 +264,7 @@ def handle_if(func, module, builder, stmt, map_sym_tab, local_sym_tab):
builder.position_at_end(then_block) builder.position_at_end(then_block)
for s in stmt.body: for s in stmt.body:
process_stmt(func, module, builder, s, process_stmt(func, module, builder, s,
local_sym_tab, map_sym_tab, False) local_sym_tab, map_sym_tab, structs_sym_tab, False)
if not builder.block.is_terminated: if not builder.block.is_terminated:
builder.branch(merge_block) builder.branch(merge_block)
@ -192,23 +272,26 @@ def handle_if(func, module, builder, stmt, map_sym_tab, local_sym_tab):
builder.position_at_end(else_block) builder.position_at_end(else_block)
for s in stmt.orelse: for s in stmt.orelse:
process_stmt(func, module, builder, s, process_stmt(func, module, builder, s,
local_sym_tab, map_sym_tab, False) local_sym_tab, map_sym_tab, structs_sym_tab, False)
if not builder.block.is_terminated: if not builder.block.is_terminated:
builder.branch(merge_block) builder.branch(merge_block)
builder.position_at_end(merge_block) builder.position_at_end(merge_block)
def process_stmt(func, module, builder, stmt, local_sym_tab, map_sym_tab, did_return, ret_type=ir.IntType(64)): def process_stmt(func, module, builder, stmt, local_sym_tab, map_sym_tab, structs_sym_tab, did_return, ret_type=ir.IntType(64)):
print(f"Processing statement: {ast.dump(stmt)}") print(f"Processing statement: {ast.dump(stmt)}")
if isinstance(stmt, ast.Expr): if isinstance(stmt, ast.Expr):
handle_expr(func, module, builder, stmt, local_sym_tab, map_sym_tab) handle_expr(func, module, builder, stmt, local_sym_tab,
map_sym_tab, structs_sym_tab, local_var_metadata)
elif isinstance(stmt, ast.Assign): elif isinstance(stmt, ast.Assign):
handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab) handle_assign(func, module, builder, stmt, map_sym_tab,
local_sym_tab, structs_sym_tab)
elif isinstance(stmt, ast.AugAssign): elif isinstance(stmt, ast.AugAssign):
raise SyntaxError("Augmented assignment not supported") raise SyntaxError("Augmented assignment not supported")
elif isinstance(stmt, ast.If): elif isinstance(stmt, ast.If):
handle_if(func, module, builder, stmt, map_sym_tab, local_sym_tab) handle_if(func, module, builder, stmt, map_sym_tab,
local_sym_tab, structs_sym_tab)
elif isinstance(stmt, ast.Return): elif isinstance(stmt, ast.Return):
if stmt.value is None: if stmt.value is None:
builder.ret(ir.Constant(ir.IntType(32), 0)) builder.ret(ir.Constant(ir.IntType(32), 0))
@ -222,21 +305,30 @@ def process_stmt(func, module, builder, stmt, local_sym_tab, map_sym_tab, did_re
builder.ret(ir.Constant( builder.ret(ir.Constant(
ret_type, stmt.value.args[0].value)) ret_type, stmt.value.args[0].value))
did_return = True did_return = True
elif isinstance(stmt.value, ast.Name):
if stmt.value.id == "XDP_PASS":
builder.ret(ir.Constant(ret_type, 2))
did_return = True
elif stmt.value.id == "XDP_DROP":
builder.ret(ir.Constant(ret_type, 1))
did_return = True
else:
raise ValueError("Failed to evaluate return expression")
else: else:
print("Unsupported return value") raise ValueError("Unsupported return value")
return did_return return did_return
def process_func_body(module, builder, func_node, func, ret_type, map_sym_tab): def allocate_mem(module, builder, body, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab):
"""Process the body of a bpf function""" for stmt in body:
# TODO: A lot. We just have print -> bpf_trace_printk for now if isinstance(stmt, ast.If):
did_return = False if stmt.body:
local_sym_tab = allocate_mem(
local_sym_tab = {} module, builder, stmt.body, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab)
if stmt.orelse:
# pre-allocate dynamic variables local_sym_tab = allocate_mem(
for stmt in func_node.body: module, builder, stmt.orelse, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab)
if isinstance(stmt, ast.Assign): elif isinstance(stmt, ast.Assign):
if len(stmt.targets) != 1: if len(stmt.targets) != 1:
print("Unsupported multiassignment") print("Unsupported multiassignment")
continue continue
@ -262,6 +354,20 @@ def process_func_body(module, builder, func_node, func, ret_type, map_sym_tab):
var.align = ir_type.width // 8 var.align = ir_type.width // 8
print( print(
f"Pre-allocated variable {var_name} for helper") f"Pre-allocated variable {var_name} for helper")
elif call_type == "deref" and len(rval.args) == 1:
# Assume return type is int64 for now
ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
print(
f"Pre-allocated variable {var_name} for deref")
elif call_type in structs_sym_tab:
struct_info = structs_sym_tab[call_type]
ir_type = struct_info["type"]
var = builder.alloca(ir_type, name=var_name)
local_var_metadata[var_name] = call_type
print(
f"Pre-allocated variable {var_name} for struct {call_type}")
elif isinstance(rval.func, ast.Attribute): elif isinstance(rval.func, ast.Attribute):
ir_type = ir.PointerType(ir.IntType(64)) ir_type = ir.PointerType(ir.IntType(64))
var = builder.alloca(ir_type, name=var_name) var = builder.alloca(ir_type, name=var_name)
@ -285,25 +391,51 @@ def process_func_body(module, builder, func_node, func, ret_type, map_sym_tab):
var.align = ir_type.width // 8 var.align = ir_type.width // 8
print( print(
f"Pre-allocated variable {var_name} of type c_int64") f"Pre-allocated variable {var_name} of type c_int64")
elif isinstance(rval.value, str):
ir_type = ir.PointerType(ir.IntType(8))
var = builder.alloca(ir_type, name=var_name)
var.align = 8
print(
f"Pre-allocated variable {var_name} of type string")
else: else:
print("Unsupported constant type") print(f"Unsupported constant type")
continue continue
elif isinstance(rval, ast.BinOp):
# Assume c_int64 for now
ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
print(
f"Pre-allocated variable {var_name} of type c_int64")
else: else:
print("Unsupported assignment value type") print("Unsupported assignment value type")
continue continue
local_sym_tab[var_name] = var local_sym_tab[var_name] = (var, ir_type)
return local_sym_tab
def process_func_body(module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab):
"""Process the body of a bpf function"""
# TODO: A lot. We just have print -> bpf_trace_printk for now
did_return = False
local_sym_tab = {}
# pre-allocate dynamic variables
local_sym_tab = allocate_mem(
module, builder, func_node.body, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab)
print(f"Local symbol table: {local_sym_tab.keys()}") print(f"Local symbol table: {local_sym_tab.keys()}")
for stmt in func_node.body: for stmt in func_node.body:
did_return = process_stmt(func, module, builder, stmt, local_sym_tab, did_return = process_stmt(func, module, builder, stmt, local_sym_tab,
map_sym_tab, did_return, ret_type) map_sym_tab, structs_sym_tab, did_return, ret_type)
if not did_return: if not did_return:
builder.ret(ir.Constant(ir.IntType(32), 0)) builder.ret(ir.Constant(ir.IntType(32), 0))
def process_bpf_chunk(func_node, module, return_type, map_sym_tab): def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_tab):
"""Process a single BPF chunk (function) and emit corresponding LLVM IR.""" """Process a single BPF chunk (function) and emit corresponding LLVM IR."""
func_name = func_node.name func_name = func_node.name
@ -336,19 +468,16 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab):
block = func.append_basic_block(name="entry") block = func.append_basic_block(name="entry")
builder = ir.IRBuilder(block) builder = ir.IRBuilder(block)
process_func_body(module, builder, func_node, func, ret_type, map_sym_tab) process_func_body(module, builder, func_node, func,
ret_type, map_sym_tab, structs_sym_tab)
return func return func
def func_proc(tree, module, chunks, map_sym_tab): def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab):
for func_node in chunks: for func_node in chunks:
is_global = False is_global = False
for decorator in func_node.decorator_list: for decorator in func_node.decorator_list:
if isinstance(decorator, ast.Name) and decorator.id == "map": if isinstance(decorator, ast.Name) and decorator.id in ("map", "bpfglobal", "struct"):
is_global = True
break
elif isinstance(decorator, ast.Name) and decorator.id == "bpfglobal":
is_global = True is_global = True
break break
if is_global: if is_global:
@ -357,7 +486,7 @@ def func_proc(tree, module, chunks, map_sym_tab):
print(f"Found probe_string of {func_node.name}: {func_type}") print(f"Found probe_string of {func_node.name}: {func_type}")
process_bpf_chunk(func_node, module, ctypes_to_ir( process_bpf_chunk(func_node, module, ctypes_to_ir(
infer_return_type(func_node)), map_sym_tab) infer_return_type(func_node)), map_sym_tab, structs_sym_tab)
def infer_return_type(func_node: ast.FunctionDef): def infer_return_type(func_node: ast.FunctionDef):
@ -416,3 +545,46 @@ def infer_return_type(func_node: ast.FunctionDef):
raise ValueError("Conflicting return types:" raise ValueError("Conflicting return types:"
f"{found_type} vs {t}") f"{found_type} vs {t}")
return found_type or "None" return found_type or "None"
# For string assignment to fixed-size arrays
def assign_string_to_array(builder, target_array_ptr, source_string_ptr, array_length):
    """
    Emit IR that copies a string (i8*) into a fixed-size array ([N x i8]*).

    The generated code is a counted loop that copies exactly
    ``array_length`` bytes from ``source_string_ptr`` into the array behind
    ``target_array_ptr`` and then overwrites the last array element with 0
    so the result is always NUL-terminated.

    Args:
        builder: llvmlite ``IRBuilder`` positioned where the copy should be
            emitted. On return it is positioned at the end of the
            ``copy_end`` block.
        target_array_ptr: pointer to the destination ``[N x i8]`` array.
        source_string_ptr: ``i8*`` pointing at the first source byte.
        array_length: number of elements ``N`` in the destination array
            (a Python int known at compile time).

    NOTE(review): the loop reads ``array_length`` bytes from the source
    regardless of the source string's real length -- confirm that callers
    always provide a source buffer at least that large.
    """
    # Nothing to copy and no slot for a terminator; array_length - 1 would
    # otherwise become a negative index.
    if array_length <= 0:
        return

    i32 = ir.IntType(32)
    zero = ir.Constant(i32, 0)

    # Three-block loop: the condition block decides, the body block copies
    # one byte and jumps back, the end block is where execution continues.
    cond_block = builder.append_basic_block("copy_cond")
    body_block = builder.append_basic_block("copy_char")
    end_block = builder.append_basic_block("copy_end")

    # Loop counter lives in a stack slot so no phi node is needed.
    counter = builder.alloca(i32)
    builder.store(zero, counter)
    builder.branch(cond_block)

    # Condition: keep going while counter < array_length.
    builder.position_at_end(cond_block)
    idx = builder.load(counter)
    in_bounds = builder.icmp_unsigned('<', idx, ir.Constant(i32, array_length))
    # Exactly one terminator per block: a basic block cannot take a second
    # branch once it has been terminated.
    builder.cbranch(in_bounds, body_block, end_block)

    # Body: target[idx] = source[idx]; counter += 1; back to the condition.
    builder.position_at_end(body_block)
    src_ptr = builder.gep(source_string_ptr, [idx])
    char = builder.load(src_ptr)
    dst_ptr = builder.gep(target_array_ptr, [zero, idx])
    builder.store(char, dst_ptr)
    next_idx = builder.add(idx, ir.Constant(i32, 1))
    builder.store(next_idx, counter)
    builder.branch(cond_block)

    # Exit: force NUL termination in the final array slot.
    builder.position_at_end(end_block)
    last_idx = ir.Constant(i32, array_length - 1)
    null_ptr = builder.gep(target_array_ptr, [zero, last_idx])
    builder.store(ir.Constant(ir.IntType(8), 0), null_ptr)

View File

@ -1,5 +1,15 @@
import ctypes import ctypes
def ktime(): def ktime():
return ctypes.c_int64(0) return ctypes.c_int64(0)
def pid():
    """Return a zero-valued ``ctypes.c_int32``."""
    zero_pid = ctypes.c_int32(0)
    return zero_pid
def deref(ptr):
    """Dereference a pointer.

    ``ptr`` is cast to a pointer-to-``c_void_p`` and the pointed-to value is
    returned. ctypes reports a zero ``c_void_p`` as ``None``; that case is
    mapped to the integer 0.
    """
    void_ptr = ctypes.cast(ptr, ctypes.POINTER(ctypes.c_void_p))
    pointee = void_ptr.contents.value
    if pointee is None:
        return 0
    return pointee
XDP_DROP = ctypes.c_int64(1)
XDP_PASS = ctypes.c_int64(2)

View File

@ -1,7 +1,7 @@
class HashMap: class HashMap:
def __init__(self, key_type, value_type, max_entries): def __init__(self, key, value, max_entries):
self.key_type = key_type self.key = key
self.value_type = value_type self.value = value
self.max_entries = max_entries self.max_entries = max_entries
self.entries = {} self.entries = {}
@ -10,16 +10,26 @@ class HashMap:
return self.entries[key] return self.entries[key]
else: else:
return None return None
def delete(self, key): def delete(self, key):
if key in self.entries: if key in self.entries:
del self.entries[key] del self.entries[key]
else: else:
raise KeyError(f"Key {key} not found in map") raise KeyError(f"Key {key} not found in map")
# TODO: define the flags that can be added # TODO: define the flags that can be added
def update(self, key, value, flags=None): def update(self, key, value, flags=None):
if key in self.entries: if key in self.entries:
self.entries[key] = value self.entries[key] = value
else: else:
raise KeyError(f"Key {key} not found in map") raise KeyError(f"Key {key} not found in map")
class PerfEventArray:
    """Python-side stand-in for a perf event array map."""

    def __init__(self, key_size, value_size):
        """Record the key/value sizes and start with no entries."""
        # The sizes are stored under the *_type attribute names.
        self.key_type, self.value_type = key_size, value_size
        self.entries = dict()

    def output(self, data):
        """Placeholder for the output method; does nothing."""
        return None

View File

@ -21,22 +21,28 @@ def maps_proc(tree, module, chunks):
return map_sym_tab return map_sym_tab
BPF_MAP_MAPPINGS = {
"HASH": 1, # BPF_MAP_TYPE_HASH
"PERF_EVENT_ARRAY": 4, # BPF_MAP_TYPE_PERF_EVENT_ARRAY
}
def create_bpf_map(module, map_name, map_params): def create_bpf_map(module, map_name, map_params):
"""Create a BPF map in the module with the given parameters and debug info""" """Create a BPF map in the module with the given parameters and debug info"""
map_type_str = map_params.get("type", "HASH")
map_type = BPF_MAP_MAPPINGS.get(map_type_str)
# Create the anonymous struct type for BPF map # Create the anonymous struct type for BPF map
map_struct_type = ir.LiteralStructType([ map_struct_type = ir.LiteralStructType(
ir.PointerType(), [ir.PointerType() for _ in range(len(map_params))])
ir.PointerType(),
ir.PointerType(),
ir.PointerType()
])
# Create the global variable # Create the global variable
map_global = ir.GlobalVariable(module, map_struct_type, name=map_name) map_global = ir.GlobalVariable(module, map_struct_type, name=map_name)
map_global.linkage = 'dso_local' map_global.linkage = 'dso_local'
map_global.global_constant = False map_global.global_constant = False
map_global.initializer = ir.Constant(map_struct_type, None) # type: ignore map_global.initializer = ir.Constant(
map_struct_type, None) # type: ignore
map_global.section = ".maps" map_global.section = ".maps"
map_global.align = 8 # type: ignore map_global.align = 8 # type: ignore
@ -47,6 +53,7 @@ def create_bpf_map(module, map_name, map_params):
map_sym_tab[map_name] = map_global map_sym_tab[map_name] = map_global
return map_global return map_global
def create_map_debug_info(module, map_global, map_name, map_params): def create_map_debug_info(module, map_global, map_name, map_params):
"""Generate debug information metadata for BPF map""" """Generate debug information metadata for BPF map"""
file_metadata = module._file_metadata file_metadata = module._file_metadata
@ -67,21 +74,14 @@ def create_map_debug_info(module, map_global, map_name, map_params):
}) })
# Create array type for map type field (array of 1 unsigned int) # Create array type for map type field (array of 1 unsigned int)
array_subrange = module.add_debug_info("DISubrange", {"count": 1}) array_subrange = module.add_debug_info(
"DISubrange", {"count": BPF_MAP_MAPPINGS[map_params.get("type", "HASH")]})
array_type = module.add_debug_info("DICompositeType", { array_type = module.add_debug_info("DICompositeType", {
"tag": dc.DW_TAG_array_type, "tag": dc.DW_TAG_array_type,
"baseType": uint_type, "baseType": uint_type,
"size": 32, "size": 32,
"elements": [array_subrange] "elements": [array_subrange]
}) })
array_subrange_max_entries = module.add_debug_info("DISubrange", {"count": map_params["max_entries"]})
array_type_max_entries = module.add_debug_info("DICompositeType", {
"tag": dc.DW_TAG_array_type,
"baseType": uint_type,
"size": 32,
"elements": [array_subrange_max_entries]
})
# Create pointer types # Create pointer types
type_ptr = module.add_debug_info("DIDerivedType", { type_ptr = module.add_debug_info("DIDerivedType", {
@ -90,68 +90,75 @@ def create_map_debug_info(module, map_global, map_name, map_params):
"size": 64 "size": 64
}) })
max_entries_ptr = module.add_debug_info("DIDerivedType", {
"tag": dc.DW_TAG_pointer_type,
"baseType": array_type_max_entries,
"size": 64
})
key_ptr = module.add_debug_info("DIDerivedType", { key_ptr = module.add_debug_info("DIDerivedType", {
"tag": dc.DW_TAG_pointer_type, "tag": dc.DW_TAG_pointer_type,
"baseType": uint_type, # Adjust based on actual key type # Adjust based on actual key type
"baseType": array_type if "key_size" in map_params else uint_type,
"size": 64 "size": 64
}) })
value_ptr = module.add_debug_info("DIDerivedType", { value_ptr = module.add_debug_info("DIDerivedType", {
"tag": dc.DW_TAG_pointer_type, "tag": dc.DW_TAG_pointer_type,
"baseType": ulong_type, # Adjust based on actual value type # Adjust based on actual value type
"baseType": array_type if "value_size" in map_params else ulong_type,
"size": 64 "size": 64
}) })
elements_arr = []
# Create struct members # Create struct members
# scope field does not appear for some reason # scope field does not appear for some reason
type_member = module.add_debug_info("DIDerivedType", { cnt = 0
"tag": dc.DW_TAG_member, for elem in map_params:
"name": "type", if elem == "max_entries":
"file": file_metadata, continue
"baseType": type_ptr, if elem == "type":
"size": 64, ptr = type_ptr
"offset": 0 elif "key" in elem:
}) ptr = key_ptr
else:
ptr = value_ptr
member = module.add_debug_info("DIDerivedType", {
"tag": dc.DW_TAG_member,
"name": elem,
"file": file_metadata,
"baseType": ptr,
"size": 64,
"offset": cnt * 64
})
elements_arr.append(member)
cnt += 1
max_entries_member = module.add_debug_info("DIDerivedType", { if "max_entries" in map_params:
"tag": dc.DW_TAG_member, array_subrange_max_entries = module.add_debug_info(
"name": "max_entries", "DISubrange", {"count": map_params["max_entries"]})
"file": file_metadata, array_type_max_entries = module.add_debug_info("DICompositeType", {
"baseType": max_entries_ptr, "tag": dc.DW_TAG_array_type,
"size": 64, "baseType": uint_type,
"offset": 64 "size": 32,
}) "elements": [array_subrange_max_entries]
})
key_member = module.add_debug_info("DIDerivedType", { max_entries_ptr = module.add_debug_info("DIDerivedType", {
"tag": dc.DW_TAG_member, "tag": dc.DW_TAG_pointer_type,
"name": "key", "baseType": array_type_max_entries,
"file": file_metadata, "size": 64
"baseType": key_ptr, })
"size": 64, max_entries_member = module.add_debug_info("DIDerivedType", {
"offset": 128 "tag": dc.DW_TAG_member,
}) "name": "max_entries",
"file": file_metadata,
value_member = module.add_debug_info("DIDerivedType", { "baseType": max_entries_ptr,
"tag": dc.DW_TAG_member, "size": 64,
"name": "value", "offset": cnt * 64
"file": file_metadata, })
"baseType": value_ptr, elements_arr.append(max_entries_member)
"size": 64,
"offset": 192
})
# Create the struct type # Create the struct type
struct_type = module.add_debug_info("DICompositeType", { struct_type = module.add_debug_info("DICompositeType", {
"tag": dc.DW_TAG_structure_type, "tag": dc.DW_TAG_structure_type,
"file": file_metadata, "file": file_metadata,
"size": 256, # 4 * 64-bit pointers "size": 64 * len(elements_arr), # 4 * 64-bit pointers
"elements": [type_member, max_entries_member, key_member, value_member] "elements": elements_arr,
}, is_distinct=True) }, is_distinct=True)
# Create global variable debug info # Create global variable debug info
@ -178,23 +185,23 @@ def create_map_debug_info(module, map_global, map_name, map_params):
def process_hash_map(map_name, rval, module): def process_hash_map(map_name, rval, module):
print(f"Creating HashMap map: {map_name}") print(f"Creating HashMap map: {map_name}")
map_params: dict[str, object] = {"map_type": "HASH"} map_params: dict[str, object] = {"type": "HASH"}
# Assuming order: key_type, value_type, max_entries # Assuming order: key_type, value_type, max_entries
if len(rval.args) >= 1 and isinstance(rval.args[0], ast.Name): if len(rval.args) >= 1 and isinstance(rval.args[0], ast.Name):
map_params["key_type"] = rval.args[0].id map_params["key"] = rval.args[0].id
if len(rval.args) >= 2 and isinstance(rval.args[1], ast.Name): if len(rval.args) >= 2 and isinstance(rval.args[1], ast.Name):
map_params["value_type"] = rval.args[1].id map_params["value"] = rval.args[1].id
if len(rval.args) >= 3 and isinstance(rval.args[2], ast.Constant): if len(rval.args) >= 3 and isinstance(rval.args[2], ast.Constant):
const_val = rval.args[2].value const_val = rval.args[2].value
if isinstance(const_val, (int, str)): # safe check if isinstance(const_val, (int, str)): # safe check
map_params["max_entries"] = const_val map_params["max_entries"] = const_val
for keyword in rval.keywords: for keyword in rval.keywords:
if keyword.arg == "key_type" and isinstance(keyword.value, ast.Name): if keyword.arg == "key" and isinstance(keyword.value, ast.Name):
map_params["key_type"] = keyword.value.id map_params["key"] = keyword.value.id
elif keyword.arg == "value_type" and isinstance(keyword.value, ast.Name): elif keyword.arg == "value" and isinstance(keyword.value, ast.Name):
map_params["value_type"] = keyword.value.id map_params["value"] = keyword.value.id
elif keyword.arg == "max_entries" and isinstance(keyword.value, ast.Constant): elif keyword.arg == "max_entries" and isinstance(keyword.value, ast.Constant):
const_val = keyword.value.value const_val = keyword.value.value
if isinstance(const_val, (int, str)): if isinstance(const_val, (int, str)):
@ -204,11 +211,34 @@ def process_hash_map(map_name, rval, module):
return create_bpf_map(module, map_name, map_params) return create_bpf_map(module, map_name, map_params)
def process_perf_event_map(map_name, rval, module):
    """Collect PerfEventArray parameters from the call node and create the map.

    ``rval`` is the ``ast.Call`` returned by the map function. The first two
    positional arguments are read as ``key_size`` and ``value_size``; keyword
    arguments with those names override the positional values. Only
    ``ast.Name`` arguments are accepted, anything else is ignored. The
    result of ``create_bpf_map`` is returned.
    """
    print(f"Creating PerfEventArray map: {map_name}")
    map_params = {"type": "PERF_EVENT_ARRAY"}
    size_fields = ("key_size", "value_size")

    # Positional form: PerfEventArray(key_size, value_size). zip() stops at
    # the shorter sequence, so missing or extra positionals are ignored.
    for field, arg_node in zip(size_fields, rval.args):
        if isinstance(arg_node, ast.Name):
            map_params[field] = arg_node.id

    # Keyword form, applied afterwards so it wins over positionals.
    for kw in rval.keywords:
        if kw.arg in size_fields and isinstance(kw.value, ast.Name):
            map_params[kw.arg] = kw.value.id

    print(f"Map parameters: {map_params}")
    return create_bpf_map(module, map_name, map_params)
def process_bpf_map(func_node, module): def process_bpf_map(func_node, module):
"""Process a BPF map (a function decorated with @map)""" """Process a BPF map (a function decorated with @map)"""
map_name = func_node.name map_name = func_node.name
print(f"Processing BPF map: {map_name}") print(f"Processing BPF map: {map_name}")
BPF_MAP_TYPES = {"HashMap": process_hash_map, # BPF_MAP_TYPE_HASH
"PerfEventArray": process_perf_event_map, # BPF_MAP_TYPE_PERF_EVENT_ARRAY
}
# For now, assume single return statement # For now, assume single return statement
return_stmt = None return_stmt = None
for stmt in func_node.body: for stmt in func_node.body:
@ -221,7 +251,12 @@ def process_bpf_map(func_node, module):
rval = return_stmt.value rval = return_stmt.value
# Handle only HashMap maps # Handle only HashMap maps
if isinstance(rval, ast.Call) and isinstance(rval.func, ast.Name) and rval.func.id == "HashMap": if isinstance(rval, ast.Call) and isinstance(rval.func, ast.Name):
process_hash_map(map_name, rval, module) if rval.func.id in BPF_MAP_TYPES:
handler = BPF_MAP_TYPES[rval.func.id]
handler(map_name, rval, module)
else:
print(f"Unknown map type {rval.func.id}, defaulting to HashMap")
process_hash_map(map_name, rval, module)
else: else:
raise ValueError("Function under @map must return a map") raise ValueError("Function under @map must return a map")

69
pythonbpf/structs_pass.py Normal file
View File

@ -0,0 +1,69 @@
import ast
from llvmlite import ir
from .type_deducer import ctypes_to_ir
from . import dwarf_constants as dc
structs_sym_tab = {}
def structs_proc(tree, module, chunks):
    """Process every ``@struct``-decorated class node in ``chunks``.

    A node counts as a struct when one of its decorators is the bare name
    ``struct``. Each such node is handed to ``process_bpf_struct``. The
    module-level ``structs_sym_tab`` is returned. ``tree`` is not used.
    """
    for cls_node in chunks:
        has_struct_decorator = any(
            isinstance(deco, ast.Name) and deco.id == "struct"
            for deco in cls_node.decorator_list
        )
        if not has_struct_decorator:
            continue
        print(f"Found BPF struct: {cls_node.name}")
        process_bpf_struct(cls_node, module)
    return structs_sym_tab
def process_bpf_struct(cls_node, module):
    """Build the LLVM struct type for one class node and register it.

    Every annotated assignment with a plain-name target in the class body
    becomes a field. An annotation of the form ``str(N)`` becomes an
    ``[N x i8]`` array; any other annotation is resolved through
    ``ctypes_to_ir`` using the annotation's ``id``.

    The byte size is computed by aligning each field to its own size (array
    fields to their element size, pointers to 8) and rounding the total up
    to a multiple of 8. The entry stored in ``structs_sym_tab`` under the
    class name holds the keys ``type``, ``fields`` (name -> index), ``size``
    and ``field_types``.

    If a field has a type other than int, array or pointer, a message is
    printed and nothing is registered. ``module`` is not used.
    """
    struct_name = cls_node.name
    field_names = []
    field_types = []

    for item in cls_node.body:
        if not (isinstance(item, ast.AnnAssign)
                and isinstance(item.target, ast.Name)):
            continue
        annotation = item.annotation
        print(f"Field: {item.target.id}, Type: "
              f"{ast.dump(item.annotation)}")
        field_names.append(item.target.id)
        is_char_array = (
            isinstance(annotation, ast.Call)
            and isinstance(annotation.func, ast.Name)
            and annotation.func.id == "str"
        )
        if is_char_array:
            # This is a char array with fixed length
            # TODO: For now assuming str is always called with constant
            length = annotation.args[0].value
            field_types.append(ir.ArrayType(ir.IntType(8), length))
        else:
            field_types.append(ctypes_to_ir(annotation.id))

    offset = 0
    for ftype in field_types:
        if isinstance(ftype, ir.IntType):
            alignment = fsize = ftype.width // 8
        elif isinstance(ftype, ir.ArrayType):
            alignment = ftype.element.width // 8
            fsize = ftype.count * alignment
        elif isinstance(ftype, ir.PointerType):
            alignment = fsize = 8
        else:
            print(f"Unsupported field type in struct {struct_name}")
            return
        # Pad up to the field's alignment, then place the field.
        offset += -offset % alignment
        offset += fsize

    # Round the whole struct up to a multiple of 8 bytes.
    total_size = offset + (-offset % 8)

    struct_type = ir.LiteralStructType(field_types)
    structs_sym_tab[struct_name] = {
        "type": struct_type,
        "fields": dict(zip(field_names, range(len(field_names)))),
        "size": total_size,
        "field_types": field_types,
    }
    print(f"Created struct {struct_name} with fields {field_names}")