15 Commits

17 changed files with 294 additions and 121691 deletions

20
BCC-Examples/README.md Normal file
View File

@ -0,0 +1,20 @@
## BCC examples ported to PythonBPF
This folder contains examples of BCC tutorial examples that have been ported to use **PythonBPF**.
## Requirements
- install `pythonbpf` and `pylibbpf` using pip.
- You will also need `matplotlib` for vfsreadlat.py example.
- You will also need `rich` for vfsreadlat_rich.py example.
- You will also need `plotly` and `dash` for vfsreadlat_plotly.py example.
## Usage
- You'll need root privileges to run these examples. If you are using a virtualenv, use the following command to run the scripts:
```bash
sudo <path_to_virtualenv>/bin/python3 <script_name>.py
```
- For vfsreadlat_plotly.py, run the following command to start the Dash server:
```bash
sudo <path_to_virtualenv>/bin/python3 vfsreadlat_plotly/bpf_program.py
```
Then open your web browser and navigate to the given URL.

View File

@ -2,27 +2,15 @@ import ast
import logging import logging
from llvmlite import ir from llvmlite import ir
from dataclasses import dataclass from .local_symbol import LocalSymbol
from typing import Any
from pythonbpf.helper import HelperHandlerRegistry from pythonbpf.helper import HelperHandlerRegistry
from pythonbpf.vmlinux_parser.dependency_node import Field
from .expr import VmlinuxHandlerRegistry from .expr import VmlinuxHandlerRegistry
from pythonbpf.type_deducer import ctypes_to_ir from pythonbpf.type_deducer import ctypes_to_ir
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@dataclass
class LocalSymbol:
var: ir.AllocaInstr
ir_type: ir.Type
metadata: Any = None
def __iter__(self):
yield self.var
yield self.ir_type
yield self.metadata
def create_targets_and_rvals(stmt): def create_targets_and_rvals(stmt):
"""Create lists of targets and right-hand values from an assignment statement.""" """Create lists of targets and right-hand values from an assignment statement."""
if isinstance(stmt.targets[0], ast.Tuple): if isinstance(stmt.targets[0], ast.Tuple):
@ -60,21 +48,11 @@ def handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab):
continue continue
var_name = target.id var_name = target.id
# Skip if already allocated # Skip if already allocated
if var_name in local_sym_tab: if var_name in local_sym_tab:
logger.debug(f"Variable {var_name} already allocated, skipping") logger.debug(f"Variable {var_name} already allocated, skipping")
continue continue
# When allocating a variable, check if it's a vmlinux struct type
if isinstance(
stmt.value, ast.Name
) and VmlinuxHandlerRegistry.is_vmlinux_struct(stmt.value.id):
# Handle vmlinux struct allocation
# This requires more implementation
print(stmt.value)
pass
# Determine type and allocate based on rval # Determine type and allocate based on rval
if isinstance(rval, ast.Call): if isinstance(rval, ast.Call):
_allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab) _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab)
@ -248,8 +226,45 @@ def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, structs_sym_
logger.error(f"Struct variable '{struct_var}' not found") logger.error(f"Struct variable '{struct_var}' not found")
return return
struct_type = local_sym_tab[struct_var].metadata struct_type: type = local_sym_tab[struct_var].metadata
if not struct_type or struct_type not in structs_sym_tab: if not struct_type or struct_type not in structs_sym_tab:
if VmlinuxHandlerRegistry.is_vmlinux_struct(struct_type.__name__):
# Handle vmlinux struct field access
vmlinux_struct_name = struct_type.__name__
if not VmlinuxHandlerRegistry.has_field(vmlinux_struct_name, field_name):
logger.error(
f"Field '{field_name}' not found in vmlinux struct '{vmlinux_struct_name}'"
)
return
field_type: tuple[ir.GlobalVariable, Field] = (
VmlinuxHandlerRegistry.get_field_type(vmlinux_struct_name, field_name)
)
field_ir, field = field_type
# TODO: For now, we only support integer type allocations.
# loaded_value = builder.load(field_ir, align=8)
# #TODO: fatal flaw that this always assumes first argument of function to be the context of what this gets.
# base_ptr = builder.function.args[0]
# gep_result = builder.gep(
# base_ptr,
# [loaded_value],
# inbounds=False, # Not using inbounds GEP
# )
# print("DEBB", loaded_value, base_ptr, gep_result)
# Use i64 for allocation since that's what the global variable contains
actual_ir_type = ir.IntType(64)
# Allocate with the actual IR type, not the GlobalVariable
var = _allocate_with_type(builder, var_name, actual_ir_type)
local_sym_tab[var_name] = LocalSymbol(var, actual_ir_type, field)
logger.info(
f"Pre-allocated {var_name} from vmlinux struct {vmlinux_struct_name}.{field_name}"
)
return
else:
logger.error(f"Struct type '{struct_type}' not found") logger.error(f"Struct type '{struct_type}' not found")
return return

View File

@ -37,6 +37,24 @@ def finalize_module(original_str):
return re.sub(pattern, replacement, original_str) return re.sub(pattern, replacement, original_str)
def bpf_passthrough_gen(module):
i32_ty = ir.IntType(32)
ptr_ty = ir.PointerType(ir.IntType(8))
fnty = ir.FunctionType(ptr_ty, [i32_ty, ptr_ty])
# Declare the intrinsic
passthrough = ir.Function(module, fnty, "llvm.bpf.passthrough.p0.p0")
# Set function attributes
# TODO: the ones commented are supposed to be there but cannot be added due to llvmlite limitations at the moment
# passthrough.attributes.add("nofree")
# passthrough.attributes.add("nosync")
passthrough.attributes.add("nounwind")
# passthrough.attributes.add("memory(none)")
return passthrough
def find_bpf_chunks(tree): def find_bpf_chunks(tree):
"""Find all functions decorated with @bpf in the AST.""" """Find all functions decorated with @bpf in the AST."""
bpf_functions = [] bpf_functions = []
@ -57,6 +75,8 @@ def processor(source_code, filename, module):
for func_node in bpf_chunks: for func_node in bpf_chunks:
logger.info(f"Found BPF function/struct: {func_node.name}") logger.info(f"Found BPF function/struct: {func_node.name}")
bpf_passthrough_gen(module)
vmlinux_symtab = vmlinux_proc(tree, module) vmlinux_symtab = vmlinux_proc(tree, module)
if vmlinux_symtab: if vmlinux_symtab:
handler = VmlinuxHandler.initialize(vmlinux_symtab) handler = VmlinuxHandler.initialize(vmlinux_symtab)

View File

@ -72,20 +72,28 @@ def _handle_attribute_expr(
if var_name in local_sym_tab: if var_name in local_sym_tab:
var_ptr, var_type, var_metadata = local_sym_tab[var_name] var_ptr, var_type, var_metadata = local_sym_tab[var_name]
logger.info(f"Loading attribute {attr_name} from variable {var_name}") logger.info(f"Loading attribute {attr_name} from variable {var_name}")
logger.info(f"Variable type: {var_type}, Variable ptr: {var_ptr}") logger.info(
f"Variable type: {var_type}, Variable ptr: {var_ptr}, Variable Metadata: {var_metadata}"
)
if (
hasattr(var_metadata, "__module__")
and var_metadata.__module__ == "vmlinux"
):
# Try vmlinux handler when var_metadata is not a string, but has a module attribute.
# This has been done to keep everything separate in vmlinux struct handling.
vmlinux_result = VmlinuxHandlerRegistry.handle_attribute(
expr, local_sym_tab, None, builder
)
if vmlinux_result is not None:
return vmlinux_result
else:
raise RuntimeError("Vmlinux struct did not process successfully")
metadata = structs_sym_tab[var_metadata] metadata = structs_sym_tab[var_metadata]
if attr_name in metadata.fields: if attr_name in metadata.fields:
gep = metadata.gep(builder, var_ptr, attr_name) gep = metadata.gep(builder, var_ptr, attr_name)
val = builder.load(gep) val = builder.load(gep)
field_type = metadata.field_type(attr_name) field_type = metadata.field_type(attr_name)
return val, field_type return val, field_type
# Try vmlinux handler as fallback
vmlinux_result = VmlinuxHandlerRegistry.handle_attribute(
expr, local_sym_tab, None, builder
)
if vmlinux_result is not None:
return vmlinux_result
return None return None

View File

@ -1,5 +1,7 @@
import ast import ast
from pythonbpf.vmlinux_parser.vmlinux_exports_handler import VmlinuxHandler
class VmlinuxHandlerRegistry: class VmlinuxHandlerRegistry:
"""Registry for vmlinux handler operations""" """Registry for vmlinux handler operations"""
@ -7,7 +9,7 @@ class VmlinuxHandlerRegistry:
_handler = None _handler = None
@classmethod @classmethod
def set_handler(cls, handler): def set_handler(cls, handler: VmlinuxHandler):
"""Set the vmlinux handler""" """Set the vmlinux handler"""
cls._handler = handler cls._handler = handler
@ -43,3 +45,25 @@ class VmlinuxHandlerRegistry:
if cls._handler is None: if cls._handler is None:
return False return False
return cls._handler.is_vmlinux_struct(name) return cls._handler.is_vmlinux_struct(name)
@classmethod
def get_struct_type(cls, name):
"""Try to handle a struct name as vmlinux struct"""
if cls._handler is None:
return None
return cls._handler.get_vmlinux_struct_type(name)
@classmethod
def has_field(cls, vmlinux_struct_name, field_name):
"""Check if a vmlinux struct has a specific field"""
if cls._handler is None:
return False
return cls._handler.has_field(vmlinux_struct_name, field_name)
@classmethod
def get_field_type(cls, vmlinux_struct_name, field_name):
"""Get the type of a field in a vmlinux struct"""
if cls._handler is None:
return None
assert isinstance(cls._handler, VmlinuxHandler)
return cls._handler.get_field_type(vmlinux_struct_name, field_name)

View File

@ -7,7 +7,12 @@ from pythonbpf.helper import (
reset_scratch_pool, reset_scratch_pool,
) )
from pythonbpf.type_deducer import ctypes_to_ir from pythonbpf.type_deducer import ctypes_to_ir
from pythonbpf.expr import eval_expr, handle_expr, convert_to_bool from pythonbpf.expr import (
eval_expr,
handle_expr,
convert_to_bool,
VmlinuxHandlerRegistry,
)
from pythonbpf.assign_pass import ( from pythonbpf.assign_pass import (
handle_variable_assignment, handle_variable_assignment,
handle_struct_field_assignment, handle_struct_field_assignment,
@ -16,6 +21,7 @@ from pythonbpf.allocation_pass import (
handle_assign_allocation, handle_assign_allocation,
allocate_temp_pool, allocate_temp_pool,
create_targets_and_rvals, create_targets_and_rvals,
LocalSymbol,
) )
from .return_utils import handle_none_return, handle_xdp_return, is_xdp_name from .return_utils import handle_none_return, handle_xdp_return, is_xdp_name
@ -324,6 +330,28 @@ def process_func_body(
local_sym_tab = {} local_sym_tab = {}
# Add the context parameter (first function argument) to the local symbol table
if func_node.args.args and len(func_node.args.args) > 0:
context_arg = func_node.args.args[0]
context_name = context_arg.arg
if hasattr(context_arg, "annotation") and context_arg.annotation:
if isinstance(context_arg.annotation, ast.Name):
context_type_name = context_arg.annotation.id
elif isinstance(context_arg.annotation, ast.Attribute):
context_type_name = context_arg.annotation.attr
else:
raise TypeError(
f"Unsupported annotation type: {ast.dump(context_arg.annotation)}"
)
if VmlinuxHandlerRegistry.is_vmlinux_struct(context_type_name):
resolved_type = VmlinuxHandlerRegistry.get_struct_type(
context_type_name
)
context_type = LocalSymbol(None, None, resolved_type)
local_sym_tab[context_name] = context_type
logger.info(f"Added argument '{context_name}' to local symbol table")
# pre-allocate dynamic variables # pre-allocate dynamic variables
local_sym_tab = allocate_mem( local_sym_tab = allocate_mem(
module, module,

15
pythonbpf/local_symbol.py Normal file
View File

@ -0,0 +1,15 @@
import llvmlite.ir as ir
from dataclasses import dataclass
from typing import Any
@dataclass
class LocalSymbol:
var: ir.AllocaInstr
ir_type: ir.Type
metadata: Any = None
def __iter__(self):
yield self.var
yield self.ir_type
yield self.metadata

View File

@ -86,19 +86,19 @@ def vmlinux_proc(tree: ast.AST, module):
if not import_statements: if not import_statements:
logger.info("No vmlinux imports found") logger.info("No vmlinux imports found")
return return None
# Import vmlinux module directly # Import vmlinux module directly
try: try:
vmlinux_mod = importlib.import_module("vmlinux") vmlinux_mod = importlib.import_module("vmlinux")
except ImportError: except ImportError:
logger.warning("Could not import vmlinux module") logger.warning("Could not import vmlinux module")
return return None
source_file = inspect.getsourcefile(vmlinux_mod) source_file = inspect.getsourcefile(vmlinux_mod)
if source_file is None: if source_file is None:
logger.warning("Cannot find source for vmlinux module") logger.warning("Cannot find source for vmlinux module")
return return None
with open(source_file, "r") as f: with open(source_file, "r") as f:
mod_ast = ast.parse(f.read(), filename=source_file) mod_ast = ast.parse(f.read(), filename=source_file)

View File

@ -1,6 +1,7 @@
import logging import logging
from llvmlite import ir from llvmlite import ir
from pythonbpf.local_symbol import LocalSymbol
from pythonbpf.vmlinux_parser.assignment_info import AssignmentType from pythonbpf.vmlinux_parser.assignment_info import AssignmentType
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -39,6 +40,16 @@ class VmlinuxHandler:
and self.vmlinux_symtab[name]["value_type"] == AssignmentType.CONSTANT and self.vmlinux_symtab[name]["value_type"] == AssignmentType.CONSTANT
) )
def get_vmlinux_struct_type(self, name):
"""Check if name is a vmlinux struct type"""
if (
name in self.vmlinux_symtab
and self.vmlinux_symtab[name]["value_type"] == AssignmentType.STRUCT
):
return self.vmlinux_symtab[name]["python_type"]
else:
raise ValueError(f"{name} is not a vmlinux struct type")
def is_vmlinux_struct(self, name): def is_vmlinux_struct(self, name):
"""Check if name is a vmlinux struct""" """Check if name is a vmlinux struct"""
return ( return (
@ -77,14 +88,37 @@ class VmlinuxHandler:
self, struct_var_name, field_name, module, builder, local_sym_tab self, struct_var_name, field_name, module, builder, local_sym_tab
): ):
"""Handle access to vmlinux struct fields""" """Handle access to vmlinux struct fields"""
# Check if it's a variable of vmlinux struct type
if struct_var_name in local_sym_tab: if struct_var_name in local_sym_tab:
var_info = local_sym_tab[struct_var_name] # noqa: F841 var_info: LocalSymbol = local_sym_tab[struct_var_name]
# Need to check if this variable is a vmlinux struct
# This will depend on how you track vmlinux struct types in your symbol table
logger.info( logger.info(
f"Attempting to access field {field_name} of possible vmlinux struct {struct_var_name}" f"Attempting to access field {field_name} of possible vmlinux struct {struct_var_name}"
) )
python_type: type = var_info.metadata
globvar_ir, field_data = self.get_field_type(
python_type.__name__, field_name
)
# Return pointer to field and field type # Return pointer to field and field type
return None return None
return None else:
raise RuntimeError("Variable accessed not found in symbol table")
def has_field(self, struct_name, field_name):
"""Check if a vmlinux struct has a specific field"""
if self.is_vmlinux_struct(struct_name):
python_type = self.vmlinux_symtab[struct_name]["python_type"]
return hasattr(python_type, field_name)
return False
def get_field_type(self, vmlinux_struct_name, field_name):
"""Get the type of a field in a vmlinux struct"""
if self.is_vmlinux_struct(vmlinux_struct_name):
python_type = self.vmlinux_symtab[vmlinux_struct_name]["python_type"]
if hasattr(python_type, field_name):
return self.vmlinux_symtab[vmlinux_struct_name]["members"][field_name]
else:
raise ValueError(
f"Field {field_name} not found in vmlinux struct {vmlinux_struct_name}"
)
else:
raise ValueError(f"{vmlinux_struct_name} is not a vmlinux struct")

View File

@ -1,5 +1,5 @@
BPF_CLANG := clang BPF_CLANG := clang
CFLAGS := -O2 -emit-llvm -target bpf -c CFLAGS := -O0 -emit-llvm -target bpf -c
SRC := $(wildcard *.bpf.c) SRC := $(wildcard *.bpf.c)
LL := $(SRC:.bpf.c=.bpf.ll) LL := $(SRC:.bpf.c=.bpf.ll)

View File

@ -1,25 +0,0 @@
#define __TARGET_ARCH_arm64
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
// Map: key = struct request*, value = u64 timestamp
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, struct request *);
__type(value, u64);
__uint(max_entries, 1024);
} start SEC(".maps");
// Attach to kprobe for blk_start_request
SEC("kprobe/blk_start_request")
int BPF_KPROBE(trace_start, struct request *req)
{
u64 ts = bpf_ktime_get_ns();
bpf_map_update_elem(&start, &req, &ts, BPF_ANY);
return 0;
}
char LICENSE[] SEC("license") = "GPL";

View File

@ -0,0 +1,28 @@
// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
/*
Information gained from reversing this (multiple kernel versions):
There is no point of
```llvm
tail call void @llvm.dbg.value(metadata ptr %0, metadata !60, metadata !DIExpression()), !dbg !70
```
and the first argument of passthrough is fucking useless. It just needs to be a distinct integer:
```llvm
%9 = tail call ptr @llvm.bpf.passthrough.p0.p0(i32 3, ptr %8)
```
*/
SEC("tp/syscalls/sys_enter_execve")
int handle_setuid_entry(struct trace_event_raw_sys_enter *ctx) {
// Access each argument separately with clear variable assignments
long int arg0 = ctx->id;
bpf_printk("args[0]: %d", arg0);
return 0;
}
char LICENSE[] SEC("license") = "GPL";

121617
tests/c-form/vmlinux.h vendored

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,21 @@
// xdp_rewrite.c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <linux/if_ether.h>
SEC("xdp")
int xdp_rewrite_mac(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
struct ethhdr *eth = data;
if ((void*)(eth + 1) > data_end)
return XDP_PASS;
__u8 new_src[ETH_ALEN] = {0x02,0x00,0x00,0x00,0x00,0x02};
for (int i = 0; i < ETH_ALEN; i++) eth->h_source[i] = new_src[i];
return XDP_PASS;
}
char _license[] SEC("license") = "GPL";

View File

@ -0,0 +1,31 @@
import logging
from pythonbpf import bpf, section, bpfglobal, compile_to_ir
from pythonbpf import compile # noqa: F401
from vmlinux import TASK_COMM_LEN # noqa: F401
from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401
from ctypes import c_int64, c_void_p # noqa: F401
# from vmlinux import struct_uinput_device
# from vmlinux import struct_blk_integrity_iter
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: struct_trace_event_raw_sys_enter) -> c_int64:
a = 2 + TASK_COMM_LEN + TASK_COMM_LEN
b = ctx.id
print(f"Hello, World{TASK_COMM_LEN} and {a}")
print(f"This is context field {b}")
return c_int64(TASK_COMM_LEN + 2)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile_to_ir("struct_field_access.py", "struct_field_access.ll", loglevel=logging.INFO)
# compile()

View File

@ -1,4 +1,4 @@
from pythonbpf import bpf, map, section, bpfglobal, compile, struct from pythonbpf import bpf, map, section, bpfglobal, compile, struct, compile_to_ir
from ctypes import c_void_p, c_int64, c_int32, c_uint64 from ctypes import c_void_p, c_int64, c_int32, c_uint64
from pythonbpf.maps import HashMap from pythonbpf.maps import HashMap
from pythonbpf.helper import ktime from pythonbpf.helper import ktime
@ -71,4 +71,5 @@ def LICENSE() -> str:
return "GPL" return "GPL"
compile_to_ir("comprehensive.py", "comprehensive.ll")
compile() compile()