13 Commits

11 changed files with 371 additions and 32 deletions

View File

@ -118,6 +118,18 @@ def _allocate_for_call(
local_sym_tab[var_name] = LocalSymbol(var, struct_info.ir_type, call_type) local_sym_tab[var_name] = LocalSymbol(var, struct_info.ir_type, call_type)
logger.info(f"Pre-allocated {var_name} for struct {call_type}") logger.info(f"Pre-allocated {var_name} for struct {call_type}")
elif VmlinuxHandlerRegistry.is_vmlinux_struct(call_type):
# When calling struct_name(pointer), we're doing a cast, not construction
# So we allocate as a pointer (i64) not as the actual struct
var = builder.alloca(ir.IntType(64), name=var_name)
var.align = 8
local_sym_tab[var_name] = LocalSymbol(
var, ir.IntType(64), VmlinuxHandlerRegistry.get_struct_type(call_type)
)
logger.info(
f"Pre-allocated {var_name} for vmlinux struct pointer cast to {call_type}"
)
else: else:
logger.warning(f"Unknown call type for allocation: {call_type}") logger.warning(f"Unknown call type for allocation: {call_type}")
@ -325,13 +337,6 @@ def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, structs_sym_
VmlinuxHandlerRegistry.get_field_type(vmlinux_struct_name, field_name) VmlinuxHandlerRegistry.get_field_type(vmlinux_struct_name, field_name)
) )
field_ir, field = field_type field_ir, field = field_type
# TODO: For now, we only support integer type allocations.
# This always assumes first argument of function to be the context struct
base_ptr = builder.function.args[0]
local_sym_tab[
struct_var
].var = base_ptr # This is repurposing of var to store the pointer of the base type
local_sym_tab[struct_var].ir_type = field_ir
# Determine the actual IR type based on the field's type # Determine the actual IR type based on the field's type
actual_ir_type = None actual_ir_type = None
@ -386,12 +391,14 @@ def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, structs_sym_
) )
actual_ir_type = ir.IntType(64) actual_ir_type = ir.IntType(64)
# Allocate with the actual IR type, not the GlobalVariable # Allocate with the actual IR type
var = _allocate_with_type(builder, var_name, actual_ir_type) var = _allocate_with_type(builder, var_name, actual_ir_type)
local_sym_tab[var_name] = LocalSymbol(var, actual_ir_type, field) local_sym_tab[var_name] = LocalSymbol(
var, actual_ir_type, field
) # <-- Store Field metadata
logger.info( logger.info(
f"Pre-allocated {var_name} from vmlinux struct {vmlinux_struct_name}.{field_name}" f"Pre-allocated {var_name} as {actual_ir_type} from vmlinux struct {vmlinux_struct_name}.{field_name}"
) )
return return
else: else:

View File

@ -1,5 +1,7 @@
import ast import ast
import logging import logging
from inspect import isclass
from llvmlite import ir from llvmlite import ir
from pythonbpf.expr import eval_expr from pythonbpf.expr import eval_expr
from pythonbpf.helper import emit_probe_read_kernel_str_call from pythonbpf.helper import emit_probe_read_kernel_str_call
@ -148,8 +150,30 @@ def handle_variable_assignment(
return False return False
val, val_type = val_result val, val_type = val_result
logger.info(f"Evaluated value for {var_name}: {val} of type {val_type}, {var_type}") logger.info(
f"Evaluated value for {var_name}: {val} of type {val_type}, expected {var_type}"
)
if val_type != var_type: if val_type != var_type:
# Handle vmlinux struct pointers - they're represented as Python classes but are i64 pointers
if isclass(val_type) and (val_type.__module__ == "vmlinux"):
logger.info("Handling vmlinux struct pointer assignment")
# vmlinux struct pointers: val is a pointer, need to convert to i64
if isinstance(var_type, ir.IntType) and var_type.width == 64:
# Convert pointer to i64 using ptrtoint
if isinstance(val.type, ir.PointerType):
val = builder.ptrtoint(val, ir.IntType(64))
logger.info(
"Converted vmlinux struct pointer to i64 using ptrtoint"
)
builder.store(val, var_ptr)
logger.info(f"Assigned vmlinux struct pointer to {var_name} (i64)")
return True
else:
logger.error(
f"Type mismatch: vmlinux struct pointer requires i64, got {var_type}"
)
return False
if isinstance(val_type, Field): if isinstance(val_type, Field):
logger.info("Handling assignment to struct field") logger.info("Handling assignment to struct field")
# Special handling for struct_xdp_md i32 fields that are zero-extended to i64 # Special handling for struct_xdp_md i32 fields that are zero-extended to i64

View File

@ -12,8 +12,8 @@ from .type_normalization import (
get_base_type_and_depth, get_base_type_and_depth,
deref_to_depth, deref_to_depth,
) )
from pythonbpf.vmlinux_parser.assignment_info import Field
from .vmlinux_registry import VmlinuxHandlerRegistry from .vmlinux_registry import VmlinuxHandlerRegistry
from ..vmlinux_parser.dependency_node import Field
logger: Logger = logging.getLogger(__name__) logger: Logger = logging.getLogger(__name__)
@ -89,8 +89,16 @@ def _handle_attribute_expr(
return vmlinux_result return vmlinux_result
else: else:
raise RuntimeError("Vmlinux struct did not process successfully") raise RuntimeError("Vmlinux struct did not process successfully")
metadata = structs_sym_tab[var_metadata]
if attr_name in metadata.fields: elif isinstance(var_metadata, Field):
logger.error(
f"Cannot access field '{attr_name}' on already-loaded field value '{var_name}'"
)
return None
# Regular user-defined struct
metadata = structs_sym_tab.get(var_metadata)
if metadata and attr_name in metadata.fields:
gep = metadata.gep(builder, var_ptr, attr_name) gep = metadata.gep(builder, var_ptr, attr_name)
val = builder.load(gep) val = builder.load(gep)
field_type = metadata.field_type(attr_name) field_type = metadata.field_type(attr_name)
@ -525,6 +533,66 @@ def _handle_boolean_op(
return None return None
# ============================================================================
# VMLinux casting
# ============================================================================
def _handle_vmlinux_cast(
    func,
    module,
    builder,
    expr,
    local_sym_tab,
    map_sym_tab,
    structs_sym_tab=None,
):
    """Lower a vmlinux struct cast such as ``struct_request(ctx.di)``.

    The call names a vmlinux struct and takes one argument. That argument
    (for example ``ctx.di``, itself a field of another vmlinux struct) is a
    pointer that is represented as a ``c_uint64``, so it has to be turned
    back into a pointer before fields can be read through it.

    Args:
        func, module, builder: forwarded unchanged to ``eval_expr``;
            ``builder`` is also used to emit the ``inttoptr``.
        expr: the ``ast.Call`` node; ``expr.func.id`` is the struct name and
            ``expr.args`` must hold exactly one element.
        local_sym_tab, map_sym_tab, structs_sym_tab: symbol tables forwarded
            to ``eval_expr``.

    Returns:
        ``(pointer_value, vmlinux_struct_type)`` on success, or ``None`` when
        the argument count is wrong, the argument cannot be evaluated, the
        struct type is unknown, or the argument type is not accepted.
    """
    call_args = expr.args
    if len(call_args) != 1:
        logger.info("vmlinux struct cast takes exactly one argument")
        return None

    # Name of the vmlinux struct being cast to.
    target_name = expr.func.id

    # Evaluate the operand first (e.g. ctx.di, which is a c_uint64); this may
    # emit IR, so it stays ahead of the registry lookup below.
    evaluated = eval_expr(
        func,
        module,
        builder,
        call_args[0],
        local_sym_tab,
        map_sym_tab,
        structs_sym_tab,
    )
    if evaluated is None:
        logger.info("Failed to evaluate argument to vmlinux struct cast")
        return None
    source_val, source_type = evaluated

    target_struct = VmlinuxHandlerRegistry.get_struct_type(target_name)
    if target_struct is None:
        logger.error(f"Failed to get vmlinux struct type for {target_name}")
        return None

    opaque_ptr = ir.PointerType()

    # TODO: add an explicit integer check here later. For now the operand is
    # accepted whenever ctypes_to_ir() yields a truthy result for the name of
    # its ctypes type.
    # NOTE(review): this reads `source_type.type`, so it presumably expects
    # field metadata rather than a bare IR type -- confirm against eval_expr.
    if not ctypes_to_ir(source_type.type.__name__):
        logger.error(f"Unsupported type for vmlinux cast: {source_type}")
        return None

    # Reinterpret the integer as a pointer to the struct.
    return builder.inttoptr(source_val, opaque_ptr), target_struct
# ============================================================================ # ============================================================================
# Expression Dispatcher # Expression Dispatcher
# ============================================================================ # ============================================================================
@ -545,6 +613,18 @@ def eval_expr(
elif isinstance(expr, ast.Constant): elif isinstance(expr, ast.Constant):
return _handle_constant_expr(module, builder, expr) return _handle_constant_expr(module, builder, expr)
elif isinstance(expr, ast.Call): elif isinstance(expr, ast.Call):
if isinstance(expr.func, ast.Name) and VmlinuxHandlerRegistry.is_vmlinux_struct(
expr.func.id
):
return _handle_vmlinux_cast(
func,
module,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if isinstance(expr.func, ast.Name) and expr.func.id == "deref": if isinstance(expr.func, ast.Name) and expr.func.id == "deref":
return _handle_deref_call(expr, local_sym_tab, builder) return _handle_deref_call(expr, local_sym_tab, builder)

View File

@ -1,6 +1,10 @@
from .helper_registry import HelperHandlerRegistry from .helper_registry import HelperHandlerRegistry
from .helper_utils import reset_scratch_pool from .helper_utils import reset_scratch_pool
from .bpf_helper_handler import handle_helper_call, emit_probe_read_kernel_str_call from .bpf_helper_handler import (
handle_helper_call,
emit_probe_read_kernel_str_call,
emit_probe_read_kernel_call,
)
from .helpers import ( from .helpers import (
ktime, ktime,
pid, pid,
@ -74,6 +78,7 @@ __all__ = [
"reset_scratch_pool", "reset_scratch_pool",
"handle_helper_call", "handle_helper_call",
"emit_probe_read_kernel_str_call", "emit_probe_read_kernel_str_call",
"emit_probe_read_kernel_call",
"ktime", "ktime",
"pid", "pid",
"deref", "deref",

View File

@ -34,6 +34,7 @@ class BPFHelperID(Enum):
BPF_PERF_EVENT_OUTPUT = 25 BPF_PERF_EVENT_OUTPUT = 25
BPF_GET_STACK = 67 BPF_GET_STACK = 67
BPF_PROBE_READ_KERNEL_STR = 115 BPF_PROBE_READ_KERNEL_STR = 115
BPF_PROBE_READ_KERNEL = 113
BPF_RINGBUF_OUTPUT = 130 BPF_RINGBUF_OUTPUT = 130
BPF_RINGBUF_RESERVE = 131 BPF_RINGBUF_RESERVE = 131
BPF_RINGBUF_SUBMIT = 132 BPF_RINGBUF_SUBMIT = 132
@ -574,6 +575,75 @@ def bpf_probe_read_kernel_str_emitter(
return result, ir.IntType(64) return result, ir.IntType(64)
def emit_probe_read_kernel_call(builder, dst_ptr, dst_size, src_ptr):
    """Emit an LLVM IR call to the ``bpf_probe_read_kernel`` helper.

    The helper is invoked through a function pointer built from its numeric
    ID (``BPFHelperID.BPF_PROBE_READ_KERNEL``) with the signature
    ``i64 (ptr dst, i32 size, ptr src)``.

    Args:
        builder: llvmlite IRBuilder positioned where the call is emitted.
        dst_ptr: pointer value to copy into; bitcast to a generic pointer.
        dst_size: number of bytes to copy, emitted as an i32 constant.
        src_ptr: pointer value to read from; bitcast to a generic pointer.

    Returns:
        The i64 result value of the emitted call instruction.
    """
    i64 = ir.IntType(64)
    i32 = ir.IntType(32)
    generic_ptr = ir.PointerType()

    helper_sig = ir.FunctionType(
        i64,
        [generic_ptr, i32, generic_ptr],
        var_arg=False,
    )

    # BPF helpers are called by ID: cast the helper number to a function
    # pointer of the matching signature.
    helper_id = ir.Constant(i64, BPFHelperID.BPF_PROBE_READ_KERNEL.value)
    helper_fn = builder.inttoptr(helper_id, ir.PointerType(helper_sig))

    # Arguments are built in call order so the emitted instruction sequence
    # is dst-cast, src-cast, call.
    dst_arg = builder.bitcast(dst_ptr, generic_ptr)
    size_arg = ir.Constant(i32, dst_size)
    src_arg = builder.bitcast(src_ptr, generic_ptr)

    call_result = builder.call(
        helper_fn,
        [dst_arg, size_arg, src_arg],
        tail=False,
    )

    logger.info(f"Emitted bpf_probe_read_kernel (size={dst_size})")
    return call_result
@HelperHandlerRegistry.register(
    "probe_read_kernel",
    param_types=[
        ir.PointerType(ir.IntType(8)),
        ir.PointerType(ir.IntType(8)),
    ],
    return_type=ir.IntType(64),
)
def bpf_probe_read_kernel_emitter(
    call,
    map_ptr,
    module,
    builder,
    func,
    local_sym_tab=None,
    struct_sym_tab=None,
    map_sym_tab=None,
):
    """Emit LLVM IR for the ``probe_read_kernel(dst, src)`` helper call.

    Args:
        call: the ``ast.Call`` node; must carry exactly two positional
            arguments, destination first and source second.
        map_ptr: not used by this emitter.
        module, builder, func: compilation context forwarded to the
            argument-resolution helpers; ``builder`` also receives the call.
        local_sym_tab, struct_sym_tab, map_sym_tab: symbol tables forwarded
            to the argument-resolution helpers.

    Returns:
        ``(result, ir.IntType(64))`` where ``result`` is the value of the
        emitted helper call.

    Raises:
        ValueError: if the call does not have exactly two arguments.
    """
    if len(call.args) != 2:
        raise ValueError(
            f"probe_read_kernel expects 2 args (dst, src), got {len(call.args)}"
        )

    # Destination buffer and its size (the size becomes the helper's byte count).
    dst_ptr, dst_size = get_or_create_ptr_from_arg(
        func, module, call.args[0], builder, local_sym_tab, map_sym_tab, struct_sym_tab
    )

    # Source pointer; the accompanying type information is not needed here.
    src_ptr, _src_type = get_ptr_from_arg(
        call.args[1], func, module, builder, local_sym_tab, map_sym_tab, struct_sym_tab
    )

    # emit_probe_read_kernel_call() already logs the emission together with
    # the size, so no second log line is written here.
    result = emit_probe_read_kernel_call(builder, dst_ptr, dst_size, src_ptr)
    return result, ir.IntType(64)
@HelperHandlerRegistry.register( @HelperHandlerRegistry.register(
"random", "random",
param_types=[], param_types=[],

View File

@ -17,7 +17,6 @@ mapping = {
"c_ulong": ir.IntType(64), "c_ulong": ir.IntType(64),
"c_longlong": ir.IntType(64), "c_longlong": ir.IntType(64),
"c_uint": ir.IntType(32), "c_uint": ir.IntType(32),
"c_int": ir.IntType(32),
# Not so sure about this one # Not so sure about this one
"str": ir.PointerType(ir.IntType(8)), "str": ir.PointerType(ir.IntType(8)),
} }

View File

@ -42,7 +42,10 @@ def debug_info_generation(
# Process all fields and create members for the struct # Process all fields and create members for the struct
members = [] members = []
for field_name, field in struct.fields.items():
sorted_fields = sorted(struct.fields.items(), key=lambda item: item[1].offset)
for field_name, field in sorted_fields:
try: try:
# Get appropriate debug type for this field # Get appropriate debug type for this field
field_type = _get_field_debug_type( field_type = _get_field_debug_type(
@ -97,7 +100,9 @@ def _get_field_debug_type(
# Handle function pointer types (CFUNCTYPE) # Handle function pointer types (CFUNCTYPE)
if callable(field.ctype_complex_type): if callable(field.ctype_complex_type):
# Function pointers are represented as void pointers # Function pointers are represented as void pointers
logger.info(f"Field {field_name} is a function pointer, using void pointer") logger.warning(
f"Field {field_name} is a function pointer, using void pointer"
)
void_ptr = generator.create_pointer_type(None, 64) void_ptr = generator.create_pointer_type(None, 64)
return void_ptr, 64 return void_ptr, 64
elif issubclass(field.ctype_complex_type, ctypes.Array): elif issubclass(field.ctype_complex_type, ctypes.Array):
@ -126,7 +131,7 @@ def _get_field_debug_type(
# If not found, create a forward declaration # If not found, create a forward declaration
# This will be completed when the actual struct is processed # This will be completed when the actual struct is processed
logger.warning( logger.info(
f"Forward declaration created for {struct_name} in {parent_struct.name}" f"Forward declaration created for {struct_name} in {parent_struct.name}"
) )
forward_type = generator.create_struct_type([], 0, is_distinct=True) forward_type = generator.create_struct_type([], 0, is_distinct=True)

View File

@ -94,17 +94,140 @@ class VmlinuxHandler:
f"Attempting to access field {field_name} of possible vmlinux struct {struct_var_name}" f"Attempting to access field {field_name} of possible vmlinux struct {struct_var_name}"
) )
python_type: type = var_info.metadata python_type: type = var_info.metadata
struct_name = python_type.__name__ # Check if this is a context field (ctx) or a cast struct
globvar_ir, field_data = self.get_field_type(struct_name, field_name) is_context_field = var_info.var is None
builder.function.args[0].type = ir.PointerType(ir.IntType(8))
field_ptr = self.load_ctx_field( if is_context_field:
builder, builder.function.args[0], globvar_ir, field_data, struct_name # Handle context field access (original behavior)
) struct_name = python_type.__name__
# Return pointer to field and field type globvar_ir, field_data = self.get_field_type(struct_name, field_name)
return field_ptr, field_data builder.function.args[0].type = ir.PointerType(ir.IntType(8))
field_ptr = self.load_ctx_field(
builder,
builder.function.args[0],
globvar_ir,
field_data,
struct_name,
)
return field_ptr, field_data
else:
# Handle cast struct field access
struct_name = python_type.__name__
globvar_ir, field_data = self.get_field_type(struct_name, field_name)
# Handle cast struct field access (use bpf_probe_read_kernel)
# Load the struct pointer from the local variable
struct_ptr = builder.load(var_info.var)
# Use bpf_probe_read_kernel for non-context struct field access
field_value = self.load_struct_field(
builder, struct_ptr, globvar_ir, field_data, struct_name
)
# Return field value and field type
return field_value, field_data
else: else:
raise RuntimeError("Variable accessed not found in symbol table") raise RuntimeError("Variable accessed not found in symbol table")
@staticmethod
def load_struct_field(
    builder, struct_ptr_int, offset_global, field_data, struct_name=None
):
    """
    Generate LLVM IR to load a field from a regular (non-context) struct
    using bpf_probe_read_kernel.

    The field address is computed as the struct pointer plus the loaded
    offset. The helper copies the field into a freshly allocated stack slot,
    and that slot is then loaded.

    Args:
        builder: llvmlite IRBuilder instance
        struct_ptr_int: The struct pointer as an i64 value (already loaded from alloca)
        offset_global: Global variable containing the field offset (i64)
        field_data: contains data about the field; when None an 8-byte read
            into a 64-bit slot is used
        struct_name: Name of the struct being accessed (optional)

    Returns:
        The loaded value: an integer of the detected field width (8, 16, 32
        or 64 bits, 64 when the width cannot be determined). 32-bit fields of
        ``struct_xdp_md`` are zero-extended to i64.
    """
    # Load the offset value
    offset = builder.load(offset_global)

    # Convert i64 to pointer type (BPF stores pointers as i64)
    i8_ptr_type = ir.PointerType(ir.IntType(8))
    struct_ptr = builder.inttoptr(struct_ptr_int, i8_ptr_type)

    # GEP with offset to get field pointer
    field_ptr = builder.gep(
        struct_ptr,
        [offset],
        inbounds=False,
    )

    # Size of the read and width of the stack slot. These two must always
    # agree: the helper writes `field_size_bytes` bytes into a slot that is
    # `int_width` bits wide.
    field_size_bytes = 8  # Default to 8 bytes (64-bit)
    int_width = 64  # Default to 64-bit
    needs_zext = False

    if field_data is not None:
        # Try to determine the size from field metadata
        if field_data.type.__module__ == ctypes.__name__:
            try:
                measured_bytes = ctypes.sizeof(field_data.type)
            except TypeError as e:
                # ctypes.sizeof() rejects types that have no size.
                logger.warning(
                    f"Could not determine field size: {e}, using default 64"
                )
            else:
                field_size_bits = measured_bytes * 8
                if field_size_bits in [8, 16, 32, 64]:
                    # Only adopt the measured size once it is known to map
                    # onto a supported integer width.
                    field_size_bytes = measured_bytes
                    int_width = field_size_bits
                    logger.info(
                        f"Determined field size: {int_width} bits ({field_size_bytes} bytes)"
                    )

                    # Special handling for struct_xdp_md i32 fields
                    if struct_name == "struct_xdp_md" and int_width == 32:
                        needs_zext = True
                        logger.info(
                            "struct_xdp_md i32 field detected, will zero-extend to i64"
                        )
                else:
                    # Keep BOTH defaults (8 bytes / 64 bits). Passing the
                    # unusual byte count to the helper while keeping a 64-bit
                    # slot would make the read size and the storage disagree.
                    logger.warning(
                        f"Unusual field size {field_size_bits} bits, using default 64"
                    )
        elif field_data.type.__module__ == "vmlinux":
            # For pointers to structs or complex vmlinux types
            if field_data.ctype_complex_type is not None and issubclass(
                field_data.ctype_complex_type, ctypes._Pointer
            ):
                int_width = 64  # Pointers are always 64-bit
                field_size_bytes = 8
                logger.info("Field is a pointer type, using 64 bits")
            else:
                logger.warning("Complex vmlinux field type, using default 64 bits")

    # Allocate local storage for the field value
    local_storage = builder.alloca(ir.IntType(int_width))
    local_storage_i8_ptr = builder.bitcast(local_storage, i8_ptr_type)

    # Read the field through bpf_probe_read_kernel. Together with the GEP
    # above this emits roughly:
    #   %gep    = getelementptr i8, ptr %struct_ptr, i64 %offset
    #   %result = call i64 inttoptr (i64 113 to ptr)(ptr %local_storage, i32 <size>, ptr %gep)
    # NOTE(review): imported inside the function, presumably to avoid a
    # circular import with pythonbpf.helper -- confirm before hoisting.
    from pythonbpf.helper import emit_probe_read_kernel_call

    emit_probe_read_kernel_call(
        builder, local_storage_i8_ptr, field_size_bytes, field_ptr
    )

    # Load the value from local storage
    value = builder.load(local_storage)

    # Zero-extend i32 to i64 if needed
    if needs_zext:
        value = builder.zext(value, ir.IntType(64))
        logger.info("Zero-extended i32 value to i64")

    return value
@staticmethod @staticmethod
def load_ctx_field(builder, ctx_arg, offset_global, field_data, struct_name=None): def load_ctx_field(builder, ctx_arg, offset_global, field_data, struct_name=None):
""" """

View File

@ -1,15 +1,18 @@
#include "vmlinux.h" #include "vmlinux.h"
#include <bpf/bpf_helpers.h> #include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h> #include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
char LICENSE[] SEC("license") = "GPL"; char LICENSE[] SEC("license") = "GPL";
SEC("kprobe/blk_mq_start_request") SEC("kprobe/blk_mq_start_request")
int example(struct pt_regs *ctx) int example(struct pt_regs *ctx)
{ {
u64 a = ctx->r15;
struct request *req = (struct request *)(ctx->di); struct request *req = (struct request *)(ctx->di);
u32 data_len = req->__data_len; unsigned int something_ns = BPF_CORE_READ(req, timeout);
bpf_printk("data length %u\n", data_len); unsigned int data_len = BPF_CORE_READ(req, __data_len);
bpf_printk("data length %lld %ld %ld\n", data_len, something_ns, a);
return 0; return 0;
} }

View File

@ -0,0 +1,18 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
char LICENSE[] SEC("license") = "GPL";
/*
 * kprobe on blk_mq_start_request: reads the request pointer from the first
 * argument register (ctx->di), then prints two of its fields together with
 * the value of r15.
 *
 * NOTE(review): `req` is dereferenced directly here rather than through a
 * probe-read style accessor -- confirm this is intended for this variant.
 */
SEC("kprobe/blk_mq_start_request")
int example(struct pt_regs *ctx)
{
    u64 a = ctx->r15;
    struct request *req = (struct request *)(ctx->di);
    unsigned int something_ns = req->timeout;
    unsigned int data_len = req->__data_len;

    /* Unsigned specifiers match the argument types: two unsigned ints and a
     * u64. With signed specifiers a register value whose top bit is set
     * would be printed as a negative number. */
    bpf_printk("data length %u %u %llu\n", data_len, something_ns, a);
    return 0;
}

View File

@ -1,5 +1,5 @@
from vmlinux import struct_request, struct_pt_regs from vmlinux import struct_request, struct_pt_regs
from pythonbpf import bpf, section, bpfglobal, compile_to_ir from pythonbpf import bpf, section, bpfglobal, compile_to_ir, compile
import logging import logging
from ctypes import c_int64 from ctypes import c_int64
@ -7,9 +7,13 @@ from ctypes import c_int64
@bpf @bpf
@section("kprobe/blk_mq_start_request") @section("kprobe/blk_mq_start_request")
def example(ctx: struct_pt_regs) -> c_int64: def example(ctx: struct_pt_regs) -> c_int64:
a = ctx.r15
req = struct_request(ctx.di) req = struct_request(ctx.di)
c = req.__data_len d = req.__data_len
print(f"data length {c}") b = ctx.r12
c = req.timeout
print(f"data length {d} and {c} and {a}")
print(f"ctx arg {b}")
return c_int64(0) return c_int64(0)
@ -20,3 +24,4 @@ def LICENSE() -> str:
compile_to_ir("requests.py", "requests.ll", loglevel=logging.INFO) compile_to_ir("requests.py", "requests.ll", loglevel=logging.INFO)
compile()