2 Commits

25 changed files with 184 additions and 1033 deletions

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "pythonbpf"
-version = "0.1.7"
+version = "0.1.6"
 description = "Reduced Python frontend for eBPF"
 authors = [
     { name = "r41k0u", email="pragyanshchaturvedi18@gmail.com" },
@@ -29,7 +29,7 @@ license = {text = "Apache-2.0"}
 requires-python = ">=3.10"
 dependencies = [
-    "llvmlite>=0.45",
+    "llvmlite",
     "astpretty",
     "pylibbpf"
 ]

View File

@@ -7,7 +7,6 @@ from pythonbpf.helper import HelperHandlerRegistry
 from pythonbpf.vmlinux_parser.dependency_node import Field
 from .expr import VmlinuxHandlerRegistry
 from pythonbpf.type_deducer import ctypes_to_ir
-from pythonbpf.maps import BPFMapType
 
 logger = logging.getLogger(__name__)
@@ -26,9 +25,7 @@ def create_targets_and_rvals(stmt):
     return stmt.targets, [stmt.value]
 
 
-def handle_assign_allocation(
-    builder, stmt, local_sym_tab, map_sym_tab, structs_sym_tab
-):
+def handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab):
     """Handle memory allocation for assignment statements."""
     logger.info(f"Handling assignment for allocation: {ast.dump(stmt)}")
@@ -58,9 +55,7 @@ handle_assign_allocation(
     # Determine type and allocate based on rval
     if isinstance(rval, ast.Call):
-        _allocate_for_call(
-            builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab
-        )
+        _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab)
     elif isinstance(rval, ast.Constant):
         _allocate_for_constant(builder, var_name, rval, local_sym_tab)
     elif isinstance(rval, ast.BinOp):
@@ -79,9 +74,7 @@ handle_assign_allocation(
     )
 
 
-def _allocate_for_call(
-    builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab
-):
+def _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab):
     """Allocate memory for variable assigned from a call."""
     if isinstance(rval.func, ast.Name):
@@ -118,91 +111,20 @@ _allocate_for_call(
             local_sym_tab[var_name] = LocalSymbol(var, struct_info.ir_type, call_type)
             logger.info(f"Pre-allocated {var_name} for struct {call_type}")
-        elif VmlinuxHandlerRegistry.is_vmlinux_struct(call_type):
-            # When calling struct_name(pointer), we're doing a cast, not construction
-            # So we allocate as a pointer (i64) not as the actual struct
-            var = builder.alloca(ir.IntType(64), name=var_name)
-            var.align = 8
-            local_sym_tab[var_name] = LocalSymbol(
-                var, ir.IntType(64), VmlinuxHandlerRegistry.get_struct_type(call_type)
-            )
-            logger.info(
-                f"Pre-allocated {var_name} for vmlinux struct pointer cast to {call_type}"
-            )
         else:
             logger.warning(f"Unknown call type for allocation: {call_type}")
     elif isinstance(rval.func, ast.Attribute):
         # Map method calls - need double allocation for ptr handling
-        _allocate_for_map_method(
-            builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab
-        )
+        _allocate_for_map_method(builder, var_name, local_sym_tab)
     else:
         logger.warning(f"Unsupported call function type for {var_name}")
 
 
-def _allocate_for_map_method(
-    builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab
-):
+def _allocate_for_map_method(builder, var_name, local_sym_tab):
     """Allocate memory for variable assigned from map method (double alloc)."""
-    map_name = rval.func.value.id
-    method_name = rval.func.attr
-
-    # NOTE: We will have to special case HashMap.lookup which returns a pointer to value type
-    # The value type can be a struct as well, so we need to handle that properly
-    # This special casing is not ideal, as over time other map methods may need similar handling
-    # But for now, we will just handle lookup specifically
-    if map_name not in map_sym_tab:
-        logger.error(f"Map '{map_name}' not found for allocation")
-        return
-
-    if method_name != "lookup":
-        # Fallback allocation for other map methods
-        _allocate_for_map_method_fallback(builder, var_name, local_sym_tab)
-        return
-
-    map_params = map_sym_tab[map_name].params
-    if map_params["type"] != BPFMapType.HASH:
-        logger.warning(
-            "Map method lookup used on non-hash map, using fallback allocation"
-        )
-        _allocate_for_map_method_fallback(builder, var_name, local_sym_tab)
-        return
-
-    value_type = map_params["value"]
-    # Determine IR type for value
-    if isinstance(value_type, str) and value_type in structs_sym_tab:
-        struct_info = structs_sym_tab[value_type]
-        value_ir_type = struct_info.ir_type
-    else:
-        value_ir_type = ctypes_to_ir(value_type)
-    if value_ir_type is None:
-        logger.warning(
-            f"Could not determine IR type for map value '{value_type}', using fallback allocation"
-        )
-        _allocate_for_map_method_fallback(builder, var_name, local_sym_tab)
-        return
-
-    # Main variable (pointer to pointer)
-    ir_type = ir.PointerType(ir.IntType(64))
-    var = builder.alloca(ir_type, name=var_name)
-    local_sym_tab[var_name] = LocalSymbol(var, ir_type, value_type)
-
-    # Temporary variable for computed values
-    tmp_ir_type = value_ir_type
-    var_tmp = builder.alloca(tmp_ir_type, name=f"{var_name}_tmp")
-    local_sym_tab[f"{var_name}_tmp"] = LocalSymbol(var_tmp, tmp_ir_type)
-    logger.info(
-        f"Pre-allocated {var_name} and {var_name}_tmp for map method lookup of type {value_ir_type}"
-    )
-
-
-def _allocate_for_map_method_fallback(builder, var_name, local_sym_tab):
-    """Fallback allocation for map method variable (i64* and i64**)."""
     # Main variable (pointer to pointer)
     ir_type = ir.PointerType(ir.IntType(64))
     var = builder.alloca(ir_type, name=var_name)
@@ -213,9 +135,7 @@ def _allocate_for_map_method_fallback(builder, var_name, local_sym_tab):
     var_tmp = builder.alloca(tmp_ir_type, name=f"{var_name}_tmp")
     local_sym_tab[f"{var_name}_tmp"] = LocalSymbol(var_tmp, tmp_ir_type)
-    logger.info(
-        f"Pre-allocated {var_name} and {var_name}_tmp for map method (fallback)"
-    )
+    logger.info(f"Pre-allocated {var_name} and {var_name}_tmp for map method")
 
 
 def _allocate_for_constant(builder, var_name, rval, local_sym_tab):
@@ -337,6 +257,13 @@ def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, structs_sym_
             VmlinuxHandlerRegistry.get_field_type(vmlinux_struct_name, field_name)
         )
         field_ir, field = field_type
+        # TODO: For now, we only support integer type allocations.
+        # This always assumes first argument of function to be the context struct
+        base_ptr = builder.function.args[0]
+        local_sym_tab[
+            struct_var
+        ].var = base_ptr  # This is repurposing of var to store the pointer of the base type
+        local_sym_tab[struct_var].ir_type = field_ir
 
         # Determine the actual IR type based on the field's type
         actual_ir_type = None
@@ -391,14 +318,12 @@ def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, structs_sym_
             )
             actual_ir_type = ir.IntType(64)
 
-        # Allocate with the actual IR type
+        # Allocate with the actual IR type, not the GlobalVariable
         var = _allocate_with_type(builder, var_name, actual_ir_type)
-        local_sym_tab[var_name] = LocalSymbol(
-            var, actual_ir_type, field
-        )  # <-- Store Field metadata
+        local_sym_tab[var_name] = LocalSymbol(var, actual_ir_type, field)
         logger.info(
-            f"Pre-allocated {var_name} as {actual_ir_type} from vmlinux struct {vmlinux_struct_name}.{field_name}"
+            f"Pre-allocated {var_name} from vmlinux struct {vmlinux_struct_name}.{field_name}"
        )
        return
    else:
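For reference, a minimal llvmlite sketch of the "double allocation" kept by _allocate_for_map_method in this diff: one slot typed i64* for the pointer a map lookup returns, plus an i64 scratch slot named <var>_tmp for computed values. Module, function, and variable names below are illustrative, not pythonbpf API.

from llvmlite import ir

mod = ir.Module(name="alloc_sketch")
fn = ir.Function(mod, ir.FunctionType(ir.IntType(64), []), name="prog")
builder = ir.IRBuilder(fn.append_basic_block("entry"))

var_name = "t"
# Main variable: pointer-to-i64 slot that will hold the lookup result pointer
ptr_slot = builder.alloca(ir.PointerType(ir.IntType(64)), name=var_name)
# Temporary variable: plain i64 scratch slot for computed values
tmp_slot = builder.alloca(ir.IntType(64), name=f"{var_name}_tmp")
builder.ret(ir.Constant(ir.IntType(64), 0))
print(mod)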

View File

@@ -1,7 +1,5 @@
 import ast
 import logging
-from inspect import isclass
-
 from llvmlite import ir
 from pythonbpf.expr import eval_expr
 from pythonbpf.helper import emit_probe_read_kernel_str_call
@@ -150,30 +148,8 @@ def handle_variable_assignment(
         return False
     val, val_type = val_result
-    logger.info(
-        f"Evaluated value for {var_name}: {val} of type {val_type}, expected {var_type}"
-    )
+    logger.info(f"Evaluated value for {var_name}: {val} of type {val_type}, {var_type}")
 
     if val_type != var_type:
-        # Handle vmlinux struct pointers - they're represented as Python classes but are i64 pointers
-        if isclass(val_type) and (val_type.__module__ == "vmlinux"):
-            logger.info("Handling vmlinux struct pointer assignment")
-            # vmlinux struct pointers: val is a pointer, need to convert to i64
-            if isinstance(var_type, ir.IntType) and var_type.width == 64:
-                # Convert pointer to i64 using ptrtoint
-                if isinstance(val.type, ir.PointerType):
-                    val = builder.ptrtoint(val, ir.IntType(64))
-                    logger.info(
-                        "Converted vmlinux struct pointer to i64 using ptrtoint"
-                    )
-                builder.store(val, var_ptr)
-                logger.info(f"Assigned vmlinux struct pointer to {var_name} (i64)")
-                return True
-            else:
-                logger.error(
-                    f"Type mismatch: vmlinux struct pointer requires i64, got {var_type}"
-                )
-                return False
         if isinstance(val_type, Field):
             logger.info("Handling assignment to struct field")
             # Special handling for struct_xdp_md i32 fields that are zero-extended to i64

View File

@@ -25,7 +25,7 @@ import re
 logger: Logger = logging.getLogger(__name__)
 
-VERSION = "v0.1.7"
+VERSION = "v0.1.6"
 
 
 def finalize_module(original_str):

View File

@@ -49,10 +49,6 @@ class DebugInfoGenerator:
             )
         return self._type_cache[key]
 
-    def get_uint8_type(self) -> Any:
-        """Get debug info for signed 8-bit integer"""
-        return self.get_basic_type("char", 8, dc.DW_ATE_unsigned)
-
     def get_int32_type(self) -> Any:
         """Get debug info for signed 32-bit integer"""
         return self.get_basic_type("int", 32, dc.DW_ATE_signed)

View File

@@ -12,8 +12,8 @@ from .type_normalization import (
     get_base_type_and_depth,
     deref_to_depth,
 )
-from pythonbpf.vmlinux_parser.assignment_info import Field
 from .vmlinux_registry import VmlinuxHandlerRegistry
+from ..vmlinux_parser.dependency_node import Field
 
 logger: Logger = logging.getLogger(__name__)
@@ -61,7 +61,6 @@ def _handle_constant_expr(module, builder, expr: ast.Constant):
 
 def _handle_attribute_expr(
-    func,
     expr: ast.Attribute,
     local_sym_tab: Dict,
     structs_sym_tab: Dict,
@@ -77,89 +76,6 @@ _handle_attribute_expr(
         logger.info(
             f"Variable type: {var_type}, Variable ptr: {var_ptr}, Variable Metadata: {var_metadata}"
         )
-        # Check if this is a pointer to a struct (from map lookup)
-        if (
-            isinstance(var_type, ir.PointerType)
-            and var_metadata
-            and isinstance(var_metadata, str)
-        ):
-            if var_metadata in structs_sym_tab:
-                logger.info(
-                    f"Handling pointer to struct {var_metadata} from map lookup"
-                )
-                if func is None:
-                    raise ValueError(
-                        f"func parameter required for null-safe pointer access to {var_name}.{attr_name}"
-                    )
-
-                # Load the pointer value (ptr<struct>)
-                struct_ptr = builder.load(var_ptr)
-
-                # Create blocks for null check
-                null_check_block = builder.block
-                not_null_block = func.append_basic_block(
-                    name=f"{var_name}_not_null"
-                )
-                merge_block = func.append_basic_block(name=f"{var_name}_merge")
-
-                # Check if pointer is null
-                null_ptr = ir.Constant(struct_ptr.type, None)
-                is_not_null = builder.icmp_signed("!=", struct_ptr, null_ptr)
-                logger.info(f"Inserted null check for pointer {var_name}")
-                builder.cbranch(is_not_null, not_null_block, merge_block)
-
-                # Not-null block: Access the field
-                builder.position_at_end(not_null_block)
-
-                # Get struct metadata
-                metadata = structs_sym_tab[var_metadata]
-                struct_ptr = builder.bitcast(
-                    struct_ptr, metadata.ir_type.as_pointer()
-                )
-
-                if attr_name not in metadata.fields:
-                    raise ValueError(
-                        f"Field '{attr_name}' not found in struct '{var_metadata}'"
-                    )
-
-                # GEP to field
-                field_gep = metadata.gep(builder, struct_ptr, attr_name)
-
-                # Load field value
-                field_val = builder.load(field_gep)
-                field_type = metadata.field_type(attr_name)
-                logger.info(
-                    f"Loaded field {attr_name} from struct pointer, type: {field_type}"
-                )
-
-                # Branch to merge
-                not_null_after_load = builder.block
-                builder.branch(merge_block)
-
-                # Merge block: PHI node for the result
-                builder.position_at_end(merge_block)
-                phi = builder.phi(field_type, name=f"{var_name}_{attr_name}")
-
-                # If null, return zero/default value
-                if isinstance(field_type, ir.IntType):
-                    zero_value = ir.Constant(field_type, 0)
-                elif isinstance(field_type, ir.PointerType):
-                    zero_value = ir.Constant(field_type, None)
-                elif isinstance(field_type, ir.ArrayType):
-                    # For arrays, we can't easily create a zero constant
-                    # This case is tricky - for now, just use undef
-                    zero_value = ir.Constant(field_type, ir.Undefined)
-                else:
-                    zero_value = ir.Constant(field_type, ir.Undefined)
-
-                phi.add_incoming(zero_value, null_check_block)
-                phi.add_incoming(field_val, not_null_after_load)
-
-                logger.info(f"Created PHI node for {var_name}.{attr_name}")
-                return phi, field_type
-
         if (
             hasattr(var_metadata, "__module__")
             and var_metadata.__module__ == "vmlinux"
@@ -173,16 +89,8 @@ _handle_attribute_expr(
                 return vmlinux_result
             else:
                 raise RuntimeError("Vmlinux struct did not process successfully")
-
-        elif isinstance(var_metadata, Field):
-            logger.error(
-                f"Cannot access field '{attr_name}' on already-loaded field value '{var_name}'"
-            )
-            return None
-
-        # Regular user-defined struct
-        metadata = structs_sym_tab.get(var_metadata)
-        if metadata and attr_name in metadata.fields:
+        metadata = structs_sym_tab[var_metadata]
+        if attr_name in metadata.fields:
             gep = metadata.gep(builder, var_ptr, attr_name)
             val = builder.load(gep)
             field_type = metadata.field_type(attr_name)
@@ -617,66 +525,6 @@ _handle_boolean_op(
     return None
 
 
-# ============================================================================
-# VMLinux casting
-# ============================================================================
-
-
-def _handle_vmlinux_cast(
-    func,
-    module,
-    builder,
-    expr,
-    local_sym_tab,
-    map_sym_tab,
-    structs_sym_tab=None,
-):
-    # handle expressions such as struct_request(ctx.di) where struct_request is a vmlinux
-    # struct and ctx.di is a pointer to a struct but is actually represented as a c_uint64
-    # which needs to be cast to a pointer. This is also a field of another vmlinux struct
-    """Handle vmlinux struct cast expressions like struct_request(ctx.di)."""
-    if len(expr.args) != 1:
-        logger.info("vmlinux struct cast takes exactly one argument")
-        return None
-
-    # Get the struct name
-    struct_name = expr.func.id
-
-    # Evaluate the argument (e.g., ctx.di which is a c_uint64)
-    arg_result = eval_expr(
-        func,
-        module,
-        builder,
-        expr.args[0],
-        local_sym_tab,
-        map_sym_tab,
-        structs_sym_tab,
-    )
-    if arg_result is None:
-        logger.info("Failed to evaluate argument to vmlinux struct cast")
-        return None
-
-    arg_val, arg_type = arg_result
-
-    # Get the vmlinux struct type
-    vmlinux_struct_type = VmlinuxHandlerRegistry.get_struct_type(struct_name)
-    if vmlinux_struct_type is None:
-        logger.error(f"Failed to get vmlinux struct type for {struct_name}")
-        return None
-
-    # Cast the integer/value to a pointer to the struct
-    # If arg_val is an integer type, we need to inttoptr it
-    ptr_type = ir.PointerType()
-    # TODO: add a integer check here later
-    if ctypes_to_ir(arg_type.type.__name__):
-        # Cast integer to pointer
-        casted_ptr = builder.inttoptr(arg_val, ptr_type)
-    else:
-        logger.error(f"Unsupported type for vmlinux cast: {arg_type}")
-        return None
-
-    return casted_ptr, vmlinux_struct_type
-
-
 # ============================================================================
 # Expression Dispatcher
 # ============================================================================
@@ -697,18 +545,6 @@ eval_expr(
     elif isinstance(expr, ast.Constant):
         return _handle_constant_expr(module, builder, expr)
     elif isinstance(expr, ast.Call):
-        if isinstance(expr.func, ast.Name) and VmlinuxHandlerRegistry.is_vmlinux_struct(
-            expr.func.id
-        ):
-            return _handle_vmlinux_cast(
-                func,
-                module,
-                builder,
-                expr,
-                local_sym_tab,
-                map_sym_tab,
-                structs_sym_tab,
-            )
         if isinstance(expr.func, ast.Name) and expr.func.id == "deref":
             return _handle_deref_call(expr, local_sym_tab, builder)
@@ -732,9 +568,7 @@ eval_expr(
             logger.warning(f"Unknown call: {ast.dump(expr)}")
             return None
     elif isinstance(expr, ast.Attribute):
-        return _handle_attribute_expr(
-            func, expr, local_sym_tab, structs_sym_tab, builder
-        )
+        return _handle_attribute_expr(expr, local_sym_tab, structs_sym_tab, builder)
    elif isinstance(expr, ast.BinOp):
        return _handle_binary_op(
            func,

View File

@@ -48,10 +48,3 @@ def deref_to_depth(func, builder, val, target_depth):
         cur_val = phi
         cur_type = pointee_type
     return cur_val
-
-
-def deref_struct_ptr(
-    func, builder, struct_ptr, struct_metadata, field_name, structs_sym_tab
-):
-    """Dereference a pointer to a struct type."""
-    return deref_to_depth(func, builder, struct_ptr, 1)

View File

@@ -147,9 +147,7 @@ def allocate_mem(
                 structs_sym_tab,
             )
         elif isinstance(stmt, ast.Assign):
-            handle_assign_allocation(
-                builder, stmt, local_sym_tab, map_sym_tab, structs_sym_tab
-            )
+            handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab)
 
     allocate_temp_pool(builder, max_temps_needed, local_sym_tab)

View File

@ -1,10 +1,6 @@
from .helper_registry import HelperHandlerRegistry from .helper_registry import HelperHandlerRegistry
from .helper_utils import reset_scratch_pool from .helper_utils import reset_scratch_pool
from .bpf_helper_handler import ( from .bpf_helper_handler import handle_helper_call, emit_probe_read_kernel_str_call
handle_helper_call,
emit_probe_read_kernel_str_call,
emit_probe_read_kernel_call,
)
from .helpers import ( from .helpers import (
ktime, ktime,
pid, pid,
@ -78,7 +74,6 @@ __all__ = [
"reset_scratch_pool", "reset_scratch_pool",
"handle_helper_call", "handle_helper_call",
"emit_probe_read_kernel_str_call", "emit_probe_read_kernel_str_call",
"emit_probe_read_kernel_call",
"ktime", "ktime",
"pid", "pid",
"deref", "deref",

View File

@@ -34,7 +34,6 @@ class BPFHelperID(Enum):
     BPF_PERF_EVENT_OUTPUT = 25
     BPF_GET_STACK = 67
     BPF_PROBE_READ_KERNEL_STR = 115
-    BPF_PROBE_READ_KERNEL = 113
     BPF_RINGBUF_OUTPUT = 130
     BPF_RINGBUF_RESERVE = 131
     BPF_RINGBUF_SUBMIT = 132
@@ -575,75 +574,6 @@ bpf_probe_read_kernel_str_emitter(
     return result, ir.IntType(64)
 
 
-def emit_probe_read_kernel_call(builder, dst_ptr, dst_size, src_ptr):
-    """Emit LLVM IR call to bpf_probe_read_kernel"""
-    fn_type = ir.FunctionType(
-        ir.IntType(64),
-        [ir.PointerType(), ir.IntType(32), ir.PointerType()],
-        var_arg=False,
-    )
-    fn_ptr = builder.inttoptr(
-        ir.Constant(ir.IntType(64), BPFHelperID.BPF_PROBE_READ_KERNEL.value),
-        ir.PointerType(fn_type),
-    )
-    result = builder.call(
-        fn_ptr,
-        [
-            builder.bitcast(dst_ptr, ir.PointerType()),
-            ir.Constant(ir.IntType(32), dst_size),
-            builder.bitcast(src_ptr, ir.PointerType()),
-        ],
-        tail=False,
-    )
-    logger.info(f"Emitted bpf_probe_read_kernel (size={dst_size})")
-    return result
-
-
-@HelperHandlerRegistry.register(
-    "probe_read_kernel",
-    param_types=[
-        ir.PointerType(ir.IntType(8)),
-        ir.PointerType(ir.IntType(8)),
-    ],
-    return_type=ir.IntType(64),
-)
-def bpf_probe_read_kernel_emitter(
-    call,
-    map_ptr,
-    module,
-    builder,
-    func,
-    local_sym_tab=None,
-    struct_sym_tab=None,
-    map_sym_tab=None,
-):
-    """Emit LLVM IR for bpf_probe_read_kernel helper."""
-    if len(call.args) != 2:
-        raise ValueError(
-            f"probe_read_kernel expects 2 args (dst, src), got {len(call.args)}"
-        )
-
-    # Get destination buffer (char array -> i8*)
-    dst_ptr, dst_size = get_or_create_ptr_from_arg(
-        func, module, call.args[0], builder, local_sym_tab, map_sym_tab, struct_sym_tab
-    )
-
-    # Get source pointer (evaluate expression)
-    src_ptr, src_type = get_ptr_from_arg(
-        call.args[1], func, module, builder, local_sym_tab, map_sym_tab, struct_sym_tab
-    )
-
-    # Emit the helper call
-    result = emit_probe_read_kernel_call(builder, dst_ptr, dst_size, src_ptr)
-    logger.info(f"Emitted bpf_probe_read_kernel (size={dst_size})")
-    return result, ir.IntType(64)
-
-
 @HelperHandlerRegistry.register(
     "random",
     param_types=[],
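For reference, a standalone llvmlite sketch of the helper-emission pattern that the removed emit_probe_read_kernel_call above uses: build the helper's function type, inttoptr the numeric helper ID into a function pointer, then call it. This is an illustration with assumed names (mod, prog, dst, src), not the pythonbpf implementation itself.

from llvmlite import ir

HELPER_ID = 115  # BPF_PROBE_READ_KERNEL_STR, per the enum above

mod = ir.Module(name="helper_sketch")
prog = ir.Function(mod, ir.FunctionType(ir.IntType(64), []), name="prog")
builder = ir.IRBuilder(prog.append_basic_block("entry"))

dst = builder.alloca(ir.IntType(8), size=16, name="dst")  # destination buffer
src = builder.alloca(ir.IntType(8), size=16, name="src")  # source pointer stand-in

# Helper signature: i64 (ptr, i32, ptr)
fn_type = ir.FunctionType(
    ir.IntType(64),
    [ir.PointerType(), ir.IntType(32), ir.PointerType()],
    var_arg=False,
)
# BPF helpers are called through their ID cast to a function pointer
fn_ptr = builder.inttoptr(ir.Constant(ir.IntType(64), HELPER_ID), ir.PointerType(fn_type))
result = builder.call(
    fn_ptr,
    [
        builder.bitcast(dst, ir.PointerType()),
        ir.Constant(ir.IntType(32), 16),
        builder.bitcast(src, ir.PointerType()),
    ],
    tail=False,
)
builder.ret(result)
print(mod)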

View File

@@ -1,31 +1,22 @@
-import logging
-from llvmlite import ir
 from pythonbpf.debuginfo import DebugInfoGenerator
 from .map_types import BPFMapType
 
-logger: logging.Logger = logging.getLogger(__name__)
-
 
 def create_map_debug_info(module, map_global, map_name, map_params, structs_sym_tab):
     """Generate debug info metadata for BPF maps HASH and PERF_EVENT_ARRAY"""
     generator = DebugInfoGenerator(module)
-    logger.info(f"Creating debug info for map {map_name} with params {map_params}")
     uint_type = generator.get_uint32_type()
+    ulong_type = generator.get_uint64_type()
     array_type = generator.create_array_type(
         uint_type, map_params.get("type", BPFMapType.UNSPEC).value
     )
     type_ptr = generator.create_pointer_type(array_type, 64)
     key_ptr = generator.create_pointer_type(
-        array_type
-        if "key_size" in map_params
-        else _get_key_val_dbg_type(map_params.get("key"), generator, structs_sym_tab),
-        64,
+        array_type if "key_size" in map_params else ulong_type, 64
     )
     value_ptr = generator.create_pointer_type(
-        array_type
-        if "value_size" in map_params
-        else _get_key_val_dbg_type(map_params.get("value"), generator, structs_sym_tab),
-        64,
+        array_type if "value_size" in map_params else ulong_type, 64
     )
 
     elements_arr = []
@@ -106,66 +97,3 @@ create_ringbuf_debug_info(
     )
     map_global.set_metadata("dbg", global_var)
     return global_var
-
-
-def _get_key_val_dbg_type(name, generator, structs_sym_tab):
-    """Get the debug type for key/value based on type object"""
-    if not name:
-        logger.warn("No name provided for key/value type, defaulting to uint64")
-        return generator.get_uint64_type()
-    type_obj = structs_sym_tab.get(name)
-    if type_obj:
-        logger.info(f"Found struct named {name}, generating debug type")
-        return _get_struct_debug_type(type_obj, generator, structs_sym_tab)
-
-    # Fallback to basic types
-    logger.info(f"No struct named {name}, falling back to basic type")
-    # NOTE: Only handling int and long for now
-    if name in ["c_int32", "c_uint32"]:
-        return generator.get_uint32_type()
-
-    # Default fallback for now
-    return generator.get_uint64_type()
-
-
-def _get_struct_debug_type(struct_obj, generator, structs_sym_tab):
-    """Recursively create debug type for struct"""
-    elements_arr = []
-    for fld in struct_obj.fields.keys():
-        fld_type = struct_obj.field_type(fld)
-        if isinstance(fld_type, ir.IntType):
-            if fld_type.width == 32:
-                fld_dbg_type = generator.get_uint32_type()
-            else:
-                # NOTE: Assuming 64-bit for all other int types
-                fld_dbg_type = generator.get_uint64_type()
-        elif isinstance(fld_type, ir.ArrayType):
-            # NOTE: Array types have u8 elements only for now
-            # Debug info generation should fail for other types
-            elem_type = fld_type.element
-            if isinstance(elem_type, ir.IntType) and elem_type.width == 8:
-                char_type = generator.get_uint8_type()
-                fld_dbg_type = generator.create_array_type(char_type, fld_type.count)
-            else:
-                logger.warning(
-                    f"Array element type {str(elem_type)} not supported for debug info, skipping"
-                )
-                continue
-        else:
-            # NOTE: Only handling int and char arrays for now
-            logger.warning(
-                f"Field type {str(fld_type)} not supported for debug info, skipping"
-            )
-            continue
-        member = generator.create_struct_member(
-            fld, fld_dbg_type, struct_obj.field_size(fld)
-        )
-        elements_arr.append(member)
-    struct_type = generator.create_struct_type(
-        elements_arr, struct_obj.size * 8, is_distinct=True
-    )
-    return struct_type

View File

@@ -48,7 +48,7 @@ def create_bpf_map(module, map_name, map_params):
     map_global.align = 8
 
     logger.info(f"Created BPF map: {map_name} with params {map_params}")
-    return MapSymbol(type=map_params["type"], sym=map_global, params=map_params)
+    return MapSymbol(type=map_params["type"], sym=map_global)
 
 
 def _parse_map_params(rval, expected_args=None):
@@ -105,9 +105,7 @@ def process_ringbuf_map(map_name, rval, module, structs_sym_tab):
     logger.info(f"Ringbuf map parameters: {map_params}")
     map_global = create_bpf_map(module, map_name, map_params)
-    create_ringbuf_debug_info(
-        module, map_global.sym, map_name, map_params, structs_sym_tab
-    )
+    create_ringbuf_debug_info(module, map_global.sym, map_name, map_params)
     return map_global
@@ -121,7 +119,7 @@ def process_hash_map(map_name, rval, module, structs_sym_tab):
     logger.info(f"Map parameters: {map_params}")
     map_global = create_bpf_map(module, map_name, map_params)
     # Generate debug info for BTF
-    create_map_debug_info(module, map_global.sym, map_name, map_params, structs_sym_tab)
+    create_map_debug_info(module, map_global.sym, map_name, map_params)
     return map_global

View File

@@ -11,7 +11,6 @@ class MapSymbol:
     type: BPFMapType
     sym: ir.GlobalVariable
-    params: dict[str, Any] | None = None
 
 
 class MapProcessorRegistry:

View File

@@ -16,33 +16,6 @@ def get_module_symbols(module_name: str):
     return [name for name in dir(imported_module)], imported_module
 
 
-def unwrap_pointer_type(type_obj: Any) -> Any:
-    """
-    Recursively unwrap all pointer layers to get the base type.
-
-    This handles multiply nested pointers like LP_LP_struct_attribute_group
-    and returns the base type (struct_attribute_group).
-    Stops unwrapping when reaching a non-pointer type (one without _type_ attribute).
-
-    Args:
-        type_obj: The type object to unwrap
-
-    Returns:
-        The base type after unwrapping all pointer layers
-    """
-    current_type = type_obj
-    # Keep unwrapping while it's a pointer/array type (has _type_)
-    # But stop if _type_ is just a string or basic type marker
-    while hasattr(current_type, "_type_"):
-        next_type = current_type._type_
-        # Stop if _type_ is a string (like 'c' for c_char)
-        if isinstance(next_type, str):
-            break
-        current_type = next_type
-    return current_type
-
-
 def process_vmlinux_class(
     node,
     llvm_module,
@@ -185,90 +158,13 @@ process_vmlinux_post_ast(
                         if hasattr(elem_type, "_length_") and is_complex_type:
                             type_length = elem_type._length_
 
-                        # Unwrap all pointer layers to get the base type for dependency tracking
-                        base_type = unwrap_pointer_type(elem_type)
-                        base_type_module = getattr(base_type, "__module__", None)
-
-                        if base_type_module == "vmlinux":
-                            base_type_name = (
-                                base_type.__name__
-                                if hasattr(base_type, "__name__")
-                                else str(base_type)
-                            )
-                            # ONLY add vmlinux types as dependencies
-                            new_dep_node.add_dependent(base_type_name)
-                            logger.debug(
-                                f"{containing_type} containing type of parent {elem_name} with {elem_type} and ctype {ctype_complex_type} and length {type_length}"
-                            )
-                            new_dep_node.set_field_containing_type(
-                                elem_name, containing_type
-                            )
-                            new_dep_node.set_field_type_size(elem_name, type_length)
-                            new_dep_node.set_field_ctype_complex_type(
-                                elem_name, ctype_complex_type
-                            )
-                            new_dep_node.set_field_type(elem_name, elem_type)
-                            # Check the containing_type module to decide whether to recurse
-                            containing_type_module = getattr(
-                                containing_type, "__module__", None
-                            )
-                            if containing_type_module == "vmlinux":
-                                # Also unwrap containing_type to get base type name
-                                base_containing_type = unwrap_pointer_type(
-                                    containing_type
-                                )
-                                containing_type_name = (
-                                    base_containing_type.__name__
-                                    if hasattr(base_containing_type, "__name__")
-                                    else str(base_containing_type)
-                                )
-                                # Check for self-reference or already processed
-                                if containing_type_name == current_symbol_name:
-                                    # Self-referential pointer
-                                    logger.debug(
-                                        f"Self-referential pointer in {current_symbol_name}.{elem_name}"
-                                    )
-                                    new_dep_node.set_field_ready(elem_name, True)
-                                elif handler.has_node(containing_type_name):
-                                    # Already processed
-                                    logger.debug(
-                                        f"Reusing already processed {containing_type_name}"
-                                    )
-                                    new_dep_node.set_field_ready(elem_name, True)
-                                else:
-                                    # Process recursively - use base containing type, not the pointer wrapper
-                                    new_dep_node.add_dependent(containing_type_name)
-                                    process_vmlinux_post_ast(
-                                        base_containing_type,
-                                        llvm_handler,
-                                        handler,
-                                        processing_stack,
-                                    )
-                                    new_dep_node.set_field_ready(elem_name, True)
-                            elif (
-                                containing_type_module == ctypes.__name__
-                                or containing_type_module is None
-                            ):
-                                logger.debug(
-                                    f"Processing ctype internal{containing_type}"
-                                )
-                                new_dep_node.set_field_ready(elem_name, True)
-                            else:
-                                raise TypeError(
-                                    f"Module not supported in recursive resolution: {containing_type_module}"
-                                )
-                        elif (
-                            base_type_module == ctypes.__name__
-                            or base_type_module is None
-                        ):
-                            # Handle ctypes or types with no module (like some internal ctypes types)
-                            # DO NOT add ctypes as dependencies - just set field metadata and mark ready
-                            logger.debug(
-                                f"Base type {base_type} is ctypes - NOT adding as dependency, just processing field"
-                            )
+                        if containing_type.__module__ == "vmlinux":
+                            new_dep_node.add_dependent(
+                                elem_type._type_.__name__
+                                if hasattr(elem_type._type_, "__name__")
+                                else str(elem_type._type_)
+                            )
+                        elif containing_type.__module__ == ctypes.__name__:
                             if isinstance(elem_type, type):
                                 if issubclass(elem_type, ctypes.Array):
                                     ctype_complex_type = ctypes.Array
@@ -280,20 +176,57 @@ process_vmlinux_post_ast(
                                     )
                                 else:
                                     raise TypeError("Unsupported ctypes subclass")
-                            # Set field metadata but DO NOT add dependency or recurse
-                            new_dep_node.set_field_containing_type(
-                                elem_name, containing_type
-                            )
-                            new_dep_node.set_field_type_size(elem_name, type_length)
-                            new_dep_node.set_field_ctype_complex_type(
-                                elem_name, ctype_complex_type
-                            )
-                            new_dep_node.set_field_type(elem_name, elem_type)
-                            new_dep_node.set_field_ready(elem_name, True)
                         else:
                             raise ImportError(
-                                f"Unsupported module of {base_type}: {base_type_module}"
+                                f"Unsupported module of {containing_type}"
                             )
+                        logger.debug(
+                            f"{containing_type} containing type of parent {elem_name} with {elem_type} and ctype {ctype_complex_type} and length {type_length}"
+                        )
+                        new_dep_node.set_field_containing_type(
+                            elem_name, containing_type
+                        )
+                        new_dep_node.set_field_type_size(elem_name, type_length)
+                        new_dep_node.set_field_ctype_complex_type(
+                            elem_name, ctype_complex_type
+                        )
+                        new_dep_node.set_field_type(elem_name, elem_type)
+                        if containing_type.__module__ == "vmlinux":
+                            containing_type_name = (
+                                containing_type.__name__
+                                if hasattr(containing_type, "__name__")
+                                else str(containing_type)
+                            )
+                            # Check for self-reference or already processed
+                            if containing_type_name == current_symbol_name:
+                                # Self-referential pointer
+                                logger.debug(
+                                    f"Self-referential pointer in {current_symbol_name}.{elem_name}"
+                                )
+                                new_dep_node.set_field_ready(elem_name, True)
+                            elif handler.has_node(containing_type_name):
+                                # Already processed
+                                logger.debug(
+                                    f"Reusing already processed {containing_type_name}"
+                                )
+                                new_dep_node.set_field_ready(elem_name, True)
+                            else:
+                                # Process recursively - THIS WAS MISSING
+                                new_dep_node.add_dependent(containing_type_name)
+                                process_vmlinux_post_ast(
+                                    containing_type,
+                                    llvm_handler,
+                                    handler,
+                                    processing_stack,
+                                )
+                                new_dep_node.set_field_ready(elem_name, True)
+                        elif containing_type.__module__ == ctypes.__name__:
+                            logger.debug(f"Processing ctype internal{containing_type}")
+                            new_dep_node.set_field_ready(elem_name, True)
+                        else:
+                            raise TypeError(
+                                "Module not supported in recursive resolution"
+                            )
                     else:
                         new_dep_node.add_dependent(
@@ -312,12 +245,9 @@ process_vmlinux_post_ast(
                     raise ValueError(
                         f"{elem_name} with type {elem_type} from module {module_name} not supported in recursive resolver"
                     )
-    elif module_name == ctypes.__name__ or module_name is None:
-        # Handle ctypes types - these don't need processing, just return
-        logger.debug(f"Skipping ctypes type {current_symbol_name}")
-        return True
     else:
-        raise ImportError(f"UNSUPPORTED Module {module_name}")
+        raise ImportError("UNSUPPORTED Module")
 
     logger.info(
         f"{current_symbol_name} processed and handler readiness {handler.is_ready}"

View File

@@ -11,9 +11,7 @@ from .class_handler import process_vmlinux_class
 logger = logging.getLogger(__name__)
 
 
-def detect_import_statement(
-    tree: ast.AST,
-) -> list[tuple[str, ast.ImportFrom, str, str]]:
+def detect_import_statement(tree: ast.AST) -> list[tuple[str, ast.ImportFrom]]:
     """
     Parse AST and detect import statements from vmlinux.
@@ -27,7 +25,7 @@ def detect_import_statement(
         List of tuples containing (module_name, imported_item) for each vmlinux import
 
     Raises:
-        SyntaxError: If import * is used
+        SyntaxError: If multiple imports from vmlinux are attempted or import * is used
     """
     vmlinux_imports = []
@@ -42,19 +40,28 @@ def detect_import_statement(
                     "Please import specific types explicitly."
                 )
 
+            # Check for multiple imports: from vmlinux import A, B, C
+            if len(node.names) > 1:
+                imported_names = [alias.name for alias in node.names]
+                raise SyntaxError(
+                    f"Multiple imports from vmlinux are not supported. "
+                    f"Found: {', '.join(imported_names)}. "
+                    f"Please use separate import statements for each type."
+                )
+
             # Check if no specific import is specified (should not happen with valid Python)
             if len(node.names) == 0:
                 raise SyntaxError(
                     "Import from vmlinux must specify at least one type."
                 )
 
-            # Support multiple imports: from vmlinux import A, B, C
+            # Valid single import
             for alias in node.names:
                 import_name = alias.name
-                # Use alias if provided, otherwise use the original name
-                as_name = alias.asname if alias.asname else alias.name
-                vmlinux_imports.append(("vmlinux", node, import_name, as_name))
-                logger.info(f"Found vmlinux import: {import_name} as {as_name}")
+                # Use alias if provided, otherwise use the original name (commented)
+                # as_name = alias.asname if alias.asname else alias.name
+                vmlinux_imports.append(("vmlinux", node))
+                logger.info(f"Found vmlinux import: {import_name}")
 
         # Handle "import vmlinux" statements (not typical but should be rejected)
         elif isinstance(node, ast.Import):
@@ -96,37 +103,40 @@ def vmlinux_proc(tree: ast.AST, module):
     with open(source_file, "r") as f:
         mod_ast = ast.parse(f.read(), filename=source_file)
 
-    for import_mod, import_node, imported_name, as_name in import_statements:
-        found = False
-        for mod_node in mod_ast.body:
-            if isinstance(mod_node, ast.ClassDef) and mod_node.name == imported_name:
-                process_vmlinux_class(mod_node, module, handler)
-                found = True
-                break
-            if isinstance(mod_node, ast.Assign):
-                for target in mod_node.targets:
-                    if isinstance(target, ast.Name) and target.id == imported_name:
-                        process_vmlinux_assign(mod_node, module, assignments, as_name)
-                        found = True
-                        break
-                if found:
-                    break
-        if not found:
-            logger.info(f"{imported_name} not found as ClassDef or Assign in vmlinux")
+    for import_mod, import_node in import_statements:
+        for alias in import_node.names:
+            imported_name = alias.name
+            found = False
+            for mod_node in mod_ast.body:
+                if (
+                    isinstance(mod_node, ast.ClassDef)
+                    and mod_node.name == imported_name
+                ):
+                    process_vmlinux_class(mod_node, module, handler)
+                    found = True
+                    break
+                if isinstance(mod_node, ast.Assign):
+                    for target in mod_node.targets:
+                        if isinstance(target, ast.Name) and target.id == imported_name:
+                            process_vmlinux_assign(mod_node, module, assignments)
+                            found = True
+                            break
+                    if found:
+                        break
+            if not found:
+                logger.info(
+                    f"{imported_name} not found as ClassDef or Assign in vmlinux"
+                )
 
     IRGenerator(module, handler, assignments)
     return assignments
 
 
-def process_vmlinux_assign(
-    node, module, assignments: dict[str, AssignmentInfo], target_name=None
-):
+def process_vmlinux_assign(node, module, assignments: dict[str, AssignmentInfo]):
     """Process assignments from vmlinux module."""
     # Only handle single-target assignments
     if len(node.targets) == 1 and isinstance(node.targets[0], ast.Name):
-        # Use provided target_name (for aliased imports) or fall back to original name
-        if target_name is None:
-            target_name = node.targets[0].id
+        target_name = node.targets[0].id
 
         # Handle constant value assignments
         if isinstance(node.value, ast.Constant):
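Illustrative only: the import contract enforced by detect_import_statement after this change (this snippet assumes the pythonbpf tooling, where the vmlinux module is resolvable).

# Accepted: one type per "from vmlinux import ..." statement
from vmlinux import struct_pt_regs
from vmlinux import struct_request

# Rejected with SyntaxError:
# from vmlinux import struct_pt_regs, struct_request   # multiple names in one statement
# from vmlinux import *                                 # star import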

View File

@@ -21,7 +21,7 @@ def debug_info_generation(
         generated_debug_info: List of tuples (struct, debug_info) to track generated debug info
 
     Returns:
-        The generated global variable debug info, or None for unsupported types
+        The generated global variable debug info
     """
     # Set up debug info generator
     generator = DebugInfoGenerator(llvm_module)
@@ -31,42 +31,23 @@ debug_info_generation(
         if existing_struct.name == struct.name:
             return debug_info
 
-    # Check if this is a union (not supported yet)
-    if not struct.name.startswith("struct_"):
-        logger.warning(f"Skipping debug info generation for union: {struct.name}")
-        # Create a minimal forward declaration for unions
-        union_type = generator.create_struct_type(
-            [], struct.__sizeof__() * 8, is_distinct=True
-        )
-        return union_type
-
     # Process all fields and create members for the struct
     members = []
+    for field_name, field in struct.fields.items():
+        # Get appropriate debug type for this field
+        field_type = _get_field_debug_type(
+            field_name, field, generator, struct, generated_debug_info
+        )
+        # Create struct member with proper offset
+        member = generator.create_struct_member_vmlinux(
+            field_name, field_type, field.offset * 8
+        )
+        members.append(member)
 
-    sorted_fields = sorted(struct.fields.items(), key=lambda item: item[1].offset)
-
-    for field_name, field in sorted_fields:
-        try:
-            # Get appropriate debug type for this field
-            field_type = _get_field_debug_type(
-                field_name, field, generator, struct, generated_debug_info
-            )
-            # Ensure field_type is a tuple
-            if not isinstance(field_type, tuple) or len(field_type) != 2:
-                logger.error(f"Invalid field_type for {field_name}: {field_type}")
-                continue
-            # Create struct member with proper offset
-            member = generator.create_struct_member_vmlinux(
-                field_name, field_type, field.offset * 8
-            )
-            members.append(member)
-        except Exception as e:
-            logger.error(f"Failed to process field {field_name} in {struct.name}: {e}")
-            continue
-
-    struct_name = struct.name.removeprefix("struct_")
+    if struct.name.startswith("struct_"):
+        struct_name = struct.name.removeprefix("struct_")
+    else:
+        raise ValueError("Unions are not supported in the current version")
 
     # Create struct type with all members
     struct_type = generator.create_struct_type_with_name(
         struct_name, members, struct.__sizeof__() * 8, is_distinct=True
@@ -93,19 +74,11 @@ _get_field_debug_type(
         generated_debug_info: List of already generated debug info
 
     Returns:
-        A tuple of (debug_type, size_in_bits)
+        The debug info type for this field
     """
-    # Handle complex types (arrays, pointers, function pointers)
-    if field.ctype_complex_type is not None:
-        # Handle function pointer types (CFUNCTYPE)
-        if callable(field.ctype_complex_type):
-            # Function pointers are represented as void pointers
-            logger.warning(
-                f"Field {field_name} is a function pointer, using void pointer"
-            )
-            void_ptr = generator.create_pointer_type(None, 64)
-            return void_ptr, 64
-        elif issubclass(field.ctype_complex_type, ctypes.Array):
+    # Handle complex types (arrays, pointers)
+    if field.ctype_complex_type is not None:
+        if issubclass(field.ctype_complex_type, ctypes.Array):
             # Handle array types
             element_type, base_type_size = _get_basic_debug_type(
                 field.containing_type, generator
@@ -127,13 +100,11 @@ _get_field_debug_type(
     for existing_struct, debug_info in generated_debug_info:
         if existing_struct.name == struct_name:
             # Use existing debug info
-            return debug_info, existing_struct.__sizeof__() * 8
+            return debug_info, existing_struct.__sizeof__()
 
     # If not found, create a forward declaration
     # This will be completed when the actual struct is processed
-    logger.info(
-        f"Forward declaration created for {struct_name} in {parent_struct.name}"
-    )
+    logger.warning("Forward declaration in struct created")
     forward_type = generator.create_struct_type([], 0, is_distinct=True)
     return forward_type, 0

View File

@@ -11,10 +11,6 @@ logger = logging.getLogger(__name__)
 
 class IRGenerator:
-    # This field keeps track of the non_struct names to avoid duplicate name errors.
-    type_number = 0
-    unprocessed_store: list[str] = []
-
     # get the assignments dict and add this stuff to it.
     def __init__(self, llvm_module, handler: DependencyHandler, assignments):
         self.llvm_module = llvm_module
@@ -133,19 +129,7 @@ class IRGenerator:
             for field_name, field in struct.fields.items():
                 # does not take arrays and similar types into consideration yet.
-                if callable(field.ctype_complex_type):
-                    # Function pointer case - generate a simple field accessor
-                    field_co_re_name, returned = self._struct_name_generator(
-                        struct, field, field_index
-                    )
-                    field_index += 1
-                    globvar = ir.GlobalVariable(
-                        self.llvm_module, ir.IntType(64), name=field_co_re_name
-                    )
-                    globvar.linkage = "external"
-                    globvar.set_metadata("llvm.preserve.access.index", debug_info)
-                    self.generated_field_names[struct.name][field_name] = globvar
-                elif field.ctype_complex_type is not None and issubclass(
+                if field.ctype_complex_type is not None and issubclass(
                     field.ctype_complex_type, ctypes.Array
                 ):
                     array_size = field.type_size
@@ -153,7 +137,7 @@ class IRGenerator:
                     if containing_type.__module__ == ctypes.__name__:
                         containing_type_size = ctypes.sizeof(containing_type)
                         if array_size == 0:
-                            field_co_re_name, returned = self._struct_name_generator(
+                            field_co_re_name = self._struct_name_generator(
                                 struct, field, field_index, True, 0, containing_type_size
                             )
                             globvar = ir.GlobalVariable(
@@ -165,7 +149,7 @@ class IRGenerator:
                             field_index += 1
                             continue
                         for i in range(0, array_size):
-                            field_co_re_name, returned = self._struct_name_generator(
+                            field_co_re_name = self._struct_name_generator(
                                 struct, field, field_index, True, i, containing_type_size
                             )
                             globvar = ir.GlobalVariable(
@@ -179,28 +163,12 @@ class IRGenerator:
                     array_size = field.type_size
                     containing_type = field.containing_type
                     if containing_type.__module__ == "vmlinux":
-                        # Unwrap all pointer layers to get the base struct type
-                        base_containing_type = containing_type
-                        while hasattr(base_containing_type, "_type_"):
-                            next_type = base_containing_type._type_
-                            # Stop if _type_ is a string (like 'c' for c_char)
-                            # TODO: stacked pointers not handl;ing ctypes check here as well
-                            if isinstance(next_type, str):
-                                break
-                            base_containing_type = next_type
-                        # Get the base struct name
-                        base_struct_name = (
-                            base_containing_type.__name__
-                            if hasattr(base_containing_type, "__name__")
-                            else str(base_containing_type)
-                        )
-                        # Look up the size using the base struct name
-                        containing_type_size = self.handler[base_struct_name].current_offset
-                        if array_size == 0:
-                            field_co_re_name, returned = self._struct_name_generator(
-                                struct, field, field_index, True, 0, containing_type_size
+                        containing_type_size = self.handler[
+                            containing_type.__name__
+                        ].current_offset
+                        for i in range(0, array_size):
+                            field_co_re_name = self._struct_name_generator(
+                                struct, field, field_index, True, i, containing_type_size
                             )
                             globvar = ir.GlobalVariable(
                                 self.llvm_module, ir.IntType(64), name=field_co_re_name
@@ -208,30 +176,9 @@ class IRGenerator:
                             globvar.linkage = "external"
                             globvar.set_metadata("llvm.preserve.access.index", debug_info)
                             self.generated_field_names[struct.name][field_name] = globvar
                             field_index += 1
-                        else:
-                            for i in range(0, array_size):
-                                field_co_re_name, returned = self._struct_name_generator(
-                                    struct,
-                                    field,
-                                    field_index,
-                                    True,
-                                    i,
-                                    containing_type_size,
-                                )
-                                globvar = ir.GlobalVariable(
-                                    self.llvm_module, ir.IntType(64), name=field_co_re_name
-                                )
-                                globvar.linkage = "external"
-                                globvar.set_metadata(
-                                    "llvm.preserve.access.index", debug_info
-                                )
-                                self.generated_field_names[struct.name][field_name] = (
-                                    globvar
-                                )
-                                field_index += 1
                 else:
-                    field_co_re_name, returned = self._struct_name_generator(
+                    field_co_re_name = self._struct_name_generator(
                         struct, field, field_index
                     )
                     field_index += 1
@@ -251,7 +198,7 @@ class IRGenerator:
         is_indexed: bool = False,
         index: int = 0,
         containing_type_size: int = 0,
-    ) -> tuple[str, bool]:
+    ) -> str:
         # TODO: Does not support Unions as well as recursive pointer and array type naming
         if is_indexed:
             name = (
@@ -261,7 +208,7 @@ class IRGenerator:
                 + "$"
                 + f"0:{field_index}:{index}"
             )
-            return name, True
+            return name
         elif struct.name.startswith("struct_"):
             name = (
                 "llvm."
@@ -270,18 +217,9 @@ class IRGenerator:
                 + "$"
                 + f"0:{field_index}"
             )
-            return name, True
+            return name
         else:
-            logger.warning(
-                "Blindly handling non-struct type to avoid type errors in vmlinux IR generation. Possibly a union."
-            )
-            self.type_number += 1
-            unprocessed_type = "unprocessed_type_" + str(self.handler[struct.name].name)
-            if self.unprocessed_store.__contains__(unprocessed_type):
-                return unprocessed_type + "_" + str(self.type_number), False
-            else:
-                self.unprocessed_store.append(unprocessed_type)
-                return unprocessed_type, False
-            # raise TypeError(
-            #     "Name generation cannot occur due to type name not starting with struct"
-            # )
+            print(self.handler[struct.name])
+            raise TypeError(
+                "Name generation cannot occur due to type name not starting with struct"
+            )

View File

@@ -94,140 +94,17 @@ class VmlinuxHandler:
                 f"Attempting to access field {field_name} of possible vmlinux struct {struct_var_name}"
             )
             python_type: type = var_info.metadata
-            # Check if this is a context field (ctx) or a cast struct
-            is_context_field = var_info.var is None
-
-            if is_context_field:
-                # Handle context field access (original behavior)
-                struct_name = python_type.__name__
-                globvar_ir, field_data = self.get_field_type(struct_name, field_name)
-                builder.function.args[0].type = ir.PointerType(ir.IntType(8))
-                field_ptr = self.load_ctx_field(
-                    builder,
-                    builder.function.args[0],
-                    globvar_ir,
-                    field_data,
-                    struct_name,
-                )
-                return field_ptr, field_data
-            else:
-                # Handle cast struct field access
-                struct_name = python_type.__name__
-                globvar_ir, field_data = self.get_field_type(struct_name, field_name)
-                # Handle cast struct field access (use bpf_probe_read_kernel)
-                # Load the struct pointer from the local variable
-                struct_ptr = builder.load(var_info.var)
-                # Use bpf_probe_read_kernel for non-context struct field access
-                field_value = self.load_struct_field(
-                    builder, struct_ptr, globvar_ir, field_data, struct_name
-                )
-                # Return field value and field type
-                return field_value, field_data
+            struct_name = python_type.__name__
+            globvar_ir, field_data = self.get_field_type(struct_name, field_name)
+            builder.function.args[0].type = ir.PointerType(ir.IntType(8))
+            field_ptr = self.load_ctx_field(
+                builder, builder.function.args[0], globvar_ir, field_data, struct_name
+            )
+            # Return pointer to field and field type
+            return field_ptr, field_data
         else:
             raise RuntimeError("Variable accessed not found in symbol table")
 
-    @staticmethod
-    def load_struct_field(
-        builder, struct_ptr_int, offset_global, field_data, struct_name=None
-    ):
-        """
-        Generate LLVM IR to load a field from a regular (non-context) struct using bpf_probe_read_kernel.
-
-        Args:
-            builder: llvmlite IRBuilder instance
-            struct_ptr_int: The struct pointer as an i64 value (already loaded from alloca)
-            offset_global: Global variable containing the field offset (i64)
-            field_data: contains data about the field
-            struct_name: Name of the struct being accessed (optional)
-
-        Returns:
-            The loaded value
-        """
-        # Load the offset value
-        offset = builder.load(offset_global)
-
-        # Convert i64 to pointer type (BPF stores pointers as i64)
-        i8_ptr_type = ir.PointerType(ir.IntType(8))
-        struct_ptr = builder.inttoptr(struct_ptr_int, i8_ptr_type)
-
-        # GEP with offset to get field pointer
-        field_ptr = builder.gep(
-            struct_ptr,
-            [offset],
-            inbounds=False,
-        )
-
-        # Determine the appropriate field size based on field information
-        field_size_bytes = 8  # Default to 8 bytes (64-bit)
-        int_width = 64  # Default to 64-bit
-        needs_zext = False
-
-        if field_data is not None:
-            # Try to determine the size from field metadata
-            if field_data.type.__module__ == ctypes.__name__:
-                try:
-                    field_size_bytes = ctypes.sizeof(field_data.type)
-                    field_size_bits = field_size_bytes * 8
-                    if field_size_bits in [8, 16, 32, 64]:
-                        int_width = field_size_bits
-                        logger.info(
-                            f"Determined field size: {int_width} bits ({field_size_bytes} bytes)"
-                        )
-                        # Special handling for struct_xdp_md i32 fields
-                        if struct_name == "struct_xdp_md" and int_width == 32:
-                            needs_zext = True
-                            logger.info(
-                                "struct_xdp_md i32 field detected, will zero-extend to i64"
-                            )
-                    else:
-                        logger.warning(
-                            f"Unusual field size {field_size_bits} bits, using default 64"
-                        )
-                except Exception as e:
-                    logger.warning(
-                        f"Could not determine field size: {e}, using default 64"
-                    )
-            elif field_data.type.__module__ == "vmlinux":
-                # For pointers to structs or complex vmlinux types
-                if field_data.ctype_complex_type is not None and issubclass(
-                    field_data.ctype_complex_type, ctypes._Pointer
-                ):
-                    int_width = 64  # Pointers are always 64-bit
-                    field_size_bytes = 8
-                    logger.info("Field is a pointer type, using 64 bits")
-                else:
-                    logger.warning("Complex vmlinux field type, using default 64 bits")
-
-        # Allocate local storage for the field value
-        local_storage = builder.alloca(ir.IntType(int_width))
-        local_storage_i8_ptr = builder.bitcast(local_storage, i8_ptr_type)
-
-        # Use bpf_probe_read_kernel to safely read the field
-        # This generates:
-        #   %gep = getelementptr i8, ptr %struct_ptr, i64 %offset (already done above as field_ptr)
-        #   %passed = tail call ptr @llvm.bpf.passthrough.p0.p0(i32 2, ptr %gep)
-        #   %result = call i64 inttoptr (i64 113 to ptr)(ptr %local_storage, i32 %size, ptr %passed)
-        from pythonbpf.helper import emit_probe_read_kernel_call
-
-        emit_probe_read_kernel_call(
-            builder, local_storage_i8_ptr, field_size_bytes, field_ptr
-        )
-
-        # Load the value from local storage
-        value = builder.load(local_storage)
-
-        # Zero-extend i32 to i64 if needed
-        if needs_zext:
-            value = builder.zext(value, ir.IntType(64))
-            logger.info("Zero-extended i32 value to i64")
-
-        return value
-
     @staticmethod
     def load_ctx_field(builder, ctx_arg, offset_global, field_data, struct_name=None):
         """

View File

@@ -3,20 +3,21 @@ CFLAGS := -emit-llvm -target bpf -c
 SRC := $(wildcard *.bpf.c)
 LL := $(SRC:.bpf.c=.bpf.ll)
+LL2 := $(SRC:.bpf.c=.bpf.o2.ll)
 OBJ := $(SRC:.bpf.c=.bpf.o)
-LL0 := $(SRC:.bpf.c=.bpf.o0.ll)
 
 .PHONY: all clean
 
-all: $(LL) $(OBJ) $(LL0)
+all: $(LL) $(OBJ) $(LL2)
 
 %.bpf.o: %.bpf.c
 	$(BPF_CLANG) -O2 -g -target bpf -c $< -o $@
 
 %.bpf.ll: %.bpf.c
-	$(BPF_CLANG) $(CFLAGS) -O2 -g -S $< -o $@
+	$(BPF_CLANG) -O0 $(CFLAGS) -g -S $< -o $@
 
-%.bpf.o0.ll: %.bpf.c
-	$(BPF_CLANG) $(CFLAGS) -O0 -g -S $< -o $@
+%.bpf.o2.ll: %.bpf.c
+	$(BPF_CLANG) -O2 $(CFLAGS) -g -S $< -o $@
 
 clean:
-	rm -f $(LL) $(OBJ) $(LL0)
+	rm -f $(LL) $(OBJ) $(LL2)

View File

@@ -1,18 +0,0 @@
-#include "vmlinux.h"
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
-#include <bpf/bpf_core_read.h>
-
-char LICENSE[] SEC("license") = "GPL";
-
-SEC("kprobe/blk_mq_start_request")
-int example(struct pt_regs *ctx)
-{
-    u64 a = ctx->r15;
-    struct request *req = (struct request *)(ctx->di);
-    unsigned int something_ns = BPF_CORE_READ(req, timeout);
-    unsigned int data_len = BPF_CORE_READ(req, __data_len);
-    bpf_printk("data length %lld %ld %ld\n", data_len, something_ns, a);
-    return 0;
-}

View File

@@ -1,18 +0,0 @@
-#include "vmlinux.h"
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
-#include <bpf/bpf_core_read.h>
-
-char LICENSE[] SEC("license") = "GPL";
-
-SEC("kprobe/blk_mq_start_request")
-int example(struct pt_regs *ctx)
-{
-    u64 a = ctx->r15;
-    struct request *req = (struct request *)(ctx->di);
-    unsigned int something_ns = req->timeout;
-    unsigned int data_len = req->__data_len;
-    bpf_printk("data length %lld %ld %ld\n", data_len, something_ns, a);
-    return 0;
-}

View File

@@ -1,22 +0,0 @@
-from vmlinux import XDP_PASS
-from pythonbpf import bpf, section, bpfglobal, compile_to_ir
-import logging
-
-from ctypes import c_int64, c_void_p
-
-
-@bpf
-@section("kprobe/blk_mq_start_request")
-def example(ctx: c_void_p) -> c_int64:
-    d = XDP_PASS  # This gives an error, but
-    e = XDP_PASS + 0  # this does not
-    print(f"test1 {e} test2 {d}")
-    return c_int64(0)
-
-
-@bpf
-@bpfglobal
-def LICENSE() -> str:
-    return "GPL"
-
-
-compile_to_ir("assignment_handling.py", "assignment_handling.ll", loglevel=logging.INFO)

View File

@@ -1,42 +0,0 @@
-from pythonbpf import bpf, section, struct, bpfglobal, compile, map
-from pythonbpf.maps import HashMap
-from pythonbpf.helper import pid
-
-from ctypes import c_void_p, c_int64
-
-
-@bpf
-@struct
-class val_type:
-    counter: c_int64
-    shizzle: c_int64
-
-
-@bpf
-@map
-def last() -> HashMap:
-    return HashMap(key=val_type, value=c_int64, max_entries=16)
-
-
-@bpf
-@section("tracepoint/syscalls/sys_enter_clone")
-def hello_world(ctx: c_void_p) -> c_int64:
-    obj = val_type()
-    obj.counter, obj.shizzle = 42, 96
-    t = last.lookup(obj)
-    if t:
-        print(f"Found existing entry: counter={obj.counter}, pid={t}")
-        last.delete(obj)
-        return 0  # type: ignore [return-value]
-    val = pid()
-    last.update(obj, val)
-    print(f"Map updated!, {obj.counter}, {obj.shizzle}, {val}")
-    return 0  # type: ignore [return-value]
-
-
-@bpf
-@bpfglobal
-def LICENSE() -> str:
-    return "GPL"
-
-
-compile()

View File

@@ -1,27 +0,0 @@
-from vmlinux import struct_request, struct_pt_regs
-from pythonbpf import bpf, section, bpfglobal, compile_to_ir, compile
-import logging
-
-from ctypes import c_int64
-
-
-@bpf
-@section("kprobe/blk_mq_start_request")
-def example(ctx: struct_pt_regs) -> c_int64:
-    a = ctx.r15
-    req = struct_request(ctx.di)
-    d = req.__data_len
-    b = ctx.r12
-    c = req.timeout
-    print(f"data length {d} and {c} and {a}")
-    print(f"ctx arg {b}")
-    return c_int64(0)
-
-
-@bpf
-@bpfglobal
-def LICENSE() -> str:
-    return "GPL"
-
-
-compile_to_ir("requests.py", "requests.ll", loglevel=logging.INFO)
-compile()

View File

@@ -1,21 +0,0 @@
-from vmlinux import struct_pt_regs
-from pythonbpf import bpf, section, bpfglobal, compile_to_ir
-import logging
-
-from ctypes import c_int64
-
-
-@bpf
-@section("kprobe/blk_mq_start_request")
-def example(ctx: struct_pt_regs) -> c_int64:
-    req = ctx.di
-    print(f"data length {req}")
-    return c_int64(0)
-
-
-@bpf
-@bpfglobal
-def LICENSE() -> str:
-    return "GPL"
-
-
-compile_to_ir("requests2.py", "requests2.ll", loglevel=logging.INFO)