31 Commits

Author SHA1 Message Date
8ad5fb8a3a Janitorial: Remove useless comments 2025-11-23 06:29:44 +05:30
bf9635e324 Fix passing test hash_map_struct to include char array test 2025-11-23 06:27:01 +05:30
cbe365d760 Unify struct and pointer to struct handling, abstract null check in ir_ops 2025-11-23 06:26:09 +05:30
fed6af1ed6 bump version and prepare for release 2025-11-22 13:54:41 +05:30
18886816fb Merge pull request #68 from pythonbpf/request-struct
Support enough machinery to make request struct work
2025-11-22 13:48:06 +05:30
a2de15fb1e add c_int to type_deducer.py 2025-11-22 13:36:21 +05:30
9def969592 Make map val struct type allocation work by fixing pointer deref and debuginfogen: WIP 2025-11-22 13:20:09 +05:30
081ee5cb4c move requests.py to passing tests 2025-11-22 13:19:55 +05:30
a91c3158ad sort fields in debug info by offset order 2025-11-22 12:35:47 +05:30
2b3635fe20 format chore 2025-11-22 01:48:44 +05:30
6f25c554a9 fix CO-RE read for cast structs 2025-11-22 01:47:25 +05:30
84507b8b98 add btf probe read kernel helper 2025-11-22 00:57:12 +05:30
a42a75179d format chore 2025-11-22 00:37:39 +05:30
377fa4041d add regular struct field access handling in vmlinux_registry.py 2025-11-22 00:36:59 +05:30
99321c7669 add a failing C test 2025-11-21 23:01:08 +05:30
11850d16d3 field check in allocation pass 2025-11-21 21:47:58 +05:30
9ee821c7f6 make pointer allocation feasible by subverting LLC 2025-11-21 21:47:55 +05:30
25394059a6 allow casting 2025-11-21 21:47:10 +05:30
fde8eab775 allow allocation pass on vmlinux cast 2025-11-21 21:47:07 +05:30
42b8865a56 Merge branch 'master' into request-struct 2025-11-21 02:10:52 +05:30
144d9b0ab4 change c-file test structure 2025-11-20 17:24:02 +05:30
902a52a07d remove debug print statements 2025-11-20 14:39:13 +05:30
306570953b format chore 2025-11-20 14:18:45 +05:30
740eed45e1 add placeholder debug info to shut llvmlite up about NoneType 2025-11-20 14:17:57 +05:30
c8801f4c3e nonetype not parsed 2025-11-19 23:35:10 +05:30
49740598ea format chore 2025-11-13 09:31:10 +05:30
73bbf00e7c add tests 2025-11-13 09:29:53 +05:30
f7dee329cb fix nested pointers issue in array generation and also fix zero length array IR generation 2025-11-10 20:29:28 +05:30
5031f90377 fix stacked vmlinux struct parsing issue 2025-11-10 20:06:04 +05:30
95a624044a fix type error 2025-11-08 20:28:56 +05:30
c5bef26b88 add multi imports to single import line. 2025-11-08 18:08:04 +05:30
24 changed files with 880 additions and 213 deletions

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "pythonbpf"
-version = "0.1.6"
+version = "0.1.7"
 description = "Reduced Python frontend for eBPF"
 authors = [
     { name = "r41k0u", email="pragyanshchaturvedi18@gmail.com" },
@@ -29,7 +29,7 @@ license = {text = "Apache-2.0"}
 requires-python = ">=3.10"
 dependencies = [
-    "llvmlite",
+    "llvmlite>=0.45",
     "astpretty",
     "pylibbpf"
 ]

View File

@@ -118,6 +118,18 @@ def _allocate_for_call(
         local_sym_tab[var_name] = LocalSymbol(var, struct_info.ir_type, call_type)
         logger.info(f"Pre-allocated {var_name} for struct {call_type}")
+    elif VmlinuxHandlerRegistry.is_vmlinux_struct(call_type):
+        # When calling struct_name(pointer), we're doing a cast, not construction
+        # So we allocate as a pointer (i64) not as the actual struct
+        var = builder.alloca(ir.IntType(64), name=var_name)
+        var.align = 8
+        local_sym_tab[var_name] = LocalSymbol(
+            var, ir.IntType(64), VmlinuxHandlerRegistry.get_struct_type(call_type)
+        )
+        logger.info(
+            f"Pre-allocated {var_name} for vmlinux struct pointer cast to {call_type}"
+        )
     else:
         logger.warning(f"Unknown call type for allocation: {call_type}")
@@ -178,7 +190,7 @@ def _allocate_for_map_method(
     # Main variable (pointer to pointer)
     ir_type = ir.PointerType(ir.IntType(64))
     var = builder.alloca(ir_type, name=var_name)
-    local_sym_tab[var_name] = LocalSymbol(var, ir_type)
+    local_sym_tab[var_name] = LocalSymbol(var, ir_type, value_type)
     # Temporary variable for computed values
     tmp_ir_type = value_ir_type
     var_tmp = builder.alloca(tmp_ir_type, name=f"{var_name}_tmp")
@@ -325,13 +337,6 @@ def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, structs_sym_
                 VmlinuxHandlerRegistry.get_field_type(vmlinux_struct_name, field_name)
             )
             field_ir, field = field_type
-            # TODO: For now, we only support integer type allocations.
-            # This always assumes first argument of function to be the context struct
-            base_ptr = builder.function.args[0]
-            local_sym_tab[
-                struct_var
-            ].var = base_ptr  # This is repurposing of var to store the pointer of the base type
-            local_sym_tab[struct_var].ir_type = field_ir

             # Determine the actual IR type based on the field's type
             actual_ir_type = None
@@ -386,12 +391,14 @@
                 )
                 actual_ir_type = ir.IntType(64)

-            # Allocate with the actual IR type, not the GlobalVariable
+            # Allocate with the actual IR type
             var = _allocate_with_type(builder, var_name, actual_ir_type)
-            local_sym_tab[var_name] = LocalSymbol(var, actual_ir_type, field)
+            local_sym_tab[var_name] = LocalSymbol(
+                var, actual_ir_type, field
+            )  # <-- Store Field metadata
             logger.info(
-                f"Pre-allocated {var_name} from vmlinux struct {vmlinux_struct_name}.{field_name}"
+                f"Pre-allocated {var_name} as {actual_ir_type} from vmlinux struct {vmlinux_struct_name}.{field_name}"
             )
             return
         else:
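
For orientation, here is a minimal standalone llvmlite sketch (module, function, and type names are illustrative, not taken from the codebase) contrasting the two allocation strategies handled above: constructing a user-defined struct allocates the struct type itself, while a vmlinux cast target only needs an 8-byte i64 slot that will later hold the kernel pointer, mirroring the `var.align = 8` pattern in the new branch.

from llvmlite import ir

# Illustrative sketch: struct construction vs. vmlinux pointer-cast allocation.
mod = ir.Module(name="alloc_sketch")
fn = ir.Function(mod, ir.FunctionType(ir.VoidType(), []), name="alloc_demo")
builder = ir.IRBuilder(fn.append_basic_block("entry"))

# Construction case: allocate the struct's own LLVM type.
event_ty = ir.LiteralStructType([ir.IntType(64), ir.IntType(64)])
event = builder.alloca(event_ty, name="event")

# Cast case: allocate a single i64 slot for the kernel pointer.
req_slot = builder.alloca(ir.IntType(64), name="req")
req_slot.align = 8  # same alignment the allocation pass sets

builder.ret_void()
print(mod)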

View File

@@ -1,5 +1,7 @@
 import ast
 import logging
+from inspect import isclass
+
 from llvmlite import ir
 from pythonbpf.expr import eval_expr
 from pythonbpf.helper import emit_probe_read_kernel_str_call
@@ -148,8 +150,30 @@ def handle_variable_assignment(
         return False

     val, val_type = val_result
-    logger.info(f"Evaluated value for {var_name}: {val} of type {val_type}, {var_type}")
+    logger.info(
+        f"Evaluated value for {var_name}: {val} of type {val_type}, expected {var_type}"
+    )
     if val_type != var_type:
+        # Handle vmlinux struct pointers - they're represented as Python classes but are i64 pointers
+        if isclass(val_type) and (val_type.__module__ == "vmlinux"):
+            logger.info("Handling vmlinux struct pointer assignment")
+            # vmlinux struct pointers: val is a pointer, need to convert to i64
+            if isinstance(var_type, ir.IntType) and var_type.width == 64:
+                # Convert pointer to i64 using ptrtoint
+                if isinstance(val.type, ir.PointerType):
+                    val = builder.ptrtoint(val, ir.IntType(64))
+                    logger.info(
+                        "Converted vmlinux struct pointer to i64 using ptrtoint"
+                    )
+                builder.store(val, var_ptr)
+                logger.info(f"Assigned vmlinux struct pointer to {var_name} (i64)")
+                return True
+            else:
+                logger.error(
+                    f"Type mismatch: vmlinux struct pointer requires i64, got {var_type}"
+                )
+                return False
+
         if isinstance(val_type, Field):
             logger.info("Handling assignment to struct field")
             # Special handling for struct_xdp_md i32 fields that are zero-extended to i64

View File

@@ -25,7 +25,7 @@ import re
 logger: Logger = logging.getLogger(__name__)

-VERSION = "v0.1.6"
+VERSION = "v0.1.7"

 def finalize_module(original_str):

View File

@@ -1,6 +1,6 @@
 from .expr_pass import eval_expr, handle_expr, get_operand_value
 from .type_normalization import convert_to_bool, get_base_type_and_depth
-from .ir_ops import deref_to_depth
+from .ir_ops import deref_to_depth, access_struct_field
 from .call_registry import CallHandlerRegistry
 from .vmlinux_registry import VmlinuxHandlerRegistry
@@ -10,6 +10,7 @@ __all__ = [
     "convert_to_bool",
     "get_base_type_and_depth",
     "deref_to_depth",
+    "access_struct_field",
     "get_operand_value",
     "CallHandlerRegistry",
     "VmlinuxHandlerRegistry",

View File

@@ -6,14 +6,14 @@ from typing import Dict
 from pythonbpf.type_deducer import ctypes_to_ir, is_ctypes
 from .call_registry import CallHandlerRegistry
+from .ir_ops import deref_to_depth, access_struct_field
 from .type_normalization import (
     convert_to_bool,
     handle_comparator,
     get_base_type_and_depth,
-    deref_to_depth,
 )
-from pythonbpf.vmlinux_parser.assignment_info import Field
 from .vmlinux_registry import VmlinuxHandlerRegistry
+from ..vmlinux_parser.dependency_node import Field

 logger: Logger = logging.getLogger(__name__)
@@ -61,6 +61,7 @@ def _handle_constant_expr(module, builder, expr: ast.Constant):
 def _handle_attribute_expr(
+    func,
     expr: ast.Attribute,
     local_sym_tab: Dict,
     structs_sym_tab: Dict,
@@ -89,12 +90,30 @@ def _handle_attribute_expr(
                     return vmlinux_result
                 else:
                     raise RuntimeError("Vmlinux struct did not process successfully")
-            metadata = structs_sym_tab[var_metadata]
-            if attr_name in metadata.fields:
-                gep = metadata.gep(builder, var_ptr, attr_name)
-                val = builder.load(gep)
-                field_type = metadata.field_type(attr_name)
-                return val, field_type
+            elif isinstance(var_metadata, Field):
+                logger.error(
+                    f"Cannot access field '{attr_name}' on already-loaded field value '{var_name}'"
+                )
+                return None
+
+            if var_metadata in structs_sym_tab:
+                return access_struct_field(
+                    builder,
+                    var_ptr,
+                    var_type,
+                    var_metadata,
+                    expr.attr,
+                    structs_sym_tab,
+                    func,
+                )
+            else:
+                logger.error(f"Struct metadata for '{var_name}' not found")
+        else:
+            logger.error(f"Undefined variable '{var_name}' for attribute access")
+    else:
+        logger.error("Unsupported attribute base expression type")

     return None
@@ -525,6 +544,66 @@ def _handle_boolean_op(
     return None

+# ============================================================================
+# VMLinux casting
+# ============================================================================
+
+
+def _handle_vmlinux_cast(
+    func,
+    module,
+    builder,
+    expr,
+    local_sym_tab,
+    map_sym_tab,
+    structs_sym_tab=None,
+):
+    # handle expressions such as struct_request(ctx.di) where struct_request is a vmlinux
+    # struct and ctx.di is a pointer to a struct but is actually represented as a c_uint64
+    # which needs to be cast to a pointer. This is also a field of another vmlinux struct
+    """Handle vmlinux struct cast expressions like struct_request(ctx.di)."""
+    if len(expr.args) != 1:
+        logger.info("vmlinux struct cast takes exactly one argument")
+        return None
+
+    # Get the struct name
+    struct_name = expr.func.id
+
+    # Evaluate the argument (e.g., ctx.di which is a c_uint64)
+    arg_result = eval_expr(
+        func,
+        module,
+        builder,
+        expr.args[0],
+        local_sym_tab,
+        map_sym_tab,
+        structs_sym_tab,
+    )
+    if arg_result is None:
+        logger.info("Failed to evaluate argument to vmlinux struct cast")
+        return None
+
+    arg_val, arg_type = arg_result
+
+    # Get the vmlinux struct type
+    vmlinux_struct_type = VmlinuxHandlerRegistry.get_struct_type(struct_name)
+    if vmlinux_struct_type is None:
+        logger.error(f"Failed to get vmlinux struct type for {struct_name}")
+        return None
+
+    # Cast the integer/value to a pointer to the struct
+    # If arg_val is an integer type, we need to inttoptr it
+    ptr_type = ir.PointerType()
+    # TODO: add a integer check here later
+    if ctypes_to_ir(arg_type.type.__name__):
+        # Cast integer to pointer
+        casted_ptr = builder.inttoptr(arg_val, ptr_type)
+    else:
+        logger.error(f"Unsupported type for vmlinux cast: {arg_type}")
+        return None
+
+    return casted_ptr, vmlinux_struct_type
+
+
 # ============================================================================
 # Expression Dispatcher
 # ============================================================================
@@ -545,6 +624,18 @@
     elif isinstance(expr, ast.Constant):
         return _handle_constant_expr(module, builder, expr)
     elif isinstance(expr, ast.Call):
+        if isinstance(expr.func, ast.Name) and VmlinuxHandlerRegistry.is_vmlinux_struct(
+            expr.func.id
+        ):
+            return _handle_vmlinux_cast(
+                func,
+                module,
+                builder,
+                expr,
+                local_sym_tab,
+                map_sym_tab,
+                structs_sym_tab,
+            )
+
         if isinstance(expr.func, ast.Name) and expr.func.id == "deref":
             return _handle_deref_call(expr, local_sym_tab, builder)
@@ -568,7 +659,9 @@
             logger.warning(f"Unknown call: {ast.dump(expr)}")
             return None
     elif isinstance(expr, ast.Attribute):
-        return _handle_attribute_expr(expr, local_sym_tab, structs_sym_tab, builder)
+        return _handle_attribute_expr(
+            func, expr, local_sym_tab, structs_sym_tab, builder
+        )
     elif isinstance(expr, ast.BinOp):
         return _handle_binary_op(
             func,
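
The dispatcher change above routes `ast.Call` nodes whose callee is a known vmlinux struct name to `_handle_vmlinux_cast` before the generic call handlers. A standalone sketch of the AST shape it matches, using only the standard library (`struct_request` here is just the name exercised by the new tests, not an import):

import ast

# The user-level cast expression the new dispatch branch is meant to catch.
call = ast.parse("req = struct_request(ctx.di)").body[0].value

# Mirrors the added condition in eval_expr: a Call whose func is a bare Name
# can be checked against the vmlinux struct registry by name.
assert isinstance(call, ast.Call)
assert isinstance(call.func, ast.Name) and call.func.id == "struct_request"
# The single argument (ctx.di) is an Attribute; eval_expr evaluates it to a
# c_uint64-typed value before _handle_vmlinux_cast applies inttoptr.
assert isinstance(call.args[0], ast.Attribute) and call.args[0].attr == "di"
print(ast.dump(call, indent=2))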

View File

@@ -17,34 +17,100 @@ def deref_to_depth(func, builder, val, target_depth):
             # dereference with null check
             pointee_type = cur_type.pointee

-            null_check_block = builder.block
-            not_null_block = func.append_basic_block(name=f"deref_not_null_{depth}")
-            merge_block = func.append_basic_block(name=f"deref_merge_{depth}")
-            null_ptr = ir.Constant(cur_type, None)
-            is_not_null = builder.icmp_signed("!=", cur_val, null_ptr)
-            logger.debug(f"Inserted null check for pointer at depth {depth}")
-            builder.cbranch(is_not_null, not_null_block, merge_block)
-
-            builder.position_at_end(not_null_block)
-            dereferenced_val = builder.load(cur_val)
-            logger.debug(f"Dereferenced to depth {depth - 1}, type: {pointee_type}")
-            builder.branch(merge_block)
-
-            builder.position_at_end(merge_block)
-            phi = builder.phi(pointee_type, name=f"deref_result_{depth}")
-            zero_value = (
-                ir.Constant(pointee_type, 0)
-                if isinstance(pointee_type, ir.IntType)
-                else ir.Constant(pointee_type, None)
-            )
-            phi.add_incoming(zero_value, null_check_block)
-            phi.add_incoming(dereferenced_val, not_null_block)
-
-            # Continue with phi result
-            cur_val = phi
+            def load_op(builder, ptr):
+                return builder.load(ptr)
+
+            cur_val = _null_checked_operation(
+                func, builder, cur_val, load_op, pointee_type, f"deref_{depth}"
+            )
             cur_type = pointee_type
+            logger.debug(f"Dereferenced to depth {depth}, type: {pointee_type}")
     return cur_val
+
+
+def _null_checked_operation(func, builder, ptr, operation, result_type, name_prefix):
+    """
+    Generic null-checked operation on a pointer.
+    """
+    curr_block = builder.block
+    not_null_block = func.append_basic_block(name=f"{name_prefix}_not_null")
+    merge_block = func.append_basic_block(name=f"{name_prefix}_merge")
+
+    null_ptr = ir.Constant(ptr.type, None)
+    is_not_null = builder.icmp_signed("!=", ptr, null_ptr)
+    builder.cbranch(is_not_null, not_null_block, merge_block)
+
+    builder.position_at_end(not_null_block)
+    result = operation(builder, ptr)
+    not_null_after = builder.block
+    builder.branch(merge_block)
+
+    builder.position_at_end(merge_block)
+    phi = builder.phi(result_type, name=f"{name_prefix}_result")
+    if isinstance(result_type, ir.IntType):
+        null_val = ir.Constant(result_type, 0)
+    elif isinstance(result_type, ir.PointerType):
+        null_val = ir.Constant(result_type, None)
+    else:
+        null_val = ir.Constant(result_type, ir.Undefined)
+    phi.add_incoming(null_val, curr_block)
+    phi.add_incoming(result, not_null_after)
+    return phi
+
+
+def access_struct_field(
+    builder, var_ptr, var_type, var_metadata, field_name, structs_sym_tab, func=None
+):
+    """
+    Access a struct field - automatically returns value or pointer based on field type.
+    """
+    metadata = (
+        structs_sym_tab.get(var_metadata)
+        if isinstance(var_metadata, str)
+        else var_metadata
+    )
+    if not metadata or field_name not in metadata.fields:
+        raise ValueError(f"Field '{field_name}' not found in struct")
+
+    field_type = metadata.field_type(field_name)
+    is_ptr_to_struct = isinstance(var_type, ir.PointerType) and isinstance(
+        var_metadata, str
+    )
+
+    # Get struct pointer
+    struct_ptr = builder.load(var_ptr) if is_ptr_to_struct else var_ptr
+    should_load = not isinstance(field_type, ir.ArrayType)
+
+    def field_access_op(builder, ptr):
+        typed_ptr = builder.bitcast(ptr, metadata.ir_type.as_pointer())
+        field_ptr = metadata.gep(builder, typed_ptr, field_name)
+        return builder.load(field_ptr) if should_load else field_ptr
+
+    # Handle null check for pointer-to-struct
+    if is_ptr_to_struct:
+        if func is None:
+            raise ValueError("func required for null-safe struct pointer access")
+        if should_load:
+            result_type = field_type
+        else:
+            result_type = field_type.as_pointer()
+        result = _null_checked_operation(
+            func,
+            builder,
+            struct_ptr,
+            field_access_op,
+            result_type,
+            f"field_{field_name}",
+        )
+        return result, field_type
+
+    field_ptr = metadata.gep(builder, struct_ptr, field_name)
+    result = builder.load(field_ptr) if should_load else field_ptr
+    return result, field_type
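
A minimal standalone llvmlite sketch of the branch-and-phi shape that `_null_checked_operation` abstracts (names are illustrative; the real helper additionally parameterizes the operation and picks the null-path value by result type):

from llvmlite import ir

i64 = ir.IntType(64)
mod = ir.Module(name="null_check_sketch")
fn = ir.Function(mod, ir.FunctionType(i64, [ir.PointerType(i64)]), name="load_or_zero")
(ptr,) = fn.args

entry = fn.append_basic_block("entry")
not_null = fn.append_basic_block("not_null")
merge = fn.append_basic_block("merge")

b = ir.IRBuilder(entry)
is_not_null = b.icmp_signed("!=", ptr, ir.Constant(ptr.type, None))
b.cbranch(is_not_null, not_null, merge)

b.position_at_end(not_null)   # safe path: perform the operation (here, a load)
loaded = b.load(ptr)
b.branch(merge)

b.position_at_end(merge)      # merge: phi selects 0 on the null path
phi = b.phi(i64, name="result")
phi.add_incoming(ir.Constant(i64, 0), entry)
phi.add_incoming(loaded, not_null)
b.ret(phi)
print(mod)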

View File

@@ -1,6 +1,10 @@
 from .helper_registry import HelperHandlerRegistry
 from .helper_utils import reset_scratch_pool
-from .bpf_helper_handler import handle_helper_call, emit_probe_read_kernel_str_call
+from .bpf_helper_handler import (
+    handle_helper_call,
+    emit_probe_read_kernel_str_call,
+    emit_probe_read_kernel_call,
+)
 from .helpers import (
     ktime,
     pid,
@@ -74,6 +78,7 @@ __all__ = [
     "reset_scratch_pool",
     "handle_helper_call",
     "emit_probe_read_kernel_str_call",
+    "emit_probe_read_kernel_call",
     "ktime",
     "pid",
     "deref",

View File

@@ -34,6 +34,7 @@ class BPFHelperID(Enum):
     BPF_PERF_EVENT_OUTPUT = 25
     BPF_GET_STACK = 67
     BPF_PROBE_READ_KERNEL_STR = 115
+    BPF_PROBE_READ_KERNEL = 113
     BPF_RINGBUF_OUTPUT = 130
     BPF_RINGBUF_RESERVE = 131
     BPF_RINGBUF_SUBMIT = 132
@@ -574,6 +575,75 @@ def bpf_probe_read_kernel_str_emitter(
     return result, ir.IntType(64)


+def emit_probe_read_kernel_call(builder, dst_ptr, dst_size, src_ptr):
+    """Emit LLVM IR call to bpf_probe_read_kernel"""
+    fn_type = ir.FunctionType(
+        ir.IntType(64),
+        [ir.PointerType(), ir.IntType(32), ir.PointerType()],
+        var_arg=False,
+    )
+    fn_ptr = builder.inttoptr(
+        ir.Constant(ir.IntType(64), BPFHelperID.BPF_PROBE_READ_KERNEL.value),
+        ir.PointerType(fn_type),
+    )
+    result = builder.call(
+        fn_ptr,
+        [
+            builder.bitcast(dst_ptr, ir.PointerType()),
+            ir.Constant(ir.IntType(32), dst_size),
+            builder.bitcast(src_ptr, ir.PointerType()),
+        ],
+        tail=False,
+    )
+    logger.info(f"Emitted bpf_probe_read_kernel (size={dst_size})")
+    return result
+
+
+@HelperHandlerRegistry.register(
+    "probe_read_kernel",
+    param_types=[
+        ir.PointerType(ir.IntType(8)),
+        ir.PointerType(ir.IntType(8)),
+    ],
+    return_type=ir.IntType(64),
+)
+def bpf_probe_read_kernel_emitter(
+    call,
+    map_ptr,
+    module,
+    builder,
+    func,
+    local_sym_tab=None,
+    struct_sym_tab=None,
+    map_sym_tab=None,
+):
+    """Emit LLVM IR for bpf_probe_read_kernel helper."""
+    if len(call.args) != 2:
+        raise ValueError(
+            f"probe_read_kernel expects 2 args (dst, src), got {len(call.args)}"
+        )
+
+    # Get destination buffer (char array -> i8*)
+    dst_ptr, dst_size = get_or_create_ptr_from_arg(
+        func, module, call.args[0], builder, local_sym_tab, map_sym_tab, struct_sym_tab
+    )
+    # Get source pointer (evaluate expression)
+    src_ptr, src_type = get_ptr_from_arg(
+        call.args[1], func, module, builder, local_sym_tab, map_sym_tab, struct_sym_tab
+    )
+
+    # Emit the helper call
+    result = emit_probe_read_kernel_call(builder, dst_ptr, dst_size, src_ptr)
+    logger.info(f"Emitted bpf_probe_read_kernel (size={dst_size})")
+    return result, ir.IntType(64)
+
+
 @HelperHandlerRegistry.register(
     "random",
     param_types=[],
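
For reference, a standalone llvmlite sketch of the call-by-helper-ID pattern used by `emit_probe_read_kernel_call` above: BPF helpers have no symbol to link against, so the numeric ID (113 for bpf_probe_read_kernel) is materialized with inttoptr and called through a function-pointer type. Function and argument names here are illustrative only.

from llvmlite import ir

i32, i64 = ir.IntType(32), ir.IntType(64)
byte_ptr = ir.PointerType(ir.IntType(8))

mod = ir.Module(name="helper_call_sketch")
fn = ir.Function(mod, ir.FunctionType(i64, [byte_ptr, byte_ptr]), name="read_8_bytes")
dst, src = fn.args
b = ir.IRBuilder(fn.append_basic_block("entry"))

# bpf_probe_read_kernel(dst, size, src): cast helper ID 113 to a function pointer.
helper_ty = ir.FunctionType(i64, [byte_ptr, i32, byte_ptr])
helper = b.inttoptr(ir.Constant(i64, 113), ir.PointerType(helper_ty))
ret = b.call(helper, [dst, ir.Constant(i32, 8), src])
b.ret(ret)
print(mod)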

View File

@@ -5,6 +5,7 @@ from llvmlite import ir
 from pythonbpf.expr import (
     get_operand_value,
     eval_expr,
+    access_struct_field,
 )

 logger = logging.getLogger(__name__)
@@ -135,7 +136,7 @@ def get_or_create_ptr_from_arg(
                 and field_type.element.width == 8
             ):
                 ptr, sz = get_char_array_ptr_and_size(
-                    arg, builder, local_sym_tab, struct_sym_tab
+                    arg, builder, local_sym_tab, struct_sym_tab, func
                 )
                 if not ptr:
                     raise ValueError("Failed to get char array pointer from struct field")
@@ -266,7 +267,9 @@ def get_buffer_ptr_and_size(buf_arg, builder, local_sym_tab, struct_sym_tab):
     )


-def get_char_array_ptr_and_size(buf_arg, builder, local_sym_tab, struct_sym_tab):
+def get_char_array_ptr_and_size(
+    buf_arg, builder, local_sym_tab, struct_sym_tab, func=None
+):
     """Get pointer to char array and its size."""

     # Struct field: obj.field
@@ -277,11 +280,11 @@ def get_char_array_ptr_and_size(buf_arg, builder, local_sym_tab, struct_sym_tab)
         if not (local_sym_tab and var_name in local_sym_tab):
             raise ValueError(f"Variable '{var_name}' not found")

-        struct_type = local_sym_tab[var_name].metadata
-        if not (struct_sym_tab and struct_type in struct_sym_tab):
-            raise ValueError(f"Struct type '{struct_type}' not found")
+        struct_ptr, struct_type, struct_metadata = local_sym_tab[var_name]
+        if not (struct_sym_tab and struct_metadata in struct_sym_tab):
+            raise ValueError(f"Struct type '{struct_metadata}' not found")

-        struct_info = struct_sym_tab[struct_type]
+        struct_info = struct_sym_tab[struct_metadata]
         if field_name not in struct_info.fields:
             raise ValueError(f"Field '{field_name}' not found")
@@ -292,8 +295,24 @@
             )
             return None, 0

-        struct_ptr = local_sym_tab[var_name].var
-        field_ptr = struct_info.gep(builder, struct_ptr, field_name)
+        # Check if char array
+        if not (
+            isinstance(field_type, ir.ArrayType)
+            and isinstance(field_type.element, ir.IntType)
+            and field_type.element.width == 8
+        ):
+            logger.warning("Field is not a char array")
+            return None, 0
+
+        field_ptr, _ = access_struct_field(
+            builder,
+            struct_ptr,
+            struct_type,
+            struct_metadata,
+            field_name,
+            struct_sym_tab,
+            func,
+        )

         # GEP to first element: [N x i8]* -> i8*
         buf_ptr = builder.gep(

View File

@@ -222,7 +222,7 @@ def _prepare_expr_args(expr, func, module, builder, local_sym_tab, struct_sym_ta
     # Special case: struct field char array needs pointer to first element
     if isinstance(expr, ast.Attribute):
         char_array_ptr, _ = get_char_array_ptr_and_size(
-            expr, builder, local_sym_tab, struct_sym_tab
+            expr, builder, local_sym_tab, struct_sym_tab, func
         )
         if char_array_ptr:
             return char_array_ptr

View File

@@ -117,6 +117,7 @@ def _get_key_val_dbg_type(name, generator, structs_sym_tab):
     type_obj = structs_sym_tab.get(name)

     if type_obj:
+        logger.info(f"Found struct named {name}, generating debug type")
         return _get_struct_debug_type(type_obj, generator, structs_sym_tab)

     # Fallback to basic types
@@ -165,6 +166,6 @@ def _get_struct_debug_type(struct_obj, generator, structs_sym_tab):
         )
         elements_arr.append(member)
     struct_type = generator.create_struct_type(
-        elements_arr, struct_obj.size, is_distinct=True
+        elements_arr, struct_obj.size * 8, is_distinct=True
     )
     return struct_type

View File

@@ -16,6 +16,33 @@ def get_module_symbols(module_name: str):
     return [name for name in dir(imported_module)], imported_module


+def unwrap_pointer_type(type_obj: Any) -> Any:
+    """
+    Recursively unwrap all pointer layers to get the base type.
+
+    This handles multiply nested pointers like LP_LP_struct_attribute_group
+    and returns the base type (struct_attribute_group).
+
+    Stops unwrapping when reaching a non-pointer type (one without _type_ attribute).
+
+    Args:
+        type_obj: The type object to unwrap
+
+    Returns:
+        The base type after unwrapping all pointer layers
+    """
+    current_type = type_obj
+
+    # Keep unwrapping while it's a pointer/array type (has _type_)
+    # But stop if _type_ is just a string or basic type marker
+    while hasattr(current_type, "_type_"):
+        next_type = current_type._type_
+        # Stop if _type_ is a string (like 'c' for c_char)
+        if isinstance(next_type, str):
+            break
+        current_type = next_type
+
+    return current_type
+
+
 def process_vmlinux_class(
     node,
     llvm_module,
@@ -158,13 +185,90 @@ def process_vmlinux_post_ast(
                     if hasattr(elem_type, "_length_") and is_complex_type:
                         type_length = elem_type._length_
-                    if containing_type.__module__ == "vmlinux":
-                        new_dep_node.add_dependent(
-                            elem_type._type_.__name__
-                            if hasattr(elem_type._type_, "__name__")
-                            else str(elem_type._type_)
-                        )
-                    elif containing_type.__module__ == ctypes.__name__:
+                    # Unwrap all pointer layers to get the base type for dependency tracking
+                    base_type = unwrap_pointer_type(elem_type)
+                    base_type_module = getattr(base_type, "__module__", None)
+
+                    if base_type_module == "vmlinux":
+                        base_type_name = (
+                            base_type.__name__
+                            if hasattr(base_type, "__name__")
+                            else str(base_type)
+                        )
+                        # ONLY add vmlinux types as dependencies
+                        new_dep_node.add_dependent(base_type_name)
+                        logger.debug(
+                            f"{containing_type} containing type of parent {elem_name} with {elem_type} and ctype {ctype_complex_type} and length {type_length}"
+                        )
+                        new_dep_node.set_field_containing_type(
+                            elem_name, containing_type
+                        )
+                        new_dep_node.set_field_type_size(elem_name, type_length)
+                        new_dep_node.set_field_ctype_complex_type(
+                            elem_name, ctype_complex_type
+                        )
+                        new_dep_node.set_field_type(elem_name, elem_type)
+
+                        # Check the containing_type module to decide whether to recurse
+                        containing_type_module = getattr(
+                            containing_type, "__module__", None
+                        )
+                        if containing_type_module == "vmlinux":
+                            # Also unwrap containing_type to get base type name
+                            base_containing_type = unwrap_pointer_type(
+                                containing_type
+                            )
+                            containing_type_name = (
+                                base_containing_type.__name__
+                                if hasattr(base_containing_type, "__name__")
+                                else str(base_containing_type)
+                            )
+                            # Check for self-reference or already processed
+                            if containing_type_name == current_symbol_name:
+                                # Self-referential pointer
+                                logger.debug(
+                                    f"Self-referential pointer in {current_symbol_name}.{elem_name}"
+                                )
+                                new_dep_node.set_field_ready(elem_name, True)
+                            elif handler.has_node(containing_type_name):
+                                # Already processed
+                                logger.debug(
+                                    f"Reusing already processed {containing_type_name}"
+                                )
+                                new_dep_node.set_field_ready(elem_name, True)
+                            else:
+                                # Process recursively - use base containing type, not the pointer wrapper
+                                new_dep_node.add_dependent(containing_type_name)
+                                process_vmlinux_post_ast(
+                                    base_containing_type,
+                                    llvm_handler,
+                                    handler,
+                                    processing_stack,
+                                )
+                                new_dep_node.set_field_ready(elem_name, True)
+                        elif (
+                            containing_type_module == ctypes.__name__
+                            or containing_type_module is None
+                        ):
+                            logger.debug(
+                                f"Processing ctype internal{containing_type}"
+                            )
+                            new_dep_node.set_field_ready(elem_name, True)
+                        else:
+                            raise TypeError(
+                                f"Module not supported in recursive resolution: {containing_type_module}"
+                            )
+                    elif (
+                        base_type_module == ctypes.__name__
+                        or base_type_module is None
+                    ):
+                        # Handle ctypes or types with no module (like some internal ctypes types)
+                        # DO NOT add ctypes as dependencies - just set field metadata and mark ready
+                        logger.debug(
+                            f"Base type {base_type} is ctypes - NOT adding as dependency, just processing field"
+                        )
                         if isinstance(elem_type, type):
                             if issubclass(elem_type, ctypes.Array):
                                 ctype_complex_type = ctypes.Array
@@ -176,57 +280,20 @@
                                 )
                             else:
                                 raise TypeError("Unsupported ctypes subclass")
-                        else:
-                            raise ImportError(
-                                f"Unsupported module of {containing_type}"
-                            )
-                        logger.debug(
-                            f"{containing_type} containing type of parent {elem_name} with {elem_type} and ctype {ctype_complex_type} and length {type_length}"
-                        )
-                        new_dep_node.set_field_containing_type(
-                            elem_name, containing_type
-                        )
-                        new_dep_node.set_field_type_size(elem_name, type_length)
-                        new_dep_node.set_field_ctype_complex_type(
-                            elem_name, ctype_complex_type
-                        )
-                        new_dep_node.set_field_type(elem_name, elem_type)
-                        if containing_type.__module__ == "vmlinux":
-                            containing_type_name = (
-                                containing_type.__name__
-                                if hasattr(containing_type, "__name__")
-                                else str(containing_type)
-                            )
-                            # Check for self-reference or already processed
-                            if containing_type_name == current_symbol_name:
-                                # Self-referential pointer
-                                logger.debug(
-                                    f"Self-referential pointer in {current_symbol_name}.{elem_name}"
-                                )
-                                new_dep_node.set_field_ready(elem_name, True)
-                            elif handler.has_node(containing_type_name):
-                                # Already processed
-                                logger.debug(
-                                    f"Reusing already processed {containing_type_name}"
-                                )
-                                new_dep_node.set_field_ready(elem_name, True)
-                            else:
-                                # Process recursively - THIS WAS MISSING
-                                new_dep_node.add_dependent(containing_type_name)
-                                process_vmlinux_post_ast(
-                                    containing_type,
-                                    llvm_handler,
-                                    handler,
-                                    processing_stack,
-                                )
-                                new_dep_node.set_field_ready(elem_name, True)
-                        elif containing_type.__module__ == ctypes.__name__:
-                            logger.debug(f"Processing ctype internal{containing_type}")
-                            new_dep_node.set_field_ready(elem_name, True)
-                        else:
-                            raise TypeError(
-                                "Module not supported in recursive resolution"
-                            )
+                        # Set field metadata but DO NOT add dependency or recurse
+                        new_dep_node.set_field_containing_type(
+                            elem_name, containing_type
+                        )
+                        new_dep_node.set_field_type_size(elem_name, type_length)
+                        new_dep_node.set_field_ctype_complex_type(
+                            elem_name, ctype_complex_type
+                        )
+                        new_dep_node.set_field_type(elem_name, elem_type)
+                        new_dep_node.set_field_ready(elem_name, True)
+                    else:
+                        raise ImportError(
+                            f"Unsupported module of {base_type}: {base_type_module}"
+                        )
                 else:
                     new_dep_node.add_dependent(
@@ -245,9 +312,12 @@
                 raise ValueError(
                     f"{elem_name} with type {elem_type} from module {module_name} not supported in recursive resolver"
                 )
+    elif module_name == ctypes.__name__ or module_name is None:
+        # Handle ctypes types - these don't need processing, just return
+        logger.debug(f"Skipping ctypes type {current_symbol_name}")
+        return True
     else:
-        raise ImportError("UNSUPPORTED Module")
+        raise ImportError(f"UNSUPPORTED Module {module_name}")

     logger.info(
         f"{current_symbol_name} processed and handler readiness {handler.is_ready}"

View File

@@ -11,7 +11,9 @@ from .class_handler import process_vmlinux_class
 logger = logging.getLogger(__name__)


-def detect_import_statement(tree: ast.AST) -> list[tuple[str, ast.ImportFrom]]:
+def detect_import_statement(
+    tree: ast.AST,
+) -> list[tuple[str, ast.ImportFrom, str, str]]:
     """
     Parse AST and detect import statements from vmlinux.
@@ -25,7 +27,7 @@
         List of tuples containing (module_name, imported_item) for each vmlinux import

     Raises:
-        SyntaxError: If multiple imports from vmlinux are attempted or import * is used
+        SyntaxError: If import * is used
     """
     vmlinux_imports = []
@@ -40,28 +42,19 @@
                         "Please import specific types explicitly."
                     )

-                # Check for multiple imports: from vmlinux import A, B, C
-                if len(node.names) > 1:
-                    imported_names = [alias.name for alias in node.names]
-                    raise SyntaxError(
-                        f"Multiple imports from vmlinux are not supported. "
-                        f"Found: {', '.join(imported_names)}. "
-                        f"Please use separate import statements for each type."
-                    )
-
                 # Check if no specific import is specified (should not happen with valid Python)
                 if len(node.names) == 0:
                     raise SyntaxError(
                         "Import from vmlinux must specify at least one type."
                     )

-                # Valid single import
+                # Support multiple imports: from vmlinux import A, B, C
                 for alias in node.names:
                     import_name = alias.name
-                    # Use alias if provided, otherwise use the original name (commented)
-                    # as_name = alias.asname if alias.asname else alias.name
-                    vmlinux_imports.append(("vmlinux", node))
-                    logger.info(f"Found vmlinux import: {import_name}")
+                    # Use alias if provided, otherwise use the original name
+                    as_name = alias.asname if alias.asname else alias.name
+                    vmlinux_imports.append(("vmlinux", node, import_name, as_name))
+                    logger.info(f"Found vmlinux import: {import_name} as {as_name}")

         # Handle "import vmlinux" statements (not typical but should be rejected)
         elif isinstance(node, ast.Import):
@@ -103,40 +96,37 @@ def vmlinux_proc(tree: ast.AST, module):
     with open(source_file, "r") as f:
         mod_ast = ast.parse(f.read(), filename=source_file)

-    for import_mod, import_node in import_statements:
-        for alias in import_node.names:
-            imported_name = alias.name
-            found = False
-            for mod_node in mod_ast.body:
-                if (
-                    isinstance(mod_node, ast.ClassDef)
-                    and mod_node.name == imported_name
-                ):
-                    process_vmlinux_class(mod_node, module, handler)
-                    found = True
-                    break
-                if isinstance(mod_node, ast.Assign):
-                    for target in mod_node.targets:
-                        if isinstance(target, ast.Name) and target.id == imported_name:
-                            process_vmlinux_assign(mod_node, module, assignments)
-                            found = True
-                            break
-                    if found:
-                        break
-            if not found:
-                logger.info(
-                    f"{imported_name} not found as ClassDef or Assign in vmlinux"
-                )
+    for import_mod, import_node, imported_name, as_name in import_statements:
+        found = False
+        for mod_node in mod_ast.body:
+            if isinstance(mod_node, ast.ClassDef) and mod_node.name == imported_name:
+                process_vmlinux_class(mod_node, module, handler)
+                found = True
+                break
+            if isinstance(mod_node, ast.Assign):
+                for target in mod_node.targets:
+                    if isinstance(target, ast.Name) and target.id == imported_name:
+                        process_vmlinux_assign(mod_node, module, assignments, as_name)
+                        found = True
+                        break
+                if found:
+                    break
+        if not found:
+            logger.info(f"{imported_name} not found as ClassDef or Assign in vmlinux")

     IRGenerator(module, handler, assignments)
     return assignments


-def process_vmlinux_assign(node, module, assignments: dict[str, AssignmentInfo]):
+def process_vmlinux_assign(
+    node, module, assignments: dict[str, AssignmentInfo], target_name=None
+):
     """Process assignments from vmlinux module."""
     # Only handle single-target assignments
     if len(node.targets) == 1 and isinstance(node.targets[0], ast.Name):
-        target_name = node.targets[0].id
+        # Use provided target_name (for aliased imports) or fall back to original name
+        if target_name is None:
+            target_name = node.targets[0].id

         # Handle constant value assignments
         if isinstance(node.value, ast.Constant):
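
With the multiple-import restriction removed and aliases carried through as `as_name`, a single vmlinux import line with several (optionally aliased) names is now walked alias-by-alias. A standalone sketch of what `detect_import_statement` sees for such a line (`req_t` is a hypothetical alias used only for illustration):

import ast

tree = ast.parse("from vmlinux import struct_pt_regs, struct_request as req_t")
node = tree.body[0]
assert isinstance(node, ast.ImportFrom) and node.module == "vmlinux"
for alias in node.names:
    as_name = alias.asname if alias.asname else alias.name
    print(alias.name, "->", as_name)
# struct_pt_regs -> struct_pt_regs
# struct_request -> req_t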

View File

@@ -21,7 +21,7 @@ def debug_info_generation(
         generated_debug_info: List of tuples (struct, debug_info) to track generated debug info

     Returns:
-        The generated global variable debug info
+        The generated global variable debug info, or None for unsupported types
     """
     # Set up debug info generator
     generator = DebugInfoGenerator(llvm_module)
@@ -31,23 +31,42 @@
         if existing_struct.name == struct.name:
             return debug_info

+    # Check if this is a union (not supported yet)
+    if not struct.name.startswith("struct_"):
+        logger.warning(f"Skipping debug info generation for union: {struct.name}")
+        # Create a minimal forward declaration for unions
+        union_type = generator.create_struct_type(
+            [], struct.__sizeof__() * 8, is_distinct=True
+        )
+        return union_type
+
     # Process all fields and create members for the struct
     members = []
-    for field_name, field in struct.fields.items():
-        # Get appropriate debug type for this field
-        field_type = _get_field_debug_type(
-            field_name, field, generator, struct, generated_debug_info
-        )
-        # Create struct member with proper offset
-        member = generator.create_struct_member_vmlinux(
-            field_name, field_type, field.offset * 8
-        )
-        members.append(member)

-    if struct.name.startswith("struct_"):
-        struct_name = struct.name.removeprefix("struct_")
-    else:
-        raise ValueError("Unions are not supported in the current version")
+    sorted_fields = sorted(struct.fields.items(), key=lambda item: item[1].offset)
+
+    for field_name, field in sorted_fields:
+        try:
+            # Get appropriate debug type for this field
+            field_type = _get_field_debug_type(
+                field_name, field, generator, struct, generated_debug_info
+            )
+            # Ensure field_type is a tuple
+            if not isinstance(field_type, tuple) or len(field_type) != 2:
+                logger.error(f"Invalid field_type for {field_name}: {field_type}")
+                continue
+            # Create struct member with proper offset
+            member = generator.create_struct_member_vmlinux(
+                field_name, field_type, field.offset * 8
+            )
+            members.append(member)
+        except Exception as e:
+            logger.error(f"Failed to process field {field_name} in {struct.name}: {e}")
+            continue
+
+    struct_name = struct.name.removeprefix("struct_")

     # Create struct type with all members
     struct_type = generator.create_struct_type_with_name(
         struct_name, members, struct.__sizeof__() * 8, is_distinct=True
@@ -74,11 +93,19 @@
         generated_debug_info: List of already generated debug info

     Returns:
-        The debug info type for this field
+        A tuple of (debug_type, size_in_bits)
     """
-    # Handle complex types (arrays, pointers)
+    # Handle complex types (arrays, pointers, function pointers)
     if field.ctype_complex_type is not None:
-        if issubclass(field.ctype_complex_type, ctypes.Array):
+        # Handle function pointer types (CFUNCTYPE)
+        if callable(field.ctype_complex_type):
+            # Function pointers are represented as void pointers
+            logger.warning(
+                f"Field {field_name} is a function pointer, using void pointer"
+            )
+            void_ptr = generator.create_pointer_type(None, 64)
+            return void_ptr, 64
+        elif issubclass(field.ctype_complex_type, ctypes.Array):
             # Handle array types
             element_type, base_type_size = _get_basic_debug_type(
                 field.containing_type, generator
@@ -100,11 +127,13 @@
     for existing_struct, debug_info in generated_debug_info:
         if existing_struct.name == struct_name:
             # Use existing debug info
-            return debug_info, existing_struct.__sizeof__()
+            return debug_info, existing_struct.__sizeof__() * 8

     # If not found, create a forward declaration
     # This will be completed when the actual struct is processed
-    logger.warning("Forward declaration in struct created")
+    logger.info(
+        f"Forward declaration created for {struct_name} in {parent_struct.name}"
+    )
     forward_type = generator.create_struct_type([], 0, is_distinct=True)
     return forward_type, 0

View File

@@ -11,6 +11,10 @@ logger = logging.getLogger(__name__)
 class IRGenerator:
+    # This field keeps track of the non_struct names to avoid duplicate name errors.
+    type_number = 0
+    unprocessed_store: list[str] = []
+
     # get the assignments dict and add this stuff to it.
     def __init__(self, llvm_module, handler: DependencyHandler, assignments):
         self.llvm_module = llvm_module
@@ -129,7 +133,19 @@
             for field_name, field in struct.fields.items():
                 # does not take arrays and similar types into consideration yet.
-                if field.ctype_complex_type is not None and issubclass(
+                if callable(field.ctype_complex_type):
+                    # Function pointer case - generate a simple field accessor
+                    field_co_re_name, returned = self._struct_name_generator(
+                        struct, field, field_index
+                    )
+                    field_index += 1
+                    globvar = ir.GlobalVariable(
+                        self.llvm_module, ir.IntType(64), name=field_co_re_name
+                    )
+                    globvar.linkage = "external"
+                    globvar.set_metadata("llvm.preserve.access.index", debug_info)
+                    self.generated_field_names[struct.name][field_name] = globvar
+                elif field.ctype_complex_type is not None and issubclass(
                     field.ctype_complex_type, ctypes.Array
                 ):
                     array_size = field.type_size
@@ -137,7 +153,7 @@
                     if containing_type.__module__ == ctypes.__name__:
                         containing_type_size = ctypes.sizeof(containing_type)
                         if array_size == 0:
-                            field_co_re_name = self._struct_name_generator(
+                            field_co_re_name, returned = self._struct_name_generator(
                                 struct, field, field_index, True, 0, containing_type_size
                             )
                             globvar = ir.GlobalVariable(
@@ -149,7 +165,7 @@
                             field_index += 1
                             continue
                         for i in range(0, array_size):
-                            field_co_re_name = self._struct_name_generator(
+                            field_co_re_name, returned = self._struct_name_generator(
                                 struct, field, field_index, True, i, containing_type_size
                             )
                             globvar = ir.GlobalVariable(
@@ -163,12 +179,28 @@
                     array_size = field.type_size
                     containing_type = field.containing_type
                     if containing_type.__module__ == "vmlinux":
-                        containing_type_size = self.handler[
-                            containing_type.__name__
-                        ].current_offset
-                        for i in range(0, array_size):
-                            field_co_re_name = self._struct_name_generator(
-                                struct, field, field_index, True, i, containing_type_size
-                            )
+                        # Unwrap all pointer layers to get the base struct type
+                        base_containing_type = containing_type
+                        while hasattr(base_containing_type, "_type_"):
+                            next_type = base_containing_type._type_
+                            # Stop if _type_ is a string (like 'c' for c_char)
+                            # TODO: stacked pointers not handl;ing ctypes check here as well
+                            if isinstance(next_type, str):
+                                break
+                            base_containing_type = next_type
+                        # Get the base struct name
+                        base_struct_name = (
+                            base_containing_type.__name__
+                            if hasattr(base_containing_type, "__name__")
+                            else str(base_containing_type)
+                        )
+                        # Look up the size using the base struct name
+                        containing_type_size = self.handler[base_struct_name].current_offset
+                        if array_size == 0:
+                            field_co_re_name, returned = self._struct_name_generator(
+                                struct, field, field_index, True, 0, containing_type_size
+                            )
                             globvar = ir.GlobalVariable(
                                 self.llvm_module, ir.IntType(64), name=field_co_re_name
@@ -176,9 +208,30 @@
                             globvar.linkage = "external"
                             globvar.set_metadata("llvm.preserve.access.index", debug_info)
                             self.generated_field_names[struct.name][field_name] = globvar
                             field_index += 1
+                        else:
+                            for i in range(0, array_size):
+                                field_co_re_name, returned = self._struct_name_generator(
+                                    struct,
+                                    field,
+                                    field_index,
+                                    True,
+                                    i,
+                                    containing_type_size,
+                                )
+                                globvar = ir.GlobalVariable(
+                                    self.llvm_module, ir.IntType(64), name=field_co_re_name
+                                )
+                                globvar.linkage = "external"
+                                globvar.set_metadata(
+                                    "llvm.preserve.access.index", debug_info
+                                )
+                                self.generated_field_names[struct.name][field_name] = (
+                                    globvar
+                                )
+                                field_index += 1
                 else:
-                    field_co_re_name = self._struct_name_generator(
+                    field_co_re_name, returned = self._struct_name_generator(
                         struct, field, field_index
                     )
                     field_index += 1
@@ -198,7 +251,7 @@
         is_indexed: bool = False,
         index: int = 0,
         containing_type_size: int = 0,
-    ) -> str:
+    ) -> tuple[str, bool]:
         # TODO: Does not support Unions as well as recursive pointer and array type naming
         if is_indexed:
             name = (
@@ -208,7 +261,7 @@
                 + "$"
                 + f"0:{field_index}:{index}"
             )
-            return name
+            return name, True
         elif struct.name.startswith("struct_"):
             name = (
                 "llvm."
@@ -217,9 +270,18 @@
                 + "$"
                 + f"0:{field_index}"
             )
-            return name
+            return name, True
         else:
-            print(self.handler[struct.name])
-            raise TypeError(
-                "Name generation cannot occur due to type name not starting with struct"
-            )
+            logger.warning(
+                "Blindly handling non-struct type to avoid type errors in vmlinux IR generation. Possibly a union."
+            )
+            self.type_number += 1
+            unprocessed_type = "unprocessed_type_" + str(self.handler[struct.name].name)
+            if self.unprocessed_store.__contains__(unprocessed_type):
+                return unprocessed_type + "_" + str(self.type_number), False
+            else:
+                self.unprocessed_store.append(unprocessed_type)
+                return unprocessed_type, False
+            # raise TypeError(
+            #     "Name generation cannot occur due to type name not starting with struct"
+            # )

View File

@@ -94,17 +94,140 @@ class VmlinuxHandler:
                 f"Attempting to access field {field_name} of possible vmlinux struct {struct_var_name}"
             )
             python_type: type = var_info.metadata
-            struct_name = python_type.__name__
-            globvar_ir, field_data = self.get_field_type(struct_name, field_name)
-            builder.function.args[0].type = ir.PointerType(ir.IntType(8))
-            field_ptr = self.load_ctx_field(
-                builder, builder.function.args[0], globvar_ir, field_data, struct_name
-            )
-            # Return pointer to field and field type
-            return field_ptr, field_data
+            # Check if this is a context field (ctx) or a cast struct
+            is_context_field = var_info.var is None
+
+            if is_context_field:
+                # Handle context field access (original behavior)
+                struct_name = python_type.__name__
+                globvar_ir, field_data = self.get_field_type(struct_name, field_name)
+                builder.function.args[0].type = ir.PointerType(ir.IntType(8))
+                field_ptr = self.load_ctx_field(
+                    builder,
+                    builder.function.args[0],
+                    globvar_ir,
+                    field_data,
+                    struct_name,
+                )
+                return field_ptr, field_data
+            else:
+                # Handle cast struct field access
+                struct_name = python_type.__name__
+                globvar_ir, field_data = self.get_field_type(struct_name, field_name)
+
+                # Handle cast struct field access (use bpf_probe_read_kernel)
+                # Load the struct pointer from the local variable
+                struct_ptr = builder.load(var_info.var)
+
+                # Use bpf_probe_read_kernel for non-context struct field access
+                field_value = self.load_struct_field(
+                    builder, struct_ptr, globvar_ir, field_data, struct_name
+                )
+                # Return field value and field type
+                return field_value, field_data
         else:
             raise RuntimeError("Variable accessed not found in symbol table")

+    @staticmethod
+    def load_struct_field(
+        builder, struct_ptr_int, offset_global, field_data, struct_name=None
+    ):
+        """
+        Generate LLVM IR to load a field from a regular (non-context) struct using bpf_probe_read_kernel.
+
+        Args:
+            builder: llvmlite IRBuilder instance
+            struct_ptr_int: The struct pointer as an i64 value (already loaded from alloca)
+            offset_global: Global variable containing the field offset (i64)
+            field_data: contains data about the field
+            struct_name: Name of the struct being accessed (optional)
+
+        Returns:
+            The loaded value
+        """
+        # Load the offset value
+        offset = builder.load(offset_global)
+
+        # Convert i64 to pointer type (BPF stores pointers as i64)
+        i8_ptr_type = ir.PointerType(ir.IntType(8))
+        struct_ptr = builder.inttoptr(struct_ptr_int, i8_ptr_type)
+
+        # GEP with offset to get field pointer
+        field_ptr = builder.gep(
+            struct_ptr,
+            [offset],
+            inbounds=False,
+        )
+
+        # Determine the appropriate field size based on field information
+        field_size_bytes = 8  # Default to 8 bytes (64-bit)
+        int_width = 64  # Default to 64-bit
+        needs_zext = False
+
+        if field_data is not None:
+            # Try to determine the size from field metadata
+            if field_data.type.__module__ == ctypes.__name__:
+                try:
+                    field_size_bytes = ctypes.sizeof(field_data.type)
+                    field_size_bits = field_size_bytes * 8
+                    if field_size_bits in [8, 16, 32, 64]:
+                        int_width = field_size_bits
+                        logger.info(
+                            f"Determined field size: {int_width} bits ({field_size_bytes} bytes)"
+                        )
+                        # Special handling for struct_xdp_md i32 fields
+                        if struct_name == "struct_xdp_md" and int_width == 32:
+                            needs_zext = True
+                            logger.info(
+                                "struct_xdp_md i32 field detected, will zero-extend to i64"
+                            )
+                    else:
+                        logger.warning(
+                            f"Unusual field size {field_size_bits} bits, using default 64"
+                        )
+                except Exception as e:
+                    logger.warning(
+                        f"Could not determine field size: {e}, using default 64"
+                    )
+            elif field_data.type.__module__ == "vmlinux":
+                # For pointers to structs or complex vmlinux types
+                if field_data.ctype_complex_type is not None and issubclass(
+                    field_data.ctype_complex_type, ctypes._Pointer
+                ):
+                    int_width = 64  # Pointers are always 64-bit
+                    field_size_bytes = 8
+                    logger.info("Field is a pointer type, using 64 bits")
+                else:
+                    logger.warning("Complex vmlinux field type, using default 64 bits")
+
+        # Allocate local storage for the field value
+        local_storage = builder.alloca(ir.IntType(int_width))
+        local_storage_i8_ptr = builder.bitcast(local_storage, i8_ptr_type)
+
+        # Use bpf_probe_read_kernel to safely read the field
+        # This generates:
+        #   %gep = getelementptr i8, ptr %struct_ptr, i64 %offset (already done above as field_ptr)
+        #   %passed = tail call ptr @llvm.bpf.passthrough.p0.p0(i32 2, ptr %gep)
+        #   %result = call i64 inttoptr (i64 113 to ptr)(ptr %local_storage, i32 %size, ptr %passed)
+        from pythonbpf.helper import emit_probe_read_kernel_call
+
+        emit_probe_read_kernel_call(
+            builder, local_storage_i8_ptr, field_size_bytes, field_ptr
+        )
+
+        # Load the value from local storage
+        value = builder.load(local_storage)
+
+        # Zero-extend i32 to i64 if needed
+        if needs_zext:
+            value = builder.zext(value, ir.IntType(64))
+            logger.info("Zero-extended i32 value to i64")
+
+        return value
+
     @staticmethod
     def load_ctx_field(builder, ctx_arg, offset_global, field_data, struct_name=None):
         """

View File

@@ -3,21 +3,20 @@ CFLAGS := -emit-llvm -target bpf -c
 SRC := $(wildcard *.bpf.c)
 LL := $(SRC:.bpf.c=.bpf.ll)
-LL2 := $(SRC:.bpf.c=.bpf.o2.ll)
 OBJ := $(SRC:.bpf.c=.bpf.o)
+LL0 := $(SRC:.bpf.c=.bpf.o0.ll)

 .PHONY: all clean

-all: $(LL) $(OBJ) $(LL2)
+all: $(LL) $(OBJ) $(LL0)

 %.bpf.o: %.bpf.c
 	$(BPF_CLANG) -O2 -g -target bpf -c $< -o $@

 %.bpf.ll: %.bpf.c
-	$(BPF_CLANG) -O0 $(CFLAGS) -g -S $< -o $@
+	$(BPF_CLANG) $(CFLAGS) -O2 -g -S $< -o $@

-%.bpf.o2.ll: %.bpf.c
-	$(BPF_CLANG) -O2 $(CFLAGS) -g -S $< -o $@
+%.bpf.o0.ll: %.bpf.c
+	$(BPF_CLANG) $(CFLAGS) -O0 -g -S $< -o $@

 clean:
-	rm -f $(LL) $(OBJ) $(LL2)
+	rm -f $(LL) $(OBJ) $(LL0)

View File

@@ -0,0 +1,18 @@
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+char LICENSE[] SEC("license") = "GPL";
+
+SEC("kprobe/blk_mq_start_request")
+int example(struct pt_regs *ctx)
+{
+    u64 a = ctx->r15;
+    struct request *req = (struct request *)(ctx->di);
+    unsigned int something_ns = BPF_CORE_READ(req, timeout);
+    unsigned int data_len = BPF_CORE_READ(req, __data_len);
+    bpf_printk("data length %lld %ld %ld\n", data_len, something_ns, a);
+    return 0;
+}

View File

@@ -0,0 +1,18 @@
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+char LICENSE[] SEC("license") = "GPL";
+
+SEC("kprobe/blk_mq_start_request")
+int example(struct pt_regs *ctx)
+{
+    u64 a = ctx->r15;
+    struct request *req = (struct request *)(ctx->di);
+    unsigned int something_ns = req->timeout;
+    unsigned int data_len = req->__data_len;
+    bpf_printk("data length %lld %ld %ld\n", data_len, something_ns, a);
+    return 0;
+}

View File

@@ -0,0 +1,22 @@
+from vmlinux import XDP_PASS
+from pythonbpf import bpf, section, bpfglobal, compile_to_ir
+import logging
+
+from ctypes import c_int64, c_void_p
+
+
+@bpf
+@section("kprobe/blk_mq_start_request")
+def example(ctx: c_void_p) -> c_int64:
+    d = XDP_PASS  # This gives an error, but
+    e = XDP_PASS + 0  # this does not
+    print(f"test1 {e} test2 {d}")
+    return c_int64(0)
+
+
+@bpf
+@bpfglobal
+def LICENSE() -> str:
+    return "GPL"
+
+
+compile_to_ir("assignment_handling.py", "assignment_handling.ll", loglevel=logging.INFO)

View File

@@ -1,6 +1,6 @@
 from pythonbpf import bpf, section, struct, bpfglobal, compile, map
 from pythonbpf.maps import HashMap
-from pythonbpf.helper import pid
+from pythonbpf.helper import pid, comm

 from ctypes import c_void_p, c_int64
@@ -9,6 +9,7 @@ from ctypes import c_void_p, c_int64
 class val_type:
     counter: c_int64
     shizzle: c_int64
+    comm: str(16)

 @bpf
@@ -22,6 +23,7 @@ def last() -> HashMap:
 def hello_world(ctx: c_void_p) -> c_int64:
     obj = val_type()
     obj.counter, obj.shizzle = 42, 96
+    comm(obj.comm)
     t = last.lookup(obj)
     if t:
         print(f"Found existing entry: counter={obj.counter}, pid={t}")

View File

@@ -0,0 +1,27 @@
+from vmlinux import struct_request, struct_pt_regs
+from pythonbpf import bpf, section, bpfglobal, compile_to_ir, compile
+import logging
+
+from ctypes import c_int64
+
+
+@bpf
+@section("kprobe/blk_mq_start_request")
+def example(ctx: struct_pt_regs) -> c_int64:
+    a = ctx.r15
+    req = struct_request(ctx.di)
+    d = req.__data_len
+    b = ctx.r12
+    c = req.timeout
+    print(f"data length {d} and {c} and {a}")
+    print(f"ctx arg {b}")
+    return c_int64(0)
+
+
+@bpf
+@bpfglobal
+def LICENSE() -> str:
+    return "GPL"
+
+
+compile_to_ir("requests.py", "requests.ll", loglevel=logging.INFO)
+compile()

View File

@@ -0,0 +1,21 @@
+from vmlinux import struct_pt_regs
+from pythonbpf import bpf, section, bpfglobal, compile_to_ir
+import logging
+
+from ctypes import c_int64
+
+
+@bpf
+@section("kprobe/blk_mq_start_request")
+def example(ctx: struct_pt_regs) -> c_int64:
+    req = ctx.di
+    print(f"data length {req}")
+    return c_int64(0)
+
+
+@bpf
+@bpfglobal
+def LICENSE() -> str:
+    return "GPL"
+
+
+compile_to_ir("requests2.py", "requests2.ll", loglevel=logging.INFO)