From 009b11aca64b54d29748137a913fd38e304ff440 Mon Sep 17 00:00:00 2001
From: Pragyansh Chaturvedi
Date: Wed, 15 Oct 2025 23:52:15 +0530
Subject: [PATCH] Implement bpf_probe_read_kernel_str helper, Allow i8* to i8 ArrayType conversion

---
Review note: in get_char_array_ptr_and_size (pythonbpf/helper/helper_utils.py)
the str-pointer guard now joins its two checks with `or` and compares the
pointee by equality. The submitted condition passed a type instance as the
second argument of isinstance() and short-circuited for every pointer type, so
the intended ValueError("Expected str ptr variable") could never be raised.

 pythonbpf/assign_pass.py               | 74 ++++++++++++++++++++---
 pythonbpf/helper/__init__.py           |  6 +-
 pythonbpf/helper/bpf_helper_handler.py | 65 ++++++++++++++++++++
 pythonbpf/helper/helper_utils.py       | 84 ++++++++++++++++++++++++++
 pythonbpf/helper/helpers.py            |  5 ++
 5 files changed, 223 insertions(+), 11 deletions(-)

diff --git a/pythonbpf/assign_pass.py b/pythonbpf/assign_pass.py
index a7cd52b..e0ef2db 100644
--- a/pythonbpf/assign_pass.py
+++ b/pythonbpf/assign_pass.py
@@ -2,6 +2,7 @@ import ast
 import logging
 from llvmlite import ir
 from pythonbpf.expr import eval_expr
+from pythonbpf.helper import emit_probe_read_kernel_str_call
 
 logger = logging.getLogger(__name__)
 
@@ -27,27 +28,82 @@ def handle_struct_field_assignment(
 
     # Get field pointer and evaluate value
     field_ptr = struct_info.gep(builder, local_sym_tab[var_name].var, field_name)
-    val = eval_expr(
+    field_type = struct_info.field_type(field_name)
+    val_result = eval_expr(
         func, module, builder, rval, local_sym_tab, map_sym_tab, structs_sym_tab
     )
 
-    if val is None:
+    if val_result is None:
         logger.error(f"Failed to evaluate value for {var_name}.{field_name}")
         return
 
-    # TODO: Handle string assignment to char array (not a priority)
-    field_type = struct_info.field_type(field_name)
-    if isinstance(field_type, ir.ArrayType) and val[1] == ir.PointerType(ir.IntType(8)):
-        logger.warning(
-            f"String to char array assignment not implemented for {var_name}.{field_name}"
+    val, val_type = val_result
+
+    # Special case: i8* string to [N x i8] char array
+    if _is_char_array(field_type) and _is_i8_ptr(val_type):
+        _copy_string_to_char_array(
+            func,
+            module,
+            builder,
+            val,
+            field_ptr,
+            field_type,
+            local_sym_tab,
+            map_sym_tab,
+            structs_sym_tab,
         )
+        logger.info(f"Copied string to char array {var_name}.{field_name}")
         return
 
-    # Store the value
-    builder.store(val[0], field_ptr)
+    # Regular assignment
+    builder.store(val, field_ptr)
     logger.info(f"Assigned to struct field {var_name}.{field_name}")
 
 
+def _copy_string_to_char_array(
+    func,
+    module,
+    builder,
+    src_ptr,
+    dst_ptr,
+    array_type,
+    local_sym_tab,
+    map_sym_tab,
+    struct_sym_tab,
+):
+    """Copy string (i8*) to char array ([N x i8]) using bpf_probe_read_kernel_str"""
+
+    array_size = array_type.count
+
+    # Get pointer to first element: [N x i8]* -> i8*
+    dst_i8_ptr = builder.gep(
+        dst_ptr,
+        [ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), 0)],
+        inbounds=True,
+    )
+
+    # Use the shared emitter function
+    emit_probe_read_kernel_str_call(builder, dst_i8_ptr, array_size, src_ptr)
+
+
+def _is_char_array(ir_type):
+    """Check if type is [N x i8]."""
+    return (
+        isinstance(ir_type, ir.ArrayType)
+        and isinstance(ir_type.element, ir.IntType)
+        and ir_type.element.width == 8
+    )
+
+
+def _is_i8_ptr(ir_type):
+    """Check if type is i8*."""
+    return (
+        isinstance(ir_type, ir.PointerType)
+        and isinstance(ir_type.pointee, ir.IntType)
+        and ir_type.pointee.width == 8
+    )
+
+
 def handle_variable_assignment(
     func, module, builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab
 ):
diff --git a/pythonbpf/helper/__init__.py b/pythonbpf/helper/__init__.py
index 26f792b..2f9c347 100644
--- a/pythonbpf/helper/__init__.py
+++ b/pythonbpf/helper/__init__.py
@@ -1,7 +1,7 @@
 from .helper_registry import HelperHandlerRegistry
 from .helper_utils import reset_scratch_pool
-from .bpf_helper_handler import handle_helper_call
-from .helpers import ktime, pid, deref, comm, XDP_DROP, XDP_PASS
+from .bpf_helper_handler import handle_helper_call, emit_probe_read_kernel_str_call
+from .helpers import ktime, pid, deref, comm, probe_read_str, XDP_DROP, XDP_PASS
 
 
 # Register the helper handler with expr module
@@ -59,10 +59,12 @@ __all__ = [
     "HelperHandlerRegistry",
     "reset_scratch_pool",
     "handle_helper_call",
+    "emit_probe_read_kernel_str_call",
     "ktime",
     "pid",
     "deref",
     "comm",
+    "probe_read_str",
     "XDP_DROP",
     "XDP_PASS",
 ]
diff --git a/pythonbpf/helper/bpf_helper_handler.py b/pythonbpf/helper/bpf_helper_handler.py
index 1c96cb4..7868677 100644
--- a/pythonbpf/helper/bpf_helper_handler.py
+++ b/pythonbpf/helper/bpf_helper_handler.py
@@ -8,6 +8,8 @@ from .helper_utils import (
     get_flags_val,
     get_data_ptr_and_size,
     get_buffer_ptr_and_size,
+    get_char_array_ptr_and_size,
+    get_ptr_from_arg,
 )
 from .printk_formatter import simple_string_print, handle_fstring_print
 
@@ -26,6 +28,7 @@ class BPFHelperID(Enum):
     BPF_GET_CURRENT_PID_TGID = 14
     BPF_GET_CURRENT_COMM = 16
     BPF_PERF_EVENT_OUTPUT = 25
+    BPF_PROBE_READ_KERNEL_STR = 115
 
 
 @HelperHandlerRegistry.register("ktime")
@@ -368,6 +371,68 @@ def bpf_perf_event_output_handler(
     return result, None
 
 
+def emit_probe_read_kernel_str_call(builder, dst_ptr, dst_size, src_ptr):
+    """Emit LLVM IR call to bpf_probe_read_kernel_str"""
+
+    fn_type = ir.FunctionType(
+        ir.IntType(64),
+        [ir.PointerType(), ir.IntType(32), ir.PointerType()],
+        var_arg=False,
+    )
+    fn_ptr = builder.inttoptr(
+        ir.Constant(ir.IntType(64), BPFHelperID.BPF_PROBE_READ_KERNEL_STR.value),
+        ir.PointerType(fn_type),
+    )
+
+    result = builder.call(
+        fn_ptr,
+        [
+            builder.bitcast(dst_ptr, ir.PointerType()),
+            ir.Constant(ir.IntType(32), dst_size),
+            builder.bitcast(src_ptr, ir.PointerType()),
+        ],
+        tail=False,
+    )
+
+    logger.info(f"Emitted bpf_probe_read_kernel_str (size={dst_size})")
+    return result
+
+
+@HelperHandlerRegistry.register("probe_read_str")
+def bpf_probe_read_kernel_str_emitter(
+    call,
+    map_ptr,
+    module,
+    builder,
+    func,
+    local_sym_tab=None,
+    struct_sym_tab=None,
+    map_sym_tab=None,
+):
+    """Emit LLVM IR for bpf_probe_read_kernel_str helper."""
+
+    if len(call.args) != 2:
+        raise ValueError(
+            f"probe_read_str expects 2 args (dst, src), got {len(call.args)}"
+        )
+
+    # Get destination buffer (char array -> i8*)
+    dst_ptr, dst_size = get_char_array_ptr_and_size(
+        call.args[0], builder, local_sym_tab, struct_sym_tab
+    )
+
+    # Get source pointer (evaluate expression)
+    src_ptr, src_type = get_ptr_from_arg(
+        call.args[1], func, module, builder, local_sym_tab, map_sym_tab, struct_sym_tab
+    )
+
+    # Emit the helper call
+    result = emit_probe_read_kernel_str_call(builder, dst_ptr, dst_size, src_ptr)
+
+    logger.info(f"Emitted bpf_probe_read_kernel_str (size={dst_size})")
+    return result, ir.IntType(64)
+
+
 def handle_helper_call(
     call,
     module,
diff --git a/pythonbpf/helper/helper_utils.py b/pythonbpf/helper/helper_utils.py
index cf89c30..7f3fdbe 100644
--- a/pythonbpf/helper/helper_utils.py
+++ b/pythonbpf/helper/helper_utils.py
@@ -4,6 +4,7 @@ import logging
 from llvmlite import ir
 from pythonbpf.expr import (
     get_operand_value,
+    eval_expr,
 )
 
 logger = logging.getLogger(__name__)
@@ -190,3 +191,86 @@ def get_buffer_ptr_and_size(buf_arg, builder, local_sym_tab, struct_sym_tab):
         raise ValueError(
             "comm expects either a struct field (obj.field) or variable name"
         )
+
+
+def get_char_array_ptr_and_size(buf_arg, builder, local_sym_tab, struct_sym_tab):
+    """Get pointer to char array and its size."""
+
+    # Struct field: obj.field
+    if isinstance(buf_arg, ast.Attribute) and isinstance(buf_arg.value, ast.Name):
+        var_name = buf_arg.value.id
+        field_name = buf_arg.attr
+
+        if not (local_sym_tab and var_name in local_sym_tab):
+            raise ValueError(f"Variable '{var_name}' not found")
+
+        struct_type = local_sym_tab[var_name].metadata
+        if not (struct_sym_tab and struct_type in struct_sym_tab):
+            raise ValueError(f"Struct type '{struct_type}' not found")
+
+        struct_info = struct_sym_tab[struct_type]
+        if field_name not in struct_info.fields:
+            raise ValueError(f"Field '{field_name}' not found")
+
+        field_type = struct_info.field_type(field_name)
+        if not _is_char_array(field_type):
+            raise ValueError("Expected char array field")
+
+        struct_ptr = local_sym_tab[var_name].var
+        field_ptr = struct_info.gep(builder, struct_ptr, field_name)
+
+        # GEP to first element: [N x i8]* -> i8*
+        buf_ptr = builder.gep(
+            field_ptr,
+            [ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), 0)],
+            inbounds=True,
+        )
+        return buf_ptr, field_type.count
+
+    elif isinstance(buf_arg, ast.Name):
+        # NOTE: We shouldn't be doing this as we can't get size info
+        var_name = buf_arg.id
+        if not (local_sym_tab and var_name in local_sym_tab):
+            raise ValueError(f"Variable '{var_name}' not found")
+
+        var_ptr = local_sym_tab[var_name].var
+        var_type = local_sym_tab[var_name].ir_type
+
+        if not isinstance(var_type, ir.PointerType) or (
+            getattr(var_type, "pointee", None) != ir.IntType(8)
+        ):
+            raise ValueError("Expected str ptr variable")
+
+        return var_ptr, 256  # Size unknown for str ptr, using 256 as default
+
+    else:
+        raise ValueError("Expected struct field or variable name")
+
+
+def _is_char_array(ir_type):
+    """Check if IR type is [N x i8]."""
+    return (
+        isinstance(ir_type, ir.ArrayType)
+        and isinstance(ir_type.element, ir.IntType)
+        and ir_type.element.width == 8
+    )
+
+
+def get_ptr_from_arg(
+    arg, func, module, builder, local_sym_tab, map_sym_tab, struct_sym_tab
+):
+    """Evaluate argument and return pointer value"""
+
+    result = eval_expr(
+        func, module, builder, arg, local_sym_tab, map_sym_tab, struct_sym_tab
+    )
+
+    if not result:
+        raise ValueError("Failed to evaluate argument")
+
+    val, val_type = result
+
+    if not isinstance(val_type, ir.PointerType):
+        raise ValueError(f"Expected pointer type, got {val_type}")
+
+    return val, val_type
diff --git a/pythonbpf/helper/helpers.py b/pythonbpf/helper/helpers.py
index 1861e67..cb1a8e1 100644
--- a/pythonbpf/helper/helpers.py
+++ b/pythonbpf/helper/helpers.py
@@ -22,6 +22,11 @@ def comm(buf):
     return ctypes.c_int64(0)
 
 
+def probe_read_str(dst, src):
+    """Safely read a null-terminated string from kernel memory"""
+    return ctypes.c_int64(0)
+
+
 XDP_ABORTED = ctypes.c_int64(0)
 XDP_DROP = ctypes.c_int64(1)
 XDP_PASS = ctypes.c_int64(2)