5 Commits

8 changed files with 485 additions and 52 deletions

View File

@ -1,4 +1,4 @@
from pythonbpf import bpf, map, struct, section, bpfglobal, compile
from pythonbpf import bpf, map, struct, section, bpfglobal, BPF, trace_pipe
from pythonbpf.helper import ktime, pid, comm
from pythonbpf.maps import PerfEventArray
@ -23,10 +23,9 @@ def events() -> PerfEventArray:
@section("tracepoint/syscalls/sys_enter_clone")
def hello(ctx: c_void_p) -> c_int64:
dataobj = data_t()
strobj = "hellohellohello"
dataobj.pid, dataobj.ts = pid(), ktime()
comm(dataobj.comm)
print(f"clone called at {dataobj.ts} by pid {dataobj.pid}, comm {strobj}")
print(f"clone called at {dataobj.ts} by pid {dataobj.pid}, comm {dataobj.comm}")
events.output(dataobj)
return 0 # type: ignore [return-value]
@ -37,4 +36,8 @@ def LICENSE() -> str:
return "GPL"
compile()
# compile
BPF().load_and_attach()
print("Tracing clone()... Ctrl-C to end")
trace_pipe()

View File

@ -72,6 +72,14 @@ def handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab):
_allocate_for_constant(builder, var_name, rval, local_sym_tab)
elif isinstance(rval, ast.BinOp):
_allocate_for_binop(builder, var_name, local_sym_tab)
elif isinstance(rval, ast.Name):
# Variable-to-variable assignment (b = a)
_allocate_for_name(builder, var_name, rval, local_sym_tab)
elif isinstance(rval, ast.Attribute):
# Struct field-to-variable assignment (a = dat.fld)
_allocate_for_attribute(
builder, var_name, rval, local_sym_tab, structs_sym_tab
)
else:
logger.warning(
f"Unsupported assignment value type for {var_name}: {type(rval).__name__}"
@ -192,3 +200,88 @@ def allocate_temp_pool(builder, max_temps, local_sym_tab):
temp_var = builder.alloca(ir.IntType(64), name=temp_name)
temp_var.align = 8
local_sym_tab[temp_name] = LocalSymbol(temp_var, ir.IntType(64))
def _allocate_for_name(builder, var_name, rval, local_sym_tab):
    """Pre-allocate ``var_name`` for a variable-to-variable assignment (``b = a``).

    The new slot is created with the IR type of the source variable named by
    ``rval.id`` and registered in ``local_sym_tab`` together with the source's
    metadata. When the source variable is not in the symbol table an error is
    logged and nothing is allocated.
    """
    src_name = rval.id
    if src_name in local_sym_tab:
        src = local_sym_tab[src_name]
        src_type = src.ir_type
        # Mirror the source: same IR type (hence same alignment) and metadata.
        slot = _allocate_with_type(builder, var_name, src_type)
        local_sym_tab[var_name] = LocalSymbol(slot, src_type, src.metadata)
        logger.info(
            f"Pre-allocated {var_name} from {src_name} with type {src_type}"
        )
    else:
        logger.error(f"Source variable '{src_name}' not found in symbol table")
def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, structs_sym_tab):
    """Pre-allocate ``var_name`` for a struct-field read (``a = dat.fld``).

    Only ``<name>.<field>`` accesses are handled. The struct variable must be
    in ``local_sym_tab``, its metadata must name a struct in
    ``structs_sym_tab``, and the field must exist on that struct; otherwise a
    message is logged and nothing is allocated.

    The slot takes the field's IR type, except that a ``[N x i8]`` field is
    allocated as an ``i8*`` so the variable holds a pointer to the characters.
    """
    base = rval.value
    if not isinstance(base, ast.Name):
        logger.warning(f"Complex attribute access not supported for {var_name}")
        return

    struct_var, field_name = base.id, rval.attr

    # Validate the struct variable, its declared struct type, and the field.
    if struct_var not in local_sym_tab:
        logger.error(f"Struct variable '{struct_var}' not found")
        return

    struct_type = local_sym_tab[struct_var].metadata
    if not struct_type or struct_type not in structs_sym_tab:
        logger.error(f"Struct type '{struct_type}' not found")
        return

    struct_info = structs_sym_tab[struct_type]
    if field_name not in struct_info.fields:
        logger.error(f"Field '{field_name}' not found in struct '{struct_type}'")
        return

    field_type = struct_info.field_type(field_name)
    holds_chars = (
        isinstance(field_type, ir.ArrayType)
        and isinstance(field_type.element, ir.IntType)
        and field_type.element.width == 8
    )

    # Special case: char array -> allocate as i8* pointer instead
    alloc_type = ir.PointerType(ir.IntType(8)) if holds_chars else field_type
    if holds_chars:
        logger.info(f"Allocating {var_name} as i8* (pointer to char array)")

    slot = _allocate_with_type(builder, var_name, alloc_type)
    local_sym_tab[var_name] = LocalSymbol(slot, alloc_type)
    logger.info(
        f"Pre-allocated {var_name} from {struct_var}.{field_name} with type {alloc_type}"
    )
def _allocate_with_type(builder, var_name, ir_type):
    """Emit an ``alloca`` of ``ir_type`` named ``var_name`` and return it.

    The instruction's alignment is set from ``_get_alignment(ir_type)``.
    """
    slot = builder.alloca(ir_type, name=var_name)
    slot.align = _get_alignment(ir_type)
    return slot
def _get_alignment(ir_type):
    """Return the byte alignment to use when allocating ``ir_type``.

    Integers align to their byte width, arrays of integers align to the byte
    width of their element, and every other type falls back to 8 (pointer
    size). The result is never smaller than 1.
    """
    if isinstance(ir_type, ir.IntType):
        int_type = ir_type
    elif isinstance(ir_type, ir.ArrayType) and isinstance(ir_type.element, ir.IntType):
        int_type = ir_type.element
    else:
        return 8  # Default: pointer size
    # Sub-byte integers (e.g. i1) would floor to 0, which is not a usable
    # alignment value; clamp to one byte.
    return max(1, int_type.width // 8)

View File

@ -2,6 +2,7 @@ import ast
import logging
from llvmlite import ir
from pythonbpf.expr import eval_expr
from pythonbpf.helper import emit_probe_read_kernel_str_call
logger = logging.getLogger(__name__)
@ -27,27 +28,82 @@ def handle_struct_field_assignment(
# Get field pointer and evaluate value
field_ptr = struct_info.gep(builder, local_sym_tab[var_name].var, field_name)
val = eval_expr(
field_type = struct_info.field_type(field_name)
val_result = eval_expr(
func, module, builder, rval, local_sym_tab, map_sym_tab, structs_sym_tab
)
if val is None:
if val_result is None:
logger.error(f"Failed to evaluate value for {var_name}.{field_name}")
return
# TODO: Handle string assignment to char array (not a priority)
field_type = struct_info.field_type(field_name)
if isinstance(field_type, ir.ArrayType) and val[1] == ir.PointerType(ir.IntType(8)):
logger.warning(
f"String to char array assignment not implemented for {var_name}.{field_name}"
val, val_type = val_result
# Special case: i8* string to [N x i8] char array
if _is_char_array(field_type) and _is_i8_ptr(val_type):
_copy_string_to_char_array(
func,
module,
builder,
val,
field_ptr,
field_type,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
logger.info(f"Copied string to char array {var_name}.{field_name}")
return
# Store the value
builder.store(val[0], field_ptr)
# Regular assignment
builder.store(val, field_ptr)
logger.info(f"Assigned to struct field {var_name}.{field_name}")
def _copy_string_to_char_array(
    func,
    module,
    builder,
    src_ptr,
    dst_ptr,
    array_type,
    local_sym_tab,
    map_sym_tab,
    struct_sym_tab,
):
    """Copy a string (``i8*``) into a char array field (``[N x i8]``).

    ``dst_ptr`` points at the array; it is first narrowed to a pointer to the
    array's first element, then a ``bpf_probe_read_kernel_str`` call is emitted
    with the array's element count as the size limit. ``func``, ``module`` and
    the symbol tables are accepted but not used by this function.
    """
    i32 = ir.IntType(32)
    first_elem_idx = [ir.Constant(i32, 0), ir.Constant(i32, 0)]
    capacity = array_type.count

    # [N x i8]* -> i8*
    first_char_ptr = builder.gep(dst_ptr, first_elem_idx, inbounds=True)

    # Delegate the actual helper call to the shared emitter.
    emit_probe_read_kernel_str_call(builder, first_char_ptr, capacity, src_ptr)
def _is_char_array(ir_type):
    """Return True when ``ir_type`` is an array of 8-bit integers (``[N x i8]``)."""
    if not isinstance(ir_type, ir.ArrayType):
        return False
    elem = ir_type.element
    return isinstance(elem, ir.IntType) and elem.width == 8
def _is_i8_ptr(ir_type):
    """Return True when ``ir_type`` is a pointer to an 8-bit integer (``i8*``)."""
    if not isinstance(ir_type, ir.PointerType):
        return False
    target = ir_type.pointee
    return isinstance(target, ir.IntType) and target.width == 8
def handle_variable_assignment(
func, module, builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab
):
@ -71,6 +127,17 @@ def handle_variable_assignment(
logger.info(f"Initialized struct {struct_name} for variable {var_name}")
return True
# Special case: struct field char array -> pointer
# Handle this before eval_expr to get the pointer, not the value
if isinstance(rval, ast.Attribute) and isinstance(rval.value, ast.Name):
converted_val = _try_convert_char_array_to_ptr(
rval, var_type, builder, local_sym_tab, structs_sym_tab
)
if converted_val is not None:
builder.store(converted_val, var_ptr)
logger.info(f"Assigned char array pointer to {var_name}")
return True
val_result = eval_expr(
func, module, builder, rval, local_sym_tab, map_sym_tab, structs_sym_tab
)
@ -106,3 +173,52 @@ def handle_variable_assignment(
builder.store(val, var_ptr)
logger.info(f"Assigned value to variable {var_name}")
return True
def _try_convert_char_array_to_ptr(
    rval, var_type, builder, local_sym_tab, structs_sym_tab
):
    """Return an ``i8*`` to a struct's char-array field, or None if not applicable.

    The conversion applies only when the assignment target type ``var_type`` is
    ``i8*`` and ``rval`` (``<name>.<field>``) resolves to a ``[N x i8]`` field
    of a known struct variable. In every other case None is returned and no IR
    is emitted.
    """
    # Only convert if target is i8*
    target_is_i8_ptr = (
        isinstance(var_type, ir.PointerType)
        and isinstance(var_type.pointee, ir.IntType)
        and var_type.pointee.width == 8
    )
    if not target_is_i8_ptr:
        return None

    struct_var, field_name = rval.value.id, rval.attr

    # Resolve struct variable -> struct type -> field, bailing out on any miss.
    if struct_var not in local_sym_tab:
        return None
    struct_symbol = local_sym_tab[struct_var]

    struct_type = struct_symbol.metadata
    if not struct_type or struct_type not in structs_sym_tab:
        return None

    struct_info = structs_sym_tab[struct_type]
    if field_name not in struct_info.fields:
        return None

    field_type = struct_info.field_type(field_name)
    field_is_char_array = (
        isinstance(field_type, ir.ArrayType)
        and isinstance(field_type.element, ir.IntType)
        and field_type.element.width == 8
    )
    if not field_is_char_array:
        return None

    # Pointer to the field, then to its first element: [N x i8]* -> i8*
    field_ptr = struct_info.gep(builder, struct_symbol.var, field_name)
    i32 = ir.IntType(32)
    return builder.gep(
        field_ptr,
        [ir.Constant(i32, 0), ir.Constant(i32, 0)],
        inbounds=True,
    )

View File

@ -1,7 +1,7 @@
from .helper_registry import HelperHandlerRegistry
from .helper_utils import reset_scratch_pool
from .bpf_helper_handler import handle_helper_call
from .helpers import ktime, pid, deref, comm, XDP_DROP, XDP_PASS
from .bpf_helper_handler import handle_helper_call, emit_probe_read_kernel_str_call
from .helpers import ktime, pid, deref, comm, probe_read_str, XDP_DROP, XDP_PASS
# Register the helper handler with expr module
@ -59,10 +59,12 @@ __all__ = [
"HelperHandlerRegistry",
"reset_scratch_pool",
"handle_helper_call",
"emit_probe_read_kernel_str_call",
"ktime",
"pid",
"deref",
"comm",
"probe_read_str",
"XDP_DROP",
"XDP_PASS",
]

View File

@ -8,6 +8,8 @@ from .helper_utils import (
get_flags_val,
get_data_ptr_and_size,
get_buffer_ptr_and_size,
get_char_array_ptr_and_size,
get_ptr_from_arg,
)
from .printk_formatter import simple_string_print, handle_fstring_print
@ -26,6 +28,7 @@ class BPFHelperID(Enum):
BPF_GET_CURRENT_PID_TGID = 14
BPF_GET_CURRENT_COMM = 16
BPF_PERF_EVENT_OUTPUT = 25
BPF_PROBE_READ_KERNEL_STR = 115
@HelperHandlerRegistry.register("ktime")
@ -368,6 +371,68 @@ def bpf_perf_event_output_handler(
return result, None
def emit_probe_read_kernel_str_call(builder, dst_ptr, dst_size, src_ptr):
    """Emit an LLVM IR call to the ``bpf_probe_read_kernel_str`` helper.

    The helper is called through its numeric ID
    (``BPFHelperID.BPF_PROBE_READ_KERNEL_STR``) cast to a function pointer of
    type ``i64 (ptr, i32, ptr)``. ``dst_ptr`` and ``src_ptr`` are bitcast to a
    generic pointer and ``dst_size`` is passed as an ``i32`` constant.

    Returns the call instruction (the helper's ``i64`` result).
    """
    generic_ptr = ir.PointerType()
    i32, i64 = ir.IntType(32), ir.IntType(64)

    helper_sig = ir.FunctionType(i64, [generic_ptr, i32, generic_ptr], var_arg=False)
    helper_id = ir.Constant(i64, BPFHelperID.BPF_PROBE_READ_KERNEL_STR.value)
    helper_fn = builder.inttoptr(helper_id, ir.PointerType(helper_sig))

    call_args = [
        builder.bitcast(dst_ptr, generic_ptr),
        ir.Constant(i32, dst_size),
        builder.bitcast(src_ptr, generic_ptr),
    ]
    ret = builder.call(helper_fn, call_args, tail=False)

    logger.info(f"Emitted bpf_probe_read_kernel_str (size={dst_size})")
    return ret
@HelperHandlerRegistry.register("probe_read_str")
def bpf_probe_read_kernel_str_emitter(
    call,
    map_ptr,
    module,
    builder,
    func,
    local_sym_tab=None,
    struct_sym_tab=None,
    map_sym_tab=None,
):
    """Emit LLVM IR for a ``probe_read_str(dst, src)`` call.

    Args:
        call: The call node; ``call.args`` must hold exactly ``(dst, src)``.
        map_ptr: Not used by this handler.
        module, builder, func: IR context forwarded to argument evaluation.
        local_sym_tab, struct_sym_tab, map_sym_tab: Symbol tables used to
            resolve the destination buffer and evaluate the source expression.

    Returns:
        A ``(call_result, ir.IntType(64))`` tuple.

    Raises:
        ValueError: If the call does not have exactly two arguments, or if the
            argument helpers reject the destination or source.
    """
    if len(call.args) != 2:
        raise ValueError(
            f"probe_read_str expects 2 args (dst, src), got {len(call.args)}"
        )
    dst_arg, src_arg = call.args

    # Destination buffer (char array -> i8*) together with its capacity.
    dst_ptr, dst_size = get_char_array_ptr_and_size(
        dst_arg, builder, local_sym_tab, struct_sym_tab
    )

    # Source pointer; its IR type is validated by the helper and not needed here.
    src_ptr, _ = get_ptr_from_arg(
        src_arg, func, module, builder, local_sym_tab, map_sym_tab, struct_sym_tab
    )

    # The shared emitter already logs the emission, so it is not repeated here.
    result = emit_probe_read_kernel_str_call(builder, dst_ptr, dst_size, src_ptr)
    return result, ir.IntType(64)
def handle_helper_call(
call,
module,

View File

@ -4,6 +4,7 @@ import logging
from llvmlite import ir
from pythonbpf.expr import (
get_operand_value,
eval_expr,
)
logger = logging.getLogger(__name__)
@ -190,3 +191,86 @@ def get_buffer_ptr_and_size(buf_arg, builder, local_sym_tab, struct_sym_tab):
raise ValueError(
"comm expects either a struct field (obj.field) or variable name"
)
def get_char_array_ptr_and_size(buf_arg, builder, local_sym_tab, struct_sym_tab):
    """Resolve a destination-buffer argument to a ``(pointer, size)`` pair.

    Two argument shapes are accepted:

    * ``obj.field`` where ``field`` is a ``[N x i8]`` member of a known struct
      variable: returns a pointer to the first element and ``N``.
    * a bare variable name whose IR type is ``i8*``: returns the variable's
      symbol-table pointer and a default size of 256, because the real buffer
      size is not known for a plain string pointer.

    Raises:
        ValueError: If the variable, struct type or field cannot be resolved,
            if the field or variable has the wrong type, or if ``buf_arg`` is
            neither of the supported shapes.
    """
    # Struct field: obj.field
    if isinstance(buf_arg, ast.Attribute) and isinstance(buf_arg.value, ast.Name):
        var_name = buf_arg.value.id
        field_name = buf_arg.attr

        if not (local_sym_tab and var_name in local_sym_tab):
            raise ValueError(f"Variable '{var_name}' not found")

        struct_type = local_sym_tab[var_name].metadata
        if not (struct_sym_tab and struct_type in struct_sym_tab):
            raise ValueError(f"Struct type '{struct_type}' not found")

        struct_info = struct_sym_tab[struct_type]
        if field_name not in struct_info.fields:
            raise ValueError(f"Field '{field_name}' not found")

        field_type = struct_info.field_type(field_name)
        if not _is_char_array(field_type):
            raise ValueError("Expected char array field")

        struct_ptr = local_sym_tab[var_name].var
        field_ptr = struct_info.gep(builder, struct_ptr, field_name)

        # GEP to first element: [N x i8]* -> i8*
        i32 = ir.IntType(32)
        buf_ptr = builder.gep(
            field_ptr,
            [ir.Constant(i32, 0), ir.Constant(i32, 0)],
            inbounds=True,
        )
        return buf_ptr, field_type.count

    if isinstance(buf_arg, ast.Name):
        # NOTE: We shouldn't be doing this as we can't get size info
        var_name = buf_arg.id

        if not (local_sym_tab and var_name in local_sym_tab):
            raise ValueError(f"Variable '{var_name}' not found")

        var_ptr = local_sym_tab[var_name].var
        var_type = local_sym_tab[var_name].ir_type

        # The variable must be an i8*. Check the pointer first so `.pointee`
        # is only read on pointer types, and compare against the IntType
        # class plus width rather than an IntType instance.
        pointee = (
            getattr(var_type, "pointee", None)
            if isinstance(var_type, ir.PointerType)
            else None
        )
        if not (isinstance(pointee, ir.IntType) and pointee.width == 8):
            raise ValueError("Expected str ptr variable")

        return var_ptr, 256  # Size unknown for str ptr, using 256 as default

    raise ValueError("Expected struct field or variable name")
def _is_char_array(ir_type):
    """Return True when the IR type is an array of 8-bit integers (``[N x i8]``)."""
    if isinstance(ir_type, ir.ArrayType):
        element = ir_type.element
        return isinstance(element, ir.IntType) and element.width == 8
    return False
def get_ptr_from_arg(
    arg, func, module, builder, local_sym_tab, map_sym_tab, struct_sym_tab
):
    """Evaluate ``arg`` and return ``(value, type)`` if it is a pointer.

    Raises:
        ValueError: If the expression cannot be evaluated, or if the evaluated
            type is not an ``ir.PointerType``.
    """
    evaluated = eval_expr(
        func, module, builder, arg, local_sym_tab, map_sym_tab, struct_sym_tab
    )
    if evaluated:
        ptr_val, ptr_type = evaluated
        if isinstance(ptr_type, ir.PointerType):
            return ptr_val, ptr_type
        raise ValueError(f"Expected pointer type, got {ptr_type}")
    raise ValueError("Failed to evaluate argument")

View File

@ -22,6 +22,11 @@ def comm(buf):
return ctypes.c_int64(0)
def probe_read_str(dst, src):
    """Safely read a null-terminated string from kernel memory.

    When executed as plain Python this function ignores ``dst`` and ``src``
    and simply returns a zero ``ctypes.c_int64``.
    """
    placeholder = ctypes.c_int64(0)
    return placeholder
# XDP verdict constants, each wrapped as a 64-bit C integer.
# NOTE(review): the values look like the kernel's `enum xdp_action`
# (ABORTED=0, DROP=1, PASS=2) — confirm against the kernel headers.
XDP_ABORTED = ctypes.c_int64(0)
XDP_DROP = ctypes.c_int64(1)
XDP_PASS = ctypes.c_int64(2)

View File

@ -173,6 +173,15 @@ def _populate_fval(ftype, node, fmt_parts, exprs):
raise NotImplementedError(
f"Unsupported pointer target type in f-string: {target}"
)
elif isinstance(ftype, ir.ArrayType):
if isinstance(ftype.element, ir.IntType) and ftype.element.width == 8:
# Char array
fmt_parts.append("%s")
exprs.append(node)
else:
raise NotImplementedError(
f"Unsupported array element type in f-string: {ftype.element}"
)
else:
raise NotImplementedError(f"Unsupported field type in f-string: {ftype}")
@ -197,44 +206,100 @@ def _create_format_string_global(fmt_str, func, module, builder):
def _prepare_expr_args(expr, func, module, builder, local_sym_tab, struct_sym_tab):
    """Evaluate and prepare an expression to use as an arg for bpf_printk.

    A struct char-array field (``obj.field`` of type ``[N x i8]``) is passed as
    a pointer to its first element. Any other expression is evaluated and then
    converted by ``_handle_pointer_arg`` or ``_handle_int_arg`` depending on
    its IR type. When evaluation fails or the type is unsupported, a warning
    is logged and the constant ``i64 0`` is returned instead.
    """
    # Special case: struct field char array needs pointer to first element
    char_array_ptr = _get_struct_char_array_ptr(
        expr, builder, local_sym_tab, struct_sym_tab
    )
    if char_array_ptr is not None:
        return char_array_ptr

    # Regular expression evaluation. eval_expr may return None on failure, so
    # unpack only after checking the result.
    result = eval_expr(func, module, builder, expr, local_sym_tab, None, struct_sym_tab)
    val = result[0] if result else None
    if not val:
        logger.warning("Failed to evaluate expression for bpf_printk, defaulting to 0")
        return ir.Constant(ir.IntType(64), 0)

    # Convert value to bpf_printk compatible type
    if isinstance(val.type, ir.PointerType):
        return _handle_pointer_arg(val, func, builder)
    if isinstance(val.type, ir.IntType):
        return _handle_int_arg(val, builder)

    logger.warning(f"Unsupported type {val.type} in bpf_printk, defaulting to 0")
    return ir.Constant(ir.IntType(64), 0)
def _get_struct_char_array_ptr(expr, builder, local_sym_tab, struct_sym_tab):
"""Get pointer to first element of char array in struct field, or None."""
if not (isinstance(expr, ast.Attribute) and isinstance(expr.value, ast.Name)):
return None
var_name = expr.value.id
field_name = expr.attr
# Check if it's a valid struct field
if not (
local_sym_tab
and var_name in local_sym_tab
and struct_sym_tab
and local_sym_tab[var_name].metadata in struct_sym_tab
):
return None
struct_type = local_sym_tab[var_name].metadata
struct_info = struct_sym_tab[struct_type]
if field_name not in struct_info.fields:
return None
field_type = struct_info.field_type(field_name)
# Check if it's a char array
is_char_array = (
isinstance(field_type, ir.ArrayType)
and isinstance(field_type.element, ir.IntType)
and field_type.element.width == 8
)
if val:
if isinstance(val.type, ir.PointerType):
target, depth = get_base_type_and_depth(val.type)
if isinstance(target, ir.IntType):
if target.width >= 32:
val = deref_to_depth(func, builder, val, depth)
val = builder.sext(val, ir.IntType(64))
elif target.width == 8 and depth == 1:
# NOTE: i8* is string, no need to deref
pass
if not is_char_array:
return None
else:
logger.warning(
"Only int and ptr supported in bpf_printk args. Others default to 0."
)
val = ir.Constant(ir.IntType(64), 0)
elif isinstance(val.type, ir.IntType):
if val.type.width < 64:
val = builder.sext(val, ir.IntType(64))
else:
logger.warning(
"Only int and ptr supported in bpf_printk args. Others default to 0."
)
val = ir.Constant(ir.IntType(64), 0)
return val
else:
logger.warning(
"Failed to evaluate expression for bpf_printk argument. "
"It will be converted to 0."
)
# Get field pointer and GEP to first element: [N x i8]* -> i8*
struct_ptr = local_sym_tab[var_name].var
field_ptr = struct_info.gep(builder, struct_ptr, field_name)
return builder.gep(
field_ptr,
[ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), 0)],
inbounds=True,
)
def _handle_pointer_arg(val, func, builder):
    """Convert a pointer-typed value into a bpf_printk argument.

    * ``i8*`` (depth 1) is treated as a string and returned unchanged.
    * Pointers to integers of width >= 32 are dereferenced to the base value
      and sign-extended to ``i64``.
    * Anything else logs a warning and yields the constant ``i64 0``.
    """
    base, depth = get_base_type_and_depth(val.type)

    if isinstance(base, ir.IntType):
        # i8* is string - use as-is
        if base.width == 8 and depth == 1:
            return val

        # Integer pointers: dereference and sign-extend to i64
        if base.width >= 32:
            loaded = deref_to_depth(func, builder, val, depth)
            return builder.sext(loaded, ir.IntType(64))

        logger.warning("Unsupported pointer width in bpf_printk, defaulting to 0")
    else:
        logger.warning("Only int pointers supported in bpf_printk, defaulting to 0")

    return ir.Constant(ir.IntType(64), 0)
def _handle_int_arg(val, builder):
"""Convert integer type for bpf_printk (sign-extend to i64)."""
if val.type.width < 64:
return builder.sext(val, ir.IntType(64))
return val