mirror of
https://github.com/varun-r-mallya/Python-BPF.git
synced 2025-12-31 21:06:25 +00:00
Merge pull request #4 from varun-r-mallya/type_system
Type system and strings
This commit is contained in:
@ -10,6 +10,7 @@ from ctypes import c_void_p, c_int64, c_int32, c_uint64
|
||||
class data_t:
|
||||
pid: c_uint64
|
||||
ts: c_uint64
|
||||
comm: str(16)
|
||||
|
||||
|
||||
@bpf
|
||||
@ -21,13 +22,14 @@ def events() -> PerfEventArray:
|
||||
@bpf
|
||||
@section("tracepoint/syscalls/sys_enter_clone")
|
||||
def hello(ctx: c_void_p) -> c_int32:
|
||||
strobj = "Hi"
|
||||
dataobj = data_t()
|
||||
ts = ktime()
|
||||
process_id = pid()
|
||||
strobj = "hellohellohello"
|
||||
dataobj.pid = process_id
|
||||
dataobj.ts = ts
|
||||
print(f"clone called at {ts} by pid {process_id}")
|
||||
# dataobj.comm = strobj
|
||||
print(f"clone called at {ts} by pid {process_id}, comm {strobj}")
|
||||
events.output(dataobj)
|
||||
return c_int32(0)
|
||||
|
||||
|
||||
@ -13,7 +13,7 @@ def bpf_ktime_get_ns_emitter(call, map_ptr, module, builder, func, local_sym_tab
|
||||
fn_ptr_type = ir.PointerType(fn_type)
|
||||
fn_ptr = builder.inttoptr(helper_id, fn_ptr_type)
|
||||
result = builder.call(fn_ptr, [], tail=False)
|
||||
return result
|
||||
return result, ir.IntType(64)
|
||||
|
||||
|
||||
def bpf_map_lookup_elem_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, struct_sym_tab=None, local_var_metadata=None):
|
||||
@ -60,7 +60,7 @@ def bpf_map_lookup_elem_emitter(call, map_ptr, module, builder, func, local_sym_
|
||||
|
||||
result = builder.call(fn_ptr, [map_void_ptr, key_ptr], tail=False)
|
||||
|
||||
return result
|
||||
return result, ir.PointerType()
|
||||
|
||||
|
||||
def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, local_var_metadata=None):
|
||||
@ -75,6 +75,7 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None,
|
||||
exprs = []
|
||||
|
||||
for value in call.args[0].values:
|
||||
print("Value in f-string:", ast.dump(value))
|
||||
if isinstance(value, ast.Constant):
|
||||
if isinstance(value.value, str):
|
||||
fmt_parts.append(value.value)
|
||||
@ -86,10 +87,24 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None,
|
||||
"Only string and integer constants are supported in f-string.")
|
||||
elif isinstance(value, ast.FormattedValue):
|
||||
print("Formatted value:", ast.dump(value))
|
||||
# Assume int for now
|
||||
fmt_parts.append("%lld")
|
||||
# TODO: Dirty handling here, only checks for int or str
|
||||
if isinstance(value.value, ast.Name):
|
||||
exprs.append(value.value)
|
||||
if local_sym_tab and value.value.id in local_sym_tab:
|
||||
var_ptr, var_type = local_sym_tab[value.value.id]
|
||||
if isinstance(var_type, ir.IntType):
|
||||
fmt_parts.append("%lld")
|
||||
exprs.append(value.value)
|
||||
elif var_type == ir.PointerType(ir.IntType(8)):
|
||||
# Case with string
|
||||
fmt_parts.append("%s")
|
||||
exprs.append(value.value)
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
"Only integer and pointer types are supported in formatted values.")
|
||||
print("Formatted value variable:", var_ptr, var_type)
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Variable {value.value.id} not found in local symbol table.")
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
"Only simple variable names are supported in formatted values.")
|
||||
@ -121,7 +136,8 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None,
|
||||
"Warning: bpf_printk supports up to 3 arguments, extra arguments will be ignored.")
|
||||
|
||||
for expr in exprs[:3]:
|
||||
val = eval_expr(func, module, builder, expr, local_sym_tab, None)
|
||||
val, _ = eval_expr(func, module, builder,
|
||||
expr, local_sym_tab, None)
|
||||
if val:
|
||||
if isinstance(val.type, ir.PointerType):
|
||||
val = builder.ptrtoint(val, ir.IntType(64))
|
||||
@ -137,7 +153,6 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None,
|
||||
print(
|
||||
"Warning: Failed to evaluate expression for bpf_printk argument. It will be converted to 0.")
|
||||
args.append(ir.Constant(ir.IntType(64), 0))
|
||||
|
||||
fn_type = ir.FunctionType(ir.IntType(
|
||||
64), [ir.PointerType(), ir.IntType(32)], var_arg=True)
|
||||
fn_ptr_type = ir.PointerType(fn_type)
|
||||
@ -266,7 +281,7 @@ def bpf_map_update_elem_emitter(call, map_ptr, module, builder, func, local_sym_
|
||||
result = builder.call(
|
||||
fn_ptr, [map_void_ptr, key_ptr, value_ptr, flags_const], tail=False)
|
||||
|
||||
return result
|
||||
return result, None
|
||||
|
||||
|
||||
def bpf_map_delete_elem_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, struct_sym_tab=None, local_var_metadata=None):
|
||||
@ -321,7 +336,7 @@ def bpf_map_delete_elem_emitter(call, map_ptr, module, builder, func, local_sym_
|
||||
# Call the helper function
|
||||
result = builder.call(fn_ptr, [map_void_ptr, key_ptr], tail=False)
|
||||
|
||||
return result
|
||||
return result, None
|
||||
|
||||
|
||||
def bpf_get_current_pid_tgid_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, local_var_metadata=None):
|
||||
@ -338,7 +353,7 @@ def bpf_get_current_pid_tgid_emitter(call, map_ptr, module, builder, func, local
|
||||
# Extract the lower 32 bits (PID) using bitwise AND with 0xFFFFFFFF
|
||||
mask = ir.Constant(ir.IntType(64), 0xFFFFFFFF)
|
||||
pid = builder.and_(result, mask)
|
||||
return pid
|
||||
return pid, ir.IntType(64)
|
||||
|
||||
|
||||
def bpf_perf_event_output_handler(call, map_ptr, module, builder, func, local_sym_tab=None, struct_sym_tab=None, local_var_metadata=None):
|
||||
@ -387,7 +402,7 @@ def bpf_perf_event_output_handler(call, map_ptr, module, builder, func, local_sy
|
||||
|
||||
result = builder.call(
|
||||
fn_ptr, [ctx_ptr, map_void_ptr, flags_val, data_void_ptr, size_val], tail=False)
|
||||
return result
|
||||
return result, None
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
"Only simple object names are supported as data in perf event output.")
|
||||
|
||||
@ -93,6 +93,7 @@ def compile_to_ir(filename: str, output: str):
|
||||
|
||||
module.add_named_metadata("llvm.ident", ["llvmlite PythonBPF v0.0.1"])
|
||||
|
||||
print(f"IR written to {output}")
|
||||
with open(output, "w") as f:
|
||||
f.write(f"source_filename = \"{filename}\"\n")
|
||||
f.write(str(module))
|
||||
@ -118,6 +119,7 @@ def compile():
|
||||
|
||||
print(f"Object written to {o_file}, {ll_file} can be removed")
|
||||
|
||||
|
||||
def BPF() -> BpfProgram:
|
||||
caller_frame = inspect.stack()[1]
|
||||
caller_file = Path(caller_frame.filename).resolve()
|
||||
@ -129,5 +131,5 @@ def BPF() -> BpfProgram:
|
||||
"llc", "-march=bpf", "-filetype=obj", "-O2",
|
||||
str(ll_file), "-o", str(o_file)
|
||||
], check=True)
|
||||
|
||||
|
||||
return BpfProgram(str(o_file))
|
||||
|
||||
@ -3,20 +3,20 @@ from llvmlite import ir
|
||||
|
||||
|
||||
def eval_expr(func, module, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab=None, local_var_metadata=None):
|
||||
print(f"Evaluating expression: {expr}")
|
||||
print(f"Evaluating expression: {ast.dump(expr)}")
|
||||
if isinstance(expr, ast.Name):
|
||||
if expr.id in local_sym_tab:
|
||||
var = local_sym_tab[expr.id][0]
|
||||
val = builder.load(var)
|
||||
return val
|
||||
return val, local_sym_tab[expr.id][1] # return value and type
|
||||
else:
|
||||
print(f"Undefined variable {expr.id}")
|
||||
return None
|
||||
elif isinstance(expr, ast.Constant):
|
||||
if isinstance(expr.value, int):
|
||||
return ir.Constant(ir.IntType(64), expr.value)
|
||||
return ir.Constant(ir.IntType(64), expr.value), ir.IntType(64)
|
||||
elif isinstance(expr.value, bool):
|
||||
return ir.Constant(ir.IntType(1), int(expr.value))
|
||||
return ir.Constant(ir.IntType(1), int(expr.value)), ir.IntType(1)
|
||||
else:
|
||||
print("Unsupported constant type")
|
||||
return None
|
||||
@ -44,8 +44,9 @@ def eval_expr(func, module, builder, expr, local_sym_tab, map_sym_tab, structs_s
|
||||
if arg is None:
|
||||
print("Failed to evaluate deref argument")
|
||||
return None
|
||||
# Since we are handling only name case, directly take type from sym tab
|
||||
val = builder.load(arg)
|
||||
return val
|
||||
return val, local_sym_tab[expr.args[0].id][1]
|
||||
|
||||
# check for helpers
|
||||
if expr.func.id in helper_func_list:
|
||||
|
||||
@ -58,10 +58,18 @@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab, struc
|
||||
inbounds=True)
|
||||
val = eval_expr(func, module, builder, rval,
|
||||
local_sym_tab, map_sym_tab, structs_sym_tab)
|
||||
if isinstance(struct_info["field_types"][field_idx], ir.ArrayType) and val[1] == ir.PointerType(ir.IntType(8)):
|
||||
# TODO: Figure it out, not a priority rn
|
||||
# Special case for string assignment to char array
|
||||
#str_len = struct_info["field_types"][field_idx].count
|
||||
#assign_string_to_array(builder, field_ptr, val[0], str_len)
|
||||
#print(f"Assigned to struct field {var_name}.{field_name}")
|
||||
pass
|
||||
if val is None:
|
||||
print("Failed to evaluate struct field assignment")
|
||||
return
|
||||
builder.store(val, field_ptr)
|
||||
print(field_ptr)
|
||||
builder.store(val[0], field_ptr)
|
||||
print(f"Assigned to struct field {var_name}.{field_name}")
|
||||
return
|
||||
elif isinstance(rval, ast.Constant):
|
||||
@ -114,7 +122,7 @@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab, struc
|
||||
# var.align = 8
|
||||
val = handle_helper_call(
|
||||
rval, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab, local_var_metadata)
|
||||
builder.store(val, local_sym_tab[var_name][0])
|
||||
builder.store(val[0], local_sym_tab[var_name][0])
|
||||
# local_sym_tab[var_name] = var
|
||||
print(f"Assigned constant {rval.func.id} to {var_name}")
|
||||
elif call_type == "deref" and len(rval.args) == 1:
|
||||
@ -125,7 +133,7 @@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab, struc
|
||||
print("Failed to evaluate deref argument")
|
||||
return
|
||||
print(f"Dereferenced value: {val}, storing in {var_name}")
|
||||
builder.store(val, local_sym_tab[var_name][0])
|
||||
builder.store(val[0], local_sym_tab[var_name][0])
|
||||
# local_sym_tab[var_name] = var
|
||||
print(f"Dereferenced and assigned to {var_name}")
|
||||
elif call_type in structs_sym_tab and len(rval.args) == 0:
|
||||
@ -155,7 +163,7 @@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab, struc
|
||||
rval, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab, local_var_metadata)
|
||||
# var = builder.alloca(ir.IntType(64), name=var_name)
|
||||
# var.align = 8
|
||||
builder.store(val, local_sym_tab[var_name][0])
|
||||
builder.store(val[0], local_sym_tab[var_name][0])
|
||||
# local_sym_tab[var_name] = var
|
||||
else:
|
||||
print("Unsupported assignment call structure")
|
||||
@ -196,12 +204,12 @@ def handle_cond(func, module, builder, cond, local_sym_tab, map_sym_tab):
|
||||
return None
|
||||
elif isinstance(cond, ast.Compare):
|
||||
lhs = eval_expr(func, module, builder, cond.left,
|
||||
local_sym_tab, map_sym_tab)
|
||||
local_sym_tab, map_sym_tab)[0]
|
||||
if len(cond.ops) != 1 or len(cond.comparators) != 1:
|
||||
print("Unsupported complex comparison")
|
||||
return None
|
||||
rhs = eval_expr(func, module, builder,
|
||||
cond.comparators[0], local_sym_tab, map_sym_tab)
|
||||
cond.comparators[0], local_sym_tab, map_sym_tab)[0]
|
||||
op = cond.ops[0]
|
||||
|
||||
if lhs.type != rhs.type:
|
||||
@ -462,7 +470,6 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t
|
||||
|
||||
process_func_body(module, builder, func_node, func,
|
||||
ret_type, map_sym_tab, structs_sym_tab)
|
||||
|
||||
return func
|
||||
|
||||
|
||||
@ -538,3 +545,46 @@ def infer_return_type(func_node: ast.FunctionDef):
|
||||
raise ValueError("Conflicting return types:"
|
||||
f"{found_type} vs {t}")
|
||||
return found_type or "None"
|
||||
|
||||
# For string assignment to fixed-size arrays
|
||||
def assign_string_to_array(builder, target_array_ptr, source_string_ptr, array_length):
|
||||
"""
|
||||
Copy a string (i8*) to a fixed-size array ([N x i8]*)
|
||||
"""
|
||||
# Create a loop to copy characters one by one
|
||||
entry_block = builder.block
|
||||
copy_block = builder.append_basic_block("copy_char")
|
||||
end_block = builder.append_basic_block("copy_end")
|
||||
|
||||
# Create loop counter
|
||||
i = builder.alloca(ir.IntType(32))
|
||||
builder.store(ir.Constant(ir.IntType(32), 0), i)
|
||||
|
||||
# Start the loop
|
||||
builder.branch(copy_block)
|
||||
|
||||
# Copy loop
|
||||
builder.position_at_end(copy_block)
|
||||
idx = builder.load(i)
|
||||
in_bounds = builder.icmp_unsigned('<', idx, ir.Constant(ir.IntType(32), array_length))
|
||||
builder.cbranch(in_bounds, copy_block, end_block)
|
||||
|
||||
with builder.if_then(in_bounds):
|
||||
# Load character from source
|
||||
src_ptr = builder.gep(source_string_ptr, [idx])
|
||||
char = builder.load(src_ptr)
|
||||
|
||||
# Store character in target
|
||||
dst_ptr = builder.gep(target_array_ptr, [ir.Constant(ir.IntType(32), 0), idx])
|
||||
builder.store(char, dst_ptr)
|
||||
|
||||
# Increment counter
|
||||
next_idx = builder.add(idx, ir.Constant(ir.IntType(32), 1))
|
||||
builder.store(next_idx, i)
|
||||
|
||||
builder.position_at_end(end_block)
|
||||
|
||||
# Ensure null termination
|
||||
last_idx = ir.Constant(ir.IntType(32), array_length - 1)
|
||||
null_ptr = builder.gep(target_array_ptr, [ir.Constant(ir.IntType(32), 0), last_idx])
|
||||
builder.store(ir.Constant(ir.IntType(8), 0), null_ptr)
|
||||
|
||||
@ -28,14 +28,25 @@ def process_bpf_struct(cls_node, module):
|
||||
|
||||
for item in cls_node.body:
|
||||
if isinstance(item, ast.AnnAssign) and isinstance(item.target, ast.Name):
|
||||
print(f"Field: {item.target.id}, Type: "
|
||||
f"{ast.dump(item.annotation)}")
|
||||
field_names.append(item.target.id)
|
||||
field_types.append(ctypes_to_ir(item.annotation.id))
|
||||
if isinstance(item.annotation, ast.Call) and isinstance(item.annotation.func, ast.Name) and item.annotation.func.id == "str":
|
||||
# This is a char array with fixed length
|
||||
# TODO: For now assuming str is always called with constant
|
||||
field_types.append(ir.ArrayType(
|
||||
ir.IntType(8), item.annotation.args[0].value))
|
||||
else:
|
||||
field_types.append(ctypes_to_ir(item.annotation.id))
|
||||
|
||||
curr_offset = 0
|
||||
for ftype in field_types:
|
||||
if isinstance(ftype, ir.IntType):
|
||||
fsize = ftype.width // 8
|
||||
alignment = fsize
|
||||
elif isinstance(ftype, ir.ArrayType):
|
||||
fsize = ftype.count * (ftype.element.width // 8)
|
||||
alignment = ftype.element.width // 8
|
||||
elif isinstance(ftype, ir.PointerType):
|
||||
fsize = 8
|
||||
alignment = 8
|
||||
@ -52,6 +63,7 @@ def process_bpf_struct(cls_node, module):
|
||||
structs_sym_tab[struct_name] = {
|
||||
"type": struct_type,
|
||||
"fields": {name: idx for idx, name in enumerate(field_names)},
|
||||
"size": total_size
|
||||
"size": total_size,
|
||||
"field_types": field_types,
|
||||
}
|
||||
print(f"Created struct {struct_name} with fields {field_names}")
|
||||
|
||||
Reference in New Issue
Block a user