11 Commits

7 changed files with 173 additions and 35 deletions

View File

@ -10,6 +10,7 @@ from ctypes import c_void_p, c_int64, c_int32, c_uint64
class data_t:
pid: c_uint64
ts: c_uint64
comm: str(16)
@bpf
@ -21,13 +22,14 @@ def events() -> PerfEventArray:
@bpf
@section("tracepoint/syscalls/sys_enter_clone")
def hello(ctx: c_void_p) -> c_int32:
strobj = "Hi"
dataobj = data_t()
ts = ktime()
process_id = pid()
dataobj.pid = process_id
dataobj.ts = ts
print(f"clone called at {ts} by pid {process_id}")
strobj = "hellohellohello"
dataobj.pid = pid()
dataobj.ts = ktime()
# dataobj.comm = strobj
print(f"clone called at {dataobj.ts} by pid {dataobj.pid}, comm {strobj}")
events.output(dataobj)
return c_int32(0)

View File

@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "pythonbpf"
version = "0.1.2"
version = "0.1.3"
description = "Reduced Python frontend for eBPF"
authors = [
{ name = "r41k0u", email="pragyanshchaturvedi18@gmail.com" },

View File

@ -3,7 +3,7 @@ from llvmlite import ir
from .expr_pass import eval_expr
def bpf_ktime_get_ns_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, local_var_metadata=None):
def bpf_ktime_get_ns_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, struct_sym_tab=None, local_var_metadata=None):
"""
Emit LLVM IR for bpf_ktime_get_ns helper function call.
"""
@ -13,7 +13,7 @@ def bpf_ktime_get_ns_emitter(call, map_ptr, module, builder, func, local_sym_tab
fn_ptr_type = ir.PointerType(fn_type)
fn_ptr = builder.inttoptr(helper_id, fn_ptr_type)
result = builder.call(fn_ptr, [], tail=False)
return result
return result, ir.IntType(64)
def bpf_map_lookup_elem_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, struct_sym_tab=None, local_var_metadata=None):
@ -60,10 +60,10 @@ def bpf_map_lookup_elem_emitter(call, map_ptr, module, builder, func, local_sym_
result = builder.call(fn_ptr, [map_void_ptr, key_ptr], tail=False)
return result
return result, ir.PointerType()
def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, local_var_metadata=None):
def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, struct_sym_tab=None, local_var_metadata=None):
if not hasattr(func, "_fmt_counter"):
func._fmt_counter = 0
@ -75,6 +75,7 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None,
exprs = []
for value in call.args[0].values:
print("Value in f-string:", ast.dump(value))
if isinstance(value, ast.Constant):
if isinstance(value.value, str):
fmt_parts.append(value.value)
@ -86,10 +87,56 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None,
"Only string and integer constants are supported in f-string.")
elif isinstance(value, ast.FormattedValue):
print("Formatted value:", ast.dump(value))
# Assume int for now
fmt_parts.append("%lld")
# TODO: Dirty handling here, only checks for int or str
if isinstance(value.value, ast.Name):
exprs.append(value.value)
if local_sym_tab and value.value.id in local_sym_tab:
var_ptr, var_type = local_sym_tab[value.value.id]
if isinstance(var_type, ir.IntType):
fmt_parts.append("%lld")
exprs.append(value.value)
elif var_type == ir.PointerType(ir.IntType(8)):
# Case with string
fmt_parts.append("%s")
exprs.append(value.value)
else:
raise NotImplementedError(
"Only integer and pointer types are supported in formatted values.")
else:
raise ValueError(
f"Variable {value.value.id} not found in local symbol table.")
elif isinstance(value.value, ast.Attribute):
# object field access from struct
if isinstance(value.value.value, ast.Name) and local_sym_tab and value.value.value.id in local_sym_tab:
var_name = value.value.value.id
field_name = value.value.attr
if local_var_metadata and var_name in local_var_metadata:
var_type = local_var_metadata[var_name]
if var_type in struct_sym_tab:
struct_info = struct_sym_tab[var_type]
if field_name in struct_info["fields"]:
field_index = struct_info["fields"][field_name]
field_type = struct_info["field_types"][field_index]
if isinstance(field_type, ir.IntType):
fmt_parts.append("%lld")
exprs.append(value.value)
elif field_type == ir.PointerType(ir.IntType(8)):
fmt_parts.append("%s")
exprs.append(value.value)
else:
raise NotImplementedError(
"Only integer and pointer types are supported in formatted values.")
else:
raise ValueError(
f"Field {field_name} not found in struct {var_type}.")
else:
raise ValueError(
f"Struct type {var_type} for variable {var_name} not found in struct symbol table.")
else:
raise ValueError(
f"Metadata for variable {var_name} not found in local variable metadata.")
else:
raise ValueError(
f"Variable {value.value.value.id} not found in local symbol table.")
else:
raise NotImplementedError(
"Only simple variable names are supported in formatted values.")
@ -121,7 +168,9 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None,
"Warning: bpf_printk supports up to 3 arguments, extra arguments will be ignored.")
for expr in exprs[:3]:
val = eval_expr(func, module, builder, expr, local_sym_tab, None)
print(f"{ast.dump(expr)}")
val, _ = eval_expr(func, module, builder,
expr, local_sym_tab, None, struct_sym_tab, local_var_metadata)
if val:
if isinstance(val.type, ir.PointerType):
val = builder.ptrtoint(val, ir.IntType(64))
@ -137,7 +186,6 @@ def bpf_printk_emitter(call, map_ptr, module, builder, func, local_sym_tab=None,
print(
"Warning: Failed to evaluate expression for bpf_printk argument. It will be converted to 0.")
args.append(ir.Constant(ir.IntType(64), 0))
fn_type = ir.FunctionType(ir.IntType(
64), [ir.PointerType(), ir.IntType(32)], var_arg=True)
fn_ptr_type = ir.PointerType(fn_type)
@ -266,7 +314,7 @@ def bpf_map_update_elem_emitter(call, map_ptr, module, builder, func, local_sym_
result = builder.call(
fn_ptr, [map_void_ptr, key_ptr, value_ptr, flags_const], tail=False)
return result
return result, None
def bpf_map_delete_elem_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, struct_sym_tab=None, local_var_metadata=None):
@ -321,10 +369,10 @@ def bpf_map_delete_elem_emitter(call, map_ptr, module, builder, func, local_sym_
# Call the helper function
result = builder.call(fn_ptr, [map_void_ptr, key_ptr], tail=False)
return result
return result, None
def bpf_get_current_pid_tgid_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, local_var_metadata=None):
def bpf_get_current_pid_tgid_emitter(call, map_ptr, module, builder, func, local_sym_tab=None, struct_sym_tab=None, local_var_metadata=None):
"""
Emit LLVM IR for bpf_get_current_pid_tgid helper function call.
"""
@ -338,7 +386,7 @@ def bpf_get_current_pid_tgid_emitter(call, map_ptr, module, builder, func, local
# Extract the lower 32 bits (PID) using bitwise AND with 0xFFFFFFFF
mask = ir.Constant(ir.IntType(64), 0xFFFFFFFF)
pid = builder.and_(result, mask)
return pid
return pid, ir.IntType(64)
def bpf_perf_event_output_handler(call, map_ptr, module, builder, func, local_sym_tab=None, struct_sym_tab=None, local_var_metadata=None):
@ -387,7 +435,7 @@ def bpf_perf_event_output_handler(call, map_ptr, module, builder, func, local_sy
result = builder.call(
fn_ptr, [ctx_ptr, map_void_ptr, flags_val, data_void_ptr, size_val], tail=False)
return result
return result, None
else:
raise NotImplementedError(
"Only simple object names are supported as data in perf event output.")
@ -405,11 +453,12 @@ helper_func_list = {
def handle_helper_call(call, module, builder, func, local_sym_tab=None, map_sym_tab=None, struct_sym_tab=None, local_var_metadata=None):
print(local_var_metadata)
if isinstance(call.func, ast.Name):
func_name = call.func.id
if func_name in helper_func_list:
# it is not a map method call
return helper_func_list[func_name](call, None, module, builder, func, local_sym_tab)
return helper_func_list[func_name](call, None, module, builder, func, local_sym_tab, struct_sym_tab, local_var_metadata)
else:
raise NotImplementedError(
f"Function {func_name} is not implemented as a helper function.")

View File

@ -93,6 +93,7 @@ def compile_to_ir(filename: str, output: str):
module.add_named_metadata("llvm.ident", ["llvmlite PythonBPF v0.0.1"])
print(f"IR written to {output}")
with open(output, "w") as f:
f.write(f"source_filename = \"{filename}\"\n")
f.write(str(module))
@ -118,6 +119,7 @@ def compile():
print(f"Object written to {o_file}, {ll_file} can be removed")
def BPF() -> BpfProgram:
caller_frame = inspect.stack()[1]
caller_file = Path(caller_frame.filename).resolve()
@ -129,5 +131,5 @@ def BPF() -> BpfProgram:
"llc", "-march=bpf", "-filetype=obj", "-O2",
str(ll_file), "-o", str(o_file)
], check=True)
return BpfProgram(str(o_file))

View File

@ -3,20 +3,21 @@ from llvmlite import ir
def eval_expr(func, module, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab=None, local_var_metadata=None):
print(f"Evaluating expression: {expr}")
print(f"Evaluating expression: {ast.dump(expr)}")
print(local_var_metadata)
if isinstance(expr, ast.Name):
if expr.id in local_sym_tab:
var = local_sym_tab[expr.id][0]
val = builder.load(var)
return val
return val, local_sym_tab[expr.id][1] # return value and type
else:
print(f"Undefined variable {expr.id}")
return None
elif isinstance(expr, ast.Constant):
if isinstance(expr.value, int):
return ir.Constant(ir.IntType(64), expr.value)
return ir.Constant(ir.IntType(64), expr.value), ir.IntType(64)
elif isinstance(expr.value, bool):
return ir.Constant(ir.IntType(1), int(expr.value))
return ir.Constant(ir.IntType(1), int(expr.value)), ir.IntType(1)
else:
print("Unsupported constant type")
return None
@ -44,8 +45,9 @@ def eval_expr(func, module, builder, expr, local_sym_tab, map_sym_tab, structs_s
if arg is None:
print("Failed to evaluate deref argument")
return None
# Since we are handling only name case, directly take type from sym tab
val = builder.load(arg)
return val
return val, local_sym_tab[expr.args[0].id][1]
# check for helpers
if expr.func.id in helper_func_list:
@ -65,6 +67,25 @@ def eval_expr(func, module, builder, expr, local_sym_tab, map_sym_tab, structs_s
if method_name in helper_func_list:
return handle_helper_call(
expr, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab, local_var_metadata)
elif isinstance(expr, ast.Attribute):
if isinstance(expr.value, ast.Name):
var_name = expr.value.id
attr_name = expr.attr
if var_name in local_sym_tab:
var_ptr, var_type = local_sym_tab[var_name]
print(f"Loading attribute "
f"{attr_name} from variable {var_name}")
print(f"Variable type: {var_type}, Variable ptr: {var_ptr}")
print(local_var_metadata)
if local_var_metadata and var_name in local_var_metadata:
metadata = structs_sym_tab[local_var_metadata[var_name]]
if attr_name in metadata["fields"]:
field_idx = metadata["fields"][attr_name]
gep = builder.gep(var_ptr, [ir.Constant(ir.IntType(32), 0),
ir.Constant(ir.IntType(32), field_idx)])
val = builder.load(gep)
field_type = metadata["field_types"][field_idx]
return val, field_type
print("Unsupported expression evaluation")
return None
@ -72,6 +93,7 @@ def eval_expr(func, module, builder, expr, local_sym_tab, map_sym_tab, structs_s
def handle_expr(func, module, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab, local_var_metadata):
"""Handle expression statements in the function body."""
print(f"Handling expression: {ast.dump(expr)}")
print(local_var_metadata)
call = expr.value
if isinstance(call, ast.Call):
eval_expr(func, module, builder, call, local_sym_tab,

View File

@ -58,10 +58,18 @@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab, struc
inbounds=True)
val = eval_expr(func, module, builder, rval,
local_sym_tab, map_sym_tab, structs_sym_tab)
if isinstance(struct_info["field_types"][field_idx], ir.ArrayType) and val[1] == ir.PointerType(ir.IntType(8)):
# TODO: Figure it out, not a priority rn
# Special case for string assignment to char array
#str_len = struct_info["field_types"][field_idx].count
#assign_string_to_array(builder, field_ptr, val[0], str_len)
#print(f"Assigned to struct field {var_name}.{field_name}")
pass
if val is None:
print("Failed to evaluate struct field assignment")
return
builder.store(val, field_ptr)
print(field_ptr)
builder.store(val[0], field_ptr)
print(f"Assigned to struct field {var_name}.{field_name}")
return
elif isinstance(rval, ast.Constant):
@ -114,7 +122,7 @@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab, struc
# var.align = 8
val = handle_helper_call(
rval, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab, local_var_metadata)
builder.store(val, local_sym_tab[var_name][0])
builder.store(val[0], local_sym_tab[var_name][0])
# local_sym_tab[var_name] = var
print(f"Assigned constant {rval.func.id} to {var_name}")
elif call_type == "deref" and len(rval.args) == 1:
@ -125,7 +133,7 @@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab, struc
print("Failed to evaluate deref argument")
return
print(f"Dereferenced value: {val}, storing in {var_name}")
builder.store(val, local_sym_tab[var_name][0])
builder.store(val[0], local_sym_tab[var_name][0])
# local_sym_tab[var_name] = var
print(f"Dereferenced and assigned to {var_name}")
elif call_type in structs_sym_tab and len(rval.args) == 0:
@ -155,7 +163,7 @@ def handle_assign(func, module, builder, stmt, map_sym_tab, local_sym_tab, struc
rval, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab, local_var_metadata)
# var = builder.alloca(ir.IntType(64), name=var_name)
# var.align = 8
builder.store(val, local_sym_tab[var_name][0])
builder.store(val[0], local_sym_tab[var_name][0])
# local_sym_tab[var_name] = var
else:
print("Unsupported assignment call structure")
@ -196,12 +204,12 @@ def handle_cond(func, module, builder, cond, local_sym_tab, map_sym_tab):
return None
elif isinstance(cond, ast.Compare):
lhs = eval_expr(func, module, builder, cond.left,
local_sym_tab, map_sym_tab)
local_sym_tab, map_sym_tab)[0]
if len(cond.ops) != 1 or len(cond.comparators) != 1:
print("Unsupported complex comparison")
return None
rhs = eval_expr(func, module, builder,
cond.comparators[0], local_sym_tab, map_sym_tab)
cond.comparators[0], local_sym_tab, map_sym_tab)[0]
op = cond.ops[0]
if lhs.type != rhs.type:
@ -274,6 +282,7 @@ def handle_if(func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_s
def process_stmt(func, module, builder, stmt, local_sym_tab, map_sym_tab, structs_sym_tab, did_return, ret_type=ir.IntType(64)):
print(f"Processing statement: {ast.dump(stmt)}")
if isinstance(stmt, ast.Expr):
print(local_var_metadata)
handle_expr(func, module, builder, stmt, local_sym_tab,
map_sym_tab, structs_sym_tab, local_var_metadata)
elif isinstance(stmt, ast.Assign):
@ -462,7 +471,6 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t
process_func_body(module, builder, func_node, func,
ret_type, map_sym_tab, structs_sym_tab)
return func
@ -538,3 +546,46 @@ def infer_return_type(func_node: ast.FunctionDef):
raise ValueError("Conflicting return types:"
f"{found_type} vs {t}")
return found_type or "None"
# For string assignment to fixed-size arrays
def assign_string_to_array(builder, target_array_ptr, source_string_ptr, array_length):
    """
    Emit IR that copies a string (i8*) into a fixed-size array ([N x i8]*).

    The emitted loop copies one character per iteration and stops when either
    ``array_length`` characters have been written or the source's NUL
    terminator has been copied, whichever comes first. The last element of
    the target array is then unconditionally set to 0 so the result is always
    NUL-terminated (a source that does not fit is truncated). Bytes of the
    target after an early NUL are left untouched.

    Args:
        builder: IR builder positioned in the block where the copy should
            start. On return it is positioned at the end of the new
            "copy_end" block.
        target_array_ptr: pointer to the destination array.
        source_string_ptr: pointer to the first character of the source.
        array_length: number of elements in the destination array (a Python
            int known at compile time). Nothing is emitted when it is <= 0.
    """
    if array_length <= 0:
        # No room for even the terminator; also avoids emitting index -1.
        return

    i32 = ir.IntType(32)
    zero = ir.Constant(i32, 0)

    # A loop needs a separate header and body: a basic block can hold only
    # one terminator, so the bounds check (header) and the copy plus
    # back-edge (body) cannot share a block.
    cond_block = builder.append_basic_block("copy_cond")
    copy_block = builder.append_basic_block("copy_char")
    end_block = builder.append_basic_block("copy_end")

    # Loop counter lives in a stack slot so no phi node is required.
    counter = builder.alloca(i32)
    builder.store(zero, counter)
    builder.branch(cond_block)

    # Header: continue only while the index is inside the target array.
    builder.position_at_end(cond_block)
    idx = builder.load(counter)
    in_bounds = builder.icmp_unsigned('<', idx, ir.Constant(i32, array_length))
    builder.cbranch(in_bounds, copy_block, end_block)

    # Body: copy one character and advance the counter.
    builder.position_at_end(copy_block)
    src_ptr = builder.gep(source_string_ptr, [idx])
    char = builder.load(src_ptr)
    dst_ptr = builder.gep(target_array_ptr, [zero, idx])
    builder.store(char, dst_ptr)
    next_idx = builder.add(idx, ir.Constant(i32, 1))
    builder.store(next_idx, counter)
    # Stop once the terminator has been copied so the source is never read
    # past its end; otherwise take the back-edge to the header.
    is_nul = builder.icmp_unsigned('==', char, ir.Constant(char.type, 0))
    builder.cbranch(is_nul, end_block, cond_block)

    # Exit: force NUL termination in case the source filled the whole array.
    builder.position_at_end(end_block)
    last_idx = ir.Constant(i32, array_length - 1)
    null_ptr = builder.gep(target_array_ptr, [zero, last_idx])
    builder.store(ir.Constant(ir.IntType(8), 0), null_ptr)

View File

@ -28,14 +28,25 @@ def process_bpf_struct(cls_node, module):
for item in cls_node.body:
if isinstance(item, ast.AnnAssign) and isinstance(item.target, ast.Name):
print(f"Field: {item.target.id}, Type: "
f"{ast.dump(item.annotation)}")
field_names.append(item.target.id)
field_types.append(ctypes_to_ir(item.annotation.id))
if isinstance(item.annotation, ast.Call) and isinstance(item.annotation.func, ast.Name) and item.annotation.func.id == "str":
# This is a char array with fixed length
# TODO: For now assuming str is always called with constant
field_types.append(ir.ArrayType(
ir.IntType(8), item.annotation.args[0].value))
else:
field_types.append(ctypes_to_ir(item.annotation.id))
curr_offset = 0
for ftype in field_types:
if isinstance(ftype, ir.IntType):
fsize = ftype.width // 8
alignment = fsize
elif isinstance(ftype, ir.ArrayType):
fsize = ftype.count * (ftype.element.width // 8)
alignment = ftype.element.width // 8
elif isinstance(ftype, ir.PointerType):
fsize = 8
alignment = 8
@ -52,6 +63,7 @@ def process_bpf_struct(cls_node, module):
structs_sym_tab[struct_name] = {
"type": struct_type,
"fields": {name: idx for idx, name in enumerate(field_names)},
"size": total_size
"size": total_size,
"field_types": field_types,
}
print(f"Created struct {struct_name} with fields {field_names}")