From 0ee8b541d12c30c73743d7fd5506483c460c84cb Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Wed, 3 Sep 2025 18:30:52 +0530 Subject: [PATCH] Add constant check (maybe broken) and also a globals pass --- examples/c-form/example.bpf.c | 24 +++++++++++++++++- examples/check.sh | 2 ++ examples/execve.py | 5 ++++ pythonbpf/codegen.py | 21 ++++++++++++---- pythonbpf/constants_pass.py | 46 +++++++++++++++++++++++++++++++++++ pythonbpf/globals_pass.py | 42 ++++++++++++++++++++++++++++++++ 6 files changed, 134 insertions(+), 6 deletions(-) create mode 100755 examples/check.sh create mode 100644 pythonbpf/constants_pass.py create mode 100644 pythonbpf/globals_pass.py diff --git a/examples/c-form/example.bpf.c b/examples/c-form/example.bpf.c index c47700e..d656dda 100644 --- a/examples/c-form/example.bpf.c +++ b/examples/c-form/example.bpf.c @@ -1,10 +1,32 @@ #include #include +#include + +int trace_testing(void *ctx) +{ + bpf_printk("THISISACONSTANT"); + bpf_printk("THISISCONSTANT2"); + uint64_t a = 69; + bpf_printk("%d", a); + return 0; +} SEC("tracepoint/syscalls/sys_enter_execve") int trace_execve(void *ctx) { - bpf_printk("execve called\n"); + if(ctx){ + trace_testing(ctx); + } else { + bpf_printk("THISISANOTHERCONSTANT"); + } + bpf_trace_printk("execve called\n", 15); + return 0; +} + +SEC("tracepoint/syscalls/sys_exit_execve") +int trace_randomname_exit(void *ctx) +{ + bpf_trace_printk("execve called to exit\n", 15); return 0; } diff --git a/examples/check.sh b/examples/check.sh new file mode 100755 index 0000000..98086e7 --- /dev/null +++ b/examples/check.sh @@ -0,0 +1,2 @@ +#!/bin/bash +sudo bpftool prog -d load ./execve.o /sys/fs/bpf/tmp && sudo rm -f /sys/fs/bpf/tmp \ No newline at end of file diff --git a/examples/execve.py b/examples/execve.py index a680ff0..644c84e 100644 --- a/examples/execve.py +++ b/examples/execve.py @@ -1,10 +1,15 @@ from pythonbpf.decorators import tracepoint, syscalls from ctypes import c_void_p, c_int32 +#This is a test function +def test_function(): + print("test_function called") @tracepoint(syscalls.sys_enter_execve) def trace_execve(ctx: c_void_p) -> c_int32: print("execve called") + print("execve2 called") + test_function() return c_int32(0) diff --git a/pythonbpf/codegen.py b/pythonbpf/codegen.py index 74b6031..8acbf10 100644 --- a/pythonbpf/codegen.py +++ b/pythonbpf/codegen.py @@ -2,15 +2,16 @@ import ast from llvmlite import ir from .license_pass import license_processing from .functions_pass import functions_processing +from .constants_pass import constants_processing +from .globals_pass import globals_processing def processor(source_code, filename, module): tree = ast.parse(source_code, filename) print(ast.dump(tree)) - section_names = [] - section_names.append(license_processing(tree, module)) - section_names.append(functions_processing(tree, module)) - if any(name is None for name in section_names): - print("Processing failed") + constants_processing(tree, module) + license_processing(tree, module) + globals_processing(tree, module) + functions_processing(tree, module) def compile_to_ir(filename: str, output: str): with open(filename) as f: @@ -21,6 +22,16 @@ def compile_to_ir(filename: str, output: str): module.triple = "bpf" processor(source, filename, module) + + wchar_size = module.add_metadata([ir.Constant(ir.IntType(32), 1), + "wchar_size", + ir.Constant(ir.IntType(32), 4)]) + frame_pointer = module.add_metadata([ir.Constant(ir.IntType(32), 7), + "frame-pointer", + ir.Constant(ir.IntType(32), 2)]) + module.add_named_metadata("llvm.module.flags", wchar_size) + module.add_named_metadata("llvm.module.flags", frame_pointer) + module.add_named_metadata("llvm.ident", ["llvmlite PythonBPF v0.0.0"]) with open(output, "w") as f: f.write(str(module)) diff --git a/pythonbpf/constants_pass.py b/pythonbpf/constants_pass.py new file mode 100644 index 0000000..8bd39fd --- /dev/null +++ b/pythonbpf/constants_pass.py @@ -0,0 +1,46 @@ +from llvmlite import ir +import ast + +def emit_constants(module: ir.Module, constant_str: str, name: str): + constant_bytes = constant_str.encode("utf8") + b"\x00" + elems = [ir.Constant(ir.IntType(8), b) for b in constant_bytes] + ty = ir.ArrayType(ir.IntType(8), len(elems)) + + gvar = ir.GlobalVariable(module, ty, name=name) + + gvar.initializer = ir.Constant(ty, elems) # type: ignore + + gvar.align = 1 # type: ignore + gvar.linkage = "internal" # type: ignore + gvar.global_constant = True + + return gvar + +def constants_processing(tree, module): + """Process string constants in the given AST tree and emit them to rodata""" + constant_count = 0 + current_function = None + + class ConstantVisitor(ast.NodeVisitor): + def visit_FunctionDef(self, node): + nonlocal current_function + old_function = current_function + current_function = node.name + self.generic_visit(node) + current_function = old_function + + def visit_Constant(self, node): + nonlocal constant_count + if isinstance(node.value, str) and current_function is not None: + if constant_count == 0: + constant_name = f"{current_function}.____fmt" + else: + constant_name = f"{current_function}.____fmt.{constant_count}" + emit_constants(module, node.value, constant_name) + constant_count += 1 + self.generic_visit(node) + + visitor = ConstantVisitor() + visitor.visit(tree) + + return constant_count \ No newline at end of file diff --git a/pythonbpf/globals_pass.py b/pythonbpf/globals_pass.py new file mode 100644 index 0000000..41da7f4 --- /dev/null +++ b/pythonbpf/globals_pass.py @@ -0,0 +1,42 @@ +from llvmlite import ir +import ast + +def emit_globals(module: ir.Module, names: list[str]): + """ + Emit the @llvm.compiler.used global given a list of function/global names. + """ + ptr_ty = ir.PointerType() + used_array_ty = ir.ArrayType(ptr_ty, len(names)) + + elems = [] + for name in names: + # Reuse existing globals (like LICENSE), don't redeclare + if name in module.globals: + g = module.get_global(name) + else: + g = ir.GlobalValue(module, ptr_ty, name) + + elems.append(g.bitcast(ptr_ty)) + + gv = ir.GlobalVariable(module, used_array_ty, "llvm.compiler.used") + gv.linkage = "appending" + gv.initializer = ir.Constant(used_array_ty, elems) # type: ignore + gv.section = "llvm.metadata" + + +def globals_processing(tree, module: ir.Module): + collected = ["LICENSE"] + + for node in tree.body: + if isinstance(node, ast.FunctionDef) and len(node.decorator_list) == 1: + dec = node.decorator_list[0] + if ( + isinstance(dec, ast.Call) + and isinstance(dec.func, ast.Name) + and len(dec.args) == 1 + and isinstance(dec.args[0], ast.Attribute) + and isinstance(dec.args[0].value, ast.Name) + ): + collected.append(node.name) + + emit_globals(module, collected)