From 5955db88cfacdfda39dc6316578799c80125cbd8 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Tue, 21 Oct 2025 03:24:27 +0530 Subject: [PATCH] add vmlinux expressions to eval expr --- pythonbpf/allocation_pass.py | 10 +++ pythonbpf/codegen.py | 6 ++ pythonbpf/expr/__init__.py | 2 + pythonbpf/expr/expr_pass.py | 16 +++- pythonbpf/expr/vmlinux_registry.py | 45 ++++++++++ pythonbpf/functions/functions_pass.py | 25 ++++-- .../vmlinux_parser/vmlinux_exports_handler.py | 82 +++++++++++++++++++ .../vmlinux/simple_struct_test.py | 4 +- 8 files changed, 180 insertions(+), 10 deletions(-) create mode 100644 pythonbpf/expr/vmlinux_registry.py create mode 100644 pythonbpf/vmlinux_parser/vmlinux_exports_handler.py diff --git a/pythonbpf/allocation_pass.py b/pythonbpf/allocation_pass.py index 9d82484..3149c75 100644 --- a/pythonbpf/allocation_pass.py +++ b/pythonbpf/allocation_pass.py @@ -5,6 +5,7 @@ from llvmlite import ir from dataclasses import dataclass from typing import Any from pythonbpf.helper import HelperHandlerRegistry +from .expr import VmlinuxHandlerRegistry from pythonbpf.type_deducer import ctypes_to_ir logger = logging.getLogger(__name__) @@ -49,6 +50,15 @@ def handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab): logger.debug(f"Variable {var_name} already allocated, skipping") return + # When allocating a variable, check if it's a vmlinux struct type + if isinstance(stmt.value, ast.Name) and VmlinuxHandlerRegistry.is_vmlinux_struct( + stmt.value.id + ): + # Handle vmlinux struct allocation + # This requires more implementation + print(stmt.value) + pass + # Determine type and allocate based on rval if isinstance(rval, ast.Call): _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab) diff --git a/pythonbpf/codegen.py b/pythonbpf/codegen.py index 287fef9..e3fa5d3 100644 --- a/pythonbpf/codegen.py +++ b/pythonbpf/codegen.py @@ -5,6 +5,8 @@ from .functions import func_proc from .maps import maps_proc from .structs import structs_proc from .vmlinux_parser import vmlinux_proc +from pythonbpf.vmlinux_parser.vmlinux_exports_handler import VmlinuxHandler +from .expr import VmlinuxHandlerRegistry from .globals_pass import ( globals_list_creation, globals_processing, @@ -56,6 +58,10 @@ def processor(source_code, filename, module): logger.info(f"Found BPF function/struct: {func_node.name}") vmlinux_symtab = vmlinux_proc(tree, module) + if vmlinux_symtab: + handler = VmlinuxHandler.initialize(vmlinux_symtab) + VmlinuxHandlerRegistry.set_handler(handler) + populate_global_symbol_table(tree, module) license_processing(tree, module) globals_processing(tree, module) diff --git a/pythonbpf/expr/__init__.py b/pythonbpf/expr/__init__.py index 3c403dd..ac3a975 100644 --- a/pythonbpf/expr/__init__.py +++ b/pythonbpf/expr/__init__.py @@ -2,6 +2,7 @@ from .expr_pass import eval_expr, handle_expr, get_operand_value from .type_normalization import convert_to_bool, get_base_type_and_depth from .ir_ops import deref_to_depth from .call_registry import CallHandlerRegistry +from .vmlinux_registry import VmlinuxHandlerRegistry __all__ = [ "eval_expr", @@ -11,4 +12,5 @@ __all__ = [ "deref_to_depth", "get_operand_value", "CallHandlerRegistry", + "VmlinuxHandlerRegistry", ] diff --git a/pythonbpf/expr/expr_pass.py b/pythonbpf/expr/expr_pass.py index 8bbd524..281d3a1 100644 --- a/pythonbpf/expr/expr_pass.py +++ b/pythonbpf/expr/expr_pass.py @@ -12,6 +12,7 @@ from .type_normalization import ( get_base_type_and_depth, deref_to_depth, ) +from .vmlinux_registry import VmlinuxHandlerRegistry logger: Logger = logging.getLogger(__name__) @@ -27,8 +28,12 @@ def _handle_name_expr(expr: ast.Name, local_sym_tab: Dict, builder: ir.IRBuilder val = builder.load(var) return val, local_sym_tab[expr.id].ir_type else: - logger.info(f"Undefined variable {expr.id}") - return None + # Check if it's a vmlinux enum/constant + vmlinux_result = VmlinuxHandlerRegistry.handle_name(expr.id) + if vmlinux_result is not None: + return vmlinux_result + + raise SyntaxError(f"Undefined variable {expr.id}") def _handle_constant_expr(module, builder, expr: ast.Constant): @@ -74,6 +79,13 @@ def _handle_attribute_expr( val = builder.load(gep) field_type = metadata.field_type(attr_name) return val, field_type + + # Try vmlinux handler as fallback + vmlinux_result = VmlinuxHandlerRegistry.handle_attribute( + expr, local_sym_tab, None, builder + ) + if vmlinux_result is not None: + return vmlinux_result return None diff --git a/pythonbpf/expr/vmlinux_registry.py b/pythonbpf/expr/vmlinux_registry.py new file mode 100644 index 0000000..9e9d52e --- /dev/null +++ b/pythonbpf/expr/vmlinux_registry.py @@ -0,0 +1,45 @@ +import ast + + +class VmlinuxHandlerRegistry: + """Registry for vmlinux handler operations""" + + _handler = None + + @classmethod + def set_handler(cls, handler): + """Set the vmlinux handler""" + cls._handler = handler + + @classmethod + def get_handler(cls): + """Get the vmlinux handler""" + return cls._handler + + @classmethod + def handle_name(cls, name): + """Try to handle a name as vmlinux enum/constant""" + if cls._handler is None: + return None + return cls._handler.handle_vmlinux_enum(name) + + @classmethod + def handle_attribute(cls, expr, local_sym_tab, module, builder): + """Try to handle an attribute access as vmlinux struct field""" + if cls._handler is None: + return None + + if isinstance(expr.value, ast.Name): + var_name = expr.value.id + field_name = expr.attr + return cls._handler.handle_vmlinux_struct_field( + var_name, field_name, module, builder, local_sym_tab + ) + return None + + @classmethod + def is_vmlinux_struct(cls, name): + """Check if a name refers to a vmlinux struct""" + if cls._handler is None: + return False + return cls._handler.is_vmlinux_struct(name) diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index 6e06de7..e712030 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -311,7 +311,13 @@ def process_stmt( def process_func_body( - module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab, vmlinux_symtab + module, + builder, + func_node, + func, + ret_type, + map_sym_tab, + structs_sym_tab, ): """Process the body of a bpf function""" # TODO: A lot. We just have print -> bpf_trace_printk for now @@ -350,7 +356,9 @@ def process_func_body( builder.ret(ir.Constant(ir.IntType(64), 0)) -def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_tab, vmlinux_symtab): +def process_bpf_chunk( + func_node, module, return_type, map_sym_tab, structs_sym_tab +): """Process a single BPF chunk (function) and emit corresponding LLVM IR.""" func_name = func_node.name @@ -384,7 +392,13 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t builder = ir.IRBuilder(block) process_func_body( - module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab, vmlinux_symtab + module, + builder, + func_node, + func, + ret_type, + map_sym_tab, + structs_sym_tab, ) return func @@ -394,7 +408,7 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t # ============================================================================ -def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab, vmlinux_symtab): +def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab): for func_node in chunks: if is_global_function(func_node): continue @@ -406,8 +420,7 @@ def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab, vmlinux_symtab module, ctypes_to_ir(infer_return_type(func_node)), map_sym_tab, - structs_sym_tab, - vmlinux_symtab + structs_sym_tab ) diff --git a/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py b/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py new file mode 100644 index 0000000..5fa6a18 --- /dev/null +++ b/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py @@ -0,0 +1,82 @@ +import logging +from llvmlite import ir + +from pythonbpf.vmlinux_parser.assignment_info import AssignmentType + +logger = logging.getLogger(__name__) + + +class VmlinuxHandler: + """Handler for vmlinux-related operations""" + + _instance = None + + @classmethod + def get_instance(cls): + """Get the singleton instance""" + if cls._instance is None: + logger.warning("VmlinuxHandler used before initialization") + return None + return cls._instance + + @classmethod + def initialize(cls, vmlinux_symtab): + """Initialize the handler with vmlinux symbol table""" + cls._instance = cls(vmlinux_symtab) + return cls._instance + + def __init__(self, vmlinux_symtab): + """Initialize with vmlinux symbol table""" + self.vmlinux_symtab = vmlinux_symtab + logger.info( + f"VmlinuxHandler initialized with {len(vmlinux_symtab) if vmlinux_symtab else 0} symbols" + ) + + def is_vmlinux_enum(self, name): + """Check if name is a vmlinux enum constant""" + return ( + name in self.vmlinux_symtab + and self.vmlinux_symtab[name]["value_type"] == AssignmentType.CONSTANT + ) + + def is_vmlinux_struct(self, name): + """Check if name is a vmlinux struct""" + return ( + name in self.vmlinux_symtab + and self.vmlinux_symtab[name]["value_type"] == AssignmentType.STRUCT + ) + + def handle_vmlinux_enum(self, name): + """Handle vmlinux enum constants by returning LLVM IR constants""" + if self.is_vmlinux_enum(name): + value = self.vmlinux_symtab[name]["value"] + logger.info(f"Resolving vmlinux enum {name} = {value}") + return ir.Constant(ir.IntType(64), value), ir.IntType(64) + return None + + def handle_vmlinux_struct(self, struct_name, module, builder): + """Handle vmlinux struct initializations""" + if self.is_vmlinux_struct(struct_name): + # TODO: Implement core-specific struct handling + # This will be more complex and depends on the BTF information + logger.info(f"Handling vmlinux struct {struct_name}") + # Return struct type and allocated pointer + # This is a stub, actual implementation will be more complex + return None + return None + + def handle_vmlinux_struct_field( + self, struct_var_name, field_name, module, builder, local_sym_tab + ): + """Handle access to vmlinux struct fields""" + # Check if it's a variable of vmlinux struct type + if struct_var_name in local_sym_tab: + var_info = local_sym_tab[struct_var_name] + # Need to check if this variable is a vmlinux struct + # This will depend on how you track vmlinux struct types in your symbol table + logger.info( + f"Attempting to access field {field_name} of possible vmlinux struct {struct_var_name}" + ) + # Return pointer to field and field type + return None + return None diff --git a/tests/passing_tests/vmlinux/simple_struct_test.py b/tests/passing_tests/vmlinux/simple_struct_test.py index c9390c8..c784696 100644 --- a/tests/passing_tests/vmlinux/simple_struct_test.py +++ b/tests/passing_tests/vmlinux/simple_struct_test.py @@ -16,8 +16,8 @@ from ctypes import c_int64 @bpf @section("tracepoint/syscalls/sys_enter_execve") def hello_world(ctx: struct_trace_event_raw_sys_enter) -> c_int64: - print("Hello, World!") - return c_int64(0) + print("Hello, World") + return c_int64(TASK_COMM_LEN) @bpf