From 5031f90377bfc3c57359010abf8b229e3168c350 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Mon, 10 Nov 2025 20:06:04 +0530 Subject: [PATCH] fix stacked vmlinux struct parsing issue --- pythonbpf/vmlinux_parser/class_handler.py | 93 +++++++++++++------ .../vmlinux_parser/ir_gen/debug_info_gen.py | 21 +++-- .../vmlinux_parser/ir_gen/ir_generation.py | 49 +++++++--- tests/failing_tests/vmlinux/requests2.py | 19 ++++ 4 files changed, 137 insertions(+), 45 deletions(-) create mode 100644 tests/failing_tests/vmlinux/requests2.py diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index a508ff7..ba51687 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -16,10 +16,37 @@ def get_module_symbols(module_name: str): return [name for name in dir(imported_module)], imported_module +def unwrap_pointer_type(type_obj: Any) -> Any: + """ + Recursively unwrap all pointer layers to get the base type. + + This handles multiply nested pointers like LP_LP_struct_attribute_group + and returns the base type (struct_attribute_group). + + Stops unwrapping when reaching a non-pointer type (one without _type_ attribute). + + Args: + type_obj: The type object to unwrap + + Returns: + The base type after unwrapping all pointer layers + """ + current_type = type_obj + # Keep unwrapping while it's a pointer/array type (has _type_) + # But stop if _type_ is just a string or basic type marker + while hasattr(current_type, "_type_"): + next_type = current_type._type_ + # Stop if _type_ is a string (like 'c' for c_char) + if isinstance(next_type, str): + break + current_type = next_type + return current_type + + def process_vmlinux_class( - node, - llvm_module, - handler: DependencyHandler, + node, + llvm_module, + handler: DependencyHandler, ): symbols_in_module, imported_module = get_module_symbols("vmlinux") if node.name in symbols_in_module: @@ -30,10 +57,10 @@ def process_vmlinux_class( def process_vmlinux_post_ast( - elem_type_class, - llvm_handler, - handler: DependencyHandler, - processing_stack=None, + elem_type_class, + llvm_handler, + handler: DependencyHandler, + processing_stack=None, ): # Initialize processing stack on first call if processing_stack is None: @@ -113,7 +140,7 @@ def process_vmlinux_post_ast( # Process pointer to ctype if isinstance(elem_type, type) and issubclass( - elem_type, ctypes._Pointer + elem_type, ctypes._Pointer ): # Get the pointed-to type pointed_type = elem_type._type_ @@ -126,7 +153,7 @@ def process_vmlinux_post_ast( # Process function pointers (CFUNCTYPE) elif hasattr(elem_type, "_restype_") and hasattr( - elem_type, "_argtypes_" + elem_type, "_argtypes_" ): # This is a CFUNCTYPE or similar logger.info( @@ -158,13 +185,19 @@ def process_vmlinux_post_ast( if hasattr(elem_type, "_length_") and is_complex_type: type_length = elem_type._length_ - if containing_type.__module__ == "vmlinux": - new_dep_node.add_dependent( - elem_type._type_.__name__ - if hasattr(elem_type._type_, "__name__") - else str(elem_type._type_) + # Unwrap all pointer layers to get the base type for dependency tracking + base_type = unwrap_pointer_type(elem_type) + base_type_module = getattr(base_type, "__module__", None) + + if base_type_module == "vmlinux": + base_type_name = ( + base_type.__name__ + if hasattr(base_type, "__name__") + else str(base_type) ) - elif containing_type.__module__ == ctypes.__name__: + new_dep_node.add_dependent(base_type_name) + elif base_type_module == ctypes.__name__ or base_type_module is None: + # Handle ctypes or types with no module (like some internal ctypes types) if isinstance(elem_type, type): if issubclass(elem_type, ctypes.Array): ctype_complex_type = ctypes.Array @@ -178,7 +211,7 @@ def process_vmlinux_post_ast( raise TypeError("Unsupported ctypes subclass") else: raise ImportError( - f"Unsupported module of {containing_type}" + f"Unsupported module of {base_type}: {base_type_module}" ) logger.debug( f"{containing_type} containing type of parent {elem_name} with {elem_type} and ctype {ctype_complex_type} and length {type_length}" @@ -191,11 +224,16 @@ def process_vmlinux_post_ast( elem_name, ctype_complex_type ) new_dep_node.set_field_type(elem_name, elem_type) - if containing_type.__module__ == "vmlinux": + + # Check the containing_type module to decide whether to recurse + containing_type_module = getattr(containing_type, "__module__", None) + if containing_type_module == "vmlinux": + # Also unwrap containing_type to get base type name + base_containing_type = unwrap_pointer_type(containing_type) containing_type_name = ( - containing_type.__name__ - if hasattr(containing_type, "__name__") - else str(containing_type) + base_containing_type.__name__ + if hasattr(base_containing_type, "__name__") + else str(base_containing_type) ) # Check for self-reference or already processed @@ -212,21 +250,21 @@ def process_vmlinux_post_ast( ) new_dep_node.set_field_ready(elem_name, True) else: - # Process recursively - THIS WAS MISSING + # Process recursively - use base containing type, not the pointer wrapper new_dep_node.add_dependent(containing_type_name) process_vmlinux_post_ast( - containing_type, + base_containing_type, llvm_handler, handler, processing_stack, ) new_dep_node.set_field_ready(elem_name, True) - elif containing_type.__module__ == ctypes.__name__: + elif containing_type_module == ctypes.__name__ or containing_type_module is None: logger.debug(f"Processing ctype internal{containing_type}") new_dep_node.set_field_ready(elem_name, True) else: raise TypeError( - "Module not supported in recursive resolution" + f"Module not supported in recursive resolution: {containing_type_module}" ) else: new_dep_node.add_dependent( @@ -245,9 +283,12 @@ def process_vmlinux_post_ast( raise ValueError( f"{elem_name} with type {elem_type} from module {module_name} not supported in recursive resolver" ) - + elif module_name == ctypes.__name__ or module_name is None: + # Handle ctypes types - these don't need processing, just return + logger.debug(f"Skipping ctypes type {current_symbol_name}") + return True else: - raise ImportError("UNSUPPORTED Module") + raise ImportError(f"UNSUPPORTED Module {module_name}") logger.info( f"{current_symbol_name} processed and handler readiness {handler.is_ready}" diff --git a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py index 232cf10..d83e3c0 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py +++ b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py @@ -46,13 +46,14 @@ def debug_info_generation( if struct.name.startswith("struct_"): struct_name = struct.name.removeprefix("struct_") + # Create struct type with all members + struct_type = generator.create_struct_type_with_name( + struct_name, members, struct.__sizeof__() * 8, is_distinct=True + ) else: - raise ValueError("Unions are not supported in the current version") - # Create struct type with all members - struct_type = generator.create_struct_type_with_name( - struct_name, members, struct.__sizeof__() * 8, is_distinct=True - ) - + logger.warning("Blindly handling Unions present in vmlinux dependencies") + struct_type = None + # raise ValueError("Unions are not supported in the current version") return struct_type @@ -62,7 +63,7 @@ def _get_field_debug_type( generator: DebugInfoGenerator, parent_struct: DependencyNode, generated_debug_info: List[Tuple[DependencyNode, Any]], -) -> tuple[Any, int]: +) -> tuple[Any, int] | None: """ Determine the appropriate debug type for a field based on its Python/ctypes type. @@ -78,7 +79,11 @@ def _get_field_debug_type( """ # Handle complex types (arrays, pointers) if field.ctype_complex_type is not None: - if issubclass(field.ctype_complex_type, ctypes.Array): + #TODO: Check if this is a CFUNCTYPE (function pointer), but sadly it just checks callable for now + if callable(field.ctype_complex_type): + # Handle function pointer types, create a void pointer as a placeholder + return generator.create_pointer_type(None), 64 + elif issubclass(field.ctype_complex_type, ctypes.Array): # Handle array types element_type, base_type_size = _get_basic_debug_type( field.containing_type, generator diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index 14a74ad..e248d4c 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -11,6 +11,9 @@ logger = logging.getLogger(__name__) class IRGenerator: + # This field keeps track of the non_struct names to avoid duplicate name errors. + type_number = 0 + unprocessed_store = [] # get the assignments dict and add this stuff to it. def __init__(self, llvm_module, handler: DependencyHandler, assignments): self.llvm_module = llvm_module @@ -68,6 +71,7 @@ class IRGenerator: dep_node_from_dependency, processing_stack ) else: + print(struct) raise RuntimeError( f"Warning: Dependency {dependency} not found in handler" ) @@ -129,7 +133,20 @@ class IRGenerator: for field_name, field in struct.fields.items(): # does not take arrays and similar types into consideration yet. - if field.ctype_complex_type is not None and issubclass( + if callable(field.ctype_complex_type): + # Function pointer case - generate a simple field accessor + field_co_re_name, returned = self._struct_name_generator( + struct, field, field_index + ) + print(field_co_re_name) + field_index += 1 + globvar = ir.GlobalVariable( + self.llvm_module, ir.IntType(64), name=field_co_re_name + ) + globvar.linkage = "external" + globvar.set_metadata("llvm.preserve.access.index", debug_info) + self.generated_field_names[struct.name][field_name] = globvar + elif field.ctype_complex_type is not None and issubclass( field.ctype_complex_type, ctypes.Array ): array_size = field.type_size @@ -137,7 +154,7 @@ class IRGenerator: if containing_type.__module__ == ctypes.__name__: containing_type_size = ctypes.sizeof(containing_type) if array_size == 0: - field_co_re_name = self._struct_name_generator( + field_co_re_name, returned = self._struct_name_generator( struct, field, field_index, True, 0, containing_type_size ) globvar = ir.GlobalVariable( @@ -149,7 +166,7 @@ class IRGenerator: field_index += 1 continue for i in range(0, array_size): - field_co_re_name = self._struct_name_generator( + field_co_re_name, returned = self._struct_name_generator( struct, field, field_index, True, i, containing_type_size ) globvar = ir.GlobalVariable( @@ -163,11 +180,12 @@ class IRGenerator: array_size = field.type_size containing_type = field.containing_type if containing_type.__module__ == "vmlinux": + print(struct) containing_type_size = self.handler[ containing_type.__name__ ].current_offset for i in range(0, array_size): - field_co_re_name = self._struct_name_generator( + field_co_re_name, returned = self._struct_name_generator( struct, field, field_index, True, i, containing_type_size ) globvar = ir.GlobalVariable( @@ -178,7 +196,7 @@ class IRGenerator: self.generated_field_names[struct.name][field_name] = globvar field_index += 1 else: - field_co_re_name = self._struct_name_generator( + field_co_re_name, returned = self._struct_name_generator( struct, field, field_index ) field_index += 1 @@ -198,7 +216,7 @@ class IRGenerator: is_indexed: bool = False, index: int = 0, containing_type_size: int = 0, - ) -> str: + ) -> tuple[str, bool]: # TODO: Does not support Unions as well as recursive pointer and array type naming if is_indexed: name = ( @@ -208,7 +226,7 @@ class IRGenerator: + "$" + f"0:{field_index}:{index}" ) - return name + return name, True elif struct.name.startswith("struct_"): name = ( "llvm." @@ -217,9 +235,18 @@ class IRGenerator: + "$" + f"0:{field_index}" ) - return name + return name, True else: - print(self.handler[struct.name]) - raise TypeError( - "Name generation cannot occur due to type name not starting with struct" + logger.warning( + "Blindly handling non-struct type to avoid type errors in vmlinux IR generation. Possibly a union." ) + self.type_number += 1 + unprocessed_type = "unprocessed_type_" + str(self.handler[struct.name].name) + if self.unprocessed_store.__contains__(unprocessed_type): + return unprocessed_type + "_" + str(self.type_number), False + else: + self.unprocessed_store.append(unprocessed_type) + return unprocessed_type, False + # raise TypeError( + # "Name generation cannot occur due to type name not starting with struct" + # ) diff --git a/tests/failing_tests/vmlinux/requests2.py b/tests/failing_tests/vmlinux/requests2.py new file mode 100644 index 0000000..0f17e30 --- /dev/null +++ b/tests/failing_tests/vmlinux/requests2.py @@ -0,0 +1,19 @@ +from vmlinux import struct_kobj_type +from pythonbpf import bpf, section, bpfglobal, compile_to_ir +import logging +from ctypes import c_void_p + + +@bpf +@section("kprobe/blk_mq_start_request") +def example(ctx: c_void_p): + print(f"data lengt") + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +compile_to_ir("requests.py", "requests.ll", loglevel=logging.INFO)