Mirror of https://github.com/varun-r-mallya/Python-BPF.git (synced 2025-12-31 21:06:25 +00:00)
Merge pull request #56 from pythonbpf/vmlinux-ir-gen
Adds IR and debug info generation capabilities for vmlinux imported structs
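For context, the "vmlinux imported structs" are the ctypes-backed classes exposed by the generated vmlinux module. The example program touched by this PR imports them like so (a minimal excerpt, names taken from that example, program body omitted):

from vmlinux import struct_xdp_md  # noqa: F401
from vmlinux import struct_qspinlock  # noqa: F401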
@@ -19,12 +19,22 @@ from pylibbpf import BpfProgram
 import tempfile
 from logging import Logger
 import logging
+import re
 
 logger: Logger = logging.getLogger(__name__)
 
 VERSION = "v0.1.4"
 
 
+def finalize_module(original_str):
+    """After all IR generation is complete, we monkey patch btf_ama attribute"""
+
+    # Create a string with applied transformation of btf_ama attribute addition to BTF struct field accesses.
+    pattern = r'(@"llvm\.[^"]+:[^"]*" = external global i64, !llvm\.preserve\.access\.index ![0-9]+)'
+    replacement = r'\1 "btf_ama"'
+    return re.sub(pattern, replacement, original_str)
+
+
 def find_bpf_chunks(tree):
     """Find all functions decorated with @bpf in the AST."""
     bpf_functions = []
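A quick way to see what finalize_module does is to run the same substitution over a single hand-written IR line of the shape the IR generator later in this PR emits. The global name and metadata id below are illustrative, not taken from real compiler output:

import re

pattern = r'(@"llvm\.[^"]+:[^"]*" = external global i64, !llvm\.preserve\.access\.index ![0-9]+)'
line = '@"llvm.xdp_md:0:0$0:0" = external global i64, !llvm.preserve.access.index !3'
print(re.sub(pattern, r'\1 "btf_ama"', line))
# -> @"llvm.xdp_md:0:0$0:0" = external global i64, !llvm.preserve.access.index !3 "btf_ama"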
@@ -121,10 +131,12 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
 
     module.add_named_metadata("llvm.ident", [f"PythonBPF {VERSION}"])
 
+    module_string = finalize_module(str(module))
+
     logger.info(f"IR written to {output}")
     with open(output, "w") as f:
         f.write(f'source_filename = "{filename}"\n')
-        f.write(str(module))
+        f.write(module_string)
         f.write("\n")
 
     return output
@@ -60,6 +60,10 @@ def process_vmlinux_post_ast(
         pass
     else:
         new_dep_node = DependencyNode(name=current_symbol_name)
+
+        # elem_type_class is the actual vmlinux struct/class
+        new_dep_node.set_ctype_struct(elem_type_class)
+
         handler.add_node(new_dep_node)
     class_obj = getattr(imported_module, current_symbol_name)
     # Inspect the class fields
@@ -71,9 +75,6 @@ def process_vmlinux_post_ast(
             if len(field_elem) == 2:
                 field_name, field_type = field_elem
             elif len(field_elem) == 3:
-                raise NotImplementedError(
-                    "Bitfields are not supported in the current version"
-                )
                 field_name, field_type, bitfield_size = field_elem
                 field_table[field_name] = [field_type, bitfield_size]
         elif hasattr(class_obj, "__annotations__"):
@@ -144,15 +145,35 @@ def process_vmlinux_post_ast(
             )
             new_dep_node.set_field_type(elem_name, elem_type)
             if containing_type.__module__ == "vmlinux":
-                process_vmlinux_post_ast(
-                    containing_type, llvm_handler, handler, processing_stack
-                )
-                size_of_containing_type = (
-                    handler[containing_type.__name__]
-                ).__sizeof__()
-                new_dep_node.set_field_ready(
-                    elem_name, True, size_of_containing_type
-                )
+                containing_type_name = (
+                    containing_type.__name__
+                    if hasattr(containing_type, "__name__")
+                    else str(containing_type)
+                )
+
+                # Check for self-reference or already processed
+                if containing_type_name == current_symbol_name:
+                    # Self-referential pointer
+                    logger.debug(
+                        f"Self-referential pointer in {current_symbol_name}.{elem_name}"
+                    )
+                    new_dep_node.set_field_ready(elem_name, True)
+                elif handler.has_node(containing_type_name):
+                    # Already processed
+                    logger.debug(
+                        f"Reusing already processed {containing_type_name}"
+                    )
+                    new_dep_node.set_field_ready(elem_name, True)
+                else:
+                    # Process recursively - THIS WAS MISSING
+                    new_dep_node.add_dependent(containing_type_name)
+                    process_vmlinux_post_ast(
+                        containing_type,
+                        llvm_handler,
+                        handler,
+                        processing_stack,
+                    )
+                    new_dep_node.set_field_ready(elem_name, True)
             elif containing_type.__module__ == ctypes.__name__:
                 logger.debug(f"Processing ctype internal{containing_type}")
                 new_dep_node.set_field_ready(elem_name, True)
@@ -169,12 +190,7 @@ def process_vmlinux_post_ast(
                 process_vmlinux_post_ast(
                     elem_type, llvm_handler, handler, processing_stack
                 )
-                size_of_containing_type = (
-                    handler[elem_type.__name__]
-                ).__sizeof__()
-                new_dep_node.set_field_ready(
-                    elem_name, True, size_of_containing_type
-                )
+                new_dep_node.set_field_ready(elem_name, True)
             else:
                 raise ValueError(
                     f"{elem_name} with type {elem_type} from module {module_name} not supported in recursive resolver"
@@ -167,3 +167,7 @@ class DependencyHandler:
         if name not in self._nodes:
             raise KeyError(f"No node with name '{name}' found")
         return self._nodes[name]
+
+    @property
+    def nodes(self):
+        return self._nodes
@@ -116,6 +116,7 @@ class DependencyNode:
     fields: Dict[str, Field] = field(default_factory=dict)
     _ready_cache: Optional[bool] = field(default=None, repr=False)
     current_offset: int = 0
+    ctype_struct: Optional[Any] = field(default=None, repr=False)
 
     def add_field(
         self,
@@ -146,7 +147,14 @@ class DependencyNode:
         # Invalidate readiness cache
         self._ready_cache = None
 
+    def set_ctype_struct(self, ctype_struct: Any) -> None:
+        """Set the ctypes structure for automatic offset calculation."""
+        self.ctype_struct = ctype_struct
+
     def __sizeof__(self):
+        # If we have a ctype_struct, use its size
+        if self.ctype_struct is not None:
+            return ctypes.sizeof(self.ctype_struct)
         return self.current_offset
 
     def get_field(self, name: str) -> Field:
@@ -226,8 +234,22 @@ class DependencyNode:
             raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
 
         self.fields[name].set_ready(is_ready)
-        self.fields[name].set_offset(self.current_offset)
-        self.current_offset += self._calculate_size(name, size_of_containing_type)
+        # Use ctypes built-in offset if available
+        if self.ctype_struct is not None:
+            try:
+                self.fields[name].set_offset(getattr(self.ctype_struct, name).offset)
+            except AttributeError:
+                # Fallback to manual calculation if field not found in ctype_struct
+                self.fields[name].set_offset(self.current_offset)
+                self.current_offset += self._calculate_size(
+                    name, size_of_containing_type
+                )
+        else:
+            # Manual offset calculation when no ctype_struct is available
+            self.fields[name].set_offset(self.current_offset)
+            self.current_offset += self._calculate_size(name, size_of_containing_type)
 
         # Invalidate readiness cache
         self._ready_cache = None
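The new offset path leans on ctypes' own layout bookkeeping: the field descriptors on a ctypes.Structure subclass expose the offset ctypes computed, and ctypes.sizeof gives the struct size that __sizeof__ now returns. A small sketch with a made-up struct (not a real vmlinux type):

import ctypes

class struct_example(ctypes.Structure):  # hypothetical stand-in for a vmlinux-generated class
    _fields_ = [("a", ctypes.c_uint32), ("b", ctypes.c_uint64)]

print(struct_example.a.offset, struct_example.b.offset)  # 0 8 (with default alignment/padding)
print(ctypes.sizeof(struct_example))                     # 16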
@@ -275,8 +297,28 @@ class DependencyNode:
                 raise NotImplementedError(
                     "This subclass of ctype not supported yet"
                 )
+        elif processing_field.type_size is not None:
+            # Handle vmlinux types with type_size but no ctype_complex_type
+            # This means it's a direct vmlinux struct field (not array/pointer wrapped)
+            # The type_size should already contain the full size of the struct
+            # But if there's a containing_type from vmlinux, we need that size
+            if processing_field.containing_type is not None:
+                if processing_field.containing_type.__module__ == "vmlinux":
+                    # For vmlinux containing types, we need the pre-calculated size
+                    if size_of_containing_type is not None:
+                        return size_of_containing_type * processing_field.type_size
+                    else:
+                        raise RuntimeError(
+                            f"Field {name}: vmlinux containing_type requires size_of_containing_type"
+                        )
+                else:
+                    raise ModuleNotFoundError(
+                        f"Containing type module {processing_field.containing_type.__module__} not supported"
+                    )
+            else:
+                raise RuntimeError("Wrong type found with no containing type")
         else:
-            # search up pre-created stuff and get size
+            # No ctype_complex_type and no type_size, must rely on size_of_containing_type
             if size_of_containing_type is None:
                 raise RuntimeError(
                     f"Size of containing type {size_of_containing_type} is None"
pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py (new file, 15 lines)
@@ -0,0 +1,15 @@
+from pythonbpf.debuginfo import DebugInfoGenerator
+
+
+def debug_info_generation(struct, llvm_module):
+    generator = DebugInfoGenerator(llvm_module)
+    # this is sample debug info generation
+    # i64type = generator.get_uint64_type()
+
+    struct_type = generator.create_struct_type([], 64 * 4, is_distinct=True)
+
+    global_var = generator.create_global_var_debug_info(
+        struct.name, struct_type, is_local=False
+    )
+
+    return global_var
@@ -1,12 +1,17 @@
+import ctypes
 import logging
-from pythonbpf.vmlinux_parser.dependency_handler import DependencyHandler
+from ..dependency_handler import DependencyHandler
+from .debug_info_gen import debug_info_generation
+from ..dependency_node import DependencyNode
+import llvmlite.ir as ir
 
 logger = logging.getLogger(__name__)
 
 
 class IRGenerator:
-    def __init__(self, module, handler: DependencyHandler):
-        self.module = module
+    # get the assignments dict and add this stuff to it.
+    def __init__(self, llvm_module, handler: DependencyHandler, assignment=None):
+        self.llvm_module = llvm_module
         self.handler: DependencyHandler = handler
         self.generated: list[str] = []
         if not handler.is_ready:
@@ -15,22 +20,142 @@ class IRGenerator:
             )
         for struct in handler:
             self.struct_processor(struct)
-            print()
 
-    def struct_processor(self, struct):
-        if struct.name not in self.generated:
-            print(f"IR generating for {struct.name}")
-            print(f"Struct is {struct}")
-            for dependency in struct.depends_on:
-                if dependency not in self.generated:
-                    dep_node_from_dependency = self.handler[dependency]
-                    self.struct_processor(dep_node_from_dependency)
-                    self.generated.append(dependency)
-            # write actual processor logic here after assuming all dependencies are resolved
-            # this part cannot yet resolve circular dependencies. Gets stuck on an infinite loop during that.
-            self.generated.append(struct.name)
-
-    def struct_name_generator(
-        self,
-    ) -> None:
-        pass
+    def struct_processor(self, struct, processing_stack=None):
+        # Initialize processing stack on first call
+        if processing_stack is None:
+            processing_stack = set()
+
+        # If already generated, skip
+        if struct.name in self.generated:
+            return
+
+        # Detect circular dependency
+        if struct.name in processing_stack:
+            logger.info(
+                f"Circular dependency detected for {struct.name}, skipping recursive processing"
+            )
+            # For circular dependencies, we can either:
+            # 1. Use forward declarations (opaque pointers)
+            # 2. Mark as incomplete and process later
+            # 3. Generate a placeholder type
+            # Here we'll just skip and let it be processed in its own call
+            return
+
+        logger.info(f"IR generating for {struct.name}")
+
+        # Add to processing stack before processing dependencies
+        processing_stack.add(struct.name)
+
+        try:
+            # Process all dependencies first
+            if struct.depends_on is None:
+                pass
+            else:
+                for dependency in struct.depends_on:
+                    if dependency not in self.generated:
+                        # Check if dependency exists in handler
+                        if dependency in self.handler.nodes:
+                            dep_node_from_dependency = self.handler[dependency]
+                            # Pass the processing_stack down to track circular refs
+                            self.struct_processor(
+                                dep_node_from_dependency, processing_stack
+                            )
+                        else:
+                            raise RuntimeError(
+                                f"Warning: Dependency {dependency} not found in handler"
+                            )
+
+            # Actual processor logic here after dependencies are resolved
+            self.gen_ir(struct)
+            self.generated.append(struct.name)
+
+        finally:
+            # Remove from processing stack after we're done
+            processing_stack.discard(struct.name)
+
+    def gen_ir(self, struct):
+        # TODO: we add the btf_ama attribute by monkey patching in the end of compilation, but once llvmlite
+        # accepts our issue, we will resort to normal accessed attribute based attribute addition
+        # currently we generate all possible field accesses for CO-RE and put into the assignment table
+        debug_info = debug_info_generation(struct, self.llvm_module)
+        field_index = 0
+        for field_name, field in struct.fields.items():
+            # does not take arrays and similar types into consideration yet.
+            if field.ctype_complex_type is not None and issubclass(
+                field.ctype_complex_type, ctypes.Array
+            ):
+                array_size = field.type_size
+                containing_type = field.containing_type
+                if containing_type.__module__ == ctypes.__name__:
+                    containing_type_size = ctypes.sizeof(containing_type)
+                    for i in range(0, array_size):
+                        field_co_re_name = self._struct_name_generator(
+                            struct, field, field_index, True, i, containing_type_size
+                        )
+                        globvar = ir.GlobalVariable(
+                            self.llvm_module, ir.IntType(64), name=field_co_re_name
+                        )
+                        globvar.linkage = "external"
+                        globvar.set_metadata("llvm.preserve.access.index", debug_info)
+                field_index += 1
+            elif field.type_size is not None:
+                array_size = field.type_size
+                containing_type = field.containing_type
+                if containing_type.__module__ == "vmlinux":
+                    containing_type_size = self.handler[
+                        containing_type.__name__
+                    ].current_offset
+                    for i in range(0, array_size):
+                        field_co_re_name = self._struct_name_generator(
+                            struct, field, field_index, True, i, containing_type_size
+                        )
+                        globvar = ir.GlobalVariable(
+                            self.llvm_module, ir.IntType(64), name=field_co_re_name
+                        )
+                        globvar.linkage = "external"
+                        globvar.set_metadata("llvm.preserve.access.index", debug_info)
+                field_index += 1
+            else:
+                field_co_re_name = self._struct_name_generator(
+                    struct, field, field_index
+                )
+                field_index += 1
+                globvar = ir.GlobalVariable(
+                    self.llvm_module, ir.IntType(64), name=field_co_re_name
+                )
+                globvar.linkage = "external"
+                globvar.set_metadata("llvm.preserve.access.index", debug_info)
+
+    def _struct_name_generator(
+        self,
+        struct: DependencyNode,
+        field,
+        field_index: int,
+        is_indexed: bool = False,
+        index: int = 0,
+        containing_type_size: int = 0,
+    ) -> str:
+        if is_indexed:
+            name = (
+                "llvm."
+                + struct.name.removeprefix("struct_")
+                + f":0:{field.offset + index * containing_type_size}"
+                + "$"
+                + f"0:{field_index}:{index}"
+            )
+            return name
+        elif struct.name.startswith("struct_"):
+            name = (
+                "llvm."
+                + struct.name.removeprefix("struct_")
+                + f":0:{field.offset}"
+                + "$"
+                + f"0:{field_index}"
+            )
+            return name
+        else:
+            print(self.handler[struct.name])
+            raise TypeError(
+                "Name generation cannot occur due to type name not starting with struct"
+            )
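The CO-RE access globals emitted by gen_ir are plain external i64 globals whose names encode the struct, a byte offset, and an access index. With the scheme in _struct_name_generator, a hypothetical first field of struct_xdp_md at offset 0 would come out as follows (values assumed for illustration):

struct_name = "struct_xdp_md"  # assumed example struct
field_offset = 0               # field.offset of the first field
field_index = 0

name = (
    "llvm."
    + struct_name.removeprefix("struct_")
    + f":0:{field_offset}"
    + "$"
    + f"0:{field_index}"
)
print(name)  # llvm.xdp_md:0:0$0:0

Each such global gets external linkage and !llvm.preserve.access.index metadata, which is exactly the shape the finalize_module regex earlier in this PR keys on when appending "btf_ama".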
@@ -2,13 +2,16 @@ from pythonbpf import bpf, map, section, bpfglobal, compile_to_ir
 from pythonbpf.maps import HashMap
 from pythonbpf.helper import XDP_PASS
 from vmlinux import TASK_COMM_LEN  # noqa: F401
-from vmlinux import struct_trace_event_raw_sys_enter  # noqa: F401
-# from vmlinux import struct_request
+
+from vmlinux import struct_qspinlock  # noqa: F401
+
+# from vmlinux import struct_trace_event_raw_sys_enter  # noqa: F401
+# from vmlinux import struct_posix_cputimers  # noqa: F401
 from vmlinux import struct_xdp_md
+
 # from vmlinux import struct_trace_event_raw_sys_enter  # noqa: F401
 # from vmlinux import struct_ring_buffer_per_cpu  # noqa: F401
+# from vmlinux import struct_request  # noqa: F401
 from ctypes import c_int64
+
 # Instructions to how to run this program
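Assuming the example above lives in its own file, the compile_to_ir entry point changed earlier in this PR is what lowers it to LLVM IR. A minimal sketch; the file names are placeholders, not paths from the repository:

from pythonbpf import compile_to_ir

# compile_to_ir(filename, output, loglevel=...) writes the finalized IR
# (with the "btf_ama" monkey patch applied) to the output path.
compile_to_ir("xdp_vmlinux_example.py", "xdp_vmlinux_example.ll")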