TODO added for llvmlite attribute issue

Refer: https://github.com/numba/llvmlite/issues/1331

Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
add basic IR gen strategy
2026-03-18 03:01:28 +00:00 · 2025-10-15 21:35:28 +05:30 · 2025-10-15 21:25:53 +05:30 · 2025-10-15 19:11:53 +05:30 · 2025-10-15 18:19:51 +05:30 · 2025-10-15 18:05:57 +05:30
7 changed files with 179 additions and 17 deletions
--- a/pythonbpf/codegen.py
+++ b/pythonbpf/codegen.py
@ -19,12 +19,22 @@ from pylibbpf import BpfProgram
 import tempfile
 from logging import Logger
 import logging
+import re

 logger: Logger = logging.getLogger(__name__)

 VERSION = "v0.1.4"


+def finalize_module(original_str):
+    """Return the textual IR with a "btf_ama" attribute appended to CO-RE globals.
+
+    The attribute is patched into the finished IR string rather than set while
+    the module is being built.  Every declaration of the form
+    ``@"llvm.<...>:<...>" = external global i64, !llvm.preserve.access.index !N``
+    has `` "btf_ama"`` appended to it; all other text is returned unchanged.
+    """
+    co_re_global = re.compile(
+        r'(@"llvm\.[^"]+:[^"]*" = external global i64, !llvm\.preserve\.access\.index ![0-9]+)'
+    )
+    return co_re_global.sub(r'\1 "btf_ama"', original_str)
+
+
 def find_bpf_chunks(tree):
    """Find all functions decorated with @bpf in the AST."""
    bpf_functions = []
@ -121,10 +131,12 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):

    module.add_named_metadata("llvm.ident", [f"PythonBPF {VERSION}"])

+    module_string = finalize_module(str(module))
+
    logger.info(f"IR written to {output}")
    with open(output, "w") as f:
        f.write(f'source_filename = "{filename}"\n')
-        f.write(str(module))
+        f.write(module_string)
        f.write("\n")

    return output
--- a/pythonbpf/vmlinux_parser/class_handler.py
+++ b/pythonbpf/vmlinux_parser/class_handler.py
@ -71,6 +71,9 @@ def process_vmlinux_post_ast(
                    if len(field_elem) == 2:
                        field_name, field_type = field_elem
                    elif len(field_elem) == 3:
+                        raise NotImplementedError(
+                            "Bitfields are not supported in the current version"
+                        )
                        field_name, field_type, bitfield_size = field_elem
                    field_table[field_name] = [field_type, bitfield_size]
            elif hasattr(class_obj, "__annotations__"):
@ -144,7 +147,12 @@ def process_vmlinux_post_ast(
                            process_vmlinux_post_ast(
                                containing_type, llvm_handler, handler, processing_stack
                            )
-                            new_dep_node.set_field_ready(elem_name, True)
+                            size_of_containing_type = (
+                                handler[containing_type.__name__]
+                            ).__sizeof__()
+                            new_dep_node.set_field_ready(
+                                elem_name, True, size_of_containing_type
+                            )
                        elif containing_type.__module__ == ctypes.__name__:
                            logger.debug(f"Processing ctype internal{containing_type}")
                            new_dep_node.set_field_ready(elem_name, True)
@ -161,7 +169,12 @@ def process_vmlinux_post_ast(
                        process_vmlinux_post_ast(
                            elem_type, llvm_handler, handler, processing_stack
                        )
-                        new_dep_node.set_field_ready(elem_name, True)
+                        size_of_containing_type = (
+                            handler[elem_type.__name__]
+                        ).__sizeof__()
+                        new_dep_node.set_field_ready(
+                            elem_name, True, size_of_containing_type
+                        )
                else:
                    raise ValueError(
                        f"{elem_name} with type {elem_type} from module {module_name} not supported in recursive resolver"
--- a/pythonbpf/vmlinux_parser/dependency_node.py
+++ b/pythonbpf/vmlinux_parser/dependency_node.py
@ -1,5 +1,6 @@
 from dataclasses import dataclass, field
 from typing import Dict, Any, Optional
+import ctypes


 # TODO: FIX THE FUCKING TYPE NAME CONVENTION.
@ -13,6 +14,7 @@ class Field:
    containing_type: Optional[Any]
    type_size: Optional[int]
    bitfield_size: Optional[int]
+    offset: int
    value: Any = None
    ready: bool = False

@ -60,6 +62,10 @@ class Field:
        if mark_ready:
            self.ready = True

+    def set_offset(self, offset: int) -> None:
+        """Store ``offset`` as the position of this field.
+
+        The value is recorded as given, without validation.
+        NOTE(review): presumably a byte offset from the start of the owning
+        struct -- confirm against the caller that computes it.
+        """
+        self.offset = offset
+

@dataclass
 class DependencyNode:
@ -109,6 +115,7 @@ class DependencyNode:
    depends_on: Optional[list[str]] = None
    fields: Dict[str, Field] = field(default_factory=dict)
    _ready_cache: Optional[bool] = field(default=None, repr=False)
+    current_offset: int = 0

    def add_field(
        self,
@ -120,6 +127,7 @@ class DependencyNode:
        ctype_complex_type: Optional[int] = None,
        bitfield_size: Optional[int] = None,
        ready: bool = False,
+        offset: int = 0,
    ) -> None:
        """Add a field to the node with an optional initial value and readiness state."""
        if self.depends_on is None:
@ -133,10 +141,14 @@ class DependencyNode:
            type_size=type_size,
            ctype_complex_type=ctype_complex_type,
            bitfield_size=bitfield_size,
+            offset=offset,
        )
        # Invalidate readiness cache
        self._ready_cache = None

+    def __sizeof__(self):
+        """Return ``current_offset``, the running total of field sizes so far.
+
+        NOTE(review): this overrides the hook that ``sys.getsizeof`` uses, so
+        the reported value is derived from the accumulated field sizes of the
+        node rather than from the memory footprint of the Python object.
+        """
+        return self.current_offset
+
    def get_field(self, name: str) -> Field:
        """Get a field by name."""
        return self.fields[name]
@ -203,15 +215,78 @@ class DependencyNode:
        # Invalidate readiness cache
        self._ready_cache = None

-    def set_field_ready(self, name: str, is_ready: bool = False) -> None:
+    def set_field_ready(
+        self,
+        name: str,
+        is_ready: bool = False,
+        size_of_containing_type: Optional[int] = None,
+    ) -> None:
        """Mark a field as ready or not ready."""
        if name not in self.fields:
            raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")

        self.fields[name].set_ready(is_ready)
+        self.fields[name].set_offset(self.current_offset)
+        self.current_offset += self._calculate_size(name, size_of_containing_type)
        # Invalidate readiness cache
        self._ready_cache = None

+    def _calculate_size(
+        self, name: str, size_of_containing_type: Optional[int] = None
+    ) -> int:
+        """Return the size in bytes of the field called ``name``.
+
+        How the size is obtained depends on where the field's type comes from:
+
+        * a type from ``ctypes``: ``ctypes.sizeof`` of that type;
+        * a ``vmlinux`` type with no ``ctype_complex_type``: the caller-supplied
+          ``size_of_containing_type``;
+        * a ``vmlinux`` array: element size times ``type_size``, where the
+          element size is ``ctypes.sizeof(containing_type)`` for ctypes
+          elements and ``size_of_containing_type`` for ``vmlinux`` elements;
+        * a ``vmlinux`` pointer: the size of ``ctypes.c_void_p``.
+
+        Args:
+            name: Key of the field in ``self.fields``.
+            size_of_containing_type: Size of the referenced ``vmlinux`` type,
+                required whenever the size cannot be derived from ctypes.
+
+        Raises:
+            KeyError: ``name`` is not in ``self.fields``.
+            ModuleNotFoundError: the field type is from neither ``ctypes`` nor
+                ``vmlinux``.
+            NotImplementedError: ``ctype_complex_type`` is neither an array nor
+                a pointer.
+            RuntimeError: information needed for the computation is missing,
+                or an array's element type is from an unsupported module.
+
+        NOTE(review): only the raw size is returned; no alignment padding is
+        added here.
+        """
+        processing_field = self.fields[name]
+        field_module = processing_field.type.__module__
+
+        # Plain ctypes types know their own size.
+        if field_module == ctypes.__name__:
+            return ctypes.sizeof(processing_field.type)
+
+        if field_module != "vmlinux":
+            raise ModuleNotFoundError("Module is not supported for the operation")
+
+        complex_type = processing_field.ctype_complex_type
+        if complex_type is None:
+            # A direct reference to another vmlinux type: its size was computed
+            # earlier and must be handed in by the caller.
+            if size_of_containing_type is None:
+                raise RuntimeError(
+                    f"Size of containing type {size_of_containing_type} is None"
+                )
+            return size_of_containing_type
+
+        if issubclass(complex_type, ctypes.Array):
+            containing_type = processing_field.containing_type
+            element_count = processing_field.type_size
+            # Validate before touching containing_type.__module__: a missing
+            # containing type must surface as RuntimeError, not AttributeError.
+            if containing_type is None or element_count is None:
+                raise RuntimeError(
+                    f"{processing_field} has no containing_type or type_size"
+                )
+            containing_module = containing_type.__module__
+            if containing_module == ctypes.__name__:
+                return ctypes.sizeof(containing_type) * element_count
+            if containing_module == "vmlinux":
+                if size_of_containing_type is None:
+                    raise RuntimeError(
+                        f"{processing_field} has no containing_type or type_size"
+                    )
+                return size_of_containing_type * element_count
+            raise RuntimeError(
+                f"{processing_field} has a containing_type from an unsupported module"
+            )
+
+        if issubclass(complex_type, ctypes._Pointer):
+            # Every pointer has the same width regardless of the pointee.
+            return ctypes.sizeof(ctypes.c_void_p)
+
+        raise NotImplementedError("This subclass of ctype not supported yet")
+
+
    @property
    def is_ready(self) -> bool:
        """Check if the node is ready (all fields are ready)."""
--- a/pythonbpf/vmlinux_parser/import_detector.py
+++ b/pythonbpf/vmlinux_parser/import_detector.py
@ -1,6 +1,6 @@
 import ast
 import logging
-from typing import List, Tuple, Dict
+from typing import List, Tuple, Any
 import importlib
 import inspect

@ -82,7 +82,7 @@ def vmlinux_proc(tree: ast.AST, module):
    # initialise dependency handler
    handler = DependencyHandler()
    # initialise assignment dictionary of name to type
-    assignments: Dict[str, type] = {}
+    assignments: dict[str, tuple[type, Any]] = {}

    if not import_statements:
        logger.info("No vmlinux imports found")
@ -129,7 +129,19 @@ def vmlinux_proc(tree: ast.AST, module):
                )

    IRGenerator(module, handler)
+    return assignments


-def process_vmlinux_assign(node, module, assignments: Dict[str, type]):
-    raise NotImplementedError("Assignment handling has not been implemented yet")
+def process_vmlinux_assign(node, module, assignments: dict[str, tuple[type, Any]]):
+    """Record a ``NAME = <constant>`` assignment in ``assignments``.
+
+    Only an assignment with exactly one target that is a plain name, and whose
+    right-hand side is an ``ast.Constant``, is accepted.  The entry stored under
+    the target name is ``(type_of_value, value)``, and the addition is logged at
+    INFO level.  ``module`` is not used here.
+
+    Raises:
+        ValueError: there is more than one target, the target is not a plain
+            name, or the assigned value is not a constant.
+    """
+    targets = node.targets
+    if len(targets) != 1 or not isinstance(targets[0], ast.Name):
+        raise ValueError("Not a simple assignment")
+
+    target_name = targets[0].id
+    if not isinstance(node.value, ast.Constant):
+        raise ValueError(f"Unsupported assignment type for {target_name}")
+
+    assignments[target_name] = (type(node.value.value), node.value.value)
+    logger.info(
+        f"Added assignment: {target_name} = {node.value.value!r} of type {type(node.value.value)}"
+    )
--- a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py
+++ b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py
@ -0,0 +1,15 @@
+from pythonbpf.debuginfo import DebugInfoGenerator
+
+
+def debug_info_generation(struct, llvm_module):
+    """Create sample debug info for ``struct`` and return its global-var entry.
+
+    A ``DebugInfoGenerator`` is built for ``llvm_module``, a distinct struct
+    type with an empty member list is created, and the result of
+    ``create_global_var_debug_info`` for ``struct.name`` is returned.
+
+    NOTE(review): the struct type is a placeholder -- it has no members and a
+    hard-coded ``64 * 4`` second argument (presumably the size); it does not
+    yet reflect the real layout of ``struct``.
+    """
+    di_generator = DebugInfoGenerator(llvm_module)
+    placeholder_struct_type = di_generator.create_struct_type(
+        [], 64 * 4, is_distinct=True
+    )
+    return di_generator.create_global_var_debug_info(
+        struct.name, placeholder_struct_type, is_local=False
+    )
--- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py
+++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py
@ -1,12 +1,16 @@
 import logging
-from pythonbpf.vmlinux_parser.dependency_handler import DependencyHandler
+from ..dependency_handler import DependencyHandler
+from .debug_info_gen import debug_info_generation
+from ..dependency_node import DependencyNode
+import llvmlite.ir as ir

 logger = logging.getLogger(__name__)


 class IRGenerator:
-    def __init__(self, module, handler: DependencyHandler):
-        self.module = module
+    # get the assignments dict and add this stuff to it.
+    def __init__(self, llvm_module, handler: DependencyHandler, assignment=None):
+        self.llvm_module = llvm_module
        self.handler: DependencyHandler = handler
        self.generated: list[str] = []
        if not handler.is_ready:
@ -15,21 +19,50 @@ class IRGenerator:
            )
        for struct in handler:
            self.struct_processor(struct)
-            print()

    def struct_processor(self, struct):
        if struct.name not in self.generated:
            print(f"IR generating for {struct.name}")
-            print(f"Struct is {struct}")
            for dependency in struct.depends_on:
                if dependency not in self.generated:
                    dep_node_from_dependency = self.handler[dependency]
                    self.struct_processor(dep_node_from_dependency)
                    self.generated.append(dependency)
-            # write actual processor logic here after assuming all dependencies are resolved
+            # actual processor logic here after assuming all dependencies are resolved
            # this part cannot yet resolve circular dependencies. Gets stuck on an infinite loop during that.
+            self.gen_ir(struct)
            self.generated.append(struct.name)

+    def gen_ir(self, struct):
+        """Declare one external ``i64`` global per field of ``struct``.
+
+        Each global is named by ``_struct_name_generator`` from the struct, the
+        field and the field's position, and carries the struct's debug info as
+        ``llvm.preserve.access.index`` metadata.  A blank line is printed once
+        all fields have been handled.
+        """
+        # TODO: we add the btf_ama attribute by monkey patching in the end of compilation, but once llvmlite
+        #  accepts our issue, we will resort to normal accessed attribute based attribute addition
+        # currently we generate all possible field accesses for CO-RE and put into the assignment table
+        struct_debug_info = debug_info_generation(struct, self.llvm_module)
+        # does not take arrays and similar types into consideration yet.
+        for position, member in enumerate(struct.fields.values()):
+            co_re_name = self._struct_name_generator(struct, member, position)
+            co_re_global = ir.GlobalVariable(
+                self.llvm_module, ir.IntType(64), name=co_re_name
+            )
+            co_re_global.linkage = "external"
+            co_re_global.set_metadata("llvm.preserve.access.index", struct_debug_info)
+        print()

-    def struct_name_generator(self, ):
-        pass
+    def _struct_name_generator(
+        self, struct: DependencyNode, field, field_index: int
+    ) -> str:
+        """Build the global name used for one field access of ``struct``.
+
+        The result is ``llvm.<name>:0:<offset>$0:<index>``, where ``<name>`` is
+        ``struct.name`` without its ``struct_`` prefix, ``<offset>`` is
+        ``field.offset`` and ``<index>`` is ``field_index``.
+
+        Raises:
+            TypeError: ``struct.name`` does not start with ``struct_``.
+        """
+        if not struct.name.startswith("struct_"):
+            raise TypeError(
+                "Name generation cannot occur due to type name not starting with struct"
+            )
+        pieces = (
+            "llvm.",
+            struct.name.removeprefix("struct_"),
+            f":0:{field.offset}",
+            "$",
+            f"0:{field_index}",
+        )
+        return "".join(pieces)
--- a/tests/failing_tests/xdp_pass.py
+++ b/tests/failing_tests/xdp_pass.py
@ -1,8 +1,10 @@
 from pythonbpf import bpf, map, section, bpfglobal, compile_to_ir
 from pythonbpf.maps import HashMap
 from pythonbpf.helper import XDP_PASS
+from vmlinux import TASK_COMM_LEN  # noqa: F401
+from vmlinux import struct_trace_event_raw_sys_enter  # noqa: F401
+
 # from vmlinux import struct_request
-from vmlinux import struct_trace_event_raw_sys_enter
 from vmlinux import struct_xdp_md
 # from vmlinux import struct_trace_event_raw_sys_enter  # noqa: F401
 # from vmlinux import struct_ring_buffer_per_cpu  # noqa: F401
Author	SHA1	Message	Date
varun-r-mallya	2b3c81affa	TODO added for llvmlite attribute issue *Refer: https://github.com/numba/llvmlite/issues/1331 Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>	2025-10-15 21:35:28 +05:30
varun-r-mallya	8372111616	add basic IR gen strategy	2025-10-15 21:25:53 +05:30
varun-r-mallya	eb4ee64ee5	Revert "float vmlinux_assignments_symtab" This reverts commit `ce7b170fea`.	2025-10-15 19:11:53 +05:30
varun-r-mallya	ce7b170fea	float vmlinux_assignments_symtab	2025-10-15 18:19:51 +05:30
varun-r-mallya	c499fe7421	solve static typing issues	2025-10-15 18:05:57 +05:30
varun-r-mallya	8239097fbb	format chore	2025-10-15 17:49:38 +05:30
varun-r-mallya	a4cfc2b7aa	add assignments table and offset handler	2025-10-15 17:49:20 +05:30
varun-r-mallya	69b73003ca	setup skeleton for offset calculation	2025-10-15 04:42:38 +05:30