From e1f9ac6ba07aa7e6f5dd2b4bd146386db52c69cf Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Tue, 14 Oct 2025 02:35:49 +0530 Subject: [PATCH 01/51] add dependency tree functionality --- pythonbpf/vmlinux_parser/class_handler.py | 3 ++- pythonbpf/vmlinux_parser/dependency_node.py | 9 +++++++++ pythonbpf/vmlinux_parser/ir_generation.py | 3 +++ tests/c-form/ex7.bpf.c | 16 +--------------- tests/failing_tests/xdp_pass.py | 4 ++-- 5 files changed, 17 insertions(+), 18 deletions(-) diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index 3cb3a97..1ffe2b3 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -112,7 +112,7 @@ def process_vmlinux_post_ast( type_length = elem_type._length_ if containing_type.__module__ == "vmlinux": - pass + new_dep_node.add_dependent(elem_type._type_.__name__ if hasattr(elem_type._type_, "__name__") else str(elem_type._type_)) elif containing_type.__module__ == ctypes.__name__: if isinstance(elem_type, type): if issubclass(elem_type, ctypes.Array): @@ -149,6 +149,7 @@ def process_vmlinux_post_ast( "Module not supported in recursive resolution" ) else: + new_dep_node.add_dependent(elem_type.__name__ if hasattr(elem_type, "__name__") else str(elem_type)) process_vmlinux_post_ast( elem_type, llvm_handler, handler, processing_stack ) diff --git a/pythonbpf/vmlinux_parser/dependency_node.py b/pythonbpf/vmlinux_parser/dependency_node.py index a17ffaf..7f32323 100644 --- a/pythonbpf/vmlinux_parser/dependency_node.py +++ b/pythonbpf/vmlinux_parser/dependency_node.py @@ -106,6 +106,7 @@ class DependencyNode: """ name: str + depends_on: Optional[list[str]] = None fields: Dict[str, Field] = field(default_factory=dict) _ready_cache: Optional[bool] = field(default=None, repr=False) @@ -121,6 +122,8 @@ class DependencyNode: ready: bool = False, ) -> None: """Add a field to the node with an optional initial value and readiness state.""" + if self.depends_on is None: + self.depends_on = [] self.fields[name] = Field( name=name, type=field_type, @@ -235,3 +238,9 @@ class DependencyNode: def get_not_ready_fields(self) -> Dict[str, Field]: """Get all fields that are marked as not ready.""" return {name: elem for name, elem in self.fields.items() if not elem.ready} + + def add_dependent(self, dep_type): + if dep_type in self.depends_on: + return + else: + self.depends_on.append(dep_type) diff --git a/pythonbpf/vmlinux_parser/ir_generation.py b/pythonbpf/vmlinux_parser/ir_generation.py index 62b13bc..c66ba11 100644 --- a/pythonbpf/vmlinux_parser/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_generation.py @@ -12,3 +12,6 @@ class IRGenerator: raise ImportError( "Semantic analysis of vmlinux imports failed. Cannot generate IR" ) + for struct in handler: + print(struct) + print() diff --git a/tests/c-form/ex7.bpf.c b/tests/c-form/ex7.bpf.c index a462444..80a60d1 100644 --- a/tests/c-form/ex7.bpf.c +++ b/tests/c-form/ex7.bpf.c @@ -1,23 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 -#include +#include "vmlinux.h" #include #include -struct trace_entry { - short unsigned int type; - unsigned char flags; - unsigned char preempt_count; - int pid; -}; - -struct trace_event_raw_sys_enter { - struct trace_entry ent; - long int id; - long unsigned int args[6]; - char __data[0]; -}; - struct event { __u32 pid; __u32 uid; diff --git a/tests/failing_tests/xdp_pass.py b/tests/failing_tests/xdp_pass.py index a7b4550..473375b 100644 --- a/tests/failing_tests/xdp_pass.py +++ b/tests/failing_tests/xdp_pass.py @@ -2,8 +2,8 @@ from pythonbpf import bpf, map, section, bpfglobal, compile_to_ir from pythonbpf.maps import HashMap from pythonbpf.helper import XDP_PASS from vmlinux import struct_xdp_md -from vmlinux import struct_xdp_buff # noqa: F401 -from vmlinux import struct_ring_buffer_per_cpu # noqa: F401 +from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 +from vmlinux import struct_ring_buffer_per_cpu # noqa: F401 from ctypes import c_int64 From a03d3e5d4c5d37ef2d8312dde9c61bd4315521f7 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Tue, 14 Oct 2025 02:36:04 +0530 Subject: [PATCH 02/51] format chore --- pythonbpf/vmlinux_parser/class_handler.py | 12 ++++++++++-- tests/failing_tests/xdp_pass.py | 4 ++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index 1ffe2b3..cf82e50 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -112,7 +112,11 @@ def process_vmlinux_post_ast( type_length = elem_type._length_ if containing_type.__module__ == "vmlinux": - new_dep_node.add_dependent(elem_type._type_.__name__ if hasattr(elem_type._type_, "__name__") else str(elem_type._type_)) + new_dep_node.add_dependent( + elem_type._type_.__name__ + if hasattr(elem_type._type_, "__name__") + else str(elem_type._type_) + ) elif containing_type.__module__ == ctypes.__name__: if isinstance(elem_type, type): if issubclass(elem_type, ctypes.Array): @@ -149,7 +153,11 @@ def process_vmlinux_post_ast( "Module not supported in recursive resolution" ) else: - new_dep_node.add_dependent(elem_type.__name__ if hasattr(elem_type, "__name__") else str(elem_type)) + new_dep_node.add_dependent( + elem_type.__name__ + if hasattr(elem_type, "__name__") + else str(elem_type) + ) process_vmlinux_post_ast( elem_type, llvm_handler, handler, processing_stack ) diff --git a/tests/failing_tests/xdp_pass.py b/tests/failing_tests/xdp_pass.py index 473375b..6d6be86 100644 --- a/tests/failing_tests/xdp_pass.py +++ b/tests/failing_tests/xdp_pass.py @@ -2,8 +2,8 @@ from pythonbpf import bpf, map, section, bpfglobal, compile_to_ir from pythonbpf.maps import HashMap from pythonbpf.helper import XDP_PASS from vmlinux import struct_xdp_md -from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 -from vmlinux import struct_ring_buffer_per_cpu # noqa: F401 +from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 +from vmlinux import struct_ring_buffer_per_cpu # noqa: F401 from ctypes import c_int64 From d3f0e3b2ef2881d89718b38e178335a569d5b506 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Tue, 14 Oct 2025 03:09:18 +0530 Subject: [PATCH 03/51] remove tbaa_gen and make IR generator module --- pythonbpf/tbaa_gen/__init__.py | 0 pythonbpf/vmlinux_parser/import_detector.py | 2 +- pythonbpf/vmlinux_parser/ir_gen/__init__.py | 3 +++ pythonbpf/vmlinux_parser/{ => ir_gen}/ir_generation.py | 2 +- 4 files changed, 5 insertions(+), 2 deletions(-) delete mode 100644 pythonbpf/tbaa_gen/__init__.py create mode 100644 pythonbpf/vmlinux_parser/ir_gen/__init__.py rename pythonbpf/vmlinux_parser/{ => ir_gen}/ir_generation.py (85%) diff --git a/pythonbpf/tbaa_gen/__init__.py b/pythonbpf/tbaa_gen/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/pythonbpf/vmlinux_parser/import_detector.py b/pythonbpf/vmlinux_parser/import_detector.py index 2ce9cb5..f5789ce 100644 --- a/pythonbpf/vmlinux_parser/import_detector.py +++ b/pythonbpf/vmlinux_parser/import_detector.py @@ -5,7 +5,7 @@ import importlib import inspect from .dependency_handler import DependencyHandler -from .ir_generation import IRGenerator +from .ir_gen import IRGenerator from .class_handler import process_vmlinux_class logger = logging.getLogger(__name__) diff --git a/pythonbpf/vmlinux_parser/ir_gen/__init__.py b/pythonbpf/vmlinux_parser/ir_gen/__init__.py new file mode 100644 index 0000000..3a13651 --- /dev/null +++ b/pythonbpf/vmlinux_parser/ir_gen/__init__.py @@ -0,0 +1,3 @@ +from .ir_generation import IRGenerator + +__all__ = ["IRGenerator"] diff --git a/pythonbpf/vmlinux_parser/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py similarity index 85% rename from pythonbpf/vmlinux_parser/ir_generation.py rename to pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index c66ba11..e4dae7d 100644 --- a/pythonbpf/vmlinux_parser/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -1,5 +1,5 @@ import logging -from .dependency_handler import DependencyHandler +from pythonbpf.vmlinux_parser.dependency_handler import DependencyHandler logger = logging.getLogger(__name__) From 11e8e721886d7c1379ec61a308117a55ec2b7719 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Wed, 15 Oct 2025 02:00:23 +0530 Subject: [PATCH 04/51] add base for ir gen --- .../vmlinux_parser/dependency_handler.py | 20 +++++++++++++++++++ .../vmlinux_parser/ir_gen/ir_generation.py | 20 ++++++++++++++++++- tests/failing_tests/xdp_pass.py | 6 ++++-- 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/pythonbpf/vmlinux_parser/dependency_handler.py b/pythonbpf/vmlinux_parser/dependency_handler.py index fb49b00..b960ab3 100644 --- a/pythonbpf/vmlinux_parser/dependency_handler.py +++ b/pythonbpf/vmlinux_parser/dependency_handler.py @@ -147,3 +147,23 @@ class DependencyHandler: int: The number of nodes """ return len(self._nodes) + + def __getitem__(self, name: str) -> DependencyNode: + """ + Get a node by name using dictionary-style access. + + Args: + name: The name of the node to retrieve + + Returns: + DependencyNode: The node with the given name + + Raises: + KeyError: If no node with the given name exists + + Example: + node = handler["some-dep_node_name"] + """ + if name not in self._nodes: + raise KeyError(f"No node with name '{name}' found") + return self._nodes[name] diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index e4dae7d..1a2be62 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -8,10 +8,28 @@ class IRGenerator: def __init__(self, module, handler: DependencyHandler): self.module = module self.handler: DependencyHandler = handler + self.generated: list[str] = [] if not handler.is_ready: raise ImportError( "Semantic analysis of vmlinux imports failed. Cannot generate IR" ) for struct in handler: - print(struct) + self.struct_processor(struct) print() + + def struct_processor(self, struct): + if struct.name not in self.generated: + print(f"IR generating for {struct.name}") + print(f"Struct is {struct}") + for dependency in struct.depends_on: + if dependency not in self.generated: + dep_node_from_dependency = self.handler[dependency] + self.struct_processor(dep_node_from_dependency) + self.generated.append(dependency) + # write actual processor logic here after assuming all dependencies are resolved + # this part cannot yet resolve circular dependencies. Gets stuck on an infinite loop during that. + self.generated.append(struct.name) + + + def struct_name_generator(self, ): + pass diff --git a/tests/failing_tests/xdp_pass.py b/tests/failing_tests/xdp_pass.py index 6d6be86..f44910d 100644 --- a/tests/failing_tests/xdp_pass.py +++ b/tests/failing_tests/xdp_pass.py @@ -1,9 +1,11 @@ from pythonbpf import bpf, map, section, bpfglobal, compile_to_ir from pythonbpf.maps import HashMap from pythonbpf.helper import XDP_PASS +# from vmlinux import struct_request +from vmlinux import struct_trace_event_raw_sys_enter from vmlinux import struct_xdp_md -from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 -from vmlinux import struct_ring_buffer_per_cpu # noqa: F401 +# from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 +# from vmlinux import struct_ring_buffer_per_cpu # noqa: F401 from ctypes import c_int64 From 69b73003caa507d83a6b90e195ee806dbaf59d4b Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Wed, 15 Oct 2025 04:42:38 +0530 Subject: [PATCH 05/51] setup skeleton for offset calculation --- pythonbpf/vmlinux_parser/class_handler.py | 1 + pythonbpf/vmlinux_parser/dependency_node.py | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index cf82e50..ce08530 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -71,6 +71,7 @@ def process_vmlinux_post_ast( if len(field_elem) == 2: field_name, field_type = field_elem elif len(field_elem) == 3: + raise NotImplementedError("Bitfields are not supported in the current version") field_name, field_type, bitfield_size = field_elem field_table[field_name] = [field_type, bitfield_size] elif hasattr(class_obj, "__annotations__"): diff --git a/pythonbpf/vmlinux_parser/dependency_node.py b/pythonbpf/vmlinux_parser/dependency_node.py index 7f32323..8a512cd 100644 --- a/pythonbpf/vmlinux_parser/dependency_node.py +++ b/pythonbpf/vmlinux_parser/dependency_node.py @@ -13,6 +13,7 @@ class Field: containing_type: Optional[Any] type_size: Optional[int] bitfield_size: Optional[int] + offset: int value: Any = None ready: bool = False @@ -60,6 +61,10 @@ class Field: if mark_ready: self.ready = True + def set_offset(self, offset: int) -> None: + """Set the offset of this field""" + self.offset = offset + @dataclass class DependencyNode: @@ -109,6 +114,7 @@ class DependencyNode: depends_on: Optional[list[str]] = None fields: Dict[str, Field] = field(default_factory=dict) _ready_cache: Optional[bool] = field(default=None, repr=False) + current_offset: int = 0 def add_field( self, @@ -120,6 +126,7 @@ class DependencyNode: ctype_complex_type: Optional[int] = None, bitfield_size: Optional[int] = None, ready: bool = False, + offset: int = 0, ) -> None: """Add a field to the node with an optional initial value and readiness state.""" if self.depends_on is None: @@ -133,6 +140,7 @@ class DependencyNode: type_size=type_size, ctype_complex_type=ctype_complex_type, bitfield_size=bitfield_size, + offset=offset ) # Invalidate readiness cache self._ready_cache = None @@ -209,9 +217,14 @@ class DependencyNode: raise KeyError(f"Field '{name}' does not exist in node '{self.name}'") self.fields[name].set_ready(is_ready) + self.fields[name].set_offset(self.current_offset) + self.current_offset += self._calculate_size(name) + # Invalidate readiness cache self._ready_cache = None + def _calculate_size(self, name: str) -> int: + pass @property def is_ready(self) -> bool: """Check if the node is ready (all fields are ready).""" From a4cfc2b7aafd0f4c77a4da400b2c448c1d16834a Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Wed, 15 Oct 2025 17:49:20 +0530 Subject: [PATCH 06/51] add assignments table and offset handler --- pythonbpf/vmlinux_parser/class_handler.py | 10 ++-- pythonbpf/vmlinux_parser/dependency_node.py | 49 ++++++++++++++++--- pythonbpf/vmlinux_parser/import_detector.py | 8 ++- .../vmlinux_parser/ir_gen/ir_generation.py | 5 +- tests/failing_tests/xdp_pass.py | 3 +- 5 files changed, 62 insertions(+), 13 deletions(-) diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index ce08530..0702939 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -71,7 +71,9 @@ def process_vmlinux_post_ast( if len(field_elem) == 2: field_name, field_type = field_elem elif len(field_elem) == 3: - raise NotImplementedError("Bitfields are not supported in the current version") + raise NotImplementedError( + "Bitfields are not supported in the current version" + ) field_name, field_type, bitfield_size = field_elem field_table[field_name] = [field_type, bitfield_size] elif hasattr(class_obj, "__annotations__"): @@ -145,7 +147,8 @@ def process_vmlinux_post_ast( process_vmlinux_post_ast( containing_type, llvm_handler, handler, processing_stack ) - new_dep_node.set_field_ready(elem_name, True) + size_of_containing_type = (handler[containing_type.__name__]).__sizeof__() + new_dep_node.set_field_ready(elem_name, True, size_of_containing_type) elif containing_type.__module__ == ctypes.__name__: logger.debug(f"Processing ctype internal{containing_type}") new_dep_node.set_field_ready(elem_name, True) @@ -162,7 +165,8 @@ def process_vmlinux_post_ast( process_vmlinux_post_ast( elem_type, llvm_handler, handler, processing_stack ) - new_dep_node.set_field_ready(elem_name, True) + size_of_containing_type = (handler[elem_type.__name__]).__sizeof__() + new_dep_node.set_field_ready(elem_name, True, size_of_containing_type) else: raise ValueError( f"{elem_name} with type {elem_type} from module {module_name} not supported in recursive resolver" diff --git a/pythonbpf/vmlinux_parser/dependency_node.py b/pythonbpf/vmlinux_parser/dependency_node.py index 8a512cd..a6d4013 100644 --- a/pythonbpf/vmlinux_parser/dependency_node.py +++ b/pythonbpf/vmlinux_parser/dependency_node.py @@ -1,5 +1,6 @@ from dataclasses import dataclass, field from typing import Dict, Any, Optional +import ctypes # TODO: FIX THE FUCKING TYPE NAME CONVENTION. @@ -140,11 +141,14 @@ class DependencyNode: type_size=type_size, ctype_complex_type=ctype_complex_type, bitfield_size=bitfield_size, - offset=offset + offset=offset, ) # Invalidate readiness cache self._ready_cache = None + def __sizeof__(self): + return self.current_offset + def get_field(self, name: str) -> Field: """Get a field by name.""" return self.fields[name] @@ -211,20 +215,53 @@ class DependencyNode: # Invalidate readiness cache self._ready_cache = None - def set_field_ready(self, name: str, is_ready: bool = False) -> None: + def set_field_ready(self, name: str, is_ready: bool = False, size_of_containing_type: Optional[int] = None) -> None: """Mark a field as ready or not ready.""" if name not in self.fields: raise KeyError(f"Field '{name}' does not exist in node '{self.name}'") self.fields[name].set_ready(is_ready) self.fields[name].set_offset(self.current_offset) - self.current_offset += self._calculate_size(name) - + self.current_offset += self._calculate_size(name, size_of_containing_type) # Invalidate readiness cache self._ready_cache = None - def _calculate_size(self, name: str) -> int: - pass + def _calculate_size(self, name: str, size_of_containing_type: Optional[int] = None) -> int: + processing_field = self.fields[name] + # size_of_field will be in bytes + if processing_field.type.__module__ == ctypes.__name__: + size_of_field = ctypes.sizeof(processing_field.type) + return size_of_field + elif processing_field.type.__module__ == "vmlinux": + size_of_field: int = 0 + if processing_field.ctype_complex_type is not None: + if issubclass(processing_field.ctype_complex_type, ctypes.Array): + if processing_field.containing_type.__module__ == ctypes.__name__: + size_of_field = ( + ctypes.sizeof(processing_field.containing_type) + * processing_field.type_size + ) + return size_of_field + elif processing_field.containing_type.__module__ == "vmlinux": + size_of_field = ( + size_of_containing_type + * processing_field.type_size + ) + return size_of_field + elif issubclass(processing_field.ctype_complex_type, ctypes._Pointer): + return ctypes.sizeof(ctypes.pointer()) + else: + raise NotImplementedError( + "This subclass of ctype not supported yet" + ) + else: + # search up pre-created stuff and get size + return size_of_containing_type + + else: + raise ModuleNotFoundError("Module is not supported for the operation") + raise RuntimeError("control should not reach here") + @property def is_ready(self) -> bool: """Check if the node is ready (all fields are ready).""" diff --git a/pythonbpf/vmlinux_parser/import_detector.py b/pythonbpf/vmlinux_parser/import_detector.py index f5789ce..e314a35 100644 --- a/pythonbpf/vmlinux_parser/import_detector.py +++ b/pythonbpf/vmlinux_parser/import_detector.py @@ -129,7 +129,13 @@ def vmlinux_proc(tree: ast.AST, module): ) IRGenerator(module, handler) + return assignments def process_vmlinux_assign(node, module, assignments: Dict[str, type]): - raise NotImplementedError("Assignment handling has not been implemented yet") + # Check if this is a simple assignment with a constant value + if len(node.targets) == 1 and isinstance(node.targets[0], ast.Name): + target_name = node.targets[0].id + if isinstance(node.value, ast.Constant): + assignments[target_name] = node.value.value + logger.info(f"Added assignment: {target_name} = {node.value.value}") diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index 1a2be62..d500cf0 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -30,6 +30,7 @@ class IRGenerator: # this part cannot yet resolve circular dependencies. Gets stuck on an infinite loop during that. self.generated.append(struct.name) - - def struct_name_generator(self, ): + def struct_name_generator( + self, + ) -> None: pass diff --git a/tests/failing_tests/xdp_pass.py b/tests/failing_tests/xdp_pass.py index f44910d..da438c8 100644 --- a/tests/failing_tests/xdp_pass.py +++ b/tests/failing_tests/xdp_pass.py @@ -1,8 +1,9 @@ from pythonbpf import bpf, map, section, bpfglobal, compile_to_ir from pythonbpf.maps import HashMap from pythonbpf.helper import XDP_PASS +from vmlinux import TASK_COMM_LEN # noqa: F401 +from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 # from vmlinux import struct_request -from vmlinux import struct_trace_event_raw_sys_enter from vmlinux import struct_xdp_md # from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 # from vmlinux import struct_ring_buffer_per_cpu # noqa: F401 From 8239097fbb308e1bead8530a662f6024556b74e0 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Wed, 15 Oct 2025 17:49:38 +0530 Subject: [PATCH 07/51] format chore --- pythonbpf/vmlinux_parser/class_handler.py | 16 ++++++++++++---- pythonbpf/vmlinux_parser/dependency_node.py | 14 ++++++++++---- tests/failing_tests/xdp_pass.py | 1 + 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index 0702939..50f2fd6 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -147,8 +147,12 @@ def process_vmlinux_post_ast( process_vmlinux_post_ast( containing_type, llvm_handler, handler, processing_stack ) - size_of_containing_type = (handler[containing_type.__name__]).__sizeof__() - new_dep_node.set_field_ready(elem_name, True, size_of_containing_type) + size_of_containing_type = ( + handler[containing_type.__name__] + ).__sizeof__() + new_dep_node.set_field_ready( + elem_name, True, size_of_containing_type + ) elif containing_type.__module__ == ctypes.__name__: logger.debug(f"Processing ctype internal{containing_type}") new_dep_node.set_field_ready(elem_name, True) @@ -165,8 +169,12 @@ def process_vmlinux_post_ast( process_vmlinux_post_ast( elem_type, llvm_handler, handler, processing_stack ) - size_of_containing_type = (handler[elem_type.__name__]).__sizeof__() - new_dep_node.set_field_ready(elem_name, True, size_of_containing_type) + size_of_containing_type = ( + handler[elem_type.__name__] + ).__sizeof__() + new_dep_node.set_field_ready( + elem_name, True, size_of_containing_type + ) else: raise ValueError( f"{elem_name} with type {elem_type} from module {module_name} not supported in recursive resolver" diff --git a/pythonbpf/vmlinux_parser/dependency_node.py b/pythonbpf/vmlinux_parser/dependency_node.py index a6d4013..a0e1d45 100644 --- a/pythonbpf/vmlinux_parser/dependency_node.py +++ b/pythonbpf/vmlinux_parser/dependency_node.py @@ -215,7 +215,12 @@ class DependencyNode: # Invalidate readiness cache self._ready_cache = None - def set_field_ready(self, name: str, is_ready: bool = False, size_of_containing_type: Optional[int] = None) -> None: + def set_field_ready( + self, + name: str, + is_ready: bool = False, + size_of_containing_type: Optional[int] = None, + ) -> None: """Mark a field as ready or not ready.""" if name not in self.fields: raise KeyError(f"Field '{name}' does not exist in node '{self.name}'") @@ -226,7 +231,9 @@ class DependencyNode: # Invalidate readiness cache self._ready_cache = None - def _calculate_size(self, name: str, size_of_containing_type: Optional[int] = None) -> int: + def _calculate_size( + self, name: str, size_of_containing_type: Optional[int] = None + ) -> int: processing_field = self.fields[name] # size_of_field will be in bytes if processing_field.type.__module__ == ctypes.__name__: @@ -244,8 +251,7 @@ class DependencyNode: return size_of_field elif processing_field.containing_type.__module__ == "vmlinux": size_of_field = ( - size_of_containing_type - * processing_field.type_size + size_of_containing_type * processing_field.type_size ) return size_of_field elif issubclass(processing_field.ctype_complex_type, ctypes._Pointer): diff --git a/tests/failing_tests/xdp_pass.py b/tests/failing_tests/xdp_pass.py index da438c8..9900695 100644 --- a/tests/failing_tests/xdp_pass.py +++ b/tests/failing_tests/xdp_pass.py @@ -3,6 +3,7 @@ from pythonbpf.maps import HashMap from pythonbpf.helper import XDP_PASS from vmlinux import TASK_COMM_LEN # noqa: F401 from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 + # from vmlinux import struct_request from vmlinux import struct_xdp_md # from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 From c499fe7421bfdfd8df1ce170a5d611add22e2d03 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Wed, 15 Oct 2025 18:05:57 +0530 Subject: [PATCH 08/51] solve static typing issues --- pythonbpf/vmlinux_parser/dependency_node.py | 37 ++++++++++++++++----- pythonbpf/vmlinux_parser/import_detector.py | 16 ++++++--- 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/pythonbpf/vmlinux_parser/dependency_node.py b/pythonbpf/vmlinux_parser/dependency_node.py index a0e1d45..feebec3 100644 --- a/pythonbpf/vmlinux_parser/dependency_node.py +++ b/pythonbpf/vmlinux_parser/dependency_node.py @@ -240,28 +240,47 @@ class DependencyNode: size_of_field = ctypes.sizeof(processing_field.type) return size_of_field elif processing_field.type.__module__ == "vmlinux": - size_of_field: int = 0 if processing_field.ctype_complex_type is not None: if issubclass(processing_field.ctype_complex_type, ctypes.Array): if processing_field.containing_type.__module__ == ctypes.__name__: - size_of_field = ( - ctypes.sizeof(processing_field.containing_type) - * processing_field.type_size - ) + if ( + processing_field.containing_type is not None + and processing_field.type_size is not None + ): + size_of_field = ( + ctypes.sizeof(processing_field.containing_type) + * processing_field.type_size + ) + else: + raise RuntimeError( + f"{processing_field} has no containing_type or type_size" + ) return size_of_field elif processing_field.containing_type.__module__ == "vmlinux": - size_of_field = ( - size_of_containing_type * processing_field.type_size - ) + if ( + size_of_containing_type is not None + and processing_field.type_size is not None + ): + size_of_field = ( + size_of_containing_type * processing_field.type_size + ) + else: + raise RuntimeError( + f"{processing_field} has no containing_type or type_size" + ) return size_of_field elif issubclass(processing_field.ctype_complex_type, ctypes._Pointer): - return ctypes.sizeof(ctypes.pointer()) + return ctypes.sizeof(ctypes.c_void_p) else: raise NotImplementedError( "This subclass of ctype not supported yet" ) else: # search up pre-created stuff and get size + if size_of_containing_type is None: + raise RuntimeError( + f"Size of containing type {size_of_containing_type} is None" + ) return size_of_containing_type else: diff --git a/pythonbpf/vmlinux_parser/import_detector.py b/pythonbpf/vmlinux_parser/import_detector.py index e314a35..972b1ff 100644 --- a/pythonbpf/vmlinux_parser/import_detector.py +++ b/pythonbpf/vmlinux_parser/import_detector.py @@ -1,6 +1,6 @@ import ast import logging -from typing import List, Tuple, Dict +from typing import List, Tuple, Any import importlib import inspect @@ -82,7 +82,7 @@ def vmlinux_proc(tree: ast.AST, module): # initialise dependency handler handler = DependencyHandler() # initialise assignment dictionary of name to type - assignments: Dict[str, type] = {} + assignments: dict[str, tuple[type, Any]] = {} if not import_statements: logger.info("No vmlinux imports found") @@ -132,10 +132,16 @@ def vmlinux_proc(tree: ast.AST, module): return assignments -def process_vmlinux_assign(node, module, assignments: Dict[str, type]): +def process_vmlinux_assign(node, module, assignments: dict[str, tuple[type, Any]]): # Check if this is a simple assignment with a constant value if len(node.targets) == 1 and isinstance(node.targets[0], ast.Name): target_name = node.targets[0].id if isinstance(node.value, ast.Constant): - assignments[target_name] = node.value.value - logger.info(f"Added assignment: {target_name} = {node.value.value}") + assignments[target_name] = (type(node.value.value), node.value.value) + logger.info( + f"Added assignment: {target_name} = {node.value.value!r} of type {type(node.value.value)}" + ) + else: + raise ValueError(f"Unsupported assignment type for {target_name}") + else: + raise ValueError("Not a simple assignment") From ce7b170feacb82e88766363ebb95b58412933a97 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Wed, 15 Oct 2025 18:19:51 +0530 Subject: [PATCH 09/51] float vmlinux_assignments_symtab --- pythonbpf/codegen.py | 4 ++-- pythonbpf/functions/functions_pass.py | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/pythonbpf/codegen.py b/pythonbpf/codegen.py index 5db9f88..6044e56 100644 --- a/pythonbpf/codegen.py +++ b/pythonbpf/codegen.py @@ -45,14 +45,14 @@ def processor(source_code, filename, module): for func_node in bpf_chunks: logger.info(f"Found BPF function/struct: {func_node.name}") - vmlinux_proc(tree, module) + vmlinux_assignments_symtab = vmlinux_proc(tree, module) populate_global_symbol_table(tree, module) license_processing(tree, module) globals_processing(tree, module) structs_sym_tab = structs_proc(tree, module, bpf_chunks) map_sym_tab = maps_proc(tree, module, bpf_chunks) - func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab) + func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab, vmlinux_assignments_symtab) globals_list_creation(tree, module) diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index 8d0bce1..647fb41 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -311,7 +311,7 @@ def process_stmt( def process_func_body( - module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab + module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab, vmlinux_assignments_symtab ): """Process the body of a bpf function""" # TODO: A lot. We just have print -> bpf_trace_printk for now @@ -350,7 +350,7 @@ def process_func_body( builder.ret(ir.Constant(ir.IntType(64), 0)) -def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_tab): +def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_tab, vmlinux_assignments_symtab): """Process a single BPF chunk (function) and emit corresponding LLVM IR.""" func_name = func_node.name @@ -384,7 +384,7 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t builder = ir.IRBuilder(block) process_func_body( - module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab + module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab, vmlinux_assignments_symtab ) return func @@ -394,7 +394,7 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t # ============================================================================ -def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab): +def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab, vmlinux_assignments_symtab): for func_node in chunks: if is_global_function(func_node): continue @@ -407,6 +407,7 @@ def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab): ctypes_to_ir(infer_return_type(func_node)), map_sym_tab, structs_sym_tab, + vmlinux_assignments_symtab ) From eb4ee64ee579dd533ee375b365d6306ba6d31cdc Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Wed, 15 Oct 2025 19:11:53 +0530 Subject: [PATCH 10/51] Revert "float vmlinux_assignments_symtab" This reverts commit ce7b170feacb82e88766363ebb95b58412933a97. --- pythonbpf/codegen.py | 4 ++-- pythonbpf/functions/functions_pass.py | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/pythonbpf/codegen.py b/pythonbpf/codegen.py index 6044e56..5db9f88 100644 --- a/pythonbpf/codegen.py +++ b/pythonbpf/codegen.py @@ -45,14 +45,14 @@ def processor(source_code, filename, module): for func_node in bpf_chunks: logger.info(f"Found BPF function/struct: {func_node.name}") - vmlinux_assignments_symtab = vmlinux_proc(tree, module) + vmlinux_proc(tree, module) populate_global_symbol_table(tree, module) license_processing(tree, module) globals_processing(tree, module) structs_sym_tab = structs_proc(tree, module, bpf_chunks) map_sym_tab = maps_proc(tree, module, bpf_chunks) - func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab, vmlinux_assignments_symtab) + func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab) globals_list_creation(tree, module) diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index 647fb41..8d0bce1 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -311,7 +311,7 @@ def process_stmt( def process_func_body( - module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab, vmlinux_assignments_symtab + module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab ): """Process the body of a bpf function""" # TODO: A lot. We just have print -> bpf_trace_printk for now @@ -350,7 +350,7 @@ def process_func_body( builder.ret(ir.Constant(ir.IntType(64), 0)) -def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_tab, vmlinux_assignments_symtab): +def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_tab): """Process a single BPF chunk (function) and emit corresponding LLVM IR.""" func_name = func_node.name @@ -384,7 +384,7 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t builder = ir.IRBuilder(block) process_func_body( - module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab, vmlinux_assignments_symtab + module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab ) return func @@ -394,7 +394,7 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t # ============================================================================ -def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab, vmlinux_assignments_symtab): +def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab): for func_node in chunks: if is_global_function(func_node): continue @@ -407,7 +407,6 @@ def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab, vmlinux_assign ctypes_to_ir(infer_return_type(func_node)), map_sym_tab, structs_sym_tab, - vmlinux_assignments_symtab ) From 8372111616ce5a117acfa2571577e4ac31fe977a Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Wed, 15 Oct 2025 21:25:53 +0530 Subject: [PATCH 11/51] add basic IR gen strategy --- pythonbpf/codegen.py | 14 +++++- .../vmlinux_parser/ir_gen/debug_info_gen.py | 15 ++++++ .../vmlinux_parser/ir_gen/ir_generation.py | 50 +++++++++++++++---- 3 files changed, 68 insertions(+), 11 deletions(-) create mode 100644 pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py diff --git a/pythonbpf/codegen.py b/pythonbpf/codegen.py index 5db9f88..8d25644 100644 --- a/pythonbpf/codegen.py +++ b/pythonbpf/codegen.py @@ -19,12 +19,22 @@ from pylibbpf import BpfProgram import tempfile from logging import Logger import logging +import re logger: Logger = logging.getLogger(__name__) VERSION = "v0.1.4" +def finalize_module(original_str): + """After all IR generation is complete, we monkey patch btf_ama attribute""" + + # Create a string with applied transformation of btf_ama attribute addition to BTF struct field accesses. + pattern = r'(@"llvm\.[^"]+:[^"]*" = external global i64, !llvm\.preserve\.access\.index ![0-9]+)' + replacement = r'\1 "btf_ama"' + return re.sub(pattern, replacement, original_str) + + def find_bpf_chunks(tree): """Find all functions decorated with @bpf in the AST.""" bpf_functions = [] @@ -121,10 +131,12 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO): module.add_named_metadata("llvm.ident", [f"PythonBPF {VERSION}"]) + module_string = finalize_module(str(module)) + logger.info(f"IR written to {output}") with open(output, "w") as f: f.write(f'source_filename = "{filename}"\n') - f.write(str(module)) + f.write(module_string) f.write("\n") return output diff --git a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py new file mode 100644 index 0000000..0b38cd6 --- /dev/null +++ b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py @@ -0,0 +1,15 @@ +from pythonbpf.debuginfo import DebugInfoGenerator + + +def debug_info_generation(struct, llvm_module): + generator = DebugInfoGenerator(llvm_module) + # this is sample debug info generation + # i64type = generator.get_uint64_type() + + struct_type = generator.create_struct_type([], 64 * 4, is_distinct=True) + + global_var = generator.create_global_var_debug_info( + struct.name, struct_type, is_local=False + ) + + return global_var diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index d500cf0..01e55da 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -1,12 +1,16 @@ import logging -from pythonbpf.vmlinux_parser.dependency_handler import DependencyHandler +from ..dependency_handler import DependencyHandler +from .debug_info_gen import debug_info_generation +from ..dependency_node import DependencyNode +import llvmlite.ir as ir logger = logging.getLogger(__name__) class IRGenerator: - def __init__(self, module, handler: DependencyHandler): - self.module = module + # get the assignments dict and add this stuff to it. + def __init__(self, llvm_module, handler: DependencyHandler, assignment=None): + self.llvm_module = llvm_module self.handler: DependencyHandler = handler self.generated: list[str] = [] if not handler.is_ready: @@ -15,22 +19,48 @@ class IRGenerator: ) for struct in handler: self.struct_processor(struct) - print() def struct_processor(self, struct): if struct.name not in self.generated: print(f"IR generating for {struct.name}") - print(f"Struct is {struct}") for dependency in struct.depends_on: if dependency not in self.generated: dep_node_from_dependency = self.handler[dependency] self.struct_processor(dep_node_from_dependency) self.generated.append(dependency) - # write actual processor logic here after assuming all dependencies are resolved + # actual processor logic here after assuming all dependencies are resolved # this part cannot yet resolve circular dependencies. Gets stuck on an infinite loop during that. + self.gen_ir(struct) self.generated.append(struct.name) - def struct_name_generator( - self, - ) -> None: - pass + def gen_ir(self, struct): + # currently we generate all possible field accesses for CO-RE and put into the assignment table + debug_info = debug_info_generation(struct, self.llvm_module) + field_index = 0 + for field_name, field in struct.fields.items(): + # does not take arrays and similar types into consideration yet. + field_co_re_name = self._struct_name_generator(struct, field, field_index) + field_index += 1 + globvar = ir.GlobalVariable( + self.llvm_module, ir.IntType(64), name=field_co_re_name + ) + globvar.linkage = "external" + globvar.set_metadata("llvm.preserve.access.index", debug_info) + print() + + def _struct_name_generator( + self, struct: DependencyNode, field, field_index: int + ) -> str: + if struct.name.startswith("struct_"): + name = ( + "llvm." + + struct.name.removeprefix("struct_") + + f":0:{field.offset}" + + "$" + + f"0:{field_index}" + ) + return name + else: + raise TypeError( + "Name generation cannot occur due to type name not starting with struct" + ) From 2b3c81affabd7429fa752b0b39432c3219048b43 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Wed, 15 Oct 2025 21:33:08 +0530 Subject: [PATCH 12/51] TODO added for llvmlite attribute issue *Refer: https://github.com/numba/llvmlite/issues/1331 Signed-off-by: varun-r-mallya --- pythonbpf/vmlinux_parser/ir_gen/ir_generation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index 01e55da..c5fe740 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -34,6 +34,8 @@ class IRGenerator: self.generated.append(struct.name) def gen_ir(self, struct): + # TODO: we add the btf_ama attribute by monkey patching in the end of compilation, but once llvmlite + # accepts our issue, we will resort to normal accessed attribute based attribute addition # currently we generate all possible field accesses for CO-RE and put into the assignment table debug_info = debug_info_generation(struct, self.llvm_module) field_index = 0 From c22d85ceb805aa2c47146bb1678bcc2c2d4c2359 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Wed, 15 Oct 2025 23:56:04 +0530 Subject: [PATCH 13/51] add array field generation support --- .../vmlinux_parser/dependency_handler.py | 4 + pythonbpf/vmlinux_parser/dependency_node.py | 1 + .../vmlinux_parser/ir_gen/ir_generation.py | 104 +++++++++++++++--- tests/failing_tests/xdp_pass.py | 3 +- 4 files changed, 92 insertions(+), 20 deletions(-) diff --git a/pythonbpf/vmlinux_parser/dependency_handler.py b/pythonbpf/vmlinux_parser/dependency_handler.py index b960ab3..b34d27f 100644 --- a/pythonbpf/vmlinux_parser/dependency_handler.py +++ b/pythonbpf/vmlinux_parser/dependency_handler.py @@ -167,3 +167,7 @@ class DependencyHandler: if name not in self._nodes: raise KeyError(f"No node with name '{name}' found") return self._nodes[name] + + @property + def nodes(self): + return self._nodes diff --git a/pythonbpf/vmlinux_parser/dependency_node.py b/pythonbpf/vmlinux_parser/dependency_node.py index feebec3..ddfd055 100644 --- a/pythonbpf/vmlinux_parser/dependency_node.py +++ b/pythonbpf/vmlinux_parser/dependency_node.py @@ -240,6 +240,7 @@ class DependencyNode: size_of_field = ctypes.sizeof(processing_field.type) return size_of_field elif processing_field.type.__module__ == "vmlinux": + #TODO: does not take into account offset calculation when not array but has type size if processing_field.ctype_complex_type is not None: if issubclass(processing_field.ctype_complex_type, ctypes.Array): if processing_field.containing_type.__module__ == ctypes.__name__: diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index c5fe740..6c7d3e3 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -1,8 +1,10 @@ +import ctypes import logging from ..dependency_handler import DependencyHandler from .debug_info_gen import debug_info_generation from ..dependency_node import DependencyNode import llvmlite.ir as ir +from typing import Optional logger = logging.getLogger(__name__) @@ -20,19 +22,50 @@ class IRGenerator: for struct in handler: self.struct_processor(struct) - def struct_processor(self, struct): - if struct.name not in self.generated: - print(f"IR generating for {struct.name}") + def struct_processor(self, struct, processing_stack=None): + # Initialize processing stack on first call + if processing_stack is None: + processing_stack = set() + + # If already generated, skip + if struct.name in self.generated: + return + + # Detect circular dependency + if struct.name in processing_stack: + logger.info(f"Circular dependency detected for {struct.name}, skipping recursive processing") + # For circular dependencies, we can either: + # 1. Use forward declarations (opaque pointers) + # 2. Mark as incomplete and process later + # 3. Generate a placeholder type + # Here we'll just skip and let it be processed in its own call + return + + logger.info(f"IR generating for {struct.name}") + + # Add to processing stack before processing dependencies + processing_stack.add(struct.name) + + try: + # Process all dependencies first for dependency in struct.depends_on: if dependency not in self.generated: - dep_node_from_dependency = self.handler[dependency] - self.struct_processor(dep_node_from_dependency) - self.generated.append(dependency) - # actual processor logic here after assuming all dependencies are resolved - # this part cannot yet resolve circular dependencies. Gets stuck on an infinite loop during that. + # Check if dependency exists in handler + if dependency in self.handler.nodes: + dep_node_from_dependency = self.handler[dependency] + # Pass the processing_stack down to track circular refs + self.struct_processor(dep_node_from_dependency, processing_stack) + else: + raise RuntimeError(f"Warning: Dependency {dependency} not found in handler") + + # Actual processor logic here after dependencies are resolved self.gen_ir(struct) self.generated.append(struct.name) + finally: + # Remove from processing stack after we're done + processing_stack.discard(struct.name) + def gen_ir(self, struct): # TODO: we add the btf_ama attribute by monkey patching in the end of compilation, but once llvmlite # accepts our issue, we will resort to normal accessed attribute based attribute addition @@ -41,19 +74,54 @@ class IRGenerator: field_index = 0 for field_name, field in struct.fields.items(): # does not take arrays and similar types into consideration yet. - field_co_re_name = self._struct_name_generator(struct, field, field_index) - field_index += 1 - globvar = ir.GlobalVariable( - self.llvm_module, ir.IntType(64), name=field_co_re_name - ) - globvar.linkage = "external" - globvar.set_metadata("llvm.preserve.access.index", debug_info) - print() + if field.ctype_complex_type is not None and issubclass(field.ctype_complex_type, ctypes.Array): + array_size = field.type_size + containing_type = field.containing_type + if containing_type.__module__ == ctypes.__name__: + containing_type_size = ctypes.sizeof(containing_type) + for i in range(0,array_size): + field_co_re_name = self._struct_name_generator(struct, field, field_index, True, i, containing_type_size) + globvar = ir.GlobalVariable( + self.llvm_module, ir.IntType(64), name=field_co_re_name + ) + globvar.linkage = "external" + globvar.set_metadata("llvm.preserve.access.index", debug_info) + field_index += 1 + elif field.type_size is not None: + array_size = field.type_size + containing_type = field.containing_type + if containing_type.__module__ == "vmlinux": + containing_type_size = self.handler[containing_type.__name__].current_offset + for i in range(0,array_size): + field_co_re_name = self._struct_name_generator(struct, field, field_index, True, i, containing_type_size) + globvar = ir.GlobalVariable( + self.llvm_module, ir.IntType(64), name=field_co_re_name + ) + globvar.linkage = "external" + globvar.set_metadata("llvm.preserve.access.index", debug_info) + field_index += 1 + else: + field_co_re_name = self._struct_name_generator(struct, field, field_index) + field_index += 1 + globvar = ir.GlobalVariable( + self.llvm_module, ir.IntType(64), name=field_co_re_name + ) + globvar.linkage = "external" + globvar.set_metadata("llvm.preserve.access.index", debug_info) def _struct_name_generator( - self, struct: DependencyNode, field, field_index: int + self, struct: DependencyNode, field, field_index: int, is_indexed: bool=False, index: Optional[int]=None, containing_type_size: Optional[int]=None ) -> str: - if struct.name.startswith("struct_"): + if is_indexed: + name = ( + "llvm." + + struct.name.removeprefix("struct_") + + f":0:{field.offset + index*containing_type_size}" + + "$" + + f"0:{field_index}:{index}" + ) + return name + elif struct.name.startswith("struct_"): name = ( "llvm." + struct.name.removeprefix("struct_") diff --git a/tests/failing_tests/xdp_pass.py b/tests/failing_tests/xdp_pass.py index 9900695..3354e75 100644 --- a/tests/failing_tests/xdp_pass.py +++ b/tests/failing_tests/xdp_pass.py @@ -3,8 +3,7 @@ from pythonbpf.maps import HashMap from pythonbpf.helper import XDP_PASS from vmlinux import TASK_COMM_LEN # noqa: F401 from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 - -# from vmlinux import struct_request +from vmlinux import struct_posix_cputimers from vmlinux import struct_xdp_md # from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 # from vmlinux import struct_ring_buffer_per_cpu # noqa: F401 From de02731ea1b860e8cc5b06f3b2a0dfe715075179 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Thu, 16 Oct 2025 04:08:06 +0530 Subject: [PATCH 14/51] add support with ctypes getattr offset. Also supports bitfields. * breaks when struct_ring_buffer_per_cpu --- pythonbpf/vmlinux_parser/class_handler.py | 9 +- pythonbpf/vmlinux_parser/dependency_node.py | 103 ++++++++++++++------ 2 files changed, 76 insertions(+), 36 deletions(-) diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index 50f2fd6..6ef70ba 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -25,7 +25,7 @@ def process_vmlinux_class(node, llvm_module, handler: DependencyHandler): def process_vmlinux_post_ast( - elem_type_class, llvm_handler, handler: DependencyHandler, processing_stack=None + elem_type_class, llvm_handler, handler: DependencyHandler, processing_stack=None ): # Initialize processing stack on first call if processing_stack is None: @@ -60,6 +60,10 @@ def process_vmlinux_post_ast( pass else: new_dep_node = DependencyNode(name=current_symbol_name) + + # elem_type_class is the actual vmlinux struct/class + new_dep_node.set_ctype_struct(elem_type_class) + handler.add_node(new_dep_node) class_obj = getattr(imported_module, current_symbol_name) # Inspect the class fields @@ -71,9 +75,6 @@ def process_vmlinux_post_ast( if len(field_elem) == 2: field_name, field_type = field_elem elif len(field_elem) == 3: - raise NotImplementedError( - "Bitfields are not supported in the current version" - ) field_name, field_type, bitfield_size = field_elem field_table[field_name] = [field_type, bitfield_size] elif hasattr(class_obj, "__annotations__"): diff --git a/pythonbpf/vmlinux_parser/dependency_node.py b/pythonbpf/vmlinux_parser/dependency_node.py index ddfd055..3046f32 100644 --- a/pythonbpf/vmlinux_parser/dependency_node.py +++ b/pythonbpf/vmlinux_parser/dependency_node.py @@ -35,7 +35,7 @@ class Field: self.ready = True def set_containing_type( - self, containing_type: Optional[Any], mark_ready: bool = False + self, containing_type: Optional[Any], mark_ready: bool = False ) -> None: """Set the containing_type of this field and optionally mark it as ready.""" self.containing_type = containing_type @@ -49,7 +49,7 @@ class Field: self.ready = True def set_ctype_complex_type( - self, ctype_complex_type: Any, mark_ready: bool = False + self, ctype_complex_type: Any, mark_ready: bool = False ) -> None: """Set the ctype_complex_type of this field and optionally mark it as ready.""" self.ctype_complex_type = ctype_complex_type @@ -116,18 +116,19 @@ class DependencyNode: fields: Dict[str, Field] = field(default_factory=dict) _ready_cache: Optional[bool] = field(default=None, repr=False) current_offset: int = 0 + ctype_struct: Optional[Any] = field(default=None, repr=False) def add_field( - self, - name: str, - field_type: type, - initial_value: Any = None, - containing_type: Optional[Any] = None, - type_size: Optional[int] = None, - ctype_complex_type: Optional[int] = None, - bitfield_size: Optional[int] = None, - ready: bool = False, - offset: int = 0, + self, + name: str, + field_type: type, + initial_value: Any = None, + containing_type: Optional[Any] = None, + type_size: Optional[int] = None, + ctype_complex_type: Optional[int] = None, + bitfield_size: Optional[int] = None, + ready: bool = False, + offset: int = 0, ) -> None: """Add a field to the node with an optional initial value and readiness state.""" if self.depends_on is None: @@ -146,7 +147,14 @@ class DependencyNode: # Invalidate readiness cache self._ready_cache = None + def set_ctype_struct(self, ctype_struct: Any) -> None: + """Set the ctypes structure for automatic offset calculation.""" + self.ctype_struct = ctype_struct + def __sizeof__(self): + # If we have a ctype_struct, use its size + if self.ctype_struct is not None: + return ctypes.sizeof(self.ctype_struct) return self.current_offset def get_field(self, name: str) -> Field: @@ -172,7 +180,7 @@ class DependencyNode: self._ready_cache = None def set_field_containing_type( - self, name: str, containing_type: Any, mark_ready: bool = False + self, name: str, containing_type: Any, mark_ready: bool = False ) -> None: """Set a field's containing_type and optionally mark it as ready.""" if name not in self.fields: @@ -183,7 +191,7 @@ class DependencyNode: self._ready_cache = None def set_field_type_size( - self, name: str, type_size: Any, mark_ready: bool = False + self, name: str, type_size: Any, mark_ready: bool = False ) -> None: """Set a field's type_size and optionally mark it as ready.""" if name not in self.fields: @@ -194,7 +202,7 @@ class DependencyNode: self._ready_cache = None def set_field_ctype_complex_type( - self, name: str, ctype_complex_type: Any, mark_ready: bool = False + self, name: str, ctype_complex_type: Any, mark_ready: bool = False ) -> None: """Set a field's ctype_complex_type and optionally mark it as ready.""" if name not in self.fields: @@ -205,7 +213,7 @@ class DependencyNode: self._ready_cache = None def set_field_bitfield_size( - self, name: str, bitfield_size: Any, mark_ready: bool = False + self, name: str, bitfield_size: Any, mark_ready: bool = False ) -> None: """Set a field's bitfield_size and optionally mark it as ready.""" if name not in self.fields: @@ -216,23 +224,35 @@ class DependencyNode: self._ready_cache = None def set_field_ready( - self, - name: str, - is_ready: bool = False, - size_of_containing_type: Optional[int] = None, + self, + name: str, + is_ready: bool = False, + size_of_containing_type: Optional[int] = None, ) -> None: """Mark a field as ready or not ready.""" if name not in self.fields: raise KeyError(f"Field '{name}' does not exist in node '{self.name}'") self.fields[name].set_ready(is_ready) - self.fields[name].set_offset(self.current_offset) - self.current_offset += self._calculate_size(name, size_of_containing_type) + + # Use ctypes built-in offset if available + if self.ctype_struct is not None: + try: + self.fields[name].set_offset(getattr(self.ctype_struct, name).offset) + except AttributeError: + # Fallback to manual calculation if field not found in ctype_struct + self.fields[name].set_offset(self.current_offset) + self.current_offset += self._calculate_size(name, size_of_containing_type) + else: + # Manual offset calculation when no ctype_struct is available + self.fields[name].set_offset(self.current_offset) + self.current_offset += self._calculate_size(name, size_of_containing_type) + # Invalidate readiness cache self._ready_cache = None def _calculate_size( - self, name: str, size_of_containing_type: Optional[int] = None + self, name: str, size_of_containing_type: Optional[int] = None ) -> int: processing_field = self.fields[name] # size_of_field will be in bytes @@ -240,17 +260,16 @@ class DependencyNode: size_of_field = ctypes.sizeof(processing_field.type) return size_of_field elif processing_field.type.__module__ == "vmlinux": - #TODO: does not take into account offset calculation when not array but has type size if processing_field.ctype_complex_type is not None: if issubclass(processing_field.ctype_complex_type, ctypes.Array): if processing_field.containing_type.__module__ == ctypes.__name__: if ( - processing_field.containing_type is not None - and processing_field.type_size is not None + processing_field.containing_type is not None + and processing_field.type_size is not None ): size_of_field = ( - ctypes.sizeof(processing_field.containing_type) - * processing_field.type_size + ctypes.sizeof(processing_field.containing_type) + * processing_field.type_size ) else: raise RuntimeError( @@ -259,11 +278,11 @@ class DependencyNode: return size_of_field elif processing_field.containing_type.__module__ == "vmlinux": if ( - size_of_containing_type is not None - and processing_field.type_size is not None + size_of_containing_type is not None + and processing_field.type_size is not None ): size_of_field = ( - size_of_containing_type * processing_field.type_size + size_of_containing_type * processing_field.type_size ) else: raise RuntimeError( @@ -276,8 +295,28 @@ class DependencyNode: raise NotImplementedError( "This subclass of ctype not supported yet" ) + elif processing_field.type_size is not None: + # Handle vmlinux types with type_size but no ctype_complex_type + # This means it's a direct vmlinux struct field (not array/pointer wrapped) + # The type_size should already contain the full size of the struct + # But if there's a containing_type from vmlinux, we need that size + if processing_field.containing_type is not None: + if processing_field.containing_type.__module__ == "vmlinux": + # For vmlinux containing types, we need the pre-calculated size + if size_of_containing_type is not None: + return size_of_containing_type * processing_field.type_size + else: + raise RuntimeError( + f"Field {name}: vmlinux containing_type requires size_of_containing_type" + ) + else: + raise ModuleNotFoundError( + f"Containing type module {processing_field.containing_type.__module__} not supported" + ) + else: + raise RuntimeError("Wrong type found with no containing type") else: - # search up pre-created stuff and get size + # No ctype_complex_type and no type_size, must rely on size_of_containing_type if size_of_containing_type is None: raise RuntimeError( f"Size of containing type {size_of_containing_type} is None" From 0f5c1fa75244df8011b31ca0f903eec4cb1aa7a4 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Thu, 16 Oct 2025 04:10:24 +0530 Subject: [PATCH 15/51] format chore --- pythonbpf/vmlinux_parser/class_handler.py | 2 +- pythonbpf/vmlinux_parser/dependency_node.py | 60 ++++++++++--------- .../vmlinux_parser/ir_gen/ir_generation.py | 54 ++++++++++++----- tests/failing_tests/xdp_pass.py | 1 - 4 files changed, 70 insertions(+), 47 deletions(-) diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index 6ef70ba..48668a2 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -25,7 +25,7 @@ def process_vmlinux_class(node, llvm_module, handler: DependencyHandler): def process_vmlinux_post_ast( - elem_type_class, llvm_handler, handler: DependencyHandler, processing_stack=None + elem_type_class, llvm_handler, handler: DependencyHandler, processing_stack=None ): # Initialize processing stack on first call if processing_stack is None: diff --git a/pythonbpf/vmlinux_parser/dependency_node.py b/pythonbpf/vmlinux_parser/dependency_node.py index 3046f32..e266761 100644 --- a/pythonbpf/vmlinux_parser/dependency_node.py +++ b/pythonbpf/vmlinux_parser/dependency_node.py @@ -35,7 +35,7 @@ class Field: self.ready = True def set_containing_type( - self, containing_type: Optional[Any], mark_ready: bool = False + self, containing_type: Optional[Any], mark_ready: bool = False ) -> None: """Set the containing_type of this field and optionally mark it as ready.""" self.containing_type = containing_type @@ -49,7 +49,7 @@ class Field: self.ready = True def set_ctype_complex_type( - self, ctype_complex_type: Any, mark_ready: bool = False + self, ctype_complex_type: Any, mark_ready: bool = False ) -> None: """Set the ctype_complex_type of this field and optionally mark it as ready.""" self.ctype_complex_type = ctype_complex_type @@ -119,16 +119,16 @@ class DependencyNode: ctype_struct: Optional[Any] = field(default=None, repr=False) def add_field( - self, - name: str, - field_type: type, - initial_value: Any = None, - containing_type: Optional[Any] = None, - type_size: Optional[int] = None, - ctype_complex_type: Optional[int] = None, - bitfield_size: Optional[int] = None, - ready: bool = False, - offset: int = 0, + self, + name: str, + field_type: type, + initial_value: Any = None, + containing_type: Optional[Any] = None, + type_size: Optional[int] = None, + ctype_complex_type: Optional[int] = None, + bitfield_size: Optional[int] = None, + ready: bool = False, + offset: int = 0, ) -> None: """Add a field to the node with an optional initial value and readiness state.""" if self.depends_on is None: @@ -180,7 +180,7 @@ class DependencyNode: self._ready_cache = None def set_field_containing_type( - self, name: str, containing_type: Any, mark_ready: bool = False + self, name: str, containing_type: Any, mark_ready: bool = False ) -> None: """Set a field's containing_type and optionally mark it as ready.""" if name not in self.fields: @@ -191,7 +191,7 @@ class DependencyNode: self._ready_cache = None def set_field_type_size( - self, name: str, type_size: Any, mark_ready: bool = False + self, name: str, type_size: Any, mark_ready: bool = False ) -> None: """Set a field's type_size and optionally mark it as ready.""" if name not in self.fields: @@ -202,7 +202,7 @@ class DependencyNode: self._ready_cache = None def set_field_ctype_complex_type( - self, name: str, ctype_complex_type: Any, mark_ready: bool = False + self, name: str, ctype_complex_type: Any, mark_ready: bool = False ) -> None: """Set a field's ctype_complex_type and optionally mark it as ready.""" if name not in self.fields: @@ -213,7 +213,7 @@ class DependencyNode: self._ready_cache = None def set_field_bitfield_size( - self, name: str, bitfield_size: Any, mark_ready: bool = False + self, name: str, bitfield_size: Any, mark_ready: bool = False ) -> None: """Set a field's bitfield_size and optionally mark it as ready.""" if name not in self.fields: @@ -224,10 +224,10 @@ class DependencyNode: self._ready_cache = None def set_field_ready( - self, - name: str, - is_ready: bool = False, - size_of_containing_type: Optional[int] = None, + self, + name: str, + is_ready: bool = False, + size_of_containing_type: Optional[int] = None, ) -> None: """Mark a field as ready or not ready.""" if name not in self.fields: @@ -242,7 +242,9 @@ class DependencyNode: except AttributeError: # Fallback to manual calculation if field not found in ctype_struct self.fields[name].set_offset(self.current_offset) - self.current_offset += self._calculate_size(name, size_of_containing_type) + self.current_offset += self._calculate_size( + name, size_of_containing_type + ) else: # Manual offset calculation when no ctype_struct is available self.fields[name].set_offset(self.current_offset) @@ -252,7 +254,7 @@ class DependencyNode: self._ready_cache = None def _calculate_size( - self, name: str, size_of_containing_type: Optional[int] = None + self, name: str, size_of_containing_type: Optional[int] = None ) -> int: processing_field = self.fields[name] # size_of_field will be in bytes @@ -264,12 +266,12 @@ class DependencyNode: if issubclass(processing_field.ctype_complex_type, ctypes.Array): if processing_field.containing_type.__module__ == ctypes.__name__: if ( - processing_field.containing_type is not None - and processing_field.type_size is not None + processing_field.containing_type is not None + and processing_field.type_size is not None ): size_of_field = ( - ctypes.sizeof(processing_field.containing_type) - * processing_field.type_size + ctypes.sizeof(processing_field.containing_type) + * processing_field.type_size ) else: raise RuntimeError( @@ -278,11 +280,11 @@ class DependencyNode: return size_of_field elif processing_field.containing_type.__module__ == "vmlinux": if ( - size_of_containing_type is not None - and processing_field.type_size is not None + size_of_containing_type is not None + and processing_field.type_size is not None ): size_of_field = ( - size_of_containing_type * processing_field.type_size + size_of_containing_type * processing_field.type_size ) else: raise RuntimeError( diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index 6c7d3e3..989a448 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -33,7 +33,9 @@ class IRGenerator: # Detect circular dependency if struct.name in processing_stack: - logger.info(f"Circular dependency detected for {struct.name}, skipping recursive processing") + logger.info( + f"Circular dependency detected for {struct.name}, skipping recursive processing" + ) # For circular dependencies, we can either: # 1. Use forward declarations (opaque pointers) # 2. Mark as incomplete and process later @@ -54,9 +56,13 @@ class IRGenerator: if dependency in self.handler.nodes: dep_node_from_dependency = self.handler[dependency] # Pass the processing_stack down to track circular refs - self.struct_processor(dep_node_from_dependency, processing_stack) + self.struct_processor( + dep_node_from_dependency, processing_stack + ) else: - raise RuntimeError(f"Warning: Dependency {dependency} not found in handler") + raise RuntimeError( + f"Warning: Dependency {dependency} not found in handler" + ) # Actual processor logic here after dependencies are resolved self.gen_ir(struct) @@ -74,13 +80,17 @@ class IRGenerator: field_index = 0 for field_name, field in struct.fields.items(): # does not take arrays and similar types into consideration yet. - if field.ctype_complex_type is not None and issubclass(field.ctype_complex_type, ctypes.Array): + if field.ctype_complex_type is not None and issubclass( + field.ctype_complex_type, ctypes.Array + ): array_size = field.type_size containing_type = field.containing_type if containing_type.__module__ == ctypes.__name__: containing_type_size = ctypes.sizeof(containing_type) - for i in range(0,array_size): - field_co_re_name = self._struct_name_generator(struct, field, field_index, True, i, containing_type_size) + for i in range(0, array_size): + field_co_re_name = self._struct_name_generator( + struct, field, field_index, True, i, containing_type_size + ) globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name ) @@ -91,9 +101,13 @@ class IRGenerator: array_size = field.type_size containing_type = field.containing_type if containing_type.__module__ == "vmlinux": - containing_type_size = self.handler[containing_type.__name__].current_offset - for i in range(0,array_size): - field_co_re_name = self._struct_name_generator(struct, field, field_index, True, i, containing_type_size) + containing_type_size = self.handler[ + containing_type.__name__ + ].current_offset + for i in range(0, array_size): + field_co_re_name = self._struct_name_generator( + struct, field, field_index, True, i, containing_type_size + ) globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name ) @@ -101,7 +115,9 @@ class IRGenerator: globvar.set_metadata("llvm.preserve.access.index", debug_info) field_index += 1 else: - field_co_re_name = self._struct_name_generator(struct, field, field_index) + field_co_re_name = self._struct_name_generator( + struct, field, field_index + ) field_index += 1 globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name @@ -110,15 +126,21 @@ class IRGenerator: globvar.set_metadata("llvm.preserve.access.index", debug_info) def _struct_name_generator( - self, struct: DependencyNode, field, field_index: int, is_indexed: bool=False, index: Optional[int]=None, containing_type_size: Optional[int]=None + self, + struct: DependencyNode, + field, + field_index: int, + is_indexed: bool = False, + index: int = 0, + containing_type_size: Optional[int] = None, ) -> str: if is_indexed: name = ( - "llvm." - + struct.name.removeprefix("struct_") - + f":0:{field.offset + index*containing_type_size}" - + "$" - + f"0:{field_index}:{index}" + "llvm." + + struct.name.removeprefix("struct_") + + f":0:{field.offset + index * containing_type_size}" + + "$" + + f"0:{field_index}:{index}" ) return name elif struct.name.startswith("struct_"): diff --git a/tests/failing_tests/xdp_pass.py b/tests/failing_tests/xdp_pass.py index 3354e75..595632b 100644 --- a/tests/failing_tests/xdp_pass.py +++ b/tests/failing_tests/xdp_pass.py @@ -3,7 +3,6 @@ from pythonbpf.maps import HashMap from pythonbpf.helper import XDP_PASS from vmlinux import TASK_COMM_LEN # noqa: F401 from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 -from vmlinux import struct_posix_cputimers from vmlinux import struct_xdp_md # from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 # from vmlinux import struct_ring_buffer_per_cpu # noqa: F401 From f21837aefe4773ecbef9ee96d2593cea1ec863f2 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Thu, 16 Oct 2025 04:12:09 +0530 Subject: [PATCH 16/51] support most bitfields --- pythonbpf/vmlinux_parser/ir_gen/ir_generation.py | 3 +-- tests/failing_tests/xdp_pass.py | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index 989a448..cbf7f9d 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -4,7 +4,6 @@ from ..dependency_handler import DependencyHandler from .debug_info_gen import debug_info_generation from ..dependency_node import DependencyNode import llvmlite.ir as ir -from typing import Optional logger = logging.getLogger(__name__) @@ -132,7 +131,7 @@ class IRGenerator: field_index: int, is_indexed: bool = False, index: int = 0, - containing_type_size: Optional[int] = None, + containing_type_size: int = 0, ) -> str: if is_indexed: name = ( diff --git a/tests/failing_tests/xdp_pass.py b/tests/failing_tests/xdp_pass.py index 595632b..bf31ee8 100644 --- a/tests/failing_tests/xdp_pass.py +++ b/tests/failing_tests/xdp_pass.py @@ -3,6 +3,7 @@ from pythonbpf.maps import HashMap from pythonbpf.helper import XDP_PASS from vmlinux import TASK_COMM_LEN # noqa: F401 from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 +from vmlinux import struct_posix_cputimers # noqa: F401 from vmlinux import struct_xdp_md # from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 # from vmlinux import struct_ring_buffer_per_cpu # noqa: F401 From 5413cc793b1ce7dc8a84580416a717cc27e35680 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Thu, 16 Oct 2025 18:06:36 +0530 Subject: [PATCH 17/51] something fixed itself. --- pythonbpf/vmlinux_parser/class_handler.py | 36 +++++++++++++++-------- tests/failing_tests/xdp_pass.py | 2 +- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index 48668a2..34cdf73 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -145,15 +145,30 @@ def process_vmlinux_post_ast( ) new_dep_node.set_field_type(elem_name, elem_type) if containing_type.__module__ == "vmlinux": - process_vmlinux_post_ast( - containing_type, llvm_handler, handler, processing_stack - ) - size_of_containing_type = ( - handler[containing_type.__name__] - ).__sizeof__() - new_dep_node.set_field_ready( - elem_name, True, size_of_containing_type + containing_type_name = ( + containing_type.__name__ + if hasattr(containing_type, "__name__") + else str(containing_type) ) + + # Check for self-reference or already processed + if containing_type_name == current_symbol_name: + # Self-referential pointer + logger.debug( + f"Self-referential pointer in {current_symbol_name}.{elem_name}" + ) + new_dep_node.set_field_ready(elem_name, True) + elif handler.has_node(containing_type_name): + # Already processed + logger.debug(f"Reusing already processed {containing_type_name}") + new_dep_node.set_field_ready(elem_name, True) + else: + # Process recursively - THIS WAS MISSING + new_dep_node.add_dependent(containing_type_name) + process_vmlinux_post_ast( + containing_type, llvm_handler, handler, processing_stack + ) + new_dep_node.set_field_ready(elem_name, True) elif containing_type.__module__ == ctypes.__name__: logger.debug(f"Processing ctype internal{containing_type}") new_dep_node.set_field_ready(elem_name, True) @@ -170,11 +185,8 @@ def process_vmlinux_post_ast( process_vmlinux_post_ast( elem_type, llvm_handler, handler, processing_stack ) - size_of_containing_type = ( - handler[elem_type.__name__] - ).__sizeof__() new_dep_node.set_field_ready( - elem_name, True, size_of_containing_type + elem_name, True ) else: raise ValueError( diff --git a/tests/failing_tests/xdp_pass.py b/tests/failing_tests/xdp_pass.py index bf31ee8..83433be 100644 --- a/tests/failing_tests/xdp_pass.py +++ b/tests/failing_tests/xdp_pass.py @@ -6,7 +6,7 @@ from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 from vmlinux import struct_posix_cputimers # noqa: F401 from vmlinux import struct_xdp_md # from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 -# from vmlinux import struct_ring_buffer_per_cpu # noqa: F401 +from vmlinux import struct_ring_buffer_per_cpu # noqa: F401 from ctypes import c_int64 From 041e538b53c67c51a6fb49f7578793ba6d71bd35 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Thu, 16 Oct 2025 18:21:14 +0530 Subject: [PATCH 18/51] fix errors. Does not support union name resolution yet. --- .../vmlinux_parser/ir_gen/ir_generation.py | 30 +++++++++++-------- tests/failing_tests/xdp_pass.py | 9 +++--- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index cbf7f9d..1cf3794 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -49,19 +49,22 @@ class IRGenerator: try: # Process all dependencies first - for dependency in struct.depends_on: - if dependency not in self.generated: - # Check if dependency exists in handler - if dependency in self.handler.nodes: - dep_node_from_dependency = self.handler[dependency] - # Pass the processing_stack down to track circular refs - self.struct_processor( - dep_node_from_dependency, processing_stack - ) - else: - raise RuntimeError( - f"Warning: Dependency {dependency} not found in handler" - ) + if struct.depends_on is None: + pass + else: + for dependency in struct.depends_on: + if dependency not in self.generated: + # Check if dependency exists in handler + if dependency in self.handler.nodes: + dep_node_from_dependency = self.handler[dependency] + # Pass the processing_stack down to track circular refs + self.struct_processor( + dep_node_from_dependency, processing_stack + ) + else: + raise RuntimeError( + f"Warning: Dependency {dependency} not found in handler" + ) # Actual processor logic here after dependencies are resolved self.gen_ir(struct) @@ -152,6 +155,7 @@ class IRGenerator: ) return name else: + print(self.handler[struct.name]) raise TypeError( "Name generation cannot occur due to type name not starting with struct" ) diff --git a/tests/failing_tests/xdp_pass.py b/tests/failing_tests/xdp_pass.py index 83433be..a470278 100644 --- a/tests/failing_tests/xdp_pass.py +++ b/tests/failing_tests/xdp_pass.py @@ -2,12 +2,13 @@ from pythonbpf import bpf, map, section, bpfglobal, compile_to_ir from pythonbpf.maps import HashMap from pythonbpf.helper import XDP_PASS from vmlinux import TASK_COMM_LEN # noqa: F401 -from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 -from vmlinux import struct_posix_cputimers # noqa: F401 +# from vmlinux import struct_qspinlock_0_1 +# from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 +# from vmlinux import struct_posix_cputimers # noqa: F401 from vmlinux import struct_xdp_md # from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 -from vmlinux import struct_ring_buffer_per_cpu # noqa: F401 - +# from vmlinux import struct_ring_buffer_per_cpu # noqa: F401 +from vmlinux import struct_request from ctypes import c_int64 # Instructions to how to run this program From 5d9a29ee8ec5f412da990f6a5858b7c8c5f9e319 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Thu, 16 Oct 2025 18:22:25 +0530 Subject: [PATCH 19/51] format chore --- pythonbpf/vmlinux_parser/class_handler.py | 13 ++++++++----- tests/failing_tests/xdp_pass.py | 6 ++++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index 34cdf73..c940711 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -160,13 +160,18 @@ def process_vmlinux_post_ast( new_dep_node.set_field_ready(elem_name, True) elif handler.has_node(containing_type_name): # Already processed - logger.debug(f"Reusing already processed {containing_type_name}") + logger.debug( + f"Reusing already processed {containing_type_name}" + ) new_dep_node.set_field_ready(elem_name, True) else: # Process recursively - THIS WAS MISSING new_dep_node.add_dependent(containing_type_name) process_vmlinux_post_ast( - containing_type, llvm_handler, handler, processing_stack + containing_type, + llvm_handler, + handler, + processing_stack, ) new_dep_node.set_field_ready(elem_name, True) elif containing_type.__module__ == ctypes.__name__: @@ -185,9 +190,7 @@ def process_vmlinux_post_ast( process_vmlinux_post_ast( elem_type, llvm_handler, handler, processing_stack ) - new_dep_node.set_field_ready( - elem_name, True - ) + new_dep_node.set_field_ready(elem_name, True) else: raise ValueError( f"{elem_name} with type {elem_type} from module {module_name} not supported in recursive resolver" diff --git a/tests/failing_tests/xdp_pass.py b/tests/failing_tests/xdp_pass.py index a470278..1ab4eb2 100644 --- a/tests/failing_tests/xdp_pass.py +++ b/tests/failing_tests/xdp_pass.py @@ -2,13 +2,15 @@ from pythonbpf import bpf, map, section, bpfglobal, compile_to_ir from pythonbpf.maps import HashMap from pythonbpf.helper import XDP_PASS from vmlinux import TASK_COMM_LEN # noqa: F401 -# from vmlinux import struct_qspinlock_0_1 + +# from vmlinux import struct_qspinlock_0_1 # noqa: F401 # from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 # from vmlinux import struct_posix_cputimers # noqa: F401 from vmlinux import struct_xdp_md + # from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 # from vmlinux import struct_ring_buffer_per_cpu # noqa: F401 -from vmlinux import struct_request +from vmlinux import struct_request # noqa: F401 from ctypes import c_int64 # Instructions to how to run this program From 71d005b6b17f8935a8eb129986e72fe9949f341a Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Thu, 16 Oct 2025 18:58:05 +0530 Subject: [PATCH 20/51] complete vmlinux struct name generation in IR. * Breaks when it finds unions. * Still does not support function pointers. --- tests/failing_tests/xdp_pass.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/failing_tests/xdp_pass.py b/tests/failing_tests/xdp_pass.py index 1ab4eb2..c8510dc 100644 --- a/tests/failing_tests/xdp_pass.py +++ b/tests/failing_tests/xdp_pass.py @@ -3,14 +3,15 @@ from pythonbpf.maps import HashMap from pythonbpf.helper import XDP_PASS from vmlinux import TASK_COMM_LEN # noqa: F401 -# from vmlinux import struct_qspinlock_0_1 # noqa: F401 +from vmlinux import struct_qspinlock # noqa: F401 + # from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 # from vmlinux import struct_posix_cputimers # noqa: F401 from vmlinux import struct_xdp_md # from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 # from vmlinux import struct_ring_buffer_per_cpu # noqa: F401 -from vmlinux import struct_request # noqa: F401 +# from vmlinux import struct_request # noqa: F401 from ctypes import c_int64 # Instructions to how to run this program From 7ae629e8f79484f0155e8a90a5234ade7c190c2d Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Thu, 16 Oct 2025 19:04:04 +0530 Subject: [PATCH 21/51] bump version to v0.1.5 --- pyproject.toml | 2 +- pythonbpf/codegen.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 09fc8d3..014fca0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "pythonbpf" -version = "0.1.4" +version = "0.1.5" description = "Reduced Python frontend for eBPF" authors = [ { name = "r41k0u", email="pragyanshchaturvedi18@gmail.com" }, diff --git a/pythonbpf/codegen.py b/pythonbpf/codegen.py index 8d25644..078adf7 100644 --- a/pythonbpf/codegen.py +++ b/pythonbpf/codegen.py @@ -23,7 +23,7 @@ import re logger: Logger = logging.getLogger(__name__) -VERSION = "v0.1.4" +VERSION = "v0.1.5" def finalize_module(original_str): From 51a1be0b0b7fbf970696915ae733994460a276a6 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Thu, 16 Oct 2025 19:09:19 +0530 Subject: [PATCH 22/51] add classifiers --- pyproject.toml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 014fca0..cea909f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,20 @@ authors = [ { name = "r41k0u", email="pragyanshchaturvedi18@gmail.com" }, { name = "varun-r-mallya", email="varunrmallya@gmail.com" } ] +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: System :: Operating System Kernels :: Linux", +] readme = "README.md" license = {text = "Apache-2.0"} requires-python = ">=3.8" From 9b7aa6d8beef8134c70385e6bd92cc12cf61794c Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Sat, 18 Oct 2025 03:27:26 +0530 Subject: [PATCH 23/51] add dependency debug info list --- .../vmlinux_parser/ir_gen/debug_info_gen.py | 10 ++-- .../vmlinux_parser/ir_gen/ir_generation.py | 12 +++-- tests/passing_tests/vmlinux/xdp_pass.py | 46 +++++++++++++++++++ 3 files changed, 62 insertions(+), 6 deletions(-) create mode 100644 tests/passing_tests/vmlinux/xdp_pass.py diff --git a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py index 0b38cd6..07daea4 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py +++ b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py @@ -1,10 +1,14 @@ from pythonbpf.debuginfo import DebugInfoGenerator +from ..dependency_node import DependencyNode -def debug_info_generation(struct, llvm_module): +def debug_info_generation( + struct: DependencyNode, llvm_module, generated_debug_info: list +): generator = DebugInfoGenerator(llvm_module) - # this is sample debug info generation - # i64type = generator.get_uint64_type() + print("DEBUG1", generated_debug_info) + for field in struct.fields: + print("DEBUG", field) struct_type = generator.create_struct_type([], 64 * 4, is_distinct=True) diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index 1cf3794..5d1df92 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -14,6 +14,7 @@ class IRGenerator: self.llvm_module = llvm_module self.handler: DependencyHandler = handler self.generated: list[str] = [] + self.generated_debug_info: list = [] if not handler.is_ready: raise ImportError( "Semantic analysis of vmlinux imports failed. Cannot generate IR" @@ -67,18 +68,22 @@ class IRGenerator: ) # Actual processor logic here after dependencies are resolved - self.gen_ir(struct) + self.generated_debug_info.append( + (struct, self.gen_ir(struct, self.generated_debug_info)) + ) self.generated.append(struct.name) finally: # Remove from processing stack after we're done processing_stack.discard(struct.name) - def gen_ir(self, struct): + def gen_ir(self, struct, generated_debug_info): # TODO: we add the btf_ama attribute by monkey patching in the end of compilation, but once llvmlite # accepts our issue, we will resort to normal accessed attribute based attribute addition # currently we generate all possible field accesses for CO-RE and put into the assignment table - debug_info = debug_info_generation(struct, self.llvm_module) + debug_info = debug_info_generation( + struct, self.llvm_module, generated_debug_info + ) field_index = 0 for field_name, field in struct.fields.items(): # does not take arrays and similar types into consideration yet. @@ -126,6 +131,7 @@ class IRGenerator: ) globvar.linkage = "external" globvar.set_metadata("llvm.preserve.access.index", debug_info) + return debug_info def _struct_name_generator( self, diff --git a/tests/passing_tests/vmlinux/xdp_pass.py b/tests/passing_tests/vmlinux/xdp_pass.py new file mode 100644 index 0000000..1e73614 --- /dev/null +++ b/tests/passing_tests/vmlinux/xdp_pass.py @@ -0,0 +1,46 @@ +from pythonbpf import bpf, map, section, bpfglobal, compile_to_ir +from pythonbpf.maps import HashMap +from pythonbpf.helper import XDP_PASS +from vmlinux import TASK_COMM_LEN # noqa: F401 +from vmlinux import struct_xdp_md +from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 +from ctypes import c_int64 + +# Instructions to how to run this program +# 1. Install PythonBPF: pip install pythonbpf +# 2. Run the program: python examples/xdp_pass.py +# 3. Run the program with sudo: sudo tools/check.sh run examples/xdp_pass.o +# 4. Attach object file to any network device with something like ./check.sh xdp examples/xdp_pass.o tailscale0 +# 5. send traffic through the device and observe effects + + +@bpf +@map +def count() -> HashMap: + return HashMap(key=c_int64, value=c_int64, max_entries=1) + + +@bpf +@section("xdp") +def hello_world(ctx: struct_xdp_md) -> c_int64: + key = 0 + one = 1 + prev = count().lookup(key) + if prev: + prevval = prev + 1 + print(f"count: {prevval}") + count().update(key, prevval) + return XDP_PASS + else: + count().update(key, one) + + return XDP_PASS + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +compile_to_ir("xdp_pass.py", "xdp_pass.ll") From 3a3116253f2a460126bc5f2b0ec1b00283892e01 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Sat, 18 Oct 2025 03:53:10 +0530 Subject: [PATCH 24/51] generate members with dummy types --- pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py index 07daea4..8127d01 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py +++ b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py @@ -6,11 +6,12 @@ def debug_info_generation( struct: DependencyNode, llvm_module, generated_debug_info: list ): generator = DebugInfoGenerator(llvm_module) - print("DEBUG1", generated_debug_info) - for field in struct.fields: - print("DEBUG", field) + members = [] + uint32type = generator.get_uint32_type() + for field_name, field in struct.fields.items(): + members.append(generator.create_struct_member(field_name, uint32type, field.offset)) - struct_type = generator.create_struct_type([], 64 * 4, is_distinct=True) + struct_type = generator.create_struct_type(members, struct.__sizeof__(), is_distinct=True) global_var = generator.create_global_var_debug_info( struct.name, struct_type, is_local=False From 101183c3158a1536a9ce2a23f09abc44067045e1 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Sat, 18 Oct 2025 21:45:26 +0530 Subject: [PATCH 25/51] members generated for simple ctypes --- pythonbpf/debuginfo/debug_info_generator.py | 31 ++++ .../vmlinux_parser/ir_gen/debug_info_gen.py | 155 ++++++++++++++++-- tests/passing_tests/vmlinux/xdp_pass.py | 20 +-- 3 files changed, 177 insertions(+), 29 deletions(-) diff --git a/pythonbpf/debuginfo/debug_info_generator.py b/pythonbpf/debuginfo/debug_info_generator.py index ab9fed4..e9703db 100644 --- a/pythonbpf/debuginfo/debug_info_generator.py +++ b/pythonbpf/debuginfo/debug_info_generator.py @@ -101,6 +101,21 @@ class DebugInfoGenerator: }, ) + def create_struct_member_vmlinux(self, name: str, base_type_with_size: Any, offset: int) -> Any: + """Create a struct member with the given name, type, and offset""" + base_type, type_size = base_type_with_size + return self.module.add_debug_info( + "DIDerivedType", + { + "tag": dc.DW_TAG_member, + "name": name, + "file": self.module._file_metadata, + "baseType": base_type, + "size": getattr(base_type, "size", type_size), + "offset": offset, + }, + ) + def create_struct_type( self, members: List[Any], size: int, is_distinct: bool ) -> Any: @@ -116,6 +131,22 @@ class DebugInfoGenerator: is_distinct=is_distinct, ) + def create_struct_type_with_name( + self, name: str, members: List[Any], size: int, is_distinct: bool + ) -> Any: + """Create a struct type with the given members and size""" + return self.module.add_debug_info( + "DICompositeType", + { + "name": name, + "tag": dc.DW_TAG_structure_type, + "file": self.module._file_metadata, + "size": size, + "elements": members, + }, + is_distinct=is_distinct, + ) + def create_global_var_debug_info( self, name: str, var_type: Any, is_local: bool = False ) -> Any: diff --git a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py index 8127d01..ab0bf46 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py +++ b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py @@ -1,20 +1,155 @@ -from pythonbpf.debuginfo import DebugInfoGenerator +from pythonbpf.debuginfo import DebugInfoGenerator, dwarf_constants as dc from ..dependency_node import DependencyNode +import ctypes +import logging +from typing import List, Any, Tuple, Optional +logger = logging.getLogger(__name__) def debug_info_generation( - struct: DependencyNode, llvm_module, generated_debug_info: list -): + struct: DependencyNode, llvm_module, generated_debug_info: List[Tuple[DependencyNode, Any]] +) -> Any: + """ + Generate DWARF debug information for a struct defined in a DependencyNode. + + Args: + struct: The dependency node containing struct information + llvm_module: The LLVM module to add debug info to + generated_debug_info: List of tuples (struct, debug_info) to track generated debug info + + Returns: + The generated global variable debug info + """ + # Set up debug info generator generator = DebugInfoGenerator(llvm_module) + + # Check if debug info for this struct has already been generated + for existing_struct, debug_info in generated_debug_info: + if existing_struct.name == struct.name: + return debug_info + + # Process all fields and create members for the struct members = [] - uint32type = generator.get_uint32_type() for field_name, field in struct.fields.items(): - members.append(generator.create_struct_member(field_name, uint32type, field.offset)) + # Get appropriate debug type for this field + field_type = _get_field_debug_type( + field_name, field, generator, struct, generated_debug_info + ) + # Create struct member with proper offset + member = generator.create_struct_member_vmlinux( + field_name, field_type, field.offset * 8 + ) + members.append(member) - struct_type = generator.create_struct_type(members, struct.__sizeof__(), is_distinct=True) - - global_var = generator.create_global_var_debug_info( - struct.name, struct_type, is_local=False + if struct.name.startswith("struct_"): + struct_name = struct.name.removeprefix("struct_") + else: + raise ValueError("Unions are not supported in the current version") + # Create struct type with all members + struct_type = generator.create_struct_type_with_name( + struct_name, members, struct.__sizeof__() * 8, is_distinct=True ) - return global_var + return struct_type + + +def _get_field_debug_type( + field_name: str, + field, + generator: DebugInfoGenerator, + parent_struct: DependencyNode, + generated_debug_info: List[Tuple[DependencyNode, Any]] +) -> Any: + """ + Determine the appropriate debug type for a field based on its Python/ctypes type. + + Args: + field_name: Name of the field + field: Field object containing type information + generator: DebugInfoGenerator instance + parent_struct: The parent struct containing this field + generated_debug_info: List of already generated debug info + + Returns: + The debug info type for this field + """ + # Handle complex types (arrays, pointers) + if field.ctype_complex_type is not None: + if issubclass(field.ctype_complex_type, ctypes.Array): + # Handle array types + element_type = _get_basic_debug_type(field.containing_type, generator) + return generator.create_array_type(element_type, field.type_size) + elif issubclass(field.ctype_complex_type, ctypes._Pointer): + # Handle pointer types + pointee_type = _get_basic_debug_type(field.containing_type, generator) + return generator.create_pointer_type(pointee_type) + + # Handle other vmlinux types (nested structs) + if field.type.__module__ == "vmlinux": + # If it's a struct from vmlinux, check if we've already generated debug info for it + struct_name = field.type.__name__ + + # Look for existing debug info in the list + for existing_struct, debug_info in generated_debug_info: + if existing_struct.name == struct_name: + # Use existing debug info + return debug_info + + # If not found, create a forward declaration + # This will be completed when the actual struct is processed + logger.warning("Forward declaration in struct created") + forward_type = generator.create_struct_type([], 0, is_distinct=True) + return forward_type + + # Handle basic C types + return _get_basic_debug_type(field.type, generator) + + +def _get_basic_debug_type(ctype, generator: DebugInfoGenerator) -> Any: + """ + Map a ctypes type to a DWARF debug type. + + Args: + ctype: A ctypes type or Python type + generator: DebugInfoGenerator instance + + Returns: + The corresponding debug type + """ + # Map ctypes to debug info types + if ctype == ctypes.c_char or ctype == ctypes.c_byte: + return generator.get_basic_type("char", 8, dc.DW_ATE_signed_char), 8 + elif ctype == ctypes.c_ubyte or ctype == ctypes.c_uint8: + return generator.get_basic_type("unsigned char", 8, dc.DW_ATE_unsigned_char), 8 + elif ctype == ctypes.c_short or ctype == ctypes.c_int16: + return generator.get_basic_type("short", 16, dc.DW_ATE_signed), 16 + elif ctype == ctypes.c_ushort or ctype == ctypes.c_uint16: + return generator.get_basic_type("unsigned short", 16, dc.DW_ATE_unsigned), 16 + elif ctype == ctypes.c_int or ctype == ctypes.c_int32: + return generator.get_basic_type("int", 32, dc.DW_ATE_signed), 32 + elif ctype == ctypes.c_uint or ctype == ctypes.c_uint32: + return generator.get_basic_type("unsigned int", 32, dc.DW_ATE_unsigned), 32 + elif ctype == ctypes.c_long: + return generator.get_basic_type("long", 64, dc.DW_ATE_signed), 64 + elif ctype == ctypes.c_ulong: + return generator.get_basic_type("unsigned long", 64, dc.DW_ATE_unsigned), 64 + elif ctype == ctypes.c_longlong or ctype == ctypes.c_int64: + return generator.get_basic_type("long long", 64, dc.DW_ATE_signed), 64 + elif ctype == ctypes.c_ulonglong or ctype == ctypes.c_uint64: + return generator.get_basic_type("unsigned long long", 64, dc.DW_ATE_unsigned), 64 + elif ctype == ctypes.c_float: + return generator.get_basic_type("float", 32, dc.DW_ATE_float), 32 + elif ctype == ctypes.c_double: + return generator.get_basic_type("double", 64, dc.DW_ATE_float), 64 + elif ctype == ctypes.c_bool: + return generator.get_basic_type("bool", 8, dc.DW_ATE_boolean), 8 + elif ctype == ctypes.c_char_p: + char_type = generator.get_basic_type("char", 8, dc.DW_ATE_signed_char), 8 + return generator.create_pointer_type(char_type) + elif ctype == ctypes.c_void_p: + void_type = generator.module.add_debug_info( + "DIBasicType", {"name": "void"} + ) + return generator.create_pointer_type(void_type), 64 + else: + return generator.get_uint64_type(), 64 diff --git a/tests/passing_tests/vmlinux/xdp_pass.py b/tests/passing_tests/vmlinux/xdp_pass.py index 1e73614..484784b 100644 --- a/tests/passing_tests/vmlinux/xdp_pass.py +++ b/tests/passing_tests/vmlinux/xdp_pass.py @@ -3,7 +3,7 @@ from pythonbpf.maps import HashMap from pythonbpf.helper import XDP_PASS from vmlinux import TASK_COMM_LEN # noqa: F401 from vmlinux import struct_xdp_md -from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 +# from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 from ctypes import c_int64 # Instructions to how to run this program @@ -13,27 +13,9 @@ from ctypes import c_int64 # 4. Attach object file to any network device with something like ./check.sh xdp examples/xdp_pass.o tailscale0 # 5. send traffic through the device and observe effects - -@bpf -@map -def count() -> HashMap: - return HashMap(key=c_int64, value=c_int64, max_entries=1) - - @bpf @section("xdp") def hello_world(ctx: struct_xdp_md) -> c_int64: - key = 0 - one = 1 - prev = count().lookup(key) - if prev: - prevval = prev + 1 - print(f"count: {prevval}") - count().update(key, prevval) - return XDP_PASS - else: - count().update(key, one) - return XDP_PASS From 1b4272b408f6df45cbe4dd484e5c52f64911c767 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Sat, 18 Oct 2025 22:02:10 +0530 Subject: [PATCH 26/51] members generated with wrong size calc for arrays --- pythonbpf/debuginfo/debug_info_generator.py | 2 +- pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py | 14 +++++++------- tests/passing_tests/vmlinux/xdp_pass.py | 5 +++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/pythonbpf/debuginfo/debug_info_generator.py b/pythonbpf/debuginfo/debug_info_generator.py index e9703db..1848ecc 100644 --- a/pythonbpf/debuginfo/debug_info_generator.py +++ b/pythonbpf/debuginfo/debug_info_generator.py @@ -111,7 +111,7 @@ class DebugInfoGenerator: "name": name, "file": self.module._file_metadata, "baseType": base_type, - "size": getattr(base_type, "size", type_size), + "size": type_size, "offset": offset, }, ) diff --git a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py index ab0bf46..15b21c5 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py +++ b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py @@ -59,7 +59,7 @@ def _get_field_debug_type( generator: DebugInfoGenerator, parent_struct: DependencyNode, generated_debug_info: List[Tuple[DependencyNode, Any]] -) -> Any: +) -> tuple[Any, int]: """ Determine the appropriate debug type for a field based on its Python/ctypes type. @@ -77,12 +77,12 @@ def _get_field_debug_type( if field.ctype_complex_type is not None: if issubclass(field.ctype_complex_type, ctypes.Array): # Handle array types - element_type = _get_basic_debug_type(field.containing_type, generator) - return generator.create_array_type(element_type, field.type_size) + element_type, base_type_size = _get_basic_debug_type(field.containing_type, generator) + return generator.create_array_type(element_type, field.type_size), field.type_size * base_type_size elif issubclass(field.ctype_complex_type, ctypes._Pointer): # Handle pointer types - pointee_type = _get_basic_debug_type(field.containing_type, generator) - return generator.create_pointer_type(pointee_type) + pointee_type, _ = _get_basic_debug_type(field.containing_type, generator) + return generator.create_pointer_type(pointee_type), 64 # Handle other vmlinux types (nested structs) if field.type.__module__ == "vmlinux": @@ -93,13 +93,13 @@ def _get_field_debug_type( for existing_struct, debug_info in generated_debug_info: if existing_struct.name == struct_name: # Use existing debug info - return debug_info + return debug_info, existing_struct.__sizeof__() # If not found, create a forward declaration # This will be completed when the actual struct is processed logger.warning("Forward declaration in struct created") forward_type = generator.create_struct_type([], 0, is_distinct=True) - return forward_type + return forward_type, 0 # Handle basic C types return _get_basic_debug_type(field.type, generator) diff --git a/tests/passing_tests/vmlinux/xdp_pass.py b/tests/passing_tests/vmlinux/xdp_pass.py index 484784b..4ba2fea 100644 --- a/tests/passing_tests/vmlinux/xdp_pass.py +++ b/tests/passing_tests/vmlinux/xdp_pass.py @@ -1,9 +1,9 @@ -from pythonbpf import bpf, map, section, bpfglobal, compile_to_ir +from pythonbpf import bpf, map, section, bpfglobal, compile_to_ir, compile from pythonbpf.maps import HashMap from pythonbpf.helper import XDP_PASS from vmlinux import TASK_COMM_LEN # noqa: F401 from vmlinux import struct_xdp_md -# from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 +from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 from ctypes import c_int64 # Instructions to how to run this program @@ -26,3 +26,4 @@ def LICENSE() -> str: compile_to_ir("xdp_pass.py", "xdp_pass.ll") +compile() From dc1b243e82821edc925948011d606133c69a4c77 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Sat, 18 Oct 2025 22:13:59 +0530 Subject: [PATCH 27/51] correct error size calculation for arrays --- pythonbpf/debuginfo/debug_info_generator.py | 18 +++++++++- .../vmlinux_parser/ir_gen/debug_info_gen.py | 33 +++++++++++-------- tests/passing_tests/vmlinux/xdp_pass.py | 14 ++++---- 3 files changed, 43 insertions(+), 22 deletions(-) diff --git a/pythonbpf/debuginfo/debug_info_generator.py b/pythonbpf/debuginfo/debug_info_generator.py index 1848ecc..62f0cc3 100644 --- a/pythonbpf/debuginfo/debug_info_generator.py +++ b/pythonbpf/debuginfo/debug_info_generator.py @@ -81,6 +81,20 @@ class DebugInfoGenerator: }, ) + def create_array_type_vmlinux(self, type_info: Any, count: int) -> Any: + """Create an array type of the given base type with specified count""" + base_type, type_sizing = type_info + subrange = self.module.add_debug_info("DISubrange", {"count": count}) + return self.module.add_debug_info( + "DICompositeType", + { + "tag": dc.DW_TAG_array_type, + "baseType": base_type, + "size": type_sizing, + "elements": [subrange], + }, + ) + @staticmethod def _compute_array_size(base_type: Any, count: int) -> int: # Extract size from base_type if possible @@ -101,7 +115,9 @@ class DebugInfoGenerator: }, ) - def create_struct_member_vmlinux(self, name: str, base_type_with_size: Any, offset: int) -> Any: + def create_struct_member_vmlinux( + self, name: str, base_type_with_size: Any, offset: int + ) -> Any: """Create a struct member with the given name, type, and offset""" base_type, type_size = base_type_with_size return self.module.add_debug_info( diff --git a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py index 15b21c5..0ec6be3 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py +++ b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py @@ -2,12 +2,15 @@ from pythonbpf.debuginfo import DebugInfoGenerator, dwarf_constants as dc from ..dependency_node import DependencyNode import ctypes import logging -from typing import List, Any, Tuple, Optional +from typing import List, Any, Tuple logger = logging.getLogger(__name__) + def debug_info_generation( - struct: DependencyNode, llvm_module, generated_debug_info: List[Tuple[DependencyNode, Any]] + struct: DependencyNode, + llvm_module, + generated_debug_info: List[Tuple[DependencyNode, Any]], ) -> Any: """ Generate DWARF debug information for a struct defined in a DependencyNode. @@ -54,11 +57,11 @@ def debug_info_generation( def _get_field_debug_type( - field_name: str, - field, - generator: DebugInfoGenerator, - parent_struct: DependencyNode, - generated_debug_info: List[Tuple[DependencyNode, Any]] + field_name: str, + field, + generator: DebugInfoGenerator, + parent_struct: DependencyNode, + generated_debug_info: List[Tuple[DependencyNode, Any]], ) -> tuple[Any, int]: """ Determine the appropriate debug type for a field based on its Python/ctypes type. @@ -77,8 +80,12 @@ def _get_field_debug_type( if field.ctype_complex_type is not None: if issubclass(field.ctype_complex_type, ctypes.Array): # Handle array types - element_type, base_type_size = _get_basic_debug_type(field.containing_type, generator) - return generator.create_array_type(element_type, field.type_size), field.type_size * base_type_size + element_type, base_type_size = _get_basic_debug_type( + field.containing_type, generator + ) + return generator.create_array_type_vmlinux( + (element_type, base_type_size * field.type_size), field.type_size + ), field.type_size * base_type_size elif issubclass(field.ctype_complex_type, ctypes._Pointer): # Handle pointer types pointee_type, _ = _get_basic_debug_type(field.containing_type, generator) @@ -136,7 +143,9 @@ def _get_basic_debug_type(ctype, generator: DebugInfoGenerator) -> Any: elif ctype == ctypes.c_longlong or ctype == ctypes.c_int64: return generator.get_basic_type("long long", 64, dc.DW_ATE_signed), 64 elif ctype == ctypes.c_ulonglong or ctype == ctypes.c_uint64: - return generator.get_basic_type("unsigned long long", 64, dc.DW_ATE_unsigned), 64 + return generator.get_basic_type( + "unsigned long long", 64, dc.DW_ATE_unsigned + ), 64 elif ctype == ctypes.c_float: return generator.get_basic_type("float", 32, dc.DW_ATE_float), 32 elif ctype == ctypes.c_double: @@ -147,9 +156,7 @@ def _get_basic_debug_type(ctype, generator: DebugInfoGenerator) -> Any: char_type = generator.get_basic_type("char", 8, dc.DW_ATE_signed_char), 8 return generator.create_pointer_type(char_type) elif ctype == ctypes.c_void_p: - void_type = generator.module.add_debug_info( - "DIBasicType", {"name": "void"} - ) + void_type = generator.module.add_debug_info("DIBasicType", {"name": "void"}) return generator.create_pointer_type(void_type), 64 else: return generator.get_uint64_type(), 64 diff --git a/tests/passing_tests/vmlinux/xdp_pass.py b/tests/passing_tests/vmlinux/xdp_pass.py index 4ba2fea..6211b05 100644 --- a/tests/passing_tests/vmlinux/xdp_pass.py +++ b/tests/passing_tests/vmlinux/xdp_pass.py @@ -1,22 +1,20 @@ -from pythonbpf import bpf, map, section, bpfglobal, compile_to_ir, compile -from pythonbpf.maps import HashMap -from pythonbpf.helper import XDP_PASS +from pythonbpf import bpf, section, bpfglobal, compile_to_ir, compile from vmlinux import TASK_COMM_LEN # noqa: F401 -from vmlinux import struct_xdp_md from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 from ctypes import c_int64 + # Instructions to how to run this program # 1. Install PythonBPF: pip install pythonbpf # 2. Run the program: python examples/xdp_pass.py # 3. Run the program with sudo: sudo tools/check.sh run examples/xdp_pass.o # 4. Attach object file to any network device with something like ./check.sh xdp examples/xdp_pass.o tailscale0 # 5. send traffic through the device and observe effects - @bpf -@section("xdp") -def hello_world(ctx: struct_xdp_md) -> c_int64: - return XDP_PASS +@section("tracepoint/syscalls/sys_enter_execve") +def hello_world(ctx: struct_trace_event_raw_sys_enter) -> c_int64: + print("Hello, World!") + return c_int64(0) @bpf From de19c8fc9008b4d9cd05abde646097e53ad483c1 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Sat, 18 Oct 2025 22:15:58 +0530 Subject: [PATCH 28/51] rename passing test --- .../vmlinux/{xdp_pass.py => simple_struct_test.py} | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) rename tests/passing_tests/vmlinux/{xdp_pass.py => simple_struct_test.py} (78%) diff --git a/tests/passing_tests/vmlinux/xdp_pass.py b/tests/passing_tests/vmlinux/simple_struct_test.py similarity index 78% rename from tests/passing_tests/vmlinux/xdp_pass.py rename to tests/passing_tests/vmlinux/simple_struct_test.py index 6211b05..99fc7a1 100644 --- a/tests/passing_tests/vmlinux/xdp_pass.py +++ b/tests/passing_tests/vmlinux/simple_struct_test.py @@ -6,9 +6,9 @@ from ctypes import c_int64 # Instructions to how to run this program # 1. Install PythonBPF: pip install pythonbpf -# 2. Run the program: python examples/xdp_pass.py -# 3. Run the program with sudo: sudo tools/check.sh run examples/xdp_pass.o -# 4. Attach object file to any network device with something like ./check.sh xdp examples/xdp_pass.o tailscale0 +# 2. Run the program: python examples/simple_struct_test.py +# 3. Run the program with sudo: sudo tools/check.sh run examples/simple_struct_test.o +# 4. Attach object file to any network device with something like ./check.sh run examples/simple_struct_test.o tailscale0 # 5. send traffic through the device and observe effects @bpf @section("tracepoint/syscalls/sys_enter_execve") @@ -23,5 +23,5 @@ def LICENSE() -> str: return "GPL" -compile_to_ir("xdp_pass.py", "xdp_pass.ll") +compile_to_ir("simple_struct_test.py", "simple_struct_test.ll") compile() From d855e9ef2ed21ecaa8a37feeb2496548207589a0 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Sat, 18 Oct 2025 22:32:03 +0530 Subject: [PATCH 29/51] correct mistake in null pointer. Also identify error in pointer to char debug info generation --- pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py | 5 +++-- tests/c-form/ex7.bpf.c | 2 +- tests/passing_tests/vmlinux/simple_struct_test.py | 4 +++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py index 0ec6be3..ccb27a9 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py +++ b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py @@ -78,6 +78,7 @@ def _get_field_debug_type( """ # Handle complex types (arrays, pointers) if field.ctype_complex_type is not None: + print(field) if issubclass(field.ctype_complex_type, ctypes.Array): # Handle array types element_type, base_type_size = _get_basic_debug_type( @@ -89,6 +90,7 @@ def _get_field_debug_type( elif issubclass(field.ctype_complex_type, ctypes._Pointer): # Handle pointer types pointee_type, _ = _get_basic_debug_type(field.containing_type, generator) + print("DEBUG", pointee_type) return generator.create_pointer_type(pointee_type), 64 # Handle other vmlinux types (nested structs) @@ -156,7 +158,6 @@ def _get_basic_debug_type(ctype, generator: DebugInfoGenerator) -> Any: char_type = generator.get_basic_type("char", 8, dc.DW_ATE_signed_char), 8 return generator.create_pointer_type(char_type) elif ctype == ctypes.c_void_p: - void_type = generator.module.add_debug_info("DIBasicType", {"name": "void"}) - return generator.create_pointer_type(void_type), 64 + return generator.create_pointer_type(None), 64 else: return generator.get_uint64_type(), 64 diff --git a/tests/c-form/ex7.bpf.c b/tests/c-form/ex7.bpf.c index 80a60d1..33ed6a5 100644 --- a/tests/c-form/ex7.bpf.c +++ b/tests/c-form/ex7.bpf.c @@ -19,7 +19,7 @@ struct { SEC("tp/syscalls/sys_enter_setuid") int handle_setuid_entry(struct trace_event_raw_sys_enter *ctx) { struct event data = {}; - + struct blk_integrity_iter it = {}; // Extract UID from the syscall arguments data.uid = (unsigned int)ctx->args[0]; data.ts = bpf_ktime_get_ns(); diff --git a/tests/passing_tests/vmlinux/simple_struct_test.py b/tests/passing_tests/vmlinux/simple_struct_test.py index 99fc7a1..f3cbb97 100644 --- a/tests/passing_tests/vmlinux/simple_struct_test.py +++ b/tests/passing_tests/vmlinux/simple_struct_test.py @@ -1,6 +1,9 @@ from pythonbpf import bpf, section, bpfglobal, compile_to_ir, compile from vmlinux import TASK_COMM_LEN # noqa: F401 from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 +# from vmlinux import struct_xdp_md +# from vmlinux import struct_request +from vmlinux import struct_blk_integrity_iter from ctypes import c_int64 @@ -24,4 +27,3 @@ def LICENSE() -> str: compile_to_ir("simple_struct_test.py", "simple_struct_test.ll") -compile() From 33aa7947180484f07620e21d8a82952b41d14a42 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Sat, 18 Oct 2025 22:48:34 +0530 Subject: [PATCH 30/51] identify error in pointer to ctypes subclass dependency fixing --- pythonbpf/vmlinux_parser/class_handler.py | 4 ++++ pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py | 2 -- tests/passing_tests/vmlinux/simple_struct_test.py | 3 +-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index c940711..b75b85d 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -99,7 +99,9 @@ def process_vmlinux_post_ast( local_module_name = getattr(elem_type, "__module__", None) new_dep_node.add_field(elem_name, elem_type, ready=False) if local_module_name == ctypes.__name__: + #TODO: need to process pointer to ctype and also CFUNCTYPES here new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size) + print(elem_type) new_dep_node.set_field_ready(elem_name, is_ready=True) logger.debug( f"Field {elem_name} is direct ctypes type: {elem_type}" @@ -127,6 +129,8 @@ def process_vmlinux_post_ast( ctype_complex_type = ctypes.Array elif issubclass(elem_type, ctypes._Pointer): ctype_complex_type = ctypes._Pointer + else: + raise ImportError("Non Array and Pointer type ctype imports not supported in current version") else: raise TypeError("Unsupported ctypes subclass") else: diff --git a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py index ccb27a9..232cf10 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py +++ b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py @@ -78,7 +78,6 @@ def _get_field_debug_type( """ # Handle complex types (arrays, pointers) if field.ctype_complex_type is not None: - print(field) if issubclass(field.ctype_complex_type, ctypes.Array): # Handle array types element_type, base_type_size = _get_basic_debug_type( @@ -90,7 +89,6 @@ def _get_field_debug_type( elif issubclass(field.ctype_complex_type, ctypes._Pointer): # Handle pointer types pointee_type, _ = _get_basic_debug_type(field.containing_type, generator) - print("DEBUG", pointee_type) return generator.create_pointer_type(pointee_type), 64 # Handle other vmlinux types (nested structs) diff --git a/tests/passing_tests/vmlinux/simple_struct_test.py b/tests/passing_tests/vmlinux/simple_struct_test.py index f3cbb97..b40e620 100644 --- a/tests/passing_tests/vmlinux/simple_struct_test.py +++ b/tests/passing_tests/vmlinux/simple_struct_test.py @@ -1,8 +1,7 @@ from pythonbpf import bpf, section, bpfglobal, compile_to_ir, compile from vmlinux import TASK_COMM_LEN # noqa: F401 from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 -# from vmlinux import struct_xdp_md -# from vmlinux import struct_request +# from vmlinux import struct_uinput_device from vmlinux import struct_blk_integrity_iter from ctypes import c_int64 From 5dafa5bd0d37b1576e9cf143b295875d17cb0ec9 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Sat, 18 Oct 2025 22:59:01 +0530 Subject: [PATCH 31/51] add function pointer detection warning as well as identify ctypes non recursion error --- pythonbpf/vmlinux_parser/class_handler.py | 31 +++++++++++++++---- .../vmlinux/simple_struct_test.py | 2 +- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index b75b85d..f23d0e5 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -99,13 +99,32 @@ def process_vmlinux_post_ast( local_module_name = getattr(elem_type, "__module__", None) new_dep_node.add_field(elem_name, elem_type, ready=False) if local_module_name == ctypes.__name__: - #TODO: need to process pointer to ctype and also CFUNCTYPES here + #TODO: need to process pointer to ctype and also CFUNCTYPES here recursively. + # for now, function pointers should give an error new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size) - print(elem_type) - new_dep_node.set_field_ready(elem_name, is_ready=True) - logger.debug( - f"Field {elem_name} is direct ctypes type: {elem_type}" - ) + + # Process pointer to ctype + if isinstance(elem_type, type) and issubclass(elem_type, ctypes._Pointer): + # Get the pointed-to type + pointed_type = elem_type._type_ + logger.debug(f"Found pointer to type: {pointed_type}") + new_dep_node.set_field_containing_type(elem_name, pointed_type) + new_dep_node.set_field_ctype_complex_type(elem_name, ctypes._Pointer) + new_dep_node.set_field_ready(elem_name, is_ready=True) + + # Process function pointers (CFUNCTYPE) + elif hasattr(elem_type, '_restype_') and hasattr(elem_type, '_argtypes_'): + # This is a CFUNCTYPE or similar + logger.info(f"Function pointer detected for {elem_name} with return type {elem_type._restype_} and arguments {elem_type._argtypes_}") + # Set the field as ready but mark it with special handling + new_dep_node.set_field_ctype_complex_type(elem_name, ctypes.CFUNCTYPE) + new_dep_node.set_field_ready(elem_name, is_ready=True) + logger.warning("Blindly processing CFUNCTYPE ctypes to ensure compilation. Unsupported") + + else: + # Regular ctype + new_dep_node.set_field_ready(elem_name, is_ready=True) + logger.debug(f"Field {elem_name} is direct ctypes type: {elem_type}") elif local_module_name == "vmlinux": new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size) logger.debug( diff --git a/tests/passing_tests/vmlinux/simple_struct_test.py b/tests/passing_tests/vmlinux/simple_struct_test.py index b40e620..9a26ccf 100644 --- a/tests/passing_tests/vmlinux/simple_struct_test.py +++ b/tests/passing_tests/vmlinux/simple_struct_test.py @@ -1,7 +1,7 @@ from pythonbpf import bpf, section, bpfglobal, compile_to_ir, compile from vmlinux import TASK_COMM_LEN # noqa: F401 from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 -# from vmlinux import struct_uinput_device +from vmlinux import struct_uinput_device from vmlinux import struct_blk_integrity_iter from ctypes import c_int64 From 328b792e4e89f745d97d40dd062f77774e3c118e Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Sat, 18 Oct 2025 23:02:00 +0530 Subject: [PATCH 32/51] add function pointer detection warning as well as identify ctypes non recursion error --- pythonbpf/vmlinux_parser/class_handler.py | 35 +++++++++++++------ .../vmlinux/simple_struct_test.py | 6 ++-- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index f23d0e5..108fa9f 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -99,32 +99,45 @@ def process_vmlinux_post_ast( local_module_name = getattr(elem_type, "__module__", None) new_dep_node.add_field(elem_name, elem_type, ready=False) if local_module_name == ctypes.__name__: - #TODO: need to process pointer to ctype and also CFUNCTYPES here recursively. - # for now, function pointers should give an error + # TODO: need to process pointer to ctype and also CFUNCTYPES here recursively. Current processing is a single dereference new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size) # Process pointer to ctype - if isinstance(elem_type, type) and issubclass(elem_type, ctypes._Pointer): + if isinstance(elem_type, type) and issubclass( + elem_type, ctypes._Pointer + ): # Get the pointed-to type pointed_type = elem_type._type_ logger.debug(f"Found pointer to type: {pointed_type}") new_dep_node.set_field_containing_type(elem_name, pointed_type) - new_dep_node.set_field_ctype_complex_type(elem_name, ctypes._Pointer) + new_dep_node.set_field_ctype_complex_type( + elem_name, ctypes._Pointer + ) new_dep_node.set_field_ready(elem_name, is_ready=True) # Process function pointers (CFUNCTYPE) - elif hasattr(elem_type, '_restype_') and hasattr(elem_type, '_argtypes_'): + elif hasattr(elem_type, "_restype_") and hasattr( + elem_type, "_argtypes_" + ): # This is a CFUNCTYPE or similar - logger.info(f"Function pointer detected for {elem_name} with return type {elem_type._restype_} and arguments {elem_type._argtypes_}") + logger.info( + f"Function pointer detected for {elem_name} with return type {elem_type._restype_} and arguments {elem_type._argtypes_}" + ) # Set the field as ready but mark it with special handling - new_dep_node.set_field_ctype_complex_type(elem_name, ctypes.CFUNCTYPE) + new_dep_node.set_field_ctype_complex_type( + elem_name, ctypes.CFUNCTYPE + ) new_dep_node.set_field_ready(elem_name, is_ready=True) - logger.warning("Blindly processing CFUNCTYPE ctypes to ensure compilation. Unsupported") + logger.warning( + "Blindly processing CFUNCTYPE ctypes to ensure compilation. Unsupported" + ) else: # Regular ctype new_dep_node.set_field_ready(elem_name, is_ready=True) - logger.debug(f"Field {elem_name} is direct ctypes type: {elem_type}") + logger.debug( + f"Field {elem_name} is direct ctypes type: {elem_type}" + ) elif local_module_name == "vmlinux": new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size) logger.debug( @@ -149,7 +162,9 @@ def process_vmlinux_post_ast( elif issubclass(elem_type, ctypes._Pointer): ctype_complex_type = ctypes._Pointer else: - raise ImportError("Non Array and Pointer type ctype imports not supported in current version") + raise ImportError( + "Non Array and Pointer type ctype imports not supported in current version" + ) else: raise TypeError("Unsupported ctypes subclass") else: diff --git a/tests/passing_tests/vmlinux/simple_struct_test.py b/tests/passing_tests/vmlinux/simple_struct_test.py index 9a26ccf..f47076f 100644 --- a/tests/passing_tests/vmlinux/simple_struct_test.py +++ b/tests/passing_tests/vmlinux/simple_struct_test.py @@ -1,8 +1,9 @@ from pythonbpf import bpf, section, bpfglobal, compile_to_ir, compile from vmlinux import TASK_COMM_LEN # noqa: F401 from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 -from vmlinux import struct_uinput_device -from vmlinux import struct_blk_integrity_iter + +# from vmlinux import struct_uinput_device +# from vmlinux import struct_blk_integrity_iter from ctypes import c_int64 @@ -26,3 +27,4 @@ def LICENSE() -> str: compile_to_ir("simple_struct_test.py", "simple_struct_test.ll") +compile() From 5512bf52e46b58bb3bde526a4f750abdeee1d016 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Sat, 18 Oct 2025 23:29:31 +0530 Subject: [PATCH 33/51] add todo on struct name generator --- pythonbpf/vmlinux_parser/ir_gen/ir_generation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index 5d1df92..cacd2e7 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -142,6 +142,7 @@ class IRGenerator: index: int = 0, containing_type_size: int = 0, ) -> str: + # TODO: Does not support Unions as well as recursive pointer and array type naming if is_indexed: name = ( "llvm." From e636fcaea788ecec6b1e69f54ee2c45eb48584d8 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Mon, 20 Oct 2025 04:21:42 +0530 Subject: [PATCH 34/51] add assignment info class family and change how assignments are handled --- pythonbpf/vmlinux_parser/assignment_info.py | 34 +++++++++++++++++++ pythonbpf/vmlinux_parser/import_detector.py | 26 +++++++++++--- .../vmlinux/simple_struct_test.py | 1 - 3 files changed, 55 insertions(+), 6 deletions(-) create mode 100644 pythonbpf/vmlinux_parser/assignment_info.py diff --git a/pythonbpf/vmlinux_parser/assignment_info.py b/pythonbpf/vmlinux_parser/assignment_info.py new file mode 100644 index 0000000..d3665c0 --- /dev/null +++ b/pythonbpf/vmlinux_parser/assignment_info.py @@ -0,0 +1,34 @@ +from enum import Enum, auto +from typing import Any, Callable, Dict, List, Optional, TypedDict +from dataclasses import dataclass + +from pythonbpf.vmlinux_parser.dependency_node import Field + + +@dataclass +class AssignmentType(Enum): + CONSTANT = auto() + STRUCT = auto() + ARRAY = auto() # probably won't be used + FUNCTION_POINTER = auto() + POINTER = auto() # again, probably won't be used + +@dataclass +class FunctionSignature(TypedDict): + return_type: str + param_types: List[str] + varargs: bool + + +# Thew name of the assignment will be in the dict that uses this class +@dataclass +class AssignmentInfo(TypedDict): + value_type: AssignmentType + python_type: type + value: Optional[Any] + pointer_level: Optional[int] + signature: Optional[FunctionSignature] # For function pointers + # The key of the dict is the name of the field. + # Value is a tuple that contains the global variable representing that field + # along with all the information about that field as a Field type. + members: Optional[Dict[str, tuple[str, Field]]] # For structs. diff --git a/pythonbpf/vmlinux_parser/import_detector.py b/pythonbpf/vmlinux_parser/import_detector.py index 972b1ff..965bca8 100644 --- a/pythonbpf/vmlinux_parser/import_detector.py +++ b/pythonbpf/vmlinux_parser/import_detector.py @@ -4,6 +4,7 @@ from typing import List, Tuple, Any import importlib import inspect +from .assignment_info import AssignmentInfo, AssignmentType from .dependency_handler import DependencyHandler from .ir_gen import IRGenerator from .class_handler import process_vmlinux_class @@ -82,7 +83,7 @@ def vmlinux_proc(tree: ast.AST, module): # initialise dependency handler handler = DependencyHandler() # initialise assignment dictionary of name to type - assignments: dict[str, tuple[type, Any]] = {} + assignments: dict[str, AssignmentInfo] = {} if not import_statements: logger.info("No vmlinux imports found") @@ -132,16 +133,31 @@ def vmlinux_proc(tree: ast.AST, module): return assignments -def process_vmlinux_assign(node, module, assignments: dict[str, tuple[type, Any]]): - # Check if this is a simple assignment with a constant value +def process_vmlinux_assign(node, module, assignments: dict[str, AssignmentInfo]): + """Process assignments from vmlinux module.""" + # Only handle single-target assignments if len(node.targets) == 1 and isinstance(node.targets[0], ast.Name): target_name = node.targets[0].id + + # Handle constant value assignments if isinstance(node.value, ast.Constant): - assignments[target_name] = (type(node.value.value), node.value.value) + # Fixed: using proper TypedDict creation syntax with named arguments + assignments[target_name] = AssignmentInfo( + value_type=AssignmentType.CONSTANT, + python_type=type(node.value.value), + value=node.value.value, + pointer_level=None, + signature=None, + members=None + ) logger.info( f"Added assignment: {target_name} = {node.value.value!r} of type {type(node.value.value)}" ) + + # Handle other assignment types that we may need to support else: - raise ValueError(f"Unsupported assignment type for {target_name}") + logger.warning( + f"Unsupported assignment type for {target_name}: {ast.dump(node.value)}" + ) else: raise ValueError("Not a simple assignment") diff --git a/tests/passing_tests/vmlinux/simple_struct_test.py b/tests/passing_tests/vmlinux/simple_struct_test.py index f47076f..43e5c9e 100644 --- a/tests/passing_tests/vmlinux/simple_struct_test.py +++ b/tests/passing_tests/vmlinux/simple_struct_test.py @@ -27,4 +27,3 @@ def LICENSE() -> str: compile_to_ir("simple_struct_test.py", "simple_struct_test.ll") -compile() From 8da50b7068f3979e7b8fc3c593f9f6c80866e83e Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Mon, 20 Oct 2025 04:31:23 +0530 Subject: [PATCH 35/51] float assignments to class_handler.py --- pythonbpf/vmlinux_parser/assignment_info.py | 3 ++- pythonbpf/vmlinux_parser/class_handler.py | 19 +++++++++++++++---- pythonbpf/vmlinux_parser/import_detector.py | 7 +++---- .../vmlinux/simple_struct_test.py | 2 +- 4 files changed, 21 insertions(+), 10 deletions(-) diff --git a/pythonbpf/vmlinux_parser/assignment_info.py b/pythonbpf/vmlinux_parser/assignment_info.py index d3665c0..435a7ad 100644 --- a/pythonbpf/vmlinux_parser/assignment_info.py +++ b/pythonbpf/vmlinux_parser/assignment_info.py @@ -1,5 +1,5 @@ from enum import Enum, auto -from typing import Any, Callable, Dict, List, Optional, TypedDict +from typing import Any, Dict, List, Optional, TypedDict from dataclasses import dataclass from pythonbpf.vmlinux_parser.dependency_node import Field @@ -13,6 +13,7 @@ class AssignmentType(Enum): FUNCTION_POINTER = auto() POINTER = auto() # again, probably won't be used + @dataclass class FunctionSignature(TypedDict): return_type: str diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index 108fa9f..2adf76d 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -1,6 +1,8 @@ import logging from functools import lru_cache import importlib + +from .assignment_info import AssignmentInfo from .dependency_handler import DependencyHandler from .dependency_node import DependencyNode import ctypes @@ -15,17 +17,26 @@ def get_module_symbols(module_name: str): return [name for name in dir(imported_module)], imported_module -def process_vmlinux_class(node, llvm_module, handler: DependencyHandler): +def process_vmlinux_class( + node, + llvm_module, + handler: DependencyHandler, + assignments: dict[str, AssignmentInfo], +): symbols_in_module, imported_module = get_module_symbols("vmlinux") if node.name in symbols_in_module: vmlinux_type = getattr(imported_module, node.name) - process_vmlinux_post_ast(vmlinux_type, llvm_module, handler) + process_vmlinux_post_ast(vmlinux_type, llvm_module, handler, assignments) else: raise ImportError(f"{node.name} not in vmlinux") def process_vmlinux_post_ast( - elem_type_class, llvm_handler, handler: DependencyHandler, processing_stack=None + elem_type_class, + llvm_handler, + handler: DependencyHandler, + assignments: dict[str, AssignmentInfo], + processing_stack=None, ): # Initialize processing stack on first call if processing_stack is None: @@ -46,7 +57,7 @@ def process_vmlinux_post_ast( logger.debug(f"Node {current_symbol_name} already processed and ready") return True - # XXX:Check it's use. It's probably not being used. + # XXX:Check its use. It's probably not being used. if current_symbol_name in processing_stack: logger.debug( f"Dependency already in processing stack for {current_symbol_name}, skipping" diff --git a/pythonbpf/vmlinux_parser/import_detector.py b/pythonbpf/vmlinux_parser/import_detector.py index 965bca8..d8bd78f 100644 --- a/pythonbpf/vmlinux_parser/import_detector.py +++ b/pythonbpf/vmlinux_parser/import_detector.py @@ -1,6 +1,5 @@ import ast import logging -from typing import List, Tuple, Any import importlib import inspect @@ -12,7 +11,7 @@ from .class_handler import process_vmlinux_class logger = logging.getLogger(__name__) -def detect_import_statement(tree: ast.AST) -> List[Tuple[str, ast.ImportFrom]]: +def detect_import_statement(tree: ast.AST) -> list[tuple[str, ast.ImportFrom]]: """ Parse AST and detect import statements from vmlinux. @@ -113,7 +112,7 @@ def vmlinux_proc(tree: ast.AST, module): isinstance(mod_node, ast.ClassDef) and mod_node.name == imported_name ): - process_vmlinux_class(mod_node, module, handler) + process_vmlinux_class(mod_node, module, handler, assignments) found = True break if isinstance(mod_node, ast.Assign): @@ -148,7 +147,7 @@ def process_vmlinux_assign(node, module, assignments: dict[str, AssignmentInfo]) value=node.value.value, pointer_level=None, signature=None, - members=None + members=None, ) logger.info( f"Added assignment: {target_name} = {node.value.value!r} of type {type(node.value.value)}" diff --git a/tests/passing_tests/vmlinux/simple_struct_test.py b/tests/passing_tests/vmlinux/simple_struct_test.py index 43e5c9e..c9390c8 100644 --- a/tests/passing_tests/vmlinux/simple_struct_test.py +++ b/tests/passing_tests/vmlinux/simple_struct_test.py @@ -1,4 +1,4 @@ -from pythonbpf import bpf, section, bpfglobal, compile_to_ir, compile +from pythonbpf import bpf, section, bpfglobal, compile_to_ir from vmlinux import TASK_COMM_LEN # noqa: F401 from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 From eee212795f4f5c0a70624c41dae786840afcf780 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Mon, 20 Oct 2025 04:41:00 +0530 Subject: [PATCH 36/51] add assignment dict handling to class_handler.py --- pythonbpf/codegen.py | 4 +-- pythonbpf/vmlinux_parser/class_handler.py | 33 ++++++++++++++++++++--- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/pythonbpf/codegen.py b/pythonbpf/codegen.py index 078adf7..beac470 100644 --- a/pythonbpf/codegen.py +++ b/pythonbpf/codegen.py @@ -55,11 +55,11 @@ def processor(source_code, filename, module): for func_node in bpf_chunks: logger.info(f"Found BPF function/struct: {func_node.name}") - vmlinux_proc(tree, module) + vmlinux_symtab = vmlinux_proc(tree, module) populate_global_symbol_table(tree, module) license_processing(tree, module) globals_processing(tree, module) - + print("DEBUG:", vmlinux_symtab) structs_sym_tab = structs_proc(tree, module, bpf_chunks) map_sym_tab = maps_proc(tree, module, bpf_chunks) func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab) diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index 2adf76d..58168b3 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -2,9 +2,9 @@ import logging from functools import lru_cache import importlib -from .assignment_info import AssignmentInfo +from .assignment_info import AssignmentInfo, AssignmentType from .dependency_handler import DependencyHandler -from .dependency_node import DependencyNode +from .dependency_node import DependencyNode, Field import ctypes from typing import Optional, Any, Dict @@ -103,12 +103,21 @@ def process_vmlinux_post_ast( else: raise TypeError("Could not get required class and definition") + # Create a members dictionary for AssignmentInfo + members_dict: Dict[str, tuple[str, Field]] = {} + logger.debug(f"Extracted fields for {current_symbol_name}: {field_table}") for elem in field_table.items(): elem_name, elem_temp_list = elem [elem_type, elem_bitfield_size] = elem_temp_list local_module_name = getattr(elem_type, "__module__", None) new_dep_node.add_field(elem_name, elem_type, ready=False) + + # Store field reference for struct assignment info + field_ref = new_dep_node.get_field(elem_name) + if field_ref: + members_dict[elem_name] = (elem_name, field_ref) + if local_module_name == ctypes.__name__: # TODO: need to process pointer to ctype and also CFUNCTYPES here recursively. Current processing is a single dereference new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size) @@ -220,6 +229,7 @@ def process_vmlinux_post_ast( containing_type, llvm_handler, handler, + assignments, # Pass assignments to recursive call processing_stack, ) new_dep_node.set_field_ready(elem_name, True) @@ -237,7 +247,11 @@ def process_vmlinux_post_ast( else str(elem_type) ) process_vmlinux_post_ast( - elem_type, llvm_handler, handler, processing_stack + elem_type, + llvm_handler, + handler, + assignments, + processing_stack, ) new_dep_node.set_field_ready(elem_name, True) else: @@ -245,10 +259,21 @@ def process_vmlinux_post_ast( f"{elem_name} with type {elem_type} from module {module_name} not supported in recursive resolver" ) + # Add struct to assignments dictionary + assignments[current_symbol_name] = AssignmentInfo( + value_type=AssignmentType.STRUCT, + python_type=elem_type_class, + value=None, + pointer_level=None, + signature=None, + members=members_dict, + ) + logger.info(f"Added struct assignment info for {current_symbol_name}") + else: raise ImportError("UNSUPPORTED Module") - logging.info( + logger.info( f"{current_symbol_name} processed and handler readiness {handler.is_ready}" ) return True From a20643f3a72ab9dbca10c2d9858994ee303d89ba Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Mon, 20 Oct 2025 18:41:59 +0530 Subject: [PATCH 37/51] move assignemnt tablegen to ir_generation.py --- pythonbpf/vmlinux_parser/class_handler.py | 28 ++----------------- pythonbpf/vmlinux_parser/import_detector.py | 4 +-- .../vmlinux_parser/ir_gen/ir_generation.py | 28 ++++++++++++++++++- 3 files changed, 31 insertions(+), 29 deletions(-) diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index 58168b3..a508ff7 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -2,9 +2,8 @@ import logging from functools import lru_cache import importlib -from .assignment_info import AssignmentInfo, AssignmentType from .dependency_handler import DependencyHandler -from .dependency_node import DependencyNode, Field +from .dependency_node import DependencyNode import ctypes from typing import Optional, Any, Dict @@ -21,12 +20,11 @@ def process_vmlinux_class( node, llvm_module, handler: DependencyHandler, - assignments: dict[str, AssignmentInfo], ): symbols_in_module, imported_module = get_module_symbols("vmlinux") if node.name in symbols_in_module: vmlinux_type = getattr(imported_module, node.name) - process_vmlinux_post_ast(vmlinux_type, llvm_module, handler, assignments) + process_vmlinux_post_ast(vmlinux_type, llvm_module, handler) else: raise ImportError(f"{node.name} not in vmlinux") @@ -35,7 +33,6 @@ def process_vmlinux_post_ast( elem_type_class, llvm_handler, handler: DependencyHandler, - assignments: dict[str, AssignmentInfo], processing_stack=None, ): # Initialize processing stack on first call @@ -103,9 +100,6 @@ def process_vmlinux_post_ast( else: raise TypeError("Could not get required class and definition") - # Create a members dictionary for AssignmentInfo - members_dict: Dict[str, tuple[str, Field]] = {} - logger.debug(f"Extracted fields for {current_symbol_name}: {field_table}") for elem in field_table.items(): elem_name, elem_temp_list = elem @@ -113,11 +107,6 @@ def process_vmlinux_post_ast( local_module_name = getattr(elem_type, "__module__", None) new_dep_node.add_field(elem_name, elem_type, ready=False) - # Store field reference for struct assignment info - field_ref = new_dep_node.get_field(elem_name) - if field_ref: - members_dict[elem_name] = (elem_name, field_ref) - if local_module_name == ctypes.__name__: # TODO: need to process pointer to ctype and also CFUNCTYPES here recursively. Current processing is a single dereference new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size) @@ -229,7 +218,6 @@ def process_vmlinux_post_ast( containing_type, llvm_handler, handler, - assignments, # Pass assignments to recursive call processing_stack, ) new_dep_node.set_field_ready(elem_name, True) @@ -250,7 +238,6 @@ def process_vmlinux_post_ast( elem_type, llvm_handler, handler, - assignments, processing_stack, ) new_dep_node.set_field_ready(elem_name, True) @@ -259,17 +246,6 @@ def process_vmlinux_post_ast( f"{elem_name} with type {elem_type} from module {module_name} not supported in recursive resolver" ) - # Add struct to assignments dictionary - assignments[current_symbol_name] = AssignmentInfo( - value_type=AssignmentType.STRUCT, - python_type=elem_type_class, - value=None, - pointer_level=None, - signature=None, - members=members_dict, - ) - logger.info(f"Added struct assignment info for {current_symbol_name}") - else: raise ImportError("UNSUPPORTED Module") diff --git a/pythonbpf/vmlinux_parser/import_detector.py b/pythonbpf/vmlinux_parser/import_detector.py index d8bd78f..6df7a98 100644 --- a/pythonbpf/vmlinux_parser/import_detector.py +++ b/pythonbpf/vmlinux_parser/import_detector.py @@ -112,7 +112,7 @@ def vmlinux_proc(tree: ast.AST, module): isinstance(mod_node, ast.ClassDef) and mod_node.name == imported_name ): - process_vmlinux_class(mod_node, module, handler, assignments) + process_vmlinux_class(mod_node, module, handler) found = True break if isinstance(mod_node, ast.Assign): @@ -128,7 +128,7 @@ def vmlinux_proc(tree: ast.AST, module): f"{imported_name} not found as ClassDef or Assign in vmlinux" ) - IRGenerator(module, handler) + IRGenerator(module, handler, assignments) return assignments diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index cacd2e7..bd0adfa 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -1,5 +1,8 @@ import ctypes import logging + +from ..dependency_node import Field +from ..assignment_info import AssignmentInfo, AssignmentType from ..dependency_handler import DependencyHandler from .debug_info_gen import debug_info_generation from ..dependency_node import DependencyNode @@ -10,11 +13,13 @@ logger = logging.getLogger(__name__) class IRGenerator: # get the assignments dict and add this stuff to it. - def __init__(self, llvm_module, handler: DependencyHandler, assignment=None): + def __init__(self, llvm_module, handler: DependencyHandler, assignments): self.llvm_module = llvm_module self.handler: DependencyHandler = handler self.generated: list[str] = [] self.generated_debug_info: list = [] + self.generated_field_names: dict[Field, str] = {} + self.assignments: dict[str, AssignmentInfo] = assignments if not handler.is_ready: raise ImportError( "Semantic analysis of vmlinux imports failed. Cannot generate IR" @@ -67,6 +72,24 @@ class IRGenerator: f"Warning: Dependency {dependency} not found in handler" ) + # Fill the assignments dictionary with struct information + if struct.name not in self.assignments: + # Create a members dictionary for AssignmentInfo + members_dict = {} + for field_name, field in struct.fields.items(): + members_dict[field_name] = (self.generated_field_names[field], field) + + # Add struct to assignments dictionary + self.assignments[struct.name] = AssignmentInfo( + value_type=AssignmentType.STRUCT, + python_type=struct.ctype_struct, + value=None, + pointer_level=None, + signature=None, + members=members_dict, + ) + logger.info(f"Added struct assignment info for {struct.name}") + # Actual processor logic here after dependencies are resolved self.generated_debug_info.append( (struct, self.gen_ir(struct, self.generated_debug_info)) @@ -98,6 +121,7 @@ class IRGenerator: field_co_re_name = self._struct_name_generator( struct, field, field_index, True, i, containing_type_size ) + self.generated_field_names[field] = field_co_re_name globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name ) @@ -115,6 +139,7 @@ class IRGenerator: field_co_re_name = self._struct_name_generator( struct, field, field_index, True, i, containing_type_size ) + self.generated_field_names[field] = field_co_re_name globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name ) @@ -125,6 +150,7 @@ class IRGenerator: field_co_re_name = self._struct_name_generator( struct, field, field_index ) + self.generated_field_names[field] = field_co_re_name field_index += 1 globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name From c9363e62a9a0d017a2972323d068bdce33e150de Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Mon, 20 Oct 2025 21:27:18 +0530 Subject: [PATCH 38/51] move field name generation to assignments dict --- pythonbpf/vmlinux_parser/dependency_node.py | 38 +++++++++++++ .../vmlinux_parser/ir_gen/ir_generation.py | 57 +++++++++++++++---- 2 files changed, 85 insertions(+), 10 deletions(-) diff --git a/pythonbpf/vmlinux_parser/dependency_node.py b/pythonbpf/vmlinux_parser/dependency_node.py index e266761..5055b32 100644 --- a/pythonbpf/vmlinux_parser/dependency_node.py +++ b/pythonbpf/vmlinux_parser/dependency_node.py @@ -18,6 +18,44 @@ class Field: value: Any = None ready: bool = False + def __hash__(self): + """ + Create a hash based on the immutable attributes that define this field's identity. + This allows Field objects to be used as dictionary keys. + """ + # Use a tuple of the fields that uniquely identify this field + identity = ( + self.name, + id(self.type), # Use id for non-hashable types + id(self.ctype_complex_type) if self.ctype_complex_type else None, + id(self.containing_type) if self.containing_type else None, + self.type_size, + self.bitfield_size, + self.offset, + self.value if self.value else None, + ) + return hash(identity) + + def __eq__(self, other): + """ + Define equality consistent with the hash function. + Two fields are equal if they have the same name, type, and offset. + """ + # DO ther change here + if not isinstance(other, Field): + return False + + return ( + self.name == other.name + and self.type is other.type + and self.ctype_complex_type is other.ctype_complex_type + and self.containing_type is other.containing_type + and self.type_size == other.type_size + and self.bitfield_size == other.bitfield_size + and self.offset == other.offset + and self.value == other.value + ) + def set_ready(self, is_ready: bool = True) -> None: """Set the readiness state of this field.""" self.ready = is_ready diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index bd0adfa..52fd4bb 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -1,7 +1,6 @@ import ctypes import logging -from ..dependency_node import Field from ..assignment_info import AssignmentInfo, AssignmentType from ..dependency_handler import DependencyHandler from .debug_info_gen import debug_info_generation @@ -18,7 +17,8 @@ class IRGenerator: self.handler: DependencyHandler = handler self.generated: list[str] = [] self.generated_debug_info: list = [] - self.generated_field_names: dict[Field, str] = {} + # Use struct_name and field_name as key instead of Field object + self.generated_field_names: dict[str, dict[str, str]] = {} self.assignments: dict[str, AssignmentInfo] = assignments if not handler.is_ready: raise ImportError( @@ -72,12 +72,30 @@ class IRGenerator: f"Warning: Dependency {dependency} not found in handler" ) + # Generate IR first to populate field names + self.generated_debug_info.append( + (struct, self.gen_ir(struct, self.generated_debug_info)) + ) + # Fill the assignments dictionary with struct information if struct.name not in self.assignments: # Create a members dictionary for AssignmentInfo members_dict = {} for field_name, field in struct.fields.items(): - members_dict[field_name] = (self.generated_field_names[field], field) + # Get the generated field name from our dictionary, or use field_name if not found + if ( + struct.name in self.generated_field_names + and field_name in self.generated_field_names[struct.name] + ): + field_co_re_name = self.generated_field_names[struct.name][ + field_name + ] + members_dict[field_name] = (field_co_re_name, field) + else: + raise ValueError( + f"llvm global name not found for struct field {field_name}" + ) + # members_dict[field_name] = (field_name, field) # Add struct to assignments dictionary self.assignments[struct.name] = AssignmentInfo( @@ -90,10 +108,6 @@ class IRGenerator: ) logger.info(f"Added struct assignment info for {struct.name}") - # Actual processor logic here after dependencies are resolved - self.generated_debug_info.append( - (struct, self.gen_ir(struct, self.generated_debug_info)) - ) self.generated.append(struct.name) finally: @@ -108,6 +122,11 @@ class IRGenerator: struct, self.llvm_module, generated_debug_info ) field_index = 0 + + # Make sure the struct has an entry in our field names dictionary + if struct.name not in self.generated_field_names: + self.generated_field_names[struct.name] = {} + for field_name, field in struct.fields.items(): # does not take arrays and similar types into consideration yet. if field.ctype_complex_type is not None and issubclass( @@ -117,11 +136,27 @@ class IRGenerator: containing_type = field.containing_type if containing_type.__module__ == ctypes.__name__: containing_type_size = ctypes.sizeof(containing_type) + if array_size == 0: + field_co_re_name = self._struct_name_generator( + struct, field, field_index, True, 0, containing_type_size + ) + self.generated_field_names[struct.name][field_name] = ( + field_co_re_name + ) + globvar = ir.GlobalVariable( + self.llvm_module, ir.IntType(64), name=field_co_re_name + ) + globvar.linkage = "external" + globvar.set_metadata("llvm.preserve.access.index", debug_info) + field_index += 1 + continue for i in range(0, array_size): field_co_re_name = self._struct_name_generator( struct, field, field_index, True, i, containing_type_size ) - self.generated_field_names[field] = field_co_re_name + self.generated_field_names[struct.name][field_name] = ( + field_co_re_name + ) globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name ) @@ -139,7 +174,9 @@ class IRGenerator: field_co_re_name = self._struct_name_generator( struct, field, field_index, True, i, containing_type_size ) - self.generated_field_names[field] = field_co_re_name + self.generated_field_names[struct.name][field_name] = ( + field_co_re_name + ) globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name ) @@ -150,7 +187,7 @@ class IRGenerator: field_co_re_name = self._struct_name_generator( struct, field, field_index ) - self.generated_field_names[field] = field_co_re_name + self.generated_field_names[struct.name][field_name] = field_co_re_name field_index += 1 globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name From 3b323132f04d409e3889ced6c862533ba4e92a87 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Mon, 20 Oct 2025 21:29:04 +0530 Subject: [PATCH 39/51] change equality condition --- pythonbpf/vmlinux_parser/dependency_node.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/pythonbpf/vmlinux_parser/dependency_node.py b/pythonbpf/vmlinux_parser/dependency_node.py index 5055b32..dd413ad 100644 --- a/pythonbpf/vmlinux_parser/dependency_node.py +++ b/pythonbpf/vmlinux_parser/dependency_node.py @@ -39,22 +39,9 @@ class Field: def __eq__(self, other): """ Define equality consistent with the hash function. - Two fields are equal if they have the same name, type, and offset. + Two fields are equal if they have they are the same """ - # DO ther change here - if not isinstance(other, Field): - return False - - return ( - self.name == other.name - and self.type is other.type - and self.ctype_complex_type is other.ctype_complex_type - and self.containing_type is other.containing_type - and self.type_size == other.type_size - and self.bitfield_size == other.bitfield_size - and self.offset == other.offset - and self.value == other.value - ) + return self is other def set_ready(self, is_ready: bool = True) -> None: """Set the readiness state of this field.""" From 56a2fbaf5b4c0a39cdcb144e3472ea13d368c8f6 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Mon, 20 Oct 2025 21:36:46 +0530 Subject: [PATCH 40/51] change globvar string to real global variable --- pythonbpf/vmlinux_parser/assignment_info.py | 3 ++- .../vmlinux_parser/ir_gen/ir_generation.py | 26 +++++++++---------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/pythonbpf/vmlinux_parser/assignment_info.py b/pythonbpf/vmlinux_parser/assignment_info.py index 435a7ad..465432d 100644 --- a/pythonbpf/vmlinux_parser/assignment_info.py +++ b/pythonbpf/vmlinux_parser/assignment_info.py @@ -1,6 +1,7 @@ from enum import Enum, auto from typing import Any, Dict, List, Optional, TypedDict from dataclasses import dataclass +import llvmlite.ir as ir from pythonbpf.vmlinux_parser.dependency_node import Field @@ -32,4 +33,4 @@ class AssignmentInfo(TypedDict): # The key of the dict is the name of the field. # Value is a tuple that contains the global variable representing that field # along with all the information about that field as a Field type. - members: Optional[Dict[str, tuple[str, Field]]] # For structs. + members: Optional[Dict[str, tuple[ir.GlobalVariable, Field]]] # For structs. diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index 52fd4bb..949b4b6 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -18,7 +18,7 @@ class IRGenerator: self.generated: list[str] = [] self.generated_debug_info: list = [] # Use struct_name and field_name as key instead of Field object - self.generated_field_names: dict[str, dict[str, str]] = {} + self.generated_field_names: dict[str, dict[str, ir.GlobalVariable]] = {} self.assignments: dict[str, AssignmentInfo] = assignments if not handler.is_ready: raise ImportError( @@ -87,10 +87,10 @@ class IRGenerator: struct.name in self.generated_field_names and field_name in self.generated_field_names[struct.name] ): - field_co_re_name = self.generated_field_names[struct.name][ + field_global_variable = self.generated_field_names[struct.name][ field_name ] - members_dict[field_name] = (field_co_re_name, field) + members_dict[field_name] = (field_global_variable, field) else: raise ValueError( f"llvm global name not found for struct field {field_name}" @@ -140,28 +140,28 @@ class IRGenerator: field_co_re_name = self._struct_name_generator( struct, field, field_index, True, 0, containing_type_size ) - self.generated_field_names[struct.name][field_name] = ( - field_co_re_name - ) globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name ) globvar.linkage = "external" globvar.set_metadata("llvm.preserve.access.index", debug_info) + self.generated_field_names[struct.name][field_name] = ( + globvar + ) field_index += 1 continue for i in range(0, array_size): field_co_re_name = self._struct_name_generator( struct, field, field_index, True, i, containing_type_size ) - self.generated_field_names[struct.name][field_name] = ( - field_co_re_name - ) globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name ) globvar.linkage = "external" globvar.set_metadata("llvm.preserve.access.index", debug_info) + self.generated_field_names[struct.name][field_name] = ( + globvar + ) field_index += 1 elif field.type_size is not None: array_size = field.type_size @@ -174,26 +174,26 @@ class IRGenerator: field_co_re_name = self._struct_name_generator( struct, field, field_index, True, i, containing_type_size ) - self.generated_field_names[struct.name][field_name] = ( - field_co_re_name - ) globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name ) globvar.linkage = "external" globvar.set_metadata("llvm.preserve.access.index", debug_info) + self.generated_field_names[struct.name][field_name] = ( + globvar + ) field_index += 1 else: field_co_re_name = self._struct_name_generator( struct, field, field_index ) - self.generated_field_names[struct.name][field_name] = field_co_re_name field_index += 1 globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name ) globvar.linkage = "external" globvar.set_metadata("llvm.preserve.access.index", debug_info) + self.generated_field_names[struct.name][field_name] = globvar return debug_info def _struct_name_generator( From 76d0dbfbf4711729b54ee2f76794d39c6b0de894 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Mon, 20 Oct 2025 21:36:54 +0530 Subject: [PATCH 41/51] change globvar string to real global variable --- pythonbpf/vmlinux_parser/ir_gen/ir_generation.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index 949b4b6..960671e 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -145,9 +145,7 @@ class IRGenerator: ) globvar.linkage = "external" globvar.set_metadata("llvm.preserve.access.index", debug_info) - self.generated_field_names[struct.name][field_name] = ( - globvar - ) + self.generated_field_names[struct.name][field_name] = globvar field_index += 1 continue for i in range(0, array_size): @@ -159,9 +157,7 @@ class IRGenerator: ) globvar.linkage = "external" globvar.set_metadata("llvm.preserve.access.index", debug_info) - self.generated_field_names[struct.name][field_name] = ( - globvar - ) + self.generated_field_names[struct.name][field_name] = globvar field_index += 1 elif field.type_size is not None: array_size = field.type_size @@ -179,9 +175,7 @@ class IRGenerator: ) globvar.linkage = "external" globvar.set_metadata("llvm.preserve.access.index", debug_info) - self.generated_field_names[struct.name][field_name] = ( - globvar - ) + self.generated_field_names[struct.name][field_name] = globvar field_index += 1 else: field_co_re_name = self._struct_name_generator( From e499c29d424ab878b21341039ec601a9050276ff Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Mon, 20 Oct 2025 22:13:38 +0530 Subject: [PATCH 42/51] float vmlinux_symtab till process_func_body --- pythonbpf/codegen.py | 3 +-- pythonbpf/functions/functions_pass.py | 9 +++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pythonbpf/codegen.py b/pythonbpf/codegen.py index beac470..287fef9 100644 --- a/pythonbpf/codegen.py +++ b/pythonbpf/codegen.py @@ -59,10 +59,9 @@ def processor(source_code, filename, module): populate_global_symbol_table(tree, module) license_processing(tree, module) globals_processing(tree, module) - print("DEBUG:", vmlinux_symtab) structs_sym_tab = structs_proc(tree, module, bpf_chunks) map_sym_tab = maps_proc(tree, module, bpf_chunks) - func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab) + func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab, vmlinux_symtab) globals_list_creation(tree, module) diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index 8d0bce1..6e06de7 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -311,7 +311,7 @@ def process_stmt( def process_func_body( - module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab + module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab, vmlinux_symtab ): """Process the body of a bpf function""" # TODO: A lot. We just have print -> bpf_trace_printk for now @@ -350,7 +350,7 @@ def process_func_body( builder.ret(ir.Constant(ir.IntType(64), 0)) -def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_tab): +def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_tab, vmlinux_symtab): """Process a single BPF chunk (function) and emit corresponding LLVM IR.""" func_name = func_node.name @@ -384,7 +384,7 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t builder = ir.IRBuilder(block) process_func_body( - module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab + module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab, vmlinux_symtab ) return func @@ -394,7 +394,7 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t # ============================================================================ -def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab): +def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab, vmlinux_symtab): for func_node in chunks: if is_global_function(func_node): continue @@ -407,6 +407,7 @@ def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab): ctypes_to_ir(infer_return_type(func_node)), map_sym_tab, structs_sym_tab, + vmlinux_symtab ) From 5955db88cfacdfda39dc6316578799c80125cbd8 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Tue, 21 Oct 2025 03:24:27 +0530 Subject: [PATCH 43/51] add vmlinux expressions to eval expr --- pythonbpf/allocation_pass.py | 10 +++ pythonbpf/codegen.py | 6 ++ pythonbpf/expr/__init__.py | 2 + pythonbpf/expr/expr_pass.py | 16 +++- pythonbpf/expr/vmlinux_registry.py | 45 ++++++++++ pythonbpf/functions/functions_pass.py | 25 ++++-- .../vmlinux_parser/vmlinux_exports_handler.py | 82 +++++++++++++++++++ .../vmlinux/simple_struct_test.py | 4 +- 8 files changed, 180 insertions(+), 10 deletions(-) create mode 100644 pythonbpf/expr/vmlinux_registry.py create mode 100644 pythonbpf/vmlinux_parser/vmlinux_exports_handler.py diff --git a/pythonbpf/allocation_pass.py b/pythonbpf/allocation_pass.py index 9d82484..3149c75 100644 --- a/pythonbpf/allocation_pass.py +++ b/pythonbpf/allocation_pass.py @@ -5,6 +5,7 @@ from llvmlite import ir from dataclasses import dataclass from typing import Any from pythonbpf.helper import HelperHandlerRegistry +from .expr import VmlinuxHandlerRegistry from pythonbpf.type_deducer import ctypes_to_ir logger = logging.getLogger(__name__) @@ -49,6 +50,15 @@ def handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab): logger.debug(f"Variable {var_name} already allocated, skipping") return + # When allocating a variable, check if it's a vmlinux struct type + if isinstance(stmt.value, ast.Name) and VmlinuxHandlerRegistry.is_vmlinux_struct( + stmt.value.id + ): + # Handle vmlinux struct allocation + # This requires more implementation + print(stmt.value) + pass + # Determine type and allocate based on rval if isinstance(rval, ast.Call): _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab) diff --git a/pythonbpf/codegen.py b/pythonbpf/codegen.py index 287fef9..e3fa5d3 100644 --- a/pythonbpf/codegen.py +++ b/pythonbpf/codegen.py @@ -5,6 +5,8 @@ from .functions import func_proc from .maps import maps_proc from .structs import structs_proc from .vmlinux_parser import vmlinux_proc +from pythonbpf.vmlinux_parser.vmlinux_exports_handler import VmlinuxHandler +from .expr import VmlinuxHandlerRegistry from .globals_pass import ( globals_list_creation, globals_processing, @@ -56,6 +58,10 @@ def processor(source_code, filename, module): logger.info(f"Found BPF function/struct: {func_node.name}") vmlinux_symtab = vmlinux_proc(tree, module) + if vmlinux_symtab: + handler = VmlinuxHandler.initialize(vmlinux_symtab) + VmlinuxHandlerRegistry.set_handler(handler) + populate_global_symbol_table(tree, module) license_processing(tree, module) globals_processing(tree, module) diff --git a/pythonbpf/expr/__init__.py b/pythonbpf/expr/__init__.py index 3c403dd..ac3a975 100644 --- a/pythonbpf/expr/__init__.py +++ b/pythonbpf/expr/__init__.py @@ -2,6 +2,7 @@ from .expr_pass import eval_expr, handle_expr, get_operand_value from .type_normalization import convert_to_bool, get_base_type_and_depth from .ir_ops import deref_to_depth from .call_registry import CallHandlerRegistry +from .vmlinux_registry import VmlinuxHandlerRegistry __all__ = [ "eval_expr", @@ -11,4 +12,5 @@ __all__ = [ "deref_to_depth", "get_operand_value", "CallHandlerRegistry", + "VmlinuxHandlerRegistry", ] diff --git a/pythonbpf/expr/expr_pass.py b/pythonbpf/expr/expr_pass.py index 8bbd524..281d3a1 100644 --- a/pythonbpf/expr/expr_pass.py +++ b/pythonbpf/expr/expr_pass.py @@ -12,6 +12,7 @@ from .type_normalization import ( get_base_type_and_depth, deref_to_depth, ) +from .vmlinux_registry import VmlinuxHandlerRegistry logger: Logger = logging.getLogger(__name__) @@ -27,8 +28,12 @@ def _handle_name_expr(expr: ast.Name, local_sym_tab: Dict, builder: ir.IRBuilder val = builder.load(var) return val, local_sym_tab[expr.id].ir_type else: - logger.info(f"Undefined variable {expr.id}") - return None + # Check if it's a vmlinux enum/constant + vmlinux_result = VmlinuxHandlerRegistry.handle_name(expr.id) + if vmlinux_result is not None: + return vmlinux_result + + raise SyntaxError(f"Undefined variable {expr.id}") def _handle_constant_expr(module, builder, expr: ast.Constant): @@ -74,6 +79,13 @@ def _handle_attribute_expr( val = builder.load(gep) field_type = metadata.field_type(attr_name) return val, field_type + + # Try vmlinux handler as fallback + vmlinux_result = VmlinuxHandlerRegistry.handle_attribute( + expr, local_sym_tab, None, builder + ) + if vmlinux_result is not None: + return vmlinux_result return None diff --git a/pythonbpf/expr/vmlinux_registry.py b/pythonbpf/expr/vmlinux_registry.py new file mode 100644 index 0000000..9e9d52e --- /dev/null +++ b/pythonbpf/expr/vmlinux_registry.py @@ -0,0 +1,45 @@ +import ast + + +class VmlinuxHandlerRegistry: + """Registry for vmlinux handler operations""" + + _handler = None + + @classmethod + def set_handler(cls, handler): + """Set the vmlinux handler""" + cls._handler = handler + + @classmethod + def get_handler(cls): + """Get the vmlinux handler""" + return cls._handler + + @classmethod + def handle_name(cls, name): + """Try to handle a name as vmlinux enum/constant""" + if cls._handler is None: + return None + return cls._handler.handle_vmlinux_enum(name) + + @classmethod + def handle_attribute(cls, expr, local_sym_tab, module, builder): + """Try to handle an attribute access as vmlinux struct field""" + if cls._handler is None: + return None + + if isinstance(expr.value, ast.Name): + var_name = expr.value.id + field_name = expr.attr + return cls._handler.handle_vmlinux_struct_field( + var_name, field_name, module, builder, local_sym_tab + ) + return None + + @classmethod + def is_vmlinux_struct(cls, name): + """Check if a name refers to a vmlinux struct""" + if cls._handler is None: + return False + return cls._handler.is_vmlinux_struct(name) diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index 6e06de7..e712030 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -311,7 +311,13 @@ def process_stmt( def process_func_body( - module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab, vmlinux_symtab + module, + builder, + func_node, + func, + ret_type, + map_sym_tab, + structs_sym_tab, ): """Process the body of a bpf function""" # TODO: A lot. We just have print -> bpf_trace_printk for now @@ -350,7 +356,9 @@ def process_func_body( builder.ret(ir.Constant(ir.IntType(64), 0)) -def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_tab, vmlinux_symtab): +def process_bpf_chunk( + func_node, module, return_type, map_sym_tab, structs_sym_tab +): """Process a single BPF chunk (function) and emit corresponding LLVM IR.""" func_name = func_node.name @@ -384,7 +392,13 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t builder = ir.IRBuilder(block) process_func_body( - module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab, vmlinux_symtab + module, + builder, + func_node, + func, + ret_type, + map_sym_tab, + structs_sym_tab, ) return func @@ -394,7 +408,7 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t # ============================================================================ -def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab, vmlinux_symtab): +def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab): for func_node in chunks: if is_global_function(func_node): continue @@ -406,8 +420,7 @@ def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab, vmlinux_symtab module, ctypes_to_ir(infer_return_type(func_node)), map_sym_tab, - structs_sym_tab, - vmlinux_symtab + structs_sym_tab ) diff --git a/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py b/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py new file mode 100644 index 0000000..5fa6a18 --- /dev/null +++ b/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py @@ -0,0 +1,82 @@ +import logging +from llvmlite import ir + +from pythonbpf.vmlinux_parser.assignment_info import AssignmentType + +logger = logging.getLogger(__name__) + + +class VmlinuxHandler: + """Handler for vmlinux-related operations""" + + _instance = None + + @classmethod + def get_instance(cls): + """Get the singleton instance""" + if cls._instance is None: + logger.warning("VmlinuxHandler used before initialization") + return None + return cls._instance + + @classmethod + def initialize(cls, vmlinux_symtab): + """Initialize the handler with vmlinux symbol table""" + cls._instance = cls(vmlinux_symtab) + return cls._instance + + def __init__(self, vmlinux_symtab): + """Initialize with vmlinux symbol table""" + self.vmlinux_symtab = vmlinux_symtab + logger.info( + f"VmlinuxHandler initialized with {len(vmlinux_symtab) if vmlinux_symtab else 0} symbols" + ) + + def is_vmlinux_enum(self, name): + """Check if name is a vmlinux enum constant""" + return ( + name in self.vmlinux_symtab + and self.vmlinux_symtab[name]["value_type"] == AssignmentType.CONSTANT + ) + + def is_vmlinux_struct(self, name): + """Check if name is a vmlinux struct""" + return ( + name in self.vmlinux_symtab + and self.vmlinux_symtab[name]["value_type"] == AssignmentType.STRUCT + ) + + def handle_vmlinux_enum(self, name): + """Handle vmlinux enum constants by returning LLVM IR constants""" + if self.is_vmlinux_enum(name): + value = self.vmlinux_symtab[name]["value"] + logger.info(f"Resolving vmlinux enum {name} = {value}") + return ir.Constant(ir.IntType(64), value), ir.IntType(64) + return None + + def handle_vmlinux_struct(self, struct_name, module, builder): + """Handle vmlinux struct initializations""" + if self.is_vmlinux_struct(struct_name): + # TODO: Implement core-specific struct handling + # This will be more complex and depends on the BTF information + logger.info(f"Handling vmlinux struct {struct_name}") + # Return struct type and allocated pointer + # This is a stub, actual implementation will be more complex + return None + return None + + def handle_vmlinux_struct_field( + self, struct_var_name, field_name, module, builder, local_sym_tab + ): + """Handle access to vmlinux struct fields""" + # Check if it's a variable of vmlinux struct type + if struct_var_name in local_sym_tab: + var_info = local_sym_tab[struct_var_name] + # Need to check if this variable is a vmlinux struct + # This will depend on how you track vmlinux struct types in your symbol table + logger.info( + f"Attempting to access field {field_name} of possible vmlinux struct {struct_var_name}" + ) + # Return pointer to field and field type + return None + return None diff --git a/tests/passing_tests/vmlinux/simple_struct_test.py b/tests/passing_tests/vmlinux/simple_struct_test.py index c9390c8..c784696 100644 --- a/tests/passing_tests/vmlinux/simple_struct_test.py +++ b/tests/passing_tests/vmlinux/simple_struct_test.py @@ -16,8 +16,8 @@ from ctypes import c_int64 @bpf @section("tracepoint/syscalls/sys_enter_execve") def hello_world(ctx: struct_trace_event_raw_sys_enter) -> c_int64: - print("Hello, World!") - return c_int64(0) + print("Hello, World") + return c_int64(TASK_COMM_LEN) @bpf From e7734629a55dede36def52624200ad83d28c02a3 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Tue, 21 Oct 2025 03:41:21 +0530 Subject: [PATCH 44/51] support binary ops with vmlinux enums --- pythonbpf/codegen.py | 2 +- pythonbpf/expr/expr_pass.py | 7 ++++++- tests/passing_tests/vmlinux/simple_struct_test.py | 6 ++++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/pythonbpf/codegen.py b/pythonbpf/codegen.py index e3fa5d3..e97b194 100644 --- a/pythonbpf/codegen.py +++ b/pythonbpf/codegen.py @@ -67,7 +67,7 @@ def processor(source_code, filename, module): globals_processing(tree, module) structs_sym_tab = structs_proc(tree, module, bpf_chunks) map_sym_tab = maps_proc(tree, module, bpf_chunks) - func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab, vmlinux_symtab) + func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab) globals_list_creation(tree, module) diff --git a/pythonbpf/expr/expr_pass.py b/pythonbpf/expr/expr_pass.py index 281d3a1..5e1163a 100644 --- a/pythonbpf/expr/expr_pass.py +++ b/pythonbpf/expr/expr_pass.py @@ -142,7 +142,12 @@ def get_operand_value( logger.info(f"var is {var}, base_type is {base_type}, depth is {depth}") val = deref_to_depth(func, builder, var, depth) return val - raise ValueError(f"Undefined variable: {operand.id}") + else: + # Check if it's a vmlinux enum/constant + vmlinux_result = VmlinuxHandlerRegistry.handle_name(operand.id) + if vmlinux_result is not None: + val, _ = vmlinux_result + return val elif isinstance(operand, ast.Constant): if isinstance(operand.value, int): cst = ir.Constant(ir.IntType(64), int(operand.value)) diff --git a/tests/passing_tests/vmlinux/simple_struct_test.py b/tests/passing_tests/vmlinux/simple_struct_test.py index c784696..6507725 100644 --- a/tests/passing_tests/vmlinux/simple_struct_test.py +++ b/tests/passing_tests/vmlinux/simple_struct_test.py @@ -1,4 +1,4 @@ -from pythonbpf import bpf, section, bpfglobal, compile_to_ir +from pythonbpf import bpf, section, bpfglobal, compile_to_ir, compile from vmlinux import TASK_COMM_LEN # noqa: F401 from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 @@ -16,7 +16,8 @@ from ctypes import c_int64 @bpf @section("tracepoint/syscalls/sys_enter_execve") def hello_world(ctx: struct_trace_event_raw_sys_enter) -> c_int64: - print("Hello, World") + a = 2 + TASK_COMM_LEN + TASK_COMM_LEN + print(f"Hello, World{a}") return c_int64(TASK_COMM_LEN) @@ -27,3 +28,4 @@ def LICENSE() -> str: compile_to_ir("simple_struct_test.py", "simple_struct_test.ll") +compile() From c3f3d1e564c21c2696e08dc2bcfb21e345fde35a Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Tue, 21 Oct 2025 03:42:45 +0530 Subject: [PATCH 45/51] format chore --- pythonbpf/functions/functions_pass.py | 6 ++---- pythonbpf/vmlinux_parser/vmlinux_exports_handler.py | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index e712030..8243344 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -356,9 +356,7 @@ def process_func_body( builder.ret(ir.Constant(ir.IntType(64), 0)) -def process_bpf_chunk( - func_node, module, return_type, map_sym_tab, structs_sym_tab -): +def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_tab): """Process a single BPF chunk (function) and emit corresponding LLVM IR.""" func_name = func_node.name @@ -420,7 +418,7 @@ def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab): module, ctypes_to_ir(infer_return_type(func_node)), map_sym_tab, - structs_sym_tab + structs_sym_tab, ) diff --git a/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py b/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py index 5fa6a18..f821520 100644 --- a/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py +++ b/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py @@ -71,7 +71,7 @@ class VmlinuxHandler: """Handle access to vmlinux struct fields""" # Check if it's a variable of vmlinux struct type if struct_var_name in local_sym_tab: - var_info = local_sym_tab[struct_var_name] + var_info = local_sym_tab[struct_var_name] # noqa: F841 # Need to check if this variable is a vmlinux struct # This will depend on how you track vmlinux struct types in your symbol table logger.info( From 190baf26749291b4134dd62caef78f91d0e21c8f Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Tue, 21 Oct 2025 04:10:25 +0530 Subject: [PATCH 46/51] support vmlinux enum in printk handler --- pythonbpf/helper/printk_formatter.py | 11 +++++++++++ tests/passing_tests/vmlinux/simple_struct_test.py | 9 ++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/pythonbpf/helper/printk_formatter.py b/pythonbpf/helper/printk_formatter.py index e0cd669..66fcb50 100644 --- a/pythonbpf/helper/printk_formatter.py +++ b/pythonbpf/helper/printk_formatter.py @@ -3,6 +3,7 @@ import logging from llvmlite import ir from pythonbpf.expr import eval_expr, get_base_type_and_depth, deref_to_depth +from pythonbpf.expr.vmlinux_registry import VmlinuxHandlerRegistry logger = logging.getLogger(__name__) @@ -108,6 +109,16 @@ def _process_name_in_fval(name_node, fmt_parts, exprs, local_sym_tab): if local_sym_tab and name_node.id in local_sym_tab: _, var_type, tmp = local_sym_tab[name_node.id] _populate_fval(var_type, name_node, fmt_parts, exprs) + else: + # Try to resolve through vmlinux registry if not in local symbol table + result = VmlinuxHandlerRegistry.handle_name(name_node.id) + if result: + val, var_type = result + _populate_fval(var_type, name_node, fmt_parts, exprs) + else: + raise ValueError( + f"Variable '{name_node.id}' not found in symbol table or vmlinux" + ) def _process_attr_in_fval(attr_node, fmt_parts, exprs, local_sym_tab, struct_sym_tab): diff --git a/tests/passing_tests/vmlinux/simple_struct_test.py b/tests/passing_tests/vmlinux/simple_struct_test.py index 6507725..9c6d272 100644 --- a/tests/passing_tests/vmlinux/simple_struct_test.py +++ b/tests/passing_tests/vmlinux/simple_struct_test.py @@ -1,4 +1,7 @@ -from pythonbpf import bpf, section, bpfglobal, compile_to_ir, compile +import logging + +from pythonbpf import bpf, section, bpfglobal, compile_to_ir +from pythonbpf import compile # noqa: F401 from vmlinux import TASK_COMM_LEN # noqa: F401 from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 @@ -17,7 +20,7 @@ from ctypes import c_int64 @section("tracepoint/syscalls/sys_enter_execve") def hello_world(ctx: struct_trace_event_raw_sys_enter) -> c_int64: a = 2 + TASK_COMM_LEN + TASK_COMM_LEN - print(f"Hello, World{a}") + print(f"Hello, World{TASK_COMM_LEN} and {a}") return c_int64(TASK_COMM_LEN) @@ -27,5 +30,5 @@ def LICENSE() -> str: return "GPL" -compile_to_ir("simple_struct_test.py", "simple_struct_test.ll") +compile_to_ir("simple_struct_test.py", "simple_struct_test.ll", loglevel=logging.DEBUG) compile() From 1a0e21eaa8cbcf34f521f15af57990e02e5fac11 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Tue, 21 Oct 2025 04:50:34 +0530 Subject: [PATCH 47/51] support vmlinux enum in map arguments --- pythonbpf/expr/expr_pass.py | 1 + pythonbpf/maps/maps_pass.py | 11 ++++++++-- .../vmlinux_parser/vmlinux_exports_handler.py | 8 +++++++ .../vmlinux/simple_struct_test.py | 21 +++++++++++++++---- 4 files changed, 35 insertions(+), 6 deletions(-) diff --git a/pythonbpf/expr/expr_pass.py b/pythonbpf/expr/expr_pass.py index 5e1163a..2a7cd5f 100644 --- a/pythonbpf/expr/expr_pass.py +++ b/pythonbpf/expr/expr_pass.py @@ -349,6 +349,7 @@ def _handle_unary_op( neg_one = ir.Constant(ir.IntType(64), -1) result = builder.mul(operand, neg_one) return result, ir.IntType(64) + return None # ============================================================================ diff --git a/pythonbpf/maps/maps_pass.py b/pythonbpf/maps/maps_pass.py index 8459848..85837d7 100644 --- a/pythonbpf/maps/maps_pass.py +++ b/pythonbpf/maps/maps_pass.py @@ -6,6 +6,8 @@ from llvmlite import ir from .maps_utils import MapProcessorRegistry from .map_types import BPFMapType from .map_debug_info import create_map_debug_info, create_ringbuf_debug_info +from pythonbpf.expr.vmlinux_registry import VmlinuxHandlerRegistry + logger: Logger = logging.getLogger(__name__) @@ -51,7 +53,7 @@ def _parse_map_params(rval, expected_args=None): """Parse map parameters from call arguments and keywords.""" params = {} - + handler = VmlinuxHandlerRegistry.get_handler() # Parse positional arguments if expected_args: for i, arg_name in enumerate(expected_args): @@ -65,7 +67,12 @@ def _parse_map_params(rval, expected_args=None): # Parse keyword arguments (override positional) for keyword in rval.keywords: if isinstance(keyword.value, ast.Name): - params[keyword.arg] = keyword.value.id + name = keyword.value.id + if handler and handler.is_vmlinux_enum(name): + result = handler.get_vmlinux_enum_value(name) + params[keyword.arg] = result if result is not None else name + else: + params[keyword.arg] = name elif isinstance(keyword.value, ast.Constant): params[keyword.arg] = keyword.value.value diff --git a/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py b/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py index f821520..1986b44 100644 --- a/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py +++ b/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py @@ -54,6 +54,14 @@ class VmlinuxHandler: return ir.Constant(ir.IntType(64), value), ir.IntType(64) return None + def get_vmlinux_enum_value(self, name): + """Handle vmlinux enum constants by returning LLVM IR constants""" + if self.is_vmlinux_enum(name): + value = self.vmlinux_symtab[name]["value"] + logger.info(f"The value of vmlinux enum {name} = {value}") + return value + return None + def handle_vmlinux_struct(self, struct_name, module, builder): """Handle vmlinux struct initializations""" if self.is_vmlinux_struct(struct_name): diff --git a/tests/passing_tests/vmlinux/simple_struct_test.py b/tests/passing_tests/vmlinux/simple_struct_test.py index 9c6d272..97ab54a 100644 --- a/tests/passing_tests/vmlinux/simple_struct_test.py +++ b/tests/passing_tests/vmlinux/simple_struct_test.py @@ -1,13 +1,26 @@ import logging -from pythonbpf import bpf, section, bpfglobal, compile_to_ir +from pythonbpf import bpf, section, bpfglobal, compile_to_ir, map from pythonbpf import compile # noqa: F401 from vmlinux import TASK_COMM_LEN # noqa: F401 from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 +from ctypes import c_uint64, c_int32, c_int64 +from pythonbpf.maps import HashMap # from vmlinux import struct_uinput_device # from vmlinux import struct_blk_integrity_iter -from ctypes import c_int64 + + +@bpf +@map +def mymap() -> HashMap: + return HashMap(key=c_int32, value=c_uint64, max_entries=TASK_COMM_LEN) + + +@bpf +@map +def mymap2() -> HashMap: + return HashMap(key=c_int32, value=c_uint64, max_entries=18) # Instructions to how to run this program @@ -21,7 +34,7 @@ from ctypes import c_int64 def hello_world(ctx: struct_trace_event_raw_sys_enter) -> c_int64: a = 2 + TASK_COMM_LEN + TASK_COMM_LEN print(f"Hello, World{TASK_COMM_LEN} and {a}") - return c_int64(TASK_COMM_LEN) + return c_int64(TASK_COMM_LEN + 2) @bpf @@ -31,4 +44,4 @@ def LICENSE() -> str: compile_to_ir("simple_struct_test.py", "simple_struct_test.ll", loglevel=logging.DEBUG) -compile() +# compile() From ba860b503956e6b0289d733cfb5ef4fcc0bf58ec Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Wed, 22 Oct 2025 02:43:22 +0530 Subject: [PATCH 48/51] add setup script --- tools/setup.sh | 198 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 198 insertions(+) create mode 100755 tools/setup.sh diff --git a/tools/setup.sh b/tools/setup.sh new file mode 100755 index 0000000..39f8245 --- /dev/null +++ b/tools/setup.sh @@ -0,0 +1,198 @@ +#!/bin/bash + +echo "====================================================================" +echo " ⚠️ WARNING ⚠️" +echo " This script will run kernel-level BPF programs." +echo " BPF programs run with kernel privileges and could potentially" +echo " affect system stability if not used properly." +echo "" +echo " PLEASE REVIEW THE SOURCE CODE BEFORE RUNNING:" +echo " https://github.com/pythonbpf/python-bpf" +echo "====================================================================" +echo + +echo "This script will:" +echo "1. Check and install required dependencies (libelf, clang, python, bpftool)" +echo "2. Download example programs from the Python-BPF GitHub repository" +echo "3. Create a Python virtual environment with necessary packages" +echo "4. Start a Jupyter notebook server" +echo + +read -p "Would you like to continue? (y/N) " -n 1 -r +echo +if [[ ! $REPLY =~ ^[Yy]$ ]]; then + echo "Script execution cancelled." + exit 1 +fi + +if [ "$EUID" -ne 0 ]; then + echo "Please run this script with sudo." + exit 1 +fi + +WORK_DIR="/tmp/python_bpf_setup" +REAL_USER=$(logname || echo "$SUDO_USER") + +echo "Creating temporary directory: $WORK_DIR" +mkdir -p "$WORK_DIR" +cd "$WORK_DIR" || exit 1 + +if [ -f /etc/os-release ]; then + . /etc/os-release + DISTRO=$ID +else + echo "Cannot determine Linux distribution. Exiting." + exit 1 +fi + +install_dependencies() { + case $DISTRO in + ubuntu|debian|pop|mint|elementary|zorin) + echo "Detected Ubuntu/Debian-based system" + apt update + + # Check and install libelf + if ! dpkg -l libelf-dev >/dev/null 2>&1; then + echo "Installing libelf-dev..." + apt install -y libelf-dev + else + echo "libelf-dev is already installed." + fi + + # Check and install clang + if ! command -v clang >/dev/null 2>&1; then + echo "Installing clang..." + apt install -y clang + else + echo "clang is already installed." + fi + + # Check and install python + if ! command -v python3 >/dev/null 2>&1; then + echo "Installing python3..." + apt install -y python3 python3-pip python3-venv + else + echo "python3 is already installed." + fi + + # Check and install bpftool + if ! command -v bpftool >/dev/null 2>&1; then + echo "Installing bpftool..." + apt install -y linux-tools-common linux-tools-generic + + # If bpftool still not found, try installing linux-tools-$(uname -r) + if ! command -v bpftool >/dev/null 2>&1; then + KERNEL_VERSION=$(uname -r) + apt install -y linux-tools-$KERNEL_VERSION + fi + else + echo "bpftool is already installed." + fi + ;; + + arch|manjaro|endeavouros) + echo "Detected Arch-based Linux system" + + # Check and install libelf + if ! pacman -Q libelf >/dev/null 2>&1; then + echo "Installing libelf..." + pacman -S --noconfirm libelf + else + echo "libelf is already installed." + fi + + # Check and install clang + if ! command -v clang >/dev/null 2>&1; then + echo "Installing clang..." + pacman -S --noconfirm clang + else + echo "clang is already installed." + fi + + # Check and install python + if ! command -v python3 >/dev/null 2>&1; then + echo "Installing python3..." + pacman -S --noconfirm python python-pip + else + echo "python3 is already installed." + fi + + # Check and install bpftool + if ! command -v bpftool >/dev/null 2>&1; then + echo "Installing bpftool..." + pacman -S --noconfirm bpf linux-headers + else + echo "bpftool is already installed." + fi + ;; + + *) + echo "Unsupported distribution: $DISTRO" + echo "This script only supports Ubuntu/Debian and Arch Linux derivatives." + exit 1 + ;; + esac +} + +echo "Checking and installing dependencies..." +install_dependencies + +# Download example programs +echo "Downloading example programs from Python-BPF GitHub repository..." +mkdir -p examples +cd examples || exit 1 + +echo "Fetching example files list..." +FILES=$(curl -s "https://api.github.com/repos/pythonbpf/Python-BPF/contents/examples" | grep -o '"path":"examples/[^"]*"' | awk -F'"' '{print $4}') + +if [ -z "$FILES" ]; then + echo "Failed to fetch file list from repository. Using fallback method..." + # Fallback to downloading common example files + EXAMPLES=( + "binops_demo.py" + "blk_request.py" + "clone-matplotlib.ipynb" + "clone_plot.py" + "hello_world.py" + "kprobes.py" + "struct_and_perf.py" + "sys_sync.py" + "xdp_pass.py" + ) + + for example in "${EXAMPLES[@]}"; do + echo "Downloading: $example" + curl -s -O "https://raw.githubusercontent.com/pythonbpf/Python-BPF/master/examples/$example" + done +else + for file in $FILES; do + filename=$(basename "$file") + echo "Downloading: $filename" + curl -s -o "$filename" "https://raw.githubusercontent.com/pythonbpf/Python-BPF/master/$file" + done +fi + +cd "$WORK_DIR" || exit 1 +chown -R "$REAL_USER:$(id -gn "$REAL_USER")" . + +echo "Creating Python virtual environment..." +su - "$REAL_USER" -c "cd \"$WORK_DIR\" && python3 -m venv venv" + +echo "Installing Python packages..." +su - "$REAL_USER" -c "cd \"$WORK_DIR\" && source venv/bin/activate && pip install --upgrade pip && pip install jupyter pythonbpf pylibbpf matplotlib" + +cat > "$WORK_DIR/start_jupyter.sh" << EOF +#!/bin/bash +cd "$WORK_DIR" +source venv/bin/activate +cd examples +sudo ../venv/bin/python -m notebook --ip=0.0.0.0 --allow-root +EOF + +chmod +x "$WORK_DIR/start_jupyter.sh" +chown "$REAL_USER:$(id -gn "$REAL_USER")" "$WORK_DIR/start_jupyter.sh" + +echo "========================================================" +echo "Setup complete! To start Jupyter Notebook, run:" +echo "$ sudo $WORK_DIR/start_jupyter.sh" +echo "" From cf3f4a00024c5e8147216b22d15afcee0cc358fb Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Wed, 22 Oct 2025 03:21:05 +0530 Subject: [PATCH 49/51] make script curlable --- tools/setup.sh | 59 +++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/tools/setup.sh b/tools/setup.sh index 39f8245..6dc62f7 100755 --- a/tools/setup.sh +++ b/tools/setup.sh @@ -1,35 +1,36 @@ #!/bin/bash -echo "====================================================================" -echo " ⚠️ WARNING ⚠️" -echo " This script will run kernel-level BPF programs." -echo " BPF programs run with kernel privileges and could potentially" -echo " affect system stability if not used properly." -echo "" -echo " PLEASE REVIEW THE SOURCE CODE BEFORE RUNNING:" -echo " https://github.com/pythonbpf/python-bpf" -echo "====================================================================" -echo - -echo "This script will:" -echo "1. Check and install required dependencies (libelf, clang, python, bpftool)" -echo "2. Download example programs from the Python-BPF GitHub repository" -echo "3. Create a Python virtual environment with necessary packages" -echo "4. Start a Jupyter notebook server" -echo - -read -p "Would you like to continue? (y/N) " -n 1 -r -echo -if [[ ! $REPLY =~ ^[Yy]$ ]]; then - echo "Script execution cancelled." - exit 1 -fi +print_warning() { + echo -e "\033[1;33m$1\033[0m" +} +print_info() { + echo -e "\033[1;32m$1\033[0m" +} if [ "$EUID" -ne 0 ]; then echo "Please run this script with sudo." exit 1 fi +print_warning "====================================================================" +print_warning " WARNING " +print_warning " This script will run kernel-level BPF programs. " +print_warning " BPF programs run with kernel privileges and could potentially " +print_warning " affect system stability if not used properly. " +print_warning " " +print_warning " This is a non-interactive version for curl piping. " +print_warning " The script will proceed automatically with installation. " +print_warning "====================================================================" +echo + +print_info "This script will:" +echo "1. Check and install required dependencies (libelf, clang, python, bpftool)" +echo "2. Download example programs from the Python-BPF GitHub repository" +echo "3. Create a Python virtual environment with necessary packages" +echo "4. Set up a Jupyter notebook server" +echo "Starting in 5 seconds. Press Ctrl+C to cancel..." +sleep 5 + WORK_DIR="/tmp/python_bpf_setup" REAL_USER=$(logname || echo "$SUDO_USER") @@ -143,7 +144,7 @@ mkdir -p examples cd examples || exit 1 echo "Fetching example files list..." -FILES=$(curl -s "https://api.github.com/repos/pythonbpf/Python-BPF/contents/examples" | grep -o '"path":"examples/[^"]*"' | awk -F'"' '{print $4}') +FILES=$(curl -s "https://api.github.com/repos/pythonbpf/Python-BPF/contents/examples" | grep -o '"path": "examples/[^"]*"' | awk -F'"' '{print $4}') if [ -z "$FILES" ]; then echo "Failed to fetch file list from repository. Using fallback method..." @@ -192,7 +193,7 @@ EOF chmod +x "$WORK_DIR/start_jupyter.sh" chown "$REAL_USER:$(id -gn "$REAL_USER")" "$WORK_DIR/start_jupyter.sh" -echo "========================================================" -echo "Setup complete! To start Jupyter Notebook, run:" -echo "$ sudo $WORK_DIR/start_jupyter.sh" -echo "" +print_info "========================================================" +print_info "Setup complete! To start Jupyter Notebook, run:" +print_info "$ sudo $WORK_DIR/start_jupyter.sh" +print_info "========================================================" From 711e34cae13c02d30cfecf4976a1789cf3a8d1cd Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Wed, 22 Oct 2025 03:25:00 +0530 Subject: [PATCH 50/51] Add script running instruction to README.md --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 1b4c3f8..b09d6de 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,12 @@ Python-BPF is an LLVM IR generator for eBPF programs written in Python. It uses --- +## Try It Out! +Run +```bash +curl -s https://raw.githubusercontent.com/pythonbpf/Python-BPF/refs/heads/master/tools/setup.sh | sudo bash +``` + ## Installation Dependencies: From 0d4ebf72b6e3a89232f700709fcdb2aa85cbd407 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Wed, 22 Oct 2025 03:59:53 +0530 Subject: [PATCH 51/51] lint readme Signed-off-by: varun-r-mallya --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b09d6de..fe88f18 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ Python-BPF is an LLVM IR generator for eBPF programs written in Python. It uses ## Try It Out! Run ```bash -curl -s https://raw.githubusercontent.com/pythonbpf/Python-BPF/refs/heads/master/tools/setup.sh | sudo bash +curl -s https://raw.githubusercontent.com/pythonbpf/Python-BPF/refs/heads/master/tools/setup.sh | sudo bash ``` ## Installation