24 Commits

Author SHA1 Message Date
7ae629e8f7 bump version to v0.1.5 2025-10-16 19:04:04 +05:30
dd734ea2aa Merge pull request #56 from pythonbpf/vmlinux-ir-gen
Adds IR and debug info generation capabilities for vmlinux imported structs
2025-10-16 18:59:32 +05:30
71d005b6b1 complete vmlinux struct name generation in IR.
* Breaks when it finds unions.
* Still does not support function pointers.
2025-10-16 18:58:28 +05:30
5d9a29ee8e format chore 2025-10-16 18:22:25 +05:30
041e538b53 fix errors. Does not support union name resolution yet. 2025-10-16 18:21:14 +05:30
5413cc793b something fixed itself. 2025-10-16 18:06:36 +05:30
f21837aefe support most bitfields 2025-10-16 04:13:04 +05:30
0f5c1fa752 format chore 2025-10-16 04:10:24 +05:30
de02731ea1 add support with ctypes getattr offset. Also supports bitfields.
* breaks when struct_ring_buffer_per_cpu
2025-10-16 04:08:06 +05:30
c22d85ceb8 add array field generation support 2025-10-15 23:56:04 +05:30
2b3c81affa TODO added for llvmlite attribute issue
*Refer: https://github.com/numba/llvmlite/issues/1331

Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-15 21:35:28 +05:30
8372111616 add basic IR gen strategy 2025-10-15 21:25:53 +05:30
eb4ee64ee5 Revert "float vmlinux_assignments_symtab"
This reverts commit ce7b170fea.
2025-10-15 19:11:53 +05:30
ce7b170fea float vmlinux_assignments_symtab 2025-10-15 18:19:51 +05:30
9a60dd87e3 Merge pull request #55 from pythonbpf/vmlinux-ir-gen
remove bitfield support and add assignment support
2025-10-15 18:07:27 +05:30
c499fe7421 solve static typing issues 2025-10-15 18:05:57 +05:30
8239097fbb format chore 2025-10-15 17:49:38 +05:30
a4cfc2b7aa add assignments table and offset handler 2025-10-15 17:49:20 +05:30
69b73003ca setup skeleton for offset calculation 2025-10-15 04:42:38 +05:30
11e8e72188 add base for ir gen 2025-10-15 02:00:23 +05:30
d3f0e3b2ef remove tbaa_gen and make IR generator module 2025-10-14 03:09:18 +05:30
09ba749b46 Merge pull request #52 from pythonbpf/vmlinux-ir-gen
Dependency tree functionality to semantic analyser
2025-10-14 02:37:43 +05:30
a03d3e5d4c format chore 2025-10-14 02:36:04 +05:30
e1f9ac6ba0 add dependency tree functionality 2025-10-14 02:35:49 +05:30
13 changed files with 415 additions and 45 deletions

View File

@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "pythonbpf"
version = "0.1.4"
version = "0.1.5"
description = "Reduced Python frontend for eBPF"
authors = [
{ name = "r41k0u", email="pragyanshchaturvedi18@gmail.com" },

View File

@ -19,10 +19,20 @@ from pylibbpf import BpfProgram
import tempfile
from logging import Logger
import logging
import re
logger: Logger = logging.getLogger(__name__)
VERSION = "v0.1.4"
VERSION = "v0.1.5"
def finalize_module(original_str):
"""After all IR generation is complete, we monkey patch btf_ama attribute"""
# Create a string with applied transformation of btf_ama attribute addition to BTF struct field accesses.
pattern = r'(@"llvm\.[^"]+:[^"]*" = external global i64, !llvm\.preserve\.access\.index ![0-9]+)'
replacement = r'\1 "btf_ama"'
return re.sub(pattern, replacement, original_str)
def find_bpf_chunks(tree):
@ -121,10 +131,12 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
module.add_named_metadata("llvm.ident", [f"PythonBPF {VERSION}"])
module_string = finalize_module(str(module))
logger.info(f"IR written to {output}")
with open(output, "w") as f:
f.write(f'source_filename = "{filename}"\n')
f.write(str(module))
f.write(module_string)
f.write("\n")
return output

View File

@ -60,6 +60,10 @@ def process_vmlinux_post_ast(
pass
else:
new_dep_node = DependencyNode(name=current_symbol_name)
# elem_type_class is the actual vmlinux struct/class
new_dep_node.set_ctype_struct(elem_type_class)
handler.add_node(new_dep_node)
class_obj = getattr(imported_module, current_symbol_name)
# Inspect the class fields
@ -112,7 +116,11 @@ def process_vmlinux_post_ast(
type_length = elem_type._length_
if containing_type.__module__ == "vmlinux":
pass
new_dep_node.add_dependent(
elem_type._type_.__name__
if hasattr(elem_type._type_, "__name__")
else str(elem_type._type_)
)
elif containing_type.__module__ == ctypes.__name__:
if isinstance(elem_type, type):
if issubclass(elem_type, ctypes.Array):
@ -137,10 +145,35 @@ def process_vmlinux_post_ast(
)
new_dep_node.set_field_type(elem_name, elem_type)
if containing_type.__module__ == "vmlinux":
process_vmlinux_post_ast(
containing_type, llvm_handler, handler, processing_stack
containing_type_name = (
containing_type.__name__
if hasattr(containing_type, "__name__")
else str(containing_type)
)
new_dep_node.set_field_ready(elem_name, True)
# Check for self-reference or already processed
if containing_type_name == current_symbol_name:
# Self-referential pointer
logger.debug(
f"Self-referential pointer in {current_symbol_name}.{elem_name}"
)
new_dep_node.set_field_ready(elem_name, True)
elif handler.has_node(containing_type_name):
# Already processed
logger.debug(
f"Reusing already processed {containing_type_name}"
)
new_dep_node.set_field_ready(elem_name, True)
else:
# Process recursively - THIS WAS MISSING
new_dep_node.add_dependent(containing_type_name)
process_vmlinux_post_ast(
containing_type,
llvm_handler,
handler,
processing_stack,
)
new_dep_node.set_field_ready(elem_name, True)
elif containing_type.__module__ == ctypes.__name__:
logger.debug(f"Processing ctype internal{containing_type}")
new_dep_node.set_field_ready(elem_name, True)
@ -149,6 +182,11 @@ def process_vmlinux_post_ast(
"Module not supported in recursive resolution"
)
else:
new_dep_node.add_dependent(
elem_type.__name__
if hasattr(elem_type, "__name__")
else str(elem_type)
)
process_vmlinux_post_ast(
elem_type, llvm_handler, handler, processing_stack
)

View File

@ -147,3 +147,27 @@ class DependencyHandler:
int: The number of nodes
"""
return len(self._nodes)
def __getitem__(self, name: str) -> DependencyNode:
"""
Get a node by name using dictionary-style access.
Args:
name: The name of the node to retrieve
Returns:
DependencyNode: The node with the given name
Raises:
KeyError: If no node with the given name exists
Example:
node = handler["some-dep_node_name"]
"""
if name not in self._nodes:
raise KeyError(f"No node with name '{name}' found")
return self._nodes[name]
@property
def nodes(self):
return self._nodes

View File

@ -1,5 +1,6 @@
from dataclasses import dataclass, field
from typing import Dict, Any, Optional
import ctypes
# TODO: FIX THE FUCKING TYPE NAME CONVENTION.
@ -13,6 +14,7 @@ class Field:
containing_type: Optional[Any]
type_size: Optional[int]
bitfield_size: Optional[int]
offset: int
value: Any = None
ready: bool = False
@ -60,6 +62,10 @@ class Field:
if mark_ready:
self.ready = True
def set_offset(self, offset: int) -> None:
"""Set the offset of this field"""
self.offset = offset
@dataclass
class DependencyNode:
@ -106,8 +112,11 @@ class DependencyNode:
"""
name: str
depends_on: Optional[list[str]] = None
fields: Dict[str, Field] = field(default_factory=dict)
_ready_cache: Optional[bool] = field(default=None, repr=False)
current_offset: int = 0
ctype_struct: Optional[Any] = field(default=None, repr=False)
def add_field(
self,
@ -119,8 +128,11 @@ class DependencyNode:
ctype_complex_type: Optional[int] = None,
bitfield_size: Optional[int] = None,
ready: bool = False,
offset: int = 0,
) -> None:
"""Add a field to the node with an optional initial value and readiness state."""
if self.depends_on is None:
self.depends_on = []
self.fields[name] = Field(
name=name,
type=field_type,
@ -130,10 +142,21 @@ class DependencyNode:
type_size=type_size,
ctype_complex_type=ctype_complex_type,
bitfield_size=bitfield_size,
offset=offset,
)
# Invalidate readiness cache
self._ready_cache = None
def set_ctype_struct(self, ctype_struct: Any) -> None:
"""Set the ctypes structure for automatic offset calculation."""
self.ctype_struct = ctype_struct
def __sizeof__(self):
# If we have a ctype_struct, use its size
if self.ctype_struct is not None:
return ctypes.sizeof(self.ctype_struct)
return self.current_offset
def get_field(self, name: str) -> Field:
"""Get a field by name."""
return self.fields[name]
@ -200,15 +223,112 @@ class DependencyNode:
# Invalidate readiness cache
self._ready_cache = None
def set_field_ready(self, name: str, is_ready: bool = False) -> None:
def set_field_ready(
self,
name: str,
is_ready: bool = False,
size_of_containing_type: Optional[int] = None,
) -> None:
"""Mark a field as ready or not ready."""
if name not in self.fields:
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
self.fields[name].set_ready(is_ready)
# Use ctypes built-in offset if available
if self.ctype_struct is not None:
try:
self.fields[name].set_offset(getattr(self.ctype_struct, name).offset)
except AttributeError:
# Fallback to manual calculation if field not found in ctype_struct
self.fields[name].set_offset(self.current_offset)
self.current_offset += self._calculate_size(
name, size_of_containing_type
)
else:
# Manual offset calculation when no ctype_struct is available
self.fields[name].set_offset(self.current_offset)
self.current_offset += self._calculate_size(name, size_of_containing_type)
# Invalidate readiness cache
self._ready_cache = None
def _calculate_size(
self, name: str, size_of_containing_type: Optional[int] = None
) -> int:
processing_field = self.fields[name]
# size_of_field will be in bytes
if processing_field.type.__module__ == ctypes.__name__:
size_of_field = ctypes.sizeof(processing_field.type)
return size_of_field
elif processing_field.type.__module__ == "vmlinux":
if processing_field.ctype_complex_type is not None:
if issubclass(processing_field.ctype_complex_type, ctypes.Array):
if processing_field.containing_type.__module__ == ctypes.__name__:
if (
processing_field.containing_type is not None
and processing_field.type_size is not None
):
size_of_field = (
ctypes.sizeof(processing_field.containing_type)
* processing_field.type_size
)
else:
raise RuntimeError(
f"{processing_field} has no containing_type or type_size"
)
return size_of_field
elif processing_field.containing_type.__module__ == "vmlinux":
if (
size_of_containing_type is not None
and processing_field.type_size is not None
):
size_of_field = (
size_of_containing_type * processing_field.type_size
)
else:
raise RuntimeError(
f"{processing_field} has no containing_type or type_size"
)
return size_of_field
elif issubclass(processing_field.ctype_complex_type, ctypes._Pointer):
return ctypes.sizeof(ctypes.c_void_p)
else:
raise NotImplementedError(
"This subclass of ctype not supported yet"
)
elif processing_field.type_size is not None:
# Handle vmlinux types with type_size but no ctype_complex_type
# This means it's a direct vmlinux struct field (not array/pointer wrapped)
# The type_size should already contain the full size of the struct
# But if there's a containing_type from vmlinux, we need that size
if processing_field.containing_type is not None:
if processing_field.containing_type.__module__ == "vmlinux":
# For vmlinux containing types, we need the pre-calculated size
if size_of_containing_type is not None:
return size_of_containing_type * processing_field.type_size
else:
raise RuntimeError(
f"Field {name}: vmlinux containing_type requires size_of_containing_type"
)
else:
raise ModuleNotFoundError(
f"Containing type module {processing_field.containing_type.__module__} not supported"
)
else:
raise RuntimeError("Wrong type found with no containing type")
else:
# No ctype_complex_type and no type_size, must rely on size_of_containing_type
if size_of_containing_type is None:
raise RuntimeError(
f"Size of containing type {size_of_containing_type} is None"
)
return size_of_containing_type
else:
raise ModuleNotFoundError("Module is not supported for the operation")
raise RuntimeError("control should not reach here")
@property
def is_ready(self) -> bool:
"""Check if the node is ready (all fields are ready)."""
@ -235,3 +355,9 @@ class DependencyNode:
def get_not_ready_fields(self) -> Dict[str, Field]:
"""Get all fields that are marked as not ready."""
return {name: elem for name, elem in self.fields.items() if not elem.ready}
def add_dependent(self, dep_type):
if dep_type in self.depends_on:
return
else:
self.depends_on.append(dep_type)

View File

@ -1,11 +1,11 @@
import ast
import logging
from typing import List, Tuple, Dict
from typing import List, Tuple, Any
import importlib
import inspect
from .dependency_handler import DependencyHandler
from .ir_generation import IRGenerator
from .ir_gen import IRGenerator
from .class_handler import process_vmlinux_class
logger = logging.getLogger(__name__)
@ -82,7 +82,7 @@ def vmlinux_proc(tree: ast.AST, module):
# initialise dependency handler
handler = DependencyHandler()
# initialise assignment dictionary of name to type
assignments: Dict[str, type] = {}
assignments: dict[str, tuple[type, Any]] = {}
if not import_statements:
logger.info("No vmlinux imports found")
@ -129,7 +129,19 @@ def vmlinux_proc(tree: ast.AST, module):
)
IRGenerator(module, handler)
return assignments
def process_vmlinux_assign(node, module, assignments: Dict[str, type]):
raise NotImplementedError("Assignment handling has not been implemented yet")
def process_vmlinux_assign(node, module, assignments: dict[str, tuple[type, Any]]):
# Check if this is a simple assignment with a constant value
if len(node.targets) == 1 and isinstance(node.targets[0], ast.Name):
target_name = node.targets[0].id
if isinstance(node.value, ast.Constant):
assignments[target_name] = (type(node.value.value), node.value.value)
logger.info(
f"Added assignment: {target_name} = {node.value.value!r} of type {type(node.value.value)}"
)
else:
raise ValueError(f"Unsupported assignment type for {target_name}")
else:
raise ValueError("Not a simple assignment")

View File

@ -0,0 +1,3 @@
from .ir_generation import IRGenerator
__all__ = ["IRGenerator"]

View File

@ -0,0 +1,15 @@
from pythonbpf.debuginfo import DebugInfoGenerator
def debug_info_generation(struct, llvm_module):
generator = DebugInfoGenerator(llvm_module)
# this is sample debug info generation
# i64type = generator.get_uint64_type()
struct_type = generator.create_struct_type([], 64 * 4, is_distinct=True)
global_var = generator.create_global_var_debug_info(
struct.name, struct_type, is_local=False
)
return global_var

View File

@ -0,0 +1,161 @@
import ctypes
import logging
from ..dependency_handler import DependencyHandler
from .debug_info_gen import debug_info_generation
from ..dependency_node import DependencyNode
import llvmlite.ir as ir
logger = logging.getLogger(__name__)
class IRGenerator:
# get the assignments dict and add this stuff to it.
def __init__(self, llvm_module, handler: DependencyHandler, assignment=None):
self.llvm_module = llvm_module
self.handler: DependencyHandler = handler
self.generated: list[str] = []
if not handler.is_ready:
raise ImportError(
"Semantic analysis of vmlinux imports failed. Cannot generate IR"
)
for struct in handler:
self.struct_processor(struct)
def struct_processor(self, struct, processing_stack=None):
# Initialize processing stack on first call
if processing_stack is None:
processing_stack = set()
# If already generated, skip
if struct.name in self.generated:
return
# Detect circular dependency
if struct.name in processing_stack:
logger.info(
f"Circular dependency detected for {struct.name}, skipping recursive processing"
)
# For circular dependencies, we can either:
# 1. Use forward declarations (opaque pointers)
# 2. Mark as incomplete and process later
# 3. Generate a placeholder type
# Here we'll just skip and let it be processed in its own call
return
logger.info(f"IR generating for {struct.name}")
# Add to processing stack before processing dependencies
processing_stack.add(struct.name)
try:
# Process all dependencies first
if struct.depends_on is None:
pass
else:
for dependency in struct.depends_on:
if dependency not in self.generated:
# Check if dependency exists in handler
if dependency in self.handler.nodes:
dep_node_from_dependency = self.handler[dependency]
# Pass the processing_stack down to track circular refs
self.struct_processor(
dep_node_from_dependency, processing_stack
)
else:
raise RuntimeError(
f"Warning: Dependency {dependency} not found in handler"
)
# Actual processor logic here after dependencies are resolved
self.gen_ir(struct)
self.generated.append(struct.name)
finally:
# Remove from processing stack after we're done
processing_stack.discard(struct.name)
def gen_ir(self, struct):
# TODO: we add the btf_ama attribute by monkey patching in the end of compilation, but once llvmlite
# accepts our issue, we will resort to normal accessed attribute based attribute addition
# currently we generate all possible field accesses for CO-RE and put into the assignment table
debug_info = debug_info_generation(struct, self.llvm_module)
field_index = 0
for field_name, field in struct.fields.items():
# does not take arrays and similar types into consideration yet.
if field.ctype_complex_type is not None and issubclass(
field.ctype_complex_type, ctypes.Array
):
array_size = field.type_size
containing_type = field.containing_type
if containing_type.__module__ == ctypes.__name__:
containing_type_size = ctypes.sizeof(containing_type)
for i in range(0, array_size):
field_co_re_name = self._struct_name_generator(
struct, field, field_index, True, i, containing_type_size
)
globvar = ir.GlobalVariable(
self.llvm_module, ir.IntType(64), name=field_co_re_name
)
globvar.linkage = "external"
globvar.set_metadata("llvm.preserve.access.index", debug_info)
field_index += 1
elif field.type_size is not None:
array_size = field.type_size
containing_type = field.containing_type
if containing_type.__module__ == "vmlinux":
containing_type_size = self.handler[
containing_type.__name__
].current_offset
for i in range(0, array_size):
field_co_re_name = self._struct_name_generator(
struct, field, field_index, True, i, containing_type_size
)
globvar = ir.GlobalVariable(
self.llvm_module, ir.IntType(64), name=field_co_re_name
)
globvar.linkage = "external"
globvar.set_metadata("llvm.preserve.access.index", debug_info)
field_index += 1
else:
field_co_re_name = self._struct_name_generator(
struct, field, field_index
)
field_index += 1
globvar = ir.GlobalVariable(
self.llvm_module, ir.IntType(64), name=field_co_re_name
)
globvar.linkage = "external"
globvar.set_metadata("llvm.preserve.access.index", debug_info)
def _struct_name_generator(
self,
struct: DependencyNode,
field,
field_index: int,
is_indexed: bool = False,
index: int = 0,
containing_type_size: int = 0,
) -> str:
if is_indexed:
name = (
"llvm."
+ struct.name.removeprefix("struct_")
+ f":0:{field.offset + index * containing_type_size}"
+ "$"
+ f"0:{field_index}:{index}"
)
return name
elif struct.name.startswith("struct_"):
name = (
"llvm."
+ struct.name.removeprefix("struct_")
+ f":0:{field.offset}"
+ "$"
+ f"0:{field_index}"
)
return name
else:
print(self.handler[struct.name])
raise TypeError(
"Name generation cannot occur due to type name not starting with struct"
)

View File

@ -1,14 +0,0 @@
import logging
from .dependency_handler import DependencyHandler
logger = logging.getLogger(__name__)
class IRGenerator:
def __init__(self, module, handler: DependencyHandler):
self.module = module
self.handler: DependencyHandler = handler
if not handler.is_ready:
raise ImportError(
"Semantic analysis of vmlinux imports failed. Cannot generate IR"
)

View File

@ -1,23 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
struct trace_entry {
short unsigned int type;
unsigned char flags;
unsigned char preempt_count;
int pid;
};
struct trace_event_raw_sys_enter {
struct trace_entry ent;
long int id;
long unsigned int args[6];
char __data[0];
};
struct event {
__u32 pid;
__u32 uid;

View File

@ -1,10 +1,17 @@
from pythonbpf import bpf, map, section, bpfglobal, compile_to_ir
from pythonbpf.maps import HashMap
from pythonbpf.helper import XDP_PASS
from vmlinux import struct_xdp_md
from vmlinux import struct_xdp_buff # noqa: F401
from vmlinux import struct_ring_buffer_per_cpu # noqa: F401
from vmlinux import TASK_COMM_LEN # noqa: F401
from vmlinux import struct_qspinlock # noqa: F401
# from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401
# from vmlinux import struct_posix_cputimers # noqa: F401
from vmlinux import struct_xdp_md
# from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401
# from vmlinux import struct_ring_buffer_per_cpu # noqa: F401
# from vmlinux import struct_request # noqa: F401
from ctypes import c_int64
# Instructions to how to run this program