Merge branch 'master' into bcc_examples

This commit is contained in:
varunrmallya
2025-10-22 04:09:07 +05:30
committed by GitHub
26 changed files with 1285 additions and 67 deletions

View File

@ -40,6 +40,12 @@ Python-BPF is an LLVM IR generator for eBPF programs written in Python. It uses
--- ---
## Try It Out!
Run
```bash
curl -s https://raw.githubusercontent.com/pythonbpf/Python-BPF/refs/heads/master/tools/setup.sh | sudo bash
```
## Installation ## Installation
Dependencies: Dependencies:

View File

@ -4,12 +4,26 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "pythonbpf" name = "pythonbpf"
version = "0.1.4" version = "0.1.5"
description = "Reduced Python frontend for eBPF" description = "Reduced Python frontend for eBPF"
authors = [ authors = [
{ name = "r41k0u", email="pragyanshchaturvedi18@gmail.com" }, { name = "r41k0u", email="pragyanshchaturvedi18@gmail.com" },
{ name = "varun-r-mallya", email="varunrmallya@gmail.com" } { name = "varun-r-mallya", email="varunrmallya@gmail.com" }
] ]
classifiers = [
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"Operating System :: POSIX :: Linux",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python",
"Topic :: Software Development :: Libraries :: Python Modules",
"Topic :: System :: Operating System Kernels :: Linux",
]
readme = "README.md" readme = "README.md"
license = {text = "Apache-2.0"} license = {text = "Apache-2.0"}
requires-python = ">=3.8" requires-python = ">=3.8"

View File

@ -5,6 +5,7 @@ from llvmlite import ir
from dataclasses import dataclass from dataclasses import dataclass
from typing import Any from typing import Any
from pythonbpf.helper import HelperHandlerRegistry from pythonbpf.helper import HelperHandlerRegistry
from .expr import VmlinuxHandlerRegistry
from pythonbpf.type_deducer import ctypes_to_ir from pythonbpf.type_deducer import ctypes_to_ir
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -65,6 +66,15 @@ def handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab):
logger.debug(f"Variable {var_name} already allocated, skipping") logger.debug(f"Variable {var_name} already allocated, skipping")
continue continue
# When allocating a variable, check if it's a vmlinux struct type
if isinstance(stmt.value, ast.Name) and VmlinuxHandlerRegistry.is_vmlinux_struct(
stmt.value.id
):
# Handle vmlinux struct allocation
# This requires more implementation
print(stmt.value)
pass
# Determine type and allocate based on rval # Determine type and allocate based on rval
if isinstance(rval, ast.Call): if isinstance(rval, ast.Call):
_allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab) _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab)
@ -85,7 +95,6 @@ def handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab):
f"Unsupported assignment value type for {var_name}: {type(rval).__name__}" f"Unsupported assignment value type for {var_name}: {type(rval).__name__}"
) )
def _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab): def _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab):
"""Allocate memory for variable assigned from a call.""" """Allocate memory for variable assigned from a call."""

View File

@ -5,6 +5,8 @@ from .functions import func_proc
from .maps import maps_proc from .maps import maps_proc
from .structs import structs_proc from .structs import structs_proc
from .vmlinux_parser import vmlinux_proc from .vmlinux_parser import vmlinux_proc
from pythonbpf.vmlinux_parser.vmlinux_exports_handler import VmlinuxHandler
from .expr import VmlinuxHandlerRegistry
from .globals_pass import ( from .globals_pass import (
globals_list_creation, globals_list_creation,
globals_processing, globals_processing,
@ -19,10 +21,20 @@ from pylibbpf import BpfObject
import tempfile import tempfile
from logging import Logger from logging import Logger
import logging import logging
import re
logger: Logger = logging.getLogger(__name__) logger: Logger = logging.getLogger(__name__)
VERSION = "v0.1.4" VERSION = "v0.1.5"
def finalize_module(original_str):
"""After all IR generation is complete, we monkey patch btf_ama attribute"""
# Create a string with applied transformation of btf_ama attribute addition to BTF struct field accesses.
pattern = r'(@"llvm\.[^"]+:[^"]*" = external global i64, !llvm\.preserve\.access\.index ![0-9]+)'
replacement = r'\1 "btf_ama"'
return re.sub(pattern, replacement, original_str)
def find_bpf_chunks(tree): def find_bpf_chunks(tree):
@ -45,11 +57,14 @@ def processor(source_code, filename, module):
for func_node in bpf_chunks: for func_node in bpf_chunks:
logger.info(f"Found BPF function/struct: {func_node.name}") logger.info(f"Found BPF function/struct: {func_node.name}")
vmlinux_proc(tree, module) vmlinux_symtab = vmlinux_proc(tree, module)
if vmlinux_symtab:
handler = VmlinuxHandler.initialize(vmlinux_symtab)
VmlinuxHandlerRegistry.set_handler(handler)
populate_global_symbol_table(tree, module) populate_global_symbol_table(tree, module)
license_processing(tree, module) license_processing(tree, module)
globals_processing(tree, module) globals_processing(tree, module)
structs_sym_tab = structs_proc(tree, module, bpf_chunks) structs_sym_tab = structs_proc(tree, module, bpf_chunks)
map_sym_tab = maps_proc(tree, module, bpf_chunks) map_sym_tab = maps_proc(tree, module, bpf_chunks)
func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab) func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab)
@ -122,10 +137,12 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
module.add_named_metadata("llvm.ident", [f"PythonBPF {VERSION}"]) module.add_named_metadata("llvm.ident", [f"PythonBPF {VERSION}"])
module_string = finalize_module(str(module))
logger.info(f"IR written to {output}") logger.info(f"IR written to {output}")
with open(output, "w") as f: with open(output, "w") as f:
f.write(f'source_filename = "{filename}"\n') f.write(f'source_filename = "{filename}"\n')
f.write(str(module)) f.write(module_string)
f.write("\n") f.write("\n")
return output, structs_sym_tab, maps_sym_tab return output, structs_sym_tab, maps_sym_tab

View File

@ -81,6 +81,20 @@ class DebugInfoGenerator:
}, },
) )
def create_array_type_vmlinux(self, type_info: Any, count: int) -> Any:
"""Create an array type of the given base type with specified count"""
base_type, type_sizing = type_info
subrange = self.module.add_debug_info("DISubrange", {"count": count})
return self.module.add_debug_info(
"DICompositeType",
{
"tag": dc.DW_TAG_array_type,
"baseType": base_type,
"size": type_sizing,
"elements": [subrange],
},
)
@staticmethod @staticmethod
def _compute_array_size(base_type: Any, count: int) -> int: def _compute_array_size(base_type: Any, count: int) -> int:
# Extract size from base_type if possible # Extract size from base_type if possible
@ -101,6 +115,23 @@ class DebugInfoGenerator:
}, },
) )
def create_struct_member_vmlinux(
self, name: str, base_type_with_size: Any, offset: int
) -> Any:
"""Create a struct member with the given name, type, and offset"""
base_type, type_size = base_type_with_size
return self.module.add_debug_info(
"DIDerivedType",
{
"tag": dc.DW_TAG_member,
"name": name,
"file": self.module._file_metadata,
"baseType": base_type,
"size": type_size,
"offset": offset,
},
)
def create_struct_type( def create_struct_type(
self, members: List[Any], size: int, is_distinct: bool self, members: List[Any], size: int, is_distinct: bool
) -> Any: ) -> Any:
@ -116,6 +147,22 @@ class DebugInfoGenerator:
is_distinct=is_distinct, is_distinct=is_distinct,
) )
def create_struct_type_with_name(
self, name: str, members: List[Any], size: int, is_distinct: bool
) -> Any:
"""Create a struct type with the given members and size"""
return self.module.add_debug_info(
"DICompositeType",
{
"name": name,
"tag": dc.DW_TAG_structure_type,
"file": self.module._file_metadata,
"size": size,
"elements": members,
},
is_distinct=is_distinct,
)
def create_global_var_debug_info( def create_global_var_debug_info(
self, name: str, var_type: Any, is_local: bool = False self, name: str, var_type: Any, is_local: bool = False
) -> Any: ) -> Any:

View File

@ -2,6 +2,7 @@ from .expr_pass import eval_expr, handle_expr, get_operand_value
from .type_normalization import convert_to_bool, get_base_type_and_depth from .type_normalization import convert_to_bool, get_base_type_and_depth
from .ir_ops import deref_to_depth from .ir_ops import deref_to_depth
from .call_registry import CallHandlerRegistry from .call_registry import CallHandlerRegistry
from .vmlinux_registry import VmlinuxHandlerRegistry
__all__ = [ __all__ = [
"eval_expr", "eval_expr",
@ -11,4 +12,5 @@ __all__ = [
"deref_to_depth", "deref_to_depth",
"get_operand_value", "get_operand_value",
"CallHandlerRegistry", "CallHandlerRegistry",
"VmlinuxHandlerRegistry",
] ]

View File

@ -12,6 +12,7 @@ from .type_normalization import (
get_base_type_and_depth, get_base_type_and_depth,
deref_to_depth, deref_to_depth,
) )
from .vmlinux_registry import VmlinuxHandlerRegistry
logger: Logger = logging.getLogger(__name__) logger: Logger = logging.getLogger(__name__)
@ -27,8 +28,12 @@ def _handle_name_expr(expr: ast.Name, local_sym_tab: Dict, builder: ir.IRBuilder
val = builder.load(var) val = builder.load(var)
return val, local_sym_tab[expr.id].ir_type return val, local_sym_tab[expr.id].ir_type
else: else:
logger.info(f"Undefined variable {expr.id}") # Check if it's a vmlinux enum/constant
return None vmlinux_result = VmlinuxHandlerRegistry.handle_name(expr.id)
if vmlinux_result is not None:
return vmlinux_result
raise SyntaxError(f"Undefined variable {expr.id}")
def _handle_constant_expr(module, builder, expr: ast.Constant): def _handle_constant_expr(module, builder, expr: ast.Constant):
@ -74,6 +79,13 @@ def _handle_attribute_expr(
val = builder.load(gep) val = builder.load(gep)
field_type = metadata.field_type(attr_name) field_type = metadata.field_type(attr_name)
return val, field_type return val, field_type
# Try vmlinux handler as fallback
vmlinux_result = VmlinuxHandlerRegistry.handle_attribute(
expr, local_sym_tab, None, builder
)
if vmlinux_result is not None:
return vmlinux_result
return None return None
@ -130,7 +142,12 @@ def get_operand_value(
logger.info(f"var is {var}, base_type is {base_type}, depth is {depth}") logger.info(f"var is {var}, base_type is {base_type}, depth is {depth}")
val = deref_to_depth(func, builder, var, depth) val = deref_to_depth(func, builder, var, depth)
return val return val
raise ValueError(f"Undefined variable: {operand.id}") else:
# Check if it's a vmlinux enum/constant
vmlinux_result = VmlinuxHandlerRegistry.handle_name(operand.id)
if vmlinux_result is not None:
val, _ = vmlinux_result
return val
elif isinstance(operand, ast.Constant): elif isinstance(operand, ast.Constant):
if isinstance(operand.value, int): if isinstance(operand.value, int):
cst = ir.Constant(ir.IntType(64), int(operand.value)) cst = ir.Constant(ir.IntType(64), int(operand.value))
@ -332,6 +349,7 @@ def _handle_unary_op(
neg_one = ir.Constant(ir.IntType(64), -1) neg_one = ir.Constant(ir.IntType(64), -1)
result = builder.mul(operand, neg_one) result = builder.mul(operand, neg_one)
return result, ir.IntType(64) return result, ir.IntType(64)
return None
# ============================================================================ # ============================================================================

View File

@ -0,0 +1,45 @@
import ast
class VmlinuxHandlerRegistry:
"""Registry for vmlinux handler operations"""
_handler = None
@classmethod
def set_handler(cls, handler):
"""Set the vmlinux handler"""
cls._handler = handler
@classmethod
def get_handler(cls):
"""Get the vmlinux handler"""
return cls._handler
@classmethod
def handle_name(cls, name):
"""Try to handle a name as vmlinux enum/constant"""
if cls._handler is None:
return None
return cls._handler.handle_vmlinux_enum(name)
@classmethod
def handle_attribute(cls, expr, local_sym_tab, module, builder):
"""Try to handle an attribute access as vmlinux struct field"""
if cls._handler is None:
return None
if isinstance(expr.value, ast.Name):
var_name = expr.value.id
field_name = expr.attr
return cls._handler.handle_vmlinux_struct_field(
var_name, field_name, module, builder, local_sym_tab
)
return None
@classmethod
def is_vmlinux_struct(cls, name):
"""Check if a name refers to a vmlinux struct"""
if cls._handler is None:
return False
return cls._handler.is_vmlinux_struct(name)

View File

@ -310,7 +310,13 @@ def process_stmt(
def process_func_body( def process_func_body(
module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab module,
builder,
func_node,
func,
ret_type,
map_sym_tab,
structs_sym_tab,
): ):
"""Process the body of a bpf function""" """Process the body of a bpf function"""
# TODO: A lot. We just have print -> bpf_trace_printk for now # TODO: A lot. We just have print -> bpf_trace_printk for now
@ -383,7 +389,13 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t
builder = ir.IRBuilder(block) builder = ir.IRBuilder(block)
process_func_body( process_func_body(
module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab module,
builder,
func_node,
func,
ret_type,
map_sym_tab,
structs_sym_tab,
) )
return func return func

View File

@ -3,6 +3,7 @@ import logging
from llvmlite import ir from llvmlite import ir
from pythonbpf.expr import eval_expr, get_base_type_and_depth, deref_to_depth from pythonbpf.expr import eval_expr, get_base_type_and_depth, deref_to_depth
from pythonbpf.expr.vmlinux_registry import VmlinuxHandlerRegistry
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -108,6 +109,16 @@ def _process_name_in_fval(name_node, fmt_parts, exprs, local_sym_tab):
if local_sym_tab and name_node.id in local_sym_tab: if local_sym_tab and name_node.id in local_sym_tab:
_, var_type, tmp = local_sym_tab[name_node.id] _, var_type, tmp = local_sym_tab[name_node.id]
_populate_fval(var_type, name_node, fmt_parts, exprs) _populate_fval(var_type, name_node, fmt_parts, exprs)
else:
# Try to resolve through vmlinux registry if not in local symbol table
result = VmlinuxHandlerRegistry.handle_name(name_node.id)
if result:
val, var_type = result
_populate_fval(var_type, name_node, fmt_parts, exprs)
else:
raise ValueError(
f"Variable '{name_node.id}' not found in symbol table or vmlinux"
)
def _process_attr_in_fval(attr_node, fmt_parts, exprs, local_sym_tab, struct_sym_tab): def _process_attr_in_fval(attr_node, fmt_parts, exprs, local_sym_tab, struct_sym_tab):

View File

@ -6,6 +6,8 @@ from llvmlite import ir
from .maps_utils import MapProcessorRegistry from .maps_utils import MapProcessorRegistry
from .map_types import BPFMapType from .map_types import BPFMapType
from .map_debug_info import create_map_debug_info, create_ringbuf_debug_info from .map_debug_info import create_map_debug_info, create_ringbuf_debug_info
from pythonbpf.expr.vmlinux_registry import VmlinuxHandlerRegistry
logger: Logger = logging.getLogger(__name__) logger: Logger = logging.getLogger(__name__)
@ -51,7 +53,7 @@ def _parse_map_params(rval, expected_args=None):
"""Parse map parameters from call arguments and keywords.""" """Parse map parameters from call arguments and keywords."""
params = {} params = {}
handler = VmlinuxHandlerRegistry.get_handler()
# Parse positional arguments # Parse positional arguments
if expected_args: if expected_args:
for i, arg_name in enumerate(expected_args): for i, arg_name in enumerate(expected_args):
@ -65,7 +67,12 @@ def _parse_map_params(rval, expected_args=None):
# Parse keyword arguments (override positional) # Parse keyword arguments (override positional)
for keyword in rval.keywords: for keyword in rval.keywords:
if isinstance(keyword.value, ast.Name): if isinstance(keyword.value, ast.Name):
params[keyword.arg] = keyword.value.id name = keyword.value.id
if handler and handler.is_vmlinux_enum(name):
result = handler.get_vmlinux_enum_value(name)
params[keyword.arg] = result if result is not None else name
else:
params[keyword.arg] = name
elif isinstance(keyword.value, ast.Constant): elif isinstance(keyword.value, ast.Constant):
params[keyword.arg] = keyword.value.value params[keyword.arg] = keyword.value.value

View File

@ -0,0 +1,36 @@
from enum import Enum, auto
from typing import Any, Dict, List, Optional, TypedDict
from dataclasses import dataclass
import llvmlite.ir as ir
from pythonbpf.vmlinux_parser.dependency_node import Field
@dataclass
class AssignmentType(Enum):
CONSTANT = auto()
STRUCT = auto()
ARRAY = auto() # probably won't be used
FUNCTION_POINTER = auto()
POINTER = auto() # again, probably won't be used
@dataclass
class FunctionSignature(TypedDict):
return_type: str
param_types: List[str]
varargs: bool
# Thew name of the assignment will be in the dict that uses this class
@dataclass
class AssignmentInfo(TypedDict):
value_type: AssignmentType
python_type: type
value: Optional[Any]
pointer_level: Optional[int]
signature: Optional[FunctionSignature] # For function pointers
# The key of the dict is the name of the field.
# Value is a tuple that contains the global variable representing that field
# along with all the information about that field as a Field type.
members: Optional[Dict[str, tuple[ir.GlobalVariable, Field]]] # For structs.

View File

@ -1,6 +1,7 @@
import logging import logging
from functools import lru_cache from functools import lru_cache
import importlib import importlib
from .dependency_handler import DependencyHandler from .dependency_handler import DependencyHandler
from .dependency_node import DependencyNode from .dependency_node import DependencyNode
import ctypes import ctypes
@ -15,7 +16,11 @@ def get_module_symbols(module_name: str):
return [name for name in dir(imported_module)], imported_module return [name for name in dir(imported_module)], imported_module
def process_vmlinux_class(node, llvm_module, handler: DependencyHandler): def process_vmlinux_class(
node,
llvm_module,
handler: DependencyHandler,
):
symbols_in_module, imported_module = get_module_symbols("vmlinux") symbols_in_module, imported_module = get_module_symbols("vmlinux")
if node.name in symbols_in_module: if node.name in symbols_in_module:
vmlinux_type = getattr(imported_module, node.name) vmlinux_type = getattr(imported_module, node.name)
@ -25,7 +30,10 @@ def process_vmlinux_class(node, llvm_module, handler: DependencyHandler):
def process_vmlinux_post_ast( def process_vmlinux_post_ast(
elem_type_class, llvm_handler, handler: DependencyHandler, processing_stack=None elem_type_class,
llvm_handler,
handler: DependencyHandler,
processing_stack=None,
): ):
# Initialize processing stack on first call # Initialize processing stack on first call
if processing_stack is None: if processing_stack is None:
@ -46,7 +54,7 @@ def process_vmlinux_post_ast(
logger.debug(f"Node {current_symbol_name} already processed and ready") logger.debug(f"Node {current_symbol_name} already processed and ready")
return True return True
# XXX:Check it's use. It's probably not being used. # XXX:Check its use. It's probably not being used.
if current_symbol_name in processing_stack: if current_symbol_name in processing_stack:
logger.debug( logger.debug(
f"Dependency already in processing stack for {current_symbol_name}, skipping" f"Dependency already in processing stack for {current_symbol_name}, skipping"
@ -60,6 +68,10 @@ def process_vmlinux_post_ast(
pass pass
else: else:
new_dep_node = DependencyNode(name=current_symbol_name) new_dep_node = DependencyNode(name=current_symbol_name)
# elem_type_class is the actual vmlinux struct/class
new_dep_node.set_ctype_struct(elem_type_class)
handler.add_node(new_dep_node) handler.add_node(new_dep_node)
class_obj = getattr(imported_module, current_symbol_name) class_obj = getattr(imported_module, current_symbol_name)
# Inspect the class fields # Inspect the class fields
@ -94,12 +106,47 @@ def process_vmlinux_post_ast(
[elem_type, elem_bitfield_size] = elem_temp_list [elem_type, elem_bitfield_size] = elem_temp_list
local_module_name = getattr(elem_type, "__module__", None) local_module_name = getattr(elem_type, "__module__", None)
new_dep_node.add_field(elem_name, elem_type, ready=False) new_dep_node.add_field(elem_name, elem_type, ready=False)
if local_module_name == ctypes.__name__: if local_module_name == ctypes.__name__:
# TODO: need to process pointer to ctype and also CFUNCTYPES here recursively. Current processing is a single dereference
new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size) new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size)
new_dep_node.set_field_ready(elem_name, is_ready=True)
logger.debug( # Process pointer to ctype
f"Field {elem_name} is direct ctypes type: {elem_type}" if isinstance(elem_type, type) and issubclass(
) elem_type, ctypes._Pointer
):
# Get the pointed-to type
pointed_type = elem_type._type_
logger.debug(f"Found pointer to type: {pointed_type}")
new_dep_node.set_field_containing_type(elem_name, pointed_type)
new_dep_node.set_field_ctype_complex_type(
elem_name, ctypes._Pointer
)
new_dep_node.set_field_ready(elem_name, is_ready=True)
# Process function pointers (CFUNCTYPE)
elif hasattr(elem_type, "_restype_") and hasattr(
elem_type, "_argtypes_"
):
# This is a CFUNCTYPE or similar
logger.info(
f"Function pointer detected for {elem_name} with return type {elem_type._restype_} and arguments {elem_type._argtypes_}"
)
# Set the field as ready but mark it with special handling
new_dep_node.set_field_ctype_complex_type(
elem_name, ctypes.CFUNCTYPE
)
new_dep_node.set_field_ready(elem_name, is_ready=True)
logger.warning(
"Blindly processing CFUNCTYPE ctypes to ensure compilation. Unsupported"
)
else:
# Regular ctype
new_dep_node.set_field_ready(elem_name, is_ready=True)
logger.debug(
f"Field {elem_name} is direct ctypes type: {elem_type}"
)
elif local_module_name == "vmlinux": elif local_module_name == "vmlinux":
new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size) new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size)
logger.debug( logger.debug(
@ -112,13 +159,21 @@ def process_vmlinux_post_ast(
type_length = elem_type._length_ type_length = elem_type._length_
if containing_type.__module__ == "vmlinux": if containing_type.__module__ == "vmlinux":
pass new_dep_node.add_dependent(
elem_type._type_.__name__
if hasattr(elem_type._type_, "__name__")
else str(elem_type._type_)
)
elif containing_type.__module__ == ctypes.__name__: elif containing_type.__module__ == ctypes.__name__:
if isinstance(elem_type, type): if isinstance(elem_type, type):
if issubclass(elem_type, ctypes.Array): if issubclass(elem_type, ctypes.Array):
ctype_complex_type = ctypes.Array ctype_complex_type = ctypes.Array
elif issubclass(elem_type, ctypes._Pointer): elif issubclass(elem_type, ctypes._Pointer):
ctype_complex_type = ctypes._Pointer ctype_complex_type = ctypes._Pointer
else:
raise ImportError(
"Non Array and Pointer type ctype imports not supported in current version"
)
else: else:
raise TypeError("Unsupported ctypes subclass") raise TypeError("Unsupported ctypes subclass")
else: else:
@ -137,10 +192,35 @@ def process_vmlinux_post_ast(
) )
new_dep_node.set_field_type(elem_name, elem_type) new_dep_node.set_field_type(elem_name, elem_type)
if containing_type.__module__ == "vmlinux": if containing_type.__module__ == "vmlinux":
process_vmlinux_post_ast( containing_type_name = (
containing_type, llvm_handler, handler, processing_stack containing_type.__name__
if hasattr(containing_type, "__name__")
else str(containing_type)
) )
new_dep_node.set_field_ready(elem_name, True)
# Check for self-reference or already processed
if containing_type_name == current_symbol_name:
# Self-referential pointer
logger.debug(
f"Self-referential pointer in {current_symbol_name}.{elem_name}"
)
new_dep_node.set_field_ready(elem_name, True)
elif handler.has_node(containing_type_name):
# Already processed
logger.debug(
f"Reusing already processed {containing_type_name}"
)
new_dep_node.set_field_ready(elem_name, True)
else:
# Process recursively - THIS WAS MISSING
new_dep_node.add_dependent(containing_type_name)
process_vmlinux_post_ast(
containing_type,
llvm_handler,
handler,
processing_stack,
)
new_dep_node.set_field_ready(elem_name, True)
elif containing_type.__module__ == ctypes.__name__: elif containing_type.__module__ == ctypes.__name__:
logger.debug(f"Processing ctype internal{containing_type}") logger.debug(f"Processing ctype internal{containing_type}")
new_dep_node.set_field_ready(elem_name, True) new_dep_node.set_field_ready(elem_name, True)
@ -149,8 +229,16 @@ def process_vmlinux_post_ast(
"Module not supported in recursive resolution" "Module not supported in recursive resolution"
) )
else: else:
new_dep_node.add_dependent(
elem_type.__name__
if hasattr(elem_type, "__name__")
else str(elem_type)
)
process_vmlinux_post_ast( process_vmlinux_post_ast(
elem_type, llvm_handler, handler, processing_stack elem_type,
llvm_handler,
handler,
processing_stack,
) )
new_dep_node.set_field_ready(elem_name, True) new_dep_node.set_field_ready(elem_name, True)
else: else:
@ -161,7 +249,7 @@ def process_vmlinux_post_ast(
else: else:
raise ImportError("UNSUPPORTED Module") raise ImportError("UNSUPPORTED Module")
logging.info( logger.info(
f"{current_symbol_name} processed and handler readiness {handler.is_ready}" f"{current_symbol_name} processed and handler readiness {handler.is_ready}"
) )
return True return True

View File

@ -147,3 +147,27 @@ class DependencyHandler:
int: The number of nodes int: The number of nodes
""" """
return len(self._nodes) return len(self._nodes)
def __getitem__(self, name: str) -> DependencyNode:
"""
Get a node by name using dictionary-style access.
Args:
name: The name of the node to retrieve
Returns:
DependencyNode: The node with the given name
Raises:
KeyError: If no node with the given name exists
Example:
node = handler["some-dep_node_name"]
"""
if name not in self._nodes:
raise KeyError(f"No node with name '{name}' found")
return self._nodes[name]
@property
def nodes(self):
return self._nodes

View File

@ -1,5 +1,6 @@
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Dict, Any, Optional from typing import Dict, Any, Optional
import ctypes
# TODO: FIX THE FUCKING TYPE NAME CONVENTION. # TODO: FIX THE FUCKING TYPE NAME CONVENTION.
@ -13,9 +14,35 @@ class Field:
containing_type: Optional[Any] containing_type: Optional[Any]
type_size: Optional[int] type_size: Optional[int]
bitfield_size: Optional[int] bitfield_size: Optional[int]
offset: int
value: Any = None value: Any = None
ready: bool = False ready: bool = False
def __hash__(self):
"""
Create a hash based on the immutable attributes that define this field's identity.
This allows Field objects to be used as dictionary keys.
"""
# Use a tuple of the fields that uniquely identify this field
identity = (
self.name,
id(self.type), # Use id for non-hashable types
id(self.ctype_complex_type) if self.ctype_complex_type else None,
id(self.containing_type) if self.containing_type else None,
self.type_size,
self.bitfield_size,
self.offset,
self.value if self.value else None,
)
return hash(identity)
def __eq__(self, other):
"""
Define equality consistent with the hash function.
Two fields are equal if they have they are the same
"""
return self is other
def set_ready(self, is_ready: bool = True) -> None: def set_ready(self, is_ready: bool = True) -> None:
"""Set the readiness state of this field.""" """Set the readiness state of this field."""
self.ready = is_ready self.ready = is_ready
@ -60,6 +87,10 @@ class Field:
if mark_ready: if mark_ready:
self.ready = True self.ready = True
def set_offset(self, offset: int) -> None:
"""Set the offset of this field"""
self.offset = offset
@dataclass @dataclass
class DependencyNode: class DependencyNode:
@ -106,8 +137,11 @@ class DependencyNode:
""" """
name: str name: str
depends_on: Optional[list[str]] = None
fields: Dict[str, Field] = field(default_factory=dict) fields: Dict[str, Field] = field(default_factory=dict)
_ready_cache: Optional[bool] = field(default=None, repr=False) _ready_cache: Optional[bool] = field(default=None, repr=False)
current_offset: int = 0
ctype_struct: Optional[Any] = field(default=None, repr=False)
def add_field( def add_field(
self, self,
@ -119,8 +153,11 @@ class DependencyNode:
ctype_complex_type: Optional[int] = None, ctype_complex_type: Optional[int] = None,
bitfield_size: Optional[int] = None, bitfield_size: Optional[int] = None,
ready: bool = False, ready: bool = False,
offset: int = 0,
) -> None: ) -> None:
"""Add a field to the node with an optional initial value and readiness state.""" """Add a field to the node with an optional initial value and readiness state."""
if self.depends_on is None:
self.depends_on = []
self.fields[name] = Field( self.fields[name] = Field(
name=name, name=name,
type=field_type, type=field_type,
@ -130,10 +167,21 @@ class DependencyNode:
type_size=type_size, type_size=type_size,
ctype_complex_type=ctype_complex_type, ctype_complex_type=ctype_complex_type,
bitfield_size=bitfield_size, bitfield_size=bitfield_size,
offset=offset,
) )
# Invalidate readiness cache # Invalidate readiness cache
self._ready_cache = None self._ready_cache = None
def set_ctype_struct(self, ctype_struct: Any) -> None:
"""Set the ctypes structure for automatic offset calculation."""
self.ctype_struct = ctype_struct
def __sizeof__(self):
# If we have a ctype_struct, use its size
if self.ctype_struct is not None:
return ctypes.sizeof(self.ctype_struct)
return self.current_offset
def get_field(self, name: str) -> Field: def get_field(self, name: str) -> Field:
"""Get a field by name.""" """Get a field by name."""
return self.fields[name] return self.fields[name]
@ -200,15 +248,112 @@ class DependencyNode:
# Invalidate readiness cache # Invalidate readiness cache
self._ready_cache = None self._ready_cache = None
def set_field_ready(self, name: str, is_ready: bool = False) -> None: def set_field_ready(
self,
name: str,
is_ready: bool = False,
size_of_containing_type: Optional[int] = None,
) -> None:
"""Mark a field as ready or not ready.""" """Mark a field as ready or not ready."""
if name not in self.fields: if name not in self.fields:
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'") raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
self.fields[name].set_ready(is_ready) self.fields[name].set_ready(is_ready)
# Use ctypes built-in offset if available
if self.ctype_struct is not None:
try:
self.fields[name].set_offset(getattr(self.ctype_struct, name).offset)
except AttributeError:
# Fallback to manual calculation if field not found in ctype_struct
self.fields[name].set_offset(self.current_offset)
self.current_offset += self._calculate_size(
name, size_of_containing_type
)
else:
# Manual offset calculation when no ctype_struct is available
self.fields[name].set_offset(self.current_offset)
self.current_offset += self._calculate_size(name, size_of_containing_type)
# Invalidate readiness cache # Invalidate readiness cache
self._ready_cache = None self._ready_cache = None
def _calculate_size(
self, name: str, size_of_containing_type: Optional[int] = None
) -> int:
processing_field = self.fields[name]
# size_of_field will be in bytes
if processing_field.type.__module__ == ctypes.__name__:
size_of_field = ctypes.sizeof(processing_field.type)
return size_of_field
elif processing_field.type.__module__ == "vmlinux":
if processing_field.ctype_complex_type is not None:
if issubclass(processing_field.ctype_complex_type, ctypes.Array):
if processing_field.containing_type.__module__ == ctypes.__name__:
if (
processing_field.containing_type is not None
and processing_field.type_size is not None
):
size_of_field = (
ctypes.sizeof(processing_field.containing_type)
* processing_field.type_size
)
else:
raise RuntimeError(
f"{processing_field} has no containing_type or type_size"
)
return size_of_field
elif processing_field.containing_type.__module__ == "vmlinux":
if (
size_of_containing_type is not None
and processing_field.type_size is not None
):
size_of_field = (
size_of_containing_type * processing_field.type_size
)
else:
raise RuntimeError(
f"{processing_field} has no containing_type or type_size"
)
return size_of_field
elif issubclass(processing_field.ctype_complex_type, ctypes._Pointer):
return ctypes.sizeof(ctypes.c_void_p)
else:
raise NotImplementedError(
"This subclass of ctype not supported yet"
)
elif processing_field.type_size is not None:
# Handle vmlinux types with type_size but no ctype_complex_type
# This means it's a direct vmlinux struct field (not array/pointer wrapped)
# The type_size should already contain the full size of the struct
# But if there's a containing_type from vmlinux, we need that size
if processing_field.containing_type is not None:
if processing_field.containing_type.__module__ == "vmlinux":
# For vmlinux containing types, we need the pre-calculated size
if size_of_containing_type is not None:
return size_of_containing_type * processing_field.type_size
else:
raise RuntimeError(
f"Field {name}: vmlinux containing_type requires size_of_containing_type"
)
else:
raise ModuleNotFoundError(
f"Containing type module {processing_field.containing_type.__module__} not supported"
)
else:
raise RuntimeError("Wrong type found with no containing type")
else:
# No ctype_complex_type and no type_size, must rely on size_of_containing_type
if size_of_containing_type is None:
raise RuntimeError(
f"Size of containing type {size_of_containing_type} is None"
)
return size_of_containing_type
else:
raise ModuleNotFoundError("Module is not supported for the operation")
raise RuntimeError("control should not reach here")
@property @property
def is_ready(self) -> bool: def is_ready(self) -> bool:
"""Check if the node is ready (all fields are ready).""" """Check if the node is ready (all fields are ready)."""
@ -235,3 +380,9 @@ class DependencyNode:
def get_not_ready_fields(self) -> Dict[str, Field]: def get_not_ready_fields(self) -> Dict[str, Field]:
"""Get all fields that are marked as not ready.""" """Get all fields that are marked as not ready."""
return {name: elem for name, elem in self.fields.items() if not elem.ready} return {name: elem for name, elem in self.fields.items() if not elem.ready}
def add_dependent(self, dep_type):
if dep_type in self.depends_on:
return
else:
self.depends_on.append(dep_type)

View File

@ -1,17 +1,17 @@
import ast import ast
import logging import logging
from typing import List, Tuple, Dict
import importlib import importlib
import inspect import inspect
from .assignment_info import AssignmentInfo, AssignmentType
from .dependency_handler import DependencyHandler from .dependency_handler import DependencyHandler
from .ir_generation import IRGenerator from .ir_gen import IRGenerator
from .class_handler import process_vmlinux_class from .class_handler import process_vmlinux_class
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def detect_import_statement(tree: ast.AST) -> List[Tuple[str, ast.ImportFrom]]: def detect_import_statement(tree: ast.AST) -> list[tuple[str, ast.ImportFrom]]:
""" """
Parse AST and detect import statements from vmlinux. Parse AST and detect import statements from vmlinux.
@ -82,7 +82,7 @@ def vmlinux_proc(tree: ast.AST, module):
# initialise dependency handler # initialise dependency handler
handler = DependencyHandler() handler = DependencyHandler()
# initialise assignment dictionary of name to type # initialise assignment dictionary of name to type
assignments: Dict[str, type] = {} assignments: dict[str, AssignmentInfo] = {}
if not import_statements: if not import_statements:
logger.info("No vmlinux imports found") logger.info("No vmlinux imports found")
@ -128,8 +128,35 @@ def vmlinux_proc(tree: ast.AST, module):
f"{imported_name} not found as ClassDef or Assign in vmlinux" f"{imported_name} not found as ClassDef or Assign in vmlinux"
) )
IRGenerator(module, handler) IRGenerator(module, handler, assignments)
return assignments
def process_vmlinux_assign(node, module, assignments: Dict[str, type]): def process_vmlinux_assign(node, module, assignments: dict[str, AssignmentInfo]):
raise NotImplementedError("Assignment handling has not been implemented yet") """Process assignments from vmlinux module."""
# Only handle single-target assignments
if len(node.targets) == 1 and isinstance(node.targets[0], ast.Name):
target_name = node.targets[0].id
# Handle constant value assignments
if isinstance(node.value, ast.Constant):
# Fixed: using proper TypedDict creation syntax with named arguments
assignments[target_name] = AssignmentInfo(
value_type=AssignmentType.CONSTANT,
python_type=type(node.value.value),
value=node.value.value,
pointer_level=None,
signature=None,
members=None,
)
logger.info(
f"Added assignment: {target_name} = {node.value.value!r} of type {type(node.value.value)}"
)
# Handle other assignment types that we may need to support
else:
logger.warning(
f"Unsupported assignment type for {target_name}: {ast.dump(node.value)}"
)
else:
raise ValueError("Not a simple assignment")

View File

@ -0,0 +1,3 @@
from .ir_generation import IRGenerator
__all__ = ["IRGenerator"]

View File

@ -0,0 +1,161 @@
from pythonbpf.debuginfo import DebugInfoGenerator, dwarf_constants as dc
from ..dependency_node import DependencyNode
import ctypes
import logging
from typing import List, Any, Tuple
logger = logging.getLogger(__name__)
def debug_info_generation(
struct: DependencyNode,
llvm_module,
generated_debug_info: List[Tuple[DependencyNode, Any]],
) -> Any:
"""
Generate DWARF debug information for a struct defined in a DependencyNode.
Args:
struct: The dependency node containing struct information
llvm_module: The LLVM module to add debug info to
generated_debug_info: List of tuples (struct, debug_info) to track generated debug info
Returns:
The generated global variable debug info
"""
# Set up debug info generator
generator = DebugInfoGenerator(llvm_module)
# Check if debug info for this struct has already been generated
for existing_struct, debug_info in generated_debug_info:
if existing_struct.name == struct.name:
return debug_info
# Process all fields and create members for the struct
members = []
for field_name, field in struct.fields.items():
# Get appropriate debug type for this field
field_type = _get_field_debug_type(
field_name, field, generator, struct, generated_debug_info
)
# Create struct member with proper offset
member = generator.create_struct_member_vmlinux(
field_name, field_type, field.offset * 8
)
members.append(member)
if struct.name.startswith("struct_"):
struct_name = struct.name.removeprefix("struct_")
else:
raise ValueError("Unions are not supported in the current version")
# Create struct type with all members
struct_type = generator.create_struct_type_with_name(
struct_name, members, struct.__sizeof__() * 8, is_distinct=True
)
return struct_type
def _get_field_debug_type(
field_name: str,
field,
generator: DebugInfoGenerator,
parent_struct: DependencyNode,
generated_debug_info: List[Tuple[DependencyNode, Any]],
) -> tuple[Any, int]:
"""
Determine the appropriate debug type for a field based on its Python/ctypes type.
Args:
field_name: Name of the field
field: Field object containing type information
generator: DebugInfoGenerator instance
parent_struct: The parent struct containing this field
generated_debug_info: List of already generated debug info
Returns:
The debug info type for this field
"""
# Handle complex types (arrays, pointers)
if field.ctype_complex_type is not None:
if issubclass(field.ctype_complex_type, ctypes.Array):
# Handle array types
element_type, base_type_size = _get_basic_debug_type(
field.containing_type, generator
)
return generator.create_array_type_vmlinux(
(element_type, base_type_size * field.type_size), field.type_size
), field.type_size * base_type_size
elif issubclass(field.ctype_complex_type, ctypes._Pointer):
# Handle pointer types
pointee_type, _ = _get_basic_debug_type(field.containing_type, generator)
return generator.create_pointer_type(pointee_type), 64
# Handle other vmlinux types (nested structs)
if field.type.__module__ == "vmlinux":
# If it's a struct from vmlinux, check if we've already generated debug info for it
struct_name = field.type.__name__
# Look for existing debug info in the list
for existing_struct, debug_info in generated_debug_info:
if existing_struct.name == struct_name:
# Use existing debug info
return debug_info, existing_struct.__sizeof__()
# If not found, create a forward declaration
# This will be completed when the actual struct is processed
logger.warning("Forward declaration in struct created")
forward_type = generator.create_struct_type([], 0, is_distinct=True)
return forward_type, 0
# Handle basic C types
return _get_basic_debug_type(field.type, generator)
def _get_basic_debug_type(ctype, generator: DebugInfoGenerator) -> Any:
"""
Map a ctypes type to a DWARF debug type.
Args:
ctype: A ctypes type or Python type
generator: DebugInfoGenerator instance
Returns:
The corresponding debug type
"""
# Map ctypes to debug info types
if ctype == ctypes.c_char or ctype == ctypes.c_byte:
return generator.get_basic_type("char", 8, dc.DW_ATE_signed_char), 8
elif ctype == ctypes.c_ubyte or ctype == ctypes.c_uint8:
return generator.get_basic_type("unsigned char", 8, dc.DW_ATE_unsigned_char), 8
elif ctype == ctypes.c_short or ctype == ctypes.c_int16:
return generator.get_basic_type("short", 16, dc.DW_ATE_signed), 16
elif ctype == ctypes.c_ushort or ctype == ctypes.c_uint16:
return generator.get_basic_type("unsigned short", 16, dc.DW_ATE_unsigned), 16
elif ctype == ctypes.c_int or ctype == ctypes.c_int32:
return generator.get_basic_type("int", 32, dc.DW_ATE_signed), 32
elif ctype == ctypes.c_uint or ctype == ctypes.c_uint32:
return generator.get_basic_type("unsigned int", 32, dc.DW_ATE_unsigned), 32
elif ctype == ctypes.c_long:
return generator.get_basic_type("long", 64, dc.DW_ATE_signed), 64
elif ctype == ctypes.c_ulong:
return generator.get_basic_type("unsigned long", 64, dc.DW_ATE_unsigned), 64
elif ctype == ctypes.c_longlong or ctype == ctypes.c_int64:
return generator.get_basic_type("long long", 64, dc.DW_ATE_signed), 64
elif ctype == ctypes.c_ulonglong or ctype == ctypes.c_uint64:
return generator.get_basic_type(
"unsigned long long", 64, dc.DW_ATE_unsigned
), 64
elif ctype == ctypes.c_float:
return generator.get_basic_type("float", 32, dc.DW_ATE_float), 32
elif ctype == ctypes.c_double:
return generator.get_basic_type("double", 64, dc.DW_ATE_float), 64
elif ctype == ctypes.c_bool:
return generator.get_basic_type("bool", 8, dc.DW_ATE_boolean), 8
elif ctype == ctypes.c_char_p:
char_type = generator.get_basic_type("char", 8, dc.DW_ATE_signed_char), 8
return generator.create_pointer_type(char_type)
elif ctype == ctypes.c_void_p:
return generator.create_pointer_type(None), 64
else:
return generator.get_uint64_type(), 64

View File

@ -0,0 +1,225 @@
import ctypes
import logging
from ..assignment_info import AssignmentInfo, AssignmentType
from ..dependency_handler import DependencyHandler
from .debug_info_gen import debug_info_generation
from ..dependency_node import DependencyNode
import llvmlite.ir as ir
logger = logging.getLogger(__name__)
class IRGenerator:
# get the assignments dict and add this stuff to it.
def __init__(self, llvm_module, handler: DependencyHandler, assignments):
self.llvm_module = llvm_module
self.handler: DependencyHandler = handler
self.generated: list[str] = []
self.generated_debug_info: list = []
# Use struct_name and field_name as key instead of Field object
self.generated_field_names: dict[str, dict[str, ir.GlobalVariable]] = {}
self.assignments: dict[str, AssignmentInfo] = assignments
if not handler.is_ready:
raise ImportError(
"Semantic analysis of vmlinux imports failed. Cannot generate IR"
)
for struct in handler:
self.struct_processor(struct)
def struct_processor(self, struct, processing_stack=None):
# Initialize processing stack on first call
if processing_stack is None:
processing_stack = set()
# If already generated, skip
if struct.name in self.generated:
return
# Detect circular dependency
if struct.name in processing_stack:
logger.info(
f"Circular dependency detected for {struct.name}, skipping recursive processing"
)
# For circular dependencies, we can either:
# 1. Use forward declarations (opaque pointers)
# 2. Mark as incomplete and process later
# 3. Generate a placeholder type
# Here we'll just skip and let it be processed in its own call
return
logger.info(f"IR generating for {struct.name}")
# Add to processing stack before processing dependencies
processing_stack.add(struct.name)
try:
# Process all dependencies first
if struct.depends_on is None:
pass
else:
for dependency in struct.depends_on:
if dependency not in self.generated:
# Check if dependency exists in handler
if dependency in self.handler.nodes:
dep_node_from_dependency = self.handler[dependency]
# Pass the processing_stack down to track circular refs
self.struct_processor(
dep_node_from_dependency, processing_stack
)
else:
raise RuntimeError(
f"Warning: Dependency {dependency} not found in handler"
)
# Generate IR first to populate field names
self.generated_debug_info.append(
(struct, self.gen_ir(struct, self.generated_debug_info))
)
# Fill the assignments dictionary with struct information
if struct.name not in self.assignments:
# Create a members dictionary for AssignmentInfo
members_dict = {}
for field_name, field in struct.fields.items():
# Get the generated field name from our dictionary, or use field_name if not found
if (
struct.name in self.generated_field_names
and field_name in self.generated_field_names[struct.name]
):
field_global_variable = self.generated_field_names[struct.name][
field_name
]
members_dict[field_name] = (field_global_variable, field)
else:
raise ValueError(
f"llvm global name not found for struct field {field_name}"
)
# members_dict[field_name] = (field_name, field)
# Add struct to assignments dictionary
self.assignments[struct.name] = AssignmentInfo(
value_type=AssignmentType.STRUCT,
python_type=struct.ctype_struct,
value=None,
pointer_level=None,
signature=None,
members=members_dict,
)
logger.info(f"Added struct assignment info for {struct.name}")
self.generated.append(struct.name)
finally:
# Remove from processing stack after we're done
processing_stack.discard(struct.name)
def gen_ir(self, struct, generated_debug_info):
# TODO: we add the btf_ama attribute by monkey patching in the end of compilation, but once llvmlite
# accepts our issue, we will resort to normal accessed attribute based attribute addition
# currently we generate all possible field accesses for CO-RE and put into the assignment table
debug_info = debug_info_generation(
struct, self.llvm_module, generated_debug_info
)
field_index = 0
# Make sure the struct has an entry in our field names dictionary
if struct.name not in self.generated_field_names:
self.generated_field_names[struct.name] = {}
for field_name, field in struct.fields.items():
# does not take arrays and similar types into consideration yet.
if field.ctype_complex_type is not None and issubclass(
field.ctype_complex_type, ctypes.Array
):
array_size = field.type_size
containing_type = field.containing_type
if containing_type.__module__ == ctypes.__name__:
containing_type_size = ctypes.sizeof(containing_type)
if array_size == 0:
field_co_re_name = self._struct_name_generator(
struct, field, field_index, True, 0, containing_type_size
)
globvar = ir.GlobalVariable(
self.llvm_module, ir.IntType(64), name=field_co_re_name
)
globvar.linkage = "external"
globvar.set_metadata("llvm.preserve.access.index", debug_info)
self.generated_field_names[struct.name][field_name] = globvar
field_index += 1
continue
for i in range(0, array_size):
field_co_re_name = self._struct_name_generator(
struct, field, field_index, True, i, containing_type_size
)
globvar = ir.GlobalVariable(
self.llvm_module, ir.IntType(64), name=field_co_re_name
)
globvar.linkage = "external"
globvar.set_metadata("llvm.preserve.access.index", debug_info)
self.generated_field_names[struct.name][field_name] = globvar
field_index += 1
elif field.type_size is not None:
array_size = field.type_size
containing_type = field.containing_type
if containing_type.__module__ == "vmlinux":
containing_type_size = self.handler[
containing_type.__name__
].current_offset
for i in range(0, array_size):
field_co_re_name = self._struct_name_generator(
struct, field, field_index, True, i, containing_type_size
)
globvar = ir.GlobalVariable(
self.llvm_module, ir.IntType(64), name=field_co_re_name
)
globvar.linkage = "external"
globvar.set_metadata("llvm.preserve.access.index", debug_info)
self.generated_field_names[struct.name][field_name] = globvar
field_index += 1
else:
field_co_re_name = self._struct_name_generator(
struct, field, field_index
)
field_index += 1
globvar = ir.GlobalVariable(
self.llvm_module, ir.IntType(64), name=field_co_re_name
)
globvar.linkage = "external"
globvar.set_metadata("llvm.preserve.access.index", debug_info)
self.generated_field_names[struct.name][field_name] = globvar
return debug_info
def _struct_name_generator(
self,
struct: DependencyNode,
field,
field_index: int,
is_indexed: bool = False,
index: int = 0,
containing_type_size: int = 0,
) -> str:
# TODO: Does not support Unions as well as recursive pointer and array type naming
if is_indexed:
name = (
"llvm."
+ struct.name.removeprefix("struct_")
+ f":0:{field.offset + index * containing_type_size}"
+ "$"
+ f"0:{field_index}:{index}"
)
return name
elif struct.name.startswith("struct_"):
name = (
"llvm."
+ struct.name.removeprefix("struct_")
+ f":0:{field.offset}"
+ "$"
+ f"0:{field_index}"
)
return name
else:
print(self.handler[struct.name])
raise TypeError(
"Name generation cannot occur due to type name not starting with struct"
)

View File

@ -1,14 +0,0 @@
import logging
from .dependency_handler import DependencyHandler
logger = logging.getLogger(__name__)
class IRGenerator:
def __init__(self, module, handler: DependencyHandler):
self.module = module
self.handler: DependencyHandler = handler
if not handler.is_ready:
raise ImportError(
"Semantic analysis of vmlinux imports failed. Cannot generate IR"
)

View File

@ -0,0 +1,90 @@
import logging
from llvmlite import ir
from pythonbpf.vmlinux_parser.assignment_info import AssignmentType
logger = logging.getLogger(__name__)
class VmlinuxHandler:
"""Handler for vmlinux-related operations"""
_instance = None
@classmethod
def get_instance(cls):
"""Get the singleton instance"""
if cls._instance is None:
logger.warning("VmlinuxHandler used before initialization")
return None
return cls._instance
@classmethod
def initialize(cls, vmlinux_symtab):
"""Initialize the handler with vmlinux symbol table"""
cls._instance = cls(vmlinux_symtab)
return cls._instance
def __init__(self, vmlinux_symtab):
"""Initialize with vmlinux symbol table"""
self.vmlinux_symtab = vmlinux_symtab
logger.info(
f"VmlinuxHandler initialized with {len(vmlinux_symtab) if vmlinux_symtab else 0} symbols"
)
def is_vmlinux_enum(self, name):
"""Check if name is a vmlinux enum constant"""
return (
name in self.vmlinux_symtab
and self.vmlinux_symtab[name]["value_type"] == AssignmentType.CONSTANT
)
def is_vmlinux_struct(self, name):
"""Check if name is a vmlinux struct"""
return (
name in self.vmlinux_symtab
and self.vmlinux_symtab[name]["value_type"] == AssignmentType.STRUCT
)
def handle_vmlinux_enum(self, name):
"""Handle vmlinux enum constants by returning LLVM IR constants"""
if self.is_vmlinux_enum(name):
value = self.vmlinux_symtab[name]["value"]
logger.info(f"Resolving vmlinux enum {name} = {value}")
return ir.Constant(ir.IntType(64), value), ir.IntType(64)
return None
def get_vmlinux_enum_value(self, name):
"""Handle vmlinux enum constants by returning LLVM IR constants"""
if self.is_vmlinux_enum(name):
value = self.vmlinux_symtab[name]["value"]
logger.info(f"The value of vmlinux enum {name} = {value}")
return value
return None
def handle_vmlinux_struct(self, struct_name, module, builder):
"""Handle vmlinux struct initializations"""
if self.is_vmlinux_struct(struct_name):
# TODO: Implement core-specific struct handling
# This will be more complex and depends on the BTF information
logger.info(f"Handling vmlinux struct {struct_name}")
# Return struct type and allocated pointer
# This is a stub, actual implementation will be more complex
return None
return None
def handle_vmlinux_struct_field(
self, struct_var_name, field_name, module, builder, local_sym_tab
):
"""Handle access to vmlinux struct fields"""
# Check if it's a variable of vmlinux struct type
if struct_var_name in local_sym_tab:
var_info = local_sym_tab[struct_var_name] # noqa: F841
# Need to check if this variable is a vmlinux struct
# This will depend on how you track vmlinux struct types in your symbol table
logger.info(
f"Attempting to access field {field_name} of possible vmlinux struct {struct_var_name}"
)
# Return pointer to field and field type
return None
return None

View File

@ -1,23 +1,9 @@
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h> #include "vmlinux.h"
#include <bpf/bpf_helpers.h> #include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h> #include <bpf/bpf_tracing.h>
struct trace_entry {
short unsigned int type;
unsigned char flags;
unsigned char preempt_count;
int pid;
};
struct trace_event_raw_sys_enter {
struct trace_entry ent;
long int id;
long unsigned int args[6];
char __data[0];
};
struct event { struct event {
__u32 pid; __u32 pid;
__u32 uid; __u32 uid;
@ -33,7 +19,7 @@ struct {
SEC("tp/syscalls/sys_enter_setuid") SEC("tp/syscalls/sys_enter_setuid")
int handle_setuid_entry(struct trace_event_raw_sys_enter *ctx) { int handle_setuid_entry(struct trace_event_raw_sys_enter *ctx) {
struct event data = {}; struct event data = {};
struct blk_integrity_iter it = {};
// Extract UID from the syscall arguments // Extract UID from the syscall arguments
data.uid = (unsigned int)ctx->args[0]; data.uid = (unsigned int)ctx->args[0];
data.ts = bpf_ktime_get_ns(); data.ts = bpf_ktime_get_ns();

View File

@ -1,10 +1,17 @@
from pythonbpf import bpf, map, section, bpfglobal, compile_to_ir from pythonbpf import bpf, map, section, bpfglobal, compile_to_ir
from pythonbpf.maps import HashMap from pythonbpf.maps import HashMap
from pythonbpf.helper import XDP_PASS from pythonbpf.helper import XDP_PASS
from vmlinux import struct_xdp_md from vmlinux import TASK_COMM_LEN # noqa: F401
from vmlinux import struct_xdp_buff # noqa: F401
from vmlinux import struct_ring_buffer_per_cpu # noqa: F401
from vmlinux import struct_qspinlock # noqa: F401
# from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401
# from vmlinux import struct_posix_cputimers # noqa: F401
from vmlinux import struct_xdp_md
# from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401
# from vmlinux import struct_ring_buffer_per_cpu # noqa: F401
# from vmlinux import struct_request # noqa: F401
from ctypes import c_int64 from ctypes import c_int64
# Instructions to how to run this program # Instructions to how to run this program

View File

@ -0,0 +1,47 @@
import logging
from pythonbpf import bpf, section, bpfglobal, compile_to_ir, map
from pythonbpf import compile # noqa: F401
from vmlinux import TASK_COMM_LEN # noqa: F401
from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401
from ctypes import c_uint64, c_int32, c_int64
from pythonbpf.maps import HashMap
# from vmlinux import struct_uinput_device
# from vmlinux import struct_blk_integrity_iter
@bpf
@map
def mymap() -> HashMap:
return HashMap(key=c_int32, value=c_uint64, max_entries=TASK_COMM_LEN)
@bpf
@map
def mymap2() -> HashMap:
return HashMap(key=c_int32, value=c_uint64, max_entries=18)
# Instructions to how to run this program
# 1. Install PythonBPF: pip install pythonbpf
# 2. Run the program: python examples/simple_struct_test.py
# 3. Run the program with sudo: sudo tools/check.sh run examples/simple_struct_test.o
# 4. Attach object file to any network device with something like ./check.sh run examples/simple_struct_test.o tailscale0
# 5. send traffic through the device and observe effects
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: struct_trace_event_raw_sys_enter) -> c_int64:
a = 2 + TASK_COMM_LEN + TASK_COMM_LEN
print(f"Hello, World{TASK_COMM_LEN} and {a}")
return c_int64(TASK_COMM_LEN + 2)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile_to_ir("simple_struct_test.py", "simple_struct_test.ll", loglevel=logging.DEBUG)
# compile()

199
tools/setup.sh Executable file
View File

@ -0,0 +1,199 @@
#!/bin/bash
print_warning() {
echo -e "\033[1;33m$1\033[0m"
}
print_info() {
echo -e "\033[1;32m$1\033[0m"
}
if [ "$EUID" -ne 0 ]; then
echo "Please run this script with sudo."
exit 1
fi
print_warning "===================================================================="
print_warning " WARNING "
print_warning " This script will run kernel-level BPF programs. "
print_warning " BPF programs run with kernel privileges and could potentially "
print_warning " affect system stability if not used properly. "
print_warning " "
print_warning " This is a non-interactive version for curl piping. "
print_warning " The script will proceed automatically with installation. "
print_warning "===================================================================="
echo
print_info "This script will:"
echo "1. Check and install required dependencies (libelf, clang, python, bpftool)"
echo "2. Download example programs from the Python-BPF GitHub repository"
echo "3. Create a Python virtual environment with necessary packages"
echo "4. Set up a Jupyter notebook server"
echo "Starting in 5 seconds. Press Ctrl+C to cancel..."
sleep 5
WORK_DIR="/tmp/python_bpf_setup"
REAL_USER=$(logname || echo "$SUDO_USER")
echo "Creating temporary directory: $WORK_DIR"
mkdir -p "$WORK_DIR"
cd "$WORK_DIR" || exit 1
if [ -f /etc/os-release ]; then
. /etc/os-release
DISTRO=$ID
else
echo "Cannot determine Linux distribution. Exiting."
exit 1
fi
install_dependencies() {
case $DISTRO in
ubuntu|debian|pop|mint|elementary|zorin)
echo "Detected Ubuntu/Debian-based system"
apt update
# Check and install libelf
if ! dpkg -l libelf-dev >/dev/null 2>&1; then
echo "Installing libelf-dev..."
apt install -y libelf-dev
else
echo "libelf-dev is already installed."
fi
# Check and install clang
if ! command -v clang >/dev/null 2>&1; then
echo "Installing clang..."
apt install -y clang
else
echo "clang is already installed."
fi
# Check and install python
if ! command -v python3 >/dev/null 2>&1; then
echo "Installing python3..."
apt install -y python3 python3-pip python3-venv
else
echo "python3 is already installed."
fi
# Check and install bpftool
if ! command -v bpftool >/dev/null 2>&1; then
echo "Installing bpftool..."
apt install -y linux-tools-common linux-tools-generic
# If bpftool still not found, try installing linux-tools-$(uname -r)
if ! command -v bpftool >/dev/null 2>&1; then
KERNEL_VERSION=$(uname -r)
apt install -y linux-tools-$KERNEL_VERSION
fi
else
echo "bpftool is already installed."
fi
;;
arch|manjaro|endeavouros)
echo "Detected Arch-based Linux system"
# Check and install libelf
if ! pacman -Q libelf >/dev/null 2>&1; then
echo "Installing libelf..."
pacman -S --noconfirm libelf
else
echo "libelf is already installed."
fi
# Check and install clang
if ! command -v clang >/dev/null 2>&1; then
echo "Installing clang..."
pacman -S --noconfirm clang
else
echo "clang is already installed."
fi
# Check and install python
if ! command -v python3 >/dev/null 2>&1; then
echo "Installing python3..."
pacman -S --noconfirm python python-pip
else
echo "python3 is already installed."
fi
# Check and install bpftool
if ! command -v bpftool >/dev/null 2>&1; then
echo "Installing bpftool..."
pacman -S --noconfirm bpf linux-headers
else
echo "bpftool is already installed."
fi
;;
*)
echo "Unsupported distribution: $DISTRO"
echo "This script only supports Ubuntu/Debian and Arch Linux derivatives."
exit 1
;;
esac
}
echo "Checking and installing dependencies..."
install_dependencies
# Download example programs
echo "Downloading example programs from Python-BPF GitHub repository..."
mkdir -p examples
cd examples || exit 1
echo "Fetching example files list..."
FILES=$(curl -s "https://api.github.com/repos/pythonbpf/Python-BPF/contents/examples" | grep -o '"path": "examples/[^"]*"' | awk -F'"' '{print $4}')
if [ -z "$FILES" ]; then
echo "Failed to fetch file list from repository. Using fallback method..."
# Fallback to downloading common example files
EXAMPLES=(
"binops_demo.py"
"blk_request.py"
"clone-matplotlib.ipynb"
"clone_plot.py"
"hello_world.py"
"kprobes.py"
"struct_and_perf.py"
"sys_sync.py"
"xdp_pass.py"
)
for example in "${EXAMPLES[@]}"; do
echo "Downloading: $example"
curl -s -O "https://raw.githubusercontent.com/pythonbpf/Python-BPF/master/examples/$example"
done
else
for file in $FILES; do
filename=$(basename "$file")
echo "Downloading: $filename"
curl -s -o "$filename" "https://raw.githubusercontent.com/pythonbpf/Python-BPF/master/$file"
done
fi
cd "$WORK_DIR" || exit 1
chown -R "$REAL_USER:$(id -gn "$REAL_USER")" .
echo "Creating Python virtual environment..."
su - "$REAL_USER" -c "cd \"$WORK_DIR\" && python3 -m venv venv"
echo "Installing Python packages..."
su - "$REAL_USER" -c "cd \"$WORK_DIR\" && source venv/bin/activate && pip install --upgrade pip && pip install jupyter pythonbpf pylibbpf matplotlib"
cat > "$WORK_DIR/start_jupyter.sh" << EOF
#!/bin/bash
cd "$WORK_DIR"
source venv/bin/activate
cd examples
sudo ../venv/bin/python -m notebook --ip=0.0.0.0 --allow-root
EOF
chmod +x "$WORK_DIR/start_jupyter.sh"
chown "$REAL_USER:$(id -gn "$REAL_USER")" "$WORK_DIR/start_jupyter.sh"
print_info "========================================================"
print_info "Setup complete! To start Jupyter Notebook, run:"
print_info "$ sudo $WORK_DIR/start_jupyter.sh"
print_info "========================================================"