Compare commits

..

8 Commits

38 changed files with 1213 additions and 902 deletions

13
TODO.md
View File

@ -1,13 +0,0 @@
## Short term
- Implement enough functionality to port the BCC tutorial examples in PythonBPF
- Add all maps
- XDP support in pylibbpf
- ringbuf support
- Add oneline IfExpr conditionals (wishlist)
## Long term
- Refactor the codebase to be better than a hackathon project
- Port to C++ and use actual LLVM?
- Fix struct_kioctx issue in the vmlinux transpiler

View File

@ -1,3 +1,10 @@
"""
PythonBPF - A Python frontend for eBPF programs.
This package provides decorators and compilation tools to write BPF programs
in Python syntax and compile them to eBPF bytecode that can run in the kernel.
"""
from .decorators import bpf, map, section, bpfglobal, struct from .decorators import bpf, map, section, bpfglobal, struct
from .codegen import compile_to_ir, compile, BPF from .codegen import compile_to_ir, compile, BPF

View File

@ -1,191 +0,0 @@
import ast
import logging
from llvmlite import ir
from dataclasses import dataclass
from typing import Any
from pythonbpf.helper import HelperHandlerRegistry
from pythonbpf.type_deducer import ctypes_to_ir
logger = logging.getLogger(__name__)
@dataclass
class LocalSymbol:
    """Symbol-table entry describing one pre-allocated local variable.

    Iterating an instance yields ``var``, ``ir_type`` and ``metadata`` in that
    order, so an entry can be tuple-unpacked.
    """

    # Stack slot created for the variable (the result of ``builder.alloca``).
    var: ir.AllocaInstr
    # LLVM IR type the slot was allocated with.
    ir_type: ir.Type
    # Optional extra data; struct variables keep their struct name here.
    metadata: Any = None

    def __iter__(self):
        """Yield the three fields in declaration order."""
        yield from (self.var, self.ir_type, self.metadata)
def _is_helper_call(call_node):
    """Report whether ``call_node`` invokes a registered BPF helper.

    Args:
        call_node: An ``ast.Call`` node.

    Returns:
        The registry's answer for an attribute callee (looked up by attribute
        name) or for a bare-name callee other than ``print``; False for any
        other callee shape.
    """
    callee = call_node.func
    if isinstance(callee, ast.Attribute):
        return HelperHandlerRegistry.has_handler(callee.attr)
    if not isinstance(callee, ast.Name):
        return False
    # print is excluded from requiring temps (it handles f-strings differently)
    helper_name = callee.id
    return HelperHandlerRegistry.has_handler(helper_name) and helper_name != "print"
def handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab):
    """Pre-allocate storage for the variable bound by an assignment statement.

    Only single-target assignments to a plain name are allocated. The kind of
    right-hand side (call, constant or binary operation) selects the allocator;
    anything else is logged and skipped.

    Args:
        builder: IR builder forwarded to the allocator helpers.
        stmt: Assignment node providing ``targets`` and ``value``.
        local_sym_tab: Mapping of variable name to ``LocalSymbol``; updated by
            the allocator helpers.
        structs_sym_tab: Struct table, forwarded for call right-hand sides.
    """
    if len(stmt.targets) != 1:
        logger.warning("Multi-target assignment not supported, skipping allocation")
        return

    target = stmt.targets[0]
    if isinstance(target, ast.Attribute):
        # Field writes target an existing struct variable, so nothing is allocated.
        logger.debug(f"Struct field assignment to {target.attr}, no allocation needed")
        return
    if not isinstance(target, ast.Name):
        logger.warning(f"Unsupported assignment target type: {type(target).__name__}")
        return

    var_name = target.id
    if var_name in local_sym_tab:
        # The first assignment already reserved the slot.
        logger.debug(f"Variable {var_name} already allocated, skipping")
        return

    rval = stmt.value
    if isinstance(rval, ast.Call):
        _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab)
        return
    if isinstance(rval, ast.Constant):
        _allocate_for_constant(builder, var_name, rval, local_sym_tab)
        return
    if isinstance(rval, ast.BinOp):
        _allocate_for_binop(builder, var_name, local_sym_tab)
        return

    logger.warning(
        f"Unsupported assignment value type for {var_name}: {type(rval).__name__}"
    )
def _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab):
    """Reserve storage for a variable whose value comes from a call.

    Bare-name callees are classified, in order, as a C integer constructor,
    a registered helper, ``deref`` or a struct constructor. Attribute callees
    are treated as map method calls. Unrecognised callees are logged and
    nothing is allocated.

    Args:
        builder: IR builder used to emit the ``alloca``.
        var_name: Name of the variable being assigned.
        rval: The ``ast.Call`` on the right-hand side.
        local_sym_tab: Mapping of variable name to ``LocalSymbol``; updated here.
        structs_sym_tab: Mapping of struct name to struct info.
    """
    callee = rval.func

    if isinstance(callee, ast.Attribute):
        # Map method calls - need double allocation for ptr handling
        _allocate_for_map_method(builder, var_name, local_sym_tab)
        return
    if not isinstance(callee, ast.Name):
        logger.warning(f"Unsupported call function type for {var_name}")
        return

    def reserve_i64():
        """Create an 8-byte-aligned i64 slot for var_name and record it."""
        i64 = ir.IntType(64)  # Assume i64 return type
        slot = builder.alloca(i64, name=var_name)
        slot.align = 8
        local_sym_tab[var_name] = LocalSymbol(slot, i64)

    call_type = callee.id

    if call_type in ("c_int32", "c_int64", "c_uint32", "c_uint64"):
        # C type constructors: alignment follows the integer's byte width.
        c_type = ctypes_to_ir(call_type)
        slot = builder.alloca(c_type, name=var_name)
        slot.align = c_type.width // 8
        local_sym_tab[var_name] = LocalSymbol(slot, c_type)
        logger.info(f"Pre-allocated {var_name} as {call_type}")
    elif HelperHandlerRegistry.has_handler(call_type):
        # Helper functions
        reserve_i64()
        logger.info(f"Pre-allocated {var_name} for helper {call_type}")
    elif call_type == "deref":
        # Deref function
        reserve_i64()
        logger.info(f"Pre-allocated {var_name} for deref")
    elif call_type in structs_sym_tab:
        # Struct constructors: the struct name is kept as symbol metadata.
        struct_ir_type = structs_sym_tab[call_type].ir_type
        slot = builder.alloca(struct_ir_type, name=var_name)
        local_sym_tab[var_name] = LocalSymbol(slot, struct_ir_type, call_type)
        logger.info(f"Pre-allocated {var_name} for struct {call_type}")
    else:
        logger.warning(f"Unknown call type for allocation: {call_type}")
def _allocate_for_map_method(builder, var_name, local_sym_tab):
    """Reserve the two slots used for a variable assigned from a map method.

    Registers ``var_name`` as a slot of type pointer-to-i64 and
    ``<var_name>_tmp`` as a plain i64 slot.

    Args:
        builder: IR builder used to emit both ``alloca`` instructions.
        var_name: Name of the variable being assigned.
        local_sym_tab: Mapping of variable name to ``LocalSymbol``; updated here.
    """
    i64 = ir.IntType(64)

    # Main variable (pointer to pointer)
    ptr_type = ir.PointerType(i64)
    ptr_slot = builder.alloca(ptr_type, name=var_name)
    local_sym_tab[var_name] = LocalSymbol(ptr_slot, ptr_type)

    # Temporary variable for computed values
    tmp_name = f"{var_name}_tmp"
    tmp_slot = builder.alloca(i64, name=tmp_name)
    local_sym_tab[tmp_name] = LocalSymbol(tmp_slot, i64)

    logger.info(f"Pre-allocated {var_name} and {var_name}_tmp for map method")
def _allocate_for_constant(builder, var_name, rval, local_sym_tab):
    """Reserve storage for a variable assigned from a literal constant.

    Booleans get an i1 slot, other integers an i64 slot and strings a slot
    holding a pointer to i8. Any other constant type is logged and skipped.

    Args:
        builder: IR builder used to emit the ``alloca``.
        var_name: Name of the variable being assigned.
        rval: The ``ast.Constant`` on the right-hand side.
        local_sym_tab: Mapping of variable name to ``LocalSymbol``; updated here.
    """
    literal = rval.value

    # bool must be tested before int because bool is a subclass of int.
    if isinstance(literal, bool):
        slot_type, slot_align = ir.IntType(1), 1
        message = f"Pre-allocated {var_name} as bool"
    elif isinstance(literal, int):
        slot_type, slot_align = ir.IntType(64), 8
        message = f"Pre-allocated {var_name} as i64"
    elif isinstance(literal, str):
        slot_type, slot_align = ir.PointerType(ir.IntType(8)), 8
        message = f"Pre-allocated {var_name} as string"
    else:
        logger.warning(
            f"Unsupported constant type for {var_name}: {type(rval.value).__name__}"
        )
        return

    slot = builder.alloca(slot_type, name=var_name)
    slot.align = slot_align
    local_sym_tab[var_name] = LocalSymbol(slot, slot_type)
    logger.info(message)
def _allocate_for_binop(builder, var_name, local_sym_tab):
    """Reserve an i64 slot for a variable assigned from a binary operation.

    Args:
        builder: IR builder used to emit the ``alloca``.
        var_name: Name of the variable being assigned.
        local_sym_tab: Mapping of variable name to ``LocalSymbol``; updated here.
    """
    # The result of a binary operation is assumed to be i64.
    i64 = ir.IntType(64)
    slot = builder.alloca(i64, name=var_name)
    slot.align = 8
    local_sym_tab[var_name] = LocalSymbol(slot, i64)
    logger.info(f"Pre-allocated {var_name} for binop result")
def allocate_temp_pool(builder, max_temps, local_sym_tab):
    """Reserve the scratch slots used for helper arguments.

    Creates ``max_temps`` i64 slots named ``__helper_temp_0`` onwards and
    registers each one in ``local_sym_tab``. Does nothing when ``max_temps``
    is 0.

    Args:
        builder: IR builder used to emit the ``alloca`` instructions.
        max_temps: Number of scratch slots to create.
        local_sym_tab: Mapping of variable name to ``LocalSymbol``; updated here.
    """
    if max_temps == 0:
        return

    logger.info(f"Allocating temp pool of {max_temps} variables")
    i64 = ir.IntType(64)
    for i in range(max_temps):
        slot_name = f"__helper_temp_{i}"
        slot = builder.alloca(i64, name=slot_name)
        slot.align = 8
        local_sym_tab[slot_name] = LocalSymbol(slot, i64)

View File

@ -1,108 +0,0 @@
import ast
import logging
from llvmlite import ir
from pythonbpf.expr import eval_expr
logger = logging.getLogger(__name__)
def handle_struct_field_assignment(
    func, module, builder, target, rval, local_sym_tab, map_sym_tab, structs_sym_tab
):
    """Handle struct field assignment (``obj.field = value``).

    Every failure is logged and leaves the field unwritten; the function
    always returns None.

    Args:
        func: Function being built, forwarded to ``eval_expr``.
        module: Module being built, forwarded to ``eval_expr``.
        builder: IR builder used to emit the field pointer and the store.
        target: The ``ast.Attribute`` on the left-hand side.
        rval: Expression node on the right-hand side.
        local_sym_tab: Mapping of variable name to its local symbol.
        map_sym_tab: Map table, forwarded to ``eval_expr``.
        structs_sym_tab: Mapping of struct name to struct info.
    """
    # Only ``name.field`` is supported; a nested attribute or subscript base
    # has no ``id`` and previously crashed with AttributeError.
    if not isinstance(target.value, ast.Name):
        logger.error(
            f"Unsupported struct field assignment base: {type(target.value).__name__}"
        )
        return

    var_name = target.value.id
    field_name = target.attr

    if var_name not in local_sym_tab:
        logger.error(f"Variable '{var_name}' not found in symbol table")
        return

    # Non-struct locals carry no struct name in their metadata; report that
    # instead of failing with KeyError on the struct table lookup.
    struct_type = local_sym_tab[var_name].metadata
    if structs_sym_tab is None or struct_type not in structs_sym_tab:
        logger.error(f"Variable '{var_name}' is not a known struct instance")
        return
    struct_info = structs_sym_tab[struct_type]

    if field_name not in struct_info.fields:
        logger.error(f"Field '{field_name}' not found in struct '{struct_type}'")
        return

    # Get field pointer and evaluate value
    field_ptr = struct_info.gep(builder, local_sym_tab[var_name].var, field_name)
    val = eval_expr(
        func, module, builder, rval, local_sym_tab, map_sym_tab, structs_sym_tab
    )

    if val is None:
        logger.error(f"Failed to evaluate value for {var_name}.{field_name}")
        return

    # TODO: Handle string assignment to char array (not a priority)
    field_type = struct_info.field_type(field_name)
    if isinstance(field_type, ir.ArrayType) and val[1] == ir.PointerType(ir.IntType(8)):
        logger.warning(
            f"String to char array assignment not implemented for {var_name}.{field_name}"
        )
        return

    # Store the value
    builder.store(val[0], field_ptr)
    logger.info(f"Assigned to struct field {var_name}.{field_name}")
def handle_variable_assignment(
    func, module, builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab
):
    """Handle single named variable assignment.

    Evaluates ``rval`` and stores it into the slot already registered for
    ``var_name``. Integer values are widened or truncated to the slot's type,
    and an integer assigned to a pointer-typed slot is routed through the
    variable's ``<var_name>_tmp`` scratch slot.

    Args:
        func: Function being built, forwarded to ``eval_expr``.
        module: Module being built, forwarded to ``eval_expr``.
        builder: IR builder used to emit conversions and stores.
        var_name: Name of the variable being assigned.
        rval: Expression node on the right-hand side.
        local_sym_tab: Mapping of variable name to its local symbol.
        map_sym_tab: Map table, forwarded to ``eval_expr``.
        structs_sym_tab: Mapping of struct name to struct info.

    Returns:
        True when a store was emitted. False when the variable is undeclared,
        the value could not be evaluated, the types are incompatible, or the
        scratch slot needed for a pointer assignment is missing.
    """
    if var_name not in local_sym_tab:
        logger.error(f"Variable {var_name} not declared.")
        return False

    var_ptr = local_sym_tab[var_name].var
    var_type = local_sym_tab[var_name].ir_type

    # NOTE: Special case for struct initialization
    if isinstance(rval, ast.Call) and isinstance(rval.func, ast.Name):
        struct_name = rval.func.id
        if struct_name in structs_sym_tab and len(rval.args) == 0:
            struct_info = structs_sym_tab[struct_name]
            ir_struct = struct_info.ir_type

            # A None constant of the struct type zero-initializes the slot.
            builder.store(ir.Constant(ir_struct, None), var_ptr)
            logger.info(f"Initialized struct {struct_name} for variable {var_name}")
            return True

    val_result = eval_expr(
        func, module, builder, rval, local_sym_tab, map_sym_tab, structs_sym_tab
    )
    if val_result is None:
        logger.error(f"Failed to evaluate value for {var_name}")
        return False

    val, val_type = val_result
    logger.info(f"Evaluated value for {var_name}: {val} of type {val_type}, {var_type}")

    if val_type != var_type:
        if isinstance(val_type, ir.IntType) and isinstance(var_type, ir.IntType):
            # Allow implicit int widening
            if val_type.width < var_type.width:
                if val_type.width == 1:
                    # An i1 is a boolean: sign extension would turn True
                    # into -1, so zero-extend to keep it as 1.
                    val = builder.zext(val, var_type)
                else:
                    val = builder.sext(val, var_type)
                logger.info(f"Implicitly widened int for variable {var_name}")
            elif val_type.width > var_type.width:
                val = builder.trunc(val, var_type)
                logger.info(f"Implicitly truncated int for variable {var_name}")
        elif isinstance(val_type, ir.IntType) and isinstance(var_type, ir.PointerType):
            # NOTE: This is assignment to a PTR_TO_MAP_VALUE_OR_NULL
            logger.info(
                f"Creating temporary variable for pointer assignment to {var_name}"
            )
            tmp_name = f"{var_name}_tmp"
            if tmp_name not in local_sym_tab:
                # Pointer slots that were not allocated with a companion
                # scratch slot cannot take an integer value; fail cleanly
                # instead of raising KeyError.
                logger.error(
                    f"No temporary slot {tmp_name} available for pointer assignment"
                )
                return False
            var_ptr_tmp = local_sym_tab[tmp_name].var
            builder.store(val, var_ptr_tmp)
            # The variable then points at the scratch slot holding the value.
            val = var_ptr_tmp
        else:
            logger.error(
                f"Type mismatch for variable {var_name}: {val_type} vs {var_type}"
            )
            return False

    builder.store(val, var_ptr)
    logger.info(f"Assigned value to variable {var_name}")
    return True

View File

@ -1,72 +1,63 @@
"""
Binary operations handling for BPF programs.
This module provides functions to handle binary operations (add, subtract,
multiply, etc.) and emit the corresponding LLVM IR instructions.
"""
import ast import ast
from llvmlite import ir from llvmlite import ir
from logging import Logger from logging import Logger
import logging import logging
from pythonbpf.expr import get_base_type_and_depth, deref_to_depth, eval_expr
logger: Logger = logging.getLogger(__name__) logger: Logger = logging.getLogger(__name__)
def get_operand_value( def recursive_dereferencer(var, builder):
func, module, operand, builder, local_sym_tab, map_sym_tab, structs_sym_tab=None """dereference until primitive type comes out"""
): # TODO: Not worrying about stack overflow for now
logger.info(f"Dereferencing {var}, type is {var.type}")
if isinstance(var.type, ir.PointerType):
a = builder.load(var)
return recursive_dereferencer(a, builder)
elif isinstance(var.type, ir.IntType):
return var
else:
raise TypeError(f"Unsupported type for dereferencing: {var.type}")
def get_operand_value(operand, builder, local_sym_tab):
"""Extract the value from an operand, handling variables and constants.""" """Extract the value from an operand, handling variables and constants."""
logger.info(f"Getting operand value for: {ast.dump(operand)}")
if isinstance(operand, ast.Name): if isinstance(operand, ast.Name):
if operand.id in local_sym_tab: if operand.id in local_sym_tab:
var = local_sym_tab[operand.id].var return recursive_dereferencer(local_sym_tab[operand.id].var, builder)
var_type = var.type
base_type, depth = get_base_type_and_depth(var_type)
logger.info(f"var is {var}, base_type is {base_type}, depth is {depth}")
val = deref_to_depth(func, builder, var, depth)
return val
raise ValueError(f"Undefined variable: {operand.id}") raise ValueError(f"Undefined variable: {operand.id}")
elif isinstance(operand, ast.Constant): elif isinstance(operand, ast.Constant):
if isinstance(operand.value, int): if isinstance(operand.value, int):
cst = ir.Constant(ir.IntType(64), int(operand.value)) return ir.Constant(ir.IntType(64), operand.value)
return cst
raise TypeError(f"Unsupported constant type: {type(operand.value)}") raise TypeError(f"Unsupported constant type: {type(operand.value)}")
elif isinstance(operand, ast.BinOp): elif isinstance(operand, ast.BinOp):
res = handle_binary_op_impl( return handle_binary_op_impl(operand, builder, local_sym_tab)
func, module, operand, builder, local_sym_tab, map_sym_tab, structs_sym_tab
)
return res
else:
res = eval_expr(
func, module, builder, operand, local_sym_tab, map_sym_tab, structs_sym_tab
)
if res is None:
raise ValueError(f"Failed to evaluate call expression: {operand}")
val, _ = res
logger.info(f"Evaluated expr to {val} of type {val.type}")
base_type, depth = get_base_type_and_depth(val.type)
if depth > 0:
val = deref_to_depth(func, builder, val, depth)
return val
raise TypeError(f"Unsupported operand type: {type(operand)}") raise TypeError(f"Unsupported operand type: {type(operand)}")
def handle_binary_op_impl( def handle_binary_op_impl(rval, builder, local_sym_tab):
func, module, rval, builder, local_sym_tab, map_sym_tab, structs_sym_tab=None """
): Handle binary operations and emit corresponding LLVM IR instructions.
Args:
rval: The AST BinOp node representing the binary operation
builder: LLVM IR builder for emitting instructions
local_sym_tab: Symbol table mapping variable names to their IR representations
Returns:
The LLVM IR value representing the result of the binary operation
"""
op = rval.op op = rval.op
left = get_operand_value( left = get_operand_value(rval.left, builder, local_sym_tab)
func, module, rval.left, builder, local_sym_tab, map_sym_tab, structs_sym_tab right = get_operand_value(rval.right, builder, local_sym_tab)
)
right = get_operand_value(
func, module, rval.right, builder, local_sym_tab, map_sym_tab, structs_sym_tab
)
logger.info(f"left is {left}, right is {right}, op is {op}") logger.info(f"left is {left}, right is {right}, op is {op}")
# NOTE: Before doing the operation, if the operands are integers
# we always extend them to i64. The assignment to LHS will take
# care of truncation if needed.
if isinstance(left.type, ir.IntType) and left.type.width < 64:
left = builder.sext(left, ir.IntType(64))
if isinstance(right.type, ir.IntType) and right.type.width < 64:
right = builder.sext(right, ir.IntType(64))
# Map AST operation nodes to LLVM IR builder methods # Map AST operation nodes to LLVM IR builder methods
op_map = { op_map = {
ast.Add: builder.add, ast.Add: builder.add,
@ -89,19 +80,20 @@ def handle_binary_op_impl(
raise SyntaxError("Unsupported binary operation") raise SyntaxError("Unsupported binary operation")
def handle_binary_op( def handle_binary_op(rval, builder, var_name, local_sym_tab):
func, """
module, Handle binary operations and optionally store the result to a variable.
rval,
builder, Args:
var_name, rval: The AST BinOp node representing the binary operation
local_sym_tab, builder: LLVM IR builder for emitting instructions
map_sym_tab, var_name: Optional variable name to store the result
structs_sym_tab=None, local_sym_tab: Symbol table mapping variable names to their IR representations
):
result = handle_binary_op_impl( Returns:
func, module, rval, builder, local_sym_tab, map_sym_tab, structs_sym_tab A tuple of (result_value, result_type)
) """
result = handle_binary_op_impl(rval, builder, local_sym_tab)
if var_name and var_name in local_sym_tab: if var_name and var_name in local_sym_tab:
logger.info( logger.info(
f"Storing result {result} into variable {local_sym_tab[var_name].var}" f"Storing result {result} into variable {local_sym_tab[var_name].var}"

View File

@ -1,3 +1,11 @@
"""
Code generation module for PythonBPF.
This module handles the conversion of Python BPF programs to LLVM IR and
object files. It provides the main compilation pipeline from Python AST
to BPF bytecode.
"""
import ast import ast
from llvmlite import ir from llvmlite import ir
from .license_pass import license_processing from .license_pass import license_processing
@ -37,6 +45,14 @@ def find_bpf_chunks(tree):
def processor(source_code, filename, module): def processor(source_code, filename, module):
"""
Process Python source code and convert BPF-decorated functions to LLVM IR.
Args:
source_code: The Python source code to process
filename: The name of the source file
module: The LLVM IR module to populate
"""
tree = ast.parse(source_code, filename) tree = ast.parse(source_code, filename)
logger.debug(ast.dump(tree, indent=4)) logger.debug(ast.dump(tree, indent=4))
@ -56,6 +72,17 @@ def processor(source_code, filename, module):
def compile_to_ir(filename: str, output: str, loglevel=logging.INFO): def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
"""
Compile a Python BPF program to LLVM IR.
Args:
filename: Path to the Python source file containing BPF programs
output: Path where the LLVM IR (.ll) file will be written
loglevel: Logging level for compilation messages
Returns:
Path to the generated LLVM IR file
"""
logging.basicConfig( logging.basicConfig(
level=loglevel, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s" level=loglevel, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s"
) )
@ -129,6 +156,18 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
def compile(loglevel=logging.INFO) -> bool: def compile(loglevel=logging.INFO) -> bool:
"""
Compile the calling Python BPF program to an object file.
This function should be called from a Python file containing BPF programs.
It will compile the calling file to LLVM IR and then to a BPF object file.
Args:
loglevel: Logging level for compilation messages
Returns:
True if compilation succeeded, False otherwise
"""
# Look one level up the stack to the caller of this function # Look one level up the stack to the caller of this function
caller_frame = inspect.stack()[1] caller_frame = inspect.stack()[1]
caller_file = Path(caller_frame.filename).resolve() caller_file = Path(caller_frame.filename).resolve()
@ -162,6 +201,18 @@ def compile(loglevel=logging.INFO) -> bool:
def BPF(loglevel=logging.INFO) -> BpfProgram: def BPF(loglevel=logging.INFO) -> BpfProgram:
"""
Compile the calling Python BPF program and return a BpfProgram object.
This function compiles the calling file's BPF programs to an object file
and loads it into a BpfProgram object for immediate use.
Args:
loglevel: Logging level for compilation messages
Returns:
A BpfProgram object that can be used to load and attach BPF programs
"""
caller_frame = inspect.stack()[1] caller_frame = inspect.stack()[1]
src = inspect.getsource(caller_frame.frame) src = inspect.getsource(caller_frame.frame)
with tempfile.NamedTemporaryFile( with tempfile.NamedTemporaryFile(

View File

@ -1,3 +1,5 @@
"""Debug information generation for BPF programs (DWARF/BTF)."""
from .dwarf_constants import * # noqa: F403 from .dwarf_constants import * # noqa: F403
from .dtypes import * # noqa: F403 from .dtypes import * # noqa: F403
from .debug_info_generator import DebugInfoGenerator from .debug_info_generator import DebugInfoGenerator

View File

@ -8,11 +8,31 @@ from typing import Any, List
class DebugInfoGenerator: class DebugInfoGenerator:
"""
Generator for DWARF/BTF debug information in LLVM IR modules.
This class provides methods to create debug metadata for BPF programs,
including types, structs, globals, and compilation units.
"""
def __init__(self, module): def __init__(self, module):
"""
Initialize the debug info generator.
Args:
module: LLVM IR module to attach debug info to
"""
self.module = module self.module = module
self._type_cache = {} # Cache for common debug types self._type_cache = {} # Cache for common debug types
def generate_file_metadata(self, filename, dirname): def generate_file_metadata(self, filename, dirname):
"""
Generate file metadata for debug info.
Args:
filename: Name of the source file
dirname: Directory containing the source file
"""
self.module._file_metadata = self.module.add_debug_info( self.module._file_metadata = self.module.add_debug_info(
"DIFile", "DIFile",
{ # type: ignore { # type: ignore
@ -24,6 +44,15 @@ class DebugInfoGenerator:
def generate_debug_cu( def generate_debug_cu(
self, language, producer: str, is_optimized: bool, is_distinct: bool self, language, producer: str, is_optimized: bool, is_distinct: bool
): ):
"""
Generate debug compile unit metadata.
Args:
language: DWARF language code (e.g., DW_LANG_C11)
producer: Compiler/producer string
is_optimized: Whether the code is optimized
is_distinct: Whether the compile unit should be distinct
"""
self.module._debug_compile_unit = self.module.add_debug_info( self.module._debug_compile_unit = self.module.add_debug_info(
"DICompileUnit", "DICompileUnit",
{ # type: ignore { # type: ignore
@ -83,6 +112,16 @@ class DebugInfoGenerator:
@staticmethod @staticmethod
def _compute_array_size(base_type: Any, count: int) -> int: def _compute_array_size(base_type: Any, count: int) -> int:
"""
Compute the size of an array in bits.
Args:
base_type: The base type of the array
count: Number of elements in the array
Returns:
Total size in bits
"""
# Extract size from base_type if possible # Extract size from base_type if possible
# For simplicity, assuming base_type has a size attribute # For simplicity, assuming base_type has a size attribute
return getattr(base_type, "size", 32) * count return getattr(base_type, "size", 32) * count

View File

@ -1,7 +1,10 @@
"""Debug information types and constants."""
import llvmlite.ir as ir import llvmlite.ir as ir
class DwarfBehaviorEnum: class DwarfBehaviorEnum:
"""DWARF module flag behavior constants for LLVM."""
ERROR_IF_MISMATCH = ir.Constant(ir.IntType(32), 1) ERROR_IF_MISMATCH = ir.Constant(ir.IntType(32), 1)
WARNING_IF_MISMATCH = ir.Constant(ir.IntType(32), 2) WARNING_IF_MISMATCH = ir.Constant(ir.IntType(32), 2)
OVERRIDE_USE_LARGEST = ir.Constant(ir.IntType(32), 7) OVERRIDE_USE_LARGEST = ir.Constant(ir.IntType(32), 7)

View File

@ -1,3 +1,9 @@
"""
DWARF debugging format constants.
Generated constants from dwarf.h for use in debug information generation.
"""
# generated constants from dwarf.h # generated constants from dwarf.h
DW_UT_compile = 0x01 DW_UT_compile = 0x01

View File

@ -1,3 +1,11 @@
"""
Decorators for marking BPF functions, maps, structs, and globals.
This module provides the core decorators used to annotate Python code
for BPF compilation.
"""
def bpf(func): def bpf(func):
"""Decorator to mark a function for BPF compilation.""" """Decorator to mark a function for BPF compilation."""
func._is_bpf = True func._is_bpf = True
@ -23,7 +31,17 @@ def struct(cls):
def section(name: str): def section(name: str):
"""
Decorator to specify the ELF section name for a BPF program.
Args:
name: The section name (e.g., 'xdp', 'tracepoint/syscalls/sys_enter_execve')
Returns:
A decorator function that marks the function with the section name
"""
def wrapper(fn): def wrapper(fn):
"""Decorator that sets the section name on the function."""
fn._section = name fn._section = name
return fn return fn

View File

@ -1,10 +1,6 @@
from .expr_pass import eval_expr, handle_expr """Expression evaluation and processing for BPF programs."""
from .type_normalization import convert_to_bool, get_base_type_and_depth, deref_to_depth
__all__ = [ from .expr_pass import eval_expr, handle_expr
"eval_expr", from .type_normalization import convert_to_bool
"handle_expr",
"convert_to_bool", __all__ = ["eval_expr", "handle_expr", "convert_to_bool"]
"get_base_type_and_depth",
"deref_to_depth",
]

View File

@ -1,3 +1,11 @@
"""
Expression evaluation and LLVM IR generation.
This module handles the evaluation of Python expressions in BPF programs,
including variables, constants, function calls, comparisons, boolean
operations, and more.
"""
import ast import ast
from llvmlite import ir from llvmlite import ir
from logging import Logger from logging import Logger
@ -26,7 +34,7 @@ def _handle_constant_expr(expr: ast.Constant):
if isinstance(expr.value, int) or isinstance(expr.value, bool): if isinstance(expr.value, int) or isinstance(expr.value, bool):
return ir.Constant(ir.IntType(64), int(expr.value)), ir.IntType(64) return ir.Constant(ir.IntType(64), int(expr.value)), ir.IntType(64)
else: else:
logger.error(f"Unsupported constant type {ast.dump(expr)}") logger.error("Unsupported constant type")
return None return None
@ -176,28 +184,21 @@ def _handle_unary_op(
structs_sym_tab=None, structs_sym_tab=None,
): ):
"""Handle ast.UnaryOp expressions.""" """Handle ast.UnaryOp expressions."""
if not isinstance(expr.op, ast.Not) and not isinstance(expr.op, ast.USub): if not isinstance(expr.op, ast.Not):
logger.error("Only 'not' and '-' unary operators are supported") logger.error("Only 'not' unary operator is supported")
return None return None
from pythonbpf.binary_ops import get_operand_value operand = eval_expr(
func, module, builder, expr.operand, local_sym_tab, map_sym_tab, structs_sym_tab
operand = get_operand_value(
func, module, expr.operand, builder, local_sym_tab, map_sym_tab, structs_sym_tab
) )
if operand is None: if operand is None:
logger.error("Failed to evaluate operand for unary operation") logger.error("Failed to evaluate operand for unary operation")
return None return None
if isinstance(expr.op, ast.Not): operand_val, operand_type = operand
true_const = ir.Constant(ir.IntType(1), 1) true_const = ir.Constant(ir.IntType(1), 1)
result = builder.xor(convert_to_bool(builder, operand), true_const) result = builder.xor(convert_to_bool(builder, operand_val), true_const)
return result, ir.IntType(1) return result, ir.IntType(1)
elif isinstance(expr.op, ast.USub):
# Multiply by -1
neg_one = ir.Constant(ir.IntType(64), -1)
result = builder.mul(operand, neg_one)
return result, ir.IntType(64)
def _handle_and_op(func, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab): def _handle_and_op(func, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab):
@ -339,6 +340,21 @@ def eval_expr(
map_sym_tab, map_sym_tab,
structs_sym_tab=None, structs_sym_tab=None,
): ):
"""
Evaluate an expression and return its LLVM IR value and type.
Args:
func: The LLVM IR function being built
module: The LLVM IR module
builder: LLVM IR builder
expr: The AST expression node to evaluate
local_sym_tab: Local symbol table
map_sym_tab: Map symbol table
structs_sym_tab: Struct symbol table
Returns:
A tuple of (value, type) or None if evaluation fails
"""
logger.info(f"Evaluating expression: {ast.dump(expr)}") logger.info(f"Evaluating expression: {ast.dump(expr)}")
if isinstance(expr, ast.Name): if isinstance(expr, ast.Name):
return _handle_name_expr(expr, local_sym_tab, builder) return _handle_name_expr(expr, local_sym_tab, builder)
@ -409,16 +425,7 @@ def eval_expr(
elif isinstance(expr, ast.BinOp): elif isinstance(expr, ast.BinOp):
from pythonbpf.binary_ops import handle_binary_op from pythonbpf.binary_ops import handle_binary_op
return handle_binary_op( return handle_binary_op(expr, builder, None, local_sym_tab)
func,
module,
expr,
builder,
None,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
elif isinstance(expr, ast.Compare): elif isinstance(expr, ast.Compare):
return _handle_compare( return _handle_compare(
func, module, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab func, module, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab

View File

@ -1,3 +1,10 @@
"""
Type normalization and comparison operations for expressions.
This module provides utilities for normalizing types between expressions,
handling pointer dereferencing, and generating comparison operations.
"""
from llvmlite import ir from llvmlite import ir
import logging import logging
import ast import ast
@ -16,8 +23,16 @@ COMPARISON_OPS = {
} }
def get_base_type_and_depth(ir_type): def _get_base_type_and_depth(ir_type):
"""Get the base type for pointer types.""" """
Get the base type and pointer depth for an LLVM IR type.
Args:
ir_type: The LLVM IR type to analyze
Returns:
A tuple of (base_type, depth) where depth is the number of pointer levels
"""
cur_type = ir_type cur_type = ir_type
depth = 0 depth = 0
while isinstance(cur_type, ir.PointerType): while isinstance(cur_type, ir.PointerType):
@ -26,8 +41,19 @@ def get_base_type_and_depth(ir_type):
return cur_type, depth return cur_type, depth
def deref_to_depth(func, builder, val, target_depth): def _deref_to_depth(func, builder, val, target_depth):
"""Dereference a pointer to a certain depth.""" """
Dereference a pointer to a certain depth with null checks.
Args:
func: The LLVM IR function being built
builder: LLVM IR builder
val: The pointer value to dereference
target_depth: Number of levels to dereference
Returns:
The dereferenced value, or None if dereferencing fails
"""
cur_val = val cur_val = val
cur_type = val.type cur_type = val.type
@ -73,7 +99,18 @@ def deref_to_depth(func, builder, val, target_depth):
def _normalize_types(func, builder, lhs, rhs): def _normalize_types(func, builder, lhs, rhs):
"""Normalize types for comparison.""" """
Normalize types for comparison by casting or dereferencing as needed.
Args:
func: The LLVM IR function being built
builder: LLVM IR builder
lhs: Left-hand side value
rhs: Right-hand side value
Returns:
A tuple of (normalized_lhs, normalized_rhs) or (None, None) on error
"""
logger.info(f"Normalizing types: {lhs.type} vs {rhs.type}") logger.info(f"Normalizing types: {lhs.type} vs {rhs.type}")
if isinstance(lhs.type, ir.IntType) and isinstance(rhs.type, ir.IntType): if isinstance(lhs.type, ir.IntType) and isinstance(rhs.type, ir.IntType):
@ -88,18 +125,27 @@ def _normalize_types(func, builder, lhs, rhs):
logger.error(f"Type mismatch: {lhs.type} vs {rhs.type}") logger.error(f"Type mismatch: {lhs.type} vs {rhs.type}")
return None, None return None, None
else: else:
lhs_base, lhs_depth = get_base_type_and_depth(lhs.type) lhs_base, lhs_depth = _get_base_type_and_depth(lhs.type)
rhs_base, rhs_depth = get_base_type_and_depth(rhs.type) rhs_base, rhs_depth = _get_base_type_and_depth(rhs.type)
if lhs_base == rhs_base: if lhs_base == rhs_base:
if lhs_depth < rhs_depth: if lhs_depth < rhs_depth:
rhs = deref_to_depth(func, builder, rhs, rhs_depth - lhs_depth) rhs = _deref_to_depth(func, builder, rhs, rhs_depth - lhs_depth)
elif rhs_depth < lhs_depth: elif rhs_depth < lhs_depth:
lhs = deref_to_depth(func, builder, lhs, lhs_depth - rhs_depth) lhs = _deref_to_depth(func, builder, lhs, lhs_depth - rhs_depth)
return _normalize_types(func, builder, lhs, rhs) return _normalize_types(func, builder, lhs, rhs)
def convert_to_bool(builder, val): def convert_to_bool(builder, val):
"""Convert a value to boolean.""" """
Convert an LLVM IR value to a boolean (i1) type.
Args:
builder: LLVM IR builder
val: The value to convert
Returns:
An i1 boolean value
"""
if val.type == ir.IntType(1): if val.type == ir.IntType(1):
return val return val
if isinstance(val.type, ir.PointerType): if isinstance(val.type, ir.PointerType):
@ -110,7 +156,19 @@ def convert_to_bool(builder, val):
def handle_comparator(func, builder, op, lhs, rhs): def handle_comparator(func, builder, op, lhs, rhs):
"""Handle comparison operations.""" """
Handle comparison operations between two values.
Args:
func: The LLVM IR function being built
builder: LLVM IR builder
op: The AST comparison operator node
lhs: Left-hand side value
rhs: Right-hand side value
Returns:
A tuple of (result, ir.IntType(1)) or None on error
"""
if lhs.type != rhs.type: if lhs.type != rhs.type:
lhs, rhs = _normalize_types(func, builder, lhs, rhs) lhs, rhs = _normalize_types(func, builder, lhs, rhs)

View File

@ -1,3 +1,5 @@
"""BPF function processing and LLVM IR generation."""
from .functions_pass import func_proc from .functions_pass import func_proc
__all__ = ["func_proc"] __all__ = ["func_proc"]

View File

@ -1,3 +1,5 @@
"""Registry for statement handler functions."""
from typing import Dict from typing import Dict
@ -11,6 +13,7 @@ class StatementHandlerRegistry:
"""Register a handler for a specific statement type.""" """Register a handler for a specific statement type."""
def decorator(handler): def decorator(handler):
"""Decorator that registers the handler."""
cls._handlers[stmt_type] = handler cls._handlers[stmt_type] = handler
return handler return handler

View File

@ -1,18 +1,21 @@
"""
BPF function processing and LLVM IR generation.
This module handles the core function processing, converting Python function
definitions into LLVM IR for BPF programs. It manages local variables,
control flow, and statement processing.
"""
from llvmlite import ir from llvmlite import ir
import ast import ast
import logging import logging
from typing import Any
from dataclasses import dataclass
from pythonbpf.helper import ( from pythonbpf.helper import HelperHandlerRegistry, handle_helper_call
HelperHandlerRegistry,
reset_scratch_pool,
)
from pythonbpf.type_deducer import ctypes_to_ir from pythonbpf.type_deducer import ctypes_to_ir
from pythonbpf.binary_ops import handle_binary_op
from pythonbpf.expr import eval_expr, handle_expr, convert_to_bool from pythonbpf.expr import eval_expr, handle_expr, convert_to_bool
from pythonbpf.assign_pass import (
handle_variable_assignment,
handle_struct_field_assignment,
)
from pythonbpf.allocation_pass import handle_assign_allocation, allocate_temp_pool
from .return_utils import _handle_none_return, _handle_xdp_return, _is_xdp_name from .return_utils import _handle_none_return, _handle_xdp_return, _is_xdp_name
@ -20,6 +23,27 @@ from .return_utils import _handle_none_return, _handle_xdp_return, _is_xdp_name
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@dataclass
class LocalSymbol:
    """
    Symbol-table entry describing one local variable of a BPF function.

    Attributes:
        var: The alloca instruction that backs the variable.
        ir_type: LLVM IR type of the stored value.
        metadata: Optional extra information (e.g., the struct type name).
    """

    var: ir.AllocaInstr
    ir_type: ir.Type
    metadata: Any = None

    def __iter__(self):
        """Yield var, ir_type and metadata so the entry unpacks like a 3-tuple."""
        yield from (self.var, self.ir_type, self.metadata)
def get_probe_string(func_node): def get_probe_string(func_node):
"""Extract the probe string from the decorator of the function node.""" """Extract the probe string from the decorator of the function node."""
# TODO: right now we have the whole string in the section decorator # TODO: right now we have the whole string in the section decorator
@ -41,54 +65,216 @@ def handle_assign(
func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_sym_tab func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_sym_tab
): ):
"""Handle assignment statements in the function body.""" """Handle assignment statements in the function body."""
# TODO: Support this later
# GH #37
if len(stmt.targets) != 1: if len(stmt.targets) != 1:
logger.error("Multi-target assignment is not supported for now") logger.info("Unsupported multiassignment")
return return
num_types = ("c_int32", "c_int64", "c_uint32", "c_uint64")
target = stmt.targets[0] target = stmt.targets[0]
logger.info(f"Handling assignment to {ast.dump(target)}")
if not isinstance(target, ast.Name) and not isinstance(target, ast.Attribute):
logger.info("Unsupported assignment target")
return
var_name = target.id if isinstance(target, ast.Name) else target.value.id
rval = stmt.value rval = stmt.value
if isinstance(target, ast.Name):
# NOTE: Simple variable assignment case: x = 5
var_name = target.id
result = handle_variable_assignment(
func,
module,
builder,
var_name,
rval,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if not result:
logger.error(f"Failed to handle assignment to {var_name}")
return
if isinstance(target, ast.Attribute): if isinstance(target, ast.Attribute):
# NOTE: Struct field assignment case: pkt.field = value # struct field assignment
handle_struct_field_assignment( field_name = target.attr
func, if var_name in local_sym_tab:
module, struct_type = local_sym_tab[var_name].metadata
builder, struct_info = structs_sym_tab[struct_type]
target, if field_name in struct_info.fields:
rval, field_ptr = struct_info.gep(
local_sym_tab, builder, local_sym_tab[var_name].var, field_name
map_sym_tab, )
structs_sym_tab, val = eval_expr(
) func,
return module,
builder,
# Unsupported target type rval,
logger.error(f"Unsupported assignment target: {ast.dump(target)}") local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if isinstance(struct_info.field_type(field_name), ir.ArrayType) and val[
1
] == ir.PointerType(ir.IntType(8)):
# TODO: Figure it out, not a priority rn
# Special case for string assignment to char array
# str_len = struct_info["field_types"][field_idx].count
# assign_string_to_array(builder, field_ptr, val[0], str_len)
# print(f"Assigned to struct field {var_name}.{field_name}")
pass
if val is None:
logger.info("Failed to evaluate struct field assignment")
return
logger.info(field_ptr)
builder.store(val[0], field_ptr)
logger.info(f"Assigned to struct field {var_name}.{field_name}")
return
elif isinstance(rval, ast.Constant):
if isinstance(rval.value, bool):
if rval.value:
builder.store(
ir.Constant(ir.IntType(1), 1), local_sym_tab[var_name].var
)
else:
builder.store(
ir.Constant(ir.IntType(1), 0), local_sym_tab[var_name].var
)
logger.info(f"Assigned constant {rval.value} to {var_name}")
elif isinstance(rval.value, int):
# Assume c_int64 for now
# var = builder.alloca(ir.IntType(64), name=var_name)
# var.align = 8
builder.store(
ir.Constant(ir.IntType(64), rval.value), local_sym_tab[var_name].var
)
logger.info(f"Assigned constant {rval.value} to {var_name}")
elif isinstance(rval.value, str):
str_val = rval.value.encode("utf-8") + b"\x00"
str_const = ir.Constant(
ir.ArrayType(ir.IntType(8), len(str_val)), bytearray(str_val)
)
global_str = ir.GlobalVariable(
module, str_const.type, name=f"{var_name}_str"
)
global_str.linkage = "internal"
global_str.global_constant = True
global_str.initializer = str_const
str_ptr = builder.bitcast(global_str, ir.PointerType(ir.IntType(8)))
builder.store(str_ptr, local_sym_tab[var_name].var)
logger.info(f"Assigned string constant '{rval.value}' to {var_name}")
else:
logger.info("Unsupported constant type")
elif isinstance(rval, ast.Call):
if isinstance(rval.func, ast.Name):
call_type = rval.func.id
logger.info(f"Assignment call type: {call_type}")
if (
call_type in num_types
and len(rval.args) == 1
and isinstance(rval.args[0], ast.Constant)
and isinstance(rval.args[0].value, int)
):
ir_type = ctypes_to_ir(call_type)
# var = builder.alloca(ir_type, name=var_name)
# var.align = ir_type.width // 8
builder.store(
ir.Constant(ir_type, rval.args[0].value),
local_sym_tab[var_name].var,
)
logger.info(
f"Assigned {call_type} constant {rval.args[0].value} to {var_name}"
)
elif HelperHandlerRegistry.has_handler(call_type):
# var = builder.alloca(ir.IntType(64), name=var_name)
# var.align = 8
val = handle_helper_call(
rval,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
builder.store(val[0], local_sym_tab[var_name].var)
logger.info(f"Assigned constant {rval.func.id} to {var_name}")
elif call_type == "deref" and len(rval.args) == 1:
logger.info(f"Handling deref assignment {ast.dump(rval)}")
val = eval_expr(
func,
module,
builder,
rval,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if val is None:
logger.info("Failed to evaluate deref argument")
return
logger.info(f"Dereferenced value: {val}, storing in {var_name}")
builder.store(val[0], local_sym_tab[var_name].var)
logger.info(f"Dereferenced and assigned to {var_name}")
elif call_type in structs_sym_tab and len(rval.args) == 0:
struct_info = structs_sym_tab[call_type]
ir_type = struct_info.ir_type
# var = builder.alloca(ir_type, name=var_name)
# Null init
builder.store(ir.Constant(ir_type, None), local_sym_tab[var_name].var)
logger.info(f"Assigned struct {call_type} to {var_name}")
else:
logger.info(f"Unsupported assignment call type: {call_type}")
elif isinstance(rval.func, ast.Attribute):
logger.info(f"Assignment call attribute: {ast.dump(rval.func)}")
if isinstance(rval.func.value, ast.Name):
if rval.func.value.id in map_sym_tab:
map_name = rval.func.value.id
method_name = rval.func.attr
if HelperHandlerRegistry.has_handler(method_name):
val = handle_helper_call(
rval,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
builder.store(val[0], local_sym_tab[var_name].var)
else:
# TODO: probably a struct access
logger.info(f"TODO STRUCT ACCESS {ast.dump(rval)}")
elif isinstance(rval.func.value, ast.Call) and isinstance(
rval.func.value.func, ast.Name
):
map_name = rval.func.value.func.id
method_name = rval.func.attr
if map_name in map_sym_tab:
if HelperHandlerRegistry.has_handler(method_name):
val = handle_helper_call(
rval,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
# var = builder.alloca(ir.IntType(64), name=var_name)
# var.align = 8
builder.store(val[0], local_sym_tab[var_name].var)
else:
logger.info("Unsupported assignment call structure")
else:
logger.info("Unsupported assignment call function type")
elif isinstance(rval, ast.BinOp):
handle_binary_op(rval, builder, var_name, local_sym_tab)
else:
logger.info("Unsupported assignment value type")
def handle_cond( def handle_cond(
func, module, builder, cond, local_sym_tab, map_sym_tab, structs_sym_tab=None func, module, builder, cond, local_sym_tab, map_sym_tab, structs_sym_tab=None
): ):
"""
Evaluate a condition expression and convert it to a boolean value.
Args:
func: The LLVM IR function being built
module: The LLVM IR module
builder: LLVM IR builder
cond: The AST condition node to evaluate
local_sym_tab: Local symbol table
map_sym_tab: Map symbol table
structs_sym_tab: Struct symbol table
Returns:
LLVM IR boolean value representing the condition result
"""
val = eval_expr( val = eval_expr(
func, module, builder, cond, local_sym_tab, map_sym_tab, structs_sym_tab func, module, builder, cond, local_sym_tab, map_sym_tab, structs_sym_tab
)[0] )[0]
@ -144,6 +330,18 @@ def handle_if(
def handle_return(builder, stmt, local_sym_tab, ret_type): def handle_return(builder, stmt, local_sym_tab, ret_type):
"""
Handle return statements in BPF functions.
Args:
builder: LLVM IR builder
stmt: The AST Return node
local_sym_tab: Local symbol table
ret_type: Expected return type
Returns:
True if a return was emitted, False otherwise
"""
logger.info(f"Handling return statement: {ast.dump(stmt)}") logger.info(f"Handling return statement: {ast.dump(stmt)}")
if stmt.value is None: if stmt.value is None:
return _handle_none_return(builder) return _handle_none_return(builder)
@ -175,8 +373,24 @@ def process_stmt(
did_return, did_return,
ret_type=ir.IntType(64), ret_type=ir.IntType(64),
): ):
"""
Process a single statement in a BPF function.
Args:
func: The LLVM IR function being built
module: The LLVM IR module
builder: LLVM IR builder
stmt: The AST statement node to process
local_sym_tab: Local symbol table
map_sym_tab: Map symbol table
structs_sym_tab: Struct symbol table
did_return: Whether a return has been emitted
ret_type: Expected return type
Returns:
True if a return was emitted, False otherwise
"""
logger.info(f"Processing statement: {ast.dump(stmt)}") logger.info(f"Processing statement: {ast.dump(stmt)}")
reset_scratch_pool()
if isinstance(stmt, ast.Expr): if isinstance(stmt, ast.Expr):
handle_expr( handle_expr(
func, func,
@ -207,107 +421,138 @@ def process_stmt(
return did_return return did_return
def handle_if_allocation(
    module, builder, stmt, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab
):
    """
    Run the allocation pass over both branches of an ``if`` statement.

    Each non-empty branch (``stmt.body`` first, then ``stmt.orelse``) is
    handed to ``allocate_mem`` with the same context arguments, so variables
    assigned inside either branch get their allocation as well.

    Args:
        module: The LLVM IR module
        builder: LLVM IR builder
        stmt: The AST If node whose branches are scanned
        func: The LLVM IR function being built
        ret_type: Expected return type
        map_sym_tab: Map symbol table
        local_sym_tab: Local symbol table
        structs_sym_tab: Struct symbol table
    """
    for branch in (stmt.body, stmt.orelse):
        # An empty branch (e.g. a missing else) has nothing to allocate.
        if not branch:
            continue
        allocate_mem(
            module,
            builder,
            branch,
            func,
            ret_type,
            map_sym_tab,
            local_sym_tab,
            structs_sym_tab,
        )
def count_temps_in_call(call_node, local_sym_tab):
    """
    Count the number of temporary variables needed for a function call.

    Only calls to registered helpers are counted; any other call needs no
    temporaries. For a helper call, every argument needs a temporary unless
    it is a plain name, or a field access on a variable that already exists
    in ``local_sym_tab``.

    Args:
        call_node: The AST Call node to inspect
        local_sym_tab: Local symbol table

    Returns:
        The number of temporaries the call requires (0 for non-helper calls)
    """
    callee = call_node.func

    # NOTE: We exclude print calls for now
    if isinstance(callee, ast.Name):
        is_helper = (
            HelperHandlerRegistry.has_handler(callee.id) and callee.id != "print"
        )
    elif isinstance(callee, ast.Attribute):
        is_helper = HelperHandlerRegistry.has_handler(callee.attr)
    else:
        is_helper = False

    if not is_helper:
        return 0

    def _needs_temp(arg):
        """Return True when the argument has no existing storage to point at."""
        if isinstance(arg, ast.Name):
            return False
        # For struct fields passed as an argument, the struct object should
        # already exist in local_sym_tab. The base must be a plain name:
        # nested accesses (a.b.c) or call results (f().x) have no ``.id``,
        # so they are treated as needing a temporary instead of raising
        # AttributeError.
        if (
            isinstance(arg, ast.Attribute)
            and isinstance(arg.value, ast.Name)
            and arg.value.id in local_sym_tab
        ):
            return False
        return True

    return sum(1 for arg in call_node.args if _needs_temp(arg))
def allocate_mem( def allocate_mem(
module, builder, body, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab module, builder, body, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab
): ):
max_temps_needed = 0 """
Pre-allocate stack memory for local variables in a BPF function.
def update_max_temps_for_stmt(stmt):
nonlocal max_temps_needed This function scans the function body and creates alloca instructions
temps_needed = 0 for all local variables before processing the function statements.
if isinstance(stmt, ast.If): Args:
for s in stmt.body: module: The LLVM IR module
update_max_temps_for_stmt(s) builder: LLVM IR builder
for s in stmt.orelse: body: List of AST statements in the function body
update_max_temps_for_stmt(s) func: The LLVM IR function being built
return ret_type: Expected return type
map_sym_tab: Map symbol table
for node in ast.walk(stmt): local_sym_tab: Local symbol table to populate
if isinstance(node, ast.Call): structs_sym_tab: Struct symbol table
temps_needed += count_temps_in_call(node, local_sym_tab)
max_temps_needed = max(max_temps_needed, temps_needed) Returns:
Updated local symbol table
"""
for stmt in body: for stmt in body:
update_max_temps_for_stmt(stmt) has_metadata = False
# Handle allocations
if isinstance(stmt, ast.If): if isinstance(stmt, ast.If):
handle_if_allocation( if stmt.body:
module, local_sym_tab = allocate_mem(
builder, module,
stmt, builder,
func, stmt.body,
ret_type, func,
map_sym_tab, ret_type,
local_sym_tab, map_sym_tab,
structs_sym_tab, local_sym_tab,
) structs_sym_tab,
)
if stmt.orelse:
local_sym_tab = allocate_mem(
module,
builder,
stmt.orelse,
func,
ret_type,
map_sym_tab,
local_sym_tab,
structs_sym_tab,
)
elif isinstance(stmt, ast.Assign): elif isinstance(stmt, ast.Assign):
handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab) if len(stmt.targets) != 1:
logger.info("Unsupported multiassignment")
allocate_temp_pool(builder, max_temps_needed, local_sym_tab) continue
target = stmt.targets[0]
if not isinstance(target, ast.Name):
logger.info("Unsupported assignment target")
continue
var_name = target.id
rval = stmt.value
if var_name in local_sym_tab:
logger.info(f"Variable {var_name} already allocated")
continue
if isinstance(rval, ast.Call):
if isinstance(rval.func, ast.Name):
call_type = rval.func.id
if call_type in ("c_int32", "c_int64", "c_uint32", "c_uint64"):
ir_type = ctypes_to_ir(call_type)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
logger.info(
f"Pre-allocated variable {var_name} of type {call_type}"
)
elif HelperHandlerRegistry.has_handler(call_type):
# Assume return type is int64 for now
ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
logger.info(f"Pre-allocated variable {var_name} for helper")
elif call_type == "deref" and len(rval.args) == 1:
# Assume return type is int64 for now
ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
logger.info(f"Pre-allocated variable {var_name} for deref")
elif call_type in structs_sym_tab:
struct_info = structs_sym_tab[call_type]
ir_type = struct_info.ir_type
var = builder.alloca(ir_type, name=var_name)
has_metadata = True
logger.info(
f"Pre-allocated variable {var_name} for struct {call_type}"
)
elif isinstance(rval.func, ast.Attribute):
ir_type = ir.PointerType(ir.IntType(64))
var = builder.alloca(ir_type, name=var_name)
# var.align = ir_type.width // 8
logger.info(f"Pre-allocated variable {var_name} for map")
else:
logger.info("Unsupported assignment call function type")
continue
elif isinstance(rval, ast.Constant):
if isinstance(rval.value, bool):
ir_type = ir.IntType(1)
var = builder.alloca(ir_type, name=var_name)
var.align = 1
logger.info(f"Pre-allocated variable {var_name} of type c_bool")
elif isinstance(rval.value, int):
# Assume c_int64 for now
ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
logger.info(f"Pre-allocated variable {var_name} of type c_int64")
elif isinstance(rval.value, str):
ir_type = ir.PointerType(ir.IntType(8))
var = builder.alloca(ir_type, name=var_name)
var.align = 8
logger.info(f"Pre-allocated variable {var_name} of type string")
else:
logger.info("Unsupported constant type")
continue
elif isinstance(rval, ast.BinOp):
# Assume c_int64 for now
ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
logger.info(f"Pre-allocated variable {var_name} of type c_int64")
else:
logger.info("Unsupported assignment value type")
continue
if has_metadata:
local_sym_tab[var_name] = LocalSymbol(var, ir_type, call_type)
else:
local_sym_tab[var_name] = LocalSymbol(var, ir_type)
return local_sym_tab return local_sym_tab
@ -391,6 +636,16 @@ def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_t
def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab): def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab):
"""
Process all BPF function chunks and generate LLVM IR.
Args:
tree: The Python AST (not used in current implementation)
module: The LLVM IR module to add functions to
chunks: List of AST function nodes decorated with @bpf
map_sym_tab: Map symbol table
structs_sym_tab: Struct symbol table
"""
for func_node in chunks: for func_node in chunks:
is_global = False is_global = False
for decorator in func_node.decorator_list: for decorator in func_node.decorator_list:
@ -416,6 +671,18 @@ def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab):
def infer_return_type(func_node: ast.FunctionDef): def infer_return_type(func_node: ast.FunctionDef):
"""
Infer the return type of a BPF function from annotations or return statements.
Args:
func_node: The AST function node
Returns:
String representation of the return type (e.g., 'c_int64')
Raises:
TypeError: If func_node is not a FunctionDef
"""
if not isinstance(func_node, (ast.FunctionDef, ast.AsyncFunctionDef)): if not isinstance(func_node, (ast.FunctionDef, ast.AsyncFunctionDef)):
raise TypeError("Expected ast.FunctionDef") raise TypeError("Expected ast.FunctionDef")
if func_node.returns is not None: if func_node.returns is not None:
@ -434,6 +701,7 @@ def infer_return_type(func_node: ast.FunctionDef):
found_type = None found_type = None
def _expr_type(e): def _expr_type(e):
"""Helper function to extract type from an expression."""
if e is None: if e is None:
return "None" return "None"
if isinstance(e, ast.Constant): if isinstance(e, ast.Constant):

View File

@ -1,3 +1,10 @@
"""
Utility functions for handling return statements in BPF functions.
Provides handlers for different types of returns including XDP actions,
None returns, and standard returns.
"""
import logging import logging
import ast import ast

View File

@ -1,3 +1,10 @@
"""
Global variables and compiler metadata processing.
This module handles BPF global variables and emits the @llvm.compiler.used
metadata to prevent LLVM from optimizing away important symbols.
"""
from llvmlite import ir from llvmlite import ir
import ast import ast
@ -12,6 +19,16 @@ global_sym_tab = []
def populate_global_symbol_table(tree, module: ir.Module): def populate_global_symbol_table(tree, module: ir.Module):
"""
Populate the global symbol table with BPF functions, maps, and globals.
Args:
tree: The Python AST to scan for global symbols
module: The LLVM IR module (not used in current implementation)
Returns:
False (legacy return value)
"""
for node in tree.body: for node in tree.body:
if isinstance(node, ast.FunctionDef): if isinstance(node, ast.FunctionDef):
for dec in node.decorator_list: for dec in node.decorator_list:
@ -33,6 +50,17 @@ def populate_global_symbol_table(tree, module: ir.Module):
def emit_global(module: ir.Module, node, name): def emit_global(module: ir.Module, node, name):
"""
Emit a BPF global variable into the LLVM IR module.
Args:
module: The LLVM IR module to add the global variable to
node: The AST function node containing the global definition
name: The name of the global variable
Returns:
The created global variable
"""
logger.info(f"global identifier {name} processing") logger.info(f"global identifier {name} processing")
# deduce LLVM type from the annotated return # deduce LLVM type from the annotated return
if not isinstance(node.returns, ast.Name): if not isinstance(node.returns, ast.Name):
@ -117,7 +145,11 @@ def globals_processing(tree, module):
def emit_llvm_compiler_used(module: ir.Module, names: list[str]): def emit_llvm_compiler_used(module: ir.Module, names: list[str]):
""" """
Emit the @llvm.compiler.used global given a list of function/global names. Emit the @llvm.compiler.used global to prevent LLVM from optimizing away symbols.
Args:
module: The LLVM IR module to add the compiler.used metadata to
names: List of function/global names that must be preserved
""" """
ptr_ty = ir.PointerType() ptr_ty = ir.PointerType()
used_array_ty = ir.ArrayType(ptr_ty, len(names)) used_array_ty = ir.ArrayType(ptr_ty, len(names))
@ -138,6 +170,13 @@ def emit_llvm_compiler_used(module: ir.Module, names: list[str]):
def globals_list_creation(tree, module: ir.Module): def globals_list_creation(tree, module: ir.Module):
"""
Collect all BPF symbols and emit @llvm.compiler.used metadata.
Args:
tree: The Python AST to scan for symbols
module: The LLVM IR module to add metadata to
"""
collected = ["LICENSE"] collected = ["LICENSE"]
for node in tree.body: for node in tree.body:

View File

@ -1,10 +1,11 @@
from .helper_utils import HelperHandlerRegistry, reset_scratch_pool """BPF helper functions and handlers."""
from .helper_utils import HelperHandlerRegistry
from .bpf_helper_handler import handle_helper_call from .bpf_helper_handler import handle_helper_call
from .helpers import ktime, pid, deref, XDP_DROP, XDP_PASS from .helpers import ktime, pid, deref, XDP_DROP, XDP_PASS
__all__ = [ __all__ = [
"HelperHandlerRegistry", "HelperHandlerRegistry",
"reset_scratch_pool",
"handle_helper_call", "handle_helper_call",
"ktime", "ktime",
"pid", "pid",

View File

@ -1,3 +1,11 @@
"""
BPF helper function handlers for LLVM IR emission.
This module provides handlers for various BPF helper functions, emitting
the appropriate LLVM IR to call kernel BPF helpers like map operations,
printing, time functions, etc.
"""
import ast import ast
from llvmlite import ir from llvmlite import ir
from enum import Enum from enum import Enum
@ -16,6 +24,7 @@ logger: Logger = logging.getLogger(__name__)
class BPFHelperID(Enum): class BPFHelperID(Enum):
"""Enumeration of BPF helper function IDs."""
BPF_MAP_LOOKUP_ELEM = 1 BPF_MAP_LOOKUP_ELEM = 1
BPF_MAP_UPDATE_ELEM = 2 BPF_MAP_UPDATE_ELEM = 2
BPF_MAP_DELETE_ELEM = 3 BPF_MAP_DELETE_ELEM = 3
@ -34,7 +43,6 @@ def bpf_ktime_get_ns_emitter(
func, func,
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
map_sym_tab=None,
): ):
""" """
Emit LLVM IR for bpf_ktime_get_ns helper function call. Emit LLVM IR for bpf_ktime_get_ns helper function call.
@ -57,7 +65,6 @@ def bpf_map_lookup_elem_emitter(
func, func,
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
map_sym_tab=None,
): ):
""" """
Emit LLVM IR for bpf_map_lookup_elem helper function call. Emit LLVM IR for bpf_map_lookup_elem helper function call.
@ -66,17 +73,11 @@ def bpf_map_lookup_elem_emitter(
raise ValueError( raise ValueError(
f"Map lookup expects exactly one argument (key), got {len(call.args)}" f"Map lookup expects exactly one argument (key), got {len(call.args)}"
) )
key_ptr = get_or_create_ptr_from_arg( key_ptr = get_or_create_ptr_from_arg(call.args[0], builder, local_sym_tab)
func, module, call.args[0], builder, local_sym_tab, map_sym_tab, struct_sym_tab
)
map_void_ptr = builder.bitcast(map_ptr, ir.PointerType()) map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
# TODO: I have changed the return type to i64*, as we are
# allocating space for that type in allocate_mem. This is
# temporary, and we will honour other widths later. But this
# allows us to have cool binary ops on the returned value.
fn_type = ir.FunctionType( fn_type = ir.FunctionType(
ir.PointerType(ir.IntType(64)), # Return type: void* ir.PointerType(), # Return type: void*
[ir.PointerType(), ir.PointerType()], # Args: (void*, void*) [ir.PointerType(), ir.PointerType()], # Args: (void*, void*)
var_arg=False, var_arg=False,
) )
@ -99,7 +100,6 @@ def bpf_printk_emitter(
func, func,
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
map_sym_tab=None,
): ):
"""Emit LLVM IR for bpf_printk helper function call.""" """Emit LLVM IR for bpf_printk helper function call."""
if not hasattr(func, "_fmt_counter"): if not hasattr(func, "_fmt_counter"):
@ -147,7 +147,6 @@ def bpf_map_update_elem_emitter(
func, func,
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
map_sym_tab=None,
): ):
""" """
Emit LLVM IR for bpf_map_update_elem helper function call. Emit LLVM IR for bpf_map_update_elem helper function call.
@ -162,12 +161,8 @@ def bpf_map_update_elem_emitter(
value_arg = call.args[1] value_arg = call.args[1]
flags_arg = call.args[2] if len(call.args) > 2 else None flags_arg = call.args[2] if len(call.args) > 2 else None
key_ptr = get_or_create_ptr_from_arg( key_ptr = get_or_create_ptr_from_arg(key_arg, builder, local_sym_tab)
func, module, key_arg, builder, local_sym_tab, map_sym_tab, struct_sym_tab value_ptr = get_or_create_ptr_from_arg(value_arg, builder, local_sym_tab)
)
value_ptr = get_or_create_ptr_from_arg(
func, module, value_arg, builder, local_sym_tab, map_sym_tab, struct_sym_tab
)
flags_val = get_flags_val(flags_arg, builder, local_sym_tab) flags_val = get_flags_val(flags_arg, builder, local_sym_tab)
map_void_ptr = builder.bitcast(map_ptr, ir.PointerType()) map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
@ -202,7 +197,6 @@ def bpf_map_delete_elem_emitter(
func, func,
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
map_sym_tab=None,
): ):
""" """
Emit LLVM IR for bpf_map_delete_elem helper function call. Emit LLVM IR for bpf_map_delete_elem helper function call.
@ -212,9 +206,7 @@ def bpf_map_delete_elem_emitter(
raise ValueError( raise ValueError(
f"Map delete expects exactly one argument (key), got {len(call.args)}" f"Map delete expects exactly one argument (key), got {len(call.args)}"
) )
key_ptr = get_or_create_ptr_from_arg( key_ptr = get_or_create_ptr_from_arg(call.args[0], builder, local_sym_tab)
func, module, call.args[0], builder, local_sym_tab, map_sym_tab, struct_sym_tab
)
map_void_ptr = builder.bitcast(map_ptr, ir.PointerType()) map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
# Define function type for bpf_map_delete_elem # Define function type for bpf_map_delete_elem
@ -242,7 +234,6 @@ def bpf_get_current_pid_tgid_emitter(
func, func,
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
map_sym_tab=None,
): ):
""" """
Emit LLVM IR for bpf_get_current_pid_tgid helper function call. Emit LLVM IR for bpf_get_current_pid_tgid helper function call.
@ -269,8 +260,12 @@ def bpf_perf_event_output_handler(
func, func,
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
map_sym_tab=None,
): ):
"""
Emit LLVM IR for bpf_perf_event_output helper function call.
This allows sending data to userspace via a perf event array.
"""
if len(call.args) != 1: if len(call.args) != 1:
raise ValueError( raise ValueError(
f"Perf event output expects exactly one argument, got {len(call.args)}" f"Perf event output expects exactly one argument, got {len(call.args)}"
@ -321,6 +316,7 @@ def handle_helper_call(
# Helper function to get map pointer and invoke handler # Helper function to get map pointer and invoke handler
def invoke_helper(method_name, map_ptr=None): def invoke_helper(method_name, map_ptr=None):
"""Helper function to look up and invoke a registered handler."""
handler = HelperHandlerRegistry.get_handler(method_name) handler = HelperHandlerRegistry.get_handler(method_name)
if not handler: if not handler:
raise NotImplementedError( raise NotImplementedError(
@ -334,7 +330,6 @@ def handle_helper_call(
func, func,
local_sym_tab, local_sym_tab,
struct_sym_tab, struct_sym_tab,
map_sym_tab,
) )
# Handle direct function calls (e.g., print(), ktime()) # Handle direct function calls (e.g., print(), ktime())

View File

@ -1,10 +1,17 @@
"""
Utility functions for BPF helper function handling.
This module provides utility functions for processing BPF helper function
calls, including argument handling, string formatting for bpf_printk,
and a registry for helper function handlers.
"""
import ast import ast
import logging import logging
from collections.abc import Callable from collections.abc import Callable
from llvmlite import ir from llvmlite import ir
from pythonbpf.expr import eval_expr, get_base_type_and_depth, deref_to_depth from pythonbpf.expr import eval_expr
from pythonbpf.binary_ops import get_operand_value
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -19,6 +26,7 @@ class HelperHandlerRegistry:
"""Decorator to register a handler function for a helper""" """Decorator to register a handler function for a helper"""
def decorator(func): def decorator(func):
"""Decorator that registers the handler function."""
cls._handlers[helper_name] = func cls._handlers[helper_name] = func
return func return func
@ -35,88 +43,88 @@ class HelperHandlerRegistry:
return helper_name in cls._handlers return helper_name in cls._handlers
class ScratchPoolManager:
    """
    Manage the temporary helper variables in local_sym_tab.

    Temporaries are looked up under the names ``__helper_temp_0``,
    ``__helper_temp_1``, ... and handed out in that order. The manager only
    tracks which index comes next; it does not create the entries itself.
    """

    def __init__(self):
        self._counter = 0

    @property
    def counter(self):
        """Index of the next temporary that will be handed out."""
        return self._counter

    def reset(self):
        """Start handing out temporaries from index 0 again."""
        self._counter = 0
        logger.debug("Scratch pool counter reset to 0")

    def get_next_temp(self, local_sym_tab):
        """
        Return the next unused temporary from the symbol table.

        Args:
            local_sym_tab: Local symbol table holding the temporaries

        Returns:
            A tuple of (pointer to the temporary, name of the temporary)

        Raises:
            ValueError: If the symbol table is missing/empty or has no entry
                for the next temporary
        """
        temp_name = f"__helper_temp_{self._counter}"

        # Validate before advancing the counter: a failed request must not
        # consume a slot, and the reported counter should match the name of
        # the temporary that was actually missing. A missing symbol table is
        # reported the same way get_var_ptr_from_name reports it.
        if not local_sym_tab or temp_name not in local_sym_tab:
            raise ValueError(
                f"Scratch pool exhausted or inadequate: {temp_name}. "
                f"Current counter: {self._counter}"
            )

        self._counter += 1
        return local_sym_tab[temp_name].var, temp_name
_temp_pool_manager = ScratchPoolManager() # Singleton instance
def reset_scratch_pool():
    """
    Reset the scratch pool counter.

    Delegates to the module-level ``_temp_pool_manager`` instance, so the
    next temporary requested from it is ``__helper_temp_0`` again.
    """
    _temp_pool_manager.reset()
def get_var_ptr_from_name(var_name, local_sym_tab): def get_var_ptr_from_name(var_name, local_sym_tab):
"""Get a pointer to a variable from the symbol table.""" """
Get a pointer to a variable from the symbol table.
Args:
var_name: Name of the variable to look up
local_sym_tab: Local symbol table
Returns:
Pointer to the variable
Raises:
ValueError: If the variable is not found
"""
if local_sym_tab and var_name in local_sym_tab: if local_sym_tab and var_name in local_sym_tab:
return local_sym_tab[var_name].var return local_sym_tab[var_name].var
raise ValueError(f"Variable '{var_name}' not found in local symbol table") raise ValueError(f"Variable '{var_name}' not found in local symbol table")
def create_int_constant_ptr(value, builder, local_sym_tab, int_width=64): def create_int_constant_ptr(value, builder, int_width=64):
"""Create a pointer to an integer constant.""" """
Create a pointer to an integer constant.
Args:
value: The integer value
builder: LLVM IR builder
int_width: Width of the integer in bits (default: 64)
Returns:
Pointer to the allocated integer constant
"""
# Default to 64-bit integer # Default to 64-bit integer
ptr, temp_name = _temp_pool_manager.get_next_temp(local_sym_tab) int_type = ir.IntType(int_width)
logger.info(f"Using temp variable '{temp_name}' for int constant {value}") ptr = builder.alloca(int_type)
const_val = ir.Constant(ir.IntType(int_width), value) ptr.align = int_type.width // 8
builder.store(const_val, ptr) builder.store(ir.Constant(int_type, value), ptr)
return ptr return ptr
def get_or_create_ptr_from_arg( def get_or_create_ptr_from_arg(arg, builder, local_sym_tab):
func, module, arg, builder, local_sym_tab, map_sym_tab, struct_sym_tab=None """
): Extract or create pointer from call arguments.
"""Extract or create pointer from the call arguments."""
Args:
arg: The AST argument node
builder: LLVM IR builder
local_sym_tab: Local symbol table
Returns:
Pointer to the argument value
Raises:
NotImplementedError: If the argument type is not supported
"""
if isinstance(arg, ast.Name): if isinstance(arg, ast.Name):
ptr = get_var_ptr_from_name(arg.id, local_sym_tab) ptr = get_var_ptr_from_name(arg.id, local_sym_tab)
elif isinstance(arg, ast.Constant) and isinstance(arg.value, int): elif isinstance(arg, ast.Constant) and isinstance(arg.value, int):
ptr = create_int_constant_ptr(arg.value, builder, local_sym_tab) ptr = create_int_constant_ptr(arg.value, builder)
else: else:
# Evaluate the expression and store the result in a temp variable raise NotImplementedError(
val = get_operand_value( "Only simple variable names are supported as args in map helpers."
func, module, arg, builder, local_sym_tab, map_sym_tab, struct_sym_tab
) )
if val is None:
raise ValueError("Failed to evaluate expression for helper arg.")
# NOTE: We assume the result is an int64 for now
# if isinstance(arg, ast.Attribute):
# return val
ptr, temp_name = _temp_pool_manager.get_next_temp(local_sym_tab)
logger.info(f"Using temp variable '{temp_name}' for expression result")
builder.store(val, ptr)
return ptr return ptr
def get_flags_val(arg, builder, local_sym_tab): def get_flags_val(arg, builder, local_sym_tab):
"""Extract or create flags value from the call arguments.""" """
Extract or create flags value from call arguments.
Args:
arg: The AST argument node for flags
builder: LLVM IR builder
local_sym_tab: Local symbol table
Returns:
Integer flags value or LLVM IR value
Raises:
ValueError: If a variable is not found in symbol table
NotImplementedError: If the argument type is not supported
"""
if not arg: if not arg:
return 0 return 0
@ -135,7 +143,18 @@ def get_flags_val(arg, builder, local_sym_tab):
def simple_string_print(string_value, module, builder, func): def simple_string_print(string_value, module, builder, func):
"""Prepare arguments for bpf_printk from a simple string value""" """
Prepare arguments for bpf_printk from a simple string value.
Args:
string_value: The string to print
module: LLVM IR module
builder: LLVM IR builder
func: The LLVM IR function being built
Returns:
List of arguments for bpf_printk
"""
fmt_str = string_value + "\n\0" fmt_str = string_value + "\n\0"
fmt_ptr = _create_format_string_global(fmt_str, func, module, builder) fmt_ptr = _create_format_string_global(fmt_str, func, module, builder)
@ -151,7 +170,23 @@ def handle_fstring_print(
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
): ):
"""Handle f-string formatting for bpf_printk emitter.""" """
Handle f-string formatting for bpf_printk emitter.
Args:
joined_str: AST JoinedStr node representing the f-string
module: LLVM IR module
builder: LLVM IR builder
func: The LLVM IR function being built
local_sym_tab: Local symbol table
struct_sym_tab: Struct symbol table
Returns:
List of arguments for bpf_printk
Raises:
NotImplementedError: If f-string contains unsupported value types
"""
fmt_parts = [] fmt_parts = []
exprs = [] exprs = []
@ -274,27 +309,10 @@ def _populate_fval(ftype, node, fmt_parts, exprs):
raise NotImplementedError( raise NotImplementedError(
f"Unsupported integer width in f-string: {ftype.width}" f"Unsupported integer width in f-string: {ftype.width}"
) )
elif isinstance(ftype, ir.PointerType): elif ftype == ir.PointerType(ir.IntType(8)):
target, depth = get_base_type_and_depth(ftype) # NOTE: We assume i8* is a string
if isinstance(target, ir.IntType): fmt_parts.append("%s")
if target.width == 64: exprs.append(node)
fmt_parts.append("%lld")
exprs.append(node)
elif target.width == 32:
fmt_parts.append("%d")
exprs.append(node)
elif target.width == 8 and depth == 1:
# NOTE: Assume i8* is a string
fmt_parts.append("%s")
exprs.append(node)
else:
raise NotImplementedError(
f"Unsupported pointer target type in f-string: {target}"
)
else:
raise NotImplementedError(
f"Unsupported pointer target type in f-string: {target}"
)
else: else:
raise NotImplementedError(f"Unsupported field type in f-string: {ftype}") raise NotImplementedError(f"Unsupported field type in f-string: {ftype}")
@ -331,20 +349,7 @@ def _prepare_expr_args(expr, func, module, builder, local_sym_tab, struct_sym_ta
if val: if val:
if isinstance(val.type, ir.PointerType): if isinstance(val.type, ir.PointerType):
target, depth = get_base_type_and_depth(val.type) val = builder.ptrtoint(val, ir.IntType(64))
if isinstance(target, ir.IntType):
if target.width >= 32:
val = deref_to_depth(func, builder, val, depth)
val = builder.sext(val, ir.IntType(64))
elif target.width == 8 and depth == 1:
# NOTE: i8* is string, no need to deref
pass
else:
logger.warning(
"Only int and ptr supported in bpf_printk args. Others default to 0."
)
val = ir.Constant(ir.IntType(64), 0)
elif isinstance(val.type, ir.IntType): elif isinstance(val.type, ir.IntType):
if val.type.width < 64: if val.type.width < 64:
val = builder.sext(val, ir.IntType(64)) val = builder.sext(val, ir.IntType(64))

View File

@ -1,11 +1,31 @@
"""
BPF helper function stubs for Python type hints.
This module provides Python stub functions that represent BPF helper functions.
These stubs are used for type checking and will be replaced with actual BPF
helper calls during compilation.
"""
import ctypes import ctypes
def ktime():
    """
    Python-side stub for the kernel-time helper (nanoseconds).

    Returns:
        ``ctypes.c_int64(0)`` as a placeholder value
    """
    placeholder = ctypes.c_int64(0)
    return placeholder
def pid():
    """
    Python-side stub for the current-process-ID helper.

    Returns:
        ``ctypes.c_int32(0)`` as a placeholder value
    """
    placeholder = ctypes.c_int32(0)
    return placeholder

View File

@ -1,3 +1,10 @@
"""
LICENSE global variable processing for BPF programs.
This module handles the processing of the LICENSE function which is required
for BPF programs to declare their license (typically "GPL").
"""
from llvmlite import ir from llvmlite import ir
import ast import ast
from logging import Logger from logging import Logger
@ -7,6 +14,16 @@ logger: Logger = logging.getLogger(__name__)
def emit_license(module: ir.Module, license_str: str): def emit_license(module: ir.Module, license_str: str):
"""
Emit a LICENSE global variable into the LLVM IR module.
Args:
module: The LLVM IR module to add the LICENSE variable to
license_str: The license string (e.g., 'GPL')
Returns:
The created global variable
"""
license_bytes = license_str.encode("utf8") + b"\x00" license_bytes = license_str.encode("utf8") + b"\x00"
elems = [ir.Constant(ir.IntType(8), b) for b in license_bytes] elems = [ir.Constant(ir.IntType(8), b) for b in license_bytes]
ty = ir.ArrayType(ir.IntType(8), len(elems)) ty = ir.ArrayType(ir.IntType(8), len(elems))

View File

@ -1,3 +1,5 @@
"""BPF map types and processing."""
from .maps import HashMap, PerfEventArray, RingBuf from .maps import HashMap, PerfEventArray, RingBuf
from .maps_pass import maps_proc from .maps_pass import maps_proc

View File

@ -1,18 +1,59 @@
"""
BPF map type definitions for Python type hints.
This module provides Python classes that represent BPF map types.
These are used for type checking and map definition; the actual BPF maps
are generated as LLVM IR during compilation.
"""
# This file provides type and function hints only and does not actually give any functionality. # This file provides type and function hints only and does not actually give any functionality.
class HashMap: class HashMap:
"""
A BPF hash map for storing key-value pairs.
This is a type hint class used during compilation. The actual BPF map
implementation is generated as LLVM IR.
"""
def __init__(self, key, value, max_entries): def __init__(self, key, value, max_entries):
"""
Initialize a HashMap definition.
Args:
key: The ctypes type for keys (e.g., c_int64)
value: The ctypes type for values (e.g., c_int64)
max_entries: Maximum number of entries the map can hold
"""
self.key = key self.key = key
self.value = value self.value = value
self.max_entries = max_entries self.max_entries = max_entries
self.entries = {} self.entries = {}
def lookup(self, key): def lookup(self, key):
"""
Look up a value by key in the map.
Args:
key: The key to look up
Returns:
The value if found, None otherwise
"""
if key in self.entries: if key in self.entries:
return self.entries[key] return self.entries[key]
else: else:
return None return None
def delete(self, key): def delete(self, key):
"""
Delete an entry from the map by key.
Args:
key: The key to delete
Raises:
KeyError: If the key is not found in the map
"""
if key in self.entries: if key in self.entries:
del self.entries[key] del self.entries[key]
else: else:
@ -20,6 +61,17 @@ class HashMap:
# TODO: define the flags that can be added # TODO: define the flags that can be added
def update(self, key, value, flags=None): def update(self, key, value, flags=None):
"""
Update or insert a key-value pair in the map.
Args:
key: The key to update
value: The new value
flags: Optional flags for update behavior
Raises:
KeyError: If the key is not found in the map
"""
if key in self.entries: if key in self.entries:
self.entries[key] = value self.entries[key] = value
else: else:
@ -27,25 +79,76 @@ class HashMap:
class PerfEventArray:
    """
    Type-hint stand-in for a BPF perf event array.

    It only records the key/value types it was declared with; ``output`` is
    a no-op placeholder.
    """

    def __init__(self, key_size, value_size):
        """
        Record the declared key and value types.

        Args:
            key_size: The size/type for keys (stored as ``key_type``)
            value_size: The size/type for values (stored as ``value_type``)
        """
        self.key_type, self.value_type = key_size, value_size
        self.entries = {}

    def output(self, data):
        """
        Placeholder for sending ``data`` through the array; does nothing.

        Args:
            data: The data to output
        """
        return None  # Placeholder for output method
class RingBuf:
    """
    Type-hint stand-in for a BPF ring buffer.

    ``reserve`` only validates the requested size and ``submit`` is a no-op.
    """

    def __init__(self, max_entries):
        """
        Record the declared capacity.

        Args:
            max_entries: Maximum number of entries the ring buffer can hold
        """
        self.max_entries = max_entries

    def reserve(self, size: int, flags=0):
        """
        Validate a reservation request against ``max_entries``.

        Args:
            size: Size in bytes to reserve
            flags: Optional reservation flags (unused here)

        Returns:
            0 as a placeholder

        Raises:
            ValueError: If size exceeds max_entries
        """
        capacity = self.max_entries
        if size > capacity:
            raise ValueError("size cannot be greater than set maximum entries")
        return 0

    def submit(self, data, flags=0):
        """
        Placeholder for submitting ``data``; does nothing.

        Args:
            data: The data to submit
            flags: Optional submission flags (unused here)
        """
        return None

    # add discard, output and also give names to flags and stuff

View File

@ -1,3 +1,10 @@
"""
BPF map processing and LLVM IR generation.
This module handles the processing of BPF map definitions decorated with @map,
converting them to appropriate LLVM IR global variables with BTF debug info.
"""
import ast import ast
from logging import Logger from logging import Logger
from llvmlite import ir from llvmlite import ir
@ -20,6 +27,15 @@ def maps_proc(tree, module, chunks):
def is_map(func_node): def is_map(func_node):
"""
Check if a function node is decorated with @map.
Args:
func_node: The AST function node to check
Returns:
True if the function is decorated with @map, False otherwise
"""
return any( return any(
isinstance(decorator, ast.Name) and decorator.id == "map" isinstance(decorator, ast.Name) and decorator.id == "map"
for decorator in func_node.decorator_list for decorator in func_node.decorator_list
@ -27,6 +43,7 @@ def is_map(func_node):
class BPFMapType(Enum): class BPFMapType(Enum):
"""Enumeration of BPF map types."""
UNSPEC = 0 UNSPEC = 0
HASH = 1 HASH = 1
ARRAY = 2 ARRAY = 2
@ -65,7 +82,17 @@ class BPFMapType(Enum):
def create_bpf_map(module, map_name, map_params): def create_bpf_map(module, map_name, map_params):
"""Create a BPF map in the module with given parameters and debug info""" """
Create a BPF map in the module with given parameters and debug info.
Args:
module: The LLVM IR module to add the map to
map_name: The name of the BPF map
map_params: Dictionary of map parameters (type, key_size, value_size, max_entries)
Returns:
The created global variable representing the map
"""
# Create the anonymous struct type for BPF map # Create the anonymous struct type for BPF map
map_struct_type = ir.LiteralStructType( map_struct_type = ir.LiteralStructType(

View File

@ -1,3 +1,5 @@
"""Registry for BPF map processor functions."""
from collections.abc import Callable from collections.abc import Callable
from typing import Any from typing import Any
@ -12,6 +14,7 @@ class MapProcessorRegistry:
"""Decorator to register a processor function for a map type""" """Decorator to register a processor function for a map type"""
def decorator(func): def decorator(func):
"""Decorator that registers the processor function."""
cls._processors[map_type_name] = func cls._processors[map_type_name] = func
return func return func

View File

@ -1,3 +1,5 @@
"""Struct processing for BPF programs."""
from .structs_pass import structs_proc from .structs_pass import structs_proc
__all__ = ["structs_proc"] __all__ = ["structs_proc"]

View File

@ -1,19 +1,72 @@
"""
Struct type wrapper for BPF structs.
This module provides a wrapper class for LLVM IR struct types with
helper methods for field access and manipulation.
"""
from llvmlite import ir from llvmlite import ir
class StructType: class StructType:
"""
Wrapper class for LLVM IR struct types with field access helpers.
Attributes:
ir_type: The LLVM IR struct type
fields: Dictionary mapping field names to their types
size: Total size of the struct in bytes
"""
def __init__(self, ir_type, fields, size): def __init__(self, ir_type, fields, size):
"""
Initialize a StructType.
Args:
ir_type: The LLVM IR struct type
fields: Dictionary mapping field names to their types
size: Total size of the struct in bytes
"""
self.ir_type = ir_type self.ir_type = ir_type
self.fields = fields self.fields = fields
self.size = size self.size = size
def field_idx(self, field_name): def field_idx(self, field_name):
"""
Get the index of a field in the struct.
Args:
field_name: The name of the field
Returns:
The zero-based index of the field
"""
return list(self.fields.keys()).index(field_name) return list(self.fields.keys()).index(field_name)
def field_type(self, field_name): def field_type(self, field_name):
"""
Get the LLVM IR type of a field.
Args:
field_name: The name of the field
Returns:
The LLVM IR type of the field
"""
return self.fields[field_name] return self.fields[field_name]
def gep(self, builder, ptr, field_name): def gep(self, builder, ptr, field_name):
"""
Generate a GEP (GetElementPtr) instruction to access a struct field.
Args:
builder: LLVM IR builder
ptr: Pointer to the struct
field_name: Name of the field to access
Returns:
A pointer to the field
"""
idx = self.field_idx(field_name) idx = self.field_idx(field_name)
return builder.gep( return builder.gep(
ptr, ptr,
@ -22,6 +75,18 @@ class StructType:
) )
def field_size(self, field_name): def field_size(self, field_name):
"""
Calculate the size of a field in bytes.
Args:
field_name: The name of the field
Returns:
The size of the field in bytes
Raises:
TypeError: If the field type is not supported
"""
fld = self.fields[field_name] fld = self.fields[field_name]
if isinstance(fld, ir.ArrayType): if isinstance(fld, ir.ArrayType):
return fld.count * (fld.element.width // 8) return fld.count * (fld.element.width // 8)

View File

@ -1,3 +1,10 @@
"""
BPF struct processing and LLVM IR type generation.
This module handles the processing of Python classes decorated with @struct,
converting them to LLVM IR struct types for use in BPF programs.
"""
import ast import ast
import logging import logging
from llvmlite import ir from llvmlite import ir
@ -26,6 +33,15 @@ def structs_proc(tree, module, chunks):
def is_bpf_struct(cls_node): def is_bpf_struct(cls_node):
"""
Check if a class node is decorated with @struct.
Args:
cls_node: The AST class node to check
Returns:
True if the class is decorated with @struct, False otherwise
"""
return any( return any(
isinstance(decorator, ast.Name) and decorator.id == "struct" isinstance(decorator, ast.Name) and decorator.id == "struct"
for decorator in cls_node.decorator_list for decorator in cls_node.decorator_list
@ -33,7 +49,16 @@ def is_bpf_struct(cls_node):
def process_bpf_struct(cls_node, module): def process_bpf_struct(cls_node, module):
"""Process a single BPF struct definition""" """
Process a single BPF struct definition and create its LLVM IR representation.
Args:
cls_node: The AST class node representing the struct
module: The LLVM IR module (not used in current implementation)
Returns:
A StructType object containing the struct's type information
"""
fields = parse_struct_fields(cls_node) fields = parse_struct_fields(cls_node)
field_types = list(fields.values()) field_types = list(fields.values())
@ -44,7 +69,18 @@ def process_bpf_struct(cls_node, module):
def parse_struct_fields(cls_node): def parse_struct_fields(cls_node):
"""Parse fields of a struct class node""" """
Parse fields of a struct class node.
Args:
cls_node: The AST class node representing the struct
Returns:
A dictionary mapping field names to their LLVM IR types
Raises:
TypeError: If a field has an unsupported type annotation
"""
fields = {} fields = {}
for item in cls_node.body: for item in cls_node.body:
@ -57,7 +93,18 @@ def parse_struct_fields(cls_node):
def get_type_from_ann(annotation): def get_type_from_ann(annotation):
"""Convert an AST annotation node to an LLVM IR type for struct fields""" """
Convert an AST annotation node to an LLVM IR type for struct fields.
Args:
annotation: The AST annotation node (e.g., c_int64, str(32))
Returns:
The corresponding LLVM IR type
Raises:
TypeError: If the annotation type is not supported
"""
if isinstance(annotation, ast.Call) and isinstance(annotation.func, ast.Name): if isinstance(annotation, ast.Call) and isinstance(annotation.func, ast.Name):
if annotation.func.id == "str": if annotation.func.id == "str":
# Char array # Char array
@ -72,7 +119,15 @@ def get_type_from_ann(annotation):
def calc_struct_size(field_types): def calc_struct_size(field_types):
"""Calculate total size of the struct with alignment and padding""" """
Calculate total size of the struct with alignment and padding.
Args:
field_types: List of LLVM IR types for each field
Returns:
The total size of the struct in bytes
"""
curr_offset = 0 curr_offset = 0
for ftype in field_types: for ftype in field_types:
if isinstance(ftype, ir.IntType): if isinstance(ftype, ir.IntType):

View File

@ -1,3 +1,10 @@
"""
Type mapping from Python ctypes to LLVM IR types.
This module provides utilities to convert Python ctypes type names
to their corresponding LLVM IR representations.
"""
from llvmlite import ir from llvmlite import ir
# TODO: THIS IS NOT SUPPOSED TO MATCH STRINGS :skull: # TODO: THIS IS NOT SUPPOSED TO MATCH STRINGS :skull:
@ -19,10 +26,31 @@ mapping = {
def ctypes_to_ir(ctype: str):
    """
    Translate a ctypes type name into the LLVM IR type registered for it.

    Args:
        ctype: String name of the ctypes type (e.g., 'c_int64', 'c_void_p')

    Returns:
        The entry stored under ``ctype`` in the module-level ``mapping``

    Raises:
        NotImplementedError: If ``ctype`` has no entry in ``mapping``
    """
    # Guard clause first; the successful lookup is the fall-through path.
    if ctype not in mapping:
        raise NotImplementedError(f"No mapping for {ctype}")
    return mapping[ctype]
def is_ctypes(ctype: str) -> bool:
    """
    Tell whether a type name has an entry in the module-level ``mapping``.

    Args:
        ctype: String name of the type to check

    Returns:
        True when ``ctype`` is a key of ``mapping``, False otherwise
    """
    known = ctype in mapping
    return known

View File

@ -1,39 +0,0 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from ctypes import c_void_p, c_int64, c_uint64
from pythonbpf.maps import HashMap
# NOTE: This example tries to reinterpret the variable `x` to a different type.
# We do not allow this for now, as stack allocations are typed and have to be
# done in the first basic block. Allowing re-interpretation would require
# re-allocation of stack space (possibly in a new basic block), which is not
# supported in eBPF yet.
# We can allow bitcasts in cases where the width of the types is the same in
# the future. But for now, we do not allow any re-interpretation of variables.
# Map declaration: a HashMap named `last` with c_uint64 keys and values and
# room for at most 3 entries.
@bpf
@map
def last() -> HashMap:
    return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
# Program tagged with the section string "tracepoint/syscalls/sys_enter_execve".
# It binds `x` to a map lookup result and then rebinds it to an integer
# literal, which is the re-interpretation described in the note at the top of
# this file.
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
    last.update(0, 1)  # store value 1 under key 0
    x = last.lookup(0)  # x first takes the lookup result...
    x = 20  # ...and is then reassigned an integer literal
    if x == 2:
        print("Hello, World!")
    else:
        print("Goodbye, World!")
    # NOTE(review): bare return although the annotation is c_int64 -- confirm intended
    return
# License declaration for the program: the function returns the string "GPL".
@bpf
@bpfglobal
def LICENSE() -> str:
    return "GPL"
compile()

View File

@ -1,69 +0,0 @@
from pythonbpf import bpf, map, section, bpfglobal, compile, struct
from ctypes import c_void_p, c_int64, c_int32, c_uint64
from pythonbpf.maps import HashMap
from pythonbpf.helper import ktime
# NOTE: This is a comprehensive test combining struct, helper, and map features
# Please note that at line 50, though we have used an absurd expression to test
# the compiler, it is recommended to use named variables to reduce the amount of
# scratch space that needs to be allocated.
# Struct declaration with two c_uint64 fields, `pid` and `ts`.
@bpf
@struct
class data_t:
    pid: c_uint64
    ts: c_uint64
# Map declaration: a HashMap named `last` with c_uint64 keys and values and
# room for at most 3 entries.
@bpf
@map
def last() -> HashMap:
    return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
# Program tagged with the section string "tracepoint/syscalls/sys_enter_execve".
# It mixes struct field access, map update/lookup, the ktime() helper and
# f-string prints in one function body (see the note at the top of this file).
# NOTE(review): indentation was reconstructed; the end of the else-branch is
# inferred -- confirm against the original file.
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
    # Struct field write, read-modify-write, and print.
    dat = data_t()
    dat.pid = 123
    dat.pid = dat.pid + 1
    print(f"pid is {dat.pid}")
    # Map update from a named variable, then from a negated lookup result.
    tu = 9
    last.update(0, tu)
    last.update(1, -last.lookup(0))
    x = last.lookup(0)
    print(f"Map value at index 0: {x}")
    # Arithmetic on the lookup result with a c_int32 operand.
    x = x + c_int32(1)
    print(f"x after adding 32-bit 1 is {x}")
    # Helper call result used directly in a binary operation.
    x = ktime() - 121
    print(f"ktime - 121 is {x}")
    x = last.lookup(0)
    x = x + 1
    print(f"x is {x}")
    if x == 10:
        jat = data_t()
        jat.ts = 456
        print(f"Hello, World!, ts is {jat.ts}")
        a = last.lookup(0)
        print(f"a is {a}")
        last.update(9, 9)
        # Nested lookups with no named intermediates: presumably the "absurd
        # expression" the note at the top of this file refers to.
        last.update(0, last.lookup(last.lookup(0)) +
                    last.lookup(last.lookup(0)) + last.lookup(last.lookup(0)))
        z = last.lookup(0)
        print(f"new map val at index 0 is {z}")
    else:
        a = last.lookup(0)
        print("Goodbye, World!")
    # Lookup key given as an expression (1 - 1) rather than a name or literal.
    c = last.lookup(1 - 1)
    print(f"c is {c}")
    # NOTE(review): bare return although the annotation is c_int64 -- confirm intended
    return
# License declaration for the program: the function returns the string "GPL".
@bpf
@bpfglobal
def LICENSE() -> str:
    return "GPL"
compile()

View File

@ -1,27 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
# Program tagged with the section string "tracepoint/syscalls/sys_enter_execve".
# It exercises plain integer variables: literal assignment, assignment from
# another variable, and reassignment from a binary expression on itself.
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
    x = 1
    print(f"Initial x: {x}")
    a = 20
    x = a  # variable-to-variable assignment
    print(f"Updated x with a: {x}")
    x = (x + x) * 3  # nested binary operation that reads and rewrites x
    if x == 2:
        print("Hello, World!")
    else:
        print(f"Goodbye, World! {x}")
    # NOTE(review): bare return although the annotation is c_int64 -- confirm intended
    return
# License declaration for the program: the function returns the string "GPL".
@bpf
@bpfglobal
def LICENSE() -> str:
    return "GPL"
compile()

View File

@ -1,34 +0,0 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from ctypes import c_void_p, c_int64, c_uint64
from pythonbpf.maps import HashMap
# NOTE: An example of i64** assignment with binops on the RHS
# Map declaration: a HashMap named `last` with c_uint64 keys and values and
# room for at most 3 entries.
@bpf
@map
def last() -> HashMap:
    return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
# Program tagged with the section string "tracepoint/syscalls/sys_enter_execve".
# `x` receives a map lookup result and is then reassigned from `x + 1`, the
# binop-on-the-RHS assignment named in the note at the top of this file.
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
    last.update(0, 1)  # store value 1 under key 0
    x = last.lookup(0)
    print(f"{x}")
    x = x + 1  # binary operation whose left operand is the lookup result
    if x == 2:
        print("Hello, World!")
    else:
        print("Goodbye, World!")
    # NOTE(review): bare return although the annotation is c_int64 -- confirm intended
    return
# License declaration for the program: the function returns the string "GPL".
@bpf
@bpfglobal
def LICENSE() -> str:
    return "GPL"
compile()

View File

@ -1,40 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile, struct
from ctypes import c_void_p, c_int64, c_uint64
from pythonbpf.helper import ktime
# Struct declaration with two c_uint64 fields, `pid` and `ts`.
@bpf
@struct
class data_t:
    pid: c_uint64
    ts: c_uint64
# Program tagged with the section string "tracepoint/syscalls/sys_enter_execve".
# It combines struct field access, the ktime() helper and integer arithmetic,
# and creates a second struct instance inside the if-branch.
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
    # Struct field write, read-modify-write, and print.
    dat = data_t()
    dat.pid = 123
    dat.pid = dat.pid + 1
    print(f"pid is {dat.pid}")
    # Helper call result used directly in a binary operation.
    x = ktime() - 121
    print(f"ktime is {x}")
    # x is reused for a plain integer afterwards.
    x = 1
    x = x + 1
    print(f"x is {x}")
    if x == 2:
        jat = data_t()  # struct instance created inside a branch
        jat.ts = 456
        print(f"Hello, World!, ts is {jat.ts}")
    else:
        print("Goodbye, World!")
    # NOTE(review): bare return although the annotation is c_int64 -- confirm intended
    return
# License declaration for the program: the function returns the string "GPL".
@bpf
@bpfglobal
def LICENSE() -> str:
    return "GPL"
compile()

View File

@ -243,6 +243,17 @@ class BTFConverter:
data data
) )
# Replace c_bool bitfields wider than 8 bits with a wider unsigned integer type
def repl(m):
    """Rewrite one ``('name', ctypes.c_bool, bits)`` field tuple.

    Args:
        m: Regex match whose two groups are the field name and the bit width.

    Returns:
        The matched text unchanged when the width fits in 8 bits; otherwise
        the same tuple with ``c_bool`` swapped for an unsigned integer type
        wide enough for the bitfield.
    """
    name, bits = m.groups()
    width = int(bits)
    if width <= 8:
        return m.group(0)
    # ctypes rejects a bitfield wider than its base type, so c_uint32 only
    # covers widths up to 32; wider fields need c_uint64.
    base = "c_uint32" if width <= 32 else "c_uint64"
    return f"('{name}', ctypes.{base}, {bits})"
data = re.sub(
r"\('([^']+)',\s*ctypes\.c_bool,\s*(\d+)\)",
repl,
data
)
# Remove ctypes. prefix from invalid entries # Remove ctypes. prefix from invalid entries
invalid_ctypes = ["bpf_iter_state", "_cache_type", "fs_context_purpose"] invalid_ctypes = ["bpf_iter_state", "_cache_type", "fs_context_purpose"]
for name in invalid_ctypes: for name in invalid_ctypes: