3 Commits

77 changed files with 189456 additions and 236710 deletions

View File

@ -12,8 +12,8 @@ jobs:
name: Format name: Format
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v5 - uses: actions/checkout@v4
- uses: actions/setup-python@v6 - uses: actions/setup-python@v5
with: with:
python-version: "3.x" python-version: "3.x"
- uses: pre-commit/action@v3.0.1 - uses: pre-commit/action@v3.0.1

View File

@ -21,7 +21,7 @@ ci:
repos: repos:
# Standard hooks # Standard hooks
- repo: https://github.com/pre-commit/pre-commit-hooks - repo: https://github.com/pre-commit/pre-commit-hooks
rev: v6.0.0 rev: v4.6.0
hooks: hooks:
- id: check-added-large-files - id: check-added-large-files
- id: check-case-conflict - id: check-case-conflict
@ -36,19 +36,19 @@ repos:
- id: trailing-whitespace - id: trailing-whitespace
- repo: https://github.com/astral-sh/ruff-pre-commit - repo: https://github.com/astral-sh/ruff-pre-commit
rev: "v0.13.2" rev: "v0.4.2"
hooks: hooks:
- id: ruff - id: ruff
args: ["--fix", "--show-fixes"] args: ["--fix", "--show-fixes"]
- id: ruff-format - id: ruff-format
exclude: ^(docs)|^(tests)|^(examples) exclude: ^(tests/|examples/|docs/)
# Checking static types # Checking static types
- repo: https://github.com/pre-commit/mirrors-mypy - repo: https://github.com/pre-commit/mirrors-mypy
rev: "v1.18.2" rev: "v1.10.0"
hooks: hooks:
- id: mypy - id: mypy
exclude: ^(tests)|^(examples) exclude: ^(tests/|examples/)
additional_dependencies: [types-setuptools] additional_dependencies: [types-setuptools]
# Changes tabs to spaces # Changes tabs to spaces

View File

@ -1,13 +1,13 @@
## Short term ## Short term
- Implement enough functionality to port the BCC tutorial examples in PythonBPF - Implement enough functionality to port the BCC tutorial examples in PythonBPF
- Static Typing
- Add all maps - Add all maps
- XDP support in pylibbpf - XDP support in pylibbpf
- ringbuf support - ringbuf support
- Add oneline IfExpr conditionals (wishlist) - recursive expression resolution
## Long term ## Long term
- Refactor the codebase to be better than a hackathon project - Refactor the codebase to be better than a hackathon project
- Port to C++ and use actual LLVM? - Port to C++ and use actual LLVM?
- Fix struct_kioctx issue in the vmlinux transpiler

View File

@ -12,7 +12,7 @@
"from pythonbpf import bpf, map, section, bpfglobal, BPF\n", "from pythonbpf import bpf, map, section, bpfglobal, BPF\n",
"from pythonbpf.helper import pid\n", "from pythonbpf.helper import pid\n",
"from pythonbpf.maps import HashMap\n", "from pythonbpf.maps import HashMap\n",
"from pylibbpf import BpfMap\n", "from pylibbpf import *\n",
"from ctypes import c_void_p, c_int64, c_uint64, c_int32\n", "from ctypes import c_void_p, c_int64, c_uint64, c_int32\n",
"import matplotlib.pyplot as plt" "import matplotlib.pyplot as plt"
] ]

View File

@ -22,9 +22,5 @@ def LICENSE() -> str:
b = BPF() b = BPF()
b.load_and_attach() b.load_and_attach()
if b.is_loaded() and b.is_attached():
print("Successfully loaded and attached")
else:
print("Could not load successfully")
# Now cat /sys/kernel/debug/tracing/trace_pipe to see results of the execve syscall. # Now cat /sys/kernel/debug/tracing/trace_pipe to see results of the execve syscall.

View File

@ -1,27 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, BPF
from ctypes import c_void_p, c_int64
@bpf
@section("kretprobe/do_unlinkat")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return c_int64(0)
@bpf
@section("kprobe/do_unlinkat")
def hello_world2(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return c_int64(0)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load_and_attach()
while True:
print("running")
# Now cat /sys/kernel/debug/tracing/trace_pipe to see results of unlink kprobe.

View File

@ -21,17 +21,17 @@ def last() -> HashMap:
@section("tracepoint/syscalls/sys_enter_sync") @section("tracepoint/syscalls/sys_enter_sync")
def do_trace(ctx: c_void_p) -> c_int64: def do_trace(ctx: c_void_p) -> c_int64:
key = 0 key = 0
tsp = last.lookup(key) tsp = last().lookup(key)
if tsp: if tsp:
kt = ktime() kt = ktime()
delta = kt - tsp delta = kt - tsp
if delta < 1000000000: if delta < 1000000000:
time_ms = delta // 1000000 time_ms = delta // 1000000
print(f"sync called within last second, last {time_ms} ms ago") print(f"sync called within last second, last {time_ms} ms ago")
last.delete(key) last().delete(key)
else: else:
kt = ktime() kt = ktime()
last.update(key, kt) last().update(key, kt)
return c_int64(0) return c_int64(0)

File diff suppressed because it is too large Load Diff

View File

@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "pythonbpf" name = "pythonbpf"
version = "0.1.4" version = "0.1.3"
description = "Reduced Python frontend for eBPF" description = "Reduced Python frontend for eBPF"
authors = [ authors = [
{ name = "r41k0u", email="pragyanshchaturvedi18@gmail.com" }, { name = "r41k0u", email="pragyanshchaturvedi18@gmail.com" },

View File

@ -1,191 +0,0 @@
import ast
import logging
from llvmlite import ir
from dataclasses import dataclass
from typing import Any
from pythonbpf.helper import HelperHandlerRegistry
from pythonbpf.type_deducer import ctypes_to_ir
logger = logging.getLogger(__name__)
@dataclass
class LocalSymbol:
var: ir.AllocaInstr
ir_type: ir.Type
metadata: Any = None
def __iter__(self):
yield self.var
yield self.ir_type
yield self.metadata
def _is_helper_call(call_node):
"""Check if a call node is a BPF helper function call."""
if isinstance(call_node.func, ast.Name):
# Exclude print from requiring temps (handles f-strings differently)
func_name = call_node.func.id
return HelperHandlerRegistry.has_handler(func_name) and func_name != "print"
elif isinstance(call_node.func, ast.Attribute):
return HelperHandlerRegistry.has_handler(call_node.func.attr)
return False
def handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab):
"""Handle memory allocation for assignment statements."""
# Validate assignment
if len(stmt.targets) != 1:
logger.warning("Multi-target assignment not supported, skipping allocation")
return
target = stmt.targets[0]
# Skip non-name targets (e.g., struct field assignments)
if isinstance(target, ast.Attribute):
logger.debug(f"Struct field assignment to {target.attr}, no allocation needed")
return
if not isinstance(target, ast.Name):
logger.warning(f"Unsupported assignment target type: {type(target).__name__}")
return
var_name = target.id
rval = stmt.value
# Skip if already allocated
if var_name in local_sym_tab:
logger.debug(f"Variable {var_name} already allocated, skipping")
return
# Determine type and allocate based on rval
if isinstance(rval, ast.Call):
_allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab)
elif isinstance(rval, ast.Constant):
_allocate_for_constant(builder, var_name, rval, local_sym_tab)
elif isinstance(rval, ast.BinOp):
_allocate_for_binop(builder, var_name, local_sym_tab)
else:
logger.warning(
f"Unsupported assignment value type for {var_name}: {type(rval).__name__}"
)
def _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab):
"""Allocate memory for variable assigned from a call."""
if isinstance(rval.func, ast.Name):
call_type = rval.func.id
# C type constructors
if call_type in ("c_int32", "c_int64", "c_uint32", "c_uint64"):
ir_type = ctypes_to_ir(call_type)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
local_sym_tab[var_name] = LocalSymbol(var, ir_type)
logger.info(f"Pre-allocated {var_name} as {call_type}")
# Helper functions
elif HelperHandlerRegistry.has_handler(call_type):
ir_type = ir.IntType(64) # Assume i64 return type
var = builder.alloca(ir_type, name=var_name)
var.align = 8
local_sym_tab[var_name] = LocalSymbol(var, ir_type)
logger.info(f"Pre-allocated {var_name} for helper {call_type}")
# Deref function
elif call_type == "deref":
ir_type = ir.IntType(64) # Assume i64 return type
var = builder.alloca(ir_type, name=var_name)
var.align = 8
local_sym_tab[var_name] = LocalSymbol(var, ir_type)
logger.info(f"Pre-allocated {var_name} for deref")
# Struct constructors
elif call_type in structs_sym_tab:
struct_info = structs_sym_tab[call_type]
var = builder.alloca(struct_info.ir_type, name=var_name)
local_sym_tab[var_name] = LocalSymbol(var, struct_info.ir_type, call_type)
logger.info(f"Pre-allocated {var_name} for struct {call_type}")
else:
logger.warning(f"Unknown call type for allocation: {call_type}")
elif isinstance(rval.func, ast.Attribute):
# Map method calls - need double allocation for ptr handling
_allocate_for_map_method(builder, var_name, local_sym_tab)
else:
logger.warning(f"Unsupported call function type for {var_name}")
def _allocate_for_map_method(builder, var_name, local_sym_tab):
"""Allocate memory for variable assigned from map method (double alloc)."""
# Main variable (pointer to pointer)
ir_type = ir.PointerType(ir.IntType(64))
var = builder.alloca(ir_type, name=var_name)
local_sym_tab[var_name] = LocalSymbol(var, ir_type)
# Temporary variable for computed values
tmp_ir_type = ir.IntType(64)
var_tmp = builder.alloca(tmp_ir_type, name=f"{var_name}_tmp")
local_sym_tab[f"{var_name}_tmp"] = LocalSymbol(var_tmp, tmp_ir_type)
logger.info(f"Pre-allocated {var_name} and {var_name}_tmp for map method")
def _allocate_for_constant(builder, var_name, rval, local_sym_tab):
"""Allocate memory for variable assigned from a constant."""
if isinstance(rval.value, bool):
ir_type = ir.IntType(1)
var = builder.alloca(ir_type, name=var_name)
var.align = 1
local_sym_tab[var_name] = LocalSymbol(var, ir_type)
logger.info(f"Pre-allocated {var_name} as bool")
elif isinstance(rval.value, int):
ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name)
var.align = 8
local_sym_tab[var_name] = LocalSymbol(var, ir_type)
logger.info(f"Pre-allocated {var_name} as i64")
elif isinstance(rval.value, str):
ir_type = ir.PointerType(ir.IntType(8))
var = builder.alloca(ir_type, name=var_name)
var.align = 8
local_sym_tab[var_name] = LocalSymbol(var, ir_type)
logger.info(f"Pre-allocated {var_name} as string")
else:
logger.warning(
f"Unsupported constant type for {var_name}: {type(rval.value).__name__}"
)
def _allocate_for_binop(builder, var_name, local_sym_tab):
"""Allocate memory for variable assigned from a binary operation."""
ir_type = ir.IntType(64) # Assume i64 result
var = builder.alloca(ir_type, name=var_name)
var.align = 8
local_sym_tab[var_name] = LocalSymbol(var, ir_type)
logger.info(f"Pre-allocated {var_name} for binop result")
def allocate_temp_pool(builder, max_temps, local_sym_tab):
"""Allocate the temporary scratch space pool for helper arguments."""
if max_temps == 0:
return
logger.info(f"Allocating temp pool of {max_temps} variables")
for i in range(max_temps):
temp_name = f"__helper_temp_{i}"
temp_var = builder.alloca(ir.IntType(64), name=temp_name)
temp_var.align = 8
local_sym_tab[temp_name] = LocalSymbol(temp_var, ir.IntType(64))

View File

@ -1,108 +0,0 @@
import ast
import logging
from llvmlite import ir
from pythonbpf.expr import eval_expr
logger = logging.getLogger(__name__)
def handle_struct_field_assignment(
func, module, builder, target, rval, local_sym_tab, map_sym_tab, structs_sym_tab
):
"""Handle struct field assignment (obj.field = value)."""
var_name = target.value.id
field_name = target.attr
if var_name not in local_sym_tab:
logger.error(f"Variable '{var_name}' not found in symbol table")
return
struct_type = local_sym_tab[var_name].metadata
struct_info = structs_sym_tab[struct_type]
if field_name not in struct_info.fields:
logger.error(f"Field '{field_name}' not found in struct '{struct_type}'")
return
# Get field pointer and evaluate value
field_ptr = struct_info.gep(builder, local_sym_tab[var_name].var, field_name)
val = eval_expr(
func, module, builder, rval, local_sym_tab, map_sym_tab, structs_sym_tab
)
if val is None:
logger.error(f"Failed to evaluate value for {var_name}.{field_name}")
return
# TODO: Handle string assignment to char array (not a priority)
field_type = struct_info.field_type(field_name)
if isinstance(field_type, ir.ArrayType) and val[1] == ir.PointerType(ir.IntType(8)):
logger.warning(
f"String to char array assignment not implemented for {var_name}.{field_name}"
)
return
# Store the value
builder.store(val[0], field_ptr)
logger.info(f"Assigned to struct field {var_name}.{field_name}")
def handle_variable_assignment(
func, module, builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab
):
"""Handle single named variable assignment."""
if var_name not in local_sym_tab:
logger.error(f"Variable {var_name} not declared.")
return False
var_ptr = local_sym_tab[var_name].var
var_type = local_sym_tab[var_name].ir_type
# NOTE: Special case for struct initialization
if isinstance(rval, ast.Call) and isinstance(rval.func, ast.Name):
struct_name = rval.func.id
if struct_name in structs_sym_tab and len(rval.args) == 0:
struct_info = structs_sym_tab[struct_name]
ir_struct = struct_info.ir_type
builder.store(ir.Constant(ir_struct, None), var_ptr)
logger.info(f"Initialized struct {struct_name} for variable {var_name}")
return True
val_result = eval_expr(
func, module, builder, rval, local_sym_tab, map_sym_tab, structs_sym_tab
)
if val_result is None:
logger.error(f"Failed to evaluate value for {var_name}")
return False
val, val_type = val_result
logger.info(f"Evaluated value for {var_name}: {val} of type {val_type}, {var_type}")
if val_type != var_type:
if isinstance(val_type, ir.IntType) and isinstance(var_type, ir.IntType):
# Allow implicit int widening
if val_type.width < var_type.width:
val = builder.sext(val, var_type)
logger.info(f"Implicitly widened int for variable {var_name}")
elif val_type.width > var_type.width:
val = builder.trunc(val, var_type)
logger.info(f"Implicitly truncated int for variable {var_name}")
elif isinstance(val_type, ir.IntType) and isinstance(var_type, ir.PointerType):
# NOTE: This is assignment to a PTR_TO_MAP_VALUE_OR_NULL
logger.info(
f"Creating temporary variable for pointer assignment to {var_name}"
)
var_ptr_tmp = local_sym_tab[f"{var_name}_tmp"].var
builder.store(val, var_ptr_tmp)
val = var_ptr_tmp
else:
logger.error(
f"Type mismatch for variable {var_name}: {val_type} vs {var_type}"
)
return False
builder.store(val, var_ptr)
logger.info(f"Assigned value to variable {var_name}")
return True

View File

@ -1,110 +1,71 @@
import ast import ast
from llvmlite import ir from llvmlite import ir
from logging import Logger
import logging
from pythonbpf.expr import get_base_type_and_depth, deref_to_depth, eval_expr
logger: Logger = logging.getLogger(__name__)
def get_operand_value( def recursive_dereferencer(var, builder):
func, module, operand, builder, local_sym_tab, map_sym_tab, structs_sym_tab=None """dereference until primitive type comes out"""
): if var.type == ir.PointerType(ir.PointerType(ir.IntType(64))):
"""Extract the value from an operand, handling variables and constants.""" a = builder.load(var)
logger.info(f"Getting operand value for: {ast.dump(operand)}") return recursive_dereferencer(a, builder)
if isinstance(operand, ast.Name): elif var.type == ir.PointerType(ir.IntType(64)):
if operand.id in local_sym_tab: a = builder.load(var)
var = local_sym_tab[operand.id].var return recursive_dereferencer(a, builder)
var_type = var.type elif var.type == ir.IntType(64):
base_type, depth = get_base_type_and_depth(var_type) return var
logger.info(f"var is {var}, base_type is {base_type}, depth is {depth}")
val = deref_to_depth(func, builder, var, depth)
return val
raise ValueError(f"Undefined variable: {operand.id}")
elif isinstance(operand, ast.Constant):
if isinstance(operand.value, int):
cst = ir.Constant(ir.IntType(64), int(operand.value))
return cst
raise TypeError(f"Unsupported constant type: {type(operand.value)}")
elif isinstance(operand, ast.BinOp):
res = handle_binary_op_impl(
func, module, operand, builder, local_sym_tab, map_sym_tab, structs_sym_tab
)
return res
else: else:
res = eval_expr( raise TypeError(f"Unsupported type for dereferencing: {var.type}")
func, module, builder, operand, local_sym_tab, map_sym_tab, structs_sym_tab
)
if res is None:
raise ValueError(f"Failed to evaluate call expression: {operand}")
val, _ = res
logger.info(f"Evaluated expr to {val} of type {val.type}")
base_type, depth = get_base_type_and_depth(val.type)
if depth > 0:
val = deref_to_depth(func, builder, val, depth)
return val
raise TypeError(f"Unsupported operand type: {type(operand)}")
def handle_binary_op_impl( def handle_binary_op(rval, module, builder, var_name, local_sym_tab, map_sym_tab, func):
func, module, rval, builder, local_sym_tab, map_sym_tab, structs_sym_tab=None print(module)
): left = rval.left
right = rval.right
op = rval.op op = rval.op
left = get_operand_value(
func, module, rval.left, builder, local_sym_tab, map_sym_tab, structs_sym_tab
)
right = get_operand_value(
func, module, rval.right, builder, local_sym_tab, map_sym_tab, structs_sym_tab
)
logger.info(f"left is {left}, right is {right}, op is {op}")
# NOTE: Before doing the operation, if the operands are integers # Handle left operand
# we always extend them to i64. The assignment to LHS will take if isinstance(left, ast.Name):
# care of truncation if needed. if left.id in local_sym_tab:
if isinstance(left.type, ir.IntType) and left.type.width < 64: left = recursive_dereferencer(local_sym_tab[left.id][0], builder)
left = builder.sext(left, ir.IntType(64)) else:
if isinstance(right.type, ir.IntType) and right.type.width < 64: raise SyntaxError(f"Undefined variable: {left.id}")
right = builder.sext(right, ir.IntType(64)) elif isinstance(left, ast.Constant):
left = ir.Constant(ir.IntType(64), left.value)
else:
raise SyntaxError("Unsupported left operand type")
# Map AST operation nodes to LLVM IR builder methods if isinstance(right, ast.Name):
op_map = { if right.id in local_sym_tab:
ast.Add: builder.add, right = recursive_dereferencer(local_sym_tab[right.id][0], builder)
ast.Sub: builder.sub, else:
ast.Mult: builder.mul, raise SyntaxError(f"Undefined variable: {right.id}")
ast.Div: builder.sdiv, elif isinstance(right, ast.Constant):
ast.Mod: builder.srem, right = ir.Constant(ir.IntType(64), right.value)
ast.LShift: builder.shl, else:
ast.RShift: builder.lshr, raise SyntaxError("Unsupported right operand type")
ast.BitOr: builder.or_,
ast.BitXor: builder.xor,
ast.BitAnd: builder.and_,
ast.FloorDiv: builder.udiv,
}
if type(op) in op_map: print(f"left is {left}, right is {right}, op is {op}")
result = op_map[type(op)](left, right)
return result if isinstance(op, ast.Add):
builder.store(builder.add(left, right), local_sym_tab[var_name][0])
elif isinstance(op, ast.Sub):
builder.store(builder.sub(left, right), local_sym_tab[var_name][0])
elif isinstance(op, ast.Mult):
builder.store(builder.mul(left, right), local_sym_tab[var_name][0])
elif isinstance(op, ast.Div):
builder.store(builder.sdiv(left, right), local_sym_tab[var_name][0])
elif isinstance(op, ast.Mod):
builder.store(builder.srem(left, right), local_sym_tab[var_name][0])
elif isinstance(op, ast.LShift):
builder.store(builder.shl(left, right), local_sym_tab[var_name][0])
elif isinstance(op, ast.RShift):
builder.store(builder.lshr(left, right), local_sym_tab[var_name][0])
elif isinstance(op, ast.BitOr):
builder.store(builder.or_(left, right), local_sym_tab[var_name][0])
elif isinstance(op, ast.BitXor):
builder.store(builder.xor(left, right), local_sym_tab[var_name][0])
elif isinstance(op, ast.BitAnd):
builder.store(builder.and_(left, right), local_sym_tab[var_name][0])
elif isinstance(op, ast.FloorDiv):
builder.store(builder.udiv(left, right), local_sym_tab[var_name][0])
else: else:
raise SyntaxError("Unsupported binary operation") raise SyntaxError("Unsupported binary operation")
def handle_binary_op(
func,
module,
rval,
builder,
var_name,
local_sym_tab,
map_sym_tab,
structs_sym_tab=None,
):
result = handle_binary_op_impl(
func, module, rval, builder, local_sym_tab, map_sym_tab, structs_sym_tab
)
if var_name and var_name in local_sym_tab:
logger.info(
f"Storing result {result} into variable {local_sym_tab[var_name].var}"
)
builder.store(result, local_sym_tab[var_name].var)
return result, result.type

View File

@ -1,27 +1,19 @@
import ast import ast
from llvmlite import ir from llvmlite import ir
from .license_pass import license_processing from .license_pass import license_processing
from .functions import func_proc from .functions_pass import func_proc
from .maps import maps_proc from .maps import maps_proc
from .structs import structs_proc from .structs import structs_proc
from .globals_pass import ( from .globals_pass import globals_processing
globals_list_creation, from .debuginfo import DW_LANG_C11, DwarfBehaviorEnum
globals_processing,
populate_global_symbol_table,
)
from .debuginfo import DW_LANG_C11, DwarfBehaviorEnum, DebugInfoGenerator
import os import os
import subprocess import subprocess
import inspect import inspect
from pathlib import Path from pathlib import Path
from pylibbpf import BpfProgram from pylibbpf import BpfProgram
import tempfile import tempfile
from logging import Logger
import logging
logger: Logger = logging.getLogger(__name__) VERSION = "v0.1.3"
VERSION = "v0.1.4"
def find_bpf_chunks(tree): def find_bpf_chunks(tree):
@ -38,27 +30,21 @@ def find_bpf_chunks(tree):
def processor(source_code, filename, module): def processor(source_code, filename, module):
tree = ast.parse(source_code, filename) tree = ast.parse(source_code, filename)
logger.debug(ast.dump(tree, indent=4)) print(ast.dump(tree, indent=4))
bpf_chunks = find_bpf_chunks(tree) bpf_chunks = find_bpf_chunks(tree)
for func_node in bpf_chunks: for func_node in bpf_chunks:
logger.info(f"Found BPF function/struct: {func_node.name}") print(f"Found BPF function/struct: {func_node.name}")
populate_global_symbol_table(tree, module)
license_processing(tree, module)
globals_processing(tree, module)
structs_sym_tab = structs_proc(tree, module, bpf_chunks) structs_sym_tab = structs_proc(tree, module, bpf_chunks)
map_sym_tab = maps_proc(tree, module, bpf_chunks) map_sym_tab = maps_proc(tree, module, bpf_chunks)
func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab) func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab)
globals_list_creation(tree, module) license_processing(tree, module)
globals_processing(tree, module)
def compile_to_ir(filename: str, output: str, loglevel=logging.INFO): def compile_to_ir(filename: str, output: str):
logging.basicConfig(
level=loglevel, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s"
)
with open(filename) as f: with open(filename) as f:
source = f.read() source = f.read()
@ -67,17 +53,33 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
module.triple = "bpf" module.triple = "bpf"
if not hasattr(module, "_debug_compile_unit"): if not hasattr(module, "_debug_compile_unit"):
debug_generator = DebugInfoGenerator(module) module._file_metadata = module.add_debug_info(
debug_generator.generate_file_metadata(filename, os.path.dirname(filename)) "DIFile",
debug_generator.generate_debug_cu( { # type: ignore
DW_LANG_C11, "filename": filename,
f"PythonBPF {VERSION}", "directory": os.path.dirname(filename),
True, # TODO: This is probably not true },
# TODO: add a global field here that keeps track of all the globals. Works without it, but I think it might
# be required for kprobes.
True,
) )
module._debug_compile_unit = module.add_debug_info(
"DICompileUnit",
{ # type: ignore
"language": DW_LANG_C11,
"file": module._file_metadata, # type: ignore
"producer": f"PythonBPF {VERSION}",
"isOptimized": True, # TODO: This is probably not true
# TODO: add a global field here that keeps track of all the globals. Works without it, but I think it might
# be required for kprobes.
"runtimeVersion": 0,
"emissionKind": 1,
"splitDebugInlining": False,
"nameTableKind": 0,
},
is_distinct=True,
)
module.add_named_metadata("llvm.dbg.cu", module._debug_compile_unit) # type: ignore
processor(source, filename, module) processor(source, filename, module)
wchar_size = module.add_metadata( wchar_size = module.add_metadata(
@ -119,7 +121,7 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
module.add_named_metadata("llvm.ident", [f"PythonBPF {VERSION}"]) module.add_named_metadata("llvm.ident", [f"PythonBPF {VERSION}"])
logger.info(f"IR written to {output}") print(f"IR written to {output}")
with open(output, "w") as f: with open(output, "w") as f:
f.write(f'source_filename = "{filename}"\n') f.write(f'source_filename = "{filename}"\n')
f.write(str(module)) f.write(str(module))
@ -128,7 +130,7 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
return output return output
def compile(loglevel=logging.INFO) -> bool: def compile() -> bool:
# Look one level up the stack to the caller of this function # Look one level up the stack to the caller of this function
caller_frame = inspect.stack()[1] caller_frame = inspect.stack()[1]
caller_file = Path(caller_frame.filename).resolve() caller_file = Path(caller_frame.filename).resolve()
@ -137,9 +139,7 @@ def compile(loglevel=logging.INFO) -> bool:
o_file = caller_file.with_suffix(".o") o_file = caller_file.with_suffix(".o")
success = True success = True
success = ( success = compile_to_ir(str(caller_file), str(ll_file)) and success
compile_to_ir(str(caller_file), str(ll_file), loglevel=loglevel) and success
)
success = bool( success = bool(
subprocess.run( subprocess.run(
@ -157,11 +157,11 @@ def compile(loglevel=logging.INFO) -> bool:
and success and success
) )
logger.info(f"Object written to {o_file}") print(f"Object written to {o_file}")
return success return success
def BPF(loglevel=logging.INFO) -> BpfProgram: def BPF() -> BpfProgram:
caller_frame = inspect.stack()[1] caller_frame = inspect.stack()[1]
src = inspect.getsource(caller_frame.frame) src = inspect.getsource(caller_frame.frame)
with tempfile.NamedTemporaryFile( with tempfile.NamedTemporaryFile(
@ -174,7 +174,7 @@ def BPF(loglevel=logging.INFO) -> BpfProgram:
f.write(src) f.write(src)
f.flush() f.flush()
source = f.name source = f.name
compile_to_ir(source, str(inter.name), loglevel=loglevel) compile_to_ir(source, str(inter.name))
subprocess.run( subprocess.run(
[ [
"llc", "llc",

View File

@ -12,34 +12,6 @@ class DebugInfoGenerator:
self.module = module self.module = module
self._type_cache = {} # Cache for common debug types self._type_cache = {} # Cache for common debug types
def generate_file_metadata(self, filename, dirname):
self.module._file_metadata = self.module.add_debug_info(
"DIFile",
{ # type: ignore
"filename": filename,
"directory": dirname,
},
)
def generate_debug_cu(
self, language, producer: str, is_optimized: bool, is_distinct: bool
):
self.module._debug_compile_unit = self.module.add_debug_info(
"DICompileUnit",
{ # type: ignore
"language": language,
"file": self.module._file_metadata, # type: ignore
"producer": producer,
"isOptimized": is_optimized,
"runtimeVersion": 0,
"emissionKind": 1,
"splitDebugInlining": False,
"nameTableKind": 0,
},
is_distinct=is_distinct,
)
self.module.add_named_metadata("llvm.dbg.cu", self.module._debug_compile_unit) # type: ignore
def get_basic_type(self, name: str, size: int, encoding: int) -> Any: def get_basic_type(self, name: str, size: int, encoding: int) -> Any:
"""Get or create a basic type with caching""" """Get or create a basic type with caching"""
key = (name, size, encoding) key = (name, size, encoding)

View File

@ -1,10 +0,0 @@
from .expr_pass import eval_expr, handle_expr
from .type_normalization import convert_to_bool, get_base_type_and_depth, deref_to_depth
__all__ = [
"eval_expr",
"handle_expr",
"convert_to_bool",
"get_base_type_and_depth",
"deref_to_depth",
]

View File

@ -1,461 +0,0 @@
import ast
from llvmlite import ir
from logging import Logger
import logging
from typing import Dict
from pythonbpf.type_deducer import ctypes_to_ir, is_ctypes
from .type_normalization import convert_to_bool, handle_comparator
logger: Logger = logging.getLogger(__name__)
def _handle_name_expr(expr: ast.Name, local_sym_tab: Dict, builder: ir.IRBuilder):
"""Handle ast.Name expressions."""
if expr.id in local_sym_tab:
var = local_sym_tab[expr.id].var
val = builder.load(var)
return val, local_sym_tab[expr.id].ir_type
else:
logger.info(f"Undefined variable {expr.id}")
return None
def _handle_constant_expr(expr: ast.Constant):
"""Handle ast.Constant expressions."""
if isinstance(expr.value, int) or isinstance(expr.value, bool):
return ir.Constant(ir.IntType(64), int(expr.value)), ir.IntType(64)
else:
logger.error(f"Unsupported constant type {ast.dump(expr)}")
return None
def _handle_attribute_expr(
expr: ast.Attribute,
local_sym_tab: Dict,
structs_sym_tab: Dict,
builder: ir.IRBuilder,
):
"""Handle ast.Attribute expressions for struct field access."""
if isinstance(expr.value, ast.Name):
var_name = expr.value.id
attr_name = expr.attr
if var_name in local_sym_tab:
var_ptr, var_type, var_metadata = local_sym_tab[var_name]
logger.info(f"Loading attribute {attr_name} from variable {var_name}")
logger.info(f"Variable type: {var_type}, Variable ptr: {var_ptr}")
metadata = structs_sym_tab[var_metadata]
if attr_name in metadata.fields:
gep = metadata.gep(builder, var_ptr, attr_name)
val = builder.load(gep)
field_type = metadata.field_type(attr_name)
return val, field_type
return None
def _handle_deref_call(expr: ast.Call, local_sym_tab: Dict, builder: ir.IRBuilder):
"""Handle deref function calls."""
logger.info(f"Handling deref {ast.dump(expr)}")
if len(expr.args) != 1:
logger.info("deref takes exactly one argument")
return None
arg = expr.args[0]
if (
isinstance(arg, ast.Call)
and isinstance(arg.func, ast.Name)
and arg.func.id == "deref"
):
logger.info("Multiple deref not supported")
return None
if isinstance(arg, ast.Name):
if arg.id in local_sym_tab:
arg_ptr = local_sym_tab[arg.id].var
else:
logger.info(f"Undefined variable {arg.id}")
return None
else:
logger.info("Unsupported argument type for deref")
return None
if arg_ptr is None:
logger.info("Failed to evaluate deref argument")
return None
# Load the value from pointer
val = builder.load(arg_ptr)
return val, local_sym_tab[arg.id].ir_type
def _handle_ctypes_call(
func,
module,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab=None,
):
"""Handle ctypes type constructor calls."""
if len(expr.args) != 1:
logger.info("ctypes constructor takes exactly one argument")
return None
arg = expr.args[0]
val = eval_expr(
func,
module,
builder,
arg,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if val is None:
logger.info("Failed to evaluate argument to ctypes constructor")
return None
call_type = expr.func.id
expected_type = ctypes_to_ir(call_type)
if val[1] != expected_type:
# NOTE: We are only considering casting to and from int types for now
if isinstance(val[1], ir.IntType) and isinstance(expected_type, ir.IntType):
if val[1].width < expected_type.width:
val = (builder.sext(val[0], expected_type), expected_type)
else:
val = (builder.trunc(val[0], expected_type), expected_type)
else:
raise ValueError(f"Type mismatch: expected {expected_type}, got {val[1]}")
return val
def _handle_compare(
func, module, builder, cond, local_sym_tab, map_sym_tab, structs_sym_tab=None
):
"""Handle ast.Compare expressions."""
if len(cond.ops) != 1 or len(cond.comparators) != 1:
logger.error("Only single comparisons are supported")
return None
lhs = eval_expr(
func,
module,
builder,
cond.left,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
rhs = eval_expr(
func,
module,
builder,
cond.comparators[0],
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if lhs is None or rhs is None:
logger.error("Failed to evaluate comparison operands")
return None
lhs, _ = lhs
rhs, _ = rhs
return handle_comparator(func, builder, cond.ops[0], lhs, rhs)
def _handle_unary_op(
func,
module,
builder,
expr: ast.UnaryOp,
local_sym_tab,
map_sym_tab,
structs_sym_tab=None,
):
"""Handle ast.UnaryOp expressions."""
if not isinstance(expr.op, ast.Not) and not isinstance(expr.op, ast.USub):
logger.error("Only 'not' and '-' unary operators are supported")
return None
from pythonbpf.binary_ops import get_operand_value
operand = get_operand_value(
func, module, expr.operand, builder, local_sym_tab, map_sym_tab, structs_sym_tab
)
if operand is None:
logger.error("Failed to evaluate operand for unary operation")
return None
if isinstance(expr.op, ast.Not):
true_const = ir.Constant(ir.IntType(1), 1)
result = builder.xor(convert_to_bool(builder, operand), true_const)
return result, ir.IntType(1)
elif isinstance(expr.op, ast.USub):
# Multiply by -1
neg_one = ir.Constant(ir.IntType(64), -1)
result = builder.mul(operand, neg_one)
return result, ir.IntType(64)
def _handle_and_op(func, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab):
"""Handle `and` boolean operations."""
logger.debug(f"Handling 'and' operator with {len(expr.values)} operands")
merge_block = func.append_basic_block(name="and.merge")
false_block = func.append_basic_block(name="and.false")
incoming_values = []
for i, value in enumerate(expr.values):
is_last = i == len(expr.values) - 1
# Evaluate current operand
operand_result = eval_expr(
func, None, builder, value, local_sym_tab, map_sym_tab, structs_sym_tab
)
if operand_result is None:
logger.error(f"Failed to evaluate operand {i} in 'and' expression")
return None
operand_val, operand_type = operand_result
# Convert to boolean if needed
operand_bool = convert_to_bool(builder, operand_val)
current_block = builder.block
if is_last:
# Last operand: result is this value
builder.branch(merge_block)
incoming_values.append((operand_bool, current_block))
else:
# Not last: check if true, continue or short-circuit
next_check = func.append_basic_block(name=f"and.check_{i + 1}")
builder.cbranch(operand_bool, next_check, false_block)
builder.position_at_end(next_check)
# False block: short-circuit with false
builder.position_at_end(false_block)
builder.branch(merge_block)
false_value = ir.Constant(ir.IntType(1), 0)
incoming_values.append((false_value, false_block))
# Merge block: phi node
builder.position_at_end(merge_block)
phi = builder.phi(ir.IntType(1), name="and.result")
for val, block in incoming_values:
phi.add_incoming(val, block)
logger.debug(f"Generated 'and' with {len(incoming_values)} incoming values")
return phi, ir.IntType(1)
def _handle_or_op(func, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab):
"""Handle `or` boolean operations."""
logger.debug(f"Handling 'or' operator with {len(expr.values)} operands")
merge_block = func.append_basic_block(name="or.merge")
true_block = func.append_basic_block(name="or.true")
incoming_values = []
for i, value in enumerate(expr.values):
is_last = i == len(expr.values) - 1
# Evaluate current operand
operand_result = eval_expr(
func, None, builder, value, local_sym_tab, map_sym_tab, structs_sym_tab
)
if operand_result is None:
logger.error(f"Failed to evaluate operand {i} in 'or' expression")
return None
operand_val, operand_type = operand_result
# Convert to boolean if needed
operand_bool = convert_to_bool(builder, operand_val)
current_block = builder.block
if is_last:
# Last operand: result is this value
builder.branch(merge_block)
incoming_values.append((operand_bool, current_block))
else:
# Not last: check if false, continue or short-circuit
next_check = func.append_basic_block(name=f"or.check_{i + 1}")
builder.cbranch(operand_bool, true_block, next_check)
builder.position_at_end(next_check)
# True block: short-circuit with true
builder.position_at_end(true_block)
builder.branch(merge_block)
true_value = ir.Constant(ir.IntType(1), 1)
incoming_values.append((true_value, true_block))
# Merge block: phi node
builder.position_at_end(merge_block)
phi = builder.phi(ir.IntType(1), name="or.result")
for val, block in incoming_values:
phi.add_incoming(val, block)
logger.debug(f"Generated 'or' with {len(incoming_values)} incoming values")
return phi, ir.IntType(1)
def _handle_boolean_op(
func,
module,
builder,
expr: ast.BoolOp,
local_sym_tab,
map_sym_tab,
structs_sym_tab=None,
):
"""Handle `and` and `or` boolean operations."""
if isinstance(expr.op, ast.And):
return _handle_and_op(
func, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab
)
elif isinstance(expr.op, ast.Or):
return _handle_or_op(
func, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab
)
else:
logger.error(f"Unsupported boolean operator: {type(expr.op).__name__}")
return None
def eval_expr(
func,
module,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab=None,
):
logger.info(f"Evaluating expression: {ast.dump(expr)}")
if isinstance(expr, ast.Name):
return _handle_name_expr(expr, local_sym_tab, builder)
elif isinstance(expr, ast.Constant):
return _handle_constant_expr(expr)
elif isinstance(expr, ast.Call):
if isinstance(expr.func, ast.Name) and expr.func.id == "deref":
return _handle_deref_call(expr, local_sym_tab, builder)
if isinstance(expr.func, ast.Name) and is_ctypes(expr.func.id):
return _handle_ctypes_call(
func,
module,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
# delayed import to avoid circular dependency
from pythonbpf.helper import HelperHandlerRegistry, handle_helper_call
if isinstance(expr.func, ast.Name) and HelperHandlerRegistry.has_handler(
expr.func.id
):
return handle_helper_call(
expr,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
elif isinstance(expr.func, ast.Attribute):
logger.info(f"Handling method call: {ast.dump(expr.func)}")
if isinstance(expr.func.value, ast.Call) and isinstance(
expr.func.value.func, ast.Name
):
method_name = expr.func.attr
if HelperHandlerRegistry.has_handler(method_name):
return handle_helper_call(
expr,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
elif isinstance(expr.func.value, ast.Name):
obj_name = expr.func.value.id
method_name = expr.func.attr
if obj_name in map_sym_tab:
if HelperHandlerRegistry.has_handler(method_name):
return handle_helper_call(
expr,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
elif isinstance(expr, ast.Attribute):
return _handle_attribute_expr(expr, local_sym_tab, structs_sym_tab, builder)
elif isinstance(expr, ast.BinOp):
from pythonbpf.binary_ops import handle_binary_op
return handle_binary_op(
func,
module,
expr,
builder,
None,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
elif isinstance(expr, ast.Compare):
return _handle_compare(
func, module, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab
)
elif isinstance(expr, ast.UnaryOp):
return _handle_unary_op(
func, module, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab
)
elif isinstance(expr, ast.BoolOp):
return _handle_boolean_op(
func, module, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab
)
logger.info("Unsupported expression evaluation")
return None
def handle_expr(
func,
module,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
):
"""Handle expression statements in the function body."""
logger.info(f"Handling expression: {ast.dump(expr)}")
call = expr.value
if isinstance(call, ast.Call):
eval_expr(
func,
module,
builder,
call,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
else:
logger.info("Unsupported expression type")

View File

@ -1,128 +0,0 @@
from llvmlite import ir
import logging
import ast
logger = logging.getLogger(__name__)
COMPARISON_OPS = {
ast.Eq: "==",
ast.NotEq: "!=",
ast.Lt: "<",
ast.LtE: "<=",
ast.Gt: ">",
ast.GtE: ">=",
ast.Is: "==",
ast.IsNot: "!=",
}
def get_base_type_and_depth(ir_type):
"""Get the base type for pointer types."""
cur_type = ir_type
depth = 0
while isinstance(cur_type, ir.PointerType):
depth += 1
cur_type = cur_type.pointee
return cur_type, depth
def deref_to_depth(func, builder, val, target_depth):
"""Dereference a pointer to a certain depth."""
cur_val = val
cur_type = val.type
for depth in range(target_depth):
if not isinstance(val.type, ir.PointerType):
logger.error("Cannot dereference further, non-pointer type")
return None
# dereference with null check
pointee_type = cur_type.pointee
null_check_block = builder.block
not_null_block = func.append_basic_block(name=f"deref_not_null_{depth}")
merge_block = func.append_basic_block(name=f"deref_merge_{depth}")
null_ptr = ir.Constant(cur_type, None)
is_not_null = builder.icmp_signed("!=", cur_val, null_ptr)
logger.debug(f"Inserted null check for pointer at depth {depth}")
builder.cbranch(is_not_null, not_null_block, merge_block)
builder.position_at_end(not_null_block)
dereferenced_val = builder.load(cur_val)
logger.debug(f"Dereferenced to depth {depth - 1}, type: {pointee_type}")
builder.branch(merge_block)
builder.position_at_end(merge_block)
phi = builder.phi(pointee_type, name=f"deref_result_{depth}")
zero_value = (
ir.Constant(pointee_type, 0)
if isinstance(pointee_type, ir.IntType)
else ir.Constant(pointee_type, None)
)
phi.add_incoming(zero_value, null_check_block)
phi.add_incoming(dereferenced_val, not_null_block)
# Continue with phi result
cur_val = phi
cur_type = pointee_type
return cur_val
def _normalize_types(func, builder, lhs, rhs):
"""Normalize types for comparison."""
logger.info(f"Normalizing types: {lhs.type} vs {rhs.type}")
if isinstance(lhs.type, ir.IntType) and isinstance(rhs.type, ir.IntType):
if lhs.type.width < rhs.type.width:
lhs = builder.sext(lhs, rhs.type)
else:
rhs = builder.sext(rhs, lhs.type)
return lhs, rhs
elif not isinstance(lhs.type, ir.PointerType) and not isinstance(
rhs.type, ir.PointerType
):
logger.error(f"Type mismatch: {lhs.type} vs {rhs.type}")
return None, None
else:
lhs_base, lhs_depth = get_base_type_and_depth(lhs.type)
rhs_base, rhs_depth = get_base_type_and_depth(rhs.type)
if lhs_base == rhs_base:
if lhs_depth < rhs_depth:
rhs = deref_to_depth(func, builder, rhs, rhs_depth - lhs_depth)
elif rhs_depth < lhs_depth:
lhs = deref_to_depth(func, builder, lhs, lhs_depth - rhs_depth)
return _normalize_types(func, builder, lhs, rhs)
def convert_to_bool(builder, val):
"""Convert a value to boolean."""
if val.type == ir.IntType(1):
return val
if isinstance(val.type, ir.PointerType):
zero = ir.Constant(val.type, None)
else:
zero = ir.Constant(val.type, 0)
return builder.icmp_signed("!=", val, zero)
def handle_comparator(func, builder, op, lhs, rhs):
"""Handle comparison operations."""
if lhs.type != rhs.type:
lhs, rhs = _normalize_types(func, builder, lhs, rhs)
if lhs is None or rhs is None:
return None
if type(op) not in COMPARISON_OPS:
logger.error(f"Unsupported comparison operator: {type(op)}")
return None
predicate = COMPARISON_OPS[type(op)]
result = builder.icmp_signed(predicate, lhs, rhs)
logger.debug(f"Comparison result: {result}")
return result, ir.IntType(1)

155
pythonbpf/expr_pass.py Normal file
View File

@ -0,0 +1,155 @@
import ast
from llvmlite import ir
def eval_expr(
func,
module,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab=None,
local_var_metadata=None,
):
print(f"Evaluating expression: {ast.dump(expr)}")
print(local_var_metadata)
if isinstance(expr, ast.Name):
if expr.id in local_sym_tab:
var = local_sym_tab[expr.id][0]
val = builder.load(var)
return val, local_sym_tab[expr.id][1] # return value and type
else:
print(f"Undefined variable {expr.id}")
return None
elif isinstance(expr, ast.Constant):
if isinstance(expr.value, int):
return ir.Constant(ir.IntType(64), expr.value), ir.IntType(64)
elif isinstance(expr.value, bool):
return ir.Constant(ir.IntType(1), int(expr.value)), ir.IntType(1)
else:
print("Unsupported constant type")
return None
elif isinstance(expr, ast.Call):
# delayed import to avoid circular dependency
from pythonbpf.helper import HelperHandlerRegistry, handle_helper_call
if isinstance(expr.func, ast.Name):
# check deref
if expr.func.id == "deref":
print(f"Handling deref {ast.dump(expr)}")
if len(expr.args) != 1:
print("deref takes exactly one argument")
return None
arg = expr.args[0]
if (
isinstance(arg, ast.Call)
and isinstance(arg.func, ast.Name)
and arg.func.id == "deref"
):
print("Multiple deref not supported")
return None
if isinstance(arg, ast.Name):
if arg.id in local_sym_tab:
arg = local_sym_tab[arg.id][0]
else:
print(f"Undefined variable {arg.id}")
return None
if arg is None:
print("Failed to evaluate deref argument")
return None
# Since we are handling only name case, directly take type from sym tab
val = builder.load(arg)
return val, local_sym_tab[expr.args[0].id][1]
# check for helpers
if HelperHandlerRegistry.has_handler(expr.func.id):
return handle_helper_call(
expr,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
local_var_metadata,
)
elif isinstance(expr.func, ast.Attribute):
print(f"Handling method call: {ast.dump(expr.func)}")
if isinstance(expr.func.value, ast.Call) and isinstance(
expr.func.value.func, ast.Name
):
method_name = expr.func.attr
if HelperHandlerRegistry.has_handler(method_name):
return handle_helper_call(
expr,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
local_var_metadata,
)
elif isinstance(expr.func.value, ast.Name):
obj_name = expr.func.value.id
method_name = expr.func.attr
if obj_name in map_sym_tab:
if HelperHandlerRegistry.has_handler(method_name):
return handle_helper_call(
expr,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
local_var_metadata,
)
elif isinstance(expr, ast.Attribute):
if isinstance(expr.value, ast.Name):
var_name = expr.value.id
attr_name = expr.attr
if var_name in local_sym_tab:
var_ptr, var_type = local_sym_tab[var_name]
print(f"Loading attribute " f"{attr_name} from variable {var_name}")
print(f"Variable type: {var_type}, Variable ptr: {var_ptr}")
print(local_var_metadata)
if local_var_metadata and var_name in local_var_metadata:
metadata = structs_sym_tab[local_var_metadata[var_name]]
if attr_name in metadata.fields:
gep = metadata.gep(builder, var_ptr, attr_name)
val = builder.load(gep)
field_type = metadata.field_type(attr_name)
return val, field_type
print("Unsupported expression evaluation")
return None
def handle_expr(
func,
module,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
local_var_metadata,
):
"""Handle expression statements in the function body."""
print(f"Handling expression: {ast.dump(expr)}")
print(local_var_metadata)
call = expr.value
if isinstance(call, ast.Call):
eval_expr(
func,
module,
builder,
call,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
local_var_metadata,
)
else:
print("Unsupported expression type")

View File

@ -1,3 +0,0 @@
from .functions_pass import func_proc
__all__ = ["func_proc"]

View File

@ -1,22 +0,0 @@
from typing import Dict
class StatementHandlerRegistry:
"""Registry for statement handlers."""
_handlers: Dict = {}
@classmethod
def register(cls, stmt_type):
"""Register a handler for a specific statement type."""
def decorator(handler):
cls._handlers[stmt_type] = handler
return handler
return decorator
@classmethod
def __getitem__(cls, stmt_type):
"""Get the handler for a specific statement type."""
return cls._handlers.get(stmt_type, None)

View File

@ -1,521 +0,0 @@
from llvmlite import ir
import ast
import logging
from pythonbpf.helper import (
HelperHandlerRegistry,
reset_scratch_pool,
)
from pythonbpf.type_deducer import ctypes_to_ir
from pythonbpf.expr import eval_expr, handle_expr, convert_to_bool
from pythonbpf.assign_pass import (
handle_variable_assignment,
handle_struct_field_assignment,
)
from pythonbpf.allocation_pass import handle_assign_allocation, allocate_temp_pool
from .return_utils import _handle_none_return, _handle_xdp_return, _is_xdp_name
logger = logging.getLogger(__name__)
def get_probe_string(func_node):
"""Extract the probe string from the decorator of the function node."""
# TODO: right now we have the whole string in the section decorator
# But later we can implement typed tuples for tracepoints and kprobes
# For helper functions, we return "helper"
for decorator in func_node.decorator_list:
if isinstance(decorator, ast.Name) and decorator.id == "bpfglobal":
return None
if isinstance(decorator, ast.Call) and isinstance(decorator.func, ast.Name):
if decorator.func.id == "section" and len(decorator.args) == 1:
arg = decorator.args[0]
if isinstance(arg, ast.Constant) and isinstance(arg.value, str):
return arg.value
return "helper"
def handle_assign(
func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_sym_tab
):
"""Handle assignment statements in the function body."""
# TODO: Support this later
# GH #37
if len(stmt.targets) != 1:
logger.error("Multi-target assignment is not supported for now")
return
target = stmt.targets[0]
rval = stmt.value
if isinstance(target, ast.Name):
# NOTE: Simple variable assignment case: x = 5
var_name = target.id
result = handle_variable_assignment(
func,
module,
builder,
var_name,
rval,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if not result:
logger.error(f"Failed to handle assignment to {var_name}")
return
if isinstance(target, ast.Attribute):
# NOTE: Struct field assignment case: pkt.field = value
handle_struct_field_assignment(
func,
module,
builder,
target,
rval,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
return
# Unsupported target type
logger.error(f"Unsupported assignment target: {ast.dump(target)}")
def handle_cond(
func, module, builder, cond, local_sym_tab, map_sym_tab, structs_sym_tab=None
):
val = eval_expr(
func, module, builder, cond, local_sym_tab, map_sym_tab, structs_sym_tab
)[0]
return convert_to_bool(builder, val)
def handle_if(
func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_sym_tab=None
):
"""Handle if statements in the function body."""
logger.info("Handling if statement")
# start = builder.block.parent
then_block = func.append_basic_block(name="if.then")
merge_block = func.append_basic_block(name="if.end")
if stmt.orelse:
else_block = func.append_basic_block(name="if.else")
else:
else_block = None
cond = handle_cond(
func, module, builder, stmt.test, local_sym_tab, map_sym_tab, structs_sym_tab
)
if else_block:
builder.cbranch(cond, then_block, else_block)
else:
builder.cbranch(cond, then_block, merge_block)
builder.position_at_end(then_block)
for s in stmt.body:
process_stmt(
func, module, builder, s, local_sym_tab, map_sym_tab, structs_sym_tab, False
)
if not builder.block.is_terminated:
builder.branch(merge_block)
if else_block:
builder.position_at_end(else_block)
for s in stmt.orelse:
process_stmt(
func,
module,
builder,
s,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
False,
)
if not builder.block.is_terminated:
builder.branch(merge_block)
builder.position_at_end(merge_block)
def handle_return(builder, stmt, local_sym_tab, ret_type):
logger.info(f"Handling return statement: {ast.dump(stmt)}")
if stmt.value is None:
return _handle_none_return(builder)
elif isinstance(stmt.value, ast.Name) and _is_xdp_name(stmt.value.id):
return _handle_xdp_return(stmt, builder, ret_type)
else:
val = eval_expr(
func=None,
module=None,
builder=builder,
expr=stmt.value,
local_sym_tab=local_sym_tab,
map_sym_tab={},
structs_sym_tab={},
)
logger.info(f"Evaluated return expression to {val}")
builder.ret(val[0])
return True
def process_stmt(
func,
module,
builder,
stmt,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
did_return,
ret_type=ir.IntType(64),
):
logger.info(f"Processing statement: {ast.dump(stmt)}")
reset_scratch_pool()
if isinstance(stmt, ast.Expr):
handle_expr(
func,
module,
builder,
stmt,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
elif isinstance(stmt, ast.Assign):
handle_assign(
func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_sym_tab
)
elif isinstance(stmt, ast.AugAssign):
raise SyntaxError("Augmented assignment not supported")
elif isinstance(stmt, ast.If):
handle_if(
func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_sym_tab
)
elif isinstance(stmt, ast.Return):
did_return = handle_return(
builder,
stmt,
local_sym_tab,
ret_type,
)
return did_return
def handle_if_allocation(
module, builder, stmt, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab
):
"""Recursively handle allocations in if/else branches."""
if stmt.body:
allocate_mem(
module,
builder,
stmt.body,
func,
ret_type,
map_sym_tab,
local_sym_tab,
structs_sym_tab,
)
if stmt.orelse:
allocate_mem(
module,
builder,
stmt.orelse,
func,
ret_type,
map_sym_tab,
local_sym_tab,
structs_sym_tab,
)
def count_temps_in_call(call_node, local_sym_tab):
"""Count the number of temporary variables needed for a function call."""
count = 0
is_helper = False
# NOTE: We exclude print calls for now
if isinstance(call_node.func, ast.Name):
if (
HelperHandlerRegistry.has_handler(call_node.func.id)
and call_node.func.id != "print"
):
is_helper = True
elif isinstance(call_node.func, ast.Attribute):
if HelperHandlerRegistry.has_handler(call_node.func.attr):
is_helper = True
if not is_helper:
return 0
for arg in call_node.args:
# NOTE: Count all non-name arguments
# For struct fields, if it is being passed as an argument,
# The struct object should already exist in the local_sym_tab
if not isinstance(arg, ast.Name) and not (
isinstance(arg, ast.Attribute) and arg.value.id in local_sym_tab
):
count += 1
return count
def allocate_mem(
module, builder, body, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab
):
max_temps_needed = 0
def update_max_temps_for_stmt(stmt):
nonlocal max_temps_needed
temps_needed = 0
if isinstance(stmt, ast.If):
for s in stmt.body:
update_max_temps_for_stmt(s)
for s in stmt.orelse:
update_max_temps_for_stmt(s)
return
for node in ast.walk(stmt):
if isinstance(node, ast.Call):
temps_needed += count_temps_in_call(node, local_sym_tab)
max_temps_needed = max(max_temps_needed, temps_needed)
for stmt in body:
update_max_temps_for_stmt(stmt)
# Handle allocations
if isinstance(stmt, ast.If):
handle_if_allocation(
module,
builder,
stmt,
func,
ret_type,
map_sym_tab,
local_sym_tab,
structs_sym_tab,
)
elif isinstance(stmt, ast.Assign):
handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab)
allocate_temp_pool(builder, max_temps_needed, local_sym_tab)
return local_sym_tab
def process_func_body(
module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab
):
"""Process the body of a bpf function"""
# TODO: A lot. We just have print -> bpf_trace_printk for now
did_return = False
local_sym_tab = {}
# pre-allocate dynamic variables
local_sym_tab = allocate_mem(
module,
builder,
func_node.body,
func,
ret_type,
map_sym_tab,
local_sym_tab,
structs_sym_tab,
)
logger.info(f"Local symbol table: {local_sym_tab.keys()}")
for stmt in func_node.body:
did_return = process_stmt(
func,
module,
builder,
stmt,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
did_return,
ret_type,
)
if not did_return:
builder.ret(ir.Constant(ir.IntType(64), 0))
def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_tab):
"""Process a single BPF chunk (function) and emit corresponding LLVM IR."""
func_name = func_node.name
ret_type = return_type
# TODO: parse parameters
param_types = []
if func_node.args.args:
# Assume first arg to be ctx
param_types.append(ir.PointerType())
func_ty = ir.FunctionType(ret_type, param_types)
func = ir.Function(module, func_ty, func_name)
func.linkage = "dso_local"
func.attributes.add("nounwind")
func.attributes.add("noinline")
func.attributes.add("optnone")
if func_node.args.args:
# Only look at the first argument for now
param = func.args[0]
param.add_attribute("nocapture")
probe_string = get_probe_string(func_node)
if probe_string is not None:
func.section = probe_string
block = func.append_basic_block(name="entry")
builder = ir.IRBuilder(block)
process_func_body(
module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab
)
return func
def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab):
for func_node in chunks:
is_global = False
for decorator in func_node.decorator_list:
if isinstance(decorator, ast.Name) and decorator.id in (
"map",
"bpfglobal",
"struct",
):
is_global = True
break
if is_global:
continue
func_type = get_probe_string(func_node)
logger.info(f"Found probe_string of {func_node.name}: {func_type}")
process_bpf_chunk(
func_node,
module,
ctypes_to_ir(infer_return_type(func_node)),
map_sym_tab,
structs_sym_tab,
)
def infer_return_type(func_node: ast.FunctionDef):
if not isinstance(func_node, (ast.FunctionDef, ast.AsyncFunctionDef)):
raise TypeError("Expected ast.FunctionDef")
if func_node.returns is not None:
try:
return ast.unparse(func_node.returns)
except Exception:
node = func_node.returns
if isinstance(node, ast.Name):
return node.id
if isinstance(node, ast.Attribute):
return getattr(node, "attr", type(node).__name__)
try:
return str(node)
except Exception:
return type(node).__name__
found_type = None
def _expr_type(e):
if e is None:
return "None"
if isinstance(e, ast.Constant):
return type(e.value).__name__
if isinstance(e, ast.Name):
return e.id
if isinstance(e, ast.Call):
f = e.func
if isinstance(f, ast.Name):
return f.id
if isinstance(f, ast.Attribute):
try:
return ast.unparse(f)
except Exception:
return getattr(f, "attr", type(f).__name__)
try:
return ast.unparse(f)
except Exception:
return type(f).__name__
if isinstance(e, ast.Attribute):
try:
return ast.unparse(e)
except Exception:
return getattr(e, "attr", type(e).__name__)
try:
return ast.unparse(e)
except Exception:
return type(e).__name__
for walked_node in ast.walk(func_node):
if isinstance(walked_node, ast.Return):
t = _expr_type(walked_node.value)
if found_type is None:
found_type = t
elif found_type != t:
raise ValueError(f"Conflicting return types: {found_type} vs {t}")
return found_type or "None"
# For string assignment to fixed-size arrays
def assign_string_to_array(builder, target_array_ptr, source_string_ptr, array_length):
"""
Copy a string (i8*) to a fixed-size array ([N x i8]*)
"""
# Create a loop to copy characters one by one
# entry_block = builder.block
copy_block = builder.append_basic_block("copy_char")
end_block = builder.append_basic_block("copy_end")
# Create loop counter
i = builder.alloca(ir.IntType(32))
builder.store(ir.Constant(ir.IntType(32), 0), i)
# Start the loop
builder.branch(copy_block)
# Copy loop
builder.position_at_end(copy_block)
idx = builder.load(i)
in_bounds = builder.icmp_unsigned(
"<", idx, ir.Constant(ir.IntType(32), array_length)
)
builder.cbranch(in_bounds, copy_block, end_block)
with builder.if_then(in_bounds):
# Load character from source
src_ptr = builder.gep(source_string_ptr, [idx])
char = builder.load(src_ptr)
# Store character in target
dst_ptr = builder.gep(target_array_ptr, [ir.Constant(ir.IntType(32), 0), idx])
builder.store(char, dst_ptr)
# Increment counter
next_idx = builder.add(idx, ir.Constant(ir.IntType(32), 1))
builder.store(next_idx, i)
builder.position_at_end(end_block)
# Ensure null termination
last_idx = ir.Constant(ir.IntType(32), array_length - 1)
null_ptr = builder.gep(target_array_ptr, [ir.Constant(ir.IntType(32), 0), last_idx])
builder.store(ir.Constant(ir.IntType(8), 0), null_ptr)

View File

@ -1,45 +0,0 @@
import logging
import ast
from llvmlite import ir
logger: logging.Logger = logging.getLogger(__name__)
XDP_ACTIONS = {
"XDP_ABORTED": 0,
"XDP_DROP": 1,
"XDP_PASS": 2,
"XDP_TX": 3,
"XDP_REDIRECT": 4,
}
def _handle_none_return(builder) -> bool:
"""Handle return or return None -> returns 0."""
builder.ret(ir.Constant(ir.IntType(64), 0))
logger.debug("Generated default return: 0")
return True
def _is_xdp_name(name: str) -> bool:
"""Check if a name is an XDP action"""
return name in XDP_ACTIONS
def _handle_xdp_return(stmt: ast.Return, builder, ret_type) -> bool:
"""Handle XDP returns"""
if not isinstance(stmt.value, ast.Name):
return False
action_name = stmt.value.id
if action_name not in XDP_ACTIONS:
raise ValueError(
f"Unknown XDP action: {action_name}. Available: {XDP_ACTIONS.keys()}"
)
return False
value = XDP_ACTIONS[action_name]
builder.ret(ir.Constant(ret_type, value))
logger.debug(f"Generated XDP action return: {action_name} = {value}")
return True

714
pythonbpf/functions_pass.py Normal file
View File

@ -0,0 +1,714 @@
from llvmlite import ir
import ast
from typing import Any
from .helper import HelperHandlerRegistry, handle_helper_call
from .type_deducer import ctypes_to_ir
from .binary_ops import handle_binary_op
from .expr_pass import eval_expr, handle_expr
local_var_metadata: dict[str | Any, Any] = {}
def get_probe_string(func_node):
"""Extract the probe string from the decorator of the function node."""
# TODO: right now we have the whole string in the section decorator
# But later we can implement typed tuples for tracepoints and kprobes
# For helper functions, we return "helper"
for decorator in func_node.decorator_list:
if isinstance(decorator, ast.Name) and decorator.id == "bpfglobal":
return None
if isinstance(decorator, ast.Call) and isinstance(decorator.func, ast.Name):
if decorator.func.id == "section" and len(decorator.args) == 1:
arg = decorator.args[0]
if isinstance(arg, ast.Constant) and isinstance(arg.value, str):
return arg.value
return "helper"
def handle_assign(
func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_sym_tab
):
"""Handle assignment statements in the function body."""
if len(stmt.targets) != 1:
print("Unsupported multiassignment")
return
num_types = ("c_int32", "c_int64", "c_uint32", "c_uint64")
target = stmt.targets[0]
print(f"Handling assignment to {ast.dump(target)}")
if not isinstance(target, ast.Name) and not isinstance(target, ast.Attribute):
print("Unsupported assignment target")
return
var_name = target.id if isinstance(target, ast.Name) else target.value.id
rval = stmt.value
if isinstance(target, ast.Attribute):
# struct field assignment
field_name = target.attr
if var_name in local_sym_tab and var_name in local_var_metadata:
struct_type = local_var_metadata[var_name]
struct_info = structs_sym_tab[struct_type]
if field_name in struct_info.fields:
field_ptr = struct_info.gep(
builder, local_sym_tab[var_name][0], field_name
)
val = eval_expr(
func,
module,
builder,
rval,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if isinstance(struct_info.field_type(field_name), ir.ArrayType) and val[
1
] == ir.PointerType(ir.IntType(8)):
# TODO: Figure it out, not a priority rn
# Special case for string assignment to char array
# str_len = struct_info["field_types"][field_idx].count
# assign_string_to_array(builder, field_ptr, val[0], str_len)
# print(f"Assigned to struct field {var_name}.{field_name}")
pass
if val is None:
print("Failed to evaluate struct field assignment")
return
print(field_ptr)
builder.store(val[0], field_ptr)
print(f"Assigned to struct field {var_name}.{field_name}")
return
elif isinstance(rval, ast.Constant):
if isinstance(rval.value, bool):
if rval.value:
builder.store(ir.Constant(ir.IntType(1), 1), local_sym_tab[var_name][0])
else:
builder.store(ir.Constant(ir.IntType(1), 0), local_sym_tab[var_name][0])
print(f"Assigned constant {rval.value} to {var_name}")
elif isinstance(rval.value, int):
# Assume c_int64 for now
# var = builder.alloca(ir.IntType(64), name=var_name)
# var.align = 8
builder.store(
ir.Constant(ir.IntType(64), rval.value), local_sym_tab[var_name][0]
)
# local_sym_tab[var_name] = var
print(f"Assigned constant {rval.value} to {var_name}")
elif isinstance(rval.value, str):
str_val = rval.value.encode("utf-8") + b"\x00"
str_const = ir.Constant(
ir.ArrayType(ir.IntType(8), len(str_val)), bytearray(str_val)
)
global_str = ir.GlobalVariable(
module, str_const.type, name=f"{var_name}_str"
)
global_str.linkage = "internal"
global_str.global_constant = True
global_str.initializer = str_const
str_ptr = builder.bitcast(global_str, ir.PointerType(ir.IntType(8)))
builder.store(str_ptr, local_sym_tab[var_name][0])
print(f"Assigned string constant '{rval.value}' to {var_name}")
else:
print("Unsupported constant type")
elif isinstance(rval, ast.Call):
if isinstance(rval.func, ast.Name):
call_type = rval.func.id
print(f"Assignment call type: {call_type}")
if (
call_type in num_types
and len(rval.args) == 1
and isinstance(rval.args[0], ast.Constant)
and isinstance(rval.args[0].value, int)
):
ir_type = ctypes_to_ir(call_type)
# var = builder.alloca(ir_type, name=var_name)
# var.align = ir_type.width // 8
builder.store(
ir.Constant(ir_type, rval.args[0].value), local_sym_tab[var_name][0]
)
print(
f"Assigned {call_type} constant "
f"{rval.args[0].value} to {var_name}"
)
# local_sym_tab[var_name] = var
elif HelperHandlerRegistry.has_handler(call_type):
# var = builder.alloca(ir.IntType(64), name=var_name)
# var.align = 8
val = handle_helper_call(
rval,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
local_var_metadata,
)
builder.store(val[0], local_sym_tab[var_name][0])
# local_sym_tab[var_name] = var
print(f"Assigned constant {rval.func.id} to {var_name}")
elif call_type == "deref" and len(rval.args) == 1:
print(f"Handling deref assignment {ast.dump(rval)}")
val = eval_expr(
func,
module,
builder,
rval,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if val is None:
print("Failed to evaluate deref argument")
return
print(f"Dereferenced value: {val}, storing in {var_name}")
builder.store(val[0], local_sym_tab[var_name][0])
# local_sym_tab[var_name] = var
print(f"Dereferenced and assigned to {var_name}")
elif call_type in structs_sym_tab and len(rval.args) == 0:
struct_info = structs_sym_tab[call_type]
ir_type = struct_info.ir_type
# var = builder.alloca(ir_type, name=var_name)
# Null init
builder.store(ir.Constant(ir_type, None), local_sym_tab[var_name][0])
local_var_metadata[var_name] = call_type
print(f"Assigned struct {call_type} to {var_name}")
# local_sym_tab[var_name] = var
else:
print(f"Unsupported assignment call type: {call_type}")
elif isinstance(rval.func, ast.Attribute):
print(f"Assignment call attribute: {ast.dump(rval.func)}")
if isinstance(rval.func.value, ast.Name):
# TODO: probably a struct access
print(f"TODO STRUCT ACCESS {ast.dump(rval)}")
elif isinstance(rval.func.value, ast.Call) and isinstance(
rval.func.value.func, ast.Name
):
map_name = rval.func.value.func.id
method_name = rval.func.attr
if map_name in map_sym_tab:
if HelperHandlerRegistry.has_handler(method_name):
val = handle_helper_call(
rval,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
local_var_metadata,
)
# var = builder.alloca(ir.IntType(64), name=var_name)
# var.align = 8
builder.store(val[0], local_sym_tab[var_name][0])
# local_sym_tab[var_name] = var
else:
print("Unsupported assignment call structure")
else:
print("Unsupported assignment call function type")
elif isinstance(rval, ast.BinOp):
handle_binary_op(
rval, module, builder, var_name, local_sym_tab, map_sym_tab, func
)
else:
print("Unsupported assignment value type")
def handle_cond(func, module, builder, cond, local_sym_tab, map_sym_tab):
if isinstance(cond, ast.Constant):
if isinstance(cond.value, bool):
return ir.Constant(ir.IntType(1), int(cond.value))
elif isinstance(cond.value, int):
return ir.Constant(ir.IntType(1), int(bool(cond.value)))
else:
print("Unsupported constant type in condition")
return None
elif isinstance(cond, ast.Name):
if cond.id in local_sym_tab:
var = local_sym_tab[cond.id][0]
val = builder.load(var)
if val.type != ir.IntType(1):
# Convert nonzero values to true, zero to false
if isinstance(val.type, ir.PointerType):
# For pointer types, compare with null pointer
zero = ir.Constant(val.type, None)
else:
# For integer types, compare with zero
zero = ir.Constant(val.type, 0)
val = builder.icmp_signed("!=", val, zero)
return val
else:
print(f"Undefined variable {cond.id} in condition")
return None
elif isinstance(cond, ast.Compare):
lhs = eval_expr(func, module, builder, cond.left, local_sym_tab, map_sym_tab)[0]
if len(cond.ops) != 1 or len(cond.comparators) != 1:
print("Unsupported complex comparison")
return None
rhs = eval_expr(
func, module, builder, cond.comparators[0], local_sym_tab, map_sym_tab
)[0]
op = cond.ops[0]
if lhs.type != rhs.type:
if isinstance(lhs.type, ir.IntType) and isinstance(rhs.type, ir.IntType):
# Extend the smaller type to the larger type
if lhs.type.width < rhs.type.width:
lhs = builder.sext(lhs, rhs.type)
elif lhs.type.width > rhs.type.width:
rhs = builder.sext(rhs, lhs.type)
else:
print("Type mismatch in comparison")
return None
if isinstance(op, ast.Eq):
return builder.icmp_signed("==", lhs, rhs)
elif isinstance(op, ast.NotEq):
return builder.icmp_signed("!=", lhs, rhs)
elif isinstance(op, ast.Lt):
return builder.icmp_signed("<", lhs, rhs)
elif isinstance(op, ast.LtE):
return builder.icmp_signed("<=", lhs, rhs)
elif isinstance(op, ast.Gt):
return builder.icmp_signed(">", lhs, rhs)
elif isinstance(op, ast.GtE):
return builder.icmp_signed(">=", lhs, rhs)
else:
print("Unsupported comparison operator")
return None
else:
print("Unsupported condition expression")
return None
def handle_if(
func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_sym_tab=None
):
"""Handle if statements in the function body."""
print("Handling if statement")
# start = builder.block.parent
then_block = func.append_basic_block(name="if.then")
merge_block = func.append_basic_block(name="if.end")
if stmt.orelse:
else_block = func.append_basic_block(name="if.else")
else:
else_block = None
cond = handle_cond(func, module, builder, stmt.test, local_sym_tab, map_sym_tab)
if else_block:
builder.cbranch(cond, then_block, else_block)
else:
builder.cbranch(cond, then_block, merge_block)
builder.position_at_end(then_block)
for s in stmt.body:
process_stmt(
func, module, builder, s, local_sym_tab, map_sym_tab, structs_sym_tab, False
)
if not builder.block.is_terminated:
builder.branch(merge_block)
if else_block:
builder.position_at_end(else_block)
for s in stmt.orelse:
process_stmt(
func,
module,
builder,
s,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
False,
)
if not builder.block.is_terminated:
builder.branch(merge_block)
builder.position_at_end(merge_block)
def process_stmt(
func,
module,
builder,
stmt,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
did_return,
ret_type=ir.IntType(64),
):
print(f"Processing statement: {ast.dump(stmt)}")
if isinstance(stmt, ast.Expr):
print(local_var_metadata)
handle_expr(
func,
module,
builder,
stmt,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
local_var_metadata,
)
elif isinstance(stmt, ast.Assign):
handle_assign(
func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_sym_tab
)
elif isinstance(stmt, ast.AugAssign):
raise SyntaxError("Augmented assignment not supported")
elif isinstance(stmt, ast.If):
handle_if(
func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_sym_tab
)
elif isinstance(stmt, ast.Return):
if stmt.value is None:
builder.ret(ir.Constant(ir.IntType(32), 0))
did_return = True
elif (
isinstance(stmt.value, ast.Call)
and isinstance(stmt.value.func, ast.Name)
and len(stmt.value.args) == 1
and isinstance(stmt.value.args[0], ast.Constant)
and isinstance(stmt.value.args[0].value, int)
):
call_type = stmt.value.func.id
if ctypes_to_ir(call_type) != ret_type:
raise ValueError(
"Return type mismatch: expected"
f"{ctypes_to_ir(call_type)}, got {call_type}"
)
else:
builder.ret(ir.Constant(ret_type, stmt.value.args[0].value))
did_return = True
elif isinstance(stmt.value, ast.Name):
if stmt.value.id == "XDP_PASS":
builder.ret(ir.Constant(ret_type, 2))
did_return = True
elif stmt.value.id == "XDP_DROP":
builder.ret(ir.Constant(ret_type, 1))
did_return = True
else:
raise ValueError("Failed to evaluate return expression")
else:
raise ValueError("Unsupported return value")
return did_return
def allocate_mem(
module, builder, body, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab
):
for stmt in body:
if isinstance(stmt, ast.If):
if stmt.body:
local_sym_tab = allocate_mem(
module,
builder,
stmt.body,
func,
ret_type,
map_sym_tab,
local_sym_tab,
structs_sym_tab,
)
if stmt.orelse:
local_sym_tab = allocate_mem(
module,
builder,
stmt.orelse,
func,
ret_type,
map_sym_tab,
local_sym_tab,
structs_sym_tab,
)
elif isinstance(stmt, ast.Assign):
if len(stmt.targets) != 1:
print("Unsupported multiassignment")
continue
target = stmt.targets[0]
if not isinstance(target, ast.Name):
print("Unsupported assignment target")
continue
var_name = target.id
rval = stmt.value
if isinstance(rval, ast.Call):
if isinstance(rval.func, ast.Name):
call_type = rval.func.id
if call_type in ("c_int32", "c_int64", "c_uint32", "c_uint64"):
ir_type = ctypes_to_ir(call_type)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
print(f"Pre-allocated variable {var_name} of type {call_type}")
elif HelperHandlerRegistry.has_handler(call_type):
# Assume return type is int64 for now
ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
print(f"Pre-allocated variable {var_name} for helper")
elif call_type == "deref" and len(rval.args) == 1:
# Assume return type is int64 for now
ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
print(f"Pre-allocated variable {var_name} for deref")
elif call_type in structs_sym_tab:
struct_info = structs_sym_tab[call_type]
ir_type = struct_info.ir_type
var = builder.alloca(ir_type, name=var_name)
local_var_metadata[var_name] = call_type
print(
f"Pre-allocated variable {var_name} "
f"for struct {call_type}"
)
elif isinstance(rval.func, ast.Attribute):
ir_type = ir.PointerType(ir.IntType(64))
var = builder.alloca(ir_type, name=var_name)
# var.align = ir_type.width // 8
print(f"Pre-allocated variable {var_name} for map")
else:
print("Unsupported assignment call function type")
continue
elif isinstance(rval, ast.Constant):
if isinstance(rval.value, bool):
ir_type = ir.IntType(1)
var = builder.alloca(ir_type, name=var_name)
var.align = 1
print(f"Pre-allocated variable {var_name} of type c_bool")
elif isinstance(rval.value, int):
# Assume c_int64 for now
ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
print(f"Pre-allocated variable {var_name} of type c_int64")
elif isinstance(rval.value, str):
ir_type = ir.PointerType(ir.IntType(8))
var = builder.alloca(ir_type, name=var_name)
var.align = 8
print(f"Pre-allocated variable {var_name} of type string")
else:
print("Unsupported constant type")
continue
elif isinstance(rval, ast.BinOp):
# Assume c_int64 for now
ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
print(f"Pre-allocated variable {var_name} of type c_int64")
else:
print("Unsupported assignment value type")
continue
local_sym_tab[var_name] = (var, ir_type)
return local_sym_tab
def process_func_body(
module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab
):
"""Process the body of a bpf function"""
# TODO: A lot. We just have print -> bpf_trace_printk for now
did_return = False
local_sym_tab = {}
# pre-allocate dynamic variables
local_sym_tab = allocate_mem(
module,
builder,
func_node.body,
func,
ret_type,
map_sym_tab,
local_sym_tab,
structs_sym_tab,
)
print(f"Local symbol table: {local_sym_tab.keys()}")
for stmt in func_node.body:
did_return = process_stmt(
func,
module,
builder,
stmt,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
did_return,
ret_type,
)
if not did_return:
builder.ret(ir.Constant(ir.IntType(32), 0))
def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_tab):
"""Process a single BPF chunk (function) and emit corresponding LLVM IR."""
func_name = func_node.name
ret_type = return_type
# TODO: parse parameters
param_types = []
if func_node.args.args:
# Assume first arg to be ctx
param_types.append(ir.PointerType())
func_ty = ir.FunctionType(ret_type, param_types)
func = ir.Function(module, func_ty, func_name)
func.linkage = "dso_local"
func.attributes.add("nounwind")
func.attributes.add("noinline")
func.attributes.add("optnone")
if func_node.args.args:
# Only look at the first argument for now
param = func.args[0]
param.add_attribute("nocapture")
probe_string = get_probe_string(func_node)
if probe_string is not None:
func.section = probe_string
block = func.append_basic_block(name="entry")
builder = ir.IRBuilder(block)
process_func_body(
module, builder, func_node, func, ret_type, map_sym_tab, structs_sym_tab
)
return func
def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab):
for func_node in chunks:
is_global = False
for decorator in func_node.decorator_list:
if isinstance(decorator, ast.Name) and decorator.id in (
"map",
"bpfglobal",
"struct",
):
is_global = True
break
if is_global:
continue
func_type = get_probe_string(func_node)
print(f"Found probe_string of {func_node.name}: {func_type}")
process_bpf_chunk(
func_node,
module,
ctypes_to_ir(infer_return_type(func_node)),
map_sym_tab,
structs_sym_tab,
)
def infer_return_type(func_node: ast.FunctionDef):
if not isinstance(func_node, (ast.FunctionDef, ast.AsyncFunctionDef)):
raise TypeError("Expected ast.FunctionDef")
if func_node.returns is not None:
try:
return ast.unparse(func_node.returns)
except Exception:
node = func_node.returns
if isinstance(node, ast.Name):
return node.id
if isinstance(node, ast.Attribute):
return getattr(node, "attr", type(node).__name__)
try:
return str(node)
except Exception:
return type(node).__name__
found_type = None
def _expr_type(e):
if e is None:
return "None"
if isinstance(e, ast.Constant):
return type(e.value).__name__
if isinstance(e, ast.Name):
return e.id
if isinstance(e, ast.Call):
f = e.func
if isinstance(f, ast.Name):
return f.id
if isinstance(f, ast.Attribute):
try:
return ast.unparse(f)
except Exception:
return getattr(f, "attr", type(f).__name__)
try:
return ast.unparse(f)
except Exception:
return type(f).__name__
if isinstance(e, ast.Attribute):
try:
return ast.unparse(e)
except Exception:
return getattr(e, "attr", type(e).__name__)
try:
return ast.unparse(e)
except Exception:
return type(e).__name__
for walked_node in ast.walk(func_node):
if isinstance(walked_node, ast.Return):
t = _expr_type(walked_node.value)
if found_type is None:
found_type = t
elif found_type != t:
raise ValueError("Conflicting return types:" f"{found_type} vs {t}")
return found_type or "None"
# For string assignment to fixed-size arrays
def assign_string_to_array(builder, target_array_ptr, source_string_ptr, array_length):
"""
Copy a string (i8*) to a fixed-size array ([N x i8]*)
"""
# Create a loop to copy characters one by one
# entry_block = builder.block
copy_block = builder.append_basic_block("copy_char")
end_block = builder.append_basic_block("copy_end")
# Create loop counter
i = builder.alloca(ir.IntType(32))
builder.store(ir.Constant(ir.IntType(32), 0), i)
# Start the loop
builder.branch(copy_block)
# Copy loop
builder.position_at_end(copy_block)
idx = builder.load(i)
in_bounds = builder.icmp_unsigned(
"<", idx, ir.Constant(ir.IntType(32), array_length)
)
builder.cbranch(in_bounds, copy_block, end_block)
with builder.if_then(in_bounds):
# Load character from source
src_ptr = builder.gep(source_string_ptr, [idx])
char = builder.load(src_ptr)
# Store character in target
dst_ptr = builder.gep(target_array_ptr, [ir.Constant(ir.IntType(32), 0), idx])
builder.store(char, dst_ptr)
# Increment counter
next_idx = builder.add(idx, ir.Constant(ir.IntType(32), 1))
builder.store(next_idx, i)
builder.position_at_end(end_block)
# Ensure null termination
last_idx = ir.Constant(ir.IntType(32), array_length - 1)
null_ptr = builder.gep(target_array_ptr, [ir.Constant(ir.IntType(32), 0), last_idx])
builder.store(ir.Constant(ir.IntType(8), 0), null_ptr)

View File

@ -1,121 +1,8 @@
from llvmlite import ir from llvmlite import ir
import ast import ast
from logging import Logger
import logging
from .type_deducer import ctypes_to_ir
logger: Logger = logging.getLogger(__name__) def emit_globals(module: ir.Module, names: list[str]):
# TODO: this is going to be a huge fuck of a headache in the future.
global_sym_tab = []
def populate_global_symbol_table(tree, module: ir.Module):
for node in tree.body:
if isinstance(node, ast.FunctionDef):
for dec in node.decorator_list:
if (
isinstance(dec, ast.Call)
and isinstance(dec.func, ast.Name)
and dec.func.id == "section"
and len(dec.args) == 1
and isinstance(dec.args[0], ast.Constant)
and isinstance(dec.args[0].value, str)
):
global_sym_tab.append(node)
elif isinstance(dec, ast.Name) and dec.id == "bpfglobal":
global_sym_tab.append(node)
elif isinstance(dec, ast.Name) and dec.id == "map":
global_sym_tab.append(node)
return False
def emit_global(module: ir.Module, node, name):
logger.info(f"global identifier {name} processing")
# deduce LLVM type from the annotated return
if not isinstance(node.returns, ast.Name):
raise ValueError(f"Unsupported return annotation {ast.dump(node.returns)}")
ty = ctypes_to_ir(node.returns.id)
# extract the return expression
# TODO: turn this return extractor into a generic function I can use everywhere.
ret_stmt = node.body[0]
if not isinstance(ret_stmt, ast.Return) or ret_stmt.value is None:
raise ValueError(f"Global '{name}' has no valid return")
init_val = ret_stmt.value
# simple constant like "return 0"
if isinstance(init_val, ast.Constant):
llvm_init = ir.Constant(ty, init_val.value)
# variable reference like "return SOME_CONST"
elif isinstance(init_val, ast.Name):
# need symbol resolution here, stub as 0 for now
raise ValueError(f"Name reference {init_val.id} not yet supported")
# constructor call like "return c_int64(0)" or dataclass(...)
elif isinstance(init_val, ast.Call):
if len(init_val.args) >= 1 and isinstance(init_val.args[0], ast.Constant):
llvm_init = ir.Constant(ty, init_val.args[0].value)
else:
logger.info("Defaulting to zero as no constant argument found")
llvm_init = ir.Constant(ty, 0)
else:
raise ValueError(f"Unsupported return expr {ast.dump(init_val)}")
gvar = ir.GlobalVariable(module, ty, name=name)
gvar.initializer = llvm_init
gvar.align = 8
gvar.linkage = "dso_local"
gvar.global_constant = False
return gvar
def globals_processing(tree, module):
"""Process stuff decorated with @bpf and @bpfglobal except license and return the section name"""
globals_sym_tab = []
for node in tree.body:
# Skip non-assignment and non-function nodes
if not (isinstance(node, ast.FunctionDef)):
continue
# Get the name based on node type
if isinstance(node, ast.FunctionDef):
name = node.name
else:
continue
# Check for duplicate names
if name in globals_sym_tab:
raise SyntaxError(f"ERROR: Global name '{name}' previously defined")
else:
globals_sym_tab.append(name)
if isinstance(node, ast.FunctionDef) and node.name != "LICENSE":
decorators = [
dec.id for dec in node.decorator_list if isinstance(dec, ast.Name)
]
if "bpf" in decorators and "bpfglobal" in decorators:
if (
len(node.body) == 1
and isinstance(node.body[0], ast.Return)
and node.body[0].value is not None
and isinstance(
node.body[0].value, (ast.Constant, ast.Name, ast.Call)
)
):
emit_global(module, node, name)
else:
raise SyntaxError(f"ERROR: Invalid syntax for {name} global")
return None
def emit_llvm_compiler_used(module: ir.Module, names: list[str]):
""" """
Emit the @llvm.compiler.used global given a list of function/global names. Emit the @llvm.compiler.used global given a list of function/global names.
""" """
@ -137,7 +24,7 @@ def emit_llvm_compiler_used(module: ir.Module, names: list[str]):
gv.section = "llvm.metadata" gv.section = "llvm.metadata"
def globals_list_creation(tree, module: ir.Module): def globals_processing(tree, module: ir.Module):
collected = ["LICENSE"] collected = ["LICENSE"]
for node in tree.body: for node in tree.body:
@ -153,11 +40,10 @@ def globals_list_creation(tree, module: ir.Module):
): ):
collected.append(node.name) collected.append(node.name)
# NOTE: all globals other than elif isinstance(dec, ast.Name) and dec.id == "bpfglobal":
# elif isinstance(dec, ast.Name) and dec.id == "bpfglobal": collected.append(node.name)
# collected.append(node.name)
elif isinstance(dec, ast.Name) and dec.id == "map": elif isinstance(dec, ast.Name) and dec.id == "map":
collected.append(node.name) collected.append(node.name)
emit_llvm_compiler_used(module, collected) emit_globals(module, collected)

View File

@ -1,10 +1,9 @@
from .helper_utils import HelperHandlerRegistry, reset_scratch_pool from .helper_utils import HelperHandlerRegistry
from .bpf_helper_handler import handle_helper_call from .bpf_helper_handler import handle_helper_call
from .helpers import ktime, pid, deref, XDP_DROP, XDP_PASS from .helpers import ktime, pid, deref, XDP_DROP, XDP_PASS
__all__ = [ __all__ = [
"HelperHandlerRegistry", "HelperHandlerRegistry",
"reset_scratch_pool",
"handle_helper_call", "handle_helper_call",
"ktime", "ktime",
"pid", "pid",

View File

@ -9,10 +9,6 @@ from .helper_utils import (
simple_string_print, simple_string_print,
get_data_ptr_and_size, get_data_ptr_and_size,
) )
from logging import Logger
import logging
logger: Logger = logging.getLogger(__name__)
class BPFHelperID(Enum): class BPFHelperID(Enum):
@ -23,6 +19,8 @@ class BPFHelperID(Enum):
BPF_PRINTK = 6 BPF_PRINTK = 6
BPF_GET_CURRENT_PID_TGID = 14 BPF_GET_CURRENT_PID_TGID = 14
BPF_PERF_EVENT_OUTPUT = 25 BPF_PERF_EVENT_OUTPUT = 25
BPF_RINGBUF_RESERVE = 131
BPF_RINGBUF_SUBMIT = 132
@HelperHandlerRegistry.register("ktime") @HelperHandlerRegistry.register("ktime")
@ -34,7 +32,7 @@ def bpf_ktime_get_ns_emitter(
func, func,
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
map_sym_tab=None, local_var_metadata=None,
): ):
""" """
Emit LLVM IR for bpf_ktime_get_ns helper function call. Emit LLVM IR for bpf_ktime_get_ns helper function call.
@ -57,26 +55,20 @@ def bpf_map_lookup_elem_emitter(
func, func,
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
map_sym_tab=None, local_var_metadata=None,
): ):
""" """
Emit LLVM IR for bpf_map_lookup_elem helper function call. Emit LLVM IR for bpf_map_lookup_elem helper function call.
""" """
if not call.args or len(call.args) != 1: if not call.args or len(call.args) != 1:
raise ValueError( raise ValueError(
f"Map lookup expects exactly one argument (key), got {len(call.args)}" "Map lookup expects exactly one argument (key), got " f"{len(call.args)}"
) )
key_ptr = get_or_create_ptr_from_arg( key_ptr = get_or_create_ptr_from_arg(call.args[0], builder, local_sym_tab)
func, module, call.args[0], builder, local_sym_tab, map_sym_tab, struct_sym_tab
)
map_void_ptr = builder.bitcast(map_ptr, ir.PointerType()) map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
# TODO: I have changed the return type to i64*, as we are
# allocating space for that type in allocate_mem. This is
# temporary, and we will honour other widths later. But this
# allows us to have cool binary ops on the returned value.
fn_type = ir.FunctionType( fn_type = ir.FunctionType(
ir.PointerType(ir.IntType(64)), # Return type: void* ir.PointerType(), # Return type: void*
[ir.PointerType(), ir.PointerType()], # Args: (void*, void*) [ir.PointerType(), ir.PointerType()], # Args: (void*, void*)
var_arg=False, var_arg=False,
) )
@ -99,7 +91,7 @@ def bpf_printk_emitter(
func, func,
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
map_sym_tab=None, local_var_metadata=None,
): ):
"""Emit LLVM IR for bpf_printk helper function call.""" """Emit LLVM IR for bpf_printk helper function call."""
if not hasattr(func, "_fmt_counter"): if not hasattr(func, "_fmt_counter"):
@ -117,6 +109,7 @@ def bpf_printk_emitter(
func, func,
local_sym_tab, local_sym_tab,
struct_sym_tab, struct_sym_tab,
local_var_metadata,
) )
elif isinstance(call.args[0], ast.Constant) and isinstance(call.args[0].value, str): elif isinstance(call.args[0], ast.Constant) and isinstance(call.args[0].value, str):
# TODO: We are only supporting single arguments for now. # TODO: We are only supporting single arguments for now.
@ -147,7 +140,7 @@ def bpf_map_update_elem_emitter(
func, func,
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
map_sym_tab=None, local_var_metadata=None,
): ):
""" """
Emit LLVM IR for bpf_map_update_elem helper function call. Emit LLVM IR for bpf_map_update_elem helper function call.
@ -155,19 +148,16 @@ def bpf_map_update_elem_emitter(
""" """
if not call.args or len(call.args) < 2 or len(call.args) > 3: if not call.args or len(call.args) < 2 or len(call.args) > 3:
raise ValueError( raise ValueError(
f"Map update expects 2 or 3 args (key, value, flags), got {len(call.args)}" "Map update expects 2 or 3 args (key, value, flags), "
f"got {len(call.args)}"
) )
key_arg = call.args[0] key_arg = call.args[0]
value_arg = call.args[1] value_arg = call.args[1]
flags_arg = call.args[2] if len(call.args) > 2 else None flags_arg = call.args[2] if len(call.args) > 2 else None
key_ptr = get_or_create_ptr_from_arg( key_ptr = get_or_create_ptr_from_arg(key_arg, builder, local_sym_tab)
func, module, key_arg, builder, local_sym_tab, map_sym_tab, struct_sym_tab value_ptr = get_or_create_ptr_from_arg(value_arg, builder, local_sym_tab)
)
value_ptr = get_or_create_ptr_from_arg(
func, module, value_arg, builder, local_sym_tab, map_sym_tab, struct_sym_tab
)
flags_val = get_flags_val(flags_arg, builder, local_sym_tab) flags_val = get_flags_val(flags_arg, builder, local_sym_tab)
map_void_ptr = builder.bitcast(map_ptr, ir.PointerType()) map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
@ -192,6 +182,114 @@ def bpf_map_update_elem_emitter(
return result, None return result, None
@HelperHandlerRegistry.register("submit")
def bpf_ringbuf_submit_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
local_var_metadata=None,
):
"""
Emit LLVM IR for bpf_ringbuf_submit helper function call.
Expected call signature: ringbuf.submit(data, flags=0)
"""
if not call.args or len(call.args) < 1 or len(call.args) > 2:
raise ValueError(
"Ringbuf submit expects 1 or 2 args (data, flags), "
f"got {len(call.args)}"
)
data_arg = call.args[0]
data_ptr = get_or_create_ptr_from_arg(data_arg, builder, local_sym_tab)
# Get flags argument (default to 0)
flags_arg = call.args[1] if len(call.args) > 1 else None
flags_val = get_flags_val(flags_arg, builder, local_sym_tab)
# Returns: void
# Args: (void* data, u64 flags)
fn_type = ir.FunctionType(
ir.VoidType(),
[ir.PointerType(), ir.IntType(64)],
var_arg=False,
)
fn_ptr_type = ir.PointerType(fn_type)
fn_addr = ir.Constant(ir.IntType(64), BPFHelperID.BPF_RINGBUF_SUBMIT.value)
fn_ptr = builder.inttoptr(fn_addr, fn_ptr_type)
if isinstance(flags_val, int):
flags_const = ir.Constant(ir.IntType(64), flags_val)
else:
flags_const = flags_val
builder.call(fn_ptr, [data_ptr, flags_const], tail=True)
return None
@HelperHandlerRegistry.register("reserve")
def bpf_ringbuf_reserve_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
local_var_metadata=None,
):
"""
Emit LLVM IR for bpf_ringbuf_reserve helper function call.
Expected call signature: ringbuf.reserve(size, flags=0)
"""
if not call.args or len(call.args) < 1 or len(call.args) > 2:
raise ValueError(
"Ringbuf reserve expects 1 or 2 args (size, flags), "
f"got {len(call.args)}"
)
# TODO: here, getting length of stuff does not actually work. need to fix this.
size_arg = call.args[0]
if isinstance(size_arg, ast.Constant):
size_val = ir.Constant(ir.IntType(64), size_arg.value)
elif isinstance(size_arg, ast.Name):
if size_arg.id not in local_sym_tab:
raise ValueError(
f"Variable '{size_arg.id}' not found in local symbol table"
)
size_val = builder.load(local_sym_tab[size_arg.id])
else:
raise NotImplementedError(f"Unsupported size argument type: {type(size_arg)}")
flags_arg = call.args[1] if len(call.args) > 1 else None
flags_val = get_flags_val(flags_arg, builder, local_sym_tab)
map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
# Args: (void* ringbuf, u64 size, u64 flags)
fn_type = ir.FunctionType(
ir.PointerType(),
[ir.PointerType(), ir.IntType(64), ir.IntType(64)],
var_arg=False,
)
fn_ptr_type = ir.PointerType(fn_type)
fn_addr = ir.Constant(ir.IntType(64), BPFHelperID.BPF_RINGBUF_RESERVE.value)
fn_ptr = builder.inttoptr(fn_addr, fn_ptr_type)
if isinstance(flags_val, int):
flags_const = ir.Constant(ir.IntType(64), flags_val)
else:
flags_const = flags_val
result = builder.call(fn_ptr, [map_void_ptr, size_val, flags_const], tail=True)
return result, ir.PointerType()
@HelperHandlerRegistry.register("delete") @HelperHandlerRegistry.register("delete")
def bpf_map_delete_elem_emitter( def bpf_map_delete_elem_emitter(
@ -202,7 +300,7 @@ def bpf_map_delete_elem_emitter(
func, func,
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
map_sym_tab=None, local_var_metadata=None,
): ):
""" """
Emit LLVM IR for bpf_map_delete_elem helper function call. Emit LLVM IR for bpf_map_delete_elem helper function call.
@ -210,11 +308,9 @@ def bpf_map_delete_elem_emitter(
""" """
if not call.args or len(call.args) != 1: if not call.args or len(call.args) != 1:
raise ValueError( raise ValueError(
f"Map delete expects exactly one argument (key), got {len(call.args)}" "Map delete expects exactly one argument (key), got " f"{len(call.args)}"
) )
key_ptr = get_or_create_ptr_from_arg( key_ptr = get_or_create_ptr_from_arg(call.args[0], builder, local_sym_tab)
func, module, call.args[0], builder, local_sym_tab, map_sym_tab, struct_sym_tab
)
map_void_ptr = builder.bitcast(map_ptr, ir.PointerType()) map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
# Define function type for bpf_map_delete_elem # Define function type for bpf_map_delete_elem
@ -242,7 +338,7 @@ def bpf_get_current_pid_tgid_emitter(
func, func,
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
map_sym_tab=None, local_var_metadata=None,
): ):
""" """
Emit LLVM IR for bpf_get_current_pid_tgid helper function call. Emit LLVM IR for bpf_get_current_pid_tgid helper function call.
@ -269,16 +365,18 @@ def bpf_perf_event_output_handler(
func, func,
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
map_sym_tab=None, local_var_metadata=None,
): ):
if len(call.args) != 1: if len(call.args) != 1:
raise ValueError( raise ValueError(
f"Perf event output expects exactly one argument, got {len(call.args)}" "Perf event output expects exactly one argument, " f"got {len(call.args)}"
) )
data_arg = call.args[0] data_arg = call.args[0]
ctx_ptr = func.args[0] # First argument to the function is ctx ctx_ptr = func.args[0] # First argument to the function is ctx
data_ptr, size_val = get_data_ptr_and_size(data_arg, local_sym_tab, struct_sym_tab) data_ptr, size_val = get_data_ptr_and_size(
data_arg, local_sym_tab, struct_sym_tab, local_var_metadata
)
# BPF_F_CURRENT_CPU is -1 in 32 bit # BPF_F_CURRENT_CPU is -1 in 32 bit
flags_val = ir.Constant(ir.IntType(64), 0xFFFFFFFF) flags_val = ir.Constant(ir.IntType(64), 0xFFFFFFFF)
@ -316,6 +414,7 @@ def handle_helper_call(
local_sym_tab=None, local_sym_tab=None,
map_sym_tab=None, map_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
local_var_metadata=None,
): ):
"""Process a BPF helper function call and emit the appropriate LLVM IR.""" """Process a BPF helper function call and emit the appropriate LLVM IR."""
@ -334,7 +433,7 @@ def handle_helper_call(
func, func,
local_sym_tab, local_sym_tab,
struct_sym_tab, struct_sym_tab,
map_sym_tab, local_var_metadata,
) )
# Handle direct function calls (e.g., print(), ktime()) # Handle direct function calls (e.g., print(), ktime())
@ -345,7 +444,7 @@ def handle_helper_call(
elif isinstance(call.func, ast.Attribute): elif isinstance(call.func, ast.Attribute):
method_name = call.func.attr method_name = call.func.attr
value = call.func.value value = call.func.value
logger.info(f"Handling method call: {ast.dump(call.func)}") print(f"Handling method call: {ast.dump(call.func)}")
# Get map pointer from different styles of map access # Get map pointer from different styles of map access
if isinstance(value, ast.Call) and isinstance(value.func, ast.Name): if isinstance(value, ast.Call) and isinstance(value.func, ast.Name):
# Func style: my_map().lookup(key) # Func style: my_map().lookup(key)

View File

@ -3,8 +3,7 @@ import logging
from collections.abc import Callable from collections.abc import Callable
from llvmlite import ir from llvmlite import ir
from pythonbpf.expr import eval_expr, get_base_type_and_depth, deref_to_depth from pythonbpf.expr_pass import eval_expr
from pythonbpf.binary_ops import get_operand_value
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -35,83 +34,34 @@ class HelperHandlerRegistry:
return helper_name in cls._handlers return helper_name in cls._handlers
class ScratchPoolManager:
"""Manage the temporary helper variables in local_sym_tab"""
def __init__(self):
self._counter = 0
@property
def counter(self):
return self._counter
def reset(self):
self._counter = 0
logger.debug("Scratch pool counter reset to 0")
def get_next_temp(self, local_sym_tab):
temp_name = f"__helper_temp_{self._counter}"
self._counter += 1
if temp_name not in local_sym_tab:
raise ValueError(
f"Scratch pool exhausted or inadequate: {temp_name}. "
f"Current counter: {self._counter}"
)
return local_sym_tab[temp_name].var, temp_name
_temp_pool_manager = ScratchPoolManager() # Singleton instance
def reset_scratch_pool():
"""Reset the scratch pool counter"""
_temp_pool_manager.reset()
def get_var_ptr_from_name(var_name, local_sym_tab): def get_var_ptr_from_name(var_name, local_sym_tab):
"""Get a pointer to a variable from the symbol table.""" """Get a pointer to a variable from the symbol table."""
if local_sym_tab and var_name in local_sym_tab: if local_sym_tab and var_name in local_sym_tab:
return local_sym_tab[var_name].var return local_sym_tab[var_name][0]
raise ValueError(f"Variable '{var_name}' not found in local symbol table") raise ValueError(f"Variable '{var_name}' not found in local symbol table")
def create_int_constant_ptr(value, builder, local_sym_tab, int_width=64): def create_int_constant_ptr(value, builder, int_width=64):
"""Create a pointer to an integer constant.""" """Create a pointer to an integer constant."""
# Default to 64-bit integer # Default to 64-bit integer
ptr, temp_name = _temp_pool_manager.get_next_temp(local_sym_tab) int_type = ir.IntType(int_width)
logger.info(f"Using temp variable '{temp_name}' for int constant {value}") ptr = builder.alloca(int_type)
const_val = ir.Constant(ir.IntType(int_width), value) ptr.align = int_type.width // 8
builder.store(const_val, ptr) builder.store(ir.Constant(int_type, value), ptr)
return ptr return ptr
def get_or_create_ptr_from_arg( def get_or_create_ptr_from_arg(arg, builder, local_sym_tab):
func, module, arg, builder, local_sym_tab, map_sym_tab, struct_sym_tab=None
):
"""Extract or create pointer from the call arguments.""" """Extract or create pointer from the call arguments."""
if isinstance(arg, ast.Name): if isinstance(arg, ast.Name):
ptr = get_var_ptr_from_name(arg.id, local_sym_tab) ptr = get_var_ptr_from_name(arg.id, local_sym_tab)
elif isinstance(arg, ast.Constant) and isinstance(arg.value, int): elif isinstance(arg, ast.Constant) and isinstance(arg.value, int):
ptr = create_int_constant_ptr(arg.value, builder, local_sym_tab) ptr = create_int_constant_ptr(arg.value, builder)
else: else:
# Evaluate the expression and store the result in a temp variable raise NotImplementedError(
val = get_operand_value( "Only simple variable names are supported as args in map helpers."
func, module, arg, builder, local_sym_tab, map_sym_tab, struct_sym_tab
) )
if val is None:
raise ValueError("Failed to evaluate expression for helper arg.")
# NOTE: We assume the result is an int64 for now
# if isinstance(arg, ast.Attribute):
# return val
ptr, temp_name = _temp_pool_manager.get_next_temp(local_sym_tab)
logger.info(f"Using temp variable '{temp_name}' for expression result")
builder.store(val, ptr)
return ptr return ptr
@ -122,7 +72,7 @@ def get_flags_val(arg, builder, local_sym_tab):
if isinstance(arg, ast.Name): if isinstance(arg, ast.Name):
if local_sym_tab and arg.id in local_sym_tab: if local_sym_tab and arg.id in local_sym_tab:
flags_ptr = local_sym_tab[arg.id].var flags_ptr = local_sym_tab[arg.id][0]
return builder.load(flags_ptr) return builder.load(flags_ptr)
else: else:
raise ValueError(f"Variable '{arg.id}' not found in local symbol table") raise ValueError(f"Variable '{arg.id}' not found in local symbol table")
@ -150,6 +100,7 @@ def handle_fstring_print(
func, func,
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
local_var_metadata=None,
): ):
"""Handle f-string formatting for bpf_printk emitter.""" """Handle f-string formatting for bpf_printk emitter."""
fmt_parts = [] fmt_parts = []
@ -167,6 +118,7 @@ def handle_fstring_print(
exprs, exprs,
local_sym_tab, local_sym_tab,
struct_sym_tab, struct_sym_tab,
local_var_metadata,
) )
else: else:
raise NotImplementedError(f"Unsupported f-string value type: {type(value)}") raise NotImplementedError(f"Unsupported f-string value type: {type(value)}")
@ -186,6 +138,7 @@ def handle_fstring_print(
builder, builder,
local_sym_tab, local_sym_tab,
struct_sym_tab, struct_sym_tab,
local_var_metadata,
) )
args.append(arg_value) args.append(arg_value)
@ -205,7 +158,9 @@ def _process_constant_in_fstring(cst, fmt_parts, exprs):
) )
def _process_fval(fval, fmt_parts, exprs, local_sym_tab, struct_sym_tab): def _process_fval(
fval, fmt_parts, exprs, local_sym_tab, struct_sym_tab, local_var_metadata
):
"""Process formatted values in f-string.""" """Process formatted values in f-string."""
logger.debug(f"Processing formatted value: {ast.dump(fval)}") logger.debug(f"Processing formatted value: {ast.dump(fval)}")
@ -218,6 +173,7 @@ def _process_fval(fval, fmt_parts, exprs, local_sym_tab, struct_sym_tab):
exprs, exprs,
local_sym_tab, local_sym_tab,
struct_sym_tab, struct_sym_tab,
local_var_metadata,
) )
else: else:
raise NotImplementedError( raise NotImplementedError(
@ -228,11 +184,13 @@ def _process_fval(fval, fmt_parts, exprs, local_sym_tab, struct_sym_tab):
def _process_name_in_fval(name_node, fmt_parts, exprs, local_sym_tab): def _process_name_in_fval(name_node, fmt_parts, exprs, local_sym_tab):
"""Process name nodes in formatted values.""" """Process name nodes in formatted values."""
if local_sym_tab and name_node.id in local_sym_tab: if local_sym_tab and name_node.id in local_sym_tab:
_, var_type, tmp = local_sym_tab[name_node.id] _, var_type = local_sym_tab[name_node.id]
_populate_fval(var_type, name_node, fmt_parts, exprs) _populate_fval(var_type, name_node, fmt_parts, exprs)
def _process_attr_in_fval(attr_node, fmt_parts, exprs, local_sym_tab, struct_sym_tab): def _process_attr_in_fval(
attr_node, fmt_parts, exprs, local_sym_tab, struct_sym_tab, local_var_metadata
):
"""Process attribute nodes in formatted values.""" """Process attribute nodes in formatted values."""
if ( if (
isinstance(attr_node.value, ast.Name) isinstance(attr_node.value, ast.Name)
@ -242,7 +200,12 @@ def _process_attr_in_fval(attr_node, fmt_parts, exprs, local_sym_tab, struct_sym
var_name = attr_node.value.id var_name = attr_node.value.id
field_name = attr_node.attr field_name = attr_node.attr
var_type = local_sym_tab[var_name].metadata if not local_var_metadata or var_name not in local_var_metadata:
raise ValueError(
f"Metadata for '{var_name}' not found in local var metadata"
)
var_type = local_var_metadata[var_name]
if var_type not in struct_sym_tab: if var_type not in struct_sym_tab:
raise ValueError( raise ValueError(
f"Struct '{var_type}' for '{var_name}' not in symbol table" f"Struct '{var_type}' for '{var_name}' not in symbol table"
@ -274,27 +237,10 @@ def _populate_fval(ftype, node, fmt_parts, exprs):
raise NotImplementedError( raise NotImplementedError(
f"Unsupported integer width in f-string: {ftype.width}" f"Unsupported integer width in f-string: {ftype.width}"
) )
elif isinstance(ftype, ir.PointerType): elif ftype == ir.PointerType(ir.IntType(8)):
target, depth = get_base_type_and_depth(ftype) # NOTE: We assume i8* is a string
if isinstance(target, ir.IntType): fmt_parts.append("%s")
if target.width == 64: exprs.append(node)
fmt_parts.append("%lld")
exprs.append(node)
elif target.width == 32:
fmt_parts.append("%d")
exprs.append(node)
elif target.width == 8 and depth == 1:
# NOTE: Assume i8* is a string
fmt_parts.append("%s")
exprs.append(node)
else:
raise NotImplementedError(
f"Unsupported pointer target type in f-string: {target}"
)
else:
raise NotImplementedError(
f"Unsupported pointer target type in f-string: {target}"
)
else: else:
raise NotImplementedError(f"Unsupported field type in f-string: {ftype}") raise NotImplementedError(f"Unsupported field type in f-string: {ftype}")
@ -317,7 +263,9 @@ def _create_format_string_global(fmt_str, func, module, builder):
return builder.bitcast(fmt_gvar, ir.PointerType()) return builder.bitcast(fmt_gvar, ir.PointerType())
def _prepare_expr_args(expr, func, module, builder, local_sym_tab, struct_sym_tab): def _prepare_expr_args(
expr, func, module, builder, local_sym_tab, struct_sym_tab, local_var_metadata
):
"""Evaluate and prepare an expression to use as an arg for bpf_printk.""" """Evaluate and prepare an expression to use as an arg for bpf_printk."""
val, _ = eval_expr( val, _ = eval_expr(
func, func,
@ -327,30 +275,18 @@ def _prepare_expr_args(expr, func, module, builder, local_sym_tab, struct_sym_ta
local_sym_tab, local_sym_tab,
None, None,
struct_sym_tab, struct_sym_tab,
local_var_metadata,
) )
if val: if val:
if isinstance(val.type, ir.PointerType): if isinstance(val.type, ir.PointerType):
target, depth = get_base_type_and_depth(val.type) val = builder.ptrtoint(val, ir.IntType(64))
if isinstance(target, ir.IntType):
if target.width >= 32:
val = deref_to_depth(func, builder, val, depth)
val = builder.sext(val, ir.IntType(64))
elif target.width == 8 and depth == 1:
# NOTE: i8* is string, no need to deref
pass
else:
logger.warning(
"Only int and ptr supported in bpf_printk args. Others default to 0."
)
val = ir.Constant(ir.IntType(64), 0)
elif isinstance(val.type, ir.IntType): elif isinstance(val.type, ir.IntType):
if val.type.width < 64: if val.type.width < 64:
val = builder.sext(val, ir.IntType(64)) val = builder.sext(val, ir.IntType(64))
else: else:
logger.warning( logger.warning(
"Only int and ptr supported in bpf_printk args. Others default to 0." "Only int and ptr supported in bpf_printk args. " "Others default to 0."
) )
val = ir.Constant(ir.IntType(64), 0) val = ir.Constant(ir.IntType(64), 0)
return val return val
@ -362,26 +298,34 @@ def _prepare_expr_args(expr, func, module, builder, local_sym_tab, struct_sym_ta
return ir.Constant(ir.IntType(64), 0) return ir.Constant(ir.IntType(64), 0)
def get_data_ptr_and_size(data_arg, local_sym_tab, struct_sym_tab): def get_data_ptr_and_size(data_arg, local_sym_tab, struct_sym_tab, local_var_metadata):
"""Extract data pointer and size information for perf event output.""" """Extract data pointer and size information for perf event output."""
if isinstance(data_arg, ast.Name): if isinstance(data_arg, ast.Name):
data_name = data_arg.id data_name = data_arg.id
if local_sym_tab and data_name in local_sym_tab: if local_sym_tab and data_name in local_sym_tab:
data_ptr = local_sym_tab[data_name].var data_ptr = local_sym_tab[data_name][0]
else: else:
raise ValueError( raise ValueError(
f"Data variable {data_name} not found in local symbol table." f"Data variable {data_name} not found in local symbol table."
) )
# Check if data_name is a struct # Check if data_name is a struct
data_type = local_sym_tab[data_name].metadata if local_var_metadata and data_name in local_var_metadata:
if data_type in struct_sym_tab: data_type = local_var_metadata[data_name]
struct_info = struct_sym_tab[data_type] if data_type in struct_sym_tab:
size_val = ir.Constant(ir.IntType(64), struct_info.size) struct_info = struct_sym_tab[data_type]
return data_ptr, size_val size_val = ir.Constant(ir.IntType(64), struct_info.size)
return data_ptr, size_val
else:
raise ValueError(
f"Struct {data_type} for {data_name} not in symbol table."
)
else: else:
raise ValueError(f"Struct {data_type} for {data_name} not in symbol table.") raise ValueError(
f"Metadata for variable {data_name} "
"not found in local variable metadata."
)
else: else:
raise NotImplementedError( raise NotImplementedError(
"Only simple object names are supported as data in perf event output." "Only simple object names are supported " "as data in perf event output."
) )

View File

@ -1,9 +1,5 @@
from llvmlite import ir from llvmlite import ir
import ast import ast
from logging import Logger
import logging
logger: Logger = logging.getLogger(__name__)
def emit_license(module: ir.Module, license_str: str): def emit_license(module: ir.Module, license_str: str):
@ -45,9 +41,9 @@ def license_processing(tree, module):
emit_license(module, node.body[0].value.value) emit_license(module, node.body[0].value.value)
return "LICENSE" return "LICENSE"
else: else:
logger.info("ERROR: LICENSE() must return a string literal") print("ERROR: LICENSE() must return a string literal")
return None return None
else: else:
logger.info("ERROR: LICENSE already defined") print("ERROR: LICENSE already defined")
return None return None
return None return None

View File

@ -3,7 +3,7 @@ from logging import Logger
from llvmlite import ir from llvmlite import ir
from enum import Enum from enum import Enum
from .maps_utils import MapProcessorRegistry from .maps_utils import MapProcessorRegistry
from pythonbpf.debuginfo import DebugInfoGenerator from ..debuginfo import DebugInfoGenerator
import logging import logging
logger: Logger = logging.getLogger(__name__) logger: Logger = logging.getLogger(__name__)
@ -85,7 +85,7 @@ def create_bpf_map(module, map_name, map_params):
def create_map_debug_info(module, map_global, map_name, map_params): def create_map_debug_info(module, map_global, map_name, map_params):
"""Generate debug info metadata for BPF maps HASH and PERF_EVENT_ARRAY""" """Generate debug information metadata for BPF maps HASH and PERF_EVENT_ARRAY"""
generator = DebugInfoGenerator(module) generator = DebugInfoGenerator(module)
uint_type = generator.get_uint32_type() uint_type = generator.get_uint32_type()
@ -278,7 +278,9 @@ def process_bpf_map(func_node, module):
if handler: if handler:
return handler(map_name, rval, module) return handler(map_name, rval, module)
else: else:
logger.warning(f"Unknown map type {rval.func.id}, defaulting to HashMap") logger.warning(
f"Unknown map type " f"{rval.func.id}, defaulting to HashMap"
)
return process_hash_map(map_name, rval, module) return process_hash_map(map_name, rval, module)
else: else:
raise ValueError("Function under @map must return a map") raise ValueError("Function under @map must return a map")

View File

@ -19,7 +19,7 @@ def structs_proc(tree, module, chunks):
structs_sym_tab = {} structs_sym_tab = {}
for cls_node in chunks: for cls_node in chunks:
if is_bpf_struct(cls_node): if is_bpf_struct(cls_node):
logger.info(f"Found BPF struct: {cls_node.name}") print(f"Found BPF struct: {cls_node.name}")
struct_info = process_bpf_struct(cls_node, module) struct_info = process_bpf_struct(cls_node, module)
structs_sym_tab[cls_node.name] = struct_info structs_sym_tab[cls_node.name] = struct_info
return structs_sym_tab return structs_sym_tab

View File

@ -1,28 +1,24 @@
from llvmlite import ir from llvmlite import ir
# TODO: THIS IS NOT SUPPOSED TO MATCH STRINGS :skull: # TODO: THIS IS NOT SUPPOSED TO MATCH STRINGS :skull:
mapping = {
"c_int8": ir.IntType(8),
"c_uint8": ir.IntType(8),
"c_int16": ir.IntType(16),
"c_uint16": ir.IntType(16),
"c_int32": ir.IntType(32),
"c_uint32": ir.IntType(32),
"c_int64": ir.IntType(64),
"c_uint64": ir.IntType(64),
"c_float": ir.FloatType(),
"c_double": ir.DoubleType(),
"c_void_p": ir.IntType(64),
# Not so sure about this one
"str": ir.PointerType(ir.IntType(8)),
}
def ctypes_to_ir(ctype: str): def ctypes_to_ir(ctype: str):
mapping = {
"c_int8": ir.IntType(8),
"c_uint8": ir.IntType(8),
"c_int16": ir.IntType(16),
"c_uint16": ir.IntType(16),
"c_int32": ir.IntType(32),
"c_uint32": ir.IntType(32),
"c_int64": ir.IntType(64),
"c_uint64": ir.IntType(64),
"c_float": ir.FloatType(),
"c_double": ir.DoubleType(),
"c_void_p": ir.IntType(64),
# Not so sure about this one
"str": ir.PointerType(ir.IntType(8)),
}
if ctype in mapping: if ctype in mapping:
return mapping[ctype] return mapping[ctype]
raise NotImplementedError(f"No mapping for {ctype}") raise NotImplementedError(f"No mapping for {ctype}")
def is_ctypes(ctype: str) -> bool:
return ctype in mapping

View File

@ -1,27 +0,0 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <linux/types.h>
struct test_struct {
__u64 a;
__u64 b;
};
struct test_struct w = {};
volatile __u64 prev_time = 0;
SEC("tracepoint/syscalls/sys_enter_execve")
int trace_execve(void *ctx)
{
bpf_printk("previous %ul now %ul", w.b, w.a);
__u64 ts = bpf_ktime_get_ns();
bpf_printk("prev %ul now %ul", prev_time, ts);
w.a = ts;
w.b = prev_time;
prev_time = ts;
return 0;
}
char LICENSE[] SEC("license") = "GPL";

View File

@ -1,19 +0,0 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char LICENSE[] SEC("license") = "Dual BSD/GPL";
SEC("kprobe/do_unlinkat")
int kprobe_execve(struct pt_regs *ctx)
{
bpf_printk("unlinkat created");
return 0;
}
SEC("kretprobe/do_unlinkat")
int kretprobe_execve(struct pt_regs *ctx)
{
bpf_printk("unlinkat returned\n");
return 0;
}

View File

@ -22,27 +22,29 @@ struct {
SEC("tracepoint/syscalls/sys_enter_execve") SEC("tracepoint/syscalls/sys_enter_execve")
int trace_execve(void *ctx) int trace_execve(void *ctx)
{ {
struct event *e; // struct event *e;
__u64 pid_tgid; // __u64 pid_tgid;
__u64 uid_gid; // __u64 uid_gid;
__u32 *e;
// Reserve space in the ringbuffer // Reserve space in the ringbuffer
e = bpf_ringbuf_reserve(&events, sizeof(*e), 0); e = bpf_ringbuf_reserve(&events, sizeof(*e), 0);
if (!e) if (!e)
return 0; return 0;
//
// // Fill the struct with data
// pid_tgid = bpf_get_current_pid_tgid();
// e->pid = pid_tgid >> 32;
//
// uid_gid = bpf_get_current_uid_gid();
// e->uid = uid_gid & 0xFFFFFFFF;
//
// e->timestamp = bpf_ktime_get_ns();
// Fill the struct with data // bpf_get_current_comm(&e->comm, sizeof(e->comm));
pid_tgid = bpf_get_current_pid_tgid(); //
e->pid = pid_tgid >> 32; // // Submit the event to ringbuffer
__u32 temp = 32;
uid_gid = bpf_get_current_uid_gid(); e = &temp;
e->uid = uid_gid & 0xFFFFFFFF;
e->timestamp = bpf_ktime_get_ns();
bpf_get_current_comm(&e->comm, sizeof(e->comm));
// Submit the event to ringbuffer
bpf_ringbuf_submit(e, 0); bpf_ringbuf_submit(e, 0);
return 0; return 0;

View File

@ -1,39 +0,0 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from ctypes import c_void_p, c_int64, c_uint64
from pythonbpf.maps import HashMap
# NOTE: This example tries to reinterpret the variable `x` to a different type.
# We do not allow this for now, as stack allocations are typed and have to be
# done in the first basic block. Allowing re-interpretation would require
# re-allocation of stack space (possibly in a new basic block), which is not
# supported in eBPF yet.
# We can allow bitcasts in cases where the width of the types is the same in
# the future. But for now, we do not allow any re-interpretation of variables.
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
last.update(0, 1)
x = last.lookup(0)
x = 20
if x == 2:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -3,9 +3,9 @@ from ctypes import c_void_p, c_int64
@bpf @bpf
@section("tracepoint/syscalls/sys_enter_sync") @section("sometag1")
def sometag(ctx: c_void_p) -> c_int64: def sometag(ctx: c_void_p) -> c_int64:
a = 1 + 2 + 1 + 12 + 13 a = 1 + 2 + 1
print(f"{a}") print(f"{a}")
return c_int64(0) return c_int64(0)

View File

@ -1,5 +1,3 @@
import logging
from pythonbpf import compile, bpf, section, bpfglobal from pythonbpf import compile, bpf, section, bpfglobal
from ctypes import c_void_p, c_int64 from ctypes import c_void_p, c_int64
@ -7,7 +5,8 @@ from ctypes import c_void_p, c_int64
@bpf @bpf
@section("sometag1") @section("sometag1")
def sometag(ctx: c_void_p) -> c_int64: def sometag(ctx: c_void_p) -> c_int64:
a = 1 - 1 b = 1 + 2
a = 1 + b
return c_int64(a) return c_int64(a)
@ -17,4 +16,4 @@ def LICENSE() -> str:
return "GPL" return "GPL"
compile(loglevel=logging.INFO) compile()

View File

@ -0,0 +1,33 @@
from pythonbpf import bpf, map, bpfglobal, section, compile, compile_to_ir, BPF
from pythonbpf.maps import RingBuf
from ctypes import c_int32, c_void_p
# Define a map
@bpf
@map
def mymap() -> RingBuf:
return RingBuf(max_entries=(1024))
@bpf
@section("tracepoint/syscalls/sys_enter_clone")
def random_section(ctx: c_void_p) -> c_int32:
e: c_int32 = mymap().reserve(64)
if e == 0: # here is the issue i think
return c_int32(0)
mymap().submit(e)
return c_int32(0)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile_to_ir("ringbuf.py", "ringbuf.ll")
compile()
b = BPF()
b.load_and_attach()
while True:
print("running")

View File

@ -1,34 +0,0 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from ctypes import c_void_p, c_int64, c_uint64
from pythonbpf.maps import HashMap
# NOTE: Decided against fixing this
# as a workaround is assigning the result of lookup to a variable
# and then using that variable in the if statement.
# Might fix in future.
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
last.update(0, 1)
if last.lookup(0) > 0:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,18 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!") if True else print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,34 +0,0 @@
from pythonbpf import bpf, struct, section, bpfglobal, compile
from ctypes import c_void_p, c_int64, c_uint64
# NOTE: Decided against fixing this
# as one workaround is to just check any field of the struct
# in the if statement. Ugly but works.
# Might fix in future.
@bpf
@struct
class data_t:
pid: c_uint64
ts: c_uint64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
dat = data_t()
if dat:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -4,18 +4,6 @@ from pythonbpf.maps import HashMap
from ctypes import c_void_p, c_int64 from ctypes import c_void_p, c_int64
# NOTE: I have decided to not fix this example for now.
# The issue is in line 31, where we are passing an expression.
# The update helper expects a pointer type. But the problem is
# that we must allocate the space for said pointer in the first
# basic block. As that usage is in a different basic block, we
# are unable to cast the expression to a pointer type. (as we never
# allocated space for it).
# Shall we change our space allocation logic? That allows users to
# spam the same helper with the same args, and still run out of
# stack space. So we consider this usage invalid for now.
# Might fix it later.
@bpf @bpf
@map @map
@ -26,12 +14,12 @@ def count() -> HashMap:
@bpf @bpf
@section("xdp") @section("xdp")
def hello_world(ctx: c_void_p) -> c_int64: def hello_world(ctx: c_void_p) -> c_int64:
prev = count.lookup(0) prev = count().lookup(0)
if prev: if prev:
count.update(0, prev + 1) count().update(0, prev + 1)
return XDP_PASS return XDP_PASS
else: else:
count.update(0, 1) count().update(0, 1)
return XDP_PASS return XDP_PASS

View File

@ -1,101 +0,0 @@
import logging
from pythonbpf import compile, bpf, section, bpfglobal, compile_to_ir
from ctypes import c_void_p, c_int64, c_int32
@bpf
@bpfglobal
def somevalue() -> c_int32:
return c_int32(42)
@bpf
@bpfglobal
def somevalue2() -> c_int64:
return c_int64(69)
@bpf
@bpfglobal
def somevalue1() -> c_int32:
return c_int32(42)
# --- Passing examples ---
# Simple constant return
@bpf
@bpfglobal
def g1() -> c_int64:
return c_int64(42)
# Constructor with one constant argument
@bpf
@bpfglobal
def g2() -> c_int64:
return c_int64(69)
# --- Failing examples ---
# No return annotation
# @bpf
# @bpfglobal
# def g3():
# return 42
# Return annotation is complex
# @bpf
# @bpfglobal
# def g4() -> List[int]:
# return []
# # Return is missing
# @bpf
# @bpfglobal
# def g5() -> c_int64:
# pass
# # Return is a variable reference
# #TODO: maybe fix this sometime later. It defaults to 0
# CONST = 5
# @bpf
# @bpfglobal
# def g6() -> c_int64:
# return c_int64(CONST)
# Constructor with multiple args
#TODO: this is not working. should it work ?
@bpf
@bpfglobal
def g7() -> c_int64:
return c_int64(1)
# Dataclass call
#TODO: fails with dataclass
# @dataclass
# class Point:
# x: c_int64
# y: c_int64
# @bpf
# @bpfglobal
# def g8() -> Point:
# return Point(1, 2)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def sometag(ctx: c_void_p) -> c_int64:
print("test")
global somevalue
somevalue = 2
print(f"{somevalue}")
return c_int64(1)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile_to_ir("globals.py", "globals.ll", loglevel=logging.INFO)
compile()

View File

@ -1,40 +0,0 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from pythonbpf.helper import XDP_PASS
from pythonbpf.maps import HashMap
from ctypes import c_void_p, c_int64
# NOTE: This example exposes the problems with our typing system.
# We can't do steps on line 25 and 27.
# prev is of type i64**. For prev + 1, we deref it down to i64
# To assign it back to prev, we need to go back to i64**.
# We cannot allocate space for the intermediate type now.
# We probably need to track the ref/deref chain for each variable.
@bpf
@map
def count() -> HashMap:
return HashMap(key=c_int64, value=c_int64, max_entries=1)
@bpf
@section("xdp")
def hello_world(ctx: c_void_p) -> c_int64:
prev = count.lookup(0)
if prev:
prev = prev + 1
count.update(0, prev)
return XDP_PASS
else:
count.update(0, 1)
return XDP_PASS
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,21 +0,0 @@
import logging
from pythonbpf import compile, bpf, section, bpfglobal, compile_to_ir
from ctypes import c_void_p, c_int64
# This should not pass as somevalue is not declared at all.
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def sometag(ctx: c_void_p) -> c_int64:
print("test")
print(f"{somevalue}") # noqa: F821
return c_int64(1)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile_to_ir("globals.py", "globals.ll", loglevel=logging.INFO)
compile()

View File

@ -1,69 +0,0 @@
from pythonbpf import bpf, map, section, bpfglobal, compile, struct
from ctypes import c_void_p, c_int64, c_int32, c_uint64
from pythonbpf.maps import HashMap
from pythonbpf.helper import ktime
# NOTE: This is a comprehensive test combining struct, helper, and map features
# Please note that at line 50, though we have used an absurd expression to test
# the compiler, it is recommended to use named variables to reduce the amount of
# scratch space that needs to be allocated.
@bpf
@struct
class data_t:
pid: c_uint64
ts: c_uint64
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
dat = data_t()
dat.pid = 123
dat.pid = dat.pid + 1
print(f"pid is {dat.pid}")
tu = 9
last.update(0, tu)
last.update(1, -last.lookup(0))
x = last.lookup(0)
print(f"Map value at index 0: {x}")
x = x + c_int32(1)
print(f"x after adding 32-bit 1 is {x}")
x = ktime() - 121
print(f"ktime - 121 is {x}")
x = last.lookup(0)
x = x + 1
print(f"x is {x}")
if x == 10:
jat = data_t()
jat.ts = 456
print(f"Hello, World!, ts is {jat.ts}")
a = last.lookup(0)
print(f"a is {a}")
last.update(9, 9)
last.update(0, last.lookup(last.lookup(0)) +
last.lookup(last.lookup(0)) + last.lookup(last.lookup(0)))
z = last.lookup(0)
print(f"new map val at index 0 is {z}")
else:
a = last.lookup(0)
print("Goodbye, World!")
c = last.lookup(1 - 1)
print(f"c is {c}")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,27 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
x = 1
print(f"Initial x: {x}")
a = 20
x = a
print(f"Updated x with a: {x}")
x = (x + x) * 3
if x == 2:
print("Hello, World!")
else:
print(f"Goodbye, World! {x}")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,34 +0,0 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from ctypes import c_void_p, c_int64, c_uint64
from pythonbpf.maps import HashMap
# NOTE: An example of i64** assignment with binops on the RHS
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
last.update(0, 1)
x = last.lookup(0)
print(f"{x}")
x = x + 1
if x == 2:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,40 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile, struct
from ctypes import c_void_p, c_int64, c_uint64
from pythonbpf.helper import ktime
@bpf
@struct
class data_t:
pid: c_uint64
ts: c_uint64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
dat = data_t()
dat.pid = 123
dat.pid = dat.pid + 1
print(f"pid is {dat.pid}")
x = ktime() - 121
print(f"ktime is {x}")
x = 1
x = x + 1
print(f"x is {x}")
if x == 2:
jat = data_t()
jat.ts = 456
print(f"Hello, World!, ts is {jat.ts}")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,20 +0,0 @@
from pythonbpf import compile, bpf, section, bpfglobal
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_sync")
def sometag(ctx: c_void_p) -> c_int64:
b = 1 + 2
a = 1 + b
print(f"{a}")
return c_int64(0)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,32 +0,0 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from ctypes import c_void_p, c_int64, c_uint64
from pythonbpf.maps import HashMap
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
last.update(0, 1)
last.update(1, 2)
x = last.lookup(0)
y = last.lookup(1)
if x and y:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,21 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
if True:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,21 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
if (0 + 1) * 0:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,21 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
if 0:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,30 +0,0 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from ctypes import c_void_p, c_int64, c_uint64
from pythonbpf.maps import HashMap
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
# last.update(0, 1)
tsp = last.lookup(0)
if tsp:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,30 +0,0 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from ctypes import c_void_p, c_int64, c_uint64
from pythonbpf.maps import HashMap
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
last.update(0, 1)
tsp = last.lookup(0)
if tsp > 0:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,30 +0,0 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from ctypes import c_void_p, c_int64, c_uint64
from pythonbpf.maps import HashMap
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
# last.update(0, 1)
tsp = last.lookup(0)
if not tsp:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,32 +0,0 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from ctypes import c_void_p, c_int64, c_uint64
from pythonbpf.maps import HashMap
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
last.update(0, 1)
# last.update(1, 2)
x = last.lookup(0)
y = last.lookup(1)
if x or y:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,29 +0,0 @@
from pythonbpf import bpf, struct, section, bpfglobal, compile
from ctypes import c_void_p, c_int64, c_uint64
@bpf
@struct
class data_t:
pid: c_uint64
ts: c_uint64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
dat = data_t()
if dat.ts:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,23 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64, c_int32
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
x = 0
y = c_int32(0)
if x == y:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,22 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
x = 0
if x:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,22 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
x = 0
if x * 1:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,22 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
x = 2
if x > 3:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,7 +1,7 @@
from pythonbpf import bpf, map, struct, section, bpfglobal, compile, compile_to_ir, BPF from pythonbpf import bpf, map, struct, section, bpfglobal, compile, compile_to_ir, BPF
from pythonbpf.helper import ktime, pid from pythonbpf.helper import ktime, pid
from pythonbpf.maps import PerfEventArray from pythonbpf.maps import PerfEventArray
import logging
from ctypes import c_void_p, c_int32, c_uint64 from ctypes import c_void_p, c_int32, c_uint64
@ -42,8 +42,8 @@ def LICENSE() -> str:
return "GPL" return "GPL"
compile()
compile_to_ir("perf_buffer_map.py", "perf_buffer_map.ll") compile_to_ir("perf_buffer_map.py", "perf_buffer_map.ll")
compile(loglevel=logging.INFO)
b = BPF() b = BPF()
b.load_and_attach() b.load_and_attach()

View File

@ -1,18 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return 1 + 1 - 2
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,19 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
a = 2
return a - 2
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,18 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return True
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,18 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return 1
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,18 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,20 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int32
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int32:
print("Hello, World!")
a = 1 # int64
x = 1 # int64
return c_int32(a - x) # typecast to int32
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,18 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int32
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int32:
print("Hello, World!")
return c_int32(1)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,19 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int32
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int32:
print("Hello, World!")
a = 1 # int64
return c_int32(a) # typecast to int32
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,19 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
a = 1
return a
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,19 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
from pythonbpf.helper import XDP_PASS
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return XDP_PASS
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,5 +1,5 @@
from pythonbpf import bpf, BPF, map, bpfglobal, section, compile, compile_to_ir from pythonbpf import bpf, map, bpfglobal, section, compile, compile_to_ir, BPF
from pythonbpf.maps import RingBuf, HashMap from pythonbpf.maps import RingBuf
from ctypes import c_int32, c_void_p from ctypes import c_int32, c_void_p
@ -9,17 +9,13 @@ from ctypes import c_int32, c_void_p
def mymap() -> RingBuf: def mymap() -> RingBuf:
return RingBuf(max_entries=(1024)) return RingBuf(max_entries=(1024))
@bpf
@map
def mymap2() -> HashMap:
return HashMap(key=c_int32, value=c_int32, max_entries=1024)
@bpf @bpf
@section("tracepoint/syscalls/sys_enter_clone") @section("tracepoint/syscalls/sys_enter_clone")
def random_section(ctx: c_void_p) -> c_int32: def random_section(ctx: c_void_p) -> c_int32:
print("Hello") print("Hello")
e = mymap().reserve(6)
if e:
mymap().submit(e)
return c_int32(0) return c_int32(0)
@ -33,3 +29,5 @@ compile_to_ir("ringbuf.py", "ringbuf.ll")
compile() compile()
b = BPF() b = BPF()
b.load_and_attach() b.load_and_attach()
while True:
print("running")

View File

@ -1,369 +0,0 @@
#!/usr/bin/env python3
"""
BTF to Python ctypes Converter
Converts Linux kernel BTF (BPF Type Format) to Python ctypes definitions.
This tool automates the process of:
1. Dumping BTF from vmlinux
2. Preprocessing enum definitions
3. Processing struct kioctx to extract anonymous nested structs
4. Running C preprocessor
5. Converting to Python ctypes using clang2py
6. Post-processing the output
Requirements:
- bpftool
- clang
- ctypeslib2 (pip install ctypeslib2)
"""
import argparse
import os
import re
import subprocess
import sys
import tempfile
class BTFConverter:
def __init__(self, btf_source="/sys/kernel/btf/vmlinux", output_file="vmlinux.py",
keep_intermediate=False, verbose=False):
self.btf_source = btf_source
self.output_file = output_file
self.keep_intermediate = keep_intermediate
self.verbose = verbose
self.temp_dir = tempfile.mkdtemp() if not keep_intermediate else "."
def log(self, message):
"""Print message if verbose mode is enabled."""
if self.verbose:
print(f"[*] {message}")
def run_command(self, cmd, description):
"""Run a shell command and handle errors."""
self.log(f"{description}...")
try:
result = subprocess.run(
cmd,
shell=True,
check=True,
capture_output=True,
text=True
)
if self.verbose and result.stdout:
print(result.stdout)
return result
except subprocess.CalledProcessError as e:
print(f"Error during {description}:", file=sys.stderr)
print(e.stderr, file=sys.stderr)
sys.exit(1)
def step1_dump_btf(self):
"""Step 1: Dump BTF from vmlinux."""
vmlinux_h = os.path.join(self.temp_dir, "vmlinux.h")
cmd = f"bpftool btf dump file {self.btf_source} format c > {vmlinux_h}"
self.run_command(cmd, "Dumping BTF from vmlinux")
return vmlinux_h
def step2_preprocess_enums(self, input_file):
"""Step 1.5: Preprocess enum definitions."""
self.log("Preprocessing enum definitions...")
with open(input_file, 'r') as f:
original_code = f.read()
# Extract anonymous enums
enums = re.findall(
r'(?<!typedef\s)(enum\s*\{[^}]*\})\s*(\w+)\s*(?::\s*\d+)?\s*;',
original_code
)
enum_defs = [enum_block + ';' for enum_block, _ in enums]
# Replace anonymous enums with int declarations
processed_code = re.sub(
r'(?<!typedef\s)enum\s*\{[^}]*\}\s*(\w+)\s*(?::\s*\d+)?\s*;',
r'int \1;',
original_code
)
# Prepend enum definitions
if enum_defs:
enum_text = '\n'.join(enum_defs) + '\n\n'
processed_code = enum_text + processed_code
output_file = os.path.join(self.temp_dir, "vmlinux_processed.h")
with open(output_file, 'w') as f:
f.write(processed_code)
return output_file
def step2_5_process_kioctx(self, input_file):
#TODO: this is a very bad bug and design decision. A single struct has an issue mostly.
"""Step 2.5: Process struct kioctx to extract nested anonymous structs."""
self.log("Processing struct kioctx nested structs...")
with open(input_file, 'r') as f:
content = f.read()
# Pattern to match struct kioctx with its full body (handles multiple nesting levels)
kioctx_pattern = r'struct\s+kioctx\s*\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\}\s*;'
def process_kioctx_replacement(match):
full_struct = match.group(0)
self.log(f"Found struct kioctx, length: {len(full_struct)} chars")
# Extract the struct body (everything between outermost { and })
body_match = re.search(r'struct\s+kioctx\s*\{(.*)\}\s*;', full_struct, re.DOTALL)
if not body_match:
return full_struct
body = body_match.group(1)
# Find all anonymous structs within the body
# Pattern: struct { ... } followed by ; (not a member name)
anon_struct_pattern = r'struct\s*\{[^}]*\}'
anon_structs = []
anon_counter = 4 # Start from 4, counting down to 1
def replace_anonymous_struct(m):
nonlocal anon_counter
anon_struct_content = m.group(0)
# Extract the body of the anonymous struct
anon_body_match = re.search(r'struct\s*\{(.*)\}', anon_struct_content, re.DOTALL)
if not anon_body_match:
return anon_struct_content
anon_body = anon_body_match.group(1)
# Create the named struct definition
anon_name = f"__anon{anon_counter}"
member_name = f"a{anon_counter}"
# Store the struct definition
anon_structs.append(f"struct {anon_name} {{{anon_body}}};")
anon_counter -= 1
# Return the member declaration
return f"struct {anon_name} {member_name}"
# Process the body, finding and replacing anonymous structs
# We need to be careful to only match anonymous structs followed by ;
processed_body = body
# Find all occurrences and process them
pattern_with_semicolon = r'struct\s*\{([^}]*)\}\s*;'
matches = list(re.finditer(pattern_with_semicolon, body, re.DOTALL))
if not matches:
self.log("No anonymous structs found in kioctx")
return full_struct
self.log(f"Found {len(matches)} anonymous struct(s)")
# Process in reverse order to maintain string positions
for match in reversed(matches):
anon_struct_content = match.group(1)
start_pos = match.start()
end_pos = match.end()
# Create the named struct definition
anon_name = f"__anon{anon_counter}"
member_name = f"a{anon_counter}"
# Store the struct definition
anon_structs.insert(0, f"struct {anon_name} {{{anon_struct_content}}};")
# Replace in the body
replacement = f"struct {anon_name} {member_name};"
processed_body = processed_body[:start_pos] + replacement + processed_body[end_pos:]
anon_counter -= 1
# Rebuild the complete definition
if anon_structs:
# Prepend the anonymous struct definitions
anon_definitions = '\n'.join(anon_structs) + '\n\n'
new_struct = f"struct kioctx {{{processed_body}}};"
return anon_definitions + new_struct
else:
return full_struct
# Apply the transformation
processed_content = re.sub(
kioctx_pattern,
process_kioctx_replacement,
content,
flags=re.DOTALL
)
output_file = os.path.join(self.temp_dir, "vmlinux_kioctx_processed.h")
with open(output_file, 'w') as f:
f.write(processed_content)
self.log(f"Saved kioctx-processed output to {output_file}")
return output_file
def step3_run_preprocessor(self, input_file):
"""Step 2: Run C preprocessor."""
output_file = os.path.join(self.temp_dir, "vmlinux.i")
cmd = f"clang -E {input_file} > {output_file}"
self.run_command(cmd, "Running C preprocessor")
return output_file
def step4_convert_to_ctypes(self, input_file):
"""Step 3: Convert to Python ctypes using clang2py."""
output_file = os.path.join(self.temp_dir, "vmlinux_raw.py")
cmd = (
f"clang2py {input_file} -o {output_file} "
f"--clang-args=\"-fno-ms-extensions -I/usr/include -I/usr/include/linux\""
)
self.run_command(cmd, "Converting to Python ctypes")
return output_file
def step5_postprocess(self, input_file):
"""Step 4: Post-process the generated Python file."""
self.log("Post-processing Python ctypes definitions...")
with open(input_file, "r") as f:
data = f.read()
# Remove lines like ('_45', ctypes.c_int64, 0)
data = re.sub(r"\('_[0-9]+',\s*ctypes\.[a-zA-Z0-9_]+,\s*0\),?\s*\n?", "", data)
# Replace ('_20', ctypes.c_uint64, 64) → ('_20', ctypes.c_uint64)
data = re.sub(r"\('(_[0-9]+)',\s*(ctypes\.[a-zA-Z0-9_]+),\s*[0-9]+\)", r"('\1', \2)", data)
# Replace ('_20', ctypes.c_char, 8) with ('_20', ctypes.c_uint8, 8)
data = re.sub(
r"(ctypes\.c_char)(\s*,\s*\d+\))",
r"ctypes.c_uint8\2",
data
)
# Remove ctypes. prefix from invalid entries
invalid_ctypes = ["bpf_iter_state", "_cache_type", "fs_context_purpose"]
for name in invalid_ctypes:
data = re.sub(rf"\bctypes\.{name}\b", name, data)
with open(self.output_file, "w") as f:
f.write(data)
self.log(f"Saved final output to {self.output_file}")
def cleanup(self):
"""Remove temporary files if not keeping them."""
if not self.keep_intermediate and self.temp_dir != ".":
self.log(f"Cleaning up temporary directory: {self.temp_dir}")
import shutil
shutil.rmtree(self.temp_dir, ignore_errors=True)
def convert(self):
"""Run the complete conversion pipeline."""
try:
self.log("Starting BTF to Python ctypes conversion...")
# Check dependencies
self.check_dependencies()
# Run conversion pipeline
vmlinux_h = self.step1_dump_btf()
vmlinux_processed_h = self.step2_preprocess_enums(vmlinux_h)
vmlinux_kioctx_h = self.step2_5_process_kioctx(vmlinux_processed_h)
vmlinux_i = self.step3_run_preprocessor(vmlinux_kioctx_h)
vmlinux_raw_py = self.step4_convert_to_ctypes(vmlinux_i)
self.step5_postprocess(vmlinux_raw_py)
print(f"\n✓ Conversion complete! Output saved to: {self.output_file}")
except Exception as e:
print(f"\n✗ Error during conversion: {e}", file=sys.stderr)
import traceback
traceback.print_exc()
sys.exit(1)
finally:
self.cleanup()
def check_dependencies(self):
"""Check if required tools are available."""
self.log("Checking dependencies...")
dependencies = {
"bpftool": "bpftool --version",
"clang": "clang --version",
"clang2py": "clang2py --version"
}
missing = []
for tool, cmd in dependencies.items():
try:
subprocess.run(
cmd,
shell=True,
check=True,
capture_output=True
)
except subprocess.CalledProcessError:
missing.append(tool)
if missing:
print("Error: Missing required dependencies:", file=sys.stderr)
for tool in missing:
print(f" - {tool}", file=sys.stderr)
if "clang2py" in missing:
print("\nInstall ctypeslib2: pip install ctypeslib2", file=sys.stderr)
sys.exit(1)
def main():
parser = argparse.ArgumentParser(
description="Convert Linux kernel BTF to Python ctypes definitions",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
%(prog)s
%(prog)s -o kernel_types.py
%(prog)s --btf-source /sys/kernel/btf/custom_module -k -v
"""
)
parser.add_argument(
"--btf-source",
default="/sys/kernel/btf/vmlinux",
help="Path to BTF source (default: /sys/kernel/btf/vmlinux)"
)
parser.add_argument(
"-o", "--output",
default="vmlinux.py",
help="Output Python file (default: vmlinux.py)"
)
parser.add_argument(
"-k", "--keep-intermediate",
action="store_true",
help="Keep intermediate files (vmlinux.h, vmlinux_processed.h, etc.)"
)
parser.add_argument(
"-v", "--verbose",
action="store_true",
help="Enable verbose output"
)
args = parser.parse_args()
converter = BTFConverter(
btf_source=args.btf_source,
output_file=args.output,
keep_intermediate=args.keep_intermediate,
verbose=args.verbose
)
converter.convert()
if __name__ == "__main__":
main()