17 Commits

Author SHA1 Message Date
552cd352f2 Merge pull request #20 from pythonbpf/fix-failing-tests
Fix failing tests in tests/
2025-10-05 14:04:14 +05:30
c7f2955ee9 Fix typo in process_stmt 2025-10-05 14:03:19 +05:30
ef36ea1e03 Add nullcheck for var_name in handle_binary_ops 2025-10-05 14:02:08 +05:30
d341cb24c0 Update explanation for named_arg 2025-10-05 04:27:37 +05:30
2fabb67942 Add note for faling test named_arg 2025-10-05 03:15:17 +05:30
a0b0ad370e Merge pull request #23 from pythonbpf/formatter
update formatter and pre-commit
2025-10-05 01:15:01 +05:30
283b947fc5 Add named_arg failing test 2025-10-04 19:50:33 +05:30
bf78ac21fe Remove 'Static Typing' from short term tasks 2025-10-04 07:30:11 +05:30
ac49cd8b1c Fix hashmap access in direct_assign.py 2025-10-04 02:14:33 +05:30
af44bd063c Add explanation for direct_assign.py failing test 2025-10-04 02:13:46 +05:30
1239d1c35f Fix handle_binary_ops calls in functions_pass 2025-10-04 02:09:11 +05:30
f41a9ccf26 Remove unnecessary args from binary_ops 2025-10-04 02:07:31 +05:30
be05b5d102 Allow local symbols to be used within return 2025-10-03 19:50:56 +05:30
3f061750cf fix return value error 2025-10-03 19:11:11 +05:30
6d5d6345e2 Add var_rval failing test 2025-10-03 18:01:15 +05:30
6fea580693 Fix t/f/return.py, tweak handle_binary_ops 2025-10-03 17:56:21 +05:30
86b9ec56d7 update formatter and pre-commit
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-02 22:43:05 +05:30
17 changed files with 153 additions and 323 deletions

View File

@ -21,7 +21,7 @@ ci:
repos:
# Standard hooks
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
rev: v6.0.0
hooks:
- id: check-added-large-files
- id: check-case-conflict
@ -36,7 +36,7 @@ repos:
- id: trailing-whitespace
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: "v0.4.2"
rev: "v0.13.2"
hooks:
- id: ruff
args: ["--fix", "--show-fixes"]
@ -45,7 +45,7 @@ repos:
# Checking static types
- repo: https://github.com/pre-commit/mirrors-mypy
rev: "v1.10.0"
rev: "v1.18.2"
hooks:
- id: mypy
exclude: ^(tests)|^(examples)

View File

@ -1,7 +1,6 @@
## Short term
- Implement enough functionality to port the BCC tutorial examples in PythonBPF
- Static Typing
- Add all maps
- XDP support in pylibbpf
- ringbuf support

View File

@ -12,7 +12,7 @@
"from pythonbpf import bpf, map, section, bpfglobal, BPF\n",
"from pythonbpf.helper import pid\n",
"from pythonbpf.maps import HashMap\n",
"from pylibbpf import *\n",
"from pylibbpf import BpfMap\n",
"from ctypes import c_void_p, c_int64, c_uint64, c_int32\n",
"import matplotlib.pyplot as plt"
]

View File

@ -9,6 +9,7 @@ logger: Logger = logging.getLogger(__name__)
def recursive_dereferencer(var, builder):
"""dereference until primitive type comes out"""
# TODO: Not worrying about stack overflow for now
logger.info(f"Dereferencing {var}, type is {var.type}")
if isinstance(var.type, ir.PointerType):
a = builder.load(var)
return recursive_dereferencer(a, builder)
@ -18,7 +19,7 @@ def recursive_dereferencer(var, builder):
raise TypeError(f"Unsupported type for dereferencing: {var.type}")
def get_operand_value(operand, module, builder, local_sym_tab):
def get_operand_value(operand, builder, local_sym_tab):
"""Extract the value from an operand, handling variables and constants."""
if isinstance(operand, ast.Name):
if operand.id in local_sym_tab:
@ -29,14 +30,14 @@ def get_operand_value(operand, module, builder, local_sym_tab):
return ir.Constant(ir.IntType(64), operand.value)
raise TypeError(f"Unsupported constant type: {type(operand.value)}")
elif isinstance(operand, ast.BinOp):
return handle_binary_op_impl(operand, module, builder, local_sym_tab)
return handle_binary_op_impl(operand, builder, local_sym_tab)
raise TypeError(f"Unsupported operand type: {type(operand)}")
def handle_binary_op_impl(rval, module, builder, local_sym_tab):
def handle_binary_op_impl(rval, builder, local_sym_tab):
op = rval.op
left = get_operand_value(rval.left, module, builder, local_sym_tab)
right = get_operand_value(rval.right, module, builder, local_sym_tab)
left = get_operand_value(rval.left, builder, local_sym_tab)
right = get_operand_value(rval.right, builder, local_sym_tab)
logger.info(f"left is {left}, right is {right}, op is {op}")
# Map AST operation nodes to LLVM IR builder methods
@ -61,6 +62,11 @@ def handle_binary_op_impl(rval, module, builder, local_sym_tab):
raise SyntaxError("Unsupported binary operation")
def handle_binary_op(rval, module, builder, var_name, local_sym_tab):
result = handle_binary_op_impl(rval, module, builder, local_sym_tab)
builder.store(result, local_sym_tab[var_name].var)
def handle_binary_op(rval, builder, var_name, local_sym_tab):
result = handle_binary_op_impl(rval, builder, local_sym_tab)
if var_name and var_name in local_sym_tab:
logger.info(
f"Storing result {result} into variable {local_sym_tab[var_name].var}"
)
builder.store(result, local_sym_tab[var_name].var)
return result, result.type

View File

@ -4,11 +4,7 @@ from .license_pass import license_processing
from .functions_pass import func_proc
from .maps import maps_proc
from .structs import structs_proc
from .globals_pass import (
globals_list_creation,
globals_processing,
populate_global_symbol_table,
)
from .globals_pass import globals_processing
from .debuginfo import DW_LANG_C11, DwarfBehaviorEnum, DebugInfoGenerator
import os
import subprocess
@ -44,18 +40,15 @@ def processor(source_code, filename, module):
for func_node in bpf_chunks:
logger.info(f"Found BPF function/struct: {func_node.name}")
populate_global_symbol_table(tree, module)
license_processing(tree, module)
globals_processing(tree, module)
structs_sym_tab = structs_proc(tree, module, bpf_chunks)
map_sym_tab = maps_proc(tree, module, bpf_chunks)
func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab)
globals_list_creation(tree, module)
license_processing(tree, module)
globals_processing(tree, module)
def compile_to_ir(filename: str, output: str, loglevel=logging.WARNING):
def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
logging.basicConfig(
level=loglevel, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s"
)
@ -128,7 +121,7 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.WARNING):
return output
def compile(loglevel=logging.WARNING) -> bool:
def compile(loglevel=logging.INFO) -> bool:
# Look one level up the stack to the caller of this function
caller_frame = inspect.stack()[1]
caller_file = Path(caller_frame.filename).resolve()
@ -161,7 +154,7 @@ def compile(loglevel=logging.WARNING) -> bool:
return success
def BPF(loglevel=logging.WARNING) -> BpfProgram:
def BPF(loglevel=logging.INFO) -> BpfProgram:
caller_frame = inspect.stack()[1]
src = inspect.getsource(caller_frame.frame)
with tempfile.NamedTemporaryFile(

View File

@ -146,8 +146,7 @@ def handle_assign(
local_sym_tab[var_name].var,
)
logger.info(
f"Assigned {call_type} constant "
f"{rval.args[0].value} to {var_name}"
f"Assigned {call_type} constant {rval.args[0].value} to {var_name}"
)
elif HelperHandlerRegistry.has_handler(call_type):
# var = builder.alloca(ir.IntType(64), name=var_name)
@ -233,7 +232,7 @@ def handle_assign(
else:
logger.info("Unsupported assignment call function type")
elif isinstance(rval, ast.BinOp):
handle_binary_op(rval, module, builder, var_name, local_sym_tab)
handle_binary_op(rval, builder, var_name, local_sym_tab)
else:
logger.info("Unsupported assignment value type")
@ -385,24 +384,49 @@ def process_stmt(
)
elif isinstance(stmt, ast.Return):
if stmt.value is None:
builder.ret(ir.Constant(ir.IntType(32), 0))
builder.ret(ir.Constant(ir.IntType(64), 0))
did_return = True
elif (
isinstance(stmt.value, ast.Call)
and isinstance(stmt.value.func, ast.Name)
and len(stmt.value.args) == 1
and isinstance(stmt.value.args[0], ast.Constant)
and isinstance(stmt.value.args[0].value, int)
):
call_type = stmt.value.func.id
if ctypes_to_ir(call_type) != ret_type:
raise ValueError(
"Return type mismatch: expected"
f"{ctypes_to_ir(call_type)}, got {call_type}"
)
else:
builder.ret(ir.Constant(ret_type, stmt.value.args[0].value))
if isinstance(stmt.value.args[0], ast.Constant) and isinstance(
stmt.value.args[0].value, int
):
call_type = stmt.value.func.id
if ctypes_to_ir(call_type) != ret_type:
raise ValueError(
"Return type mismatch: expected"
f"{ctypes_to_ir(call_type)}, got {call_type}"
)
else:
builder.ret(ir.Constant(ret_type, stmt.value.args[0].value))
did_return = True
elif isinstance(stmt.value.args[0], ast.BinOp):
# TODO: Should be routed through eval_expr
val = handle_binary_op(stmt.value.args[0], builder, None, local_sym_tab)
if val is None:
raise ValueError("Failed to evaluate return expression")
if val[1] != ret_type:
raise ValueError(
"Return type mismatch: expected " f"{ret_type}, got {val[1]}"
)
builder.ret(val[0])
did_return = True
elif isinstance(stmt.value.args[0], ast.Name):
if stmt.value.args[0].id in local_sym_tab:
var = local_sym_tab[stmt.value.args[0].id].var
val = builder.load(var)
if val.type != ret_type:
raise ValueError(
"Return type mismatch: expected"
f"{ret_type}, got {val.type}"
)
builder.ret(val)
did_return = True
else:
raise ValueError("Failed to evaluate return expression")
elif isinstance(stmt.value, ast.Name):
if stmt.value.id == "XDP_PASS":
builder.ret(ir.Constant(ret_type, 2))
@ -455,6 +479,9 @@ def allocate_mem(
continue
var_name = target.id
rval = stmt.value
if var_name in local_sym_tab:
logger.info(f"Variable {var_name} already allocated")
continue
if isinstance(rval, ast.Call):
if isinstance(rval.func, ast.Name):
call_type = rval.func.id
@ -483,8 +510,7 @@ def allocate_mem(
var = builder.alloca(ir_type, name=var_name)
has_metadata = True
logger.info(
f"Pre-allocated variable {var_name} "
f"for struct {call_type}"
f"Pre-allocated variable {var_name} for struct {call_type}"
)
elif isinstance(rval.func, ast.Attribute):
ir_type = ir.PointerType(ir.IntType(64))
@ -568,7 +594,7 @@ def process_func_body(
)
if not did_return:
builder.ret(ir.Constant(ir.IntType(32), 0))
builder.ret(ir.Constant(ir.IntType(64), 0))
def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_tab):

View File

@ -1,121 +1,8 @@
from llvmlite import ir
import ast
from logging import Logger
import logging
from .type_deducer import ctypes_to_ir
logger: Logger = logging.getLogger(__name__)
# TODO: this is going to be a huge fuck of a headache in the future.
global_sym_tab = []
def populate_global_symbol_table(tree, module: ir.Module):
for node in tree.body:
if isinstance(node, ast.FunctionDef):
for dec in node.decorator_list:
if (
isinstance(dec, ast.Call)
and isinstance(dec.func, ast.Name)
and dec.func.id == "section"
and len(dec.args) == 1
and isinstance(dec.args[0], ast.Constant)
and isinstance(dec.args[0].value, str)
):
global_sym_tab.append(node)
elif isinstance(dec, ast.Name) and dec.id == "bpfglobal":
global_sym_tab.append(node)
elif isinstance(dec, ast.Name) and dec.id == "map":
global_sym_tab.append(node)
return False
def emit_global(module: ir.Module, node, name):
logger.info(f"global identifier {name} processing")
# deduce LLVM type from the annotated return
if not isinstance(node.returns, ast.Name):
raise ValueError(f"Unsupported return annotation {ast.dump(node.returns)}")
ty = ctypes_to_ir(node.returns.id)
# extract the return expression
# TODO: turn this return extractor into a generic function I can use everywhere.
ret_stmt = node.body[0]
if not isinstance(ret_stmt, ast.Return) or ret_stmt.value is None:
raise ValueError(f"Global '{name}' has no valid return")
init_val = ret_stmt.value
# simple constant like "return 0"
if isinstance(init_val, ast.Constant):
llvm_init = ir.Constant(ty, init_val.value)
# variable reference like "return SOME_CONST"
elif isinstance(init_val, ast.Name):
# need symbol resolution here, stub as 0 for now
raise ValueError(f"Name reference {init_val.id} not yet supported")
# constructor call like "return c_int64(0)" or dataclass(...)
elif isinstance(init_val, ast.Call):
if len(init_val.args) >= 1 and isinstance(init_val.args[0], ast.Constant):
llvm_init = ir.Constant(ty, init_val.args[0].value)
else:
logger.info("Defaulting to zero as no constant argument found")
llvm_init = ir.Constant(ty, 0)
else:
raise ValueError(f"Unsupported return expr {ast.dump(init_val)}")
gvar = ir.GlobalVariable(module, ty, name=name)
gvar.initializer = llvm_init
gvar.align = 8
gvar.linkage = "dso_local"
gvar.global_constant = False
return gvar
def globals_processing(tree, module):
"""Process stuff decorated with @bpf and @bpfglobal except license and return the section name"""
globals_sym_tab = []
for node in tree.body:
# Skip non-assignment and non-function nodes
if not (isinstance(node, ast.FunctionDef)):
continue
# Get the name based on node type
if isinstance(node, ast.FunctionDef):
name = node.name
else:
continue
# Check for duplicate names
if name in globals_sym_tab:
raise SyntaxError(f"ERROR: Global name '{name}' previously defined")
else:
globals_sym_tab.append(name)
if isinstance(node, ast.FunctionDef) and node.name != "LICENSE":
decorators = [
dec.id for dec in node.decorator_list if isinstance(dec, ast.Name)
]
if "bpf" in decorators and "bpfglobal" in decorators:
if (
len(node.body) == 1
and isinstance(node.body[0], ast.Return)
and node.body[0].value is not None
and isinstance(
node.body[0].value, (ast.Constant, ast.Name, ast.Call)
)
):
emit_global(module, node, name)
else:
raise SyntaxError(f"ERROR: Invalid syntax for {name} global")
return None
def emit_llvm_compiler_used(module: ir.Module, names: list[str]):
def emit_globals(module: ir.Module, names: list[str]):
"""
Emit the @llvm.compiler.used global given a list of function/global names.
"""
@ -137,7 +24,7 @@ def emit_llvm_compiler_used(module: ir.Module, names: list[str]):
gv.section = "llvm.metadata"
def globals_list_creation(tree, module: ir.Module):
def globals_processing(tree, module: ir.Module):
collected = ["LICENSE"]
for node in tree.body:
@ -153,11 +40,10 @@ def globals_list_creation(tree, module: ir.Module):
):
collected.append(node.name)
# NOTE: all globals other than
# elif isinstance(dec, ast.Name) and dec.id == "bpfglobal":
# collected.append(node.name)
elif isinstance(dec, ast.Name) and dec.id == "bpfglobal":
collected.append(node.name)
elif isinstance(dec, ast.Name) and dec.id == "map":
collected.append(node.name)
emit_llvm_compiler_used(module, collected)
emit_globals(module, collected)

View File

@ -62,7 +62,7 @@ def bpf_map_lookup_elem_emitter(
"""
if not call.args or len(call.args) != 1:
raise ValueError(
"Map lookup expects exactly one argument (key), got " f"{len(call.args)}"
f"Map lookup expects exactly one argument (key), got {len(call.args)}"
)
key_ptr = get_or_create_ptr_from_arg(call.args[0], builder, local_sym_tab)
map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
@ -145,8 +145,7 @@ def bpf_map_update_elem_emitter(
"""
if not call.args or len(call.args) < 2 or len(call.args) > 3:
raise ValueError(
"Map update expects 2 or 3 args (key, value, flags), "
f"got {len(call.args)}"
f"Map update expects 2 or 3 args (key, value, flags), got {len(call.args)}"
)
key_arg = call.args[0]
@ -196,7 +195,7 @@ def bpf_map_delete_elem_emitter(
"""
if not call.args or len(call.args) != 1:
raise ValueError(
"Map delete expects exactly one argument (key), got " f"{len(call.args)}"
f"Map delete expects exactly one argument (key), got {len(call.args)}"
)
key_ptr = get_or_create_ptr_from_arg(call.args[0], builder, local_sym_tab)
map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
@ -255,7 +254,7 @@ def bpf_perf_event_output_handler(
):
if len(call.args) != 1:
raise ValueError(
"Perf event output expects exactly one argument, " f"got {len(call.args)}"
f"Perf event output expects exactly one argument, got {len(call.args)}"
)
data_arg = call.args[0]
ctx_ptr = func.args[0] # First argument to the function is ctx

View File

@ -270,7 +270,7 @@ def _prepare_expr_args(expr, func, module, builder, local_sym_tab, struct_sym_ta
val = builder.sext(val, ir.IntType(64))
else:
logger.warning(
"Only int and ptr supported in bpf_printk args. " "Others default to 0."
"Only int and ptr supported in bpf_printk args. Others default to 0."
)
val = ir.Constant(ir.IntType(64), 0)
return val

View File

@ -278,9 +278,7 @@ def process_bpf_map(func_node, module):
if handler:
return handler(map_name, rval, module)
else:
logger.warning(
f"Unknown map type " f"{rval.func.id}, defaulting to HashMap"
)
logger.warning(f"Unknown map type {rval.func.id}, defaulting to HashMap")
return process_hash_map(map_name, rval, module)
else:
raise ValueError("Function under @map must return a map")

View File

@ -1,27 +0,0 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <linux/types.h>
struct test_struct {
__u64 a;
__u64 b;
};
struct test_struct w = {};
volatile __u64 prev_time = 0;
SEC("tracepoint/syscalls/sys_enter_execve")
int trace_execve(void *ctx)
{
bpf_printk("previous %ul now %ul", w.b, w.a);
__u64 ts = bpf_ktime_get_ns();
bpf_printk("prev %ul now %ul", prev_time, ts);
w.a = ts;
w.b = prev_time;
prev_time = ts;
return 0;
}
char LICENSE[] SEC("license") = "GPL";

View File

@ -4,6 +4,18 @@ from pythonbpf.maps import HashMap
from ctypes import c_void_p, c_int64
# NOTE: I have decided to not fix this example for now.
# The issue is in line 31, where we are passing an expression.
# The update helper expects a pointer type. But the problem is
# that we must allocate the space for said pointer in the first
# basic block. As that usage is in a different basic block, we
# are unable to cast the expression to a pointer type. (as we never
# allocated space for it).
# Shall we change our space allocation logic? That allows users to
# spam the same helper with the same args, and still run out of
# stack space. So we consider this usage invalid for now.
# Might fix it later.
@bpf
@map
@ -14,12 +26,12 @@ def count() -> HashMap:
@bpf
@section("xdp")
def hello_world(ctx: c_void_p) -> c_int64:
prev = count().lookup(0)
prev = count.lookup(0)
if prev:
count().update(0, prev + 1)
count.update(0, prev + 1)
return XDP_PASS
else:
count().update(0, 1)
count.update(0, 1)
return XDP_PASS

View File

@ -1,101 +0,0 @@
import logging
from pythonbpf import compile, bpf, section, bpfglobal, compile_to_ir
from ctypes import c_void_p, c_int64, c_int32
@bpf
@bpfglobal
def somevalue() -> c_int32:
return c_int32(42)
@bpf
@bpfglobal
def somevalue2() -> c_int64:
return c_int64(69)
@bpf
@bpfglobal
def somevalue1() -> c_int32:
return c_int32(42)
# --- Passing examples ---
# Simple constant return
@bpf
@bpfglobal
def g1() -> c_int64:
return c_int64(42)
# Constructor with one constant argument
@bpf
@bpfglobal
def g2() -> c_int64:
return c_int64(69)
# --- Failing examples ---
# No return annotation
# @bpf
# @bpfglobal
# def g3():
# return 42
# Return annotation is complex
# @bpf
# @bpfglobal
# def g4() -> List[int]:
# return []
# # Return is missing
# @bpf
# @bpfglobal
# def g5() -> c_int64:
# pass
# # Return is a variable reference
# #TODO: maybe fix this sometime later. It defaults to 0
# CONST = 5
# @bpf
# @bpfglobal
# def g6() -> c_int64:
# return c_int64(CONST)
# Constructor with multiple args
#TODO: this is not working. should it work ?
@bpf
@bpfglobal
def g7() -> c_int64:
return c_int64(1)
# Dataclass call
#TODO: fails with dataclass
# @dataclass
# class Point:
# x: c_int64
# y: c_int64
# @bpf
# @bpfglobal
# def g8() -> Point:
# return Point(1, 2)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def sometag(ctx: c_void_p) -> c_int64:
print("test")
global somevalue
somevalue = 2
print(f"{somevalue}")
return c_int64(1)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile_to_ir("globals.py", "globals.ll", loglevel=logging.INFO)
compile()

View File

@ -0,0 +1,40 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from pythonbpf.helper import XDP_PASS
from pythonbpf.maps import HashMap
from ctypes import c_void_p, c_int64
# NOTE: This example exposes the problems with our typing system.
# We can't do steps on line 25 and 27.
# prev is of type i64**. For prev + 1, we deref it down to i64
# To assign it back to prev, we need to go back to i64**.
# We cannot allocate space for the intermediate type now.
# We probably need to track the ref/deref chain for each variable.
@bpf
@map
def count() -> HashMap:
return HashMap(key=c_int64, value=c_int64, max_entries=1)
@bpf
@section("xdp")
def hello_world(ctx: c_void_p) -> c_int64:
prev = count.lookup(0)
if prev:
prev = prev + 1
count.update(0, prev)
return XDP_PASS
else:
count.update(0, 1)
return XDP_PASS
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,21 +0,0 @@
import logging
from pythonbpf import compile, bpf, section, bpfglobal, compile_to_ir
from ctypes import c_void_p, c_int64
# This should not pass as somevalue is not declared at all.
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def sometag(ctx: c_void_p) -> c_int64:
print("test")
print(f"{somevalue}") # noqa: F821
return c_int64(1)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile_to_ir("globals.py", "globals.ll", loglevel=logging.INFO)
compile()

View File

@ -0,0 +1,20 @@
import logging
from pythonbpf import compile, bpf, section, bpfglobal
from ctypes import c_void_p, c_int64
@bpf
@section("sometag1")
def sometag(ctx: c_void_p) -> c_int64:
a = 1 - 1
return c_int64(a)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile(loglevel=logging.INFO)