37 Commits
sym ... globals

Author SHA1 Message Date
8e3942d38c format chore 2025-10-08 14:31:37 +05:30
3abe07c5b2 add global symbol table populate function 2025-10-05 14:05:10 +05:30
01bd7604ed add global symbol table populate function 2025-10-05 14:04:25 +05:30
7ae84a0d5a add failing test 2025-10-05 00:55:38 +05:30
df3f00261a changer order of passes 2025-10-04 08:17:16 +05:30
ab610147a5 update globals test and todos. 2025-10-04 06:36:51 +05:30
7720fe9f9f format chore 2025-10-04 06:33:09 +05:30
7aeac86bd3 fix broken IR generation logic for globals 2025-10-04 06:32:25 +05:30
ab1c4223d5 fix broken IR generation logic for globals 2025-10-03 22:55:40 +05:30
c3a512d5cf add global support with broken generation function 2025-10-03 22:20:04 +05:30
4a60c42cd0 add global failing test
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-03 21:25:58 +05:30
b35134625b Merge pull request #19 from pythonbpf/fix-expr
Refactor expr_pass
2025-10-03 17:36:31 +05:30
c3db609a90 Revert to using Warning loglevel as default 2025-10-03 17:35:57 +05:30
cc626c38f7 Move binops1 to tests/passing 2025-10-03 17:13:02 +05:30
a8b3f4f86c Fix recursive binops, move failing binops to passing 2025-10-03 17:08:41 +05:30
d593969408 Refactor ugly if-elif chain in handle_binary_op 2025-10-03 14:04:38 +05:30
6d5895ebc2 More fixes to recursive dereferencer, add get_operand value 2025-10-03 13:46:52 +05:30
c9ee6e4f17 Fix recursive_dereferencer in binops 2025-10-03 13:35:15 +05:30
a622c53e0f Add deref 2025-10-03 02:00:01 +05:30
a4f1363aed Add _handle_attribute_expr 2025-10-03 01:50:59 +05:30
3a819dcaee Add _handle_constant_expr 2025-10-02 22:54:38 +05:30
729270b34b Use _handle_name_expr in eval_expr 2025-10-02 22:50:21 +05:30
44cbcccb6c Create _handle_name_expr 2025-10-02 22:43:54 +05:30
253944afd2 Merge pull request #18 from pythonbpf/fix-maps
Fix map calling convention
2025-10-02 22:12:01 +05:30
54993ce5c2 Merge branch 'master' into fix-maps 2025-10-02 22:11:38 +05:30
05083bd513 janitorial nitpicks 2025-10-02 22:10:28 +05:30
6e4c340780 Allow non-call convention for maps 2025-10-02 22:07:28 +05:30
9dbca410c2 Remove calls from map in sys_sync 2025-10-02 21:24:15 +05:30
62ca3b5ffe format errors 2025-10-02 19:07:49 +05:30
f263c35156 move debug cu generation to debug module 2025-10-02 19:05:58 +05:30
0678d70309 bump version 2025-10-02 18:02:36 +05:30
96fa5687f8 Merge pull request #17 from pythonbpf/logging
add logging
2025-10-02 17:59:18 +05:30
4d0dd68d56 fix formatting 2025-10-02 17:58:24 +05:30
89b0a07419 add logging level control 2025-10-02 17:57:37 +05:30
469ca43eaa replace prints with logger.info 2025-10-02 17:46:27 +05:30
dc2b611cbc format errors
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-02 05:17:02 +05:30
0c1acf1420 Fix local_sym_tab usage in binary_ops 2025-10-02 05:08:05 +05:30
18 changed files with 584 additions and 238 deletions

View File

@ -21,17 +21,17 @@ def last() -> HashMap:
@section("tracepoint/syscalls/sys_enter_sync")
def do_trace(ctx: c_void_p) -> c_int64:
key = 0
tsp = last().lookup(key)
tsp = last.lookup(key)
if tsp:
kt = ktime()
delta = kt - tsp
if delta < 1000000000:
time_ms = delta // 1000000
print(f"sync called within last second, last {time_ms} ms ago")
last().delete(key)
last.delete(key)
else:
kt = ktime()
last().update(key, kt)
last.update(key, kt)
return c_int64(0)

View File

@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "pythonbpf"
version = "0.1.3"
version = "0.1.4"
description = "Reduced Python frontend for eBPF"
authors = [
{ name = "r41k0u", email="pragyanshchaturvedi18@gmail.com" },

View File

@ -1,71 +1,66 @@
import ast
from llvmlite import ir
from logging import Logger
import logging
logger: Logger = logging.getLogger(__name__)
def recursive_dereferencer(var, builder):
"""dereference until primitive type comes out"""
if var.type == ir.PointerType(ir.PointerType(ir.IntType(64))):
# TODO: Not worrying about stack overflow for now
if isinstance(var.type, ir.PointerType):
a = builder.load(var)
return recursive_dereferencer(a, builder)
elif var.type == ir.PointerType(ir.IntType(64)):
a = builder.load(var)
return recursive_dereferencer(a, builder)
elif var.type == ir.IntType(64):
elif isinstance(var.type, ir.IntType):
return var
else:
raise TypeError(f"Unsupported type for dereferencing: {var.type}")
def handle_binary_op(rval, module, builder, var_name, local_sym_tab, map_sym_tab, func):
print(module)
left = rval.left
right = rval.right
def get_operand_value(operand, module, builder, local_sym_tab):
"""Extract the value from an operand, handling variables and constants."""
if isinstance(operand, ast.Name):
if operand.id in local_sym_tab:
return recursive_dereferencer(local_sym_tab[operand.id].var, builder)
raise ValueError(f"Undefined variable: {operand.id}")
elif isinstance(operand, ast.Constant):
if isinstance(operand.value, int):
return ir.Constant(ir.IntType(64), operand.value)
raise TypeError(f"Unsupported constant type: {type(operand.value)}")
elif isinstance(operand, ast.BinOp):
return handle_binary_op_impl(operand, module, builder, local_sym_tab)
raise TypeError(f"Unsupported operand type: {type(operand)}")
def handle_binary_op_impl(rval, module, builder, local_sym_tab):
op = rval.op
left = get_operand_value(rval.left, module, builder, local_sym_tab)
right = get_operand_value(rval.right, module, builder, local_sym_tab)
logger.info(f"left is {left}, right is {right}, op is {op}")
# Handle left operand
if isinstance(left, ast.Name):
if left.id in local_sym_tab:
left = recursive_dereferencer(local_sym_tab[left.id].var, builder)
else:
raise SyntaxError(f"Undefined variable: {left.id}")
elif isinstance(left, ast.Constant):
left = ir.Constant(ir.IntType(64), left.value)
else:
raise SyntaxError("Unsupported left operand type")
# Map AST operation nodes to LLVM IR builder methods
op_map = {
ast.Add: builder.add,
ast.Sub: builder.sub,
ast.Mult: builder.mul,
ast.Div: builder.sdiv,
ast.Mod: builder.srem,
ast.LShift: builder.shl,
ast.RShift: builder.lshr,
ast.BitOr: builder.or_,
ast.BitXor: builder.xor,
ast.BitAnd: builder.and_,
ast.FloorDiv: builder.udiv,
}
if isinstance(right, ast.Name):
if right.id in local_sym_tab:
right = recursive_dereferencer(local_sym_tab[right.id].var, builder)
else:
raise SyntaxError(f"Undefined variable: {right.id}")
elif isinstance(right, ast.Constant):
right = ir.Constant(ir.IntType(64), right.value)
else:
raise SyntaxError("Unsupported right operand type")
print(f"left is {left}, right is {right}, op is {op}")
if isinstance(op, ast.Add):
builder.store(builder.add(left, right), local_sym_tab[var_name].var)
elif isinstance(op, ast.Sub):
builder.store(builder.sub(left, right), local_sym_tab[var_name].var)
elif isinstance(op, ast.Mult):
builder.store(builder.mul(left, right), local_sym_tab[var_name].var)
elif isinstance(op, ast.Div):
builder.store(builder.sdiv(left, right), local_sym_tab[var_name].var)
elif isinstance(op, ast.Mod):
builder.store(builder.srem(left, right), local_sym_tab[var_name].var)
elif isinstance(op, ast.LShift):
builder.store(builder.shl(left, right), local_sym_tab[var_name].var)
elif isinstance(op, ast.RShift):
builder.store(builder.lshr(left, right), local_sym_tab[var_name].var)
elif isinstance(op, ast.BitOr):
builder.store(builder.or_(left, right), local_sym_tab[var_name].var)
elif isinstance(op, ast.BitXor):
builder.store(builder.xor(left, right), local_sym_tab[var_name].var)
elif isinstance(op, ast.BitAnd):
builder.store(builder.and_(left, right), local_sym_tab[var_name].var)
elif isinstance(op, ast.FloorDiv):
builder.store(builder.udiv(left, right), local_sym_tab[var_name].var)
if type(op) in op_map:
result = op_map[type(op)](left, right)
return result
else:
raise SyntaxError("Unsupported binary operation")
def handle_binary_op(rval, module, builder, var_name, local_sym_tab):
result = handle_binary_op_impl(rval, module, builder, local_sym_tab)
builder.store(result, local_sym_tab[var_name].var)

View File

@ -4,16 +4,24 @@ from .license_pass import license_processing
from .functions_pass import func_proc
from .maps import maps_proc
from .structs import structs_proc
from .globals_pass import globals_processing
from .debuginfo import DW_LANG_C11, DwarfBehaviorEnum
from .globals_pass import (
globals_list_creation,
globals_processing,
populate_global_symbol_table,
)
from .debuginfo import DW_LANG_C11, DwarfBehaviorEnum, DebugInfoGenerator
import os
import subprocess
import inspect
from pathlib import Path
from pylibbpf import BpfProgram
import tempfile
from logging import Logger
import logging
VERSION = "v0.1.3"
logger: Logger = logging.getLogger(__name__)
VERSION = "v0.1.4"
def find_bpf_chunks(tree):
@ -30,21 +38,27 @@ def find_bpf_chunks(tree):
def processor(source_code, filename, module):
tree = ast.parse(source_code, filename)
print(ast.dump(tree, indent=4))
logger.debug(ast.dump(tree, indent=4))
bpf_chunks = find_bpf_chunks(tree)
for func_node in bpf_chunks:
print(f"Found BPF function/struct: {func_node.name}")
logger.info(f"Found BPF function/struct: {func_node.name}")
populate_global_symbol_table(tree, module)
license_processing(tree, module)
globals_processing(tree, module)
structs_sym_tab = structs_proc(tree, module, bpf_chunks)
map_sym_tab = maps_proc(tree, module, bpf_chunks)
func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab)
license_processing(tree, module)
globals_processing(tree, module)
globals_list_creation(tree, module)
def compile_to_ir(filename: str, output: str):
def compile_to_ir(filename: str, output: str, loglevel=logging.WARNING):
logging.basicConfig(
level=loglevel, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s"
)
with open(filename) as f:
source = f.read()
@ -53,33 +67,17 @@ def compile_to_ir(filename: str, output: str):
module.triple = "bpf"
if not hasattr(module, "_debug_compile_unit"):
module._file_metadata = module.add_debug_info(
"DIFile",
{ # type: ignore
"filename": filename,
"directory": os.path.dirname(filename),
},
debug_generator = DebugInfoGenerator(module)
debug_generator.generate_file_metadata(filename, os.path.dirname(filename))
debug_generator.generate_debug_cu(
DW_LANG_C11,
f"PythonBPF {VERSION}",
True, # TODO: This is probably not true
# TODO: add a global field here that keeps track of all the globals. Works without it, but I think it might
# be required for kprobes.
True,
)
module._debug_compile_unit = module.add_debug_info(
"DICompileUnit",
{ # type: ignore
"language": DW_LANG_C11,
"file": module._file_metadata, # type: ignore
"producer": f"PythonBPF {VERSION}",
"isOptimized": True, # TODO: This is probably not true
# TODO: add a global field here that keeps track of all the globals. Works without it, but I think it might
# be required for kprobes.
"runtimeVersion": 0,
"emissionKind": 1,
"splitDebugInlining": False,
"nameTableKind": 0,
},
is_distinct=True,
)
module.add_named_metadata("llvm.dbg.cu", module._debug_compile_unit) # type: ignore
processor(source, filename, module)
wchar_size = module.add_metadata(
@ -121,7 +119,7 @@ def compile_to_ir(filename: str, output: str):
module.add_named_metadata("llvm.ident", [f"PythonBPF {VERSION}"])
print(f"IR written to {output}")
logger.info(f"IR written to {output}")
with open(output, "w") as f:
f.write(f'source_filename = "{filename}"\n')
f.write(str(module))
@ -130,7 +128,7 @@ def compile_to_ir(filename: str, output: str):
return output
def compile() -> bool:
def compile(loglevel=logging.WARNING) -> bool:
# Look one level up the stack to the caller of this function
caller_frame = inspect.stack()[1]
caller_file = Path(caller_frame.filename).resolve()
@ -139,7 +137,9 @@ def compile() -> bool:
o_file = caller_file.with_suffix(".o")
success = True
success = compile_to_ir(str(caller_file), str(ll_file)) and success
success = (
compile_to_ir(str(caller_file), str(ll_file), loglevel=loglevel) and success
)
success = bool(
subprocess.run(
@ -157,11 +157,11 @@ def compile() -> bool:
and success
)
print(f"Object written to {o_file}")
logger.info(f"Object written to {o_file}")
return success
def BPF() -> BpfProgram:
def BPF(loglevel=logging.WARNING) -> BpfProgram:
caller_frame = inspect.stack()[1]
src = inspect.getsource(caller_frame.frame)
with tempfile.NamedTemporaryFile(
@ -174,7 +174,7 @@ def BPF() -> BpfProgram:
f.write(src)
f.flush()
source = f.name
compile_to_ir(source, str(inter.name))
compile_to_ir(source, str(inter.name), loglevel=loglevel)
subprocess.run(
[
"llc",

View File

@ -12,6 +12,34 @@ class DebugInfoGenerator:
self.module = module
self._type_cache = {} # Cache for common debug types
def generate_file_metadata(self, filename, dirname):
self.module._file_metadata = self.module.add_debug_info(
"DIFile",
{ # type: ignore
"filename": filename,
"directory": dirname,
},
)
def generate_debug_cu(
self, language, producer: str, is_optimized: bool, is_distinct: bool
):
self.module._debug_compile_unit = self.module.add_debug_info(
"DICompileUnit",
{ # type: ignore
"language": language,
"file": self.module._file_metadata, # type: ignore
"producer": producer,
"isOptimized": is_optimized,
"runtimeVersion": 0,
"emissionKind": 1,
"splitDebugInlining": False,
"nameTableKind": 0,
},
is_distinct=is_distinct,
)
self.module.add_named_metadata("llvm.dbg.cu", self.module._debug_compile_unit) # type: ignore
def get_basic_type(self, name: str, size: int, encoding: int) -> Any:
"""Get or create a basic type with caching"""
key = (name, size, encoding)

View File

@ -1,5 +1,91 @@
import ast
from llvmlite import ir
from logging import Logger
import logging
from typing import Dict
logger: Logger = logging.getLogger(__name__)
def _handle_name_expr(expr: ast.Name, local_sym_tab: Dict, builder: ir.IRBuilder):
"""Handle ast.Name expressions."""
if expr.id in local_sym_tab:
var = local_sym_tab[expr.id].var
val = builder.load(var)
return val, local_sym_tab[expr.id].ir_type
else:
logger.info(f"Undefined variable {expr.id}")
return None
def _handle_constant_expr(expr: ast.Constant):
"""Handle ast.Constant expressions."""
if isinstance(expr.value, int):
return ir.Constant(ir.IntType(64), expr.value), ir.IntType(64)
elif isinstance(expr.value, bool):
return ir.Constant(ir.IntType(1), int(expr.value)), ir.IntType(1)
else:
logger.info("Unsupported constant type")
return None
def _handle_attribute_expr(
expr: ast.Attribute,
local_sym_tab: Dict,
structs_sym_tab: Dict,
builder: ir.IRBuilder,
):
"""Handle ast.Attribute expressions for struct field access."""
if isinstance(expr.value, ast.Name):
var_name = expr.value.id
attr_name = expr.attr
if var_name in local_sym_tab:
var_ptr, var_type, var_metadata = local_sym_tab[var_name]
logger.info(f"Loading attribute {attr_name} from variable {var_name}")
logger.info(f"Variable type: {var_type}, Variable ptr: {var_ptr}")
metadata = structs_sym_tab[var_metadata]
if attr_name in metadata.fields:
gep = metadata.gep(builder, var_ptr, attr_name)
val = builder.load(gep)
field_type = metadata.field_type(attr_name)
return val, field_type
return None
def _handle_deref_call(expr: ast.Call, local_sym_tab: Dict, builder: ir.IRBuilder):
"""Handle deref function calls."""
logger.info(f"Handling deref {ast.dump(expr)}")
if len(expr.args) != 1:
logger.info("deref takes exactly one argument")
return None
arg = expr.args[0]
if (
isinstance(arg, ast.Call)
and isinstance(arg.func, ast.Name)
and arg.func.id == "deref"
):
logger.info("Multiple deref not supported")
return None
if isinstance(arg, ast.Name):
if arg.id in local_sym_tab:
arg_ptr = local_sym_tab[arg.id].var
else:
logger.info(f"Undefined variable {arg.id}")
return None
else:
logger.info("Unsupported argument type for deref")
return None
if arg_ptr is None:
logger.info("Failed to evaluate deref argument")
return None
# Load the value from pointer
val = builder.load(arg_ptr)
return val, local_sym_tab[arg.id].ir_type
def eval_expr(
@ -11,68 +97,32 @@ def eval_expr(
map_sym_tab,
structs_sym_tab=None,
):
print(f"Evaluating expression: {ast.dump(expr)}")
logger.info(f"Evaluating expression: {ast.dump(expr)}")
if isinstance(expr, ast.Name):
if expr.id in local_sym_tab:
var = local_sym_tab[expr.id].var
val = builder.load(var)
return val, local_sym_tab[expr.id].ir_type # return value and type
else:
print(f"Undefined variable {expr.id}")
return None
return _handle_name_expr(expr, local_sym_tab, builder)
elif isinstance(expr, ast.Constant):
if isinstance(expr.value, int):
return ir.Constant(ir.IntType(64), expr.value), ir.IntType(64)
elif isinstance(expr.value, bool):
return ir.Constant(ir.IntType(1), int(expr.value)), ir.IntType(1)
else:
print("Unsupported constant type")
return None
return _handle_constant_expr(expr)
elif isinstance(expr, ast.Call):
if isinstance(expr.func, ast.Name) and expr.func.id == "deref":
return _handle_deref_call(expr, local_sym_tab, builder)
# delayed import to avoid circular dependency
from pythonbpf.helper import HelperHandlerRegistry, handle_helper_call
if isinstance(expr.func, ast.Name):
# check deref
if expr.func.id == "deref":
print(f"Handling deref {ast.dump(expr)}")
if len(expr.args) != 1:
print("deref takes exactly one argument")
return None
arg = expr.args[0]
if (
isinstance(arg, ast.Call)
and isinstance(arg.func, ast.Name)
and arg.func.id == "deref"
):
print("Multiple deref not supported")
return None
if isinstance(arg, ast.Name):
if arg.id in local_sym_tab:
arg = local_sym_tab[arg.id].var
else:
print(f"Undefined variable {arg.id}")
return None
if arg is None:
print("Failed to evaluate deref argument")
return None
# Since we are handling only name case, directly take type from sym tab
val = builder.load(arg)
return val, local_sym_tab[expr.args[0].id].ir_type
# check for helpers
if HelperHandlerRegistry.has_handler(expr.func.id):
return handle_helper_call(
expr,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if isinstance(expr.func, ast.Name) and HelperHandlerRegistry.has_handler(
expr.func.id
):
return handle_helper_call(
expr,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
elif isinstance(expr.func, ast.Attribute):
print(f"Handling method call: {ast.dump(expr.func)}")
logger.info(f"Handling method call: {ast.dump(expr.func)}")
if isinstance(expr.func.value, ast.Call) and isinstance(
expr.func.value.func, ast.Name
):
@ -102,20 +152,8 @@ def eval_expr(
structs_sym_tab,
)
elif isinstance(expr, ast.Attribute):
if isinstance(expr.value, ast.Name):
var_name = expr.value.id
attr_name = expr.attr
if var_name in local_sym_tab:
var_ptr, var_type, var_metadata = local_sym_tab[var_name]
print(f"Loading attribute {attr_name} from variable {var_name}")
print(f"Variable type: {var_type}, Variable ptr: {var_ptr}")
metadata = structs_sym_tab[var_metadata]
if attr_name in metadata.fields:
gep = metadata.gep(builder, var_ptr, attr_name)
val = builder.load(gep)
field_type = metadata.field_type(attr_name)
return val, field_type
print("Unsupported expression evaluation")
return _handle_attribute_expr(expr, local_sym_tab, structs_sym_tab, builder)
logger.info("Unsupported expression evaluation")
return None
@ -129,7 +167,7 @@ def handle_expr(
structs_sym_tab,
):
"""Handle expression statements in the function body."""
print(f"Handling expression: {ast.dump(expr)}")
logger.info(f"Handling expression: {ast.dump(expr)}")
call = expr.value
if isinstance(call, ast.Call):
eval_expr(
@ -142,4 +180,4 @@ def handle_expr(
structs_sym_tab,
)
else:
print("Unsupported expression type")
logger.info("Unsupported expression type")

View File

@ -46,15 +46,15 @@ def handle_assign(
):
"""Handle assignment statements in the function body."""
if len(stmt.targets) != 1:
print("Unsupported multiassignment")
logger.info("Unsupported multiassignment")
return
num_types = ("c_int32", "c_int64", "c_uint32", "c_uint64")
target = stmt.targets[0]
print(f"Handling assignment to {ast.dump(target)}")
logger.info(f"Handling assignment to {ast.dump(target)}")
if not isinstance(target, ast.Name) and not isinstance(target, ast.Attribute):
print("Unsupported assignment target")
logger.info("Unsupported assignment target")
return
var_name = target.id if isinstance(target, ast.Name) else target.value.id
rval = stmt.value
@ -87,11 +87,11 @@ def handle_assign(
# print(f"Assigned to struct field {var_name}.{field_name}")
pass
if val is None:
print("Failed to evaluate struct field assignment")
logger.info("Failed to evaluate struct field assignment")
return
print(field_ptr)
logger.info(field_ptr)
builder.store(val[0], field_ptr)
print(f"Assigned to struct field {var_name}.{field_name}")
logger.info(f"Assigned to struct field {var_name}.{field_name}")
return
elif isinstance(rval, ast.Constant):
if isinstance(rval.value, bool):
@ -103,7 +103,7 @@ def handle_assign(
builder.store(
ir.Constant(ir.IntType(1), 0), local_sym_tab[var_name].var
)
print(f"Assigned constant {rval.value} to {var_name}")
logger.info(f"Assigned constant {rval.value} to {var_name}")
elif isinstance(rval.value, int):
# Assume c_int64 for now
# var = builder.alloca(ir.IntType(64), name=var_name)
@ -111,7 +111,7 @@ def handle_assign(
builder.store(
ir.Constant(ir.IntType(64), rval.value), local_sym_tab[var_name].var
)
print(f"Assigned constant {rval.value} to {var_name}")
logger.info(f"Assigned constant {rval.value} to {var_name}")
elif isinstance(rval.value, str):
str_val = rval.value.encode("utf-8") + b"\x00"
str_const = ir.Constant(
@ -125,13 +125,13 @@ def handle_assign(
global_str.initializer = str_const
str_ptr = builder.bitcast(global_str, ir.PointerType(ir.IntType(8)))
builder.store(str_ptr, local_sym_tab[var_name].var)
print(f"Assigned string constant '{rval.value}' to {var_name}")
logger.info(f"Assigned string constant '{rval.value}' to {var_name}")
else:
print("Unsupported constant type")
logger.info("Unsupported constant type")
elif isinstance(rval, ast.Call):
if isinstance(rval.func, ast.Name):
call_type = rval.func.id
print(f"Assignment call type: {call_type}")
logger.info(f"Assignment call type: {call_type}")
if (
call_type in num_types
and len(rval.args) == 1
@ -145,7 +145,7 @@ def handle_assign(
ir.Constant(ir_type, rval.args[0].value),
local_sym_tab[var_name].var,
)
print(
logger.info(
f"Assigned {call_type} constant "
f"{rval.args[0].value} to {var_name}"
)
@ -162,9 +162,9 @@ def handle_assign(
structs_sym_tab,
)
builder.store(val[0], local_sym_tab[var_name].var)
print(f"Assigned constant {rval.func.id} to {var_name}")
logger.info(f"Assigned constant {rval.func.id} to {var_name}")
elif call_type == "deref" and len(rval.args) == 1:
print(f"Handling deref assignment {ast.dump(rval)}")
logger.info(f"Handling deref assignment {ast.dump(rval)}")
val = eval_expr(
func,
module,
@ -175,25 +175,40 @@ def handle_assign(
structs_sym_tab,
)
if val is None:
print("Failed to evaluate deref argument")
logger.info("Failed to evaluate deref argument")
return
print(f"Dereferenced value: {val}, storing in {var_name}")
logger.info(f"Dereferenced value: {val}, storing in {var_name}")
builder.store(val[0], local_sym_tab[var_name].var)
print(f"Dereferenced and assigned to {var_name}")
logger.info(f"Dereferenced and assigned to {var_name}")
elif call_type in structs_sym_tab and len(rval.args) == 0:
struct_info = structs_sym_tab[call_type]
ir_type = struct_info.ir_type
# var = builder.alloca(ir_type, name=var_name)
# Null init
builder.store(ir.Constant(ir_type, None), local_sym_tab[var_name].var)
print(f"Assigned struct {call_type} to {var_name}")
logger.info(f"Assigned struct {call_type} to {var_name}")
else:
print(f"Unsupported assignment call type: {call_type}")
logger.info(f"Unsupported assignment call type: {call_type}")
elif isinstance(rval.func, ast.Attribute):
print(f"Assignment call attribute: {ast.dump(rval.func)}")
logger.info(f"Assignment call attribute: {ast.dump(rval.func)}")
if isinstance(rval.func.value, ast.Name):
# TODO: probably a struct access
print(f"TODO STRUCT ACCESS {ast.dump(rval)}")
if rval.func.value.id in map_sym_tab:
map_name = rval.func.value.id
method_name = rval.func.attr
if HelperHandlerRegistry.has_handler(method_name):
val = handle_helper_call(
rval,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
builder.store(val[0], local_sym_tab[var_name].var)
else:
# TODO: probably a struct access
logger.info(f"TODO STRUCT ACCESS {ast.dump(rval)}")
elif isinstance(rval.func.value, ast.Call) and isinstance(
rval.func.value.func, ast.Name
):
@ -214,15 +229,13 @@ def handle_assign(
# var.align = 8
builder.store(val[0], local_sym_tab[var_name].var)
else:
print("Unsupported assignment call structure")
logger.info("Unsupported assignment call structure")
else:
print("Unsupported assignment call function type")
logger.info("Unsupported assignment call function type")
elif isinstance(rval, ast.BinOp):
handle_binary_op(
rval, module, builder, var_name, local_sym_tab, map_sym_tab, func
)
handle_binary_op(rval, module, builder, var_name, local_sym_tab)
else:
print("Unsupported assignment value type")
logger.info("Unsupported assignment value type")
def handle_cond(func, module, builder, cond, local_sym_tab, map_sym_tab):
@ -232,7 +245,7 @@ def handle_cond(func, module, builder, cond, local_sym_tab, map_sym_tab):
elif isinstance(cond.value, int):
return ir.Constant(ir.IntType(1), int(bool(cond.value)))
else:
print("Unsupported constant type in condition")
logger.info("Unsupported constant type in condition")
return None
elif isinstance(cond, ast.Name):
if cond.id in local_sym_tab:
@ -249,12 +262,12 @@ def handle_cond(func, module, builder, cond, local_sym_tab, map_sym_tab):
val = builder.icmp_signed("!=", val, zero)
return val
else:
print(f"Undefined variable {cond.id} in condition")
logger.info(f"Undefined variable {cond.id} in condition")
return None
elif isinstance(cond, ast.Compare):
lhs = eval_expr(func, module, builder, cond.left, local_sym_tab, map_sym_tab)[0]
if len(cond.ops) != 1 or len(cond.comparators) != 1:
print("Unsupported complex comparison")
logger.info("Unsupported complex comparison")
return None
rhs = eval_expr(
func, module, builder, cond.comparators[0], local_sym_tab, map_sym_tab
@ -269,7 +282,7 @@ def handle_cond(func, module, builder, cond, local_sym_tab, map_sym_tab):
elif lhs.type.width > rhs.type.width:
rhs = builder.sext(rhs, lhs.type)
else:
print("Type mismatch in comparison")
logger.info("Type mismatch in comparison")
return None
if isinstance(op, ast.Eq):
@ -285,10 +298,10 @@ def handle_cond(func, module, builder, cond, local_sym_tab, map_sym_tab):
elif isinstance(op, ast.GtE):
return builder.icmp_signed(">=", lhs, rhs)
else:
print("Unsupported comparison operator")
logger.info("Unsupported comparison operator")
return None
else:
print("Unsupported condition expression")
logger.info("Unsupported condition expression")
return None
@ -296,7 +309,7 @@ def handle_if(
func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_sym_tab=None
):
"""Handle if statements in the function body."""
print("Handling if statement")
logger.info("Handling if statement")
# start = builder.block.parent
then_block = func.append_basic_block(name="if.then")
merge_block = func.append_basic_block(name="if.end")
@ -349,7 +362,7 @@ def process_stmt(
did_return,
ret_type=ir.IntType(64),
):
print(f"Processing statement: {ast.dump(stmt)}")
logger.info(f"Processing statement: {ast.dump(stmt)}")
if isinstance(stmt, ast.Expr):
handle_expr(
func,
@ -434,11 +447,11 @@ def allocate_mem(
)
elif isinstance(stmt, ast.Assign):
if len(stmt.targets) != 1:
print("Unsupported multiassignment")
logger.info("Unsupported multiassignment")
continue
target = stmt.targets[0]
if not isinstance(target, ast.Name):
print("Unsupported assignment target")
logger.info("Unsupported assignment target")
continue
var_name = target.id
rval = stmt.value
@ -449,25 +462,27 @@ def allocate_mem(
ir_type = ctypes_to_ir(call_type)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
print(f"Pre-allocated variable {var_name} of type {call_type}")
logger.info(
f"Pre-allocated variable {var_name} of type {call_type}"
)
elif HelperHandlerRegistry.has_handler(call_type):
# Assume return type is int64 for now
ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
print(f"Pre-allocated variable {var_name} for helper")
logger.info(f"Pre-allocated variable {var_name} for helper")
elif call_type == "deref" and len(rval.args) == 1:
# Assume return type is int64 for now
ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
print(f"Pre-allocated variable {var_name} for deref")
logger.info(f"Pre-allocated variable {var_name} for deref")
elif call_type in structs_sym_tab:
struct_info = structs_sym_tab[call_type]
ir_type = struct_info.ir_type
var = builder.alloca(ir_type, name=var_name)
has_metadata = True
print(
logger.info(
f"Pre-allocated variable {var_name} "
f"for struct {call_type}"
)
@ -475,38 +490,38 @@ def allocate_mem(
ir_type = ir.PointerType(ir.IntType(64))
var = builder.alloca(ir_type, name=var_name)
# var.align = ir_type.width // 8
print(f"Pre-allocated variable {var_name} for map")
logger.info(f"Pre-allocated variable {var_name} for map")
else:
print("Unsupported assignment call function type")
logger.info("Unsupported assignment call function type")
continue
elif isinstance(rval, ast.Constant):
if isinstance(rval.value, bool):
ir_type = ir.IntType(1)
var = builder.alloca(ir_type, name=var_name)
var.align = 1
print(f"Pre-allocated variable {var_name} of type c_bool")
logger.info(f"Pre-allocated variable {var_name} of type c_bool")
elif isinstance(rval.value, int):
# Assume c_int64 for now
ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
print(f"Pre-allocated variable {var_name} of type c_int64")
logger.info(f"Pre-allocated variable {var_name} of type c_int64")
elif isinstance(rval.value, str):
ir_type = ir.PointerType(ir.IntType(8))
var = builder.alloca(ir_type, name=var_name)
var.align = 8
print(f"Pre-allocated variable {var_name} of type string")
logger.info(f"Pre-allocated variable {var_name} of type string")
else:
print("Unsupported constant type")
logger.info("Unsupported constant type")
continue
elif isinstance(rval, ast.BinOp):
# Assume c_int64 for now
ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8
print(f"Pre-allocated variable {var_name} of type c_int64")
logger.info(f"Pre-allocated variable {var_name} of type c_int64")
else:
print("Unsupported assignment value type")
logger.info("Unsupported assignment value type")
continue
if has_metadata:
@ -537,7 +552,7 @@ def process_func_body(
structs_sym_tab,
)
print(f"Local symbol table: {local_sym_tab.keys()}")
logger.info(f"Local symbol table: {local_sym_tab.keys()}")
for stmt in func_node.body:
did_return = process_stmt(
@ -609,7 +624,7 @@ def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab):
if is_global:
continue
func_type = get_probe_string(func_node)
print(f"Found probe_string of {func_node.name}: {func_type}")
logger.info(f"Found probe_string of {func_node.name}: {func_type}")
process_bpf_chunk(
func_node,

View File

@ -1,8 +1,121 @@
from llvmlite import ir
import ast
from logging import Logger
import logging
from .type_deducer import ctypes_to_ir
def emit_globals(module: ir.Module, names: list[str]):
logger: Logger = logging.getLogger(__name__)
# TODO: this is going to be a huge fuck of a headache in the future.
global_sym_tab = []
def populate_global_symbol_table(tree, module: ir.Module):
for node in tree.body:
if isinstance(node, ast.FunctionDef):
for dec in node.decorator_list:
if (
isinstance(dec, ast.Call)
and isinstance(dec.func, ast.Name)
and dec.func.id == "section"
and len(dec.args) == 1
and isinstance(dec.args[0], ast.Constant)
and isinstance(dec.args[0].value, str)
):
global_sym_tab.append(node)
elif isinstance(dec, ast.Name) and dec.id == "bpfglobal":
global_sym_tab.append(node)
elif isinstance(dec, ast.Name) and dec.id == "map":
global_sym_tab.append(node)
return False
def emit_global(module: ir.Module, node, name):
logger.info(f"global identifier {name} processing")
# deduce LLVM type from the annotated return
if not isinstance(node.returns, ast.Name):
raise ValueError(f"Unsupported return annotation {ast.dump(node.returns)}")
ty = ctypes_to_ir(node.returns.id)
# extract the return expression
# TODO: turn this return extractor into a generic function I can use everywhere.
ret_stmt = node.body[0]
if not isinstance(ret_stmt, ast.Return) or ret_stmt.value is None:
raise ValueError(f"Global '{name}' has no valid return")
init_val = ret_stmt.value
# simple constant like "return 0"
if isinstance(init_val, ast.Constant):
llvm_init = ir.Constant(ty, init_val.value)
# variable reference like "return SOME_CONST"
elif isinstance(init_val, ast.Name):
# need symbol resolution here, stub as 0 for now
raise ValueError(f"Name reference {init_val.id} not yet supported")
# constructor call like "return c_int64(0)" or dataclass(...)
elif isinstance(init_val, ast.Call):
if len(init_val.args) >= 1 and isinstance(init_val.args[0], ast.Constant):
llvm_init = ir.Constant(ty, init_val.args[0].value)
else:
logger.info("Defaulting to zero as no constant argument found")
llvm_init = ir.Constant(ty, 0)
else:
raise ValueError(f"Unsupported return expr {ast.dump(init_val)}")
gvar = ir.GlobalVariable(module, ty, name=name)
gvar.initializer = llvm_init
gvar.align = 8
gvar.linkage = "dso_local"
gvar.global_constant = False
return gvar
def globals_processing(tree, module):
"""Process stuff decorated with @bpf and @bpfglobal except license and return the section name"""
globals_sym_tab = []
for node in tree.body:
# Skip non-assignment and non-function nodes
if not (isinstance(node, ast.FunctionDef)):
continue
# Get the name based on node type
if isinstance(node, ast.FunctionDef):
name = node.name
else:
continue
# Check for duplicate names
if name in globals_sym_tab:
raise SyntaxError(f"ERROR: Global name '{name}' previously defined")
else:
globals_sym_tab.append(name)
if isinstance(node, ast.FunctionDef) and node.name != "LICENSE":
decorators = [
dec.id for dec in node.decorator_list if isinstance(dec, ast.Name)
]
if "bpf" in decorators and "bpfglobal" in decorators:
if (
len(node.body) == 1
and isinstance(node.body[0], ast.Return)
and node.body[0].value is not None
and isinstance(
node.body[0].value, (ast.Constant, ast.Name, ast.Call)
)
):
emit_global(module, node, name)
else:
raise SyntaxError(f"ERROR: Invalid syntax for {name} global")
return None
def emit_llvm_compiler_used(module: ir.Module, names: list[str]):
"""
Emit the @llvm.compiler.used global given a list of function/global names.
"""
@ -24,7 +137,7 @@ def emit_globals(module: ir.Module, names: list[str]):
gv.section = "llvm.metadata"
def globals_processing(tree, module: ir.Module):
def globals_list_creation(tree, module: ir.Module):
collected = ["LICENSE"]
for node in tree.body:
@ -40,10 +153,11 @@ def globals_processing(tree, module: ir.Module):
):
collected.append(node.name)
elif isinstance(dec, ast.Name) and dec.id == "bpfglobal":
collected.append(node.name)
# NOTE: all globals other than
# elif isinstance(dec, ast.Name) and dec.id == "bpfglobal":
# collected.append(node.name)
elif isinstance(dec, ast.Name) and dec.id == "map":
collected.append(node.name)
emit_globals(module, collected)
emit_llvm_compiler_used(module, collected)

View File

@ -9,6 +9,10 @@ from .helper_utils import (
simple_string_print,
get_data_ptr_and_size,
)
from logging import Logger
import logging
logger: Logger = logging.getLogger(__name__)
class BPFHelperID(Enum):
@ -322,7 +326,7 @@ def handle_helper_call(
elif isinstance(call.func, ast.Attribute):
method_name = call.func.attr
value = call.func.value
print(f"Handling method call: {ast.dump(call.func)}")
logger.info(f"Handling method call: {ast.dump(call.func)}")
# Get map pointer from different styles of map access
if isinstance(value, ast.Call) and isinstance(value.func, ast.Name):
# Func style: my_map().lookup(key)

View File

@ -1,5 +1,9 @@
from llvmlite import ir
import ast
from logging import Logger
import logging
logger: Logger = logging.getLogger(__name__)
def emit_license(module: ir.Module, license_str: str):
@ -41,9 +45,9 @@ def license_processing(tree, module):
emit_license(module, node.body[0].value.value)
return "LICENSE"
else:
print("ERROR: LICENSE() must return a string literal")
logger.info("ERROR: LICENSE() must return a string literal")
return None
else:
print("ERROR: LICENSE already defined")
logger.info("ERROR: LICENSE already defined")
return None
return None

View File

@ -158,8 +158,7 @@ def create_ringbuf_debug_info(module, map_global, map_name, map_params):
type_ptr = generator.create_pointer_type(type_array, 64)
type_member = generator.create_struct_member("type", type_ptr, 0)
max_entries_array = generator.create_array_type(
int_type, map_params["max_entries"])
max_entries_array = generator.create_array_type(int_type, map_params["max_entries"])
max_entries_ptr = generator.create_pointer_type(max_entries_array, 64)
max_entries_member = generator.create_struct_member(
"max_entries", max_entries_ptr, 64
@ -167,8 +166,7 @@ def create_ringbuf_debug_info(module, map_global, map_name, map_params):
elements_arr = [type_member, max_entries_member]
struct_type = generator.create_struct_type(
elements_arr, 128, is_distinct=True)
struct_type = generator.create_struct_type(elements_arr, 128, is_distinct=True)
global_var = generator.create_global_var_debug_info(
map_name, struct_type, is_local=False

View File

@ -19,7 +19,7 @@ def structs_proc(tree, module, chunks):
structs_sym_tab = {}
for cls_node in chunks:
if is_bpf_struct(cls_node):
print(f"Found BPF struct: {cls_node.name}")
logger.info(f"Found BPF struct: {cls_node.name}")
struct_info = process_bpf_struct(cls_node, module)
structs_sym_tab[cls_node.name] = struct_info
return structs_sym_tab

View File

@ -0,0 +1,27 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <linux/types.h>
struct test_struct {
__u64 a;
__u64 b;
};
struct test_struct w = {};
volatile __u64 prev_time = 0;
SEC("tracepoint/syscalls/sys_enter_execve")
int trace_execve(void *ctx)
{
bpf_printk("previous %ul now %ul", w.b, w.a);
__u64 ts = bpf_ktime_get_ns();
bpf_printk("prev %ul now %ul", prev_time, ts);
w.a = ts;
w.b = prev_time;
prev_time = ts;
return 0;
}
char LICENSE[] SEC("license") = "GPL";

View File

@ -0,0 +1,101 @@
import logging
from pythonbpf import compile, bpf, section, bpfglobal, compile_to_ir
from ctypes import c_void_p, c_int64, c_int32
@bpf
@bpfglobal
def somevalue() -> c_int32:
return c_int32(42)
@bpf
@bpfglobal
def somevalue2() -> c_int64:
return c_int64(69)
@bpf
@bpfglobal
def somevalue1() -> c_int32:
return c_int32(42)
# --- Passing examples ---
# Simple constant return
@bpf
@bpfglobal
def g1() -> c_int64:
return c_int64(42)
# Constructor with one constant argument
@bpf
@bpfglobal
def g2() -> c_int64:
return c_int64(69)
# --- Failing examples ---
# No return annotation
# @bpf
# @bpfglobal
# def g3():
# return 42
# Return annotation is complex
# @bpf
# @bpfglobal
# def g4() -> List[int]:
# return []
# # Return is missing
# @bpf
# @bpfglobal
# def g5() -> c_int64:
# pass
# # Return is a variable reference
# #TODO: maybe fix this sometime later. It defaults to 0
# CONST = 5
# @bpf
# @bpfglobal
# def g6() -> c_int64:
# return c_int64(CONST)
# Constructor with multiple args
#TODO: this is not working. should it work ?
@bpf
@bpfglobal
def g7() -> c_int64:
return c_int64(1)
# Dataclass call
#TODO: fails with dataclass
# @dataclass
# class Point:
# x: c_int64
# y: c_int64
# @bpf
# @bpfglobal
# def g8() -> Point:
# return Point(1, 2)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def sometag(ctx: c_void_p) -> c_int64:
print("test")
global somevalue
somevalue = 2
print(f"{somevalue}")
return c_int64(1)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile_to_ir("globals.py", "globals.ll", loglevel=logging.INFO)
compile()

View File

@ -0,0 +1,21 @@
import logging
from pythonbpf import compile, bpf, section, bpfglobal, compile_to_ir
from ctypes import c_void_p, c_int64
# This should not pass as somevalue is not declared at all.
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def sometag(ctx: c_void_p) -> c_int64:
print("test")
print(f"{somevalue}") # noqa: F821
return c_int64(1)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile_to_ir("globals.py", "globals.ll", loglevel=logging.INFO)
compile()

View File

@ -3,9 +3,9 @@ from ctypes import c_void_p, c_int64
@bpf
@section("sometag1")
@section("tracepoint/syscalls/sys_enter_sync")
def sometag(ctx: c_void_p) -> c_int64:
a = 1 + 2 + 1
a = 1 + 2 + 1 + 12 + 13
print(f"{a}")
return c_int64(0)

View File

@ -3,11 +3,12 @@ from ctypes import c_void_p, c_int64
@bpf
@section("sometag1")
@section("tracepoint/syscalls/sys_enter_sync")
def sometag(ctx: c_void_p) -> c_int64:
b = 1 + 2
a = 1 + b
return c_int64(a)
print(f"{a}")
return c_int64(0)
@bpf

View File

@ -1,7 +1,7 @@
from pythonbpf import bpf, map, struct, section, bpfglobal, compile, compile_to_ir, BPF
from pythonbpf.helper import ktime, pid
from pythonbpf.maps import PerfEventArray
import logging
from ctypes import c_void_p, c_int32, c_uint64
@ -42,8 +42,8 @@ def LICENSE() -> str:
return "GPL"
compile()
compile_to_ir("perf_buffer_map.py", "perf_buffer_map.ll")
compile(loglevel=logging.INFO)
b = BPF()
b.load_and_attach()