13 Commits

11 changed files with 141 additions and 305 deletions

View File

@ -9,6 +9,7 @@ logger: Logger = logging.getLogger(__name__)
def recursive_dereferencer(var, builder):
"""dereference until primitive type comes out"""
# TODO: Not worrying about stack overflow for now
logger.info(f"Dereferencing {var}, type is {var.type}")
if isinstance(var.type, ir.PointerType):
a = builder.load(var)
return recursive_dereferencer(a, builder)
@ -18,7 +19,7 @@ def recursive_dereferencer(var, builder):
raise TypeError(f"Unsupported type for dereferencing: {var.type}")
def get_operand_value(operand, module, builder, local_sym_tab):
def get_operand_value(operand, builder, local_sym_tab):
"""Extract the value from an operand, handling variables and constants."""
if isinstance(operand, ast.Name):
if operand.id in local_sym_tab:
@ -29,14 +30,14 @@ def get_operand_value(operand, module, builder, local_sym_tab):
return ir.Constant(ir.IntType(64), operand.value)
raise TypeError(f"Unsupported constant type: {type(operand.value)}")
elif isinstance(operand, ast.BinOp):
return handle_binary_op_impl(operand, module, builder, local_sym_tab)
return handle_binary_op_impl(operand, builder, local_sym_tab)
raise TypeError(f"Unsupported operand type: {type(operand)}")
def handle_binary_op_impl(rval, module, builder, local_sym_tab):
def handle_binary_op_impl(rval, builder, local_sym_tab):
op = rval.op
left = get_operand_value(rval.left, module, builder, local_sym_tab)
right = get_operand_value(rval.right, module, builder, local_sym_tab)
left = get_operand_value(rval.left, builder, local_sym_tab)
right = get_operand_value(rval.right, builder, local_sym_tab)
logger.info(f"left is {left}, right is {right}, op is {op}")
# Map AST operation nodes to LLVM IR builder methods
@ -61,6 +62,11 @@ def handle_binary_op_impl(rval, module, builder, local_sym_tab):
raise SyntaxError("Unsupported binary operation")
def handle_binary_op(rval, module, builder, var_name, local_sym_tab):
result = handle_binary_op_impl(rval, module, builder, local_sym_tab)
builder.store(result, local_sym_tab[var_name].var)
def handle_binary_op(rval, builder, var_name, local_sym_tab):
result = handle_binary_op_impl(rval, builder, local_sym_tab)
if var_name and var_name in local_sym_tab:
logger.info(
f"Storing result {result} into variable {local_sym_tab[var_name].var}"
)
builder.store(result, local_sym_tab[var_name].var)
return result, result.type

View File

@ -4,11 +4,7 @@ from .license_pass import license_processing
from .functions_pass import func_proc
from .maps import maps_proc
from .structs import structs_proc
from .globals_pass import (
globals_list_creation,
globals_processing,
populate_global_symbol_table,
)
from .globals_pass import globals_processing
from .debuginfo import DW_LANG_C11, DwarfBehaviorEnum, DebugInfoGenerator
import os
import subprocess
@ -44,18 +40,15 @@ def processor(source_code, filename, module):
for func_node in bpf_chunks:
logger.info(f"Found BPF function/struct: {func_node.name}")
populate_global_symbol_table(tree, module)
license_processing(tree, module)
globals_processing(tree, module)
structs_sym_tab = structs_proc(tree, module, bpf_chunks)
map_sym_tab = maps_proc(tree, module, bpf_chunks)
func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab)
globals_list_creation(tree, module)
license_processing(tree, module)
globals_processing(tree, module)
def compile_to_ir(filename: str, output: str, loglevel=logging.WARNING):
def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
logging.basicConfig(
level=loglevel, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s"
)
@ -128,7 +121,7 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.WARNING):
return output
def compile(loglevel=logging.WARNING) -> bool:
def compile(loglevel=logging.INFO) -> bool:
# Look one level up the stack to the caller of this function
caller_frame = inspect.stack()[1]
caller_file = Path(caller_frame.filename).resolve()
@ -161,7 +154,7 @@ def compile(loglevel=logging.WARNING) -> bool:
return success
def BPF(loglevel=logging.WARNING) -> BpfProgram:
def BPF(loglevel=logging.INFO) -> BpfProgram:
caller_frame = inspect.stack()[1]
src = inspect.getsource(caller_frame.frame)
with tempfile.NamedTemporaryFile(

View File

@ -233,7 +233,7 @@ def handle_assign(
else:
logger.info("Unsupported assignment call function type")
elif isinstance(rval, ast.BinOp):
handle_binary_op(rval, module, builder, var_name, local_sym_tab)
handle_binary_op(rval, builder, var_name, local_sym_tab)
else:
logger.info("Unsupported assignment value type")
@ -385,24 +385,49 @@ def process_stmt(
)
elif isinstance(stmt, ast.Return):
if stmt.value is None:
builder.ret(ir.Constant(ir.IntType(32), 0))
builder.ret(ir.Constant(ir.IntType(64), 0))
did_return = True
elif (
isinstance(stmt.value, ast.Call)
and isinstance(stmt.value.func, ast.Name)
and len(stmt.value.args) == 1
and isinstance(stmt.value.args[0], ast.Constant)
and isinstance(stmt.value.args[0].value, int)
):
call_type = stmt.value.func.id
if ctypes_to_ir(call_type) != ret_type:
raise ValueError(
"Return type mismatch: expected"
f"{ctypes_to_ir(call_type)}, got {call_type}"
)
else:
builder.ret(ir.Constant(ret_type, stmt.value.args[0].value))
if isinstance(stmt.value.args[0], ast.Constant) and isinstance(
stmt.value.args[0].value, int
):
call_type = stmt.value.func.id
if ctypes_to_ir(call_type) != ret_type:
raise ValueError(
"Return type mismatch: expected"
f"{ctypes_to_ir(call_type)}, got {call_type}"
)
else:
builder.ret(ir.Constant(ret_type, stmt.value.args[0].value))
did_return = True
elif isinstance(stmt.value.args[0], ast.BinOp):
# TODO: Should be routed through eval_expr
val = handle_binary_op(stmt.value.args[0], builder, None, local_sym_tab)
if val is None:
raise ValueError("Failed to evaluate return expression")
if val[1] != ret_type:
raise ValueError(
"Return type mismatch: expected " f"{ret_type}, got {val[1]}"
)
builder.ret(val[0])
did_return = True
elif isinstance(stmt.value.args[0], ast.Name):
if stmt.value.args[0].id in local_sym_tab:
var = local_sym_tab[stmt.value.args[0].id].var
val = builder.load(var)
if val.type != ret_type:
raise ValueError(
"Return type mismatch: expected"
f"{ret_type}, got {val.type}"
)
builder.ret(val)
did_return = True
else:
raise ValueError("Failed to evaluate return expression")
elif isinstance(stmt.value, ast.Name):
if stmt.value.id == "XDP_PASS":
builder.ret(ir.Constant(ret_type, 2))
@ -455,6 +480,9 @@ def allocate_mem(
continue
var_name = target.id
rval = stmt.value
if var_name in local_sym_tab:
logger.info(f"Variable {var_name} already allocated")
continue
if isinstance(rval, ast.Call):
if isinstance(rval.func, ast.Name):
call_type = rval.func.id
@ -568,7 +596,7 @@ def process_func_body(
)
if not did_return:
builder.ret(ir.Constant(ir.IntType(32), 0))
builder.ret(ir.Constant(ir.IntType(64), 0))
def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_tab):

View File

@ -1,121 +1,8 @@
from llvmlite import ir
import ast
from logging import Logger
import logging
from .type_deducer import ctypes_to_ir
logger: Logger = logging.getLogger(__name__)
# TODO: this is going to be a huge fuck of a headache in the future.
global_sym_tab = []
def populate_global_symbol_table(tree, module: ir.Module):
for node in tree.body:
if isinstance(node, ast.FunctionDef):
for dec in node.decorator_list:
if (
isinstance(dec, ast.Call)
and isinstance(dec.func, ast.Name)
and dec.func.id == "section"
and len(dec.args) == 1
and isinstance(dec.args[0], ast.Constant)
and isinstance(dec.args[0].value, str)
):
global_sym_tab.append(node)
elif isinstance(dec, ast.Name) and dec.id == "bpfglobal":
global_sym_tab.append(node)
elif isinstance(dec, ast.Name) and dec.id == "map":
global_sym_tab.append(node)
return False
def emit_global(module: ir.Module, node, name):
logger.info(f"global identifier {name} processing")
# deduce LLVM type from the annotated return
if not isinstance(node.returns, ast.Name):
raise ValueError(f"Unsupported return annotation {ast.dump(node.returns)}")
ty = ctypes_to_ir(node.returns.id)
# extract the return expression
# TODO: turn this return extractor into a generic function I can use everywhere.
ret_stmt = node.body[0]
if not isinstance(ret_stmt, ast.Return) or ret_stmt.value is None:
raise ValueError(f"Global '{name}' has no valid return")
init_val = ret_stmt.value
# simple constant like "return 0"
if isinstance(init_val, ast.Constant):
llvm_init = ir.Constant(ty, init_val.value)
# variable reference like "return SOME_CONST"
elif isinstance(init_val, ast.Name):
# need symbol resolution here, stub as 0 for now
raise ValueError(f"Name reference {init_val.id} not yet supported")
# constructor call like "return c_int64(0)" or dataclass(...)
elif isinstance(init_val, ast.Call):
if len(init_val.args) >= 1 and isinstance(init_val.args[0], ast.Constant):
llvm_init = ir.Constant(ty, init_val.args[0].value)
else:
logger.info("Defaulting to zero as no constant argument found")
llvm_init = ir.Constant(ty, 0)
else:
raise ValueError(f"Unsupported return expr {ast.dump(init_val)}")
gvar = ir.GlobalVariable(module, ty, name=name)
gvar.initializer = llvm_init
gvar.align = 8
gvar.linkage = "dso_local"
gvar.global_constant = False
return gvar
def globals_processing(tree, module):
"""Process stuff decorated with @bpf and @bpfglobal except license and return the section name"""
globals_sym_tab = []
for node in tree.body:
# Skip non-assignment and non-function nodes
if not (isinstance(node, ast.FunctionDef)):
continue
# Get the name based on node type
if isinstance(node, ast.FunctionDef):
name = node.name
else:
continue
# Check for duplicate names
if name in globals_sym_tab:
raise SyntaxError(f"ERROR: Global name '{name}' previously defined")
else:
globals_sym_tab.append(name)
if isinstance(node, ast.FunctionDef) and node.name != "LICENSE":
decorators = [
dec.id for dec in node.decorator_list if isinstance(dec, ast.Name)
]
if "bpf" in decorators and "bpfglobal" in decorators:
if (
len(node.body) == 1
and isinstance(node.body[0], ast.Return)
and node.body[0].value is not None
and isinstance(
node.body[0].value, (ast.Constant, ast.Name, ast.Call)
)
):
emit_global(module, node, name)
else:
raise SyntaxError(f"ERROR: Invalid syntax for {name} global")
return None
def emit_llvm_compiler_used(module: ir.Module, names: list[str]):
def emit_globals(module: ir.Module, names: list[str]):
"""
Emit the @llvm.compiler.used global given a list of function/global names.
"""
@ -137,7 +24,7 @@ def emit_llvm_compiler_used(module: ir.Module, names: list[str]):
gv.section = "llvm.metadata"
def globals_list_creation(tree, module: ir.Module):
def globals_processing(tree, module: ir.Module):
collected = ["LICENSE"]
for node in tree.body:
@ -153,11 +40,10 @@ def globals_list_creation(tree, module: ir.Module):
):
collected.append(node.name)
# NOTE: all globals other than
# elif isinstance(dec, ast.Name) and dec.id == "bpfglobal":
# collected.append(node.name)
elif isinstance(dec, ast.Name) and dec.id == "bpfglobal":
collected.append(node.name)
elif isinstance(dec, ast.Name) and dec.id == "map":
collected.append(node.name)
emit_llvm_compiler_used(module, collected)
emit_globals(module, collected)

View File

@ -1,27 +0,0 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <linux/types.h>
struct test_struct {
__u64 a;
__u64 b;
};
struct test_struct w = {};
volatile __u64 prev_time = 0;
SEC("tracepoint/syscalls/sys_enter_execve")
int trace_execve(void *ctx)
{
bpf_printk("previous %ul now %ul", w.b, w.a);
__u64 ts = bpf_ktime_get_ns();
bpf_printk("prev %ul now %ul", prev_time, ts);
w.a = ts;
w.b = prev_time;
prev_time = ts;
return 0;
}
char LICENSE[] SEC("license") = "GPL";

View File

@ -4,6 +4,18 @@ from pythonbpf.maps import HashMap
from ctypes import c_void_p, c_int64
# NOTE: I have decided to not fix this example for now.
# The issue is in line 31, where we are passing an expression.
# The update helper expects a pointer type. But the problem is
# that we must allocate the space for said pointer in the first
# basic block. As that usage is in a different basic block, we
# are unable to cast the expression to a pointer type. (as we never
# allocated space for it).
# Shall we change our space allocation logic? That allows users to
# spam the same helper with the same args, and still run out of
# stack space. So we consider this usage invalid for now.
# Might fix it later.
@bpf
@map
@ -14,12 +26,12 @@ def count() -> HashMap:
@bpf
@section("xdp")
def hello_world(ctx: c_void_p) -> c_int64:
prev = count().lookup(0)
prev = count.lookup(0)
if prev:
count().update(0, prev + 1)
count.update(0, prev + 1)
return XDP_PASS
else:
count().update(0, 1)
count.update(0, 1)
return XDP_PASS

View File

@ -1,101 +0,0 @@
import logging
from pythonbpf import compile, bpf, section, bpfglobal, compile_to_ir
from ctypes import c_void_p, c_int64, c_int32
@bpf
@bpfglobal
def somevalue() -> c_int32:
return c_int32(42)
@bpf
@bpfglobal
def somevalue2() -> c_int64:
return c_int64(69)
@bpf
@bpfglobal
def somevalue1() -> c_int32:
return c_int32(42)
# --- Passing examples ---
# Simple constant return
@bpf
@bpfglobal
def g1() -> c_int64:
return c_int64(42)
# Constructor with one constant argument
@bpf
@bpfglobal
def g2() -> c_int64:
return c_int64(69)
# --- Failing examples ---
# No return annotation
# @bpf
# @bpfglobal
# def g3():
# return 42
# Return annotation is complex
# @bpf
# @bpfglobal
# def g4() -> List[int]:
# return []
# # Return is missing
# @bpf
# @bpfglobal
# def g5() -> c_int64:
# pass
# # Return is a variable reference
# #TODO: maybe fix this sometime later. It defaults to 0
# CONST = 5
# @bpf
# @bpfglobal
# def g6() -> c_int64:
# return c_int64(CONST)
# Constructor with multiple args
#TODO: this is not working. should it work ?
@bpf
@bpfglobal
def g7() -> c_int64:
return c_int64(1)
# Dataclass call
#TODO: fails with dataclass
# @dataclass
# class Point:
# x: c_int64
# y: c_int64
# @bpf
# @bpfglobal
# def g8() -> Point:
# return Point(1, 2)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def sometag(ctx: c_void_p) -> c_int64:
print("test")
global somevalue
somevalue = 2
print(f"{somevalue}")
return c_int64(1)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile_to_ir("globals.py", "globals.ll", loglevel=logging.INFO)
compile()

View File

@ -0,0 +1,40 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from pythonbpf.helper import XDP_PASS
from pythonbpf.maps import HashMap
from ctypes import c_void_p, c_int64
# NOTE: This example exposes the problems with our typing system.
# We can't do steps on line 25 and 27.
# prev is of type i64**. For prev + 1, we deref it down to i64
# To assign it back to prev, we need to go back to i64**.
# We cannot allocate space for the intermediate type now.
# We probably need to track the ref/deref chain for each variable.
@bpf
@map
def count() -> HashMap:
return HashMap(key=c_int64, value=c_int64, max_entries=1)
@bpf
@section("xdp")
def hello_world(ctx: c_void_p) -> c_int64:
prev = count.lookup(0)
if prev:
prev = prev + 1
count.update(0, prev)
return XDP_PASS
else:
count.update(0, 1)
return XDP_PASS
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,21 +0,0 @@
import logging
from pythonbpf import compile, bpf, section, bpfglobal, compile_to_ir
from ctypes import c_void_p, c_int64
# This should not pass as somevalue is not declared at all.
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def sometag(ctx: c_void_p) -> c_int64:
print("test")
print(f"{somevalue}") # noqa: F821
return c_int64(1)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile_to_ir("globals.py", "globals.ll", loglevel=logging.INFO)
compile()

View File

@ -0,0 +1,20 @@
import logging
from pythonbpf import compile, bpf, section, bpfglobal
from ctypes import c_void_p, c_int64
@bpf
@section("sometag1")
def sometag(ctx: c_void_p) -> c_int64:
a = 1 - 1
return c_int64(a)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile(loglevel=logging.INFO)