3 Commits

82 changed files with 204112 additions and 3167 deletions


@@ -12,8 +12,8 @@ jobs:
name: Format name: Format
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v5 - uses: actions/checkout@v4
- uses: actions/setup-python@v6 - uses: actions/setup-python@v5
with: with:
python-version: "3.x" python-version: "3.x"
- uses: pre-commit/action@v3.0.1 - uses: pre-commit/action@v3.0.1

2
.gitignore vendored

@@ -7,5 +7,3 @@ __pycache__/
*.ll *.ll
*.o *.o
.ipynb_checkpoints/ .ipynb_checkpoints/
vmlinux.py
~*


@@ -12,7 +12,7 @@
# #
# See https://github.com/pre-commit/pre-commit # See https://github.com/pre-commit/pre-commit
exclude: 'vmlinux.py' exclude: 'vmlinux.*\.py$'
ci: ci:
autoupdate_commit_msg: "chore: update pre-commit hooks" autoupdate_commit_msg: "chore: update pre-commit hooks"
@@ -21,7 +21,7 @@ ci:
repos: repos:
# Standard hooks # Standard hooks
- repo: https://github.com/pre-commit/pre-commit-hooks - repo: https://github.com/pre-commit/pre-commit-hooks
rev: v6.0.0 rev: v4.6.0
hooks: hooks:
- id: check-added-large-files - id: check-added-large-files
- id: check-case-conflict - id: check-case-conflict
@@ -36,19 +36,19 @@ repos:
- id: trailing-whitespace - id: trailing-whitespace
- repo: https://github.com/astral-sh/ruff-pre-commit - repo: https://github.com/astral-sh/ruff-pre-commit
rev: "v0.13.2" rev: "v0.4.2"
hooks: hooks:
- id: ruff - id: ruff
args: ["--fix", "--show-fixes"] args: ["--fix", "--show-fixes"]
- id: ruff-format - id: ruff-format
# exclude: ^(docs)|^(tests)|^(examples) exclude: ^(tests/|examples/|docs/)
# Checking static types # Checking static types
- repo: https://github.com/pre-commit/mirrors-mypy - repo: https://github.com/pre-commit/mirrors-mypy
rev: "v1.18.2" rev: "v1.10.0"
hooks: hooks:
- id: mypy - id: mypy
exclude: ^(tests)|^(examples) exclude: ^(tests/|examples/)
additional_dependencies: [types-setuptools] additional_dependencies: [types-setuptools]
# Changes tabs to spaces # Changes tabs to spaces


@@ -83,14 +83,14 @@ def hist() -> HashMap:
def hello(ctx: c_void_p) -> c_int64: def hello(ctx: c_void_p) -> c_int64:
process_id = pid() process_id = pid()
one = 1 one = 1
prev = hist.lookup(process_id) prev = hist().lookup(process_id)
if prev: if prev:
previous_value = prev + 1 previous_value = prev + 1
print(f"count: {previous_value} with {process_id}") print(f"count: {previous_value} with {process_id}")
hist.update(process_id, previous_value) hist().update(process_id, previous_value)
return c_int64(0) return c_int64(0)
else: else:
hist.update(process_id, one) hist().update(process_id, one)
return c_int64(0) return c_int64(0)
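For orientation, here is a hedged, self-contained sketch of the per-PID counting example that this hunk and the notebook hunk below both touch. It is assembled from fragments visible in this diff (decorator names, the HashMap signature, the map-method spelling on the left-hand side of the hunk); it is an illustration, not authoritative project documentation:

```python
# Sketch only: assembled from the example fragments shown in this diff.
from ctypes import c_void_p, c_int32, c_int64, c_uint64

from pythonbpf import bpf, map, section, bpfglobal, BPF
from pythonbpf.helper import pid
from pythonbpf.maps import HashMap


@bpf
@map
def hist() -> HashMap:
    # Per-PID counter map, as declared in the notebook hunk below
    return HashMap(key=c_int32, value=c_uint64, max_entries=4096)


@bpf
@section("tracepoint/syscalls/sys_enter_clone")
def hello(ctx: c_void_p) -> c_int64:
    process_id = pid()
    one = 1
    prev = hist.lookup(process_id)  # direct map-method call (left side of the hunk)
    if prev:
        previous_value = prev + 1
        print(f"count: {previous_value} with {process_id}")
        hist.update(process_id, previous_value)
        return c_int64(0)
    else:
        hist.update(process_id, one)
        return c_int64(0)


@bpf
@bpfglobal
def LICENSE() -> str:
    return "GPL"


b = BPF()
b.load_and_attach()
```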

13
TODO.md Normal file

@@ -0,0 +1,13 @@
## Short term
- Implement enough functionality to port the BCC tutorial examples to PythonBPF
- Static Typing
- Add all maps
- XDP support in pylibbpf
- ringbuf support
- recursive expression resolution
## Long term
- Refactor the codebase to be better than a hackathon project
- Port to C++ and use actual LLVM?


@@ -12,7 +12,7 @@
"from pythonbpf import bpf, map, section, bpfglobal, BPF\n", "from pythonbpf import bpf, map, section, bpfglobal, BPF\n",
"from pythonbpf.helper import pid\n", "from pythonbpf.helper import pid\n",
"from pythonbpf.maps import HashMap\n", "from pythonbpf.maps import HashMap\n",
"from pylibbpf import BpfMap\n", "from pylibbpf import *\n",
"from ctypes import c_void_p, c_int64, c_uint64, c_int32\n", "from ctypes import c_void_p, c_int64, c_uint64, c_int32\n",
"import matplotlib.pyplot as plt" "import matplotlib.pyplot as plt"
] ]
@@ -308,7 +308,6 @@
"def hist() -> HashMap:\n", "def hist() -> HashMap:\n",
" return HashMap(key=c_int32, value=c_uint64, max_entries=4096)\n", " return HashMap(key=c_int32, value=c_uint64, max_entries=4096)\n",
"\n", "\n",
"\n",
"@bpf\n", "@bpf\n",
"@section(\"tracepoint/syscalls/sys_enter_clone\")\n", "@section(\"tracepoint/syscalls/sys_enter_clone\")\n",
"def hello(ctx: c_void_p) -> c_int64:\n", "def hello(ctx: c_void_p) -> c_int64:\n",
@@ -330,7 +329,6 @@
"def LICENSE() -> str:\n", "def LICENSE() -> str:\n",
" return \"GPL\"\n", " return \"GPL\"\n",
"\n", "\n",
"\n",
"b = BPF()" "b = BPF()"
] ]
}, },
@@ -359,6 +357,7 @@
} }
], ],
"source": [ "source": [
"\n",
"b.load_and_attach()\n", "b.load_and_attach()\n",
"hist = BpfMap(b, hist)\n", "hist = BpfMap(b, hist)\n",
"print(\"Recording\")\n", "print(\"Recording\")\n",


@@ -22,9 +22,5 @@ def LICENSE() -> str:
b = BPF() b = BPF()
b.load_and_attach() b.load_and_attach()
if b.is_loaded() and b.is_attached():
print("Successfully loaded and attached")
else:
print("Could not load successfully")
# Now cat /sys/kernel/debug/tracing/trace_pipe to see results of the execve syscall. # Now cat /sys/kernel/debug/tracing/trace_pipe to see results of the execve syscall.


@@ -1,29 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, BPF
from ctypes import c_void_p, c_int64
@bpf
@section("kretprobe/do_unlinkat")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return c_int64(0)
@bpf
@section("kprobe/do_unlinkat")
def hello_world2(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return c_int64(0)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
b = BPF()
b.load_and_attach()
while True:
print("running")
# Now cat /sys/kernel/debug/tracing/trace_pipe to see results of unlink kprobe.


@@ -27,7 +27,7 @@ def hello(ctx: c_void_p) -> c_int32:
dataobj.pid = pid() dataobj.pid = pid()
dataobj.ts = ktime() dataobj.ts = ktime()
# dataobj.comm = strobj # dataobj.comm = strobj
print(f"clone called at {dataobj.ts} by pid{dataobj.pid}, comm {strobj}") print(f"clone called at {dataobj.ts} by pid" f"{dataobj.pid}, comm {strobj}")
events.output(dataobj) events.output(dataobj)
return c_int32(0) return c_int32(0)


@@ -21,17 +21,17 @@ def last() -> HashMap:
@section("tracepoint/syscalls/sys_enter_sync") @section("tracepoint/syscalls/sys_enter_sync")
def do_trace(ctx: c_void_p) -> c_int64: def do_trace(ctx: c_void_p) -> c_int64:
key = 0 key = 0
tsp = last.lookup(key) tsp = last().lookup(key)
if tsp: if tsp:
kt = ktime() kt = ktime()
delta = kt - tsp delta = kt - tsp
if delta < 1000000000: if delta < 1000000000:
time_ms = delta // 1000000 time_ms = delta // 1000000
print(f"sync called within last second, last {time_ms} ms ago") print(f"sync called within last second, last {time_ms} ms ago")
last.delete(key) last().delete(key)
else: else:
kt = ktime() kt = ktime()
last.update(key, kt) last().update(key, kt)
return c_int64(0) return c_int64(0)

203381
examples/vmlinux.py Normal file

File diff suppressed because it is too large


@@ -1,8 +1,8 @@
from pythonbpf import bpf, map, section, bpfglobal, compile, compile_to_ir from pythonbpf import bpf, map, section, bpfglobal, compile
from pythonbpf.helper import XDP_PASS from pythonbpf.helper import XDP_PASS
from pythonbpf.maps import HashMap from pythonbpf.maps import HashMap
from ctypes import c_int64, c_void_p
from ctypes import c_void_p, c_int64
# Instructions to how to run this program # Instructions to how to run this program
# 1. Install PythonBPF: pip install pythonbpf # 1. Install PythonBPF: pip install pythonbpf
@@ -41,5 +41,4 @@ def LICENSE() -> str:
return "GPL" return "GPL"
compile_to_ir("xdp_pass.py", "xdp_pass.ll")
compile() compile()
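As a usage note, a minimal sketch of how the two compilation entry points touched in this file are called from a script; the file names are placeholders, and the behaviour described in the comments is taken from the compiler hunks later in this diff:

```python
# Sketch only: assumes the compile()/compile_to_ir() entry points shown in this diff.
from pythonbpf import compile, compile_to_ir

# Explicit source -> LLVM IR (the call kept on one side of this hunk)
compile_to_ir("xdp_pass.py", "xdp_pass.ll")

# Or let compile() resolve the calling script itself: per the compiler hunks below,
# it writes a .ll next to the caller's file and then invokes llc to emit the .o object.
compile()
```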


@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "pythonbpf" name = "pythonbpf"
version = "0.1.4" version = "0.1.3"
description = "Reduced Python frontend for eBPF" description = "Reduced Python frontend for eBPF"
authors = [ authors = [
{ name = "r41k0u", email="pragyanshchaturvedi18@gmail.com" }, { name = "r41k0u", email="pragyanshchaturvedi18@gmail.com" },


@@ -1,72 +1,71 @@
import ast import ast
from llvmlite import ir from llvmlite import ir
from logging import Logger
import logging
logger: Logger = logging.getLogger(__name__)
def recursive_dereferencer(var, builder): def recursive_dereferencer(var, builder):
"""dereference until primitive type comes out""" """dereference until primitive type comes out"""
# TODO: Not worrying about stack overflow for now if var.type == ir.PointerType(ir.PointerType(ir.IntType(64))):
logger.info(f"Dereferencing {var}, type is {var.type}")
if isinstance(var.type, ir.PointerType):
a = builder.load(var) a = builder.load(var)
return recursive_dereferencer(a, builder) return recursive_dereferencer(a, builder)
elif isinstance(var.type, ir.IntType): elif var.type == ir.PointerType(ir.IntType(64)):
a = builder.load(var)
return recursive_dereferencer(a, builder)
elif var.type == ir.IntType(64):
return var return var
else: else:
raise TypeError(f"Unsupported type for dereferencing: {var.type}") raise TypeError(f"Unsupported type for dereferencing: {var.type}")
def get_operand_value(operand, builder, local_sym_tab): def handle_binary_op(rval, module, builder, var_name, local_sym_tab, map_sym_tab, func):
"""Extract the value from an operand, handling variables and constants.""" print(module)
if isinstance(operand, ast.Name): left = rval.left
if operand.id in local_sym_tab: right = rval.right
return recursive_dereferencer(local_sym_tab[operand.id].var, builder)
raise ValueError(f"Undefined variable: {operand.id}")
elif isinstance(operand, ast.Constant):
if isinstance(operand.value, int):
return ir.Constant(ir.IntType(64), operand.value)
raise TypeError(f"Unsupported constant type: {type(operand.value)}")
elif isinstance(operand, ast.BinOp):
return handle_binary_op_impl(operand, builder, local_sym_tab)
raise TypeError(f"Unsupported operand type: {type(operand)}")
def handle_binary_op_impl(rval, builder, local_sym_tab):
op = rval.op op = rval.op
left = get_operand_value(rval.left, builder, local_sym_tab)
right = get_operand_value(rval.right, builder, local_sym_tab)
logger.info(f"left is {left}, right is {right}, op is {op}")
# Map AST operation nodes to LLVM IR builder methods # Handle left operand
op_map = { if isinstance(left, ast.Name):
ast.Add: builder.add, if left.id in local_sym_tab:
ast.Sub: builder.sub, left = recursive_dereferencer(local_sym_tab[left.id][0], builder)
ast.Mult: builder.mul, else:
ast.Div: builder.sdiv, raise SyntaxError(f"Undefined variable: {left.id}")
ast.Mod: builder.srem, elif isinstance(left, ast.Constant):
ast.LShift: builder.shl, left = ir.Constant(ir.IntType(64), left.value)
ast.RShift: builder.lshr, else:
ast.BitOr: builder.or_, raise SyntaxError("Unsupported left operand type")
ast.BitXor: builder.xor,
ast.BitAnd: builder.and_,
ast.FloorDiv: builder.udiv,
}
if type(op) in op_map: if isinstance(right, ast.Name):
result = op_map[type(op)](left, right) if right.id in local_sym_tab:
return result right = recursive_dereferencer(local_sym_tab[right.id][0], builder)
else:
raise SyntaxError(f"Undefined variable: {right.id}")
elif isinstance(right, ast.Constant):
right = ir.Constant(ir.IntType(64), right.value)
else:
raise SyntaxError("Unsupported right operand type")
print(f"left is {left}, right is {right}, op is {op}")
if isinstance(op, ast.Add):
builder.store(builder.add(left, right), local_sym_tab[var_name][0])
elif isinstance(op, ast.Sub):
builder.store(builder.sub(left, right), local_sym_tab[var_name][0])
elif isinstance(op, ast.Mult):
builder.store(builder.mul(left, right), local_sym_tab[var_name][0])
elif isinstance(op, ast.Div):
builder.store(builder.sdiv(left, right), local_sym_tab[var_name][0])
elif isinstance(op, ast.Mod):
builder.store(builder.srem(left, right), local_sym_tab[var_name][0])
elif isinstance(op, ast.LShift):
builder.store(builder.shl(left, right), local_sym_tab[var_name][0])
elif isinstance(op, ast.RShift):
builder.store(builder.lshr(left, right), local_sym_tab[var_name][0])
elif isinstance(op, ast.BitOr):
builder.store(builder.or_(left, right), local_sym_tab[var_name][0])
elif isinstance(op, ast.BitXor):
builder.store(builder.xor(left, right), local_sym_tab[var_name][0])
elif isinstance(op, ast.BitAnd):
builder.store(builder.and_(left, right), local_sym_tab[var_name][0])
elif isinstance(op, ast.FloorDiv):
builder.store(builder.udiv(left, right), local_sym_tab[var_name][0])
else: else:
raise SyntaxError("Unsupported binary operation") raise SyntaxError("Unsupported binary operation")
def handle_binary_op(rval, builder, var_name, local_sym_tab):
result = handle_binary_op_impl(rval, builder, local_sym_tab)
if var_name and var_name in local_sym_tab:
logger.info(
f"Storing result {result} into variable {local_sym_tab[var_name].var}"
)
builder.store(result, local_sym_tab[var_name].var)
return result, result.type


@@ -1,28 +1,19 @@
import ast import ast
from llvmlite import ir from llvmlite import ir
from .license_pass import license_processing from .license_pass import license_processing
from .functions import func_proc from .functions_pass import func_proc
from .maps import maps_proc from .maps import maps_proc
from .structs import structs_proc from .structs import structs_proc
from .vmlinux_parser import vmlinux_proc from .globals_pass import globals_processing
from .globals_pass import ( from .debuginfo import DW_LANG_C11, DwarfBehaviorEnum
globals_list_creation,
globals_processing,
populate_global_symbol_table,
)
from .debuginfo import DW_LANG_C11, DwarfBehaviorEnum, DebugInfoGenerator
import os import os
import subprocess import subprocess
import inspect import inspect
from pathlib import Path from pathlib import Path
from pylibbpf import BpfProgram from pylibbpf import BpfProgram
import tempfile import tempfile
from logging import Logger
import logging
logger: Logger = logging.getLogger(__name__) VERSION = "v0.1.3"
VERSION = "v0.1.4"
def find_bpf_chunks(tree): def find_bpf_chunks(tree):
@@ -39,28 +30,21 @@ def find_bpf_chunks(tree):
def processor(source_code, filename, module): def processor(source_code, filename, module):
tree = ast.parse(source_code, filename) tree = ast.parse(source_code, filename)
logger.debug(ast.dump(tree, indent=4)) print(ast.dump(tree, indent=4))
bpf_chunks = find_bpf_chunks(tree) bpf_chunks = find_bpf_chunks(tree)
for func_node in bpf_chunks: for func_node in bpf_chunks:
logger.info(f"Found BPF function/struct: {func_node.name}") print(f"Found BPF function/struct: {func_node.name}")
vmlinux_proc(tree, module)
populate_global_symbol_table(tree, module)
license_processing(tree, module)
globals_processing(tree, module)
structs_sym_tab = structs_proc(tree, module, bpf_chunks) structs_sym_tab = structs_proc(tree, module, bpf_chunks)
map_sym_tab = maps_proc(tree, module, bpf_chunks) map_sym_tab = maps_proc(tree, module, bpf_chunks)
func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab) func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab)
globals_list_creation(tree, module) license_processing(tree, module)
globals_processing(tree, module)
def compile_to_ir(filename: str, output: str, loglevel=logging.INFO): def compile_to_ir(filename: str, output: str):
logging.basicConfig(
level=loglevel, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s"
)
with open(filename) as f: with open(filename) as f:
source = f.read() source = f.read()
@@ -69,17 +53,33 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
module.triple = "bpf" module.triple = "bpf"
if not hasattr(module, "_debug_compile_unit"): if not hasattr(module, "_debug_compile_unit"):
debug_generator = DebugInfoGenerator(module) module._file_metadata = module.add_debug_info(
debug_generator.generate_file_metadata(filename, os.path.dirname(filename)) "DIFile",
debug_generator.generate_debug_cu( { # type: ignore
DW_LANG_C11, "filename": filename,
f"PythonBPF {VERSION}", "directory": os.path.dirname(filename),
True, # TODO: This is probably not true },
)
module._debug_compile_unit = module.add_debug_info(
"DICompileUnit",
{ # type: ignore
"language": DW_LANG_C11,
"file": module._file_metadata, # type: ignore
"producer": f"PythonBPF {VERSION}",
"isOptimized": True, # TODO: This is probably not true
# TODO: add a global field here that keeps track of all the globals. Works without it, but I think it might # TODO: add a global field here that keeps track of all the globals. Works without it, but I think it might
# be required for kprobes. # be required for kprobes.
True, "runtimeVersion": 0,
"emissionKind": 1,
"splitDebugInlining": False,
"nameTableKind": 0,
},
is_distinct=True,
) )
module.add_named_metadata("llvm.dbg.cu", module._debug_compile_unit) # type: ignore
processor(source, filename, module) processor(source, filename, module)
wchar_size = module.add_metadata( wchar_size = module.add_metadata(
@@ -121,7 +121,7 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
module.add_named_metadata("llvm.ident", [f"PythonBPF {VERSION}"]) module.add_named_metadata("llvm.ident", [f"PythonBPF {VERSION}"])
logger.info(f"IR written to {output}") print(f"IR written to {output}")
with open(output, "w") as f: with open(output, "w") as f:
f.write(f'source_filename = "{filename}"\n') f.write(f'source_filename = "{filename}"\n')
f.write(str(module)) f.write(str(module))
@@ -130,7 +130,7 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
return output return output
def compile(loglevel=logging.INFO) -> bool: def compile() -> bool:
# Look one level up the stack to the caller of this function # Look one level up the stack to the caller of this function
caller_frame = inspect.stack()[1] caller_frame = inspect.stack()[1]
caller_file = Path(caller_frame.filename).resolve() caller_file = Path(caller_frame.filename).resolve()
@@ -139,9 +139,7 @@ def compile(loglevel=logging.INFO) -> bool:
o_file = caller_file.with_suffix(".o") o_file = caller_file.with_suffix(".o")
success = True success = True
success = ( success = compile_to_ir(str(caller_file), str(ll_file)) and success
compile_to_ir(str(caller_file), str(ll_file), loglevel=loglevel) and success
)
success = bool( success = bool(
subprocess.run( subprocess.run(
@@ -159,11 +157,11 @@ def compile(loglevel=logging.INFO) -> bool:
and success and success
) )
logger.info(f"Object written to {o_file}") print(f"Object written to {o_file}")
return success return success
def BPF(loglevel=logging.INFO) -> BpfProgram: def BPF() -> BpfProgram:
caller_frame = inspect.stack()[1] caller_frame = inspect.stack()[1]
src = inspect.getsource(caller_frame.frame) src = inspect.getsource(caller_frame.frame)
with tempfile.NamedTemporaryFile( with tempfile.NamedTemporaryFile(
@@ -176,7 +174,7 @@ def BPF(loglevel=logging.INFO) -> BpfProgram:
f.write(src) f.write(src)
f.flush() f.flush()
source = f.name source = f.name
compile_to_ir(source, str(inter.name), loglevel=loglevel) compile_to_ir(source, str(inter.name))
subprocess.run( subprocess.run(
[ [
"llc", "llc",


@@ -12,34 +12,6 @@ class DebugInfoGenerator:
self.module = module self.module = module
self._type_cache = {} # Cache for common debug types self._type_cache = {} # Cache for common debug types
def generate_file_metadata(self, filename, dirname):
self.module._file_metadata = self.module.add_debug_info(
"DIFile",
{ # type: ignore
"filename": filename,
"directory": dirname,
},
)
def generate_debug_cu(
self, language, producer: str, is_optimized: bool, is_distinct: bool
):
self.module._debug_compile_unit = self.module.add_debug_info(
"DICompileUnit",
{ # type: ignore
"language": language,
"file": self.module._file_metadata, # type: ignore
"producer": producer,
"isOptimized": is_optimized,
"runtimeVersion": 0,
"emissionKind": 1,
"splitDebugInlining": False,
"nameTableKind": 0,
},
is_distinct=is_distinct,
)
self.module.add_named_metadata("llvm.dbg.cu", self.module._debug_compile_unit) # type: ignore
def get_basic_type(self, name: str, size: int, encoding: int) -> Any: def get_basic_type(self, name: str, size: int, encoding: int) -> Any:
"""Get or create a basic type with caching""" """Get or create a basic type with caching"""
key = (name, size, encoding) key = (name, size, encoding)


@@ -1,4 +0,0 @@
from .expr_pass import eval_expr, handle_expr
from .type_normalization import convert_to_bool
__all__ = ["eval_expr", "handle_expr", "convert_to_bool"]


@@ -1,445 +0,0 @@
import ast
from llvmlite import ir
from logging import Logger
import logging
from typing import Dict
from pythonbpf.type_deducer import ctypes_to_ir, is_ctypes
from .type_normalization import convert_to_bool, handle_comparator
logger: Logger = logging.getLogger(__name__)
def _handle_name_expr(expr: ast.Name, local_sym_tab: Dict, builder: ir.IRBuilder):
"""Handle ast.Name expressions."""
if expr.id in local_sym_tab:
var = local_sym_tab[expr.id].var
val = builder.load(var)
return val, local_sym_tab[expr.id].ir_type
else:
logger.info(f"Undefined variable {expr.id}")
return None
def _handle_constant_expr(expr: ast.Constant):
"""Handle ast.Constant expressions."""
if isinstance(expr.value, int) or isinstance(expr.value, bool):
return ir.Constant(ir.IntType(64), int(expr.value)), ir.IntType(64)
else:
logger.error("Unsupported constant type")
return None
def _handle_attribute_expr(
expr: ast.Attribute,
local_sym_tab: Dict,
structs_sym_tab: Dict,
builder: ir.IRBuilder,
):
"""Handle ast.Attribute expressions for struct field access."""
if isinstance(expr.value, ast.Name):
var_name = expr.value.id
attr_name = expr.attr
if var_name in local_sym_tab:
var_ptr, var_type, var_metadata = local_sym_tab[var_name]
logger.info(f"Loading attribute {attr_name} from variable {var_name}")
logger.info(f"Variable type: {var_type}, Variable ptr: {var_ptr}")
metadata = structs_sym_tab[var_metadata]
if attr_name in metadata.fields:
gep = metadata.gep(builder, var_ptr, attr_name)
val = builder.load(gep)
field_type = metadata.field_type(attr_name)
return val, field_type
return None
def _handle_deref_call(expr: ast.Call, local_sym_tab: Dict, builder: ir.IRBuilder):
"""Handle deref function calls."""
logger.info(f"Handling deref {ast.dump(expr)}")
if len(expr.args) != 1:
logger.info("deref takes exactly one argument")
return None
arg = expr.args[0]
if (
isinstance(arg, ast.Call)
and isinstance(arg.func, ast.Name)
and arg.func.id == "deref"
):
logger.info("Multiple deref not supported")
return None
if isinstance(arg, ast.Name):
if arg.id in local_sym_tab:
arg_ptr = local_sym_tab[arg.id].var
else:
logger.info(f"Undefined variable {arg.id}")
return None
else:
logger.info("Unsupported argument type for deref")
return None
if arg_ptr is None:
logger.info("Failed to evaluate deref argument")
return None
# Load the value from pointer
val = builder.load(arg_ptr)
return val, local_sym_tab[arg.id].ir_type
def _handle_ctypes_call(
func,
module,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab=None,
):
"""Handle ctypes type constructor calls."""
if len(expr.args) != 1:
logger.info("ctypes constructor takes exactly one argument")
return None
arg = expr.args[0]
val = eval_expr(
func,
module,
builder,
arg,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if val is None:
logger.info("Failed to evaluate argument to ctypes constructor")
return None
call_type = expr.func.id
expected_type = ctypes_to_ir(call_type)
if val[1] != expected_type:
# NOTE: We are only considering casting to and from int types for now
if isinstance(val[1], ir.IntType) and isinstance(expected_type, ir.IntType):
if val[1].width < expected_type.width:
val = (builder.sext(val[0], expected_type), expected_type)
else:
val = (builder.trunc(val[0], expected_type), expected_type)
else:
raise ValueError(f"Type mismatch: expected {expected_type}, got {val[1]}")
return val
def _handle_compare(
func, module, builder, cond, local_sym_tab, map_sym_tab, structs_sym_tab=None
):
"""Handle ast.Compare expressions."""
if len(cond.ops) != 1 or len(cond.comparators) != 1:
logger.error("Only single comparisons are supported")
return None
lhs = eval_expr(
func,
module,
builder,
cond.left,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
rhs = eval_expr(
func,
module,
builder,
cond.comparators[0],
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
if lhs is None or rhs is None:
logger.error("Failed to evaluate comparison operands")
return None
lhs, _ = lhs
rhs, _ = rhs
return handle_comparator(func, builder, cond.ops[0], lhs, rhs)
def _handle_unary_op(
func,
module,
builder,
expr: ast.UnaryOp,
local_sym_tab,
map_sym_tab,
structs_sym_tab=None,
):
"""Handle ast.UnaryOp expressions."""
if not isinstance(expr.op, ast.Not):
logger.error("Only 'not' unary operator is supported")
return None
operand = eval_expr(
func, module, builder, expr.operand, local_sym_tab, map_sym_tab, structs_sym_tab
)
if operand is None:
logger.error("Failed to evaluate operand for unary operation")
return None
operand_val, operand_type = operand
true_const = ir.Constant(ir.IntType(1), 1)
result = builder.xor(convert_to_bool(builder, operand_val), true_const)
return result, ir.IntType(1)
def _handle_and_op(func, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab):
"""Handle `and` boolean operations."""
logger.debug(f"Handling 'and' operator with {len(expr.values)} operands")
merge_block = func.append_basic_block(name="and.merge")
false_block = func.append_basic_block(name="and.false")
incoming_values = []
for i, value in enumerate(expr.values):
is_last = i == len(expr.values) - 1
# Evaluate current operand
operand_result = eval_expr(
func, None, builder, value, local_sym_tab, map_sym_tab, structs_sym_tab
)
if operand_result is None:
logger.error(f"Failed to evaluate operand {i} in 'and' expression")
return None
operand_val, operand_type = operand_result
# Convert to boolean if needed
operand_bool = convert_to_bool(builder, operand_val)
current_block = builder.block
if is_last:
# Last operand: result is this value
builder.branch(merge_block)
incoming_values.append((operand_bool, current_block))
else:
# Not last: check if true, continue or short-circuit
next_check = func.append_basic_block(name=f"and.check_{i + 1}")
builder.cbranch(operand_bool, next_check, false_block)
builder.position_at_end(next_check)
# False block: short-circuit with false
builder.position_at_end(false_block)
builder.branch(merge_block)
false_value = ir.Constant(ir.IntType(1), 0)
incoming_values.append((false_value, false_block))
# Merge block: phi node
builder.position_at_end(merge_block)
phi = builder.phi(ir.IntType(1), name="and.result")
for val, block in incoming_values:
phi.add_incoming(val, block)
logger.debug(f"Generated 'and' with {len(incoming_values)} incoming values")
return phi, ir.IntType(1)
def _handle_or_op(func, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab):
"""Handle `or` boolean operations."""
logger.debug(f"Handling 'or' operator with {len(expr.values)} operands")
merge_block = func.append_basic_block(name="or.merge")
true_block = func.append_basic_block(name="or.true")
incoming_values = []
for i, value in enumerate(expr.values):
is_last = i == len(expr.values) - 1
# Evaluate current operand
operand_result = eval_expr(
func, None, builder, value, local_sym_tab, map_sym_tab, structs_sym_tab
)
if operand_result is None:
logger.error(f"Failed to evaluate operand {i} in 'or' expression")
return None
operand_val, operand_type = operand_result
# Convert to boolean if needed
operand_bool = convert_to_bool(builder, operand_val)
current_block = builder.block
if is_last:
# Last operand: result is this value
builder.branch(merge_block)
incoming_values.append((operand_bool, current_block))
else:
# Not last: check if false, continue or short-circuit
next_check = func.append_basic_block(name=f"or.check_{i + 1}")
builder.cbranch(operand_bool, true_block, next_check)
builder.position_at_end(next_check)
# True block: short-circuit with true
builder.position_at_end(true_block)
builder.branch(merge_block)
true_value = ir.Constant(ir.IntType(1), 1)
incoming_values.append((true_value, true_block))
# Merge block: phi node
builder.position_at_end(merge_block)
phi = builder.phi(ir.IntType(1), name="or.result")
for val, block in incoming_values:
phi.add_incoming(val, block)
logger.debug(f"Generated 'or' with {len(incoming_values)} incoming values")
return phi, ir.IntType(1)
def _handle_boolean_op(
func,
module,
builder,
expr: ast.BoolOp,
local_sym_tab,
map_sym_tab,
structs_sym_tab=None,
):
"""Handle `and` and `or` boolean operations."""
if isinstance(expr.op, ast.And):
return _handle_and_op(
func, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab
)
elif isinstance(expr.op, ast.Or):
return _handle_or_op(
func, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab
)
else:
logger.error(f"Unsupported boolean operator: {type(expr.op).__name__}")
return None
def eval_expr(
func,
module,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab=None,
):
logger.info(f"Evaluating expression: {ast.dump(expr)}")
if isinstance(expr, ast.Name):
return _handle_name_expr(expr, local_sym_tab, builder)
elif isinstance(expr, ast.Constant):
return _handle_constant_expr(expr)
elif isinstance(expr, ast.Call):
if isinstance(expr.func, ast.Name) and expr.func.id == "deref":
return _handle_deref_call(expr, local_sym_tab, builder)
if isinstance(expr.func, ast.Name) and is_ctypes(expr.func.id):
return _handle_ctypes_call(
func,
module,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
# delayed import to avoid circular dependency
from pythonbpf.helper import HelperHandlerRegistry, handle_helper_call
if isinstance(expr.func, ast.Name) and HelperHandlerRegistry.has_handler(
expr.func.id
):
return handle_helper_call(
expr,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
elif isinstance(expr.func, ast.Attribute):
logger.info(f"Handling method call: {ast.dump(expr.func)}")
if isinstance(expr.func.value, ast.Call) and isinstance(
expr.func.value.func, ast.Name
):
method_name = expr.func.attr
if HelperHandlerRegistry.has_handler(method_name):
return handle_helper_call(
expr,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
elif isinstance(expr.func.value, ast.Name):
obj_name = expr.func.value.id
method_name = expr.func.attr
if obj_name in map_sym_tab:
if HelperHandlerRegistry.has_handler(method_name):
return handle_helper_call(
expr,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
elif isinstance(expr, ast.Attribute):
return _handle_attribute_expr(expr, local_sym_tab, structs_sym_tab, builder)
elif isinstance(expr, ast.BinOp):
from pythonbpf.binary_ops import handle_binary_op
return handle_binary_op(expr, builder, None, local_sym_tab)
elif isinstance(expr, ast.Compare):
return _handle_compare(
func, module, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab
)
elif isinstance(expr, ast.UnaryOp):
return _handle_unary_op(
func, module, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab
)
elif isinstance(expr, ast.BoolOp):
return _handle_boolean_op(
func, module, builder, expr, local_sym_tab, map_sym_tab, structs_sym_tab
)
logger.info("Unsupported expression evaluation")
return None
def handle_expr(
func,
module,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
):
"""Handle expression statements in the function body."""
logger.info(f"Handling expression: {ast.dump(expr)}")
call = expr.value
if isinstance(call, ast.Call):
eval_expr(
func,
module,
builder,
call,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
else:
logger.info("Unsupported expression type")


@@ -1,128 +0,0 @@
from llvmlite import ir
import logging
import ast
logger = logging.getLogger(__name__)
COMPARISON_OPS = {
ast.Eq: "==",
ast.NotEq: "!=",
ast.Lt: "<",
ast.LtE: "<=",
ast.Gt: ">",
ast.GtE: ">=",
ast.Is: "==",
ast.IsNot: "!=",
}
def _get_base_type_and_depth(ir_type):
"""Get the base type for pointer types."""
cur_type = ir_type
depth = 0
while isinstance(cur_type, ir.PointerType):
depth += 1
cur_type = cur_type.pointee
return cur_type, depth
def _deref_to_depth(func, builder, val, target_depth):
"""Dereference a pointer to a certain depth."""
cur_val = val
cur_type = val.type
for depth in range(target_depth):
if not isinstance(val.type, ir.PointerType):
logger.error("Cannot dereference further, non-pointer type")
return None
# dereference with null check
pointee_type = cur_type.pointee
null_check_block = builder.block
not_null_block = func.append_basic_block(name=f"deref_not_null_{depth}")
merge_block = func.append_basic_block(name=f"deref_merge_{depth}")
null_ptr = ir.Constant(cur_type, None)
is_not_null = builder.icmp_signed("!=", cur_val, null_ptr)
logger.debug(f"Inserted null check for pointer at depth {depth}")
builder.cbranch(is_not_null, not_null_block, merge_block)
builder.position_at_end(not_null_block)
dereferenced_val = builder.load(cur_val)
logger.debug(f"Dereferenced to depth {depth - 1}, type: {pointee_type}")
builder.branch(merge_block)
builder.position_at_end(merge_block)
phi = builder.phi(pointee_type, name=f"deref_result_{depth}")
zero_value = (
ir.Constant(pointee_type, 0)
if isinstance(pointee_type, ir.IntType)
else ir.Constant(pointee_type, None)
)
phi.add_incoming(zero_value, null_check_block)
phi.add_incoming(dereferenced_val, not_null_block)
# Continue with phi result
cur_val = phi
cur_type = pointee_type
return cur_val
def _normalize_types(func, builder, lhs, rhs):
"""Normalize types for comparison."""
logger.info(f"Normalizing types: {lhs.type} vs {rhs.type}")
if isinstance(lhs.type, ir.IntType) and isinstance(rhs.type, ir.IntType):
if lhs.type.width < rhs.type.width:
lhs = builder.sext(lhs, rhs.type)
else:
rhs = builder.sext(rhs, lhs.type)
return lhs, rhs
elif not isinstance(lhs.type, ir.PointerType) and not isinstance(
rhs.type, ir.PointerType
):
logger.error(f"Type mismatch: {lhs.type} vs {rhs.type}")
return None, None
else:
lhs_base, lhs_depth = _get_base_type_and_depth(lhs.type)
rhs_base, rhs_depth = _get_base_type_and_depth(rhs.type)
if lhs_base == rhs_base:
if lhs_depth < rhs_depth:
rhs = _deref_to_depth(func, builder, rhs, rhs_depth - lhs_depth)
elif rhs_depth < lhs_depth:
lhs = _deref_to_depth(func, builder, lhs, lhs_depth - rhs_depth)
return _normalize_types(func, builder, lhs, rhs)
def convert_to_bool(builder, val):
"""Convert a value to boolean."""
if val.type == ir.IntType(1):
return val
if isinstance(val.type, ir.PointerType):
zero = ir.Constant(val.type, None)
else:
zero = ir.Constant(val.type, 0)
return builder.icmp_signed("!=", val, zero)
def handle_comparator(func, builder, op, lhs, rhs):
"""Handle comparison operations."""
if lhs.type != rhs.type:
lhs, rhs = _normalize_types(func, builder, lhs, rhs)
if lhs is None or rhs is None:
return None
if type(op) not in COMPARISON_OPS:
logger.error(f"Unsupported comparison operator: {type(op)}")
return None
predicate = COMPARISON_OPS[type(op)]
result = builder.icmp_signed(predicate, lhs, rhs)
logger.debug(f"Comparison result: {result}")
return result, ir.IntType(1)

155
pythonbpf/expr_pass.py Normal file

@@ -0,0 +1,155 @@
import ast
from llvmlite import ir
def eval_expr(
func,
module,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab=None,
local_var_metadata=None,
):
print(f"Evaluating expression: {ast.dump(expr)}")
print(local_var_metadata)
if isinstance(expr, ast.Name):
if expr.id in local_sym_tab:
var = local_sym_tab[expr.id][0]
val = builder.load(var)
return val, local_sym_tab[expr.id][1] # return value and type
else:
print(f"Undefined variable {expr.id}")
return None
elif isinstance(expr, ast.Constant):
if isinstance(expr.value, int):
return ir.Constant(ir.IntType(64), expr.value), ir.IntType(64)
elif isinstance(expr.value, bool):
return ir.Constant(ir.IntType(1), int(expr.value)), ir.IntType(1)
else:
print("Unsupported constant type")
return None
elif isinstance(expr, ast.Call):
# delayed import to avoid circular dependency
from pythonbpf.helper import HelperHandlerRegistry, handle_helper_call
if isinstance(expr.func, ast.Name):
# check deref
if expr.func.id == "deref":
print(f"Handling deref {ast.dump(expr)}")
if len(expr.args) != 1:
print("deref takes exactly one argument")
return None
arg = expr.args[0]
if (
isinstance(arg, ast.Call)
and isinstance(arg.func, ast.Name)
and arg.func.id == "deref"
):
print("Multiple deref not supported")
return None
if isinstance(arg, ast.Name):
if arg.id in local_sym_tab:
arg = local_sym_tab[arg.id][0]
else:
print(f"Undefined variable {arg.id}")
return None
if arg is None:
print("Failed to evaluate deref argument")
return None
# Since we are handling only name case, directly take type from sym tab
val = builder.load(arg)
return val, local_sym_tab[expr.args[0].id][1]
# check for helpers
if HelperHandlerRegistry.has_handler(expr.func.id):
return handle_helper_call(
expr,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
local_var_metadata,
)
elif isinstance(expr.func, ast.Attribute):
print(f"Handling method call: {ast.dump(expr.func)}")
if isinstance(expr.func.value, ast.Call) and isinstance(
expr.func.value.func, ast.Name
):
method_name = expr.func.attr
if HelperHandlerRegistry.has_handler(method_name):
return handle_helper_call(
expr,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
local_var_metadata,
)
elif isinstance(expr.func.value, ast.Name):
obj_name = expr.func.value.id
method_name = expr.func.attr
if obj_name in map_sym_tab:
if HelperHandlerRegistry.has_handler(method_name):
return handle_helper_call(
expr,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
local_var_metadata,
)
elif isinstance(expr, ast.Attribute):
if isinstance(expr.value, ast.Name):
var_name = expr.value.id
attr_name = expr.attr
if var_name in local_sym_tab:
var_ptr, var_type = local_sym_tab[var_name]
print(f"Loading attribute " f"{attr_name} from variable {var_name}")
print(f"Variable type: {var_type}, Variable ptr: {var_ptr}")
print(local_var_metadata)
if local_var_metadata and var_name in local_var_metadata:
metadata = structs_sym_tab[local_var_metadata[var_name]]
if attr_name in metadata.fields:
gep = metadata.gep(builder, var_ptr, attr_name)
val = builder.load(gep)
field_type = metadata.field_type(attr_name)
return val, field_type
print("Unsupported expression evaluation")
return None
def handle_expr(
func,
module,
builder,
expr,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
local_var_metadata,
):
"""Handle expression statements in the function body."""
print(f"Handling expression: {ast.dump(expr)}")
print(local_var_metadata)
call = expr.value
if isinstance(call, ast.Call):
eval_expr(
func,
module,
builder,
call,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
local_var_metadata,
)
else:
print("Unsupported expression type")


@@ -1,3 +0,0 @@
from .functions_pass import func_proc
__all__ = ["func_proc"]


@@ -1,22 +0,0 @@
from typing import Dict
class StatementHandlerRegistry:
"""Registry for statement handlers."""
_handlers: Dict = {}
@classmethod
def register(cls, stmt_type):
"""Register a handler for a specific statement type."""
def decorator(handler):
cls._handlers[stmt_type] = handler
return handler
return decorator
@classmethod
def __getitem__(cls, stmt_type):
"""Get the handler for a specific statement type."""
return cls._handlers.get(stmt_type, None)


@@ -1,45 +0,0 @@
import logging
import ast
from llvmlite import ir
logger: logging.Logger = logging.getLogger(__name__)
XDP_ACTIONS = {
"XDP_ABORTED": 0,
"XDP_DROP": 1,
"XDP_PASS": 2,
"XDP_TX": 3,
"XDP_REDIRECT": 4,
}
def _handle_none_return(builder) -> bool:
"""Handle return or return None -> returns 0."""
builder.ret(ir.Constant(ir.IntType(64), 0))
logger.debug("Generated default return: 0")
return True
def _is_xdp_name(name: str) -> bool:
"""Check if a name is an XDP action"""
return name in XDP_ACTIONS
def _handle_xdp_return(stmt: ast.Return, builder, ret_type) -> bool:
"""Handle XDP returns"""
if not isinstance(stmt.value, ast.Name):
return False
action_name = stmt.value.id
if action_name not in XDP_ACTIONS:
raise ValueError(
f"Unknown XDP action: {action_name}. Available: {XDP_ACTIONS.keys()}"
)
return False
value = XDP_ACTIONS[action_name]
builder.ret(ir.Constant(ret_type, value))
logger.debug(f"Generated XDP action return: {action_name} = {value}")
return True


@@ -1,30 +1,13 @@
from llvmlite import ir from llvmlite import ir
import ast import ast
import logging
from typing import Any from typing import Any
from dataclasses import dataclass
from pythonbpf.helper import HelperHandlerRegistry, handle_helper_call from .helper import HelperHandlerRegistry, handle_helper_call
from pythonbpf.type_deducer import ctypes_to_ir from .type_deducer import ctypes_to_ir
from pythonbpf.binary_ops import handle_binary_op from .binary_ops import handle_binary_op
from pythonbpf.expr import eval_expr, handle_expr, convert_to_bool from .expr_pass import eval_expr, handle_expr
from .return_utils import _handle_none_return, _handle_xdp_return, _is_xdp_name local_var_metadata: dict[str | Any, Any] = {}
logger = logging.getLogger(__name__)
@dataclass
class LocalSymbol:
var: ir.AllocaInstr
ir_type: ir.Type
metadata: Any = None
def __iter__(self):
yield self.var
yield self.ir_type
yield self.metadata
def get_probe_string(func_node): def get_probe_string(func_node):
@@ -49,27 +32,28 @@ def handle_assign(
): ):
"""Handle assignment statements in the function body.""" """Handle assignment statements in the function body."""
if len(stmt.targets) != 1: if len(stmt.targets) != 1:
logger.info("Unsupported multiassignment") print("Unsupported multiassignment")
return return
num_types = ("c_int32", "c_int64", "c_uint32", "c_uint64") num_types = ("c_int32", "c_int64", "c_uint32", "c_uint64")
target = stmt.targets[0] target = stmt.targets[0]
logger.info(f"Handling assignment to {ast.dump(target)}") print(f"Handling assignment to {ast.dump(target)}")
if not isinstance(target, ast.Name) and not isinstance(target, ast.Attribute): if not isinstance(target, ast.Name) and not isinstance(target, ast.Attribute):
logger.info("Unsupported assignment target") print("Unsupported assignment target")
return return
var_name = target.id if isinstance(target, ast.Name) else target.value.id var_name = target.id if isinstance(target, ast.Name) else target.value.id
rval = stmt.value rval = stmt.value
if isinstance(target, ast.Attribute): if isinstance(target, ast.Attribute):
# struct field assignment # struct field assignment
field_name = target.attr field_name = target.attr
if var_name in local_sym_tab: if var_name in local_sym_tab and var_name in local_var_metadata:
struct_type = local_sym_tab[var_name].metadata struct_type = local_var_metadata[var_name]
struct_info = structs_sym_tab[struct_type] struct_info = structs_sym_tab[struct_type]
if field_name in struct_info.fields: if field_name in struct_info.fields:
field_ptr = struct_info.gep( field_ptr = struct_info.gep(
builder, local_sym_tab[var_name].var, field_name builder, local_sym_tab[var_name][0], field_name
) )
val = eval_expr( val = eval_expr(
func, func,
@@ -90,31 +74,28 @@ def handle_assign(
# print(f"Assigned to struct field {var_name}.{field_name}") # print(f"Assigned to struct field {var_name}.{field_name}")
pass pass
if val is None: if val is None:
logger.info("Failed to evaluate struct field assignment") print("Failed to evaluate struct field assignment")
return return
logger.info(field_ptr) print(field_ptr)
builder.store(val[0], field_ptr) builder.store(val[0], field_ptr)
logger.info(f"Assigned to struct field {var_name}.{field_name}") print(f"Assigned to struct field {var_name}.{field_name}")
return return
elif isinstance(rval, ast.Constant): elif isinstance(rval, ast.Constant):
if isinstance(rval.value, bool): if isinstance(rval.value, bool):
if rval.value: if rval.value:
builder.store( builder.store(ir.Constant(ir.IntType(1), 1), local_sym_tab[var_name][0])
ir.Constant(ir.IntType(1), 1), local_sym_tab[var_name].var
)
else: else:
builder.store( builder.store(ir.Constant(ir.IntType(1), 0), local_sym_tab[var_name][0])
ir.Constant(ir.IntType(1), 0), local_sym_tab[var_name].var print(f"Assigned constant {rval.value} to {var_name}")
)
logger.info(f"Assigned constant {rval.value} to {var_name}")
elif isinstance(rval.value, int): elif isinstance(rval.value, int):
# Assume c_int64 for now # Assume c_int64 for now
# var = builder.alloca(ir.IntType(64), name=var_name) # var = builder.alloca(ir.IntType(64), name=var_name)
# var.align = 8 # var.align = 8
builder.store( builder.store(
ir.Constant(ir.IntType(64), rval.value), local_sym_tab[var_name].var ir.Constant(ir.IntType(64), rval.value), local_sym_tab[var_name][0]
) )
logger.info(f"Assigned constant {rval.value} to {var_name}") # local_sym_tab[var_name] = var
print(f"Assigned constant {rval.value} to {var_name}")
elif isinstance(rval.value, str): elif isinstance(rval.value, str):
str_val = rval.value.encode("utf-8") + b"\x00" str_val = rval.value.encode("utf-8") + b"\x00"
str_const = ir.Constant( str_const = ir.Constant(
@@ -127,14 +108,14 @@ def handle_assign(
global_str.global_constant = True global_str.global_constant = True
global_str.initializer = str_const global_str.initializer = str_const
str_ptr = builder.bitcast(global_str, ir.PointerType(ir.IntType(8))) str_ptr = builder.bitcast(global_str, ir.PointerType(ir.IntType(8)))
builder.store(str_ptr, local_sym_tab[var_name].var) builder.store(str_ptr, local_sym_tab[var_name][0])
logger.info(f"Assigned string constant '{rval.value}' to {var_name}") print(f"Assigned string constant '{rval.value}' to {var_name}")
else: else:
logger.info("Unsupported constant type") print("Unsupported constant type")
elif isinstance(rval, ast.Call): elif isinstance(rval, ast.Call):
if isinstance(rval.func, ast.Name): if isinstance(rval.func, ast.Name):
call_type = rval.func.id call_type = rval.func.id
logger.info(f"Assignment call type: {call_type}") print(f"Assignment call type: {call_type}")
if ( if (
call_type in num_types call_type in num_types
and len(rval.args) == 1 and len(rval.args) == 1
@@ -145,12 +126,13 @@ def handle_assign(
# var = builder.alloca(ir_type, name=var_name) # var = builder.alloca(ir_type, name=var_name)
# var.align = ir_type.width // 8 # var.align = ir_type.width // 8
builder.store( builder.store(
ir.Constant(ir_type, rval.args[0].value), ir.Constant(ir_type, rval.args[0].value), local_sym_tab[var_name][0]
local_sym_tab[var_name].var,
) )
logger.info( print(
f"Assigned {call_type} constant {rval.args[0].value} to {var_name}" f"Assigned {call_type} constant "
f"{rval.args[0].value} to {var_name}"
) )
# local_sym_tab[var_name] = var
elif HelperHandlerRegistry.has_handler(call_type): elif HelperHandlerRegistry.has_handler(call_type):
# var = builder.alloca(ir.IntType(64), name=var_name) # var = builder.alloca(ir.IntType(64), name=var_name)
# var.align = 8 # var.align = 8
@@ -162,11 +144,13 @@ def handle_assign(
local_sym_tab, local_sym_tab,
map_sym_tab, map_sym_tab,
structs_sym_tab, structs_sym_tab,
local_var_metadata,
) )
builder.store(val[0], local_sym_tab[var_name].var) builder.store(val[0], local_sym_tab[var_name][0])
logger.info(f"Assigned constant {rval.func.id} to {var_name}") # local_sym_tab[var_name] = var
print(f"Assigned constant {rval.func.id} to {var_name}")
elif call_type == "deref" and len(rval.args) == 1: elif call_type == "deref" and len(rval.args) == 1:
logger.info(f"Handling deref assignment {ast.dump(rval)}") print(f"Handling deref assignment {ast.dump(rval)}")
val = eval_expr( val = eval_expr(
func, func,
module, module,
@@ -177,40 +161,28 @@ def handle_assign(
structs_sym_tab, structs_sym_tab,
) )
if val is None: if val is None:
logger.info("Failed to evaluate deref argument") print("Failed to evaluate deref argument")
return return
logger.info(f"Dereferenced value: {val}, storing in {var_name}") print(f"Dereferenced value: {val}, storing in {var_name}")
builder.store(val[0], local_sym_tab[var_name].var) builder.store(val[0], local_sym_tab[var_name][0])
logger.info(f"Dereferenced and assigned to {var_name}") # local_sym_tab[var_name] = var
print(f"Dereferenced and assigned to {var_name}")
elif call_type in structs_sym_tab and len(rval.args) == 0: elif call_type in structs_sym_tab and len(rval.args) == 0:
struct_info = structs_sym_tab[call_type] struct_info = structs_sym_tab[call_type]
ir_type = struct_info.ir_type ir_type = struct_info.ir_type
# var = builder.alloca(ir_type, name=var_name) # var = builder.alloca(ir_type, name=var_name)
# Null init # Null init
builder.store(ir.Constant(ir_type, None), local_sym_tab[var_name].var) builder.store(ir.Constant(ir_type, None), local_sym_tab[var_name][0])
logger.info(f"Assigned struct {call_type} to {var_name}") local_var_metadata[var_name] = call_type
print(f"Assigned struct {call_type} to {var_name}")
# local_sym_tab[var_name] = var
else: else:
logger.info(f"Unsupported assignment call type: {call_type}") print(f"Unsupported assignment call type: {call_type}")
elif isinstance(rval.func, ast.Attribute): elif isinstance(rval.func, ast.Attribute):
logger.info(f"Assignment call attribute: {ast.dump(rval.func)}") print(f"Assignment call attribute: {ast.dump(rval.func)}")
if isinstance(rval.func.value, ast.Name): if isinstance(rval.func.value, ast.Name):
if rval.func.value.id in map_sym_tab:
map_name = rval.func.value.id
method_name = rval.func.attr
if HelperHandlerRegistry.has_handler(method_name):
val = handle_helper_call(
rval,
module,
builder,
func,
local_sym_tab,
map_sym_tab,
structs_sym_tab,
)
builder.store(val[0], local_sym_tab[var_name].var)
else:
# TODO: probably a struct access # TODO: probably a struct access
logger.info(f"TODO STRUCT ACCESS {ast.dump(rval)}") print(f"TODO STRUCT ACCESS {ast.dump(rval)}")
elif isinstance(rval.func.value, ast.Call) and isinstance( elif isinstance(rval.func.value, ast.Call) and isinstance(
rval.func.value.func, ast.Name rval.func.value.func, ast.Name
): ):
@@ -226,34 +198,96 @@ def handle_assign(
local_sym_tab, local_sym_tab,
map_sym_tab, map_sym_tab,
structs_sym_tab, structs_sym_tab,
local_var_metadata,
) )
# var = builder.alloca(ir.IntType(64), name=var_name) # var = builder.alloca(ir.IntType(64), name=var_name)
# var.align = 8 # var.align = 8
builder.store(val[0], local_sym_tab[var_name].var) builder.store(val[0], local_sym_tab[var_name][0])
# local_sym_tab[var_name] = var
else: else:
logger.info("Unsupported assignment call structure") print("Unsupported assignment call structure")
else: else:
logger.info("Unsupported assignment call function type") print("Unsupported assignment call function type")
elif isinstance(rval, ast.BinOp): elif isinstance(rval, ast.BinOp):
handle_binary_op(rval, builder, var_name, local_sym_tab) handle_binary_op(
rval, module, builder, var_name, local_sym_tab, map_sym_tab, func
)
else: else:
logger.info("Unsupported assignment value type") print("Unsupported assignment value type")
def handle_cond( def handle_cond(func, module, builder, cond, local_sym_tab, map_sym_tab):
func, module, builder, cond, local_sym_tab, map_sym_tab, structs_sym_tab=None if isinstance(cond, ast.Constant):
): if isinstance(cond.value, bool):
val = eval_expr( return ir.Constant(ir.IntType(1), int(cond.value))
func, module, builder, cond, local_sym_tab, map_sym_tab, structs_sym_tab elif isinstance(cond.value, int):
return ir.Constant(ir.IntType(1), int(bool(cond.value)))
else:
print("Unsupported constant type in condition")
return None
elif isinstance(cond, ast.Name):
if cond.id in local_sym_tab:
var = local_sym_tab[cond.id][0]
val = builder.load(var)
if val.type != ir.IntType(1):
# Convert nonzero values to true, zero to false
if isinstance(val.type, ir.PointerType):
# For pointer types, compare with null pointer
zero = ir.Constant(val.type, None)
else:
# For integer types, compare with zero
zero = ir.Constant(val.type, 0)
val = builder.icmp_signed("!=", val, zero)
return val
else:
print(f"Undefined variable {cond.id} in condition")
return None
elif isinstance(cond, ast.Compare):
lhs = eval_expr(func, module, builder, cond.left, local_sym_tab, map_sym_tab)[0]
if len(cond.ops) != 1 or len(cond.comparators) != 1:
print("Unsupported complex comparison")
return None
rhs = eval_expr(
func, module, builder, cond.comparators[0], local_sym_tab, map_sym_tab
)[0] )[0]
return convert_to_bool(builder, val) op = cond.ops[0]
if lhs.type != rhs.type:
if isinstance(lhs.type, ir.IntType) and isinstance(rhs.type, ir.IntType):
# Extend the smaller type to the larger type
if lhs.type.width < rhs.type.width:
lhs = builder.sext(lhs, rhs.type)
elif lhs.type.width > rhs.type.width:
rhs = builder.sext(rhs, lhs.type)
else:
print("Type mismatch in comparison")
return None
if isinstance(op, ast.Eq):
return builder.icmp_signed("==", lhs, rhs)
elif isinstance(op, ast.NotEq):
return builder.icmp_signed("!=", lhs, rhs)
elif isinstance(op, ast.Lt):
return builder.icmp_signed("<", lhs, rhs)
elif isinstance(op, ast.LtE):
return builder.icmp_signed("<=", lhs, rhs)
elif isinstance(op, ast.Gt):
return builder.icmp_signed(">", lhs, rhs)
elif isinstance(op, ast.GtE):
return builder.icmp_signed(">=", lhs, rhs)
else:
print("Unsupported comparison operator")
return None
else:
print("Unsupported condition expression")
return None
def handle_if( def handle_if(
func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_sym_tab=None func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_sym_tab=None
): ):
"""Handle if statements in the function body.""" """Handle if statements in the function body."""
logger.info("Handling if statement") print("Handling if statement")
# start = builder.block.parent # start = builder.block.parent
then_block = func.append_basic_block(name="if.then") then_block = func.append_basic_block(name="if.then")
merge_block = func.append_basic_block(name="if.end") merge_block = func.append_basic_block(name="if.end")
@@ -262,9 +296,7 @@ def handle_if(
else: else:
else_block = None else_block = None
cond = handle_cond( cond = handle_cond(func, module, builder, stmt.test, local_sym_tab, map_sym_tab)
func, module, builder, stmt.test, local_sym_tab, map_sym_tab, structs_sym_tab
)
if else_block: if else_block:
builder.cbranch(cond, then_block, else_block) builder.cbranch(cond, then_block, else_block)
else: else:
@@ -297,27 +329,6 @@ def handle_if(
builder.position_at_end(merge_block) builder.position_at_end(merge_block)
def handle_return(builder, stmt, local_sym_tab, ret_type):
logger.info(f"Handling return statement: {ast.dump(stmt)}")
if stmt.value is None:
return _handle_none_return(builder)
elif isinstance(stmt.value, ast.Name) and _is_xdp_name(stmt.value.id):
return _handle_xdp_return(stmt, builder, ret_type)
else:
val = eval_expr(
func=None,
module=None,
builder=builder,
expr=stmt.value,
local_sym_tab=local_sym_tab,
map_sym_tab={},
structs_sym_tab={},
)
logger.info(f"Evaluated return expression to {val}")
builder.ret(val[0])
return True
def process_stmt( def process_stmt(
func, func,
module, module,
@@ -329,8 +340,9 @@ def process_stmt(
did_return, did_return,
ret_type=ir.IntType(64), ret_type=ir.IntType(64),
): ):
logger.info(f"Processing statement: {ast.dump(stmt)}") print(f"Processing statement: {ast.dump(stmt)}")
if isinstance(stmt, ast.Expr): if isinstance(stmt, ast.Expr):
print(local_var_metadata)
handle_expr( handle_expr(
func, func,
module, module,
@@ -339,6 +351,7 @@ def process_stmt(
local_sym_tab, local_sym_tab,
map_sym_tab, map_sym_tab,
structs_sym_tab, structs_sym_tab,
local_var_metadata,
) )
elif isinstance(stmt, ast.Assign): elif isinstance(stmt, ast.Assign):
handle_assign( handle_assign(
@@ -351,12 +364,36 @@ def process_stmt(
func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_sym_tab func, module, builder, stmt, map_sym_tab, local_sym_tab, structs_sym_tab
) )
elif isinstance(stmt, ast.Return): elif isinstance(stmt, ast.Return):
did_return = handle_return( if stmt.value is None:
builder, builder.ret(ir.Constant(ir.IntType(32), 0))
stmt, did_return = True
local_sym_tab, elif (
ret_type, isinstance(stmt.value, ast.Call)
and isinstance(stmt.value.func, ast.Name)
and len(stmt.value.args) == 1
and isinstance(stmt.value.args[0], ast.Constant)
and isinstance(stmt.value.args[0].value, int)
):
call_type = stmt.value.func.id
if ctypes_to_ir(call_type) != ret_type:
raise ValueError(
"Return type mismatch: expected"
f"{ctypes_to_ir(call_type)}, got {call_type}"
) )
else:
builder.ret(ir.Constant(ret_type, stmt.value.args[0].value))
did_return = True
elif isinstance(stmt.value, ast.Name):
if stmt.value.id == "XDP_PASS":
builder.ret(ir.Constant(ret_type, 2))
did_return = True
elif stmt.value.id == "XDP_DROP":
builder.ret(ir.Constant(ret_type, 1))
did_return = True
else:
raise ValueError("Failed to evaluate return expression")
else:
raise ValueError("Unsupported return value")
return did_return return did_return
@@ -364,7 +401,6 @@ def allocate_mem(
module, builder, body, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab module, builder, body, func, ret_type, map_sym_tab, local_sym_tab, structs_sym_tab
): ):
for stmt in body: for stmt in body:
has_metadata = False
if isinstance(stmt, ast.If): if isinstance(stmt, ast.If):
if stmt.body: if stmt.body:
local_sym_tab = allocate_mem( local_sym_tab = allocate_mem(
@@ -390,17 +426,14 @@ def allocate_mem(
) )
elif isinstance(stmt, ast.Assign): elif isinstance(stmt, ast.Assign):
if len(stmt.targets) != 1: if len(stmt.targets) != 1:
logger.info("Unsupported multiassignment") print("Unsupported multiassignment")
continue continue
target = stmt.targets[0] target = stmt.targets[0]
if not isinstance(target, ast.Name): if not isinstance(target, ast.Name):
logger.info("Unsupported assignment target") print("Unsupported assignment target")
continue continue
var_name = target.id var_name = target.id
rval = stmt.value rval = stmt.value
if var_name in local_sym_tab:
logger.info(f"Variable {var_name} already allocated")
continue
if isinstance(rval, ast.Call): if isinstance(rval, ast.Call):
if isinstance(rval.func, ast.Name): if isinstance(rval.func, ast.Name):
call_type = rval.func.id call_type = rval.func.id
@ -408,71 +441,66 @@ def allocate_mem(
ir_type = ctypes_to_ir(call_type) ir_type = ctypes_to_ir(call_type)
var = builder.alloca(ir_type, name=var_name) var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8 var.align = ir_type.width // 8
logger.info( print(f"Pre-allocated variable {var_name} of type {call_type}")
f"Pre-allocated variable {var_name} of type {call_type}"
)
elif HelperHandlerRegistry.has_handler(call_type): elif HelperHandlerRegistry.has_handler(call_type):
# Assume return type is int64 for now # Assume return type is int64 for now
ir_type = ir.IntType(64) ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name) var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8 var.align = ir_type.width // 8
logger.info(f"Pre-allocated variable {var_name} for helper") print(f"Pre-allocated variable {var_name} for helper")
elif call_type == "deref" and len(rval.args) == 1: elif call_type == "deref" and len(rval.args) == 1:
# Assume return type is int64 for now # Assume return type is int64 for now
ir_type = ir.IntType(64) ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name) var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8 var.align = ir_type.width // 8
logger.info(f"Pre-allocated variable {var_name} for deref") print(f"Pre-allocated variable {var_name} for deref")
elif call_type in structs_sym_tab: elif call_type in structs_sym_tab:
struct_info = structs_sym_tab[call_type] struct_info = structs_sym_tab[call_type]
ir_type = struct_info.ir_type ir_type = struct_info.ir_type
var = builder.alloca(ir_type, name=var_name) var = builder.alloca(ir_type, name=var_name)
has_metadata = True local_var_metadata[var_name] = call_type
logger.info( print(
f"Pre-allocated variable {var_name} for struct {call_type}" f"Pre-allocated variable {var_name} "
f"for struct {call_type}"
) )
elif isinstance(rval.func, ast.Attribute): elif isinstance(rval.func, ast.Attribute):
ir_type = ir.PointerType(ir.IntType(64)) ir_type = ir.PointerType(ir.IntType(64))
var = builder.alloca(ir_type, name=var_name) var = builder.alloca(ir_type, name=var_name)
# var.align = ir_type.width // 8 # var.align = ir_type.width // 8
logger.info(f"Pre-allocated variable {var_name} for map") print(f"Pre-allocated variable {var_name} for map")
else: else:
logger.info("Unsupported assignment call function type") print("Unsupported assignment call function type")
continue continue
elif isinstance(rval, ast.Constant): elif isinstance(rval, ast.Constant):
if isinstance(rval.value, bool): if isinstance(rval.value, bool):
ir_type = ir.IntType(1) ir_type = ir.IntType(1)
var = builder.alloca(ir_type, name=var_name) var = builder.alloca(ir_type, name=var_name)
var.align = 1 var.align = 1
logger.info(f"Pre-allocated variable {var_name} of type c_bool") print(f"Pre-allocated variable {var_name} of type c_bool")
elif isinstance(rval.value, int): elif isinstance(rval.value, int):
# Assume c_int64 for now # Assume c_int64 for now
ir_type = ir.IntType(64) ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name) var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8 var.align = ir_type.width // 8
logger.info(f"Pre-allocated variable {var_name} of type c_int64") print(f"Pre-allocated variable {var_name} of type c_int64")
elif isinstance(rval.value, str): elif isinstance(rval.value, str):
ir_type = ir.PointerType(ir.IntType(8)) ir_type = ir.PointerType(ir.IntType(8))
var = builder.alloca(ir_type, name=var_name) var = builder.alloca(ir_type, name=var_name)
var.align = 8 var.align = 8
logger.info(f"Pre-allocated variable {var_name} of type string") print(f"Pre-allocated variable {var_name} of type string")
else: else:
logger.info("Unsupported constant type") print("Unsupported constant type")
continue continue
elif isinstance(rval, ast.BinOp): elif isinstance(rval, ast.BinOp):
# Assume c_int64 for now # Assume c_int64 for now
ir_type = ir.IntType(64) ir_type = ir.IntType(64)
var = builder.alloca(ir_type, name=var_name) var = builder.alloca(ir_type, name=var_name)
var.align = ir_type.width // 8 var.align = ir_type.width // 8
logger.info(f"Pre-allocated variable {var_name} of type c_int64") print(f"Pre-allocated variable {var_name} of type c_int64")
else: else:
logger.info("Unsupported assignment value type") print("Unsupported assignment value type")
continue continue
local_sym_tab[var_name] = (var, ir_type)
if has_metadata:
local_sym_tab[var_name] = LocalSymbol(var, ir_type, call_type)
else:
local_sym_tab[var_name] = LocalSymbol(var, ir_type)
return local_sym_tab return local_sym_tab
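Roughly, the pre-allocation pass above scans a body like the following once and reserves an entry-block slot per assignment target; the names are hypothetical and the comments note which branch applies:

def probe(ctx: c_void_p) -> c_int64:
    flag = True                    # bool constant -> IntType(1), align 1
    counter = 0                    # int constant -> IntType(64), align 8
    total = counter + 1            # BinOp -> IntType(64), align 8
    ts = ktime()                   # registered helper, return type assumed i64
    prev = hist().lookup(counter)  # attribute call on a map -> pointer slot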
@ -497,7 +525,7 @@ def process_func_body(
structs_sym_tab, structs_sym_tab,
) )
logger.info(f"Local symbol table: {local_sym_tab.keys()}") print(f"Local symbol table: {local_sym_tab.keys()}")
for stmt in func_node.body: for stmt in func_node.body:
did_return = process_stmt( did_return = process_stmt(
@ -513,7 +541,7 @@ def process_func_body(
) )
if not did_return: if not did_return:
builder.ret(ir.Constant(ir.IntType(64), 0)) builder.ret(ir.Constant(ir.IntType(32), 0))
def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_tab): def process_bpf_chunk(func_node, module, return_type, map_sym_tab, structs_sym_tab):
@ -569,7 +597,7 @@ def func_proc(tree, module, chunks, map_sym_tab, structs_sym_tab):
if is_global: if is_global:
continue continue
func_type = get_probe_string(func_node) func_type = get_probe_string(func_node)
logger.info(f"Found probe_string of {func_node.name}: {func_type}") print(f"Found probe_string of {func_node.name}: {func_type}")
process_bpf_chunk( process_bpf_chunk(
func_node, func_node,
@ -634,7 +662,7 @@ def infer_return_type(func_node: ast.FunctionDef):
if found_type is None: if found_type is None:
found_type = t found_type = t
elif found_type != t: elif found_type != t:
raise ValueError(f"Conflicting return types: {found_type} vs {t}") raise ValueError("Conflicting return types:" f"{found_type} vs {t}")
return found_type or "None" return found_type or "None"

View File

@ -1,121 +1,8 @@
from llvmlite import ir from llvmlite import ir
import ast import ast
from logging import Logger
import logging
from .type_deducer import ctypes_to_ir
logger: Logger = logging.getLogger(__name__) def emit_globals(module: ir.Module, names: list[str]):
# TODO: this is going to be a huge fuck of a headache in the future.
global_sym_tab = []
def populate_global_symbol_table(tree, module: ir.Module):
for node in tree.body:
if isinstance(node, ast.FunctionDef):
for dec in node.decorator_list:
if (
isinstance(dec, ast.Call)
and isinstance(dec.func, ast.Name)
and dec.func.id == "section"
and len(dec.args) == 1
and isinstance(dec.args[0], ast.Constant)
and isinstance(dec.args[0].value, str)
):
global_sym_tab.append(node)
elif isinstance(dec, ast.Name) and dec.id == "bpfglobal":
global_sym_tab.append(node)
elif isinstance(dec, ast.Name) and dec.id == "map":
global_sym_tab.append(node)
return False
def emit_global(module: ir.Module, node, name):
logger.info(f"global identifier {name} processing")
# deduce LLVM type from the annotated return
if not isinstance(node.returns, ast.Name):
raise ValueError(f"Unsupported return annotation {ast.dump(node.returns)}")
ty = ctypes_to_ir(node.returns.id)
# extract the return expression
# TODO: turn this return extractor into a generic function I can use everywhere.
ret_stmt = node.body[0]
if not isinstance(ret_stmt, ast.Return) or ret_stmt.value is None:
raise ValueError(f"Global '{name}' has no valid return")
init_val = ret_stmt.value
# simple constant like "return 0"
if isinstance(init_val, ast.Constant):
llvm_init = ir.Constant(ty, init_val.value)
# variable reference like "return SOME_CONST"
elif isinstance(init_val, ast.Name):
# need symbol resolution here, stub as 0 for now
raise ValueError(f"Name reference {init_val.id} not yet supported")
# constructor call like "return c_int64(0)" or dataclass(...)
elif isinstance(init_val, ast.Call):
if len(init_val.args) >= 1 and isinstance(init_val.args[0], ast.Constant):
llvm_init = ir.Constant(ty, init_val.args[0].value)
else:
logger.info("Defaulting to zero as no constant argument found")
llvm_init = ir.Constant(ty, 0)
else:
raise ValueError(f"Unsupported return expr {ast.dump(init_val)}")
gvar = ir.GlobalVariable(module, ty, name=name)
gvar.initializer = llvm_init
gvar.align = 8
gvar.linkage = "dso_local"
gvar.global_constant = False
return gvar
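For reference, the initializer shapes emit_global accepts map onto globals like those in the examples later in this changeset; a minimal sketch (names illustrative):

@bpf
@bpfglobal
def somevalue() -> c_int32:
    return c_int32(42)  # constructor call: the constant argument becomes the initializer

@bpf
@bpfglobal
def zeroed() -> c_int64:
    return 0  # bare constant: used directly as the initializer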
def globals_processing(tree, module):
"""Process stuff decorated with @bpf and @bpfglobal except license and return the section name"""
globals_sym_tab = []
for node in tree.body:
# Skip non-assignment and non-function nodes
if not (isinstance(node, ast.FunctionDef)):
continue
# Get the name based on node type
if isinstance(node, ast.FunctionDef):
name = node.name
else:
continue
# Check for duplicate names
if name in globals_sym_tab:
raise SyntaxError(f"ERROR: Global name '{name}' previously defined")
else:
globals_sym_tab.append(name)
if isinstance(node, ast.FunctionDef) and node.name != "LICENSE":
decorators = [
dec.id for dec in node.decorator_list if isinstance(dec, ast.Name)
]
if "bpf" in decorators and "bpfglobal" in decorators:
if (
len(node.body) == 1
and isinstance(node.body[0], ast.Return)
and node.body[0].value is not None
and isinstance(
node.body[0].value, (ast.Constant, ast.Name, ast.Call)
)
):
emit_global(module, node, name)
else:
raise SyntaxError(f"ERROR: Invalid syntax for {name} global")
return None
def emit_llvm_compiler_used(module: ir.Module, names: list[str]):
""" """
Emit the @llvm.compiler.used global given a list of function/global names. Emit the @llvm.compiler.used global given a list of function/global names.
""" """
@ -137,7 +24,7 @@ def emit_llvm_compiler_used(module: ir.Module, names: list[str]):
gv.section = "llvm.metadata" gv.section = "llvm.metadata"
def globals_list_creation(tree, module: ir.Module): def globals_processing(tree, module: ir.Module):
collected = ["LICENSE"] collected = ["LICENSE"]
for node in tree.body: for node in tree.body:
@ -153,11 +40,10 @@ def globals_list_creation(tree, module: ir.Module):
): ):
collected.append(node.name) collected.append(node.name)
# NOTE: all globals other than elif isinstance(dec, ast.Name) and dec.id == "bpfglobal":
# elif isinstance(dec, ast.Name) and dec.id == "bpfglobal": collected.append(node.name)
# collected.append(node.name)
elif isinstance(dec, ast.Name) and dec.id == "map": elif isinstance(dec, ast.Name) and dec.id == "map":
collected.append(node.name) collected.append(node.name)
emit_llvm_compiler_used(module, collected) emit_globals(module, collected)

View File

@ -9,10 +9,6 @@ from .helper_utils import (
simple_string_print, simple_string_print,
get_data_ptr_and_size, get_data_ptr_and_size,
) )
from logging import Logger
import logging
logger: Logger = logging.getLogger(__name__)
class BPFHelperID(Enum): class BPFHelperID(Enum):
@ -23,6 +19,8 @@ class BPFHelperID(Enum):
BPF_PRINTK = 6 BPF_PRINTK = 6
BPF_GET_CURRENT_PID_TGID = 14 BPF_GET_CURRENT_PID_TGID = 14
BPF_PERF_EVENT_OUTPUT = 25 BPF_PERF_EVENT_OUTPUT = 25
BPF_RINGBUF_RESERVE = 131
BPF_RINGBUF_SUBMIT = 132
@HelperHandlerRegistry.register("ktime") @HelperHandlerRegistry.register("ktime")
@ -34,6 +32,7 @@ def bpf_ktime_get_ns_emitter(
func, func,
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
local_var_metadata=None,
): ):
""" """
Emit LLVM IR for bpf_ktime_get_ns helper function call. Emit LLVM IR for bpf_ktime_get_ns helper function call.
@ -56,13 +55,14 @@ def bpf_map_lookup_elem_emitter(
func, func,
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
local_var_metadata=None,
): ):
""" """
Emit LLVM IR for bpf_map_lookup_elem helper function call. Emit LLVM IR for bpf_map_lookup_elem helper function call.
""" """
if not call.args or len(call.args) != 1: if not call.args or len(call.args) != 1:
raise ValueError( raise ValueError(
f"Map lookup expects exactly one argument (key), got {len(call.args)}" "Map lookup expects exactly one argument (key), got " f"{len(call.args)}"
) )
key_ptr = get_or_create_ptr_from_arg(call.args[0], builder, local_sym_tab) key_ptr = get_or_create_ptr_from_arg(call.args[0], builder, local_sym_tab)
map_void_ptr = builder.bitcast(map_ptr, ir.PointerType()) map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
@ -91,6 +91,7 @@ def bpf_printk_emitter(
func, func,
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
local_var_metadata=None,
): ):
"""Emit LLVM IR for bpf_printk helper function call.""" """Emit LLVM IR for bpf_printk helper function call."""
if not hasattr(func, "_fmt_counter"): if not hasattr(func, "_fmt_counter"):
@ -108,6 +109,7 @@ def bpf_printk_emitter(
func, func,
local_sym_tab, local_sym_tab,
struct_sym_tab, struct_sym_tab,
local_var_metadata,
) )
elif isinstance(call.args[0], ast.Constant) and isinstance(call.args[0].value, str): elif isinstance(call.args[0], ast.Constant) and isinstance(call.args[0].value, str):
# TODO: We are only supporting single arguments for now. # TODO: We are only supporting single arguments for now.
@ -138,6 +140,7 @@ def bpf_map_update_elem_emitter(
func, func,
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
local_var_metadata=None,
): ):
""" """
Emit LLVM IR for bpf_map_update_elem helper function call. Emit LLVM IR for bpf_map_update_elem helper function call.
@ -145,7 +148,8 @@ def bpf_map_update_elem_emitter(
""" """
if not call.args or len(call.args) < 2 or len(call.args) > 3: if not call.args or len(call.args) < 2 or len(call.args) > 3:
raise ValueError( raise ValueError(
f"Map update expects 2 or 3 args (key, value, flags), got {len(call.args)}" "Map update expects 2 or 3 args (key, value, flags), "
f"got {len(call.args)}"
) )
key_arg = call.args[0] key_arg = call.args[0]
@ -178,6 +182,114 @@ def bpf_map_update_elem_emitter(
return result, None return result, None
@HelperHandlerRegistry.register("submit")
def bpf_ringbuf_submit_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
local_var_metadata=None,
):
"""
Emit LLVM IR for bpf_ringbuf_submit helper function call.
Expected call signature: ringbuf.submit(data, flags=0)
"""
if not call.args or len(call.args) < 1 or len(call.args) > 2:
raise ValueError(
"Ringbuf submit expects 1 or 2 args (data, flags), "
f"got {len(call.args)}"
)
data_arg = call.args[0]
data_ptr = get_or_create_ptr_from_arg(data_arg, builder, local_sym_tab)
# Get flags argument (default to 0)
flags_arg = call.args[1] if len(call.args) > 1 else None
flags_val = get_flags_val(flags_arg, builder, local_sym_tab)
# Returns: void
# Args: (void* data, u64 flags)
fn_type = ir.FunctionType(
ir.VoidType(),
[ir.PointerType(), ir.IntType(64)],
var_arg=False,
)
fn_ptr_type = ir.PointerType(fn_type)
fn_addr = ir.Constant(ir.IntType(64), BPFHelperID.BPF_RINGBUF_SUBMIT.value)
fn_ptr = builder.inttoptr(fn_addr, fn_ptr_type)
if isinstance(flags_val, int):
flags_const = ir.Constant(ir.IntType(64), flags_val)
else:
flags_const = flags_val
builder.call(fn_ptr, [data_ptr, flags_const], tail=True)
return None
@HelperHandlerRegistry.register("reserve")
def bpf_ringbuf_reserve_emitter(
call,
map_ptr,
module,
builder,
func,
local_sym_tab=None,
struct_sym_tab=None,
local_var_metadata=None,
):
"""
Emit LLVM IR for bpf_ringbuf_reserve helper function call.
Expected call signature: ringbuf.reserve(size, flags=0)
"""
if not call.args or len(call.args) < 1 or len(call.args) > 2:
raise ValueError(
"Ringbuf reserve expects 1 or 2 args (size, flags), "
f"got {len(call.args)}"
)
# TODO: here, getting the length of objects does not actually work yet; needs to be fixed.
size_arg = call.args[0]
if isinstance(size_arg, ast.Constant):
size_val = ir.Constant(ir.IntType(64), size_arg.value)
elif isinstance(size_arg, ast.Name):
if size_arg.id not in local_sym_tab:
raise ValueError(
f"Variable '{size_arg.id}' not found in local symbol table"
)
size_val = builder.load(local_sym_tab[size_arg.id][0])  # symbol table stores (ptr, type) tuples
else:
raise NotImplementedError(f"Unsupported size argument type: {type(size_arg)}")
flags_arg = call.args[1] if len(call.args) > 1 else None
flags_val = get_flags_val(flags_arg, builder, local_sym_tab)
map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
# Args: (void* ringbuf, u64 size, u64 flags)
fn_type = ir.FunctionType(
ir.PointerType(),
[ir.PointerType(), ir.IntType(64), ir.IntType(64)],
var_arg=False,
)
fn_ptr_type = ir.PointerType(fn_type)
fn_addr = ir.Constant(ir.IntType(64), BPFHelperID.BPF_RINGBUF_RESERVE.value)
fn_ptr = builder.inttoptr(fn_addr, fn_ptr_type)
if isinstance(flags_val, int):
flags_const = ir.Constant(ir.IntType(64), flags_val)
else:
flags_const = flags_val
result = builder.call(fn_ptr, [map_void_ptr, size_val, flags_const], tail=True)
return result, ir.PointerType()
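Taken together with the submit emitter above, and assuming the RingBuf map type used by examples/ringbuf.py later in this changeset, the intended user-facing flow is roughly:

@bpf
@map
def events() -> RingBuf:
    return RingBuf(max_entries=4096)

@bpf
@section("tracepoint/syscalls/sys_enter_clone")
def probe(ctx: c_void_p) -> c_int32:
    e = events().reserve(64)  # emits bpf_ringbuf_reserve(&events, 64, 0)
    if e == 0:                # reservation can fail, so bail out before submitting
        return c_int32(0)
    events().submit(e)        # emits bpf_ringbuf_submit(e, 0)
    return c_int32(0)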
@HelperHandlerRegistry.register("delete") @HelperHandlerRegistry.register("delete")
def bpf_map_delete_elem_emitter( def bpf_map_delete_elem_emitter(
@ -188,6 +300,7 @@ def bpf_map_delete_elem_emitter(
func, func,
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
local_var_metadata=None,
): ):
""" """
Emit LLVM IR for bpf_map_delete_elem helper function call. Emit LLVM IR for bpf_map_delete_elem helper function call.
@ -195,7 +308,7 @@ def bpf_map_delete_elem_emitter(
""" """
if not call.args or len(call.args) != 1: if not call.args or len(call.args) != 1:
raise ValueError( raise ValueError(
f"Map delete expects exactly one argument (key), got {len(call.args)}" "Map delete expects exactly one argument (key), got " f"{len(call.args)}"
) )
key_ptr = get_or_create_ptr_from_arg(call.args[0], builder, local_sym_tab) key_ptr = get_or_create_ptr_from_arg(call.args[0], builder, local_sym_tab)
map_void_ptr = builder.bitcast(map_ptr, ir.PointerType()) map_void_ptr = builder.bitcast(map_ptr, ir.PointerType())
@ -225,6 +338,7 @@ def bpf_get_current_pid_tgid_emitter(
func, func,
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
local_var_metadata=None,
): ):
""" """
Emit LLVM IR for bpf_get_current_pid_tgid helper function call. Emit LLVM IR for bpf_get_current_pid_tgid helper function call.
@ -251,15 +365,18 @@ def bpf_perf_event_output_handler(
func, func,
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
local_var_metadata=None,
): ):
if len(call.args) != 1: if len(call.args) != 1:
raise ValueError( raise ValueError(
f"Perf event output expects exactly one argument, got {len(call.args)}" "Perf event output expects exactly one argument, " f"got {len(call.args)}"
) )
data_arg = call.args[0] data_arg = call.args[0]
ctx_ptr = func.args[0] # First argument to the function is ctx ctx_ptr = func.args[0] # First argument to the function is ctx
data_ptr, size_val = get_data_ptr_and_size(data_arg, local_sym_tab, struct_sym_tab) data_ptr, size_val = get_data_ptr_and_size(
data_arg, local_sym_tab, struct_sym_tab, local_var_metadata
)
# BPF_F_CURRENT_CPU is -1 in 32 bit # BPF_F_CURRENT_CPU is -1 in 32 bit
flags_val = ir.Constant(ir.IntType(64), 0xFFFFFFFF) flags_val = ir.Constant(ir.IntType(64), 0xFFFFFFFF)
@ -297,6 +414,7 @@ def handle_helper_call(
local_sym_tab=None, local_sym_tab=None,
map_sym_tab=None, map_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
local_var_metadata=None,
): ):
"""Process a BPF helper function call and emit the appropriate LLVM IR.""" """Process a BPF helper function call and emit the appropriate LLVM IR."""
@ -315,6 +433,7 @@ def handle_helper_call(
func, func,
local_sym_tab, local_sym_tab,
struct_sym_tab, struct_sym_tab,
local_var_metadata,
) )
# Handle direct function calls (e.g., print(), ktime()) # Handle direct function calls (e.g., print(), ktime())
@ -325,7 +444,7 @@ def handle_helper_call(
elif isinstance(call.func, ast.Attribute): elif isinstance(call.func, ast.Attribute):
method_name = call.func.attr method_name = call.func.attr
value = call.func.value value = call.func.value
logger.info(f"Handling method call: {ast.dump(call.func)}") print(f"Handling method call: {ast.dump(call.func)}")
# Get map pointer from different styles of map access # Get map pointer from different styles of map access
if isinstance(value, ast.Call) and isinstance(value.func, ast.Name): if isinstance(value, ast.Call) and isinstance(value.func, ast.Name):
# Func style: my_map().lookup(key) # Func style: my_map().lookup(key)

View File

@ -3,7 +3,7 @@ import logging
from collections.abc import Callable from collections.abc import Callable
from llvmlite import ir from llvmlite import ir
from pythonbpf.expr import eval_expr from pythonbpf.expr_pass import eval_expr
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -37,7 +37,7 @@ class HelperHandlerRegistry:
def get_var_ptr_from_name(var_name, local_sym_tab): def get_var_ptr_from_name(var_name, local_sym_tab):
"""Get a pointer to a variable from the symbol table.""" """Get a pointer to a variable from the symbol table."""
if local_sym_tab and var_name in local_sym_tab: if local_sym_tab and var_name in local_sym_tab:
return local_sym_tab[var_name].var return local_sym_tab[var_name][0]
raise ValueError(f"Variable '{var_name}' not found in local symbol table") raise ValueError(f"Variable '{var_name}' not found in local symbol table")
@ -72,7 +72,7 @@ def get_flags_val(arg, builder, local_sym_tab):
if isinstance(arg, ast.Name): if isinstance(arg, ast.Name):
if local_sym_tab and arg.id in local_sym_tab: if local_sym_tab and arg.id in local_sym_tab:
flags_ptr = local_sym_tab[arg.id].var flags_ptr = local_sym_tab[arg.id][0]
return builder.load(flags_ptr) return builder.load(flags_ptr)
else: else:
raise ValueError(f"Variable '{arg.id}' not found in local symbol table") raise ValueError(f"Variable '{arg.id}' not found in local symbol table")
@ -100,6 +100,7 @@ def handle_fstring_print(
func, func,
local_sym_tab=None, local_sym_tab=None,
struct_sym_tab=None, struct_sym_tab=None,
local_var_metadata=None,
): ):
"""Handle f-string formatting for bpf_printk emitter.""" """Handle f-string formatting for bpf_printk emitter."""
fmt_parts = [] fmt_parts = []
@ -117,6 +118,7 @@ def handle_fstring_print(
exprs, exprs,
local_sym_tab, local_sym_tab,
struct_sym_tab, struct_sym_tab,
local_var_metadata,
) )
else: else:
raise NotImplementedError(f"Unsupported f-string value type: {type(value)}") raise NotImplementedError(f"Unsupported f-string value type: {type(value)}")
@ -136,6 +138,7 @@ def handle_fstring_print(
builder, builder,
local_sym_tab, local_sym_tab,
struct_sym_tab, struct_sym_tab,
local_var_metadata,
) )
args.append(arg_value) args.append(arg_value)
@ -155,7 +158,9 @@ def _process_constant_in_fstring(cst, fmt_parts, exprs):
) )
def _process_fval(fval, fmt_parts, exprs, local_sym_tab, struct_sym_tab): def _process_fval(
fval, fmt_parts, exprs, local_sym_tab, struct_sym_tab, local_var_metadata
):
"""Process formatted values in f-string.""" """Process formatted values in f-string."""
logger.debug(f"Processing formatted value: {ast.dump(fval)}") logger.debug(f"Processing formatted value: {ast.dump(fval)}")
@ -168,6 +173,7 @@ def _process_fval(fval, fmt_parts, exprs, local_sym_tab, struct_sym_tab):
exprs, exprs,
local_sym_tab, local_sym_tab,
struct_sym_tab, struct_sym_tab,
local_var_metadata,
) )
else: else:
raise NotImplementedError( raise NotImplementedError(
@ -178,11 +184,13 @@ def _process_fval(fval, fmt_parts, exprs, local_sym_tab, struct_sym_tab):
def _process_name_in_fval(name_node, fmt_parts, exprs, local_sym_tab): def _process_name_in_fval(name_node, fmt_parts, exprs, local_sym_tab):
"""Process name nodes in formatted values.""" """Process name nodes in formatted values."""
if local_sym_tab and name_node.id in local_sym_tab: if local_sym_tab and name_node.id in local_sym_tab:
_, var_type, tmp = local_sym_tab[name_node.id] _, var_type = local_sym_tab[name_node.id]
_populate_fval(var_type, name_node, fmt_parts, exprs) _populate_fval(var_type, name_node, fmt_parts, exprs)
def _process_attr_in_fval(attr_node, fmt_parts, exprs, local_sym_tab, struct_sym_tab): def _process_attr_in_fval(
attr_node, fmt_parts, exprs, local_sym_tab, struct_sym_tab, local_var_metadata
):
"""Process attribute nodes in formatted values.""" """Process attribute nodes in formatted values."""
if ( if (
isinstance(attr_node.value, ast.Name) isinstance(attr_node.value, ast.Name)
@ -192,7 +200,12 @@ def _process_attr_in_fval(attr_node, fmt_parts, exprs, local_sym_tab, struct_sym
var_name = attr_node.value.id var_name = attr_node.value.id
field_name = attr_node.attr field_name = attr_node.attr
var_type = local_sym_tab[var_name].metadata if not local_var_metadata or var_name not in local_var_metadata:
raise ValueError(
f"Metadata for '{var_name}' not found in local var metadata"
)
var_type = local_var_metadata[var_name]
if var_type not in struct_sym_tab: if var_type not in struct_sym_tab:
raise ValueError( raise ValueError(
f"Struct '{var_type}' for '{var_name}' not in symbol table" f"Struct '{var_type}' for '{var_name}' not in symbol table"
@ -250,7 +263,9 @@ def _create_format_string_global(fmt_str, func, module, builder):
return builder.bitcast(fmt_gvar, ir.PointerType()) return builder.bitcast(fmt_gvar, ir.PointerType())
def _prepare_expr_args(expr, func, module, builder, local_sym_tab, struct_sym_tab): def _prepare_expr_args(
expr, func, module, builder, local_sym_tab, struct_sym_tab, local_var_metadata
):
"""Evaluate and prepare an expression to use as an arg for bpf_printk.""" """Evaluate and prepare an expression to use as an arg for bpf_printk."""
val, _ = eval_expr( val, _ = eval_expr(
func, func,
@ -260,6 +275,7 @@ def _prepare_expr_args(expr, func, module, builder, local_sym_tab, struct_sym_ta
local_sym_tab, local_sym_tab,
None, None,
struct_sym_tab, struct_sym_tab,
local_var_metadata,
) )
if val: if val:
@ -270,7 +286,7 @@ def _prepare_expr_args(expr, func, module, builder, local_sym_tab, struct_sym_ta
val = builder.sext(val, ir.IntType(64)) val = builder.sext(val, ir.IntType(64))
else: else:
logger.warning( logger.warning(
"Only int and ptr supported in bpf_printk args. Others default to 0." "Only int and ptr supported in bpf_printk args. " "Others default to 0."
) )
val = ir.Constant(ir.IntType(64), 0) val = ir.Constant(ir.IntType(64), 0)
return val return val
@ -282,26 +298,34 @@ def _prepare_expr_args(expr, func, module, builder, local_sym_tab, struct_sym_ta
return ir.Constant(ir.IntType(64), 0) return ir.Constant(ir.IntType(64), 0)
def get_data_ptr_and_size(data_arg, local_sym_tab, struct_sym_tab): def get_data_ptr_and_size(data_arg, local_sym_tab, struct_sym_tab, local_var_metadata):
"""Extract data pointer and size information for perf event output.""" """Extract data pointer and size information for perf event output."""
if isinstance(data_arg, ast.Name): if isinstance(data_arg, ast.Name):
data_name = data_arg.id data_name = data_arg.id
if local_sym_tab and data_name in local_sym_tab: if local_sym_tab and data_name in local_sym_tab:
data_ptr = local_sym_tab[data_name].var data_ptr = local_sym_tab[data_name][0]
else: else:
raise ValueError( raise ValueError(
f"Data variable {data_name} not found in local symbol table." f"Data variable {data_name} not found in local symbol table."
) )
# Check if data_name is a struct # Check if data_name is a struct
data_type = local_sym_tab[data_name].metadata if local_var_metadata and data_name in local_var_metadata:
data_type = local_var_metadata[data_name]
if data_type in struct_sym_tab: if data_type in struct_sym_tab:
struct_info = struct_sym_tab[data_type] struct_info = struct_sym_tab[data_type]
size_val = ir.Constant(ir.IntType(64), struct_info.size) size_val = ir.Constant(ir.IntType(64), struct_info.size)
return data_ptr, size_val return data_ptr, size_val
else: else:
raise ValueError(f"Struct {data_type} for {data_name} not in symbol table.") raise ValueError(
f"Struct {data_type} for {data_name} not in symbol table."
)
else:
raise ValueError(
f"Metadata for variable {data_name} "
"not found in local variable metadata."
)
else: else:
raise NotImplementedError( raise NotImplementedError(
"Only simple object names are supported as data in perf event output." "Only simple object names are supported " "as data in perf event output."
) )
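A minimal sketch of the case this function serves, assuming the @struct decorator shown elsewhere in this changeset and a perf-event map whose emitter (above) forwards its single argument here; the map and method names are hypothetical:

@bpf
@struct
class data_t:
    pid: c_uint64
    ts: c_uint64

@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def probe(ctx: c_void_p) -> c_int64:
    data = data_t()         # local_var_metadata["data"] = "data_t"
    events().output(data)   # emitter resolves (pointer to data, sizeof(data_t))
    return c_int64(0)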

View File

@ -15,8 +15,5 @@ def deref(ptr):
return result if result is not None else 0 return result if result is not None else 0
XDP_ABORTED = ctypes.c_int64(0)
XDP_DROP = ctypes.c_int64(1) XDP_DROP = ctypes.c_int64(1)
XDP_PASS = ctypes.c_int64(2) XDP_PASS = ctypes.c_int64(2)
XDP_TX = ctypes.c_int64(3)
XDP_REDIRECT = ctypes.c_int64(4)

View File

@ -1,9 +1,5 @@
from llvmlite import ir from llvmlite import ir
import ast import ast
from logging import Logger
import logging
logger: Logger = logging.getLogger(__name__)
def emit_license(module: ir.Module, license_str: str): def emit_license(module: ir.Module, license_str: str):
@ -45,9 +41,9 @@ def license_processing(tree, module):
emit_license(module, node.body[0].value.value) emit_license(module, node.body[0].value.value)
return "LICENSE" return "LICENSE"
else: else:
logger.info("ERROR: LICENSE() must return a string literal") print("ERROR: LICENSE() must return a string literal")
return None return None
else: else:
logger.info("ERROR: LICENSE already defined") print("ERROR: LICENSE already defined")
return None return None
return None return None

View File

@ -3,7 +3,7 @@ from logging import Logger
from llvmlite import ir from llvmlite import ir
from enum import Enum from enum import Enum
from .maps_utils import MapProcessorRegistry from .maps_utils import MapProcessorRegistry
from pythonbpf.debuginfo import DebugInfoGenerator from ..debuginfo import DebugInfoGenerator
import logging import logging
logger: Logger = logging.getLogger(__name__) logger: Logger = logging.getLogger(__name__)
@ -85,7 +85,7 @@ def create_bpf_map(module, map_name, map_params):
def create_map_debug_info(module, map_global, map_name, map_params): def create_map_debug_info(module, map_global, map_name, map_params):
"""Generate debug info metadata for BPF maps HASH and PERF_EVENT_ARRAY""" """Generate debug information metadata for BPF maps HASH and PERF_EVENT_ARRAY"""
generator = DebugInfoGenerator(module) generator = DebugInfoGenerator(module)
uint_type = generator.get_uint32_type() uint_type = generator.get_uint32_type()
@ -278,7 +278,9 @@ def process_bpf_map(func_node, module):
if handler: if handler:
return handler(map_name, rval, module) return handler(map_name, rval, module)
else: else:
logger.warning(f"Unknown map type {rval.func.id}, defaulting to HashMap") logger.warning(
f"Unknown map type " f"{rval.func.id}, defaulting to HashMap"
)
return process_hash_map(map_name, rval, module) return process_hash_map(map_name, rval, module)
else: else:
raise ValueError("Function under @map must return a map") raise ValueError("Function under @map must return a map")

View File

@ -19,7 +19,7 @@ def structs_proc(tree, module, chunks):
structs_sym_tab = {} structs_sym_tab = {}
for cls_node in chunks: for cls_node in chunks:
if is_bpf_struct(cls_node): if is_bpf_struct(cls_node):
logger.info(f"Found BPF struct: {cls_node.name}") print(f"Found BPF struct: {cls_node.name}")
struct_info = process_bpf_struct(cls_node, module) struct_info = process_bpf_struct(cls_node, module)
structs_sym_tab[cls_node.name] = struct_info structs_sym_tab[cls_node.name] = struct_info
return structs_sym_tab return structs_sym_tab

View File

@ -1,7 +1,10 @@
from llvmlite import ir from llvmlite import ir
# TODO: THIS IS NOT SUPPOSED TO MATCH STRINGS :skull: # TODO: THIS IS NOT SUPPOSED TO MATCH STRINGS :skull:
mapping = {
def ctypes_to_ir(ctype: str):
mapping = {
"c_int8": ir.IntType(8), "c_int8": ir.IntType(8),
"c_uint8": ir.IntType(8), "c_uint8": ir.IntType(8),
"c_int16": ir.IntType(16), "c_int16": ir.IntType(16),
@ -15,14 +18,7 @@ mapping = {
"c_void_p": ir.IntType(64), "c_void_p": ir.IntType(64),
# Not so sure about this one # Not so sure about this one
"str": ir.PointerType(ir.IntType(8)), "str": ir.PointerType(ir.IntType(8)),
} }
def ctypes_to_ir(ctype: str):
if ctype in mapping: if ctype in mapping:
return mapping[ctype] return mapping[ctype]
raise NotImplementedError(f"No mapping for {ctype}") raise NotImplementedError(f"No mapping for {ctype}")
def is_ctypes(ctype: str) -> bool:
return ctype in mapping
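Since the table is keyed by the ctypes type name as a string, callers pass the annotation's identifier; illustrative calls (only names visible above, plus one deliberately unknown name):

ctypes_to_ir("c_uint8")   # -> ir.IntType(8)
ctypes_to_ir("c_void_p")  # -> ir.IntType(64)
ctypes_to_ir("str")       # -> ir.PointerType(ir.IntType(8))
ctypes_to_ir("data_t")    # struct names are not ctypes names -> NotImplementedError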

View File

@ -1,3 +0,0 @@
from .import_detector import vmlinux_proc
__all__ = ["vmlinux_proc"]

View File

@ -1,156 +0,0 @@
import logging
from functools import lru_cache
import importlib
from .dependency_handler import DependencyHandler
from .dependency_node import DependencyNode
import ctypes
from typing import Optional, Any
logger = logging.getLogger(__name__)
@lru_cache(maxsize=1)
def get_module_symbols(module_name: str):
imported_module = importlib.import_module(module_name)
return [name for name in dir(imported_module)], imported_module
def process_vmlinux_class(node, llvm_module, handler: DependencyHandler):
symbols_in_module, imported_module = get_module_symbols("vmlinux")
if node.name in symbols_in_module:
vmlinux_type = getattr(imported_module, node.name)
process_vmlinux_post_ast(vmlinux_type, llvm_module, handler)
else:
raise ImportError(f"{node.name} not in vmlinux")
def process_vmlinux_post_ast(
elem_type_class, llvm_handler, handler: DependencyHandler, processing_stack=None
):
# Initialize processing stack on first call
if processing_stack is None:
processing_stack = set()
symbols_in_module, imported_module = get_module_symbols("vmlinux")
current_symbol_name = elem_type_class.__name__
field_table = {}
is_complex_type = False
containing_type: Optional[Any] = None
ctype_complex_type: Optional[Any] = None
type_length: Optional[int] = None
module_name = getattr(elem_type_class, "__module__", None)
if current_symbol_name in processing_stack:
logger.info(f"Circular dependency detected for {current_symbol_name}, skipping")
return True
# Check if already processed
if handler.has_node(current_symbol_name):
logger.info(f"Node {current_symbol_name} already processed and ready")
return True
processing_stack.add(current_symbol_name)
if module_name == "vmlinux":
if hasattr(elem_type_class, "_type_"):
pass
else:
new_dep_node = DependencyNode(name=current_symbol_name)
handler.add_node(new_dep_node)
class_obj = getattr(imported_module, current_symbol_name)
# Inspect the class fields
if hasattr(class_obj, "_fields_"):
for field_elem in class_obj._fields_:
field_name = None
field_type = None
bitfield_size = None
if len(field_elem) == 2:
field_name, field_type = field_elem
elif len(field_elem) == 3:
field_name, field_type, bitfield_size = field_elem
field_table[field_name] = [field_type, bitfield_size]
elif hasattr(class_obj, "__annotations__"):
for field_elem in class_obj.__annotations__.items():
field_name = None
field_type = None
bitfield_size = None
if len(field_elem) == 2:
field_name, field_type = field_elem
elif len(field_elem) == 3:
field_name, field_type, bitfield_size = field_elem
field_table[field_name] = [field_type, bitfield_size]
else:
raise TypeError("Could not get required class and definition")
logger.debug(f"Extracted fields for {current_symbol_name}: {field_table}")
for elem in field_table.items():
elem_name, elem_temp_list = elem
[elem_type, elem_bitfield_size] = elem_temp_list
local_module_name = getattr(elem_type, "__module__", None)
if local_module_name == ctypes.__name__:
new_dep_node.add_field(elem_name, elem_type, ready=False)
new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size)
new_dep_node.set_field_ready(elem_name, is_ready=True)
logger.info(f"Field {elem_name} is direct ctypes type: {elem_type}")
elif local_module_name == "vmlinux":
new_dep_node.add_field(elem_name, elem_type, ready=False)
new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size)
logger.debug(
f"Processing vmlinux field: {elem_name}, type: {elem_type}"
)
if hasattr(elem_type, "_type_"):
is_complex_type = True
containing_type = elem_type._type_
if hasattr(elem_type, "_length_") and is_complex_type:
type_length = elem_type._length_
if containing_type.__module__ == "vmlinux":
pass
elif containing_type.__module__ == ctypes.__name__:
if isinstance(elem_type, type):
if issubclass(elem_type, ctypes.Array):
ctype_complex_type = ctypes.Array
elif issubclass(elem_type, ctypes._Pointer):
ctype_complex_type = ctypes._Pointer
else:
raise TypeError("Unsupported ctypes subclass")
else:
raise ImportError(
f"Unsupported module of {containing_type}"
)
logger.info(
f"{containing_type} containing type of parent {elem_name} with {elem_type} and ctype {ctype_complex_type} and length {type_length}"
)
new_dep_node.set_field_containing_type(
elem_name, containing_type
)
new_dep_node.set_field_type_size(elem_name, type_length)
new_dep_node.set_field_ctype_complex_type(
elem_name, ctype_complex_type
)
new_dep_node.set_field_type(elem_name, elem_type)
if containing_type.__module__ == "vmlinux":
if process_vmlinux_post_ast(
containing_type, llvm_handler, handler, processing_stack
):
new_dep_node.set_field_ready(elem_name, True)
elif containing_type.__module__ == ctypes.__name__:
logger.info(f"Processing ctype internal{containing_type}")
else:
raise TypeError(
"Module not supported in recursive resolution"
)
continue
if process_vmlinux_post_ast(
elem_type, llvm_handler, handler, processing_stack
):
new_dep_node.set_field_ready(elem_name, True)
else:
raise ValueError(
f"{elem_name} with type {elem_type} from module {module_name} not supported in recursive resolver"
)
else:
raise ImportError("UNSUPPORTED Module")
print(current_symbol_name, "DONE")
print(f"handler readiness {handler.is_ready}")

View File

@ -1,149 +0,0 @@
from typing import Optional, Dict, List, Iterator
from .dependency_node import DependencyNode
class DependencyHandler:
"""
Manages a collection of DependencyNode objects with no duplicates.
Ensures that no two nodes with the same name can be added and provides
methods to check readiness and retrieve specific nodes.
Example usage:
# Create a handler
handler = DependencyHandler()
# Create some dependency nodes
node1 = DependencyNode(name="node1")
node1.add_field("field1", str)
node1.set_field_value("field1", "value1")
node2 = DependencyNode(name="node2")
node2.add_field("field1", int)
# Add nodes to the handler
handler.add_node(node1)
handler.add_node(node2)
# Check if a specific node exists
print(handler.has_node("node1")) # True
# Get a reference to a node and modify it
node = handler.get_node("node2")
node.set_field_value("field1", 42)
# Check if all nodes are ready
print(handler.is_ready) # False (node1 is ready, but node2 isn't)
"""
def __init__(self):
# Using a dictionary with node names as keys ensures name uniqueness
# and provides efficient lookups
self._nodes: Dict[str, DependencyNode] = {}
def add_node(self, node: DependencyNode) -> bool:
"""
Add a dependency node to the handler.
Args:
node: The DependencyNode to add
Returns:
bool: True if the node was added, False if a node with the same name already exists
Raises:
TypeError: If the provided object is not a DependencyNode
"""
if not isinstance(node, DependencyNode):
raise TypeError(f"Expected DependencyNode, got {type(node).__name__}")
# Check if a node with this name already exists
if node.name in self._nodes:
return False
self._nodes[node.name] = node
return True
@property
def is_ready(self) -> bool:
"""
Check if all nodes are ready.
Returns:
bool: True if all nodes are ready (or if there are no nodes), False otherwise
"""
if not self._nodes:
return True
return all(node.is_ready for node in self._nodes.values())
def has_node(self, name: str) -> bool:
"""
Check if a node with the given name exists.
Args:
name: The name to check
Returns:
bool: True if a node with the given name exists, False otherwise
"""
return name in self._nodes
def get_node(self, name: str) -> Optional[DependencyNode]:
"""
Get a node by name for manipulation.
Args:
name: The name of the node to retrieve
Returns:
Optional[DependencyNode]: The node with the given name, or None if not found
"""
return self._nodes.get(name)
def remove_node(self, node_or_name) -> bool:
"""
Remove a node by name or reference.
Args:
node_or_name: The node to remove or its name
Returns:
bool: True if the node was removed, False if not found
"""
if isinstance(node_or_name, DependencyNode):
name = node_or_name.name
else:
name = node_or_name
if name in self._nodes:
del self._nodes[name]
return True
return False
def get_all_nodes(self) -> List[DependencyNode]:
"""
Get all nodes stored in the handler.
Returns:
List[DependencyNode]: List of all nodes
"""
return list(self._nodes.values())
def __iter__(self) -> Iterator[DependencyNode]:
"""
Iterate over all nodes.
Returns:
Iterator[DependencyNode]: Iterator over all nodes
"""
return iter(self._nodes.values())
def __len__(self) -> int:
"""
Get the number of nodes in the handler.
Returns:
int: The number of nodes
"""
return len(self._nodes)

View File

@ -1,233 +0,0 @@
from dataclasses import dataclass, field
from typing import Dict, Any, Optional
# TODO: FIX THE FUCKING TYPE NAME CONVENTION.
@dataclass
class Field:
"""Represents a field in a dependency node with its type and readiness state."""
name: str
type: type
ctype_complex_type: Optional[Any]
containing_type: Optional[Any]
type_size: Optional[int]
bitfield_size: Optional[int]
value: Any = None
ready: bool = False
def set_ready(self, is_ready: bool = True) -> None:
"""Set the readiness state of this field."""
self.ready = is_ready
def set_value(self, value: Any, mark_ready: bool = True) -> None:
"""Set the value of this field and optionally mark it as ready."""
self.value = value
if mark_ready:
self.ready = True
def set_type(self, given_type, mark_ready: bool = True) -> None:
"""Set value of the type field and mark as ready"""
self.type = given_type
if mark_ready:
self.ready = True
def set_containing_type(
self, containing_type: Optional[Any], mark_ready: bool = True
) -> None:
"""Set the containing_type of this field and optionally mark it as ready."""
self.containing_type = containing_type
if mark_ready:
self.ready = True
def set_type_size(self, type_size: Any, mark_ready: bool = True) -> None:
"""Set the type_size of this field and optionally mark it as ready."""
self.type_size = type_size
if mark_ready:
self.ready = True
def set_ctype_complex_type(
self, ctype_complex_type: Any, mark_ready: bool = True
) -> None:
"""Set the ctype_complex_type of this field and optionally mark it as ready."""
self.ctype_complex_type = ctype_complex_type
if mark_ready:
self.ready = True
def set_bitfield_size(self, bitfield_size: Any, mark_ready: bool = True) -> None:
"""Set the bitfield_size of this field and optionally mark it as ready."""
self.bitfield_size = bitfield_size
if mark_ready:
self.ready = True
@dataclass
class DependencyNode:
"""
A node with typed fields and readiness tracking.
Example usage:
# Create a dependency node for a struct
somestruct = DependencyNode(name="struct_1")
# Add fields with their types
somestruct.add_field("field_1", str)
somestruct.add_field("field_2", int)
somestruct.add_field("field_3", str)
# Check if the node is ready (should be False initially)
print(f"Is node ready? {somestruct.is_ready}") # False
# Set some field values
somestruct.set_field_value("field_1", "someproperty")
somestruct.set_field_value("field_2", 30)
# Check if the node is ready (still False because field_3 is not ready)
print(f"Is node ready? {somestruct.is_ready}") # False
# Set the last field and make the node ready
somestruct.set_field_value("field_3", "anotherproperty")
# Now the node should be ready
print(f"Is node ready? {somestruct.is_ready}") # True
# You can also mark a field as not ready
somestruct.set_field_ready("field_3", False)
# Now the node is not ready again
print(f"Is node ready? {somestruct.is_ready}") # False
# Get all field values
print(somestruct.get_field_values()) # {'field_1': 'someproperty', 'field_2': 30, 'field_3': 'anotherproperty'}
# Get only ready fields
ready_fields = somestruct.get_ready_fields()
print(f"Ready fields: {[field.name for field in ready_fields.values()]}") # ['field_1', 'field_2']
"""
name: str
fields: Dict[str, Field] = field(default_factory=dict)
_ready_cache: Optional[bool] = field(default=None, repr=False)
def add_field(
self,
name: str,
field_type: type,
initial_value: Any = None,
containing_type: Optional[Any] = None,
type_size: Optional[int] = None,
ctype_complex_type: Optional[int] = None,
bitfield_size: Optional[int] = None,
ready: bool = False,
) -> None:
"""Add a field to the node with an optional initial value and readiness state."""
self.fields[name] = Field(
name=name,
type=field_type,
value=initial_value,
ready=ready,
containing_type=containing_type,
type_size=type_size,
ctype_complex_type=ctype_complex_type,
bitfield_size=bitfield_size,
)
# Invalidate readiness cache
self._ready_cache = None
def get_field(self, name: str) -> Field:
"""Get a field by name."""
return self.fields[name]
def set_field_value(self, name: str, value: Any, mark_ready: bool = True) -> None:
"""Set a field's value and optionally mark it as ready."""
if name not in self.fields:
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
self.fields[name].set_value(value, mark_ready)
# Invalidate readiness cache
self._ready_cache = None
def set_field_type(self, name: str, type: Any, mark_ready: bool = True) -> None:
"""Set a field's type and optionally mark it as ready."""
if name not in self.fields:
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
self.fields[name].set_type(type, mark_ready)
# Invalidate readiness cache
self._ready_cache = None
def set_field_containing_type(
self, name: str, containing_type: Any, mark_ready: bool = True
) -> None:
"""Set a field's containing_type and optionally mark it as ready."""
if name not in self.fields:
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
self.fields[name].set_containing_type(containing_type, mark_ready)
# Invalidate readiness cache
self._ready_cache = None
def set_field_type_size(
self, name: str, type_size: Any, mark_ready: bool = True
) -> None:
"""Set a field's type_size and optionally mark it as ready."""
if name not in self.fields:
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
self.fields[name].set_type_size(type_size, mark_ready)
# Invalidate readiness cache
self._ready_cache = None
def set_field_ctype_complex_type(
self, name: str, ctype_complex_type: Any, mark_ready: bool = True
) -> None:
"""Set a field's ctype_complex_type and optionally mark it as ready."""
if name not in self.fields:
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
self.fields[name].set_ctype_complex_type(ctype_complex_type, mark_ready)
# Invalidate readiness cache
self._ready_cache = None
def set_field_bitfield_size(
self, name: str, bitfield_size: Any, mark_ready: bool = True
) -> None:
"""Set a field's bitfield_size and optionally mark it as ready."""
if name not in self.fields:
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
self.fields[name].set_bitfield_size(bitfield_size, mark_ready)
# Invalidate readiness cache
self._ready_cache = None
def set_field_ready(self, name: str, is_ready: bool = True) -> None:
"""Mark a field as ready or not ready."""
if name not in self.fields:
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
self.fields[name].set_ready(is_ready)
# Invalidate readiness cache
self._ready_cache = None
@property
def is_ready(self) -> bool:
"""Check if the node is ready (all fields are ready)."""
# Use cached value if available
if self._ready_cache is not None:
return self._ready_cache
# Calculate readiness only when needed
if not self.fields:
self._ready_cache = False
return False
self._ready_cache = all(elem.ready for elem in self.fields.values())
return self._ready_cache
def get_field_values(self) -> Dict[str, Any]:
"""Get a dictionary of field names to their values."""
return {name: elem.value for name, elem in self.fields.items()}
def get_ready_fields(self) -> Dict[str, Field]:
"""Get all fields that are marked as ready."""
return {name: elem for name, elem in self.fields.items() if elem.ready}

View File

@ -1,135 +0,0 @@
import ast
import logging
from typing import List, Tuple, Dict
import importlib
import inspect
from .dependency_handler import DependencyHandler
from .ir_generation import IRGenerator
from .class_handler import process_vmlinux_class
logger = logging.getLogger(__name__)
def detect_import_statement(tree: ast.AST) -> List[Tuple[str, ast.ImportFrom]]:
"""
Parse AST and detect import statements from vmlinux.
Returns a list of tuples (module_name, imported_item) for vmlinux imports.
Raises SyntaxError for invalid import patterns.
Args:
tree: The AST to parse
Returns:
List of tuples containing (module_name, imported_item) for each vmlinux import
Raises:
SyntaxError: If multiple imports from vmlinux are attempted or import * is used
"""
vmlinux_imports = []
for node in ast.walk(tree):
# Handle "from vmlinux import ..." statements
if isinstance(node, ast.ImportFrom):
if node.module == "vmlinux":
# Check for wildcard import: from vmlinux import *
if any(alias.name == "*" for alias in node.names):
raise SyntaxError(
"Wildcard imports from vmlinux are not supported. "
"Please import specific types explicitly."
)
# Check for multiple imports: from vmlinux import A, B, C
if len(node.names) > 1:
imported_names = [alias.name for alias in node.names]
raise SyntaxError(
f"Multiple imports from vmlinux are not supported. "
f"Found: {', '.join(imported_names)}. "
f"Please use separate import statements for each type."
)
# Check if no specific import is specified (should not happen with valid Python)
if len(node.names) == 0:
raise SyntaxError(
"Import from vmlinux must specify at least one type."
)
# Valid single import
for alias in node.names:
import_name = alias.name
# Use alias if provided, otherwise use the original name (commented)
# as_name = alias.asname if alias.asname else alias.name
vmlinux_imports.append(("vmlinux", node))
logger.info(f"Found vmlinux import: {import_name}")
# Handle "import vmlinux" statements (not typical but should be rejected)
elif isinstance(node, ast.Import):
for alias in node.names:
if alias.name == "vmlinux" or alias.name.startswith("vmlinux."):
raise SyntaxError(
"Direct import of vmlinux module is not supported. "
"Use 'from vmlinux import <type>' instead."
)
logger.info(f"Total vmlinux imports detected: {len(vmlinux_imports)}")
return vmlinux_imports
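Concretely, the checks above accept exactly one explicitly named type per statement; illustrative cases (the type names are hypothetical):

from vmlinux import task_struct            # accepted: single explicit import
from vmlinux import *                      # SyntaxError: wildcard imports unsupported
from vmlinux import task_struct, pt_regs   # SyntaxError: one type per statement
import vmlinux                             # SyntaxError: use "from vmlinux import <type>" instead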
def vmlinux_proc(tree: ast.AST, module):
import_statements = detect_import_statement(tree)
# initialise dependency handler
handler = DependencyHandler()
# initialise assignment dictionary of name to type
assignments: Dict[str, type] = {}
if not import_statements:
logger.info("No vmlinux imports found")
return
# Import vmlinux module directly
try:
vmlinux_mod = importlib.import_module("vmlinux")
except ImportError:
logger.warning("Could not import vmlinux module")
return
source_file = inspect.getsourcefile(vmlinux_mod)
if source_file is None:
logger.warning("Cannot find source for vmlinux module")
return
with open(source_file, "r") as f:
mod_ast = ast.parse(f.read(), filename=source_file)
for import_mod, import_node in import_statements:
for alias in import_node.names:
imported_name = alias.name
found = False
for mod_node in mod_ast.body:
if (
isinstance(mod_node, ast.ClassDef)
and mod_node.name == imported_name
):
process_vmlinux_class(mod_node, module, handler)
found = True
break
if isinstance(mod_node, ast.Assign):
for target in mod_node.targets:
if isinstance(target, ast.Name) and target.id == imported_name:
process_vmlinux_assign(mod_node, module, assignments)
found = True
break
if found:
break
if not found:
logger.info(
f"{imported_name} not found as ClassDef or Assign in vmlinux"
)
IRGenerator(module, handler)
def process_vmlinux_assign(node, module, assignments: Dict[str, type]):
raise NotImplementedError("Assignment handling has not been implemented yet")

View File

@ -1,8 +0,0 @@
# here, we will iterate through the dependencies and generate IR once dependencies are resolved fully
from .dependency_handler import DependencyHandler
class IRGenerator:
def __init__(self, module, handler):
self.module = module
self.handler: DependencyHandler = handler

View File

@ -1,10 +1,11 @@
#include "vmlinux.h" #include <linux/bpf.h>
#include <bpf/bpf_helpers.h> #include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h> #define u64 unsigned long long
#define u32 unsigned int
SEC("xdp") SEC("xdp")
int hello(struct xdp_md *ctx) { int hello(struct xdp_md *ctx) {
bpf_printk("Hello, World! %ud \n", ctx->data); bpf_printk("Hello, World!\n");
return XDP_PASS; return XDP_PASS;
} }

View File

@ -1,27 +0,0 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <linux/types.h>
struct test_struct {
__u64 a;
__u64 b;
};
struct test_struct w = {};
volatile __u64 prev_time = 0;
SEC("tracepoint/syscalls/sys_enter_execve")
int trace_execve(void *ctx)
{
bpf_printk("previous %ul now %ul", w.b, w.a);
__u64 ts = bpf_ktime_get_ns();
bpf_printk("prev %ul now %ul", prev_time, ts);
w.a = ts;
w.b = prev_time;
prev_time = ts;
return 0;
}
char LICENSE[] SEC("license") = "GPL";

View File

@ -1,19 +0,0 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char LICENSE[] SEC("license") = "Dual BSD/GPL";
SEC("kprobe/do_unlinkat")
int kprobe_execve(struct pt_regs *ctx)
{
bpf_printk("unlinkat created");
return 0;
}
SEC("kretprobe/do_unlinkat")
int kretprobe_execve(struct pt_regs *ctx)
{
bpf_printk("unlinkat returned\n");
return 0;
}

View File

@ -22,27 +22,29 @@ struct {
SEC("tracepoint/syscalls/sys_enter_execve") SEC("tracepoint/syscalls/sys_enter_execve")
int trace_execve(void *ctx) int trace_execve(void *ctx)
{ {
struct event *e; // struct event *e;
__u64 pid_tgid; // __u64 pid_tgid;
__u64 uid_gid; // __u64 uid_gid;
__u32 *e;
// Reserve space in the ringbuffer // Reserve space in the ringbuffer
e = bpf_ringbuf_reserve(&events, sizeof(*e), 0); e = bpf_ringbuf_reserve(&events, sizeof(*e), 0);
if (!e) if (!e)
return 0; return 0;
//
// // Fill the struct with data
// pid_tgid = bpf_get_current_pid_tgid();
// e->pid = pid_tgid >> 32;
//
// uid_gid = bpf_get_current_uid_gid();
// e->uid = uid_gid & 0xFFFFFFFF;
//
// e->timestamp = bpf_ktime_get_ns();
// Fill the struct with data // bpf_get_current_comm(&e->comm, sizeof(e->comm));
pid_tgid = bpf_get_current_pid_tgid(); //
e->pid = pid_tgid >> 32; // // Submit the event to ringbuffer
__u32 temp = 32;
uid_gid = bpf_get_current_uid_gid(); e = &temp;
e->uid = uid_gid & 0xFFFFFFFF;
e->timestamp = bpf_ktime_get_ns();
bpf_get_current_comm(&e->comm, sizeof(e->comm));
// Submit the event to ringbuffer
bpf_ringbuf_submit(e, 0); bpf_ringbuf_submit(e, 0);
return 0; return 0;

View File

@ -3,9 +3,9 @@ from ctypes import c_void_p, c_int64
@bpf @bpf
@section("tracepoint/syscalls/sys_enter_sync") @section("sometag1")
def sometag(ctx: c_void_p) -> c_int64: def sometag(ctx: c_void_p) -> c_int64:
a = 1 + 2 + 1 + 12 + 13 a = 1 + 2 + 1
print(f"{a}") print(f"{a}")
return c_int64(0) return c_int64(0)

View File

@ -1,5 +1,3 @@
import logging
from pythonbpf import compile, bpf, section, bpfglobal from pythonbpf import compile, bpf, section, bpfglobal
from ctypes import c_void_p, c_int64 from ctypes import c_void_p, c_int64
@ -7,7 +5,8 @@ from ctypes import c_void_p, c_int64
@bpf @bpf
@section("sometag1") @section("sometag1")
def sometag(ctx: c_void_p) -> c_int64: def sometag(ctx: c_void_p) -> c_int64:
a = 1 - 1 b = 1 + 2
a = 1 + b
return c_int64(a) return c_int64(a)
@ -17,4 +16,4 @@ def LICENSE() -> str:
return "GPL" return "GPL"
compile(loglevel=logging.INFO) compile()

View File

@ -0,0 +1,33 @@
from pythonbpf import bpf, map, bpfglobal, section, compile, compile_to_ir, BPF
from pythonbpf.maps import RingBuf
from ctypes import c_int32, c_void_p
# Define a map
@bpf
@map
def mymap() -> RingBuf:
return RingBuf(max_entries=(1024))
@bpf
@section("tracepoint/syscalls/sys_enter_clone")
def random_section(ctx: c_void_p) -> c_int32:
e: c_int32 = mymap().reserve(64)
    if e == 0:  # this comparison is the suspected issue (see the sketch after this example)
return c_int32(0)
mymap().submit(e)
return c_int32(0)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile_to_ir("ringbuf.py", "ringbuf.ll")
compile()
b = BPF()
b.load_and_attach()
while True:
print("running")

View File

@ -1,34 +0,0 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from ctypes import c_void_p, c_int64, c_uint64
from pythonbpf.maps import HashMap
# NOTE: Decided against fixing this, since a workaround exists:
# assign the result of lookup() to a variable and then branch on
# that variable in the if statement (see the sketch after this file).
# Might fix in the future.
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
last.update(0, 1)
if last.lookup(0) > 0:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()
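
A minimal sketch of the workaround described in the NOTE above, assuming the last() HashMap and imports from this example and the map-call syntax adopted elsewhere in this changeset: bind the lookup result to a variable first, then branch on that variable.

@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
    last().update(0, 1)
    tsp = last().lookup(0)
    if tsp:
        print("Hello, World!")
    else:
        print("Goodbye, World!")
    return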

View File

@ -1,18 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!") if True else print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,34 +0,0 @@
from pythonbpf import bpf, struct, section, bpfglobal, compile
from ctypes import c_void_p, c_int64, c_uint64
# NOTE: Decided against fixing this, since a workaround exists:
# check any single field of the struct in the if statement
# (see the sketch after this file). Ugly, but it works.
# Might fix in the future.
@bpf
@struct
class data_t:
pid: c_uint64
ts: c_uint64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
dat = data_t()
if dat:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()
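
A minimal sketch of the workaround described in the NOTE above, assuming the same data_t struct and imports as in this example: branch on a single field of the struct rather than on the struct itself.

@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
    dat = data_t()
    if dat.ts:
        print("Hello, World!")
    else:
        print("Goodbye, World!")
    return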

View File

@ -4,18 +4,6 @@ from pythonbpf.maps import HashMap
from ctypes import c_void_p, c_int64 from ctypes import c_void_p, c_int64
# NOTE: I have decided not to fix this example for now.
# The issue is on line 31, where we pass an expression directly.
# The update helper expects a pointer type, but the space for that
# pointer must be allocated in the first basic block. Since this
# usage sits in a different basic block, we cannot cast the
# expression to a pointer type (we never allocated space for it).
# Should we change our space allocation logic? That would let users
# spam the same helper with the same arguments and still run out of
# stack space. So we consider this usage invalid for now; a
# workaround is sketched after this file's diff.
# Might fix it later.
@bpf @bpf
@map @map
@ -26,12 +14,12 @@ def count() -> HashMap:
@bpf @bpf
@section("xdp") @section("xdp")
def hello_world(ctx: c_void_p) -> c_int64: def hello_world(ctx: c_void_p) -> c_int64:
prev = count.lookup(0) prev = count().lookup(0)
if prev: if prev:
count.update(0, prev + 1) count().update(0, prev + 1)
return XDP_PASS return XDP_PASS
else: else:
count.update(0, 1) count().update(0, 1)
return XDP_PASS return XDP_PASS
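
One workaround for the limitation described in the removed NOTE above is to bind the arithmetic expression to a named variable and pass that variable to update(); a minimal sketch, assuming the count() HashMap and the XDP_PASS import used in this example:

prev = count().lookup(0)
if prev:
    prevval = prev + 1
    count().update(0, prevval)
    return XDP_PASS
else:
    count().update(0, 1)
    return XDP_PASS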

View File

@ -1,109 +0,0 @@
import logging
from pythonbpf import compile, bpf, section, bpfglobal, compile_to_ir
from ctypes import c_void_p, c_int64, c_int32
@bpf
@bpfglobal
def somevalue() -> c_int32:
return c_int32(42)
@bpf
@bpfglobal
def somevalue2() -> c_int64:
return c_int64(69)
@bpf
@bpfglobal
def somevalue1() -> c_int32:
return c_int32(42)
# --- Passing examples ---
# Simple constant return
@bpf
@bpfglobal
def g1() -> c_int64:
return c_int64(42)
# Constructor with one constant argument
@bpf
@bpfglobal
def g2() -> c_int64:
return c_int64(69)
# --- Failing examples ---
# No return annotation
# @bpf
# @bpfglobal
# def g3():
# return 42
# Return annotation is complex
# @bpf
# @bpfglobal
# def g4() -> List[int]:
# return []
# # Return is missing
# @bpf
# @bpfglobal
# def g5() -> c_int64:
# pass
# # Return is a variable reference
# #TODO: maybe fix this sometime later. It defaults to 0
# CONST = 5
# @bpf
# @bpfglobal
# def g6() -> c_int64:
# return c_int64(CONST)
# Constructor with multiple args
# TODO: this is not working. Should it work?
@bpf
@bpfglobal
def g7() -> c_int64:
return c_int64(1)
# Dataclass call
# TODO: fails with dataclass
# @dataclass
# class Point:
# x: c_int64
# y: c_int64
# @bpf
# @bpfglobal
# def g8() -> Point:
# return Point(1, 2)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def sometag(ctx: c_void_p) -> c_int64:
print("test")
global somevalue
somevalue = 2
print(f"{somevalue}")
return c_int64(1)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile_to_ir("globals.py", "globals.ll", loglevel=logging.INFO)
compile()

View File

@ -1,41 +0,0 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from pythonbpf.helper import XDP_PASS
from pythonbpf.maps import HashMap
from ctypes import c_void_p, c_int64
# NOTE: This example exposes the problems with our typing system.
# We cannot perform the steps on lines 25 and 27.
# prev is of type i64**. For prev + 1, we dereference it down to i64.
# To assign the result back to prev, we would need to go back to i64**,
# but we cannot allocate space for the intermediate type at that point.
# We probably need to track the ref/deref chain for each variable.
@bpf
@map
def count() -> HashMap:
return HashMap(key=c_int64, value=c_int64, max_entries=1)
@bpf
@section("xdp")
def hello_world(ctx: c_void_p) -> c_int64:
prev = count.lookup(0)
if prev:
prev = prev + 1
count.update(0, prev)
return XDP_PASS
else:
count.update(0, 1)
return XDP_PASS
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,23 +0,0 @@
import logging
from pythonbpf import compile, bpf, section, bpfglobal, compile_to_ir
from ctypes import c_void_p, c_int64
# This should not pass as somevalue is not declared at all.
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def sometag(ctx: c_void_p) -> c_int64:
print("test")
print(f"{somevalue}") # noqa: F821
return c_int64(1)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile_to_ir("globals.py", "globals.ll", loglevel=logging.INFO)
compile()

View File

@ -1,49 +0,0 @@
from pythonbpf import bpf, map, section, bpfglobal, compile_to_ir
from pythonbpf.maps import HashMap
from pythonbpf.helper import XDP_PASS
from vmlinux import struct_xdp_md
from vmlinux import struct_ring_buffer_per_cpu # noqa: F401
from vmlinux import struct_xdp_buff # noqa: F401
# from vmlinux import struct_xdp_md
from ctypes import c_int64
# Instructions on how to run this program
# 1. Install PythonBPF: pip install pythonbpf
# 2. Run the program: python examples/xdp_pass.py
# 3. Run the compiled object with sudo: sudo tools/check.sh run examples/xdp_pass.o
# 4. Attach the object file to a network device, e.g. ./check.sh xdp examples/xdp_pass.o tailscale0
# 5. Send traffic through the device and observe the effects
@bpf
@map
def count() -> HashMap:
return HashMap(key=c_int64, value=c_int64, max_entries=1)
@bpf
@section("xdp")
def hello_world(ctx: struct_xdp_md) -> c_int64:
key = 0
one = 1
prev = count().lookup(key)
if prev:
prevval = prev + 1
print(f"count: {prevval}")
count().update(key, prevval)
return XDP_PASS
else:
count().update(key, one)
return XDP_PASS
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile_to_ir("xdp_pass.py", "xdp_pass.ll")

View File

@ -1,20 +0,0 @@
from pythonbpf import compile, bpf, section, bpfglobal
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_sync")
def sometag(ctx: c_void_p) -> c_int64:
b = 1 + 2
a = 1 + b
print(f"{a}")
return c_int64(0)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,32 +0,0 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from ctypes import c_void_p, c_int64, c_uint64
from pythonbpf.maps import HashMap
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
last.update(0, 1)
last.update(1, 2)
x = last.lookup(0)
y = last.lookup(1)
if x and y:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,21 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
if True:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,21 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
if (0 + 1) * 0:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,21 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
if 0:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,30 +0,0 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from ctypes import c_void_p, c_int64, c_uint64
from pythonbpf.maps import HashMap
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
# last.update(0, 1)
tsp = last.lookup(0)
if tsp:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,30 +0,0 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from ctypes import c_void_p, c_int64, c_uint64
from pythonbpf.maps import HashMap
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
last.update(0, 1)
tsp = last.lookup(0)
if tsp > 0:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,30 +0,0 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from ctypes import c_void_p, c_int64, c_uint64
from pythonbpf.maps import HashMap
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
# last.update(0, 1)
tsp = last.lookup(0)
if not tsp:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,32 +0,0 @@
from pythonbpf import bpf, map, section, bpfglobal, compile
from ctypes import c_void_p, c_int64, c_uint64
from pythonbpf.maps import HashMap
@bpf
@map
def last() -> HashMap:
return HashMap(key=c_uint64, value=c_uint64, max_entries=3)
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
last.update(0, 1)
# last.update(1, 2)
x = last.lookup(0)
y = last.lookup(1)
if x or y:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,29 +0,0 @@
from pythonbpf import bpf, struct, section, bpfglobal, compile
from ctypes import c_void_p, c_int64, c_uint64
@bpf
@struct
class data_t:
pid: c_uint64
ts: c_uint64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
dat = data_t()
if dat.ts:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,23 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64, c_int32
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
x = 0
y = c_int32(0)
if x == y:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,22 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
x = 0
if x:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,22 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
x = 0
if x * 1:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,22 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
x = 2
if x > 3:
print("Hello, World!")
else:
print("Goodbye, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,7 +1,7 @@
from pythonbpf import bpf, map, struct, section, bpfglobal, compile, compile_to_ir, BPF from pythonbpf import bpf, map, struct, section, bpfglobal, compile, compile_to_ir, BPF
from pythonbpf.helper import ktime, pid from pythonbpf.helper import ktime, pid
from pythonbpf.maps import PerfEventArray from pythonbpf.maps import PerfEventArray
import logging
from ctypes import c_void_p, c_int32, c_uint64 from ctypes import c_void_p, c_int32, c_uint64
@ -42,8 +42,8 @@ def LICENSE() -> str:
return "GPL" return "GPL"
compile()
compile_to_ir("perf_buffer_map.py", "perf_buffer_map.ll") compile_to_ir("perf_buffer_map.py", "perf_buffer_map.ll")
compile(loglevel=logging.INFO)
b = BPF() b = BPF()
b.load_and_attach() b.load_and_attach()

View File

@ -1,18 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return 1 + 1 - 2
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,19 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
a = 2
return a - 2
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,18 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return True
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,18 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return 1
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,18 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,20 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int32
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int32:
print("Hello, World!")
a = 1 # int64
x = 1 # int64
return c_int32(a - x) # typecast to int32
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,18 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int32
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int32:
print("Hello, World!")
return c_int32(1)
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,19 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int32
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int32:
print("Hello, World!")
a = 1 # int64
return c_int32(a) # typecast to int32
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,19 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
a = 1
return a
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,19 +0,0 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64
from pythonbpf.helper import XDP_PASS
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
print("Hello, World!")
return XDP_PASS
@bpf
@bpfglobal
def LICENSE() -> str:
return "GPL"
compile()

View File

@ -1,5 +1,5 @@
from pythonbpf import bpf, BPF, map, bpfglobal, section, compile, compile_to_ir from pythonbpf import bpf, map, bpfglobal, section, compile, compile_to_ir, BPF
from pythonbpf.maps import RingBuf, HashMap from pythonbpf.maps import RingBuf
from ctypes import c_int32, c_void_p from ctypes import c_int32, c_void_p
@ -9,17 +9,13 @@ from ctypes import c_int32, c_void_p
def mymap() -> RingBuf: def mymap() -> RingBuf:
return RingBuf(max_entries=(1024)) return RingBuf(max_entries=(1024))
@bpf
@map
def mymap2() -> HashMap:
return HashMap(key=c_int32, value=c_int32, max_entries=1024)
@bpf @bpf
@section("tracepoint/syscalls/sys_enter_clone") @section("tracepoint/syscalls/sys_enter_clone")
def random_section(ctx: c_void_p) -> c_int32: def random_section(ctx: c_void_p) -> c_int32:
print("Hello") print("Hello")
e = mymap().reserve(6)
if e:
mymap().submit(e)
return c_int32(0) return c_int32(0)
@ -33,3 +29,5 @@ compile_to_ir("ringbuf.py", "ringbuf.ll")
compile() compile()
b = BPF() b = BPF()
b.load_and_attach() b.load_and_attach()
while True:
print("running")

View File

@ -1,379 +0,0 @@
#!/usr/bin/env python3
"""
BTF to Python ctypes Converter
Converts Linux kernel BTF (BPF Type Format) to Python ctypes definitions.
This tool automates the process of:
1. Dumping BTF from vmlinux
2. Preprocessing enum definitions
3. Processing struct kioctx to extract anonymous nested structs
4. Running C preprocessor
5. Converting to Python ctypes using clang2py
6. Post-processing the output
Requirements:
- bpftool
- clang
- ctypeslib2 (pip install ctypeslib2)
"""
import argparse
import os
import re
import subprocess
import sys
import tempfile
class BTFConverter:
def __init__(
self,
btf_source="/sys/kernel/btf/vmlinux",
output_file="vmlinux.py",
keep_intermediate=False,
verbose=False,
):
self.btf_source = btf_source
self.output_file = output_file
self.keep_intermediate = keep_intermediate
self.verbose = verbose
self.temp_dir = tempfile.mkdtemp() if not keep_intermediate else "."
def log(self, message):
"""Print message if verbose mode is enabled."""
if self.verbose:
print(f"[*] {message}")
def run_command(self, cmd, description):
"""Run a shell command and handle errors."""
self.log(f"{description}...")
try:
result = subprocess.run(
cmd, shell=True, check=True, capture_output=True, text=True
)
if self.verbose and result.stdout:
print(result.stdout)
return result
except subprocess.CalledProcessError as e:
print(f"Error during {description}:", file=sys.stderr)
print(e.stderr, file=sys.stderr)
sys.exit(1)
def step1_dump_btf(self):
"""Step 1: Dump BTF from vmlinux."""
vmlinux_h = os.path.join(self.temp_dir, "vmlinux.h")
cmd = f"bpftool btf dump file {self.btf_source} format c > {vmlinux_h}"
self.run_command(cmd, "Dumping BTF from vmlinux")
return vmlinux_h
def step2_preprocess_enums(self, input_file):
"""Step 1.5: Preprocess enum definitions."""
self.log("Preprocessing enum definitions...")
with open(input_file, "r") as f:
original_code = f.read()
# Extract anonymous enums
enums = re.findall(
r"(?<!typedef\s)(enum\s*\{[^}]*\})\s*(\w+)\s*(?::\s*\d+)?\s*;",
original_code,
)
enum_defs = [enum_block + ";" for enum_block, _ in enums]
# Replace anonymous enums with int declarations
processed_code = re.sub(
r"(?<!typedef\s)enum\s*\{[^}]*\}\s*(\w+)\s*(?::\s*\d+)?\s*;",
r"int \1;",
original_code,
)
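# For example (illustrative input, not taken from the real vmlinux.h):
#   enum { RED = 0, GREEN = 1 } color;
# is rewritten to
#   int color;
# while "enum { RED = 0, GREEN = 1 };" is collected into enum_defs
# and prepended to the output below.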
# Prepend enum definitions
if enum_defs:
enum_text = "\n".join(enum_defs) + "\n\n"
processed_code = enum_text + processed_code
output_file = os.path.join(self.temp_dir, "vmlinux_processed.h")
with open(output_file, "w") as f:
f.write(processed_code)
return output_file
def step2_5_process_kioctx(self, input_file):
# TODO: this is a very bad bug and design decision; essentially only this one struct (kioctx) has the issue, so it is special-cased here.
"""Step 2.5: Process struct kioctx to extract nested anonymous structs."""
self.log("Processing struct kioctx nested structs...")
with open(input_file, "r") as f:
content = f.read()
# Pattern to match struct kioctx with its full body (handles multiple nesting levels)
kioctx_pattern = (
r"struct\s+kioctx\s*\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\}\s*;"
)
def process_kioctx_replacement(match):
full_struct = match.group(0)
self.log(f"Found struct kioctx, length: {len(full_struct)} chars")
# Extract the struct body (everything between outermost { and })
body_match = re.search(
r"struct\s+kioctx\s*\{(.*)\}\s*;", full_struct, re.DOTALL
)
if not body_match:
return full_struct
body = body_match.group(1)
# Find all anonymous structs within the body
# Pattern: struct { ... } followed by ; (not a member name)
# anon_struct_pattern = r"struct\s*\{[^}]*\}"
anon_structs = []
anon_counter = 4 # Start from 4, counting down to 1
def replace_anonymous_struct(m):
nonlocal anon_counter
anon_struct_content = m.group(0)
# Extract the body of the anonymous struct
anon_body_match = re.search(
r"struct\s*\{(.*)\}", anon_struct_content, re.DOTALL
)
if not anon_body_match:
return anon_struct_content
anon_body = anon_body_match.group(1)
# Create the named struct definition
anon_name = f"__anon{anon_counter}"
member_name = f"a{anon_counter}"
# Store the struct definition
anon_structs.append(f"struct {anon_name} {{{anon_body}}};")
anon_counter -= 1
# Return the member declaration
return f"struct {anon_name} {member_name}"
# Process the body, finding and replacing anonymous structs
# We need to be careful to only match anonymous structs followed by ;
processed_body = body
# Find all occurrences and process them
pattern_with_semicolon = r"struct\s*\{([^}]*)\}\s*;"
matches = list(re.finditer(pattern_with_semicolon, body, re.DOTALL))
if not matches:
self.log("No anonymous structs found in kioctx")
return full_struct
self.log(f"Found {len(matches)} anonymous struct(s)")
# Process in reverse order to maintain string positions
for match in reversed(matches):
anon_struct_content = match.group(1)
start_pos = match.start()
end_pos = match.end()
# Create the named struct definition
anon_name = f"__anon{anon_counter}"
member_name = f"a{anon_counter}"
# Store the struct definition
anon_structs.insert(0, f"struct {anon_name} {{{anon_struct_content}}};")
# Replace in the body
replacement = f"struct {anon_name} {member_name};"
processed_body = (
processed_body[:start_pos] + replacement + processed_body[end_pos:]
)
anon_counter -= 1
# Rebuild the complete definition
if anon_structs:
# Prepend the anonymous struct definitions
anon_definitions = "\n".join(anon_structs) + "\n\n"
new_struct = f"struct kioctx {{{processed_body}}};"
return anon_definitions + new_struct
else:
return full_struct
# Apply the transformation
processed_content = re.sub(
kioctx_pattern, process_kioctx_replacement, content, flags=re.DOTALL
)
output_file = os.path.join(self.temp_dir, "vmlinux_kioctx_processed.h")
with open(output_file, "w") as f:
f.write(processed_content)
self.log(f"Saved kioctx-processed output to {output_file}")
return output_file
def step3_run_preprocessor(self, input_file):
"""Step 2: Run C preprocessor."""
output_file = os.path.join(self.temp_dir, "vmlinux.i")
cmd = f"clang -E {input_file} > {output_file}"
self.run_command(cmd, "Running C preprocessor")
return output_file
def step4_convert_to_ctypes(self, input_file):
"""Step 3: Convert to Python ctypes using clang2py."""
output_file = os.path.join(self.temp_dir, "vmlinux_raw.py")
cmd = (
f"clang2py {input_file} -o {output_file} "
f'--clang-args="-fno-ms-extensions -I/usr/include -I/usr/include/linux"'
)
self.run_command(cmd, "Converting to Python ctypes")
return output_file
def step5_postprocess(self, input_file):
"""Step 4: Post-process the generated Python file."""
self.log("Post-processing Python ctypes definitions...")
with open(input_file, "r") as f:
data = f.read()
# Remove lines like ('_45', ctypes.c_int64, 0)
data = re.sub(r"\('_[0-9]+',\s*ctypes\.[a-zA-Z0-9_]+,\s*0\),?\s*\n?", "", data)
# Replace ('_20', ctypes.c_uint64, 64) → ('_20', ctypes.c_uint64)
data = re.sub(
r"\('(_[0-9]+)',\s*(ctypes\.[a-zA-Z0-9_]+),\s*[0-9]+\)", r"('\1', \2)", data
)
# Replace ('_20', ctypes.c_char, 8) with ('_20', ctypes.c_uint8, 8)
data = re.sub(r"(ctypes\.c_char)(\s*,\s*\d+\))", r"ctypes.c_uint8\2", data)
# The helper below replaces c_bool bitfields wider than 8 bits with c_uint32
def repl(m):
name, bits = m.groups()
return (
f"('{name}', ctypes.c_uint32, {bits})" if int(bits) > 8 else m.group(0)
)
data = re.sub(r"\('([^']+)',\s*ctypes\.c_bool,\s*(\d+)\)", repl, data)
# Remove ctypes. prefix from invalid entries
invalid_ctypes = ["bpf_iter_state", "_cache_type", "fs_context_purpose"]
for name in invalid_ctypes:
data = re.sub(rf"\bctypes\.{name}\b", name, data)
with open(self.output_file, "w") as f:
f.write(data)
self.log(f"Saved final output to {self.output_file}")
def cleanup(self):
"""Remove temporary files if not keeping them."""
if not self.keep_intermediate and self.temp_dir != ".":
self.log(f"Cleaning up temporary directory: {self.temp_dir}")
import shutil
shutil.rmtree(self.temp_dir, ignore_errors=True)
def convert(self):
"""Run the complete conversion pipeline."""
try:
self.log("Starting BTF to Python ctypes conversion...")
# Check dependencies
self.check_dependencies()
# Run conversion pipeline
vmlinux_h = self.step1_dump_btf()
vmlinux_processed_h = self.step2_preprocess_enums(vmlinux_h)
vmlinux_kioctx_h = self.step2_5_process_kioctx(vmlinux_processed_h)
vmlinux_i = self.step3_run_preprocessor(vmlinux_kioctx_h)
vmlinux_raw_py = self.step4_convert_to_ctypes(vmlinux_i)
self.step5_postprocess(vmlinux_raw_py)
print(f"\n✓ Conversion complete! Output saved to: {self.output_file}")
except Exception as e:
print(f"\n✗ Error during conversion: {e}", file=sys.stderr)
import traceback
traceback.print_exc()
sys.exit(1)
finally:
self.cleanup()
def check_dependencies(self):
"""Check if required tools are available."""
self.log("Checking dependencies...")
dependencies = {
"bpftool": "bpftool --version",
"clang": "clang --version",
"clang2py": "clang2py --version",
}
missing = []
for tool, cmd in dependencies.items():
try:
subprocess.run(cmd, shell=True, check=True, capture_output=True)
except subprocess.CalledProcessError:
missing.append(tool)
if missing:
print("Error: Missing required dependencies:", file=sys.stderr)
for tool in missing:
print(f" - {tool}", file=sys.stderr)
if "clang2py" in missing:
print("\nInstall ctypeslib2: pip install ctypeslib2", file=sys.stderr)
sys.exit(1)
def main():
parser = argparse.ArgumentParser(
description="Convert Linux kernel BTF to Python ctypes definitions",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
%(prog)s
%(prog)s -o kernel_types.py
%(prog)s --btf-source /sys/kernel/btf/custom_module -k -v
""",
)
parser.add_argument(
"--btf-source",
default="/sys/kernel/btf/vmlinux",
help="Path to BTF source (default: /sys/kernel/btf/vmlinux)",
)
parser.add_argument(
"-o",
"--output",
default="vmlinux.py",
help="Output Python file (default: vmlinux.py)",
)
parser.add_argument(
"-k",
"--keep-intermediate",
action="store_true",
help="Keep intermediate files (vmlinux.h, vmlinux_processed.h, etc.)",
)
parser.add_argument(
"-v", "--verbose", action="store_true", help="Enable verbose output"
)
args = parser.parse_args()
converter = BTFConverter(
btf_source=args.btf_source,
output_file=args.output,
keep_intermediate=args.keep_intermediate,
verbose=args.verbose,
)
converter.convert()
if __name__ == "__main__":
main()
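
For reference, a minimal sketch of driving the converter above programmatically rather than through the CLI; the module name is hypothetical, and the arguments are simply the defaults shown in the constructor:

# Save the script above as btf_converter.py (hypothetical name), then:
from btf_converter import BTFConverter

converter = BTFConverter(
    btf_source="/sys/kernel/btf/vmlinux",  # default BTF source
    output_file="vmlinux.py",              # generated ctypes definitions
    keep_intermediate=False,
    verbose=True,
)
converter.convert()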