mirror of
https://github.com/varun-r-mallya/Python-BPF.git
synced 2026-02-09 22:50:56 +00:00
Compare commits
115 Commits
refactor_c
...
80396c78a6
| Author | SHA1 | Date | |
|---|---|---|---|
| 80396c78a6 | |||
| 8774277000 | |||
| 8743ea17f3 | |||
| f8844104a6 | |||
| 3343bedd11 | |||
| 75d3ad4fe2 | |||
| abbf17748d | |||
| 7c559840f0 | |||
| 06773c895f | |||
| 1e3d775865 | |||
| 168e26268e | |||
| 2cf7b28793 | |||
| d24d59c2ba | |||
| f190a33e21 | |||
| eb636ef731 | |||
| 2ae3aade60 | |||
| f227fe9310 | |||
| 7940d02bc7 | |||
| 2483ef2840 | |||
| 68e9693f9a | |||
| e4575a6b1e | |||
| 3ec3ab30fe | |||
| 7fb3ecff48 | |||
| ec59dad025 | |||
| 28b7b1620c | |||
| 9f8e240a38 | |||
| e6c05ab494 | |||
| 8aa9cf7119 | |||
| 9683e3799f | |||
| 200d293750 | |||
| ed196caebf | |||
| a049796b81 | |||
| 384fc9dd40 | |||
| 5f2df57e64 | |||
| 130d8a9edc | |||
| 40ae3d825a | |||
| 484624104e | |||
| e7c4bdb150 | |||
| 7210366e7d | |||
| 435bf27176 | |||
| 1ba27ac7cf | |||
| e4ddec3a02 | |||
| bc7b5c97d1 | |||
| fa720f8e6b | |||
| eff0f66d95 | |||
| b43c252224 | |||
| aae7aa981d | |||
| 6f9a7301af | |||
| 48923d03d4 | |||
| 019a83cf11 | |||
| 140d9e6e35 | |||
| a351b0f1b5 | |||
| 3cb73ff0c3 | |||
| 3b08c2bede | |||
| 86378d6cc4 | |||
| 00d1c583af | |||
| cfc246c80d | |||
| f3c80f9e5f | |||
| 0d3a5748dd | |||
| 079431754c | |||
| 46f5eca33d | |||
| 7081e939fb | |||
| 1e29460d6f | |||
| e180a89644 | |||
| 34a267e982 | |||
| c81aad7c67 | |||
| 2e677c2c7b | |||
| 4ea7b22b44 | |||
| b8b937bfca | |||
| 6cc29c4fa1 | |||
| 5451ba646d | |||
| 7720437ca5 | |||
| eb0a7a917d | |||
| 6f65903552 | |||
| 97e74d09be | |||
| 9c7560ed2e | |||
| 2979ceedcf | |||
| 745f59278f | |||
| 49c59b32ca | |||
| ff78140a7d | |||
| 82ff71b753 | |||
| f46e7cd846 | |||
| 9d73eb67c4 | |||
| 21ce041353 | |||
| 6402cf7be5 | |||
| 9a96e1247b | |||
| 989134f4be | |||
| 120aec08da | |||
| e66ae7cc89 | |||
| b95fbd0ed0 | |||
| 32dc8e6636 | |||
| 8e3942d38c | |||
| d84ce0c6fa | |||
| 8d07a4cd05 | |||
| 8485460374 | |||
| 0c977514af | |||
| 1207730ce3 | |||
| 0d9dcd122c | |||
| 8a69e05ee2 | |||
| 976af290af | |||
| a3443ab1d5 | |||
| a27360482b | |||
| c423cc647d | |||
| 8554688230 | |||
| 3e873f378e | |||
| 3abe07c5b2 | |||
| 01bd7604ed | |||
| 7ae84a0d5a | |||
| df3f00261a | |||
| ab610147a5 | |||
| 7720fe9f9f | |||
| 7aeac86bd3 | |||
| ab1c4223d5 | |||
| c3a512d5cf | |||
| 4a60c42cd0 |
4
.github/workflows/format.yml
vendored
4
.github/workflows/format.yml
vendored
@ -12,8 +12,8 @@ jobs:
|
||||
name: Format
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: "3.x"
|
||||
- uses: pre-commit/action@v3.0.1
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@ -7,3 +7,5 @@ __pycache__/
|
||||
*.ll
|
||||
*.o
|
||||
.ipynb_checkpoints/
|
||||
vmlinux.py
|
||||
~*
|
||||
|
||||
@ -12,7 +12,7 @@
|
||||
#
|
||||
# See https://github.com/pre-commit/pre-commit
|
||||
|
||||
exclude: 'vmlinux.*\.py$'
|
||||
exclude: 'vmlinux.py'
|
||||
|
||||
ci:
|
||||
autoupdate_commit_msg: "chore: update pre-commit hooks"
|
||||
@ -41,7 +41,7 @@ repos:
|
||||
- id: ruff
|
||||
args: ["--fix", "--show-fixes"]
|
||||
- id: ruff-format
|
||||
exclude: ^(docs)|^(tests)|^(examples)
|
||||
# exclude: ^(docs)|^(tests)|^(examples)
|
||||
|
||||
# Checking static types
|
||||
- repo: https://github.com/pre-commit/mirrors-mypy
|
||||
|
||||
@ -83,14 +83,14 @@ def hist() -> HashMap:
|
||||
def hello(ctx: c_void_p) -> c_int64:
|
||||
process_id = pid()
|
||||
one = 1
|
||||
prev = hist().lookup(process_id)
|
||||
prev = hist.lookup(process_id)
|
||||
if prev:
|
||||
previous_value = prev + 1
|
||||
print(f"count: {previous_value} with {process_id}")
|
||||
hist().update(process_id, previous_value)
|
||||
hist.update(process_id, previous_value)
|
||||
return c_int64(0)
|
||||
else:
|
||||
hist().update(process_id, one)
|
||||
hist.update(process_id, one)
|
||||
return c_int64(0)
|
||||
|
||||
|
||||
|
||||
12
TODO.md
12
TODO.md
@ -1,12 +0,0 @@
|
||||
## Short term
|
||||
|
||||
- Implement enough functionality to port the BCC tutorial examples in PythonBPF
|
||||
- Add all maps
|
||||
- XDP support in pylibbpf
|
||||
- ringbuf support
|
||||
- recursive expression resolution
|
||||
|
||||
## Long term
|
||||
|
||||
- Refactor the codebase to be better than a hackathon project
|
||||
- Port to C++ and use actual LLVM?
|
||||
@ -308,6 +308,7 @@
|
||||
"def hist() -> HashMap:\n",
|
||||
" return HashMap(key=c_int32, value=c_uint64, max_entries=4096)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"@bpf\n",
|
||||
"@section(\"tracepoint/syscalls/sys_enter_clone\")\n",
|
||||
"def hello(ctx: c_void_p) -> c_int64:\n",
|
||||
@ -329,6 +330,7 @@
|
||||
"def LICENSE() -> str:\n",
|
||||
" return \"GPL\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"b = BPF()"
|
||||
]
|
||||
},
|
||||
@ -357,7 +359,6 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"\n",
|
||||
"b.load_and_attach()\n",
|
||||
"hist = BpfMap(b, hist)\n",
|
||||
"print(\"Recording\")\n",
|
||||
|
||||
29
examples/kprobes.py
Normal file
29
examples/kprobes.py
Normal file
@ -0,0 +1,29 @@
|
||||
from pythonbpf import bpf, section, bpfglobal, BPF
|
||||
from ctypes import c_void_p, c_int64
|
||||
|
||||
|
||||
@bpf
|
||||
@section("kretprobe/do_unlinkat")
|
||||
def hello_world(ctx: c_void_p) -> c_int64:
|
||||
print("Hello, World!")
|
||||
return c_int64(0)
|
||||
|
||||
|
||||
@bpf
|
||||
@section("kprobe/do_unlinkat")
|
||||
def hello_world2(ctx: c_void_p) -> c_int64:
|
||||
print("Hello, World!")
|
||||
return c_int64(0)
|
||||
|
||||
|
||||
@bpf
|
||||
@bpfglobal
|
||||
def LICENSE() -> str:
|
||||
return "GPL"
|
||||
|
||||
|
||||
b = BPF()
|
||||
b.load_and_attach()
|
||||
while True:
|
||||
print("running")
|
||||
# Now cat /sys/kernel/debug/tracing/trace_pipe to see results of unlink kprobe.
|
||||
@ -27,7 +27,7 @@ def hello(ctx: c_void_p) -> c_int32:
|
||||
dataobj.pid = pid()
|
||||
dataobj.ts = ktime()
|
||||
# dataobj.comm = strobj
|
||||
print(f"clone called at {dataobj.ts} by pid" f"{dataobj.pid}, comm {strobj}")
|
||||
print(f"clone called at {dataobj.ts} by pid{dataobj.pid}, comm {strobj}")
|
||||
events.output(dataobj)
|
||||
return c_int32(0)
|
||||
|
||||
|
||||
203381
examples/vmlinux.py
203381
examples/vmlinux.py
File diff suppressed because it is too large
Load Diff
@ -1,8 +1,8 @@
|
||||
from pythonbpf import bpf, map, section, bpfglobal, compile
|
||||
from pythonbpf import bpf, map, section, bpfglobal, compile, compile_to_ir
|
||||
from pythonbpf.helper import XDP_PASS
|
||||
from pythonbpf.maps import HashMap
|
||||
from ctypes import c_int64, c_void_p
|
||||
|
||||
from ctypes import c_void_p, c_int64
|
||||
|
||||
# Instructions to how to run this program
|
||||
# 1. Install PythonBPF: pip install pythonbpf
|
||||
@ -41,4 +41,5 @@ def LICENSE() -> str:
|
||||
return "GPL"
|
||||
|
||||
|
||||
compile_to_ir("xdp_pass.py", "xdp_pass.ll")
|
||||
compile()
|
||||
|
||||
@ -4,7 +4,12 @@ from .license_pass import license_processing
|
||||
from .functions import func_proc
|
||||
from .maps import maps_proc
|
||||
from .structs import structs_proc
|
||||
from .globals_pass import globals_processing
|
||||
from .vmlinux_parser import vmlinux_proc
|
||||
from .globals_pass import (
|
||||
globals_list_creation,
|
||||
globals_processing,
|
||||
populate_global_symbol_table,
|
||||
)
|
||||
from .debuginfo import DW_LANG_C11, DwarfBehaviorEnum, DebugInfoGenerator
|
||||
import os
|
||||
import subprocess
|
||||
@ -40,12 +45,16 @@ def processor(source_code, filename, module):
|
||||
for func_node in bpf_chunks:
|
||||
logger.info(f"Found BPF function/struct: {func_node.name}")
|
||||
|
||||
vmlinux_proc(tree, module)
|
||||
populate_global_symbol_table(tree, module)
|
||||
license_processing(tree, module)
|
||||
globals_processing(tree, module)
|
||||
|
||||
structs_sym_tab = structs_proc(tree, module, bpf_chunks)
|
||||
map_sym_tab = maps_proc(tree, module, bpf_chunks)
|
||||
func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab)
|
||||
|
||||
license_processing(tree, module)
|
||||
globals_processing(tree, module)
|
||||
globals_list_creation(tree, module)
|
||||
|
||||
|
||||
def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
|
||||
|
||||
@ -1,8 +1,121 @@
|
||||
from llvmlite import ir
|
||||
import ast
|
||||
|
||||
from logging import Logger
|
||||
import logging
|
||||
from .type_deducer import ctypes_to_ir
|
||||
|
||||
def emit_globals(module: ir.Module, names: list[str]):
|
||||
logger: Logger = logging.getLogger(__name__)
|
||||
|
||||
# TODO: this is going to be a huge fuck of a headache in the future.
|
||||
global_sym_tab = []
|
||||
|
||||
|
||||
def populate_global_symbol_table(tree, module: ir.Module):
|
||||
for node in tree.body:
|
||||
if isinstance(node, ast.FunctionDef):
|
||||
for dec in node.decorator_list:
|
||||
if (
|
||||
isinstance(dec, ast.Call)
|
||||
and isinstance(dec.func, ast.Name)
|
||||
and dec.func.id == "section"
|
||||
and len(dec.args) == 1
|
||||
and isinstance(dec.args[0], ast.Constant)
|
||||
and isinstance(dec.args[0].value, str)
|
||||
):
|
||||
global_sym_tab.append(node)
|
||||
elif isinstance(dec, ast.Name) and dec.id == "bpfglobal":
|
||||
global_sym_tab.append(node)
|
||||
|
||||
elif isinstance(dec, ast.Name) and dec.id == "map":
|
||||
global_sym_tab.append(node)
|
||||
return False
|
||||
|
||||
|
||||
def emit_global(module: ir.Module, node, name):
|
||||
logger.info(f"global identifier {name} processing")
|
||||
# deduce LLVM type from the annotated return
|
||||
if not isinstance(node.returns, ast.Name):
|
||||
raise ValueError(f"Unsupported return annotation {ast.dump(node.returns)}")
|
||||
ty = ctypes_to_ir(node.returns.id)
|
||||
|
||||
# extract the return expression
|
||||
# TODO: turn this return extractor into a generic function I can use everywhere.
|
||||
ret_stmt = node.body[0]
|
||||
if not isinstance(ret_stmt, ast.Return) or ret_stmt.value is None:
|
||||
raise ValueError(f"Global '{name}' has no valid return")
|
||||
|
||||
init_val = ret_stmt.value
|
||||
|
||||
# simple constant like "return 0"
|
||||
if isinstance(init_val, ast.Constant):
|
||||
llvm_init = ir.Constant(ty, init_val.value)
|
||||
|
||||
# variable reference like "return SOME_CONST"
|
||||
elif isinstance(init_val, ast.Name):
|
||||
# need symbol resolution here, stub as 0 for now
|
||||
raise ValueError(f"Name reference {init_val.id} not yet supported")
|
||||
|
||||
# constructor call like "return c_int64(0)" or dataclass(...)
|
||||
elif isinstance(init_val, ast.Call):
|
||||
if len(init_val.args) >= 1 and isinstance(init_val.args[0], ast.Constant):
|
||||
llvm_init = ir.Constant(ty, init_val.args[0].value)
|
||||
else:
|
||||
logger.info("Defaulting to zero as no constant argument found")
|
||||
llvm_init = ir.Constant(ty, 0)
|
||||
else:
|
||||
raise ValueError(f"Unsupported return expr {ast.dump(init_val)}")
|
||||
|
||||
gvar = ir.GlobalVariable(module, ty, name=name)
|
||||
gvar.initializer = llvm_init
|
||||
gvar.align = 8
|
||||
gvar.linkage = "dso_local"
|
||||
gvar.global_constant = False
|
||||
return gvar
|
||||
|
||||
|
||||
def globals_processing(tree, module):
|
||||
"""Process stuff decorated with @bpf and @bpfglobal except license and return the section name"""
|
||||
globals_sym_tab = []
|
||||
|
||||
for node in tree.body:
|
||||
# Skip non-assignment and non-function nodes
|
||||
if not (isinstance(node, ast.FunctionDef)):
|
||||
continue
|
||||
|
||||
# Get the name based on node type
|
||||
if isinstance(node, ast.FunctionDef):
|
||||
name = node.name
|
||||
else:
|
||||
continue
|
||||
|
||||
# Check for duplicate names
|
||||
if name in globals_sym_tab:
|
||||
raise SyntaxError(f"ERROR: Global name '{name}' previously defined")
|
||||
else:
|
||||
globals_sym_tab.append(name)
|
||||
|
||||
if isinstance(node, ast.FunctionDef) and node.name != "LICENSE":
|
||||
decorators = [
|
||||
dec.id for dec in node.decorator_list if isinstance(dec, ast.Name)
|
||||
]
|
||||
if "bpf" in decorators and "bpfglobal" in decorators:
|
||||
if (
|
||||
len(node.body) == 1
|
||||
and isinstance(node.body[0], ast.Return)
|
||||
and node.body[0].value is not None
|
||||
and isinstance(
|
||||
node.body[0].value, (ast.Constant, ast.Name, ast.Call)
|
||||
)
|
||||
):
|
||||
emit_global(module, node, name)
|
||||
else:
|
||||
raise SyntaxError(f"ERROR: Invalid syntax for {name} global")
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def emit_llvm_compiler_used(module: ir.Module, names: list[str]):
|
||||
"""
|
||||
Emit the @llvm.compiler.used global given a list of function/global names.
|
||||
"""
|
||||
@ -24,7 +137,7 @@ def emit_globals(module: ir.Module, names: list[str]):
|
||||
gv.section = "llvm.metadata"
|
||||
|
||||
|
||||
def globals_processing(tree, module: ir.Module):
|
||||
def globals_list_creation(tree, module: ir.Module):
|
||||
collected = ["LICENSE"]
|
||||
|
||||
for node in tree.body:
|
||||
@ -40,10 +153,11 @@ def globals_processing(tree, module: ir.Module):
|
||||
):
|
||||
collected.append(node.name)
|
||||
|
||||
elif isinstance(dec, ast.Name) and dec.id == "bpfglobal":
|
||||
collected.append(node.name)
|
||||
# NOTE: all globals other than
|
||||
# elif isinstance(dec, ast.Name) and dec.id == "bpfglobal":
|
||||
# collected.append(node.name)
|
||||
|
||||
elif isinstance(dec, ast.Name) and dec.id == "map":
|
||||
collected.append(node.name)
|
||||
|
||||
emit_globals(module, collected)
|
||||
emit_llvm_compiler_used(module, collected)
|
||||
|
||||
@ -15,5 +15,8 @@ def deref(ptr):
|
||||
return result if result is not None else 0
|
||||
|
||||
|
||||
XDP_ABORTED = ctypes.c_int64(0)
|
||||
XDP_DROP = ctypes.c_int64(1)
|
||||
XDP_PASS = ctypes.c_int64(2)
|
||||
XDP_TX = ctypes.c_int64(3)
|
||||
XDP_REDIRECT = ctypes.c_int64(4)
|
||||
|
||||
0
pythonbpf/tbaa_gen/__init__.py
Normal file
0
pythonbpf/tbaa_gen/__init__.py
Normal file
3
pythonbpf/vmlinux_parser/__init__.py
Normal file
3
pythonbpf/vmlinux_parser/__init__.py
Normal file
@ -0,0 +1,3 @@
|
||||
from .import_detector import vmlinux_proc
|
||||
|
||||
__all__ = ["vmlinux_proc"]
|
||||
120
pythonbpf/vmlinux_parser/class_handler.py
Normal file
120
pythonbpf/vmlinux_parser/class_handler.py
Normal file
@ -0,0 +1,120 @@
|
||||
import logging
|
||||
from functools import lru_cache
|
||||
import importlib
|
||||
from .dependency_handler import DependencyHandler
|
||||
from .dependency_node import DependencyNode
|
||||
import ctypes
|
||||
from typing import Optional, Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def get_module_symbols(module_name: str):
|
||||
imported_module = importlib.import_module(module_name)
|
||||
return [name for name in dir(imported_module)], imported_module
|
||||
|
||||
|
||||
def process_vmlinux_class(node, llvm_module, handler: DependencyHandler):
|
||||
symbols_in_module, imported_module = get_module_symbols("vmlinux")
|
||||
if node.name in symbols_in_module:
|
||||
vmlinux_type = getattr(imported_module, node.name)
|
||||
process_vmlinux_post_ast(vmlinux_type, llvm_module, handler)
|
||||
else:
|
||||
raise ImportError(f"{node.name} not in vmlinux")
|
||||
|
||||
|
||||
def process_vmlinux_post_ast(
|
||||
elem_type_class, llvm_handler, handler: DependencyHandler, processing_stack=None
|
||||
):
|
||||
# Initialize processing stack on first call
|
||||
if processing_stack is None:
|
||||
processing_stack = set()
|
||||
symbols_in_module, imported_module = get_module_symbols("vmlinux")
|
||||
|
||||
current_symbol_name = elem_type_class.__name__
|
||||
field_table = {}
|
||||
is_complex_type = False
|
||||
containing_type: Optional[Any] = None
|
||||
ctype_complex_type: Optional[Any] = None
|
||||
type_length: Optional[int] = None
|
||||
module_name = getattr(elem_type_class, "__module__", None)
|
||||
|
||||
if hasattr(elem_type_class, "_length_") and is_complex_type:
|
||||
type_length = elem_type_class._length_
|
||||
|
||||
if current_symbol_name in processing_stack:
|
||||
logger.debug(
|
||||
f"Circular dependency detected for {current_symbol_name}, skipping"
|
||||
)
|
||||
return True
|
||||
|
||||
# Check if already processed
|
||||
if handler.has_node(current_symbol_name):
|
||||
existing_node = handler.get_node(current_symbol_name)
|
||||
# If the node exists and is ready, we're done
|
||||
if existing_node and existing_node.is_ready:
|
||||
logger.info(f"Node {current_symbol_name} already processed and ready")
|
||||
return True
|
||||
|
||||
processing_stack.add(current_symbol_name)
|
||||
|
||||
if module_name == "vmlinux":
|
||||
if hasattr(elem_type_class, "_type_"):
|
||||
is_complex_type = True
|
||||
containing_type = elem_type_class._type_
|
||||
if containing_type.__module__ == "vmlinux":
|
||||
print("Very weird type ig for containing type", containing_type)
|
||||
elif containing_type.__module__ == ctypes.__name__:
|
||||
if isinstance(elem_type_class, type):
|
||||
if issubclass(elem_type_class, ctypes.Array):
|
||||
ctype_complex_type = ctypes.Array
|
||||
elif issubclass(elem_type_class, ctypes._Pointer):
|
||||
ctype_complex_type = ctypes._Pointer
|
||||
else:
|
||||
raise TypeError("Unsupported ctypes subclass")
|
||||
# handle ctype complex type
|
||||
|
||||
else:
|
||||
raise ImportError(f"Unsupported module of {containing_type}")
|
||||
else:
|
||||
new_dep_node = DependencyNode(name=current_symbol_name)
|
||||
handler.add_node(new_dep_node)
|
||||
class_obj = getattr(imported_module, current_symbol_name)
|
||||
# Inspect the class fields
|
||||
if hasattr(class_obj, "_fields_"):
|
||||
for field_name, field_type in class_obj._fields_:
|
||||
field_table[field_name] = field_type
|
||||
elif hasattr(class_obj, "__annotations__"):
|
||||
for field_name, field_type in class_obj.__annotations__.items():
|
||||
field_table[field_name] = field_type
|
||||
else:
|
||||
raise TypeError("Could not get required class and definition")
|
||||
|
||||
logger.info(f"Extracted fields for {current_symbol_name}: {field_table}")
|
||||
|
||||
for elem_name, elem_type in field_table.items():
|
||||
local_module_name = getattr(elem_type, "__module__", None)
|
||||
if local_module_name == ctypes.__name__:
|
||||
new_dep_node.add_field(elem_name, elem_type, ready=True)
|
||||
logger.info(f"Field {elem_name} is direct ctypes type: {elem_type}")
|
||||
elif local_module_name == "vmlinux":
|
||||
new_dep_node.add_field(elem_name, elem_type, ready=False)
|
||||
logger.debug(
|
||||
f"Processing vmlinux field: {elem_name}, type: {elem_type}"
|
||||
)
|
||||
if process_vmlinux_post_ast(
|
||||
elem_type, llvm_handler, handler, processing_stack
|
||||
):
|
||||
new_dep_node.set_field_ready(elem_name, True)
|
||||
else:
|
||||
raise ValueError(
|
||||
f"{elem_name} with type {elem_type} from module {module_name} not supported in recursive resolver"
|
||||
)
|
||||
print("")
|
||||
|
||||
else:
|
||||
raise ImportError("UNSUPPORTED Module")
|
||||
|
||||
print(current_symbol_name, "DONE")
|
||||
print(f"handler readiness {handler.is_ready}")
|
||||
149
pythonbpf/vmlinux_parser/dependency_handler.py
Normal file
149
pythonbpf/vmlinux_parser/dependency_handler.py
Normal file
@ -0,0 +1,149 @@
|
||||
from typing import Optional, Dict, List, Iterator
|
||||
from .dependency_node import DependencyNode
|
||||
|
||||
|
||||
class DependencyHandler:
|
||||
"""
|
||||
Manages a collection of DependencyNode objects with no duplicates.
|
||||
|
||||
Ensures that no two nodes with the same name can be added and provides
|
||||
methods to check readiness and retrieve specific nodes.
|
||||
|
||||
Example usage:
|
||||
# Create a handler
|
||||
handler = DependencyHandler()
|
||||
|
||||
# Create some dependency nodes
|
||||
node1 = DependencyNode(name="node1")
|
||||
node1.add_field("field1", str)
|
||||
node1.set_field_value("field1", "value1")
|
||||
|
||||
node2 = DependencyNode(name="node2")
|
||||
node2.add_field("field1", int)
|
||||
|
||||
# Add nodes to the handler
|
||||
handler.add_node(node1)
|
||||
handler.add_node(node2)
|
||||
|
||||
# Check if a specific node exists
|
||||
print(handler.has_node("node1")) # True
|
||||
|
||||
# Get a reference to a node and modify it
|
||||
node = handler.get_node("node2")
|
||||
node.set_field_value("field1", 42)
|
||||
|
||||
# Check if all nodes are ready
|
||||
print(handler.is_ready) # False (node2 is ready, but node1 isn't)
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
# Using a dictionary with node names as keys ensures name uniqueness
|
||||
# and provides efficient lookups
|
||||
self._nodes: Dict[str, DependencyNode] = {}
|
||||
|
||||
def add_node(self, node: DependencyNode) -> bool:
|
||||
"""
|
||||
Add a dependency node to the handler.
|
||||
|
||||
Args:
|
||||
node: The DependencyNode to add
|
||||
|
||||
Returns:
|
||||
bool: True if the node was added, False if a node with the same name already exists
|
||||
|
||||
Raises:
|
||||
TypeError: If the provided object is not a DependencyNode
|
||||
"""
|
||||
if not isinstance(node, DependencyNode):
|
||||
raise TypeError(f"Expected DependencyNode, got {type(node).__name__}")
|
||||
|
||||
# Check if a node with this name already exists
|
||||
if node.name in self._nodes:
|
||||
return False
|
||||
|
||||
self._nodes[node.name] = node
|
||||
return True
|
||||
|
||||
@property
|
||||
def is_ready(self) -> bool:
|
||||
"""
|
||||
Check if all nodes are ready.
|
||||
|
||||
Returns:
|
||||
bool: True if all nodes are ready (or if there are no nodes), False otherwise
|
||||
"""
|
||||
if not self._nodes:
|
||||
return True
|
||||
|
||||
return all(node.is_ready for node in self._nodes.values())
|
||||
|
||||
def has_node(self, name: str) -> bool:
|
||||
"""
|
||||
Check if a node with the given name exists.
|
||||
|
||||
Args:
|
||||
name: The name to check
|
||||
|
||||
Returns:
|
||||
bool: True if a node with the given name exists, False otherwise
|
||||
"""
|
||||
return name in self._nodes
|
||||
|
||||
def get_node(self, name: str) -> Optional[DependencyNode]:
|
||||
"""
|
||||
Get a node by name for manipulation.
|
||||
|
||||
Args:
|
||||
name: The name of the node to retrieve
|
||||
|
||||
Returns:
|
||||
Optional[DependencyNode]: The node with the given name, or None if not found
|
||||
"""
|
||||
return self._nodes.get(name)
|
||||
|
||||
def remove_node(self, node_or_name) -> bool:
|
||||
"""
|
||||
Remove a node by name or reference.
|
||||
|
||||
Args:
|
||||
node_or_name: The node to remove or its name
|
||||
|
||||
Returns:
|
||||
bool: True if the node was removed, False if not found
|
||||
"""
|
||||
if isinstance(node_or_name, DependencyNode):
|
||||
name = node_or_name.name
|
||||
else:
|
||||
name = node_or_name
|
||||
|
||||
if name in self._nodes:
|
||||
del self._nodes[name]
|
||||
return True
|
||||
return False
|
||||
|
||||
def get_all_nodes(self) -> List[DependencyNode]:
|
||||
"""
|
||||
Get all nodes stored in the handler.
|
||||
|
||||
Returns:
|
||||
List[DependencyNode]: List of all nodes
|
||||
"""
|
||||
return list(self._nodes.values())
|
||||
|
||||
def __iter__(self) -> Iterator[DependencyNode]:
|
||||
"""
|
||||
Iterate over all nodes.
|
||||
|
||||
Returns:
|
||||
Iterator[DependencyNode]: Iterator over all nodes
|
||||
"""
|
||||
return iter(self._nodes.values())
|
||||
|
||||
def __len__(self) -> int:
|
||||
"""
|
||||
Get the number of nodes in the handler.
|
||||
|
||||
Returns:
|
||||
int: The number of nodes
|
||||
"""
|
||||
return len(self._nodes)
|
||||
191
pythonbpf/vmlinux_parser/dependency_node.py
Normal file
191
pythonbpf/vmlinux_parser/dependency_node.py
Normal file
@ -0,0 +1,191 @@
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
|
||||
# TODO: FIX THE FUCKING TYPE NAME CONVENTION.
|
||||
@dataclass
|
||||
class Field:
|
||||
"""Represents a field in a dependency node with its type and readiness state."""
|
||||
|
||||
name: str
|
||||
type: type
|
||||
containing_type: Optional[Any]
|
||||
type_size: Optional[int]
|
||||
value: Any = None
|
||||
ready: bool = False
|
||||
|
||||
def set_ready(self, is_ready: bool = True) -> None:
|
||||
"""Set the readiness state of this field."""
|
||||
self.ready = is_ready
|
||||
|
||||
def set_value(self, value: Any, mark_ready: bool = True) -> None:
|
||||
"""Set the value of this field and optionally mark it as ready."""
|
||||
self.value = value
|
||||
if mark_ready:
|
||||
self.ready = True
|
||||
|
||||
def set_type(self, given_type, mark_ready: bool = True) -> None:
|
||||
"""Set value of the type field and mark as ready"""
|
||||
self.type = given_type
|
||||
if mark_ready:
|
||||
self.ready = True
|
||||
|
||||
def set_containing_type(
|
||||
self, containing_type: Optional[Any], mark_ready: bool = True
|
||||
) -> None:
|
||||
"""Set the containing_type of this field and optionally mark it as ready."""
|
||||
self.containing_type = containing_type
|
||||
if mark_ready:
|
||||
self.ready = True
|
||||
|
||||
def set_type_size(self, type_size: Any, mark_ready: bool = True) -> None:
|
||||
"""Set the type_size of this field and optionally mark it as ready."""
|
||||
self.type_size = type_size
|
||||
if mark_ready:
|
||||
self.ready = True
|
||||
|
||||
|
||||
@dataclass
|
||||
class DependencyNode:
|
||||
"""
|
||||
A node with typed fields and readiness tracking.
|
||||
|
||||
Example usage:
|
||||
# Create a dependency node for a Person
|
||||
somestruct = DependencyNode(name="struct_1")
|
||||
|
||||
# Add fields with their types
|
||||
somestruct.add_field("field_1", str)
|
||||
somestruct.add_field("field_2", int)
|
||||
somestruct.add_field("field_3", str)
|
||||
|
||||
# Check if the node is ready (should be False initially)
|
||||
print(f"Is node ready? {somestruct.is_ready}") # False
|
||||
|
||||
# Set some field values
|
||||
somestruct.set_field_value("field_1", "someproperty")
|
||||
somestruct.set_field_value("field_2", 30)
|
||||
|
||||
# Check if the node is ready (still False because email is not ready)
|
||||
print(f"Is node ready? {somestruct.is_ready}") # False
|
||||
|
||||
# Set the last field and make the node ready
|
||||
somestruct.set_field_value("field_3", "anotherproperty")
|
||||
|
||||
# Now the node should be ready
|
||||
print(f"Is node ready? {somestruct.is_ready}") # True
|
||||
|
||||
# You can also mark a field as not ready
|
||||
somestruct.set_field_ready("field_3", False)
|
||||
|
||||
# Now the node is not ready again
|
||||
print(f"Is node ready? {somestruct.is_ready}") # False
|
||||
|
||||
# Get all field values
|
||||
print(somestruct.get_field_values()) # {'field_1': 'someproperty', 'field_2': 30, 'field_3': 'anotherproperty'}
|
||||
|
||||
# Get only ready fields
|
||||
ready_fields = somestruct.get_ready_fields()
|
||||
print(f"Ready fields: {[field.name for field in ready_fields.values()]}") # ['field_1', 'field_2']
|
||||
"""
|
||||
|
||||
name: str
|
||||
fields: Dict[str, Field] = field(default_factory=dict)
|
||||
_ready_cache: Optional[bool] = field(default=None, repr=False)
|
||||
|
||||
def add_field(
|
||||
self,
|
||||
name: str,
|
||||
field_type: type,
|
||||
initial_value: Any = None,
|
||||
containing_type: Optional[Any] = None,
|
||||
type_size: Optional[int] = None,
|
||||
ready: bool = False,
|
||||
) -> None:
|
||||
"""Add a field to the node with an optional initial value and readiness state."""
|
||||
self.fields[name] = Field(
|
||||
name=name,
|
||||
type=field_type,
|
||||
value=initial_value,
|
||||
ready=ready,
|
||||
containing_type=containing_type,
|
||||
type_size=type_size,
|
||||
)
|
||||
# Invalidate readiness cache
|
||||
self._ready_cache = None
|
||||
|
||||
def get_field(self, name: str) -> Field:
|
||||
"""Get a field by name."""
|
||||
return self.fields[name]
|
||||
|
||||
def set_field_value(self, name: str, value: Any, mark_ready: bool = True) -> None:
|
||||
"""Set a field's value and optionally mark it as ready."""
|
||||
if name not in self.fields:
|
||||
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
|
||||
|
||||
self.fields[name].set_value(value, mark_ready)
|
||||
# Invalidate readiness cache
|
||||
self._ready_cache = None
|
||||
|
||||
def set_field_type(self, name: str, type: Any, mark_ready: bool = True) -> None:
|
||||
"""Set a field's type and optionally mark it as ready."""
|
||||
if name not in self.fields:
|
||||
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
|
||||
|
||||
self.fields[name].set_type(type, mark_ready)
|
||||
# Invalidate readiness cache
|
||||
self._ready_cache = None
|
||||
|
||||
def set_field_containing_type(
|
||||
self, name: str, containing_type: Any, mark_ready: bool = True
|
||||
) -> None:
|
||||
"""Set a field's containing_type and optionally mark it as ready."""
|
||||
if name not in self.fields:
|
||||
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
|
||||
|
||||
self.fields[name].set_containing_type(containing_type, mark_ready)
|
||||
# Invalidate readiness cache
|
||||
self._ready_cache = None
|
||||
|
||||
def set_field_type_size(
|
||||
self, name: str, type_size: Any, mark_ready: bool = True
|
||||
) -> None:
|
||||
"""Set a field's type_size and optionally mark it as ready."""
|
||||
if name not in self.fields:
|
||||
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
|
||||
|
||||
self.fields[name].set_type_size(type_size, mark_ready)
|
||||
# Invalidate readiness cache
|
||||
self._ready_cache = None
|
||||
|
||||
def set_field_ready(self, name: str, is_ready: bool = True) -> None:
|
||||
"""Mark a field as ready or not ready."""
|
||||
if name not in self.fields:
|
||||
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
|
||||
|
||||
self.fields[name].set_ready(is_ready)
|
||||
# Invalidate readiness cache
|
||||
self._ready_cache = None
|
||||
|
||||
@property
|
||||
def is_ready(self) -> bool:
|
||||
"""Check if the node is ready (all fields are ready)."""
|
||||
# Use cached value if available
|
||||
if self._ready_cache is not None:
|
||||
return self._ready_cache
|
||||
|
||||
# Calculate readiness only when needed
|
||||
if not self.fields:
|
||||
self._ready_cache = False
|
||||
return False
|
||||
|
||||
self._ready_cache = all(elem.ready for elem in self.fields.values())
|
||||
return self._ready_cache
|
||||
|
||||
def get_field_values(self) -> Dict[str, Any]:
|
||||
"""Get a dictionary of field names to their values."""
|
||||
return {name: elem.value for name, elem in self.fields.items()}
|
||||
|
||||
def get_ready_fields(self) -> Dict[str, Field]:
|
||||
"""Get all fields that are marked as ready."""
|
||||
return {name: elem for name, elem in self.fields.items() if elem.ready}
|
||||
135
pythonbpf/vmlinux_parser/import_detector.py
Normal file
135
pythonbpf/vmlinux_parser/import_detector.py
Normal file
@ -0,0 +1,135 @@
|
||||
import ast
|
||||
import logging
|
||||
from typing import List, Tuple, Dict
|
||||
import importlib
|
||||
import inspect
|
||||
|
||||
from .dependency_handler import DependencyHandler
|
||||
from .ir_generation import IRGenerator
|
||||
from .class_handler import process_vmlinux_class
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def detect_import_statement(tree: ast.AST) -> List[Tuple[str, ast.ImportFrom]]:
    """
    Collect every ``from vmlinux import <name>`` statement found in ``tree``.

    Every node reachable through ``ast.walk`` is inspected, so imports nested
    inside functions or classes are seen as well.

    Args:
        tree: The AST to scan.

    Returns:
        One ``("vmlinux", node)`` tuple per accepted statement, where ``node``
        is the ``ast.ImportFrom`` node itself (not the imported name).

    Raises:
        SyntaxError: For ``from vmlinux import *``, for more than one name in
            a single ``from vmlinux import ...`` statement, for a statement
            carrying no names, and for ``import vmlinux`` /
            ``import vmlinux.<anything>``.
    """
    vmlinux_imports = []

    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            # "import vmlinux" (or a submodule of it) is never accepted.
            if any(
                alias.name == "vmlinux" or alias.name.startswith("vmlinux.")
                for alias in node.names
            ):
                raise SyntaxError(
                    "Direct import of vmlinux module is not supported. "
                    "Use 'from vmlinux import <type>' instead."
                )
            continue

        if not isinstance(node, ast.ImportFrom) or node.module != "vmlinux":
            continue

        imported_names = [alias.name for alias in node.names]

        if "*" in imported_names:
            raise SyntaxError(
                "Wildcard imports from vmlinux are not supported. "
                "Please import specific types explicitly."
            )

        if len(imported_names) > 1:
            raise SyntaxError(
                "Multiple imports from vmlinux are not supported. "
                f"Found: {', '.join(imported_names)}. "
                "Please use separate import statements for each type."
            )

        # Cannot be produced by parsing valid Python; guards hand-built ASTs.
        if not imported_names:
            raise SyntaxError("Import from vmlinux must specify at least one type.")

        # Exactly one name is left at this point; any "as" alias is ignored.
        for import_name in imported_names:
            vmlinux_imports.append(("vmlinux", node))
            logger.info(f"Found vmlinux import: {import_name}")

    logger.info(f"Total vmlinux imports detected: {len(vmlinux_imports)}")
    return vmlinux_imports
|
||||
|
||||
|
||||
def vmlinux_proc(tree: ast.AST, module):
    """Resolve the vmlinux imports of ``tree`` against the vmlinux module source.

    For each name imported from ``vmlinux`` the top-level statements of the
    vmlinux module's source file are searched, in order, for a class
    definition or an assignment with that name. The first hit is handed to
    ``process_vmlinux_class`` or ``process_vmlinux_assign``. Once all imports
    are handled an ``IRGenerator`` is constructed from ``module`` and the
    dependency handler.

    Returns ``None`` in every case. It returns early (after logging) when
    there are no vmlinux imports, when ``vmlinux`` cannot be imported, or
    when its source file cannot be located.

    Args:
        tree: AST of the user program; passed to ``detect_import_statement``.
        module: Passed through unchanged to the class/assign processors and
            to ``IRGenerator``.
    """
    import_statements = detect_import_statement(tree)

    # Shared state handed to the per-definition processors below.
    handler = DependencyHandler()
    assignments: Dict[str, type] = {}

    if not import_statements:
        logger.info("No vmlinux imports found")
        return

    try:
        vmlinux_mod = importlib.import_module("vmlinux")
    except ImportError:
        logger.warning("Could not import vmlinux module")
        return

    source_file = inspect.getsourcefile(vmlinux_mod)
    if source_file is None:
        logger.warning("Cannot find source for vmlinux module")
        return

    # The module is re-parsed from source because AST nodes, not the live
    # classes, are what the processors receive.
    with open(source_file, "r") as src:
        mod_ast = ast.parse(src.read(), filename=source_file)

    for _module_name, import_node in import_statements:
        for alias in import_node.names:
            imported_name = alias.name
            resolved = False

            for top_level in mod_ast.body:
                if isinstance(top_level, ast.ClassDef):
                    if top_level.name == imported_name:
                        process_vmlinux_class(top_level, module, handler)
                        resolved = True
                elif isinstance(top_level, ast.Assign):
                    if any(
                        isinstance(target, ast.Name) and target.id == imported_name
                        for target in top_level.targets
                    ):
                        process_vmlinux_assign(top_level, module, assignments)
                        resolved = True

                # Only the first matching definition is processed.
                if resolved:
                    break

            if not resolved:
                logger.info(
                    f"{imported_name} not found as ClassDef or Assign in vmlinux"
                )

    IRGenerator(module, handler)
|
||||
|
||||
|
||||
def process_vmlinux_assign(node, module, assignments: Dict[str, type]):
    """Placeholder for handling an assignment imported from vmlinux.

    Raises:
        NotImplementedError: Always; no argument is inspected.
    """
    unsupported = NotImplementedError(
        "Assignment handling has not been implemented yet"
    )
    raise unsupported
|
||||
8
pythonbpf/vmlinux_parser/ir_generation.py
Normal file
8
pythonbpf/vmlinux_parser/ir_generation.py
Normal file
@ -0,0 +1,8 @@
|
||||
# here, we will iterate through the dependencies and generate IR once dependencies are resolved fully
|
||||
from .dependency_handler import DependencyHandler
|
||||
|
||||
|
||||
class IRGenerator:
    """Holder for the target module and the dependency handler.

    Only the constructor exists so far; it stores both arguments unchanged
    and performs no other work.
    """

    def __init__(self, module, handler):
        # Kept as plain attributes for the IR generation step to use later.
        self.handler: DependencyHandler = handler
        self.module = module
|
||||
@ -1,11 +1,10 @@
|
||||
#include <linux/bpf.h>
|
||||
#include "vmlinux.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#define u64 unsigned long long
|
||||
#define u32 unsigned int
|
||||
#include <bpf/bpf_endian.h>
|
||||
|
||||
SEC("xdp")
|
||||
int hello(struct xdp_md *ctx) {
|
||||
bpf_printk("Hello, World!\n");
|
||||
bpf_printk("Hello, World! %ud \n", ctx->data);
|
||||
return XDP_PASS;
|
||||
}
|
||||
|
||||
|
||||
27
tests/c-form/globals.bpf.c
Normal file
27
tests/c-form/globals.bpf.c
Normal file
@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
|
||||
#include <linux/bpf.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
/* Pair of 64-bit values; trace_execve stores the current timestamp in `a`
 * and the previous one in `b`. */
struct test_struct {
	__u64 a;
	__u64 b;
};

/* Global instance of the pair, read and then rewritten by trace_execve. */
struct test_struct w = {};
/* Timestamp written by the previous trace_execve run; starts at 0. */
volatile __u64 prev_time = 0;
|
||||
|
||||
SEC("tracepoint/syscalls/sys_enter_execve")
int trace_execve(void *ctx)
{
	/*
	 * Prints the globals as left by the previous invocation, then records
	 * the current timestamp in them.
	 *
	 * All printed values are __u64, so the conversion must be %llu. The
	 * former "%ul" parsed as "%u" followed by a literal 'l', which
	 * truncated the value and appended a stray character.
	 */
	bpf_printk("previous %llu now %llu", w.b, w.a);
	__u64 ts = bpf_ktime_get_ns();
	bpf_printk("prev %llu now %llu", prev_time, ts);
	w.a = ts;
	w.b = prev_time;
	prev_time = ts;
	return 0;
}
|
||||
|
||||
char LICENSE[] SEC("license") = "GPL";
|
||||
19
tests/c-form/kprobe.bpf.c
Normal file
19
tests/c-form/kprobe.bpf.c
Normal file
@ -0,0 +1,19 @@
|
||||
#include "vmlinux.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
|
||||
char LICENSE[] SEC("license") = "Dual BSD/GPL";
|
||||
|
||||
/*
 * Placed in the "kprobe/do_unlinkat" section; only emits a trace message.
 * NOTE(review): the function is named *_execve but the section targets
 * do_unlinkat, and unlike the kretprobe message this one has no trailing
 * "\n" — confirm both are intended.
 */
SEC("kprobe/do_unlinkat")
int kprobe_execve(struct pt_regs *ctx)
{
	bpf_printk("unlinkat created");
	return 0;
}
|
||||
|
||||
/*
 * Placed in the "kretprobe/do_unlinkat" section; only emits a trace message.
 * NOTE(review): named *_execve although the section targets do_unlinkat.
 */
SEC("kretprobe/do_unlinkat")
int kretprobe_execve(struct pt_regs *ctx)
{
	bpf_printk("unlinkat returned\n");
	return 0;
}
|
||||
18
tests/failing_tests/conditionals/oneline.py
Normal file
18
tests/failing_tests/conditionals/oneline.py
Normal file
@ -0,0 +1,18 @@
|
||||
from pythonbpf import bpf, section, bpfglobal, compile
|
||||
from ctypes import c_void_p, c_int64
|
||||
|
||||
|
||||
# Fixture stored under tests/failing_tests: a conditional expression used as
# a statement, with a print() call in each branch.
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
    print("Hello, World!") if True else print("Goodbye, World!")
    # NOTE(review): bare return despite the c_int64 annotation — presumably
    # part of what this fixture exercises; confirm.
    return
|
||||
|
||||
|
||||
# @bpfglobal function returning the license string "GPL".
@bpf
@bpfglobal
def LICENSE() -> str:
    return "GPL"
|
||||
|
||||
|
||||
compile()
|
||||
109
tests/failing_tests/globals.py
Normal file
109
tests/failing_tests/globals.py
Normal file
@ -0,0 +1,109 @@
|
||||
import logging
|
||||
|
||||
from pythonbpf import compile, bpf, section, bpfglobal, compile_to_ir
|
||||
from ctypes import c_void_p, c_int64, c_int32
|
||||
|
||||
|
||||
# 32-bit @bpfglobal returning c_int32(42); sometag later declares
# `global somevalue` and assigns to it.
@bpf
@bpfglobal
def somevalue() -> c_int32:
    return c_int32(42)
|
||||
|
||||
|
||||
# 64-bit @bpfglobal returning c_int64(69).
@bpf
@bpfglobal
def somevalue2() -> c_int64:
    return c_int64(69)
|
||||
|
||||
|
||||
# Second 32-bit @bpfglobal with the same return expression as somevalue.
@bpf
@bpfglobal
def somevalue1() -> c_int32:
    return c_int32(42)
|
||||
|
||||
|
||||
# --- Passing examples ---
|
||||
|
||||
|
||||
# Simple constant return
|
||||
# Body is a single return of a ctypes constructor applied to an int literal.
@bpf
@bpfglobal
def g1() -> c_int64:
    return c_int64(42)
|
||||
|
||||
|
||||
# Constructor with one constant argument
|
||||
# Same shape as g1, with a different literal (69).
@bpf
@bpfglobal
def g2() -> c_int64:
    return c_int64(69)
|
||||
|
||||
|
||||
# --- Failing examples ---
|
||||
|
||||
# No return annotation
|
||||
# @bpf
|
||||
# @bpfglobal
|
||||
# def g3():
|
||||
# return 42
|
||||
|
||||
# Return annotation is complex
|
||||
# @bpf
|
||||
# @bpfglobal
|
||||
# def g4() -> List[int]:
|
||||
# return []
|
||||
|
||||
# # Return is missing
|
||||
# @bpf
|
||||
# @bpfglobal
|
||||
# def g5() -> c_int64:
|
||||
# pass
|
||||
|
||||
# # Return is a variable reference
|
||||
# #TODO: maybe fix this sometime later. It defaults to 0
|
||||
# CONST = 5
|
||||
# @bpf
|
||||
# @bpfglobal
|
||||
# def g6() -> c_int64:
|
||||
# return c_int64(CONST)
|
||||
|
||||
|
||||
# Constructor with multiple args
|
||||
# TODO: this is not working. should it work ?
|
||||
# NOTE(review): the heading above this definition talks about a constructor
# with multiple args, but only one argument is passed here — confirm which
# of the two is out of date.
@bpf
@bpfglobal
def g7() -> c_int64:
    return c_int64(1)
|
||||
|
||||
|
||||
# Dataclass call
|
||||
# TODO: fails with dataclass
|
||||
# @dataclass
|
||||
# class Point:
|
||||
# x: c_int64
|
||||
# y: c_int64
|
||||
|
||||
# @bpf
|
||||
# @bpfglobal
|
||||
# def g8() -> Point:
|
||||
# return Point(1, 2)
|
||||
|
||||
|
||||
# Section function that declares `global somevalue`, assigns 2 to it and then
# prints it through an f-string.
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def sometag(ctx: c_void_p) -> c_int64:
    print("test")
    # `somevalue` is also the name of a @bpfglobal function defined earlier
    # in this file.
    global somevalue
    somevalue = 2
    print(f"{somevalue}")
    return c_int64(1)
|
||||
|
||||
|
||||
# @bpfglobal function returning the license string "GPL".
@bpf
@bpfglobal
def LICENSE() -> str:
    return "GPL"
|
||||
|
||||
|
||||
compile_to_ir("globals.py", "globals.ll", loglevel=logging.INFO)
|
||||
compile()
|
||||
@ -11,6 +11,7 @@ from ctypes import c_void_p, c_int64
|
||||
# We cannot allocate space for the intermediate type now.
|
||||
# We probably need to track the ref/deref chain for each variable.
|
||||
|
||||
|
||||
@bpf
|
||||
@map
|
||||
def count() -> HashMap:
|
||||
|
||||
23
tests/failing_tests/undeclared_values.py
Normal file
23
tests/failing_tests/undeclared_values.py
Normal file
@ -0,0 +1,23 @@
|
||||
import logging
|
||||
|
||||
from pythonbpf import compile, bpf, section, bpfglobal, compile_to_ir
|
||||
from ctypes import c_void_p, c_int64
|
||||
|
||||
|
||||
# This should not pass as somevalue is not declared at all.
|
||||
# Reads `somevalue` inside an f-string although nothing in this file defines
# it; the noqa below silences the linter's undefined-name warning (F821).
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def sometag(ctx: c_void_p) -> c_int64:
    print("test")
    print(f"{somevalue}")  # noqa: F821
    return c_int64(1)
|
||||
|
||||
|
||||
# @bpfglobal function returning the license string "GPL".
@bpf
@bpfglobal
def LICENSE() -> str:
    return "GPL"
|
||||
|
||||
|
||||
# Emit IR for this file. The file names previously pointed at "globals.py" /
# "globals.ll" (copied from the globals fixture), so the wrong fixture was
# compiled and its output overwritten.
compile_to_ir("undeclared_values.py", "undeclared_values.ll", loglevel=logging.INFO)
compile()
|
||||
48
tests/failing_tests/xdp_pass.py
Normal file
48
tests/failing_tests/xdp_pass.py
Normal file
@ -0,0 +1,48 @@
|
||||
from pythonbpf import bpf, map, section, bpfglobal, compile, compile_to_ir
|
||||
from pythonbpf.maps import HashMap
|
||||
from pythonbpf.helper import XDP_PASS
|
||||
from vmlinux import struct_xdp_md
|
||||
from vmlinux import struct_ring_buffer_per_cpu # noqa: F401
|
||||
|
||||
# from vmlinux import struct_xdp_buff # noqa: F401
|
||||
# from vmlinux import struct_xdp_md
|
||||
from ctypes import c_int64
|
||||
|
||||
# Instructions on how to run this program
|
||||
# 1. Install PythonBPF: pip install pythonbpf
|
||||
# 2. Run the program: python examples/xdp_pass.py
|
||||
# 3. Run the program with sudo: sudo tools/check.sh run examples/xdp_pass.o
|
||||
# 4. Attach object file to any network device with something like ./check.sh xdp examples/xdp_pass.o tailscale0
|
||||
# 5. send traffic through the device and observe effects
|
||||
|
||||
|
||||
# Map declaration: a HashMap with c_int64 keys and values and max_entries=1.
@bpf
@map
def count() -> HashMap:
    return HashMap(key=c_int64, value=c_int64, max_entries=1)
|
||||
|
||||
|
||||
# XDP section function: keeps a counter under key 0 of the `count` map and
# returns XDP_PASS on every path. `ctx` is annotated with the vmlinux type
# struct_xdp_md but is not otherwise used in the body.
@bpf
@section("xdp")
def hello_world(ctx: struct_xdp_md) -> c_int64:
    key = 0
    one = 1
    prev = count().lookup(key)
    if prev:
        # Lookup gave a truthy result: print and store prev + 1.
        prevval = prev + 1
        print(f"count: {prevval}")
        count().update(key, prevval)
        return XDP_PASS
    else:
        # Falsy lookup result: store 1 under the key.
        count().update(key, one)

    return XDP_PASS
|
||||
|
||||
|
||||
# @bpfglobal function returning the license string "GPL".
@bpf
@bpfglobal
def LICENSE() -> str:
    return "GPL"
|
||||
|
||||
|
||||
compile_to_ir("xdp_pass.py", "xdp_pass.ll")
|
||||
@ -6,8 +6,8 @@ from ctypes import c_void_p, c_int32
|
||||
@section("tracepoint/syscalls/sys_enter_execve")
|
||||
def hello_world(ctx: c_void_p) -> c_int32:
|
||||
print("Hello, World!")
|
||||
a = 1 # int64
|
||||
return c_int32(a) # typecast to int32
|
||||
a = 1 # int64
|
||||
return c_int32(a) # typecast to int32
|
||||
|
||||
|
||||
@bpf
|
||||
|
||||
379
tools/vmlinux-gen.py
Executable file
379
tools/vmlinux-gen.py
Executable file
@ -0,0 +1,379 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
BTF to Python ctypes Converter
|
||||
Converts Linux kernel BTF (BPF Type Format) to Python ctypes definitions.
|
||||
|
||||
This tool automates the process of:
|
||||
1. Dumping BTF from vmlinux
|
||||
2. Preprocessing enum definitions
|
||||
3. Processing struct kioctx to extract anonymous nested structs
|
||||
4. Running C preprocessor
|
||||
5. Converting to Python ctypes using clang2py
|
||||
6. Post-processing the output
|
||||
|
||||
Requirements:
|
||||
- bpftool
|
||||
- clang
|
||||
- ctypeslib2 (pip install ctypeslib2)
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
|
||||
class BTFConverter:
    """Pipeline that converts kernel BTF into a Python ctypes module.

    ``convert`` runs the steps in order: dump BTF as a C header, hoist
    anonymous enums, name the anonymous structs inside ``struct kioctx``,
    run the C preprocessor, run ``clang2py`` and finally patch the generated
    Python with a handful of regex rewrites.

    Attributes:
        btf_source: Path handed to ``bpftool btf dump file``.
        output_file: Path of the final Python file.
        keep_intermediate: When true, intermediate files are written to the
            current directory and are not removed.
        verbose: Enables ``log`` output and echoing of command stdout.
        temp_dir: Directory for intermediate files; a fresh ``mkdtemp``
            directory unless ``keep_intermediate`` is set, in which case ".".
    """

    def __init__(
        self,
        btf_source="/sys/kernel/btf/vmlinux",
        output_file="vmlinux.py",
        keep_intermediate=False,
        verbose=False,
    ):
        self.btf_source = btf_source
        self.output_file = output_file
        self.keep_intermediate = keep_intermediate
        self.verbose = verbose
        self.temp_dir = tempfile.mkdtemp() if not keep_intermediate else "."

    def log(self, message):
        """Print message if verbose mode is enabled."""
        if self.verbose:
            print(f"[*] {message}")

    def run_command(self, cmd, description):
        """Run ``cmd`` through the shell and return the completed process.

        On a non-zero exit status the captured stderr is printed and the
        process exits with status 1. Because ``cmd`` is interpreted by the
        shell, callers must quote any path they interpolate into it.
        """
        self.log(f"{description}...")
        try:
            result = subprocess.run(
                cmd, shell=True, check=True, capture_output=True, text=True
            )
        except subprocess.CalledProcessError as e:
            print(f"Error during {description}:", file=sys.stderr)
            print(e.stderr, file=sys.stderr)
            sys.exit(1)

        if self.verbose and result.stdout:
            print(result.stdout)
        return result

    def step1_dump_btf(self):
        """Step 1: Dump BTF from vmlinux into ``vmlinux.h``; return its path."""
        import shlex

        vmlinux_h = os.path.join(self.temp_dir, "vmlinux.h")
        # Quote both paths: they end up in a shell command line.
        cmd = (
            f"bpftool btf dump file {shlex.quote(self.btf_source)} "
            f"format c > {shlex.quote(vmlinux_h)}"
        )
        self.run_command(cmd, "Dumping BTF from vmlinux")
        return vmlinux_h

    def step2_preprocess_enums(self, input_file):
        """Step 2: Hoist anonymous enum members to the top of the header.

        Every ``enum { ... } name;`` declaration (optionally with a bitfield
        width, and not directly preceded by ``typedef``) is replaced by
        ``int name;``. The enum bodies themselves are prepended to the file
        as standalone ``enum { ... };`` definitions.

        Returns:
            Path of the written ``vmlinux_processed.h``.
        """
        self.log("Preprocessing enum definitions...")

        with open(input_file, "r") as f:
            original_code = f.read()

        # Extract anonymous enums
        enums = re.findall(
            r"(?<!typedef\s)(enum\s*\{[^}]*\})\s*(\w+)\s*(?::\s*\d+)?\s*;",
            original_code,
        )
        enum_defs = [enum_block + ";" for enum_block, _ in enums]

        # Replace anonymous enums with int declarations
        processed_code = re.sub(
            r"(?<!typedef\s)enum\s*\{[^}]*\}\s*(\w+)\s*(?::\s*\d+)?\s*;",
            r"int \1;",
            original_code,
        )

        # Prepend enum definitions
        if enum_defs:
            enum_text = "\n".join(enum_defs) + "\n\n"
            processed_code = enum_text + processed_code

        output_file = os.path.join(self.temp_dir, "vmlinux_processed.h")
        with open(output_file, "w") as f:
            f.write(processed_code)

        return output_file

    def step2_5_process_kioctx(self, input_file):
        """Step 2.5: Process struct kioctx to extract nested anonymous structs.

        Each ``struct { ... };`` member of ``struct kioctx`` (an anonymous
        struct with no member name and no nested braces) is replaced by
        ``struct __anonN aN;``. The matching ``struct __anonN { ... };``
        definitions are emitted in front of ``struct kioctx``.

        Returns:
            Path of the written ``vmlinux_kioctx_processed.h``.
        """
        # TODO: this is a very bad bug and design decision. A single struct has an issue mostly.
        self.log("Processing struct kioctx nested structs...")

        with open(input_file, "r") as f:
            content = f.read()

        # Pattern to match struct kioctx with its full body (handles multiple nesting levels)
        kioctx_pattern = (
            r"struct\s+kioctx\s*\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\}\s*;"
        )

        def process_kioctx_replacement(match):
            """Return the rewritten text for one matched ``struct kioctx``."""
            full_struct = match.group(0)
            self.log(f"Found struct kioctx, length: {len(full_struct)} chars")

            # Extract the struct body (everything between outermost { and })
            body_match = re.search(
                r"struct\s+kioctx\s*\{(.*)\}\s*;", full_struct, re.DOTALL
            )
            if not body_match:
                return full_struct

            body = body_match.group(1)

            # Only anonymous structs followed directly by ';' are rewritten.
            pattern_with_semicolon = r"struct\s*\{([^}]*)\}\s*;"
            anon_matches = list(re.finditer(pattern_with_semicolon, body, re.DOTALL))

            if not anon_matches:
                self.log("No anonymous structs found in kioctx")
                return full_struct

            self.log(f"Found {len(anon_matches)} anonymous struct(s)")

            # Numbering counts down while walking the matches back to front.
            # It starts at 4 as before, but is raised when more than four
            # structs are present so the counter can never reach 0 or go
            # negative ("__anon-1" is not a valid C identifier).
            anon_counter = max(4, len(anon_matches))
            anon_structs = []
            processed_body = body

            # Process in reverse order to maintain string positions
            for anon_match in reversed(anon_matches):
                anon_struct_content = anon_match.group(1)
                anon_name = f"__anon{anon_counter}"
                member_name = f"a{anon_counter}"

                # insert(0, ...) keeps the definitions in source order.
                anon_structs.insert(0, f"struct {anon_name} {{{anon_struct_content}}};")

                replacement = f"struct {anon_name} {member_name};"
                processed_body = (
                    processed_body[: anon_match.start()]
                    + replacement
                    + processed_body[anon_match.end() :]
                )

                anon_counter -= 1

            # Prepend the new named definitions to the rebuilt struct.
            anon_definitions = "\n".join(anon_structs) + "\n\n"
            new_struct = f"struct kioctx {{{processed_body}}};"
            return anon_definitions + new_struct

        # Apply the transformation
        processed_content = re.sub(
            kioctx_pattern, process_kioctx_replacement, content, flags=re.DOTALL
        )

        output_file = os.path.join(self.temp_dir, "vmlinux_kioctx_processed.h")
        with open(output_file, "w") as f:
            f.write(processed_content)

        self.log(f"Saved kioctx-processed output to {output_file}")
        return output_file

    def step3_run_preprocessor(self, input_file):
        """Step 3: Run the C preprocessor (``clang -E``); return ``vmlinux.i``."""
        import shlex

        output_file = os.path.join(self.temp_dir, "vmlinux.i")
        cmd = f"clang -E {shlex.quote(input_file)} > {shlex.quote(output_file)}"
        self.run_command(cmd, "Running C preprocessor")
        return output_file

    def step4_convert_to_ctypes(self, input_file):
        """Step 4: Convert to Python ctypes using clang2py; return the raw file."""
        import shlex

        output_file = os.path.join(self.temp_dir, "vmlinux_raw.py")
        cmd = (
            f"clang2py {shlex.quote(input_file)} -o {shlex.quote(output_file)} "
            '--clang-args="-fno-ms-extensions -I/usr/include -I/usr/include/linux"'
        )
        self.run_command(cmd, "Converting to Python ctypes")
        return output_file

    def step5_postprocess(self, input_file):
        """Step 5: Post-process the generated Python file into ``output_file``.

        Applies text rewrites to the clang2py output: drops zero-width
        ``_<digits>`` entries, strips the bit width from the remaining
        ``_<digits>`` entries, swaps ``c_char`` bitfields to ``c_uint8``,
        widens ``c_bool`` bitfields larger than 8 bits to ``c_uint32`` and
        removes the ``ctypes.`` prefix from a fixed list of names.
        """
        self.log("Post-processing Python ctypes definitions...")

        with open(input_file, "r") as f:
            data = f.read()

        # Remove lines like ('_45', ctypes.c_int64, 0)
        data = re.sub(r"\('_[0-9]+',\s*ctypes\.[a-zA-Z0-9_]+,\s*0\),?\s*\n?", "", data)

        # Replace ('_20', ctypes.c_uint64, 64) → ('_20', ctypes.c_uint64)
        data = re.sub(
            r"\('(_[0-9]+)',\s*(ctypes\.[a-zA-Z0-9_]+),\s*[0-9]+\)", r"('\1', \2)", data
        )

        # Replace ('_20', ctypes.c_char, 8) with ('_20', ctypes.c_uint8, 8)
        data = re.sub(r"(ctypes\.c_char)(\s*,\s*\d+\))", r"ctypes.c_uint8\2", data)

        def repl(m):
            """Rewrite a c_bool bitfield wider than 8 bits as c_uint32."""
            name, bits = m.groups()
            return (
                f"('{name}', ctypes.c_uint32, {bits})" if int(bits) > 8 else m.group(0)
            )

        data = re.sub(r"\('([^']+)',\s*ctypes\.c_bool,\s*(\d+)\)", repl, data)

        # Remove ctypes. prefix from invalid entries
        invalid_ctypes = ["bpf_iter_state", "_cache_type", "fs_context_purpose"]
        for name in invalid_ctypes:
            data = re.sub(rf"\bctypes\.{name}\b", name, data)

        with open(self.output_file, "w") as f:
            f.write(data)

        self.log(f"Saved final output to {self.output_file}")

    def cleanup(self):
        """Remove temporary files if not keeping them."""
        if not self.keep_intermediate and self.temp_dir != ".":
            self.log(f"Cleaning up temporary directory: {self.temp_dir}")
            import shutil

            shutil.rmtree(self.temp_dir, ignore_errors=True)

    def convert(self):
        """Run the complete conversion pipeline.

        Any exception is reported with a traceback and turned into exit
        status 1. ``cleanup`` runs in every case, including the
        ``sys.exit`` raised by ``run_command``.
        """
        try:
            self.log("Starting BTF to Python ctypes conversion...")

            # Check dependencies
            self.check_dependencies()

            # Run conversion pipeline
            vmlinux_h = self.step1_dump_btf()
            vmlinux_processed_h = self.step2_preprocess_enums(vmlinux_h)
            vmlinux_kioctx_h = self.step2_5_process_kioctx(vmlinux_processed_h)
            vmlinux_i = self.step3_run_preprocessor(vmlinux_kioctx_h)
            vmlinux_raw_py = self.step4_convert_to_ctypes(vmlinux_i)
            self.step5_postprocess(vmlinux_raw_py)

            print(f"\n✓ Conversion complete! Output saved to: {self.output_file}")

        except Exception as e:
            print(f"\n✗ Error during conversion: {e}", file=sys.stderr)
            import traceback

            traceback.print_exc()
            sys.exit(1)
        finally:
            self.cleanup()

    def check_dependencies(self):
        """Check if required tools are available; exit with status 1 if not.

        A tool counts as missing when its ``--version`` command exits with a
        non-zero status.
        """
        self.log("Checking dependencies...")

        dependencies = {
            "bpftool": "bpftool --version",
            "clang": "clang --version",
            "clang2py": "clang2py --version",
        }

        missing = []
        for tool, cmd in dependencies.items():
            try:
                subprocess.run(cmd, shell=True, check=True, capture_output=True)
            except subprocess.CalledProcessError:
                missing.append(tool)

        if missing:
            print("Error: Missing required dependencies:", file=sys.stderr)
            for tool in missing:
                print(f" - {tool}", file=sys.stderr)
            if "clang2py" in missing:
                print("\nInstall ctypeslib2: pip install ctypeslib2", file=sys.stderr)
            sys.exit(1)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: parse the options and run one conversion."""
    usage_examples = """
Examples:
%(prog)s
%(prog)s -o kernel_types.py
%(prog)s --btf-source /sys/kernel/btf/custom_module -k -v
"""
    parser = argparse.ArgumentParser(
        description="Convert Linux kernel BTF to Python ctypes definitions",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )

    # Options are registered in the order they should appear in --help.
    add_option = parser.add_argument
    add_option(
        "--btf-source",
        default="/sys/kernel/btf/vmlinux",
        help="Path to BTF source (default: /sys/kernel/btf/vmlinux)",
    )
    add_option(
        "-o",
        "--output",
        default="vmlinux.py",
        help="Output Python file (default: vmlinux.py)",
    )
    add_option(
        "-k",
        "--keep-intermediate",
        action="store_true",
        help="Keep intermediate files (vmlinux.h, vmlinux_processed.h, etc.)",
    )
    add_option("-v", "--verbose", action="store_true", help="Enable verbose output")

    options = parser.parse_args()

    BTFConverter(
        btf_source=options.btf_source,
        output_file=options.output,
        keep_intermediate=options.keep_intermediate,
        verbose=options.verbose,
    ).convert()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user