115 Commits

Author SHA1 Message Date
80396c78a6 recursive parsing fix without ctypes in recursed type 2025-10-12 20:59:18 +05:30
8774277000 try to separate out ast node from vmlinux type 2025-10-12 01:59:14 +05:30
8743ea17f3 one recursion issue solved 2025-10-12 01:33:23 +05:30
f8844104a6 add support for single depth pointer resolution 2025-10-11 23:18:51 +05:30
3343bedd11 add extra fields to Field datatype
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-11 22:28:23 +05:30
75d3ad4fe2 format chore 2025-10-11 22:00:25 +05:30
abbf17748d format chore 2025-10-11 21:34:28 +05:30
7c559840f0 add ctype subclass identifier
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-11 20:02:20 +05:30
06773c895f add error message 2025-10-11 19:05:21 +05:30
1e3d775865 handle non-complex types along with recursion 2025-10-11 19:04:11 +05:30
168e26268e add recursive addition algorithm with mixing of ast node type and type node which is not right.
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-11 18:55:56 +05:30
2cf7b28793 extract fields from the class 2025-10-11 18:26:13 +05:30
d24d59c2ba fix structure for IR generation separation. 2025-10-11 18:11:46 +05:30
f190a33e21 init IR generation file and clarify purpose 2025-10-11 17:52:22 +05:30
eb636ef731 add dependency handler class 2025-10-11 17:49:40 +05:30
2ae3aade60 static type checks 2025-10-11 17:13:22 +05:30
f227fe9310 add dependency_node format and also cache results of symbol loader. 2025-10-11 17:13:22 +05:30
7940d02bc7 add symbol resolution to import detection 2025-10-11 17:13:21 +05:30
2483ef2840 separate vmlinux class handler 2025-10-11 17:13:21 +05:30
68e9693f9a add import parser
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-11 17:13:21 +05:30
e4575a6b1e Merge branch 'master' into vmlinux-working 2025-10-10 22:55:51 +05:30
3ec3ab30fe add vmlinux processor to codegen pipeline
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-10 22:54:14 +05:30
7fb3ecff48 initialise tbaa generation and vmlinux recursive importer modules
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-10 22:54:14 +05:30
ec59dad025 Refactor hist() calls to use dot notation 2025-10-10 22:54:13 +05:30
28b7b1620c remove todos and move to projects on github. 2025-10-10 22:54:13 +05:30
9f8e240a38 add patch for Kernel 6.14 BTF
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-10 22:54:12 +05:30
e6c05ab494 Update TODO.md 2025-10-10 22:54:12 +05:30
8aa9cf7119 Add failing oneline IfExpr conditional test 2025-10-10 22:54:11 +05:30
9683e3799f format chore 2025-10-10 22:54:11 +05:30
200d293750 add global symbol table populate function 2025-10-10 22:54:10 +05:30
ed196caebf add global symbol table populate function 2025-10-10 22:54:10 +05:30
a049796b81 add failing test 2025-10-10 22:54:10 +05:30
384fc9dd40 change order of passes 2025-10-10 22:54:09 +05:30
5f2df57e64 update globals test and todos. 2025-10-10 22:54:09 +05:30
130d8a9edc format chore 2025-10-10 22:54:08 +05:30
40ae3d825a fix broken IR generation logic for globals 2025-10-10 22:54:08 +05:30
484624104e fix broken IR generation logic for globals 2025-10-10 22:54:07 +05:30
e7c4bdb150 add global support with broken generation function 2025-10-10 22:54:07 +05:30
7210366e7d add global failing test
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-10 22:54:06 +05:30
435bf27176 Add compile to tests/failing_tests/conditionals/helper_cond.py 2025-10-10 22:54:06 +05:30
1ba27ac7cf Remove completed short term goal from TODO.md 2025-10-10 22:54:05 +05:30
e4ddec3a02 Fix expr imports 2025-10-10 22:54:05 +05:30
bc7b5c97d1 Move handle_comparator to type_normalization 2025-10-10 22:54:05 +05:30
fa720f8e6b Move convert_to_bool to type_normalization 2025-10-10 22:54:04 +05:30
eff0f66d95 Separate type_normalization from expr_pass 2025-10-10 22:54:04 +05:30
b43c252224 Fix type_deducer import in expr 2025-10-10 22:54:03 +05:30
aae7aa981d Fix expr imports 2025-10-10 22:54:03 +05:30
6f9a7301af Rework dir structure for expr 2025-10-10 22:54:02 +05:30
48923d03d4 cleanup handle_cond in functions_pass 2025-10-10 22:54:02 +05:30
019a83cf11 Add passing and.py test for conditionals 2025-10-10 22:54:01 +05:30
140d9e6e35 Add passing or.py test for conditionals 2025-10-10 22:54:01 +05:30
a351b0f1b5 Add _handle_or_or in expr_pass 2025-10-10 22:54:01 +05:30
3cb73ff0c3 Add _handle_and_op in expr_pass 2025-10-10 22:54:00 +05:30
3b08c2bede Add handle_and and handle_or handling stub in eval_expr 2025-10-10 22:54:00 +05:30
86378d6cc4 Add BoolOp handling stub in eval_expr 2025-10-10 22:53:59 +05:30
00d1c583af Add support for is and is not keywords 2025-10-10 22:53:59 +05:30
cfc246c80d Add explanation notes to failing conditionals tests 2025-10-10 22:53:58 +05:30
f3c80f9e5f Add helper_cond failing test for conditionals 2025-10-10 22:53:58 +05:30
0d3a5748dd Move map_comp test to passing 2025-10-10 22:53:57 +05:30
079431754c Add null checks for pointer derefs to avoid map_value_or_null verifier errors 2025-10-10 22:53:57 +05:30
46f5eca33d Add _deref_to_depth in expr_pass 2025-10-10 22:53:56 +05:30
7081e939fb Move _get_base_type to _get_base_type_and_depth 2025-10-10 22:53:56 +05:30
1e29460d6f Add _get_base_type to expr_pass 2025-10-10 22:53:56 +05:30
e180a89644 Add _normalize_types to handle mismatched ints, move type_mismatch test to passing 2025-10-10 22:53:55 +05:30
34a267e982 Add type_mismatch failing test for conditionals 2025-10-10 22:53:55 +05:30
c81aad7c67 Add failing struct_ptr test for conditionals 2025-10-10 22:53:54 +05:30
2e677c2c7b Fix struct_access in eval_expr, move struct_access conditional test to passing 2025-10-10 22:53:54 +05:30
4ea7b22b44 Add 'and' and 'or' BoolOps as future deliverables 2025-10-10 22:53:53 +05:30
b8b937bfca Add failing test map_comp for conditionals 2025-10-10 22:53:53 +05:30
6cc29c4fa1 Add var_comp test for conditionals 2025-10-10 22:53:52 +05:30
5451ba646d Add support for unary op 'not' in eval_expr, move not test to passing 2025-10-10 22:53:52 +05:30
7720437ca5 Add failing tests struct and not for conditionals 2025-10-10 22:53:52 +05:30
eb0a7a917d Add map test to conditionals 2025-10-10 22:53:51 +05:30
6f65903552 Add var_binop test for conditionals 2025-10-10 22:53:51 +05:30
97e74d09be Add var test for conditionals 2025-10-10 22:53:50 +05:30
9c7560ed2e Add const_binop test for conditionals 2025-10-10 22:53:50 +05:30
2979ceedcf Add const_int test for conditionals 2025-10-10 22:53:49 +05:30
745f59278f Move conditional logic to eval_expr, add _conver_to_bool, add passing bool test 2025-10-10 22:53:49 +05:30
49c59b32ca Add Boolean return support 2025-10-10 22:53:48 +05:30
ff78140a7d Eval LHS and RHS in _handle_compare 2025-10-10 22:53:48 +05:30
82ff71b753 Add _handle_cond to expr_pass 2025-10-10 22:53:48 +05:30
f46e7cd846 Reduce a condition from handle_cond 2025-10-10 22:53:47 +05:30
9d73eb67c4 Add TODO for fixing struct_kioctx issue 2025-10-10 22:53:47 +05:30
21ce041353 Refactor hist() calls to use dot notation 2025-10-10 20:45:07 +05:30
6402cf7be5 remove todos and move to projects on github. 2025-10-08 22:27:51 +05:30
9a96e1247b Merge pull request #29 from pythonbpf/smol_pp
add patch for Kernel 6.14 BTF in transpiler
2025-10-08 21:47:49 +05:30
989134f4be add patch for Kernel 6.14 BTF
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-08 21:47:02 +05:30
120aec08da Update TODO.md 2025-10-08 21:40:14 +05:30
e66ae7cc89 Add failing oneline IfExpr conditional test 2025-10-08 21:36:08 +05:30
b95fbd0ed0 rollback example 2025-10-08 14:53:51 +05:30
32dc8e6636 Merge pull request #21 from pythonbpf/globals
Adds support for globals
Note:
* I'm not merging this because it's complete, but because I don't want it to diverge from master too much.
* Stuff I still need to complete:
-> Structs and eval expressions in these globals.
-> Handling the `global` keyword.
-> Assigning back to a global and reading it from inside a function.
-> In Python, the `global` keyword is needed only for writes; reads work without declaring the name global. Allowing a direct assignment without a `global` declaration would diverge from Python semantics.
-> The above logic will be supported by the `global_sym_tab` generated using the new order of passes.
-> This needs to be fixed soon to avoid conflicts, so I'm going to do it shortly.
2025-10-08 14:48:37 +05:30
8e3942d38c format chore 2025-10-08 14:31:37 +05:30
d84ce0c6fa update helpers and change examples. 2025-10-08 13:57:09 +05:30
8d07a4cd05 add xdp struct to args
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-08 11:40:12 +05:30
8485460374 Merge pull request #26 from pythonbpf/refactor_conds
Refactor conds
2025-10-08 07:28:08 +05:30
0c977514af Add TODO for fixing struct_kioctx issue 2025-10-08 05:34:25 +05:30
1207730ce3 update vmlinux.py 2025-10-08 05:27:56 +05:30
0d9dcd122c Merge pull request #27 from pythonbpf/vmlinux
Add vmlinux transpiler from experiments
2025-10-08 05:19:44 +05:30
8a69e05ee2 fix duplicate variable in example
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-08 05:18:49 +05:30
976af290af Revert "format chore"
This reverts commit a3443ab1d5.
2025-10-08 05:17:59 +05:30
a3443ab1d5 format chore
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-08 05:16:36 +05:30
a27360482b complete vmlinux transpiler.
TODO: struct_kioctx in the x86_64 vmlinux.h has anonymous structs that refused to transpile well, so an extra rule has been written to extract only that struct's anonymous members into external named structs. Fix this in the future.
2025-10-08 05:15:29 +05:30
c423cc647d add vmlinux.py transpiler from experiment repository
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-08 00:45:30 +05:30
8554688230 Merge pull request #25 from pythonbpf/dependabot/github_actions/actions-6a14be197d
Bump the actions group with 2 updates
2025-10-06 19:32:01 +05:30
3e873f378e Bump the actions group with 2 updates
Bumps the actions group with 2 updates: [actions/checkout](https://github.com/actions/checkout) and [actions/setup-python](https://github.com/actions/setup-python).


Updates `actions/checkout` from 4 to 5
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v4...v5)

Updates `actions/setup-python` from 5 to 6
- [Release notes](https://github.com/actions/setup-python/releases)
- [Commits](https://github.com/actions/setup-python/compare/v5...v6)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-version: '5'
  dependency-type: direct:production
  update-type: version-update:semver-major
  dependency-group: actions
- dependency-name: actions/setup-python
  dependency-version: '6'
  dependency-type: direct:production
  update-type: version-update:semver-major
  dependency-group: actions
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-10-06 11:26:23 +00:00
3abe07c5b2 add global symbol table populate function 2025-10-05 14:05:10 +05:30
01bd7604ed add global symbol table populate function 2025-10-05 14:04:25 +05:30
7ae84a0d5a add failing test 2025-10-05 00:55:38 +05:30
df3f00261a change order of passes 2025-10-04 08:17:16 +05:30
ab610147a5 update globals test and todos. 2025-10-04 06:36:51 +05:30
7720fe9f9f format chore 2025-10-04 06:33:09 +05:30
7aeac86bd3 fix broken IR generation logic for globals 2025-10-04 06:32:25 +05:30
ab1c4223d5 fix broken IR generation logic for globals 2025-10-03 22:55:40 +05:30
c3a512d5cf add global support with broken generation function 2025-10-03 22:20:04 +05:30
4a60c42cd0 add global failing test
Signed-off-by: varun-r-mallya <varunrmallya@gmail.com>
2025-10-03 21:25:58 +05:30
30 changed files with 1413 additions and 203418 deletions

@@ -12,8 +12,8 @@ jobs:
     name: Format
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/checkout@v5
+      - uses: actions/setup-python@v6
        with:
          python-version: "3.x"
      - uses: pre-commit/action@v3.0.1

.gitignore

@@ -7,3 +7,5 @@ __pycache__/
 *.ll
 *.o
 .ipynb_checkpoints/
+vmlinux.py
+~*

@@ -12,7 +12,7 @@
 #
 # See https://github.com/pre-commit/pre-commit
-exclude: 'vmlinux.*\.py$'
+exclude: 'vmlinux.py'
 ci:
   autoupdate_commit_msg: "chore: update pre-commit hooks"
@@ -41,7 +41,7 @@ repos:
   - id: ruff
     args: ["--fix", "--show-fixes"]
   - id: ruff-format
-    exclude: ^(docs)|^(tests)|^(examples)
+    # exclude: ^(docs)|^(tests)|^(examples)
 # Checking static types
 - repo: https://github.com/pre-commit/mirrors-mypy

@@ -83,14 +83,14 @@ def hist() -> HashMap:
 def hello(ctx: c_void_p) -> c_int64:
     process_id = pid()
     one = 1
-    prev = hist().lookup(process_id)
+    prev = hist.lookup(process_id)
     if prev:
         previous_value = prev + 1
         print(f"count: {previous_value} with {process_id}")
-        hist().update(process_id, previous_value)
+        hist.update(process_id, previous_value)
         return c_int64(0)
     else:
-        hist().update(process_id, one)
+        hist.update(process_id, one)
         return c_int64(0)

TODO.md

@@ -1,12 +0,0 @@
-## Short term
-- Implement enough functionality to port the BCC tutorial examples in PythonBPF
-- Add all maps
-- XDP support in pylibbpf
-- ringbuf support
-- recursive expression resolution
-## Long term
-- Refactor the codebase to be better than a hackathon project
-- Port to C++ and use actual LLVM?

@@ -308,6 +308,7 @@
 "def hist() -> HashMap:\n",
 "    return HashMap(key=c_int32, value=c_uint64, max_entries=4096)\n",
 "\n",
+"\n",
 "@bpf\n",
 "@section(\"tracepoint/syscalls/sys_enter_clone\")\n",
 "def hello(ctx: c_void_p) -> c_int64:\n",
@@ -329,6 +330,7 @@
 "def LICENSE() -> str:\n",
 "    return \"GPL\"\n",
 "\n",
+"\n",
 "b = BPF()"
 ]
 },
@@ -357,7 +359,6 @@
 }
 ],
 "source": [
-"\n",
 "b.load_and_attach()\n",
 "hist = BpfMap(b, hist)\n",
 "print(\"Recording\")\n",

examples/kprobes.py (new file)

@@ -0,0 +1,29 @@
from pythonbpf import bpf, section, bpfglobal, BPF
from ctypes import c_void_p, c_int64


@bpf
@section("kretprobe/do_unlinkat")
def hello_world(ctx: c_void_p) -> c_int64:
    print("Hello, World!")
    return c_int64(0)


@bpf
@section("kprobe/do_unlinkat")
def hello_world2(ctx: c_void_p) -> c_int64:
    print("Hello, World!")
    return c_int64(0)


@bpf
@bpfglobal
def LICENSE() -> str:
    return "GPL"


b = BPF()
b.load_and_attach()

while True:
    print("running")

# Now cat /sys/kernel/debug/tracing/trace_pipe to see results of unlink kprobe.

@@ -27,7 +27,7 @@ def hello(ctx: c_void_p) -> c_int32:
     dataobj.pid = pid()
     dataobj.ts = ktime()
     # dataobj.comm = strobj
-    print(f"clone called at {dataobj.ts} by pid" f"{dataobj.pid}, comm {strobj}")
+    print(f"clone called at {dataobj.ts} by pid{dataobj.pid}, comm {strobj}")
     events.output(dataobj)
     return c_int32(0)

File diff suppressed because it is too large.

@@ -1,8 +1,8 @@
-from pythonbpf import bpf, map, section, bpfglobal, compile
+from pythonbpf import bpf, map, section, bpfglobal, compile, compile_to_ir
 from pythonbpf.helper import XDP_PASS
 from pythonbpf.maps import HashMap
-from ctypes import c_int64, c_void_p
+from ctypes import c_void_p, c_int64

 # Instructions to how to run this program
 # 1. Install PythonBPF: pip install pythonbpf
@@ -41,4 +41,5 @@ def LICENSE() -> str:
     return "GPL"

+compile_to_ir("xdp_pass.py", "xdp_pass.ll")
 compile()

@@ -4,7 +4,12 @@ from .license_pass import license_processing
 from .functions import func_proc
 from .maps import maps_proc
 from .structs import structs_proc
-from .globals_pass import globals_processing
+from .vmlinux_parser import vmlinux_proc
+from .globals_pass import (
+    globals_list_creation,
+    globals_processing,
+    populate_global_symbol_table,
+)
 from .debuginfo import DW_LANG_C11, DwarfBehaviorEnum, DebugInfoGenerator
 import os
 import subprocess
@@ -40,12 +45,16 @@ def processor(source_code, filename, module):
     for func_node in bpf_chunks:
         logger.info(f"Found BPF function/struct: {func_node.name}")

+    vmlinux_proc(tree, module)
+    populate_global_symbol_table(tree, module)
+    license_processing(tree, module)
+    globals_processing(tree, module)
     structs_sym_tab = structs_proc(tree, module, bpf_chunks)
     map_sym_tab = maps_proc(tree, module, bpf_chunks)
     func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab)
-    license_processing(tree, module)
-    globals_processing(tree, module)
+    globals_list_creation(tree, module)

 def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
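
For orientation, the reordered pipeline in processor() now runs roughly as follows (a comment sketch reconstructed from the hunk above; the rationale on the last line is an assumption, not stated in the diff):

# Pass order in processor() after this change (reconstructed from the diff):
vmlinux_proc(tree, module)                  # resolve `from vmlinux import ...` types first
populate_global_symbol_table(tree, module)  # collect @section / @bpfglobal / @map definitions
license_processing(tree, module)            # emit the license section
globals_processing(tree, module)            # emit @bpfglobal variables
structs_sym_tab = structs_proc(tree, module, bpf_chunks)
map_sym_tab = maps_proc(tree, module, bpf_chunks)
func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab)
globals_list_creation(tree, module)         # presumably last so @llvm.compiler.used sees every emitted name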

@@ -1,8 +1,121 @@
 from llvmlite import ir
 import ast
+from logging import Logger
+import logging
+
+from .type_deducer import ctypes_to_ir
+
+logger: Logger = logging.getLogger(__name__)
+
+# TODO: this is going to be a huge headache in the future.
+global_sym_tab = []
+
+
+def populate_global_symbol_table(tree, module: ir.Module):
+    for node in tree.body:
+        if isinstance(node, ast.FunctionDef):
+            for dec in node.decorator_list:
+                if (
+                    isinstance(dec, ast.Call)
+                    and isinstance(dec.func, ast.Name)
+                    and dec.func.id == "section"
+                    and len(dec.args) == 1
+                    and isinstance(dec.args[0], ast.Constant)
+                    and isinstance(dec.args[0].value, str)
+                ):
+                    global_sym_tab.append(node)
+                elif isinstance(dec, ast.Name) and dec.id == "bpfglobal":
+                    global_sym_tab.append(node)
+                elif isinstance(dec, ast.Name) and dec.id == "map":
+                    global_sym_tab.append(node)
+    return False
+
+
+def emit_global(module: ir.Module, node, name):
+    logger.info(f"global identifier {name} processing")
+    # deduce LLVM type from the annotated return
+    if not isinstance(node.returns, ast.Name):
+        raise ValueError(f"Unsupported return annotation {ast.dump(node.returns)}")
+    ty = ctypes_to_ir(node.returns.id)
+
+    # extract the return expression
+    # TODO: turn this return extractor into a generic function usable everywhere.
+    ret_stmt = node.body[0]
+    if not isinstance(ret_stmt, ast.Return) or ret_stmt.value is None:
+        raise ValueError(f"Global '{name}' has no valid return")
+    init_val = ret_stmt.value
+
+    # simple constant like "return 0"
+    if isinstance(init_val, ast.Constant):
+        llvm_init = ir.Constant(ty, init_val.value)
+    # variable reference like "return SOME_CONST"
+    elif isinstance(init_val, ast.Name):
+        # symbol resolution is needed here; unsupported for now
+        raise ValueError(f"Name reference {init_val.id} not yet supported")
+    # constructor call like "return c_int64(0)" or dataclass(...)
+    elif isinstance(init_val, ast.Call):
+        if len(init_val.args) >= 1 and isinstance(init_val.args[0], ast.Constant):
+            llvm_init = ir.Constant(ty, init_val.args[0].value)
+        else:
+            logger.info("Defaulting to zero as no constant argument found")
+            llvm_init = ir.Constant(ty, 0)
+    else:
+        raise ValueError(f"Unsupported return expr {ast.dump(init_val)}")
+
+    gvar = ir.GlobalVariable(module, ty, name=name)
+    gvar.initializer = llvm_init
+    gvar.align = 8
+    gvar.linkage = "dso_local"
+    gvar.global_constant = False
+    return gvar
+
+
+def globals_processing(tree, module):
+    """Process stuff decorated with @bpf and @bpfglobal except license and return the section name"""
+    globals_sym_tab = []
+    for node in tree.body:
+        # Skip non-assignment and non-function nodes
+        if not isinstance(node, ast.FunctionDef):
+            continue
+        # Get the name based on node type
+        if isinstance(node, ast.FunctionDef):
+            name = node.name
+        else:
+            continue
+        # Check for duplicate names
+        if name in globals_sym_tab:
+            raise SyntaxError(f"ERROR: Global name '{name}' previously defined")
+        else:
+            globals_sym_tab.append(name)
+        if isinstance(node, ast.FunctionDef) and node.name != "LICENSE":
+            decorators = [
+                dec.id for dec in node.decorator_list if isinstance(dec, ast.Name)
+            ]
+            if "bpf" in decorators and "bpfglobal" in decorators:
+                if (
+                    len(node.body) == 1
+                    and isinstance(node.body[0], ast.Return)
+                    and node.body[0].value is not None
+                    and isinstance(
+                        node.body[0].value, (ast.Constant, ast.Name, ast.Call)
+                    )
+                ):
+                    emit_global(module, node, name)
+                else:
+                    raise SyntaxError(f"ERROR: Invalid syntax for {name} global")
+    return None
+
+
-def emit_globals(module: ir.Module, names: list[str]):
+def emit_llvm_compiler_used(module: ir.Module, names: list[str]):
     """
     Emit the @llvm.compiler.used global given a list of function/global names.
     """
@@ -24,7 +137,7 @@ def emit_globals(module: ir.Module, names: list[str]):
     gv.section = "llvm.metadata"

-def globals_processing(tree, module: ir.Module):
+def globals_list_creation(tree, module: ir.Module):
     collected = ["LICENSE"]
     for node in tree.body:
@@ -40,10 +153,11 @@ def globals_processing(tree, module: ir.Module):
             ):
                 collected.append(node.name)
-            elif isinstance(dec, ast.Name) and dec.id == "bpfglobal":
-                collected.append(node.name)
+            # NOTE: all globals other than
+            # elif isinstance(dec, ast.Name) and dec.id == "bpfglobal":
+            #     collected.append(node.name)
             elif isinstance(dec, ast.Name) and dec.id == "map":
                 collected.append(node.name)

-    emit_globals(module, collected)
+    emit_llvm_compiler_used(module, collected)
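
As a quick way to see what emit_global produces, here is a minimal standalone sketch using llvmlite directly. The assumption that ctypes_to_ir("c_int64") yields ir.IntType(64), the module name, and the exact printed form are illustrative guesses, not taken from the repository:

# Minimal sketch of the IR emit_global builds for a global like `return c_int64(42)`.
# Assumption: ctypes_to_ir("c_int64") maps to ir.IntType(64).
from llvmlite import ir

module = ir.Module(name="globals_demo")
ty = ir.IntType(64)
gvar = ir.GlobalVariable(module, ty, name="g1")
gvar.initializer = ir.Constant(ty, 42)   # mirrors the ast.Call branch above
gvar.align = 8
gvar.linkage = "dso_local"
gvar.global_constant = False
print(module)  # expect something like: @"g1" = dso_local global i64 42, align 8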

@@ -15,5 +15,8 @@ def deref(ptr):
     return result if result is not None else 0

+XDP_ABORTED = ctypes.c_int64(0)
 XDP_DROP = ctypes.c_int64(1)
 XDP_PASS = ctypes.c_int64(2)
+XDP_TX = ctypes.c_int64(3)
+XDP_REDIRECT = ctypes.c_int64(4)

@@ -0,0 +1,3 @@
from .import_detector import vmlinux_proc

__all__ = ["vmlinux_proc"]

@@ -0,0 +1,120 @@
import logging
from functools import lru_cache
import importlib
from .dependency_handler import DependencyHandler
from .dependency_node import DependencyNode
import ctypes
from typing import Optional, Any

logger = logging.getLogger(__name__)


@lru_cache(maxsize=1)
def get_module_symbols(module_name: str):
    imported_module = importlib.import_module(module_name)
    return [name for name in dir(imported_module)], imported_module


def process_vmlinux_class(node, llvm_module, handler: DependencyHandler):
    symbols_in_module, imported_module = get_module_symbols("vmlinux")
    if node.name in symbols_in_module:
        vmlinux_type = getattr(imported_module, node.name)
        process_vmlinux_post_ast(vmlinux_type, llvm_module, handler)
    else:
        raise ImportError(f"{node.name} not in vmlinux")


def process_vmlinux_post_ast(
    elem_type_class, llvm_handler, handler: DependencyHandler, processing_stack=None
):
    # Initialize processing stack on first call
    if processing_stack is None:
        processing_stack = set()
    symbols_in_module, imported_module = get_module_symbols("vmlinux")
    current_symbol_name = elem_type_class.__name__
    field_table = {}
    is_complex_type = False
    containing_type: Optional[Any] = None
    ctype_complex_type: Optional[Any] = None
    type_length: Optional[int] = None
    module_name = getattr(elem_type_class, "__module__", None)
    if hasattr(elem_type_class, "_length_") and is_complex_type:
        type_length = elem_type_class._length_

    if current_symbol_name in processing_stack:
        logger.debug(
            f"Circular dependency detected for {current_symbol_name}, skipping"
        )
        return True

    # Check if already processed
    if handler.has_node(current_symbol_name):
        existing_node = handler.get_node(current_symbol_name)
        # If the node exists and is ready, we're done
        if existing_node and existing_node.is_ready:
            logger.info(f"Node {current_symbol_name} already processed and ready")
            return True

    processing_stack.add(current_symbol_name)

    if module_name == "vmlinux":
        if hasattr(elem_type_class, "_type_"):
            is_complex_type = True
            containing_type = elem_type_class._type_
            if containing_type.__module__ == "vmlinux":
                print("Very weird type ig for containing type", containing_type)
            elif containing_type.__module__ == ctypes.__name__:
                if isinstance(elem_type_class, type):
                    if issubclass(elem_type_class, ctypes.Array):
                        ctype_complex_type = ctypes.Array
                    elif issubclass(elem_type_class, ctypes._Pointer):
                        ctype_complex_type = ctypes._Pointer
                    else:
                        raise TypeError("Unsupported ctypes subclass")
                # handle ctype complex type
            else:
                raise ImportError(f"Unsupported module of {containing_type}")
        else:
            new_dep_node = DependencyNode(name=current_symbol_name)
            handler.add_node(new_dep_node)
            class_obj = getattr(imported_module, current_symbol_name)
            # Inspect the class fields
            if hasattr(class_obj, "_fields_"):
                for field_name, field_type in class_obj._fields_:
                    field_table[field_name] = field_type
            elif hasattr(class_obj, "__annotations__"):
                for field_name, field_type in class_obj.__annotations__.items():
                    field_table[field_name] = field_type
            else:
                raise TypeError("Could not get required class and definition")
            logger.info(f"Extracted fields for {current_symbol_name}: {field_table}")
            for elem_name, elem_type in field_table.items():
                local_module_name = getattr(elem_type, "__module__", None)
                if local_module_name == ctypes.__name__:
                    new_dep_node.add_field(elem_name, elem_type, ready=True)
                    logger.info(f"Field {elem_name} is direct ctypes type: {elem_type}")
                elif local_module_name == "vmlinux":
                    new_dep_node.add_field(elem_name, elem_type, ready=False)
                    logger.debug(
                        f"Processing vmlinux field: {elem_name}, type: {elem_type}"
                    )
                    if process_vmlinux_post_ast(
                        elem_type, llvm_handler, handler, processing_stack
                    ):
                        new_dep_node.set_field_ready(elem_name, True)
                else:
                    raise ValueError(
                        f"{elem_name} with type {elem_type} from module {module_name} not supported in recursive resolver"
                    )
            print("")
    else:
        raise ImportError("UNSUPPORTED Module")
    print(current_symbol_name, "DONE")
    print(f"handler readiness {handler.is_ready}")

@@ -0,0 +1,149 @@
from typing import Optional, Dict, List, Iterator
from .dependency_node import DependencyNode


class DependencyHandler:
    """
    Manages a collection of DependencyNode objects with no duplicates.

    Ensures that no two nodes with the same name can be added and provides
    methods to check readiness and retrieve specific nodes.

    Example usage:
        # Create a handler
        handler = DependencyHandler()

        # Create some dependency nodes
        node1 = DependencyNode(name="node1")
        node1.add_field("field1", str)
        node1.set_field_value("field1", "value1")

        node2 = DependencyNode(name="node2")
        node2.add_field("field1", int)

        # Add nodes to the handler
        handler.add_node(node1)
        handler.add_node(node2)

        # Check if a specific node exists
        print(handler.has_node("node1"))  # True

        # Get a reference to a node and modify it
        node = handler.get_node("node2")
        node.set_field_value("field1", 42)

        # Check if all nodes are ready
        print(handler.is_ready)  # True (both nodes now have all fields ready)
    """

    def __init__(self):
        # Using a dictionary with node names as keys ensures name uniqueness
        # and provides efficient lookups
        self._nodes: Dict[str, DependencyNode] = {}

    def add_node(self, node: DependencyNode) -> bool:
        """
        Add a dependency node to the handler.

        Args:
            node: The DependencyNode to add

        Returns:
            bool: True if the node was added, False if a node with the same name already exists

        Raises:
            TypeError: If the provided object is not a DependencyNode
        """
        if not isinstance(node, DependencyNode):
            raise TypeError(f"Expected DependencyNode, got {type(node).__name__}")

        # Check if a node with this name already exists
        if node.name in self._nodes:
            return False

        self._nodes[node.name] = node
        return True

    @property
    def is_ready(self) -> bool:
        """
        Check if all nodes are ready.

        Returns:
            bool: True if all nodes are ready (or if there are no nodes), False otherwise
        """
        if not self._nodes:
            return True
        return all(node.is_ready for node in self._nodes.values())

    def has_node(self, name: str) -> bool:
        """
        Check if a node with the given name exists.

        Args:
            name: The name to check

        Returns:
            bool: True if a node with the given name exists, False otherwise
        """
        return name in self._nodes

    def get_node(self, name: str) -> Optional[DependencyNode]:
        """
        Get a node by name for manipulation.

        Args:
            name: The name of the node to retrieve

        Returns:
            Optional[DependencyNode]: The node with the given name, or None if not found
        """
        return self._nodes.get(name)

    def remove_node(self, node_or_name) -> bool:
        """
        Remove a node by name or reference.

        Args:
            node_or_name: The node to remove or its name

        Returns:
            bool: True if the node was removed, False if not found
        """
        if isinstance(node_or_name, DependencyNode):
            name = node_or_name.name
        else:
            name = node_or_name

        if name in self._nodes:
            del self._nodes[name]
            return True
        return False

    def get_all_nodes(self) -> List[DependencyNode]:
        """
        Get all nodes stored in the handler.

        Returns:
            List[DependencyNode]: List of all nodes
        """
        return list(self._nodes.values())

    def __iter__(self) -> Iterator[DependencyNode]:
        """
        Iterate over all nodes.

        Returns:
            Iterator[DependencyNode]: Iterator over all nodes
        """
        return iter(self._nodes.values())

    def __len__(self) -> int:
        """
        Get the number of nodes in the handler.

        Returns:
            int: The number of nodes
        """
        return len(self._nodes)

@@ -0,0 +1,191 @@
from dataclasses import dataclass, field
from typing import Dict, Any, Optional

# TODO: fix the type name convention.


@dataclass
class Field:
    """Represents a field in a dependency node with its type and readiness state."""

    name: str
    type: type
    containing_type: Optional[Any]
    type_size: Optional[int]
    value: Any = None
    ready: bool = False

    def set_ready(self, is_ready: bool = True) -> None:
        """Set the readiness state of this field."""
        self.ready = is_ready

    def set_value(self, value: Any, mark_ready: bool = True) -> None:
        """Set the value of this field and optionally mark it as ready."""
        self.value = value
        if mark_ready:
            self.ready = True

    def set_type(self, given_type, mark_ready: bool = True) -> None:
        """Set the value of the type field and optionally mark it as ready."""
        self.type = given_type
        if mark_ready:
            self.ready = True

    def set_containing_type(
        self, containing_type: Optional[Any], mark_ready: bool = True
    ) -> None:
        """Set the containing_type of this field and optionally mark it as ready."""
        self.containing_type = containing_type
        if mark_ready:
            self.ready = True

    def set_type_size(self, type_size: Any, mark_ready: bool = True) -> None:
        """Set the type_size of this field and optionally mark it as ready."""
        self.type_size = type_size
        if mark_ready:
            self.ready = True


@dataclass
class DependencyNode:
    """
    A node with typed fields and readiness tracking.

    Example usage:
        # Create a dependency node
        somestruct = DependencyNode(name="struct_1")

        # Add fields with their types
        somestruct.add_field("field_1", str)
        somestruct.add_field("field_2", int)
        somestruct.add_field("field_3", str)

        # Check if the node is ready (should be False initially)
        print(f"Is node ready? {somestruct.is_ready}")  # False

        # Set some field values
        somestruct.set_field_value("field_1", "someproperty")
        somestruct.set_field_value("field_2", 30)

        # Check if the node is ready (still False because field_3 is not ready)
        print(f"Is node ready? {somestruct.is_ready}")  # False

        # Set the last field and make the node ready
        somestruct.set_field_value("field_3", "anotherproperty")

        # Now the node should be ready
        print(f"Is node ready? {somestruct.is_ready}")  # True

        # You can also mark a field as not ready
        somestruct.set_field_ready("field_3", False)

        # Now the node is not ready again
        print(f"Is node ready? {somestruct.is_ready}")  # False

        # Get all field values
        print(somestruct.get_field_values())
        # {'field_1': 'someproperty', 'field_2': 30, 'field_3': 'anotherproperty'}

        # Get only ready fields
        ready_fields = somestruct.get_ready_fields()
        print(f"Ready fields: {[field.name for field in ready_fields.values()]}")
        # ['field_1', 'field_2']
    """

    name: str
    fields: Dict[str, Field] = field(default_factory=dict)
    _ready_cache: Optional[bool] = field(default=None, repr=False)

    def add_field(
        self,
        name: str,
        field_type: type,
        initial_value: Any = None,
        containing_type: Optional[Any] = None,
        type_size: Optional[int] = None,
        ready: bool = False,
    ) -> None:
        """Add a field to the node with an optional initial value and readiness state."""
        self.fields[name] = Field(
            name=name,
            type=field_type,
            value=initial_value,
            ready=ready,
            containing_type=containing_type,
            type_size=type_size,
        )
        # Invalidate readiness cache
        self._ready_cache = None

    def get_field(self, name: str) -> Field:
        """Get a field by name."""
        return self.fields[name]

    def set_field_value(self, name: str, value: Any, mark_ready: bool = True) -> None:
        """Set a field's value and optionally mark it as ready."""
        if name not in self.fields:
            raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
        self.fields[name].set_value(value, mark_ready)
        # Invalidate readiness cache
        self._ready_cache = None

    def set_field_type(self, name: str, type: Any, mark_ready: bool = True) -> None:
        """Set a field's type and optionally mark it as ready."""
        if name not in self.fields:
            raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
        self.fields[name].set_type(type, mark_ready)
        # Invalidate readiness cache
        self._ready_cache = None

    def set_field_containing_type(
        self, name: str, containing_type: Any, mark_ready: bool = True
    ) -> None:
        """Set a field's containing_type and optionally mark it as ready."""
        if name not in self.fields:
            raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
        self.fields[name].set_containing_type(containing_type, mark_ready)
        # Invalidate readiness cache
        self._ready_cache = None

    def set_field_type_size(
        self, name: str, type_size: Any, mark_ready: bool = True
    ) -> None:
        """Set a field's type_size and optionally mark it as ready."""
        if name not in self.fields:
            raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
        self.fields[name].set_type_size(type_size, mark_ready)
        # Invalidate readiness cache
        self._ready_cache = None

    def set_field_ready(self, name: str, is_ready: bool = True) -> None:
        """Mark a field as ready or not ready."""
        if name not in self.fields:
            raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
        self.fields[name].set_ready(is_ready)
        # Invalidate readiness cache
        self._ready_cache = None

    @property
    def is_ready(self) -> bool:
        """Check if the node is ready (all fields are ready)."""
        # Use cached value if available
        if self._ready_cache is not None:
            return self._ready_cache
        # Calculate readiness only when needed
        if not self.fields:
            self._ready_cache = False
            return False
        self._ready_cache = all(elem.ready for elem in self.fields.values())
        return self._ready_cache

    def get_field_values(self) -> Dict[str, Any]:
        """Get a dictionary of field names to their values."""
        return {name: elem.value for name, elem in self.fields.items()}

    def get_ready_fields(self) -> Dict[str, Field]:
        """Get all fields that are marked as ready."""
        return {name: elem for name, elem in self.fields.items() if elem.ready}

@@ -0,0 +1,135 @@
import ast
import logging
from typing import List, Tuple, Dict
import importlib
import inspect
from .dependency_handler import DependencyHandler
from .ir_generation import IRGenerator
from .class_handler import process_vmlinux_class

logger = logging.getLogger(__name__)


def detect_import_statement(tree: ast.AST) -> List[Tuple[str, ast.ImportFrom]]:
    """
    Parse AST and detect import statements from vmlinux.
    Returns a list of tuples (module_name, imported_item) for vmlinux imports.
    Raises SyntaxError for invalid import patterns.

    Args:
        tree: The AST to parse

    Returns:
        List of tuples containing (module_name, imported_item) for each vmlinux import

    Raises:
        SyntaxError: If multiple imports from vmlinux are attempted or import * is used
    """
    vmlinux_imports = []

    for node in ast.walk(tree):
        # Handle "from vmlinux import ..." statements
        if isinstance(node, ast.ImportFrom):
            if node.module == "vmlinux":
                # Check for wildcard import: from vmlinux import *
                if any(alias.name == "*" for alias in node.names):
                    raise SyntaxError(
                        "Wildcard imports from vmlinux are not supported. "
                        "Please import specific types explicitly."
                    )
                # Check for multiple imports: from vmlinux import A, B, C
                if len(node.names) > 1:
                    imported_names = [alias.name for alias in node.names]
                    raise SyntaxError(
                        f"Multiple imports from vmlinux are not supported. "
                        f"Found: {', '.join(imported_names)}. "
                        f"Please use separate import statements for each type."
                    )
                # Check if no specific import is specified (should not happen with valid Python)
                if len(node.names) == 0:
                    raise SyntaxError(
                        "Import from vmlinux must specify at least one type."
                    )
                # Valid single import
                for alias in node.names:
                    import_name = alias.name
                    # Use alias if provided, otherwise use the original name (commented)
                    # as_name = alias.asname if alias.asname else alias.name
                    vmlinux_imports.append(("vmlinux", node))
                    logger.info(f"Found vmlinux import: {import_name}")
        # Handle "import vmlinux" statements (not typical but should be rejected)
        elif isinstance(node, ast.Import):
            for alias in node.names:
                if alias.name == "vmlinux" or alias.name.startswith("vmlinux."):
                    raise SyntaxError(
                        "Direct import of vmlinux module is not supported. "
                        "Use 'from vmlinux import <type>' instead."
                    )

    logger.info(f"Total vmlinux imports detected: {len(vmlinux_imports)}")
    return vmlinux_imports


def vmlinux_proc(tree: ast.AST, module):
    import_statements = detect_import_statement(tree)
    # initialise dependency handler
    handler = DependencyHandler()
    # initialise assignment dictionary of name to type
    assignments: Dict[str, type] = {}
    if not import_statements:
        logger.info("No vmlinux imports found")
        return

    # Import vmlinux module directly
    try:
        vmlinux_mod = importlib.import_module("vmlinux")
    except ImportError:
        logger.warning("Could not import vmlinux module")
        return

    source_file = inspect.getsourcefile(vmlinux_mod)
    if source_file is None:
        logger.warning("Cannot find source for vmlinux module")
        return
    with open(source_file, "r") as f:
        mod_ast = ast.parse(f.read(), filename=source_file)

    for import_mod, import_node in import_statements:
        for alias in import_node.names:
            imported_name = alias.name
            found = False
            for mod_node in mod_ast.body:
                if (
                    isinstance(mod_node, ast.ClassDef)
                    and mod_node.name == imported_name
                ):
                    process_vmlinux_class(mod_node, module, handler)
                    found = True
                    break
                if isinstance(mod_node, ast.Assign):
                    for target in mod_node.targets:
                        if isinstance(target, ast.Name) and target.id == imported_name:
                            process_vmlinux_assign(mod_node, module, assignments)
                            found = True
                            break
                    if found:
                        break
            if not found:
                logger.info(
                    f"{imported_name} not found as ClassDef or Assign in vmlinux"
                )

    IRGenerator(module, handler)


def process_vmlinux_assign(node, module, assignments: Dict[str, type]):
    raise NotImplementedError("Assignment handling has not been implemented yet")
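
The rules detect_import_statement enforces can be summarized with a few examples (illustrative snippets; the struct names are arbitrary placeholders):

# Illustrative import forms, per the rules detect_import_statement enforces:
from vmlinux import struct_xdp_md      # OK: exactly one type per statement
from vmlinux import struct_pt_regs     # OK: a separate statement per type

# Each of the following raises SyntaxError during parsing:
# from vmlinux import *                        (wildcard import)
# from vmlinux import struct_a, struct_b       (multiple names in one statement)
# import vmlinux                               (direct module import)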

@@ -0,0 +1,8 @@
# here, we will iterate through the dependencies and generate IR once dependencies are resolved fully
from .dependency_handler import DependencyHandler


class IRGenerator:
    def __init__(self, module, handler):
        self.module = module
        self.handler: DependencyHandler = handler
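
IRGenerator is still a stub. A minimal sketch of the loop its comment describes might look like the following (a hypothetical generate method; mapping each node to an identified LLVM struct is my assumption, not the project's implementation):

# Hypothetical sketch only: iterate resolved dependency nodes and declare
# one identified LLVM struct per vmlinux type, using the llvmlite API.
from llvmlite import ir

def generate(self):
    for node in self.handler.get_all_nodes():
        if not node.is_ready:
            continue  # skip nodes whose field types are still unresolved
        # an opaque named struct as a placeholder for the real field layout
        self.module.context.get_identified_type(node.name)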

@@ -1,11 +1,10 @@
-#include <linux/bpf.h>
+#include "vmlinux.h"
 #include <bpf/bpf_helpers.h>
-#define u64 unsigned long long
-#define u32 unsigned int
+#include <bpf/bpf_endian.h>

 SEC("xdp")
 int hello(struct xdp_md *ctx) {
-  bpf_printk("Hello, World!\n");
+  bpf_printk("Hello, World! %ud \n", ctx->data);
   return XDP_PASS;
 }

@@ -0,0 +1,27 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <linux/types.h>

struct test_struct {
    __u64 a;
    __u64 b;
};

struct test_struct w = {};
volatile __u64 prev_time = 0;

SEC("tracepoint/syscalls/sys_enter_execve")
int trace_execve(void *ctx)
{
    bpf_printk("previous %ul now %ul", w.b, w.a);
    __u64 ts = bpf_ktime_get_ns();
    bpf_printk("prev %ul now %ul", prev_time, ts);
    w.a = ts;
    w.b = prev_time;
    prev_time = ts;
    return 0;
}

char LICENSE[] SEC("license") = "GPL";

tests/c-form/kprobe.bpf.c (new file)

@@ -0,0 +1,19 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "Dual BSD/GPL";

SEC("kprobe/do_unlinkat")
int kprobe_execve(struct pt_regs *ctx)
{
    bpf_printk("unlinkat created");
    return 0;
}

SEC("kretprobe/do_unlinkat")
int kretprobe_execve(struct pt_regs *ctx)
{
    bpf_printk("unlinkat returned\n");
    return 0;
}

@@ -0,0 +1,18 @@
from pythonbpf import bpf, section, bpfglobal, compile
from ctypes import c_void_p, c_int64


@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def hello_world(ctx: c_void_p) -> c_int64:
    print("Hello, World!") if True else print("Goodbye, World!")
    return


@bpf
@bpfglobal
def LICENSE() -> str:
    return "GPL"


compile()

@@ -0,0 +1,109 @@
import logging
from pythonbpf import compile, bpf, section, bpfglobal, compile_to_ir
from ctypes import c_void_p, c_int64, c_int32


@bpf
@bpfglobal
def somevalue() -> c_int32:
    return c_int32(42)


@bpf
@bpfglobal
def somevalue2() -> c_int64:
    return c_int64(69)


@bpf
@bpfglobal
def somevalue1() -> c_int32:
    return c_int32(42)


# --- Passing examples ---

# Simple constant return
@bpf
@bpfglobal
def g1() -> c_int64:
    return c_int64(42)


# Constructor with one constant argument
@bpf
@bpfglobal
def g2() -> c_int64:
    return c_int64(69)


# --- Failing examples ---

# No return annotation
# @bpf
# @bpfglobal
# def g3():
#     return 42

# Return annotation is complex
# @bpf
# @bpfglobal
# def g4() -> List[int]:
#     return []

# # Return is missing
# @bpf
# @bpfglobal
# def g5() -> c_int64:
#     pass

# # Return is a variable reference
# # TODO: maybe fix this sometime later. It defaults to 0
# CONST = 5
# @bpf
# @bpfglobal
# def g6() -> c_int64:
#     return c_int64(CONST)


# Constructor with multiple args
# TODO: this is not working. should it work ?
@bpf
@bpfglobal
def g7() -> c_int64:
    return c_int64(1)


# Dataclass call
# TODO: fails with dataclass
# @dataclass
# class Point:
#     x: c_int64
#     y: c_int64
# @bpf
# @bpfglobal
# def g8() -> Point:
#     return Point(1, 2)


@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def sometag(ctx: c_void_p) -> c_int64:
    print("test")
    global somevalue
    somevalue = 2
    print(f"{somevalue}")
    return c_int64(1)


@bpf
@bpfglobal
def LICENSE() -> str:
    return "GPL"


compile_to_ir("globals.py", "globals.ll", loglevel=logging.INFO)
compile()

@@ -11,6 +11,7 @@ from ctypes import c_void_p, c_int64
 # We cannot allocate space for the intermediate type now.
 # We probably need to track the ref/deref chain for each variable.

+
 @bpf
 @map
 def count() -> HashMap:
@@ -0,0 +1,23 @@
import logging
from pythonbpf import compile, bpf, section, bpfglobal, compile_to_ir
from ctypes import c_void_p, c_int64


# This should not pass as somevalue is not declared at all.
@bpf
@section("tracepoint/syscalls/sys_enter_execve")
def sometag(ctx: c_void_p) -> c_int64:
    print("test")
    print(f"{somevalue}")  # noqa: F821
    return c_int64(1)


@bpf
@bpfglobal
def LICENSE() -> str:
    return "GPL"


compile_to_ir("globals.py", "globals.ll", loglevel=logging.INFO)
compile()

@@ -0,0 +1,48 @@
from pythonbpf import bpf, map, section, bpfglobal, compile, compile_to_ir
from pythonbpf.maps import HashMap
from pythonbpf.helper import XDP_PASS
from vmlinux import struct_xdp_md
from vmlinux import struct_ring_buffer_per_cpu  # noqa: F401

# from vmlinux import struct_xdp_buff  # noqa: F401
# from vmlinux import struct_xdp_md
from ctypes import c_int64

# Instructions to how to run this program
# 1. Install PythonBPF: pip install pythonbpf
# 2. Run the program: python examples/xdp_pass.py
# 3. Run the program with sudo: sudo tools/check.sh run examples/xdp_pass.o
# 4. Attach object file to any network device with something like ./check.sh xdp examples/xdp_pass.o tailscale0
# 5. send traffic through the device and observe effects


@bpf
@map
def count() -> HashMap:
    return HashMap(key=c_int64, value=c_int64, max_entries=1)


@bpf
@section("xdp")
def hello_world(ctx: struct_xdp_md) -> c_int64:
    key = 0
    one = 1
    prev = count().lookup(key)
    if prev:
        prevval = prev + 1
        print(f"count: {prevval}")
        count().update(key, prevval)
        return XDP_PASS
    else:
        count().update(key, one)
        return XDP_PASS


@bpf
@bpfglobal
def LICENSE() -> str:
    return "GPL"


compile_to_ir("xdp_pass.py", "xdp_pass.ll")

tools/vmlinux-gen.py (new executable file)

@@ -0,0 +1,379 @@
#!/usr/bin/env python3
"""
BTF to Python ctypes Converter

Converts Linux kernel BTF (BPF Type Format) to Python ctypes definitions.

This tool automates the process of:
1. Dumping BTF from vmlinux
2. Preprocessing enum definitions
3. Processing struct kioctx to extract anonymous nested structs
4. Running the C preprocessor
5. Converting to Python ctypes using clang2py
6. Post-processing the output

Requirements:
- bpftool
- clang
- ctypeslib2 (pip install ctypeslib2)
"""

import argparse
import os
import re
import subprocess
import sys
import tempfile


class BTFConverter:
    def __init__(
        self,
        btf_source="/sys/kernel/btf/vmlinux",
        output_file="vmlinux.py",
        keep_intermediate=False,
        verbose=False,
    ):
        self.btf_source = btf_source
        self.output_file = output_file
        self.keep_intermediate = keep_intermediate
        self.verbose = verbose
        self.temp_dir = tempfile.mkdtemp() if not keep_intermediate else "."

    def log(self, message):
        """Print message if verbose mode is enabled."""
        if self.verbose:
            print(f"[*] {message}")

    def run_command(self, cmd, description):
        """Run a shell command and handle errors."""
        self.log(f"{description}...")
        try:
            result = subprocess.run(
                cmd, shell=True, check=True, capture_output=True, text=True
            )
            if self.verbose and result.stdout:
                print(result.stdout)
            return result
        except subprocess.CalledProcessError as e:
            print(f"Error during {description}:", file=sys.stderr)
            print(e.stderr, file=sys.stderr)
            sys.exit(1)

    def step1_dump_btf(self):
        """Step 1: Dump BTF from vmlinux."""
        vmlinux_h = os.path.join(self.temp_dir, "vmlinux.h")
        cmd = f"bpftool btf dump file {self.btf_source} format c > {vmlinux_h}"
        self.run_command(cmd, "Dumping BTF from vmlinux")
        return vmlinux_h

    def step2_preprocess_enums(self, input_file):
        """Step 2: Preprocess enum definitions."""
        self.log("Preprocessing enum definitions...")
        with open(input_file, "r") as f:
            original_code = f.read()

        # Extract anonymous enums
        enums = re.findall(
            r"(?<!typedef\s)(enum\s*\{[^}]*\})\s*(\w+)\s*(?::\s*\d+)?\s*;",
            original_code,
        )
        enum_defs = [enum_block + ";" for enum_block, _ in enums]

        # Replace anonymous enums with int declarations
        processed_code = re.sub(
            r"(?<!typedef\s)enum\s*\{[^}]*\}\s*(\w+)\s*(?::\s*\d+)?\s*;",
            r"int \1;",
            original_code,
        )

        # Prepend enum definitions
        if enum_defs:
            enum_text = "\n".join(enum_defs) + "\n\n"
            processed_code = enum_text + processed_code

        output_file = os.path.join(self.temp_dir, "vmlinux_processed.h")
        with open(output_file, "w") as f:
            f.write(processed_code)
        return output_file

    def step2_5_process_kioctx(self, input_file):
        # TODO: this is a bad bug and design decision; mostly just this one struct (kioctx) has the issue.
        """Step 2.5: Process struct kioctx to extract nested anonymous structs."""
        self.log("Processing struct kioctx nested structs...")
        with open(input_file, "r") as f:
            content = f.read()

        # Pattern to match struct kioctx with its full body (handles multiple nesting levels)
        kioctx_pattern = (
            r"struct\s+kioctx\s*\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\}\s*;"
        )

        def process_kioctx_replacement(match):
            full_struct = match.group(0)
            self.log(f"Found struct kioctx, length: {len(full_struct)} chars")

            # Extract the struct body (everything between outermost { and })
            body_match = re.search(
                r"struct\s+kioctx\s*\{(.*)\}\s*;", full_struct, re.DOTALL
            )
            if not body_match:
                return full_struct
            body = body_match.group(1)

            # Find all anonymous structs within the body
            # Pattern: struct { ... } followed by ; (not a member name)
            # anon_struct_pattern = r"struct\s*\{[^}]*\}"
            anon_structs = []
            anon_counter = 4  # Start from 4, counting down to 1

            def replace_anonymous_struct(m):
                nonlocal anon_counter
                anon_struct_content = m.group(0)
                # Extract the body of the anonymous struct
                anon_body_match = re.search(
                    r"struct\s*\{(.*)\}", anon_struct_content, re.DOTALL
                )
                if not anon_body_match:
                    return anon_struct_content
                anon_body = anon_body_match.group(1)
                # Create the named struct definition
                anon_name = f"__anon{anon_counter}"
                member_name = f"a{anon_counter}"
                # Store the struct definition
                anon_structs.append(f"struct {anon_name} {{{anon_body}}};")
                anon_counter -= 1
                # Return the member declaration
                return f"struct {anon_name} {member_name}"

            # Process the body, finding and replacing anonymous structs
            # We need to be careful to only match anonymous structs followed by ;
            processed_body = body

            # Find all occurrences and process them
            pattern_with_semicolon = r"struct\s*\{([^}]*)\}\s*;"
            matches = list(re.finditer(pattern_with_semicolon, body, re.DOTALL))
            if not matches:
                self.log("No anonymous structs found in kioctx")
                return full_struct
            self.log(f"Found {len(matches)} anonymous struct(s)")

            # Process in reverse order to maintain string positions
            for match in reversed(matches):
                anon_struct_content = match.group(1)
                start_pos = match.start()
                end_pos = match.end()
                # Create the named struct definition
                anon_name = f"__anon{anon_counter}"
                member_name = f"a{anon_counter}"
                # Store the struct definition
                anon_structs.insert(0, f"struct {anon_name} {{{anon_struct_content}}};")
                # Replace in the body
                replacement = f"struct {anon_name} {member_name};"
                processed_body = (
                    processed_body[:start_pos] + replacement + processed_body[end_pos:]
                )
                anon_counter -= 1

            # Rebuild the complete definition
            if anon_structs:
                # Prepend the anonymous struct definitions
                anon_definitions = "\n".join(anon_structs) + "\n\n"
                new_struct = f"struct kioctx {{{processed_body}}};"
                return anon_definitions + new_struct
            else:
                return full_struct

        # Apply the transformation
        processed_content = re.sub(
            kioctx_pattern, process_kioctx_replacement, content, flags=re.DOTALL
        )

        output_file = os.path.join(self.temp_dir, "vmlinux_kioctx_processed.h")
        with open(output_file, "w") as f:
            f.write(processed_content)
        self.log(f"Saved kioctx-processed output to {output_file}")
        return output_file

    def step3_run_preprocessor(self, input_file):
        """Step 3: Run the C preprocessor."""
        output_file = os.path.join(self.temp_dir, "vmlinux.i")
        cmd = f"clang -E {input_file} > {output_file}"
        self.run_command(cmd, "Running C preprocessor")
        return output_file

    def step4_convert_to_ctypes(self, input_file):
        """Step 4: Convert to Python ctypes using clang2py."""
        output_file = os.path.join(self.temp_dir, "vmlinux_raw.py")
        cmd = (
            f"clang2py {input_file} -o {output_file} "
            f'--clang-args="-fno-ms-extensions -I/usr/include -I/usr/include/linux"'
        )
        self.run_command(cmd, "Converting to Python ctypes")
        return output_file

    def step5_postprocess(self, input_file):
        """Step 5: Post-process the generated Python file."""
        self.log("Post-processing Python ctypes definitions...")
        with open(input_file, "r") as f:
            data = f.read()

        # Remove lines like ('_45', ctypes.c_int64, 0)
        data = re.sub(r"\('_[0-9]+',\s*ctypes\.[a-zA-Z0-9_]+,\s*0\),?\s*\n?", "", data)

        # Replace ('_20', ctypes.c_uint64, 64) → ('_20', ctypes.c_uint64)
        data = re.sub(
            r"\('(_[0-9]+)',\s*(ctypes\.[a-zA-Z0-9_]+),\s*[0-9]+\)", r"('\1', \2)", data
        )

        # Replace ('_20', ctypes.c_char, 8) with ('_20', ctypes.c_uint8, 8)
        data = re.sub(r"(ctypes\.c_char)(\s*,\s*\d+\))", r"ctypes.c_uint8\2", data)

        # Replace c_bool members whose bitfield width is greater than 8
        def repl(m):
            name, bits = m.groups()
            return (
                f"('{name}', ctypes.c_uint32, {bits})" if int(bits) > 8 else m.group(0)
            )

        data = re.sub(r"\('([^']+)',\s*ctypes\.c_bool,\s*(\d+)\)", repl, data)

        # Remove ctypes. prefix from invalid entries
        invalid_ctypes = ["bpf_iter_state", "_cache_type", "fs_context_purpose"]
        for name in invalid_ctypes:
            data = re.sub(rf"\bctypes\.{name}\b", name, data)

        with open(self.output_file, "w") as f:
            f.write(data)
        self.log(f"Saved final output to {self.output_file}")

    def cleanup(self):
        """Remove temporary files if not keeping them."""
        if not self.keep_intermediate and self.temp_dir != ".":
            self.log(f"Cleaning up temporary directory: {self.temp_dir}")
            import shutil

            shutil.rmtree(self.temp_dir, ignore_errors=True)

    def convert(self):
        """Run the complete conversion pipeline."""
        try:
            self.log("Starting BTF to Python ctypes conversion...")

            # Check dependencies
            self.check_dependencies()

            # Run conversion pipeline
            vmlinux_h = self.step1_dump_btf()
            vmlinux_processed_h = self.step2_preprocess_enums(vmlinux_h)
            vmlinux_kioctx_h = self.step2_5_process_kioctx(vmlinux_processed_h)
            vmlinux_i = self.step3_run_preprocessor(vmlinux_kioctx_h)
            vmlinux_raw_py = self.step4_convert_to_ctypes(vmlinux_i)
            self.step5_postprocess(vmlinux_raw_py)

            print(f"\n✓ Conversion complete! Output saved to: {self.output_file}")
        except Exception as e:
            print(f"\n✗ Error during conversion: {e}", file=sys.stderr)
            import traceback

            traceback.print_exc()
            sys.exit(1)
        finally:
            self.cleanup()

    def check_dependencies(self):
        """Check if required tools are available."""
        self.log("Checking dependencies...")
        dependencies = {
            "bpftool": "bpftool --version",
            "clang": "clang --version",
            "clang2py": "clang2py --version",
        }
        missing = []
        for tool, cmd in dependencies.items():
            try:
                subprocess.run(cmd, shell=True, check=True, capture_output=True)
            except subprocess.CalledProcessError:
                missing.append(tool)
        if missing:
            print("Error: Missing required dependencies:", file=sys.stderr)
            for tool in missing:
                print(f"  - {tool}", file=sys.stderr)
            if "clang2py" in missing:
                print("\nInstall ctypeslib2: pip install ctypeslib2", file=sys.stderr)
            sys.exit(1)


def main():
    parser = argparse.ArgumentParser(
        description="Convert Linux kernel BTF to Python ctypes definitions",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s
  %(prog)s -o kernel_types.py
  %(prog)s --btf-source /sys/kernel/btf/custom_module -k -v
        """,
    )
    parser.add_argument(
        "--btf-source",
        default="/sys/kernel/btf/vmlinux",
        help="Path to BTF source (default: /sys/kernel/btf/vmlinux)",
    )
    parser.add_argument(
        "-o",
        "--output",
        default="vmlinux.py",
        help="Output Python file (default: vmlinux.py)",
    )
    parser.add_argument(
        "-k",
        "--keep-intermediate",
        action="store_true",
        help="Keep intermediate files (vmlinux.h, vmlinux_processed.h, etc.)",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Enable verbose output"
    )
    args = parser.parse_args()

    converter = BTFConverter(
        btf_source=args.btf_source,
        output_file=args.output,
        keep_intermediate=args.keep_intermediate,
        verbose=args.verbose,
    )
    converter.convert()


if __name__ == "__main__":
    main()
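
To make the two header rewrites concrete, here is a small illustrative check of the enum rule, plus the corresponding kioctx rewrite sketched in comments. The input snippets are hypothetical, not taken from a real vmlinux.h:

import re

# The enum rule from step2_preprocess_enums applied to a hypothetical member:
code = "enum { RED = 0, BLUE = 1 } color : 8 ;"
print(re.sub(
    r"(?<!typedef\s)enum\s*\{[^}]*\}\s*(\w+)\s*(?::\s*\d+)?\s*;",
    r"int \1;",
    code,
))  # prints: int color;
# The matched enum body itself is hoisted to the top of the header.

# The kioctx rule from step2_5_process_kioctx, sketched on a hypothetical input
# (the __anon4/a4 names follow the counter logic in that method):
#   before: struct kioctx { struct { int x; int y; } ; };
#   after:  struct __anon4 { int x; int y; };
#           struct kioctx { struct __anon4 a4; };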